/* 42sh/src/lexer/lexer_utils.h
 * Lexer utility declarations: lexing context, token types, and the
 * helper functions used while tokenising the input stream.
 */
#ifndef LEXER_UTILS_H
#define LEXER_UTILS_H
#include <stdbool.h>
#include <stddef.h>
#include <sys/types.h>
/* Mutable state shared across successive token reads.
 * Created by the lexer and released with destroy_lexer_context(). */
struct lexer_context
{
/* Points just past the last character consumed by the previous token in
 * the current input stream (stream presumably owned by the IO backend —
 * NOTE(review): confirm ownership against stream_init()/get_next_stream()). */
char *end_previous_token;
/* Characters left unread in the current stream; signed (ssize_t),
 * presumably so a negative value can flag "no stream / read error" —
 * TODO confirm against the implementation. */
ssize_t remaining_chars;
/* Last token that was emitted. */
struct token *previous_token;
/* Token currently being produced. */
struct token *current_token;
};
/* @brief: frees all fields of the context, then the context itself,
 * and sets the caller's handle (*ctx) to NULL — the double pointer
 * exists so the caller cannot keep a dangling pointer.
 */
void destroy_lexer_context(struct lexer_context **ctx);
/* Quoting state the lexer is in; presumably controls which characters
 * are treated as special while scanning — TODO confirm in lexer.c. */
enum lexing_mode
{
LEXER_NORMAL, // outside any quoting
LEXER_QUOTE, // inside single quotes '...'
LEXER_DOUBLE_QUOTE // inside double quotes "..."
};
/* Every token category the lexer can emit.
 * TOKEN_NULL is explicitly 0 so a zero-initialized token reads as
 * "no token". */
enum token_type
{
// Blanks
TOKEN_NULL = 0, // absent / not-yet-assigned token
TOKEN_EOF, // end of input
TOKEN_NEWLINE,
// words
TOKEN_WORD,
TOKEN_ASSIGNMENT_WORD, // word containing '=' (see token_info.has_equal)
// Special characters
TOKEN_GRAVE,
TOKEN_SEMICOLON,
TOKEN_COMMENT,
TOKEN_STAR,
TOKEN_BACKSLASH,
TOKEN_DOLLAR,
TOKEN_LEFT_PAREN,
TOKEN_RIGHT_PAREN,
TOKEN_LEFT_BRACKET,
TOKEN_RIGHT_BRACKET,
TOKEN_PIPE,
TOKEN_NEGATION,
// Redirections
TOKEN_REDIR_LEFT, // <
TOKEN_REDIR_RIGHT, // >
TOKEN_REDIR_LEFT_RIGHT, // <>
TOKEN_REDIR_DOUBLE_RIGHT, // >>
TOKEN_REDIR_LEFT_AMP, // <&
TOKEN_REDIR_RIGHT_AMP, // >&
TOKEN_REDIR_RIGHT_PIPE, // >|
TOKEN_IONUMBER, // all-digit word directly before a redirection operator
// Keywords
TOKEN_IF,
TOKEN_THEN,
TOKEN_ELSE,
TOKEN_FI,
TOKEN_ELIF,
TOKEN_AND,
TOKEN_OR,
TOKEN_FOR,
TOKEN_WHILE,
TOKEN_UNTIL,
TOKEN_CASE,
TOKEN_EXPORT
};
/* One lexed token: its category plus the raw text it was built from.
 * Allocated by new_token(); released with free_token(). */
struct token
{
enum token_type type;
/* Copy of the lexed characters — presumably NUL-terminated and owned
 * by the token; TODO confirm new_token() terminates it. */
char *data;
};
// Used to pass info gathered during lexing when creating a new token.
struct token_info
{
// Useful to detect IO numbers (TOKEN_IONUMBER):
// tells us whether we only lexed digits in the current token.
bool only_digits;
// Useful to detect assignments (TOKEN_ASSIGNMENT_WORD),
// and syntax errors with '='.
bool has_equal;
};
/* @brief: tests the character at stream[i] against the grammar's set of
 * special characters.
 *
 * @return: true if a special character from the grammar was found at
 * stream[i], false otherwise.
 */
bool is_special_char(char *stream, ssize_t i);
/* @brief: return a newly allocated token, with the type corresponding
 * to the info given in arguments.
 * The data contains [size] chars, copied starting from [begin].
 * Ownership of the token transfers to the caller (release with
 * free_token()).
 *
 * @return: NULL on error, a token otherwise.
 */
struct token *new_token(char *begin, ssize_t size, struct token_info *info);
/* @brief: frees the token given in argument (its data and the token
 * itself), and presumably sets *tok to NULL — the double pointer exists
 * so the caller's handle can be cleared; TODO confirm in lexer_utils.c.
 */
void free_token(struct token **tok);
/*
 * @brief: checks if the stream used for the last token creation is empty.
 * If it is, it calls stream_read() from IO_backend,
 * and sets [remaining_chars].
 * If not, it starts from the end of the last token.
 * Also trims left blanks before returning.
 *
 * @note: the function returns void (the original comment claimed a
 * char* return); the resulting stream position is communicated through
 * [ctx] — presumably via ctx->end_previous_token.
 */
void stream_init(struct lexer_context *ctx);
/* @brief: finds the next '\n' or EOF character,
 * starting at [ctx->end_previous_token],
 * and updates the stream and remaining_chars accordingly.
 *
 * @note: presumably used to skip the remainder of a line, e.g. after a
 * '#' comment — TODO confirm against the call sites.
 */
void go_end_of_line(struct lexer_context *ctx);
/* @brief: this function is called when we found a special character
 * in the stream at index [i]. This can either be an operator
 * (e.g. ">>" or "<&" etc), or a special char (e.g. '\' or '#' etc).
 * @return: the length of the operator/special char found (can be 1, 2 or 3).
 * -1 on error.
 */
ssize_t len_op_sepchar(char *stream, ssize_t i);
/* @brief: drops the current stream and asks the IO backend (IOB) for a
 * new one, updating [ctx] accordingly — presumably resetting
 * ctx->end_previous_token and ctx->remaining_chars; TODO confirm.
 */
void get_next_stream(struct lexer_context *ctx);
#endif /* LEXER_UTILS_H */