#ifndef LEXER_UTILS_H
#define LEXER_UTILS_H

/*
 * Shared types and helper prototypes for the lexer: lexer state, token
 * kinds, token construction/destruction and stream navigation helpers.
 */

/*
 * NOTE(review): this header originally carried three #include directives
 * whose header names were lost. The two below are the ones the declarations
 * in this file demonstrably need; restore the third from version control if
 * other code relied on it being pulled in transitively.
 */
#include <stdbool.h> /* bool */
#include <sys/types.h> /* ssize_t */

/* State carried by the lexer from one token to the next. */
struct lexer_context
{
    // position in the stream where the previous token ended.
    char *end_previous_token;
    // number of characters left to lex in the current stream.
    ssize_t remaining_chars;
    struct token *previous_token;
    struct token *current_token;
};

/* @brief: frees all fields of ctx and sets ctx to NULL. */
void destroy_lexer_context(struct lexer_context **ctx);

/* Quoting state of the lexer. */
enum lexing_mode
{
    LEXER_NORMAL,
    LEXER_QUOTE,
    LEXER_DOUBLE_QUOTE
};

/*
 * Kinds of tokens produced by the lexer.
 * The numeric values follow declaration order, starting at TOKEN_NULL = 0;
 * append new kinds with care if any code depends on those values.
 */
enum token_type
{
    // Blanks
    TOKEN_NULL = 0,
    TOKEN_EOF,
    TOKEN_NEWLINE,

    // Words
    TOKEN_WORD,
    TOKEN_ASSIGNMENT_WORD,

    // Special characters
    TOKEN_GRAVE,
    TOKEN_SEMICOLON,
    TOKEN_COMMENT,
    TOKEN_STAR,
    TOKEN_BACKSLASH,
    TOKEN_DOLLAR,
    TOKEN_LEFT_PAREN,
    TOKEN_RIGHT_PAREN,
    TOKEN_LEFT_BRACKET,
    TOKEN_RIGHT_BRACKET,
    TOKEN_PIPE,
    TOKEN_NEGATION,

    // Redirections
    TOKEN_REDIR_LEFT,
    TOKEN_REDIR_RIGHT,
    TOKEN_REDIR_LEFT_RIGHT,
    TOKEN_REDIR_DOUBLE_RIGHT,
    TOKEN_REDIR_LEFT_AMP,
    TOKEN_REDIR_RIGHT_AMP,
    TOKEN_REDIR_RIGHT_PIPE,
    TOKEN_IONUMBER,

    // Keywords
    TOKEN_IF,
    TOKEN_THEN,
    TOKEN_ELSE,
    TOKEN_FI,
    TOKEN_ELIF,
    TOKEN_AND,
    TOKEN_OR,
    TOKEN_FOR,
    TOKEN_WHILE,
    TOKEN_UNTIL,
    TOKEN_CASE,
    TOKEN_EXPORT
};

/* A lexed token: its kind and the text it was built from. */
struct token
{
    enum token_type type;
    char *data;
};

// used to give info from lexing when creating a new token.
struct token_info
{
    // useful to detect IO numbers.
    // tells us if we only lexed digits in current token.
    bool only_digits;

    // useful to detect assignments, and syntax errors with '='.
    bool has_equal;
};

/* @return: true if a special character from the grammar was found at
 * stream[i], false otherwise.
 */
bool is_special_char(char *stream, ssize_t i);

/* @brief: return a newly allocated token, with the type corresponding
 * to the info given in arguments.
 * The data contains [size] char, starting from [begin].
 *
 * @return: NULL on error, a token otherwise.
 */
struct token *new_token(char *begin, ssize_t size, struct token_info *info);

/* @brief: frees the token given in argument. */
void free_token(struct token **tok);

/*
 * @brief: checks if the stream used for the last token creation is empty.
 * If it is, it calls stream_read() from IO_backend,
 * and sets [remaining_chars].
 * If not, it starts from the end of the last token.
 * Also trims left blanks before returning.
 *
 * @return: nothing.
 * NOTE(review): this comment used to advertise a char* return value, but the
 * prototype returns void; the stream to tokenise is presumably reached
 * through [ctx] -- confirm against the implementation.
 */
void stream_init(struct lexer_context *ctx);

/* @brief: finds the next '\n' or EOF character,
 * starting at [ctx->end_previous_token],
 * and updates the stream and remaining_chars accordingly.
 */
void go_end_of_line(struct lexer_context *ctx);

/* @brief: this function is called when we found a special character
 * in the stream. This can either be an operator (e.g. '>>' or '<&' etc),
 * or a special char (e.g. '\' or '#' etc).
 *
 * @return: the length of the operator/special char found (can be 1, 2 or 3).
 * -1 on error.
 */
ssize_t len_op_sepchar(char *stream, ssize_t i);

/* @brief: drops the current stream and asks IOB for a new one. */
void get_next_stream(struct lexer_context *ctx);

#endif /* LEXER_UTILS_H */