diff --git a/src/lexer/Makefile.am b/src/lexer/Makefile.am index dd11411..4dad036 100644 --- a/src/lexer/Makefile.am +++ b/src/lexer/Makefile.am @@ -2,7 +2,7 @@ lib_LIBRARIES = liblexer.a liblexer_a_SOURCES = \ lexer.c \ - lexer.h + lexer_utils.c liblexer_a_CPPFLAGS = -I$(top_srcdir)/src diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c index 9097d9e..b6ea0ad 100644 --- a/src/lexer/lexer.c +++ b/src/lexer/lexer.c @@ -7,10 +7,21 @@ #include #include +#include "lexer_utils.h" #include "../io_backend/io_backend.h" #include "../utils/string_utils/string_utils.h" -// ######## STATIC FUNCTIONS ############## +/* @return: true if a special character from the grammar was found, + * false otherwise. + */ +static bool is_special_char(char c) +{ + if (c == EOF) + return true; + + char special_chars[] = "\n'\"`;#|&\\$(){}<>*"; + return strchr(special_chars, c) != NULL; +} /* @brief: sets the ctx->current_token to [tok]. * this function is called by token_peek(). @@ -29,7 +40,6 @@ static void update_previous_token(struct token *tok, struct lexer_context *ctx) free_token(&ctx->previous_token); ctx->previous_token = tok; } - /* @brief: updates the current position in the stream. * [stream] += [i] * Also frees the last sent token, and sets it to ctx->current_token. @@ -45,194 +55,6 @@ static void save_state(char *stream, ssize_t i, struct lexer_context *ctx) update_current_token(NULL, ctx); } -/* @return: true if a special character from the grammar was found, - * false otherwise. - */ -static bool is_special_char(char c) -{ - if (c == EOF) - return true; - - char special_chars[] = "\n'\"`;#|&\\$(){}<>*"; - return strchr(special_chars, c) != NULL; -} - -/* @brief: if a special character is found at [begin], - * [tok->token_type] is set accordingly - */ -static void set_token_spechar(struct token *tok, char *begin, ssize_t size) -{ - if (size != 1) - return; - switch (begin[0]) - { - case EOF: - tok->type = TOKEN_EOF; - break; - case ';': - tok->type = TOKEN_SEMICOLON; - break; - case '\n': - tok->type = TOKEN_NEWLINE; - break; - case '\'': - tok->type = TOKEN_QUOTE; - break; - case '"': - tok->type = TOKEN_DOUBLE_QUOTE; - break; - case '`': - tok->type = TOKEN_GRAVE; - break; - case '#': - tok->type = TOKEN_COMMENT; - break; - case '|': - tok->type = TOKEN_PIPE; - break; - case '&': - tok->type = TOKEN_AMPERSAND; - break; - case '\\': - tok->type = TOKEN_BACKSLASH; - break; - case '$': - tok->type = TOKEN_DOLLAR; - break; - case '(': - tok->type = TOKEN_LEFT_PAREN; - break; - case ')': - tok->type = TOKEN_RIGHT_PAREN; - break; - case '{': - tok->type = TOKEN_LEFT_BRACKET; - break; - case '}': - tok->type = TOKEN_RIGHT_BRACKET; - break; - case '<': - tok->type = TOKEN_LESS; - break; - case '>': - tok->type = TOKEN_GREATER; - break; - case '*': - tok->type = TOKEN_STAR; - break; - default: - break; - } -} - -/* @brief: if a keyword is found at [begin], - * [tok->token_type] is set accordingly - */ -static void set_token_keyword(struct token *tok, char *begin, ssize_t size) -{ - if (tok->type != TOKEN_NULL || size == 0) - return; - if (strncmp(begin, "if", size) == 0) - { - tok->type = TOKEN_IF; - } - else if (strncmp(begin, "fi", size) == 0) - { - tok->type = TOKEN_FI; - } - else if (strncmp(begin, "then", size) == 0) - { - tok->type = TOKEN_THEN; - } - else if (strncmp(begin, "else", size) == 0) - { - tok->type = TOKEN_ELSE; - } - else if (strncmp(begin, "elif", size) == 0) - { - tok->type = TOKEN_ELIF; - } - - // no keywords found. - if (tok->type == TOKEN_NULL) - return; - - tok->data = calloc(size + 1, sizeof(char)); - if (tok->data == NULL) - return; - strncpy(tok->data, begin, size); -} - -/* @brief: if token_type has not yet been set, then it is a TOKEN_WORD - * Also allocates the data and fills it. - */ -static void set_token_word(struct token *tok, char *begin, ssize_t size) -{ - if (tok->type == TOKEN_NULL && size != 0) - { - tok->type = TOKEN_WORD; - tok->data = calloc(size + 1, sizeof(char)); - if (tok->data == NULL) - return; - strncpy(tok->data, begin, size); - } -} - -void destroy_lexer_context(struct lexer_context **ctx) -{ - if (ctx == NULL || *ctx == NULL) - return; - if ((*ctx)->previous_token != NULL) - free((*ctx)->previous_token); - if ((*ctx)->current_token != NULL) - free((*ctx)->current_token); - free(*ctx); - *ctx = NULL; -} - -struct token *new_token(char *begin, ssize_t size) -{ - struct token *tok = calloc(1, sizeof(struct token)); - if (tok == NULL) - return NULL; - - set_token_spechar(tok, begin, size); - set_token_keyword(tok, begin, size); - set_token_word(tok, begin, size); - - return tok; -} - -void free_token(struct token **tok) -{ - if (tok == NULL || *tok == NULL) - return; - if ((*tok)->data != NULL) - free((*tok)->data); - free(*tok); - *tok = NULL; -} - -char *stream_init(struct lexer_context *ctx) -{ - char *stream; - - if (ctx->previous_token == NULL) // at the begining - { - ctx->remaining_chars = stream_read(&stream); - } - else - { - stream = ctx->end_previous_token; - } - - char *trimed_stream = trim_blank_left(stream); - ctx->remaining_chars -= trimed_stream - stream; - stream = trimed_stream; - - return stream; -} - /* * @brief: Updates the lexing_mode to LEXER_NORMAL * if the SECOND quote is found at stream[i]. @@ -268,6 +90,7 @@ static bool update_lexing_mode(char *stream, ssize_t i, return *lexing_mode != mode_before_update; } + struct token *peek_token(struct lexer_context *ctx) { // we already created the upcoming token during the previous call to peek() @@ -311,6 +134,12 @@ struct token *peek_token(struct lexer_context *ctx) } struct token *tok = new_token(stream, i); + // if token is comment, we don't want it + + if (tok->type == TOKEN_COMMENT) + { + // tok = peek_token(); + } update_current_token(tok, ctx); return tok; } diff --git a/src/lexer/lexer.h b/src/lexer/lexer.h index 50003b2..0d9ed32 100644 --- a/src/lexer/lexer.h +++ b/src/lexer/lexer.h @@ -1,69 +1,9 @@ #ifndef LEXER_H #define LEXER_H +#include "lexer_utils.h" + #include - -struct lexer_context -{ - char *end_previous_token; - ssize_t remaining_chars; - - struct token *previous_token; - struct token *current_token; -}; - -/* @brief: frees all fields of ctx and sets ctx to NULL. - */ -void destroy_lexer_context(struct lexer_context **ctx); - -enum lexing_mode -{ - LEXER_NORMAL, - LEXER_QUOTE, - LEXER_DOUBLE_QUOTE -}; - -enum token_type -{ - // Special characters - TOKEN_NULL = 0, - TOKEN_EOF, - TOKEN_WORD, - TOKEN_NEWLINE, - - // WARNING: quote and double quote should never be used inside a token. - TOKEN_QUOTE, - TOKEN_DOUBLE_QUOTE, - - TOKEN_GRAVE, - TOKEN_SEMICOLON, - TOKEN_COMMENT, - TOKEN_PIPE, - TOKEN_AMPERSAND, - TOKEN_BACKSLASH, - TOKEN_DOLLAR, - TOKEN_LEFT_PAREN, - TOKEN_RIGHT_PAREN, - TOKEN_LEFT_BRACKET, - TOKEN_RIGHT_BRACKET, - TOKEN_LESS, - TOKEN_GREATER, - TOKEN_STAR, - - // Keywords - TOKEN_IF, - TOKEN_THEN, - TOKEN_ELSE, - TOKEN_FI, - TOKEN_ELIF -}; - -struct token -{ - enum token_type type; - char *data; -}; - /* * @brief: returns the next (newly allocated) token without consuming it. * if end of input is reached, returns a token of type TOKEN_EOF. @@ -88,27 +28,4 @@ struct token *pop_token(struct lexer_context *ctx); struct token *get_token_str(void); -/* - * @brief: return a newly allocated token, with the corresponding type. - * The data contains [size] char, starting from [begin]. - * - * @return: NULL on error, a token otherwise. - */ -struct token *new_token(char *begin, ssize_t size); - -/* @brief: frees the token given in argument - */ -void free_token(struct token **tok); - -/* - * @brief: checks if the stream used for the last token creation is empty. - * If it is, it calls stream_read() from IO_backend, - * and sets [remaing_chars]. - * If not, it starts from the end of the last token. - * Also trims left blanks before returning. - * - * @return: char* stream from which we tokenise. - */ -char *stream_init(struct lexer_context *ctx); - #endif /* ! LEXER_H */ diff --git a/src/lexer/lexer_utils.c b/src/lexer/lexer_utils.c new file mode 100644 index 0000000..a956cb9 --- /dev/null +++ b/src/lexer/lexer_utils.c @@ -0,0 +1,183 @@ +#include "lexer_utils.h" + +#include +#include + +#include "../io_backend/io_backend.h" +#include "../utils/string_utils/string_utils.h" + +/* @brief: if a special character is found at [begin], + * [tok->token_type] is set accordingly + */ +static void set_token_spechar(struct token *tok, char *begin, ssize_t size) +{ + if (size != 1) + return; + switch (begin[0]) + { + case EOF: + tok->type = TOKEN_EOF; + break; + case ';': + tok->type = TOKEN_SEMICOLON; + break; + case '\n': + tok->type = TOKEN_NEWLINE; + break; + case '\'': + tok->type = TOKEN_QUOTE; + break; + case '"': + tok->type = TOKEN_DOUBLE_QUOTE; + break; + case '`': + tok->type = TOKEN_GRAVE; + break; + case '#': + tok->type = TOKEN_COMMENT; + break; + case '|': + tok->type = TOKEN_PIPE; + break; + case '&': + tok->type = TOKEN_AMPERSAND; + break; + case '\\': + tok->type = TOKEN_BACKSLASH; + break; + case '$': + tok->type = TOKEN_DOLLAR; + break; + case '(': + tok->type = TOKEN_LEFT_PAREN; + break; + case ')': + tok->type = TOKEN_RIGHT_PAREN; + break; + case '{': + tok->type = TOKEN_LEFT_BRACKET; + break; + case '}': + tok->type = TOKEN_RIGHT_BRACKET; + break; + case '<': + tok->type = TOKEN_LESS; + break; + case '>': + tok->type = TOKEN_GREATER; + break; + case '*': + tok->type = TOKEN_STAR; + break; + default: + break; + } +} + +/* @brief: if a keyword is found at [begin], + * [tok->token_type] is set accordingly + */ +static void set_token_keyword(struct token *tok, char *begin, ssize_t size) +{ + if (tok->type != TOKEN_NULL || size == 0) + return; + if (strncmp(begin, "if", size) == 0) + { + tok->type = TOKEN_IF; + } + else if (strncmp(begin, "fi", size) == 0) + { + tok->type = TOKEN_FI; + } + else if (strncmp(begin, "then", size) == 0) + { + tok->type = TOKEN_THEN; + } + else if (strncmp(begin, "else", size) == 0) + { + tok->type = TOKEN_ELSE; + } + else if (strncmp(begin, "elif", size) == 0) + { + tok->type = TOKEN_ELIF; + } + + // no keywords found. + if (tok->type == TOKEN_NULL) + return; + + tok->data = calloc(size + 1, sizeof(char)); + if (tok->data == NULL) + return; + strncpy(tok->data, begin, size); +} + +/* @brief: if token_type has not yet been set, then it is a TOKEN_WORD + * Also allocates the data and fills it. + */ +static void set_token_word(struct token *tok, char *begin, ssize_t size) +{ + if (tok->type == TOKEN_NULL && size != 0) + { + tok->type = TOKEN_WORD; + tok->data = calloc(size + 1, sizeof(char)); + if (tok->data == NULL) + return; + strncpy(tok->data, begin, size); + } +} + +struct token *new_token(char *begin, ssize_t size) +{ + struct token *tok = calloc(1, sizeof(struct token)); + if (tok == NULL) + return NULL; + + set_token_spechar(tok, begin, size); + set_token_keyword(tok, begin, size); + set_token_word(tok, begin, size); + + return tok; +} + +void destroy_lexer_context(struct lexer_context **ctx) +{ + if (ctx == NULL || *ctx == NULL) + return; + if ((*ctx)->previous_token != NULL) + free((*ctx)->previous_token); + if ((*ctx)->current_token != NULL) + free((*ctx)->current_token); + free(*ctx); + *ctx = NULL; +} + +void free_token(struct token **tok) +{ + if (tok == NULL || *tok == NULL) + return; + if ((*tok)->data != NULL) + free((*tok)->data); + free(*tok); + *tok = NULL; +} + +char *stream_init(struct lexer_context *ctx) +{ + char *stream; + + if (ctx->previous_token == NULL) // at the begining + { + ctx->remaining_chars = stream_read(&stream); + } + else + { + stream = ctx->end_previous_token; + } + + char *trimed_stream = trim_blank_left(stream); + ctx->remaining_chars -= trimed_stream - stream; + stream = trimed_stream; + + return stream; +} diff --git a/src/lexer/lexer_utils.h b/src/lexer/lexer_utils.h new file mode 100644 index 0000000..503c1ec --- /dev/null +++ b/src/lexer/lexer_utils.h @@ -0,0 +1,91 @@ +#ifndef LEXER_UTILS_H +#define LEXER_UTILS_H + +#include +#include + +struct lexer_context +{ + char *end_previous_token; + ssize_t remaining_chars; + + struct token *previous_token; + struct token *current_token; +}; + +/* @brief: frees all fields of ctx and sets ctx to NULL. + */ +void destroy_lexer_context(struct lexer_context **ctx); + +enum lexing_mode +{ + LEXER_NORMAL, + LEXER_QUOTE, + LEXER_DOUBLE_QUOTE +}; + +enum token_type +{ + // Special characters + TOKEN_NULL = 0, + TOKEN_EOF, + TOKEN_WORD, + TOKEN_NEWLINE, + + // WARNING: quote and double quote should never be used inside a token. + TOKEN_QUOTE, + TOKEN_DOUBLE_QUOTE, + + TOKEN_GRAVE, + TOKEN_SEMICOLON, + TOKEN_COMMENT, + TOKEN_PIPE, + TOKEN_AMPERSAND, + TOKEN_BACKSLASH, + TOKEN_DOLLAR, + TOKEN_LEFT_PAREN, + TOKEN_RIGHT_PAREN, + TOKEN_LEFT_BRACKET, + TOKEN_RIGHT_BRACKET, + TOKEN_LESS, + TOKEN_GREATER, + TOKEN_STAR, + + // Keywords + TOKEN_IF, + TOKEN_THEN, + TOKEN_ELSE, + TOKEN_FI, + TOKEN_ELIF +}; + +struct token +{ + enum token_type type; + char *data; +}; + +/* + * @brief: return a newly allocated token, with the corresponding type. + * The data contains [size] char, starting from [begin]. + * + * @return: NULL on error, a token otherwise. + */ +struct token *new_token(char *begin, ssize_t size); + +/* @brief: frees the token given in argument + */ +void free_token(struct token **tok); + +/* + * @brief: checks if the stream used for the last token creation is empty. + * If it is, it calls stream_read() from IO_backend, + * and sets [remaing_chars]. + * If not, it starts from the end of the last token. + * Also trims left blanks before returning. + * + * @return: char* stream from which we tokenise. + */ +char *stream_init(struct lexer_context *ctx); + +#endif /* LEXER_UTILS_H */