diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c index 4e246e2..00c8fa6 100644 --- a/src/lexer/lexer.c +++ b/src/lexer/lexer.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -28,33 +29,95 @@ static void save_state(char *stream, ssize_t i) */ static bool is_special_char(char c) { - return c == '\'' || c == '\n' || c == ';'; + return c == '\'' || c == '\n' || c == ';' || c == EOF; } -/* @return: true if a keyword from the grammar was found, false otherwise. +/* @brief: if a special character is found at [begin], + * [tok->type] is set accordingly * */ -static bool is_keyword(char *stream, ssize_t i) +static void set_token_spechar(struct token *tok, char *begin, ssize_t size) { - if (i == 2) + if (size != 1) + return; + if (begin[0] == EOF) { - return strcmp(stream, "if") == 0 || strcmp(stream, "fi") == 0; + tok->type = TOKEN_EOF; } - if (i == 4) + else if (begin[0] == '\n') { - return strcmp(stream, "then") || strcmp(stream, "else") - || strcmp(stream, "elif"); + tok->type = TOKEN_NEWLINE; + } + else if (begin[0] == '\'') + { + tok->type = TOKEN_QUOTE; + } + else if (begin[0] == ';') + { + tok->type = TOKEN_SEMICOLON; } - return false; } -char *new_token(char *begin, ssize_t size) +/* @brief: if a keyword is found at [begin], + * [tok->type] is set accordingly + * + */ +static void set_token_keyword(struct token *tok, char *begin, ssize_t size) { - char *res = calloc(size + 1, sizeof(char)); - if (res == NULL) + if (tok->type != TOKEN_NULL) + return; + if (size == 2 && strncmp(begin, "if", size) == 0) + { + tok->type = TOKEN_IF; + } + else if (size == 2 && strncmp(begin, "fi", size) == 0) + { + tok->type = TOKEN_FI; + } + else if (size == 4 && strncmp(begin, "then", size) == 0) + { + tok->type = TOKEN_THEN; + } + else if (size == 4 && strncmp(begin, "else", size) == 0) + { + tok->type = TOKEN_ELSE; + } +} + +/* @brief: if token_type has not yet been set, then it is a TOKEN_WORD + * Also allocates the data and fills it.
+ */ +static void set_token_word(struct token *tok, char *begin, ssize_t size) +{ + if (tok->type != TOKEN_NULL) + return; + tok->data = calloc(size + 1, sizeof(char)); + if (tok->data == NULL) + return; + strncpy(tok->data, begin, size); + tok->type = TOKEN_WORD; +} + +struct token *new_token(char *begin, ssize_t size) +{ + struct token *tok = calloc(1, sizeof(struct token)); + if (tok == NULL) return NULL; - strncpy(res, begin, size); - return res; + + set_token_spechar(tok, begin, size); + set_token_keyword(tok, begin, size); + set_token_word(tok, begin, size); + + return tok; +} + +void free_token(struct token *tok) +{ + if (tok == NULL) + return; + if (tok->data != NULL) + free(tok->data); + free(tok); } char *stream_init(void) @@ -77,7 +140,7 @@ char *stream_init(void) return stream; } -char *peek_token(void) +struct token *peek_token(void) { char *stream = stream_init(); @@ -95,18 +158,13 @@ char *peek_token(void) { break; } - else if (is_keyword(stream, i)) - { - i++; - break; - } i++; } return new_token(stream, i); } -char *pop_token(void) +struct token *pop_token(void) { char *stream = stream_init(); @@ -124,11 +182,6 @@ char *pop_token(void) { break; } - else if (is_keyword(stream, i)) - { - i++; - break; - } i++; } diff --git a/src/lexer/lexer.h b/src/lexer/lexer.h index b1b8827..9d9ea06 100644 --- a/src/lexer/lexer.h +++ b/src/lexer/lexer.h @@ -3,19 +3,39 @@ #include +enum token_type +{ + TOKEN_NULL = 0, + TOKEN_EOF, + TOKEN_WORD, + TOKEN_NEWLINE, + TOKEN_QUOTE, + TOKEN_SEMICOLON, + TOKEN_IF, + TOKEN_THEN, + TOKEN_ELSE, + TOKEN_FI +}; + +struct token +{ + enum token_type type; + char *data; +}; + /* * @brief: returns the next (newly allocated) token without consuming it. - * if end of input is reached, returns EOF. + * if end of input is reached, returns a token of type TOKEN_EOF. * */ -char *peek_token(void); +struct token *peek_token(void); /* * @brief: returns the next (newly allocated) token and consumes it. - * if end of input is reached, returns EOF.
+ * if end of input is reached, returns a token of type TOKEN_EOF. * */ -char *pop_token(void); +struct token *pop_token(void); /* * @warning: NOT IMPLEMENTED. @@ -23,16 +43,21 @@ char *pop_token(void); * @note: maybe usefull for subshells. */ -char *get_token_str(void); +struct token *get_token_str(void); /* - * @brief: return a newly allocated token. - * This token contains [size] chars, starting from [begin]. + * @brief: return a newly allocated token, with the corresponding type. + * data is NULL for special-char/keyword types, else [size] chars of [begin]. * - * @return: NULL on error, null-terminated char* otherwise. + * @return: NULL on error, a token otherwise. * */ -char *new_token(char *begin, ssize_t size); +struct token *new_token(char *begin, ssize_t size); + +/* @brief: frees the token given in argument + * + */ +void free_token(struct token *tok); /* * @brief: checks if the stream used for the last token creation is empty.