From 204bb515afab0e337e53140a075eb7c43a7c0109 Mon Sep 17 00:00:00 2001 From: Matteo Flebus Date: Tue, 13 Jan 2026 19:44:56 +0100 Subject: [PATCH 1/3] feat(lexer): rework using a struct token and functions associated - unstable --- src/lexer/lexer.c | 52 ++++++++++++++++++++++++----------------------- src/lexer/lexer.h | 42 ++++++++++++++++++++++++++++++-------- 2 files changed, 60 insertions(+), 34 deletions(-) diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c index 4e246e2..08dd74a 100644 --- a/src/lexer/lexer.c +++ b/src/lexer/lexer.c @@ -28,31 +28,43 @@ static void save_state(char *stream, ssize_t i) */ static bool is_special_char(char c) { - return c == '\'' || c == '\n' || c == ';'; + return c == '\'' || c == '\n' || c == ';' || c == 'EOF'; } -/* @return: true if a keyword from the grammar was found, false otherwise. - * - */ -static bool is_keyword(char *stream, ssize_t i) +static void new_token_keyword(struct token *tok, char *begin, ssize_t size) { - if (i == 2) + if (strncmp(begin, "if", size) == 0) { - return strcmp(stream, "if") == 0 || strcmp(stream, "fi") == 0; + tok->type = TOKEN_IF; } - if (i == 4) + if (strncmp(begin, "fi", size) == 0) { - return strcmp(stream, "then") || strcmp(stream, "else") - || strcmp(stream, "elif"); + tok->type = TOKEN_FI; + } + if (strncmp(begin, "then", size) == 0) + { + tok->type = TOKEN_THEN; + } + if (strncmp(begin, "else", size) == 0) + { + tok->type = TOKEN_ELSE; } - return false; } -char *new_token(char *begin, ssize_t size) +struct token *new_token(char *begin, ssize_t size) { - char *res = calloc(size + 1, sizeof(char)); + struct token *res = calloc(1, sizeof(struct token)); if (res == NULL) return NULL; + + // checks which type of token + + // is special char + + // is keyword + + // otherwise -> WORD + char *token_data = calloc(size + 1, sizeof(char)); strncpy(res, begin, size); return res; } @@ -77,7 +89,7 @@ char *stream_init(void) return stream; } -char *peek_token(void) +struct token *peek_token(void) { char *stream = stream_init(); @@ -95,18 +107,13 @@ char *peek_token(void) { break; } - else if (is_keyword(stream, i)) - { - i++; - break; - } i++; } return new_token(stream, i); } -char *pop_token(void) +struct token *pop_token(void) { char *stream = stream_init(); @@ -124,11 +131,6 @@ char *pop_token(void) { break; } - else if (is_keyword(stream, i)) - { - i++; - break; - } i++; } diff --git a/src/lexer/lexer.h b/src/lexer/lexer.h index b1b8827..a4d9e13 100644 --- a/src/lexer/lexer.h +++ b/src/lexer/lexer.h @@ -3,19 +3,38 @@ #include +enum token_type +{ + TOKEN_EOF, + TOKEN_WORD, + TOKEN_NEWLINE, + TOKEN_QUOTE, + TOKEN_SEMICOLON, + TOKEN_IF, + TOKEN_THEN, + TOKEN_ELSE, + TOKEN_FI +}; + +struct token +{ + enum token_type type; + char* data; +}; + /* * @brief: returns the next (newly allocated) token without consuming it. - * if end of input is reached, returns EOF. + * if end of input is reached, returns a token of type TOKEN_EOF. * */ -char *peek_token(void); +struct token *peek_token(void); /* * @brief: returns the next (newly allocated) token and consumes it. - * if end of input is reached, returns EOF. + * if end of input is reached, returns a token of type TOKEN_EOF. * */ -char *pop_token(void); +struct token *pop_token(void); /* * @warning: NOT IMPLEMENTED. @@ -23,16 +42,21 @@ char *pop_token(void); * @note: maybe usefull for subshells. */ -char *get_token_str(void); +struct token *get_token_str(void); /* - * @brief: return a newly allocated token. - * This token contains [size] chars, starting from [begin]. + * @brief: return a newly allocated token, with the corresponding type. + * The data contains [size] char, starting from [begin]. * - * @return: NULL on error, null-terminated char* otherwise. + * @return: NULL on error, a token otherwise. * */ -char *new_token(char *begin, ssize_t size); +struct token *new_token(char *begin, ssize_t size); + +/* @brief: frees the token given in argument + * + */ +void free_token(struct token* tok); /* * @brief: checks if the stream used for the last token creation is empty. From 8262fdece89edf72eef760a8834b7d269bc40f34 Mon Sep 17 00:00:00 2001 From: Matteo Flebus Date: Tue, 13 Jan 2026 22:00:02 +0100 Subject: [PATCH 2/3] feat(lexer): rework with struct token done --- src/lexer/lexer.c | 78 ++++++++++++++++++++++++++++++++++++++--------- src/lexer/lexer.h | 1 + 2 files changed, 65 insertions(+), 14 deletions(-) diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c index 08dd74a..d42df48 100644 --- a/src/lexer/lexer.c +++ b/src/lexer/lexer.c @@ -31,42 +31,92 @@ static bool is_special_char(char c) return c == '\'' || c == '\n' || c == ';' || c == 'EOF'; } -static void new_token_keyword(struct token *tok, char *begin, ssize_t size) + +/* @brief: if a special character is found at [begin], + * [tok->token_type] is set accordingly + * + */ +static void set_token_spechar(struct token *tok, char *begin, ssize_t size) { + if (size != 1) + return; + if (begin[0] == 'EOF') + { + tok->type = TOKEN_EOF; + } + else if (begin[0] == ';') + { + tok->type = TOKEN_NEWLINE; + } + else if (begin[0] == '\'') + { + tok->type = TOKEN_QUOTE; + } + else if (begin[0] == ';') + { + tok->type = TOKEN_SEMICOLON; + } +} + + +/* @brief: if a keyword is found at [begin], + * [tok->token_type] is set accordingly + * + */ +static void set_token_keyword(struct token *tok, char *begin, ssize_t size) +{ + if (tok->type != TOKEN_NULL) + return; if (strncmp(begin, "if", size) == 0) { tok->type = TOKEN_IF; } - if (strncmp(begin, "fi", size) == 0) + else if (strncmp(begin, "fi", size) == 0) { tok->type = TOKEN_FI; } - if (strncmp(begin, "then", size) == 0) + else if (strncmp(begin, "then", size) == 0) { tok->type = TOKEN_THEN; } - if (strncmp(begin, "else", size) == 0) + else if (strncmp(begin, "else", size) == 0) { tok->type = TOKEN_ELSE; } } +/* @brief: if token_type has not yet been set, then it is a TOKEN_WORD + * Also allocates the data and fills it. + */ +static void set_token_word(struct token *tok, char *begin, ssize_t size) +{ + if (tok->token_type == TOKEN_NULL) + { + char *token_data = calloc(size + 1, sizeof(char)); + strncpy(res, begin, size); + } +} + struct token *new_token(char *begin, ssize_t size) { - struct token *res = calloc(1, sizeof(struct token)); - if (res == NULL) + struct token *tok = calloc(1, sizeof(struct token)); + if (tok == NULL) return NULL; - // checks which type of token + set_token_spechar(tok, begin, size); + set_token_keyword(tok, begin, size); + set_token_word(tok, begin, size); - // is special char + return tok; +} - // is keyword - - // otherwise -> WORD - char *token_data = calloc(size + 1, sizeof(char)); - strncpy(res, begin, size); - return res; +void free_token(struct token *tok) +{ + if (tok == NULL) + return; + if (tok->data != NULL) + free(tok->data); + free(tok); } char *stream_init(void) diff --git a/src/lexer/lexer.h b/src/lexer/lexer.h index a4d9e13..fe7a024 100644 --- a/src/lexer/lexer.h +++ b/src/lexer/lexer.h @@ -5,6 +5,7 @@ enum token_type { + TOKEN_NULL = 0, TOKEN_EOF, TOKEN_WORD, TOKEN_NEWLINE, From 58fd9d530e64b0abed2660bc398207d7573c8f7c Mon Sep 17 00:00:00 2001 From: Matteo Flebus Date: Tue, 13 Jan 2026 22:07:40 +0100 Subject: [PATCH 3/3] fix(lexer): includes + typo + clang format --- src/lexer/lexer.c | 15 ++++++++------- src/lexer/lexer.h | 4 ++-- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c index d42df48..00c8fa6 100644 --- a/src/lexer/lexer.c +++ b/src/lexer/lexer.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -28,10 +29,9 @@ static void save_state(char *stream, ssize_t i) */ static bool is_special_char(char c) { - return c == '\'' || c == '\n' || c == ';' || c == 'EOF'; + return c == '\'' || c == '\n' || c == ';' || c == EOF; } - /* @brief: if a special character is found at [begin], * [tok->token_type] is set accordingly * @@ -40,7 +40,7 @@ static void set_token_spechar(struct token *tok, char *begin, ssize_t size) { if (size != 1) return; - if (begin[0] == 'EOF') + if (begin[0] == EOF) { tok->type = TOKEN_EOF; } @@ -58,7 +58,6 @@ static void set_token_spechar(struct token *tok, char *begin, ssize_t size) } } - /* @brief: if a keyword is found at [begin], * [tok->token_type] is set accordingly * @@ -90,10 +89,12 @@ static void set_token_keyword(struct token *tok, char *begin, ssize_t size) */ static void set_token_word(struct token *tok, char *begin, ssize_t size) { - if (tok->token_type == TOKEN_NULL) + if (tok->type == TOKEN_NULL) { - char *token_data = calloc(size + 1, sizeof(char)); - strncpy(res, begin, size); + tok->data = calloc(size + 1, sizeof(char)); + if (tok->data == NULL) + return; + strncpy(tok->data, begin, size); } } diff --git a/src/lexer/lexer.h b/src/lexer/lexer.h index fe7a024..9d9ea06 100644 --- a/src/lexer/lexer.h +++ b/src/lexer/lexer.h @@ -20,7 +20,7 @@ enum token_type struct token { enum token_type type; - char* data; + char *data; }; /* @@ -57,7 +57,7 @@ struct token *new_token(char *begin, ssize_t size); /* @brief: frees the token given in argument * */ -void free_token(struct token* tok); +void free_token(struct token *tok); /* * @brief: checks if the stream used for the last token creation is empty.