From 10ce140e37838bcfa1278434ae8675d5598e9a5e Mon Sep 17 00:00:00 2001 From: Matteo Flebus Date: Fri, 16 Jan 2026 19:31:58 +0100 Subject: [PATCH] fix(parser + lexer): interaction -- WIP --- src/lexer/lexer.c | 65 ++++++++++++++++++++++++++++++-------- src/lexer/lexer.h | 9 ++++-- src/parser/parser.c | 34 ++++++++++++-------- src/parser/parser.h | 18 +++++++++++ src/parser/parsing_utils.c | 21 ++---------- 5 files changed, 99 insertions(+), 48 deletions(-) diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c index 852bd2d..68366cf 100644 --- a/src/lexer/lexer.c +++ b/src/lexer/lexer.c @@ -13,16 +13,40 @@ static char *end_last_token; static ssize_t remaining_chars; static bool at_beginning = true; +static struct token *last_token; +static struct token *current_token; -/* @brief: saves state for the next call the the lexer. + +/* @brief: sets the current_token to [tok]. + * this function is called by peek_token(). + */ +static void update_current_token(struct token* tok) +{ + current_token = tok; +} + +/* @brief: frees the last token and sets it to [tok]. + * Also sets current_token to NULL. + * this function is called by pop_token(). * */ -static void save_state(char *stream, ssize_t i) +static void update_last_token(struct token* tok) +{ + free_token(&last_token); + last_token = tok; +} + +/* @brief: saves state for the next call to the lexer. + * this function is called by pop_token(). 
+ * + */ +static void save_state(char *stream, ssize_t i, struct token *tok) { remaining_chars -= i; end_last_token = stream + i; at_beginning = false; - return; + + update_last_token(tok); } /* @return: true if a special character from the grammar was found, @@ -67,7 +91,7 @@ static void set_token_spechar(struct token *tok, char *begin, ssize_t size) */ static void set_token_keyword(struct token *tok, char *begin, ssize_t size) { - if (tok->type != TOKEN_NULL) + if (tok->type != TOKEN_NULL || size == 0) return; if (strncmp(begin, "if", size) == 0) { @@ -101,7 +125,7 @@ static void set_token_keyword(struct token *tok, char *begin, ssize_t size) */ static void set_token_word(struct token *tok, char *begin, ssize_t size) { - if (tok->type == TOKEN_NULL) + if (tok->type == TOKEN_NULL && size != 0) { tok->type = TOKEN_WORD; tok->data = calloc(size + 1, sizeof(char)); @@ -124,13 +148,14 @@ struct token *new_token(char *begin, ssize_t size) return tok; } -void free_token(struct token *tok) +void free_token(struct token **tok) { - if (tok == NULL) + if (tok == NULL || *tok == NULL) return; - if (tok->data != NULL) - free(tok->data); - free(tok); + if ((*tok)->data != NULL) + free((*tok)->data); + free(*tok); + *tok = NULL; } char *stream_init(void) @@ -156,6 +181,12 @@ char *stream_init(void) struct token *peek_token(void) { + // EOF looping mode + if (current_token != NULL && current_token->type == TOKEN_EOF) + { + return current_token; + } + char *stream = stream_init(); ssize_t i = 0; @@ -175,11 +206,18 @@ struct token *peek_token(void) i++; } - return new_token(stream, i); + struct token *tok = new_token(stream, i); + update_current_token(tok); + return tok; } struct token *pop_token(void) { + if (last_token != NULL && last_token->type == TOKEN_EOF) + { + free_token(&last_token); + return NULL; + } char *stream = stream_init(); ssize_t i = 0; @@ -199,7 +237,8 @@ struct token *pop_token(void) i++; } - save_state(stream, i); + struct token *tok = new_token(stream, i); + 
+ save_state(stream, i, tok); - return new_token(stream, i); + return tok; } diff --git a/src/lexer/lexer.h b/src/lexer/lexer.h index 332355f..1b46523 100644 --- a/src/lexer/lexer.h +++ b/src/lexer/lexer.h @@ -26,7 +26,8 @@ struct token /* * @brief: returns the next (newly allocated) token without consuming it. - * if end of input is reached, returns a token of type TOKEN_EOF. + * if end of input is reached, enters EOF looping mode, + * returning only the same token of type TOKEN_EOF. * */ struct token *peek_token(void); @@ -34,6 +35,10 @@ struct token *peek_token(void); /* * @brief: returns the next (newly allocated) token and consumes it. * if end of input is reached, returns a token of type TOKEN_EOF. + * It also frees the last token created if there was one. + * @warning: if the last returned token was a token EOF, it frees it + * and returns NULL. This means that after peeking a token EOF + * in the parser, there must be EXACTLY ONE call to pop_token(). * */ struct token *pop_token(void); @@ -58,7 +63,7 @@ struct token *new_token(char *begin, ssize_t size); /* @brief: frees the token given in argument * */ -void free_token(struct token *tok); +void free_token(struct token **tok); /* * @brief: checks if the stream used for the last token creation is empty. diff --git a/src/parser/parser.c b/src/parser/parser.c index 9d05701..b9c07e7 100644 --- a/src/parser/parser.c +++ b/src/parser/parser.c @@ -17,33 +17,39 @@ struct ast *get_ast() { - struct list *result_list = NULL; - struct ast *current_node = NULL; + // struct list *result_list = NULL; + // struct ast *current_node = NULL; - struct token *token = peek_token(); + struct token *token = PEEK_TOKEN(); - while (token != NULL && token->type != TOKEN_EOF) + if (token->type == TOKEN_EOF) { - switch (token->type) - { - case TOKEN_WORD: + token = pop_token(); + // TODO + // return ast END. + } + else if (token->type == TOKEN_NEWLINE) + { + token = pop_token(); + // TODO + // return ast EMPTY. 
+ } + else // TOKEN WORD + { + // TODO + // call parse_list current_node = parse_simple_command(); result_list = list_append(result_list, current_node); - break; - default: - // Forward - token = pop_token(); - break; - } - token = peek_token(); } + /* if (token == NULL) { puts("Internal error: cannot get the following token"); puts("Hint: EOF might be missing"); return NULL; } + */ struct ast *result = ast_create_list(result_list); return result; diff --git a/src/parser/parser.h b/src/parser/parser.h index d10cae8..bf88adc 100644 --- a/src/parser/parser.h +++ b/src/parser/parser.h @@ -3,6 +3,24 @@ #include "utils/ast/ast.h" +// === Macros + +#define PEEK_TOKEN() \ + peek_token(); \ + if (token == NULL) \ + { \ + puts("Internal error: cannot get the following token"); \ + return NULL; \ + } + +#define POP_TOKEN() \ + pop_token(); \ + if (token == NULL) \ + { \ + puts("Internal error: cannot get the following token"); \ + return NULL; \ + } + /* @brief Builds the AST representation of the next command to execute. * * @return Returns the AST representation of the next command to execute. 
diff --git a/src/parser/parsing_utils.c b/src/parser/parsing_utils.c index b7c487b..b0c6618 100644 --- a/src/parser/parsing_utils.c +++ b/src/parser/parsing_utils.c @@ -8,24 +8,6 @@ #include "lexer/lexer.h" #include "utils/ast/ast.h" -// === Macros - -#define PEEK_TOKEN() \ - peek_token(); \ - if (token == NULL) \ - { \ - puts("Internal error: cannot get the following token"); \ - return NULL; \ - } - -#define POP_TOKEN() \ - pop_token(); \ - if (token == NULL) \ - { \ - puts("Internal error: cannot get the following token"); \ - return NULL; \ - } - // === Static functions /* Returns true if c is a command terminator, false otherwise @@ -58,8 +40,9 @@ struct ast *parse_simple_command(void) while (!isterminator(token)) { - command_elements = list_append(command_elements, token->data); token = POP_TOKEN(); + command_elements = list_append(command_elements, token->data); + token = PEEK_TOKEN(); } struct ast *result = ast_create_command(command_elements);