From 3ee4a0b9ca90117a4613ab79e38527b77351a4cb Mon Sep 17 00:00:00 2001 From: "Gu://em_" Date: Sat, 24 Jan 2026 16:13:16 +0100 Subject: [PATCH] feat: finished the new firsts system and began supporting redirections --- src/parser/grammar.c | 91 ++++++++++++++++++++++++++--------- src/parser/grammar.h | 28 +++++++++-- src/parser/grammar_advanced.c | 22 +++++++++ src/parser/grammar_basic.c | 37 +++++++------- 4 files changed, 134 insertions(+), 44 deletions(-) diff --git a/src/parser/grammar.c b/src/parser/grammar.c index 3fbb5fd..982a744 100644 --- a/src/parser/grammar.c +++ b/src/parser/grammar.c @@ -13,30 +13,13 @@ static struct firsts_list *firsts_map = NULL; // === Static functions -/* @brief get the first accepted tokens of a rule - * - * @arg r the rule - * @return the accepted tokens as a firsts_list struct - */ -static struct firsts_list *first(enum rule r) -{ - if (firsts_map == NULL || firsts_map[r].tokens == NULL) - { - puts("Internal error: attempted to get the firsts of a rule without " - "properly initializing the firsts map"); - return NULL; - } - - return &firsts_map[r]; -} - /* @brief Add a token to a rule's firsts (in firsts_map) * * @arg rule the rule to which add a first * @arg token the token to add to the rule's firsts * @return true on success, false on error */ -static bool add_first(enum rule rule, struct token token) +static bool add_first(enum rule rule, enum token_type token) { struct firsts_list *item = &firsts_map[rule]; if (item->tokens != NULL) @@ -44,20 +27,20 @@ static bool add_first(enum rule rule, struct token token) // Check for duplicates for (size_t i = 0; i < item->list_length; i++) { - if (item->tokens[i].type == token.type) + if (item->tokens[i] == token) return true; } // Append item->list_length++; item->tokens = realloc( - item->tokens, (item->list_length) * sizeof(struct firsts_list)); + item->tokens, (item->list_length) * sizeof(enum token_type)); } else { // Create entry - item->tokens = - calloc(item->list_length + 1, 
sizeof(struct firsts_list)); + item->list_length = 1; + item->tokens = calloc(1, sizeof(enum token_type)); } // Check for alloc error @@ -100,7 +83,45 @@ bool grammar_init(void) return false; // Populate the firsts map - // TODO + add_first(RULE_INPUT, TOKEN_WORD); + add_first(RULE_INPUT, TOKEN_IF); + add_first(RULE_INPUT, TOKEN_NEWLINE); + add_first(RULE_INPUT, TOKEN_EOF); + + add_first(RULE_LIST, TOKEN_WORD); + add_first(RULE_LIST, TOKEN_IF); + + add_first(RULE_AND_OR, TOKEN_WORD); + add_first(RULE_AND_OR, TOKEN_IF); + + add_first(RULE_PIPELINE, TOKEN_WORD); + add_first(RULE_PIPELINE, TOKEN_IF); + + add_first(RULE_COMMAND, TOKEN_WORD); + add_first(RULE_COMMAND, TOKEN_IF); + + add_first(RULE_SIMPLE_COMMAND, TOKEN_WORD); + + add_first(RULE_SHELL_COMMAND, TOKEN_IF); + + add_first(RULE_IF, TOKEN_IF); + + add_first(RULE_COMPOUND_LIST, TOKEN_NEWLINE); + add_first(RULE_COMPOUND_LIST, TOKEN_WORD); + add_first(RULE_COMPOUND_LIST, TOKEN_IF); + + add_first(RULE_ELSE_CLAUSE, TOKEN_ELSE); + add_first(RULE_ELSE_CLAUSE, TOKEN_ELIF); + + add_first(RULE_ELEMENT, TOKEN_WORD); + add_first(RULE_ELEMENT, TOKEN_IONUMBER); + add_first(RULE_ELEMENT, TOKEN_REDIRECTION); + + add_first(RULE_REDIRECTION, TOKEN_IONUMBER); + add_first(RULE_REDIRECTION, TOKEN_REDIRECTION); + + add_first(RULE_PREFIX, TOKEN_IONUMBER); + add_first(RULE_PREFIX, TOKEN_REDIRECTION); return true; } @@ -119,6 +140,30 @@ void grammar_close(void) firsts_map = NULL; } +struct firsts_list *first(enum rule rule) +{ + if (firsts_map == NULL || firsts_map[rule].tokens == NULL) + { + puts("Internal error: attempted to get the firsts of a rule without " + "properly initializing the firsts map"); + return NULL; + } + + return &firsts_map[rule]; +} + +bool is_first(struct token token, enum rule rule) +{ + struct firsts_list *firsts = &firsts_map[rule]; + for (size_t i = 0; i < firsts->list_length; i++) + { + if (firsts->tokens[i] == token.type) + return true; + } + + return false; +} + struct ast *parse_input(struct 
lexer_context *ctx) { return parse_list(ctx); diff --git a/src/parser/grammar.h b/src/parser/grammar.h index a5ec6e2..5721ee7 100644 --- a/src/parser/grammar.h +++ b/src/parser/grammar.h @@ -37,27 +37,49 @@ enum rule { RULE_IF, RULE_COMPOUND_LIST, RULE_ELSE_CLAUSE, + RULE_ELEMENT, + RULE_REDIRECTION, + RULE_PREFIX, NUMBER_OF_RULES }; struct firsts_list { - struct token* tokens; // Heap allocated array + enum token_type* tokens; // Heap allocated array size_t list_length; }; // === Functions -/* @brief Initializes the grammar submodule +/* + * @brief Initializes the grammar submodule * @return PARSER_INIT_SUCCESS on success PARSER_INIT_ERROR on error * @warning Do not use outside the parser */ bool grammar_init(void); -/* @brief Closes the grammar submodule +/* + * @brief Closes the grammar submodule * @warning Do not use outside the parser */ void grammar_close(void); +/* + * @brief get the first accepted tokens of a rule + * + * @arg r the rule + * @return the accepted tokens as a firsts_list struct, or NULL (after printing an internal error) when the firsts map is unset or has no tokens for the rule + */ +struct firsts_list *first(enum rule r); + +/* + * @brief tells whether a token belongs to the firsts of a specific rule + * + * @arg token the token to look up (only its type is compared) + * @arg rule the rule whose firsts are searched (the firsts map is indexed without a NULL check, unlike first()) + * @return true if token belongs to rule's firsts, false otherwise + */ +bool is_first(struct token token, enum rule rule); + /* @brief Acts as the entry point of the parser, calls parse_list * * @code input = list '\n' diff --git a/src/parser/grammar_advanced.c b/src/parser/grammar_advanced.c index 0f29b9b..1608d5c 100644 --- a/src/parser/grammar_advanced.c +++ b/src/parser/grammar_advanced.c @@ -1,6 +1,8 @@ #include "grammar_advanced.h" #include +#include +#include #include "grammar_basic.h" @@ -10,6 +12,12 @@ struct ast *parse_redirection(struct lexer_context *ctx) if 
(token->type == TOKEN_IONUMBER) { // TODO + } + + int io_number = -1; + if (token->type == TOKEN_IONUMBER) + { + io_number = atoi(token->data); POP_TOKEN(); token = PEEK_TOKEN(); } @@ -20,6 +28,20 @@ struct ast *parse_redirection(struct lexer_context 
*ctx) "else"); return NULL; } + char *redir_op = strdup(token->data); + POP_TOKEN(); + + token = PEEK_TOKEN(); + if (token->type != TOKEN_WORD) + { + puts("Syntax error: expected a word after redirection"); + free(redir_op); + return NULL; + } + char *target = strdup(token->data); + POP_TOKEN(); + + return ast_create_redir(io_number, redir_op, target); } struct ast *parse_prefix(struct lexer_context *ctx) diff --git a/src/parser/grammar_basic.c b/src/parser/grammar_basic.c index efa105d..78b3b3d 100644 --- a/src/parser/grammar_basic.c +++ b/src/parser/grammar_basic.c @@ -43,15 +43,15 @@ struct ast *parse_list(struct lexer_context *ctx) while (token->type == TOKEN_SEMICOLON) { token = POP_TOKEN(); - current_node = parse_and_or(ctx); - if (current_node == NULL) - { - // TODO free list - // There must be a function for that - return NULL; - } - result_list = list_append(result_list, current_node); token = PEEK_TOKEN(); + if (is_first(*token, RULE_AND_OR)) + { + current_node = parse_and_or(ctx); + if (current_node == NULL) + return NULL; + result_list = list_append(result_list, current_node); + token = PEEK_TOKEN(); + } } return ast_create_list(result_list); @@ -138,7 +138,7 @@ struct ast *parse_simple_command(struct lexer_context *ctx) token = PEEK_TOKEN(); // Eventual elements - while (token->type == TOKEN_WORD) + while (is_first(*token, RULE_ELEMENT)) { // Get element struct ast *element = parse_element(ctx); @@ -170,7 +170,6 @@ struct ast *parse_simple_command(struct lexer_context *ctx) } // Forward - POP_TOKEN(); token = PEEK_TOKEN(); } @@ -189,9 +188,8 @@ struct ast *parse_element(struct lexer_context *ctx) struct token *token = PEEK_TOKEN(); if (token->type == TOKEN_WORD) { - // TODO - puts("NOT IMPLEMENTED"); - return NULL; + token = POP_TOKEN(); + return ast_create_word(token->data); } else if (token->type == TOKEN_IONUMBER || token->type == TOKEN_REDIRECTION) { @@ -310,11 +308,14 @@ struct ast *parse_compound_list(struct lexer_context *ctx) } // And/or - 
current_cmd = parse_and_or(ctx); - if (current_cmd == NULL) - return NULL; - result_list = list_append(result_list, current_cmd); - + if (is_first(*token, RULE_AND_OR)) + { + current_cmd = parse_and_or(ctx); + if (current_cmd == NULL) + return NULL; + result_list = list_append(result_list, current_cmd); + } + token = PEEK_TOKEN(); }