From 3fa7b97282b70ac96d1755197fcf36477853cb6e Mon Sep 17 00:00:00 2001 From: matteo Date: Fri, 30 Jan 2026 12:21:29 +0100 Subject: [PATCH 1/4] fix: heap-use-after-free and memory leaks on error cases --- src/main.c | 7 +++---- src/parser/grammar.c | 14 +++++++++----- src/parser/grammar_basic.c | 7 +++++-- src/utils/hash_map/hash_map.c | 1 + 4 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/main.c b/src/main.c index c8988ca..6b0f59f 100644 --- a/src/main.c +++ b/src/main.c @@ -66,8 +66,11 @@ static int main_loop(struct lexer_context *ctx, struct args_options *options, if (command_ast == NULL) return err_input(&vars); + // === free + ast_free(&command_ast); parser_close(); + hash_map_free(&vars); return return_code; } @@ -120,9 +123,5 @@ int main(int argc, char **argv) return_code = main_loop(&ctx, &options, vars); - // === free - - hash_map_free(&vars); - return return_code; } diff --git a/src/parser/grammar.c b/src/parser/grammar.c index 57d7d20..73258ea 100644 --- a/src/parser/grammar.c +++ b/src/parser/grammar.c @@ -238,16 +238,20 @@ struct ast *parse_input(struct lexer_context *ctx) } struct ast *ast = parse_list(ctx); - if (ast == NULL) - return NULL; - token = PEEK_TOKEN(); - if (token->type == TOKEN_NEWLINE || token->type == TOKEN_EOF) + + if (ast == NULL) { - if (token->type == TOKEN_NEWLINE) + if (token != NULL && token->type == TOKEN_EOF) { POP_TOKEN(); } + return NULL; + } + + if (token->type == TOKEN_NEWLINE || token->type == TOKEN_EOF) + { + POP_TOKEN(); return ast; } diff --git a/src/parser/grammar_basic.c b/src/parser/grammar_basic.c index 9b4add2..6a6839b 100644 --- a/src/parser/grammar_basic.c +++ b/src/parser/grammar_basic.c @@ -329,12 +329,13 @@ struct ast *parse_if_rule(struct lexer_context *ctx) struct ast *condition_content = parse_compound_list(ctx); // Then keyword - token = POP_TOKEN(); + token = PEEK_TOKEN(); if (token->type != TOKEN_THEN) { perror("Expected the 'then' keyword but token has different type"); return 
err_if_rule(&condition_content, NULL, NULL); } + POP_TOKEN(); // Then content struct ast *then_content = parse_compound_list(ctx); @@ -344,6 +345,7 @@ struct ast *parse_if_rule(struct lexer_context *ctx) } struct ast *else_content = NULL; + token = PEEK_TOKEN(); // Eventual else/elif clause(s) if (is_first(*token, RULE_ELSE_CLAUSE)) { @@ -355,12 +357,13 @@ struct ast *parse_if_rule(struct lexer_context *ctx) } // Fi keyword - token = POP_TOKEN(); + token = PEEK_TOKEN(); if (token->type != TOKEN_FI) { perror("Expected the 'fi' keyword but token has different type"); return err_if_rule(&condition_content, &then_content, &else_content); } + POP_TOKEN(); // Result struct ast *result = diff --git a/src/utils/hash_map/hash_map.c b/src/utils/hash_map/hash_map.c index 3d77734..6f9a513 100644 --- a/src/utils/hash_map/hash_map.c +++ b/src/utils/hash_map/hash_map.c @@ -117,6 +117,7 @@ void hash_map_free(struct hash_map **hash_map) free((*hash_map)->data); free(*hash_map); } + *hash_map = NULL; } void hash_map_foreach(struct hash_map *hash_map, From f8b91d4da3eab1a76192b16f0dce37fe32b044f9 Mon Sep 17 00:00:00 2001 From: matteo Date: Fri, 30 Jan 2026 16:51:10 +0100 Subject: [PATCH 2/4] fix: memory issues, parser errors and get_ast_if returning always NULL --- src/execution/execution_helpers.c | 2 ++ src/parser/grammar.c | 1 + src/parser/grammar_basic.c | 2 +- src/utils/ast/ast_if.c | 2 +- 4 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/execution/execution_helpers.c b/src/execution/execution_helpers.c index 9e44dfa..60ec0c3 100644 --- a/src/execution/execution_helpers.c +++ b/src/execution/execution_helpers.c @@ -205,6 +205,8 @@ int exec_ast_command(struct ast_command *command, struct hash_map *vars) int exec_ast_if(struct ast_if *if_node, struct hash_map *vars) { + if (if_node == NULL) + return 2; int cond = execution(if_node->condition, vars); if (cond == 0) return execution(if_node->then_clause, vars); diff --git a/src/parser/grammar.c b/src/parser/grammar.c 
index 73258ea..b89a99e 100644 --- a/src/parser/grammar.c +++ b/src/parser/grammar.c @@ -140,6 +140,7 @@ bool grammar_init(void) // Element add_first(RULE_ELEMENT, TOKEN_WORD); + add_first(RULE_ELEMENT, TOKEN_ASSIGNMENT_WORD); add_firsts(RULE_ELEMENT, first(RULE_REDIRECTION)); // Prefix diff --git a/src/parser/grammar_basic.c b/src/parser/grammar_basic.c index 6a6839b..86ed089 100644 --- a/src/parser/grammar_basic.c +++ b/src/parser/grammar_basic.c @@ -281,7 +281,7 @@ struct ast *parse_simple_command(struct lexer_context *ctx) struct ast *parse_element(struct lexer_context *ctx) { struct token *token = PEEK_TOKEN(); - if (token->type == TOKEN_WORD) + if (token->type == TOKEN_WORD || token->type == TOKEN_ASSIGNMENT_WORD) { token = POP_TOKEN(); return ast_create_word(token->data); diff --git a/src/utils/ast/ast_if.c b/src/utils/ast/ast_if.c index 1402ff6..6b0ff5d 100644 --- a/src/utils/ast/ast_if.c +++ b/src/utils/ast/ast_if.c @@ -19,7 +19,7 @@ struct ast *ast_create_if(struct ast *condition, struct ast *then_clause, struct ast_if *ast_get_if(struct ast *node) { - if (node == NULL || node->type == AST_IF) + if (node == NULL || node->type != AST_IF) return NULL; return node->data; } From 423793903dadf71f11c9e8deec323967fba5fd88 Mon Sep 17 00:00:00 2001 From: "Gu://em_" Date: Fri, 30 Jan 2026 19:48:31 +0100 Subject: [PATCH 3/4] feat: while and for loops support for parser, plus new ASTs, new tokens and fixes inside parser --- src/lexer/lexer_utils.h | 2 + src/parser/grammar_advanced.c | 142 ++++++++++++++++++++++++++++++++++ src/parser/grammar_advanced.h | 42 ++++++++++ src/parser/grammar_basic.c | 111 ++++++++++++++++++-------- src/parser/grammar_basic.h | 6 +- src/utils/ast/ast.h | 1 + src/utils/ast/ast_base.h | 1 + src/utils/ast/ast_loop.c | 37 +++++++++ src/utils/ast/ast_loop.h | 34 ++++++++ 9 files changed, 341 insertions(+), 35 deletions(-) create mode 100644 src/utils/ast/ast_loop.c create mode 100644 src/utils/ast/ast_loop.h diff --git a/src/lexer/lexer_utils.h 
b/src/lexer/lexer_utils.h index af64455..4a09968 100644 --- a/src/lexer/lexer_utils.h +++ b/src/lexer/lexer_utils.h @@ -72,6 +72,8 @@ enum token_type TOKEN_FOR, TOKEN_WHILE, TOKEN_UNTIL, + TOKEN_DO, + TOKEN_DONE, TOKEN_CASE }; diff --git a/src/parser/grammar_advanced.c b/src/parser/grammar_advanced.c index d57e70e..c1e9d20 100644 --- a/src/parser/grammar_advanced.c +++ b/src/parser/grammar_advanced.c @@ -81,3 +81,145 @@ struct ast *parse_prefix(struct lexer_context *ctx) return NULL; } } + +// TODO NOT IMPLEMENTED +struct ast *parse_funcdec(struct lexer_context *ctx) +{ + (void)ctx; + perror("Error: usage of a not implemented function (parse_funcdec)"); + return NULL; +} + +struct ast *parse_for(struct lexer_context *ctx) +{ + (void)ctx; + perror("Error: usage of a not implemented function (parse_for)"); + return NULL; +} + +struct ast *parse_while(struct lexer_context *ctx) +{ + struct token *token = PEEK_TOKEN(); + + // 'while' + if (token->type != TOKEN_WHILE) + { + perror( + "Internal error: expected a TOKEN_WHILE but got a different type"); + return NULL; + } + POP_TOKEN(); + + // condition + struct ast *condition = parse_compound_list(ctx); + if (condition == NULL) + return NULL; + token = PEEK_TOKEN(); + + // 'do' + if (token->type != TOKEN_DO) + { + ast_free(&condition); + perror("Syntax error: expected the 'do' keyowrd but got a different " + "token"); + return NULL; + } + POP_TOKEN(); + token = PEEK_TOKEN(); + + // body + struct ast *body = parse_compound_list(ctx); + if (body == NULL) + { + ast_free(&condition); + return NULL; + } + token = PEEK_TOKEN(); + + // 'done' + if (token->type != TOKEN_DONE) + { + ast_free(&condition); + perror("Syntax error: expected the 'done' keyowrd but got a different " + "token"); + return NULL; + } + POP_TOKEN(); + + struct ast *result = ast_create_loop(condition, body); + if (result == NULL) + { + ast_free(&condition); + ast_free(&body); + perror("Internal error: could not create ast node (is your memory full " + "?)"); 
+ return NULL; + } + + return result; +} + +struct ast *parse_until(struct lexer_context *ctx) +{ + struct token *token = PEEK_TOKEN(); + + // 'while' + if (token->type != TOKEN_UNTIL) + { + perror( + "Internal error: expected a TOKEN_WHILE but got a different type"); + return NULL; + } + POP_TOKEN(); + + // condition + struct ast *condition = parse_compound_list(ctx); + if (condition == NULL) + return NULL; + condition = + ast_create_neg(true, condition); // TODO check result (beware to not + // exceed function lines limit) + token = PEEK_TOKEN(); + + // 'do' + if (token->type != TOKEN_DO) + { + ast_free(&condition); + perror("Syntax error: expected the 'do' keyowrd but got a different " + "token"); + return NULL; + } + POP_TOKEN(); + token = PEEK_TOKEN(); + + // body + struct ast *body = parse_compound_list(ctx); + if (body == NULL) + { + ast_free(&condition); + return NULL; + } + token = PEEK_TOKEN(); + + // 'done' + if (token->type != TOKEN_DONE) + { + ast_free(&condition); + perror("Syntax error: expected the 'done' keyowrd but got a different " + "token"); + return NULL; + } + POP_TOKEN(); + + struct ast *result = ast_create_loop(condition, body); + if (result == NULL) + { + ast_free(&condition); + ast_free(&body); + perror("Internal error: could not create ast node (is your memory full " + "?)"); + return NULL; + } + + return result; +} diff --git a/src/parser/grammar_advanced.h b/src/parser/grammar_advanced.h index f3c27ae..2b9cc63 100644 --- a/src/parser/grammar_advanced.h +++ b/src/parser/grammar_advanced.h @@ -24,4 +24,46 @@ struct ast *parse_redirection(struct lexer_context *ctx); */ struct ast *parse_prefix(struct lexer_context *ctx); +/* + * @brief parses a funcdec rule + * @warning NOT IMPLEMENTED + * + * @code funcdec = WORD '(' ')' {'\n'} shell_command ; + * + * @first WORD + */ +struct ast *parse_funcdec(struct lexer_context *ctx); + +/* + * @brief parses a for rule + * @warning NOT IMPLEMENTED + * + * @code rule_for = 'for' WORD + * ( [';'] | [ 
{'\n'} 'in' { WORD } ( ';' | '\n' ) ] ) + * {'\n'} 'do' compound_list 'done' ; + * + * @first TOKEN_FOR + */ +struct ast *parse_for(struct lexer_context *ctx); + +/* + * @brief parses a while rule + * @return the loop AST node, or NULL on error + * + * @code rule_while = 'while' compound_list 'do' compound_list 'done' ; + * + * @first TOKEN_WHILE + */ +struct ast *parse_while(struct lexer_context *ctx); + +/* + * @brief parses an until rule + * @return the loop AST node (negated condition), or NULL on error + * + * @code rule_until = 'until' compound_list 'do' compound_list 'done' ; + * + * @first TOKEN_UNTIL + */ +struct ast *parse_until(struct lexer_context *ctx); + #endif /* ! GRAMMAR_ADVANCED_H */ diff --git a/src/parser/grammar_basic.c b/src/parser/grammar_basic.c index 86ed089..c5a5ee3 100644 --- a/src/parser/grammar_basic.c +++ b/src/parser/grammar_basic.c @@ -26,6 +26,29 @@ static enum ast_and_or_type and_or_tok_to_ast(enum token_type tok_type) } } +/* @brief: frees command_elements and redirections lists (helper func) + * @return: NULL + */ +static void *err_simple_command(struct list *command_elements, + struct list *redirections) +{ + list_deep_destroy(command_elements); + list_deep_destroy(redirections); + return NULL; +} + +/* @brief: frees all the arguments. (helper func) + * @return: NULL. 
+ */ +static void *err_if_rule(struct ast **cond, struct ast **then_clause, + struct ast **else_clause) +{ + ast_free(cond); + ast_free(then_clause); + ast_free(else_clause); + return NULL; +} + // === Functions struct ast *parse_list(struct lexer_context *ctx) @@ -73,16 +96,11 @@ struct ast *parse_and_or(struct lexer_context *ctx) while (token->type == TOKEN_AND || token->type == TOKEN_OR) { - // Set left part - + // Build AST (left part) + enum ast_and_or_type type = and_or_tok_to_ast(token->type); struct ast *left = result; - // eat and_or token - token = POP_TOKEN(); - - // Set type - enum ast_and_or_type type = and_or_tok_to_ast(token->type); - + POP_TOKEN(); token = PEEK_TOKEN(); // Skip newlines @@ -94,6 +112,12 @@ struct ast *parse_and_or(struct lexer_context *ctx) // Right part struct ast *right = parse_pipeline(ctx); + if (right == NULL) + { + ast_free(&left); + return NULL; + } + token = PEEK_TOKEN(); result = ast_create_and_or(left, right, type); if (result == NULL) @@ -120,30 +144,33 @@ struct ast *parse_pipeline(struct lexer_context *ctx) token = PEEK_TOKEN(); } + // command rule struct ast *left = parse_command(ctx); + token = PEEK_TOKEN(); if (negation) { left = ast_create_neg(negation, left); } - token = PEEK_TOKEN(); + // Pipes while (token->type == TOKEN_PIPE) { POP_TOKEN(); + token = PEEK_TOKEN(); // skip newlines - token = PEEK_TOKEN(); while (token->type == TOKEN_NEWLINE) { POP_TOKEN(); token = PEEK_TOKEN(); } + // command rule struct ast *right = parse_command(ctx); + token = PEEK_TOKEN(); // Create AST left = ast_create_pipe(left, right); - token = PEEK_TOKEN(); } return left; @@ -162,6 +189,11 @@ struct ast *parse_command(struct lexer_context *ctx) { result = parse_shell_command(ctx); } + // WARNING funcdec seems to require a LL(2) parser + else if (is_first(*token, RULE_FUNCDEC)) + { + result = parse_funcdec(ctx); + } else { perror("Syntax error: unexpected token"); @@ -171,17 +203,6 @@ struct ast *parse_command(struct lexer_context *ctx) 
return result; } -/* @brief: frees command_elements and redirections lists (helper func) - * @return: NULL - */ -static void *err_simple_command(struct list *command_elements, - struct list *redirections) -{ - list_deep_destroy(command_elements); - list_deep_destroy(redirections); - return NULL; -} - struct ast *parse_simple_command(struct lexer_context *ctx) { struct list *command_elements = NULL; @@ -299,19 +320,41 @@ struct ast *parse_element(struct lexer_context *ctx) struct ast *parse_shell_command(struct lexer_context *ctx) { - return parse_if_rule(ctx); -} + struct token *token = PEEK_TOKEN(); + struct ast *result = NULL; -/* @brief: frees all the arguments. (helper func) - * @return: NULL. - */ -static void *err_if_rule(struct ast **cond, struct ast **then_clause, - struct ast **else_clause) -{ - ast_free(cond); - ast_free(then_clause); - ast_free(else_clause); - return NULL; + // Grouping + // '(' or '{' + if (token->type == TOKEN_LEFT_BRACKET || token->type == TOKEN_LEFT_PAREN) + { + POP_TOKEN(); + result = parse_compound_list(ctx); + if (result == NULL) + return NULL; + + // ')' or '}' + token = PEEK_TOKEN(); + if (token->type == TOKEN_LEFT_BRACKET + || token->type == TOKEN_LEFT_PAREN) + { + ast_free(&result); + perror("Syntax error: bracket/parenthesis mismatch"); + return NULL; + } + + POP_TOKEN(); + return result; + } + else if (is_first(*token, RULE_IF)) + { + return parse_if_rule(ctx); + } + // TODO loops and case + else + { + perror("Syntax error: unexpected token in parse_shell_command"); + return NULL; + } } struct ast *parse_if_rule(struct lexer_context *ctx) diff --git a/src/parser/grammar_basic.h b/src/parser/grammar_basic.h index 46f7b4f..3ee5355 100644 --- a/src/parser/grammar_basic.h +++ b/src/parser/grammar_basic.h @@ -43,6 +43,7 @@ struct ast *parse_pipeline(struct lexer_context *ctx); * * @code command = simple_command * | shell_command + * * ; * @first first(simple_command), first(shell_command) */ @@ -72,7 +73,10 @@ struct ast 
*parse_element(struct lexer_context *ctx); /* * @brief Only parses if rules for the moment * - * @code shell_command = if_rule ; + * @code shell_command = '{' compound_list '}' + * | '(' compound_list ')' + * | if_rule + * ; * * @first first(if_rule) */ diff --git a/src/utils/ast/ast.h b/src/utils/ast/ast.h index 9827d8d..1ba949b 100644 --- a/src/utils/ast/ast.h +++ b/src/utils/ast/ast.h @@ -8,6 +8,7 @@ #include "ast_end.h" #include "ast_if.h" #include "ast_list.h" +#include "ast_loop.h" #include "ast_neg.h" #include "ast_pipe.h" #include "ast_redir.h" diff --git a/src/utils/ast/ast_base.h b/src/utils/ast/ast_base.h index ae99a39..71e4a38 100644 --- a/src/utils/ast/ast_base.h +++ b/src/utils/ast/ast_base.h @@ -16,6 +16,7 @@ enum ast_type AST_WORD, AST_PIPE, AST_NEG, + AST_LOOP, AST_ASSIGNMENT }; diff --git a/src/utils/ast/ast_loop.c b/src/utils/ast/ast_loop.c new file mode 100644 index 0000000..fded922 --- /dev/null +++ b/src/utils/ast/ast_loop.c @@ -0,0 +1,37 @@ +#include "ast_loop.h" + +#include +#include + +struct ast *ast_create_loop(struct ast *condition, struct ast *body) +{ + struct ast_loop *node_data = malloc(sizeof(struct ast_loop)); + if (!node_data) + return NULL; + + node_data->condition = condition; + node_data->body = body; + + return ast_create(AST_LOOP, node_data); +} + +struct ast_loop *ast_get_loop(struct ast *node) +{ + if (node == NULL || node->type != AST_LOOP) + return NULL; + return (struct ast_loop *)node->data; +} + +bool ast_is_loop(struct ast *node) +{ + return node != NULL && node->type == AST_LOOP; +} + +void ast_free_loop(struct ast_loop *loop_data) +{ + if (loop_data == NULL) + return; + ast_free(&loop_data->condition); + ast_free(&loop_data->body); + free(loop_data); +} diff --git a/src/utils/ast/ast_loop.h b/src/utils/ast/ast_loop.h new file mode 100644 index 0000000..7c5ba6e --- /dev/null +++ b/src/utils/ast/ast_loop.h @@ -0,0 +1,34 @@ +#ifndef AST_LOOP_H +#define AST_LOOP_H + +#include "ast_base.h" + +struct ast_loop +{ + // 
Repeat body while condition is true + struct ast *condition; + struct ast *body; +}; + +/** + * Checks if the given AST node is a loop. + */ +bool ast_is_loop(struct ast *node); + +/** + * Retrieves the loop data from the given AST node. + * Returns NULL if the node is NULL or not of type AST_LOOP. + */ +struct ast_loop *ast_get_loop(struct ast *node); + +/** + * Creates a new AST node representing a loop. + */ +struct ast *ast_create_loop(struct ast* condition, struct ast* body); + +/* + * @brief: frees the given ast_loop together with its condition and body. + */ +void ast_free_loop(struct ast_loop *loop_node); + +#endif /* ! AST_LOOP_H */ From 32c35c4bf7254515bcd55839b4b9cfc3709e2023 Mon Sep 17 00:00:00 2001 From: Matteo Flebus Date: Fri, 30 Jan 2026 19:55:09 +0100 Subject: [PATCH 4/4] fix: add dependencies in Makefile for new ast --- src/utils/Makefile.am | 1 + 1 file changed, 1 insertion(+) diff --git a/src/utils/Makefile.am b/src/utils/Makefile.am index 7876682..085ae1b 100644 --- a/src/utils/Makefile.am +++ b/src/utils/Makefile.am @@ -17,6 +17,7 @@ libutils_a_SOURCES = \ ast/ast_word.c \ ast/ast_neg.c \ ast/ast_pipe.c \ + ast/ast_loop.c \ args/args.c \ vars/vars.c \ ast/ast_assignment.c