From 346ad17e264bd512d7bc2d6c34cc5653b6adbdbc Mon Sep 17 00:00:00 2001 From: "Gu://em_" Date: Sat, 17 Jan 2026 16:40:53 +0100 Subject: [PATCH 1/4] docs: reworked parser header to fully comply with the given grammar and added a language representation for each parse_* function --- src/parser/parsing_utils.c | 7 ++- src/parser/parsing_utils.h | 88 +++++++++++++++++++++++++++++--------- 2 files changed, 73 insertions(+), 22 deletions(-) diff --git a/src/parser/parsing_utils.c b/src/parser/parsing_utils.c index c07653e..0d0f43a 100644 --- a/src/parser/parsing_utils.c +++ b/src/parser/parsing_utils.c @@ -47,6 +47,11 @@ static bool is_end_of_list(struct token *token) // === Functions +struct ast *parse_input(void) +{ + return parse_list(); +} + /* Parses a simple list of words (command and arguments) * and returns the resulting ast */ @@ -55,7 +60,7 @@ struct ast *parse_simple_command(void) struct list *command_elements = NULL; struct token *token = PEEK_TOKEN(); - while (!isterminator(token)) + while (token->type == TOKEN_WORD) { token = POP_TOKEN(); command_elements = list_append(command_elements, token->data); diff --git a/src/parser/parsing_utils.h b/src/parser/parsing_utils.h index a380a85..3c9101c 100644 --- a/src/parser/parsing_utils.h +++ b/src/parser/parsing_utils.h @@ -19,33 +19,79 @@ return NULL; \ } -/* @brief: parses a list of [and_or] rules, separated by semicolons. 
+/* @brief Acts as the entry point of the parser, calls parse_list + */ +struct ast* parse_input(void); + +/* @brief: parses a list of [and_or] rules separated by semicolons and that + * ends by a newline + * + * @code input = list '\n' + * | list EOF + * | '\n' + * | EOF + * ; */ struct ast *parse_list(void); -/* @brief Parses a simple list of words (command and arguments) - * and returns the resulting ast - */ -struct ast *parse_simple_command(void); - -/* - */ -struct ast *parse_if_rule(void); - -/* - */ -struct ast *parse_shell_command(void); - -/* @brief parses commands inside if/else clauses and returns the corresponding - * AST list - */ -struct ast* parse_compound_list(void); - -/* +/* @brief Only parses a pipeline rule for the moment + * + * @code and_or = pipeline ; */ struct ast* parse_and_or(void); -/* +/* @brief Only parses a command rule for the moment + * + * @code pipeline = command ; + */ +struct ast* parse_pipeline(void); + +/* @brief Parses a simple command rule or a shell command rule depending on + * the first token. 
+ * @note + * TOKEN_WORD => simple_command + * TOKEN_IF => shell_command + * + * @code command = simple_command + * | shell_command + * ; + */ +struct ast* parse_command(void); + +/* @brief Parses a simple list of words (command and arguments) + * ending by a separator + * + * @code simple_command = WORD { element } ; + */ +struct ast *parse_simple_command(void); + + +/* @brief Only parses if rules for the moment + * + * @code shell_command = if_rule ; + */ +struct ast *parse_shell_command(void); + + +/* @brief Parses a if rule (condition, then-clause, elif-clause, else-clause) + * + * @code if_rule = 'if' compound_list 'then' compound_list [else_clause] 'fi' ; + */ +struct ast *parse_if_rule(void); + + +/* @brief parses commands inside if/else clauses and returns the corresponding + * AST list + * + * @code compound_list = {'\n'} and_or { ( ';' | '\n' ) {'\n'} and_or } [';'] {'\n'} ; + */ +struct ast* parse_compound_list(void); + +/* @brief + * + * @code else_clause = 'else' compound_list + * | 'elif' compound_list 'then' compound_list [else_clause] + * ; */ struct ast* parse_else_clause(void); From f887c90ec567b05b263ebddbd1630b42d591a28b Mon Sep 17 00:00:00 2001 From: "Gu://em_" Date: Sat, 17 Jan 2026 17:20:13 +0100 Subject: [PATCH 2/4] feat: elif support, fixed a lot of inconsistencies with the grammar and updated code according to the behaviour described in the header. 
Also fixed some typos and doc errors --- src/parser/parsing_utils.c | 140 +++++++++++++++++++++++++------------ src/parser/parsing_utils.h | 12 ++-- 2 files changed, 103 insertions(+), 49 deletions(-) diff --git a/src/parser/parsing_utils.c b/src/parser/parsing_utils.c index 0d0f43a..f1408c5 100644 --- a/src/parser/parsing_utils.c +++ b/src/parser/parsing_utils.c @@ -52,9 +52,57 @@ struct ast *parse_input(void) return parse_list(); } -/* Parses a simple list of words (command and arguments) - * and returns the resulting ast - */ +struct ast *parse_list(void) +{ + struct list *result_list = NULL; + struct ast *current_node = NULL; + + struct token *token = PEEK_TOKEN(); + + while (!is_end_of_list(token)) + { + if (token->type == TOKEN_SEMICOLON) + { + result_list = list_append(result_list, current_node); + } + else + { + current_node = parse_and_or(); + } + token = PEEK_TOKEN(); + } + + return ast_create_list(result_list); +} + +struct ast *parse_and_or(void) +{ + return parse_pipeline(); +} + +struct ast *parse_pipeline(void) +{ + return parse_command(); +} + +struct ast *parse_command(void) +{ + struct token *token = PEEK_TOKEN(); + + if (token->type == TOKEN_WORD) + { + return parse_simple_command(); + } + else if (token->type == TOKEN_IF) + { + return parse_shell_command(); + } + else + { + return ast_create_void(); // TODO not sure what to do + } +} + struct ast *parse_simple_command(void) { struct list *command_elements = NULL; @@ -71,30 +119,23 @@ struct ast *parse_simple_command(void) return result; } -struct ast *parse_list(void) +// TODO check compliance with the grammar +struct ast *parse_shell_command(void) { - struct list *result_list = NULL; - struct ast *current_node = NULL; - struct token *token = PEEK_TOKEN(); - while (!is_end_of_list(token)) + switch (token->type) { - if (token->type == TOKEN_SEMICOLON) - { - result_list = list_append(result_list, current_node); - } - else - { - // TODO use parse_and_or() instead. 
- current_node = parse_simple_command(); - } - token = PEEK_TOKEN(); - } + case TOKEN_IF: + return parse_if_rule(); - return ast_create_list(result_list); + default: + puts("I think it's not implemented yet"); + return NULL; + } } +// TODO check compliance with the grammar struct ast *parse_if_rule(void) { // If condition @@ -143,6 +184,7 @@ struct ast *parse_if_rule(void) return result; } +// TODO comply with header's grammar struct ast *parse_compound_list(void) { struct list *result_list = NULL; // ast* list @@ -178,19 +220,44 @@ struct ast *parse_compound_list(void) struct ast *parse_else_clause(void) { - // Eventual elif content struct token *token = PEEK_TOKEN(); + // TODO handle ELIF + // Eventual elif content + while (token->type == TOKEN_ELIF) + { + // Condition + token = POP_TOKEN(); + struct ast *condition = parse_compound_list(); + + // 'then' + token = POP_TOKEN(); + if (token->type != TOKEN_THEN) + { + puts("Expected the 'then' keyword but got a different token type"); + return NULL; + } + + // Then clause + struct ast *then_content = parse_compound_list(); + + // Eventual else clause (recursive) + struct ast *else_content = NULL; + token = PEEK_TOKEN(); + if (token->type == TOKEN_ELSE || token_type == TOKEN_ELIF) + { + else_content = parse_else_clause(); + } + + struct ast *result = + ast_create_if(condition, then_content, else_content); + return result; + } + + // Eventual else content + struct ast *result = NULL; - // TODO handle ELIF - // while (token->type == TOKEN_ELIF) - // { - // puts("ABORTING ELIF: Not implemented ma gueule"); - // token = POP_TOKEN(); // Forward - // } - - // Eventual else content if (token->type == TOKEN_ELSE) { result = parse_compound_list(); @@ -202,18 +269,3 @@ struct ast *parse_else_clause(void) return result; } - -struct ast *parse_shell_command(void) -{ - struct token *token = PEEK_TOKEN(); - - switch (token->type) - { - case TOKEN_IF: - return parse_if_rule(); - - default: - puts("I think it's not implemented yet"); 
- return NULL; - } -} diff --git a/src/parser/parsing_utils.h b/src/parser/parsing_utils.h index 3c9101c..f4c5a68 100644 --- a/src/parser/parsing_utils.h +++ b/src/parser/parsing_utils.h @@ -20,11 +20,6 @@ } /* @brief Acts as the entry point of the parser, calls parse_list - */ -struct ast* parse_input(void); - -/* @brief: parses a list of [and_or] rules separated by semicolons and that - * ends by a newline * * @code input = list '\n' * | list EOF @@ -32,6 +27,13 @@ struct ast* parse_input(void); * | EOF * ; */ +struct ast* parse_input(void); + +/* @brief: parses a list of [and_or] rules separated by semicolons and that + * ends by a newline + * + * @code list = and_or { ';' and_or } [ ';' ] ; + */ struct ast *parse_list(void); /* @brief Only parses a pipeline rule for the moment From 5d87e87f2ec62c605b60a9cb2f59a57d2b9b97b5 Mon Sep 17 00:00:00 2001 From: "Gu://em_" Date: Sat, 17 Jan 2026 20:15:27 +0100 Subject: [PATCH 3/4] fix: lot of fixes in parsing. Now code should be fully compliant with the doc grammar. 
WARNING: not tested yet --- src/parser/parsing_utils.c | 135 +++++++++++++++++++++++-------------- 1 file changed, 86 insertions(+), 49 deletions(-) diff --git a/src/parser/parsing_utils.c b/src/parser/parsing_utils.c index f1408c5..487602d 100644 --- a/src/parser/parsing_utils.c +++ b/src/parser/parsing_utils.c @@ -59,17 +59,29 @@ struct ast *parse_list(void) struct token *token = PEEK_TOKEN(); - while (!is_end_of_list(token)) + // and_or + current_node = parse_and_or(); + if (current_node == NULL) + return NULL; + list_append(result_list, current_node); + + // Following and_or commands + token = PEEK_TOKEN(); + while (token->type == TOKEN_SEMICOLON) { - if (token->type == TOKEN_SEMICOLON) - { - result_list = list_append(result_list, current_node); - } - else + token = POP_TOKEN(); + if (!isterminator(token)) // Follow(list) { current_node = parse_and_or(); + if (current_node == NULL) + { + //TODO free list + // There must be a function for that + return NULL; + } + list_append(result_list, current_node); + token = PEEK_TOKEN(); } - token = PEEK_TOKEN(); } return ast_create_list(result_list); @@ -107,11 +119,17 @@ struct ast *parse_simple_command(void) { struct list *command_elements = NULL; struct token *token = PEEK_TOKEN(); + if (token->type != TOKEN_WORD) + { + puts("Expected a command but got a different token type"); + return NULL; + } while (token->type == TOKEN_WORD) { token = POP_TOKEN(); - command_elements = list_append(command_elements, token->data); + char* word = strdup(token->data); + command_elements = list_append(command_elements, word); token = PEEK_TOKEN(); } @@ -122,25 +140,13 @@ struct ast *parse_simple_command(void) // TODO check compliance with the grammar struct ast *parse_shell_command(void) { - struct token *token = PEEK_TOKEN(); - - switch (token->type) - { - case TOKEN_IF: - return parse_if_rule(); - - default: - puts("I think it's not implemented yet"); - return NULL; - } + return parse_if_rule(); } -// TODO check compliance with the 
grammar struct ast *parse_if_rule(void) { - // If condition + // If keyword struct token *token = POP_TOKEN(); - if (token->type != TOKEN_IF) { puts("Internal error: expected a if rule but token has different " @@ -148,27 +154,43 @@ struct ast *parse_if_rule(void) return NULL; } + // Condition content struct ast *condition_content = parse_compound_list(); - // Then content + // Then keyword token = POP_TOKEN(); - if (token->type != TOKEN_THEN) { + ast_free(&condition_content); puts("Expected the 'then' keyword but token has different type"); return NULL; } + // Then content struct ast *then_content = parse_compound_list(); + if (then_content == NULL) + { + ast_free(&condition_content); + ast_free(&then_content); + return NULL; + } // Eventual else/elif clause(s) struct ast *else_content = parse_else_clause(); + if (else_content == NULL) + { + ast_free(&condition_content); + ast_free(&then_content); + return NULL; + } token = POP_TOKEN(); if (token->type != TOKEN_FI) { + ast_free(&condition_content); + ast_free(&then_content); + ast_free(&else_content); puts("Expected the 'fi' keyword but token has different type"); - // TODO free previous asts return NULL; } @@ -176,44 +198,60 @@ struct ast *parse_if_rule(void) ast_create_if(condition_content, then_content, else_content); if (result == NULL) { + ast_free(&condition_content); + ast_free(&then_content); + ast_free(&else_content); puts("Internal error: could not create a new AST (AST_IF)"); - // TODO free previous asts return NULL; } return result; } -// TODO comply with header's grammar struct ast *parse_compound_list(void) { struct list *result_list = NULL; // ast* list - struct list *command_elements = NULL; // token* list + struct ast *current_cmd = NULL; struct token *token = PEEK_TOKEN(); - while (token->type != TOKEN_THEN || token->type != TOKEN_ELIF - || token->type != TOKEN_ELSE) - { - // Parse simple command - if (token->type == TOKEN_SEMICOLON || token->type == TOKEN_NEWLINE) - { - // Stage (-> next 
command) - struct ast *command = ast_create_command(command_elements); - result_list = list_append(result_list, command); - command_elements = NULL; - } - - if (token->type == TOKEN_EOF) - { - puts("Syntax error: Unexpected end of stream"); // TODO pas très - // bien dit - return NULL; - } - - command_elements = list_append(command_elements, token->data); + // Skip newlines + while (token == TOKEN_NEWLINE) token = POP_TOKEN(); + + // and_or + current_cmd = parse_and_or(); + if (current_cmd == NULL) + return NULL; + list_append(result_list, current_cmd); + + // Following commands + token = PEEK_TOKEN(); + while (token->type == TOKEN_SEMICOLON || token->type TOKEN_NEWLINE) + { + POP_TOKEN(); + + // Skip newlines + while (token == TOKEN_NEWLINE) + token = POP_TOKEN(); + + // and_or + current_cmd = parse_and_or(); + if (current_cmd == NULL) + return NULL; + list_append(result_list, current_cmd); + + token = PEEK_TOKEN(); } + // Eventual semicolons + if (token->type == TOKEN_SEMICOLON) + token = POP_TOKEN(); + + // Skip newlines + while (token == TOKEN_NEWLINE) + token = POP_TOKEN(); + + struct ast *result = ast_create_list(result_list); return result; } @@ -222,7 +260,6 @@ struct ast *parse_else_clause(void) { struct token *token = PEEK_TOKEN(); - // TODO handle ELIF // Eventual elif content while (token->type == TOKEN_ELIF) { From c1f1a2fc372975c3cd8ff1bdc2ee5897ce5b6c30 Mon Sep 17 00:00:00 2001 From: Matteo Flebus Date: Mon, 19 Jan 2026 19:15:28 +0100 Subject: [PATCH 4/4] fix(parser): 3098750984535 compilations errors in parsing_utils --- src/parser/parsing_utils.c | 55 ++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/src/parser/parsing_utils.c b/src/parser/parsing_utils.c index 487602d..544b311 100644 --- a/src/parser/parsing_utils.c +++ b/src/parser/parsing_utils.c @@ -1,9 +1,12 @@ +#define _POSIX_C_SOURCE 200809L + // === Includes #include "parsing_utils.h" #include #include #include +#include #include 
"lexer/lexer.h" #include "utils/ast/ast.h" @@ -11,7 +14,7 @@ // === Static functions /* Returns true if c is a command terminator, false otherwise - */ +*/ static bool isterminator(struct token *token) { if (token == NULL) @@ -19,17 +22,20 @@ static bool isterminator(struct token *token) switch (token->type) { - case TOKEN_NEWLINE: - case TOKEN_SEMICOLON: - case TOKEN_EOF: - return true; - default: - return false; + case TOKEN_NEWLINE: + case TOKEN_SEMICOLON: + case TOKEN_EOF: + return true; + default: + return false; } } /* @brief: returns true if token is an end of list indicator. + * @warning: not used */ + +/* static bool is_end_of_list(struct token *token) { if (token == NULL) @@ -37,13 +43,14 @@ static bool is_end_of_list(struct token *token) switch (token->type) { - case TOKEN_NEWLINE: - case TOKEN_EOF: - return true; - default: - return false; + case TOKEN_NEWLINE: + case TOKEN_EOF: + return true; + default: + return false; } } +*/ // === Functions @@ -150,7 +157,7 @@ struct ast *parse_if_rule(void) if (token->type != TOKEN_IF) { puts("Internal error: expected a if rule but token has different " - "type"); + "type"); return NULL; } @@ -215,24 +222,28 @@ struct ast *parse_compound_list(void) struct token *token = PEEK_TOKEN(); // Skip newlines - while (token == TOKEN_NEWLINE) + while (token->type == TOKEN_NEWLINE) + { token = POP_TOKEN(); + } // and_or current_cmd = parse_and_or(); if (current_cmd == NULL) return NULL; list_append(result_list, current_cmd); - + // Following commands token = PEEK_TOKEN(); - while (token->type == TOKEN_SEMICOLON || token->type TOKEN_NEWLINE) + while (token->type == TOKEN_SEMICOLON || token->type == TOKEN_NEWLINE) { POP_TOKEN(); // Skip newlines - while (token == TOKEN_NEWLINE) + while (token->type == TOKEN_NEWLINE) + { token = POP_TOKEN(); + } // and_or current_cmd = parse_and_or(); @@ -245,12 +256,16 @@ struct ast *parse_compound_list(void) // Eventual semicolons if (token->type == TOKEN_SEMICOLON) + { token = POP_TOKEN(); + 
} // Skip newlines - while (token == TOKEN_NEWLINE) + while (token->type == TOKEN_NEWLINE) + { token = POP_TOKEN(); - + } + struct ast *result = ast_create_list(result_list); return result; @@ -281,7 +296,7 @@ struct ast *parse_else_clause(void) // Eventual else clause (recursive) struct ast *else_content = NULL; token = PEEK_TOKEN(); - if (token->type == TOKEN_ELSE || token_type == TOKEN_ELIF) + if (token->type == TOKEN_ELSE || token->type == TOKEN_ELIF) { else_content = parse_else_clause(); }