From 96ac2fea77cbf010e849716c60f0ea5407dfa068 Mon Sep 17 00:00:00 2001 From: "Gu://em_" Date: Fri, 23 Jan 2026 17:01:26 +0100 Subject: [PATCH] feat: yet another new parser architecture --- src/main.c | 4 + src/parser/grammar.c | 41 ++++++++++ src/parser/grammar.h | 65 +++++++++++++++ src/parser/grammar_advanced.c | 0 src/parser/grammar_advanced.h | 4 + .../{parsing_utils.c => grammar_basic.c} | 79 +++---------------- .../{parsing_utils.h => grammar_basic.h} | 35 ++------ src/parser/parser.c | 63 ++++++++------- src/parser/parser.h | 19 +++++ 9 files changed, 182 insertions(+), 128 deletions(-) create mode 100644 src/parser/grammar.c create mode 100644 src/parser/grammar.h create mode 100644 src/parser/grammar_advanced.c create mode 100644 src/parser/grammar_advanced.h rename src/parser/{parsing_utils.c => grammar_basic.c} (82%) rename src/parser/{parsing_utils.h => grammar_basic.h} (59%) diff --git a/src/main.c b/src/main.c index a03cfa9..7ef833c 100644 --- a/src/main.c +++ b/src/main.c @@ -73,6 +73,9 @@ int main(int argc, char **argv) // init lexer context struct lexer_context *ctx = calloc(1, sizeof(struct lexer_context)); + // init parser (on failure, get_ast() reports it and returns NULL) + parser_init(); + // Retrieve and build first AST struct ast *command_ast = get_ast(ctx); @@ -107,6 +110,7 @@ int main(int argc, char **argv) return ERR_INPUT_PROCESSING; ast_free(&command_ast); + parser_close(); return return_code; } diff --git a/src/parser/grammar.c b/src/parser/grammar.c new file mode 100644 index 0000000..15ee342 --- /dev/null +++ b/src/parser/grammar.c @@ -0,0 +1,41 @@ +#define _POSIX_C_SOURCE 200809L + +// === Includes +#include "grammar.h" + +#include "../utils/hash_map/hash_map.h" +#include "grammar_basic.h" + +// === Static variables + +static struct hash_map *firsts_map = NULL; + +// === Static functions +static enum token_type first(enum rule r) +{ + // TODO + return TOKEN_NULL; +} + +// === Functions + +bool grammar_init(void) +{ + // Create firsts hashmap + // TODO + + // Populate the hashmap 
+ // TODO + + return true; +} + +void grammar_close(void) +{ + // TODO free hashmap +} + +struct ast *parse_input(struct lexer_context *ctx) +{ + return parse_list(ctx); +} diff --git a/src/parser/grammar.h b/src/parser/grammar.h new file mode 100644 index 0000000..fe31849 --- /dev/null +++ b/src/parser/grammar.h @@ -0,0 +1,65 @@ +#ifndef GRAMMAR_H +#define GRAMMAR_H + +#include + +#include "../lexer/lexer.h" + +// === Macros + +#define PEEK_TOKEN() \ + peek_token(ctx); \ + if (token == NULL) \ + { \ + puts("Internal error: cannot get the following token"); \ + return NULL; \ + } + +#define POP_TOKEN() \ + pop_token(ctx); \ + if (token == NULL) \ + { \ + puts("Internal error: cannot get the following token"); \ + return NULL; \ + } + +// === Structures + +enum rule { + RULE_NULL, + RULE_INPUT, + RULE_LIST, + RULE_AND_OR, + RULE_PIPELINE, + RULE_COMMAND, + RULE_SIMPLE_COMMAND, + RULE_SHELL_COMMAND, + RULE_IF, + RULE_COMPOUND_LIST, + RULE_ELSE_CLAUSE +}; + +// === Functions + +/* @brief Initializes the grammar submodule + * @return true on success, false on error + * @warning Do not use outside the parser + */ + bool grammar_init(void); + +/* @brief Closes the grammar submodule + * @warning Do not use outside the parser + */ + void grammar_close(void); + +/* @brief Acts as the entry point of the parser, calls parse_list + * + * @code input = list '\n' + * | list EOF + * | '\n' + * | EOF + * ; + */ +struct ast *parse_input(struct lexer_context *ctx); + +#endif /* ! GRAMMAR_H */ diff --git a/src/parser/grammar_advanced.c b/src/parser/grammar_advanced.c new file mode 100644 index 0000000..e69de29 diff --git a/src/parser/grammar_advanced.h b/src/parser/grammar_advanced.h new file mode 100644 index 0000000..8773655 --- /dev/null +++ b/src/parser/grammar_advanced.h @@ -0,0 +1,4 @@ +#ifndef GRAMMAR_ADVANCED_H +#define GRAMMAR_ADVANCED_H + +#endif /* ! 
GRAMMAR_ADVANCED_H */ diff --git a/src/parser/parsing_utils.c b/src/parser/grammar_basic.c similarity index 82% rename from src/parser/parsing_utils.c rename to src/parser/grammar_basic.c index 3be4f05..dcb1f1b 100644 --- a/src/parser/parsing_utils.c +++ b/src/parser/grammar_basic.c @@ -1,64 +1,11 @@ -#define _POSIX_C_SOURCE 200809L +#include "grammar_basic.h" -// === Includes -#include "parsing_utils.h" - -#include -#include #include #include #include "../lexer/lexer.h" -#include "../utils/ast/ast.h" - -// === Static functions - -/* Returns true if c is a command terminator, false otherwise -static bool isterminator(struct token *token) -{ - if (token == NULL) - return false; - - switch (token->type) - { - case TOKEN_NEWLINE: - case TOKEN_SEMICOLON: - case TOKEN_EOF: - return true; - default: - return false; - } -} - - */ - -/* @brief: returns true if token is an end of list indicator. - * @warning: not used - */ - -/* -static bool is_end_of_list(struct token *token) -{ - if (token == NULL) - return false; - - switch (token->type) - { - case TOKEN_NEWLINE: - case TOKEN_EOF: - return true; - default: - return false; - } -} -*/ - -// === Functions - -struct ast *parse_input(struct lexer_context *ctx) -{ - return parse_list(ctx); -} +#include "../utils/lists/lists.h" +#include "grammar.h" struct ast *parse_list(struct lexer_context *ctx) { @@ -78,20 +25,16 @@ struct ast *parse_list(struct lexer_context *ctx) while (token->type == TOKEN_SEMICOLON) { token = POP_TOKEN(); - // if (!isterminator(token)) // Follow(list) - // { - current_node = parse_and_or(ctx); - if (current_node == NULL) - { - // TODO free list - // There must be a function for that - return NULL; - } - result_list = list_append(result_list, current_node); - // } + current_node = parse_and_or(ctx); + if (current_node == NULL) + { + // TODO free list + // There must be a function for that + return NULL; + } + result_list = list_append(result_list, current_node); token = PEEK_TOKEN(); } - // result_list = 
list_append(result_list, current_node); return ast_create_list(result_list); } diff --git a/src/parser/parsing_utils.h b/src/parser/grammar_basic.h similarity index 59% rename from src/parser/parsing_utils.h rename to src/parser/grammar_basic.h index 89eb4bf..0990e1b 100644 --- a/src/parser/parsing_utils.h +++ b/src/parser/grammar_basic.h @@ -1,35 +1,10 @@ -#ifndef PARSING_UTILS_H -#define PARSING_UTILS_H +#ifndef GRAMMAR_BASIC_H +#define GRAMMAR_BASIC_H +#include "../utils/ast/ast.h" #include "../lexer/lexer.h" -// === Macros - -#define PEEK_TOKEN() \ - peek_token(ctx); \ - if (token == NULL) \ - { \ - puts("Internal error: cannot get the following token"); \ - return NULL; \ - } - -#define POP_TOKEN() \ - pop_token(ctx); \ - if (token == NULL) \ - { \ - puts("Internal error: cannot get the following token"); \ - return NULL; \ - } - -/* @brief Acts as the entry point of the parser, calls parse_list - * - * @code input = list '\n' - * | list EOF - * | '\n' - * | EOF - * ; - */ -struct ast *parse_input(struct lexer_context *ctx); +// === Functions /* @brief: parses a list of [and_or] rules separated by semicolons and that * ends by a newline @@ -97,4 +72,4 @@ struct ast *parse_compound_list(struct lexer_context *ctx); */ struct ast *parse_else_clause(struct lexer_context *ctx); -#endif /* ! PARSING_UTILS_H */ +#endif /* ! GRAMMAR_BASIC_H */ diff --git a/src/parser/parser.c b/src/parser/parser.c index dbe3f72..b86c696 100644 --- a/src/parser/parser.c +++ b/src/parser/parser.c @@ -1,49 +1,52 @@ #include "parser.h" -#include -#include #include -#include -#include -#include "../lexer/lexer.h" -#include "../parser/parsing_utils.h" -#include "../utils/lists/lists.h" +#include "grammar.h" -// === Static functions -// ... 
+// === Static variables + +static enum parser_state state = PARSER_STATE_NOT_INITIALIZED; // === Functions +bool parser_init(void) +{ + if (state == PARSER_STATE_READY) + { + puts("Internal error: tried to initialize the parser module twice."); + return false; + } + bool success = grammar_init(); + if (!success) + return false; + + state = PARSER_STATE_READY; + return true; +} + struct ast *get_ast(struct lexer_context *ctx) { - struct token *token = PEEK_TOKEN(); - struct ast *res; - - if (token->type == TOKEN_EOF) + if (ctx == NULL) { - token = pop_token(ctx); - return ast_create_end(); + puts("Internal error: called parser with no lexer context (NULL " + "pointer). Aborting."); + return NULL; } - else if (token->type == TOKEN_NEWLINE) + if (state == PARSER_STATE_NOT_INITIALIZED) { - token = pop_token(ctx); - return ast_create_void(); + puts("Internal error: attempted to call parser without initializing " + "it. Aborting."); + return NULL; } - else // TOKEN WORD + if (state == PARSER_STATE_CLOSED) { - res = parse_list(ctx); + puts("Internal error: attempted to call parser after closing it. 
" + "Aborting."); + return NULL; } - /* - if (token == NULL) - { - puts("Internal error: cannot get the following token"); - puts("Hint: EOF might be missing"); - return NULL; - } - */ - return res; + return parse_input(ctx); } // TODO @@ -51,4 +54,4 @@ struct ast *get_ast_str(char *command) { (void)command; return NULL; -} \ No newline at end of file +} diff --git a/src/parser/parser.h b/src/parser/parser.h index 1837e75..b51d2f5 100644 --- a/src/parser/parser.h +++ b/src/parser/parser.h @@ -1,9 +1,28 @@ #ifndef PARSER_H #define PARSER_H +#include + #include "../lexer/lexer.h" #include "../utils/ast/ast.h" +enum parser_state { + PARSER_STATE_NOT_INITIALIZED = 0, + PARSER_STATE_READY, + PARSER_STATE_CLOSED +}; + +/* @brief Initializes the parser module + * @warning parser needs to be closed after use with parser_close() + * + * @return Returns false on error and true on success + */ +bool parser_init(void); + +/* @brief Closes the parser module after use + */ +void parser_close(void); + /* @brief Builds the AST representation of the next command to execute. * * @return Returns the AST representation of the next command to execute.