feat: yet another new parser architecture

This commit is contained in:
Gu://em_ 2026-01-23 17:01:26 +01:00
parent 6dd19a75ad
commit 96ac2fea77
9 changed files with 182 additions and 128 deletions

View file

@ -73,6 +73,9 @@ int main(int argc, char **argv)
// init lexer context // init lexer context
struct lexer_context *ctx = calloc(1, sizeof(struct lexer_context)); struct lexer_context *ctx = calloc(1, sizeof(struct lexer_context));
// init parser
// NOTE(review): the line below is a function *declaration*, not a call, so
// parser_init() is never executed here. get_ast() refuses to run while the
// parser state is PARSER_STATE_NOT_INITIALIZED, so this presumably should be
// a checked call to parser_init() -- confirm and fix together with the
// surrounding error handling.
int parser_init();
// Retrieve and build first AST // Retrieve and build first AST
struct ast *command_ast = get_ast(ctx); struct ast *command_ast = get_ast(ctx);
@ -107,6 +110,7 @@ int main(int argc, char **argv)
return ERR_INPUT_PROCESSING; return ERR_INPUT_PROCESSING;
ast_free(&command_ast); ast_free(&command_ast);
parser_close();
return return_code; return return_code;
} }

41
src/parser/grammar.c Normal file
View file

@ -0,0 +1,41 @@
#define _POSIX_C_SOURCE 200809L
// === Includes
#include "grammar.h"
#include "../utils/hash_map/hash_map.h"
#include "grammar_basic.h"
// === Static variables
// Placeholder for the "firsts" hash map mentioned in the TODOs of
// grammar_init() and grammar_close(). Nothing in this file assigns or reads
// it yet, so it stays NULL.
static struct hash_map *firsts_map = NULL;
// === Static functions
/* @brief Stub for the per-rule "first" lookup.
 *
 * Not implemented yet: the rule is ignored and TOKEN_NULL is always returned.
 * NOTE(review): presumably meant to return the token type that can start
 * rule r once the firsts hash map exists -- confirm when implementing.
 *
 * @param r Grammar rule to look up (currently unused)
 * @return Always TOKEN_NULL for now
 */
static enum token_type first(enum rule r)
{
    // TODO
    (void)r; // silences the unused-parameter warning until implemented
    return TOKEN_NULL;
}
// === Functions
/* Sets up the grammar submodule.
 * Both setup steps are still TODO, so the call cannot fail at the moment.
 */
bool grammar_init(void)
{
    /* TODO: create the firsts hash map */

    /* TODO: populate the hash map */

    /* Nothing was attempted, hence nothing failed: report success. */
    return true;
}
/* Tears down the grammar submodule.
 * Currently a no-op: there is no hash map to release yet.
 */
void grammar_close(void)
{
    /* TODO: free the firsts hash map once grammar_init() creates it */
}
/* Entry point of the grammar: the `input` rule is delegated entirely to
 * parse_list(), whose result is handed back untouched.
 */
struct ast *parse_input(struct lexer_context *ctx)
{
    struct ast *root = parse_list(ctx);

    return root;
}

65
src/parser/grammar.h Normal file
View file

@ -0,0 +1,65 @@
#ifndef GRAMMAR_H
#define GRAMMAR_H
#include <stdbool.h>
#include "../lexer/lexer.h"
// === Macros
/* Calls peek_token(ctx) and makes the *calling function* return NULL when no
 * token was obtained.
 *
 * Usage contract (this is NOT an expression macro):
 *  - use it as the right-hand side of an assignment or initializer of a
 *    variable named `token`, e.g. `token = PEEK_TOKEN();` -- the expansion
 *    is the call followed by an `if (token == NULL)` check;
 *  - a lexer context named `ctx` must be in scope;
 *  - the enclosing function must return a pointer type, because the error
 *    path executes `return NULL;`;
 *  - it expands to several statements: never use it as the body of a
 *    braceless if/else/loop;
 *  - the error message is printed with puts(), so <stdio.h> must be included
 *    where the macro is expanded.
 */
#define PEEK_TOKEN() \
peek_token(ctx); \
if (token == NULL) \
{ \
puts("Internal error: cannot get the following token"); \
return NULL; \
}
/* Calls pop_token(ctx) and makes the *calling function* return NULL when no
 * token was obtained.
 *
 * Usage contract (this is NOT an expression macro):
 *  - use it as the right-hand side of an assignment or initializer of a
 *    variable named `token`, e.g. `token = POP_TOKEN();` -- the expansion
 *    is the call followed by an `if (token == NULL)` check;
 *  - a lexer context named `ctx` must be in scope;
 *  - the enclosing function must return a pointer type, because the error
 *    path executes `return NULL;`;
 *  - it expands to several statements: never use it as the body of a
 *    braceless if/else/loop;
 *  - the error message is printed with puts(), so <stdio.h> must be included
 *    where the macro is expanded.
 */
#define POP_TOKEN() \
pop_token(ctx); \
if (token == NULL) \
{ \
puts("Internal error: cannot get the following token"); \
return NULL; \
}
// === Structures
/* Identifiers of the grammar rules.
 * Values are consecutive and start at RULE_NULL == 0.
 */
enum rule {
    RULE_NULL = 0,

    /* top-level rules */
    RULE_INPUT,
    RULE_LIST,
    RULE_AND_OR,
    RULE_PIPELINE,

    /* commands */
    RULE_COMMAND,
    RULE_SIMPLE_COMMAND,
    RULE_SHELL_COMMAND,

    /* if construct */
    RULE_IF,
    RULE_COMPOUND_LIST,
    RULE_ELSE_CLAUSE,
};
// === Functions
/* @brief Initializes the grammar submodule
 * @return true on success, false on error
 * @warning Do not use outside the parser
 */
bool grammar_init(void);
/* @brief Closes the grammar submodule
 * @note Takes no argument and returns nothing.
 * @warning Do not use outside the parser
 */
void grammar_close(void);
/* @brief Acts as the entry point of the parser, calls parse_list
 *
 * @code input = list '\n'
 *             | list EOF
 *             | '\n'
 *             | EOF
 *             ;
 *
 * @param ctx Lexer context, passed as-is to parse_list
 * @return The value returned by parse_list
 */
struct ast *parse_input(struct lexer_context *ctx);
#endif /* ! GRAMMAR_H */

View file

View file

@ -0,0 +1,4 @@
#ifndef GRAMMAR_ADVANCED_H
#define GRAMMAR_ADVANCED_H
#endif /* ! GRAMMAR_ADVANCED_H */

View file

@ -1,64 +1,11 @@
#define _POSIX_C_SOURCE 200809L #include "grammar_basic.h"
// === Includes
#include "parsing_utils.h"
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include "../lexer/lexer.h" #include "../lexer/lexer.h"
#include "../utils/ast/ast.h" #include "../utils/lists/lists.h"
#include "grammar.h"
// === Static functions
/* Returns true if token is a command terminator, false otherwise
static bool isterminator(struct token *token)
{
if (token == NULL)
return false;
switch (token->type)
{
case TOKEN_NEWLINE:
case TOKEN_SEMICOLON:
case TOKEN_EOF:
return true;
default:
return false;
}
}
*/
/* @brief: returns true if token is an end of list indicator.
* @warning: not used
*/
/*
static bool is_end_of_list(struct token *token)
{
if (token == NULL)
return false;
switch (token->type)
{
case TOKEN_NEWLINE:
case TOKEN_EOF:
return true;
default:
return false;
}
}
*/
// === Functions
/* Parser entry point: forwards the lexer context to parse_list() and returns
 * its result unchanged.
 */
struct ast *parse_input(struct lexer_context *ctx)
{
    struct ast *list_ast = parse_list(ctx);

    return list_ast;
}
struct ast *parse_list(struct lexer_context *ctx) struct ast *parse_list(struct lexer_context *ctx)
{ {
@ -78,20 +25,16 @@ struct ast *parse_list(struct lexer_context *ctx)
while (token->type == TOKEN_SEMICOLON) while (token->type == TOKEN_SEMICOLON)
{ {
token = POP_TOKEN(); token = POP_TOKEN();
// if (!isterminator(token)) // Follow(list) current_node = parse_and_or(ctx);
// { if (current_node == NULL)
current_node = parse_and_or(ctx); {
if (current_node == NULL) // TODO free list
{ // There must be a function for that
// TODO free list return NULL;
// There must be a function for that }
return NULL; result_list = list_append(result_list, current_node);
}
result_list = list_append(result_list, current_node);
// }
token = PEEK_TOKEN(); token = PEEK_TOKEN();
} }
// result_list = list_append(result_list, current_node);
return ast_create_list(result_list); return ast_create_list(result_list);
} }

View file

@ -1,35 +1,10 @@
#ifndef PARSING_UTILS_H #ifndef GRAMMAR_BASIC_H
#define PARSING_UTILS_H #define GRAMMAR_BASIC_H
#include "../utils/ast/ast.h"
#include "../lexer/lexer.h" #include "../lexer/lexer.h"
// === Macros // === Functions
/* Calls peek_token(ctx); if the resulting `token` is NULL, prints an internal
 * error with puts() and makes the calling function return NULL.
 * Must be used as `token = PEEK_TOKEN();` with `ctx` in scope, inside a
 * function returning a pointer. It expands to several statements, so it must
 * not be the body of a braceless if/else/loop.
 */
#define PEEK_TOKEN() \
peek_token(ctx); \
if (token == NULL) \
{ \
puts("Internal error: cannot get the following token"); \
return NULL; \
}
/* Calls pop_token(ctx); if the resulting `token` is NULL, prints an internal
 * error with puts() and makes the calling function return NULL.
 * Must be used as `token = POP_TOKEN();` with `ctx` in scope, inside a
 * function returning a pointer. It expands to several statements, so it must
 * not be the body of a braceless if/else/loop.
 */
#define POP_TOKEN() \
pop_token(ctx); \
if (token == NULL) \
{ \
puts("Internal error: cannot get the following token"); \
return NULL; \
}
/* @brief Acts as the entry point of the parser, calls parse_list
 *
 * @code input = list '\n'
 *             | list EOF
 *             | '\n'
 *             | EOF
 *             ;
 *
 * @param ctx Lexer context handed to parse_list
 * @return Whatever parse_list returns
 */
struct ast *parse_input(struct lexer_context *ctx);
/* @brief: parses a list of [and_or] rules separated by semicolons and that /* @brief: parses a list of [and_or] rules separated by semicolons and that
* ends by a newline * ends by a newline
@ -97,4 +72,4 @@ struct ast *parse_compound_list(struct lexer_context *ctx);
*/ */
struct ast *parse_else_clause(struct lexer_context *ctx); struct ast *parse_else_clause(struct lexer_context *ctx);
#endif /* ! PARSING_UTILS_H */ #endif /* ! GRAMMAR_BASIC_H */

View file

@ -1,49 +1,52 @@
#include "parser.h" #include "parser.h"
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "../lexer/lexer.h" #include "grammar.h"
#include "../parser/parsing_utils.h"
#include "../utils/lists/lists.h"
// === Static functions // === Static variables
// ...
// Lifecycle state of the parser module. Starts as NOT_INITIALIZED, is set to
// PARSER_STATE_READY by a successful parser_init(), and is checked by
// get_ast() before any parsing is attempted.
static enum parser_state state = PARSER_STATE_NOT_INITIALIZED;
// === Functions // === Functions
/* @brief Initializes the parser module.
 *
 * Initializes the grammar submodule and, on success, moves the module to
 * PARSER_STATE_READY. Calling it while the module is already READY prints an
 * internal error and fails without touching the state.
 *
 * @return true on success; false if the parser was already initialized or if
 *         grammar_init() failed
 */
bool parser_init(void)
{
    if (state == PARSER_STATE_READY)
    {
        puts("Internal error: tried to initialize the parser module twice.");
        // The function returns bool: report failure with false, not NULL.
        return false;
    }

    if (!grammar_init())
        return false;

    state = PARSER_STATE_READY;
    return true;
}
struct ast *get_ast(struct lexer_context *ctx) struct ast *get_ast(struct lexer_context *ctx)
{ {
struct token *token = PEEK_TOKEN(); if (ctx == NULL)
struct ast *res;
if (token->type == TOKEN_EOF)
{ {
token = pop_token(ctx); puts("Internal error: called parser with no lexer context (NULL "
return ast_create_end(); "pointer). Aborting.");
return NULL;
} }
else if (token->type == TOKEN_NEWLINE) if (state == PARSER_STATE_NOT_INITIALIZED)
{ {
token = pop_token(ctx); puts("Internal error: attempted to call parser without initializing "
return ast_create_void(); "it. Aborting.");
return NULL;
} }
else // TOKEN WORD if (state == PARSER_STATE_CLOSED)
{ {
res = parse_list(ctx); puts("Internal error: attempted to call parser after closing it. "
"Aborting.");
return NULL;
} }
/* return parse_input(ctx);
if (token == NULL)
{
puts("Internal error: cannot get the following token");
puts("Hint: EOF might be missing");
return NULL;
}
*/
return res;
} }
// TODO // TODO
@ -51,4 +54,4 @@ struct ast *get_ast_str(char *command)
{ {
(void)command; (void)command;
return NULL; return NULL;
} }

View file

@ -1,9 +1,28 @@
#ifndef PARSER_H #ifndef PARSER_H
#define PARSER_H #define PARSER_H
#include <stdbool.h>
#include "../lexer/lexer.h" #include "../lexer/lexer.h"
#include "../utils/ast/ast.h" #include "../utils/ast/ast.h"
/* Lifecycle of the parser module.
 * NOT_INITIALIZED is deliberately 0 so that it is the zero-value state.
 */
enum parser_state {
    /* parser_init() has not succeeded yet */
    PARSER_STATE_NOT_INITIALIZED = 0,
    /* parser_init() succeeded, get_ast() may be used */
    PARSER_STATE_READY,
    /* get_ast() rejects calls in this state */
    PARSER_STATE_CLOSED,
};
/* @brief Initializes the parser module
 * @warning parser needs to be closed after use with parser_close()
 * @warning calling it again while the parser is already initialized prints an
 *          internal error and returns false
 *
 * @return Returns false on error and true on success
 */
bool parser_init(void);
/* @brief Closes the parser module after use
 *
 * NOTE(review): no definition of parser_close() is visible in this change;
 * confirm it is implemented (presumably it should set the state to
 * PARSER_STATE_CLOSED and call grammar_close()).
 */
void parser_close(void);
/* @brief Builds the AST representation of the next command to execute. /* @brief Builds the AST representation of the next command to execute.
* *
* @return Returns the AST representation of the next command to execute. * @return Returns the AST representation of the next command to execute.