feat: yet another new parser architecture

This commit is contained in:
Gu://em_ 2026-01-23 17:01:26 +01:00
parent 6dd19a75ad
commit 96ac2fea77
9 changed files with 182 additions and 128 deletions

View file

@ -73,6 +73,9 @@ int main(int argc, char **argv)
// init lexer context
struct lexer_context *ctx = calloc(1, sizeof(struct lexer_context));
// init parser
int parser_init();
// Retrieve and build first AST
struct ast *command_ast = get_ast(ctx);
@ -107,6 +110,7 @@ int main(int argc, char **argv)
return ERR_INPUT_PROCESSING;
ast_free(&command_ast);
parser_close();
return return_code;
}

41
src/parser/grammar.c Normal file
View file

@ -0,0 +1,41 @@
#define _POSIX_C_SOURCE 200809L
// === Includes
#include "grammar.h"
#include "../utils/hash_map/hash_map.h"
#include "grammar_basic.h"
// === Static variables
static struct hash_map *firsts_map = NULL;
// === Static functions
/* @brief Placeholder for the lookup of the token that starts a rule
 *        (presumably backed by firsts_map once implemented - TODO confirm)
 *
 * @param r Rule to look up; ignored for now
 * @return Always TOKEN_NULL until the lookup is implemented
 */
static enum token_type first(enum rule r)
{
    // TODO
    // The stub does not read its argument yet: mark it as used on purpose so
    // the build stays clean with -Wextra (-Wunused-parameter).
    (void)r;
    return TOKEN_NULL;
}
// === Functions
/* Initializes the grammar submodule.
 * Currently a stub: nothing is allocated yet and success is always reported.
 */
bool grammar_init(void)
{
    /* TODO: create the firsts hashmap */
    /* TODO: populate the firsts hashmap */
    const bool initialized = true;

    return initialized;
}
/* Closes the grammar submodule.
 * Currently a stub: there is nothing to release yet.
 */
void grammar_close(void)
{
    /* TODO: free the firsts hashmap */
    return;
}
/* Entry point of the grammar: delegates the whole work to parse_list and
 * hands its result back unchanged (including NULL).
 */
struct ast *parse_input(struct lexer_context *ctx)
{
    struct ast *tree = parse_list(ctx);

    return tree;
}

65
src/parser/grammar.h Normal file
View file

@ -0,0 +1,65 @@
#ifndef GRAMMAR_H
#define GRAMMAR_H
#include <stdbool.h>
#include "../lexer/lexer.h"
// === Macros
/* PEEK_TOKEN: expands to `peek_token(ctx);` followed by a NULL check on a
 * variable named `token`. Intended use is `token = PEEK_TOKEN();`.
 *
 * Requirements at the expansion site:
 *  - `ctx` and `token` must be in scope under exactly those names;
 *  - the enclosing function must be able to `return NULL;`, because that is
 *    what happens (after an error message is printed with puts) when the
 *    token is NULL;
 *  - puts must be declared (<stdio.h>); this header does not include it
 *    directly.
 *
 * Warning: this expands to several statements and is not wrapped in
 * do { } while (0), so never use it as the unbraced body of an if/else/loop.
 */
#define PEEK_TOKEN() \
peek_token(ctx); \
if (token == NULL) \
{ \
puts("Internal error: cannot get the following token"); \
return NULL; \
}
/* POP_TOKEN: expands to `pop_token(ctx);` followed by a NULL check on a
 * variable named `token`. Intended use is `token = POP_TOKEN();`.
 *
 * Requirements at the expansion site:
 *  - `ctx` and `token` must be in scope under exactly those names;
 *  - the enclosing function must be able to `return NULL;`, because that is
 *    what happens (after an error message is printed with puts) when the
 *    token is NULL;
 *  - puts must be declared (<stdio.h>); this header does not include it
 *    directly.
 *
 * Warning: this expands to several statements and is not wrapped in
 * do { } while (0), so never use it as the unbraced body of an if/else/loop.
 */
#define POP_TOKEN() \
pop_token(ctx); \
if (token == NULL) \
{ \
puts("Internal error: cannot get the following token"); \
return NULL; \
}
// === Structures
/* Identifiers of the grammar rules known to the grammar submodule.
 * Values are consecutive, starting at 0 with RULE_NULL.
 */
enum rule
{
    RULE_NULL = 0,
    RULE_INPUT = 1,
    RULE_LIST = 2,
    RULE_AND_OR = 3,
    RULE_PIPELINE = 4,
    RULE_COMMAND = 5,
    RULE_SIMPLE_COMMAND = 6,
    RULE_SHELL_COMMAND = 7,
    RULE_IF = 8,
    RULE_COMPOUND_LIST = 9,
    RULE_ELSE_CLAUSE = 10
};
// === Functions
/* @brief Initializes the grammar submodule
 * @return true on success, false on error
 * @warning Do not use outside the parser
 */
bool grammar_init(void);
/* @brief Closes the grammar submodule
* @warning Do not use outside the parser
*/
void grammar_close(void);
/* @brief Acts as the entry point of the parser, calls parse_list
*
* @code input = list '\n'
* | list EOF
* | '\n'
* | EOF
* ;
*/
struct ast *parse_input(struct lexer_context *ctx);
#endif /* ! GRAMMAR_H */

View file

View file

@ -0,0 +1,4 @@
#ifndef GRAMMAR_ADVANCED_H
#define GRAMMAR_ADVANCED_H
#endif /* ! GRAMMAR_ADVANCED_H */

View file

@ -1,64 +1,11 @@
#define _POSIX_C_SOURCE 200809L
#include "grammar_basic.h"
// === Includes
#include "parsing_utils.h"
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include "../lexer/lexer.h"
#include "../utils/ast/ast.h"
// === Static functions
/* Returns true if c is a command terminator, false otherwise
static bool isterminator(struct token *token)
{
if (token == NULL)
return false;
switch (token->type)
{
case TOKEN_NEWLINE:
case TOKEN_SEMICOLON:
case TOKEN_EOF:
return true;
default:
return false;
}
}
*/
/* @brief: returns true if token is an end of list indicator.
* @warning: not used
*/
/*
static bool is_end_of_list(struct token *token)
{
if (token == NULL)
return false;
switch (token->type)
{
case TOKEN_NEWLINE:
case TOKEN_EOF:
return true;
default:
return false;
}
}
*/
// === Functions
struct ast *parse_input(struct lexer_context *ctx)
{
return parse_list(ctx);
}
#include "../utils/lists/lists.h"
#include "grammar.h"
struct ast *parse_list(struct lexer_context *ctx)
{
@ -78,8 +25,6 @@ struct ast *parse_list(struct lexer_context *ctx)
while (token->type == TOKEN_SEMICOLON)
{
token = POP_TOKEN();
// if (!isterminator(token)) // Follow(list)
// {
current_node = parse_and_or(ctx);
if (current_node == NULL)
{
@ -88,10 +33,8 @@ struct ast *parse_list(struct lexer_context *ctx)
return NULL;
}
result_list = list_append(result_list, current_node);
// }
token = PEEK_TOKEN();
}
// result_list = list_append(result_list, current_node);
return ast_create_list(result_list);
}

View file

@ -1,35 +1,10 @@
#ifndef PARSING_UTILS_H
#define PARSING_UTILS_H
#ifndef GRAMMAR_BASIC_H
#define GRAMMAR_BASIC_H
#include "../utils/ast/ast.h"
#include "../lexer/lexer.h"
// === Macros
#define PEEK_TOKEN() \
peek_token(ctx); \
if (token == NULL) \
{ \
puts("Internal error: cannot get the following token"); \
return NULL; \
}
#define POP_TOKEN() \
pop_token(ctx); \
if (token == NULL) \
{ \
puts("Internal error: cannot get the following token"); \
return NULL; \
}
/* @brief Acts as the entry point of the parser, calls parse_list
*
* @code input = list '\n'
* | list EOF
* | '\n'
* | EOF
* ;
*/
struct ast *parse_input(struct lexer_context *ctx);
// === Functions
/* @brief: parses a list of [and_or] rules separated by semicolons and that
* ends by a newline
@ -97,4 +72,4 @@ struct ast *parse_compound_list(struct lexer_context *ctx);
*/
struct ast *parse_else_clause(struct lexer_context *ctx);
#endif /* ! PARSING_UTILS_H */
#endif /* ! GRAMMAR_BASIC_H */

View file

@ -1,49 +1,52 @@
#include "parser.h"
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "../lexer/lexer.h"
#include "../parser/parsing_utils.h"
#include "../utils/lists/lists.h"
#include "grammar.h"
// === Static functions
// ...
// === Static variables
static enum parser_state state = PARSER_STATE_NOT_INITIALIZED;
// === Functions
struct ast *get_ast(struct lexer_context *ctx)
bool parser_init(void)
{
struct token *token = PEEK_TOKEN();
struct ast *res;
if (token->type == TOKEN_EOF)
if (state == PARSER_STATE_READY)
{
token = pop_token(ctx);
return ast_create_end();
}
else if (token->type == TOKEN_NEWLINE)
{
token = pop_token(ctx);
return ast_create_void();
}
else // TOKEN WORD
{
res = parse_list(ctx);
}
/*
if (token == NULL)
{
puts("Internal error: cannot get the following token");
puts("Hint: EOF might be missing");
puts("Internal error: tried to initialize the parser module twice.");
return NULL;
}
*/
return res;
int success = grammar_init();
if (success == false)
return false;
state = PARSER_STATE_READY;
return true;
}
/* Builds the AST of the next command by delegating to parse_input.
 *
 * Returns NULL (after printing an internal-error message) when ctx is NULL,
 * when the parser has not been initialized, or when it has been closed.
 */
struct ast *get_ast(struct lexer_context *ctx)
{
    if (!ctx)
    {
        puts("Internal error: called parser with no lexer context (NULL "
             "pointer). Aborting.");
        return NULL;
    }

    // Only a usable parser may go on to the grammar.
    switch (state)
    {
    case PARSER_STATE_NOT_INITIALIZED:
        puts("Internal error: attempted to call parser without initializing "
             "it. Aborting.");
        return NULL;
    case PARSER_STATE_CLOSED:
        puts("Internal error: attempted to call parser after closing it. "
             "Aborting.");
        return NULL;
    default:
        break;
    }

    return parse_input(ctx);
}
// TODO

View file

@ -1,9 +1,28 @@
#ifndef PARSER_H
#define PARSER_H
#include <stdbool.h>
#include "../lexer/lexer.h"
#include "../utils/ast/ast.h"
/* Lifecycle state of the parser module.
 * NOT_INITIALIZED is the zero value; READY is the state in which get_ast
 * accepts calls; get_ast refuses to run in the other two states.
 */
enum parser_state
{
    PARSER_STATE_NOT_INITIALIZED = 0,
    PARSER_STATE_READY = 1,
    PARSER_STATE_CLOSED = 2
};
/* @brief Initializes the parser module
* @warning parser needs to be closed after use with parser_close()
*
* @return Returns false on error and true on success
*/
bool parser_init(void);
/* @brief Closes the parser module after use
*/
void parser_close(void);
/* @brief Builds the AST representation of the next command to execute.
*
* @return Returns the AST representation of the next command to execute.