2026-01-23 17:01:26 +01:00
|
|
|
// === Includes
|
|
|
|
|
#include "grammar.h"
|
|
|
|
|
|
2026-01-24 13:06:39 +01:00
|
|
|
#include <stdio.h>
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
|
2026-01-27 00:30:19 +01:00
|
|
|
#include "grammar_basic.h"
|
2026-01-23 17:01:26 +01:00
|
|
|
|
|
|
|
|
// === Static variables
|
|
|
|
|
|
2026-01-24 13:06:39 +01:00
|
|
|
// rule-indexed array containing firsts
|
|
|
|
|
static struct firsts_list *firsts_map = NULL;
|
2026-01-23 17:01:26 +01:00
|
|
|
|
|
|
|
|
// === Static functions
|
2026-01-24 13:06:39 +01:00
|
|
|
|
|
|
|
|
/* @brief Add a token to a rule's firsts (in firsts_map)
|
|
|
|
|
*
|
|
|
|
|
* @arg rule the rule to which add a first
|
|
|
|
|
* @arg token the token to add to the rule's firsts
|
|
|
|
|
* @return true on success, false on error
|
|
|
|
|
*/
|
2026-01-24 16:13:16 +01:00
|
|
|
static bool add_first(enum rule rule, enum token_type token)
|
2026-01-24 13:06:39 +01:00
|
|
|
{
|
|
|
|
|
struct firsts_list *item = &firsts_map[rule];
|
|
|
|
|
if (item->tokens != NULL)
|
|
|
|
|
{
|
|
|
|
|
// Check for duplicates
|
|
|
|
|
for (size_t i = 0; i < item->list_length; i++)
|
|
|
|
|
{
|
2026-01-24 16:13:16 +01:00
|
|
|
if (item->tokens[i] == token)
|
2026-01-24 13:06:39 +01:00
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Append
|
|
|
|
|
item->list_length++;
|
2026-01-27 00:30:19 +01:00
|
|
|
item->tokens = realloc(item->tokens,
|
|
|
|
|
(item->list_length) * sizeof(enum token_type));
|
2026-01-24 13:06:39 +01:00
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
// Create entry
|
2026-01-24 16:13:16 +01:00
|
|
|
item->list_length = 1;
|
|
|
|
|
item->tokens = calloc(1, sizeof(enum token_type));
|
2026-01-24 13:06:39 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Check for alloc error
|
|
|
|
|
if (item->tokens == NULL)
|
|
|
|
|
{
|
|
|
|
|
item->list_length = 0;
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Fill
|
|
|
|
|
item->tokens[item->list_length - 1] = token;
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2026-01-29 19:47:59 +01:00
|
|
|
/* @brief Add a list of tokens to a rule's firsts (in firsts_map)
|
|
|
|
|
*
|
|
|
|
|
* @arg rule the rule to which add a first
|
|
|
|
|
* @arg tokens_list the list of tokens to add to the rule's firsts
|
|
|
|
|
* @return true on success, false on error
|
|
|
|
|
*/
|
|
|
|
|
static bool add_firsts(enum rule rule, struct firsts_list *tokens_list)
|
|
|
|
|
{
|
|
|
|
|
for (size_t i = 0; i < tokens_list->list_length; i++)
|
|
|
|
|
{
|
|
|
|
|
bool res = add_first(rule, tokens_list->tokens[i]);
|
|
|
|
|
if (!res)
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2026-01-24 13:06:39 +01:00
|
|
|
/* @brief initializes the firsts_map static variable (does not populate it)
|
|
|
|
|
* @return true on success, false on error
|
|
|
|
|
*/
|
|
|
|
|
static bool init_firsts_map(void)
|
|
|
|
|
{
|
|
|
|
|
firsts_map = calloc(NUMBER_OF_RULES, sizeof(struct firsts_list));
|
|
|
|
|
if (firsts_map == NULL)
|
|
|
|
|
{
|
2026-01-27 16:05:11 +01:00
|
|
|
perror("Internal error: couldn't create the firsts_map (is your memory "
|
2026-01-27 19:56:33 +01:00
|
|
|
"full ?)");
|
2026-01-24 13:06:39 +01:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true;
|
2026-01-23 17:01:26 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// === Functions
|
|
|
|
|
|
2026-01-30 18:26:56 +01:00
|
|
|
int grammar_init(void)
|
2026-01-23 17:01:26 +01:00
|
|
|
{
|
2026-01-24 13:06:39 +01:00
|
|
|
// Initialize the firsts map
|
|
|
|
|
bool success = init_firsts_map();
|
|
|
|
|
if (success != true)
|
|
|
|
|
return false;
|
2026-01-23 17:01:26 +01:00
|
|
|
|
2026-01-24 13:06:39 +01:00
|
|
|
// Populate the firsts map
|
2026-01-29 19:47:59 +01:00
|
|
|
// TODO CHECK ORDER
|
2026-01-24 16:13:16 +01:00
|
|
|
|
2026-01-29 19:47:59 +01:00
|
|
|
// If
|
|
|
|
|
add_first(RULE_IF, TOKEN_IF);
|
2026-01-24 16:13:16 +01:00
|
|
|
|
2026-01-29 19:47:59 +01:00
|
|
|
// Else clause
|
|
|
|
|
add_first(RULE_ELSE_CLAUSE, TOKEN_ELSE);
|
|
|
|
|
add_first(RULE_ELSE_CLAUSE, TOKEN_ELIF);
|
2026-01-24 16:13:16 +01:00
|
|
|
|
2026-01-29 19:47:59 +01:00
|
|
|
// For
|
|
|
|
|
add_first(RULE_FOR, TOKEN_FOR);
|
2026-01-24 16:13:16 +01:00
|
|
|
|
2026-01-29 19:47:59 +01:00
|
|
|
// While
|
|
|
|
|
add_first(RULE_WHILE, TOKEN_WHILE);
|
2026-01-24 16:13:16 +01:00
|
|
|
|
2026-01-29 19:47:59 +01:00
|
|
|
// Until
|
|
|
|
|
add_first(RULE_WHILE, TOKEN_UNTIL);
|
2026-01-24 16:13:16 +01:00
|
|
|
|
2026-01-29 19:47:59 +01:00
|
|
|
// Case
|
|
|
|
|
add_first(RULE_CASE, TOKEN_CASE);
|
2026-01-24 16:13:16 +01:00
|
|
|
|
2026-01-29 19:47:59 +01:00
|
|
|
// Case item
|
|
|
|
|
add_first(RULE_CASE_ITEM, TOKEN_LEFT_PAREN);
|
|
|
|
|
add_first(RULE_CASE_ITEM, TOKEN_WORD);
|
2026-01-24 16:13:16 +01:00
|
|
|
|
2026-01-29 19:47:59 +01:00
|
|
|
// Case clause
|
|
|
|
|
add_firsts(RULE_CASE_CLAUSE, first(RULE_CASE_ITEM));
|
2026-01-24 16:13:16 +01:00
|
|
|
|
2026-01-29 19:47:59 +01:00
|
|
|
// Redirection
|
|
|
|
|
add_first(RULE_REDIRECTION, TOKEN_IONUMBER);
|
|
|
|
|
add_first(RULE_REDIRECTION, TOKEN_REDIR_LEFT);
|
|
|
|
|
add_first(RULE_REDIRECTION, TOKEN_REDIR_RIGHT);
|
|
|
|
|
add_first(RULE_REDIRECTION, TOKEN_REDIR_LEFT_RIGHT);
|
|
|
|
|
add_first(RULE_REDIRECTION, TOKEN_REDIR_DOUBLE_RIGHT);
|
|
|
|
|
add_first(RULE_REDIRECTION, TOKEN_REDIR_LEFT_AMP);
|
|
|
|
|
add_first(RULE_REDIRECTION, TOKEN_REDIR_RIGHT_AMP);
|
|
|
|
|
add_first(RULE_REDIRECTION, TOKEN_REDIR_RIGHT_PIPE);
|
|
|
|
|
// %RIP Matteo 30/01/2026
|
2026-01-24 16:13:16 +01:00
|
|
|
|
2026-01-29 19:47:59 +01:00
|
|
|
// Element
|
2026-01-24 16:13:16 +01:00
|
|
|
add_first(RULE_ELEMENT, TOKEN_WORD);
|
2026-01-30 16:51:10 +01:00
|
|
|
add_first(RULE_ELEMENT, TOKEN_ASSIGNMENT_WORD);
|
2026-01-29 19:47:59 +01:00
|
|
|
add_firsts(RULE_ELEMENT, first(RULE_REDIRECTION));
|
|
|
|
|
|
|
|
|
|
// Prefix
|
|
|
|
|
add_first(RULE_PREFIX, TOKEN_ASSIGNMENT_WORD);
|
|
|
|
|
add_firsts(RULE_PREFIX, first(RULE_REDIRECTION));
|
|
|
|
|
|
|
|
|
|
// Shell command
|
|
|
|
|
add_firsts(RULE_SHELL_COMMAND, first(RULE_IF));
|
|
|
|
|
|
|
|
|
|
// Simple command
|
|
|
|
|
add_firsts(RULE_SIMPLE_COMMAND, first(RULE_PREFIX));
|
2026-01-29 20:29:02 +01:00
|
|
|
add_first(RULE_SIMPLE_COMMAND, TOKEN_WORD);
|
2026-01-24 16:13:16 +01:00
|
|
|
|
2026-01-29 19:47:59 +01:00
|
|
|
// Funcdec
|
|
|
|
|
add_first(RULE_FUNCDEC, TOKEN_WORD);
|
2026-01-24 16:13:16 +01:00
|
|
|
|
2026-01-29 19:47:59 +01:00
|
|
|
// Command
|
|
|
|
|
add_firsts(RULE_COMMAND, first(RULE_SIMPLE_COMMAND));
|
|
|
|
|
add_firsts(RULE_COMMAND, first(RULE_SHELL_COMMAND));
|
|
|
|
|
add_firsts(RULE_COMMAND, first(RULE_FUNCDEC));
|
|
|
|
|
|
|
|
|
|
// Pipeline
|
|
|
|
|
add_first(RULE_PIPELINE, TOKEN_WORD);
|
|
|
|
|
add_firsts(RULE_PIPELINE, first(RULE_COMMAND));
|
|
|
|
|
|
|
|
|
|
// And Or
|
|
|
|
|
add_firsts(RULE_AND_OR, first(RULE_PIPELINE));
|
|
|
|
|
|
|
|
|
|
// Compound list
|
|
|
|
|
add_first(RULE_COMPOUND_LIST, TOKEN_NEWLINE);
|
|
|
|
|
add_firsts(RULE_COMPOUND_LIST, first(RULE_AND_OR));
|
|
|
|
|
|
|
|
|
|
// List
|
|
|
|
|
add_firsts(RULE_LIST, first(RULE_AND_OR));
|
|
|
|
|
|
|
|
|
|
// Input
|
|
|
|
|
add_first(RULE_INPUT, TOKEN_NEWLINE);
|
|
|
|
|
add_first(RULE_INPUT, TOKEN_EOF);
|
|
|
|
|
add_firsts(RULE_INPUT, first(RULE_LIST));
|
2026-01-23 17:01:26 +01:00
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void grammar_close(void)
|
|
|
|
|
{
|
2026-01-24 13:06:39 +01:00
|
|
|
// Deep free firsts map
|
|
|
|
|
for (int i = 0; i < NUMBER_OF_RULES; i++)
|
|
|
|
|
{
|
|
|
|
|
if (firsts_map[i].tokens != NULL)
|
|
|
|
|
{
|
|
|
|
|
free(firsts_map[i].tokens);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
free(firsts_map);
|
|
|
|
|
firsts_map = NULL;
|
2026-01-23 17:01:26 +01:00
|
|
|
}
|
|
|
|
|
|
2026-01-24 16:13:16 +01:00
|
|
|
struct firsts_list *first(enum rule rule)
|
|
|
|
|
{
|
|
|
|
|
if (firsts_map == NULL || firsts_map[rule].tokens == NULL)
|
|
|
|
|
{
|
2026-01-27 16:05:11 +01:00
|
|
|
perror("Internal error: attempted to get the firsts of a rule without "
|
2026-01-27 19:56:33 +01:00
|
|
|
"properly initializing the firsts map");
|
2026-01-24 16:13:16 +01:00
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return &firsts_map[rule];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool is_first(struct token token, enum rule rule)
|
|
|
|
|
{
|
|
|
|
|
struct firsts_list *firsts = &firsts_map[rule];
|
|
|
|
|
for (size_t i = 0; i < firsts->list_length; i++)
|
|
|
|
|
{
|
|
|
|
|
if (firsts->tokens[i] == token.type)
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2026-01-23 17:01:26 +01:00
|
|
|
/* @brief Parses one complete input from the lexer into an AST
 *
 * Handles the trivial inputs (lone EOF, lone newline) directly, and
 * otherwise delegates to parse_list, then requires a newline or EOF
 * terminator.
 *
 * @arg ctx the lexer context (presumably read by the PEEK_TOKEN /
 *      POP_TOKEN macros -- their definitions are not visible here)
 * @return the parsed AST, ast_create_end() on lone EOF, an empty list on
 *         lone newline, or NULL on parse/syntax error
 */
struct ast *parse_input(struct lexer_context *ctx)
{
    struct token *token = PEEK_TOKEN();

    // Lone EOF: emit the dedicated end-of-input node
    if (token->type == TOKEN_EOF)
    {
        POP_TOKEN();
        return ast_create_end();
    }

    // Lone newline: empty command list
    if (token->type == TOKEN_NEWLINE)
    {
        POP_TOKEN();
        return ast_create_list(NULL);
    }

    struct ast *ast = parse_list(ctx);

    // Re-peek: parse_list consumed the list's tokens
    token = PEEK_TOKEN();

    if (ast == NULL)
    {
        // Parse failed: still consume a pending EOF so the lexer is left
        // in a clean state for the caller
        if (token != NULL && token->type == TOKEN_EOF)
        {
            POP_TOKEN();
        }
        return NULL;
    }

    // NOTE(review): token is NULL-checked only on the error path above;
    // presumably PEEK_TOKEN() cannot return NULL once parse_list has
    // succeeded -- confirm against the lexer's contract
    if (token->type == TOKEN_NEWLINE || token->type == TOKEN_EOF)
    {
        POP_TOKEN();
        return ast;
    }

    // A list must be terminated by a newline or EOF: syntax error.
    // NOTE(review): perror appends strerror(errno), which is unrelated to
    // a syntax error -- fprintf(stderr, ...) is probably what was meant
    perror("Syntax error: expected newline or EOF after list");
    ast_free(&ast);
    return NULL;
}
|