From 3cdba0ad9c37dd4fc9a4e819dece3554456a0b26 Mon Sep 17 00:00:00 2001 From: matteo Date: Fri, 23 Jan 2026 15:56:21 +0100 Subject: [PATCH 1/2] feat(lexer): comments done + operator WIP --- src/lexer/lexer.c | 12 ++++++++---- src/lexer/lexer_utils.c | 35 ++++++++++++++++++++++++++++++++++- src/lexer/lexer_utils.h | 22 ++++++++++++++++++---- 3 files changed, 60 insertions(+), 9 deletions(-) diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c index f16088b..e6dcfc7 100644 --- a/src/lexer/lexer.c +++ b/src/lexer/lexer.c @@ -133,13 +133,14 @@ struct token *peek_token(struct lexer_context *ctx) i++; } - struct token *tok = new_token(stream, i); + struct token *tok = new_token(stream, i, ctx->only_digits); // if token is comment, we don't want it if (tok->type == TOKEN_COMMENT) { - // drop current stream - get_next_stream(ctx); + // Find next newline or EOF. + go_end_of_line(ctx); + free_token(&tok); tok = peek_token(ctx); } @@ -170,6 +171,9 @@ struct token *pop_token(struct lexer_context *ctx) if (!update_lexing_mode(stream, i, &lexing_mode) && lexing_mode == LEXER_NORMAL) { + // TODO call here a function + // it must check if is a spe char or an operator + // and sets i accordingly. if (is_special_char(stream[i])) { if (i == 0) // where we create spe_char token @@ -195,7 +199,7 @@ struct token *pop_token(struct lexer_context *ctx) // (this should never happen) if (ctx->current_token == NULL) { - ctx->current_token = new_token(stream, i); + ctx->current_token = new_token(stream, i, ctx->only_digits); } save_state(stream, i, ctx); diff --git a/src/lexer/lexer_utils.c b/src/lexer/lexer_utils.c index 4206c4c..4906fcb 100644 --- a/src/lexer/lexer_utils.c +++ b/src/lexer/lexer_utils.c @@ -132,12 +132,30 @@ static void set_token_word(struct token *tok, char *begin, ssize_t size) } } -struct token *new_token(char *begin, ssize_t size) +/* @brief: Sets the token to an IO number + * Also allocates the data and fills it. 
+ */ +static void set_token_ION(struct token *tok, char *begin, ssize_t size) +{ + if (tok->type == TOKEN_NULL && size != 0) + { + tok->type = TOKEN_IONUMBER; + tok->data = calloc(size + 1, sizeof(char)); + if (tok->data == NULL) + return; + strncpy(tok->data, begin, size); + } +} + +struct token *new_token(char *begin, ssize_t size, bool only_digits) { struct token *tok = calloc(1, sizeof(struct token)); if (tok == NULL) return NULL; + if (only_digits) + set_token_ION(tok, begin, size); + set_token_spechar(tok, begin, size); set_token_keyword(tok, begin, size); set_token_word(tok, begin, size); @@ -186,6 +204,21 @@ void stream_init(struct lexer_context *ctx) ctx->end_previous_token = trimed_stream; } +void go_end_of_line(struct lexer_context *ctx) +{ + if (ctx == NULL || ctx->end_previous_token == NULL) + return; + + ssize_t i = 0; + while (ctx->end_previous_token[i] != '\n' + && ctx->end_previous_token[i] != EOF) + { + i++; + } + ctx->end_previous_token += i; + ctx->remaining_chars -= i; +} + void get_next_stream(struct lexer_context *ctx) { ctx->remaining_chars = 0; diff --git a/src/lexer/lexer_utils.h b/src/lexer/lexer_utils.h index 83bc772..fde696b 100644 --- a/src/lexer/lexer_utils.h +++ b/src/lexer/lexer_utils.h @@ -1,6 +1,7 @@ #ifndef LEXER_UTILS_H #define LEXER_UTILS_H +#include <stdbool.h> #include #include @@ -9,6 +10,10 @@ struct lexer_context char *end_previous_token; ssize_t remaining_chars; + // useful to detect IO numbers. + // tells us if we only lexed digits in current token. 
+ bool only_digits; + struct token *previous_token; struct token *current_token; }; @@ -39,17 +44,20 @@ enum token_type TOKEN_GRAVE, TOKEN_SEMICOLON, TOKEN_COMMENT, - TOKEN_PIPE, - TOKEN_AMPERSAND, + TOKEN_STAR, TOKEN_BACKSLASH, TOKEN_DOLLAR, TOKEN_LEFT_PAREN, TOKEN_RIGHT_PAREN, TOKEN_LEFT_BRACKET, TOKEN_RIGHT_BRACKET, + + // redirections TOKEN_LESS, TOKEN_GREATER, - TOKEN_STAR, + TOKEN_PIPE, + TOKEN_AMPERSAND, + TOKEN_IONUMBER, // Keywords TOKEN_IF, @@ -73,7 +81,7 @@ struct token * * @return: NULL on error, a token otherwise. */ -struct token *new_token(char *begin, ssize_t size); +struct token *new_token(char *begin, ssize_t size, bool only_digits); /* @brief: frees the token given in argument */ @@ -90,6 +98,12 @@ void free_token(struct token **tok); */ void stream_init(struct lexer_context *ctx); +/* @brief: finds the next '\n' or EOF character, + * starting at [ctx->end_previous_token], + * and updates the stream and remaining_chars accordingly. + */ +void go_end_of_line(struct lexer_context *ctx); + /* * @brief: drops the current stream and asks IOB for a new one */ From 1e5593fc8ea8de76df49ce1c7ce4b2f3932b622d Mon Sep 17 00:00:00 2001 From: matteo Date: Fri, 23 Jan 2026 19:34:47 +0100 Subject: [PATCH 2/2] feat(lexer): operators done --- src/lexer/lexer.c | 14 +----- src/lexer/lexer_utils.c | 102 ++++++++++++++++++++++++++++++++-------- src/lexer/lexer_utils.h | 38 +++++++++++---- 3 files changed, 111 insertions(+), 43 deletions(-) diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c index e6dcfc7..6c46f51 100644 --- a/src/lexer/lexer.c +++ b/src/lexer/lexer.c @@ -11,18 +11,6 @@ #include "../utils/string_utils/string_utils.h" #include "lexer_utils.h" -/* @return: true if a special character from the grammar was found, - * false otherwise. - */ -static bool is_special_char(char c) -{ - if (c == EOF) - return true; - - char special_chars[] = "\n'\"`;#|&\\(){}<>*"; - return strchr(special_chars, c) != NULL; -} - /* @brief: sets the ctx->current_token to [tok]. 
* this function is called by token_peek(). */ @@ -177,7 +165,7 @@ struct token *pop_token(struct lexer_context *ctx) if (is_special_char(stream[i])) { if (i == 0) // where we create spe_char token - i++; + i += len_op_sepchar(stream, i); break; } if (isblank(stream[i])) diff --git a/src/lexer/lexer_utils.c b/src/lexer/lexer_utils.c index 4906fcb..d80134a 100644 --- a/src/lexer/lexer_utils.c +++ b/src/lexer/lexer_utils.c @@ -24,24 +24,12 @@ static void set_token_spechar(struct token *tok, char *begin, ssize_t size) case '\n': tok->type = TOKEN_NEWLINE; break; - case '\'': - tok->type = TOKEN_QUOTE; - break; - case '"': - tok->type = TOKEN_DOUBLE_QUOTE; - break; case '`': tok->type = TOKEN_GRAVE; break; case '#': tok->type = TOKEN_COMMENT; break; - case '|': - tok->type = TOKEN_PIPE; - break; - case '&': - tok->type = TOKEN_AMPERSAND; - break; case '\\': tok->type = TOKEN_BACKSLASH; break; @@ -57,12 +45,6 @@ static void set_token_spechar(struct token *tok, char *begin, ssize_t size) case '}': tok->type = TOKEN_RIGHT_BRACKET; break; - case '<': - tok->type = TOKEN_LESS; - break; - case '>': - tok->type = TOKEN_GREATER; - break; case '*': tok->type = TOKEN_STAR; break; @@ -117,6 +99,47 @@ static void set_token_keyword(struct token *tok, char *begin, ssize_t size) strncpy(tok->data, begin, size); } +/* @brief: if an operator is found at [begin], + * [tok->token_type] is set accordingly + */ +static void set_token_operator(struct token *tok, char *begin, ssize_t size) +{ + if (tok->type != TOKEN_NULL) + return; + if (strncmp(begin, ">", size) == 0) + { + tok->type = TOKEN_REDIR_RIGHT; + } + else if (strncmp(begin, "<", size) == 0) + { + tok->type = TOKEN_REDIR_LEFT; + } + else if (strncmp(begin, ">>", size) == 0) + { + tok->type = TOKEN_REDIR_DOUBLE_RIGHT; + } + else if (strncmp(begin, ">&", size) == 0) + { + tok->type = TOKEN_REDIR_RIGHT_AMP; + } + else if (strncmp(begin, ">|", size) == 0) + { + tok->type = TOKEN_REDIR_RIGHT_PIPE; + } + else if (strncmp(begin, "<&", 
size) == 0) + { + tok->type = TOKEN_REDIR_LEFT_AMP; + } + else if (strncmp(begin, "<>", size) == 0) + { + tok->type = TOKEN_REDIR_LEFT_RIGHT; + } + else if (strncmp(begin, "|", size) == 0) + { + tok->type = TOKEN_PIPE; + } +} + /* @brief: if token_type has not yet been set, then it is a TOKEN_WORD * Also allocates the data and fills it. */ @@ -147,6 +170,23 @@ static void set_token_ION(struct token *tok, char *begin, ssize_t size) } } +/* @brief: check if [c] is a delimiter for end of line. + * @return: true if [c] == '\n' or EOF. false otherwise. + */ +static bool is_end_of_line(char c) +{ + return c == EOF || c == '\n'; +} + +bool is_special_char(char c) +{ + if (c == EOF) + return true; + + char special_chars[] = "\n'\"`;#|&\\(){}<>*"; + return strchr(special_chars, c) != NULL; +} + struct token *new_token(char *begin, ssize_t size, bool only_digits) { struct token *tok = calloc(1, sizeof(struct token)); @@ -156,6 +196,7 @@ struct token *new_token(char *begin, ssize_t size, bool only_digits) if (only_digits) set_token_ION(tok, begin, size); + set_token_operator(tok, begin, size); set_token_spechar(tok, begin, size); set_token_keyword(tok, begin, size); set_token_word(tok, begin, size); @@ -204,14 +245,35 @@ void stream_init(struct lexer_context *ctx) ctx->end_previous_token = trimed_stream; } +ssize_t len_op_sepchar(char *stream, ssize_t i) +{ + if (!is_special_char(stream[i])) + return -1; // should never happen + + if (stream[i] != '>' && stream[i] != '<') + return 1; // special character (cannot be operator) + + // operator + + if (stream[i] == '<') + { + if (stream[i + 1] == '&' || stream[i + 1] == '>') + return 2; // <&, <> + } + else if (stream[i + 1] == '>' || stream[i + 1] == '|' + || stream[i + 1] == '&') + return 2; // >>, >&, >| + + return 1; // >, < +} + void go_end_of_line(struct lexer_context *ctx) { if (ctx == NULL || ctx->end_previous_token == NULL) return; ssize_t i = 0; - while (ctx->end_previous_token[i] != '\n' - && ctx->end_previous_token[i] 
!= EOF) + while (!is_end_of_line(ctx->end_previous_token[i])) { i++; } diff --git a/src/lexer/lexer_utils.h b/src/lexer/lexer_utils.h index fde696b..aa8941a 100644 --- a/src/lexer/lexer_utils.h +++ b/src/lexer/lexer_utils.h @@ -53,11 +53,16 @@ enum token_type TOKEN_RIGHT_BRACKET, // redirections - TOKEN_LESS, - TOKEN_GREATER, - TOKEN_PIPE, - TOKEN_AMPERSAND, + TOKEN_REDIR_LEFT, + TOKEN_REDIR_RIGHT, + TOKEN_REDIR_LEFT_RIGHT, + TOKEN_REDIR_DOUBLE_RIGHT, + TOKEN_REDIR_LEFT_AMP, + TOKEN_REDIR_RIGHT_AMP, + TOKEN_REDIR_RIGHT_PIPE, + TOKEN_IONUMBER, + TOKEN_PIPE, // Keywords TOKEN_IF, @@ -75,8 +80,12 @@ struct token char *data; }; -/* - * @brief: return a newly allocated token, with the corresponding type. +/* @return: true if a special character from the grammar was found, + * false otherwise. + */ +bool is_special_char(char c); + +/* @brief: return a newly allocated token, with the corresponding type. * The data contains [size] char, starting from [begin]. * * @return: NULL on error, a token otherwise. @@ -99,13 +108,22 @@ void free_token(struct token **tok); void stream_init(struct lexer_context *ctx); /* @brief: finds the next '\n' or EOF character, - * starting at [ctx->end_previous_token], - * and updates the stream and remaining_chars accordingly. + * starting at [ctx->end_previous_token], + * and updates the stream and remaining_chars accordingly. + * + * @note: Daft Punk. bang. */ void go_end_of_line(struct lexer_context *ctx); -/* - * @brief: drops the current stream and asks IOB for a new one +/* @brief: this function is called when we find a special character + * in the stream. This can either be an operator (e.g. '>>' or '<&' etc), + * or a special char (e.g. '\' or '#' etc). + * @return: the length of the operator/special char found (1 or 2). + * -1 on error. + */ +ssize_t len_op_sepchar(char *stream, ssize_t i); + +/* @brief: drops the current stream and asks IOB for a new one */ void get_next_stream(struct lexer_context *ctx);