From 1e5593fc8ea8de76df49ce1c7ce4b2f3932b622d Mon Sep 17 00:00:00 2001 From: matteo Date: Fri, 23 Jan 2026 19:34:47 +0100 Subject: [PATCH] feat(lexer): operators done --- src/lexer/lexer.c | 14 +----- src/lexer/lexer_utils.c | 102 ++++++++++++++++++++++++++++++++-------- src/lexer/lexer_utils.h | 38 +++++++++++---- 3 files changed, 111 insertions(+), 43 deletions(-) diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c index e6dcfc7..6c46f51 100644 --- a/src/lexer/lexer.c +++ b/src/lexer/lexer.c @@ -11,18 +11,6 @@ #include "../utils/string_utils/string_utils.h" #include "lexer_utils.h" -/* @return: true if a special character from the grammar was found, - * false otherwise. - */ -static bool is_special_char(char c) -{ - if (c == EOF) - return true; - - char special_chars[] = "\n'\"`;#|&\\(){}<>*"; - return strchr(special_chars, c) != NULL; -} - /* @brief: sets the ctx->current_token to [tok]. * this function is called by token_peek(). */ @@ -177,7 +165,7 @@ struct token *pop_token(struct lexer_context *ctx) if (is_special_char(stream[i])) { if (i == 0) // where we create spe_char token - i++; + i += len_op_sepchar(stream, i); break; } if (isblank(stream[i])) diff --git a/src/lexer/lexer_utils.c b/src/lexer/lexer_utils.c index 4906fcb..d80134a 100644 --- a/src/lexer/lexer_utils.c +++ b/src/lexer/lexer_utils.c @@ -24,24 +24,12 @@ static void set_token_spechar(struct token *tok, char *begin, ssize_t size) case '\n': tok->type = TOKEN_NEWLINE; break; - case '\'': - tok->type = TOKEN_QUOTE; - break; - case '"': - tok->type = TOKEN_DOUBLE_QUOTE; - break; case '`': tok->type = TOKEN_GRAVE; break; case '#': tok->type = TOKEN_COMMENT; break; - case '|': - tok->type = TOKEN_PIPE; - break; - case '&': - tok->type = TOKEN_AMPERSAND; - break; case '\\': tok->type = TOKEN_BACKSLASH; break; @@ -57,12 +45,6 @@ static void set_token_spechar(struct token *tok, char *begin, ssize_t size) case '}': tok->type = TOKEN_RIGHT_BRACKET; break; - case '<': - tok->type = TOKEN_LESS; - break; - 
case '>': - tok->type = TOKEN_GREATER; - break; case '*': tok->type = TOKEN_STAR; break; @@ -117,6 +99,47 @@ static void set_token_keyword(struct token *tok, char *begin, ssize_t size) strncpy(tok->data, begin, size); } +/* @brief: if an operator is found at [begin], + * [tok->type] is set accordingly (no-op if the type is already set) + */ +static void set_token_operator(struct token *tok, char *begin, ssize_t size) +{ + if (tok->type != TOKEN_NULL) + return; + if (strncmp(begin, ">", size) == 0) + { + tok->type = TOKEN_REDIR_RIGHT; + } + else if (strncmp(begin, "<", size) == 0) + { + tok->type = TOKEN_REDIR_LEFT; + } + else if (strncmp(begin, ">>", size) == 0) + { + tok->type = TOKEN_REDIR_DOUBLE_RIGHT; + } + else if (strncmp(begin, ">&", size) == 0) + { + tok->type = TOKEN_REDIR_RIGHT_AMP; + } + else if (strncmp(begin, ">|", size) == 0) + { + tok->type = TOKEN_REDIR_RIGHT_PIPE; + } + else if (strncmp(begin, "<&", size) == 0) + { + tok->type = TOKEN_REDIR_LEFT_AMP; + } + else if (strncmp(begin, "<>", size) == 0) + { + tok->type = TOKEN_REDIR_LEFT_RIGHT; + } + else if (strncmp(begin, "|", size) == 0) + { + tok->type = TOKEN_PIPE; + } +} + /* @brief: if token_type has not yet been set, then it is a TOKEN_WORD * Also allocates the data and fills it. */ @@ -147,6 +170,23 @@ static void set_token_ION(struct token *tok, char *begin, ssize_t size) } } +/* @brief: check if [c] is a delimiter for end of line. + * @return: true if [c] == '\n' or EOF. false otherwise. 
+ */ +static bool is_end_of_line(char c) +{ + return c == EOF || c == '\n'; +} + +bool is_special_char(char c) +{ + if (c == EOF) + return true; + + char special_chars[] = "\n'\"`;#|&\\(){}<>*"; + return strchr(special_chars, c) != NULL; +} + struct token *new_token(char *begin, ssize_t size, bool only_digits) { struct token *tok = calloc(1, sizeof(struct token)); @@ -156,6 +196,7 @@ struct token *new_token(char *begin, ssize_t size, bool only_digits) if (only_digits) set_token_ION(tok, begin, size); + set_token_operator(tok, begin, size); set_token_spechar(tok, begin, size); set_token_keyword(tok, begin, size); set_token_word(tok, begin, size); @@ -204,14 +245,35 @@ void stream_init(struct lexer_context *ctx) ctx->end_previous_token = trimed_stream; } +ssize_t len_op_sepchar(char *stream, ssize_t i) +{ + if (!is_special_char(stream[i])) + return -1; // should never happen + + if (stream[i] != '>' && stream[i] != '<') + return 1; // special character (cannot be operator) + + // operator + + if (stream[i] == '<') + { + if (stream[i + 1] == '&' || stream[i + 1] == '>') + return 2; // <&, <> + } + else if (stream[i + 1] == '>' || stream[i + 1] == '|' + || stream[i + 1] == '&') + return 2; // >>, >&, >| + + return 1; // >, < +} + void go_end_of_line(struct lexer_context *ctx) { if (ctx == NULL || ctx->end_previous_token == NULL) return; ssize_t i = 0; - while (ctx->end_previous_token[i] != '\n' - && ctx->end_previous_token[i] != EOF) + while (!is_end_of_line(ctx->end_previous_token[i])) { i++; } diff --git a/src/lexer/lexer_utils.h b/src/lexer/lexer_utils.h index fde696b..aa8941a 100644 --- a/src/lexer/lexer_utils.h +++ b/src/lexer/lexer_utils.h @@ -53,11 +53,16 @@ enum token_type TOKEN_RIGHT_BRACKET, // redirections - TOKEN_LESS, - TOKEN_GREATER, - TOKEN_PIPE, - TOKEN_AMPERSAND, + TOKEN_REDIR_LEFT, + TOKEN_REDIR_RIGHT, + TOKEN_REDIR_LEFT_RIGHT, + TOKEN_REDIR_DOUBLE_RIGHT, + TOKEN_REDIR_LEFT_AMP, + TOKEN_REDIR_RIGHT_AMP, + TOKEN_REDIR_RIGHT_PIPE, + TOKEN_IONUMBER, + 
TOKEN_PIPE, // Keywords TOKEN_IF, @@ -75,8 +80,12 @@ struct token char *data; }; -/* - * @brief: return a newly allocated token, with the corresponding type. +/* @return: true if [c] is EOF or a special character from the grammar, + * false otherwise. NOTE(review): the definition relies on strchr(), so a NUL byte also returns true -- confirm this is intended. + */ +bool is_special_char(char c); + +/* @brief: return a newly allocated token, with the corresponding type. * The data contains [size] char, starting from [begin]. * * @return: NULL on error, a token otherwise. @@ -99,13 +108,22 @@ void free_token(struct token **tok); void stream_init(struct lexer_context *ctx); /* @brief: finds the next '\n' or EOF character, - * starting at [ctx->end_previous_token], - * and updates the stream and remaining_chars accordingly. + * starting at [ctx->end_previous_token], + * and updates the stream and remaining_chars accordingly. + * + * @note: Daft Punk. bang. */ void go_end_of_line(struct lexer_context *ctx); -/* - * @brief: drops the current stream and asks IOB for a new one +/* @brief: this function is called when a special character is found + * in the stream. This can either be an operator (e.g. '>>' or '<&'), + * or a special char (e.g. '\' or '#'). + * @return: the length of the operator/special char found at stream[i] (1 or 2). + * -1 if stream[i] is not a special character. + */ +ssize_t len_op_sepchar(char *stream, ssize_t i); + +/* @brief: drops the current stream and asks IOB for a new one */ void get_next_stream(struct lexer_context *ctx);