From 745649df1c4e5a5d27eafba0c610423476c16b3a Mon Sep 17 00:00:00 2001
From: Matteo Flebus
Date: Mon, 12 Jan 2026 21:31:15 +0100
Subject: [PATCH] feat(lexer): finished

---
 src/lexer/lexer.c | 75 +++++++++++++++++++++++++++++++++++------------
 src/lexer/lexer.h |  1 +
 2 files changed, 57 insertions(+), 19 deletions(-)

diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c
index def814c..5d4ba94 100644
--- a/src/lexer/lexer.c
+++ b/src/lexer/lexer.c
@@ -1,15 +1,53 @@
 #include "lexer.h"
 
+#include
 #include
 #include
 #include
 
 #include "io_backend/io_backend.h"
+#include "utils/string_utils/string_utils.h"
 
 static char *end_last_token;
 
 static ssize_t remaining_chars;
 
+/* @brief: saves state for the next call to the lexer.
+ * Consumes i chars: remaining_chars shrinks, end_last_token moves forward.
+ */
+static void save_state(char *stream, ssize_t i)
+{
+    remaining_chars -= i;
+    end_last_token = stream + i;
+    return;
+}
+
+/* @return: true if a special character from the grammar was found,
+ * false otherwise.
+ * Special characters are the single quote, the newline and the semicolon.
+ */
+static bool is_special_char(char c)
+{
+    return c == '\'' || c == '\n' || c == ';';
+}
+
+/* @return: true if a keyword from the grammar was found, false otherwise.
+ * Only the first i characters of stream are compared against the keywords.
+ */
+static bool is_keyword(char *stream, ssize_t i)
+{
+    static const char *const kw[] = { "if", "fi", "then", "else", "elif" };
+    for (size_t k = 0; k < sizeof(kw) / sizeof(kw[0]); k++)
+    {
+        size_t len = strlen(kw[k]);
+        if ((size_t)i == len && strncmp(stream, kw[k], len) == 0)
+        {
+            return true;
+        }
+    }
+    return false;
+}
+
 char *new_token(char *begin, ssize_t size)
 {
     char *res = calloc(size + 1, sizeof(char));
@@ -32,6 +70,10 @@ char *stream_init(void)
         stream = end_last_token;
     }
 
+    char *trimed_stream = trim_blanks_left(stream);
+    remaining_chars -= trimed_stream - stream;
+    stream = trimed_stream;
+
     return stream;
 }
 
@@ -39,34 +81,29 @@ char *get_token(void)
 {
     char *stream = stream_init();
 
-    bool inquotes = false;
     ssize_t i = 0;
 
     while (i < remaining_chars)
     {
-        switch (stream[i])
+        if (is_special_char(stream[i]))
         {
-        case '\'':
-            inquotes = !inquotes;
+            if (i == 0) // where we create spe_char token
+                i++;
             break;
-
-        case ' ' | '\n' | '\t':
-            if (inquotes)
-                break;
-            else
-            {
-                // token creation
-                // skip blank char
-                // exit from loop
-                char *token = new_token(stream, i);
-                return token;
-            }
-        default:
+        }
+        if (isblank((unsigned char)stream[i]))
+        {
+            break;
+        }
+        else if (is_keyword(stream, i))
+        {
+            i++;
             break;
         }
         i++;
     }
 
-    remaining_chars -= i;
-    return NULL;
+    save_state(stream, i);
+
+    return new_token(stream, i);
 }
diff --git a/src/lexer/lexer.h b/src/lexer/lexer.h
index 406ca9c..7e7ca10 100644
--- a/src/lexer/lexer.h
+++ b/src/lexer/lexer.h
@@ -30,6 +30,7 @@ char *new_token(char *begin, ssize_t size);
  * If it is, it calls stream_read() from IO_backend,
  * and sets [remaing_chars].
  * If not, it starts from the end of the last token.
+ * Also trims left blanks before returning.
  *
  * @return: char* stream from which we tokenise.
  */