feat(lexer): created parser utils

2026-01-21 16:19:10 +01:00 · 2026-01-21 16:19:10 +01:00 · 26ac0ffe05
commit 26ac0ffe05
parent b5b40f303c
5 changed files with 296 additions and 276 deletions
--- a/src/lexer/lexer_utils.c
+++ b/src/lexer/lexer_utils.c
@ -0,0 +1,183 @@
+#include "lexer_utils.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "../io_backend/io_backend.h"
+#include "../utils/string_utils/string_utils.h"
+
+/* @brief: classifies a one-character lexeme.
+ *         If [size] is exactly 1 and [begin][0] is one of the special
+ *         characters listed below, [tok->type] is set to the matching token
+ *         type. In every other case [tok] is left untouched.
+ *         [tok->data] is never allocated here.
+ *
+ * NOTE(review): EOF is compared against a char promoted to int, so whether
+ * it can ever match depends on the signedness of char - confirm how the io
+ * backend marks the end of input.
+ */
+static void set_token_spechar(struct token *tok, char *begin, ssize_t size)
+{
+    // One entry per special character: the character and its token type.
+    static const struct
+    {
+        int symbol;
+        int type;
+    } spechars[] = {
+        { EOF, TOKEN_EOF },
+        { ';', TOKEN_SEMICOLON },
+        { '\n', TOKEN_NEWLINE },
+        { '\'', TOKEN_QUOTE },
+        { '"', TOKEN_DOUBLE_QUOTE },
+        { '`', TOKEN_GRAVE },
+        { '#', TOKEN_COMMENT },
+        { '|', TOKEN_PIPE },
+        { '&', TOKEN_AMPERSAND },
+        { '\\', TOKEN_BACKSLASH },
+        { '$', TOKEN_DOLLAR },
+        { '(', TOKEN_LEFT_PAREN },
+        { ')', TOKEN_RIGHT_PAREN },
+        { '{', TOKEN_LEFT_BRACKET },
+        { '}', TOKEN_RIGHT_BRACKET },
+        { '<', TOKEN_LESS },
+        { '>', TOKEN_GREATER },
+        { '*', TOKEN_STAR },
+    };
+    const size_t count = sizeof(spechars) / sizeof(spechars[0]);
+
+    // Special characters are always exactly one character long.
+    if (size != 1)
+        return;
+
+    const int current = begin[0];
+    for (size_t i = 0; i < count; i++)
+    {
+        if (spechars[i].symbol == current)
+        {
+            tok->type = spechars[i].type;
+            return;
+        }
+    }
+}
+
+/* @brief: if the [size] characters starting at [begin] are exactly one of
+ *         the reserved words (if, fi, then, else, elif), [tok->type] is set
+ *         accordingly and [tok->data] receives a NUL-terminated copy of it.
+ *
+ * Nothing is done when [tok->type] has already been set or when [size] is
+ * not strictly positive.
+ * The match is exact: a strict prefix of a keyword such as "i" or "th" is
+ * NOT a keyword and is left for set_token_word().
+ * If the allocation fails, [tok->type] stays set and [tok->data] stays NULL.
+ */
+static void set_token_keyword(struct token *tok, char *begin, ssize_t size)
+{
+    static const struct
+    {
+        const char *word;
+        int type;
+    } keywords[] = {
+        { "if", TOKEN_IF },     { "fi", TOKEN_FI },     { "then", TOKEN_THEN },
+        { "else", TOKEN_ELSE }, { "elif", TOKEN_ELIF },
+    };
+    const size_t count = sizeof(keywords) / sizeof(keywords[0]);
+
+    // A negative size would wrap to a huge length once converted to size_t.
+    if (tok->type != TOKEN_NULL || size <= 0)
+        return;
+
+    const size_t len = (size_t)size;
+    for (size_t i = 0; i < count; i++)
+    {
+        // strncmp() alone only proves that the first [len] characters agree;
+        // the length check is what rejects prefixes of a keyword.
+        if (strlen(keywords[i].word) == len
+            && strncmp(begin, keywords[i].word, len) == 0)
+        {
+            tok->type = keywords[i].type;
+            break;
+        }
+    }
+
+    // no keywords found.
+    if (tok->type == TOKEN_NULL)
+        return;
+
+    // calloc() zero-fills, so the extra byte is the NUL terminator.
+    tok->data = calloc(len + 1, sizeof(char));
+    if (tok->data == NULL)
+        return;
+    memcpy(tok->data, begin, len);
+}
+
+/* @brief: fallback classification. If [tok->type] has not been set yet and
+ *         [size] is strictly positive, the token becomes a TOKEN_WORD and
+ *         [tok->data] receives a NUL-terminated copy of the [size]
+ *         characters starting at [begin].
+ *
+ * A zero or negative [size] leaves [tok] untouched.
+ * If the allocation fails, [tok->type] is TOKEN_WORD and [tok->data] is NULL.
+ */
+static void set_token_word(struct token *tok, char *begin, ssize_t size)
+{
+    if (tok->type != TOKEN_NULL)
+        return;
+
+    // A negative size would under-allocate the buffer and then be converted
+    // to a huge copy length, so it is rejected together with the empty case.
+    if (size <= 0)
+        return;
+
+    const size_t len = (size_t)size;
+
+    tok->type = TOKEN_WORD;
+    // calloc() zero-fills, so the extra byte is the NUL terminator.
+    tok->data = calloc(len + 1, sizeof(char));
+    if (tok->data == NULL)
+        return;
+    strncpy(tok->data, begin, len);
+}
+
+/* @brief: allocates a zero-initialised token and classifies the [size]
+ *         characters starting at [begin].
+ *
+ * The classifiers run in a fixed order: special character, then keyword,
+ * then plain word.
+ * NOTE(review): the keyword and word classifiers only act while the type is
+ * TOKEN_NULL, so this presumably relies on TOKEN_NULL being the zero value
+ * produced by calloc() - confirm against the enum definition.
+ *
+ * @return: the new token, or NULL if the token itself could not be
+ *          allocated.
+ */
+struct token *new_token(char *begin, ssize_t size)
+{
+    struct token *result = calloc(1, sizeof(*result));
+
+    if (result != NULL)
+    {
+        set_token_spechar(result, begin, size);
+        set_token_keyword(result, begin, size);
+        set_token_word(result, begin, size);
+    }
+
+    return result;
+}
+
+/* @brief: releases the lexer context pointed to by [ctx], together with its
+ *         previous and current tokens, then sets [*ctx] to NULL.
+ *
+ * Does nothing when [ctx] or [*ctx] is NULL.
+ * The tokens are released through free_token() so that their [data] buffers
+ * are freed as well; a plain free() on the token would leak them.
+ */
+void destroy_lexer_context(struct lexer_context **ctx)
+{
+    if (ctx == NULL || *ctx == NULL)
+        return;
+
+    // free_token() accepts a NULL token, so no extra guard is needed.
+    free_token(&(*ctx)->previous_token);
+    free_token(&(*ctx)->current_token);
+
+    free(*ctx);
+    *ctx = NULL;
+}
+
+/* @brief: releases the token pointed to by [tok] along with its [data]
+ *         buffer, then sets [*tok] to NULL.
+ *
+ * Does nothing when [tok] or [*tok] is NULL.
+ */
+void free_token(struct token **tok)
+{
+    if (tok == NULL)
+        return;
+
+    struct token *victim = *tok;
+    if (victim == NULL)
+        return;
+
+    // free(NULL) is a no-op, so [data] needs no check of its own.
+    free(victim->data);
+    free(victim);
+    *tok = NULL;
+}
+
+/* @brief: returns the position at which the next token must be read.
+ *
+ * When no token has been produced yet ([ctx->previous_token] is NULL), the
+ * input is fetched with stream_read() and its return value is stored in
+ * [ctx->remaining_chars]. Otherwise reading resumes at
+ * [ctx->end_previous_token].
+ * Leading blanks are then skipped with trim_blank_left() and
+ * [ctx->remaining_chars] is decreased by the number of characters skipped.
+ *
+ * @return: the first non-blank position, or NULL when there is no buffer to
+ *          read from.
+ */
+char *stream_init(struct lexer_context *ctx)
+{
+    // Start from NULL so a stream_read() that leaves its output untouched
+    // cannot make us read an uninitialised pointer.
+    char *stream = NULL;
+
+    if (ctx->previous_token == NULL) // at the beginning
+    {
+        ctx->remaining_chars = stream_read(&stream);
+    }
+    else
+    {
+        stream = ctx->end_previous_token;
+    }
+
+    if (stream == NULL)
+        return NULL;
+
+    char *trimmed_stream = trim_blank_left(stream);
+    ctx->remaining_chars -= trimmed_stream - stream;
+
+    return trimmed_stream;
+}