feat(lexer): create lexer utils
This commit is contained in:
parent
b5b40f303c
commit
26ac0ffe05
5 changed files with 296 additions and 276 deletions
91
src/lexer/lexer_utils.h
Normal file
91
src/lexer/lexer_utils.h
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
#ifndef LEXER_UTILS_H
|
||||
#define LEXER_UTILS_H
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <stddef.h>
|
||||
|
||||
struct lexer_context
|
||||
{
|
||||
char *end_previous_token;
|
||||
ssize_t remaining_chars;
|
||||
|
||||
struct token *previous_token;
|
||||
struct token *current_token;
|
||||
};
|
||||
|
||||
/* @brief: frees all fields of ctx and sets ctx to NULL.
|
||||
*/
|
||||
void destroy_lexer_context(struct lexer_context **ctx);
|
||||
|
||||
enum lexing_mode
|
||||
{
|
||||
LEXER_NORMAL,
|
||||
LEXER_QUOTE,
|
||||
LEXER_DOUBLE_QUOTE
|
||||
};
|
||||
|
||||
enum token_type
|
||||
{
|
||||
// Special characters
|
||||
TOKEN_NULL = 0,
|
||||
TOKEN_EOF,
|
||||
TOKEN_WORD,
|
||||
TOKEN_NEWLINE,
|
||||
|
||||
// WARNING: quote and double quote should never be used inside a token.
|
||||
TOKEN_QUOTE,
|
||||
TOKEN_DOUBLE_QUOTE,
|
||||
|
||||
TOKEN_GRAVE,
|
||||
TOKEN_SEMICOLON,
|
||||
TOKEN_COMMENT,
|
||||
TOKEN_PIPE,
|
||||
TOKEN_AMPERSAND,
|
||||
TOKEN_BACKSLASH,
|
||||
TOKEN_DOLLAR,
|
||||
TOKEN_LEFT_PAREN,
|
||||
TOKEN_RIGHT_PAREN,
|
||||
TOKEN_LEFT_BRACKET,
|
||||
TOKEN_RIGHT_BRACKET,
|
||||
TOKEN_LESS,
|
||||
TOKEN_GREATER,
|
||||
TOKEN_STAR,
|
||||
|
||||
// Keywords
|
||||
TOKEN_IF,
|
||||
TOKEN_THEN,
|
||||
TOKEN_ELSE,
|
||||
TOKEN_FI,
|
||||
TOKEN_ELIF
|
||||
};
|
||||
|
||||
struct token
|
||||
{
|
||||
enum token_type type;
|
||||
char *data;
|
||||
};
|
||||
|
||||
/*
|
||||
* @brief: return a newly allocated token, with the corresponding type.
|
||||
* The data contains [size] char, starting from [begin].
|
||||
*
|
||||
* @return: NULL on error, a token otherwise.
|
||||
*/
|
||||
struct token *new_token(char *begin, ssize_t size);
|
||||
|
||||
/* @brief: frees the token given in argument
|
||||
*/
|
||||
void free_token(struct token **tok);
|
||||
|
||||
/*
|
||||
* @brief: checks if the stream used for the last token creation is empty.
|
||||
* If it is, it calls stream_read() from IO_backend,
|
||||
* and sets [remaing_chars].
|
||||
* If not, it starts from the end of the last token.
|
||||
* Also trims left blanks before returning.
|
||||
*
|
||||
* @return: char* stream from which we tokenise.
|
||||
*/
|
||||
char *stream_init(struct lexer_context *ctx);
|
||||
|
||||
#endif /* LEXER_UTILS_H */
|
||||
Loading…
Add table
Add a link
Reference in a new issue