#include "lexer.h"

/* NOTE(review): the names of the six system headers were lost when this file
 * was extracted. The list below is what the code in this file needs
 * (isblank, bool, NULL, EOF/fprintf, strchr, ssize_t) -- confirm against the
 * original include list. */
#include <ctype.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>

#include "../io_backend/io_backend.h"
#include "../utils/string_utils/string_utils.h"
#include "lexer_utils.h"

/* @brief: tells whether [c] is one of the grammar's special characters.
 *
 * The EOF comparison is done on a plain char, so it can only match on
 * platforms where plain char is signed.
 * A NUL byte is also reported as special, because strchr() treats the
 * terminating NUL of the table as part of the string.
 *
 * @return: true if a special character from the grammar was found,
 * false otherwise.
 */
static bool is_special_char(char c)
{
    static const char special_chars[] = "\n'\"`;#|&\\(){}<>*";

    if (c == EOF)
    {
        return true;
    }
    return strchr(special_chars, c) != NULL;
}

/* @brief: sets ctx->current_token to [tok].
 * This function is called by peek_token() and save_state().
 */
static void update_current_token(struct token *tok, struct lexer_context *ctx)
{
    ctx->current_token = tok;
}

/* @brief: frees the previously sent token and replaces it with [tok].
 * It does NOT touch ctx->current_token; save_state() resets that one.
 */
static void update_previous_token(struct token *tok, struct lexer_context *ctx)
{
    free_token(&ctx->previous_token);
    ctx->previous_token = tok;
}

/* @brief: records that [i] characters starting at [stream] were consumed.
 * ctx->remaining_chars is decreased by [i] and ctx->end_previous_token is
 * set to [stream] + [i].
 * The last sent token is freed and replaced by ctx->current_token, then
 * ctx->current_token is set to NULL.
 * This function is called by pop_token().
 */
static void save_state(char *stream, ssize_t i, struct lexer_context *ctx)
{
    ctx->remaining_chars -= i;
    ctx->end_previous_token = stream + i;

    update_previous_token(ctx->current_token, ctx);
    update_current_token(NULL, ctx);
}

/*
 * @brief: Updates the lexing_mode to LEXER_NORMAL
 * if the SECOND (closing) quote is found at stream[i].
 * Updates the lexing_mode to the corresponding quote type
 * if the FIRST (opening) quote of any type is found.
 * A quote of the other type found while inside quotes changes nothing.
 *
 * @return: true if an update was done. false otherwise.
 */
static bool update_lexing_mode(const char *stream, ssize_t i,
                               enum lexing_mode *lexing_mode)
{
    const enum lexing_mode mode_before_update = *lexing_mode;
    const char c = stream[i];

    if (*lexing_mode == LEXER_NORMAL)
    {
        // FIRST quote
        if (c == '"')
        {
            *lexing_mode = LEXER_DOUBLE_QUOTE;
        }
        else if (c == '\'')
        {
            *lexing_mode = LEXER_QUOTE;
        }
    }
    else if (*lexing_mode == LEXER_QUOTE && c == '\'')
    {
        // SECOND quote
        *lexing_mode = LEXER_NORMAL;
    }
    else if (*lexing_mode == LEXER_DOUBLE_QUOTE && c == '"')
    {
        // SECOND double quote
        *lexing_mode = LEXER_NORMAL;
    }

    return *lexing_mode != mode_before_update;
}

/* @brief: measures the token that starts at ctx->end_previous_token.
 * At most ctx->remaining_chars characters are examined.
 *
 * Outside quotes the token stops at the first blank or special character.
 * A special character found at offset 0 forms a one-character token of its
 * own. Inside quotes, blanks and special characters are part of the token.
 *
 * Shared by peek_token() and pop_token() so that both always agree on
 * where a token ends.
 *
 * @return: the token length, or -1 after printing an error on stderr when
 * an EOF character is met while a quote is still open.
 */
static ssize_t scan_token_length(const struct lexer_context *ctx)
{
    const char *stream = ctx->end_previous_token;
    // Useful to know if we are inside a quote or double quote
    enum lexing_mode lexing_mode = LEXER_NORMAL;
    ssize_t i = 0;

    while (i < ctx->remaining_chars)
    {
        // true if we didn't encounter a quote of any type at stream[i]
        // AND we are not inside quotes
        if (!update_lexing_mode(stream, i, &lexing_mode)
            && lexing_mode == LEXER_NORMAL)
        {
            if (is_special_char(stream[i]))
            {
                // a leading special character is a token on its own
                return i == 0 ? 1 : i;
            }
            // <ctype.h> functions need a value representable as
            // unsigned char (or EOF); a negative plain char is undefined.
            if (isblank((unsigned char)stream[i]))
            {
                return i;
            }
        }
        else if (stream[i] == EOF)
        {
            fprintf(stderr, "Lexing error: unmatched quote\n");
            return -1;
        }
        i++;
    }

    return i;
}

/* @brief: returns the upcoming token without consuming it.
 * The token is cached in ctx->current_token, so repeated calls return the
 * same token until pop_token() is called.
 * Tokens of type TOKEN_COMMENT are dropped: the rest of the line is skipped
 * with go_end_of_line() and lexing resumes from there.
 *
 * @return: the upcoming token, or NULL on an unmatched quote or if
 * new_token() returns NULL. In both cases ctx->current_token stays NULL.
 */
struct token *peek_token(struct lexer_context *ctx)
{
    // we already created the upcoming token during the previous call to peek()
    if (ctx->current_token != NULL)
    {
        return ctx->current_token;
    }

    struct token *tok = NULL;

    // Loop rather than recurse so that a long run of comment lines cannot
    // grow the call stack.
    for (;;)
    {
        stream_init(ctx);

        char *stream = ctx->end_previous_token;
        ssize_t len = scan_token_length(ctx);
        if (len < 0)
        {
            // error handling: unmatched quote, already reported
            return NULL;
        }

        tok = new_token(stream, len, ctx->only_digits);
        if (tok == NULL)
        {
            // NOTE(review): new_token()'s failure mode is not visible here;
            // this guard only avoids dereferencing a NULL token below.
            return NULL;
        }

        // if token is comment, we don't want it
        if (tok->type != TOKEN_COMMENT)
        {
            break;
        }

        // Find next newline or EOF.
        go_end_of_line(ctx);
        free_token(&tok);
    }

    update_current_token(tok, ctx);
    return tok;
}

/* @brief: consumes the upcoming token and returns it.
 * The returned token is stored in ctx->previous_token and is freed by the
 * next call to pop_token().
 *
 * @return: the consumed token, or NULL when the upcoming token is TOKEN_EOF
 * (every token still held by [ctx] is freed in that case) or when an
 * unmatched quote is found.
 */
struct token *pop_token(struct lexer_context *ctx)
{
    if (ctx->current_token != NULL && ctx->current_token->type == TOKEN_EOF)
    {
        // we reached end of input, frees all the token still allocated.
        free_token(&ctx->previous_token);
        free_token(&ctx->current_token);
        return NULL;
    }

    stream_init(ctx);

    char *stream = ctx->end_previous_token;
    ssize_t len = scan_token_length(ctx);
    if (len < 0)
    {
        // error handling: unmatched quote, already reported
        return NULL;
    }

    // just in case peek() was not called before popping.
    // (this should never happen)
    if (ctx->current_token == NULL)
    {
        ctx->current_token = new_token(stream, len, ctx->only_digits);
    }

    save_state(stream, len, ctx);
    return ctx->previous_token;
}