#include "lexer.h"

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

#include "io_backend/io_backend.h"
#include "utils/string_utils/string_utils.h"

// ######## STATIC FUNCTIONS ##############

/* @brief: sets ctx->current_token to [tok].
 * this function is called by peek_token() and save_state().
 */
static void update_current_token(struct token *tok, struct lexer_context *ctx)
{
    ctx->current_token = tok;
}

/* @brief: frees the previously returned token and replaces it with [tok].
 * ctx->current_token is left untouched; save_state() is the one resetting it.
 */
static void update_previous_token(struct token *tok, struct lexer_context *ctx)
{
    free_token(&ctx->previous_token);
    ctx->previous_token = tok;
}

/* @brief: commits the consumption of [i] characters starting at [stream].
 * The read cursor moves to [stream] + [i] and ctx->remaining_chars shrinks
 * by [i]. The token returned by the previous pop is freed, the current token
 * becomes the previous one, and the current token is reset to NULL.
 * This function is called by pop_token().
 */
static void save_state(char *stream, ssize_t i, struct lexer_context *ctx)
{
    ctx->remaining_chars -= i;
    ctx->end_previous_token = stream + i;
    update_previous_token(ctx->current_token, ctx);
    update_current_token(NULL, ctx);
}

/* @return: true if a special character from the grammar was found,
 * false otherwise.
 * Note: strchr() also matches the terminating '\0' of the table, so a NUL
 * byte is reported as special as well.
 */
static bool is_special_char(char c)
{
    static const char special_chars[] = "\n'\"`;#|&\\$(){}<>*";

    if (c == EOF)
        return true;
    return strchr(special_chars, c) != NULL;
}

/* @brief: if [begin] holds exactly one special character,
 * [tok->type] is set accordingly. Otherwise [tok] is left untouched.
 */
static void set_token_spechar(struct token *tok, char *begin, ssize_t size)
{
    if (size != 1)
        return;

    switch (begin[0])
    {
    case EOF:
        tok->type = TOKEN_EOF;
        break;
    case ';':
        tok->type = TOKEN_SEMICOLON;
        break;
    case '\n':
        tok->type = TOKEN_NEWLINE;
        break;
    case '\'':
        tok->type = TOKEN_QUOTE;
        break;
    case '"':
        tok->type = TOKEN_DOUBLE_QUOTE;
        break;
    case '`':
        tok->type = TOKEN_GRAVE;
        break;
    case '#':
        tok->type = TOKEN_COMMENT;
        break;
    case '|':
        tok->type = TOKEN_PIPE;
        break;
    case '&':
        tok->type = TOKEN_AMPERSAND;
        break;
    case '\\':
        tok->type = TOKEN_BACKSLASH;
        break;
    case '$':
        tok->type = TOKEN_DOLLAR;
        break;
    case '(':
        tok->type = TOKEN_LEFT_PAREN;
        break;
    case ')':
        tok->type = TOKEN_RIGHT_PAREN;
        break;
    case '{':
        tok->type = TOKEN_LEFT_BRACKET;
        break;
    case '}':
        tok->type = TOKEN_RIGHT_BRACKET;
        break;
    case '<':
        tok->type = TOKEN_LESS;
        break;
    case '>':
        tok->type = TOKEN_GREATER;
        break;
    case '*':
        tok->type = TOKEN_STAR;
        break;
    default:
        break;
    }
}

/* @return: true if the [size] characters at [begin] spell exactly [keyword].
 * The length comparison is what rejects strict prefixes such as "i" or "el",
 * which a bare strncmp(begin, keyword, size) would accept.
 */
static bool is_keyword(const char *begin, ssize_t size, const char *keyword)
{
    size_t keyword_len = strlen(keyword);

    return (size_t)size == keyword_len
        && strncmp(begin, keyword, keyword_len) == 0;
}

/* @brief: if a keyword is found at [begin],
 * [tok->type] is set accordingly and the keyword text is copied
 * into [tok->data]. Does nothing if the type was already set.
 * If the allocation fails, the type is set but [tok->data] stays NULL.
 */
static void set_token_keyword(struct token *tok, char *begin, ssize_t size)
{
    if (tok->type != TOKEN_NULL || size <= 0)
        return;

    if (is_keyword(begin, size, "if"))
        tok->type = TOKEN_IF;
    else if (is_keyword(begin, size, "fi"))
        tok->type = TOKEN_FI;
    else if (is_keyword(begin, size, "then"))
        tok->type = TOKEN_THEN;
    else if (is_keyword(begin, size, "else"))
        tok->type = TOKEN_ELSE;
    else if (is_keyword(begin, size, "elif"))
        tok->type = TOKEN_ELIF;

    // no keywords found.
    if (tok->type == TOKEN_NULL)
        return;

    // calloc zero-fills the extra byte, so the copy is always terminated.
    tok->data = calloc(size + 1, sizeof(char));
    if (tok->data == NULL)
        return;
    strncpy(tok->data, begin, size);
}

/* @brief: if the token type has not yet been set, then it is a TOKEN_WORD.
 * Also allocates [tok->data] and fills it with the [size] characters
 * found at [begin]. If the allocation fails, [tok->data] stays NULL.
 */
static void set_token_word(struct token *tok, char *begin, ssize_t size)
{
    if (tok->type != TOKEN_NULL || size == 0)
        return;

    tok->type = TOKEN_WORD;

    // calloc zero-fills the extra byte, so the copy is always terminated.
    tok->data = calloc(size + 1, sizeof(char));
    if (tok->data == NULL)
        return;
    strncpy(tok->data, begin, size);
}

/* @brief: releases the lexer context and the tokens it still owns,
 * then sets [*ctx] to NULL. Accepts a NULL [ctx] or a NULL [*ctx].
 */
void destroy_lexer_context(struct lexer_context **ctx)
{
    if (ctx == NULL || *ctx == NULL)
        return;

    // end_previous_token is a cursor inside the stream buffer, not the
    // address returned by an allocation, so it must not be given to free().
    // NOTE(review): the start of the stream buffer is not kept anywhere in
    // this file; confirm who is responsible for releasing it.
    (*ctx)->end_previous_token = NULL;

    // free_token() also releases the token data, unlike a bare free().
    free_token(&(*ctx)->previous_token);
    free_token(&(*ctx)->current_token);

    free(*ctx);
    *ctx = NULL;
}

/* @brief: allocates a token describing the [size] characters at [begin].
 * The type is decided in this order: special character, keyword, word.
 * With [size] == 0 the token keeps the type TOKEN_NULL.
 *
 * @return: the new token (to be released with free_token()),
 * or NULL if the allocation failed.
 */
struct token *new_token(char *begin, ssize_t size)
{
    struct token *tok = calloc(1, sizeof(*tok));
    if (tok == NULL)
        return NULL;

    // Explicit, so the helpers below do not depend on TOKEN_NULL being 0.
    tok->type = TOKEN_NULL;

    set_token_spechar(tok, begin, size);
    set_token_keyword(tok, begin, size);
    set_token_word(tok, begin, size);
    return tok;
}

/* @brief: frees [*tok] and its data, then sets [*tok] to NULL.
 * Accepts a NULL [tok] or a NULL [*tok].
 */
void free_token(struct token **tok)
{
    if (tok == NULL || *tok == NULL)
        return;

    free((*tok)->data);
    free(*tok);
    *tok = NULL;
}

/* @brief: gives the position where the next token starts.
 * On the first call (ctx->end_previous_token is NULL) the input is fetched
 * with stream_read(). Leading blanks are skipped and subtracted from
 * ctx->remaining_chars, and the resulting position is stored back in
 * ctx->end_previous_token. Thanks to that, calling this function again
 * before the token is popped (peek_token() then pop_token()) neither reads
 * the input a second time nor subtracts the same blanks twice.
 *
 * @return: pointer to the first non-blank character to lex.
 */
char *stream_init(struct lexer_context *ctx)
{
    char *stream = NULL;

    if (ctx->end_previous_token == NULL) // at the beginning
    {
        // NOTE(review): a failure of stream_read() is not detected here,
        // its error convention is not visible from this file.
        ctx->remaining_chars = stream_read(&stream);
    }
    else
    {
        stream = ctx->end_previous_token;
    }

    char *trimmed_stream = trim_blank_left(stream);
    ctx->remaining_chars -= trimmed_stream - stream;
    ctx->end_previous_token = trimmed_stream;

    return trimmed_stream;
}

/*
 * @brief: Updates the lexing_mode to LEXER_NORMAL
 * if the SECOND quote is found at stream[i].
 * Updates the lexing_mode to the corresponding quote type
 * if the FIRST quote of any type is found.
 *
 * @return: true if an update was done. false otherwise.
 */
static bool update_lexing_mode(const char *stream, ssize_t i,
                               enum lexing_mode *lexing_mode)
{
    const enum lexing_mode mode_before_update = *lexing_mode;
    const char c = stream[i];

    switch (mode_before_update)
    {
    case LEXER_NORMAL: // FIRST quote
        if (c == '"')
            *lexing_mode = LEXER_DOUBLE_QUOTE;
        else if (c == '\'')
            *lexing_mode = LEXER_QUOTE;
        break;
    case LEXER_QUOTE: // SECOND quote
        if (c == '\'')
            *lexing_mode = LEXER_NORMAL;
        break;
    case LEXER_DOUBLE_QUOTE: // SECOND quote
        if (c == '"')
            *lexing_mode = LEXER_NORMAL;
        break;
    default:
        break;
    }

    return *lexing_mode != mode_before_update;
}

/* @brief: measures the token starting at [stream].
 * Outside of quotes, a token ends at the first blank or special character;
 * a special character found at the very start is a token of length 1.
 * Quoted sections are skipped as a whole.
 *
 * @return: the length of the token, or -1 (after printing an error) if an
 * EOF character is met on a quote character or inside quotes.
 */
static ssize_t scan_token_length(const char *stream,
                                 const struct lexer_context *ctx)
{
    // Useful to know if we are inside a quote or double quote
    enum lexing_mode lexing_mode = LEXER_NORMAL;
    ssize_t i = 0;

    while (i < ctx->remaining_chars)
    {
        // true if we didn't encounter a quote of any type at stream[i]
        // AND we are not inside quotes
        if (!update_lexing_mode(stream, i, &lexing_mode)
            && lexing_mode == LEXER_NORMAL)
        {
            if (is_special_char(stream[i]))
            {
                if (i == 0) // where we create spe_char token
                    i++;
                break;
            }
            // <ctype.h> functions need a value representable as
            // unsigned char, hence the cast.
            if (isblank((unsigned char)stream[i]))
                break;
        }
        else if (stream[i] == EOF)
        {
            fprintf(stderr, "Lexing error: unmatched quote\n");
            return -1;
        }
        i++;
    }

    return i;
}

/* @brief: returns the upcoming token without consuming it.
 * The token is cached in ctx->current_token, so several calls in a row
 * return the same token. It stays owned by [ctx]: the caller must not
 * free it.
 *
 * @return: the upcoming token, or NULL on an unmatched quote or if the
 * token could not be allocated.
 */
struct token *peek_token(struct lexer_context *ctx)
{
    // we already created the upcoming token during the previous call to peek()
    if (ctx->current_token != NULL)
        return ctx->current_token;

    char *stream = stream_init(ctx);
    ssize_t length = scan_token_length(stream, ctx);
    if (length < 0) // error handling
        return NULL;

    struct token *tok = new_token(stream, length);
    update_current_token(tok, ctx);
    return tok;
}

/* @brief: consumes the upcoming token and returns it.
 * The returned token stays owned by [ctx]; it is freed by the next call
 * to pop_token(), so the caller must not free it.
 *
 * @return: the consumed token. NULL once the EOF token has been reached
 * (every token still held by [ctx] is freed), on an unmatched quote, or if
 * the token could not be allocated.
 */
struct token *pop_token(struct lexer_context *ctx)
{
    if (ctx->current_token != NULL && ctx->current_token->type == TOKEN_EOF)
    {
        // we reached end of input, frees all the token still allocated.
        free_token(&ctx->previous_token);
        free_token(&ctx->current_token);
        // Forget the read position, so that the next call to stream_init()
        // fetches the input again.
        ctx->end_previous_token = NULL;
        return NULL;
    }

    char *stream = stream_init(ctx);
    ssize_t length = scan_token_length(stream, ctx);
    if (length < 0) // error handling
        return NULL;

    // just in case peek() was not called before popping.
    // (this should never happen)
    if (ctx->current_token == NULL)
        ctx->current_token = new_token(stream, length);

    save_state(stream, length, ctx);
    return ctx->previous_token;
}