#include "lexer_utils.h"

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "../io_backend/io_backend.h"
#include "../utils/string_utils/string_utils.h"

/* @brief: checks whether the [size] characters at [begin] are exactly
 *  the NUL-terminated string [literal].
 * @return: true only if [size] is positive, equals the length of [literal]
 *  and every character matches.
 */
static bool text_is(const char *begin, ssize_t size, const char *literal)
{
    size_t len = strlen(literal);

    return size > 0 && (size_t)size == len
        && strncmp(begin, literal, len) == 0;
}

/* @brief: stores a NUL-terminated copy of the [size] characters at [begin]
 *  in [tok->data]. [size] must be positive.
 *  If the allocation fails, [tok->data] is left NULL.
 */
static void store_token_text(struct token *tok, const char *begin,
                             ssize_t size)
{
    tok->data = calloc((size_t)size + 1, sizeof(char));
    if (tok->data == NULL)
        return;

    // The buffer is zero-filled and one byte longer than what is copied,
    // so the result is always terminated. strncpy (rather than memcpy) is
    // kept on purpose: it never reads past a NUL found in [begin].
    strncpy(tok->data, begin, (size_t)size);
}

/* @brief: if a special character is found at [begin],
 *  [tok->type] is set accordingly.
 *  Only tokens of exactly one character are considered; any other
 *  character leaves [tok->type] untouched.
 */
static void set_token_spechar(struct token *tok, const char *begin,
                              ssize_t size)
{
    if (size != 1)
        return;

    switch (begin[0])
    {
    case EOF:
        tok->type = TOKEN_EOF;
        break;
    case ';':
        tok->type = TOKEN_SEMICOLON;
        break;
    case '\n':
        tok->type = TOKEN_NEWLINE;
        break;
    case '`':
        tok->type = TOKEN_GRAVE;
        break;
    case '#':
        tok->type = TOKEN_COMMENT;
        break;
    case '\\':
        tok->type = TOKEN_BACKSLASH;
        break;
    case '(':
        tok->type = TOKEN_LEFT_PAREN;
        break;
    case ')':
        tok->type = TOKEN_RIGHT_PAREN;
        break;
    case '{':
        tok->type = TOKEN_LEFT_BRACKET;
        break;
    case '}':
        tok->type = TOKEN_RIGHT_BRACKET;
        break;
    case '*':
        tok->type = TOKEN_STAR;
        break;
    default:
        break;
    }
}

/* @brief: if a keyword is found at [begin],
 *  [tok->type] is set accordingly and the keyword text is copied
 *  into [tok->data].
 *  Does nothing if the token already has a type or if [size] <= 0.
 */
static void set_token_keyword(struct token *tok, const char *begin,
                              ssize_t size)
{
    if (tok->type != TOKEN_NULL || size <= 0)
        return;

    if (text_is(begin, size, "if"))
        tok->type = TOKEN_IF;
    else if (text_is(begin, size, "fi"))
        tok->type = TOKEN_FI;
    else if (text_is(begin, size, "then"))
        tok->type = TOKEN_THEN;
    else if (text_is(begin, size, "else"))
        tok->type = TOKEN_ELSE;
    else if (text_is(begin, size, "elif"))
        tok->type = TOKEN_ELIF;
    else if (text_is(begin, size, "&&"))
        tok->type = TOKEN_AND;
    else if (text_is(begin, size, "||"))
        tok->type = TOKEN_OR;
    else
        return; // no keyword found

    store_token_text(tok, begin, size);
}

/* @brief: if an operator is found at [begin],
 *  [tok->type] is set accordingly. No text is stored in [tok->data].
 *  The comparison is on the exact length, so an empty token ([size] == 0)
 *  is never mistaken for an operator.
 */
static void set_token_operator(struct token *tok, const char *begin,
                               ssize_t size)
{
    if (tok->type != TOKEN_NULL)
        return;

    if (text_is(begin, size, ">"))
        tok->type = TOKEN_REDIR_RIGHT;
    else if (text_is(begin, size, "<"))
        tok->type = TOKEN_REDIR_LEFT;
    else if (text_is(begin, size, ">>"))
        tok->type = TOKEN_REDIR_DOUBLE_RIGHT;
    else if (text_is(begin, size, ">&"))
        tok->type = TOKEN_REDIR_RIGHT_AMP;
    else if (text_is(begin, size, ">|"))
        tok->type = TOKEN_REDIR_RIGHT_PIPE;
    else if (text_is(begin, size, "<&"))
        tok->type = TOKEN_REDIR_LEFT_AMP;
    else if (text_is(begin, size, "<>"))
        tok->type = TOKEN_REDIR_LEFT_RIGHT;
    else if (text_is(begin, size, "|"))
        tok->type = TOKEN_PIPE;
}

/* @brief: if [tok->type] has not yet been set, then it is a TOKEN_WORD.
 *  Also allocates the data and fills it.
 *  Does nothing if [size] <= 0.
 */
static void set_token_word(struct token *tok, const char *begin, ssize_t size)
{
    if (tok->type != TOKEN_NULL || size <= 0)
        return;

    tok->type = TOKEN_WORD;
    store_token_text(tok, begin, size);
}

/* @brief: Sets the token to an IO number.
 *  Also allocates the data and fills it.
 *  Does nothing if the token already has a type or if [size] <= 0.
 */
static void set_token_ION(struct token *tok, const char *begin, ssize_t size)
{
    if (tok->type != TOKEN_NULL || size <= 0)
        return;

    tok->type = TOKEN_IONUMBER;
    store_token_text(tok, begin, size);
}

/* @brief: check if [c] is a delimiter for end of line.
 * @return: true if [c] == '\n' or EOF. false otherwise.
 */
static bool is_end_of_line(char c)
{
    return c == '\n' || c == EOF;
}

/* @brief: tells whether the character at [stream][i] is a special
 *  (delimiting) character.
 * @return: true for the EOF marker and for any unescaped character of
 *  the set  \n ' " ` ; # | & ( ) { } < > *
 *  false for the '#' of "$#" and for a character preceded by an odd
 *  number of consecutive backslashes (i.e. an escaped character).
 *  Note: a '\0' at [stream][i] is reported as special, because strchr
 *  also matches the terminator of the lookup table.
 */
bool is_special_char(char *stream, ssize_t i)
{
    static const char special_chars[] = "\n'\"`;#|&(){}<>*";
    char c = stream[i];

    if (c == EOF)
        return true;

    // the edge case of $#
    if (i > 0 && c == '#' && stream[i - 1] == '$')
        return false;

    // Count the run of backslashes right before [i]: each pair is an
    // escaped backslash, so only an odd count escapes the current
    // character (ex: in "echo \\#comment" the '#' is NOT escaped).
    ssize_t backslashes = 0;
    while (backslashes < i && stream[i - 1 - backslashes] == '\\')
        backslashes++;
    if (backslashes % 2 != 0)
        return false;

    return strchr(special_chars, c) != NULL;
}

/* @brief: allocates a token describing the [size] characters at [begin].
 *  The classifiers are tried in this order: IO number (only when
 *  [only_digits] is set), operator, special character, keyword, word.
 *  The token is zero-allocated, so this relies on TOKEN_NULL being the
 *  zero value of the token type (assumption: the enum is declared
 *  elsewhere).
 * @return: the new token (to be released with free_token),
 *  or NULL if the token itself could not be allocated.
 *  [tok->data] may be NULL if the copy of the text could not be allocated.
 */
struct token *new_token(char *begin, ssize_t size, bool only_digits)
{
    struct token *tok = calloc(1, sizeof(*tok));
    if (tok == NULL)
        return NULL;

    if (only_digits)
        set_token_ION(tok, begin, size);

    set_token_operator(tok, begin, size);
    set_token_spechar(tok, begin, size);
    set_token_keyword(tok, begin, size);
    set_token_word(tok, begin, size);

    return tok;
}

/* @brief: releases the token pointed to by [tok] together with its data,
 *  then sets [*tok] to NULL. Does nothing if [tok] or [*tok] is NULL.
 */
void free_token(struct token **tok)
{
    if (tok == NULL || *tok == NULL)
        return;

    free((*tok)->data);
    free(*tok);
    *tok = NULL;
}

/* @brief: releases the lexer context pointed to by [ctx], including its
 *  previous and current tokens and their data, then sets [*ctx] to NULL.
 *  Does nothing if [ctx] or [*ctx] is NULL.
 *  NOTE(review): this assumes the context owns the text of its tokens;
 *  confirm no caller keeps a pointer to [data] after destruction.
 */
void destroy_lexer_context(struct lexer_context **ctx)
{
    if (ctx == NULL || *ctx == NULL)
        return;

    struct lexer_context *context = *ctx;

    // Avoid a double free should both fields refer to the same token.
    if (context->current_token == context->previous_token)
        context->current_token = NULL;

    free_token(&context->previous_token);
    free_token(&context->current_token);

    free(context);
    *ctx = NULL;
}

/* @brief: positions the context on the next non-blank character.
 *  When no characters remain, a new stream is fetched with stream_read
 *  and its return value becomes [ctx->remaining_chars]; otherwise the
 *  scan resumes from [ctx->end_previous_token].
 *  The blanks skipped by trim_blank_left are subtracted from
 *  [ctx->remaining_chars].
 */
void stream_init(struct lexer_context *ctx)
{
    if (ctx == NULL)
        return;

    char *stream = NULL;
    if (ctx->remaining_chars == 0) // at the beginning
        ctx->remaining_chars = stream_read(&stream);
    else
        stream = ctx->end_previous_token;

    // NOTE(review): stream_read is defined elsewhere; if it can leave
    // [stream] unset there is nothing to trim.
    if (stream == NULL)
    {
        ctx->end_previous_token = NULL;
        return;
    }

    char *trimmed_stream = trim_blank_left(stream);
    ctx->remaining_chars -= trimmed_stream - stream;
    ctx->end_previous_token = trimmed_stream;
}

/* @brief: gives the length of the operator or special character found
 *  at [stream][i].
 * @return: 2 for the two-character redirections (<&, <>, >>, >&, >|),
 *  1 for any other special character (including a lone < or >),
 *  -1 if the character is not special (should never happen).
 */
ssize_t len_op_sepchar(char *stream, ssize_t i)
{
    if (!is_special_char(stream, i))
        return -1; // should never happen

    char next;
    switch (stream[i])
    {
    case '<':
        next = stream[i + 1];
        return (next == '&' || next == '>') ? 2 : 1; // <&, <>
    case '>':
        next = stream[i + 1];
        return (next == '>' || next == '|' || next == '&') ? 2
                                                           : 1; // >>, >&, >|
    default:
        return 1; // special character (cannot be operator)
    }
}

/* @brief: moves [ctx->end_previous_token] forward to the next end of
 *  line ('\n' or EOF marker) without consuming it, and decreases
 *  [ctx->remaining_chars] by the number of characters skipped.
 *  The scan also stops on '\0' so that it never runs past the end of
 *  a NUL-terminated buffer.
 *  Does nothing if [ctx] or [ctx->end_previous_token] is NULL.
 */
void go_end_of_line(struct lexer_context *ctx)
{
    if (ctx == NULL || ctx->end_previous_token == NULL)
        return;

    const char *cursor = ctx->end_previous_token;
    ssize_t skipped = 0;
    while (cursor[skipped] != '\0' && !is_end_of_line(cursor[skipped]))
        skipped++;

    ctx->end_previous_token += skipped;
    ctx->remaining_chars -= skipped;
}

/* @brief: discards whatever remains of the current stream and
 *  initialises the context on a new one (see stream_init).
 *  Does nothing if [ctx] is NULL.
 */
void get_next_stream(struct lexer_context *ctx)
{
    if (ctx == NULL)
        return;

    ctx->remaining_chars = 0;
    stream_init(ctx);
}