/* 42sh/src/lexer/lexer_utils.h
 * Lexer utility declarations: lexing context, token types, and the
 * helper functions used while tokenising the input stream.
 */
#ifndef LEXER_UTILS_H
#define LEXER_UTILS_H
#include <stdbool.h>
#include <stddef.h>
#include <sys/types.h>
/* Mutable state shared across successive token reads.
 * Created by the lexer and released with destroy_lexer_context(). */
struct lexer_context
{
/* Points just past the last character consumed by the previous token in
 * the current input stream (stream presumably owned by the IO backend —
 * NOTE(review): confirm ownership against stream_init()/get_next_stream()). */
char *end_previous_token;
/* Characters left unread in the current stream; signed (ssize_t),
 * presumably so a negative value can flag "no stream / read error" —
 * TODO confirm against the implementation. */
ssize_t remaining_chars;
/* Last token that was emitted. */
struct token *previous_token;
/* Token currently being produced. */
struct token *current_token;
};
/* @brief: frees all fields of the context, then the context itself,
 * and sets the caller's handle (*ctx) to NULL — the double pointer
 * exists so the caller cannot keep a dangling pointer.
 */
void destroy_lexer_context(struct lexer_context **ctx);
/* Quoting state the lexer is in; presumably controls which characters
 * are treated as special while scanning — TODO confirm in lexer.c. */
enum lexing_mode
{
LEXER_NORMAL, // outside any quoting
LEXER_QUOTE, // inside single quotes '...'
LEXER_DOUBLE_QUOTE // inside double quotes "..."
};
/* Every token category the lexer can emit.
 * TOKEN_NULL is explicitly 0 so a zero-initialized token reads as
 * "no token". */
enum token_type
{
// Blanks
TOKEN_NULL = 0, // absent / not-yet-assigned token
TOKEN_EOF, // end of input
TOKEN_NEWLINE,
// words
TOKEN_WORD,
TOKEN_ASSIGNMENT_WORD, // word containing '=' (see token_info.has_equal)
// Special characters
TOKEN_GRAVE,
TOKEN_SEMICOLON,
TOKEN_COMMENT,
TOKEN_STAR,
TOKEN_BACKSLASH,
TOKEN_DOLLAR,
TOKEN_LEFT_PAREN,
TOKEN_RIGHT_PAREN,
TOKEN_LEFT_BRACKET,
TOKEN_RIGHT_BRACKET,
TOKEN_PIPE,
TOKEN_NEGATION,
// Redirections
TOKEN_REDIR_LEFT, // <
TOKEN_REDIR_RIGHT, // >
TOKEN_REDIR_LEFT_RIGHT, // <>
TOKEN_REDIR_DOUBLE_RIGHT, // >>
TOKEN_REDIR_LEFT_AMP, // <&
TOKEN_REDIR_RIGHT_AMP, // >&
TOKEN_REDIR_RIGHT_PIPE, // >|
TOKEN_IONUMBER, // all-digit word directly before a redirection operator
// Keywords
TOKEN_IF,
TOKEN_THEN,
TOKEN_ELSE,
TOKEN_FI,
TOKEN_ELIF,
TOKEN_AND,
TOKEN_OR,
TOKEN_FOR,
TOKEN_WHILE,
TOKEN_UNTIL,
TOKEN_CASE,
TOKEN_EXPORT
};
/* One lexed token: its category plus the raw text it was built from.
 * Allocated by new_token(); released with free_token(). */
struct token
{
enum token_type type;
/* Copy of the lexed characters — presumably NUL-terminated and owned
 * by the token; TODO confirm new_token() terminates it. */
char *data;
};
// Used to pass info gathered during lexing when creating a new token.
struct token_info
{
// Useful to detect IO numbers (TOKEN_IONUMBER):
// tells us whether we only lexed digits in the current token.
bool only_digits;
// Useful to detect assignments (TOKEN_ASSIGNMENT_WORD),
// and syntax errors with '='.
bool has_equal;
};
/* @brief: tests the character at stream[i] against the grammar's set of
 * special characters.
 *
 * @return: true if a special character from the grammar was found at
 * stream[i], false otherwise.
 */
bool is_special_char(char *stream, ssize_t i);
/* @brief: return a newly allocated token, with the type corresponding
 * to the info given in arguments.
 * The data contains [size] chars, copied starting from [begin].
 * Ownership of the token transfers to the caller (release with
 * free_token()).
 *
 * @return: NULL on error, a token otherwise.
 */
struct token *new_token(char *begin, ssize_t size, struct token_info *info);
/* @brief: frees the token given in argument (its data and the token
 * itself), and presumably sets *tok to NULL — the double pointer exists
 * so the caller's handle can be cleared; TODO confirm in lexer_utils.c.
 */
void free_token(struct token **tok);
/*
 * @brief: checks if the stream used for the last token creation is empty.
 * If it is, it calls stream_read() from IO_backend,
 * and sets [remaining_chars].
 * If not, it starts from the end of the last token.
 * Also trims left blanks before returning.
 *
 * @note: the function returns void (the original comment claimed a
 * char* return); the resulting stream position is communicated through
 * [ctx] — presumably via ctx->end_previous_token.
 */
void stream_init(struct lexer_context *ctx);
/* @brief: finds the next '\n' or EOF character,
 * starting at [ctx->end_previous_token],
 * and updates the stream and remaining_chars accordingly.
 *
 * @note: presumably used to skip the remainder of a line, e.g. after a
 * '#' comment — TODO confirm against the call sites.
 */
void go_end_of_line(struct lexer_context *ctx);
/* @brief: this function is called when we found a special character
 * in the stream at index [i]. This can either be an operator
 * (e.g. ">>" or "<&" etc), or a special char (e.g. '\' or '#' etc).
 * @return: the length of the operator/special char found (can be 1, 2 or 3).
 * -1 on error.
 */
ssize_t len_op_sepchar(char *stream, ssize_t i);
/* @brief: drops the current stream and asks the IO backend (IOB) for a
 * new one, updating [ctx] accordingly — presumably resetting
 * ctx->end_previous_token and ctx->remaining_chars; TODO confirm.
 */
void get_next_stream(struct lexer_context *ctx);
#endif /* LEXER_UTILS_H */