feat(lexer): quote handling

This commit is contained in:
Matteo Flebus 2026-01-19 17:32:45 +01:00
parent f4d7f58ef5
commit c81afc2c69
2 changed files with 127 additions and 77 deletions

View file

@ -69,62 +69,62 @@ static void set_token_spechar(struct token *tok, char *begin, ssize_t size)
return;
switch (begin[0])
{
case EOF:
tok->type = TOKEN_EOF;
break;
case ';':
tok->type = TOKEN_SEMICOLON;
break;
case '\n':
tok->type = TOKEN_NEWLINE;
break;
case '\'':
tok->type = TOKEN_QUOTE;
break;
case '"':
tok->type = TOKEN_DOUBLE_QUOTE;
break;
case '`':
tok->type = TOKEN_GRAVE;
break;
case '#':
tok->type = TOKEN_COMMENT;
break;
case '|':
tok->type = TOKEN_PIPE;
break;
case '&':
tok->type = TOKEN_AMPERSAND;
break;
case '\\':
tok->type = TOKEN_BACKSLASH;
break;
case '$':
tok->type = TOKEN_DOLLAR;
break;
case '(':
tok->type = TOKEN_LEFT_PAREN;
break;
case ')':
tok->type = TOKEN_RIGHT_PAREN;
break;
case '{':
tok->type = TOKEN_LEFT_BRACKET;
break;
case '}':
tok->type = TOKEN_RIGHT_BRACKET;
break;
case '<':
tok->type = TOKEN_LESS;
break;
case '>':
tok->type = TOKEN_GREATER;
break;
case '*':
tok->type = TOKEN_STAR;
break;
default:
break;
case EOF:
tok->type = TOKEN_EOF;
break;
case ';':
tok->type = TOKEN_SEMICOLON;
break;
case '\n':
tok->type = TOKEN_NEWLINE;
break;
case '\'':
tok->type = TOKEN_QUOTE;
break;
case '"':
tok->type = TOKEN_DOUBLE_QUOTE;
break;
case '`':
tok->type = TOKEN_GRAVE;
break;
case '#':
tok->type = TOKEN_COMMENT;
break;
case '|':
tok->type = TOKEN_PIPE;
break;
case '&':
tok->type = TOKEN_AMPERSAND;
break;
case '\\':
tok->type = TOKEN_BACKSLASH;
break;
case '$':
tok->type = TOKEN_DOLLAR;
break;
case '(':
tok->type = TOKEN_LEFT_PAREN;
break;
case ')':
tok->type = TOKEN_RIGHT_PAREN;
break;
case '{':
tok->type = TOKEN_LEFT_BRACKET;
break;
case '}':
tok->type = TOKEN_RIGHT_BRACKET;
break;
case '<':
tok->type = TOKEN_LESS;
break;
case '>':
tok->type = TOKEN_GREATER;
break;
case '*':
tok->type = TOKEN_STAR;
break;
default:
break;
}
}
@ -224,6 +224,36 @@ char *stream_init(void)
return stream;
}
/*
 * @brief: Tracks whether the lexer is inside a quoted section by inspecting
 *         the character at stream[i].
 *         - In LEXER_NORMAL, a ' or a " opens the corresponding quote mode.
 *         - In LEXER_QUOTE, only a ' closes it (back to LEXER_NORMAL).
 *         - In LEXER_DOUBLE_QUOTE, only a " closes it (back to LEXER_NORMAL).
 *         Any other character leaves the mode untouched.
 *
 * @param stream: buffer being lexed; stream[i] must be readable.
 * @param i: index of the character to inspect.
 * @param lexing_mode: current mode, updated in place.
 *
 * @return: true if an update was done. false otherwise.
 */
static bool update_lexing_mode(char *stream, ssize_t i,
                               enum lexing_mode *lexing_mode)
{
    const char c = stream[i];

    // Exactly one transition per character: every branch returns right away
    // so that a closing quote can never be re-read as an opening one (which
    // would silently put the lexer back into the quote mode it just left).
    switch (*lexing_mode)
    {
    case LEXER_QUOTE:
        // SECOND single quote: leave the quoted section
        if (c != '\'')
            return false;
        *lexing_mode = LEXER_NORMAL;
        return true;

    case LEXER_DOUBLE_QUOTE:
        // SECOND double quote: leave the quoted section
        if (c != '"')
            return false;
        *lexing_mode = LEXER_NORMAL;
        return true;

    case LEXER_NORMAL:
        // FIRST quote of either type: enter the matching quoted section
        if (c == '\'')
        {
            *lexing_mode = LEXER_QUOTE;
            return true;
        }
        if (c == '"')
        {
            *lexing_mode = LEXER_DOUBLE_QUOTE;
            return true;
        }
        return false;

    default:
        // Unknown mode: nothing to update
        return false;
    }
}
struct token *peek_token(void)
{
// we already created the upcoming token during the previous call to peek()
@ -233,20 +263,26 @@ struct token *peek_token(void)
}
char *stream = stream_init();
ssize_t i = 0;
// Useful to know if we are inside a quote or double quote
enum lexing_mode lexing_mode = LEXER_NORMAL;
while (i < remaining_chars)
{
if (is_special_char(stream[i]))
// true if the quoting mode did not change at stream[i]
if (!update_lexing_mode(stream, i, &lexing_mode))
{
if (i == 0) // where we create spe_char token
i++;
break;
}
if (isblank(stream[i]))
{
break;
if (is_special_char(stream[i]))
{
if (i == 0) // where we create spe_char token
i++;
break;
}
if (isblank(stream[i]))
{
break;
}
}
i++;
}
@ -266,20 +302,26 @@ struct token *pop_token(void)
return NULL;
}
char *stream = stream_init();
ssize_t i = 0;
// Useful to know if we are inside a quote or double quote
enum lexing_mode lexing_mode = LEXER_NORMAL;
while (i < remaining_chars)
{
if (is_special_char(stream[i]))
// true if the quoting mode did not change at stream[i]
if (!update_lexing_mode(stream, i, &lexing_mode))
{
if (i == 0) // where we create spe_char token
i++;
break;
}
if (isblank(stream[i]))
{
break;
if (is_special_char(stream[i]))
{
if (i == 0) // where we create spe_char token
i++;
break;
}
if (isblank(stream[i]))
{
break;
}
}
i++;
}

View file

@ -3,6 +3,13 @@
#include <sys/types.h>
/*
 * Quoting state of the lexer while it scans the input stream.
 */
enum lexing_mode
{
    LEXER_NORMAL = 0, // outside of any quote
    LEXER_QUOTE = 1, // inside a single-quoted section
    LEXER_DOUBLE_QUOTE = 2, // inside a double-quoted section
};
enum token_type
{
// Special characters
@ -10,8 +17,12 @@ enum token_type
TOKEN_EOF,
TOKEN_WORD,
TOKEN_NEWLINE,
// WARNING: quote and double quote should never be used inside a token.
// Their only use is to know whether we are inside a quote, and which type of quote.
TOKEN_QUOTE,
TOKEN_DOUBLE_QUOTE,
TOKEN_GRAVE,
TOKEN_SEMICOLON,
TOKEN_COMMENT,
@ -43,8 +54,6 @@ struct token
/*
* @brief: returns the next (newly allocated) token without consuming it.
* if end of input is reached, enters in EOF looping node,
* returning only the same token of type TOKEN_EOF.
* if end of input is reached, returns a token of type TOKEN_EOF.
*/
struct token *peek_token(void);
@ -57,7 +66,6 @@ struct token *peek_token(void);
* @warning: if the last returned token was a token EOF, it frees it
* and returns NULL. This means that after peeking a token EOF
* in the parser, there must be EXACTLY ONE call to pop_token().
*
*/
struct token *pop_token(void);