feat(lexer): quote handling

This commit is contained in:
Matteo Flebus 2026-01-19 17:32:45 +01:00
parent f4d7f58ef5
commit c81afc2c69
2 changed files with 127 additions and 77 deletions

View file

@ -69,62 +69,62 @@ static void set_token_spechar(struct token *tok, char *begin, ssize_t size)
return;
switch (begin[0])
{
case EOF:
tok->type = TOKEN_EOF;
break;
case ';':
tok->type = TOKEN_SEMICOLON;
break;
case '\n':
tok->type = TOKEN_NEWLINE;
break;
case '\'':
tok->type = TOKEN_QUOTE;
break;
case '"':
tok->type = TOKEN_DOUBLE_QUOTE;
break;
case '`':
tok->type = TOKEN_GRAVE;
break;
case '#':
tok->type = TOKEN_COMMENT;
break;
case '|':
tok->type = TOKEN_PIPE;
break;
case '&':
tok->type = TOKEN_AMPERSAND;
break;
case '\\':
tok->type = TOKEN_BACKSLASH;
break;
case '$':
tok->type = TOKEN_DOLLAR;
break;
case '(':
tok->type = TOKEN_LEFT_PAREN;
break;
case ')':
tok->type = TOKEN_RIGHT_PAREN;
break;
case '{':
tok->type = TOKEN_LEFT_BRACKET;
break;
case '}':
tok->type = TOKEN_RIGHT_BRACKET;
break;
case '<':
tok->type = TOKEN_LESS;
break;
case '>':
tok->type = TOKEN_GREATER;
break;
case '*':
tok->type = TOKEN_STAR;
break;
default:
break;
case EOF:
tok->type = TOKEN_EOF;
break;
case ';':
tok->type = TOKEN_SEMICOLON;
break;
case '\n':
tok->type = TOKEN_NEWLINE;
break;
case '\'':
tok->type = TOKEN_QUOTE;
break;
case '"':
tok->type = TOKEN_DOUBLE_QUOTE;
break;
case '`':
tok->type = TOKEN_GRAVE;
break;
case '#':
tok->type = TOKEN_COMMENT;
break;
case '|':
tok->type = TOKEN_PIPE;
break;
case '&':
tok->type = TOKEN_AMPERSAND;
break;
case '\\':
tok->type = TOKEN_BACKSLASH;
break;
case '$':
tok->type = TOKEN_DOLLAR;
break;
case '(':
tok->type = TOKEN_LEFT_PAREN;
break;
case ')':
tok->type = TOKEN_RIGHT_PAREN;
break;
case '{':
tok->type = TOKEN_LEFT_BRACKET;
break;
case '}':
tok->type = TOKEN_RIGHT_BRACKET;
break;
case '<':
tok->type = TOKEN_LESS;
break;
case '>':
tok->type = TOKEN_GREATER;
break;
case '*':
tok->type = TOKEN_STAR;
break;
default:
break;
}
}
@ -224,6 +224,36 @@ char *stream_init(void)
return stream;
}
/*
 * @brief: Tracks whether the lexer is inside a quoted section, based on the
 *         single character stream[i].
 *         - In LEXER_NORMAL, a ' or " opens the matching quote mode
 *           (LEXER_QUOTE or LEXER_DOUBLE_QUOTE).
 *         - In LEXER_QUOTE, only a ' closes it (back to LEXER_NORMAL).
 *         - In LEXER_DOUBLE_QUOTE, only a " closes it (back to LEXER_NORMAL).
 *         A quote of the other type found inside an open quote is left
 *         untouched. Backslash escapes are not examined here.
 *
 * @param stream: the characters being lexed; only stream[i] is read.
 * @param i: index of the character to inspect.
 * @param lexing_mode: in/out, current mode; overwritten when it changes.
 *
 * @return: true if an update was done. false otherwise.
 */
static bool update_lexing_mode(char *stream, ssize_t i,
                               enum lexing_mode *lexing_mode)
{
    const char current = stream[i];

    // SECOND quote: each case returns immediately so that the closing quote
    // cannot fall through to the "FIRST quote" logic below and be mistaken
    // for a new opening quote.
    if (*lexing_mode == LEXER_QUOTE)
    {
        if (current != '\'')
            return false;
        *lexing_mode = LEXER_NORMAL;
        return true;
    }
    if (*lexing_mode == LEXER_DOUBLE_QUOTE)
    {
        if (current != '"')
            return false;
        *lexing_mode = LEXER_NORMAL;
        return true;
    }

    // FIRST quote
    if (*lexing_mode == LEXER_NORMAL)
    {
        if (current == '\'')
        {
            *lexing_mode = LEXER_QUOTE;
            return true;
        }
        if (current == '"')
        {
            *lexing_mode = LEXER_DOUBLE_QUOTE;
            return true;
        }
    }

    // Any other mode or character: nothing to update.
    return false;
}
struct token *peek_token(void)
{
// we already created the upcoming token during the previous call to peek()
@ -233,20 +263,26 @@ struct token *peek_token(void)
}
char *stream = stream_init();
ssize_t i = 0;
// Useful to know if we are inside a quote or double quote
enum lexing_mode lexing_mode = LEXER_NORMAL;
while (i < remaining_chars)
{
if (is_special_char(stream[i]))
// true if we didn't encounter a quote of any type at stream[i]
if (!update_lexing_mode(stream, i, &lexing_mode))
{
if (i == 0) // where we create spe_char token
i++;
break;
}
if (isblank(stream[i]))
{
break;
if (is_special_char(stream[i]))
{
if (i == 0) // where we create spe_char token
i++;
break;
}
if (isblank(stream[i]))
{
break;
}
}
i++;
}
@ -266,20 +302,26 @@ struct token *pop_token(void)
return NULL;
}
char *stream = stream_init();
ssize_t i = 0;
// Useful to know if we are inside a quote or double quote
enum lexing_mode lexing_mode = LEXER_NORMAL;
while (i < remaining_chars)
{
if (is_special_char(stream[i]))
// true if we didn't encounter a quote of any type at stream[i]
if (!update_lexing_mode(stream, i, &lexing_mode))
{
if (i == 0) // where we create spe_char token
i++;
break;
}
if (isblank(stream[i]))
{
break;
if (is_special_char(stream[i]))
{
if (i == 0) // where we create spe_char token
i++;
break;
}
if (isblank(stream[i]))
{
break;
}
}
i++;
}