feat(lexer): quote handling

This commit is contained in:
Matteo Flebus 2026-01-19 17:32:45 +01:00
parent f4d7f58ef5
commit c81afc2c69
2 changed files with 127 additions and 77 deletions

View file

@ -224,6 +224,36 @@ char *stream_init(void)
return stream;
}
/*
 * @brief: Updates the lexing_mode according to the character at stream[i].
 *         - In LEXER_QUOTE, a single quote closes the section
 *           (mode becomes LEXER_NORMAL).
 *         - In LEXER_DOUBLE_QUOTE, a double quote closes the section
 *           (mode becomes LEXER_NORMAL).
 *         - In LEXER_NORMAL, a single or double quote opens the
 *           corresponding section.
 *         Any other character leaves the mode untouched, including a quote
 *         of the other type found inside a quoted section.
 *
 * @param stream: buffer being lexed; only stream[i] is read.
 * @param i: index of the character to inspect.
 * @param lexing_mode: in/out, the current lexing mode.
 *
 * @return: true if an update was done. false otherwise.
 *
 * @note: backslash escapes are not interpreted here.
 */
static bool update_lexing_mode(char *stream, ssize_t i,
                               enum lexing_mode *lexing_mode)
{
    const char c = stream[i];

    // Exactly ONE transition may happen per character: a closing quote must
    // not be re-examined as an opening quote once the mode is back to
    // LEXER_NORMAL, otherwise the quoted section would never end.
    switch (*lexing_mode)
    {
    case LEXER_QUOTE:
        // SECOND single quote
        if (c == '\'')
        {
            *lexing_mode = LEXER_NORMAL;
            return true;
        }
        return false;

    case LEXER_DOUBLE_QUOTE:
        // SECOND double quote
        if (c == '"')
        {
            *lexing_mode = LEXER_NORMAL;
            return true;
        }
        return false;

    case LEXER_NORMAL:
        // FIRST quote of any type
        if (c == '"')
        {
            *lexing_mode = LEXER_DOUBLE_QUOTE;
            return true;
        }
        if (c == '\'')
        {
            *lexing_mode = LEXER_QUOTE;
            return true;
        }
        return false;

    default:
        // Unknown mode: leave it untouched.
        return false;
    }
}
struct token *peek_token(void)
{
// we already created the upcoming token during the previous call to peek()
@ -233,10 +263,15 @@ struct token *peek_token(void)
}
char *stream = stream_init();
ssize_t i = 0;
// Tracks whether we are currently inside single or double quotes
enum lexing_mode lexing_mode = LEXER_NORMAL;
while (i < remaining_chars)
{
// true if update_lexing_mode() reported no mode change at stream[i]
if (!update_lexing_mode(stream, i, &lexing_mode))
{
if (is_special_char(stream[i]))
{
@ -248,6 +283,7 @@ struct token *peek_token(void)
{
break;
}
}
i++;
}
@ -266,10 +302,15 @@ struct token *pop_token(void)
return NULL;
}
char *stream = stream_init();
ssize_t i = 0;
// Tracks whether we are currently inside single or double quotes
enum lexing_mode lexing_mode = LEXER_NORMAL;
while (i < remaining_chars)
{
// true if update_lexing_mode() reported no mode change at stream[i]
if (!update_lexing_mode(stream, i, &lexing_mode))
{
if (is_special_char(stream[i]))
{
@ -281,6 +322,7 @@ struct token *pop_token(void)
{
break;
}
}
i++;
}

View file

@ -3,6 +3,13 @@
#include <sys/types.h>
/*
 * @brief: quoting state of the lexer while it scans the input stream.
 */
enum lexing_mode
{
    LEXER_NORMAL = 0, // outside of any quoted section
    LEXER_QUOTE = 1, // between two single quotes
    LEXER_DOUBLE_QUOTE = 2 // between two double quotes
};
enum token_type
{
// Special characters
@ -10,8 +17,12 @@ enum token_type
TOKEN_EOF,
TOKEN_WORD,
TOKEN_NEWLINE,
// WARNING: quote and double quote should never be used inside a token.
// Their only use is to know whether we are inside a quote, and of which type.
TOKEN_QUOTE,
TOKEN_DOUBLE_QUOTE,
TOKEN_GRAVE,
TOKEN_SEMICOLON,
TOKEN_COMMENT,
@ -43,8 +54,6 @@ struct token
/*
* @brief: returns the next (newly allocated) token without consuming it.
* if end of input is reached, enters in EOF looping node,
* returning only the same token of type TOKEN_EOF.
* if end of input is reached, returns a token of type TOKEN_EOF.
*/
struct token *peek_token(void);
@ -57,7 +66,6 @@ struct token *peek_token(void);
* @warning: if the last returned token was a token EOF, it frees it
* and returns NULL. This means that after peeking a token EOF
* in the parser, there must be EXACTLY ONE call to pop_token().
*
*/
struct token *pop_token(void);