Merge branch 'lexer' into dev

This commit is contained in:
Matteo Flebus 2026-01-19 17:35:52 +01:00
commit b5267a7e1b
2 changed files with 70 additions and 21 deletions

View file

@ -224,6 +224,36 @@ char *stream_init(void)
return stream; return stream;
} }
/*
 * @brief: Updates the quoting state according to the character stream[i].
 *         - In LEXER_NORMAL, a single or double quote opens the matching
 *           quote mode (FIRST quote).
 *         - In LEXER_QUOTE / LEXER_DOUBLE_QUOTE, only the same kind of quote
 *           closes it and brings the state back to LEXER_NORMAL (SECOND
 *           quote). The other kind of quote is left untouched.
 *
 * @param stream: buffer being lexed; only stream[i] is read.
 * @param i: index of the character to inspect.
 * @param lexing_mode: current mode, updated in place when a transition
 *                     happens.
 *
 * @return: true if an update was done. false otherwise.
 */
static bool update_lexing_mode(char *stream, ssize_t i,
                               enum lexing_mode *lexing_mode)
{
    const char c = stream[i];

    // Each character triggers at most one transition: a closing quote must
    // not be examined a second time as a potential opening quote, otherwise
    // the quote mode would be re-entered immediately and never left.
    switch (*lexing_mode)
    {
    case LEXER_QUOTE:
        if (c == '\'')
        {
            *lexing_mode = LEXER_NORMAL;
            return true;
        }
        return false;

    case LEXER_DOUBLE_QUOTE:
        if (c == '"')
        {
            *lexing_mode = LEXER_NORMAL;
            return true;
        }
        return false;

    case LEXER_NORMAL:
        if (c == '"')
        {
            *lexing_mode = LEXER_DOUBLE_QUOTE;
            return true;
        }
        if (c == '\'')
        {
            *lexing_mode = LEXER_QUOTE;
            return true;
        }
        return false;

    default:
        // Unknown mode: leave the state untouched.
        return false;
    }
}
struct token *peek_token(void) struct token *peek_token(void)
{ {
// we already created the upcoming token during the previous call to peek() // we already created the upcoming token during the previous call to peek()
@ -233,20 +263,26 @@ struct token *peek_token(void)
} }
char *stream = stream_init(); char *stream = stream_init();
ssize_t i = 0; ssize_t i = 0;
// Useful to know if we are inside a quote or double quote
enum lexing_mode lexing_mode = LEXER_NORMAL;
while (i < remaining_chars) while (i < remaining_chars)
{ {
if (is_special_char(stream[i])) // true if we didn't encounter a quotes of any type at stream[i]
if (!update_lexing_mode(stream, i, &lexing_mode))
{ {
if (i == 0) // where we create spe_char token if (is_special_char(stream[i]))
i++; {
break; if (i == 0) // where we create spe_char token
} i++;
if (isblank(stream[i])) break;
{ }
break; if (isblank(stream[i]))
{
break;
}
} }
i++; i++;
} }
@ -266,20 +302,26 @@ struct token *pop_token(void)
return NULL; return NULL;
} }
char *stream = stream_init(); char *stream = stream_init();
ssize_t i = 0; ssize_t i = 0;
// Useful to know if we are inside a quote or double quote
enum lexing_mode lexing_mode = LEXER_NORMAL;
while (i < remaining_chars) while (i < remaining_chars)
{ {
if (is_special_char(stream[i])) // true if we didn't encounter a quotes of any type at stream[i]
if (!update_lexing_mode(stream, i, &lexing_mode))
{ {
if (i == 0) // where we create spe_char token if (is_special_char(stream[i]))
i++; {
break; if (i == 0) // where we create spe_char token
} i++;
if (isblank(stream[i])) break;
{ }
break; if (isblank(stream[i]))
{
break;
}
} }
i++; i++;
} }

View file

@ -3,6 +3,13 @@
#include <sys/types.h> #include <sys/types.h>
/*
 * @brief: quoting state of the lexer while it scans the input stream.
 */
enum lexing_mode
{
    LEXER_NORMAL = 0, // not inside any quotes
    LEXER_QUOTE = 1, // after an opening single quote (')
    LEXER_DOUBLE_QUOTE = 2, // after an opening double quote (")
};
enum token_type enum token_type
{ {
// Special characters // Special characters
@ -10,8 +17,11 @@ enum token_type
TOKEN_EOF, TOKEN_EOF,
TOKEN_WORD, TOKEN_WORD,
TOKEN_NEWLINE, TOKEN_NEWLINE,
// WARNING: quote and double quote should never be used inside a token.
TOKEN_QUOTE, TOKEN_QUOTE,
TOKEN_DOUBLE_QUOTE, TOKEN_DOUBLE_QUOTE,
TOKEN_GRAVE, TOKEN_GRAVE,
TOKEN_SEMICOLON, TOKEN_SEMICOLON,
TOKEN_COMMENT, TOKEN_COMMENT,
@ -43,8 +53,6 @@ struct token
/* /*
* @brief: returns the next (newly allocated) token without consuming it. * @brief: returns the next (newly allocated) token without consuming it.
* if end of input is reached, enters in EOF looping node,
* returning only the same token of type TOKEN_EOF.
* if end of input is reached, returns a token of type TOKEN_EOF. * if end of input is reached, returns a token of type TOKEN_EOF.
*/ */
struct token *peek_token(void); struct token *peek_token(void);
@ -57,7 +65,6 @@ struct token *peek_token(void);
* @warning: if the last returned token was a token EOF, it frees it * @warning: if the last returned token was a token EOF, it frees it
* and returns NULL. This means that after peeking a token EOF * and returns NULL. This means that after peeking a token EOF
* in the parser, there must be EXACTLY ONE call to pop_token(). * in the parser, there must be EXACTLY ONE call to pop_token().
*
*/ */
struct token *pop_token(void); struct token *pop_token(void);