summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Carlos Maniero <carlos@maniero.me> 2023-05-02 23:45:51 -0300
committer: Johnny Richard <johnny@johnnyrichard.com> 2023-05-03 22:45:23 +0200
commit: e623c701d2ef41cf4993590e2932c7538c83fc54 (patch)
tree: e096ead143fe76d73df5789a3aee5b2b6358c602
parent: 990f4d3e4c662c401a08e3704a39878fd6c1c1b6 (diff)
parser: Use lookahead instead of consuming tokens
Previously, during block declaration, the parser consumed the token itself, which caused some parsers (such as return and variable declaration) to not be self-contained and to depend on the caller to start parsing. In this commit, I've refactored the parser to only look at upcoming tokens using lookahead, and to delegate the consumption to the child parser functions. This results in a more modular and self-contained parser that improves the overall maintainability and readability of the code.

Signed-off-by: Carlos Maniero <carlos@maniero.me>
-rw-r--r-- src/lexer.c 13
-rw-r--r-- src/lexer.h 3
-rw-r--r-- src/parser.c 35
-rw-r--r-- test/parser_test.c 3
4 files changed, 43 insertions, 11 deletions
diff --git a/src/lexer.c b/src/lexer.c
index 9f2a57b..7245a41 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -229,13 +229,16 @@ lexer_load_file_contents(lexer_t *lexer)
}
void
-lexer_peek_next_token(lexer_t *lexer, token_t *token)
+lexer_lookahead(lexer_t *lexer, token_t *token, size_t level)
{
uint32_t cur = lexer->cur;
uint32_t row = lexer->row;
uint32_t bol = lexer->bol;
- lexer_next_token(lexer, token);
+ while (level != 0) {
+ lexer_next_token(lexer, token);
+ level--;
+ }
lexer->cur = cur;
lexer->row = row;
@@ -243,6 +246,12 @@ lexer_peek_next_token(lexer_t *lexer, token_t *token)
}
void
+lexer_peek_next_token(lexer_t *lexer, token_t *token)
+{
+ lexer_lookahead(lexer, token, 1);
+}
+
+void
lexer_drop_next_token(lexer_t *lexer)
{
token_t token;
diff --git a/src/lexer.h b/src/lexer.h
index 70d4b0d..9aa8efe 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -89,6 +89,9 @@ lexer_drop_next_token(lexer_t *lexer);
void
lexer_peek_next_token(lexer_t *lexer, token_t *token);
+void
+lexer_lookahead(lexer_t *lexer, token_t *token, size_t level);
+
char *
token_kind_to_str(token_kind_t kind);
diff --git a/src/parser.c b/src/parser.c
index 8218c87..5f7a709 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -224,6 +224,10 @@ parser_parse_expression(parser_t *parser, ast_node_t *node)
static ast_node_t *
parser_parse_return_stmt(parser_t *parser)
{
+ if (!drop_expected_token(parser, TOKEN_KEYWORD_RETURN)) {
+ return NULL;
+ }
+
ast_node_t *argument_token = ast_node_new();
if (!parser_parse_expression(parser, argument_token)) {
ast_node_destroy(argument_token);
@@ -241,8 +245,14 @@ parser_parse_return_stmt(parser_t *parser)
}
static bool
-parser_parse_variable_assignment(parser_t *parser, token_t variable_token, ast_node_t *node)
+parser_parse_variable_assignment(parser_t *parser, ast_node_t *node)
{
+ token_t variable_token;
+
+ if (!expected_token(&variable_token, parser, TOKEN_NAME)) {
+ return false;
+ }
+
if (!drop_expected_token(parser, TOKEN_EQUAL))
return false;
@@ -275,8 +285,13 @@ parser_parse_variable_assignment(parser_t *parser, token_t variable_token, ast_n
}
static bool
-parser_parse_variable_definition(parser_t *parser, string_view_t variable_name, ast_node_t *node)
+parser_parse_variable_definition(parser_t *parser, ast_node_t *node)
{
+ token_t variable_name;
+ if (!expected_token(&variable_name, parser, TOKEN_NAME)) {
+ return false;
+ }
+
if (!drop_expected_token(parser, TOKEN_COLON))
return false;
@@ -295,7 +310,7 @@ parser_parse_variable_definition(parser_t *parser, string_view_t variable_name,
return false;
}
- ast_node_init_variable_declaration(node, variable_name, type, expression);
+ ast_node_init_variable_declaration(node, variable_name.value, type, expression);
scope_push(parser->scope, &node->data.variable_declaration.identifier, node);
return true;
@@ -309,7 +324,7 @@ parser_parse_block_declarations(parser_t *parser)
}
token_t current_token;
- lexer_next_token(parser->lexer, &current_token);
+ lexer_peek_next_token(parser->lexer, &current_token);
scope_enter(parser->scope);
@@ -335,13 +350,13 @@ parser_parse_block_declarations(parser_t *parser)
vector_push_back(body, return_node);
} else {
token_t token;
- lexer_peek_next_token(parser->lexer, &token);
+ lexer_lookahead(parser->lexer, &token, 2);
switch (token.kind) {
case TOKEN_COLON: {
ast_node_t *variable_node = ast_node_new();
- if (!parser_parse_variable_definition(parser, current_token.value, variable_node)) {
+ if (!parser_parse_variable_definition(parser, variable_node)) {
ast_node_destroy(variable_node);
vector_destroy(body);
return NULL;
@@ -353,7 +368,7 @@ parser_parse_block_declarations(parser_t *parser)
case TOKEN_EQUAL: {
ast_node_t *variable_assignment = ast_node_new();
- if (!parser_parse_variable_assignment(parser, current_token, variable_assignment)) {
+ if (!parser_parse_variable_assignment(parser, variable_assignment)) {
ast_node_destroy(variable_assignment);
vector_destroy(body);
return NULL;
@@ -376,13 +391,17 @@ parser_parse_block_declarations(parser_t *parser)
case TOKEN_SLASH:
case TOKEN_EOF:
case TOKEN_UNKNOWN:
+ // FIXME: Show an error it means syntax error
+ lexer_drop_next_token(parser->lexer);
break;
}
}
- lexer_next_token(parser->lexer, &current_token);
+ lexer_peek_next_token(parser->lexer, &current_token);
}
+ lexer_next_token(parser->lexer, &current_token);
+
if (current_token.kind == TOKEN_EOF) {
parser_error_push_unexpected_kind(parser, &current_token, TOKEN_CCURLY);
scope_leave(parser->scope);
diff --git a/test/parser_test.c b/test/parser_test.c
index 52437ce..3f814c6 100644
--- a/test/parser_test.c
+++ b/test/parser_test.c
@@ -67,7 +67,8 @@ test_parse_function(const MunitParameter params[], void *user_data_or_fixture)
make_lexer_from_static_src(&lexer, "main(): i32 { \nreturn 42;\n }");
parser_init(&parser, &lexer, scope);
ast_node_t *ast_function = parser_parse_function_declaration(&parser);
- assert_true(ast_function != NULL);
+
+ assert_not_null(ast_function);
char actual[5];