diff options
author | Carlos Maniero <carlos@maniero.me> | 2023-05-02 23:45:52 -0300 |
---|---|---|
committer | Johnny Richard <johnny@johnnyrichard.com> | 2023-05-03 22:52:41 +0200 |
commit | b18a53b912ae66ad2bb23985640c9fac56ced358 (patch) | |
tree | eadc80d198b9ddab5f1fe77377e1249a3ec3259f | |
parent | e623c701d2ef41cf4993590e2932c7538c83fc54 (diff) |
parser: Split block into small functions
Since it is possible to look a future token without consuming it, it was
possible to split the block parser into small chunks of code.
There is the performance drawback, because now the parser makes multiple
lookups to the same token. However IMO that it is not a big concern
given the small computation required to get a token. Also it can be
easily addressed by computing all token in advance.
Memory Leak:
During the refactor I found some extra memory leaks related to not
released scopes. So then, more than just printing a message I introduced
an assert on scope.c to make sure developers will get this feedback asap
because our testing framework suppress messages from stderr when the
test passes.
Signed-off-by: Carlos Maniero <carlos@maniero.me>
-rw-r--r-- | src/parser.c | 184 | ||||
-rw-r--r-- | src/scope.c | 2 | ||||
-rw-r--r-- | test/parser_test.c | 4 |
3 files changed, 118 insertions, 72 deletions
diff --git a/src/parser.c b/src/parser.c index 5f7a709..d5ffa21 100644 --- a/src/parser.c +++ b/src/parser.c @@ -316,6 +316,83 @@ parser_parse_variable_definition(parser_t *parser, ast_node_t *node) return true; } +static bool +is_next_statement_a_variable_declaration(parser_t *parser) +{ + token_t token; + lexer_peek_next_token(parser->lexer, &token); + + if (token.kind != TOKEN_NAME) { + return false; + } + + lexer_lookahead(parser->lexer, &token, 2); + + return token.kind == TOKEN_COLON; +} + +static bool +is_next_statement_a_variable_assignement(parser_t *parser) +{ + token_t token; + lexer_peek_next_token(parser->lexer, &token); + + if (token.kind != TOKEN_NAME) { + return false; + } + + lexer_lookahead(parser->lexer, &token, 2); + + return token.kind == TOKEN_EQUAL; +} + +static bool +is_next_statement_return(parser_t *parser) +{ + token_t token; + lexer_peek_next_token(parser->lexer, &token); + return token.kind == TOKEN_KEYWORD_RETURN; +} + +static bool +is_block_end(parser_t *parser) +{ + token_t token; + lexer_peek_next_token(parser->lexer, &token); + return token.kind == TOKEN_CCURLY || token.kind == TOKEN_EOF; +} + +static void +parser_error_report_unexpected_token(parser_t *parser) +{ + token_t token; + lexer_peek_next_token(parser->lexer, &token); + parser_error_t *error = &parser->errors[parser->errors_len++]; + error->token = token; + + sprintf( + error->message, "unexpected token '%s' value='" SVFMT "'", token_kind_to_str(token.kind), SVARG(&token.value)); +} + +static bool +parser_ensure_function_return_statement(parser_t *parser, vector_t *body, token_t *function_token) +{ + ast_node_t *latest_node = vector_at(body, body->size - 1); + + if (latest_node->kind != AST_RETURN_STMT) { + parser_error_t error; + error.token = *function_token; + + sprintf(error.message, "expected 'return' keyword."); + + parser->errors[parser->errors_len++] = error; + vector_destroy(body); + return false; + } + + return true; +} + static vector_t * parser_parse_block_declarations(parser_t *parser) { @@ -327,18 +404,11 @@ parser_parse_block_declarations(parser_t *parser) lexer_peek_next_token(parser->lexer, ¤t_token); scope_enter(parser->scope); - vector_t *body = vector_new(); - while (current_token.kind != TOKEN_CCURLY && current_token.kind != TOKEN_EOF) { - if (current_token.kind != TOKEN_NAME && current_token.kind != TOKEN_KEYWORD_RETURN) { - parser_error_push_unexpected_kind(parser, ¤t_token, TOKEN_NAME); - scope_leave(parser->scope); - vector_destroy(body); - return NULL; - } + while (!is_block_end(parser)) { - if (current_token.kind == TOKEN_KEYWORD_RETURN) { + if (is_next_statement_return(parser)) { ast_node_t *return_node = parser_parse_return_stmt(parser); if (return_node == NULL) { @@ -348,82 +418,51 @@ parser_parse_block_declarations(parser_t *parser) } vector_push_back(body, return_node); - } else { - token_t token; - lexer_lookahead(parser->lexer, &token, 2); - - switch (token.kind) { - case TOKEN_COLON: { - ast_node_t *variable_node = ast_node_new(); - - if (!parser_parse_variable_definition(parser, variable_node)) { - ast_node_destroy(variable_node); - vector_destroy(body); - return NULL; - } - - vector_push_back(body, variable_node); - break; - } - case TOKEN_EQUAL: { - ast_node_t *variable_assignment = ast_node_new(); - - if (!parser_parse_variable_assignment(parser, variable_assignment)) { - ast_node_destroy(variable_assignment); - vector_destroy(body); - return NULL; - } - - vector_push_back(body, variable_assignment); - break; - } - case TOKEN_NAME: - case TOKEN_OPAREN: - case TOKEN_CPAREN: - case TOKEN_SEMICOLON: - case TOKEN_OCURLY: - case TOKEN_CCURLY: - case TOKEN_NUMBER: - case TOKEN_PLUS: - case TOKEN_KEYWORD_RETURN: - case TOKEN_MINUS: - case TOKEN_STAR: - case TOKEN_SLASH: - case TOKEN_EOF: - case TOKEN_UNKNOWN: - // FIXME: Show an error it means syntax error - lexer_drop_next_token(parser->lexer); - break; + continue; + } + + if (is_next_statement_a_variable_declaration(parser)) { + ast_node_t *variable_node = ast_node_new(); + + if (!parser_parse_variable_definition(parser, variable_node)) { + scope_leave(parser->scope); + ast_node_destroy(variable_node); + vector_destroy(body); + return NULL; } + + vector_push_back(body, variable_node); + continue; } - lexer_peek_next_token(parser->lexer, ¤t_token); - } + if (is_next_statement_a_variable_assignement(parser)) { + ast_node_t *variable_assignment = ast_node_new(); - lexer_next_token(parser->lexer, ¤t_token); + if (!parser_parse_variable_assignment(parser, variable_assignment)) { + scope_leave(parser->scope); + ast_node_destroy(variable_assignment); + vector_destroy(body); + return NULL; + } + + vector_push_back(body, variable_assignment); + continue; + } + + parser_error_report_unexpected_token(parser); - if (current_token.kind == TOKEN_EOF) { - parser_error_push_unexpected_kind(parser, ¤t_token, TOKEN_CCURLY); scope_leave(parser->scope); vector_destroy(body); return NULL; } - ast_node_t *latest_node = vector_at(body, body->size - 1); - - if (latest_node->kind != AST_RETURN_STMT) { - parser_error_t error; - error.token = current_token; - - sprintf(error.message, "expected 'return' keyword."); + scope_leave(parser->scope); - parser->errors[parser->errors_len++] = error; - scope_leave(parser->scope); + if (!drop_expected_token(parser, TOKEN_CCURLY)) { vector_destroy(body); return NULL; } - scope_leave(parser->scope); return body; } @@ -462,6 +501,11 @@ parser_parse_function_declaration(parser_t *parser) return NULL; } + if (!parser_ensure_function_return_statement(parser, body, &func_name_token)) { + vector_destroy(body); + return NULL; + } + ast_node_t *node = ast_node_new(); ast_node_init_function_declaration(node, func_name_token.value, return_type, body); diff --git a/src/scope.c b/src/scope.c index 662e59c..6338e60 100644 --- a/src/scope.c +++ b/src/scope.c @@ -15,6 +15,7 @@ * along with this program. If not, see <https://www.gnu.org/licenses/>. */ #include "scope.h" +#include <assert.h> #include <errno.h> #include <stdio.h> #include <string.h> @@ -58,6 +59,7 @@ scope_destroy(scope_t *scope) fprintf(stderr, "Stack not cleaned before destroying. This may lead to memory leaks.\n" "Please make sure to call the leave function before destroying it."); + assert(scope->stack->size == 1); } for (size_t i = 0; i < scope->stack->size; i++) { diff --git a/test/parser_test.c b/test/parser_test.c index 3f814c6..ce77828 100644 --- a/test/parser_test.c +++ b/test/parser_test.c @@ -210,7 +210,7 @@ test_parse_basic_syntax_errors(const MunitParameter params[], void *user_data_or assert_parser_error("main() i32 { return 42; }", "expected ':' but got 'TOKEN_NAME'"); assert_parser_error("main(): { return 42; }", "expected 'TOKEN_NAME' but got '{'"); assert_parser_error("main(): i32 return 42; }", "expected '{' but got 'return'"); - assert_parser_error("main(): i32 { 42; }", "expected 'TOKEN_NAME' but got 'TOKEN_NUMBER'"); + assert_parser_error("main(): i32 { 42; }", "unexpected token 'TOKEN_NUMBER' value='42'"); assert_parser_error("main(): i32 { return; }", "unexpected '; (;)' token"); assert_parser_error("main(): i32 { return 42;", "expected '}' but got end of file"); assert_parser_error("main(): beff { return 42; }", "type 'beff' is not defined"); @@ -218,7 +218,7 @@ test_parse_basic_syntax_errors(const MunitParameter params[], void *user_data_or assert_parser_error("main(): i32 { b = 1; return b; }", "trying to assign 'b' before defining it."); // FIXME: once function calls are implemented, this error should inform that // neither a variable or function call was found. - assert_parser_error("main(): i32 { oxi 42; }", "expected 'TOKEN_NAME' but got 'TOKEN_NUMBER'"); + assert_parser_error("main(): i32 { oxi 42; }", "unexpected token 'TOKEN_NAME' value='oxi'"); return MUNIT_OK; } |