From b18a53b912ae66ad2bb23985640c9fac56ced358 Mon Sep 17 00:00:00 2001
From: Carlos Maniero
Date: Tue, 2 May 2023 23:45:52 -0300
Subject: parser: Split block into small functions

Since it is possible to look at a future token without consuming it, the
block parser could be split into small chunks of code.

There is a performance drawback, because the parser now makes multiple
lookups of the same token. However, IMO this is not a big concern given
the small amount of computation required to get a token, and it can
easily be addressed later by computing all tokens in advance.

Memory leak: during the refactor I found some extra memory leaks related
to scopes that were never released. So, rather than just printing a
message, I introduced an assert in scope.c to make sure developers get
this feedback as soon as possible, because our testing framework
suppresses stderr messages when a test passes.

Signed-off-by: Carlos Maniero
---
 src/parser.c | 184 ++++++++++++++++++++++++++++++++++++-----------------------
 src/scope.c  |   2 +
 2 files changed, 116 insertions(+), 70 deletions(-)

(limited to 'src')

diff --git a/src/parser.c b/src/parser.c
index 5f7a709..d5ffa21 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -316,6 +316,83 @@ parser_parse_variable_definition(parser_t *parser, ast_node_t *node)
     return true;
 }

+static bool
+is_next_statement_a_variable_declaration(parser_t *parser)
+{
+    token_t token;
+    lexer_peek_next_token(parser->lexer, &token);
+
+    if (token.kind != TOKEN_NAME) {
+        return false;
+    }
+
+    lexer_lookahead(parser->lexer, &token, 2);
+
+    return token.kind == TOKEN_COLON;
+}
+
+static bool
+is_next_statement_a_variable_assignement(parser_t *parser)
+{
+    token_t token;
+    lexer_peek_next_token(parser->lexer, &token);
+
+    if (token.kind != TOKEN_NAME) {
+        return false;
+    }
+
+    lexer_lookahead(parser->lexer, &token, 2);
+
+    return token.kind == TOKEN_EQUAL;
+}
+
+static bool
+is_next_statement_return(parser_t *parser)
+{
+    token_t token;
+    lexer_peek_next_token(parser->lexer, &token);
+    return token.kind == TOKEN_KEYWORD_RETURN;
+}
+
+static bool
+is_block_end(parser_t *parser)
+{
+    token_t token;
+    lexer_peek_next_token(parser->lexer, &token);
+    return token.kind == TOKEN_CCURLY || token.kind == TOKEN_EOF;
+}
+
+static void
+parser_error_report_unexpected_token(parser_t *parser)
+{
+    token_t token;
+    lexer_peek_next_token(parser->lexer, &token);
+    parser_error_t *error = &parser->errors[parser->errors_len++];
+    error->token = token;
+
+    sprintf(
+        error->message, "unexpected token '%s' value='" SVFMT "'", token_kind_to_str(token.kind), SVARG(&token.value));
+}
+
+static bool
+parser_ensure_function_return_statement(parser_t *parser, vector_t *body, token_t *function_token)
+{
+    ast_node_t *latest_node = vector_at(body, body->size - 1);
+
+    if (latest_node->kind != AST_RETURN_STMT) {
+        parser_error_t error;
+        error.token = *function_token;
+
+        sprintf(error.message, "expected 'return' keyword.");
+
+        parser->errors[parser->errors_len++] = error;
+        vector_destroy(body);
+        return false;
+    }
+
+    return true;
+}
+
 static vector_t *
 parser_parse_block_declarations(parser_t *parser)
 {
@@ -327,18 +404,11 @@
     lexer_peek_next_token(parser->lexer, &current_token);

     scope_enter(parser->scope);
-
     vector_t *body = vector_new();

-    while (current_token.kind != TOKEN_CCURLY && current_token.kind != TOKEN_EOF) {
-        if (current_token.kind != TOKEN_NAME && current_token.kind != TOKEN_KEYWORD_RETURN) {
-            parser_error_push_unexpected_kind(parser, &current_token, TOKEN_NAME);
-            scope_leave(parser->scope);
-            vector_destroy(body);
-            return NULL;
-        }
+    while (!is_block_end(parser)) {

-        if (current_token.kind == TOKEN_KEYWORD_RETURN) {
+        if (is_next_statement_return(parser)) {
             ast_node_t *return_node = parser_parse_return_stmt(parser);

             if (return_node == NULL) {
@@ -348,82 +418,51 @@
             }

             vector_push_back(body, return_node);
-        } else {
-            token_t token;
-            lexer_lookahead(parser->lexer, &token, 2);
-
-            switch (token.kind) {
-                case TOKEN_COLON: {
-                    ast_node_t *variable_node = ast_node_new();
-
-                    if (!parser_parse_variable_definition(parser, variable_node)) {
-                        ast_node_destroy(variable_node);
-                        vector_destroy(body);
-                        return NULL;
-                    }
-
-                    vector_push_back(body, variable_node);
-                    break;
-                }
-                case TOKEN_EQUAL: {
-                    ast_node_t *variable_assignment = ast_node_new();
-
-                    if (!parser_parse_variable_assignment(parser, variable_assignment)) {
-                        ast_node_destroy(variable_assignment);
-                        vector_destroy(body);
-                        return NULL;
-                    }
-
-                    vector_push_back(body, variable_assignment);
-                    break;
-                }
-                case TOKEN_NAME:
-                case TOKEN_OPAREN:
-                case TOKEN_CPAREN:
-                case TOKEN_SEMICOLON:
-                case TOKEN_OCURLY:
-                case TOKEN_CCURLY:
-                case TOKEN_NUMBER:
-                case TOKEN_PLUS:
-                case TOKEN_KEYWORD_RETURN:
-                case TOKEN_MINUS:
-                case TOKEN_STAR:
-                case TOKEN_SLASH:
-                case TOKEN_EOF:
-                case TOKEN_UNKNOWN:
-                    // FIXME: Show an error it means syntax error
-                    lexer_drop_next_token(parser->lexer);
-                    break;
+            continue;
+        }
+
+        if (is_next_statement_a_variable_declaration(parser)) {
+            ast_node_t *variable_node = ast_node_new();
+
+            if (!parser_parse_variable_definition(parser, variable_node)) {
+                scope_leave(parser->scope);
+                ast_node_destroy(variable_node);
+                vector_destroy(body);
+                return NULL;
             }
+
+            vector_push_back(body, variable_node);
+            continue;
         }

-        lexer_peek_next_token(parser->lexer, &current_token);
-    }
+        if (is_next_statement_a_variable_assignement(parser)) {
+            ast_node_t *variable_assignment = ast_node_new();

-    lexer_next_token(parser->lexer, &current_token);
+            if (!parser_parse_variable_assignment(parser, variable_assignment)) {
+                scope_leave(parser->scope);
+                ast_node_destroy(variable_assignment);
+                vector_destroy(body);
+                return NULL;
+            }
+
+            vector_push_back(body, variable_assignment);
+            continue;
+        }
+
+        parser_error_report_unexpected_token(parser);

-    if (current_token.kind == TOKEN_EOF) {
-        parser_error_push_unexpected_kind(parser, &current_token, TOKEN_CCURLY);
         scope_leave(parser->scope);
         vector_destroy(body);
         return NULL;
     }

-    ast_node_t *latest_node = vector_at(body, body->size - 1);
-
-    if (latest_node->kind != AST_RETURN_STMT) {
-        parser_error_t error;
-        error.token = current_token;
-
-        sprintf(error.message, "expected 'return' keyword.");
+    scope_leave(parser->scope);

-        parser->errors[parser->errors_len++] = error;
-        scope_leave(parser->scope);
+    if (!drop_expected_token(parser, TOKEN_CCURLY)) {
         vector_destroy(body);
         return NULL;
     }

-    scope_leave(parser->scope);
     return body;
 }

@@ -462,6 +501,11 @@ parser_parse_function_declaration(parser_t *parser)
         return NULL;
     }

+    if (!parser_ensure_function_return_statement(parser, body, &func_name_token)) {
+        vector_destroy(body);
+        return NULL;
+    }
+
     ast_node_t *node = ast_node_new();
     ast_node_init_function_declaration(node, func_name_token.value, return_type, body);

diff --git a/src/scope.c b/src/scope.c
index 662e59c..6338e60 100644
--- a/src/scope.c
+++ b/src/scope.c
@@ -15,6 +15,7 @@
  * along with this program. If not, see .
  */
 #include "scope.h"
+#include <assert.h>
 #include
 #include
 #include
@@ -58,6 +59,7 @@ scope_destroy(scope_t *scope)
         fprintf(stderr,
                 "Stack not cleaned before destroying. This may lead to memory leaks.\n"
                 "Please make sure to call the leave function before destroying it.");
+        assert(scope->stack->size == 1);
     }

     for (size_t i = 0; i < scope->stack->size; i++) {
--
cgit v1.2.3
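
Note on "computing all tokens in advance": the commit message suggests that the
repeated lookups performed by the new is_next_statement_* helpers could be made
cheap by tokenizing the whole input once up front. The sketch below illustrates
one way that could look; token_stream_t, its fields, and the next_token callback
are hypothetical stand-ins for this illustration and are not part of this patch
or of the project's lexer API.

/* Sketch: cache every token once so that lookahead is an array read.
 * Error handling for allocation failures is omitted for brevity. */
#include <stdlib.h>

typedef enum { TOKEN_NAME, TOKEN_COLON, TOKEN_EQUAL, TOKEN_EOF } token_kind_t;

typedef struct
{
    token_kind_t kind;
} token_t;

typedef struct
{
    token_t *tokens; /* every token, produced once up front */
    size_t size;
    size_t cursor;   /* index of the next unconsumed token */
} token_stream_t;

/* Drain the tokenizer once and cache every token, stopping after TOKEN_EOF.
 * next_token stands in for whatever produces tokens one at a time. */
static token_stream_t
token_stream_build(token_t (*next_token)(void *), void *lexer)
{
    token_stream_t stream = { NULL, 0, 0 };
    size_t capacity = 16;

    stream.tokens = malloc(capacity * sizeof(token_t));

    for (;;) {
        if (stream.size == capacity) {
            capacity *= 2;
            stream.tokens = realloc(stream.tokens, capacity * sizeof(token_t));
        }

        stream.tokens[stream.size] = next_token(lexer);

        if (stream.tokens[stream.size++].kind == TOKEN_EOF) {
            return stream;
        }
    }
}

/* Lookahead becomes an index computation: peeking the same token several
 * times, as the predicate helpers do, costs nothing extra. */
static token_t
token_stream_peek(const token_stream_t *stream, size_t offset)
{
    size_t index = stream->cursor + offset;

    if (index >= stream->size) {
        index = stream->size - 1; /* clamp to the trailing TOKEN_EOF */
    }

    return stream->tokens[index];
}

/* Consume one token; the cursor never moves past TOKEN_EOF. */
static token_t
token_stream_next(token_stream_t *stream)
{
    token_t token = token_stream_peek(stream, 0);

    if (stream->cursor + 1 < stream->size) {
        stream->cursor++;
    }

    return token;
}

With a cache like this the predicates keep their current shape, but each peek is
an array access instead of re-running the lexer, so calling them several times
per statement stays cheap.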