diff options
author | Carlos Maniero <carlos@maniero.me> | 2023-05-11 15:00:10 -0300 |
---|---|---|
committer | Carlos Maniero <carlos@maniero.me> | 2023-05-11 15:29:57 -0300 |
commit | ea7f65fe1250be8f49edcaaedd3410aed1401648 (patch) | |
tree | ffe7d6ec48bc6018b5eea7e9ef573cd1bbd57c2f /src/parser.c | |
parent | 5042a4ffc1363d6f0f99a3afd79f76cf2da738d6 (diff) |
gas: implement recursion and late evaluation
Until now the below code was not valid for pipac.
fn main(): i32 {
return fib(13);
}
fn fib(n: i32): i32 {
if n <= 1 {
return n;
}
return fib(n - 1) + fib(n - 2);
}
Pipa's parser was adding a function to scope after they were fully
parsed which means that fib's self-reference would not work.
Also, functions were required to follow the be called in the order they
are declared for the same scope reason so, the main function was
required to be defined after fib.
And how it is working now?
When a TOKEN_NAME is not found in the scope, instead of returning an
error, an unknown token is created as placeholder. The parser stores the
node reference and the token it was trying to parse.
During type checks, if the parser detects an unknown node, instead of
returning an error, it stores in that node what was the expected type.
After the NS is fully parsed a reevaluation is made on those unknown
nodes by setting the lexer back on the node's token position and parsing
the TOKEN_NAME again.
Ps: There is a typo on the unknown token. It will be addressed in
another commit since this issue was not introduced by this change.
Signed-off-by: Carlos Maniero <carlos@maniero.me>
Diffstat (limited to 'src/parser.c')
-rw-r--r-- | src/parser.c | 199 |
1 files changed, 154 insertions, 45 deletions
diff --git a/src/parser.c b/src/parser.c index fe2a2f0..8180351 100644 --- a/src/parser.c +++ b/src/parser.c @@ -16,6 +16,7 @@ */ #include <assert.h> +#include <errno.h> #include <stdbool.h> #include <stdio.h> #include <stdlib.h> @@ -36,6 +37,12 @@ parser_parse_block_declarations(parser_t *parser, type_t result_type); static bool is_next_token_cparen(parser_t *parser); +static bool +is_next_token_oparen(parser_t *parser); + +static bool +is_next_token_eof(parser_t *parser); + void parser_init(parser_t *parser, lexer_t *lexer, scope_t *scope) { @@ -43,10 +50,28 @@ parser_init(parser_t *parser, lexer_t *lexer, scope_t *scope) assert(lexer && "lexer must be defined"); parser->lexer = lexer; parser->scope = scope; + // FIXME: change parser_init to parser_new and create a destruction function + // for it + parser->to_be_reevaluated = vector_new(); parser->errors_len = 0; } static void +parser_to_be_reevaluated_push(parser_t *parser, ast_node_t *node, token_t token) +{ + reevaluation_node_t *reevaluation_node = (reevaluation_node_t *)malloc(sizeof(reevaluation_node_t)); + + if (reevaluation_node == NULL) { + fprintf(stderr, "ref_entry_new: Out of memory: %s\n", strerror(errno)); + exit(EXIT_FAILURE); + } + + *reevaluation_node = (reevaluation_node_t){ .node = node, token = token }; + + vector_push_back(parser->to_be_reevaluated, reevaluation_node); +} + +static void parser_error_push_unexpected_kind(parser_t *parser, token_t *token, token_kind_t expected) { parser_error_t *error = &parser->errors[parser->errors_len++]; @@ -173,6 +198,9 @@ parser_parse_arithmetic2(parser_t *parser); static ast_node_t * parser_parse_factor(parser_t *parser); +static ast_node_t * +parser_parse_name_factor(parser_t *parser, token_t *token); + /** * * <expression> ::= <comparison1> @@ -341,65 +369,89 @@ parser_parse_factor(parser_t *parser) return expression; } - case TOKEN_NAME: { - ast_node_t *referenced_node = scope_get(parser->scope, token.value); - - if (referenced_node == NULL) { - parser_error_t error; - error.token = token; - sprintf(error.message, "identifier '" SVFMT "' not defined", SVARG(&token.value)); - parser->errors[parser->errors_len++] = error; - return NULL; - } + case TOKEN_NAME: + return parser_parse_name_factor(parser, &token); + default: { + parser_error_t error; + error.token = token; + sprintf(error.message, "unexpected '%s (" SVFMT ")' token", token_kind_to_str(token.kind), SVARG(&token.value)); + parser->errors[parser->errors_len++] = error; + return NULL; + } + } +} - if (referenced_node->kind == AST_VARIABLE_DECLARATION) { - return ast_node_new_variable(&referenced_node->data.variable_declaration.identifier, - referenced_node->data.variable_declaration.type); - } +static void +parser_drop_until_close_paren(parser_t *parser) +{ + if (!is_next_token_oparen(parser)) { + return; + } - // Function parameters are also variables. - if (referenced_node->kind == AST_FUNCTION_PARAMETER) { - return ast_node_new_variable(&referenced_node->data.function_parameter.identifier, - referenced_node->data.function_parameter.type); - } + int open_parens = 0; + do { + if (is_next_token_oparen(parser)) { + open_parens++; + } else if (is_next_token_cparen(parser)) { + open_parens--; + } + lexer_drop_next_token(parser->lexer); + } while (open_parens != 0 && !is_next_token_eof(parser)); +} - // Parse function parameters - if (!drop_expected_token(parser, TOKEN_OPAREN)) { - return NULL; - } +static ast_node_t * +parser_parse_name_factor(parser_t *parser, token_t *token) +{ + ast_node_t *referenced_node = scope_get(parser->scope, token->value); - vector_t *arguments = vector_new(); + if (referenced_node == NULL) { + ast_node_t *node = ast_node_new(); - while (!is_next_token_cparen(parser)) { - ast_node_t *argument = parser_parse_expression(parser); + parser_to_be_reevaluated_push(parser, node, *token); - if (argument == NULL) { - ast_node_destroy_vector(arguments); - return NULL; - } + parser_drop_until_close_paren(parser); + return node; + } - vector_push_back(arguments, argument); + if (referenced_node->kind == AST_VARIABLE_DECLARATION) { + return ast_node_new_variable(&referenced_node->data.variable_declaration.identifier, + referenced_node->data.variable_declaration.type); + } - if (!is_next_token_cparen(parser) && !drop_expected_token(parser, TOKEN_COMMA)) { - ast_node_destroy_vector(arguments); - return NULL; - } - } + // Function parameters are also variables. + if (referenced_node->kind == AST_FUNCTION_PARAMETER) { + return ast_node_new_variable(&referenced_node->data.function_parameter.identifier, + referenced_node->data.function_parameter.type); + } - drop_expected_token(parser, TOKEN_CPAREN); + // Parse function parameters + if (!drop_expected_token(parser, TOKEN_OPAREN)) { + return NULL; + } - return ast_node_new_function_call(ast_node_function_declaration_identifier(referenced_node), - referenced_node->data.function.prototype.return_type, - arguments); + vector_t *arguments = vector_new(); + + while (!is_next_token_cparen(parser)) { + ast_node_t *argument = parser_parse_expression(parser); + + if (argument == NULL) { + ast_node_destroy_vector(arguments); + return NULL; } - default: { - parser_error_t error; - error.token = token; - sprintf(error.message, "unexpected '%s (" SVFMT ")' token", token_kind_to_str(token.kind), SVARG(&token.value)); - parser->errors[parser->errors_len++] = error; + + vector_push_back(arguments, argument); + + if (!is_next_token_cparen(parser) && !drop_expected_token(parser, TOKEN_COMMA)) { + ast_node_destroy_vector(arguments); return NULL; } } + + drop_expected_token(parser, TOKEN_CPAREN); + + return ast_node_new_function_call(ast_node_function_declaration_identifier(referenced_node), + referenced_node->data.function.prototype.return_type, + arguments); } static ast_node_t * @@ -477,6 +529,11 @@ parser_parse_variable_assignment(parser_t *parser) static bool parser_expression_matches_the_expected_type(parser_t *parser, ast_node_t *expression, type_t type, token_t token) { + if (expression->kind == AST_UNKOWN_NODE) { + expression->data.unknown.expected_type = type; + expression->data.unknown.reference_token = token; + return true; + } if (expression->result_type != type) { parser_error_t error; error.token = token; @@ -626,6 +683,14 @@ is_next_token_cparen(parser_t *parser) } static bool +is_next_token_oparen(parser_t *parser) +{ + token_t token; + lexer_peek_next_token(parser->lexer, &token); + return token.kind == TOKEN_OPAREN; +} + +static bool is_next_token_eof(parser_t *parser) { token_t token; @@ -850,6 +915,45 @@ parser_parse_function_declaration(parser_t *parser) return node; } +static bool +parser_reevaluate_nodes(parser_t *parser) +{ + for (size_t i = 0; i < parser->to_be_reevaluated->size; i++) { + reevaluation_node_t *reevaluation = vector_at(parser->to_be_reevaluated, i); + + lexer_step_back_to(parser->lexer, &reevaluation->token); + lexer_drop_next_token(parser->lexer); + + ast_node_t *node = parser_parse_name_factor(parser, &reevaluation->token); + + if (node == NULL) { + return false; + } + + if (node->kind == AST_UNKOWN_NODE) { + parser_error_t error; + error.token = reevaluation->token; + sprintf(error.message, "identifier '" SVFMT "' not defined", SVARG(&reevaluation->token.value)); + parser->errors[parser->errors_len++] = error; + ast_node_destroy(node); + return false; + } + + if (!parser_expression_matches_the_expected_type(parser, + node, + reevaluation->node->data.unknown.expected_type, + reevaluation->node->data.unknown.reference_token)) { + ast_node_destroy(node); + return false; + } + + *reevaluation->node = *node; + + free(node); + } + return true; +} + ast_node_t * parser_parse_ns(parser_t *parser) { @@ -878,5 +982,10 @@ parser_parse_ns(parser_t *parser) return NULL; } + if (!parser_reevaluate_nodes(parser)) { + ast_node_destroy_vector(nodes); + return NULL; + } + return ast_node_new_namespace(nodes); } |