diff options
author | Carlos Maniero <carlos@maniero.me> | 2023-05-11 15:00:10 -0300 |
---|---|---|
committer | Carlos Maniero <carlos@maniero.me> | 2023-05-11 15:29:57 -0300 |
commit | ea7f65fe1250be8f49edcaaedd3410aed1401648 (patch) | |
tree | ffe7d6ec48bc6018b5eea7e9ef573cd1bbd57c2f | |
parent | 5042a4ffc1363d6f0f99a3afd79f76cf2da738d6 (diff) |
gas: implement recursion and late evaluation
Until now the below code was not valid for pipac.
fn main(): i32 {
return fib(13);
}
fn fib(n: i32): i32 {
if n <= 1 {
return n;
}
return fib(n - 1) + fib(n - 2);
}
Pipa's parser was adding a function to scope after they were fully
parsed which means that fib's self-reference would not work.
Also, functions were required to follow the be called in the order they
are declared for the same scope reason so, the main function was
required to be defined after fib.
And how it is working now?
When a TOKEN_NAME is not found in the scope, instead of returning an
error, an unknown token is created as placeholder. The parser stores the
node reference and the token it was trying to parse.
During type checks, if the parser detects an unknown node, instead of
returning an error, it stores in that node what was the expected type.
After the NS is fully parsed a reevaluation is made on those unknown
nodes by setting the lexer back on the node's token position and parsing
the TOKEN_NAME again.
Ps: There is a typo on the unknown token. It will be addressed in
another commit since this issue was not introduced by this change.
Signed-off-by: Carlos Maniero <carlos@maniero.me>
-rw-r--r-- | examples/fibbonacci.pipa | 10 | ||||
-rw-r--r-- | src/ast.h | 12 | ||||
-rw-r--r-- | src/gas_assembly_generator.c | 5 | ||||
-rw-r--r-- | src/lexer.c | 8 | ||||
-rw-r--r-- | src/lexer.h | 3 | ||||
-rw-r--r-- | src/parser.c | 199 | ||||
-rw-r--r-- | src/parser.h | 7 | ||||
-rw-r--r-- | test/integration_test.c | 1 | ||||
-rw-r--r-- | test/parser_test.c | 2 |
9 files changed, 198 insertions, 49 deletions
diff --git a/examples/fibbonacci.pipa b/examples/fibbonacci.pipa new file mode 100644 index 0000000..c19e65d --- /dev/null +++ b/examples/fibbonacci.pipa @@ -0,0 +1,10 @@ +fn main(): i32 { + return fib(13); +} + +fn fib(n: i32): i32 { + if n <= 1 { + return n; + } + return fib(n - 1) + fib(n - 2); +} @@ -16,6 +16,7 @@ */ #ifndef AST_H #define AST_H +#include "lexer.h" #include "string_view.h" #include "vector.h" #include <stdint.h> @@ -140,6 +141,12 @@ typedef struct ast_if_stmt_t ast_node_t *body; } ast_if_stmt_t; +typedef struct ast_unkown_token_t +{ + type_t expected_type; + token_t reference_token; +} ast_unkown_token_t; + typedef enum { AST_NAMESPACE, @@ -151,10 +158,10 @@ typedef enum AST_LITERAL, AST_RETURN_STMT, AST_IF_STMT, - AST_UNKOWN_NODE, AST_VARIABLE_DECLARATION, AST_VARIABLE_ASSIGNMENT, - AST_VARIABLE + AST_VARIABLE, + AST_UNKOWN_NODE } ast_node_kind_t; typedef union @@ -171,6 +178,7 @@ typedef union ast_variable_declaration_t variable_declaration; ast_variable_assignment_t variable_assignment; ast_variable_t variable; + ast_unkown_token_t unknown; } ast_node_data_t; typedef struct ast_node_t diff --git a/src/gas_assembly_generator.c b/src/gas_assembly_generator.c index 26bf3f9..7641147 100644 --- a/src/gas_assembly_generator.c +++ b/src/gas_assembly_generator.c @@ -214,6 +214,7 @@ gas_assembly_generator_compile_function(gas_assembly_generator_t *gen, ast_funct fprintf(gen->stream, SVFMT ":\n", SVARG(&func->prototype.identifier.name)); fprintf(gen->stream, " push %%rbp\n"); fprintf(gen->stream, " mov %%rsp, %%rbp\n"); + fprintf(gen->stream, " subq $16, %%rsp\n"); for (size_t i = 0; i < func->prototype.parameters->size; i++) { char *reg = calling_registers[i]; @@ -232,7 +233,7 @@ gas_assembly_generator_compile_function(gas_assembly_generator_t *gen, ast_funct gas_assembly_generator_compile(gen, func->body); fprintf(gen->stream, ".L%ld:\n", return_label_index); - fprintf(gen->stream, " pop %%rbp\n"); + fprintf(gen->stream, " leave\n"); fprintf(gen->stream, " ret\n"); gen->return_label_index = previous_index; @@ -515,7 +516,7 @@ gas_assembly_generator_binary_operation(gas_assembly_generator_t *gen, ast_binar binary_operation->kind == AST_BINOP_EQUAL || binary_operation->kind == AST_BINOP_NOT_EQUAL || binary_operation->kind == AST_BINOP_GT || binary_operation->kind == AST_BINOP_LT || binary_operation->kind == AST_BINOP_LT_EQUAL || binary_operation->kind == AST_BINOP_GT_EQUAL) { - fprintf(gen->stream, " cmp %%rcx, %%rax\n"); + fprintf(gen->stream, " cmpq %%rcx, %%rax\n"); return; } diff --git a/src/lexer.c b/src/lexer.c index 7c668d3..c00c944 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -349,6 +349,14 @@ lexer_load_file_contents(lexer_t *lexer) } void +lexer_step_back_to(lexer_t *lexer, token_t *token) +{ + lexer->cur = token->bol + token->col; + lexer->row = token->row; + lexer->bol = token->bol; +} + +void lexer_lookahead(lexer_t *lexer, token_t *token, size_t level) { uint32_t cur = lexer->cur; diff --git a/src/lexer.h b/src/lexer.h index 46912dc..c544a15 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -115,6 +115,9 @@ void lexer_peek_next_token(lexer_t *lexer, token_t *token); void +lexer_step_back_to(lexer_t *lexer, token_t *token); + +void lexer_lookahead(lexer_t *lexer, token_t *token, size_t level); char * diff --git a/src/parser.c b/src/parser.c index fe2a2f0..8180351 100644 --- a/src/parser.c +++ b/src/parser.c @@ -16,6 +16,7 @@ */ #include <assert.h> +#include <errno.h> #include <stdbool.h> #include <stdio.h> #include <stdlib.h> @@ -36,6 +37,12 @@ parser_parse_block_declarations(parser_t *parser, type_t result_type); static bool is_next_token_cparen(parser_t *parser); +static bool +is_next_token_oparen(parser_t *parser); + +static bool +is_next_token_eof(parser_t *parser); + void parser_init(parser_t *parser, lexer_t *lexer, scope_t *scope) { @@ -43,10 +50,28 @@ parser_init(parser_t *parser, lexer_t *lexer, scope_t *scope) assert(lexer && "lexer must be defined"); parser->lexer = lexer; parser->scope = scope; + // FIXME: change parser_init to parser_new and create a destruction function + // for it + parser->to_be_reevaluated = vector_new(); parser->errors_len = 0; } static void +parser_to_be_reevaluated_push(parser_t *parser, ast_node_t *node, token_t token) +{ + reevaluation_node_t *reevaluation_node = (reevaluation_node_t *)malloc(sizeof(reevaluation_node_t)); + + if (reevaluation_node == NULL) { + fprintf(stderr, "ref_entry_new: Out of memory: %s\n", strerror(errno)); + exit(EXIT_FAILURE); + } + + *reevaluation_node = (reevaluation_node_t){ .node = node, token = token }; + + vector_push_back(parser->to_be_reevaluated, reevaluation_node); +} + +static void parser_error_push_unexpected_kind(parser_t *parser, token_t *token, token_kind_t expected) { parser_error_t *error = &parser->errors[parser->errors_len++]; @@ -173,6 +198,9 @@ parser_parse_arithmetic2(parser_t *parser); static ast_node_t * parser_parse_factor(parser_t *parser); +static ast_node_t * +parser_parse_name_factor(parser_t *parser, token_t *token); + /** * * <expression> ::= <comparison1> @@ -341,65 +369,89 @@ parser_parse_factor(parser_t *parser) return expression; } - case TOKEN_NAME: { - ast_node_t *referenced_node = scope_get(parser->scope, token.value); - - if (referenced_node == NULL) { - parser_error_t error; - error.token = token; - sprintf(error.message, "identifier '" SVFMT "' not defined", SVARG(&token.value)); - parser->errors[parser->errors_len++] = error; - return NULL; - } + case TOKEN_NAME: + return parser_parse_name_factor(parser, &token); + default: { + parser_error_t error; + error.token = token; + sprintf(error.message, "unexpected '%s (" SVFMT ")' token", token_kind_to_str(token.kind), SVARG(&token.value)); + parser->errors[parser->errors_len++] = error; + return NULL; + } + } +} - if (referenced_node->kind == AST_VARIABLE_DECLARATION) { - return ast_node_new_variable(&referenced_node->data.variable_declaration.identifier, - referenced_node->data.variable_declaration.type); - } +static void +parser_drop_until_close_paren(parser_t *parser) +{ + if (!is_next_token_oparen(parser)) { + return; + } - // Function parameters are also variables. - if (referenced_node->kind == AST_FUNCTION_PARAMETER) { - return ast_node_new_variable(&referenced_node->data.function_parameter.identifier, - referenced_node->data.function_parameter.type); - } + int open_parens = 0; + do { + if (is_next_token_oparen(parser)) { + open_parens++; + } else if (is_next_token_cparen(parser)) { + open_parens--; + } + lexer_drop_next_token(parser->lexer); + } while (open_parens != 0 && !is_next_token_eof(parser)); +} - // Parse function parameters - if (!drop_expected_token(parser, TOKEN_OPAREN)) { - return NULL; - } +static ast_node_t * +parser_parse_name_factor(parser_t *parser, token_t *token) +{ + ast_node_t *referenced_node = scope_get(parser->scope, token->value); - vector_t *arguments = vector_new(); + if (referenced_node == NULL) { + ast_node_t *node = ast_node_new(); - while (!is_next_token_cparen(parser)) { - ast_node_t *argument = parser_parse_expression(parser); + parser_to_be_reevaluated_push(parser, node, *token); - if (argument == NULL) { - ast_node_destroy_vector(arguments); - return NULL; - } + parser_drop_until_close_paren(parser); + return node; + } - vector_push_back(arguments, argument); + if (referenced_node->kind == AST_VARIABLE_DECLARATION) { + return ast_node_new_variable(&referenced_node->data.variable_declaration.identifier, + referenced_node->data.variable_declaration.type); + } - if (!is_next_token_cparen(parser) && !drop_expected_token(parser, TOKEN_COMMA)) { - ast_node_destroy_vector(arguments); - return NULL; - } - } + // Function parameters are also variables. + if (referenced_node->kind == AST_FUNCTION_PARAMETER) { + return ast_node_new_variable(&referenced_node->data.function_parameter.identifier, + referenced_node->data.function_parameter.type); + } - drop_expected_token(parser, TOKEN_CPAREN); + // Parse function parameters + if (!drop_expected_token(parser, TOKEN_OPAREN)) { + return NULL; + } - return ast_node_new_function_call(ast_node_function_declaration_identifier(referenced_node), - referenced_node->data.function.prototype.return_type, - arguments); + vector_t *arguments = vector_new(); + + while (!is_next_token_cparen(parser)) { + ast_node_t *argument = parser_parse_expression(parser); + + if (argument == NULL) { + ast_node_destroy_vector(arguments); + return NULL; } - default: { - parser_error_t error; - error.token = token; - sprintf(error.message, "unexpected '%s (" SVFMT ")' token", token_kind_to_str(token.kind), SVARG(&token.value)); - parser->errors[parser->errors_len++] = error; + + vector_push_back(arguments, argument); + + if (!is_next_token_cparen(parser) && !drop_expected_token(parser, TOKEN_COMMA)) { + ast_node_destroy_vector(arguments); return NULL; } } + + drop_expected_token(parser, TOKEN_CPAREN); + + return ast_node_new_function_call(ast_node_function_declaration_identifier(referenced_node), + referenced_node->data.function.prototype.return_type, + arguments); } static ast_node_t * @@ -477,6 +529,11 @@ parser_parse_variable_assignment(parser_t *parser) static bool parser_expression_matches_the_expected_type(parser_t *parser, ast_node_t *expression, type_t type, token_t token) { + if (expression->kind == AST_UNKOWN_NODE) { + expression->data.unknown.expected_type = type; + expression->data.unknown.reference_token = token; + return true; + } if (expression->result_type != type) { parser_error_t error; error.token = token; @@ -626,6 +683,14 @@ is_next_token_cparen(parser_t *parser) } static bool +is_next_token_oparen(parser_t *parser) +{ + token_t token; + lexer_peek_next_token(parser->lexer, &token); + return token.kind == TOKEN_OPAREN; +} + +static bool is_next_token_eof(parser_t *parser) { token_t token; @@ -850,6 +915,45 @@ parser_parse_function_declaration(parser_t *parser) return node; } +static bool +parser_reevaluate_nodes(parser_t *parser) +{ + for (size_t i = 0; i < parser->to_be_reevaluated->size; i++) { + reevaluation_node_t *reevaluation = vector_at(parser->to_be_reevaluated, i); + + lexer_step_back_to(parser->lexer, &reevaluation->token); + lexer_drop_next_token(parser->lexer); + + ast_node_t *node = parser_parse_name_factor(parser, &reevaluation->token); + + if (node == NULL) { + return false; + } + + if (node->kind == AST_UNKOWN_NODE) { + parser_error_t error; + error.token = reevaluation->token; + sprintf(error.message, "identifier '" SVFMT "' not defined", SVARG(&reevaluation->token.value)); + parser->errors[parser->errors_len++] = error; + ast_node_destroy(node); + return false; + } + + if (!parser_expression_matches_the_expected_type(parser, + node, + reevaluation->node->data.unknown.expected_type, + reevaluation->node->data.unknown.reference_token)) { + ast_node_destroy(node); + return false; + } + + *reevaluation->node = *node; + + free(node); + } + return true; +} + ast_node_t * parser_parse_ns(parser_t *parser) { @@ -878,5 +982,10 @@ parser_parse_ns(parser_t *parser) return NULL; } + if (!parser_reevaluate_nodes(parser)) { + ast_node_destroy_vector(nodes); + return NULL; + } + return ast_node_new_namespace(nodes); } diff --git a/src/parser.h b/src/parser.h index 7bebee7..cfeb1f0 100644 --- a/src/parser.h +++ b/src/parser.h @@ -28,10 +28,17 @@ typedef struct parser_error_t char message[256]; } parser_error_t; +typedef struct reevaluation_node_t +{ + ast_node_t *node; + token_t token; +} reevaluation_node_t; + typedef struct parser_t { lexer_t *lexer; scope_t *scope; + vector_t *to_be_reevaluated; int errors_len; // FIXME: replace with vector parser_error_t errors[1]; diff --git a/test/integration_test.c b/test/integration_test.c index 608764a..8866f83 100644 --- a/test/integration_test.c +++ b/test/integration_test.c @@ -45,6 +45,7 @@ test_examples(const MunitParameter params[], void *user_data_or_fixture) assert_exit_status("../examples/variables.pipa", 28); assert_exit_status("../examples/if.pipa", 42); assert_exit_status("../examples/function_call.pipa", 42); + assert_exit_status("../examples/fibbonacci.pipa", 233); return MUNIT_OK; } diff --git a/test/parser_test.c b/test/parser_test.c index 44ffd57..acc238f 100644 --- a/test/parser_test.c +++ b/test/parser_test.c @@ -430,6 +430,8 @@ test_parse_basic_syntax_errors(const MunitParameter params[], void *user_data_or assert_parser_error("fn main(): i32 { return fn_404(); }", "identifier 'fn_404' not defined"); assert_parser_error("fn my_bool_fn(): bool { return true; } fn main(): i32 { return my_bool_fn(); }", "incompatible types: expected 'i32', actual 'bool'."); + assert_parser_error("fn main(): i32 { return my_bool_fn(); } fn my_bool_fn(): bool { return true; }", + "incompatible types: expected 'i32', actual 'bool'."); assert_parser_error("fn main(): i32 { oxi 42; }", "unexpected token 'TOKEN_NAME' value='oxi'"); return MUNIT_OK; |