summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCarlos Maniero <carlos@maniero.me>2023-05-11 15:00:10 -0300
committerCarlos Maniero <carlos@maniero.me>2023-05-11 15:29:57 -0300
commitea7f65fe1250be8f49edcaaedd3410aed1401648 (patch)
treeffe7d6ec48bc6018b5eea7e9ef573cd1bbd57c2f
parent5042a4ffc1363d6f0f99a3afd79f76cf2da738d6 (diff)
gas: implement recursion and late evaluation
Until now the below code was not valid for pipac. fn main(): i32 { return fib(13); } fn fib(n: i32): i32 { if n <= 1 { return n; } return fib(n - 1) + fib(n - 2); } Pipa's parser was adding a function to scope after they were fully parsed which means that fib's self-reference would not work. Also, functions were required to follow the be called in the order they are declared for the same scope reason so, the main function was required to be defined after fib. And how it is working now? When a TOKEN_NAME is not found in the scope, instead of returning an error, an unknown token is created as placeholder. The parser stores the node reference and the token it was trying to parse. During type checks, if the parser detects an unknown node, instead of returning an error, it stores in that node what was the expected type. After the NS is fully parsed a reevaluation is made on those unknown nodes by setting the lexer back on the node's token position and parsing the TOKEN_NAME again. Ps: There is a typo on the unknown token. It will be addressed in another commit since this issue was not introduced by this change. Signed-off-by: Carlos Maniero <carlos@maniero.me>
-rw-r--r--examples/fibbonacci.pipa10
-rw-r--r--src/ast.h12
-rw-r--r--src/gas_assembly_generator.c5
-rw-r--r--src/lexer.c8
-rw-r--r--src/lexer.h3
-rw-r--r--src/parser.c199
-rw-r--r--src/parser.h7
-rw-r--r--test/integration_test.c1
-rw-r--r--test/parser_test.c2
9 files changed, 198 insertions, 49 deletions
diff --git a/examples/fibbonacci.pipa b/examples/fibbonacci.pipa
new file mode 100644
index 0000000..c19e65d
--- /dev/null
+++ b/examples/fibbonacci.pipa
@@ -0,0 +1,10 @@
+fn main(): i32 {
+ return fib(13);
+}
+
+fn fib(n: i32): i32 {
+ if n <= 1 {
+ return n;
+ }
+ return fib(n - 1) + fib(n - 2);
+}
diff --git a/src/ast.h b/src/ast.h
index 9e0e514..42f02ac 100644
--- a/src/ast.h
+++ b/src/ast.h
@@ -16,6 +16,7 @@
*/
#ifndef AST_H
#define AST_H
+#include "lexer.h"
#include "string_view.h"
#include "vector.h"
#include <stdint.h>
@@ -140,6 +141,12 @@ typedef struct ast_if_stmt_t
ast_node_t *body;
} ast_if_stmt_t;
+typedef struct ast_unkown_token_t
+{
+ type_t expected_type;
+ token_t reference_token;
+} ast_unkown_token_t;
+
typedef enum
{
AST_NAMESPACE,
@@ -151,10 +158,10 @@ typedef enum
AST_LITERAL,
AST_RETURN_STMT,
AST_IF_STMT,
- AST_UNKOWN_NODE,
AST_VARIABLE_DECLARATION,
AST_VARIABLE_ASSIGNMENT,
- AST_VARIABLE
+ AST_VARIABLE,
+ AST_UNKOWN_NODE
} ast_node_kind_t;
typedef union
@@ -171,6 +178,7 @@ typedef union
ast_variable_declaration_t variable_declaration;
ast_variable_assignment_t variable_assignment;
ast_variable_t variable;
+ ast_unkown_token_t unknown;
} ast_node_data_t;
typedef struct ast_node_t
diff --git a/src/gas_assembly_generator.c b/src/gas_assembly_generator.c
index 26bf3f9..7641147 100644
--- a/src/gas_assembly_generator.c
+++ b/src/gas_assembly_generator.c
@@ -214,6 +214,7 @@ gas_assembly_generator_compile_function(gas_assembly_generator_t *gen, ast_funct
fprintf(gen->stream, SVFMT ":\n", SVARG(&func->prototype.identifier.name));
fprintf(gen->stream, " push %%rbp\n");
fprintf(gen->stream, " mov %%rsp, %%rbp\n");
+ fprintf(gen->stream, " subq $16, %%rsp\n");
for (size_t i = 0; i < func->prototype.parameters->size; i++) {
char *reg = calling_registers[i];
@@ -232,7 +233,7 @@ gas_assembly_generator_compile_function(gas_assembly_generator_t *gen, ast_funct
gas_assembly_generator_compile(gen, func->body);
fprintf(gen->stream, ".L%ld:\n", return_label_index);
- fprintf(gen->stream, " pop %%rbp\n");
+ fprintf(gen->stream, " leave\n");
fprintf(gen->stream, " ret\n");
gen->return_label_index = previous_index;
@@ -515,7 +516,7 @@ gas_assembly_generator_binary_operation(gas_assembly_generator_t *gen, ast_binar
binary_operation->kind == AST_BINOP_EQUAL || binary_operation->kind == AST_BINOP_NOT_EQUAL ||
binary_operation->kind == AST_BINOP_GT || binary_operation->kind == AST_BINOP_LT ||
binary_operation->kind == AST_BINOP_LT_EQUAL || binary_operation->kind == AST_BINOP_GT_EQUAL) {
- fprintf(gen->stream, " cmp %%rcx, %%rax\n");
+ fprintf(gen->stream, " cmpq %%rcx, %%rax\n");
return;
}
diff --git a/src/lexer.c b/src/lexer.c
index 7c668d3..c00c944 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -349,6 +349,14 @@ lexer_load_file_contents(lexer_t *lexer)
}
void
+lexer_step_back_to(lexer_t *lexer, token_t *token)
+{
+ lexer->cur = token->bol + token->col;
+ lexer->row = token->row;
+ lexer->bol = token->bol;
+}
+
+void
lexer_lookahead(lexer_t *lexer, token_t *token, size_t level)
{
uint32_t cur = lexer->cur;
diff --git a/src/lexer.h b/src/lexer.h
index 46912dc..c544a15 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -115,6 +115,9 @@ void
lexer_peek_next_token(lexer_t *lexer, token_t *token);
void
+lexer_step_back_to(lexer_t *lexer, token_t *token);
+
+void
lexer_lookahead(lexer_t *lexer, token_t *token, size_t level);
char *
diff --git a/src/parser.c b/src/parser.c
index fe2a2f0..8180351 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -16,6 +16,7 @@
*/
#include <assert.h>
+#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
@@ -36,6 +37,12 @@ parser_parse_block_declarations(parser_t *parser, type_t result_type);
static bool
is_next_token_cparen(parser_t *parser);
+static bool
+is_next_token_oparen(parser_t *parser);
+
+static bool
+is_next_token_eof(parser_t *parser);
+
void
parser_init(parser_t *parser, lexer_t *lexer, scope_t *scope)
{
@@ -43,10 +50,28 @@ parser_init(parser_t *parser, lexer_t *lexer, scope_t *scope)
assert(lexer && "lexer must be defined");
parser->lexer = lexer;
parser->scope = scope;
+ // FIXME: change parser_init to parser_new and create a destruction function
+ // for it
+ parser->to_be_reevaluated = vector_new();
parser->errors_len = 0;
}
static void
+parser_to_be_reevaluated_push(parser_t *parser, ast_node_t *node, token_t token)
+{
+ reevaluation_node_t *reevaluation_node = (reevaluation_node_t *)malloc(sizeof(reevaluation_node_t));
+
+ if (reevaluation_node == NULL) {
+ fprintf(stderr, "ref_entry_new: Out of memory: %s\n", strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+
+ *reevaluation_node = (reevaluation_node_t){ .node = node, token = token };
+
+ vector_push_back(parser->to_be_reevaluated, reevaluation_node);
+}
+
+static void
parser_error_push_unexpected_kind(parser_t *parser, token_t *token, token_kind_t expected)
{
parser_error_t *error = &parser->errors[parser->errors_len++];
@@ -173,6 +198,9 @@ parser_parse_arithmetic2(parser_t *parser);
static ast_node_t *
parser_parse_factor(parser_t *parser);
+static ast_node_t *
+parser_parse_name_factor(parser_t *parser, token_t *token);
+
/**
*
* <expression> ::= <comparison1>
@@ -341,65 +369,89 @@ parser_parse_factor(parser_t *parser)
return expression;
}
- case TOKEN_NAME: {
- ast_node_t *referenced_node = scope_get(parser->scope, token.value);
-
- if (referenced_node == NULL) {
- parser_error_t error;
- error.token = token;
- sprintf(error.message, "identifier '" SVFMT "' not defined", SVARG(&token.value));
- parser->errors[parser->errors_len++] = error;
- return NULL;
- }
+ case TOKEN_NAME:
+ return parser_parse_name_factor(parser, &token);
+ default: {
+ parser_error_t error;
+ error.token = token;
+ sprintf(error.message, "unexpected '%s (" SVFMT ")' token", token_kind_to_str(token.kind), SVARG(&token.value));
+ parser->errors[parser->errors_len++] = error;
+ return NULL;
+ }
+ }
+}
- if (referenced_node->kind == AST_VARIABLE_DECLARATION) {
- return ast_node_new_variable(&referenced_node->data.variable_declaration.identifier,
- referenced_node->data.variable_declaration.type);
- }
+static void
+parser_drop_until_close_paren(parser_t *parser)
+{
+ if (!is_next_token_oparen(parser)) {
+ return;
+ }
- // Function parameters are also variables.
- if (referenced_node->kind == AST_FUNCTION_PARAMETER) {
- return ast_node_new_variable(&referenced_node->data.function_parameter.identifier,
- referenced_node->data.function_parameter.type);
- }
+ int open_parens = 0;
+ do {
+ if (is_next_token_oparen(parser)) {
+ open_parens++;
+ } else if (is_next_token_cparen(parser)) {
+ open_parens--;
+ }
+ lexer_drop_next_token(parser->lexer);
+ } while (open_parens != 0 && !is_next_token_eof(parser));
+}
- // Parse function parameters
- if (!drop_expected_token(parser, TOKEN_OPAREN)) {
- return NULL;
- }
+static ast_node_t *
+parser_parse_name_factor(parser_t *parser, token_t *token)
+{
+ ast_node_t *referenced_node = scope_get(parser->scope, token->value);
- vector_t *arguments = vector_new();
+ if (referenced_node == NULL) {
+ ast_node_t *node = ast_node_new();
- while (!is_next_token_cparen(parser)) {
- ast_node_t *argument = parser_parse_expression(parser);
+ parser_to_be_reevaluated_push(parser, node, *token);
- if (argument == NULL) {
- ast_node_destroy_vector(arguments);
- return NULL;
- }
+ parser_drop_until_close_paren(parser);
+ return node;
+ }
- vector_push_back(arguments, argument);
+ if (referenced_node->kind == AST_VARIABLE_DECLARATION) {
+ return ast_node_new_variable(&referenced_node->data.variable_declaration.identifier,
+ referenced_node->data.variable_declaration.type);
+ }
- if (!is_next_token_cparen(parser) && !drop_expected_token(parser, TOKEN_COMMA)) {
- ast_node_destroy_vector(arguments);
- return NULL;
- }
- }
+ // Function parameters are also variables.
+ if (referenced_node->kind == AST_FUNCTION_PARAMETER) {
+ return ast_node_new_variable(&referenced_node->data.function_parameter.identifier,
+ referenced_node->data.function_parameter.type);
+ }
- drop_expected_token(parser, TOKEN_CPAREN);
+ // Parse function parameters
+ if (!drop_expected_token(parser, TOKEN_OPAREN)) {
+ return NULL;
+ }
- return ast_node_new_function_call(ast_node_function_declaration_identifier(referenced_node),
- referenced_node->data.function.prototype.return_type,
- arguments);
+ vector_t *arguments = vector_new();
+
+ while (!is_next_token_cparen(parser)) {
+ ast_node_t *argument = parser_parse_expression(parser);
+
+ if (argument == NULL) {
+ ast_node_destroy_vector(arguments);
+ return NULL;
}
- default: {
- parser_error_t error;
- error.token = token;
- sprintf(error.message, "unexpected '%s (" SVFMT ")' token", token_kind_to_str(token.kind), SVARG(&token.value));
- parser->errors[parser->errors_len++] = error;
+
+ vector_push_back(arguments, argument);
+
+ if (!is_next_token_cparen(parser) && !drop_expected_token(parser, TOKEN_COMMA)) {
+ ast_node_destroy_vector(arguments);
return NULL;
}
}
+
+ drop_expected_token(parser, TOKEN_CPAREN);
+
+ return ast_node_new_function_call(ast_node_function_declaration_identifier(referenced_node),
+ referenced_node->data.function.prototype.return_type,
+ arguments);
}
static ast_node_t *
@@ -477,6 +529,11 @@ parser_parse_variable_assignment(parser_t *parser)
static bool
parser_expression_matches_the_expected_type(parser_t *parser, ast_node_t *expression, type_t type, token_t token)
{
+ if (expression->kind == AST_UNKOWN_NODE) {
+ expression->data.unknown.expected_type = type;
+ expression->data.unknown.reference_token = token;
+ return true;
+ }
if (expression->result_type != type) {
parser_error_t error;
error.token = token;
@@ -626,6 +683,14 @@ is_next_token_cparen(parser_t *parser)
}
static bool
+is_next_token_oparen(parser_t *parser)
+{
+ token_t token;
+ lexer_peek_next_token(parser->lexer, &token);
+ return token.kind == TOKEN_OPAREN;
+}
+
+static bool
is_next_token_eof(parser_t *parser)
{
token_t token;
@@ -850,6 +915,45 @@ parser_parse_function_declaration(parser_t *parser)
return node;
}
+static bool
+parser_reevaluate_nodes(parser_t *parser)
+{
+ for (size_t i = 0; i < parser->to_be_reevaluated->size; i++) {
+ reevaluation_node_t *reevaluation = vector_at(parser->to_be_reevaluated, i);
+
+ lexer_step_back_to(parser->lexer, &reevaluation->token);
+ lexer_drop_next_token(parser->lexer);
+
+ ast_node_t *node = parser_parse_name_factor(parser, &reevaluation->token);
+
+ if (node == NULL) {
+ return false;
+ }
+
+ if (node->kind == AST_UNKOWN_NODE) {
+ parser_error_t error;
+ error.token = reevaluation->token;
+ sprintf(error.message, "identifier '" SVFMT "' not defined", SVARG(&reevaluation->token.value));
+ parser->errors[parser->errors_len++] = error;
+ ast_node_destroy(node);
+ return false;
+ }
+
+ if (!parser_expression_matches_the_expected_type(parser,
+ node,
+ reevaluation->node->data.unknown.expected_type,
+ reevaluation->node->data.unknown.reference_token)) {
+ ast_node_destroy(node);
+ return false;
+ }
+
+ *reevaluation->node = *node;
+
+ free(node);
+ }
+ return true;
+}
+
ast_node_t *
parser_parse_ns(parser_t *parser)
{
@@ -878,5 +982,10 @@ parser_parse_ns(parser_t *parser)
return NULL;
}
+ if (!parser_reevaluate_nodes(parser)) {
+ ast_node_destroy_vector(nodes);
+ return NULL;
+ }
+
return ast_node_new_namespace(nodes);
}
diff --git a/src/parser.h b/src/parser.h
index 7bebee7..cfeb1f0 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -28,10 +28,17 @@ typedef struct parser_error_t
char message[256];
} parser_error_t;
+typedef struct reevaluation_node_t
+{
+ ast_node_t *node;
+ token_t token;
+} reevaluation_node_t;
+
typedef struct parser_t
{
lexer_t *lexer;
scope_t *scope;
+ vector_t *to_be_reevaluated;
int errors_len;
// FIXME: replace with vector
parser_error_t errors[1];
diff --git a/test/integration_test.c b/test/integration_test.c
index 608764a..8866f83 100644
--- a/test/integration_test.c
+++ b/test/integration_test.c
@@ -45,6 +45,7 @@ test_examples(const MunitParameter params[], void *user_data_or_fixture)
assert_exit_status("../examples/variables.pipa", 28);
assert_exit_status("../examples/if.pipa", 42);
assert_exit_status("../examples/function_call.pipa", 42);
+ assert_exit_status("../examples/fibbonacci.pipa", 233);
return MUNIT_OK;
}
diff --git a/test/parser_test.c b/test/parser_test.c
index 44ffd57..acc238f 100644
--- a/test/parser_test.c
+++ b/test/parser_test.c
@@ -430,6 +430,8 @@ test_parse_basic_syntax_errors(const MunitParameter params[], void *user_data_or
assert_parser_error("fn main(): i32 { return fn_404(); }", "identifier 'fn_404' not defined");
assert_parser_error("fn my_bool_fn(): bool { return true; } fn main(): i32 { return my_bool_fn(); }",
"incompatible types: expected 'i32', actual 'bool'.");
+ assert_parser_error("fn main(): i32 { return my_bool_fn(); } fn my_bool_fn(): bool { return true; }",
+ "incompatible types: expected 'i32', actual 'bool'.");
assert_parser_error("fn main(): i32 { oxi 42; }", "unexpected token 'TOKEN_NAME' value='oxi'");
return MUNIT_OK;