summaryrefslogtreecommitdiff
path: root/src/parser.c
diff options
context:
space:
mode:
authorCarlos Maniero <carlos@maniero.me>2023-05-11 15:00:10 -0300
committerCarlos Maniero <carlos@maniero.me>2023-05-11 15:29:57 -0300
commitea7f65fe1250be8f49edcaaedd3410aed1401648 (patch)
treeffe7d6ec48bc6018b5eea7e9ef573cd1bbd57c2f /src/parser.c
parent5042a4ffc1363d6f0f99a3afd79f76cf2da738d6 (diff)
gas: implement recursion and late evaluation
Until now the below code was not valid for pipac. fn main(): i32 { return fib(13); } fn fib(n: i32): i32 { if n <= 1 { return n; } return fib(n - 1) + fib(n - 2); } Pipa's parser was adding a function to scope after they were fully parsed which means that fib's self-reference would not work. Also, functions were required to follow the be called in the order they are declared for the same scope reason so, the main function was required to be defined after fib. And how it is working now? When a TOKEN_NAME is not found in the scope, instead of returning an error, an unknown token is created as placeholder. The parser stores the node reference and the token it was trying to parse. During type checks, if the parser detects an unknown node, instead of returning an error, it stores in that node what was the expected type. After the NS is fully parsed a reevaluation is made on those unknown nodes by setting the lexer back on the node's token position and parsing the TOKEN_NAME again. Ps: There is a typo on the unknown token. It will be addressed in another commit since this issue was not introduced by this change. Signed-off-by: Carlos Maniero <carlos@maniero.me>
Diffstat (limited to 'src/parser.c')
-rw-r--r--src/parser.c199
1 files changed, 154 insertions, 45 deletions
diff --git a/src/parser.c b/src/parser.c
index fe2a2f0..8180351 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -16,6 +16,7 @@
*/
#include <assert.h>
+#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
@@ -36,6 +37,12 @@ parser_parse_block_declarations(parser_t *parser, type_t result_type);
static bool
is_next_token_cparen(parser_t *parser);
+static bool
+is_next_token_oparen(parser_t *parser);
+
+static bool
+is_next_token_eof(parser_t *parser);
+
void
parser_init(parser_t *parser, lexer_t *lexer, scope_t *scope)
{
@@ -43,10 +50,28 @@ parser_init(parser_t *parser, lexer_t *lexer, scope_t *scope)
assert(lexer && "lexer must be defined");
parser->lexer = lexer;
parser->scope = scope;
+ // FIXME: change parser_init to parser_new and create a destruction function
+ // for it
+ parser->to_be_reevaluated = vector_new();
parser->errors_len = 0;
}
static void
+parser_to_be_reevaluated_push(parser_t *parser, ast_node_t *node, token_t token)
+{
+ reevaluation_node_t *reevaluation_node = (reevaluation_node_t *)malloc(sizeof(reevaluation_node_t));
+
+ if (reevaluation_node == NULL) {
+ fprintf(stderr, "ref_entry_new: Out of memory: %s\n", strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+
+ *reevaluation_node = (reevaluation_node_t){ .node = node, token = token };
+
+ vector_push_back(parser->to_be_reevaluated, reevaluation_node);
+}
+
+static void
parser_error_push_unexpected_kind(parser_t *parser, token_t *token, token_kind_t expected)
{
parser_error_t *error = &parser->errors[parser->errors_len++];
@@ -173,6 +198,9 @@ parser_parse_arithmetic2(parser_t *parser);
static ast_node_t *
parser_parse_factor(parser_t *parser);
+static ast_node_t *
+parser_parse_name_factor(parser_t *parser, token_t *token);
+
/**
*
* <expression> ::= <comparison1>
@@ -341,65 +369,89 @@ parser_parse_factor(parser_t *parser)
return expression;
}
- case TOKEN_NAME: {
- ast_node_t *referenced_node = scope_get(parser->scope, token.value);
-
- if (referenced_node == NULL) {
- parser_error_t error;
- error.token = token;
- sprintf(error.message, "identifier '" SVFMT "' not defined", SVARG(&token.value));
- parser->errors[parser->errors_len++] = error;
- return NULL;
- }
+ case TOKEN_NAME:
+ return parser_parse_name_factor(parser, &token);
+ default: {
+ parser_error_t error;
+ error.token = token;
+ sprintf(error.message, "unexpected '%s (" SVFMT ")' token", token_kind_to_str(token.kind), SVARG(&token.value));
+ parser->errors[parser->errors_len++] = error;
+ return NULL;
+ }
+ }
+}
- if (referenced_node->kind == AST_VARIABLE_DECLARATION) {
- return ast_node_new_variable(&referenced_node->data.variable_declaration.identifier,
- referenced_node->data.variable_declaration.type);
- }
+static void
+parser_drop_until_close_paren(parser_t *parser)
+{
+ if (!is_next_token_oparen(parser)) {
+ return;
+ }
- // Function parameters are also variables.
- if (referenced_node->kind == AST_FUNCTION_PARAMETER) {
- return ast_node_new_variable(&referenced_node->data.function_parameter.identifier,
- referenced_node->data.function_parameter.type);
- }
+ int open_parens = 0;
+ do {
+ if (is_next_token_oparen(parser)) {
+ open_parens++;
+ } else if (is_next_token_cparen(parser)) {
+ open_parens--;
+ }
+ lexer_drop_next_token(parser->lexer);
+ } while (open_parens != 0 && !is_next_token_eof(parser));
+}
- // Parse function parameters
- if (!drop_expected_token(parser, TOKEN_OPAREN)) {
- return NULL;
- }
+static ast_node_t *
+parser_parse_name_factor(parser_t *parser, token_t *token)
+{
+ ast_node_t *referenced_node = scope_get(parser->scope, token->value);
- vector_t *arguments = vector_new();
+ if (referenced_node == NULL) {
+ ast_node_t *node = ast_node_new();
- while (!is_next_token_cparen(parser)) {
- ast_node_t *argument = parser_parse_expression(parser);
+ parser_to_be_reevaluated_push(parser, node, *token);
- if (argument == NULL) {
- ast_node_destroy_vector(arguments);
- return NULL;
- }
+ parser_drop_until_close_paren(parser);
+ return node;
+ }
- vector_push_back(arguments, argument);
+ if (referenced_node->kind == AST_VARIABLE_DECLARATION) {
+ return ast_node_new_variable(&referenced_node->data.variable_declaration.identifier,
+ referenced_node->data.variable_declaration.type);
+ }
- if (!is_next_token_cparen(parser) && !drop_expected_token(parser, TOKEN_COMMA)) {
- ast_node_destroy_vector(arguments);
- return NULL;
- }
- }
+ // Function parameters are also variables.
+ if (referenced_node->kind == AST_FUNCTION_PARAMETER) {
+ return ast_node_new_variable(&referenced_node->data.function_parameter.identifier,
+ referenced_node->data.function_parameter.type);
+ }
- drop_expected_token(parser, TOKEN_CPAREN);
+ // Parse function parameters
+ if (!drop_expected_token(parser, TOKEN_OPAREN)) {
+ return NULL;
+ }
- return ast_node_new_function_call(ast_node_function_declaration_identifier(referenced_node),
- referenced_node->data.function.prototype.return_type,
- arguments);
+ vector_t *arguments = vector_new();
+
+ while (!is_next_token_cparen(parser)) {
+ ast_node_t *argument = parser_parse_expression(parser);
+
+ if (argument == NULL) {
+ ast_node_destroy_vector(arguments);
+ return NULL;
}
- default: {
- parser_error_t error;
- error.token = token;
- sprintf(error.message, "unexpected '%s (" SVFMT ")' token", token_kind_to_str(token.kind), SVARG(&token.value));
- parser->errors[parser->errors_len++] = error;
+
+ vector_push_back(arguments, argument);
+
+ if (!is_next_token_cparen(parser) && !drop_expected_token(parser, TOKEN_COMMA)) {
+ ast_node_destroy_vector(arguments);
return NULL;
}
}
+
+ drop_expected_token(parser, TOKEN_CPAREN);
+
+ return ast_node_new_function_call(ast_node_function_declaration_identifier(referenced_node),
+ referenced_node->data.function.prototype.return_type,
+ arguments);
}
static ast_node_t *
@@ -477,6 +529,11 @@ parser_parse_variable_assignment(parser_t *parser)
static bool
parser_expression_matches_the_expected_type(parser_t *parser, ast_node_t *expression, type_t type, token_t token)
{
+ if (expression->kind == AST_UNKOWN_NODE) {
+ expression->data.unknown.expected_type = type;
+ expression->data.unknown.reference_token = token;
+ return true;
+ }
if (expression->result_type != type) {
parser_error_t error;
error.token = token;
@@ -626,6 +683,14 @@ is_next_token_cparen(parser_t *parser)
}
static bool
+is_next_token_oparen(parser_t *parser)
+{
+ token_t token;
+ lexer_peek_next_token(parser->lexer, &token);
+ return token.kind == TOKEN_OPAREN;
+}
+
+static bool
is_next_token_eof(parser_t *parser)
{
token_t token;
@@ -850,6 +915,45 @@ parser_parse_function_declaration(parser_t *parser)
return node;
}
+static bool
+parser_reevaluate_nodes(parser_t *parser)
+{
+ for (size_t i = 0; i < parser->to_be_reevaluated->size; i++) {
+ reevaluation_node_t *reevaluation = vector_at(parser->to_be_reevaluated, i);
+
+ lexer_step_back_to(parser->lexer, &reevaluation->token);
+ lexer_drop_next_token(parser->lexer);
+
+ ast_node_t *node = parser_parse_name_factor(parser, &reevaluation->token);
+
+ if (node == NULL) {
+ return false;
+ }
+
+ if (node->kind == AST_UNKOWN_NODE) {
+ parser_error_t error;
+ error.token = reevaluation->token;
+ sprintf(error.message, "identifier '" SVFMT "' not defined", SVARG(&reevaluation->token.value));
+ parser->errors[parser->errors_len++] = error;
+ ast_node_destroy(node);
+ return false;
+ }
+
+ if (!parser_expression_matches_the_expected_type(parser,
+ node,
+ reevaluation->node->data.unknown.expected_type,
+ reevaluation->node->data.unknown.reference_token)) {
+ ast_node_destroy(node);
+ return false;
+ }
+
+ *reevaluation->node = *node;
+
+ free(node);
+ }
+ return true;
+}
+
ast_node_t *
parser_parse_ns(parser_t *parser)
{
@@ -878,5 +982,10 @@ parser_parse_ns(parser_t *parser)
return NULL;
}
+ if (!parser_reevaluate_nodes(parser)) {
+ ast_node_destroy_vector(nodes);
+ return NULL;
+ }
+
return ast_node_new_namespace(nodes);
}