summaryrefslogtreecommitdiff
path: root/src/parser.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/parser.c')
-rw-r--r--src/parser.c333
1 files changed, 305 insertions, 28 deletions
diff --git a/src/parser.c b/src/parser.c
index 9167f1e..8180351 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -16,6 +16,7 @@
*/
#include <assert.h>
+#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
@@ -33,6 +34,15 @@ parser_expression_matches_the_expected_type(parser_t *parser, ast_node_t *expres
static ast_node_t *
parser_parse_block_declarations(parser_t *parser, type_t result_type);
+static bool
+is_next_token_cparen(parser_t *parser);
+
+static bool
+is_next_token_oparen(parser_t *parser);
+
+static bool
+is_next_token_eof(parser_t *parser);
+
void
parser_init(parser_t *parser, lexer_t *lexer, scope_t *scope)
{
@@ -40,10 +50,28 @@ parser_init(parser_t *parser, lexer_t *lexer, scope_t *scope)
assert(lexer && "lexer must be defined");
parser->lexer = lexer;
parser->scope = scope;
+ // FIXME: change parser_init to parser_new and create a destruction function
+ // for it
+ parser->to_be_reevaluated = vector_new();
parser->errors_len = 0;
}
static void
+parser_to_be_reevaluated_push(parser_t *parser, ast_node_t *node, token_t token)
+{
+ reevaluation_node_t *reevaluation_node = (reevaluation_node_t *)malloc(sizeof(reevaluation_node_t));
+
+ if (reevaluation_node == NULL) {
+ fprintf(stderr, "ref_entry_new: Out of memory: %s\n", strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+
+ *reevaluation_node = (reevaluation_node_t){ .node = node, token = token };
+
+ vector_push_back(parser->to_be_reevaluated, reevaluation_node);
+}
+
+static void
parser_error_push_unexpected_kind(parser_t *parser, token_t *token, token_kind_t expected)
{
parser_error_t *error = &parser->errors[parser->errors_len++];
@@ -156,7 +184,10 @@ ast_node_t *
parser_parse_expression(parser_t *parser);
static ast_node_t *
-parser_parse_comparison(parser_t *parser);
+parser_parse_comparison1(parser_t *parser);
+
+static ast_node_t *
+parser_parse_comparison2(parser_t *parser);
static ast_node_t *
parser_parse_arithmetic1(parser_t *parser);
@@ -167,10 +198,14 @@ parser_parse_arithmetic2(parser_t *parser);
static ast_node_t *
parser_parse_factor(parser_t *parser);
+static ast_node_t *
+parser_parse_name_factor(parser_t *parser, token_t *token);
+
/**
*
- * <expression> ::= <comparison>
- * <comparison> ::= <arithmetic1> (('>' | '>=' | '=' | ...) arithmetic1)*
+ * <expression> ::= <comparison1>
+ * <comparison1> ::= <comparison2> (('>' | '>=' | '=' | ...) comparison2)*
+ * <comparison2> ::= <arithmetic1> (('>' | '>=' | '=' | ...) arithmetic1)*
* <arithmetic1> ::= <arithmetic2> (('+' | '-') arithmetic2)*
* <arithmetic2> ::= <factor> (('*' | '/') factor)*
* <factor> ::= <integer> | '(' <expression> ')'
@@ -179,11 +214,42 @@ parser_parse_factor(parser_t *parser);
ast_node_t *
parser_parse_expression(parser_t *parser)
{
- return parser_parse_comparison(parser);
+ return parser_parse_comparison1(parser);
+}
+
+static ast_node_t *
+parser_parse_comparison1(parser_t *parser)
+{
+ ast_node_t *node = parser_parse_comparison2(parser);
+
+ if (node == NULL) {
+ return NULL;
+ }
+
+ token_t token;
+ lexer_peek_next_token(parser->lexer, &token);
+
+ while (token.kind == TOKEN_AND || token.kind == TOKEN_OR) {
+ lexer_drop_next_token(parser->lexer);
+
+ ast_node_t *left = node;
+ ast_node_t *right = parser_parse_comparison2(parser);
+
+ if (right == NULL) {
+ ast_node_destroy(node);
+ return NULL;
+ }
+
+ node = ast_node_new_binary_operation(token_to_binary_operation_kind(&token), left, right, TYPE_BOOL);
+
+ lexer_peek_next_token(parser->lexer, &token);
+ }
+
+ return node;
}
static ast_node_t *
-parser_parse_comparison(parser_t *parser)
+parser_parse_comparison2(parser_t *parser)
{
ast_node_t *node = parser_parse_arithmetic1(parser);
@@ -195,8 +261,7 @@ parser_parse_comparison(parser_t *parser)
lexer_peek_next_token(parser->lexer, &token);
while (token.kind == TOKEN_GT || token.kind == TOKEN_LT || token.kind == TOKEN_GT_EQUAL ||
- token.kind == TOKEN_LT_EQUAL || token.kind == TOKEN_EQUAL || token.kind == TOKEN_NOT_EQUAL ||
- token.kind == TOKEN_AND || token.kind == TOKEN_OR) {
+ token.kind == TOKEN_LT_EQUAL || token.kind == TOKEN_EQUAL || token.kind == TOKEN_NOT_EQUAL) {
lexer_drop_next_token(parser->lexer);
ast_node_t *left = node;
@@ -304,21 +369,8 @@ parser_parse_factor(parser_t *parser)
return expression;
}
- case TOKEN_NAME: {
- // TODO: Check node kind, today accepts only variables
- ast_node_t *var_node = scope_get(parser->scope, token.value);
-
- if (var_node == NULL) {
- parser_error_t error;
- error.token = token;
- sprintf(error.message, "identifier '" SVFMT "' not defined", SVARG(&token.value));
- parser->errors[parser->errors_len++] = error;
- return NULL;
- }
-
- return ast_node_new_variable(&var_node->data.variable_declaration.identifier,
- var_node->data.variable_declaration.type);
- }
+ case TOKEN_NAME:
+ return parser_parse_name_factor(parser, &token);
default: {
parser_error_t error;
error.token = token;
@@ -329,6 +381,79 @@ parser_parse_factor(parser_t *parser)
}
}
+static void
+parser_drop_until_close_paren(parser_t *parser)
+{
+ if (!is_next_token_oparen(parser)) {
+ return;
+ }
+
+ int open_parens = 0;
+ do {
+ if (is_next_token_oparen(parser)) {
+ open_parens++;
+ } else if (is_next_token_cparen(parser)) {
+ open_parens--;
+ }
+ lexer_drop_next_token(parser->lexer);
+ } while (open_parens != 0 && !is_next_token_eof(parser));
+}
+
+static ast_node_t *
+parser_parse_name_factor(parser_t *parser, token_t *token)
+{
+ ast_node_t *referenced_node = scope_get(parser->scope, token->value);
+
+ if (referenced_node == NULL) {
+ ast_node_t *node = ast_node_new();
+
+ parser_to_be_reevaluated_push(parser, node, *token);
+
+ parser_drop_until_close_paren(parser);
+ return node;
+ }
+
+ if (referenced_node->kind == AST_VARIABLE_DECLARATION) {
+ return ast_node_new_variable(&referenced_node->data.variable_declaration.identifier,
+ referenced_node->data.variable_declaration.type);
+ }
+
+ // Function parameters are also variables.
+ if (referenced_node->kind == AST_FUNCTION_PARAMETER) {
+ return ast_node_new_variable(&referenced_node->data.function_parameter.identifier,
+ referenced_node->data.function_parameter.type);
+ }
+
+ // Parse function parameters
+ if (!drop_expected_token(parser, TOKEN_OPAREN)) {
+ return NULL;
+ }
+
+ vector_t *arguments = vector_new();
+
+ while (!is_next_token_cparen(parser)) {
+ ast_node_t *argument = parser_parse_expression(parser);
+
+ if (argument == NULL) {
+ ast_node_destroy_vector(arguments);
+ return NULL;
+ }
+
+ vector_push_back(arguments, argument);
+
+ if (!is_next_token_cparen(parser) && !drop_expected_token(parser, TOKEN_COMMA)) {
+ ast_node_destroy_vector(arguments);
+ return NULL;
+ }
+ }
+
+ drop_expected_token(parser, TOKEN_CPAREN);
+
+ return ast_node_new_function_call(ast_node_function_declaration_identifier(referenced_node),
+ referenced_node->data.function.prototype.return_type,
+ arguments);
+}
+
static ast_node_t *
parser_parse_return_stmt(parser_t *parser, type_t result_type)
{
@@ -404,6 +529,11 @@ parser_parse_variable_assignment(parser_t *parser)
static bool
parser_expression_matches_the_expected_type(parser_t *parser, ast_node_t *expression, type_t type, token_t token)
{
+ if (expression->kind == AST_UNKOWN_NODE) {
+ expression->data.unknown.expected_type = type;
+ expression->data.unknown.reference_token = token;
+ return true;
+ }
if (expression->result_type != type) {
parser_error_t error;
error.token = token;
@@ -537,6 +667,38 @@ is_next_statement_return(parser_t *parser)
}
static bool
+is_next_function_declaration(parser_t *parser)
+{
+ token_t token;
+ lexer_peek_next_token(parser->lexer, &token);
+ return token.kind == TOKEN_KEYWORD_FN;
+}
+
+static bool
+is_next_token_cparen(parser_t *parser)
+{
+ token_t token;
+ lexer_peek_next_token(parser->lexer, &token);
+ return token.kind == TOKEN_CPAREN;
+}
+
+static bool
+is_next_token_oparen(parser_t *parser)
+{
+ token_t token;
+ lexer_peek_next_token(parser->lexer, &token);
+ return token.kind == TOKEN_OPAREN;
+}
+
+static bool
+is_next_token_eof(parser_t *parser)
+{
+ token_t token;
+ lexer_peek_next_token(parser->lexer, &token);
+ return token.kind == TOKEN_EOF;
+}
+
+static bool
is_block_end(parser_t *parser)
{
token_t token;
@@ -660,10 +822,48 @@ parser_parse_block_declarations(parser_t *parser, type_t result_type)
return ast_node_new_block(body);
}
-static bool
-parser_parse_function_arguments(parser_t *parser)
+static vector_t *
+parser_parse_function_parameters(parser_t *parser)
{
- return drop_expected_token(parser, TOKEN_OPAREN) && drop_expected_token(parser, TOKEN_CPAREN);
+ if (!drop_expected_token(parser, TOKEN_OPAREN)) {
+ return NULL;
+ }
+
+ vector_t *vector = vector_new();
+
+ while (!is_next_token_cparen(parser)) {
+ token_t token_name;
+ type_t type;
+
+ if (!expected_token(&token_name, parser, TOKEN_NAME)) {
+ ast_node_destroy_vector(vector);
+ return NULL;
+ }
+ if (!drop_expected_token(parser, TOKEN_COLON)) {
+ ast_node_destroy_vector(vector);
+ return NULL;
+ }
+
+ if (!parser_parse_type(parser, &type)) {
+ ast_node_destroy_vector(vector);
+ return NULL;
+ }
+
+ ast_node_t *parameter = ast_node_new_function_parameter(token_name.value, type);
+
+ scope_push(parser->scope, &parameter->data.function_parameter.identifier, parameter);
+
+ vector_push_back(vector, parameter);
+
+ if (!is_next_token_cparen(parser) && !drop_expected_token(parser, TOKEN_COMMA)) {
+ ast_node_destroy_vector(vector);
+ return NULL;
+ }
+ }
+
+ drop_expected_token(parser, TOKEN_CPAREN);
+
+ return vector;
}
ast_node_t *
@@ -679,7 +879,9 @@ parser_parse_function_declaration(parser_t *parser)
return NULL;
}
- if (!parser_parse_function_arguments(parser)) {
+ vector_t *parameters = parser_parse_function_parameters(parser);
+
+ if (parameters == NULL) {
return NULL;
}
@@ -704,11 +906,86 @@ parser_parse_function_declaration(parser_t *parser)
return NULL;
}
- ast_node_t *node = ast_node_new_function_declaration(func_name_token.value, return_type, body);
+ ast_node_t *node = ast_node_new_function_declaration(func_name_token.value, return_type, parameters, body);
// TODO: When implementing function calls the scope must be pushed before the
// body to be parsed, otherwise recursion not gonna work.
- scope_push(parser->scope, &node->data.function.identifier, node);
+ scope_push(parser->scope, &node->data.function.prototype.identifier, node);
return node;
}
+
+static bool
+parser_reevaluate_nodes(parser_t *parser)
+{
+ for (size_t i = 0; i < parser->to_be_reevaluated->size; i++) {
+ reevaluation_node_t *reevaluation = vector_at(parser->to_be_reevaluated, i);
+
+ lexer_step_back_to(parser->lexer, &reevaluation->token);
+ lexer_drop_next_token(parser->lexer);
+
+ ast_node_t *node = parser_parse_name_factor(parser, &reevaluation->token);
+
+ if (node == NULL) {
+ return false;
+ }
+
+ if (node->kind == AST_UNKOWN_NODE) {
+ parser_error_t error;
+ error.token = reevaluation->token;
+ sprintf(error.message, "identifier '" SVFMT "' not defined", SVARG(&reevaluation->token.value));
+ parser->errors[parser->errors_len++] = error;
+ ast_node_destroy(node);
+ return false;
+ }
+
+ if (!parser_expression_matches_the_expected_type(parser,
+ node,
+ reevaluation->node->data.unknown.expected_type,
+ reevaluation->node->data.unknown.reference_token)) {
+ ast_node_destroy(node);
+ return false;
+ }
+
+ *reevaluation->node = *node;
+
+ free(node);
+ }
+ return true;
+}
+
+ast_node_t *
+parser_parse_ns(parser_t *parser)
+{
+ vector_t *nodes = vector_new();
+
+ while (!is_next_token_eof(parser)) {
+ if (is_next_function_declaration(parser)) {
+ ast_node_t *node = parser_parse_function_declaration(parser);
+
+ if (node == NULL) {
+ ast_node_destroy_vector(nodes);
+ return NULL;
+ }
+
+ vector_push_back(nodes, node);
+ continue;
+ }
+
+ parser_error_t error;
+
+ lexer_peek_next_token(parser->lexer, &error.token);
+ sprintf(error.message, "Unexpected token '" SVFMT "'", SVARG(&error.token.value));
+ parser->errors[parser->errors_len++] = error;
+
+ ast_node_destroy_vector(nodes);
+ return NULL;
+ }
+
+ if (!parser_reevaluate_nodes(parser)) {
+ ast_node_destroy_vector(nodes);
+ return NULL;
+ }
+
+ return ast_node_new_namespace(nodes);
+}