diff options
Diffstat (limited to 'src/parser.c')
-rw-r--r-- | src/parser.c | 333 |
1 files changed, 305 insertions, 28 deletions
diff --git a/src/parser.c b/src/parser.c index 9167f1e..8180351 100644 --- a/src/parser.c +++ b/src/parser.c @@ -16,6 +16,7 @@ */ #include <assert.h> +#include <errno.h> #include <stdbool.h> #include <stdio.h> #include <stdlib.h> @@ -33,6 +34,15 @@ parser_expression_matches_the_expected_type(parser_t *parser, ast_node_t *expres static ast_node_t * parser_parse_block_declarations(parser_t *parser, type_t result_type); +static bool +is_next_token_cparen(parser_t *parser); + +static bool +is_next_token_oparen(parser_t *parser); + +static bool +is_next_token_eof(parser_t *parser); + void parser_init(parser_t *parser, lexer_t *lexer, scope_t *scope) { @@ -40,10 +50,28 @@ parser_init(parser_t *parser, lexer_t *lexer, scope_t *scope) assert(lexer && "lexer must be defined"); parser->lexer = lexer; parser->scope = scope; + // FIXME: change parser_init to parser_new and create a destruction function + // for it + parser->to_be_reevaluated = vector_new(); parser->errors_len = 0; } static void +parser_to_be_reevaluated_push(parser_t *parser, ast_node_t *node, token_t token) +{ + reevaluation_node_t *reevaluation_node = (reevaluation_node_t *)malloc(sizeof(reevaluation_node_t)); + + if (reevaluation_node == NULL) { + fprintf(stderr, "ref_entry_new: Out of memory: %s\n", strerror(errno)); + exit(EXIT_FAILURE); + } + + *reevaluation_node = (reevaluation_node_t){ .node = node, token = token }; + + vector_push_back(parser->to_be_reevaluated, reevaluation_node); +} + +static void parser_error_push_unexpected_kind(parser_t *parser, token_t *token, token_kind_t expected) { parser_error_t *error = &parser->errors[parser->errors_len++]; @@ -156,7 +184,10 @@ ast_node_t * parser_parse_expression(parser_t *parser); static ast_node_t * -parser_parse_comparison(parser_t *parser); +parser_parse_comparison1(parser_t *parser); + +static ast_node_t * +parser_parse_comparison2(parser_t *parser); static ast_node_t * parser_parse_arithmetic1(parser_t *parser); @@ -167,10 +198,14 @@ parser_parse_arithmetic2(parser_t *parser); static ast_node_t * parser_parse_factor(parser_t *parser); +static ast_node_t * +parser_parse_name_factor(parser_t *parser, token_t *token); + /** * - * <expression> ::= <comparison> - * <comparison> ::= <arithmetic1> (('>' | '>=' | '=' | ...) arithmetic1)* + * <expression> ::= <comparison1> + * <comparison1> ::= <comparison2> (('>' | '>=' | '=' | ...) comparison2)* + * <comparison2> ::= <arithmetic1> (('>' | '>=' | '=' | ...) arithmetic1)* * <arithmetic1> ::= <arithmetic2> (('+' | '-') arithmetic2)* * <arithmetic2> ::= <factor> (('*' | '/') factor)* * <factor> ::= <integer> | '(' <expression> ')' @@ -179,11 +214,42 @@ parser_parse_factor(parser_t *parser); ast_node_t * parser_parse_expression(parser_t *parser) { - return parser_parse_comparison(parser); + return parser_parse_comparison1(parser); +} + +static ast_node_t * +parser_parse_comparison1(parser_t *parser) +{ + ast_node_t *node = parser_parse_comparison2(parser); + + if (node == NULL) { + return NULL; + } + + token_t token; + lexer_peek_next_token(parser->lexer, &token); + + while (token.kind == TOKEN_AND || token.kind == TOKEN_OR) { + lexer_drop_next_token(parser->lexer); + + ast_node_t *left = node; + ast_node_t *right = parser_parse_comparison2(parser); + + if (right == NULL) { + ast_node_destroy(node); + return NULL; + } + + node = ast_node_new_binary_operation(token_to_binary_operation_kind(&token), left, right, TYPE_BOOL); + + lexer_peek_next_token(parser->lexer, &token); + } + + return node; } static ast_node_t * -parser_parse_comparison(parser_t *parser) +parser_parse_comparison2(parser_t *parser) { ast_node_t *node = parser_parse_arithmetic1(parser); @@ -195,8 +261,7 @@ parser_parse_comparison(parser_t *parser) lexer_peek_next_token(parser->lexer, &token); while (token.kind == TOKEN_GT || token.kind == TOKEN_LT || token.kind == TOKEN_GT_EQUAL || - token.kind == TOKEN_LT_EQUAL || token.kind == TOKEN_EQUAL || token.kind == TOKEN_NOT_EQUAL || - token.kind == TOKEN_AND || token.kind == TOKEN_OR) { + token.kind == TOKEN_LT_EQUAL || token.kind == TOKEN_EQUAL || token.kind == TOKEN_NOT_EQUAL) { lexer_drop_next_token(parser->lexer); ast_node_t *left = node; @@ -304,21 +369,8 @@ parser_parse_factor(parser_t *parser) return expression; } - case TOKEN_NAME: { - // TODO: Check node kind, today accepts only variables - ast_node_t *var_node = scope_get(parser->scope, token.value); - - if (var_node == NULL) { - parser_error_t error; - error.token = token; - sprintf(error.message, "identifier '" SVFMT "' not defined", SVARG(&token.value)); - parser->errors[parser->errors_len++] = error; - return NULL; - } - - return ast_node_new_variable(&var_node->data.variable_declaration.identifier, - var_node->data.variable_declaration.type); - } + case TOKEN_NAME: + return parser_parse_name_factor(parser, &token); default: { parser_error_t error; error.token = token; @@ -329,6 +381,79 @@ parser_parse_factor(parser_t *parser) } } +static void +parser_drop_until_close_paren(parser_t *parser) +{ + if (!is_next_token_oparen(parser)) { + return; + } + + int open_parens = 0; + do { + if (is_next_token_oparen(parser)) { + open_parens++; + } else if (is_next_token_cparen(parser)) { + open_parens--; + } + lexer_drop_next_token(parser->lexer); + } while (open_parens != 0 && !is_next_token_eof(parser)); +} + +static ast_node_t * +parser_parse_name_factor(parser_t *parser, token_t *token) +{ + ast_node_t *referenced_node = scope_get(parser->scope, token->value); + + if (referenced_node == NULL) { + ast_node_t *node = ast_node_new(); + + parser_to_be_reevaluated_push(parser, node, *token); + + parser_drop_until_close_paren(parser); + return node; + } + + if (referenced_node->kind == AST_VARIABLE_DECLARATION) { + return ast_node_new_variable(&referenced_node->data.variable_declaration.identifier, + referenced_node->data.variable_declaration.type); + } + + // Function parameters are also variables. + if (referenced_node->kind == AST_FUNCTION_PARAMETER) { + return ast_node_new_variable(&referenced_node->data.function_parameter.identifier, + referenced_node->data.function_parameter.type); + } + + // Parse function parameters + if (!drop_expected_token(parser, TOKEN_OPAREN)) { + return NULL; + } + + vector_t *arguments = vector_new(); + + while (!is_next_token_cparen(parser)) { + ast_node_t *argument = parser_parse_expression(parser); + + if (argument == NULL) { + ast_node_destroy_vector(arguments); + return NULL; + } + + vector_push_back(arguments, argument); + + if (!is_next_token_cparen(parser) && !drop_expected_token(parser, TOKEN_COMMA)) { + ast_node_destroy_vector(arguments); + return NULL; + } + } + + drop_expected_token(parser, TOKEN_CPAREN); + + return ast_node_new_function_call(ast_node_function_declaration_identifier(referenced_node), + referenced_node->data.function.prototype.return_type, + arguments); +} + static ast_node_t * parser_parse_return_stmt(parser_t *parser, type_t result_type) { @@ -404,6 +529,11 @@ parser_parse_variable_assignment(parser_t *parser) static bool parser_expression_matches_the_expected_type(parser_t *parser, ast_node_t *expression, type_t type, token_t token) { + if (expression->kind == AST_UNKOWN_NODE) { + expression->data.unknown.expected_type = type; + expression->data.unknown.reference_token = token; + return true; + } if (expression->result_type != type) { parser_error_t error; error.token = token; @@ -537,6 +667,38 @@ is_next_statement_return(parser_t *parser) } static bool +is_next_function_declaration(parser_t *parser) +{ + token_t token; + lexer_peek_next_token(parser->lexer, &token); + return token.kind == TOKEN_KEYWORD_FN; +} + +static bool +is_next_token_cparen(parser_t *parser) +{ + token_t token; + lexer_peek_next_token(parser->lexer, &token); + return token.kind == TOKEN_CPAREN; +} + +static bool +is_next_token_oparen(parser_t *parser) +{ + token_t token; + lexer_peek_next_token(parser->lexer, &token); + return token.kind == TOKEN_OPAREN; +} + +static bool +is_next_token_eof(parser_t *parser) +{ + token_t token; + lexer_peek_next_token(parser->lexer, &token); + return token.kind == TOKEN_EOF; +} + +static bool is_block_end(parser_t *parser) { token_t token; @@ -660,10 +822,48 @@ parser_parse_block_declarations(parser_t *parser, type_t result_type) return ast_node_new_block(body); } -static bool -parser_parse_function_arguments(parser_t *parser) +static vector_t * +parser_parse_function_parameters(parser_t *parser) { - return drop_expected_token(parser, TOKEN_OPAREN) && drop_expected_token(parser, TOKEN_CPAREN); + if (!drop_expected_token(parser, TOKEN_OPAREN)) { + return NULL; + } + + vector_t *vector = vector_new(); + + while (!is_next_token_cparen(parser)) { + token_t token_name; + type_t type; + + if (!expected_token(&token_name, parser, TOKEN_NAME)) { + ast_node_destroy_vector(vector); + return NULL; + } + if (!drop_expected_token(parser, TOKEN_COLON)) { + ast_node_destroy_vector(vector); + return NULL; + } + + if (!parser_parse_type(parser, &type)) { + ast_node_destroy_vector(vector); + return NULL; + } + + ast_node_t *parameter = ast_node_new_function_parameter(token_name.value, type); + + scope_push(parser->scope, ¶meter->data.function_parameter.identifier, parameter); + + vector_push_back(vector, parameter); + + if (!is_next_token_cparen(parser) && !drop_expected_token(parser, TOKEN_COMMA)) { + ast_node_destroy_vector(vector); + return NULL; + } + } + + drop_expected_token(parser, TOKEN_CPAREN); + + return vector; } ast_node_t * @@ -679,7 +879,9 @@ parser_parse_function_declaration(parser_t *parser) return NULL; } - if (!parser_parse_function_arguments(parser)) { + vector_t *parameters = parser_parse_function_parameters(parser); + + if (parameters == NULL) { return NULL; } @@ -704,11 +906,86 @@ parser_parse_function_declaration(parser_t *parser) return NULL; } - ast_node_t *node = ast_node_new_function_declaration(func_name_token.value, return_type, body); + ast_node_t *node = ast_node_new_function_declaration(func_name_token.value, return_type, parameters, body); // TODO: When implementing function calls the scope must be pushed before the // body to be parsed, otherwise recursion not gonna work. - scope_push(parser->scope, &node->data.function.identifier, node); + scope_push(parser->scope, &node->data.function.prototype.identifier, node); return node; } + +static bool +parser_reevaluate_nodes(parser_t *parser) +{ + for (size_t i = 0; i < parser->to_be_reevaluated->size; i++) { + reevaluation_node_t *reevaluation = vector_at(parser->to_be_reevaluated, i); + + lexer_step_back_to(parser->lexer, &reevaluation->token); + lexer_drop_next_token(parser->lexer); + + ast_node_t *node = parser_parse_name_factor(parser, &reevaluation->token); + + if (node == NULL) { + return false; + } + + if (node->kind == AST_UNKOWN_NODE) { + parser_error_t error; + error.token = reevaluation->token; + sprintf(error.message, "identifier '" SVFMT "' not defined", SVARG(&reevaluation->token.value)); + parser->errors[parser->errors_len++] = error; + ast_node_destroy(node); + return false; + } + + if (!parser_expression_matches_the_expected_type(parser, + node, + reevaluation->node->data.unknown.expected_type, + reevaluation->node->data.unknown.reference_token)) { + ast_node_destroy(node); + return false; + } + + *reevaluation->node = *node; + + free(node); + } + return true; +} + +ast_node_t * +parser_parse_ns(parser_t *parser) +{ + vector_t *nodes = vector_new(); + + while (!is_next_token_eof(parser)) { + if (is_next_function_declaration(parser)) { + ast_node_t *node = parser_parse_function_declaration(parser); + + if (node == NULL) { + ast_node_destroy_vector(nodes); + return NULL; + } + + vector_push_back(nodes, node); + continue; + } + + parser_error_t error; + + lexer_peek_next_token(parser->lexer, &error.token); + sprintf(error.message, "Unexpected token '" SVFMT "'", SVARG(&error.token.value)); + parser->errors[parser->errors_len++] = error; + + ast_node_destroy_vector(nodes); + return NULL; + } + + if (!parser_reevaluate_nodes(parser)) { + ast_node_destroy_vector(nodes); + return NULL; + } + + return ast_node_new_namespace(nodes); +} |