/* * Copyright (C) 2023 Johnny Richard * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include #include #include #include #include #include #include "ast.h" #include "lexer.h" #include "parser.h" #include "scope.h" #include "vector.h" static bool parser_expression_matches_the_expected_type(parser_t *parser, ast_node_t *expression, type_t type, token_t token); static ast_node_t * parser_parse_block_declarations(parser_t *parser, type_t result_type); static bool is_next_token_cparen(parser_t *parser); static bool is_next_token_oparen(parser_t *parser); static bool is_next_token_eof(parser_t *parser); void parser_init(parser_t *parser, lexer_t *lexer, scope_t *scope) { assert(parser && "parser must be defined"); assert(lexer && "lexer must be defined"); parser->lexer = lexer; parser->scope = scope; // FIXME: change parser_init to parser_new and create a destruction function // for it parser->to_be_reevaluated = vector_new(); parser->errors_len = 0; } static void parser_to_be_reevaluated_push(parser_t *parser, ast_node_t *node, token_t token) { reevaluation_node_t *reevaluation_node = (reevaluation_node_t *)malloc(sizeof(reevaluation_node_t)); if (reevaluation_node == NULL) { fprintf(stderr, "ref_entry_new: Out of memory: %s\n", strerror(errno)); exit(EXIT_FAILURE); } *reevaluation_node = (reevaluation_node_t){ .node = node, token = token }; vector_push_back(parser->to_be_reevaluated, reevaluation_node); } static void parser_error_push_unexpected_kind(parser_t *parser, token_t *token, token_kind_t expected) { parser_error_t *error = &parser->errors[parser->errors_len++]; error->token = *token; if (token->kind == TOKEN_EOF) { sprintf(error->message, "expected '%s' but got end of file", token_kind_to_str(expected)); return; } sprintf(error->message, "expected '%s' but got '%s'", token_kind_to_str(expected), token_kind_to_str(token->kind)); } static bool expected_token(token_t *token, parser_t *parser, token_kind_t kind) { lexer_next_token(parser->lexer, token); if (token->kind != kind) { parser_error_push_unexpected_kind(parser, token, kind); return false; } return true; } static bool drop_expected_token(parser_t *parser, token_kind_t kind) { token_t ignored_token; return expected_token(&ignored_token, parser, kind); } static bool parser_parse_type(parser_t *parser, type_t *type) { token_t token; if (!expected_token(&token, parser, TOKEN_NAME)) return false; if (string_view_eq(token.value, string_view_from_str("i32"))) { *type = TYPE_I32; return true; } if (string_view_eq(token.value, string_view_from_str("bool"))) { *type = TYPE_BOOL; return true; } parser_error_t error; error.token = token; sprintf(error.message, "type '" SVFMT "' is not defined", SVARG(&token.value)); parser->errors[parser->errors_len++] = error; return false; } static ast_node_t * parser_literal_integer_node(token_t *token) { char number_as_str[token->value.size]; string_view_to_str(&token->value, number_as_str); return ast_node_new_literal_integer(atoi(number_as_str)); } static ast_node_t * parser_literal_bool_node(token_t *token) { return ast_node_new_literal_bool(token->kind == TOKEN_TRUE); } static ast_binary_operation_kind_t token_to_binary_operation_kind(token_t *token) { switch (token->kind) { case TOKEN_PLUS: return AST_BINOP_ADITION; case TOKEN_MINUS: return AST_BINOP_SUBTRACTION; case TOKEN_STAR: return AST_BINOP_MULTIPLICATION; case TOKEN_SLASH: return AST_BINOP_DIVISION; case TOKEN_EQUAL: return AST_BINOP_EQUAL; case TOKEN_NOT_EQUAL: return AST_BINOP_NOT_EQUAL; case TOKEN_AND: return AST_BINOP_AND; case TOKEN_OR: return AST_BINOP_OR; case TOKEN_GT: return AST_BINOP_GT; case TOKEN_GT_EQUAL: return AST_BINOP_GT_EQUAL; case TOKEN_LT: return AST_BINOP_LT; case TOKEN_LT_EQUAL: return AST_BINOP_LT_EQUAL; default: assert(false && "token mapping not found."); } } ast_node_t * parser_parse_expression(parser_t *parser); static ast_node_t * parser_parse_comparison1(parser_t *parser); static ast_node_t * parser_parse_comparison2(parser_t *parser); static ast_node_t * parser_parse_arithmetic1(parser_t *parser); static ast_node_t * parser_parse_arithmetic2(parser_t *parser); static ast_node_t * parser_parse_factor(parser_t *parser); static ast_node_t * parser_parse_name_factor(parser_t *parser, token_t *token); /** * * ::= * ::= (('>' | '>=' | '=' | ...) comparison2)* * ::= (('>' | '>=' | '=' | ...) arithmetic1)* * ::= (('+' | '-') arithmetic2)* * ::= (('*' | '/') factor)* * ::= | '(' ')' * */ ast_node_t * parser_parse_expression(parser_t *parser) { return parser_parse_comparison1(parser); } static ast_node_t * parser_parse_comparison1(parser_t *parser) { ast_node_t *node = parser_parse_comparison2(parser); if (node == NULL) { return NULL; } token_t token; lexer_peek_next_token(parser->lexer, &token); while (token.kind == TOKEN_AND || token.kind == TOKEN_OR) { lexer_drop_next_token(parser->lexer); ast_node_t *left = node; ast_node_t *right = parser_parse_comparison2(parser); if (right == NULL) { ast_node_destroy(node); return NULL; } node = ast_node_new_binary_operation(token_to_binary_operation_kind(&token), left, right, TYPE_BOOL); lexer_peek_next_token(parser->lexer, &token); } return node; } static ast_node_t * parser_parse_comparison2(parser_t *parser) { ast_node_t *node = parser_parse_arithmetic1(parser); if (node == NULL) { return NULL; } token_t token; lexer_peek_next_token(parser->lexer, &token); while (token.kind == TOKEN_GT || token.kind == TOKEN_LT || token.kind == TOKEN_GT_EQUAL || token.kind == TOKEN_LT_EQUAL || token.kind == TOKEN_EQUAL || token.kind == TOKEN_NOT_EQUAL) { lexer_drop_next_token(parser->lexer); ast_node_t *left = node; ast_node_t *right = parser_parse_arithmetic1(parser); if (right == NULL) { ast_node_destroy(node); return NULL; } node = ast_node_new_binary_operation(token_to_binary_operation_kind(&token), left, right, TYPE_BOOL); lexer_peek_next_token(parser->lexer, &token); } return node; } static ast_node_t * parser_parse_arithmetic1(parser_t *parser) { ast_node_t *node = parser_parse_arithmetic2(parser); if (node == NULL) { return NULL; } token_t token; lexer_peek_next_token(parser->lexer, &token); while (token.kind == TOKEN_PLUS || token.kind == TOKEN_MINUS) { lexer_drop_next_token(parser->lexer); ast_node_t *left = node; ast_node_t *right = parser_parse_arithmetic2(parser); if (right == NULL) { ast_node_destroy(left); return NULL; } node = ast_node_new_binary_operation(token_to_binary_operation_kind(&token), left, right, TYPE_I32); lexer_peek_next_token(parser->lexer, &token); } return node; } static ast_node_t * parser_parse_arithmetic2(parser_t *parser) { ast_node_t *node = parser_parse_factor(parser); if (node == NULL) { return NULL; } token_t token; lexer_peek_next_token(parser->lexer, &token); while (token.kind == TOKEN_STAR || token.kind == TOKEN_SLASH) { lexer_drop_next_token(parser->lexer); ast_node_t *left = node; ast_node_t *right = parser_parse_factor(parser); if (right == NULL) { ast_node_destroy(node); return NULL; } node = ast_node_new_binary_operation(token_to_binary_operation_kind(&token), left, right, TYPE_I32); lexer_peek_next_token(parser->lexer, &token); } return node; } static ast_node_t * parser_parse_factor(parser_t *parser) { token_t token; lexer_next_token(parser->lexer, &token); switch (token.kind) { case TOKEN_NUMBER: return parser_literal_integer_node(&token); case TOKEN_TRUE: return parser_literal_bool_node(&token); case TOKEN_FALSE: return parser_literal_bool_node(&token); case TOKEN_OPAREN: { ast_node_t *expression = parser_parse_expression(parser); if (expression == NULL) { return NULL; } if (!drop_expected_token(parser, TOKEN_CPAREN)) { ast_node_destroy(expression); return NULL; } return expression; } case TOKEN_NAME: return parser_parse_name_factor(parser, &token); default: { parser_error_t error; error.token = token; sprintf(error.message, "unexpected '%s (" SVFMT ")' token", token_kind_to_str(token.kind), SVARG(&token.value)); parser->errors[parser->errors_len++] = error; return NULL; } } } static void parser_drop_until_close_paren(parser_t *parser) { if (!is_next_token_oparen(parser)) { return; } int open_parens = 0; do { if (is_next_token_oparen(parser)) { open_parens++; } else if (is_next_token_cparen(parser)) { open_parens--; } lexer_drop_next_token(parser->lexer); } while (open_parens != 0 && !is_next_token_eof(parser)); } static ast_node_t * parser_parse_name_factor(parser_t *parser, token_t *token) { ast_node_t *referenced_node = scope_get(parser->scope, token->value); if (referenced_node == NULL) { ast_node_t *node = ast_node_new(); parser_to_be_reevaluated_push(parser, node, *token); parser_drop_until_close_paren(parser); return node; } if (referenced_node->kind == AST_VARIABLE_DECLARATION) { return ast_node_new_variable(&referenced_node->data.variable_declaration.identifier, referenced_node->data.variable_declaration.type); } // Function parameters are also variables. if (referenced_node->kind == AST_FUNCTION_PARAMETER) { return ast_node_new_variable(&referenced_node->data.function_parameter.identifier, referenced_node->data.function_parameter.type); } // Parse function parameters if (!drop_expected_token(parser, TOKEN_OPAREN)) { return NULL; } vector_t *arguments = vector_new(); while (!is_next_token_cparen(parser)) { ast_node_t *argument = parser_parse_expression(parser); if (argument == NULL) { ast_node_destroy_vector(arguments); return NULL; } vector_push_back(arguments, argument); if (!is_next_token_cparen(parser) && !drop_expected_token(parser, TOKEN_COMMA)) { ast_node_destroy_vector(arguments); return NULL; } } drop_expected_token(parser, TOKEN_CPAREN); return ast_node_new_function_call(ast_node_function_declaration_identifier(referenced_node), referenced_node->data.function.prototype.return_type, arguments); } static ast_node_t * parser_parse_return_stmt(parser_t *parser, type_t result_type) { token_t token; if (!expected_token(&token, parser, TOKEN_KEYWORD_RETURN)) { return NULL; } ast_node_t *expression = parser_parse_expression(parser); if (expression == NULL) { return NULL; } if (!drop_expected_token(parser, TOKEN_SEMICOLON)) { ast_node_destroy(expression); return NULL; } if (!parser_expression_matches_the_expected_type(parser, expression, result_type, token)) { ast_node_destroy(expression); return NULL; } return ast_node_new_return_stmt(expression); } static ast_node_t * parser_parse_variable_assignment(parser_t *parser) { token_t variable_token; if (!expected_token(&variable_token, parser, TOKEN_NAME)) { return NULL; } if (!drop_expected_token(parser, TOKEN_ASSIGN)) return NULL; ast_node_t *expression = parser_parse_expression(parser); if (expression == NULL) { return NULL; } if (!drop_expected_token(parser, TOKEN_SEMICOLON)) { ast_node_destroy(expression); return NULL; } ast_node_t *variable_declaration_node = scope_get(parser->scope, variable_token.value); if (variable_declaration_node == NULL) { parser_error_t error; error.token = variable_token; sprintf(error.message, "trying to assign '" SVFMT "' before defining it.", SVARG(&variable_token.value)); parser->errors[parser->errors_len++] = error; return NULL; } assert(variable_declaration_node->kind == AST_VARIABLE_DECLARATION); if (!parser_expression_matches_the_expected_type( parser, expression, variable_declaration_node->data.variable_declaration.type, variable_token)) { ast_node_destroy(expression); return false; } return ast_node_new_variable_assignment(&variable_declaration_node->data.variable_declaration.identifier, expression); } static bool parser_expression_matches_the_expected_type(parser_t *parser, ast_node_t *expression, type_t type, token_t token) { if (expression->kind == AST_UNKOWN_NODE) { expression->data.unknown.expected_type = type; expression->data.unknown.reference_token = token; return true; } if (expression->result_type != type) { parser_error_t error; error.token = token; sprintf(error.message, "incompatible types: expected '%s', actual '%s'.", ast_type_to_str(type), ast_type_to_str(expression->result_type)); parser->errors[parser->errors_len++] = error; return false; } return true; } static ast_node_t * parser_parse_variable_declaration(parser_t *parser) { token_t variable_name; if (!drop_expected_token(parser, TOKEN_KEYWORD_LET)) { return NULL; } if (!expected_token(&variable_name, parser, TOKEN_NAME)) { return NULL; } if (!drop_expected_token(parser, TOKEN_COLON)) { return NULL; } type_t type; if (!parser_parse_type(parser, &type)) { return NULL; } if (!drop_expected_token(parser, TOKEN_ASSIGN)) { return NULL; } ast_node_t *expression = parser_parse_expression(parser); if (expression == NULL) { return NULL; } if (!drop_expected_token(parser, TOKEN_SEMICOLON)) { ast_node_destroy(expression); return NULL; } if (!parser_expression_matches_the_expected_type(parser, expression, type, variable_name)) { ast_node_destroy(expression); return NULL; } ast_node_t *node = ast_node_new_variable_declaration(variable_name.value, type, expression); scope_push(parser->scope, &node->data.variable_declaration.identifier, node); return node; } static ast_node_t * parser_parse_if_stmt(parser_t *parser, type_t result_type) { token_t if_token; if (!expected_token(&if_token, parser, TOKEN_KEYWORD_IF)) { return NULL; } ast_node_t *condition = parser_parse_expression(parser); if (condition == NULL) { return NULL; } if (!parser_expression_matches_the_expected_type(parser, condition, TYPE_BOOL, if_token)) { ast_node_destroy(condition); return NULL; } ast_node_t *body = parser_parse_block_declarations(parser, result_type); if (body == NULL) { ast_node_destroy(condition); return NULL; } return ast_node_new_if_stmt(condition, body); } static bool is_next_statement_a_variable_declaration(parser_t *parser) { token_t token; lexer_peek_next_token(parser->lexer, &token); return token.kind == TOKEN_KEYWORD_LET; } static bool is_next_statement_a_variable_assignement(parser_t *parser) { token_t token; lexer_peek_next_token(parser->lexer, &token); if (token.kind != TOKEN_NAME) { return false; } lexer_lookahead(parser->lexer, &token, 2); return token.kind == TOKEN_ASSIGN; } static bool is_next_statement_a_if_stmt(parser_t *parser) { token_t token; lexer_peek_next_token(parser->lexer, &token); return token.kind == TOKEN_KEYWORD_IF; } static bool is_next_statement_return(parser_t *parser) { token_t token; lexer_peek_next_token(parser->lexer, &token); return token.kind == TOKEN_KEYWORD_RETURN; } static bool is_next_function_declaration(parser_t *parser) { token_t token; lexer_peek_next_token(parser->lexer, &token); return token.kind == TOKEN_KEYWORD_FN; } static bool is_next_token_cparen(parser_t *parser) { token_t token; lexer_peek_next_token(parser->lexer, &token); return token.kind == TOKEN_CPAREN; } static bool is_next_token_oparen(parser_t *parser) { token_t token; lexer_peek_next_token(parser->lexer, &token); return token.kind == TOKEN_OPAREN; } static bool is_next_token_eof(parser_t *parser) { token_t token; lexer_peek_next_token(parser->lexer, &token); return token.kind == TOKEN_EOF; } static bool is_block_end(parser_t *parser) { token_t token; lexer_peek_next_token(parser->lexer, &token); return token.kind == TOKEN_CCURLY || token.kind == TOKEN_EOF; } static void parser_error_report_unexpected_token(parser_t *parser) { token_t token; lexer_peek_next_token(parser->lexer, &token); parser_error_t *error = &parser->errors[parser->errors_len++]; error->token = token; sprintf( error->message, "unexpected token '%s' value='" SVFMT "'", token_kind_to_str(token.kind), SVARG(&token.value)); } static bool parser_ensure_function_return_statement(parser_t *parser, ast_node_t *block, token_t *function_token) { assert(block->kind == AST_BLOCK); ast_node_t *latest_node = vector_at(block->data.block.body, block->data.block.body->size - 1); if (latest_node->kind != AST_RETURN_STMT) { parser_error_t error; error.token = *function_token; sprintf(error.message, "expected 'return' keyword."); parser->errors[parser->errors_len++] = error; return false; } return true; } static ast_node_t * parser_parse_block_declarations(parser_t *parser, type_t result_type) { if (!drop_expected_token(parser, TOKEN_OCURLY)) { return NULL; } token_t current_token; lexer_peek_next_token(parser->lexer, ¤t_token); scope_enter(parser->scope); vector_t *body = vector_new(); while (!is_block_end(parser)) { if (is_next_statement_return(parser)) { ast_node_t *return_node = parser_parse_return_stmt(parser, result_type); if (return_node == NULL) { scope_leave(parser->scope); ast_node_destroy_vector(body); return NULL; } vector_push_back(body, return_node); continue; } if (is_next_statement_a_if_stmt(parser)) { ast_node_t *if_stmt = parser_parse_if_stmt(parser, result_type); if (if_stmt == NULL) { scope_leave(parser->scope); ast_node_destroy_vector(body); return NULL; } vector_push_back(body, if_stmt); continue; } if (is_next_statement_a_variable_declaration(parser)) { ast_node_t *variable_node = parser_parse_variable_declaration(parser); if (variable_node == NULL) { scope_leave(parser->scope); ast_node_destroy_vector(body); return NULL; } vector_push_back(body, variable_node); continue; } if (is_next_statement_a_variable_assignement(parser)) { ast_node_t *variable_assignment = parser_parse_variable_assignment(parser); if (variable_assignment == NULL) { scope_leave(parser->scope); ast_node_destroy_vector(body); return NULL; } vector_push_back(body, variable_assignment); continue; } parser_error_report_unexpected_token(parser); scope_leave(parser->scope); ast_node_destroy_vector(body); return NULL; } scope_leave(parser->scope); if (!drop_expected_token(parser, TOKEN_CCURLY)) { vector_destroy(body); return NULL; } return ast_node_new_block(body); } static vector_t * parser_parse_function_parameters(parser_t *parser) { if (!drop_expected_token(parser, TOKEN_OPAREN)) { return NULL; } vector_t *vector = vector_new(); while (!is_next_token_cparen(parser)) { token_t token_name; type_t type; if (!expected_token(&token_name, parser, TOKEN_NAME)) { ast_node_destroy_vector(vector); return NULL; } if (!drop_expected_token(parser, TOKEN_COLON)) { ast_node_destroy_vector(vector); return NULL; } if (!parser_parse_type(parser, &type)) { ast_node_destroy_vector(vector); return NULL; } ast_node_t *parameter = ast_node_new_function_parameter(token_name.value, type); scope_push(parser->scope, ¶meter->data.function_parameter.identifier, parameter); vector_push_back(vector, parameter); if (!is_next_token_cparen(parser) && !drop_expected_token(parser, TOKEN_COMMA)) { ast_node_destroy_vector(vector); return NULL; } } drop_expected_token(parser, TOKEN_CPAREN); return vector; } ast_node_t * parser_parse_function_declaration(parser_t *parser) { token_t func_name_token; if (!drop_expected_token(parser, TOKEN_KEYWORD_FN)) { return NULL; } if (!expected_token(&func_name_token, parser, TOKEN_NAME)) { return NULL; } vector_t *parameters = parser_parse_function_parameters(parser); if (parameters == NULL) { return NULL; } if (!drop_expected_token(parser, TOKEN_COLON)) { return NULL; } type_t return_type; if (!parser_parse_type(parser, &return_type)) { return NULL; } ast_node_t *body = parser_parse_block_declarations(parser, return_type); if (body == NULL) { return NULL; } if (!parser_ensure_function_return_statement(parser, body, &func_name_token)) { ast_node_destroy(body); return NULL; } ast_node_t *node = ast_node_new_function_declaration(func_name_token.value, return_type, parameters, body); // TODO: When implementing function calls the scope must be pushed before the // body to be parsed, otherwise recursion not gonna work. scope_push(parser->scope, &node->data.function.prototype.identifier, node); return node; } static bool parser_reevaluate_nodes(parser_t *parser) { for (size_t i = 0; i < parser->to_be_reevaluated->size; i++) { reevaluation_node_t *reevaluation = vector_at(parser->to_be_reevaluated, i); lexer_step_back_to(parser->lexer, &reevaluation->token); lexer_drop_next_token(parser->lexer); ast_node_t *node = parser_parse_name_factor(parser, &reevaluation->token); if (node == NULL) { return false; } if (node->kind == AST_UNKOWN_NODE) { parser_error_t error; error.token = reevaluation->token; sprintf(error.message, "identifier '" SVFMT "' not defined", SVARG(&reevaluation->token.value)); parser->errors[parser->errors_len++] = error; ast_node_destroy(node); return false; } if (!parser_expression_matches_the_expected_type(parser, node, reevaluation->node->data.unknown.expected_type, reevaluation->node->data.unknown.reference_token)) { ast_node_destroy(node); return false; } *reevaluation->node = *node; free(node); } return true; } ast_node_t * parser_parse_ns(parser_t *parser) { vector_t *nodes = vector_new(); while (!is_next_token_eof(parser)) { if (is_next_function_declaration(parser)) { ast_node_t *node = parser_parse_function_declaration(parser); if (node == NULL) { ast_node_destroy_vector(nodes); return NULL; } vector_push_back(nodes, node); continue; } parser_error_t error; lexer_peek_next_token(parser->lexer, &error.token); sprintf(error.message, "Unexpected token '" SVFMT "'", SVARG(&error.token.value)); parser->errors[parser->errors_len++] = error; ast_node_destroy_vector(nodes); return NULL; } if (!parser_reevaluate_nodes(parser)) { ast_node_destroy_vector(nodes); return NULL; } return ast_node_new_namespace(nodes); }