/* * Copyright (C) 2023 Johnny Richard * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include #include #include #include #include #include "ast.h" #include "lexer.h" #include "parser.h" #include "vector.h" void parser_init(parser_t *parser, lexer_t *lexer) { assert(parser && "parser must be defined"); assert(lexer && "lexer must be defined"); parser->lexer = lexer; parser->errors_len = 0; } static void parser_error_push_unexpected_kind(parser_t *parser, token_t *token, token_kind_t expected) { parser_error_t *error = &parser->errors[parser->errors_len++]; error->token = *token; if (token->kind == TOKEN_EOF) { sprintf(error->message, "expected '%s' but got end of file", token_kind_to_str(expected)); return; } sprintf(error->message, "expected '%s' but got '%s'", token_kind_to_str(expected), token_kind_to_str(token->kind)); } static bool expected_token(token_t *token, parser_t *parser, token_kind_t kind) { lexer_next_token(parser->lexer, token); if (token->kind != kind) { parser_error_push_unexpected_kind(parser, token, kind); return false; } return true; } static bool drop_expected_token(parser_t *parser, token_kind_t kind) { token_t ignored_token; return expected_token(&ignored_token, parser, kind); } static bool parser_parse_type(type_t *type, parser_t *parser) { token_t token; if (!expected_token(&token, parser, TOKEN_NAME)) return false; if (string_view_eq(token.value, string_view_from_str("i32"))) { *type = TYPE_I32; return true; } parser_error_t error; error.token = token; sprintf(error.message, "type '" SVFMT "' is not defined", SVARG(&token.value)); parser->errors[parser->errors_len++] = error; return false; } bool parser_literal_integer_node(ast_node_t *node, token_t *token) { char number_as_str[token->value.size]; string_view_to_str(&token->value, number_as_str); ast_literal_integer_create(node, atoi(number_as_str)); return true; } bool parser_parse_factor(parser_t *parser, ast_node_t *node) { token_t token; lexer_next_token(parser->lexer, &token); if (token.kind == TOKEN_NUMBER) { return parser_literal_integer_node(node, &token); } else if (token.kind == TOKEN_OPAREN) { parser_parse_expression(parser, node); if (!drop_expected_token(parser, TOKEN_CPAREN)) return false; return true; } else if (token.kind == TOKEN_NAME) { /// FIXME: Check if the identifier is defined ast_node_init_identifier(node, token.value); return true; } // FIXME: Extract this erros logic to a function parser_error_t error; error.token = token; sprintf(error.message, "unexpected '%s (" SVFMT ")' token", token_kind_to_str(token.kind), SVARG(&token.value)); parser->errors[parser->errors_len++] = error; return false; } bool parser_parse_term(parser_t *parser, ast_node_t *node) { if (!parser_parse_factor(parser, node)) return false; token_t token; lexer_next_token(parser->lexer, &token); while (token.kind == TOKEN_STAR || token.kind == TOKEN_SLASH) { ast_node_t *left = ast_node_new(); *left = *node; ast_node_t *right = ast_node_new(); if (!parser_parse_factor(parser, right)) return false; ast_node_init_binary_operation(node, token.value, left, right); lexer_next_token(parser->lexer, &token); } lexer_step_back_to(parser->lexer, &token); return true; } /** * * ::= (('+' | '-') term)* * ::= (('*' | '/') factor)* * ::= | '(' ')' * */ bool parser_parse_expression(parser_t *parser, ast_node_t *node) { if (!parser_parse_term(parser, node)) return false; token_t token; lexer_next_token(parser->lexer, &token); while (token.kind == TOKEN_PLUS || token.kind == TOKEN_MINUS) { ast_node_t *left = ast_node_new(); *left = *node; ast_node_t *right = ast_node_new(); if (!parser_parse_term(parser, right)) return false; ast_node_init_binary_operation(node, token.value, left, right); lexer_next_token(parser->lexer, &token); } lexer_step_back_to(parser->lexer, &token); return true; } bool parser_parse_return_stmt(parser_t *parser, ast_node_t *node) { ast_node_t *argument_token = ast_node_new(); if (!parser_parse_expression(parser, argument_token)) return false; if (!drop_expected_token(parser, TOKEN_SEMICOLON)) return false; ast_node_init_return_stmt(node, argument_token); return true; } bool parser_parse_variable_definition(parser_t *parser, string_view_t variable_name, ast_node_t *node) { if (!drop_expected_token(parser, TOKEN_COLON)) return false; type_t type; // FIXME: change the parameters order if (!parser_parse_type(&type, parser)) return false; token_t equal_token; if (!expected_token(&equal_token, parser, TOKEN_EQUAL)) return false; ast_node_t *expression = ast_node_new(); if (!parser_parse_expression(parser, expression) || !drop_expected_token(parser, TOKEN_SEMICOLON)) { ast_node_destroy(expression); return false; } ast_node_init_variable_declaration(node, variable_name, type, expression); return true; } bool parser_parse_block_declarations(parser_t *parser, vector_t *body) { token_t current_token; lexer_next_token(parser->lexer, ¤t_token); while (current_token.kind != TOKEN_CCURLY && current_token.kind != TOKEN_EOF) { if (current_token.kind != TOKEN_NAME) { parser_error_push_unexpected_kind(parser, ¤t_token, TOKEN_NAME); return false; } if (string_view_eq(current_token.value, string_view_from_str("return"))) { ast_node_t *return_node = ast_node_new(); bool parsed_return = parser_parse_return_stmt(parser, return_node); if (!parsed_return) { ast_node_destroy(return_node); return false; } vector_push_back(body, return_node); } else { ast_node_t *variable_node = ast_node_new(); bool parsed_variable = parser_parse_variable_definition(parser, current_token.value, variable_node); if (!parsed_variable) { ast_node_destroy(variable_node); return false; } vector_push_back(body, variable_node); } lexer_next_token(parser->lexer, ¤t_token); } if (current_token.kind != TOKEN_CCURLY) { parser_error_push_unexpected_kind(parser, ¤t_token, TOKEN_CCURLY); return false; } ast_node_t *latest_node = vector_at(body, body->size - 1); if (latest_node->kind != AST_RETURN_STMT) { parser_error_t error; error.token = current_token; sprintf(error.message, "expected 'return' keyword."); parser->errors[parser->errors_len++] = error; return false; } return true; } bool parser_parse_function_declaration(parser_t *parser, ast_node_t *node) { token_t func_name_token; if (!expected_token(&func_name_token, parser, TOKEN_NAME)) { return false; } if (!drop_expected_token(parser, TOKEN_OPAREN)) return false; if (!drop_expected_token(parser, TOKEN_CPAREN)) return false; if (!drop_expected_token(parser, TOKEN_COLON)) return false; type_t return_type; if (!parser_parse_type(&return_type, parser)) { return false; } if (!drop_expected_token(parser, TOKEN_OCURLY)) return false; vector_t *body = vector_new(); if (!parser_parse_block_declarations(parser, body)) return false; ast_node_init_function_declaration(node, func_name_token.value, return_type, body); return true; }