/*
* Copyright (C) 2023 Johnny Richard
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
#include
#include
#include
#include
#include
#include "ast.h"
#include "lexer.h"
#include "parser.h"
#include "scope.h"
#include "vector.h"
void
parser_init(parser_t *parser, lexer_t *lexer, scope_t *scope)
{
assert(parser && "parser must be defined");
assert(lexer && "lexer must be defined");
parser->lexer = lexer;
parser->scope = scope;
parser->errors_len = 0;
}
static void
parser_error_push_unexpected_kind(parser_t *parser, token_t *token, token_kind_t expected)
{
parser_error_t *error = &parser->errors[parser->errors_len++];
error->token = *token;
if (token->kind == TOKEN_EOF) {
sprintf(error->message, "expected '%s' but got end of file", token_kind_to_str(expected));
return;
}
sprintf(error->message, "expected '%s' but got '%s'", token_kind_to_str(expected), token_kind_to_str(token->kind));
}
static bool
expected_token(token_t *token, parser_t *parser, token_kind_t kind)
{
lexer_next_token(parser->lexer, token);
if (token->kind != kind) {
parser_error_push_unexpected_kind(parser, token, kind);
return false;
}
return true;
}
static bool
drop_expected_token(parser_t *parser, token_kind_t kind)
{
token_t ignored_token;
return expected_token(&ignored_token, parser, kind);
}
static bool
parser_parse_type(parser_t *parser, type_t *type)
{
token_t token;
if (!expected_token(&token, parser, TOKEN_NAME))
return false;
if (string_view_eq(token.value, string_view_from_str("i32"))) {
*type = TYPE_I32;
return true;
}
parser_error_t error;
error.token = token;
sprintf(error.message, "type '" SVFMT "' is not defined", SVARG(&token.value));
parser->errors[parser->errors_len++] = error;
return false;
}
static bool
parser_literal_integer_node(ast_node_t *node, token_t *token)
{
char number_as_str[token->value.size];
string_view_to_str(&token->value, number_as_str);
ast_literal_integer_create(node, atoi(number_as_str));
return true;
}
static ast_binary_operation_kind_t
token_to_binary_operation_kind(token_t *token)
{
switch (token->kind) {
case TOKEN_PLUS:
return AST_BINOP_ADITION;
case TOKEN_MINUS:
return AST_BINOP_SUBTRACTION;
case TOKEN_STAR:
return AST_BINOP_MULTIPLICATION;
case TOKEN_SLASH:
return AST_BINOP_DIVISION;
default:
assert(false && "token mapping not found.");
}
}
static bool
parser_parse_factor(parser_t *parser, ast_node_t *node)
{
token_t token;
lexer_next_token(parser->lexer, &token);
switch (token.kind) {
case TOKEN_NUMBER:
return parser_literal_integer_node(node, &token);
case TOKEN_OPAREN:
if (!parser_parse_expression(parser, node))
return false;
if (!drop_expected_token(parser, TOKEN_CPAREN))
return false;
return true;
case TOKEN_NAME: {
// TODO: Check node kind, today accepts only variables
ast_node_t *var_node = scope_get(parser->scope, token.value);
if (var_node == NULL) {
parser_error_t error;
error.token = token;
sprintf(error.message, "identifier '" SVFMT "' not defined", SVARG(&token.value));
parser->errors[parser->errors_len++] = error;
return false;
}
ast_node_init_variable(node, &var_node->data.variable_declaration.identifier);
return true;
}
default: {
parser_error_t error;
error.token = token;
sprintf(error.message, "unexpected '%s (" SVFMT ")' token", token_kind_to_str(token.kind), SVARG(&token.value));
parser->errors[parser->errors_len++] = error;
return false;
}
}
}
static bool
parser_parse_term(parser_t *parser, ast_node_t *node)
{
if (!parser_parse_factor(parser, node))
return false;
token_t token;
lexer_peek_next_token(parser->lexer, &token);
while (token.kind == TOKEN_STAR || token.kind == TOKEN_SLASH) {
lexer_drop_next_token(parser->lexer);
ast_node_t *left = ast_node_new();
*left = *node;
ast_node_t *right = ast_node_new();
if (!parser_parse_factor(parser, right))
return false;
ast_node_init_binary_operation(node, token_to_binary_operation_kind(&token), left, right);
lexer_peek_next_token(parser->lexer, &token);
}
return true;
}
/**
*
* ::= (('+' | '-') term)*
* ::= (('*' | '/') factor)*
* ::= | '(' ')'
*
*/
bool
parser_parse_expression(parser_t *parser, ast_node_t *node)
{
if (!parser_parse_term(parser, node))
return false;
token_t token;
lexer_peek_next_token(parser->lexer, &token);
while (token.kind == TOKEN_PLUS || token.kind == TOKEN_MINUS) {
lexer_drop_next_token(parser->lexer);
ast_node_t *left = ast_node_new();
*left = *node;
ast_node_t *right = ast_node_new();
if (!parser_parse_term(parser, right))
return false;
ast_node_init_binary_operation(node, token_to_binary_operation_kind(&token), left, right);
lexer_peek_next_token(parser->lexer, &token);
}
return true;
}
static ast_node_t *
parser_parse_return_stmt(parser_t *parser)
{
if (!drop_expected_token(parser, TOKEN_KEYWORD_RETURN)) {
return NULL;
}
ast_node_t *argument_token = ast_node_new();
if (!parser_parse_expression(parser, argument_token)) {
ast_node_destroy(argument_token);
return NULL;
}
if (!drop_expected_token(parser, TOKEN_SEMICOLON)) {
ast_node_destroy(argument_token);
return NULL;
}
ast_node_t *node = ast_node_new();
ast_node_init_return_stmt(node, argument_token);
return node;
}
static bool
parser_parse_variable_assignment(parser_t *parser, ast_node_t *node)
{
token_t variable_token;
if (!expected_token(&variable_token, parser, TOKEN_NAME)) {
return false;
}
if (!drop_expected_token(parser, TOKEN_EQUAL))
return false;
ast_node_t *expression = ast_node_new();
if (!parser_parse_expression(parser, expression) || !drop_expected_token(parser, TOKEN_SEMICOLON)) {
ast_node_destroy(expression);
return false;
}
ast_node_t *variable_declaration_node = scope_get(parser->scope, variable_token.value);
if (variable_declaration_node == NULL) {
parser_error_t error;
error.token = variable_token;
sprintf(error.message, "trying to assign '" SVFMT "' before defining it.", SVARG(&variable_token.value));
parser->errors[parser->errors_len++] = error;
return false;
}
assert(variable_declaration_node->kind == AST_VARIABLE_DECLARATION);
node->kind = AST_VARIABLE_ASSIGNMENT;
node->data = (ast_node_data_t){ .variable_assignment = {
.identifier = &variable_declaration_node->data.variable_declaration.identifier,
.expression = expression } };
return true;
}
static ast_node_t *
parser_parse_variable_definition(parser_t *parser)
{
token_t variable_name;
if (!expected_token(&variable_name, parser, TOKEN_NAME)) {
return NULL;
}
if (!drop_expected_token(parser, TOKEN_COLON)) {
return NULL;
}
type_t type;
if (!parser_parse_type(parser, &type)) {
return NULL;
}
if (!drop_expected_token(parser, TOKEN_EQUAL)) {
return NULL;
}
ast_node_t *expression = ast_node_new();
if (!parser_parse_expression(parser, expression) || !drop_expected_token(parser, TOKEN_SEMICOLON)) {
ast_node_destroy(expression);
return NULL;
}
ast_node_t *node = ast_node_new();
ast_node_init_variable_declaration(node, variable_name.value, type, expression);
scope_push(parser->scope, &node->data.variable_declaration.identifier, node);
return node;
}
static bool
is_next_statement_a_variable_declaration(parser_t *parser)
{
token_t token;
lexer_peek_next_token(parser->lexer, &token);
if (token.kind != TOKEN_NAME) {
return false;
}
lexer_lookahead(parser->lexer, &token, 2);
return token.kind == TOKEN_COLON;
}
static bool
is_next_statement_a_variable_assignement(parser_t *parser)
{
token_t token;
lexer_peek_next_token(parser->lexer, &token);
if (token.kind != TOKEN_NAME) {
return false;
}
lexer_lookahead(parser->lexer, &token, 2);
return token.kind == TOKEN_EQUAL;
}
static bool
is_next_statement_return(parser_t *parser)
{
token_t token;
lexer_peek_next_token(parser->lexer, &token);
return token.kind == TOKEN_KEYWORD_RETURN;
}
static bool
is_block_end(parser_t *parser)
{
token_t token;
lexer_peek_next_token(parser->lexer, &token);
return token.kind == TOKEN_CCURLY || token.kind == TOKEN_EOF;
}
static void
parser_error_report_unexpected_token(parser_t *parser)
{
token_t token;
lexer_peek_next_token(parser->lexer, &token);
parser_error_t *error = &parser->errors[parser->errors_len++];
error->token = token;
sprintf(
error->message, "unexpected token '%s' value='" SVFMT "'", token_kind_to_str(token.kind), SVARG(&token.value));
}
static bool
parser_ensure_function_return_statement(parser_t *parser, vector_t *body, token_t *function_token)
{
ast_node_t *latest_node = vector_at(body, body->size - 1);
if (latest_node->kind != AST_RETURN_STMT) {
parser_error_t error;
error.token = *function_token;
sprintf(error.message, "expected 'return' keyword.");
parser->errors[parser->errors_len++] = error;
vector_destroy(body);
return false;
}
return true;
}
static vector_t *
parser_parse_block_declarations(parser_t *parser)
{
if (!drop_expected_token(parser, TOKEN_OCURLY)) {
return NULL;
}
token_t current_token;
lexer_peek_next_token(parser->lexer, ¤t_token);
scope_enter(parser->scope);
vector_t *body = vector_new();
while (!is_block_end(parser)) {
if (is_next_statement_return(parser)) {
ast_node_t *return_node = parser_parse_return_stmt(parser);
if (return_node == NULL) {
scope_leave(parser->scope);
vector_destroy(body);
return NULL;
}
vector_push_back(body, return_node);
continue;
}
if (is_next_statement_a_variable_declaration(parser)) {
ast_node_t *variable_node = parser_parse_variable_definition(parser);
if (variable_node == NULL) {
scope_leave(parser->scope);
vector_destroy(body);
return NULL;
}
vector_push_back(body, variable_node);
continue;
}
if (is_next_statement_a_variable_assignement(parser)) {
ast_node_t *variable_assignment = ast_node_new();
if (!parser_parse_variable_assignment(parser, variable_assignment)) {
scope_leave(parser->scope);
ast_node_destroy(variable_assignment);
vector_destroy(body);
return NULL;
}
vector_push_back(body, variable_assignment);
continue;
}
parser_error_report_unexpected_token(parser);
scope_leave(parser->scope);
vector_destroy(body);
return NULL;
}
scope_leave(parser->scope);
if (!drop_expected_token(parser, TOKEN_CCURLY)) {
vector_destroy(body);
return NULL;
}
return body;
}
static bool
parser_parse_function_arguments(parser_t *parser)
{
return drop_expected_token(parser, TOKEN_OPAREN) && drop_expected_token(parser, TOKEN_CPAREN);
}
ast_node_t *
parser_parse_function_declaration(parser_t *parser)
{
token_t func_name_token;
if (!expected_token(&func_name_token, parser, TOKEN_NAME)) {
return NULL;
}
if (!parser_parse_function_arguments(parser)) {
return NULL;
}
if (!drop_expected_token(parser, TOKEN_COLON)) {
return NULL;
}
type_t return_type;
if (!parser_parse_type(parser, &return_type)) {
return NULL;
}
vector_t *body = parser_parse_block_declarations(parser);
if (body == NULL) {
return NULL;
}
if (!parser_ensure_function_return_statement(parser, body, &func_name_token)) {
vector_destroy(body);
return NULL;
}
ast_node_t *node = ast_node_new();
ast_node_init_function_declaration(node, func_name_token.value, return_type, body);
// TODO: When implementing function calls the scope must be pushed before the
// body to be parsed, otherwise recursion not gonna work.
scope_push(parser->scope, &node->data.function.identifier, node);
return node;
}