diff options
author | Johnny Richard <johnny@johnnyrichard.com> | 2023-05-17 15:31:24 +0200 |
---|---|---|
committer | Johnny Richard <johnny@johnnyrichard.com> | 2023-05-17 15:31:24 +0200 |
commit | 15196aa56339d34af9f74b019e6aeff5816e8dcc (patch) | |
tree | ffe7d6ec48bc6018b5eea7e9ef573cd1bbd57c2f | |
parent | ad54ee1182b1549880eddc8b1969d3992d9f7f1d (diff) | |
parent | ea7f65fe1250be8f49edcaaedd3410aed1401648 (diff) |
Merge commit 'ea7f65fe1250be8f49edcaaedd3410aed1401648' of https://git.sr.ht/~carlosmaniero/pipac-clone
-rw-r--r-- | examples/fibbonacci.pipa | 10 | ||||
-rw-r--r-- | examples/function_call.pipa | 11 | ||||
-rw-r--r-- | examples/if.pipa | 78 | ||||
-rw-r--r-- | src/ast.c | 117 | ||||
-rw-r--r-- | src/ast.h | 75 | ||||
-rw-r--r-- | src/ast_pretty_printer.c | 39 | ||||
-rw-r--r-- | src/gas_assembly_generator.c | 402 | ||||
-rw-r--r-- | src/gas_assembly_generator.h | 12 | ||||
-rw-r--r-- | src/lexer.c | 16 | ||||
-rw-r--r-- | src/lexer.h | 4 | ||||
-rw-r--r-- | src/main.c | 14 | ||||
-rw-r--r-- | src/parser.c | 333 | ||||
-rw-r--r-- | src/parser.h | 10 | ||||
-rw-r--r-- | test/integration_test.c | 3 | ||||
-rw-r--r-- | test/parser_test.c | 49 |
15 files changed, 1045 insertions, 128 deletions
diff --git a/examples/fibbonacci.pipa b/examples/fibbonacci.pipa new file mode 100644 index 0000000..c19e65d --- /dev/null +++ b/examples/fibbonacci.pipa @@ -0,0 +1,10 @@ +fn main(): i32 { + return fib(13); +} + +fn fib(n: i32): i32 { + if n <= 1 { + return n; + } + return fib(n - 1) + fib(n - 2); +} diff --git a/examples/function_call.pipa b/examples/function_call.pipa new file mode 100644 index 0000000..833c195 --- /dev/null +++ b/examples/function_call.pipa @@ -0,0 +1,11 @@ +fn add(n: i32, n2: i32): i32 { + return n + n2; +} + +fn id(n: i32): i32 { + return n; +} + +fn main(): i32 { + return id(add(40, 44)) / 2; +} diff --git a/examples/if.pipa b/examples/if.pipa index 2c5578d..79f5724 100644 --- a/examples/if.pipa +++ b/examples/if.pipa @@ -1,8 +1,80 @@ fn main(): i32 { - let n: i32 = 42; + let n: i32 = 11; - if n > 42 { - return 42; + let falsy: bool = false || n == 11 && n > 11; + let truth: bool = n + 1 > 11 && n - 1 < 11; + let literal_falsy: bool = false; + + if falsy { + return 201; + } + + if literal_falsy { + return 200; + } + + if (n == 11) { + if n != 12 { + if n < 12 { + if n <= 11 { + if n > 10 { + if n >= 11 { + if n >= 11 || n < 11 { + if n > 11 || n < 11 { + return 199; + } + if n > 11 || n <= 11 { + if false || false { + return 199; + } + if true || false { + if false || true { + if true && false { + return 197; + } + if false && true { + return 196; + } + if true && true { + if n >= 11 && n < 11 || n == 11 { + if false || n == 11 && n > 11 { + return 195; + } + if false || n >= 11 && n <= 11 { + if false || false && false && false || true { + if n + 1 > 11 && n - 1 < 11 { + if truth { + return 42; + } + return 14; + } + return 13; + } + return 12; + } + return 11; + } + return 10; + } + return 9; + } + return 8; + } + return 7; + } + return 6; + } + return 5; + } + return 4; + } + return 3; + } + return 2; + } + return 1; + } + return 0; } return n; @@ -44,8 +44,12 @@ void ast_node_destroy(ast_node_t *node) { switch (node->kind) { + case AST_NAMESPACE: + ast_node_destroy_vector(node->data.ns.nodes); + break; case AST_FUNCTION_DECLARATION: ast_node_destroy(node->data.function.body); + ast_node_destroy_vector(node->data.function.prototype.parameters); break; case AST_IF_STMT: ast_node_destroy(node->data.if_stmt.condition); @@ -66,9 +70,11 @@ ast_node_destroy(ast_node_t *node) break; case AST_VARIABLE_ASSIGNMENT: ast_node_destroy(node->data.variable_assignment.expression); + case AST_FUNCTION_PARAMETER: case AST_LITERAL: case AST_UNKOWN_NODE: case AST_VARIABLE: + case AST_FUNCTION_CALL: break; } free(node); @@ -89,7 +95,30 @@ ast_node_new_return_stmt(ast_node_t *argument) } ast_node_t * -ast_node_new_function_declaration(string_view_t function_name, type_t return_type, ast_node_t *body) +ast_node_new_function_parameter(string_view_t name, type_t type) +{ + ast_node_t *node = ast_node_new(); + + *node = (ast_node_t){ + .kind = AST_FUNCTION_PARAMETER, + .data = { + .function_parameter = { + .identifier = { + .name = name, + }, + .type = type, + }, + }, + }; + + return node; +} + +ast_node_t * +ast_node_new_function_declaration(string_view_t function_name, + type_t return_type, + vector_t *parameters, + ast_node_t *body) { ast_node_t *node = ast_node_new(); @@ -97,8 +126,11 @@ ast_node_new_function_declaration(string_view_t function_name, type_t return_typ .kind = AST_FUNCTION_DECLARATION, .data = { .function = { - .identifier = { .name = function_name }, - .return_type = return_type, + .prototype = { + .identifier = { .name = function_name }, + .return_type = return_type, + .parameters = parameters, + }, .body = body, } }, @@ -108,6 +140,24 @@ ast_node_new_function_declaration(string_view_t function_name, type_t return_typ } ast_node_t * +ast_node_new_namespace(vector_t *nodes) +{ + ast_node_t *node = ast_node_new(); + + *node = (ast_node_t){ + .kind = AST_NAMESPACE, + .result_type = TYPE_VOID, + .data = { + .ns = { + .nodes = nodes, + } + }, + }; + + return node; +} + +ast_node_t * ast_node_new_block(vector_t *body) { ast_node_t *node = ast_node_new(); @@ -254,6 +304,67 @@ ast_node_new_variable(ast_identifier_t *identifier, type_t result_type) return node; } +ast_node_t * +ast_node_new_function_call(ast_identifier_t *identifier, type_t result_type, vector_t *arguments) +{ + ast_node_t *node = ast_node_new(); + + *node = (ast_node_t){ + .kind = AST_FUNCTION_CALL, + .result_type = result_type, + .data = { .function_call = { .identifier = identifier, .arguments = arguments } }, + }; + + return node; +} + +ast_node_t * +ast_node_ns_get_function_node_by_sv(ast_node_t *ns, string_view_t name) +{ + assert(ns->kind == AST_NAMESPACE); + + for (size_t i = 0; i < ns->data.ns.nodes->size; i++) { + ast_node_t *node = vector_at(ns->data.ns.nodes, i); + + if (node->kind == AST_FUNCTION_DECLARATION && string_view_eq(node->data.function.prototype.identifier.name, name)) { + return node; + } + } + return NULL; +} + +ast_node_t * +ast_node_ns_get_function_node_by_name(ast_node_t *ns, char *function_name) +{ + return ast_node_ns_get_function_node_by_sv(ns, string_view_from_str(function_name)); +} + +bool +ast_node_is_variable_declaration(ast_node_t *node) +{ + return node->kind == AST_VARIABLE_DECLARATION; +} + +bool +ast_node_is_function_declaration(ast_node_t *node) +{ + return node->kind == AST_FUNCTION_DECLARATION; +} + +ast_identifier_t * +ast_node_function_declaration_identifier(ast_node_t *node) +{ + assert(node->kind == AST_FUNCTION_DECLARATION); + + return &node->data.function.prototype.identifier; +} + +string_view_t +ast_node_function_declaration_name(ast_node_t *node) +{ + return ast_node_function_declaration_identifier(node)->name; +} + char * ast_type_to_str(type_t type) { @@ -16,6 +16,7 @@ */ #ifndef AST_H #define AST_H +#include "lexer.h" #include "string_view.h" #include "vector.h" #include <stdint.h> @@ -29,6 +30,11 @@ typedef enum typedef struct ast_node_t ast_node_t; +typedef struct ast_namespace_t +{ + vector_t *nodes; +} ast_namespace_t; + typedef struct ast_block_t { vector_t *body; @@ -49,10 +55,28 @@ typedef struct ast_variable_t ast_identifier_t *identifier; } ast_variable_t; -typedef struct ast_function_declaration_t +typedef struct ast_function_call_t +{ + ast_identifier_t *identifier; + vector_t *arguments; +} ast_function_call_t; + +typedef struct ast_function_parameter_t +{ + ast_identifier_t identifier; + type_t type; +} ast_function_parameter_t; + +typedef struct ast_function_prototype_t { ast_identifier_t identifier; type_t return_type; + vector_t *parameters; +} ast_function_prototype_t; + +typedef struct ast_function_declaration_t +{ + ast_function_prototype_t prototype; ast_node_t *body; } ast_function_declaration_t; @@ -117,24 +141,36 @@ typedef struct ast_if_stmt_t ast_node_t *body; } ast_if_stmt_t; +typedef struct ast_unkown_token_t +{ + type_t expected_type; + token_t reference_token; +} ast_unkown_token_t; + typedef enum { + AST_NAMESPACE, AST_BINARY_OPERATION, AST_BLOCK, AST_FUNCTION_DECLARATION, + AST_FUNCTION_PARAMETER, + AST_FUNCTION_CALL, AST_LITERAL, AST_RETURN_STMT, AST_IF_STMT, - AST_UNKOWN_NODE, AST_VARIABLE_DECLARATION, AST_VARIABLE_ASSIGNMENT, - AST_VARIABLE + AST_VARIABLE, + AST_UNKOWN_NODE } ast_node_kind_t; typedef union { + ast_namespace_t ns; ast_binary_operation_t binary_operation; ast_function_declaration_t function; + ast_function_parameter_t function_parameter; + ast_function_call_t function_call; ast_literal_t literal; ast_block_t block; ast_if_stmt_t if_stmt; @@ -142,6 +178,7 @@ typedef union ast_variable_declaration_t variable_declaration; ast_variable_assignment_t variable_assignment; ast_variable_t variable; + ast_unkown_token_t unknown; } ast_node_data_t; typedef struct ast_node_t @@ -164,13 +201,25 @@ void ast_node_destroy(ast_node_t *node); ast_node_t * +ast_node_new_namespace(vector_t *nodes); + +ast_node_t * ast_node_new_binary_operation(ast_binary_operation_kind_t kind, ast_node_t *left, ast_node_t *right, type_t result_type); ast_node_t * -ast_node_new_function_declaration(string_view_t function_name, type_t return_type, ast_node_t *body); +ast_node_new_function_declaration(string_view_t function_name, + type_t return_type, + vector_t *parameters, + ast_node_t *body); + +ast_node_t * +ast_node_new_function_parameter(string_view_t name, type_t type); + +ast_node_t * +ast_node_new_function_call(ast_identifier_t *identifier, type_t result_type, vector_t *arguments); ast_node_t * ast_node_new_return_stmt(ast_node_t *argument); @@ -196,4 +245,22 @@ ast_node_new_variable_assignment(ast_identifier_t *identifier, ast_node_t *expre ast_node_t * ast_node_new_if_stmt(ast_node_t *condition, ast_node_t *body); +ast_node_t * +ast_node_ns_get_function_node_by_sv(ast_node_t *ns, string_view_t name); + +ast_node_t * +ast_node_ns_get_function_node_by_name(ast_node_t *ns, char *function_name); + +bool +ast_node_is_variable_declaration(ast_node_t *node); + +bool +ast_node_is_function_declaration(ast_node_t *node); + +ast_identifier_t * +ast_node_function_declaration_identifier(ast_node_t *node); + +string_view_t +ast_node_function_declaration_name(ast_node_t *node); + #endif /* AST_H */ diff --git a/src/ast_pretty_printer.c b/src/ast_pretty_printer.c index e2d007a..a211fa4 100644 --- a/src/ast_pretty_printer.c +++ b/src/ast_pretty_printer.c @@ -22,10 +22,12 @@ #include <stdarg.h> #include <stdio.h> -const char operation_kinds[AST_BINOP_N] = { [AST_BINOP_ADITION] = '+', - [AST_BINOP_SUBTRACTION] = '-', - [AST_BINOP_MULTIPLICATION] = '*', - [AST_BINOP_DIVISION] = '/' }; +const char *operation_kinds[AST_BINOP_N] = { + [AST_BINOP_ADITION] = "+", [AST_BINOP_SUBTRACTION] = "-", [AST_BINOP_MULTIPLICATION] = "*", + [AST_BINOP_DIVISION] = "/", [AST_BINOP_EQUAL] = "==", [AST_BINOP_NOT_EQUAL] = "!=", + [AST_BINOP_AND] = "&&", [AST_BINOP_OR] = "||", [AST_BINOP_GT] = ">", + [AST_BINOP_LT] = "<", [AST_BINOP_LT_EQUAL] = "<=", [AST_BINOP_GT_EQUAL] = ">=", +}; void ast_pretty_printer_init(ast_pretty_printer_t *printer, FILE *stream); @@ -63,6 +65,21 @@ ast_pretty_printer_print_ast(ast_pretty_printer_t *printer, ast_node_t *ast) assert(ast); switch (ast->kind) { + case AST_FUNCTION_PARAMETER: + break; + case AST_NAMESPACE: + ast_pretty_printer_printf(printer, "Namespace\n"); + ast_pretty_printer_add_indentation(printer); + { + for (size_t i = 0; i < ast->data.ns.nodes->size; ++i) { + if (i + 1 >= ast->data.ns.nodes->size) { + ast_pretty_printer_set_lst_children(printer); + } + ast_pretty_printer_print_ast(printer, vector_at(ast->data.ns.nodes, i)); + } + ast_pretty_printer_rm_indentation(printer); + } + break; case AST_IF_STMT: { ast_if_stmt_t if_stmt = ast->data.if_stmt; ast_pretty_printer_printf(printer, "IfStmt\n"); @@ -76,6 +93,7 @@ ast_pretty_printer_print_ast(ast_pretty_printer_t *printer, ast_node_t *ast) ast_pretty_printer_rm_indentation(printer); } + ast_pretty_printer_set_lst_children(printer); ast_pretty_printer_printf(printer, "body:\n"); ast_pretty_printer_add_indentation(printer); { @@ -90,7 +108,7 @@ ast_pretty_printer_print_ast(ast_pretty_printer_t *printer, ast_node_t *ast) } case AST_BINARY_OPERATION: { ast_binary_operation_t binop = ast->data.binary_operation; - ast_pretty_printer_printf(printer, "BinaryOperation operation='%c'\n", operation_kinds[binop.kind]); + ast_pretty_printer_printf(printer, "BinaryOperation operation='%s'\n", operation_kinds[binop.kind]); ast_pretty_printer_add_indentation(printer); { @@ -120,8 +138,8 @@ ast_pretty_printer_print_ast(ast_pretty_printer_t *printer, ast_node_t *ast) break; } case AST_FUNCTION_DECLARATION: { - ast_function_declaration_t function = ast->data.function; - ast_pretty_printer_printf(printer, "FunctionDecl name='" SVFMT "'\n", SVARG(&function.identifier.name)); + string_view_t function_name = ast_node_function_declaration_name(ast); + ast_pretty_printer_printf(printer, "FunctionDecl name='" SVFMT "'\n", SVARG(&function_name)); ast_pretty_printer_add_indentation(printer); { @@ -130,6 +148,7 @@ ast_pretty_printer_print_ast(ast_pretty_printer_t *printer, ast_node_t *ast) ast_pretty_printer_add_indentation(printer); { + ast_pretty_printer_set_lst_children(printer); ast_pretty_printer_print_ast(printer, ast->data.function.body); ast_pretty_printer_rm_indentation(printer); } @@ -138,8 +157,12 @@ ast_pretty_printer_print_ast(ast_pretty_printer_t *printer, ast_node_t *ast) } break; } + case AST_FUNCTION_CALL: { + ast_function_call_t var = ast->data.function_call; + ast_pretty_printer_printf(printer, "FunctionCall name='" SVFMT "'\n", SVARG(&var.identifier->name)); + break; + } case AST_BLOCK: { - ast_pretty_printer_set_lst_children(printer); ast_pretty_printer_printf(printer, "Block\n"); ast_pretty_printer_add_indentation(printer); { diff --git a/src/gas_assembly_generator.c b/src/gas_assembly_generator.c index 4b898ed..7641147 100644 --- a/src/gas_assembly_generator.c +++ b/src/gas_assembly_generator.c @@ -22,6 +22,9 @@ #include <stdlib.h> #include <string.h> +size_t available_calling_registers = 6; +char *calling_registers[] = { "rdi", "rsi", "rdx", "rcx", "r8", "r9" }; + static void gas_assembly_generator_binary_operation(gas_assembly_generator_t *gen, ast_binary_operation_t *binary_operation); @@ -29,6 +32,9 @@ static void gas_assembly_generator_compile_function(gas_assembly_generator_t *gen, ast_function_declaration_t *func); static void +gas_assembly_generator_compile_function_call(gas_assembly_generator_t *gen, ast_function_call_t *func_call); + +static void gas_assembly_generator_compile_literal(gas_assembly_generator_t *gen, ast_literal_t *literal); static void @@ -45,6 +51,9 @@ static void gas_assembly_generator_compile_block(gas_assembly_generator_t *gen, ast_block_t *block); static void +gas_assembly_generator_compile_if_stmt(gas_assembly_generator_t *gen, ast_if_stmt_t *if_stmt); + +static void gas_assembly_generator_compile_variable(gas_assembly_generator_t *gen, ast_variable_t *variable); static void @@ -55,6 +64,13 @@ gas_assembly_generator_set_latest_evaluation_as_literal(gas_assembly_generator_t } static void +gas_assembly_generator_set_latest_evaluation_as_literal_boolean(gas_assembly_generator_t *gen, bool value) +{ + gen->latest_evaluation = + (evaluation_result_t){ .kind = EVALUATION_RESULT_AS_LITERAL_BOOL, .data = { .literal_bool = value } }; +} + +static void gas_assembly_generator_set_latest_evaluation_to_rax(gas_assembly_generator_t *gen) { gen->latest_evaluation = (evaluation_result_t){ .kind = EVALUATION_RESULT_ON_RAX }; @@ -67,6 +83,14 @@ gas_assembly_generator_set_latest_evaluation_on_stack(gas_assembly_generator_t * (evaluation_result_t){ .kind = EVALUATION_RESULT_ON_STACK, .data = { .stack_offset = stack_offset } }; } +static void +gas_assembly_generator_compile_condition(gas_assembly_generator_t *gen, + ast_node_t *condition, + uint64_t true_label_index, + bool jump_when_true, + uint64_t false_label_index, + bool jump_when_false); + typedef struct ref_entry_t { ast_identifier_t *id; @@ -87,16 +111,29 @@ ref_entry_new(void) return entry; } -static ref_entry_t * -find_ref_entry(vector_t *refs, ast_identifier_t *identifier) +static int +ref_find_variable_reference_stack_offset(vector_t *refs, ast_identifier_t *identifier) { for (int i = refs->size - 1; i >= 0; --i) { ref_entry_t *entry = (ref_entry_t *)vector_at(refs, i); if (entry->id == identifier) { - return entry; + return entry->stack_offset; } } - return NULL; + assert(false); +} + +static void +ref_set_variable_reference_stack_offset(vector_t *refs, ast_identifier_t *identifier, int stack_offset) +{ + ref_entry_t *ref_entry = ref_entry_new(); + + *ref_entry = (ref_entry_t){ + .id = identifier, + .stack_offset = stack_offset, + }; + + vector_push_back(refs, ref_entry); } void @@ -106,6 +143,8 @@ gas_assembly_generator_init(gas_assembly_generator_t *gen, FILE *stream) gen->stream = stream; gen->refs = vector_new(); gen->stack_offset = 0; + gen->counter_label_index = 1; + gen->return_label_index = 0; } void @@ -114,12 +153,18 @@ gas_assembly_generator_compile(gas_assembly_generator_t *gen, ast_node_t *ast) gen->latest_evaluation.kind = EVALUATION_RESULT_VOID; switch (ast->kind) { + case AST_NAMESPACE: + gas_assembly_generator_compile_ns(gen, &ast->data.ns); + break; case AST_BINARY_OPERATION: gas_assembly_generator_binary_operation(gen, &ast->data.binary_operation); break; case AST_FUNCTION_DECLARATION: gas_assembly_generator_compile_function(gen, &ast->data.function); break; + case AST_FUNCTION_CALL: + gas_assembly_generator_compile_function_call(gen, &ast->data.function_call); + break; case AST_LITERAL: gas_assembly_generator_compile_literal(gen, &ast->data.literal); break; @@ -139,115 +184,237 @@ gas_assembly_generator_compile(gas_assembly_generator_t *gen, ast_node_t *ast) gas_assembly_generator_compile_block(gen, &ast->data.block); break; case AST_IF_STMT: + gas_assembly_generator_compile_if_stmt(gen, &ast->data.if_stmt); + break; + case AST_FUNCTION_PARAMETER: case AST_UNKOWN_NODE: assert(false && "unreachable"); } } +void +gas_assembly_generator_compile_ns(gas_assembly_generator_t *gen, ast_namespace_t *ns) +{ + for (size_t i = 0; i < ns->nodes->size; i++) { + gas_assembly_generator_compile(gen, vector_at(ns->nodes, i)); + } +} + static void gas_assembly_generator_compile_function(gas_assembly_generator_t *gen, ast_function_declaration_t *func) { assert(func); - if (!string_view_eq(func->identifier.name, string_view_from_str("main"))) { - fprintf(stderr, "[ERROR]: no main function has been defined!\n"); - exit(EXIT_FAILURE); - } + uint64_t previous_index = gen->return_label_index; + uint64_t return_label_index = gen->return_label_index = gen->counter_label_index++; - fprintf(gen->stream, ".global _start\n"); - fprintf(gen->stream, ".text\n"); - fprintf(gen->stream, "_start:\n"); + assert((func->prototype.parameters->size <= available_calling_registers) && + "Not enough registers to process this function. Stack parameters will come soon."); + + fprintf(gen->stream, SVFMT ":\n", SVARG(&func->prototype.identifier.name)); fprintf(gen->stream, " push %%rbp\n"); fprintf(gen->stream, " mov %%rsp, %%rbp\n"); + fprintf(gen->stream, " subq $16, %%rsp\n"); + + for (size_t i = 0; i < func->prototype.parameters->size; i++) { + char *reg = calling_registers[i]; + ast_node_t *parameter = vector_at(func->prototype.parameters, i); + + assert(parameter->kind == AST_FUNCTION_PARAMETER); + + gen->stack_offset -= 8; + + fprintf(gen->stream, " mov %%%s, %d(%%rbp)\n", reg, gen->stack_offset); + + ref_set_variable_reference_stack_offset( + gen->refs, ¶meter->data.function_parameter.identifier, gen->stack_offset); + } gas_assembly_generator_compile(gen, func->body); - fprintf(gen->stream, " pop %%rbp\n"); + fprintf(gen->stream, ".L%ld:\n", return_label_index); + fprintf(gen->stream, " leave\n"); + fprintf(gen->stream, " ret\n"); + + gen->return_label_index = previous_index; + gen->stack_offset = 0; } static void -gas_assembly_generator_compile_return_stmt(gas_assembly_generator_t *gen, ast_return_stmt_t *return_stmt) +gas_assembly_generator_compile_function_call(gas_assembly_generator_t *gen, ast_function_call_t *func_call) { - assert(gen && return_stmt); + assert((func_call->arguments->size <= available_calling_registers) && + "Not enough registers to process this function. Stack parameters will come soon."); + + for (size_t i = 0; i < func_call->arguments->size; i++) { + char *reg = calling_registers[i]; + ast_node_t *argument = vector_at(func_call->arguments, i); + + gas_assembly_generator_compile(gen, argument); + + switch (gen->latest_evaluation.kind) { + case EVALUATION_RESULT_AS_LITERAL_INTEGER: + fprintf(gen->stream, " mov $%ld, %%%s\n", gen->latest_evaluation.data.literal_int, reg); + break; + case EVALUATION_RESULT_ON_RAX: + fprintf(gen->stream, " mov %%rax, %%%s\n", reg); + break; + case EVALUATION_RESULT_ON_STACK: + fprintf(gen->stream, " mov %d(%%rbp), %%%s\n", gen->latest_evaluation.data.stack_offset, reg); + break; + case EVALUATION_RESULT_AS_LITERAL_BOOL: + fprintf(gen->stream, " mov %d(%%rbp), %%%s\n", gen->latest_evaluation.data.literal_bool, reg); + break; + case EVALUATION_RESULT_VOID: + assert(false && "Unexpected"); + } + } + fprintf(gen->stream, " call " SVFMT "\n", SVARG(&func_call->identifier->name)); + gas_assembly_generator_set_latest_evaluation_to_rax(gen); +} - gas_assembly_generator_compile(gen, return_stmt->argument); +void +gas_assembly_generator_compile_linux_main(gas_assembly_generator_t *gen, ast_node_t *ns) +{ + if (ast_node_ns_get_function_node_by_name(ns, "main") == NULL) { + fprintf(stderr, "[ERROR]: no main function has been defined!\n"); + exit(EXIT_FAILURE); + } + + fprintf(gen->stream, ".global _start\n"); + fprintf(gen->stream, ".text\n"); + + gas_assembly_generator_compile(gen, ns); - if (gen->latest_evaluation.kind == EVALUATION_RESULT_AS_LITERAL_INTEGER) { - fprintf(gen->stream, " mov $%ld, %%rdi\n", gen->latest_evaluation.data.literal_int); - } else { - fprintf(gen->stream, " mov %%rax, %%rdi\n"); + fprintf(gen->stream, "_start:\n"); + fprintf(gen->stream, " call main\n"); + + switch (gen->latest_evaluation.kind) { + case EVALUATION_RESULT_AS_LITERAL_INTEGER: + fprintf(gen->stream, " mov $%ld, %%rdi\n", gen->latest_evaluation.data.literal_int); + break; + case EVALUATION_RESULT_ON_RAX: + fprintf(gen->stream, " mov %%rax, %%rdi\n"); + break; + case EVALUATION_RESULT_ON_STACK: + fprintf(gen->stream, " mov %d(%%rbp), %%rdi\n", gen->latest_evaluation.data.stack_offset); + break; + case EVALUATION_RESULT_VOID: + fprintf(gen->stream, " xor %%rdi, %%rdi\n"); + break; + case EVALUATION_RESULT_AS_LITERAL_BOOL: + assert(false && "Unexpected"); + break; } + fprintf(gen->stream, " mov $60, %%rax\n"); fprintf(gen->stream, " syscall\n"); } static void -gas_assembly_generator_compile_variable_declaration(gas_assembly_generator_t *gen, - ast_variable_declaration_t *variable_declaration) +gas_assembly_generator_compile_return_stmt(gas_assembly_generator_t *gen, ast_return_stmt_t *return_stmt) { - gen->stack_offset -= 8; - gas_assembly_generator_compile(gen, variable_declaration->value); + assert(gen && return_stmt); + assert(gen->return_label_index != 0 && "There are no return label index set"); - ref_entry_t *entry = ref_entry_new(); - *entry = (ref_entry_t){ .id = &variable_declaration->identifier, .stack_offset = gen->stack_offset }; - vector_push_back(gen->refs, entry); + gas_assembly_generator_compile(gen, return_stmt->argument); switch (gen->latest_evaluation.kind) { case EVALUATION_RESULT_AS_LITERAL_INTEGER: - fprintf(gen->stream, " movq $%ld, %d(%%rbp)\n", gen->latest_evaluation.data.literal_int, entry->stack_offset); + fprintf(gen->stream, " mov $%ld, %%rax\n", gen->latest_evaluation.data.literal_int); + gas_assembly_generator_set_latest_evaluation_to_rax(gen); break; case EVALUATION_RESULT_ON_RAX: - fprintf(gen->stream, " mov %%rax, %d(%%rbp)\n", entry->stack_offset); + gas_assembly_generator_set_latest_evaluation_to_rax(gen); break; case EVALUATION_RESULT_ON_STACK: - fprintf(gen->stream, " movq %d(%%rbp), %%rax\n", gen->latest_evaluation.data.stack_offset); - fprintf(gen->stream, " movq %%rax, %d(%%rbp)\n", entry->stack_offset); + fprintf(gen->stream, " mov %d(%%rbp), %%rax\n", gen->latest_evaluation.data.stack_offset); + gas_assembly_generator_set_latest_evaluation_to_rax(gen); break; case EVALUATION_RESULT_VOID: - assert(false && "Unexpected void result for variable declaration"); - // FIXME: store the latest node at latest evaluation and print_ast + break; + case EVALUATION_RESULT_AS_LITERAL_BOOL: + assert(false && "Unexpected"); + break; } + + fprintf(gen->stream, " jmp .L%ld\n", gen->return_label_index); } static void -gas_assembly_generator_compile_variable_assignment(gas_assembly_generator_t *gen, - ast_variable_assignment_t *variable_assignment) +gas_assembly_generator_compile_variable_assign_value(gas_assembly_generator_t *gen, + ast_node_t *expression, + int stack_offset) { - ref_entry_t *entry = find_ref_entry(gen->refs, variable_assignment->identifier); - assert(entry && "reference not found"); + if (expression->result_type == TYPE_BOOL) { + if (expression->kind == AST_LITERAL) { + assert(expression->data.literal.kind == AST_LITERAL_BOOL); - gas_assembly_generator_compile(gen, variable_assignment->expression); + fprintf(gen->stream, " movq $%d, %d(%%rbp)\n", expression->data.literal.value.boolean, stack_offset); + return; + } + + uint64_t true_label_index = gen->counter_label_index++; + uint64_t false_label_index = gen->counter_label_index++; + uint64_t after_assign_label_index = gen->counter_label_index++; + + gas_assembly_generator_compile_condition(gen, expression, true_label_index, false, false_label_index, true); + + fprintf(gen->stream, ".L_%ld:\n", true_label_index); + fprintf(gen->stream, " movq $1, %d(%%rbp)\n", stack_offset); + fprintf(gen->stream, " jmp .L_%ld\n", after_assign_label_index); + + fprintf(gen->stream, ".L_%ld:\n", false_label_index); + fprintf(gen->stream, " movq $0, %d(%%rbp)\n", stack_offset); + + fprintf(gen->stream, ".L_%ld:\n", after_assign_label_index); + return; + } + + gas_assembly_generator_compile(gen, expression); switch (gen->latest_evaluation.kind) { case EVALUATION_RESULT_AS_LITERAL_INTEGER: - fprintf(gen->stream, " movq $%ld, %d(%%rbp)\n", gen->latest_evaluation.data.literal_int, entry->stack_offset); + fprintf(gen->stream, " movq $%ld, %d(%%rbp)\n", gen->latest_evaluation.data.literal_int, stack_offset); break; case EVALUATION_RESULT_ON_RAX: - fprintf(gen->stream, " mov %%rax, %d(%%rbp)\n", entry->stack_offset); + fprintf(gen->stream, " mov %%rax, %d(%%rbp)\n", stack_offset); break; case EVALUATION_RESULT_ON_STACK: fprintf(gen->stream, " movq %d(%%rbp), %%rax\n", gen->latest_evaluation.data.stack_offset); - fprintf(gen->stream, " movq %%rax, %d(%%rbp)\n", entry->stack_offset); + fprintf(gen->stream, " movq %%rax, %d(%%rbp)\n", stack_offset); break; + case EVALUATION_RESULT_AS_LITERAL_BOOL: case EVALUATION_RESULT_VOID: assert(false && "Unexpected void result for variable declaration"); // FIXME: store the latest node at latest evaluation and print_ast } - // Today we dont't support chain-assignment like this: - // - // a = b = 1; - // - // If we start supporting it, we need to change the latest evaluation kind to - // stack. - gen->latest_evaluation.kind = EVALUATION_RESULT_VOID; +} + +static void +gas_assembly_generator_compile_variable_declaration(gas_assembly_generator_t *gen, + ast_variable_declaration_t *variable_declaration) +{ + gen->stack_offset -= 8; + ref_set_variable_reference_stack_offset(gen->refs, &variable_declaration->identifier, gen->stack_offset); + gas_assembly_generator_compile_variable_assign_value(gen, variable_declaration->value, gen->stack_offset); +} + +static void +gas_assembly_generator_compile_variable_assignment(gas_assembly_generator_t *gen, + ast_variable_assignment_t *variable_assignment) +{ + int stack_offset = ref_find_variable_reference_stack_offset(gen->refs, variable_assignment->identifier); + + gas_assembly_generator_compile_variable_assign_value(gen, variable_assignment->expression, stack_offset); } static void gas_assembly_generator_compile_variable(gas_assembly_generator_t *gen, ast_variable_t *variable) { - ref_entry_t *entry = find_ref_entry(gen->refs, variable->identifier); - assert(entry && "reference not found"); - gas_assembly_generator_set_latest_evaluation_on_stack(gen, entry->stack_offset); + int stack_offset = ref_find_variable_reference_stack_offset(gen->refs, variable->identifier); + + gas_assembly_generator_set_latest_evaluation_on_stack(gen, stack_offset); } static void @@ -257,6 +424,9 @@ gas_assembly_generator_compile_literal(gas_assembly_generator_t *gen, ast_litera case AST_LITERAL_INTEGER: gas_assembly_generator_set_latest_evaluation_as_literal(gen, literal->value.integer); return; + case AST_LITERAL_BOOL: + gas_assembly_generator_set_latest_evaluation_as_literal_boolean(gen, literal->value.boolean); + return; default: assert(false && "no code generation strategy."); } @@ -295,6 +465,9 @@ gas_assembly_generator_binary_operation(gas_assembly_generator_t *gen, ast_binar case EVALUATION_RESULT_ON_STACK: fprintf(gen->stream, " movq %d(%%rbp), %%rcx\n", right_evaluation.data.stack_offset); break; + case EVALUATION_RESULT_AS_LITERAL_BOOL: + fprintf(gen->stream, " mov $%d, %%rcx\n", right_evaluation.data.literal_bool); + break; case EVALUATION_RESULT_VOID: assert(false && "Unexpected void result for binary operation"); // FIXME: store the latest node at latest evaluation and print_ast @@ -311,6 +484,9 @@ gas_assembly_generator_binary_operation(gas_assembly_generator_t *gen, ast_binar break; case EVALUATION_RESULT_ON_RAX: break; + case EVALUATION_RESULT_AS_LITERAL_BOOL: + fprintf(gen->stream, " mov $%d, %%rax\n", left_evaluation.data.literal_bool); + break; case EVALUATION_RESULT_VOID: assert(false && "Unexpected void result for binary operation"); } @@ -336,5 +512,131 @@ gas_assembly_generator_binary_operation(gas_assembly_generator_t *gen, ast_binar return; } + if (binary_operation->kind == AST_BINOP_LT || binary_operation->kind == AST_BINOP_GT || + binary_operation->kind == AST_BINOP_EQUAL || binary_operation->kind == AST_BINOP_NOT_EQUAL || + binary_operation->kind == AST_BINOP_GT || binary_operation->kind == AST_BINOP_LT || + binary_operation->kind == AST_BINOP_LT_EQUAL || binary_operation->kind == AST_BINOP_GT_EQUAL) { + fprintf(gen->stream, " cmpq %%rcx, %%rax\n"); + return; + } + assert(false && "No strategy defined for binary operation"); } + +static char *jump_map[AST_BINOP_N] = { + [AST_BINOP_EQUAL] = "je", [AST_BINOP_NOT_EQUAL] = "jne", [AST_BINOP_LT] = "jl", + [AST_BINOP_LT_EQUAL] = "jle", [AST_BINOP_GT] = "jg", [AST_BINOP_GT_EQUAL] = "jge", +}; + +static char *inverse_jump_map[AST_BINOP_N] = { + [AST_BINOP_EQUAL] = "jne", [AST_BINOP_NOT_EQUAL] = "je", [AST_BINOP_LT] = "jge", + [AST_BINOP_LT_EQUAL] = "jg", [AST_BINOP_GT] = "jle", [AST_BINOP_GT_EQUAL] = "jl", +}; + +static void +gas_assembly_generator_compile_condition(gas_assembly_generator_t *gen, + ast_node_t *condition, + uint64_t true_label_index, + bool jump_when_true, + uint64_t false_label_index, + bool jump_when_false) +{ + assert(condition->kind == AST_BINARY_OPERATION || condition->kind == AST_LITERAL); + + if (condition->kind == AST_LITERAL) { + assert(condition->data.literal.kind == AST_LITERAL_BOOL); + + if (jump_when_false && !condition->data.literal.value.boolean) { + fprintf(gen->stream, " jmp .L_%ld\n", false_label_index); + return; + } + + if (jump_when_true && condition->data.literal.value.boolean) { + fprintf(gen->stream, " jmp .L_%ld\n", true_label_index); + } + + return; + } + + if (condition->data.binary_operation.kind == AST_BINOP_OR) { + uint64_t or_false_branch_label_index = gen->counter_label_index++; + + gas_assembly_generator_compile_condition( + gen, condition->data.binary_operation.left, true_label_index, true, or_false_branch_label_index, false); + + fprintf(gen->stream, ".L_%ld:\n", or_false_branch_label_index); + + gas_assembly_generator_compile_condition( + gen, condition->data.binary_operation.right, true_label_index, false, false_label_index, true); + return; + } + + if (condition->data.binary_operation.kind == AST_BINOP_AND) { + uint64_t and_true_branch_label_index = gen->counter_label_index++; + + gas_assembly_generator_compile_condition( + gen, condition->data.binary_operation.left, and_true_branch_label_index, false, false_label_index, true); + + fprintf(gen->stream, ".L_%ld:\n", and_true_branch_label_index); + + gas_assembly_generator_compile_condition( + gen, condition->data.binary_operation.right, true_label_index, false, false_label_index, true); + return; + } + + gas_assembly_generator_compile(gen, condition); + + if (jump_when_false) { + char *jumper = inverse_jump_map[condition->data.binary_operation.kind]; + + assert(jumper != NULL); + + fprintf(gen->stream, " %s .L_%ld\n", jumper, false_label_index); + return; + } + + if (jump_when_true) { + char *jumper = jump_map[condition->data.binary_operation.kind]; + + assert(jumper != NULL); + + fprintf(gen->stream, " %s .L_%ld\n", jumper, true_label_index); + } +} + +static void +gas_assembly_generator_compile_if_stmt(gas_assembly_generator_t *gen, ast_if_stmt_t *if_stmt) +{ + switch (if_stmt->condition->kind) { + case AST_BINARY_OPERATION: + case AST_LITERAL: { + uint64_t true_label_index = gen->counter_label_index++; + uint64_t false_label_index = gen->counter_label_index++; + + gas_assembly_generator_compile_condition( + gen, if_stmt->condition, true_label_index, false, false_label_index, true); + + fprintf(gen->stream, ".L_%ld:\n", true_label_index); + gas_assembly_generator_compile(gen, if_stmt->body); + + fprintf(gen->stream, ".L_%ld:\n", false_label_index); + break; + } + case AST_VARIABLE: { + uint64_t false_label_index = gen->counter_label_index++; + int stack_offset = + ref_find_variable_reference_stack_offset(gen->refs, if_stmt->condition->data.variable.identifier); + + fprintf(gen->stream, " movq %d(%%rbp), %%rax\n", stack_offset); + fprintf(gen->stream, " cmpq $1, %%rax\n"); + fprintf(gen->stream, " jne .L_%ld\n", false_label_index); + + gas_assembly_generator_compile(gen, if_stmt->body); + + fprintf(gen->stream, ".L_%ld:\n", false_label_index); + break; + } + default: + assert(false); + } +} diff --git a/src/gas_assembly_generator.h b/src/gas_assembly_generator.h index 92972f9..26aa87b 100644 --- a/src/gas_assembly_generator.h +++ b/src/gas_assembly_generator.h @@ -26,12 +26,14 @@ typedef enum evaluation_result_kind_t EVALUATION_RESULT_VOID, EVALUATION_RESULT_ON_RAX, EVALUATION_RESULT_ON_STACK, - EVALUATION_RESULT_AS_LITERAL_INTEGER + EVALUATION_RESULT_AS_LITERAL_INTEGER, + EVALUATION_RESULT_AS_LITERAL_BOOL } evaluation_result_kind_t; typedef union evaluation_result_data_t { int64_t literal_int; + bool literal_bool; int stack_offset; } evaluation_result_data_t; @@ -46,6 +48,8 @@ typedef struct gas_assembly_generator_t FILE *stream; vector_t *refs; int stack_offset; + uint64_t counter_label_index; + uint64_t return_label_index; evaluation_result_t latest_evaluation; } gas_assembly_generator_t; @@ -55,4 +59,10 @@ gas_assembly_generator_init(gas_assembly_generator_t *gen, FILE *stream); void gas_assembly_generator_compile(gas_assembly_generator_t *gen, ast_node_t *ast); +void +gas_assembly_generator_compile_ns(gas_assembly_generator_t *gen, ast_namespace_t *ns); + +void +gas_assembly_generator_compile_linux_main(gas_assembly_generator_t *gen, ast_node_t *func); + #endif /* GAS_ASSEMBLY_GENERATOR_H */ diff --git a/src/lexer.c b/src/lexer.c index a6c8ae2..c00c944 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -165,6 +165,12 @@ lexer_next_token(lexer_t *lexer, token_t *token) return; } + if (lexer_current_char(lexer) == ',') { + lexer_define_literal_token_props(lexer, token, TOKEN_COMMA); + lexer_drop_char(lexer); + return; + } + if (lexer_current_char(lexer) == ';') { lexer_define_literal_token_props(lexer, token, TOKEN_SEMICOLON); lexer_drop_char(lexer); @@ -343,6 +349,14 @@ lexer_load_file_contents(lexer_t *lexer) } void +lexer_step_back_to(lexer_t *lexer, token_t *token) +{ + lexer->cur = token->bol + token->col; + lexer->row = token->row; + lexer->bol = token->bol; +} + +void lexer_lookahead(lexer_t *lexer, token_t *token, size_t level) { uint32_t cur = lexer->cur; @@ -412,6 +426,8 @@ token_kind_to_str(token_kind_t kind) return ")"; case TOKEN_COLON: return ":"; + case TOKEN_COMMA: + return ","; case TOKEN_SEMICOLON: return ";"; case TOKEN_OCURLY: diff --git a/src/lexer.h b/src/lexer.h index 0e36ada..c544a15 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -32,6 +32,7 @@ typedef enum // Literal Tokens TOKEN_OPAREN, TOKEN_CPAREN, + TOKEN_COMMA, TOKEN_COLON, TOKEN_SEMICOLON, TOKEN_OCURLY, @@ -114,6 +115,9 @@ void lexer_peek_next_token(lexer_t *lexer, token_t *token); void +lexer_step_back_to(lexer_t *lexer, token_t *token); + +void lexer_lookahead(lexer_t *lexer, token_t *token, size_t level); char * @@ -27,11 +27,11 @@ #include "string_view.h" static void -generate_gas_x86_64_linux(ast_node_t *func) +generate_gas_x86_64_linux(ast_node_t *ns) { gas_assembly_generator_t gen; gas_assembly_generator_init(&gen, stdout); - gas_assembly_generator_compile(&gen, func); + gas_assembly_generator_compile_linux_main(&gen, ns); } static void @@ -89,20 +89,20 @@ main(int argc, char **argv) parser_t parser; parser_init(&parser, &lexer, scope); - ast_node_t *func = parser_parse_function_declaration(&parser); + ast_node_t *ns = parser_parse_ns(&parser); - if (func == NULL) { + if (ns == NULL) { parser_print_errors(&parser); return EXIT_FAILURE; } if (should_dump_ast) { - pretty_print_ast(func); + pretty_print_ast(ns); } else { - generate_gas_x86_64_linux(func); + generate_gas_x86_64_linux(ns); } scope_destroy(scope); - ast_node_destroy(func); + ast_node_destroy(ns); return EXIT_SUCCESS; } diff --git a/src/parser.c b/src/parser.c index 9167f1e..8180351 100644 --- a/src/parser.c +++ b/src/parser.c @@ -16,6 +16,7 @@ */ #include <assert.h> +#include <errno.h> #include <stdbool.h> #include <stdio.h> #include <stdlib.h> @@ -33,6 +34,15 @@ parser_expression_matches_the_expected_type(parser_t *parser, ast_node_t *expres static ast_node_t * parser_parse_block_declarations(parser_t *parser, type_t result_type); +static bool +is_next_token_cparen(parser_t *parser); + +static bool +is_next_token_oparen(parser_t *parser); + +static bool +is_next_token_eof(parser_t *parser); + void parser_init(parser_t *parser, lexer_t *lexer, scope_t *scope) { @@ -40,10 +50,28 @@ parser_init(parser_t *parser, lexer_t *lexer, scope_t *scope) assert(lexer && "lexer must be defined"); parser->lexer = lexer; parser->scope = scope; + // FIXME: change parser_init to parser_new and create a destruction function + // for it + parser->to_be_reevaluated = vector_new(); parser->errors_len = 0; } static void +parser_to_be_reevaluated_push(parser_t *parser, ast_node_t *node, token_t token) +{ + reevaluation_node_t *reevaluation_node = (reevaluation_node_t *)malloc(sizeof(reevaluation_node_t)); + + if (reevaluation_node == NULL) { + fprintf(stderr, "ref_entry_new: Out of memory: %s\n", strerror(errno)); + exit(EXIT_FAILURE); + } + + *reevaluation_node = (reevaluation_node_t){ .node = node, token = token }; + + vector_push_back(parser->to_be_reevaluated, reevaluation_node); +} + +static void parser_error_push_unexpected_kind(parser_t *parser, token_t *token, token_kind_t expected) { parser_error_t *error = &parser->errors[parser->errors_len++]; @@ -156,7 +184,10 @@ ast_node_t * parser_parse_expression(parser_t *parser); static ast_node_t * -parser_parse_comparison(parser_t *parser); +parser_parse_comparison1(parser_t *parser); + +static ast_node_t * +parser_parse_comparison2(parser_t *parser); static ast_node_t * parser_parse_arithmetic1(parser_t *parser); @@ -167,10 +198,14 @@ parser_parse_arithmetic2(parser_t *parser); static ast_node_t * parser_parse_factor(parser_t *parser); +static ast_node_t * +parser_parse_name_factor(parser_t *parser, token_t *token); + /** * - * <expression> ::= <comparison> - * <comparison> ::= <arithmetic1> (('>' | '>=' | '=' | ...) arithmetic1)* + * <expression> ::= <comparison1> + * <comparison1> ::= <comparison2> (('>' | '>=' | '=' | ...) comparison2)* + * <comparison2> ::= <arithmetic1> (('>' | '>=' | '=' | ...) arithmetic1)* * <arithmetic1> ::= <arithmetic2> (('+' | '-') arithmetic2)* * <arithmetic2> ::= <factor> (('*' | '/') factor)* * <factor> ::= <integer> | '(' <expression> ')' @@ -179,11 +214,42 @@ parser_parse_factor(parser_t *parser); ast_node_t * parser_parse_expression(parser_t *parser) { - return parser_parse_comparison(parser); + return parser_parse_comparison1(parser); +} + +static ast_node_t * +parser_parse_comparison1(parser_t *parser) +{ + ast_node_t *node = parser_parse_comparison2(parser); + + if (node == NULL) { + return NULL; + } + + token_t token; + lexer_peek_next_token(parser->lexer, &token); + + while (token.kind == TOKEN_AND || token.kind == TOKEN_OR) { + lexer_drop_next_token(parser->lexer); + + ast_node_t *left = node; + ast_node_t *right = parser_parse_comparison2(parser); + + if (right == NULL) { + ast_node_destroy(node); + return NULL; + } + + node = ast_node_new_binary_operation(token_to_binary_operation_kind(&token), left, right, TYPE_BOOL); + + lexer_peek_next_token(parser->lexer, &token); + } + + return node; } static ast_node_t * -parser_parse_comparison(parser_t *parser) +parser_parse_comparison2(parser_t *parser) { ast_node_t *node = parser_parse_arithmetic1(parser); @@ -195,8 +261,7 @@ parser_parse_comparison(parser_t *parser) lexer_peek_next_token(parser->lexer, &token); while (token.kind == TOKEN_GT || token.kind == TOKEN_LT || token.kind == TOKEN_GT_EQUAL || - token.kind == TOKEN_LT_EQUAL || token.kind == TOKEN_EQUAL || token.kind == TOKEN_NOT_EQUAL || - token.kind == TOKEN_AND || token.kind == TOKEN_OR) { + token.kind == TOKEN_LT_EQUAL || token.kind == TOKEN_EQUAL || token.kind == TOKEN_NOT_EQUAL) { lexer_drop_next_token(parser->lexer); ast_node_t *left = node; @@ -304,21 +369,8 @@ parser_parse_factor(parser_t *parser) return expression; } - case TOKEN_NAME: { - // TODO: Check node kind, today accepts only variables - ast_node_t *var_node = scope_get(parser->scope, token.value); - - if (var_node == NULL) { - parser_error_t error; - error.token = token; - sprintf(error.message, "identifier '" SVFMT "' not defined", SVARG(&token.value)); - parser->errors[parser->errors_len++] = error; - return NULL; - } - - return ast_node_new_variable(&var_node->data.variable_declaration.identifier, - var_node->data.variable_declaration.type); - } + case TOKEN_NAME: + return parser_parse_name_factor(parser, &token); default: { parser_error_t error; error.token = token; @@ -329,6 +381,79 @@ parser_parse_factor(parser_t *parser) } } +static void +parser_drop_until_close_paren(parser_t *parser) +{ + if (!is_next_token_oparen(parser)) { + return; + } + + int open_parens = 0; + do { + if (is_next_token_oparen(parser)) { + open_parens++; + } else if (is_next_token_cparen(parser)) { + open_parens--; + } + lexer_drop_next_token(parser->lexer); + } while (open_parens != 0 && !is_next_token_eof(parser)); +} + +static ast_node_t * +parser_parse_name_factor(parser_t *parser, token_t *token) +{ + ast_node_t *referenced_node = scope_get(parser->scope, token->value); + + if (referenced_node == NULL) { + ast_node_t *node = ast_node_new(); + + parser_to_be_reevaluated_push(parser, node, *token); + + parser_drop_until_close_paren(parser); + return node; + } + + if (referenced_node->kind == AST_VARIABLE_DECLARATION) { + return ast_node_new_variable(&referenced_node->data.variable_declaration.identifier, + referenced_node->data.variable_declaration.type); + } + + // Function parameters are also variables. + if (referenced_node->kind == AST_FUNCTION_PARAMETER) { + return ast_node_new_variable(&referenced_node->data.function_parameter.identifier, + referenced_node->data.function_parameter.type); + } + + // Parse function parameters + if (!drop_expected_token(parser, TOKEN_OPAREN)) { + return NULL; + } + + vector_t *arguments = vector_new(); + + while (!is_next_token_cparen(parser)) { + ast_node_t *argument = parser_parse_expression(parser); + + if (argument == NULL) { + ast_node_destroy_vector(arguments); + return NULL; + } + + vector_push_back(arguments, argument); + + if (!is_next_token_cparen(parser) && !drop_expected_token(parser, TOKEN_COMMA)) { + ast_node_destroy_vector(arguments); + return NULL; + } + } + + drop_expected_token(parser, TOKEN_CPAREN); + + return ast_node_new_function_call(ast_node_function_declaration_identifier(referenced_node), + referenced_node->data.function.prototype.return_type, + arguments); +} + static ast_node_t * parser_parse_return_stmt(parser_t *parser, type_t result_type) { @@ -404,6 +529,11 @@ parser_parse_variable_assignment(parser_t *parser) static bool parser_expression_matches_the_expected_type(parser_t *parser, ast_node_t *expression, type_t type, token_t token) { + if (expression->kind == AST_UNKOWN_NODE) { + expression->data.unknown.expected_type = type; + expression->data.unknown.reference_token = token; + return true; + } if (expression->result_type != type) { parser_error_t error; error.token = token; @@ -537,6 +667,38 @@ is_next_statement_return(parser_t *parser) } static bool +is_next_function_declaration(parser_t *parser) +{ + token_t token; + lexer_peek_next_token(parser->lexer, &token); + return token.kind == TOKEN_KEYWORD_FN; +} + +static bool +is_next_token_cparen(parser_t *parser) +{ + token_t token; + lexer_peek_next_token(parser->lexer, &token); + return token.kind == TOKEN_CPAREN; +} + +static bool +is_next_token_oparen(parser_t *parser) +{ + token_t token; + lexer_peek_next_token(parser->lexer, &token); + return token.kind == TOKEN_OPAREN; +} + +static bool +is_next_token_eof(parser_t *parser) +{ + token_t token; + lexer_peek_next_token(parser->lexer, &token); + return token.kind == TOKEN_EOF; +} + +static bool is_block_end(parser_t *parser) { token_t token; @@ -660,10 +822,48 @@ parser_parse_block_declarations(parser_t *parser, type_t result_type) return ast_node_new_block(body); } -static bool -parser_parse_function_arguments(parser_t *parser) +static vector_t * +parser_parse_function_parameters(parser_t *parser) { - return drop_expected_token(parser, TOKEN_OPAREN) && drop_expected_token(parser, TOKEN_CPAREN); + if (!drop_expected_token(parser, TOKEN_OPAREN)) { + return NULL; + } + + vector_t *vector = vector_new(); + + while (!is_next_token_cparen(parser)) { + token_t token_name; + type_t type; + + if (!expected_token(&token_name, parser, TOKEN_NAME)) { + ast_node_destroy_vector(vector); + return NULL; + } + if (!drop_expected_token(parser, TOKEN_COLON)) { + ast_node_destroy_vector(vector); + return NULL; + } + + if (!parser_parse_type(parser, &type)) { + ast_node_destroy_vector(vector); + return NULL; + } + + ast_node_t *parameter = ast_node_new_function_parameter(token_name.value, type); + + scope_push(parser->scope, ¶meter->data.function_parameter.identifier, parameter); + + vector_push_back(vector, parameter); + + if (!is_next_token_cparen(parser) && !drop_expected_token(parser, TOKEN_COMMA)) { + ast_node_destroy_vector(vector); + return NULL; + } + } + + drop_expected_token(parser, TOKEN_CPAREN); + + return vector; } ast_node_t * @@ -679,7 +879,9 @@ parser_parse_function_declaration(parser_t *parser) return NULL; } - if (!parser_parse_function_arguments(parser)) { + vector_t *parameters = parser_parse_function_parameters(parser); + + if (parameters == NULL) { return NULL; } @@ -704,11 +906,86 @@ parser_parse_function_declaration(parser_t *parser) return NULL; } - ast_node_t *node = ast_node_new_function_declaration(func_name_token.value, return_type, body); + ast_node_t *node = ast_node_new_function_declaration(func_name_token.value, return_type, parameters, body); // TODO: When implementing function calls the scope must be pushed before the // body to be parsed, otherwise recursion not gonna work. - scope_push(parser->scope, &node->data.function.identifier, node); + scope_push(parser->scope, &node->data.function.prototype.identifier, node); return node; } + +static bool +parser_reevaluate_nodes(parser_t *parser) +{ + for (size_t i = 0; i < parser->to_be_reevaluated->size; i++) { + reevaluation_node_t *reevaluation = vector_at(parser->to_be_reevaluated, i); + + lexer_step_back_to(parser->lexer, &reevaluation->token); + lexer_drop_next_token(parser->lexer); + + ast_node_t *node = parser_parse_name_factor(parser, &reevaluation->token); + + if (node == NULL) { + return false; + } + + if (node->kind == AST_UNKOWN_NODE) { + parser_error_t error; + error.token = reevaluation->token; + sprintf(error.message, "identifier '" SVFMT "' not defined", SVARG(&reevaluation->token.value)); + parser->errors[parser->errors_len++] = error; + ast_node_destroy(node); + return false; + } + + if (!parser_expression_matches_the_expected_type(parser, + node, + reevaluation->node->data.unknown.expected_type, + reevaluation->node->data.unknown.reference_token)) { + ast_node_destroy(node); + return false; + } + + *reevaluation->node = *node; + + free(node); + } + return true; +} + +ast_node_t * +parser_parse_ns(parser_t *parser) +{ + vector_t *nodes = vector_new(); + + while (!is_next_token_eof(parser)) { + if (is_next_function_declaration(parser)) { + ast_node_t *node = parser_parse_function_declaration(parser); + + if (node == NULL) { + ast_node_destroy_vector(nodes); + return NULL; + } + + vector_push_back(nodes, node); + continue; + } + + parser_error_t error; + + lexer_peek_next_token(parser->lexer, &error.token); + sprintf(error.message, "Unexpected token '" SVFMT "'", SVARG(&error.token.value)); + parser->errors[parser->errors_len++] = error; + + ast_node_destroy_vector(nodes); + return NULL; + } + + if (!parser_reevaluate_nodes(parser)) { + ast_node_destroy_vector(nodes); + return NULL; + } + + return ast_node_new_namespace(nodes); +} diff --git a/src/parser.h b/src/parser.h index ef5dff5..cfeb1f0 100644 --- a/src/parser.h +++ b/src/parser.h @@ -28,10 +28,17 @@ typedef struct parser_error_t char message[256]; } parser_error_t; +typedef struct reevaluation_node_t +{ + ast_node_t *node; + token_t token; +} reevaluation_node_t; + typedef struct parser_t { lexer_t *lexer; scope_t *scope; + vector_t *to_be_reevaluated; int errors_len; // FIXME: replace with vector parser_error_t errors[1]; @@ -46,4 +53,7 @@ parser_parse_function_declaration(parser_t *parser); ast_node_t * parser_parse_expression(parser_t *parser); +ast_node_t * +parser_parse_ns(parser_t *parser); + #endif /* PARSER_H */ diff --git a/test/integration_test.c b/test/integration_test.c index 4178b42..8866f83 100644 --- a/test/integration_test.c +++ b/test/integration_test.c @@ -43,6 +43,9 @@ test_examples(const MunitParameter params[], void *user_data_or_fixture) assert_exit_status("../examples/main.pipa", 69); assert_exit_status("../examples/arithmetics.pipa", 13); assert_exit_status("../examples/variables.pipa", 28); + assert_exit_status("../examples/if.pipa", 42); + assert_exit_status("../examples/function_call.pipa", 42); + assert_exit_status("../examples/fibbonacci.pipa", 233); return MUNIT_OK; } diff --git a/test/parser_test.c b/test/parser_test.c index 43eb27b..acc238f 100644 --- a/test/parser_test.c +++ b/test/parser_test.c @@ -48,9 +48,9 @@ assert_parser_error(char *src, char *error_msg) make_lexer_from_static_src(&lexer, src); parser_init(&parser, &lexer, scope); - ast_node_t *ast_function = parser_parse_function_declaration(&parser); + ast_node_t *ast_ns = parser_parse_ns(&parser); - assert_false(ast_function != NULL); + assert_false(ast_ns != NULL); assert_int(1, ==, parser.errors_len); assert_string_equal(error_msg, parser.errors[0].message); @@ -64,17 +64,15 @@ test_parse_function(const MunitParameter params[], void *user_data_or_fixture) lexer_t lexer; scope_t *scope = scope_new(); - make_lexer_from_static_src(&lexer, "fn main(): i32 { \nreturn 42;\n }"); + make_lexer_from_static_src(&lexer, "fn add(): i32 { return 2; } fn main(): i32 { \nreturn 42;\n }"); parser_init(&parser, &lexer, scope); - ast_node_t *ast_function = parser_parse_function_declaration(&parser); + ast_node_t *ast_ns = parser_parse_ns(&parser); - assert_not_null(ast_function); + assert_not_null(ast_ns); - char actual[5]; + ast_node_t *ast_function = ast_node_ns_get_function_node_by_name(ast_ns, "main"); - string_view_to_str(&ast_function->data.function.identifier.name, actual); - assert_string_equal("main", actual); - assert_int(AST_FUNCTION_DECLARATION, ==, ast_function->kind); + assert_not_null(ast_function); ast_node_t *ast_return = vector_at(ast_function->data.function.body->data.block.body, 0); @@ -106,7 +104,7 @@ test_parse_variable_definition(const MunitParameter params[], void *user_data_or char actual[5]; - string_view_to_str(&ast_function->data.function.identifier.name, actual); + string_view_to_str(&ast_function->data.function.prototype.identifier.name, actual); assert_string_equal("main", actual); assert_int(AST_FUNCTION_DECLARATION, ==, ast_function->kind); @@ -143,7 +141,7 @@ test_parse_boolean(const MunitParameter params[], void *user_data_or_fixture) assert_true(ast_function != NULL); - assert_string_view_equal("my_bool_fn", ast_function->data.function.identifier.name); + assert_string_view_equal("my_bool_fn", ast_node_function_declaration_name(ast_function)); assert_int(AST_FUNCTION_DECLARATION, ==, ast_function->kind); ast_node_t *ast_variable = vector_at(ast_function->data.function.body->data.block.body, 0); @@ -320,35 +318,35 @@ test_parse_all_boolean_expression(const MunitParameter params[], void *user_data ast_node_t *exp1 = ast_expression; assert_int(TYPE_BOOL, ==, exp1->result_type); - assert_int(AST_BINOP_NOT_EQUAL, ==, exp1->data.binary_operation.kind); + assert_int(AST_BINOP_AND, ==, exp1->data.binary_operation.kind); ast_node_t *exp2 = exp1->data.binary_operation.left; assert_int(TYPE_BOOL, ==, exp2->result_type); - assert_int(AST_BINOP_EQUAL, ==, exp2->data.binary_operation.kind); + assert_int(AST_BINOP_OR, ==, exp2->data.binary_operation.kind); - ast_node_t *exp3 = exp2->data.binary_operation.left; + ast_node_t *exp3 = exp1->data.binary_operation.right; assert_int(TYPE_BOOL, ==, exp3->result_type); - assert_int(AST_BINOP_LT_EQUAL, ==, exp3->data.binary_operation.kind); + assert_int(AST_BINOP_NOT_EQUAL, ==, exp3->data.binary_operation.kind); ast_node_t *exp4 = exp3->data.binary_operation.left; assert_int(TYPE_BOOL, ==, exp4->result_type); - assert_int(AST_BINOP_GT_EQUAL, ==, exp4->data.binary_operation.kind); + assert_int(AST_BINOP_EQUAL, ==, exp4->data.binary_operation.kind); ast_node_t *exp5 = exp4->data.binary_operation.left; assert_int(TYPE_BOOL, ==, exp5->result_type); - assert_int(AST_BINOP_LT, ==, exp5->data.binary_operation.kind); + assert_int(AST_BINOP_LT_EQUAL, ==, exp5->data.binary_operation.kind); ast_node_t *exp6 = exp5->data.binary_operation.left; assert_int(TYPE_BOOL, ==, exp6->result_type); - assert_int(AST_BINOP_GT, ==, exp6->data.binary_operation.kind); + assert_int(AST_BINOP_GT_EQUAL, ==, exp6->data.binary_operation.kind); ast_node_t *exp7 = exp6->data.binary_operation.left; assert_int(TYPE_BOOL, ==, exp7->result_type); - assert_int(AST_BINOP_AND, ==, exp7->data.binary_operation.kind); + assert_int(AST_BINOP_LT, ==, exp7->data.binary_operation.kind); ast_node_t *exp8 = exp7->data.binary_operation.left; assert_int(TYPE_BOOL, ==, exp8->result_type); - assert_int(AST_BINOP_OR, ==, exp8->data.binary_operation.kind); + assert_int(AST_BINOP_GT, ==, exp8->data.binary_operation.kind); ast_node_destroy(ast_expression); scope_destroy(scope); @@ -396,10 +394,10 @@ assert_string_view_equal(char *expected, string_view_t actual) static MunitResult test_parse_basic_syntax_errors(const MunitParameter params[], void *user_data_or_fixture) { - assert_parser_error("main(): i32 { return 42; }", "expected 'fn' but got 'TOKEN_NAME'"); + assert_parser_error("main(): i32 { return 42; }", "Unexpected token 'main'"); assert_parser_error("fn (): i32 { return 42; }", "expected 'TOKEN_NAME' but got '('"); assert_parser_error("fn main): i32 { return 42; }", "expected '(' but got ')'"); - assert_parser_error("fn main(: i32 { return 42; }", "expected ')' but got ':'"); + assert_parser_error("fn main(: i32 { return 42; }", "expected 'TOKEN_NAME' but got ':'"); assert_parser_error("fn main() i32 { return 42; }", "expected ':' but got 'TOKEN_NAME'"); assert_parser_error("fn main(): { return 42; }", "expected 'TOKEN_NAME' but got '{'"); assert_parser_error("fn main(): i32 return 42; }", "expected '{' but got 'return'"); @@ -429,8 +427,11 @@ test_parse_basic_syntax_errors(const MunitParameter params[], void *user_data_or "incompatible types: expected 'bool', actual 'i32'."); assert_parser_error("fn main(): i32 { if true { return true; } \nreturn 42;\n }", "incompatible types: expected 'i32', actual 'bool'."); - // FIXME: once function calls are implemented, this error should inform that - // neither a variable or function call was found. + assert_parser_error("fn main(): i32 { return fn_404(); }", "identifier 'fn_404' not defined"); + assert_parser_error("fn my_bool_fn(): bool { return true; } fn main(): i32 { return my_bool_fn(); }", + "incompatible types: expected 'i32', actual 'bool'."); + assert_parser_error("fn main(): i32 { return my_bool_fn(); } fn my_bool_fn(): bool { return true; }", + "incompatible types: expected 'i32', actual 'bool'."); assert_parser_error("fn main(): i32 { oxi 42; }", "unexpected token 'TOKEN_NAME' value='oxi'"); return MUNIT_OK; |