From 5de2e1fd9f426348127a66d2c51c300cb90cc3a4 Mon Sep 17 00:00:00 2001 From: Carlos Maniero Date: Wed, 10 May 2023 00:20:00 -0300 Subject: gas: Generate code for if statement If statements are now working, the only exception is for the comparators || and && that will be addressed in a further commit. Checks tested: fn main(): i32 { let n: i32 = 11; if (n == 11) { if n != 12 { if n < 12 { if n <= 11 { if n > 10 { if n >= 11 { return 42; } } } } } } return n; } To compile the && and || a precedence issue must be addressed: they must have the highest precedence, witch is not working now: 1 == 2 || 3 != 2 The or should be the higher level of the tree in the example above. Signed-off-by: Carlos Maniero --- examples/if.pipa | 16 +++++++++-- src/ast_pretty_printer.c | 15 ++++++---- src/gas_assembly_generator.c | 67 ++++++++++++++++++++++++++++++++++++++++++++ src/gas_assembly_generator.h | 1 + test/integration_test.c | 1 + 5 files changed, 91 insertions(+), 9 deletions(-) diff --git a/examples/if.pipa b/examples/if.pipa index 2c5578d..b7cadcb 100644 --- a/examples/if.pipa +++ b/examples/if.pipa @@ -1,8 +1,18 @@ fn main(): i32 { - let n: i32 = 42; + let n: i32 = 11; - if n > 42 { - return 42; + if (n == 11) { + if n != 12 { + if n < 12 { + if n <= 11 { + if n > 10 { + if n >= 11 { + return 42; + } + } + } + } + } } return n; diff --git a/src/ast_pretty_printer.c b/src/ast_pretty_printer.c index e2d007a..b14bf34 100644 --- a/src/ast_pretty_printer.c +++ b/src/ast_pretty_printer.c @@ -22,10 +22,12 @@ #include #include -const char operation_kinds[AST_BINOP_N] = { [AST_BINOP_ADITION] = '+', - [AST_BINOP_SUBTRACTION] = '-', - [AST_BINOP_MULTIPLICATION] = '*', - [AST_BINOP_DIVISION] = '/' }; +const char *operation_kinds[AST_BINOP_N] = { + [AST_BINOP_ADITION] = "+", [AST_BINOP_SUBTRACTION] = "-", [AST_BINOP_MULTIPLICATION] = "*", + [AST_BINOP_DIVISION] = "/", [AST_BINOP_EQUAL] = "==", [AST_BINOP_NOT_EQUAL] = "!=", + [AST_BINOP_AND] = "&&", [AST_BINOP_OR] = "||", [AST_BINOP_GT] = ">", + [AST_BINOP_LT] = "<", [AST_BINOP_LT_EQUAL] = "<=", [AST_BINOP_GT_EQUAL] = ">=", +}; void ast_pretty_printer_init(ast_pretty_printer_t *printer, FILE *stream); @@ -76,6 +78,7 @@ ast_pretty_printer_print_ast(ast_pretty_printer_t *printer, ast_node_t *ast) ast_pretty_printer_rm_indentation(printer); } + ast_pretty_printer_set_lst_children(printer); ast_pretty_printer_printf(printer, "body:\n"); ast_pretty_printer_add_indentation(printer); { @@ -90,7 +93,7 @@ ast_pretty_printer_print_ast(ast_pretty_printer_t *printer, ast_node_t *ast) } case AST_BINARY_OPERATION: { ast_binary_operation_t binop = ast->data.binary_operation; - ast_pretty_printer_printf(printer, "BinaryOperation operation='%c'\n", operation_kinds[binop.kind]); + ast_pretty_printer_printf(printer, "BinaryOperation operation='%s'\n", operation_kinds[binop.kind]); ast_pretty_printer_add_indentation(printer); { @@ -130,6 +133,7 @@ ast_pretty_printer_print_ast(ast_pretty_printer_t *printer, ast_node_t *ast) ast_pretty_printer_add_indentation(printer); { + ast_pretty_printer_set_lst_children(printer); ast_pretty_printer_print_ast(printer, ast->data.function.body); ast_pretty_printer_rm_indentation(printer); } @@ -139,7 +143,6 @@ ast_pretty_printer_print_ast(ast_pretty_printer_t *printer, ast_node_t *ast) break; } case AST_BLOCK: { - ast_pretty_printer_set_lst_children(printer); ast_pretty_printer_printf(printer, "Block\n"); ast_pretty_printer_add_indentation(printer); { diff --git a/src/gas_assembly_generator.c b/src/gas_assembly_generator.c index 4b898ed..f63e35b 100644 --- a/src/gas_assembly_generator.c +++ b/src/gas_assembly_generator.c @@ -44,6 +44,9 @@ gas_assembly_generator_compile_variable_assignment(gas_assembly_generator_t *gen static void gas_assembly_generator_compile_block(gas_assembly_generator_t *gen, ast_block_t *block); +static void +gas_assembly_generator_compile_if_stmt(gas_assembly_generator_t *gen, ast_if_stmt_t *if_stmt); + static void gas_assembly_generator_compile_variable(gas_assembly_generator_t *gen, ast_variable_t *variable); @@ -106,6 +109,7 @@ gas_assembly_generator_init(gas_assembly_generator_t *gen, FILE *stream) gen->stream = stream; gen->refs = vector_new(); gen->stack_offset = 0; + gen->label_counter = 0; } void @@ -139,6 +143,8 @@ gas_assembly_generator_compile(gas_assembly_generator_t *gen, ast_node_t *ast) gas_assembly_generator_compile_block(gen, &ast->data.block); break; case AST_IF_STMT: + gas_assembly_generator_compile_if_stmt(gen, &ast->data.if_stmt); + break; case AST_UNKOWN_NODE: assert(false && "unreachable"); } @@ -336,5 +342,66 @@ gas_assembly_generator_binary_operation(gas_assembly_generator_t *gen, ast_binar return; } + if (binary_operation->kind == AST_BINOP_LT || binary_operation->kind == AST_BINOP_GT || + binary_operation->kind == AST_BINOP_EQUAL || binary_operation->kind == AST_BINOP_NOT_EQUAL || + binary_operation->kind == AST_BINOP_AND || binary_operation->kind == AST_BINOP_OR || + binary_operation->kind == AST_BINOP_GT || binary_operation->kind == AST_BINOP_LT || + binary_operation->kind == AST_BINOP_LT_EQUAL || binary_operation->kind == AST_BINOP_GT_EQUAL) { + fprintf(gen->stream, " cmp %%rcx, %%rax\n"); + return; + } + assert(false && "No strategy defined for binary operation"); } + +static void +gas_assembly_generator_compile_binop_if(gas_assembly_generator_t *gen, ast_node_t *binop_node) +{ + assert(binop_node->kind == AST_BINARY_OPERATION); + + char *jump_map[AST_BINOP_N] = { + [AST_BINOP_EQUAL] = "jne", [AST_BINOP_NOT_EQUAL] = "je", [AST_BINOP_LT] = "jge", + [AST_BINOP_LT_EQUAL] = "jg", [AST_BINOP_GT] = "jle", [AST_BINOP_GT_EQUAL] = "jl", + }; + + char *jumper = jump_map[binop_node->data.binary_operation.kind]; + + assert(jumper != NULL); + + gas_assembly_generator_compile(gen, binop_node); + + fprintf(gen->stream, " %s .IF%ld\n", jumper, gen->label_counter); +} + +static void +gas_assembly_generator_compile_if_stmt(gas_assembly_generator_t *gen, ast_if_stmt_t *if_stmt) +{ + switch (if_stmt->condition->kind) { + case AST_LITERAL: { + int if_counter = gen->label_counter++; + + assert(if_stmt->condition->data.literal.kind == AST_LITERAL_BOOL); + + if (!if_stmt->condition->data.literal.value.boolean) { + fprintf(gen->stream, " jmp .IF%d\n", if_counter); + } + + gas_assembly_generator_compile(gen, if_stmt->body); + + fprintf(gen->stream, ".IF%d:\n", if_counter); + break; + } + case AST_BINARY_OPERATION: { + gas_assembly_generator_compile_binop_if(gen, if_stmt->condition); + + int if_counter = gen->label_counter++; + + gas_assembly_generator_compile(gen, if_stmt->body); + + fprintf(gen->stream, ".IF%d:\n", if_counter); + break; + } + default: + assert(false); + } +} diff --git a/src/gas_assembly_generator.h b/src/gas_assembly_generator.h index 92972f9..3d9a43c 100644 --- a/src/gas_assembly_generator.h +++ b/src/gas_assembly_generator.h @@ -46,6 +46,7 @@ typedef struct gas_assembly_generator_t FILE *stream; vector_t *refs; int stack_offset; + uint64_t label_counter; evaluation_result_t latest_evaluation; } gas_assembly_generator_t; diff --git a/test/integration_test.c b/test/integration_test.c index 4178b42..985e574 100644 --- a/test/integration_test.c +++ b/test/integration_test.c @@ -43,6 +43,7 @@ test_examples(const MunitParameter params[], void *user_data_or_fixture) assert_exit_status("../examples/main.pipa", 69); assert_exit_status("../examples/arithmetics.pipa", 13); assert_exit_status("../examples/variables.pipa", 28); + assert_exit_status("../examples/if.pipa", 42); return MUNIT_OK; } -- cgit v1.2.3 From 3065f54e3a122dd3d8c2deffdec72ec48ea4f165 Mon Sep 17 00:00:00 2001 From: Carlos Maniero Date: Wed, 10 May 2023 00:20:01 -0300 Subject: parser: Fixes boolean binary operation precedence The comparators && and || should have precedence over others comparators (> < >= <= == !=). Signed-off-by: Carlos Maniero --- src/parser.c | 48 +++++++++++++++++++++++++++++++++++++++++------- test/parser_test.c | 18 +++++++++--------- 2 files changed, 50 insertions(+), 16 deletions(-) diff --git a/src/parser.c b/src/parser.c index 9167f1e..e3f157f 100644 --- a/src/parser.c +++ b/src/parser.c @@ -156,7 +156,10 @@ ast_node_t * parser_parse_expression(parser_t *parser); static ast_node_t * -parser_parse_comparison(parser_t *parser); +parser_parse_comparison1(parser_t *parser); + +static ast_node_t * +parser_parse_comparison2(parser_t *parser); static ast_node_t * parser_parse_arithmetic1(parser_t *parser); @@ -169,8 +172,9 @@ parser_parse_factor(parser_t *parser); /** * - * ::= - * ::= (('>' | '>=' | '=' | ...) arithmetic1)* + * ::= + * ::= (('>' | '>=' | '=' | ...) comparison2)* + * ::= (('>' | '>=' | '=' | ...) arithmetic1)* * ::= (('+' | '-') arithmetic2)* * ::= (('*' | '/') factor)* * ::= | '(' ')' @@ -179,11 +183,42 @@ parser_parse_factor(parser_t *parser); ast_node_t * parser_parse_expression(parser_t *parser) { - return parser_parse_comparison(parser); + return parser_parse_comparison1(parser); +} + +static ast_node_t * +parser_parse_comparison1(parser_t *parser) +{ + ast_node_t *node = parser_parse_comparison2(parser); + + if (node == NULL) { + return NULL; + } + + token_t token; + lexer_peek_next_token(parser->lexer, &token); + + while (token.kind == TOKEN_AND || token.kind == TOKEN_OR) { + lexer_drop_next_token(parser->lexer); + + ast_node_t *left = node; + ast_node_t *right = parser_parse_comparison2(parser); + + if (right == NULL) { + ast_node_destroy(node); + return NULL; + } + + node = ast_node_new_binary_operation(token_to_binary_operation_kind(&token), left, right, TYPE_BOOL); + + lexer_peek_next_token(parser->lexer, &token); + } + + return node; } static ast_node_t * -parser_parse_comparison(parser_t *parser) +parser_parse_comparison2(parser_t *parser) { ast_node_t *node = parser_parse_arithmetic1(parser); @@ -195,8 +230,7 @@ parser_parse_comparison(parser_t *parser) lexer_peek_next_token(parser->lexer, &token); while (token.kind == TOKEN_GT || token.kind == TOKEN_LT || token.kind == TOKEN_GT_EQUAL || - token.kind == TOKEN_LT_EQUAL || token.kind == TOKEN_EQUAL || token.kind == TOKEN_NOT_EQUAL || - token.kind == TOKEN_AND || token.kind == TOKEN_OR) { + token.kind == TOKEN_LT_EQUAL || token.kind == TOKEN_EQUAL || token.kind == TOKEN_NOT_EQUAL) { lexer_drop_next_token(parser->lexer); ast_node_t *left = node; diff --git a/test/parser_test.c b/test/parser_test.c index 43eb27b..8565dc8 100644 --- a/test/parser_test.c +++ b/test/parser_test.c @@ -320,35 +320,35 @@ test_parse_all_boolean_expression(const MunitParameter params[], void *user_data ast_node_t *exp1 = ast_expression; assert_int(TYPE_BOOL, ==, exp1->result_type); - assert_int(AST_BINOP_NOT_EQUAL, ==, exp1->data.binary_operation.kind); + assert_int(AST_BINOP_AND, ==, exp1->data.binary_operation.kind); ast_node_t *exp2 = exp1->data.binary_operation.left; assert_int(TYPE_BOOL, ==, exp2->result_type); - assert_int(AST_BINOP_EQUAL, ==, exp2->data.binary_operation.kind); + assert_int(AST_BINOP_OR, ==, exp2->data.binary_operation.kind); - ast_node_t *exp3 = exp2->data.binary_operation.left; + ast_node_t *exp3 = exp1->data.binary_operation.right; assert_int(TYPE_BOOL, ==, exp3->result_type); - assert_int(AST_BINOP_LT_EQUAL, ==, exp3->data.binary_operation.kind); + assert_int(AST_BINOP_NOT_EQUAL, ==, exp3->data.binary_operation.kind); ast_node_t *exp4 = exp3->data.binary_operation.left; assert_int(TYPE_BOOL, ==, exp4->result_type); - assert_int(AST_BINOP_GT_EQUAL, ==, exp4->data.binary_operation.kind); + assert_int(AST_BINOP_EQUAL, ==, exp4->data.binary_operation.kind); ast_node_t *exp5 = exp4->data.binary_operation.left; assert_int(TYPE_BOOL, ==, exp5->result_type); - assert_int(AST_BINOP_LT, ==, exp5->data.binary_operation.kind); + assert_int(AST_BINOP_LT_EQUAL, ==, exp5->data.binary_operation.kind); ast_node_t *exp6 = exp5->data.binary_operation.left; assert_int(TYPE_BOOL, ==, exp6->result_type); - assert_int(AST_BINOP_GT, ==, exp6->data.binary_operation.kind); + assert_int(AST_BINOP_GT_EQUAL, ==, exp6->data.binary_operation.kind); ast_node_t *exp7 = exp6->data.binary_operation.left; assert_int(TYPE_BOOL, ==, exp7->result_type); - assert_int(AST_BINOP_AND, ==, exp7->data.binary_operation.kind); + assert_int(AST_BINOP_LT, ==, exp7->data.binary_operation.kind); ast_node_t *exp8 = exp7->data.binary_operation.left; assert_int(TYPE_BOOL, ==, exp8->result_type); - assert_int(AST_BINOP_OR, ==, exp8->data.binary_operation.kind); + assert_int(AST_BINOP_GT, ==, exp8->data.binary_operation.kind); ast_node_destroy(ast_expression); scope_destroy(scope); -- cgit v1.2.3 From 88630ebbea03e85119cf9795320a83cb846bdd20 Mon Sep 17 00:00:00 2001 From: Carlos Maniero Date: Wed, 10 May 2023 01:43:40 -0300 Subject: gas: Implement && and || for if statements Now if statements are complete! The function %gas_assembly_generator_compile_condition% is generic and will be used for any other flow-control statment. The only requirement to it work is having two labels: One to jump when the condition is true, and another one when the condition is false. Signed-off-by: Carlos Maniero --- examples/if.pipa | 50 +++++++++++++++++++- src/gas_assembly_generator.c | 109 +++++++++++++++++++++++++++++++------------ src/gas_assembly_generator.h | 2 +- 3 files changed, 130 insertions(+), 31 deletions(-) diff --git a/examples/if.pipa b/examples/if.pipa index b7cadcb..d65fc24 100644 --- a/examples/if.pipa +++ b/examples/if.pipa @@ -7,12 +7,60 @@ fn main(): i32 { if n <= 11 { if n > 10 { if n >= 11 { - return 42; + if n >= 11 || n < 11 { + if n > 11 || n < 11 { + return 199; + } + if n > 11 || n <= 11 { + if false || false { + return 199; + } + + if true || false { + if false || true { + if true && false { + return 197; + } + if false && true { + return 196; + } + if true && true { + if n >= 11 && n < 11 || n == 11 { + if false || n == 11 && n > 11 { + return 195; + } + if false || n >= 11 && n <= 11 { + if false || false && false && false || true { + if n + 1 > 11 && n - 1 < 11 { + return 42; + } + return 13; + } + return 12; + } + return 11; + } + return 10; + } + return 9; + } + return 8; + } + return 7; + } + return 6; + } + return 5; } + return 4; } + return 3; } + return 2; } + return 1; } + return 0; } return n; diff --git a/src/gas_assembly_generator.c b/src/gas_assembly_generator.c index f63e35b..c85d7a7 100644 --- a/src/gas_assembly_generator.c +++ b/src/gas_assembly_generator.c @@ -109,7 +109,7 @@ gas_assembly_generator_init(gas_assembly_generator_t *gen, FILE *stream) gen->stream = stream; gen->refs = vector_new(); gen->stack_offset = 0; - gen->label_counter = 0; + gen->cur_label_index = 0; } void @@ -344,7 +344,6 @@ gas_assembly_generator_binary_operation(gas_assembly_generator_t *gen, ast_binar if (binary_operation->kind == AST_BINOP_LT || binary_operation->kind == AST_BINOP_GT || binary_operation->kind == AST_BINOP_EQUAL || binary_operation->kind == AST_BINOP_NOT_EQUAL || - binary_operation->kind == AST_BINOP_AND || binary_operation->kind == AST_BINOP_OR || binary_operation->kind == AST_BINOP_GT || binary_operation->kind == AST_BINOP_LT || binary_operation->kind == AST_BINOP_LT_EQUAL || binary_operation->kind == AST_BINOP_GT_EQUAL) { fprintf(gen->stream, " cmp %%rcx, %%rax\n"); @@ -354,51 +353,103 @@ gas_assembly_generator_binary_operation(gas_assembly_generator_t *gen, ast_binar assert(false && "No strategy defined for binary operation"); } +static char *jump_map[AST_BINOP_N] = { + [AST_BINOP_EQUAL] = "je", [AST_BINOP_NOT_EQUAL] = "jne", [AST_BINOP_LT] = "jl", + [AST_BINOP_LT_EQUAL] = "jle", [AST_BINOP_GT] = "jg", [AST_BINOP_GT_EQUAL] = "jge", +}; + +static char *inverse_jump_map[AST_BINOP_N] = { + [AST_BINOP_EQUAL] = "jne", [AST_BINOP_NOT_EQUAL] = "je", [AST_BINOP_LT] = "jge", + [AST_BINOP_LT_EQUAL] = "jg", [AST_BINOP_GT] = "jle", [AST_BINOP_GT_EQUAL] = "jl", +}; + static void -gas_assembly_generator_compile_binop_if(gas_assembly_generator_t *gen, ast_node_t *binop_node) +gas_assembly_generator_compile_condition(gas_assembly_generator_t *gen, + ast_node_t *condition, + uint64_t true_label_index, + bool jump_when_true, + uint64_t false_label_index, + bool jump_when_false) { - assert(binop_node->kind == AST_BINARY_OPERATION); + assert(condition->kind == AST_BINARY_OPERATION || condition->kind == AST_LITERAL); + + if (condition->kind == AST_LITERAL) { + assert(condition->data.literal.kind == AST_LITERAL_BOOL); + + if (jump_when_false && !condition->data.literal.value.boolean) { + fprintf(gen->stream, " jmp .L_%ld\n", false_label_index); + return; + } + + if (jump_when_true && condition->data.literal.value.boolean) { + fprintf(gen->stream, " jmp .L_%ld\n", true_label_index); + } + + return; + } + + if (condition->data.binary_operation.kind == AST_BINOP_OR) { + uint64_t or_false_branch_label_index = gen->cur_label_index++; + + gas_assembly_generator_compile_condition( + gen, condition->data.binary_operation.left, true_label_index, true, or_false_branch_label_index, false); + + fprintf(gen->stream, ".L_%ld:\n", or_false_branch_label_index); + + gas_assembly_generator_compile_condition( + gen, condition->data.binary_operation.right, true_label_index, false, false_label_index, true); + return; + } + + if (condition->data.binary_operation.kind == AST_BINOP_AND) { + uint64_t and_true_branch_label_index = gen->cur_label_index++; - char *jump_map[AST_BINOP_N] = { - [AST_BINOP_EQUAL] = "jne", [AST_BINOP_NOT_EQUAL] = "je", [AST_BINOP_LT] = "jge", - [AST_BINOP_LT_EQUAL] = "jg", [AST_BINOP_GT] = "jle", [AST_BINOP_GT_EQUAL] = "jl", - }; + gas_assembly_generator_compile_condition( + gen, condition->data.binary_operation.left, and_true_branch_label_index, false, false_label_index, true); - char *jumper = jump_map[binop_node->data.binary_operation.kind]; + fprintf(gen->stream, ".L_%ld:\n", and_true_branch_label_index); - assert(jumper != NULL); + gas_assembly_generator_compile_condition( + gen, condition->data.binary_operation.right, true_label_index, false, false_label_index, true); + return; + } + + gas_assembly_generator_compile(gen, condition); + + if (jump_when_false) { + char *jumper = inverse_jump_map[condition->data.binary_operation.kind]; + + assert(jumper != NULL); + + fprintf(gen->stream, " %s .L_%ld\n", jumper, false_label_index); + return; + } + + if (jump_when_true) { + char *jumper = jump_map[condition->data.binary_operation.kind]; - gas_assembly_generator_compile(gen, binop_node); + assert(jumper != NULL); - fprintf(gen->stream, " %s .IF%ld\n", jumper, gen->label_counter); + fprintf(gen->stream, " %s .L_%ld\n", jumper, true_label_index); + } } static void gas_assembly_generator_compile_if_stmt(gas_assembly_generator_t *gen, ast_if_stmt_t *if_stmt) { switch (if_stmt->condition->kind) { + case AST_BINARY_OPERATION: case AST_LITERAL: { - int if_counter = gen->label_counter++; - - assert(if_stmt->condition->data.literal.kind == AST_LITERAL_BOOL); - - if (!if_stmt->condition->data.literal.value.boolean) { - fprintf(gen->stream, " jmp .IF%d\n", if_counter); - } - - gas_assembly_generator_compile(gen, if_stmt->body); - - fprintf(gen->stream, ".IF%d:\n", if_counter); - break; - } - case AST_BINARY_OPERATION: { - gas_assembly_generator_compile_binop_if(gen, if_stmt->condition); + uint64_t true_label_index = gen->cur_label_index++; + uint64_t false_label_index = gen->cur_label_index++; - int if_counter = gen->label_counter++; + gas_assembly_generator_compile_condition( + gen, if_stmt->condition, true_label_index, false, false_label_index, true); + fprintf(gen->stream, ".L_%ld:\n", true_label_index); gas_assembly_generator_compile(gen, if_stmt->body); - fprintf(gen->stream, ".IF%d:\n", if_counter); + fprintf(gen->stream, ".L_%ld:\n", false_label_index); break; } default: diff --git a/src/gas_assembly_generator.h b/src/gas_assembly_generator.h index 3d9a43c..2e195fb 100644 --- a/src/gas_assembly_generator.h +++ b/src/gas_assembly_generator.h @@ -46,7 +46,7 @@ typedef struct gas_assembly_generator_t FILE *stream; vector_t *refs; int stack_offset; - uint64_t label_counter; + uint64_t cur_label_index; evaluation_result_t latest_evaluation; } gas_assembly_generator_t; -- cgit v1.2.3 From a96a0cac034e16dcd7455a3b2fabf2b5b3e716bd Mon Sep 17 00:00:00 2001 From: Carlos Maniero Date: Wed, 10 May 2023 12:20:04 -0300 Subject: gas: Compile boolean variable assignment When the assignment value is a literal, it just assigns zero or one to the variable stack's location. If the value is an expression, it compiles the expression and assign zeros and ones based on expression result. --- examples/if.pipa | 18 +++++- src/gas_assembly_generator.c | 133 +++++++++++++++++++++++++++++++------------ src/gas_assembly_generator.h | 4 +- 3 files changed, 117 insertions(+), 38 deletions(-) diff --git a/examples/if.pipa b/examples/if.pipa index d65fc24..79f5724 100644 --- a/examples/if.pipa +++ b/examples/if.pipa @@ -1,6 +1,18 @@ fn main(): i32 { let n: i32 = 11; + let falsy: bool = false || n == 11 && n > 11; + let truth: bool = n + 1 > 11 && n - 1 < 11; + let literal_falsy: bool = false; + + if falsy { + return 201; + } + + if literal_falsy { + return 200; + } + if (n == 11) { if n != 12 { if n < 12 { @@ -15,7 +27,6 @@ fn main(): i32 { if false || false { return 199; } - if true || false { if false || true { if true && false { @@ -32,7 +43,10 @@ fn main(): i32 { if false || n >= 11 && n <= 11 { if false || false && false && false || true { if n + 1 > 11 && n - 1 < 11 { - return 42; + if truth { + return 42; + } + return 14; } return 13; } diff --git a/src/gas_assembly_generator.c b/src/gas_assembly_generator.c index c85d7a7..44467db 100644 --- a/src/gas_assembly_generator.c +++ b/src/gas_assembly_generator.c @@ -57,6 +57,13 @@ gas_assembly_generator_set_latest_evaluation_as_literal(gas_assembly_generator_t (evaluation_result_t){ .kind = EVALUATION_RESULT_AS_LITERAL_INTEGER, .data = { .literal_int = value } }; } +static void +gas_assembly_generator_set_latest_evaluation_as_literal_boolean(gas_assembly_generator_t *gen, bool value) +{ + gen->latest_evaluation = + (evaluation_result_t){ .kind = EVALUATION_RESULT_AS_LITERAL_BOOL, .data = { .literal_bool = value } }; +} + static void gas_assembly_generator_set_latest_evaluation_to_rax(gas_assembly_generator_t *gen) { @@ -70,6 +77,14 @@ gas_assembly_generator_set_latest_evaluation_on_stack(gas_assembly_generator_t * (evaluation_result_t){ .kind = EVALUATION_RESULT_ON_STACK, .data = { .stack_offset = stack_offset } }; } +static void +gas_assembly_generator_compile_condition(gas_assembly_generator_t *gen, + ast_node_t *condition, + uint64_t true_label_index, + bool jump_when_true, + uint64_t false_label_index, + bool jump_when_false); + typedef struct ref_entry_t { ast_identifier_t *id; @@ -178,25 +193,59 @@ gas_assembly_generator_compile_return_stmt(gas_assembly_generator_t *gen, ast_re gas_assembly_generator_compile(gen, return_stmt->argument); - if (gen->latest_evaluation.kind == EVALUATION_RESULT_AS_LITERAL_INTEGER) { - fprintf(gen->stream, " mov $%ld, %%rdi\n", gen->latest_evaluation.data.literal_int); - } else { - fprintf(gen->stream, " mov %%rax, %%rdi\n"); + switch (gen->latest_evaluation.kind) { + case EVALUATION_RESULT_AS_LITERAL_INTEGER: + fprintf(gen->stream, " mov $%ld, %%rdi\n", gen->latest_evaluation.data.literal_int); + break; + case EVALUATION_RESULT_ON_RAX: + fprintf(gen->stream, " mov %%rax, %%rdi\n"); + break; + case EVALUATION_RESULT_ON_STACK: + fprintf(gen->stream, " mov %d(%%rbp), %%rdi\n", gen->latest_evaluation.data.stack_offset); + break; + case EVALUATION_RESULT_VOID: + fprintf(gen->stream, " xor %%rdi, %%rdi\n"); + break; + case EVALUATION_RESULT_AS_LITERAL_BOOL: + assert(false && "Unexpected"); + break; } + fprintf(gen->stream, " mov $60, %%rax\n"); fprintf(gen->stream, " syscall\n"); } static void -gas_assembly_generator_compile_variable_declaration(gas_assembly_generator_t *gen, - ast_variable_declaration_t *variable_declaration) +gas_assembly_generator_compile_variable_assign_value(gas_assembly_generator_t *gen, + ast_node_t *expression, + ref_entry_t *entry) { - gen->stack_offset -= 8; - gas_assembly_generator_compile(gen, variable_declaration->value); + if (expression->result_type == TYPE_BOOL) { + if (expression->kind == AST_LITERAL) { + assert(expression->data.literal.kind == AST_LITERAL_BOOL); - ref_entry_t *entry = ref_entry_new(); - *entry = (ref_entry_t){ .id = &variable_declaration->identifier, .stack_offset = gen->stack_offset }; - vector_push_back(gen->refs, entry); + fprintf(gen->stream, " movq $%d, %d(%%rbp)\n", expression->data.literal.value.boolean, entry->stack_offset); + return; + } + + uint64_t true_label_index = gen->cur_label_index++; + uint64_t false_label_index = gen->cur_label_index++; + uint64_t after_assign_label_index = gen->cur_label_index++; + + gas_assembly_generator_compile_condition(gen, expression, true_label_index, false, false_label_index, true); + + fprintf(gen->stream, ".L_%ld:\n", true_label_index); + fprintf(gen->stream, " movq $1, %d(%%rbp)\n", entry->stack_offset); + fprintf(gen->stream, " jmp .L_%ld\n", after_assign_label_index); + + fprintf(gen->stream, ".L_%ld:\n", false_label_index); + fprintf(gen->stream, " movq $0, %d(%%rbp)\n", entry->stack_offset); + + fprintf(gen->stream, ".L_%ld:\n", after_assign_label_index); + return; + } + + gas_assembly_generator_compile(gen, expression); switch (gen->latest_evaluation.kind) { case EVALUATION_RESULT_AS_LITERAL_INTEGER: @@ -209,12 +258,26 @@ gas_assembly_generator_compile_variable_declaration(gas_assembly_generator_t *ge fprintf(gen->stream, " movq %d(%%rbp), %%rax\n", gen->latest_evaluation.data.stack_offset); fprintf(gen->stream, " movq %%rax, %d(%%rbp)\n", entry->stack_offset); break; + case EVALUATION_RESULT_AS_LITERAL_BOOL: case EVALUATION_RESULT_VOID: assert(false && "Unexpected void result for variable declaration"); // FIXME: store the latest node at latest evaluation and print_ast } } +static void +gas_assembly_generator_compile_variable_declaration(gas_assembly_generator_t *gen, + ast_variable_declaration_t *variable_declaration) +{ + gen->stack_offset -= 8; + + ref_entry_t *entry = ref_entry_new(); + *entry = (ref_entry_t){ .id = &variable_declaration->identifier, .stack_offset = gen->stack_offset }; + vector_push_back(gen->refs, entry); + + gas_assembly_generator_compile_variable_assign_value(gen, variable_declaration->value, entry); +} + static void gas_assembly_generator_compile_variable_assignment(gas_assembly_generator_t *gen, ast_variable_assignment_t *variable_assignment) @@ -222,30 +285,7 @@ gas_assembly_generator_compile_variable_assignment(gas_assembly_generator_t *gen ref_entry_t *entry = find_ref_entry(gen->refs, variable_assignment->identifier); assert(entry && "reference not found"); - gas_assembly_generator_compile(gen, variable_assignment->expression); - - switch (gen->latest_evaluation.kind) { - case EVALUATION_RESULT_AS_LITERAL_INTEGER: - fprintf(gen->stream, " movq $%ld, %d(%%rbp)\n", gen->latest_evaluation.data.literal_int, entry->stack_offset); - break; - case EVALUATION_RESULT_ON_RAX: - fprintf(gen->stream, " mov %%rax, %d(%%rbp)\n", entry->stack_offset); - break; - case EVALUATION_RESULT_ON_STACK: - fprintf(gen->stream, " movq %d(%%rbp), %%rax\n", gen->latest_evaluation.data.stack_offset); - fprintf(gen->stream, " movq %%rax, %d(%%rbp)\n", entry->stack_offset); - break; - case EVALUATION_RESULT_VOID: - assert(false && "Unexpected void result for variable declaration"); - // FIXME: store the latest node at latest evaluation and print_ast - } - // Today we dont't support chain-assignment like this: - // - // a = b = 1; - // - // If we start supporting it, we need to change the latest evaluation kind to - // stack. - gen->latest_evaluation.kind = EVALUATION_RESULT_VOID; + gas_assembly_generator_compile_variable_assign_value(gen, variable_assignment->expression, entry); } static void @@ -263,6 +303,9 @@ gas_assembly_generator_compile_literal(gas_assembly_generator_t *gen, ast_litera case AST_LITERAL_INTEGER: gas_assembly_generator_set_latest_evaluation_as_literal(gen, literal->value.integer); return; + case AST_LITERAL_BOOL: + gas_assembly_generator_set_latest_evaluation_as_literal_boolean(gen, literal->value.boolean); + return; default: assert(false && "no code generation strategy."); } @@ -301,6 +344,9 @@ gas_assembly_generator_binary_operation(gas_assembly_generator_t *gen, ast_binar case EVALUATION_RESULT_ON_STACK: fprintf(gen->stream, " movq %d(%%rbp), %%rcx\n", right_evaluation.data.stack_offset); break; + case EVALUATION_RESULT_AS_LITERAL_BOOL: + fprintf(gen->stream, " mov $%d, %%rcx\n", right_evaluation.data.literal_bool); + break; case EVALUATION_RESULT_VOID: assert(false && "Unexpected void result for binary operation"); // FIXME: store the latest node at latest evaluation and print_ast @@ -317,6 +363,9 @@ gas_assembly_generator_binary_operation(gas_assembly_generator_t *gen, ast_binar break; case EVALUATION_RESULT_ON_RAX: break; + case EVALUATION_RESULT_AS_LITERAL_BOOL: + fprintf(gen->stream, " mov $%d, %%rax\n", left_evaluation.data.literal_bool); + break; case EVALUATION_RESULT_VOID: assert(false && "Unexpected void result for binary operation"); } @@ -452,6 +501,20 @@ gas_assembly_generator_compile_if_stmt(gas_assembly_generator_t *gen, ast_if_stm fprintf(gen->stream, ".L_%ld:\n", false_label_index); break; } + case AST_VARIABLE: { + uint64_t false_label_index = gen->cur_label_index++; + ref_entry_t *entry = find_ref_entry(gen->refs, if_stmt->condition->data.variable.identifier); + + fprintf(gen->stream, " movq %d(%%rbp), %%rax\n", entry->stack_offset); + fprintf(gen->stream, " cmpq $1, %%rax\n"); + fprintf(gen->stream, " jne .L_%ld\n", false_label_index); + + gas_assembly_generator_compile(gen, if_stmt->body); + + fprintf(gen->stream, ".L_%ld:\n", false_label_index); + assert(entry); + break; + } default: assert(false); } diff --git a/src/gas_assembly_generator.h b/src/gas_assembly_generator.h index 2e195fb..e400d2c 100644 --- a/src/gas_assembly_generator.h +++ b/src/gas_assembly_generator.h @@ -26,12 +26,14 @@ typedef enum evaluation_result_kind_t EVALUATION_RESULT_VOID, EVALUATION_RESULT_ON_RAX, EVALUATION_RESULT_ON_STACK, - EVALUATION_RESULT_AS_LITERAL_INTEGER + EVALUATION_RESULT_AS_LITERAL_INTEGER, + EVALUATION_RESULT_AS_LITERAL_BOOL } evaluation_result_kind_t; typedef union evaluation_result_data_t { int64_t literal_int; + bool literal_bool; int stack_offset; } evaluation_result_data_t; -- cgit v1.2.3 From f1b1c191975581d5ef13bd04f33b6965235d00b4 Mon Sep 17 00:00:00 2001 From: Carlos Maniero Date: Wed, 10 May 2023 13:53:53 -0300 Subject: gas: Abstract refs with helper functions This commit abstract the complexity of an entry so then, the users of the ref map does not need to understand how is it implemented. Signed-off-by: Carlos Maniero --- src/gas_assembly_generator.c | 60 +++++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/src/gas_assembly_generator.c b/src/gas_assembly_generator.c index 44467db..97ba9ce 100644 --- a/src/gas_assembly_generator.c +++ b/src/gas_assembly_generator.c @@ -105,16 +105,29 @@ ref_entry_new(void) return entry; } -static ref_entry_t * -find_ref_entry(vector_t *refs, ast_identifier_t *identifier) +static int +ref_find_variable_reference_stack_offset(vector_t *refs, ast_identifier_t *identifier) { for (int i = refs->size - 1; i >= 0; --i) { ref_entry_t *entry = (ref_entry_t *)vector_at(refs, i); if (entry->id == identifier) { - return entry; + return entry->stack_offset; } } - return NULL; + assert(false); +} + +static void +ref_set_variable_reference_stack_offset(vector_t *refs, ast_identifier_t *identifier, int stack_offset) +{ + ref_entry_t *ref_entry = ref_entry_new(); + + *ref_entry = (ref_entry_t){ + .id = identifier, + .stack_offset = stack_offset, + }; + + vector_push_back(refs, ref_entry); } void @@ -218,13 +231,13 @@ gas_assembly_generator_compile_return_stmt(gas_assembly_generator_t *gen, ast_re static void gas_assembly_generator_compile_variable_assign_value(gas_assembly_generator_t *gen, ast_node_t *expression, - ref_entry_t *entry) + int stack_offset) { if (expression->result_type == TYPE_BOOL) { if (expression->kind == AST_LITERAL) { assert(expression->data.literal.kind == AST_LITERAL_BOOL); - fprintf(gen->stream, " movq $%d, %d(%%rbp)\n", expression->data.literal.value.boolean, entry->stack_offset); + fprintf(gen->stream, " movq $%d, %d(%%rbp)\n", expression->data.literal.value.boolean, stack_offset); return; } @@ -235,11 +248,11 @@ gas_assembly_generator_compile_variable_assign_value(gas_assembly_generator_t *g gas_assembly_generator_compile_condition(gen, expression, true_label_index, false, false_label_index, true); fprintf(gen->stream, ".L_%ld:\n", true_label_index); - fprintf(gen->stream, " movq $1, %d(%%rbp)\n", entry->stack_offset); + fprintf(gen->stream, " movq $1, %d(%%rbp)\n", stack_offset); fprintf(gen->stream, " jmp .L_%ld\n", after_assign_label_index); fprintf(gen->stream, ".L_%ld:\n", false_label_index); - fprintf(gen->stream, " movq $0, %d(%%rbp)\n", entry->stack_offset); + fprintf(gen->stream, " movq $0, %d(%%rbp)\n", stack_offset); fprintf(gen->stream, ".L_%ld:\n", after_assign_label_index); return; @@ -249,14 +262,14 @@ gas_assembly_generator_compile_variable_assign_value(gas_assembly_generator_t *g switch (gen->latest_evaluation.kind) { case EVALUATION_RESULT_AS_LITERAL_INTEGER: - fprintf(gen->stream, " movq $%ld, %d(%%rbp)\n", gen->latest_evaluation.data.literal_int, entry->stack_offset); + fprintf(gen->stream, " movq $%ld, %d(%%rbp)\n", gen->latest_evaluation.data.literal_int, stack_offset); break; case EVALUATION_RESULT_ON_RAX: - fprintf(gen->stream, " mov %%rax, %d(%%rbp)\n", entry->stack_offset); + fprintf(gen->stream, " mov %%rax, %d(%%rbp)\n", stack_offset); break; case EVALUATION_RESULT_ON_STACK: fprintf(gen->stream, " movq %d(%%rbp), %%rax\n", gen->latest_evaluation.data.stack_offset); - fprintf(gen->stream, " movq %%rax, %d(%%rbp)\n", entry->stack_offset); + fprintf(gen->stream, " movq %%rax, %d(%%rbp)\n", stack_offset); break; case EVALUATION_RESULT_AS_LITERAL_BOOL: case EVALUATION_RESULT_VOID: @@ -270,30 +283,25 @@ gas_assembly_generator_compile_variable_declaration(gas_assembly_generator_t *ge ast_variable_declaration_t *variable_declaration) { gen->stack_offset -= 8; - - ref_entry_t *entry = ref_entry_new(); - *entry = (ref_entry_t){ .id = &variable_declaration->identifier, .stack_offset = gen->stack_offset }; - vector_push_back(gen->refs, entry); - - gas_assembly_generator_compile_variable_assign_value(gen, variable_declaration->value, entry); + ref_set_variable_reference_stack_offset(gen->refs, &variable_declaration->identifier, gen->stack_offset); + gas_assembly_generator_compile_variable_assign_value(gen, variable_declaration->value, gen->stack_offset); } static void gas_assembly_generator_compile_variable_assignment(gas_assembly_generator_t *gen, ast_variable_assignment_t *variable_assignment) { - ref_entry_t *entry = find_ref_entry(gen->refs, variable_assignment->identifier); - assert(entry && "reference not found"); + int stack_offset = ref_find_variable_reference_stack_offset(gen->refs, variable_assignment->identifier); - gas_assembly_generator_compile_variable_assign_value(gen, variable_assignment->expression, entry); + gas_assembly_generator_compile_variable_assign_value(gen, variable_assignment->expression, stack_offset); } static void gas_assembly_generator_compile_variable(gas_assembly_generator_t *gen, ast_variable_t *variable) { - ref_entry_t *entry = find_ref_entry(gen->refs, variable->identifier); - assert(entry && "reference not found"); - gas_assembly_generator_set_latest_evaluation_on_stack(gen, entry->stack_offset); + int stack_offset = ref_find_variable_reference_stack_offset(gen->refs, variable->identifier); + + gas_assembly_generator_set_latest_evaluation_on_stack(gen, stack_offset); } static void @@ -503,16 +511,16 @@ gas_assembly_generator_compile_if_stmt(gas_assembly_generator_t *gen, ast_if_stm } case AST_VARIABLE: { uint64_t false_label_index = gen->cur_label_index++; - ref_entry_t *entry = find_ref_entry(gen->refs, if_stmt->condition->data.variable.identifier); + int stack_offset = + ref_find_variable_reference_stack_offset(gen->refs, if_stmt->condition->data.variable.identifier); - fprintf(gen->stream, " movq %d(%%rbp), %%rax\n", entry->stack_offset); + fprintf(gen->stream, " movq %d(%%rbp), %%rax\n", stack_offset); fprintf(gen->stream, " cmpq $1, %%rax\n"); fprintf(gen->stream, " jne .L_%ld\n", false_label_index); gas_assembly_generator_compile(gen, if_stmt->body); fprintf(gen->stream, ".L_%ld:\n", false_label_index); - assert(entry); break; } default: -- cgit v1.2.3 From 2cf0bcb409f3a1fd298b664103d57c945c6349f5 Mon Sep 17 00:00:00 2001 From: Carlos Maniero Date: Wed, 10 May 2023 14:26:40 -0300 Subject: gas: Removes Linux entrypoint logic from function declaration Before this commit, function declarations were making syscalls to interrupt the flow. This is a fair approach considering all our examples just have a main function. But won't work if the namespace has more then a single function. The return now always sets the return value on RAX and jumps to the function return label. The function return label, will restore RBP and jump back to callee's next instruction using RET instruction. Function labels are kept, which means that a function called my_fn will have the assembly label my_fn, so then, they can have INTEROP with other languages. Signed-off-by: Carlos Maniero --- src/gas_assembly_generator.c | 83 +++++++++++++++++++++++++++++++++----------- src/gas_assembly_generator.h | 6 +++- src/main.c | 2 +- 3 files changed, 69 insertions(+), 22 deletions(-) diff --git a/src/gas_assembly_generator.c b/src/gas_assembly_generator.c index 97ba9ce..413fbb0 100644 --- a/src/gas_assembly_generator.c +++ b/src/gas_assembly_generator.c @@ -137,7 +137,8 @@ gas_assembly_generator_init(gas_assembly_generator_t *gen, FILE *stream) gen->stream = stream; gen->refs = vector_new(); gen->stack_offset = 0; - gen->cur_label_index = 0; + gen->counter_label_index = 1; + gen->return_label_index = 0; } void @@ -183,28 +184,40 @@ gas_assembly_generator_compile_function(gas_assembly_generator_t *gen, ast_funct { assert(func); - if (!string_view_eq(func->identifier.name, string_view_from_str("main"))) { - fprintf(stderr, "[ERROR]: no main function has been defined!\n"); - exit(EXIT_FAILURE); - } + uint64_t previous_index = gen->return_label_index; + uint64_t return_label_index = gen->return_label_index = gen->counter_label_index++; - fprintf(gen->stream, ".global _start\n"); - fprintf(gen->stream, ".text\n"); - fprintf(gen->stream, "_start:\n"); + fprintf(gen->stream, SVFMT ":\n", SVARG(&func->identifier.name)); fprintf(gen->stream, " push %%rbp\n"); fprintf(gen->stream, " mov %%rsp, %%rbp\n"); gas_assembly_generator_compile(gen, func->body); + fprintf(gen->stream, ".L%ld:\n", return_label_index); fprintf(gen->stream, " pop %%rbp\n"); + fprintf(gen->stream, " ret\n"); + + gen->return_label_index = previous_index; } -static void -gas_assembly_generator_compile_return_stmt(gas_assembly_generator_t *gen, ast_return_stmt_t *return_stmt) +void +gas_assembly_generator_compile_linux_main(gas_assembly_generator_t *gen, ast_node_t *func) { - assert(gen && return_stmt); + assert(func); + assert(func->kind == AST_FUNCTION_DECLARATION); - gas_assembly_generator_compile(gen, return_stmt->argument); + if (!string_view_eq(func->data.function.identifier.name, string_view_from_str("main"))) { + fprintf(stderr, "[ERROR]: no main function has been defined!\n"); + exit(EXIT_FAILURE); + } + + fprintf(gen->stream, ".global _start\n"); + fprintf(gen->stream, ".text\n"); + + gas_assembly_generator_compile(gen, func); + + fprintf(gen->stream, "_start:\n"); + fprintf(gen->stream, " call main\n"); switch (gen->latest_evaluation.kind) { case EVALUATION_RESULT_AS_LITERAL_INTEGER: @@ -228,6 +241,36 @@ gas_assembly_generator_compile_return_stmt(gas_assembly_generator_t *gen, ast_re fprintf(gen->stream, " syscall\n"); } +static void +gas_assembly_generator_compile_return_stmt(gas_assembly_generator_t *gen, ast_return_stmt_t *return_stmt) +{ + assert(gen && return_stmt); + assert(gen->return_label_index != 0 && "There are no return label index set"); + + gas_assembly_generator_compile(gen, return_stmt->argument); + + switch (gen->latest_evaluation.kind) { + case EVALUATION_RESULT_AS_LITERAL_INTEGER: + fprintf(gen->stream, " mov $%ld, %%rax\n", gen->latest_evaluation.data.literal_int); + gas_assembly_generator_set_latest_evaluation_to_rax(gen); + break; + case EVALUATION_RESULT_ON_RAX: + gas_assembly_generator_set_latest_evaluation_to_rax(gen); + break; + case EVALUATION_RESULT_ON_STACK: + fprintf(gen->stream, " mov %d(%%rbp), %%rax\n", gen->latest_evaluation.data.stack_offset); + gas_assembly_generator_set_latest_evaluation_to_rax(gen); + break; + case EVALUATION_RESULT_VOID: + break; + case EVALUATION_RESULT_AS_LITERAL_BOOL: + assert(false && "Unexpected"); + break; + } + + fprintf(gen->stream, " jmp .L%ld\n", gen->return_label_index); +} + static void gas_assembly_generator_compile_variable_assign_value(gas_assembly_generator_t *gen, ast_node_t *expression, @@ -241,9 +284,9 @@ gas_assembly_generator_compile_variable_assign_value(gas_assembly_generator_t *g return; } - uint64_t true_label_index = gen->cur_label_index++; - uint64_t false_label_index = gen->cur_label_index++; - uint64_t after_assign_label_index = gen->cur_label_index++; + uint64_t true_label_index = gen->counter_label_index++; + uint64_t false_label_index = gen->counter_label_index++; + uint64_t after_assign_label_index = gen->counter_label_index++; gas_assembly_generator_compile_condition(gen, expression, true_label_index, false, false_label_index, true); @@ -446,7 +489,7 @@ gas_assembly_generator_compile_condition(gas_assembly_generator_t *gen, } if (condition->data.binary_operation.kind == AST_BINOP_OR) { - uint64_t or_false_branch_label_index = gen->cur_label_index++; + uint64_t or_false_branch_label_index = gen->counter_label_index++; gas_assembly_generator_compile_condition( gen, condition->data.binary_operation.left, true_label_index, true, or_false_branch_label_index, false); @@ -459,7 +502,7 @@ gas_assembly_generator_compile_condition(gas_assembly_generator_t *gen, } if (condition->data.binary_operation.kind == AST_BINOP_AND) { - uint64_t and_true_branch_label_index = gen->cur_label_index++; + uint64_t and_true_branch_label_index = gen->counter_label_index++; gas_assembly_generator_compile_condition( gen, condition->data.binary_operation.left, and_true_branch_label_index, false, false_label_index, true); @@ -497,8 +540,8 @@ gas_assembly_generator_compile_if_stmt(gas_assembly_generator_t *gen, ast_if_stm switch (if_stmt->condition->kind) { case AST_BINARY_OPERATION: case AST_LITERAL: { - uint64_t true_label_index = gen->cur_label_index++; - uint64_t false_label_index = gen->cur_label_index++; + uint64_t true_label_index = gen->counter_label_index++; + uint64_t false_label_index = gen->counter_label_index++; gas_assembly_generator_compile_condition( gen, if_stmt->condition, true_label_index, false, false_label_index, true); @@ -510,7 +553,7 @@ gas_assembly_generator_compile_if_stmt(gas_assembly_generator_t *gen, ast_if_stm break; } case AST_VARIABLE: { - uint64_t false_label_index = gen->cur_label_index++; + uint64_t false_label_index = gen->counter_label_index++; int stack_offset = ref_find_variable_reference_stack_offset(gen->refs, if_stmt->condition->data.variable.identifier); diff --git a/src/gas_assembly_generator.h b/src/gas_assembly_generator.h index e400d2c..18ffb7f 100644 --- a/src/gas_assembly_generator.h +++ b/src/gas_assembly_generator.h @@ -48,7 +48,8 @@ typedef struct gas_assembly_generator_t FILE *stream; vector_t *refs; int stack_offset; - uint64_t cur_label_index; + uint64_t counter_label_index; + uint64_t return_label_index; evaluation_result_t latest_evaluation; } gas_assembly_generator_t; @@ -58,4 +59,7 @@ gas_assembly_generator_init(gas_assembly_generator_t *gen, FILE *stream); void gas_assembly_generator_compile(gas_assembly_generator_t *gen, ast_node_t *ast); +void +gas_assembly_generator_compile_linux_main(gas_assembly_generator_t *gen, ast_node_t *func); + #endif /* GAS_ASSEMBLY_GENERATOR_H */ diff --git a/src/main.c b/src/main.c index 4f68256..780499c 100644 --- a/src/main.c +++ b/src/main.c @@ -31,7 +31,7 @@ generate_gas_x86_64_linux(ast_node_t *func) { gas_assembly_generator_t gen; gas_assembly_generator_init(&gen, stdout); - gas_assembly_generator_compile(&gen, func); + gas_assembly_generator_compile_linux_main(&gen, func); } static void -- cgit v1.2.3 From 75639fbf01bd6ae1212521b6cf822025eb8b598d Mon Sep 17 00:00:00 2001 From: Carlos Maniero Date: Wed, 10 May 2023 16:07:39 -0300 Subject: namespaces: Add a namespace structure that represents a file We have been always parsing a single function. Since we want to have multiple functions in a near future, this patch introduces an namespace that represents an entire file. To ensure a function is defined inside a namespace, a helper function was created. Today our ast_node structure is highly exposed, and this is something that Johnny and I have been discussed. So then, this is a first step to try to protected the code generation from our ast tree. Signed-off-by: Carlos Maniero --- src/ast.c | 42 ++++++++++++++++++++++++++++++++++++++++++ src/ast.h | 16 ++++++++++++++++ src/ast_pretty_printer.c | 13 +++++++++++++ src/gas_assembly_generator.c | 20 ++++++++++++++------ src/gas_assembly_generator.h | 3 +++ src/main.c | 14 +++++++------- src/parser.c | 37 +++++++++++++++++++++++++++++++++++++ src/parser.h | 3 +++ test/parser_test.c | 12 +++++------- 9 files changed, 140 insertions(+), 20 deletions(-) diff --git a/src/ast.c b/src/ast.c index 5790c05..47d658d 100644 --- a/src/ast.c +++ b/src/ast.c @@ -44,6 +44,9 @@ void ast_node_destroy(ast_node_t *node) { switch (node->kind) { + case AST_NAMESPACE: + ast_node_destroy_vector(node->data.ns.nodes); + break; case AST_FUNCTION_DECLARATION: ast_node_destroy(node->data.function.body); break; @@ -107,6 +110,24 @@ ast_node_new_function_declaration(string_view_t function_name, type_t return_typ return node; } +ast_node_t * +ast_node_new_namespace(vector_t *nodes) +{ + ast_node_t *node = ast_node_new(); + + *node = (ast_node_t){ + .kind = AST_NAMESPACE, + .result_type = TYPE_VOID, + .data = { + .ns = { + .nodes = nodes, + } + }, + }; + + return node; +} + ast_node_t * ast_node_new_block(vector_t *body) { @@ -254,6 +275,27 @@ ast_node_new_variable(ast_identifier_t *identifier, type_t result_type) return node; } +ast_node_t * +ast_node_ns_get_function_node_by_sv(ast_node_t *ns, string_view_t name) +{ + assert(ns->kind == AST_NAMESPACE); + + for (size_t i = 0; i < ns->data.ns.nodes->size; i++) { + ast_node_t *node = vector_at(ns->data.ns.nodes, i); + + if (node->kind == AST_FUNCTION_DECLARATION && string_view_eq(node->data.function.identifier.name, name)) { + return node; + } + } + return NULL; +} + +ast_node_t * +ast_node_ns_get_function_node_by_name(ast_node_t *ns, char *function_name) +{ + return ast_node_ns_get_function_node_by_sv(ns, string_view_from_str(function_name)); +} + char * ast_type_to_str(type_t type) { diff --git a/src/ast.h b/src/ast.h index b2c565c..7d2d2f1 100644 --- a/src/ast.h +++ b/src/ast.h @@ -29,6 +29,11 @@ typedef enum typedef struct ast_node_t ast_node_t; +typedef struct ast_namespace_t +{ + vector_t *nodes; +} ast_namespace_t; + typedef struct ast_block_t { vector_t *body; @@ -119,6 +124,7 @@ typedef struct ast_if_stmt_t typedef enum { + AST_NAMESPACE, AST_BINARY_OPERATION, AST_BLOCK, AST_FUNCTION_DECLARATION, @@ -133,6 +139,7 @@ typedef enum typedef union { + ast_namespace_t ns; ast_binary_operation_t binary_operation; ast_function_declaration_t function; ast_literal_t literal; @@ -163,6 +170,9 @@ ast_node_destroy_vector(vector_t *vector); void ast_node_destroy(ast_node_t *node); +ast_node_t * +ast_node_new_namespace(vector_t *nodes); + ast_node_t * ast_node_new_binary_operation(ast_binary_operation_kind_t kind, ast_node_t *left, @@ -196,4 +206,10 @@ ast_node_new_variable_assignment(ast_identifier_t *identifier, ast_node_t *expre ast_node_t * ast_node_new_if_stmt(ast_node_t *condition, ast_node_t *body); +ast_node_t * +ast_node_ns_get_function_node_by_sv(ast_node_t *ns, string_view_t name); + +ast_node_t * +ast_node_ns_get_function_node_by_name(ast_node_t *ns, char *function_name); + #endif /* AST_H */ diff --git a/src/ast_pretty_printer.c b/src/ast_pretty_printer.c index b14bf34..f5fcf70 100644 --- a/src/ast_pretty_printer.c +++ b/src/ast_pretty_printer.c @@ -65,6 +65,19 @@ ast_pretty_printer_print_ast(ast_pretty_printer_t *printer, ast_node_t *ast) assert(ast); switch (ast->kind) { + case AST_NAMESPACE: + ast_pretty_printer_printf(printer, "Namespace\n"); + ast_pretty_printer_add_indentation(printer); + { + for (size_t i = 0; i < ast->data.ns.nodes->size; ++i) { + if (i + 1 >= ast->data.ns.nodes->size) { + ast_pretty_printer_set_lst_children(printer); + } + ast_pretty_printer_print_ast(printer, vector_at(ast->data.ns.nodes, i)); + } + ast_pretty_printer_rm_indentation(printer); + } + break; case AST_IF_STMT: { ast_if_stmt_t if_stmt = ast->data.if_stmt; ast_pretty_printer_printf(printer, "IfStmt\n"); diff --git a/src/gas_assembly_generator.c b/src/gas_assembly_generator.c index 413fbb0..4326b27 100644 --- a/src/gas_assembly_generator.c +++ b/src/gas_assembly_generator.c @@ -147,6 +147,9 @@ gas_assembly_generator_compile(gas_assembly_generator_t *gen, ast_node_t *ast) gen->latest_evaluation.kind = EVALUATION_RESULT_VOID; switch (ast->kind) { + case AST_NAMESPACE: + gas_assembly_generator_compile_ns(gen, &ast->data.ns); + break; case AST_BINARY_OPERATION: gas_assembly_generator_binary_operation(gen, &ast->data.binary_operation); break; @@ -179,6 +182,14 @@ gas_assembly_generator_compile(gas_assembly_generator_t *gen, ast_node_t *ast) } } +void +gas_assembly_generator_compile_ns(gas_assembly_generator_t *gen, ast_namespace_t *ns) +{ + for (size_t i = 0; i < ns->nodes->size; i++) { + gas_assembly_generator_compile(gen, vector_at(ns->nodes, i)); + } +} + static void gas_assembly_generator_compile_function(gas_assembly_generator_t *gen, ast_function_declaration_t *func) { @@ -201,12 +212,9 @@ gas_assembly_generator_compile_function(gas_assembly_generator_t *gen, ast_funct } void -gas_assembly_generator_compile_linux_main(gas_assembly_generator_t *gen, ast_node_t *func) +gas_assembly_generator_compile_linux_main(gas_assembly_generator_t *gen, ast_node_t *ns) { - assert(func); - assert(func->kind == AST_FUNCTION_DECLARATION); - - if (!string_view_eq(func->data.function.identifier.name, string_view_from_str("main"))) { + if (ast_node_ns_get_function_node_by_name(ns, "main") == NULL) { fprintf(stderr, "[ERROR]: no main function has been defined!\n"); exit(EXIT_FAILURE); } @@ -214,7 +222,7 @@ gas_assembly_generator_compile_linux_main(gas_assembly_generator_t *gen, ast_nod fprintf(gen->stream, ".global _start\n"); fprintf(gen->stream, ".text\n"); - gas_assembly_generator_compile(gen, func); + gas_assembly_generator_compile(gen, ns); fprintf(gen->stream, "_start:\n"); fprintf(gen->stream, " call main\n"); diff --git a/src/gas_assembly_generator.h b/src/gas_assembly_generator.h index 18ffb7f..26aa87b 100644 --- a/src/gas_assembly_generator.h +++ b/src/gas_assembly_generator.h @@ -59,6 +59,9 @@ gas_assembly_generator_init(gas_assembly_generator_t *gen, FILE *stream); void gas_assembly_generator_compile(gas_assembly_generator_t *gen, ast_node_t *ast); +void +gas_assembly_generator_compile_ns(gas_assembly_generator_t *gen, ast_namespace_t *ns); + void gas_assembly_generator_compile_linux_main(gas_assembly_generator_t *gen, ast_node_t *func); diff --git a/src/main.c b/src/main.c index 780499c..ca6ed9b 100644 --- a/src/main.c +++ b/src/main.c @@ -27,11 +27,11 @@ #include "string_view.h" static void -generate_gas_x86_64_linux(ast_node_t *func) +generate_gas_x86_64_linux(ast_node_t *ns) { gas_assembly_generator_t gen; gas_assembly_generator_init(&gen, stdout); - gas_assembly_generator_compile_linux_main(&gen, func); + gas_assembly_generator_compile_linux_main(&gen, ns); } static void @@ -89,20 +89,20 @@ main(int argc, char **argv) parser_t parser; parser_init(&parser, &lexer, scope); - ast_node_t *func = parser_parse_function_declaration(&parser); + ast_node_t *ns = parser_parse_ns(&parser); - if (func == NULL) { + if (ns == NULL) { parser_print_errors(&parser); return EXIT_FAILURE; } if (should_dump_ast) { - pretty_print_ast(func); + pretty_print_ast(ns); } else { - generate_gas_x86_64_linux(func); + generate_gas_x86_64_linux(ns); } scope_destroy(scope); - ast_node_destroy(func); + ast_node_destroy(ns); return EXIT_SUCCESS; } diff --git a/src/parser.c b/src/parser.c index e3f157f..b94087f 100644 --- a/src/parser.c +++ b/src/parser.c @@ -570,6 +570,22 @@ is_next_statement_return(parser_t *parser) return token.kind == TOKEN_KEYWORD_RETURN; } +static bool +is_next_function_declaration(parser_t *parser) +{ + token_t token; + lexer_peek_next_token(parser->lexer, &token); + return token.kind == TOKEN_KEYWORD_FN; +} + +static bool +is_next_token_eof(parser_t *parser) +{ + token_t token; + lexer_peek_next_token(parser->lexer, &token); + return token.kind == TOKEN_EOF; +} + static bool is_block_end(parser_t *parser) { @@ -746,3 +762,24 @@ parser_parse_function_declaration(parser_t *parser) return node; } + +ast_node_t * +parser_parse_ns(parser_t *parser) +{ + vector_t *nodes = vector_new(); + + while (!is_next_token_eof(parser)) { + if (is_next_function_declaration(parser)) { + ast_node_t *node = parser_parse_function_declaration(parser); + + if (node == NULL) { + ast_node_destroy_vector(nodes); + return NULL; + } + + vector_push_back(nodes, node); + } + } + + return ast_node_new_namespace(nodes); +} diff --git a/src/parser.h b/src/parser.h index ef5dff5..7bebee7 100644 --- a/src/parser.h +++ b/src/parser.h @@ -46,4 +46,7 @@ parser_parse_function_declaration(parser_t *parser); ast_node_t * parser_parse_expression(parser_t *parser); +ast_node_t * +parser_parse_ns(parser_t *parser); + #endif /* PARSER_H */ diff --git a/test/parser_test.c b/test/parser_test.c index 8565dc8..78e2e23 100644 --- a/test/parser_test.c +++ b/test/parser_test.c @@ -64,17 +64,15 @@ test_parse_function(const MunitParameter params[], void *user_data_or_fixture) lexer_t lexer; scope_t *scope = scope_new(); - make_lexer_from_static_src(&lexer, "fn main(): i32 { \nreturn 42;\n }"); + make_lexer_from_static_src(&lexer, "fn add(): i32 { return 2; } fn main(): i32 { \nreturn 42;\n }"); parser_init(&parser, &lexer, scope); - ast_node_t *ast_function = parser_parse_function_declaration(&parser); + ast_node_t *ast_ns = parser_parse_ns(&parser); - assert_not_null(ast_function); + assert_not_null(ast_ns); - char actual[5]; + ast_node_t *ast_function = ast_node_ns_get_function_node_by_name(ast_ns, "main"); - string_view_to_str(&ast_function->data.function.identifier.name, actual); - assert_string_equal("main", actual); - assert_int(AST_FUNCTION_DECLARATION, ==, ast_function->kind); + assert_not_null(ast_function); ast_node_t *ast_return = vector_at(ast_function->data.function.body->data.block.body, 0); -- cgit v1.2.3 From 3129b741064c2b4f2c6c2408bd42cc83f7341ea8 Mon Sep 17 00:00:00 2001 From: Carlos Maniero Date: Wed, 10 May 2023 16:53:05 -0300 Subject: gas: Generate function call This is an initial commit that enables function calls. At this point only functions with no parameters is going to work. Signed-off-by: Carlos Maniero --- examples/main.pipa | 6 +++++- src/ast.c | 27 +++++++++++++++++++++++++++ src/ast.h | 16 ++++++++++++++++ src/ast_pretty_printer.c | 5 +++++ src/gas_assembly_generator.c | 13 +++++++++++++ src/parser.c | 22 +++++++++++++++++----- 6 files changed, 83 insertions(+), 6 deletions(-) diff --git a/examples/main.pipa b/examples/main.pipa index 2da2231..5ea0077 100644 --- a/examples/main.pipa +++ b/examples/main.pipa @@ -1,3 +1,7 @@ -fn main(): i32 { +fn give_me_the_number(): i32 { return 69; } + +fn main(): i32 { + return give_me_the_number(); +} diff --git a/src/ast.c b/src/ast.c index 47d658d..e9aa677 100644 --- a/src/ast.c +++ b/src/ast.c @@ -72,6 +72,7 @@ ast_node_destroy(ast_node_t *node) case AST_LITERAL: case AST_UNKOWN_NODE: case AST_VARIABLE: + case AST_FUNCTION_CALL: break; } free(node); @@ -275,6 +276,20 @@ ast_node_new_variable(ast_identifier_t *identifier, type_t result_type) return node; } +ast_node_t * +ast_node_new_function_call(ast_identifier_t *identifier, type_t result_type) +{ + ast_node_t *node = ast_node_new(); + + *node = (ast_node_t){ + .kind = AST_FUNCTION_CALL, + .result_type = result_type, + .data = { .function_call = { .identifier = identifier } }, + }; + + return node; +} + ast_node_t * ast_node_ns_get_function_node_by_sv(ast_node_t *ns, string_view_t name) { @@ -296,6 +311,18 @@ ast_node_ns_get_function_node_by_name(ast_node_t *ns, char *function_name) return ast_node_ns_get_function_node_by_sv(ns, string_view_from_str(function_name)); } +bool +ast_node_is_variable_declaration(ast_node_t *node) +{ + return node->kind == AST_VARIABLE_DECLARATION; +} + +bool +ast_node_is_function_declaration(ast_node_t *node) +{ + return node->kind == AST_FUNCTION_DECLARATION; +} + char * ast_type_to_str(type_t type) { diff --git a/src/ast.h b/src/ast.h index 7d2d2f1..315de1e 100644 --- a/src/ast.h +++ b/src/ast.h @@ -54,6 +54,11 @@ typedef struct ast_variable_t ast_identifier_t *identifier; } ast_variable_t; +typedef struct ast_function_call_t +{ + ast_identifier_t *identifier; +} ast_function_call_t; + typedef struct ast_function_declaration_t { ast_identifier_t identifier; @@ -128,6 +133,7 @@ typedef enum AST_BINARY_OPERATION, AST_BLOCK, AST_FUNCTION_DECLARATION, + AST_FUNCTION_CALL, AST_LITERAL, AST_RETURN_STMT, AST_IF_STMT, @@ -142,6 +148,7 @@ typedef union ast_namespace_t ns; ast_binary_operation_t binary_operation; ast_function_declaration_t function; + ast_function_call_t function_call; ast_literal_t literal; ast_block_t block; ast_if_stmt_t if_stmt; @@ -182,6 +189,9 @@ ast_node_new_binary_operation(ast_binary_operation_kind_t kind, ast_node_t * ast_node_new_function_declaration(string_view_t function_name, type_t return_type, ast_node_t *body); +ast_node_t * +ast_node_new_function_call(ast_identifier_t *identifier, type_t result_type); + ast_node_t * ast_node_new_return_stmt(ast_node_t *argument); @@ -212,4 +222,10 @@ ast_node_ns_get_function_node_by_sv(ast_node_t *ns, string_view_t name); ast_node_t * ast_node_ns_get_function_node_by_name(ast_node_t *ns, char *function_name); +bool +ast_node_is_variable_declaration(ast_node_t *node); + +bool +ast_node_is_function_declaration(ast_node_t *node); + #endif /* AST_H */ diff --git a/src/ast_pretty_printer.c b/src/ast_pretty_printer.c index f5fcf70..0d8b66c 100644 --- a/src/ast_pretty_printer.c +++ b/src/ast_pretty_printer.c @@ -155,6 +155,11 @@ ast_pretty_printer_print_ast(ast_pretty_printer_t *printer, ast_node_t *ast) } break; } + case AST_FUNCTION_CALL: { + ast_function_call_t var = ast->data.function_call; + ast_pretty_printer_printf(printer, "FunctionCall name='" SVFMT "'\n", SVARG(&var.identifier->name)); + break; + } case AST_BLOCK: { ast_pretty_printer_printf(printer, "Block\n"); ast_pretty_printer_add_indentation(printer); diff --git a/src/gas_assembly_generator.c b/src/gas_assembly_generator.c index 4326b27..2d6b993 100644 --- a/src/gas_assembly_generator.c +++ b/src/gas_assembly_generator.c @@ -28,6 +28,9 @@ gas_assembly_generator_binary_operation(gas_assembly_generator_t *gen, ast_binar static void gas_assembly_generator_compile_function(gas_assembly_generator_t *gen, ast_function_declaration_t *func); +static void +gas_assembly_generator_compile_function_call(gas_assembly_generator_t *gen, ast_function_call_t *func_call); + static void gas_assembly_generator_compile_literal(gas_assembly_generator_t *gen, ast_literal_t *literal); @@ -156,6 +159,9 @@ gas_assembly_generator_compile(gas_assembly_generator_t *gen, ast_node_t *ast) case AST_FUNCTION_DECLARATION: gas_assembly_generator_compile_function(gen, &ast->data.function); break; + case AST_FUNCTION_CALL: + gas_assembly_generator_compile_function_call(gen, &ast->data.function_call); + break; case AST_LITERAL: gas_assembly_generator_compile_literal(gen, &ast->data.literal); break; @@ -211,6 +217,13 @@ gas_assembly_generator_compile_function(gas_assembly_generator_t *gen, ast_funct gen->return_label_index = previous_index; } +static void +gas_assembly_generator_compile_function_call(gas_assembly_generator_t *gen, ast_function_call_t *func_call) +{ + fprintf(gen->stream, " call " SVFMT "\n", SVARG(&func_call->identifier->name)); + gas_assembly_generator_set_latest_evaluation_to_rax(gen); +} + void gas_assembly_generator_compile_linux_main(gas_assembly_generator_t *gen, ast_node_t *ns) { diff --git a/src/parser.c b/src/parser.c index b94087f..578514c 100644 --- a/src/parser.c +++ b/src/parser.c @@ -339,10 +339,9 @@ parser_parse_factor(parser_t *parser) return expression; } case TOKEN_NAME: { - // TODO: Check node kind, today accepts only variables - ast_node_t *var_node = scope_get(parser->scope, token.value); + ast_node_t *referenced_node = scope_get(parser->scope, token.value); - if (var_node == NULL) { + if (referenced_node == NULL) { parser_error_t error; error.token = token; sprintf(error.message, "identifier '" SVFMT "' not defined", SVARG(&token.value)); @@ -350,8 +349,21 @@ parser_parse_factor(parser_t *parser) return NULL; } - return ast_node_new_variable(&var_node->data.variable_declaration.identifier, - var_node->data.variable_declaration.type); + if (referenced_node->kind == AST_VARIABLE_DECLARATION) { + return ast_node_new_variable(&referenced_node->data.variable_declaration.identifier, + referenced_node->data.variable_declaration.type); + } + + // Parse function parameters + if (!drop_expected_token(parser, TOKEN_OPAREN)) { + return NULL; + } + if (!drop_expected_token(parser, TOKEN_CPAREN)) { + return NULL; + } + + return ast_node_new_function_call(&referenced_node->data.function.identifier, + referenced_node->data.function.return_type); } default: { parser_error_t error; -- cgit v1.2.3 From 6f187a71cbe3aa4ebb32ba287c75562d96c7a3f4 Mon Sep 17 00:00:00 2001 From: Carlos Maniero Date: Wed, 10 May 2023 17:49:36 -0300 Subject: tests: Replace parse function with parse ns for error handling It was necessary to test function calls errors. Signed-off-by: Carlos Maniero --- src/parser.c | 10 ++++++++++ test/parser_test.c | 11 ++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/parser.c b/src/parser.c index 578514c..377ff73 100644 --- a/src/parser.c +++ b/src/parser.c @@ -790,7 +790,17 @@ parser_parse_ns(parser_t *parser) } vector_push_back(nodes, node); + continue; } + + parser_error_t error; + + lexer_peek_next_token(parser->lexer, &error.token); + sprintf(error.message, "Unexpected token '" SVFMT "'", SVARG(&error.token.value)); + parser->errors[parser->errors_len++] = error; + + ast_node_destroy_vector(nodes); + return NULL; } return ast_node_new_namespace(nodes); diff --git a/test/parser_test.c b/test/parser_test.c index 78e2e23..ebb917c 100644 --- a/test/parser_test.c +++ b/test/parser_test.c @@ -48,9 +48,9 @@ assert_parser_error(char *src, char *error_msg) make_lexer_from_static_src(&lexer, src); parser_init(&parser, &lexer, scope); - ast_node_t *ast_function = parser_parse_function_declaration(&parser); + ast_node_t *ast_ns = parser_parse_ns(&parser); - assert_false(ast_function != NULL); + assert_false(ast_ns != NULL); assert_int(1, ==, parser.errors_len); assert_string_equal(error_msg, parser.errors[0].message); @@ -394,7 +394,7 @@ assert_string_view_equal(char *expected, string_view_t actual) static MunitResult test_parse_basic_syntax_errors(const MunitParameter params[], void *user_data_or_fixture) { - assert_parser_error("main(): i32 { return 42; }", "expected 'fn' but got 'TOKEN_NAME'"); + assert_parser_error("main(): i32 { return 42; }", "Unexpected token 'main'"); assert_parser_error("fn (): i32 { return 42; }", "expected 'TOKEN_NAME' but got '('"); assert_parser_error("fn main): i32 { return 42; }", "expected '(' but got ')'"); assert_parser_error("fn main(: i32 { return 42; }", "expected ')' but got ':'"); @@ -427,8 +427,9 @@ test_parse_basic_syntax_errors(const MunitParameter params[], void *user_data_or "incompatible types: expected 'bool', actual 'i32'."); assert_parser_error("fn main(): i32 { if true { return true; } \nreturn 42;\n }", "incompatible types: expected 'i32', actual 'bool'."); - // FIXME: once function calls are implemented, this error should inform that - // neither a variable or function call was found. + assert_parser_error("fn main(): i32 { return fn_404(); }", "identifier 'fn_404' not defined"); + assert_parser_error("fn my_bool_fn(): bool { return true; } fn main(): i32 { return my_bool_fn(); }", + "incompatible types: expected 'i32', actual 'bool'."); assert_parser_error("fn main(): i32 { oxi 42; }", "unexpected token 'TOKEN_NAME' value='oxi'"); return MUNIT_OK; -- cgit v1.2.3 From 5042a4ffc1363d6f0f99a3afd79f76cf2da738d6 Mon Sep 17 00:00:00 2001 From: Carlos Maniero Date: Wed, 10 May 2023 22:24:14 -0300 Subject: gas: implement function calls For now function calls are following the C's calling convention, which means they are using the following registers to pass functions' arguments: rdi, rsi, rdx, rcx, r8, r9 If a function has more then 6 parameters, the compilation will fail. To enable function with more than 6 parameters we will need to save the extra arguments on stack. Naming: parameters: function parameters are the variables a function receives. arguments: Arguments are the values passed to a function when calling it. Calling mechanism: When a function is called, all the expressions passed as argument are evaluated, after the evaluation, the result is stored on the register that represents its argument position, the first argument will be stored on rdi, the second on rsi and so on. Receiving mechanism: When a function starts, the first thing it does is store all the registers onto the stack. So rdi will be stored on -8(rbp), rsi on -16(rbp) and so on. And, a ref_entry is created making the relationship parameter-stack_offset. Signed-off-by: Carlos Maniero --- examples/function_call.pipa | 11 +++++ examples/main.pipa | 6 +-- src/ast.c | 54 ++++++++++++++++++++++--- src/ast.h | 33 +++++++++++++-- src/ast_pretty_printer.c | 6 ++- src/gas_assembly_generator.c | 50 ++++++++++++++++++++++- src/lexer.c | 8 ++++ src/lexer.h | 1 + src/parser.c | 95 +++++++++++++++++++++++++++++++++++++++----- test/integration_test.c | 1 + test/parser_test.c | 6 +-- 11 files changed, 241 insertions(+), 30 deletions(-) create mode 100644 examples/function_call.pipa diff --git a/examples/function_call.pipa b/examples/function_call.pipa new file mode 100644 index 0000000..833c195 --- /dev/null +++ b/examples/function_call.pipa @@ -0,0 +1,11 @@ +fn add(n: i32, n2: i32): i32 { + return n + n2; +} + +fn id(n: i32): i32 { + return n; +} + +fn main(): i32 { + return id(add(40, 44)) / 2; +} diff --git a/examples/main.pipa b/examples/main.pipa index 5ea0077..2da2231 100644 --- a/examples/main.pipa +++ b/examples/main.pipa @@ -1,7 +1,3 @@ -fn give_me_the_number(): i32 { - return 69; -} - fn main(): i32 { - return give_me_the_number(); + return 69; } diff --git a/src/ast.c b/src/ast.c index e9aa677..f6f8f08 100644 --- a/src/ast.c +++ b/src/ast.c @@ -49,6 +49,7 @@ ast_node_destroy(ast_node_t *node) break; case AST_FUNCTION_DECLARATION: ast_node_destroy(node->data.function.body); + ast_node_destroy_vector(node->data.function.prototype.parameters); break; case AST_IF_STMT: ast_node_destroy(node->data.if_stmt.condition); @@ -69,6 +70,7 @@ ast_node_destroy(ast_node_t *node) break; case AST_VARIABLE_ASSIGNMENT: ast_node_destroy(node->data.variable_assignment.expression); + case AST_FUNCTION_PARAMETER: case AST_LITERAL: case AST_UNKOWN_NODE: case AST_VARIABLE: @@ -93,7 +95,30 @@ ast_node_new_return_stmt(ast_node_t *argument) } ast_node_t * -ast_node_new_function_declaration(string_view_t function_name, type_t return_type, ast_node_t *body) +ast_node_new_function_parameter(string_view_t name, type_t type) +{ + ast_node_t *node = ast_node_new(); + + *node = (ast_node_t){ + .kind = AST_FUNCTION_PARAMETER, + .data = { + .function_parameter = { + .identifier = { + .name = name, + }, + .type = type, + }, + }, + }; + + return node; +} + +ast_node_t * +ast_node_new_function_declaration(string_view_t function_name, + type_t return_type, + vector_t *parameters, + ast_node_t *body) { ast_node_t *node = ast_node_new(); @@ -101,8 +126,11 @@ ast_node_new_function_declaration(string_view_t function_name, type_t return_typ .kind = AST_FUNCTION_DECLARATION, .data = { .function = { - .identifier = { .name = function_name }, - .return_type = return_type, + .prototype = { + .identifier = { .name = function_name }, + .return_type = return_type, + .parameters = parameters, + }, .body = body, } }, @@ -277,14 +305,14 @@ ast_node_new_variable(ast_identifier_t *identifier, type_t result_type) } ast_node_t * -ast_node_new_function_call(ast_identifier_t *identifier, type_t result_type) +ast_node_new_function_call(ast_identifier_t *identifier, type_t result_type, vector_t *arguments) { ast_node_t *node = ast_node_new(); *node = (ast_node_t){ .kind = AST_FUNCTION_CALL, .result_type = result_type, - .data = { .function_call = { .identifier = identifier } }, + .data = { .function_call = { .identifier = identifier, .arguments = arguments } }, }; return node; @@ -298,7 +326,7 @@ ast_node_ns_get_function_node_by_sv(ast_node_t *ns, string_view_t name) for (size_t i = 0; i < ns->data.ns.nodes->size; i++) { ast_node_t *node = vector_at(ns->data.ns.nodes, i); - if (node->kind == AST_FUNCTION_DECLARATION && string_view_eq(node->data.function.identifier.name, name)) { + if (node->kind == AST_FUNCTION_DECLARATION && string_view_eq(node->data.function.prototype.identifier.name, name)) { return node; } } @@ -323,6 +351,20 @@ ast_node_is_function_declaration(ast_node_t *node) return node->kind == AST_FUNCTION_DECLARATION; } +ast_identifier_t * +ast_node_function_declaration_identifier(ast_node_t *node) +{ + assert(node->kind == AST_FUNCTION_DECLARATION); + + return &node->data.function.prototype.identifier; +} + +string_view_t +ast_node_function_declaration_name(ast_node_t *node) +{ + return ast_node_function_declaration_identifier(node)->name; +} + char * ast_type_to_str(type_t type) { diff --git a/src/ast.h b/src/ast.h index 315de1e..9e0e514 100644 --- a/src/ast.h +++ b/src/ast.h @@ -57,12 +57,25 @@ typedef struct ast_variable_t typedef struct ast_function_call_t { ast_identifier_t *identifier; + vector_t *arguments; } ast_function_call_t; -typedef struct ast_function_declaration_t +typedef struct ast_function_parameter_t +{ + ast_identifier_t identifier; + type_t type; +} ast_function_parameter_t; + +typedef struct ast_function_prototype_t { ast_identifier_t identifier; type_t return_type; + vector_t *parameters; +} ast_function_prototype_t; + +typedef struct ast_function_declaration_t +{ + ast_function_prototype_t prototype; ast_node_t *body; } ast_function_declaration_t; @@ -133,6 +146,7 @@ typedef enum AST_BINARY_OPERATION, AST_BLOCK, AST_FUNCTION_DECLARATION, + AST_FUNCTION_PARAMETER, AST_FUNCTION_CALL, AST_LITERAL, AST_RETURN_STMT, @@ -148,6 +162,7 @@ typedef union ast_namespace_t ns; ast_binary_operation_t binary_operation; ast_function_declaration_t function; + ast_function_parameter_t function_parameter; ast_function_call_t function_call; ast_literal_t literal; ast_block_t block; @@ -187,10 +202,16 @@ ast_node_new_binary_operation(ast_binary_operation_kind_t kind, type_t result_type); ast_node_t * -ast_node_new_function_declaration(string_view_t function_name, type_t return_type, ast_node_t *body); +ast_node_new_function_declaration(string_view_t function_name, + type_t return_type, + vector_t *parameters, + ast_node_t *body); + +ast_node_t * +ast_node_new_function_parameter(string_view_t name, type_t type); ast_node_t * -ast_node_new_function_call(ast_identifier_t *identifier, type_t result_type); +ast_node_new_function_call(ast_identifier_t *identifier, type_t result_type, vector_t *arguments); ast_node_t * ast_node_new_return_stmt(ast_node_t *argument); @@ -228,4 +249,10 @@ ast_node_is_variable_declaration(ast_node_t *node); bool ast_node_is_function_declaration(ast_node_t *node); +ast_identifier_t * +ast_node_function_declaration_identifier(ast_node_t *node); + +string_view_t +ast_node_function_declaration_name(ast_node_t *node); + #endif /* AST_H */ diff --git a/src/ast_pretty_printer.c b/src/ast_pretty_printer.c index 0d8b66c..a211fa4 100644 --- a/src/ast_pretty_printer.c +++ b/src/ast_pretty_printer.c @@ -65,6 +65,8 @@ ast_pretty_printer_print_ast(ast_pretty_printer_t *printer, ast_node_t *ast) assert(ast); switch (ast->kind) { + case AST_FUNCTION_PARAMETER: + break; case AST_NAMESPACE: ast_pretty_printer_printf(printer, "Namespace\n"); ast_pretty_printer_add_indentation(printer); @@ -136,8 +138,8 @@ ast_pretty_printer_print_ast(ast_pretty_printer_t *printer, ast_node_t *ast) break; } case AST_FUNCTION_DECLARATION: { - ast_function_declaration_t function = ast->data.function; - ast_pretty_printer_printf(printer, "FunctionDecl name='" SVFMT "'\n", SVARG(&function.identifier.name)); + string_view_t function_name = ast_node_function_declaration_name(ast); + ast_pretty_printer_printf(printer, "FunctionDecl name='" SVFMT "'\n", SVARG(&function_name)); ast_pretty_printer_add_indentation(printer); { diff --git a/src/gas_assembly_generator.c b/src/gas_assembly_generator.c index 2d6b993..26bf3f9 100644 --- a/src/gas_assembly_generator.c +++ b/src/gas_assembly_generator.c @@ -22,6 +22,9 @@ #include #include +size_t available_calling_registers = 6; +char *calling_registers[] = { "rdi", "rsi", "rdx", "rcx", "r8", "r9" }; + static void gas_assembly_generator_binary_operation(gas_assembly_generator_t *gen, ast_binary_operation_t *binary_operation); @@ -183,6 +186,7 @@ gas_assembly_generator_compile(gas_assembly_generator_t *gen, ast_node_t *ast) case AST_IF_STMT: gas_assembly_generator_compile_if_stmt(gen, &ast->data.if_stmt); break; + case AST_FUNCTION_PARAMETER: case AST_UNKOWN_NODE: assert(false && "unreachable"); } @@ -204,10 +208,27 @@ gas_assembly_generator_compile_function(gas_assembly_generator_t *gen, ast_funct uint64_t previous_index = gen->return_label_index; uint64_t return_label_index = gen->return_label_index = gen->counter_label_index++; - fprintf(gen->stream, SVFMT ":\n", SVARG(&func->identifier.name)); + assert((func->prototype.parameters->size <= available_calling_registers) && + "Not enough registers to process this function. Stack parameters will come soon."); + + fprintf(gen->stream, SVFMT ":\n", SVARG(&func->prototype.identifier.name)); fprintf(gen->stream, " push %%rbp\n"); fprintf(gen->stream, " mov %%rsp, %%rbp\n"); + for (size_t i = 0; i < func->prototype.parameters->size; i++) { + char *reg = calling_registers[i]; + ast_node_t *parameter = vector_at(func->prototype.parameters, i); + + assert(parameter->kind == AST_FUNCTION_PARAMETER); + + gen->stack_offset -= 8; + + fprintf(gen->stream, " mov %%%s, %d(%%rbp)\n", reg, gen->stack_offset); + + ref_set_variable_reference_stack_offset( + gen->refs, ¶meter->data.function_parameter.identifier, gen->stack_offset); + } + gas_assembly_generator_compile(gen, func->body); fprintf(gen->stream, ".L%ld:\n", return_label_index); @@ -215,11 +236,38 @@ gas_assembly_generator_compile_function(gas_assembly_generator_t *gen, ast_funct fprintf(gen->stream, " ret\n"); gen->return_label_index = previous_index; + gen->stack_offset = 0; } static void gas_assembly_generator_compile_function_call(gas_assembly_generator_t *gen, ast_function_call_t *func_call) { + assert((func_call->arguments->size <= available_calling_registers) && + "Not enough registers to process this function. Stack parameters will come soon."); + + for (size_t i = 0; i < func_call->arguments->size; i++) { + char *reg = calling_registers[i]; + ast_node_t *argument = vector_at(func_call->arguments, i); + + gas_assembly_generator_compile(gen, argument); + + switch (gen->latest_evaluation.kind) { + case EVALUATION_RESULT_AS_LITERAL_INTEGER: + fprintf(gen->stream, " mov $%ld, %%%s\n", gen->latest_evaluation.data.literal_int, reg); + break; + case EVALUATION_RESULT_ON_RAX: + fprintf(gen->stream, " mov %%rax, %%%s\n", reg); + break; + case EVALUATION_RESULT_ON_STACK: + fprintf(gen->stream, " mov %d(%%rbp), %%%s\n", gen->latest_evaluation.data.stack_offset, reg); + break; + case EVALUATION_RESULT_AS_LITERAL_BOOL: + fprintf(gen->stream, " mov %d(%%rbp), %%%s\n", gen->latest_evaluation.data.literal_bool, reg); + break; + case EVALUATION_RESULT_VOID: + assert(false && "Unexpected"); + } + } fprintf(gen->stream, " call " SVFMT "\n", SVARG(&func_call->identifier->name)); gas_assembly_generator_set_latest_evaluation_to_rax(gen); } diff --git a/src/lexer.c b/src/lexer.c index a6c8ae2..7c668d3 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -165,6 +165,12 @@ lexer_next_token(lexer_t *lexer, token_t *token) return; } + if (lexer_current_char(lexer) == ',') { + lexer_define_literal_token_props(lexer, token, TOKEN_COMMA); + lexer_drop_char(lexer); + return; + } + if (lexer_current_char(lexer) == ';') { lexer_define_literal_token_props(lexer, token, TOKEN_SEMICOLON); lexer_drop_char(lexer); @@ -412,6 +418,8 @@ token_kind_to_str(token_kind_t kind) return ")"; case TOKEN_COLON: return ":"; + case TOKEN_COMMA: + return ","; case TOKEN_SEMICOLON: return ";"; case TOKEN_OCURLY: diff --git a/src/lexer.h b/src/lexer.h index 0e36ada..46912dc 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -32,6 +32,7 @@ typedef enum // Literal Tokens TOKEN_OPAREN, TOKEN_CPAREN, + TOKEN_COMMA, TOKEN_COLON, TOKEN_SEMICOLON, TOKEN_OCURLY, diff --git a/src/parser.c b/src/parser.c index 377ff73..fe2a2f0 100644 --- a/src/parser.c +++ b/src/parser.c @@ -33,6 +33,9 @@ parser_expression_matches_the_expected_type(parser_t *parser, ast_node_t *expres static ast_node_t * parser_parse_block_declarations(parser_t *parser, type_t result_type); +static bool +is_next_token_cparen(parser_t *parser); + void parser_init(parser_t *parser, lexer_t *lexer, scope_t *scope) { @@ -354,16 +357,40 @@ parser_parse_factor(parser_t *parser) referenced_node->data.variable_declaration.type); } + // Function parameters are also variables. + if (referenced_node->kind == AST_FUNCTION_PARAMETER) { + return ast_node_new_variable(&referenced_node->data.function_parameter.identifier, + referenced_node->data.function_parameter.type); + } + // Parse function parameters if (!drop_expected_token(parser, TOKEN_OPAREN)) { return NULL; } - if (!drop_expected_token(parser, TOKEN_CPAREN)) { - return NULL; + + vector_t *arguments = vector_new(); + + while (!is_next_token_cparen(parser)) { + ast_node_t *argument = parser_parse_expression(parser); + + if (argument == NULL) { + ast_node_destroy_vector(arguments); + return NULL; + } + + vector_push_back(arguments, argument); + + if (!is_next_token_cparen(parser) && !drop_expected_token(parser, TOKEN_COMMA)) { + ast_node_destroy_vector(arguments); + return NULL; + } } - return ast_node_new_function_call(&referenced_node->data.function.identifier, - referenced_node->data.function.return_type); + drop_expected_token(parser, TOKEN_CPAREN); + + return ast_node_new_function_call(ast_node_function_declaration_identifier(referenced_node), + referenced_node->data.function.prototype.return_type, + arguments); } default: { parser_error_t error; @@ -590,6 +617,14 @@ is_next_function_declaration(parser_t *parser) return token.kind == TOKEN_KEYWORD_FN; } +static bool +is_next_token_cparen(parser_t *parser) +{ + token_t token; + lexer_peek_next_token(parser->lexer, &token); + return token.kind == TOKEN_CPAREN; +} + static bool is_next_token_eof(parser_t *parser) { @@ -722,10 +757,48 @@ parser_parse_block_declarations(parser_t *parser, type_t result_type) return ast_node_new_block(body); } -static bool -parser_parse_function_arguments(parser_t *parser) +static vector_t * +parser_parse_function_parameters(parser_t *parser) { - return drop_expected_token(parser, TOKEN_OPAREN) && drop_expected_token(parser, TOKEN_CPAREN); + if (!drop_expected_token(parser, TOKEN_OPAREN)) { + return NULL; + } + + vector_t *vector = vector_new(); + + while (!is_next_token_cparen(parser)) { + token_t token_name; + type_t type; + + if (!expected_token(&token_name, parser, TOKEN_NAME)) { + ast_node_destroy_vector(vector); + return NULL; + } + if (!drop_expected_token(parser, TOKEN_COLON)) { + ast_node_destroy_vector(vector); + return NULL; + } + + if (!parser_parse_type(parser, &type)) { + ast_node_destroy_vector(vector); + return NULL; + } + + ast_node_t *parameter = ast_node_new_function_parameter(token_name.value, type); + + scope_push(parser->scope, ¶meter->data.function_parameter.identifier, parameter); + + vector_push_back(vector, parameter); + + if (!is_next_token_cparen(parser) && !drop_expected_token(parser, TOKEN_COMMA)) { + ast_node_destroy_vector(vector); + return NULL; + } + } + + drop_expected_token(parser, TOKEN_CPAREN); + + return vector; } ast_node_t * @@ -741,7 +814,9 @@ parser_parse_function_declaration(parser_t *parser) return NULL; } - if (!parser_parse_function_arguments(parser)) { + vector_t *parameters = parser_parse_function_parameters(parser); + + if (parameters == NULL) { return NULL; } @@ -766,11 +841,11 @@ parser_parse_function_declaration(parser_t *parser) return NULL; } - ast_node_t *node = ast_node_new_function_declaration(func_name_token.value, return_type, body); + ast_node_t *node = ast_node_new_function_declaration(func_name_token.value, return_type, parameters, body); // TODO: When implementing function calls the scope must be pushed before the // body to be parsed, otherwise recursion not gonna work. - scope_push(parser->scope, &node->data.function.identifier, node); + scope_push(parser->scope, &node->data.function.prototype.identifier, node); return node; } diff --git a/test/integration_test.c b/test/integration_test.c index 985e574..608764a 100644 --- a/test/integration_test.c +++ b/test/integration_test.c @@ -44,6 +44,7 @@ test_examples(const MunitParameter params[], void *user_data_or_fixture) assert_exit_status("../examples/arithmetics.pipa", 13); assert_exit_status("../examples/variables.pipa", 28); assert_exit_status("../examples/if.pipa", 42); + assert_exit_status("../examples/function_call.pipa", 42); return MUNIT_OK; } diff --git a/test/parser_test.c b/test/parser_test.c index ebb917c..44ffd57 100644 --- a/test/parser_test.c +++ b/test/parser_test.c @@ -104,7 +104,7 @@ test_parse_variable_definition(const MunitParameter params[], void *user_data_or char actual[5]; - string_view_to_str(&ast_function->data.function.identifier.name, actual); + string_view_to_str(&ast_function->data.function.prototype.identifier.name, actual); assert_string_equal("main", actual); assert_int(AST_FUNCTION_DECLARATION, ==, ast_function->kind); @@ -141,7 +141,7 @@ test_parse_boolean(const MunitParameter params[], void *user_data_or_fixture) assert_true(ast_function != NULL); - assert_string_view_equal("my_bool_fn", ast_function->data.function.identifier.name); + assert_string_view_equal("my_bool_fn", ast_node_function_declaration_name(ast_function)); assert_int(AST_FUNCTION_DECLARATION, ==, ast_function->kind); ast_node_t *ast_variable = vector_at(ast_function->data.function.body->data.block.body, 0); @@ -397,7 +397,7 @@ test_parse_basic_syntax_errors(const MunitParameter params[], void *user_data_or assert_parser_error("main(): i32 { return 42; }", "Unexpected token 'main'"); assert_parser_error("fn (): i32 { return 42; }", "expected 'TOKEN_NAME' but got '('"); assert_parser_error("fn main): i32 { return 42; }", "expected '(' but got ')'"); - assert_parser_error("fn main(: i32 { return 42; }", "expected ')' but got ':'"); + assert_parser_error("fn main(: i32 { return 42; }", "expected 'TOKEN_NAME' but got ':'"); assert_parser_error("fn main() i32 { return 42; }", "expected ':' but got 'TOKEN_NAME'"); assert_parser_error("fn main(): { return 42; }", "expected 'TOKEN_NAME' but got '{'"); assert_parser_error("fn main(): i32 return 42; }", "expected '{' but got 'return'"); -- cgit v1.2.3 From ea7f65fe1250be8f49edcaaedd3410aed1401648 Mon Sep 17 00:00:00 2001 From: Carlos Maniero Date: Thu, 11 May 2023 15:00:10 -0300 Subject: gas: implement recursion and late evaluation Until now the below code was not valid for pipac. fn main(): i32 { return fib(13); } fn fib(n: i32): i32 { if n <= 1 { return n; } return fib(n - 1) + fib(n - 2); } Pipa's parser was adding a function to scope after they were fully parsed which means that fib's self-reference would not work. Also, functions were required to follow the be called in the order they are declared for the same scope reason so, the main function was required to be defined after fib. And how it is working now? When a TOKEN_NAME is not found in the scope, instead of returning an error, an unknown token is created as placeholder. The parser stores the node reference and the token it was trying to parse. During type checks, if the parser detects an unknown node, instead of returning an error, it stores in that node what was the expected type. After the NS is fully parsed a reevaluation is made on those unknown nodes by setting the lexer back on the node's token position and parsing the TOKEN_NAME again. Ps: There is a typo on the unknown token. It will be addressed in another commit since this issue was not introduced by this change. Signed-off-by: Carlos Maniero --- examples/fibbonacci.pipa | 10 +++ src/ast.h | 12 ++- src/gas_assembly_generator.c | 5 +- src/lexer.c | 8 ++ src/lexer.h | 3 + src/parser.c | 199 +++++++++++++++++++++++++++++++++---------- src/parser.h | 7 ++ test/integration_test.c | 1 + test/parser_test.c | 2 + 9 files changed, 198 insertions(+), 49 deletions(-) create mode 100644 examples/fibbonacci.pipa diff --git a/examples/fibbonacci.pipa b/examples/fibbonacci.pipa new file mode 100644 index 0000000..c19e65d --- /dev/null +++ b/examples/fibbonacci.pipa @@ -0,0 +1,10 @@ +fn main(): i32 { + return fib(13); +} + +fn fib(n: i32): i32 { + if n <= 1 { + return n; + } + return fib(n - 1) + fib(n - 2); +} diff --git a/src/ast.h b/src/ast.h index 9e0e514..42f02ac 100644 --- a/src/ast.h +++ b/src/ast.h @@ -16,6 +16,7 @@ */ #ifndef AST_H #define AST_H +#include "lexer.h" #include "string_view.h" #include "vector.h" #include @@ -140,6 +141,12 @@ typedef struct ast_if_stmt_t ast_node_t *body; } ast_if_stmt_t; +typedef struct ast_unkown_token_t +{ + type_t expected_type; + token_t reference_token; +} ast_unkown_token_t; + typedef enum { AST_NAMESPACE, @@ -151,10 +158,10 @@ typedef enum AST_LITERAL, AST_RETURN_STMT, AST_IF_STMT, - AST_UNKOWN_NODE, AST_VARIABLE_DECLARATION, AST_VARIABLE_ASSIGNMENT, - AST_VARIABLE + AST_VARIABLE, + AST_UNKOWN_NODE } ast_node_kind_t; typedef union @@ -171,6 +178,7 @@ typedef union ast_variable_declaration_t variable_declaration; ast_variable_assignment_t variable_assignment; ast_variable_t variable; + ast_unkown_token_t unknown; } ast_node_data_t; typedef struct ast_node_t diff --git a/src/gas_assembly_generator.c b/src/gas_assembly_generator.c index 26bf3f9..7641147 100644 --- a/src/gas_assembly_generator.c +++ b/src/gas_assembly_generator.c @@ -214,6 +214,7 @@ gas_assembly_generator_compile_function(gas_assembly_generator_t *gen, ast_funct fprintf(gen->stream, SVFMT ":\n", SVARG(&func->prototype.identifier.name)); fprintf(gen->stream, " push %%rbp\n"); fprintf(gen->stream, " mov %%rsp, %%rbp\n"); + fprintf(gen->stream, " subq $16, %%rsp\n"); for (size_t i = 0; i < func->prototype.parameters->size; i++) { char *reg = calling_registers[i]; @@ -232,7 +233,7 @@ gas_assembly_generator_compile_function(gas_assembly_generator_t *gen, ast_funct gas_assembly_generator_compile(gen, func->body); fprintf(gen->stream, ".L%ld:\n", return_label_index); - fprintf(gen->stream, " pop %%rbp\n"); + fprintf(gen->stream, " leave\n"); fprintf(gen->stream, " ret\n"); gen->return_label_index = previous_index; @@ -515,7 +516,7 @@ gas_assembly_generator_binary_operation(gas_assembly_generator_t *gen, ast_binar binary_operation->kind == AST_BINOP_EQUAL || binary_operation->kind == AST_BINOP_NOT_EQUAL || binary_operation->kind == AST_BINOP_GT || binary_operation->kind == AST_BINOP_LT || binary_operation->kind == AST_BINOP_LT_EQUAL || binary_operation->kind == AST_BINOP_GT_EQUAL) { - fprintf(gen->stream, " cmp %%rcx, %%rax\n"); + fprintf(gen->stream, " cmpq %%rcx, %%rax\n"); return; } diff --git a/src/lexer.c b/src/lexer.c index 7c668d3..c00c944 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -348,6 +348,14 @@ lexer_load_file_contents(lexer_t *lexer) } } +void +lexer_step_back_to(lexer_t *lexer, token_t *token) +{ + lexer->cur = token->bol + token->col; + lexer->row = token->row; + lexer->bol = token->bol; +} + void lexer_lookahead(lexer_t *lexer, token_t *token, size_t level) { diff --git a/src/lexer.h b/src/lexer.h index 46912dc..c544a15 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -114,6 +114,9 @@ lexer_drop_next_token(lexer_t *lexer); void lexer_peek_next_token(lexer_t *lexer, token_t *token); +void +lexer_step_back_to(lexer_t *lexer, token_t *token); + void lexer_lookahead(lexer_t *lexer, token_t *token, size_t level); diff --git a/src/parser.c b/src/parser.c index fe2a2f0..8180351 100644 --- a/src/parser.c +++ b/src/parser.c @@ -16,6 +16,7 @@ */ #include +#include #include #include #include @@ -36,6 +37,12 @@ parser_parse_block_declarations(parser_t *parser, type_t result_type); static bool is_next_token_cparen(parser_t *parser); +static bool +is_next_token_oparen(parser_t *parser); + +static bool +is_next_token_eof(parser_t *parser); + void parser_init(parser_t *parser, lexer_t *lexer, scope_t *scope) { @@ -43,9 +50,27 @@ parser_init(parser_t *parser, lexer_t *lexer, scope_t *scope) assert(lexer && "lexer must be defined"); parser->lexer = lexer; parser->scope = scope; + // FIXME: change parser_init to parser_new and create a destruction function + // for it + parser->to_be_reevaluated = vector_new(); parser->errors_len = 0; } +static void +parser_to_be_reevaluated_push(parser_t *parser, ast_node_t *node, token_t token) +{ + reevaluation_node_t *reevaluation_node = (reevaluation_node_t *)malloc(sizeof(reevaluation_node_t)); + + if (reevaluation_node == NULL) { + fprintf(stderr, "ref_entry_new: Out of memory: %s\n", strerror(errno)); + exit(EXIT_FAILURE); + } + + *reevaluation_node = (reevaluation_node_t){ .node = node, token = token }; + + vector_push_back(parser->to_be_reevaluated, reevaluation_node); +} + static void parser_error_push_unexpected_kind(parser_t *parser, token_t *token, token_kind_t expected) { @@ -173,6 +198,9 @@ parser_parse_arithmetic2(parser_t *parser); static ast_node_t * parser_parse_factor(parser_t *parser); +static ast_node_t * +parser_parse_name_factor(parser_t *parser, token_t *token); + /** * * ::= @@ -341,65 +369,89 @@ parser_parse_factor(parser_t *parser) return expression; } - case TOKEN_NAME: { - ast_node_t *referenced_node = scope_get(parser->scope, token.value); - - if (referenced_node == NULL) { - parser_error_t error; - error.token = token; - sprintf(error.message, "identifier '" SVFMT "' not defined", SVARG(&token.value)); - parser->errors[parser->errors_len++] = error; - return NULL; - } + case TOKEN_NAME: + return parser_parse_name_factor(parser, &token); + default: { + parser_error_t error; + error.token = token; + sprintf(error.message, "unexpected '%s (" SVFMT ")' token", token_kind_to_str(token.kind), SVARG(&token.value)); + parser->errors[parser->errors_len++] = error; + return NULL; + } + } +} - if (referenced_node->kind == AST_VARIABLE_DECLARATION) { - return ast_node_new_variable(&referenced_node->data.variable_declaration.identifier, - referenced_node->data.variable_declaration.type); - } +static void +parser_drop_until_close_paren(parser_t *parser) +{ + if (!is_next_token_oparen(parser)) { + return; + } - // Function parameters are also variables. - if (referenced_node->kind == AST_FUNCTION_PARAMETER) { - return ast_node_new_variable(&referenced_node->data.function_parameter.identifier, - referenced_node->data.function_parameter.type); - } + int open_parens = 0; + do { + if (is_next_token_oparen(parser)) { + open_parens++; + } else if (is_next_token_cparen(parser)) { + open_parens--; + } + lexer_drop_next_token(parser->lexer); + } while (open_parens != 0 && !is_next_token_eof(parser)); +} - // Parse function parameters - if (!drop_expected_token(parser, TOKEN_OPAREN)) { - return NULL; - } +static ast_node_t * +parser_parse_name_factor(parser_t *parser, token_t *token) +{ + ast_node_t *referenced_node = scope_get(parser->scope, token->value); - vector_t *arguments = vector_new(); + if (referenced_node == NULL) { + ast_node_t *node = ast_node_new(); - while (!is_next_token_cparen(parser)) { - ast_node_t *argument = parser_parse_expression(parser); + parser_to_be_reevaluated_push(parser, node, *token); - if (argument == NULL) { - ast_node_destroy_vector(arguments); - return NULL; - } + parser_drop_until_close_paren(parser); + return node; + } - vector_push_back(arguments, argument); + if (referenced_node->kind == AST_VARIABLE_DECLARATION) { + return ast_node_new_variable(&referenced_node->data.variable_declaration.identifier, + referenced_node->data.variable_declaration.type); + } - if (!is_next_token_cparen(parser) && !drop_expected_token(parser, TOKEN_COMMA)) { - ast_node_destroy_vector(arguments); - return NULL; - } - } + // Function parameters are also variables. + if (referenced_node->kind == AST_FUNCTION_PARAMETER) { + return ast_node_new_variable(&referenced_node->data.function_parameter.identifier, + referenced_node->data.function_parameter.type); + } - drop_expected_token(parser, TOKEN_CPAREN); + // Parse function parameters + if (!drop_expected_token(parser, TOKEN_OPAREN)) { + return NULL; + } - return ast_node_new_function_call(ast_node_function_declaration_identifier(referenced_node), - referenced_node->data.function.prototype.return_type, - arguments); + vector_t *arguments = vector_new(); + + while (!is_next_token_cparen(parser)) { + ast_node_t *argument = parser_parse_expression(parser); + + if (argument == NULL) { + ast_node_destroy_vector(arguments); + return NULL; } - default: { - parser_error_t error; - error.token = token; - sprintf(error.message, "unexpected '%s (" SVFMT ")' token", token_kind_to_str(token.kind), SVARG(&token.value)); - parser->errors[parser->errors_len++] = error; + + vector_push_back(arguments, argument); + + if (!is_next_token_cparen(parser) && !drop_expected_token(parser, TOKEN_COMMA)) { + ast_node_destroy_vector(arguments); return NULL; } } + + drop_expected_token(parser, TOKEN_CPAREN); + + return ast_node_new_function_call(ast_node_function_declaration_identifier(referenced_node), + referenced_node->data.function.prototype.return_type, + arguments); } static ast_node_t * @@ -477,6 +529,11 @@ parser_parse_variable_assignment(parser_t *parser) static bool parser_expression_matches_the_expected_type(parser_t *parser, ast_node_t *expression, type_t type, token_t token) { + if (expression->kind == AST_UNKOWN_NODE) { + expression->data.unknown.expected_type = type; + expression->data.unknown.reference_token = token; + return true; + } if (expression->result_type != type) { parser_error_t error; error.token = token; @@ -625,6 +682,14 @@ is_next_token_cparen(parser_t *parser) return token.kind == TOKEN_CPAREN; } +static bool +is_next_token_oparen(parser_t *parser) +{ + token_t token; + lexer_peek_next_token(parser->lexer, &token); + return token.kind == TOKEN_OPAREN; +} + static bool is_next_token_eof(parser_t *parser) { @@ -850,6 +915,45 @@ parser_parse_function_declaration(parser_t *parser) return node; } +static bool +parser_reevaluate_nodes(parser_t *parser) +{ + for (size_t i = 0; i < parser->to_be_reevaluated->size; i++) { + reevaluation_node_t *reevaluation = vector_at(parser->to_be_reevaluated, i); + + lexer_step_back_to(parser->lexer, &reevaluation->token); + lexer_drop_next_token(parser->lexer); + + ast_node_t *node = parser_parse_name_factor(parser, &reevaluation->token); + + if (node == NULL) { + return false; + } + + if (node->kind == AST_UNKOWN_NODE) { + parser_error_t error; + error.token = reevaluation->token; + sprintf(error.message, "identifier '" SVFMT "' not defined", SVARG(&reevaluation->token.value)); + parser->errors[parser->errors_len++] = error; + ast_node_destroy(node); + return false; + } + + if (!parser_expression_matches_the_expected_type(parser, + node, + reevaluation->node->data.unknown.expected_type, + reevaluation->node->data.unknown.reference_token)) { + ast_node_destroy(node); + return false; + } + + *reevaluation->node = *node; + + free(node); + } + return true; +} + ast_node_t * parser_parse_ns(parser_t *parser) { @@ -878,5 +982,10 @@ parser_parse_ns(parser_t *parser) return NULL; } + if (!parser_reevaluate_nodes(parser)) { + ast_node_destroy_vector(nodes); + return NULL; + } + return ast_node_new_namespace(nodes); } diff --git a/src/parser.h b/src/parser.h index 7bebee7..cfeb1f0 100644 --- a/src/parser.h +++ b/src/parser.h @@ -28,10 +28,17 @@ typedef struct parser_error_t char message[256]; } parser_error_t; +typedef struct reevaluation_node_t +{ + ast_node_t *node; + token_t token; +} reevaluation_node_t; + typedef struct parser_t { lexer_t *lexer; scope_t *scope; + vector_t *to_be_reevaluated; int errors_len; // FIXME: replace with vector parser_error_t errors[1]; diff --git a/test/integration_test.c b/test/integration_test.c index 608764a..8866f83 100644 --- a/test/integration_test.c +++ b/test/integration_test.c @@ -45,6 +45,7 @@ test_examples(const MunitParameter params[], void *user_data_or_fixture) assert_exit_status("../examples/variables.pipa", 28); assert_exit_status("../examples/if.pipa", 42); assert_exit_status("../examples/function_call.pipa", 42); + assert_exit_status("../examples/fibbonacci.pipa", 233); return MUNIT_OK; } diff --git a/test/parser_test.c b/test/parser_test.c index 44ffd57..acc238f 100644 --- a/test/parser_test.c +++ b/test/parser_test.c @@ -430,6 +430,8 @@ test_parse_basic_syntax_errors(const MunitParameter params[], void *user_data_or assert_parser_error("fn main(): i32 { return fn_404(); }", "identifier 'fn_404' not defined"); assert_parser_error("fn my_bool_fn(): bool { return true; } fn main(): i32 { return my_bool_fn(); }", "incompatible types: expected 'i32', actual 'bool'."); + assert_parser_error("fn main(): i32 { return my_bool_fn(); } fn my_bool_fn(): bool { return true; }", + "incompatible types: expected 'i32', actual 'bool'."); assert_parser_error("fn main(): i32 { oxi 42; }", "unexpected token 'TOKEN_NAME' value='oxi'"); return MUNIT_OK; -- cgit v1.2.3