From 3065f54e3a122dd3d8c2deffdec72ec48ea4f165 Mon Sep 17 00:00:00 2001 From: Carlos Maniero Date: Wed, 10 May 2023 00:20:01 -0300 Subject: parser: Fixes boolean binary operation precedence The comparators && and || should have precedence over others comparators (> < >= <= == !=). Signed-off-by: Carlos Maniero --- src/parser.c | 48 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 7 deletions(-) (limited to 'src/parser.c') diff --git a/src/parser.c b/src/parser.c index 9167f1e..e3f157f 100644 --- a/src/parser.c +++ b/src/parser.c @@ -156,7 +156,10 @@ ast_node_t * parser_parse_expression(parser_t *parser); static ast_node_t * -parser_parse_comparison(parser_t *parser); +parser_parse_comparison1(parser_t *parser); + +static ast_node_t * +parser_parse_comparison2(parser_t *parser); static ast_node_t * parser_parse_arithmetic1(parser_t *parser); @@ -169,8 +172,9 @@ parser_parse_factor(parser_t *parser); /** * - * ::= - * ::= (('>' | '>=' | '=' | ...) arithmetic1)* + * ::= + * ::= (('>' | '>=' | '=' | ...) comparison2)* + * ::= (('>' | '>=' | '=' | ...) arithmetic1)* * ::= (('+' | '-') arithmetic2)* * ::= (('*' | '/') factor)* * ::= | '(' ')' @@ -179,11 +183,42 @@ parser_parse_factor(parser_t *parser); ast_node_t * parser_parse_expression(parser_t *parser) { - return parser_parse_comparison(parser); + return parser_parse_comparison1(parser); +} + +static ast_node_t * +parser_parse_comparison1(parser_t *parser) +{ + ast_node_t *node = parser_parse_comparison2(parser); + + if (node == NULL) { + return NULL; + } + + token_t token; + lexer_peek_next_token(parser->lexer, &token); + + while (token.kind == TOKEN_AND || token.kind == TOKEN_OR) { + lexer_drop_next_token(parser->lexer); + + ast_node_t *left = node; + ast_node_t *right = parser_parse_comparison2(parser); + + if (right == NULL) { + ast_node_destroy(node); + return NULL; + } + + node = ast_node_new_binary_operation(token_to_binary_operation_kind(&token), left, right, TYPE_BOOL); + + lexer_peek_next_token(parser->lexer, &token); + } + + return node; } static ast_node_t * -parser_parse_comparison(parser_t *parser) +parser_parse_comparison2(parser_t *parser) { ast_node_t *node = parser_parse_arithmetic1(parser); @@ -195,8 +230,7 @@ parser_parse_comparison(parser_t *parser) lexer_peek_next_token(parser->lexer, &token); while (token.kind == TOKEN_GT || token.kind == TOKEN_LT || token.kind == TOKEN_GT_EQUAL || - token.kind == TOKEN_LT_EQUAL || token.kind == TOKEN_EQUAL || token.kind == TOKEN_NOT_EQUAL || - token.kind == TOKEN_AND || token.kind == TOKEN_OR) { + token.kind == TOKEN_LT_EQUAL || token.kind == TOKEN_EQUAL || token.kind == TOKEN_NOT_EQUAL) { lexer_drop_next_token(parser->lexer); ast_node_t *left = node; -- cgit v1.2.3 From 75639fbf01bd6ae1212521b6cf822025eb8b598d Mon Sep 17 00:00:00 2001 From: Carlos Maniero Date: Wed, 10 May 2023 16:07:39 -0300 Subject: namespaces: Add a namespace structure that represents a file We have been always parsing a single function. Since we want to have multiple functions in a near future, this patch introduces an namespace that represents an entire file. To ensure a function is defined inside a namespace, a helper function was created. Today our ast_node structure is highly exposed, and this is something that Johnny and I have been discussed. So then, this is a first step to try to protected the code generation from our ast tree. Signed-off-by: Carlos Maniero --- src/parser.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'src/parser.c') diff --git a/src/parser.c b/src/parser.c index e3f157f..b94087f 100644 --- a/src/parser.c +++ b/src/parser.c @@ -570,6 +570,22 @@ is_next_statement_return(parser_t *parser) return token.kind == TOKEN_KEYWORD_RETURN; } +static bool +is_next_function_declaration(parser_t *parser) +{ + token_t token; + lexer_peek_next_token(parser->lexer, &token); + return token.kind == TOKEN_KEYWORD_FN; +} + +static bool +is_next_token_eof(parser_t *parser) +{ + token_t token; + lexer_peek_next_token(parser->lexer, &token); + return token.kind == TOKEN_EOF; +} + static bool is_block_end(parser_t *parser) { @@ -746,3 +762,24 @@ parser_parse_function_declaration(parser_t *parser) return node; } + +ast_node_t * +parser_parse_ns(parser_t *parser) +{ + vector_t *nodes = vector_new(); + + while (!is_next_token_eof(parser)) { + if (is_next_function_declaration(parser)) { + ast_node_t *node = parser_parse_function_declaration(parser); + + if (node == NULL) { + ast_node_destroy_vector(nodes); + return NULL; + } + + vector_push_back(nodes, node); + } + } + + return ast_node_new_namespace(nodes); +} -- cgit v1.2.3 From 3129b741064c2b4f2c6c2408bd42cc83f7341ea8 Mon Sep 17 00:00:00 2001 From: Carlos Maniero Date: Wed, 10 May 2023 16:53:05 -0300 Subject: gas: Generate function call This is an initial commit that enables function calls. At this point only functions with no parameters is going to work. Signed-off-by: Carlos Maniero --- src/parser.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'src/parser.c') diff --git a/src/parser.c b/src/parser.c index b94087f..578514c 100644 --- a/src/parser.c +++ b/src/parser.c @@ -339,10 +339,9 @@ parser_parse_factor(parser_t *parser) return expression; } case TOKEN_NAME: { - // TODO: Check node kind, today accepts only variables - ast_node_t *var_node = scope_get(parser->scope, token.value); + ast_node_t *referenced_node = scope_get(parser->scope, token.value); - if (var_node == NULL) { + if (referenced_node == NULL) { parser_error_t error; error.token = token; sprintf(error.message, "identifier '" SVFMT "' not defined", SVARG(&token.value)); @@ -350,8 +349,21 @@ parser_parse_factor(parser_t *parser) return NULL; } - return ast_node_new_variable(&var_node->data.variable_declaration.identifier, - var_node->data.variable_declaration.type); + if (referenced_node->kind == AST_VARIABLE_DECLARATION) { + return ast_node_new_variable(&referenced_node->data.variable_declaration.identifier, + referenced_node->data.variable_declaration.type); + } + + // Parse function parameters + if (!drop_expected_token(parser, TOKEN_OPAREN)) { + return NULL; + } + if (!drop_expected_token(parser, TOKEN_CPAREN)) { + return NULL; + } + + return ast_node_new_function_call(&referenced_node->data.function.identifier, + referenced_node->data.function.return_type); } default: { parser_error_t error; -- cgit v1.2.3 From 6f187a71cbe3aa4ebb32ba287c75562d96c7a3f4 Mon Sep 17 00:00:00 2001 From: Carlos Maniero Date: Wed, 10 May 2023 17:49:36 -0300 Subject: tests: Replace parse function with parse ns for error handling It was necessary to test function calls errors. Signed-off-by: Carlos Maniero --- src/parser.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/parser.c') diff --git a/src/parser.c b/src/parser.c index 578514c..377ff73 100644 --- a/src/parser.c +++ b/src/parser.c @@ -790,7 +790,17 @@ parser_parse_ns(parser_t *parser) } vector_push_back(nodes, node); + continue; } + + parser_error_t error; + + lexer_peek_next_token(parser->lexer, &error.token); + sprintf(error.message, "Unexpected token '" SVFMT "'", SVARG(&error.token.value)); + parser->errors[parser->errors_len++] = error; + + ast_node_destroy_vector(nodes); + return NULL; } return ast_node_new_namespace(nodes); -- cgit v1.2.3 From 5042a4ffc1363d6f0f99a3afd79f76cf2da738d6 Mon Sep 17 00:00:00 2001 From: Carlos Maniero Date: Wed, 10 May 2023 22:24:14 -0300 Subject: gas: implement function calls For now function calls are following the C's calling convention, which means they are using the following registers to pass functions' arguments: rdi, rsi, rdx, rcx, r8, r9 If a function has more then 6 parameters, the compilation will fail. To enable function with more than 6 parameters we will need to save the extra arguments on stack. Naming: parameters: function parameters are the variables a function receives. arguments: Arguments are the values passed to a function when calling it. Calling mechanism: When a function is called, all the expressions passed as argument are evaluated, after the evaluation, the result is stored on the register that represents its argument position, the first argument will be stored on rdi, the second on rsi and so on. Receiving mechanism: When a function starts, the first thing it does is store all the registers onto the stack. So rdi will be stored on -8(rbp), rsi on -16(rbp) and so on. And, a ref_entry is created making the relationship parameter-stack_offset. Signed-off-by: Carlos Maniero --- src/parser.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 85 insertions(+), 10 deletions(-) (limited to 'src/parser.c') diff --git a/src/parser.c b/src/parser.c index 377ff73..fe2a2f0 100644 --- a/src/parser.c +++ b/src/parser.c @@ -33,6 +33,9 @@ parser_expression_matches_the_expected_type(parser_t *parser, ast_node_t *expres static ast_node_t * parser_parse_block_declarations(parser_t *parser, type_t result_type); +static bool +is_next_token_cparen(parser_t *parser); + void parser_init(parser_t *parser, lexer_t *lexer, scope_t *scope) { @@ -354,16 +357,40 @@ parser_parse_factor(parser_t *parser) referenced_node->data.variable_declaration.type); } + // Function parameters are also variables. + if (referenced_node->kind == AST_FUNCTION_PARAMETER) { + return ast_node_new_variable(&referenced_node->data.function_parameter.identifier, + referenced_node->data.function_parameter.type); + } + // Parse function parameters if (!drop_expected_token(parser, TOKEN_OPAREN)) { return NULL; } - if (!drop_expected_token(parser, TOKEN_CPAREN)) { - return NULL; + + vector_t *arguments = vector_new(); + + while (!is_next_token_cparen(parser)) { + ast_node_t *argument = parser_parse_expression(parser); + + if (argument == NULL) { + ast_node_destroy_vector(arguments); + return NULL; + } + + vector_push_back(arguments, argument); + + if (!is_next_token_cparen(parser) && !drop_expected_token(parser, TOKEN_COMMA)) { + ast_node_destroy_vector(arguments); + return NULL; + } } - return ast_node_new_function_call(&referenced_node->data.function.identifier, - referenced_node->data.function.return_type); + drop_expected_token(parser, TOKEN_CPAREN); + + return ast_node_new_function_call(ast_node_function_declaration_identifier(referenced_node), + referenced_node->data.function.prototype.return_type, + arguments); } default: { parser_error_t error; @@ -590,6 +617,14 @@ is_next_function_declaration(parser_t *parser) return token.kind == TOKEN_KEYWORD_FN; } +static bool +is_next_token_cparen(parser_t *parser) +{ + token_t token; + lexer_peek_next_token(parser->lexer, &token); + return token.kind == TOKEN_CPAREN; +} + static bool is_next_token_eof(parser_t *parser) { @@ -722,10 +757,48 @@ parser_parse_block_declarations(parser_t *parser, type_t result_type) return ast_node_new_block(body); } -static bool -parser_parse_function_arguments(parser_t *parser) +static vector_t * +parser_parse_function_parameters(parser_t *parser) { - return drop_expected_token(parser, TOKEN_OPAREN) && drop_expected_token(parser, TOKEN_CPAREN); + if (!drop_expected_token(parser, TOKEN_OPAREN)) { + return NULL; + } + + vector_t *vector = vector_new(); + + while (!is_next_token_cparen(parser)) { + token_t token_name; + type_t type; + + if (!expected_token(&token_name, parser, TOKEN_NAME)) { + ast_node_destroy_vector(vector); + return NULL; + } + if (!drop_expected_token(parser, TOKEN_COLON)) { + ast_node_destroy_vector(vector); + return NULL; + } + + if (!parser_parse_type(parser, &type)) { + ast_node_destroy_vector(vector); + return NULL; + } + + ast_node_t *parameter = ast_node_new_function_parameter(token_name.value, type); + + scope_push(parser->scope, ¶meter->data.function_parameter.identifier, parameter); + + vector_push_back(vector, parameter); + + if (!is_next_token_cparen(parser) && !drop_expected_token(parser, TOKEN_COMMA)) { + ast_node_destroy_vector(vector); + return NULL; + } + } + + drop_expected_token(parser, TOKEN_CPAREN); + + return vector; } ast_node_t * @@ -741,7 +814,9 @@ parser_parse_function_declaration(parser_t *parser) return NULL; } - if (!parser_parse_function_arguments(parser)) { + vector_t *parameters = parser_parse_function_parameters(parser); + + if (parameters == NULL) { return NULL; } @@ -766,11 +841,11 @@ parser_parse_function_declaration(parser_t *parser) return NULL; } - ast_node_t *node = ast_node_new_function_declaration(func_name_token.value, return_type, body); + ast_node_t *node = ast_node_new_function_declaration(func_name_token.value, return_type, parameters, body); // TODO: When implementing function calls the scope must be pushed before the // body to be parsed, otherwise recursion not gonna work. - scope_push(parser->scope, &node->data.function.identifier, node); + scope_push(parser->scope, &node->data.function.prototype.identifier, node); return node; } -- cgit v1.2.3 From ea7f65fe1250be8f49edcaaedd3410aed1401648 Mon Sep 17 00:00:00 2001 From: Carlos Maniero Date: Thu, 11 May 2023 15:00:10 -0300 Subject: gas: implement recursion and late evaluation Until now the below code was not valid for pipac. fn main(): i32 { return fib(13); } fn fib(n: i32): i32 { if n <= 1 { return n; } return fib(n - 1) + fib(n - 2); } Pipa's parser was adding a function to scope after they were fully parsed which means that fib's self-reference would not work. Also, functions were required to follow the be called in the order they are declared for the same scope reason so, the main function was required to be defined after fib. And how it is working now? When a TOKEN_NAME is not found in the scope, instead of returning an error, an unknown token is created as placeholder. The parser stores the node reference and the token it was trying to parse. During type checks, if the parser detects an unknown node, instead of returning an error, it stores in that node what was the expected type. After the NS is fully parsed a reevaluation is made on those unknown nodes by setting the lexer back on the node's token position and parsing the TOKEN_NAME again. Ps: There is a typo on the unknown token. It will be addressed in another commit since this issue was not introduced by this change. Signed-off-by: Carlos Maniero --- src/parser.c | 199 +++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 154 insertions(+), 45 deletions(-) (limited to 'src/parser.c') diff --git a/src/parser.c b/src/parser.c index fe2a2f0..8180351 100644 --- a/src/parser.c +++ b/src/parser.c @@ -16,6 +16,7 @@ */ #include +#include #include #include #include @@ -36,6 +37,12 @@ parser_parse_block_declarations(parser_t *parser, type_t result_type); static bool is_next_token_cparen(parser_t *parser); +static bool +is_next_token_oparen(parser_t *parser); + +static bool +is_next_token_eof(parser_t *parser); + void parser_init(parser_t *parser, lexer_t *lexer, scope_t *scope) { @@ -43,9 +50,27 @@ parser_init(parser_t *parser, lexer_t *lexer, scope_t *scope) assert(lexer && "lexer must be defined"); parser->lexer = lexer; parser->scope = scope; + // FIXME: change parser_init to parser_new and create a destruction function + // for it + parser->to_be_reevaluated = vector_new(); parser->errors_len = 0; } +static void +parser_to_be_reevaluated_push(parser_t *parser, ast_node_t *node, token_t token) +{ + reevaluation_node_t *reevaluation_node = (reevaluation_node_t *)malloc(sizeof(reevaluation_node_t)); + + if (reevaluation_node == NULL) { + fprintf(stderr, "ref_entry_new: Out of memory: %s\n", strerror(errno)); + exit(EXIT_FAILURE); + } + + *reevaluation_node = (reevaluation_node_t){ .node = node, token = token }; + + vector_push_back(parser->to_be_reevaluated, reevaluation_node); +} + static void parser_error_push_unexpected_kind(parser_t *parser, token_t *token, token_kind_t expected) { @@ -173,6 +198,9 @@ parser_parse_arithmetic2(parser_t *parser); static ast_node_t * parser_parse_factor(parser_t *parser); +static ast_node_t * +parser_parse_name_factor(parser_t *parser, token_t *token); + /** * * ::= @@ -341,65 +369,89 @@ parser_parse_factor(parser_t *parser) return expression; } - case TOKEN_NAME: { - ast_node_t *referenced_node = scope_get(parser->scope, token.value); - - if (referenced_node == NULL) { - parser_error_t error; - error.token = token; - sprintf(error.message, "identifier '" SVFMT "' not defined", SVARG(&token.value)); - parser->errors[parser->errors_len++] = error; - return NULL; - } + case TOKEN_NAME: + return parser_parse_name_factor(parser, &token); + default: { + parser_error_t error; + error.token = token; + sprintf(error.message, "unexpected '%s (" SVFMT ")' token", token_kind_to_str(token.kind), SVARG(&token.value)); + parser->errors[parser->errors_len++] = error; + return NULL; + } + } +} - if (referenced_node->kind == AST_VARIABLE_DECLARATION) { - return ast_node_new_variable(&referenced_node->data.variable_declaration.identifier, - referenced_node->data.variable_declaration.type); - } +static void +parser_drop_until_close_paren(parser_t *parser) +{ + if (!is_next_token_oparen(parser)) { + return; + } - // Function parameters are also variables. - if (referenced_node->kind == AST_FUNCTION_PARAMETER) { - return ast_node_new_variable(&referenced_node->data.function_parameter.identifier, - referenced_node->data.function_parameter.type); - } + int open_parens = 0; + do { + if (is_next_token_oparen(parser)) { + open_parens++; + } else if (is_next_token_cparen(parser)) { + open_parens--; + } + lexer_drop_next_token(parser->lexer); + } while (open_parens != 0 && !is_next_token_eof(parser)); +} - // Parse function parameters - if (!drop_expected_token(parser, TOKEN_OPAREN)) { - return NULL; - } +static ast_node_t * +parser_parse_name_factor(parser_t *parser, token_t *token) +{ + ast_node_t *referenced_node = scope_get(parser->scope, token->value); - vector_t *arguments = vector_new(); + if (referenced_node == NULL) { + ast_node_t *node = ast_node_new(); - while (!is_next_token_cparen(parser)) { - ast_node_t *argument = parser_parse_expression(parser); + parser_to_be_reevaluated_push(parser, node, *token); - if (argument == NULL) { - ast_node_destroy_vector(arguments); - return NULL; - } + parser_drop_until_close_paren(parser); + return node; + } - vector_push_back(arguments, argument); + if (referenced_node->kind == AST_VARIABLE_DECLARATION) { + return ast_node_new_variable(&referenced_node->data.variable_declaration.identifier, + referenced_node->data.variable_declaration.type); + } - if (!is_next_token_cparen(parser) && !drop_expected_token(parser, TOKEN_COMMA)) { - ast_node_destroy_vector(arguments); - return NULL; - } - } + // Function parameters are also variables. + if (referenced_node->kind == AST_FUNCTION_PARAMETER) { + return ast_node_new_variable(&referenced_node->data.function_parameter.identifier, + referenced_node->data.function_parameter.type); + } - drop_expected_token(parser, TOKEN_CPAREN); + // Parse function parameters + if (!drop_expected_token(parser, TOKEN_OPAREN)) { + return NULL; + } - return ast_node_new_function_call(ast_node_function_declaration_identifier(referenced_node), - referenced_node->data.function.prototype.return_type, - arguments); + vector_t *arguments = vector_new(); + + while (!is_next_token_cparen(parser)) { + ast_node_t *argument = parser_parse_expression(parser); + + if (argument == NULL) { + ast_node_destroy_vector(arguments); + return NULL; } - default: { - parser_error_t error; - error.token = token; - sprintf(error.message, "unexpected '%s (" SVFMT ")' token", token_kind_to_str(token.kind), SVARG(&token.value)); - parser->errors[parser->errors_len++] = error; + + vector_push_back(arguments, argument); + + if (!is_next_token_cparen(parser) && !drop_expected_token(parser, TOKEN_COMMA)) { + ast_node_destroy_vector(arguments); return NULL; } } + + drop_expected_token(parser, TOKEN_CPAREN); + + return ast_node_new_function_call(ast_node_function_declaration_identifier(referenced_node), + referenced_node->data.function.prototype.return_type, + arguments); } static ast_node_t * @@ -477,6 +529,11 @@ parser_parse_variable_assignment(parser_t *parser) static bool parser_expression_matches_the_expected_type(parser_t *parser, ast_node_t *expression, type_t type, token_t token) { + if (expression->kind == AST_UNKOWN_NODE) { + expression->data.unknown.expected_type = type; + expression->data.unknown.reference_token = token; + return true; + } if (expression->result_type != type) { parser_error_t error; error.token = token; @@ -625,6 +682,14 @@ is_next_token_cparen(parser_t *parser) return token.kind == TOKEN_CPAREN; } +static bool +is_next_token_oparen(parser_t *parser) +{ + token_t token; + lexer_peek_next_token(parser->lexer, &token); + return token.kind == TOKEN_OPAREN; +} + static bool is_next_token_eof(parser_t *parser) { @@ -850,6 +915,45 @@ parser_parse_function_declaration(parser_t *parser) return node; } +static bool +parser_reevaluate_nodes(parser_t *parser) +{ + for (size_t i = 0; i < parser->to_be_reevaluated->size; i++) { + reevaluation_node_t *reevaluation = vector_at(parser->to_be_reevaluated, i); + + lexer_step_back_to(parser->lexer, &reevaluation->token); + lexer_drop_next_token(parser->lexer); + + ast_node_t *node = parser_parse_name_factor(parser, &reevaluation->token); + + if (node == NULL) { + return false; + } + + if (node->kind == AST_UNKOWN_NODE) { + parser_error_t error; + error.token = reevaluation->token; + sprintf(error.message, "identifier '" SVFMT "' not defined", SVARG(&reevaluation->token.value)); + parser->errors[parser->errors_len++] = error; + ast_node_destroy(node); + return false; + } + + if (!parser_expression_matches_the_expected_type(parser, + node, + reevaluation->node->data.unknown.expected_type, + reevaluation->node->data.unknown.reference_token)) { + ast_node_destroy(node); + return false; + } + + *reevaluation->node = *node; + + free(node); + } + return true; +} + ast_node_t * parser_parse_ns(parser_t *parser) { @@ -878,5 +982,10 @@ parser_parse_ns(parser_t *parser) return NULL; } + if (!parser_reevaluate_nodes(parser)) { + ast_node_destroy_vector(nodes); + return NULL; + } + return ast_node_new_namespace(nodes); } -- cgit v1.2.3