From d86d70fc7c6751713a6b9f02d9f77814e2f75718 Mon Sep 17 00:00:00 2001 From: Johnny Richard Date: Fri, 21 Apr 2023 15:11:17 +0200 Subject: parser: Parse integers arithmetic expression This patch implements the AST creation for arithmetic expressions. NOTE: The implementation works only for integer numbers. Signed-off-by: Johnny Richard Reviewed-by: Carlos Maniero --- src/ast.c | 8 ++++-- src/ast.h | 9 +++++++ src/lexer.c | 11 ++++++++ src/lexer.h | 3 +++ src/parser.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++--- src/parser.h | 1 + test/parser_test.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++- 7 files changed, 177 insertions(+), 7 deletions(-) diff --git a/src/ast.c b/src/ast.c index 7f3af9e..5c94d68 100644 --- a/src/ast.c +++ b/src/ast.c @@ -64,12 +64,16 @@ ast_node_destroy(ast_node_t *node) case AST_FUNCTION_DECLARATION: ast_node_destroy(node->data.function.body); break; + case AST_BINARY_OPERATION: + ast_node_destroy(node->data.binary_operation.left); + ast_node_destroy(node->data.binary_operation.right); + break; + case AST_LITERAL: + break; case AST_RETURN_STMT: break; case AST_UNKOWN_NODE: break; - case AST_LITERAL: - break; default: assert(false && "unmapped free strategy"); } diff --git a/src/ast.h b/src/ast.h index 1c4bbb8..cb9b8d5 100644 --- a/src/ast.h +++ b/src/ast.h @@ -38,6 +38,13 @@ typedef struct ast_function_declaration_t { ast_node_t* body; } ast_function_declaration_t; +typedef struct ast_binary_operation_t { + // FIXME: We want to use enum to distinguish operators + string_view_t op; + ast_node_t* left; + ast_node_t* right; +} ast_binary_operation_t; + typedef enum { AST_LITERAL_INTEGER } ast_literal_kind_t; @@ -59,6 +66,7 @@ typedef struct ast_visitor_t { typedef enum { AST_FUNCTION_DECLARATION, + AST_BINARY_OPERATION, AST_LITERAL, AST_RETURN_STMT, AST_UNKOWN_NODE @@ -66,6 +74,7 @@ typedef enum { typedef union { ast_function_declaration_t function; + ast_binary_operation_t binary_operation; ast_literal_t literal; ast_return_stmt_t return_stmt; } ast_node_data_t; diff --git a/src/lexer.c b/src/lexer.c index 2c8ffb9..b641752 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -44,6 +44,7 @@ lexer_define_literal_token_props(lexer_t *lexer, token_t *token, token_kind_t ki token->filepath = lexer->filepath; token->row = lexer->row; token->col = lexer->cur - lexer->bol; + token->bol = lexer->bol; } static void @@ -73,6 +74,7 @@ lexer_tokenize_number(lexer_t *lexer, token_t *token) token->filepath = lexer->filepath; token->row = lexer->row; token->col = begin - lexer->bol; + token->bol = lexer->bol; } static void @@ -88,6 +90,7 @@ lexer_tokenize_name(lexer_t *lexer, token_t *token) token->filepath = lexer->filepath; token->row = lexer->row; token->col = begin - lexer->bol; + token->bol = lexer->bol; } void @@ -196,6 +199,14 @@ lexer_load_file_contents(lexer_t *lexer) } +void +lexer_step_back_to(lexer_t *lexer, token_t *token) +{ + lexer->cur = token->bol + token->col; + lexer->row = token->row; + lexer->bol = token->bol; +} + void lexer_drop_char(lexer_t *lexer) { diff --git a/src/lexer.h b/src/lexer.h index ae8ab60..9f45910 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -42,6 +42,7 @@ typedef struct token_t { char *filepath; uint32_t row; uint32_t col; + uint32_t bol; } token_t; @@ -68,6 +69,8 @@ bool lexer_is_not_eof(lexer_t *lexer); void lexer_drop_char(lexer_t *lexer); +void lexer_step_back_to(lexer_t *lexer, token_t *token); + char * token_kind_to_str(token_kind_t kind); #endif /* LEXER_H */ diff --git a/src/parser.c b/src/parser.c index 176a4a8..ad38125 100644 --- a/src/parser.c +++ b/src/parser.c @@ -109,16 +109,20 @@ parser_literal_integer_node(ast_node_t *node, token_t *token) } bool -parser_parse_next_expression(parser_t *parser, ast_node_t *node) +parser_parse_factor(parser_t *parser, ast_node_t *node) { token_t token; lexer_next_token(parser->lexer, &token); - if (token.kind == TOKEN_NUMBER) - { + if (token.kind == TOKEN_NUMBER) { return parser_literal_integer_node(node, &token); + } else if (token.kind == TOKEN_OPAREN) { + parser_parse_expression(parser, node); + if (!drop_expected_token(parser, TOKEN_CPAREN)) return false; + return true; } + // FIXME: Extract this erros logic to a function parser_error_t error; error.token = token; sprintf( @@ -128,9 +132,75 @@ parser_parse_next_expression(parser_t *parser, ast_node_t *node) SVARG(&token.value) ); parser->errors[parser->errors_len++] = error; + return false; } +bool +parser_parse_term(parser_t *parser, ast_node_t *node) +{ + if (!parser_parse_factor(parser, node)) return false; + + token_t token; + lexer_next_token(parser->lexer, &token); + + while (token.kind == TOKEN_OP && (string_view_eq(token.value, string_view_from_str("*")) || string_view_eq(token.value, string_view_from_str("/")))) { + ast_node_t *binary_op = ast_node_new(); + binary_op->kind = AST_BINARY_OPERATION; + binary_op->data.binary_operation.op = token.value; + + binary_op->data.binary_operation.left = ast_node_new(); + *binary_op->data.binary_operation.left = *node; + + binary_op->data.binary_operation.right = ast_node_new(); + if (!parser_parse_factor(parser, binary_op->data.binary_operation.right)) return false; + + *node = *binary_op; + + lexer_next_token(parser->lexer, &token); + } + + lexer_step_back_to(parser->lexer, &token); + + return true; +} + +/** + * + * ::= (('+' | '-') term)* + * ::= (('*' | '/') factor)* + * ::= | '(' ')' + * + */ +bool +parser_parse_expression(parser_t *parser, ast_node_t *node) +{ + if (!parser_parse_term(parser, node)) return false; + + token_t token; + lexer_next_token(parser->lexer, &token); + + while (token.kind == TOKEN_OP && (string_view_eq(token.value, string_view_from_str("+")) || string_view_eq(token.value, string_view_from_str("-")))) { + + ast_node_t *binary_op = ast_node_new(); + binary_op->kind = AST_BINARY_OPERATION; + binary_op->data.binary_operation.op = token.value; + + binary_op->data.binary_operation.left = ast_node_new(); + *binary_op->data.binary_operation.left = *node; + + binary_op->data.binary_operation.right = ast_node_new(); + if (!parser_parse_term(parser, binary_op->data.binary_operation.right)) return false; + + *node = *binary_op; + lexer_next_token(parser->lexer, &token); + } + + lexer_step_back_to(parser->lexer, &token); + + return true; +} + bool parser_parse_return_stmt(parser_t *parser, ast_node_t *node) { @@ -155,7 +225,7 @@ parser_parse_return_stmt(parser_t *parser, ast_node_t *node) } ast_node_t *argument_token = ast_node_new(); - if (!parser_parse_next_expression(parser, argument_token)) return false; + if (!parser_parse_expression(parser, argument_token)) return false; if (!drop_expected_token(parser, TOKEN_SEMICOLON)) return false; if (!drop_expected_token(parser, TOKEN_CCURLY)) return false; diff --git a/src/parser.h b/src/parser.h index 5f73ff3..00d1aae 100644 --- a/src/parser.h +++ b/src/parser.h @@ -35,6 +35,7 @@ typedef struct parser_t { void parser_init(parser_t *parser, lexer_t *lexer); bool parser_parse_function_declaration(parser_t *parser, ast_node_t *node); +bool parser_parse_expression(parser_t *parser, ast_node_t *node); #endif /* PARSER_H */ diff --git a/test/parser_test.c b/test/parser_test.c index 573296f..2e73fa6 100644 --- a/test/parser_test.c +++ b/test/parser_test.c @@ -19,6 +19,9 @@ #include "lexer.h" #include "parser.h" #include "ast.h" +#include "string.h" + +void assert_string_view_equal(char *expected, string_view_t actual); void make_lexer_from_static_src(lexer_t *lexer, char *src) @@ -55,7 +58,7 @@ test_parse_function(const MunitParameter params[], parser_t parser; lexer_t lexer; - make_lexer_from_static_src(&lexer, "main(): i32 { return 42; }"); + make_lexer_from_static_src(&lexer, "main(): i32 { \nreturn 42;\n }"); parser_init(&parser, &lexer); ast_node_t *ast_function = ast_node_new(); @@ -82,6 +85,74 @@ test_parse_function(const MunitParameter params[], return MUNIT_OK; } +static MunitResult +test_parse_arithmetic_expression(const MunitParameter params[], + void *user_data_or_fixture) +{ + parser_t parser; + lexer_t lexer; + + make_lexer_from_static_src(&lexer, "1 + 3 * 3 / 2 - 1"); + parser_init(&parser, &lexer); + + ast_node_t *ast_expression = ast_node_new(); + bool parsed = parser_parse_expression(&parser, ast_expression); + assert_true(parsed); + + ast_node_t *exp1 = ast_expression; + { + + assert_int(AST_BINARY_OPERATION, ==, exp1->kind); + assert_string_view_equal("-", exp1->data.binary_operation.op); + + ast_node_t *exp2 = exp1->data.binary_operation.left; + { + + assert_int(AST_BINARY_OPERATION, ==, exp2->kind); + assert_string_view_equal("+", exp2->data.binary_operation.op); + + assert_int(AST_LITERAL, ==, exp2->data.binary_operation.left->kind); + assert_int(exp2->data.binary_operation.left->data.literal.value.integer, ==, 1); + + ast_node_t *exp3 = exp2->data.binary_operation.right; + { + + assert_int(AST_BINARY_OPERATION, ==, exp3->kind); + assert_string_view_equal("/", exp3->data.binary_operation.op); + + ast_node_t *exp4 = exp3->data.binary_operation.left; + { + assert_int(AST_BINARY_OPERATION, ==, exp4->kind); + assert_string_view_equal("*", exp4->data.binary_operation.op); + + assert_int(AST_LITERAL, ==, exp4->data.binary_operation.left->kind); + assert_int(exp4->data.binary_operation.left->data.literal.value.integer, ==, 3); + + assert_int(AST_LITERAL, ==, exp4->data.binary_operation.right->kind); + assert_int(exp4->data.binary_operation.right->data.literal.value.integer, ==, 3); + } + + assert_int(AST_LITERAL, ==, exp3->data.binary_operation.right->kind); + assert_int(exp3->data.binary_operation.right->data.literal.value.integer, ==, 2); + } + } + + assert_int(AST_LITERAL, ==, exp1->data.binary_operation.right->kind); + assert_int(exp1->data.binary_operation.right->data.literal.value.integer, ==, 1); + } + + ast_node_destroy(ast_expression); + + return MUNIT_OK; +} + +void assert_string_view_equal(char *expected, string_view_t actual) +{ + size_t expected_len = strlen(expected); + assert_int(expected_len, ==, actual.size); + assert_memory_equal(expected_len, expected, actual.str); +} + static MunitResult test_parse_basic_syntax_errors(const MunitParameter params[], void *user_data_or_fixture) @@ -104,6 +175,7 @@ test_parse_basic_syntax_errors(const MunitParameter params[], static MunitTest tests[] = { { "/test_parse_function", test_parse_function, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }, { "/test_parse_basic_syntax_errors", test_parse_basic_syntax_errors, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }, + { "/test_parse_arithmetic_expression", test_parse_arithmetic_expression, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }, { NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL } }; -- cgit v1.2.3