summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohnny Richard <johnny@johnnyrichard.com>2023-04-21 15:11:17 +0200
committerCarlos Maniero <carlosmaniero@gmail.com>2023-04-21 10:29:41 -0300
commitd86d70fc7c6751713a6b9f02d9f77814e2f75718 (patch)
tree15340e4f421628fa79461374c39407997f928d93
parent562fa8785f9bc3074e8b2acf524f4120add22752 (diff)
parser: Parse integers arithmetic expression
This patch implements the AST creation for arithmetic expressions. NOTE: The implementation works only for integer numbers. Signed-off-by: Johnny Richard <johnny@johnnyrichard.com> Reviewed-by: Carlos Maniero <carlosmaniero@gmail.com>
-rw-r--r--src/ast.c8
-rw-r--r--src/ast.h9
-rw-r--r--src/lexer.c11
-rw-r--r--src/lexer.h3
-rw-r--r--src/parser.c78
-rw-r--r--src/parser.h1
-rw-r--r--test/parser_test.c74
7 files changed, 177 insertions, 7 deletions
diff --git a/src/ast.c b/src/ast.c
index 7f3af9e..5c94d68 100644
--- a/src/ast.c
+++ b/src/ast.c
@@ -64,12 +64,16 @@ ast_node_destroy(ast_node_t *node)
case AST_FUNCTION_DECLARATION:
ast_node_destroy(node->data.function.body);
break;
+ case AST_BINARY_OPERATION:
+ ast_node_destroy(node->data.binary_operation.left);
+ ast_node_destroy(node->data.binary_operation.right);
+ break;
+ case AST_LITERAL:
+ break;
case AST_RETURN_STMT:
break;
case AST_UNKOWN_NODE:
break;
- case AST_LITERAL:
- break;
default:
assert(false && "unmapped free strategy");
}
diff --git a/src/ast.h b/src/ast.h
index 1c4bbb8..cb9b8d5 100644
--- a/src/ast.h
+++ b/src/ast.h
@@ -38,6 +38,13 @@ typedef struct ast_function_declaration_t {
ast_node_t* body;
} ast_function_declaration_t;
+typedef struct ast_binary_operation_t {
+ // FIXME: We want to use enum to distinguish operators
+ string_view_t op;
+ ast_node_t* left;
+ ast_node_t* right;
+} ast_binary_operation_t;
+
typedef enum {
AST_LITERAL_INTEGER
} ast_literal_kind_t;
@@ -59,6 +66,7 @@ typedef struct ast_visitor_t {
typedef enum {
AST_FUNCTION_DECLARATION,
+ AST_BINARY_OPERATION,
AST_LITERAL,
AST_RETURN_STMT,
AST_UNKOWN_NODE
@@ -66,6 +74,7 @@ typedef enum {
typedef union {
ast_function_declaration_t function;
+ ast_binary_operation_t binary_operation;
ast_literal_t literal;
ast_return_stmt_t return_stmt;
} ast_node_data_t;
diff --git a/src/lexer.c b/src/lexer.c
index 2c8ffb9..b641752 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -44,6 +44,7 @@ lexer_define_literal_token_props(lexer_t *lexer, token_t *token, token_kind_t ki
token->filepath = lexer->filepath;
token->row = lexer->row;
token->col = lexer->cur - lexer->bol;
+ token->bol = lexer->bol;
}
static void
@@ -73,6 +74,7 @@ lexer_tokenize_number(lexer_t *lexer, token_t *token)
token->filepath = lexer->filepath;
token->row = lexer->row;
token->col = begin - lexer->bol;
+ token->bol = lexer->bol;
}
static void
@@ -88,6 +90,7 @@ lexer_tokenize_name(lexer_t *lexer, token_t *token)
token->filepath = lexer->filepath;
token->row = lexer->row;
token->col = begin - lexer->bol;
+ token->bol = lexer->bol;
}
void
@@ -197,6 +200,14 @@ lexer_load_file_contents(lexer_t *lexer)
}
void
+lexer_step_back_to(lexer_t *lexer, token_t *token)
+{
+ lexer->cur = token->bol + token->col;
+ lexer->row = token->row;
+ lexer->bol = token->bol;
+}
+
+void
lexer_drop_char(lexer_t *lexer)
{
lexer->cur++;
diff --git a/src/lexer.h b/src/lexer.h
index ae8ab60..9f45910 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -42,6 +42,7 @@ typedef struct token_t {
char *filepath;
uint32_t row;
uint32_t col;
+ uint32_t bol;
} token_t;
@@ -68,6 +69,8 @@ bool lexer_is_not_eof(lexer_t *lexer);
void lexer_drop_char(lexer_t *lexer);
+void lexer_step_back_to(lexer_t *lexer, token_t *token);
+
char * token_kind_to_str(token_kind_t kind);
#endif /* LEXER_H */
diff --git a/src/parser.c b/src/parser.c
index 176a4a8..ad38125 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -109,16 +109,20 @@ parser_literal_integer_node(ast_node_t *node, token_t *token)
}
bool
-parser_parse_next_expression(parser_t *parser, ast_node_t *node)
+parser_parse_factor(parser_t *parser, ast_node_t *node)
{
token_t token;
lexer_next_token(parser->lexer, &token);
- if (token.kind == TOKEN_NUMBER)
- {
+ if (token.kind == TOKEN_NUMBER) {
return parser_literal_integer_node(node, &token);
+ } else if (token.kind == TOKEN_OPAREN) {
+ parser_parse_expression(parser, node);
+ if (!drop_expected_token(parser, TOKEN_CPAREN)) return false;
+ return true;
}
+ // FIXME: Extract this erros logic to a function
parser_error_t error;
error.token = token;
sprintf(
@@ -128,10 +132,76 @@ parser_parse_next_expression(parser_t *parser, ast_node_t *node)
SVARG(&token.value)
);
parser->errors[parser->errors_len++] = error;
+
return false;
}
bool
+parser_parse_term(parser_t *parser, ast_node_t *node)
+{
+ if (!parser_parse_factor(parser, node)) return false;
+
+ token_t token;
+ lexer_next_token(parser->lexer, &token);
+
+ while (token.kind == TOKEN_OP && (string_view_eq(token.value, string_view_from_str("*")) || string_view_eq(token.value, string_view_from_str("/")))) {
+ ast_node_t *binary_op = ast_node_new();
+ binary_op->kind = AST_BINARY_OPERATION;
+ binary_op->data.binary_operation.op = token.value;
+
+ binary_op->data.binary_operation.left = ast_node_new();
+ *binary_op->data.binary_operation.left = *node;
+
+ binary_op->data.binary_operation.right = ast_node_new();
+ if (!parser_parse_factor(parser, binary_op->data.binary_operation.right)) return false;
+
+ *node = *binary_op;
+
+ lexer_next_token(parser->lexer, &token);
+ }
+
+ lexer_step_back_to(parser->lexer, &token);
+
+ return true;
+}
+
+/**
+ *
+ * <expression> ::= <term> (('+' | '-') term)*
+ * <term> ::= <factor> (('*' | '/') factor)*
+ * <factor> ::= <integer> | '(' <expression> ')'
+ *
+ */
+bool
+parser_parse_expression(parser_t *parser, ast_node_t *node)
+{
+ if (!parser_parse_term(parser, node)) return false;
+
+ token_t token;
+ lexer_next_token(parser->lexer, &token);
+
+ while (token.kind == TOKEN_OP && (string_view_eq(token.value, string_view_from_str("+")) || string_view_eq(token.value, string_view_from_str("-")))) {
+
+ ast_node_t *binary_op = ast_node_new();
+ binary_op->kind = AST_BINARY_OPERATION;
+ binary_op->data.binary_operation.op = token.value;
+
+ binary_op->data.binary_operation.left = ast_node_new();
+ *binary_op->data.binary_operation.left = *node;
+
+ binary_op->data.binary_operation.right = ast_node_new();
+ if (!parser_parse_term(parser, binary_op->data.binary_operation.right)) return false;
+
+ *node = *binary_op;
+ lexer_next_token(parser->lexer, &token);
+ }
+
+ lexer_step_back_to(parser->lexer, &token);
+
+ return true;
+}
+
+bool
parser_parse_return_stmt(parser_t *parser, ast_node_t *node)
{
token_t return_keyword_token;
@@ -155,7 +225,7 @@ parser_parse_return_stmt(parser_t *parser, ast_node_t *node)
}
ast_node_t *argument_token = ast_node_new();
- if (!parser_parse_next_expression(parser, argument_token)) return false;
+ if (!parser_parse_expression(parser, argument_token)) return false;
if (!drop_expected_token(parser, TOKEN_SEMICOLON)) return false;
if (!drop_expected_token(parser, TOKEN_CCURLY)) return false;
diff --git a/src/parser.h b/src/parser.h
index 5f73ff3..00d1aae 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -35,6 +35,7 @@ typedef struct parser_t {
void parser_init(parser_t *parser, lexer_t *lexer);
bool parser_parse_function_declaration(parser_t *parser, ast_node_t *node);
+bool parser_parse_expression(parser_t *parser, ast_node_t *node);
#endif /* PARSER_H */
diff --git a/test/parser_test.c b/test/parser_test.c
index 573296f..2e73fa6 100644
--- a/test/parser_test.c
+++ b/test/parser_test.c
@@ -19,6 +19,9 @@
#include "lexer.h"
#include "parser.h"
#include "ast.h"
+#include "string.h"
+
+void assert_string_view_equal(char *expected, string_view_t actual);
void
make_lexer_from_static_src(lexer_t *lexer, char *src)
@@ -55,7 +58,7 @@ test_parse_function(const MunitParameter params[],
parser_t parser;
lexer_t lexer;
- make_lexer_from_static_src(&lexer, "main(): i32 { return 42; }");
+ make_lexer_from_static_src(&lexer, "main(): i32 { \nreturn 42;\n }");
parser_init(&parser, &lexer);
ast_node_t *ast_function = ast_node_new();
@@ -83,6 +86,74 @@ test_parse_function(const MunitParameter params[],
}
static MunitResult
+test_parse_arithmetic_expression(const MunitParameter params[],
+ void *user_data_or_fixture)
+{
+ parser_t parser;
+ lexer_t lexer;
+
+ make_lexer_from_static_src(&lexer, "1 + 3 * 3 / 2 - 1");
+ parser_init(&parser, &lexer);
+
+ ast_node_t *ast_expression = ast_node_new();
+ bool parsed = parser_parse_expression(&parser, ast_expression);
+ assert_true(parsed);
+
+ ast_node_t *exp1 = ast_expression;
+ {
+
+ assert_int(AST_BINARY_OPERATION, ==, exp1->kind);
+ assert_string_view_equal("-", exp1->data.binary_operation.op);
+
+ ast_node_t *exp2 = exp1->data.binary_operation.left;
+ {
+
+ assert_int(AST_BINARY_OPERATION, ==, exp2->kind);
+ assert_string_view_equal("+", exp2->data.binary_operation.op);
+
+ assert_int(AST_LITERAL, ==, exp2->data.binary_operation.left->kind);
+ assert_int(exp2->data.binary_operation.left->data.literal.value.integer, ==, 1);
+
+ ast_node_t *exp3 = exp2->data.binary_operation.right;
+ {
+
+ assert_int(AST_BINARY_OPERATION, ==, exp3->kind);
+ assert_string_view_equal("/", exp3->data.binary_operation.op);
+
+ ast_node_t *exp4 = exp3->data.binary_operation.left;
+ {
+ assert_int(AST_BINARY_OPERATION, ==, exp4->kind);
+ assert_string_view_equal("*", exp4->data.binary_operation.op);
+
+ assert_int(AST_LITERAL, ==, exp4->data.binary_operation.left->kind);
+ assert_int(exp4->data.binary_operation.left->data.literal.value.integer, ==, 3);
+
+ assert_int(AST_LITERAL, ==, exp4->data.binary_operation.right->kind);
+ assert_int(exp4->data.binary_operation.right->data.literal.value.integer, ==, 3);
+ }
+
+ assert_int(AST_LITERAL, ==, exp3->data.binary_operation.right->kind);
+ assert_int(exp3->data.binary_operation.right->data.literal.value.integer, ==, 2);
+ }
+ }
+
+ assert_int(AST_LITERAL, ==, exp1->data.binary_operation.right->kind);
+ assert_int(exp1->data.binary_operation.right->data.literal.value.integer, ==, 1);
+ }
+
+ ast_node_destroy(ast_expression);
+
+ return MUNIT_OK;
+}
+
+void assert_string_view_equal(char *expected, string_view_t actual)
+{
+ size_t expected_len = strlen(expected);
+ assert_int(expected_len, ==, actual.size);
+ assert_memory_equal(expected_len, expected, actual.str);
+}
+
+static MunitResult
test_parse_basic_syntax_errors(const MunitParameter params[],
void *user_data_or_fixture)
{
@@ -104,6 +175,7 @@ test_parse_basic_syntax_errors(const MunitParameter params[],
static MunitTest tests[] = {
{ "/test_parse_function", test_parse_function, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
{ "/test_parse_basic_syntax_errors", test_parse_basic_syntax_errors, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
+ { "/test_parse_arithmetic_expression", test_parse_arithmetic_expression, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
{ NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }
};