diff options
-rw-r--r-- | src/ast.c | 45 | ||||
-rw-r--r-- | src/ast.h | 29 | ||||
-rw-r--r-- | src/gas_assembly_generator.c | 4 | ||||
-rw-r--r-- | src/lexer.c | 3 | ||||
-rw-r--r-- | src/parser.c | 153 | ||||
-rw-r--r-- | src/parser.h | 1 | ||||
-rw-r--r-- | test/Makefile | 2 | ||||
-rw-r--r-- | test/lexer_test.c | 1 | ||||
-rw-r--r-- | test/parser_test.c | 56 |
9 files changed, 241 insertions, 53 deletions
@@ -67,20 +67,35 @@ ast_node_new() return node; } +static void +ast_node_destroy_vector(vector_t *vector) +{ + for (size_t i=0; i < vector->size; i++) { + ast_node_destroy(vector_at(vector, i)); + } + vector_destroy(vector); +} + void ast_node_destroy(ast_node_t *node) { switch (node->kind) { case AST_FUNCTION_DECLARATION: - ast_node_destroy(node->data.function.body); + ast_node_destroy_vector(node->data.function.body); break; case AST_BINARY_OPERATION: ast_node_destroy(node->data.binary_operation.left); ast_node_destroy(node->data.binary_operation.right); break; + case AST_VARIABLE_DECLARATION: + ast_node_destroy(node->data.variable.value); + break; case AST_LITERAL: break; case AST_RETURN_STMT: + ast_node_destroy(node->data.return_stmt.argument); + break; + case AST_IDENTIFIER: break; case AST_UNKOWN_NODE: break; @@ -103,7 +118,7 @@ ast_node_init_return_stmt(ast_node_t *node, ast_node_t *argument) } void -ast_node_init_function_declaration(ast_node_t *node, string_view_t name, type_t return_type, ast_node_t* body) +ast_node_init_function_declaration(ast_node_t *node, string_view_t name, type_t return_type, vector_t *body) { node->accept_visitor = &ast_node_function_accept_visitor, node->kind = AST_FUNCTION_DECLARATION; @@ -144,3 +159,29 @@ ast_node_init_binary_operation(ast_node_t *node, string_view_t op, ast_node_t *l } }; } + +void +ast_node_init_variable_declaration(ast_node_t *node, string_view_t variable_name, type_t type, ast_node_t *value) +{ + // FIXME: define the visitor strategy + node->kind = AST_VARIABLE_DECLARATION; + node->data = (ast_node_data_t) { + .variable = { + .name = variable_name, + .type = type, + .value = value + } + }; +} + +void +ast_node_init_identifier(ast_node_t *node, string_view_t name) +{ + // FIXME: define the visitor strategy + node->kind = AST_IDENTIFIER; + node->data = (ast_node_data_t) { + .variable = { + .name = name, + } + }; +} @@ -18,6 +18,7 @@ #define AST_H #include <stdint.h> #include "string_view.h" +#include 
"vector.h" #define ast_visitor_visit(visitor, node) ast_node_accept_visitor(node, (ast_visitor_t *) visitor); @@ -35,7 +36,7 @@ typedef struct ast_return_stmt_t { typedef struct ast_function_declaration_t { string_view_t name; type_t return_type; - ast_node_t* body; + vector_t* body; } ast_function_declaration_t; typedef struct ast_binary_operation_t { @@ -58,6 +59,16 @@ typedef struct ast_literal_t { ast_literal_value_t value; } ast_literal_t; +typedef struct ast_identifier_t { + string_view_t name; +} ast_identifier_t; + +typedef struct ast_variable_declaration_t { + string_view_t name; + type_t type; + ast_node_t* value; +} ast_variable_declaration_t; + typedef struct ast_visitor_t { void (*visit_function)(struct ast_visitor_t *, ast_function_declaration_t *); void (*visit_return_stmt)(struct ast_visitor_t *, ast_return_stmt_t *); @@ -66,18 +77,22 @@ typedef struct ast_visitor_t { } ast_visitor_t; typedef enum { - AST_FUNCTION_DECLARATION, AST_BINARY_OPERATION, + AST_FUNCTION_DECLARATION, + AST_IDENTIFIER, AST_LITERAL, AST_RETURN_STMT, - AST_UNKOWN_NODE + AST_UNKOWN_NODE, + AST_VARIABLE_DECLARATION, } ast_node_kind_t; typedef union { - ast_function_declaration_t function; ast_binary_operation_t binary_operation; + ast_function_declaration_t function; ast_literal_t literal; ast_return_stmt_t return_stmt; + ast_variable_declaration_t variable; + ast_identifier_t identifier; } ast_node_data_t; typedef struct ast_node_t { @@ -91,9 +106,11 @@ void ast_node_accept_visitor(ast_node_t *node, ast_visitor_t *visitor); ast_node_t* ast_node_new(); void ast_node_destroy(ast_node_t *node); -void ast_node_init_function_declaration(ast_node_t *node, string_view_t name, type_t return_type, ast_node_t *body); -void ast_node_init_return_stmt(ast_node_t *node, ast_node_t *argument); void ast_node_init_binary_operation(ast_node_t *node, string_view_t op, ast_node_t *left, ast_node_t *right); +void ast_node_init_function_declaration(ast_node_t *node, string_view_t name, type_t 
return_type, vector_t *body); +void ast_node_init_identifier(ast_node_t *node, string_view_t name); +void ast_node_init_return_stmt(ast_node_t *node, ast_node_t *argument); +void ast_node_init_variable_declaration(ast_node_t *node, string_view_t variable_name, type_t type, ast_node_t *value); // FIXME: use the naming convention void ast_literal_integer_create(ast_node_t* node, uint32_t number); diff --git a/src/gas_assembly_generator.c b/src/gas_assembly_generator.c index 10a67f3..5572cb7 100644 --- a/src/gas_assembly_generator.c +++ b/src/gas_assembly_generator.c @@ -54,7 +54,9 @@ gas_assembly_generator_visit_function(ast_visitor_t *visitor, ast_function_decla fprintf(gen->out,".text\n"); fprintf(gen->out,"_start:\n"); - ast_visitor_visit(visitor, func->body); + for (size_t i=0; i < func->body->size; i++) { + ast_visitor_visit(visitor, vector_at(func->body, i)); + } } static void diff --git a/src/lexer.c b/src/lexer.c index b641752..e1f0d80 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -154,7 +154,8 @@ lexer_next_token(lexer_t *lexer, token_t *token) if (lexer_current_char(lexer) == '+' || lexer_current_char(lexer) == '-' || lexer_current_char(lexer) == '*' - || lexer_current_char(lexer) == '/') { + || lexer_current_char(lexer) == '/' + || lexer_current_char(lexer) == '=') { lexer_define_literal_token_props(lexer, token, TOKEN_OP); lexer_drop_char(lexer); return; diff --git a/src/parser.c b/src/parser.c index a123648..5399cfd 100644 --- a/src/parser.c +++ b/src/parser.c @@ -24,6 +24,7 @@ #include "ast.h" #include "lexer.h" #include "parser.h" +#include "vector.h" void parser_init(parser_t *parser, lexer_t *lexer) @@ -34,32 +35,27 @@ parser_init(parser_t *parser, lexer_t *lexer) parser->errors_len = 0; } -static bool -expected_token(token_t *token, parser_t *parser, token_kind_t kind) +static void +parser_error_push_unexpected_kind(parser_t *parser, token_t *token, token_kind_t expected) { - lexer_next_token(parser->lexer, token); - parser_error_t error; + 
parser_error_t* error = &parser->errors[parser->errors_len++]; + error->token = *token; if (token->kind == TOKEN_EOF) { - error.token = *token; - sprintf( - error.message, - "expected '%s' but got end of file", - token_kind_to_str(kind) - ); - - parser->errors[parser->errors_len++] = error; - return false; + sprintf(error->message, "expected '%s' but got end of file", token_kind_to_str(expected)); + return; } + sprintf(error->message, "expected '%s' but got '%s'", token_kind_to_str(expected), token_kind_to_str(token->kind)); +} + +static bool +expected_token(token_t *token, parser_t *parser, token_kind_t kind) +{ + lexer_next_token(parser->lexer, token); + if (token->kind != kind) { - error.token = *token; - sprintf( - error.message, - "expected '%s' but got '%s'", - token_kind_to_str(kind), token_kind_to_str(token->kind) - ); - parser->errors[parser->errors_len++] = error; + parser_error_push_unexpected_kind(parser, token, kind); return false; } @@ -120,6 +116,10 @@ parser_parse_factor(parser_t *parser, ast_node_t *node) parser_parse_expression(parser, node); if (!drop_expected_token(parser, TOKEN_CPAREN)) return false; return true; + } else if (token.kind == TOKEN_NAME) { + /// FIXME: Check if the identifier is defined + ast_node_init_identifier(node, token.value); + return true; } // FIXME: Extract this erros logic to a function @@ -196,33 +196,112 @@ parser_parse_expression(parser_t *parser, ast_node_t *node) bool parser_parse_return_stmt(parser_t *parser, ast_node_t *node) { - token_t return_keyword_token; + ast_node_t *argument_token = ast_node_new(); + if (!parser_parse_expression(parser, argument_token)) return false; - if (!drop_expected_token(parser, TOKEN_OCURLY)) return false; + if (!drop_expected_token(parser, TOKEN_SEMICOLON)) return false; + + ast_node_init_return_stmt(node, argument_token); + return true; +} + +bool +parser_parse_variable_definition(parser_t *parser, string_view_t variable_name, ast_node_t *node) +{ + if 
(!drop_expected_token(parser, TOKEN_COLON)) return false; + + type_t type; + + // FIXME: change the parameters order + if (!parser_parse_type(&type, parser)) return false; - if(!expected_token(&return_keyword_token, parser, TOKEN_NAME)) return false; + token_t equal_token; - if (!string_view_eq(return_keyword_token.value, string_view_from_str("return"))) { + if (!expected_token(&equal_token, parser, TOKEN_OP)) return false; + + if (!string_view_eq(equal_token.value, string_view_from_str("="))) { parser_error_t error; - error.token = return_keyword_token; + error.token = equal_token; sprintf( error.message, - "expected 'return' keyword but got '"SVFMT"'", - SVARG(&return_keyword_token.value) + "expected '=' but got "SVFMT, + SVARG(&equal_token.value) ); parser->errors[parser->errors_len++] = error; return false; } - ast_node_t *argument_token = ast_node_new(); - if (!parser_parse_expression(parser, argument_token)) return false; + ast_node_t *expression = ast_node_new(); - if (!drop_expected_token(parser, TOKEN_SEMICOLON)) return false; - if (!drop_expected_token(parser, TOKEN_CCURLY)) return false; + if (!parser_parse_expression(parser, expression) || !drop_expected_token(parser, TOKEN_SEMICOLON)) { + ast_node_destroy(expression); + return false; + } + + ast_node_init_variable_declaration(node, variable_name, type, expression); - ast_node_init_return_stmt(node, argument_token); + return true; +} + +bool +parser_parse_block_declarations(parser_t *parser, vector_t *body) +{ + token_t current_token; + lexer_next_token(parser->lexer, &current_token); + + while (current_token.kind != TOKEN_CCURLY && current_token.kind != TOKEN_EOF) { + if (current_token.kind != TOKEN_NAME) { + parser_error_push_unexpected_kind(parser, &current_token, TOKEN_NAME); + return false; + } + + if (string_view_eq(current_token.value, string_view_from_str("return"))) { + ast_node_t *return_node = ast_node_new(); + bool parsed_return = parser_parse_return_stmt(parser, return_node); + + if (!parsed_return) { 
+ ast_node_destroy(return_node); + return false; + } + + vector_push_back(body, return_node); + } + else { + ast_node_t *variable_node = ast_node_new(); + bool parsed_variable = parser_parse_variable_definition(parser, current_token.value, variable_node); + + if (!parsed_variable) { + ast_node_destroy(variable_node); + return false; + } + + vector_push_back(body, variable_node); + } + + lexer_next_token(parser->lexer, &current_token); + } + + if (current_token.kind != TOKEN_CCURLY) { + parser_error_push_unexpected_kind(parser, &current_token, TOKEN_CCURLY); + return false; + } + + ast_node_t *latest_node = vector_at(body, body->size - 1); + + if (latest_node->kind != AST_RETURN_STMT) { + parser_error_t error; + error.token = current_token; + + sprintf( + error.message, + "expected 'return' keyword." + ); + + parser->errors[parser->errors_len++] = error; + return false; + } return true; } @@ -245,19 +324,17 @@ parser_parse_function_declaration(parser_t *parser, ast_node_t *node) return false; } - ast_node_t *return_node = ast_node_new(); + if (!drop_expected_token(parser, TOKEN_OCURLY)) return false; - bool parsed_return = parser_parse_return_stmt(parser, return_node); + vector_t* body = vector_new(); - if (!parsed_return) { - return false; - } + if (!parser_parse_block_declarations(parser, body)) return false; ast_node_init_function_declaration( node, func_name_token.value, return_type, - return_node + body ); return true; diff --git a/src/parser.h b/src/parser.h index 00d1aae..4b6b5a9 100644 --- a/src/parser.h +++ b/src/parser.h @@ -29,6 +29,7 @@ typedef struct parser_error_t { typedef struct parser_t { lexer_t *lexer; int errors_len; + // FIXME: replace with vector parser_error_t errors[1]; } parser_t; diff --git a/test/Makefile b/test/Makefile index 09adcfb..40f5cf9 100644 --- a/test/Makefile +++ b/test/Makefile @@ -23,7 +23,7 @@ vector_test: munit.o ../build/vector.o vector_test.o lexer_test: munit.o ../build/string_view.o ../build/lexer.o lexer_test.o $(CC) $? 
$(CFLAGS) -o $@ -parser_test: munit.o ../build/string_view.o ../build/lexer.o ../build/ast.o ../build/parser.o parser_test.o +parser_test: munit.o ../build/string_view.o ../build/vector.o ../build/lexer.o ../build/ast.o ../build/parser.o parser_test.o $(CC) $? $(CFLAGS) -o $@ integration_test: munit.o integration_test.o diff --git a/test/lexer_test.c b/test/lexer_test.c index ae713b3..0eebfe5 100644 --- a/test/lexer_test.c +++ b/test/lexer_test.c @@ -71,6 +71,7 @@ test_tokenize_op(const MunitParameter params[], assert_token_at(" - \n", 0, TOKEN_OP, "-"); assert_token_at(" * ;", 0, TOKEN_OP, "*"); assert_token_at(" / ", 0, TOKEN_OP, "/"); + assert_token_at(" = ", 0, TOKEN_OP, "="); assert_token_at("1 * 2", 0, TOKEN_NUMBER, "1"); assert_token_at("1 * 2", 1, TOKEN_OP, "*"); diff --git a/test/parser_test.c b/test/parser_test.c index 2e73fa6..daa2fe7 100644 --- a/test/parser_test.c +++ b/test/parser_test.c @@ -15,11 +15,12 @@ * along with this program. If not, see <https://www.gnu.org/licenses/>. 
*/ #define MUNIT_ENABLE_ASSERT_ALIASES -#include "munit.h" +#include "ast.h" #include "lexer.h" +#include "munit.h" #include "parser.h" -#include "ast.h" #include "string.h" +#include "vector.h" void assert_string_view_equal(char *expected, string_view_t actual); @@ -49,6 +50,8 @@ assert_parser_error(char* src, char* error_msg) { assert_false(parsed); assert_int(1, ==, parser.errors_len); assert_string_equal(error_msg, parser.errors[0].message); + + ast_node_destroy(ast_function); } static MunitResult @@ -71,7 +74,7 @@ test_parse_function(const MunitParameter params[], assert_string_equal("main", actual); assert_int(AST_FUNCTION_DECLARATION, ==, ast_function->kind); - ast_node_t *ast_return = ast_function->data.function.body; + ast_node_t *ast_return = vector_at(ast_function->data.function.body, 0); assert_int(AST_RETURN_STMT, ==, ast_return->kind); @@ -86,6 +89,46 @@ test_parse_function(const MunitParameter params[], } static MunitResult +test_parse_variable_definition(const MunitParameter params[], + void *user_data_or_fixture) +{ + parser_t parser; + lexer_t lexer; + + make_lexer_from_static_src(&lexer, "main(): i32 { \nvariable : i32 = 42; \nreturn variable;\n }"); + parser_init(&parser, &lexer); + ast_node_t *ast_function = ast_node_new(); + + bool parsed = parser_parse_function_declaration(&parser, ast_function); + + assert_true(parsed); + + char actual[5]; + + string_view_to_str(&ast_function->data.function.name, actual); + assert_string_equal("main", actual); + assert_int(AST_FUNCTION_DECLARATION, ==, ast_function->kind); + + ast_node_t *ast_variable = vector_at(ast_function->data.function.body, 0); + + assert_int(AST_VARIABLE_DECLARATION, ==, ast_variable->kind); + assert_string_view_equal("variable", ast_variable->data.variable.name); + + ast_node_t *ast_return = vector_at(ast_function->data.function.body, 1); + + assert_int(AST_RETURN_STMT, ==, ast_return->kind); + + ast_node_t *ast_literal = ast_return->data.return_stmt.argument; + + 
assert_int(AST_IDENTIFIER, ==, ast_literal->kind); + assert_string_view_equal("variable", ast_literal->data.identifier.name); + + ast_node_destroy(ast_function); + + return MUNIT_OK; +} + +static MunitResult test_parse_arithmetic_expression(const MunitParameter params[], void *user_data_or_fixture) { @@ -167,15 +210,20 @@ test_parse_basic_syntax_errors(const MunitParameter params[], assert_parser_error("main(): i32 { return; }" , "unexpected '; (;)' token"); assert_parser_error("main(): i32 { return 42;" , "expected '}' but got end of file"); assert_parser_error("main(): beff { return 42; }" , "type 'beff' is not defined"); - assert_parser_error("main(): i32 { oxi 42; }" , "expected 'return' keyword but got 'oxi'"); + // FIXME: once function calls are implemented, this error should inform that + // neither a variable or function call was found. + assert_parser_error("main(): i32 { oxi 42; }" , "expected ':' but got 'TOKEN_NUMBER'"); return MUNIT_OK; } static MunitTest tests[] = { { "/test_parse_function", test_parse_function, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }, + { "/test_parse_function", test_parse_function, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }, { "/test_parse_basic_syntax_errors", test_parse_basic_syntax_errors, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }, { "/test_parse_arithmetic_expression", test_parse_arithmetic_expression, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }, + { "/test_parse_arithmetic_expression", test_parse_arithmetic_expression, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }, + { "/test_parse_variable_definition", test_parse_variable_definition, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }, { NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL } }; |