summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Carlos Maniero <carlosmaniero@gmail.com> 2023-04-25 03:52:59 -0300
committer Johnny Richard <johnny@johnnyrichard.com> 2023-04-25 22:54:22 +0200
commit c1a1bd2320b4c1508c4ab20d23b7c193a94d8026 (patch)
tree ea54aa17ba0223b9ff58087bdcb6af778186702e /src
parent cca2c345f9b16b88e1fc4e9ea598cf2f7ed8653a (diff)
parser: Add support for variables and identifiers in function body
This commit adds support for variables and identifiers in the function body of the parser, stored as a vector. However, at this point, identifier resolution is not fully implemented, and we currently accept identifiers without checking if they can be resolved. This is a known limitation that will be addressed in a future commit once hash-tables are added to the parser.

Signed-off-by: Carlos Maniero <carlosmaniero@gmail.com>
Reviewed-by: Johnny Richard <johnny@johnnyrichard.com>
Diffstat (limited to 'src')
-rw-r--r-- src/ast.c 45
-rw-r--r-- src/ast.h 29
-rw-r--r-- src/gas_assembly_generator.c 4
-rw-r--r-- src/lexer.c 3
-rw-r--r-- src/parser.c 153
-rw-r--r-- src/parser.h 1
6 files changed, 187 insertions, 48 deletions
diff --git a/src/ast.c b/src/ast.c
index b2efc42..645a2dd 100644
--- a/src/ast.c
+++ b/src/ast.c
@@ -67,20 +67,35 @@ ast_node_new()
return node;
}
+static void
+ast_node_destroy_vector(vector_t *vector)
+{
+    for (size_t idx = 0; idx < vector->size; ++idx) { // every stored node first
+        ast_node_destroy(vector_at(vector, idx));
+    }
+    vector_destroy(vector); // then the vector that held them
+}
+
void
ast_node_destroy(ast_node_t *node)
{
switch (node->kind) {
case AST_FUNCTION_DECLARATION:
- ast_node_destroy(node->data.function.body);
+ ast_node_destroy_vector(node->data.function.body);
break;
case AST_BINARY_OPERATION:
ast_node_destroy(node->data.binary_operation.left);
ast_node_destroy(node->data.binary_operation.right);
break;
+ case AST_VARIABLE_DECLARATION:
+ ast_node_destroy(node->data.variable.value);
+ break;
case AST_LITERAL:
break;
case AST_RETURN_STMT:
+ ast_node_destroy(node->data.return_stmt.argument);
+ break;
+ case AST_IDENTIFIER:
break;
case AST_UNKOWN_NODE:
break;
@@ -103,7 +118,7 @@ ast_node_init_return_stmt(ast_node_t *node, ast_node_t *argument)
}
void
-ast_node_init_function_declaration(ast_node_t *node, string_view_t name, type_t return_type, ast_node_t* body)
+ast_node_init_function_declaration(ast_node_t *node, string_view_t name, type_t return_type, vector_t *body)
{
node->accept_visitor = &ast_node_function_accept_visitor,
node->kind = AST_FUNCTION_DECLARATION;
@@ -144,3 +159,29 @@ ast_node_init_binary_operation(ast_node_t *node, string_view_t op, ast_node_t *l
}
};
}
+
+void
+ast_node_init_variable_declaration(ast_node_t *node, string_view_t variable_name, type_t type, ast_node_t *value)
+{
+    // FIXME: define the visitor strategy
+    // `value` is the initializer; ast_node_destroy() destroys it with the node.
+    ast_variable_declaration_t declaration = {
+        .name = variable_name,
+        .type = type,
+        .value = value
+    };
+    node->kind = AST_VARIABLE_DECLARATION;
+    node->data = (ast_node_data_t) { .variable = declaration };
+}
+
+// Initialize `node` as an AST_IDENTIFIER node that refers to `name`.
+void
+ast_node_init_identifier(ast_node_t *node, string_view_t name)
+{
+    // FIXME: define the visitor strategy (accept_visitor is not set here)
+    node->kind = AST_IDENTIFIER;
+    // Fill the `identifier` union member so the data matches the node kind.
+    node->data = (ast_node_data_t) {
+        .identifier = { .name = name }
+    };
+}
diff --git a/src/ast.h b/src/ast.h
index 70852b1..5ba21ad 100644
--- a/src/ast.h
+++ b/src/ast.h
@@ -18,6 +18,7 @@
#define AST_H
#include <stdint.h>
#include "string_view.h"
+#include "vector.h"
#define ast_visitor_visit(visitor, node) ast_node_accept_visitor(node, (ast_visitor_t *) visitor);
@@ -35,7 +36,7 @@ typedef struct ast_return_stmt_t {
typedef struct ast_function_declaration_t {
string_view_t name;
type_t return_type;
- ast_node_t* body;
+ vector_t* body;
} ast_function_declaration_t;
typedef struct ast_binary_operation_t {
@@ -58,6 +59,16 @@ typedef struct ast_literal_t {
ast_literal_value_t value;
} ast_literal_t;
+typedef struct ast_identifier_t { // data carried by an identifier node
+ string_view_t name; // the identifier's name
+} ast_identifier_t;
+
+typedef struct ast_variable_declaration_t { // data carried by a variable declaration node
+ string_view_t name; // name of the declared variable
+ type_t type; // declared type of the variable
+ ast_node_t* value; // initializer expression; destroyed together with the node
+} ast_variable_declaration_t;
+
typedef struct ast_visitor_t {
void (*visit_function)(struct ast_visitor_t *, ast_function_declaration_t *);
void (*visit_return_stmt)(struct ast_visitor_t *, ast_return_stmt_t *);
@@ -66,18 +77,22 @@ typedef struct ast_visitor_t {
} ast_visitor_t;
typedef enum {
- AST_FUNCTION_DECLARATION,
AST_BINARY_OPERATION,
+ AST_FUNCTION_DECLARATION,
+ AST_IDENTIFIER,
AST_LITERAL,
AST_RETURN_STMT,
- AST_UNKOWN_NODE
+ AST_UNKOWN_NODE,
+ AST_VARIABLE_DECLARATION,
} ast_node_kind_t;
typedef union {
- ast_function_declaration_t function;
ast_binary_operation_t binary_operation;
+ ast_function_declaration_t function;
ast_literal_t literal;
ast_return_stmt_t return_stmt;
+ ast_variable_declaration_t variable;
+ ast_identifier_t identifier;
} ast_node_data_t;
typedef struct ast_node_t {
@@ -91,9 +106,11 @@ void ast_node_accept_visitor(ast_node_t *node, ast_visitor_t *visitor);
ast_node_t* ast_node_new();
void ast_node_destroy(ast_node_t *node);
-void ast_node_init_function_declaration(ast_node_t *node, string_view_t name, type_t return_type, ast_node_t *body);
-void ast_node_init_return_stmt(ast_node_t *node, ast_node_t *argument);
void ast_node_init_binary_operation(ast_node_t *node, string_view_t op, ast_node_t *left, ast_node_t *right);
+void ast_node_init_function_declaration(ast_node_t *node, string_view_t name, type_t return_type, vector_t *body);
+void ast_node_init_identifier(ast_node_t *node, string_view_t name);
+void ast_node_init_return_stmt(ast_node_t *node, ast_node_t *argument);
+void ast_node_init_variable_declaration(ast_node_t *node, string_view_t variable_name, type_t type, ast_node_t *value);
// FIXME: use the naming convention
void ast_literal_integer_create(ast_node_t* node, uint32_t number);
diff --git a/src/gas_assembly_generator.c b/src/gas_assembly_generator.c
index 10a67f3..5572cb7 100644
--- a/src/gas_assembly_generator.c
+++ b/src/gas_assembly_generator.c
@@ -54,7 +54,9 @@ gas_assembly_generator_visit_function(ast_visitor_t *visitor, ast_function_decla
fprintf(gen->out,".text\n");
fprintf(gen->out,"_start:\n");
- ast_visitor_visit(visitor, func->body);
+ for (size_t i=0; i < func->body->size; i++) {
+ ast_visitor_visit(visitor, vector_at(func->body, i));
+ }
}
static void
diff --git a/src/lexer.c b/src/lexer.c
index b641752..e1f0d80 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -154,7 +154,8 @@ lexer_next_token(lexer_t *lexer, token_t *token)
if (lexer_current_char(lexer) == '+'
|| lexer_current_char(lexer) == '-'
|| lexer_current_char(lexer) == '*'
- || lexer_current_char(lexer) == '/') {
+ || lexer_current_char(lexer) == '/'
+ || lexer_current_char(lexer) == '=') {
lexer_define_literal_token_props(lexer, token, TOKEN_OP);
lexer_drop_char(lexer);
return;
diff --git a/src/parser.c b/src/parser.c
index a123648..5399cfd 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -24,6 +24,7 @@
#include "ast.h"
#include "lexer.h"
#include "parser.h"
+#include "vector.h"
void
parser_init(parser_t *parser, lexer_t *lexer)
@@ -34,32 +35,27 @@ parser_init(parser_t *parser, lexer_t *lexer)
parser->errors_len = 0;
}
-static bool
-expected_token(token_t *token, parser_t *parser, token_kind_t kind)
+static void
+parser_error_push_unexpected_kind(parser_t *parser, token_t *token, token_kind_t expected)
{
- lexer_next_token(parser->lexer, token);
- parser_error_t error;
+ parser_error_t* error = &parser->errors[parser->errors_len++];
+ error->token = *token;
if (token->kind == TOKEN_EOF) {
- error.token = *token;
- sprintf(
- error.message,
- "expected '%s' but got end of file",
- token_kind_to_str(kind)
- );
-
- parser->errors[parser->errors_len++] = error;
- return false;
+ sprintf(error->message, "expected '%s' but got end of file", token_kind_to_str(expected));
+ return;
}
+ sprintf(error->message, "expected '%s' but got '%s'", token_kind_to_str(expected), token_kind_to_str(token->kind));
+}
+
+static bool
+expected_token(token_t *token, parser_t *parser, token_kind_t kind)
+{
+ lexer_next_token(parser->lexer, token);
+
if (token->kind != kind) {
- error.token = *token;
- sprintf(
- error.message,
- "expected '%s' but got '%s'",
- token_kind_to_str(kind), token_kind_to_str(token->kind)
- );
- parser->errors[parser->errors_len++] = error;
+ parser_error_push_unexpected_kind(parser, token, kind);
return false;
}
@@ -120,6 +116,10 @@ parser_parse_factor(parser_t *parser, ast_node_t *node)
parser_parse_expression(parser, node);
if (!drop_expected_token(parser, TOKEN_CPAREN)) return false;
return true;
+ } else if (token.kind == TOKEN_NAME) {
+ /// FIXME: Check if the identifier is defined
+ ast_node_init_identifier(node, token.value);
+ return true;
}
// FIXME: Extract this erros logic to a function
@@ -196,33 +196,112 @@ parser_parse_expression(parser_t *parser, ast_node_t *node)
bool
parser_parse_return_stmt(parser_t *parser, ast_node_t *node)
{
- token_t return_keyword_token;
+ ast_node_t *argument_token = ast_node_new();
+ if (!parser_parse_expression(parser, argument_token)) return false;
- if (!drop_expected_token(parser, TOKEN_OCURLY)) return false;
+ if (!drop_expected_token(parser, TOKEN_SEMICOLON)) return false;
+
+ ast_node_init_return_stmt(node, argument_token);
+ return true;
+}
+
+bool
+parser_parse_variable_definition(parser_t *parser, string_view_t variable_name, ast_node_t *node)
+{
+ if (!drop_expected_token(parser, TOKEN_COLON)) return false;
+
+ type_t type;
+
+ // FIXME: change the parameters order
+ if (!parser_parse_type(&type, parser)) return false;
- if(!expected_token(&return_keyword_token, parser, TOKEN_NAME)) return false;
+ token_t equal_token;
- if (!string_view_eq(return_keyword_token.value, string_view_from_str("return"))) {
+ if (!expected_token(&equal_token, parser, TOKEN_OP)) return false;
+
+ if (!string_view_eq(equal_token.value, string_view_from_str("="))) {
parser_error_t error;
- error.token = return_keyword_token;
+ error.token = equal_token;
sprintf(
error.message,
- "expected 'return' keyword but got '"SVFMT"'",
- SVARG(&return_keyword_token.value)
+ "expected '=' but got "SVFMT,
+ SVARG(&equal_token.value)
);
parser->errors[parser->errors_len++] = error;
return false;
}
- ast_node_t *argument_token = ast_node_new();
- if (!parser_parse_expression(parser, argument_token)) return false;
+ ast_node_t *expression = ast_node_new();
- if (!drop_expected_token(parser, TOKEN_SEMICOLON)) return false;
- if (!drop_expected_token(parser, TOKEN_CCURLY)) return false;
+ if (!parser_parse_expression(parser, expression) || !drop_expected_token(parser, TOKEN_SEMICOLON)) {
+ ast_node_destroy(expression);
+ return false;
+ }
+
+ ast_node_init_variable_declaration(node, variable_name, type, expression);
- ast_node_init_return_stmt(node, argument_token);
+ return true;
+}
+
+// Parse function-body statements up to the closing '}' and append each parsed
+// node to `body`. A statement starting with `return` is a return statement;
+// any other name starts a variable definition. Returns false when a statement
+// fails to parse, the '}' is missing, or the block is empty or does not end
+// with a return. Nodes already pushed stay in `body` for the caller to destroy.
+bool
+parser_parse_block_declarations(parser_t *parser, vector_t *body)
+{
+    token_t current_token;
+    lexer_next_token(parser->lexer, &current_token);
+
+    while (current_token.kind != TOKEN_CCURLY && current_token.kind != TOKEN_EOF) {
+        // Every statement starts with a name: `return` or a variable name.
+        if (current_token.kind != TOKEN_NAME) {
+            parser_error_push_unexpected_kind(parser, &current_token, TOKEN_NAME);
+            return false;
+        }
+
+        bool is_return = string_view_eq(current_token.value, string_view_from_str("return"));
+        ast_node_t *statement = ast_node_new();
+        bool parsed = is_return
+            ? parser_parse_return_stmt(parser, statement)
+            : parser_parse_variable_definition(parser, current_token.value, statement);
+
+        if (!parsed) {
+            // Not pushed yet, so the node is still ours to destroy.
+            ast_node_destroy(statement);
+            return false;
+        }
+
+        vector_push_back(body, statement);
+
+        lexer_next_token(parser->lexer, &current_token);
+    }
+
+    // The loop also ends at end of file, in which case '}' is missing.
+    if (current_token.kind != TOKEN_CCURLY) {
+        parser_error_push_unexpected_kind(parser, &current_token, TOKEN_CCURLY);
+        return false;
+    }
+
+    // An empty block has no last statement: `body->size - 1` would wrap around
+    // and index out of bounds, so report it like a block missing its return.
+    ast_node_t *latest_node = body->size > 0 ? vector_at(body, body->size - 1) : NULL;
+
+    if (latest_node == NULL || latest_node->kind != AST_RETURN_STMT) {
+        parser_error_t error;
+        error.token = current_token;
+
+        sprintf(
+            error.message,
+            "expected 'return' keyword."
+        );
+
+        parser->errors[parser->errors_len++] = error;
+        return false;
+    }
return true;
}
@@ -245,19 +324,17 @@ parser_parse_function_declaration(parser_t *parser, ast_node_t *node)
return false;
}
- ast_node_t *return_node = ast_node_new();
+ if (!drop_expected_token(parser, TOKEN_OCURLY)) return false;
- bool parsed_return = parser_parse_return_stmt(parser, return_node);
+ vector_t* body = vector_new();
- if (!parsed_return) {
- return false;
- }
+ if (!parser_parse_block_declarations(parser, body)) return false;
ast_node_init_function_declaration(
node,
func_name_token.value,
return_type,
- return_node
+ body
);
return true;
diff --git a/src/parser.h b/src/parser.h
index 00d1aae..4b6b5a9 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -29,6 +29,7 @@ typedef struct parser_error_t {
typedef struct parser_t {
lexer_t *lexer;
int errors_len;
+ // FIXME: replace with vector
parser_error_t errors[1];
} parser_t;