summary | refs | log | tree | commit | diff
path: root/src/parser.c
diff options
context:
space:
mode:
author: Johnny Richard <johnny@johnnyrichard.com> 2024-10-30 22:58:03 +0100
committer: Johnny Richard <johnny@johnnyrichard.com> 2025-12-14 09:53:52 +0100
commit: 10bb8a05088f1d3bb24f7167f609b5f6fb0ba026 (patch)
tree: 7a4b3f69a461301c45204ed856b61f92a7d42233 /src/parser.c
bootstrap project (HEAD, master)
Signed-off-by: Johnny Richard <johnny@johnnyrichard.com>
Diffstat (limited to 'src/parser.c')
-rw-r--r-- src/parser.c 224
1 files changed, 224 insertions, 0 deletions
diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000..d95e6f9
--- /dev/null
+++ b/src/parser.c
@@ -0,0 +1,224 @@
+/*
+ * Copyright (C) 2025 Johnny Richard <johnny@johnnyrichard.com>
+ *
+ * SPDX-License-Identifier: LGPL-3.0-or-later
+ *
+ * This file is part of obe.
+ *
+ * obe is free software: you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * obe is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with obe. If not, see <https://www.gnu.org/licenses/>.
+ */
+#include <assert.h>
+#include <obe/array.h>
+#include <obe/lexer.h>
+#include <obe/parser.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+static obe_ir_function_t obe_parser_parse_function(obe_parser_t* parser);
+
+inline static bool expected_next_token(obe_lexer_t *lexer, obe_token_t *token,
+ obe_token_kind_t expected_kind);
+
+static bool expected_token(obe_lexer_t *lexer, obe_token_t *token,
+ obe_token_kind_t expected_kind);
+
+/*
+ * Binds a parser to the lexer it reads tokens from and to the arena it
+ * hands to the IR constructors.
+ *
+ * All three pointers must be non-NULL; this is enforced with assert(). Only
+ * the two pointers are stored in the parser; nothing is copied.
+ */
+void
+obe_parser_init(obe_parser_t* parser, obe_lexer_t* lexer, obe_arena_t* arena)
+{
+    assert(parser && lexer && arena);
+
+    parser->arena = arena;
+    parser->lexer = lexer;
+}
+
+/*
+ * Reads the next token from the parser's lexer into `token` and requires it
+ * to be of kind `kind`.
+ *
+ * On a mismatch a "file:line:column: syntax error" diagnostic is written to
+ * stderr and the process exits with EXIT_FAILURE, so this function only
+ * returns when the kinds match.
+ */
+void
+obe_parser_next_expected_token(obe_parser_t* parser,
+                               obe_token_t* token,
+                               obe_token_kind_t kind)
+{
+    obe_lexer_t* lexer = parser->lexer;
+
+    obe_lexer_next_token(lexer, token);
+    if (token->kind == kind) {
+        return;
+    }
+
+    /* Line and column are printed with +1 applied (presumably they are
+     * stored zero-based); the column is the token offset relative to the
+     * start of its line. */
+    fprintf(stderr,
+            "%s:%zu:%zu: syntax error: expected token <%s> but got <%s>.\n",
+            lexer->filename,
+            token->loc.lineno + 1,
+            token->loc.offset - token->loc.lineoffset + 1,
+            obe_token_to_cstr(kind),
+            obe_token_to_cstr(token->kind));
+    exit(EXIT_FAILURE);
+}
+
+/*
+ * Parses the lexer's input into a translation unit created with the
+ * parser's arena and returns it.
+ *
+ * Exactly one function is parsed and appended to the unit's `funcs` array.
+ */
+obe_ir_translation_unit_t*
+obe_parser_parse(obe_parser_t* parser)
+{
+    obe_ir_translation_unit_t* unit = obe_ir_translation_unit_new(parser->arena);
+    obe_ir_function_t func = obe_parser_parse_function(parser);
+
+    // FIXME: Add support to parse multiple functions
+    obe_array_append(unit->funcs, func);
+
+    return unit;
+}
+
+/*
+ * Copies the text of a token into a NUL-terminated string obtained from
+ * obe_arena_strdup().
+ *
+ * Token text is a (chars, length) pair, so it is first NUL-terminated in a
+ * temporary stack buffer and then handed to obe_arena_strdup().
+ */
+static char*
+token_value_dup(obe_arena_t* arena, obe_string_t value)
+{
+    char buffer[value.length + 1];
+
+    memcpy(buffer, value.chars, value.length);
+    buffer[value.length] = 0;
+
+    return obe_arena_strdup(arena, buffer);
+}
+
+/*
+ * Parses a single function: TOKEN_KW_FN, TOKEN_IDENT (the function name),
+ * TOKEN_LBRACE, a sequence of statements, and a closing TOKEN_RBRACE.
+ *
+ * Statements are recognised by their first token:
+ *   - TOKEN_KW_RETURN IDENT SEMICOLON          -> OBE_IR_INST_RETURN
+ *   - TOKEN_KW_BR IDENT LABEL SEMICOLON        -> OBE_IR_INST_BR
+ *   - TOKEN_LABEL COLON                        -> OBE_IR_INST_LABEL
+ *   - TOKEN_IDENT COLON INT EQ NUMBER SEMICOLON -> OBE_IR_INST_CONST
+ * A token of any other kind is printed to stdout and skipped.
+ *
+ * Returns the function with its name taken from the identifier token and
+ * its instructions appended in source order. Any token that does not match
+ * the expected kind makes expected_token()/expected_next_token() report the
+ * error and exit the process, including a missing TOKEN_RBRACE at end of
+ * input.
+ */
+static obe_ir_function_t
+obe_parser_parse_function(obe_parser_t* parser)
+{
+    obe_token_t token;
+
+    expected_next_token(parser->lexer, &token, TOKEN_KW_FN);
+    expected_next_token(parser->lexer, &token, TOKEN_IDENT);
+
+    obe_ir_function_t func = { .name = token.value, .instrs = obe_array(parser->arena, obe_ir_inst_t) };
+
+    expected_next_token(parser->lexer, &token, TOKEN_LBRACE);
+
+    obe_lexer_next_token(parser->lexer, &token);
+    while (token.kind != TOKEN_EOF && token.kind != TOKEN_RBRACE) {
+        switch (token.kind) {
+            case TOKEN_KW_RETURN:
+            {
+                expected_next_token(parser->lexer, &token, TOKEN_IDENT);
+
+                obe_ir_inst_t inst = {
+                    .kind = OBE_IR_INST_RETURN,
+                    .operand1 = (obe_ir_operand_t) {
+                        .kind = OBE_IR_OPERAND_IDENT,
+                        .value = token_value_dup(parser->arena, token.value)
+                    }
+                };
+
+                obe_array_append(func.instrs, inst);
+
+                expected_next_token(parser->lexer, &token, TOKEN_SEMICOLON);
+            } break;
+            case TOKEN_KW_BR:
+            {
+                expected_next_token(parser->lexer, &token, TOKEN_IDENT);
+                char* op1 = token_value_dup(parser->arena, token.value);
+
+                expected_next_token(parser->lexer, &token, TOKEN_LABEL);
+                char* dest = token_value_dup(parser->arena, token.value);
+
+                expected_next_token(parser->lexer, &token, TOKEN_SEMICOLON);
+
+                obe_ir_inst_t inst = {
+                    .kind = OBE_IR_INST_BR,
+                    .dest = dest,
+                    .operand1 = (obe_ir_operand_t) {
+                        .kind = OBE_IR_OPERAND_IDENT,
+                        .value = op1
+                    }
+                };
+
+                obe_array_append(func.instrs, inst);
+            } break;
+            case TOKEN_LABEL:
+            {
+                /* Copy the label text before the next token overwrites it. */
+                char* label = token_value_dup(parser->arena, token.value);
+
+                expected_next_token(parser->lexer, &token, TOKEN_COLON);
+
+                obe_ir_inst_t inst = {
+                    .kind = OBE_IR_INST_LABEL,
+                    .dest = label
+                };
+
+                obe_array_append(func.instrs, inst);
+            } break;
+            case TOKEN_IDENT:
+            {
+                /* Copy the destination name before the next token
+                 * overwrites it. */
+                char* dest = token_value_dup(parser->arena, token.value);
+
+                expected_next_token(parser->lexer, &token, TOKEN_COLON);
+                expected_next_token(parser->lexer, &token, TOKEN_INT);
+                expected_next_token(parser->lexer, &token, TOKEN_EQ);
+                expected_next_token(parser->lexer, &token, TOKEN_NUMBER);
+
+                obe_ir_inst_t inst = {
+                    .kind = OBE_IR_INST_CONST,
+                    .type = OBE_IR_TYPE_INT,
+                    .dest = dest,
+                    .operand1 = (obe_ir_operand_t) {
+                        .kind = OBE_IR_OPERAND_LITERAL,
+                        .value = token_value_dup(parser->arena, token.value)
+                    }
+                };
+
+                obe_array_append(func.instrs, inst);
+
+                expected_next_token(parser->lexer, &token, TOKEN_SEMICOLON);
+            } break;
+            default:
+            {
+                /* Unhandled token kinds are only reported, not rejected.
+                 * The casts make the arguments match %zu regardless of the
+                 * integer type used by the location fields. */
+                printf("%s:%zu:%zu: token <" PRIs ">\n",
+                       parser->lexer->filename,
+                       (size_t)(token.loc.lineno + 1),
+                       (size_t)(token.loc.offset - token.loc.lineoffset + 1),
+                       PRIsARG(token.value));
+            }
+        }
+        obe_lexer_next_token(parser->lexer, &token);
+    }
+
+    /* The loop also stops on TOKEN_EOF; that case is rejected here. */
+    expected_token(parser->lexer, &token, TOKEN_RBRACE);
+
+    return func;
+}
+
+/*
+ * Checks that `token` is of kind `expected_kind`.
+ *
+ * Returns true when the kinds match. Otherwise a
+ * "file:line:column: syntax error" diagnostic naming the offending token
+ * text and the expected kind is written to stderr and the process exits
+ * with EXIT_FAILURE, so false is never returned.
+ *
+ * `lexer` is only used for the file name shown in the diagnostic.
+ */
+static bool
+expected_token(obe_lexer_t *lexer, obe_token_t *token, obe_token_kind_t expected_kind)
+{
+    if (token->kind == expected_kind) {
+        return true;
+    }
+
+    /* The column is derived from the token's own location (offset within
+     * its line), matching the other diagnostics in this file; the lexer's
+     * current line offset may already belong to a later line. The casts
+     * make the arguments match %zu regardless of the integer type used by
+     * the location fields. */
+    fprintf(stderr,
+            "%s:%zu:%zu: syntax error: got '" PRIs "' token but expect '%s'\n",
+            lexer->filename,
+            (size_t)(token->loc.lineno + 1),
+            (size_t)(token->loc.offset - token->loc.lineoffset + 1),
+            PRIsARG(token->value),
+            obe_token_to_cstr(expected_kind));
+    exit(EXIT_FAILURE);
+}
+
+/*
+ * Advances the lexer by one token, storing it in `token`, and then checks
+ * it against `expected_kind` with expected_token().
+ *
+ * Returns whatever expected_token() returns for the freshly read token.
+ */
+inline static bool
+expected_next_token(obe_lexer_t *lexer,
+                    obe_token_t *token,
+                    obe_token_kind_t expected_kind)
+{
+    obe_lexer_next_token(lexer, token);
+
+    bool matched = expected_token(lexer, token, expected_kind);
+    return matched;
+}