From 10bb8a05088f1d3bb24f7167f609b5f6fb0ba026 Mon Sep 17 00:00:00 2001
From: Johnny Richard <johnny@johnnyrichard.com>
Date: Wed, 30 Oct 2024 22:58:03 +0100
Subject: bootstrap project

Signed-off-by: Johnny Richard <johnny@johnnyrichard.com>
---
 src/parser.c | 224 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 224 insertions(+)
 create mode 100644 src/parser.c

(limited to 'src/parser.c')

diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000..d95e6f9
--- /dev/null
+++ b/src/parser.c
@@ -0,0 +1,224 @@
+/*
+ * Copyright (C) 2025 Johnny Richard <johnny@johnnyrichard.com>
+ *
+ * SPDX-License-Identifier: LGPL-3.0-or-later
+ *
+ * This file is part of obe.
+ *
+ * obe is free software: you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * obe is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with obe. If not, see <https://www.gnu.org/licenses/>.
+ */
+#include <assert.h>
+#include <obe/array.h>
+#include <obe/lexer.h>
+#include <obe/parser.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+static obe_ir_function_t obe_parser_parse_function(obe_parser_t* parser);
+/* Fetches the next token from `lexer` into `token`, then checks its kind. */
+inline static bool expected_next_token(obe_lexer_t *lexer, obe_token_t *token,
+        obe_token_kind_t expected_kind);
+/* Exits the process with a syntax error unless `token` has `expected_kind`. */
+static bool expected_token(obe_lexer_t *lexer, obe_token_t *token,
+        obe_token_kind_t expected_kind);
+
+void
+obe_parser_init(obe_parser_t* parser, obe_lexer_t* lexer, obe_arena_t* arena)
+{
+    assert(parser && lexer && arena);  // all three arguments are mandatory
+    parser->arena = arena;             // the pointers are stored as given
+    parser->lexer = lexer;
+}
+
+/* Read the next token from the parser's lexer into `token`. If its kind is not
+ * `kind`, print a syntax error to stderr and exit with EXIT_FAILURE. */
+void
+obe_parser_next_expected_token(obe_parser_t* parser, obe_token_t* token,
+                               obe_token_kind_t kind)
+{
+    obe_lexer_next_token(parser->lexer, token);
+    if (token->kind == kind) {
+        return;
+    }
+    fprintf(stderr,
+            "%s:%zu:%zu: syntax error: expected token <%s> but got <%s>.\n",
+            parser->lexer->filename, token->loc.lineno + 1,
+            token->loc.offset - token->loc.lineoffset + 1,
+            obe_token_to_cstr(kind), obe_token_to_cstr(token->kind));
+    exit(EXIT_FAILURE);
+}
+
+/* Parse the lexer's input into a new translation unit holding one function. */
+obe_ir_translation_unit_t*
+obe_parser_parse(obe_parser_t* parser)
+{
+    obe_ir_translation_unit_t* unit = obe_ir_translation_unit_new(parser->arena);
+    // FIXME: Add support to parse multiple functions
+    obe_ir_function_t func = obe_parser_parse_function(parser);
+    obe_array_append(unit->funcs, func);
+    return unit;
+}
+
+// Copy the text of `token` into a NUL-terminated obe_arena_strdup() string.
+// NOTE(review): the staging VLA is sized by the token -- confirm it is bounded.
+static char*
+token_text_dup(obe_arena_t* arena, const obe_token_t* token)
+{
+    char text[token->value.length + 1];
+    memcpy(text, token->value.chars, token->value.length);
+    text[token->value.length] = 0;
+    return obe_arena_strdup(arena, text);
+}
+
+/*
+ * Parse TOKEN_KW_FN TOKEN_IDENT TOKEN_LBRACE <statements> TOKEN_RBRACE and
+ * return the function. Statements (TOKEN_ prefix dropped) add one instruction:
+ *   KW_RETURN IDENT SEMICOLON            -> OBE_IR_INST_RETURN
+ *   KW_BR IDENT LABEL SEMICOLON          -> OBE_IR_INST_BR
+ *   LABEL COLON                          -> OBE_IR_INST_LABEL
+ *   IDENT COLON INT EQ NUMBER SEMICOLON  -> OBE_IR_INST_CONST
+ * A wrong token inside a statement exits the process (see expected_token).
+ */
+static obe_ir_function_t
+obe_parser_parse_function(obe_parser_t* parser)
+{
+    obe_token_t token;
+
+    expected_next_token(parser->lexer, &token, TOKEN_KW_FN);
+    expected_next_token(parser->lexer, &token, TOKEN_IDENT);
+
+    obe_ir_function_t func = { .name = token.value, .instrs = obe_array(parser->arena, obe_ir_inst_t) };
+
+    expected_next_token(parser->lexer, &token, TOKEN_LBRACE);
+
+    obe_lexer_next_token(parser->lexer, &token);
+    while (token.kind != TOKEN_EOF && token.kind != TOKEN_RBRACE) {
+        switch (token.kind) {
+            case TOKEN_KW_RETURN:
+                {
+                    expected_next_token(parser->lexer, &token, TOKEN_IDENT);
+
+                    obe_ir_inst_t inst = {
+                        .kind = OBE_IR_INST_RETURN,
+                        .operand1 = (obe_ir_operand_t) {
+                            .kind = OBE_IR_OPERAND_IDENT,
+                            .value = token_text_dup(parser->arena, &token)
+                        }
+                    };
+
+                    obe_array_append(func.instrs, inst);
+
+                    expected_next_token(parser->lexer, &token, TOKEN_SEMICOLON);
+                } break;
+            case TOKEN_KW_BR:
+                {
+                    expected_next_token(parser->lexer, &token, TOKEN_IDENT);
+                    char* op1 = token_text_dup(parser->arena, &token);
+
+                    expected_next_token(parser->lexer, &token, TOKEN_LABEL);
+                    char* dest = token_text_dup(parser->arena, &token);
+
+                    expected_next_token(parser->lexer, &token, TOKEN_SEMICOLON);
+
+                    obe_ir_inst_t inst = {
+                        .kind = OBE_IR_INST_BR,
+                        .dest = dest,
+                        .operand1 = (obe_ir_operand_t) {
+                            .kind = OBE_IR_OPERAND_IDENT,
+                            .value = op1
+                        }
+                    };
+
+                    obe_array_append(func.instrs, inst);
+                } break;
+            case TOKEN_LABEL:
+                {
+                    char* label = token_text_dup(parser->arena, &token);
+
+                    expected_next_token(parser->lexer, &token, TOKEN_COLON);
+
+                    obe_ir_inst_t inst = {
+                        .kind = OBE_IR_INST_LABEL,
+                        .dest = label
+                    };
+
+                    obe_array_append(func.instrs, inst);
+                } break;
+            case TOKEN_IDENT:
+                {
+                    char* dest = token_text_dup(parser->arena, &token);
+
+                    expected_next_token(parser->lexer, &token, TOKEN_COLON);
+                    expected_next_token(parser->lexer, &token, TOKEN_INT);
+                    expected_next_token(parser->lexer, &token, TOKEN_EQ);
+                    expected_next_token(parser->lexer, &token, TOKEN_NUMBER);
+
+                    obe_ir_inst_t inst = {
+                        .kind = OBE_IR_INST_CONST,
+                        .type = OBE_IR_TYPE_INT,
+                        .dest = dest,
+                        .operand1 = (obe_ir_operand_t) {
+                            .kind = OBE_IR_OPERAND_LITERAL,
+                            .value = token_text_dup(parser->arena, &token)
+                        }
+                    };
+
+                    obe_array_append(func.instrs, inst);
+
+                    expected_next_token(parser->lexer, &token, TOKEN_SEMICOLON);
+                } break;
+            default:
+                {
+                    // NOTE(review): echoed to stdout, then skipped -- intended?
+                    printf("%s:%ld:%ld: token <" PRIs ">\n",
+                           parser->lexer->filename,
+                           token.loc.lineno + 1,
+                           token.loc.offset - token.loc.lineoffset + 1,
+                           PRIsARG(token.value));
+                }
+        }
+        obe_lexer_next_token(parser->lexer, &token);
+    }
+
+    expected_token(parser->lexer, &token, TOKEN_RBRACE);
+
+    return func;
+}
+
+/* Return true if `token` has `expected_kind`; otherwise report and exit. */
+static bool
+expected_token(obe_lexer_t *lexer, obe_token_t *token, obe_token_kind_t expected_kind)
+{
+    if (token->kind != expected_kind) {
+        // The column is measured from the token's own line start, not the lexer's.
+        fprintf(stderr,
+                "%s:%lu:%lu: syntax error: got '" PRIs "' token but expect '%s'\n",
+                lexer->filename, token->loc.lineno + 1,
+                token->loc.offset - token->loc.lineoffset + 1,
+                PRIsARG(token->value), obe_token_to_cstr(expected_kind));
+        exit(EXIT_FAILURE);
+    }
+    return true;
+}
+
+/* Fetch the next token into `token`, then check it with expected_token(). */
+inline static bool
+expected_next_token(obe_lexer_t *lexer, obe_token_t *token,
+                    obe_token_kind_t expected_kind)
+{
+    obe_lexer_next_token(lexer, token);
+    return expected_token(lexer, token, expected_kind);
+}
-- 
cgit v1.2.3