bootstrap projectHEAD master

Signed-off-by: Johnny Richard <johnny@johnnyrichard.com>
author: Johnny Richard <johnny@johnnyrichard.com> 2024-10-30 22:58:03 +0100
committer: Johnny Richard <johnny@johnnyrichard.com> 2025-12-14 09:53:52 +0100
commit: 10bb8a05088f1d3bb24f7167f609b5f6fb0ba026 (patch)
tree: 7a4b3f69a461301c45204ed856b61f92a7d42233 /src/parser.c
1 files changed, 224 insertions, 0 deletions
diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000..d95e6f9
--- /dev/null
+++ b/src/parser.c
@@ -0,0 +1,224 @@
+/*
+ * Copyright (C) 2025 Johnny Richard <johnny@johnnyrichard.com>
+ *
+ * SPDX-License-Identifier: LGPL-3.0-or-later
+ *
+ * This file is part of obe.
+ *
+ * obe is free software: you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * obe is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with obe. If not, see <https://www.gnu.org/licenses/>.
+ */
+#include <assert.h>
+#include <obe/array.h>
+#include <obe/lexer.h>
+#include <obe/parser.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Parses one function definition; defined below obe_parser_parse. */
+static obe_ir_function_t obe_parser_parse_function(obe_parser_t* parser);
+
+/*
+ * Reads the next token from `lexer` into `token` and checks its kind.
+ * The storage-class specifier comes first: placing `static` after `inline`
+ * is an obsolescent form (C11 6.11.5).
+ */
+static inline bool expected_next_token(obe_lexer_t *lexer, obe_token_t *token,
+        obe_token_kind_t expected_kind);
+
+/* Checks that an already-read `token` has kind `expected_kind`. */
+static bool expected_token(obe_lexer_t *lexer, obe_token_t *token,
+        obe_token_kind_t expected_kind);
+
+/*
+ * Initialise `parser` by storing the lexer and arena pointers in it.
+ *
+ * All three pointers must be non-NULL; this is enforced with assert(), so
+ * the check disappears when NDEBUG is defined.
+ */
+void
+obe_parser_init(obe_parser_t* parser, obe_lexer_t* lexer, obe_arena_t* arena)
+{
+    assert(parser && lexer && arena);
+
+    parser->arena = arena;
+    parser->lexer = lexer;
+}
+
+/*
+ * Read the next token from the parser's lexer into `token` and require it
+ * to be of kind `kind`.
+ *
+ * On a mismatch a "file:line:column: syntax error" diagnostic is written to
+ * stderr and the process exits with EXIT_FAILURE; the function only returns
+ * when the token has the expected kind.
+ */
+void
+obe_parser_next_expected_token(obe_parser_t* parser,
+                               obe_token_t* token,
+                               obe_token_kind_t kind)
+{
+    obe_lexer_next_token(parser->lexer, token);
+
+    if (token->kind == kind) {
+        return;
+    }
+
+    // Line and column are stored zero-based; report them one-based.
+    fprintf(stderr,
+            "%s:%zu:%zu: syntax error: expected token <%s> but got <%s>.\n",
+            parser->lexer->filename,
+            token->loc.lineno + 1,
+            token->loc.offset - token->loc.lineoffset + 1,
+            obe_token_to_cstr(kind),
+            obe_token_to_cstr(token->kind));
+    exit(EXIT_FAILURE);
+}
+
+/*
+ * Parse the lexer's input into a new translation unit created from the
+ * parser's arena.
+ *
+ * Exactly one function is parsed and appended to the unit's `funcs` array.
+ * Returns the translation unit.
+ */
+obe_ir_translation_unit_t*
+obe_parser_parse(obe_parser_t* parser)
+{
+    obe_ir_translation_unit_t* unit = obe_ir_translation_unit_new(parser->arena);
+
+    // FIXME: Add support to parse multiple functions
+    obe_array_append(unit->funcs, obe_parser_parse_function(parser));
+
+    return unit;
+}
+
+/*
+ * Copy the text of `token` into a NUL-terminated string duplicated into the
+ * parser's arena.
+ *
+ * The token text is described by (chars, length) and is copied through a
+ * temporary NUL-terminated buffer before being handed to obe_arena_strdup.
+ * The copy must be taken before the next token is read, because the caller
+ * reuses the same obe_token_t for every token.
+ *
+ * NOTE(review): assumes obe_arena_strdup returns a `char*` -- confirm
+ * against the arena header.
+ */
+static char*
+obe_parser_token_cstr(obe_parser_t* parser, const obe_token_t* token)
+{
+    char text[token->value.length + 1];
+
+    memcpy(text, token->value.chars, token->value.length);
+    text[token->value.length] = 0;
+
+    return obe_arena_strdup(parser->arena, text);
+}
+
+/*
+ * Parse a single function: TOKEN_KW_FN, TOKEN_IDENT, TOKEN_LBRACE, a
+ * sequence of statements, then TOKEN_RBRACE.
+ *
+ * Statements are recognised by their first token:
+ *   TOKEN_KW_RETURN  TOKEN_IDENT TOKEN_SEMICOLON              -> OBE_IR_INST_RETURN
+ *   TOKEN_KW_BR      TOKEN_IDENT TOKEN_LABEL TOKEN_SEMICOLON  -> OBE_IR_INST_BR
+ *   TOKEN_LABEL      TOKEN_COLON                              -> OBE_IR_INST_LABEL
+ *   TOKEN_IDENT      TOKEN_COLON TOKEN_INT TOKEN_EQ
+ *                    TOKEN_NUMBER TOKEN_SEMICOLON             -> OBE_IR_INST_CONST
+ *
+ * Any token of the wrong kind inside a statement, or a missing closing
+ * brace, terminates the process through expected_token(). A statement that
+ * starts with any other token kind is printed to stdout and skipped.
+ *
+ * Returns the function by value; its name is the identifier token's value
+ * and its instructions live in an array created from the parser's arena.
+ */
+static obe_ir_function_t
+obe_parser_parse_function(obe_parser_t* parser)
+{
+    obe_token_t token;
+
+    expected_next_token(parser->lexer, &token, TOKEN_KW_FN);
+    expected_next_token(parser->lexer, &token, TOKEN_IDENT);
+
+    obe_ir_function_t func = {
+        .name = token.value,
+        .instrs = obe_array(parser->arena, obe_ir_inst_t)
+    };
+
+    expected_next_token(parser->lexer, &token, TOKEN_LBRACE);
+
+    obe_lexer_next_token(parser->lexer, &token);
+    while (token.kind != TOKEN_EOF && token.kind != TOKEN_RBRACE) {
+        switch (token.kind) {
+            case TOKEN_KW_RETURN:
+                {
+                    expected_next_token(parser->lexer, &token, TOKEN_IDENT);
+
+                    obe_ir_inst_t inst = {
+                        .kind = OBE_IR_INST_RETURN,
+                        .operand1 = (obe_ir_operand_t) {
+                            .kind = OBE_IR_OPERAND_IDENT,
+                            .value = obe_parser_token_cstr(parser, &token)
+                        }
+                    };
+
+                    obe_array_append(func.instrs, inst);
+
+                    expected_next_token(parser->lexer, &token, TOKEN_SEMICOLON);
+                } break;
+            case TOKEN_KW_BR:
+                {
+                    // Each operand is copied before the next token overwrites it.
+                    expected_next_token(parser->lexer, &token, TOKEN_IDENT);
+                    char* op1 = obe_parser_token_cstr(parser, &token);
+
+                    expected_next_token(parser->lexer, &token, TOKEN_LABEL);
+                    char* dest = obe_parser_token_cstr(parser, &token);
+
+                    expected_next_token(parser->lexer, &token, TOKEN_SEMICOLON);
+
+                    obe_ir_inst_t inst = {
+                        .kind = OBE_IR_INST_BR,
+                        .dest = dest,
+                        .operand1 = (obe_ir_operand_t) {
+                            .kind = OBE_IR_OPERAND_IDENT,
+                            .value = op1
+                        }
+                    };
+
+                    obe_array_append(func.instrs, inst);
+                } break;
+            case TOKEN_LABEL:
+                {
+                    char* label = obe_parser_token_cstr(parser, &token);
+
+                    expected_next_token(parser->lexer, &token, TOKEN_COLON);
+
+                    obe_ir_inst_t inst = {
+                        .kind = OBE_IR_INST_LABEL,
+                        .dest = label
+                    };
+
+                    obe_array_append(func.instrs, inst);
+                } break;
+            case TOKEN_IDENT:
+                {
+                    char* dest = obe_parser_token_cstr(parser, &token);
+
+                    expected_next_token(parser->lexer, &token, TOKEN_COLON);
+                    expected_next_token(parser->lexer, &token, TOKEN_INT);
+                    expected_next_token(parser->lexer, &token, TOKEN_EQ);
+                    expected_next_token(parser->lexer, &token, TOKEN_NUMBER);
+
+                    obe_ir_inst_t inst = {
+                        .kind = OBE_IR_INST_CONST,
+                        .type = OBE_IR_TYPE_INT,
+                        .dest = dest,
+                        .operand1 = (obe_ir_operand_t) {
+                            .kind = OBE_IR_OPERAND_LITERAL,
+                            .value = obe_parser_token_cstr(parser, &token)
+                        }
+                    };
+
+                    obe_array_append(func.instrs, inst);
+
+                    expected_next_token(parser->lexer, &token, TOKEN_SEMICOLON);
+                } break;
+            default:
+                {
+                    // Unhandled token kinds are only reported; parsing continues.
+                    printf("%s:%ld:%ld: token <" PRIs ">\n",
+                           parser->lexer->filename,
+                           token.loc.lineno + 1,
+                           token.loc.offset - token.loc.lineoffset + 1,
+                           PRIsARG(token.value));
+                } break;
+        }
+        obe_lexer_next_token(parser->lexer, &token);
+    }
+
+    // The loop also stops on TOKEN_EOF, so a missing '}' is reported here.
+    expected_token(parser->lexer, &token, TOKEN_RBRACE);
+
+    return func;
+}
+
+/*
+ * Require that the already-read `token` has kind `expected_kind`.
+ *
+ * On a mismatch a "file:line:column: syntax error" diagnostic is written to
+ * stderr and the process exits with EXIT_FAILURE. Otherwise returns true;
+ * the function never returns false.
+ *
+ * `lexer` is only used for the file name in the diagnostic.
+ */
+static bool
+expected_token(obe_lexer_t *lexer, obe_token_t *token, obe_token_kind_t expected_kind)
+{
+    if (token->kind != expected_kind) {
+        // The column is relative to the start of the token's own line, so it
+        // is computed from the token's line offset, not the lexer's current one.
+        fprintf(stderr,
+                "%s:%lu:%lu: syntax error: got '" PRIs "' token but expect '%s'\n",
+                lexer->filename,
+                token->loc.lineno + 1,
+                token->loc.offset - token->loc.lineoffset + 1,
+                PRIsARG(token->value),
+                obe_token_to_cstr(expected_kind));
+        exit(EXIT_FAILURE);
+    }
+    return true;
+}
+
+/*
+ * Read the next token from `lexer` into `token`, then require it to have
+ * kind `expected_kind` via expected_token().
+ *
+ * Returns true when the kind matches; on a mismatch expected_token() prints
+ * a diagnostic and exits the process.
+ */
+static inline bool
+expected_next_token(obe_lexer_t *lexer,
+                    obe_token_t *token,
+                    obe_token_kind_t expected_kind)
+{
+    obe_lexer_next_token(lexer, token);
+    return expected_token(lexer, token, expected_kind);
+}
author	Johnny Richard <johnny@johnnyrichard.com>	2024-10-30 22:58:03 +0100
committer	Johnny Richard <johnny@johnnyrichard.com>	2025-12-14 09:53:52 +0100
commit	10bb8a05088f1d3bb24f7167f609b5f6fb0ba026 (patch)
tree	7a4b3f69a461301c45204ed856b61f92a7d42233 /src/parser.c