From 10bb8a05088f1d3bb24f7167f609b5f6fb0ba026 Mon Sep 17 00:00:00 2001 From: Johnny Richard Date: Wed, 30 Oct 2024 22:58:03 +0100 Subject: bootstrap project Signed-off-by: Johnny Richard --- src/parser.c | 224 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 224 insertions(+) create mode 100644 src/parser.c (limited to 'src/parser.c') diff --git a/src/parser.c b/src/parser.c new file mode 100644 index 0000000..d95e6f9 --- /dev/null +++ b/src/parser.c @@ -0,0 +1,224 @@ +/* + * Copyright (C) 2025 Johnny Richard + * + * SPDX-License-Identifier: LGPL-3.0-or-later + * + * This file is part of obe. + * + * obe is free software: you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * + * obe is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with obe. If not, see . + */ +#include +#include +#include +#include +#include +#include +#include +#include + +static obe_ir_function_t obe_parser_parse_function(obe_parser_t* parser); + +inline static bool expected_next_token(obe_lexer_t *lexer, obe_token_t *token, + obe_token_kind_t expected_kind); + +static bool expected_token(obe_lexer_t *lexer, obe_token_t *token, + obe_token_kind_t expected_kind); + +void +obe_parser_init(obe_parser_t* parser, obe_lexer_t* lexer, obe_arena_t* arena) +{ + assert(parser && lexer && arena); + parser->lexer = lexer; + parser->arena = arena; +} + +void +obe_parser_next_expected_token(obe_parser_t* parser, + obe_token_t* token, + obe_token_kind_t kind) +{ + obe_lexer_next_token(parser->lexer, token); + if (token->kind != kind) { + fprintf(stderr, + "%s:%zu:%zu: syntax error: expected token <%s> but got <%s>.\n", + parser->lexer->filename, + token->loc.lineno + 1, + token->loc.offset - token->loc.lineoffset + 1, + obe_token_to_cstr(kind), + obe_token_to_cstr(token->kind)); + exit(EXIT_FAILURE); + } +} + +obe_ir_translation_unit_t* +obe_parser_parse(obe_parser_t* parser) +{ + obe_ir_translation_unit_t* tu = obe_ir_translation_unit_new(parser->arena); + + // FIXME: Add support to parse multiple functions + obe_array_append(tu->funcs, obe_parser_parse_function(parser)); + + return tu; +} + +static obe_ir_function_t +obe_parser_parse_function(obe_parser_t* parser) +{ + obe_token_t token; + + expected_next_token(parser->lexer, &token, TOKEN_KW_FN); + expected_next_token(parser->lexer, &token, TOKEN_IDENT); + + obe_ir_function_t func = { .name = token.value, .instrs = obe_array(parser->arena, obe_ir_inst_t) }; + + expected_next_token(parser->lexer, &token, TOKEN_LBRACE); + + obe_lexer_next_token(parser->lexer, &token); + while (token.kind != TOKEN_EOF && token.kind != TOKEN_RBRACE) { + switch (token.kind) { + case TOKEN_KW_RETURN: + { + expected_next_token(parser->lexer, &token, TOKEN_IDENT); + + char value[token.value.length + 1]; + memcpy(value, token.value.chars, token.value.length); + value[token.value.length] = 0; + + obe_ir_inst_t inst = { + .kind = OBE_IR_INST_RETURN, + .operand1 = (obe_ir_operand_t) { + .kind = OBE_IR_OPERAND_IDENT, + .value = obe_arena_strdup(parser->arena, value) + } + }; + + obe_array_append(func.instrs, inst); + + expected_next_token(parser->lexer, &token, TOKEN_SEMICOLON); + } break; + case TOKEN_KW_BR: + { + expected_next_token(parser->lexer, &token, TOKEN_IDENT); + + char op1[token.value.length + 1]; + memcpy(op1, token.value.chars, token.value.length); + op1[token.value.length] = 0; + + expected_next_token(parser->lexer, &token, TOKEN_LABEL); + + char dest[token.value.length + 1]; + memcpy(dest, token.value.chars, token.value.length); + dest[token.value.length] = 0; + + expected_next_token(parser->lexer, &token, TOKEN_SEMICOLON); + + obe_ir_inst_t inst = { + .kind = OBE_IR_INST_BR, + .dest = obe_arena_strdup(parser->arena, dest), + .operand1 = (obe_ir_operand_t) { + .kind = OBE_IR_OPERAND_IDENT, + .value = obe_arena_strdup(parser->arena, op1) + } + }; + + obe_array_append(func.instrs, inst); + } break; + case TOKEN_LABEL: + { + char label[token.value.length + 1]; + memcpy(label, token.value.chars, token.value.length); + label[token.value.length] = 0; + + expected_next_token(parser->lexer, &token, TOKEN_COLON); + + obe_ir_inst_t inst = { + .kind = OBE_IR_INST_LABEL, + .dest = obe_arena_strdup(parser->arena, label) + }; + + obe_array_append(func.instrs, inst); + } break; + case TOKEN_IDENT: + { + obe_string_t val = token.value; + + char dest[token.value.length + 1]; + memcpy(dest, token.value.chars, token.value.length); + dest[token.value.length] = 0; + + expected_next_token(parser->lexer, &token, TOKEN_COLON); + expected_next_token(parser->lexer, &token, TOKEN_INT); + expected_next_token(parser->lexer, &token, TOKEN_EQ); + expected_next_token(parser->lexer, &token, TOKEN_NUMBER); + + + char number[token.value.length + 1]; + memcpy(number, token.value.chars, token.value.length); + number[token.value.length] = 0; + + obe_ir_inst_t inst = { + .kind = OBE_IR_INST_CONST, + .type = OBE_IR_TYPE_INT, + .dest = obe_arena_strdup(parser->arena, dest), + .operand1 = (obe_ir_operand_t) { + .kind = OBE_IR_OPERAND_LITERAL, + .value = obe_arena_strdup(parser->arena, number) + } + }; + + obe_array_append(func.instrs, inst); + + expected_next_token(parser->lexer, &token, TOKEN_SEMICOLON); + } break; + default: + { + printf("%s:%ld:%ld: token <" PRIs ">\n", + parser->lexer->filename, + token.loc.lineno + 1, + token.loc.offset - token.loc.lineoffset + 1, + PRIsARG(token.value)); + } + } + obe_lexer_next_token(parser->lexer, &token); + } + + expected_token(parser->lexer, &token, TOKEN_RBRACE); + + return func; +} + +static bool +expected_token(obe_lexer_t *lexer, obe_token_t *token, obe_token_kind_t expected_kind) +{ + if (token->kind != expected_kind) { + fprintf(stderr, + "%s:%lu:%lu: syntax error: got '" PRIs "' token but expect '%s'\n", + lexer->filename, + token->loc.lineno + 1, + token->loc.offset - lexer->loc.lineoffset + 1, + PRIsARG(token->value), + obe_token_to_cstr(expected_kind)); + exit(EXIT_FAILURE); + } + return true; +} + +inline static bool +expected_next_token(obe_lexer_t *lexer, + obe_token_t *token, + obe_token_kind_t expected_kind) +{ + obe_lexer_next_token(lexer, token); + return expected_token(lexer, token, expected_kind); +} -- cgit v1.2.3