From 10bb8a05088f1d3bb24f7167f609b5f6fb0ba026 Mon Sep 17 00:00:00 2001
From: Johnny Richard
Date: Wed, 30 Oct 2024 22:58:03 +0100
Subject: bootstrap project

Signed-off-by: Johnny Richard
---
 src/arena.c          | 121 ++++++++++++++++++++++++++++
 src/array.c          |  67 +++++++++++++++
 src/ir.c             |  42 ++++++++++
 src/lexer.c          | 222 ++++++++++++++++++++++++++++++++++++++++++++++++++
 src/obe.c            | 142 ++++++++++++++++++++++++++++++++
 src/parser.c         | 224 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/string.c         |  34 ++++++++
 src/utils.c          |  74 +++++++++++++++++
 src/x86_64/codegen.c | 152 ++++++++++++++++++++++++++++++++++
 9 files changed, 1078 insertions(+)
 create mode 100644 src/arena.c
 create mode 100644 src/array.c
 create mode 100644 src/ir.c
 create mode 100644 src/lexer.c
 create mode 100644 src/obe.c
 create mode 100644 src/parser.c
 create mode 100644 src/string.c
 create mode 100644 src/utils.c
 create mode 100644 src/x86_64/codegen.c

(limited to 'src')

diff --git a/src/arena.c b/src/arena.c
new file mode 100644
index 0000000..8822fdd
--- /dev/null
+++ b/src/arena.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2025 Johnny Richard
+ *
+ * SPDX-License-Identifier: LGPL-3.0-or-later
+ *
+ * This file is part of obe.
+ *
+ * obe is free software: you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * obe is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with obe. If not, see <https://www.gnu.org/licenses/>.
+ */
+/*
+ * NOTE(review): the header names on the #include lines below were stripped
+ * when this patch was extracted. The code needs the standard headers that
+ * provide assert, malloc/free/abort, memcpy/strlen and uint8_t/SIZE_MAX, plus
+ * the project header declaring obe_arena_t -- restore them from the
+ * repository.
+ */
+#include
+#include
+#include
+#include
+
+/*
+ * Capacity to give a freshly created region that must hold a request of
+ * `size` bytes: the default capacity, or the request plus its padding when
+ * the request is larger than the default.
+ */
+static size_t
+arena_region_capacity_for(size_t size)
+{
+    size_t capacity = OBE_ARENA_REGION_DEFAULT_CAPACITY;
+    if (capacity < size)
+        capacity = size + OBE_ARENA_PADDING(size);
+    return capacity;
+}
+
+/*
+ * Non-zero when `size` more bytes fit in region `r`.
+ *
+ * Written as a subtraction so the test cannot wrap around. `offset` may
+ * already be past `capacity`, because obe_arena_alloc() advances it by the
+ * padded size while only the unpadded size is checked.
+ */
+static int
+arena_region_fits(const obe_arena_region_t* r, size_t size)
+{
+    return r->offset <= r->capacity && size <= r->capacity - r->offset;
+}
+
+/*
+ * Hand out `size` bytes from `arena`, creating the first region or appending
+ * a new one when no existing region from `arena->end` onwards has room.
+ *
+ * The returned memory is not cleared. The region offset is advanced by
+ * `size + OBE_ARENA_PADDING(size)` (macro defined elsewhere; presumably it
+ * rounds the offset up to an alignment boundary -- TODO confirm).
+ *
+ * Never returns NULL: obe_arena_region_new() terminates the process when the
+ * system allocator fails.
+ */
+void*
+obe_arena_alloc(obe_arena_t* arena, size_t size)
+{
+    if (arena->end == NULL) {
+        assert(arena->begin == NULL);
+        /* Size the very first region for the request as well, so an
+         * oversized first allocation does not leave an unusable default
+         * region at the head of the list. */
+        arena->end = obe_arena_region_new(arena_region_capacity_for(size));
+        arena->begin = arena->end;
+    }
+
+    /* After obe_arena_release() later regions still exist; walk forward
+     * until one has room or the list ends. */
+    while (!arena_region_fits(arena->end, size) && arena->end->next != NULL) {
+        arena->end = arena->end->next;
+    }
+
+    if (!arena_region_fits(arena->end, size)) {
+        assert(arena->end->next == NULL);
+        arena->end->next =
+            obe_arena_region_new(arena_region_capacity_for(size));
+        arena->end = arena->end->next;
+    }
+
+    void* ptr = arena->end->data + arena->end->offset;
+    arena->end->offset += size + OBE_ARENA_PADDING(size);
+    return ptr;
+}
+
+/*
+ * Grow an arena allocation from `old_size` to `new_size` bytes.
+ *
+ * Shrinking (or keeping the size) returns `old_ptr` unchanged. Growing
+ * allocates a new block and copies the first `old_size` bytes into it; the
+ * old block is not reclaimed here.
+ */
+void*
+obe_arena_realloc(obe_arena_t* arena,
+                  void* old_ptr,
+                  size_t old_size,
+                  size_t new_size)
+{
+    if (new_size <= old_size)
+        return old_ptr;
+
+    void* new_ptr = obe_arena_alloc(arena, new_size);
+    /* memcpy() must not be handed a null source, even for zero bytes. */
+    if (old_ptr != NULL && old_size > 0)
+        memcpy(new_ptr, old_ptr, old_size);
+    return new_ptr;
+}
+
+/*
+ * Mark every region as empty and restart allocation at the first region.
+ * The regions themselves stay allocated for reuse.
+ */
+void
+obe_arena_release(obe_arena_t* arena)
+{
+    for (obe_arena_region_t* r = arena->begin; r != NULL; r = r->next) {
+        r->offset = 0;
+    }
+    arena->end = arena->begin;
+}
+
+/*
+ * Free every region of `arena` and reset it to the empty state, so the next
+ * obe_arena_alloc() starts from scratch.
+ */
+void
+obe_arena_free(obe_arena_t* arena)
+{
+    obe_arena_region_t* r = arena->begin;
+    while (r != NULL) {
+        /* Read the link before the node is freed. */
+        obe_arena_region_t* next = r->next;
+        obe_arena_region_free(r);
+        r = next;
+    }
+    arena->begin = NULL;
+    arena->end = NULL;
+}
+
+/*
+ * Allocate a region with room for `capacity` payload bytes. Header and
+ * payload live in one malloc() block; `data` points just past the header.
+ *
+ * Terminates the process if the size computation would wrap or the system
+ * allocator fails, so callers never see NULL (also in NDEBUG builds, where
+ * assert() alone would have let a null pointer through).
+ */
+obe_arena_region_t*
+obe_arena_region_new(size_t capacity)
+{
+    if (capacity > SIZE_MAX - sizeof(obe_arena_region_t))
+        abort();
+
+    obe_arena_region_t* r = malloc(sizeof(*r) + capacity);
+    assert(r);
+    if (r == NULL)
+        abort();
+
+    r->next = NULL;
+    r->offset = 0;
+    r->capacity = capacity;
+    r->data = (uint8_t*)(r + 1);
+    return r;
+}
+
+/*
+ * Release a region obtained from obe_arena_region_new().
+ */
+void
+obe_arena_region_free(obe_arena_region_t* r)
+{
+    free(r);
+}
+
+/*
+ * Copy the NUL-terminated string `s` (terminator included) into `arena` and
+ * return the copy.
+ */
+char*
+obe_arena_strdup(obe_arena_t* arena, char* s)
+{
+    size_t slen = strlen(s);
+    char* d = obe_arena_alloc(arena, sizeof(char) * (slen + 1));
+    assert(d);
+
+    memcpy(d, s, slen + 1);
+    return d;
+}
diff --git a/src/array.c b/src/array.c
new file mode 100644
index 0000000..dde3eba
--- /dev/null
+++ b/src/array.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2025 Johnny Richard
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+/*
+ * NOTE(review): the header names on the #include lines below were stripped
+ * when this patch was extracted. The code needs the standard headers that
+ * provide assert, size_t and uint8_t, plus the project headers declaring
+ * obe_arena_t and obe_array_header_t -- restore them from the repository.
+ */
+#include
+#include
+#include
+
+#include
+#include
+
+/*
+ * Create an empty dynamic array of `item_size`-byte items inside `arena`.
+ *
+ * One arena block holds an obe_array_header_t immediately followed by room
+ * for OBE_ARRAY_INITIAL_CAPACITY items. The returned pointer addresses the
+ * first item slot; the header sits directly in front of it and is reached
+ * with obe_array_get_header().
+ *
+ * Returns NULL if the arena hands back NULL.
+ */
+void*
+obe_array_new(obe_arena_t* arena, size_t item_size)
+{
+    size_t bytes =
+        sizeof(obe_array_header_t) + (item_size * OBE_ARRAY_INITIAL_CAPACITY);
+    obe_array_header_t* h = obe_arena_alloc(arena, bytes);
+    if (h == NULL) {
+        return NULL;
+    }
+
+    h->arena = arena;
+    h->length = 0;
+    h->item_size = item_size;
+    h->capacity = OBE_ARRAY_INITIAL_CAPACITY;
+
+    return ((uint8_t *)h) + sizeof(obe_array_header_t);
+}
+
+/*
+ * Map an item pointer returned by obe_array_new()/obe_array_grow() back to
+ * the header stored directly in front of it.
+ */
+obe_array_header_t*
+obe_array_get_header(void* arr)
+{
+    return (obe_array_header_t*)(((uint8_t *)arr) - sizeof(obe_array_header_t));
+}
+
+/*
+ * Double the capacity of `arr` and return the (possibly moved) item pointer.
+ * Callers must continue with the returned pointer.
+ *
+ * The new capacity is written only to the header that is returned. The
+ * previous block, when the array moves, keeps its original capacity, so a
+ * stale pointer to it never advertises room the old block does not have.
+ */
+void*
+obe_array_grow(void* arr)
+{
+    assert(arr);
+    obe_array_header_t* h = obe_array_get_header(arr);
+
+    size_t new_capacity = h->capacity * 2;
+    size_t old_size = sizeof(obe_array_header_t) + (h->capacity * h->item_size);
+    size_t new_size = sizeof(obe_array_header_t) + (new_capacity * h->item_size);
+
+    /* Copies header and items when the block has to move. */
+    h = obe_arena_realloc(h->arena, h, old_size, new_size);
+    h->capacity = new_capacity;
+
+    return ((uint8_t *)h) + sizeof(obe_array_header_t);
+}
+
+/*
+ * Number of items currently stored in `arr`, as recorded in its header.
+ */
+size_t
+obe_array_length(void* arr)
+{
+    assert(arr);
+    obe_array_header_t* header = obe_array_get_header(arr);
+    return header->length;
+}
diff --git a/src/ir.c b/src/ir.c
new file mode 100644
index 0000000..2c0b0c9
--- /dev/null
+++ b/src/ir.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2025 Johnny Richard
+ *
+ * SPDX-License-Identifier: LGPL-3.0-or-later
+ *
+ * This file is part of obe.
+ *
+ * obe is free software: you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * obe is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with obe.
+ * If not, see <https://www.gnu.org/licenses/>.
+ */
+/*
+ * NOTE(review): the header names on the #include lines below were stripped
+ * when this patch was extracted. The code needs <assert.h> plus the project
+ * headers declaring the arena, array, string and IR types -- restore them
+ * from the repository.
+ */
+#include
+#include
+#include
+#include
+
+/*
+ * Allocate a translation unit in `arena` with an empty function array.
+ *
+ * The object is zero-initialised first: it is not cleared by the arena
+ * allocation itself, so any member other than `funcs` (the struct is
+ * declared elsewhere) would otherwise start with an indeterminate value.
+ */
+obe_ir_translation_unit_t*
+obe_ir_translation_unit_new(obe_arena_t* arena)
+{
+    obe_ir_translation_unit_t* tu = obe_arena_alloc(arena, sizeof(*tu));
+    assert(tu);
+    *tu = (obe_ir_translation_unit_t){ 0 };
+    tu->funcs = obe_array(arena, obe_ir_function_t);
+    return tu;
+}
+
+/*
+ * Allocate a function record named `name` in `arena`.
+ *
+ * Every member other than `name` is zero-initialised, for the same reason as
+ * in obe_ir_translation_unit_new(). `name` is stored as passed in; no copy
+ * of the string is made here.
+ */
+obe_ir_function_t*
+obe_ir_function_new(obe_arena_t* arena, obe_string_t name)
+{
+    obe_ir_function_t* func = obe_arena_alloc(arena, sizeof(*func));
+    assert(func);
+    *func = (obe_ir_function_t){ .name = name };
+    return func;
+}
diff --git a/src/lexer.c b/src/lexer.c
new file mode 100644
index 0000000..3ad8751
--- /dev/null
+++ b/src/lexer.c
@@ -0,0 +1,222 @@
+/*
+ * Copyright (C) 2025 Johnny Richard
+ *
+ * SPDX-License-Identifier: LGPL-3.0-or-later
+ *
+ * This file is part of obe.
+ *
+ * obe is free software: you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * obe is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with obe. If not, see <https://www.gnu.org/licenses/>.
+ */
+/*
+ * NOTE(review): the header names on the #include lines below were stripped
+ * when this patch was extracted. The code needs the standard headers that
+ * provide assert, the <ctype.h> classifiers, bool, fprintf and exit, plus the
+ * project headers declaring the lexer, string and file-reading helpers --
+ * restore them from the repository.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * Point `lexer` at the start of the file `filename`.
+ *
+ * The file is loaded with read_file_contents() (defined elsewhere). When
+ * that returns NULL an error is printed to stderr and the process exits with
+ * EXIT_FAILURE. The loaded buffer is not freed here; `lexer->source` is
+ * built from it with obe_string_from_cstr().
+ */
+void
+obe_lexer_init(obe_lexer_t* lexer, char* filename)
+{
+    assert(lexer);
+
+    char* program = read_file_contents(filename);
+    if (program == NULL) {
+        fprintf(stderr, "Unable to read file contents <%s>\n", filename);
+        exit(EXIT_FAILURE);
+    }
+
+    lexer->filename = filename;
+    lexer->loc = (obe_lexer_loc_t){ 0 };
+    lexer->source = obe_string_from_cstr(program);
+}
+
+/*
+ * True once the read offset has reached the end of the source text.
+ */
+bool
+obe_lexer_is_eof(obe_lexer_t* lexer)
+{
+    return lexer->loc.offset >= lexer->source.length;
+}
+
+/*
+ * Character at the current read offset. No bounds check is made: at end of
+ * input this reads index `length` (presumably the NUL terminator of the
+ * loaded C string -- TODO confirm against obe_string_from_cstr()).
+ */
+char
+obe_lexer_current_char(obe_lexer_t* lexer)
+{
+    return lexer->source.chars[lexer->loc.offset];
+}
+
+/*
+ * Advance one character and return the character now under the cursor.
+ *
+ * Stepping over a '\n' bumps the line number and records the offset of the
+ * first character of the new line in `lineoffset`. Must not be called at
+ * end of input.
+ */
+char
+obe_lexer_next_char(obe_lexer_t* lexer)
+{
+    assert(lexer->loc.offset < lexer->source.length);
+
+    bool leaving_newline = obe_lexer_current_char(lexer) == '\n';
+    lexer->loc.offset++;
+    if (leaving_newline) {
+        lexer->loc.lineno++;
+        lexer->loc.lineoffset = lexer->loc.offset;
+    }
+    return obe_lexer_current_char(lexer);
+}
+
+/*
+ * True for characters allowed after the first character of a word
+ * (identifier, keyword or label name). The cast keeps bytes >= 0x80 out of
+ * the negative range, which the <ctype.h> classifiers do not accept.
+ */
+static bool
+lexer_is_word_char(char c)
+{
+    return isalnum((unsigned char)c) || c == '_';
+}
+
+/*
+ * Source text from `start` up to (not including) the current read offset.
+ */
+static obe_string_t
+lexer_span_from(obe_lexer_t* lexer, obe_lexer_loc_t start)
+{
+    return (obe_string_t){ .chars = lexer->source.chars + start.offset,
+                           .length = lexer->loc.offset - start.offset };
+}
+
+/*
+ * Fill `value` and `loc` of `token` for the single character under the
+ * cursor, then consume that character. The caller sets `kind`.
+ */
+static void
+lexer_take_char(obe_lexer_t* lexer, obe_token_t* token)
+{
+    token->value = (obe_string_t){ .chars = lexer->source.chars +
+                                            lexer->loc.offset,
+                                   .length = 1 };
+    token->loc = lexer->loc;
+    obe_lexer_next_char(lexer);
+}
+
+/*
+ * Read the next token from `lexer` into `token`.
+ *
+ * Leading whitespace is skipped. Recognised, in this order:
+ *   - words [A-Za-z_][A-Za-z0-9_]*: the keywords "fn", "br", "return" and
+ *     "int", otherwise TOKEN_IDENT;
+ *   - labels: a '.' followed by word characters (TOKEN_LABEL, value includes
+ *     the dot);
+ *   - decimal digit runs (TOKEN_NUMBER);
+ *   - the single characters ; : = { };
+ *   - any other character becomes a one-character TOKEN_UNKOWN (spelled as
+ *     the enumerator is declared).
+ * At end of input the token is TOKEN_EOF with all other fields zeroed.
+ *
+ * Token values point into the lexer's source buffer; nothing is copied.
+ */
+void
+obe_lexer_next_token(obe_lexer_t* lexer, obe_token_t* token)
+{
+    if (obe_lexer_is_eof(lexer)) {
+        *token = (obe_token_t){ .kind = TOKEN_EOF };
+        return;
+    }
+
+    char c = obe_lexer_current_char(lexer);
+    /* <ctype.h> functions require an unsigned char value (or EOF); a plain
+     * char holding a byte >= 0x80 may be negative. */
+    while (isspace((unsigned char)c) && !obe_lexer_is_eof(lexer)) {
+        c = obe_lexer_next_char(lexer);
+    }
+
+    if (obe_lexer_is_eof(lexer)) {
+        *token = (obe_token_t){ .kind = TOKEN_EOF };
+        return;
+    }
+
+    if (isalpha((unsigned char)c) || c == '_') {
+        obe_lexer_loc_t start_loc = lexer->loc;
+        while (lexer_is_word_char(c) && !obe_lexer_is_eof(lexer)) {
+            c = obe_lexer_next_char(lexer);
+        }
+        token->value = lexer_span_from(lexer, start_loc);
+        token->loc = start_loc;
+
+        if (obe_string_eq(token->value, obe_string_from_cstr("fn"))) {
+            token->kind = TOKEN_KW_FN;
+        } else if (obe_string_eq(token->value, obe_string_from_cstr("br"))) {
+            token->kind = TOKEN_KW_BR;
+        } else if (obe_string_eq(token->value,
+                                 obe_string_from_cstr("return"))) {
+            token->kind = TOKEN_KW_RETURN;
+        } else if (obe_string_eq(token->value, obe_string_from_cstr("int"))) {
+            token->kind = TOKEN_INT;
+        } else {
+            token->kind = TOKEN_IDENT;
+        }
+        return;
+    }
+
+    if (c == '.') {
+        obe_lexer_loc_t start_loc = lexer->loc;
+        /* do/while: the dot itself is consumed unconditionally. */
+        do {
+            c = obe_lexer_next_char(lexer);
+        } while (lexer_is_word_char(c) && !obe_lexer_is_eof(lexer));
+
+        token->value = lexer_span_from(lexer, start_loc);
+        token->loc = start_loc;
+        token->kind = TOKEN_LABEL;
+        return;
+    }
+
+    if (isdigit((unsigned char)c)) {
+        obe_lexer_loc_t start_loc = lexer->loc;
+        while (isdigit((unsigned char)c) && !obe_lexer_is_eof(lexer)) {
+            c = obe_lexer_next_char(lexer);
+        }
+        token->kind = TOKEN_NUMBER;
+        token->value = lexer_span_from(lexer, start_loc);
+        token->loc = start_loc;
+        return;
+    }
+
+    switch (c) {
+        case ';':
+            token->kind = TOKEN_SEMICOLON;
+            break;
+        case ':':
+            token->kind = TOKEN_COLON;
+            break;
+        case '=':
+            token->kind = TOKEN_EQ;
+            break;
+        case '{':
+            token->kind = TOKEN_LBRACE;
+            break;
+        case '}':
+            token->kind = TOKEN_RBRACE;
+            break;
+        default:
+            token->kind = TOKEN_UNKOWN;
+            break;
+    }
+    lexer_take_char(lexer, token);
+}
+
+static char* token_to_cstr_table[] = { [TOKEN_KW_RETURN] = "return",
+                                       [TOKEN_KW_FN] = "fn",
+                                       [TOKEN_KW_BR] = "br",
+                                       [TOKEN_IDENT] = "",
+                                       [TOKEN_LABEL] = "