/* * Copyright (C) 2025 Johnny Richard * * SPDX-License-Identifier: LGPL-3.0-or-later * * This file is part of obe. * * obe is free software: you can redistribute it and/or modify it under the * terms of the GNU Lesser General Public License as published by the Free * Software Foundation, either version 3 of the License, or (at your option) * any later version. * * obe is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more * details. * * You should have received a copy of the GNU Lesser General Public License * along with obe. If not, see . */ #include #include #include #include #include #include #include void obe_lexer_init(obe_lexer_t* lexer, char* filename) { assert(lexer); char* program = read_file_contents(filename); if (program == NULL) { fprintf(stderr, "Unable to read file contents <%s>\n", filename); exit(EXIT_FAILURE); } lexer->filename = filename; lexer->loc = (obe_lexer_loc_t){ 0 }; lexer->source = obe_string_from_cstr(program); } bool obe_lexer_is_eof(obe_lexer_t* lexer) { return !(lexer->loc.offset < lexer->source.length); } char obe_lexer_current_char(obe_lexer_t* lexer) { return lexer->source.chars[lexer->loc.offset]; } char obe_lexer_next_char(obe_lexer_t* lexer) { assert(lexer->loc.offset < lexer->source.length); char previous_char = obe_lexer_current_char(lexer); if (previous_char == '\n') { lexer->loc.lineno++; lexer->loc.lineoffset = ++lexer->loc.offset; } else { lexer->loc.offset++; } return obe_lexer_current_char(lexer); } void obe_lexer_next_token(obe_lexer_t* lexer, obe_token_t* token) { if (obe_lexer_is_eof(lexer)) { *token = (obe_token_t){ .kind = TOKEN_EOF }; return; } char c = obe_lexer_current_char(lexer); if (isspace(c) && !obe_lexer_is_eof(lexer)) { while (isspace(c) && !obe_lexer_is_eof(lexer)) { c = obe_lexer_next_char(lexer); } } if (obe_lexer_is_eof(lexer)) { *token = (obe_token_t){ .kind = TOKEN_EOF }; return; } if (isalpha(c) || c == '_') { obe_lexer_loc_t start_loc = lexer->loc; while ((isalnum(c) || c == '_') && !obe_lexer_is_eof(lexer)) { c = obe_lexer_next_char(lexer); } obe_string_t token_value = { .chars = lexer->source.chars + start_loc.offset, .length = lexer->loc.offset - start_loc.offset }; token->value = token_value; token->loc = start_loc; if (obe_string_eq(token_value, obe_string_from_cstr("fn"))) { token->kind = TOKEN_KW_FN; return; } if (obe_string_eq(token_value, obe_string_from_cstr("br"))) { token->kind = TOKEN_KW_BR; return; } if (obe_string_eq(token_value, obe_string_from_cstr("return"))) { token->kind = TOKEN_KW_RETURN; return; } if (obe_string_eq(token_value, obe_string_from_cstr("int"))) { token->kind = TOKEN_INT; return; } token->kind = TOKEN_IDENT; return; } if (c == '.') { obe_lexer_loc_t start_loc = lexer->loc; do { c = obe_lexer_next_char(lexer); } while ((isalnum(c) || c == '_') && !obe_lexer_is_eof(lexer)); obe_string_t token_value = { .chars = lexer->source.chars + start_loc.offset, .length = lexer->loc.offset - start_loc.offset }; token->value = token_value; token->loc = start_loc; token->kind = TOKEN_LABEL; return; } if (isdigit(c)) { obe_lexer_loc_t start_loc = lexer->loc; while (isdigit(c) && !obe_lexer_is_eof(lexer)) { c = obe_lexer_next_char(lexer); } obe_string_t token_value = { .chars = lexer->source.chars + start_loc.offset, .length = lexer->loc.offset - start_loc.offset }; token->kind = TOKEN_NUMBER; token->value = token_value; token->loc = start_loc; return; } if (c == ';') { token->kind = TOKEN_SEMICOLON; token->loc = lexer->loc; token->value = (obe_string_t){ .chars = lexer->source.chars + lexer->loc.offset, .length = 1 }; obe_lexer_next_char(lexer); return; } if (c == ':') { token->kind = TOKEN_COLON; token->value = (obe_string_t){ .chars = lexer->source.chars + lexer->loc.offset , .length = 1} ; token->loc = lexer->loc; obe_lexer_next_char(lexer); return; } if (c == '=') { token->kind = TOKEN_EQ; token->value = (obe_string_t){ .chars = lexer->source.chars + lexer->loc.offset, .length = 1 }; token->loc = lexer->loc; obe_lexer_next_char(lexer); return; } if (c == '{') { token->kind = TOKEN_LBRACE; token->value = (obe_string_t){ .chars = lexer->source.chars + lexer->loc.offset, .length = 1 }; token->loc = lexer->loc; obe_lexer_next_char(lexer); return; } if (c == '}') { token->kind = TOKEN_RBRACE; token->value = (obe_string_t){ .chars = lexer->source.chars + lexer->loc.offset, .length = 1 }; token->loc = lexer->loc; obe_lexer_next_char(lexer); return; } token->kind = TOKEN_UNKOWN; token->value = (obe_string_t){ .chars = lexer->source.chars + lexer->loc.offset, .length = 1 }; token->loc = lexer->loc; obe_lexer_next_char(lexer); return; } static char* token_to_cstr_table[] = { [TOKEN_KW_RETURN] = "return", [TOKEN_KW_FN] = "fn", [TOKEN_KW_BR] = "br", [TOKEN_IDENT] = "", [TOKEN_LABEL] = "