/*
 * Copyright (C) 2025 Johnny Richard <johnny@johnnyrichard.com>
 *
 * SPDX-License-Identifier: LGPL-3.0-or-later
 *
 * This file is part of obe.
 *
 * obe is free software: you can redistribute it and/or modify it under the
 * terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation, either version 3 of the License, or (at your option)
 * any later version.
 *
 * obe is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with obe. If not, see <https://www.gnu.org/licenses/>.
 */
#include <assert.h>
#include <ctype.h>
#include <obe/lexer.h>
#include <obe/string.h>
#include <obe/utils.h>
#include <stdio.h>
#include <stdlib.h>

/**
 * Initialise a lexer over the contents of a source file.
 *
 * The file named by filename is loaded with read_file_contents(). If that
 * call returns NULL, an error message is printed to stderr and the process
 * exits with EXIT_FAILURE. Otherwise the lexer stores the filename pointer
 * as given, starts from a zeroed location and uses the loaded text as its
 * source.
 *
 * @param lexer    lexer to initialise; must not be NULL (asserted).
 * @param filename name of the file to load.
 */
void
obe_lexer_init(obe_lexer_t* lexer, char* filename)
{
    assert(lexer);

    char* contents = read_file_contents(filename);
    if (!contents) {
        fprintf(stderr, "Unable to read file contents <%s>\n", filename);
        exit(EXIT_FAILURE);
    }

    lexer->source = obe_string_from_cstr(contents);
    lexer->loc = (obe_lexer_loc_t){ 0 };
    lexer->filename = filename;
}

/**
 * Report whether the lexer has consumed all of its input.
 *
 * @return true when the current offset is at or beyond the source length.
 */
bool
obe_lexer_is_eof(obe_lexer_t* lexer)
{
    return lexer->loc.offset >= lexer->source.length;
}

/**
 * Return the character at the lexer's current offset without advancing.
 *
 * No bounds check is performed here.
 */
char
obe_lexer_current_char(obe_lexer_t* lexer)
{
    const char* cursor = lexer->source.chars + lexer->loc.offset;
    return *cursor;
}

/**
 * Advance the lexer by one character and return the character now under
 * the cursor.
 *
 * The lexer must not already be at end of input (asserted). When the
 * character being stepped over is a newline, the line number is incremented
 * and the line offset is set to the offset of the character after it.
 */
char
obe_lexer_next_char(obe_lexer_t* lexer)
{
    assert(lexer->loc.offset < lexer->source.length);

    const char consumed = obe_lexer_current_char(lexer);

    lexer->loc.offset++;
    if (consumed == '\n') {
        /* The new line begins right after the newline just consumed. */
        lexer->loc.lineno++;
        lexer->loc.lineoffset = lexer->loc.offset;
    }

    return obe_lexer_current_char(lexer);
}

/**
 * Scan the next token from the lexer's source and store it in *token.
 *
 * Leading whitespace is skipped first. The token is then classified by its
 * first character (character classes are those of <ctype.h>):
 *   - a letter or '_' starts a run of letters, digits and '_'; the spellings
 *     "fn", "br", "return" and "int" get TOKEN_KW_FN, TOKEN_KW_BR,
 *     TOKEN_KW_RETURN and TOKEN_INT, anything else is TOKEN_IDENT;
 *   - '.' followed by zero or more letters, digits or '_' is TOKEN_LABEL
 *     (the value includes the leading '.');
 *   - a digit starts a run of digits, TOKEN_NUMBER;
 *   - ';' ':' '=' '{' '}' are single-character tokens;
 *   - any other character yields a one-character TOKEN_UNKOWN.
 * At end of input *token is zero-initialised apart from its kind, TOKEN_EOF.
 *
 * For every non-EOF token, token->value points into the lexer's source
 * buffer (nothing is copied) and token->loc is the location of the token's
 * first character.
 *
 * @param lexer lexer to advance past the scanned token.
 * @param token receives the scanned token.
 */
void
obe_lexer_next_token(obe_lexer_t* lexer, obe_token_t* token)
{
    if (obe_lexer_is_eof(lexer)) {
        *token = (obe_token_t){ .kind = TOKEN_EOF };
        return;
    }

    /*
     * The <ctype.h> classifiers have undefined behaviour for negative
     * arguments other than EOF, and plain char may be signed, so every
     * character is converted to unsigned char before being classified.
     */
    char c = obe_lexer_current_char(lexer);
    while (isspace((unsigned char)c) && !obe_lexer_is_eof(lexer)) {
        c = obe_lexer_next_char(lexer);
    }

    /* The input may have consisted of trailing whitespace only. */
    if (obe_lexer_is_eof(lexer)) {
        *token = (obe_token_t){ .kind = TOKEN_EOF };
        return;
    }

    obe_lexer_loc_t start_loc = lexer->loc;

    if (isalpha((unsigned char)c) || c == '_') {
        while ((isalnum((unsigned char)c) || c == '_') &&
               !obe_lexer_is_eof(lexer)) {
            c = obe_lexer_next_char(lexer);
        }
        obe_string_t token_value = {
            .chars = lexer->source.chars + start_loc.offset,
            .length = lexer->loc.offset - start_loc.offset
        };
        token->value = token_value;
        token->loc = start_loc;

        if (obe_string_eq(token_value, obe_string_from_cstr("fn"))) {
            token->kind = TOKEN_KW_FN;
        } else if (obe_string_eq(token_value, obe_string_from_cstr("br"))) {
            token->kind = TOKEN_KW_BR;
        } else if (obe_string_eq(token_value,
                                 obe_string_from_cstr("return"))) {
            token->kind = TOKEN_KW_RETURN;
        } else if (obe_string_eq(token_value, obe_string_from_cstr("int"))) {
            token->kind = TOKEN_INT;
        } else {
            token->kind = TOKEN_IDENT;
        }
        return;
    }

    if (c == '.') {
        /* do/while: the leading '.' is always consumed as part of the label. */
        do {
            c = obe_lexer_next_char(lexer);
        } while ((isalnum((unsigned char)c) || c == '_') &&
                 !obe_lexer_is_eof(lexer));

        token->value = (obe_string_t){
            .chars = lexer->source.chars + start_loc.offset,
            .length = lexer->loc.offset - start_loc.offset
        };
        token->loc = start_loc;
        token->kind = TOKEN_LABEL;
        return;
    }

    if (isdigit((unsigned char)c)) {
        while (isdigit((unsigned char)c) && !obe_lexer_is_eof(lexer)) {
            c = obe_lexer_next_char(lexer);
        }
        token->kind = TOKEN_NUMBER;
        token->value = (obe_string_t){
            .chars = lexer->source.chars + start_loc.offset,
            .length = lexer->loc.offset - start_loc.offset
        };
        token->loc = start_loc;
        return;
    }

    /* Everything left is a one-character token; only the kind differs. */
    switch (c) {
        case ';':
            token->kind = TOKEN_SEMICOLON;
            break;
        case ':':
            token->kind = TOKEN_COLON;
            break;
        case '=':
            token->kind = TOKEN_EQ;
            break;
        case '{':
            token->kind = TOKEN_LBRACE;
            break;
        case '}':
            token->kind = TOKEN_RBRACE;
            break;
        default:
            token->kind = TOKEN_UNKOWN;
            break;
    }
    token->value = (obe_string_t){
        .chars = lexer->source.chars + start_loc.offset,
        .length = 1
    };
    token->loc = start_loc;
    obe_lexer_next_char(lexer);
}

/*
 * Printable spelling for each token kind, indexed by the kind's enum value.
 * Kinds with a fixed spelling map to that text; the remaining kinds use an
 * angle-bracket placeholder.
 */
static char* token_to_cstr_table[] = {
    /* fixed spellings */
    [TOKEN_KW_FN] = "fn",
    [TOKEN_KW_BR] = "br",
    [TOKEN_KW_RETURN] = "return",
    [TOKEN_INT] = "int",
    [TOKEN_EQ] = "=",
    [TOKEN_COLON] = ":",
    [TOKEN_SEMICOLON] = ";",
    [TOKEN_LBRACE] = "{",
    [TOKEN_RBRACE] = "}",

    /* placeholders */
    [TOKEN_IDENT] = "<ident>",
    [TOKEN_LABEL] = "<label>",
    [TOKEN_NUMBER] = "<number>",
    [TOKEN_EOF] = "<eof>",
    [TOKEN_UNKOWN] = "<?unkown?>",
};

/**
 * Look up the printable spelling of a token kind.
 *
 * The kind is used directly as an index into token_to_cstr_table; no range
 * check is performed.
 *
 * @param kind token kind to describe.
 * @return the table entry for kind.
 */
char*
obe_token_to_cstr(obe_token_kind_t kind)
{
    char* spelling = token_to_cstr_table[kind];
    return spelling;
}