/*
 * Copyright (C) 2023 Johnny Richard
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "lexer.h"

// Initializes the lexer state and loads the whole source file into memory.
void
lexer_init(lexer_t *lexer, char *filepath)
{
    assert(lexer && "lexer must be defined");
    assert(filepath && "filepath must be defined");
    lexer->filepath = filepath;
    lexer->srclen = 0;
    lexer->cur = 0;
    lexer->row = 0;
    lexer->bol = 0;
    lexer_load_file_contents(lexer);
}

// Fills a single-character token located at the current cursor position.
static void
lexer_define_literal_token_props(lexer_t *lexer, token_t *token, token_kind_t kind)
{
    token->kind = kind;
    token->value = string_view_new(lexer->src + lexer->cur, 1);
    token->filepath = lexer->filepath;
    token->row = lexer->row;
    token->col = lexer->cur - lexer->bol;
    token->bol = lexer->bol;
}

// Skips whitespace; newlines also advance the row and the beginning-of-line offset.
static void
lexer_drop_spaces(lexer_t *lexer)
{
    while (lexer_is_not_eof(lexer) && isspace(lexer_current_char(lexer))) {
        if (lexer_current_char(lexer) == '\n') {
            lexer_drop_char(lexer);
            lexer->row++;
            lexer->bol = lexer->cur;
            continue;
        }
        lexer_drop_char(lexer);
    }
}

// Consumes a run of digits and produces a TOKEN_NUMBER.
static void
lexer_tokenize_number(lexer_t *lexer, token_t *token)
{
    size_t begin = lexer->cur;
    while (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) {
        lexer_drop_char(lexer);
    }
    token->kind = TOKEN_NUMBER;
    token->value = string_view_new(lexer->src + begin, lexer->cur - begin);
    token->filepath = lexer->filepath;
    token->row = lexer->row;
    token->col = begin - lexer->bol;
    token->bol = lexer->bol;
}

// Consumes a run of alphanumeric characters and produces a TOKEN_NAME.
static void
lexer_tokenize_name(lexer_t *lexer, token_t *token)
{
    size_t begin = lexer->cur;
    while (lexer_is_not_eof(lexer) && isalnum(lexer_current_char(lexer))) {
        lexer_drop_char(lexer);
    }
    token->kind = TOKEN_NAME;
    token->value = string_view_new(lexer->src + begin, lexer->cur - begin);
    token->filepath = lexer->filepath;
    token->row = lexer->row;
    token->col = begin - lexer->bol;
    token->bol = lexer->bol;
}

// Produces the next token by dispatching on the current character: digits
// become numbers, letters become names, known punctuation and arithmetic
// operators become literal tokens, and anything else is TOKEN_UNKNOWN.
void
lexer_next_token(lexer_t *lexer, token_t *token)
{
    lexer_drop_spaces(lexer);

    if (lexer_is_eof(lexer)) {
        lexer_define_literal_token_props(lexer, token, TOKEN_EOF);
        lexer_drop_char(lexer);
        return;
    }

    if (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) {
        lexer_tokenize_number(lexer, token);
        return;
    }

    if (lexer_is_not_eof(lexer) && isalpha(lexer_current_char(lexer))) {
        lexer_tokenize_name(lexer, token);
        return;
    }

    if (lexer_is_not_eof(lexer)) {
        if (lexer_current_char(lexer) == '(') {
            lexer_define_literal_token_props(lexer, token, TOKEN_OPAREN);
            lexer_drop_char(lexer);
            return;
        }
        if (lexer_current_char(lexer) == ')') {
            lexer_define_literal_token_props(lexer, token, TOKEN_CPAREN);
            lexer_drop_char(lexer);
            return;
        }
        if (lexer_current_char(lexer) == ':') {
            lexer_define_literal_token_props(lexer, token, TOKEN_COLON);
            lexer_drop_char(lexer);
            return;
        }
        if (lexer_current_char(lexer) == ';') {
            lexer_define_literal_token_props(lexer, token, TOKEN_SEMICOLON);
            lexer_drop_char(lexer);
            return;
        }
        if (lexer_current_char(lexer) == '{') {
            lexer_define_literal_token_props(lexer, token, TOKEN_OCURLY);
            lexer_drop_char(lexer);
            return;
        }
        if (lexer_current_char(lexer) == '}') {
            lexer_define_literal_token_props(lexer, token, TOKEN_CCURLY);
            lexer_drop_char(lexer);
            return;
        }
        if (lexer_current_char(lexer) == '+' || lexer_current_char(lexer) == '-' ||
            lexer_current_char(lexer) == '*' || lexer_current_char(lexer) == '/') {
            lexer_define_literal_token_props(lexer, token, TOKEN_OP);
            lexer_drop_char(lexer);
            return;
        }
    }

    lexer_define_literal_token_props(lexer, token, TOKEN_UNKNOWN);
    lexer_drop_char(lexer);
    return;
}

// Reads the entire file into a NUL-terminated heap buffer and records its length.
void
lexer_load_file_contents(lexer_t *lexer)
{
    assert(lexer && "lexer must be defined");

    FILE *file = fopen(lexer->filepath, "r");
    if (!file) {
        fprintf(stderr, "tried to open file '%s': %s\n", lexer->filepath, strerror(errno));
        exit(EXIT_FAILURE);
    }

    fseek(file, 0L, SEEK_END);
    lexer->srclen = ftell(file);
    rewind(file);

    lexer->src = calloc(1, lexer->srclen + 1);
    if (!lexer->src) {
        fclose(file);
        perror("lexer_load_file_contents -> calloc");
        exit(EXIT_FAILURE);
    }

    if (fread(lexer->src, lexer->srclen, 1, file) != 1) {
        fclose(file);
        free(lexer->src);
        // FIXME: distinguish error using ferror and feof functions
        fprintf(stderr, "could not read file '%s'\n", lexer->filepath);
        exit(EXIT_FAILURE);
    }

    // Close the file on the success path as well to avoid leaking the descriptor.
    fclose(file);
}

// Rewinds the lexer to the first character of a previously emitted token.
void
lexer_step_back_to(lexer_t *lexer, token_t *token)
{
    lexer->cur = token->bol + token->col;
    lexer->row = token->row;
    lexer->bol = token->bol;
}

void
lexer_drop_char(lexer_t *lexer)
{
    lexer->cur++;
}

bool
lexer_is_eof(lexer_t *lexer)
{
    return lexer->cur >= lexer->srclen;
}

bool
lexer_is_not_eof(lexer_t *lexer)
{
    return !lexer_is_eof(lexer);
}

char
lexer_current_char(lexer_t *lexer)
{
    return lexer->src[lexer->cur];
}

// Returns a printable name (or the literal spelling) for a token kind.
char *
token_kind_to_str(token_kind_t kind)
{
    switch (kind) {
        case TOKEN_NAME:
            return "TOKEN_NAME";
        case TOKEN_OPAREN:
            return "(";
        case TOKEN_CPAREN:
            return ")";
        case TOKEN_COLON:
            return ":";
        case TOKEN_SEMICOLON:
            return ";";
        case TOKEN_OCURLY:
            return "{";
        case TOKEN_CCURLY:
            return "}";
        case TOKEN_NUMBER:
            return "TOKEN_NUMBER";
        case TOKEN_OP:
            return "TOKEN_OP";
        case TOKEN_EOF:
            return "TOKEN_EOF";
        case TOKEN_UNKNOWN:
            return "TOKEN_UNKNOWN";
        default:
            assert(false && "unreachable");
    }
}
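
/*
 * Usage sketch, assuming the lexer_t/token_t fields used above (src, kind,
 * filepath, row, col) are exposed by lexer.h: a minimal driver that tokenizes
 * one file and prints each token kind until TOKEN_EOF. The guard macro
 * LEXER_USAGE_EXAMPLE is hypothetical and exists only so this example does
 * not add a second entry point to the regular build.
 */
#ifdef LEXER_USAGE_EXAMPLE
int
main(int argc, char **argv)
{
    if (argc < 2) {
        fprintf(stderr, "usage: %s <source-file>\n", argv[0]);
        return EXIT_FAILURE;
    }

    lexer_t lexer;
    token_t token;
    lexer_init(&lexer, argv[1]);

    do {
        lexer_next_token(&lexer, &token);
        // row and col are zero-based in this lexer, hence the +1 for display.
        printf("%s:%lu:%lu: %s\n", token.filepath,
               (unsigned long)token.row + 1, (unsigned long)token.col + 1,
               token_kind_to_str(token.kind));
    } while (token.kind != TOKEN_EOF);

    free(lexer.src);
    return EXIT_SUCCESS;
}
#endif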