/*
 * Copyright (C) 2023 Johnny Richard
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
#include "lexer.h"
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

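/*
 * Minimal usage sketch (illustrative only, not part of this translation unit);
 * the file path below is hypothetical:
 *
 *     lexer_t lexer;
 *     token_t token;
 *     lexer_init(&lexer, "examples/main.src");
 *     do {
 *         lexer_next_token(&lexer, &token);
 *         printf("%s\n", token_kind_to_str(token.kind));
 *     } while (token.kind != TOKEN_EOF);
 */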
void
lexer_init(lexer_t *lexer, char *filepath)
{
    assert(lexer && "lexer must be defined");
    assert(filepath && "filepath must be defined");
    lexer->filepath = filepath;
    lexer->srclen = 0;
    lexer->cur = 0;
    lexer->row = 0;
    lexer->bol = 0;
    lexer_load_file_contents(lexer);
}

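// Fills in kind, source location and a one-character value for a token that
// starts at the current cursor position.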
static void
lexer_define_literal_token_props(lexer_t *lexer, token_t *token, token_kind_t kind)
{
    token->kind = kind;
    token->value = string_view_new(lexer->src + lexer->cur, 1);
    token->filepath = lexer->filepath;
    token->row = lexer->row;
    token->col = lexer->cur - lexer->bol;
    token->bol = lexer->bol;
}

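// Skips whitespace; on '\n' it also advances the row counter and records the
// beginning-of-line offset so token columns stay correct.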
static void
lexer_drop_spaces(lexer_t *lexer)
{
    while (lexer_is_not_eof(lexer) && isspace((unsigned char)lexer_current_char(lexer))) {
        if (lexer_current_char(lexer) == '\n') {
            lexer_drop_char(lexer);
            lexer->row++;
            lexer->bol = lexer->cur;
            continue;
        }
        lexer_drop_char(lexer);
    }
}

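// Consumes a run of decimal digits and emits a TOKEN_NUMBER spanning them.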
static void
lexer_tokenize_number(lexer_t *lexer, token_t *token)
{
    size_t begin = lexer->cur;
    while (lexer_is_not_eof(lexer) && isdigit((unsigned char)lexer_current_char(lexer))) {
        lexer_drop_char(lexer);
    }
    token->kind = TOKEN_NUMBER;
    token->value = string_view_new(lexer->src + begin, lexer->cur - begin);
    token->filepath = lexer->filepath;
    token->row = lexer->row;
    token->col = begin - lexer->bol;
    token->bol = lexer->bol;
}

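// Consumes an identifier made of letters, digits and underscores and emits a
// TOKEN_NAME; keyword promotion happens afterwards in
// lexer_token_process_keyword.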
static void
lexer_tokenize_name(lexer_t *lexer, token_t *token)
{
    size_t begin = lexer->cur;
    while (lexer_is_not_eof(lexer) && (isalnum((unsigned char)lexer_current_char(lexer)) || lexer_current_char(lexer) == '_')) {
        lexer_drop_char(lexer);
    }
    token->kind = TOKEN_NAME;
    token->value = string_view_new(lexer->src + begin, lexer->cur - begin);
    token->filepath = lexer->filepath;
    token->row = lexer->row;
    token->col = begin - lexer->bol;
    token->bol = lexer->bol;
}

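// Reclassifies a TOKEN_NAME as a keyword token when its text is exactly
// "let", "return" or "fn"; anything else is left untouched.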
static void
lexer_token_process_keyword(token_t *token)
{
    if (string_view_eq(string_view_from_str("let"), token->value)) {
        token->kind = TOKEN_KEYWORD_LET;
        return;
    }
    if (string_view_eq(string_view_from_str("return"), token->value)) {
        token->kind = TOKEN_KEYWORD_RETURN;
        return;
    }
    if (string_view_eq(string_view_from_str("fn"), token->value)) {
        token->kind = TOKEN_KEYWORD_FN;
        return;
    }
}

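// Produces the next token: skips whitespace, then dispatches on the current
// character to numbers, names/keywords and single-character punctuation or
// operators; anything unrecognized becomes TOKEN_UNKNOWN and end of input
// yields TOKEN_EOF.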
void
lexer_next_token(lexer_t *lexer, token_t *token)
{
    lexer_drop_spaces(lexer);
    if (lexer_is_eof(lexer)) {
        lexer_define_literal_token_props(lexer, token, TOKEN_EOF);
        lexer_drop_char(lexer);
        return;
    }
    if (isdigit((unsigned char)lexer_current_char(lexer))) {
        lexer_tokenize_number(lexer, token);
        return;
    }
    if (isalpha((unsigned char)lexer_current_char(lexer))) {
        lexer_tokenize_name(lexer, token);
        lexer_token_process_keyword(token);
        return;
    }
    if (lexer_current_char(lexer) == '(') {
        lexer_define_literal_token_props(lexer, token, TOKEN_OPAREN);
        lexer_drop_char(lexer);
        return;
    }
    if (lexer_current_char(lexer) == ')') {
        lexer_define_literal_token_props(lexer, token, TOKEN_CPAREN);
        lexer_drop_char(lexer);
        return;
    }
    if (lexer_current_char(lexer) == ':') {
        lexer_define_literal_token_props(lexer, token, TOKEN_COLON);
        lexer_drop_char(lexer);
        return;
    }
    if (lexer_current_char(lexer) == ';') {
        lexer_define_literal_token_props(lexer, token, TOKEN_SEMICOLON);
        lexer_drop_char(lexer);
        return;
    }
    if (lexer_current_char(lexer) == '{') {
        lexer_define_literal_token_props(lexer, token, TOKEN_OCURLY);
        lexer_drop_char(lexer);
        return;
    }
    if (lexer_current_char(lexer) == '}') {
        lexer_define_literal_token_props(lexer, token, TOKEN_CCURLY);
        lexer_drop_char(lexer);
        return;
    }
    if (lexer_current_char(lexer) == '+') {
        lexer_define_literal_token_props(lexer, token, TOKEN_PLUS);
        lexer_drop_char(lexer);
        return;
    }
    if (lexer_current_char(lexer) == '-') {
        lexer_define_literal_token_props(lexer, token, TOKEN_MINUS);
        lexer_drop_char(lexer);
        return;
    }
    if (lexer_current_char(lexer) == '*') {
        lexer_define_literal_token_props(lexer, token, TOKEN_STAR);
        lexer_drop_char(lexer);
        return;
    }
    if (lexer_current_char(lexer) == '/') {
        lexer_define_literal_token_props(lexer, token, TOKEN_SLASH);
        lexer_drop_char(lexer);
        return;
    }
    if (lexer_current_char(lexer) == '=') {
        lexer_define_literal_token_props(lexer, token, TOKEN_EQUAL);
        lexer_drop_char(lexer);
        return;
    }
    lexer_define_literal_token_props(lexer, token, TOKEN_UNKNOWN);
    lexer_drop_char(lexer);
    return;
}

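// Reads the entire source file into a heap-allocated, NUL-terminated buffer
// (lexer->src) and records its length in lexer->srclen; exits the process on
// any I/O or allocation failure.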
void
lexer_load_file_contents(lexer_t *lexer)
{
    assert(lexer && "lexer must be defined");
    FILE *file;
    file = fopen(lexer->filepath, "r");
    if (!file) {
        fprintf(stderr, "tried to open file '%s': %s\n", lexer->filepath, strerror(errno));
        exit(EXIT_FAILURE);
    }
    fseek(file, 0L, SEEK_END);
    lexer->srclen = ftell(file);
    rewind(file);
    lexer->src = calloc(1, lexer->srclen + 1);
    if (!lexer->src) {
        fclose(file);
        perror("lexer_load_file_contents -> calloc");
        exit(EXIT_FAILURE);
    }
    // fread only signals an error when there was something to read: for an
    // empty file it returns 0 even on success.
    if (lexer->srclen > 0 && fread(lexer->src, lexer->srclen, 1, file) != 1) {
        fclose(file);
        free(lexer->src);
        // FIXME: distinguish error using ferror and feof functions
        fprintf(stderr, "could not read file '%s'\n", lexer->filepath);
        exit(EXIT_FAILURE);
    }
    fclose(file);
}

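// Looks `level` tokens ahead without consuming input: the cursor, row and
// beginning-of-line offsets are saved before scanning and restored afterwards.
// Stops early if TOKEN_EOF is reached.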
void
lexer_lookahead(lexer_t *lexer, token_t *token, size_t level)
{
    uint32_t cur = lexer->cur;
    uint32_t row = lexer->row;
    uint32_t bol = lexer->bol;
    while (level != 0) {
        lexer_next_token(lexer, token);
        level--;
        if (token->kind == TOKEN_EOF) {
            break;
        }
    }
    lexer->cur = cur;
    lexer->row = row;
    lexer->bol = bol;
}

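// Convenience wrapper: peeks one token ahead without consuming it.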
void
lexer_peek_next_token(lexer_t *lexer, token_t *token)
{
    lexer_lookahead(lexer, token, 1);
}

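// Consumes and discards the next token.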
void
lexer_drop_next_token(lexer_t *lexer)
{
    token_t token;
    lexer_next_token(lexer, &token);
}

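// Advances the cursor by one character; the guard keeps it from running past
// the end of the source buffer when called at EOF.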
void
lexer_drop_char(lexer_t *lexer)
{
    if (lexer_is_not_eof(lexer)) {
        lexer->cur++;
    }
}

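// True once the cursor has consumed every character of the source buffer.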
bool
lexer_is_eof(lexer_t *lexer)
{
    return lexer->cur >= lexer->srclen;
}

bool
lexer_is_not_eof(lexer_t *lexer)
{
    return !lexer_is_eof(lexer);
}

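// Returns the character under the cursor; the buffer is NUL-terminated, so at
// EOF this reads '\0'.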
char
lexer_current_char(lexer_t *lexer)
{
    return lexer->src[lexer->cur];
}

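// Maps a token kind to a printable string: punctuation and keywords map to
// their literal spelling, the rest to their enum name.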
char *
token_kind_to_str(token_kind_t kind)
{
    switch (kind) {
        case TOKEN_NAME:
            return "TOKEN_NAME";
        case TOKEN_OPAREN:
            return "(";
        case TOKEN_CPAREN:
            return ")";
        case TOKEN_COLON:
            return ":";
        case TOKEN_SEMICOLON:
            return ";";
        case TOKEN_OCURLY:
            return "{";
        case TOKEN_CCURLY:
            return "}";
        case TOKEN_NUMBER:
            return "TOKEN_NUMBER";
        case TOKEN_PLUS:
            return "+";
        case TOKEN_MINUS:
            return "-";
        case TOKEN_STAR:
            return "*";
        case TOKEN_SLASH:
            return "/";
        case TOKEN_EQUAL:
            return "=";
        case TOKEN_KEYWORD_RETURN:
            return "return";
        case TOKEN_KEYWORD_FN:
            return "fn";
        case TOKEN_KEYWORD_LET:
            return "let";
        case TOKEN_EOF:
            return "TOKEN_EOF";
        case TOKEN_UNKNOWN:
            return "TOKEN_UNKNOWN";
    }
    assert(false && "unreachable");
}