/*
* Copyright (C) 2023 Johnny Richard
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include
#include
#include
#include
#include
#include
#include
#include
/*
 * Kinds of token produced by lexer_next_token().
 *
 * The numeric values are printed verbatim by main() ("kind=%d"), so the
 * order of the enumerators is part of the program's output.
 */
typedef enum {
    TOKEN_NAME = 0,  /* alphabetic character followed by alphanumerics */
    TOKEN_OPAREN,    /* '(' */
    TOKEN_CPAREN,    /* ')' */
    TOKEN_COLON,     /* ':' */
    TOKEN_SEMICOLON, /* ';' */
    TOKEN_OCURLY,    /* '{' */
    TOKEN_CCURLY,    /* '}' */
    TOKEN_NUMBER,    /* run of decimal digits */
    TOKEN_EOF        /* end of input, or a character the lexer does not recognise */
} token_kind_t;
/*
 * A single lexeme together with its position in the source file.
 */
typedef struct token_t {
    /* What sort of token this is. */
    token_kind_t kind;

    /*
     * Text of the token. lexer_next_token() allocates it with
     * strdup()/strndup(); it is only meaningful for kinds other than
     * TOKEN_EOF.
     */
    char *value;

    /* Zero-based line of the token's first character. */
    uint32_t row;

    /* Zero-based offset of the token's first character within its line. */
    uint32_t col;
} token_t;
/*
 * Lexer state: the loaded source text plus a cursor into it.
 */
typedef struct lexer_t {
    /* Path given to lexer_init(); the pointer is stored, not copied. */
    char *filepath;

    /* Source text, allocated by lexer_load_file_contents(). */
    char *src;

    /* Number of bytes of source text held in src. */
    size_t srclen;

    /* Index into src of the character currently under the cursor. */
    uint32_t cur;

    /* Zero-based line number of the cursor; bumped on every '\n'. */
    uint32_t row;

    /* Index into src of the first character of the current line. */
    uint32_t bol;
} lexer_t;
/*
 * Load the whole file named by lexer->filepath into memory.
 *
 * On return lexer->src points to a freshly allocated, NUL-terminated buffer
 * holding the file's contents and lexer->srclen holds the number of bytes
 * actually read. The buffer is owned by the caller of the lexer and must be
 * released with free().
 *
 * Any failure (open, size query, allocation, read) prints a diagnostic to
 * stderr and terminates the process with EXIT_FAILURE.
 */
void
lexer_load_file_contents(lexer_t *lexer)
{
    assert(lexer && "lexer must be defined");

    FILE *file = fopen(lexer->filepath, "r");
    if (!file) {
        fprintf(stderr, "tried to open file '%s': %s\n", lexer->filepath, strerror(errno));
        exit(EXIT_FAILURE);
    }

    /* Measure the file by seeking to its end; ftell() reports -1 on error. */
    long size = -1L;
    if (fseek(file, 0L, SEEK_END) == 0) {
        size = ftell(file);
    }
    if (size < 0 || fseek(file, 0L, SEEK_SET) != 0) {
        /* Report before fclose(), which is allowed to overwrite errno. */
        fprintf(stderr, "could not determine size of file '%s': %s\n", lexer->filepath, strerror(errno));
        fclose(file);
        exit(EXIT_FAILURE);
    }

    /* One extra zeroed byte keeps the buffer NUL-terminated. */
    lexer->srclen = (size_t) size;
    lexer->src = calloc(1, lexer->srclen + 1);
    if (!lexer->src) {
        perror("lexer_load_file_contents -> calloc");
        fclose(file);
        exit(EXIT_FAILURE);
    }

    /*
     * Read byte-wise so that an empty file (zero items requested) is not
     * mistaken for a failure, and tell real I/O errors apart from a short
     * read by asking ferror().
     */
    size_t nread = fread(lexer->src, 1, lexer->srclen, file);
    if (ferror(file)) {
        fprintf(stderr, "could not read file '%s'\n", lexer->filepath);
        fclose(file);
        free(lexer->src);
        exit(EXIT_FAILURE);
    }

    /*
     * A text-mode stream may deliver fewer bytes than ftell() reported;
     * the remainder of the buffer is already zero-filled by calloc().
     */
    lexer->srclen = nread;

    fclose(file);
}
/*
 * Prepare a lexer for the file at filepath.
 *
 * The cursor, line counter and beginning-of-line marker are reset to zero,
 * the path pointer is remembered (not copied), and the file's contents are
 * then loaded through lexer_load_file_contents().
 */
void
lexer_init(lexer_t *lexer, char *filepath)
{
    assert(lexer && "lexer must be defined");
    assert(filepath && "filepath must be defined");

    /* Start at the very first character of the first line. */
    lexer->bol = 0;
    lexer->row = 0;
    lexer->cur = 0;

    /* Nothing is loaded yet; the loader fills in the real length. */
    lexer->srclen = 0;
    lexer->filepath = filepath;

    lexer_load_file_contents(lexer);
}
/*
 * Advance the cursor by one character. No bounds check is performed here.
 */
void
lexer_drop_char(lexer_t *lexer)
{
    lexer->cur += 1;
}
/*
 * Return true once the cursor has reached or passed the end of the source.
 */
bool
lexer_is_eof(lexer_t *lexer)
{
    const bool has_more = lexer->cur < lexer->srclen;

    return !has_more;
}
/*
 * Return true while there is still a character under the cursor; this is
 * the logical negation of lexer_is_eof().
 */
bool
lexer_is_not_eof(lexer_t *lexer)
{
    if (lexer_is_eof(lexer)) {
        return false;
    }
    return true;
}
/*
 * Return the character under the cursor. The index is not range-checked
 * here.
 */
char
lexer_current_char(lexer_t *lexer)
{
    const char *at = lexer->src + lexer->cur;

    return *at;
}
void
lexer_next_token(lexer_t *lexer, token_t *token)
{
while (lexer_is_not_eof(lexer) && isspace(lexer_current_char(lexer))) {
if (lexer_current_char(lexer) == '\n') {
lexer_drop_char(lexer);
lexer->row++;
lexer->bol = lexer->cur;
continue;
}
lexer_drop_char(lexer);
}
if (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) {
size_t begin = lexer->cur;
while (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) {
lexer_drop_char(lexer);
}
token->kind = TOKEN_NUMBER;
token->value = strndup(lexer->src + begin, lexer->cur - begin);
token->row = lexer->row;
token->col = begin - lexer->bol;
return;
}
if (lexer_is_not_eof(lexer) && isalpha(lexer_current_char(lexer))) {
size_t begin = lexer->cur;
while (lexer_is_not_eof(lexer) && isalnum(lexer_current_char(lexer))) {
lexer_drop_char(lexer);
}
token->kind = TOKEN_NAME;
token->value = strndup(lexer->src + begin, lexer->cur - begin);
token->row = lexer->row;
token->col = begin - lexer->bol;
return;
}
if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == '(') {
token->kind = TOKEN_OPAREN;
token->value = strdup("(");
token->row = lexer->row;
token->col = lexer->cur - lexer->bol;
lexer_drop_char(lexer);
return;
}
if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == ')') {
token->kind = TOKEN_CPAREN;
token->value = strdup(")");
token->row = lexer->row;
token->col = lexer->cur - lexer->bol;
lexer_drop_char(lexer);
return;
}
if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == ':') {
token->kind = TOKEN_COLON;
token->value = strdup(":");
token->row = lexer->row;
token->col = lexer->cur - lexer->bol;
lexer_drop_char(lexer);
return;
}
if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == ';') {
token->kind = TOKEN_SEMICOLON;
token->value = strdup(";");
token->row = lexer->row;
token->col = lexer->cur - lexer->bol;
lexer_drop_char(lexer);
return;
}
if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == '{') {
token->kind = TOKEN_OCURLY;
token->value = strdup("{");
token->row = lexer->row;
token->col = lexer->cur - lexer->bol;
lexer_drop_char(lexer);
return;
}
if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == '}') {
token->kind = TOKEN_CCURLY;
token->value = strdup("}");
token->row = lexer->row;
token->col = lexer->cur - lexer->bol;
lexer_drop_char(lexer);
return;
}
token->kind = TOKEN_EOF;
return;
}
/*
 * Print a one-line usage summary to stderr.
 */
void
print_usage(void)
{
    fputs("pipac <filepath>\n", stderr);
}
int
main(int argc, char **argv)
{
if (argc < 2) {
print_usage();
return EXIT_FAILURE;
}
char *filepath = argv[1];
lexer_t lexer;
lexer_init(&lexer, filepath);
printf("[INFO]: %d bytes loaded [filename='%s']\n", lexer.srclen, lexer.filepath);
token_t token;
for (lexer_next_token(&lexer, &token); token.kind != TOKEN_EOF; lexer_next_token(&lexer, &token)) {
printf("%s:%d:%d: [kind=%d, value='%s']\n", lexer.filepath, token.row + 1, token.col + 1, token.kind, token.value);
}
return EXIT_SUCCESS;
}