/*
 * Copyright (C) 2023 Johnny Richard
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "lexer.h"

// Initializes the lexer state and loads the whole source file into memory.
void
lexer_init(lexer_t *lexer, char *filepath)
{
    assert(lexer && "lexer must be defined");
    assert(filepath && "filepath must be defined");
    lexer->filepath = filepath;
    lexer->srclen = 0;
    lexer->cur = 0;
    lexer->row = 0;
    lexer->bol = 0;
    lexer_load_file_contents(lexer);
}

// Fills a single-character token located at the current cursor position.
static void
lexer_define_literal_token_props(lexer_t *lexer, token_t *token, token_kind_t kind)
{
    token->kind = kind;
    token->value = string_view_new(lexer->src + lexer->cur, 1);
    token->filepath = lexer->filepath;
    token->row = lexer->row;
    token->col = lexer->cur - lexer->bol;
    token->bol = lexer->bol;
}

// Skips whitespace; newlines also advance the row and the beginning-of-line offset.
static void
lexer_drop_spaces(lexer_t *lexer)
{
    while (lexer_is_not_eof(lexer) && isspace(lexer_current_char(lexer))) {
        if (lexer_current_char(lexer) == '\n') {
            lexer_drop_char(lexer);
            lexer->row++;
            lexer->bol = lexer->cur;
            continue;
        }
        lexer_drop_char(lexer);
    }
}

// Consumes a run of digits and produces a TOKEN_NUMBER.
static void
lexer_tokenize_number(lexer_t *lexer, token_t *token)
{
    size_t begin = lexer->cur;
    while (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) {
        lexer_drop_char(lexer);
    }
    token->kind = TOKEN_NUMBER;
    token->value = string_view_new(lexer->src + begin, lexer->cur - begin);
    token->filepath = lexer->filepath;
    token->row = lexer->row;
    token->col = begin - lexer->bol;
    token->bol = lexer->bol;
}

// Consumes a run of alphanumeric characters and produces a TOKEN_NAME.
static void
lexer_tokenize_name(lexer_t *lexer, token_t *token)
{
    size_t begin = lexer->cur;
    while (lexer_is_not_eof(lexer) && isalnum(lexer_current_char(lexer))) {
        lexer_drop_char(lexer);
    }
    token->kind = TOKEN_NAME;
    token->value = string_view_new(lexer->src + begin, lexer->cur - begin);
    token->filepath = lexer->filepath;
    token->row = lexer->row;
    token->col = begin - lexer->bol;
    token->bol = lexer->bol;
}

// Produces the next token by dispatching on the current character: digits
// become numbers, letters become names, known punctuation and arithmetic
// operators become literal tokens, and anything else is TOKEN_UNKNOWN.
void
lexer_next_token(lexer_t *lexer, token_t *token)
{
    lexer_drop_spaces(lexer);

    if (lexer_is_eof(lexer)) {
        lexer_define_literal_token_props(lexer, token, TOKEN_EOF);
        lexer_drop_char(lexer);
        return;
    }

    if (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) {
        lexer_tokenize_number(lexer, token);
        return;
    }

    if (lexer_is_not_eof(lexer) && isalpha(lexer_current_char(lexer))) {
        lexer_tokenize_name(lexer, token);
        return;
    }

    if (lexer_is_not_eof(lexer)) {
        if (lexer_current_char(lexer) == '(') {
            lexer_define_literal_token_props(lexer, token, TOKEN_OPAREN);
            lexer_drop_char(lexer);
            return;
        }
        if (lexer_current_char(lexer) == ')') {
            lexer_define_literal_token_props(lexer, token, TOKEN_CPAREN);
            lexer_drop_char(lexer);
            return;
        }
        if (lexer_current_char(lexer) == ':') {
            lexer_define_literal_token_props(lexer, token, TOKEN_COLON);
            lexer_drop_char(lexer);
            return;
        }
        if (lexer_current_char(lexer) == ';') {
            lexer_define_literal_token_props(lexer, token, TOKEN_SEMICOLON);
            lexer_drop_char(lexer);
            return;
        }
        if (lexer_current_char(lexer) == '{') {
            lexer_define_literal_token_props(lexer, token, TOKEN_OCURLY);
            lexer_drop_char(lexer);
            return;
        }
        if (lexer_current_char(lexer) == '}') {
            lexer_define_literal_token_props(lexer, token, TOKEN_CCURLY);
            lexer_drop_char(lexer);
            return;
        }
        if (lexer_current_char(lexer) == '+' || lexer_current_char(lexer) == '-' ||
            lexer_current_char(lexer) == '*' || lexer_current_char(lexer) == '/') {
            lexer_define_literal_token_props(lexer, token, TOKEN_OP);
            lexer_drop_char(lexer);
            return;
        }
    }

    lexer_define_literal_token_props(lexer, token, TOKEN_UNKNOWN);
    lexer_drop_char(lexer);
    return;
}

// Reads the entire file into a NUL-terminated heap buffer and records its length.
void
lexer_load_file_contents(lexer_t *lexer)
{
    assert(lexer && "lexer must be defined");

    FILE *file = fopen(lexer->filepath, "r");
    if (!file) {
        fprintf(stderr, "tried to open file '%s': %s\n", lexer->filepath, strerror(errno));
        exit(EXIT_FAILURE);
    }

    fseek(file, 0L, SEEK_END);
    lexer->srclen = ftell(file);
    rewind(file);

    lexer->src = calloc(1, lexer->srclen + 1);
    if (!lexer->src) {
        fclose(file);
        perror("lexer_load_file_contents -> calloc");
        exit(EXIT_FAILURE);
    }

    if (fread(lexer->src, lexer->srclen, 1, file) != 1) {
        fclose(file);
        free(lexer->src);
        // FIXME: distinguish error using ferror and feof functions
        fprintf(stderr, "could not read file '%s'\n", lexer->filepath);
        exit(EXIT_FAILURE);
    }

    // Close the file on the success path as well to avoid leaking the descriptor.
    fclose(file);
}

// Rewinds the lexer to the first character of a previously emitted token.
void
lexer_step_back_to(lexer_t *lexer, token_t *token)
{
    lexer->cur = token->bol + token->col;
    lexer->row = token->row;
    lexer->bol = token->bol;
}

void
lexer_drop_char(lexer_t *lexer)
{
    lexer->cur++;
}

bool
lexer_is_eof(lexer_t *lexer)
{
    return lexer->cur >= lexer->srclen;
}

bool
lexer_is_not_eof(lexer_t *lexer)
{
    return !lexer_is_eof(lexer);
}

char
lexer_current_char(lexer_t *lexer)
{
    return lexer->src[lexer->cur];
}

// Returns a printable name (or the literal spelling) for a token kind.
char *
token_kind_to_str(token_kind_t kind)
{
    switch (kind) {
        case TOKEN_NAME:
            return "TOKEN_NAME";
        case TOKEN_OPAREN:
            return "(";
        case TOKEN_CPAREN:
            return ")";
        case TOKEN_COLON:
            return ":";
        case TOKEN_SEMICOLON:
            return ";";
        case TOKEN_OCURLY:
            return "{";
        case TOKEN_CCURLY:
            return "}";
        case TOKEN_NUMBER:
            return "TOKEN_NUMBER";
        case TOKEN_OP:
            return "TOKEN_OP";
        case TOKEN_EOF:
            return "TOKEN_EOF";
        case TOKEN_UNKNOWN:
            return "TOKEN_UNKNOWN";
        default:
            assert(false && "unreachable");
    }
}
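
/*
 * Usage sketch, assuming the lexer_t/token_t fields used above (src, kind,
 * filepath, row, col) are exposed by lexer.h: a minimal driver that tokenizes
 * one file and prints each token kind until TOKEN_EOF. The guard macro
 * LEXER_USAGE_EXAMPLE is hypothetical and exists only so this example does
 * not add a second entry point to the regular build.
 */
#ifdef LEXER_USAGE_EXAMPLE
int
main(int argc, char **argv)
{
    if (argc < 2) {
        fprintf(stderr, "usage: %s <source-file>\n", argv[0]);
        return EXIT_FAILURE;
    }

    lexer_t lexer;
    token_t token;
    lexer_init(&lexer, argv[1]);

    do {
        lexer_next_token(&lexer, &token);
        // row and col are zero-based in this lexer, hence the +1 for display.
        printf("%s:%lu:%lu: %s\n", token.filepath,
               (unsigned long)token.row + 1, (unsigned long)token.col + 1,
               token_kind_to_str(token.kind));
    } while (token.kind != TOKEN_EOF);

    free(lexer.src);
    return EXIT_SUCCESS;
}
#endif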