/*
 * Copyright (C) 2023 Johnny Richard
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Kinds of token the lexer can produce. */
typedef enum {
    TOKEN_NAME,
    TOKEN_OPAREN,
    TOKEN_CPAREN,
    TOKEN_COLON,
    TOKEN_SEMICOLON,
    TOKEN_OCURLY,
    TOKEN_CCURLY,
    TOKEN_NUMBER,
    TOKEN_EOF
} token_kind_t;

/*
 * A single token.
 *
 * `value` is a heap-allocated, NUL-terminated copy of the token text that the
 * caller must free(); it is NULL for TOKEN_EOF.  `row` and `col` are 0-based.
 */
typedef struct token_t {
    token_kind_t kind;
    char *value;
    uint32_t row;
    uint32_t col;
} token_t;

/*
 * Lexer state.
 *
 * `src` holds the whole file (`srclen` bytes plus a terminating NUL), `cur` is
 * the offset of the next unread byte, `row` is the current 0-based line and
 * `bol` is the offset at which the current line begins.
 */
typedef struct lexer_t {
    char *filepath;
    char *src;
    size_t srclen;
    uint32_t cur;
    uint32_t row;
    uint32_t bol;
} lexer_t;

/*
 * Read the whole file named by lexer->filepath into lexer->src and store its
 * size in lexer->srclen.  Any failure prints a diagnostic to stderr and
 * terminates the process with EXIT_FAILURE.
 */
void lexer_load_file_contents(lexer_t *lexer)
{
    assert(lexer && "lexer must be defined");

    /* Binary mode so the size reported by ftell matches what fread returns. */
    FILE *file = fopen(lexer->filepath, "rb");
    if (!file) {
        fprintf(stderr, "tried to open file '%s': %s\n", lexer->filepath, strerror(errno));
        exit(EXIT_FAILURE);
    }

    long size = -1L;
    if (fseek(file, 0L, SEEK_END) == 0) {
        size = ftell(file);
    }
    if (size < 0) {
        int saved_errno = errno;
        fclose(file);
        fprintf(stderr, "could not determine size of file '%s': %s\n",
                lexer->filepath, strerror(saved_errno));
        exit(EXIT_FAILURE);
    }

    /* Offsets are tracked in uint32_t fields; a larger file would wrap them. */
    if ((uintmax_t)size > UINT32_MAX) {
        fclose(file);
        fprintf(stderr, "file '%s' is too large\n", lexer->filepath);
        exit(EXIT_FAILURE);
    }
    rewind(file);

    lexer->srclen = (size_t)size;
    lexer->src = calloc(1, lexer->srclen + 1);
    if (!lexer->src) {
        fclose(file);
        perror("lexer_load_file_contents -> calloc");
        exit(EXIT_FAILURE);
    }

    /*
     * Read byte-wise so that an empty file (srclen == 0) is a successful
     * zero-byte read rather than a failure.
     */
    size_t nread = fread(lexer->src, 1, lexer->srclen, file);
    if (nread != lexer->srclen) {
        const char *reason = ferror(file) ? "read error" : "unexpected end of file";
        fprintf(stderr, "could not read file '%s': %s\n", lexer->filepath, reason);
        fclose(file);
        free(lexer->src);
        lexer->src = NULL;
        exit(EXIT_FAILURE);
    }

    fclose(file);
}

/*
 * Initialise `lexer` for `filepath` and load the file contents.  The filepath
 * pointer is stored, not copied, so it must outlive the lexer.
 */
void lexer_init(lexer_t *lexer, char *filepath)
{
    assert(lexer && "lexer must be defined");
    assert(filepath && "filepath must be defined");

    lexer->filepath = filepath;
    lexer->src = NULL;
    lexer->srclen = 0;
    lexer->cur = 0;
    lexer->row = 0;
    lexer->bol = 0;

    lexer_load_file_contents(lexer);
}

/* Advance the cursor by one byte. */
void lexer_drop_char(lexer_t *lexer)
{
    lexer->cur++;
}

/* True once the cursor has reached or passed the end of the source. */
bool lexer_is_eof(lexer_t *lexer)
{
    return lexer->cur >= lexer->srclen;
}

/* Negation of lexer_is_eof. */
bool lexer_is_not_eof(lexer_t *lexer)
{
    return !lexer_is_eof(lexer);
}

/* Byte under the cursor; the caller must ensure the lexer is not at EOF. */
char lexer_current_char(lexer_t *lexer)
{
    return lexer->src[lexer->cur];
}

/*
 * True when the lexer is not at EOF and the current byte satisfies the given
 * <ctype.h> classifier.  The byte is converted to unsigned char first because
 * passing a negative char to those functions is undefined behaviour.
 */
static bool lexer_current_is(lexer_t *lexer, int (*classify)(int))
{
    if (lexer_is_eof(lexer)) {
        return false;
    }
    return classify((unsigned char)lexer_current_char(lexer)) != 0;
}

/*
 * Return a freshly allocated NUL-terminated copy of `len` bytes of `text`.
 * Exits the process if the allocation fails.
 */
static char *lexer_copy_text(const char *text, size_t len)
{
    char *copy = malloc(len + 1);
    if (!copy) {
        perror("lexer_copy_text -> malloc");
        exit(EXIT_FAILURE);
    }
    memcpy(copy, text, len);
    copy[len] = '\0';
    return copy;
}

/* Map a punctuation byte to its token kind; false if `c` is not punctuation. */
static bool lexer_punctuation_kind(char c, token_kind_t *kind)
{
    switch (c) {
    case '(': *kind = TOKEN_OPAREN;    return true;
    case ')': *kind = TOKEN_CPAREN;    return true;
    case ':': *kind = TOKEN_COLON;     return true;
    case ';': *kind = TOKEN_SEMICOLON; return true;
    case '{': *kind = TOKEN_OCURLY;    return true;
    case '}': *kind = TOKEN_CCURLY;    return true;
    default:  return false;
    }
}

/*
 * Scan the next token from `lexer` into `token`.
 *
 * Leading whitespace is skipped, with row/bol updated on every newline.  For
 * every kind except TOKEN_EOF, token->value is a heap copy of the token text
 * owned by the caller.  TOKEN_EOF (value == NULL) is produced at end of input
 * and also when the current byte does not start any known token; in that case
 * the cursor is not advanced.
 */
void lexer_next_token(lexer_t *lexer, token_t *token)
{
    while (lexer_current_is(lexer, isspace)) {
        bool is_newline = lexer_current_char(lexer) == '\n';
        lexer_drop_char(lexer);
        if (is_newline) {
            lexer->row++;
            lexer->bol = lexer->cur;
        }
    }

    uint32_t begin = lexer->cur;

    token->kind = TOKEN_EOF;
    token->value = NULL;
    token->row = lexer->row;
    token->col = begin - lexer->bol;

    if (lexer_is_eof(lexer)) {
        return;
    }

    if (lexer_current_is(lexer, isdigit)) {
        while (lexer_current_is(lexer, isdigit)) {
            lexer_drop_char(lexer);
        }
        token->kind = TOKEN_NUMBER;
        token->value = lexer_copy_text(lexer->src + begin, lexer->cur - begin);
        return;
    }

    if (lexer_current_is(lexer, isalpha)) {
        while (lexer_current_is(lexer, isalnum)) {
            lexer_drop_char(lexer);
        }
        token->kind = TOKEN_NAME;
        token->value = lexer_copy_text(lexer->src + begin, lexer->cur - begin);
        return;
    }

    token_kind_t punctuation;
    if (lexer_punctuation_kind(lexer_current_char(lexer), &punctuation)) {
        token->kind = punctuation;
        token->value = lexer_copy_text(lexer->src + begin, 1);
        lexer_drop_char(lexer);
        return;
    }

    /* Unrecognised byte: reported as TOKEN_EOF, which ends the caller's loop. */
}

/* Print the command-line synopsis to stderr. */
void print_usage(void)
{
    fputs("pipac <filepath>\n", stderr);
}

/*
 * Entry point: load the file named by argv[1] and print one line per token
 * as "path:row:col: [kind=K, value='V']" with 1-based row and column.
 */
int main(int argc, char **argv)
{
    if (argc < 2) {
        print_usage();
        return EXIT_FAILURE;
    }

    char *filepath = argv[1];

    lexer_t lexer;
    lexer_init(&lexer, filepath);

    printf("[INFO]: %zu bytes loaded [filename='%s']\n", lexer.srclen, lexer.filepath);

    token_t token;
    for (lexer_next_token(&lexer, &token);
         token.kind != TOKEN_EOF;
         lexer_next_token(&lexer, &token)) {
        printf("%s:%lu:%lu: [kind=%d, value='%s']\n",
               lexer.filepath,
               (unsigned long)token.row + 1,
               (unsigned long)token.col + 1,
               (int)token.kind,
               token.value);
        /* Each token owns its text; release it before fetching the next. */
        free(token.value);
    }

    free(lexer.src);
    return EXIT_SUCCESS;
}