diff options
Diffstat (limited to 'src/pipac.c')
-rw-r--r-- | src/pipac.c | 256 |
1 files changed, 256 insertions, 0 deletions
/*
 * Copyright (C) 2023 Johnny Richard
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Kinds of token produced by lexer_next_token().
 *
 * TOKEN_UNKNOWN is appended after TOKEN_EOF so that the numeric values of the
 * pre-existing kinds (which print_tokens() writes out) stay unchanged.
 */
typedef enum {
  TOKEN_NAME,
  TOKEN_OPAREN,
  TOKEN_CPAREN,
  TOKEN_COLON,
  TOKEN_SEMICOLON,
  TOKEN_OCURLY,
  TOKEN_CCURLY,
  TOKEN_NUMBER,
  TOKEN_EOF,
  TOKEN_UNKNOWN
} token_kind_t;

/*
 * A single token.
 *
 * value is a heap-allocated, NUL-terminated copy of the lexeme that the
 * receiver must free(); it is NULL for TOKEN_EOF. row and col are zero-based.
 */
typedef struct token_t {
  token_kind_t kind;
  char *value;
  uint32_t row;
  uint32_t col;
} token_t;

/*
 * Lexer state.
 *
 * src holds srclen bytes of file content followed by one NUL byte.
 * cur is the offset of the next unread byte, row the zero-based line of cur,
 * and bol the offset at which the current line begins.
 */
typedef struct lexer_t {
  char *filepath;
  char *src;
  size_t srclen;
  uint32_t cur;
  uint32_t row;
  uint32_t bol;
} lexer_t;

/*
 * Reads the whole file named by lexer->filepath into a newly allocated buffer
 * and stores it in lexer->src / lexer->srclen.
 *
 * Any failure (open, size query, allocation, read) prints a diagnostic to
 * stderr and terminates the process with EXIT_FAILURE.
 */
void
lexer_load_file_contents(lexer_t *lexer)
{
  assert(lexer && "lexer must be defined");

  /* Binary mode: ftell() is only guaranteed to be a byte count for binary streams. */
  FILE *file = fopen(lexer->filepath, "rb");
  if (!file) {
    fprintf(stderr, "tried to open file '%s': %s\n", lexer->filepath, strerror(errno));
    exit(EXIT_FAILURE);
  }

  long size = -1L;
  if (fseek(file, 0L, SEEK_END) == 0) {
    size = ftell(file);
  }
  if (size < 0 || fseek(file, 0L, SEEK_SET) != 0) {
    fprintf(stderr, "could not determine size of file '%s'\n", lexer->filepath);
    fclose(file);
    exit(EXIT_FAILURE);
  }
  lexer->srclen = (size_t) size;

  /* The extra zeroed byte keeps the buffer NUL-terminated. */
  lexer->src = calloc(1, lexer->srclen + 1);
  if (!lexer->src) {
    /* Report before fclose() so errno still belongs to calloc(). */
    perror("lexer_load_file_contents -> calloc");
    fclose(file);
    exit(EXIT_FAILURE);
  }

  /* Element size 1 so that an empty file (srclen == 0) is a successful read. */
  size_t nread = fread(lexer->src, 1, lexer->srclen, file);
  if (nread != lexer->srclen) {
    if (ferror(file)) {
      fprintf(stderr, "could not read file '%s': read error\n", lexer->filepath);
    } else {
      fprintf(stderr, "could not read file '%s': unexpected end of file\n", lexer->filepath);
    }
    fclose(file);
    free(lexer->src);
    lexer->src = NULL;
    exit(EXIT_FAILURE);
  }

  fclose(file);
}

/*
 * Resets the lexer to the start of input and loads the contents of filepath.
 *
 * filepath is stored by pointer, not copied. Terminates the process if the
 * file cannot be loaded (see lexer_load_file_contents()).
 */
void
lexer_init(lexer_t *lexer, char *filepath)
{
  assert(lexer && "lexer must be defined");
  assert(filepath && "filepath must be defined");
  lexer->filepath = filepath;
  lexer->src = NULL;
  lexer->srclen = 0;
  lexer->cur = 0;
  lexer->row = 0;
  lexer->bol = 0;
  lexer_load_file_contents(lexer);
}

/* Advances the cursor by one byte; performs no bounds check. */
void
lexer_drop_char(lexer_t *lexer)
{
  lexer->cur++;
}

/* Returns true once the cursor has reached or passed the end of the source. */
bool
lexer_is_eof(lexer_t *lexer)
{
  return lexer->cur >= lexer->srclen;
}

/* Negation of lexer_is_eof(). */
bool
lexer_is_not_eof(lexer_t *lexer)
{
  return !lexer_is_eof(lexer);
}

/* Returns the byte under the cursor; callers check lexer_is_eof() first. */
char
lexer_current_char(lexer_t *lexer)
{
  return lexer->src[lexer->cur];
}

/*
 * Returns a newly allocated, NUL-terminated copy of the len bytes at start.
 * Terminates the process if the allocation fails.
 */
static char *
lexer_copy_lexeme(const char *start, size_t len)
{
  char *copy = malloc(len + 1);
  if (!copy) {
    perror("lexer_copy_lexeme -> malloc");
    exit(EXIT_FAILURE);
  }
  memcpy(copy, start, len);
  copy[len] = '\0';
  return copy;
}

/*
 * Fills token with the given kind and the lexeme src[begin, lexer->cur),
 * positioned on the lexer's current row.
 */
static void
lexer_emit_token(lexer_t *lexer, token_t *token, token_kind_t kind, size_t begin)
{
  token->kind = kind;
  token->value = lexer_copy_lexeme(lexer->src + begin, lexer->cur - begin);
  token->row = lexer->row;
  token->col = (uint32_t) (begin - lexer->bol);
}

/*
 * Scans the next token from lexer into token.
 *
 * Leading whitespace is skipped, with row/bol updated on each '\n'. A run of
 * digits becomes TOKEN_NUMBER; a letter followed by letters/digits becomes
 * TOKEN_NAME; '(', ')', ':', ';', '{' and '}' map to their own kinds. Any
 * other byte is consumed and returned as TOKEN_UNKNOWN, so an unexpected
 * character is not mistaken for end of input. At end of input the kind is
 * TOKEN_EOF and value is NULL.
 *
 * For every kind except TOKEN_EOF the caller owns token->value and must
 * free() it.
 */
void
lexer_next_token(lexer_t *lexer, token_t *token)
{
  /* <ctype.h> functions need an unsigned char value; a negative char is undefined behaviour. */
  while (lexer_is_not_eof(lexer) && isspace((unsigned char) lexer_current_char(lexer))) {
    bool is_newline = lexer_current_char(lexer) == '\n';
    lexer_drop_char(lexer);
    if (is_newline) {
      lexer->row++;
      lexer->bol = lexer->cur;
    }
  }

  if (lexer_is_eof(lexer)) {
    token->kind = TOKEN_EOF;
    token->value = NULL;
    token->row = lexer->row;
    token->col = lexer->cur - lexer->bol;
    return;
  }

  size_t begin = lexer->cur;
  unsigned char first = (unsigned char) lexer_current_char(lexer);

  if (isdigit(first)) {
    while (lexer_is_not_eof(lexer) && isdigit((unsigned char) lexer_current_char(lexer))) {
      lexer_drop_char(lexer);
    }
    lexer_emit_token(lexer, token, TOKEN_NUMBER, begin);
    return;
  }

  if (isalpha(first)) {
    while (lexer_is_not_eof(lexer) && isalnum((unsigned char) lexer_current_char(lexer))) {
      lexer_drop_char(lexer);
    }
    lexer_emit_token(lexer, token, TOKEN_NAME, begin);
    return;
  }

  token_kind_t kind;
  switch (first) {
  case '(':
    kind = TOKEN_OPAREN;
    break;
  case ')':
    kind = TOKEN_CPAREN;
    break;
  case ':':
    kind = TOKEN_COLON;
    break;
  case ';':
    kind = TOKEN_SEMICOLON;
    break;
  case '{':
    kind = TOKEN_OCURLY;
    break;
  case '}':
    kind = TOKEN_CCURLY;
    break;
  default:
    kind = TOKEN_UNKNOWN;
    break;
  }

  /* Always consume the byte so the caller keeps making progress. */
  lexer_drop_char(lexer);
  lexer_emit_token(lexer, token, kind, begin);
}

/* Prints the command-line synopsis to stderr. */
void
print_usage(void)
{
  fputs("pipac <filename.pipa>\n", stderr);
}

/*
 * Lexes the remaining input and prints one line per token to stdout in the
 * form "path:line:column: [kind=K, value='V']" with one-based line and column.
 */
void
print_tokens(lexer_t *lexer)
{
  token_t token;
  for (lexer_next_token(lexer, &token); token.kind != TOKEN_EOF; lexer_next_token(lexer, &token)) {
    uint32_t line = token.row + 1;
    uint32_t column = token.col + 1;
    printf("%s:%" PRIu32 ":%" PRIu32 ": [kind=%d, value='%s']\n",
           lexer->filepath, line, column, (int) token.kind, token.value);
    /* The token owns its lexeme copy; release it before fetching the next one. */
    free(token.value);
  }
}

/*
 * Entry point: loads the file named by argv[1], reports its size and dumps
 * its tokens. Returns EXIT_FAILURE when no file name is given.
 */
int
main(int argc, char **argv)
{
  if (argc < 2) {
    print_usage();
    return EXIT_FAILURE;
  }

  char *filepath = argv[1];

  lexer_t lexer;
  lexer_init(&lexer, filepath);

  printf("[INFO]: %zu bytes loaded [filename='%s']\n", lexer.srclen, lexer.filepath);

  print_tokens(&lexer);

  free(lexer.src);
  return EXIT_SUCCESS;
}