From b4d3c6b5685024efc651e31e2308b0d76bbc3c06 Mon Sep 17 00:00:00 2001 From: Johnny Richard Date: Fri, 14 Apr 2023 21:04:18 +0200 Subject: lexer: Extract lexer.c and lexer.h from pipa.c Signed-off-by: Johnny Richard --- src/pipac.c | 205 +----------------------------------------------------------- 1 file changed, 1 insertion(+), 204 deletions(-) (limited to 'src/pipac.c') diff --git a/src/pipac.c b/src/pipac.c index dba6109..f1e5865 100644 --- a/src/pipac.c +++ b/src/pipac.c @@ -14,212 +14,9 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ -#include -#include -#include #include #include -#include -#include -#include - -typedef enum { - TOKEN_NAME, - TOKEN_OPAREN, - TOKEN_CPAREN, - TOKEN_COLON, - TOKEN_SEMICOLON, - TOKEN_OCURLY, - TOKEN_CCURLY, - TOKEN_NUMBER, - TOKEN_EOF -} token_kind_t; - -typedef struct token_t { - token_kind_t kind; - char *value; - uint32_t row; - uint32_t col; -} token_t; - - -typedef struct lexer_t { - char *filepath; - char *src; - size_t srclen; - uint32_t cur; - uint32_t row; - uint32_t bol; -} lexer_t; - -void -lexer_load_file_contents(lexer_t *lexer) -{ - assert(lexer && "lexer must be defined"); - - FILE *file; - file = fopen(lexer->filepath, "r"); - if (!file) { - fprintf(stderr, "tried to open file '%s': %s\n", lexer->filepath, strerror(errno)); - exit(EXIT_FAILURE); - } - - fseek(file, 0L, SEEK_END); - lexer->srclen = ftell(file); - rewind(file); - - lexer->src = calloc(1, lexer->srclen + 1); - if (!lexer->src) { - fclose(file); - perror("lexer_load_file_contents -> calloc"); - exit(EXIT_FAILURE); - } - - if (fread(lexer->src, lexer->srclen, 1, file) != 1) { - fclose(file); - free(lexer->src); - // FIXME: distinguish error using ferror and feof functions - fprintf(stderr, "could not read file '%s'\n", lexer->filepath); - exit(EXIT_FAILURE); - } - -} - -void -lexer_init(lexer_t *lexer, char *filepath) -{ - assert(lexer && "lexer must be defined"); - assert(filepath && "filepath must be defined"); - lexer->filepath = filepath; - lexer->srclen = 0; - lexer->cur = 0; - lexer->row = 0; - lexer->bol = 0; - lexer_load_file_contents(lexer); -} - -void -lexer_drop_char(lexer_t *lexer) -{ - lexer->cur++; -} - - -bool -lexer_is_eof(lexer_t *lexer) -{ - return lexer->cur >= lexer->srclen; -} - - -bool -lexer_is_not_eof(lexer_t *lexer) -{ - return !lexer_is_eof(lexer); -} - -char -lexer_current_char(lexer_t *lexer) -{ - return lexer->src[lexer->cur]; -} - -void -lexer_next_token(lexer_t *lexer, token_t *token) -{ - while (lexer_is_not_eof(lexer) && isspace(lexer_current_char(lexer))) { - if (lexer_current_char(lexer) == '\n') { - lexer_drop_char(lexer); - lexer->row++; - lexer->bol = lexer->cur; - continue; - } - lexer_drop_char(lexer); - } - - if (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) { - size_t begin = lexer->cur; - - while (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) { - lexer_drop_char(lexer); - } - token->kind = TOKEN_NUMBER; - token->value = strndup(lexer->src + begin, lexer->cur - begin); - token->row = lexer->row; - token->col = begin - lexer->bol; - return; - } - - if (lexer_is_not_eof(lexer) && isalpha(lexer_current_char(lexer))) { - size_t begin = lexer->cur; - - while (lexer_is_not_eof(lexer) && isalnum(lexer_current_char(lexer))) { - lexer_drop_char(lexer); - } - token->kind = TOKEN_NAME; - token->value = strndup(lexer->src + begin, lexer->cur - begin); - token->row = lexer->row; - token->col = begin - lexer->bol; - return; - } - - if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == '(') { - token->kind = TOKEN_OPAREN; - token->value = strdup("("); - token->row = lexer->row; - token->col = lexer->cur - lexer->bol; - lexer_drop_char(lexer); - return; - } - - if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == ')') { - token->kind = TOKEN_CPAREN; - token->value = strdup(")"); - token->row = lexer->row; - token->col = lexer->cur - lexer->bol; - lexer_drop_char(lexer); - return; - } - - if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == ':') { - token->kind = TOKEN_COLON; - token->value = strdup(":"); - token->row = lexer->row; - token->col = lexer->cur - lexer->bol; - lexer_drop_char(lexer); - return; - } - - if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == ';') { - token->kind = TOKEN_SEMICOLON; - token->value = strdup(";"); - token->row = lexer->row; - token->col = lexer->cur - lexer->bol; - lexer_drop_char(lexer); - return; - } - - if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == '{') { - token->kind = TOKEN_OCURLY; - token->value = strdup("{"); - token->row = lexer->row; - token->col = lexer->cur - lexer->bol; - lexer_drop_char(lexer); - return; - } - - if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == '}') { - token->kind = TOKEN_CCURLY; - token->value = strdup("}"); - token->row = lexer->row; - token->col = lexer->cur - lexer->bol; - lexer_drop_char(lexer); - return; - } - - token->kind = TOKEN_EOF; - return; -} +#include "lexer.h" void print_usage() -- cgit v1.2.3