diff options
author | Johnny Richard <johnny@johnnyrichard.com> | 2023-04-14 21:04:18 +0200 |
---|---|---|
committer | Johnny Richard <johnny@johnnyrichard.com> | 2023-04-14 21:04:18 +0200 |
commit | b4d3c6b5685024efc651e31e2308b0d76bbc3c06 (patch) | |
tree | 63fef1a04052cd185c07f2119593f0d3e23a3d4c /src | |
parent | e0f96e02d6277f92b24ea3afaa49d6c0a7a6731c (diff) |
lexer: Extract lexer.c and lexer.h from pipac.c
Signed-off-by: Johnny Richard <johnny@johnnyrichard.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/lexer.c | 192 | ||||
-rw-r--r-- | src/lexer.h | 68 | ||||
-rw-r--r-- | src/pipac.c | 205 |
3 files changed, 261 insertions, 204 deletions
diff --git a/src/lexer.c b/src/lexer.c new file mode 100644 index 0000000..7b0206a --- /dev/null +++ b/src/lexer.c @@ -0,0 +1,192 @@ +/* +* Copyright (C) 2023 Johnny Richard +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <https://www.gnu.org/licenses/>. +*/ + +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <stdio.h> +#include <string.h> +#include "lexer.h" + +void +lexer_init(lexer_t *lexer, char *filepath) +{ + assert(lexer && "lexer must be defined"); + assert(filepath && "filepath must be defined"); + lexer->filepath = filepath; + lexer->srclen = 0; + lexer->cur = 0; + lexer->row = 0; + lexer->bol = 0; + lexer_load_file_contents(lexer); +} + +void +lexer_next_token(lexer_t *lexer, token_t *token) +{ + while (lexer_is_not_eof(lexer) && isspace(lexer_current_char(lexer))) { + if (lexer_current_char(lexer) == '\n') { + lexer_drop_char(lexer); + lexer->row++; + lexer->bol = lexer->cur; + continue; + } + lexer_drop_char(lexer); + } + + if (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) { + size_t begin = lexer->cur; + + while (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) { + lexer_drop_char(lexer); + } + token->kind = TOKEN_NUMBER; + token->value = strndup(lexer->src + begin, lexer->cur - begin); + token->row = lexer->row; + token->col = begin - lexer->bol; + return; + } + + if (lexer_is_not_eof(lexer) && isalpha(lexer_current_char(lexer))) { + size_t 
begin = lexer->cur; + + while (lexer_is_not_eof(lexer) && isalnum(lexer_current_char(lexer))) { + lexer_drop_char(lexer); + } + token->kind = TOKEN_NAME; + token->value = strndup(lexer->src + begin, lexer->cur - begin); + token->row = lexer->row; + token->col = begin - lexer->bol; + return; + } + + if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == '(') { + token->kind = TOKEN_OPAREN; + token->value = strdup("("); + token->row = lexer->row; + token->col = lexer->cur - lexer->bol; + lexer_drop_char(lexer); + return; + } + + if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == ')') { + token->kind = TOKEN_CPAREN; + token->value = strdup(")"); + token->row = lexer->row; + token->col = lexer->cur - lexer->bol; + lexer_drop_char(lexer); + return; + } + + if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == ':') { + token->kind = TOKEN_COLON; + token->value = strdup(":"); + token->row = lexer->row; + token->col = lexer->cur - lexer->bol; + lexer_drop_char(lexer); + return; + } + + if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == ';') { + token->kind = TOKEN_SEMICOLON; + token->value = strdup(";"); + token->row = lexer->row; + token->col = lexer->cur - lexer->bol; + lexer_drop_char(lexer); + return; + } + + if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == '{') { + token->kind = TOKEN_OCURLY; + token->value = strdup("{"); + token->row = lexer->row; + token->col = lexer->cur - lexer->bol; + lexer_drop_char(lexer); + return; + } + + if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == '}') { + token->kind = TOKEN_CCURLY; + token->value = strdup("}"); + token->row = lexer->row; + token->col = lexer->cur - lexer->bol; + lexer_drop_char(lexer); + return; + } + + token->kind = TOKEN_EOF; + return; +} + +void +lexer_load_file_contents(lexer_t *lexer) +{ + assert(lexer && "lexer must be defined"); + + FILE *file; + file = fopen(lexer->filepath, "r"); + if (!file) { + fprintf(stderr, "tried to open file '%s': %s\n", 
lexer->filepath, strerror(errno)); + exit(EXIT_FAILURE); + } + + fseek(file, 0L, SEEK_END); + lexer->srclen = ftell(file); + rewind(file); + + lexer->src = calloc(1, lexer->srclen + 1); + if (!lexer->src) { + fclose(file); + perror("lexer_load_file_contents -> calloc"); + exit(EXIT_FAILURE); + } + + if (fread(lexer->src, lexer->srclen, 1, file) != 1) { + fclose(file); + free(lexer->src); + // FIXME: distinguish error using ferror and feof functions + fprintf(stderr, "could not read file '%s'\n", lexer->filepath); + exit(EXIT_FAILURE); + } + +} + +void +lexer_drop_char(lexer_t *lexer) +{ + lexer->cur++; +} + +bool +lexer_is_eof(lexer_t *lexer) +{ + return lexer->cur >= lexer->srclen; +} + + +bool +lexer_is_not_eof(lexer_t *lexer) +{ + return !lexer_is_eof(lexer); +} + +char +lexer_current_char(lexer_t *lexer) +{ + return lexer->src[lexer->cur]; +} + diff --git a/src/lexer.h b/src/lexer.h new file mode 100644 index 0000000..29cfc11 --- /dev/null +++ b/src/lexer.h @@ -0,0 +1,68 @@ +/* +* Copyright (C) 2023 Johnny Richard +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <https://www.gnu.org/licenses/>. 
+*/ +#ifndef LEXER_H +#define LEXER_H + +#include <stdint.h> +#include <stdlib.h> +#include <stdbool.h> + +typedef enum { + TOKEN_NAME, + TOKEN_OPAREN, + TOKEN_CPAREN, + TOKEN_COLON, + TOKEN_SEMICOLON, + TOKEN_OCURLY, + TOKEN_CCURLY, + TOKEN_NUMBER, + TOKEN_EOF +} token_kind_t; + +typedef struct token_t { + token_kind_t kind; + char *value; + uint32_t row; + uint32_t col; +} token_t; + + +typedef struct lexer_t { + char *filepath; + char *src; + size_t srclen; + uint32_t cur; + uint32_t row; + uint32_t bol; +} lexer_t; + +void lexer_init(lexer_t *lexer, char *filepath); + +void lexer_next_token(lexer_t *lexer, token_t *token); + +void lexer_load_file_contents(lexer_t *lexer); + +char lexer_current_char(lexer_t *lexer); + +bool lexer_is_eof(lexer_t *lexer); + +bool lexer_is_not_eof(lexer_t *lexer); + +void lexer_drop_char(lexer_t *lexer); + +#endif /* LEXER_H */ + diff --git a/src/pipac.c b/src/pipac.c index dba6109..f1e5865 100644 --- a/src/pipac.c +++ b/src/pipac.c @@ -14,212 +14,9 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see <https://www.gnu.org/licenses/>. 
*/ -#include <assert.h> -#include <stdbool.h> -#include <stdint.h> #include <stdio.h> #include <stdlib.h> -#include <string.h> -#include <errno.h> -#include <ctype.h> - -typedef enum { - TOKEN_NAME, - TOKEN_OPAREN, - TOKEN_CPAREN, - TOKEN_COLON, - TOKEN_SEMICOLON, - TOKEN_OCURLY, - TOKEN_CCURLY, - TOKEN_NUMBER, - TOKEN_EOF -} token_kind_t; - -typedef struct token_t { - token_kind_t kind; - char *value; - uint32_t row; - uint32_t col; -} token_t; - - -typedef struct lexer_t { - char *filepath; - char *src; - size_t srclen; - uint32_t cur; - uint32_t row; - uint32_t bol; -} lexer_t; - -void -lexer_load_file_contents(lexer_t *lexer) -{ - assert(lexer && "lexer must be defined"); - - FILE *file; - file = fopen(lexer->filepath, "r"); - if (!file) { - fprintf(stderr, "tried to open file '%s': %s\n", lexer->filepath, strerror(errno)); - exit(EXIT_FAILURE); - } - - fseek(file, 0L, SEEK_END); - lexer->srclen = ftell(file); - rewind(file); - - lexer->src = calloc(1, lexer->srclen + 1); - if (!lexer->src) { - fclose(file); - perror("lexer_load_file_contents -> calloc"); - exit(EXIT_FAILURE); - } - - if (fread(lexer->src, lexer->srclen, 1, file) != 1) { - fclose(file); - free(lexer->src); - // FIXME: distinguish error using ferror and feof functions - fprintf(stderr, "could not read file '%s'\n", lexer->filepath); - exit(EXIT_FAILURE); - } - -} - -void -lexer_init(lexer_t *lexer, char *filepath) -{ - assert(lexer && "lexer must be defined"); - assert(filepath && "filepath must be defined"); - lexer->filepath = filepath; - lexer->srclen = 0; - lexer->cur = 0; - lexer->row = 0; - lexer->bol = 0; - lexer_load_file_contents(lexer); -} - -void -lexer_drop_char(lexer_t *lexer) -{ - lexer->cur++; -} - - -bool -lexer_is_eof(lexer_t *lexer) -{ - return lexer->cur >= lexer->srclen; -} - - -bool -lexer_is_not_eof(lexer_t *lexer) -{ - return !lexer_is_eof(lexer); -} - -char -lexer_current_char(lexer_t *lexer) -{ - return lexer->src[lexer->cur]; -} - -void -lexer_next_token(lexer_t *lexer, 
token_t *token) -{ - while (lexer_is_not_eof(lexer) && isspace(lexer_current_char(lexer))) { - if (lexer_current_char(lexer) == '\n') { - lexer_drop_char(lexer); - lexer->row++; - lexer->bol = lexer->cur; - continue; - } - lexer_drop_char(lexer); - } - - if (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) { - size_t begin = lexer->cur; - - while (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) { - lexer_drop_char(lexer); - } - token->kind = TOKEN_NUMBER; - token->value = strndup(lexer->src + begin, lexer->cur - begin); - token->row = lexer->row; - token->col = begin - lexer->bol; - return; - } - - if (lexer_is_not_eof(lexer) && isalpha(lexer_current_char(lexer))) { - size_t begin = lexer->cur; - - while (lexer_is_not_eof(lexer) && isalnum(lexer_current_char(lexer))) { - lexer_drop_char(lexer); - } - token->kind = TOKEN_NAME; - token->value = strndup(lexer->src + begin, lexer->cur - begin); - token->row = lexer->row; - token->col = begin - lexer->bol; - return; - } - - if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == '(') { - token->kind = TOKEN_OPAREN; - token->value = strdup("("); - token->row = lexer->row; - token->col = lexer->cur - lexer->bol; - lexer_drop_char(lexer); - return; - } - - if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == ')') { - token->kind = TOKEN_CPAREN; - token->value = strdup(")"); - token->row = lexer->row; - token->col = lexer->cur - lexer->bol; - lexer_drop_char(lexer); - return; - } - - if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == ':') { - token->kind = TOKEN_COLON; - token->value = strdup(":"); - token->row = lexer->row; - token->col = lexer->cur - lexer->bol; - lexer_drop_char(lexer); - return; - } - - if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == ';') { - token->kind = TOKEN_SEMICOLON; - token->value = strdup(";"); - token->row = lexer->row; - token->col = lexer->cur - lexer->bol; - lexer_drop_char(lexer); - return; - } - - if (lexer_is_not_eof(lexer) && 
lexer_current_char(lexer) == '{') { - token->kind = TOKEN_OCURLY; - token->value = strdup("{"); - token->row = lexer->row; - token->col = lexer->cur - lexer->bol; - lexer_drop_char(lexer); - return; - } - - if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == '}') { - token->kind = TOKEN_CCURLY; - token->value = strdup("}"); - token->row = lexer->row; - token->col = lexer->cur - lexer->bol; - lexer_drop_char(lexer); - return; - } - - token->kind = TOKEN_EOF; - return; -} +#include "lexer.h" void print_usage() |