diff options
author | Johnny Richard <johnny@johnnyrichard.com> | 2023-04-14 21:04:18 +0200 |
---|---|---|
committer | Johnny Richard <johnny@johnnyrichard.com> | 2023-04-14 21:04:18 +0200 |
commit | b4d3c6b5685024efc651e31e2308b0d76bbc3c06 (patch) | |
tree | 63fef1a04052cd185c07f2119593f0d3e23a3d4c /src/pipac.c | |
parent | e0f96e02d6277f92b24ea3afaa49d6c0a7a6731c (diff) |
lexer: Extract lexer.c and lexer.h from pipac.c
Signed-off-by: Johnny Richard <johnny@johnnyrichard.com>
Diffstat (limited to 'src/pipac.c')
-rw-r--r-- | src/pipac.c | 205 |
1 files changed, 1 insertions, 204 deletions
diff --git a/src/pipac.c b/src/pipac.c index dba6109..f1e5865 100644 --- a/src/pipac.c +++ b/src/pipac.c @@ -14,212 +14,9 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see <https://www.gnu.org/licenses/>. */ -#include <assert.h> -#include <stdbool.h> -#include <stdint.h> #include <stdio.h> #include <stdlib.h> -#include <string.h> -#include <errno.h> -#include <ctype.h> - -typedef enum { - TOKEN_NAME, - TOKEN_OPAREN, - TOKEN_CPAREN, - TOKEN_COLON, - TOKEN_SEMICOLON, - TOKEN_OCURLY, - TOKEN_CCURLY, - TOKEN_NUMBER, - TOKEN_EOF -} token_kind_t; - -typedef struct token_t { - token_kind_t kind; - char *value; - uint32_t row; - uint32_t col; -} token_t; - - -typedef struct lexer_t { - char *filepath; - char *src; - size_t srclen; - uint32_t cur; - uint32_t row; - uint32_t bol; -} lexer_t; - -void -lexer_load_file_contents(lexer_t *lexer) -{ - assert(lexer && "lexer must be defined"); - - FILE *file; - file = fopen(lexer->filepath, "r"); - if (!file) { - fprintf(stderr, "tried to open file '%s': %s\n", lexer->filepath, strerror(errno)); - exit(EXIT_FAILURE); - } - - fseek(file, 0L, SEEK_END); - lexer->srclen = ftell(file); - rewind(file); - - lexer->src = calloc(1, lexer->srclen + 1); - if (!lexer->src) { - fclose(file); - perror("lexer_load_file_contents -> calloc"); - exit(EXIT_FAILURE); - } - - if (fread(lexer->src, lexer->srclen, 1, file) != 1) { - fclose(file); - free(lexer->src); - // FIXME: distinguish error using ferror and feof functions - fprintf(stderr, "could not read file '%s'\n", lexer->filepath); - exit(EXIT_FAILURE); - } - -} - -void -lexer_init(lexer_t *lexer, char *filepath) -{ - assert(lexer && "lexer must be defined"); - assert(filepath && "filepath must be defined"); - lexer->filepath = filepath; - lexer->srclen = 0; - lexer->cur = 0; - lexer->row = 0; - lexer->bol = 0; - lexer_load_file_contents(lexer); -} - -void -lexer_drop_char(lexer_t *lexer) -{ - lexer->cur++; -} - - -bool 
-lexer_is_eof(lexer_t *lexer) -{ - return lexer->cur >= lexer->srclen; -} - - -bool -lexer_is_not_eof(lexer_t *lexer) -{ - return !lexer_is_eof(lexer); -} - -char -lexer_current_char(lexer_t *lexer) -{ - return lexer->src[lexer->cur]; -} - -void -lexer_next_token(lexer_t *lexer, token_t *token) -{ - while (lexer_is_not_eof(lexer) && isspace(lexer_current_char(lexer))) { - if (lexer_current_char(lexer) == '\n') { - lexer_drop_char(lexer); - lexer->row++; - lexer->bol = lexer->cur; - continue; - } - lexer_drop_char(lexer); - } - - if (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) { - size_t begin = lexer->cur; - - while (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) { - lexer_drop_char(lexer); - } - token->kind = TOKEN_NUMBER; - token->value = strndup(lexer->src + begin, lexer->cur - begin); - token->row = lexer->row; - token->col = begin - lexer->bol; - return; - } - - if (lexer_is_not_eof(lexer) && isalpha(lexer_current_char(lexer))) { - size_t begin = lexer->cur; - - while (lexer_is_not_eof(lexer) && isalnum(lexer_current_char(lexer))) { - lexer_drop_char(lexer); - } - token->kind = TOKEN_NAME; - token->value = strndup(lexer->src + begin, lexer->cur - begin); - token->row = lexer->row; - token->col = begin - lexer->bol; - return; - } - - if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == '(') { - token->kind = TOKEN_OPAREN; - token->value = strdup("("); - token->row = lexer->row; - token->col = lexer->cur - lexer->bol; - lexer_drop_char(lexer); - return; - } - - if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == ')') { - token->kind = TOKEN_CPAREN; - token->value = strdup(")"); - token->row = lexer->row; - token->col = lexer->cur - lexer->bol; - lexer_drop_char(lexer); - return; - } - - if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == ':') { - token->kind = TOKEN_COLON; - token->value = strdup(":"); - token->row = lexer->row; - token->col = lexer->cur - lexer->bol; - lexer_drop_char(lexer); - return; - 
} - - if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == ';') { - token->kind = TOKEN_SEMICOLON; - token->value = strdup(";"); - token->row = lexer->row; - token->col = lexer->cur - lexer->bol; - lexer_drop_char(lexer); - return; - } - - if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == '{') { - token->kind = TOKEN_OCURLY; - token->value = strdup("{"); - token->row = lexer->row; - token->col = lexer->cur - lexer->bol; - lexer_drop_char(lexer); - return; - } - - if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == '}') { - token->kind = TOKEN_CCURLY; - token->value = strdup("}"); - token->row = lexer->row; - token->col = lexer->cur - lexer->bol; - lexer_drop_char(lexer); - return; - } - - token->kind = TOKEN_EOF; - return; -} +#include "lexer.h" void print_usage() |