diff options
author | Carlos Maniero <carlosmaniero@gmail.com> | 2023-04-18 09:18:46 -0300 |
---|---|---|
committer | Johnny Richard <johnny@johnnyrichard.com> | 2023-04-18 17:08:31 +0200 |
commit | 841bc101123be63f0fc9810e7cca7127da7d3e3a (patch) | |
tree | dbf1a06e26241ed04f659de7e903e0db05c98ab7 /src | |
parent | 4f85dcc986ffde45dda59ed0a64c075261ddbe07 (diff) |
lexer: Extract tokenization functions
Make the next-token function small by extracting the
functions that create tokens.
Signed-off-by: Carlos Maniero <carlosmaniero@gmail.com>
Reviewed-by: Johnny Richard <johnny@johnnyrichard.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/lexer.c | 55 |
1 file changed, 34 insertions, 21 deletions
diff --git a/src/lexer.c b/src/lexer.c index 3e48fd5..bc40efc 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -45,7 +45,7 @@ lexer_define_literal_token_props(lexer_t *lexer, token_t *token, token_kind_t ki token->col = lexer->cur - lexer->bol; } -void +static void lexer_drop_spaces(lexer_t *lexer) { while (lexer_is_not_eof(lexer) && isspace(lexer_current_char(lexer))) { @@ -59,35 +59,48 @@ lexer_drop_spaces(lexer_t *lexer) } } +static void +lexer_tokenize_number(lexer_t *lexer, token_t *token) +{ + size_t begin = lexer->cur; + + while (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) { + lexer_drop_char(lexer); + } + token->kind = TOKEN_NUMBER; + token->value = string_view_new(lexer->src + begin, lexer->cur - begin); + token->filepath = lexer->filepath; + token->row = lexer->row; + token->col = begin - lexer->bol; +} + +static void +lexer_tokenize_name(lexer_t *lexer, token_t *token) +{ + size_t begin = lexer->cur; + + while (lexer_is_not_eof(lexer) && isalnum(lexer_current_char(lexer))) { + lexer_drop_char(lexer); + } + token->kind = TOKEN_NAME; + token->value = string_view_new(lexer->src + begin, lexer->cur - begin); + token->filepath = lexer->filepath; + token->row = lexer->row; + token->col = begin - lexer->bol; +} + void lexer_next_token(lexer_t *lexer, token_t *token) { lexer_drop_spaces(lexer); - if (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) { - size_t begin = lexer->cur; - while (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) { - lexer_drop_char(lexer); - } - token->kind = TOKEN_NUMBER; - token->value = string_view_new(lexer->src + begin, lexer->cur - begin); - token->filepath = lexer->filepath; - token->row = lexer->row; - token->col = begin - lexer->bol; + if (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) { + lexer_tokenize_number(lexer, token); return; } if (lexer_is_not_eof(lexer) && isalpha(lexer_current_char(lexer))) { - size_t begin = lexer->cur; - - while (lexer_is_not_eof(lexer) && 
isalnum(lexer_current_char(lexer))) { - lexer_drop_char(lexer); - } - token->kind = TOKEN_NAME; - token->value = string_view_new(lexer->src + begin, lexer->cur - begin); - token->filepath = lexer->filepath; - token->row = lexer->row; - token->col = begin - lexer->bol; + lexer_tokenize_name(lexer, token); return; } |