From 841bc101123be63f0fc9810e7cca7127da7d3e3a Mon Sep 17 00:00:00 2001 From: Carlos Maniero Date: Tue, 18 Apr 2023 09:18:46 -0300 Subject: lexer: Extract tokenization functions Make the next token function smaller by extracting the functions that make tokens. Signed-off-by: Carlos Maniero Reviewed-by: Johnny Richard --- src/lexer.c | 55 ++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/src/lexer.c b/src/lexer.c index 3e48fd5..bc40efc 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -45,7 +45,7 @@ lexer_define_literal_token_props(lexer_t *lexer, token_t *token, token_kind_t ki token->col = lexer->cur - lexer->bol; } -void +static void lexer_drop_spaces(lexer_t *lexer) { while (lexer_is_not_eof(lexer) && isspace(lexer_current_char(lexer))) { @@ -59,35 +59,48 @@ lexer_drop_spaces(lexer_t *lexer) } } +static void +lexer_tokenize_number(lexer_t *lexer, token_t *token) +{ + size_t begin = lexer->cur; + + while (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) { + lexer_drop_char(lexer); + } + token->kind = TOKEN_NUMBER; + token->value = string_view_new(lexer->src + begin, lexer->cur - begin); + token->filepath = lexer->filepath; + token->row = lexer->row; + token->col = begin - lexer->bol; +} + +static void +lexer_tokenize_name(lexer_t *lexer, token_t *token) +{ + size_t begin = lexer->cur; + + while (lexer_is_not_eof(lexer) && isalnum(lexer_current_char(lexer))) { + lexer_drop_char(lexer); + } + token->kind = TOKEN_NAME; + token->value = string_view_new(lexer->src + begin, lexer->cur - begin); + token->filepath = lexer->filepath; + token->row = lexer->row; + token->col = begin - lexer->bol; +} + void lexer_next_token(lexer_t *lexer, token_t *token) { lexer_drop_spaces(lexer); - if (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) { - size_t begin = lexer->cur; - while (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) { - lexer_drop_char(lexer); - } - token->kind = 
TOKEN_NUMBER; - token->value = string_view_new(lexer->src + begin, lexer->cur - begin); - token->filepath = lexer->filepath; - token->row = lexer->row; - token->col = begin - lexer->bol; + if (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) { + lexer_tokenize_number(lexer, token); return; } if (lexer_is_not_eof(lexer) && isalpha(lexer_current_char(lexer))) { - size_t begin = lexer->cur; - - while (lexer_is_not_eof(lexer) && isalnum(lexer_current_char(lexer))) { - lexer_drop_char(lexer); - } - token->kind = TOKEN_NAME; - token->value = string_view_new(lexer->src + begin, lexer->cur - begin); - token->filepath = lexer->filepath; - token->row = lexer->row; - token->col = begin - lexer->bol; + lexer_tokenize_name(lexer, token); return; } -- cgit v1.2.3