summaryrefslogtreecommitdiff
path: root/src/lexer.c
diff options
context:
space:
mode:
authorCarlos Maniero <carlosmaniero@gmail.com>2023-04-18 09:18:46 -0300
committerJohnny Richard <johnny@johnnyrichard.com>2023-04-18 17:08:31 +0200
commit841bc101123be63f0fc9810e7cca7127da7d3e3a (patch)
treedbf1a06e26241ed04f659de7e903e0db05c98ab7 /src/lexer.c
parent4f85dcc986ffde45dda59ed0a64c075261ddbe07 (diff)
lexer: Extract tokenization functions
Make the next-token function small by extracting the functions that create tokens. Signed-off-by: Carlos Maniero <carlosmaniero@gmail.com> Reviewed-by: Johnny Richard <johnny@johnnyrichard.com>
Diffstat (limited to 'src/lexer.c')
-rw-r--r--src/lexer.c55
1 file changed, 34 insertions, 21 deletions
diff --git a/src/lexer.c b/src/lexer.c
index 3e48fd5..bc40efc 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -45,7 +45,7 @@ lexer_define_literal_token_props(lexer_t *lexer, token_t *token, token_kind_t ki
token->col = lexer->cur - lexer->bol;
}
-void
+static void
lexer_drop_spaces(lexer_t *lexer)
{
while (lexer_is_not_eof(lexer) && isspace(lexer_current_char(lexer))) {
@@ -59,35 +59,48 @@ lexer_drop_spaces(lexer_t *lexer)
}
}
+static void
+lexer_tokenize_number(lexer_t *lexer, token_t *token)
+{
+ size_t begin = lexer->cur;
+
+ while (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) {
+ lexer_drop_char(lexer);
+ }
+ token->kind = TOKEN_NUMBER;
+ token->value = string_view_new(lexer->src + begin, lexer->cur - begin);
+ token->filepath = lexer->filepath;
+ token->row = lexer->row;
+ token->col = begin - lexer->bol;
+}
+
+static void
+lexer_tokenize_name(lexer_t *lexer, token_t *token)
+{
+ size_t begin = lexer->cur;
+
+ while (lexer_is_not_eof(lexer) && isalnum(lexer_current_char(lexer))) {
+ lexer_drop_char(lexer);
+ }
+ token->kind = TOKEN_NAME;
+ token->value = string_view_new(lexer->src + begin, lexer->cur - begin);
+ token->filepath = lexer->filepath;
+ token->row = lexer->row;
+ token->col = begin - lexer->bol;
+}
+
void
lexer_next_token(lexer_t *lexer, token_t *token)
{
lexer_drop_spaces(lexer);
- if (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) {
- size_t begin = lexer->cur;
- while (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) {
- lexer_drop_char(lexer);
- }
- token->kind = TOKEN_NUMBER;
- token->value = string_view_new(lexer->src + begin, lexer->cur - begin);
- token->filepath = lexer->filepath;
- token->row = lexer->row;
- token->col = begin - lexer->bol;
+ if (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) {
+ lexer_tokenize_number(lexer, token);
return;
}
if (lexer_is_not_eof(lexer) && isalpha(lexer_current_char(lexer))) {
- size_t begin = lexer->cur;
-
- while (lexer_is_not_eof(lexer) && isalnum(lexer_current_char(lexer))) {
- lexer_drop_char(lexer);
- }
- token->kind = TOKEN_NAME;
- token->value = string_view_new(lexer->src + begin, lexer->cur - begin);
- token->filepath = lexer->filepath;
- token->row = lexer->row;
- token->col = begin - lexer->bol;
+ lexer_tokenize_name(lexer, token);
return;
}