author     Johnny Richard <johnny@johnnyrichard.com>    2023-04-18 18:33:36 +0200
committer  Carlos Maniero <carlosmaniero@gmail.com>     2023-04-18 13:40:37 -0300
commit     8b9899bcf338d4aca95419dfd1006f7f822a16c9
tree       fec4c33cbd91f8be121c6f0bbeb36a2dc249814b
parent     3433ac8bd34f302cec30f7d2faeb00a9c77e5ef8
lexer: Add tokenizer for OP and UNKNOWN tokens
We want to tokenize arithmetic expressions.
We handle exceptional cases with the UNKNOWN token.
Co-authored-by: Carlos Maniero <carlosmaniero@gmail.com>
Signed-off-by: Johnny Richard <johnny@johnnyrichard.com>
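
Editor's note: as a rough restatement of the rule this commit introduces (sketch only, not code from the patch), the four arithmetic operator characters become TOKEN_OP, and a character the lexer does not otherwise recognise now falls through to TOKEN_UNKNOWN instead of the old TOKEN_EOF assignment. Digits, whitespace and the bracket tokens are still handled by the earlier branches of lexer_next_token; the helper name below is hypothetical.

    /* Sketch only, not part of the patch: restates the new classification,
     * assuming the token_kind_t values declared in src/lexer.h. */
    #include "lexer.h"

    static token_kind_t
    classify_remaining_char(char c)
    {
        if (c == '+' || c == '-' || c == '*' || c == '/')
            return TOKEN_OP;      /* new in this commit */
        return TOKEN_UNKNOWN;     /* fallback replaces the old TOKEN_EOF path */
    }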
-rw-r--r--   src/lexer.c         21
-rw-r--r--   src/lexer.h          4
-rw-r--r--   test/lexer_test.c   38

3 files changed, 51 insertions, 12 deletions
diff --git a/src/lexer.c b/src/lexer.c
index bc40efc..6c38f6e 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -18,6 +18,7 @@
 #include <assert.h>
 #include <ctype.h>
 #include <errno.h>
+#include <stdbool.h>
 #include <stdio.h>
 #include <string.h>
 #include "lexer.h"
@@ -39,7 +40,7 @@ static void
 lexer_define_literal_token_props(lexer_t *lexer, token_t *token, token_kind_t kind)
 {
     token->kind = kind;
-    token->value = string_view_new(lexer->src, 1);
+    token->value = string_view_new(lexer->src + lexer->cur, 1);
     token->filepath = lexer->filepath;
     token->row = lexer->row;
     token->col = lexer->cur - lexer->bol;
@@ -140,9 +141,19 @@ lexer_next_token(lexer_t *lexer, token_t *token)
             lexer_drop_char(lexer);
             return;
         }
+
+        if (lexer_current_char(lexer) == '+'
+            || lexer_current_char(lexer) == '-'
+            || lexer_current_char(lexer) == '*'
+            || lexer_current_char(lexer) == '/') {
+            lexer_define_literal_token_props(lexer, token, TOKEN_OP);
+            lexer_drop_char(lexer);
+            return;
+        }
     }
 
-    token->kind = TOKEN_EOF;
+    lexer_define_literal_token_props(lexer, token, TOKEN_UNKNOWN);
+    lexer_drop_char(lexer);
 
     return;
 }
@@ -224,10 +235,14 @@ token_kind_to_str(token_kind_t kind)
         return "}";
     case TOKEN_NUMBER:
         return "TOKEN_NUMBER";
+    case TOKEN_OP:
+        return "TOKEN_OP";
     case TOKEN_EOF:
         return "TOKEN_EOF";
+    case TOKEN_UNKNOWN:
+        return "TOKEN_UNKNOWN";
     default:
-        return "UNKNOW_TOKEN";
+        assert(false && "unreachable");
     }
 }
 
diff --git a/src/lexer.h b/src/lexer.h
index a091fb7..ae8ab60 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -31,7 +31,9 @@ typedef enum {
     TOKEN_OCURLY,
     TOKEN_CCURLY,
     TOKEN_NUMBER,
-    TOKEN_EOF
+    TOKEN_OP,
+    TOKEN_EOF,
+    TOKEN_UNKNOWN
 } token_kind_t;
 
 typedef struct token_t {
diff --git a/test/lexer_test.c b/test/lexer_test.c
index 1fc93cc..4a31bbf 100644
--- a/test/lexer_test.c
+++ b/test/lexer_test.c
@@ -30,18 +30,19 @@ make_lexer_from_static_src(lexer_t *lexer, char *src, int srclen)
 }
 
 void
-assert_tokenize_number(char *str, char *expected)
+assert_token(token_kind_t expected_kind, char *source, char *expected)
 {
     lexer_t lexer;
     token_t token;
 
-    make_lexer_from_static_src(&lexer, str, strlen(str));
+    make_lexer_from_static_src(&lexer, source, strlen(source));
 
     lexer_next_token(&lexer, &token);
 
     char actual[token.value.size + 1];
     string_view_to_str(&token.value, actual);
-    assert_string_equal("TOKEN_NUMBER", token_kind_to_str(token.kind));
+    assert_string_equal(token_kind_to_str(expected_kind), token_kind_to_str(token.kind));
+    assert_int(expected_kind, ==, token.kind);
     assert_string_equal(expected, actual);
 }
 
@@ -49,18 +50,39 @@ static MunitResult
 test_tokenize_number(const MunitParameter params[],
                      void *user_data_or_fixture)
 {
-    lexer_t lexer;
-    token_t token;
+    assert_token(TOKEN_NUMBER, "1", "1");
+    assert_token(TOKEN_NUMBER, " 13 ", "13");
+    assert_token(TOKEN_NUMBER, " \n 13 ", "13");
+
+    return MUNIT_OK;
+}
+
+static MunitResult
+test_tokenize_op(const MunitParameter params[],
+                 void *user_data_or_fixture)
+{
+    assert_token(TOKEN_OP, " + 2", "+");
+    assert_token(TOKEN_OP, " - \n", "-");
+    assert_token(TOKEN_OP, " * ;", "*");
+    assert_token(TOKEN_OP, " / ", "/");
 
-    assert_tokenize_number("1", "1");
-    assert_tokenize_number(" 13 ", "13");
-    assert_tokenize_number(" \n 13 ", "13");
+    return MUNIT_OK;
+}
+
+static MunitResult
+test_tokenize_unknown(const MunitParameter params[],
+                      void *user_data_or_fixture)
+{
+    assert_token(TOKEN_UNKNOWN, " @ ", "@");
+    assert_token(TOKEN_UNKNOWN, " $ ", "$");
     return MUNIT_OK;
 }
 
 static MunitTest tests[] = {
     { "/test_tokenize_digit", test_tokenize_number, NULL, NULL,
       MUNIT_TEST_OPTION_NONE, NULL },
+    { "/test_tokenize_op", test_tokenize_op, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
+    { "/test_tokenize_unknown", test_tokenize_unknown, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
     { NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }
 };
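
Editor's note: the new tests only assert the first token of each input. A natural follow-up, sketched below against the helpers this patch already uses (make_lexer_from_static_src, lexer_next_token, assert_int), would walk a whole arithmetic expression. The test name and the expectation that repeated lexer_next_token calls advance through the source are assumptions; this commit itself does not assert that.

    /* Hypothetical extra case for test/lexer_test.c, not part of this commit. */
    static MunitResult
    test_tokenize_expression(const MunitParameter params[],
                             void *user_data_or_fixture)
    {
        lexer_t lexer;
        token_t token;
        char *src = "1 + 2 $";

        make_lexer_from_static_src(&lexer, src, strlen(src));

        /* Expected kinds in order, assuming each call consumes one token. */
        token_kind_t expected[] = { TOKEN_NUMBER, TOKEN_OP, TOKEN_NUMBER, TOKEN_UNKNOWN };

        for (size_t i = 0; i < sizeof(expected) / sizeof(expected[0]); ++i) {
            lexer_next_token(&lexer, &token);
            assert_int(expected[i], ==, token.kind);
        }

        return MUNIT_OK;
    }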