summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohnny Richard <johnny@johnnyrichard.com>2023-04-18 18:33:36 +0200
committerCarlos Maniero <carlosmaniero@gmail.com>2023-04-18 13:40:37 -0300
commit8b9899bcf338d4aca95419dfd1006f7f822a16c9 (patch)
treefec4c33cbd91f8be121c6f0bbeb36a2dc249814b
parent3433ac8bd34f302cec30f7d2faeb00a9c77e5ef8 (diff)
lexer: Add tokenizer for OP and UNKNOWN tokens
We want to tokenize arithmetic expressions. We are handling exceptional cases with the UNKNOWN token. Co-authored-by: Carlos Maniero <carlosmaniero@gmail.com> Signed-off-by: Johnny Richard <johnny@johnnyrichard.com>
-rw-r--r--src/lexer.c21
-rw-r--r--src/lexer.h4
-rw-r--r--test/lexer_test.c38
3 files changed, 51 insertions, 12 deletions
diff --git a/src/lexer.c b/src/lexer.c
index bc40efc..6c38f6e 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -18,6 +18,7 @@
#include <assert.h>
#include <ctype.h>
#include <errno.h>
+#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include "lexer.h"
@@ -39,7 +40,7 @@ static void
lexer_define_literal_token_props(lexer_t *lexer, token_t *token, token_kind_t kind)
{
token->kind = kind;
- token->value = string_view_new(lexer->src, 1);
+ token->value = string_view_new(lexer->src + lexer->cur, 1);
token->filepath = lexer->filepath;
token->row = lexer->row;
token->col = lexer->cur - lexer->bol;
@@ -140,9 +141,19 @@ lexer_next_token(lexer_t *lexer, token_t *token)
lexer_drop_char(lexer);
return;
}
+
+ if (lexer_current_char(lexer) == '+'
+ || lexer_current_char(lexer) == '-'
+ || lexer_current_char(lexer) == '*'
+ || lexer_current_char(lexer) == '/') {
+ lexer_define_literal_token_props(lexer, token, TOKEN_OP);
+ lexer_drop_char(lexer);
+ return;
+ }
}
- token->kind = TOKEN_EOF;
+ lexer_define_literal_token_props(lexer, token, TOKEN_UNKNOWN);
+ lexer_drop_char(lexer);
return;
}
@@ -224,10 +235,14 @@ token_kind_to_str(token_kind_t kind)
return "}";
case TOKEN_NUMBER:
return "TOKEN_NUMBER";
+ case TOKEN_OP:
+ return "TOKEN_OP";
case TOKEN_EOF:
return "TOKEN_EOF";
+ case TOKEN_UNKNOWN:
+ return "TOKEN_UNKNOWN";
default:
- return "UNKNOW_TOKEN";
+ assert(false && "unreachable");
}
}
diff --git a/src/lexer.h b/src/lexer.h
index a091fb7..ae8ab60 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -31,7 +31,9 @@ typedef enum {
TOKEN_OCURLY,
TOKEN_CCURLY,
TOKEN_NUMBER,
- TOKEN_EOF
+ TOKEN_OP,
+ TOKEN_EOF,
+ TOKEN_UNKNOWN
} token_kind_t;
typedef struct token_t {
diff --git a/test/lexer_test.c b/test/lexer_test.c
index 1fc93cc..4a31bbf 100644
--- a/test/lexer_test.c
+++ b/test/lexer_test.c
@@ -30,18 +30,19 @@ make_lexer_from_static_src(lexer_t *lexer, char *src, int srclen)
}
void
-assert_tokenize_number(char *str, char *expected)
+assert_token(token_kind_t expected_kind, char *source, char *expected)
{
lexer_t lexer;
token_t token;
- make_lexer_from_static_src(&lexer, str, strlen(str));
+ make_lexer_from_static_src(&lexer, source, strlen(source));
lexer_next_token(&lexer, &token);
char actual[token.value.size + 1];
string_view_to_str(&token.value, actual);
- assert_string_equal("TOKEN_NUMBER", token_kind_to_str(token.kind));
+ assert_string_equal(token_kind_to_str(expected_kind), token_kind_to_str(token.kind));
+ assert_int(expected_kind, ==, token.kind);
assert_string_equal(expected, actual);
}
@@ -49,18 +50,39 @@ static MunitResult
test_tokenize_number(const MunitParameter params[],
void *user_data_or_fixture)
{
- lexer_t lexer;
- token_t token;
+ assert_token(TOKEN_NUMBER, "1", "1");
+ assert_token(TOKEN_NUMBER, " 13 ", "13");
+ assert_token(TOKEN_NUMBER, " \n 13 ", "13");
+
+ return MUNIT_OK;
+}
+
+static MunitResult
+test_tokenize_op(const MunitParameter params[],
+ void *user_data_or_fixture)
+{
+ assert_token(TOKEN_OP, " + 2", "+");
+ assert_token(TOKEN_OP, " - \n", "-");
+ assert_token(TOKEN_OP, " * ;", "*");
+ assert_token(TOKEN_OP, " / ", "/");
- assert_tokenize_number("1", "1");
- assert_tokenize_number(" 13 ", "13");
- assert_tokenize_number(" \n 13 ", "13");
+ return MUNIT_OK;
+}
+
+static MunitResult
+test_tokenize_unknown(const MunitParameter params[],
+ void *user_data_or_fixture)
+{
+ assert_token(TOKEN_UNKNOWN, " @ ", "@");
+ assert_token(TOKEN_UNKNOWN, " $ ", "$");
return MUNIT_OK;
}
static MunitTest tests[] = {
{ "/test_tokenize_digit", test_tokenize_number, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
+ { "/test_tokenize_op", test_tokenize_op, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
+ { "/test_tokenize_unknown", test_tokenize_unknown, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
{ NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }
};