| author | Carlos Maniero <carlos@maniero.me> | 2023-05-06 12:29:05 -0300 |
|---|---|---|
| committer | Johnny Richard <johnny@johnnyrichard.com> | 2023-05-06 22:53:49 +0200 |
| commit | ccd5e8585f10488eed72c772cc1804efea6b8fb4 (patch) | |
| tree | f5690cb92e94758d44f5c29715c0d2415659fa7c | |
| parent | 6a297e0ef57f2ae6d6134bd44a33c55fa9628cfe (diff) | |
lexer: Tokenize logical and bitwise operators
The following logical operators were added to the lexer:
TOKEN_EQUAL ==
TOKEN_NOT !
TOKEN_NOT_EQUAL !=
TOKEN_GT >
TOKEN_GT_EQUAL >=
TOKEN_LT <
TOKEN_LT_EQUAL <=
TOKEN_AND &&
TOKEN_OR ||
Bitwise operators were also added:
TOKEN_BITWISE_AND &
TOKEN_BITWISE_OR |
TOKEN_BITWISE_SHIFT_LEFT <<
TOKEN_BITWISE_SHIFT_RIGHT >>
TOKEN_BITWISE_XOR ^
TOKEN_BITWISE_NOT ~
TOKEN_EQUAL ('=') was renamed to TOKEN_ASSIGN; TOKEN_EQUAL is now used
for the logical comparator '=='.
Signed-off-by: Carlos Maniero <carlos@maniero.me>
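
The two-character operators above are recognized with a single character of lookahead: the lexer first records the one-character token, consumes the character, and upgrades the token if the next character completes a longer operator. Below is a minimal, self-contained sketch of that scheme; the names (`scan_operator`, `tk_t`, the `TK_*` kinds) are illustrative only and are not part of this project's API:

```c
#include <stddef.h>
#include <stdio.h>

/* Illustrative token kinds; the real project defines token_kind_t in src/lexer.h. */
typedef enum { TK_ASSIGN, TK_EQUAL, TK_LT, TK_LT_EQUAL, TK_SHIFT_LEFT } tk_t;

/* Scan one operator starting at src[*cur], preferring the longest match
 * (maximal munch): '<' alone is TK_LT, but '<=' and '<<' win when present.
 * This mirrors the patch's shape: commit to the one-char token, consume it,
 * then upgrade if the lookahead character completes a longer operator. */
static tk_t
scan_operator(const char *src, size_t *cur)
{
    char c = src[(*cur)++];   /* consume the first character */
    char next = src[*cur];    /* one character of lookahead  */

    if (c == '=' && next == '=') { (*cur)++; return TK_EQUAL; }
    if (c == '=')                { return TK_ASSIGN; }
    if (c == '<' && next == '=') { (*cur)++; return TK_LT_EQUAL; }
    if (c == '<' && next == '<') { (*cur)++; return TK_SHIFT_LEFT; }
    return TK_LT;
}

int
main(void)
{
    const char *inputs[] = { "=", "==", "<", "<=", "<<" };
    for (size_t i = 0; i < sizeof(inputs) / sizeof(inputs[0]); i++) {
        size_t cur = 0;
        printf("%-2s -> kind %d\n", inputs[i], (int) scan_operator(inputs[i], &cur));
    }
    return 0;
}
```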
-rw-r--r--  src/lexer.c       | 152
-rw-r--r--  src/lexer.h       |  23
-rw-r--r--  src/parser.c      |   6
-rw-r--r--  test/lexer_test.c |  17

4 files changed, 173 insertions(+), 25 deletions(-)
```diff
diff --git a/src/lexer.c b/src/lexer.c
index 5a6d245..72c27cd 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -62,6 +62,17 @@ lexer_drop_spaces(lexer_t *lexer)
 }
 
 static void
+lexer_tokenize_from_given_position_to_cursor(lexer_t *lexer, token_t *token, size_t position, token_kind_t kind)
+{
+    token->kind = kind;
+    token->value = string_view_new(lexer->src + position, lexer->cur - position);
+    token->filepath = lexer->filepath;
+    token->row = lexer->row;
+    token->col = position - lexer->bol;
+    token->bol = lexer->bol;
+}
+
+static void
 lexer_tokenize_number(lexer_t *lexer, token_t *token)
 {
     size_t begin = lexer->cur;
@@ -69,12 +80,8 @@ lexer_tokenize_number(lexer_t *lexer, token_t *token)
     while (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) {
         lexer_drop_char(lexer);
     }
-    token->kind = TOKEN_NUMBER;
-    token->value = string_view_new(lexer->src + begin, lexer->cur - begin);
-    token->filepath = lexer->filepath;
-    token->row = lexer->row;
-    token->col = begin - lexer->bol;
-    token->bol = lexer->bol;
+
+    lexer_tokenize_from_given_position_to_cursor(lexer, token, begin, TOKEN_NUMBER);
 }
 
 static void
@@ -86,12 +93,7 @@ lexer_tokenize_name(lexer_t *lexer, token_t *token)
         lexer_drop_char(lexer);
     }
 
-    token->kind = TOKEN_NAME;
-    token->value = string_view_new(lexer->src + begin, lexer->cur - begin);
-    token->filepath = lexer->filepath;
-    token->row = lexer->row;
-    token->col = begin - lexer->bol;
-    token->bol = lexer->bol;
+    lexer_tokenize_from_given_position_to_cursor(lexer, token, begin, TOKEN_NAME);
 }
 
 static void
@@ -194,7 +196,99 @@ lexer_next_token(lexer_t *lexer, token_t *token)
     }
 
     if (lexer_current_char(lexer) == '=') {
-        lexer_define_literal_token_props(lexer, token, TOKEN_EQUAL);
+        lexer_define_literal_token_props(lexer, token, TOKEN_ASSIGN);
+        lexer_drop_char(lexer);
+
+        if (lexer_current_char(lexer) == '=') {
+            lexer_drop_char(lexer);
+            lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_EQUAL);
+            return;
+        }
+
+        return;
+    }
+
+    if (lexer_current_char(lexer) == '!') {
+        lexer_define_literal_token_props(lexer, token, TOKEN_NOT);
+        lexer_drop_char(lexer);
+
+        if (lexer_current_char(lexer) == '=') {
+            lexer_drop_char(lexer);
+            lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_NOT_EQUAL);
+            return;
+        }
+
+        return;
+    }
+
+    if (lexer_current_char(lexer) == '>') {
+        lexer_define_literal_token_props(lexer, token, TOKEN_GT);
+        lexer_drop_char(lexer);
+
+        if (lexer_current_char(lexer) == '=') {
+            lexer_drop_char(lexer);
+            lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_GT_EQUAL);
+            return;
+        }
+
+        if (lexer_current_char(lexer) == '>') {
+            lexer_drop_char(lexer);
+            lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_BITWISE_SHIFT_RIGHT);
+            return;
+        }
+        return;
+    }
+
+    if (lexer_current_char(lexer) == '<') {
+        lexer_define_literal_token_props(lexer, token, TOKEN_LT);
+        lexer_drop_char(lexer);
+
+        if (lexer_current_char(lexer) == '=') {
+            lexer_drop_char(lexer);
+            lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_LT_EQUAL);
+            return;
+        }
+
+        if (lexer_current_char(lexer) == '<') {
+            lexer_drop_char(lexer);
+            lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_BITWISE_SHIFT_LEFT);
+            return;
+        }
+        return;
+    }
+
+    if (lexer_current_char(lexer) == '&') {
+        lexer_define_literal_token_props(lexer, token, TOKEN_BITWISE_AND);
+        lexer_drop_char(lexer);
+
+        if (lexer_current_char(lexer) == '&') {
+            lexer_drop_char(lexer);
+            lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_AND);
+            return;
+        }
+        return;
+    }
+
+    if (lexer_current_char(lexer) == '|') {
+        lexer_define_literal_token_props(lexer, token, TOKEN_BITWISE_OR);
+        lexer_drop_char(lexer);
+
+        if (lexer_current_char(lexer) == '|') {
+            lexer_drop_char(lexer);
+            lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_OR);
+            return;
+        }
+        return;
+    }
+
+    if (lexer_current_char(lexer) == '^') {
+        lexer_define_literal_token_props(lexer, token, TOKEN_BITWISE_XOR);
+        lexer_drop_char(lexer);
+        return;
+    }
+
+    if (lexer_current_char(lexer) == '~') {
+        lexer_define_literal_token_props(lexer, token, TOKEN_BITWISE_NOT);
         lexer_drop_char(lexer);
         return;
     }
@@ -322,8 +416,38 @@ token_kind_to_str(token_kind_t kind)
         return "*";
     case TOKEN_SLASH:
         return "/";
-    case TOKEN_EQUAL:
+    case TOKEN_ASSIGN:
         return "=";
+    case TOKEN_EQUAL:
+        return "==";
+    case TOKEN_NOT:
+        return "!";
+    case TOKEN_NOT_EQUAL:
+        return "!=";
+    case TOKEN_GT:
+        return ">";
+    case TOKEN_GT_EQUAL:
+        return ">=";
+    case TOKEN_LT:
+        return "<";
+    case TOKEN_LT_EQUAL:
+        return "<=";
+    case TOKEN_AND:
+        return "&&";
+    case TOKEN_OR:
+        return "||";
+    case TOKEN_BITWISE_AND:
+        return "&";
+    case TOKEN_BITWISE_OR:
+        return "|";
+    case TOKEN_BITWISE_SHIFT_LEFT:
+        return "<<";
+    case TOKEN_BITWISE_SHIFT_RIGHT:
+        return ">>";
+    case TOKEN_BITWISE_XOR:
+        return "^";
+    case TOKEN_BITWISE_NOT:
+        return "~";
     case TOKEN_KEYWORD_RETURN:
         return "return";
     case TOKEN_KEYWORD_FN:
diff --git a/src/lexer.h b/src/lexer.h
index 6449a0a..dd442cc 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -24,23 +24,42 @@
 typedef enum {
+    // Non-Literal Tokens
     TOKEN_NAME,
+    TOKEN_NUMBER,
+    TOKEN_EOF,
+
+    // Literal Tokens
     TOKEN_OPAREN,
     TOKEN_CPAREN,
     TOKEN_COLON,
     TOKEN_SEMICOLON,
     TOKEN_OCURLY,
     TOKEN_CCURLY,
-    TOKEN_NUMBER,
     TOKEN_PLUS,
     TOKEN_MINUS,
     TOKEN_STAR,
     TOKEN_SLASH,
+    TOKEN_ASSIGN,
     TOKEN_EQUAL,
+    TOKEN_NOT,
+    TOKEN_NOT_EQUAL,
+    TOKEN_GT,
+    TOKEN_GT_EQUAL,
+    TOKEN_LT,
+    TOKEN_LT_EQUAL,
+    TOKEN_AND,
+    TOKEN_OR,
+    TOKEN_BITWISE_AND,
+    TOKEN_BITWISE_OR,
+    TOKEN_BITWISE_SHIFT_LEFT,
+    TOKEN_BITWISE_SHIFT_RIGHT,
+    TOKEN_BITWISE_XOR,
+    TOKEN_BITWISE_NOT,
     TOKEN_KEYWORD_RETURN,
     TOKEN_KEYWORD_FN,
     TOKEN_KEYWORD_LET,
-    TOKEN_EOF,
+    TOKEN_UNKNOWN
 } token_kind_t;
diff --git a/src/parser.c b/src/parser.c
index 49803eb..baa2ef5 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -264,7 +264,7 @@ parser_parse_variable_assignment(parser_t *parser)
         return NULL;
     }
 
-    if (!drop_expected_token(parser, TOKEN_EQUAL))
+    if (!drop_expected_token(parser, TOKEN_ASSIGN))
         return NULL;
 
     ast_node_t *expression = parser_parse_expression(parser);
@@ -318,7 +318,7 @@ parser_parse_variable_declaration(parser_t *parser)
         return NULL;
     }
 
-    if (!drop_expected_token(parser, TOKEN_EQUAL)) {
+    if (!drop_expected_token(parser, TOKEN_ASSIGN)) {
         return NULL;
     }
 
@@ -360,7 +360,7 @@ is_next_statement_a_variable_assignement(parser_t *parser)
 
     lexer_lookahead(parser->lexer, &token, 2);
 
-    return token.kind == TOKEN_EQUAL;
+    return token.kind == TOKEN_ASSIGN;
 }
 
 static bool
diff --git a/test/lexer_test.c b/test/lexer_test.c
index 3c43342..87928b4 100644
--- a/test/lexer_test.c
+++ b/test/lexer_test.c
@@ -17,6 +17,7 @@
 #define MUNIT_ENABLE_ASSERT_ALIASES
 #include "lexer.h"
 #include "munit.h"
+#include <stdio.h>
 
 void
 make_lexer_from_static_src(lexer_t *lexer, char *src)
@@ -88,17 +89,21 @@ test_tokenize_name(const MunitParameter params[], void *user_data_or_fixture)
 }
 
 static MunitResult
-test_tokenize_op(const MunitParameter params[], void *user_data_or_fixture)
+test_tokenize_basic_check(const MunitParameter params[], void *user_data_or_fixture)
 {
     assert_token_at(" + 2", 0, TOKEN_PLUS, "+");
     assert_token_at(" - \n", 0, TOKEN_MINUS, "-");
     assert_token_at(" * ;", 0, TOKEN_STAR, "*");
     assert_token_at(" / ", 0, TOKEN_SLASH, "/");
-    assert_token_at(" = ", 0, TOKEN_EQUAL, "=");
-    assert_token_at("1 * 2", 0, TOKEN_NUMBER, "1");
-    assert_token_at("1 * 2", 1, TOKEN_STAR, "*");
-    assert_token_at("1 * 2", 2, TOKEN_NUMBER, "2");
+    for (size_t kind = TOKEN_OPAREN; kind < TOKEN_UNKNOWN; kind++) {
+        char source[128];
+        sprintf(source, "1 %s 2", token_kind_to_str(kind));
+
+        assert_token_at(source, 0, TOKEN_NUMBER, "1");
+        assert_token_at(source, 1, kind, token_kind_to_str(kind));
+        assert_token_at(source, 2, TOKEN_NUMBER, "2");
+    }
 
     return MUNIT_OK;
 }
@@ -134,7 +139,7 @@ static MunitTest tests[] = {
     { "/test_tokenize_digit", test_tokenize_number, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
     { "/test_tokenize_keywords", test_tokenize_keywords, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
     { "/test_tokenize_name", test_tokenize_name, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
-    { "/test_tokenize_op", test_tokenize_op, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
+    { "/test_tokenize_basic_check", test_tokenize_basic_check, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
     { "/test_tokenize_unknown", test_tokenize_unknown, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
     { "/test_peek_next_token", test_peek_next_token, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
     { NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }
```
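
To see the new tokens end to end, a quick driver in the spirit of the patch's tests could walk an expression and print each token's string form. This is a sketch, not part of the commit: it assumes the `lexer_t`/`token_t` API visible in the diff (`lexer_next_token`, `token_kind_to_str`, `TOKEN_EOF`) and borrows the `make_lexer_from_static_src` helper from `test/lexer_test.c`:

```c
#include <stdio.h>
#include "lexer.h"

/* Hypothetical driver: lex "a >= b && c != 0" and print each token kind.
 * The diff guarantees ">=" for TOKEN_GT_EQUAL, "&&" for TOKEN_AND, and
 * "!=" for TOKEN_NOT_EQUAL in token_kind_to_str(). */
int
main(void)
{
    lexer_t lexer;
    make_lexer_from_static_src(&lexer, "a >= b && c != 0");

    token_t token;
    do {
        lexer_next_token(&lexer, &token);
        printf("%s\n", token_kind_to_str(token.kind));
    } while (token.kind != TOKEN_EOF);

    return 0;
}
```

Note also the design constraint the rewritten test creates: its loop from TOKEN_OPAREN up to TOKEN_UNKNOWN assumes every literal token sits between those two markers, which is why the patch regroups the enum in src/lexer.h instead of simply appending the new kinds.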