author     Carlos Maniero <carlos@maniero.me>          2023-05-06 12:29:05 -0300
committer  Johnny Richard <johnny@johnnyrichard.com>   2023-05-06 22:53:49 +0200
commit     ccd5e8585f10488eed72c772cc1804efea6b8fb4 (patch)
tree       f5690cb92e94758d44f5c29715c0d2415659fa7c
parent     6a297e0ef57f2ae6d6134bd44a33c55fa9628cfe (diff)
lexer: Tokenize logical and bitwise operators
The following logical operators were added to the lexer:

    TOKEN_EQUAL      ==
    TOKEN_NOT        !
    TOKEN_NOT_EQUAL  !=
    TOKEN_GT         >
    TOKEN_GT_EQUAL   >=
    TOKEN_LT         <
    TOKEN_LT_EQUAL   <=
    TOKEN_AND        &&
    TOKEN_OR         ||

Bitwise operators were also added:

    TOKEN_BITWISE_AND          &
    TOKEN_BITWISE_OR           |
    TOKEN_BITWISE_SHIFT_LEFT   <<
    TOKEN_BITWISE_SHIFT_RIGHT  >>
    TOKEN_BITWISE_XOR          ^
    TOKEN_BITWISE_NOT          ~

The old TOKEN_EQUAL ('=') was renamed to TOKEN_ASSIGN; TOKEN_EQUAL is now
used for the logical comparator '=='.

Signed-off-by: Carlos Maniero <carlos@maniero.me>
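The two-character operators are recognized with one character of lookahead:
each handler first records the single-character token, then, after consuming
it, upgrades the token when the next character completes a longer operator.
A minimal standalone sketch of that idea, with hypothetical TK_* names (the
lexer itself uses lexer_define_literal_token_props() and
lexer_tokenize_from_given_position_to_cursor(), as the diff below shows):

    #include <stdio.h>

    /* Hypothetical token kinds for this sketch only. */
    typedef enum { TK_ASSIGN, TK_EQUAL, TK_LT, TK_LT_EQUAL,
                   TK_SHIFT_LEFT, TK_UNKNOWN } tk_t;

    /* Scan one operator at src[*cur]: default to the one-character
     * token, then consume a second character when it completes a
     * longer operator (the same order the handlers in this commit
     * use). */
    static tk_t
    scan_op(const char *src, size_t *cur)
    {
        char c = src[(*cur)++];
        if (c == '=') {
            if (src[*cur] == '=') { (*cur)++; return TK_EQUAL; }
            return TK_ASSIGN;
        }
        if (c == '<') {
            if (src[*cur] == '=') { (*cur)++; return TK_LT_EQUAL; }
            if (src[*cur] == '<') { (*cur)++; return TK_SHIFT_LEFT; }
            return TK_LT;
        }
        return TK_UNKNOWN;
    }

    int
    main(void)
    {
        const char *ops[] = { "=", "==", "<", "<=", "<<" };
        for (size_t i = 0; i < sizeof(ops) / sizeof(ops[0]); i++) {
            size_t cur = 0;
            printf("%-2s -> kind %d\n", ops[i], (int) scan_op(ops[i], &cur));
        }
        return 0;
    }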
-rw-r--r--  src/lexer.c        | 152
-rw-r--r--  src/lexer.h        |  23
-rw-r--r--  src/parser.c       |   6
-rw-r--r--  test/lexer_test.c  |  17
4 files changed, 173 insertions(+), 25 deletions(-)
diff --git a/src/lexer.c b/src/lexer.c
index 5a6d245..72c27cd 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -62,6 +62,17 @@ lexer_drop_spaces(lexer_t *lexer)
}
static void
+lexer_tokenize_from_given_position_to_cursor(lexer_t *lexer, token_t *token, size_t position, token_kind_t kind)
+{
+ token->kind = kind;
+ token->value = string_view_new(lexer->src + position, lexer->cur - position);
+ token->filepath = lexer->filepath;
+ token->row = lexer->row;
+ token->col = position - lexer->bol;
+ token->bol = lexer->bol;
+}
+
+static void
lexer_tokenize_number(lexer_t *lexer, token_t *token)
{
size_t begin = lexer->cur;
@@ -69,12 +80,8 @@ lexer_tokenize_number(lexer_t *lexer, token_t *token)
while (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) {
lexer_drop_char(lexer);
}
- token->kind = TOKEN_NUMBER;
- token->value = string_view_new(lexer->src + begin, lexer->cur - begin);
- token->filepath = lexer->filepath;
- token->row = lexer->row;
- token->col = begin - lexer->bol;
- token->bol = lexer->bol;
+
+ lexer_tokenize_from_given_position_to_cursor(lexer, token, begin, TOKEN_NUMBER);
}
static void
@@ -86,12 +93,7 @@ lexer_tokenize_name(lexer_t *lexer, token_t *token)
lexer_drop_char(lexer);
}
- token->kind = TOKEN_NAME;
- token->value = string_view_new(lexer->src + begin, lexer->cur - begin);
- token->filepath = lexer->filepath;
- token->row = lexer->row;
- token->col = begin - lexer->bol;
- token->bol = lexer->bol;
+ lexer_tokenize_from_given_position_to_cursor(lexer, token, begin, TOKEN_NAME);
}
static void
@@ -194,7 +196,99 @@ lexer_next_token(lexer_t *lexer, token_t *token)
}
if (lexer_current_char(lexer) == '=') {
- lexer_define_literal_token_props(lexer, token, TOKEN_EQUAL);
+ lexer_define_literal_token_props(lexer, token, TOKEN_ASSIGN);
+ lexer_drop_char(lexer);
+
+ if (lexer_current_char(lexer) == '=') {
+ lexer_drop_char(lexer);
+ lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_EQUAL);
+ return;
+ }
+
+ return;
+ }
+
+ if (lexer_current_char(lexer) == '!') {
+ lexer_define_literal_token_props(lexer, token, TOKEN_NOT);
+ lexer_drop_char(lexer);
+
+ if (lexer_current_char(lexer) == '=') {
+ lexer_drop_char(lexer);
+ lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_NOT_EQUAL);
+ return;
+ }
+
+ return;
+ }
+
+ if (lexer_current_char(lexer) == '>') {
+ lexer_define_literal_token_props(lexer, token, TOKEN_GT);
+ lexer_drop_char(lexer);
+
+ if (lexer_current_char(lexer) == '=') {
+ lexer_drop_char(lexer);
+ lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_GT_EQUAL);
+ return;
+ }
+
+ if (lexer_current_char(lexer) == '>') {
+ lexer_drop_char(lexer);
+ lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_BITWISE_SHIFT_RIGHT);
+ return;
+ }
+ return;
+ }
+
+ if (lexer_current_char(lexer) == '<') {
+ lexer_define_literal_token_props(lexer, token, TOKEN_LT);
+ lexer_drop_char(lexer);
+
+ if (lexer_current_char(lexer) == '=') {
+ lexer_drop_char(lexer);
+ lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_LT_EQUAL);
+ return;
+ }
+
+ if (lexer_current_char(lexer) == '<') {
+ lexer_drop_char(lexer);
+ lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_BITWISE_SHIFT_LEFT);
+ return;
+ }
+ return;
+ }
+
+ if (lexer_current_char(lexer) == '&') {
+ lexer_define_literal_token_props(lexer, token, TOKEN_BITWISE_AND);
+ lexer_drop_char(lexer);
+
+ if (lexer_current_char(lexer) == '&') {
+ lexer_drop_char(lexer);
+ lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_AND);
+ return;
+ }
+ return;
+ }
+
+ if (lexer_current_char(lexer) == '|') {
+ lexer_define_literal_token_props(lexer, token, TOKEN_BITWISE_OR);
+ lexer_drop_char(lexer);
+
+ if (lexer_current_char(lexer) == '|') {
+ lexer_drop_char(lexer);
+ lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_OR);
+ return;
+ }
+ return;
+ }
+
+ if (lexer_current_char(lexer) == '^') {
+ lexer_define_literal_token_props(lexer, token, TOKEN_BITWISE_XOR);
+ lexer_drop_char(lexer);
+ return;
+ }
+
+ if (lexer_current_char(lexer) == '~') {
+ lexer_define_literal_token_props(lexer, token, TOKEN_BITWISE_NOT);
lexer_drop_char(lexer);
return;
}
@@ -322,8 +416,38 @@ token_kind_to_str(token_kind_t kind)
return "*";
case TOKEN_SLASH:
return "/";
- case TOKEN_EQUAL:
+ case TOKEN_ASSIGN:
return "=";
+ case TOKEN_EQUAL:
+ return "==";
+ case TOKEN_NOT:
+ return "!";
+ case TOKEN_NOT_EQUAL:
+ return "!=";
+ case TOKEN_GT:
+ return ">";
+ case TOKEN_GT_EQUAL:
+ return ">=";
+ case TOKEN_LT:
+ return "<";
+ case TOKEN_LT_EQUAL:
+ return "<=";
+ case TOKEN_AND:
+ return "&&";
+ case TOKEN_OR:
+ return "||";
+ case TOKEN_BITWISE_AND:
+ return "&";
+ case TOKEN_BITWISE_OR:
+ return "|";
+ case TOKEN_BITWISE_SHIFT_LEFT:
+ return "<<";
+ case TOKEN_BITWISE_SHIFT_RIGHT:
+ return ">>";
+ case TOKEN_BITWISE_XOR:
+ return "^";
+ case TOKEN_BITWISE_NOT:
+ return "~";
case TOKEN_KEYWORD_RETURN:
return "return";
case TOKEN_KEYWORD_FN:
diff --git a/src/lexer.h b/src/lexer.h
index 6449a0a..dd442cc 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -24,23 +24,42 @@
typedef enum
{
+ // Non-Literal Tokens
TOKEN_NAME,
+ TOKEN_NUMBER,
+ TOKEN_EOF,
+
+ // Literal Tokens
TOKEN_OPAREN,
TOKEN_CPAREN,
TOKEN_COLON,
TOKEN_SEMICOLON,
TOKEN_OCURLY,
TOKEN_CCURLY,
- TOKEN_NUMBER,
TOKEN_PLUS,
TOKEN_MINUS,
TOKEN_STAR,
TOKEN_SLASH,
+ TOKEN_ASSIGN,
TOKEN_EQUAL,
+ TOKEN_NOT,
+ TOKEN_NOT_EQUAL,
+ TOKEN_GT,
+ TOKEN_GT_EQUAL,
+ TOKEN_LT,
+ TOKEN_LT_EQUAL,
+ TOKEN_AND,
+ TOKEN_OR,
+ TOKEN_BITWISE_AND,
+ TOKEN_BITWISE_OR,
+ TOKEN_BITWISE_SHIFT_LEFT,
+ TOKEN_BITWISE_SHIFT_RIGHT,
+ TOKEN_BITWISE_XOR,
+ TOKEN_BITWISE_NOT,
TOKEN_KEYWORD_RETURN,
TOKEN_KEYWORD_FN,
TOKEN_KEYWORD_LET,
- TOKEN_EOF,
+
TOKEN_UNKNOWN
} token_kind_t;
diff --git a/src/parser.c b/src/parser.c
index 49803eb..baa2ef5 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -264,7 +264,7 @@ parser_parse_variable_assignment(parser_t *parser)
return NULL;
}
- if (!drop_expected_token(parser, TOKEN_EQUAL))
+ if (!drop_expected_token(parser, TOKEN_ASSIGN))
return NULL;
ast_node_t *expression = parser_parse_expression(parser);
@@ -318,7 +318,7 @@ parser_parse_variable_declaration(parser_t *parser)
return NULL;
}
- if (!drop_expected_token(parser, TOKEN_EQUAL)) {
+ if (!drop_expected_token(parser, TOKEN_ASSIGN)) {
return NULL;
}
@@ -360,7 +360,7 @@ is_next_statement_a_variable_assignement(parser_t *parser)
lexer_lookahead(parser->lexer, &token, 2);
- return token.kind == TOKEN_EQUAL;
+ return token.kind == TOKEN_ASSIGN;
}
static bool
diff --git a/test/lexer_test.c b/test/lexer_test.c
index 3c43342..87928b4 100644
--- a/test/lexer_test.c
+++ b/test/lexer_test.c
@@ -17,6 +17,7 @@
#define MUNIT_ENABLE_ASSERT_ALIASES
#include "lexer.h"
#include "munit.h"
+#include <stdio.h>
void
make_lexer_from_static_src(lexer_t *lexer, char *src)
@@ -88,17 +89,21 @@ test_tokenize_name(const MunitParameter params[], void *user_data_or_fixture)
}
static MunitResult
-test_tokenize_op(const MunitParameter params[], void *user_data_or_fixture)
+test_tokenize_basic_check(const MunitParameter params[], void *user_data_or_fixture)
{
assert_token_at(" + 2", 0, TOKEN_PLUS, "+");
assert_token_at(" - \n", 0, TOKEN_MINUS, "-");
assert_token_at(" * ;", 0, TOKEN_STAR, "*");
assert_token_at(" / ", 0, TOKEN_SLASH, "/");
- assert_token_at(" = ", 0, TOKEN_EQUAL, "=");
- assert_token_at("1 * 2", 0, TOKEN_NUMBER, "1");
- assert_token_at("1 * 2", 1, TOKEN_STAR, "*");
- assert_token_at("1 * 2", 2, TOKEN_NUMBER, "2");
+ for (size_t kind = TOKEN_OPAREN; kind < TOKEN_UNKNOWN; kind++) {
+ char source[128];
+ sprintf(source, "1 %s 2", token_kind_to_str(kind));
+
+ assert_token_at(source, 0, TOKEN_NUMBER, "1");
+ assert_token_at(source, 1, kind, token_kind_to_str(kind));
+ assert_token_at(source, 2, TOKEN_NUMBER, "2");
+ }
return MUNIT_OK;
}
@@ -134,7 +139,7 @@ static MunitTest tests[] = {
{ "/test_tokenize_digit", test_tokenize_number, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
{ "/test_tokenize_keywords", test_tokenize_keywords, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
{ "/test_tokenize_name", test_tokenize_name, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
- { "/test_tokenize_op", test_tokenize_op, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
+ { "/test_tokenize_basic_check", test_tokenize_basic_check, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
{ "/test_tokenize_unknown", test_tokenize_unknown, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
{ "/test_peek_next_token", test_peek_next_token, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
{ NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }
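A design note on the test change: token_kind_t was regrouped so that all
literal token kinds are contiguous, which is what lets
test_tokenize_basic_check loop from TOKEN_OPAREN to TOKEN_UNKNOWN and
round-trip every operator through token_kind_to_str(). A minimal sketch of
one iteration of that loop, assuming the lexer.h API shown in this diff
(check_operator_round_trip is a hypothetical wrapper; assert_token_at is
the suite's existing helper):

    #include <stdio.h>
    #include "lexer.h"   /* token_kind_t, token_kind_to_str() */

    /* Hypothetical wrapper: build "1 <op> 2" for a single literal
     * token kind and run the same three assertions the committed
     * loop applies to every kind in turn. */
    static void
    check_operator_round_trip(token_kind_t kind)
    {
        char source[128];
        snprintf(source, sizeof(source), "1 %s 2", token_kind_to_str(kind));

        assert_token_at(source, 0, TOKEN_NUMBER, "1");
        assert_token_at(source, 1, kind, token_kind_to_str(kind));
        assert_token_at(source, 2, TOKEN_NUMBER, "2");
    }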