From ccd5e8585f10488eed72c772cc1804efea6b8fb4 Mon Sep 17 00:00:00 2001 From: Carlos Maniero Date: Sat, 6 May 2023 12:29:05 -0300 Subject: lexer: Tokenize logical and bitwise operators The following logical operators were added to the lexer: TOKEN_EQUAL == TOKEN_NOT ! TOKEN_NOT_EQUAL != TOKEN_GT > TOKEN_GT_EQUAL >= TOKEN_LT < TOKEN_LT_EQUAL <= TOKEN_AND && TOKEN_OR || Bitwise operators were also added: TOKEN_BITWISE_AND & TOKEN_BITWISE_OR | TOKEN_BITWISE_SHIFT_LEFT << TOKEN_BITWISE_SHIFT_RIGHT >> TOKEN_BITWISE_XOR ^ TOKEN_BITWISE_NOT ~ TOKEN_EQUAL '=' was renamed TOKEN_ASSIGN, and now TOKEN_EQUAL is used for the logical comparator '=='. Signed-off-by: Carlos Maniero --- src/lexer.c | 152 ++++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 138 insertions(+), 14 deletions(-) (limited to 'src/lexer.c') diff --git a/src/lexer.c b/src/lexer.c index 5a6d245..72c27cd 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -61,6 +61,17 @@ lexer_drop_spaces(lexer_t *lexer) } } +static void +lexer_tokenize_from_given_position_to_cursor(lexer_t *lexer, token_t *token, size_t position, token_kind_t kind) +{ + token->kind = kind; + token->value = string_view_new(lexer->src + position, lexer->cur - position); + token->filepath = lexer->filepath; + token->row = lexer->row; + token->col = position - lexer->bol; + token->bol = lexer->bol; +} + static void lexer_tokenize_number(lexer_t *lexer, token_t *token) { @@ -69,12 +80,8 @@ lexer_tokenize_number(lexer_t *lexer, token_t *token) while (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) { lexer_drop_char(lexer); } - token->kind = TOKEN_NUMBER; - token->value = string_view_new(lexer->src + begin, lexer->cur - begin); - token->filepath = lexer->filepath; - token->row = lexer->row; - token->col = begin - lexer->bol; - token->bol = lexer->bol; + + lexer_tokenize_from_given_position_to_cursor(lexer, token, begin, TOKEN_NUMBER); } static void @@ -86,12 +93,7 @@ lexer_tokenize_name(lexer_t *lexer, token_t *token) 
lexer_drop_char(lexer); } - token->kind = TOKEN_NAME; - token->value = string_view_new(lexer->src + begin, lexer->cur - begin); - token->filepath = lexer->filepath; - token->row = lexer->row; - token->col = begin - lexer->bol; - token->bol = lexer->bol; + lexer_tokenize_from_given_position_to_cursor(lexer, token, begin, TOKEN_NAME); } static void @@ -194,7 +196,99 @@ lexer_next_token(lexer_t *lexer, token_t *token) } if (lexer_current_char(lexer) == '=') { - lexer_define_literal_token_props(lexer, token, TOKEN_EQUAL); + lexer_define_literal_token_props(lexer, token, TOKEN_ASSIGN); + lexer_drop_char(lexer); + + if (lexer_current_char(lexer) == '=') { + lexer_drop_char(lexer); + lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_EQUAL); + return; + } + + return; + } + + if (lexer_current_char(lexer) == '!') { + lexer_define_literal_token_props(lexer, token, TOKEN_NOT); + lexer_drop_char(lexer); + + if (lexer_current_char(lexer) == '=') { + lexer_drop_char(lexer); + lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_NOT_EQUAL); + return; + } + + return; + } + + if (lexer_current_char(lexer) == '>') { + lexer_define_literal_token_props(lexer, token, TOKEN_GT); + lexer_drop_char(lexer); + + if (lexer_current_char(lexer) == '=') { + lexer_drop_char(lexer); + lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_GT_EQUAL); + return; + } + + if (lexer_current_char(lexer) == '>') { + lexer_drop_char(lexer); + lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_BITWISE_SHIFT_RIGHT); + return; + } + return; + } + + if (lexer_current_char(lexer) == '<') { + lexer_define_literal_token_props(lexer, token, TOKEN_LT); + lexer_drop_char(lexer); + + if (lexer_current_char(lexer) == '=') { + lexer_drop_char(lexer); + lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_LT_EQUAL); + return; + } + + if (lexer_current_char(lexer) == '<') { + 
lexer_drop_char(lexer); + lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_BITWISE_SHIFT_LEFT); + return; + } + return; + } + + if (lexer_current_char(lexer) == '&') { + lexer_define_literal_token_props(lexer, token, TOKEN_BITWISE_AND); + lexer_drop_char(lexer); + + if (lexer_current_char(lexer) == '&') { + lexer_drop_char(lexer); + lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_AND); + return; + } + return; + } + + if (lexer_current_char(lexer) == '|') { + lexer_define_literal_token_props(lexer, token, TOKEN_BITWISE_OR); + lexer_drop_char(lexer); + + if (lexer_current_char(lexer) == '|') { + lexer_drop_char(lexer); + lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_OR); + return; + } + return; + } + + if (lexer_current_char(lexer) == '^') { + lexer_define_literal_token_props(lexer, token, TOKEN_BITWISE_XOR); + lexer_drop_char(lexer); + return; + } + + if (lexer_current_char(lexer) == '~') { + lexer_define_literal_token_props(lexer, token, TOKEN_BITWISE_NOT); lexer_drop_char(lexer); return; } @@ -322,8 +416,38 @@ token_kind_to_str(token_kind_t kind) return "*"; case TOKEN_SLASH: return "/"; - case TOKEN_EQUAL: + case TOKEN_ASSIGN: return "="; + case TOKEN_EQUAL: + return "=="; + case TOKEN_NOT: + return "!"; + case TOKEN_NOT_EQUAL: + return "!="; + case TOKEN_GT: + return ">"; + case TOKEN_GT_EQUAL: + return ">="; + case TOKEN_LT: + return "<"; + case TOKEN_LT_EQUAL: + return "<="; + case TOKEN_AND: + return "&&"; + case TOKEN_OR: + return "||"; + case TOKEN_BITWISE_AND: + return "&"; + case TOKEN_BITWISE_OR: + return "|"; + case TOKEN_BITWISE_SHIFT_LEFT: + return "<<"; + case TOKEN_BITWISE_SHIFT_RIGHT: + return ">>"; + case TOKEN_BITWISE_XOR: + return "^"; + case TOKEN_BITWISE_NOT: + return "~"; case TOKEN_KEYWORD_RETURN: return "return"; case TOKEN_KEYWORD_FN: -- cgit v1.2.3