From ccd5e8585f10488eed72c772cc1804efea6b8fb4 Mon Sep 17 00:00:00 2001 From: Carlos Maniero Date: Sat, 6 May 2023 12:29:05 -0300 Subject: lexer: Tokenize logical and bitwise operators The following logical operators were added to the lexer: TOKEN_EQUAL == TOKEN_NOT ! TOKEN_NOT_EQUAL != TOKEN_GT > TOKEN_GT_EQUAL >= TOKEN_LT < TOKEN_LT_EQUAL <= TOKEN_AND && TOKEN_OR || Bitwise operators were also added: TOKEN_BITWISE_AND & TOKEN_BITWISE_OR | TOKEN_BITWISE_SHIFT_LEFT << TOKEN_BITWISE_SHIFT_RIGHT >> TOKEN_BITWISE_XOR ^ TOKEN_BITWISE_NOT ~ TOKEN_EQUAL '=' was renamed to TOKEN_ASSIGN, and now TOKEN_EQUAL is used for the logical comparator '=='. Signed-off-by: Carlos Maniero --- src/lexer.c | 152 +++++++++++++++++++++++++++++++++++++++++++++++++++++------ src/lexer.h | 23 ++++++++- src/parser.c | 6 +-- 3 files changed, 162 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/lexer.c b/src/lexer.c index 5a6d245..72c27cd 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -61,6 +61,17 @@ lexer_drop_spaces(lexer_t *lexer) } } +static void +lexer_tokenize_from_given_position_to_cursor(lexer_t *lexer, token_t *token, size_t position, token_kind_t kind) +{ + token->kind = kind; + token->value = string_view_new(lexer->src + position, lexer->cur - position); + token->filepath = lexer->filepath; + token->row = lexer->row; + token->col = position - lexer->bol; + token->bol = lexer->bol; +} + static void lexer_tokenize_number(lexer_t *lexer, token_t *token) { @@ -69,12 +80,8 @@ lexer_tokenize_number(lexer_t *lexer, token_t *token) while (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) { lexer_drop_char(lexer); } - token->kind = TOKEN_NUMBER; - token->value = string_view_new(lexer->src + begin, lexer->cur - begin); - token->filepath = lexer->filepath; - token->row = lexer->row; - token->col = begin - lexer->bol; - token->bol = lexer->bol; + + lexer_tokenize_from_given_position_to_cursor(lexer, token, begin, TOKEN_NUMBER); } static void @@ -86,12 +93,7 @@ 
lexer_tokenize_name(lexer_t *lexer, token_t *token) lexer_drop_char(lexer); } - token->kind = TOKEN_NAME; - token->value = string_view_new(lexer->src + begin, lexer->cur - begin); - token->filepath = lexer->filepath; - token->row = lexer->row; - token->col = begin - lexer->bol; - token->bol = lexer->bol; + lexer_tokenize_from_given_position_to_cursor(lexer, token, begin, TOKEN_NAME); } static void @@ -194,7 +196,99 @@ lexer_next_token(lexer_t *lexer, token_t *token) } if (lexer_current_char(lexer) == '=') { - lexer_define_literal_token_props(lexer, token, TOKEN_EQUAL); + lexer_define_literal_token_props(lexer, token, TOKEN_ASSIGN); + lexer_drop_char(lexer); + + if (lexer_current_char(lexer) == '=') { + lexer_drop_char(lexer); + lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_EQUAL); + return; + } + + return; + } + + if (lexer_current_char(lexer) == '!') { + lexer_define_literal_token_props(lexer, token, TOKEN_NOT); + lexer_drop_char(lexer); + + if (lexer_current_char(lexer) == '=') { + lexer_drop_char(lexer); + lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_NOT_EQUAL); + return; + } + + return; + } + + if (lexer_current_char(lexer) == '>') { + lexer_define_literal_token_props(lexer, token, TOKEN_GT); + lexer_drop_char(lexer); + + if (lexer_current_char(lexer) == '=') { + lexer_drop_char(lexer); + lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_GT_EQUAL); + return; + } + + if (lexer_current_char(lexer) == '>') { + lexer_drop_char(lexer); + lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_BITWISE_SHIFT_RIGHT); + return; + } + return; + } + + if (lexer_current_char(lexer) == '<') { + lexer_define_literal_token_props(lexer, token, TOKEN_LT); + lexer_drop_char(lexer); + + if (lexer_current_char(lexer) == '=') { + lexer_drop_char(lexer); + lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_LT_EQUAL); + return; + 
} + + if (lexer_current_char(lexer) == '<') { + lexer_drop_char(lexer); + lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_BITWISE_SHIFT_LEFT); + return; + } + return; + } + + if (lexer_current_char(lexer) == '&') { + lexer_define_literal_token_props(lexer, token, TOKEN_BITWISE_AND); + lexer_drop_char(lexer); + + if (lexer_current_char(lexer) == '&') { + lexer_drop_char(lexer); + lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_AND); + return; + } + return; + } + + if (lexer_current_char(lexer) == '|') { + lexer_define_literal_token_props(lexer, token, TOKEN_BITWISE_OR); + lexer_drop_char(lexer); + + if (lexer_current_char(lexer) == '|') { + lexer_drop_char(lexer); + lexer_tokenize_from_given_position_to_cursor(lexer, token, lexer->cur - 2, TOKEN_OR); + return; + } + return; + } + + if (lexer_current_char(lexer) == '^') { + lexer_define_literal_token_props(lexer, token, TOKEN_BITWISE_XOR); + lexer_drop_char(lexer); + return; + } + + if (lexer_current_char(lexer) == '~') { + lexer_define_literal_token_props(lexer, token, TOKEN_BITWISE_NOT); lexer_drop_char(lexer); return; } @@ -322,8 +416,38 @@ token_kind_to_str(token_kind_t kind) return "*"; case TOKEN_SLASH: return "/"; - case TOKEN_EQUAL: + case TOKEN_ASSIGN: return "="; + case TOKEN_EQUAL: + return "=="; + case TOKEN_NOT: + return "!"; + case TOKEN_NOT_EQUAL: + return "!="; + case TOKEN_GT: + return ">"; + case TOKEN_GT_EQUAL: + return ">="; + case TOKEN_LT: + return "<"; + case TOKEN_LT_EQUAL: + return "<="; + case TOKEN_AND: + return "&&"; + case TOKEN_OR: + return "||"; + case TOKEN_BITWISE_AND: + return "&"; + case TOKEN_BITWISE_OR: + return "|"; + case TOKEN_BITWISE_SHIFT_LEFT: + return "<<"; + case TOKEN_BITWISE_SHIFT_RIGHT: + return ">>"; + case TOKEN_BITWISE_XOR: + return "^"; + case TOKEN_BITWISE_NOT: + return "~"; case TOKEN_KEYWORD_RETURN: return "return"; case TOKEN_KEYWORD_FN: diff --git a/src/lexer.h b/src/lexer.h index 
6449a0a..dd442cc 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -24,23 +24,42 @@ typedef enum { + // Non-Literal Tokens TOKEN_NAME, + TOKEN_NUMBER, + TOKEN_EOF, + + // Literal Tokens TOKEN_OPAREN, TOKEN_CPAREN, TOKEN_COLON, TOKEN_SEMICOLON, TOKEN_OCURLY, TOKEN_CCURLY, - TOKEN_NUMBER, TOKEN_PLUS, TOKEN_MINUS, TOKEN_STAR, TOKEN_SLASH, + TOKEN_ASSIGN, TOKEN_EQUAL, + TOKEN_NOT, + TOKEN_NOT_EQUAL, + TOKEN_GT, + TOKEN_GT_EQUAL, + TOKEN_LT, + TOKEN_LT_EQUAL, + TOKEN_AND, + TOKEN_OR, + TOKEN_BITWISE_AND, + TOKEN_BITWISE_OR, + TOKEN_BITWISE_SHIFT_LEFT, + TOKEN_BITWISE_SHIFT_RIGHT, + TOKEN_BITWISE_XOR, + TOKEN_BITWISE_NOT, TOKEN_KEYWORD_RETURN, TOKEN_KEYWORD_FN, TOKEN_KEYWORD_LET, - TOKEN_EOF, + TOKEN_UNKNOWN } token_kind_t; diff --git a/src/parser.c b/src/parser.c index 49803eb..baa2ef5 100644 --- a/src/parser.c +++ b/src/parser.c @@ -264,7 +264,7 @@ parser_parse_variable_assignment(parser_t *parser) return NULL; } - if (!drop_expected_token(parser, TOKEN_EQUAL)) + if (!drop_expected_token(parser, TOKEN_ASSIGN)) return NULL; ast_node_t *expression = parser_parse_expression(parser); @@ -318,7 +318,7 @@ parser_parse_variable_declaration(parser_t *parser) return NULL; } - if (!drop_expected_token(parser, TOKEN_EQUAL)) { + if (!drop_expected_token(parser, TOKEN_ASSIGN)) { return NULL; } @@ -360,7 +360,7 @@ is_next_statement_a_variable_assignement(parser_t *parser) lexer_lookahead(parser->lexer, &token, 2); - return token.kind == TOKEN_EQUAL; + return token.kind == TOKEN_ASSIGN; } static bool -- cgit v1.2.3