From b8e76aa1e888a4602d4220206bbea8d04882fae2 Mon Sep 17 00:00:00 2001 From: Carlos Maniero Date: Wed, 26 Apr 2023 01:57:40 -0300 Subject: lexer: Split operation tokens into their own token The +, -, *, /, and = tokens used to share the single TOKEN_OP kind. TOKEN_OP has been removed and a dedicated token kind has been introduced for each operator. The new names follow Python's token names: https://docs.python.org/3/library/token.html Signed-off-by: Carlos Maniero Reviewed-by: Johnny Richard --- src/lexer.c | 41 ++++++++++++++++++++++++++++++++++++----- src/lexer.h | 6 +++++- src/parser.c | 18 +++--------------- test/lexer_test.c | 12 ++++++------ 4 files changed, 50 insertions(+), 27 deletions(-) diff --git a/src/lexer.c b/src/lexer.c index bbf29fc..e5f232b 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -151,9 +151,32 @@ lexer_next_token(lexer_t *lexer, token_t *token) return; } - if (lexer_current_char(lexer) == '+' || lexer_current_char(lexer) == '-' || lexer_current_char(lexer) == '*' || - lexer_current_char(lexer) == '/' || lexer_current_char(lexer) == '=') { - lexer_define_literal_token_props(lexer, token, TOKEN_OP); + if (lexer_current_char(lexer) == '+') { + lexer_define_literal_token_props(lexer, token, TOKEN_PLUS); + lexer_drop_char(lexer); + return; + } + + if (lexer_current_char(lexer) == '-') { + lexer_define_literal_token_props(lexer, token, TOKEN_MINUS); + lexer_drop_char(lexer); + return; + } + + if (lexer_current_char(lexer) == '*') { + lexer_define_literal_token_props(lexer, token, TOKEN_STAR); + lexer_drop_char(lexer); + return; + } + + if (lexer_current_char(lexer) == '/') { + lexer_define_literal_token_props(lexer, token, TOKEN_SLASH); + lexer_drop_char(lexer); + return; + } + + if (lexer_current_char(lexer) == '=') { + lexer_define_literal_token_props(lexer, token, TOKEN_EQUAL); lexer_drop_char(lexer); return; } @@ -248,8 +271,16 @@ token_kind_to_str(token_kind_t kind) return "}"; case TOKEN_NUMBER: return "TOKEN_NUMBER"; - case TOKEN_OP: - return "TOKEN_OP"; + case TOKEN_PLUS: 
+ return "+"; + case TOKEN_MINUS: + return "-"; + case TOKEN_STAR: + return "*"; + case TOKEN_SLASH: + return "/"; + case TOKEN_EQUAL: + return "="; case TOKEN_EOF: return "TOKEN_EOF"; case TOKEN_UNKNOWN: diff --git a/src/lexer.h b/src/lexer.h index 8c84745..d4e84e1 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -32,7 +32,11 @@ typedef enum TOKEN_OCURLY, TOKEN_CCURLY, TOKEN_NUMBER, - TOKEN_OP, + TOKEN_PLUS, + TOKEN_MINUS, + TOKEN_STAR, + TOKEN_SLASH, + TOKEN_EQUAL, TOKEN_EOF, TOKEN_UNKNOWN } token_kind_t; diff --git a/src/parser.c b/src/parser.c index f3c6328..fd836af 100644 --- a/src/parser.c +++ b/src/parser.c @@ -138,8 +138,7 @@ parser_parse_term(parser_t *parser, ast_node_t *node) token_t token; lexer_next_token(parser->lexer, &token); - while (token.kind == TOKEN_OP && (string_view_eq(token.value, string_view_from_str("*")) || - string_view_eq(token.value, string_view_from_str("/")))) { + while (token.kind == TOKEN_STAR || token.kind == TOKEN_SLASH) { ast_node_t *left = ast_node_new(); *left = *node; @@ -173,8 +172,7 @@ parser_parse_expression(parser_t *parser, ast_node_t *node) token_t token; lexer_next_token(parser->lexer, &token); - while (token.kind == TOKEN_OP && (string_view_eq(token.value, string_view_from_str("+")) || - string_view_eq(token.value, string_view_from_str("-")))) { + while (token.kind == TOKEN_PLUS || token.kind == TOKEN_MINUS) { ast_node_t *left = ast_node_new(); *left = *node; @@ -220,19 +218,9 @@ parser_parse_variable_definition(parser_t *parser, string_view_t variable_name, token_t equal_token; - if (!expected_token(&equal_token, parser, TOKEN_OP)) + if (!expected_token(&equal_token, parser, TOKEN_EQUAL)) return false; - if (!string_view_eq(equal_token.value, string_view_from_str("="))) { - parser_error_t error; - error.token = equal_token; - - sprintf(error.message, "expected '=' but got " SVFMT, SVARG(&equal_token.value)); - - parser->errors[parser->errors_len++] = error; - return false; - } - ast_node_t *expression = ast_node_new(); 
if (!parser_parse_expression(parser, expression) || !drop_expected_token(parser, TOKEN_SEMICOLON)) { diff --git a/test/lexer_test.c b/test/lexer_test.c index abfac16..bbb6784 100644 --- a/test/lexer_test.c +++ b/test/lexer_test.c @@ -64,14 +64,14 @@ test_tokenize_number(const MunitParameter params[], void *user_data_or_fixture) static MunitResult test_tokenize_op(const MunitParameter params[], void *user_data_or_fixture) { - assert_token_at(" + 2", 0, TOKEN_OP, "+"); - assert_token_at(" - \n", 0, TOKEN_OP, "-"); - assert_token_at(" * ;", 0, TOKEN_OP, "*"); - assert_token_at(" / ", 0, TOKEN_OP, "/"); - assert_token_at(" = ", 0, TOKEN_OP, "="); + assert_token_at(" + 2", 0, TOKEN_PLUS, "+"); + assert_token_at(" - \n", 0, TOKEN_MINUS, "-"); + assert_token_at(" * ;", 0, TOKEN_STAR, "*"); + assert_token_at(" / ", 0, TOKEN_SLASH, "/"); + assert_token_at(" = ", 0, TOKEN_EQUAL, "="); assert_token_at("1 * 2", 0, TOKEN_NUMBER, "1"); - assert_token_at("1 * 2", 1, TOKEN_OP, "*"); + assert_token_at("1 * 2", 1, TOKEN_STAR, "*"); assert_token_at("1 * 2", 2, TOKEN_NUMBER, "2"); return MUNIT_OK; -- cgit v1.2.3