diff options
author | Carlos Maniero <carlosmaniero@gmail.com> | 2023-04-20 13:32:36 -0300 |
---|---|---|
committer | Johnny Richard <johnny@johnnyrichard.com> | 2023-04-20 18:31:55 +0200 |
commit | 3ceb85bd93fa87a5be3682ab8995abea82a63ea3 (patch) | |
tree | 9512c4255bd87967dfd42a8ca68abd18e34e7407 | |
parent | a47e5ceb6eefdac9c5f5473e1fee0d33a5f4646e (diff) |
parser: Stop exiting on parser error
Previously, when an error occurred during parsing, the application
would exit, making it difficult to test the parser and limiting the
compiler's extensibility. This commit improves the parser's error
handling by allowing for continued execution after an error, enabling
easier testing and increased flexibility.
The parser is prepared to handle multiple errors. Although the
current implementation always returns a single error, this may be
useful once there are multiple functions, where we can show errors by context.
Signed-off-by: Carlos Maniero <carlosmaniero@gmail.com>
Reviewed-by: Johnny Richard <johnny@johnnyrichard.com>
-rw-r--r-- | src/lexer.c | 6 | ||||
-rw-r--r-- | src/parser.c | 138 | ||||
-rw-r--r-- | src/parser.h | 9 | ||||
-rw-r--r-- | src/pipac.c | 20 | ||||
-rw-r--r-- | test/parser_test.c | 41 |
5 files changed, 170 insertions, 44 deletions
diff --git a/src/lexer.c b/src/lexer.c index 77ae68e..2c8ffb9 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -95,6 +95,12 @@ lexer_next_token(lexer_t *lexer, token_t *token) { lexer_drop_spaces(lexer); + if (lexer_is_eof(lexer)) { + lexer_define_literal_token_props(lexer, token, TOKEN_EOF); + lexer_drop_char(lexer); + return; + } + if (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) { lexer_tokenize_number(lexer, token); return; diff --git a/src/parser.c b/src/parser.c index b1e6a1f..4646de0 100644 --- a/src/parser.c +++ b/src/parser.c @@ -16,6 +16,7 @@ */ #include <assert.h> +#include <stdbool.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -30,83 +31,136 @@ parser_init(parser_t *parser, lexer_t *lexer) assert(parser && "parser must be defined"); assert(lexer && "lexer must be defined"); parser->lexer = lexer; + parser->errors_len = 0; } -static token_t -expected_token(parser_t *parser, token_kind_t kind) +static bool +expected_token(token_t *token, parser_t *parser, token_kind_t kind) { - token_t token; - lexer_next_token(parser->lexer, &token); - - if (token.kind == TOKEN_EOF) { - fprintf( - stderr, - "%s:%d:%d: [ERROR]: expected '%s' but got end of file\n", - token.filepath, token.row + 1, token.col + 1, + lexer_next_token(parser->lexer, token); + parser_error_t error; + + if (token->kind == TOKEN_EOF) { + error.token = *token; + sprintf( + error.message, + "expected '%s' but got end of file", token_kind_to_str(kind) ); - exit(EXIT_FAILURE); + + parser->errors[parser->errors_len++] = error; + return false; } - if (token.kind != kind) { - fprintf( - stderr, - "%s:%d:%d: [ERROR]: expected '%s' but got '%s'\n", - token.filepath, token.row + 1, token.col + 1, - token_kind_to_str(kind), token_kind_to_str(token.kind) + if (token->kind != kind) { + error.token = *token; + sprintf( + error.message, + "expected '%s' but got '%s'", + token_kind_to_str(kind), token_kind_to_str(token->kind) ); - exit(EXIT_FAILURE); + 
parser->errors[parser->errors_len++] = error; + return false; } - return token; + return true; +} + +static bool +drop_expected_token(parser_t *parser, token_kind_t kind) +{ + token_t ignored_token; + return expected_token(&ignored_token, parser, kind); } -static type_t -parser_parse_type(parser_t *parser) +static bool +parser_parse_type(type_t *type, parser_t *parser) { - token_t token = expected_token(parser, TOKEN_NAME); + token_t token; + + if(!expected_token(&token, parser, TOKEN_NAME)) return false; if (string_view_eq(token.value, string_view_from_str("i32"))) { - return TYPE_I32; + *type = TYPE_I32; + return true; } - fprintf(stderr, "[ERROR]: expected type 'i32' but got '"SVFMT"'\n", SVARG(&token.value)); - exit(EXIT_FAILURE); + parser_error_t error; + error.token = token; + + sprintf( + error.message, + "type '"SVFMT"' is not defined", + SVARG(&token.value) + ); + + parser->errors[parser->errors_len++] = error; + return false; } -void +bool parser_parse_return_stmt(parser_t *parser, ast_node_t *node) { - expected_token(parser, TOKEN_OCURLY); - token_t return_keyword_token = expected_token(parser, TOKEN_NAME); + token_t return_keyword_token; + + if (!drop_expected_token(parser, TOKEN_OCURLY)) return false; + + if(!expected_token(&return_keyword_token, parser, TOKEN_NAME)) return false; if (!string_view_eq(return_keyword_token.value, string_view_from_str("return"))) { - // TODO: Add filename:row:col prefix to expected token exceptions - fprintf(stderr, "[ERROR]: expected 'return' keyword but got '"SVFMT"'\n", SVARG(&return_keyword_token.value)); - exit(EXIT_FAILURE); + parser_error_t error; + error.token = return_keyword_token; + + sprintf( + error.message, + "expected 'return' keyword but got '"SVFMT"'", + SVARG(&return_keyword_token.value) + ); + + parser->errors[parser->errors_len++] = error; + return false; } - token_t number_token = expected_token(parser, TOKEN_NUMBER); - expected_token(parser, TOKEN_SEMICOLON); - expected_token(parser, TOKEN_CCURLY); + 
token_t number_token; + + if (!expected_token(&number_token, parser, TOKEN_NUMBER)) return false; + + if (!drop_expected_token(parser, TOKEN_SEMICOLON)) return false; + if (!drop_expected_token(parser, TOKEN_CCURLY)) return false; char number_as_str[number_token.value.size]; string_view_to_str(&number_token.value, number_as_str); ast_node_init_return_stmt(node, atoi(number_as_str)); + return true; } -void +bool parser_parse_function_declaration(parser_t *parser, ast_node_t *node) { - token_t func_name_token = expected_token(parser, TOKEN_NAME); - expected_token(parser, TOKEN_OPAREN); - expected_token(parser, TOKEN_CPAREN); - expected_token(parser, TOKEN_COLON); - type_t return_type = parser_parse_type(parser); + token_t func_name_token; + + if (!expected_token(&func_name_token, parser, TOKEN_NAME)) { + return false; + } + + if (!drop_expected_token(parser, TOKEN_OPAREN)) return false; + if (!drop_expected_token(parser, TOKEN_CPAREN)) return false; + if (!drop_expected_token(parser, TOKEN_COLON)) return false; + + type_t return_type; + + if(!parser_parse_type(&return_type, parser)) { + return false; + } ast_node_t *return_node = ast_node_new(); - parser_parse_return_stmt(parser, return_node); + + bool parsed_return = parser_parse_return_stmt(parser, return_node); + + if (!parsed_return) { + return false; + } ast_node_init_function_declaration( node, @@ -114,4 +168,6 @@ parser_parse_function_declaration(parser_t *parser, ast_node_t *node) return_type, return_node ); + + return true; } diff --git a/src/parser.h b/src/parser.h index 988006e..5f73ff3 100644 --- a/src/parser.h +++ b/src/parser.h @@ -21,13 +21,20 @@ #include "lexer.h" #include "string_view.h" +typedef struct parser_error_t { + token_t token; + char message [256]; +} parser_error_t; + typedef struct parser_t { lexer_t *lexer; + int errors_len; + parser_error_t errors[1]; } parser_t; void parser_init(parser_t *parser, lexer_t *lexer); -void parser_parse_function_declaration(parser_t *parser, ast_node_t 
*node); +bool parser_parse_function_declaration(parser_t *parser, ast_node_t *node); #endif /* PARSER_H */ diff --git a/src/pipac.c b/src/pipac.c index 41294c4..2b7b1f5 100644 --- a/src/pipac.c +++ b/src/pipac.c @@ -47,6 +47,20 @@ print_tokens(lexer_t *lexer) { } } +void +parser_print_errors(parser_t *parser) { + for (int i=0; i < parser->errors_len; i++) { + parser_error_t error = parser->errors[i]; + + fprintf( + stderr, + "%s:%d:%d: [ERROR]: %s\n", + error.token.filepath, error.token.row + 1, error.token.col + 1, + error.message + ); + } +} + int main(int argc, char **argv) { @@ -64,7 +78,11 @@ main(int argc, char **argv) parser_init(&parser, &lexer); ast_node_t* func = ast_node_new(); - parser_parse_function_declaration(&parser, func); + + if (!parser_parse_function_declaration(&parser, func)) { + parser_print_errors(&parser); + return EXIT_FAILURE; + } generate_gas_x86_64_linux(func); diff --git a/test/parser_test.c b/test/parser_test.c index 30aa285..d0d36ca 100644 --- a/test/parser_test.c +++ b/test/parser_test.c @@ -23,6 +23,7 @@ void make_lexer_from_static_src(lexer_t *lexer, char *src) { + lexer->filepath = "test.pipa"; lexer->srclen = 0; lexer->cur = 0; lexer->row = 0; @@ -31,6 +32,22 @@ make_lexer_from_static_src(lexer_t *lexer, char *src) lexer->srclen = strlen(src); } +void +assert_parser_error(char* src, char* error_msg) { + parser_t parser; + lexer_t lexer; + + make_lexer_from_static_src(&lexer, src); + parser_init(&parser, &lexer); + + ast_node_t *ast_function = ast_node_new(); + + bool parsed = parser_parse_function_declaration(&parser, ast_function); + assert_false(parsed); + assert_int(1, ==, parser.errors_len); + assert_string_equal(error_msg, parser.errors[0].message); +} + static MunitResult test_parse_function(const MunitParameter params[], void *user_data_or_fixture) @@ -41,7 +58,9 @@ test_parse_function(const MunitParameter params[], make_lexer_from_static_src(&lexer, "main(): i32 { return 42; }"); parser_init(&parser, &lexer); ast_node_t 
*ast_function = ast_node_new(); - parser_parse_function_declaration(&parser, ast_function); + + bool parsed = parser_parse_function_declaration(&parser, ast_function); + assert_true(parsed); char actual[5]; @@ -59,8 +78,28 @@ test_parse_function(const MunitParameter params[], return MUNIT_OK; } +static MunitResult +test_parse_basic_syntax_errors(const MunitParameter params[], + void *user_data_or_fixture) +{ + assert_parser_error("(): i32 { return 42; }" , "expected 'TOKEN_NAME' but got '('"); + assert_parser_error("main): i32 { return 42; }" , "expected '(' but got ')'"); + assert_parser_error("main(: i32 { return 42; }" , "expected ')' but got ':'"); + assert_parser_error("main() i32 { return 42; }" , "expected ':' but got 'TOKEN_NAME'"); + assert_parser_error("main(): { return 42; }" , "expected 'TOKEN_NAME' but got '{'"); + assert_parser_error("main(): i32 return 42; }" , "expected '{' but got 'TOKEN_NAME'"); + assert_parser_error("main(): i32 { 42; }" , "expected 'TOKEN_NAME' but got 'TOKEN_NUMBER'"); + assert_parser_error("main(): i32 { return; }" , "expected 'TOKEN_NUMBER' but got ';'"); + assert_parser_error("main(): i32 { return 42;" , "expected '}' but got end of file"); + assert_parser_error("main(): beff { return 42; }" , "type 'beff' is not defined"); + assert_parser_error("main(): i32 { oxi 42; }" , "expected 'return' keyword but got 'oxi'"); + + return MUNIT_OK; +} + static MunitTest tests[] = { { "/test_parse_function", test_parse_function, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }, + { "/test_parse_basic_syntax_errors", test_parse_basic_syntax_errors, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL }, { NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL } }; |