parser: Split block into small functions

Since it is possible to look a future token without consuming it, it was possible to split the block parser into small chunks of code. There is the performance drawback, because now the parser makes multiple lookups to the same token. However IMO that it is not a big concern given the small computation required to get a token. Also it can be easily addressed by computing all token in advance. Memory Leak: During the refactor I found some extra memory leaks related to not released scopes. So then, more than just printing a message I introduced an assert on scope.c to make sure developers will get this feedback asap because our testing framework suppress messages from stderr when the test passes. Signed-off-by: Carlos Maniero <carlos@maniero.me>
author: Carlos Maniero <carlos@maniero.me> 2023-05-02 23:45:52 -0300
committer: Johnny Richard <johnny@johnnyrichard.com> 2023-05-03 22:52:41 +0200
commit: b18a53b912ae66ad2bb23985640c9fac56ced358 (patch)
tree: eadc80d198b9ddab5f1fe77377e1249a3ec3259f
parent: e623c701d2ef41cf4993590e2932c7538c83fc54 (diff)
3 files changed, 118 insertions, 72 deletions
diff --git a/src/parser.c b/src/parser.c
index 5f7a709..d5ffa21 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -316,6 +316,83 @@ parser_parse_variable_definition(parser_t *parser, ast_node_t *node)
   return true;
 }
 
+static bool
+is_next_statement_a_variable_declaration(parser_t *parser)
+{
+  token_t token;
+  lexer_peek_next_token(parser->lexer, &token);
+
+  if (token.kind != TOKEN_NAME) {
+    return false;
+  }
+
+  lexer_lookahead(parser->lexer, &token, 2);
+
+  return token.kind == TOKEN_COLON;
+}
+
+static bool
+is_next_statement_a_variable_assignement(parser_t *parser)
+{
+  token_t token;
+  lexer_peek_next_token(parser->lexer, &token);
+
+  if (token.kind != TOKEN_NAME) {
+    return false;
+  }
+
+  lexer_lookahead(parser->lexer, &token, 2);
+
+  return token.kind == TOKEN_EQUAL;
+}
+
+static bool
+is_next_statement_return(parser_t *parser)
+{
+  token_t token;
+  lexer_peek_next_token(parser->lexer, &token);
+  return token.kind == TOKEN_KEYWORD_RETURN;
+}
+
+static bool
+is_block_end(parser_t *parser)
+{
+  token_t token;
+  lexer_peek_next_token(parser->lexer, &token);
+  return token.kind == TOKEN_CCURLY || token.kind == TOKEN_EOF;
+}
+
+static void
+parser_error_report_unexpected_token(parser_t *parser)
+{
+  token_t token;
+  lexer_peek_next_token(parser->lexer, &token);
+  parser_error_t *error = &parser->errors[parser->errors_len++];
+  error->token = token;
+
+  sprintf(
+    error->message, "unexpected token '%s' value='" SVFMT "'", token_kind_to_str(token.kind), SVARG(&token.value));
+}
+
+static bool
+parser_ensure_function_return_statement(parser_t *parser, vector_t *body, token_t *function_token)
+{
+  ast_node_t *latest_node = vector_at(body, body->size - 1);
+
+  if (latest_node->kind != AST_RETURN_STMT) {
+    parser_error_t error;
+    error.token = *function_token;
+
+    sprintf(error.message, "expected 'return' keyword.");
+
+    parser->errors[parser->errors_len++] = error;
+    vector_destroy(body);
+    return false;
+  }
+
+  return true;
+}
+
 static vector_t *
 parser_parse_block_declarations(parser_t *parser)
 {
@@ -327,18 +404,11 @@ parser_parse_block_declarations(parser_t *parser)
   lexer_peek_next_token(parser->lexer, &current_token);
 
   scope_enter(parser->scope);
-
   vector_t *body = vector_new();
 
-  while (current_token.kind != TOKEN_CCURLY && current_token.kind != TOKEN_EOF) {
-    if (current_token.kind != TOKEN_NAME && current_token.kind != TOKEN_KEYWORD_RETURN) {
-      parser_error_push_unexpected_kind(parser, &current_token, TOKEN_NAME);
-      scope_leave(parser->scope);
-      vector_destroy(body);
-      return NULL;
-    }
+  while (!is_block_end(parser)) {
 
-    if (current_token.kind == TOKEN_KEYWORD_RETURN) {
+    if (is_next_statement_return(parser)) {
       ast_node_t *return_node = parser_parse_return_stmt(parser);
 
       if (return_node == NULL) {
@@ -348,82 +418,51 @@ parser_parse_block_declarations(parser_t *parser)
       }
 
       vector_push_back(body, return_node);
-    } else {
-      token_t token;
-      lexer_lookahead(parser->lexer, &token, 2);
-
-      switch (token.kind) {
-        case TOKEN_COLON: {
-          ast_node_t *variable_node = ast_node_new();
-
-          if (!parser_parse_variable_definition(parser, variable_node)) {
-            ast_node_destroy(variable_node);
-            vector_destroy(body);
-            return NULL;
-          }
-
-          vector_push_back(body, variable_node);
-          break;
-        }
-        case TOKEN_EQUAL: {
-          ast_node_t *variable_assignment = ast_node_new();
-
-          if (!parser_parse_variable_assignment(parser, variable_assignment)) {
-            ast_node_destroy(variable_assignment);
-            vector_destroy(body);
-            return NULL;
-          }
-
-          vector_push_back(body, variable_assignment);
-          break;
-        }
-        case TOKEN_NAME:
-        case TOKEN_OPAREN:
-        case TOKEN_CPAREN:
-        case TOKEN_SEMICOLON:
-        case TOKEN_OCURLY:
-        case TOKEN_CCURLY:
-        case TOKEN_NUMBER:
-        case TOKEN_PLUS:
-        case TOKEN_KEYWORD_RETURN:
-        case TOKEN_MINUS:
-        case TOKEN_STAR:
-        case TOKEN_SLASH:
-        case TOKEN_EOF:
-        case TOKEN_UNKNOWN:
-          // FIXME: Show an error it means syntax error
-          lexer_drop_next_token(parser->lexer);
-          break;
+      continue;
+    }
+
+    if (is_next_statement_a_variable_declaration(parser)) {
+      ast_node_t *variable_node = ast_node_new();
+
+      if (!parser_parse_variable_definition(parser, variable_node)) {
+        scope_leave(parser->scope);
+        ast_node_destroy(variable_node);
+        vector_destroy(body);
+        return NULL;
       }
+
+      vector_push_back(body, variable_node);
+      continue;
     }
 
-    lexer_peek_next_token(parser->lexer, &current_token);
-  }
+    if (is_next_statement_a_variable_assignement(parser)) {
+      ast_node_t *variable_assignment = ast_node_new();
 
-  lexer_next_token(parser->lexer, &current_token);
+      if (!parser_parse_variable_assignment(parser, variable_assignment)) {
+        scope_leave(parser->scope);
+        ast_node_destroy(variable_assignment);
+        vector_destroy(body);
+        return NULL;
+      }
+
+      vector_push_back(body, variable_assignment);
+      continue;
+    }
+
+    parser_error_report_unexpected_token(parser);
 
-  if (current_token.kind == TOKEN_EOF) {
-    parser_error_push_unexpected_kind(parser, &current_token, TOKEN_CCURLY);
     scope_leave(parser->scope);
     vector_destroy(body);
     return NULL;
   }
 
-  ast_node_t *latest_node = vector_at(body, body->size - 1);
-
-  if (latest_node->kind != AST_RETURN_STMT) {
-    parser_error_t error;
-    error.token = current_token;
-
-    sprintf(error.message, "expected 'return' keyword.");
+  scope_leave(parser->scope);
 
-    parser->errors[parser->errors_len++] = error;
-    scope_leave(parser->scope);
+  if (!drop_expected_token(parser, TOKEN_CCURLY)) {
     vector_destroy(body);
     return NULL;
   }
 
-  scope_leave(parser->scope);
   return body;
 }
 
@@ -462,6 +501,11 @@ parser_parse_function_declaration(parser_t *parser)
     return NULL;
   }
 
+  if (!parser_ensure_function_return_statement(parser, body, &func_name_token)) {
+    vector_destroy(body);
+    return NULL;
+  }
+
   ast_node_t *node = ast_node_new();
   ast_node_init_function_declaration(node, func_name_token.value, return_type, body);
 
diff --git a/src/scope.c b/src/scope.c
index 662e59c..6338e60 100644
--- a/src/scope.c
+++ b/src/scope.c
@@ -15,6 +15,7 @@
  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
  */
 #include "scope.h"
+#include <assert.h>
 #include <errno.h>
 #include <stdio.h>
 #include <string.h>
@@ -58,6 +59,7 @@ scope_destroy(scope_t *scope)
     fprintf(stderr,
             "Stack not cleaned before destroying. This may lead to memory leaks.\n"
             "Please make sure to call the leave function before destroying it.");
+    assert(scope->stack->size == 1);
   }
 
   for (size_t i = 0; i < scope->stack->size; i++) {
diff --git a/test/parser_test.c b/test/parser_test.c
index 3f814c6..ce77828 100644
--- a/test/parser_test.c
+++ b/test/parser_test.c
@@ -210,7 +210,7 @@ test_parse_basic_syntax_errors(const MunitParameter params[], void *user_data_or
   assert_parser_error("main() i32 { return 42; }", "expected ':' but got 'TOKEN_NAME'");
   assert_parser_error("main(): { return 42; }", "expected 'TOKEN_NAME' but got '{'");
   assert_parser_error("main(): i32 return 42; }", "expected '{' but got 'return'");
-  assert_parser_error("main(): i32 { 42; }", "expected 'TOKEN_NAME' but got 'TOKEN_NUMBER'");
+  assert_parser_error("main(): i32 { 42; }", "unexpected token 'TOKEN_NUMBER' value='42'");
   assert_parser_error("main(): i32 { return; }", "unexpected '; (;)' token");
   assert_parser_error("main(): i32 { return 42;", "expected '}' but got end of file");
   assert_parser_error("main(): beff { return 42; }", "type 'beff' is not defined");
@@ -218,7 +218,7 @@ test_parse_basic_syntax_errors(const MunitParameter params[], void *user_data_or
   assert_parser_error("main(): i32 { b = 1; return b; }", "trying to assign 'b' before defining it.");
   // FIXME: once function calls are implemented, this error should inform that
   // neither a variable or function call was found.
-  assert_parser_error("main(): i32 { oxi 42; }", "expected 'TOKEN_NAME' but got 'TOKEN_NUMBER'");
+  assert_parser_error("main(): i32 { oxi 42; }", "unexpected token 'TOKEN_NAME' value='oxi'");
 
   return MUNIT_OK;
 }
author	Carlos Maniero <carlos@maniero.me>	2023-05-02 23:45:52 -0300
committer	Johnny Richard <johnny@johnnyrichard.com>	2023-05-03 22:52:41 +0200
commit	b18a53b912ae66ad2bb23985640c9fac56ced358 (patch)
tree	eadc80d198b9ddab5f1fe77377e1249a3ec3259f
parent	e623c701d2ef41cf4993590e2932c7538c83fc54 (diff)