parser: Split block into small functions

Since it is possible to look at a future token without consuming it, the block parser could be split into small chunks of code. There is a performance drawback, because the parser now makes multiple lookups of the same token. However, IMO this is not a big concern given the small amount of computation required to get a token. It can also be easily addressed by computing all tokens in advance. Memory Leak: During the refactor I found some extra memory leaks related to scopes that were not released. So, rather than just printing a message, I introduced an assert in scope.c to make sure developers get this feedback asap, because our testing framework suppresses messages from stderr when a test passes. Signed-off-by: Carlos Maniero <carlos@maniero.me>
author: Carlos Maniero <carlos@maniero.me> 2023-05-02 23:45:52 -0300
committer: Johnny Richard <johnny@johnnyrichard.com> 2023-05-03 22:52:41 +0200
commit: b18a53b912ae66ad2bb23985640c9fac56ced358 (patch)
tree: eadc80d198b9ddab5f1fe77377e1249a3ec3259f /src
parent: e623c701d2ef41cf4993590e2932c7538c83fc54 (diff)
2 files changed, 116 insertions, 70 deletions
diff --git a/src/parser.c b/src/parser.c
index 5f7a709..d5ffa21 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -316,6 +316,83 @@ parser_parse_variable_definition(parser_t *parser, ast_node_t *node)
   return true;
 }
 
+static bool
+is_next_statement_a_variable_declaration(parser_t *parser)
+{
+  token_t token;
+  lexer_peek_next_token(parser->lexer, &token);
+
+  if (token.kind != TOKEN_NAME) {
+    return false;
+  }
+
+  lexer_lookahead(parser->lexer, &token, 2);
+
+  return token.kind == TOKEN_COLON;
+}
+
+static bool
+is_next_statement_a_variable_assignement(parser_t *parser)
+{
+  token_t token;
+  lexer_peek_next_token(parser->lexer, &token);
+
+  if (token.kind != TOKEN_NAME) {
+    return false;
+  }
+
+  lexer_lookahead(parser->lexer, &token, 2);
+
+  return token.kind == TOKEN_EQUAL;
+}
+
+static bool
+is_next_statement_return(parser_t *parser)
+{
+  token_t token;
+  lexer_peek_next_token(parser->lexer, &token);
+  return token.kind == TOKEN_KEYWORD_RETURN;
+}
+
+static bool
+is_block_end(parser_t *parser)
+{
+  token_t token;
+  lexer_peek_next_token(parser->lexer, &token);
+  return token.kind == TOKEN_CCURLY || token.kind == TOKEN_EOF;
+}
+
+static void
+parser_error_report_unexpected_token(parser_t *parser)
+{
+  token_t token;
+  lexer_peek_next_token(parser->lexer, &token);
+  parser_error_t *error = &parser->errors[parser->errors_len++];
+  error->token = token;
+
+  sprintf(
+    error->message, "unexpected token '%s' value='" SVFMT "'", token_kind_to_str(token.kind), SVARG(&token.value));
+}
+
+static bool
+parser_ensure_function_return_statement(parser_t *parser, vector_t *body, token_t *function_token)
+{
+  ast_node_t *latest_node = vector_at(body, body->size - 1);
+
+  if (latest_node->kind != AST_RETURN_STMT) {
+    parser_error_t error;
+    error.token = *function_token;
+
+    sprintf(error.message, "expected 'return' keyword.");
+
+    parser->errors[parser->errors_len++] = error;
+    vector_destroy(body);
+    return false;
+  }
+
+  return true;
+}
+
 static vector_t *
 parser_parse_block_declarations(parser_t *parser)
 {
@@ -327,18 +404,11 @@ parser_parse_block_declarations(parser_t *parser)
   lexer_peek_next_token(parser->lexer, &current_token);
 
   scope_enter(parser->scope);
-
   vector_t *body = vector_new();
 
-  while (current_token.kind != TOKEN_CCURLY && current_token.kind != TOKEN_EOF) {
-    if (current_token.kind != TOKEN_NAME && current_token.kind != TOKEN_KEYWORD_RETURN) {
-      parser_error_push_unexpected_kind(parser, &current_token, TOKEN_NAME);
-      scope_leave(parser->scope);
-      vector_destroy(body);
-      return NULL;
-    }
+  while (!is_block_end(parser)) {
 
-    if (current_token.kind == TOKEN_KEYWORD_RETURN) {
+    if (is_next_statement_return(parser)) {
       ast_node_t *return_node = parser_parse_return_stmt(parser);
 
       if (return_node == NULL) {
@@ -348,82 +418,51 @@ parser_parse_block_declarations(parser_t *parser)
       }
 
       vector_push_back(body, return_node);
-    } else {
-      token_t token;
-      lexer_lookahead(parser->lexer, &token, 2);
-
-      switch (token.kind) {
-        case TOKEN_COLON: {
-          ast_node_t *variable_node = ast_node_new();
-
-          if (!parser_parse_variable_definition(parser, variable_node)) {
-            ast_node_destroy(variable_node);
-            vector_destroy(body);
-            return NULL;
-          }
-
-          vector_push_back(body, variable_node);
-          break;
-        }
-        case TOKEN_EQUAL: {
-          ast_node_t *variable_assignment = ast_node_new();
-
-          if (!parser_parse_variable_assignment(parser, variable_assignment)) {
-            ast_node_destroy(variable_assignment);
-            vector_destroy(body);
-            return NULL;
-          }
-
-          vector_push_back(body, variable_assignment);
-          break;
-        }
-        case TOKEN_NAME:
-        case TOKEN_OPAREN:
-        case TOKEN_CPAREN:
-        case TOKEN_SEMICOLON:
-        case TOKEN_OCURLY:
-        case TOKEN_CCURLY:
-        case TOKEN_NUMBER:
-        case TOKEN_PLUS:
-        case TOKEN_KEYWORD_RETURN:
-        case TOKEN_MINUS:
-        case TOKEN_STAR:
-        case TOKEN_SLASH:
-        case TOKEN_EOF:
-        case TOKEN_UNKNOWN:
-          // FIXME: Show an error it means syntax error
-          lexer_drop_next_token(parser->lexer);
-          break;
+      continue;
+    }
+
+    if (is_next_statement_a_variable_declaration(parser)) {
+      ast_node_t *variable_node = ast_node_new();
+
+      if (!parser_parse_variable_definition(parser, variable_node)) {
+        scope_leave(parser->scope);
+        ast_node_destroy(variable_node);
+        vector_destroy(body);
+        return NULL;
       }
+
+      vector_push_back(body, variable_node);
+      continue;
     }
 
-    lexer_peek_next_token(parser->lexer, &current_token);
-  }
+    if (is_next_statement_a_variable_assignement(parser)) {
+      ast_node_t *variable_assignment = ast_node_new();
 
-  lexer_next_token(parser->lexer, &current_token);
+      if (!parser_parse_variable_assignment(parser, variable_assignment)) {
+        scope_leave(parser->scope);
+        ast_node_destroy(variable_assignment);
+        vector_destroy(body);
+        return NULL;
+      }
+
+      vector_push_back(body, variable_assignment);
+      continue;
+    }
+
+    parser_error_report_unexpected_token(parser);
 
-  if (current_token.kind == TOKEN_EOF) {
-    parser_error_push_unexpected_kind(parser, &current_token, TOKEN_CCURLY);
     scope_leave(parser->scope);
     vector_destroy(body);
     return NULL;
   }
 
-  ast_node_t *latest_node = vector_at(body, body->size - 1);
-
-  if (latest_node->kind != AST_RETURN_STMT) {
-    parser_error_t error;
-    error.token = current_token;
-
-    sprintf(error.message, "expected 'return' keyword.");
+  scope_leave(parser->scope);
 
-    parser->errors[parser->errors_len++] = error;
-    scope_leave(parser->scope);
+  if (!drop_expected_token(parser, TOKEN_CCURLY)) {
     vector_destroy(body);
     return NULL;
   }
 
-  scope_leave(parser->scope);
   return body;
 }
 
@@ -462,6 +501,11 @@ parser_parse_function_declaration(parser_t *parser)
     return NULL;
   }
 
+  if (!parser_ensure_function_return_statement(parser, body, &func_name_token)) {
+    vector_destroy(body);
+    return NULL;
+  }
+
   ast_node_t *node = ast_node_new();
   ast_node_init_function_declaration(node, func_name_token.value, return_type, body);
 
diff --git a/src/scope.c b/src/scope.c
index 662e59c..6338e60 100644
--- a/src/scope.c
+++ b/src/scope.c
@@ -15,6 +15,7 @@
  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
  */
 #include "scope.h"
+#include <assert.h>
 #include <errno.h>
 #include <stdio.h>
 #include <string.h>
@@ -58,6 +59,7 @@ scope_destroy(scope_t *scope)
     fprintf(stderr,
             "Stack not cleaned before destroying. This may lead to memory leaks.\n"
             "Please make sure to call the leave function before destroying it.");
+    assert(scope->stack->size == 1);
   }
 
   for (size_t i = 0; i < scope->stack->size; i++) {
author	Carlos Maniero <carlos@maniero.me>	2023-05-02 23:45:52 -0300
committer	Johnny Richard <johnny@johnnyrichard.com>	2023-05-03 22:52:41 +0200
commit	b18a53b912ae66ad2bb23985640c9fac56ced358 (patch)
tree	eadc80d198b9ddab5f1fe77377e1249a3ec3259f /src
parent	e623c701d2ef41cf4993590e2932c7538c83fc54 (diff)