lexer: Extract lexer.c and lexer.h from pipac.c

Signed-off-by: Johnny Richard <johnny@johnnyrichard.com>
author: Johnny Richard <johnny@johnnyrichard.com> 2023-04-14 21:04:18 +0200
committer: Johnny Richard <johnny@johnnyrichard.com> 2023-04-14 21:04:18 +0200
commit: b4d3c6b5685024efc651e31e2308b0d76bbc3c06 (patch)
tree: 63fef1a04052cd185c07f2119593f0d3e23a3d4c /src
parent: e0f96e02d6277f92b24ea3afaa49d6c0a7a6731c (diff)
3 files changed, 261 insertions, 204 deletions
diff --git a/src/lexer.c b/src/lexer.c
new file mode 100644
index 0000000..7b0206a
--- /dev/null
+++ b/src/lexer.c
@@ -0,0 +1,192 @@
+/*
+* Copyright (C) 2023 Johnny Richard
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program.  If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include "lexer.h"
+
+/*
+ * Prepare `lexer` for scanning the file at `filepath`.
+ *
+ * The path pointer is stored as given (it is not copied), the length,
+ * cursor and line bookkeeping are reset to zero, and the file contents are
+ * then loaded through lexer_load_file_contents().  Both arguments are
+ * asserted to be non-NULL.
+ */
+void
+lexer_init(lexer_t *lexer, char *filepath)
+{
+  assert(lexer && "lexer must be defined");
+  assert(filepath && "filepath must be defined");
+
+  /* Start at offset 0, on row 0, with the line beginning at offset 0. */
+  lexer->bol = lexer->row = lexer->cur = 0;
+  lexer->srclen = 0;
+  lexer->filepath = filepath;
+
+  /* Must come last: the loader reads lexer->filepath. */
+  lexer_load_file_contents(lexer);
+}
+
+/*
+ * Scan the next token from lexer->src, starting at lexer->cur, into *token.
+ *
+ * Leading whitespace is skipped first; every '\n' consumed increments
+ * lexer->row and moves lexer->bol to the character right after it.
+ * token->row and token->col are zero-based; col is the offset of the
+ * token's first character from the start of its line.
+ *
+ * For every kind except TOKEN_EOF, token->value is a fresh heap copy of the
+ * lexeme made with strndup(); releasing it is left to the caller.  The copy
+ * is not checked for allocation failure, so value may be NULL.
+ *
+ * TOKEN_EOF is produced at end of input and also when the current character
+ * starts no known token; in the latter case the cursor is not advanced.  On
+ * TOKEN_EOF value is NULL and row/col give the position where scanning
+ * stopped.
+ */
+void
+lexer_next_token(lexer_t *lexer, token_t *token)
+{
+  /*
+   * <ctype.h> classifiers require a value representable as unsigned char
+   * (or EOF).  Plain char may be negative for bytes >= 0x80, so every
+   * character is converted before being classified.
+   */
+  while (lexer_is_not_eof(lexer)
+         && isspace((unsigned char) lexer_current_char(lexer))) {
+    bool is_newline = lexer_current_char(lexer) == '\n';
+
+    lexer_drop_char(lexer);
+    if (is_newline) {
+      lexer->row++;
+      lexer->bol = lexer->cur;
+    }
+  }
+
+  /* Give every path, TOKEN_EOF included, a defined value and position. */
+  token->value = NULL;
+  token->row = lexer->row;
+  token->col = lexer->cur - lexer->bol;
+
+  if (lexer_is_eof(lexer)) {
+    token->kind = TOKEN_EOF;
+    return;
+  }
+
+  size_t begin = lexer->cur;
+  unsigned char first = (unsigned char) lexer_current_char(lexer);
+
+  if (isdigit(first)) {
+    while (lexer_is_not_eof(lexer)
+           && isdigit((unsigned char) lexer_current_char(lexer))) {
+      lexer_drop_char(lexer);
+    }
+    token->kind = TOKEN_NUMBER;
+    token->value = strndup(lexer->src + begin, lexer->cur - begin);
+    return;
+  }
+
+  if (isalpha(first)) {
+    while (lexer_is_not_eof(lexer)
+           && isalnum((unsigned char) lexer_current_char(lexer))) {
+      lexer_drop_char(lexer);
+    }
+    token->kind = TOKEN_NAME;
+    token->value = strndup(lexer->src + begin, lexer->cur - begin);
+    return;
+  }
+
+  switch (first) {
+    case '(': token->kind = TOKEN_OPAREN;    break;
+    case ')': token->kind = TOKEN_CPAREN;    break;
+    case ':': token->kind = TOKEN_COLON;     break;
+    case ';': token->kind = TOKEN_SEMICOLON; break;
+    case '{': token->kind = TOKEN_OCURLY;    break;
+    case '}': token->kind = TOKEN_CCURLY;    break;
+    default:
+      /* Unknown character: reported as TOKEN_EOF, cursor left in place. */
+      token->kind = TOKEN_EOF;
+      return;
+  }
+
+  /* Single-character token: copy that one character and step past it. */
+  token->value = strndup(lexer->src + begin, 1);
+  lexer_drop_char(lexer);
+}
+
+/*
+ * Read the whole file named by lexer->filepath into a newly allocated,
+ * NUL-terminated buffer stored in lexer->src, and record the number of
+ * bytes read in lexer->srclen.
+ *
+ * Any failure (open, seek, size query, allocation, read) prints a message
+ * to stderr and terminates the process with EXIT_FAILURE.  An empty file is
+ * not an error: it yields srclen == 0 and an empty string in src.
+ */
+void
+lexer_load_file_contents(lexer_t *lexer)
+{
+  assert(lexer && "lexer must be defined");
+
+  FILE *file = fopen(lexer->filepath, "r");
+  if (!file) {
+    fprintf(stderr, "tried to open file '%s': %s\n", lexer->filepath, strerror(errno));
+    exit(EXIT_FAILURE);
+  }
+
+  /* ftell() reports failure as -1L; check it before converting to size_t. */
+  long size = -1L;
+  if (fseek(file, 0L, SEEK_END) == 0) {
+    size = ftell(file);
+  }
+  if (size < 0) {
+    /* Report before fclose(), which could overwrite errno. */
+    fprintf(stderr, "could not determine size of file '%s': %s\n", lexer->filepath, strerror(errno));
+    fclose(file);
+    exit(EXIT_FAILURE);
+  }
+  rewind(file);
+
+  /* The extra zeroed byte keeps src NUL-terminated. */
+  lexer->src = calloc(1, (size_t) size + 1);
+  if (!lexer->src) {
+    perror("lexer_load_file_contents -> calloc");
+    fclose(file);
+    exit(EXIT_FAILURE);
+  }
+
+  /*
+   * Read byte-wise so that an empty file (size == 0) is accepted and so
+   * that the count actually delivered is known.
+   */
+  size_t nread = fread(lexer->src, 1, (size_t) size, file);
+  if (ferror(file)) {
+    fprintf(stderr, "could not read file '%s'\n", lexer->filepath);
+    fclose(file);
+    free(lexer->src);
+    exit(EXIT_FAILURE);
+  }
+
+  /*
+   * A short read without a stream error means end of file was reached
+   * early; the bytes beyond nread are still zero from calloc().
+   */
+  lexer->srclen = nread;
+  fclose(file);
+}
+
+/* Move the cursor one character forward; no bounds check is performed. */
+void
+lexer_drop_char(lexer_t *lexer)
+{
+  lexer->cur += 1;
+}
+
+/* True once the cursor has reached or passed the end of the source. */
+bool
+lexer_is_eof(lexer_t *lexer)
+{
+  return lexer->srclen <= lexer->cur;
+}
+
+
+/* True while the cursor still points inside the source buffer. */
+bool
+lexer_is_not_eof(lexer_t *lexer)
+{
+  return lexer->cur < lexer->srclen;
+}
+
+/*
+ * Return the character under the cursor.  The cursor is not range-checked
+ * here; callers in this file test lexer_is_not_eof() first.
+ */
+char
+lexer_current_char(lexer_t *lexer)
+{
+  const char *at = lexer->src + lexer->cur;
+
+  return *at;
+}
+
diff --git a/src/lexer.h b/src/lexer.h
new file mode 100644
index 0000000..29cfc11
--- /dev/null
+++ b/src/lexer.h
@@ -0,0 +1,68 @@
+/*
+* Copyright (C) 2023 Johnny Richard
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program.  If not, see <https://www.gnu.org/licenses/>.
+*/
+#ifndef LEXER_H
+#define LEXER_H
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+/* Kinds of token produced by lexer_next_token(). */
+typedef enum {
+    TOKEN_NAME,       /* an alphabetic character followed by alphanumerics */
+    TOKEN_OPAREN,     /* '(' */
+    TOKEN_CPAREN,     /* ')' */
+    TOKEN_COLON,      /* ':' */
+    TOKEN_SEMICOLON,  /* ';' */
+    TOKEN_OCURLY,     /* '{' */
+    TOKEN_CCURLY,     /* '}' */
+    TOKEN_NUMBER,     /* a run of decimal digits */
+    TOKEN_EOF         /* end of input; also returned for an unrecognized character */
+} token_kind_t;
+
+/* One token as filled in by lexer_next_token(). */
+typedef struct token_t {
+  token_kind_t  kind;
+  /* For every kind except TOKEN_EOF: heap copy of the lexeme (strdup/strndup). */
+  char         *value;
+  /* Zero-based line of the token's first character. */
+  uint32_t      row;
+  /* Offset of the token's first character from the start of its line. */
+  uint32_t      col;
+} token_t;
+
+
+/*
+ * Scanner state over one source file held fully in memory.
+ *
+ * NOTE(review): cur and bol are uint32_t while srclen is size_t, so offsets
+ * past UINT32_MAX would wrap -- confirm whether sources that large matter.
+ */
+typedef struct lexer_t {
+  char     *filepath;  /* path handed to lexer_init(); stored, not copied */
+  char     *src;       /* file contents, allocated by lexer_load_file_contents() */
+  size_t    srclen;    /* number of source bytes in src */
+  uint32_t  cur;       /* index in src of the current character */
+  uint32_t  row;       /* number of '\n' consumed so far (zero-based line) */
+  uint32_t  bol;       /* index in src where the current line begins */
+} lexer_t;
+
+/*
+ * Store filepath in lexer, zero the cursor/line fields and load the file.
+ * Both arguments must be non-NULL (asserted).
+ */
+void lexer_init(lexer_t *lexer, char *filepath);
+
+/*
+ * Skip whitespace and scan the next token into *token.  token->kind is
+ * TOKEN_EOF when nothing more can be scanned.
+ */
+void lexer_next_token(lexer_t *lexer, token_t *token);
+
+/*
+ * Read the file at lexer->filepath into lexer->src and set lexer->srclen.
+ * Prints to stderr and exits with EXIT_FAILURE on failure.
+ */
+void lexer_load_file_contents(lexer_t *lexer);
+
+/* Return lexer->src[lexer->cur]; the index is not range-checked. */
+char lexer_current_char(lexer_t *lexer);
+
+/* True when lexer->cur >= lexer->srclen. */
+bool lexer_is_eof(lexer_t *lexer);
+
+/* Negation of lexer_is_eof(). */
+bool lexer_is_not_eof(lexer_t *lexer);
+
+/* Advance lexer->cur by one; the index is not range-checked. */
+void lexer_drop_char(lexer_t *lexer);
+
+#endif /* LEXER_H */
+
diff --git a/src/pipac.c b/src/pipac.c
index dba6109..f1e5865 100644
--- a/src/pipac.c
+++ b/src/pipac.c
@@ -14,212 +14,9 @@
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
-#include <assert.h>
-#include <stdbool.h>
-#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include <ctype.h>
-
-typedef enum {
-    TOKEN_NAME,
-    TOKEN_OPAREN,
-    TOKEN_CPAREN,
-    TOKEN_COLON,
-    TOKEN_SEMICOLON,
-    TOKEN_OCURLY,
-    TOKEN_CCURLY,
-    TOKEN_NUMBER,
-    TOKEN_EOF
-} token_kind_t;
-
-typedef struct token_t {
-  token_kind_t  kind;
-  char         *value;
-  uint32_t      row;
-  uint32_t      col;
-} token_t;
-
-
-typedef struct lexer_t {
-  char     *filepath;
-  char     *src;
-  size_t    srclen;
-  uint32_t  cur;
-  uint32_t  row;
-  uint32_t  bol;
-} lexer_t;
-
-void
-lexer_load_file_contents(lexer_t *lexer)
-{
-  assert(lexer && "lexer must be defined");
-
-  FILE *file;
-  file = fopen(lexer->filepath, "r");
-  if (!file) {
-    fprintf(stderr, "tried to open file '%s': %s\n", lexer->filepath, strerror(errno));
-    exit(EXIT_FAILURE);
-  }
-
-  fseek(file, 0L, SEEK_END);
-  lexer->srclen = ftell(file);
-  rewind(file);
-
-  lexer->src = calloc(1, lexer->srclen + 1);
-  if (!lexer->src) {
-    fclose(file);
-    perror("lexer_load_file_contents -> calloc");
-    exit(EXIT_FAILURE);
-  }
-
-  if (fread(lexer->src, lexer->srclen, 1, file) != 1) {
-    fclose(file);
-    free(lexer->src);
-    // FIXME: distinguish error using ferror and feof functions
-    fprintf(stderr, "could not read file '%s'\n", lexer->filepath);
-    exit(EXIT_FAILURE);
-  }
-
-}
-
-void
-lexer_init(lexer_t *lexer, char *filepath)
-{
-  assert(lexer && "lexer must be defined");
-  assert(filepath && "filepath must be defined");
-  lexer->filepath = filepath;
-  lexer->srclen   = 0;
-  lexer->cur      = 0;
-  lexer->row      = 0;
-  lexer->bol      = 0;
-  lexer_load_file_contents(lexer);
-}
-
-void
-lexer_drop_char(lexer_t *lexer)
-{
-  lexer->cur++;
-}
-
-
-bool
-lexer_is_eof(lexer_t *lexer)
-{
-  return lexer->cur >= lexer->srclen;
-}
-
-
-bool
-lexer_is_not_eof(lexer_t *lexer)
-{
-  return !lexer_is_eof(lexer);
-}
-
-char
-lexer_current_char(lexer_t *lexer)
-{
-  return lexer->src[lexer->cur];
-}
-
-void
-lexer_next_token(lexer_t *lexer, token_t *token)
-{
-  while (lexer_is_not_eof(lexer) && isspace(lexer_current_char(lexer))) {
-    if (lexer_current_char(lexer) == '\n') {
-      lexer_drop_char(lexer);
-      lexer->row++;
-      lexer->bol = lexer->cur;
-      continue;
-    }
-    lexer_drop_char(lexer);
-  }
-
-  if (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) {
-    size_t begin = lexer->cur;
-
-    while (lexer_is_not_eof(lexer) && isdigit(lexer_current_char(lexer))) {
-      lexer_drop_char(lexer);
-    }
-    token->kind = TOKEN_NUMBER;
-    token->value = strndup(lexer->src + begin, lexer->cur - begin);
-    token->row = lexer->row;
-    token->col = begin - lexer->bol;
-    return;
-  }
-
-  if (lexer_is_not_eof(lexer) && isalpha(lexer_current_char(lexer))) {
-    size_t begin = lexer->cur;
-
-    while (lexer_is_not_eof(lexer) && isalnum(lexer_current_char(lexer))) {
-      lexer_drop_char(lexer);
-    }
-    token->kind = TOKEN_NAME;
-    token->value = strndup(lexer->src + begin, lexer->cur - begin);
-    token->row = lexer->row;
-    token->col = begin - lexer->bol;
-    return;
-  }
-
-  if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == '(') {
-    token->kind = TOKEN_OPAREN;
-    token->value = strdup("(");
-    token->row = lexer->row;
-    token->col = lexer->cur - lexer->bol;
-    lexer_drop_char(lexer);
-    return;
-  }
-
-  if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == ')') {
-    token->kind = TOKEN_CPAREN;
-    token->value = strdup(")");
-    token->row = lexer->row;
-    token->col = lexer->cur - lexer->bol;
-    lexer_drop_char(lexer);
-    return;
-  }
-
-  if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == ':') {
-    token->kind = TOKEN_COLON;
-    token->value = strdup(":");
-    token->row = lexer->row;
-    token->col = lexer->cur - lexer->bol;
-    lexer_drop_char(lexer);
-    return;
-  }
-
-  if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == ';') {
-    token->kind = TOKEN_SEMICOLON;
-    token->value = strdup(";");
-    token->row = lexer->row;
-    token->col = lexer->cur - lexer->bol;
-    lexer_drop_char(lexer);
-    return;
-  }
-
-  if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == '{') {
-    token->kind = TOKEN_OCURLY;
-    token->value = strdup("{");
-    token->row = lexer->row;
-    token->col = lexer->cur - lexer->bol;
-    lexer_drop_char(lexer);
-    return;
-  }
-
-  if (lexer_is_not_eof(lexer) && lexer_current_char(lexer) == '}') {
-    token->kind = TOKEN_CCURLY;
-    token->value = strdup("}");
-    token->row = lexer->row;
-    token->col = lexer->cur - lexer->bol;
-    lexer_drop_char(lexer);
-    return;
-  }
-
-  token->kind = TOKEN_EOF;
-  return;
-}
+#include "lexer.h"
 
 void
 print_usage()
author	Johnny Richard <johnny@johnnyrichard.com>	2023-04-14 21:04:18 +0200
committer	Johnny Richard <johnny@johnnyrichard.com>	2023-04-14 21:04:18 +0200
commit	b4d3c6b5685024efc651e31e2308b0d76bbc3c06 (patch)
tree	63fef1a04052cd185c07f2119593f0d3e23a3d4c /src
parent	e0f96e02d6277f92b24ea3afaa49d6c0a7a6731c (diff)