Initial commit

Signed-off-by: Johnny Richard <johnny@johnnyrichard.com>
author: Johnny Richard <johnny@johnnyrichard.com> 2025-04-11 01:15:01 +0200
committer: Johnny Richard <johnny@johnnyrichard.com> 2025-04-14 23:11:22 +0200
commit: e7f69c8fbbbcbddde84933b2becd91e787d1ac63 (patch)
tree: 16cd17da17133494dd06aab614724e76b059d4ad /src/lexer.c
1 files changed, 253 insertions, 0 deletions
diff --git a/src/lexer.c b/src/lexer.c
new file mode 100644
index 0000000..9e9ab90
--- /dev/null
+++ b/src/lexer.c
@@ -0,0 +1,253 @@
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <ctype.h>
+#include "utils.h"
+#include "array.h"
+#include "string_view.h"
+#include "lexer.h"
+
+/* Initialise lexer over the text read_file_contents() returns for file_name.
+ * Prints a message to stderr and exits with EXIT_FAILURE if that text is NULL. */
+void
+lexer_init(lexer_t *lexer, char *file_name)
+{
+    assert(lexer);
+    char *contents = read_file_contents(file_name);
+    if (!contents) {
+        fprintf(stderr, "Unable to read file <%s>\n", file_name);
+        exit(EXIT_FAILURE);
+    }
+    lexer->source = string_view_from_cstr(contents);
+    lexer->loc = (lex_loc_t) { 0 };
+    lexer->file_name = file_name; /* the pointer is stored, the name is not copied */
+}
+
+bool
+lexer_is_eof(lexer_t *lexer)
+{   /* End of input is reached once the cursor offset is no longer inside the source view. */
+    return lexer->loc.offset >= lexer->source.size;
+}
+
+char
+lexer_current_char(lexer_t *lexer)
+{   /* Peek at the character under the cursor without consuming it; '\0' at end of input. */
+    return lexer_is_eof(lexer) ? '\0' : lexer->source.chars[lexer->loc.offset];
+}
+
+static bool
+_isspace(char c)
+{   /* Whitespace test that rejects '\n', which lexer_next_token() emits as TOKEN_EOS. */
+    return c != '\n' && isspace((unsigned char) c); /* cast: <ctype.h> is undefined for negative char */
+}
+
+/* Step past the current character (tracking line starts) and return the new current one. */
+char
+lexer_next_char(lexer_t *lexer)
+{
+    assert(lexer->loc.offset < lexer->source.size);
+    bool crossed_newline = lexer_current_char(lexer) == '\n';
+    lexer->loc.offset++;
+    if (crossed_newline) {
+        lexer->loc.lineno++;
+        lexer->loc.lineoffset = lexer->loc.offset; /* offset of the first character of the new line */
+    }
+    return lexer_current_char(lexer);
+}
+
+/*
+ * Reserved words of the language: each spelling is paired with the token kind
+ * lexer_next_token() reports for it. A word absent from this table is lexed
+ * as TOKEN_IDENT.
+ */
+static const struct {
+    char *text;        /* spelling, compared with string_view_eq() */
+    token_kind_t kind; /* kind reported when a word equals text */
+} keyword_table[] = {
+    { "push", TOKEN_KW_PUSH },
+    { "dup", TOKEN_KW_DUP },
+    { "copy", TOKEN_KW_COPY },
+    { "swap", TOKEN_KW_SWAP },
+    { "drop", TOKEN_KW_DROP },
+    { "slide", TOKEN_KW_SLIDE },
+    { "add", TOKEN_KW_ADD },
+    { "sub", TOKEN_KW_SUB },
+    { "mul", TOKEN_KW_MUL },
+    { "div", TOKEN_KW_DIV },
+    { "mod", TOKEN_KW_MOD },
+    { "store", TOKEN_KW_STORE },
+    { "load", TOKEN_KW_LOAD },
+    { "call", TOKEN_KW_CALL },
+    { "ret", TOKEN_KW_RET },
+    { "jmp", TOKEN_KW_JMP },
+    { "jz", TOKEN_KW_JMPZ },
+    { "jn", TOKEN_KW_JMPN },
+    { "printi", TOKEN_KW_PRINTI },
+    { "printc", TOKEN_KW_PRINTC },
+    { "readi", TOKEN_KW_READI },
+    { "readc", TOKEN_KW_READC },
+    { "end", TOKEN_KW_END },
+};
+
+/*
+ * Classify a word: returns the kind of the keyword_table entry whose spelling
+ * string_view_eq() reports equal to word, or TOKEN_IDENT when none matches.
+ */
+static token_kind_t
+keyword_kind_of(string_view_t word)
+{
+    size_t count = sizeof(keyword_table) / sizeof(keyword_table[0]);
+
+    for (size_t i = 0; i < count; i++) {
+        if (string_view_eq(word, string_view_from_cstr(keyword_table[i].text))) {
+            return keyword_table[i].kind;
+        }
+    }
+    return TOKEN_IDENT;
+}
+
+/* Advance the cursor while is_member() accepts the character under it. */
+static void
+skip_while(lexer_t *lexer, int (*is_member)(int))
+{
+    /* <ctype.h> classifiers are only defined for unsigned char values and EOF. */
+    while (!lexer_is_eof(lexer) && is_member((unsigned char) lexer_current_char(lexer))) {
+        lexer_next_char(lexer);
+    }
+}
+
+/*
+ * Finish a multi-character token: token->loc becomes start and token->value
+ * the slice of the source running from start up to, but not including, the
+ * lexer's current position. token->kind is left for the caller to set.
+ */
+static void
+fill_span_token(lexer_t *lexer, token_t *token, lex_loc_t start)
+{
+    token->loc = start;
+    token->value = (string_view_t) {
+        .size = lexer->loc.offset - start.offset,
+        .chars = lexer->source.chars + start.offset
+    };
+}
+
+/*
+ * Emit the character under the cursor as a one-character token of the given
+ * kind, then consume it. The lexer must not be at end of input.
+ */
+static void
+take_char_token(lexer_t *lexer, token_t *token, token_kind_t kind)
+{
+    token->kind = kind;
+    token->loc = lexer->loc;
+    token->value = (string_view_t) { .size = 1, .chars = lexer->source.chars + lexer->loc.offset };
+    lexer_next_char(lexer);
+}
+
+/*
+ * Read the next token from lexer into *token and advance past it.
+ *
+ * lexer: lexer state to read from; its location is moved past the token.
+ * token: output; its kind, value and loc are overwritten on every call.
+ *
+ * Whitespace other than '\n' is skipped first; the token is then chosen from
+ * the first remaining character:
+ *   - end of input  -> TOKEN_EOF (every other field of *token is zeroed)
+ *   - '\n'          -> TOKEN_EOS
+ *   - letter        -> run of letters and digits: a keyword kind or TOKEN_IDENT
+ *   - digit         -> run of digits: TOKEN_NUMBER
+ *   - ':'           -> TOKEN_COLON
+ *   - anything else -> TOKEN_UNKOWN covering that single character
+ *
+ * Except for TOKEN_EOF, token->value is a view into the lexer's source buffer
+ * (no text is copied) and token->loc is the location of its first character.
+ *
+ * Every call on non-exhausted input consumes at least one character, so a
+ * caller looping until TOKEN_EOF always terminates.
+ */
+void
+lexer_next_token(lexer_t *lexer, token_t *token)
+{
+    /* Skip horizontal whitespace; '\n' is significant and handled below. */
+    while (!lexer_is_eof(lexer) && _isspace(lexer_current_char(lexer))) {
+        lexer_next_char(lexer);
+    }
+
+    if (lexer_is_eof(lexer)) {
+        *token = (token_t) { .kind = TOKEN_EOF };
+        return;
+    }
+
+    /* Classify through unsigned char so bytes >= 0x80 are safe for <ctype.h>. */
+    unsigned char c = (unsigned char) lexer_current_char(lexer);
+    lex_loc_t start = lexer->loc;
+
+    if (c == '\n') {
+        take_char_token(lexer, token, TOKEN_EOS);
+        return;
+    }
+
+    if (isalpha(c)) {
+        skip_while(lexer, isalnum);
+        fill_span_token(lexer, token, start);
+        token->kind = keyword_kind_of(token->value);
+        return;
+    }
+
+    if (isdigit(c)) {
+        skip_while(lexer, isdigit);
+        fill_span_token(lexer, token, start);
+        token->kind = TOKEN_NUMBER;
+        /* Return here: falling through would let a ':' right after the digits
+         * overwrite this token, silently dropping the number. */
+        return;
+    }
+
+    if (c == ':') {
+        take_char_token(lexer, token, TOKEN_COLON);
+        return;
+    }
+
+    /* Unrecognised character: report it and step over it so the cursor always
+     * moves; leaving *token untouched here would stall the caller. */
+    take_char_token(lexer, token, TOKEN_UNKOWN);
+}
+
+static char *const token_to_cstr_table[] = { /* display text per token_kind_t; read-only */
+    [TOKEN_KW_PUSH]   = "push",
+    [TOKEN_KW_DUP]    = "dup",
+    [TOKEN_KW_COPY]   = "copy",
+    [TOKEN_KW_SWAP]   = "swap",
+    [TOKEN_KW_DROP]   = "drop",
+    [TOKEN_KW_SLIDE]  = "slide",
+    [TOKEN_KW_ADD]    = "add",
+    [TOKEN_KW_SUB]    = "sub",
+    [TOKEN_KW_MUL]    = "mul",
+    [TOKEN_KW_DIV]    = "div",
+    [TOKEN_KW_MOD]    = "mod",
+    [TOKEN_KW_STORE]  = "store",
+    [TOKEN_KW_LOAD]   = "load",
+    [TOKEN_KW_CALL]   = "call",
+    [TOKEN_KW_RET]    = "ret",
+    [TOKEN_KW_JMP]    = "jmp",
+    [TOKEN_KW_JMPZ]   = "jz",
+    [TOKEN_KW_JMPN]   = "jn",
+    [TOKEN_KW_PRINTI] = "printi",
+    [TOKEN_KW_PRINTC] = "printc",
+    [TOKEN_KW_READI]  = "readi",
+    [TOKEN_KW_READC]  = "readc",
+    [TOKEN_KW_END]    = "end",
+    [TOKEN_IDENT]     = "identifier",
+    [TOKEN_EOS]       = "<eos>",
+    [TOKEN_NUMBER]    = "number",
+    [TOKEN_COLON]     = ":",
+    [TOKEN_UNKOWN]    = "<unkown>",
+    [TOKEN_EOF]       = "<eof>",
+};
+
+char *
+token_to_cstr(token_kind_t kind)
+{   /* Look up the display text stored for kind in token_to_cstr_table (no range check). */
+    return *(token_to_cstr_table + kind);
+}
author	Johnny Richard <johnny@johnnyrichard.com>	2025-04-11 01:15:01 +0200
committer	Johnny Richard <johnny@johnnyrichard.com>	2025-04-14 23:11:22 +0200
commit	e7f69c8fbbbcbddde84933b2becd91e787d1ac63 (patch)
tree	16cd17da17133494dd06aab614724e76b059d4ad /src/lexer.c