#include #include #include #include #include #include "utils.h" #include "array.h" #include "string_view.h" #include "lexer.h" void lexer_init(lexer_t *lexer, char *file_name) { assert(lexer); char *program = read_file_contents(file_name); if (program == NULL) { fprintf(stderr, "Unable to read file <%s>\n", file_name); exit(EXIT_FAILURE); } lexer->file_name = file_name; lexer->loc = (lex_loc_t) { 0 }; lexer->source = string_view_from_cstr(program); } bool lexer_is_eof(lexer_t *lexer) { return !(lexer->loc.offset < lexer->source.size); } char lexer_current_char(lexer_t *lexer) { return lexer->source.chars[lexer->loc.offset]; } static bool _isspace(char c) { return isspace(c) && c != '\n'; } char lexer_next_char(lexer_t *lexer) { assert(lexer->loc.offset < lexer->source.size); char previous_char = lexer_current_char(lexer); if (previous_char == '\n') { lexer->loc.lineno++; lexer->loc.lineoffset = ++lexer->loc.offset; } else { lexer->loc.offset++; } return lexer_current_char(lexer); } void lexer_next_token(lexer_t *lexer, token_t *token) { if (lexer_is_eof(lexer)) { *token = (token_t) { .kind = TOKEN_EOF }; return; } char c = lexer_current_char(lexer); if (_isspace(c) && !lexer_is_eof(lexer)) { while (_isspace(c) && !lexer_is_eof(lexer)) { c = lexer_next_char(lexer); } } if (lexer_is_eof(lexer)) { *token = (token_t) { .kind = TOKEN_EOF }; return; } if (c == '\n') { token->kind = TOKEN_EOS; token->loc = lexer->loc; token->value = (string_view_t) { .size = 1, .chars = lexer->source.chars + lexer->loc.offset }; lexer_next_char(lexer); return; } if (isalpha(c)) { lex_loc_t start_loc = lexer->loc; while (isalnum(c) && !lexer_is_eof(lexer)) { c = lexer_next_char(lexer); } string_view_t token_value = { .size = lexer->loc.offset - start_loc.offset, .chars = lexer->source.chars + start_loc.offset }; token->value = token_value; token->loc = start_loc; if (string_view_eq(token_value, string_view_from_cstr("push"))) { token->kind = TOKEN_KW_PUSH; return; } if (string_view_eq(token_value, string_view_from_cstr("dup"))) { token->kind = TOKEN_KW_DUP; return; } if (string_view_eq(token_value, string_view_from_cstr("copy"))) { token->kind = TOKEN_KW_COPY; return; } if (string_view_eq(token_value, string_view_from_cstr("swap"))) { token->kind = TOKEN_KW_SWAP; return; } if (string_view_eq(token_value, string_view_from_cstr("drop"))) { token->kind = TOKEN_KW_DROP; return; } if (string_view_eq(token_value, string_view_from_cstr("slide"))) { token->kind = TOKEN_KW_SLIDE; return; } if (string_view_eq(token_value, string_view_from_cstr("add"))) { token->kind = TOKEN_KW_ADD; return; } if (string_view_eq(token_value, string_view_from_cstr("sub"))) { token->kind = TOKEN_KW_SUB; return; } if (string_view_eq(token_value, string_view_from_cstr("mul"))) { token->kind = TOKEN_KW_MUL; return; } if (string_view_eq(token_value, string_view_from_cstr("div"))) { token->kind = TOKEN_KW_DIV; return; } if (string_view_eq(token_value, string_view_from_cstr("mod"))) { token->kind = TOKEN_KW_MOD; return; } if (string_view_eq(token_value, string_view_from_cstr("store"))) { token->kind = TOKEN_KW_STORE; return; } if (string_view_eq(token_value, string_view_from_cstr("load"))) { token->kind = TOKEN_KW_LOAD; return; } if (string_view_eq(token_value, string_view_from_cstr("call"))) { token->kind = TOKEN_KW_CALL; return; } if (string_view_eq(token_value, string_view_from_cstr("ret"))) { token->kind = TOKEN_KW_RET; return; } if (string_view_eq(token_value, string_view_from_cstr("jmp"))) { token->kind = TOKEN_KW_JMP; return; } if (string_view_eq(token_value, string_view_from_cstr("jz"))) { token->kind = TOKEN_KW_JMPZ; return; } if (string_view_eq(token_value, string_view_from_cstr("jn"))) { token->kind = TOKEN_KW_JMPN; return; } if (string_view_eq(token_value, string_view_from_cstr("printi"))) { token->kind = TOKEN_KW_PRINTI; return; } if (string_view_eq(token_value, string_view_from_cstr("printc"))) { token->kind = TOKEN_KW_PRINTC; return; } if (string_view_eq(token_value, string_view_from_cstr("readi"))) { token->kind = TOKEN_KW_READI; return; } if (string_view_eq(token_value, string_view_from_cstr("readc"))) { token->kind = TOKEN_KW_READC; return; } if (string_view_eq(token_value, string_view_from_cstr("end"))) { token->kind = TOKEN_KW_END; return; } token->kind = TOKEN_IDENT; return; } if (isdigit(c)) { lex_loc_t start_loc = lexer->loc; while (isdigit(c) && !lexer_is_eof(lexer)) { c = lexer_next_char(lexer); } string_view_t token_value = { .size = lexer->loc.offset - start_loc.offset, .chars = lexer->source.chars + start_loc.offset }; token->kind = TOKEN_NUMBER; token->value = token_value; token->loc = start_loc; } if (c == ':') { token->kind = TOKEN_COLON; token->value = (string_view_t) { .size = 1, .chars = lexer->source.chars + lexer->loc.offset}; token->loc = lexer->loc; lexer_next_char(lexer); return; } } static char *token_to_cstr_table[] = { [TOKEN_KW_PUSH] = "push", [TOKEN_KW_DUP] = "dup", [TOKEN_KW_COPY] = "copy", [TOKEN_KW_SWAP] = "swap", [TOKEN_KW_DROP] = "drop", [TOKEN_KW_SLIDE] = "slide", [TOKEN_KW_ADD] = "add", [TOKEN_KW_SUB] = "sub", [TOKEN_KW_MUL] = "mul", [TOKEN_KW_DIV] = "div", [TOKEN_KW_MOD] = "mod", [TOKEN_KW_STORE] = "store", [TOKEN_KW_LOAD] = "load", [TOKEN_KW_CALL] = "call", [TOKEN_KW_RET] = "ret", [TOKEN_KW_JMP] = "jmp", [TOKEN_KW_JMPZ] = "jz", [TOKEN_KW_JMPN] = "jn", [TOKEN_KW_PRINTI] = "printi", [TOKEN_KW_PRINTC] = "printc", [TOKEN_KW_READI] = "readi", [TOKEN_KW_READC] = "readc", [TOKEN_KW_END] = "end", [TOKEN_IDENT] = "identifier", [TOKEN_EOS] = "", [TOKEN_NUMBER] = "number", [TOKEN_COLON] = ":", [TOKEN_UNKOWN] = "", [TOKEN_EOF] = "", }; char * token_to_cstr(token_kind_t kind) { return token_to_cstr_table[kind]; }