// Zern lexer: converts raw source text into a flat token array.
// Each token is a 4-element array: [type, lexeme, line, column].
// Scan state (byte offset, line, column, current indent) is threaded through
// the helpers as pointers and accessed with mem.read64 / mem.write64.
// NOTE(review): several parameters are annotated i64 (e.g. add_token's `type`)
// but every caller passes a string literal like "LeftParen" — the annotations
// look inconsistent with actual usage; confirm against the language's type rules.

// True when the scan offset has reached (or passed) the end of the source.
func eof[current: i64, source_len: i64] : bool
    return current >= source_len

// Returns the byte at the current offset without consuming it, or 0 at EOF.
func peek[current: i64, source: str, source_len: i64] : u8
    if eof(current, source_len)
        return 0
    return source[current]

// Consumes and returns the byte at the current offset (0 at EOF),
// advancing both the offset and the column counter.
func advance[current: ptr, column: ptr, source: str, source_len: i64] : u8
    if eof(mem.read64(current), source_len)
        return 0
    let c: u8 = source[mem.read64(current)]
    mem.write64(current, mem.read64(current) + 1)
    mem.write64(column, mem.read64(column) + 1)
    return c

// Consumes the next byte only if it equals `expected`; reports whether it did.
// Used for two-character operators ("==", ">=", "<<", "|>", ...).
func match_char[expected: u8, current: ptr, column: ptr, source: str, source_len: i64] : bool
    if eof(mem.read64(current), source_len)
        return false
    if source[mem.read64(current)] != expected
        return false
    mem.write64(current, mem.read64(current) + 1)
    mem.write64(column, mem.read64(column) + 1)
    return true

// Prints "<file>:<line>:<col> ERROR: <message>" and terminates the process.
func zern_error[filename: str, line: i64, column: i64, message: str] : void
    io.print(filename)
    io.print(":")
    io.print_i64(line)
    io.print(":")
    io.print_i64(column)
    io.print(" ERROR: ")
    io.println(message)
    os.exit(1)

// Counts and consumes the run of leading spaces at the current offset.
// NOTE(review): only ' ' is counted — tab-indented lines are not handled.
func count_indentation[current: ptr, column: ptr, source: str, source_len: i64] : i64
    let count = 0
    while peek(mem.read64(current), source, source_len) == ' '
        count = count + 1
        advance(current, column, source, source_len)
    return count

// Called right after a newline was consumed: compares the new line's leading
// space count to the indent stack (whose bottom entry is always 0) and emits
// Indent/Dedent tokens. Errors out when a dedent does not land exactly on an
// enclosing indentation level.
func handle_indentation[tokens: array, current: ptr, column: ptr, line: i64, source: str, source_len: i64, indent_stack: array, current_indent: ptr, filename: str] : void
    // A blank line (immediately another newline) leaves indentation untouched.
    // NOTE(review): `return 0` from a function declared void — confirm this is
    // idiomatic in this language (same pattern appears in scan_token).
    if peek(mem.read64(current), source, source_len) == 10 // \n
        return 0
    let new_indent: i64 = count_indentation(current, column, source, source_len)
    if new_indent > mem.read64(current_indent)
        array.push(indent_stack, new_indent)
        add_token_with_lexeme("Indent", tokens, "", line, mem.read64(column))
    else if new_indent < mem.read64(current_indent)
        // Pop every level deeper than the new indent, one Dedent per level.
        while array.size(indent_stack) > 1 & array.nth(indent_stack, array.size(indent_stack) - 1) > new_indent
            array.pop(indent_stack)
            add_token_with_lexeme("Dedent", tokens, "", line, mem.read64(column))
        // After popping, the new indent must match an enclosing level exactly.
        if array.size(indent_stack) == 0 | array.nth(indent_stack, array.size(indent_stack) - 1) != new_indent
            zern_error(filename, line, mem.read64(column), "invalid indentation")
    mem.write64(current_indent, new_indent)

// Appends a token whose lexeme is copied out of source[start..current).
// Allocates len+1 bytes and NUL-terminates the copy.
func add_token[type: i64, tokens: array, source: str, start: i64, current: i64, line: i64, column: i64] : void
    let len: i64 = current - start
    let lexeme: str = mem.alloc(len + 1)
    for i in 0..len
        str.set(lexeme, i, source[start + i])
    str.set(lexeme, len, 0)
    array.push(tokens, [type, lexeme, line, column])

// Appends a token with a caller-supplied lexeme (used for synthetic tokens
// such as Indent/Dedent/Eof, and for identifiers/keywords).
func add_token_with_lexeme[type: i64, tokens: array, lexeme: str, line: i64, column: i64] : void
    array.push(tokens, [type, lexeme, line, column])

// Consumes the remainder of a numeric literal; the first digit was already
// consumed by scan_token. Handles hex ('x' prefix), octal ('o' prefix) and
// plain decimal digit runs.
// NOTE(review): the prefix check does not require the consumed leading digit
// to be '0', so e.g. "3x1F" would also scan as one hex-style literal —
// confirm this is intended.
func scan_number[current: ptr, column: ptr, source: str, source_len: i64] : void
    if match_char('x', current, column, source, source_len)
        while str.is_hex_digit(peek(mem.read64(current), source, source_len))
            advance(current, column, source, source_len)
    else if match_char('o', current, column, source, source_len)
        while peek(mem.read64(current), source, source_len) >= '0' & peek(mem.read64(current), source, source_len) <= '7'
            advance(current, column, source, source_len)
    else
        while str.is_digit(peek(mem.read64(current), source, source_len))
            advance(current, column, source, source_len)

// Consumes the rest of an identifier (alphanumerics, '_' and '.'), copies it
// into a fresh NUL-terminated buffer, upgrades it to a keyword token type
// when it matches a reserved word, and appends the token. '.' is accepted so
// dotted names like "io.print" lex as a single Identifier.
func scan_identifier[tokens: array, current: ptr, column: ptr, start: i64, line: i64, source: str, source_len: i64] : void
    while str.is_alphanumeric(peek(mem.read64(current), source, source_len)) | peek(mem.read64(current), source, source_len) == '_' | peek(mem.read64(current), source, source_len) == '.'
        advance(current, column, source, source_len)
    let len: i64 = mem.read64(current) - start
    let lexeme: str = mem.alloc(len + 1)
    for i in 0..len
        str.set(lexeme, i, source[start + i])
    str.set(lexeme, len, 0)
    // Keyword classification: last match wins, but the lexeme can only equal
    // one of these strings, so the chain of plain `if`s is equivalent to
    // if/else-if.
    let type: str = "Identifier"
    if str.equal(lexeme, "let")
        type = "KeywordLet"
    if str.equal(lexeme, "const")
        type = "KeywordConst"
    if str.equal(lexeme, "if")
        type = "KeywordIf"
    if str.equal(lexeme, "else")
        type = "KeywordElse"
    if str.equal(lexeme, "while")
        type = "KeywordWhile"
    if str.equal(lexeme, "for")
        type = "KeywordFor"
    if str.equal(lexeme, "in")
        type = "KeywordIn"
    if str.equal(lexeme, "func")
        type = "KeywordFunc"
    if str.equal(lexeme, "return")
        type = "KeywordReturn"
    if str.equal(lexeme, "break")
        type = "KeywordBreak"
    if str.equal(lexeme, "continue")
        type = "KeywordContinue"
    if str.equal(lexeme, "extern")
        type = "KeywordExtern"
    if str.equal(lexeme, "export")
        type = "KeywordExport"
    if str.equal(lexeme, "true")
        type = "True"
    if str.equal(lexeme, "false")
        type = "False"
    add_token_with_lexeme(type, tokens, lexeme, line, mem.read64(column))

// Scans exactly one token (or skips one comment / whitespace run) starting at
// the current offset. Dispatches on the first consumed byte: single- and
// two-character operators, '//' line comments, char and string literals,
// newlines (which trigger indentation handling), numbers and identifiers.
func scan_token[tokens: array, current: ptr, line: ptr, column: ptr, source: str, source_len: i64, filename: str, indent_stack: array, current_indent: ptr] : void
    let start: i64 = mem.read64(current)
    let c: u8 = advance(current, column, source, source_len)
    if c == '('
        add_token("LeftParen", tokens, source, start, mem.read64(current), mem.read64(line), mem.read64(column))
    else if c == ')'
        add_token("RightParen", tokens, source, start, mem.read64(current), mem.read64(line), mem.read64(column))
    else if c == '['
        add_token("LeftBracket", tokens, source, start, mem.read64(current), mem.read64(line), mem.read64(column))
    else if c == ']'
        add_token("RightBracket", tokens, source, start, mem.read64(current), mem.read64(line), mem.read64(column))
    else if c == ','
        add_token("Comma", tokens, source, start, mem.read64(current), mem.read64(line), mem.read64(column))
    else if c == '+'
        add_token("Plus", tokens, source, start, mem.read64(current), mem.read64(line), mem.read64(column))
    else if c == '-'
        add_token("Minus", tokens, source, start, mem.read64(current), mem.read64(line), mem.read64(column))
    else if c == '*'
        add_token("Star", tokens, source, start, mem.read64(current), mem.read64(line), mem.read64(column))
    else if c == '%'
        add_token("Mod", tokens, source, start, mem.read64(current), mem.read64(line), mem.read64(column))
    else if c == '^'
        add_token("Xor", tokens, source, start, mem.read64(current), mem.read64(line), mem.read64(column))
    else if c == ':'
        add_token("Colon", tokens, source, start, mem.read64(current), mem.read64(line), mem.read64(column))
    else if c == '@'
        add_token("At", tokens, source, start, mem.read64(current), mem.read64(line), mem.read64(column))
    else if c == '.'
        // Only the range operator ".." exists; a lone '.' is an error because
        // dotted names are consumed whole by scan_identifier.
        if match_char('.', current, column, source, source_len)
            add_token("DoubleDot", tokens, source, start, mem.read64(current), mem.read64(line), mem.read64(column))
        else
            zern_error(filename, mem.read64(line), mem.read64(column), "expected '.' after '.'")
    else if c == '/'
        if match_char('/', current, column, source, source_len)
            // "//" line comment: skip to (but not past) the newline so the
            // newline branch below still handles line/indentation bookkeeping.
            while !eof(mem.read64(current), source_len)
                if peek(mem.read64(current), source, source_len) == 10 // \n
                    break
                advance(current, column, source, source_len)
        else
            add_token("Slash", tokens, source, start, mem.read64(current), mem.read64(line), mem.read64(column))
    else if c == '&'
        add_token("BitAnd", tokens, source, start, mem.read64(current), mem.read64(line), mem.read64(column))
    else if c == '|'
        if match_char('>', current, column, source, source_len)
            add_token("Pipe", tokens, source, start, mem.read64(current), mem.read64(line), mem.read64(column))
        else
            add_token("BitOr", tokens, source, start, mem.read64(current), mem.read64(line), mem.read64(column))
    else if c == '!'
        if match_char('=', current, column, source, source_len)
            add_token("NotEqual", tokens, source, start, mem.read64(current), mem.read64(line), mem.read64(column))
        else
            add_token("Bang", tokens, source, start, mem.read64(current), mem.read64(line), mem.read64(column))
    else if c == '='
        if match_char('=', current, column, source, source_len)
            add_token("DoubleEqual", tokens, source, start, mem.read64(current), mem.read64(line), mem.read64(column))
        else
            add_token("Equal", tokens, source, start, mem.read64(current), mem.read64(line), mem.read64(column))
    else if c == '>'
        if match_char('>', current, column, source, source_len)
            add_token("ShiftRight", tokens, source, start, mem.read64(current), mem.read64(line), mem.read64(column))
        else if match_char('=', current, column, source, source_len)
            add_token("GreaterEqual", tokens, source, start, mem.read64(current), mem.read64(line), mem.read64(column))
        else
            add_token("Greater", tokens, source, start, mem.read64(current), mem.read64(line), mem.read64(column))
    else if c == '<'
        if match_char('<', current, column, source, source_len)
            add_token("ShiftLeft", tokens, source, start, mem.read64(current), mem.read64(line), mem.read64(column))
        else if match_char('=', current, column, source, source_len)
            add_token("LessEqual", tokens, source, start, mem.read64(current), mem.read64(line), mem.read64(column))
        else
            add_token("Less", tokens, source, start, mem.read64(current), mem.read64(line), mem.read64(column))
    else if c == 39 // '
        // Char literal: exactly one byte between the quotes.
        // NOTE(review): no escape sequences (e.g. '\n') are supported here —
        // confirm whether the language expects them.
        if eof(mem.read64(current), source_len)
            zern_error(filename, mem.read64(line), mem.read64(column), "unterminated char literal")
        advance(current, column, source, source_len)
        if !match_char(39, current, column, source, source_len)
            zern_error(filename, mem.read64(line), mem.read64(column), "expected ' after char literal")
        add_token("Char", tokens, source, start, mem.read64(current), mem.read64(line), mem.read64(column))
    else if c == 34 // "
        // String literal: scan to the closing quote; embedded newlines are
        // allowed and update the line/column counters.
        while !eof(mem.read64(current), source_len)
            if peek(mem.read64(current), source, source_len) == 34
                break
            if peek(mem.read64(current), source, source_len) == 10 // \n
                mem.write64(line, mem.read64(line) + 1)
                mem.write64(column, 1)
            advance(current, column, source, source_len)
        if eof(mem.read64(current), source_len)
            zern_error(filename, mem.read64(line), mem.read64(column), "unterminated string")
        // Consume the closing quote; the token lexeme keeps both quotes.
        advance(current, column, source, source_len)
        add_token("String", tokens, source, start, mem.read64(current), mem.read64(line), mem.read64(column))
    else if c == ' ' | c == 13 // \r
        // Insignificant whitespace between tokens.
        return 0
    else if c == 10 // \n
        mem.write64(line, mem.read64(line) + 1)
        mem.write64(column, 1)
        handle_indentation(tokens, current, column, mem.read64(line), source, source_len, indent_stack, current_indent, filename)
    else if str.is_digit(c)
        scan_number(current, column, source, source_len)
        add_token("Number", tokens, source, start, mem.read64(current), mem.read64(line), mem.read64(column))
    else if str.is_letter(c) | c == '_'
        scan_identifier(tokens, current, column, start, mem.read64(line), source, source_len)
    else
        zern_error(filename, mem.read64(line), mem.read64(column), "unexpected character")

// Lexer entry point: scans the whole source and returns the token array,
// terminated by a single Eof token.
// NOTE(review): indent levels still open at end of input are not closed with
// Dedent tokens before Eof — confirm the parser tolerates this.
func tokenize[source: str, filename: str] : array
    let source_len: i64 = str.len(source)
    let current = 0
    let line = 1
    let column = 1
    // Indent stack starts with the implicit top-level indent of 0.
    let indent_stack: array = [0]
    let current_indent = 0
    let tokens: array = []
    while !eof(current, source_len)
        scan_token(tokens, @current, @line, @column, source, source_len, filename, indent_stack, @current_indent)
    add_token_with_lexeme("Eof", tokens, "", line, column)
    return tokens

// CLI driver: reads the file named by the first argument, tokenizes it, and
// prints one "<type> <lexeme> <line> <column>" line per token.
// NOTE(review): declared to return i64 but ends without an explicit return —
// confirm the language's implicit-return semantics.
func main[argc: i64, argv: ptr] : i64
    if argc < 2
        dbg.panic("expected an argument")
    // argv + 8 skips argv[0] (the program name) to reach the first argument.
    let path: str = mem.read64(argv + 8)
    let source: str = io.read_file(path)
    let tokens: array = tokenize(source, path)
    for i in 0..array.size(tokens)
        let token: array = array.nth(tokens, i)
        io.print(array.nth(token, 0))
        io.print(" ")
        io.print(array.nth(token, 1))
        io.print(" ")
        io.print_i64(array.nth(token, 2))
        io.print(" ")
        io.print_i64(array.nth(token, 3))
        io.println("")