package main

import "core:fmt"
import "core:c/libc"
import "core:math"

// Streaming lexer over a byte buffer. `char` is the byte currently being
// examined and `next` is a one-byte lookahead; `read_position` indexes the
// byte that will become `next` on the following advance.
Lexer :: struct {
	data: ^[dynamic]u8,
	read_position: u64,
	position: TextPosition,
	char, next: u8,
	// Kind of the most recently returned token; consulted by the automatic
	// semicolon-insertion rule (see lexer_should_not_emit_semicolon).
	last_token_kind: TokenKind,
	// Set when a newline terminated a statement; the next lexer_next() call
	// returns a synthetic .Semicolon before consuming further input.
	should_return_semicolon: bool,
}

// Allocates a lexer over `data` and primes it: the two initial advances fill
// `next` and then `char` so lookahead is valid from the first token.
lexer_create :: proc(data: ^[dynamic]u8) -> ^Lexer {
	lexer := new(Lexer)
	lexer^ = {
		data = data,
		read_position = 0,
		position = TextPosition {
			line = 1,
			column = 1,
		},
	}
	lexer_advance(lexer)
	lexer_advance(lexer)
	return lexer
}

// Shifts the lookahead into `char`, pulls the following byte from `data`
// (0 once past the end), and maintains line/column bookkeeping. '\r' is
// skipped entirely so the rest of the lexer only ever sees '\n' endings.
@(private = "file")
lexer_advance :: proc(lexer: ^Lexer) {
	lexer.char = lexer.next
	if lexer.read_position < u64(len(lexer.data)) {
		lexer.next = lexer.data[lexer.read_position]
	} else {
		lexer.next = 0
	}
	lexer.read_position += 1

	if lexer.char == '\r' {
		// BUGFIX: the recursive call already performed the position
		// bookkeeping for the character that replaced '\r'; returning here
		// prevents the column (or line) from being counted a second time.
		lexer_advance(lexer)
		return
	}

	if lexer.char == '\n' {
		lexer.position.line += 1
		lexer.position.column = 1
	} else {
		lexer.position.column += 1
	}
}

// Reports whether a newline after the last token must NOT become a
// semicolon: after operators, opening delimiters, or an already-terminated
// statement the expression is clearly unfinished.
@(private = "file")
lexer_should_not_emit_semicolon :: proc(lexer: ^Lexer) -> bool {
	#partial switch lexer.last_token_kind {
	case .CloseBrace, .Semicolon, .EOF, .Invalid,
	     .OpenParen, .OpenBrace, .OpenBracket,
	     .Add, .Subtract, .Multiply, .Divide, .Modulo, .Exponent,
	     .Assign, .Not,
	     .BitwiseAnd, .BitwiseOr, .BitwiseXOR, .BitwiseNot,
	     .LessThan, .GreaterThan, .BitwiseLeftShift, .BitwiseRightShift:
		return true
	}
	return false
}

// Consumes spaces, tabs, carriage returns and newlines. A newline that ends a
// statement (per lexer_should_not_emit_semicolon) flags a synthetic semicolon
// and stops so lexer_next can return it.
@(private = "file")
lexer_skip_whitespace :: proc(lexer: ^Lexer) {
	for lexer.char == ' ' || lexer.char == '\t' || lexer.char == '\r' || lexer.char == '\n' {
		if lexer.char == '\n' && !lexer_should_not_emit_semicolon(lexer) {
			lexer.should_return_semicolon = true
			lexer_advance(lexer)
			return
		}
		lexer_advance(lexer)
	}
}

// Returns the next token in the stream. A pending synthetic semicolon (set by
// lexer_skip_whitespace) is returned before any further input is consumed.
lexer_next :: proc(lexer: ^Lexer) -> (ret: Token) {
	lexer_skip_whitespace(lexer)
	if lexer.should_return_semicolon {
		lexer.should_return_semicolon = false
		return token_create(.Semicolon, TextRange { start = lexer.position, end = lexer.position })
	}
	// Record what kind we returned for the semicolon-insertion rule.
	defer lexer.last_token_kind = ret.kind

	crange := TextRange {
		start = lexer.position,
		end = lexer.position,
	}
	ret = token_create(.Invalid, crange) // default for unrecognized bytes
	should_advance := true

	switch lexer.char {
	case '+':
		ret = token_create(.Add, crange)
		if lexer.next == '+' {
			lexer_advance(lexer)
			crange.end = lexer.position
			ret = token_create(.Increment, crange)
		}
	case '-':
		ret = token_create(.Subtract, crange)
		if lexer.next == '-' {
			lexer_advance(lexer)
			crange.end = lexer.position
			ret = token_create(.Decrement, crange)
		} else if lexer.next == '>' {
			lexer_advance(lexer)
			crange.end = lexer.position
			ret = token_create(.Arrow, crange)
		}
	case '*': ret = token_create(.Multiply, crange)
	case '/': ret = token_create(.Divide, crange)
	case '%': ret = token_create(.Modulo, crange)
	case '`': ret = token_create(.Exponent, crange)
	case '=':
		ret = token_create(.Assign, crange)
		if lexer.next == '=' {
			lexer_advance(lexer)
			crange.end = lexer.position
			ret = token_create(.Equals, crange)
		}
	case '!':
		ret = token_create(.Not, crange)
		if lexer.next == '=' {
			lexer_advance(lexer)
			crange.end = lexer.position
			ret = token_create(.NotEquals, crange)
		}
	case '<':
		ret = token_create(.LessThan, crange)
		if lexer.next == '=' {
			lexer_advance(lexer)
			crange.end = lexer.position
			ret = token_create(.LessThanOrEqual, crange)
		} else if lexer.next == '<' {
			lexer_advance(lexer)
			crange.end = lexer.position
			ret = token_create(.BitwiseLeftShift, crange)
		}
	case '>':
		ret = token_create(.GreaterThan, crange)
		if lexer.next == '=' {
			lexer_advance(lexer)
			crange.end = lexer.position
			ret = token_create(.GreaterThanOrEqual, crange)
		} else if lexer.next == '>' {
			lexer_advance(lexer)
			crange.end = lexer.position
			ret = token_create(.BitwiseRightShift, crange)
		}
	case '&': ret = token_create(.BitwiseAnd, crange)
	case '|': ret = token_create(.BitwiseOr, crange)
	case '^': ret = token_create(.BitwiseXOR, crange)
	case '~': ret = token_create(.BitwiseNot, crange)
	case '(': ret = token_create(.OpenParen, crange)
	case ')': ret = token_create(.CloseParen, crange)
	case '[': ret = token_create(.OpenBracket, crange)
	case ']': ret = token_create(.CloseBracket, crange)
	case '{': ret = token_create(.OpenBrace, crange)
	case '}': ret = token_create(.CloseBrace, crange)
	case '?': ret = token_create(.Question, crange)
	case ':': ret = token_create(.Colon, crange)
	case '.': ret = token_create(.Dot, crange)
	case ';': ret = token_create(.Semicolon, crange)
	case '"':  ret = lexer_read_string(lexer, .String, '\"')
	case '\'': ret = lexer_read_string(lexer, .Character, '\'')
	case 'a'..='z', 'A'..='Z', '_':
		// Sub-lexers leave `char` on the first byte past the token.
		ret = lexer_read_identifier(lexer)
		should_advance = false
	case '0'..='9':
		ret = lexer_read_number(lexer)
		should_advance = false
	case 0:
		ret = token_create(.EOF, crange)
		should_advance = false
	}

	if should_advance {
		lexer_advance(lexer)
	}
	return
}

// Reads a quoted literal (string or character) up to the closing `outer`
// quote, translating backslash escapes. Unknown escapes emit a warning and
// are dropped. Stops at end of input so an unterminated literal cannot loop
// forever (BUGFIX: previously this spun appending 0 bytes at EOF).
@(private = "file")
lexer_read_string :: proc(lexer: ^Lexer, kind: TokenKind, outer: u8) -> Token {
	crange := TextRange {
		start = lexer.position,
		end = lexer.position,
	}
	lexer_advance(lexer) // consume the opening quote
	str : [dynamic]u8
	for lexer.char != outer && lexer.char != 0 {
		if lexer.char == '\\' {
			range := TextRange { start = lexer.position }
			lexer_advance(lexer)
			switch lexer.char {
			case 'n':  append(&str, '\n')
			case 't':  append(&str, '\t')
			case 'b':  append(&str, '\b')
			case 'r':  append(&str, '\r')
			case '\\': append(&str, '\\')
			case '\'', '"':
				// Escaped quote: keep the character without terminating the literal.
				append(&str, lexer.char)
			case:
				range.end = lexer.position
				append(&g_message_list, message_create(
					.Warning,
					fmt.aprintf("Invalid string/character escape: %c at %s", lexer.char, "TODO LOCATION"),
					range,
				))
			}
			lexer_advance(lexer)
			continue
		}
		append(&str, lexer.char)
		lexer_advance(lexer)
	}
	crange.end = lexer.position
	return token_create_u8(kind, str, crange)
}

// Reads an identifier ([A-Za-z0-9_]+ starting from the current char) and maps
// keyword spellings to their dedicated token kinds via a lookup table.
@(private = "file")
lexer_read_identifier :: proc(lexer: ^Lexer) -> Token {
	crange := TextRange { start = lexer.position }
	str : [dynamic]u8
	for libc.isalnum(i32(lexer.char)) != 0 || lexer.char == '_' {
		append(&str, lexer.char)
		crange.end = lexer.position
		lexer_advance(lexer)
	}

	// Keyword table: replaces a 24-branch if/else chain.
	keywords := [?]struct { text: string, kind: TokenKind } {
		{ "fn", .Function }, { "struct", .Struct }, { "enum", .Enum },
		{ "union", .Union }, { "type", .Type }, { "use", .Use },
		{ "pub", .Pub }, { "let", .Let }, { "mut", .Mut },
		{ "as", .As }, { "in", .In },
		{ "if", .If }, { "else", .Else }, { "elif", .Elif },
		{ "for", .For }, { "break", .Break }, { "continue", .Continue },
		{ "switch", .Switch }, { "case", .Case }, { "ret", .Ret },
		{ "static", .Static }, { "defer", .Defer },
		{ "and", .And }, { "or", .Or },
	}
	for kw in keywords {
		if compare_dyn_arr_string(&str, kw.text) {
			delete(str) // BUGFIX: scratch buffer previously leaked on every keyword
			return token_create(kw.kind, crange)
		}
	}
	return token_create_u8(.Identifier, str, crange)
}

// Reads an integer or float literal. Supports decimal and "0x" hexadecimal
// integers; hexadecimal floats are rejected with an error message.
@(private = "file")
lexer_read_number :: proc(lexer: ^Lexer) -> Token {
	crange := TextRange {
		start = lexer.position,
		end = lexer.position,
	}
	// FIXME: Implement binary
	is_hex := lexer.char == '0' && lexer.next == 'x'
	if is_hex {
		lexer_advance(lexer) // consume '0'
		crange.end = lexer.position
		lexer_advance(lexer) // consume 'x'
	}

	whole_part := lexer_read_digit_run(lexer, &crange, is_hex)

	if lexer.char != '.' {
		return token_create_u64(.Integer, whole_part, crange)
	}
	lexer_advance(lexer) // consume '.'

	if is_hex {
		append(&g_message_list, message_create(.Error, "Hexadecimal floating point numbers are not supported yet", crange))
	}
	fractional_part := lexer_read_digit_run(lexer, &crange, is_hex)

	// Count the fractional digits so the value can be scaled into place.
	count := 0
	for scratch := fractional_part; scratch != 0; scratch /= 10 {
		count += 1
	}
	floating := f64(whole_part) + f64(fractional_part) / math.pow_f64(10, f64(count))
	return token_create_f64(.Float, floating, crange)
}

// Accumulates a run of decimal (or, when `hex` is set, hexadecimal) digits
// into a u64, advancing the lexer and extending `crange` past each digit.
// BUGFIX: the previous hex loop lowercased the first character once outside
// the loop and never refreshed it, re-shifting the same stale digit until end
// of input, then masking the over-shift with a trailing `>> 4`/`/ 16`.
@(private = "file")
lexer_read_digit_run :: proc(lexer: ^Lexer, crange: ^TextRange, hex: bool) -> u64 {
	value : u64 = 0
	if hex {
		for {
			lowered := libc.tolower(i32(lexer.char))
			if libc.isxdigit(lowered) == 0 {
				break
			}
			digit := u64(lowered - '0')
			if libc.isdigit(lowered) == 0 {
				digit = u64(lowered - 'a' + 10)
			}
			value = (value << 4) | digit
			crange.end = lexer.position
			lexer_advance(lexer)
		}
	} else {
		for libc.isdigit(i32(lexer.char)) != 0 {
			value = value * 10 + u64(lexer.char - '0')
			crange.end = lexer.position
			lexer_advance(lexer)
		}
	}
	return value
}