package main import "core:fmt" import "core:c/libc" import "core:math" Lexer :: struct { data: ^[dynamic]u8, read_position: u64, position: TextPosition, char, next: u8, last_token_kind: TokenKind, should_return_semicolon: bool, } lexer_create :: proc(data: ^[dynamic]u8) -> ^Lexer { lexer := new(Lexer) lexer^ = { data = data, read_position = 0, position = TextPosition{line = 1, column = 1}, } lexer_advance(lexer) lexer_advance(lexer) return lexer } @(private = "file") lexer_advance :: proc(lexer: ^Lexer) { lexer.char = lexer.next if lexer.read_position < u64(len(lexer.data)) { lexer.next = lexer.data[lexer.read_position] } else { lexer.next = 0 } lexer.read_position += 1 if lexer.char == '\r' { lexer_advance(lexer) } if lexer.char == '\n' { lexer.position.line += 1 lexer.position.column = 1 } else { lexer.position.column += 1 } } @(private = "file") lexer_should_not_emit_semicolon :: proc(lexer: ^Lexer) -> bool { return( lexer.last_token_kind == .CloseBrace || lexer.last_token_kind == .Semicolon || lexer.last_token_kind == .EOF || lexer.last_token_kind == .Invalid || lexer.last_token_kind == .OpenParen || lexer.last_token_kind == .OpenBrace || lexer.last_token_kind == .OpenBracket || lexer.last_token_kind == .CloseParen || lexer.last_token_kind == .CloseBrace || lexer.last_token_kind == .CloseBracket || lexer.last_token_kind == .Add || lexer.last_token_kind == .Subtract || lexer.last_token_kind == .Multiply || lexer.last_token_kind == .Divide || lexer.last_token_kind == .Modulo || lexer.last_token_kind == .Exponent || lexer.last_token_kind == .Assign || lexer.last_token_kind == .Not || lexer.last_token_kind == .BitwiseAnd || lexer.last_token_kind == .BitwiseOr || lexer.last_token_kind == .BitwiseXOR || lexer.last_token_kind == .BitwiseNot || lexer.last_token_kind == .LessThan || lexer.last_token_kind == .GreaterThan || lexer.last_token_kind == .BitwiseLeftShift || lexer.last_token_kind == .BitwiseRightShift || lexer.last_token_kind == .Comma \ ) } @(private = "file") lexer_skip_whitespace :: proc(lexer: ^Lexer) { // FIXME: Do the funny golang thing where newlines are semicolons based on some rules for lexer.char == ' ' || lexer.char == '\t' || lexer.char == '\r' || lexer.char == '\n' { if lexer.char == '\n' { if !lexer_should_not_emit_semicolon(lexer) { lexer.should_return_semicolon = true lexer_advance(lexer) return } } lexer_advance(lexer) } } lexer_next :: proc(lexer: ^Lexer) -> (ret: Token) { lexer_skip_whitespace(lexer) if lexer.char == '\\' { lexer_advance(lexer) for lexer.char != '\n' && lexer.char != 0 { lexer_advance(lexer) } return lexer_next(lexer) } if lexer.should_return_semicolon { lexer.should_return_semicolon = false return token_create(.Semicolon, TextRange{start = lexer.position, end = lexer.position}) } defer lexer.last_token_kind = ret.kind crange := TextRange { start = lexer.position, end = lexer.position, } ret = token_create(.Invalid, crange) should_advance := true switch lexer.char { case '+': ret = token_create(.Add, crange) if lexer.next == '+' { lexer_advance(lexer) crange.end = lexer.position ret = token_create(.Increment, crange) } case '-': ret = token_create(.Subtract, crange) if lexer.next == '-' { lexer_advance(lexer) crange.end = lexer.position ret = token_create(.Decrement, crange) } else if lexer.next == '>' { lexer_advance(lexer) crange.end = lexer.position ret = token_create(.Arrow, crange) } case '*': ret = token_create(.Multiply, crange) case '/': ret = token_create(.Divide, crange) case '%': ret = token_create(.Modulo, crange) case '`': ret = 
lexer_next :: proc(lexer: ^Lexer) -> (ret: Token) {
	lexer_skip_whitespace(lexer)

	// A backslash starts a line comment: skip to the end of the line,
	// then lex again from there.
	if lexer.char == '\\' {
		lexer_advance(lexer)
		for lexer.char != '\n' && lexer.char != 0 {
			lexer_advance(lexer)
		}
		return lexer_next(lexer)
	}

	if lexer.should_return_semicolon {
		lexer.should_return_semicolon = false
		return token_create(.Semicolon, TextRange{start = lexer.position, end = lexer.position})
	}

	defer lexer.last_token_kind = ret.kind

	crange := TextRange {
		start = lexer.position,
		end   = lexer.position,
	}
	ret = token_create(.Invalid, crange)
	should_advance := true

	switch lexer.char {
	case '+':
		ret = token_create(.Add, crange)
		if lexer.next == '+' {
			lexer_advance(lexer)
			crange.end = lexer.position
			ret = token_create(.Increment, crange)
		}
	case '-':
		ret = token_create(.Subtract, crange)
		if lexer.next == '-' {
			lexer_advance(lexer)
			crange.end = lexer.position
			ret = token_create(.Decrement, crange)
		} else if lexer.next == '>' {
			lexer_advance(lexer)
			crange.end = lexer.position
			ret = token_create(.Arrow, crange)
		}
	case '*':
		ret = token_create(.Multiply, crange)
	case '/':
		ret = token_create(.Divide, crange)
	case '%':
		ret = token_create(.Modulo, crange)
	case '`':
		ret = token_create(.Exponent, crange)
	case '=':
		ret = token_create(.Assign, crange)
		if lexer.next == '=' {
			lexer_advance(lexer)
			crange.end = lexer.position
			ret = token_create(.Equals, crange)
		}
	case '!':
		ret = token_create(.Not, crange)
		if lexer.next == '=' {
			lexer_advance(lexer)
			crange.end = lexer.position
			ret = token_create(.NotEquals, crange)
		}
	case '<':
		ret = token_create(.LessThan, crange)
		if lexer.next == '=' {
			lexer_advance(lexer)
			crange.end = lexer.position
			ret = token_create(.LessThanOrEqual, crange)
		} else if lexer.next == '<' {
			lexer_advance(lexer)
			crange.end = lexer.position
			ret = token_create(.BitwiseLeftShift, crange)
		}
	case '>':
		ret = token_create(.GreaterThan, crange)
		if lexer.next == '=' {
			lexer_advance(lexer)
			crange.end = lexer.position
			ret = token_create(.GreaterThanOrEqual, crange)
		} else if lexer.next == '>' {
			lexer_advance(lexer)
			crange.end = lexer.position
			ret = token_create(.BitwiseRightShift, crange)
		}
	case '&':
		ret = token_create(.BitwiseAnd, crange)
	case '|':
		ret = token_create(.BitwiseOr, crange)
	case '^':
		ret = token_create(.BitwiseXOR, crange)
	case '~':
		ret = token_create(.BitwiseNot, crange)
	case '(':
		ret = token_create(.OpenParen, crange)
	case ')':
		ret = token_create(.CloseParen, crange)
	case '[':
		ret = token_create(.OpenBracket, crange)
	case ']':
		ret = token_create(.CloseBracket, crange)
	case '{':
		ret = token_create(.OpenBrace, crange)
	case '}':
		ret = token_create(.CloseBrace, crange)
	case '?':
		ret = token_create(.Question, crange)
	case ':':
		ret = token_create(.Colon, crange)
	case '.':
		ret = token_create(.Dot, crange)
	case ',':
		ret = token_create(.Comma, crange)
	case ';':
		ret = token_create(.Semicolon, crange)
	case '"':
		ret = lexer_read_string(lexer, .String, '\"')
	case '\'':
		ret = lexer_read_string(lexer, .Character, '\'')
	case 'a' ..= 'z', 'A' ..= 'Z', '_':
		ret = lexer_read_identifier(lexer)
		should_advance = false
	case '0' ..= '9':
		ret = lexer_read_number(lexer)
		should_advance = false
	case 0:
		ret = token_create(.EOF, crange)
		should_advance = false
	}

	if should_advance {
		lexer_advance(lexer)
	}
	return
}

@(private = "file")
lexer_read_string :: proc(lexer: ^Lexer, kind: TokenKind, outer: u8) -> Token {
	crange := TextRange {
		start = lexer.position,
		end   = lexer.position,
	}
	lexer_advance(lexer)

	str: [dynamic]u8
	// Stop at EOF as well, so an unterminated literal cannot loop forever.
	for lexer.char != outer && lexer.char != 0 {
		if lexer.char == '\\' {
			range := TextRange {
				start = lexer.position,
			}
			lexer_advance(lexer)
			switch lexer.char {
			case 'n':
				append(&str, '\n')
			case 't':
				append(&str, '\t')
			case 'b':
				append(&str, '\b')
			case 'r':
				append(&str, '\r')
			case '\\':
				append(&str, '\\')
			case:
				range.end = lexer.position
				append(
					&g_message_list,
					message_create(
						.Warning,
						fmt.aprintf(
							"Invalid string/character escape: %c at %s",
							lexer.char,
							"TODO LOCATION",
						),
						range,
					),
				)
			}
			lexer_advance(lexer)
			continue
		}
		append(&str, lexer.char)
		lexer_advance(lexer)
	}
	crange.end = lexer.position
	return token_create_u8(kind, str, crange)
}
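// For example, lexing the literal "a\tb" yields a .String token whose text
// is `a`, a tab, then `b`. An unrecognized escape such as \q is dropped from
// the token text, and a .Warning is appended to g_message_list instead.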
@(private = "file")
lexer_read_identifier :: proc(lexer: ^Lexer) -> Token {
	crange := TextRange {
		start = lexer.position,
	}

	str: [dynamic]u8
	for libc.isalnum(i32(lexer.char)) != 0 || lexer.char == '_' {
		append(&str, lexer.char)
		crange.end = lexer.position
		lexer_advance(lexer)
	}

	// Keywords take precedence over plain identifiers.
	if compare_dyn_arr_string(&str, "fn") {return token_create(.Function, crange)}
	if compare_dyn_arr_string(&str, "struct") {return token_create(.Struct, crange)}
	if compare_dyn_arr_string(&str, "enum") {return token_create(.Enum, crange)}
	if compare_dyn_arr_string(&str, "union") {return token_create(.Union, crange)}
	if compare_dyn_arr_string(&str, "type") {return token_create(.Type, crange)}
	if compare_dyn_arr_string(&str, "use") {return token_create(.Use, crange)}
	if compare_dyn_arr_string(&str, "pub") {return token_create(.Pub, crange)}
	if compare_dyn_arr_string(&str, "let") {return token_create(.Let, crange)}
	if compare_dyn_arr_string(&str, "mut") {return token_create(.Mut, crange)}
	if compare_dyn_arr_string(&str, "as") {return token_create(.As, crange)}
	if compare_dyn_arr_string(&str, "in") {return token_create(.In, crange)}
	if compare_dyn_arr_string(&str, "if") {return token_create(.If, crange)}
	if compare_dyn_arr_string(&str, "else") {return token_create(.Else, crange)}
	if compare_dyn_arr_string(&str, "elif") {return token_create(.Elif, crange)}
	if compare_dyn_arr_string(&str, "for") {return token_create(.For, crange)}
	if compare_dyn_arr_string(&str, "break") {return token_create(.Break, crange)}
	if compare_dyn_arr_string(&str, "continue") {return token_create(.Continue, crange)}
	if compare_dyn_arr_string(&str, "switch") {return token_create(.Switch, crange)}
	if compare_dyn_arr_string(&str, "case") {return token_create(.Case, crange)}
	if compare_dyn_arr_string(&str, "ret") {return token_create(.Ret, crange)}
	if compare_dyn_arr_string(&str, "static") {return token_create(.Static, crange)}
	if compare_dyn_arr_string(&str, "defer") {return token_create(.Defer, crange)}
	if compare_dyn_arr_string(&str, "and") {return token_create(.And, crange)}
	if compare_dyn_arr_string(&str, "or") {return token_create(.Or, crange)}

	return token_create_u8(.Identifier, str, crange)
}
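// For example, `ret` matches a keyword check and lexes to a .Ret token,
// while `retval` fails every check and becomes an .Identifier token
// carrying the text "retval".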
@(private = "file")
lexer_read_number :: proc(lexer: ^Lexer) -> Token {
	crange := TextRange {
		start = lexer.position,
		end   = lexer.position,
	}

	// FIXME: Implement binary
	ReadMode :: enum {
		Normal,
		Hex,
	}
	read_mode := ReadMode.Normal
	if lexer.char == '0' && lexer.next == 'x' {
		read_mode = .Hex
		lexer_advance(lexer)
		crange.end = lexer.position
		lexer_advance(lexer)
	}

	whole_part: u64 = 0
	if read_mode == .Normal {
		for libc.isdigit(i32(lexer.char)) != 0 {
			whole_part = whole_part * 10 + u64(lexer.char - '0')
			crange.end = lexer.position
			lexer_advance(lexer)
		}
	} else if read_mode == .Hex {
		// Re-lower the current character on every iteration; computing it
		// once before the loop would keep parsing a stale value.
		for {
			lowered := libc.tolower(i32(lexer.char))
			if libc.isxdigit(lowered) == 0 {break}
			digit := u64(lowered - '0')
			if libc.isdigit(lowered) == 0 {
				digit = u64(lowered - 'a' + 10)
			}
			whole_part = (whole_part << 4) | digit
			crange.end = lexer.position
			lexer_advance(lexer)
		}
	}

	if lexer.char == '.' {
		lexer_advance(lexer)

		// FIXME: Move this to another procedure because this is repeating
		fractional_part: u64 = 0
		digit_count := 0
		if read_mode == .Normal {
			// Count digits as they are read so leading zeros in the
			// fraction (e.g. "1.05") are not lost when scaling below.
			for libc.isdigit(i32(lexer.char)) != 0 {
				fractional_part = fractional_part * 10 + u64(lexer.char - '0')
				digit_count += 1
				crange.end = lexer.position
				lexer_advance(lexer)
			}
		} else if read_mode == .Hex {
			append(
				&g_message_list,
				message_create(
					.Error,
					"Hexadecimal floating point numbers are not supported yet",
					crange,
				),
			)
			// Consume the hex digits anyway so lexing can continue past them.
			for {
				lowered := libc.tolower(i32(lexer.char))
				if libc.isxdigit(lowered) == 0 {break}
				crange.end = lexer.position
				lexer_advance(lexer)
			}
		}

		// Scale the fraction down by one power of ten per digit read.
		floating := f64(whole_part) + f64(fractional_part) / math.pow_f64(10, f64(digit_count))
		return token_create_f64(.Float, floating, crange)
	}

	return token_create_u64(.Integer, whole_part, crange)
}
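// A minimal driving loop, as a sketch (assumes the source file has already
// been read into a [dynamic]u8 named `source`; Token, TokenKind, and the
// token_create helpers live elsewhere in this package):
//
//	source_lexer := lexer_create(&source)
//	for {
//		token := lexer_next(source_lexer)
//		if token.kind == .EOF {break}
//		fmt.println(token)
//	}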