362 lines
11 KiB
Odin
362 lines
11 KiB
Odin
package main
|
|
|
|
import "core:fmt"
|
|
import "core:c/libc"
|
|
import "core:math"
|
|
|
|
// Lexer carries the scanning state for a single source buffer.
Lexer :: struct {
    // Source bytes being scanned; the lexer only reads through this pointer.
    data:          ^[dynamic]u8,
    // Index of the next byte that lexer_advance will load into `next`.
    read_position: u64,
    // Line/column bookkeeping, updated by lexer_advance.
    position:      TextPosition,

    // Byte currently under the cursor.
    char:          u8,
    // One byte of lookahead; lexer_advance stores 0 here once `data` is exhausted.
    next:          u8,
    // Token kind recorded by lexer_next; consulted when deciding whether a
    // newline should be reported as a Semicolon.
    last_token_kind:         TokenKind,
    // Set by lexer_skip_whitespace when the next lexer_next call must
    // return a Semicolon token instead of scanning.
    should_return_semicolon: bool,
}
|
|
|
|
// lexer_create allocates a Lexer over `data` and primes its cursor.
//
// Parameters:
//   data - pointer to the source bytes; stored as-is, not copied.
// Returns:
//   A heap-allocated Lexer (via `new`); nothing in this procedure frees it.
lexer_create :: proc(data: ^[dynamic]u8) -> ^Lexer {
    // `new` zero-initialises, so every field not assigned below starts at 0.
    result := new(Lexer)
    result.data = data
    result.read_position = 0
    result.position = TextPosition {
        line = 1,
        column = 1,
    }

    // Two advances are needed before scanning: the first fills `next`,
    // the second moves that byte into `char` and refills `next`.
    for _ in 0..<2 {
        lexer_advance(result)
    }

    return result
}
|
|
|
|
// lexer_advance moves the cursor forward by one byte: `next` becomes `char`
// and a fresh lookahead byte (or 0 at end of input) is loaded into `next`.
// Carriage returns are skipped so callers never see '\r' in `char`.
//
// Position bookkeeping: `position` always describes the byte held in `char`.
// It is updated from the byte being *left*, so a '\n' moves the following
// byte to column 1 of the next line, and any other byte moves it one column
// to the right.
@(private = "file")
lexer_advance :: proc(lexer: ^Lexer) {
    // Looping (instead of recursing and then falling through) guarantees the
    // position is updated exactly once per consumed byte; previously a
    // "\r\n" pair bumped the line counter twice.
    for {
        leaving := lexer.char

        // lexer_create starts read_position at 0 and calls this twice to fill
        // `next` and then `char`. During those two calls no real byte is
        // being left behind, so the position must stay at its initial value.
        is_priming := lexer.read_position < 2

        lexer.char = lexer.next
        if lexer.read_position < u64(len(lexer.data)) {
            lexer.next = lexer.data[lexer.read_position]
        } else {
            // Past the end: 0 is what lexer_next reports as EOF.
            lexer.next = 0
        }
        lexer.read_position += 1

        if !is_priming {
            if leaving == '\n' {
                lexer.position.line += 1
                lexer.position.column = 1
            } else {
                lexer.position.column += 1
            }
        }

        // Skip carriage returns entirely (CRLF input is seen as plain '\n').
        if lexer.char != '\r' {
            break
        }
    }
}
|
|
|
|
// lexer_should_not_emit_semicolon reports whether a newline following the
// most recently recorded token kind must NOT be turned into a Semicolon.
// Returns true when `last_token_kind` is one of the kinds listed below, and
// false for every other kind.
@(private = "file")
lexer_should_not_emit_semicolon :: proc(lexer: ^Lexer) -> bool {
    #partial switch lexer.last_token_kind {
    case .CloseBrace, .Semicolon, .EOF, .Invalid,
         .OpenParen, .OpenBrace, .OpenBracket,
         .Add, .Subtract, .Multiply, .Divide, .Modulo, .Exponent,
         .Assign, .Not,
         .BitwiseAnd, .BitwiseOr, .BitwiseXOR, .BitwiseNot,
         .LessThan, .GreaterThan,
         .BitwiseLeftShift, .BitwiseRightShift:
        return true
    }
    return false
}
|
|
|
|
// lexer_skip_whitespace consumes spaces, tabs, carriage returns and newlines.
// When it meets a newline that lexer_should_not_emit_semicolon does not
// suppress, it sets `should_return_semicolon`, steps past that newline and
// stops, so that lexer_next can report the Semicolon.
@(private = "file")
lexer_skip_whitespace :: proc(lexer: ^Lexer) {
    for {
        switch lexer.char {
        case ' ', '\t', '\r':
            lexer_advance(lexer)
        case '\n':
            ends_statement := !lexer_should_not_emit_semicolon(lexer)
            if ends_statement {
                lexer.should_return_semicolon = true
            }
            lexer_advance(lexer)
            if ends_statement {
                return
            }
        case:
            // First non-whitespace byte: leave it for the caller.
            return
        }
    }
}
|
|
|
|
// lexer_next scans and returns the next token.
//
// Whitespace is skipped first. If that skipping decided a newline ends a
// statement, a Semicolon token is returned without scanning any further.
// Otherwise the current byte selects the token:
//   - operators and punctuation (some extended by one lookahead byte),
//   - string / character literals (lexer_read_string),
//   - identifiers and keywords (lexer_read_identifier),
//   - numbers (lexer_read_number),
//   - 0 yields EOF; any unrecognised byte yields Invalid.
// The kind of every returned token is stored in `lexer.last_token_kind`.
lexer_next :: proc(lexer: ^Lexer) -> (ret: Token) {
    // Consumes the second byte of a two-byte operator, stretches the range
    // over it and builds the token for the combined operator.
    two_byte :: proc(lexer: ^Lexer, crange: ^TextRange, kind: TokenKind) -> Token {
        lexer_advance(lexer)
        crange.end = lexer.position
        return token_create(kind, crange^)
    }

    lexer_skip_whitespace(lexer)
    if lexer.should_return_semicolon {
        lexer.should_return_semicolon = false
        // This return happens before the `defer` below is registered, so the
        // kind has to be recorded by hand. Without it the previous token kind
        // stays in place and every additional blank line yields yet another
        // Semicolon.
        lexer.last_token_kind = .Semicolon
        return token_create(.Semicolon, TextRange { start = lexer.position, end = lexer.position })
    }

    defer lexer.last_token_kind = ret.kind

    crange := TextRange {
        start = lexer.position,
        end = lexer.position,
    }

    // Anything not matched by the switch is reported as Invalid.
    ret = token_create(.Invalid, crange)
    // The read_* helpers for identifiers and numbers already stop on the
    // first byte after the token, and EOF must not move; everything else
    // still has its final byte under the cursor.
    should_advance := true

    switch lexer.char {
    case '+':
        ret = token_create(.Add, crange)
        if lexer.next == '+' {
            ret = two_byte(lexer, &crange, .Increment)
        }
    case '-':
        ret = token_create(.Subtract, crange)
        if lexer.next == '-' {
            ret = two_byte(lexer, &crange, .Decrement)
        } else if lexer.next == '>' {
            ret = two_byte(lexer, &crange, .Arrow)
        }
    case '*': ret = token_create(.Multiply, crange)
    case '/': ret = token_create(.Divide, crange)
    case '%': ret = token_create(.Modulo, crange)
    case '`': ret = token_create(.Exponent, crange)
    case '=': ret = token_create(.Assign, crange)
    case '!':
        ret = token_create(.Not, crange)
        if lexer.next == '=' {
            ret = two_byte(lexer, &crange, .NotEquals)
        }
    case '<':
        ret = token_create(.LessThan, crange)
        if lexer.next == '=' {
            ret = two_byte(lexer, &crange, .LessThanOrEqual)
        } else if lexer.next == '<' {
            ret = two_byte(lexer, &crange, .BitwiseLeftShift)
        }
    case '>':
        ret = token_create(.GreaterThan, crange)
        if lexer.next == '=' {
            ret = two_byte(lexer, &crange, .GreaterThanOrEqual)
        } else if lexer.next == '>' {
            ret = two_byte(lexer, &crange, .BitwiseRightShift)
        }
    case '&': ret = token_create(.BitwiseAnd, crange)
    case '|': ret = token_create(.BitwiseOr, crange)
    case '^': ret = token_create(.BitwiseXOR, crange)
    case '~': ret = token_create(.BitwiseNot, crange)
    case '(': ret = token_create(.OpenParen, crange)
    case ')': ret = token_create(.CloseParen, crange)
    case '[': ret = token_create(.OpenBracket, crange)
    case ']': ret = token_create(.CloseBracket, crange)
    case '{': ret = token_create(.OpenBrace, crange)
    case '}': ret = token_create(.CloseBrace, crange)

    case '?': ret = token_create(.Question, crange)
    case ':': ret = token_create(.Colon, crange)
    case '.': ret = token_create(.Dot, crange)
    case ';': ret = token_create(.Semicolon, crange)

    // lexer_read_string stops ON the closing delimiter, so the trailing
    // advance below is what steps over it.
    case '"': ret = lexer_read_string(lexer, .String, '\"')
    case '\'': ret = lexer_read_string(lexer, .Character, '\'')

    case 'a'..='z', 'A'..='Z', '_':
        ret = lexer_read_identifier(lexer)
        should_advance = false
    case '0'..='9':
        ret = lexer_read_number(lexer)
        should_advance = false
    case 0:
        ret = token_create(.EOF, crange)
        should_advance = false
    }

    if should_advance {
        lexer_advance(lexer)
    }

    return
}
|
|
|
|
// lexer_read_string reads a delimited literal whose opening delimiter is the
// current byte.
//
// Parameters:
//   lexer - lexer positioned on the opening delimiter.
//   kind  - token kind to produce (the caller passes .String or .Character).
//   outer - delimiter byte that terminates the literal.
// Returns:
//   A token of `kind` carrying the decoded bytes. On return the cursor is
//   still ON the closing delimiter (or at end of input if it was missing);
//   the caller is expected to advance past it.
// Diagnostics appended to g_message_list:
//   - Warning for an unrecognised escape (the escaped byte is dropped),
//   - Error when the input ends before the closing delimiter.
@(private = "file")
lexer_read_string :: proc(lexer: ^Lexer, kind: TokenKind, outer: u8) -> Token {
    crange := TextRange {
        start = lexer.position,
        end = lexer.position,
    }

    // Step over the opening delimiter.
    lexer_advance(lexer)

    str : [dynamic]u8
    for lexer.char != outer {
        // lexer_advance yields 0 forever once the input is exhausted, so an
        // unterminated literal would otherwise loop (and allocate) without end.
        if lexer.char == 0 {
            crange.end = lexer.position
            append(&g_message_list,
                message_create(.Error, "Unterminated string/character literal", crange),
            )
            return token_create_u8(kind, str, crange)
        }

        if lexer.char == '\\' {
            escape_range := TextRange { start = lexer.position }
            lexer_advance(lexer)
            switch lexer.char {
            case 'n':  append(&str, '\n')
            case 't':  append(&str, '\t')
            case 'b':  append(&str, '\b')
            case 'r':  append(&str, '\r')
            case '\\': append(&str, '\\')
            // Escaped delimiters: keep the quote instead of warning and
            // silently dropping it.
            case '"':  append(&str, '"')
            case '\'': append(&str, '\'')
            case:
                escape_range.end = lexer.position
                append(&g_message_list,
                    message_create(.Warning, fmt.aprintf("Invalid string/character escape: %c at %s", lexer.char, "TODO LOCATION"), escape_range),
                )
            }
            // Step over the escaped byte; it has been handled above.
            lexer_advance(lexer)
            continue
        }

        append(&str, lexer.char)
        lexer_advance(lexer)
    }
    crange.end = lexer.position

    return token_create_u8(kind, str, crange)
}
|
|
|
|
// lexer_read_identifier reads a run of alphanumeric / '_' bytes starting at
// the current byte and classifies it.
//
// Returns:
//   A keyword token (no payload) when the text matches one of the keywords
//   below, otherwise an .Identifier token carrying the text. On return the
//   cursor is on the first byte after the word.
@(private = "file")
lexer_read_identifier :: proc(lexer: ^Lexer) -> Token {
    crange := TextRange { start = lexer.position }

    str : [dynamic]u8
    for libc.isalnum(i32(lexer.char)) != 0 || lexer.char == '_' {
        append(&str, lexer.char)
        // Track the last byte that belongs to the word.
        crange.end = lexer.position
        lexer_advance(lexer)
    }

    // .Identifier doubles as "no keyword matched".
    kind := TokenKind.Identifier
    switch {
    case compare_dyn_arr_string(&str, "fn"):       kind = .Function
    case compare_dyn_arr_string(&str, "struct"):   kind = .Struct
    case compare_dyn_arr_string(&str, "enum"):     kind = .Enum
    case compare_dyn_arr_string(&str, "union"):    kind = .Union
    case compare_dyn_arr_string(&str, "type"):     kind = .Type
    case compare_dyn_arr_string(&str, "use"):      kind = .Use
    case compare_dyn_arr_string(&str, "pub"):      kind = .Pub
    case compare_dyn_arr_string(&str, "let"):      kind = .Let
    case compare_dyn_arr_string(&str, "mut"):      kind = .Mut
    case compare_dyn_arr_string(&str, "as"):       kind = .As
    case compare_dyn_arr_string(&str, "in"):       kind = .In
    case compare_dyn_arr_string(&str, "else"):     kind = .Else
    case compare_dyn_arr_string(&str, "elif"):     kind = .Elif
    case compare_dyn_arr_string(&str, "for"):      kind = .For
    case compare_dyn_arr_string(&str, "break"):    kind = .Break
    case compare_dyn_arr_string(&str, "continue"): kind = .Continue
    case compare_dyn_arr_string(&str, "switch"):   kind = .Switch
    case compare_dyn_arr_string(&str, "case"):     kind = .Case
    case compare_dyn_arr_string(&str, "ret"):      kind = .Ret
    case compare_dyn_arr_string(&str, "static"):   kind = .Static
    case compare_dyn_arr_string(&str, "defer"):    kind = .Defer
    case compare_dyn_arr_string(&str, "and"):      kind = .And
    case compare_dyn_arr_string(&str, "or"):       kind = .Or
    }

    if kind != .Identifier {
        // Keyword tokens carry no text, so the scratch buffer is no longer
        // referenced by anything; release it instead of leaking it.
        delete(str)
        return token_create(kind, crange)
    }

    // The identifier token receives the buffer.
    return token_create_u8(.Identifier, str, crange)
}
|
|
|
|
// lexer_read_number reads a numeric literal starting at the current byte.
//
// Supported forms:
//   - decimal integers            (123)
//   - hexadecimal integers        (0x1F)
//   - decimal floats              (1.05)
//   - hexadecimal floats          (0x1.8) -- still reported as an Error via
//     g_message_list, but the digits are consumed and scaled in base 16 so
//     lexing can continue.
// Returns:
//   An .Integer token (u64 payload) or a .Float token (f64 payload). On
//   return the cursor is on the first byte after the literal.
// Overflow of the 64-bit integer part is not detected.
@(private = "file")
lexer_read_number :: proc(lexer: ^Lexer) -> Token {
    // Value of `c` as a digit in `radix` (10 or 16); ok is false when `c`
    // is not a digit of that radix.
    digit_value :: proc(c: u8, radix: u64) -> (value: u64, ok: bool) {
        switch c {
        case '0'..='9': value = u64(c - '0')
        case 'a'..='f': value = u64(c - 'a') + 10
        case 'A'..='F': value = u64(c - 'A') + 10
        case:
            return 0, false
        }
        return value, value < radix
    }

    crange := TextRange {
        start = lexer.position,
        end = lexer.position,
    }

    // FIXME: Implement binary
    radix : u64 = 10
    is_hex := lexer.char == '0' && lexer.next == 'x'
    if is_hex {
        radix = 16
        // Step over "0x".
        lexer_advance(lexer)
        crange.end = lexer.position
        lexer_advance(lexer)
    }

    // Integer part. The digit is re-read from `lexer.char` on every
    // iteration; the previous hex loop classified the first digit once and
    // then kept consuming bytes until '.' or end of input.
    whole_part : u64 = 0
    for {
        digit, ok := digit_value(lexer.char, radix)
        if !ok {
            break
        }
        whole_part = whole_part * radix + digit
        crange.end = lexer.position
        lexer_advance(lexer)
    }

    if lexer.char != '.' {
        return token_create_u64(.Integer, whole_part, crange)
    }

    // Include the '.' in the token range, then step over it.
    crange.end = lexer.position
    lexer_advance(lexer)

    // Only this many fractional digits fit in a u64 numerator without
    // overflow (10^18 and 16^15 are both below 2^64); further digits are
    // consumed but ignored.
    max_digits := 18
    if is_hex {
        append(&g_message_list, message_create(.Error, "Hexadecimal floating point numbers are not supported yet", crange))
        max_digits = 15
    }

    // The fraction is kept as numerator / radix^digit_count. Counting the
    // digits as they are read (rather than deriving the count from the value)
    // preserves leading zeros: "1.05" is 5 / 10^2, not 5 / 10^1.
    numerator : u64 = 0
    digit_count := 0
    for {
        digit, ok := digit_value(lexer.char, radix)
        if !ok {
            break
        }
        if digit_count < max_digits {
            numerator = numerator * radix + digit
            digit_count += 1
        }
        crange.end = lexer.position
        lexer_advance(lexer)
    }

    floating : f64 = f64(numerator) / math.pow_f64(f64(radix), f64(digit_count)) + f64(whole_part)

    return token_create_f64(.Float, floating, crange)
}
|
|
|