package main
|
|
|
|
import "core:fmt"
|
|
import "core:c/libc"
|
|
import "core:math"
|
|
|
|
// Streaming lexer state over an in-memory source buffer.
Lexer :: struct {
	// Source bytes being tokenized (not owned by the lexer).
	data: ^[dynamic]u8,
	// Index into `data` of the byte that will next be loaded into `next`.
	read_position: u64,
	// Line/column of the current character, for token ranges and diagnostics.
	position: TextPosition,
	// Current character and one-byte lookahead (0 signals end of input).
	char, next: u8,
	// Kind of the most recently emitted token; consulted by the implicit-
	// semicolon rule when a newline is crossed.
	last_token_kind: TokenKind,
	// Set when a newline should be materialized as a .Semicolon token on the
	// next call to lexer_next.
	should_return_semicolon: bool,
}
|
|
|
|
// Allocate a lexer over `data`, starting at line 1, column 1.
// The two priming advances load `char` and `next` so one-character
// lookahead is valid from the very first call to lexer_next.
lexer_create :: proc(data: ^[dynamic]u8) -> ^Lexer {
	result := new(Lexer)
	result.data = data
	result.read_position = 0
	result.position = TextPosition{line = 1, column = 1}

	// Prime the current character and the lookahead.
	lexer_advance(result)
	lexer_advance(result)

	return result
}
|
|
|
|
@(private = "file")
// Shift the lookahead into `char`, pull the next byte from the buffer (or 0
// at end of input), and update the line/column position. '\r' is skipped
// entirely so CRLF line endings behave like plain LF.
lexer_advance :: proc(lexer: ^Lexer) {
	lexer.char = lexer.next
	if lexer.read_position < u64(len(lexer.data)) {
		lexer.next = lexer.data[lexer.read_position]
	} else {
		lexer.next = 0 // 0 marks EOF for all scanning loops
	}
	lexer.read_position += 1

	if lexer.char == '\r' {
		// BUG FIX: return after the recursive skip. Previously control fell
		// through to the position bookkeeping below with `char` already
		// replaced by the character AFTER the '\r' — for a "\r\n" pair the
		// recursive call had already counted the newline, and the fallthrough
		// counted it a second time (line incremented twice per CRLF).
		lexer_advance(lexer)
		return
	}

	if lexer.char == '\n' {
		lexer.position.line += 1
		lexer.position.column = 1
	} else {
		lexer.position.column += 1
	}
}
|
|
|
|
@(private = "file")
// Reports whether a newline following the last emitted token should NOT be
// turned into an implicit semicolon (Go-style rule, inverted: this lists the
// token kinds after which a newline is just whitespace).
// FIX: the original ||-chain tested .CloseBrace twice (once at the top and
// once among the closers); the duplicate is removed and the chain is
// restructured as a switch so further duplicates cannot creep in silently.
lexer_should_not_emit_semicolon :: proc(lexer: ^Lexer) -> bool {
	#partial switch lexer.last_token_kind {
	case .Semicolon, .EOF, .Invalid,
	     .OpenParen, .OpenBrace, .OpenBracket,
	     .CloseParen, .CloseBrace, .CloseBracket,
	     .Add, .Subtract, .Multiply, .Divide, .Modulo, .Exponent,
	     .Assign, .Not,
	     .BitwiseAnd, .BitwiseOr, .BitwiseXOR, .BitwiseNot,
	     .LessThan, .GreaterThan,
	     .BitwiseLeftShift, .BitwiseRightShift,
	     .Comma:
		return true
	}
	return false
}
|
|
|
|
@(private = "file")
// Consume spaces, tabs, carriage returns and newlines. When a newline is
// crossed after a token that can end a statement, queue an implicit
// semicolon (should_return_semicolon) and stop so lexer_next can emit it.
// FIXME: Do the funny golang thing where newlines are semicolons based on some rules
lexer_skip_whitespace :: proc(lexer: ^Lexer) {
	for {
		switch lexer.char {
		case ' ', '\t', '\r':
			lexer_advance(lexer)
		case '\n':
			if !lexer_should_not_emit_semicolon(lexer) {
				// Queue the implicit semicolon and hand control back.
				lexer.should_return_semicolon = true
				lexer_advance(lexer)
				return
			}
			lexer_advance(lexer)
		case:
			// First non-whitespace character: done.
			return
		}
	}
}
|
|
|
|
// Scan and return the next token. Named result `ret` is also captured by the
// deferred statement below to record the last emitted token kind for the
// implicit-semicolon rule.
lexer_next :: proc(lexer: ^Lexer) -> (ret: Token) {
	lexer_skip_whitespace(lexer)

	// '\\' consumes the remainder of the line, then re-lexes from the newline.
	// NOTE(review): reads like a line-continuation/comment marker — the
	// trailing '\n' is left for lexer_skip_whitespace on the recursive call;
	// confirm intended semantics.
	if lexer.char == '\\' {
		lexer_advance(lexer)
		for lexer.char != '\n' && lexer.char != 0 {
			lexer_advance(lexer)
		}
		return lexer_next(lexer)
	}

	// Emit a semicolon queued by lexer_skip_whitespace before scanning
	// anything new. Zero-width range at the current position.
	if lexer.should_return_semicolon {
		lexer.should_return_semicolon = false
		return token_create(.Semicolon, TextRange{start = lexer.position, end = lexer.position})
	}

	// Record what we return; lexer_should_not_emit_semicolon consults this
	// on the next newline.
	defer lexer.last_token_kind = ret.kind

	crange := TextRange {
		start = lexer.position,
		end = lexer.position,
	}

	// Default to .Invalid; the switch below overwrites it on a match.
	ret = token_create(.Invalid, crange)
	// Single-character tokens advance once after the switch; multi-character
	// scanners (identifier/number/EOF) manage their own advancing.
	should_advance := true

	switch lexer.char {
	case '+':
		ret = token_create(.Add, crange)
		if lexer.next == '+' { // "++"
			lexer_advance(lexer)
			crange.end = lexer.position
			ret = token_create(.Increment, crange)
		}
	case '-':
		ret = token_create(.Subtract, crange)
		if lexer.next == '-' { // "--"
			lexer_advance(lexer)
			crange.end = lexer.position
			ret = token_create(.Decrement, crange)
		} else if lexer.next == '>' { // "->"
			lexer_advance(lexer)
			crange.end = lexer.position
			ret = token_create(.Arrow, crange)
		}
	case '*':
		ret = token_create(.Multiply, crange)
	case '/':
		ret = token_create(.Divide, crange)
	case '%':
		ret = token_create(.Modulo, crange)
	case '`':
		// Backtick is the exponent operator in this language.
		ret = token_create(.Exponent, crange)
	case '=':
		ret = token_create(.Assign, crange)
		if lexer.next == '=' { // "=="
			lexer_advance(lexer)
			crange.end = lexer.position
			ret = token_create(.Equals, crange)
		}
	case '!':
		ret = token_create(.Not, crange)
		if lexer.next == '=' { // "!="
			lexer_advance(lexer)
			crange.end = lexer.position
			ret = token_create(.NotEquals, crange)
		}
	case '<':
		ret = token_create(.LessThan, crange)
		if lexer.next == '=' { // "<="
			lexer_advance(lexer)
			crange.end = lexer.position
			ret = token_create(.LessThanOrEqual, crange)
		} else if lexer.next == '<' { // "<<"
			lexer_advance(lexer)
			crange.end = lexer.position
			ret = token_create(.BitwiseLeftShift, crange)
		}
	case '>':
		ret = token_create(.GreaterThan, crange)
		if lexer.next == '=' { // ">="
			lexer_advance(lexer)
			crange.end = lexer.position
			ret = token_create(.GreaterThanOrEqual, crange)
		} else if lexer.next == '>' { // ">>"
			lexer_advance(lexer)
			crange.end = lexer.position
			ret = token_create(.BitwiseRightShift, crange)
		}
	case '&':
		ret = token_create(.BitwiseAnd, crange)
	case '|':
		ret = token_create(.BitwiseOr, crange)
	case '^':
		ret = token_create(.BitwiseXOR, crange)
	case '~':
		ret = token_create(.BitwiseNot, crange)
	case '(':
		ret = token_create(.OpenParen, crange)
	case ')':
		ret = token_create(.CloseParen, crange)
	case '[':
		ret = token_create(.OpenBracket, crange)
	case ']':
		ret = token_create(.CloseBracket, crange)
	case '{':
		ret = token_create(.OpenBrace, crange)
	case '}':
		ret = token_create(.CloseBrace, crange)

	case '?':
		ret = token_create(.Question, crange)
	case ':':
		ret = token_create(.Colon, crange)
	case '.':
		ret = token_create(.Dot, crange)
	case ',':
		ret = token_create(.Comma, crange)
	case ';':
		ret = token_create(.Semicolon, crange)

	// String and character literals share one scanner, parameterized by the
	// closing delimiter.
	case '"':
		ret = lexer_read_string(lexer, .String, '\"')
	case '\'':
		ret = lexer_read_string(lexer, .Character, '\'')
	// Identifiers/keywords: leading letter or underscore.
	case 'a' ..= 'z':
		fallthrough
	case 'A' ..= 'Z':
		fallthrough
	case '_':
		ret = lexer_read_identifier(lexer)
		should_advance = false // scanner already sits past the identifier
	case '0' ..= '9':
		ret = lexer_read_number(lexer)
		should_advance = false // scanner already sits past the number
	case 0:
		ret = token_create(.EOF, crange)
		should_advance = false // do not read past end of input
	}

	if should_advance {
		lexer_advance(lexer)
	}

	return
}
|
|
|
|
@(private = "file")
// Scan a string or character literal. `outer` is the closing delimiter
// ('"' or '\''); `kind` is the token kind to produce. On entry `lexer.char`
// is the opening delimiter; on exit it is the closing delimiter (consumed by
// the caller's post-switch advance).
// Recognized escapes: \n \t \b \r \\ \' \" — anything else emits a warning
// and is dropped.
lexer_read_string :: proc(lexer: ^Lexer, kind: TokenKind, outer: u8) -> Token {
	crange := TextRange {
		start = lexer.position,
		end   = lexer.position,
	}

	// Skip the opening delimiter.
	lexer_advance(lexer)

	str: [dynamic]u8
	// BUG FIX: also stop at EOF (char == 0). Previously an unterminated
	// literal spun forever, because at end of input `char` stays 0 and never
	// equals `outer`.
	for lexer.char != outer && lexer.char != 0 {
		if lexer.char == '\\' {
			range := TextRange {
				start = lexer.position,
			}
			lexer_advance(lexer)
			switch lexer.char {
			case 'n':
				append(&str, '\n')
			case 't':
				append(&str, '\t')
			case 'b':
				append(&str, '\b')
			case 'r':
				append(&str, '\r')
			case '\\':
				append(&str, '\\')
			case '\'', '"':
				// Allow escaping quotes so the delimiter itself can appear
				// inside the literal (previously warned and dropped).
				append(&str, lexer.char)
			case:
				// Unknown escape: warn and drop the escaped character.
				range.end = lexer.position
				append(
					&g_message_list,
					message_create(
						.Warning,
						fmt.aprintf(
							"Invalid string/character escape: %c at %s",
							lexer.char,
							"TODO LOCATION",
						),
						range,
					),
				)
			}
			lexer_advance(lexer)
			continue
		}
		append(&str, lexer.char)

		lexer_advance(lexer)
	}
	crange.end = lexer.position

	return token_create_u8(kind, str, crange)
}
|
|
|
|
@(private = "file")
// Scan an identifier ([A-Za-z0-9_]+, caller guarantees a non-digit first
// character) and return either the matching keyword token or an .Identifier
// token carrying the scanned bytes.
// FIX: the original if/else chain checked "let" twice (the second check was
// unreachable); the chain is replaced by a single keyword table so each
// keyword appears exactly once.
lexer_read_identifier :: proc(lexer: ^Lexer) -> Token {
	crange := TextRange {
		start = lexer.position,
	}

	str: [dynamic]u8
	for libc.isalnum(i32(lexer.char)) != 0 || lexer.char == '_' {
		append(&str, lexer.char)
		crange.end = lexer.position
		lexer_advance(lexer)
	}

	// Keyword lookup: text -> token kind.
	Keyword :: struct {
		text: string,
		kind: TokenKind,
	}
	keywords := [?]Keyword {
		{"fn", .Function}, {"struct", .Struct}, {"enum", .Enum},
		{"union", .Union}, {"type", .Type}, {"use", .Use},
		{"pub", .Pub}, {"let", .Let}, {"mut", .Mut},
		{"as", .As}, {"bitwise_as", .BitwiseAs}, {"in", .In},
		{"if", .If}, {"else", .Else}, {"elif", .Elif},
		{"for", .For}, {"break", .Break}, {"continue", .Continue},
		{"switch", .Switch}, {"case", .Case}, {"ret", .Ret},
		{"static", .Static}, {"defer", .Defer},
		{"and", .And}, {"or", .Or},
	}
	for kw in keywords {
		if compare_dyn_arr_string(&str, kw.text) {
			return token_create(kw.kind, crange)
		}
	}

	return token_create_u8(.Identifier, str, crange)
}
|
|
|
|
@(private = "file")
// Scan a numeric literal: decimal integer, 0x-prefixed hex integer, or a
// decimal float with a '.' fractional part. Returns .Integer (u64 payload)
// or .Float (f64 payload).
//
// BUG FIXES vs. the original:
//   * In both hex loops `lowered` was computed once BEFORE the loop and never
//     refreshed, so the loop condition and the digit value used a stale
//     character — the scanner kept consuming with the first digit's value.
//     The trailing `whole_part >>= 4` / `fractional_part /= 16` looked like
//     compensation for the over-read and are removed along with the bug.
//   * The fractional scale was recovered from the accumulated VALUE, which
//     loses leading zeros ("1.05" parsed as 1.5). Digits are now counted as
//     they are consumed.
lexer_read_number :: proc(lexer: ^Lexer) -> Token {
	crange := TextRange {
		start = lexer.position,
		end   = lexer.position,
	}

	// FIXME: Implement binary
	ReadMode :: enum {
		Normal,
		Hex,
	}
	read_mode := ReadMode.Normal

	// Consume a "0x" prefix, if present.
	if lexer.char == '0' && lexer.next == 'x' {
		read_mode = .Hex
		lexer_advance(lexer)
		crange.end = lexer.position
		lexer_advance(lexer)
	}

	whole_part: u64 = 0
	if read_mode == .Normal {
		for libc.isdigit(i32(lexer.char)) != 0 && lexer.char > 0 {
			whole_part = whole_part * 10 + u64(lexer.char) - '0'
			crange.end = lexer.position
			lexer_advance(lexer)
		}
	} else if read_mode == .Hex {
		for lexer.char > 0 && lexer.char != '.' {
			// Re-evaluate the lowered character EVERY iteration.
			lowered := libc.tolower(i32(lexer.char))
			if libc.isxdigit(lowered) == 0 {
				break
			}
			digit := lowered - '0'
			if libc.isdigit(lowered) == 0 {
				digit = lowered - 'a' + 10
			}
			whole_part = (whole_part << 4) | u64(digit)
			crange.end = lexer.position
			lexer_advance(lexer)
		}
	}

	if lexer.char == '.' {
		lexer_advance(lexer)

		// FIXME: Move this to another procedure because this is repeating lmfao
		fractional_part: u64 = 0
		digit_count := 0 // number of fractional digits actually consumed
		if read_mode == .Normal {
			for libc.isdigit(i32(lexer.char)) != 0 && lexer.char > 0 {
				fractional_part = fractional_part * 10 + u64(lexer.char) - '0'
				digit_count += 1
				crange.end = lexer.position
				lexer_advance(lexer)
			}
		} else if read_mode == .Hex {
			// Unsupported: report, but still consume the digits so the lexer
			// resynchronizes after the literal.
			append(
				&g_message_list,
				message_create(
					.Error,
					"Hexadecimal floating point numbers are not supported yet",
					crange,
				),
			)
			for lexer.char > 0 {
				lowered := libc.tolower(i32(lexer.char))
				if libc.isxdigit(lowered) == 0 {
					break
				}
				digit := lowered - '0'
				if libc.isdigit(lowered) == 0 {
					digit = lowered - 'a' + 10
				}
				fractional_part = fractional_part * 16 + u64(digit)
				digit_count += 1
				crange.end = lexer.position
				lexer_advance(lexer)
			}
		}

		// value = whole + fractional / 10^digit_count (e.g. "1.05" -> 1 + 5/100)
		floating := f64(fractional_part) / math.pow_f64(10, f64(digit_count)) + f64(whole_part)

		return token_create_f64(.Float, floating, crange)
	}

	return token_create_u64(.Integer, whole_part, crange)
}
|