Compiler messages are now printed in color with their respective code, and field access support has been added to the type checker. However, field access nodes that contain other field access nodes are not yet supported. Signed-off-by: Slendi <slendi@socopon.com>
469 lines
13 KiB
Odin
469 lines
13 KiB
Odin
package main
|
|
|
|
import "core:c/libc"
|
|
import "core:fmt"
|
|
import "core:math"
|
|
|
|
// Streaming tokenizer state.  Scans a byte buffer with a one-byte
// lookahead: `char` is the character being examined, `next` the one
// after it; 0 in either means end of input.
Lexer :: struct {
	file_name: string, // reported in SourceLocations / diagnostics
	data: ^[dynamic]u8, // whole source buffer being scanned
	read_position: u64, // index of the byte that will become `next`
	position: TextPosition, // 1-based line/column of `char`
	char, next: u8, // current character and lookahead
	last_token_kind: TokenKind, // drives automatic semicolon insertion
	should_return_semicolon: bool, // newline rule queued a synthetic ';'
}
|
|
|
|
// Allocates and initialises a Lexer over `data`.
// The two priming advances fill the `char`/`next` lookahead pair so the
// first input byte is already in `char` when scanning starts.
lexer_create :: proc(data: ^[dynamic]u8, file_name: string) -> ^Lexer {
	result := new(Lexer)
	result.file_name = file_name
	result.data = data
	result.read_position = 0
	result.position = TextPosition {
		line   = 1,
		column = 1,
	}

	// Prime the two-byte lookahead window.
	lexer_advance(result)
	lexer_advance(result)

	return result
}
|
|
|
|
@(private = "file")
// Shifts the lookahead window forward one byte and updates line/column.
// `char` receives the old `next`; `next` receives the following byte,
// or 0 once the buffer is exhausted.
lexer_advance :: proc(lexer: ^Lexer) {
	lexer.char = lexer.next
	if lexer.read_position < u64(len(lexer.data)) {
		lexer.next = lexer.data[lexer.read_position]
	} else {
		lexer.next = 0 // NUL marks end of input
	}
	lexer.read_position += 1
	if lexer.char == '\r' {
		// Swallow carriage returns entirely (CRLF -> LF).  Must return
		// here: the recursive call has already done the position
		// bookkeeping for the character that replaced the '\r', and
		// falling through would count it a second time (the old code
		// double-incremented the line number on every CRLF).
		lexer_advance(lexer)
		return
	}
	if lexer.char == '\n' {
		lexer.position.line += 1
		lexer.position.column = 1
	} else {
		lexer.position.column += 1
	}
}
|
|
|
|
@(private = "file")
// Reports whether a newline must NOT be converted into an implicit
// semicolon, judged by the last token emitted: after an operator, an
// opening bracket, a comma, or an already-emitted semicolon the
// statement clearly continues on the next line.
lexer_should_not_emit_semicolon :: proc(lexer: ^Lexer) -> bool {
	#partial switch lexer.last_token_kind {
	case .Semicolon, .EOF, .Invalid,
	     .OpenParen, .OpenBrace, .OpenBracket,
	     .Add, .Subtract, .Multiply, .Divide, .Modulo, .Exponent,
	     .Assign, .Not,
	     .BitwiseAnd, .BitwiseOr, .BitwiseXOR, .BitwiseNot,
	     .LessThan, .GreaterThan,
	     .BitwiseLeftShift, .BitwiseRightShift,
	     .Comma:
		return true
	}
	return false
}
|
|
|
|
@(private = "file")
// Consumes spaces, tabs, CRs and newlines.  When a newline ends a
// statement (per lexer_should_not_emit_semicolon) a synthetic semicolon
// is queued and scanning stops so lexer_next can emit it.
lexer_skip_whitespace :: proc(lexer: ^Lexer) {
	scan: for {
		switch lexer.char {
		case '\n':
			if !lexer_should_not_emit_semicolon(lexer) {
				lexer.should_return_semicolon = true
				lexer_advance(lexer)
				return
			}
			lexer_advance(lexer)
		case ' ', '\t', '\r':
			lexer_advance(lexer)
		case:
			break scan
		}
	}
}
|
|
|
|
// Scans and returns the next token from the stream.
// Also implements automatic semicolon insertion: a newline consumed by
// lexer_skip_whitespace may queue a synthetic .Semicolon, which is
// returned here before any further scanning.
lexer_next :: proc(lexer: ^Lexer) -> (ret: Token) {
	lexer_skip_whitespace(lexer)

	// A '\' outside a literal discards the rest of the line, then
	// scanning restarts (line-comment syntax).
	if lexer.char == '\\' {
		lexer_advance(lexer)
		for lexer.char != '\n' && lexer.char != 0 {
			lexer_advance(lexer)
		}
		return lexer_next(lexer)
	}

	// Deliver a semicolon queued by the newline rule, exactly once.
	if lexer.should_return_semicolon {
		lexer.should_return_semicolon = false
		return token_create(
			.Semicolon,
			SourceLocation{TextRange{start = lexer.position, end = lexer.position}, lexer.file_name},
		)
	}

	// Record what we return; the newline rule consults this next time.
	defer lexer.last_token_kind = ret.kind

	// Single-character tokens use this span as-is; multi-character
	// tokens extend crange.range.end before re-creating the token.
	crange := SourceLocation{TextRange{start = lexer.position, end = lexer.position}, lexer.file_name}

	ret = token_create(.Invalid, crange)
	// Sub-lexers (identifier/number/EOF) already sit one past their
	// token, so they clear this to suppress the shared advance below.
	should_advance := true

	switch lexer.char {
	case '+':
		ret = token_create(.Add, crange)
		if lexer.next == '+' {
			lexer_advance(lexer)
			crange.range.end = lexer.position
			ret = token_create(.Increment, crange)
		}
	case '-':
		ret = token_create(.Subtract, crange)
		if lexer.next == '-' {
			lexer_advance(lexer)
			crange.range.end = lexer.position
			ret = token_create(.Decrement, crange)
		} else if lexer.next == '>' {
			lexer_advance(lexer)
			crange.range.end = lexer.position
			ret = token_create(.Arrow, crange)
		}
	case '*':
		ret = token_create(.Multiply, crange)
	case '/':
		ret = token_create(.Divide, crange)
	case '%':
		ret = token_create(.Modulo, crange)
	case '`':
		ret = token_create(.Exponent, crange)
	case '=':
		ret = token_create(.Assign, crange)
		if lexer.next == '=' {
			lexer_advance(lexer)
			crange.range.end = lexer.position
			ret = token_create(.Equals, crange)
		}
	case '!':
		ret = token_create(.Not, crange)
		if lexer.next == '=' {
			lexer_advance(lexer)
			crange.range.end = lexer.position
			ret = token_create(.NotEquals, crange)
		}
	case '<':
		ret = token_create(.LessThan, crange)
		if lexer.next == '=' {
			lexer_advance(lexer)
			crange.range.end = lexer.position
			ret = token_create(.LessThanOrEqual, crange)
		} else if lexer.next == '<' {
			lexer_advance(lexer)
			crange.range.end = lexer.position
			ret = token_create(.BitwiseLeftShift, crange)
		}
	case '>':
		ret = token_create(.GreaterThan, crange)
		if lexer.next == '=' {
			lexer_advance(lexer)
			crange.range.end = lexer.position
			ret = token_create(.GreaterThanOrEqual, crange)
		} else if lexer.next == '>' {
			lexer_advance(lexer)
			crange.range.end = lexer.position
			ret = token_create(.BitwiseRightShift, crange)
		}
	case '&':
		ret = token_create(.BitwiseAnd, crange)
	case '|':
		ret = token_create(.BitwiseOr, crange)
	case '^':
		ret = token_create(.BitwiseXOR, crange)
	case '~':
		ret = token_create(.BitwiseNot, crange)
	case '(':
		ret = token_create(.OpenParen, crange)
	case ')':
		ret = token_create(.CloseParen, crange)
	case '[':
		ret = token_create(.OpenBracket, crange)
	case ']':
		ret = token_create(.CloseBracket, crange)
	case '{':
		ret = token_create(.OpenBrace, crange)
	case '}':
		ret = token_create(.CloseBrace, crange)

	case '?':
		ret = token_create(.Question, crange)
	case ':':
		ret = token_create(.Colon, crange)
	case '.':
		ret = token_create(.Dot, crange)
	case ',':
		ret = token_create(.Comma, crange)
	case ';':
		ret = token_create(.Semicolon, crange)

	case '"':
		ret = lexer_read_string(lexer, .String, '\"')
	case '\'':
		ret = lexer_read_string(lexer, .Character, '\'')
	case 'a' ..= 'z':
		fallthrough
	case 'A' ..= 'Z':
		fallthrough
	case '_':
		ret = lexer_read_identifier(lexer)
		should_advance = false
	case '0' ..= '9':
		ret = lexer_read_number(lexer)
		should_advance = false
	case 0:
		// NUL from lexer_advance means the input is exhausted.
		ret = token_create(.EOF, crange)
		should_advance = false
	}

	if should_advance {
		lexer_advance(lexer)
	}

	return
}
|
|
|
|
@(private = "file")
// Reads a quoted literal (string or character) delimited by `outer`.
// On entry lexer.char is the opening quote; the closing quote is left
// in lexer.char for the caller's shared advance to consume.
//
// Fixes over the previous version:
//   - stops at end of input instead of looping forever on an
//     unterminated literal (and reports an error for it),
//   - `\'` and `\"` escapes now yield the quote character,
//   - an unknown escape still warns but keeps the escaped character
//     instead of silently dropping it.
lexer_read_string :: proc(lexer: ^Lexer, kind: TokenKind, outer: u8) -> Token {
	crange := SourceLocation{TextRange{start = lexer.position, end = lexer.position}, lexer.file_name}

	lexer_advance(lexer) // consume the opening quote

	str: [dynamic]u8
	for lexer.char != outer && lexer.char != 0 {
		if lexer.char == '\\' {
			range := TextRange {
				start = lexer.position,
			}
			lexer_advance(lexer)
			switch lexer.char {
			case 'n':
				append(&str, '\n')
			case 't':
				append(&str, '\t')
			case 'b':
				append(&str, '\b')
			case 'r':
				append(&str, '\r')
			case '\\':
				append(&str, '\\')
			case '\'', '"':
				append(&str, lexer.char)
			case:
				range.end = lexer.position
				append(
					&g_message_list,
					message_create(
						.Warning,
						fmt.aprintf("Invalid string/character escape: %c at %s", lexer.char, "TODO LOCATION"),
						SourceLocation{range, lexer.file_name},
					),
				)
				// Degrade gracefully: keep the character so input is not lost.
				append(&str, lexer.char)
			}
			lexer_advance(lexer)
			continue
		}
		append(&str, lexer.char)

		lexer_advance(lexer)
	}
	crange.range.end = lexer.position

	// Reaching NUL means the closing delimiter was never seen.
	if lexer.char == 0 {
		append(
			&g_message_list,
			message_create(
				.Error,
				"Unterminated string/character literal",
				crange,
			),
		)
	}

	return token_create_u8(kind, str, crange)
}
|
|
|
|
@(private = "file")
// Reads an identifier or keyword starting at lexer.char (the caller
// guarantees it is a letter or '_').  Consumes [A-Za-z0-9_]+ and
// returns the matching keyword token, or an .Identifier token carrying
// the raw bytes.
//
// Fix over the previous version: the keyword list is a data table
// instead of a 25-branch if-chain, which also removes a duplicated,
// unreachable second "let" comparison.
lexer_read_identifier :: proc(lexer: ^Lexer) -> Token {
	crange := SourceLocation{TextRange{start = lexer.position}, lexer.file_name}

	str: [dynamic]u8
	for libc.isalnum(i32(lexer.char)) != 0 || lexer.char == '_' {
		append(&str, lexer.char)
		crange.range.end = lexer.position
		lexer_advance(lexer)
	}

	Keyword :: struct {
		text: string,
		kind: TokenKind,
	}
	keywords := [?]Keyword {
		{"fn", .Function},
		{"struct", .Struct},
		{"enum", .Enum},
		{"union", .Union},
		{"type", .Type},
		{"use", .Use},
		{"pub", .Pub},
		{"let", .Let},
		{"mut", .Mut},
		{"as", .As},
		{"bitwise_as", .BitwiseAs},
		{"in", .In},
		{"if", .If},
		{"else", .Else},
		{"elif", .Elif},
		{"for", .For},
		{"break", .Break},
		{"continue", .Continue},
		{"switch", .Switch},
		{"case", .Case},
		{"ret", .Ret},
		{"static", .Static},
		{"defer", .Defer},
		{"and", .And},
		{"or", .Or},
	}
	for kw in keywords {
		if compare_dyn_arr_string(&str, kw.text) {
			// NOTE(review): `str` is not freed on the keyword path (same
			// as before); fine if tokens share an arena, otherwise a
			// small leak worth revisiting.
			return token_create(kw.kind, crange)
		}
	}

	return token_create_u8(.Identifier, str, crange)
}
|
|
|
|
@(private = "file")
// Reads an integer or floating point literal starting at lexer.char.
// Supports decimal, "0x" hexadecimal and "0b" binary integers; only
// decimal literals may carry a fractional part (hex/binary floats are
// reported as errors, matching the previous behaviour).
//
// Fixes over the previous version:
//   - the hex loops lower-case the *current* character every iteration
//     (the old code computed `lowered` once, so the loop condition and
//     digit value never changed — it mis-read the digits, consumed past
//     the literal, and needed compensating ">> 4" / "/ 16" hacks),
//   - fractional digits are counted as they are read, so leading zeros
//     are respected ("1.05" no longer parses as 1.5).
lexer_read_number :: proc(lexer: ^Lexer) -> Token {
	crange := TextRange {
		start = lexer.position,
		end   = lexer.position,
	}

	ReadMode :: enum {
		Normal,
		Hex,
		Binary,
	}
	read_mode := ReadMode.Normal

	// Consume an optional base prefix.
	if lexer.char == '0' && lexer.next == 'x' {
		read_mode = .Hex
		lexer_advance(lexer)
		crange.end = lexer.position
		lexer_advance(lexer)
	} else if lexer.char == '0' && lexer.next == 'b' {
		read_mode = .Binary
		lexer_advance(lexer)
		crange.end = lexer.position
		lexer_advance(lexer)
	}

	whole_part: u64 = 0
	switch read_mode {
	case .Normal:
		for libc.isdigit(i32(lexer.char)) != 0 {
			whole_part = whole_part * 10 + u64(lexer.char) - '0'
			crange.end = lexer.position
			lexer_advance(lexer)
		}
	case .Hex:
		for {
			lowered := libc.tolower(i32(lexer.char))
			if libc.isxdigit(lowered) == 0 {
				break
			}
			digit := lowered - '0'
			if libc.isdigit(lowered) == 0 {
				digit = lowered - 'a' + 10
			}
			whole_part = (whole_part << 4) | u64(digit)
			crange.end = lexer.position
			lexer_advance(lexer)
		}
	case .Binary:
		for lexer.char == '0' || lexer.char == '1' {
			whole_part = (whole_part << 1) + u64(lexer.char) - '0'
			crange.end = lexer.position
			lexer_advance(lexer)
		}
	}

	if lexer.char == '.' {
		lexer_advance(lexer)

		fractional_part: u64 = 0
		fractional_digits := 0
		switch read_mode {
		case .Normal:
			for libc.isdigit(i32(lexer.char)) != 0 {
				fractional_part = fractional_part * 10 + u64(lexer.char) - '0'
				fractional_digits += 1
				crange.end = lexer.position
				lexer_advance(lexer)
			}
		case .Hex:
			append(
				&g_message_list,
				message_create(
					.Error,
					"Hexadecimal floating point numbers are not supported yet",
					SourceLocation{crange, lexer.file_name},
				),
			)
			// Still consume the digits so scanning can continue past them.
			for {
				lowered := libc.tolower(i32(lexer.char))
				if libc.isxdigit(lowered) == 0 {
					break
				}
				crange.end = lexer.position
				lexer_advance(lexer)
			}
		case .Binary:
			append(
				&g_message_list,
				message_create(
					.Error,
					"Binary floating point numbers are not supported yet",
					SourceLocation{crange, lexer.file_name},
				),
			)
			// Still consume the digits so scanning can continue past them.
			for lexer.char == '0' || lexer.char == '1' {
				crange.end = lexer.position
				lexer_advance(lexer)
			}
		}

		// value = whole + fractional / 10^digits  (e.g. 1 + 5/100 = 1.05)
		floating := f64(fractional_part) / math.pow_f64(10, f64(fractional_digits)) + f64(whole_part)

		return token_create_f64(.Float, floating, SourceLocation{crange, lexer.file_name})
	}

	return token_create_u64(.Integer, whole_part, SourceLocation{crange, lexer.file_name})
}
|