// speedcat/src/lexer.odin
package main

import "core:c/libc"
import "core:fmt"
import "core:math"
Lexer :: struct {
	file_name: string,
	data: ^[dynamic]u8,
	read_position: u64,
	position: TextPosition,
	char, next: u8,
	last_token_kind: TokenKind,
	should_return_semicolon: bool,
}
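
// lexer_create allocates a Lexer over `data` and advances twice so that both
// `char` (current byte) and `next` (lookahead) are populated before lexing.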
lexer_create :: proc(data: ^[dynamic]u8, file_name: string) -> ^Lexer {
	lexer := new(Lexer)
	lexer^ = {
		file_name = file_name,
		data = data,
		read_position = 0,
		position = TextPosition{line = 1, column = 1},
	}
	lexer_advance(lexer)
	lexer_advance(lexer)
	return lexer
}
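
// lexer_advance shifts the lookahead into `char`, pulls the next byte from
// the buffer (0 at end of input), and updates the line/column position.
// Carriage returns are skipped so Windows line endings behave like '\n'.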
@(private = "file")
lexer_advance :: proc(lexer: ^Lexer) {
	lexer.char = lexer.next
	if lexer.read_position < u64(len(lexer.data)) {
		lexer.next = lexer.data[lexer.read_position]
	} else {
		lexer.next = 0
	}
	lexer.read_position += 1
	if lexer.char == '\r' {
		// Skip the carriage return entirely. Return here so the recursive
		// call's line/column bookkeeping is not applied a second time.
		lexer_advance(lexer)
		return
	}
	if lexer.char == '\n' {
		lexer.position.line += 1
		lexer.position.column = 1
	} else {
		lexer.position.column += 1
	}
}
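
// Semicolon insertion: a newline terminates a statement unless the previous
// token already ends one (semicolon/EOF) or clearly continues an expression
// (an opening delimiter, a binary/unary operator, or a comma).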
@(private = "file")
lexer_should_not_emit_semicolon :: proc(lexer: ^Lexer) -> bool {
	#partial switch lexer.last_token_kind {
	case .Semicolon, .EOF, .Invalid,
	     .OpenParen, .OpenBrace, .OpenBracket,
	     .Add, .Subtract, .Multiply, .Divide, .Modulo, .Exponent,
	     .Assign, .Not,
	     .BitwiseAnd, .BitwiseOr, .BitwiseXOR, .BitwiseNot,
	     .LessThan, .GreaterThan, .BitwiseLeftShift, .BitwiseRightShift,
	     .Comma:
		return true
	}
	return false
}
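
// lexer_skip_whitespace consumes spaces, tabs, and newlines. If a newline
// should terminate the current statement, it sets `should_return_semicolon`
// so the next lexer_next call emits a synthetic .Semicolon token.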
@(private = "file")
lexer_skip_whitespace :: proc(lexer: ^Lexer) {
	for lexer.char == ' ' || lexer.char == '\t' || lexer.char == '\r' || lexer.char == '\n' {
		if lexer.char == '\n' {
			if !lexer_should_not_emit_semicolon(lexer) {
				lexer.should_return_semicolon = true
				lexer_advance(lexer)
				return
			}
		}
		lexer_advance(lexer)
	}
}
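
// lexer_next returns the next token in the stream. Typical usage, assuming
// the Token/TokenKind definitions from the rest of the compiler:
//
//	for tok := lexer_next(lexer); tok.kind != .EOF; tok = lexer_next(lexer) {
//		// ... hand tok to the parser
//	}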
lexer_next :: proc(lexer: ^Lexer) -> (ret: Token) {
	lexer_skip_whitespace(lexer)
	// Record the kind of whatever token ends up being returned, including
	// the early returns below; this feeds semicolon insertion.
	defer lexer.last_token_kind = ret.kind
	// A backslash starts a line comment: skip to end of line and re-lex.
	if lexer.char == '\\' {
		lexer_advance(lexer)
		for lexer.char != '\n' && lexer.char != 0 {
			lexer_advance(lexer)
		}
		return lexer_next(lexer)
	}
	if lexer.should_return_semicolon {
		lexer.should_return_semicolon = false
		return token_create(
			.Semicolon,
			SourceLocation{TextRange{start = lexer.position, end = lexer.position}, lexer.file_name},
		)
	}
	crange := SourceLocation{TextRange{start = lexer.position, end = lexer.position}, lexer.file_name}
	ret = token_create(.Invalid, crange)
	should_advance := true
	switch lexer.char {
	case '+':
		ret = token_create(.Add, crange)
		if lexer.next == '+' {
			lexer_advance(lexer)
			crange.range.end = lexer.position
			ret = token_create(.Increment, crange)
		}
	case '-':
		ret = token_create(.Subtract, crange)
		if lexer.next == '-' {
			lexer_advance(lexer)
			crange.range.end = lexer.position
			ret = token_create(.Decrement, crange)
		} else if lexer.next == '>' {
			lexer_advance(lexer)
			crange.range.end = lexer.position
			ret = token_create(.Arrow, crange)
		}
	case '*':
		ret = token_create(.Multiply, crange)
	case '/':
		ret = token_create(.Divide, crange)
	case '%':
		ret = token_create(.Modulo, crange)
	case '`':
		ret = token_create(.Exponent, crange)
	case '=':
		ret = token_create(.Assign, crange)
		if lexer.next == '=' {
			lexer_advance(lexer)
			crange.range.end = lexer.position
			ret = token_create(.Equals, crange)
		}
	case '!':
		ret = token_create(.Not, crange)
		if lexer.next == '=' {
			lexer_advance(lexer)
			crange.range.end = lexer.position
			ret = token_create(.NotEquals, crange)
		}
	case '<':
		ret = token_create(.LessThan, crange)
		if lexer.next == '=' {
			lexer_advance(lexer)
			crange.range.end = lexer.position
			ret = token_create(.LessThanOrEqual, crange)
		} else if lexer.next == '<' {
			lexer_advance(lexer)
			crange.range.end = lexer.position
			ret = token_create(.BitwiseLeftShift, crange)
		}
	case '>':
		ret = token_create(.GreaterThan, crange)
		if lexer.next == '=' {
			lexer_advance(lexer)
			crange.range.end = lexer.position
			ret = token_create(.GreaterThanOrEqual, crange)
		} else if lexer.next == '>' {
			lexer_advance(lexer)
			crange.range.end = lexer.position
			ret = token_create(.BitwiseRightShift, crange)
		}
	case '&':
		ret = token_create(.BitwiseAnd, crange)
	case '|':
		ret = token_create(.BitwiseOr, crange)
	case '^':
		ret = token_create(.BitwiseXOR, crange)
	case '~':
		ret = token_create(.BitwiseNot, crange)
	case '(':
		ret = token_create(.OpenParen, crange)
	case ')':
		ret = token_create(.CloseParen, crange)
	case '[':
		ret = token_create(.OpenBracket, crange)
	case ']':
		ret = token_create(.CloseBracket, crange)
	case '{':
		ret = token_create(.OpenBrace, crange)
	case '}':
		ret = token_create(.CloseBrace, crange)
	case '?':
		ret = token_create(.Question, crange)
	case ':':
		ret = token_create(.Colon, crange)
	case '.':
		ret = token_create(.Dot, crange)
	case ',':
		ret = token_create(.Comma, crange)
	case ';':
		ret = token_create(.Semicolon, crange)
	case '"':
		ret = lexer_read_string(lexer, .String, '\"')
	case '\'':
		ret = lexer_read_string(lexer, .Character, '\'')
	case 'a' ..= 'z', 'A' ..= 'Z', '_':
		ret = lexer_read_identifier(lexer)
		should_advance = false
	case '0' ..= '9':
		ret = lexer_read_number(lexer)
		should_advance = false
	case 0:
		ret = token_create(.EOF, crange)
		should_advance = false
	}
	if should_advance {
		lexer_advance(lexer)
	}
	return
}
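
// lexer_read_string reads a quoted literal (string or character, depending
// on `outer`) starting at the opening quote, decoding simple backslash
// escapes and reporting unknown ones as warnings.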
@(private = "file")
lexer_read_string :: proc(lexer: ^Lexer, kind: TokenKind, outer: u8) -> Token {
	crange := SourceLocation{TextRange{start = lexer.position, end = lexer.position}, lexer.file_name}
	lexer_advance(lexer)
	str: [dynamic]u8
	// Stop at end of input as well, so an unterminated literal cannot loop
	// forever.
	for lexer.char != outer && lexer.char != 0 {
		if lexer.char == '\\' {
			range := TextRange {
				start = lexer.position,
			}
			lexer_advance(lexer)
			switch lexer.char {
			case 'n':
				append(&str, '\n')
			case 't':
				append(&str, '\t')
			case 'b':
				append(&str, '\b')
			case 'r':
				append(&str, '\r')
			case '\\':
				append(&str, '\\')
			case:
				range.end = lexer.position
				append(
					&g_message_list,
					message_create(
						.Warning,
						fmt.aprintf("Invalid string/character escape: %c at %s", lexer.char, "TODO LOCATION"),
						SourceLocation{range, lexer.file_name},
					),
				)
			}
			lexer_advance(lexer)
			continue
		}
		append(&str, lexer.char)
		lexer_advance(lexer)
	}
	crange.range.end = lexer.position
	return token_create_u8(kind, str, crange)
}
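
// lexer_read_identifier scans an identifier ([A-Za-z0-9_]; the caller has
// already checked that the first character is a letter or underscore) and
// promotes it to a keyword token when it matches a reserved word.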
@(private = "file")
lexer_read_identifier :: proc(lexer: ^Lexer) -> Token {
	crange := SourceLocation{TextRange{start = lexer.position}, lexer.file_name}
	str: [dynamic]u8
	for libc.isalnum(i32(lexer.char)) != 0 || lexer.char == '_' {
		append(&str, lexer.char)
		crange.range.end = lexer.position
		lexer_advance(lexer)
	}
	if compare_dyn_arr_string(&str, "fn") {
		return token_create(.Function, crange)
	} else if compare_dyn_arr_string(&str, "struct") {
		return token_create(.Struct, crange)
	} else if compare_dyn_arr_string(&str, "enum") {
		return token_create(.Enum, crange)
	} else if compare_dyn_arr_string(&str, "union") {
		return token_create(.Union, crange)
	} else if compare_dyn_arr_string(&str, "type") {
		return token_create(.Type, crange)
	} else if compare_dyn_arr_string(&str, "use") {
		return token_create(.Use, crange)
	} else if compare_dyn_arr_string(&str, "pub") {
		return token_create(.Pub, crange)
	} else if compare_dyn_arr_string(&str, "let") {
		return token_create(.Let, crange)
	} else if compare_dyn_arr_string(&str, "mut") {
		return token_create(.Mut, crange)
	} else if compare_dyn_arr_string(&str, "as") {
		return token_create(.As, crange)
	} else if compare_dyn_arr_string(&str, "bitwise_as") {
		return token_create(.BitwiseAs, crange)
	} else if compare_dyn_arr_string(&str, "in") {
		return token_create(.In, crange)
	} else if compare_dyn_arr_string(&str, "if") {
		return token_create(.If, crange)
	} else if compare_dyn_arr_string(&str, "else") {
		return token_create(.Else, crange)
	} else if compare_dyn_arr_string(&str, "elif") {
		return token_create(.Elif, crange)
	} else if compare_dyn_arr_string(&str, "for") {
		return token_create(.For, crange)
	} else if compare_dyn_arr_string(&str, "break") {
		return token_create(.Break, crange)
	} else if compare_dyn_arr_string(&str, "continue") {
		return token_create(.Continue, crange)
	} else if compare_dyn_arr_string(&str, "switch") {
		return token_create(.Switch, crange)
	} else if compare_dyn_arr_string(&str, "case") {
		return token_create(.Case, crange)
	} else if compare_dyn_arr_string(&str, "ret") {
		return token_create(.Ret, crange)
	} else if compare_dyn_arr_string(&str, "static") {
		return token_create(.Static, crange)
	} else if compare_dyn_arr_string(&str, "defer") {
		return token_create(.Defer, crange)
	} else if compare_dyn_arr_string(&str, "and") {
		return token_create(.And, crange)
	} else if compare_dyn_arr_string(&str, "or") {
		return token_create(.Or, crange)
	}
	return token_create_u8(.Identifier, str, crange)
}
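
// lexer_read_number scans decimal, hexadecimal ("0x"), and binary ("0b")
// integer literals, plus decimal fractions. Hex and binary fractions are
// consumed but rejected with an error until they are implemented.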
@(private = "file")
lexer_read_number :: proc(lexer: ^Lexer) -> Token {
	crange := TextRange {
		start = lexer.position,
		end = lexer.position,
	}
	ReadMode :: enum {
		Normal,
		Hex,
		Binary,
	}
	read_mode := ReadMode.Normal
	// Consume a "0x" or "0b" prefix, if present.
	if lexer.char == '0' && lexer.next == 'x' {
		read_mode = .Hex
		lexer_advance(lexer)
		crange.end = lexer.position
		lexer_advance(lexer)
	} else if lexer.char == '0' && lexer.next == 'b' {
		read_mode = .Binary
		lexer_advance(lexer)
		crange.end = lexer.position
		lexer_advance(lexer)
	}
	whole_part: u64 = 0
	if read_mode == .Normal {
		for libc.isdigit(i32(lexer.char)) != 0 {
			whole_part = whole_part * 10 + u64(lexer.char) - '0'
			crange.end = lexer.position
			lexer_advance(lexer)
		}
	} else if read_mode == .Hex {
		// Re-classify the current character on every iteration; the digit
		// value is 0-9 for '0'-'9' and 10-15 for 'a'-'f'.
		for {
			lowered := libc.tolower(i32(lexer.char))
			if libc.isxdigit(lowered) == 0 {
				break
			}
			digit := lowered - '0'
			if libc.isdigit(lowered) == 0 {
				digit = lowered - 'a' + 10
			}
			whole_part = (whole_part << 4) | u64(digit)
			crange.end = lexer.position
			lexer_advance(lexer)
		}
	} else if read_mode == .Binary {
		for lexer.char == '0' || lexer.char == '1' {
			whole_part = (whole_part << 1) + u64(lexer.char) - '0'
			crange.end = lexer.position
			lexer_advance(lexer)
		}
	}
	if lexer.char == '.' {
		lexer_advance(lexer)
		// FIXME: Move this to another procedure because this is repeating lmfao
		fractional_part: u64 = 0
		fraction_digits := 0
		if read_mode == .Normal {
			for libc.isdigit(i32(lexer.char)) != 0 {
				fractional_part = fractional_part * 10 + u64(lexer.char) - '0'
				fraction_digits += 1
				crange.end = lexer.position
				lexer_advance(lexer)
			}
		} else if read_mode == .Hex {
			append(
				&g_message_list,
				message_create(
					.Error,
					"Hexadecimal floating point numbers are not supported yet",
					SourceLocation{crange, lexer.file_name},
				),
			)
			// Still consume the digits so lexing can continue past the error.
			for {
				lowered := libc.tolower(i32(lexer.char))
				if libc.isxdigit(lowered) == 0 {
					break
				}
				crange.end = lexer.position
				lexer_advance(lexer)
			}
		} else if read_mode == .Binary {
			append(
				&g_message_list,
				message_create(
					.Error,
					"Binary floating point numbers are not supported yet",
					SourceLocation{crange, lexer.file_name},
				),
			)
			for lexer.char == '0' || lexer.char == '1' {
				crange.end = lexer.position
				lexer_advance(lexer)
			}
		}
		// Scale by the number of digits actually read, so that e.g. 1.05
		// becomes 1 + 5/100 rather than 1 + 5/10.
		floating := f64(fractional_part) / math.pow_f64(10, f64(fraction_digits)) + f64(whole_part)
		return token_create_f64(.Float, floating, SourceLocation{crange, lexer.file_name})
	}
	return token_create_u64(.Integer, whole_part, SourceLocation{crange, lexer.file_name})
}