[LEX] base code is done

This commit is contained in:
Tiago Sousa 2023-05-03 10:39:50 +01:00
parent 842c344ef1
commit 734906e46c
3 changed files with 96 additions and 4 deletions

5
src/example.toml Normal file
View file

@ -0,0 +1,5 @@
title = "TOML Example"
[owner]
name = "Tom Preston-Werner"
date = 2010-04-23
time = 21:30:00

View file

@ -2,27 +2,106 @@ import ply.lex as lex
# Token names PLY is allowed to emit. Every t_<NAME> rule in this module
# must have its NAME listed here, otherwise lex.lex() reports the rule as
# belonging to an unspecified token.
tokens = [
    "ID",
    "STR",
    "DATE",
    "TIME",
    "DATETIME",
    "INT",
    "HEX",
    "BIN",
    "OCT",
    "FLOAT",  # covers exponent forms; see https://toml.io/en/ for the grammar
    "INF",
    "NAN",
    "BOOL",  # produced by the t_BOOL rule
    "COMMENT",
]
# STR must be defined first so quoted text is matched before any other rule
def t_STR(t):
    r'"(\\.|[^"\\])*"'
    # Basic double-quoted string. The body may be empty, so "" lexes as a
    # string, and a backslash always consumes the following character, so an
    # embedded \" does not terminate the token. The token value keeps its
    # surrounding quotes; escape sequences are not decoded here.
    return t
# needs to check if datetime is valid
def t_DATETIME(t):
    r"\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}(\.\d{1,6})?(Z|[+-]\d{2}:\d{2})?"
    # Date and time joined by 'T' or a single space, with an optional
    # fractional second (1-6 digits). The trailing offset (Z or +/-HH:MM) is
    # optional so that local date-times such as 1979-05-27T07:32:00 are one
    # DATETIME token instead of falling apart into DATE + other tokens.
    # Only the shape is checked; field ranges are not validated here.
    return t
# needs to check if date is valid
def t_DATE(t):
    r'\d{4}-\d{2}-\d{2}'
    # Local date written as YYYY-MM-DD. Only the digit layout is matched;
    # month and day ranges are not verified by this rule.
    return t
# needs to check if time is valid
def t_TIME(t):
    r'\d{2}:\d{2}:\d{2}(\.\d{1,6})?'
    # Local time written as HH:MM:SS with an optional fraction of one to six
    # digits. Only the digit layout is matched; ranges are not verified.
    return t
# needs number grouping (example : flt8 = 224_617.445_991_228)
def t_FLOAT(t):
    r"[+-]?\d+(\.\d+([eE][-+]?\d+)?|[eE][-+]?\d+)"
    # A float must carry a fractional part, an exponent, or both. Function
    # rules are tried in definition order and this one precedes INT, HEX, BIN
    # and OCT, so accepting bare digits here would classify every integer as
    # FLOAT and would consume the leading "0" of 0x/0o/0b literals.
    return t
# needs number grouping (example : int6 = 5_349_221)
def t_INT(t):
    r"[-+]?(?!0[xob])\d+"
    # Optionally signed decimal integer. The negative lookahead refuses a
    # 0x / 0o / 0b prefix: the HEX, OCT and BIN rules are defined after this
    # one, and rules are tried in definition order, so without the guard this
    # rule would take the leading "0" and those rules would never match.
    return t
# needs number grouping (example : hex3 = 0xdead_beef)
def t_HEX(t):
    r'0x[0-9a-fA-F]+'
    # Hexadecimal integer: the literal prefix 0x followed by one or more hex
    # digits in either case. The token value is left as the matched text.
    return t
def t_BIN(t):
    r'0b[01]+'
    # Binary integer: the literal prefix 0b followed by one or more 0/1
    # digits. The token value is left as the matched text.
    return t
def t_OCT(t):
    r'0o[0-7]+'
    # Octal integer: the literal prefix 0o followed by one or more digits
    # 0-7. The token value is left as the matched text.
    return t
def t_INF(t):
    r"[-+]?inf(?![\w.-])"
    # Optionally signed infinity. The negative lookahead stops the rule from
    # matching the first three letters of a longer bare word (for example
    # the key "info"), which should be left for the ID rule.
    return t
def t_NAN(t):
    r"[+-]?nan(?![\w.-])"
    # Optionally signed not-a-number. The token must be returned: a PLY rule
    # function that returns nothing discards its match, so NAN values would
    # silently vanish from the token stream. The lookahead keeps the rule
    # from matching the start of a longer bare word such as "nano".
    return t
# booleans are always lowercase
def t_BOOL(t):
    r"(true|false)(?![\w.-])"
    # Lowercase boolean literal. The negative lookahead stops the rule from
    # matching the start of a longer bare word (for example "trueish" or
    # "false_positive"), which should be left for the ID rule.
    return t
# ID needs to be defined last so it doesn't swallow input meant for the more specific rules above
def t_ID(t):
    r"[\w.-]+"
    # Bare or dotted key: word characters, dots and dashes. Dashes are
    # included because TOML bare keys may contain them (e.g. bare-key).
    # Signed numbers are unaffected: the numeric rules are defined earlier
    # and therefore get the first chance to match.
    return t
def t_COMMENT(t):
    r"\#.*"
    # A comment runs from '#' to the end of the line. The '#' is escaped
    # because PLY compiles rule patterns with re.VERBOSE, where an unescaped
    # '#' begins a regex comment and would leave the pattern empty.
    # Nothing is returned, so the matched comment is dropped from the
    # token stream.
    pass
def t_error(t):
    # Called when no rule matches at the current position: report the first
    # unmatched character, then advance the lexer by one character so
    # scanning continues after it.
    offending = t.value[0]
    print("Illegal character '%s'" % offending)
    t.lexer.skip(1)
# Characters skipped between tokens: newline, tab and space.
t_ignore = "\n\t "

# Single-character tokens returned as themselves.
literals = "[]{},="

# Build the lexer from the rules defined in this module.
lexer = lex.lex()

8
src/tokenizer.py Normal file
View file

@ -0,0 +1,8 @@
from lexer import lexer
# Feed the sample TOML file to the lexer and print every token it yields.
# The context manager closes the file even if reading fails; TOML documents
# are UTF-8, so the encoding is stated explicitly rather than left to the
# platform default.
with open("example.toml", "r", encoding="utf-8") as f:
    lexer.input(f.read())

for tok in lexer:
    print(tok)