[LEX] base code is done
parent 842c344ef1
commit 734906e46c
3 changed files with 96 additions and 4 deletions
src/example.toml (new file, +5)
@@ -0,0 +1,5 @@
title = "TOML Example"
[owner]
name = "Tom Preston-Werner"
date = 2010-04-23
time = 21:30:00
src/lexer.py (87 changes)
@@ -2,27 +2,106 @@ import ply.lex as lex
tokens = [
    "ID",
    "STR",
    "DATE",
    "TIME",
    "DATETIME",
    "INT",
    "FLOAT",  # need to implement exponents, check https://toml.io/en/
    "HEX",
    "BIN",
    "OCT",
    "INF",
    "NAN",
    "BOOL",  # t_BOOL below needs its token listed here, or PLY rejects the rule
    "COMMENT",
]


# STR needs to be the first one to catch
def t_STR(t):
    r"\"[^\"]*\""  # * rather than + so the empty string "" also lexes
    return t


# needs to check if the datetime is valid
def t_DATETIME(t):
    r"\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}(\.\d{1,6})?(Z|[+-]\d{2}:\d{2})"
    return t


# needs to check if the date is valid
def t_DATE(t):
    r"\d{4}-\d{2}-\d{2}"
    return t


# needs to check if the time is valid
def t_TIME(t):
    r"\d{2}:\d{2}:\d{2}(\.\d{1,6})?"
    return t


# HEX/BIN/OCT must come before FLOAT and INT: PLY tries function rules in
# definition order, and INT/FLOAT would otherwise eat the leading 0 of 0x...
# needs number grouping (example : hex3 = 0xdead_beef)
def t_HEX(t):
    r"0x[0-9a-fA-F]+"
    return t


def t_BIN(t):
    r"0b[01]+"
    return t


def t_OCT(t):
    r"0o[0-7]+"
    return t


# needs number grouping (example : flt8 = 224_617.445_991_228)
# the fraction/exponent part is mandatory so plain integers still
# fall through to t_INT below
def t_FLOAT(t):
    r"[+-]?\d+(\.\d+([eE][-+]?\d+)?|[eE][-+]?\d+)"
    return t


# needs number grouping (example : int6 = 5_349_221)
def t_INT(t):
    r"[-+]?\d+"
    return t


def t_INF(t):
    r"[-+]?inf"
    return t


def t_NAN(t):
    r"[+-]?nan"
    return t


# booleans are always lowercase
def t_BOOL(t):
    r"(true|false)"
    return t


# ID needs to be the last rule so it doesn't catch everything (literally)
def t_ID(t):
    r"[\w.]+"
    return t


# the # is escaped because PLY compiles patterns with re.VERBOSE,
# where a bare # starts a regex comment
def t_COMMENT(t):
    r"\#.*"
    pass


def t_error(t):
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)


t_ignore = "\n\t "

literals = "[]{},="


lexer = lex.lex()
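The "needs to check if the date/time is valid" notes above could be handled inside the rules themselves. A minimal sketch for the DATE rule, assuming the standard library's ISO parser and a discard-on-failure policy (both assumptions, not part of this commit); the same pattern extends to t_TIME and t_DATETIME:

from datetime import date

# drop-in variant of t_DATE: same regex, but the lexeme is validated
# before the token is emitted; returning None discards it and PLY keeps lexing
def t_DATE(t):
    r"\d{4}-\d{2}-\d{2}"
    try:
        date.fromisoformat(t.value)  # rejects impossible dates like 2010-02-31
    except ValueError:
        print("Invalid date '%s'" % t.value)
        return None
    return t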
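Likewise, the "needs number grouping" notes could be addressed by allowing a single underscore between digits and stripping it from the matched text. A minimal sketch for the INT rule (the replace-and-strip normalisation is an assumption; the same idea applies to FLOAT, HEX, BIN and OCT):

def t_INT(t):
    r"[-+]?\d+(_\d+)*"
    # TOML permits underscores only between digits, e.g. int6 = 5_349_221;
    # strip them so later value conversion sees plain digits
    t.value = t.value.replace("_", "")
    return t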
src/tokenizer.py (new file, +8)
@@ -0,0 +1,8 @@
from lexer import lexer

f = open("example.toml", "r")

lexer.input(f.read())

for tok in lexer:
    print(tok)
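For reference, running "python tokenizer.py" from inside src/ feeds example.toml through the lexer. With PLY's default LexToken repr, the first line of the example would print roughly like this (exact positions are illustrative, not verified against this commit):

LexToken(ID,'title',1,0)
LexToken(=,'=',1,6)
LexToken(STR,'"TOML Example"',1,8)

Since "\n" is in t_ignore, every token reports line 1.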