diff --git a/src/example.toml b/src/example.toml
new file mode 100644
index 0000000..2f6dc19
--- /dev/null
+++ b/src/example.toml
@@ -0,0 +1,5 @@
+title = "TOML Example"
+[owner]
+name = "Tom Preston-Werner"
+date = 2010-04-23
+time = 21:30:00
diff --git a/src/lexer.py b/src/lexer.py
index b98974a..b04c224 100644
--- a/src/lexer.py
+++ b/src/lexer.py
@@ -2,27 +2,108 @@ import ply.lex as lex
 
 tokens = [
     "ID",
-    "STR"
+    "STR",
     "DATE",
     "TIME",
     "DATETIME",
     "INT",
-    "FLOAT", # need to implement exponents check https://toml.io/en/
     "HEX",
     "BIN",
     "OCT",
+    "FLOAT",  # full float grammar: https://toml.io/en/
+    "BOOL",
     "INF",
     "NAN",
     "COMMENT",
 ]
+
+
+# STR needs to come first so quoted strings are caught before any other rule
+def t_STR(t):
+    r"\"[^\"]*\""
+    return t
+
+
+# TODO: check that the datetime is a valid calendar datetime (offset is optional)
+def t_DATETIME(t):
+    r"\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}(\.\d{1,6})?(Z|[+-]\d{2}:\d{2})?"
+    return t
+
+
+# TODO: check that the date is a valid calendar date
+def t_DATE(t):
+    r"\d{4}-\d{2}-\d{2}"
+    return t
+
+
+# TODO: check that the time is a valid clock time
+def t_TIME(t):
+    r"\d{2}:\d{2}:\d{2}(\.\d{1,6})?"
+    return t
+
+
+# a fraction or exponent is required, so plain integers fall through to INT
+# TODO: number grouping (example: flt8 = 224_617.445_991_228)
+def t_FLOAT(t):
+    r"[+-]?(\d+\.\d+([eE][-+]?\d+)?|\d+[eE][-+]?\d+)"
+    return t
+
+
+# prefixed literals must be matched before INT so the leading "0" is not split off
+# TODO: number grouping (example: hex3 = 0xdead_beef)
+def t_HEX(t):
+    r"0x[0-9a-fA-F]+"
+    return t
+
+
+def t_BIN(t):
+    r"0b[01]+"
+    return t
+
+
+def t_OCT(t):
+    r"0o[0-7]+"
+    return t
+
+
+# TODO: number grouping (example: int6 = 5_349_221)
+def t_INT(t):
+    r"[-+]?\d+"
+    return t
+
+
+def t_INF(t):
+    r"[-+]?inf"
+    return t
+
+
+def t_NAN(t):
+    r"[+-]?nan"
+    return t
+
+
+# booleans are always lowercase
+def t_BOOL(t):
+    r"(true|false)"
+    return t
+
+
+# ID needs to be the last rule so it doesn't catch everything (literally)
+def t_ID(t):
+    r"[\w.]+"
+    return t
 
 
 def t_COMMENT(t):
-    r"#.*"
+    r"\#.*"  # "#" must be escaped: ply compiles token rules in verbose mode
     pass
 
 
+def t_error(t):
+    print("Illegal character '%s'" % t.value[0])
+    t.lexer.skip(1)
+
+
 t_ignore = "\n\t "
 literals = "[]{},="
-
 lexer = lex.lex()
diff --git a/src/tokenizer.py b/src/tokenizer.py
new file mode 100644
index 0000000..aa69fbf
--- /dev/null
+++ b/src/tokenizer.py
@@ -0,0 +1,7 @@
+from lexer import lexer
+
+with open("example.toml", "r") as f:
+    lexer.input(f.read())
+
+for tok in lexer:
+    print(tok)
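
Note on the number-grouping TODOs in lexer.py above: TOML allows single underscores between digits (5_349_221, 0xdead_beef, 224_617.445_991_228). A minimal sketch of how the INT and HEX rules could accept and normalize them follows; stripping the underscores and converting the value inside the rule is an assumption about how the parser will consume the tokens, not something the diff does:

    # Sketch only: underscore grouping per the TODOs above (assumption, not in the diff).
    # \d+(_\d+)* permits single underscores between digits while rejecting leading,
    # trailing, and doubled underscores, as TOML requires.
    def t_INT(t):
        r"[-+]?\d+(_\d+)*"
        t.value = int(t.value.replace("_", ""))
        return t

    def t_HEX(t):
        r"0x[0-9a-fA-F]+(_[0-9a-fA-F]+)*"
        t.value = int(t.value.replace("_", ""), 16)  # int() accepts the 0x prefix with base 16
        return t

The same (_...)* idea extends to FLOAT, BIN, and OCT; since ply tries function rules in definition order, these rules would keep the FLOAT-then-prefixed-then-INT ordering used above. Likewise, the "check that the date is valid" TODOs could lean on the standard library inside the rule; treating a failed parse as a discarded token is an assumption:

    # Sketch only: semantic validation for DATE (the same pattern works for TIME and DATETIME).
    from datetime import datetime

    def t_DATE(t):
        r"\d{4}-\d{2}-\d{2}"
        try:
            datetime.strptime(t.value, "%Y-%m-%d")
        except ValueError:
            print("Invalid date '%s'" % t.value)  # mirrors t_error's reporting style
            return None  # returning None makes ply discard the token
        return t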