Patch summary (text before the first "diff --git" header is ignored by patch tools):

  * src/grammar   - `object` alternatives lose their trailing `--` notes;
                    `dictElem` is spelled out as key '=' value/array/dict;
                    `key` additionally accepts DATE, HEX, BIN, OCT, INF and NAN.
  * src/lexer.py  - t_FLOAT accepts whitespace around the dot and re-tags such
                    matches as ID with a list value; t_NAN returns its token;
                    t_ID turns a dotted key into a list of stripped parts.
  * src/parser.py - keys and table IDs are lists of header parts, so every
                    handler uses them directly instead of calling str.split.

NOTE(review): leading indentation and blank context lines were reconstructed
from the hunk line counts; verify them against the target tree before applying.
The `index` blob ids for src/lexer.py and src/parser.py predate the corrected
added lines and no longer describe the resulting files.

diff --git a/src/grammar b/src/grammar
index cef1038..f427601 100644
--- a/src/grammar
+++ b/src/grammar
@@ -16,8 +16,8 @@ table : '[' ID ']'
       | '[' '[' ID ']' ']'
 
 object : key '=' value
-       | key '=' array --
-       | key '=' dict -- talvez coloque isto numa regra separada chamada inlinetable
+       | key '=' array
+       | key '=' dict
 
 array : '[' aCont ']'
       | '[' ']'
@@ -35,12 +35,20 @@ dict : '{' dictCont '}'
 dictCont : dictCont ',' dictElem
          | dictElem
 
-dictElem : object
+dictElem : key '=' value
+         | key '=' array
+         | key '=' dict
 
 key : ID
     | STR
-    | FLOAT
+    | DATE
     | INT
+    | FLOAT
+    | HEX
+    | BIN
+    | OCT
+    | INF
+    | NAN
 
 value : STR
       | DATE
diff --git a/src/lexer.py b/src/lexer.py
index ddee0de..735bcf9 100644
--- a/src/lexer.py
+++ b/src/lexer.py
@@ -38,7 +38,11 @@ def t_TIME(t):
 
 # needs number grouping (example : flt8 = 224_617.445_991_228)
 def t_FLOAT(t):
-    r"[+-]?\d+(\.\d+)?([eE][-+]?\d+)?"
+    r"[+-]?\d+(\s*\.\s*\d+)?([eE][-+]?\d+)?"
+    # a "float" containing spaces is really a dotted key on the left-hand side,
+    # so hand it to the parser as an ID made of its stripped parts
+    if " " in t.value:
+        t.type = "ID"
+        t.value = [s.strip(' ') for s in t.value.split('.')]
     return t
 
 
@@ -71,6 +75,7 @@ def t_INF(t):
 
 def t_NAN(t):
     r"[+-]?nan"
+    return t
 
 
 # booleans are always lowercase
@@ -82,6 +87,7 @@ def t_BOOL(t):
 # ID needs to be the last so it doesnt catch everything (literally)
 def t_ID(t):
     r"(([\w_]+)|(\"[\w_]+\"|\'[\w_]+\')\s*\.\s*([\w_]+|\"[\w_]+\"|\'[\w_]+\'))(\s*\.\s*([\w_]+|\"[\w_]+\"|\'[\w_]+\'))*"
+    t.value = [s.strip(" \"'") for s in t.value.split('.')]
     return t
 
 
diff --git a/src/parser.py b/src/parser.py
index 44e430d..88bf63d 100644
--- a/src/parser.py
+++ b/src/parser.py
@@ -26,7 +26,7 @@ def p_tomlEntries_object(p):
 def p_table_simple(p):
     """table : '[' ID ']'"""
     p.parser.syntax_error = False
-    headers = p[2].split('.')
+    headers = p[2]
     temp = p.parser.root_dict
     for header in headers[:-1]:
         if header not in temp:
@@ -40,7 +40,7 @@ def p_table_simple(p):
         temp[headers[-1]] = {}
         temp = temp[headers[-1]]
     else:
-        print('Cannot define the same table twice')
+        print("Cannot define the same table twice")
         p.parser.syntax_error = True
 
     p.parser.current_header = temp
@@ -49,7 +49,7 @@ def p_table_simple(p):
 def p_table_array(p):
     """table : '[' '[' ID ']' ']'"""
     p.parser.syntax_error = False
-    headers = p[3].split('.')
+    headers = p[3]
     temp = p.parser.root_dict
     for header in headers[:-1]:
         if header not in temp:
@@ -59,7 +59,7 @@ def p_table_array(p):
         temp[headers[-1]] = [{}]
     else:
         if not isinstance(temp[headers[-1]], list):
-            print('Error, type of object is not list')
+            print("Error, type of object is not list")
             p.parser.syntax_error = True
         temp[headers[-1]].append({})
     temp = temp[headers[-1]][-1]
@@ -72,7 +72,7 @@ def p_object(p):
     | key '=' dict"""
     if p.parser.syntax_error:
         return
-    headers = p[1].split('.')
+    headers = p[1]
     temp = p.parser.current_header
     for header in headers[:-1]:
         if header not in temp:
@@ -135,7 +135,7 @@ def p_dictElem_object(p):
     """dictElem : key '=' value
     | key '=' array
     | key '=' dict"""
-    headers = p[1].split('.')
+    headers = p[1]
     p[0] = {}
     temp = p[0]
     for header in headers[:-1]:
@@ -145,25 +145,24 @@ def p_dictElem_object(p):
     temp[headers[-1]] = p[3]
 
 
+# id comes from the lexer as a list of headers
 def p_key_id(p):
     """key : ID"""
     p[0] = p[1]
 
-
-def p_key_str(p):
-    """key : STR"""
-    p[0] = p[1]
-
-
-def p_key_float(p):
-    """key : FLOAT"""
-    p[0] = p[1]
-
-
-def p_key_int(p):
-    """key : INT"""
-    p[0] = p[1]
-
+# the remaining cases are keys that have the same format as a float/int/etc,
+# so we need to wrap them in a singleton list.
+def p_key_rest(p):
+    """key : STR
+    | DATE
+    | INT
+    | FLOAT
+    | HEX
+    | BIN
+    | OCT
+    | INF
+    | NAN"""
+    p[0] = [p[1]]
 
 def p_value_str(p):
     """value : STR"""
@@ -235,7 +234,7 @@ parser.root_dict = dict()
 parser.current_header = parser.root_dict
 parser.syntax_error = False
 
-f = open('example.toml', 'r')
+f = open("example.toml", "r")
 parser.parse(f.read())
 
 print(json.dumps(parser.root_dict, indent=2))