[PARSER/LEXER] id in lexer now returns a list with the headers, fixed

parser code.
2023-05-26 16:24:37 +01:00 · 2023-05-26 16:24:37 +01:00 · 6e465cab3d
commit 6e465cab3d
parent a4e60dd218
3 changed files with 40 additions and 27 deletions
--- a/src/grammar
+++ b/src/grammar
@@ -16,8 +16,8 @@ table : '[' ID ']'
      | '[' '[' ID ']' ']'

 object : key '=' value
-       | key '=' array -- 
-       | key '=' dict -- talvez coloque isto numa regra separada chamada inlinetable
+       | key '=' array 
+       | key '=' dict

 array : '[' aCont ']'
      | '[' ']'
@@ -35,12 +35,20 @@ dict : '{' dictCont '}'
 dictCont : dictCont ',' dictElem
         | dictElem

-dictElem : object
+dictElem : key '=' value
+         | key '=' array 
+         | key '=' dict

 key : ID
    | STR
-    | FLOAT
+    | DATE
    | INT
+    | FLOAT
+    | HEX
+    | BIN
+    | OCT
+    | INF
+    | NAN

 value : STR
      | DATE
--- a/src/lexer.py
+++ b/src/lexer.py
@@ -38,7 +38,11 @@ def t_TIME(t):

 # needs number grouping (example : flt8 = 224_617.445_991_228)
 def t_FLOAT(t):
-    r"[+-]?\d+(\.\d+)?([eE][-+]?\d+)?"
+    r"[+-]?\d+(\s*\.\s*\d+)?([eE][-+]?\d+)?"
+    # case where the float appears on the left side (used as a key) with spaces in between
+    if t.value.__contains(' '):
+        t.type = "ID"
+        t.value = [s.strip(' ')  for s in t.value.split('.')]
    return t


@@ -71,6 +75,7 @@ def t_INF(t):

 def t_NAN(t):
    r"[+-]?nan"
+    return t


 # booleans are always lowercase
@@ -82,6 +87,7 @@ def t_BOOL(t):
 # ID needs to be the last so it doesnt catch everything (literally)
 def t_ID(t):
    r"(([\w_]+)|(\"[\w_]+\"|\'[\w_]+\')\s*\.\s*([\w_]+|\"[\w_]+\"|\'[\w_]+\'))(\s*\.\s*([\w_]+|\"[\w_]+\"|\'[\w_]+\'))*"
+    t.value = [s.strip(" \"'") for s in t.value.split('.')]
    return t


--- a/src/parser.py
+++ b/src/parser.py
@@ -26,7 +26,7 @@ def p_tomlEntries_object(p):
 def p_table_simple(p):
    """table : '[' ID ']'"""
    p.parser.syntax_error = False
-    headers = p[2].split('.')
+    headers = p[2]
    temp = p.parser.root_dict
    for header in headers[:-1]:
        if header not in temp:
@@ -40,7 +40,7 @@ def p_table_simple(p):
        temp[headers[-1]] = {}
        temp = temp[headers[-1]]
    else:
-        print('Cannot define the same table twice')
+        print("Cannot define the same table twice")
        p.parser.syntax_error = True
    p.parser.current_header = temp

@@ -49,7 +49,7 @@ def p_table_simple(p):
 def p_table_array(p):
    """table : '[' '[' ID ']' ']'"""
    p.parser.syntax_error = False
-    headers = p[3].split('.')
+    headers = p[3].split(".")
    temp = p.parser.root_dict
    for header in headers[:-1]:
        if header not in temp:
@@ -59,7 +59,7 @@ def p_table_array(p):
        temp[headers[-1]] = [{}]
    else:
        if not isinstance(temp[headers[-1]], list):
-            print('Error, type of object is not list')
+            print("Error, type of object is not list")
            p.parser.syntax_error = True
        temp[headers[-1]].append({})
    temp = temp[headers[-1]][-1]
@@ -72,7 +72,7 @@ def p_object(p):
    | key '=' dict"""
    if p.parser.syntax_error:
        return
-    headers = p[1].split('.')
+    headers = p[1].split(".")
    temp = p.parser.current_header
    for header in headers[:-1]:
        if header not in temp:
@@ -135,7 +135,7 @@ def p_dictElem_object(p):
    """dictElem : key '=' value
    | key '=' array
    | key '=' dict"""
-    headers = p[1].split('.')
+    headers = p[1]
    p[0] = {}
    temp = p[0]
    for header in headers[:-1]:
@@ -145,25 +145,24 @@ def p_dictElem_object(p):
    temp[headers[-1]] = p[3]


+# id comes from the lexer as a list of headers
 def p_key_id(p):
    """key : ID"""
    p[0] = p[1]

-
-def p_key_str(p):
-    """key : STR"""
-    p[0] = p[1]
-
-
-def p_key_float(p):
-    """key : FLOAT"""
-    p[0] = p[1]
-
-
-def p_key_int(p):
-    """key : INT"""
-    p[0] = p[1]
-
+# the rest of the cases are the specific cases where the key has the same format as a float/int/etc,
+# so we need to make them a singleton list.
+def p_key_rest(p):
+    """key : STR
+    | DATE
+    | INT
+    | FLOAT
+    | HEX
+    | BIN
+    | OCT
+    | INF
+    | NAN"""
+    p[0] = [p[1]]

 def p_value_str(p):
    """value : STR"""
@@ -235,7 +234,7 @@ parser.root_dict = dict()
 parser.current_header = parser.root_dict
 parser.syntax_error = False

-f = open('example.toml', 'r')
+f = open("example.toml", "r")
 parser.parse(f.read())

 print(json.dumps(parser.root_dict, indent=2))