From 98d359e7fe6e5bad9271b1de8ae9711a0255fc9f Mon Sep 17 00:00:00 2001
From: tiago
Date: Sat, 27 May 2023 23:45:25 +0100
Subject: [PATCH] I committed many crimes, mostly grammatical ones #commie

---
 src/example.toml |  2 --
 src/lexer.py     | 48 ++++++++++++++++++++++++++++++++++++++++++++++--
 src/parser.py    | 22 +++++++++++++---------
 3 files changed, 59 insertions(+), 13 deletions(-)

diff --git a/src/example.toml b/src/example.toml
index a4b0e7f..dd4069f 100644
--- a/src/example.toml
+++ b/src/example.toml
@@ -90,5 +90,3 @@ smooth = false
 points = [ { x = 1, y = 2, z = 3 },
            { x = 7, y = 8, z = 9 },
            { x = 2, y = 4, z = 8 } ]
-
-
diff --git a/src/lexer.py b/src/lexer.py
index 50ad46b..572e98c 100644
--- a/src/lexer.py
+++ b/src/lexer.py
@@ -1,6 +1,8 @@
 from ply import lex
 
 tokens = [
+    "TABLE",
+    "ARRTABLE",
     "ID",
     "STR",
     "MLSTR",
@@ -19,10 +21,51 @@ tokens = [
     "NEWLINE",
 ]
 
-def t_NEWLINE(t):
+states = (('array', 'inclusive'),
+          ('dict', 'inclusive'))
+
+def t_TABLE(t):
+    r"\[\s*(([\w_-]+|\"[\w_-]+\"|\'[\w_-]+\')(\s*\.\s*([\w_-]+|\"[\w_-]+\"|\'[\w_-]+\'))*)\s*\]"
+    header = t.lexer.lexmatch.groups()[1]
+    header = [s.strip(" \"\'") for s in header.split('.')]
+    t.value = header
+    return t
+
+def t_ARRTABLE(t):
+    r"\[\s*\[\s*(([\w_-]+|\"[\w_-]+\"|\'[\w_-]+\')(\s*\.\s*([\w_-]+|\"[\w_-]+\"|\'[\w_-]+\'))*)\s*\]\s*\]"
+    header = t.lexer.lexmatch.groups()[6]
+    header = [s.strip(" \"\'") for s in header.split('.')]
+    t.value = header
+    return t
+
+def t_INITIAL_NEWLINE(t):
     r"\n+\s*"
     return t
 
+def t_INITIAL_open_array(t):
+    r"\["
+    t.type = "["
+    t.lexer.push_state('array')
+    return t
+
+def t_array_close_array(t):
+    r"\]"
+    t.type = "]"
+    t.lexer.pop_state()
+    return t
+
+def t_INITIAL_open_dict(t):
+    r"\{"
+    t.type = "{"
+    t.lexer.push_state('dict')
+    return t
+
+def t_dict_close_dict(t):
+    r"\}"
+    t.type = "}"
+    t.lexer.pop_state()
+    return t
+
 # needs to check if datetime is valid
 def t_DATETIME(t):
     r"\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}(\.\d{1,6})?(Z|[+-]\d{2}:\d{2})"
@@ -120,7 +163,8 @@ def t_error(t):
 
 
 t_ignore = "\t "
+t_array_dict_ignore = "\t\n "
 
-literals = "[]{},="
+literals = ",="
 
 lexer = lex.lex()
diff --git a/src/parser.py b/src/parser.py
index 0a05862..b9e60c0 100644
--- a/src/parser.py
+++ b/src/parser.py
@@ -4,7 +4,8 @@ import json
 
 
 def p_toml(p):
-    """toml : content"""
+    """toml : newlines content
+            | content"""
 
 
 def p_content_multi(p):
@@ -16,19 +17,22 @@ def p_content_single(p):
 
 
 def p_tomlEntries_table(p):
-    """tomlEntries : table NEWLINE"""
+    """tomlEntries : table newlines"""
 
 
 def p_tomlEntries_object(p):
-    """tomlEntries : object NEWLINE"""
+    """tomlEntries : object newlines"""
 
+def p_newlines(p):
+    """newlines : newlines NEWLINE
+                | NEWLINE"""
 
 def p_table_simple(p):
-    """table : '[' ID ']'"""
+    """table : TABLE"""
     p.parser.current_inline_tables = []
     p.parser.current_tables = []
     p.parser.syntax_error = False
-    headers = p[2]
+    headers = p[1]
     temp = p.parser.root_dict
     for header in headers[:-1]:
         if header not in temp:
@@ -45,16 +49,16 @@ def p_table_simple(p):
         print("Cannot define the same table twice")
         p.parser.syntax_error = True
     p.parser.current_header = temp
-    p.parser.current_header_name = p[2]
+    p.parser.current_header_name = p[1]
 
 
 # this is wrong
 def p_table_array(p):
-    """table : '[' '[' ID ']' ']'"""
+    """table : ARRTABLE"""
     p.parser.current_inline_tables = []
     p.parser.current_tables = []
     p.parser.syntax_error = False
-    headers = p[3]
+    headers = p[1]
     temp = p.parser.root_dict
     for header in headers[:-1]:
         if header not in temp:
@@ -69,7 +73,7 @@
         temp[headers[-1]].append({})
     temp = temp[headers[-1]][-1]
     p.parser.current_header = temp
-    p.parser.current_header_name = p[3]
+    p.parser.current_header_name = p[1]
 
 
 def p_object(p):
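
Reviewer note (not part of the patch): a minimal sketch of how the new
TABLE rule and the array lexer state are expected to interact, assuming
the lexer module is importable as src.lexer and that the existing ID and
STR rules match bare keys and basic strings as their names suggest. The
sample input and the expected token stream are illustrative only.

    # sketch.py -- illustrative, not included in this patch
    from src.lexer import lexer  # import path is an assumption

    sample = '[server.alpha]\ntags = [ "a",\n"b" ]\n'
    lexer.input(sample)
    for tok in lexer:
        # Expected stream: TABLE (value ['server', 'alpha']), NEWLINE,
        # ID, '=', then '[' from t_INITIAL_open_array, which pushes the
        # 'array' state where t_array_dict_ignore skips the newline;
        # STR ',' STR, and ']' pops back to INITIAL.
        print(tok.type, tok.value)

A two-element array is used deliberately: because the states are
declared 'inclusive' and t_TABLE is defined before t_INITIAL_open_array,
a one-element array of quoted strings such as [ "a" ] also matches the
TABLE regex and would lex as a table header. That may be worth a test.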