74 lines
1.9 KiB
Python
74 lines
1.9 KiB
Python
import re
|
|
from dataclasses import dataclass
|
|
|
|
# Index of parsed records, filled in by load():
# processes[year][month][day] is a list of Line objects for that date.
processes = {}
|
|
|
|
# Not referenced anywhere in the visible source. "Alinea a" is Portuguese for
# "item (a)" - presumably reserved for the results of part (a) of an
# exercise; TODO confirm before removing.
alinea_a = {}
|
|
|
|
|
|
@dataclass
class Process:
    """A process reference parsed from the last field of a dataset record.

    ``load`` builds one instance per ``<name>,<grau>. Proc.<number>.`` match
    found in that field.
    """

    # Text before the comma; load() strips surrounding whitespace.
    name: str
    # Text between the comma and ". Proc."; load() strips whitespace.
    # NOTE(review): "grau" is Portuguese for "degree" - presumably the degree
    # of kinship to the record's subject; confirm against the dataset.
    grau: str
    # Digits that follow "Proc.", converted to int by load().
    proc_number: int
|
|
|
|
|
|
@dataclass
class Line:
    """One successfully parsed record of the dataset read by ``load``.

    Fields appear in the same order as in the record:
    ``folder::year-month-day::name::dad::mom::processes::``.
    """

    # Leading numeric field of the record.
    folder: int
    # The three dash-separated numbers of the date field, in record order.
    year: int
    month: int
    day: int
    # The three "::"-separated name fields, in record order.
    name: str
    dad: str
    mom: str
    # One Process per reference found in the record's last field; the list is
    # empty when that field contains no recognisable reference.
    associated_processes: list[Process]
|
|
|
|
|
|
def main():
    """Load the dataset and print the resulting ``processes`` index."""
    load()
    print(processes)
|
|
|
|
|
|
def load():
    """Parse ``processos.txt`` and index its records in ``processes``.

    Each non-blank line is expected to look like::

        folder::year-month-day::name::dad::mom::<process references>::

    Lines that do not match this layout are skipped silently. For every
    matching line a ``Line`` is built (with one ``Process`` per
    ``<name>,<grau>. Proc.<number>.`` reference found in the last field) and
    appended to ``processes[year][month][day]``.

    Raises:
        OSError: if ``processos.txt`` cannot be opened (for example
            ``FileNotFoundError`` when it does not exist).
    """
    # Both patterns are loop-invariant, so they are compiled once up front.
    #
    # The last field is captured with ``[^\n]*`` (everything up to the final
    # "::" on the line). It captures the same text as the previous nested
    # form ``(([^ \n]* *)*)`` but cannot backtrack exponentially on a record
    # that lacks its closing "::".
    record_re = re.compile(
        r"(\d+)::(\d+)-(\d+)-(\d+)"  # folder :: year-month-day
        r"::([A-Za-z ]+)::([A-Za-z ]+)::([A-Za-z ]+)"  # name, dad, mom
        r"::([^\n]*)::"  # process references
    )
    process_re = re.compile(r"([A-Za-z ]+),([A-Za-z ]+)\. Proc\.(\d+)\. *")

    # The context manager closes the file even if parsing raises.
    with open("processos.txt", "r") as dataset:
        for line in dataset:
            if not line.strip():
                continue

            matched = record_re.match(line)
            if matched is None:
                # Best effort: malformed records are ignored.
                continue

            folder, year, month, day = (
                int(matched.group(index)) for index in range(1, 5)
            )
            name, dad, mom, proc_str = matched.group(5, 6, 7, 8)

            # findall always returns a list (possibly empty), so no None
            # check is needed here.
            proc_list = [
                Process(who.strip(), grau.strip(), int(number))
                for who, grau, number in process_re.findall(proc_str)
            ]

            line_obj = Line(folder, year, month, day, name, dad, mom, proc_list)

            # Create the year -> month -> day levels on demand.
            by_month = processes.setdefault(year, {}).setdefault(month, {})
            by_month.setdefault(day, []).append(line_obj)
|
|
|
|
|
|
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|