"""Load ``::``-delimited records from a text file into a date-indexed dict.

Each accepted line has the shape::

    <folder>::<year>-<month>-<day>::<name>::<dad>::<mom>::<associated>::

where ``<associated>`` is a (possibly empty) run of entries of the form
``<name>,<grau>. Proc.<number>.``.
"""

import re
from dataclasses import dataclass
from typing import Optional

# year -> month -> day -> list of Line records; populated by load().
processes = {}
# Not used anywhere in this file; kept because it is module-level state.
alinea_a = {}

# One record per line. The last field is captured with a single greedy
# ``[^\n]*`` (everything up to the final "::" on the line). A nested
# quantifier such as ``([^ \n]* *)*`` captures the same text but backtracks
# exponentially on a line that lacks the closing "::".
_LINE_RE = re.compile(
    r"(\d+)::(\d+)-(\d+)-(\d+)::"
    r"([A-Za-z ]+)::([A-Za-z ]+)::([A-Za-z ]+)::"
    r"([^\n]*)::"
)

# One associated-process entry inside the last field of a record.
_PROC_RE = re.compile(r"([A-Za-z ]+),([A-Za-z ]+)\. Proc\.(\d+)\. *")


@dataclass
class Process:
    """One associated-process entry parsed from the last field of a record."""

    # Text before the comma in the entry, stripped of surrounding spaces.
    name: str
    # Text between the comma and ". Proc." (presumably the degree of
    # kinship -- TODO confirm against the dataset).
    grau: str
    # Number that follows "Proc.".
    proc_number: int


@dataclass
class Line:
    """One parsed record (a single line of the input file)."""

    # Leading integer field of the record.
    folder: int
    # Components of the <year>-<month>-<day> field.
    year: int
    month: int
    day: int
    # Third, fourth and fifth "::"-separated fields, kept verbatim.
    name: str
    dad: str
    mom: str
    # Entries found in the last field; empty when there are none.
    associated_processes: list[Process]


def main():
    """Load the default dataset and print the resulting nested dictionary."""
    load()
    print(processes)


def _parse_line(line: str) -> Optional[Line]:
    """Return the Line described by *line*, or None if it is not a record."""
    matched = _LINE_RE.match(line)
    if matched is None:
        return None

    folder, year, month, day = (int(matched.group(i)) for i in range(1, 5))
    name, dad, mom, proc_str = matched.group(5, 6, 7, 8)

    # findall always returns a list (possibly empty), never None.
    associated = [
        Process(proc_name.strip(), grau.strip(), int(number))
        for proc_name, grau, number in _PROC_RE.findall(proc_str)
    ]
    return Line(folder, year, month, day, name, dad, mom, associated)


def load(path="processos.txt"):
    """Parse the file at *path* and add its records to ``processes``.

    Blank lines and lines that do not match the record format are skipped.
    Every parsed record is appended to ``processes[year][month][day]``;
    the intermediate dictionaries and the list are created on demand.

    Args:
        path: File to read. Defaults to ``"processos.txt"``.

    Raises:
        OSError: If the file cannot be opened.
    """
    # ``with`` closes the file even if parsing raises part-way through.
    with open(path, "r") as dataset:
        for raw_line in dataset:
            record = _parse_line(raw_line)
            if record is None:
                continue
            (
                processes.setdefault(record.year, {})
                .setdefault(record.month, {})
                .setdefault(record.day, [])
                .append(record)
            )


if __name__ == "__main__":
    main()