import re
from dataclasses import dataclass

alinea_a = {}

# One dataset record: "<folder>::<year>-<month>-<day>::<name>::<dad>::<mom>::<extra>::".
# Compiled once at import time instead of on every loaded line.
_LINE_RE = re.compile(
    r"(\d+)::(\d+)-(\d+)-(\d+)::([A-Za-z ]+)::([A-Za-z ]+)::([A-Za-z ]+)::(.*)::"
)

# One associated-process entry inside the last field: "<name>,<grau>. Proc.<number>.".
_PROC_RE = re.compile(r"([A-Za-z ]+),([A-Za-z ]+)\. Proc\.(\d+)\. *")


@dataclass
class Process:
    """An associated process referenced from the last field of a record."""

    name: str  # text before the comma, stripped of surrounding whitespace
    grau: str  # text between the comma and ". Proc." (presumably the kinship degree)
    proc_number: int  # digits following "Proc."


@dataclass
class Line:
    """One parsed record of the dataset file."""

    folder: int  # leading numeric field of the record
    year: int
    month: int
    day: int
    name: str
    dad: str
    mom: str
    associated_processes: list[Process]  # empty when the last field has no entries


# Parsed records indexed as processes[year][month][day] -> list of Line.
processes: dict[int, dict[int, dict[int, list[Line]]]] = {}


def main():
    """Load the default dataset and compute the per-year record frequency."""
    load()
    freq_proc_ano()


def freq_proc_ano() -> dict[int, int]:
    """Return how many loaded records fall in each year.

    Every ``Line`` stored in ``processes`` counts once, so several records
    sharing the same date all contribute to their year's total.

    Returns:
        A dict mapping each year present in ``processes`` to its record count.
    """
    freq: dict[int, int] = {}
    for year, months in processes.items():
        freq[year] = sum(
            len(records) for days in months.values() for records in days.values()
        )
    return freq


def freq_nomes_apelidos():
    """Placeholder: currently only prints a marker string."""
    print("a")


def load(path: str = "processos.txt") -> None:
    """Parse the dataset file and add its records to ``processes``.

    Blank lines and lines that do not match the record pattern are skipped.
    Records are appended to whatever ``processes`` already holds, so calling
    this twice on the same file stores every record twice.

    Args:
        path: File to read; defaults to the original hard-coded file name.

    Raises:
        OSError: If the file cannot be opened.
    """
    # The context manager closes the file even if parsing raises midway.
    with open(path, "r") as dataset:
        for raw_line in dataset:
            if raw_line.strip() == "":
                continue

            record_match = _LINE_RE.match(raw_line)
            if record_match is None:
                continue

            folder, year, month, day = (
                int(record_match.group(index)) for index in range(1, 5)
            )
            name, dad, mom, proc_str = record_match.group(5, 6, 7, 8)

            # findall() yields an empty list (never None) when nothing matches.
            proc_list = [
                Process(proc_name.strip(), grau.strip(), int(number))
                for proc_name, grau, number in _PROC_RE.findall(proc_str)
            ]

            line_obj = Line(folder, year, month, day, name, dad, mom, proc_list)
            by_day = processes.setdefault(year, {}).setdefault(month, {})
            by_day.setdefault(day, []).append(line_obj)


if __name__ == "__main__":
    main()