import re from dataclasses import dataclass import json alinea_a = {} @dataclass class Process: name: str grau: str proc_number: int @dataclass class Line: folder: int year: int month: int day: int name: str dad: str mom: str associated_processes: list[Process] processes: dict[int, dict[int, dict[int, list[Line]]]] = {} def main(): global processes processes = load(None) freq_proc_ano() freq_nomes() freq_relacao() write_proc_to_json_file(None) def freq_proc_ano(): freq = {} for year in processes.keys(): for month in processes[year].keys(): for day in processes[year][month].keys(): if year not in freq: freq[year] = 0 freq[year] += 1 return freq def freq_nomes(): prop = {} apelidos = {} for year in processes.keys(): for month in processes[year].keys(): for day in processes[year][month].keys(): for entry in processes[year][month][day]: spl = entry.name.split(" ") if spl[0] not in prop: prop[spl[0]] = 1 else: prop[spl[0]] += 1 if spl[-1] not in apelidos: apelidos[spl[-1]] = 1 else: apelidos[spl[-1]] += 1 ret = {"prop": prop, "apelidos": apelidos} return ret def freq_relacao(): freq = {} for year in processes.keys(): for month in processes[year].keys(): for day in processes[year][month].keys(): for entry in processes[year][month][day]: for proc in entry.associated_processes: if proc.grau not in freq: freq[proc.grau] = 1 else: freq[proc.grau] += 1 return freq def write_proc_to_json_file(num_of_lines: int | None): js = load(num_of_lines) with open("processos.json", "w") as file: file.write(json.dumps(js, default=vars, indent=4)) def load(num_of_lines: int | None): output = {} regex = ( r"(\d+)::(\d+)-(\d+)-(\d+)::([A-Za-z ]+)::([A-Za-z ]+)::([A-Za-z ]+)::(.*)::" ) dataset = open("processos.txt", "r") lines = dataset.readlines() if num_of_lines is not None: lines = lines[:num_of_lines] for line in lines: if line.strip() == "": continue matched_regex = re.match(regex, line) if matched_regex is not None: folder = int(matched_regex.group(1)) year = int(matched_regex.group(2)) month = int(matched_regex.group(3)) day = int(matched_regex.group(4)) name = matched_regex.group(5).split(",")[0] dad = matched_regex.group(6).split(",")[0] mom = matched_regex.group(7).split(",")[0] proc_str = matched_regex.group(8) regex2 = r"([A-Za-z ]+),([A-Za-z ]+)\. Proc\.(\d+)\. *" proc_match = re.findall(regex2, proc_str) proc_list = [] if proc_match is not None: for match in proc_match: proc_list.append( Process(match[0].strip(), match[1].strip(), int(match[2])) ) line_obj = Line(folder, year, month, day, name, dad, mom, proc_list) if year not in output.keys(): output[year] = {} if month not in output[year].keys(): output[year][month] = {} if day not in output[year][month].keys(): output[year][month][day] = [] output[year][month][day].append(line_obj) dataset.close() return output if __name__ == "__main__": main()