diff --git a/TPC3/main.py b/TPC3/main.py index f609303..2a95ce5 100644 --- a/TPC3/main.py +++ b/TPC3/main.py @@ -1,5 +1,6 @@ import re from dataclasses import dataclass +import json alinea_a = {} @@ -23,11 +24,18 @@ class Line: mom: str associated_processes: list[Process] -processes : dict[int,dict[int,dict[int,list[Line]]]] = {} + +processes: dict[int, dict[int, dict[int, list[Line]]]] = {} + def main(): - load() + global processes + processes = load(None) freq_proc_ano() + freq_nomes() + freq_relacao() + write_proc_to_json_file(None) + def freq_proc_ano(): freq = {} @@ -37,14 +45,62 @@ def freq_proc_ano(): if year not in freq: freq[year] = 0 freq[year] += 1 + return freq -def freq_nomes_apelidos(): - print("a") -def load(): - regex = r"(\d+)::(\d+)-(\d+)-(\d+)::([A-Za-z ]+)::([A-Za-z ]+)::([A-Za-z ]+)::(.*)::" +def freq_nomes(): + prop = {} + apelidos = {} + for year in processes.keys(): + for month in processes[year].keys(): + for day in processes[year][month].keys(): + for entry in processes[year][month][day]: + spl = entry.name.split(" ") + if spl[0] not in prop: + prop[spl[0]] = 1 + else: + prop[spl[0]] += 1 + + if spl[-1] not in apelidos: + apelidos[spl[-1]] = 1 + else: + apelidos[spl[-1]] += 1 + ret = {"prop": prop, "apelidos": apelidos} + return ret + + +def freq_relacao(): + freq = {} + for year in processes.keys(): + for month in processes[year].keys(): + for day in processes[year][month].keys(): + for entry in processes[year][month][day]: + for proc in entry.associated_processes: + if proc.grau not in freq: + freq[proc.grau] = 1 + else: + freq[proc.grau] += 1 + + return freq + + +def write_proc_to_json_file(num_of_lines: int | None): + js = load(num_of_lines) + with open("processos.json", "w") as file: + file.write(json.dumps(js, default=vars, indent=4)) + + +def load(num_of_lines: int | None): + output = {} + regex = ( + r"(\d+)::(\d+)-(\d+)-(\d+)::([A-Za-z ]+)::([A-Za-z ]+)::([A-Za-z ]+)::(.*)::" + ) dataset = open("processos.txt", "r") lines = dataset.readlines() + + if num_of_lines is not None: + lines = lines[:num_of_lines] + for line in lines: if line.strip() == "": continue @@ -54,14 +110,14 @@ def load(): year = int(matched_regex.group(2)) month = int(matched_regex.group(3)) day = int(matched_regex.group(4)) - name = matched_regex.group(5) - dad = matched_regex.group(6) - mom = matched_regex.group(7) + name = matched_regex.group(5).split(",")[0] + dad = matched_regex.group(6).split(",")[0] + mom = matched_regex.group(7).split(",")[0] proc_str = matched_regex.group(8) regex2 = r"([A-Za-z ]+),([A-Za-z ]+)\. Proc\.(\d+)\. *" proc_match = re.findall(regex2, proc_str) - + proc_list = [] if proc_match is not None: for match in proc_match: @@ -70,15 +126,15 @@ def load(): ) line_obj = Line(folder, year, month, day, name, dad, mom, proc_list) - if year not in processes.keys(): - processes[year] = {} - if month not in processes[year].keys(): - processes[year][month] = {} - if day not in processes[year][month].keys(): - processes[year][month][day] = [] - processes[year][month][day].append(line_obj) - + if year not in output.keys(): + output[year] = {} + if month not in output[year].keys(): + output[year][month] = {} + if day not in output[year][month].keys(): + output[year][month][day] = [] + output[year][month][day].append(line_obj) dataset.close() + return output if __name__ == "__main__":