TPC3 - Feito

This commit is contained in:
Afonso Franco 2023-03-17 13:19:07 +00:00
parent 6ec53c039b
commit d88032ae0d
Signed by: afonso
SSH key fingerprint: SHA256:JiuxZNdA5bRWXPMUJChI0AQ75yC+cXY4xM0IaVwEVys

View file

@ -1,5 +1,6 @@
import re import re
from dataclasses import dataclass from dataclasses import dataclass
import json
alinea_a = {} alinea_a = {}
@ -23,11 +24,18 @@ class Line:
mom: str mom: str
associated_processes: list[Process] associated_processes: list[Process]
processes : dict[int,dict[int,dict[int,list[Line]]]] = {}
processes: dict[int, dict[int, dict[int, list[Line]]]] = {}
def main():
    """Run the whole pipeline once.

    Rebinds the module-level ``processes`` to the result of ``load(None)``,
    calls each frequency function in turn (their return values are not
    used here), and finally writes the JSON export.
    """
    global processes
    processes = load(None)
    # Same call order as before: per-year, names, then relations.
    for report in (freq_proc_ano, freq_nomes, freq_relacao):
        report()
    write_proc_to_json_file(None)
def freq_proc_ano(): def freq_proc_ano():
freq = {} freq = {}
@ -37,14 +45,62 @@ def freq_proc_ano():
if year not in freq: if year not in freq:
freq[year] = 0 freq[year] = 0
freq[year] += 1 freq[year] += 1
return freq
def freq_nomes_apelidos():
print("a")
def load(): def freq_nomes():
regex = r"(\d+)::(\d+)-(\d+)-(\d+)::([A-Za-z ]+)::([A-Za-z ]+)::([A-Za-z ]+)::(.*)::" prop = {}
apelidos = {}
for year in processes.keys():
for month in processes[year].keys():
for day in processes[year][month].keys():
for entry in processes[year][month][day]:
spl = entry.name.split(" ")
if spl[0] not in prop:
prop[spl[0]] = 1
else:
prop[spl[0]] += 1
if spl[-1] not in apelidos:
apelidos[spl[-1]] = 1
else:
apelidos[spl[-1]] += 1
ret = {"prop": prop, "apelidos": apelidos}
return ret
def freq_relacao():
freq = {}
for year in processes.keys():
for month in processes[year].keys():
for day in processes[year][month].keys():
for entry in processes[year][month][day]:
for proc in entry.associated_processes:
if proc.grau not in freq:
freq[proc.grau] = 1
else:
freq[proc.grau] += 1
return freq
def write_proc_to_json_file(num_of_lines: int | None, path: str = "processos.json"):
    """Load the dataset and write it to disk as indented JSON.

    Args:
        num_of_lines: Passed straight to ``load``, which keeps only that
            many leading lines of the input when it is not ``None``.
        path: Destination file. Defaults to ``processos.json``, the
            location that was previously hard-coded.
    """
    js = load(num_of_lines)
    # Serialise first: if encoding fails, an existing output file is left
    # untouched instead of being truncated by open(..., "w").
    # default=vars makes json emit the attribute dict of any object it
    # cannot encode natively.
    payload = json.dumps(js, default=vars, indent=4)
    with open(path, "w", encoding="utf-8") as file:
        file.write(payload)
def load(num_of_lines: int | None):
output = {}
regex = (
r"(\d+)::(\d+)-(\d+)-(\d+)::([A-Za-z ]+)::([A-Za-z ]+)::([A-Za-z ]+)::(.*)::"
)
dataset = open("processos.txt", "r") dataset = open("processos.txt", "r")
lines = dataset.readlines() lines = dataset.readlines()
if num_of_lines is not None:
lines = lines[:num_of_lines]
for line in lines: for line in lines:
if line.strip() == "": if line.strip() == "":
continue continue
@ -54,14 +110,14 @@ def load():
year = int(matched_regex.group(2)) year = int(matched_regex.group(2))
month = int(matched_regex.group(3)) month = int(matched_regex.group(3))
day = int(matched_regex.group(4)) day = int(matched_regex.group(4))
name = matched_regex.group(5) name = matched_regex.group(5).split(",")[0]
dad = matched_regex.group(6) dad = matched_regex.group(6).split(",")[0]
mom = matched_regex.group(7) mom = matched_regex.group(7).split(",")[0]
proc_str = matched_regex.group(8) proc_str = matched_regex.group(8)
regex2 = r"([A-Za-z ]+),([A-Za-z ]+)\. Proc\.(\d+)\. *" regex2 = r"([A-Za-z ]+),([A-Za-z ]+)\. Proc\.(\d+)\. *"
proc_match = re.findall(regex2, proc_str) proc_match = re.findall(regex2, proc_str)
proc_list = [] proc_list = []
if proc_match is not None: if proc_match is not None:
for match in proc_match: for match in proc_match:
@ -70,15 +126,15 @@ def load():
) )
line_obj = Line(folder, year, month, day, name, dad, mom, proc_list) line_obj = Line(folder, year, month, day, name, dad, mom, proc_list)
if year not in processes.keys(): if year not in output.keys():
processes[year] = {} output[year] = {}
if month not in processes[year].keys(): if month not in output[year].keys():
processes[year][month] = {} output[year][month] = {}
if day not in processes[year][month].keys(): if day not in output[year][month].keys():
processes[year][month][day] = [] output[year][month][day] = []
processes[year][month][day].append(line_obj) output[year][month][day].append(line_obj)
dataset.close() dataset.close()
return output
if __name__ == "__main__": if __name__ == "__main__":