import re
from dataclasses import dataclass


# Module-level dict that starts empty. Nothing in the visible source reads or
# writes it. NOTE(review): presumably reserved for exercise item (a) -- confirm
# or remove.
alinea_a = {}


@dataclass
class Process:
    """One ``<name>,<grau>. Proc.<number>.`` entry taken from a record's last field."""

    # Text captured before the comma of the entry.
    name: str
    # Text captured between the comma and ". Proc."
    # (presumably the kinship degree -- TODO confirm).
    grau: str
    # Digits that follow "Proc.", converted to int by the loader.
    proc_number: int


@dataclass
class Line:
    """One parsed dataset record (a single ``::``-separated input line)."""

    # Leading integer field of the record.
    folder: int
    # Components of the record's ``YYYY-MM-DD`` field, each converted to int.
    year: int
    month: int
    day: int
    # Third, fourth and fifth text fields of the record, in that order.
    name: str
    dad: str
    mom: str
    # Process entries found in the record's trailing field; empty list if none.
    associated_processes: list[Process]

# Every parsed record, indexed as year -> month -> day -> list of Line objects.
# Starts empty; load() appends to it.
processes: dict[int, dict[int, dict[int, list[Line]]]] = {}

def main():
    """Script entry point: call ``load()`` and then ``freq_proc_ano()``.

    Whatever ``freq_proc_ano()`` returns is ignored here.
    """
    load()
    freq_proc_ano()

def freq_proc_ano(data=None):
    """Count how many records are stored under each year.

    Args:
        data: Optional ``year -> month -> day -> list`` mapping to tally.
            When omitted (or ``None``) the module-level ``processes`` is used.

    Returns:
        A dict mapping each year key to the total number of entries found in
        all of that year's per-day lists. Empty input gives an empty dict.
    """
    if data is None:
        data = processes

    freq = {}
    for year, months in data.items():
        # Count the records themselves, not the number of distinct days:
        # several records can share one date.
        freq[year] = sum(
            len(day_records)
            for days in months.values()
            for day_records in days.values()
        )
    return freq

def freq_nomes_apelidos():
    """Placeholder: writes the single line ``a`` to standard output.

    NOTE(review): the name suggests a name/surname frequency report, but no
    such computation exists here yet.
    """
    print("a")

def load(path="processos.txt"):
    """Parse the dataset file and store every matching record in ``processes``.

    A record line must match
    ``folder::YYYY-MM-DD::name::dad::mom::<rest>::`` where the three text
    fields contain only ASCII letters and spaces. ``<rest>`` is scanned for
    ``<name>,<grau>. Proc.<number>.`` entries, each of which becomes a
    ``Process``. The resulting ``Line`` is appended to
    ``processes[year][month][day]``, creating missing levels on the way.

    Args:
        path: File to read. Defaults to ``"processos.txt"``, resolved against
            the current working directory.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Compiled once up front instead of being rebuilt for every line.
    record_re = re.compile(
        r"(\d+)::(\d+)-(\d+)-(\d+)::([A-Za-z ]+)::([A-Za-z ]+)::([A-Za-z ]+)::(.*)::"
    )
    process_re = re.compile(r"([A-Za-z ]+),([A-Za-z ]+)\. Proc\.(\d+)\. *")

    # ``with`` guarantees the handle is closed even if parsing raises midway.
    with open(path, "r") as dataset:
        for raw_line in dataset:
            if not raw_line.strip():
                continue

            record = record_re.match(raw_line)
            if record is None:
                # NOTE(review): lines that do not fit the pattern (e.g. an
                # empty parent field or non-ASCII letters) are dropped
                # silently -- confirm this is intended.
                continue

            folder, year, month, day = (int(record.group(i)) for i in range(1, 5))
            name, dad, mom, proc_str = record.group(5, 6, 7, 8)

            # findall() yields an empty list when nothing matches, so no
            # None check is needed.
            proc_list = [
                Process(rel_name.strip(), grau.strip(), int(number))
                for rel_name, grau, number in process_re.findall(proc_str)
            ]

            line_obj = Line(folder, year, month, day, name, dad, mom, proc_list)
            same_day = (
                processes.setdefault(year, {})
                .setdefault(month, {})
                .setdefault(day, [])
            )
            same_day.append(line_obj)


# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()