"""Convert CSV files with an extended header syntax into JSON.

A header field is either a plain column name, or a name followed by a
repetition spec that turns it into a list column spanning several CSV
cells, optionally followed by an aggregation function::

    Name            plain column
    Grades{5}       list column made of exactly 5 cells
    Grades{3,5}     list column made of 3 to 5 cells
    Grades{3,5}::sum    list column replaced by the sum of its values
    Grades{3,5}::media  list column replaced by the mean of its values
"""

import json
import re

# One header field: a name, optionally followed by ``{N}`` or ``{MIN,MAX}``
# and then, only in that case, optionally by ``::func``.
_HEADER_FIELD_RE = re.compile(r"(\w+)(?:{(?:(\d+),)?(\d+)}(?:::(\w+))?)?")

# Runs of digits inside the raw text captured for a list column.
_INT_RE = re.compile(r"\d+")


def parse_header(header):
    """Parse the header line of the CSV file.

    Args:
        header: The raw header line (surrounding whitespace is ignored).

    Returns:
        A tuple ``(header_order, lists, funcs)`` where

        * ``header_order`` is the list of column names in file order,
        * ``lists`` maps each list column to ``(min_count, max_count)``,
          both as ``int`` (for the ``{N}`` form both are ``N``),
        * ``funcs`` maps each list column that declared ``::func`` to the
          function name as written in the header.
    """
    header_order = []
    lists = {}
    funcs = {}
    for name, low, high, func in _HEADER_FIELD_RE.findall(header.strip()):
        header_order.append(name)
        if high:
            upper = int(high)
            # ``{N}`` carries no explicit lower bound: it means exactly N,
            # so the lower bound is N as an int as well.
            lists[name] = (int(low) if low else upper, upper)
        if func:
            funcs[name] = func
    return header_order, lists, funcs


def _build_row_regex(header_order, lists):
    """Compile the regex matching one data row, one named group per column."""
    parts = []
    for name in header_order:
        if name in lists:
            low, high = lists[name]
            if low == high:
                count = "{" + str(high) + "}"
            else:
                count = "{" + str(low) + "," + str(high) + "}"
            # A list column is a run of ``count`` comma-separated cells.
            parts.append(rf"(?P<{name}>([^,]+,?){count}),?")
        else:
            parts.append(rf"(?P<{name}>[^,]+),?")
    return re.compile("".join(parts))


def _aggregate(values, func):
    """Apply the header-declared function ``func`` to a list of ints.

    ``"sum"`` returns the total and ``"media"`` the arithmetic mean (``None``
    for an empty list, so it serialises as JSON ``null`` instead of raising
    ``ZeroDivisionError``). Any other value, including ``None``, returns
    ``values`` unchanged.
    """
    if func == "sum":
        return sum(values)
    if func == "media":
        return sum(values) / len(values) if values else None
    return values


def parse_data(lines, header_order, lists, funcs):
    """Parse the data rows according to a parsed header.

    Args:
        lines: Iterable of raw data lines (header excluded).
        header_order: Column names in file order.
        lists: Mapping of list column name to ``(min_count, max_count)``.
        funcs: Mapping of list column name to an aggregation function name.

    Returns:
        A list with one dict per matched row. Plain columns hold the cell
        text; list columns hold a list of ints extracted from the matched
        cells, or the aggregated value when a known function is declared.
        Lines the row pattern does not match contribute nothing.
    """
    row_regex = _build_row_regex(header_order, lists)
    list_columns = [name for name in header_order if name in lists]

    data = []
    for line in lines:
        for match in row_regex.finditer(line.strip()):
            record = match.groupdict()
            for name in list_columns:
                values = [int(tok) for tok in _INT_RE.findall(record[name])]
                record[name] = _aggregate(values, funcs.get(name))
            data.append(record)
    return data


def main():
    """Convert ``alunos2.csv`` .. ``alunos5.csv`` into matching ``.json`` files."""
    for i in range(2, 6):
        test_file = f"alunos{i}"
        # Context managers guarantee both files are closed (and the JSON
        # output flushed) even if parsing fails part-way through.
        with open(f"{test_file}.csv", "r") as csv_file:
            lines = csv_file.readlines()
        header_order, lists, funcs = parse_header(lines[0])
        data = parse_data(lines[1:], header_order, lists, funcs)
        with open(f"{test_file}.json", "w") as json_file:
            json.dump(data, json_file, indent=4, ensure_ascii=False)


if __name__ == "__main__":
    main()