PL2023/TPC4/tpc4.py

66 lines
1.9 KiB
Python
Raw Normal View History

2023-03-16 12:21:08 +00:00
import re
import json
def parse_header(header):
    """Parse the CSV header line into column metadata.

    Each column is written as ``name``, ``name{N}``, ``name{N,M}``, and the
    two braced forms may be followed by ``::func``. Empty header cells (runs
    of commas) contain no word characters and are therefore ignored.

    Args:
        header: The raw header line; surrounding whitespace is stripped.

    Returns:
        A tuple ``(header_order, lists, funcs)`` where

        * ``header_order`` is the list of column names in order of appearance,
        * ``lists`` maps a list column name to ``(minimum, maximum)`` element
          counts, both as ``int`` (``name{N}`` yields ``(N, N)``),
        * ``funcs`` maps a list column name to its aggregation function name.
    """
    # Groups: name, optional lower bound, upper bound, optional function.
    # The "::func" suffix is only recognised after a "{...}" count spec.
    column_re = re.compile(r"(\w+)(?:{(?:(\d+),)?(\d+)}(?:::(\w+))?)?")

    header_order = []
    lists = {}
    funcs = {}
    for name, low, high, func in column_re.findall(header.strip()):
        header_order.append(name)
        if high:
            upper = int(high)
            # With a single count the lower bound equals the upper bound;
            # keep both as ints so the tuple has a uniform type.
            lists[name] = (int(low) if low else upper, upper)
        if func:
            funcs[name] = func
    return header_order, lists, funcs
def parse_data(lines, header_order, lists, funcs):
    """Turn CSV data lines into a list of per-row dictionaries.

    A single regular expression with one named group per column is built
    from the header metadata and applied to every stripped line. Columns
    listed in ``lists`` are converted to lists of integers; columns listed
    in ``funcs`` are then reduced with ``"sum"`` or ``"media"`` (mean).

    Args:
        lines: Iterable of raw data lines (header excluded).
        header_order: Column names in order.
        lists: Mapping of list column name to ``(minimum, maximum)`` counts.
        funcs: Mapping of column name to aggregation function name.

    Returns:
        A list of dictionaries keyed by column name, one per regex match.
    """
    fragments = []
    for column in header_order:
        if column not in lists:
            fragments.append(rf"(?P<{column}>[^,]+),?")
            continue
        low, high = lists[column]
        # Equal bounds collapse to the fixed-count quantifier "{n}".
        quantifier = f"{{{high}}}" if low == high else f"{{{low},{high}}}"
        fragments.append(rf"(?P<{column}>([^,]+,?){quantifier}),?")
    row_pattern = re.compile("".join(fragments))

    records = [
        found.groupdict()
        for raw_line in lines
        for found in row_pattern.finditer(raw_line.strip())
    ]

    for record in records:
        for column in header_order:
            if column in lists:
                # Only digit runs are kept; separators and other text are dropped.
                record[column] = list(map(int, re.findall(r"\d+", record[column])))
            if column in funcs:
                operation = funcs[column]
                if operation == "sum":
                    record[column] = sum(record[column])
                elif operation == "media":
                    values = record[column]
                    record[column] = sum(values) / len(values)
    return records
def main():
    """Convert ``alunos2.csv`` .. ``alunos5.csv`` into matching ``.json`` files.

    For each input file the first line is parsed as the header and the
    remaining lines as data; the resulting records are dumped as indented
    JSON next to the input. Files are opened with the platform default
    encoding, and both handles are closed deterministically.
    """
    for i in range(2, 6):
        test_file = f"alunos{i}"

        # Read everything up front so the input handle is released
        # before any parsing or output happens.
        with open(f"{test_file}.csv", "r") as csv_file:
            lines = csv_file.readlines()

        header_order, lists, funcs = parse_header(lines[0])
        data = parse_data(lines[1:], header_order, lists, funcs)

        # The context manager guarantees the JSON is flushed and closed.
        with open(f"{test_file}.json", "w") as json_file:
            json.dump(data, json_file, indent=4, ensure_ascii=False)


if __name__ == "__main__":
    main()