65 lines
1.9 KiB
Python
65 lines
1.9 KiB
Python
import re
|
|
import json
|
|
|
|
def parse_header(header):
    """Parse the CSV header line into column order, list bounds and aggregations.

    Every column is a word (``\\w+``), optionally followed by a size in braces
    (``name{N}`` or ``name{MIN,MAX}``), which in turn may be followed by an
    aggregation suffix (``name{MIN,MAX}::func``).

    Args:
        header: The raw header line; surrounding whitespace is ignored.

    Returns:
        A ``(header_order, lists, funcs)`` tuple where

        * ``header_order`` is the list of column names in order of appearance,
        * ``lists`` maps each braced column to an ``(int, int)`` pair of
          minimum and maximum sizes (both equal to ``N`` for ``name{N}``),
        * ``funcs`` maps each column carrying a ``::func`` suffix to that
          function name.
    """
    column_regex = re.compile(r"(\w+)(?:{(?:(\d+),)?(\d+)}(?:::(\w+))?)?")

    header_order = []
    lists = {}
    funcs = {}

    for name, low, high, func in column_regex.findall(header.strip()):
        header_order.append(name)
        if high:
            upper = int(high)
            # ``name{N}`` has no explicit minimum: use N for both bounds, as an
            # int, so the pair never mixes a string with an integer.
            lists[name] = (int(low) if low else upper, upper)
        if func:
            funcs[name] = func

    return header_order, lists, funcs
|
|
|
|
|
|
def _row_pattern(header_order, lists):
    """Build the regex source for one data row, one capture group per column."""
    pieces = []
    for position, name in enumerate(header_order):
        separator = ","
        if name in lists:
            # Bounds may arrive as strings or ints; normalise before use.
            low, high = (int(bound) for bound in lists[name])
            if low > high:
                # Same exception type re.compile raises for an inverted
                # ``{min,max}`` quantifier.
                raise re.error(
                    f"column {name!r}: minimum {low} exceeds maximum {high}"
                )
            if high == 0:
                column = "()"
            else:
                # One value, then (count - 1) further ",value" repetitions.
                repeat = "{%d,%d}" % (max(low, 1) - 1, high - 1)
                column = r"[^,]+(?:,[^,]+)" + repeat
                column = f"({column})" if low else f"((?:{column})?)"
            if low == 0:
                # A list that may be empty can also sit right at end of line.
                separator = r"(?:,|$)"
        else:
            column = r"([^,]+)"
        if position:
            pieces.append(separator)
        pieces.append(column)
    return "".join(pieces)


def _aggregate(values, func):
    """Apply the aggregation named *func* to *values*; unknown names are a no-op."""
    if func == "sum":
        return sum(values)
    if func == "media":
        # No values means no average; avoid dividing by zero.
        return sum(values) / len(values) if values else None
    return values


def parse_data(lines, header_order, lists, funcs):
    """Convert CSV data lines into a list of dictionaries keyed by column name.

    Each line is stripped and matched from its start against a pattern built
    from the header description. Fields are non-empty runs of non-comma
    characters and must be separated by a comma, so a field is never split
    across two columns. Anything left after the last column (for example
    padding commas) is ignored. Lines that do not match are skipped, and a
    line produces at most one record.

    Args:
        lines: Iterable of data lines (header excluded).
        header_order: Column names in order.
        lists: Maps list columns to their ``(min, max)`` number of values.
        funcs: Maps columns to an aggregation name; ``"sum"`` and ``"media"``
            (arithmetic mean) are recognised and applied to list columns only.

    Returns:
        A list with one dict per matched line. Plain columns hold the field
        text. List columns hold the list of integers found in the field
        (every run of digits), or the aggregated number when an aggregation
        applies. ``"media"`` of an empty list is ``None``.

    Raises:
        re.error: If a list column has a minimum greater than its maximum.
    """
    row_regex = re.compile(_row_pattern(header_order, lists))

    records = []
    for line in lines:
        found = row_regex.match(line.strip())
        if found is None:
            continue
        record = {}
        for name, text in zip(header_order, found.groups()):
            if name in lists:
                numbers = [int(token) for token in re.findall(r"\d+", text)]
                record[name] = _aggregate(numbers, funcs.get(name))
            else:
                record[name] = text
        records.append(record)
    return records
|
|
|
|
|
|
def main():
    """Convert ``alunos2.csv`` through ``alunos5.csv`` into ``.json`` files.

    For each file, the first line is parsed as the header and the remaining
    lines as data; the resulting records are written to a JSON file with the
    same base name, indented by four spaces and with non-ASCII characters
    kept as-is.
    """
    for i in range(2, 6):
        test_file = f"alunos{i}"

        # NOTE(review): assumes the CSV files are UTF-8 encoded - confirm.
        with open(f"{test_file}.csv", "r", encoding="utf-8") as csv_file:
            lines = csv_file.readlines()

        # An empty file has no header line; treat it as an empty header so
        # the output is an empty JSON list instead of an IndexError.
        header_line = lines[0] if lines else ""
        header_order, lists, funcs = parse_header(header_line)

        data = parse_data(lines[1:], header_order, lists, funcs)

        # ensure_ascii=False writes raw non-ASCII characters, so the output
        # encoding is pinned rather than left to the platform default.
        with open(f"{test_file}.json", "w", encoding="utf-8") as json_file:
            json.dump(data, json_file, indent=4, ensure_ascii=False)
|
|
|
|
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
|