"""Distributions of heart-disease records (TPC1).

``readFile`` loads CSV rows into the module-level ``data`` list; the
``distrib_*`` functions summarise that list as ``{label: fraction}``
dictionaries and ``show_distrib`` prints one of them as an ASCII table.
"""

import csv
from collections import Counter

try:
    import matplotlib.pyplot as plt
except ImportError:
    # Nothing in this module draws a plot, so the text tables must keep
    # working on machines where matplotlib is not installed.
    plt = None

# Record layout (one dict per CSV row):
#   age -> "i", sex -> "s", blood pressure -> "t",
#   cholesterol -> "c", heart rate -> "b", has disease -> "d"
# e.g. [{"i": 1, "s": "F" | "M", ...}]
data = []

# First-column heading used by show_distrib for each distribution type.
_COLUMN_TITLES = {0: "Sexo", 1: "Faixa Etaria", 2: "Colesterol"}


def _parse_flag(text: str) -> bool:
    """Convert a 0/1 (or true/false) CSV field to a bool.

    ``bool("0")`` is True because the string is non-empty, so the field has
    to be compared against its textual values instead.

    Raises:
        ValueError: if the field is none of ``0``, ``1``, ``true``, ``false``.
    """
    token = text.strip().lower()
    if token in ("1", "true"):
        return True
    if token in ("0", "false"):
        return False
    raise ValueError(f"unrecognised disease flag: {text!r}")


def _bracket(value: int, width: int) -> str:
    """Return the ``"low-high"`` label of the ``width``-wide bracket holding ``value``."""
    low = (value // width) * width
    return f"{low}-{low + width - 1}"


def _as_fractions(counts: dict, total: int) -> dict:
    """Divide every count by ``total``; an empty dataset yields ``{}``."""
    if not total:
        return {}
    return {label: count / total for label, count in counts.items()}


def _label_sort_key(label):
    """Order ``"low-high"`` labels by their numeric lower bound.

    Plain string ordering would place ``"100-109"`` before ``"90-99"``.
    Labels without a numeric prefix (e.g. ``"F"``/``"M"``) sort
    alphabetically after the numeric ones.
    """
    text = str(label)
    low = text.partition('-')[0]
    try:
        return (0, int(low), text)
    except ValueError:
        return (1, 0, text)


def readFile(filename: str) -> None:
    """Append every record of the CSV file ``filename`` to ``data``.

    The first line is treated as a header and skipped; blank lines are
    ignored. Columns are read by position: age, sex, blood pressure,
    cholesterol, heart rate, disease flag.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if a numeric column or the disease flag cannot be parsed.
        IndexError: if a non-blank row has fewer than six columns.
    """
    # newline="" is what the csv module asks for when given a file object.
    with open(filename, 'r', newline='') as handle:
        rows = csv.reader(handle)
        next(rows, None)  # header; the default keeps an empty file from raising
        for values in rows:
            if not any(field.strip() for field in values):
                continue
            data.append({
                'i': int(values[0]),
                's': values[1],
                't': int(values[2]),
                'c': int(values[3]),
                'b': int(values[4]),
                'd': _parse_flag(values[5]),
            })


def distrib_sexo() -> dict:
    """Return, per sex, the fraction of all records that are diseased with that sex.

    The denominator is ``len(data)``, not the number of diseased records.
    """
    counts = Counter(entry['s'] for entry in data if entry['d'])
    return _as_fractions(counts, len(data))


def distrib_etaria() -> dict:
    """Return, per 5-year age bracket, the fraction of all records that are diseased.

    Only diseased records aged 30 or more are counted; labels look like
    ``"30-34"``. The denominator is ``len(data)``.
    """
    counts = Counter(
        _bracket(entry['i'], 5)
        for entry in data
        if entry['i'] >= 30 and entry['d']
    )
    return _as_fractions(counts, len(data))


def distrib_colesterol() -> dict:
    """Return, per 10-unit cholesterol bracket, the fraction of all records in it.

    Every record is counted regardless of the disease flag; labels look
    like ``"240-249"``.
    """
    counts = Counter(_bracket(entry['c'], 10) for entry in data)
    return _as_fractions(counts, len(data))


def show_distrib(type, dist):
    """Print ``dist`` as a two-column ASCII table.

    Args:
        type: 0, 1 or 2 selects the first-column heading ("Sexo",
            "Faixa Etaria", "Colesterol"); any other value uses "Key".
        dist: mapping of label to value, as returned by the ``distrib_*``
            functions. An empty mapping prints the header only.
    """
    title_key = _COLUMN_TITLES.get(type, "Key")
    title_val = "Valor"

    # Seeding max() with the heading width keeps it from failing on an
    # empty distribution.
    key_width = max([len(title_key), *(len(str(k)) for k in dist)])
    val_width = max([len(title_val), *(len(str(v)) for v in dist.values())])

    rule = '+' + '-' * (key_width + 2) + '+' + '-' * (val_width + 2) + '+'
    row = '| {:{}} | {:{}} |'

    print(rule)
    print(row.format(title_key, key_width, title_val, val_width))
    print(rule)
    for label in sorted(dist, key=_label_sort_key):
        print(row.format(label, key_width, dist[label], val_width))
    print(rule)


def main():
    """Load ``myheart.csv`` and print the age-bracket distribution."""
    readFile("myheart.csv")
    # distrib_sexo() and distrib_colesterol() feed show_distrib types 0 and 2.
    show_distrib(1, distrib_etaria())


if __name__ == "__main__":
    main()