"""Disease distribution reports computed from a heart-disease CSV file."""

import csv

try:
    import matplotlib.pyplot as plt
except ImportError:
    # None of the text reports below use plotting, so a missing matplotlib
    # must not prevent them from running.
    plt = None

# Record keys (original Portuguese column name in parentheses):
#   i -> age (idade), s -> sex (sexo), t -> blood pressure (tensao),
#   c -> cholesterol (colesterol), b -> heart rate (batimento),
#   d -> disease flag (doenca)
# Data structure: a list of dicts, e.g. [{"i": 1, "s": "F" | "M", ...}]
data = []

# Header of the key column in show_distrib(), indexed by report type.
_KEY_TITLES = {0: "Sexo", 1: "Faixa Etaria", 2: "Colesterol"}


def readFile(filename):
    """Parse the CSV file *filename* and append one record per row to ``data``.

    The first line is treated as a header and skipped. Blank lines are
    ignored. Each remaining row must provide six columns: age, sex, blood
    pressure, cholesterol, heart rate and disease flag.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if a numeric column is not a valid integer.
        IndexError: if a row has fewer than six columns.
    """
    # newline='' is what the csv module expects from the underlying file.
    with open(filename, 'r', newline='') as f:
        rows = csv.reader(f)
        next(rows, None)  # skip the header; an empty file simply yields nothing
        for values in rows:
            if not any(cell.strip() for cell in values):
                continue  # blank line (commonly the trailing one)
            data.append({
                'i': int(values[0]),
                's': values[1],
                't': int(values[2]),
                'c': int(values[3]),
                'b': int(values[4]),
                # bool() of any non-empty string is True, so the text must be
                # converted to a number first.
                # NOTE(review): assumes the column holds an integer flag
                # (0/1) - confirm against the CSV.
                'd': bool(int(values[5])),
            })


def _bucket_label(value, width):
    """Return the ``"low-high"`` label of the *width*-wide bucket holding *value*."""
    low = (value // width) * width
    return f"{low}-{low + width - 1}"


def _as_fractions(counts, total):
    """Return a new dict mapping every key of *counts* to ``count / total``."""
    # With no records *counts* is empty, so total == 0 never reaches the division.
    return {key: count / total for key, count in counts.items()}


def distrib_sexo():
    """Return, per sex, the fraction of all records that have the disease.

    The denominator is the total number of records in ``data``, not the
    number of diseased records.
    """
    counts = {}
    for entry in data:
        if entry['d']:
            counts[entry['s']] = counts.get(entry['s'], 0) + 1
    return _as_fractions(counts, len(data))


def distrib_etaria():
    """Return, per 5-year age bucket, the fraction of all records with the disease.

    Only records with age >= 30 are counted; buckets are labelled
    ``"30-34"``, ``"35-39"`` and so on. The denominator is the total number
    of records in ``data``.
    """
    counts = {}
    for entry in data:
        if entry['i'] >= 30 and entry['d']:
            label = _bucket_label(entry['i'], 5)
            counts[label] = counts.get(label, 0) + 1
    return _as_fractions(counts, len(data))


def distrib_colesterol():
    """Return, per 10-unit cholesterol bucket, the fraction of all records in it.

    Buckets are labelled ``"180-189"``, ``"190-199"`` and so on.
    """
    # NOTE(review): unlike distrib_sexo/distrib_etaria this counts every
    # record, not only those with the disease flag set - confirm intended.
    counts = {}
    for entry in data:
        label = _bucket_label(entry['c'], 10)
        counts[label] = counts.get(label, 0) + 1
    return _as_fractions(counts, len(data))


def _row_order(key):
    """Sort key placing ``"low-high"`` labels in numeric order of ``low``.

    Keys without a leading integer (e.g. ``"F"``/``"M"``) sort after the
    numeric ones, alphabetically.
    """
    text = str(key)
    try:
        return (0, int(text.partition('-')[0]), text)
    except ValueError:
        return (1, 0, text)


def show_distrib(type, dist):
    """Print *dist* as an ASCII table.

    Args:
        type: selects the key-column header: 0 -> "Sexo",
            1 -> "Faixa Etaria", 2 -> "Colesterol", anything else -> "Key".
        dist: mapping of label -> value to display; may be empty.
    """
    title_key = _KEY_TITLES.get(type, "Key")
    title_val = "Valor"

    # default=0 keeps an empty distribution from raising in max().
    max_key_len = max(max((len(str(k)) for k in dist), default=0), len(title_key))
    max_val_len = max(max((len(str(v)) for v in dist.values()), default=0), len(title_val))

    border = '+' + '-' * (max_key_len + 2) + '+' + '-' * (max_val_len + 2) + '+'
    print(border)
    print(f'| {title_key:{max_key_len}} | {title_val:{max_val_len}} |')
    print(border)
    # Plain string sorting would put "100-104" before "30-34".
    for key, value in sorted(dist.items(), key=lambda item: _row_order(item[0])):
        print(f'| {key:{max_key_len}} | {value:{max_val_len}} |')
    print(border)


def main():
    """Load ``myheart.csv`` and print the disease distribution by age bucket."""
    readFile("myheart.csv")
    show_distrib(1, distrib_etaria())


if __name__ == "__main__":
    main()