113 lines
3.2 KiB
Python
113 lines
3.2 KiB
Python
import matplotlib.pyplot as plt
|
|
|
|
# Record keys: i = age (idade), s = sex (sexo), t = blood pressure (tensao), c = cholesterol (colesterol), b = heart rate (batimento), d = has disease (doenca)
|
|
# Data structure: a list of dicts, one per record, e.g. [{"i": 1, "s": "F" or "M", ...}]
|
|
# Module-level store of parsed records; readFile() appends one dict per CSV
# row and the distrib_* functions read from it.
data: list[dict] = []
|
|
|
|
def readFile(filename):
    """Load records from a CSV file into the module-level ``data`` list.

    The first line is treated as a header and skipped. Every other non-blank
    line is split on commas and its first six fields are stored as a dict
    with the keys 'i', 't', 'c', 'b' (converted to int), 's' (kept as text)
    and 'd' (bool).

    Args:
        filename: Path of the CSV file to read.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a numeric field cannot be converted to int.
        IndexError: If a non-blank line has fewer than six fields.
    """
    # ``with`` guarantees the handle is closed even when a row fails to parse.
    with open(filename, 'r') as f:
        # Skip the header; the default keeps an empty file from raising.
        next(f, None)
        for line in f:
            # A trailing blank line would otherwise crash int('').
            if not line.strip():
                continue
            values = line.split(',')
            # bool() of any non-empty string is True, so "0" must be compared
            # as text. Unknown non-empty tokens stay truthy, as before.
            flag = values[5].strip().lower()
            data.append({
                'i': int(values[0]),
                's': values[1],
                't': int(values[2]),
                'c': int(values[3]),
                'b': int(values[4]),
                'd': flag not in ('', '0', 'false'),
            })
|
|
|
|
def distrib_sexo():
    """Return the share of all records that have the disease, split by sex.

    Only records whose 'd' flag is truthy are counted, but each count is
    divided by the total number of records in ``data``.

    Returns:
        dict mapping each 's' value seen among diseased records to
        count / len(data). Empty when no record has the disease.
    """
    total = len(data)
    counts = {}
    for record in data:
        if not record['d']:
            continue
        sex = record['s']
        counts[sex] = counts.get(sex, 0) + 1
    return {sex: count / total for sex, count in counts.items()}
|
|
|
|
|
|
def distrib_etaria():
    """Return the share of all records that have the disease, by age band.

    Diseased records aged 30 or more are grouped into five-year bands
    labelled "<low>-<low+4>" (e.g. "30-34"). Each count is divided by the
    total number of records in ``data``.

    Returns:
        dict mapping band label to count / len(data).
    """
    total = len(data)
    counts = {}
    for record in data:
        age = record['i']
        if age < 30 or not record['d']:
            continue
        lower = age - age % 5  # floor to the start of the 5-year band
        label = f"{lower}-{lower + 4}"
        counts[label] = counts.get(label, 0) + 1
    return {label: count / total for label, count in counts.items()}
|
|
|
|
|
|
def distrib_colesterol():
    """Return the share of records falling in each cholesterol band.

    Every record is counted, whatever its 'd' flag. The 'c' value is grouped
    into bands of ten labelled "<low>-<low+9>" (e.g. "200-209"), and each
    count is divided by the total number of records in ``data``.

    Returns:
        dict mapping band label to count / len(data).
    """
    total = len(data)
    counts = {}
    for record in data:
        level = record['c']
        lower = level - level % 10  # floor to the start of the 10-wide band
        label = f"{lower}-{lower + 9}"
        counts[label] = counts.get(label, 0) + 1
    return {label: count / total for label, count in counts.items()}
|
|
|
|
|
|
def show_distrib(type,dist):
|
|
title_key = "Key"
|
|
title_val = "Valor"
|
|
match type:
|
|
case 0:
|
|
title_key = "Sexo"
|
|
case 1:
|
|
title_key = "Faixa Etaria"
|
|
case 2:
|
|
title_key = "Colesterol"
|
|
max_key_len = max(max(len(str(k)) for k in dist.keys()),len(title_key))
|
|
max_val_len = max(max(len(str(v)) for v in dist.values()),len(title_val))
|
|
|
|
print('+' + '-'*(max_key_len+2) + '+' + '-'*(max_val_len+2) + '+')
|
|
print('| {:{}} | {:{}} |'.format(title_key, max_key_len, title_val, max_val_len))
|
|
print('+' + '-'*(max_key_len+2) + '+' + '-'*(max_val_len+2) + '+')
|
|
for title_key, title_val in sorted(dist.items()):
|
|
print('| {:{}} | {:{}} |'.format(title_key, max_key_len, title_val, max_val_len))
|
|
print('+' + '-'*(max_key_len+2) + '+' + '-'*(max_val_len+2) + '+')
|
|
|
|
|
|
def main():
    """Load "myheart.csv", compute the distributions and print the age table.

    All three distributions are computed, but only the age-band one is
    displayed.
    """
    readFile("myheart.csv")
    by_sex = distrib_sexo()  # computed but not displayed
    by_age = distrib_etaria()
    by_cholesterol = distrib_colesterol()  # computed but not displayed
    show_distrib(1, by_age)
|
|
|
|
|
|
# Run the analysis only when this file is executed as a script.
if __name__ == "__main__":
    main()
|