-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathprocess.py
More file actions
42 lines (29 loc) · 1.19 KB
/
process.py
File metadata and controls
42 lines (29 loc) · 1.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import pandas as PANDA
import matplotlib.pyplot as plt
import math as Math
def pearsons_correlation(a, b):
    """Return the Pearson correlation coefficient of two numeric sequences.

    The coefficient is the covariance of the mean-centred values divided by
    the product of their standard deviations, so it is invariant to shifting
    or positively rescaling either input.

    Args:
        a: Iterable of numbers.
        b: Iterable of numbers, the same length as ``a``.

    Returns:
        A float in [-1, 1], or NaN when the coefficient is undefined (empty
        input, or either sequence has zero variance).

    Raises:
        ValueError: If ``a`` and ``b`` have different lengths.
    """
    # Materialise once so generators work and each input is read only once.
    xs = list(a)
    ys = list(b)
    if len(xs) != len(ys):
        raise ValueError("a and b must have the same length")

    count = len(xs)
    if count == 0:
        return float("nan")

    mean_x = Math.fsum(xs) / count
    mean_y = Math.fsum(ys) / count

    # Centre the data: without this step the formula is the cosine
    # similarity of the raw vectors, not Pearson's r.
    dev_x = [x - mean_x for x in xs]
    dev_y = [y - mean_y for y in ys]

    covariance = Math.fsum(p * q for p, q in zip(dev_x, dev_y))
    spread = Math.sqrt(
        Math.fsum(p * p for p in dev_x) * Math.fsum(q * q for q in dev_y)
    )
    if spread == 0:
        # A constant sequence has no variance, so r is undefined.
        return float("nan")
    return covariance / spread
def corr_heatmap(data_frame, size=11):
    """Display the pairwise column correlations of a DataFrame as a heat map.

    Args:
        data_frame: Object exposing ``corr()`` whose result has a ``columns``
            attribute (a pandas DataFrame in this script).
        size: Width and height of the square figure, passed to ``figsize``.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    correlation = data_frame.corr()
    labels = correlation.columns
    positions = list(range(len(labels)))

    _, heatmap = plt.subplots(figsize=(size, size))
    heatmap.matshow(correlation)

    # Set ticks on the axes we created rather than on pyplot's implicit
    # "current axes", so the labels always land on this heat map.
    heatmap.set_xticks(positions)
    heatmap.set_xticklabels(labels)
    heatmap.set_yticks(positions)
    heatmap.set_yticklabels(labels)

    # show() accepts only the keyword argument `block`; the stray positional
    # string previously passed here raises TypeError on current Matplotlib.
    plt.show()
if __name__ == "__main__":
    # Script entry point: load the raw data, let the user drop columns after
    # inspecting the correlation heat map, then write the cleaned CSV.
    # NOTE(review): the nesting below is reconstructed; all cleaning steps are
    # assumed to run only when the data has no missing values - confirm.
    data_frame = PANDA.read_csv('pima-data.csv')

    if data_frame.isnull().values.any():
        print("data isn't consistence....")
    else:
        corr_heatmap(data_frame)

        print("Enter column name to delete: ")
        # Whitespace-separated column names typed by the user.
        for column in input().split():
            if column in data_frame.columns:
                del data_frame[column]
            else:
                # A mistyped (or repeated) name must not abort the run.
                print("Skipping unknown column:", column)

        print('After Cleaning......')
        corr_heatmap(data_frame)
        print(data_frame.head())

        # Encode the boolean label as 1/0, unless the user removed it above.
        if 'diabetes' in data_frame.columns:
            map_diabetes = {True: 1, False: 0}
            data_frame['diabetes'] = data_frame['diabetes'].map(map_diabetes)

        data_frame.to_csv('cleaned_data.csv', index=False, encoding='utf-8')