-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path02SkinFeatureRanking.py
More file actions
136 lines (109 loc) · 5.29 KB
/
02SkinFeatureRanking.py
File metadata and controls
136 lines (109 loc) · 5.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import numpy as np
def linearRegressionScore(X, y):
    """Return the in-sample R^2 of a linear regression of ``y`` on ``X``.

    The model is fitted and scored on the same data, so the result is the
    coefficient of determination of the fit itself, not a held-out score.

    Args:
        X: Feature matrix accepted by ``LinearRegression.fit``.
        y: Target values accepted by ``LinearRegression.fit``.

    Returns:
        The value of ``LinearRegression.score(X, y)`` after fitting.
    """
    from sklearn.linear_model import LinearRegression

    # ``fit`` returns the estimator itself, so the calls can be chained.
    fitted = LinearRegression().fit(X, y)
    return fitted.score(X, y)
def LR(data):
    """Regress the ``prediction`` column on the remaining columns of ``data``.

    Every column except ``prediction`` (and ``label``, when present) is used
    as a feature. Features are standardized with ``StandardScaler`` before an
    ordinary ``LinearRegression`` is fitted, and the in-sample fitted values
    are stored in a new ``LRPrediction`` column.

    Args:
        data: DataFrame containing a ``prediction`` column plus the feature
            columns. It is not modified; a copy is returned.

    Returns:
        A copy of ``data`` with an added ``LRPrediction`` column.

    Raises:
        KeyError: If ``data`` has no ``prediction`` column.
    """
    dataCopy = data.copy()
    # "prediction" is the regression target and must exist (KeyError if not).
    features = dataCopy.drop(columns=["prediction"])
    # "label" is optional: leave it out of the features only when present,
    # instead of hiding every possible error behind a bare ``except``.
    features = features.drop(columns=["label"], errors="ignore")
    model = make_pipeline(StandardScaler(), LinearRegression())
    model.fit(features, dataCopy["prediction"])
    dataCopy["LRPrediction"] = model.predict(features)
    return dataCopy
def LRWithCol(data, col):
    """Regress column ``col`` on all other columns of ``data``.

    The remaining columns are standardized with ``StandardScaler`` and fed
    to a ``LinearRegression``; the in-sample fitted values are written to a
    new ``LRPrediction`` column.

    Args:
        data: DataFrame holding the target column and the feature columns.
            It is not modified; a copy is returned.
        col: Name of the column to use as the regression target.

    Returns:
        A copy of ``data`` with an added ``LRPrediction`` column.
    """
    result = data.copy()
    predictors = result.drop(columns=[col])
    target = result[col]
    pipeline = make_pipeline(StandardScaler(), LinearRegression())
    pipeline.fit(predictors, target)
    result["LRPrediction"] = pipeline.predict(predictors)
    return result
def evaluation(predictionAge):
    """Compute agreement metrics between the ``Age`` and ``prediction`` columns.

    Args:
        predictionAge: DataFrame with at least the columns ``Age`` and
            ``prediction``.

    Returns:
        A 4-tuple ``(mse, mae, r2, pearson)`` where

        * ``mse`` is the mean squared error between ``Age`` and ``prediction``,
        * ``mae`` is the mean absolute error between ``Age`` and ``prediction``,
        * ``r2`` is the in-sample R^2 of a linear regression of ``prediction``
          on ``Age`` (via ``linearRegressionScore``),
        * ``pearson`` is the Pearson correlation between ``prediction`` and
          ``Age``.
    """
    from sklearn.metrics import mean_absolute_error, mean_squared_error

    age = predictionAge["Age"]
    predicted = predictionAge["prediction"]
    # Distinct local names so the imported metric functions are not shadowed.
    mse = mean_squared_error(age.values, predicted.values)
    mae = mean_absolute_error(age.values, predicted.values)
    r2 = linearRegressionScore(predictionAge[["Age"]], predictionAge[["prediction"]])
    # Correlate the prediction column explicitly rather than taking the first
    # entry of a column-wise corrwith, which depended on column order.
    pearson = predicted.corr(age, method="pearson")
    return mse, mae, r2, pearson
def AgeAccelerationResidual(
    control,
    case,
    method,
):
    """Add a residual column named ``method`` to the control and case frames.

    A standardize-then-linear-regression pipeline is fitted on the control
    samples only (features: every column except ``prediction`` and
    ``LRPrediction``; target: ``prediction``).

    * For cases, the residual is ``prediction`` minus the control model's
      prediction for the case features.
    * For controls, the residual is ``prediction`` minus the existing
      ``LRPrediction`` column.

    Args:
        control: DataFrame of control samples containing ``prediction`` and
            ``LRPrediction`` columns; all columns must be castable to float.
        case: DataFrame of case samples with the same columns.
        method: Name of the residual column to create in both frames.

    Returns:
        ``(control, case)`` as float-cast copies with the new column added.
    """
    # astype returns new frames, so the callers' inputs are left untouched.
    control = control.astype(float)
    case = case.astype(float)

    nonFeatureColumns = ["prediction", "LRPrediction"]
    controlFeatures = control.drop(columns=nonFeatureColumns)
    caseFeatures = case.drop(columns=nonFeatureColumns)

    pipeline = make_pipeline(StandardScaler(), LinearRegression())
    pipeline.fit(controlFeatures, control["prediction"])

    case[method] = case["prediction"] - pipeline.predict(caseFeatures)
    control[method] = control["prediction"] - control["LRPrediction"]
    return control, case
def AgeAcc(predictionResult, cofunder):
    """Compute the confounder-adjusted residual ("Full") for controls and cases.

    Samples are split by the ``Label`` column of ``cofunder`` (0 = control,
    1 = case). The remaining confounder columns (everything except ``Label``
    and ``Cohort``) are joined to each group, ``LR`` adds the fitted
    ``LRPrediction`` column, and ``AgeAccelerationResidual`` adds the
    ``Full`` residual column.

    When there are no case samples, the control group stands in for the case
    group during the computation and only the control rows are returned.

    Args:
        predictionResult: DataFrame joined onto the labels by index; after
            joining it must provide the ``prediction`` column used by ``LR``.
        cofunder: DataFrame with ``Label`` and ``Cohort`` columns plus the
            confounder columns.

    Returns:
        DataFrame with columns ``Full`` and ``Tag`` (``"Control"``/``"Case"``).
    """
    labels = cofunder[["Label"]].astype(int)
    labelled = labels.join(predictionResult).dropna()

    controlOriginal = labelled[labelled["Label"] == 0].drop(columns="Label")
    caseOriginal = labelled[labelled["Label"] == 1].drop(columns="Label")

    hasCases = caseOriginal.shape[0] != 0
    if not hasCases:
        # No cases available: reuse the controls so the pipeline still runs.
        caseOriginal = controlOriginal

    confounders = cofunder.drop(columns=["Label", "Cohort"])
    controlFull = LR(controlOriginal.join(confounders))
    caseFull = LR(caseOriginal.join(confounders))

    controlFull, caseFull = AgeAccelerationResidual(controlFull, caseFull, "Full")
    controlFull["Tag"] = "Control"
    caseFull["Tag"] = "Case"

    if hasCases:
        ageGapFullAdjusted = pd.concat([controlFull, caseFull])
    else:
        ageGapFullAdjusted = controlFull
    return ageGapFullAdjusted[["Full", "Tag"]]
def AgeAccPerGO(data4Stage2, cofunder):
    """Compute, per column of ``data4Stage2``, its confounder-adjusted residual.

    For each column the values are regressed on the confounders (all columns
    of ``cofunder`` except ``Label`` and ``Cohort``) with ``LRWithCol``; the
    residual is the observed value minus the fitted ``LRPrediction``. Rows
    with missing values are dropped per column before fitting.

    Side effect: the combined result is written to ``./AgeAccPerGO.csv``.

    Args:
        data4Stage2: DataFrame whose columns are processed one at a time.
        cofunder: DataFrame with ``Label`` and ``Cohort`` columns plus the
            confounder columns, joined to ``data4Stage2`` by index.

    Returns:
        DataFrame with one residual column per column of ``data4Stage2``.
    """
    # The confounder matrix does not depend on the column being processed,
    # so build it once instead of once per loop iteration.
    Full = cofunder.drop(columns=["Label", "Cohort"])
    AgeGapList = []
    for col in data4Stage2.columns:
        controlFull = data4Stage2[[col]].join(Full).dropna()
        controlFull = LRWithCol(controlFull, col)
        controlFull = controlFull.astype(float)
        controlFull["Full"] = controlFull[col] - controlFull["LRPrediction"]
        # Keep only the residual, stored under the original column name.
        controlFull = controlFull[["Full"]].rename(columns={"Full": col})
        AgeGapList.append(controlFull)
    AgeGapGO = pd.concat(AgeGapList, axis=1)
    AgeGapGO.to_csv("./AgeAccPerGO.csv")
    return AgeGapGO
def skinFeatureRanking(predictionResult=None, cofunder=None, data4Stage2=None):
    """Rank the columns of ``data4Stage2`` by correlation with the control residual.

    The ``Full`` residual of the control samples (from ``AgeAcc``) is
    correlated with every per-column residual produced by ``AgeAccPerGO``.
    The table is sorted by absolute correlation; the top 20 rows and summary
    statistics for each direction are printed.

    Side effects: reads ``./GO_Description.csv``, prints to stdout, and
    (through ``AgeAccPerGO``) writes ``./AgeAccPerGO.csv``.

    Args:
        predictionResult: Forwarded to ``AgeAcc``.
        cofunder: Forwarded to ``AgeAcc`` and ``AgeAccPerGO``.
        data4Stage2: Forwarded to ``AgeAccPerGO``.

    Returns:
        DataFrame indexed by column name with columns ``Rho``,
        ``Description``, ``Direction`` and ``RhoAbs``, sorted by ``RhoAbs``
        in descending order.

    Raises:
        TypeError: If any of the three inputs is missing. The helpers need
            them; calling them without arguments always failed.
    """
    if predictionResult is None or cofunder is None or data4Stage2 is None:
        raise TypeError(
            "skinFeatureRanking() requires predictionResult, cofunder and "
            "data4Stage2"
        )
    AgeAccControl = AgeAcc(predictionResult, cofunder)
    AgeGapGO = AgeAccPerGO(data4Stage2, cofunder)
    control = AgeAccControl[AgeAccControl["Tag"].eq("Control")][["Full"]].join(AgeGapGO)
    correlationControl = control.drop(columns=["Full"]).corrwith(control["Full"])
    correlationControl = pd.DataFrame(
        data=correlationControl.values,
        index=correlationControl.index,
        columns=["Rho"],
    )
    goDescription = pd.read_csv('./GO_Description.csv', index_col=0)
    # NOTE(review): list(goDescription) yields the column *names*, so this maps
    # the first few ``names`` values to lists of characters of column names.
    # It looks like row values were intended -- confirm against the CSV schema
    # before changing; kept as-is here.
    goDescription = dict(zip(goDescription.names, map(list, list(goDescription))))
    correlationControl["Description"] = [goDescription[keys][0] for keys in correlationControl.index]
    # Sign of the correlation. The "Negtive" spelling is a runtime value that
    # the filters below (and possibly callers) rely on, so it is left as-is.
    correlationControl['Direction'] = np.where(correlationControl["Rho"] < 0, "Negtive", "Positive")
    correlationControl["RhoAbs"] = correlationControl["Rho"].abs()
    correlationControl = correlationControl.sort_values(by=["RhoAbs"], ascending=False)
    print(correlationControl.nlargest(20, "RhoAbs"))
    print(correlationControl[correlationControl.Direction.eq("Positive")].describe())
    print(correlationControl[correlationControl.Direction.eq("Negtive")].describe())
    return correlationControl