Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
178 changes: 178 additions & 0 deletions Exercise09.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
import pandas as pd
import numpy as np
from scipy.optimize import minimize
from scipy.stats import norm
# Challenge 1

data1=pd.read_csv('ponzr1.csv')
#control group
y0=data1[data1['mutation']=='WT']['ponzr1Counts']
def null(p,obs):
B0=p[0]
sigma=p[1]

expected=B0
nll=-1*norm(expected,sigma).logpdf(obs).sum()
return nll

initialGuess1=np.array([1,1])
fitNull=minimize(null,initialGuess1,method="Nelder-Mead",options={'disp':True},args=y0)
fitNull.x
#Current function value: 70.113942
#Iterations: 102
#Function evaluations: 196
#Out[11]: array([ 2395.09997602, 268.39391949])

# Treatments groups
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
le.fit(data1['mutation'])
data1['Index']=le.transform(data1['mutation'])

def alter(p,obs1,obs2):
B0=p[0]
B1=p[1]
sigma=p[2]
expected=B0+B1*obs1
nll=-1*norm(expected,sigma).logpdf(obs2).sum()
return nll

# Treatment of M124K
x_1=data1[(data1['mutation']=='WT')|(data1['mutation']=='M124K')]['Index']
y_1=data1[(data1['mutation']=='WT')|(data1['mutation']=='M124K')]['ponzr1Counts']
initialGuess_1=np.array([1,1,1])
fitalter1=minimize(alter,initialGuess_1,method="Nelder-Mead",options={'disp':True},args=(x_1,y_1))
fitalter1.x
#Current function value: 145.377954
#Iterations: 237
#Function evaluations: 437
#array([ 2311.55001063, 27.84998965, 347.22019726])

#Treatment of V456D
x_2=data1[(data1['mutation']=='WT')|(data1['mutation']=='V456D')]['Index']
y_2=data1[(data1['mutation']=='WT')|(data1['mutation']=='V456D')]['ponzr1Counts']
initialGuess_2=np.array([1,1,1])
fitalter2=minimize(alter,initialGuess_2,method="Nelder-Mead",options={'disp':True},args=(x_2,y_2))
fitalter2.x
#Current function value: 147.589560
#Iterations: 214
#Function evaluations: 388
#array([ -731.49997517, 1042.19999021, 387.819272])

#Treatment of I213N
x_3=data1[(data1['mutation']=='WT')|(data1['mutation']=='I213N')]['Index']
y_3=data1[(data1['mutation']=='WT')|(data1['mutation']=='I213N')]['ponzr1Counts']
initialGuess_3=np.array([1,1,1])
fitalter3=minimize(alter,initialGuess_3,method="Nelder-Mead",options={'disp':True},args=(x_3,y_3))
fitalter3.x
#Current function value: 138.708653
#Iterations: 222
#Function evaluations: 398
#array([2378.19997632, 5.63334186, 248.76137334])

# T-test
from scipy.stats import chi2
D1=2*(fitNull.fun-fitalter1.fun)
1-chi2.cdf(x=D1,df=1) # p-value=1.0
# Treatment of M124K: no effect

D2=2*(fitNull.fun-fitalter2.fun)
1-chi2.cdf(x=D2,df=1) # p-value=1.0
# Treatment of V456D: no effect

D3=2*(fitNull.fun-fitalter3.fun)
1-chi2.cdf(x=D3,df=1) # p-value=1.0
# Treatment of I213N: no effect

# I don't know what happened to my code that I cannot get the right answer
#Feel free to comment on the code. Thanks.

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For each t-test, the null hypothesis should include both the WT and mutation. For example, for the test of I213N, the data should include WT and I213N. The null assumes the mutation does not affect the counts whereas the alternative hypothesis assumes mutation has an effect on the counts. Both tests are based on the same dataset


# Challenge 2

data2=pd.read_csv('MmarinumGrowth.csv')
def NNlike(p,obs1,obs2):
B0=p[0]
B1=p[1]
sigma=p[2]
expected=B0*(obs1/(obs1+B1))
nll=-1*norm(expected,sigma).logpdf(obs2).sum()
return nll

initialGuess=np.array([1,1,1])
fitalter=minimize(NNlike,initialGuess_3,method="Nelder-Mead",options={'disp':True},args=(data2['S'],data2['u']))
fitalter.x
#Optimization terminated successfully.
#Current function value: -30.894287
#Iterations: 134
#Function evaluations: 242
#array([ 1.45896358, 42.57991424, 0.04348732])

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good job

# Challenge 3

data3=pd.read_csv('leafDecomp.csv')

# d=a model
def null(p,obs):
B0=p[0]
sigma=p[1]

expected=B0
nll=-1*norm(expected,sigma).logpdf(obs).sum()
return nll

initialGuess1=np.array([1,1])
fitNull=minimize(null,initialGuess1,method="Nelder-Mead",options={'disp':True},args=data3['decomp'])
fitNull.x
#Optimization terminated successfully.
#Current function value: 228.502110
#Iterations: 96
#Function evaluations: 181
#array([ 589.93609886, 165.61950781])

# d=a+bMs model
def alter1(p,obs1,obs2):
B0=p[0]
B1=p[1]
sigma=p[2]
expected=B0+B1*obs1
nll=-1*norm(expected,sigma).logpdf(obs2).sum()
return nll

initialGuess2=np.array([1,1,1])
fitalter1=minimize(alter1,initialGuess2,method="Nelder-Mead",options={'disp':True},args=(data3['Ms'],data3['decomp']))
fitalter1.x
#Optimization terminated successfully.
#Current function value: 189.327541
#Iterations: 176
#Function evaluations: 319
#array([ 316.78109063, 6.33344384, 54.07759617])

# d=a+bMs+cMs2 model
def alter2(p,obs1,obs2):
B0=p[0]
B1=p[1]
B2=p[2]
sigma=p[3]
expected=B0+B1*obs1+B2*obs1*obs1
nll=-1*norm(expected,sigma).logpdf(obs2).sum()
return nll

initialGuess3=np.array([100,10,0,10])
fitalter2=minimize(alter2,initialGuess3,method="Nelder-Mead",options={'disp':True},args=(data3['Ms'],data3['decomp']))
fitalter2.x
#Optimization terminated successfully.
#Current function value: 130.127114
#Iterations: 233
#Function evaluations: 398
#array([ 1.87044779e+02, 1.53036958e+01, -1.04062616e-01, 9.96401605e+00])

# Model comparison

D1=2*(fitNull.fun-fitalter1.fun)
1-chi2.cdf(x=D1,df=1)
# linear model is better than null model.

D2=2*(fitalter1.fun-fitalter2.fun)
1-chi2.cdf(x=D2,df=2)
# A hump-shape model is better than linear model.

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good job