Skip to content
162 changes: 162 additions & 0 deletions exercise9.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
########Question 1########
#load packages and file
import pandas
import scipy
from scipy import optimize
from scipy import stats
from scipy.stats import norm
import numpy
from plotnine import *
#note: used sed in cygwin to change mutation names to numbers
#WT=0, M124K=1, V456D=2, I213N=3
ponzr = pandas.read_csv("ponzr1.csv")
#subset WT and mutation 1
subset1 = ponzr.loc[ponzr.mutation.isin(['0','1']),:]
#subset WT and mutation 2
subset2 = ponzr.loc[ponzr.mutation.isin(['0','2']),:]
#subset WT and mutation 3
subset3 = ponzr.loc[ponzr.mutation.isin(['0','3']),:]

#creating nll function
#null hypothesis
def null(p,obs):
B0=p[0]
sigma=p[1]

expected=B0
nll= -1*scipy.stats.norm(expected,sigma).logpdf(obs.ponzr1Counts).sum()
return nll
#alternate hypothesis
def alter(p,obs):
B0=p[0]
B1=p[1]
sigma=p[2]

expected=B0+B1*obs.mutation
nll= -1*scipy.stats.norm(expected,sigma).logpdf(obs.ponzr1Counts).sum()
return nll

#minimizing nll for first treatment (M124K)
initialGuess=numpy.array([2000,300])
alterGuess=numpy.array([2000,-55,300])
fitNull=scipy.optimize.minimize(null,initialGuess,method="Nelder-Mead",options={'disp':True},args=subset1)
fitAlter=scipy.optimize.minimize(alter,alterGuess,method="Nelder-Mead",options={'disp':True},args=subset1)
print(fitNull)
print(fitAlter)
#likelihood ratio test chi2 for M124K
D=2*(fitNull.fun-fitAlter.fun)
#resulting p-value M124K
1-scipy.stats.chi2.cdf(x=D,df=1)

#minimizing nll for V456D
initialGuess=numpy.array([2000,300])
alterGuess=numpy.array([2000,-55,300])
fitNull=scipy.optimize.minimize(null,initialGuess,method="Nelder-Mead",options={'disp':True},args=subset2)
fitAlter=scipy.optimize.minimize(alter,alterGuess,method="Nelder-Mead",options={'disp':True},args=subset2)
print(fitNull)
print(fitAlter)
#likelihood ratio test chi2 for V456D
D=2*(fitNull.fun-fitAlter.fun)
1-scipy.stats.chi2.cdf(x=D,df=1)
#resulting p-value V456D
print(D)

#minimizing nll for I213N
initialGuess=numpy.array([2000,300])
alterGuess=numpy.array([2000,-55,300])
fitNull=scipy.optimize.minimize(null,initialGuess,method="Nelder-Mead",options={'disp':True},args=subset3)
fitAlter=scipy.optimize.minimize(alter,alterGuess,method="Nelder-Mead",options={'disp':True},args=subset3)
print(fitNull)
print(fitAlter)
#likelihood ratio test chi2 I213N
D=2*(fitNull.fun-fitAlter.fun)
1-scipy.stats.chi2.cdf(x=D,df=1)
#p-value for I213N
print(D)

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good job

#Question 2#
#load packages
import pandas
import numpy
import scipy
from scipy.optimize import minimize
from scipy.stats import norm

#load dataset
mgrowth = pandas.read_csv("MmarinumGrowth.csv")

#define custom likelihood function
def monod(p,obs):
umax = p[0]
Ks = p[1]
sigma = p[2]
expected= umax*obs.S/(obs.S+Ks)
nll= -1*scipy.stats.norm(expected,sigma).logpdf(obs.u).sum()
return nll

#parameter estimates
monodGuess=numpy.array([1,1,1])
fitMonod=minimize(monod,monodGuess,method='Nelder-Mead',options={'disp': True},args=mgrowth)
print(fitMonod.x)

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good job

#####Question 3#####
#load packages
import pandas
import scipy
from scipy import optimize
from scipy import stats
from scipy.stats import norm
import numpy
from plotnine import *
#load dataset
leafy = pandas.read_csv("leafDecomp.csv")

#define custom likelihood functions
def constant(p,obs):
B0=p[0]
sigma=p[1]

expected=B0
nll= -1*scipy.stats.norm(expected,sigma).logpdf(obs.decomp).sum()
return nll

#alternate hypotheses
def linear(p,obs):
B0=p[0]
B1=p[1]
sigma=p[2]

expected=B0+B1*obs.Ms
nll= -1*scipy.stats.norm(expected,sigma).logpdf(obs.decomp).sum()
return nll

def humped(p,obs):
B0=p[0]
B1=p[1]
B2=p[2]
sigma=p[3]

expected=B0+B1*obs.Ms+B2*(obs.Ms)**2
nll= -1*scipy.stats.norm(expected,sigma).logpdf(obs.decomp).sum()
return nll

#parameter estimates
constantGuess=numpy.array([1,1])
linearGuess=numpy.array([1,1,1])
humpedGuess=numpy.array([200,10,-0.2,1])
fitConstant=scipy.optimize.minimize(constant,constantGuess,method="Nelder-Mead",options={'disp':True},args=leafy)
fitLinear=scipy.optimize.minimize(linear,linearGuess,method="Nelder-Mead",options={'disp':True},args=leafy)
fitHumped=scipy.optimize.minimize(humped,humpedGuess,method="Nelder-Mead",options={'disp':True},args=leafy)
print(fitConstant.x)
print(fitLinear.x)
print(fitHumped.x)
#comparison of models
D=2*(fitConstant.fun-fitLinear.fun)
1-scipy.stats.chi2.cdf(x=D,df=1)

E=2*(fitLinear.fun-fitHumped.fun)
1-scipy.stats.chi2.cdf(x=E,df=1)

F=2*(fitConstant.fun-fitHumped.fun)
1-scipy.stats.chi2.cdf(x=F,df=2)

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good job

80 changes: 40 additions & 40 deletions ponzr1.csv
Original file line number Diff line number Diff line change
@@ -1,41 +1,41 @@
"mutation","ponzr1Counts"
"WT",2387
"WT",2365
"WT",2761
"WT",2466
"WT",2003
"WT",2704
"WT",2266
"WT",2012
"WT",2221
"WT",2766
"M124K",2322
"M124K",1900
"M124K",2948
"M124K",2041
"M124K",1834
"M124K",2567
"M124K",3096
"M124K",2221
"M124K",1986
"M124K",2479
"V456D",1858
"V456D",1253
"V456D",1469
"V456D",831
"V456D",1018
"V456D",1745
"V456D",1737
"V456D",2122
"V456D",747
"V456D",749
"I213N",2207
"I213N",2590
"I213N",2640
"I213N",2389
"I213N",2425
"I213N",1975
"I213N",2097
"I213N",2525
"I213N",2683
"I213N",2251
"0",2387
"0",2365
"0",2761
"0",2466
"0",2003
"0",2704
"0",2266
"0",2012
"0",2221
"0",2766
"1",2322
"1",1900
"1",2948
"1",2041
"1",1834
"1",2567
"1",3096
"1",2221
"1",1986
"1",2479
"2",1858
"2",1253
"2",1469
"2",831
"2",1018
"2",1745
"2",1737
"2",2122
"2",747
"2",749
"3",2207
"3",2590
"3",2640
"3",2389
"3",2425
"3",1975
"3",2097
"3",2525
"3",2683
"3",2251