diff --git a/exercise9.py b/exercise9.py new file mode 100755 index 0000000..239af9f --- /dev/null +++ b/exercise9.py @@ -0,0 +1,162 @@ +########Question 1######## +#load packages and file +import pandas +import scipy +from scipy import optimize +from scipy import stats +from scipy.stats import norm +import numpy +from plotnine import * +#note: used sed in cygwin to change mutation names to numbers +#WT=0, M124K=1, V456D=2, I213N=3 +ponzr = pandas.read_csv("ponzr1.csv") +#subset WT and mutation 1 +subset1 = ponzr.loc[ponzr.mutation.isin(['0','1']),:] +#subset WT and mutation 2 +subset2 = ponzr.loc[ponzr.mutation.isin(['0','2']),:] +#subset WT and mutation 3 +subset3 = ponzr.loc[ponzr.mutation.isin(['0','3']),:] + +#creating nll function +#null hypothesis +def null(p,obs): + B0=p[0] + sigma=p[1] + + expected=B0 + nll= -1*scipy.stats.norm(expected,sigma).logpdf(obs.ponzr1Counts).sum() + return nll +#alternate hypothesis +def alter(p,obs): + B0=p[0] + B1=p[1] + sigma=p[2] + + expected=B0+B1*obs.mutation + nll= -1*scipy.stats.norm(expected,sigma).logpdf(obs.ponzr1Counts).sum() + return nll + +#minimizing nll for first treatment (M124K) +initialGuess=numpy.array([2000,300]) +alterGuess=numpy.array([2000,-55,300]) +fitNull=scipy.optimize.minimize(null,initialGuess,method="Nelder-Mead",options={'disp':True},args=subset1) +fitAlter=scipy.optimize.minimize(alter,alterGuess,method="Nelder-Mead",options={'disp':True},args=subset1) +print(fitNull) +print(fitAlter) +#likelihood ratio test chi2 for M124K +D=2*(fitNull.fun-fitAlter.fun) +#resulting p-value M124K +1-scipy.stats.chi2.cdf(x=D,df=1) + +#minimizing nll for V456D +initialGuess=numpy.array([2000,300]) +alterGuess=numpy.array([2000,-55,300]) +fitNull=scipy.optimize.minimize(null,initialGuess,method="Nelder-Mead",options={'disp':True},args=subset2) +fitAlter=scipy.optimize.minimize(alter,alterGuess,method="Nelder-Mead",options={'disp':True},args=subset2) +print(fitNull) +print(fitAlter) +#likelihood ratio test chi2 for V456D +D=2*(fitNull.fun-fitAlter.fun) +1-scipy.stats.chi2.cdf(x=D,df=1) +#resulting p-value V456D +print(D) + +#minimizing nll for I213N +initialGuess=numpy.array([2000,300]) +alterGuess=numpy.array([2000,-55,300]) +fitNull=scipy.optimize.minimize(null,initialGuess,method="Nelder-Mead",options={'disp':True},args=subset3) +fitAlter=scipy.optimize.minimize(alter,alterGuess,method="Nelder-Mead",options={'disp':True},args=subset3) +print(fitNull) +print(fitAlter) +#likelihood ratio test chi2 I213N +D=2*(fitNull.fun-fitAlter.fun) +1-scipy.stats.chi2.cdf(x=D,df=1) +#p-value for I213N +print(D) + +#Question 2# +#load packages +import pandas +import numpy +import scipy +from scipy.optimize import minimize +from scipy.stats import norm + +#load dataset +mgrowth = pandas.read_csv("MmarinumGrowth.csv") + +#define custom likelihood function +def monod(p,obs): + umax = p[0] + Ks = p[1] + sigma = p[2] + expected= umax*obs.S/(obs.S+Ks) + nll= -1*scipy.stats.norm(expected,sigma).logpdf(obs.u).sum() + return nll + +#parameter estimates +monodGuess=numpy.array([1,1,1]) +fitMonod=minimize(monod,monodGuess,method='Nelder-Mead',options={'disp': True},args=mgrowth) +print(fitMonod.x) + +#####Question 3##### +#load packages +import pandas +import scipy +from scipy import optimize +from scipy import stats +from scipy.stats import norm +import numpy +from plotnine import * +#load dataset +leafy = pandas.read_csv("leafDecomp.csv") + +#define custom likelihood functions +def constant(p,obs): + B0=p[0] + sigma=p[1] + + expected=B0 + nll= -1*scipy.stats.norm(expected,sigma).logpdf(obs.decomp).sum() + return nll + +#alternate hypotheses +def linear(p,obs): + B0=p[0] + B1=p[1] + sigma=p[2] + + expected=B0+B1*obs.Ms + nll= -1*scipy.stats.norm(expected,sigma).logpdf(obs.decomp).sum() + return nll + +def humped(p,obs): + B0=p[0] + B1=p[1] + B2=p[2] + sigma=p[3] + + expected=B0+B1*obs.Ms+B2*(obs.Ms)**2 + nll= -1*scipy.stats.norm(expected,sigma).logpdf(obs.decomp).sum() + return nll + +#parameter estimates +constantGuess=numpy.array([1,1]) +linearGuess=numpy.array([1,1,1]) +humpedGuess=numpy.array([200,10,-0.2,1]) +fitConstant=scipy.optimize.minimize(constant,constantGuess,method="Nelder-Mead",options={'disp':True},args=leafy) +fitLinear=scipy.optimize.minimize(linear,linearGuess,method="Nelder-Mead",options={'disp':True},args=leafy) +fitHumped=scipy.optimize.minimize(humped,humpedGuess,method="Nelder-Mead",options={'disp':True},args=leafy) +print(fitConstant.x) +print(fitLinear.x) +print(fitHumped.x) +#comparison of models +D=2*(fitConstant.fun-fitLinear.fun) +1-scipy.stats.chi2.cdf(x=D,df=1) + +E=2*(fitLinear.fun-fitHumped.fun) +1-scipy.stats.chi2.cdf(x=E,df=1) + +F=2*(fitConstant.fun-fitHumped.fun) +1-scipy.stats.chi2.cdf(x=F,df=2) + diff --git a/ponzr1.csv b/ponzr1.csv index d7589d4..0db1eab 100644 --- a/ponzr1.csv +++ b/ponzr1.csv @@ -1,41 +1,41 @@ "mutation","ponzr1Counts" -"WT",2387 -"WT",2365 -"WT",2761 -"WT",2466 -"WT",2003 -"WT",2704 -"WT",2266 -"WT",2012 -"WT",2221 -"WT",2766 -"M124K",2322 -"M124K",1900 -"M124K",2948 -"M124K",2041 -"M124K",1834 -"M124K",2567 -"M124K",3096 -"M124K",2221 -"M124K",1986 -"M124K",2479 -"V456D",1858 -"V456D",1253 -"V456D",1469 -"V456D",831 -"V456D",1018 -"V456D",1745 -"V456D",1737 -"V456D",2122 -"V456D",747 -"V456D",749 -"I213N",2207 -"I213N",2590 -"I213N",2640 -"I213N",2389 -"I213N",2425 -"I213N",1975 -"I213N",2097 -"I213N",2525 -"I213N",2683 -"I213N",2251 +"0",2387 +"0",2365 +"0",2761 +"0",2466 +"0",2003 +"0",2704 +"0",2266 +"0",2012 +"0",2221 +"0",2766 +"1",2322 +"1",1900 +"1",2948 +"1",2041 +"1",1834 +"1",2567 +"1",3096 +"1",2221 +"1",1986 +"1",2479 +"2",1858 +"2",1253 +"2",1469 +"2",831 +"2",1018 +"2",1745 +"2",1737 +"2",2122 +"2",747 +"2",749 +"3",2207 +"3",2590 +"3",2640 +"3",2389 +"3",2425 +"3",1975 +"3",2097 +"3",2525 +"3",2683 +"3",2251