From 2359b43a949769ac4fb63c548440183ceea61799 Mon Sep 17 00:00:00 2001 From: Mati Nemera Date: Fri, 27 Oct 2017 10:59:43 -0400 Subject: [PATCH 01/10] first commit --- exercise9.py | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100755 exercise9.py diff --git a/exercise9.py b/exercise9.py new file mode 100755 index 0000000..4e5a05d --- /dev/null +++ b/exercise9.py @@ -0,0 +1,8 @@ +#Question 1 + + + + +#Question 2# +import pandas +import scipy From 17be60ada0694a4e9bc74f975dd83bc83d924873 Mon Sep 17 00:00:00 2001 From: Katherine Date: Fri, 27 Oct 2017 11:20:18 -0400 Subject: [PATCH 02/10] Added subsets for Q1 --- exercise9.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/exercise9.py b/exercise9.py index 4e5a05d..357c3be 100755 --- a/exercise9.py +++ b/exercise9.py @@ -1,6 +1,17 @@ #Question 1 - - +import pandas +import scipy +ponzr = pandas.read_csv("ponzr1.csv") +#subset WT and mutation 1 +subset1 = ponzr.loc[ponzr.mutation.isin(['WT','M124K']),:] +#subset WT and mutation 2 +subset2 = ponzr.loc[ponzr.mutation.isin(['WT','V456D']),:] +#subset WT and mutation 3 +subset3 = ponzr.loc[ponzr.mutation.isin(['WT','I213N']),:] +initialGuess=numpy.array([1,1,1]) +fitNull=minimize(fun1a,initialGuess,method="Nelder-Mead",options={'disp':True},args=q1) +fitAlter=minimize(fun1b,intialGuess,method="Nelder-Mead",options={'disp':True},args=q1) +1-scipy.stats.chi2.cdf(x=D,df=1) #Question 2# From e7344bac535ef09c23826daf43f53a82fd577e55 Mon Sep 17 00:00:00 2001 From: Katherine Date: Tue, 31 Oct 2017 00:10:54 -0400 Subject: [PATCH 03/10] Trying to do likelihood calcs in Q1 --- exercise9.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/exercise9.py b/exercise9.py index 357c3be..f9338e3 100755 --- a/exercise9.py +++ b/exercise9.py @@ -1,6 +1,11 @@ #Question 1 +#load packages and file import pandas import scipy +from scipy import optimize +from scipy import stats +from scipy.stats import norm +import numpy ponzr = pandas.read_csv("ponzr1.csv") #subset WT and mutation 1 subset1 = ponzr.loc[ponzr.mutation.isin(['WT','M124K']),:] @@ -8,9 +13,25 @@ subset2 = ponzr.loc[ponzr.mutation.isin(['WT','V456D']),:] #subset WT and mutation 3 subset3 = ponzr.loc[ponzr.mutation.isin(['WT','I213N']),:] +#creating nll +def nllike(p,obs): + B0=p[0] + B1=p[1] + sigma=p[2] + + expected=B0+B1*ponzr.ponzr1Counts + nll= -1*scipy.stats.norm(expected,sigma).logpdf(ponzr.mutation).sum() + return nll + print(nllike) + +#minimizing nll initialGuess=numpy.array([1,1,1]) -fitNull=minimize(fun1a,initialGuess,method="Nelder-Mead",options={'disp':True},args=q1) -fitAlter=minimize(fun1b,intialGuess,method="Nelder-Mead",options={'disp':True},args=q1) +fitNull=scipy.optimize.minimize(nllike,initialGuess,method="Nelder-Mead",options={'disp':True},args=1) +fitAlter=scipy.optimize.minimize(nllike,intialGuess,method="Nelder-Mead",options={'disp':True},args=q1) + +#likelihood ratio test chi2 + +D=2*(fitNull.fun-fitAlter.fun) 1-scipy.stats.chi2.cdf(x=D,df=1) From f45125fe02cc9a74ed28666b5038f13f39580426 Mon Sep 17 00:00:00 2001 From: Katherine Date: Tue, 31 Oct 2017 17:35:18 -0400 Subject: [PATCH 04/10] Changed treatments from letters to digits --- ponzr1.csv | 80 +++++++++++++++++++++++++++--------------------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/ponzr1.csv b/ponzr1.csv index d7589d4..0db1eab 100644 --- a/ponzr1.csv +++ b/ponzr1.csv @@ -1,41 +1,41 @@ "mutation","ponzr1Counts" -"WT",2387 -"WT",2365 -"WT",2761 -"WT",2466 -"WT",2003 -"WT",2704 -"WT",2266 -"WT",2012 -"WT",2221 -"WT",2766 -"M124K",2322 -"M124K",1900 -"M124K",2948 -"M124K",2041 -"M124K",1834 -"M124K",2567 -"M124K",3096 -"M124K",2221 -"M124K",1986 -"M124K",2479 -"V456D",1858 -"V456D",1253 -"V456D",1469 -"V456D",831 -"V456D",1018 -"V456D",1745 -"V456D",1737 -"V456D",2122 -"V456D",747 -"V456D",749 -"I213N",2207 -"I213N",2590 -"I213N",2640 -"I213N",2389 -"I213N",2425 -"I213N",1975 -"I213N",2097 -"I213N",2525 -"I213N",2683 -"I213N",2251 +"0",2387 +"0",2365 +"0",2761 +"0",2466 +"0",2003 +"0",2704 +"0",2266 +"0",2012 +"0",2221 +"0",2766 +"1",2322 +"1",1900 +"1",2948 +"1",2041 +"1",1834 +"1",2567 +"1",3096 +"1",2221 +"1",1986 +"1",2479 +"2",1858 +"2",1253 +"2",1469 +"2",831 +"2",1018 +"2",1745 +"2",1737 +"2",2122 +"2",747 +"2",749 +"3",2207 +"3",2590 +"3",2640 +"3",2389 +"3",2425 +"3",1975 +"3",2097 +"3",2525 +"3",2683 +"3",2251 From d9b0fbaeea3a4a689854accc480dd1ebc50a3fbf Mon Sep 17 00:00:00 2001 From: Katherine Date: Tue, 31 Oct 2017 17:35:41 -0400 Subject: [PATCH 05/10] Question 1 completed (I think) --- exercise9.py | 67 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 52 insertions(+), 15 deletions(-) diff --git a/exercise9.py b/exercise9.py index f9338e3..f3092e2 100755 --- a/exercise9.py +++ b/exercise9.py @@ -1,4 +1,4 @@ -#Question 1 +########Question 1######## #load packages and file import pandas import scipy @@ -6,34 +6,71 @@ from scipy import stats from scipy.stats import norm import numpy +from plotnine import * +#note: used cygwin to change mutation names to numbers +#WT=0, M124K=1, V456D=2, I213N=3 ponzr = pandas.read_csv("ponzr1.csv") #subset WT and mutation 1 -subset1 = ponzr.loc[ponzr.mutation.isin(['WT','M124K']),:] +subset1 = ponzr.loc[ponzr.mutation.isin(['0','1']),:] #subset WT and mutation 2 -subset2 = ponzr.loc[ponzr.mutation.isin(['WT','V456D']),:] +subset2 = ponzr.loc[ponzr.mutation.isin(['0','2']),:] #subset WT and mutation 3 -subset3 = ponzr.loc[ponzr.mutation.isin(['WT','I213N']),:] -#creating nll -def nllike(p,obs): +subset3 = ponzr.loc[ponzr.mutation.isin(['0','3']),:] + +#creating nll function +#null hypothesis +def null(p,obs): + B0=p[0] + sigma=p[1] + + expected=B0 + nll= -1*scipy.stats.norm(expected,sigma).logpdf(obs.ponzr1Counts).sum() + return nll +#alternate hypothesis +def alter(p,obs): B0=p[0] B1=p[1] sigma=p[2] - expected=B0+B1*ponzr.ponzr1Counts - nll= -1*scipy.stats.norm(expected,sigma).logpdf(ponzr.mutation).sum() + expected=B0+B1*obs.mutation + nll= -1*scipy.stats.norm(expected,sigma).logpdf(obs.ponzr1Counts).sum() return nll - print(nllike) - -#minimizing nll -initialGuess=numpy.array([1,1,1]) -fitNull=scipy.optimize.minimize(nllike,initialGuess,method="Nelder-Mead",options={'disp':True},args=1) -fitAlter=scipy.optimize.minimize(nllike,intialGuess,method="Nelder-Mead",options={'disp':True},args=q1) -#likelihood ratio test chi2 +#minimizing nll for first treatment (M124K) +initialGuess=numpy.array([1,1,1]) +fitNull=scipy.optimize.minimize(null,initialGuess,method="Nelder-Mead",options={'disp':True},args=subset1) +fitAlter=scipy.optimize.minimize(alter,initialGuess,method="Nelder-Mead",options={'disp':True},args=subset1) +print(fitNull) +print(fitAlter) +#likelihood ratio test chi2 for M124K +D=2*(fitNull.fun-fitAlter.fun) +1-scipy.stats.chi2.cdf(x=D,df=1) +#resulting p-value M124K +print(D) +#minimizing nll for V456D +initialGuess=numpy.array([1,1,1]) +fitNull=scipy.optimize.minimize(null,initialGuess,method="Nelder-Mead",options={'disp':True},args=subset2) +fitAlter=scipy.optimize.minimize(alter,initialGuess,method="Nelder-Mead",options={'disp':True},args=subset2) +print(fitNull) +print(fitAlter) +#likelihood ratio test chi2 for V456D D=2*(fitNull.fun-fitAlter.fun) 1-scipy.stats.chi2.cdf(x=D,df=1) +#resulting p-value V456D +print(D) +#minimizing nll for I213N +initialGuess=numpy.array([1,1,1]) +fitNull=scipy.optimize.minimize(null,initialGuess,method="Nelder-Mead",options={'disp':True},args=subset3) +fitAlter=scipy.optimize.minimize(alter,initialGuess,method="Nelder-Mead",options={'disp':True},args=subset3) +print(fitNull) +print(fitAlter) +#likelihood ratio test chi2 I213N +D=2*(fitNull.fun-fitAlter.fun) +1-scipy.stats.chi2.cdf(x=D,df=1) +#p-value for I213N +print(D) #Question 2# import pandas From a2b9b62be9e95d3b91ab60e424b9f32f66ed7aa4 Mon Sep 17 00:00:00 2001 From: Katherine Date: Wed, 1 Nov 2017 12:46:32 -0400 Subject: [PATCH 06/10] Fixed #1. It works! --- exercise9.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/exercise9.py b/exercise9.py index f3092e2..8555728 100755 --- a/exercise9.py +++ b/exercise9.py @@ -37,21 +37,23 @@ def alter(p,obs): return nll #minimizing nll for first treatment (M124K) -initialGuess=numpy.array([1,1,1]) +initialGuess=numpy.array([2000,300]) +alterGuess=numpy.array([2000,-55,300]) fitNull=scipy.optimize.minimize(null,initialGuess,method="Nelder-Mead",options={'disp':True},args=subset1) -fitAlter=scipy.optimize.minimize(alter,initialGuess,method="Nelder-Mead",options={'disp':True},args=subset1) +fitAlter=scipy.optimize.minimize(alter,alterGuess,method="Nelder-Mead",options={'disp':True},args=subset1) print(fitNull) print(fitAlter) #likelihood ratio test chi2 for M124K D=2*(fitNull.fun-fitAlter.fun) -1-scipy.stats.chi2.cdf(x=D,df=1) #resulting p-value M124K -print(D) +1-scipy.stats.chi2.cdf(x=D,df=1) + #minimizing nll for V456D -initialGuess=numpy.array([1,1,1]) +initialGuess=numpy.array([2000,300]) +alterGuess=numpy.array([2000,-55,300]) fitNull=scipy.optimize.minimize(null,initialGuess,method="Nelder-Mead",options={'disp':True},args=subset2) -fitAlter=scipy.optimize.minimize(alter,initialGuess,method="Nelder-Mead",options={'disp':True},args=subset2) +fitAlter=scipy.optimize.minimize(alter,alterGuess,method="Nelder-Mead",options={'disp':True},args=subset2) print(fitNull) print(fitAlter) #likelihood ratio test chi2 for V456D @@ -61,9 +63,10 @@ def alter(p,obs): print(D) #minimizing nll for I213N -initialGuess=numpy.array([1,1,1]) +initialGuess=numpy.array([2000,300]) +alterGuess=numpy.array([2000,-55,300]) fitNull=scipy.optimize.minimize(null,initialGuess,method="Nelder-Mead",options={'disp':True},args=subset3) -fitAlter=scipy.optimize.minimize(alter,initialGuess,method="Nelder-Mead",options={'disp':True},args=subset3) +fitAlter=scipy.optimize.minimize(alter,alterGuess,method="Nelder-Mead",options={'disp':True},args=subset3) print(fitNull) print(fitAlter) #likelihood ratio test chi2 I213N From 5c5adb50db7d62287f02b8ecc57ea0dc585f228a Mon Sep 17 00:00:00 2001 From: Mati Nemera Date: Wed, 1 Nov 2017 20:16:27 -0400 Subject: [PATCH 07/10] importing files --- exercise9.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/exercise9.py b/exercise9.py index 4e5a05d..3e7624f 100755 --- a/exercise9.py +++ b/exercise9.py @@ -6,3 +6,6 @@ #Question 2# import pandas import scipy +from scipy.optimize import minimize +from scipy.stats import norm +mgrowth = open("MmarinumGrowth.csv", "r") From b28c1d2caaf5898b571417744858e0a8be681944 Mon Sep 17 00:00:00 2001 From: Katherine Date: Wed, 1 Nov 2017 20:34:22 -0400 Subject: [PATCH 08/10] Added first half of Question 3 --- exercise9.py | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/exercise9.py b/exercise9.py index 8555728..240d1b2 100755 --- a/exercise9.py +++ b/exercise9.py @@ -7,7 +7,7 @@ from scipy.stats import norm import numpy from plotnine import * -#note: used cygwin to change mutation names to numbers +#note: used sed in cygwin to change mutation names to numbers #WT=0, M124K=1, V456D=2, I213N=3 ponzr = pandas.read_csv("ponzr1.csv") #subset WT and mutation 1 @@ -48,7 +48,6 @@ def alter(p,obs): #resulting p-value M124K 1-scipy.stats.chi2.cdf(x=D,df=1) - #minimizing nll for V456D initialGuess=numpy.array([2000,300]) alterGuess=numpy.array([2000,-55,300]) @@ -78,3 +77,53 @@ def alter(p,obs): #Question 2# import pandas import scipy + + +#####Question 3##### +#load packages +import pandas +import scipy +from scipy import optimize +from scipy import stats +from scipy.stats import norm +import numpy +from plotnine import * +#load dataset +leafy = pandas.read_csv("leafDecomp.csv") + +#define custom likelihood functions +def constant(p,obs): + B0=p[0] + sigma=p[1] + + expected=B0 + nll= -1*scipy.stats.norm(expected,sigma).logpdf(obs.decomp).sum() + return nll + +#alternate hypotheses +def linear(p,obs): + B0=p[0] + B1=p[1] + sigma=p[2] + + expected=B0+B1*obs.Ms + nll= -1*scipy.stats.norm(expected,sigma).logpdf(obs.decomp).sum() + return nll +def humped(p,obs): + B0=p[0] + B1=p[1] + B2=p[2] + sigma=p[3] + + expected=B0+B1*obs.Ms+B2*(obs.Ms)^2 + nll= -1*scipy.stats.norm(expected,sigma).logpdf(obs.decomp).sum() + return nll + +#parameter estimates +###NOTE TO MATI- HUMPED FUNCTION DOESN'T WORK CURRENTLY#### +constantGuess=numpy.array([1,1]) +linearGuess=numpy.array([1,1,1]) +humpedGuess=numpy.array([200,10,-.2,1]) +fitConstant=scipy.optimize.minimize(constant,constantGuess,method="Nelder-Mead",options={'disp':True},args=leafy) +fitLinear=scipy.optimize.minimize(linear,linearGuess,method="Nelder-Mead",options={'disp':True},args=leafy) +fitHumped=scipy.optimize.minimize(humped,humpedGuess,method="Nelder-Mead",options={'disp':True},args=leafy) From 7b970e4fd50be207dcbef77199a9a4bb08aa22b5 Mon Sep 17 00:00:00 2001 From: Mati Nemera Date: Fri, 3 Nov 2017 01:10:02 -0400 Subject: [PATCH 09/10] question 2 function trial --- exercise9.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/exercise9.py b/exercise9.py index e531d3a..b5aed9e 100755 --- a/exercise9.py +++ b/exercise9.py @@ -76,18 +76,20 @@ def alter(p,obs): #Question 2# import pandas -import numpy import scipy from scipy.optimize import minimize from scipy.stats import norm -mgrowth = open("MmarinumGrowth.csv", "r") -def (y,S,umax,Ks): - sigma = y[0] - u = umax*S/(S+Ks) - nll = -1*norm(u,sigma).logpdf(S.u).sum() +mgrowth = pandas.read_csv("MmarinumGrowth.csv") +def monod(p,obs): + umax = p[0] + Ks = p[1] + sigma = p[2] + expected= umax*obs.S/(obs.S+Ks) + nll= -1*scipy.stats.norm(expected,sigma).logpdf(obs.u).sum() return nll -guess=numpy.array([1,1,1]) -fit=minimize(sim,guess,method='Nelder-Mead',options={'disp': True},args=17) +monodGuess=numpy.array([1,1,1]) +fitMonod=minimize(monod,monodGuess,method='Nelder-Mead',options={'disp': True},args=mgrowth) +print(monod) #####Question 3##### #load packages import pandas @@ -130,14 +132,22 @@ def humped(p,obs): return nll #parameter estimates -###NOTE TO MATI- HUMPED FUNCTION DOESN'T WORK CURRENTLY#### constantGuess=numpy.array([1,1]) linearGuess=numpy.array([1,1,1]) humpedGuess=numpy.array([200,10,-0.2,1]) fitConstant=scipy.optimize.minimize(constant,constantGuess,method="Nelder-Mead",options={'disp':True},args=leafy) fitLinear=scipy.optimize.minimize(linear,linearGuess,method="Nelder-Mead",options={'disp':True},args=leafy) fitHumped=scipy.optimize.minimize(humped,humpedGuess,method="Nelder-Mead",options={'disp':True},args=leafy) +print(fitConstant) +print(fitLinear) +print(fitHumped) +#comparison of models +D=2*(fitConstant.fun-fitLinear.fun) +1-scipy.stats.chi2.cdf(x=D,df=1) +E=2*(fitLinear.fun-fitHumped.fun) +1-scipy.stats.chi2.cdf(x=E,df=1) - +F=2*(fitConstant.fun-fitHumped.fun) +1-scipy.stats.chi2.cdf(x=F,df=2) From fa5d8c450cc5e0e8e5298c6f869de52dbc96020a Mon Sep 17 00:00:00 2001 From: Mati Nemera Date: Fri, 3 Nov 2017 01:22:27 -0400 Subject: [PATCH 10/10] question 2 completed, parameter values printed --- exercise9.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/exercise9.py b/exercise9.py index b5aed9e..239af9f 100755 --- a/exercise9.py +++ b/exercise9.py @@ -75,11 +75,17 @@ def alter(p,obs): print(D) #Question 2# +#load packages import pandas +import numpy import scipy from scipy.optimize import minimize from scipy.stats import norm + +#load dataset mgrowth = pandas.read_csv("MmarinumGrowth.csv") + +#define custom likelihood function def monod(p,obs): umax = p[0] Ks = p[1] @@ -87,9 +93,12 @@ def monod(p,obs): expected= umax*obs.S/(obs.S+Ks) nll= -1*scipy.stats.norm(expected,sigma).logpdf(obs.u).sum() return nll + +#parameter estimates monodGuess=numpy.array([1,1,1]) fitMonod=minimize(monod,monodGuess,method='Nelder-Mead',options={'disp': True},args=mgrowth) -print(monod) +print(fitMonod.x) + #####Question 3##### #load packages import pandas @@ -138,9 +147,9 @@ def humped(p,obs): fitConstant=scipy.optimize.minimize(constant,constantGuess,method="Nelder-Mead",options={'disp':True},args=leafy) fitLinear=scipy.optimize.minimize(linear,linearGuess,method="Nelder-Mead",options={'disp':True},args=leafy) fitHumped=scipy.optimize.minimize(humped,humpedGuess,method="Nelder-Mead",options={'disp':True},args=leafy) -print(fitConstant) -print(fitLinear) -print(fitHumped) +print(fitConstant.x) +print(fitLinear.x) +print(fitHumped.x) #comparison of models D=2*(fitConstant.fun-fitLinear.fun) 1-scipy.stats.chi2.cdf(x=D,df=1)