From 319c38644d176a8792c72eb737e9d415f3a2a0ce Mon Sep 17 00:00:00 2001 From: Michelle Corley Date: Wed, 1 Nov 2017 12:57:33 -0400 Subject: [PATCH 1/6] subsetting data in part 1 --- part1script.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 part1script.py diff --git a/part1script.py b/part1script.py new file mode 100644 index 0000000..2024c37 --- /dev/null +++ b/part1script.py @@ -0,0 +1,26 @@ +#Exercise9, 10/27/17 + +import numpy +import pandas +from scipy.optimize import minimize +from scipy.stats import norm +from plotnine import * + + +data = pandas.read_csv('ponzr1.csv', header=0,sep=',') + +sub1=data.loc[data.mutation.isin(['WT','M124K']),:] +sub2=data.loc[data.mutation.isin(['WT','V456D']),:] +sub3=data.loc[data.mutation.isin(['WT','I213N']),:] + +def nllike(p,obs): + B0=p[0] + sigma=p[1] + + expected=B0 + nll=-1*norm(expected,sigma).logpdf(obs.y).sum() + return nll + +initialGuess=numpyarray([1,1,1]) +fit=minimize(nllike,initialGuess,method="Nelder-Mead",options={'disp': True},args=df) +print(fit.x) \ No newline at end of file From 8159890e931fdd1b0dbe5b42bca88877ce0f0f17 Mon Sep 17 00:00:00 2001 From: Michelle Corley Date: Wed, 1 Nov 2017 15:52:54 -0400 Subject: [PATCH 2/6] NLL value for part of null model --- part1script.py | 42 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/part1script.py b/part1script.py index 2024c37..04717db 100644 --- a/part1script.py +++ b/part1script.py @@ -6,21 +6,53 @@ from scipy.stats import norm from plotnine import * - data = pandas.read_csv('ponzr1.csv', header=0,sep=',') +#subset data into three different data frames sub1=data.loc[data.mutation.isin(['WT','M124K']),:] sub2=data.loc[data.mutation.isin(['WT','V456D']),:] sub3=data.loc[data.mutation.isin(['WT','I213N']),:] -def nllike(p,obs): + +#Make new data frame with 'group' column (your x=0 or x=1) +#var2=pandas.DataFrame({'y':var1.col2name, 'x':}) +sub1frame=pandas.DataFrame({'y':sub1.ponzr1Counts,'x':0}) +sub2frame=pandas.DataFrame({'y':sub2.ponzr1Counts,'x':0}) +sub3frame=pandas.DataFrame({'y':sub3.ponzr1Counts,'x':0}) +#Designate 'treatment' group as x=1 +#var2.loc[var1.col1name=='name of treatment group', 'x']=1 +sub1frame.loc[sub1.mutation=='M124K','x']=1 +sub2frame.loc[sub2.mutation=='V456D','x']=1 +sub3frame.loc[sub3.mutation=='I213N','x']=1 +#print(sub3frame) + +# Define function y=B0+error +def nllikeNull(p,obs): B0=p[0] sigma=p[1] expected=B0 nll=-1*norm(expected,sigma).logpdf(obs.y).sum() return nll + print(nll) + +### estimate parameters by minimizing the NLL +initialGuess=numpy.array([1,1]) +fit=minimize(nllikeNull,initialGuess,method="Nelder-Mead",options={'disp': True},args=sub1frame) +print(fit.fun) + +# Define function y=B0+B1*treaterror +def nllike(p,obs): + B0=p[0] + B1=p[1] + sigma=p[2] -initialGuess=numpyarray([1,1,1]) -fit=minimize(nllike,initialGuess,method="Nelder-Mead",options={'disp': True},args=df) -print(fit.x) \ No newline at end of file + expected=B0+B1*obs.x + nll=-1*norm(expected,sigma).logpdf(obs.y).sum() + return nll + + +### estimate parameters by minimizing the NLL +initialGuess=numpy.array([1,1]) +fit=minimize(nllike,initialGuess,method="Nelder-Mead",options={'disp': True},args=obs) +print(fit.fun) \ No newline at end of file From bf2513b94601ea6b3f81c855923b7c179afbfbf8 Mon Sep 17 00:00:00 2001 From: Michelle Corley Date: Wed, 1 Nov 2017 16:19:30 -0400 Subject: [PATCH 3/6] NLL values for all models and mutations --- part1script.py | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/part1script.py b/part1script.py index 04717db..e7fdbb3 100644 --- a/part1script.py +++ b/part1script.py @@ -26,7 +26,7 @@ sub3frame.loc[sub3.mutation=='I213N','x']=1 #print(sub3frame) -# Define function y=B0+error +#### Define null function def nllikeNull(p,obs): B0=p[0] sigma=p[1] @@ -34,25 +34,45 @@ def nllikeNull(p,obs): expected=B0 nll=-1*norm(expected,sigma).logpdf(obs.y).sum() return nll - print(nll) -### estimate parameters by minimizing the NLL +# estimate parameters by minimizing the NLL for sub1frame initialGuess=numpy.array([1,1]) fit=minimize(nllikeNull,initialGuess,method="Nelder-Mead",options={'disp': True},args=sub1frame) -print(fit.fun) +nullM124K= fit.fun #M124K +#gives NLL value for affect of mutation 1 in null model +#print(nullM124K) -# Define function y=B0+B1*treaterror +initialGuess=numpy.array([1,1]) +fit=minimize(nllikeNull,initialGuess,method="Nelder-Mead",options={'disp': True},args=sub2frame) +nullV456D= fit.fun #V456D + +initialGuess=numpy.array([1,1]) +fit=minimize(nllikeNull,initialGuess,method="Nelder-Mead",options={'disp': True},args=sub3frame) +nullI213N = fit.fun #I213N + +#### Define function y=B0+B1*treat+error def nllike(p,obs): B0=p[0] B1=p[1] sigma=p[2] - expected=B0+B1*obs.x - nll=-1*norm(expected,sigma).logpdf(obs.y).sum() + expectedAlt=B0+B1*obs.x + nll=-1*norm(expectedAlt,sigma).logpdf(obs.y).sum() return nll +#estimate parameters by minimizing the NLL +initialGuess=numpy.array([1,1,1]) +fit=minimize(nllike,initialGuess,method="Nelder-Mead",options={'disp': True},args=sub1frame) +altM124K = fit.fun #M124K + +initialGuess=numpy.array([1,1,1]) +fit=minimize(nllike,initialGuess,method="Nelder-Mead",options={'disp': True},args=sub2frame) +altV456D = fit.fun #V456D + +initialGuess=numpy.array([1,1,1]) +fit=minimize(nllike,initialGuess,method="Nelder-Mead",options={'disp': True},args=sub3frame) +altI213N = fit.fun #I213N + + + -### estimate parameters by minimizing the NLL -initialGuess=numpy.array([1,1]) -fit=minimize(nllike,initialGuess,method="Nelder-Mead",options={'disp': True},args=obs) -print(fit.fun) \ No newline at end of file From a1e9069f905d577b638d44dbcdeb89ded8b5eb81 Mon Sep 17 00:00:00 2001 From: Michelle Corley Date: Thu, 2 Nov 2017 13:12:23 -0400 Subject: [PATCH 4/6] completed part 1 with correct p values calculated --- part1script.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/part1script.py b/part1script.py index e7fdbb3..04225d0 100644 --- a/part1script.py +++ b/part1script.py @@ -73,6 +73,22 @@ def nllike(p,obs): fit=minimize(nllike,initialGuess,method="Nelder-Mead",options={'disp': True},args=sub3frame) altI213N = fit.fun #I213N - +### Calculating D values +DvalM124K = 2*(nullM124K-altM124K) +print("D-value for M124K= ", DvalM124K) + +DvalV456D = 2*(nullV456D-altV456D) +print("D-value for V456D = ", DvalV456D) + +DvalI213N = 2*(nullI213N-altI213N) +print("D-value for I213N = ", DvalI213N) + +### Chi-squared dist test values +pval1=1-scipy.stats.chi2.cdf(x=DvalM124K,df=1) +pval2=1-scipy.stats.chi2.cdf(x=DvalV456D,df=1) +pval3=1-scipy.stats.chi2.cdf(x=DvalI213N,df=1) +print(pval1) +print(pval2) +print(pval3) From c23c309fdeccb7fe0e95fe1dbe1f38f53ec372f2 Mon Sep 17 00:00:00 2001 From: Tim Burton Date: Thu, 2 Nov 2017 21:22:25 -0400 Subject: [PATCH 5/6] part 3 completed --- part3script.py | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 part3script.py diff --git a/part3script.py b/part3script.py new file mode 100644 index 0000000..e5c1669 --- /dev/null +++ b/part3script.py @@ -0,0 +1,56 @@ +import numpy as np +import pandas as pd +import scipy as sp +from scipy.optimize import minimize +from scipy.stats import norm +from plotnine import * + +leaf=pd.read_csv("leafDecomp.csv") + + +ggplot(leaf, aes(x="Ms", y="decomp")) + geom_point() + + +def nllike_null (p,obs): + a=p[0] + b=p[1] + c=p[2] + sigma=p[3] + + expected=a + nll=-1*norm(expected,sigma).logpdf(obs.decomp).sum() + return(nll) + +def nllike_linear(p,obs): + a=p[0] + b=p[1] + c=p[2] + sigma=p[3] + + expected=a+b*obs.Ms + nll=-1*norm(expected,sigma).logpdf(obs.decomp).sum() + return(nll) + +def nllike_hump(p,obs): + a=p[0] + b=p[1] + c=p[2] + sigma=p[3] + + expected=a+b*obs.Ms+c*obs.Ms**2 + nll=-1*norm(expected,sigma).logpdf(obs.decomp).sum() + return(nll) + + +guess_hump=np.array([1,100,500,50]) +fit_null=minimize(nllike_null,guess_hump, method="Nelder-Mead", options={'disp':True},args=leaf) +fit_linear=minimize(nllike_linear,guess_hump, method="Nelder-Mead", options={'disp':True},args=leaf) +fit_hump=minimize(nllike_hump,guess_hump, method="Nelder-Mead", options={'disp':True},args=leaf) + +chi2_linear=2*(fit_null.fun-fit_linear.fun) +1-sp.stats.chi2.cdf(x=chi2_linear,df=1) + +chi2_hump=2*(fit_null.fun-fit_hump.fun) +1-sp.stats.chi2.cdf(x=chi2_hump,df=2) + +#based on the chi2 test, the hump-shaped model is the best fit for the leaf decomposition data. From 732d0a522f43685eb332cdb9406b82080770ab01 Mon Sep 17 00:00:00 2001 From: Patrick Doherty Date: Thu, 2 Nov 2017 23:31:22 -0400 Subject: [PATCH 6/6] All done, similar to handout on max like --- part2script.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100755 part2script.py diff --git a/part2script.py b/part2script.py new file mode 100755 index 0000000..19ca8f7 --- /dev/null +++ b/part2script.py @@ -0,0 +1,17 @@ +###Exercise 9 Part 2 +import numpy +import pandas +from scipy.optimize import minimize +from scipy.stats import norm +from plotnine import * +data=pandas.read_csv("MmarinumGrowth.csv") +def Grate(p,obs): + um=p[0] + K=p[1] + sigma=p[2] + ue=um*(obs.S/(obs.S+K)) + nll=-1*norm(ue,sigma).logpdf(obs.u).sum() + return nll +probswrong=numpy.array([1,1,1]) +best=minimize(Grate,probswrong,method="Nelder-Mead",options={"disp":True},args=data) +print (best.x)