From 908b37dea774e99b32cf06a6e3b2d69758285197 Mon Sep 17 00:00:00 2001 From: Phil McCown Date: Fri, 27 Oct 2017 11:24:07 -0400 Subject: [PATCH 01/15] Ex9Q1 initial Python script --- Ex9Q1.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100755 Ex9Q1.py diff --git a/Ex9Q1.py b/Ex9Q1.py new file mode 100755 index 0000000..817f675 --- /dev/null +++ b/Ex9Q1.py @@ -0,0 +1,31 @@ +import numpy +import pandas +from scipy.optimize import minimize +from scipy.stats import norm +from plotnine import * +data=pandas.read_csv('ponzr1.csv') +subset1=data.loc[data.mutation.isin(['WT','M124K']),:] +subset2=data.loc[data.mutation.isin(['WT','V456D']),:] +subset3=data.loc[data.mutation.isin(['WT','I213N']),:] +def null (p,obs): + B0=p[0] + sigma=p[1] + expected=B0 + nll=-1*norm(expected,sigma).logpdf(obs.y).sum() + return nll +def alter(p,obs): + B0=p[0] + B1=p[1] + sigma=p[2] + expected=B0+B1*obs.x + nll=-1*norm(expected,sigma).logpdf(obs.y).sum() + return nll +initialGuess=numpy.array([1,1,1]) +#args=df is throwing an error of not defined +fitNull=minimize(subset1,initialGuess,method="Nelder-Mead",options={'disp':True},args=df) +fitAlter=minimize(subset1,initialGuess,method="Nelder-Mead",options={'disp':True},args=df) +D=2*(fitNull.fun-fitAlter.fun) +1-chi2.cdf(x=D,df=1) + +#Chi-squared distribution with one deg of free +#1-scipy.stats.chi2.cdf(x=D,df=1) \ No newline at end of file From 489b59e1ffdfa6e7e8f15206b7448454a7b9d273 Mon Sep 17 00:00:00 2001 From: Phil McCown Date: Wed, 1 Nov 2017 13:58:13 -0400 Subject: [PATCH 02/15] Progress on Exercise 9, Q1 and Q2 --- Ex9Q1.py | 21 ++++++++++----------- Ex9Q2.py | 8 ++++++++ 2 files changed, 18 insertions(+), 11 deletions(-) create mode 100755 Ex9Q2.py diff --git a/Ex9Q1.py b/Ex9Q1.py index 817f675..b0c25b1 100755 --- a/Ex9Q1.py +++ b/Ex9Q1.py @@ -3,10 +3,12 @@ from scipy.optimize import minimize from scipy.stats import norm from plotnine import * -data=pandas.read_csv('ponzr1.csv') -subset1=data.loc[data.mutation.isin(['WT','M124K']),:] -subset2=data.loc[data.mutation.isin(['WT','V456D']),:] -subset3=data.loc[data.mutation.isin(['WT','I213N']),:] +data=pandas.read_csv("ponzr1.csv", sep=',') +data.shape +data.columns +#subset1=data.loc[data.mutation.isin(['WT','M124K']),:] +#subset2=data.loc[data.mutation.isin(['WT','V456D']),:] +#subset3=data.loc[data.mutation.isin(['WT','I213N']),:] def null (p,obs): B0=p[0] sigma=p[1] @@ -21,11 +23,8 @@ def alter(p,obs): nll=-1*norm(expected,sigma).logpdf(obs.y).sum() return nll initialGuess=numpy.array([1,1,1]) -#args=df is throwing an error of not defined -fitNull=minimize(subset1,initialGuess,method="Nelder-Mead",options={'disp':True},args=df) -fitAlter=minimize(subset1,initialGuess,method="Nelder-Mead",options={'disp':True},args=df) -D=2*(fitNull.fun-fitAlter.fun) -1-chi2.cdf(x=D,df=1) - -#Chi-squared distribution with one deg of free +df=1 +#fitNull=minimize(subset1,initialGuess,method="Nelder-Mead",options={'disp':True},args=df) +#fitAlter=minimize(subset1,initialGuess,method="Nelder-Mead",options={'disp':True},args=df) +#D=2*(fitNull.fun-fitAlter.fun) #1-scipy.stats.chi2.cdf(x=D,df=1) \ No newline at end of file diff --git a/Ex9Q2.py b/Ex9Q2.py new file mode 100755 index 0000000..54e59f0 --- /dev/null +++ b/Ex9Q2.py @@ -0,0 +1,8 @@ +import numpy +import pandas +from scipy.optimize import minimize +from scipy.stats import norm +from plotnine import * +data=pandas.read_csv("MmarinumGrowth.csv", sep=',') +data.shape +data.columns From 0b13bfd5f996bc11fdbe64c417dbaac1593c5b1d Mon Sep 17 00:00:00 2001 From: Phil McCown Date: Wed, 1 Nov 2017 14:02:11 -0400 Subject: [PATCH 03/15] Fixed a minor typo --- Ex9Q1.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Ex9Q1.py b/Ex9Q1.py index b0c25b1..e5ef480 100755 --- a/Ex9Q1.py +++ b/Ex9Q1.py @@ -6,7 +6,9 @@ data=pandas.read_csv("ponzr1.csv", sep=',') data.shape data.columns -#subset1=data.loc[data.mutation.isin(['WT','M124K']),:] +mutation=[0] +ponzr1Counts=[1] +subset1=data.loc[data.mutation.isin(['WT','M124K']),:] #subset2=data.loc[data.mutation.isin(['WT','V456D']),:] #subset3=data.loc[data.mutation.isin(['WT','I213N']),:] def null (p,obs): From d63068585cf6f197cd2b127973f6196562bc72cd Mon Sep 17 00:00:00 2001 From: Phil McCown Date: Thu, 2 Nov 2017 09:56:17 -0400 Subject: [PATCH 04/15] Fixed one bug --- Ex9Q1.py | 12 ++++-------- Ex9Q2.py | 11 +++++++++-- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/Ex9Q1.py b/Ex9Q1.py index e5ef480..4661291 100755 --- a/Ex9Q1.py +++ b/Ex9Q1.py @@ -2,15 +2,12 @@ import pandas from scipy.optimize import minimize from scipy.stats import norm -from plotnine import * data=pandas.read_csv("ponzr1.csv", sep=',') data.shape data.columns -mutation=[0] -ponzr1Counts=[1] subset1=data.loc[data.mutation.isin(['WT','M124K']),:] -#subset2=data.loc[data.mutation.isin(['WT','V456D']),:] -#subset3=data.loc[data.mutation.isin(['WT','I213N']),:] +subset2=data.loc[data.mutation.isin(['WT','V456D']),:] +subset3=data.loc[data.mutation.isin(['WT','I213N']),:] def null (p,obs): B0=p[0] sigma=p[1] @@ -25,8 +22,7 @@ def alter(p,obs): nll=-1*norm(expected,sigma).logpdf(obs.y).sum() return nll initialGuess=numpy.array([1,1,1]) -df=1 -#fitNull=minimize(subset1,initialGuess,method="Nelder-Mead",options={'disp':True},args=df) -#fitAlter=minimize(subset1,initialGuess,method="Nelder-Mead",options={'disp':True},args=df) +fitNull=minimize(null,initialGuess,method="Nelder-Mead",options={'disp':True},args=subset1) +fitAlter=minimize(alter,initialGuess,method="Nelder-Mead",options={'disp':True},args=subset1) #D=2*(fitNull.fun-fitAlter.fun) #1-scipy.stats.chi2.cdf(x=D,df=1) \ No newline at end of file diff --git a/Ex9Q2.py b/Ex9Q2.py index 54e59f0..ebd9e53 100755 --- a/Ex9Q2.py +++ b/Ex9Q2.py @@ -1,8 +1,15 @@ import numpy import pandas -from scipy.optimize import minimize +from scipy.optimize import curve_fit from scipy.stats import norm from plotnine import * -data=pandas.read_csv("MmarinumGrowth.csv", sep=',') +data=pandas.read_csv("MmarinumGrowth.csv", names=['S','u']) data.shape data.columns +S=[0] +u=[1] +#define x as S from csv +x=numpy.array("S") +#define y as u from csv +y=numpy.array("u") +#u=umax*(S/(S+K)) \ No newline at end of file From 7abf2e2e50e7839a3680bc732336082ad8759354 Mon Sep 17 00:00:00 2001 From: Phil McCown Date: Thu, 2 Nov 2017 11:01:08 -0400 Subject: [PATCH 05/15] Reworking the def's --- Ex9Q1.py | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/Ex9Q1.py b/Ex9Q1.py index 4661291..56458c3 100755 --- a/Ex9Q1.py +++ b/Ex9Q1.py @@ -8,21 +8,22 @@ subset1=data.loc[data.mutation.isin(['WT','M124K']),:] subset2=data.loc[data.mutation.isin(['WT','V456D']),:] subset3=data.loc[data.mutation.isin(['WT','I213N']),:] -def null (p,obs): - B0=p[0] - sigma=p[1] - expected=B0 - nll=-1*norm(expected,sigma).logpdf(obs.y).sum() - return nll -def alter(p,obs): - B0=p[0] - B1=p[1] - sigma=p[2] - expected=B0+B1*obs.x - nll=-1*norm(expected,sigma).logpdf(obs.y).sum() - return nll -initialGuess=numpy.array([1,1,1]) -fitNull=minimize(null,initialGuess,method="Nelder-Mead",options={'disp':True},args=subset1) -fitAlter=minimize(alter,initialGuess,method="Nelder-Mead",options={'disp':True},args=subset1) + +#def mean (p,obs): + # B0=y[0] + # sigma=y[1] + # expected=B0 + # nll=-1*norm(expected,sigma).logpdf(obs.y).sum() + # return nll +#def like (p,obs): + # B0=p[0] + # B1=p[1] + # sigma=p[2] + # expected=B0+B1*obs.x + # nll=-1*norm(expected,sigma).logpdf(obs.y).sum() + # return nll +#initialGuess=numpy.array([1,1,1]) +#fitNull=minimize(null,initialGuess,method="Nelder-Mead",options={'disp':True},args=subset1) +#fitAlter=minimize(alter,initialGuess,method="Nelder-Mead",options={'disp':True},args=subset1) #D=2*(fitNull.fun-fitAlter.fun) #1-scipy.stats.chi2.cdf(x=D,df=1) \ No newline at end of file From 8b69fb26eb27cfa01281f0d0c0bda72ceb1ed96f Mon Sep 17 00:00:00 2001 From: kkilgoreND <31992787+kkilgoreND@users.noreply.github.com> Date: Thu, 2 Nov 2017 13:33:40 -0400 Subject: [PATCH 06/15] updated part 1... this should be fine now --- Ex9Q1.py | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/Ex9Q1.py b/Ex9Q1.py index 56458c3..c54b169 100755 --- a/Ex9Q1.py +++ b/Ex9Q1.py @@ -5,9 +5,9 @@ data=pandas.read_csv("ponzr1.csv", sep=',') data.shape data.columns -subset1=data.loc[data.mutation.isin(['WT','M124K']),:] -subset2=data.loc[data.mutation.isin(['WT','V456D']),:] -subset3=data.loc[data.mutation.isin(['WT','I213N']),:] +#subset1=data.loc[data.mutation.isin(['WT','M124K']),:] +#subset2=data.loc[data.mutation.isin(['WT','V456D']),:] +#subset3=data.loc[data.mutation.isin(['WT','I213N']),:] #def mean (p,obs): # B0=y[0] @@ -26,4 +26,51 @@ #fitNull=minimize(null,initialGuess,method="Nelder-Mead",options={'disp':True},args=subset1) #fitAlter=minimize(alter,initialGuess,method="Nelder-Mead",options={'disp':True},args=subset1) #D=2*(fitNull.fun-fitAlter.fun) -#1-scipy.stats.chi2.cdf(x=D,df=1) \ No newline at end of file +#1-scipy.stats.chi2.cdf(x=D,df=1) + +#to fix the y problem that was arising all you needed to do was change y to p because p is the argument not y. + +#likelihood function for null +def null(p,obs): + B0=p[0] + sigma=p[1] + + expected=B0 + nll=-1*norm(expected,sigma).logpdf(obs.y).sum() + return nll + +#likelihood function for when mutation effects expression +def mut(p,obs): + B0=p[0] + B1=p[1] + sigma=p[2] + + expected=B0+B1*obs.x + nll=-1*norm(expected,sigma).logpdf(obs.y).sum() + return nll +#t-test between control and M124K mutation +#data for control vs first mutation (M124K) +data1=data.loc[(data.mutation == "WT") | (data.mutation == "M124K")] +data1.columns=['x', 'y'] +data1['x'] = data1['x'].map({'WT': 0, 'M124K': 1}) + +#parameters with null model +initialGuess=numpy.array([2000,1]) +null=minimize(null, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data1) +#print parameters +print(null.x) +#print nll +print(null.fun) + +#parameters with mutation model +initialGuess=numpy.array([2000,1000, 1]) +mut=minimize(mut, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data1) +#print parameters +print(mut.x) +#print nll +print(mut_fit.fun) + +#difference in nll calculation +D=2*(null.fun-mut.fun) +#test for statistical significance +1-scipy.stats.chi2.cdf(x=D,df=1) From 46abff75648c786f67e2a8628ad4f0c486f20445 Mon Sep 17 00:00:00 2001 From: kkilgoreND <31992787+kkilgoreND@users.noreply.github.com> Date: Thu, 2 Nov 2017 13:47:12 -0400 Subject: [PATCH 07/15] part 3 script...should be good --- Ex9Q3.py | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 Ex9Q3.py diff --git a/Ex9Q3.py b/Ex9Q3.py new file mode 100644 index 0000000..3f7afeb --- /dev/null +++ b/Ex9Q3.py @@ -0,0 +1,80 @@ +#add'leafDecomp.csv' +ld=pandas.read_csv("leafDecomp.csv",header=0) + +#make new dataframe with x and y as the headers +leaves=ld +leaves.columns=['x', 'y'] +leaves.head() + +#liklihood func. for constant decomp +def constant(p,obs): + B0=p[0] + sigma=p[1] + + expected=B0 + nll=-1*norm(expected,sigma).logpdf(obs.y).sum + return nll + +#set intial guess +constantGuess=numpy.array([600,1]) + +#estimate parameters +constantFit=minimize(constant,constantGuess,method="Nelder-Mead",options={'disp':True},args=leaves) +print(constantFit.x) +#print nll +print(constantFit.fun) + +#liklihood func. for linear decomp +def linear(p,obs): + B0=p[0] + B1=p[1] + sigma=p[2] + + expected=B0+B1*obs.x + nll=-1*norm(expected,sigma).logpdf(obs.y).sum() + return nll + +#set intial guesses +linearGuess=numpy.array([10,6,1]) +#estimate parameters +linearFit=minimize(linear,linearGuess,method="Nelder-Mead",options={'disp':True},args=leaves) +print(linearFit.x) +#print nll +print(linearFit.fun) + +#liklihood function for hump decomp. +def hump(p,obs): + B0=p[0] + B1=p[1] + B2=p[2] + sigma=p[3] + + expected=B0+B1*obs.x+B2*(obs.x)^2 + nll=-1*norm(expected,sigma).logpdf(obs.y).sum() + return nll + +#set initial guesses +humpGuess=numpy.array([200,10,-.2,1]) + +#estimate parameters +humpFit=minimize(hump,humpGuess,method="Nelder-Mead",options={'disp':True},args=leaves) +print(humpyFit.x) +print(humpyFit.fun)#nll + +#difference in nll(constant vs linear) +first_D=2*(constantFit.fun-linearFit.fun) + +#test for statistical significance +1-scipy.stats.chi2.cdf(x=first_D,df=1) + +#difference in nll(linear vs hump) +second_D=2*(linearFit.fun-humpFit.fun) + +#test for statistical significance +1-scipy.stats.chi2.cdf(x=second_D,df=1) + +#difference in nll(constant vs hump) +third_D=2*(constantFit.fun-humpFit.fun) + +#test for statistical significance +1-scipy.stats.chi2.cdf(x=third_D,df=2) From 7a133317e2da31f81acd399220418fd0ddbc26d4 Mon Sep 17 00:00:00 2001 From: kkilgoreND <31992787+kkilgoreND@users.noreply.github.com> Date: Thu, 2 Nov 2017 13:54:55 -0400 Subject: [PATCH 08/15] updated part 1 --- Ex9Q1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Ex9Q1.py b/Ex9Q1.py index c54b169..dec8024 100755 --- a/Ex9Q1.py +++ b/Ex9Q1.py @@ -68,7 +68,7 @@ def mut(p,obs): #print parameters print(mut.x) #print nll -print(mut_fit.fun) +print(mut.fun) #difference in nll calculation D=2*(null.fun-mut.fun) From 89472e268f8729a94d77cd64da86136eb56ab21f Mon Sep 17 00:00:00 2001 From: kkilgoreND <31992787+kkilgoreND@users.noreply.github.com> Date: Thu, 2 Nov 2017 13:57:22 -0400 Subject: [PATCH 09/15] update-scipy imported --- Ex9Q1.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Ex9Q1.py b/Ex9Q1.py index dec8024..943ee92 100755 --- a/Ex9Q1.py +++ b/Ex9Q1.py @@ -1,5 +1,6 @@ import numpy import pandas +import scipy from scipy.optimize import minimize from scipy.stats import norm data=pandas.read_csv("ponzr1.csv", sep=',') From 47d69e5ba6e771e9a461b749c2fc7bb003801bfa Mon Sep 17 00:00:00 2001 From: Phil McCown Date: Thu, 2 Nov 2017 15:02:17 -0400 Subject: [PATCH 10/15] Exercise 9, Q1 and Q2 are done --- Ex9Q1 | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++ Ex9Q1-1.py | 57 +++++++++++++++++++++++++++++++++++++++ Ex9Q1.py | 78 +++++++++++++++++++++++++++--------------------------- Ex9Q2.py | 30 ++++++++++++++------- 4 files changed, 192 insertions(+), 49 deletions(-) create mode 100755 Ex9Q1 create mode 100755 Ex9Q1-1.py diff --git a/Ex9Q1 b/Ex9Q1 new file mode 100755 index 0000000..5af64cd --- /dev/null +++ b/Ex9Q1 @@ -0,0 +1,76 @@ +import numpy +import pandas +import scipy +from scipy.optimize import minimize +from scipy.stats import norm +data=pandas.read_csv("ponzr1.csv", sep=',') +data.shape +data.columns +#likelihood function for null +def null(p,obs): + B0=p[0] + sigma=p[1] + expected=B0 + nll=-1*norm(expected,sigma).logpdf(obs.y).sum() + return nll +#likelihood function for when mutation effects expression +def mut(p,obs): + B0=p[0] + B1=p[1] + sigma=p[2] + expected=B0+B1*obs.x + nll=-1*norm(expected,sigma).logpdf(obs.y).sum() + return nll +#t-test between control and mutations +#data for control vs first mutation (M124K) +data1=data.loc[(data.mutation == "WT") | (data.mutation == "M124K")] +data1.columns=['x', 'y'] +data1['x'] = data1['x'].map({'WT': 0, 'M124K': 1}) +#data for control vs second mutation (V456D) +data2=data.loc[(data.mutation == "WT") | (data.mutation == "V456D")] +data2.columns=['x', 'y'] +data2['x'] = data2['x'].map({'WT': 0, 'V456D': 1}) +#data for control vs third mutation (I213N) +data3=data.loc[(data.mutation == "WT") | (data.mutation == "I213N")] +data3.columns=['x', 'y'] +data3['x'] = data3['x'].map({'WT': 0, 'I213N': 1}) + +#parameters with null model; could we have used a for loop here? +initialGuess=numpy.array([2000,1]) +null1=minimize(null, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data1) +null2=minimize(null, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data2) +null3=minimize(null, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data3) +#print parameters +print(null1.x) +print(null2.x) +print(null3.x) +#print nll +print(null1.fun) +print(null2.fun) +print(null3.fun) + +#parameters with mutation model; ditto about the for loop? +initialGuess=numpy.array([2000,1000, 1]) +mut1=minimize(mut, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data1) +mut2=minimize(mut, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data2) +mut3=minimize(mut, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data3) + +#print parameters +print(mut1.x) +print(mut2.x) +print(mut3.x) + +#print nll +print(mut1.fun) +print(mut2.fun) +print(mut3.fun) + +#difference in nll calculation +D1=2*(null1.fun-mut1.fun) +D2=2*(null2.fun-mut2.fun) +D3=2*(null3.fun-mut3.fun) + +#test for statistical significance +print ("The effect of M124K", 1-scipy.stats.chi2.cdf(x=D1,df=1)) +print ("The effect of V456D", 1-scipy.stats.chi2.cdf(x=D2,df=1)) +print ("The effect of I213N", 1-scipy.stats.chi2.cdf(x=D3,df=1)) diff --git a/Ex9Q1-1.py b/Ex9Q1-1.py new file mode 100755 index 0000000..3951328 --- /dev/null +++ b/Ex9Q1-1.py @@ -0,0 +1,57 @@ +import numpy +import pandas +from scipy.optimize import minimize +from scipy.stats import norm +data=pandas.read_csv("ponzr1.csv", sep=',') +data.shape +data.columns +#likelihood function for null +def null(p,obs): + B0=p[0] + sigma=p[1] + + expected=B0 + nll=-1*norm(expected,sigma).logpdf(obs.y).sum() + return nll +#likelihood function for when mutation effects expression +def mut(p,obs): + B0=p[0] + B1=p[1] + sigma=p[2] + + expected=B0+B1*obs.x + nll=-1*norm(expected,sigma).logpdf(obs.y).sum() + return nll +#t-test between control and M124K mutation +#data for control vs first mutation (M124K) +data1=data.loc[(data.mutation == "WT") | (data.mutation == "M124K")] +data1.columns=['x', 'y'] +data1['x'] = data1['x'].map({'WT': 0, 'M124K': 1}) +#data for control vs second mutation (V456D) +data2=data.loc[(data.mutation == "WT") | (data.mutation == "V456D")] +data2.columns=['x', 'y'] +data2['x'] = data2['x'].map({'WT': 0, 'V456D': 1}) +#data for control vs third mutation (I213N) +data3=data.loc[(data.mutation == "WT") | (data.mutation == "I213N")] +data3.columns=['x', 'y'] +data3['x'] = data3['x'].map({'WT': 0, 'I213N': 1}) +#parameters with null model +initialGuess=numpy.array([2000,1]) +null=minimize(null, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data1) +#print parameters +print(null.x) +#print nll +print(null.fun) + +#parameters with mutation model +initialGuess=numpy.array([2000,1000, 1]) +mut=minimize(mut, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data1) +#print parameters +print(mut.x) +#print nll +print(mut.fun) + +#difference in nll calculation +D=2*(null.fun-mut.fun) +#test for statistical significance +1-scipy.stats.chi2.cdf(x=D,df=1) diff --git a/Ex9Q1.py b/Ex9Q1.py index c54b169..5af64cd 100755 --- a/Ex9Q1.py +++ b/Ex9Q1.py @@ -1,76 +1,76 @@ import numpy import pandas +import scipy from scipy.optimize import minimize from scipy.stats import norm data=pandas.read_csv("ponzr1.csv", sep=',') data.shape data.columns -#subset1=data.loc[data.mutation.isin(['WT','M124K']),:] -#subset2=data.loc[data.mutation.isin(['WT','V456D']),:] -#subset3=data.loc[data.mutation.isin(['WT','I213N']),:] - -#def mean (p,obs): - # B0=y[0] - # sigma=y[1] - # expected=B0 - # nll=-1*norm(expected,sigma).logpdf(obs.y).sum() - # return nll -#def like (p,obs): - # B0=p[0] - # B1=p[1] - # sigma=p[2] - # expected=B0+B1*obs.x - # nll=-1*norm(expected,sigma).logpdf(obs.y).sum() - # return nll -#initialGuess=numpy.array([1,1,1]) -#fitNull=minimize(null,initialGuess,method="Nelder-Mead",options={'disp':True},args=subset1) -#fitAlter=minimize(alter,initialGuess,method="Nelder-Mead",options={'disp':True},args=subset1) -#D=2*(fitNull.fun-fitAlter.fun) -#1-scipy.stats.chi2.cdf(x=D,df=1) - -#to fix the y problem that was arising all you needed to do was change y to p because p is the argument not y. - #likelihood function for null def null(p,obs): B0=p[0] sigma=p[1] - expected=B0 nll=-1*norm(expected,sigma).logpdf(obs.y).sum() return nll - #likelihood function for when mutation effects expression def mut(p,obs): B0=p[0] B1=p[1] sigma=p[2] - expected=B0+B1*obs.x nll=-1*norm(expected,sigma).logpdf(obs.y).sum() return nll -#t-test between control and M124K mutation +#t-test between control and mutations #data for control vs first mutation (M124K) data1=data.loc[(data.mutation == "WT") | (data.mutation == "M124K")] data1.columns=['x', 'y'] data1['x'] = data1['x'].map({'WT': 0, 'M124K': 1}) +#data for control vs second mutation (V456D) +data2=data.loc[(data.mutation == "WT") | (data.mutation == "V456D")] +data2.columns=['x', 'y'] +data2['x'] = data2['x'].map({'WT': 0, 'V456D': 1}) +#data for control vs third mutation (I213N) +data3=data.loc[(data.mutation == "WT") | (data.mutation == "I213N")] +data3.columns=['x', 'y'] +data3['x'] = data3['x'].map({'WT': 0, 'I213N': 1}) -#parameters with null model +#parameters with null model; could we have used a for loop here? initialGuess=numpy.array([2000,1]) -null=minimize(null, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data1) +null1=minimize(null, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data1) +null2=minimize(null, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data2) +null3=minimize(null, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data3) #print parameters -print(null.x) +print(null1.x) +print(null2.x) +print(null3.x) #print nll -print(null.fun) +print(null1.fun) +print(null2.fun) +print(null3.fun) -#parameters with mutation model +#parameters with mutation model; ditto about the for loop? initialGuess=numpy.array([2000,1000, 1]) -mut=minimize(mut, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data1) +mut1=minimize(mut, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data1) +mut2=minimize(mut, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data2) +mut3=minimize(mut, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data3) + #print parameters -print(mut.x) +print(mut1.x) +print(mut2.x) +print(mut3.x) + #print nll -print(mut_fit.fun) +print(mut1.fun) +print(mut2.fun) +print(mut3.fun) #difference in nll calculation -D=2*(null.fun-mut.fun) +D1=2*(null1.fun-mut1.fun) +D2=2*(null2.fun-mut2.fun) +D3=2*(null3.fun-mut3.fun) + #test for statistical significance -1-scipy.stats.chi2.cdf(x=D,df=1) +print ("The effect of M124K", 1-scipy.stats.chi2.cdf(x=D1,df=1)) +print ("The effect of V456D", 1-scipy.stats.chi2.cdf(x=D2,df=1)) +print ("The effect of I213N", 1-scipy.stats.chi2.cdf(x=D3,df=1)) diff --git a/Ex9Q2.py b/Ex9Q2.py index ebd9e53..4ba0af0 100755 --- a/Ex9Q2.py +++ b/Ex9Q2.py @@ -1,15 +1,25 @@ +#load packages import numpy import pandas -from scipy.optimize import curve_fit +import scipy +from scipy.optimize import minimize from scipy.stats import norm -from plotnine import * -data=pandas.read_csv("MmarinumGrowth.csv", names=['S','u']) + +#load dataset +data=pandas.read_csv("MmarinumGrowth.csv") data.shape data.columns -S=[0] -u=[1] -#define x as S from csv -x=numpy.array("S") -#define y as u from csv -y=numpy.array("u") -#u=umax*(S/(S+K)) \ No newline at end of file + +#define that funky function +def Monod(p,obs): + max=p[0] + K=p[1] + sigma=p[2] + expected=max*(obs.S/(obs.S+K)) + nll=-1*norm(expected,sigma).logpdf(obs.u).sum() + return nll +#run the fit function +initialGuess=numpy.array([1,1,1]) +fit=minimize(Monod,initialGuess,method="Nelder-Mead",options={'disp':True},args=data) +print(fit.x) +#oh yeah, thanks YY! \ No newline at end of file From 0a9cb54c45ccc224546751d4b959991d28442b35 Mon Sep 17 00:00:00 2001 From: Phil McCown Date: Thu, 2 Nov 2017 15:07:39 -0400 Subject: [PATCH 11/15] Got rid of practice python script for Q1 --- Ex9Q1-1.py | 57 ------------------------------------------------------ 1 file changed, 57 deletions(-) delete mode 100755 Ex9Q1-1.py diff --git a/Ex9Q1-1.py b/Ex9Q1-1.py deleted file mode 100755 index 3951328..0000000 --- a/Ex9Q1-1.py +++ /dev/null @@ -1,57 +0,0 @@ -import numpy -import pandas -from scipy.optimize import minimize -from scipy.stats import norm -data=pandas.read_csv("ponzr1.csv", sep=',') -data.shape -data.columns -#likelihood function for null -def null(p,obs): - B0=p[0] - sigma=p[1] - - expected=B0 - nll=-1*norm(expected,sigma).logpdf(obs.y).sum() - return nll -#likelihood function for when mutation effects expression -def mut(p,obs): - B0=p[0] - B1=p[1] - sigma=p[2] - - expected=B0+B1*obs.x - nll=-1*norm(expected,sigma).logpdf(obs.y).sum() - return nll -#t-test between control and M124K mutation -#data for control vs first mutation (M124K) -data1=data.loc[(data.mutation == "WT") | (data.mutation == "M124K")] -data1.columns=['x', 'y'] -data1['x'] = data1['x'].map({'WT': 0, 'M124K': 1}) -#data for control vs second mutation (V456D) -data2=data.loc[(data.mutation == "WT") | (data.mutation == "V456D")] -data2.columns=['x', 'y'] -data2['x'] = data2['x'].map({'WT': 0, 'V456D': 1}) -#data for control vs third mutation (I213N) -data3=data.loc[(data.mutation == "WT") | (data.mutation == "I213N")] -data3.columns=['x', 'y'] -data3['x'] = data3['x'].map({'WT': 0, 'I213N': 1}) -#parameters with null model -initialGuess=numpy.array([2000,1]) -null=minimize(null, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data1) -#print parameters -print(null.x) -#print nll -print(null.fun) - -#parameters with mutation model -initialGuess=numpy.array([2000,1000, 1]) -mut=minimize(mut, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data1) -#print parameters -print(mut.x) -#print nll -print(mut.fun) - -#difference in nll calculation -D=2*(null.fun-mut.fun) -#test for statistical significance -1-scipy.stats.chi2.cdf(x=D,df=1) From 54ae3a78caf4709b4cf3a91be6f029fe0155712a Mon Sep 17 00:00:00 2001 From: Phil McCown Date: Thu, 2 Nov 2017 15:08:17 -0400 Subject: [PATCH 12/15] Deleted one more file I erroneously made --- Ex9Q1 | 76 ----------------------------------------------------------- 1 file changed, 76 deletions(-) delete mode 100755 Ex9Q1 diff --git a/Ex9Q1 b/Ex9Q1 deleted file mode 100755 index 5af64cd..0000000 --- a/Ex9Q1 +++ /dev/null @@ -1,76 +0,0 @@ -import numpy -import pandas -import scipy -from scipy.optimize import minimize -from scipy.stats import norm -data=pandas.read_csv("ponzr1.csv", sep=',') -data.shape -data.columns -#likelihood function for null -def null(p,obs): - B0=p[0] - sigma=p[1] - expected=B0 - nll=-1*norm(expected,sigma).logpdf(obs.y).sum() - return nll -#likelihood function for when mutation effects expression -def mut(p,obs): - B0=p[0] - B1=p[1] - sigma=p[2] - expected=B0+B1*obs.x - nll=-1*norm(expected,sigma).logpdf(obs.y).sum() - return nll -#t-test between control and mutations -#data for control vs first mutation (M124K) -data1=data.loc[(data.mutation == "WT") | (data.mutation == "M124K")] -data1.columns=['x', 'y'] -data1['x'] = data1['x'].map({'WT': 0, 'M124K': 1}) -#data for control vs second mutation (V456D) -data2=data.loc[(data.mutation == "WT") | (data.mutation == "V456D")] -data2.columns=['x', 'y'] -data2['x'] = data2['x'].map({'WT': 0, 'V456D': 1}) -#data for control vs third mutation (I213N) -data3=data.loc[(data.mutation == "WT") | (data.mutation == "I213N")] -data3.columns=['x', 'y'] -data3['x'] = data3['x'].map({'WT': 0, 'I213N': 1}) - -#parameters with null model; could we have used a for loop here? -initialGuess=numpy.array([2000,1]) -null1=minimize(null, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data1) -null2=minimize(null, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data2) -null3=minimize(null, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data3) -#print parameters -print(null1.x) -print(null2.x) -print(null3.x) -#print nll -print(null1.fun) -print(null2.fun) -print(null3.fun) - -#parameters with mutation model; ditto about the for loop? -initialGuess=numpy.array([2000,1000, 1]) -mut1=minimize(mut, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data1) -mut2=minimize(mut, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data2) -mut3=minimize(mut, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data3) - -#print parameters -print(mut1.x) -print(mut2.x) -print(mut3.x) - -#print nll -print(mut1.fun) -print(mut2.fun) -print(mut3.fun) - -#difference in nll calculation -D1=2*(null1.fun-mut1.fun) -D2=2*(null2.fun-mut2.fun) -D3=2*(null3.fun-mut3.fun) - -#test for statistical significance -print ("The effect of M124K", 1-scipy.stats.chi2.cdf(x=D1,df=1)) -print ("The effect of V456D", 1-scipy.stats.chi2.cdf(x=D2,df=1)) -print ("The effect of I213N", 1-scipy.stats.chi2.cdf(x=D3,df=1)) From f71f8fa416702bba61ae8f1586954c7decabd111 Mon Sep 17 00:00:00 2001 From: Phil McCown Date: Thu, 2 Nov 2017 15:24:18 -0400 Subject: [PATCH 13/15] Cleaned most of Q3; getting an odd error message though --- Ex9Q3.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/Ex9Q3.py b/Ex9Q3.py index 3f7afeb..be55e67 100644 --- a/Ex9Q3.py +++ b/Ex9Q3.py @@ -1,3 +1,8 @@ +import numpy +import pandas +import scipy +from scipy.optimize import minimize +from scipy.stats import norm #add'leafDecomp.csv' ld=pandas.read_csv("leafDecomp.csv",header=0) @@ -10,12 +15,11 @@ def constant(p,obs): B0=p[0] sigma=p[1] - expected=B0 - nll=-1*norm(expected,sigma).logpdf(obs.y).sum + nll=-1*norm(expected,sigma).logpdf(obs.y).sum() return nll -#set intial guess +#set initial guess constantGuess=numpy.array([600,1]) #estimate parameters @@ -29,12 +33,11 @@ def linear(p,obs): B0=p[0] B1=p[1] sigma=p[2] - expected=B0+B1*obs.x nll=-1*norm(expected,sigma).logpdf(obs.y).sum() return nll -#set intial guesses +#set initial guesses linearGuess=numpy.array([10,6,1]) #estimate parameters linearFit=minimize(linear,linearGuess,method="Nelder-Mead",options={'disp':True},args=leaves) @@ -48,18 +51,18 @@ def hump(p,obs): B1=p[1] B2=p[2] sigma=p[3] - expected=B0+B1*obs.x+B2*(obs.x)^2 nll=-1*norm(expected,sigma).logpdf(obs.y).sum() return nll #set initial guesses -humpGuess=numpy.array([200,10,-.2,1]) +humpGuess=numpy.array([200,10,-0.2,1]) #estimate parameters humpFit=minimize(hump,humpGuess,method="Nelder-Mead",options={'disp':True},args=leaves) -print(humpyFit.x) -print(humpyFit.fun)#nll +print(humpFit.x) +#print nll +print(humpFit.fun) #difference in nll(constant vs linear) first_D=2*(constantFit.fun-linearFit.fun) From a9d93d9e3c5c656f781164aeed2d67186fea1300 Mon Sep 17 00:00:00 2001 From: Phil McCown Date: Thu, 2 Nov 2017 15:34:34 -0400 Subject: [PATCH 14/15] Hopefully final answers to Q3 --- Ex9Q3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Ex9Q3.py b/Ex9Q3.py index be55e67..b3175af 100644 --- a/Ex9Q3.py +++ b/Ex9Q3.py @@ -51,7 +51,7 @@ def hump(p,obs): B1=p[1] B2=p[2] sigma=p[3] - expected=B0+B1*obs.x+B2*(obs.x)^2 + expected=B0+B1*obs.x+B2*((obs.x)**2) nll=-1*norm(expected,sigma).logpdf(obs.y).sum() return nll From 8b0cd399cecb894c2c0754e4138b8745bc940262 Mon Sep 17 00:00:00 2001 From: kkilgoreND <31992787+kkilgoreND@users.noreply.github.com> Date: Thu, 2 Nov 2017 15:48:29 -0400 Subject: [PATCH 15/15] Full script for parts 1-3 --- Ex9Full.py | 189 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 189 insertions(+) create mode 100644 Ex9Full.py diff --git a/Ex9Full.py b/Ex9Full.py new file mode 100644 index 0000000..6a57285 --- /dev/null +++ b/Ex9Full.py @@ -0,0 +1,189 @@ +#---Part 1--- +import numpy +import pandas +import scipy +from scipy.optimize import minimize +from scipy.stats import norm +data=pandas.read_csv("ponzr1.csv", sep=',') +data.shape +data.columns +#likelihood function for null +def null(p,obs): + B0=p[0] + sigma=p[1] + expected=B0 + nll=-1*norm(expected,sigma).logpdf(obs.y).sum() + return nll +#likelihood function for when mutation effects expression +def mut(p,obs): + B0=p[0] + B1=p[1] + sigma=p[2] + expected=B0+B1*obs.x + nll=-1*norm(expected,sigma).logpdf(obs.y).sum() + return nll +#t-test between control and mutations +#data for control vs first mutation (M124K) +data1=data.loc[(data.mutation == "WT") | (data.mutation == "M124K")] +data1.columns=['x', 'y'] +data1['x'] = data1['x'].map({'WT': 0, 'M124K': 1}) +#data for control vs second mutation (V456D) +data2=data.loc[(data.mutation == "WT") | (data.mutation == "V456D")] +data2.columns=['x', 'y'] +data2['x'] = data2['x'].map({'WT': 0, 'V456D': 1}) +#data for control vs third mutation (I213N) +data3=data.loc[(data.mutation == "WT") | (data.mutation == "I213N")] +data3.columns=['x', 'y'] +data3['x'] = data3['x'].map({'WT': 0, 'I213N': 1}) + +#parameters with null model; could we have used a for loop here? +initialGuess=numpy.array([2000,1]) +null1=minimize(null, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data1) +null2=minimize(null, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data2) +null3=minimize(null, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data3) +#print parameters +print(null1.x) +print(null2.x) +print(null3.x) +#print nll +print(null1.fun) +print(null2.fun) +print(null3.fun) + +#parameters with mutation model; ditto about the for loop? +initialGuess=numpy.array([2000,1000, 1]) +mut1=minimize(mut, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data1) +mut2=minimize(mut, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data2) +mut3=minimize(mut, initialGuess, method="Nelder-Mead", options={'disp': True}, args=data3) + +#print parameters +print(mut1.x) +print(mut2.x) +print(mut3.x) + +#print nll +print(mut1.fun) +print(mut2.fun) +print(mut3.fun) + +#difference in nll calculation +D1=2*(null1.fun-mut1.fun) +D2=2*(null2.fun-mut2.fun) +D3=2*(null3.fun-mut3.fun) + +#test for statistical significance +print ("The effect of M124K", 1-scipy.stats.chi2.cdf(x=D1,df=1)) +print ("The effect of V456D", 1-scipy.stats.chi2.cdf(x=D2,df=1)) +print ("The effect of I213N", 1-scipy.stats.chi2.cdf(x=D3,df=1)) + +#---Part 2--- +#load packages +import numpy +import pandas +import scipy +from scipy.optimize import minimize +from scipy.stats import norm + +#load dataset +data=pandas.read_csv("MmarinumGrowth.csv") +data.shape +data.columns + +#define that funky function +def Monod(p,obs): + max=p[0] + K=p[1] + sigma=p[2] + expected=max*(obs.S/(obs.S+K)) + nll=-1*norm(expected,sigma).logpdf(obs.u).sum() + return nll +#run the fit function +initialGuess=numpy.array([1,1,1]) +fit=minimize(Monod,initialGuess,method="Nelder-Mead",options={'disp':True},args=data) +print(fit.x) +#oh yeah, thanks YY! + +#---Part 3--- +import numpy +import pandas +import scipy +from scipy.optimize import minimize +from scipy.stats import norm +#add'leafDecomp.csv' +ld=pandas.read_csv("leafDecomp.csv",header=0) + +#make new dataframe with x and y as the headers +leaves=ld +leaves.columns=['x', 'y'] +leaves.head() + +#liklihood func. for constant decomp +def constant(p,obs): + B0=p[0] + sigma=p[1] + expected=B0 + nll=-1*norm(expected,sigma).logpdf(obs.y).sum() + return nll + +#set initial guess +constantGuess=numpy.array([600,1]) + +#estimate parameters +constantFit=minimize(constant,constantGuess,method="Nelder-Mead",options={'disp':True},args=leaves) +print(constantFit.x) +#print nll +print(constantFit.fun) + +#liklihood func. for linear decomp +def linear(p,obs): + B0=p[0] + B1=p[1] + sigma=p[2] + expected=B0+B1*obs.x + nll=-1*norm(expected,sigma).logpdf(obs.y).sum() + return nll + +#set initial guesses +linearGuess=numpy.array([10,6,1]) +#estimate parameters +linearFit=minimize(linear,linearGuess,method="Nelder-Mead",options={'disp':True},args=leaves) +print(linearFit.x) +#print nll +print(linearFit.fun) + +#liklihood function for hump decomp. +def hump(p,obs): + B0=p[0] + B1=p[1] + B2=p[2] + sigma=p[3] + expected=B0+B1*obs.x+B2*((obs.x)**2) + nll=-1*norm(expected,sigma).logpdf(obs.y).sum() + return nll + +#set initial guesses +humpGuess=numpy.array([200,10,-0.2,1]) + +#estimate parameters +humpFit=minimize(hump,humpGuess,method="Nelder-Mead",options={'disp':True},args=leaves) +print(humpFit.x) +#print nll +print(humpFit.fun) + +#difference in nll(constant vs linear) +first_D=2*(constantFit.fun-linearFit.fun) + +#test for statistical significance +1-scipy.stats.chi2.cdf(x=first_D,df=1) + +#difference in nll(linear vs hump) +second_D=2*(linearFit.fun-humpFit.fun) + +#test for statistical significance +1-scipy.stats.chi2.cdf(x=second_D,df=1) + +#difference in nll(constant vs hump) +third_D=2*(constantFit.fun-humpFit.fun) + +#test for statistical significance +1-scipy.stats.chi2.cdf(x=third_D,df=2)