From 4d115977802424b7e25449c71791db60594cf0ec Mon Sep 17 00:00:00 2001 From: omegadan01 Date: Fri, 27 Oct 2017 10:58:27 -0400 Subject: [PATCH 01/12] python fiile made --- .idea/vcs.xml | 6 ++++++ exercise 9.py | 17 +++++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 .idea/vcs.xml create mode 100644 exercise 9.py diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/exercise 9.py b/exercise 9.py new file mode 100644 index 0000000..6aa7b2f --- /dev/null +++ b/exercise 9.py @@ -0,0 +1,17 @@ +######## exercise 9 ##### +# Dan and Drew + + + + +#1 + + + +#2 + + + + + +#3 From 33955efb8828d569f65cd364f58d5de8d534d489 Mon Sep 17 00:00:00 2001 From: omegadan01 Date: Mon, 30 Oct 2017 20:34:16 -0400 Subject: [PATCH 02/12] dan- q1 progress --- exercise 9.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/exercise 9.py b/exercise 9.py index 6aa7b2f..dfff57b 100644 --- a/exercise 9.py +++ b/exercise 9.py @@ -6,6 +6,32 @@ #1 +import pandas +from plotnine import * +import scipy +pondat = pandas.read_csv("ponzr1.csv") + +#barplot for mean counts per mutation +grouped= pondat.groupby(["mutation"]).mean().reset_index() #mean counts by mutation +print grouped + +grouped.columns = ['mutation', 'mean_counts'] +p= (ggplot(data=grouped) + + aes(x='mutation', y= 'mean_counts',fill= 'mutation') + + geom_bar(stat = "identity") + + theme_classic() + ) +print p + + +subset_WT = pondat.query('mutation=="WT"') +subset_M124K = pondat.query('mutation=="M124K"') +subset_V456D = pondat.query('mutation=="V456D"') +subset_I213N = pondat.query('mutation=="I213N"') + + + + #2 From 47f7cde7b70771003faaa1918b286a30d582350c Mon Sep 17 00:00:00 2001 From: omegadan01 Date: Tue, 31 Oct 2017 14:30:09 -0400 Subject: [PATCH 03/12] dan- q1 is DONE!!! WOOOOOOOO --- exercise 9.py | 75 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 60 insertions(+), 15 deletions(-) diff --git a/exercise 9.py b/exercise 9.py index dfff57b..10045c2 100644 --- a/exercise 9.py +++ b/exercise 9.py @@ -1,20 +1,20 @@ ######## exercise 9 ##### # Dan and Drew - - - -#1 - +# 1 +# import libs import pandas from plotnine import * -import scipy -pondat = pandas.read_csv("ponzr1.csv") +import numpy +from scipy.optimize import minimize +from scipy.stats import norm +import scipy.stats -#barplot for mean counts per mutation -grouped= pondat.groupby(["mutation"]).mean().reset_index() #mean counts by mutation -print grouped +# load dataframe +pondat = pandas.read_csv("ponzr1.csv") +# barplot for mean counts per mutation +grouped= pondat.groupby(["mutation"]).mean().reset_index() # mean counts by mutation grouped.columns = ['mutation', 'mean_counts'] p= (ggplot(data=grouped) + aes(x='mutation', y= 'mean_counts',fill= 'mutation') @@ -23,11 +23,56 @@ ) print p - -subset_WT = pondat.query('mutation=="WT"') -subset_M124K = pondat.query('mutation=="M124K"') -subset_V456D = pondat.query('mutation=="V456D"') -subset_I213N = pondat.query('mutation=="I213N"') +# format and subset dataframes +pondat.columns = ['mutation', 'y'] # rename col +pondat['x'] = [0 if ele == "WT" else 1 for ele in pondat["mutation"]] # assign row values depending on mutation + +pondat.set_index("mutation", inplace=True) # reset index + +M124K = pondat.loc[['M124K','WT']] # subset for each mutation +V456D = pondat.loc[['V456D','WT']] +I213N = pondat.loc[['I213N','WT']] + +# null function +def null(p, obs): + B0 = p[0] + sigma = p[1] + expected = B0 + nll = -1*norm(expected, sigma).logpdf(obs.y).sum() + return nll + +# alt function +def alter (p, obs): + B0 = p[0] + B1 = p[1] + sigma = p[2] + expected =B0+B1*obs.x + nll = -1*norm(expected, sigma).logpdf(obs.y).sum() + return nll + +# initial guess +initialGuess = numpy.array([1,1,1]) + +# mk124k +fitNull = minimize(null, initialGuess, method="Nelder-Mead", options={'disp':True}, args=M124K) +fitAlter = minimize(alter, initialGuess, method="Nelder-Mead", options={'disp':True}, args=M124K) +D = 2*(fitNull.fun-fitAlter.fun) +print "MK124K" +print 1 - scipy.stats.chi2.cdf(x=D, df=1) #insig + +# v456D +fitNull = minimize(null, initialGuess, method="Nelder-Mead", options={'disp':True}, args=V456D) +fitAlter = minimize(alter, initialGuess, method="Nelder-Mead", options={'disp':True}, args=V456D) +D = 2*(fitNull.fun-fitAlter.fun) +print "V456D" +print 1 - scipy.stats.chi2.cdf(x=D, df=1) #sig treatment + +# I212N +fitNull = minimize(null, initialGuess, method="Nelder-Mead", options={'disp':True}, args=I213N) +fitAlter = minimize(alter, initialGuess, method="Nelder-Mead", options={'disp':True}, args=I213N) +D = 2*(fitNull.fun-fitAlter.fun) +print "I212N" +print 1 - scipy.stats.chi2.cdf(x=D, df=1) # insig From 571a85e883467bb1589a62065feddb4324ece98c Mon Sep 17 00:00:00 2001 From: omegadan01 Date: Tue, 31 Oct 2017 15:16:40 -0400 Subject: [PATCH 04/12] started q3 --- exercise 9.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/exercise 9.py b/exercise 9.py index 10045c2..2ac2395 100644 --- a/exercise 9.py +++ b/exercise 9.py @@ -86,3 +86,10 @@ def alter (p, obs): #3 +leaf = pandas.read_csv('leafDecomp.csv') +print leaf +d = (ggplot(data=leaf) + + aes(x= "Ms", y= "decomp") + + geom_point() + ) +print d \ No newline at end of file From 46c9f7d87b41a2afa8dc597dce9dd3842fb3f32e Mon Sep 17 00:00:00 2001 From: Andrew Guinness Date: Wed, 1 Nov 2017 14:10:53 -0400 Subject: [PATCH 05/12] prepull commit --- exercise 9.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/exercise 9.py b/exercise 9.py index 6aa7b2f..f8d5048 100644 --- a/exercise 9.py +++ b/exercise 9.py @@ -1,5 +1,8 @@ ######## exercise 9 ##### # Dan and Drew +import scipy +import numpy +import os From 2ee813beb579bc00d464fa9865b4a2310c75d7c6 Mon Sep 17 00:00:00 2001 From: omegadan01 Date: Wed, 1 Nov 2017 17:48:18 -0400 Subject: [PATCH 06/12] more q3 --- exercise 9.py | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/exercise 9.py b/exercise 9.py index 2ac2395..9c45fbf 100644 --- a/exercise 9.py +++ b/exercise 9.py @@ -21,7 +21,7 @@ + geom_bar(stat = "identity") + theme_classic() ) -print p +# print p # format and subset dataframes pondat.columns = ['mutation', 'y'] # rename col @@ -86,10 +86,38 @@ def alter (p, obs): #3 -leaf = pandas.read_csv('leafDecomp.csv') + +leaf = pandas.read_csv('leafDecomp.csv') # load data print leaf d = (ggplot(data=leaf) + aes(x= "Ms", y= "decomp") + geom_point() ) -print d \ No newline at end of file +print d + +# simple functions +def simple (p, obs): + B0 = p[0] + sigma = p[1] + expected = B0 + nll = -1*norm(expected, sigma).logpdf(obs.y).sum() + return nll + +# alt1 function +def complex (p, obs): + B0 = p[0] + B1 = p[1] + sigma = p[2] + expected =B0+B1*obs.x + nll = -1*norm(expected, sigma).logpdf(obs.y).sum() + return nll + +# more complex alt +def morecomplex (p, obs): + B0 = p[0] + B1 = p[1] + B2 = p[2] + sigma = p[3] + expected =B0+B1*obs.x+B2*(obs.x)^2 + nll = -1*norm(expected, sigma).logpdf(obs.y).sum() + return nll \ No newline at end of file From a24fd822b61b08cfd7eb14a72a82800e960df144 Mon Sep 17 00:00:00 2001 From: omegadan01 Date: Wed, 1 Nov 2017 21:38:18 -0400 Subject: [PATCH 07/12] more q3 --- exercise 9.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exercise 9.py b/exercise 9.py index 9c45fbf..581a2a7 100644 --- a/exercise 9.py +++ b/exercise 9.py @@ -21,7 +21,7 @@ + geom_bar(stat = "identity") + theme_classic() ) -# print p +print p # format and subset dataframes pondat.columns = ['mutation', 'y'] # rename col From 089ab7ede31c7fbabbcea79e849d667d65c76f85 Mon Sep 17 00:00:00 2001 From: omegadan01 Date: Thu, 2 Nov 2017 00:55:12 -0400 Subject: [PATCH 08/12] most of 3 works and is right... I can't get the last model to work --- exercise 9.py | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/exercise 9.py b/exercise 9.py index 581a2a7..982b726 100644 --- a/exercise 9.py +++ b/exercise 9.py @@ -88,7 +88,6 @@ def alter (p, obs): #3 leaf = pandas.read_csv('leafDecomp.csv') # load data -print leaf d = (ggplot(data=leaf) + aes(x= "Ms", y= "decomp") + geom_point() @@ -100,7 +99,7 @@ def simple (p, obs): B0 = p[0] sigma = p[1] expected = B0 - nll = -1*norm(expected, sigma).logpdf(obs.y).sum() + nll = -1*norm(expected, sigma).logpdf(obs.decomp).sum() return nll # alt1 function @@ -108,8 +107,8 @@ def complex (p, obs): B0 = p[0] B1 = p[1] sigma = p[2] - expected =B0+B1*obs.x - nll = -1*norm(expected, sigma).logpdf(obs.y).sum() + expected =B0+B1*obs.Ms + nll = -1*norm(expected, sigma).logpdf(obs.decomp).sum() return nll # more complex alt @@ -118,6 +117,26 @@ def morecomplex (p, obs): B1 = p[1] B2 = p[2] sigma = p[3] - expected =B0+B1*obs.x+B2*(obs.x)^2 - nll = -1*norm(expected, sigma).logpdf(obs.y).sum() - return nll \ No newline at end of file + expected =B0+B1*(obs.Ms)+B2*((obs.Ms)^2) + nll = -1*norm(expected, sigma).logpdf(obs.decomp).sum() + return nll + +initialGuess = numpy.array([1,1,1,1]) + +# null to linear +fitNull = minimize(simple, initialGuess, method="Nelder-Mead", options={'disp':True}, args=leaf) +fitAlter = minimize(complex, initialGuess, method="Nelder-Mead", options={'disp':True}, args=leaf) +print fitNull #these values are right compared to Stuart's answers +print fitAlter # these values are right compared to Stuart's answers +D = 2*(fitNull.fun-fitAlter.fun) +print "simple to linear model = sig!" +print 1 - scipy.stats.chi2.cdf(x=D, df=1) + +# linear to quadratic +fitNull = minimize(complex, initialGuess, method="Nelder-Mead", options={'disp':True}, args=leaf) +fitAlter = minimize(morecomplex, initialGuess, method="Nelder-Mead", options={'disp':True}, args=leaf) + +print "linear model to quadratic = sig!" +print 1 - scipy.stats.chi2.cdf(x=D, df=1) + +#check degrees of freedomand how to extract proper values from the printed lists. \ No newline at end of file From 2e2c39a0f5be45c8a06125ecd3ec233f8afcf469 Mon Sep 17 00:00:00 2001 From: omegadan01 Date: Thu, 2 Nov 2017 13:08:15 -0400 Subject: [PATCH 09/12] q3 works!!! wooooooolooolooooo --- exercise 9.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/exercise 9.py b/exercise 9.py index 982b726..fc48338 100644 --- a/exercise 9.py +++ b/exercise 9.py @@ -117,16 +117,19 @@ def morecomplex (p, obs): B1 = p[1] B2 = p[2] sigma = p[3] - expected =B0+B1*(obs.Ms)+B2*((obs.Ms)^2) + expected =B0+(B1*(obs.Ms))+(B2*(obs.Ms*obs.Ms)) nll = -1*norm(expected, sigma).logpdf(obs.decomp).sum() return nll initialGuess = numpy.array([1,1,1,1]) +comGuess = numpy.array([200,10,-.2,1]) # null to linear fitNull = minimize(simple, initialGuess, method="Nelder-Mead", options={'disp':True}, args=leaf) fitAlter = minimize(complex, initialGuess, method="Nelder-Mead", options={'disp':True}, args=leaf) +print "null or simple model values" print fitNull #these values are right compared to Stuart's answers +print "complex or linear model values" print fitAlter # these values are right compared to Stuart's answers D = 2*(fitNull.fun-fitAlter.fun) print "simple to linear model = sig!" @@ -134,9 +137,24 @@ def morecomplex (p, obs): # linear to quadratic fitNull = minimize(complex, initialGuess, method="Nelder-Mead", options={'disp':True}, args=leaf) -fitAlter = minimize(morecomplex, initialGuess, method="Nelder-Mead", options={'disp':True}, args=leaf) - +fitAlter = minimize(morecomplex, comGuess, method="Nelder-Mead", options={'disp':True}, args=leaf) +print "complex/linear model values" +print fitNull +print "quadratic model values" +print fitAlter print "linear model to quadratic = sig!" print 1 - scipy.stats.chi2.cdf(x=D, df=1) -#check degrees of freedomand how to extract proper values from the printed lists. \ No newline at end of file + +# null to quadratic +fitNull = minimize(simple, initialGuess, method="Nelder-Mead", options={'disp':True}, args=leaf) +fitAlter = minimize(morecomplex, comGuess, method="Nelder-Mead", options={'disp':True}, args=leaf) +print "null values" +print fitNull +print "quadratic model values" +print fitAlter +print "simple model to quadratic = sig!" +print 1 - scipy.stats.chi2.cdf(x=D, df=2) + + +## according to our calcs, we should use the hump shaped model!! \ No newline at end of file From ee9e47e3fb330768c1ae4a7f4fedcf972d9ac001 Mon Sep 17 00:00:00 2001 From: Andrew Guinness Date: Thu, 2 Nov 2017 17:42:25 -0400 Subject: [PATCH 10/12] some damn bullshit --- exercise 9.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/exercise 9.py b/exercise 9.py index 2d00387..b0badb6 100644 --- a/exercise 9.py +++ b/exercise 9.py @@ -83,8 +83,14 @@ def alter (p, obs): #2 +bakturriuh = pandas.read_csv("MmarinumGrowth.csv") - +def nullbac(p, obs): + B0 = p[0] + sigma = p[1] + expected = mumax*(S/S+Ks) + nll = -1*norm(expected, sigma).logpdf(obs.y).sum() + return nll From 1f6f5d2e4f68bb0b3a72edbac6dc46cbe576d38a Mon Sep 17 00:00:00 2001 From: Andrew Guinness Date: Thu, 2 Nov 2017 17:58:00 -0400 Subject: [PATCH 11/12] q2 again --- exercise 9.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/exercise 9.py b/exercise 9.py index 2f7a178..d10873c 100644 --- a/exercise 9.py +++ b/exercise 9.py @@ -86,13 +86,19 @@ def alter (p, obs): bakturriuh = pandas.read_csv("MmarinumGrowth.csv") def nullbac(p, obs): - B0 = p[0] - sigma = p[1] - expected = mumax*(S/S+Ks) - nll = -1*norm(expected, sigma).logpdf(obs.y).sum() + mu_max = p[0] + K = p[1] + sigma = p[2] + + mu = mu_max * (bakturriuh.S / bakturriuh.S + K) + nll = -1*norm(mu, sigma).logpdf(bakturriuh.u).sum() return nll +guesslist = numpy.array([1,1,1]) #not a guestlist +BacMLL = minimize(nullbac, guesslist, method="Nelder-Mead", options={'disp':True}, args=bakturriuh) +print "Bacterial growth estimates" +print BacMLL #3 From 118b536b94ea01af687fc0d249341a32a9b57ab4 Mon Sep 17 00:00:00 2001 From: Andrew Guinness Date: Thu, 2 Nov 2017 18:11:43 -0400 Subject: [PATCH 12/12] final exercise 9 --- exercise 9.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/exercise 9.py b/exercise 9.py index d10873c..5806767 100644 --- a/exercise 9.py +++ b/exercise 9.py @@ -83,22 +83,26 @@ def alter (p, obs): #2 +#import mu and S estimates of M. marinum bakturriuh = pandas.read_csv("MmarinumGrowth.csv") +#Maximum Likelihood model to estimate max growth rate and half-sat constant def nullbac(p, obs): mu_max = p[0] K = p[1] sigma = p[2] - mu = mu_max * (bakturriuh.S / bakturriuh.S + K) + mu = mu_max * (bakturriuh.S / (bakturriuh.S + K)) nll = -1*norm(mu, sigma).logpdf(bakturriuh.u).sum() return nll guesslist = numpy.array([1,1,1]) #not a guestlist +#optimize MLL BacMLL = minimize(nullbac, guesslist, method="Nelder-Mead", options={'disp':True}, args=bakturriuh) print "Bacterial growth estimates" -print BacMLL +print BacMLL.x + #3