diff --git a/part1script.py b/part1script.py new file mode 100644 index 0000000..04225d0 --- /dev/null +++ b/part1script.py @@ -0,0 +1,94 @@ +#Exercise9, 10/27/17 + +import numpy +import pandas +from scipy.optimize import minimize +from scipy.stats import norm +from plotnine import * + +data = pandas.read_csv('ponzr1.csv', header=0,sep=',') + +#subset data into three different data frames +sub1=data.loc[data.mutation.isin(['WT','M124K']),:] +sub2=data.loc[data.mutation.isin(['WT','V456D']),:] +sub3=data.loc[data.mutation.isin(['WT','I213N']),:] + + +#Make new data frame with 'group' column (your x=0 or x=1) +#var2=pandas.DataFrame({'y':var1.col2name, 'x':}) +sub1frame=pandas.DataFrame({'y':sub1.ponzr1Counts,'x':0}) +sub2frame=pandas.DataFrame({'y':sub2.ponzr1Counts,'x':0}) +sub3frame=pandas.DataFrame({'y':sub3.ponzr1Counts,'x':0}) +#Designate 'treatment' group as x=1 +#var2.loc[var1.col1name=='name of treatment group', 'x']=1 +sub1frame.loc[sub1.mutation=='M124K','x']=1 +sub2frame.loc[sub2.mutation=='V456D','x']=1 +sub3frame.loc[sub3.mutation=='I213N','x']=1 +#print(sub3frame) + +#### Define null function +def nllikeNull(p,obs): + B0=p[0] + sigma=p[1] + + expected=B0 + nll=-1*norm(expected,sigma).logpdf(obs.y).sum() + return nll + +# estimate parameters by minimizing the NLL for sub1frame +initialGuess=numpy.array([1,1]) +fit=minimize(nllikeNull,initialGuess,method="Nelder-Mead",options={'disp': True},args=sub1frame) +nullM124K= fit.fun #M124K +#gives NLL value for affect of mutation 1 in null model +#print(nullM124K) + +initialGuess=numpy.array([1,1]) +fit=minimize(nllikeNull,initialGuess,method="Nelder-Mead",options={'disp': True},args=sub2frame) +nullV456D= fit.fun #V456D + +initialGuess=numpy.array([1,1]) +fit=minimize(nllikeNull,initialGuess,method="Nelder-Mead",options={'disp': True},args=sub3frame) +nullI213N = fit.fun #I213N + +#### Define function y=B0+B1*treat+error +def nllike(p,obs): + B0=p[0] + B1=p[1] + sigma=p[2] + + expectedAlt=B0+B1*obs.x + nll=-1*norm(expectedAlt,sigma).logpdf(obs.y).sum() + return nll + +#estimate parameters by minimizing the NLL +initialGuess=numpy.array([1,1,1]) +fit=minimize(nllike,initialGuess,method="Nelder-Mead",options={'disp': True},args=sub1frame) +altM124K = fit.fun #M124K + +initialGuess=numpy.array([1,1,1]) +fit=minimize(nllike,initialGuess,method="Nelder-Mead",options={'disp': True},args=sub2frame) +altV456D = fit.fun #V456D + +initialGuess=numpy.array([1,1,1]) +fit=minimize(nllike,initialGuess,method="Nelder-Mead",options={'disp': True},args=sub3frame) +altI213N = fit.fun #I213N + +### Calculating D values +DvalM124K = 2*(nullM124K-altM124K) +print("D-value for M124K= ", DvalM124K) + +DvalV456D = 2*(nullV456D-altV456D) +print("D-value for V456D = ", DvalV456D) + +DvalI213N = 2*(nullI213N-altI213N) +print("D-value for I213N = ", DvalI213N) + +### Chi-squared dist test values +pval1=1-scipy.stats.chi2.cdf(x=DvalM124K,df=1) +pval2=1-scipy.stats.chi2.cdf(x=DvalV456D,df=1) +pval3=1-scipy.stats.chi2.cdf(x=DvalI213N,df=1) +print(pval1) +print(pval2) +print(pval3) + + diff --git a/part2script.py b/part2script.py new file mode 100755 index 0000000..19ca8f7 --- /dev/null +++ b/part2script.py @@ -0,0 +1,17 @@ +###Exercise 9 Part 2 +import numpy +import pandas +from scipy.optimize import minimize +from scipy.stats import norm +from plotnine import * +data=pandas.read_csv("MmarinumGrowth.csv") +def Grate(p,obs): + um=p[0] + K=p[1] + sigma=p[2] + ue=um*(obs.S/(obs.S+K)) + nll=-1*norm(ue,sigma).logpdf(obs.u).sum() + return nll +probswrong=numpy.array([1,1,1]) +best=minimize(Grate,probswrong,method="Nelder-Mead",options={"disp":True},args=data) +print (best.x) diff --git a/part3script.py b/part3script.py new file mode 100644 index 0000000..e5c1669 --- /dev/null +++ b/part3script.py @@ -0,0 +1,56 @@ +import numpy as np +import pandas as pd +import scipy as sp +from scipy.optimize import minimize +from scipy.stats import norm +from plotnine import * + +leaf=pd.read_csv("leafDecomp.csv") + + +ggplot(leaf, aes(x="Ms", y="decomp")) + geom_point() + + +def nllike_null (p,obs): + a=p[0] + b=p[1] + c=p[2] + sigma=p[3] + + expected=a + nll=-1*norm(expected,sigma).logpdf(obs.decomp).sum() + return(nll) + +def nllike_linear(p,obs): + a=p[0] + b=p[1] + c=p[2] + sigma=p[3] + + expected=a+b*obs.Ms + nll=-1*norm(expected,sigma).logpdf(obs.decomp).sum() + return(nll) + +def nllike_hump(p,obs): + a=p[0] + b=p[1] + c=p[2] + sigma=p[3] + + expected=a+b*obs.Ms+c*obs.Ms**2 + nll=-1*norm(expected,sigma).logpdf(obs.decomp).sum() + return(nll) + + +guess_hump=np.array([1,100,500,50]) +fit_null=minimize(nllike_null,guess_hump, method="Nelder-Mead", options={'disp':True},args=leaf) +fit_linear=minimize(nllike_linear,guess_hump, method="Nelder-Mead", options={'disp':True},args=leaf) +fit_hump=minimize(nllike_hump,guess_hump, method="Nelder-Mead", options={'disp':True},args=leaf) + +chi2_linear=2*(fit_null.fun-fit_linear.fun) +1-sp.stats.chi2.cdf(x=chi2_linear,df=1) + +chi2_hump=2*(fit_null.fun-fit_hump.fun) +1-sp.stats.chi2.cdf(x=chi2_hump,df=2) + +#based on the chi2 test, the hump-shaped model is the best fit for the leaf decomposition data.