From ad61f04f6306b3ac8864dfce1700e320b0a2196a Mon Sep 17 00:00:00 2001
From: Sage <davisage@gmail.com>
Date: Fri, 27 Oct 2017 14:34:57 -0400
Subject: [PATCH 1/5] initial

---
 Exe9.R | 141 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 141 insertions(+)
 create mode 100644 Exe9.R

diff --git a/Exe9.R b/Exe9.R
new file mode 100644
index 0000000..ada7362
--- /dev/null
+++ b/Exe9.R
@@ -0,0 +1,141 @@
+setwd("C:/Users/DAVIS/Desktop/shell-novice-data/exe9/Intro_Biocomp_ND_317_Tutorial9/")
+
+#Question1 V456D is sig
+Ponzr <- read.csv("ponzr1.csv", header=T)
+
+like1 <- function(p,x,y){
+  B0=p[1]
+  B1=p[2]
+  sigma = exp(p[3])
+  expected = B0
+  
+  nll = -sum(dnorm(y, mean=expected, sd=sigma, log=TRUE))
+  return(nll)
+}
+
+like2 <- function(p,x,y){
+  B0=p[1]
+  B1=p[2]
+  sigma = exp(p[3])
+  expected = B0+B1*x
+  
+  nll = -sum(dnorm(y, mean=expected, sd=sigma, log=TRUE))
+  return(nll)
+}
+
+
+Ponzr1 <- Ponzr[which(Ponzr$mutation=="WT"),]
+Ponzr1[,1] <- 0
+Ponzr2 <- Ponzr[which(Ponzr$mutation=="M124K"),]
+Ponzr2[,1] <- 1
+Ponzr3 <- Ponzr[which(Ponzr$mutation=="V456D"),]
+Ponzr3[,1] <- 1
+Ponzr4 <- Ponzr[which(Ponzr$mutation=="I213N"),]
+Ponzr4[,1] <- 1
+
+Ponzr1_2 <- rbind(Ponzr1,Ponzr2)
+Ponzr1_3 <- rbind(Ponzr1,Ponzr3)
+Ponzr1_4 <- rbind(Ponzr1,Ponzr4)
+
+Guess = c(1,1,1)
+fit1=optim(Guess, like1, x=Ponzr1$mutation, y=Ponzr1$ponzr1Counts)
+fit2=optim(Guess, like2, x=Ponzr2$mutation, y=Ponzr2$ponzr1Counts)
+fit1$value
+fit2$value
+D = 2*(fit1$value-fit2$value)
+pchisq(D, df=1, lower.tail=F)
+
+
+Guess = c(1,1,1)
+fit1=optim(Guess, like1, x=Ponzr1$mutation, y=Ponzr1$ponzr1Counts)
+fit2=optim(Guess, like2, x=Ponzr3$mutation, y=Ponzr3$ponzr1Counts)
+fit1$value
+fit2$value
+D = 2*(fit1$value-fit2$value)
+pchisq(D, df=1, lower.tail=F)
+
+Guess = c(1,1,1)
+fit1=optim(Guess, like1, x=Ponzr1$mutation, y=Ponzr1$ponzr1Counts)
+fit2=optim(Guess, like2, x=Ponzr4$mutation, y=Ponzr4$ponzr1Counts)
+fit1$value
+fit2$value
+D = 2*(fit1$value-fit2$value)
+pchisq(D, df=1, lower.tail=F)
+
+#Question2
+Mmarinum <- read.csv("MmarinumGrowth.csv", header=T)
+nllike <- function(p,x,y){
+  B0=p[1]
+  B1=p[2]
+  sigma = exp(p[3])
+  expected = B0*(x/(B1+x))
+  
+  nll = -sum(dnorm(x=y, mean=expected, sd=sigma, log=TRUE))
+  return(nll)
+}
+Guess = c(1,1,1)
+fit=optim(Guess, nllike, x=Mmarinum$S, y=Mmarinum$u)
+print(cbind("umax is ", fit$par[1], " and Ks is ", fit$par[2]))
+
+#Question3 (not finished yet)
+
+leafDecomp <- read.csv("leafDecomp.csv", header = T)
+
+one_var <- function(p,x,y){
+  B0=p[1]
+  B1=p[2]
+  B2=p[3]
+  sigma = exp(p[4])
+  expected = B0
+  
+  nll = -sum(dnorm(x=y, mean=expected, sd=sigma, log=TRUE))
+  return(nll)
+}
+
+two_var <- function(p,x,y){
+  B0=p[1]
+  B1=p[2]
+  B2=p[3]
+  sigma = exp(p[4])
+  expected = B0 + B1*x
+  
+  nll = -sum(dnorm(x=y, mean=expected, sd=sigma, log=TRUE))
+  return(nll)
+}
+
+three_var <- function(p,x,y){
+  B0=p[1]
+  B1=p[2]
+  B2=p[3]
+  sigma = exp(p[4])
+  expected = B0 + B1*x + B2*x*x
+  
+  nll = -sum(dnorm(x=y, mean=expected, sd=sigma, log=TRUE))
+  return(nll)
+}
+
+Guess = c(1,1,1,1)
+one_var_result=optim(Guess, one_var, x=leafDecomp$Ms, y=leafDecomp$decomp)
+two_var_result=optim(Guess, two_var, x=leafDecomp$Ms, y=leafDecomp$decomp)
+three_var_result=optim(Guess, three_var, x=leafDecomp$Ms, y=leafDecomp$decomp)
+
+one_var_result$value
+two_var_result$value
+three_var_result$value
+
+one_var_result$par
+two_var_result$par
+three_var_result$par
+
+D1_2 = 2*(one_var_result$value-two_var_result$value)
+D1_3 = 2*(one_var_result$value-three_var_result$value)
+D2_3 = 2*(three_var_result$value-two_var_result$value)
+
+pchisq(D1_2, df=1, lower.tail=F)
+pchisq(D1_3, df=2, lower.tail=F)
+pchisq(D2_3, df=1, lower.tail=F)
+
+
+
+
+

From cead1cbda4db3c021555d7ee235236e02ae2648b Mon Sep 17 00:00:00 2001
From: Kate Vendrely <kvendrel@nd.edu>
Date: Fri, 27 Oct 2017 21:09:18 -0400
Subject: [PATCH 2/5] Still getting wrong number of parameters for number 3

---
 Tutorial9_1.R | 179 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 179 insertions(+)
 create mode 100644 Tutorial9_1.R

diff --git a/Tutorial9_1.R b/Tutorial9_1.R
new file mode 100644
index 0000000..284b44d
--- /dev/null
+++ b/Tutorial9_1.R
@@ -0,0 +1,179 @@
+########QUESTION 1#########
+
+#load ponzr csv data file
+ponzr1=read.csv("ponzr1.csv",header=TRUE)
+
+#make custom likelihood function that specifies model structure (parameters, observations)
+#-'unpack parameters'
+#-assign parameters to expected values (make a place to feed in variables)
+#-dnorm(x,mean,sd)
+nllike1=function(p,x,y){
+  B0=p[1]
+  B1=p[2]
+  sigma=exp(p[3])
+  
+  expected=B0+B1*x
+  
+  nll=-sum(dnorm(x=y,mean=expected,sd=sigma,log=TRUE))
+  return(nll)
+}
+
+#make second likelihood model
+nllike2=function(p,x,y){
+  B0=p[1]
+  B1=p[2]
+  sigma = exp(p[3])
+  expected = B0+B1*x
+  
+  nll = -sum(dnorm(x=y, mean=expected, sd=sigma, log=TRUE))
+  return(nll)
+}
+
+#change all WT to 0 and each mutation to 1 and split them up
+Ponzr1 = ponzr1[which(ponzr1$mutation=="WT"),]
+Ponzr1[,1] = 0
+Ponzr2 = ponzr1[which(ponzr1$mutation=="M124K"),]
+Ponzr2[,1] = 1
+Ponzr3 = ponzr1[which(ponzr1$mutation=="V456D"),]
+Ponzr3[,1] = 1
+Ponzr4 = ponzr1[which(ponzr1$mutation=="I213N"),]
+Ponzr4[,1] = 1
+
+
+#optim function to look for maximum likelihood of our model
+#estimate parameters by minimizing the NLL
+#create a vector with initial guesses
+#minimize log likelihood
+
+#wt vs M124K
+initialguess=c(1,1,1)
+fit1=optim(par=initialguess,fn=nllike1,x=Ponzr1$mutation,y=Ponzr1$ponzr1Counts)
+fit2=optim(par=initialguess,fn=nllike2,x=Ponzr2$mutation,y=Ponzr2$ponzr1Counts)
+fit1$value
+fit2$value
+#run t-test
+D=2*(fit1$value-fit2$value)
+pchisq(q=D,df=1,lower.tail=FALSE)
+
+#wt vs. V456D
+initialguess = c(0,0,0)
+fit1=optim(initialguess, nllike1, x=Ponzr1$mutation, y=Ponzr1$ponzr1Counts)
+fit2=optim(initialguess, nllike2, x=Ponzr3$mutation, y=Ponzr3$ponzr1Counts)
+fit1$value
+fit2$value
+D = 2*(fit1$value-fit2$value)
+pchisq(q=D, df=1, lower.tail=FALSE)
+
+#wt vs.I213N
+Guess = c(1,1,1)
+fit1=optim(Guess, nllike1, x=Ponzr1$mutation, y=Ponzr1$ponzr1Counts)
+fit2=optim(Guess, nllike2, x=Ponzr4$mutation, y=Ponzr4$ponzr1Counts)
+fit1$value
+fit2$value
+D = 2*(fit1$value-fit2$value)
+pchisq(D, df=1, lower.tail=F)
+
+
+##Mutation M124K p-value=0.72, no effect of treatment
+##Mutation V456D p-value=0.0000056 effect of treatment
+##Mutation I213N p-value 0.88 no effect of treatment
+##Therefore, the V456D mutation significantly reduced the expression of ponzr1
+
+
+
+#### QUESTION 2 ################
+#load csv file for mmarinum
+Mmarinum = read.csv("MmarinumGrowth.csv", header=TRUE)
+
+#make custom likelihood function
+nllike1 <- function(p,x,y){
+  B0=p[1]
+  B1=p[2]
+  sigma = exp(p[3])
+  expected = B0*(x/(B1+x))
+  
+  nll = -sum(dnorm(x=y, mean=expected, sd=sigma, log=TRUE))
+  return(nll)
+}
+#optim function to find the max growth rate and half-saturatation constant
+initialguess = c(1,1,1)
+fit=optim(initialguess, nllike1, x=Mmarinum$S, y=Mmarinum$u)
+print(cbind("umax:", fit$par[1], "Ks:", fit$par[2]))
+
+##max growth rate (umax)=1.46
+##half sat constant=42.6
+##sigma=0.04
+
+
+
+###QUESTION 3 ########
+#load data for decomposition of leaves
+leafs= read.csv("leafDecomp.csv", header = TRUE)
+
+#create custom functions for the three models:
+#constant fit model(null model)
+constant = function(p,x,y){
+  B0=p[1]
+  B1=p[2]
+  B2=p[3]
+  sigma = exp(p[4])
+  expected = B0
+
+nll = -sum(dnorm(x=y, mean=expected, sd=sigma, log=TRUE))
+return(nll)
+}
+
+#linear model
+linear = function(p,x,y){
+  B0=p[1]
+  B1=p[2]
+  B2=p[3]
+  sigma = exp(p[4])
+  expected = B0 + B1*x
+  
+  nll = -sum(dnorm(x=y, mean=expected, sd=sigma, log=TRUE))
+  return(nll)
+}
+
+#quadratic model
+quadratic = function(p,x,y){
+  B0=p[1]
+  B1=p[2]
+  B2=p[3]
+  sigma = exp(p[4])
+  expected = B0 + B1*x + B2*x*x
+  
+  nll = -sum(dnorm(x=y, mean=expected, sd=sigma, log=TRUE))
+  return(nll)
+}
+
+#want to minimize parameters so we find max
+#likelihood to capture shape but with as few parameters
+#start with initial guess
+initialguess = c(1,1,1,1)
+
+#optimize each likelihood function with different number of parameters
+constantOpt=optim(initialguess, constant, x=leafs$Ms, y=leafs$decomp)
+linearOpt=optim(initialguess, linear, x=leafs$Ms, y=leafs$decomp)
+quadraticOpt=optim(initialguess, quadratic, x=leafs$Ms, y=leafs$decomp)
+
+print(constantOpt)
+print(linearOpt)
+print(quadraticOpt)
+
+#do t-tests on all combinations of min NLL for 3 models:
+D1_2 = 2*(constantOpt$value-linearOpt$value)
+D1_3 = 2*(constantOpt$value-quadraticOpt$value)
+D2_3 = 2*(quadraticOpt$value-linearOpt$value)
+#set df to 1 or 2 depending on difference in parameters between two models
+pchisq(D1_2, df=1, lower.tail=FALSE)
+pchisq(D1_3, df=2, lower.tail=FALSE)
+pchisq(D2_3, df=1, lower.tail=F)
+
+
+#p-values for likelihood ratio tests are all about 0
+#contant fit B0=589.7, sigma=164
+#linear fit B0=318, B1=6.3, sigma=54
+#quadratic fit B0=180, B1=15.7, B2=-0.11, signma=10.7
+
+##quadratic model is the best, linear model is second best, null model is the worst
\ No newline at end of file

From 9b1616b686c0d0d3047dce1bc19104985f9a0adc Mon Sep 17 00:00:00 2001
From: Kate Vendrely <kvendrel@nd.edu>
Date: Tue, 31 Oct 2017 17:03:43 -0400
Subject: [PATCH 3/5] Gives correct number of parameters

---
 Tutorial9_1.R | 36 ++++++++++++++++++++----------------
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/Tutorial9_1.R b/Tutorial9_1.R
index 284b44d..686f7d2 100644
--- a/Tutorial9_1.R
+++ b/Tutorial9_1.R
@@ -113,10 +113,8 @@ leafs= read.csv("leafDecomp.csv", header = TRUE)
 #create custom functions for the three models:
 #constant fit model(null model)
 constant = function(p,x,y){
-  B0=p[1]
-  B1=p[2]
-  B2=p[3]
-  sigma = exp(p[4])
+  B0=p[2]
+  sigma = exp(p[1])
   expected = B0
 
 nll = -sum(dnorm(x=y, mean=expected, sd=sigma, log=TRUE))
@@ -125,10 +123,10 @@ return(nll)
 
 #linear model
 linear = function(p,x,y){
-  B0=p[1]
-  B1=p[2]
-  B2=p[3]
-  sigma = exp(p[4])
+  B0=p[2]
+  B1=p[3]
+
+  sigma = exp(p[1])
   expected = B0 + B1*x
   
   nll = -sum(dnorm(x=y, mean=expected, sd=sigma, log=TRUE))
@@ -137,10 +135,10 @@ linear = function(p,x,y){
 
 #quadratic model
 quadratic = function(p,x,y){
-  B0=p[1]
-  B1=p[2]
-  B2=p[3]
-  sigma = exp(p[4])
+  B0=p[2]
+  B1=p[3]
+  B2=p[4]
+  sigma = exp(p[1])
   expected = B0 + B1*x + B2*x*x
   
   nll = -sum(dnorm(x=y, mean=expected, sd=sigma, log=TRUE))
@@ -150,17 +148,23 @@ quadratic = function(p,x,y){
 #want to minimize parameters so we find max
 #likelihood to capture shape but with as few parameters
 #start with initial guess
-initialguess = c(1,1,1,1)
+constantguess = c(1,1)
+linearguess = c(1,1,1)
+quadraticguess = c(1,1,1,1)
 
 #optimize each likelihood function with different number of parameters
-constantOpt=optim(initialguess, constant, x=leafs$Ms, y=leafs$decomp)
-linearOpt=optim(initialguess, linear, x=leafs$Ms, y=leafs$decomp)
-quadraticOpt=optim(initialguess, quadratic, x=leafs$Ms, y=leafs$decomp)
+constantOpt=optim(constantguess, constant, x=leafs$Ms, y=leafs$decomp)
+linearOpt=optim(linearguess, linear, x=leafs$Ms, y=leafs$decomp)
+quadraticOpt=optim(quadraticguess, quadratic, x=leafs$Ms, y=leafs$decomp)
 
 print(constantOpt)
 print(linearOpt)
 print(quadraticOpt)
 
+constantOpt$value
+linearOpt$value
+quadraticOpt$value
+
 #do t-tests on all combinations of min NLL for 3 models:
 D1_2 = 2*(constantOpt$value-linearOpt$value)
 D1_3 = 2*(constantOpt$value-quadraticOpt$value)

From 05d7792d3d3549cc666b3f94c052230f93f2d198 Mon Sep 17 00:00:00 2001
From: Kate Vendrely <kvendrel@nd.edu>
Date: Tue, 31 Oct 2017 17:25:35 -0400
Subject: [PATCH 4/5] update

---
 Exe9.R        | 332 +++++++++++++++++++++++++++++---------------------
 Tutorial9_1.R |  24 ++--
 2 files changed, 204 insertions(+), 152 deletions(-)

diff --git a/Exe9.R b/Exe9.R
index ada7362..ab5da03 100644
--- a/Exe9.R
+++ b/Exe9.R
@@ -1,141 +1,191 @@
-setwd("C:/Users/DAVIS/Desktop/shell-novice-data/exe9/Intro_Biocomp_ND_317_Tutorial9/")
-
-#Question1 V456D is sig
-Ponzr <- read.csv("ponzr1.csv", header=T)
-
-like1 <- function(p,x,y){
-  B0=p[1]
-  B1=p[2]
-  sigma = exp(p[3])
-  expected = B0
-  
-  nll = -sum(dnorm(y, mean=expected, sd=sigma, log=TRUE))
-  return(nll)
-}
-
-like2 <- function(p,x,y){
-  B0=p[1]
-  B1=p[2]
-  sigma = exp(p[3])
-  expected = B0+B1*x
-  
-  nll = -sum(dnorm(y, mean=expected, sd=sigma, log=TRUE))
-  return(nll)
-}
-
-
-Ponzr1 <- Ponzr[which(Ponzr$mutation=="WT"),]
-Ponzr1[,1] <- 0
-Ponzr2 <- Ponzr[which(Ponzr$mutation=="M124K"),]
-Ponzr2[,1] <- 1
-Ponzr3 <- Ponzr[which(Ponzr$mutation=="V456D"),]
-Ponzr3[,1] <- 1
-Ponzr4 <- Ponzr[which(Ponzr$mutation=="I213N"),]
-Ponzr4[,1] <- 1
-
-Ponzr1_2 <- rbind(Ponzr1,Ponzr2)
-Ponzr1_3 <- rbind(Ponzr1,Ponzr3)
-Ponzr1_4 <- rbind(Ponzr1,Ponzr4)
-
-Guess = c(1,1,1)
-fit1=optim(Guess, like1, x=Ponzr1$mutation, y=Ponzr1$ponzr1Counts)
-fit2=optim(Guess, like2, x=Ponzr2$mutation, y=Ponzr2$ponzr1Counts)
-fit1$value
-fit2$value
-D = 2*(fit1$value-fit2$value)
-pchisq(D, df=1, lower.tail=F)
-
-
-Guess = c(1,1,1)
-fit1=optim(Guess, like1, x=Ponzr1$mutation, y=Ponzr1$ponzr1Counts)
-fit2=optim(Guess, like2, x=Ponzr3$mutation, y=Ponzr3$ponzr1Counts)
-fit1$value
-fit2$value
-D = 2*(fit1$value-fit2$value)
-pchisq(D, df=1, lower.tail=F)
-
-Guess = c(1,1,1)
-fit1=optim(Guess, like1, x=Ponzr1$mutation, y=Ponzr1$ponzr1Counts)
-fit2=optim(Guess, like2, x=Ponzr4$mutation, y=Ponzr4$ponzr1Counts)
-fit1$value
-fit2$value
-D = 2*(fit1$value-fit2$value)
-pchisq(D, df=1, lower.tail=F)
-
-#Question2
-Mmarinum <- read.csv("MmarinumGrowth.csv", header=T)
-nllike <- function(p,x,y){
-  B0=p[1]
-  B1=p[2]
-  sigma = exp(p[3])
-  expected = B0*(x/(B1+x))
-  
-  nll = -sum(dnorm(x=y, mean=expected, sd=sigma, log=TRUE))
-  return(nll)
-}
-Guess = c(1,1,1)
-fit=optim(Guess, nllike, x=Mmarinum$S, y=Mmarinum$u)
-print(cbind("umax is ", fit$par[1], " and Ks is ", fit$par[2]))
-
-#Question3 (not finished yet)
-
-leafDecomp <- read.csv("leafDecomp.csv", header = T)
-
-one_var <- function(p,x,y){
-  B0=p[1]
-  B1=p[2]
-  B2=p[3]
-  sigma = exp(p[4])
-  expected = B0
-  
-  nll = -sum(dnorm(x=y, mean=expected, sd=sigma, log=TRUE))
-  return(nll)
-}
-
-two_var <- function(p,x,y){
-  B0=p[1]
-  B1=p[2]
-  B2=p[3]
-  sigma = exp(p[4])
-  expected = B0 + B1*x
-  
-  nll = -sum(dnorm(x=y, mean=expected, sd=sigma, log=TRUE))
-  return(nll)
-}
-
-three_var <- function(p,x,y){
-  B0=p[1]
-  B1=p[2]
-  B2=p[3]
-  sigma = exp(p[4])
-  expected = B0 + B1*x + B2*x*x
-  
-  nll = -sum(dnorm(x=y, mean=expected, sd=sigma, log=TRUE))
-  return(nll)
-}
-
-Guess = c(1,1,1,1)
-one_var_result=optim(Guess, one_var, x=leafDecomp$Ms, y=leafDecomp$decomp)
-two_var_result=optim(Guess, two_var, x=leafDecomp$Ms, y=leafDecomp$decomp)
-three_var_result=optim(Guess, three_var, x=leafDecomp$Ms, y=leafDecomp$decomp)
-
-one_var_result$value
-two_var_result$value
-three_var_result$value
-
-one_var_result$par
-two_var_result$par
-three_var_result$par
-
-D1_2 = 2*(one_var_result$value-two_var_result$value)
-D1_3 = 2*(one_var_result$value-three_var_result$value)
-D2_3 = 2*(three_var_result$value-two_var_result$value)
-
-pchisq(D1_2, df=1, lower.tail=F)
-pchisq(D1_3, df=2, lower.tail=F)
-pchisq(D2_3, df=1, lower.tail=F)
-
-
-
-
-
+setwd("C:/Users/DAVIS/Desktop/shell-novice-data/exe9/Intro_Biocomp_ND_317_Tutorial9/")
+
+##########QUESTION 1 ################
+#load ponzr csv data file
+Ponzr <- read.csv("ponzr1.csv", header=T)
+
+#make custom likelihood function that specifies model structure (parameters, observations)
+#-'unpack parameters'
+#-assign parameters to expected values (make a place to feed in variables)
+#-dnorm(x,mean,sd)
+like1 <- function(p,x,y){
+  B0=p[1]
+  B1=p[2]
+  sigma = exp(p[3])
+  expected = B0
+  
+  nll = -sum(dnorm(y, mean=expected, sd=sigma, log=TRUE))
+  return(nll)
+}
+
+#make second likelihood model
+like2 <- function(p,x,y){
+  B0=p[1]
+  B1=p[2]
+  sigma = exp(p[3])
+  expected = B0+B1*x
+  
+  nll = -sum(dnorm(y, mean=expected, sd=sigma, log=TRUE))
+  return(nll)
+}
+
+#change all WT to 0 and each mutation to 1 and split them up
+Ponzr1 <- Ponzr[which(Ponzr$mutation=="WT"),]
+Ponzr1[,1] <- 0
+Ponzr2 <- Ponzr[which(Ponzr$mutation=="M124K"),]
+Ponzr2[,1] <- 1
+Ponzr3 <- Ponzr[which(Ponzr$mutation=="V456D"),]
+Ponzr3[,1] <- 1
+Ponzr4 <- Ponzr[which(Ponzr$mutation=="I213N"),]
+Ponzr4[,1] <- 1
+
+Ponzr1_2 <- rbind(Ponzr1,Ponzr2)
+Ponzr1_3 <- rbind(Ponzr1,Ponzr3)
+Ponzr1_4 <- rbind(Ponzr1,Ponzr4)
+
+
+#optim function to look for maximum likelihood of our model
+#estimate parameters by minimizing the NLL
+#create a vector with initial guesses
+#minimize log likelihood
+
+#wt vs M124K
+Guess = c(1,1,1)
+fit1=optim(Guess, like1, x=Ponzr1$mutation, y=Ponzr1$ponzr1Counts)
+fit2=optim(Guess, like2, x=Ponzr2$mutation, y=Ponzr2$ponzr1Counts)
+fit1$value
+fit2$value
+D = 2*(fit1$value-fit2$value)
+pchisq(D, df=1, lower.tail=F)
+##Mutation M124K p-value=0.72, no effect of treatment
+
+#wt vs. V456D
+Guess = c(1,1,1)
+fit1=optim(Guess, like1, x=Ponzr1$mutation, y=Ponzr1$ponzr1Counts)
+fit2=optim(Guess, like2, x=Ponzr3$mutation, y=Ponzr3$ponzr1Counts)
+fit1$value
+fit2$value
+D = 2*(fit1$value-fit2$value)
+pchisq(D, df=1, lower.tail=F)
+##Mutation V456D p-value=0.0000056 effect of treatment
+
+#wt vs.I213N
+Guess = c(1,1,1)
+fit1=optim(Guess, like1, x=Ponzr1$mutation, y=Ponzr1$ponzr1Counts)
+fit2=optim(Guess, like2, x=Ponzr4$mutation, y=Ponzr4$ponzr1Counts)
+fit1$value
+fit2$value
+D = 2*(fit1$value-fit2$value)
+pchisq(D, df=1, lower.tail=F)
+##Mutation I213N p-value 0.88 no effect of treatment
+
+
+##Mutation M124K p-value=0.72, no effect of treatment
+##Mutation V456D p-value=0.0000056 effect of treatment
+##Mutation I213N p-value 0.88 no effect of treatment
+##Therefore, the V456D mutation significantly reduced the expression of ponzr1
+
+
+##########QUESTION 2 ################
+#load csv file for mmarinum
+Mmarinum <- read.csv("MmarinumGrowth.csv", header=T)
+
+#make custom likelihood function
+nllike <- function(p,x,y){
+  B0=p[1]
+  B1=p[2]
+  sigma = exp(p[3])
+  expected = B0*(x/(B1+x))
+  
+  nll = -sum(dnorm(x=y, mean=expected, sd=sigma, log=TRUE))
+  return(nll)
+}
+
+#optim function to find the max growth rate and half-saturatation constant
+Guess = c(1,1,1)
+fit=optim(Guess, nllike, x=Mmarinum$S, y=Mmarinum$u)
+print(cbind("umax is ", fit$par[1], " and Ks is ", fit$par[2]))
+
+##max growth rate (umax)=1.46
+##half sat constant=42.6
+##sigma=0.04
+
+
+
+#############QUESTION 3 #############
+#load data for decomposition of leaves
+leafDecomp <- read.csv("leafDecomp.csv", header = T)
+
+#create custom functions for the three models:
+#constant fit model(null model)
+constant <- function(p,x,y){
+  sigma = exp(p[1])
+  B0=p[2]
+  
+  expected = B0
+  
+  nll = -sum(dnorm(x=y, mean=expected, sd=sigma, log=TRUE))
+  return(nll)
+}
+
+#linear model
+linear <- function(p,x,y){
+  sigma = exp(p[1])
+  B0=p[2]
+  B1=p[3]
+  
+  expected = B0 + B1*x
+  
+  nll = -sum(dnorm(x=y, mean=expected, sd=sigma, log=TRUE))
+  return(nll)
+}
+
+#quadratic model
+quadratic <- function(p,x,y){
+  sigma = exp(p[1])
+  B0=p[2]
+  B1=p[3]
+  B2=p[4]
+  
+  expected = B0 + B1*x + B2*x*x
+  
+  nll = -sum(dnorm(x=y, mean=expected, sd=sigma, log=TRUE))
+  return(nll)
+}
+
+#want to minimize parameters so we find max
+#likelihood to capture shape but with as few parameters
+#start with initial guess which gives the correct number of parameters
+constantGuess = c(1,1)
+linearGuess = c(1,1,1)
+quadraticGuess = c(1,1,1,1)
+
+#optimize each likelihood function with different number of parameters
+constantResult=optim(constantGuess, constant, x=leafDecomp$Ms, y=leafDecomp$decomp)
+linearResult=optim(linearGuess, linear, x=leafDecomp$Ms, y=leafDecomp$decomp)
+quadraticResult=optim(quadraticGuess, quadratic, x=leafDecomp$Ms, y=leafDecomp$decomp)
+
+constantResult$value
+linearResult$value
+quadraticResult$value
+
+constantResult$par
+linearResult$par
+quadraticResult$par
+
+#do t-tests on all combinations of min NLL for 3 models:
+D1_2 = 2*(constantResult$value - linearResult$value)
+D1_3 = 2*(constantResult$value - quadraticResult$value)
+D2_3 = 2*(quadraticResult$value - linearResult$value)
+
+#set df to 1 or 2 depending on difference in parameters between two models
+pchisq(D1_2, df=1, lower.tail=F)
+pchisq(D1_3, df=2, lower.tail=F)
+pchisq(D2_3, df=1, lower.tail=F)
+
+#p-values for likelihood ratio tests are all about 0
+#contant fit B0=589.7, sigma=164
+#linear fit B0=318, B1=6.3, sigma=54
+#quadratic fit B0=180, B1=15.7, B2=-0.11, signma=10.7
+
+##quadratic model is the best, linear model is second best, null model is the worst
\ No newline at end of file
diff --git a/Tutorial9_1.R b/Tutorial9_1.R
index 686f7d2..7aeed9b 100644
--- a/Tutorial9_1.R
+++ b/Tutorial9_1.R
@@ -113,9 +113,10 @@ leafs= read.csv("leafDecomp.csv", header = TRUE)
 #create custom functions for the three models:
 #constant fit model(null model)
 constant = function(p,x,y){
-  B0=p[2]
   sigma = exp(p[1])
-  expected = B0
+  a=p[2]
+  
+  expected = a
 
 nll = -sum(dnorm(x=y, mean=expected, sd=sigma, log=TRUE))
 return(nll)
@@ -123,11 +124,11 @@ return(nll)
 
 #linear model
 linear = function(p,x,y){
-  B0=p[2]
-  B1=p[3]
-
-  sigma = exp(p[1])
-  expected = B0 + B1*x
+  sigma = exp(p[1]) 
+  a=p[2]
+  b=p[3]
+ 
+  expected = a + b*x
   
   nll = -sum(dnorm(x=y, mean=expected, sd=sigma, log=TRUE))
   return(nll)
@@ -135,11 +136,12 @@ linear = function(p,x,y){
 
 #quadratic model
 quadratic = function(p,x,y){
-  B0=p[2]
-  B1=p[3]
-  B2=p[4]
   sigma = exp(p[1])
-  expected = B0 + B1*x + B2*x*x
+  a=p[2]
+  b=p[3]
+  c=p[4]
+  
+  expected = a + b*x + c*x*x
   
   nll = -sum(dnorm(x=y, mean=expected, sd=sigma, log=TRUE))
   return(nll)

From e0fd48ff2808733ebd899618281f2fbafc456457 Mon Sep 17 00:00:00 2001
From: Kate Vendrely <kvendrel@nd.edu>
Date: Wed, 1 Nov 2017 18:44:12 -0400
Subject: [PATCH 5/5] final

---
 Exe9.R | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/Exe9.R b/Exe9.R
index ab5da03..82037b2 100644
--- a/Exe9.R
+++ b/Exe9.R
@@ -155,10 +155,15 @@ quadratic <- function(p,x,y){
 
 #want to minimize parameters so we find max
 #likelihood to capture shape but with as few parameters
-#start with initial guess which gives the correct number of parameters
-constantGuess = c(1,1)
-linearGuess = c(1,1,1)
-quadraticGuess = c(1,1,1,1)
+#estimate parameters for each of the three likelihood functions
+#use mean of all decomposition values to set initial parameter guess for the constant model
+mean(leafDecomp$decomp)
+constantGuess = c(1,500)
+#use slope and intercept of data to inform initial guess for linear model
+plot(leafDecomp)
+linearGuess = c(1,200,10)
+#difficult to have initial guess, try B0=200, B1=10, B2=-0.2, sigma=1
+quadraticGuess = c(1,200,10,-0.2)
 
 #optimize each likelihood function with different number of parameters
 constantResult=optim(constantGuess, constant, x=leafDecomp$Ms, y=leafDecomp$decomp)
@@ -176,7 +181,7 @@ quadraticResult$par
 #do t-tests on all combinations of min NLL for 3 models:
 D1_2 = 2*(constantResult$value - linearResult$value)
 D1_3 = 2*(constantResult$value - quadraticResult$value)
-D2_3 = 2*(quadraticResult$value - linearResult$value)
+D2_3 = 2*(linearResult$value - quadraticResult$value)
 
 #set df to 1 or 2 depending on difference in parameters between two models
 pchisq(D1_2, df=1, lower.tail=F)
@@ -184,8 +189,7 @@ pchisq(D1_3, df=2, lower.tail=F)
 pchisq(D2_3, df=1, lower.tail=F)
 
 #p-values for likelihood ratio tests are all about 0
-#contant fit B0=589.7, sigma=164
+#constant fit B0=589.7, sigma=164
 #linear fit B0=318, B1=6.3, sigma=54
-#quadratic fit B0=180, B1=15.7, B2=-0.11, signma=10.7
-
-##quadratic model is the best, linear model is second best, null model is the worst
\ No newline at end of file
+#quadratic fit B0=180, B1=15.7, B2=-0.11, sigma=10.7
+##therefore, quadratic model is the best since it has the smallest sigma, linear model is second best, null model is the worst
\ No newline at end of file