From 512d765d1596105b326f9d0008355772a59503ff Mon Sep 17 00:00:00 2001 From: evolbeginner Date: Thu, 6 Nov 2025 15:43:32 +0800 Subject: [PATCH 01/14] full GBM supported --- R/relaxed.tree.R | 178 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) create mode 100755 R/relaxed.tree.R diff --git a/R/relaxed.tree.R b/R/relaxed.tree.R new file mode 100755 index 0000000..8974a10 --- /dev/null +++ b/R/relaxed.tree.R @@ -0,0 +1,178 @@ +#' Simulate branch lengths on a phylogeny under a relaxed clock +#' +#' @param tree an object of class phylo representing a bifurcating phylogeny +#' @param model character, the relaxed clock model +#' @param r numeric, the mean rate in substitutions per site +#' @param s2 numeric, the rate "diffusion" parameter for the relaxed clocks +#' +#' @details The \code{tree} is assummed to be a timetree. Thus, if all the tip +#' species are extant, then \code{tree} must be ultrametric. If \code{tree} is +#' not ultrametric then it is assummed you have extinct tips. The \code{tree} +#' must be rooted. +#' +#' The options for \code{model} are "clk", "iln", "gbm_RY07", and "gbm0", for +#' the strict clock, the independent log-normal rates, and the +#' geometric-Brownian motion rates, respectively. +#' +#' If \code{model == "clk"} the branch lengths of \code{tree} are multiplied +#' by \code{r}. For the other models, \eqn{n} rates (one for each branch in +#' the \code{s} species phylogeny) are sampled from the appropriate +#' distribution. The branch lengths in \code{tree} are then multiplied by the +#' corresponding rates. +#' +#' Model "gbm_RY07" is the the one implemented by Rannala and Yang (2007) with +#' the mean of the rate stabilized across the branches of the tree, i.e., this +#' version of the GBM process is a martingale. Mean stabilization is achieved +#' by adding a drift factor to the Brownian process on the log-rate, and thus +#' the process on the log-rate is no longer a martingale. An alternative +#' version "gbm0" is provided in which the drift factor is removed. In this +#' version the process on the log-rate is now a martingale while the process +#' on the rate is not. The drift factor causes collapsation of the rates to +#' zero as time goes to infinity. +#' +#' @references +#' Drummond et al. (2006) \emph{Relaxed phylogenetics and dating with +#' confidence.} PLoS Biology, 4(5): e88. +#' +#' Panchaksaram et al. (2024) \emph{Bayesian selection of relaxed-clock +#' models: Distinguishing between independent and autocorrelated rates.} +#' Systematic Biology, 74: 323--334. +#' +#' Rannala and Yang (2007) \emph{Inferring speciation times under an +#' episodic molecular clock.} Systematic Biology, 56: 453--466. +#' +#' @return An object of class phylo with branch lengths in substitutions per +#' site. +#' +#' @examples +#' require(ape) +#' par(mfrow=c(2,3)) +#' +#' data(pri10s) +#' # Simulate using autocorrelated log-normal rates on a primate phylogeny: +#' tt <- relaxed.tree(pri10s, model="gbm_RY07", r=.04e-2, s2=.26e-2) +#' +#' # The relaxed tree (branch lengths are in substitutions per site): +#' plot(tt, main="Relaxed primate tree (subs per site)") +#' +#' # The ultrametric timetree of Primates used in the simulation: +#' plot(pri10s, main="Primates timetree (Ma)") +#' axisPhylo() # Time unit in 1 Ma. +#' +#' # Plot the branch lengths for both trees against each other: +#' plot(pri10s$edge.length, tt$edge.length, +#' xlab="Branch lengths (Million years)", ylab="Branch lengths (subs per site)") +#' abline(0, .04e-2) # the slope is the substitution rate, r +#' +#' data(flu289s) +#' # Simulate using independent log-normal rates on an influenza H1N1 phylogeny: +#' tt2 <- relaxed.tree(flu289s, model="iln", r=.15e-2, s2=.45) +#' +#' # The relaxed tree of influenza: +#' plot(tt2, show.tip.label=FALSE, main="Relaxed influenza tree (subs per site)") +#' +#' # The timetree of Influenza (not ultrametric): +#' plot(flu289s, show.tip.label=FALSE, main="Influenza H1N1 timetree (y)") +#' axisPhylo(root.time=1907.35, backward=FALSE) # Time unit in 1 y. +#' +#' # Plot the branch lengths: +#' plot(flu289s$edge.length, tt2$edge.length, +#' xlab="Branch lengths (years)", ylab="Branch lengths (subs per site)") +#' abline(0, .15e-2) # the slope is the substitution rate, r +#' +#' @author Mario dos Reis +#' +#' @export +# TODO: add indpendent gamma rates model. +relaxed.tree <- function(tree, model, r, s2, drift) { + tt <- tree + nb <- length(tt$edge.length) + model <- match.arg(model, c("clk", "iln", "gbm_RY07", "gbm0", "gbm_full")) + + if (!ape::is.rooted(tt)) { + stop("tree must be rooted") + } + if (r < 0) { + stop ("r must be positive") + } + + if (model == "clk") { + tt$edge.length <- tt$edge.length * r + } + # r0 = exp(mu + s2/2) -> mu = log(r0) - s2/2 + else if (model == "iln") { + rv <- rlnorm(nb, meanlog=log(r) - s2/2, sdlog=sqrt(s2)) + tt$edge.length <- tt$edge.length * rv + } + else if (model == "gbm_RY07") { + rv <- .sim.gbmRY07(tree, r, s2, drift=0) + tt$edge.length <- tt$edge.length * rv + } + else if (model == "gbm0") { + rv <- .sim.gbmRY07(tree, r, s2, drift=0.5*s2) + tt$edge.length <- tt$edge.length * rv + } + else if (model == 'gbm_full') { + rv <- .sim.gbmRY07(tree, r, s2, drift=drift) + tt$edge.length <- tt$edge.length * rv + } + return (tt) +} + +# Simulate using the GBM rates of Yang and Rannala (2007, Syst. Biol.) +# Guillaume fixed the function to allow simulation on multi-furcating trees. +# Sishuo extended the model to a full GBM with a drift coefficient, representing +# the non-stochastic change in the process. In gbm_full, RY07 and gbm0 are special +# cases with the drift equal to 0 and 0.5*s2, respectively. +.sim.gbmRY07 <- function(tree, r, s2, log=FALSE, drift=0) { + + nb <- length(tree$edge.length) + nt <- length(tree$tip.label) + tree$edge.length <- tree$edge.length / 2 + rv <- numeric(nb) + + for (node in (nt+1):(nb+1)) { + dad <- which(tree$edge[,2] == node) + if (length(dad) == 0) { # I'm the root! + ta <- 0 # ancestral time + ya <- log(r) # root rate + } + else { + ta <- tree$edge.length[dad] + ya <- rv[dad] + } + + desc <- which(tree$edge[,1] == node) + desc.t <- tree$edge.length[desc] + + # drift == 0 is the YR07 model with stabilised mean (i.e., the gbm + # process is forced to be a martingale) + # "r" is added to make it accommodate gbm_full + # specifically, r = 0, 0.5*s2, user_specified for RY07, gbm0, gbm_full + mu <- ya + (drift-s2/2) * (ta + desc.t) + + Sig <- matrix(ta * s2, length(desc), length(desc)) + diag(Sig) <- (ta + desc.t) * s2 + + rv[desc] <- MASS::mvrnorm(1, mu, Sig) + #print(c(node, exp(c(ya, rr)))) + } + if (log) { + return (rv) + } else { + return (exp(rv)) + } +} + +#' Calculate quantiles of GBM process +#' @export +# TODO: write documentation +gbm_RY07q <- function(p, ra, s2, t, log=FALSE) { + pps <- qnorm(p, mean=log(ra) - t*s2/2, sd=sqrt(s2 * t)) + if (log) { + return (pps) + } + else { + return (exp(pps)) + } +} From 79f771e3d030bc179190bcd5960a35b82183ea56 Mon Sep 17 00:00:00 2001 From: evolbeginner Date: Thu, 6 Nov 2025 18:35:02 +0800 Subject: [PATCH 02/14] ou --- R/relaxed.tree.R | 61 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/R/relaxed.tree.R b/R/relaxed.tree.R index 8974a10..8480f55 100755 --- a/R/relaxed.tree.R +++ b/R/relaxed.tree.R @@ -87,7 +87,7 @@ relaxed.tree <- function(tree, model, r, s2, drift) { tt <- tree nb <- length(tt$edge.length) - model <- match.arg(model, c("clk", "iln", "gbm_RY07", "gbm0", "gbm_full")) + model <- match.arg(model, c("clk", "iln", "gbm_RY07", "gbm0", "gbm_full", "ou")) if (!ape::is.rooted(tt)) { stop("tree must be rooted") @@ -164,6 +164,65 @@ relaxed.tree <- function(tree, model, r, s2, drift) { } } + +.sim.ou <- function(tree, r, theta, r_opt, s2, log = FALSE) { + # Simulate rate variation under an OU process along a phylogeny + # tree : phylo object (ape) + # r : root rate + # theta : strength of mean reversion + # r_opt : exponential of the long-term mean (optimal_rate) + # s2 : diffusion variance parameter + # log : return on log scale if TRUE + + nb <- length(tree$edge.length) + nt <- length(tree$tip.label) + tree$edge.length <- tree$edge.length / 2 + rv <- numeric(nb) + + for (node in (nt + 1):(nb + 1)) { + dad <- which(tree$edge[, 2] == node) + + if (length(dad) == 0) { + ## root case + tA <- 0 + yA <- log(r) # rate at the root + } else { + tA <- tree$edge.length[dad] + yA <- rv[dad] + } + + desc <- which(tree$edge[, 1] == node) + desc.t <- tree$edge.length[desc] + n.desc <- length(desc) + + ## Expected means for descendants + # note that 'mu' is diff from .sim.gbmRY07() + mu <- log(r_opt) + means <- (yA - mu) * exp(-theta * (tA + desc.t)) + mu + + ## Cov matrix: Sigma + Sigma <- matrix(NA, n.desc, n.desc) + for (i in 1:n.desc) { + for (j in 1:n.desc) { + if (i == j) { + Sigma[i, i] <- (s2 / (2 * theta)) * (1 - exp(-2 * theta * (tA + desc.t[i]))) + } else { + # Cov btwn descendants + Sigma[i, j] <- (s2 / (2 * theta)) * + (exp(-theta * (desc.t[i] + desc.t[j])) * (1 - exp(-2 * theta * tA))) + } + } + } + + ## descendant trait values + rv[desc] <- MASS::mvrnorm(1, means, Sigma) + } + + if (log) return(rv) else return(exp(rv)) +} + + + #' Calculate quantiles of GBM process #' @export # TODO: write documentation From 5e39e464c623a6798dbe18d268f845e3258af1da Mon Sep 17 00:00:00 2001 From: evolbeginner Date: Thu, 6 Nov 2025 18:37:55 +0800 Subject: [PATCH 03/14] ou --- R/relaxed.tree.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/R/relaxed.tree.R b/R/relaxed.tree.R index 8480f55..24b4e5a 100755 --- a/R/relaxed.tree.R +++ b/R/relaxed.tree.R @@ -116,6 +116,11 @@ relaxed.tree <- function(tree, model, r, s2, drift) { rv <- .sim.gbmRY07(tree, r, s2, drift=drift) tt$edge.length <- tt$edge.length * rv } + else if (model == 'ou') { + #rv <- .sim.ou(tree, r, s2, drift=drift) + rv <- .sim.ou(tree, r, s2, r_opt, theta) + tt$edge.length <- tt$edge.length * rv + } return (tt) } @@ -165,7 +170,8 @@ relaxed.tree <- function(tree, model, r, s2, drift) { } -.sim.ou <- function(tree, r, theta, r_opt, s2, log = FALSE) { +#.sim.ou <- function(tree, r, theta, r_opt, s2, log=FALSE) { +.sim.ou <- function(tree, r, s2, r_opt, theta, log=FALSE) { # Simulate rate variation under an OU process along a phylogeny # tree : phylo object (ape) # r : root rate From 75ed76b4cb87cced47baa86971ac5373faee0375 Mon Sep 17 00:00:00 2001 From: evolbeginner Date: Thu, 6 Nov 2025 18:42:49 +0800 Subject: [PATCH 04/14] ou --- R/relaxed.tree.R | 216 +++++++++++++++++++++---------------------- R/simclock.R | 233 ----------------------------------------------- 2 files changed, 103 insertions(+), 346 deletions(-) mode change 100755 => 100644 R/relaxed.tree.R delete mode 100644 R/simclock.R diff --git a/R/relaxed.tree.R b/R/relaxed.tree.R old mode 100755 new mode 100644 index 24b4e5a..50f8e5e --- a/R/relaxed.tree.R +++ b/R/relaxed.tree.R @@ -5,30 +5,19 @@ #' @param r numeric, the mean rate in substitutions per site #' @param s2 numeric, the rate "diffusion" parameter for the relaxed clocks #' -#' @details The \code{tree} is assummed to be a timetree. Thus, if all the tip +#' @details The \code{tree} is assummed to be a timetree. Thus, if all your tip #' species are extant, then \code{tree} must be ultrametric. If \code{tree} is #' not ultrametric then it is assummed you have extinct tips. The \code{tree} -#' must be rooted. -#' -#' The options for \code{model} are "clk", "iln", "gbm_RY07", and "gbm0", for -#' the strict clock, the independent log-normal rates, and the -#' geometric-Brownian motion rates, respectively. -#' -#' If \code{model == "clk"} the branch lengths of \code{tree} are multiplied -#' by \code{r}. For the other models, \eqn{n} rates (one for each branch in -#' the \code{s} species phylogeny) are sampled from the appropriate -#' distribution. The branch lengths in \code{tree} are then multiplied by the -#' corresponding rates. -#' -#' Model "gbm_RY07" is the the one implemented by Rannala and Yang (2007) with -#' the mean of the rate stabilized across the branches of the tree, i.e., this -#' version of the GBM process is a martingale. Mean stabilization is achieved -#' by adding a drift factor to the Brownian process on the log-rate, and thus -#' the process on the log-rate is no longer a martingale. An alternative -#' version "gbm0" is provided in which the drift factor is removed. In this -#' version the process on the log-rate is now a martingale while the process -#' on the rate is not. The drift factor causes collapsation of the rates to -#' zero as time goes to infinity. +#' must be rooted and strictly bifurcating. +#' +#' The options for \code{model} are "clk", "iln", and "gbm_RY07", for the +#' strict clock, the independent log-normal rates, and the geometric-Brownian +#' motion rates, respectively (see Rannala and Yang, 2007). If \code{model == +#' "clk"} the branch lengths of \code{tree} are multiplied by \code{r}. If +#' \code{model == "iln"} or \code{model == "gbm_RN07"}, \eqn{n = 2*s - 2} +#' rates (one for each branch in the \code{s} species phylogeny) are sampled +#' from the appropriate distribution. The branch lengths in \code{tree} are +#' then multiplied by the corresponding rates. #' #' @references #' Drummond et al. (2006) \emph{Relaxed phylogenetics and dating with @@ -36,10 +25,10 @@ #' #' Panchaksaram et al. (2024) \emph{Bayesian selection of relaxed-clock #' models: Distinguishing between independent and autocorrelated rates.} -#' Systematic Biology, 74: 323--334. +#' Systematic Biology. #' -#' Rannala and Yang (2007) \emph{Inferring speciation times under an -#' episodic molecular clock.} Systematic Biology, 56: 453--466. +#' Yang and Rannala (2007) \emph{Inferring speciation times under an +#' episodic molecular clock.} Systematic Biology, 56: 453-466. #' #' @return An object of class phylo with branch lengths in substitutions per #' site. @@ -50,7 +39,7 @@ #' #' data(pri10s) #' # Simulate using autocorrelated log-normal rates on a primate phylogeny: -#' tt <- relaxed.tree(pri10s, model="gbm_RY07", r=.04e-2, s2=.26e-2) +#' tt <- relaxed.tree(pri10s, model="gbm", r=.04e-2, s2=.26e-2) #' #' # The relaxed tree (branch lengths are in substitutions per site): #' plot(tt, main="Relaxed primate tree (subs per site)") @@ -84,10 +73,10 @@ #' #' @export # TODO: add indpendent gamma rates model. -relaxed.tree <- function(tree, model, r, s2, drift) { +relaxed.tree <- function(tree, model, r, s2) { tt <- tree nb <- length(tt$edge.length) - model <- match.arg(model, c("clk", "iln", "gbm_RY07", "gbm0", "gbm_full", "ou")) + model <- match.arg(model, c("clk", "iln", "gbm_RY07")) if (!ape::is.rooted(tt)) { stop("tree must be rooted") @@ -105,20 +94,7 @@ relaxed.tree <- function(tree, model, r, s2, drift) { tt$edge.length <- tt$edge.length * rv } else if (model == "gbm_RY07") { - rv <- .sim.gbmRY07(tree, r, s2, drift=0) - tt$edge.length <- tt$edge.length * rv - } - else if (model == "gbm0") { - rv <- .sim.gbmRY07(tree, r, s2, drift=0.5*s2) - tt$edge.length <- tt$edge.length * rv - } - else if (model == 'gbm_full') { - rv <- .sim.gbmRY07(tree, r, s2, drift=drift) - tt$edge.length <- tt$edge.length * rv - } - else if (model == 'ou') { - #rv <- .sim.ou(tree, r, s2, drift=drift) - rv <- .sim.ou(tree, r, s2, r_opt, theta) + rv <- .sim.gbmRY07(tree, r, s2) tt$edge.length <- tt$edge.length * rv } return (tt) @@ -126,10 +102,7 @@ relaxed.tree <- function(tree, model, r, s2, drift) { # Simulate using the GBM rates of Yang and Rannala (2007, Syst. Biol.) # Guillaume fixed the function to allow simulation on multi-furcating trees. -# Sishuo extended the model to a full GBM with a drift coefficient, representing -# the non-stochastic change in the process. In gbm_full, RY07 and gbm0 are special -# cases with the drift equal to 0 and 0.5*s2, respectively. -.sim.gbmRY07 <- function(tree, r, s2, log=FALSE, drift=0) { +.sim.gbmRY07 <- function(tree, r, s2, log=FALSE) { nb <- length(tree$edge.length) nt <- length(tree$tip.label) @@ -150,12 +123,7 @@ relaxed.tree <- function(tree, model, r, s2, drift) { desc <- which(tree$edge[,1] == node) desc.t <- tree$edge.length[desc] - # drift == 0 is the YR07 model with stabilised mean (i.e., the gbm - # process is forced to be a martingale) - # "r" is added to make it accommodate gbm_full - # specifically, r = 0, 0.5*s2, user_specified for RY07, gbm0, gbm_full - mu <- ya + (drift-s2/2) * (ta + desc.t) - + mu <- ya - (ta + desc.t) * s2/2 Sig <- matrix(ta * s2, length(desc), length(desc)) diag(Sig) <- (ta + desc.t) * s2 @@ -169,69 +137,8 @@ relaxed.tree <- function(tree, model, r, s2, drift) { } } - -#.sim.ou <- function(tree, r, theta, r_opt, s2, log=FALSE) { -.sim.ou <- function(tree, r, s2, r_opt, theta, log=FALSE) { - # Simulate rate variation under an OU process along a phylogeny - # tree : phylo object (ape) - # r : root rate - # theta : strength of mean reversion - # r_opt : exponential of the long-term mean (optimal_rate) - # s2 : diffusion variance parameter - # log : return on log scale if TRUE - - nb <- length(tree$edge.length) - nt <- length(tree$tip.label) - tree$edge.length <- tree$edge.length / 2 - rv <- numeric(nb) - - for (node in (nt + 1):(nb + 1)) { - dad <- which(tree$edge[, 2] == node) - - if (length(dad) == 0) { - ## root case - tA <- 0 - yA <- log(r) # rate at the root - } else { - tA <- tree$edge.length[dad] - yA <- rv[dad] - } - - desc <- which(tree$edge[, 1] == node) - desc.t <- tree$edge.length[desc] - n.desc <- length(desc) - - ## Expected means for descendants - # note that 'mu' is diff from .sim.gbmRY07() - mu <- log(r_opt) - means <- (yA - mu) * exp(-theta * (tA + desc.t)) + mu - - ## Cov matrix: Sigma - Sigma <- matrix(NA, n.desc, n.desc) - for (i in 1:n.desc) { - for (j in 1:n.desc) { - if (i == j) { - Sigma[i, i] <- (s2 / (2 * theta)) * (1 - exp(-2 * theta * (tA + desc.t[i]))) - } else { - # Cov btwn descendants - Sigma[i, j] <- (s2 / (2 * theta)) * - (exp(-theta * (desc.t[i] + desc.t[j])) * (1 - exp(-2 * theta * tA))) - } - } - } - - ## descendant trait values - rv[desc] <- MASS::mvrnorm(1, means, Sigma) - } - - if (log) return(rv) else return(exp(rv)) -} - - - #' Calculate quantiles of GBM process #' @export -# TODO: write documentation gbm_RY07q <- function(p, ra, s2, t, log=FALSE) { pps <- qnorm(p, mean=log(ra) - t*s2/2, sd=sqrt(s2 * t)) if (log) { @@ -241,3 +148,86 @@ gbm_RY07q <- function(p, ra, s2, t, log=FALSE) { return (exp(pps)) } } + +#' Simulate correlated branch lengths among loci on a phylogeny under a relaxed clock +#' +#' @param tree an object of class phylo representing a bifurcating phylogeny +#' @param model character, the relaxed clock model +#' @param r numeric, the mean rate in substitutions per site +#' @param s2 numeric, the rate "diffusion" parameter for the relaxed clocks +#' @param nloci numeric, the number of trees to simulate (one per locus) +#' @param corr, numeric, the correlation of log rates among loci +#' +#' @details A total of \code{nloci} trees are simulated, with the log-rates +#' for branches across trees having correlation \code{corr}. +#' +#' @return A list with two elements: A list of length \code{nloci} of trees of +#' class phylo with branch lengths in substitutions per site, and a matrix of +#' branch rates for each locus. +#' +#' @seealso \link{relaxed.tree} to simulate a single tree. +#' +#' @examples +#' require(ape) +#' par(mfrow=c(2,3)) +#' +#' data(pri10s) +#' # ILN model: +#' # Simulate using autocorrelated log-normal rates on a primate phylogeny, +#' # with no correlation among three loci: +#' iln0 <- correlated.trees(pri10s, model="iln", r=.04e-2, s2=.1, 3, 0) +#' lapply(iln0$trees, plot) +#' # Repeat with strong correlation among loci: +#' ilnc <- correlated.trees(pri10s, model="iln", r=.04e-2, s2=.1, 3, 0.9) +#' lapply(ilnc$trees, plot) +#' +#' # GBM model: +#' # Simulate using autocorrelated log-normal rates on a primate phylogeny, +#' # with no correlation among three loci: +#' gbm0 <- correlated.trees(pri10s, model="gbm", r=.04e-2, s2=.26e-2, 3, 0) +#' lapply(gbm0$trees, plot) +#' # Repeat with strong correlation among loci: +#' gbmc <- correlated.trees(pri10s, model="gbm", r=.04e-2, s2=.26e-2, 3, 0.9) +#' lapply(gbmc$trees, plot) +#' +#' +#' +#' @author Mario dos Reis +#' +#' @export +correlated.trees <- function(tree, model, r, s2, nloci, corr) { + tt <- tree + nb <- length(tt$edge.length) + model <- match.arg(model, c("iln", "gbm_RY07")) + + # construct among loci covariance matrix (p x p): + R <- matrix(corr, ncol=nloci, nrow=nloci) * s2 + diag(R) <- s2 + + # use Eigen decomposition to simulate correlations + eS <- eigen(R, symmetric = TRUE) + ev <- eS$values + + # n x p; n: nb; p: nloci + lrvm <- matrix(nrow=nb, ncol=nloci) + + for (i in 1:nloci) { + if (model == "iln") { + lrvm[,i] <- rnorm(nb, 0, 1) + } else if (model == "gbm_RY07") { + lrvm[,i] <- .sim.gmbRY07(tree, 1, 1, log=TRUE) + # NOTE: gmbRY07 needs more testing! + } + } + + # p x p %*% p x n = p x n + rvm <- exp( eS$vectors %*% diag(sqrt(pmax(ev, 0)), nloci) %*% t(lrvm) + log(r) - s2/2 ) + + tls <- list() + for (i in 1:nloci) { + tls[[i]] <- tt + tls[[i]]$edge.length <- tt$edge.length * rvm[i,] + } + + return(list(trees=tls, rates=rvm)) +} diff --git a/R/simclock.R b/R/simclock.R deleted file mode 100644 index 50f8e5e..0000000 --- a/R/simclock.R +++ /dev/null @@ -1,233 +0,0 @@ -#' Simulate branch lengths on a phylogeny under a relaxed clock -#' -#' @param tree an object of class phylo representing a bifurcating phylogeny -#' @param model character, the relaxed clock model -#' @param r numeric, the mean rate in substitutions per site -#' @param s2 numeric, the rate "diffusion" parameter for the relaxed clocks -#' -#' @details The \code{tree} is assummed to be a timetree. Thus, if all your tip -#' species are extant, then \code{tree} must be ultrametric. If \code{tree} is -#' not ultrametric then it is assummed you have extinct tips. The \code{tree} -#' must be rooted and strictly bifurcating. -#' -#' The options for \code{model} are "clk", "iln", and "gbm_RY07", for the -#' strict clock, the independent log-normal rates, and the geometric-Brownian -#' motion rates, respectively (see Rannala and Yang, 2007). If \code{model == -#' "clk"} the branch lengths of \code{tree} are multiplied by \code{r}. If -#' \code{model == "iln"} or \code{model == "gbm_RN07"}, \eqn{n = 2*s - 2} -#' rates (one for each branch in the \code{s} species phylogeny) are sampled -#' from the appropriate distribution. The branch lengths in \code{tree} are -#' then multiplied by the corresponding rates. -#' -#' @references -#' Drummond et al. (2006) \emph{Relaxed phylogenetics and dating with -#' confidence.} PLoS Biology, 4(5): e88. -#' -#' Panchaksaram et al. (2024) \emph{Bayesian selection of relaxed-clock -#' models: Distinguishing between independent and autocorrelated rates.} -#' Systematic Biology. -#' -#' Yang and Rannala (2007) \emph{Inferring speciation times under an -#' episodic molecular clock.} Systematic Biology, 56: 453-466. -#' -#' @return An object of class phylo with branch lengths in substitutions per -#' site. -#' -#' @examples -#' require(ape) -#' par(mfrow=c(2,3)) -#' -#' data(pri10s) -#' # Simulate using autocorrelated log-normal rates on a primate phylogeny: -#' tt <- relaxed.tree(pri10s, model="gbm", r=.04e-2, s2=.26e-2) -#' -#' # The relaxed tree (branch lengths are in substitutions per site): -#' plot(tt, main="Relaxed primate tree (subs per site)") -#' -#' # The ultrametric timetree of Primates used in the simulation: -#' plot(pri10s, main="Primates timetree (Ma)") -#' axisPhylo() # Time unit in 1 Ma. -#' -#' # Plot the branch lengths for both trees against each other: -#' plot(pri10s$edge.length, tt$edge.length, -#' xlab="Branch lengths (Million years)", ylab="Branch lengths (subs per site)") -#' abline(0, .04e-2) # the slope is the substitution rate, r -#' -#' data(flu289s) -#' # Simulate using independent log-normal rates on an influenza H1N1 phylogeny: -#' tt2 <- relaxed.tree(flu289s, model="iln", r=.15e-2, s2=.45) -#' -#' # The relaxed tree of influenza: -#' plot(tt2, show.tip.label=FALSE, main="Relaxed influenza tree (subs per site)") -#' -#' # The timetree of Influenza (not ultrametric): -#' plot(flu289s, show.tip.label=FALSE, main="Influenza H1N1 timetree (y)") -#' axisPhylo(root.time=1907.35, backward=FALSE) # Time unit in 1 y. -#' -#' # Plot the branch lengths: -#' plot(flu289s$edge.length, tt2$edge.length, -#' xlab="Branch lengths (years)", ylab="Branch lengths (subs per site)") -#' abline(0, .15e-2) # the slope is the substitution rate, r -#' -#' @author Mario dos Reis -#' -#' @export -# TODO: add indpendent gamma rates model. -relaxed.tree <- function(tree, model, r, s2) { - tt <- tree - nb <- length(tt$edge.length) - model <- match.arg(model, c("clk", "iln", "gbm_RY07")) - - if (!ape::is.rooted(tt)) { - stop("tree must be rooted") - } - if (r < 0) { - stop ("r must be positive") - } - - if (model == "clk") { - tt$edge.length <- tt$edge.length * r - } - # r0 = exp(mu + s2/2) -> mu = log(r0) - s2/2 - else if (model == "iln") { - rv <- rlnorm(nb, meanlog=log(r) - s2/2, sdlog=sqrt(s2)) - tt$edge.length <- tt$edge.length * rv - } - else if (model == "gbm_RY07") { - rv <- .sim.gbmRY07(tree, r, s2) - tt$edge.length <- tt$edge.length * rv - } - return (tt) -} - -# Simulate using the GBM rates of Yang and Rannala (2007, Syst. Biol.) -# Guillaume fixed the function to allow simulation on multi-furcating trees. -.sim.gbmRY07 <- function(tree, r, s2, log=FALSE) { - - nb <- length(tree$edge.length) - nt <- length(tree$tip.label) - tree$edge.length <- tree$edge.length / 2 - rv <- numeric(nb) - - for (node in (nt+1):(nb+1)) { - dad <- which(tree$edge[,2] == node) - if (length(dad) == 0) { # I'm the root! - ta <- 0 # ancestral time - ya <- log(r) # root rate - } - else { - ta <- tree$edge.length[dad] - ya <- rv[dad] - } - - desc <- which(tree$edge[,1] == node) - desc.t <- tree$edge.length[desc] - - mu <- ya - (ta + desc.t) * s2/2 - Sig <- matrix(ta * s2, length(desc), length(desc)) - diag(Sig) <- (ta + desc.t) * s2 - - rv[desc] <- MASS::mvrnorm(1, mu, Sig) - #print(c(node, exp(c(ya, rr)))) - } - if (log) { - return (rv) - } else { - return (exp(rv)) - } -} - -#' Calculate quantiles of GBM process -#' @export -gbm_RY07q <- function(p, ra, s2, t, log=FALSE) { - pps <- qnorm(p, mean=log(ra) - t*s2/2, sd=sqrt(s2 * t)) - if (log) { - return (pps) - } - else { - return (exp(pps)) - } -} - -#' Simulate correlated branch lengths among loci on a phylogeny under a relaxed clock -#' -#' @param tree an object of class phylo representing a bifurcating phylogeny -#' @param model character, the relaxed clock model -#' @param r numeric, the mean rate in substitutions per site -#' @param s2 numeric, the rate "diffusion" parameter for the relaxed clocks -#' @param nloci numeric, the number of trees to simulate (one per locus) -#' @param corr, numeric, the correlation of log rates among loci -#' -#' @details A total of \code{nloci} trees are simulated, with the log-rates -#' for branches across trees having correlation \code{corr}. -#' -#' @return A list with two elements: A list of length \code{nloci} of trees of -#' class phylo with branch lengths in substitutions per site, and a matrix of -#' branch rates for each locus. -#' -#' @seealso \link{relaxed.tree} to simulate a single tree. -#' -#' @examples -#' require(ape) -#' par(mfrow=c(2,3)) -#' -#' data(pri10s) -#' # ILN model: -#' # Simulate using autocorrelated log-normal rates on a primate phylogeny, -#' # with no correlation among three loci: -#' iln0 <- correlated.trees(pri10s, model="iln", r=.04e-2, s2=.1, 3, 0) -#' lapply(iln0$trees, plot) -#' # Repeat with strong correlation among loci: -#' ilnc <- correlated.trees(pri10s, model="iln", r=.04e-2, s2=.1, 3, 0.9) -#' lapply(ilnc$trees, plot) -#' -#' # GBM model: -#' # Simulate using autocorrelated log-normal rates on a primate phylogeny, -#' # with no correlation among three loci: -#' gbm0 <- correlated.trees(pri10s, model="gbm", r=.04e-2, s2=.26e-2, 3, 0) -#' lapply(gbm0$trees, plot) -#' # Repeat with strong correlation among loci: -#' gbmc <- correlated.trees(pri10s, model="gbm", r=.04e-2, s2=.26e-2, 3, 0.9) -#' lapply(gbmc$trees, plot) -#' -#' -#' -#' @author Mario dos Reis -#' -#' @export -correlated.trees <- function(tree, model, r, s2, nloci, corr) { - tt <- tree - nb <- length(tt$edge.length) - model <- match.arg(model, c("iln", "gbm_RY07")) - - # construct among loci covariance matrix (p x p): - R <- matrix(corr, ncol=nloci, nrow=nloci) * s2 - diag(R) <- s2 - - # use Eigen decomposition to simulate correlations - eS <- eigen(R, symmetric = TRUE) - ev <- eS$values - - # n x p; n: nb; p: nloci - lrvm <- matrix(nrow=nb, ncol=nloci) - - for (i in 1:nloci) { - if (model == "iln") { - lrvm[,i] <- rnorm(nb, 0, 1) - } else if (model == "gbm_RY07") { - lrvm[,i] <- .sim.gmbRY07(tree, 1, 1, log=TRUE) - # NOTE: gmbRY07 needs more testing! - } - } - - # p x p %*% p x n = p x n - rvm <- exp( eS$vectors %*% diag(sqrt(pmax(ev, 0)), nloci) %*% t(lrvm) + log(r) - s2/2 ) - - tls <- list() - for (i in 1:nloci) { - tls[[i]] <- tt - tls[[i]]$edge.length <- tt$edge.length * rvm[i,] - } - - return(list(trees=tls, rates=rvm)) -} From 9eff8c072201925ed96c98faf4f9cf8a6e59890f Mon Sep 17 00:00:00 2001 From: evolbeginner Date: Sun, 9 Nov 2025 17:13:41 +0800 Subject: [PATCH 05/14] ou --- R/relaxed.tree.R | 151 +++++++++++++++-------------------------------- 1 file changed, 48 insertions(+), 103 deletions(-) diff --git a/R/relaxed.tree.R b/R/relaxed.tree.R index 50f8e5e..8974a10 100644 --- a/R/relaxed.tree.R +++ b/R/relaxed.tree.R @@ -5,19 +5,30 @@ #' @param r numeric, the mean rate in substitutions per site #' @param s2 numeric, the rate "diffusion" parameter for the relaxed clocks #' -#' @details The \code{tree} is assummed to be a timetree. Thus, if all your tip +#' @details The \code{tree} is assummed to be a timetree. Thus, if all the tip #' species are extant, then \code{tree} must be ultrametric. If \code{tree} is #' not ultrametric then it is assummed you have extinct tips. The \code{tree} -#' must be rooted and strictly bifurcating. -#' -#' The options for \code{model} are "clk", "iln", and "gbm_RY07", for the -#' strict clock, the independent log-normal rates, and the geometric-Brownian -#' motion rates, respectively (see Rannala and Yang, 2007). If \code{model == -#' "clk"} the branch lengths of \code{tree} are multiplied by \code{r}. If -#' \code{model == "iln"} or \code{model == "gbm_RN07"}, \eqn{n = 2*s - 2} -#' rates (one for each branch in the \code{s} species phylogeny) are sampled -#' from the appropriate distribution. The branch lengths in \code{tree} are -#' then multiplied by the corresponding rates. +#' must be rooted. +#' +#' The options for \code{model} are "clk", "iln", "gbm_RY07", and "gbm0", for +#' the strict clock, the independent log-normal rates, and the +#' geometric-Brownian motion rates, respectively. +#' +#' If \code{model == "clk"} the branch lengths of \code{tree} are multiplied +#' by \code{r}. For the other models, \eqn{n} rates (one for each branch in +#' the \code{s} species phylogeny) are sampled from the appropriate +#' distribution. The branch lengths in \code{tree} are then multiplied by the +#' corresponding rates. +#' +#' Model "gbm_RY07" is the the one implemented by Rannala and Yang (2007) with +#' the mean of the rate stabilized across the branches of the tree, i.e., this +#' version of the GBM process is a martingale. Mean stabilization is achieved +#' by adding a drift factor to the Brownian process on the log-rate, and thus +#' the process on the log-rate is no longer a martingale. An alternative +#' version "gbm0" is provided in which the drift factor is removed. In this +#' version the process on the log-rate is now a martingale while the process +#' on the rate is not. The drift factor causes collapsation of the rates to +#' zero as time goes to infinity. #' #' @references #' Drummond et al. (2006) \emph{Relaxed phylogenetics and dating with @@ -25,10 +36,10 @@ #' #' Panchaksaram et al. (2024) \emph{Bayesian selection of relaxed-clock #' models: Distinguishing between independent and autocorrelated rates.} -#' Systematic Biology. +#' Systematic Biology, 74: 323--334. #' -#' Yang and Rannala (2007) \emph{Inferring speciation times under an -#' episodic molecular clock.} Systematic Biology, 56: 453-466. +#' Rannala and Yang (2007) \emph{Inferring speciation times under an +#' episodic molecular clock.} Systematic Biology, 56: 453--466. #' #' @return An object of class phylo with branch lengths in substitutions per #' site. @@ -39,7 +50,7 @@ #' #' data(pri10s) #' # Simulate using autocorrelated log-normal rates on a primate phylogeny: -#' tt <- relaxed.tree(pri10s, model="gbm", r=.04e-2, s2=.26e-2) +#' tt <- relaxed.tree(pri10s, model="gbm_RY07", r=.04e-2, s2=.26e-2) #' #' # The relaxed tree (branch lengths are in substitutions per site): #' plot(tt, main="Relaxed primate tree (subs per site)") @@ -73,10 +84,10 @@ #' #' @export # TODO: add indpendent gamma rates model. -relaxed.tree <- function(tree, model, r, s2) { +relaxed.tree <- function(tree, model, r, s2, drift) { tt <- tree nb <- length(tt$edge.length) - model <- match.arg(model, c("clk", "iln", "gbm_RY07")) + model <- match.arg(model, c("clk", "iln", "gbm_RY07", "gbm0", "gbm_full")) if (!ape::is.rooted(tt)) { stop("tree must be rooted") @@ -94,7 +105,15 @@ relaxed.tree <- function(tree, model, r, s2) { tt$edge.length <- tt$edge.length * rv } else if (model == "gbm_RY07") { - rv <- .sim.gbmRY07(tree, r, s2) + rv <- .sim.gbmRY07(tree, r, s2, drift=0) + tt$edge.length <- tt$edge.length * rv + } + else if (model == "gbm0") { + rv <- .sim.gbmRY07(tree, r, s2, drift=0.5*s2) + tt$edge.length <- tt$edge.length * rv + } + else if (model == 'gbm_full') { + rv <- .sim.gbmRY07(tree, r, s2, drift=drift) tt$edge.length <- tt$edge.length * rv } return (tt) @@ -102,7 +121,10 @@ relaxed.tree <- function(tree, model, r, s2) { # Simulate using the GBM rates of Yang and Rannala (2007, Syst. Biol.) # Guillaume fixed the function to allow simulation on multi-furcating trees. -.sim.gbmRY07 <- function(tree, r, s2, log=FALSE) { +# Sishuo extended the model to a full GBM with a drift coefficient, representing +# the non-stochastic change in the process. In gbm_full, RY07 and gbm0 are special +# cases with the drift equal to 0 and 0.5*s2, respectively. +.sim.gbmRY07 <- function(tree, r, s2, log=FALSE, drift=0) { nb <- length(tree$edge.length) nt <- length(tree$tip.label) @@ -123,7 +145,12 @@ relaxed.tree <- function(tree, model, r, s2) { desc <- which(tree$edge[,1] == node) desc.t <- tree$edge.length[desc] - mu <- ya - (ta + desc.t) * s2/2 + # drift == 0 is the YR07 model with stabilised mean (i.e., the gbm + # process is forced to be a martingale) + # "r" is added to make it accommodate gbm_full + # specifically, r = 0, 0.5*s2, user_specified for RY07, gbm0, gbm_full + mu <- ya + (drift-s2/2) * (ta + desc.t) + Sig <- matrix(ta * s2, length(desc), length(desc)) diag(Sig) <- (ta + desc.t) * s2 @@ -139,6 +166,7 @@ relaxed.tree <- function(tree, model, r, s2) { #' Calculate quantiles of GBM process #' @export +# TODO: write documentation gbm_RY07q <- function(p, ra, s2, t, log=FALSE) { pps <- qnorm(p, mean=log(ra) - t*s2/2, sd=sqrt(s2 * t)) if (log) { @@ -148,86 +176,3 @@ gbm_RY07q <- function(p, ra, s2, t, log=FALSE) { return (exp(pps)) } } - -#' Simulate correlated branch lengths among loci on a phylogeny under a relaxed clock -#' -#' @param tree an object of class phylo representing a bifurcating phylogeny -#' @param model character, the relaxed clock model -#' @param r numeric, the mean rate in substitutions per site -#' @param s2 numeric, the rate "diffusion" parameter for the relaxed clocks -#' @param nloci numeric, the number of trees to simulate (one per locus) -#' @param corr, numeric, the correlation of log rates among loci -#' -#' @details A total of \code{nloci} trees are simulated, with the log-rates -#' for branches across trees having correlation \code{corr}. -#' -#' @return A list with two elements: A list of length \code{nloci} of trees of -#' class phylo with branch lengths in substitutions per site, and a matrix of -#' branch rates for each locus. -#' -#' @seealso \link{relaxed.tree} to simulate a single tree. -#' -#' @examples -#' require(ape) -#' par(mfrow=c(2,3)) -#' -#' data(pri10s) -#' # ILN model: -#' # Simulate using autocorrelated log-normal rates on a primate phylogeny, -#' # with no correlation among three loci: -#' iln0 <- correlated.trees(pri10s, model="iln", r=.04e-2, s2=.1, 3, 0) -#' lapply(iln0$trees, plot) -#' # Repeat with strong correlation among loci: -#' ilnc <- correlated.trees(pri10s, model="iln", r=.04e-2, s2=.1, 3, 0.9) -#' lapply(ilnc$trees, plot) -#' -#' # GBM model: -#' # Simulate using autocorrelated log-normal rates on a primate phylogeny, -#' # with no correlation among three loci: -#' gbm0 <- correlated.trees(pri10s, model="gbm", r=.04e-2, s2=.26e-2, 3, 0) -#' lapply(gbm0$trees, plot) -#' # Repeat with strong correlation among loci: -#' gbmc <- correlated.trees(pri10s, model="gbm", r=.04e-2, s2=.26e-2, 3, 0.9) -#' lapply(gbmc$trees, plot) -#' -#' -#' -#' @author Mario dos Reis -#' -#' @export -correlated.trees <- function(tree, model, r, s2, nloci, corr) { - tt <- tree - nb <- length(tt$edge.length) - model <- match.arg(model, c("iln", "gbm_RY07")) - - # construct among loci covariance matrix (p x p): - R <- matrix(corr, ncol=nloci, nrow=nloci) * s2 - diag(R) <- s2 - - # use Eigen decomposition to simulate correlations - eS <- eigen(R, symmetric = TRUE) - ev <- eS$values - - # n x p; n: nb; p: nloci - lrvm <- matrix(nrow=nb, ncol=nloci) - - for (i in 1:nloci) { - if (model == "iln") { - lrvm[,i] <- rnorm(nb, 0, 1) - } else if (model == "gbm_RY07") { - lrvm[,i] <- .sim.gmbRY07(tree, 1, 1, log=TRUE) - # NOTE: gmbRY07 needs more testing! - } - } - - # p x p %*% p x n = p x n - rvm <- exp( eS$vectors %*% diag(sqrt(pmax(ev, 0)), nloci) %*% t(lrvm) + log(r) - s2/2 ) - - tls <- list() - for (i in 1:nloci) { - tls[[i]] <- tt - tls[[i]]$edge.length <- tt$edge.length * rvm[i,] - } - - return(list(trees=tls, rates=rvm)) -} From a1a7f636af6b694fd82b852c4e901ec9da60fc24 Mon Sep 17 00:00:00 2001 From: evolbeginner Date: Sun, 9 Nov 2025 17:15:45 +0800 Subject: [PATCH 06/14] ou --- R/relaxed.tree.R | 68 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/R/relaxed.tree.R b/R/relaxed.tree.R index 8974a10..aab0812 100644 --- a/R/relaxed.tree.R +++ b/R/relaxed.tree.R @@ -87,7 +87,7 @@ relaxed.tree <- function(tree, model, r, s2, drift) { tt <- tree nb <- length(tt$edge.length) - model <- match.arg(model, c("clk", "iln", "gbm_RY07", "gbm0", "gbm_full")) + model <- match.arg(model, c("clk", "iln", "gbm_RY07", "gbm0", "gbm_full", "ou")) if (!ape::is.rooted(tt)) { stop("tree must be rooted") @@ -116,6 +116,11 @@ relaxed.tree <- function(tree, model, r, s2, drift) { rv <- .sim.gbmRY07(tree, r, s2, drift=drift) tt$edge.length <- tt$edge.length * rv } + else if (model == 'ou') { + #rv <- .sim.ou(tree, r, s2, drift=drift) + rv <- .sim.ou(tree, r, s2, r_opt, theta) + tt$edge.length <- tt$edge.length * rv + } return (tt) } @@ -164,6 +169,66 @@ relaxed.tree <- function(tree, model, r, s2, drift) { } } + +#.sim.ou <- function(tree, r, theta, r_opt, s2, log=FALSE) { +.sim.ou <- function(tree, r, s2, r_opt, theta, log=FALSE) { + # Simulate rate variation under an OU process along a phylogeny + # tree : phylo object (ape) + # r : root rate + # theta : strength of mean reversion + # r_opt : exponential of the long-term mean (optimal_rate) + # s2 : diffusion variance parameter + # log : return on log scale if TRUE + + nb <- length(tree$edge.length) + nt <- length(tree$tip.label) + tree$edge.length <- tree$edge.length / 2 + rv <- numeric(nb) + + for (node in (nt + 1):(nb + 1)) { + dad <- which(tree$edge[, 2] == node) + + if (length(dad) == 0) { + ## root case + tA <- 0 + yA <- log(r) # rate at the root + } else { + tA <- tree$edge.length[dad] + yA <- rv[dad] + } + + desc <- which(tree$edge[, 1] == node) + desc.t <- tree$edge.length[desc] + n.desc <- length(desc) + + ## Expected means for descendants + # note that 'mu' is diff from .sim.gbmRY07() + mu <- log(r_opt) + means <- (yA - mu) * exp(-theta * (tA + desc.t)) + mu + + ## Cov matrix: Sigma + Sigma <- matrix(NA, n.desc, n.desc) + for (i in 1:n.desc) { + for (j in 1:n.desc) { + if (i == j) { + Sigma[i, i] <- (s2 / (2 * theta)) * (1 - exp(-2 * theta * (tA + desc.t[i]))) + } else { + # Cov btwn descendants + Sigma[i, j] <- (s2 / (2 * theta)) * + (exp(-theta * (desc.t[i] + desc.t[j])) * (1 - exp(-2 * theta * tA))) + } + } + } + + ## descendant trait values + rv[desc] <- MASS::mvrnorm(1, means, Sigma) + } + + if (log) return(rv) else return(exp(rv)) +} + + + #' Calculate quantiles of GBM process #' @export # TODO: write documentation @@ -176,3 +241,4 @@ gbm_RY07q <- function(p, ra, s2, t, log=FALSE) { return (exp(pps)) } } + From 97229a710cf516f0ca82dcb8e3b94d6f955d9dce Mon Sep 17 00:00:00 2001 From: evolbeginner Date: Mon, 17 Nov 2025 15:57:11 +0800 Subject: [PATCH 07/14] ou2 --- R/relaxed.tree.R | 60 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 5 deletions(-) diff --git a/R/relaxed.tree.R b/R/relaxed.tree.R index aab0812..5d22b55 100644 --- a/R/relaxed.tree.R +++ b/R/relaxed.tree.R @@ -105,15 +105,19 @@ relaxed.tree <- function(tree, model, r, s2, drift) { tt$edge.length <- tt$edge.length * rv } else if (model == "gbm_RY07") { - rv <- .sim.gbmRY07(tree, r, s2, drift=0) + rv <- .sim.gbm(tree, r, s2, drift=0) + #equivalent to: rv <- .sim.gbm(tree, r, s2, log_drift=-0.5*s2) tt$edge.length <- tt$edge.length * rv } else if (model == "gbm0") { - rv <- .sim.gbmRY07(tree, r, s2, drift=0.5*s2) + rv <- .sim.gbm(tree, r, s2, drift=0.5*s2) + #equivalent to: rv <- .sim.gbm(tree, r, s2, log_drift=0) tt$edge.length <- tt$edge.length * rv } - else if (model == 'gbm_full') { - rv <- .sim.gbmRY07(tree, r, s2, drift=drift) + else if (model == 'gbm_full' || model == 'gbm_f') { + # actually 'gbm' is fine but as the term GBM is often used in prev + # molecular clock studies to represent gbm0, 'gbm_full' might be clearer + rv <- .sim.gbm(tree, r, s2, drift=drift) tt$edge.length <- tt$edge.length * rv } else if (model == 'ou') { @@ -129,7 +133,11 @@ relaxed.tree <- function(tree, model, r, s2, drift) { # Sishuo extended the model to a full GBM with a drift coefficient, representing # the non-stochastic change in the process. In gbm_full, RY07 and gbm0 are special # cases with the drift equal to 0 and 0.5*s2, respectively. -.sim.gbmRY07 <- function(tree, r, s2, log=FALSE, drift=0) { +.sim.gbm <- function(tree, r, s2, log=FALSE, drift=0, log_drift) { + + if(is.numeric(log_drift)){ + drift <- log_drift + 0.5 * s2 + } nb <- length(tree$edge.length) nt <- length(tree$tip.label) @@ -229,6 +237,47 @@ relaxed.tree <- function(tree, model, r, s2, drift) { +# prev gbm func +.sim.gbmRY07 <- function(tree, r, s2, log=FALSE, drift) { + + nb <- length(tree$edge.length) + nt <- length(tree$tip.label) + tree$edge.length <- tree$edge.length / 2 + rv <- numeric(nb) + + for (node in (nt+1):(nb+1)) { + dad <- which(tree$edge[,2] == node) + if (length(dad) == 0) { # I'm the root! + ta <- 0 # ancestral time + ya <- log(r) # root rate + } + else { + ta <- tree$edge.length[dad] + ya <- rv[dad] + } + + desc <- which(tree$edge[,1] == node) + desc.t <- tree$edge.length[desc] + + # drift == TRUE is the YR07 model with stabilised mean (i.e., the gbm + # process is forced to be a martingale) + if (drift) mu <- ya - (ta + desc.t) * s2/2 + else mu <- ya + + Sig <- matrix(ta * s2, length(desc), length(desc)) + diag(Sig) <- (ta + desc.t) * s2 + + rv[desc] <- MASS::mvrnorm(1, mu, Sig) + #print(c(node, exp(c(ya, rr)))) + } + if (log) { + return (rv) + } else { + return (exp(rv)) + } +} + + #' Calculate quantiles of GBM process #' @export # TODO: write documentation @@ -242,3 +291,4 @@ gbm_RY07q <- function(p, ra, s2, t, log=FALSE) { } } + From 5a283c2ed67316f90e2bf1ced8e5f2e71fb0fd60 Mon Sep 17 00:00:00 2001 From: evolbeginner Date: Mon, 17 Nov 2025 16:13:44 +0800 Subject: [PATCH 08/14] ou2 --- R/relaxed.tree.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/relaxed.tree.R b/R/relaxed.tree.R index 5d22b55..315ad4d 100644 --- a/R/relaxed.tree.R +++ b/R/relaxed.tree.R @@ -133,7 +133,7 @@ relaxed.tree <- function(tree, model, r, s2, drift) { # Sishuo extended the model to a full GBM with a drift coefficient, representing # the non-stochastic change in the process. In gbm_full, RY07 and gbm0 are special # cases with the drift equal to 0 and 0.5*s2, respectively. -.sim.gbm <- function(tree, r, s2, log=FALSE, drift=0, log_drift) { +.sim.gbm <- function(tree, r, s2, log=FALSE, drift=0, log_drift=NA) { if(is.numeric(log_drift)){ drift <- log_drift + 0.5 * s2 From 04f4801bdb3ea00c8003031276df4758199e2055 Mon Sep 17 00:00:00 2001 From: evolbeginner Date: Mon, 17 Nov 2025 16:52:48 +0800 Subject: [PATCH 09/14] ou2 --- R/relaxed.tree.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/relaxed.tree.R b/R/relaxed.tree.R index 315ad4d..17aaf87 100644 --- a/R/relaxed.tree.R +++ b/R/relaxed.tree.R @@ -87,7 +87,7 @@ relaxed.tree <- function(tree, model, r, s2, drift) { tt <- tree nb <- length(tt$edge.length) - model <- match.arg(model, c("clk", "iln", "gbm_RY07", "gbm0", "gbm_full", "ou")) + model <- match.arg(model, c("clk", "iln", "gbm_RY07", "gbm0", "gbm_full", "gbm", "ou")) if (!ape::is.rooted(tt)) { stop("tree must be rooted") @@ -114,7 +114,7 @@ relaxed.tree <- function(tree, model, r, s2, drift) { #equivalent to: rv <- .sim.gbm(tree, r, s2, log_drift=0) tt$edge.length <- tt$edge.length * rv } - else if (model == 'gbm_full' || model == 'gbm_f') { + else if (model == 'gbm_full' || model == 'gbm') { # actually 'gbm' is fine but as the term GBM is often used in prev # molecular clock studies to represent gbm0, 'gbm_full' might be clearer rv <- .sim.gbm(tree, r, s2, drift=drift) From 7b1a8484fd5e3e8e4323598236bfaeb9b2942e4c Mon Sep 17 00:00:00 2001 From: evolbeginner Date: Thu, 20 Nov 2025 14:56:06 +0800 Subject: [PATCH 10/14] ou2 --- R/relaxed.tree.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/relaxed.tree.R b/R/relaxed.tree.R index 17aaf87..6c56738 100644 --- a/R/relaxed.tree.R +++ b/R/relaxed.tree.R @@ -84,7 +84,7 @@ #' #' @export # TODO: add indpendent gamma rates model. -relaxed.tree <- function(tree, model, r, s2, drift) { +relaxed.tree <- function(tree, model, r, s2, drift=0) { tt <- tree nb <- length(tt$edge.length) model <- match.arg(model, c("clk", "iln", "gbm_RY07", "gbm0", "gbm_full", "gbm", "ou")) From 2104f734448fbe447e908bfcfd7458a6410f2295 Mon Sep 17 00:00:00 2001 From: evolbeginner Date: Wed, 10 Jun 2026 16:35:17 +0800 Subject: [PATCH 11/14] ou_r_opt_fixed --- R/relaxed.tree.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/relaxed.tree.R b/R/relaxed.tree.R index 6c56738..7df852b 100644 --- a/R/relaxed.tree.R +++ b/R/relaxed.tree.R @@ -84,7 +84,7 @@ #' #' @export # TODO: add indpendent gamma rates model. -relaxed.tree <- function(tree, model, r, s2, drift=0) { +relaxed.tree <- function(tree, model, r, s2, r_opt=r, drift=0) { tt <- tree nb <- length(tt$edge.length) model <- match.arg(model, c("clk", "iln", "gbm_RY07", "gbm0", "gbm_full", "gbm", "ou")) From 9daa4d26683fc4a233a78b727f8087ed2220c549 Mon Sep 17 00:00:00 2001 From: evolbeginner Date: Wed, 10 Jun 2026 16:38:33 +0800 Subject: [PATCH 12/14] ou_r_opt_fixed --- R/relaxed.tree.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/relaxed.tree.R b/R/relaxed.tree.R index 7df852b..c842a89 100644 --- a/R/relaxed.tree.R +++ b/R/relaxed.tree.R @@ -121,8 +121,7 @@ relaxed.tree <- function(tree, model, r, s2, r_opt=r, drift=0) { tt$edge.length <- tt$edge.length * rv } else if (model == 'ou') { - #rv <- .sim.ou(tree, r, s2, drift=drift) - rv <- .sim.ou(tree, r, s2, r_opt, theta) + rv <- .sim.ou(tree, r, s2, r_opt, theta=exp(drift)) tt$edge.length <- tt$edge.length * rv } return (tt) From 8d9e8b3920636804f675c219d30e2ed3cd874f52 Mon Sep 17 00:00:00 2001 From: evolbeginner Date: Wed, 10 Jun 2026 17:12:33 +0800 Subject: [PATCH 13/14] ou_r_opt_fixed --- R/relaxed.tree.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/relaxed.tree.R b/R/relaxed.tree.R index c842a89..3999a17 100644 --- a/R/relaxed.tree.R +++ b/R/relaxed.tree.R @@ -84,7 +84,7 @@ #' #' @export # TODO: add indpendent gamma rates model. -relaxed.tree <- function(tree, model, r, s2, r_opt=r, drift=0) { +relaxed.tree <- function(tree, model, r, s2, r_opt=r, drift=0, theta=NULL) { tt <- tree nb <- length(tt$edge.length) model <- match.arg(model, c("clk", "iln", "gbm_RY07", "gbm0", "gbm_full", "gbm", "ou")) @@ -121,7 +121,7 @@ relaxed.tree <- function(tree, model, r, s2, r_opt=r, drift=0) { tt$edge.length <- tt$edge.length * rv } else if (model == 'ou') { - rv <- .sim.ou(tree, r, s2, r_opt, theta=exp(drift)) + rv <- .sim.ou(tree, r, s2, r_opt, theta=theta) tt$edge.length <- tt$edge.length * rv } return (tt) From 6d1f16880792cfb4fa4587442a8df3b2acec8fe5 Mon Sep 17 00:00:00 2001 From: evolbeginner Date: Fri, 12 Jun 2026 18:06:45 +0800 Subject: [PATCH 14/14] GBM_ou_readme --- README.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/README.md b/README.md index 591a2b1..6cb9089 100644 --- a/README.md +++ b/README.md @@ -132,6 +132,21 @@ This writes the trees to three separate files in Newick format. You can put these trees in, for example, MCbase.dat to generate the sequence alignments with Evolver. +### Additional relaxed-clock examples + +`relaxed.tree` also supports OU, full GBM, and the lograte-martingale GBM model: + +```r +## Lograte-martingale GBM (gbm0) +reltt_gbm0 <- relaxed.tree(pri10s, model="gbm0", r=.04e-2, s2=.26e-2) + +## Full GBM with user-specified drift +reltt_gbm_full <- relaxed.tree(pri10s, model="gbm_full", r=.04e-2, s2=.26e-2, drift=.10e-2) + +## OU process (mean reversion on log-rates) +reltt_ou <- relaxed.tree(pri10s, model="ou", r=.04e-2, s2=.26e-2, r_opt=.05e-2, theta=0.15) +``` + ## References * Panchaksaram, Freitas and dos Reis (2024) Bayesian Selection of Relaxed-clock Models: Distinguishing Between Independent and Autocorrelated Rates. Systematic Biology, 74: 453--466.