From 62b798a11d347fccfe119a5c7ea75f8b1e5227e1 Mon Sep 17 00:00:00 2001
From: Sarah Teichman <teichs@uw.edu>
Date: Fri, 19 Dec 2025 16:12:56 -0500
Subject: [PATCH 1/2] updates before submitting to cran

---
 DESCRIPTION                                   |  2 +-
 R/X_cup_from_X.R                              | 25 +++++++
 R/emuFit.R                                    | 53 ++++++++++-----
 R/emuFit_micro_penalized.R                    |  4 +-
 R/fit_null.R                                  |  2 +-
 R/get_G_for_augmentations.R                   | 18 +++++
 man/emuFit.Rd                                 | 42 ++++++++----
 tests/testthat/test-X_cup_from_X.R            | 13 ++++
 tests/testthat/test-emuFit.R                  |  4 +-
 tests/testthat/test-fit_null.R                |  3 +
 tests/testthat/test-get_G_for_augmentations.R | 17 +++++
 vignettes/quick_start.Rmd                     | 67 +++++++++++++++++++
 12 files changed, 213 insertions(+), 37 deletions(-)
 create mode 100644 tests/testthat/test-X_cup_from_X.R
 create mode 100644 tests/testthat/test-get_G_for_augmentations.R
 create mode 100644 vignettes/quick_start.Rmd

diff --git a/DESCRIPTION b/DESCRIPTION
index 30cbd8e..b2e7c8a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: radEmu
 Title: Using Relative Abundance Data to Estimate of Multiplicative Differences in Mean Absolute Abundance 
-Version: 2.2.1.0
+Version: 2.3.1.0
 Authors@R: c(person("David", "Clausen", role = c("aut")),
              person("Amy D", "Willis", email = "adwillis@uw.edu", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-2802-4317")),
              person("Sarah", "Teichman", role = "aut"))
diff --git a/R/X_cup_from_X.R b/R/X_cup_from_X.R
index f736b04..37e855a 100644
--- a/R/X_cup_from_X.R
+++ b/R/X_cup_from_X.R
@@ -20,3 +20,28 @@ X_cup_from_X <- function(X,J){
 
   return(X_cup)
 }
+
+X_cup_from_X_fast <- function(X, J) {
+  n <- nrow(X)
+  p <- ncol(X)
+  
+  # Total number of rows in final matrix
+  total_rows <- n * J
+  
+  # Construct i index (row indices)
+  i_coords <- rep(1:total_rows, each = p)
+  
+  # Construct j index (column indices)
+  j_block <- rep(0:(J - 1), each = p) * p
+  j_coords <- rep(j_block, times = n) + rep(1:p, times = total_rows)
+  
+  # Construct values
+  X_rep <- X[rep(1:n, each = J), ]
+  x_vals <- as.vector(t(X_rep))  # row-wise unrolling
+  
+  X_cup <- Matrix::sparseMatrix(i = i_coords,
+                                j = j_coords,
+                                x = x_vals)
+  
+  return(X_cup)
+}
\ No newline at end of file
diff --git a/R/emuFit.R b/R/emuFit.R
index 52f62ba..a79213d 100644
--- a/R/emuFit.R
+++ b/R/emuFit.R
@@ -45,6 +45,10 @@
 #' (corresponding to 95% confidence intervals)
 #' @param return_wald_p logical: return p-values from Wald tests? Default is `FALSE`.
 #' @param compute_cis logical: compute and return Wald CIs? Default is `TRUE`.
+#' @param max_abs_B maximum allowed value for elements of B (in absolute value) in full model fitting. In
+#' most cases this is not needed as Firth penalty will prevent infinite estimates
+#' under separation. However, such a threshold may be helpful in very poorly conditioned problems (e.g., with many
+#' nearly collinear regressors). Default is \code{250}.
 #' @param run_score_tests logical: perform robust score testing? Default is TRUE.
 #' @param test_kj a data frame whose rows give coordinates (in category j and
 #' covariate k) of elements of B to construct hypothesis tests for. If you don't know
@@ -77,26 +81,34 @@
 #' @param ... Additional arguments. Arguments matching the names of \code{control_fn()} options are forwarded to that function and override
 #' defaults. Unknown arguments are ignored with a warning.
 #' 
-#' @return A list containing elements 'coef', 'B', 'penalized', 'Y_augmented',
-#' 'z_hat', 'I', 'Dy', and 'score_test_hyperparams' if score tests are run.  
-#' Parameter estimates by covariate and outcome category (e.g., taxon for microbiome data),
-#' as well as optionally confidence intervals and p-values, are contained in 'coef'.
-#' Any robust score statistics and score test p-values are also included in 'coef'. 
-#' If there are any zero-comparison parameters in the model, a column 'zero_comparison'
-#' is also included, which is TRUE for any parameters that compare the level of a categorical
+#' @return \code{emuFit} returns a list containing elements \code{coef}, \code{B}, \code{penalized}, \code{Y_augmented},
+#' \code{z_hat}, \code{I}, \code{Dy}, and \code{score_test_hyperparams} if score tests are run.  
+#' 
+#' The \code{coef} table contains log fold-difference parameter estimates by covariate and outcome 
+#' category (e.g., taxon for microbiome data). A log fold-difference estimate of \code{1} for a treatment (versus control) and taxon X can be 
+#' interpreted to say that we expect taxon X is \code{exp(1) = 2.72} times more abundant in someone who has received the treatment compared to 
+#' someone who has received the control (holding all other covariates equal), when compared to typical fold-differences in abundances of 
+#' taxa in this analysis.
+#'
+#' \code{coef} also includes optionally-computed confidence intervals and robust Wald p-values.
+#' Robust score statistics and score test p-values are also included in \code{coef}. 
+#' As explained in the associated manuscript, we recommend use of the robust score test values instead of the robust 
+#' Wald test p-values, due to better error rate control (i.e. fewer false positives). 
+#' 
+#' If there are any zero-comparison parameters in the model, a column "zero_comparison"
+#' is also included, which is \code{TRUE} for any parameters that compare the level of a categorical
 #' covariate to a reference level for a category with only zero counts for both the comparison
 #' level and the reference level. This check is currently implemented for an arbitrary design matrix
 #' generated using the \code{formula} and \code{data} arguments, and for a design matrix with no more
 #' than one categorical covariate if the design matrix \code{X} is input directly.
-#' 'B' contains parameter estimates in matrix format (rows indexing covariates and
+#' 
+#' \code{B} contains parameter estimates in matrix format (rows indexing covariates and
 #' columns indexing outcome category / taxon). 
-#' 'penalized' is equal to TRUE f Firth penalty is used in estimation (default) and FALSE otherwise. 
-#' 'z_hat' returns the nuisance parameters calculated in Equation 7 of the radEmu manuscript,
-#' corresponding to either 'Y_augmented' or 'Y' if the 'penalized' is equal to TRUE
-#' or FALSE, respectively. 
-#' I' and 'Dy' contain an information matrix and empirical score covariance matrix computed under the 
+#' \code{penalized} is \code{TRUE} if Firth penalty is used in estimation (default) and \code{FALSE} otherwise. 
+#' \code{z_hat} returns the nuisance parameters (sample-specific sequencing effects).
+#' \code{I} and \code{Dy} contain an information matrix and empirical score covariance matrix computed under the 
 #' full model. 
-#' 'score_test_hyperparams' contains parameters and hyperparameters related to estimation under the null,
+#' \code{score_test_hyperparams} contains parameters and hyperparameters related to estimation under the null,
 #' including whether or not the algorithm converged, which can be helpful for debugging. 
 #'
 #' @importFrom stats cov median model.matrix optim pchisq qnorm weighted.mean
@@ -146,6 +158,7 @@ emuFit <- function(Y,
                    alpha = 0.05,
                    return_wald_p = FALSE,
                    compute_cis = TRUE,
+                   max_abs_B = 250, 
                    run_score_tests = TRUE,
                    test_kj = NULL,
                    null_fit_alg = "constraint_sandwich",
@@ -222,7 +235,7 @@ emuFit <- function(Y,
   # check for zero-comparison parameters
   zero_comparison_res <- zero_comparison_check(X = X, Y = Y)
   
-  X_cup <- X_cup_from_X(X,J)
+  X_cup <- X_cup_from_X_fast(X,J)
   
   
   
@@ -251,7 +264,8 @@ emuFit <- function(Y,
                                max_step = control$max_step,
                                tolerance = tolerance,
                                verbose = (verbose == "development"),
-                               j_ref = j_ref)
+                               j_ref = j_ref,
+                               max_abs_B = max_abs_B)
       Y_test <- fitted_model$Y_augmented
       fitted_B <- fitted_model$B
       converged_estimates <- fitted_model$convergence
@@ -295,7 +309,7 @@ emuFit <- function(Y,
     } else {
       fitted_B <- B
       if (penalize) {
-        G <- get_G_for_augmentations(X, J, n, X_cup)
+        G <- get_G_for_augmentations_fast(X, J, n, X_cup)
         Y_test <- Y_augmented <- Y + 
           get_augmentations(X = X, G = G, Y = Y, B = fitted_B)
       } else {
@@ -305,6 +319,11 @@ emuFit <- function(Y,
     }
   }
   
+  max_est_B <- max(abs(fitted_B))
+  if (refit & (max_est_B >= 0.9 * max_abs_B)) {
+    warning("At least one estimated B value is within 10% of your `max_abs_B` boundary. We suggest that you rerun estimation with a larger `max_abs_B` value.")
+  }
+  
   if (p == 1) {
     message("You are running an intercept-only model. In this model the intercept beta_0^j is an unidentifiable combination of intercept for category j and the detection efficiency of category j. Therefore this parameter is not interpretable.")
     
diff --git a/R/emuFit_micro_penalized.R b/R/emuFit_micro_penalized.R
index 385f72a..86a9eae 100644
--- a/R/emuFit_micro_penalized.R
+++ b/R/emuFit_micro_penalized.R
@@ -64,9 +64,9 @@ may take a moment."
       )
     }
     if (is.null(X_cup)) {
-      X_cup <- X_cup_from_X(X, J)
+      X_cup <- X_cup_from_X_fast(X, J)
     }
-    G <- get_G_for_augmentations(X, J, n, X_cup)
+    G <- get_G_for_augmentations_fast(X, J, n, X_cup)
 
     while (!converged) {
       # print(counter)
diff --git a/R/fit_null.R b/R/fit_null.R
index b6f6b65..0756272 100644
--- a/R/fit_null.R
+++ b/R/fit_null.R
@@ -101,7 +101,7 @@ fit_null <- function(B,
   
   #get X_cup for later use
   if (is.null(X_cup)) {
-    X_cup = X_cup_from_X(X,J)
+    X_cup = X_cup_from_X_fast(X,J)
   }
   
   #set iteration to zero
diff --git a/R/get_G_for_augmentations.R b/R/get_G_for_augmentations.R
index b1cef00..a1dc793 100644
--- a/R/get_G_for_augmentations.R
+++ b/R/get_G_for_augmentations.R
@@ -22,3 +22,21 @@ get_G_for_augmentations <- function(X,
   G <- cbind(X_tilde_J,Z)
   return(G)
 }
+
+get_G_for_augmentations_fast <- function(X, J, n, X_cup) {
+  p <- ncol(X)
+  #calculate indices of columns to remove from X_cup (aka X_tilde, the expanded design matrix)
+  to_delete <- p*(J - 1) + 1:p #col indices for elements of B^J in long B format
+  last_class_rows <- seq(from = J, to = n * J, by = J)
+  X_cup[last_class_rows, ] <- 0
+  
+  X_cup <- X_cup[, -to_delete]
+  
+  #design matrix in z
+  Z <- Matrix::sparseMatrix(i = 1:(n*J),
+                            j = rep(1:n, each = J),
+                            x = 1,
+                            dims = c(n * J, n))
+  G <- cbind(X_cup,Z)
+  return(G)
+}
diff --git a/man/emuFit.Rd b/man/emuFit.Rd
index 7b57325..1d3afdc 100644
--- a/man/emuFit.Rd
+++ b/man/emuFit.Rd
@@ -26,6 +26,7 @@ emuFit(
   alpha = 0.05,
   return_wald_p = FALSE,
   compute_cis = TRUE,
+  max_abs_B = 250,
   run_score_tests = TRUE,
   test_kj = NULL,
   null_fit_alg = "constraint_sandwich",
@@ -107,6 +108,11 @@ Used in estimation.}
 
 \item{compute_cis}{logical: compute and return Wald CIs? Default is \code{TRUE}.}
 
+\item{max_abs_B}{maximum allowed value for elements of B (in absolute value) in full model fitting. In
+most cases this is not needed as Firth penalty will prevent infinite estimates
+under separation. However, such a threshold may be helpful in very poorly conditioned problems (e.g., with many
+nearly collinear regressors). Default is \code{250}.}
+
 \item{run_score_tests}{logical: perform robust score testing? Default is TRUE.}
 
 \item{test_kj}{a data frame whose rows give coordinates (in category j and
@@ -151,26 +157,34 @@ Default is \code{0.01}.}
 defaults. Unknown arguments are ignored with a warning.}
 }
 \value{
-A list containing elements 'coef', 'B', 'penalized', 'Y_augmented',
-'z_hat', 'I', 'Dy', and 'score_test_hyperparams' if score tests are run.
-Parameter estimates by covariate and outcome category (e.g., taxon for microbiome data),
-as well as optionally confidence intervals and p-values, are contained in 'coef'.
-Any robust score statistics and score test p-values are also included in 'coef'.
-If there are any zero-comparison parameters in the model, a column 'zero_comparison'
-is also included, which is TRUE for any parameters that compare the level of a categorical
+\code{emuFit} returns a list containing elements \code{coef}, \code{B}, \code{penalized}, \code{Y_augmented},
+\code{z_hat}, \code{I}, \code{Dy}, and \code{score_test_hyperparams} if score tests are run.
+
+The \code{coef} table contains log fold-difference parameter estimates by covariate and outcome
+category (e.g., taxon for microbiome data). A log fold-difference estimate of \code{1} for a treatment (versus control) and taxon X can be
+interpreted to say that we expect taxon X is \code{exp(1) = 2.72} times more abundant in someone who has received the treatment compared to
+someone who has received the control (holding all other covariates equal), when compared to typical fold-differences in abundances of
+taxa in this analysis.
+
+\code{coef} also includes optionally-computed confidence intervals and robust Wald p-values.
+Robust score statistics and score test p-values are also included in \code{coef}.
+As explained in the associated manuscript, we recommend use of the robust score test values instead of the robust
+Wald test p-values, due to better error rate control (i.e. fewer false positives).
+
+If there are any zero-comparison parameters in the model, a column "zero_comparison"
+is also included, which is \code{TRUE} for any parameters that compare the level of a categorical
 covariate to a reference level for a category with only zero counts for both the comparison
 level and the reference level. This check is currently implemented for an arbitrary design matrix
 generated using the \code{formula} and \code{data} arguments, and for a design matrix with no more
 than one categorical covariate if the design matrix \code{X} is input directly.
-'B' contains parameter estimates in matrix format (rows indexing covariates and
+
+\code{B} contains parameter estimates in matrix format (rows indexing covariates and
 columns indexing outcome category / taxon).
-'penalized' is equal to TRUE f Firth penalty is used in estimation (default) and FALSE otherwise.
-'z_hat' returns the nuisance parameters calculated in Equation 7 of the radEmu manuscript,
-corresponding to either 'Y_augmented' or 'Y' if the 'penalized' is equal to TRUE
-or FALSE, respectively.
-I' and 'Dy' contain an information matrix and empirical score covariance matrix computed under the
+\code{penalized} is \code{TRUE} if Firth penalty is used in estimation (default) and \code{FALSE} otherwise.
+\code{z_hat} returns the nuisance parameters (sample-specific sequencing effects).
+\code{I} and \code{Dy} contain an information matrix and empirical score covariance matrix computed under the
 full model.
-'score_test_hyperparams' contains parameters and hyperparameters related to estimation under the null,
+\code{score_test_hyperparams} contains parameters and hyperparameters related to estimation under the null,
 including whether or not the algorithm converged, which can be helpful for debugging.
 }
 \description{
diff --git a/tests/testthat/test-X_cup_from_X.R b/tests/testthat/test-X_cup_from_X.R
new file mode 100644
index 0000000..45ace43
--- /dev/null
+++ b/tests/testthat/test-X_cup_from_X.R
@@ -0,0 +1,13 @@
+test_that("X_cup_from_X_fast works and is faster than X_cup_from_X", {
+  X <- cbind(1, rnorm(100), runif(100))
+  J <- 200
+  start1 <- proc.time()
+  Xcup1 <- X_cup_from_X(X, J)
+  end1 <- proc.time() - start1
+  start2 <- proc.time()
+  Xcup2 <- X_cup_from_X_fast(X, J)
+  end2 <- proc.time() - start2
+  
+  expect_true(all.equal(Xcup1, Xcup2))
+  expect_true(end2[3] < end1[3])
+})
\ No newline at end of file
diff --git a/tests/testthat/test-emuFit.R b/tests/testthat/test-emuFit.R
index 6a7f802..8621dd3 100644
--- a/tests/testthat/test-emuFit.R
+++ b/tests/testthat/test-emuFit.R
@@ -404,7 +404,7 @@ test_that("emuFit has 'score_test_hyperparams' object and throws warnings when c
   expect_false(fitted_model$estimation_converged)
   
   # check that warning is returned when estimation under the null doesn't converge
-  suppressWarnings({
+  suppressMessages({suppressWarnings({
     fitted_model2 <- emuFit(Y = Y,
                            X = cbind(X, rnorm(nrow(X))),
                            verbose = FALSE,
@@ -416,7 +416,7 @@ test_that("emuFit has 'score_test_hyperparams' object and throws warnings when c
                            test_kj = data.frame(k = 1, j = 1:2),
                            maxit_null = 5,
                            inner_maxit = 1)
-  })
+  })})
   
   # check that fitted model contains score_test_hyperparams object
   expect_true("score_test_hyperparams" %in% names(fitted_model2))
diff --git a/tests/testthat/test-fit_null.R b/tests/testthat/test-fit_null.R
index 062a71a..2932718 100644
--- a/tests/testthat/test-fit_null.R
+++ b/tests/testthat/test-fit_null.R
@@ -1,4 +1,7 @@
 test_that("we get same null fit with different j_ref", {
+  
+  skip("Deterministic, slow to run but passes when tested")
+  
   set.seed(59542234)
   n <- 10
   J <- 5
diff --git a/tests/testthat/test-get_G_for_augmentations.R b/tests/testthat/test-get_G_for_augmentations.R
new file mode 100644
index 0000000..8558f1f
--- /dev/null
+++ b/tests/testthat/test-get_G_for_augmentations.R
@@ -0,0 +1,17 @@
+test_that("get_G_for_augmentations_fast works and is faster than get_G_for_augmentations", {
+  X <- cbind(1, rnorm(200), runif(200))
+  J <- 200
+  Xcup2 <- X_cup_from_X_fast(X, J)
+  
+  start1 <- proc.time()
+  g1 <- get_G_for_augmentations(X, J, nrow(X), Xcup2)
+  end1 <- proc.time() - start1
+  
+  start2 <- proc.time()
+  g2 <- get_G_for_augmentations_fast(X, J, nrow(X), Xcup2)
+  end2 <- proc.time() - start2
+  
+  expect_true(all.equal(g1, g2))
+  expect_true(end2[3] < end1[3])
+})
+
diff --git a/vignettes/quick_start.Rmd b/vignettes/quick_start.Rmd
new file mode 100644
index 0000000..6aa9a6a
--- /dev/null
+++ b/vignettes/quick_start.Rmd
@@ -0,0 +1,67 @@
+---
+title: "Quick start with `radEmu`"
+author: "Sarah Teichman and Amy Willis"
+date: "`r Sys.Date()`"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{quickstart}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+## Setup
+
+First, we will install `radEmu`, if we haven't already. 
+
+```{r, eval = FALSE}
+# if (!require("remotes", quietly = TRUE))
+#     install.packages("remotes")
+# 
+# remotes::install_github("statdivlab/radEmu")
+```
+
+Next, we can load `radEmu`.
+
+```{r setup, message = FALSE}
+library(radEmu)
+```
+
+In this vignette we'll explore a [dataset published by Wirbel et al. (2019)](https://www.nature.com/articles/s41591-019-0406-6). This is a meta-analysis of case-control studies of colorectal cancer, meaning that Wirbel et al. collected raw sequencing data from studies other researchers conducted and re-analyzed it.
+
+Wirbel et al. published two pieces of data we'll focus on:
+
+* metadata giving demographics and other information about participants
+* a mOTU (metagenomic OTU) table
+
+```{r}
+data("wirbel_sample")
+dim(wirbel_sample)
+data("wirbel_otu")
+dim(wirbel_otu)
+```
+
+We can see that we have $566$ samples and $845$ mOTUs. We will subset to samples from the Chinese study.
+
+```{r}
+wirbel_sample_ch <- wirbel_sample[wirbel_sample$Country == "CHI", ]
+wirbel_otu_ch <- wirbel_otu[rownames(wirbel_sample_ch), ]
+to_rm <- which(colSums(wirbel_otu_ch) == 0)
+wirbel_otu_ch <- wirbel_otu_ch[, -to_rm]
+```
+
+## Differential abundance analysis with radEmu
+
+```{r}
+tmp <- emuFit(Y = wirbel_otu_ch, formula = ~ Group + Gender, data = wirbel_sample_ch,
+              run_score_tests = FALSE, tolerance = 1e-3)
+tmp1 <- emuFit(Y = wirbel_otu_ch, formula = ~ Group + Gender, data = wirbel_sample_ch,
+              refit = FALSE, fitted_model = tmp, compute_cis = FALSE, verbose = "development", test_kj = data.frame(k = 2, j = 3))
+```
+

From a7eda693a7a0ad4eba894d8ec08bca577cd837b5 Mon Sep 17 00:00:00 2001
From: Sarah Teichman <teichs@uw.edu>
Date: Fri, 19 Dec 2025 17:17:42 -0500
Subject: [PATCH 2/2] add quickstart vignette

---
 R/emuFit.R                |  4 ++++
 R/emuFit_check.R          |  4 ++--
 vignettes/quick_start.Rmd | 33 +++++++++++++++++++++++++++++----
 3 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/R/emuFit.R b/R/emuFit.R
index a79213d..c854d0a 100644
--- a/R/emuFit.R
+++ b/R/emuFit.R
@@ -248,6 +248,10 @@ emuFit <- function(Y,
   
   if (refit) {
     
+    if (verbose %in% c(TRUE, "development")) {
+      message("Estimating parameters")
+    }
+    
     if (!is.null(fitted_model)) {
       B <- fitted_model$B
     }
diff --git a/R/emuFit_check.R b/R/emuFit_check.R
index 498d602..ffc82a2 100644
--- a/R/emuFit_check.R
+++ b/R/emuFit_check.R
@@ -251,7 +251,7 @@ ignoring argument 'cluster'.")
   # check that test_kj is not null if running score tests
   if (run_score_tests) {
     if (is.null(test_kj)) {
-      stop("When `run_score_tests = TRUE`, you must provide a matrix `test_kj` to determine which parameters you want to test. If you don't know which indices k correspond to the covariate(s) that you would like to test, run the function `radEmu::make_design_matrix()` in order to view the design matrix, and identify which column of the design matrix corresponds to each covariate in your model. If you don't know which indices j correspond to categories (taxa) that you want to test, you can look at the columns and column names of your `Y` matrix.")
+      stop("When `run_score_tests = TRUE`, you must provide a data frame `test_kj` to determine which parameters you want to test. If you don't know which indices k correspond to the covariate(s) that you would like to test, run the function `radEmu::make_design_matrix()` in order to view the design matrix, and identify which column of the design matrix corresponds to each covariate in your model. If you don't know which indices j correspond to categories (taxa) that you want to test, you can look at the columns and column names of your `Y` matrix.")
     }
   }
   
@@ -299,7 +299,7 @@ ignoring argument 'cluster'.")
     
     if (is.logical(all.equal(constraint_fn[[k]], pseudohuber_median))) {
       if (all.equal(constraint_fn[[k]], pseudohuber_median)) {
-        if (verbose %in% c(TRUE, "development")) message("Centering row ", k, " of B with pseudo-Huber smoothed median with smoothing parameter ", constraint_param, ".")
+        if (verbose %in% c("development")) message("Centering row ", k, " of B with pseudo-Huber smoothed median with smoothing parameter ", constraint_param, ".")
         
         stopifnot(!is.na(constraint_param))
         
diff --git a/vignettes/quick_start.Rmd b/vignettes/quick_start.Rmd
index 6aa9a6a..8a3472d 100644
--- a/vignettes/quick_start.Rmd
+++ b/vignettes/quick_start.Rmd
@@ -42,6 +42,8 @@ Wirbel et al. published two pieces of data we'll focus on:
 
 ```{r}
 data("wirbel_sample")
+# encode control as the baseline level
+wirbel_sample$Group <- factor(wirbel_sample$Group, levels = c("CTR","CRC"))
 dim(wirbel_sample)
 data("wirbel_otu")
 dim(wirbel_otu)
@@ -58,10 +60,33 @@ wirbel_otu_ch <- wirbel_otu_ch[, -to_rm]
 
 ## Differential abundance analysis with radEmu
 
+We now are ready to fit a `radEmu` model with the `emuFit()` function! This will take 1-2 minutes to run, so we suggest you start running it and then read below about the arguments!
+
 ```{r}
-tmp <- emuFit(Y = wirbel_otu_ch, formula = ~ Group + Gender, data = wirbel_sample_ch,
-              run_score_tests = FALSE, tolerance = 1e-3)
-tmp1 <- emuFit(Y = wirbel_otu_ch, formula = ~ Group + Gender, data = wirbel_sample_ch,
-              refit = FALSE, fitted_model = tmp, compute_cis = FALSE, verbose = "development", test_kj = data.frame(k = 2, j = 3))
+mod <- emuFit(Y = wirbel_otu_ch, formula = ~ Group + Gender, data = wirbel_sample_ch,
+              test_kj = data.frame(k = 2, j = 1), verbose = TRUE)
 ```
 
+Some of the important arguments for `emuFit()` are the following: 
+
+  - `formula`: This is a formula telling radEmu what predictors to use in its model. We are using Group, which is an indicator for case (CRC) vs control (CTR), and Gender. 
+  - `data`: A data frame containing covariate information about our samples. 
+  - `Y`: A matrix containing our observed abundance data (e.g., counts or depth measurements). The rows give the observations (samples), and the columns give the categories (taxa/mOTUs). Note that `Y` doesn't have to be integer-valued (counts)! 
+  - `test_kj`: a data frame describing which robust score tests you want to run. `k` corresponds with the predictor(s) that you want to test (if you don't know what order your predictors appear in your design matrix, use the function `make_design_matrix()` with your `formula` and `data` to check!) and `j` corresponds with the taxa that you want to test (check `colnames(Y)` to see the order of the taxa). If you wanted to test the Group covariate for all taxa then you could set `test_kj = data.frame(k = 2, j = 1:ncol(Y))`. 
+  - `verbose`: do you want the code to give you updates as it goes along? 
+  
+In the example above we only test the first taxon, because each test for a data set this size takes approximately 30 seconds. Typically you'll want to test all taxa that you are interested in, so you may need to leave this running for a few hours, or check out the "parallel_radEmu" vignette to see how you could parallelize these score tests if you have access to additional computing resources. 
+  
+Now that we've fit the model and run a robust score test for the first taxon, we can look at the results! The parameter estimates and any test results are in the `coef` part of the `emuFit` object. 
+
+```{r}
+head(mod$coef)
+```
+
+The first taxon in our model is *Streptococcus anginosus*, which has a log fold-difference estimate of $1.23$. We will interpret this to mean that we expect that *Streptococcus anginosus* is $exp(1.23) \approx 3.42$ times more abundant in cases of colorectal cancer compared to controls of the same gender, when compared to the typical fold-differences in abundances of the taxa in this analysis. 
+
+We could similarly interpret the log fold-differences for each taxon in our analysis. 
+
+For *Streptococcus anginosus* we have a robust score test p-value of $0.09$. This means that we do not have enough evidence to reject the null hypothesis that the fold-difference in abundance of *Streptococcus anginosus* between cases and controls is the same as the typical fold-difference in abundance between cases and controls across all taxa in this analysis. When we say ``typical" here we mean approximately the median fold-difference across taxa.  
+
+Now you are ready to start using `radEmu`! We recommend our other vignettes for a deeper look using `radEmu`, including for `phyloseq` or `TreeSummarizedExperiment` objects, with clustered data, and in parallel. 
\ No newline at end of file