From d01ee0fbc4289723f473c9ec182cb48ea1cd80d0 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Tue, 2 Jun 2026 15:43:55 +0100
Subject: [PATCH 01/23] Add weight.statistics when hazard = TRUE

---
 SEQTaRget/R/SEQuential.R               |  1 +
 SEQTaRget/tests/testthat/test_hazard.R | 16 ++++++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/SEQTaRget/R/SEQuential.R b/SEQTaRget/R/SEQuential.R
index ba420a19..b430cbf7 100644
--- a/SEQTaRget/R/SEQuential.R
+++ b/SEQTaRget/R/SEQuential.R
@@ -248,6 +248,7 @@ SEQuential <- function(data, id.col, time.col, eligible.col, treatment.col, outc
       label <- subgroups[[i]]
       models <- lapply(analytic, function(x) x$model[[i]])
       hazard[[label]] <- internal.hazard(models, params, formula_cache)
+      weights[[label]] <- lapply(analytic, function(x) x$weighted_stats)
     }
   }
   rm(analytic)
diff --git a/SEQTaRget/tests/testthat/test_hazard.R b/SEQTaRget/tests/testthat/test_hazard.R
index c2a34ea8..0e45594d 100644
--- a/SEQTaRget/tests/testthat/test_hazard.R
+++ b/SEQTaRget/tests/testthat/test_hazard.R
@@ -5,6 +5,22 @@ test_that("Hazard and vcov", {
   expect_s4_class(model, "SEQoutput")
 })
 
+test_that("Weighted hazard models populate weight.statistics", {
+  # Regression test: weight.statistics was NULL/empty for hazard = TRUE models
+  # even when weighted = TRUE, because the weights slot was only populated in the
+  # non-hazard branch of SEQuential(). The models were still weighted correctly,
+  # so the statistics should be reported.
+  data <- data.table::copy(SEQdata)
+  model <- suppressWarnings(SEQuential(data, "ID", "time", "eligible", "tx_init", "outcome",
+                      list("N", "L", "P"), list("sex"),
+                      method = "censoring",
+                      options = SEQopts(hazard = TRUE, weighted = TRUE)))
+  expect_s4_class(model, "SEQoutput")
+  expect_true(length(model@weight.statistics) > 0)
+  expect_false(is.null(model@weight.statistics[[1]]))
+  expect_true(is.numeric(model@weight.statistics[[1]][[1]]$max))
+})
+
 test_that("Hazard estimate is reproducible with same seed", {
   skip_on_cran()
   args <- list("ID", "time", "eligible", "tx_init", "outcome", list("N", "L", "P"), list("sex"),

From 7b267abbfab1ac0e0a476cc9115ef81e8bc7a226 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Tue, 2 Jun 2026 15:44:01 +0100
Subject: [PATCH 02/23] Bump version

---
 SEQTaRget/DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/SEQTaRget/DESCRIPTION b/SEQTaRget/DESCRIPTION
index 26b22672..d16abb82 100644
--- a/SEQTaRget/DESCRIPTION
+++ b/SEQTaRget/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: SEQTaRget
 Type: Package
 Title: Sequential Trial Emulation
-Version: 1.4.2.9001
+Version: 1.4.2.9002
 Authors@R: c(person(given = "Ryan",
                     family = "O'Dea",
                     role = c("aut", "cre"),

From 285eca19f99274856082050bae21edbf60114666 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Tue, 2 Jun 2026 15:53:13 +0100
Subject: [PATCH 03/23] Populate outcome.model when hazard = TRUE

---
 SEQTaRget/R/SEQuential.R               |  1 +
 SEQTaRget/tests/testthat/test_hazard.R | 15 +++++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/SEQTaRget/R/SEQuential.R b/SEQTaRget/R/SEQuential.R
index b430cbf7..da4ed14c 100644
--- a/SEQTaRget/R/SEQuential.R
+++ b/SEQTaRget/R/SEQuential.R
@@ -248,6 +248,7 @@ SEQuential <- function(data, id.col, time.col, eligible.col, treatment.col, outc
       label <- subgroups[[i]]
       models <- lapply(analytic, function(x) x$model[[i]])
       hazard[[label]] <- internal.hazard(models, params, formula_cache)
+      outcome[[label]] <- lapply(models, function(x) clean_fastglm(x$model))
       weights[[label]] <- lapply(analytic, function(x) x$weighted_stats)
     }
   }
diff --git a/SEQTaRget/tests/testthat/test_hazard.R b/SEQTaRget/tests/testthat/test_hazard.R
index 0e45594d..041d814e 100644
--- a/SEQTaRget/tests/testthat/test_hazard.R
+++ b/SEQTaRget/tests/testthat/test_hazard.R
@@ -21,6 +21,21 @@ test_that("Weighted hazard models populate weight.statistics", {
   expect_true(is.numeric(model@weight.statistics[[1]][[1]]$max))
 })
 
+test_that("Hazard models populate outcome.model", {
+  # Regression test: outcome.model was NULL for hazard = TRUE models even though
+  # the pooled-logistic outcome models are fit and consumed by internal.hazard().
+  # The cleaned outcome models should be returned, as they are for hazard = FALSE.
+  data <- data.table::copy(SEQdata)
+  model <- suppressWarnings(SEQuential(data, "ID", "time", "eligible", "tx_init", "outcome",
+                      list("N", "L", "P"), list("sex"),
+                      method = "censoring",
+                      options = SEQopts(hazard = TRUE, weighted = TRUE)))
+  expect_s4_class(model, "SEQoutput")
+  expect_true(length(model@outcome.model) > 0)
+  expect_false(is.null(model@outcome.model[[1]]))
+  expect_true(length(coef(model@outcome.model[[1]][[1]])) > 0)
+})
+
 test_that("Hazard estimate is reproducible with same seed", {
   skip_on_cran()
   args <- list("ID", "time", "eligible", "tx_init", "outcome", list("N", "L", "P"), list("sex"),

From c8bf5952441b1a1b82013ce84f181b140576d54e Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Tue, 2 Jun 2026 16:40:02 +0100
Subject: [PATCH 04/23] Add warning if numerator and denominator are the same

---
 SEQTaRget/R/class_setters.R              |  8 ++++++++
 SEQTaRget/tests/testthat/test_warnings.R | 22 ++++++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/SEQTaRget/R/class_setters.R b/SEQTaRget/R/class_setters.R
index de390275..8051d7d9 100644
--- a/SEQTaRget/R/class_setters.R
+++ b/SEQTaRget/R/class_setters.R
@@ -137,6 +137,14 @@ parameter.simplifier <- function(params) {
     warning("Without LTFU or Visit, weighted ITT model is not supported, automatically changed to weighted = FALSE")
     params@weighted <- FALSE
   }
+
+  if (params@weighted && params@method != "ITT" &&
+      !is.na(params@numerator) && !is.na(params@denominator) &&
+      identical(params@numerator, params@denominator)) {
+    warning("Numerator and denominator weight models use identical covariates ('", params@numerator,
+            "'); the stabilized weights will all equal 1 (i.e., no weighting). The denominator should ",
+            "typically include the time-varying confounders that the numerator omits - check for a typo in either or both of 'numerator' and 'denominator'.")
+  }
   if (params@followup.class && params@followup.spline) stop("Followup cannot be both a class and a spline, please select one.")
   if (!params@plot.type %in% c("survival", "risk", "inc")) stop("Supported plot types are 'survival', 'risk', and 'inc' (in the case of censoring), please select one.")
   
diff --git a/SEQTaRget/tests/testthat/test_warnings.R b/SEQTaRget/tests/testthat/test_warnings.R
index 294e9b5e..6fbb9bd8 100644
--- a/SEQTaRget/tests/testthat/test_warnings.R
+++ b/SEQTaRget/tests/testthat/test_warnings.R
@@ -36,3 +36,25 @@ test_that("Unexcused Excused Censoring", {
     options = SEQopts(weighted = TRUE, excused = TRUE)
   ))
 })
+
+test_that("Identical numerator and denominator weight models warn about unit weights", {
+  # A common typo (denominator pointed at the numerator covariates) makes the two
+  # weight models identical, so every stabilized weight is exactly 1 (no weighting).
+  data <- data.table::copy(SEQdata)
+  expect_warning(
+    SEQuential(data, "ID", "time", "eligible", "tx_init", "outcome", list("N", "L", "P"), list("sex"),
+      method = "censoring",
+      options = SEQopts(weighted = TRUE, numerator = "N+L+P", denominator = "N+L+P")),
+    regexp = "identical covariates"
+  )
+})
+
+test_that("Differing numerator and denominator weight models do not warn about unit weights", {
+  data <- data.table::copy(SEQdata)
+  ws <- testthat::capture_warnings(
+    SEQuential(data, "ID", "time", "eligible", "tx_init", "outcome", list("N", "L", "P"), list("sex"),
+      method = "censoring",
+      options = SEQopts(weighted = TRUE, numerator = "sex", denominator = "N+L+P"))
+  )
+  expect_false(any(grepl("identical covariates", ws)))
+})

From b05919242135b9bbcfe1840512911e08b30b4830 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Tue, 2 Jun 2026 15:54:47 +0100
Subject: [PATCH 05/23] Update NEWS.md

---
 SEQTaRget/NEWS.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/SEQTaRget/NEWS.md b/SEQTaRget/NEWS.md
index b23de83a..87fbd26a 100644
--- a/SEQTaRget/NEWS.md
+++ b/SEQTaRget/NEWS.md
@@ -5,6 +5,8 @@
 * Speed up the hazard ratio calculation by fitting the Cox model with the survival C fitters directly on a prebuilt design matrix instead of `coxph(formula, data)`, avoiding the `model.frame`/`model.matrix` rebuild on every bootstrap iteration: `survival::coxph.fit()` for the non-competing-event model and `survival::agreg.fit()` for the competing-event Fine-Gray (counting-process) model. The hazard ratio and CIs are unchanged.
 * Fix the competing-event Fine-Gray hazard fit to use the `finegray()` case weights (`fgwt`), which are required for a valid subdistribution-hazard estimate and were previously omitted. This is a no-op for the current hazard simulation (which has only administrative censoring, so all `fgwt` are 1) but corrects the estimate should the simulated data ever carry random censoring.
 * Report competing events per treatment arm in the `@info` slot as `info$compevent.unique` and `info$compevent.nonunique`, mirroring the structure of `info$outcome.unique` / `info$outcome.nonunique`. Both are grouped by baseline treatment; the non-unique table counts all competing event occurrences in the expanded data and the unique table counts distinct subjects who experienced the competing event. Both are `NA` when no `compevent` is specified.
+* From a `SEQuential()` fit, populate `weight.statistics` and `outcome.model` when `hazard = TRUE`.
+* Warn when the `numerator` and `denominator` weight models are given identical covariates. In that case the stabilized weights all equal 1 (i.e., no weighting), which is usually a typo in the `denominator` argument.
 
 # SEQTaRget v1.4.2
 

From 43546a64feacb01030521fd46d4f5e7905a04bfb Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Tue, 2 Jun 2026 17:32:00 +0100
Subject: [PATCH 06/23] Improve SEQuential helpfile

---
 SEQTaRget/R/SEQuential.R | 40 +++++++++++++++++++++++++++++++---------
 1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/SEQTaRget/R/SEQuential.R b/SEQTaRget/R/SEQuential.R
index da4ed14c..b8437c76 100644
--- a/SEQTaRget/R/SEQuential.R
+++ b/SEQTaRget/R/SEQuential.R
@@ -29,15 +29,37 @@
 #' @examples
 #' \donttest{
 #' data <- SEQdata
-#' model <- SEQuential(data, id.col = "ID", 
-#'                           time.col = "time", 
-#'                           eligible.col = "eligible",
-#'                           treatment.col = "tx_init",
-#'                           outcome.col = "outcome",
-#'                           time_varying.cols = c("N", "L", "P"),
-#'                           fixed.cols = "sex",
-#'                           method = "ITT", 
-#'                           options = SEQopts())
+#'
+#' # Intention-to-treat (ITT) effect: subjects are assigned to the treatment
+#' # arm defined by their baseline treatment and followed regardless of any later
+#' # treatment changes, so no weighting is required.
+#' SEQuential(data, id.col = "ID",
+#'            time.col = "time",
+#'            eligible.col = "eligible",
+#'            treatment.col = "tx_init",
+#'            outcome.col = "outcome",
+#'            time_varying.cols = c("N", "L", "P"),
+#'            fixed.cols = "sex",
+#'            method = "ITT",
+#'            options = SEQopts())
+#'
+#' # Per-protocol effect via artificial censoring: subjects are censored when they
+#' # deviate from their assigned strategy, and inverse-probability-of-censoring
+#' # weights adjust for the resulting selection bias. The denominator models the
+#' # probability of remaining uncensored given the time-varying confounders, while
+#' # the numerator uses only the baseline covariates to stabilize the weights (so
+#' # the two formulas must differ - identical formulas give weights of 1).
+#' SEQuential(data, id.col = "ID",
+#'            time.col = "time",
+#'            eligible.col = "eligible",
+#'            treatment.col = "tx_init",
+#'            outcome.col = "outcome",
+#'            time_varying.cols = c("N", "L", "P"),
+#'            fixed.cols = "sex",
+#'            method = "censoring",
+#'            options = SEQopts(weighted = TRUE,
+#'                              numerator = "sex",
+#'                              denominator = "N + L + P + sex"))
 #' }
 #'
 #' @export

From 1107b4924693b53a207e4129c2c7bb430fbfa4e8 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Tue, 2 Jun 2026 17:32:06 +0100
Subject: [PATCH 07/23] Document

---
 SEQTaRget/man/SEQuential.Rd | 40 ++++++++++++++++++++++++++++---------
 1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/SEQTaRget/man/SEQuential.Rd b/SEQTaRget/man/SEQuential.Rd
index 94c9a754..7a319d5b 100644
--- a/SEQTaRget/man/SEQuential.Rd
+++ b/SEQTaRget/man/SEQuential.Rd
@@ -57,15 +57,37 @@ and per-protocol effects, and can adjust for potential selection bias induced by
 \examples{
 \donttest{
 data <- SEQdata
-model <- SEQuential(data, id.col = "ID", 
-                          time.col = "time", 
-                          eligible.col = "eligible",
-                          treatment.col = "tx_init",
-                          outcome.col = "outcome",
-                          time_varying.cols = c("N", "L", "P"),
-                          fixed.cols = "sex",
-                          method = "ITT", 
-                          options = SEQopts())
+
+# Intention-to-treat (ITT) effect: subjects are assigned to the treatment
+# arm defined by their baseline treatment and followed regardless of any later
+# treatment changes, so no weighting is required.
+SEQuential(data, id.col = "ID",
+           time.col = "time",
+           eligible.col = "eligible",
+           treatment.col = "tx_init",
+           outcome.col = "outcome",
+           time_varying.cols = c("N", "L", "P"),
+           fixed.cols = "sex",
+           method = "ITT",
+           options = SEQopts())
+
+# Per-protocol effect via artificial censoring: subjects are censored when they
+# deviate from their assigned strategy, and inverse-probability-of-censoring
+# weights adjust for the resulting selection bias. The denominator models the
+# probability of remaining uncensored given the time-varying confounders, while
+# the numerator uses only the baseline covariates to stabilize the weights (so
+# the two formulas must differ - identical formulas give weights of 1).
+SEQuential(data, id.col = "ID",
+           time.col = "time",
+           eligible.col = "eligible",
+           treatment.col = "tx_init",
+           outcome.col = "outcome",
+           time_varying.cols = c("N", "L", "P"),
+           fixed.cols = "sex",
+           method = "censoring",
+           options = SEQopts(weighted = TRUE,
+                             numerator = "sex",
+                             denominator = "N + L + P + sex"))
 }
 
 }

From 7a7b133da83c34585497e9dd679d4ae68c3ab21c Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Wed, 3 Jun 2026 17:08:20 +0100
Subject: [PATCH 08/23] Add proper tests for selection.random

---
 SEQTaRget/tests/testthat/test_coverage.R | 46 +++++++++++++++++++++++-
 1 file changed, 45 insertions(+), 1 deletion(-)

diff --git a/SEQTaRget/tests/testthat/test_coverage.R b/SEQTaRget/tests/testthat/test_coverage.R
index 8fd1f169..3ce73c54 100644
--- a/SEQTaRget/tests/testthat/test_coverage.R
+++ b/SEQTaRget/tests/testthat/test_coverage.R
@@ -221,7 +221,10 @@ test_that("selection.first_trial restricts to first trial per subject", {
 
 # ── SEQexpand.R: selection.random ───────────────────────────────────────────
 
-test_that("selection.random subsamples controls", {
+test_that("SEQuential runs with selection.prob set (smoke test)", {
+  # Smoke test only: selection.prob has no effect unless selection.random = TRUE,
+  # so this just confirms the option is accepted and the run completes. The actual
+  # subsampling behaviour is checked in the two tests below.
   model <- suppressWarnings(SEQuential(copy(SEQdata), "ID", "time", "eligible", "tx_init", "outcome",
                       list("N", "L", "P"), list("sex"),
                       method = "ITT",
@@ -229,6 +232,47 @@ test_that("selection.random subsamples controls", {
   expect_s4_class(model, "SEQoutput")
 })
 
+test_that("selection.random keeps all treated starts and subsamples control starts", {
+  skip_on_cran()
+  prob <- 0.5
+
+  # One row per trial-start (followup == 0), counted by baseline treatment arm.
+  arm_counts <- function(DT) {
+    s <- DT[followup == 0, .N, by = tx_init_bas]
+    stats::setNames(s$N, as.character(s$tx_init_bas))
+  }
+
+  base <- suppressWarnings(SEQuential(copy(SEQdata), "ID", "time", "eligible", "tx_init", "outcome",
+                      list("N", "L", "P"), list("sex"),
+                      method = "ITT",
+                      options = SEQopts(data.return = TRUE, seed = 1L), verbose = FALSE))
+  sel <- suppressWarnings(SEQuential(copy(SEQdata), "ID", "time", "eligible", "tx_init", "outcome",
+                      list("N", "L", "P"), list("sex"),
+                      method = "ITT",
+                      options = SEQopts(selection.random = TRUE, selection.prob = prob,
+                                        data.return = TRUE, seed = 1L), verbose = FALSE))
+
+  base_counts <- arm_counts(base@DT)
+  sel_counts  <- arm_counts(sel@DT)
+
+  # Treated trial-starts (baseline tx_init = 1) are all retained
+  expect_equal(sel_counts[["1"]], base_counts[["1"]])
+  # Control trial-starts (baseline tx_init = 0) are reduced to the requested fraction
+  expect_lt(sel_counts[["0"]], base_counts[["0"]])
+  expect_equal(sel_counts[["0"]], round(base_counts[["0"]] * prob))
+})
+
+test_that("selection.random is reproducible with a fixed seed", {
+  skip_on_cran()
+  args <- list("ID", "time", "eligible", "tx_init", "outcome", list("N", "L", "P"), list("sex"),
+               method = "ITT",
+               options = SEQopts(selection.random = TRUE, selection.prob = 0.5,
+                                 data.return = TRUE, seed = 7L))
+  run1 <- suppressWarnings(do.call(SEQuential, c(list(data = copy(SEQdata)), args)))
+  run2 <- suppressWarnings(do.call(SEQuential, c(list(data = copy(SEQdata)), args)))
+  expect_equal(run1@DT, run2@DT)
+})
+
 # ── SEQuential.R: validation paths ──────────────────────────────────────────
 
 test_that("SEQuential errors on non-binary outcome", {

From 9d467d20c91acec2802ac3025b42082925b82bf9 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Thu, 4 Jun 2026 10:46:34 +0100
Subject: [PATCH 09/23] Add behavioural test for weight.lower/weight.upper
 truncation

Replaces reliance on a smoke test: verifies the truncation is actually
applied to the weights used in the outcome fit. Two clamp bands entirely
above the weight range collapse all weights to a constant and, by GLM
scale-invariance, give identical coefficients, while a genuinely
varying-weight fit differs.
---
 SEQTaRget/tests/testthat/test_coverage.R | 26 ++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/SEQTaRget/tests/testthat/test_coverage.R b/SEQTaRget/tests/testthat/test_coverage.R
index 3ce73c54..1730ae86 100644
--- a/SEQTaRget/tests/testthat/test_coverage.R
+++ b/SEQTaRget/tests/testthat/test_coverage.R
@@ -273,6 +273,32 @@ test_that("selection.random is reproducible with a fixed seed", {
   expect_equal(run1@DT, run2@DT)
 })
 
+# ── internal_models.R: weight truncation ────────────────────────────────────
+
+test_that("weight.lower/weight.upper truncate the weights used in the outcome fit", {
+  skip_on_cran()
+  # Truncation is applied to the weight vector passed to the GLM fit (not the
+  # returned @DT or weight.statistics), so its effect is checked through the
+  # fitted coefficients. SEQdata weights span ~0.54-2.16, so a band entirely
+  # above that range clamps every weight to the same constant. A GLM is invariant
+  # to a uniform scaling of its weights, so two such all-constant clamps must give
+  # identical coefficients, while a genuinely varying-weight fit must differ.
+  cf <- function(m) coef(m@outcome.model[[1]][[1]])
+  args <- list("ID", "time", "eligible", "tx_init", "outcome", list("N", "L", "P"), list("sex"),
+               method = "censoring")
+  mk <- function(opts) suppressWarnings(do.call(SEQuential, c(list(data = copy(SEQdata)), args,
+                                                              list(options = opts, verbose = FALSE))))
+
+  varying  <- mk(SEQopts(weighted = TRUE, seed = 1L))
+  clamp3   <- mk(SEQopts(weighted = TRUE, weight.lower = 3,  weight.upper = 4,  seed = 1L))
+  clamp10  <- mk(SEQopts(weighted = TRUE, weight.lower = 10, weight.upper = 11, seed = 1L))
+
+  # Both bands clamp all weights to a constant -> scale-invariant, identical fit
+  expect_equal(cf(clamp3), cf(clamp10), tolerance = 1e-6)
+  # Clamping away the real weight variation changes the fit
+  expect_false(isTRUE(all.equal(cf(clamp3), cf(varying), tolerance = 1e-6)))
+})
+
 # ── SEQuential.R: validation paths ──────────────────────────────────────────
 
 test_that("SEQuential errors on non-binary outcome", {

From f1666211328cfa27089ff6ceec4375b03602cdb7 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Thu, 4 Jun 2026 10:48:52 +0100
Subject: [PATCH 10/23] Add behavioural test for weight.p99 truncation

Verifies weight.p99 = TRUE is equivalent to explicitly setting
weight.lower/weight.upper to the p01/p99 reported in weight.statistics,
and that it differs from an untruncated weighted fit.
---
 SEQTaRget/tests/testthat/test_coverage.R | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/SEQTaRget/tests/testthat/test_coverage.R b/SEQTaRget/tests/testthat/test_coverage.R
index 1730ae86..638a2900 100644
--- a/SEQTaRget/tests/testthat/test_coverage.R
+++ b/SEQTaRget/tests/testthat/test_coverage.R
@@ -299,6 +299,27 @@ test_that("weight.lower/weight.upper truncate the weights used in the outcome fi
   expect_false(isTRUE(all.equal(cf(clamp3), cf(varying), tolerance = 1e-6)))
 })
 
+test_that("weight.p99 truncates at the 1st/99th percentile weights", {
+  skip_on_cran()
+  # weight.p99 = TRUE sets weight.lower/upper to the p01/p99 of the (untruncated)
+  # weights, which are reported in weight.statistics. So it must be equivalent to
+  # explicitly passing those percentiles as the bounds, and must differ from an
+  # untruncated weighted fit.
+  cf <- function(m) coef(m@outcome.model[[1]][[1]])
+  args <- list("ID", "time", "eligible", "tx_init", "outcome", list("N", "L", "P"), list("sex"),
+               method = "censoring")
+  mk <- function(opts) suppressWarnings(do.call(SEQuential, c(list(data = copy(SEQdata)), args,
+                                                              list(options = opts, verbose = FALSE))))
+
+  p99  <- mk(SEQopts(weighted = TRUE, weight.p99 = TRUE, seed = 1L))
+  ws   <- p99@weight.statistics[[1]][[1]]
+  expl <- mk(SEQopts(weighted = TRUE, weight.lower = ws$p01, weight.upper = ws$p99, seed = 1L))
+  none <- mk(SEQopts(weighted = TRUE, seed = 1L))
+
+  expect_equal(cf(p99), cf(expl), tolerance = 1e-8)
+  expect_false(isTRUE(all.equal(cf(p99), cf(none), tolerance = 1e-6)))
+})
+
 # ── SEQuential.R: validation paths ──────────────────────────────────────────
 
 test_that("SEQuential errors on non-binary outcome", {

From 84309b324990e567663894fa71ccfe3498d40979 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Thu, 4 Jun 2026 10:51:33 +0100
Subject: [PATCH 11/23] Add behavioural test for followup.include/trial.include

Verifies each flag adds or drops its follow-up / trial terms (and their
squares) in the fitted outcome-model coefficients, leaving the other
flag's terms intact.
---
 SEQTaRget/tests/testthat/test_coverage.R | 29 ++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/SEQTaRget/tests/testthat/test_coverage.R b/SEQTaRget/tests/testthat/test_coverage.R
index 638a2900..14a4f4b9 100644
--- a/SEQTaRget/tests/testthat/test_coverage.R
+++ b/SEQTaRget/tests/testthat/test_coverage.R
@@ -320,6 +320,35 @@ test_that("weight.p99 truncates at the 1st/99th percentile weights", {
   expect_false(isTRUE(all.equal(cf(p99), cf(none), tolerance = 1e-6)))
 })
 
+# ── internal_covariates.R: followup.include / trial.include ──────────────────
+
+test_that("followup.include / trial.include add or drop their outcome-model terms", {
+  skip_on_cran()
+  # These flags control whether the follow-up and trial terms (and their squares)
+  # enter the outcome model formula (internal_covariates.R), so the effect is
+  # visible in the fitted coefficient names.
+  nm <- function(m) names(coef(m@outcome.model[[1]][[1]]))
+  args <- list("ID", "time", "eligible", "tx_init", "outcome", list("N", "L", "P"), list("sex"),
+               method = "ITT")
+  mk <- function(opts) suppressWarnings(do.call(SEQuential, c(list(data = copy(SEQdata)), args,
+                                                              list(options = opts, verbose = FALSE))))
+
+  both     <- nm(mk(SEQopts()))                          # both included by default
+  no_fup   <- nm(mk(SEQopts(followup.include = FALSE)))
+  no_trial <- nm(mk(SEQopts(trial.include = FALSE)))
+
+  # Baseline: all four terms present
+  expect_true(all(c("followup", "followup_sq", "trial", "trial_sq") %in% both))
+
+  # followup.include = FALSE drops the follow-up terms but keeps the trial terms
+  expect_false(any(c("followup", "followup_sq") %in% no_fup))
+  expect_true(all(c("trial", "trial_sq") %in% no_fup))
+
+  # trial.include = FALSE drops the trial terms but keeps the follow-up terms
+  expect_false(any(c("trial", "trial_sq") %in% no_trial))
+  expect_true(all(c("followup", "followup_sq") %in% no_trial))
+})
+
 # ── SEQuential.R: validation paths ──────────────────────────────────────────
 
 test_that("SEQuential errors on non-binary outcome", {

From 14065fe2dcbcf55dac557cc24652e026d72c29c0 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Thu, 4 Jun 2026 10:54:04 +0100
Subject: [PATCH 12/23] Add behavioural test for followup.class

Verifies followup.class = TRUE encodes follow-up as a factor: the linear
followup / followup_sq terms are gone and there is one dummy coefficient
per non-reference follow-up level.
---
 SEQTaRget/tests/testthat/test_coverage.R | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/SEQTaRget/tests/testthat/test_coverage.R b/SEQTaRget/tests/testthat/test_coverage.R
index 14a4f4b9..f84bf904 100644
--- a/SEQTaRget/tests/testthat/test_coverage.R
+++ b/SEQTaRget/tests/testthat/test_coverage.R
@@ -349,6 +349,29 @@ test_that("followup.include / trial.include add or drop their outcome-model term
   expect_true(all(c("followup", "followup_sq") %in% no_trial))
 })
 
+# ── internal_models.R: followup.class ────────────────────────────────────────
+
+test_that("followup.class encodes follow-up as a factor (one term per level)", {
+  skip_on_cran()
+  # followup.class = TRUE treats follow-up as categorical (internal_models.R coerces
+  # it to a factor), so the outcome model gains one dummy per non-reference level
+  # instead of the linear followup / followup_sq pair. It is exclusive with
+  # followup.include, so that is switched off here.
+  m <- suppressWarnings(SEQuential(copy(SEQdata), "ID", "time", "eligible", "tx_init", "outcome",
+                      list("N", "L", "P"), list("sex"), method = "ITT",
+                      options = SEQopts(followup.include = FALSE, followup.class = TRUE,
+                                        data.return = TRUE), verbose = FALSE))
+  cf <- names(coef(m@outcome.model[[1]][[1]]))
+  dummies <- grep("^followup[0-9]+$", cf, value = TRUE)
+
+  # Categorical, not continuous: no linear follow-up terms
+  expect_false("followup" %in% cf)
+  expect_false("followup_sq" %in% cf)
+  # One dummy per non-reference follow-up level
+  expect_gt(length(dummies), 2L)
+  expect_equal(length(dummies), length(unique(m@DT$followup)) - 1L)
+})
+
 # ── SEQuential.R: validation paths ──────────────────────────────────────────
 
 test_that("SEQuential errors on non-binary outcome", {

From 6bb857b020c9c579bd57b8667ca0e080434a6d24 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Thu, 4 Jun 2026 10:56:56 +0100
Subject: [PATCH 13/23] Add behavioural test for weight.lag_condition

Previously untested. Verifies weight.lag_condition = TRUE fits each
arm's weight model only on its treatment-lag stratum (per-arm
observation counts differ and sum to the full data), while FALSE fits
both arms on the full data (equal counts).
---
 SEQTaRget/tests/testthat/test_coverage.R | 26 ++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/SEQTaRget/tests/testthat/test_coverage.R b/SEQTaRget/tests/testthat/test_coverage.R
index f84bf904..f6c671ad 100644
--- a/SEQTaRget/tests/testthat/test_coverage.R
+++ b/SEQTaRget/tests/testthat/test_coverage.R
@@ -372,6 +372,32 @@ test_that("followup.class encodes follow-up as a factor (one term per level)", {
   expect_equal(length(dummies), length(unique(m@DT$followup)) - 1L)
 })
 
+# ── internal_glmHelpers.R: weight.lag_condition ──────────────────────────────
+
+test_that("weight.lag_condition conditions each arm's weight model on its treatment lag", {
+  skip_on_cran()
+  # weight.lag_condition = TRUE (default) fits each treatment arm's weight model
+  # only on the rows in that arm's treatment-lag stratum (prepare.data_cached
+  # subsets on tx_lag == model); = FALSE fits both arms on the full data. The
+  # number of observations behind each fitted denominator model makes this visible.
+  args <- list("ID", "time", "eligible", "tx_init", "outcome", list("N", "L", "P"), list("sex"),
+               method = "censoring")
+  mk <- function(opts) suppressWarnings(do.call(SEQuential, c(list(data = copy(SEQdata)), args,
+                                                              list(options = opts, verbose = FALSE))))
+  # Observations behind each per-arm denominator model (fastglm df.null + 1)
+  nobs <- function(m) vapply(m@weight.statistics[[1]][[1]]$coef.denominator,
+                             function(x) x$df.null + 1L, integer(1))
+
+  on  <- nobs(mk(SEQopts(weighted = TRUE, weight.lag_condition = TRUE,  seed = 1L)))
+  off <- nobs(mk(SEQopts(weighted = TRUE, weight.lag_condition = FALSE, seed = 1L)))
+
+  # FALSE: both arms fit on the full data -> equal observation counts
+  expect_equal(off[[1]], off[[2]])
+  # TRUE: arms fit on disjoint treatment-lag strata that partition that full data
+  expect_false(on[[1]] == on[[2]])
+  expect_equal(on[[1]] + on[[2]], off[[1]])
+})
+
 # ── SEQuential.R: validation paths ──────────────────────────────────────────
 
 test_that("SEQuential errors on non-binary outcome", {

From 5a3ddbe91955a879b79a2e094c936bed1affbebb Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Thu, 4 Jun 2026 10:58:50 +0100
Subject: [PATCH 14/23] Add behavioural test for followup.min / followup.max

Verifies the expanded data (via expand.only) is clamped to the requested
[followup.min, followup.max] window, while the unrestricted expansion
spans beyond it.
---
 SEQTaRget/tests/testthat/test_coverage.R | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/SEQTaRget/tests/testthat/test_coverage.R b/SEQTaRget/tests/testthat/test_coverage.R
index f6c671ad..2a25c1c6 100644
--- a/SEQTaRget/tests/testthat/test_coverage.R
+++ b/SEQTaRget/tests/testthat/test_coverage.R
@@ -398,6 +398,27 @@ test_that("weight.lag_condition conditions each arm's weight model on its treatm
   expect_equal(on[[1]] + on[[2]], off[[1]])
 })
 
+# ── SEQexpand.R: followup.min / followup.max ─────────────────────────────────
+
+test_that("followup.min / followup.max restrict the expanded follow-up range", {
+  skip_on_cran()
+  # SEQexpand.R filters the expanded rows to followup in [followup.min, followup.max].
+  # expand.only returns that expanded data.table directly.
+  mk <- function(opts) suppressWarnings(SEQuential(copy(SEQdata), "ID", "time", "eligible",
+                      "tx_init", "outcome", list("N", "L", "P"), list("sex"),
+                      method = "ITT", options = opts, verbose = FALSE))
+
+  full <- mk(SEQopts(expand.only = TRUE))
+  lim  <- mk(SEQopts(expand.only = TRUE, followup.min = 3, followup.max = 10))
+
+  # The unrestricted expansion genuinely extends past the requested window
+  expect_lt(min(full$followup), 3)
+  expect_gt(max(full$followup), 10)
+  # The restricted expansion is clamped to exactly [3, 10] and has fewer rows
+  expect_equal(range(lim$followup), c(3, 10))
+  expect_lt(nrow(lim), nrow(full))
+})
+
 # ── SEQuential.R: validation paths ──────────────────────────────────────────
 
 test_that("SEQuential errors on non-binary outcome", {

From f222950d185b2389e513287a651e1dda2af48f81 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Thu, 4 Jun 2026 11:02:22 +0100
Subject: [PATCH 15/23] Add behavioural test for weight.eligible_cols

Verifies each arm's weight model is fit only on rows where its
weight.eligible_cols indicator == 1, by checking the per-arm denominator
observation counts drop versus an unfiltered fit.
---
 SEQTaRget/tests/testthat/test_coverage.R | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/SEQTaRget/tests/testthat/test_coverage.R b/SEQTaRget/tests/testthat/test_coverage.R
index 2a25c1c6..cad8a0b2 100644
--- a/SEQTaRget/tests/testthat/test_coverage.R
+++ b/SEQTaRget/tests/testthat/test_coverage.R
@@ -419,6 +419,30 @@ test_that("followup.min / followup.max restrict the expanded follow-up range", {
   expect_lt(nrow(lim), nrow(full))
 })
 
+# ── internal_weights.R: weight.eligible_cols ─────────────────────────────────
+
+test_that("weight.eligible_cols restricts the weight models to eligible rows", {
+  skip_on_cran()
+  # Each arm's weight model is fit only on rows where its weight.eligible_cols
+  # indicator == 1 (internal_weights.R). With a roughly half-on indicator, the
+  # per-arm denominator model is fit on fewer observations than without it.
+  d <- copy(SEQdata)
+  d[, welig := as.integer(N > median(N))]   # balanced 0/1 eligibility indicator
+  args <- list("ID", "time", "eligible", "tx_init", "outcome", list("N", "L", "P"), list("sex"),
+               method = "censoring")
+  mk <- function(opts) suppressWarnings(do.call(SEQuential, c(list(data = copy(d)), args,
+                                                              list(options = opts, verbose = FALSE))))
+  nobs <- function(m) vapply(m@weight.statistics[[1]][[1]]$coef.denominator,
+                             function(x) x$df.null + 1L, integer(1))
+
+  base <- nobs(mk(SEQopts(weighted = TRUE, seed = 1L)))
+  elig <- nobs(mk(SEQopts(weighted = TRUE,
+                          weight.eligible_cols = list("welig", "welig"), seed = 1L)))
+
+  # Both arms' models drop the ineligible (welig == 0) rows
+  expect_true(all(elig < base))
+})
+
 # ── SEQuential.R: validation paths ──────────────────────────────────────────
 
 test_that("SEQuential errors on non-binary outcome", {

From 7feb05a1c84d73411d68b5ff3ed27b95948af2ec Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Thu, 4 Jun 2026 11:26:29 +0100
Subject: [PATCH 16/23] Fix numerator()/denominator() accessors returning NULL

They looked up non-existent fields (n0.coef/d0.coef) instead of the coef.numerator/coef.denominator lists actually stored in
weight.statistics, so they silently returned NULL for every weighted model. Index the per-arm models correctly and guard the ITT case where no treatment-weight models exist. Add a regression test.
---
 SEQTaRget/R/class_methods.R              | 14 ++++++++----
 SEQTaRget/tests/testthat/test_coverage.R | 28 ++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/SEQTaRget/R/class_methods.R b/SEQTaRget/R/class_methods.R
index 6232a365..b87013ce 100644
--- a/SEQTaRget/R/class_methods.R
+++ b/SEQTaRget/R/class_methods.R
@@ -184,8 +184,11 @@ setMethod("show", "SEQoutput", function(object) {
 numerator <- function(object) {
   if (!is(object, "SEQoutput")) stop("Object is not of class SEQoutput")
   if (!object@params@weighted) stop("SEQuential process was not weighted")
-  return(list(numerator0 = lapply(object@weight.statistics, function(x) x[[1]]$n0.coef),
-              numerator1 = lapply(object@weight.statistics, function(x) x[[1]]$n1.coef)))
+  arm <- function(i) lapply(object@weight.statistics, function(x) {
+    coefs <- x[[1]]$coef.numerator
+    if (length(coefs) >= i) coefs[[i]] else NULL
+  })
+  return(list(numerator0 = arm(1L), numerator1 = arm(2L)))
 }
 
 #' Retrieves Denominator Models from SEQuential object
@@ -198,8 +201,11 @@ numerator <- function(object) {
 denominator <- function(object) {
   if (!is(object, "SEQoutput")) stop("Object is not of class SEQoutput")
   if (!object@params@weighted) stop("SEQuential process was not weighted")
-  return(list(denominator0 = lapply(object@weight.statistics, function(x) x[[1]]$d0.coef),
-              denominator1 = lapply(object@weight.statistics, function(x) x[[1]]$d1.coef)))
+  arm <- function(i) lapply(object@weight.statistics, function(x) {
+    coefs <- x[[1]]$coef.denominator
+    if (length(coefs) >= i) coefs[[i]] else NULL
+  })
+  return(list(denominator0 = arm(1L), denominator1 = arm(2L)))
 }
 
 #' Retrieves Outcome Models from SEQuential object
diff --git a/SEQTaRget/tests/testthat/test_coverage.R b/SEQTaRget/tests/testthat/test_coverage.R
index cad8a0b2..5af141c6 100644
--- a/SEQTaRget/tests/testthat/test_coverage.R
+++ b/SEQTaRget/tests/testthat/test_coverage.R
@@ -79,6 +79,34 @@ test_that("numerator/denominator error on unweighted model", {
   expect_error(denominator(model), "weighted")
 })
 
+test_that("numerator/denominator return the per-arm weight models", {
+  # Regression test: these accessors looked up non-existent fields (n0.coef etc.)
+  # and silently returned NULL. They should return the fitted per-arm numerator
+  # and denominator weight models held in weight.statistics$coef.numerator /
+  # $coef.denominator.
+  skip_on_cran()
+  model <- suppressWarnings(SEQuential(copy(SEQdata), "ID", "time", "eligible", "tx_init", "outcome",
+                      list("N", "L", "P"), list("sex"),
+                      method = "censoring", options = SEQopts(weighted = TRUE), verbose = FALSE))
+  num <- numerator(model)
+  den <- denominator(model)
+  expect_named(num, c("numerator0", "numerator1"))
+  expect_named(den, c("denominator0", "denominator1"))
+
+  # Non-NULL fitted models with extractable coefficients (one per treatment arm)
+  expect_false(is.null(num$numerator0[[1]]))
+  expect_false(is.null(num$numerator1[[1]]))
+  expect_true(length(coef(num$numerator0[[1]])) > 0)
+  expect_true(length(coef(den$denominator1[[1]])) > 0)
+
+  # They are exactly the models stored in weight.statistics
+  ws <- model@weight.statistics[[1]][[1]]
+  expect_identical(num$numerator0[[1]],   ws$coef.numerator[[1]])
+  expect_identical(num$numerator1[[1]],   ws$coef.numerator[[2]])
+  expect_identical(den$denominator0[[1]], ws$coef.denominator[[1]])
+  expect_identical(den$denominator1[[1]], ws$coef.denominator[[2]])
+})
+
 test_that("km_curve errors on invalid plot.type", {
   model <- SEQuential(copy(SEQdata), "ID", "time", "eligible", "tx_init", "outcome",
                       list("N", "L", "P"), list("sex"),

From 265f96e0d768734d1ebf0a539f692cfa0ff98cd7 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Thu, 4 Jun 2026 11:27:03 +0100
Subject: [PATCH 17/23] Document that weight truncation applies only to the
 outcome-model fit

Clarify in the weight.lower/weight.upper/weight.p99 docs that
weight.statistics and the returned data report the untruncated weights; truncation affects only the weights used to fit the outcome model.
---
 SEQTaRget/R/SEQopts.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/SEQTaRget/R/SEQopts.R b/SEQTaRget/R/SEQopts.R
index 8e4a038a..5c215348 100644
--- a/SEQTaRget/R/SEQopts.R
+++ b/SEQTaRget/R/SEQopts.R
@@ -57,11 +57,11 @@
 #' @param visit.denominator String: visit denominator covariates to the right hand side of a formula object
 #' @param visit.numerator String: visit numerator covariates to the right hand side of a formula object
 #' @param weight.eligible_cols List: list of column names for indicator columns defining which weights are eligible for weight models - in order of \code{treat.level}
-#' @param weight.lower Numeric: IPCW weights truncated at this lower bound, must be non-negative, default is `0`
+#' @param weight.lower Numeric: IPCW weights truncated at this lower bound, must be non-negative, default is `0`. Truncation is applied only to the weights used to fit the outcome model; the weights reported in \code{weight.statistics} and in the returned data (when \code{data.return = TRUE}) are the untruncated values.
 #' @param weight.lag_condition Logical: whether weights should be conditioned on treatment lag value, default `TRUE`
-#' @param weight.p99 Logical: forces weight truncation at 1st and 99th percentile weights, will override provided \code{weight.upper} and \code{weight.lower}
+#' @param weight.p99 Logical: forces weight truncation at 1st and 99th percentile weights, will override provided \code{weight.upper} and \code{weight.lower}. The percentiles are taken from the untruncated weight distribution (as reported in \code{weight.statistics}), and as with \code{weight.lower}/\code{weight.upper} the truncation affects only the weights used to fit the outcome model.
 #' @param weight.preexpansion Logical: whether weighting should be done on pre-expanded data, default `TRUE`
-#' @param weight.upper Numeric: weights truncated at upper end at this weight, default is `Inf`
+#' @param weight.upper Numeric: weights truncated at upper end at this weight, default is `Inf`. As with \code{weight.lower}, truncation affects only the weights used to fit the outcome model, not those reported in \code{weight.statistics} or the returned data.
 #' @param weighted Logical: whether or not to perform weighted analysis, default is `FALSE`
 #' @returns An object of class 'SEQopts'
 #' @export

From 607166da1096a1e63e0813baf149ac864dd5b639 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Thu, 4 Jun 2026 11:27:10 +0100
Subject: [PATCH 18/23] Document

---
 SEQTaRget/man/SEQopts.Rd | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/SEQTaRget/man/SEQopts.Rd b/SEQTaRget/man/SEQopts.Rd
index cf6b9d1c..73c171ed 100644
--- a/SEQTaRget/man/SEQopts.Rd
+++ b/SEQTaRget/man/SEQopts.Rd
@@ -185,15 +185,15 @@ SEQopts(
 
 \item{weight.eligible_cols}{List: list of column names for indicator columns defining which weights are eligible for weight models - in order of \code{treat.level}}
 
-\item{weight.lower}{Numeric: IPCW weights truncated at this lower bound, must be non-negative, default is \code{0}}
+\item{weight.lower}{Numeric: IPCW weights truncated at this lower bound, must be non-negative, default is \code{0}. Truncation is applied only to the weights used to fit the outcome model; the weights reported in \code{weight.statistics} and in the returned data (when \code{data.return = TRUE}) are the untruncated values.}
 
 \item{weight.lag_condition}{Logical: whether weights should be conditioned on treatment lag value, default \code{TRUE}}
 
-\item{weight.p99}{Logical: forces weight truncation at 1st and 99th percentile weights, will override provided \code{weight.upper} and \code{weight.lower}}
+\item{weight.p99}{Logical: forces weight truncation at 1st and 99th percentile weights, will override provided \code{weight.upper} and \code{weight.lower}. The percentiles are taken from the untruncated weight distribution (as reported in \code{weight.statistics}), and as with \code{weight.lower}/\code{weight.upper} the truncation affects only the weights used to fit the outcome model.}
 
 \item{weight.preexpansion}{Logical: whether weighting should be done on pre-expanded data, default \code{TRUE}}
 
-\item{weight.upper}{Numeric: weights truncated at upper end at this weight, default is \code{Inf}}
+\item{weight.upper}{Numeric: weights truncated at upper end at this weight, default is \code{Inf}. As with \code{weight.lower}, truncation affects only the weights used to fit the outcome model, not those reported in \code{weight.statistics} or the returned data.}
 
 \item{weighted}{Logical: whether or not to perform weighted analysis, default is \code{FALSE}}
 }

From 9b7603ee5a8e88d0f051001e5390469e63309d99 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Thu, 4 Jun 2026 12:37:52 +0100
Subject: [PATCH 19/23] Add thanks to Francesca

---
 SEQTaRget/NEWS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/SEQTaRget/NEWS.md b/SEQTaRget/NEWS.md
index 87fbd26a..0b634d46 100644
--- a/SEQTaRget/NEWS.md
+++ b/SEQTaRget/NEWS.md
@@ -13,7 +13,7 @@
 * Remove mention of units from time in docs.
 * Improve memory usage in the bootstrapping.
 * Fix off-by-one labeling in survival output so that `followup = k` correctly represents survival after `k` intervals, adding a row at `followup = survival.max + 1` for the final interval's estimate.
-* Fix expansion bug where subjects experiencing the outcome early were incorrectly carried forward with `outcome=0` rows from subsequent periods by truncating each trial at the first event row
+* Fix expansion bug where subjects experiencing the outcome early were incorrectly carried forward with `outcome=0` rows from subsequent periods by truncating each trial at the first event row (thanks, @francescazaccagnino)
 * Add `expand.only` option to `SEQopts()`. When `TRUE`, `SEQuential()` returns the expanded `data.table` directly and skips the analysis steps, for users who want to inspect or store the expanded dataset on its own.
 * Fix `followup.spline = TRUE` so the basis is genuinely non-linear. Splines are now built into the model formula via `splines::ns()` instead of being applied as a single-column transform of `followup`, and the new `followup.spline.df` option (default `4`) controls the number of basis functions. The treatment-by-followup interaction now uses the same spline basis. Knots are baked from the full expanded `followup` once at fit time so the basis is identical at fit and prediction time across bootstraps and survival grids. Internally, formula column extraction now uses `all.vars()`, so user-supplied covariates may include `ns()`, `bs()`, `I()`, `factor()`, `poly()` etc. without breaking expansion.
 * Rename `format.time()` to `format_time()` because it wasn't an S3 method and hence was causing roxygen2 to write incorrect information in its helpfile.

From ef7ef870810987d37bdeecd734f4181c0ea2e4e5 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Sat, 6 Jun 2026 20:19:07 +0100
Subject: [PATCH 20/23] Pass the formula cache to inline.pred() in the weight
 models

Routes the weight numerator/denominator/LTFU/visit predictions through the cached formula path, matching the survival and hazard paths. Results are bit-identical; this is a consistency cleanup, not a perf change.
---
 SEQTaRget/R/internal_weights.R | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/SEQTaRget/R/internal_weights.R b/SEQTaRget/R/internal_weights.R
index 628456d9..27676534 100644
--- a/SEQTaRget/R/internal_weights.R
+++ b/SEQTaRget/R/internal_weights.R
@@ -126,8 +126,8 @@ internal.weights <- function(DT, data, params, cache) {
             out[tx_lag == level, `:=`(numerator = 1, denominator = 1)]
           } else {
             out[tx_lag == level, `:=`(
-              numerator = inline.pred(numerator_models[[i]], .SD, params, "numerator", multi = params@multinomial, target = level),
-              denominator = inline.pred(denominator_models[[i]], .SD, params, "denominator", multi = params@multinomial, target = level))]
+              numerator = inline.pred(numerator_models[[i]], .SD, params, "numerator", multi = params@multinomial, target = level, cache = cache),
+              denominator = inline.pred(denominator_models[[i]], .SD, params, "denominator", multi = params@multinomial, target = level, cache = cache))]
 
             if (i == 1) {
               out[tx_lag == level & get(params@treatment) == params@treat.level[[i]],
@@ -146,7 +146,7 @@ internal.weights <- function(DT, data, params, cache) {
           
           if (!is.na(col)) {
             out[tx_lag == level & get(col) != 1, 
-                denominator := inline.pred(denominator_models[[i]], .SD, params, "denominator", multi = multi, target = level)]
+                denominator := inline.pred(denominator_models[[i]], .SD, params, "denominator", multi = multi, target = level, cache = cache)]
             if (i == 1) {
               out[tx_lag == level & 
                     get(params@treatment) == params@treat.level[[i]] & 
@@ -170,7 +170,7 @@ internal.weights <- function(DT, data, params, cache) {
             col <- if (params@excused) params@excused.cols[[i]] else params@deviation.excused_cols[[i]]
             if (!is.na(col)) {
               out[get(params@treatment) == level & get(col) == 0, 
-                  numerator := inline.pred(numerator_models[[i]], .SD, params, "numerator", multi = multi, target = level)
+                  numerator := inline.pred(numerator_models[[i]], .SD, params, "numerator", multi = multi, target = level, cache = cache)
                   ]
             }
           }
@@ -181,15 +181,15 @@ internal.weights <- function(DT, data, params, cache) {
 
     if (params@LTFU) {
       if (params@method == "ITT") out <- out[, `:=` (numerator = 1, denominator = 1)]
-      out <- out[, `:=` (cense1.numerator = inline.pred(cense.numerator, .SD, params, "numerator", "LTFU"),
-                         cense1.denominator = inline.pred(cense.denominator, .SD, params, "denominator", "LTFU"))
+      out <- out[, `:=` (cense1.numerator = inline.pred(cense.numerator, .SD, params, "numerator", "LTFU", cache = cache),
+                         cense1.denominator = inline.pred(cense.denominator, .SD, params, "denominator", "LTFU", cache = cache))
                  ][, cense1 := cense1.numerator / cense1.denominator]
     }
 
     if (!is.na(params@visit)) {
       if (params@method == "ITT") out <- out[, `:=` (numerator = 1, denominator = 1)]
-      out <- out[, `:=` (visit.numerator = inline.pred(visit.numerator, .SD, params, "numerator", "visit"),
-                         visit.denominator = inline.pred(visit.denominator, .SD, params, "denominator", "visit"))
+      out <- out[, `:=` (visit.numerator = inline.pred(visit.numerator, .SD, params, "numerator", "visit", cache = cache),
+                         visit.denominator = inline.pred(visit.denominator, .SD, params, "denominator", "visit", cache = cache))
       ][, visit := visit.numerator / visit.denominator]
     }
 

From 702b73920fa6946a965ed73f62935ce5e8aea59d Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Sun, 7 Jun 2026 07:30:56 +0100
Subject: [PATCH 21/23] Fix argument ordering

---
 SEQTaRget/R/SEQopts.R | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/SEQTaRget/R/SEQopts.R b/SEQTaRget/R/SEQopts.R
index 5c215348..5d2d7c13 100644
--- a/SEQTaRget/R/SEQopts.R
+++ b/SEQTaRget/R/SEQopts.R
@@ -22,14 +22,13 @@
 #' @param excused Logical: in the case of censoring, whether there is an excused condition, default is `FALSE`
 #' @param excused.cols List: list of column names for treatment switch excuses - should be the same length, and ordered the same as \code{treat.level}
 #' @param fastglm.method Integer: decomposition method for fastglm (`0L`-column-pivoted QR, `1L`-unpivoted QR, `2L`-LLT Cholesky, `3L`-LDLT Cholesky), default is `2L`
-#' @param glm.package Character: package to use for fitting GLMs, either `"fastglm"` (default) or `"parglm"`. When `"parglm"` is selected the `nthreads` option controls the number of threads passed to `parglm::parglm.fit()`. For most realistic SEQTaRget workloads (expanded datasets up to approximately a few million rows) `"fastglm"` is faster; `"parglm"` may help only on substantially larger datasets where the parallel chunking outweighs its setup overhead.
-#' @param parglm.control A control object from [parglm::parglm.control()] to pass to `parglm::parglm.fit()`. Only used when `glm.package = "parglm"`. Defaults to `parglm::parglm.control(method = "FAST")`. If you encounter a `chol(): decomposition failed` error (e.g. with near-singular model matrices on large datasets), pass `parglm.control = parglm::parglm.control(method = "LAPACK")` to use the more numerically stable QR decomposition instead, or switch to using the fastglm backend.
 #' @param followup.class Logical: treat followup as a class, e.g. expands every time to it's own indicator column, default is `FALSE`
 #' @param followup.include Logical: whether or not to include 'followup' and 'followup_squared' in the outcome model, default is `TRUE`
 #' @param followup.max Numeric: maximum time to expand about, default is `Inf` (no maximum)
 #' @param followup.min Numeric: minimum follow-up time since trial enrollment to include, must be non-negative, default is `0`
 #' @param followup.spline Logical: treat followup as a natural cubic spline (`splines::ns()`), default is `FALSE`
 #' @param followup.spline.df Integer: degrees of freedom passed to `splines::ns()` when `followup.spline = TRUE`. With `df = k`, `ns()` places `k - 1` interior knots at quantiles of `followup`. Must be `>= 1`; `df = 1` is equivalent to a linear term and is generally not what you want. Default is `4` (3 interior knots).
+#' @param glm.package Character: package to use for fitting GLMs, either `"fastglm"` (default) or `"parglm"`. When `"parglm"` is selected the `nthreads` option controls the number of threads passed to `parglm::parglm.fit()`. For most realistic SEQTaRget workloads (expanded datasets up to approximately a few million rows) `"fastglm"` is faster; `"parglm"` may help only on substantially larger datasets where the parallel chunking outweighs its setup overhead.
 #' @param hazard Logical: hazard error calculation instead of survival estimation, default is `FALSE`
 #' @param indicator.baseline String: identifier for baseline variables in \code{covariates, numerator, denominator} - intended as an override
 #' @param indicator.squared String: identifier for squared variables in \code{covariates, numerator, denominator} - intended as an override
@@ -39,6 +38,7 @@
 #' @param nthreads Integer: number of threads to use for data.table processing, default is [data.table::getDTthreads()]
 #' @param numerator String: numerator covariates to the right hand side of a formula object
 #' @param parallel Logical: define if the SEQuential process is run in parallel, default is `FALSE`
+#' @param parglm.control A control object from [parglm::parglm.control()] to pass to `parglm::parglm.fit()`. Only used when `glm.package = "parglm"`. Defaults to `parglm::parglm.control(method = "FAST")`. If you encounter a `chol(): decomposition failed` error (e.g. with near-singular model matrices on large datasets), pass `parglm.control = parglm::parglm.control(method = "LAPACK")` to use the more numerically stable QR decomposition instead, or switch to using the fastglm backend.
 #' @param plot.colors Character: Colors for output plot if \code{km.curves = TRUE}, defaulted to ggplot2 defaults
 #' @param plot.labels Character: Color labels for output plot if \code{km.curves = TRUE} in order e.g. \code{c("risk.0", "risk.1")}
 #' @param plot.subtitle Character: Subtitle for output plot if \code{km.curves = TRUE}
@@ -72,11 +72,12 @@ SEQopts <- function(bootstrap = FALSE, bootstrap.nboot = 100, bootstrap.sample =
                     cense = NA, cense.denominator = NA, cense.eligible = NA, cense.numerator = NA,
                     compevent = NA, covariates = NA, data.return = FALSE, denominator = NA,
                     deviation = FALSE, deviation.col = NA, deviation.conditions = c(NA, NA), deviation.excused = FALSE, deviation.excused_cols = c(NA, NA),
-                    excused = FALSE, excused.cols = c(NA, NA), expand.only = FALSE, fastglm.method = 2L, glm.package = "fastglm", parglm.control = NULL,
+                    excused = FALSE, excused.cols = c(NA, NA), expand.only = FALSE, fastglm.method = 2L,
                     followup.class = FALSE, followup.include = TRUE, followup.max = Inf, followup.min = 0, followup.spline = FALSE, followup.spline.df = 4L,
+                    glm.package = "fastglm",
                     hazard = FALSE, indicator.baseline = "_bas", indicator.squared = "_sq",
                     km.curves = FALSE, multinomial = FALSE, ncores = availableCores(omit = 1L), nthreads = getDTthreads(),
-                    numerator = NA, parallel = FALSE, plot.colors = c("#F8766D", "#00BFC4", "#555555"), plot.labels = NA, plot.subtitle = NA, plot.title = NA, plot.type = "survival",
+                    numerator = NA, parallel = FALSE, parglm.control = NULL, plot.colors = c("#F8766D", "#00BFC4", "#555555"), plot.labels = NA, plot.subtitle = NA, plot.title = NA, plot.type = "survival",
                     risk.times = NA,
                     seed = NULL, selection.first_trial = FALSE, selection.prob = 0.8, selection.random = FALSE, subgroup = NA, survival.max = Inf,
                     treat.level = c(0, 1), trial.include = TRUE,

From 9536e79e4e3c15f09142fb0635059c720904f41d Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Sun, 7 Jun 2026 07:31:04 +0100
Subject: [PATCH 22/23] Document

---
 SEQTaRget/man/SEQopts.Rd | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/SEQTaRget/man/SEQopts.Rd b/SEQTaRget/man/SEQopts.Rd
index 73c171ed..ace32f20 100644
--- a/SEQTaRget/man/SEQopts.Rd
+++ b/SEQTaRget/man/SEQopts.Rd
@@ -27,14 +27,13 @@ SEQopts(
   excused.cols = c(NA, NA),
   expand.only = FALSE,
   fastglm.method = 2L,
-  glm.package = "fastglm",
-  parglm.control = NULL,
   followup.class = FALSE,
   followup.include = TRUE,
   followup.max = Inf,
   followup.min = 0,
   followup.spline = FALSE,
   followup.spline.df = 4L,
+  glm.package = "fastglm",
   hazard = FALSE,
   indicator.baseline = "_bas",
   indicator.squared = "_sq",
@@ -44,6 +43,7 @@ SEQopts(
   nthreads = getDTthreads(),
   numerator = NA,
   parallel = FALSE,
+  parglm.control = NULL,
   plot.colors = c("#F8766D", "#00BFC4", "#555555"),
   plot.labels = NA,
   plot.subtitle = NA,
@@ -115,10 +115,6 @@ SEQopts(
 
 \item{fastglm.method}{Integer: decomposition method for fastglm (\code{0L}-column-pivoted QR, \code{1L}-unpivoted QR, \code{2L}-LLT Cholesky, \code{3L}-LDLT Cholesky), default is \code{2L}}
 
-\item{glm.package}{Character: package to use for fitting GLMs, either \code{"fastglm"} (default) or \code{"parglm"}. When \code{"parglm"} is selected the \code{nthreads} option controls the number of threads passed to \code{parglm::parglm.fit()}. For most realistic SEQTaRget workloads (expanded datasets up to approximately a few million rows) \code{"fastglm"} is faster; \code{"parglm"} may help only on substantially larger datasets where the parallel chunking outweighs its setup overhead.}
-
-\item{parglm.control}{A control object from \code{\link[parglm:parglm.control]{parglm::parglm.control()}} to pass to \code{parglm::parglm.fit()}. Only used when \code{glm.package = "parglm"}. Defaults to \code{parglm::parglm.control(method = "FAST")}. If you encounter a \verb{chol(): decomposition failed} error (e.g. with near-singular model matrices on large datasets), pass \code{parglm.control = parglm::parglm.control(method = "LAPACK")} to use the more numerically stable QR decomposition instead, or switch to using the fastglm backend.}
-
 \item{followup.class}{Logical: treat followup as a class, e.g. expands every time to it's own indicator column, default is \code{FALSE}}
 
 \item{followup.include}{Logical: whether or not to include 'followup' and 'followup_squared' in the outcome model, default is \code{TRUE}}
@@ -131,6 +127,8 @@ SEQopts(
 
 \item{followup.spline.df}{Integer: degrees of freedom passed to \code{splines::ns()} when \code{followup.spline = TRUE}. With \code{df = k}, \code{ns()} places \code{k - 1} interior knots at quantiles of \code{followup}. Must be \verb{>= 1}; \code{df = 1} is equivalent to a linear term and is generally not what you want. Default is \code{4} (3 interior knots).}
 
+\item{glm.package}{Character: package to use for fitting GLMs, either \code{"fastglm"} (default) or \code{"parglm"}. When \code{"parglm"} is selected the \code{nthreads} option controls the number of threads passed to \code{parglm::parglm.fit()}. For most realistic SEQTaRget workloads (expanded datasets up to approximately a few million rows) \code{"fastglm"} is faster; \code{"parglm"} may help only on substantially larger datasets where the parallel chunking outweighs its setup overhead.}
+
 \item{hazard}{Logical: hazard error calculation instead of survival estimation, default is \code{FALSE}}
 
 \item{indicator.baseline}{String: identifier for baseline variables in \code{covariates, numerator, denominator} - intended as an override}
@@ -149,6 +147,8 @@ SEQopts(
 
 \item{parallel}{Logical: define if the SEQuential process is run in parallel, default is \code{FALSE}}
 
+\item{parglm.control}{A control object from \code{\link[parglm:parglm.control]{parglm::parglm.control()}} to pass to \code{parglm::parglm.fit()}. Only used when \code{glm.package = "parglm"}. Defaults to \code{parglm::parglm.control(method = "FAST")}. If you encounter a \verb{chol(): decomposition failed} error (e.g. with near-singular model matrices on large datasets), pass \code{parglm.control = parglm::parglm.control(method = "LAPACK")} to use the more numerically stable QR decomposition instead, or switch to using the fastglm backend.}
+
 \item{plot.colors}{Character: Colors for output plot if \code{km.curves = TRUE}, defaulted to ggplot2 defaults}
 
 \item{plot.labels}{Character: Color labels for output plot if \code{km.curves = TRUE} in order e.g. \code{c("risk.0", "risk.1")}}

From 650d9c9ef7d0f314020ab294420af1cca5f03d87 Mon Sep 17 00:00:00 2001
From: Tom Palmer <remlapmot@hotmail.com>
Date: Tue, 2 Jun 2026 17:32:10 +0100
Subject: [PATCH 23/23] Update NEWS.md

---
 SEQTaRget/NEWS.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/SEQTaRget/NEWS.md b/SEQTaRget/NEWS.md
index 0b634d46..6c90a7ea 100644
--- a/SEQTaRget/NEWS.md
+++ b/SEQTaRget/NEWS.md
@@ -7,6 +7,13 @@
 * Report competing events per treatment arm in the `@info` slot as `info$compevent.unique` and `info$compevent.nonunique`, mirroring the structure of `info$outcome.unique` / `info$outcome.nonunique`. Both are grouped by baseline treatment; the non-unique table counts all competing event occurrences in the expanded data and the unique table counts distinct subjects who experienced the competing event. Both are `NA` when no `compevent` is specified.
 * From a `SEQuential()` fit, populate `weight.statistics` and `outcome.model` when `hazard = TRUE`.
 * Warn when the `numerator` and `denominator` weight models are given identical covariates. In that case the stabilized weights all equal 1 (i.e., no weighting), which is usually a typo in the `denominator` argument.
+* Improve the `SEQuential()` helpfile by adding a per-protocol example.
+* Add behavioural tests that `selection.random = TRUE` retains all treated trial-starts, subsamples control trial-starts to the requested `selection.prob` fraction, and is reproducible under a fixed seed; rename the previous smoke test that did not actually exercise the feature.
+* Add behavioural tests for; `weight.lower`/`weight.upper` truncation, `weight.p99` truncation, `followup.include`/`trial.include`, `followup.class`, `weight.lag_condition`, `followup.min`/`followup.max`, and `weight.eligible_cols`.
+* Fix `numerator()` and `denominator()` returning `NULL` for every weighted model; they now return the fitted per-arm numerator/denominator weight models.
+* Document that weight truncation applies only to the outcome-model fit.
+* Pass the formula cache to `inline.pred()` in the weight models
+* Fix `SEQOpts()` argument ordering.
 
 # SEQTaRget v1.4.2