From d01ee0fbc4289723f473c9ec182cb48ea1cd80d0 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Tue, 2 Jun 2026 15:43:55 +0100 Subject: [PATCH 01/23] Add weight.statistics when hazard = TRUE --- SEQTaRget/R/SEQuential.R | 1 + SEQTaRget/tests/testthat/test_hazard.R | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/SEQTaRget/R/SEQuential.R b/SEQTaRget/R/SEQuential.R index ba420a19..b430cbf7 100644 --- a/SEQTaRget/R/SEQuential.R +++ b/SEQTaRget/R/SEQuential.R @@ -248,6 +248,7 @@ SEQuential <- function(data, id.col, time.col, eligible.col, treatment.col, outc label <- subgroups[[i]] models <- lapply(analytic, function(x) x$model[[i]]) hazard[[label]] <- internal.hazard(models, params, formula_cache) + weights[[label]] <- lapply(analytic, function(x) x$weighted_stats) } } rm(analytic) diff --git a/SEQTaRget/tests/testthat/test_hazard.R b/SEQTaRget/tests/testthat/test_hazard.R index c2a34ea8..0e45594d 100644 --- a/SEQTaRget/tests/testthat/test_hazard.R +++ b/SEQTaRget/tests/testthat/test_hazard.R @@ -5,6 +5,22 @@ test_that("Hazard and vcov", { expect_s4_class(model, "SEQoutput") }) +test_that("Weighted hazard models populate weight.statistics", { + # Regression test: weight.statistics was NULL/empty for hazard = TRUE models + # even when weighted = TRUE, because the weights slot was only populated in the + # non-hazard branch of SEQuential(). The models were still weighted correctly, + # so the statistics should be reported. + data <- data.table::copy(SEQdata) + model <- suppressWarnings(SEQuential(data, "ID", "time", "eligible", "tx_init", "outcome", + list("N", "L", "P"), list("sex"), + method = "censoring", + options = SEQopts(hazard = TRUE, weighted = TRUE))) + expect_s4_class(model, "SEQoutput") + expect_true(length(model@weight.statistics) > 0) + expect_false(is.null(model@weight.statistics[[1]])) + expect_true(is.numeric(model@weight.statistics[[1]][[1]]$max)) +}) + test_that("Hazard estimate is reproducible with same seed", { skip_on_cran() args <- list("ID", "time", "eligible", "tx_init", "outcome", list("N", "L", "P"), list("sex"), From 7b267abbfab1ac0e0a476cc9115ef81e8bc7a226 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Tue, 2 Jun 2026 15:44:01 +0100 Subject: [PATCH 02/23] Bump version --- SEQTaRget/DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SEQTaRget/DESCRIPTION b/SEQTaRget/DESCRIPTION index 26b22672..d16abb82 100644 --- a/SEQTaRget/DESCRIPTION +++ b/SEQTaRget/DESCRIPTION @@ -1,7 +1,7 @@ Package: SEQTaRget Type: Package Title: Sequential Trial Emulation -Version: 1.4.2.9001 +Version: 1.4.2.9002 Authors@R: c(person(given = "Ryan", family = "O'Dea", role = c("aut", "cre"), From 285eca19f99274856082050bae21edbf60114666 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Tue, 2 Jun 2026 15:53:13 +0100 Subject: [PATCH 03/23] Populate outcome.model when hazard = TRUE --- SEQTaRget/R/SEQuential.R | 1 + SEQTaRget/tests/testthat/test_hazard.R | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/SEQTaRget/R/SEQuential.R b/SEQTaRget/R/SEQuential.R index b430cbf7..da4ed14c 100644 --- a/SEQTaRget/R/SEQuential.R +++ b/SEQTaRget/R/SEQuential.R @@ -248,6 +248,7 @@ SEQuential <- function(data, id.col, time.col, eligible.col, treatment.col, outc label <- subgroups[[i]] models <- lapply(analytic, function(x) x$model[[i]]) hazard[[label]] <- internal.hazard(models, params, formula_cache) + outcome[[label]] <- lapply(models, function(x) clean_fastglm(x$model)) weights[[label]] <- lapply(analytic, function(x) x$weighted_stats) } } diff --git a/SEQTaRget/tests/testthat/test_hazard.R b/SEQTaRget/tests/testthat/test_hazard.R index 0e45594d..041d814e 100644 --- a/SEQTaRget/tests/testthat/test_hazard.R +++ b/SEQTaRget/tests/testthat/test_hazard.R @@ -21,6 +21,21 @@ test_that("Weighted hazard models populate weight.statistics", { expect_true(is.numeric(model@weight.statistics[[1]][[1]]$max)) }) +test_that("Hazard models populate outcome.model", { + # Regression test: outcome.model was NULL for hazard = TRUE models even though + # the pooled-logistic outcome models are fit and consumed by internal.hazard(). + # The cleaned outcome models should be returned, as they are for hazard = FALSE. + data <- data.table::copy(SEQdata) + model <- suppressWarnings(SEQuential(data, "ID", "time", "eligible", "tx_init", "outcome", + list("N", "L", "P"), list("sex"), + method = "censoring", + options = SEQopts(hazard = TRUE, weighted = TRUE))) + expect_s4_class(model, "SEQoutput") + expect_true(length(model@outcome.model) > 0) + expect_false(is.null(model@outcome.model[[1]])) + expect_true(length(coef(model@outcome.model[[1]][[1]])) > 0) +}) + test_that("Hazard estimate is reproducible with same seed", { skip_on_cran() args <- list("ID", "time", "eligible", "tx_init", "outcome", list("N", "L", "P"), list("sex"), From c8bf5952441b1a1b82013ce84f181b140576d54e Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Tue, 2 Jun 2026 16:40:02 +0100 Subject: [PATCH 04/23] Add warning if numerator and denominator are the same --- SEQTaRget/R/class_setters.R | 8 ++++++++ SEQTaRget/tests/testthat/test_warnings.R | 22 ++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/SEQTaRget/R/class_setters.R b/SEQTaRget/R/class_setters.R index de390275..8051d7d9 100644 --- a/SEQTaRget/R/class_setters.R +++ b/SEQTaRget/R/class_setters.R @@ -137,6 +137,14 @@ parameter.simplifier <- function(params) { warning("Without LTFU or Visit, weighted ITT model is not supported, automatically changed to weighted = FALSE") params@weighted <- FALSE } + + if (params@weighted && params@method != "ITT" && + !is.na(params@numerator) && !is.na(params@denominator) && + identical(params@numerator, params@denominator)) { + warning("Numerator and denominator weight models use identical covariates ('", params@numerator, + "'); the stabilized weights will all equal 1 (i.e., no weighting). The denominator should ", + "typically include the time-varying confounders that the numerator omits - check for a typo in either or both of 'numerator' and 'denominator'.") + } if (params@followup.class && params@followup.spline) stop("Followup cannot be both a class and a spline, please select one.") if (!params@plot.type %in% c("survival", "risk", "inc")) stop("Supported plot types are 'survival', 'risk', and 'inc' (in the case of censoring), please select one.") diff --git a/SEQTaRget/tests/testthat/test_warnings.R b/SEQTaRget/tests/testthat/test_warnings.R index 294e9b5e..6fbb9bd8 100644 --- a/SEQTaRget/tests/testthat/test_warnings.R +++ b/SEQTaRget/tests/testthat/test_warnings.R @@ -36,3 +36,25 @@ test_that("Unexcused Excused Censoring", { options = SEQopts(weighted = TRUE, excused = TRUE) )) }) + +test_that("Identical numerator and denominator weight models warn about unit weights", { + # A common typo (denominator pointed at the numerator covariates) makes the two + # weight models identical, so every stabilized weight is exactly 1 (no weighting). + data <- data.table::copy(SEQdata) + expect_warning( + SEQuential(data, "ID", "time", "eligible", "tx_init", "outcome", list("N", "L", "P"), list("sex"), + method = "censoring", + options = SEQopts(weighted = TRUE, numerator = "N+L+P", denominator = "N+L+P")), + regexp = "identical covariates" + ) +}) + +test_that("Differing numerator and denominator weight models do not warn about unit weights", { + data <- data.table::copy(SEQdata) + ws <- testthat::capture_warnings( + SEQuential(data, "ID", "time", "eligible", "tx_init", "outcome", list("N", "L", "P"), list("sex"), + method = "censoring", + options = SEQopts(weighted = TRUE, numerator = "sex", denominator = "N+L+P")) + ) + expect_false(any(grepl("identical covariates", ws))) +}) From b05919242135b9bbcfe1840512911e08b30b4830 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Tue, 2 Jun 2026 15:54:47 +0100 Subject: [PATCH 05/23] Update NEWS.md --- SEQTaRget/NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/SEQTaRget/NEWS.md b/SEQTaRget/NEWS.md index b23de83a..87fbd26a 100644 --- a/SEQTaRget/NEWS.md +++ b/SEQTaRget/NEWS.md @@ -5,6 +5,8 @@ * Speed up the hazard ratio calculation by fitting the Cox model with the survival C fitters directly on a prebuilt design matrix instead of `coxph(formula, data)`, avoiding the `model.frame`/`model.matrix` rebuild on every bootstrap iteration: `survival::coxph.fit()` for the non-competing-event model and `survival::agreg.fit()` for the competing-event Fine-Gray (counting-process) model. The hazard ratio and CIs are unchanged. * Fix the competing-event Fine-Gray hazard fit to use the `finegray()` case weights (`fgwt`), which are required for a valid subdistribution-hazard estimate and were previously omitted. This is a no-op for the current hazard simulation (which has only administrative censoring, so all `fgwt` are 1) but corrects the estimate should the simulated data ever carry random censoring. * Report competing events per treatment arm in the `@info` slot as `info$compevent.unique` and `info$compevent.nonunique`, mirroring the structure of `info$outcome.unique` / `info$outcome.nonunique`. Both are grouped by baseline treatment; the non-unique table counts all competing event occurrences in the expanded data and the unique table counts distinct subjects who experienced the competing event. Both are `NA` when no `compevent` is specified. +* From a `SEQuential()` fit, populate `weight.statistics` and `outcome.model` when `hazard = TRUE`. +* Warn when the `numerator` and `denominator` weight models are given identical covariates. In that case the stabilized weights all equal 1 (i.e., no weighting), which is usually a typo in the `denominator` argument. # SEQTaRget v1.4.2 From 43546a64feacb01030521fd46d4f5e7905a04bfb Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Tue, 2 Jun 2026 17:32:00 +0100 Subject: [PATCH 06/23] Improve SEQuential helpfile --- SEQTaRget/R/SEQuential.R | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/SEQTaRget/R/SEQuential.R b/SEQTaRget/R/SEQuential.R index da4ed14c..b8437c76 100644 --- a/SEQTaRget/R/SEQuential.R +++ b/SEQTaRget/R/SEQuential.R @@ -29,15 +29,37 @@ #' @examples #' \donttest{ #' data <- SEQdata -#' model <- SEQuential(data, id.col = "ID", -#' time.col = "time", -#' eligible.col = "eligible", -#' treatment.col = "tx_init", -#' outcome.col = "outcome", -#' time_varying.cols = c("N", "L", "P"), -#' fixed.cols = "sex", -#' method = "ITT", -#' options = SEQopts()) +#' +#' # Intention-to-treat (ITT) effect: subjects are assigned to the treatment +#' # arm defined by their baseline treatment and followed regardless of any later +#' # treatment changes, so no weighting is required. +#' SEQuential(data, id.col = "ID", +#' time.col = "time", +#' eligible.col = "eligible", +#' treatment.col = "tx_init", +#' outcome.col = "outcome", +#' time_varying.cols = c("N", "L", "P"), +#' fixed.cols = "sex", +#' method = "ITT", +#' options = SEQopts()) +#' +#' # Per-protocol effect via artificial censoring: subjects are censored when they +#' # deviate from their assigned strategy, and inverse-probability-of-censoring +#' # weights adjust for the resulting selection bias. The denominator models the +#' # probability of remaining uncensored given the time-varying confounders, while +#' # the numerator uses only the baseline covariates to stabilize the weights (so +#' # the two formulas must differ - identical formulas give weights of 1). +#' SEQuential(data, id.col = "ID", +#' time.col = "time", +#' eligible.col = "eligible", +#' treatment.col = "tx_init", +#' outcome.col = "outcome", +#' time_varying.cols = c("N", "L", "P"), +#' fixed.cols = "sex", +#' method = "censoring", +#' options = SEQopts(weighted = TRUE, +#' numerator = "sex", +#' denominator = "N + L + P + sex")) #' } #' #' @export From 1107b4924693b53a207e4129c2c7bb430fbfa4e8 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Tue, 2 Jun 2026 17:32:06 +0100 Subject: [PATCH 07/23] Document --- SEQTaRget/man/SEQuential.Rd | 40 ++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/SEQTaRget/man/SEQuential.Rd b/SEQTaRget/man/SEQuential.Rd index 94c9a754..7a319d5b 100644 --- a/SEQTaRget/man/SEQuential.Rd +++ b/SEQTaRget/man/SEQuential.Rd @@ -57,15 +57,37 @@ and per-protocol effects, and can adjust for potential selection bias induced by \examples{ \donttest{ data <- SEQdata -model <- SEQuential(data, id.col = "ID", - time.col = "time", - eligible.col = "eligible", - treatment.col = "tx_init", - outcome.col = "outcome", - time_varying.cols = c("N", "L", "P"), - fixed.cols = "sex", - method = "ITT", - options = SEQopts()) + +# Intention-to-treat (ITT) effect: subjects are assigned to the treatment +# arm defined by their baseline treatment and followed regardless of any later +# treatment changes, so no weighting is required. +SEQuential(data, id.col = "ID", + time.col = "time", + eligible.col = "eligible", + treatment.col = "tx_init", + outcome.col = "outcome", + time_varying.cols = c("N", "L", "P"), + fixed.cols = "sex", + method = "ITT", + options = SEQopts()) + +# Per-protocol effect via artificial censoring: subjects are censored when they +# deviate from their assigned strategy, and inverse-probability-of-censoring +# weights adjust for the resulting selection bias. The denominator models the +# probability of remaining uncensored given the time-varying confounders, while +# the numerator uses only the baseline covariates to stabilize the weights (so +# the two formulas must differ - identical formulas give weights of 1). +SEQuential(data, id.col = "ID", + time.col = "time", + eligible.col = "eligible", + treatment.col = "tx_init", + outcome.col = "outcome", + time_varying.cols = c("N", "L", "P"), + fixed.cols = "sex", + method = "censoring", + options = SEQopts(weighted = TRUE, + numerator = "sex", + denominator = "N + L + P + sex")) } } From 7a7b133da83c34585497e9dd679d4ae68c3ab21c Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Wed, 3 Jun 2026 17:08:20 +0100 Subject: [PATCH 08/23] Add proper tests for selection.random --- SEQTaRget/tests/testthat/test_coverage.R | 46 +++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/SEQTaRget/tests/testthat/test_coverage.R b/SEQTaRget/tests/testthat/test_coverage.R index 8fd1f169..3ce73c54 100644 --- a/SEQTaRget/tests/testthat/test_coverage.R +++ b/SEQTaRget/tests/testthat/test_coverage.R @@ -221,7 +221,10 @@ test_that("selection.first_trial restricts to first trial per subject", { # ── SEQexpand.R: selection.random ─────────────────────────────────────────── -test_that("selection.random subsamples controls", { +test_that("SEQuential runs with selection.prob set (smoke test)", { + # Smoke test only: selection.prob has no effect unless selection.random = TRUE, + # so this just confirms the option is accepted and the run completes. The actual + # subsampling behaviour is checked in the two tests below. model <- suppressWarnings(SEQuential(copy(SEQdata), "ID", "time", "eligible", "tx_init", "outcome", list("N", "L", "P"), list("sex"), method = "ITT", @@ -229,6 +232,47 @@ test_that("selection.random subsamples controls", { expect_s4_class(model, "SEQoutput") }) +test_that("selection.random keeps all treated starts and subsamples control starts", { + skip_on_cran() + prob <- 0.5 + + # One row per trial-start (followup == 0), counted by baseline treatment arm. + arm_counts <- function(DT) { + s <- DT[followup == 0, .N, by = tx_init_bas] + stats::setNames(s$N, as.character(s$tx_init_bas)) + } + + base <- suppressWarnings(SEQuential(copy(SEQdata), "ID", "time", "eligible", "tx_init", "outcome", + list("N", "L", "P"), list("sex"), + method = "ITT", + options = SEQopts(data.return = TRUE, seed = 1L), verbose = FALSE)) + sel <- suppressWarnings(SEQuential(copy(SEQdata), "ID", "time", "eligible", "tx_init", "outcome", + list("N", "L", "P"), list("sex"), + method = "ITT", + options = SEQopts(selection.random = TRUE, selection.prob = prob, + data.return = TRUE, seed = 1L), verbose = FALSE)) + + base_counts <- arm_counts(base@DT) + sel_counts <- arm_counts(sel@DT) + + # Treated trial-starts (baseline tx_init = 1) are all retained + expect_equal(sel_counts[["1"]], base_counts[["1"]]) + # Control trial-starts (baseline tx_init = 0) are reduced to the requested fraction + expect_lt(sel_counts[["0"]], base_counts[["0"]]) + expect_equal(sel_counts[["0"]], round(base_counts[["0"]] * prob)) +}) + +test_that("selection.random is reproducible with a fixed seed", { + skip_on_cran() + args <- list("ID", "time", "eligible", "tx_init", "outcome", list("N", "L", "P"), list("sex"), + method = "ITT", + options = SEQopts(selection.random = TRUE, selection.prob = 0.5, + data.return = TRUE, seed = 7L)) + run1 <- suppressWarnings(do.call(SEQuential, c(list(data = copy(SEQdata)), args))) + run2 <- suppressWarnings(do.call(SEQuential, c(list(data = copy(SEQdata)), args))) + expect_equal(run1@DT, run2@DT) +}) + # ── SEQuential.R: validation paths ────────────────────────────────────────── test_that("SEQuential errors on non-binary outcome", { From 9d467d20c91acec2802ac3025b42082925b82bf9 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 4 Jun 2026 10:46:34 +0100 Subject: [PATCH 09/23] Add behavioural test for weight.lower/weight.upper truncation Replaces reliance on a smoke test: verifies the truncation is actually applied to the weights used in the outcome fit. Two clamp bands entirely above the weight range collapse all weights to a constant and, by GLM scale-invariance, give identical coefficients, while a genuinely varying-weight fit differs. --- SEQTaRget/tests/testthat/test_coverage.R | 26 ++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/SEQTaRget/tests/testthat/test_coverage.R b/SEQTaRget/tests/testthat/test_coverage.R index 3ce73c54..1730ae86 100644 --- a/SEQTaRget/tests/testthat/test_coverage.R +++ b/SEQTaRget/tests/testthat/test_coverage.R @@ -273,6 +273,32 @@ test_that("selection.random is reproducible with a fixed seed", { expect_equal(run1@DT, run2@DT) }) +# ── internal_models.R: weight truncation ──────────────────────────────────── + +test_that("weight.lower/weight.upper truncate the weights used in the outcome fit", { + skip_on_cran() + # Truncation is applied to the weight vector passed to the GLM fit (not the + # returned @DT or weight.statistics), so its effect is checked through the + # fitted coefficients. SEQdata weights span ~0.54-2.16, so a band entirely + # above that range clamps every weight to the same constant. A GLM is invariant + # to a uniform scaling of its weights, so two such all-constant clamps must give + # identical coefficients, while a genuinely varying-weight fit must differ. + cf <- function(m) coef(m@outcome.model[[1]][[1]]) + args <- list("ID", "time", "eligible", "tx_init", "outcome", list("N", "L", "P"), list("sex"), + method = "censoring") + mk <- function(opts) suppressWarnings(do.call(SEQuential, c(list(data = copy(SEQdata)), args, + list(options = opts, verbose = FALSE)))) + + varying <- mk(SEQopts(weighted = TRUE, seed = 1L)) + clamp3 <- mk(SEQopts(weighted = TRUE, weight.lower = 3, weight.upper = 4, seed = 1L)) + clamp10 <- mk(SEQopts(weighted = TRUE, weight.lower = 10, weight.upper = 11, seed = 1L)) + + # Both bands clamp all weights to a constant -> scale-invariant, identical fit + expect_equal(cf(clamp3), cf(clamp10), tolerance = 1e-6) + # Clamping away the real weight variation changes the fit + expect_false(isTRUE(all.equal(cf(clamp3), cf(varying), tolerance = 1e-6))) +}) + # ── SEQuential.R: validation paths ────────────────────────────────────────── test_that("SEQuential errors on non-binary outcome", { From f1666211328cfa27089ff6ceec4375b03602cdb7 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 4 Jun 2026 10:48:52 +0100 Subject: [PATCH 10/23] Add behavioural test for weight.p99 truncation Verifies weight.p99 = TRUE is equivalent to explicitly setting weight.lower/weight.upper to the p01/p99 reported in weight.statistics, and that it differs from an untruncated weighted fit. --- SEQTaRget/tests/testthat/test_coverage.R | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/SEQTaRget/tests/testthat/test_coverage.R b/SEQTaRget/tests/testthat/test_coverage.R index 1730ae86..638a2900 100644 --- a/SEQTaRget/tests/testthat/test_coverage.R +++ b/SEQTaRget/tests/testthat/test_coverage.R @@ -299,6 +299,27 @@ test_that("weight.lower/weight.upper truncate the weights used in the outcome fi expect_false(isTRUE(all.equal(cf(clamp3), cf(varying), tolerance = 1e-6))) }) +test_that("weight.p99 truncates at the 1st/99th percentile weights", { + skip_on_cran() + # weight.p99 = TRUE sets weight.lower/upper to the p01/p99 of the (untruncated) + # weights, which are reported in weight.statistics. So it must be equivalent to + # explicitly passing those percentiles as the bounds, and must differ from an + # untruncated weighted fit. + cf <- function(m) coef(m@outcome.model[[1]][[1]]) + args <- list("ID", "time", "eligible", "tx_init", "outcome", list("N", "L", "P"), list("sex"), + method = "censoring") + mk <- function(opts) suppressWarnings(do.call(SEQuential, c(list(data = copy(SEQdata)), args, + list(options = opts, verbose = FALSE)))) + + p99 <- mk(SEQopts(weighted = TRUE, weight.p99 = TRUE, seed = 1L)) + ws <- p99@weight.statistics[[1]][[1]] + expl <- mk(SEQopts(weighted = TRUE, weight.lower = ws$p01, weight.upper = ws$p99, seed = 1L)) + none <- mk(SEQopts(weighted = TRUE, seed = 1L)) + + expect_equal(cf(p99), cf(expl), tolerance = 1e-8) + expect_false(isTRUE(all.equal(cf(p99), cf(none), tolerance = 1e-6))) +}) + # ── SEQuential.R: validation paths ────────────────────────────────────────── test_that("SEQuential errors on non-binary outcome", { From 84309b324990e567663894fa71ccfe3498d40979 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 4 Jun 2026 10:51:33 +0100 Subject: [PATCH 11/23] Add behavioural test for followup.include/trial.include Verifies each flag adds or drops its follow-up / trial terms (and their squares) in the fitted outcome-model coefficients, leaving the other flag's terms intact. --- SEQTaRget/tests/testthat/test_coverage.R | 29 ++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/SEQTaRget/tests/testthat/test_coverage.R b/SEQTaRget/tests/testthat/test_coverage.R index 638a2900..14a4f4b9 100644 --- a/SEQTaRget/tests/testthat/test_coverage.R +++ b/SEQTaRget/tests/testthat/test_coverage.R @@ -320,6 +320,35 @@ test_that("weight.p99 truncates at the 1st/99th percentile weights", { expect_false(isTRUE(all.equal(cf(p99), cf(none), tolerance = 1e-6))) }) +# ── internal_covariates.R: followup.include / trial.include ────────────────── + +test_that("followup.include / trial.include add or drop their outcome-model terms", { + skip_on_cran() + # These flags control whether the follow-up and trial terms (and their squares) + # enter the outcome model formula (internal_covariates.R), so the effect is + # visible in the fitted coefficient names. + nm <- function(m) names(coef(m@outcome.model[[1]][[1]])) + args <- list("ID", "time", "eligible", "tx_init", "outcome", list("N", "L", "P"), list("sex"), + method = "ITT") + mk <- function(opts) suppressWarnings(do.call(SEQuential, c(list(data = copy(SEQdata)), args, + list(options = opts, verbose = FALSE)))) + + both <- nm(mk(SEQopts())) # both included by default + no_fup <- nm(mk(SEQopts(followup.include = FALSE))) + no_trial <- nm(mk(SEQopts(trial.include = FALSE))) + + # Baseline: all four terms present + expect_true(all(c("followup", "followup_sq", "trial", "trial_sq") %in% both)) + + # followup.include = FALSE drops the follow-up terms but keeps the trial terms + expect_false(any(c("followup", "followup_sq") %in% no_fup)) + expect_true(all(c("trial", "trial_sq") %in% no_fup)) + + # trial.include = FALSE drops the trial terms but keeps the follow-up terms + expect_false(any(c("trial", "trial_sq") %in% no_trial)) + expect_true(all(c("followup", "followup_sq") %in% no_trial)) +}) + # ── SEQuential.R: validation paths ────────────────────────────────────────── test_that("SEQuential errors on non-binary outcome", { From 14065fe2dcbcf55dac557cc24652e026d72c29c0 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 4 Jun 2026 10:54:04 +0100 Subject: [PATCH 12/23] Add behavioural test for followup.class Verifies followup.class = TRUE encodes follow-up as a factor: the linear followup / followup_sq terms are gone and there is one dummy coefficient per non-reference follow-up level. --- SEQTaRget/tests/testthat/test_coverage.R | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/SEQTaRget/tests/testthat/test_coverage.R b/SEQTaRget/tests/testthat/test_coverage.R index 14a4f4b9..f84bf904 100644 --- a/SEQTaRget/tests/testthat/test_coverage.R +++ b/SEQTaRget/tests/testthat/test_coverage.R @@ -349,6 +349,29 @@ test_that("followup.include / trial.include add or drop their outcome-model term expect_true(all(c("followup", "followup_sq") %in% no_trial)) }) +# ── internal_models.R: followup.class ──────────────────────────────────────── + +test_that("followup.class encodes follow-up as a factor (one term per level)", { + skip_on_cran() + # followup.class = TRUE treats follow-up as categorical (internal_models.R coerces + # it to a factor), so the outcome model gains one dummy per non-reference level + # instead of the linear followup / followup_sq pair. It is exclusive with + # followup.include, so that is switched off here. + m <- suppressWarnings(SEQuential(copy(SEQdata), "ID", "time", "eligible", "tx_init", "outcome", + list("N", "L", "P"), list("sex"), method = "ITT", + options = SEQopts(followup.include = FALSE, followup.class = TRUE, + data.return = TRUE), verbose = FALSE)) + cf <- names(coef(m@outcome.model[[1]][[1]])) + dummies <- grep("^followup[0-9]+$", cf, value = TRUE) + + # Categorical, not continuous: no linear follow-up terms + expect_false("followup" %in% cf) + expect_false("followup_sq" %in% cf) + # One dummy per non-reference follow-up level + expect_gt(length(dummies), 2L) + expect_equal(length(dummies), length(unique(m@DT$followup)) - 1L) +}) + # ── SEQuential.R: validation paths ────────────────────────────────────────── test_that("SEQuential errors on non-binary outcome", { From 6bb857b020c9c579bd57b8667ca0e080434a6d24 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 4 Jun 2026 10:56:56 +0100 Subject: [PATCH 13/23] Add behavioural test for weight.lag_condition Previously untested. Verifies weight.lag_condition = TRUE fits each arm's weight model only on its treatment-lag stratum (per-arm observation counts differ and sum to the full data), while FALSE fits both arms on the full data (equal counts). --- SEQTaRget/tests/testthat/test_coverage.R | 26 ++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/SEQTaRget/tests/testthat/test_coverage.R b/SEQTaRget/tests/testthat/test_coverage.R index f84bf904..f6c671ad 100644 --- a/SEQTaRget/tests/testthat/test_coverage.R +++ b/SEQTaRget/tests/testthat/test_coverage.R @@ -372,6 +372,32 @@ test_that("followup.class encodes follow-up as a factor (one term per level)", { expect_equal(length(dummies), length(unique(m@DT$followup)) - 1L) }) +# ── internal_glmHelpers.R: weight.lag_condition ────────────────────────────── + +test_that("weight.lag_condition conditions each arm's weight model on its treatment lag", { + skip_on_cran() + # weight.lag_condition = TRUE (default) fits each treatment arm's weight model + # only on the rows in that arm's treatment-lag stratum (prepare.data_cached + # subsets on tx_lag == model); = FALSE fits both arms on the full data. The + # number of observations behind each fitted denominator model makes this visible. + args <- list("ID", "time", "eligible", "tx_init", "outcome", list("N", "L", "P"), list("sex"), + method = "censoring") + mk <- function(opts) suppressWarnings(do.call(SEQuential, c(list(data = copy(SEQdata)), args, + list(options = opts, verbose = FALSE)))) + # Observations behind each per-arm denominator model (fastglm df.null + 1) + nobs <- function(m) vapply(m@weight.statistics[[1]][[1]]$coef.denominator, + function(x) x$df.null + 1L, integer(1)) + + on <- nobs(mk(SEQopts(weighted = TRUE, weight.lag_condition = TRUE, seed = 1L))) + off <- nobs(mk(SEQopts(weighted = TRUE, weight.lag_condition = FALSE, seed = 1L))) + + # FALSE: both arms fit on the full data -> equal observation counts + expect_equal(off[[1]], off[[2]]) + # TRUE: arms fit on disjoint treatment-lag strata that partition that full data + expect_false(on[[1]] == on[[2]]) + expect_equal(on[[1]] + on[[2]], off[[1]]) +}) + # ── SEQuential.R: validation paths ────────────────────────────────────────── test_that("SEQuential errors on non-binary outcome", { From 5a3ddbe91955a879b79a2e094c936bed1affbebb Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 4 Jun 2026 10:58:50 +0100 Subject: [PATCH 14/23] Add behavioural test for followup.min / followup.max Verifies the expanded data (via expand.only) is clamped to the requested [followup.min, followup.max] window, while the unrestricted expansion spans beyond it. --- SEQTaRget/tests/testthat/test_coverage.R | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/SEQTaRget/tests/testthat/test_coverage.R b/SEQTaRget/tests/testthat/test_coverage.R index f6c671ad..2a25c1c6 100644 --- a/SEQTaRget/tests/testthat/test_coverage.R +++ b/SEQTaRget/tests/testthat/test_coverage.R @@ -398,6 +398,27 @@ test_that("weight.lag_condition conditions each arm's weight model on its treatm expect_equal(on[[1]] + on[[2]], off[[1]]) }) +# ── SEQexpand.R: followup.min / followup.max ───────────────────────────────── + +test_that("followup.min / followup.max restrict the expanded follow-up range", { + skip_on_cran() + # SEQexpand.R filters the expanded rows to followup in [followup.min, followup.max]. + # expand.only returns that expanded data.table directly. + mk <- function(opts) suppressWarnings(SEQuential(copy(SEQdata), "ID", "time", "eligible", + "tx_init", "outcome", list("N", "L", "P"), list("sex"), + method = "ITT", options = opts, verbose = FALSE)) + + full <- mk(SEQopts(expand.only = TRUE)) + lim <- mk(SEQopts(expand.only = TRUE, followup.min = 3, followup.max = 10)) + + # The unrestricted expansion genuinely extends past the requested window + expect_lt(min(full$followup), 3) + expect_gt(max(full$followup), 10) + # The restricted expansion is clamped to exactly [3, 10] and has fewer rows + expect_equal(range(lim$followup), c(3, 10)) + expect_lt(nrow(lim), nrow(full)) +}) + # ── SEQuential.R: validation paths ────────────────────────────────────────── test_that("SEQuential errors on non-binary outcome", { From f222950d185b2389e513287a651e1dda2af48f81 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 4 Jun 2026 11:02:22 +0100 Subject: [PATCH 15/23] Add behavioural test for weight.eligible_cols Verifies each arm's weight model is fit only on rows where its weight.eligible_cols indicator == 1, by checking the per-arm denominator observation counts drop versus an unfiltered fit. --- SEQTaRget/tests/testthat/test_coverage.R | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/SEQTaRget/tests/testthat/test_coverage.R b/SEQTaRget/tests/testthat/test_coverage.R index 2a25c1c6..cad8a0b2 100644 --- a/SEQTaRget/tests/testthat/test_coverage.R +++ b/SEQTaRget/tests/testthat/test_coverage.R @@ -419,6 +419,30 @@ test_that("followup.min / followup.max restrict the expanded follow-up range", { expect_lt(nrow(lim), nrow(full)) }) +# ── internal_weights.R: weight.eligible_cols ───────────────────────────────── + +test_that("weight.eligible_cols restricts the weight models to eligible rows", { + skip_on_cran() + # Each arm's weight model is fit only on rows where its weight.eligible_cols + # indicator == 1 (internal_weights.R). With a roughly half-on indicator, the + # per-arm denominator model is fit on fewer observations than without it. + d <- copy(SEQdata) + d[, welig := as.integer(N > median(N))] # balanced 0/1 eligibility indicator + args <- list("ID", "time", "eligible", "tx_init", "outcome", list("N", "L", "P"), list("sex"), + method = "censoring") + mk <- function(opts) suppressWarnings(do.call(SEQuential, c(list(data = copy(d)), args, + list(options = opts, verbose = FALSE)))) + nobs <- function(m) vapply(m@weight.statistics[[1]][[1]]$coef.denominator, + function(x) x$df.null + 1L, integer(1)) + + base <- nobs(mk(SEQopts(weighted = TRUE, seed = 1L))) + elig <- nobs(mk(SEQopts(weighted = TRUE, + weight.eligible_cols = list("welig", "welig"), seed = 1L))) + + # Both arms' models drop the ineligible (welig == 0) rows + expect_true(all(elig < base)) +}) + # ── SEQuential.R: validation paths ────────────────────────────────────────── test_that("SEQuential errors on non-binary outcome", { From 7feb05a1c84d73411d68b5ff3ed27b95948af2ec Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 4 Jun 2026 11:26:29 +0100 Subject: [PATCH 16/23] Fix numerator()/denominator() accessors returning NULL They looked up non-existent fields (n0.coef/d0.coef) instead of the coef.numerator/coef.denominator lists actually stored in weight.statistics, so they silently returned NULL for every weighted model. Index the per-arm models correctly and guard the ITT case where no treatment-weight models exist. Add a regression test. --- SEQTaRget/R/class_methods.R | 14 ++++++++---- SEQTaRget/tests/testthat/test_coverage.R | 28 ++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/SEQTaRget/R/class_methods.R b/SEQTaRget/R/class_methods.R index 6232a365..b87013ce 100644 --- a/SEQTaRget/R/class_methods.R +++ b/SEQTaRget/R/class_methods.R @@ -184,8 +184,11 @@ setMethod("show", "SEQoutput", function(object) { numerator <- function(object) { if (!is(object, "SEQoutput")) stop("Object is not of class SEQoutput") if (!object@params@weighted) stop("SEQuential process was not weighted") - return(list(numerator0 = lapply(object@weight.statistics, function(x) x[[1]]$n0.coef), - numerator1 = lapply(object@weight.statistics, function(x) x[[1]]$n1.coef))) + arm <- function(i) lapply(object@weight.statistics, function(x) { + coefs <- x[[1]]$coef.numerator + if (length(coefs) >= i) coefs[[i]] else NULL + }) + return(list(numerator0 = arm(1L), numerator1 = arm(2L))) } #' Retrieves Denominator Models from SEQuential object @@ -198,8 +201,11 @@ numerator <- function(object) { denominator <- function(object) { if (!is(object, "SEQoutput")) stop("Object is not of class SEQoutput") if (!object@params@weighted) stop("SEQuential process was not weighted") - return(list(denominator0 = lapply(object@weight.statistics, function(x) x[[1]]$d0.coef), - denominator1 = lapply(object@weight.statistics, function(x) x[[1]]$d1.coef))) + arm <- function(i) lapply(object@weight.statistics, function(x) { + coefs <- x[[1]]$coef.denominator + if (length(coefs) >= i) coefs[[i]] else NULL + }) + return(list(denominator0 = arm(1L), denominator1 = arm(2L))) } #' Retrieves Outcome Models from SEQuential object diff --git a/SEQTaRget/tests/testthat/test_coverage.R b/SEQTaRget/tests/testthat/test_coverage.R index cad8a0b2..5af141c6 100644 --- a/SEQTaRget/tests/testthat/test_coverage.R +++ b/SEQTaRget/tests/testthat/test_coverage.R @@ -79,6 +79,34 @@ test_that("numerator/denominator error on unweighted model", { expect_error(denominator(model), "weighted") }) +test_that("numerator/denominator return the per-arm weight models", { + # Regression test: these accessors looked up non-existent fields (n0.coef etc.) + # and silently returned NULL. They should return the fitted per-arm numerator + # and denominator weight models held in weight.statistics$coef.numerator / + # $coef.denominator. + skip_on_cran() + model <- suppressWarnings(SEQuential(copy(SEQdata), "ID", "time", "eligible", "tx_init", "outcome", + list("N", "L", "P"), list("sex"), + method = "censoring", options = SEQopts(weighted = TRUE), verbose = FALSE)) + num <- numerator(model) + den <- denominator(model) + expect_named(num, c("numerator0", "numerator1")) + expect_named(den, c("denominator0", "denominator1")) + + # Non-NULL fitted models with extractable coefficients (one per treatment arm) + expect_false(is.null(num$numerator0[[1]])) + expect_false(is.null(num$numerator1[[1]])) + expect_true(length(coef(num$numerator0[[1]])) > 0) + expect_true(length(coef(den$denominator1[[1]])) > 0) + + # They are exactly the models stored in weight.statistics + ws <- model@weight.statistics[[1]][[1]] + expect_identical(num$numerator0[[1]], ws$coef.numerator[[1]]) + expect_identical(num$numerator1[[1]], ws$coef.numerator[[2]]) + expect_identical(den$denominator0[[1]], ws$coef.denominator[[1]]) + expect_identical(den$denominator1[[1]], ws$coef.denominator[[2]]) +}) + test_that("km_curve errors on invalid plot.type", { model <- SEQuential(copy(SEQdata), "ID", "time", "eligible", "tx_init", "outcome", list("N", "L", "P"), list("sex"), From 265f96e0d768734d1ebf0a539f692cfa0ff98cd7 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 4 Jun 2026 11:27:03 +0100 Subject: [PATCH 17/23] Document that weight truncation applies only to the outcome-model fit Clarify in the weight.lower/weight.upper/weight.p99 docs that weight.statistics and the returned data report the untruncated weights; truncation affects only the weights used to fit the outcome model. --- SEQTaRget/R/SEQopts.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/SEQTaRget/R/SEQopts.R b/SEQTaRget/R/SEQopts.R index 8e4a038a..5c215348 100644 --- a/SEQTaRget/R/SEQopts.R +++ b/SEQTaRget/R/SEQopts.R @@ -57,11 +57,11 @@ #' @param visit.denominator String: visit denominator covariates to the right hand side of a formula object #' @param visit.numerator String: visit numerator covariates to the right hand side of a formula object #' @param weight.eligible_cols List: list of column names for indicator columns defining which weights are eligible for weight models - in order of \code{treat.level} -#' @param weight.lower Numeric: IPCW weights truncated at this lower bound, must be non-negative, default is `0` +#' @param weight.lower Numeric: IPCW weights truncated at this lower bound, must be non-negative, default is `0`. Truncation is applied only to the weights used to fit the outcome model; the weights reported in \code{weight.statistics} and in the returned data (when \code{data.return = TRUE}) are the untruncated values. #' @param weight.lag_condition Logical: whether weights should be conditioned on treatment lag value, default `TRUE` -#' @param weight.p99 Logical: forces weight truncation at 1st and 99th percentile weights, will override provided \code{weight.upper} and \code{weight.lower} +#' @param weight.p99 Logical: forces weight truncation at 1st and 99th percentile weights, will override provided \code{weight.upper} and \code{weight.lower}. The percentiles are taken from the untruncated weight distribution (as reported in \code{weight.statistics}), and as with \code{weight.lower}/\code{weight.upper} the truncation affects only the weights used to fit the outcome model. #' @param weight.preexpansion Logical: whether weighting should be done on pre-expanded data, default `TRUE` -#' @param weight.upper Numeric: weights truncated at upper end at this weight, default is `Inf` +#' @param weight.upper Numeric: weights truncated at upper end at this weight, default is `Inf`. As with \code{weight.lower}, truncation affects only the weights used to fit the outcome model, not those reported in \code{weight.statistics} or the returned data. #' @param weighted Logical: whether or not to perform weighted analysis, default is `FALSE` #' @returns An object of class 'SEQopts' #' @export From 607166da1096a1e63e0813baf149ac864dd5b639 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 4 Jun 2026 11:27:10 +0100 Subject: [PATCH 18/23] Document --- SEQTaRget/man/SEQopts.Rd | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/SEQTaRget/man/SEQopts.Rd b/SEQTaRget/man/SEQopts.Rd index cf6b9d1c..73c171ed 100644 --- a/SEQTaRget/man/SEQopts.Rd +++ b/SEQTaRget/man/SEQopts.Rd @@ -185,15 +185,15 @@ SEQopts( \item{weight.eligible_cols}{List: list of column names for indicator columns defining which weights are eligible for weight models - in order of \code{treat.level}} -\item{weight.lower}{Numeric: IPCW weights truncated at this lower bound, must be non-negative, default is \code{0}} +\item{weight.lower}{Numeric: IPCW weights truncated at this lower bound, must be non-negative, default is \code{0}. Truncation is applied only to the weights used to fit the outcome model; the weights reported in \code{weight.statistics} and in the returned data (when \code{data.return = TRUE}) are the untruncated values.} \item{weight.lag_condition}{Logical: whether weights should be conditioned on treatment lag value, default \code{TRUE}} -\item{weight.p99}{Logical: forces weight truncation at 1st and 99th percentile weights, will override provided \code{weight.upper} and \code{weight.lower}} +\item{weight.p99}{Logical: forces weight truncation at 1st and 99th percentile weights, will override provided \code{weight.upper} and \code{weight.lower}. The percentiles are taken from the untruncated weight distribution (as reported in \code{weight.statistics}), and as with \code{weight.lower}/\code{weight.upper} the truncation affects only the weights used to fit the outcome model.} \item{weight.preexpansion}{Logical: whether weighting should be done on pre-expanded data, default \code{TRUE}} -\item{weight.upper}{Numeric: weights truncated at upper end at this weight, default is \code{Inf}} +\item{weight.upper}{Numeric: weights truncated at upper end at this weight, default is \code{Inf}. As with \code{weight.lower}, truncation affects only the weights used to fit the outcome model, not those reported in \code{weight.statistics} or the returned data.} \item{weighted}{Logical: whether or not to perform weighted analysis, default is \code{FALSE}} } From 9b7603ee5a8e88d0f051001e5390469e63309d99 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 4 Jun 2026 12:37:52 +0100 Subject: [PATCH 19/23] Add thanks to Francesca --- SEQTaRget/NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SEQTaRget/NEWS.md b/SEQTaRget/NEWS.md index 87fbd26a..0b634d46 100644 --- a/SEQTaRget/NEWS.md +++ b/SEQTaRget/NEWS.md @@ -13,7 +13,7 @@ * Remove mention of units from time in docs. * Improve memory usage in the bootstrapping. * Fix off-by-one labeling in survival output so that `followup = k` correctly represents survival after `k` intervals, adding a row at `followup = survival.max + 1` for the final interval's estimate. -* Fix expansion bug where subjects experiencing the outcome early were incorrectly carried forward with `outcome=0` rows from subsequent periods by truncating each trial at the first event row +* Fix expansion bug where subjects experiencing the outcome early were incorrectly carried forward with `outcome=0` rows from subsequent periods by truncating each trial at the first event row (thanks, @francescazaccagnino) * Add `expand.only` option to `SEQopts()`. When `TRUE`, `SEQuential()` returns the expanded `data.table` directly and skips the analysis steps, for users who want to inspect or store the expanded dataset on its own. * Fix `followup.spline = TRUE` so the basis is genuinely non-linear. Splines are now built into the model formula via `splines::ns()` instead of being applied as a single-column transform of `followup`, and the new `followup.spline.df` option (default `4`) controls the number of basis functions. The treatment-by-followup interaction now uses the same spline basis. Knots are baked from the full expanded `followup` once at fit time so the basis is identical at fit and prediction time across bootstraps and survival grids. Internally, formula column extraction now uses `all.vars()`, so user-supplied covariates may include `ns()`, `bs()`, `I()`, `factor()`, `poly()` etc. without breaking expansion. * Rename `format.time()` to `format_time()` because it wasn't an S3 method and hence was causing roxygen2 to write incorrect information in its helpfile. From ef7ef870810987d37bdeecd734f4181c0ea2e4e5 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Sat, 6 Jun 2026 20:19:07 +0100 Subject: [PATCH 20/23] Pass the formula cache to inline.pred() in the weight models Routes the weight numerator/denominator/LTFU/visit predictions through the cached formula path, matching the survival and hazard paths. Results are bit-identical; this is a consistency cleanup, not a perf change. --- SEQTaRget/R/internal_weights.R | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/SEQTaRget/R/internal_weights.R b/SEQTaRget/R/internal_weights.R index 628456d9..27676534 100644 --- a/SEQTaRget/R/internal_weights.R +++ b/SEQTaRget/R/internal_weights.R @@ -126,8 +126,8 @@ internal.weights <- function(DT, data, params, cache) { out[tx_lag == level, `:=`(numerator = 1, denominator = 1)] } else { out[tx_lag == level, `:=`( - numerator = inline.pred(numerator_models[[i]], .SD, params, "numerator", multi = params@multinomial, target = level), - denominator = inline.pred(denominator_models[[i]], .SD, params, "denominator", multi = params@multinomial, target = level))] + numerator = inline.pred(numerator_models[[i]], .SD, params, "numerator", multi = params@multinomial, target = level, cache = cache), + denominator = inline.pred(denominator_models[[i]], .SD, params, "denominator", multi = params@multinomial, target = level, cache = cache))] if (i == 1) { out[tx_lag == level & get(params@treatment) == params@treat.level[[i]], @@ -146,7 +146,7 @@ internal.weights <- function(DT, data, params, cache) { if (!is.na(col)) { out[tx_lag == level & get(col) != 1, - denominator := inline.pred(denominator_models[[i]], .SD, params, "denominator", multi = multi, target = level)] + denominator := inline.pred(denominator_models[[i]], .SD, params, "denominator", multi = multi, target = level, cache = cache)] if (i == 1) { out[tx_lag == level & get(params@treatment) == params@treat.level[[i]] & @@ -170,7 +170,7 @@ internal.weights <- function(DT, data, params, cache) { col <- if (params@excused) params@excused.cols[[i]] else params@deviation.excused_cols[[i]] if (!is.na(col)) { out[get(params@treatment) == level & get(col) == 0, - numerator := inline.pred(numerator_models[[i]], .SD, params, "numerator", multi = multi, target = level) + numerator := inline.pred(numerator_models[[i]], .SD, params, "numerator", multi = multi, target = level, cache = cache) ] } } @@ -181,15 +181,15 @@ internal.weights <- function(DT, data, params, cache) { if (params@LTFU) { if (params@method == "ITT") out <- out[, `:=` (numerator = 1, denominator = 1)] - out <- out[, `:=` (cense1.numerator = inline.pred(cense.numerator, .SD, params, "numerator", "LTFU"), - cense1.denominator = inline.pred(cense.denominator, .SD, params, "denominator", "LTFU")) + out <- out[, `:=` (cense1.numerator = inline.pred(cense.numerator, .SD, params, "numerator", "LTFU", cache = cache), + cense1.denominator = inline.pred(cense.denominator, .SD, params, "denominator", "LTFU", cache = cache)) ][, cense1 := cense1.numerator / cense1.denominator] } if (!is.na(params@visit)) { if (params@method == "ITT") out <- out[, `:=` (numerator = 1, denominator = 1)] - out <- out[, `:=` (visit.numerator = inline.pred(visit.numerator, .SD, params, "numerator", "visit"), - visit.denominator = inline.pred(visit.denominator, .SD, params, "denominator", "visit")) + out <- out[, `:=` (visit.numerator = inline.pred(visit.numerator, .SD, params, "numerator", "visit", cache = cache), + visit.denominator = inline.pred(visit.denominator, .SD, params, "denominator", "visit", cache = cache)) ][, visit := visit.numerator / visit.denominator] } From 702b73920fa6946a965ed73f62935ce5e8aea59d Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Sun, 7 Jun 2026 07:30:56 +0100 Subject: [PATCH 21/23] Fix argument ordering --- SEQTaRget/R/SEQopts.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/SEQTaRget/R/SEQopts.R b/SEQTaRget/R/SEQopts.R index 5c215348..5d2d7c13 100644 --- a/SEQTaRget/R/SEQopts.R +++ b/SEQTaRget/R/SEQopts.R @@ -22,14 +22,13 @@ #' @param excused Logical: in the case of censoring, whether there is an excused condition, default is `FALSE` #' @param excused.cols List: list of column names for treatment switch excuses - should be the same length, and ordered the same as \code{treat.level} #' @param fastglm.method Integer: decomposition method for fastglm (`0L`-column-pivoted QR, `1L`-unpivoted QR, `2L`-LLT Cholesky, `3L`-LDLT Cholesky), default is `2L` -#' @param glm.package Character: package to use for fitting GLMs, either `"fastglm"` (default) or `"parglm"`. When `"parglm"` is selected the `nthreads` option controls the number of threads passed to `parglm::parglm.fit()`. For most realistic SEQTaRget workloads (expanded datasets up to approximately a few million rows) `"fastglm"` is faster; `"parglm"` may help only on substantially larger datasets where the parallel chunking outweighs its setup overhead. -#' @param parglm.control A control object from [parglm::parglm.control()] to pass to `parglm::parglm.fit()`. Only used when `glm.package = "parglm"`. Defaults to `parglm::parglm.control(method = "FAST")`. If you encounter a `chol(): decomposition failed` error (e.g. with near-singular model matrices on large datasets), pass `parglm.control = parglm::parglm.control(method = "LAPACK")` to use the more numerically stable QR decomposition instead, or switch to using the fastglm backend. #' @param followup.class Logical: treat followup as a class, e.g. expands every time to it's own indicator column, default is `FALSE` #' @param followup.include Logical: whether or not to include 'followup' and 'followup_squared' in the outcome model, default is `TRUE` #' @param followup.max Numeric: maximum time to expand about, default is `Inf` (no maximum) #' @param followup.min Numeric: minimum follow-up time since trial enrollment to include, must be non-negative, default is `0` #' @param followup.spline Logical: treat followup as a natural cubic spline (`splines::ns()`), default is `FALSE` #' @param followup.spline.df Integer: degrees of freedom passed to `splines::ns()` when `followup.spline = TRUE`. With `df = k`, `ns()` places `k - 1` interior knots at quantiles of `followup`. Must be `>= 1`; `df = 1` is equivalent to a linear term and is generally not what you want. Default is `4` (3 interior knots). +#' @param glm.package Character: package to use for fitting GLMs, either `"fastglm"` (default) or `"parglm"`. When `"parglm"` is selected the `nthreads` option controls the number of threads passed to `parglm::parglm.fit()`. For most realistic SEQTaRget workloads (expanded datasets up to approximately a few million rows) `"fastglm"` is faster; `"parglm"` may help only on substantially larger datasets where the parallel chunking outweighs its setup overhead. #' @param hazard Logical: hazard error calculation instead of survival estimation, default is `FALSE` #' @param indicator.baseline String: identifier for baseline variables in \code{covariates, numerator, denominator} - intended as an override #' @param indicator.squared String: identifier for squared variables in \code{covariates, numerator, denominator} - intended as an override @@ -39,6 +38,7 @@ #' @param nthreads Integer: number of threads to use for data.table processing, default is [data.table::getDTthreads()] #' @param numerator String: numerator covariates to the right hand side of a formula object #' @param parallel Logical: define if the SEQuential process is run in parallel, default is `FALSE` +#' @param parglm.control A control object from [parglm::parglm.control()] to pass to `parglm::parglm.fit()`. Only used when `glm.package = "parglm"`. Defaults to `parglm::parglm.control(method = "FAST")`. If you encounter a `chol(): decomposition failed` error (e.g. with near-singular model matrices on large datasets), pass `parglm.control = parglm::parglm.control(method = "LAPACK")` to use the more numerically stable QR decomposition instead, or switch to using the fastglm backend. #' @param plot.colors Character: Colors for output plot if \code{km.curves = TRUE}, defaulted to ggplot2 defaults #' @param plot.labels Character: Color labels for output plot if \code{km.curves = TRUE} in order e.g. \code{c("risk.0", "risk.1")} #' @param plot.subtitle Character: Subtitle for output plot if \code{km.curves = TRUE} @@ -72,11 +72,12 @@ SEQopts <- function(bootstrap = FALSE, bootstrap.nboot = 100, bootstrap.sample = cense = NA, cense.denominator = NA, cense.eligible = NA, cense.numerator = NA, compevent = NA, covariates = NA, data.return = FALSE, denominator = NA, deviation = FALSE, deviation.col = NA, deviation.conditions = c(NA, NA), deviation.excused = FALSE, deviation.excused_cols = c(NA, NA), - excused = FALSE, excused.cols = c(NA, NA), expand.only = FALSE, fastglm.method = 2L, glm.package = "fastglm", parglm.control = NULL, + excused = FALSE, excused.cols = c(NA, NA), expand.only = FALSE, fastglm.method = 2L, followup.class = FALSE, followup.include = TRUE, followup.max = Inf, followup.min = 0, followup.spline = FALSE, followup.spline.df = 4L, + glm.package = "fastglm", hazard = FALSE, indicator.baseline = "_bas", indicator.squared = "_sq", km.curves = FALSE, multinomial = FALSE, ncores = availableCores(omit = 1L), nthreads = getDTthreads(), - numerator = NA, parallel = FALSE, plot.colors = c("#F8766D", "#00BFC4", "#555555"), plot.labels = NA, plot.subtitle = NA, plot.title = NA, plot.type = "survival", + numerator = NA, parallel = FALSE, parglm.control = NULL, plot.colors = c("#F8766D", "#00BFC4", "#555555"), plot.labels = NA, plot.subtitle = NA, plot.title = NA, plot.type = "survival", risk.times = NA, seed = NULL, selection.first_trial = FALSE, selection.prob = 0.8, selection.random = FALSE, subgroup = NA, survival.max = Inf, treat.level = c(0, 1), trial.include = TRUE, From 9536e79e4e3c15f09142fb0635059c720904f41d Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Sun, 7 Jun 2026 07:31:04 +0100 Subject: [PATCH 22/23] Document --- SEQTaRget/man/SEQopts.Rd | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/SEQTaRget/man/SEQopts.Rd b/SEQTaRget/man/SEQopts.Rd index 73c171ed..ace32f20 100644 --- a/SEQTaRget/man/SEQopts.Rd +++ b/SEQTaRget/man/SEQopts.Rd @@ -27,14 +27,13 @@ SEQopts( excused.cols = c(NA, NA), expand.only = FALSE, fastglm.method = 2L, - glm.package = "fastglm", - parglm.control = NULL, followup.class = FALSE, followup.include = TRUE, followup.max = Inf, followup.min = 0, followup.spline = FALSE, followup.spline.df = 4L, + glm.package = "fastglm", hazard = FALSE, indicator.baseline = "_bas", indicator.squared = "_sq", @@ -44,6 +43,7 @@ SEQopts( nthreads = getDTthreads(), numerator = NA, parallel = FALSE, + parglm.control = NULL, plot.colors = c("#F8766D", "#00BFC4", "#555555"), plot.labels = NA, plot.subtitle = NA, @@ -115,10 +115,6 @@ SEQopts( \item{fastglm.method}{Integer: decomposition method for fastglm (\code{0L}-column-pivoted QR, \code{1L}-unpivoted QR, \code{2L}-LLT Cholesky, \code{3L}-LDLT Cholesky), default is \code{2L}} -\item{glm.package}{Character: package to use for fitting GLMs, either \code{"fastglm"} (default) or \code{"parglm"}. When \code{"parglm"} is selected the \code{nthreads} option controls the number of threads passed to \code{parglm::parglm.fit()}. For most realistic SEQTaRget workloads (expanded datasets up to approximately a few million rows) \code{"fastglm"} is faster; \code{"parglm"} may help only on substantially larger datasets where the parallel chunking outweighs its setup overhead.} - -\item{parglm.control}{A control object from \code{\link[parglm:parglm.control]{parglm::parglm.control()}} to pass to \code{parglm::parglm.fit()}. Only used when \code{glm.package = "parglm"}. Defaults to \code{parglm::parglm.control(method = "FAST")}. If you encounter a \verb{chol(): decomposition failed} error (e.g. with near-singular model matrices on large datasets), pass \code{parglm.control = parglm::parglm.control(method = "LAPACK")} to use the more numerically stable QR decomposition instead, or switch to using the fastglm backend.} - \item{followup.class}{Logical: treat followup as a class, e.g. expands every time to it's own indicator column, default is \code{FALSE}} \item{followup.include}{Logical: whether or not to include 'followup' and 'followup_squared' in the outcome model, default is \code{TRUE}} @@ -131,6 +127,8 @@ SEQopts( \item{followup.spline.df}{Integer: degrees of freedom passed to \code{splines::ns()} when \code{followup.spline = TRUE}. With \code{df = k}, \code{ns()} places \code{k - 1} interior knots at quantiles of \code{followup}. Must be \verb{>= 1}; \code{df = 1} is equivalent to a linear term and is generally not what you want. Default is \code{4} (3 interior knots).} +\item{glm.package}{Character: package to use for fitting GLMs, either \code{"fastglm"} (default) or \code{"parglm"}. When \code{"parglm"} is selected the \code{nthreads} option controls the number of threads passed to \code{parglm::parglm.fit()}. For most realistic SEQTaRget workloads (expanded datasets up to approximately a few million rows) \code{"fastglm"} is faster; \code{"parglm"} may help only on substantially larger datasets where the parallel chunking outweighs its setup overhead.} + \item{hazard}{Logical: hazard error calculation instead of survival estimation, default is \code{FALSE}} \item{indicator.baseline}{String: identifier for baseline variables in \code{covariates, numerator, denominator} - intended as an override} @@ -149,6 +147,8 @@ SEQopts( \item{parallel}{Logical: define if the SEQuential process is run in parallel, default is \code{FALSE}} +\item{parglm.control}{A control object from \code{\link[parglm:parglm.control]{parglm::parglm.control()}} to pass to \code{parglm::parglm.fit()}. Only used when \code{glm.package = "parglm"}. Defaults to \code{parglm::parglm.control(method = "FAST")}. If you encounter a \verb{chol(): decomposition failed} error (e.g. with near-singular model matrices on large datasets), pass \code{parglm.control = parglm::parglm.control(method = "LAPACK")} to use the more numerically stable QR decomposition instead, or switch to using the fastglm backend.} + \item{plot.colors}{Character: Colors for output plot if \code{km.curves = TRUE}, defaulted to ggplot2 defaults} \item{plot.labels}{Character: Color labels for output plot if \code{km.curves = TRUE} in order e.g. \code{c("risk.0", "risk.1")}} From 650d9c9ef7d0f314020ab294420af1cca5f03d87 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Tue, 2 Jun 2026 17:32:10 +0100 Subject: [PATCH 23/23] Update NEWS.md --- SEQTaRget/NEWS.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/SEQTaRget/NEWS.md b/SEQTaRget/NEWS.md index 0b634d46..6c90a7ea 100644 --- a/SEQTaRget/NEWS.md +++ b/SEQTaRget/NEWS.md @@ -7,6 +7,13 @@ * Report competing events per treatment arm in the `@info` slot as `info$compevent.unique` and `info$compevent.nonunique`, mirroring the structure of `info$outcome.unique` / `info$outcome.nonunique`. Both are grouped by baseline treatment; the non-unique table counts all competing event occurrences in the expanded data and the unique table counts distinct subjects who experienced the competing event. Both are `NA` when no `compevent` is specified. * From a `SEQuential()` fit, populate `weight.statistics` and `outcome.model` when `hazard = TRUE`. * Warn when the `numerator` and `denominator` weight models are given identical covariates. In that case the stabilized weights all equal 1 (i.e., no weighting), which is usually a typo in the `denominator` argument. +* Improve the `SEQuential()` helpfile by adding a per-protocol example. +* Add behavioural tests that `selection.random = TRUE` retains all treated trial-starts, subsamples control trial-starts to the requested `selection.prob` fraction, and is reproducible under a fixed seed; rename the previous smoke test that did not actually exercise the feature. +* Add behavioural tests for; `weight.lower`/`weight.upper` truncation, `weight.p99` truncation, `followup.include`/`trial.include`, `followup.class`, `weight.lag_condition`, `followup.min`/`followup.max`, and `weight.eligible_cols`. +* Fix `numerator()` and `denominator()` returning `NULL` for every weighted model; they now return the fitted per-arm numerator/denominator weight models. +* Document that weight truncation applies only to the outcome-model fit. +* Pass the formula cache to `inline.pred()` in the weight models +* Fix `SEQOpts()` argument ordering. # SEQTaRget v1.4.2