From cc81d097192d3e5e3c79d7fdfb3c9a37bad0ee88 Mon Sep 17 00:00:00 2001
From: Jamie Lentin <jm@lentin.co.uk>
Date: Mon, 17 Nov 2025 12:32:26 +0000
Subject: [PATCH 1/2] g3_iterative: Add g3_iterative_default_grouping

Guess a standard grouping from likelihood naming, use this by default.

This won't get everything, but you can use the output and modify it.
---
 NAMESPACE           |  1 +
 R/g3_iterative.R    | 50 ++++++++++++++++++++++++++++++++++++++++++++-
 man/g3_iterative.Rd | 20 +++++++++++++++++-
 3 files changed, 69 insertions(+), 2 deletions(-)
diff --git a/NAMESPACE b/NAMESPACE
index dba280e..ba060ea 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -14,6 +14,7 @@ export(g3_data)
 export(g3_fit)
 export(g3_init_guess)
 export(g3_iterative)
+export(g3_iterative_default_grouping)
 export(g3_iterative_setup)
 export(g3_jitter)
 export(g3_leaveout)
diff --git a/R/g3_iterative.R b/R/g3_iterative.R
index 9d96c49..1dbf7a6 100644
--- a/R/g3_iterative.R
+++ b/R/g3_iterative.R
@@ -75,7 +75,7 @@
 #' @export
 g3_iterative <- function(gd, wgts = 'WGTS',
                           model, params.in, 
-                          grouping = list(),
+                          grouping = g3_iterative_default_grouping(params.in),
                           use_parscale = TRUE,
                           method = 'BFGS',
                           control = list(),
@@ -374,6 +374,54 @@ g3_iterative <- function(gd, wgts = 'WGTS',
   return(params_final)  
 }
 
+# Generate default grouping, combining all likelihoods that share an (nll_source) into one group; list() if none match
+# NB: For this to work, nll_names need to be in the form (nll_source)_(nll_dist), where (nll_dist) matches one of the (nll_dist_names)
+#' @param params.in Initial parameters to use with the model
+#' @param nll_dist_names Character vector of postfixes to consider when looking for groupings
+#' @return
+#' \subsection{g3_iterative_default_grouping}{
+#' A list of component groups to component names, as required by the \var{grouping} parameter
+#' }
+#' @details
+#' \subsection{g3_iterative_default_grouping}{
+#' This assumes that your likelihood component names are of the form ``(nll_group)_(nll_dist)``,
+#' where ``(nll_dist)`` matches one of the regexes in \var{nll_dist_names}.
+#' For example, ``afleet_ldist``, ``afleet_aldist``, ``bfleet_ldist``. ``afleet`` & ``bfleet`` will be the groups used.
+#' }
+#' @rdname g3_iterative
+#' @export
+g3_iterative_default_grouping <- function (params.in, nll_dist_names = c("ldist", "aldist", "matp", "sexdist", "SI", "len\\d+SI")) {
+  # Weight parameters are named (dist)_(function)_(nll_source)_(nll_dist)_weight
+  weight_re <- paste0(
+    "^",
+    "(?<dist>.dist|.sparse)_",
+    "(?<function>surveyindices_log|[a-z]+)_",
+    "(?<nll_source>.+)_",
+    "(?<nll_dist>", paste0(nll_dist_names, collapse = "|"), ")_",
+    "weight$"
+  )
+
+  # No matching weights means no groups; rbind() of zero matches cannot be named below
+  weight_names <- grep(weight_re, rownames(params.in), value = TRUE, perl = TRUE)
+  if (length(weight_names) == 0) return(list())
+
+  # Break up names into a data.frame of param_name -> regex groups
+  weight_parts <- as.data.frame(do.call(rbind, regmatches(weight_names, regexec(weight_re, weight_names, perl = TRUE))))
+  names(weight_parts)[[1]] <- "param_name"
+  weight_parts$value <- params.in[weight_parts$param_name, "value"]
+
+  # Remove exactly the zero-weighted parameters we warn about (NA / negative values are left alone)
+  is_zero <- !is.na(weight_parts$value) & weight_parts$value == 0
+  if (any(is_zero)) {
+    warning("Parameters ", paste(weight_parts[is_zero, "param_name"], collapse = ", ") , " have a value of 0, removing from grouping")
+    weight_parts <- weight_parts[!is_zero, , drop = FALSE]
+  }
+
+  # Group rows together into a list of nll_source -> vector of (nll_source)_(nll_dist), in order of first appearance
+  nll_names <- paste0(weight_parts$nll_source, "_", weight_parts$nll_dist)
+  split(nll_names, factor(weight_parts$nll_source, levels = unique(weight_parts$nll_source)))
+}
+
 #' @title Initial parameters for iterative re-weighting
 #' @param lik_out A likelihood summary dataframe. The output of g3_lik_out(model, param)
 #' @param grouping A list describing how to group likelihood components for iterative re-weighting
diff --git a/man/g3_iterative.Rd b/man/g3_iterative.Rd
index 658264d..3c042b5 100644
--- a/man/g3_iterative.Rd
+++ b/man/g3_iterative.Rd
@@ -2,6 +2,7 @@
 % Please edit documentation in R/g3_iterative.R
 \name{g3_iterative}
 \alias{g3_iterative}
+\alias{g3_iterative_default_grouping}
 \title{Iterative re-weighting}
 \usage{
 g3_iterative(
@@ -9,7 +10,7 @@ g3_iterative(
   wgts = "WGTS",
   model,
   params.in,
-  grouping = list(),
+  grouping = g3_iterative_default_grouping(params.in),
   use_parscale = TRUE,
   method = "BFGS",
   control = list(),
@@ -19,6 +20,11 @@ g3_iterative(
   serial_compile = FALSE,
   mc.cores = parallel::detectCores()
 )
+
+g3_iterative_default_grouping(
+  params.in,
+  nll_dist_names = c("ldist", "aldist", "matp", "sexdist", "SI", "len\\\\d+SI")
+)
 }
 \arguments{
 \item{gd}{Directory to store output}
@@ -46,9 +52,15 @@ g3_iterative(
 \item{serial_compile}{g3_tmb_adfun will be run in serial mode (i.e., not in parallel), potentially helping with memory issues}
 
 \item{mc.cores}{number of cores used, defaults to the number of available cores}
+
+\item{nll_dist_names}{Character vector of postfixes to consider when looking for groupings}
 }
 \value{
 Final set of parameters
+
+\subsection{g3_iterative_default_grouping}{
+A list of component groups to component names, as required by the \var{grouping} parameter
+}
 }
 \description{
 Perform multiple optimisation runs of a model, reweighting with each run
@@ -109,4 +121,10 @@ cv_floor setting. The cv_floor parameter sets the minimum of the estimated compo
 variance and thus the maximum of the inverser variance.
 
 Weights are calculated using inverse-variance weighting (\eqn{1/\sigma^2}), and as \code{1/pmax(variance, cv_floor)}, hence the minimum value for survey components is 1/\code{cv_floor}. Use smaller \code{cv_floor} values to increase the weight of survey components.
+
+\subsection{g3_iterative_default_grouping}{
+This assumes that your likelihood component names are of the form \verb{(nll_group)_(nll_dist)},
+where \code{(nll_dist)} matches one of the regexes in \var{nll_dist_names}.
+For example, \code{afleet_ldist}, \code{afleet_aldist}, \code{bfleet_ldist}. \code{afleet} & \code{bfleet} will be the groups used.
+}
 }

From 8c2065250dab871c2ef58ea17919d42758d6daf3 Mon Sep 17 00:00:00 2001
From: Jamie Lentin <jm@lentin.co.uk>
Date: Tue, 18 Nov 2025 09:53:12 +0000
Subject: [PATCH 2/2] g3_iterative: Add test for g3_iterative_default_grouping

---
 tests/test-g3_iterative-default_grouping.R | 49 ++++++++++++++++++++++
 1 file changed, 49 insertions(+)
 create mode 100644 tests/test-g3_iterative-default_grouping.R

diff --git a/tests/test-g3_iterative-default_grouping.R b/tests/test-g3_iterative-default_grouping.R
new file mode 100644
index 0000000..a2b64f8
--- /dev/null
+++ b/tests/test-g3_iterative-default_grouping.R
@@ -0,0 +1,49 @@
+library(unittest)
+library(gadgetutils)
+
+library(unittest)
+
+# Parse a text table (header row required) into a data.frame keyed by "switch"
+table_string <- function (text, ...) {
+  parsed <- read.table(
+    header = TRUE,
+    stringsAsFactors = FALSE,
+    blank.lines.skip = TRUE,
+    text = text,
+    ...)
+  row.names(parsed) <- parsed$switch
+  parsed
+}
+
+ok(ut_cmp_identical(g3_iterative_default_grouping(table_string('
+switch					value
+cdist_sumofsquares_comm_ldist_weight	1
+cdist_sumofsquares_comm_aldist_weight	1
+cdist_sumofsquares_comm_argle_weight	1
+cdist_sumofsquares_comm_matp_weight	1
+cdist_sumofsquares_fgn_ldist_weight	1
+cdist_sumofsquares_fgn_aldist_weight	1
+cdist_surveyindices_log_surv_si_weight	1
+'), nll_dist_names = c("ldist", "aldist", "matp", "si")), list(
+  # NB: comm_argle is left out, as "argle" is not one of the nll_dist_names
+  comm = c("comm_ldist", "comm_aldist", "comm_matp"),
+  fgn = c("fgn_ldist", "fgn_aldist"),
+  # NB: "surveyindices_log" is parsed as the function name despite its underscore
+  surv = c("surv_si")
+)))
+
+ok(ut_cmp_identical(suppressWarnings(g3_iterative_default_grouping(table_string('
+switch					value
+cdist_sumofsquares_comm_ldist_weight	1
+cdist_sumofsquares_comm_aldist_weight	1
+cdist_sumofsquares_comm_argle_weight	1
+cdist_sumofsquares_comm_matp_weight	1
+cdist_sumofsquares_fgn_ldist_weight	1
+cdist_sumofsquares_fgn_aldist_weight	0
+cdist_surveyindices_log_surv_si_weight	1
+'), nll_dist_names = c("ldist", "aldist", "matp", "si"))), list(
+  comm = c("comm_ldist", "comm_aldist", "comm_matp"),
+  # NB: zero-weighted fgn_aldist is dropped from its group (the warning is suppressed)
+  fgn = c("fgn_ldist"),
+  surv = c("surv_si")
+)))