From cc81d097192d3e5e3c79d7fdfb3c9a37bad0ee88 Mon Sep 17 00:00:00 2001 From: Jamie Lentin Date: Mon, 17 Nov 2025 12:32:26 +0000 Subject: [PATCH 1/2] g3_iterative: Add g3_iterative_default_grouping Guess a standard grouping from likelihood naming, use this by default. This won't get everything, but you can use the output and modify it. --- NAMESPACE | 1 + R/g3_iterative.R | 50 ++++++++++++++++++++++++++++++++++++++++++++- man/g3_iterative.Rd | 20 +++++++++++++++++- 3 files changed, 69 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index dba280e..ba060ea 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,6 +14,7 @@ export(g3_data) export(g3_fit) export(g3_init_guess) export(g3_iterative) +export(g3_iterative_default_grouping) export(g3_iterative_setup) export(g3_jitter) export(g3_leaveout) diff --git a/R/g3_iterative.R b/R/g3_iterative.R index 9d96c49..1dbf7a6 100644 --- a/R/g3_iterative.R +++ b/R/g3_iterative.R @@ -75,7 +75,7 @@ #' @export g3_iterative <- function(gd, wgts = 'WGTS', model, params.in, - grouping = list(), + grouping = g3_iterative_default_grouping(params.in), use_parscale = TRUE, method = 'BFGS', control = list(), @@ -374,6 +374,54 @@ g3_iterative <- function(gd, wgts = 'WGTS', return(params_final) } +# Generate default grouping, combine all fleet likelihoods into one group +# NB: For this to work, nll_names need to be in the form (nll_source)_(nll_dist), where (nll_dist) matches one of the (nll_dist_names) +#' @param params.in Initial parameters to use with the model +#' @param nll_dist_names Character vector of postfixes to consider when looking for groupings +#' @return +#' \subsection{g3_iterative_default_grouping}{ +#' A list of component groups to component names, as required by the \var{grouping} parameter +#' } +#' @details +#' \subsection{g3_iterative_default_grouping}{ +#' This assumes that your likelihood component names are of the form ``(nll_group)_(nll_dist)``, +#' where ``(nll_dist)`` matches one of the regexes in 
#' \var{nll_dist_names}.
#' For example, ``afleet_ldist``, ``afleet_aldist``, ``bfleet_ldist``. ``afleet`` & ``bfleet`` will be the groups used.
#' Components whose weight parameter has a value of 0 are left out of the grouping (with a warning).
#' If no weight parameter matches, an empty list is returned.
#' }
#' @rdname g3_iterative
#' @export
g3_iterative_default_grouping <- function (params.in, nll_dist_names = c("ldist", "aldist", "matp", "sexdist", "SI", "len\\d+SI")) {
  # Likelihood weight parameters are expected to be named
  #   (type)_(distance function)_(nll_source)_(nll_dist)_weight
  # e.g. "cdist_sumofsquares_comm_ldist_weight".
  # Plain capture groups are used and picked out by position below, so the
  # result does not depend on regexec() reporting capture-group names.
  weight_re <- paste0(
    "^",
    "(.dist|.sparse)_",                                  # group 1: likelihood type
    "(surveyindices_log|[a-z]+)_",                       # group 2: distance function
    "(.+)_",                                             # group 3: nll_source
    "(", paste0(nll_dist_names, collapse = "|"), ")_",   # group 4: nll_dist
    "weight$"
  )

  # All likelihood component weight names present in params.in
  weight_names <- grep(weight_re, rownames(params.in), value = TRUE, perl = TRUE)
  if (length(weight_names) == 0) {
    # Nothing to group: do.call(rbind, list()) below would yield NULL
    return(list())
  }

  # One row per weight parameter: whole match, then one column per group.
  # Any groups inside nll_dist_names are numbered after group 4, so the
  # nll_source / nll_dist columns stay at positions 4 and 5.
  matched <- do.call(rbind, regmatches(weight_names, regexec(weight_re, weight_names, perl = TRUE)))
  weight_parts <- data.frame(
    param_name = matched[, 1],
    nll_source = matched[, 4],
    nll_dist = matched[, 5],
    stringsAsFactors = FALSE
  )

  # Remove any zero-weighted parameters. "%in% TRUE" turns an NA comparison
  # (missing value) into FALSE instead of letting NA leak into the row index.
  is_zero <- (params.in[weight_parts$param_name, "value"] == 0) %in% TRUE
  if (any(is_zero)) {
    warning("Parameters ", paste(weight_parts$param_name[is_zero], collapse = ", ") , " have a value of 0, removing from grouping")
    weight_parts <- weight_parts[!is_zero, , drop = FALSE]
  }
  if (nrow(weight_parts) == 0) {
    return(list())
  }

  # Group rows together into a list of nll_source -> vector of (nll_source)_(nll_dist),
  # keeping sources in order of first appearance
  components <- paste0(weight_parts$nll_source, "_", weight_parts$nll_dist)
  sources <- unique(weight_parts$nll_source)
  grouping <- lapply(sources, function (nll_source) components[weight_parts$nll_source == nll_source])
  names(grouping) <- sources
  grouping
}

#' @title Initial parameters for iterative re-weighting
#' @param lik_out A likelihood summary dataframe.
The output of g3_lik_out(model, param) #' @param grouping A list describing how to group likelihood components for iterative re-weighting diff --git a/man/g3_iterative.Rd b/man/g3_iterative.Rd index 658264d..3c042b5 100644 --- a/man/g3_iterative.Rd +++ b/man/g3_iterative.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/g3_iterative.R \name{g3_iterative} \alias{g3_iterative} +\alias{g3_iterative_default_grouping} \title{Iterative re-weighting} \usage{ g3_iterative( @@ -9,7 +10,7 @@ g3_iterative( wgts = "WGTS", model, params.in, - grouping = list(), + grouping = g3_iterative_default_grouping(params.in), use_parscale = TRUE, method = "BFGS", control = list(), @@ -19,6 +20,11 @@ g3_iterative( serial_compile = FALSE, mc.cores = parallel::detectCores() ) + +g3_iterative_default_grouping( + params.in, + nll_dist_names = c("ldist", "aldist", "matp", "sexdist", "SI", "len\\\\d+SI") +) } \arguments{ \item{gd}{Directory to store output} @@ -46,9 +52,15 @@ g3_iterative( \item{serial_compile}{g3_tmb_adfun will be run in serial mode (i.e., not in parallel), potentially helping with memory issues} \item{mc.cores}{number of cores used, defaults to the number of available cores} + +\item{nll_dist_names}{Character vector of postfixes to consider when looking for groupings} } \value{ Final set of parameters + +\subsection{g3_iterative_default_grouping}{ +A list of component groups to component names, as required by the \var{grouping} parameter +} } \description{ Perform multiple optimisation runs of a model, reweighting with each run @@ -109,4 +121,10 @@ cv_floor setting. The cv_floor parameter sets the minimum of the estimated compo variance and thus the maximum of the inverser variance. Weights are calculated using inverse-variance weighting (\eqn{1/\sigma^2}), and as \code{1/pmax(variance, cv_floor)}, hence the minimum value for survey components is 1/\code{cv_floor}. Use smaller \code{cv_floor} values to increase the weight of survey components. 
# Tail of the man/g3_iterative.Rd hunk from PATCH 1/2, kept verbatim:
# +
# +\subsection{g3_iterative_default_grouping}{
# +This assumes that your likelihood component names are of the form \verb{(nll_group)_(nll_dist)},
# +where \code{(nll_dist)} matches one of the regexes in \var{nll_dist_names}.
# +For example, \code{afleet_ldist}, \code{afleet_aldist}, \code{bfleet_ldist}. \code{afleet} & \code{bfleet} will be the groups used.
# +}
#  }
#
# From 8c2065250dab871c2ef58ea17919d42758d6daf3 Mon Sep 17 00:00:00 2001
# From: Jamie Lentin
# Date: Tue, 18 Nov 2025 09:53:12 +0000
# Subject: [PATCH 2/2] g3_iterative: Add test for g3_iterative_default_grouping
# ---
#  tests/test-g3_iterative-default_grouping.R | 49 ++++++++++++++++++++++
#  1 file changed, 49 insertions(+)
#  create mode 100644 tests/test-g3_iterative-default_grouping.R
#
# diff --git a/tests/test-g3_iterative-default_grouping.R b/tests/test-g3_iterative-default_grouping.R
# new file mode 100644
# index 0000000..a2b64f8
# --- /dev/null
# +++ b/tests/test-g3_iterative-default_grouping.R
# @@ -0,0 +1,49 @@

library(unittest)
library(gadgetutils)

# Convert a whitespace-separated table (first line is the header) into a
# data.frame whose row names are taken from its "switch" column.
# Extra arguments are forwarded to read.table().
table_string <- function (text, ...) {
  out <- read.table(
    text = text,
    blank.lines.skip = TRUE,
    header = TRUE,
    stringsAsFactors = FALSE,
    ...)
  rownames(out) <- out$switch
  out
}

# Distribution suffixes used by every call below
dist_names <- c("ldist", "aldist", "matp", "si")

ok(ut_cmp_identical(g3_iterative_default_grouping(table_string('
switch value
cdist_sumofsquares_comm_ldist_weight 1
cdist_sumofsquares_comm_aldist_weight 1
cdist_sumofsquares_comm_argle_weight 1
cdist_sumofsquares_comm_matp_weight 1
cdist_sumofsquares_fgn_ldist_weight 1
cdist_sumofsquares_fgn_aldist_weight 1
cdist_surveyindices_log_surv_si_weight 1
'), nll_dist_names = dist_names), list(
  # NB: argle is missing
  comm = c("comm_ldist", "comm_aldist", "comm_matp"),
  fgn = c("fgn_ldist", "fgn_aldist"),
  # NB: parsed the awkward surveyindices_log
  surv = c("surv_si")
)), "Components grouped by source, unknown distributions ignored")

# Same parameters, but with one component zero-weighted
zero_weighted <- table_string('
switch value
cdist_sumofsquares_comm_ldist_weight 1
cdist_sumofsquares_comm_aldist_weight 1
cdist_sumofsquares_comm_argle_weight 1
cdist_sumofsquares_comm_matp_weight 1
cdist_sumofsquares_fgn_ldist_weight 1
cdist_sumofsquares_fgn_aldist_weight 0
cdist_surveyindices_log_surv_si_weight 1
')

ok(ut_cmp_identical(suppressWarnings(g3_iterative_default_grouping(zero_weighted, nll_dist_names = dist_names)), list(
  comm = c("comm_ldist", "comm_aldist", "comm_matp"),
  # NB: zero-weighted doesn't count
  fgn = c("fgn_ldist"),
  surv = c("surv_si")
)), "Zero-weighted components left out of grouping")

# The removal must be announced, not silent: capture the warning text
zero_warning <- tryCatch({
  g3_iterative_default_grouping(zero_weighted, nll_dist_names = dist_names)
  NULL
}, warning = function (w) conditionMessage(w))
ok(ut_cmp_identical(
  zero_warning,
  "Parameters cdist_sumofsquares_fgn_aldist_weight have a value of 0, removing from grouping"
), "Zero-weighted components are reported in a warning")