Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions R/dataProcess.R
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,9 @@ dataProcess = function(

peptides_dict = makePeptidesDictionary(as.data.table(unclass(raw)), normalization)
input = MSstatsPrepareForDataProcess(raw, logTrans, fix_missing)
rm(raw)
input = MSstatsNormalize(input, normalization, peptides_dict, nameStandards)
rm(peptides_dict)
input = MSstatsMergeFractions(input)
input = MSstatsHandleMissing(input, summaryMethod, MBimpute,
censoredInt, maxQuantileforCensored)
Expand All @@ -173,6 +175,7 @@ dataProcess = function(
"== Summarization is done.")
getOption("MSstatsMsg")("INFO",
" == Summarization is done.")
gc(verbose = FALSE)
output = MSstatsSummarizationOutput(input, summarized, processed,
summaryMethod, MBimpute, censoredInt)
output
Expand Down Expand Up @@ -412,7 +415,7 @@ MSstatsSummarizeSingleLinear = function(single_protein,
pred = predict(survival_fit, newdata = .SD, se.fit = TRUE)
list(pred$fit, pred$se.fit^2 + sigma2)
}]

rm(survival_fit)
if (is_labeled_reference) {
single_protein[, predicted := ifelse(censored & is_labeled_ref == FALSE, predicted, NA)]
single_protein[, newABUNDANCE := ifelse(censored & is_labeled_ref == FALSE, predicted, newABUNDANCE)]
Expand Down Expand Up @@ -489,7 +492,7 @@ MSstatsSummarizeSingleLinear = function(single_protein,
# Todo: enable SRM linear summarization
result[, LABEL := if (is_labeled_reference) "L" else unique(single_protein$LABEL)]
}

rm(fit)
return(list(result, survival))
}
}
Expand Down Expand Up @@ -567,7 +570,7 @@ MSstatsSummarizeSingleTMP = function(single_protein, impute, censored_symbol,
} else {
single_protein[, predicted := NA_real_]
}

rm(survival_fit)
if (is_labeled_reference) {
single_protein[, predicted := ifelse(censored & is_labeled_ref == FALSE, predicted, NA)]
single_protein[, newABUNDANCE := ifelse(censored & is_labeled_ref == FALSE, predicted, newABUNDANCE)]
Expand Down
4 changes: 3 additions & 1 deletion R/utils_imputation.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,10 @@
data = input, dist = "gaussian",
control = list(maxiter=aft_iterations))
}
}
}
}
fit$y = NULL
fit$linear.predictors = NULL
fit
}

Expand Down
27 changes: 12 additions & 15 deletions R/utils_summarization.R
Original file line number Diff line number Diff line change
Expand Up @@ -180,10 +180,11 @@
}
}
if (!equal_variances) {
linear_model = .updateUnequalVariances(input = input,
linear_model = .updateUnequalVariances(input = input,
fit = linear_model,
num_iter = 1)
}
linear_model$model = NULL
linear_model
}

Expand All @@ -198,26 +199,22 @@
#' @keywords internal
.updateUnequalVariances = function(input, fit, num_iter) {
weight = NULL

input = as.data.frame(input)
for (i in seq_len(num_iter)) {
if (i == 1) {
abs.resids = data.frame(abs.resids = abs(fit$residuals))
fitted = data.frame(fitted = fit$fitted.values)
input = data.frame(input,
"abs.resids" = abs.resids,
"fitted" = fitted)
input[["abs.resids"]] = abs(fit$residuals)
input[["fitted"]] = fit$fitted.values
}
fit.loess = loess(abs.resids ~ fitted, data = input)
loess.fitted = data.frame(loess.fitted = fitted(fit.loess))
input = data.frame(input, "loess.fitted" = loess.fitted)
## loess fitted valuaes are predicted sd
input$weight = 1 / (input$loess.fitted ^ 2)
input = input[, !(colnames(input) %in% "abs.resids")]
input[["loess.fitted"]] = fitted(fit.loess)
## loess fitted values are predicted sd
input[["weight"]] = 1 / (input[["loess.fitted"]] ^ 2)
input[["abs.resids"]] = NULL
## re-fit using weight
wls.fit = lm(formula(fit), data = input, weights = weight)
abs.resids = data.frame(abs.resids = abs(wls.fit$residuals))
input = data.frame(input, "abs.resids" = abs.resids)
input = input[, -which(colnames(input) %in% c("loess.fitted", "weight"))]
input[["abs.resids"]] = abs(wls.fit$residuals)
input[["loess.fitted"]] = NULL
input[["weight"]] = NULL
}
Comment on lines 203 to 218
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Refresh fitted after each weighted refit.

At Line 220 you refresh abs.resids from wls.fit, but fitted is never refreshed after Line 211. For num_iter > 1, later loess iterations use stale fitted values and inconsistent residual/fitted pairs.

Suggested fix
     for (i in seq_len(num_iter)) {
         if (i == 1) {
             input[["abs.resids"]] = abs(fit$residuals)
             input[["fitted"]] = fit$fitted.values
         }
         fit.loess = loess(abs.resids ~ fitted, data = input)
         input[["loess.fitted"]] = fitted(fit.loess)
         ## loess fitted values are predicted sd
         input[["weight"]] = 1 / (input[["loess.fitted"]] ^ 2)
         input[["abs.resids"]] = NULL
         ## re-fit using weight
         wls.fit = lm(formula(fit), data = input, weights = weight)
         input[["abs.resids"]] = abs(wls.fit$residuals)
+        input[["fitted"]] = wls.fit$fitted.values
         input[["loess.fitted"]] = NULL
         input[["weight"]] = NULL
     }
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@R/utils_summarization.R` around lines 208 - 223, The loess iterations reuse
stale fitted values because input[["fitted"]] is only set from the initial fit;
after each weighted refit (wls.fit) update the stored fitted values so
subsequent loess calls use consistent residual/fitted pairs — e.g. inside the
loop after computing wls.fit (the variable wls.fit) assign input[["fitted"]] <-
fitted(wls.fit) or input[["fitted"]] <- wls.fit$fitted.values before
recalculating input[["abs.resids"]] and proceeding to the next iteration.

wls.fit
}
Expand Down
82 changes: 82 additions & 0 deletions inst/tinytest/test_memory_optimization_fits.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Tests that .fitSurvival() and .fitLinearModel() return model objects with
# heavy fields stripped, and that downstream predict/summary/vcov still work.


# --- .fitSurvival: $y and $linear.predictors are stripped ---------------------
#
# Neither field is needed by predict.survreg().

surv_input = data.table::data.table(
newABUNDANCE = c(10.1, 11.2, 9.5, 10.8, 12.0, 9.0, 11.5, 10.3,
10.5, 11.0, 9.8, 10.2, 12.2, 9.3, 11.8, 10.6),
cen = c(1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1),
RUN = factor(rep(1:4, each = 4)),
FEATURE = factor(rep(c("feat1", "feat2", "feat3", "feat4"), 4))
)
# cen = 0 marks left-censored rows; set them to the upper-bound threshold.
surv_input[cen == 0, newABUNDANCE := 7.0]

surv_fit = MSstats:::.fitSurvival(surv_input, aft_iterations = 90)

expect_true(is.null(surv_fit$y),
info = "survreg $y should be stripped")
expect_true(is.null(surv_fit$linear.predictors),
info = "survreg $linear.predictors should be stripped")

expect_false(is.null(surv_fit$coefficients),
info = "survreg $coefficients must survive stripping")
expect_false(is.null(surv_fit$scale),
info = "survreg $scale must survive stripping")

predictions = predict(surv_fit, newdata = surv_input)
expect_equal(length(predictions), nrow(surv_input),
info = "predict() must work on the stripped survreg object")
expect_true(all(is.finite(predictions)),
info = "predict() should return finite values")

unstripped_fit = survival::survreg(
survival::Surv(newABUNDANCE, cen, type = "left") ~ FEATURE + RUN,
data = surv_input, dist = "gaussian")
expect_true(object.size(surv_fit) < object.size(unstripped_fit),
info = paste("Stripped survreg should be smaller.",
"Stripped:", object.size(surv_fit),
"Unstripped:", object.size(unstripped_fit)))


# --- .fitLinearModel: $model is stripped --------------------------------------
#
# $model is not needed by summary() or vcov().

lm_input = data.table::data.table(
newABUNDANCE = c(10.1, 11.2, 9.5, 10.8, 12.0, 9.0, 11.5, 10.3,
10.5, 11.0, 9.8, 10.2, 12.2, 9.3, 11.8, 10.6),
RUN = factor(rep(1:4, each = 4)),
FEATURE = factor(rep(c("feat1", "feat2", "feat3", "feat4"), 4)),
weights = NA
)

lm_fit = MSstats:::.fitLinearModel(lm_input, is_single_feature = FALSE,
is_labeled = FALSE, equal_variances = TRUE)

expect_true(is.null(lm_fit$model),
info = "lm $model should be stripped")

expect_false(is.null(lm_fit$coefficients),
info = "lm $coefficients must survive stripping")
expect_false(is.null(lm_fit$qr),
info = "lm $qr must survive stripping (needed by summary and vcov)")
expect_false(is.null(lm_fit$residuals),
info = "lm $residuals must survive stripping (needed by summary)")

lm_summary = summary(lm_fit)
expect_false(is.null(lm_summary$coefficients),
info = "summary() must work on the stripped lm object")
lm_vcov = vcov(lm_fit)
expect_true(is.matrix(lm_vcov),
info = "vcov() must work on the stripped lm object")

unstripped_lm = lm(newABUNDANCE ~ FEATURE + RUN, data = lm_input)
expect_true(object.size(lm_fit) < object.size(unstripped_lm),
info = paste("Stripped lm should be smaller.",
"Stripped:", object.size(lm_fit),
"Unstripped:", object.size(unstripped_lm)))
Loading