Vitek-Lab · tonywu1999 · May 14, 2026 · May 15, 2026 · coderabbitai · May 14, 2026
diff --git a/R/dataProcess.R b/R/dataProcess.R
@@ -146,7 +146,9 @@ dataProcess = function(
 
     peptides_dict = makePeptidesDictionary(as.data.table(unclass(raw)), normalization)
     input = MSstatsPrepareForDataProcess(raw, logTrans, fix_missing)
+    rm(raw)
     input = MSstatsNormalize(input, normalization, peptides_dict, nameStandards)
+    rm(peptides_dict)
     input = MSstatsMergeFractions(input)
     input = MSstatsHandleMissing(input, summaryMethod, MBimpute,
                                  censoredInt, maxQuantileforCensored)
@@ -173,6 +175,7 @@ dataProcess = function(
                             "== Summarization is done.")
     getOption("MSstatsMsg")("INFO",
                             " == Summarization is done.")
+    gc(verbose = FALSE)
     output = MSstatsSummarizationOutput(input, summarized, processed,
                                         summaryMethod, MBimpute, censoredInt)
     output
@@ -412,7 +415,7 @@ MSstatsSummarizeSingleLinear = function(single_protein,
             pred = predict(survival_fit, newdata = .SD, se.fit = TRUE)
             list(pred$fit, pred$se.fit^2 + sigma2)
         }]
-
+        rm(survival_fit)
         if (is_labeled_reference) {
             single_protein[, predicted := ifelse(censored & is_labeled_ref == FALSE, predicted, NA)]
             single_protein[, newABUNDANCE := ifelse(censored & is_labeled_ref == FALSE, predicted, newABUNDANCE)]
@@ -489,7 +492,7 @@ MSstatsSummarizeSingleLinear = function(single_protein,
             # Todo: enable SRM linear summarization
             result[, LABEL := if (is_labeled_reference) "L" else unique(single_protein$LABEL)]
         }
-
+        rm(fit)
         return(list(result, survival))
     }
 }
@@ -567,7 +570,7 @@ MSstatsSummarizeSingleTMP = function(single_protein, impute, censored_symbol,
         } else {
             single_protein[, predicted := NA_real_]
         }
-
+        rm(survival_fit)
         if (is_labeled_reference) {
             single_protein[, predicted := ifelse(censored & is_labeled_ref == FALSE, predicted, NA)]
             single_protein[, newABUNDANCE := ifelse(censored & is_labeled_ref == FALSE, predicted, newABUNDANCE)]

diff --git a/R/utils_imputation.R b/R/utils_imputation.R
@@ -46,8 +46,10 @@
                                         data = input, dist = "gaussian", 
                               control = list(maxiter=aft_iterations))
             }
-        }  
+        }
     }
+    fit$y = NULL
+    fit$linear.predictors = NULL
     fit
 }
 

diff --git a/R/utils_summarization.R b/R/utils_summarization.R
@@ -180,10 +180,11 @@
         }
     }
     if (!equal_variances) {
-        linear_model = .updateUnequalVariances(input = input, 
+        linear_model = .updateUnequalVariances(input = input,
                                                fit = linear_model,
                                                num_iter = 1)
     }
+    linear_model$model = NULL
     linear_model
 }
 
@@ -198,26 +199,22 @@
 #' @keywords internal
 .updateUnequalVariances = function(input, fit, num_iter) {
     weight = NULL
-
+    input = as.data.frame(input)
     for (i in seq_len(num_iter)) {
         if (i == 1) {
-            abs.resids = data.frame(abs.resids = abs(fit$residuals))
-            fitted = data.frame(fitted = fit$fitted.values)
-            input = data.frame(input, 
-                               "abs.resids" = abs.resids, 
-                               "fitted" = fitted)
+            input[["abs.resids"]] = abs(fit$residuals)
+            input[["fitted"]] = fit$fitted.values
         }
         fit.loess = loess(abs.resids ~ fitted, data = input)
-        loess.fitted = data.frame(loess.fitted = fitted(fit.loess))
-        input = data.frame(input, "loess.fitted" = loess.fitted)
-        ## loess fitted valuaes are predicted sd
-        input$weight = 1 / (input$loess.fitted ^ 2)
-        input = input[, !(colnames(input) %in% "abs.resids")]
+        input[["loess.fitted"]] = fitted(fit.loess)
+        ## loess fitted values are predicted sd
+        input[["weight"]] = 1 / (input[["loess.fitted"]] ^ 2)
+        input[["abs.resids"]] = NULL
         ## re-fit using weight
         wls.fit = lm(formula(fit), data = input, weights = weight)
-        abs.resids = data.frame(abs.resids = abs(wls.fit$residuals))
-        input = data.frame(input, "abs.resids" = abs.resids)
-        input = input[, -which(colnames(input) %in% c("loess.fitted", "weight"))]
+        input[["abs.resids"]] = abs(wls.fit$residuals)
+        input[["loess.fitted"]] = NULL
+        input[["weight"]] = NULL
     }
     wls.fit
 }

diff --git a/inst/tinytest/test_memory_optimization_fits.R b/inst/tinytest/test_memory_optimization_fits.R
@@ -0,0 +1,82 @@
+# Tests that .fitSurvival() and .fitLinearModel() return model objects with
+# heavy fields stripped, and that downstream predict/summary/vcov still work.
+
+
+# --- .fitSurvival: $y and $linear.predictors are stripped ---------------------
+#
+# Neither field is needed by predict.survreg().
+
+surv_input = data.table::data.table(
+    newABUNDANCE = c(10.1, 11.2, 9.5, 10.8, 12.0, 9.0, 11.5, 10.3,
+                     10.5, 11.0, 9.8, 10.2, 12.2, 9.3, 11.8, 10.6),
+    cen = c(1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1),
+    RUN = factor(rep(1:4, each = 4)),
+    FEATURE = factor(rep(c("feat1", "feat2", "feat3", "feat4"), 4))
+)
+# cen = 0 marks left-censored rows; set them to the upper-bound threshold.
+surv_input[cen == 0, newABUNDANCE := 7.0]
+
+surv_fit = MSstats:::.fitSurvival(surv_input, aft_iterations = 90)
+
+expect_true(is.null(surv_fit$y),
+            info = "survreg $y should be stripped")
+expect_true(is.null(surv_fit$linear.predictors),
+            info = "survreg $linear.predictors should be stripped")
+
+expect_false(is.null(surv_fit$coefficients),
+             info = "survreg $coefficients must survive stripping")
+expect_false(is.null(surv_fit$scale),
+             info = "survreg $scale must survive stripping")
+
+predictions = predict(surv_fit, newdata = surv_input)
+expect_equal(length(predictions), nrow(surv_input),
+             info = "predict() must work on the stripped survreg object")
+expect_true(all(is.finite(predictions)),
+            info = "predict() should return finite values")
+
+unstripped_fit = survival::survreg(
+    survival::Surv(newABUNDANCE, cen, type = "left") ~ FEATURE + RUN,
+    data = surv_input, dist = "gaussian")
+expect_true(object.size(surv_fit) < object.size(unstripped_fit),
+            info = paste("Stripped survreg should be smaller.",
+                         "Stripped:", object.size(surv_fit),
+                         "Unstripped:", object.size(unstripped_fit)))
+
+
+# --- .fitLinearModel: $model is stripped --------------------------------------
+#
+# $model is not needed by summary() or vcov().
+
+lm_input = data.table::data.table(
+    newABUNDANCE = c(10.1, 11.2, 9.5, 10.8, 12.0, 9.0, 11.5, 10.3,
+                     10.5, 11.0, 9.8, 10.2, 12.2, 9.3, 11.8, 10.6),
+    RUN = factor(rep(1:4, each = 4)),
+    FEATURE = factor(rep(c("feat1", "feat2", "feat3", "feat4"), 4)),
+    weights = NA
+)
+
+lm_fit = MSstats:::.fitLinearModel(lm_input, is_single_feature = FALSE,
+                                    is_labeled = FALSE, equal_variances = TRUE)
+
+expect_true(is.null(lm_fit$model),
+            info = "lm $model should be stripped")
+
+expect_false(is.null(lm_fit$coefficients),
+             info = "lm $coefficients must survive stripping")
+expect_false(is.null(lm_fit$qr),
+             info = "lm $qr must survive stripping (needed by summary and vcov)")
+expect_false(is.null(lm_fit$residuals),
+             info = "lm $residuals must survive stripping (needed by summary)")
+
+lm_summary = summary(lm_fit)
+expect_false(is.null(lm_summary$coefficients),
+             info = "summary() must work on the stripped lm object")
+lm_vcov = vcov(lm_fit)
+expect_true(is.matrix(lm_vcov),
+            info = "vcov() must work on the stripped lm object")
+
+unstripped_lm = lm(newABUNDANCE ~ FEATURE + RUN, data = lm_input)
+expect_true(object.size(lm_fit) < object.size(unstripped_lm),
+            info = paste("Stripped lm should be smaller.",
+                         "Stripped:", object.size(lm_fit),
+                         "Unstripped:", object.size(unstripped_lm)))