Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 65 additions & 14 deletions R/module-loadpage-server.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,21 +82,22 @@ loadpageServer <- function(id, parent_session, is_web_server = FALSE, app_templa
}
})

# Read first 100 rows for Metamorpheus PTM preview.
# TODO: Extend preview reading to other input formats (e.g., Spectronaut, MaxQuant)
# for dynamic UI updates. Currently limited to Metamorpheus.
# Read first 100 rows for preview-based UI features.
# Supported: Metamorpheus PTM (modification ID dropdown), DIANN (version auto-detection).
# TODO: Extend to other input formats (Spectronaut, MaxQuant) as needed.
observe({
if (isTRUE(input$filetype == "meta") && isTRUE(input$BIO == "PTM")) {
should_preview <- (isTRUE(input$filetype == "meta") && isTRUE(input$BIO == "PTM")) ||
(isTRUE(input$filetype == "diann") && isTRUE(input$BIO != "PTM"))
if (should_preview) {
file_info <- main_data_file()
if (!is.null(file_info)) {
preview <- tryCatch(
data.table::fread(file_info$datapath, nrows = 100, header = TRUE),
error = function(e) {
showNotification(paste("Could not preview file:", conditionMessage(e)),
type = "warning", duration = 5)
NULL
}
)
# Reset DIANN detection tracker so a new file re-triggers the notification
last_detected_diann_format(NULL)
preview <- .read_preview(file_info$datapath, file_info$name)
if (is.null(preview)) {
showNotification("Could not preview file. Please verify the file format.",
type = "warning", duration = 5)
}
preview_data(preview)
} else {
preview_data(NULL)
Expand All @@ -106,6 +107,48 @@ loadpageServer <- function(id, parent_session, is_web_server = FALSE, app_templa
}
})

# Track last detected DIANN format to avoid redundant notifications
last_detected_diann_format <- reactiveVal(NULL)

# Auto-toggle DIANN 2.0+ checkbox based on detected file format
observe({
  # Only relevant for non-PTM DIANN uploads.
  req(input$filetype == "diann", input$BIO != "PTM")

  current_preview <- preview_data()
  if (is.null(current_preview)) {
    return()
  }

  detected_2plus <- .is_diann_2plus(current_preview)
  last_seen <- last_detected_diann_format()

  # Act only on the first detection or when the detected format flips;
  # otherwise the checkbox and notification are left alone.
  needs_update <- is.null(last_seen) || last_seen != detected_2plus
  if (needs_update) {
    updateCheckboxInput(session, "diann_2plus", value = detected_2plus)
    format_message <- if (detected_2plus) {
      "Detected DIANN 2.0+ format (per-fragment columns)."
    } else {
      "Detected DIANN 1.x format (legacy fragment column)."
    }
    showNotification(format_message, type = "message", duration = 5)
    # Remember what was reported so the same state is not announced twice.
    last_detected_diann_format(detected_2plus)
  }
})
Comment thread
coderabbitai[bot] marked this conversation as resolved.

# Warn user if they manually set DIANN 2.0+ checkbox to a value that conflicts with detected format
observeEvent(input$diann_2plus, {
  # Only relevant for non-PTM DIANN uploads.
  req(input$filetype == "diann", input$BIO != "PTM")

  current_preview <- preview_data()
  if (is.null(current_preview)) {
    return()
  }

  file_is_2plus <- .is_diann_2plus(current_preview)
  box_checked <- isTRUE(input$diann_2plus)

  # Nothing to report when the checkbox agrees with the detected format.
  if (box_checked == file_is_2plus) {
    return()
  }

  user_action <- if (box_checked) "checked" else "unchecked"
  detected_label <- if (file_is_2plus) "DIANN 2.0+ format" else "DIANN 1.x format"
  mismatch_text <- paste0(
    "Warning: You've ",
    user_action,
    " DIANN 2.0+, but the uploaded file appears to be ",
    detected_label,
    ". This mismatch may cause upload to fail."
  )
  showNotification(mismatch_text, type = "warning", duration = 10)
}, ignoreInit = TRUE)

# ========= METAMORPHEUS PTM: Dynamic modification ID dropdown =========
output$mod_id_meta_ui <- renderUI({
ns <- session$ns
Expand Down Expand Up @@ -412,9 +455,17 @@ loadpageServer <- function(id, parent_session, is_web_server = FALSE, app_templa
getMaxqPtmSites(input)
})


# Load the uploaded data when the user clicks "proceed1".
# Returns whatever getData(input) returns, or NULL if loading fails; on
# failure the user is shown an error notification instead of the reactive
# propagating the error.
get_data <- eventReactive(input$proceed1, {
  # getData() is called exactly once, and only inside tryCatch(), so that
  # every loading error reaches the handler below.
  tryCatch(
    getData(input),
    error = function(e) {
      # Best-effort spinner cleanup; ignore errors raised by the removal
      # itself (presumably when no spinner is showing - TODO confirm).
      tryCatch(remove_modal_spinner(), error = function(e2) NULL)
      showNotification(
        paste("Failed to load data:", conditionMessage(e)),
        type = "error", duration = 12)
      NULL
    }
  )
})


Expand Down
2 changes: 1 addition & 1 deletion R/module-loadpage-ui.R
Original file line number Diff line number Diff line change
Expand Up @@ -602,7 +602,7 @@ create_label_free_options <- function(ns) {
# DIANN specific options
conditionalPanel(
condition = "input['loadpage-filetype'] == 'diann' && input['loadpage-DDA_DIA'] == 'LType'",
checkboxInput(ns("diann_2plus"), "DIANN 2.0+", value = TRUE),
checkboxInput(ns("diann_2plus"), "DIANN 2.0+", value = FALSE),
conditionalPanel(
condition = "!input['loadpage-diann_2plus']",
textInput(ns("intensity_column"),
Expand Down
44 changes: 44 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,47 @@
#' Read a small preview of a data file (CSV, TSV, or Parquet)
#'
#' Delimited text files are read with `data.table::fread()`, limited to
#' `nrows` rows. Parquet files (extension `.parquet` or `.pq`, judged from
#' `filename`) are not read: only the schema is opened and a zero-row data
#' frame carrying the schema's column names is returned, which avoids loading
#' large files into memory.
#'
#' Column names are kept exactly as stored in the file on both paths, so
#' callers that match on column names see the same names whatever the format.
#'
#' @param filepath Path to the file.
#' @param filename Original filename (used to detect parquet extension).
#'   When `NULL`, the file is read as delimited text.
#' @param nrows Number of rows to read. Default 100. Ignored for Parquet,
#'   which returns columns only.
#' @return For delimited files, the `fread()` result with up to `nrows` rows.
#'   For Parquet, a zero-row data frame with one column per schema field.
#'   `NULL` if reading fails for any reason.
#' @noRd
.read_preview <- function(filepath, filename = NULL, nrows = 100) {
  ext <- if (!is.null(filename)) tolower(tools::file_ext(basename(filename))) else ""
  tryCatch({
    if (ext %in% c("parquet", "pq")) {
      # For parquet, read only the schema (column names) to avoid OOM on large files.
      schema <- arrow::open_dataset(filepath, format = "parquet")$schema
      col_names <- schema$names
      empty_cols <- setNames(
        lapply(col_names, function(x) logical(0)),
        col_names
      )
      # check.names = FALSE: keep non-syntactic or duplicated names verbatim
      # (e.g. "Full Sequence"), matching what fread() returns for text files.
      as.data.frame(empty_cols, check.names = FALSE)
    } else {
      data.table::fread(filepath, nrows = nrows, header = TRUE)
    }
  }, error = function(e) NULL)
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

#' Decide from column names whether a DIANN preview looks like 2.0+ output
#'
#' A preview is classified as DIANN 2.0+ when it has at least one numbered
#' fragment quantity column (`Fr.0.Quantity`, `Fr.1.Quantity`, ...) and
#' neither of the legacy fragment columns (`Fragment.Quant.Corrected`,
#' `FragmentQuantCorrected`). If a legacy column is present the result is
#' `FALSE` even when numbered columns also exist.
#'
#' @param preview_df Data frame preview of the DIANN file. Only its column
#'   names are inspected.
#' @return Logical. `TRUE` if the file appears to be DIANN 2.0+; `FALSE`
#'   otherwise, including for `NULL` or a zero-column input.
#' @noRd
.is_diann_2plus <- function(preview_df) {
  if (is.null(preview_df) || ncol(preview_df) == 0) {
    return(FALSE)
  }

  column_names <- names(preview_df)

  # Legacy (1.x) fragment column present: never treat the file as 2.0+.
  legacy_columns <- c("Fragment.Quant.Corrected", "FragmentQuantCorrected")
  if (any(legacy_columns %in% column_names)) {
    return(FALSE)
  }

  # Otherwise 2.0+ requires at least one numbered per-fragment quantity column.
  any(grepl("^Fr\\.[0-9]+\\.Quantity$", column_names))
}

#' Extract unique modification IDs from preview data
#'
#' Parses the Full Sequence column to find bracket-enclosed modification IDs.
Expand Down
204 changes: 204 additions & 0 deletions tests/testthat/test-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -1650,4 +1650,208 @@ test_that("extract_mod_ids_from_preview handles consecutive modifications", {
result <- MSstatsShiny:::.extract_mod_ids_from_preview(preview)
expect_equal(length(result), 3)
expect_true(all(c("[Mod1]", "[Mod2]", "[Mod3]") %in% result))
})

# ============================================================================
# DIANN FORMAT DETECTION TESTS
# ============================================================================

test_that("is_diann_2plus returns TRUE for DIANN 2.0+ format with numbered fragment columns", {
  # Numbered per-fragment quantity columns, no legacy fragment column.
  diann2_preview <- data.frame(
    Run = "run1",
    Protein.Group = "P1",
    Fr.0.Quantity = 100,
    Fr.1.Quantity = 200,
    Fr.2.Quantity = 300,
    Precursor.Charge = 2
  )

  detected <- MSstatsShiny:::.is_diann_2plus(diann2_preview)

  expect_true(detected)
})

test_that("is_diann_2plus returns FALSE for DIANN 1.x format with legacy Fragment.Quant.Corrected", {
  # Only the dotted legacy fragment column is present.
  legacy_preview <- data.frame(
    Run = "run1",
    Protein.Group = "P1",
    Fragment.Quant.Corrected = 100,
    Fragment.Quant.Raw = 95,
    Precursor.Charge = 2
  )

  detected <- MSstatsShiny:::.is_diann_2plus(legacy_preview)

  expect_false(detected)
})

test_that("is_diann_2plus returns FALSE for DIANN 1.x format with FragmentQuantCorrected (no dots)", {
  # Legacy column spelled without dots.
  legacy_preview <- data.frame(
    Run = "run1",
    FragmentQuantCorrected = 100,
    Precursor.Charge = 2
  )

  detected <- MSstatsShiny:::.is_diann_2plus(legacy_preview)

  expect_false(detected)
})

test_that("is_diann_2plus returns FALSE when both formats are present (legacy takes precedence)", {
  # A legacy column alongside a numbered fragment column.
  mixed_preview <- data.frame(
    Run = "run1",
    Fragment.Quant.Corrected = 100,
    Fr.0.Quantity = 200
  )

  detected <- MSstatsShiny:::.is_diann_2plus(mixed_preview)

  expect_false(detected)
})

test_that("is_diann_2plus returns FALSE for NULL preview", {
  # No preview available at all.
  missing_preview <- NULL
  expect_false(MSstatsShiny:::.is_diann_2plus(missing_preview))
})

test_that("is_diann_2plus returns FALSE for empty data frame", {
  # A data frame with no columns.
  no_columns <- data.frame()
  expect_false(MSstatsShiny:::.is_diann_2plus(no_columns))
})

test_that("is_diann_2plus returns FALSE for data with no fragment columns", {
  # Neither numbered nor legacy fragment columns.
  plain_preview <- data.frame(
    Run = "run1",
    Protein.Group = "P1",
    Precursor.Charge = 2
  )

  detected <- MSstatsShiny:::.is_diann_2plus(plain_preview)

  expect_false(detected)
})

test_that("is_diann_2plus detects DIANN 2.0+ with many numbered fragment columns", {
  # Real DIANN 2.0+ files can have Fr.0 through Fr.11
  fragment_ids <- 0:11
  col_names <- c(
    "Run",
    "Protein.Group",
    paste0("Fr.", fragment_ids, ".Quantity"),
    paste0("Fr.", fragment_ids, ".Index"),
    paste0("Fr.", fragment_ids, ".Score")
  )

  # Quantity columns get a random number, every other column a placeholder.
  col_values <- lapply(col_names, function(nm) {
    if (grepl("Quantity", nm)) runif(1) else "x"
  })
  names(col_values) <- col_names
  wide_preview <- as.data.frame(col_values)

  expect_true(MSstatsShiny:::.is_diann_2plus(wide_preview))
})

# ============================================================================
# PREVIEW READER TESTS
# ============================================================================

test_that("read_preview reads CSV files with nrows limit", {
  # Write a 200-row CSV, then ask for only the first 100 rows.
  csv_path <- tempfile(fileext = ".csv")
  row_ids <- 1:200
  full_table <- data.frame(a = row_ids, b = letters[row_ids %% 26 + 1])
  write.csv(full_table, csv_path, row.names = FALSE)

  result <- MSstatsShiny:::.read_preview(csv_path, "test.csv", nrows = 100)

  expect_false(is.null(result))
  expect_equal(nrow(result), 100)
  expect_true(all(c("a", "b") %in% names(result)))

  unlink(csv_path)
})

test_that("read_preview reads TSV files", {
  # A 50-row tab-separated file is shorter than the 100-row limit.
  tsv_path <- tempfile(fileext = ".tsv")
  row_ids <- 1:50
  small_table <- data.frame(a = row_ids, b = letters[row_ids %% 26 + 1])
  write.table(small_table, tsv_path, sep = "\t", row.names = FALSE, quote = FALSE)

  result <- MSstatsShiny:::.read_preview(tsv_path, "test.tsv", nrows = 100)

  expect_false(is.null(result))
  expect_equal(nrow(result), 50)

  unlink(tsv_path)
})

test_that("read_preview returns NULL for non-existent files", {
  # The path does not exist, so the read error must be turned into NULL.
  missing_path <- "/nonexistent/path.csv"
  result <- MSstatsShiny:::.read_preview(missing_path, "test.csv")
  expect_null(result)
})

test_that("read_preview returns NULL for malformed files", {
  garbage_path <- tempfile(fileext = ".csv")
  garbage_bytes <- as.raw(c(0xFF, 0xFE, 0x00, 0x00))
  writeBin(garbage_bytes, garbage_path)

  result <- MSstatsShiny:::.read_preview(garbage_path, "test.csv")

  # fread can sometimes parse garbage, so a data frame is accepted as well
  # as NULL; the point is that the call does not raise.
  acceptable <- is.null(result) || is.data.frame(result)
  expect_true(acceptable)

  unlink(garbage_path)
})

test_that("read_preview handles NULL filename gracefully", {
  csv_path <- tempfile(fileext = ".csv")
  ten_rows <- data.frame(a = 1:10)
  write.csv(ten_rows, csv_path, row.names = FALSE)

  # With no filename there is no extension to inspect, so the delimited-text
  # reader is expected to be used.
  result <- MSstatsShiny:::.read_preview(csv_path, NULL)

  expect_false(is.null(result))
  expect_equal(nrow(result), 10)

  unlink(csv_path)
})

test_that("read_preview dispatches parquet files to arrow schema reader", {
  skip_if_not_installed("arrow")

  parquet_path <- tempfile(fileext = ".parquet")
  source_table <- data.frame(a = 1:50, b = runif(50))
  arrow::write_parquet(source_table, parquet_path)

  result <- MSstatsShiny:::.read_preview(parquet_path, "test.parquet")

  expect_false(is.null(result))
  # Schema-only read returns 0 rows but correct column names
  expect_equal(nrow(result), 0)
  expect_true(all(c("a", "b") %in% names(result)))

  unlink(parquet_path)
})

test_that("read_preview recognizes .pq extension as parquet", {
  skip_if_not_installed("arrow")

  # Same parquet content, stored under the short ".pq" extension.
  pq_path <- tempfile(fileext = ".pq")
  source_table <- data.frame(a = 1:10)
  arrow::write_parquet(source_table, pq_path)

  result <- MSstatsShiny:::.read_preview(pq_path, "test.pq")

  expect_false(is.null(result))

  unlink(pq_path)
})

# ============================================================================
# INTEGRATION TESTS: PREVIEW + DIANN DETECTION
# ============================================================================

test_that("DIANN 1.x CSV file is correctly detected as not 2.0+", {
  # End to end: write a legacy-style CSV, preview it, then run detection.
  csv_path <- tempfile(fileext = ".csv")
  legacy_table <- data.frame(
    Run = paste0("run", 1:10),
    Protein.Group = "P1",
    Fragment.Quant.Corrected = runif(10) * 1000,
    Fragment.Quant.Raw = runif(10) * 1000,
    Precursor.Charge = 2,
    Q.Value = runif(10, 0, 0.01)
  )
  write.csv(legacy_table, csv_path, row.names = FALSE)

  preview <- MSstatsShiny:::.read_preview(csv_path, "diann_1x.csv")
  detected <- MSstatsShiny:::.is_diann_2plus(preview)

  expect_false(detected)

  unlink(csv_path)
})

test_that("DIANN 2.0 parquet file is correctly detected as 2.0+", {
  skip_if_not_installed("arrow")

  # End to end: write a parquet file with numbered fragment columns, preview
  # it (schema only), then run detection on the resulting column names.
  parquet_path <- tempfile(fileext = ".parquet")
  diann2_table <- data.frame(
    Run = paste0("run", 1:10),
    Protein.Group = "P1",
    Fr.0.Quantity = runif(10) * 1000,
    Fr.0.Index = 1L,
    Fr.1.Quantity = runif(10) * 1000,
    Fr.1.Index = 2L,
    Fr.2.Quantity = runif(10) * 1000,
    Fr.2.Index = 3L,
    Precursor.Charge = 2,
    Q.Value = runif(10, 0, 0.01)
  )
  arrow::write_parquet(diann2_table, parquet_path)

  preview <- MSstatsShiny:::.read_preview(parquet_path, "diann_2plus.parquet")
  detected <- MSstatsShiny:::.is_diann_2plus(preview)

  expect_true(detected)

  unlink(parquet_path)
})
Loading