diff --git a/R/module-loadpage-server.R b/R/module-loadpage-server.R
index 6c2eb4f..1bca0ba 100644
--- a/R/module-loadpage-server.R
+++ b/R/module-loadpage-server.R
@@ -82,21 +82,22 @@ loadpageServer <- function(id, parent_session, is_web_server = FALSE, app_templa
       }
     })
 
-    # Read first 100 rows for Metamorpheus PTM preview.
-    # TODO: Extend preview reading to other input formats (e.g., Spectronaut, MaxQuant)
-    # for dynamic UI updates. Currently limited to Metamorpheus.
+    # Read first 100 rows for preview-based UI features.
+    # Supported: Metamorpheus PTM (modification ID dropdown), DIANN (version auto-detection).
+    # TODO: Extend to other input formats (Spectronaut, MaxQuant) as needed.
     observe({
-      if (isTRUE(input$filetype == "meta") && isTRUE(input$BIO == "PTM")) {
+      should_preview <- (isTRUE(input$filetype == "meta") && isTRUE(input$BIO == "PTM")) ||
+        (isTRUE(input$filetype == "diann") && isTRUE(input$BIO != "PTM"))
+      if (should_preview) {
         file_info <- main_data_file()
         if (!is.null(file_info)) {
-          preview <- tryCatch(
-            data.table::fread(file_info$datapath, nrows = 100, header = TRUE),
-            error = function(e) {
-              showNotification(paste("Could not preview file:", conditionMessage(e)),
-                               type = "warning", duration = 5)
-              NULL
-            }
-          )
+          # Reset DIANN detection tracker so a new file re-triggers the notification
+          last_detected_diann_format(NULL)
+          preview <- .read_preview(file_info$datapath, file_info$name)
+          if (is.null(preview)) {
+            showNotification("Could not preview file. Please verify the file format.",
+                             type = "warning", duration = 5)
+          }
           preview_data(preview)
         } else {
           preview_data(NULL)
@@ -106,6 +107,49 @@ loadpageServer <- function(id, parent_session, is_web_server = FALSE, app_templa
       }
     })
 
+    # Track last detected DIANN format to avoid redundant notifications
+    last_detected_diann_format <- reactiveVal(NULL)
+
+    # Auto-toggle DIANN 2.0+ checkbox based on detected file format
+    observe({
+      req(input$filetype == "diann", input$BIO != "PTM")
+      preview <- preview_data()
+      if (is.null(preview)) return()
+
+      is_2plus <- .is_diann_2plus(preview)
+      # Deliberately not isolate()d: the reset to NULL on a new upload re-runs this observer.
+      previous <- last_detected_diann_format()
+      # Only update and notify when the detected state actually changes
+      if (is.null(previous) || previous != is_2plus) {
+        updateCheckboxInput(session, "diann_2plus", value = is_2plus)
+        if (is_2plus) {
+          showNotification("Detected DIANN 2.0+ format (per-fragment columns).",
+                           type = "message", duration = 5)
+        } else {
+          showNotification("Detected DIANN 1.x format (legacy fragment column).",
+                           type = "message", duration = 5)
+        }
+        last_detected_diann_format(is_2plus)
+      }
+    })
+
+    # Warn user if they manually set DIANN 2.0+ checkbox to a value that conflicts with detected format
+    observeEvent(input$diann_2plus, {
+      req(input$filetype == "diann", input$BIO != "PTM")
+      preview <- preview_data()
+      if (is.null(preview)) return()
+      detected_2plus <- .is_diann_2plus(preview)
+      if (isTRUE(input$diann_2plus) != detected_2plus) {
+        showNotification(
+          paste0("Warning: You've ",
+                 if (isTRUE(input$diann_2plus)) "checked" else "unchecked",
+                 " DIANN 2.0+, but the uploaded file appears to be ",
+                 if (detected_2plus) "DIANN 2.0+ format" else "DIANN 1.x format",
+                 ". This mismatch may cause upload to fail."),
+          type = "warning", duration = 10)
+      }
+    }, ignoreInit = TRUE)
+
     # ========= METAMORPHEUS PTM: Dynamic modification ID dropdown =========
     output$mod_id_meta_ui <- renderUI({
       ns <- session$ns
@@ -412,9 +456,17 @@ loadpageServer <- function(id, parent_session, is_web_server = FALSE, app_templa
       getMaxqPtmSites(input)
     })
 
-
     get_data = eventReactive(input$proceed1, {
-      getData(input)
+      tryCatch(
+        getData(input),
+        error = function(e) {
+          tryCatch(remove_modal_spinner(), error = function(e2) NULL)
+          showNotification(
+            paste("Failed to load data:", conditionMessage(e)),
+            type = "error", duration = 12)
+          NULL
+        }
+      )
     })
 
 
diff --git a/R/module-loadpage-ui.R b/R/module-loadpage-ui.R
index c4c629b..5e4b476 100644
--- a/R/module-loadpage-ui.R
+++ b/R/module-loadpage-ui.R
@@ -602,7 +602,7 @@ create_label_free_options <- function(ns) {
     # DIANN specific options
     conditionalPanel(
       condition = "input['loadpage-filetype'] == 'diann' && input['loadpage-DDA_DIA'] == 'LType'",
-      checkboxInput(ns("diann_2plus"), "DIANN 2.0+", value = TRUE),
+      checkboxInput(ns("diann_2plus"), "DIANN 2.0+", value = FALSE),
       conditionalPanel(
         condition = "!input['loadpage-diann_2plus']",
         textInput(ns("intensity_column"),
diff --git a/R/utils.R b/R/utils.R
index f77f64c..e3af653 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -1,3 +1,62 @@
+#' Read a lightweight preview of a data file (CSV, TSV, or Parquet)
+#'
+#' Delimited files are read with `data.table::fread()`, limited to `nrows`
+#' rows. Parquet files are not loaded: only the schema is read, and a
+#' zero-row data frame carrying the file's column names is returned.
+#'
+#' @param filepath Path to the file.
+#' @param filename Original filename, used to detect a parquet extension
+#'   (`parquet` or `pq`, case-insensitive). If `NULL`, the file is read
+#'   as delimited text.
+#' @param nrows Maximum number of rows to read from a delimited file.
+#'   Default 100. Not used for parquet.
+#' @return For delimited files, the result of `data.table::fread()` with
+#'   up to `nrows` rows; for parquet, a zero-row data frame with the
+#'   file's column names; `NULL` if reading fails.
+#' @noRd
+.read_preview <- function(filepath, filename = NULL, nrows = 100) {
+  ext <- if (!is.null(filename)) tolower(tools::file_ext(basename(filename))) else ""
+  tryCatch({
+    if (ext %in% c("parquet", "pq")) {
+      # For parquet, read only the schema (column names) to avoid OOM on large files.
+      schema <- arrow::open_dataset(filepath, format = "parquet")$schema
+      col_names <- schema$names
+      # check.names = FALSE keeps the names exactly as stored in the file;
+      # the default would pass them through make.names(), so the parquet
+      # path could report different names than the fread path.
+      as.data.frame(
+        stats::setNames(lapply(col_names, function(x) logical(0)), col_names),
+        check.names = FALSE
+      )
+    } else {
+      data.table::fread(filepath, nrows = nrows, header = TRUE)
+    }
+  }, error = function(e) {
+    # Callers only receive NULL, so log the reason before discarding it.
+    message("Could not read file preview: ", conditionMessage(e))
+    NULL
+  })
+}
+
+#' Detect whether a DIANN preview is in 2.0+ format
+#'
+#' DIANN 2.0+ files have per-fragment columns (Fr.0.Quantity, Fr.1.Quantity, etc.)
+#' and no FragmentQuantCorrected column. Older versions use a single
+#' Fragment.Quant.Corrected / FragmentQuantCorrected column.
+#'
+#' @param preview_df Data frame preview of the DIANN file.
+#' @return Logical. TRUE if the file appears to be DIANN 2.0+.
+#' @noRd
+.is_diann_2plus <- function(preview_df) {
+  if (is.null(preview_df) || ncol(preview_df) == 0) return(FALSE)
+  cols <- names(preview_df)
+  # DIANN 2.0+ signature: numbered fragment columns like "Fr.0.Quantity"
+  has_numbered_fragments <- any(grepl("^Fr\\.[0-9]+\\.Quantity$", cols))
+  # DIANN 1.x signature: the legacy fragment column
+  has_legacy_fragments <- any(cols %in% c("Fragment.Quant.Corrected", "FragmentQuantCorrected"))
+  has_numbered_fragments && !has_legacy_fragments
+}
+
 #' Extract unique modification IDs from preview data
 #'
 #' Parses the Full Sequence column to find bracket-enclosed modification IDs.
diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R
index 19072d6..9f84b74 100644
--- a/tests/testthat/test-utils.R
+++ b/tests/testthat/test-utils.R
@@ -1650,4 +1650,210 @@ test_that("extract_mod_ids_from_preview handles consecutive modifications", {
   result <- MSstatsShiny:::.extract_mod_ids_from_preview(preview)
   expect_equal(length(result), 3)
   expect_true(all(c("[Mod1]", "[Mod2]", "[Mod3]") %in% result))
+})
+
+# ============================================================================
+# DIANN FORMAT DETECTION TESTS
+# ============================================================================
+
+test_that("is_diann_2plus returns TRUE for DIANN 2.0+ format with numbered fragment columns", {
+  # Only per-fragment quantity columns are present, no legacy column.
+  diann_cols <- data.frame(
+    Run = "run1", Protein.Group = "P1",
+    Fr.0.Quantity = 100, Fr.1.Quantity = 200, Fr.2.Quantity = 300,
+    Precursor.Charge = 2
+  )
+  detected <- MSstatsShiny:::.is_diann_2plus(diann_cols)
+  expect_true(detected)
+})
+
+test_that("is_diann_2plus returns FALSE for DIANN 1.x format with legacy Fragment.Quant.Corrected", {
+  # Dotted spelling of the legacy column name.
+  diann_cols <- data.frame(
+    Run = "run1", Protein.Group = "P1",
+    Fragment.Quant.Corrected = 100, Fragment.Quant.Raw = 95,
+    Precursor.Charge = 2
+  )
+  detected <- MSstatsShiny:::.is_diann_2plus(diann_cols)
+  expect_false(detected)
+})
+
+test_that("is_diann_2plus returns FALSE for DIANN 1.x format with FragmentQuantCorrected (no dots)", {
+  diann_cols <- data.frame(Run = "run1", FragmentQuantCorrected = 100, Precursor.Charge = 2)
+  detected <- MSstatsShiny:::.is_diann_2plus(diann_cols)
+  expect_false(detected)
+})
+
+test_that("is_diann_2plus returns FALSE when both formats are present (legacy takes precedence)", {
+  diann_cols <- data.frame(Run = "run1", Fragment.Quant.Corrected = 100, Fr.0.Quantity = 200)
+  detected <- MSstatsShiny:::.is_diann_2plus(diann_cols)
+  expect_false(detected)
+})
+
+test_that("is_diann_2plus returns FALSE for NULL preview", {
+  detected <- MSstatsShiny:::.is_diann_2plus(NULL)
+  expect_false(detected)
+})
+
+test_that("is_diann_2plus returns FALSE for empty data frame", {
+  no_columns <- data.frame()
+  expect_false(MSstatsShiny:::.is_diann_2plus(no_columns))
+})
+
+test_that("is_diann_2plus returns FALSE for data with no fragment columns", {
+  diann_cols <- data.frame(Run = "run1", Protein.Group = "P1", Precursor.Charge = 2)
+  detected <- MSstatsShiny:::.is_diann_2plus(diann_cols)
+  expect_false(detected)
+})
+
+test_that("is_diann_2plus detects DIANN 2.0+ with many numbered fragment columns", {
+  # Twelve fragments (Fr.0 .. Fr.11), each with Quantity, Index and Score columns.
+  fragment_ids <- 0:11
+  col_names <- c(
+    "Run", "Protein.Group",
+    paste0("Fr.", fragment_ids, ".Quantity"),
+    paste0("Fr.", fragment_ids, ".Index"),
+    paste0("Fr.", fragment_ids, ".Score")
+  )
+  # Quantity columns get a random number, every other column a placeholder string.
+  col_values <- lapply(col_names, function(nm) {
+    if (grepl("Quantity", nm)) runif(1) else "x"
+  })
+  diann_cols <- as.data.frame(setNames(col_values, col_names))
+  expect_true(MSstatsShiny:::.is_diann_2plus(diann_cols))
+})
+
+# ============================================================================
+# PREVIEW READER TESTS
+# ============================================================================
+
+test_that("read_preview reads CSV files with nrows limit", {
+  csv_path <- tempfile(fileext = ".csv")
+  # 200 rows on disk, so the 100-row limit is what caps the preview.
+  fixture <- data.frame(a = 1:200, b = letters[1:200 %% 26 + 1])
+  write.csv(fixture, file = csv_path, row.names = FALSE)
+
+  loaded <- MSstatsShiny:::.read_preview(csv_path, "test.csv", nrows = 100)
+  expect_true(!is.null(loaded))
+  expect_equal(nrow(loaded), 100)
+  expect_true(all(c("a", "b") %in% names(loaded)))
+
+  unlink(csv_path)
+})
+
+test_that("read_preview reads TSV files", {
+  tsv_path <- tempfile(fileext = ".tsv")
+  fixture <- data.frame(a = 1:50, b = letters[1:50 %% 26 + 1])
+  write.table(fixture, file = tsv_path, quote = FALSE, sep = "\t", row.names = FALSE)
+
+  loaded <- MSstatsShiny:::.read_preview(tsv_path, "test.tsv", nrows = 100)
+  expect_true(!is.null(loaded))
+  # Fewer rows than the limit: all of them come back.
+  expect_equal(nrow(loaded), 50)
+
+  unlink(tsv_path)
+})
+
+test_that("read_preview returns NULL for non-existent files", {
+  loaded <- MSstatsShiny:::.read_preview("/nonexistent/path.csv", "test.csv")
+  expect_null(loaded)
+})
+
+test_that("read_preview returns NULL for malformed files", {
+  junk_path <- tempfile(fileext = ".csv")
+  junk_bytes <- as.raw(c(0xFF, 0xFE, 0x00, 0x00))
+  writeBin(junk_bytes, junk_path)
+  loaded <- MSstatsShiny:::.read_preview(junk_path, "test.csv")
+  # fread may or may not manage to parse the bytes, so both outcomes are accepted.
+  parsed_or_null <- is.null(loaded) || is.data.frame(loaded)
+  expect_true(parsed_or_null)
+  unlink(junk_path)
+})
+
+test_that("read_preview handles NULL filename gracefully", {
+  csv_path <- tempfile(fileext = ".csv")
+  write.csv(data.frame(a = 1:10), file = csv_path, row.names = FALSE)
+
+  # No filename means no extension to inspect, so the delimited-text reader is used.
+  loaded <- MSstatsShiny:::.read_preview(csv_path, NULL)
+  expect_true(!is.null(loaded))
+  expect_equal(nrow(loaded), 10)
+
+  unlink(csv_path)
+})
+
+test_that("read_preview dispatches parquet files to arrow schema reader", {
+  skip_if_not_installed("arrow")
+
+  pq_path <- tempfile(fileext = ".parquet")
+  fixture <- data.frame(a = 1:50, b = runif(50))
+  arrow::write_parquet(fixture, pq_path)
+
+  loaded <- MSstatsShiny:::.read_preview(pq_path, "test.parquet")
+  expect_true(!is.null(loaded))
+  # The parquet path reads the schema only: column names, no rows.
+  expect_equal(nrow(loaded), 0)
+  expect_true(all(c("a", "b") %in% names(loaded)))
+
+  unlink(pq_path)
+})
+
+test_that("read_preview recognizes .pq extension as parquet", {
+  skip_if_not_installed("arrow")
+
+  pq_path <- tempfile(fileext = ".pq")
+  arrow::write_parquet(data.frame(a = 1:10), pq_path)
+
+  loaded <- MSstatsShiny:::.read_preview(pq_path, "test.pq")
+  expect_true(!is.null(loaded))
+
+  unlink(pq_path)
+})
+
+# ============================================================================
+# INTEGRATION TESTS: PREVIEW + DIANN DETECTION
+# ============================================================================
+
+test_that("DIANN 1.x CSV file is correctly detected as not 2.0+", {
+  csv_path <- tempfile(fileext = ".csv")
+  n_rows <- 10
+  fixture <- data.frame(
+    Run = paste0("run", seq_len(n_rows)),
+    Protein.Group = "P1",
+    Fragment.Quant.Corrected = runif(n_rows) * 1000,
+    Fragment.Quant.Raw = runif(n_rows) * 1000,
+    Precursor.Charge = 2,
+    Q.Value = runif(n_rows, 0, 0.01)
+  )
+  write.csv(fixture, file = csv_path, row.names = FALSE)
+
+  loaded <- MSstatsShiny:::.read_preview(csv_path, "diann_1x.csv")
+  expect_false(MSstatsShiny:::.is_diann_2plus(loaded))
+
+  unlink(csv_path)
+})
+
+test_that("DIANN 2.0 parquet file is correctly detected as 2.0+", {
+  skip_if_not_installed("arrow")
+
+  pq_path <- tempfile(fileext = ".parquet")
+  n_rows <- 10
+  fixture <- data.frame(
+    Run = paste0("run", seq_len(n_rows)),
+    Protein.Group = "P1",
+    Fr.0.Quantity = runif(n_rows) * 1000,
+    Fr.0.Index = 1L,
+    Fr.1.Quantity = runif(n_rows) * 1000,
+    Fr.1.Index = 2L,
+    Fr.2.Quantity = runif(n_rows) * 1000,
+    Fr.2.Index = 3L,
+    Precursor.Charge = 2,
+    Q.Value = runif(n_rows, 0, 0.01)
+  )
+  arrow::write_parquet(fixture, pq_path)
+
+  loaded <- MSstatsShiny:::.read_preview(pq_path, "diann_2plus.parquet")
+  expect_true(MSstatsShiny:::.is_diann_2plus(loaded))
+
+  unlink(pq_path)
+})
\ No newline at end of file