diff --git a/scripts/build_msigdb_cache.R b/scripts/build_msigdb_cache.R
new file mode 100644
index 0000000..d87c497
--- /dev/null
+++ b/scripts/build_msigdb_cache.R
@@ -0,0 +1,201 @@
+#!/usr/bin/env Rscript
+
+# Build the on-disk MSigDB geneset cache read by the Shiny hypeR module.
+#
+# For every species in `species_to_cache` and every (collection,
+# subcollection) pair reported by msigdbr::msigdbr_collections(), one
+# named list (geneset name -> unique gene symbols) is written to
+#   <output_dir>/<species>__<collection>__<subcollection or "all">.rds
+# and a manifest (manifest.rds / manifest.csv) with one row per cache
+# file is written next to them.
+#
+# The output directory defaults to shiny/data/msigdb_genesets, resolved
+# against the current working directory, and can be overridden with the
+# MSIGDB_CACHE_DIR environment variable.
+#
+# NOTE: the file-name scheme must stay in sync with msigdb_cache_file()
+# in shiny/modules/hypeR_module.R, which looks the files up by name.
+
+suppressPackageStartupMessages({
+  library(dplyr)
+  library(msigdbr)
+})
+
+species_to_cache <- c(
+  "Homo sapiens",
+  "Mus musculus"
+)
+
+default_output_dir <- file.path("shiny", "data", "msigdb_genesets")
+output_dir <- Sys.getenv("MSIGDB_CACHE_DIR", unset = default_output_dir)
+
+dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
+if (!dir.exists(output_dir)) {
+  stop("Could not create MSigDB cache directory: ", output_dir, call. = FALSE)
+}
+
+# TRUE only for a single, non-missing, non-empty string. Plain nzchar()
+# is not enough: nzchar(NA) is TRUE, which would turn a missing
+# subcollection into a filter value and an "NA" file-name component.
+has_value <- function(x) {
+  length(x) == 1L && !is.na(x) && nzchar(x)
+}
+
+# Reduce a label to a file-name-safe slug: each run of characters outside
+# [A-Za-z0-9] becomes one underscore, and a leading or trailing underscore
+# is dropped (e.g. "CP:KEGG_LEGACY" -> "CP_KEGG_LEGACY").
+slugify <- function(x) {
+  x <- gsub("[^A-Za-z0-9]+", "_", x)
+  x <- gsub("_+", "_", x)
+  x <- gsub("^_|_$", "", x)
+  x
+}
+
+# Fetch one collection (optionally narrowed to one subcollection) for
+# `species` with msigdbr::msigdbr() and return its result unchanged.
+#
+# The filter arguments are picked from the formals of the installed
+# msigdbr: `collection` / `subcollection` when present, otherwise
+# `category` / `subcategory`. An empty or missing `subcollection` means
+# "no subcollection filter".
+fetch_msigdb <- function(species, collection, subcollection = "") {
+  msigdbr_args <- list(species = species)
+  msigdbr_formals <- names(formals(msigdbr::msigdbr))
+
+  if ("collection" %in% msigdbr_formals) {
+    msigdbr_args$collection <- collection
+  } else if ("category" %in% msigdbr_formals) {
+    msigdbr_args$category <- collection
+  } else {
+    # With no collection filter the call would not be restricted to
+    # `collection`, yet its result would be cached under that name.
+    stop(
+      "msigdbr::msigdbr() has neither a 'collection' nor a 'category' ",
+      "argument; cannot select collection ", collection,
+      call. = FALSE
+    )
+  }
+
+  if (has_value(subcollection)) {
+    if ("subcollection" %in% msigdbr_formals) {
+      msigdbr_args$subcollection <- subcollection
+    } else if ("subcategory" %in% msigdbr_formals) {
+      msigdbr_args$subcategory <- subcollection
+    }
+  }
+
+  do.call(msigdbr::msigdbr, msigdbr_args)
+}
+
+# Path of the cache file for one species / collection / subcollection.
+# Species and subcollection are slugified, an empty or missing
+# subcollection is written as "all", and `collection` is used verbatim.
+geneset_cache_path <- function(species, collection, subcollection) {
+  species_slug <- slugify(species)
+  subcollection_slug <- if (has_value(subcollection)) {
+    slugify(subcollection)
+  } else {
+    "all"
+  }
+  file.path(
+    output_dir,
+    sprintf("%s__%s__%s.rds", species_slug, collection, subcollection_slug)
+  )
+}
+
+# Every collection / subcollection pair known to msigdbr. msigdbr
+# releases that take `category` / `subcategory` name these columns
+# gs_cat / gs_subcat; rename them so the rest of the script, including
+# the legacy-argument branch of fetch_msigdb(), works with either release.
+collections <- msigdbr::msigdbr_collections()
+legacy_columns <- c(gs_cat = "gs_collection", gs_subcat = "gs_subcollection")
+for (legacy_name in names(legacy_columns)) {
+  current_name <- legacy_columns[[legacy_name]]
+  if (!current_name %in% names(collections) &&
+    legacy_name %in% names(collections)) {
+    names(collections)[names(collections) == legacy_name] <- current_name
+  }
+}
+
+collections <- collections |>
+  dplyr::select(gs_collection, gs_subcollection) |>
+  dplyr::mutate(
+    # A missing subcollection means the same as "": no subcollection.
+    gs_subcollection = dplyr::coalesce(as.character(gs_subcollection), "")
+  ) |>
+  dplyr::distinct() |>
+  dplyr::arrange(gs_collection, gs_subcollection)
+
+# One slot per species x collection pair; only the first `manifest_index`
+# slots are filled because pairs with no genesets are skipped.
+manifest <- vector("list", length(species_to_cache) * nrow(collections))
+manifest_index <- 0L
+
+for (species in species_to_cache) {
+  message("Building MSigDB cache for: ", species)
+
+  for (i in seq_len(nrow(collections))) {
+    collection <- collections$gs_collection[[i]]
+    subcollection <- collections$gs_subcollection[[i]]
+    cache_file <- geneset_cache_path(species, collection, subcollection)
+    pair_label <- paste0(
+      collection,
+      if (has_value(subcollection)) paste0(" / ", subcollection) else ""
+    )
+
+    message(" - ", pair_label, " -> ", cache_file)
+
+    msigdb_tbl <- fetch_msigdb(
+      species = species,
+      collection = collection,
+      subcollection = subcollection
+    ) |>
+      dplyr::select(gs_name, gene_symbol)
+
+    if (nrow(msigdb_tbl) == 0) {
+      # The app treats any cache file it can read as a hit, so an empty
+      # one would hide the collection instead of allowing a live fetch.
+      warning(
+        "No genesets returned for ", species, " / ", pair_label,
+        "; cache file not written.",
+        call. = FALSE
+      )
+      next
+    }
+
+    # Named list: geneset name -> unique gene symbols.
+    genesets <- split(msigdb_tbl$gene_symbol, msigdb_tbl$gs_name)
+    genesets <- lapply(genesets, unique)
+
+    saveRDS(genesets, cache_file, compress = "xz")
+
+    manifest_index <- manifest_index + 1L
+    manifest[[manifest_index]] <- data.frame(
+      species = species,
+      gs_collection = collection,
+      gs_subcollection = subcollection,
+      file = basename(cache_file),
+      genesets = length(genesets),
+      genes = length(unique(unlist(genesets, use.names = FALSE))),
+      stringsAsFactors = FALSE
+    )
+
+    # Drop the fetched table before the next iteration allocates another.
+    rm(msigdb_tbl, genesets)
+    gc(verbose = FALSE)
+  }
+}
+
+if (manifest_index == 0L) {
+  stop("No MSigDB genesets were cached; manifest not written.", call. = FALSE)
+}
+
+manifest_df <- do.call(rbind, manifest[seq_len(manifest_index)])
+manifest_file <- file.path(output_dir, "manifest.rds")
+manifest_csv <- file.path(output_dir, "manifest.csv")
+
+saveRDS(manifest_df, manifest_file, compress = "xz")
+utils::write.csv(manifest_df, manifest_csv, row.names = FALSE)
+
+message("MSigDB cache complete.")
+message("Manifest: ", manifest_file)
diff --git a/shiny/data/msigdb_genesets/Homo_sapiens__C1__all.rds b/shiny/data/msigdb_genesets/Homo_sapiens__C1__all.rds
new file mode 100644
index 0000000..b8a1fc1
Binary files /dev/null and b/shiny/data/msigdb_genesets/Homo_sapiens__C1__all.rds differ
diff --git a/shiny/data/msigdb_genesets/Homo_sapiens__C2__CGP.rds b/shiny/data/msigdb_genesets/Homo_sapiens__C2__CGP.rds
new file mode 100644
index 0000000..96169f0
Binary files /dev/null and b/shiny/data/msigdb_genesets/Homo_sapiens__C2__CGP.rds differ
diff --git a/shiny/data/msigdb_genesets/Homo_sapiens__C2__CP.rds b/shiny/data/msigdb_genesets/Homo_sapiens__C2__CP.rds
new file mode 100644
index 0000000..b400309
Binary files /dev/null and
b/shiny/data/msigdb_genesets/Homo_sapiens__C2__CP.rds differ diff --git a/shiny/data/msigdb_genesets/Homo_sapiens__C2__CP_BIOCARTA.rds b/shiny/data/msigdb_genesets/Homo_sapiens__C2__CP_BIOCARTA.rds new file mode 100644 index 0000000..cde2ef3 Binary files /dev/null and b/shiny/data/msigdb_genesets/Homo_sapiens__C2__CP_BIOCARTA.rds differ diff --git a/shiny/data/msigdb_genesets/Homo_sapiens__C2__CP_KEGG_LEGACY.rds b/shiny/data/msigdb_genesets/Homo_sapiens__C2__CP_KEGG_LEGACY.rds new file mode 100644 index 0000000..16695de Binary files /dev/null and b/shiny/data/msigdb_genesets/Homo_sapiens__C2__CP_KEGG_LEGACY.rds differ diff --git a/shiny/data/msigdb_genesets/Homo_sapiens__C2__CP_KEGG_MEDICUS.rds b/shiny/data/msigdb_genesets/Homo_sapiens__C2__CP_KEGG_MEDICUS.rds new file mode 100644 index 0000000..85136ae Binary files /dev/null and b/shiny/data/msigdb_genesets/Homo_sapiens__C2__CP_KEGG_MEDICUS.rds differ diff --git a/shiny/data/msigdb_genesets/Homo_sapiens__C2__CP_PID.rds b/shiny/data/msigdb_genesets/Homo_sapiens__C2__CP_PID.rds new file mode 100644 index 0000000..1d74bbd Binary files /dev/null and b/shiny/data/msigdb_genesets/Homo_sapiens__C2__CP_PID.rds differ diff --git a/shiny/data/msigdb_genesets/Homo_sapiens__C2__CP_REACTOME.rds b/shiny/data/msigdb_genesets/Homo_sapiens__C2__CP_REACTOME.rds new file mode 100644 index 0000000..a16c92c Binary files /dev/null and b/shiny/data/msigdb_genesets/Homo_sapiens__C2__CP_REACTOME.rds differ diff --git a/shiny/data/msigdb_genesets/Homo_sapiens__C2__CP_WIKIPATHWAYS.rds b/shiny/data/msigdb_genesets/Homo_sapiens__C2__CP_WIKIPATHWAYS.rds new file mode 100644 index 0000000..534ece9 Binary files /dev/null and b/shiny/data/msigdb_genesets/Homo_sapiens__C2__CP_WIKIPATHWAYS.rds differ diff --git a/shiny/data/msigdb_genesets/Homo_sapiens__C3__MIR_MIRDB.rds b/shiny/data/msigdb_genesets/Homo_sapiens__C3__MIR_MIRDB.rds new file mode 100644 index 0000000..a82fe93 Binary files /dev/null and 
b/shiny/data/msigdb_genesets/Homo_sapiens__C3__MIR_MIRDB.rds differ diff --git a/shiny/data/msigdb_genesets/Homo_sapiens__C3__MIR_MIR_LEGACY.rds b/shiny/data/msigdb_genesets/Homo_sapiens__C3__MIR_MIR_LEGACY.rds new file mode 100644 index 0000000..614a671 Binary files /dev/null and b/shiny/data/msigdb_genesets/Homo_sapiens__C3__MIR_MIR_LEGACY.rds differ diff --git a/shiny/data/msigdb_genesets/Homo_sapiens__C3__TFT_GTRD.rds b/shiny/data/msigdb_genesets/Homo_sapiens__C3__TFT_GTRD.rds new file mode 100644 index 0000000..bddf44a Binary files /dev/null and b/shiny/data/msigdb_genesets/Homo_sapiens__C3__TFT_GTRD.rds differ diff --git a/shiny/data/msigdb_genesets/Homo_sapiens__C3__TFT_TFT_LEGACY.rds b/shiny/data/msigdb_genesets/Homo_sapiens__C3__TFT_TFT_LEGACY.rds new file mode 100644 index 0000000..3b00548 Binary files /dev/null and b/shiny/data/msigdb_genesets/Homo_sapiens__C3__TFT_TFT_LEGACY.rds differ diff --git a/shiny/data/msigdb_genesets/Homo_sapiens__C4__3CA.rds b/shiny/data/msigdb_genesets/Homo_sapiens__C4__3CA.rds new file mode 100644 index 0000000..238c63a Binary files /dev/null and b/shiny/data/msigdb_genesets/Homo_sapiens__C4__3CA.rds differ diff --git a/shiny/data/msigdb_genesets/Homo_sapiens__C4__CGN.rds b/shiny/data/msigdb_genesets/Homo_sapiens__C4__CGN.rds new file mode 100644 index 0000000..e0d689f Binary files /dev/null and b/shiny/data/msigdb_genesets/Homo_sapiens__C4__CGN.rds differ diff --git a/shiny/data/msigdb_genesets/Homo_sapiens__C4__CM.rds b/shiny/data/msigdb_genesets/Homo_sapiens__C4__CM.rds new file mode 100644 index 0000000..b72da22 Binary files /dev/null and b/shiny/data/msigdb_genesets/Homo_sapiens__C4__CM.rds differ diff --git a/shiny/data/msigdb_genesets/Homo_sapiens__C5__GO_BP.rds b/shiny/data/msigdb_genesets/Homo_sapiens__C5__GO_BP.rds new file mode 100644 index 0000000..71d5b7b Binary files /dev/null and b/shiny/data/msigdb_genesets/Homo_sapiens__C5__GO_BP.rds differ diff --git a/shiny/data/msigdb_genesets/Homo_sapiens__C5__GO_CC.rds 
b/shiny/data/msigdb_genesets/Homo_sapiens__C5__GO_CC.rds new file mode 100644 index 0000000..9055f22 Binary files /dev/null and b/shiny/data/msigdb_genesets/Homo_sapiens__C5__GO_CC.rds differ diff --git a/shiny/data/msigdb_genesets/Homo_sapiens__C5__GO_MF.rds b/shiny/data/msigdb_genesets/Homo_sapiens__C5__GO_MF.rds new file mode 100644 index 0000000..17d8169 Binary files /dev/null and b/shiny/data/msigdb_genesets/Homo_sapiens__C5__GO_MF.rds differ diff --git a/shiny/data/msigdb_genesets/Homo_sapiens__C5__HPO.rds b/shiny/data/msigdb_genesets/Homo_sapiens__C5__HPO.rds new file mode 100644 index 0000000..2f23209 Binary files /dev/null and b/shiny/data/msigdb_genesets/Homo_sapiens__C5__HPO.rds differ diff --git a/shiny/data/msigdb_genesets/Homo_sapiens__C6__all.rds b/shiny/data/msigdb_genesets/Homo_sapiens__C6__all.rds new file mode 100644 index 0000000..b9e0557 Binary files /dev/null and b/shiny/data/msigdb_genesets/Homo_sapiens__C6__all.rds differ diff --git a/shiny/data/msigdb_genesets/Homo_sapiens__C7__IMMUNESIGDB.rds b/shiny/data/msigdb_genesets/Homo_sapiens__C7__IMMUNESIGDB.rds new file mode 100644 index 0000000..f0326f2 Binary files /dev/null and b/shiny/data/msigdb_genesets/Homo_sapiens__C7__IMMUNESIGDB.rds differ diff --git a/shiny/data/msigdb_genesets/Homo_sapiens__C7__VAX.rds b/shiny/data/msigdb_genesets/Homo_sapiens__C7__VAX.rds new file mode 100644 index 0000000..b4bb8a5 Binary files /dev/null and b/shiny/data/msigdb_genesets/Homo_sapiens__C7__VAX.rds differ diff --git a/shiny/data/msigdb_genesets/Homo_sapiens__C8__all.rds b/shiny/data/msigdb_genesets/Homo_sapiens__C8__all.rds new file mode 100644 index 0000000..abc24f8 Binary files /dev/null and b/shiny/data/msigdb_genesets/Homo_sapiens__C8__all.rds differ diff --git a/shiny/data/msigdb_genesets/Homo_sapiens__H__all.rds b/shiny/data/msigdb_genesets/Homo_sapiens__H__all.rds new file mode 100644 index 0000000..4bbd527 Binary files /dev/null and b/shiny/data/msigdb_genesets/Homo_sapiens__H__all.rds differ 
diff --git a/shiny/data/msigdb_genesets/Mus_musculus__C1__all.rds b/shiny/data/msigdb_genesets/Mus_musculus__C1__all.rds new file mode 100644 index 0000000..90bb264 Binary files /dev/null and b/shiny/data/msigdb_genesets/Mus_musculus__C1__all.rds differ diff --git a/shiny/data/msigdb_genesets/Mus_musculus__C2__CGP.rds b/shiny/data/msigdb_genesets/Mus_musculus__C2__CGP.rds new file mode 100644 index 0000000..2337801 Binary files /dev/null and b/shiny/data/msigdb_genesets/Mus_musculus__C2__CGP.rds differ diff --git a/shiny/data/msigdb_genesets/Mus_musculus__C2__CP.rds b/shiny/data/msigdb_genesets/Mus_musculus__C2__CP.rds new file mode 100644 index 0000000..e0bef7b Binary files /dev/null and b/shiny/data/msigdb_genesets/Mus_musculus__C2__CP.rds differ diff --git a/shiny/data/msigdb_genesets/Mus_musculus__C2__CP_BIOCARTA.rds b/shiny/data/msigdb_genesets/Mus_musculus__C2__CP_BIOCARTA.rds new file mode 100644 index 0000000..3974fef Binary files /dev/null and b/shiny/data/msigdb_genesets/Mus_musculus__C2__CP_BIOCARTA.rds differ diff --git a/shiny/data/msigdb_genesets/Mus_musculus__C2__CP_KEGG_LEGACY.rds b/shiny/data/msigdb_genesets/Mus_musculus__C2__CP_KEGG_LEGACY.rds new file mode 100644 index 0000000..bc0f68d Binary files /dev/null and b/shiny/data/msigdb_genesets/Mus_musculus__C2__CP_KEGG_LEGACY.rds differ diff --git a/shiny/data/msigdb_genesets/Mus_musculus__C2__CP_KEGG_MEDICUS.rds b/shiny/data/msigdb_genesets/Mus_musculus__C2__CP_KEGG_MEDICUS.rds new file mode 100644 index 0000000..76f3d39 Binary files /dev/null and b/shiny/data/msigdb_genesets/Mus_musculus__C2__CP_KEGG_MEDICUS.rds differ diff --git a/shiny/data/msigdb_genesets/Mus_musculus__C2__CP_PID.rds b/shiny/data/msigdb_genesets/Mus_musculus__C2__CP_PID.rds new file mode 100644 index 0000000..4c42186 Binary files /dev/null and b/shiny/data/msigdb_genesets/Mus_musculus__C2__CP_PID.rds differ diff --git a/shiny/data/msigdb_genesets/Mus_musculus__C2__CP_REACTOME.rds 
b/shiny/data/msigdb_genesets/Mus_musculus__C2__CP_REACTOME.rds new file mode 100644 index 0000000..a4a3b69 Binary files /dev/null and b/shiny/data/msigdb_genesets/Mus_musculus__C2__CP_REACTOME.rds differ diff --git a/shiny/data/msigdb_genesets/Mus_musculus__C2__CP_WIKIPATHWAYS.rds b/shiny/data/msigdb_genesets/Mus_musculus__C2__CP_WIKIPATHWAYS.rds new file mode 100644 index 0000000..fd1195f Binary files /dev/null and b/shiny/data/msigdb_genesets/Mus_musculus__C2__CP_WIKIPATHWAYS.rds differ diff --git a/shiny/data/msigdb_genesets/Mus_musculus__C3__MIR_MIRDB.rds b/shiny/data/msigdb_genesets/Mus_musculus__C3__MIR_MIRDB.rds new file mode 100644 index 0000000..9252e73 Binary files /dev/null and b/shiny/data/msigdb_genesets/Mus_musculus__C3__MIR_MIRDB.rds differ diff --git a/shiny/data/msigdb_genesets/Mus_musculus__C3__MIR_MIR_LEGACY.rds b/shiny/data/msigdb_genesets/Mus_musculus__C3__MIR_MIR_LEGACY.rds new file mode 100644 index 0000000..4abeabb Binary files /dev/null and b/shiny/data/msigdb_genesets/Mus_musculus__C3__MIR_MIR_LEGACY.rds differ diff --git a/shiny/data/msigdb_genesets/Mus_musculus__C3__TFT_GTRD.rds b/shiny/data/msigdb_genesets/Mus_musculus__C3__TFT_GTRD.rds new file mode 100644 index 0000000..e66a388 Binary files /dev/null and b/shiny/data/msigdb_genesets/Mus_musculus__C3__TFT_GTRD.rds differ diff --git a/shiny/data/msigdb_genesets/Mus_musculus__C3__TFT_TFT_LEGACY.rds b/shiny/data/msigdb_genesets/Mus_musculus__C3__TFT_TFT_LEGACY.rds new file mode 100644 index 0000000..3c63516 Binary files /dev/null and b/shiny/data/msigdb_genesets/Mus_musculus__C3__TFT_TFT_LEGACY.rds differ diff --git a/shiny/data/msigdb_genesets/Mus_musculus__C4__3CA.rds b/shiny/data/msigdb_genesets/Mus_musculus__C4__3CA.rds new file mode 100644 index 0000000..da8e1cc Binary files /dev/null and b/shiny/data/msigdb_genesets/Mus_musculus__C4__3CA.rds differ diff --git a/shiny/data/msigdb_genesets/Mus_musculus__C4__CGN.rds b/shiny/data/msigdb_genesets/Mus_musculus__C4__CGN.rds new file mode 
100644 index 0000000..656c260 Binary files /dev/null and b/shiny/data/msigdb_genesets/Mus_musculus__C4__CGN.rds differ diff --git a/shiny/data/msigdb_genesets/Mus_musculus__C4__CM.rds b/shiny/data/msigdb_genesets/Mus_musculus__C4__CM.rds new file mode 100644 index 0000000..e10275d Binary files /dev/null and b/shiny/data/msigdb_genesets/Mus_musculus__C4__CM.rds differ diff --git a/shiny/data/msigdb_genesets/Mus_musculus__C5__GO_BP.rds b/shiny/data/msigdb_genesets/Mus_musculus__C5__GO_BP.rds new file mode 100644 index 0000000..780f8e7 Binary files /dev/null and b/shiny/data/msigdb_genesets/Mus_musculus__C5__GO_BP.rds differ diff --git a/shiny/data/msigdb_genesets/Mus_musculus__C5__GO_CC.rds b/shiny/data/msigdb_genesets/Mus_musculus__C5__GO_CC.rds new file mode 100644 index 0000000..cb630ee Binary files /dev/null and b/shiny/data/msigdb_genesets/Mus_musculus__C5__GO_CC.rds differ diff --git a/shiny/data/msigdb_genesets/Mus_musculus__C5__GO_MF.rds b/shiny/data/msigdb_genesets/Mus_musculus__C5__GO_MF.rds new file mode 100644 index 0000000..dd191bf Binary files /dev/null and b/shiny/data/msigdb_genesets/Mus_musculus__C5__GO_MF.rds differ diff --git a/shiny/data/msigdb_genesets/Mus_musculus__C5__HPO.rds b/shiny/data/msigdb_genesets/Mus_musculus__C5__HPO.rds new file mode 100644 index 0000000..cf2c945 Binary files /dev/null and b/shiny/data/msigdb_genesets/Mus_musculus__C5__HPO.rds differ diff --git a/shiny/data/msigdb_genesets/Mus_musculus__C6__all.rds b/shiny/data/msigdb_genesets/Mus_musculus__C6__all.rds new file mode 100644 index 0000000..4fb4db2 Binary files /dev/null and b/shiny/data/msigdb_genesets/Mus_musculus__C6__all.rds differ diff --git a/shiny/data/msigdb_genesets/Mus_musculus__C7__IMMUNESIGDB.rds b/shiny/data/msigdb_genesets/Mus_musculus__C7__IMMUNESIGDB.rds new file mode 100644 index 0000000..61724f3 Binary files /dev/null and b/shiny/data/msigdb_genesets/Mus_musculus__C7__IMMUNESIGDB.rds differ diff --git 
a/shiny/data/msigdb_genesets/Mus_musculus__C7__VAX.rds b/shiny/data/msigdb_genesets/Mus_musculus__C7__VAX.rds new file mode 100644 index 0000000..0cd87e7 Binary files /dev/null and b/shiny/data/msigdb_genesets/Mus_musculus__C7__VAX.rds differ diff --git a/shiny/data/msigdb_genesets/Mus_musculus__C8__all.rds b/shiny/data/msigdb_genesets/Mus_musculus__C8__all.rds new file mode 100644 index 0000000..8499ad1 Binary files /dev/null and b/shiny/data/msigdb_genesets/Mus_musculus__C8__all.rds differ diff --git a/shiny/data/msigdb_genesets/Mus_musculus__H__all.rds b/shiny/data/msigdb_genesets/Mus_musculus__H__all.rds new file mode 100644 index 0000000..d751fd8 Binary files /dev/null and b/shiny/data/msigdb_genesets/Mus_musculus__H__all.rds differ diff --git a/shiny/data/msigdb_genesets/manifest.csv b/shiny/data/msigdb_genesets/manifest.csv new file mode 100644 index 0000000..3b2d2fc --- /dev/null +++ b/shiny/data/msigdb_genesets/manifest.csv @@ -0,0 +1,51 @@ +"species","gs_collection","gs_subcollection","file","genesets","genes" +"Homo sapiens","C1","","Homo_sapiens__C1__all.rds",302,43321 +"Homo sapiens","C2","CGP","Homo_sapiens__C2__CGP.rds",3538,21705 +"Homo sapiens","C2","CP","Homo_sapiens__C2__CP.rds",19,349 +"Homo sapiens","C2","CP:BIOCARTA","Homo_sapiens__C2__CP_BIOCARTA.rds",292,1509 +"Homo sapiens","C2","CP:KEGG_LEGACY","Homo_sapiens__C2__CP_KEGG_LEGACY.rds",186,5245 +"Homo sapiens","C2","CP:KEGG_MEDICUS","Homo_sapiens__C2__CP_KEGG_MEDICUS.rds",658,2788 +"Homo sapiens","C2","CP:PID","Homo_sapiens__C2__CP_PID.rds",196,2534 +"Homo sapiens","C2","CP:REACTOME","Homo_sapiens__C2__CP_REACTOME.rds",1787,11369 +"Homo sapiens","C2","CP:WIKIPATHWAYS","Homo_sapiens__C2__CP_WIKIPATHWAYS.rds",885,9127 +"Homo sapiens","C3","MIR:MIRDB","Homo_sapiens__C3__MIR_MIRDB.rds",2377,16652 +"Homo sapiens","C3","MIR:MIR_LEGACY","Homo_sapiens__C3__MIR_MIR_LEGACY.rds",221,7450 +"Homo sapiens","C3","TFT:GTRD","Homo_sapiens__C3__TFT_GTRD.rds",505,26928 +"Homo 
sapiens","C3","TFT:TFT_LEGACY","Homo_sapiens__C3__TFT_TFT_LEGACY.rds",610,12779 +"Homo sapiens","C4","3CA","Homo_sapiens__C4__3CA.rds",148,2992 +"Homo sapiens","C4","CGN","Homo_sapiens__C4__CGN.rds",427,4883 +"Homo sapiens","C4","CM","Homo_sapiens__C4__CM.rds",431,8359 +"Homo sapiens","C5","GO:BP","Homo_sapiens__C5__GO_BP.rds",7583,18000 +"Homo sapiens","C5","GO:CC","Homo_sapiens__C5__GO_CC.rds",1042,14869 +"Homo sapiens","C5","GO:MF","Homo_sapiens__C5__GO_MF.rds",1855,15699 +"Homo sapiens","C5","HPO","Homo_sapiens__C5__HPO.rds",5748,5112 +"Homo sapiens","C6","","Homo_sapiens__C6__all.rds",189,10927 +"Homo sapiens","C7","IMMUNESIGDB","Homo_sapiens__C7__IMMUNESIGDB.rds",4872,20440 +"Homo sapiens","C7","VAX","Homo_sapiens__C7__VAX.rds",347,13431 +"Homo sapiens","C8","","Homo_sapiens__C8__all.rds",866,20533 +"Homo sapiens","H","","Homo_sapiens__H__all.rds",50,4384 +"Mus musculus","C1","","Mus_musculus__C1__all.rds",297,17961 +"Mus musculus","C2","CGP","Mus_musculus__C2__CGP.rds",3537,17514 +"Mus musculus","C2","CP","Mus_musculus__C2__CP.rds",19,350 +"Mus musculus","C2","CP:BIOCARTA","Mus_musculus__C2__CP_BIOCARTA.rds",292,1513 +"Mus musculus","C2","CP:KEGG_LEGACY","Mus_musculus__C2__CP_KEGG_LEGACY.rds",186,5032 +"Mus musculus","C2","CP:KEGG_MEDICUS","Mus_musculus__C2__CP_KEGG_MEDICUS.rds",658,2718 +"Mus musculus","C2","CP:PID","Mus_musculus__C2__CP_PID.rds",196,2546 +"Mus musculus","C2","CP:REACTOME","Mus_musculus__C2__CP_REACTOME.rds",1787,10688 +"Mus musculus","C2","CP:WIKIPATHWAYS","Mus_musculus__C2__CP_WIKIPATHWAYS.rds",885,8516 +"Mus musculus","C3","MIR:MIRDB","Mus_musculus__C3__MIR_MIRDB.rds",2377,15499 +"Mus musculus","C3","MIR:MIR_LEGACY","Mus_musculus__C3__MIR_MIR_LEGACY.rds",221,7303 +"Mus musculus","C3","TFT:GTRD","Mus_musculus__C3__TFT_GTRD.rds",502,16143 +"Mus musculus","C3","TFT:TFT_LEGACY","Mus_musculus__C3__TFT_TFT_LEGACY.rds",610,12357 +"Mus musculus","C4","3CA","Mus_musculus__C4__3CA.rds",148,2875 +"Mus 
musculus","C4","CGN","Mus_musculus__C4__CGN.rds",427,4751
+"Mus musculus","C4","CM","Mus_musculus__C4__CM.rds",431,8079
+"Mus musculus","C5","GO:BP","Mus_musculus__C5__GO_BP.rds",7580,15844
+"Mus musculus","C5","GO:CC","Mus_musculus__C5__GO_CC.rds",1042,13330
+"Mus musculus","C5","GO:MF","Mus_musculus__C5__GO_MF.rds",1852,14226
+"Mus musculus","C5","HPO","Mus_musculus__C5__HPO.rds",5748,5027
+"Mus musculus","C6","","Mus_musculus__C6__all.rds",189,10255
+"Mus musculus","C7","IMMUNESIGDB","Mus_musculus__C7__IMMUNESIGDB.rds",4872,17329
+"Mus musculus","C7","VAX","Mus_musculus__C7__VAX.rds",346,11828
+"Mus musculus","C8","","Mus_musculus__C8__all.rds",866,15683
+"Mus musculus","H","","Mus_musculus__H__all.rds",50,4393
diff --git a/shiny/data/msigdb_genesets/manifest.rds b/shiny/data/msigdb_genesets/manifest.rds
new file mode 100644
index 0000000..37ea109
Binary files /dev/null and b/shiny/data/msigdb_genesets/manifest.rds differ
diff --git a/shiny/modules/hypeR_module.R b/shiny/modules/hypeR_module.R
index cf9faac..4dfc818 100644
--- a/shiny/modules/hypeR_module.R
+++ b/shiny/modules/hypeR_module.R
@@ -26,6 +26,190 @@ msigdb_collection_metadata <- data.frame(
 )
 
 
+#' Fetch MSigDB genesets for one collection
+#'
+#' Calls `msigdbr::msigdbr()` with the collection (and, when given, the
+#' subcollection) filter, using whichever argument names the installed
+#' msigdbr exposes: `collection` / `subcollection`, or the older
+#' `category` / `subcategory`.
+#'
+#' @param species Species name passed to `msigdbr::msigdbr()`.
+#' @param collection MSigDB collection code (for example `"H"` or `"C2"`).
+#' @param subcollection Subcollection code; `""`, `NULL` or `NA` applies no
+#'   subcollection filter.
+#' @return The msigdbr result reduced to the columns `gs_name`,
+#'   `gs_collection`, `gs_subcollection` and `gene_symbol`.
+#' @details Stops with an error when msigdbr offers no collection argument
+#'   or when the result lacks one of the returned columns.
+fetch_msigdb_table <- function(species, collection, subcollection = "") {
+  msigdbr_args <- list(species = species)
+  msigdbr_formals <- names(formals(msigdbr::msigdbr))
+
+  if ("collection" %in% msigdbr_formals) {
+    msigdbr_args$collection <- collection
+  } else if ("category" %in% msigdbr_formals) {
+    msigdbr_args$category <- collection
+  } else {
+    # Without a collection filter the result would not be limited to the
+    # collection the user selected.
+    stop(
+      "msigdbr::msigdbr() has neither a 'collection' nor a 'category' ",
+      "argument; cannot select collection ", collection,
+      call. = FALSE
+    )
+  }
+
+  # nzchar(NA) is TRUE, so missing values need their own check.
+  has_subcollection <- length(subcollection) == 1L &&
+    !is.na(subcollection) &&
+    nzchar(subcollection)
+
+  if (has_subcollection) {
+    if ("subcollection" %in% msigdbr_formals) {
+      msigdbr_args$subcollection <- subcollection
+    } else if ("subcategory" %in% msigdbr_formals) {
+      msigdbr_args$subcategory <- subcollection
+    }
+  }
+
+  msigdb_tbl <- do.call(msigdbr::msigdbr, msigdbr_args)
+
+  # msigdbr releases that take `category` / `subcategory` return gs_cat /
+  # gs_subcat; rename them so the legacy branch above can succeed.
+  legacy_columns <- c(gs_cat = "gs_collection", gs_subcat = "gs_subcollection")
+  for (legacy_name in names(legacy_columns)) {
+    current_name <- legacy_columns[[legacy_name]]
+    if (!current_name %in% names(msigdb_tbl) &&
+      legacy_name %in% names(msigdb_tbl)) {
+      names(msigdb_tbl)[names(msigdb_tbl) == legacy_name] <- current_name
+    }
+  }
+
+  required_columns <- c("gs_name", "gs_collection", "gs_subcollection", "gene_symbol")
+  missing_columns <- setdiff(required_columns, names(msigdb_tbl))
+
+  if (length(missing_columns) > 0) {
+    stop(
+      sprintf(
+        "MSigDB result is missing required columns: %s",
+        paste(missing_columns, collapse = ", ")
+      ),
+      call. = FALSE
+    )
+  }
+
+  msigdb_tbl |>
+    dplyr::select(
+      gs_name,
+      gs_collection,
+      gs_subcollection,
+      gene_symbol
+    )
+}
+
+
+#' Directory holding the pre-built MSigDB geneset cache
+#'
+#' A non-empty `MSIGDB_CACHE_DIR` environment variable wins. Otherwise the
+#' first existing directory among `shiny/data/msigdb_genesets` and
+#' `data/msigdb_genesets` (both relative to the working directory) is used,
+#' falling back to `shiny/data/msigdb_genesets` when neither exists.
+#'
+#' @return A single directory path; it is not guaranteed to exist.
+msigdb_cache_dir <- function() {
+  configured_dir <- Sys.getenv("MSIGDB_CACHE_DIR", unset = "")
+  if (nzchar(configured_dir)) {
+    return(configured_dir)
+  }
+
+  # The first candidate resolves when the working directory is the
+  # repository root; the second covers the working directory being the
+  # app directory itself (Shiny runs an app from its own directory).
+  # NOTE(review): assumes the app directory is `shiny/` - confirm.
+  candidate_dirs <- c(
+    file.path("shiny", "data", "msigdb_genesets"),
+    file.path("data", "msigdb_genesets")
+  )
+  existing_dirs <- candidate_dirs[dir.exists(candidate_dirs)]
+
+  if (length(existing_dirs) > 0) existing_dirs[[1]] else candidate_dirs[[1]]
+}
+
+
+#' Convert a label into a file-name-safe slug
+#'
+#' @param x Character vector.
+#' @return `x` with each run of characters outside `[A-Za-z0-9]` replaced
+#'   by one underscore and a leading or trailing underscore removed.
+msigdb_slugify <- function(x) {
+  x <- gsub("[^A-Za-z0-9]+", "_", x)
+  x <- gsub("_+", "_", x)
+  gsub("^_|_$", "", x)
+}
+
+
+#' Path of the cache file for one species / collection / subcollection
+#'
+#' Must produce the same file names as `geneset_cache_path()` in
+#' `scripts/build_msigdb_cache.R`, which writes the cache.
+#'
+#' @param species Species name; slugified for the file name.
+#' @param collection MSigDB collection code; used verbatim.
+#' @param subcollection Subcollection code; `""`, `NULL` or `NA` maps to
+#'   `"all"`.
+#' @return Path of the form
+#'   `<cache dir>/<species>__<collection>__<subcollection>.rds`.
+msigdb_cache_file <- function(species, collection, subcollection = "") {
+  # nzchar(NA) is TRUE, so missing values need their own check.
+  has_subcollection <- length(subcollection) == 1L &&
+    !is.na(subcollection) &&
+    nzchar(subcollection)
+  subcollection_slug <- if (has_subcollection) {
+    msigdb_slugify(subcollection)
+  } else {
+    "all"
+  }
+
+  file.path(
+    msigdb_cache_dir(),
+    sprintf(
+      "%s__%s__%s.rds",
+      msigdb_slugify(species),
+      collection,
+      subcollection_slug
+    )
+  )
+}
+
+
+#' Load pre-built genesets from the local cache
+#'
+#' @inheritParams msigdb_cache_file
+#' @return The object stored in the cache file (written by
+#'   `scripts/build_msigdb_cache.R` as a named list of gene-symbol
+#'   vectors), or `NULL` when no cache file exists for the request.
+#' @details Stops with an error when the file exists but does not hold a
+#'   named list, so callers can fall back to a live fetch.
+load_cached_msigdb_genesets <- function(species, collection, subcollection = "") {
+  cache_file <- msigdb_cache_file(species, collection, subcollection)
+
+  if (!file.exists(cache_file)) {
+    return(NULL)
+  }
+
+  genesets <- readRDS(cache_file)
+
+  if (!is.list(genesets) || (length(genesets) > 0 && is.null(names(genesets)))) {
+    stop(
+      sprintf("Cache file does not contain a named list: %s", cache_file),
+      call. = FALSE
+    )
+  }
+
+  genesets
+}
+
+
 # hypeR genests ui rewrite
 #' Shiny UI for MSigDB subcategory selection
 #'
@@ -136,18 +320,61 @@ genesets_hypeR_Server <- function(id, species, clean = FALSE) {
       req(input$collection)
       req(!is.null(input$subcategory))
 
-      filtered_tbl <- msigdbr::msigdbr(species = species()) |>
-        dplyr::select(
-          gs_name,
-          gs_collection,
-          gs_subcollection,
-          gene_symbol
-        ) |>
-        dplyr::filter(gs_collection == input$collection)
-
-      if (!identical(input$subcategory, "")) {
-        filtered_tbl <- filtered_tbl |>
-          dplyr::filter(gs_subcollection == input$subcategory)
+      selected_genesets(list())
+
+      cached_genesets <- tryCatch(
+        load_cached_msigdb_genesets(
+          species = species(),
+          collection = input$collection,
+          subcollection = input$subcategory
+        ),
+        error = function(err) {
+          showNotification(
+            sprintf("Failed to load cached genesets: %s", conditionMessage(err)),
+            type = "error",
+            duration = 10
+          )
+          NULL
+        }
+      )
+
+      if (!is.null(cached_genesets)) {
+        if (clean) {
+          names(cached_genesets) <- clean_genesets(names(cached_genesets))
+        }
+
+        selected_genesets(cached_genesets)
+        showNotification("Loaded genesets from local cache.", type = "message")
+        return()
+      }
+
+      filtered_tbl <- tryCatch(
+        {
+          shiny::withProgress(
+            message = "Fetching MSigDB genesets...",
+            value = 0.25,
+            {
+              fetch_msigdb_table(
+                species = species(),
+                collection = input$collection,
+                subcollection = input$subcategory
+              )
+            }
+          )
+        },
+        error = function(err) {
+          showNotification(
+            sprintf("Failed to fetch genesets: %s", conditionMessage(err)),
+            type = "error",
+            duration = 10
+          )
+          NULL
+        }
+      )
+
+      if (is.null(filtered_tbl)) {
+        selected_genesets(list())
+        return()
       }
 
       if (nrow(filtered_tbl) == 0) {
@@ -156,15 +383,16 @@ genesets_hypeR_Server <- function(id, species, clean = FALSE) {
         return()
       }
 
-      gs <- filtered_tbl |>
-        (\(df) split(df, df$gs_name))() |>
-        (\(lst) lapply(lst, function(x) unique(x$gene_symbol)))()
+      gs <- split(filtered_tbl$gene_symbol, filtered_tbl$gs_name)
+      gs <- lapply(gs, unique)
 
       if (clean) {
         names(gs) <- clean_genesets(names(gs))
       }
 
       selected_genesets(gs)
+      rm(filtered_tbl)
+      gc(verbose = FALSE)
     })
 
     # Status message