diff --git a/NAMESPACE b/NAMESPACE index ec3a7ec..d83a3b3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -19,9 +19,11 @@ export(gsvaRowNorm) export(gsvaScores) export(guessGeneIdType) export(igsva) +export(loadHDF5GSVA) export(loadHDF5GSVAranks) export(plageParam) export(readGMT) +export(saveHDF5GSVA) export(saveHDF5GSVAranks) export(spatCor) export(ssgseaParam) diff --git a/R/GSVA-pkg-deprecated.R b/R/GSVA-pkg-deprecated.R index 03e818a..a8e4b9a 100644 --- a/R/GSVA-pkg-deprecated.R +++ b/R/GSVA-pkg-deprecated.R @@ -66,7 +66,7 @@ setMethod("gsvaRanks", signature(param="gsvaParam"), cli_alert_info(sprintf("Calculating GSVA ranks")) kcdfminssize <- .get_kcdfNoneMinSampleSize(param) - gsvarownr <- .compute_row_norm(expr=filtDataMatrix, + gsvarnorm <- .compute_row_norm(expr=filtDataMatrix, kcdf=.get_kcdf(param), kcdf.min.ssize=kcdfminssize, sparse=.get_sparse(param), @@ -76,7 +76,7 @@ setMethod("gsvaRanks", signature(param="gsvaParam"), BPPARAM=BPPARAM, maxmem=maxmem) - gsvarnks <- .compute_gsva_ranks(Z=gsvarownr, + gsvarnks <- .compute_gsva_ranks(Z=gsvarnorm, verbose=verbose, BPPARAM=BPPARAM, maxmem=maxmem) @@ -205,3 +205,62 @@ setMethod("gsvaScores", signature(param="gsvaRanksParam"), return(rval) }) + +#' @description The `saveHDF5GSVAranks()` function is deprecated. Please use +#' `saveHDF5GSVA()` instead. +#' +#' @param rankExprData A column-rank expression data set obtained with +#' [`gsvaColRanks`]. Must be one of the classes supported by +#' [`GsvaExprData-class`]. For a list of these classes, see its help page +#' using `help(GsvaExprData)`. +#' +#' @return For `saveHDF5GSVAranks()`, the path to the directory where the data +#' has been saved is returned invisibly. For `loadHDF5GSVAranks()`, an object +#' is returned containing the corresponding loaded GSVA row-normalized or rank +#' expression values, and their corresponding metadata. If the saved GSVA +#' output was originally stored in a +#' [`SummarizedExperiment`][SummarizedExperiment::SummarizedExperiment] object +#' or one of its derived classes, then the returned object will be a +#' [`SummarizedExperiment`][SummarizedExperiment::SummarizedExperiment]. +#' Otherwise, the returned object will be a +#' [`DelayedMatrix`][DelayedArray::DelayedMatrix] object. +#' +#' @name saveHDF5GSVAranks +#' @rdname GSVA-pkg-deprecated +#' +#' @export +saveHDF5GSVAranks <- function(rankExprData, dir, ...) { + .Deprecated(new="saveHDF5GSVA", old="saveHDF5GSVAranks", + msg=paste("The 'saveHDF5GSVAranks()' function is deprecated.", + "Please use 'saveHDF5GSVA()' instead.")) + + saveHDF5GSVA(rankExprData, dir, assay="gsvaranks", ...) +} + +#' @description The `loadHDF5GSVAranks()` function is deprecated. Please use +#' `loadHDF5GSVA()` instead. +#' +#' @param dir The path to the directory where to save or load the GSVA rank +#' values. +#' +#' @param ... Additional arguments to be passed to the underlying HDF5 +#' saving/loading functions +#' [`saveHDF5SummarizedExperiment`][HDF5Array::saveHDF5SummarizedExperiment] +#' and [`loadHDF5SummarizedExperiment`][HDF5Array::loadHDF5SummarizedExperiment], +#' respectively. +#' +#' @name loadHDF5GSVAranks +#' @rdname GSVA-pkg-deprecated +#' +#' @export +loadHDF5GSVAranks <- function(dir, ...) { + + .Deprecated(new="loadHDF5GSVA", old="loadHDF5GSVAranks", + msg=paste("The 'loadHDF5GSVAranks()' function is deprecated.", + "Please use 'loadHDF5GSVA()' instead.")) + + rankscontainer <- loadHDF5GSVA(dir, assay="gsvaranks", ...) + + return(rankscontainer) +} + diff --git a/R/gsva-serialization.R b/R/gsva-serialization.R new file mode 100644 index 0000000..4a3dd25 --- /dev/null +++ b/R/gsva-serialization.R @@ -0,0 +1,219 @@ +#' @title Save/load GSVA output to disk using HDF5 format +#' +#' @description The functions `saveHDF5GSVA()` and `loadHDF5GSVA()` allow one +#' to save and load the output from GSVA to/from disk. The `saveHDF5GSVA()` +#' function takes the output of [`gsvaRowNorm`] or [`gsvaColRanks`] as input, +#' and saves the output from these methods with the relevant metadata to a +#' specified directory. The `loadHDF5GSVA()` function reads the saved data from +#' the specified directory and returns an object with the corresponding +#' GSVA row-normalized or rank expression values, and their corresponding +#' metadata. +#' +#' @param gsvaExprData An object obtained with [`gsvaRowNorm`] or +#' [`gsvaColRanks`]. Must be one of the classes supported by +#' [`GsvaExprData-class`]. For a list of these classes, see its help page +#' using `help(GsvaExprData)`. +#' +#' @param dir The path to the directory where to save or load the GSVA output +#' data. +#' +#' @param assay A single character string specifying the assay that contains +#' the GSVA output to be saved or loaded. By default, `assay="auto"`, which in +#' the case of saving and a `gsvaExprData` object that is a +#' `SummarizedExperiment` or one of its derivatives, it will look for an assay +#' named `gsvaranks`, and if not found, it will look for an assay named +#' `gsvarnorm`. If `gsvaExprData` is not a `SummarizedExperiment` or one of its +#' derivatives, then the assay to be saved will be determined by the `assay` +#' attribute of the `gsvaExprData` object. +#' +#' @param ... Additional arguments to be passed to the underlying HDF5 +#' saving/loading functions +#' [`saveHDF5SummarizedExperiment`][HDF5Array::saveHDF5SummarizedExperiment] +#' and [`loadHDF5SummarizedExperiment`][HDF5Array::loadHDF5SummarizedExperiment], +#' respectively. +#' +#' @return For `saveHDF5GSVA()` the path to the directory where the data has +#' been saved is returned invisibly. For `loadHDF5GSVA()`, an object is returned +#' containing the corresponding loaded GSVA row-normalized or rank expression +#' values, and their corresponding metadata. If the saved GSVA output was +#' originally stored in a +#' [`SummarizedExperiment`][SummarizedExperiment::SummarizedExperiment] object +#' or one of its derived classes, then the returned object will be a +#' [`SummarizedExperiment`][SummarizedExperiment::SummarizedExperiment]. +#' Otherwise, the returned object will be a +#' [`DelayedMatrix`][DelayedArray::DelayedMatrix] object. +#' +#' @examples +#' +#' p <- 10 ## number of genes +#' n <- 30 ## number of samples +#' nGrp1 <- 15 ## number of samples in group 1 +#' nGrp2 <- n - nGrp1 ## number of samples in group 2 +#' +#' ## consider three disjoint gene sets +#' geneSets <- list(gset1=paste0("g", 1:3), +#' gset2=paste0("g", 4:6), +#' gset3=paste0("g", 7:10)) +#' +#' ## sample data from a normal distribution with mean 0 and st.dev. 1 +#' y <- matrix(rnorm(n*p), nrow=p, ncol=n, +#' dimnames=list(paste("g", 1:p, sep="") , paste("s", 1:n, sep=""))) +#' +#' ## build GSVA parameter object +#' gsvapar <- gsvaParam(y, geneSets) +#' +#' ## calculate row-normalized expression values +#' gsvarownorm <- gsvaRowNorm(gsvapar) +#' +#' ## calculate GSVA column ranks +#' gsvacolranks <- gsvaColRanks(gsvarownorm) +#' +#' ## calculate GSVA scores +#' es <- gsvaColScores(gsvacolranks) +#' +#' ## save the GSVA row-normalized expression values to disk +#' rnormdir <- tempfile() +#' saveHDF5GSVA(gsvarownorm, rnormdir) +#' +#' ## save the GSVA rank values to disk +#' ranksdir <- tempfile() +#' saveHDF5GSVA(gsvacolranks, ranksdir) +#' +#' ## load the GSVA row-normalized values from disk +#' loaded_gsvarownorm <- loadHDF5GSVA(rnormdir) +#' +#' ## check that the loaded row-normalized values provide the +#' ## same ranks as the ones calculated from the original values +#' gsvacolranks_from_loaded_gsvarownorm <- gsvaColRanks(loaded_gsvarownorm) +#' +#' identical(gsvacolranks, gsvacolranks_from_loaded_gsvarownorm) +#' +#' ## load the GSVA ranks from disk +#' loaded_gsvacolranks <- loadHDF5GSVA(ranksdir) +#' +#' ## check that the loaded ranks provide the +#' ## same scores as the original ranks +#' loaded_es <- gsvaColScores(loaded_gsvacolranks) +#' identical(es, loaded_es) +#' +#' @importFrom cli cli_abort +#' @importFrom HDF5Array saveHDF5SummarizedExperiment +#' @importFrom S4Vectors metadata "metadata<-" +#' @importFrom SummarizedExperiment SummarizedExperiment assayNames "assay<-" +#' +#' @rdname gsva-serialization +#' +#' @export +saveHDF5GSVA <- function(gsvaExprData, dir, assay="auto", ...) { + if (!is(gsvaExprData, "GsvaExprData")) { + msg <- paste("The input object in 'gsvaExprData' must a subclass of", + "'GsvaExprData'. See 'help(GsvaExprData)' for details.") + cli_abort(msg) + } + + se <- gsvaExprData + if (!is(se, "SummarizedExperiment")) { + if (!is.null(attributes(gsvaExprData)$assay)) + assay <- attributes(gsvaExprData)$assay + if (!assay %in% c("gsvarnorm", "gsvaranks")) + cli_abort(c("x"=paste("The object in 'gsvaExprData' does not have", + "GSVA output."))) + param <- .pull_param(gsvaExprData) + first <- last <- NA_real_ + whdim <- NA_integer_ + annot <- NULL + if (!is.null(attributes(gsvaExprData)$annotation) && + is(attributes(gsvaExprData)$annotation, "GeneIdentifierType")) { + annot <- attributes(gsvaExprData)$annotation + attributes(gsvaExprData)$annotation <- NULL + } + if (!is.null(attributes(gsvaExprData)$restrict)) { + first <- attributes(gsvaExprData)$restrict$first + last <- attributes(gsvaExprData)$restrict$last + whdim <- attributes(gsvaExprData)$restrict$whdim + attributes(gsvaExprData)$restrict <- NULL + } + se <- SummarizedExperiment(assays=list(dummy=gsvaExprData)) + if (!is.null(annot)) + gsvaAnnotation(se) <- annot + se <- wrapData(se, gsvaExprData, param, assay, first=first, + last=last, whdim=whdim, dropAssays=TRUE) + + } else { + ## 'SummarizedExperiment' object, remove all assays except the selected GSVA assay + an <- assayNames(se) + assay <- .check_assay_ranks_rnorm(an, assay) + + if (is.null(metadata(se)$gsvaParam)) { + msg <- paste("Cannot find the GSVA parameters in the metadata of the", + "input object given in the 'gsvaExprData' parameter.") + cli_abort(c("x"=msg)) + } + + for (a in an) ## remove all assays except the selected one + if (a != assay) + assay(se, a) <- NULL + } + + saveHDF5SummarizedExperiment(se, dir, ...) + + invisible(dir) +} + + +#' @importFrom cli cli_abort +#' @importFrom HDF5Array loadHDF5SummarizedExperiment +#' @importFrom SummarizedExperiment SummarizedExperiment assayNames +#' +#' @rdname gsva-serialization +#' +#' @export +loadHDF5GSVA <- function(dir, assay="auto", ...) { + + gsvacontainer <- loadHDF5SummarizedExperiment(dir, ...) + + assay <- .check_assay_ranks_rnorm(assayNames(gsvacontainer), assay) + + if (is.null(metadata(gsvacontainer)$gsvaParam)) { + msg <- paste("Cannot find the GSVA parameters in the metadata of the", + "loaded GSVA container object.") + cli_abort(c("x"=msg)) + } + + if (is.null(metadata(gsvacontainer)$gsvaParam$originalClassWasSE)) { + msg <- paste("Cannot find the GSVA parameters in the metadata of the", + "loaded GSVA container object.") + cli_abort(c("x"=msg)) + } + + if (!metadata(gsvacontainer)$gsvaParam$originalClassWasSE) { + gsvapar <- metadata(gsvacontainer)$gsvaParam + restrict <- metadata(gsvacontainer)$restrict + annotation <- metadata(gsvacontainer)$annotation + gsvacontainer <- unwrapData(gsvacontainer, assay) + attr(gsvacontainer, "gsvaParam") <- gsvapar + attr(gsvacontainer, "assay") <- assay + if (!is.null(annotation)) + attr(gsvacontainer, "geneIdType") <- annotation + if (!is.null(restrict)) + attr(gsvacontainer, "restrict") <- restrict + } + + return(gsvacontainer) +} + +#' @importFrom SummarizedExperiment assayNames +.check_assay_ranks_rnorm <- function(an, assay) { + if (assay == "auto") { + if ("gsvaranks" %in% an) + assay <- "gsvaranks" + else if ("gsvarnorm" %in% an) + assay <- "gsvarnorm" + else + cli_abort(c("x"="Cannot find a GSVA assay in the object.")) + } else + if (!assay %in% an) + cli_abort(c("x"="Cannot find assay {assay} in the object.")) + + assay +} diff --git a/R/gsva.R b/R/gsva.R index acb3434..9dca73d 100644 --- a/R/gsva.R +++ b/R/gsva.R @@ -156,11 +156,11 @@ setMethod("gsva", signature(param="gsvaParam"), .check_bpparam(BPPARAM) - gsvarownr <- gsvaRowNorm(param=param, verbose=verbose, + gsvarnorm <- gsvaRowNorm(param=param, verbose=verbose, dropExistingAssays=TRUE, BPPARAM=BPPARAM, maxmem=maxmem) - gsvaranks <- gsvaColRanks(rowNormExprData=gsvarownr, + gsvaranks <- gsvaColRanks(rowNormExprData=gsvarnorm, verbose=verbose, dropExistingAssays=TRUE, BPPARAM=BPPARAM, @@ -684,10 +684,10 @@ setMethod("details", return(lst) } -## by now this is only called from gsvaColRanks(), i.e., no need -## to care about other methods +## Internal helper used by gsva*() methods and serialization helpers; not intended +## to support arbitrary external container types beyond those handled below #' @importFrom S4Vectors metadata -.pull_param <- function(exprData, assay) { +.pull_param <- function(exprData) { p <- NULL if (is(exprData, "matrix") || is(exprData, "dgCMatrix") || @@ -699,13 +699,13 @@ setMethod("details", cli_abort(c("x"="Missing metadata in the input expression data.")) p <- attr(exprData, "gsvaParam") a <- attr(exprData, "assay") - if (!a %in% c("gsvarownr", "gsvaranks")) + if (!a %in% c("gsvarnorm", "gsvaranks")) cli_abort(c("x"="Wrong metadata in the input expression data.")) } else { ## a SummarizedExperiment derivative if (is.null(metadata(exprData)$gsvaParam)) cli_abort(c("x"="Missing metadata in the input expression data")) p <- metadata(exprData)$gsvaParam - if (!any(assayNames(exprData) %in% c("gsvarownr", "gsvaranks"))) + if (!any(assayNames(exprData) %in% c("gsvarnorm", "gsvaranks"))) cli_abort(c("x"="Wrong metadata in the input expression data.")) } @@ -829,7 +829,7 @@ setMethod("details", #' object will have metadata with a copy of the input `gsvaParam` object, #' except for the `exprData` slot, and in the case of being a derivative of a #' [`SummarizedExperiment`][SummarizedExperiment::SummarizedExperiment] object, -#' an additional assay called "gsvarownr" storing the row-normalized expression +#' an additional assay called "gsvarnorm" storing the row-normalized expression #' values. #' #' @aliases gsvaRowNorm,gsvaParam-method @@ -889,7 +889,7 @@ setMethod("gsvaRowNorm", signature(param="gsvaParam"), cli_alert_info(sprintf("Normalizing rows")) kcdfminssize <- .get_kcdfNoneMinSampleSize(param) - gsvarownr <- .compute_row_norm(expr=filtDataMatrix, + gsvarnorm <- .compute_row_norm(expr=filtDataMatrix, kcdf=.get_kcdf(param), kcdf.min.ssize=kcdfminssize, sparse=.get_sparse(param), @@ -899,11 +899,11 @@ setMethod("gsvaRowNorm", signature(param="gsvaParam"), BPPARAM=BPPARAM, maxmem=maxmem) - rownames(gsvarownr) <- rownames(filtDataMatrix) - colnames(gsvarownr) <- colnames(filtDataMatrix) + rownames(gsvarnorm) <- rownames(filtDataMatrix) + colnames(gsvarnorm) <- colnames(filtDataMatrix) - rval <- wrapData(get_exprData(param), gsvarownr, param, - "gsvarownr", first, last, whdim=1, + rval <- wrapData(get_exprData(param), gsvarnorm, param, + "gsvarnorm", first, last, whdim=1, dropExistingAssays) if (verbose && gsva_global$show_start_and_end_messages) @@ -942,7 +942,7 @@ setMethod("gsvaColRanks", signature(rowNormExprData="GsvaExprData"), BPPARAM=SerialParam(progressbar=verbose), maxmem="auto") { - param <- .pull_param(rowNormExprData, "gsvarownr") + param <- .pull_param(rowNormExprData) if (verbose && gsva_global$show_start_and_end_messages) { pkgversion <- packageDescription("GSVA")[["Version"]] @@ -951,16 +951,16 @@ setMethod("gsvaColRanks", signature(rowNormExprData="GsvaExprData"), .check_bpparam(BPPARAM) - dataMatrix <- unwrapData(rowNormExprData, "gsvarownr") + dataMatrix <- unwrapData(rowNormExprData, "gsvarnorm") checkedfl <- .check_first_last_values(dataMatrix, ncol, "columns", first, last) first <- checkedfl$first last <- checkedfl$last - maxmem <- .check_maxmem(param, assay="gsvarownr", maxmem=maxmem, + maxmem <- .check_maxmem(param, assay="gsvarnorm", maxmem=maxmem, verbose=verbose) - ondisk <- .check_ondisk(param, assay="gsvarownr", first=first, + ondisk <- .check_ondisk(param, assay="gsvarnorm", first=first, last=last, whdim=2, maxmem=maxmem, verbose=verbose) @@ -1018,7 +1018,7 @@ setMethod("gsvaColScores", signature(rankExprData="GsvaExprData"), BPPARAM=SerialParam(progressbar=verbose), maxmem="auto") { - param <- .pull_param(rankExprData, "gsvaranks") + param <- .pull_param(rankExprData) if (!missing(geneSets)) { if (!is(geneSets, "GsvaGeneSets")) @@ -1215,7 +1215,7 @@ setMethod("gsvaEnrichment", signature(rankExprData="GsvaExprData"), } else cli_abort(c("x"="'column' should be a positive integer.")) - param <- .pull_param(rankExprData, "gsvaranks") + param <- .pull_param(rankExprData) plot <- match.arg(plot) diff --git a/R/gsvaRanks_serialization.R b/R/gsvaRanks_serialization.R deleted file mode 100644 index 6a21f2b..0000000 --- a/R/gsvaRanks_serialization.R +++ /dev/null @@ -1,177 +0,0 @@ -#' @title Save/load GSVA rank values to disk using HDF5 format -#' -#' @description The functions `saveHDF5GSVAranks` and `loadHDF5GSVAranks` can -#' be used to save and load the GSVA rank values to/from disk, respectively. -#' The `saveHDF5GSVAranks()` function takes the output of [`gsvaColRanks`] as -#' input, and saves the rank values along with the relevant metadata to a -#' specified directory. The `loadHDF5GSVAranks()` function reads the saved data -#' from the specified directory and returns an object with the GSVA rank values -#' and their corresponding metadata. -#' -#' @param rankExprData A column-rank expression data set obtained with -#' [`gsvaColRanks`]. Must be one of the classes supported by -#' [`GsvaExprData-class`]. For a list of these classes, see its help page -#' using `help(GsvaExprData)`. -#' -#' @param dir The path to the directory where to save or load the GSVA ranks -#' data. -#' -#' @param ... Additional arguments to be passed to the underlying HDF5 -#' saving/loading functions -#' [`saveHDF5SummarizedExperiment`][HDF5Array::saveHDF5SummarizedExperiment] -#' and [`loadHDF5SummarizedExperiment`][HDF5Array::loadHDF5SummarizedExperiment], -#' respectively. -#' -#' @return For `saveHDF5GSVAranks`, the path to the directory where the data -#' has been saved is returned invisibly. For `loadHDF5GSVAranks`, a -#' an object is returned containing the loaded GSVA rank values and their -#' corresponding metadata. If the saved ranks were originally stored in a -#' [`SummarizedExperiment`][SummarizedExperiment::SummarizedExperiment] object -#' or one of its derived classes, then the returned object will be a -#' [`SummarizedExperiment`][SummarizedExperiment::SummarizedExperiment]. -#' Otherwise, the returned object will be a -#' [`DelayedMatrix`][DelayedArray::DelayedMatrix] object. -#' -#' @examples -#' -#' p <- 10 ## number of genes -#' n <- 30 ## number of samples -#' nGrp1 <- 15 ## number of samples in group 1 -#' nGrp2 <- n - nGrp1 ## number of samples in group 2 -#' -#' ## consider three disjoint gene sets -#' geneSets <- list(gset1=paste0("g", 1:3), -#' gset2=paste0("g", 4:6), -#' gset3=paste0("g", 7:10)) -#' -#' ## sample data from a normal distribution with mean 0 and st.dev. 1 -#' y <- matrix(rnorm(n*p), nrow=p, ncol=n, -#' dimnames=list(paste("g", 1:p, sep="") , paste("s", 1:n, sep=""))) -#' -#' ## build GSVA parameter object -#' gsvapar <- gsvaParam(y, geneSets) -#' -#' ## calculate row-normalized expression values -#' gsvarownorm <- gsvaRowNorm(gsvapar) -#' -#' ## calculate GSVA column ranks -#' gsvacolranks <- gsvaColRanks(gsvarownorm) -#' -#' ## calculate GSVA scores -#' es <- gsvaColScores(gsvacolranks) -#' -#' ## save the GSVA ranks to disk -#' dir <- tempfile() -#' saveHDF5GSVAranks(gsvacolranks, dir) -#' -#' ## load the GSVA ranks from disk -#' loaded_gsvacolranks <- loadHDF5GSVAranks(dir) -#' -#' ## check that the loaded ranks provide the -#' ## same scores as the original ranks -#' loaded_es <- gsvaColScores(loaded_gsvacolranks) -#' identical(es, loaded_es) -#' -#' @importFrom cli cli_abort -#' @importFrom HDF5Array saveHDF5SummarizedExperiment -#' @importFrom S4Vectors metadata "metadata<-" -#' @importFrom SummarizedExperiment SummarizedExperiment assayNames "assay<-" -#' -#' @rdname gsvaRanks_serialization -#' -#' @export -saveHDF5GSVAranks <- function(rankExprData, dir, ...) { - if (!is(rankExprData, "GsvaExprData")) { - msg <- paste("The input object in 'rankExprData' must a subclass of", - "'GsvaExprData'. See 'help(GsvaExprData)' for details.") - cli_abort(msg) - } - - se <- rankExprData - if (!is(se, "SummarizedExperiment")) { - param <- .pull_param(rankExprData, "gsvaranks") - first <- last <- NA_real_ - whdim <- NA_integer_ - annot <- NULL - if (!is.null(attributes(rankExprData)$annotation) && - is(attributes(rankExprData)$annotation, "GeneIdentifierType")) { - annot <- attributes(rankExprData)$annotation - attributes(rankExprData)$annotation <- NULL - } - if (!is.null(attributes(rankExprData)$restrict)) { - first <- attributes(rankExprData)$restrict$first - last <- attributes(rankExprData)$restrict$last - whdim <- attributes(rankExprData)$restrict$whdim - attributes(rankExprData)$restrict <- NULL - } - se <- SummarizedExperiment(assays=list(dummy=rankExprData)) - if (!is.null(annot)) - gsvaAnnotation(se) <- annot - se <- wrapData(se, rankExprData, param, "gsvaranks", first=first, - last=last, whdim=whdim, dropAssays=TRUE) - - } else { ## 'SummarizedExperiment' object, remove all assays except 'gsvaranks' - an <- assayNames(se) - if (!"gsvaranks" %in% an) { - msg <- paste("Cannot find the ranks in the input object given in the", - "'rankExprData' parameter.") - cli_abort(c("x"=msg)) - } - if (is.null(metadata(se)$gsvaParam)) { - msg <- paste("Cannot find the GSVA parameters in the metadata of the", - "input object given in the 'rankExprData' parameter.") - cli_abort(c("x"=msg)) - } - - for (a in an) ## remove all assays except the one with the ranks - if (a != "gsvaranks") - assay(se, a) <- NULL - } - - saveHDF5SummarizedExperiment(se, dir, ...) - - invisible(dir) -} - -#' @importFrom cli cli_abort -#' @importFrom HDF5Array loadHDF5SummarizedExperiment -#' @importFrom SummarizedExperiment SummarizedExperiment assayNames "assay<-" -#' -#' @rdname gsvaRanks_serialization -#' -#' @export -loadHDF5GSVAranks <- function(dir, ...) { - - rankscontainer <- loadHDF5SummarizedExperiment(dir, ...) - - an <- assayNames(rankscontainer) - if (!"gsvaranks" %in% an) - cli_abort("Cannot find the ranks in the loaded object.") - - if (is.null(metadata(rankscontainer)$gsvaParam)) { - msg <- paste("Cannot find the GSVA parameters in the metadata of the", - "loaded GSVA ranks object.") - cli_abort(c("x"=msg)) - } - - if (is.null(metadata(rankscontainer)$gsvaParam$originalClassWasSE)) { - msg <- paste("Cannot find the GSVA parameters in the metadata of the", - "loaded GSVA ranks object.") - cli_abort(c("x"=msg)) - } - - if (!metadata(rankscontainer)$gsvaParam$originalClassWasSE) { - gsvapar <- metadata(rankscontainer)$gsvaParam - restrict <- metadata(rankscontainer)$restrict - annotation <- metadata(rankscontainer)$annotation - rankscontainer <- unwrapData(rankscontainer, "gsvaranks") - attr(rankscontainer, "gsvaParam") <- gsvapar - attr(rankscontainer, "assay") <- "gsvaranks" - if (!is.null(annotation)) - attr(rankscontainer, "geneIdType") <- annotation - if (!is.null(restrict)) - attr(rankscontainer, "restrict") <- restrict - } - - return(rankscontainer) -} diff --git a/inst/unitTests/test_ranksserialization.R b/inst/unitTests/test_ranksserialization.R deleted file mode 100644 index 9c719e1..0000000 --- a/inst/unitTests/test_ranksserialization.R +++ /dev/null @@ -1,62 +0,0 @@ -test_ranksserialization <- function() { - - message("Running unit tests for ranks serialization") - - suppressPackageStartupMessages(library(Matrix)) - suppressPackageStartupMessages(library(GSEABase)) - suppressPackageStartupMessages(library(SummarizedExperiment)) - - p <- 10 ## number of genes - n <- 30 ## number of samples - - ## consider three disjoint gene sets - gsets <- list(gset1=paste0("g", 1:3), - gset2=paste0("g", 4:6), - gset3=paste0("g", 7:10)) - - ## build a random sparse count matrix with 85% sparsity - cnt <- integer(n*p) - idx <- sample(length(cnt), size=round(length(cnt)*0.15)) ## 85% sparsity - cnt[idx] <- rpois(length(idx), lambda=2)+1 - cnt <- matrix(cnt, nrow=p, ncol=n, - dimnames=list(paste("g", 1:p, sep="") , paste("s", 1:n, sep=""))) - cnt <- Matrix(cnt, sparse=TRUE) - - se <- SummarizedExperiment(assays=list(counts=cnt)) - - ## build GSVA parameter object - gsvapar <- gsvaParam(se, gsets, verbose=FALSE) - - ## calculate row-normalized expression values - gsvarownorm <- gsvaRowNorm(gsvapar, verbose=FALSE) - - ## calculate GSVA column ranks - gsvacolranks <- gsvaColRanks(gsvarownorm, verbose=FALSE) - - ## calculate GSVA scores - es <- gsvaColScores(gsvacolranks, verbose=FALSE) - - ## save the GSVA ranks to disk - dir <- tempfile() - checkException(saveHDF5GSVAranks(gsvapar, dir)) - saveHDF5GSVAranks(gsvacolranks, dir) - - ## load the GSVA ranks from disk - loaded_gsvacolranks <- loadHDF5GSVAranks(dir) - - ## check that the loaded ranks provide the - ## same scores as the original ranks - loaded_es <- gsvaColScores(loaded_gsvacolranks, verbose=FALSE) - - checkTrue(identical(es, loaded_es)) - - ## check it again saving and loading the ranks stored in a - ## non-'SummarizedExperiment' object - gsvapar <- gsvaParam(cnt, gsets, verbose=FALSE) - gsvarownorm <- gsvaRowNorm(gsvapar, verbose=FALSE) - gsvacolranks <- gsvaColRanks(gsvarownorm, verbose=FALSE) - saveHDF5GSVAranks(gsvacolranks, dir, replace=TRUE) - loaded_gsvacolranks <- loadHDF5GSVAranks(dir) - loaded_es <- gsvaColScores(loaded_gsvacolranks, verbose=FALSE) - checkEqualsNumeric(assay(es), loaded_es) -} diff --git a/inst/unitTests/test_serialization.R b/inst/unitTests/test_serialization.R new file mode 100644 index 0000000..a22860a --- /dev/null +++ b/inst/unitTests/test_serialization.R @@ -0,0 +1,88 @@ +test_ranksserialization <- function() { + + message("Running unit tests for ranks serialization") + + suppressPackageStartupMessages({ + library(Matrix) + library(GSEABase) + library(SummarizedExperiment) + }) + + p <- 10 ## number of genes + n <- 30 ## number of samples + + ## consider three disjoint gene sets + gsets <- list(gset1=paste0("g", 1:3), + gset2=paste0("g", 4:6), + gset3=paste0("g", 7:10)) + + ## build a random sparse count matrix with 85% sparsity + cnt <- integer(n*p) + idx <- sample(length(cnt), size=round(length(cnt)*0.15)) ## 85% sparsity + cnt[idx] <- rpois(length(idx), lambda=2)+1 + cnt <- matrix(cnt, nrow=p, ncol=n, + dimnames=list(paste("g", 1:p, sep="") , paste("s", 1:n, sep=""))) + cnt <- Matrix(cnt, sparse=TRUE) + + se <- SummarizedExperiment(assays=list(counts=cnt)) + + ## build GSVA parameter object + gsvapar <- gsvaParam(se, gsets, verbose=FALSE) + + ## calculate row-normalized expression values + gsvarownorm <- gsvaRowNorm(gsvapar, verbose=FALSE) + + ## calculate GSVA column ranks + gsvacolranks <- gsvaColRanks(gsvarownorm, verbose=FALSE) + + ## calculate GSVA scores + es <- gsvaColScores(gsvacolranks, verbose=FALSE) + + ## check out that 'saveHDF5GSVA()' throws an error when the input + ## object is not one of the classes of the union of classes defined + ## by 'GsvaExprData' + rnormdir <- tempfile() + checkException(saveHDF5GSVA(gsvapar, rnormdir)) + + ## save the GSVA row-normalized expression values to disk + savedrnormdir <- saveHDF5GSVA(gsvarownorm, rnormdir) + checkTrue(file.exists(savedrnormdir) && identical(savedrnormdir, rnormdir)) + + ## save the GSVA rank values to disk + ranksdir <- tempfile() + savedranksdir <- saveHDF5GSVA(gsvacolranks, ranksdir) + checkTrue(file.exists(savedranksdir) && identical(savedranksdir, ranksdir)) + + ## load the GSVA row-normalized values from disk + loaded_gsvarownorm <- loadHDF5GSVA(savedrnormdir) + + ## load the GSVA ranks from disk + loaded_gsvacolranks <- loadHDF5GSVA(savedranksdir) + + ## check that the loaded row-normalized values provide the + ## same ranks as the ones calculated from the original values + gsvacolranks_from_loaded_gsvarownorm <- gsvaColRanks(loaded_gsvarownorm, verbose=FALSE) + checkEqualsNumeric(assay(gsvacolranks, "gsvaranks"), + assay(gsvacolranks_from_loaded_gsvarownorm, "gsvaranks")) + + ## check that the loaded ranks provide the + ## same scores as the original ranks + es_from_loaded_gsvaranks <- gsvaColScores(loaded_gsvacolranks, verbose=FALSE) + checkTrue(identical(es, es_from_loaded_gsvaranks)) + + ## check it again saving and loading the ranks stored in a + ## non-'SummarizedExperiment' object + gsvapar <- gsvaParam(cnt, gsets, verbose=FALSE) + gsvarownorm <- gsvaRowNorm(gsvapar, verbose=FALSE) + savedrnormdir <- saveHDF5GSVA(gsvarownorm, rnormdir, replace=TRUE) + gsvacolranks <- gsvaColRanks(gsvarownorm, verbose=FALSE) + savedranksdir <- saveHDF5GSVA(gsvacolranks, ranksdir, replace=TRUE) + + loaded_gsvarownorm <- loadHDF5GSVA(savedrnormdir) + gsvacolranks_from_loaded_gsvarownorm <- gsvaColRanks(loaded_gsvarownorm, verbose=FALSE) + checkEqualsNumeric(gsvacolranks, gsvacolranks_from_loaded_gsvarownorm) + + loaded_gsvacolranks <- loadHDF5GSVA(savedranksdir) + es_from_loaded_gsvaranks <- gsvaColScores(loaded_gsvacolranks, verbose=FALSE) + checkEqualsNumeric(assay(es), es_from_loaded_gsvaranks) +} diff --git a/man/GSVA-pkg-deprecated.Rd b/man/GSVA-pkg-deprecated.Rd index b92064f..322a6f7 100644 --- a/man/GSVA-pkg-deprecated.Rd +++ b/man/GSVA-pkg-deprecated.Rd @@ -6,6 +6,8 @@ \alias{gsvaRanks,gsvaParam-method} \alias{gsvaScores} \alias{gsvaScores,gsvaRanksParam-method} +\alias{saveHDF5GSVAranks} +\alias{loadHDF5GSVAranks} \title{Deprecated functions in package \code{GSVA}.} \usage{ \S4method{gsvaRanks}{gsvaParam}( @@ -21,9 +23,39 @@ BPPARAM = SerialParam(progressbar = verbose), maxmem = "auto" ) + +saveHDF5GSVAranks(rankExprData, dir, ...) + +loadHDF5GSVAranks(dir, ...) } \arguments{ \item{param}{A parameter object of the \code{\link[=gsvaRanksParam-class]{gsvaRanksParam}} class.} + +\item{rankExprData}{A column-rank expression data set obtained with +\code{\link{gsvaColRanks}}. Must be one of the classes supported by +\code{\link[=GsvaExprData-class]{GsvaExprData}}. For a list of these classes, see its help page +using \code{help(GsvaExprData)}.} + +\item{dir}{The path to the directory where to save or load the GSVA rank +values.} + +\item{...}{Additional arguments to be passed to the underlying HDF5 +saving/loading functions +\code{\link[HDF5Array:saveHDF5SummarizedExperiment]{saveHDF5SummarizedExperiment}} +and \code{\link[HDF5Array:loadHDF5SummarizedExperiment]{loadHDF5SummarizedExperiment}}, +respectively.} +} +\value{ +For \code{saveHDF5GSVAranks()}, the path to the directory where the data +has been saved is returned invisibly. For \code{loadHDF5GSVAranks()}, an object +is returned containing the corresponding loaded GSVA row-normalized or rank +expression values, and their corresponding metadata. If the saved GSVA +output was originally stored in a +\code{\link[SummarizedExperiment:SummarizedExperiment]{SummarizedExperiment}} object +or one of its derived classes, then the returned object will be a +\code{\link[SummarizedExperiment:SummarizedExperiment]{SummarizedExperiment}}. +Otherwise, the returned object will be a +\code{\link[DelayedArray:DelayedMatrix]{DelayedMatrix}} object. } \description{ The functions listed below are deprecated and will be defunct in @@ -35,5 +67,11 @@ and \code{gsvaColRanks()} instead. The \code{gsvaScores()} method is deprecated. Please use \code{gsvaColScores()} instead. + +The \code{saveHDF5GSVAranks()} function is deprecated. Please use +\code{saveHDF5GSVA()} instead. + +The \code{loadHDF5GSVAranks()} function is deprecated. Please use +\code{loadHDF5GSVA()} instead. } \keyword{internal} diff --git a/man/gsva-serialization.Rd b/man/gsva-serialization.Rd new file mode 100644 index 0000000..343777c --- /dev/null +++ b/man/gsva-serialization.Rd @@ -0,0 +1,109 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gsva-serialization.R +\name{saveHDF5GSVA} +\alias{saveHDF5GSVA} +\alias{loadHDF5GSVA} +\title{Save/load GSVA output to disk using HDF5 format} +\usage{ +saveHDF5GSVA(gsvaExprData, dir, assay = "auto", ...) + +loadHDF5GSVA(dir, assay = "auto", ...) +} +\arguments{ +\item{gsvaExprData}{An obtained either with \code{\link{gsvaRowNorm}} or +\code{\link{gsvaColRanks}}. Must be one of the classes supported by +\code{\link[=GsvaExprData-class]{GsvaExprData}}. For a list of these classes, see its help page +using \code{help(GsvaExprData)}.} + +\item{dir}{The path to the directory where to save or load the GSVA output +data.} + +\item{assay}{A single character string specifying the assay that contains +the GSVA output to be saved or loaded. By default, \code{assay="auto"}, which in +the case of saving and a \code{gsvaExprData} object that is a +\code{SummarizedExperiment} or one of its derivatives, it will look for an assay +named \code{gsvaranks}, and if not found, it will look for an assay named +\verb{gsvarnorm'. If }gsvaExprData\verb{is not a}SummarizedExperiment\verb{or one of its derivatives, then the assay to be saved will be determined by the}assay\verb{attribute of the}gsvaExprData` object.} + +\item{...}{Additional arguments to be passed to the underlying HDF5 +saving/loading functions +\code{\link[HDF5Array:saveHDF5SummarizedExperiment]{saveHDF5SummarizedExperiment}} +and \code{\link[HDF5Array:loadHDF5SummarizedExperiment]{loadHDF5SummarizedExperiment}}, +respectively.} +} +\value{ +For \code{saveHDF5GSVA()} the path to the directory where the data has +been saved is returned invisibly. For \code{loadHDF5GSVA()}, an object is returned +containing the corresponding loaded GSVA row-normalized or rank expression +values, and their corresponding metadata. If the saved GSVA output was +originally stored in a +\code{\link[SummarizedExperiment:SummarizedExperiment]{SummarizedExperiment}} object +or one of its derived classes, then the returned object will be a +\code{\link[SummarizedExperiment:SummarizedExperiment]{SummarizedExperiment}}. +Otherwise, the returned object will be a +\code{\link[DelayedArray:DelayedMatrix]{DelayedMatrix}} object. +} +\description{ +The functions \code{saveHDF5GSVA()} and \code{loadHDF5GSVA()} allow one +to save and load the output from GSVA to/from disk. The \code{saveHDF5GSVA()} +function takes the output of \code{\link{gsvaRowNorm}} or \code{\link{gsvaColRanks}} as input, +and saves the output from these methods with the relevant metadata to a +specified directory. The \code{loadHDF5GSVA()} function reads the saved data from +the specified directory and returns an object with the corresponding +GSVA row-normalized or rank expression values, and their corresponding +metadata. +} +\examples{ + +p <- 10 ## number of genes +n <- 30 ## number of samples +nGrp1 <- 15 ## number of samples in group 1 +nGrp2 <- n - nGrp1 ## number of samples in group 2 + +## consider three disjoint gene sets +geneSets <- list(gset1=paste0("g", 1:3), + gset2=paste0("g", 4:6), + gset3=paste0("g", 7:10)) + +## sample data from a normal distribution with mean 0 and st.dev. 1 +y <- matrix(rnorm(n*p), nrow=p, ncol=n, + dimnames=list(paste("g", 1:p, sep="") , paste("s", 1:n, sep=""))) + +## build GSVA parameter object +gsvapar <- gsvaParam(y, geneSets) + +## calculate row-normalized expression values +gsvarownorm <- gsvaRowNorm(gsvapar) + +## calculate GSVA column ranks +gsvacolranks <- gsvaColRanks(gsvarownorm) + +## calculate GSVA scores +es <- gsvaColScores(gsvacolranks) + +## save the GSVA row-normalized expression values to disk +rnormdir <- tempfile() +saveHDF5GSVA(gsvarownorm, rnormdir) + +## save the GSVA rank values to disk +ranksdir <- tempfile() +saveHDF5GSVA(gsvacolranks, ranksdir) + +## load the GSVA row-normalized values from disk +loaded_gsvarownorm <- loadHDF5GSVA(rnormdir) + +## check that the loaded row-normalized values provide the +## same ranks as the ones calculated from the original values +gsvacolranks_from_loaded_gsvarownorm <- gsvaColRanks(loaded_gsvarownorm) + +identical(gsvacolranks, gsvacolranks_from_loaded_gsvarownorm) + +## load the GSVA ranks from disk +loaded_gsvacolranks <- loadHDF5GSVA(ranksdir) + +## check that the loaded ranks provide the +## same scores as the original ranks +loaded_es <- gsvaColScores(loaded_gsvacolranks) +identical(es, loaded_es) + +} diff --git a/man/gsvaRanks.Rd b/man/gsvaRanks.Rd index ecb2d76..8101a11 100644 --- a/man/gsvaRanks.Rd +++ b/man/gsvaRanks.Rd @@ -105,7 +105,7 @@ object, containing the row-normalized expression values. The resulting object will have metadata with a copy of the input \code{gsvaParam} object, except for the \code{exprData} slot, and in the case of being a derivative of a \code{\link[SummarizedExperiment:SummarizedExperiment]{SummarizedExperiment}} object, -an additional assay called "gsvarownr" storing the row-normalized expression +an additional assay called "gsvarnorm" storing the row-normalized expression values. In the case of 'gsvaColRanks()', an object of the same class as the diff --git a/man/gsvaRanks_serialization.Rd b/man/gsvaRanks_serialization.Rd deleted file mode 100644 index 96caff5..0000000 --- a/man/gsvaRanks_serialization.Rd +++ /dev/null @@ -1,87 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gsvaRanks_serialization.R -\name{saveHDF5GSVAranks} -\alias{saveHDF5GSVAranks} -\alias{loadHDF5GSVAranks} -\title{Save/load GSVA rank values to disk using HDF5 format} -\usage{ -saveHDF5GSVAranks(rankExprData, dir, ...) - -loadHDF5GSVAranks(dir, ...) -} -\arguments{ -\item{rankExprData}{A column-rank expression data set obtained with -\code{\link{gsvaColRanks}}. Must be one of the classes supported by -\code{\link[=GsvaExprData-class]{GsvaExprData}}. For a list of these classes, see its help page -using \code{help(GsvaExprData)}.} - -\item{dir}{The path to the directory where to save or load the GSVA ranks -data.} - -\item{...}{Additional arguments to be passed to the underlying HDF5 -saving/loading functions -\code{\link[HDF5Array:saveHDF5SummarizedExperiment]{saveHDF5SummarizedExperiment}} -and \code{\link[HDF5Array:loadHDF5SummarizedExperiment]{loadHDF5SummarizedExperiment}}, -respectively.} -} -\value{ -For \code{saveHDF5GSVAranks}, the path to the directory where the data -has been saved is returned invisibly. For \code{loadHDF5GSVAranks}, a -an object is returned containing the loaded GSVA rank values and their -corresponding metadata. If the saved ranks were originally stored in a -\code{\link[SummarizedExperiment:SummarizedExperiment]{SummarizedExperiment}} object -or one of its derived classes, then the returned object will be a -\code{\link[SummarizedExperiment:SummarizedExperiment]{SummarizedExperiment}}. -Otherwise, the returned object will be a -\code{\link[DelayedArray:DelayedMatrix]{DelayedMatrix}} object. -} -\description{ -The functions \code{saveHDF5GSVAranks} and \code{loadHDF5GSVAranks} can -be used to save and load the GSVA rank values to/from disk, respectively. -The \code{saveHDF5GSVAranks()} function takes the output of \code{\link{gsvaColRanks}} as -input, and saves the rank values along with the relevant metadata to a -specified directory. The \code{loadHDF5GSVAranks()} function reads the saved data -from the specified directory and returns an object with the GSVA rank values -and their corresponding metadata. -} -\examples{ - -p <- 10 ## number of genes -n <- 30 ## number of samples -nGrp1 <- 15 ## number of samples in group 1 -nGrp2 <- n - nGrp1 ## number of samples in group 2 - -## consider three disjoint gene sets -geneSets <- list(gset1=paste0("g", 1:3), - gset2=paste0("g", 4:6), - gset3=paste0("g", 7:10)) - -## sample data from a normal distribution with mean 0 and st.dev. 1 -y <- matrix(rnorm(n*p), nrow=p, ncol=n, - dimnames=list(paste("g", 1:p, sep="") , paste("s", 1:n, sep=""))) - -## build GSVA parameter object -gsvapar <- gsvaParam(y, geneSets) - -## calculate row-normalized expression values -gsvarownorm <- gsvaRowNorm(gsvapar) - -## calculate GSVA column ranks -gsvacolranks <- gsvaColRanks(gsvarownorm) - -## calculate GSVA scores -es <- gsvaColScores(gsvacolranks) - -## save the GSVA ranks to disk -dir <- tempfile() -saveHDF5GSVAranks(gsvacolranks, dir) - -## load the GSVA ranks from disk -loaded_gsvacolranks <- loadHDF5GSVAranks(dir) - -## check that the loaded ranks provide the -## same scores as the original ranks -loaded_es <- gsvaColScores(loaded_gsvacolranks) -identical(es, loaded_es) - -}