From da448ad9e5ab2add6c80bf1b6d66b46f593dbbbf Mon Sep 17 00:00:00 2001 From: Robert Castelo Date: Fri, 22 May 2026 05:45:39 +0200 Subject: [PATCH 01/12] Added gsvaRowNorm() function --- DESCRIPTION | 2 +- NAMESPACE | 4 +- R/AllGenerics.R | 7 +- R/gsva.R | 235 ++++++++++++++++------ R/gsvaRanks_serialization.R | 24 +-- R/plage.R | 2 +- R/ssgsea.R | 3 +- R/utils.R | 92 ++++++--- R/zscore.R | 2 +- inst/unitTests/test_genesets.R | 8 +- inst/unitTests/test_inputdatacontainers.R | 11 +- inst/unitTests/test_ondisk.R | 11 +- man/GsvaExprData-class.Rd | 8 +- man/GsvaMethodParam-class.Rd | 12 +- man/geneIdsToGeneSetCollection.Rd | 2 +- man/geneSets.Rd | 4 +- man/gsva.Rd | 14 +- man/gsvaAnnotation.Rd | 2 +- man/gsvaEnrichment.Rd | 2 +- man/gsvaParam-class.Rd | 28 +-- man/gsvaRanks.Rd | 31 ++- man/gsvaRanks_serialization.Rd | 4 +- man/plageParam-class.Rd | 18 +- man/ssgseaParam-class.Rd | 24 +-- man/zscoreParam-class.Rd | 18 +- 25 files changed, 359 insertions(+), 209 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9b59b0a..d58ee72 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -30,4 +30,4 @@ BugReports: https://github.com/rcastelo/GSVA/issues Encoding: UTF-8 biocViews: FunctionalGenomics, Microarray, RNASeq, Pathways, GeneSetEnrichment Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.3 +Config/roxygen2/version: 8.0.0 diff --git a/NAMESPACE b/NAMESPACE index a5e6464..cb7db93 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,6 +13,7 @@ export(gsvaAnnotation) export(gsvaEnrichment) export(gsvaParam) export(gsvaRanks) +export(gsvaRowNorm) export(gsvaScores) export(guessGeneIdType) export(igsva) @@ -39,6 +40,7 @@ exportMethods(gsva) exportMethods(gsvaAnnotation) exportMethods(gsvaEnrichment) exportMethods(gsvaRanks) +exportMethods(gsvaRowNorm) exportMethods(gsvaScores) exportMethods(spatCor) import(methods) @@ -60,8 +62,6 @@ importFrom(Biobase,selectSome) importFrom(BiocGenerics,"annotation<-") importFrom(BiocGenerics,"type<-") importFrom(BiocGenerics,annotation) 
-importFrom(BiocGenerics,cbind) -importFrom(BiocGenerics,rbind) importFrom(BiocGenerics,type) importFrom(BiocParallel,"bpprogressbar<-") importFrom(BiocParallel,"bpstopOnError<-") diff --git a/R/AllGenerics.R b/R/AllGenerics.R index bc4b4b0..8af4e7f 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -3,6 +3,10 @@ setGeneric("gsva", function(param, ...) standardGeneric("gsva")) +#' @export +setGeneric("gsvaRowNorm", + function(param, ...) standardGeneric("gsvaRowNorm")) + #' @export setGeneric("gsvaRanks", function(param, ...) standardGeneric("gsvaRanks")) @@ -50,7 +54,8 @@ setGeneric("unwrapData", function(container, ...) standardGeneric("unwrapData")) setGeneric("wrapData", - function(container, dataMatrix, geneSets) standardGeneric("wrapData")) + function(container, dataMatrix, param, assay, geneSets) + standardGeneric("wrapData")) setGeneric("mapGeneSetsToAnno", function(geneSets, anno, ...) standardGeneric("mapGeneSetsToAnno")) diff --git a/R/gsva.R b/R/gsva.R index 45856d0..3f17621 100644 --- a/R/gsva.R +++ b/R/gsva.R @@ -146,7 +146,6 @@ NULL #' @aliases gsva,gsvaParam-method #' @importFrom cli cli_alert_info cli_alert_success -#' @importFrom BiocParallel bpnworkers #' @importFrom utils packageDescription #' @rdname gsva #' @exportMethod gsva @@ -624,11 +623,48 @@ setMethod("show", cat("filterRows: ", .get_filterRows(object), "\n") }) +.gsvaParam_as_list <- function(x) { + lst <- list(originalClassWasSE=is(get_exprData(x), + "SummarizedExperiment"), + geneSets=get_geneSets(x), + assay=get_assay(x), + annotation=get_annotation(x), + minSize=get_minSize(x), + maxSize=get_maxSize(x), + nzcount=nzcount(x), + ondisk=.get_ondisk(x)) + + if (is(x, "ssgseaParam")) + lst <- c(lst, + alpha=.get_alpha(x), + normalize=.get_normalize(x)) + + if (is(x, "ssgseaParam") || is(x, "gsvaParam")) + lst <- c(lst, + checkNA=.get_checkNA(x), + didCheckNA=.get_didCheckNA(x), + anyNA=anyNA(x), + use=.get_NAuse(x)) + + if (is(x, "gsvaParam")) + lst <- c(lst, + kcdf=.get_kcdf(x), + 
kcdfNoneMinSampleSize=.get_kcdfNoneMinSampleSize(x), + tau=.get_tau(x), + maxDiff=.get_maxDiff(x), + absRanking=.get_absRanking(x), + sparse=.get_sparse(x), + filterRows=.get_filterRows(x)) + return(lst) +} + + #' @title GSVA ranks and scores #' -#' @description Calculate GSVA scores in two steps: (1) calculate GSVA -#' ranks; and (2) calculate GSVA scores using the previously calculated -#' ranks. +#' @description Calculate GSVA scores in three steps: (1) normalize values of +#' expression by row; (2) calculate GSVA ranks by column from the previous +#' row-normalized values; and (3) calculate GSVA scores by column from the +#' previously calculated column ranks. #' #' @param param A [`gsvaParam-class`] object built using the constructor #' function [`gsvaParam`]. @@ -652,9 +688,6 @@ setMethod("show", #' in an R session or script may depend on other commands and packages used in #' that same session or script. #' -#' @return In the case of the `gsvaRanks()` method, an object of class -#' [`gsvaRanksParam-class`]. -#' #' @seealso [`gsvaParam-class`], [`gsvaRanksParam-class`], [`gsva`], #' [`BiocParallelParam`][BiocParallel::BiocParallelParam-class], #' [`dgCMatrix`][Matrix::dgCMatrix-class], @@ -666,10 +699,6 @@ setMethod("show", ### ‘[Biobase:class.ExpressionSet]{ExpressionSet}’ #' [`SingleCellExperiment`][SingleCellExperiment::SingleCellExperiment-class] #' -#' @aliases gsvaRanks,gsvaParam-method -#' @name gsvaRanks -#' @rdname gsvaRanks -#' #' @references Hänzelmann, S., Castelo, R. and Guinney, J. GSVA: Gene set #' variation analysis for microarray and RNA-Seq data. #' *BMC Bioinformatics*, 14:7, 2013. @@ -719,8 +748,88 @@ setMethod("show", #' geneSets(gsvarankspar) <- geneSets2 #' gsvaScores(gsvarankspar) #' +#' @return In the case of the `gsvaRowNorm()` method, an object of class +#' [`gsvaRanksParam-class`]. 
+#' +#' @aliases gsvaRowNorm,gsvaParam-method +#' @name gsvaRowNorm +#' @rdname gsvaRanks +#' +#' @importFrom cli cli_alert_info cli_alert_success +#' @exportMethod gsvaRowNorm +setMethod("gsvaRowNorm", signature(param="gsvaParam"), + function(param, + verbose=TRUE, + BPPARAM=SerialParam(progressbar=verbose), + maxmem="auto") { + + if (verbose && gsva_global$show_start_and_end_messages) { + pkgversion <- packageDescription("GSVA")[["Version"]] + cli_alert_info("GSVA version {pkgversion}") + } + + .check_bpparam(BPPARAM) + + exprData <- get_exprData(param) + dataMatrix <- unwrapData(exprData, get_assay(param)) + maxmem <- .check_maxmem(param, maxmem, verbose) + ondisk <- .check_ondisk(param, maxmem, verbose) + + dataMatrix <- .check_sparse_load_input_expr(dataMatrix, "GSVA", + ondisk, verbose) + + filtDataMatrix <- dataMatrix + BPPARAM <- .check_open_parallelism(filtDataMatrix, BPPARAM, + minparrows=100, minparcols=100, + verbose) + + if (.get_filterRows(param)) + filtDataMatrix <- .filterGenes(dataMatrix, anyNA(param), + removeConstant=TRUE, + removeNzConstant=TRUE, + verbose, BPPARAM=BPPARAM, + maxmem=maxmem) + else if (verbose) { + msg <- "Skipping filtering of constant rows (filterRows=FALSE)" + cli_alert_warning(msg) + } + + if (verbose) + cli_alert_info(sprintf("Normalizing rows")) + + kcdfminssize <- .get_kcdfNoneMinSampleSize(param) + gsvarows <- .compute_row_norm(expr=filtDataMatrix, + kcdf=.get_kcdf(param), + kcdf.min.ssize=kcdfminssize, + sparse=.get_sparse(param), + any_na=anyNA(param), + na_use=.get_NAuse(param), + verbose=verbose, + BPPARAM=BPPARAM, + maxmem=maxmem) + + rownames(gsvarows) <- rownames(filtDataMatrix) + colnames(gsvarows) <- colnames(filtDataMatrix) + + rval <- wrapData(get_exprData(param), gsvarows, param, "gsvarows") + + if (verbose && gsva_global$show_start_and_end_messages) + cli_alert_success("Calculations finished") + + return(rval) + }) + + + +#' +#' @return In the case of the `gsvaRanks()` method, an object of class +#' 
[`gsvaRanksParam-class`]. +#' +#' @aliases gsvaRanks,gsvaParam-method +#' @name gsvaRanks +#' @rdname gsvaRanks +#' #' @importFrom cli cli_alert_info cli_alert_success -#' @importFrom BiocParallel bpnworkers #' @exportMethod gsvaRanks setMethod("gsvaRanks", signature(param="gsvaParam"), function(param, @@ -763,12 +872,17 @@ setMethod("gsvaRanks", signature(param="gsvaParam"), cli_alert_info(sprintf("Calculating GSVA ranks")) kcdfminssize <- .get_kcdfNoneMinSampleSize(param) - gsvarnks <- .compute_gsva_ranks(expr=filtDataMatrix, - kcdf=.get_kcdf(param), - kcdf.min.ssize=kcdfminssize, - sparse=.get_sparse(param), - any_na=anyNA(param), - na_use=.get_NAuse(param), + gsvarows <- .compute_row_norm(expr=filtDataMatrix, + kcdf=.get_kcdf(param), + kcdf.min.ssize=kcdfminssize, + sparse=.get_sparse(param), + any_na=anyNA(param), + na_use=.get_NAuse(param), + verbose=verbose, + BPPARAM=BPPARAM, + maxmem=maxmem) + + gsvarnks <- .compute_gsva_ranks(Z=gsvarows, verbose=verbose, BPPARAM=BPPARAM, maxmem=maxmem) @@ -776,7 +890,8 @@ setMethod("gsvaRanks", signature(param="gsvaParam"), rownames(gsvarnks) <- rownames(filtDataMatrix) colnames(gsvarnks) <- colnames(filtDataMatrix) - rnkscontainer <- wrapData(get_exprData(param), gsvarnks) + rnkscontainer <- wrapData(get_exprData(param), gsvarnks, param, + "gsvaranks") rval <- new("gsvaRanksParam", exprData=rnkscontainer, geneSets=get_geneSets(param), assay="gsvaranks", annotation=get_annotation(param), @@ -911,7 +1026,7 @@ setMethod("gsvaScores", signature(param="gsvaRanksParam"), gs <- .geneSetsIndices2Names(indices=filtMappedGeneSets, names=rownames(filtDataMatrix)) - rval <- wrapData(get_exprData(param), gsva_es, gs) + rval <- wrapData(get_exprData(param), gsva_es, param, "es", gs) if (verbose && gsva_global$show_start_and_end_messages) cli_alert_success("Calculations finished") @@ -1173,34 +1288,6 @@ compute.gene.cdf <- function(expr, Gaussk=TRUE, kernel=TRUE, return(gene.cdf) } -## here 'ties.method="last"' allows one to obtain the 
result -## from 'order()' based on ranks -## pending how to propagate verbosity if necessary - -#' @importFrom MatrixGenerics colRanks -compute.col.ranks <- function(Z, ties.method="last", drop.sparsity=FALSE, - verbose=TRUE) { - R <- NULL - - if (drop.sparsity && !is(Z, "DelayedMatrix")) - Z <- as.matrix(Z) - - if (is(Z, "dgCMatrix")) { ## assumes expression values are positive - R <- .sparseColumnApplyAndReplace(Z, rank, ties.method=ties.method) - } else if (is(Z, "SVT_SparseMatrix")) { - R <- .colRanks_SVT_SparseMatrix(Z, ties.method=ties.method) - } else if (is(Z, "DelayedMatrix")) { - R <- .colRanksHDF5(Z, ties.method=ties.method, drop.sparsity=drop.sparsity) - } else { - R <- colRanks(Z, ties.method=ties.method, preserveShape=TRUE) - } - - if (ncol(Z) > 10000) ## free up ASAP memory we need not anymore and was - out <- gc() ## allocated during rank calculations on a big Z - - return(R) -} - #' @importFrom Matrix nnzero .sufficient_ssize <- function(expr, kcdf.min.ssize) { ## in the sparse case stored in a 'dgCMatrix' or a 'SVT_SparseMatrix', @@ -1268,15 +1355,11 @@ compute.col.ranks <- function(Z, ties.method="last", drop.sparsity=FALSE, } -#' @importFrom IRanges IntegerList match -#' @importFrom BiocParallel bpnworkers -#' @importFrom cli cli_alert_info cli_progress_bar -#' @importFrom cli cli_progress_done cli_abort -#' @importFrom BiocGenerics rbind cbind -#' @importFrom sparseMatrixStats colRanks -.compute_gsva_ranks <- function(expr, kcdf, kcdf.min.ssize, - sparse, any_na, na_use, verbose, - BPPARAM=NULL, maxmem=Inf) { + +#' @importFrom cli cli_alert_info +.compute_row_norm <- function(expr, kcdf, kcdf.min.ssize, + sparse, any_na, na_use, verbose, + BPPARAM=NULL, maxmem=Inf) { kcdfparam <- .parse_kcdf_param(expr, kcdf, kcdf.min.ssize, sparse, verbose) kernel <- kcdfparam$kernel @@ -1290,6 +1373,43 @@ compute.col.ranks <- function(Z, ties.method="last", drop.sparsity=FALSE, na_use=na_use, verbose=verbose, minparrows=100, minparcols=100, BPPARAM=BPPARAM, 
maxmem=maxmem) + return(Z) +} + + + +## here 'ties.method="last"' allows one to obtain the result +## from 'order()' based on ranks +## pending how to propagate verbosity if necessary + +#' @importFrom MatrixGenerics colRanks +#' @importFrom sparseMatrixStats colRanks +compute.col.ranks <- function(Z, ties.method="last", drop.sparsity=FALSE, + verbose=TRUE) { + R <- NULL + + if (drop.sparsity && !is(Z, "DelayedMatrix")) + Z <- as.matrix(Z) + + if (is(Z, "dgCMatrix")) { ## assumes expression values are positive + R <- .sparseColumnApplyAndReplace(Z, rank, ties.method=ties.method) + } else if (is(Z, "SVT_SparseMatrix")) { + R <- .colRanks_SVT_SparseMatrix(Z, ties.method=ties.method) + } else if (is(Z, "DelayedMatrix")) { + R <- .colRanksHDF5(Z, ties.method=ties.method, drop.sparsity=drop.sparsity) + } else { + R <- colRanks(Z, ties.method=ties.method, preserveShape=TRUE) + } + + if (ncol(Z) > 10000) ## free up ASAP memory we need not anymore and was + out <- gc() ## allocated during rank calculations on a big Z + + return(R) +} + +#' @importFrom cli cli_alert_info +#' @importFrom cli cli_progress_done cli_abort +.compute_gsva_ranks <- function(Z, verbose, BPPARAM=NULL, maxmem=Inf) { if (verbose) cli_alert_info("Calculating column ranks") @@ -1483,7 +1603,6 @@ compute.col.ranks <- function(Z, ties.method="last", drop.sparsity=FALSE, ## written into an on-disk data structure (HDF5) instead of being returned in ## main memory. #' @importFrom cli cli_alert_info cli_alert_warning -#' @importFrom BiocParallel bpnworkers #' @importFrom S4Arrays is_sparse refdim DummyArrayGrid .compute_gsva_scores <- function(R, geneSetsIdx, tau, maxDiff, absRanking, sparse, any_na, na_use, minSize, ondisk, diff --git a/R/gsvaRanks_serialization.R b/R/gsvaRanks_serialization.R index 753a411..8e53ad3 100644 --- a/R/gsvaRanks_serialization.R +++ b/R/gsvaRanks_serialization.R @@ -74,8 +74,6 @@ saveHDF5GSVAranks <- function(x, dir, ...) 
{ cli_abort("The input object in 'x' must be of class 'gsvaRanksParam'") edata <- get_exprData(x) - wasse <- is(edata, "SummarizedExperiment") - if (is(edata, "SummarizedExperiment")) { an <- assayNames(edata) if (!"gsvaranks" %in% an) @@ -96,28 +94,8 @@ saveHDF5GSVAranks <- function(x, dir, ...) { gsvaAnnotation(edata) <- annot } - knmss <- .get_kcdfNoneMinSampleSize(x) metadata(edata) <- c(metadata(edata), - list(gsvaRanksParam=list(originalClassWasSE=wasse, - geneSets=get_geneSets(x), - assay=get_assay(x), - annotation=get_annotation(x), - minSize=get_minSize(x), - maxSize=get_maxSize(x), - kcdf=.get_kcdf(x), - kcdfNoneMinSampleSize=knmss, - tau=.get_tau(x), - maxDiff=.get_maxDiff(x), - absRanking=.get_absRanking(x), - sparse=.get_sparse(x), - checkNA=.get_checkNA(x), - didCheckNA=.get_didCheckNA(x), - anyNA=anyNA(x), - use=.get_NAuse(x), - filterRows=.get_filterRows(x), - nzcount=nzcount(x), - ondisk=.get_ondisk(x)) - )) + list(gsvaRanksParam=.gsvaParam_as_list(x))) saveHDF5SummarizedExperiment(edata, dir, ...) 
diff --git a/R/plage.R b/R/plage.R index c132362..df92e5b 100644 --- a/R/plage.R +++ b/R/plage.R @@ -58,7 +58,7 @@ setMethod("gsva", signature(param="plageParam"), gs <- .geneSetsIndices2Names( indices=filtMappedGeneSets, names=rownames(filtDataMatrix)) - rval <- wrapData(get_exprData(param), plage_es, gs) + rval <- wrapData(get_exprData(param), plage_es, param, "es", gs) if (verbose) cli_alert_success("Calculations finished") diff --git a/R/ssgsea.R b/R/ssgsea.R index 0a38419..3eb7f60 100644 --- a/R/ssgsea.R +++ b/R/ssgsea.R @@ -64,7 +64,7 @@ setMethod("gsva", signature(param="ssgseaParam"), gs <- .geneSetsIndices2Names( indices=filtMappedGeneSets, names=rownames(filtDataMatrix)) - rval <- wrapData(get_exprData(param), ssgsea_es, gs) + rval <- wrapData(get_exprData(param), ssgsea_es, param, "es", gs) if (verbose) cli_alert_success("Calculations finished") @@ -440,7 +440,6 @@ setMethod("show", walkStat } -#' @importFrom IRanges IntegerList match #' @importFrom BiocParallel bpnworkers #' @importFrom BiocGenerics "type<-" #' @importFrom MatrixGenerics colRanks diff --git a/R/utils.R b/R/utils.R index 2ce4617..a31a91d 100644 --- a/R/utils.R +++ b/R/utils.R @@ -55,55 +55,70 @@ setMethod("unwrapData", signature("SpatialExperiment"), ## wrapData: put the resulting data and gene sets into the original data container type setMethod("wrapData", signature(container="matrix"), - function(container, dataMatrix, geneSets) { + function(container, dataMatrix, param, assay, geneSets) { + stopifnot(!missing(param)) + attr(dataMatrix, "gsvaParam") <- .gsvaParam_as_list(param) if (!missing(geneSets)) attr(dataMatrix, "geneSets") <- geneSets return(dataMatrix) }) setMethod("wrapData", signature(container="dgCMatrix"), - function(container, dataMatrix, geneSets) { + function(container, dataMatrix, param, assay, geneSets) { + stopifnot(!missing(param)) + attr(dataMatrix, "gsvaParam") <- .gsvaParam_as_list(param) if (!missing(geneSets)) attr(dataMatrix, "geneSets") <- geneSets 
return(dataMatrix) }) setMethod("wrapData", signature(container="SVT_SparseMatrix"), - function(container, dataMatrix, geneSets) { + function(container, dataMatrix, param, assay, geneSets) { + stopifnot(!missing(param)) + attr(dataMatrix, "gsvaParam") <- .gsvaParam_as_list(param) if (!missing(geneSets)) attr(dataMatrix, "geneSets") <- geneSets return(dataMatrix) }) setMethod("wrapData", signature(container="DelayedMatrix"), - function(container, dataMatrix, geneSets) { + function(container, dataMatrix, param, assay, geneSets) { + stopifnot(!missing(param)) + attr(dataMatrix, "gsvaParam") <- .gsvaParam_as_list(param) if (!missing(geneSets)) attr(dataMatrix, "geneSets") <- geneSets return(dataMatrix) }) setMethod("wrapData", signature(container="ExpressionSet"), - function(container, dataMatrix, geneSets) { + function(container, dataMatrix, param, assay, geneSets) { + stopifnot(!missing(param)) rval <- new("ExpressionSet", exprs=dataMatrix, phenoData=phenoData(container), experimentData=experimentData(container), annotation="") + attr(rval, "gsvaParam") <- .gsvaParam_as_list(param) if (!missing(geneSets)) attr(rval, "geneSets") <- geneSets return(rval) }) +#' @importFrom IRanges CharacterList +#' @importFrom S4Vectors SimpleList setMethod("wrapData", signature(container="SummarizedExperiment"), - function(container, dataMatrix, geneSets) { - rdata <- adata <- NULL - if (!missing(geneSets)) { - adata <- SimpleList(es=dataMatrix) + function(container, dataMatrix, param, assay, geneSets) { + stopifnot(!missing(assay)) + stopifnot(!missing(param)) + rdata <- NULL + adata <- SimpleList(dataMatrix) + names(adata) <- assay + if (!missing(geneSets)) { ## storing enrichment scores only rdata <- DataFrame(gs=CharacterList(geneSets)) - } else { ## assume missing geneSets implies dataMatrix are ranks + } else { ## missing geneSets implies adding an assay + stopifnot(all(rownames(dataMatrix) %in% rownames(container))) mask <- rownames(container) %in% rownames(dataMatrix) - adata 
<- c(assays(container[mask, ]), - SimpleList(gsvaranks=dataMatrix)) + adata <- c(assays(container[mask, ]), adata) rdata <- rowData(container)[mask, ] } rval <- SummarizedExperiment( @@ -111,22 +126,29 @@ setMethod("wrapData", signature(container="SummarizedExperiment"), colData=colData(container), rowData=rdata, metadata=metadata(container)) - if (!missing(geneSets)) + metadata(rval)$gsvaParam <- .gsvaParam_as_list(param) + if (!missing(geneSets)) ## row data has been replaced metadata(rval)$annotation <- NULL return(rval) }) +#' @importFrom IRanges CharacterList +#' @importFrom S4Vectors SimpleList +#' @importFrom SingleCellExperiment SingleCellExperiment setMethod("wrapData", signature(container="SingleCellExperiment"), - function(container, dataMatrix, geneSets) { - rdata <- adata <- NULL - if (!missing(geneSets)) { - adata <- SimpleList(es=dataMatrix) + function(container, dataMatrix, param, assay, geneSets) { + stopifnot(!missing(assay)) + stopifnot(!missing(param)) + rdata <- NULL + adata <- SimpleList(dataMatrix) + names(adata) <- assay + if (!missing(geneSets)) { ## storing enrichment scores only rdata <- DataFrame(gs=CharacterList(geneSets)) - } else { ## assume missing geneSets implies dataMatrix are ranks + } else { ## missing geneSets implies adding an assay + stopifnot(all(rownames(dataMatrix) %in% rownames(container))) mask <- rownames(container) %in% rownames(dataMatrix) - adata <- c(assays(container[mask, ]), - SimpleList(gsvaranks=dataMatrix)) + adata <- c(assays(container[mask, ]), adata) rdata <- rowData(container)[mask, ] } rval <- SingleCellExperiment( @@ -134,22 +156,29 @@ setMethod("wrapData", signature(container="SingleCellExperiment"), colData=colData(container), rowData=rdata, metadata=metadata(container)) - if (!missing(geneSets)) + metadata(rval)$gsvaParam <- .gsvaParam_as_list(param) + if (!missing(geneSets)) ## row data has been replaced metadata(rval)$annotation <- NULL return(rval) }) +#' @importFrom IRanges CharacterList +#' 
@importFrom S4Vectors SimpleList +#' @importFrom SingleCellExperiment SingleCellExperiment setMethod("wrapData", signature(container="SpatialExperiment"), - function(container, dataMatrix, geneSets) { - rdata <- adata <- NULL - if (!missing(geneSets)) { - adata <- SimpleList(es=dataMatrix) + function(container, dataMatrix, param, assay, geneSets) { + stopifnot(!missing(assay)) + stopifnot(!missing(param)) + rdata <- NULL + adata <- SimpleList(dataMatrix) + names(adata) <- assay + if (!missing(geneSets)) { ## storing enrichment scores only rdata <- DataFrame(gs=CharacterList(geneSets)) - } else { ## assume missing geneSets implies dataMatrix are ranks + } else { ## missing geneSets implies adding an assay + stopifnot(all(rownames(dataMatrix) %in% rownames(container))) mask <- rownames(container) %in% rownames(dataMatrix) - adata <- c(assays(container[mask, ]), - SimpleList(gsvaranks=dataMatrix)) + adata <- c(assays(container[mask, ]), adata) rdata <- rowData(container)[mask, ] } rval <- SpatialExperiment( @@ -159,7 +188,8 @@ setMethod("wrapData", signature(container="SpatialExperiment"), metadata=metadata(container), imgData=imgData(container), spatialCoords=spatialCoords(container)) - if (!missing(geneSets)) + metadata(rval)$gsvaParam <- .gsvaParam_as_list(param) + if (!missing(geneSets)) ## row data has been replaced metadata(rval)$annotation <- NULL return(rval) @@ -492,10 +522,10 @@ setMethod("wrapData", signature(container="SpatialExperiment"), if (is_sparse(X)) { estimated_flag <- FALSE if (is(X, "dgCMatrix") || is(X, "SVT_SparseMatrix")) - nzc <- nzcount(X) + nzc <- as.numeric(nzcount(X)) else if (is(X, "DelayedMatrix")) { if (nc < 2000) - nzc <- nzcount(as(X, "dgCMatrix")) + nzc <- as.numeric(nzcount(as(X, "dgCMatrix"))) else { block_dim <- chunkdim(X) if (is.null(block_dim)) { diff --git a/R/zscore.R b/R/zscore.R index 88a3c76..56e5654 100644 --- a/R/zscore.R +++ b/R/zscore.R @@ -58,7 +58,7 @@ setMethod("gsva", signature(param="zscoreParam"), gs <- 
.geneSetsIndices2Names( indices=filtMappedGeneSets, names=rownames(filtDataMatrix)) - rval <- wrapData(get_exprData(param), zscore_es, gs) + rval <- wrapData(get_exprData(param), zscore_es, param, "es", gs) if (verbose) cli_alert_success("Calculations finished") diff --git a/inst/unitTests/test_genesets.R b/inst/unitTests/test_genesets.R index 35625ad..a4bc3bd 100644 --- a/inst/unitTests/test_genesets.R +++ b/inst/unitTests/test_genesets.R @@ -52,6 +52,7 @@ test_genesets <- function() { ## estimate GSVA enrichment scores with gene sets input as a GeneSetCollection object es.mat2 <- gsva(gsvaParam(y, gsc), verbose=FALSE) + attr(es.mat2, "gsvaParam")$geneSets <- geneSets(es.mat2) ## this is obviously different checkTrue(identical(es.mat, es.mat2)) ## check that when input expression data has no rownames and gene sets @@ -60,13 +61,12 @@ test_genesets <- function() { rownames(y) <- NULL gsvapar <- gsvaParam(y, gsets, verbose=FALSE) es.mat3 <- gsva(gsvapar, verbose=FALSE) - attr(es.mat, "geneSets") <- attr(es.mat3, "geneSets") <- NULL - checkTrue(identical(es.mat, es.mat3)) + checkEqualsNumeric(es.mat, es.mat3) + gsets$gset3 <- c(gsets$gset3, 11) gsvapar <- gsvaParam(y, gsets, verbose=FALSE) es.mat4 <- gsva(gsvapar, verbose=FALSE) - attr(es.mat4, "geneSets") <- NULL - checkTrue(identical(es.mat, es.mat4)) + checkEqualsNumeric(es.mat, es.mat4) } test_geneSetDeDuplication <- function() { diff --git a/inst/unitTests/test_inputdatacontainers.R b/inst/unitTests/test_inputdatacontainers.R index ca3a6fb..278fe59 100644 --- a/inst/unitTests/test_inputdatacontainers.R +++ b/inst/unitTests/test_inputdatacontainers.R @@ -43,11 +43,9 @@ test_inputdatacontainers <- function() { es.eset <- gsva(gsvaParam(eset, gsets, verbose=FALSE), verbose=FALSE) gsets.eSet <- geneSets(es.eset) - ## as of 1.51.9, gene sets will be returned as attributes for containers not - ## inheriting from SummarizedExperiment and interfere with the check es.mat2 <- es.mat attr(es.mat2, "geneSets") <- NULL - 
attr(es.eset, "geneSets") <- NULL + attr(es.mat2, "gsvaParam") <- NULL checkTrue(identical(es.mat2, exprs(es.eset))) checkTrue(identical(gsets.mat, gsets.eSet)) @@ -79,7 +77,7 @@ test_inputdatacontainers <- function() { yMat <- Matrix(y, sparse=TRUE) ## check show() method for a gsvaParam object - param <- gsvaParam(yMat, gsets, verbose=FALSE) + param <- gsvaParam(yMat, gsets, sparse=FALSE, checkNA="auto", verbose=FALSE) out <- capture.output(show(param)) checkTrue(length(out) > 0 && sum(nchar(out)) > 0, "gsvaParam object show method output is empty") @@ -87,7 +85,9 @@ test_inputdatacontainers <- function() { es.dgCMat <- gsva(param, verbose=FALSE) gsets.dgCMat <- geneSets(es.dgCMat) - checkTrue(identical(es.mat, es.dgCMat)) + attr(es.dgCMat, "geneSets") <- NULL + attr(es.dgCMat, "gsvaParam") <- NULL + checkTrue(identical(es.mat2, es.dgCMat)) checkTrue(identical(gsets.mat, gsets.dgCMat)) ## testing geneIdsToGeneSetCollection() @@ -120,6 +120,7 @@ test_inputdatacontainers <- function() { gsets.sce <- geneSets(es.sce) attr(es.dgCMatSp, "geneSets") <- NULL + attr(es.dgCMatSp, "gsvaParam") <- NULL checkTrue(identical(es.dgCMatSp, assay(es.sce))) checkTrue(identical(gsets.mat, gsets.sce)) diff --git a/inst/unitTests/test_ondisk.R b/inst/unitTests/test_ondisk.R index 8f473aa..82f56ed 100644 --- a/inst/unitTests/test_ondisk.R +++ b/inst/unitTests/test_ondisk.R @@ -38,7 +38,9 @@ test_ondisk <- function() { ## estimate GSVA enrichment scores with HDF5 input and output and check that they are identical es_h5ondisk <- gsva(gsvaParam(H5, gsets, ondisk="yes", verbose=FALSE), verbose=FALSE) es_h5ondiskmat <- as.matrix(es_h5ondisk) - attributes(es_h5ondiskmat) <- c(attributes(es_h5ondiskmat)[c("dim", "dimnames")], attributes(es_h5ondisk)["geneSets"]) + attributes(es_h5ondiskmat) <- c(attributes(es_h5ondiskmat)[c("dim", "dimnames")], + attributes(es_h5ondisk)["geneSets"]) + attr(es_noh5, "gsvaParam") <- NULL checkIdentical(es_noh5, es_h5ondiskmat) ## estimate ssGSEA enrichment 
scores with and without HDF5 input and check that they are identical @@ -49,7 +51,9 @@ test_ondisk <- function() { ## estimate ssGSEA enrichment scores with HDF5 input and output and check that they are identical es_h5ondisk <- gsva(ssgseaParam(H5, gsets, ondisk="yes", verbose=FALSE), verbose=FALSE) es_h5ondiskmat <- as.matrix(es_h5ondisk) - attributes(es_h5ondiskmat) <- c(attributes(es_h5ondiskmat)[c("dim", "dimnames")], attributes(es_h5ondisk)["geneSets"]) + attributes(es_h5ondiskmat) <- c(attributes(es_h5ondiskmat)[c("dim", "dimnames")], + attributes(es_h5ondisk)["geneSets"]) + attr(es_noh5, "gsvaParam") <- NULL checkIdentical(es_noh5, es_h5ondiskmat) ## estimate Z-scores enrichment scores with and without HDF5 input and check that they are identical @@ -60,7 +64,8 @@ test_ondisk <- function() { ## estimate Z-scores enrichment scores with HDF5 input and output and check that they are identical es_h5ondisk <- gsva(zscoreParam(H5, gsets, ondisk="yes", verbose=FALSE), verbose=FALSE) es_h5ondiskmat <- as.matrix(es_h5ondisk) - attributes(es_h5ondiskmat) <- c(attributes(es_h5ondiskmat)[c("dim", "dimnames")], attributes(es_h5ondisk)["geneSets"]) + attributes(es_h5ondiskmat) <- c(attributes(es_h5ondiskmat)[c("dim", "dimnames")], + attributes(es_h5ondisk)["geneSets"]) checkEqualsNumeric(es_noh5, es_h5ondiskmat) ## not identical due to the rowSds() vs sd() differences ## test the block processing of a small toy HDF5 input and output by setting a small block size and diff --git a/man/GsvaExprData-class.Rd b/man/GsvaExprData-class.Rd index 6946e54..60e5188 100644 --- a/man/GsvaExprData-class.Rd +++ b/man/GsvaExprData-class.Rd @@ -16,11 +16,11 @@ of another class as well as defining common methods for all of them. 
\seealso{ \code{\link{matrix}}, \code{\link[Matrix:dgCMatrix-class]{dgCMatrix}}, -\code{\link[SparseArray:SVT_SparseArray-class]{SVT_SparseMatrix}}, -\code{\link[DelayedArray:DelayedArray-class]{DelayedMatrix}}, +\code{\link[SparseArray:SVT_SparseMatrix-class]{SVT_SparseMatrix}}, +\code{\link[DelayedArray:DelayedMatrix-class]{DelayedMatrix}}, \code{\link[Biobase]{ExpressionSet}}, \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}, -\code{\link[SingleCellExperiment:SingleCellExperiment]{SingleCellExperiment}}, -\code{\link[SpatialExperiment:SpatialExperiment]{SpatialExperiment}}, +\code{\link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment}}, +\code{\link[SpatialExperiment:SpatialExperiment-class]{SpatialExperiment}}, \code{\link[HDF5Array:HDF5Array-class]{HDF5Array}} } diff --git a/man/GsvaMethodParam-class.Rd b/man/GsvaMethodParam-class.Rd index 27e42af..ed7f603 100644 --- a/man/GsvaMethodParam-class.Rd +++ b/man/GsvaMethodParam-class.Rd @@ -89,12 +89,12 @@ is(gsvapar, "GsvaMethodParam") } \seealso{ -\code{\linkS4class{GsvaExprData}}, -\code{\linkS4class{GsvaGeneSets}}, -\code{\linkS4class{zscoreParam}}, -\code{\linkS4class{plageParam}}, -\code{\linkS4class{ssgseaParam}}, -\code{\linkS4class{gsvaParam}}, +\code{\link[=GsvaExprData-class]{GsvaExprData}}, +\code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}, +\code{\link[=zscoreParam-class]{zscoreParam}}, +\code{\link[=plageParam-class]{plageParam}}, +\code{\link[=ssgseaParam-class]{ssgseaParam}}, +\code{\link[=gsvaParam-class]{gsvaParam}}, \code{\link[GSEABase:GeneIdentifierType-class]{GeneIdentifierType}} \code{\link{plageParam}}, \code{\link{zscoreParam}}, \code{\link{ssgseaParam}}, \code{\link{gsvaParam}} diff --git a/man/geneIdsToGeneSetCollection.Rd b/man/geneIdsToGeneSetCollection.Rd index 9bc8d98..ebfe8e1 100644 --- a/man/geneIdsToGeneSetCollection.Rd +++ b/man/geneIdsToGeneSetCollection.Rd @@ -54,7 +54,7 @@ geneIdsToGeneSetCollection(gsets) \seealso{ 
\code{\link[GSEABase:GeneSetCollection-class]{GeneSetCollection}}, \code{\link[GSEABase:GeneIdentifierType-class]{GeneIdentifierType}}, -\code{\link[GSEABase:GeneSet-class]{geneIds}}, +\code{\link[GSEABase:geneIds]{geneIds}}, \code{\link{deduplicateGeneSets}}, \code{\link{guessGeneIdType}}, \code{\link[GSEABase:GeneSet-class]{GeneSet}} diff --git a/man/geneSets.Rd b/man/geneSets.Rd index 8d94088..c59f294 100644 --- a/man/geneSets.Rd +++ b/man/geneSets.Rd @@ -30,9 +30,9 @@ \item{obj}{An object of one of the following classes: \itemize{ \item An expression data object of one of the classes described in -\code{\linkS4class{GsvaExprData}} that is the return value of a call to \code{gsva()}. +\code{\link[=GsvaExprData-class]{GsvaExprData}} that is the return value of a call to \code{gsva()}. \item A parameter object of one of the classes described in -\code{\linkS4class{GsvaMethodParam}} that could be used in a call to \code{gsva()}. +\code{\link[=GsvaMethodParam-class]{GsvaMethodParam}} that could be used in a call to \code{gsva()}. }} } \value{ diff --git a/man/gsva.Rd b/man/gsva.Rd index f998718..c966643 100644 --- a/man/gsva.Rd +++ b/man/gsva.Rd @@ -70,7 +70,7 @@ or terabytes (T), which GSVA will use to attempt bounding the maximum amount of main memory used across all threads of execution to that given quantity. By default \code{maxmem="auto"}, indicating that the maximum memory will be the 90\% of the total main memory, as calculated by -\code{\link[memuse:meminfo]{Sys.meminfo()}}. To avoid setting any bound on the +\code{\link[memuse:Sys.meminfo]{Sys.meminfo()}}. To avoid setting any bound on the maximum memory, use \code{maxmem=Inf}. 
Note that the amount of main memory used in an R session or script may depend on other commands and packages used in that same session or script.} @@ -81,18 +81,18 @@ container object of the same type as the input expression data container, except for the fact that enrichment scores are always dense, irrespective of whether the input is sparse, such as in single-cell data. If the input was a base matrix, a \code{\link[Matrix:dgCMatrix-class]{dgCMatrix}}, a -\code{\link[SparseArray:SVT_SparseArray-class]{SVT_SparseMatrix}}, or a -\code{\link[DelayedArray:DelayedArray-class]{DelayedMatrix}} object, then the output +\code{\link[SparseArray:SVT_SparseMatrix-class]{SVT_SparseMatrix}}, or a +\code{\link[DelayedArray:DelayedMatrix-class]{DelayedMatrix}} object, then the output will be either a base matrix object or a -\code{\link[DelayedArray:DelayedArray-class]{DelayedMatrix}}, with the gene sets +\code{\link[DelayedArray:DelayedMatrix-class]{DelayedMatrix}}, with the gene sets employed in the calculations stored in an attribute called \code{geneSets} of that object. If the input was an \code{ExpressionSet} object, then the output will be also an \code{ExpressionSet} object with the gene sets employed in the calculations stored in an attribute called \code{geneSets}. 
If the input was an object of either class \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}, -\code{\link[SingleCellExperiment:SingleCellExperiment]{SingleCellExperiment}}, -or \code{\link[SpatialExperiment:SpatialExperiment]{SpatialExperiment}}, +\code{\link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment}}, +or \code{\link[SpatialExperiment:SpatialExperiment-class]{SpatialExperiment}}, then the output will be of the same class, where enrichment scores will be stored in an assay called \code{es} and the gene sets employed in the calculations will be stored in the \code{rowData} slot of the object under the @@ -175,5 +175,5 @@ using singular value decomposition. \code{\link[BiocParallel:BiocParallelParam-class]{BiocParallelParam}}, \code{\link[Matrix:dgCMatrix-class]{dgCMatrix}}, \code{\link[Biobase]{ExpressionSet}}, -\code{\link[SingleCellExperiment:SingleCellExperiment]{SingleCellExperiment}} +\code{\link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment}} } diff --git a/man/gsvaAnnotation.Rd b/man/gsvaAnnotation.Rd index 09b0ac3..860f4af 100644 --- a/man/gsvaAnnotation.Rd +++ b/man/gsvaAnnotation.Rd @@ -49,7 +49,7 @@ } \arguments{ \item{object}{An expression data object of one of the classes described in -\code{\linkS4class{GsvaExprData}}. Simple \code{matrix} and \code{dgCMatrix} objects are not +\code{\link[=GsvaExprData-class]{GsvaExprData}}. 
Simple \code{matrix} and \code{dgCMatrix} objects are not capable of storing annotation metadata and will return \code{NULL}.} \item{value}{For the replacement methods, the annotation metadata to be diff --git a/man/gsvaEnrichment.Rd b/man/gsvaEnrichment.Rd index 4de6be2..7297734 100644 --- a/man/gsvaEnrichment.Rd +++ b/man/gsvaEnrichment.Rd @@ -14,7 +14,7 @@ ) } \arguments{ -\item{param}{A \code{\linkS4class{gsvaRanksParam}} object obtained with the method +\item{param}{A \code{\link[=gsvaRanksParam-class]{gsvaRanksParam}} object obtained with the method \code{\link{gsvaRanks}}.} \item{column}{The column for which we want to retrieve the enrichment data. diff --git a/man/gsvaParam-class.Rd b/man/gsvaParam-class.Rd index 628e67d..c35aeeb 100644 --- a/man/gsvaParam-class.Rd +++ b/man/gsvaParam-class.Rd @@ -36,11 +36,11 @@ gsvaParam( } \arguments{ \item{exprData}{The expression data set. Must be one of the classes -supported by \code{\linkS4class{GsvaExprData}}. For a list of these classes, see its +supported by \code{\link[=GsvaExprData-class]{GsvaExprData}}. For a list of these classes, see its help page using \code{help(GsvaExprData)}.} \item{geneSets}{The gene sets. Must be one of the classes supported by -\code{\linkS4class{GsvaGeneSets}}. For a list of these classes, see its help page using +\code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}. For a list of these classes, see its help page using \code{help(GsvaGeneSets)}.} \item{assay}{Character vector of length 1. 
The name of the assay to use in @@ -163,19 +163,19 @@ when \code{ondisk="no"} it will attempt to load all the data in main memory.} decisions made by the software during parameter object construction when \code{verbose=TRUE} (default) and remains silent otherwise.} -\item{x}{An object of class \code{\linkS4class{gsvaParam}}.} +\item{x}{An object of class \code{\link[=gsvaParam-class]{gsvaParam}}.} \item{recursive}{Not used with \code{x} being an object of -class \code{\linkS4class{gsvaParam}}.} +class \code{\link[=gsvaParam-class]{gsvaParam}}.} \item{object}{For the replacement method, an object of class -\code{\linkS4class{gsvaRanksParam}}.} +\code{\link[=gsvaRanksParam-class]{gsvaRanksParam}}.} \item{value}{For the replacement method, an object of the classes supported by -\code{\linkS4class{GsvaGeneSets}}.} +\code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}.} } \value{ -A new \code{\linkS4class{gsvaParam}} object. +A new \code{\link[=gsvaParam-class]{gsvaParam}} object. } \description{ S4 class for GSVA method parameter objects. @@ -294,16 +294,16 @@ variation analysis for microarray and RNA-Seq data. 
\doi{10.1186/1471-2105-14-7} } \seealso{ -\code{\linkS4class{GsvaExprData}}, -\code{\linkS4class{GsvaGeneSets}}, -\code{\linkS4class{GsvaMethodParam}}, -\code{\linkS4class{plageParam}}, -\code{\linkS4class{zscoreParam}}, -\code{\linkS4class{ssgseaParam}} +\code{\link[=GsvaExprData-class]{GsvaExprData}}, +\code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}, +\code{\link[=GsvaMethodParam-class]{GsvaMethodParam}}, +\code{\link[=plageParam-class]{plageParam}}, +\code{\link[=zscoreParam-class]{zscoreParam}}, +\code{\link[=ssgseaParam-class]{ssgseaParam}} \code{\link[GSEABase:GeneIdentifierType-class]{GeneIdentifierType}}, \code{\link{matrix}}, \code{\link[Biobase]{ExpressionSet}}, \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}, -\code{\link[SingleCellExperiment:SingleCellExperiment]{SingleCellExperiment}} +\code{\link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment}} } diff --git a/man/gsvaRanks.Rd b/man/gsvaRanks.Rd index 38d55d3..62331aa 100644 --- a/man/gsvaRanks.Rd +++ b/man/gsvaRanks.Rd @@ -1,12 +1,21 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/gsva.R -\name{gsvaRanks} +\name{gsvaRowNorm} +\alias{gsvaRowNorm} +\alias{gsvaRowNorm,gsvaParam-method} \alias{gsvaRanks} \alias{gsvaRanks,gsvaParam-method} \alias{gsvaScores} \alias{gsvaScores,gsvaRanksParam-method} \title{GSVA ranks and scores} \usage{ +\S4method{gsvaRowNorm}{gsvaParam}( + param, + verbose = TRUE, + BPPARAM = SerialParam(progressbar = verbose), + maxmem = "auto" +) + \S4method{gsvaRanks}{gsvaParam}( param, verbose = TRUE, @@ -22,7 +31,7 @@ ) } \arguments{ -\item{param}{A parameter object of the \code{\linkS4class{gsvaRanksParam}} class.} +\item{param}{A parameter object of the \code{\link[=gsvaRanksParam-class]{gsvaRanksParam}} class.} \item{verbose}{Gives information about each calculation step. 
Default: \code{TRUE}.} @@ -38,14 +47,17 @@ or terabytes (T), which GSVA will use to attempt bounding the maximum amount of main memory used across all threads of execution to that given quantity. By default \code{maxmem="auto"}, indicating that the maximum memory will be the 90\% of the total main memory, as calculated by -\code{\link[memuse:meminfo]{Sys.meminfo()}}. To avoid setting any bound on the +\code{\link[memuse:Sys.meminfo]{Sys.meminfo()}}. To avoid setting any bound on the maximum memory, use \code{maxmem=Inf}. Note that the amount of main memory used in an R session or script may depend on other commands and packages used in that same session or script.} } \value{ +In the case of the \code{gsvaRowNorm()} method, an object of class +\code{\link[=gsvaRanksParam-class]{gsvaRanksParam}}. + In the case of the \code{gsvaRanks()} method, an object of class -\code{\linkS4class{gsvaRanksParam}}. +\code{\link[=gsvaRanksParam-class]{gsvaRanksParam}}. In the case of the \code{gsvaScores()} method, a gene-set by sample matrix of GSVA enrichment scores stored in a container object of the same type as @@ -63,9 +75,10 @@ calculations will be stored in the \code{rowData} slot of the object under the column name \code{gs}. } \description{ -Calculate GSVA scores in two steps: (1) calculate GSVA -ranks; and (2) calculate GSVA scores using the previously calculated -ranks. +Calculate GSVA scores in three steps: (1) normalize values of +expression by row; (2) calculate GSVA ranks by column from the previous +row-normalized values; and (3) calculate GSVA scores by column from the +previously calculated column ranks. } \examples{ library(GSVA) @@ -119,9 +132,9 @@ variation analysis for microarray and RNA-Seq data. 
\doi{10.1186/1471-2105-14-7} } \seealso{ -\code{\linkS4class{gsvaParam}}, \code{\linkS4class{gsvaRanksParam}}, \code{\link{gsva}}, +\code{\link[=gsvaParam-class]{gsvaParam}}, \code{\link[=gsvaRanksParam-class]{gsvaRanksParam}}, \code{\link{gsva}}, \code{\link[BiocParallel:BiocParallelParam-class]{BiocParallelParam}}, \code{\link[Matrix:dgCMatrix-class]{dgCMatrix}}, \code{\link[Biobase]{ExpressionSet}}, -\code{\link[SingleCellExperiment:SingleCellExperiment]{SingleCellExperiment}} +\code{\link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment}} } diff --git a/man/gsvaRanks_serialization.Rd b/man/gsvaRanks_serialization.Rd index 8eb95d5..abb5133 100644 --- a/man/gsvaRanks_serialization.Rd +++ b/man/gsvaRanks_serialization.Rd @@ -10,7 +10,7 @@ saveHDF5GSVAranks(x, dir, ...) loadHDF5GSVAranks(dir, ...) } \arguments{ -\item{x}{A \code{\linkS4class{gsvaRanksParam}} object to save to disk.} +\item{x}{A \code{\link[=gsvaRanksParam-class]{gsvaRanksParam}} object to save to disk.} \item{dir}{The path to the directory where to save or load the GSVA ranks data.} @@ -18,7 +18,7 @@ data.} \item{...}{Additional arguments to be passed to the underlying HDF5 saving/loading functions \code{\link[HDF5Array:saveHDF5SummarizedExperiment]{saveHDF5SummarizedExperiment}} -and \code{\link[HDF5Array:saveHDF5SummarizedExperiment]{loadHDF5SummarizedExperiment}}, +and \code{\link[HDF5Array:loadHDF5SummarizedExperiment]{loadHDF5SummarizedExperiment}}, respectively.} } \value{ diff --git a/man/plageParam-class.Rd b/man/plageParam-class.Rd index 5fbf4ed..e85b003 100644 --- a/man/plageParam-class.Rd +++ b/man/plageParam-class.Rd @@ -19,11 +19,11 @@ plageParam( } \arguments{ \item{exprData}{The expression data set. Must be one of the classes -supported by \code{\linkS4class{GsvaExprData}}. For a list of these classes, see its +supported by \code{\link[=GsvaExprData-class]{GsvaExprData}}. 
For a list of these classes, see its help page using \code{help(GsvaExprData)}.} \item{geneSets}{The gene sets. Must be one of the classes supported by -\code{\linkS4class{GsvaGeneSets}}. For a list of these classes, see its help page using +\code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}. For a list of these classes, see its help page using \code{help(GsvaGeneSets)}.} \item{assay}{Character vector of length 1. The name of the assay to use in @@ -63,7 +63,7 @@ decisions made by the software during parameter object construction when \code{verbose=TRUE} (default) and remains silent otherwise.} } \value{ -A new \code{\linkS4class{plageParam}} object. +A new \code{\link[=plageParam-class]{plageParam}} object. } \description{ S4 class for PLAGE method parameter objects. @@ -104,12 +104,12 @@ using singular value decomposition. \doi{10.1186/1471-2105-6-225} } \seealso{ -\code{\linkS4class{GsvaExprData}}, -\code{\linkS4class{GsvaGeneSets}}, -\code{\linkS4class{GsvaMethodParam}}, -\code{\linkS4class{zscoreParam}}, -\code{\linkS4class{ssgseaParam}}, -\code{\linkS4class{gsvaParam}} +\code{\link[=GsvaExprData-class]{GsvaExprData}}, +\code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}, +\code{\link[=GsvaMethodParam-class]{GsvaMethodParam}}, +\code{\link[=zscoreParam-class]{zscoreParam}}, +\code{\link[=ssgseaParam-class]{ssgseaParam}}, +\code{\link[=gsvaParam-class]{gsvaParam}} \code{\link[GSEABase:GeneIdentifierType-class]{GeneIdentifierType}} } diff --git a/man/ssgseaParam-class.Rd b/man/ssgseaParam-class.Rd index f3a24b0..7db1043 100644 --- a/man/ssgseaParam-class.Rd +++ b/man/ssgseaParam-class.Rd @@ -26,11 +26,11 @@ ssgseaParam( } \arguments{ \item{exprData}{The expression data set. Must be one of the classes -supported by \code{\linkS4class{GsvaExprData}}. For a list of these classes, see its +supported by \code{\link[=GsvaExprData-class]{GsvaExprData}}. For a list of these classes, see its help page using \code{help(GsvaExprData)}.} \item{geneSets}{The gene sets. 
Must be one of the classes supported by -\code{\linkS4class{GsvaGeneSets}}. For a list of these classes, see its help page using +\code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}. For a list of these classes, see its help page using \code{help(GsvaGeneSets)}.} \item{assay}{Character vector of length 1. The name of the assay to use in @@ -103,13 +103,13 @@ when \code{ondisk="no"} it will attempt to load all the data in main memory.} decisions made by the software during parameter object construction when \code{verbose=TRUE} (default) and remains silent otherwise.} -\item{x}{An object of class \code{\linkS4class{ssgseaParam}}.} +\item{x}{An object of class \code{\link[=ssgseaParam-class]{ssgseaParam}}.} \item{recursive}{Not used with \code{x} being an object of -class \code{\linkS4class{ssgseaParam}}.} +class \code{\link[=ssgseaParam-class]{ssgseaParam}}.} } \value{ -A new \code{\linkS4class{ssgseaParam}} object. +A new \code{\link[=ssgseaParam-class]{ssgseaParam}} object. } \description{ S4 class for ssGSEA method parameter objects. @@ -180,16 +180,16 @@ oncogenic KRAS-driven cancers require TBK1. 
\doi{10.1038/nature08460} } \seealso{ -\code{\linkS4class{GsvaExprData}}, -\code{\linkS4class{GsvaGeneSets}}, -\code{\linkS4class{GsvaMethodParam}}, -\code{\linkS4class{plageParam}}, -\code{\linkS4class{zscoreParam}}, -\code{\linkS4class{gsvaParam}} +\code{\link[=GsvaExprData-class]{GsvaExprData}}, +\code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}, +\code{\link[=GsvaMethodParam-class]{GsvaMethodParam}}, +\code{\link[=plageParam-class]{plageParam}}, +\code{\link[=zscoreParam-class]{zscoreParam}}, +\code{\link[=gsvaParam-class]{gsvaParam}} \code{\link[GSEABase:GeneIdentifierType-class]{GeneIdentifierType}}, \code{\link{matrix}}, \code{\link[Biobase]{ExpressionSet}}, \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}, -\code{\link[SingleCellExperiment:SingleCellExperiment]{SingleCellExperiment}} +\code{\link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment}} } diff --git a/man/zscoreParam-class.Rd b/man/zscoreParam-class.Rd index 2cc8d2f..f8186e4 100644 --- a/man/zscoreParam-class.Rd +++ b/man/zscoreParam-class.Rd @@ -19,11 +19,11 @@ zscoreParam( } \arguments{ \item{exprData}{The expression data set. Must be one of the classes -supported by \code{\linkS4class{GsvaExprData}}. For a list of these classes, see its +supported by \code{\link[=GsvaExprData-class]{GsvaExprData}}. For a list of these classes, see its help page using \code{help(GsvaExprData)}.} \item{geneSets}{The gene sets. Must be one of the classes supported by -\code{\linkS4class{GsvaGeneSets}}. For a list of these classes, see its help page using +\code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}. For a list of these classes, see its help page using \code{help(GsvaGeneSets)}.} \item{assay}{Character vector of length 1. 
The name of the assay to use in @@ -63,7 +63,7 @@ decisions made by the software during parameter object construction when \code{verbose=TRUE} (default) and remains silent otherwise.} } \value{ -A new \code{\linkS4class{zscoreParam}} object. +A new \code{\link[=zscoreParam-class]{zscoreParam}} object. } \description{ S4 class for combined z-scores method parameter objects. @@ -104,12 +104,12 @@ disease classification. \doi{10.1371/journal.pcbi.1000217} } \seealso{ -\code{\linkS4class{GsvaExprData}}, -\code{\linkS4class{GsvaGeneSets}}, -\code{\linkS4class{GsvaMethodParam}}, -\code{\linkS4class{plageParam}}, -\code{\linkS4class{ssgseaParam}}, -\code{\linkS4class{gsvaParam}} +\code{\link[=GsvaExprData-class]{GsvaExprData}}, +\code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}, +\code{\link[=GsvaMethodParam-class]{GsvaMethodParam}}, +\code{\link[=plageParam-class]{plageParam}}, +\code{\link[=ssgseaParam-class]{ssgseaParam}}, +\code{\link[=gsvaParam-class]{gsvaParam}} \code{\link[GSEABase:GeneIdentifierType-class]{GeneIdentifierType}} } From ca5e7ebdce5b3a90cca5d41a1e4b2bb29b45928e Mon Sep 17 00:00:00 2001 From: Robert Castelo Date: Fri, 22 May 2026 17:49:14 +0200 Subject: [PATCH 02/12] Added gsvaColRanks() and gsvaColScores() functions --- NAMESPACE | 11 +- R/AllGenerics.R | 8 + R/GSVA-package.R | 1 - R/gsva.R | 260 ++++++++++++++++++---- R/utils.R | 10 + R/zscore.R | 2 +- inst/unitTests/test_genesets.R | 18 +- inst/unitTests/test_inputdatacontainers.R | 17 +- inst/unitTests/test_ondisk.R | 30 ++- man/gsvaRanks.Rd | 39 +++- 10 files changed, 307 insertions(+), 89 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index cb7db93..54769f3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,6 +10,8 @@ export(geneSetSizes) export(geneSets) export(gsva) export(gsvaAnnotation) +export(gsvaColRanks) +export(gsvaColScores) export(gsvaEnrichment) export(gsvaParam) export(gsvaRanks) @@ -38,6 +40,8 @@ exportMethods(geneSetSizes) exportMethods(geneSets) exportMethods(gsva) 
exportMethods(gsvaAnnotation) +exportMethods(gsvaColRanks) +exportMethods(gsvaColScores) exportMethods(gsvaEnrichment) exportMethods(gsvaRanks) exportMethods(gsvaRowNorm) @@ -78,18 +82,13 @@ importFrom(DelayedArray,chunkGrid) importFrom(DelayedArray,chunkdim) importFrom(DelayedArray,close) importFrom(DelayedArray,colAutoGrid) -importFrom(DelayedArray,colSums) -importFrom(DelayedArray,currentBlockId) importFrom(DelayedArray,defaultAutoGrid) importFrom(DelayedArray,getAutoBlockLength) importFrom(DelayedArray,gridReduce) -importFrom(DelayedArray,read_block) importFrom(DelayedArray,rowAutoGrid) importFrom(DelayedArray,rowRanges) importFrom(DelayedArray,seed) importFrom(DelayedArray,setAutoBPPARAM) -importFrom(DelayedArray,t) -importFrom(DelayedArray,write_block) importFrom(DelayedMatrixStats,rowSds) importFrom(GSEABase,AnnoOrEntrezIdentifier) importFrom(GSEABase,ENSEMBLIdentifier) @@ -112,7 +111,6 @@ importFrom(IRanges,match) importFrom(IRanges,ranges) importFrom(IRanges,start) importFrom(IRanges,width) -importFrom(Matrix,colSums) importFrom(Matrix,nnzero) importFrom(MatrixGenerics,colRanks) importFrom(MatrixGenerics,colSums) @@ -123,6 +121,7 @@ importFrom(S4Arrays,DummyArrayGrid) importFrom(S4Arrays,is_sparse) importFrom(S4Arrays,read_block) importFrom(S4Arrays,refdim) +importFrom(S4Arrays,write_block) importFrom(S4Vectors,"metadata<-") importFrom(S4Vectors,DataFrame) importFrom(S4Vectors,SimpleList) diff --git a/R/AllGenerics.R b/R/AllGenerics.R index 8af4e7f..b097fd4 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -7,6 +7,14 @@ setGeneric("gsva", setGeneric("gsvaRowNorm", function(param, ...) standardGeneric("gsvaRowNorm")) +#' @export +setGeneric("gsvaColRanks", + function(rowNormExprData, ...) standardGeneric("gsvaColRanks")) + +#' @export +setGeneric("gsvaColScores", + function(rankExprData, ...) standardGeneric("gsvaColScores")) + #' @export setGeneric("gsvaRanks", function(param, ...) 
standardGeneric("gsvaRanks")) diff --git a/R/GSVA-package.R b/R/GSVA-package.R index bfb1639..0f06382 100644 --- a/R/GSVA-package.R +++ b/R/GSVA-package.R @@ -17,7 +17,6 @@ #' @importFrom SingleCellExperiment SingleCellExperiment #' @importFrom SpatialExperiment SpatialExperiment #' @importFrom DelayedArray rowAutoGrid colAutoGrid defaultAutoGrid -#' currentBlockId read_block gridReduce write_block close t colSums #' @importFrom HDF5Array HDF5RealizationSink writeHDF5Array #' @importFrom cli cli_abort cli_alert_info cli_alert_warning cli_alert_success NULL diff --git a/R/gsva.R b/R/gsva.R index 3f17621..506f40b 100644 --- a/R/gsva.R +++ b/R/gsva.R @@ -658,6 +658,44 @@ setMethod("show", return(lst) } +## called from gsvaColRanks() and gsvaColScores(), i.e., GSVA only, no need +## to care about other methods +#' @importFrom S4Vectors metadata +.pull_param <- function(exprData, assay) { + + p <- NULL + if (is(exprData, "matrix") || is(exprData, "dgCMatrix") || + is(exprData, "SVT_SparseMatrix") || is(exprData, "DelayedMatrix") || + is(exprData, "HDF5Matrix") || is(exprData, "ExpressionSet")) { + mask <- is.null(attr(exprData, "gsvaParam")) || + is.null(attr(exprData, "assay")) + if (any(mask)) + cli_abort(c("x"="Missing metadata in the input expression data.")) + p <- attr(exprData, "gsvaParam") + a <- attr(exprData, "assay") + if (!a %in% c("gsvarownr", "gsvaranks")) + cli_abort(c("x"="Wrong metadata in the input expression data.")) + } else { ## a SummarizedExperiment derivative + if (is.null(metadata(exprData)$gsvaParam)) + cli_abort(c("x"="Missing metadata in the input expression data")) + p <- metadata(exprData)$gsvaParam + if (!any(assayNames(exprData) %in% c("gsvarownr", "gsvaranks"))) + cli_abort(c("x"="Wrong metadata in the input expression data.")) + } + + param <- new("gsvaParam", + exprData=exprData, geneSets=p$geneSets, + assay=p$assay, annotation=p$annotation, + minSize=p$minSize, maxSize=p$maxSize, + kcdf=p$kcdf, kcdfNoneMinSampleSize=p$kcdfNoneMinSampleSize, + 
tau=p$tau, maxDiff=p$maxDiff, absRanking=p$absRanking, + sparse=p$sparse, checkNA=p$checkNA, didCheckNA=p$didCheckNA, + anyNA=p$anyNA, use=p$use, filterRows=p$filterRows, + nzcount=p$nzcount, ondisk=p$ondisk) + + return(param) +} + #' @title GSVA ranks and scores #' @@ -709,8 +747,6 @@ setMethod("show", #' #' p <- 10 ## number of genes #' n <- 30 ## number of samples -#' nGrp1 <- 15 ## number of samples in group 1 -#' nGrp2 <- n - nGrp1 ## number of samples in group 2 #' #' ## consider three disjoint gene sets #' geneSets <- list(gset1=paste0("g", 1:3), @@ -721,18 +757,17 @@ setMethod("show", #' y <- matrix(rnorm(n*p), nrow=p, ncol=n, #' dimnames=list(paste("g", 1:p, sep="") , paste("s", 1:n, sep=""))) #' -#' ## genes in set1 are expressed at higher levels in the last 'nGrp1+1' to 'n' samples -#' y[geneSets$set1, (nGrp1+1):n] <- y[geneSets$set1, (nGrp1+1):n] + 2 -#' #' ## build GSVA parameter object #' gsvapar <- gsvaParam(y, geneSets) #' -#' ## calculate GSVA ranks -#' gsvarankspar <- gsvaRanks(gsvapar) -#' gsvarankspar +#' ## calculate row-normalized expression values +#' gsvarownormexpr <- gsvaRowNorm(gsvapar) +#' +#' ## calculate GSVA column ranks +#' gsvarankspar <- gsvaColRanks(gsvarownormexpr) +#' #' ## calculate GSVA scores #' gsva_es <- gsvaScores(gsvarankspar) -#' gsva_es #' #' ## calculate now GSVA scores in a single step #' gsva_es1 <- gsva(gsvapar) @@ -798,20 +833,77 @@ setMethod("gsvaRowNorm", signature(param="gsvaParam"), cli_alert_info(sprintf("Normalizing rows")) kcdfminssize <- .get_kcdfNoneMinSampleSize(param) - gsvarows <- .compute_row_norm(expr=filtDataMatrix, - kcdf=.get_kcdf(param), - kcdf.min.ssize=kcdfminssize, - sparse=.get_sparse(param), - any_na=anyNA(param), - na_use=.get_NAuse(param), - verbose=verbose, - BPPARAM=BPPARAM, - maxmem=maxmem) + gsvarownr <- .compute_row_norm(expr=filtDataMatrix, + kcdf=.get_kcdf(param), + kcdf.min.ssize=kcdfminssize, + sparse=.get_sparse(param), + any_na=anyNA(param), + na_use=.get_NAuse(param), + 
verbose=verbose, + BPPARAM=BPPARAM, + maxmem=maxmem) + + rownames(gsvarownr) <- rownames(filtDataMatrix) + colnames(gsvarownr) <- colnames(filtDataMatrix) + + rval <- wrapData(get_exprData(param), gsvarownr, param, + "gsvarownr") - rownames(gsvarows) <- rownames(filtDataMatrix) - colnames(gsvarows) <- colnames(filtDataMatrix) + if (verbose && gsva_global$show_start_and_end_messages) + cli_alert_success("Calculations finished") - rval <- wrapData(get_exprData(param), gsvarows, param, "gsvarows") + return(rval) + }) + + + +#' +#' @param rowNormExprData A row-normalized expression data set obtained with +#' [`gsvaRowNorm`]. Must be one of the classes +#' supported by [`GsvaExprData-class`]. For a list of these classes, see its +#' help page using `help(GsvaExprData)`. +#' +#' @return In the case of the `gsvaColRanks()` method, an object of class +#' [`gsvaRanksParam-class`]. +#' +#' @aliases gsvaColRanks,GsvaExprData-method +#' @name gsvaColRanks +#' @rdname gsvaRanks +#' +#' @importFrom cli cli_alert_info cli_alert_success +#' @exportMethod gsvaColRanks +setMethod("gsvaColRanks", signature(rowNormExprData="GsvaExprData"), + function(rowNormExprData, + verbose=TRUE, + BPPARAM=SerialParam(progressbar=verbose), + maxmem="auto") { + + param <- .pull_param(rowNormExprData, "gsvarownr") + + if (verbose && gsva_global$show_start_and_end_messages) { + pkgversion <- packageDescription("GSVA")[["Version"]] + cli_alert_info("GSVA version {pkgversion}") + } + + .check_bpparam(BPPARAM) + + dataMatrix <- unwrapData(rowNormExprData, "gsvarownr") + maxmem <- .check_maxmem(param, maxmem, verbose) + ondisk <- .check_ondisk(param, maxmem, verbose) + + dataMatrix <- .check_sparse_load_input_expr(dataMatrix, "GSVA", + ondisk, verbose) + + gsvarnks <- .compute_gsva_ranks(Z=dataMatrix, + verbose=verbose, + BPPARAM=BPPARAM, + maxmem=maxmem) + + rownames(gsvarnks) <- rownames(dataMatrix) + colnames(gsvarnks) <- colnames(dataMatrix) + + rval <- wrapData(get_exprData(param), 
gsvarnks, param, + "gsvaranks") if (verbose && gsva_global$show_start_and_end_messages) cli_alert_success("Calculations finished") @@ -820,7 +912,6 @@ setMethod("gsvaRowNorm", signature(param="gsvaParam"), }) - #' #' @return In the case of the `gsvaRanks()` method, an object of class #' [`gsvaRanksParam-class`]. @@ -872,17 +963,17 @@ setMethod("gsvaRanks", signature(param="gsvaParam"), cli_alert_info(sprintf("Calculating GSVA ranks")) kcdfminssize <- .get_kcdfNoneMinSampleSize(param) - gsvarows <- .compute_row_norm(expr=filtDataMatrix, - kcdf=.get_kcdf(param), - kcdf.min.ssize=kcdfminssize, - sparse=.get_sparse(param), - any_na=anyNA(param), - na_use=.get_NAuse(param), - verbose=verbose, - BPPARAM=BPPARAM, - maxmem=maxmem) - - gsvarnks <- .compute_gsva_ranks(Z=gsvarows, + gsvarownr <- .compute_row_norm(expr=filtDataMatrix, + kcdf=.get_kcdf(param), + kcdf.min.ssize=kcdfminssize, + sparse=.get_sparse(param), + any_na=anyNA(param), + na_use=.get_NAuse(param), + verbose=verbose, + BPPARAM=BPPARAM, + maxmem=maxmem) + + gsvarnks <- .compute_gsva_ranks(Z=gsvarownr, verbose=verbose, BPPARAM=BPPARAM, maxmem=maxmem) @@ -1034,6 +1125,93 @@ setMethod("gsvaScores", signature(param="gsvaRanksParam"), return(rval) }) +#' @aliases gsvaColScores,GsvaExprData-method +#' @name gsvaColScores +#' @rdname gsvaRanks +#' +#' @importFrom S4Arrays is_sparse +#' @importFrom cli cli_alert_info cli_alert_success +#' @exportMethod gsvaColScores +setMethod("gsvaColScores", signature(rankExprData="GsvaExprData"), + function(rankExprData, verbose=TRUE, + BPPARAM=SerialParam(progressbar=verbose), + maxmem="auto") { + + param <- .pull_param(rankExprData, "gsvaranks") + + if (verbose && gsva_global$show_start_and_end_messages) { + pkgversion <- packageDescription("GSVA")[["Version"]] + cli_alert_info("GSVA version {pkgversion}") + } + + .check_bpparam(BPPARAM) + + ## assuming rows in the rank data have been already filtered + filtDataMatrix <- unwrapData(rankExprData, "gsvaranks") + + 
filtMappedGeneSets <- .filterAndMapGeneSets(param=param, + filteredDataMatrix=filtDataMatrix, + verbose=verbose) + + sparse <- .get_sparse(param) + if (sparse && !is_sparse(filtDataMatrix)) + sparse <- FALSE + + if (verbose) { + if (sparse) + cli_alert_info("GSVA sparse algorithm") + else + cli_alert_info("GSVA dense (classical) algorithm") + } + + maxmem <- .check_maxmem(param, maxmem, verbose) + ondisk <- .check_ondisk(param, maxmem, verbose) + + filtDataMatrix <- .check_sparse_load_input_expr(filtDataMatrix, + "GSVA", ondisk, + verbose) + + BPPARAM <- .check_open_parallelism(filtDataMatrix, BPPARAM, + minparrows=100, minparcols=100, + verbose) + + ondisk <- .check_es_memory_requirements(filtDataMatrix, + filtMappedGeneSets, + ondisk, maxmem) + if (verbose) { + n <- length(filtMappedGeneSets) + cli_alert_info("Calculating GSVA scores for {n} gene sets") + } + + gsva_es <- .processMatrixCols(filtDataMatrix, + FUN=.compute_gsva_scores, + geneSetsIdx=filtMappedGeneSets, + tau=.get_tau(param), + maxDiff=.get_maxDiff(param), + absRanking=.get_absRanking(param), + sparse=sparse, any_na=anyNA(param), + na_use=.get_NAuse(param), + minSize=get_minSize(param), + ondisk=ondisk, verbose=verbose, + minparrows=100, minparcols=100, + BPPARAM=BPPARAM, + maxmem=ceiling(maxmem/100)) ## use + ## of memory increases here about + ## 10-fold over block size memory + + rownames(gsva_es) <- names(filtMappedGeneSets) + colnames(gsva_es) <- colnames(filtDataMatrix) + + gs <- .geneSetsIndices2Names(indices=filtMappedGeneSets, + names=rownames(filtDataMatrix)) + rval <- wrapData(get_exprData(param), gsva_es, param, "es", gs) + + if (verbose && gsva_global$show_start_and_end_messages) + cli_alert_success("Calculations finished") + + return(rval) + }) + #' @title GSVA enrichment data and visualization #' #' @description Extract and plot enrichment data from GSVA scores. 
@@ -1603,7 +1781,8 @@ compute.col.ranks <- function(Z, ties.method="last", drop.sparsity=FALSE, ## written into an on-disk data structure (HDF5) instead of being returned in ## main memory. #' @importFrom cli cli_alert_info cli_alert_warning -#' @importFrom S4Arrays is_sparse refdim DummyArrayGrid +#' @importFrom S4Arrays is_sparse refdim DummyArrayGrid read_block write_block +#' @importFrom DelayedArray close .compute_gsva_scores <- function(R, geneSetsIdx, tau, maxDiff, absRanking, sparse, any_na, na_use, minSize, ondisk, verbose) { @@ -1861,7 +2040,7 @@ compute.col.ranks <- function(Z, ties.method="last", drop.sparsity=FALSE, #' @importFrom HDF5Array HDF5RealizationSink #' @importFrom S4Arrays DummyArrayGrid -#' @importFrom DelayedArray seed gridReduce +#' @importFrom DelayedArray seed gridReduce close .ecdfvals_sparseh5_to_sparseh5 <- function(X, grid=NULL, verbose=FALSE) { stopifnot(is(X, "DelayedMatrix") || is(X, "HDF5Matrix")) ## QC @@ -1882,7 +2061,7 @@ compute.col.ranks <- function(Z, ties.method="last", drop.sparsity=FALSE, #' @importFrom HDF5Array HDF5RealizationSink #' @importFrom S4Arrays DummyArrayGrid -#' @importFrom DelayedArray seed gridReduce +#' @importFrom DelayedArray seed gridReduce close .ecdfvals_sparseh5_to_denseh5 <- function(X, grid=NULL, verbose) { stopifnot(is(X, "DelayedMatrix") || is(X, "HDF5Matrix")) ## QC @@ -1903,7 +2082,7 @@ compute.col.ranks <- function(Z, ties.method="last", drop.sparsity=FALSE, #' @importFrom S4Arrays DummyArrayGrid #' @importFrom HDF5Array HDF5RealizationSink -#' @importFrom DelayedArray seed rowAutoGrid blockReduce +#' @importFrom DelayedArray seed rowAutoGrid blockReduce close .ecdfvals_denseh5_to_denseh5 <- function(X, grid=NULL, verbose) { stopifnot(is(X, "DelayedMatrix") || is(X, "HDF5Matrix")) ## QC @@ -1980,7 +2159,7 @@ compute.col.ranks <- function(Z, ties.method="last", drop.sparsity=FALSE, #' @importFrom S4Arrays DummyArrayGrid #' @importFrom HDF5Array HDF5RealizationSink -#' @importFrom DelayedArray 
seed rowAutoGrid blockReduce +#' @importFrom DelayedArray seed rowAutoGrid blockReduce close .kcdfvals_sparseh5_to_sparseh5 <- function(X, Gaussk, grid=NULL, verbose) { stopifnot(is(X, "DelayedMatrix") || is(X, "HDF5Matrix")) ## QC @@ -2001,7 +2180,7 @@ compute.col.ranks <- function(Z, ties.method="last", drop.sparsity=FALSE, #' @importFrom S4Arrays DummyArrayGrid #' @importFrom HDF5Array HDF5RealizationSink -#' @importFrom DelayedArray seed rowAutoGrid blockReduce +#' @importFrom DelayedArray seed rowAutoGrid blockReduce close .kcdfvals_sparseh5_to_denseh5 <- function(X, Gaussk, grid=NULL, verbose) { stopifnot(is(X, "DelayedMatrix") || is(X, "HDF5Matrix")) ## QC @@ -2022,7 +2201,7 @@ compute.col.ranks <- function(Z, ties.method="last", drop.sparsity=FALSE, #' @importFrom S4Arrays DummyArrayGrid #' @importFrom HDF5Array HDF5RealizationSink -#' @importFrom DelayedArray seed rowAutoGrid blockReduce +#' @importFrom DelayedArray seed rowAutoGrid blockReduce close .kcdfvals_denseh5_to_denseh5 <- function(X, Gaussk, grid=NULL, verbose) { stopifnot(is(X, "DelayedMatrix") || is(X, "HDF5Matrix")) ## QC @@ -2106,6 +2285,7 @@ compute.col.ranks <- function(Z, ties.method="last", drop.sparsity=FALSE, #' @importFrom S4Arrays DummyArrayGrid #' @importFrom MatrixGenerics colRanks #' @importFrom BiocParallel SerialParam +#' @importFrom DelayedArray close .colRanksHDF5 <- function(X, grid=NULL, ties.method="last", drop.sparsity=FALSE) { stopifnot(is(X, "DelayedMatrix") || is(X, "HDF5Matrix")) ## QC diff --git a/R/utils.R b/R/utils.R index a31a91d..aae8f24 100644 --- a/R/utils.R +++ b/R/utils.R @@ -56,8 +56,10 @@ setMethod("unwrapData", signature("SpatialExperiment"), ## wrapData: put the resulting data and gene sets into the original data container type setMethod("wrapData", signature(container="matrix"), function(container, dataMatrix, param, assay, geneSets) { + stopifnot(!missing(assay)) stopifnot(!missing(param)) attr(dataMatrix, "gsvaParam") <- .gsvaParam_as_list(param) + 
attr(dataMatrix, "assay") <- assay if (!missing(geneSets)) attr(dataMatrix, "geneSets") <- geneSets return(dataMatrix) @@ -65,8 +67,10 @@ setMethod("wrapData", signature(container="matrix"), setMethod("wrapData", signature(container="dgCMatrix"), function(container, dataMatrix, param, assay, geneSets) { + stopifnot(!missing(assay)) stopifnot(!missing(param)) attr(dataMatrix, "gsvaParam") <- .gsvaParam_as_list(param) + attr(dataMatrix, "assay") <- assay if (!missing(geneSets)) attr(dataMatrix, "geneSets") <- geneSets return(dataMatrix) @@ -74,8 +78,10 @@ setMethod("wrapData", signature(container="dgCMatrix"), setMethod("wrapData", signature(container="SVT_SparseMatrix"), function(container, dataMatrix, param, assay, geneSets) { + stopifnot(!missing(assay)) stopifnot(!missing(param)) attr(dataMatrix, "gsvaParam") <- .gsvaParam_as_list(param) + attr(dataMatrix, "assay") <- assay if (!missing(geneSets)) attr(dataMatrix, "geneSets") <- geneSets return(dataMatrix) @@ -83,8 +89,10 @@ setMethod("wrapData", signature(container="SVT_SparseMatrix"), setMethod("wrapData", signature(container="DelayedMatrix"), function(container, dataMatrix, param, assay, geneSets) { + stopifnot(!missing(assay)) stopifnot(!missing(param)) attr(dataMatrix, "gsvaParam") <- .gsvaParam_as_list(param) + attr(dataMatrix, "assay") <- assay if (!missing(geneSets)) attr(dataMatrix, "geneSets") <- geneSets return(dataMatrix) @@ -92,12 +100,14 @@ setMethod("wrapData", signature(container="DelayedMatrix"), setMethod("wrapData", signature(container="ExpressionSet"), function(container, dataMatrix, param, assay, geneSets) { + stopifnot(!missing(assay)) stopifnot(!missing(param)) rval <- new("ExpressionSet", exprs=dataMatrix, phenoData=phenoData(container), experimentData=experimentData(container), annotation="") attr(rval, "gsvaParam") <- .gsvaParam_as_list(param) + attr(dataMatrix, "assay") <- assay if (!missing(geneSets)) attr(rval, "geneSets") <- geneSets diff --git a/R/zscore.R b/R/zscore.R index 
56e5654..35ae04a 100644 --- a/R/zscore.R +++ b/R/zscore.R @@ -336,7 +336,7 @@ setValidity("zscoreParam", function(object) { #' @importFrom cli cli_alert_info #' @importFrom cli cli_progress_bar cli_progress_update cli_progress_done #' @importFrom BiocParallel bpnworkers bplapply bpprogressbar -#' @importFrom Matrix colSums +#' @importFrom MatrixGenerics colSums zscore <- function(X, geneSets, ondisk=FALSE, verbose=TRUE, BPPARAM=NULL, maxmem=Inf) { diff --git a/inst/unitTests/test_genesets.R b/inst/unitTests/test_genesets.R index a4bc3bd..29ab3d5 100644 --- a/inst/unitTests/test_genesets.R +++ b/inst/unitTests/test_genesets.R @@ -52,8 +52,7 @@ test_genesets <- function() { ## estimate GSVA enrichment scores with gene sets input as a GeneSetCollection object es.mat2 <- gsva(gsvaParam(y, gsc), verbose=FALSE) - attr(es.mat2, "gsvaParam")$geneSets <- geneSets(es.mat2) ## this is obviously different - checkTrue(identical(es.mat, es.mat2)) + checkEqualsNumeric(es.mat, es.mat2) ## check that when input expression data has no rownames and gene sets ## are made out of indexes to the rows, the results do not change @@ -106,14 +105,21 @@ test_readGMT <- function() { fname <- tempfile() con <- file(fname, "w") writeLines(c(names(gsets)[1], - paste(names(gsets)[2], "desc2", paste(gsets[[2]], collapse="\t"), sep="\t")), con) + paste(names(gsets)[2], "desc2", + paste(gsets[[2]], collapse="\t"), + sep="\t")), con) close(con) - checkException(gsets.read <- readGMT(fname, deduplUse="drop", valueType="list")) + checkException(gsets.read <- readGMT(fname, deduplUse="drop", + valueType="list")) gsets[[1]][2] <- gsets[[1]][1] con <- file(fname, "w") - writeLines(c(paste(names(gsets)[1], "desc1", paste(paste0("ENSG", gsets[[1]]), collapse="\t"), sep="\t"), - paste(names(gsets)[2], "desc2", paste(paste0("ENSG", gsets[[2]]), collapse="\t"), sep="\t")), con) + writeLines(c(paste(names(gsets)[1], "desc1", + paste(paste0("ENSG", gsets[[1]]), + collapse="\t"), sep="\t"), + 
paste(names(gsets)[2], "desc2", + paste(paste0("ENSG", gsets[[2]]), + collapse="\t"), sep="\t")), con) close(con) library(cli) gsets.read <- readGMT(fname, deduplUse="drop", valueType="list") diff --git a/inst/unitTests/test_inputdatacontainers.R b/inst/unitTests/test_inputdatacontainers.R index 278fe59..133b62b 100644 --- a/inst/unitTests/test_inputdatacontainers.R +++ b/inst/unitTests/test_inputdatacontainers.R @@ -44,9 +44,7 @@ test_inputdatacontainers <- function() { gsets.eSet <- geneSets(es.eset) es.mat2 <- es.mat - attr(es.mat2, "geneSets") <- NULL - attr(es.mat2, "gsvaParam") <- NULL - checkTrue(identical(es.mat2, exprs(es.eset))) + checkEqualsNumeric(es.mat2, exprs(es.eset)) checkTrue(identical(gsets.mat, gsets.eSet)) ## estimate GSVA enrichment scores with input as a SummarizedExperiment object @@ -64,7 +62,7 @@ test_inputdatacontainers <- function() { es.se <- gsva(gsvapar, verbose=FALSE) gsets.se <- geneSets(es.se) - checkTrue(identical(es.mat2, assay(es.se))) + checkEqualsNumeric(es.mat2, assay(es.se)) checkTrue(identical(gsets.mat, gsets.se)) out <- cli_fmt(gsvaParam(se, gsets)) @@ -85,15 +83,14 @@ test_inputdatacontainers <- function() { es.dgCMat <- gsva(param, verbose=FALSE) gsets.dgCMat <- geneSets(es.dgCMat) - attr(es.dgCMat, "geneSets") <- NULL - attr(es.dgCMat, "gsvaParam") <- NULL - checkTrue(identical(es.mat2, es.dgCMat)) + checkEqualsNumeric(es.mat2, es.dgCMat) checkTrue(identical(gsets.mat, gsets.dgCMat)) ## testing geneIdsToGeneSetCollection() suppressPackageStartupMessages(library(GSEABase)) - suppressWarnings(gsc <- geneIdsToGeneSetCollection(gsets.dgCMat, geneIdType="whatever")) + suppressWarnings(gsc <- geneIdsToGeneSetCollection(gsets.dgCMat, + geneIdType="whatever")) checkTrue(is(gsc, "GeneSetCollection")) sp <- 0.5 * prod(dim(y)) @@ -119,9 +116,7 @@ test_inputdatacontainers <- function() { es.sce <- gsva(param, verbose=FALSE) gsets.sce <- geneSets(es.sce) - attr(es.dgCMatSp, "geneSets") <- NULL - attr(es.dgCMatSp, "gsvaParam") <- 
NULL - checkTrue(identical(es.dgCMatSp, assay(es.sce))) + checkEqualsNumeric(es.dgCMatSp, assay(es.sce)) checkTrue(identical(gsets.mat, gsets.sce)) gsets.ov.list <- computeGeneSetsOverlap(gsets.sce, rownames(sce)) diff --git a/inst/unitTests/test_ondisk.R b/inst/unitTests/test_ondisk.R index 82f56ed..b836430 100644 --- a/inst/unitTests/test_ondisk.R +++ b/inst/unitTests/test_ondisk.R @@ -38,38 +38,34 @@ test_ondisk <- function() { ## estimate GSVA enrichment scores with HDF5 input and output and check that they are identical es_h5ondisk <- gsva(gsvaParam(H5, gsets, ondisk="yes", verbose=FALSE), verbose=FALSE) es_h5ondiskmat <- as.matrix(es_h5ondisk) - attributes(es_h5ondiskmat) <- c(attributes(es_h5ondiskmat)[c("dim", "dimnames")], - attributes(es_h5ondisk)["geneSets"]) - attr(es_noh5, "gsvaParam") <- NULL - checkIdentical(es_noh5, es_h5ondiskmat) + checkEqualsNumeric(es_noh5, es_h5ondiskmat) ## estimate ssGSEA enrichment scores with and without HDF5 input and check that they are identical - es_noh5 <- gsva(ssgseaParam(M, gsets, verbose=FALSE), verbose=TRUE) + es_noh5 <- gsva(ssgseaParam(M, gsets, verbose=FALSE), verbose=FALSE) es_h5 <- gsva(ssgseaParam(H5, gsets, verbose=FALSE), verbose=FALSE) checkIdentical(es_noh5, es_h5) ## estimate ssGSEA enrichment scores with HDF5 input and output and check that they are identical - es_h5ondisk <- gsva(ssgseaParam(H5, gsets, ondisk="yes", verbose=FALSE), verbose=FALSE) + es_h5ondisk <- gsva(ssgseaParam(H5, gsets, ondisk="yes", verbose=FALSE), + verbose=FALSE) es_h5ondiskmat <- as.matrix(es_h5ondisk) - attributes(es_h5ondiskmat) <- c(attributes(es_h5ondiskmat)[c("dim", "dimnames")], - attributes(es_h5ondisk)["geneSets"]) - attr(es_noh5, "gsvaParam") <- NULL - checkIdentical(es_noh5, es_h5ondiskmat) + checkEqualsNumeric(es_noh5, es_h5ondiskmat) ## estimate Z-scores enrichment scores with and without HDF5 input and check that they are identical es_noh5 <- gsva(zscoreParam(M, gsets, verbose=FALSE), verbose=TRUE) es_h5 <- 
gsva(zscoreParam(H5, gsets, verbose=FALSE), verbose=FALSE) - checkEqualsNumeric(es_noh5, es_h5) ## not identical due to the rowSds() vs sd() differences + ## not identical also due to the rowSds() vs sd() differences + checkEqualsNumeric(es_noh5, es_h5) ## estimate Z-scores enrichment scores with HDF5 input and output and check that they are identical - es_h5ondisk <- gsva(zscoreParam(H5, gsets, ondisk="yes", verbose=FALSE), verbose=FALSE) + es_h5ondisk <- gsva(zscoreParam(H5, gsets, ondisk="yes", verbose=FALSE), + verbose=FALSE) es_h5ondiskmat <- as.matrix(es_h5ondisk) - attributes(es_h5ondiskmat) <- c(attributes(es_h5ondiskmat)[c("dim", "dimnames")], - attributes(es_h5ondisk)["geneSets"]) - checkEqualsNumeric(es_noh5, es_h5ondiskmat) ## not identical due to the rowSds() vs sd() differences + ## not identical also due to the rowSds() vs sd() differences + checkEqualsNumeric(es_noh5, es_h5ondiskmat) - ## test the block processing of a small toy HDF5 input and output by setting a small block size and - ## maximum available memory + ## test the block processing of a small toy HDF5 input and output by + ## setting a small block size and maximum available memory oldautoblocksize <- getAutoBlockSize() setAutoBlockSize(1024) es_noh5 <- gsva(gsvaParam(M, gsets, verbose=FALSE), verbose=FALSE) diff --git a/man/gsvaRanks.Rd b/man/gsvaRanks.Rd index 62331aa..0fbbc4e 100644 --- a/man/gsvaRanks.Rd +++ b/man/gsvaRanks.Rd @@ -3,10 +3,14 @@ \name{gsvaRowNorm} \alias{gsvaRowNorm} \alias{gsvaRowNorm,gsvaParam-method} +\alias{gsvaColRanks} +\alias{gsvaColRanks,GsvaExprData-method} \alias{gsvaRanks} \alias{gsvaRanks,gsvaParam-method} \alias{gsvaScores} \alias{gsvaScores,gsvaRanksParam-method} +\alias{gsvaColScores} +\alias{gsvaColScores,GsvaExprData-method} \title{GSVA ranks and scores} \usage{ \S4method{gsvaRowNorm}{gsvaParam}( @@ -16,6 +20,13 @@ maxmem = "auto" ) +\S4method{gsvaColRanks}{GsvaExprData}( + rowNormExprData, + verbose = TRUE, + BPPARAM = SerialParam(progressbar = 
verbose), + maxmem = "auto" +) + \S4method{gsvaRanks}{gsvaParam}( param, verbose = TRUE, @@ -29,6 +40,13 @@ BPPARAM = SerialParam(progressbar = verbose), maxmem = "auto" ) + +\S4method{gsvaColScores}{GsvaExprData}( + rankExprData, + verbose = TRUE, + BPPARAM = SerialParam(progressbar = verbose), + maxmem = "auto" +) } \arguments{ \item{param}{A parameter object of the \code{\link[=gsvaRanksParam-class]{gsvaRanksParam}} class.} @@ -51,11 +69,19 @@ By default \code{maxmem="auto"}, indicating that the maximum memory will be the maximum memory, use \code{maxmem=Inf}. Note that the amount of main memory used in an R session or script may depend on other commands and packages used in that same session or script.} + +\item{rowNormExprData}{A row-normalized expression data set obtained with +\code{\link{gsvaRowNorm}}. Must be one of the classes +supported by \code{\link[=GsvaExprData-class]{GsvaExprData}}. For a list of these classes, see its +help page using \code{help(GsvaExprData)}.} } \value{ In the case of the \code{gsvaRowNorm()} method, an object of class \code{\link[=gsvaRanksParam-class]{gsvaRanksParam}}. +In the case of the \code{gsvaColRanks()} method, an object of class +\code{\link[=gsvaRanksParam-class]{gsvaRanksParam}}. + In the case of the \code{gsvaRanks()} method, an object of class \code{\link[=gsvaRanksParam-class]{gsvaRanksParam}}. 
@@ -97,18 +123,17 @@ geneSets <- list(gset1=paste0("g", 1:3), y <- matrix(rnorm(n*p), nrow=p, ncol=n, dimnames=list(paste("g", 1:p, sep="") , paste("s", 1:n, sep=""))) -## genes in set1 are expressed at higher levels in the last 'nGrp1+1' to 'n' samples -y[geneSets$set1, (nGrp1+1):n] <- y[geneSets$set1, (nGrp1+1):n] + 2 - ## build GSVA parameter object gsvapar <- gsvaParam(y, geneSets) -## calculate GSVA ranks -gsvarankspar <- gsvaRanks(gsvapar) -gsvarankspar +## calculate row-normalized expression values +gsvarownormexpr <- gsvaRowNorm(gsvapar) + +## calculate GSVA column ranks +gsvarankspar <- gsvaColRanks(gsvarownormexpr) + ## calculate GSVA scores gsva_es <- gsvaScores(gsvarankspar) -gsva_es ## calculate now GSVA scores in a single step gsva_es1 <- gsva(gsvapar) From 36df55d6845b5d9f7bb187f63edc726f64368446 Mon Sep 17 00:00:00 2001 From: Robert Castelo Date: Sat, 23 May 2026 03:08:43 +0200 Subject: [PATCH 03/12] Fixes on code, documentation and unit tests --- DESCRIPTION | 1 + R/gsva.R | 35 +++++++++++++++++++------------ R/utils.R | 2 +- inst/unitTests/test_gsvaRanks.R | 1 - man/GsvaExprData-class.Rd | 8 +++---- man/GsvaMethodParam-class.Rd | 12 +++++------ man/geneIdsToGeneSetCollection.Rd | 2 +- man/geneSets.Rd | 4 ++-- man/gsva.Rd | 14 ++++++------- man/gsvaAnnotation.Rd | 2 +- man/gsvaEnrichment.Rd | 2 +- man/gsvaParam-class.Rd | 28 ++++++++++++------------- man/gsvaRanks.Rd | 31 ++++++++++++++------------- man/gsvaRanks_serialization.Rd | 4 ++-- man/plageParam-class.Rd | 18 ++++++++-------- man/ssgseaParam-class.Rd | 24 ++++++++++----------- man/zscoreParam-class.Rd | 18 ++++++++-------- 17 files changed, 109 insertions(+), 97 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d58ee72..76022c6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -31,3 +31,4 @@ Encoding: UTF-8 biocViews: FunctionalGenomics, Microarray, RNASeq, Pathways, GeneSetEnrichment Roxygen: list(markdown = TRUE) Config/roxygen2/version: 8.0.0 +RoxygenNote: 7.3.3 diff --git a/R/gsva.R 
b/R/gsva.R index 506f40b..de6a8ac 100644 --- a/R/gsva.R +++ b/R/gsva.R @@ -163,11 +163,15 @@ setMethod("gsva", signature(param="gsvaParam"), .check_bpparam(BPPARAM) - rankspar <- gsvaRanks(param=param, verbose=verbose, - BPPARAM=BPPARAM, maxmem=maxmem) + gsvarownr <- gsvaRowNorm(param=param, verbose=verbose, + BPPARAM=BPPARAM, maxmem=maxmem) - es <- gsvaScores(param=rankspar, verbose=verbose, - BPPARAM=BPPARAM, maxmem=maxmem) + gsvaranks <- gsvaColRanks(rowNormExprData=gsvarownr, + verbose=verbose, BPPARAM=BPPARAM, + maxmem=maxmem) + + es <- gsvaColScores(rankExprData=gsvaranks, verbose=verbose, + BPPARAM=BPPARAM, maxmem=maxmem) if (verbose) { cli_alert_success("Calculations finished") @@ -665,11 +669,11 @@ setMethod("show", p <- NULL if (is(exprData, "matrix") || is(exprData, "dgCMatrix") || - is(exprData, "SVT_SparseMatrix") || is(exprData("delayedMatrix")) || - is(exprData, "HDF5Matrix") || is(exprData("ExpressionSet"))) { + is(exprData, "SVT_SparseMatrix") || is(exprData, "DelayedMatrix") || + is(exprData, "HDF5Matrix") || is(exprData, "ExpressionSet")) { mask <- is.null(attr(exprData, "gsvaParam")) || is.null(attr(exprData, "assay")) - if (any(mask)) + if (any(mask)) cli_abort(c("x"="Missing metadata in the input expression data.")) p <- attr(exprData, "gsvaParam") a <- attr(exprData, "assay") @@ -764,10 +768,10 @@ setMethod("show", #' gsvarownormexpr <- gsvaRowNorm(gsvapar) #' #' ## calculate GSVA column ranks -#' gsvarankspar <- gsvaColRanks(gsvarownormexpr) +#' gsvacolranks <- gsvaColRanks(gsvarownormexpr) #' #' ## calculate GSVA scores -#' gsva_es <- gsvaScores(gsvarankspar) +#' gsva_es <- gsvaColScores(gsvacolranks) #' #' ## calculate now GSVA scores in a single step #' gsva_es1 <- gsva(gsvapar) @@ -780,8 +784,8 @@ setMethod("show", #' gset2=paste0("g", c(1, 2, 7, 8))) #' #' ## note that there is no need to calculate the GSVA ranks again -#' geneSets(gsvarankspar) <- geneSets2 -#' gsvaScores(gsvarankspar) +#' ## geneSets(gsvarankspar) <- geneSets2 +#' ## 
gsvaScores(gsvarankspar) #' #' @return In the case of the `gsvaRowNorm()` method, an object of class #' [`gsvaRanksParam-class`]. @@ -887,14 +891,14 @@ setMethod("gsvaColRanks", signature(rowNormExprData="GsvaExprData"), .check_bpparam(BPPARAM) - dataMatrix <- unwrapData(rowNormExprData, get_assay(param)) + dataMatrix <- unwrapData(rowNormExprData, "gsvarownr") maxmem <- .check_maxmem(param, maxmem, verbose) ondisk <- .check_ondisk(param, maxmem, verbose) dataMatrix <- .check_sparse_load_input_expr(dataMatrix, "GSVA", ondisk, verbose) - gsvarnks <- .compute_gsva_ranks(Z=rowNormExprData, + gsvarnks <- .compute_gsva_ranks(Z=dataMatrix, verbose=verbose, BPPARAM=BPPARAM, maxmem=maxmem) @@ -1125,6 +1129,11 @@ setMethod("gsvaScores", signature(param="gsvaRanksParam"), return(rval) }) +#' @param rankExprData A column-rank expression data set obtained with +#' [`gsvaColRanks`]. Must be one of the classes +#' supported by [`GsvaExprData-class`]. For a list of these classes, see its +#' help page using `help(GsvaExprData)`. 
+#' #' @aliases gsvaColScores,GsvaExprData-method #' @name gsvaColScores #' @rdname gsvaRanks diff --git a/R/utils.R b/R/utils.R index aae8f24..fe403c1 100644 --- a/R/utils.R +++ b/R/utils.R @@ -107,7 +107,7 @@ setMethod("wrapData", signature(container="ExpressionSet"), experimentData=experimentData(container), annotation="") attr(rval, "gsvaParam") <- .gsvaParam_as_list(param) - attr(dataMatrix, "assay") <- assay + attr(rval, "assay") <- assay if (!missing(geneSets)) attr(rval, "geneSets") <- geneSets diff --git a/inst/unitTests/test_gsvaRanks.R b/inst/unitTests/test_gsvaRanks.R index ab804e2..498bde8 100644 --- a/inst/unitTests/test_gsvaRanks.R +++ b/inst/unitTests/test_gsvaRanks.R @@ -33,7 +33,6 @@ test_gsvaRanks <- function() { ## both approaches to calculate GSVA scores must give ## the same result with the same input gene sets checkEqualsNumeric(gsva_es1, gsva_es2) - checkTrue(all.equal(gsva_es1, gsva_es2)) ## check that gsvaEnrichment() works geneSets(gsvarankspar) <- c(gsets, set4=c("g1", "g4", "g7")) diff --git a/man/GsvaExprData-class.Rd b/man/GsvaExprData-class.Rd index 60e5188..6946e54 100644 --- a/man/GsvaExprData-class.Rd +++ b/man/GsvaExprData-class.Rd @@ -16,11 +16,11 @@ of another class as well as defining common methods for all of them. 
\seealso{ \code{\link{matrix}}, \code{\link[Matrix:dgCMatrix-class]{dgCMatrix}}, -\code{\link[SparseArray:SVT_SparseMatrix-class]{SVT_SparseMatrix}}, -\code{\link[DelayedArray:DelayedMatrix-class]{DelayedMatrix}}, +\code{\link[SparseArray:SVT_SparseArray-class]{SVT_SparseMatrix}}, +\code{\link[DelayedArray:DelayedArray-class]{DelayedMatrix}}, \code{\link[Biobase]{ExpressionSet}}, \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}, -\code{\link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment}}, -\code{\link[SpatialExperiment:SpatialExperiment-class]{SpatialExperiment}}, +\code{\link[SingleCellExperiment:SingleCellExperiment]{SingleCellExperiment}}, +\code{\link[SpatialExperiment:SpatialExperiment]{SpatialExperiment}}, \code{\link[HDF5Array:HDF5Array-class]{HDF5Array}} } diff --git a/man/GsvaMethodParam-class.Rd b/man/GsvaMethodParam-class.Rd index ed7f603..27e42af 100644 --- a/man/GsvaMethodParam-class.Rd +++ b/man/GsvaMethodParam-class.Rd @@ -89,12 +89,12 @@ is(gsvapar, "GsvaMethodParam") } \seealso{ -\code{\link[=GsvaExprData-class]{GsvaExprData}}, -\code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}, -\code{\link[=zscoreParam-class]{zscoreParam}}, -\code{\link[=plageParam-class]{plageParam}}, -\code{\link[=ssgseaParam-class]{ssgseaParam}}, -\code{\link[=gsvaParam-class]{gsvaParam}}, +\code{\linkS4class{GsvaExprData}}, +\code{\linkS4class{GsvaGeneSets}}, +\code{\linkS4class{zscoreParam}}, +\code{\linkS4class{plageParam}}, +\code{\linkS4class{ssgseaParam}}, +\code{\linkS4class{gsvaParam}}, \code{\link[GSEABase:GeneIdentifierType-class]{GeneIdentifierType}} \code{\link{plageParam}}, \code{\link{zscoreParam}}, \code{\link{ssgseaParam}}, \code{\link{gsvaParam}} diff --git a/man/geneIdsToGeneSetCollection.Rd b/man/geneIdsToGeneSetCollection.Rd index ebfe8e1..9bc8d98 100644 --- a/man/geneIdsToGeneSetCollection.Rd +++ b/man/geneIdsToGeneSetCollection.Rd @@ -54,7 +54,7 @@ geneIdsToGeneSetCollection(gsets) \seealso{ 
\code{\link[GSEABase:GeneSetCollection-class]{GeneSetCollection}}, \code{\link[GSEABase:GeneIdentifierType-class]{GeneIdentifierType}}, -\code{\link[GSEABase:geneIds]{geneIds}}, +\code{\link[GSEABase:GeneSet-class]{geneIds}}, \code{\link{deduplicateGeneSets}}, \code{\link{guessGeneIdType}}, \code{\link[GSEABase:GeneSet-class]{GeneSet}} diff --git a/man/geneSets.Rd b/man/geneSets.Rd index c59f294..8d94088 100644 --- a/man/geneSets.Rd +++ b/man/geneSets.Rd @@ -30,9 +30,9 @@ \item{obj}{An object of one of the following classes: \itemize{ \item An expression data object of one of the classes described in -\code{\link[=GsvaExprData-class]{GsvaExprData}} that is the return value of a call to \code{gsva()}. +\code{\linkS4class{GsvaExprData}} that is the return value of a call to \code{gsva()}. \item A parameter object of one of the classes described in -\code{\link[=GsvaMethodParam-class]{GsvaMethodParam}} that could be used in a call to \code{gsva()}. +\code{\linkS4class{GsvaMethodParam}} that could be used in a call to \code{gsva()}. }} } \value{ diff --git a/man/gsva.Rd b/man/gsva.Rd index c966643..f998718 100644 --- a/man/gsva.Rd +++ b/man/gsva.Rd @@ -70,7 +70,7 @@ or terabytes (T), which GSVA will use to attempt bounding the maximum amount of main memory used across all threads of execution to that given quantity. By default \code{maxmem="auto"}, indicating that the maximum memory will be the 90\% of the total main memory, as calculated by -\code{\link[memuse:Sys.meminfo]{Sys.meminfo()}}. To avoid setting any bound on the +\code{\link[memuse:meminfo]{Sys.meminfo()}}. To avoid setting any bound on the maximum memory, use \code{maxmem=Inf}. 
Note that the amount of main memory used in an R session or script may depend on other commands and packages used in that same session or script.} @@ -81,18 +81,18 @@ container object of the same type as the input expression data container, except for the fact that enrichment scores are always dense, irrespective of whether the input is sparse, such as in single-cell data. If the input was a base matrix, a \code{\link[Matrix:dgCMatrix-class]{dgCMatrix}}, a -\code{\link[SparseArray:SVT_SparseMatrix-class]{SVT_SparseMatrix}}, or a -\code{\link[DelayedArray:DelayedMatrix-class]{DelayedMatrix}} object, then the output +\code{\link[SparseArray:SVT_SparseArray-class]{SVT_SparseMatrix}}, or a +\code{\link[DelayedArray:DelayedArray-class]{DelayedMatrix}} object, then the output will be either a base matrix object or a -\code{\link[DelayedArray:DelayedMatrix-class]{DelayedMatrix}}, with the gene sets +\code{\link[DelayedArray:DelayedArray-class]{DelayedMatrix}}, with the gene sets employed in the calculations stored in an attribute called \code{geneSets} of that object. If the input was an \code{ExpressionSet} object, then the output will be also an \code{ExpressionSet} object with the gene sets employed in the calculations stored in an attribute called \code{geneSets}. 
If the input was an object of either class \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}, -\code{\link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment}}, -or \code{\link[SpatialExperiment:SpatialExperiment-class]{SpatialExperiment}}, +\code{\link[SingleCellExperiment:SingleCellExperiment]{SingleCellExperiment}}, +or \code{\link[SpatialExperiment:SpatialExperiment]{SpatialExperiment}}, then the output will be of the same class, where enrichment scores will be stored in an assay called \code{es} and the gene sets employed in the calculations will be stored in the \code{rowData} slot of the object under the @@ -175,5 +175,5 @@ using singular value decomposition. \code{\link[BiocParallel:BiocParallelParam-class]{BiocParallelParam}}, \code{\link[Matrix:dgCMatrix-class]{dgCMatrix}}, \code{\link[Biobase]{ExpressionSet}}, -\code{\link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment}} +\code{\link[SingleCellExperiment:SingleCellExperiment]{SingleCellExperiment}} } diff --git a/man/gsvaAnnotation.Rd b/man/gsvaAnnotation.Rd index 860f4af..09b0ac3 100644 --- a/man/gsvaAnnotation.Rd +++ b/man/gsvaAnnotation.Rd @@ -49,7 +49,7 @@ } \arguments{ \item{object}{An expression data object of one of the classes described in -\code{\link[=GsvaExprData-class]{GsvaExprData}}. Simple \code{matrix} and \code{dgCMatrix} objects are not +\code{\linkS4class{GsvaExprData}}. 
Simple \code{matrix} and \code{dgCMatrix} objects are not capable of storing annotation metadata and will return \code{NULL}.} \item{value}{For the replacement methods, the annotation metadata to be diff --git a/man/gsvaEnrichment.Rd b/man/gsvaEnrichment.Rd index 7297734..4de6be2 100644 --- a/man/gsvaEnrichment.Rd +++ b/man/gsvaEnrichment.Rd @@ -14,7 +14,7 @@ ) } \arguments{ -\item{param}{A \code{\link[=gsvaRanksParam-class]{gsvaRanksParam}} object obtained with the method +\item{param}{A \code{\linkS4class{gsvaRanksParam}} object obtained with the method \code{\link{gsvaRanks}}.} \item{column}{The column for which we want to retrieve the enrichment data. diff --git a/man/gsvaParam-class.Rd b/man/gsvaParam-class.Rd index c35aeeb..628e67d 100644 --- a/man/gsvaParam-class.Rd +++ b/man/gsvaParam-class.Rd @@ -36,11 +36,11 @@ gsvaParam( } \arguments{ \item{exprData}{The expression data set. Must be one of the classes -supported by \code{\link[=GsvaExprData-class]{GsvaExprData}}. For a list of these classes, see its +supported by \code{\linkS4class{GsvaExprData}}. For a list of these classes, see its help page using \code{help(GsvaExprData)}.} \item{geneSets}{The gene sets. Must be one of the classes supported by -\code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}. For a list of these classes, see its help page using +\code{\linkS4class{GsvaGeneSets}}. For a list of these classes, see its help page using \code{help(GsvaGeneSets)}.} \item{assay}{Character vector of length 1. 
The name of the assay to use in @@ -163,19 +163,19 @@ when \code{ondisk="no"} it will attempt to load all the data in main memory.} decisions made by the software during parameter object construction when \code{verbose=TRUE} (default) and remains silent otherwise.} -\item{x}{An object of class \code{\link[=gsvaParam-class]{gsvaParam}}.} +\item{x}{An object of class \code{\linkS4class{gsvaParam}}.} \item{recursive}{Not used with \code{x} being an object of -class \code{\link[=gsvaParam-class]{gsvaParam}}.} +class \code{\linkS4class{gsvaParam}}.} \item{object}{For the replacement method, an object of class -\code{\link[=gsvaRanksParam-class]{gsvaRanksParam}}.} +\code{\linkS4class{gsvaRanksParam}}.} \item{value}{For the replacement method, an object of the classes supported by -\code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}.} +\code{\linkS4class{GsvaGeneSets}}.} } \value{ -A new \code{\link[=gsvaParam-class]{gsvaParam}} object. +A new \code{\linkS4class{gsvaParam}} object. } \description{ S4 class for GSVA method parameter objects. @@ -294,16 +294,16 @@ variation analysis for microarray and RNA-Seq data. 
\doi{10.1186/1471-2105-14-7} } \seealso{ -\code{\link[=GsvaExprData-class]{GsvaExprData}}, -\code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}, -\code{\link[=GsvaMethodParam-class]{GsvaMethodParam}}, -\code{\link[=plageParam-class]{plageParam}}, -\code{\link[=zscoreParam-class]{zscoreParam}}, -\code{\link[=ssgseaParam-class]{ssgseaParam}} +\code{\linkS4class{GsvaExprData}}, +\code{\linkS4class{GsvaGeneSets}}, +\code{\linkS4class{GsvaMethodParam}}, +\code{\linkS4class{plageParam}}, +\code{\linkS4class{zscoreParam}}, +\code{\linkS4class{ssgseaParam}} \code{\link[GSEABase:GeneIdentifierType-class]{GeneIdentifierType}}, \code{\link{matrix}}, \code{\link[Biobase]{ExpressionSet}}, \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}, -\code{\link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment}} +\code{\link[SingleCellExperiment:SingleCellExperiment]{SingleCellExperiment}} } diff --git a/man/gsvaRanks.Rd b/man/gsvaRanks.Rd index 0fbbc4e..42e65e9 100644 --- a/man/gsvaRanks.Rd +++ b/man/gsvaRanks.Rd @@ -49,7 +49,7 @@ ) } \arguments{ -\item{param}{A parameter object of the \code{\link[=gsvaRanksParam-class]{gsvaRanksParam}} class.} +\item{param}{A parameter object of the \code{\linkS4class{gsvaRanksParam}} class.} \item{verbose}{Gives information about each calculation step. Default: \code{TRUE}.} @@ -65,25 +65,30 @@ or terabytes (T), which GSVA will use to attempt bounding the maximum amount of main memory used across all threads of execution to that given quantity. By default \code{maxmem="auto"}, indicating that the maximum memory will be the 90\% of the total main memory, as calculated by -\code{\link[memuse:Sys.meminfo]{Sys.meminfo()}}. To avoid setting any bound on the +\code{\link[memuse:meminfo]{Sys.meminfo()}}. To avoid setting any bound on the maximum memory, use \code{maxmem=Inf}. 
Note that the amount of main memory used in an R session or script may depend on other commands and packages used in that same session or script.} \item{rowNormExprData}{A row-normalized expression data set obtained with \code{\link{gsvaRowNorm}}. Must be one of the classes -supported by \code{\link[=GsvaExprData-class]{GsvaExprData}}. For a list of these classes, see its +supported by \code{\linkS4class{GsvaExprData}}. For a list of these classes, see its +help page using \code{help(GsvaExprData)}.} + +\item{rankExprData}{A column-rank expression data set obtained with +\code{\link{gsvaColRanks}}. Must be one of the classes +supported by \code{\linkS4class{GsvaExprData}}. For a list of these classes, see its help page using \code{help(GsvaExprData)}.} } \value{ In the case of the \code{gsvaRowNorm()} method, an object of class -\code{\link[=gsvaRanksParam-class]{gsvaRanksParam}}. +\code{\linkS4class{gsvaRanksParam}}. In the case of the \code{gsvaColRanks()} method, an object of class -\code{\link[=gsvaRanksParam-class]{gsvaRanksParam}}. +\code{\linkS4class{gsvaRanksParam}}. In the case of the \code{gsvaRanks()} method, an object of class -\code{\link[=gsvaRanksParam-class]{gsvaRanksParam}}. +\code{\linkS4class{gsvaRanksParam}}. 
In the case of the \code{gsvaScores()} method, a gene-set by sample matrix of GSVA enrichment scores stored in a container object of the same type as @@ -111,8 +116,6 @@ library(GSVA) p <- 10 ## number of genes n <- 30 ## number of samples -nGrp1 <- 15 ## number of samples in group 1 -nGrp2 <- n - nGrp1 ## number of samples in group 2 ## consider three disjoint gene sets geneSets <- list(gset1=paste0("g", 1:3), @@ -130,10 +133,10 @@ gsvapar <- gsvaParam(y, geneSets) gsvarownormexpr <- gsvaRowNorm(gsvapar) ## calculate GSVA column ranks -gsvarankspar <- gsvaColRanks(gsvarownormexpr) +gsvacolranks <- gsvaColRanks(gsvarownormexpr) ## calculate GSVA scores -gsva_es <- gsvaScores(gsvarankspar) +gsva_es <- gsvaColScores(gsvacolranks) ## calculate now GSVA scores in a single step gsva_es1 <- gsva(gsvapar) @@ -146,8 +149,8 @@ geneSets2 <- list(gset1=paste0("g", 3:6), gset2=paste0("g", c(1, 2, 7, 8))) ## note that there is no need to calculate the GSVA ranks again -geneSets(gsvarankspar) <- geneSets2 -gsvaScores(gsvarankspar) +## geneSets(gsvarankspar) <- geneSets2 +## gsvaScores(gsvarankspar) } \references{ @@ -157,9 +160,9 @@ variation analysis for microarray and RNA-Seq data. \doi{10.1186/1471-2105-14-7} } \seealso{ -\code{\link[=gsvaParam-class]{gsvaParam}}, \code{\link[=gsvaRanksParam-class]{gsvaRanksParam}}, \code{\link{gsva}}, +\code{\linkS4class{gsvaParam}}, \code{\linkS4class{gsvaRanksParam}}, \code{\link{gsva}}, \code{\link[BiocParallel:BiocParallelParam-class]{BiocParallelParam}}, \code{\link[Matrix:dgCMatrix-class]{dgCMatrix}}, \code{\link[Biobase]{ExpressionSet}}, -\code{\link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment}} +\code{\link[SingleCellExperiment:SingleCellExperiment]{SingleCellExperiment}} } diff --git a/man/gsvaRanks_serialization.Rd b/man/gsvaRanks_serialization.Rd index abb5133..8eb95d5 100644 --- a/man/gsvaRanks_serialization.Rd +++ b/man/gsvaRanks_serialization.Rd @@ -10,7 +10,7 @@ saveHDF5GSVAranks(x, dir, ...) 
loadHDF5GSVAranks(dir, ...) } \arguments{ -\item{x}{A \code{\link[=gsvaRanksParam-class]{gsvaRanksParam}} object to save to disk.} +\item{x}{A \code{\linkS4class{gsvaRanksParam}} object to save to disk.} \item{dir}{The path to the directory where to save or load the GSVA ranks data.} @@ -18,7 +18,7 @@ data.} \item{...}{Additional arguments to be passed to the underlying HDF5 saving/loading functions \code{\link[HDF5Array:saveHDF5SummarizedExperiment]{saveHDF5SummarizedExperiment}} -and \code{\link[HDF5Array:loadHDF5SummarizedExperiment]{loadHDF5SummarizedExperiment}}, +and \code{\link[HDF5Array:saveHDF5SummarizedExperiment]{loadHDF5SummarizedExperiment}}, respectively.} } \value{ diff --git a/man/plageParam-class.Rd b/man/plageParam-class.Rd index e85b003..5fbf4ed 100644 --- a/man/plageParam-class.Rd +++ b/man/plageParam-class.Rd @@ -19,11 +19,11 @@ plageParam( } \arguments{ \item{exprData}{The expression data set. Must be one of the classes -supported by \code{\link[=GsvaExprData-class]{GsvaExprData}}. For a list of these classes, see its +supported by \code{\linkS4class{GsvaExprData}}. For a list of these classes, see its help page using \code{help(GsvaExprData)}.} \item{geneSets}{The gene sets. Must be one of the classes supported by -\code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}. For a list of these classes, see its help page using +\code{\linkS4class{GsvaGeneSets}}. For a list of these classes, see its help page using \code{help(GsvaGeneSets)}.} \item{assay}{Character vector of length 1. The name of the assay to use in @@ -63,7 +63,7 @@ decisions made by the software during parameter object construction when \code{verbose=TRUE} (default) and remains silent otherwise.} } \value{ -A new \code{\link[=plageParam-class]{plageParam}} object. +A new \code{\linkS4class{plageParam}} object. } \description{ S4 class for PLAGE method parameter objects. @@ -104,12 +104,12 @@ using singular value decomposition. 
\doi{10.1186/1471-2105-6-225} } \seealso{ -\code{\link[=GsvaExprData-class]{GsvaExprData}}, -\code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}, -\code{\link[=GsvaMethodParam-class]{GsvaMethodParam}}, -\code{\link[=zscoreParam-class]{zscoreParam}}, -\code{\link[=ssgseaParam-class]{ssgseaParam}}, -\code{\link[=gsvaParam-class]{gsvaParam}} +\code{\linkS4class{GsvaExprData}}, +\code{\linkS4class{GsvaGeneSets}}, +\code{\linkS4class{GsvaMethodParam}}, +\code{\linkS4class{zscoreParam}}, +\code{\linkS4class{ssgseaParam}}, +\code{\linkS4class{gsvaParam}} \code{\link[GSEABase:GeneIdentifierType-class]{GeneIdentifierType}} } diff --git a/man/ssgseaParam-class.Rd b/man/ssgseaParam-class.Rd index 7db1043..f3a24b0 100644 --- a/man/ssgseaParam-class.Rd +++ b/man/ssgseaParam-class.Rd @@ -26,11 +26,11 @@ ssgseaParam( } \arguments{ \item{exprData}{The expression data set. Must be one of the classes -supported by \code{\link[=GsvaExprData-class]{GsvaExprData}}. For a list of these classes, see its +supported by \code{\linkS4class{GsvaExprData}}. For a list of these classes, see its help page using \code{help(GsvaExprData)}.} \item{geneSets}{The gene sets. Must be one of the classes supported by -\code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}. For a list of these classes, see its help page using +\code{\linkS4class{GsvaGeneSets}}. For a list of these classes, see its help page using \code{help(GsvaGeneSets)}.} \item{assay}{Character vector of length 1. 
The name of the assay to use in @@ -103,13 +103,13 @@ when \code{ondisk="no"} it will attempt to load all the data in main memory.} decisions made by the software during parameter object construction when \code{verbose=TRUE} (default) and remains silent otherwise.} -\item{x}{An object of class \code{\link[=ssgseaParam-class]{ssgseaParam}}.} +\item{x}{An object of class \code{\linkS4class{ssgseaParam}}.} \item{recursive}{Not used with \code{x} being an object of -class \code{\link[=ssgseaParam-class]{ssgseaParam}}.} +class \code{\linkS4class{ssgseaParam}}.} } \value{ -A new \code{\link[=ssgseaParam-class]{ssgseaParam}} object. +A new \code{\linkS4class{ssgseaParam}} object. } \description{ S4 class for ssGSEA method parameter objects. @@ -180,16 +180,16 @@ oncogenic KRAS-driven cancers require TBK1. \doi{10.1038/nature08460} } \seealso{ -\code{\link[=GsvaExprData-class]{GsvaExprData}}, -\code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}, -\code{\link[=GsvaMethodParam-class]{GsvaMethodParam}}, -\code{\link[=plageParam-class]{plageParam}}, -\code{\link[=zscoreParam-class]{zscoreParam}}, -\code{\link[=gsvaParam-class]{gsvaParam}} +\code{\linkS4class{GsvaExprData}}, +\code{\linkS4class{GsvaGeneSets}}, +\code{\linkS4class{GsvaMethodParam}}, +\code{\linkS4class{plageParam}}, +\code{\linkS4class{zscoreParam}}, +\code{\linkS4class{gsvaParam}} \code{\link[GSEABase:GeneIdentifierType-class]{GeneIdentifierType}}, \code{\link{matrix}}, \code{\link[Biobase]{ExpressionSet}}, \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}, -\code{\link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment}} +\code{\link[SingleCellExperiment:SingleCellExperiment]{SingleCellExperiment}} } diff --git a/man/zscoreParam-class.Rd b/man/zscoreParam-class.Rd index f8186e4..2cc8d2f 100644 --- a/man/zscoreParam-class.Rd +++ b/man/zscoreParam-class.Rd @@ -19,11 +19,11 @@ zscoreParam( } \arguments{ \item{exprData}{The expression data set. 
Must be one of the classes -supported by \code{\link[=GsvaExprData-class]{GsvaExprData}}. For a list of these classes, see its +supported by \code{\linkS4class{GsvaExprData}}. For a list of these classes, see its help page using \code{help(GsvaExprData)}.} \item{geneSets}{The gene sets. Must be one of the classes supported by -\code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}. For a list of these classes, see its help page using +\code{\linkS4class{GsvaGeneSets}}. For a list of these classes, see its help page using \code{help(GsvaGeneSets)}.} \item{assay}{Character vector of length 1. The name of the assay to use in @@ -63,7 +63,7 @@ decisions made by the software during parameter object construction when \code{verbose=TRUE} (default) and remains silent otherwise.} } \value{ -A new \code{\link[=zscoreParam-class]{zscoreParam}} object. +A new \code{\linkS4class{zscoreParam}} object. } \description{ S4 class for combined z-scores method parameter objects. @@ -104,12 +104,12 @@ disease classification. 
\doi{10.1371/journal.pcbi.1000217} } \seealso{ -\code{\link[=GsvaExprData-class]{GsvaExprData}}, -\code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}, -\code{\link[=GsvaMethodParam-class]{GsvaMethodParam}}, -\code{\link[=plageParam-class]{plageParam}}, -\code{\link[=ssgseaParam-class]{ssgseaParam}}, -\code{\link[=gsvaParam-class]{gsvaParam}} +\code{\linkS4class{GsvaExprData}}, +\code{\linkS4class{GsvaGeneSets}}, +\code{\linkS4class{GsvaMethodParam}}, +\code{\linkS4class{plageParam}}, +\code{\linkS4class{ssgseaParam}}, +\code{\linkS4class{gsvaParam}} \code{\link[GSEABase:GeneIdentifierType-class]{GeneIdentifierType}} } From 1776d568282f368e35c6952854f558696f35d6f4 Mon Sep 17 00:00:00 2001 From: Robert Castelo Date: Sun, 24 May 2026 21:00:13 +0200 Subject: [PATCH 04/12] Fixed gsvaEnrichment(), updated show() method, updated scRNA-seq vignette --- NAMESPACE | 3 + R/AllGenerics.R | 2 +- R/GsvaMethodParam.R | 25 +++- R/gsva.R | 136 ++++++++++++++++------ R/ssgsea.R | 8 +- inst/unitTests/test_gsvaRanks.R | 22 ++-- inst/unitTests/test_inputdatacontainers.R | 3 + man/GsvaExprData-class.Rd | 8 +- man/GsvaMethodParam-class.Rd | 31 +++-- man/geneIdsToGeneSetCollection.Rd | 2 +- man/geneSets.Rd | 4 +- man/gsva.Rd | 14 +-- man/gsvaAnnotation.Rd | 2 +- man/gsvaEnrichment.Rd | 40 ++++--- man/gsvaParam-class.Rd | 34 +++--- man/gsvaRanks.Rd | 23 ++-- man/gsvaRanks_serialization.Rd | 4 +- man/plageParam-class.Rd | 18 +-- man/ssgseaParam-class.Rd | 24 ++-- man/zscoreParam-class.Rd | 18 +-- vignettes/GSVA.bib | 9 ++ vignettes/GSVA_proteomics.Rmd | 12 +- vignettes/GSVA_scRNAseq.Rmd | 129 +++++++++++--------- 23 files changed, 364 insertions(+), 207 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 54769f3..78489f1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -35,6 +35,7 @@ exportClasses(plageParam) exportClasses(ssgseaParam) exportClasses(zscoreParam) exportMethods(computeGeneSetsOverlap) +exportMethods(details) exportMethods(filterGeneSets) exportMethods(geneSetSizes) 
exportMethods(geneSets) @@ -96,6 +97,7 @@ importFrom(GSEABase,EntrezIdentifier) importFrom(GSEABase,NullCollection) importFrom(GSEABase,NullIdentifier) importFrom(GSEABase,SymbolIdentifier) +importFrom(GSEABase,details) importFrom(GSEABase,geneIdType) importFrom(GSEABase,geneIds) importFrom(GSEABase,mapIdentifiers) @@ -150,6 +152,7 @@ importFrom(graphics,plot) importFrom(graphics,segments) importFrom(memuse,Sys.meminfo) importFrom(memuse,howbig) +importFrom(methods,callNextMethod) importFrom(methods,getClass) importFrom(methods,is) importFrom(methods,isClass) diff --git a/R/AllGenerics.R b/R/AllGenerics.R index b097fd4..0eaa78b 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -25,7 +25,7 @@ setGeneric("gsvaScores", #' @export setGeneric("gsvaEnrichment", - function(param, ranks, ...) standardGeneric("gsvaEnrichment")) + function(rankExprData, ...) standardGeneric("gsvaEnrichment")) #' @export setGeneric("filterGeneSets", diff --git a/R/GsvaMethodParam.R b/R/GsvaMethodParam.R index 0310884..6444f5e 100644 --- a/R/GsvaMethodParam.R +++ b/R/GsvaMethodParam.R @@ -2,7 +2,10 @@ #' @title The `GsvaMethodParam` class #' #' @description A virtual superclass of the `GSVA` packages' method-specific -#' parameter classes. +#' parameter classes. The method 'details()' provides a detailed summary of the +#' parameter values stored in this class and its subclasses. +#' +#' @param object An object of class `GsvaMethodParam` or one of its subclasses. 
#' #' @details The `GSVA` package implements four single-sample gene set analysis #' methods (PLAGE, combined z-scores, ssGSEA, and GSVA) and a respective @@ -21,7 +24,23 @@ NULL setMethod("show", signature=signature(object="GsvaMethodParam"), function(object) { - cat("A ", .objPkgClass(object), " object\n", + cat("class: ", .objPkgClass(object), "\n", sep="") + expr <- get_exprData(object) + cat(sprintf("expression data dim: %d %d\n", + nrow(expr), ncol(expr))) + cat(sprintf("number of gene sets: %d\n", + length(get_geneSets(object)))) + cat("details: use 'details(object)'\n") + }) + +#' @aliases details,GsvaMethodParam-method +#' @rdname GsvaMethodParam-class +#' @exportMethod details +#' @importFrom GSEABase details +setMethod("details", + signature=signature(object="GsvaMethodParam"), + function(object) { + cat("class: ", .objPkgClass(object), "\n", "expression data:\n", sep="") .catObj(get_exprData(object)) oa <- if(is.na(get_assay(object))) "none" else get_assay(object) @@ -40,6 +59,8 @@ setMethod("show", }) + + ## ----- getters ----- setMethod("get_exprData", signature("GsvaMethodParam"), diff --git a/R/gsva.R b/R/gsva.R index de6a8ac..5bf60a9 100644 --- a/R/gsva.R +++ b/R/gsva.R @@ -602,9 +602,14 @@ setMethod("anyNA", signature=c("gsvaParam"), return(x@anyNA)) -## ----- show ----- - -setMethod("show", +## ----- details method ----- + +#' @importFrom methods callNextMethod +#' @importFrom GSEABase details +#' @aliases details,gsvaParam-method +#' @rdname GsvaMethodParam-class +#' @exportMethod details +setMethod("details", signature=signature(object="gsvaParam"), function(object) { callNextMethod(object) @@ -1010,16 +1015,16 @@ setMethod("gsvaRanks", signature(param="gsvaParam"), ## ----- setters for gsvaRanksParam ----- #' @param object For the replacement method, an object of class -#' [`gsvaRanksParam-class`]. +#' [`gsvaParam-class`]. #' -#' @param value For the replacement method, an object of the classes supported by -#' [`GsvaGeneSets-class`]. 
+#' @param value For the replacement method, an object of the classes supported +#' by [`GsvaGeneSets-class`]. #' #' @aliases geneSets<- -#' @aliases geneSets<-,gsvaRanksParam,GsvaGeneSets-method +#' @aliases geneSets<-,gsvaParam,GsvaGeneSets-method #' @rdname gsvaParam-class #' @exportMethod geneSets -setReplaceMethod("geneSets", signature=signature(object="gsvaRanksParam", +setReplaceMethod("geneSets", signature=signature(object="gsvaParam", value="GsvaGeneSets"), function(object, value) { object@geneSets <- value @@ -1134,6 +1139,10 @@ setMethod("gsvaScores", signature(param="gsvaRanksParam"), #' supported by [`GsvaExprData-class`]. For a list of these classes, see its #' help page using `help(GsvaExprData)`. #' +#' @param geneSets An object of the classes supported by [`GsvaGeneSets-class`]. +#' Currently, either a [`GeneSetCollection`][GSEABase::GeneSetCollection-class] +#' object or a `list` object. +#' #' @aliases gsvaColScores,GsvaExprData-method #' @name gsvaColScores #' @rdname gsvaRanks @@ -1142,12 +1151,21 @@ setMethod("gsvaScores", signature(param="gsvaRanksParam"), #' @importFrom cli cli_alert_info cli_alert_success #' @exportMethod gsvaColScores setMethod("gsvaColScores", signature(rankExprData="GsvaExprData"), - function(rankExprData, verbose=TRUE, + function(rankExprData, geneSets, verbose=TRUE, BPPARAM=SerialParam(progressbar=verbose), maxmem="auto") { param <- .pull_param(rankExprData, "gsvaranks") + if (!missing(geneSets)) { + if (!is(geneSets, "GsvaGeneSets")) + cli_abort(c("x"=paste("'geneSets' must be a", + "'GsvaGeneSets' object. See", + "class ? GsvaGeneSets."))) + geneSets(param) <- geneSets + } + + if (verbose && gsva_global$show_start_and_end_messages) { pkgversion <- packageDescription("GSVA")[["Version"]] cli_alert_info("GSVA version {pkgversion}") @@ -1225,15 +1243,21 @@ setMethod("gsvaColScores", signature(rankExprData="GsvaExprData"), #' #' @description Extract and plot enrichment data from GSVA scores. 
#' -#' @param param A [`gsvaRanksParam-class`] object obtained with the method -#' [`gsvaRanks`]. +#' @param rankExprData A column-rank expression data set obtained with +#' [`gsvaColRanks`]. Must be one of the classes +#' supported by [`GsvaExprData-class`]. For a list of these classes, see its +#' help page using `help(GsvaExprData)`. +#' #' #' @param column The column for which we want to retrieve the enrichment data. #' This parameter is only available in the `gsvaEnrichment()` method. #' -#' @param geneSet Either a positive integer number between 1 and the number of -#' available gene sets in `param`, or a character string with the name of -#' one of the gene sets available in `param`. +#' @param geneSet Either a single positive integer number between 1 and the +#' number of available gene sets in parameter object stored in `rankExprData`, +#' or a single character string with the name of one of the gene sets available +#' in that object, or a vector of integers or character strings with the index +#' values or names of rows in `rankExprData` that should be considered as the +#' gene set for which the enrichment data should be retrieved. #' #' @param plot A character string indicating whether an enrichment plot should #' be produced using either base R graphics (`plot="base"`) or the ggplot2 package @@ -1249,7 +1273,7 @@ setMethod("gsvaColScores", signature(rankExprData="GsvaExprData"), #' `plot="ggplot"`, this method returns a `ggplot` object. When `plot="base"` #' no value is returned. 
#' -#' @aliases gsvaEnrichment,gsvaRanksParam-method +#' @aliases gsvaEnrichment,GsvaExprData-method #' @name gsvaEnrichment #' @rdname gsvaEnrichment #' @@ -1275,54 +1299,92 @@ setMethod("gsvaColScores", signature(rankExprData="GsvaExprData"), #' y <- matrix(rnorm(n*p), nrow=p, ncol=n, #' dimnames=list(paste("g", 1:p, sep="") , paste("s", 1:n, sep=""))) #' -#' ## genes in set1 are expressed at higher levels in the last 'nGrp1+1' to 'n' samples -#' y[geneSets$set1, (nGrp1+1):n] <- y[geneSets$set1, (nGrp1+1):n] + 2 -#' #' ## build GSVA parameter object #' gsvapar <- gsvaParam(y, geneSets) #' #' ## calculate GSVA ranks -#' gsvarankspar <- gsvaRanks(gsvapar) -#' gsvarankspar +#' gsvarownorm <- gsvaRowNorm(gsvapar) +#' gsvaranks <- gsvaColRanks(gsvarownorm) #' #' ## by default the enrichment data for the first column and the first -#' ## gene set are retrieved -#' gsvaEnrichment(gsvarankspar) +#' ## gene set in the input parameter object, are retrieved +#' gsvaEnrichment(gsvaranks) +#' +#' ## we can calculate the enrichment data for any of the gene sets given +#' ## in the input parameter object +#' gsvaEnrichment(gsvaranks, geneSet="gset2") +#' +#' ## we can calculate the enrichment data for a new gene set that did not +#' ## form part of the input parameter object +#' gsvaEnrichment(gsvaranks, geneSet=c("g1", "g4", "g7")) #' #' @importFrom cli cli_alert_info cli_abort cli_alert_danger #' @importFrom utils installed.packages #' @exportMethod gsvaEnrichment -setMethod("gsvaEnrichment", signature(param="gsvaRanksParam"), - function(param, column=1, geneSet=1, +setMethod("gsvaEnrichment", signature(rankExprData="GsvaExprData"), + function(rankExprData, column=1, geneSet=1, plot=c("auto", "base", "ggplot", "no"), ...) 
{ + if (length(column) != 1) + cli_abort(c("x"="'column' should be of length 1.")) + + if (is.numeric(column) && !is.na(column)) { + if (column != as.integer(column) || + column < 1 || column > ncol(rankExprData)) + cli_abort(c("x"=paste("'column' should be a positive", + "integer between 1 and the number", + "of columns in the input data."))) + } else + cli_abort(c("x"="'column' should be a positive integer.")) + + param <- .pull_param(rankExprData, "gsvaranks") + plot <- match.arg(plot) geneSets <- get_geneSets(param) - if (length(geneSet) > 1) { - msg <- paste("Please provide only the name or position of a", - "single gene set.") - cli_abort(c("x"=msg)) - } - if (is.character(geneSet)) { + if (is.character(geneSet) && length(geneSet) == 1) { if (!geneSet %in% names(geneSets)) { msg <- paste("Gene set {geneSet} is missing from the input", "parameter object.") cli_abort(c("x"=msg)) } - } else if (is.numeric(geneSet)) { + } else if (is.numeric(geneSet) && length(geneSet) == 1 && !is.na(geneSet)) { if (geneSet < 1 || geneSet > length(geneSets)) { - msg <- paste("When 'geneSet' is numeric, it should be a", - "number between 1 and the number of gene", - "sets ({length(geneSets)}).") + msg <- paste("When 'geneSet' is a single number, it", + "should be a number between 1 and the", + "number of gene sets ({length(geneSets)}).") cli_abort(c("x"=msg)) } - } else { + } else if (!is.character(geneSet) && !is.numeric(geneSet)) { msg <- paste("input argument 'geneSet' should be either", "numeric or character.") cli_abort(c("x"=msg)) } + if (is.numeric(geneSet) && length(geneSet) > 1) { + if (any(geneSet != as.integer(geneSet)) || any(geneSet < 1) || + any(geneSet > nrow(rankExprData))) { + msg <- paste("When 'geneSet' is a numeric vector,", + "it should contain positive integers", + "between 1 and the number of rows in the", + "input data.") + cli_abort(c("x"=msg)) + } + geneSets(param) <- list(geneSet) + geneSet <- 1 + } + + if (is.character(geneSet) && length(geneSet) > 1) { 
+ if (!all(geneSet %in% rownames(rankExprData))) { + msg <- paste("When 'geneSet' is a character vector, all", + "its values should be present in the row", + "names of the input data.") + cli_abort(c("x"=msg)) + } + geneSets(param) <- list(geneSet) + geneSet <- 1 + } + tau <- .get_tau(param) maxDiff <- .get_maxDiff(param) absRanking <- .get_absRanking(param) @@ -1332,11 +1394,11 @@ setMethod("gsvaEnrichment", signature(param="gsvaRanksParam"), minsize <- get_minSize(param) exprData <- get_exprData(param) - filtDataMatrix <- unwrapData(exprData, get_assay(param)) + filtDataMatrix <- unwrapData(rankExprData, "gsvaranks") ## no need for verbosity when mapping a single gene set filtMappedGeneSets <- .filterAndMapGeneSets(param, - wgset=geneSet, + wgset=geneSet, ## use that gene set filteredDataMatrix=filtDataMatrix, verbose=FALSE) diff --git a/R/ssgsea.R b/R/ssgsea.R index 3eb7f60..91cef1d 100644 --- a/R/ssgsea.R +++ b/R/ssgsea.R @@ -350,9 +350,13 @@ setMethod("anyNA", signature=c("ssgseaParam"), return(x@anyNA)) -## ----- show ----- +## ----- details method ----- -setMethod("show", +#' @importFrom GSEABase details +#' @aliases details,ssgseaParam-method +#' @rdname GsvaMethodParam-class +#' @exportMethod details +setMethod("details", signature=signature(object="ssgseaParam"), function(object) { callNextMethod(object) diff --git a/inst/unitTests/test_gsvaRanks.R b/inst/unitTests/test_gsvaRanks.R index 498bde8..d8584cc 100644 --- a/inst/unitTests/test_gsvaRanks.R +++ b/inst/unitTests/test_gsvaRanks.R @@ -23,24 +23,30 @@ test_gsvaRanks <- function() { ## calculate GSVA scores in one step gsva_es1 <- gsva(gsvapar, verbose=FALSE) - ## calculate GSVA scores in two steps - ## first calculate GSVA ranks - gsvarankspar <- gsvaRanks(gsvapar, verbose=FALSE) + ## calculate GSVA scores in three steps + ## first calculate row-normalized expression values + gsvarownorm <- gsvaRowNorm(gsvapar, verbose=FALSE) - ## second calculate GSVA scores using GSVA ranks - gsva_es2 <- 
gsvaScores(gsvarankspar, verbose=FALSE) + ## second calculate GSVA column ranks + gsvaranks <- gsvaColRanks(gsvarownorm, verbose=FALSE) + + ## third calculate GSVA scores from column ranks + gsva_es2 <- gsvaColScores(gsvaranks, verbose=FALSE) ## both approaches to calculate GSVA scores must give ## the same result with the same input gene sets checkEqualsNumeric(gsva_es1, gsva_es2) ## check that gsvaEnrichment() works - geneSets(gsvarankspar) <- c(gsets, set4=c("g1", "g4", "g7")) - gsvaenrich <- gsvaEnrichment(gsvarankspar, plot="no") + gsvaenrich <- gsvaEnrichment(gsvaranks, plot="no") checkEqualsNumeric(gsva_es1[1, 1], gsvaenrich$score) + gsvaenrich2 <- gsvaEnrichment(gsvaranks, + geneSet=c("g1", "g4", "g7"), + plot="no") + checkTrue(!is.na(gsvaenrich2$score)) ## test the ggplotting from gsvaEnrichment() - ggp <- gsvaEnrichment(gsvarankspar, plot="ggplot") + ggp <- gsvaEnrichment(gsvaranks, plot="ggplot") checkTrue(is(ggp, "ggplot")) checkTrue(identical(gsvaenrich$stats, ggp@data)) } diff --git a/inst/unitTests/test_inputdatacontainers.R b/inst/unitTests/test_inputdatacontainers.R index 133b62b..55a64f6 100644 --- a/inst/unitTests/test_inputdatacontainers.R +++ b/inst/unitTests/test_inputdatacontainers.R @@ -79,6 +79,9 @@ test_inputdatacontainers <- function() { out <- capture.output(show(param)) checkTrue(length(out) > 0 && sum(nchar(out)) > 0, "gsvaParam object show method output is empty") + out <- capture.output(details(param)) + checkTrue(length(out) > 0 && sum(nchar(out)) > 0, + "gsvaParam object details method output is empty") es.dgCMat <- gsva(param, verbose=FALSE) gsets.dgCMat <- geneSets(es.dgCMat) diff --git a/man/GsvaExprData-class.Rd b/man/GsvaExprData-class.Rd index 6946e54..60e5188 100644 --- a/man/GsvaExprData-class.Rd +++ b/man/GsvaExprData-class.Rd @@ -16,11 +16,11 @@ of another class as well as defining common methods for all of them. 
\seealso{ \code{\link{matrix}}, \code{\link[Matrix:dgCMatrix-class]{dgCMatrix}}, -\code{\link[SparseArray:SVT_SparseArray-class]{SVT_SparseMatrix}}, -\code{\link[DelayedArray:DelayedArray-class]{DelayedMatrix}}, +\code{\link[SparseArray:SVT_SparseMatrix-class]{SVT_SparseMatrix}}, +\code{\link[DelayedArray:DelayedMatrix-class]{DelayedMatrix}}, \code{\link[Biobase]{ExpressionSet}}, \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}, -\code{\link[SingleCellExperiment:SingleCellExperiment]{SingleCellExperiment}}, -\code{\link[SpatialExperiment:SpatialExperiment]{SpatialExperiment}}, +\code{\link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment}}, +\code{\link[SpatialExperiment:SpatialExperiment-class]{SpatialExperiment}}, \code{\link[HDF5Array:HDF5Array-class]{HDF5Array}} } diff --git a/man/GsvaMethodParam-class.Rd b/man/GsvaMethodParam-class.Rd index 27e42af..61ec897 100644 --- a/man/GsvaMethodParam-class.Rd +++ b/man/GsvaMethodParam-class.Rd @@ -1,14 +1,29 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/AllClasses.R, R/GsvaMethodParam.R +% Please edit documentation in R/AllClasses.R, R/GsvaMethodParam.R, R/gsva.R, +% R/ssgsea.R \docType{class} \name{GsvaMethodParam-class} \alias{GsvaMethodParam-class} +\alias{details,GsvaMethodParam-method} +\alias{details,gsvaParam-method} +\alias{details,ssgseaParam-method} \title{\code{GsvaMethodParam} class} +\usage{ +\S4method{details}{GsvaMethodParam}(object) + +\S4method{details}{gsvaParam}(object) + +\S4method{details}{ssgseaParam}(object) +} +\arguments{ +\item{object}{An object of class \code{GsvaMethodParam} or one of its subclasses.} +} \description{ Virtual superclass of method parameter classes supported by \code{GSVA}. A virtual superclass of the \code{GSVA} packages' method-specific -parameter classes. +parameter classes. 
The method 'details()' provides a detailed summary of the +parameter values stored in this class and its subclasses. } \details{ \code{GSVA} implements four single-sample gene set analysis methods: PLAGE, @@ -89,12 +104,12 @@ is(gsvapar, "GsvaMethodParam") } \seealso{ -\code{\linkS4class{GsvaExprData}}, -\code{\linkS4class{GsvaGeneSets}}, -\code{\linkS4class{zscoreParam}}, -\code{\linkS4class{plageParam}}, -\code{\linkS4class{ssgseaParam}}, -\code{\linkS4class{gsvaParam}}, +\code{\link[=GsvaExprData-class]{GsvaExprData}}, +\code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}, +\code{\link[=zscoreParam-class]{zscoreParam}}, +\code{\link[=plageParam-class]{plageParam}}, +\code{\link[=ssgseaParam-class]{ssgseaParam}}, +\code{\link[=gsvaParam-class]{gsvaParam}}, \code{\link[GSEABase:GeneIdentifierType-class]{GeneIdentifierType}} \code{\link{plageParam}}, \code{\link{zscoreParam}}, \code{\link{ssgseaParam}}, \code{\link{gsvaParam}} diff --git a/man/geneIdsToGeneSetCollection.Rd b/man/geneIdsToGeneSetCollection.Rd index 9bc8d98..ebfe8e1 100644 --- a/man/geneIdsToGeneSetCollection.Rd +++ b/man/geneIdsToGeneSetCollection.Rd @@ -54,7 +54,7 @@ geneIdsToGeneSetCollection(gsets) \seealso{ \code{\link[GSEABase:GeneSetCollection-class]{GeneSetCollection}}, \code{\link[GSEABase:GeneIdentifierType-class]{GeneIdentifierType}}, -\code{\link[GSEABase:GeneSet-class]{geneIds}}, +\code{\link[GSEABase:geneIds]{geneIds}}, \code{\link{deduplicateGeneSets}}, \code{\link{guessGeneIdType}}, \code{\link[GSEABase:GeneSet-class]{GeneSet}} diff --git a/man/geneSets.Rd b/man/geneSets.Rd index 8d94088..c59f294 100644 --- a/man/geneSets.Rd +++ b/man/geneSets.Rd @@ -30,9 +30,9 @@ \item{obj}{An object of one of the following classes: \itemize{ \item An expression data object of one of the classes described in -\code{\linkS4class{GsvaExprData}} that is the return value of a call to \code{gsva()}. +\code{\link[=GsvaExprData-class]{GsvaExprData}} that is the return value of a call to \code{gsva()}. 
\item A parameter object of one of the classes described in -\code{\linkS4class{GsvaMethodParam}} that could be used in a call to \code{gsva()}. +\code{\link[=GsvaMethodParam-class]{GsvaMethodParam}} that could be used in a call to \code{gsva()}. }} } \value{ diff --git a/man/gsva.Rd b/man/gsva.Rd index f998718..c966643 100644 --- a/man/gsva.Rd +++ b/man/gsva.Rd @@ -70,7 +70,7 @@ or terabytes (T), which GSVA will use to attempt bounding the maximum amount of main memory used across all threads of execution to that given quantity. By default \code{maxmem="auto"}, indicating that the maximum memory will be the 90\% of the total main memory, as calculated by -\code{\link[memuse:meminfo]{Sys.meminfo()}}. To avoid setting any bound on the +\code{\link[memuse:Sys.meminfo]{Sys.meminfo()}}. To avoid setting any bound on the maximum memory, use \code{maxmem=Inf}. Note that the amount of main memory used in an R session or script may depend on other commands and packages used in that same session or script.} @@ -81,18 +81,18 @@ container object of the same type as the input expression data container, except for the fact that enrichment scores are always dense, irrespective of whether the input is sparse, such as in single-cell data. If the input was a base matrix, a \code{\link[Matrix:dgCMatrix-class]{dgCMatrix}}, a -\code{\link[SparseArray:SVT_SparseArray-class]{SVT_SparseMatrix}}, or a -\code{\link[DelayedArray:DelayedArray-class]{DelayedMatrix}} object, then the output +\code{\link[SparseArray:SVT_SparseMatrix-class]{SVT_SparseMatrix}}, or a +\code{\link[DelayedArray:DelayedMatrix-class]{DelayedMatrix}} object, then the output will be either a base matrix object or a -\code{\link[DelayedArray:DelayedArray-class]{DelayedMatrix}}, with the gene sets +\code{\link[DelayedArray:DelayedMatrix-class]{DelayedMatrix}}, with the gene sets employed in the calculations stored in an attribute called \code{geneSets} of that object. 
If the input was an \code{ExpressionSet} object, then the output will be also an \code{ExpressionSet} object with the gene sets employed in the calculations stored in an attribute called \code{geneSets}. If the input was an object of either class \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}, -\code{\link[SingleCellExperiment:SingleCellExperiment]{SingleCellExperiment}}, -or \code{\link[SpatialExperiment:SpatialExperiment]{SpatialExperiment}}, +\code{\link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment}}, +or \code{\link[SpatialExperiment:SpatialExperiment-class]{SpatialExperiment}}, then the output will be of the same class, where enrichment scores will be stored in an assay called \code{es} and the gene sets employed in the calculations will be stored in the \code{rowData} slot of the object under the @@ -175,5 +175,5 @@ using singular value decomposition. \code{\link[BiocParallel:BiocParallelParam-class]{BiocParallelParam}}, \code{\link[Matrix:dgCMatrix-class]{dgCMatrix}}, \code{\link[Biobase]{ExpressionSet}}, -\code{\link[SingleCellExperiment:SingleCellExperiment]{SingleCellExperiment}} +\code{\link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment}} } diff --git a/man/gsvaAnnotation.Rd b/man/gsvaAnnotation.Rd index 09b0ac3..860f4af 100644 --- a/man/gsvaAnnotation.Rd +++ b/man/gsvaAnnotation.Rd @@ -49,7 +49,7 @@ } \arguments{ \item{object}{An expression data object of one of the classes described in -\code{\linkS4class{GsvaExprData}}. Simple \code{matrix} and \code{dgCMatrix} objects are not +\code{\link[=GsvaExprData-class]{GsvaExprData}}. 
Simple \code{matrix} and \code{dgCMatrix} objects are not capable of storing annotation metadata and will return \code{NULL}.} \item{value}{For the replacement methods, the annotation metadata to be diff --git a/man/gsvaEnrichment.Rd b/man/gsvaEnrichment.Rd index 4de6be2..f2b064b 100644 --- a/man/gsvaEnrichment.Rd +++ b/man/gsvaEnrichment.Rd @@ -2,11 +2,11 @@ % Please edit documentation in R/gsva.R \name{gsvaEnrichment} \alias{gsvaEnrichment} -\alias{gsvaEnrichment,gsvaRanksParam-method} +\alias{gsvaEnrichment,GsvaExprData-method} \title{GSVA enrichment data and visualization} \usage{ -\S4method{gsvaEnrichment}{gsvaRanksParam}( - param, +\S4method{gsvaEnrichment}{GsvaExprData}( + rankExprData, column = 1, geneSet = 1, plot = c("auto", "base", "ggplot", "no"), @@ -14,15 +14,20 @@ ) } \arguments{ -\item{param}{A \code{\linkS4class{gsvaRanksParam}} object obtained with the method -\code{\link{gsvaRanks}}.} +\item{rankExprData}{A column-rank expression data set obtained with +\code{\link{gsvaColRanks}}. Must be one of the classes +supported by \code{\link[=GsvaExprData-class]{GsvaExprData}}. For a list of these classes, see its +help page using \code{help(GsvaExprData)}.} \item{column}{The column for which we want to retrieve the enrichment data. 
This parameter is only available in the \code{gsvaEnrichment()} method.} -\item{geneSet}{Either a positive integer number between 1 and the number of -available gene sets in \code{param}, or a character string with the name of -one of the gene sets available in \code{param}.} +\item{geneSet}{Either a single positive integer number between 1 and the +number of available gene sets in parameter object stored in \code{rankExprData}, +or a single character string with the name of one of the gene sets available +in that object, or a vector of integers or character strings with the index +values or names of rows in \code{rankExprData} that should be considered as the +gene set for which the enrichment data should be retrieved.} \item{plot}{A character string indicating whether an enrichment plot should be produced using either base R graphics (\code{plot="base"}) or the ggplot2 package @@ -59,19 +64,24 @@ geneSets <- list(gset1=paste0("g", 1:3), y <- matrix(rnorm(n*p), nrow=p, ncol=n, dimnames=list(paste("g", 1:p, sep="") , paste("s", 1:n, sep=""))) -## genes in set1 are expressed at higher levels in the last 'nGrp1+1' to 'n' samples -y[geneSets$set1, (nGrp1+1):n] <- y[geneSets$set1, (nGrp1+1):n] + 2 - ## build GSVA parameter object gsvapar <- gsvaParam(y, geneSets) ## calculate GSVA ranks -gsvarankspar <- gsvaRanks(gsvapar) -gsvarankspar +gsvarownorm <- gsvaRowNorm(gsvapar) +gsvaranks <- gsvaColRanks(gsvarownorm) ## by default the enrichment data for the first column and the first -## gene set are retrieved -gsvaEnrichment(gsvarankspar) +## gene set in the input parameter object, are retrieved +gsvaEnrichment(gsvaranks) + +## we can calculate the enrichment data for any of the gene sets given +## in the input parameter object +gsvaEnrichment(gsvaranks, geneSet="gset2") + +## we can calculate the enrichment data for a new gene set that did not +## form part of the input parameter object +gsvaEnrichment(gsvaranks, geneSet=c("g1", "g4", "g7")) } \references{ diff --git 
a/man/gsvaParam-class.Rd b/man/gsvaParam-class.Rd index 628e67d..1501529 100644 --- a/man/gsvaParam-class.Rd +++ b/man/gsvaParam-class.Rd @@ -6,7 +6,7 @@ \alias{gsvaRanksParam-class} \alias{gsvaParam} \alias{anyNA,gsvaParam-method} -\alias{geneSets<-,gsvaRanksParam,GsvaGeneSets-method} +\alias{geneSets<-,gsvaParam,GsvaGeneSets-method} \alias{geneSets<-} \title{\code{gsvaParam} class} \usage{ @@ -32,15 +32,15 @@ gsvaParam( \S4method{anyNA}{gsvaParam}(x, recursive = FALSE) -\S4method{geneSets}{gsvaRanksParam,GsvaGeneSets}(object) <- value +\S4method{geneSets}{gsvaParam,GsvaGeneSets}(object) <- value } \arguments{ \item{exprData}{The expression data set. Must be one of the classes -supported by \code{\linkS4class{GsvaExprData}}. For a list of these classes, see its +supported by \code{\link[=GsvaExprData-class]{GsvaExprData}}. For a list of these classes, see its help page using \code{help(GsvaExprData)}.} \item{geneSets}{The gene sets. Must be one of the classes supported by -\code{\linkS4class{GsvaGeneSets}}. For a list of these classes, see its help page using +\code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}. For a list of these classes, see its help page using \code{help(GsvaGeneSets)}.} \item{assay}{Character vector of length 1. 
The name of the assay to use in @@ -163,19 +163,19 @@ when \code{ondisk="no"} it will attempt to load all the data in main memory.} decisions made by the software during parameter object construction when \code{verbose=TRUE} (default) and remains silent otherwise.} -\item{x}{An object of class \code{\linkS4class{gsvaParam}}.} +\item{x}{An object of class \code{\link[=gsvaParam-class]{gsvaParam}}.} \item{recursive}{Not used with \code{x} being an object of -class \code{\linkS4class{gsvaParam}}.} +class \code{\link[=gsvaParam-class]{gsvaParam}}.} \item{object}{For the replacement method, an object of class -\code{\linkS4class{gsvaRanksParam}}.} +\code{\link[=gsvaParam-class]{gsvaParam}}.} -\item{value}{For the replacement method, an object of the classes supported by -\code{\linkS4class{GsvaGeneSets}}.} +\item{value}{For the replacement method, an object of the classes supported +by \code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}.} } \value{ -A new \code{\linkS4class{gsvaParam}} object. +A new \code{\link[=gsvaParam-class]{gsvaParam}} object. } \description{ S4 class for GSVA method parameter objects. @@ -294,16 +294,16 @@ variation analysis for microarray and RNA-Seq data. 
\doi{10.1186/1471-2105-14-7} } \seealso{ -\code{\linkS4class{GsvaExprData}}, -\code{\linkS4class{GsvaGeneSets}}, -\code{\linkS4class{GsvaMethodParam}}, -\code{\linkS4class{plageParam}}, -\code{\linkS4class{zscoreParam}}, -\code{\linkS4class{ssgseaParam}} +\code{\link[=GsvaExprData-class]{GsvaExprData}}, +\code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}, +\code{\link[=GsvaMethodParam-class]{GsvaMethodParam}}, +\code{\link[=plageParam-class]{plageParam}}, +\code{\link[=zscoreParam-class]{zscoreParam}}, +\code{\link[=ssgseaParam-class]{ssgseaParam}} \code{\link[GSEABase:GeneIdentifierType-class]{GeneIdentifierType}}, \code{\link{matrix}}, \code{\link[Biobase]{ExpressionSet}}, \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}, -\code{\link[SingleCellExperiment:SingleCellExperiment]{SingleCellExperiment}} +\code{\link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment}} } diff --git a/man/gsvaRanks.Rd b/man/gsvaRanks.Rd index 42e65e9..8497306 100644 --- a/man/gsvaRanks.Rd +++ b/man/gsvaRanks.Rd @@ -43,13 +43,14 @@ \S4method{gsvaColScores}{GsvaExprData}( rankExprData, + geneSets, verbose = TRUE, BPPARAM = SerialParam(progressbar = verbose), maxmem = "auto" ) } \arguments{ -\item{param}{A parameter object of the \code{\linkS4class{gsvaRanksParam}} class.} +\item{param}{A parameter object of the \code{\link[=gsvaRanksParam-class]{gsvaRanksParam}} class.} \item{verbose}{Gives information about each calculation step. Default: \code{TRUE}.} @@ -65,30 +66,34 @@ or terabytes (T), which GSVA will use to attempt bounding the maximum amount of main memory used across all threads of execution to that given quantity. By default \code{maxmem="auto"}, indicating that the maximum memory will be the 90\% of the total main memory, as calculated by -\code{\link[memuse:meminfo]{Sys.meminfo()}}. To avoid setting any bound on the +\code{\link[memuse:Sys.meminfo]{Sys.meminfo()}}. 
To avoid setting any bound on the maximum memory, use \code{maxmem=Inf}. Note that the amount of main memory used in an R session or script may depend on other commands and packages used in that same session or script.} \item{rowNormExprData}{A row-normalized expression data set obtained with \code{\link{gsvaRowNorm}}. Must be one of the classes -supported by \code{\linkS4class{GsvaExprData}}. For a list of these classes, see its +supported by \code{\link[=GsvaExprData-class]{GsvaExprData}}. For a list of these classes, see its help page using \code{help(GsvaExprData)}.} \item{rankExprData}{A column-rank expression data set obtained with \code{\link{gsvaColRanks}}. Must be one of the classes -supported by \code{\linkS4class{GsvaExprData}}. For a list of these classes, see its +supported by \code{\link[=GsvaExprData-class]{GsvaExprData}}. For a list of these classes, see its help page using \code{help(GsvaExprData)}.} + +\item{geneSets}{An object of the classes supported by \code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}. +Currently, either a \code{\link[GSEABase:GeneSetCollection-class]{GeneSetCollection}} +object or a \code{list} object.} } \value{ In the case of the \code{gsvaRowNorm()} method, an object of class -\code{\linkS4class{gsvaRanksParam}}. +\code{\link[=gsvaRanksParam-class]{gsvaRanksParam}}. In the case of the \code{gsvaColRanks()} method, an object of class -\code{\linkS4class{gsvaRanksParam}}. +\code{\link[=gsvaRanksParam-class]{gsvaRanksParam}}. In the case of the \code{gsvaRanks()} method, an object of class -\code{\linkS4class{gsvaRanksParam}}. +\code{\link[=gsvaRanksParam-class]{gsvaRanksParam}}. In the case of the \code{gsvaScores()} method, a gene-set by sample matrix of GSVA enrichment scores stored in a container object of the same type as @@ -160,9 +165,9 @@ variation analysis for microarray and RNA-Seq data. 
\doi{10.1186/1471-2105-14-7} } \seealso{ -\code{\linkS4class{gsvaParam}}, \code{\linkS4class{gsvaRanksParam}}, \code{\link{gsva}}, +\code{\link[=gsvaParam-class]{gsvaParam}}, \code{\link[=gsvaRanksParam-class]{gsvaRanksParam}}, \code{\link{gsva}}, \code{\link[BiocParallel:BiocParallelParam-class]{BiocParallelParam}}, \code{\link[Matrix:dgCMatrix-class]{dgCMatrix}}, \code{\link[Biobase]{ExpressionSet}}, -\code{\link[SingleCellExperiment:SingleCellExperiment]{SingleCellExperiment}} +\code{\link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment}} } diff --git a/man/gsvaRanks_serialization.Rd b/man/gsvaRanks_serialization.Rd index 8eb95d5..abb5133 100644 --- a/man/gsvaRanks_serialization.Rd +++ b/man/gsvaRanks_serialization.Rd @@ -10,7 +10,7 @@ saveHDF5GSVAranks(x, dir, ...) loadHDF5GSVAranks(dir, ...) } \arguments{ -\item{x}{A \code{\linkS4class{gsvaRanksParam}} object to save to disk.} +\item{x}{A \code{\link[=gsvaRanksParam-class]{gsvaRanksParam}} object to save to disk.} \item{dir}{The path to the directory where to save or load the GSVA ranks data.} @@ -18,7 +18,7 @@ data.} \item{...}{Additional arguments to be passed to the underlying HDF5 saving/loading functions \code{\link[HDF5Array:saveHDF5SummarizedExperiment]{saveHDF5SummarizedExperiment}} -and \code{\link[HDF5Array:saveHDF5SummarizedExperiment]{loadHDF5SummarizedExperiment}}, +and \code{\link[HDF5Array:loadHDF5SummarizedExperiment]{loadHDF5SummarizedExperiment}}, respectively.} } \value{ diff --git a/man/plageParam-class.Rd b/man/plageParam-class.Rd index 5fbf4ed..e85b003 100644 --- a/man/plageParam-class.Rd +++ b/man/plageParam-class.Rd @@ -19,11 +19,11 @@ plageParam( } \arguments{ \item{exprData}{The expression data set. Must be one of the classes -supported by \code{\linkS4class{GsvaExprData}}. For a list of these classes, see its +supported by \code{\link[=GsvaExprData-class]{GsvaExprData}}. 
For a list of these classes, see its help page using \code{help(GsvaExprData)}.} \item{geneSets}{The gene sets. Must be one of the classes supported by -\code{\linkS4class{GsvaGeneSets}}. For a list of these classes, see its help page using +\code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}. For a list of these classes, see its help page using \code{help(GsvaGeneSets)}.} \item{assay}{Character vector of length 1. The name of the assay to use in @@ -63,7 +63,7 @@ decisions made by the software during parameter object construction when \code{verbose=TRUE} (default) and remains silent otherwise.} } \value{ -A new \code{\linkS4class{plageParam}} object. +A new \code{\link[=plageParam-class]{plageParam}} object. } \description{ S4 class for PLAGE method parameter objects. @@ -104,12 +104,12 @@ using singular value decomposition. \doi{10.1186/1471-2105-6-225} } \seealso{ -\code{\linkS4class{GsvaExprData}}, -\code{\linkS4class{GsvaGeneSets}}, -\code{\linkS4class{GsvaMethodParam}}, -\code{\linkS4class{zscoreParam}}, -\code{\linkS4class{ssgseaParam}}, -\code{\linkS4class{gsvaParam}} +\code{\link[=GsvaExprData-class]{GsvaExprData}}, +\code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}, +\code{\link[=GsvaMethodParam-class]{GsvaMethodParam}}, +\code{\link[=zscoreParam-class]{zscoreParam}}, +\code{\link[=ssgseaParam-class]{ssgseaParam}}, +\code{\link[=gsvaParam-class]{gsvaParam}} \code{\link[GSEABase:GeneIdentifierType-class]{GeneIdentifierType}} } diff --git a/man/ssgseaParam-class.Rd b/man/ssgseaParam-class.Rd index f3a24b0..7db1043 100644 --- a/man/ssgseaParam-class.Rd +++ b/man/ssgseaParam-class.Rd @@ -26,11 +26,11 @@ ssgseaParam( } \arguments{ \item{exprData}{The expression data set. Must be one of the classes -supported by \code{\linkS4class{GsvaExprData}}. For a list of these classes, see its +supported by \code{\link[=GsvaExprData-class]{GsvaExprData}}. For a list of these classes, see its help page using \code{help(GsvaExprData)}.} \item{geneSets}{The gene sets. 
Must be one of the classes supported by -\code{\linkS4class{GsvaGeneSets}}. For a list of these classes, see its help page using +\code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}. For a list of these classes, see its help page using \code{help(GsvaGeneSets)}.} \item{assay}{Character vector of length 1. The name of the assay to use in @@ -103,13 +103,13 @@ when \code{ondisk="no"} it will attempt to load all the data in main memory.} decisions made by the software during parameter object construction when \code{verbose=TRUE} (default) and remains silent otherwise.} -\item{x}{An object of class \code{\linkS4class{ssgseaParam}}.} +\item{x}{An object of class \code{\link[=ssgseaParam-class]{ssgseaParam}}.} \item{recursive}{Not used with \code{x} being an object of -class \code{\linkS4class{ssgseaParam}}.} +class \code{\link[=ssgseaParam-class]{ssgseaParam}}.} } \value{ -A new \code{\linkS4class{ssgseaParam}} object. +A new \code{\link[=ssgseaParam-class]{ssgseaParam}} object. } \description{ S4 class for ssGSEA method parameter objects. @@ -180,16 +180,16 @@ oncogenic KRAS-driven cancers require TBK1. 
\doi{10.1038/nature08460} } \seealso{ -\code{\linkS4class{GsvaExprData}}, -\code{\linkS4class{GsvaGeneSets}}, -\code{\linkS4class{GsvaMethodParam}}, -\code{\linkS4class{plageParam}}, -\code{\linkS4class{zscoreParam}}, -\code{\linkS4class{gsvaParam}} +\code{\link[=GsvaExprData-class]{GsvaExprData}}, +\code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}, +\code{\link[=GsvaMethodParam-class]{GsvaMethodParam}}, +\code{\link[=plageParam-class]{plageParam}}, +\code{\link[=zscoreParam-class]{zscoreParam}}, +\code{\link[=gsvaParam-class]{gsvaParam}} \code{\link[GSEABase:GeneIdentifierType-class]{GeneIdentifierType}}, \code{\link{matrix}}, \code{\link[Biobase]{ExpressionSet}}, \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}, -\code{\link[SingleCellExperiment:SingleCellExperiment]{SingleCellExperiment}} +\code{\link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment}} } diff --git a/man/zscoreParam-class.Rd b/man/zscoreParam-class.Rd index 2cc8d2f..f8186e4 100644 --- a/man/zscoreParam-class.Rd +++ b/man/zscoreParam-class.Rd @@ -19,11 +19,11 @@ zscoreParam( } \arguments{ \item{exprData}{The expression data set. Must be one of the classes -supported by \code{\linkS4class{GsvaExprData}}. For a list of these classes, see its +supported by \code{\link[=GsvaExprData-class]{GsvaExprData}}. For a list of these classes, see its help page using \code{help(GsvaExprData)}.} \item{geneSets}{The gene sets. Must be one of the classes supported by -\code{\linkS4class{GsvaGeneSets}}. For a list of these classes, see its help page using +\code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}. For a list of these classes, see its help page using \code{help(GsvaGeneSets)}.} \item{assay}{Character vector of length 1. 
The name of the assay to use in @@ -63,7 +63,7 @@ decisions made by the software during parameter object construction when \code{verbose=TRUE} (default) and remains silent otherwise.} } \value{ -A new \code{\linkS4class{zscoreParam}} object. +A new \code{\link[=zscoreParam-class]{zscoreParam}} object. } \description{ S4 class for combined z-scores method parameter objects. @@ -104,12 +104,12 @@ disease classification. \doi{10.1371/journal.pcbi.1000217} } \seealso{ -\code{\linkS4class{GsvaExprData}}, -\code{\linkS4class{GsvaGeneSets}}, -\code{\linkS4class{GsvaMethodParam}}, -\code{\linkS4class{plageParam}}, -\code{\linkS4class{ssgseaParam}}, -\code{\linkS4class{gsvaParam}} +\code{\link[=GsvaExprData-class]{GsvaExprData}}, +\code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}, +\code{\link[=GsvaMethodParam-class]{GsvaMethodParam}}, +\code{\link[=plageParam-class]{plageParam}}, +\code{\link[=ssgseaParam-class]{ssgseaParam}}, +\code{\link[=gsvaParam-class]{gsvaParam}} \code{\link[GSEABase:GeneIdentifierType-class]{GeneIdentifierType}} } diff --git a/vignettes/GSVA.bib b/vignettes/GSVA.bib index ab21a08..ca3fe36 100644 --- a/vignettes/GSVA.bib +++ b/vignettes/GSVA.bib @@ -555,3 +555,12 @@ @article{amezquita2020orchestrating year={2020}, publisher={Nature Publishing Group US New York} } + +@article{lun2022powering, + title={Powering single-cell analyses in the browser with WebAssembly}, + author={Lun, Aaron and Kancherla, Jayaram}, + journal={bioRxiv}, + pages={2022--03}, + year={2022}, + publisher={Cold Spring Harbor Laboratory} +} diff --git a/vignettes/GSVA_proteomics.Rmd b/vignettes/GSVA_proteomics.Rmd index 23a800b..49e30cf 100644 --- a/vignettes/GSVA_proteomics.Rmd +++ b/vignettes/GSVA_proteomics.Rmd @@ -154,12 +154,12 @@ We can force the `gsvaParam()` function to check for missing values irrespective of the input expression data container by setting the argument `checkNA="yes"`, or disable that check altogether with `checkNA="no"`. By default `checkNA="auto"`. 
Once missing values have been detected when we build the -parameter object, the `gsva()` function (or `gsvaRanks()` and `gsvaScores()`) -will apply a missing data policy specified through a parameter called `use`, -which takes one of the following three possible character string values: -`everything`, `all.obs` or `na.rm`. The first value (`everything`) is the -default value and it propagates the missing `NA` values through the -calculations. +parameter object, the `gsva()` function (or `gsvaRowNorm()`, `gsvaColRanks()` +and `gsvaColScores()`) will apply a missing data policy specified through a +parameter called `use`, which takes one of the following three possible +character string values: `everything`, `all.obs` or `na.rm`. The first value +(`everything`) is the default value and it propagates the missing `NA` values +through the calculations. ```{r} es_gsva_everything <- gsva(gsvapar) diff --git a/vignettes/GSVA_scRNAseq.Rmd b/vignettes/GSVA_scRNAseq.Rmd index 5d5050e..76d3148 100644 --- a/vignettes/GSVA_scRNAseq.Rmd +++ b/vignettes/GSVA_scRNAseq.Rmd @@ -55,26 +55,36 @@ specific support consists of the following features: sparse data containers can be broadly categorized in those that only store the expression values, and those that may store additional row and column metadata. The currently available value-only containers for - input are `dgCMatrix`, `SVT_SparseArray`, and `DelayedMatrix`. + input are `dgCMatrix`, `SVT_SparseArray`, `HDF5Matrix` and `DelayedMatrix`. The currently available container for single-cell data that allows one to input additional row and column metadata is a `SingleCellExperiment` object. * While the input single-cell data is always sparse, the output of enrichment scores will be always dense, and therefore, the container storing those scores will be different from the input data, typically a `matrix` or a - dense `DelayedMatrix` object. 
The latter will be particularly used when the - total number of values exceeds 2^31, which is the largest 32-bit standard - integer value in R. + dense `DelayedMatrix` object using an `HDF5Matrix` backend. The latter will + be particularly used when the total number of values exceeds 2^31, which is + the largest 32-bit standard integer value in R. * By default, when the input expression data is stored in a sparse data - container, as it typically happens with single-cell data, then a slightly - a slightly modified GSVA algorithm will run, if GSVA is the choice of - algorithm, by which nonzero values are treated differently from zero values, - leading to slightly different results than those obtained by applying the - classical GSVA algorithm. If we set the parameter `sparse=FALSE` in the call - to `gsvaParam()`, the classical GSVA algorithm will be used, which for a + container, as it typically happens with single-cell data, then a sparse + regime of the GSVA algorithm will run, if GSVA is the chosen method, by + which nonzero values are treated differently from zero values, leading to + slightly different results than those obtained by applying the classical + GSVA algorithm. If we set the parameter `sparse=FALSE` in the call to + `gsvaParam()`, the classical GSVA algorithm will be used, which for a typical single-cell data set will result in longer running times and larger memory consumption than running it in the default sparse regime for this type of data. + * The GSVA algorithm can be run either at once through a call to `gsva()` + with a parameter object or in three steps: (1) row normalization with + `gsvaRowNorm()`; (2) column rank transformation with `gsvaColRanks()`; + and (3) column enrichment scores calculation with `gsvaColScores()`.
+ Splitting the GSVA algorithm into these three steps allows one to distribute + and balance the computational load of the algorithm in a high-performance + computing (HPC) environment with multiple nodes, and to reuse the output of + the first two steps, which are independent of the gene sets, to calculate + enrichment scores for different collections of gene sets, without having to + repeat the first two steps. In what follows, we will illustrate the use of GSVA on a publicly available single-cell transcriptomics data set of peripheral blood mononuclear cells @@ -83,8 +93,7 @@ single-cell transcriptomics data set of peripheral blood mononuclear cells # Import data We import the PBMC data using the `r Biocpkg("TENxPBMCData")` package, as a -`SingleCellExperiment` object, defined in the -`r Biocpkg("SingleCellExperiment")` package. +`r Biocpkg("SingleCellExperiment")` object. ```{r, message=FALSE, warning=FALSE} @@ -95,10 +104,10 @@ sce <- TENxPBMCData(dataset="pbmc4k") sce ``` -# Quality assessment and pre-processing +# Quality control and pre-processing -Here, we perform a quality assessment and pre-processing steps using the -package `r Biocpkg("scuttle")` [@mccarthy2017scater]. We start identifying +Here, we perform quality control (QC) and pre-processing steps using the +package `r Biocpkg("scrapper")` [@lun2022powering]. We start identifying mitochondrial genes. ```{r, message=FALSE, warning=FALSE} @@ -107,28 +116,18 @@ library(scrapper) is_mito <- grepl("^MT-", rowData(sce)$Symbol_TENx) table(is_mito) ``` -Calculate quality control (QC) metrics and filter out low-quality cells. +Calculate QC metrics and filter out low-quality cells. ```{r} sce <- quickRnaQc.se(sce, subsets=list(mito=is_mito)) sce <- sce[, sce$keep] dim(sce) ``` -Figure \@ref(fig:cntxgene) below shows the empirical cumulative distribution of -counts per gene in logarithmic scale.
- -```{r cntxgene, fig.width=5, fig.height=5, out.width="600px", fig.cap="Empirical cumulative distribution of UMI counts per gene. The red vertical bar indicates a cutoff value of 100 UMI counts per gene across all cells, below which genes will be filtered out."} -cntxgene <- rowSums(assays(sce)$counts)+1 -plot.ecdf(cntxgene, xaxt="n", panel.first=grid(), xlab="UMI counts per gene", - log="x", main="", xlim=c(1, 1e5), las=1) -axis(1, at=10^(0:5), labels=10^(0:5)) -abline(v=100, lwd=2, col="red") -``` -We filter out lowly-expressed genes, by selecting those with at least 100 UMI -counts across all cells for downstream analysis. +We filter out genes that are expressed in less than 1% of the cells. ```{r} -sce <- sce[cntxgene >= 100, ] +cellsxgene <- rowSums(counts(sce) > 0) +sce <- sce[cellsxgene > floor(ncol(sce)*0.01), ] dim(sce) ``` Calculate library size factors and normalized units of expression in @@ -141,7 +140,8 @@ assayNames(sce) # Annotate cell types using GSVA -Here, we illustrate how to annotate cell types in the PBMC data using GSVA. +Here, we illustrate how to annotate cell types in the PBMC data using GSVA and +a collection of relevant gene sets. ## Read gene sets in GMT format @@ -157,8 +157,10 @@ LM22 signature is stored in the `r Biocpkg("GSVAdata")` experiment data package as a compressed text file in [GMT format](https://www.genepattern.org/file-formats-guide/#GMT), which can be read into R using the `readGMT()` function from the `r Biocpkg("GSVA")` -package, and will return the gene sets into a `GeneSetCollection` object, -defined in the `r Biocpkg("GSEABase")` package. +package, which will return the gene sets, by default, into a +`GeneSetCollection` object, defined in the `r Biocpkg("GSEABase")` package. +This default argument can be changed to return the gene sets into a base +`list` object by setting `valueType="list"` in the call to `readGMT()`. 
```{r, message=FALSE, warning=FALSE} library(GSEABase) @@ -197,7 +199,7 @@ gsvaAnnotation(sce) ## Build parameter object We first build a parameter object using the function `gsvaParam()`. By -default, the expression values in the `logocounts` assay will be selected for +default, the expression values in the `logcounts` assay will be selected for downstream analysis. ```{r} @@ -208,28 +210,43 @@ gsvapar ## Calculate GSVA scores While at this point, we could already run the entire GSVA algorithm with a call -to the `gsva(gsvapar)` function. We show here how to do it in two steps. -First we calculate GSVA rank values using the function `gsvaRanks()`. +to the `gsva(gsvapar)` function, we show here how to do it in three steps. +First we calculate row-normalized expression values using the function +`gsvaRowNorm()`. If, as in this example, the given input is a +`SingleCellExperiment` object, then the output will be the same object with +an additional assay called `gsvarownr` containing the row-normalized expression +values. ```{r} -gsvaranks <- gsvaRanks(gsvapar) -gsvaranks +gsvarownorm <- gsvaRowNorm(gsvapar) +gsvarownorm +assayNames(gsvarownorm) ``` +Second, we calculate GSVA column rank values using the function +`gsvaColRanks()`, which takes as input the output of `gsvaRowNorm()`, and returns +the column rank values in a new assay called `gsvaranks`, if the input is a +`SingleCellExperiment` object. -Second, we calculate the GSVA scores using the output of `gsvaRanks()` as input -to the function `gsvaScores()`. By default, this function will calculate the -scores for all gene sets specified in the input parameter object. +```{r} +gsvacolranks <- gsvaColRanks(gsvarownorm) +gsvacolranks +assayNames(gsvacolranks) +``` +Third, we finally calculate the GSVA scores using the output of +`gsvaColRanks()` as input to the function `gsvaColScores()`.
By default, this +function will calculate the scores for all gene sets specified in the input +parameter object given in the call to `gsvaRowNorm()`. ```{r} -es <- gsvaScores(gsvaranks) +es <- gsvaColScores(gsvacolranks) es ``` -However, we could calculate the scores for another collection of gene sets by -updating them in the `gsvaranks` object as follows. +However, we could calculate the scores for another collection of gene sets, +without having to calculate the column ranks again, by giving this other +collection of gene sets as second argument to the call to `gsvaColScores()`. ```{r, eval=FALSE} -geneSets(gsvaranks) <- geneSets(gsvapar)[1:2] -es2 <- gsvaScores(gsvaranks) +es2 <- gsvaColScores(gsvacolranks, alternative_gsets) ``` ## Using GSVA scores to assign cell types @@ -238,11 +255,10 @@ Following @amezquita2020orchestrating, and some of the steps described in "Chapter 5 Clustering" of the first version of the [OSCA book](https://bioconductor.org/books/3.16/OSCA.basic/clustering.html), we use GSVA scores to build a nearest-neighbor graph of the cells using the -function `buildSNNGraph()` from the `r Biocpkg("scran")` -package [@lun2016step]. The parameter `k` in the call to `buildSNNGraph()` -specifies the number of nearest neighbors to consider during graph -construction, and here we set `k=20` because it leads to a number of clusters -close to the expected number of cell types. +function `makeSNNGraph()` from the `r Biocpkg("bluster")` package. The +parameter `k` in the call to `makeSNNGraph()` specifies the number of nearest +neighbors to consider during graph construction, and here we set `k=20` because +it leads to a number of clusters close to the expected number of cell types. 
```{r, message=FALSE, warning=FALSE} library(bluster) @@ -290,19 +306,22 @@ plot(res$rotation[, 1], res$rotation[, 2], col=hmcol[colLabels(es)], pch=19, xlab=sprintf("PCA 1 (%.0f%%)", varexp[1]*100), ylab=sprintf("PCA 2 (%.0f%%)", varexp[2]*100), las=1, cex.axis=1.2, cex.lab=1.5) -legend("topright", gsub("_", " ", levels(colLabels(es))), fill=hmcol, inset=0.01) +mask <- colLabels(es) == "NK_CELLS_RESTING" +points(res$rotation[mask, 1], res$rotation[mask, 2], ## show the overlap better + col=hmcol[colLabels(es)[mask]], pch=19) +legend("bottomright", gsub("_", " ", levels(colLabels(es))), fill=hmcol, inset=0.01) ``` Finally, if we want to better understand why a specific cell type is annotated to a given cell, we can use the `gsvaEnrichment()` function, which will show a GSEA enrichment plot. This function takes as input the output of -`gsvaRanks()`, a given column (cell) in the input singl-cell data, and a given +`gsvaColRanks()`, a given column (cell) in the input single-cell data, and a given gene set. In Figure \@ref(fig:gsvaenrichment) below, we show such a plot for the -first cell annotated to the eosinophil cell type. +first cell annotated to the monocyte cell type.
```{r gsvaenrichment, echo=TRUE, fig.height=5, fig.width=5, out.width="600px", fig.cap="GSVA enrichment plot of the EOSINOPHILS gene set in the expression profile of the first cell annotated to that cell type."} -firsteosinophilcell <- which(colLabels(es) == "EOSINOPHILS")[1] +firstmonocytecell <- which(colLabels(es) == "MONOCYTES")[1] par(mar=c(4, 5, 1, 1)) -gsvaEnrichment(gsvaranks, column=firsteosinophilcell, geneSet="EOSINOPHILS", +gsvaEnrichment(gsvacolranks, column=firstmonocytecell, geneSet="MONOCYTES", cex.axis=1.2, cex.lab=1.5, plot="ggplot") ``` In the previous call to `gsvaEnrichment()` we used the argument `plot="ggplot"` From 4ee72889ffa303f92a95667f73eabf499131d9b4 Mon Sep 17 00:00:00 2001 From: Robert Castelo Date: Sun, 24 May 2026 21:55:26 +0200 Subject: [PATCH 05/12] Added deprecation of gsvaRanks() and gsvaScores() --- .covrignore | 2 + R/GSVA-pkg-deprecated.R | 192 ++++++++++++++++++++++++++ R/gsva.R | 248 ++++------------------------------ inst/unitTests/test_mainAPI.R | 6 +- man/GSVA-pkg-deprecated.Rd | 28 ++++ man/gsva.Rd | 6 +- man/gsvaEnrichment.Rd | 5 +- man/gsvaParam-class.Rd | 1 + man/gsvaRanks.Rd | 72 ++++------ 9 files changed, 288 insertions(+), 272 deletions(-) diff --git a/.covrignore b/.covrignore index 3bc3bc1..bd6d468 100644 --- a/.covrignore +++ b/.covrignore @@ -1,3 +1,5 @@ inst/* +R/GSVA-pkg-deprecated.R +R/GSVA-pkg-defunct.R R/methods-ShinyApp.R R/zzz.R diff --git a/R/GSVA-pkg-deprecated.R b/R/GSVA-pkg-deprecated.R index a5a187d..c0b0f1e 100644 --- a/R/GSVA-pkg-deprecated.R +++ b/R/GSVA-pkg-deprecated.R @@ -6,3 +6,195 @@ #' @name GSVA-pkg-deprecated #' @keywords internal NULL + +#' @description The `gsvaRanks()` method is deprecated. Please use `gsvaRowNorm()` +#' and `gsvaColRanks()` instead. 
+#' +#' @aliases gsvaRanks,gsvaParam-method +#' @name gsvaRanks +#' @rdname GSVA-pkg-deprecated +#' +#' @importFrom cli cli_alert_info cli_alert_success +#' @exportMethod gsvaRanks +setMethod("gsvaRanks", signature(param="gsvaParam"), + function(param, + verbose=TRUE, + BPPARAM=SerialParam(progressbar=verbose), + maxmem="auto") { + + .Deprecated(new="gsvaRowNorm() and gsvaColRanks()", + package="GSVA", + msg=paste("The 'gsvaRanks()' method is deprecated.", + "Please use 'gsvaRowNorm()' and", + "'gsvaColRanks()'.")) + + if (verbose && gsva_global$show_start_and_end_messages) { + pkgversion <- packageDescription("GSVA")[["Version"]] + cli_alert_info("GSVA version {pkgversion}") + } + + .check_bpparam(BPPARAM) + + exprData <- get_exprData(param) + dataMatrix <- unwrapData(exprData, get_assay(param)) + maxmem <- .check_maxmem(param, maxmem, verbose) + ondisk <- .check_ondisk(param, maxmem, verbose) + + dataMatrix <- .check_sparse_load_input_expr(dataMatrix, "GSVA", + ondisk, verbose) + + filtDataMatrix <- dataMatrix + BPPARAM <- .check_open_parallelism(filtDataMatrix, BPPARAM, + minparrows=100, minparcols=100, + verbose) + + if (.get_filterRows(param)) + filtDataMatrix <- .filterGenes(dataMatrix, anyNA(param), + removeConstant=TRUE, + removeNzConstant=TRUE, + verbose, BPPARAM=BPPARAM, + maxmem=maxmem) + else if (verbose) { + msg <- "Skipping filtering of constant rows (filterRows=FALSE)" + cli_alert_warning(msg) + } + + if (verbose) + cli_alert_info(sprintf("Calculating GSVA ranks")) + + kcdfminssize <- .get_kcdfNoneMinSampleSize(param) + gsvarownr <- .compute_row_norm(expr=filtDataMatrix, + kcdf=.get_kcdf(param), + kcdf.min.ssize=kcdfminssize, + sparse=.get_sparse(param), + any_na=anyNA(param), + na_use=.get_NAuse(param), + verbose=verbose, + BPPARAM=BPPARAM, + maxmem=maxmem) + + gsvarnks <- .compute_gsva_ranks(Z=gsvarownr, + verbose=verbose, + BPPARAM=BPPARAM, + maxmem=maxmem) + + rownames(gsvarnks) <- rownames(filtDataMatrix) + colnames(gsvarnks) <- 
colnames(filtDataMatrix) + + rnkscontainer <- wrapData(get_exprData(param), gsvarnks, param, + "gsvaranks") + rval <- new("gsvaRanksParam", + exprData=rnkscontainer, geneSets=get_geneSets(param), + assay="gsvaranks", annotation=get_annotation(param), + minSize=get_minSize(param), maxSize=get_maxSize(param), + kcdf=.get_kcdf(param), + kcdfNoneMinSampleSize=.get_kcdfNoneMinSampleSize(param), + tau=.get_tau(param), maxDiff=.get_maxDiff(param), + absRanking=.get_absRanking(param), + sparse=.get_sparse(param), checkNA=.get_checkNA(param), + didCheckNA=.get_didCheckNA(param), anyNA=anyNA(param), + use=.get_NAuse(param), filterRows=.get_filterRows(param), + nzcount=nzcount(param), ondisk=.get_ondisk(param)) + + if (verbose && gsva_global$show_start_and_end_messages) + cli_alert_success("Calculations finished") + + return(rval) + }) + + +#' @description The `gsvaScores()` method is deprecated. Please use +#' `gsvaColScores()` instead. +#' +#' @param param A parameter object of the [`gsvaRanksParam-class`] class. 
+#' +#' @aliases gsvaScores,gsvaRanksParam-method +#' @name gsvaScores +#' @rdname GSVA-pkg-deprecated +#' +#' @importFrom S4Arrays is_sparse +#' @importFrom cli cli_alert_info cli_alert_success +#' @exportMethod gsvaScores +setMethod("gsvaScores", signature(param="gsvaRanksParam"), + function(param, verbose=TRUE, + BPPARAM=SerialParam(progressbar=verbose), + maxmem="auto") { + + .Deprecated(new="gsvaColScores()", + package="GSVA", + msg=paste("The 'gsvaScores()' method is deprecated.", + "Please use 'gsvaColScores()'.")) + + if (verbose && gsva_global$show_start_and_end_messages) { + pkgversion <- packageDescription("GSVA")[["Version"]] + cli_alert_info("GSVA version {pkgversion}") + } + + .check_bpparam(BPPARAM) + + ## assuming rows in the rank data have been already filtered + exprData <- get_exprData(param) + filtDataMatrix <- unwrapData(exprData, get_assay(param)) + + filtMappedGeneSets <- .filterAndMapGeneSets(param=param, + filteredDataMatrix=filtDataMatrix, + verbose=verbose) + + sparse <- .get_sparse(param) + if (sparse && !is_sparse(filtDataMatrix)) + sparse <- FALSE + + if (verbose) { + if (sparse) + cli_alert_info("GSVA sparse algorithm") + else + cli_alert_info("GSVA dense (classical) algorithm") + } + + maxmem <- .check_maxmem(param, maxmem, verbose) + ondisk <- .check_ondisk(param, maxmem, verbose) + + filtDataMatrix <- .check_sparse_load_input_expr(filtDataMatrix, + "GSVA", ondisk, + verbose) + + BPPARAM <- .check_open_parallelism(filtDataMatrix, BPPARAM, + minparrows=100, minparcols=100, + verbose) + + ondisk <- .check_es_memory_requirements(filtDataMatrix, + filtMappedGeneSets, + ondisk, maxmem) + if (verbose) { + n <- length(filtMappedGeneSets) + cli_alert_info("Calculating GSVA scores for {n} gene sets") + } + + gsva_es <- .processMatrixCols(filtDataMatrix, + FUN=.compute_gsva_scores, + geneSetsIdx=filtMappedGeneSets, + tau=.get_tau(param), + maxDiff=.get_maxDiff(param), + absRanking=.get_absRanking(param), + sparse=sparse, any_na=anyNA(param), 
+ na_use=.get_NAuse(param), + minSize=get_minSize(param), + ondisk=ondisk, verbose=verbose, + minparrows=100, minparcols=100, + BPPARAM=BPPARAM, + maxmem=ceiling(maxmem/100)) ## use + ## of memory increases here about + ## 10-fold over block size memory + + rownames(gsva_es) <- names(filtMappedGeneSets) + colnames(gsva_es) <- colnames(filtDataMatrix) + + gs <- .geneSetsIndices2Names(indices=filtMappedGeneSets, + names=rownames(filtDataMatrix)) + rval <- wrapData(get_exprData(param), gsva_es, param, "es", gs) + + if (verbose && gsva_global$show_start_and_end_messages) + cli_alert_success("Calculations finished") + + return(rval) + }) diff --git a/R/gsva.R b/R/gsva.R index 5bf60a9..55e7a21 100644 --- a/R/gsva.R +++ b/R/gsva.R @@ -60,16 +60,9 @@ #' calculations will be stored in the `rowData` slot of the object under the #' column name `gs`. #' -#' @seealso [`plageParam`], [`zscoreParam`], [`ssgseaParam`], [`gsvaParam`], +#' @seealso [`gsvaParam`], [`plageParam`], [`zscoreParam`], [`ssgseaParam`], #' [`BiocParallelParam`][BiocParallel::BiocParallelParam-class], -#' [`dgCMatrix`][Matrix::dgCMatrix-class], -#' \code{\link[Biobase]{ExpressionSet}}, -### we are using the plain Rd above because -### #' [`ExpressionSet`][Biobase::ExpressionSet-class], -### results in the following R CMD check NOTE: -### Non-topic package-anchored link(s) in Rd file 'gsva.Rd': -### ‘[Biobase:class.ExpressionSet]{ExpressionSet}’ -#' [`SingleCellExperiment`][SingleCellExperiment::SingleCellExperiment-class] +#' [`gsvaRowNorm`], [`gsvaColRanks`], [`gsvaColScores`] #' #' @aliases gsva #' @name gsva @@ -329,6 +322,7 @@ setMethod("gsva", signature(param="gsvaParam"), ### ‘[Biobase:class.ExpressionSet]{ExpressionSet}’ #' [`SummarizedExperiment`][SummarizedExperiment::SummarizedExperiment-class], #' [`SingleCellExperiment`][SingleCellExperiment::SingleCellExperiment-class] +#' [`SpatialExperiment`][SpatialExperiment::SpatialExperiment-class] #' #' @references Hänzelmann, S., Castelo, R. 
and Guinney, J. GSVA: Gene set #' variation analysis for microarray and RNA-Seq data. @@ -735,16 +729,8 @@ setMethod("details", #' in an R session or script may depend on other commands and packages used in #' that same session or script. #' -#' @seealso [`gsvaParam-class`], [`gsvaRanksParam-class`], [`gsva`], +#' @seealso [`gsvaParam-class`], [`gsva`], [`gsvaEnrichment`], #' [`BiocParallelParam`][BiocParallel::BiocParallelParam-class], -#' [`dgCMatrix`][Matrix::dgCMatrix-class], -#' \code{\link[Biobase]{ExpressionSet}}, -### we are using the plain Rd above because -### #' [`ExpressionSet`][Biobase::ExpressionSet-class], -### results in the following R CMD check NOTE: -### Non-topic package-anchored link(s) in Rd file 'gsvaRanks.Rd': -### ‘[Biobase:class.ExpressionSet]{ExpressionSet}’ -#' [`SingleCellExperiment`][SingleCellExperiment::SingleCellExperiment-class] #' #' @references Hänzelmann, S., Castelo, R. and Guinney, J. GSVA: Gene set #' variation analysis for microarray and RNA-Seq data. @@ -792,8 +778,14 @@ setMethod("details", #' ## geneSets(gsvarankspar) <- geneSets2 #' ## gsvaScores(gsvarankspar) #' -#' @return In the case of the `gsvaRowNorm()` method, an object of class -#' [`gsvaRanksParam-class`]. +#' @return In the case of 'gsvaRowNorm()', an object of the same class as the +#' input expression data given in the argument `exprData` of the `gsvaParam` +#' object, containing the row-normalized expression values. The resulting +#' object will have metadata with a copy of the input `gsvaParam` object, +#' except for the `exprData` slot, and in the case of being a derivative of a +#' [`SummarizedExperiment`][SummarizedExperiment::SummarizedExperiment] object, +#' an additional assay called "gsvarownr" storing the row-normalized expression +#' values. #' #' @aliases gsvaRowNorm,gsvaParam-method #' @name gsvaRowNorm @@ -872,8 +864,13 @@ setMethod("gsvaRowNorm", signature(param="gsvaParam"), #' supported by [`GsvaExprData-class`]. 
For a list of these classes, see its #' help page using `help(GsvaExprData)`. #' -#' @return In the case of the `gsvaColRanks()` method, an object of class -#' [`gsvaRanksParam-class`]. +#' @return In the case of 'gsvaColRanks()', an object of the same class as the +#' input expression data given in the argument `exprData` of the `gsvaParam` +#' object, containing the column rank values. The resulting object will have +#' metadata with a copy of the input `gsvaParam` object, except for the +#' `exprData` slot, and in the case of being a derivative of a +#' [`SummarizedExperiment`][SummarizedExperiment::SummarizedExperiment] object, +#' an additional assay called "gsvaranks" storing the column rank values. #' #' @aliases gsvaColRanks,GsvaExprData-method #' @name gsvaColRanks @@ -921,98 +918,7 @@ setMethod("gsvaColRanks", signature(rowNormExprData="GsvaExprData"), }) -#' -#' @return In the case of the `gsvaRanks()` method, an object of class -#' [`gsvaRanksParam-class`]. -#' -#' @aliases gsvaRanks,gsvaParam-method -#' @name gsvaRanks -#' @rdname gsvaRanks -#' -#' @importFrom cli cli_alert_info cli_alert_success -#' @exportMethod gsvaRanks -setMethod("gsvaRanks", signature(param="gsvaParam"), - function(param, - verbose=TRUE, - BPPARAM=SerialParam(progressbar=verbose), - maxmem="auto") { - - if (verbose && gsva_global$show_start_and_end_messages) { - pkgversion <- packageDescription("GSVA")[["Version"]] - cli_alert_info("GSVA version {pkgversion}") - } - - .check_bpparam(BPPARAM) - - exprData <- get_exprData(param) - dataMatrix <- unwrapData(exprData, get_assay(param)) - maxmem <- .check_maxmem(param, maxmem, verbose) - ondisk <- .check_ondisk(param, maxmem, verbose) - - dataMatrix <- .check_sparse_load_input_expr(dataMatrix, "GSVA", - ondisk, verbose) - - filtDataMatrix <- dataMatrix - BPPARAM <- .check_open_parallelism(filtDataMatrix, BPPARAM, - minparrows=100, minparcols=100, - verbose) - - if (.get_filterRows(param)) - filtDataMatrix <- .filterGenes(dataMatrix, 
anyNA(param), - removeConstant=TRUE, - removeNzConstant=TRUE, - verbose, BPPARAM=BPPARAM, - maxmem=maxmem) - else if (verbose) { - msg <- "Skipping filtering of constant rows (filterRows=FALSE)" - cli_alert_warning(msg) - } - - if (verbose) - cli_alert_info(sprintf("Calculating GSVA ranks")) - - kcdfminssize <- .get_kcdfNoneMinSampleSize(param) - gsvarownr <- .compute_row_norm(expr=filtDataMatrix, - kcdf=.get_kcdf(param), - kcdf.min.ssize=kcdfminssize, - sparse=.get_sparse(param), - any_na=anyNA(param), - na_use=.get_NAuse(param), - verbose=verbose, - BPPARAM=BPPARAM, - maxmem=maxmem) - - gsvarnks <- .compute_gsva_ranks(Z=gsvarownr, - verbose=verbose, - BPPARAM=BPPARAM, - maxmem=maxmem) - - rownames(gsvarnks) <- rownames(filtDataMatrix) - colnames(gsvarnks) <- colnames(filtDataMatrix) - - rnkscontainer <- wrapData(get_exprData(param), gsvarnks, param, - "gsvaranks") - rval <- new("gsvaRanksParam", - exprData=rnkscontainer, geneSets=get_geneSets(param), - assay="gsvaranks", annotation=get_annotation(param), - minSize=get_minSize(param), maxSize=get_maxSize(param), - kcdf=.get_kcdf(param), - kcdfNoneMinSampleSize=.get_kcdfNoneMinSampleSize(param), - tau=.get_tau(param), maxDiff=.get_maxDiff(param), - absRanking=.get_absRanking(param), - sparse=.get_sparse(param), checkNA=.get_checkNA(param), - didCheckNA=.get_didCheckNA(param), anyNA=anyNA(param), - use=.get_NAuse(param), filterRows=.get_filterRows(param), - nzcount=nzcount(param), ondisk=.get_ondisk(param)) - - if (verbose && gsva_global$show_start_and_end_messages) - cli_alert_success("Calculations finished") - - return(rval) - }) - - -## ----- setters for gsvaRanksParam ----- +## ----- setters for gsvaParam ----- #' @param object For the replacement method, an object of class #' [`gsvaParam-class`]. @@ -1031,109 +937,6 @@ setReplaceMethod("geneSets", signature=signature(object="gsvaParam", object }) -#' @param param A parameter object of the [`gsvaRanksParam-class`] class. 
-#' -#' @return In the case of the `gsvaScores()` method, a gene-set by sample matrix -#' of GSVA enrichment scores stored in a container object of the same type as -#' the input ranks data container. If -#' the input was a base matrix or a `dgCMatrix` object, then the output will -#' be a base matrix object with the gene sets employed in the calculations -#' stored in an attribute called `geneSets`. If the input was an -#' `ExpressionSet` object, then the output will be also an `ExpressionSet` -#' object with the gene sets employed in the calculations stored in an -#' attributed called `geneSets`. If the input was an object of one of the -#' classes described in [`GsvaExprData`], such as a `SingleCellExperiment`, -#' then the output will be of the same class, where enrichment scores will be -#' stored in an assay called `es` and the gene sets employed in the -#' calculations will be stored in the `rowData` slot of the object under the -#' column name `gs`. -#' -#' @aliases gsvaScores,gsvaRanksParam-method -#' @name gsvaScores -#' @rdname gsvaRanks -#' -#' @importFrom S4Arrays is_sparse -#' @importFrom cli cli_alert_info cli_alert_success -#' @exportMethod gsvaScores -setMethod("gsvaScores", signature(param="gsvaRanksParam"), - function(param, verbose=TRUE, - BPPARAM=SerialParam(progressbar=verbose), - maxmem="auto") { - - if (verbose && gsva_global$show_start_and_end_messages) { - pkgversion <- packageDescription("GSVA")[["Version"]] - cli_alert_info("GSVA version {pkgversion}") - } - - .check_bpparam(BPPARAM) - - ## assuming rows in the rank data have been already filtered - exprData <- get_exprData(param) - filtDataMatrix <- unwrapData(exprData, get_assay(param)) - - filtMappedGeneSets <- .filterAndMapGeneSets(param=param, - filteredDataMatrix=filtDataMatrix, - verbose=verbose) - - sparse <- .get_sparse(param) - if (sparse && !is_sparse(filtDataMatrix)) - sparse <- FALSE - - if (verbose) { - if (sparse) - cli_alert_info("GSVA sparse algorithm") - else - 
cli_alert_info("GSVA dense (classical) algorithm") - } - - maxmem <- .check_maxmem(param, maxmem, verbose) - ondisk <- .check_ondisk(param, maxmem, verbose) - - filtDataMatrix <- .check_sparse_load_input_expr(filtDataMatrix, - "GSVA", ondisk, - verbose) - - BPPARAM <- .check_open_parallelism(filtDataMatrix, BPPARAM, - minparrows=100, minparcols=100, - verbose) - - ondisk <- .check_es_memory_requirements(filtDataMatrix, - filtMappedGeneSets, - ondisk, maxmem) - if (verbose) { - n <- length(filtMappedGeneSets) - cli_alert_info("Calculating GSVA scores for {n} gene sets") - } - - gsva_es <- .processMatrixCols(filtDataMatrix, - FUN=.compute_gsva_scores, - geneSetsIdx=filtMappedGeneSets, - tau=.get_tau(param), - maxDiff=.get_maxDiff(param), - absRanking=.get_absRanking(param), - sparse=sparse, any_na=anyNA(param), - na_use=.get_NAuse(param), - minSize=get_minSize(param), - ondisk=ondisk, verbose=verbose, - minparrows=100, minparcols=100, - BPPARAM=BPPARAM, - maxmem=ceiling(maxmem/100)) ## use - ## of memory increases here about - ## 10-fold over block size memory - - rownames(gsva_es) <- names(filtMappedGeneSets) - colnames(gsva_es) <- colnames(filtDataMatrix) - - gs <- .geneSetsIndices2Names(indices=filtMappedGeneSets, - names=rownames(filtDataMatrix)) - rval <- wrapData(get_exprData(param), gsva_es, param, "es", gs) - - if (verbose && gsva_global$show_start_and_end_messages) - cli_alert_success("Calculations finished") - - return(rval) - }) - #' @param rankExprData A column-rank expression data set obtained with #' [`gsvaColRanks`]. Must be one of the classes #' supported by [`GsvaExprData-class`]. For a list of these classes, see its @@ -1143,6 +946,13 @@ setMethod("gsvaScores", signature(param="gsvaRanksParam"), #' Currently, either a [`GeneSetCollection`][GSEABase::GeneSetCollection-class] #' object or a `list` object. 
#' +#' @return In the case of 'gsvaColScores()', an object of the same class as the +#' input expression data given in the argument `exprData` of the `gsvaParam` +#' object, containing the enrichment scores for the given gene sets. Note that +#' while it will have the same columns as the input expression data, the rows +#' will correspond to the gene sets for which the enrichment scores were +#' calculated. +#' #' @aliases gsvaColScores,GsvaExprData-method #' @name gsvaColScores #' @rdname gsvaRanks @@ -1244,7 +1054,7 @@ setMethod("gsvaColScores", signature(rankExprData="GsvaExprData"), #' @description Extract and plot enrichment data from GSVA scores. #' #' @param rankExprData A column-rank expression data set obtained with -#' [`gsvaColRanks`]. Must be one of the classes +#' [`gsvaColRanks`]. Must be one of the classes #' supported by [`GsvaExprData-class`]. For a list of these classes, see its #' help page using `help(GsvaExprData)`. #' @@ -1273,6 +1083,8 @@ setMethod("gsvaColScores", signature(rankExprData="GsvaExprData"), #' `plot="ggplot"`, this method returns a `ggplot` object. When `plot="base"` #' no value is returned. 
#' +#' @seealso [`gsvaColRanks`], [`GsvaExprData-class`] +#' #' @aliases gsvaEnrichment,GsvaExprData-method #' @name gsvaEnrichment #' @rdname gsvaEnrichment diff --git a/inst/unitTests/test_mainAPI.R b/inst/unitTests/test_mainAPI.R index ffc61d2..aca4ddb 100644 --- a/inst/unitTests/test_mainAPI.R +++ b/inst/unitTests/test_mainAPI.R @@ -14,10 +14,10 @@ test_mainAPI <- function() { gs <- replicate(ngs, sample(rownames(xf), 25, replace=FALSE), simplify=FALSE) names(gs) <- paste0("gs", seq_len(ngs)) - checkException(g <- gsvaRanks(gsvaParam(exprData=xf, geneSets=gs), - verbose=FALSE, maxmem=c(1, 2))) + checkException(g <- gsvaRowNorm(gsvaParam(exprData=xf, geneSets=gs), + verbose=FALSE, maxmem=c(1, 2))) - g <- gsvaRanks(gsvaParam(xf, gs), verbose=FALSE, maxmem="1M") + g <- gsvaRowNorm(gsvaParam(xf, gs), verbose=FALSE, maxmem="1M") ## check discarding rows with constant values library(cli) diff --git a/man/GSVA-pkg-deprecated.Rd b/man/GSVA-pkg-deprecated.Rd index 259ac1c..b92064f 100644 --- a/man/GSVA-pkg-deprecated.Rd +++ b/man/GSVA-pkg-deprecated.Rd @@ -2,10 +2,38 @@ % Please edit documentation in R/GSVA-pkg-deprecated.R \name{GSVA-pkg-deprecated} \alias{GSVA-pkg-deprecated} +\alias{gsvaRanks} +\alias{gsvaRanks,gsvaParam-method} +\alias{gsvaScores} +\alias{gsvaScores,gsvaRanksParam-method} \title{Deprecated functions in package \code{GSVA}.} +\usage{ +\S4method{gsvaRanks}{gsvaParam}( + param, + verbose = TRUE, + BPPARAM = SerialParam(progressbar = verbose), + maxmem = "auto" +) + +\S4method{gsvaScores}{gsvaRanksParam}( + param, + verbose = TRUE, + BPPARAM = SerialParam(progressbar = verbose), + maxmem = "auto" +) +} +\arguments{ +\item{param}{A parameter object of the \code{\link[=gsvaRanksParam-class]{gsvaRanksParam}} class.} +} \description{ The functions listed below are deprecated and will be defunct in the near future. When possible, alternative functions with similar functionality are also mentioned. + +The \code{gsvaRanks()} method is deprecated. 
Please use \code{gsvaRowNorm()} +and \code{gsvaColRanks()} instead. + +The \code{gsvaScores()} method is deprecated. Please use +\code{gsvaColScores()} instead. } \keyword{internal} diff --git a/man/gsva.Rd b/man/gsva.Rd index c966643..3f5a5b9 100644 --- a/man/gsva.Rd +++ b/man/gsva.Rd @@ -171,9 +171,7 @@ using singular value decomposition. \doi{10.1186/1471-2105-6-225} } \seealso{ -\code{\link{plageParam}}, \code{\link{zscoreParam}}, \code{\link{ssgseaParam}}, \code{\link{gsvaParam}}, +\code{\link{gsvaParam}}, \code{\link{plageParam}}, \code{\link{zscoreParam}}, \code{\link{ssgseaParam}}, \code{\link[BiocParallel:BiocParallelParam-class]{BiocParallelParam}}, -\code{\link[Matrix:dgCMatrix-class]{dgCMatrix}}, -\code{\link[Biobase]{ExpressionSet}}, -\code{\link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment}} +\code{\link{gsvaRowNorm}}, \code{\link{gsvaColRanks}}, \code{\link{gsvaColScores}} } diff --git a/man/gsvaEnrichment.Rd b/man/gsvaEnrichment.Rd index f2b064b..1a1c1ce 100644 --- a/man/gsvaEnrichment.Rd +++ b/man/gsvaEnrichment.Rd @@ -15,7 +15,7 @@ } \arguments{ \item{rankExprData}{A column-rank expression data set obtained with -\code{\link{gsvaColRanks}}. Must be one of the classes +\code{\link{gsvaColRanks}}. Must be one of the classes supported by \code{\link[=GsvaExprData-class]{GsvaExprData}}. For a list of these classes, see its help page using \code{help(GsvaExprData)}.} @@ -90,3 +90,6 @@ variation analysis for microarray and RNA-Seq data. \emph{BMC Bioinformatics}, 14:7, 2013. \doi{10.1186/1471-2105-14-7} } +\seealso{ +\code{\link{gsvaColRanks}}, \code{\link[=GsvaExprData-class]{GsvaExprData}} +} diff --git a/man/gsvaParam-class.Rd b/man/gsvaParam-class.Rd index 1501529..f96d499 100644 --- a/man/gsvaParam-class.Rd +++ b/man/gsvaParam-class.Rd @@ -306,4 +306,5 @@ variation analysis for microarray and RNA-Seq data. 
\code{\link[Biobase]{ExpressionSet}}, \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}, \code{\link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment}} +\code{\link[SpatialExperiment:SpatialExperiment-class]{SpatialExperiment}} } diff --git a/man/gsvaRanks.Rd b/man/gsvaRanks.Rd index 8497306..c61d73b 100644 --- a/man/gsvaRanks.Rd +++ b/man/gsvaRanks.Rd @@ -5,10 +5,6 @@ \alias{gsvaRowNorm,gsvaParam-method} \alias{gsvaColRanks} \alias{gsvaColRanks,GsvaExprData-method} -\alias{gsvaRanks} -\alias{gsvaRanks,gsvaParam-method} -\alias{gsvaScores} -\alias{gsvaScores,gsvaRanksParam-method} \alias{gsvaColScores} \alias{gsvaColScores,GsvaExprData-method} \title{GSVA ranks and scores} @@ -27,20 +23,6 @@ maxmem = "auto" ) -\S4method{gsvaRanks}{gsvaParam}( - param, - verbose = TRUE, - BPPARAM = SerialParam(progressbar = verbose), - maxmem = "auto" -) - -\S4method{gsvaScores}{gsvaRanksParam}( - param, - verbose = TRUE, - BPPARAM = SerialParam(progressbar = verbose), - maxmem = "auto" -) - \S4method{gsvaColScores}{GsvaExprData}( rankExprData, geneSets, @@ -50,7 +32,8 @@ ) } \arguments{ -\item{param}{A parameter object of the \code{\link[=gsvaRanksParam-class]{gsvaRanksParam}} class.} +\item{param}{A \code{\link[=gsvaParam-class]{gsvaParam}} object built using the constructor +function \code{\link{gsvaParam}}.} \item{verbose}{Gives information about each calculation step. Default: \code{TRUE}.} @@ -86,29 +69,29 @@ Currently, either a \code{\link[GSEABase:GeneSetCollection-class]{GeneSetCollect object or a \code{list} object.} } \value{ -In the case of the \code{gsvaRowNorm()} method, an object of class -\code{\link[=gsvaRanksParam-class]{gsvaRanksParam}}. - -In the case of the \code{gsvaColRanks()} method, an object of class -\code{\link[=gsvaRanksParam-class]{gsvaRanksParam}}. - -In the case of the \code{gsvaRanks()} method, an object of class -\code{\link[=gsvaRanksParam-class]{gsvaRanksParam}}. 
- -In the case of the \code{gsvaScores()} method, a gene-set by sample matrix -of GSVA enrichment scores stored in a container object of the same type as -the input ranks data container. If -the input was a base matrix or a \code{dgCMatrix} object, then the output will -be a base matrix object with the gene sets employed in the calculations -stored in an attribute called \code{geneSets}. If the input was an -\code{ExpressionSet} object, then the output will be also an \code{ExpressionSet} -object with the gene sets employed in the calculations stored in an -attributed called \code{geneSets}. If the input was an object of one of the -classes described in \code{\link{GsvaExprData}}, such as a \code{SingleCellExperiment}, -then the output will be of the same class, where enrichment scores will be -stored in an assay called \code{es} and the gene sets employed in the -calculations will be stored in the \code{rowData} slot of the object under the -column name \code{gs}. +In the case of 'gsvaRowNorm()', an object of the same class as the +input expression data given in the argument \code{exprData} of the \code{gsvaParam} +object, containing the row-normalized expression values. The resulting +object will have metadata with a copy of the input \code{gsvaParam} object, +except for the \code{exprData} slot, and in the case of being a derivative of a +\code{\link[SummarizedExperiment:SummarizedExperiment]{SummarizedExperiment}} object, +an additional assay called "gsvarownr" storing the row-normalized expression +values. + +In the case of 'gsvaColRanks()', an object of the same class as the +input expression data given in the argument \code{exprData} of the \code{gsvaParam} +object, containing the column rank values. 
The resulting object will have +metadata with a copy of the input \code{gsvaParam} object, except for the +\code{exprData} slot, and in the case of being a derivative of a +\code{\link[SummarizedExperiment:SummarizedExperiment]{SummarizedExperiment}} object, +an additional assay called "gsvaranks" storing the column rank values. + +In the case of 'gsvaColScores()', an object of the same class as the +input expression data given in the argument \code{exprData} of the \code{gsvaParam} +object, containing the enrichment scores for the given gene sets. Note that +while it will have the same columns as the input expression data, the rows +will correspond to the gene sets for which the enrichment scores were +calculated. } \description{ Calculate GSVA scores in three steps: (1) normalize values of @@ -165,9 +148,6 @@ variation analysis for microarray and RNA-Seq data. \doi{10.1186/1471-2105-14-7} } \seealso{ -\code{\link[=gsvaParam-class]{gsvaParam}}, \code{\link[=gsvaRanksParam-class]{gsvaRanksParam}}, \code{\link{gsva}}, +\code{\link[=gsvaParam-class]{gsvaParam}}, \code{\link{gsva}}, \code{\link{gsvaEnrichment}}, \code{\link[BiocParallel:BiocParallelParam-class]{BiocParallelParam}}, -\code{\link[Matrix:dgCMatrix-class]{dgCMatrix}}, -\code{\link[Biobase]{ExpressionSet}}, -\code{\link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment}} } From bf082248a311b722f517ccf1469219ad19bb1d21 Mon Sep 17 00:00:00 2001 From: Robert Castelo Date: Mon, 25 May 2026 10:50:33 +0200 Subject: [PATCH 06/12] Added dropExistingAssays parameter to gsvaRowNorm() and gsvaColRanks(). 
Added corresponding unit tests --- R/AllGenerics.R | 2 +- R/GSVA-pkg-deprecated.R | 13 +++-- R/gsva.R | 57 +++++++++++++----- R/plage.R | 8 ++- R/ssgsea.R | 8 ++- R/utils.R | 71 ++++++++++++++--------- R/zscore.R | 8 ++- inst/unitTests/test_inputdatacontainers.R | 7 ++- man/gsvaRanks.Rd | 12 ++++ 9 files changed, 127 insertions(+), 59 deletions(-) diff --git a/R/AllGenerics.R b/R/AllGenerics.R index 0eaa78b..a298e13 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -62,7 +62,7 @@ setGeneric("unwrapData", function(container, ...) standardGeneric("unwrapData")) setGeneric("wrapData", - function(container, dataMatrix, param, assay, geneSets) + function(container, dataMatrix, param, assay, dropAssays, geneSets) standardGeneric("wrapData")) setGeneric("mapGeneSetsToAnno", diff --git a/R/GSVA-pkg-deprecated.R b/R/GSVA-pkg-deprecated.R index c0b0f1e..c31f970 100644 --- a/R/GSVA-pkg-deprecated.R +++ b/R/GSVA-pkg-deprecated.R @@ -37,8 +37,8 @@ setMethod("gsvaRanks", signature(param="gsvaParam"), exprData <- get_exprData(param) dataMatrix <- unwrapData(exprData, get_assay(param)) - maxmem <- .check_maxmem(param, maxmem, verbose) - ondisk <- .check_ondisk(param, maxmem, verbose) + maxmem <- .check_maxmem(param, maxmem=maxmem, verbose=verbose) + ondisk <- .check_ondisk(param, maxmem=maxmem, verbose=verbose) dataMatrix <- .check_sparse_load_input_expr(dataMatrix, "GSVA", ondisk, verbose) @@ -82,7 +82,7 @@ setMethod("gsvaRanks", signature(param="gsvaParam"), colnames(gsvarnks) <- colnames(filtDataMatrix) rnkscontainer <- wrapData(get_exprData(param), gsvarnks, param, - "gsvaranks") + "gsvaranks", FALSE) rval <- new("gsvaRanksParam", exprData=rnkscontainer, geneSets=get_geneSets(param), assay="gsvaranks", annotation=get_annotation(param), @@ -151,8 +151,8 @@ setMethod("gsvaScores", signature(param="gsvaRanksParam"), cli_alert_info("GSVA dense (classical) algorithm") } - maxmem <- .check_maxmem(param, maxmem, verbose) - ondisk <- .check_ondisk(param, maxmem, verbose) + maxmem 
<- .check_maxmem(param, maxmem=maxmem, verbose=verbose) + ondisk <- .check_ondisk(param, maxmem=maxmem, verbose=verbose) filtDataMatrix <- .check_sparse_load_input_expr(filtDataMatrix, "GSVA", ondisk, @@ -191,7 +191,8 @@ setMethod("gsvaScores", signature(param="gsvaRanksParam"), gs <- .geneSetsIndices2Names(indices=filtMappedGeneSets, names=rownames(filtDataMatrix)) - rval <- wrapData(get_exprData(param), gsva_es, param, "es", gs) + rval <- wrapData(get_exprData(param), gsva_es, param, "es", + FALSE, gs) if (verbose && gsva_global$show_start_and_end_messages) cli_alert_success("Calculations finished") diff --git a/R/gsva.R b/R/gsva.R index 55e7a21..5d7c990 100644 --- a/R/gsva.R +++ b/R/gsva.R @@ -157,10 +157,13 @@ setMethod("gsva", signature(param="gsvaParam"), .check_bpparam(BPPARAM) gsvarownr <- gsvaRowNorm(param=param, verbose=verbose, + dropExistingAssays=TRUE, BPPARAM=BPPARAM, maxmem=maxmem) gsvaranks <- gsvaColRanks(rowNormExprData=gsvarownr, - verbose=verbose, BPPARAM=BPPARAM, + verbose=verbose, + dropExistingAssays=TRUE, + BPPARAM=BPPARAM, maxmem=maxmem) es <- gsvaColScores(rankExprData=gsvaranks, verbose=verbose, @@ -418,8 +421,8 @@ gsvaParam <- function(exprData, geneSets, anyNA=naparam$any_na, use=use, filterRows=filterRows, nzcount=nzc, ondisk=ondisk) - maxmem <- .check_maxmem(param, "auto", verbose) - .check_ondisk(param, maxmem, verbose) + maxmem <- .check_maxmem(param, maxmem="auto", verbose=verbose) + .check_ondisk(param, maxmem=maxmem, verbose=verbose) return(param) } @@ -431,7 +434,7 @@ setValidity("gsvaParam", function(object) { inv <- NULL xd <- object@exprData dd <- dim(xd) - an <- gsvaAssayNames(xd) + ## an <- gsvaAssayNames(xd) oa <- object@assay if(dd[1] == 0) { @@ -446,9 +449,10 @@ setValidity("gsvaParam", function(object) { if(length(oa) != 1) { inv <- c(inv, "@assay must be of length 1") } - if(.isCharLength1(oa) && .isCharNonEmpty(an) && (!(oa %in% an))) { - inv <- c(inv, "@assay must be one of assayNames(@exprData)") - } + ## this is 
incompatible with using dropExistingAssays=TRUE + ## if(.isCharLength1(oa) && .isCharNonEmpty(an) && (!(oa %in% an))) { + ## inv <- c(inv, "@assay must be one of assayNames(@exprData)") + ## } if(length(object@annotation) != 1) { inv <- c(inv, "@annotation must be of length 1") } @@ -685,6 +689,9 @@ setMethod("details", if (!any(assayNames(exprData) %in% c("gsvarownr", "gsvaranks"))) cli_abort(c("x"="Wrong metadata in the input expression data.")) } + ## an <- gsvaAssayNames(exprData) + ## if (!is.na(an) && p$assay %in% an) ## original assay have been dropped + ## p$assay <- assay param <- new("gsvaParam", exprData=exprData, geneSets=p$geneSets, @@ -713,6 +720,16 @@ setMethod("details", #' @param verbose Gives information about each calculation step. Default: #' `TRUE`. #' +#' @param dropExistingAssays Logical vector of length 1. It only applies when +#' the input expression data is stored using a +#' [`SummarizedExperiment`][SummarizedExperiment::SummarizedExperiment] +#' derivative, which allows one to store more than one matrix of expression +#' values in different assay slots. By default `dropExistingAssays=FALSE` and +#' the new assay with the row-normalized expression values or the column ranks +#' will be stored as a new assay in the same input object. When +#' `dropExistingAssays=TRUE`, any existing assay will be dropped before adding +#' the new assay with the row-normalized expression values or the column ranks. +#' #' @param BPPARAM An object of class `BiocParallelParam` specifying parameters #' related to the parallel execution of some of the tasks and calculations #' within this function. 
@@ -796,6 +813,7 @@ setMethod("details", setMethod("gsvaRowNorm", signature(param="gsvaParam"), function(param, verbose=TRUE, + dropExistingAssays=FALSE, BPPARAM=SerialParam(progressbar=verbose), maxmem="auto") { @@ -808,8 +826,8 @@ setMethod("gsvaRowNorm", signature(param="gsvaParam"), exprData <- get_exprData(param) dataMatrix <- unwrapData(exprData, get_assay(param)) - maxmem <- .check_maxmem(param, maxmem, verbose) - ondisk <- .check_ondisk(param, maxmem, verbose) + maxmem <- .check_maxmem(param, maxmem=maxmem, verbose=verbose) + ondisk <- .check_ondisk(param, maxmem=maxmem, verbose=verbose) dataMatrix <- .check_sparse_load_input_expr(dataMatrix, "GSVA", ondisk, verbose) @@ -848,7 +866,7 @@ setMethod("gsvaRowNorm", signature(param="gsvaParam"), colnames(gsvarownr) <- colnames(filtDataMatrix) rval <- wrapData(get_exprData(param), gsvarownr, param, - "gsvarownr") + "gsvarownr", dropExistingAssays) if (verbose && gsva_global$show_start_and_end_messages) cli_alert_success("Calculations finished") @@ -881,6 +899,7 @@ setMethod("gsvaRowNorm", signature(param="gsvaParam"), setMethod("gsvaColRanks", signature(rowNormExprData="GsvaExprData"), function(rowNormExprData, verbose=TRUE, + dropExistingAssays=FALSE, BPPARAM=SerialParam(progressbar=verbose), maxmem="auto") { @@ -894,8 +913,10 @@ setMethod("gsvaColRanks", signature(rowNormExprData="GsvaExprData"), .check_bpparam(BPPARAM) dataMatrix <- unwrapData(rowNormExprData, "gsvarownr") - maxmem <- .check_maxmem(param, maxmem, verbose) - ondisk <- .check_ondisk(param, maxmem, verbose) + maxmem <- .check_maxmem(param, assay="gsvarownr", maxmem=maxmem, + verbose=verbose) + ondisk <- .check_ondisk(param, assay="gsvarownr", maxmem=maxmem, + verbose=verbose) dataMatrix <- .check_sparse_load_input_expr(dataMatrix, "GSVA", ondisk, verbose) @@ -909,7 +930,7 @@ setMethod("gsvaColRanks", signature(rowNormExprData="GsvaExprData"), colnames(gsvarnks) <- colnames(dataMatrix) rval <- wrapData(get_exprData(param), gsvarnks, param, - 
"gsvaranks") + "gsvaranks", dropExistingAssays) if (verbose && gsva_global$show_start_and_end_messages) cli_alert_success("Calculations finished") @@ -1001,8 +1022,10 @@ setMethod("gsvaColScores", signature(rankExprData="GsvaExprData"), cli_alert_info("GSVA dense (classical) algorithm") } - maxmem <- .check_maxmem(param, maxmem, verbose) - ondisk <- .check_ondisk(param, maxmem, verbose) + maxmem <- .check_maxmem(param, assay="gsvaranks", maxmem=maxmem, + verbose=verbose) + ondisk <- .check_ondisk(param, assay="gsvaranks", maxmem=maxmem, + verbose=verbose) filtDataMatrix <- .check_sparse_load_input_expr(filtDataMatrix, "GSVA", ondisk, @@ -1041,7 +1064,9 @@ setMethod("gsvaColScores", signature(rankExprData="GsvaExprData"), gs <- .geneSetsIndices2Names(indices=filtMappedGeneSets, names=rownames(filtDataMatrix)) - rval <- wrapData(get_exprData(param), gsva_es, param, "es", gs) + rval <- wrapData(get_exprData(param), gsva_es, param, "es", + TRUE, gs) ## dropExistingAssays=TRUE for + ## consistency but doesn't apply here if (verbose && gsva_global$show_start_and_end_messages) cli_alert_success("Calculations finished") diff --git a/R/plage.R b/R/plage.R index df92e5b..7cda756 100644 --- a/R/plage.R +++ b/R/plage.R @@ -30,8 +30,8 @@ setMethod("gsva", signature(param="plageParam"), filtDataMatrix <- famGaGS[["filteredDataMatrix"]] filtMappedGeneSets <- famGaGS[["filteredMappedGeneSets"]] - maxmem <- .check_maxmem(param, maxmem, verbose) - ondisk <- .check_ondisk(param, maxmem, verbose) + maxmem <- .check_maxmem(param, maxmem=maxmem, verbose=verbose) + ondisk <- .check_ondisk(param, maxmem=maxmem, verbose=verbose) filtDataMatrix <- .check_sparse_load_input_expr(filtDataMatrix, "PLAGE", @@ -58,7 +58,9 @@ setMethod("gsva", signature(param="plageParam"), gs <- .geneSetsIndices2Names( indices=filtMappedGeneSets, names=rownames(filtDataMatrix)) - rval <- wrapData(get_exprData(param), plage_es, param, "es", gs) + rval <- wrapData(get_exprData(param), plage_es, param, "es", + TRUE, 
gs) ## dropExistingAssays=TRUE for + ## consistency but doesn't apply here if (verbose) cli_alert_success("Calculations finished") diff --git a/R/ssgsea.R b/R/ssgsea.R index 91cef1d..eebe942 100644 --- a/R/ssgsea.R +++ b/R/ssgsea.R @@ -31,8 +31,8 @@ setMethod("gsva", signature(param="ssgseaParam"), filtDataMatrix <- famGaGS[["filteredDataMatrix"]] filtMappedGeneSets <- famGaGS[["filteredMappedGeneSets"]] - maxmem <- .check_maxmem(param, maxmem, verbose) - ondisk <- .check_ondisk(param, maxmem, verbose) + maxmem <- .check_maxmem(param, maxmem=maxmem, verbose=verbose) + ondisk <- .check_ondisk(param, maxmem=maxmem, verbose=verbose) filtDataMatrix <- .check_sparse_load_input_expr(filtDataMatrix, "ssGSEA", @@ -64,7 +64,9 @@ setMethod("gsva", signature(param="ssgseaParam"), gs <- .geneSetsIndices2Names( indices=filtMappedGeneSets, names=rownames(filtDataMatrix)) - rval <- wrapData(get_exprData(param), ssgsea_es, param, "es", gs) + rval <- wrapData(get_exprData(param), ssgsea_es, param, "es", + TRUE, gs) ## dropExistingAssays=TRUE for + ## consistency but doesn't apply here if (verbose) cli_alert_success("Calculations finished") diff --git a/R/utils.R b/R/utils.R index fe403c1..53d040c 100644 --- a/R/utils.R +++ b/R/utils.R @@ -55,9 +55,10 @@ setMethod("unwrapData", signature("SpatialExperiment"), ## wrapData: put the resulting data and gene sets into the original data container type setMethod("wrapData", signature(container="matrix"), - function(container, dataMatrix, param, assay, geneSets) { - stopifnot(!missing(assay)) + function(container, dataMatrix, param, assay, dropAssays, geneSets) { stopifnot(!missing(param)) + stopifnot(!missing(assay)) + stopifnot(!missing(dropAssays)) attr(dataMatrix, "gsvaParam") <- .gsvaParam_as_list(param) attr(dataMatrix, "assay") <- assay if (!missing(geneSets)) @@ -66,9 +67,10 @@ setMethod("wrapData", signature(container="matrix"), }) setMethod("wrapData", signature(container="dgCMatrix"), - function(container, dataMatrix, param, 
assay, geneSets) { - stopifnot(!missing(assay)) + function(container, dataMatrix, param, assay, dropAssays, geneSets) { stopifnot(!missing(param)) + stopifnot(!missing(assay)) + stopifnot(!missing(dropAssays)) attr(dataMatrix, "gsvaParam") <- .gsvaParam_as_list(param) attr(dataMatrix, "assay") <- assay if (!missing(geneSets)) @@ -77,9 +79,10 @@ setMethod("wrapData", signature(container="dgCMatrix"), }) setMethod("wrapData", signature(container="SVT_SparseMatrix"), - function(container, dataMatrix, param, assay, geneSets) { - stopifnot(!missing(assay)) + function(container, dataMatrix, param, assay, dropAssays, geneSets) { stopifnot(!missing(param)) + stopifnot(!missing(assay)) + stopifnot(!missing(dropAssays)) attr(dataMatrix, "gsvaParam") <- .gsvaParam_as_list(param) attr(dataMatrix, "assay") <- assay if (!missing(geneSets)) @@ -88,9 +91,10 @@ setMethod("wrapData", signature(container="SVT_SparseMatrix"), }) setMethod("wrapData", signature(container="DelayedMatrix"), - function(container, dataMatrix, param, assay, geneSets) { - stopifnot(!missing(assay)) + function(container, dataMatrix, param, assay, dropAssays, geneSets) { stopifnot(!missing(param)) + stopifnot(!missing(assay)) + stopifnot(!missing(dropAssays)) attr(dataMatrix, "gsvaParam") <- .gsvaParam_as_list(param) attr(dataMatrix, "assay") <- assay if (!missing(geneSets)) @@ -99,9 +103,10 @@ setMethod("wrapData", signature(container="DelayedMatrix"), }) setMethod("wrapData", signature(container="ExpressionSet"), - function(container, dataMatrix, param, assay, geneSets) { - stopifnot(!missing(assay)) + function(container, dataMatrix, param, assay, dropAssays, geneSets) { stopifnot(!missing(param)) + stopifnot(!missing(assay)) + stopifnot(!missing(dropAssays)) rval <- new("ExpressionSet", exprs=dataMatrix, phenoData=phenoData(container), experimentData=experimentData(container), @@ -114,21 +119,30 @@ setMethod("wrapData", signature(container="ExpressionSet"), return(rval) }) +.check_existing_assay <- 
function(container, assay) { + if (assay %in% assayNames(container)) + cli_abort(c("x"=paste("Assay {assay} already exists in the input", + "container object."))) +} + #' @importFrom IRanges CharacterList #' @importFrom S4Vectors SimpleList setMethod("wrapData", signature(container="SummarizedExperiment"), - function(container, dataMatrix, param, assay, geneSets) { - stopifnot(!missing(assay)) + function(container, dataMatrix, param, assay, dropAssays, geneSets) { stopifnot(!missing(param)) + stopifnot(!missing(assay)) + stopifnot(!missing(dropAssays)) rdata <- NULL adata <- SimpleList(dataMatrix) names(adata) <- assay if (!missing(geneSets)) { ## storing enrichment scores only rdata <- DataFrame(gs=CharacterList(geneSets)) } else { ## missing geneSets implies adding an assay + .check_existing_assay(container, assay) stopifnot(all(rownames(dataMatrix) %in% rownames(container))) mask <- rownames(container) %in% rownames(dataMatrix) - adata <- c(assays(container[mask, ]), adata) + if (!dropAssays) + adata <- c(assays(container[mask, ]), adata) rdata <- rowData(container)[mask, ] } rval <- SummarizedExperiment( @@ -147,18 +161,21 @@ setMethod("wrapData", signature(container="SummarizedExperiment"), #' @importFrom S4Vectors SimpleList #' @importFrom SingleCellExperiment SingleCellExperiment setMethod("wrapData", signature(container="SingleCellExperiment"), - function(container, dataMatrix, param, assay, geneSets) { - stopifnot(!missing(assay)) + function(container, dataMatrix, param, assay, dropAssays, geneSets) { stopifnot(!missing(param)) + stopifnot(!missing(assay)) + stopifnot(!missing(dropAssays)) rdata <- NULL adata <- SimpleList(dataMatrix) names(adata) <- assay if (!missing(geneSets)) { ## storing enrichment scores only rdata <- DataFrame(gs=CharacterList(geneSets)) } else { ## missing geneSets implies adding an assay + .check_existing_assay(container, assay) stopifnot(all(rownames(dataMatrix) %in% rownames(container))) mask <- rownames(container) %in% 
rownames(dataMatrix) - adata <- c(assays(container[mask, ]), adata) + if (!dropAssays) + adata <- c(assays(container[mask, ]), adata) rdata <- rowData(container)[mask, ] } rval <- SingleCellExperiment( @@ -177,18 +194,21 @@ setMethod("wrapData", signature(container="SingleCellExperiment"), #' @importFrom S4Vectors SimpleList #' @importFrom SingleCellExperiment SingleCellExperiment setMethod("wrapData", signature(container="SpatialExperiment"), - function(container, dataMatrix, param, assay, geneSets) { - stopifnot(!missing(assay)) + function(container, dataMatrix, param, assay, dropAssays, geneSets) { stopifnot(!missing(param)) + stopifnot(!missing(assay)) + stopifnot(!missing(dropAssays)) rdata <- NULL adata <- SimpleList(dataMatrix) names(adata) <- assay if (!missing(geneSets)) { ## storing enrichment scores only rdata <- DataFrame(gs=CharacterList(geneSets)) } else { ## missing geneSets implies adding an assay + .check_existing_assay(container, assay) stopifnot(all(rownames(dataMatrix) %in% rownames(container))) mask <- rownames(container) %in% rownames(dataMatrix) - adata <- c(assays(container[mask, ]), adata) + if (!dropAssays) + adata <- c(assays(container[mask, ]), adata) rdata <- rowData(container)[mask, ] } rval <- SpatialExperiment( @@ -588,19 +608,18 @@ setMethod("wrapData", signature(container="SpatialExperiment"), #' @importFrom cli cli_abort cli_alert_info #' @importFrom memuse Sys.meminfo -.check_maxmem <- function(param, x, verbose) { - if (length(x) > 1 || (!is.numeric(x) && !is.character(x))) { +.check_maxmem <- function(param, assay=get_assay(param), maxmem, verbose) { + if (length(maxmem) > 1 || (!is.numeric(maxmem) && !is.character(maxmem))) { msg <- paste("'maxmem' should be a vector of length 1 of either a", "number in bytes or a character string formed by a", "number followed by the suffix K, M, G or T.") cli_abort(c("x"=msg)) } - maxmem <- x - if (is.character(x) && x == "auto") { + if (is.character(maxmem) && maxmem == "auto") { totalram 
<- Sys.meminfo()$totalram maxmem <- as.numeric(totalram * 0.9) ## auto takes 90% of RAM - X <- unwrapData(get_exprData(param), get_assay(param)) + X <- unwrapData(get_exprData(param), assay) if (verbose && is(X, "DelayedArray") && gsva_global$show_start_and_end_messages) cli_alert_info(sprintf("Maximum available main memory (90%%): %s", @@ -623,10 +642,10 @@ setMethod("wrapData", signature(container="SpatialExperiment"), #' @importFrom BiocGenerics type #' @importFrom S4Arrays is_sparse #' @importFrom memuse howbig -.check_ondisk <- function(param, maxmem, verbose) { +.check_ondisk <- function(param, assay=get_assay(param), maxmem, verbose) { ondisk <- .get_ondisk(param) if (ondisk == "auto") { - X <- unwrapData(get_exprData(param), get_assay(param)) + X <- unwrapData(get_exprData(param), assay) tot <- as.numeric(nrow(X)) * as.numeric(ncol(X)) rep <- "dense" spa <- 1 diff --git a/R/zscore.R b/R/zscore.R index 35ae04a..9ad73b7 100644 --- a/R/zscore.R +++ b/R/zscore.R @@ -31,8 +31,8 @@ setMethod("gsva", signature(param="zscoreParam"), filtDataMatrix <- famGaGS[["filteredDataMatrix"]] filtMappedGeneSets <- famGaGS[["filteredMappedGeneSets"]] - maxmem <- .check_maxmem(param, maxmem, verbose) - ondisk <- .check_ondisk(param, maxmem, verbose) + maxmem <- .check_maxmem(param, maxmem=maxmem, verbose=verbose) + ondisk <- .check_ondisk(param, maxmem=maxmem, verbose=verbose) filtDataMatrix <- .check_sparse_load_input_expr(filtDataMatrix, "Z-score", @@ -58,7 +58,9 @@ setMethod("gsva", signature(param="zscoreParam"), gs <- .geneSetsIndices2Names( indices=filtMappedGeneSets, names=rownames(filtDataMatrix)) - rval <- wrapData(get_exprData(param), zscore_es, param, "es", gs) + rval <- wrapData(get_exprData(param), zscore_es, param, "es", + TRUE, gs) ## dropExistingAssays=TRUE for + ## consistency but doesn't apply here if (verbose) cli_alert_success("Calculations finished") diff --git a/inst/unitTests/test_inputdatacontainers.R b/inst/unitTests/test_inputdatacontainers.R index 
55a64f6..89f2cb4 100644 --- a/inst/unitTests/test_inputdatacontainers.R +++ b/inst/unitTests/test_inputdatacontainers.R @@ -50,7 +50,7 @@ test_inputdatacontainers <- function() { ## estimate GSVA enrichment scores with input as a SummarizedExperiment object suppressPackageStartupMessages({ library(S4Vectors) - library(SummarizedExperiment) + library(SummarizedExperiment) }) se <- SummarizedExperiment(assay=list(counts=y2), @@ -69,6 +69,11 @@ test_inputdatacontainers <- function() { checkTrue(substr(out, 3, nchar(out)) == "No assay name provided; using default assay 'counts'") checkException(gsvaParam(se, gsets, assay="dummy")) + gsvarownr <- gsvaRowNorm(gsvapar, dropExistingAssays=TRUE, verbose=FALSE) + gsvaranks <- gsvaColRanks(gsvarownr, dropExistingAssays=TRUE, verbose=FALSE) + es.se2 <- gsvaColScores(gsvaranks, verbose=FALSE) + checkEqualsNumeric(assay(es.se), assay(es.se2)) + ## estimate GSVA enrichment scores with input as a dgCMatrix object suppressPackageStartupMessages(library(Matrix)) diff --git a/man/gsvaRanks.Rd b/man/gsvaRanks.Rd index c61d73b..3177e15 100644 --- a/man/gsvaRanks.Rd +++ b/man/gsvaRanks.Rd @@ -12,6 +12,7 @@ \S4method{gsvaRowNorm}{gsvaParam}( param, verbose = TRUE, + dropExistingAssays = FALSE, BPPARAM = SerialParam(progressbar = verbose), maxmem = "auto" ) @@ -19,6 +20,7 @@ \S4method{gsvaColRanks}{GsvaExprData}( rowNormExprData, verbose = TRUE, + dropExistingAssays = FALSE, BPPARAM = SerialParam(progressbar = verbose), maxmem = "auto" ) @@ -38,6 +40,16 @@ function \code{\link{gsvaParam}}.} \item{verbose}{Gives information about each calculation step. Default: \code{TRUE}.} +\item{dropExistingAssays}{Logical vector of length 1. It only applies when +the input expression data is stored using a +\code{\link[SummarizedExperiment:SummarizedExperiment]{SummarizedExperiment}} +derivative, which allows one to store more than one matrix of expression +values in different assay slots. 
By default \code{dropExistingAssays=FALSE} and +the new assay with the row-normalized expression values or the column ranks +will be stored as a new assay in the same input object. When +\code{dropExistingAssays=TRUE}, any existing assay will be dropped before adding +the new assay with the row-normalized expression values or the column ranks.} + \item{BPPARAM}{An object of class \code{BiocParallelParam} specifying parameters related to the parallel execution of some of the tasks and calculations within this function.} From af5941ab45426cb44ae0fb28fde8d63f463d05fc Mon Sep 17 00:00:00 2001 From: Robert Castelo Date: Mon, 25 May 2026 17:07:11 +0200 Subject: [PATCH 07/12] Added first and last parameters to enable restricting calculations to a range of rows in gsvaRowNorm(), or columns in gsvaColRanks() and gsvaColScores(). Added corresponding unit tests --- NAMESPACE | 2 + R/AllGenerics.R | 3 +- R/GSVA-pkg-deprecated.R | 20 +-- R/gsva.R | 104 +++++++++++----- R/plage.R | 10 +- R/ssgsea.R | 10 +- R/utils.R | 207 +++++++++++++++++++++++++------- R/zscore.R | 10 +- inst/unitTests/test_gsvaCcode.R | 112 +++++++++-------- inst/unitTests/test_gsvaRanks.R | 20 +++ man/gsvaParam-class.Rd | 16 +-- man/gsvaRanks.Rd | 18 +++ 12 files changed, 372 insertions(+), 160 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 78489f1..ec3a7ec 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -129,6 +129,8 @@ importFrom(S4Vectors,DataFrame) importFrom(S4Vectors,SimpleList) importFrom(S4Vectors,metadata) importFrom(SingleCellExperiment,SingleCellExperiment) +importFrom(SingleCellExperiment,altExps) +importFrom(SingleCellExperiment,reducedDims) importFrom(SparseArray,nzcount) importFrom(SparseArray,rowMeans) importFrom(SparseArray,rowSds) diff --git a/R/AllGenerics.R b/R/AllGenerics.R index a298e13..bd3dc2a 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -62,8 +62,7 @@ setGeneric("unwrapData", function(container, ...) 
standardGeneric("unwrapData")) setGeneric("wrapData", - function(container, dataMatrix, param, assay, dropAssays, geneSets) - standardGeneric("wrapData")) + function(container, ...) standardGeneric("wrapData")) setGeneric("mapGeneSetsToAnno", function(geneSets, anno, ...) standardGeneric("mapGeneSetsToAnno")) diff --git a/R/GSVA-pkg-deprecated.R b/R/GSVA-pkg-deprecated.R index c31f970..03e818a 100644 --- a/R/GSVA-pkg-deprecated.R +++ b/R/GSVA-pkg-deprecated.R @@ -38,10 +38,13 @@ setMethod("gsvaRanks", signature(param="gsvaParam"), exprData <- get_exprData(param) dataMatrix <- unwrapData(exprData, get_assay(param)) maxmem <- .check_maxmem(param, maxmem=maxmem, verbose=verbose) - ondisk <- .check_ondisk(param, maxmem=maxmem, verbose=verbose) + ondisk <- .check_ondisk(param, maxmem=maxmem, first=NA, last=NA, + whdim=2, verbose=verbose) dataMatrix <- .check_sparse_load_input_expr(dataMatrix, "GSVA", - ondisk, verbose) + first=NA, last=NA, + whdim=2, ondisk, + verbose) filtDataMatrix <- dataMatrix BPPARAM <- .check_open_parallelism(filtDataMatrix, BPPARAM, @@ -82,7 +85,8 @@ setMethod("gsvaRanks", signature(param="gsvaParam"), colnames(gsvarnks) <- colnames(filtDataMatrix) rnkscontainer <- wrapData(get_exprData(param), gsvarnks, param, - "gsvaranks", FALSE) + "gsvaranks", first=NA, last=NA, whdim=2, + dropAssays=FALSE) rval <- new("gsvaRanksParam", exprData=rnkscontainer, geneSets=get_geneSets(param), assay="gsvaranks", annotation=get_annotation(param), @@ -152,11 +156,13 @@ setMethod("gsvaScores", signature(param="gsvaRanksParam"), } maxmem <- .check_maxmem(param, maxmem=maxmem, verbose=verbose) - ondisk <- .check_ondisk(param, maxmem=maxmem, verbose=verbose) + ondisk <- .check_ondisk(param, maxmem=maxmem, first=NA, last=NA, + whdim=2, verbose=verbose) filtDataMatrix <- .check_sparse_load_input_expr(filtDataMatrix, - "GSVA", ondisk, - verbose) + "GSVA", first=NA, + last=NA, whdim=2, + ondisk, verbose) BPPARAM <- .check_open_parallelism(filtDataMatrix, BPPARAM, 
minparrows=100, minparcols=100, @@ -192,7 +198,7 @@ setMethod("gsvaScores", signature(param="gsvaRanksParam"), gs <- .geneSetsIndices2Names(indices=filtMappedGeneSets, names=rownames(filtDataMatrix)) rval <- wrapData(get_exprData(param), gsva_es, param, "es", - FALSE, gs) + first=NA, last=NA, whdim=2, dropAssays=FALSE, gs) if (verbose && gsva_global$show_start_and_end_messages) cli_alert_success("Calculations finished") diff --git a/R/gsva.R b/R/gsva.R index 5d7c990..152095f 100644 --- a/R/gsva.R +++ b/R/gsva.R @@ -422,11 +422,30 @@ gsvaParam <- function(exprData, geneSets, nzcount=nzc, ondisk=ondisk) maxmem <- .check_maxmem(param, maxmem="auto", verbose=verbose) - .check_ondisk(param, maxmem=maxmem, verbose=verbose) + .check_ondisk(param, maxmem=maxmem, first=NA, last=NA, whdim=1, + verbose=verbose) return(param) } +## ----- setters for gsvaParam ----- + +#' @param object For the replacement method, an object of class +#' [`gsvaParam-class`]. +#' +#' @param value For the replacement method, an object of the classes supported +#' by [`GsvaGeneSets-class`]. +#' +#' @aliases geneSets<- +#' @aliases geneSets<-,gsvaParam,GsvaGeneSets-method +#' @rdname gsvaParam-class +#' @exportMethod geneSets +setReplaceMethod("geneSets", signature=signature(object="gsvaParam", + value="GsvaGeneSets"), + function(object, value) { + object@geneSets <- value + object + }) ## ----- validator ----- @@ -689,9 +708,6 @@ setMethod("details", if (!any(assayNames(exprData) %in% c("gsvarownr", "gsvaranks"))) cli_abort(c("x"="Wrong metadata in the input expression data.")) } - ## an <- gsvaAssayNames(exprData) - ## if (!is.na(an) && p$assay %in% an) ## original assay have been dropped - ## p$assay <- assay param <- new("gsvaParam", exprData=exprData, geneSets=p$geneSets, @@ -730,6 +746,18 @@ setMethod("details", #' `dropExistingAssays=TRUE`, any existing assay will be dropped before adding #' the new assay with the row-normalized expression values or the column ranks. 
#' +#' @param first Numeric vector of length 1. First row, in the case of +#' `gsvaRowNorm()`, or first column, in the case of `gsvaColRanks()` and +#' `gsvaColScores()`, to which calculations should be restricted. By default, +#' `first=NA_real_`, which implies that calculations start at the first row or +#' column of the input expression data. +#' +#' @param last Numeric vector of length 1. Last row, in the case of +#' `gsvaRowNorm()`, or last column, in the case of `gsvaColRanks()` and +#' `gsvaColScores()`, to which calculations should be restricted. By default, +#' `last=NA_real_`, which implies that calculations end at the last row or +#' column of the input expression data. +#' #' @param BPPARAM An object of class `BiocParallelParam` specifying parameters #' related to the parallel execution of some of the tasks and calculations #' within this function. @@ -814,6 +842,7 @@ setMethod("gsvaRowNorm", signature(param="gsvaParam"), function(param, verbose=TRUE, dropExistingAssays=FALSE, + first=NA_real_, last=NA_real_, BPPARAM=SerialParam(progressbar=verbose), maxmem="auto") { @@ -826,10 +855,18 @@ setMethod("gsvaRowNorm", signature(param="gsvaParam"), exprData <- get_exprData(param) dataMatrix <- unwrapData(exprData, get_assay(param)) + + checkedfl <- .check_first_last_values(dataMatrix, nrow, "rows", + first, last) + first <- checkedfl$first + last <- checkedfl$last + maxmem <- .check_maxmem(param, maxmem=maxmem, verbose=verbose) - ondisk <- .check_ondisk(param, maxmem=maxmem, verbose=verbose) + ondisk <- .check_ondisk(param, first=first, last=last, whdim=1, + maxmem=maxmem, verbose=verbose) dataMatrix <- .check_sparse_load_input_expr(dataMatrix, "GSVA", + first, last, whdim=1, ondisk, verbose) filtDataMatrix <- dataMatrix @@ -866,7 +903,8 @@ setMethod("gsvaRowNorm", signature(param="gsvaParam"), colnames(gsvarownr) <- colnames(filtDataMatrix) rval <- wrapData(get_exprData(param), gsvarownr, param, - "gsvarownr", dropExistingAssays) + "gsvarownr", first, last, 
whdim=1, + dropExistingAssays) if (verbose && gsva_global$show_start_and_end_messages) cli_alert_success("Calculations finished") @@ -900,6 +938,7 @@ setMethod("gsvaColRanks", signature(rowNormExprData="GsvaExprData"), function(rowNormExprData, verbose=TRUE, dropExistingAssays=FALSE, + first=NA_real_, last=NA_real_, BPPARAM=SerialParam(progressbar=verbose), maxmem="auto") { @@ -913,12 +952,20 @@ setMethod("gsvaColRanks", signature(rowNormExprData="GsvaExprData"), .check_bpparam(BPPARAM) dataMatrix <- unwrapData(rowNormExprData, "gsvarownr") + + checkedfl <- .check_first_last_values(dataMatrix, ncol, "columns", + first, last) + first <- checkedfl$first + last <- checkedfl$last + maxmem <- .check_maxmem(param, assay="gsvarownr", maxmem=maxmem, verbose=verbose) - ondisk <- .check_ondisk(param, assay="gsvarownr", maxmem=maxmem, + ondisk <- .check_ondisk(param, assay="gsvarownr", first=first, + last=last, whdim=1, maxmem=maxmem, verbose=verbose) dataMatrix <- .check_sparse_load_input_expr(dataMatrix, "GSVA", + first, last, whdim=2, ondisk, verbose) gsvarnks <- .compute_gsva_ranks(Z=dataMatrix, @@ -930,7 +977,8 @@ setMethod("gsvaColRanks", signature(rowNormExprData="GsvaExprData"), colnames(gsvarnks) <- colnames(dataMatrix) rval <- wrapData(get_exprData(param), gsvarnks, param, - "gsvaranks", dropExistingAssays) + "gsvaranks", first, last, whdim=2, + dropExistingAssays) if (verbose && gsva_global$show_start_and_end_messages) cli_alert_success("Calculations finished") @@ -939,25 +987,6 @@ setMethod("gsvaColRanks", signature(rowNormExprData="GsvaExprData"), }) -## ----- setters for gsvaParam ----- - -#' @param object For the replacement method, an object of class -#' [`gsvaParam-class`]. -#' -#' @param value For the replacement method, an object of the classes supported -#' by [`GsvaGeneSets-class`]. 
-#' -#' @aliases geneSets<- -#' @aliases geneSets<-,gsvaParam,GsvaGeneSets-method -#' @rdname gsvaParam-class -#' @exportMethod geneSets -setReplaceMethod("geneSets", signature=signature(object="gsvaParam", - value="GsvaGeneSets"), - function(object, value) { - object@geneSets <- value - object - }) - #' @param rankExprData A column-rank expression data set obtained with #' [`gsvaColRanks`]. Must be one of the classes #' supported by [`GsvaExprData-class`]. For a list of these classes, see its @@ -983,6 +1012,7 @@ setReplaceMethod("geneSets", signature=signature(object="gsvaParam", #' @exportMethod gsvaColScores setMethod("gsvaColScores", signature(rankExprData="GsvaExprData"), function(rankExprData, geneSets, verbose=TRUE, + first=NA_real_, last=NA_real_, BPPARAM=SerialParam(progressbar=verbose), maxmem="auto") { @@ -1022,14 +1052,22 @@ setMethod("gsvaColScores", signature(rankExprData="GsvaExprData"), cli_alert_info("GSVA dense (classical) algorithm") } + checkedfl <- .check_first_last_values(filtDataMatrix, ncol, + "columns", first, last) + first <- checkedfl$first + last <- checkedfl$last + maxmem <- .check_maxmem(param, assay="gsvaranks", maxmem=maxmem, verbose=verbose) - ondisk <- .check_ondisk(param, assay="gsvaranks", maxmem=maxmem, + ondisk <- .check_ondisk(param, assay="gsvaranks", first=first, + last=last, whdim=1, maxmem=maxmem, verbose=verbose) + filtDataMatrix <- .check_sparse_load_input_expr(filtDataMatrix, - "GSVA", ondisk, - verbose) + "GSVA", first, + last, whdim=2, + ondisk, verbose) BPPARAM <- .check_open_parallelism(filtDataMatrix, BPPARAM, minparrows=100, minparcols=100, @@ -1064,9 +1102,10 @@ setMethod("gsvaColScores", signature(rankExprData="GsvaExprData"), gs <- .geneSetsIndices2Names(indices=filtMappedGeneSets, names=rownames(filtDataMatrix)) + + ## dropAssays=TRUE for consistency but doesn't apply here rval <- wrapData(get_exprData(param), gsva_es, param, "es", - TRUE, gs) ## dropExistingAssays=TRUE for - ## consistency but doesn't apply 
here + first, last, whdim=2, dropAssays=TRUE, gs) if (verbose && gsva_global$show_start_and_end_messages) cli_alert_success("Calculations finished") @@ -1267,7 +1306,6 @@ setMethod("gsvaEnrichment", signature(rankExprData="GsvaExprData"), - #' @importFrom S4Arrays is_sparse #' @importFrom DelayedArray seed #' @importFrom cli cli_abort diff --git a/R/plage.R b/R/plage.R index 7cda756..4be827a 100644 --- a/R/plage.R +++ b/R/plage.R @@ -31,10 +31,12 @@ setMethod("gsva", signature(param="plageParam"), filtMappedGeneSets <- famGaGS[["filteredMappedGeneSets"]] maxmem <- .check_maxmem(param, maxmem=maxmem, verbose=verbose) - ondisk <- .check_ondisk(param, maxmem=maxmem, verbose=verbose) + ondisk <- .check_ondisk(param, maxmem=maxmem, first=NA, last=NA, + whdim=2, verbose=verbose) filtDataMatrix <- .check_sparse_load_input_expr(filtDataMatrix, - "PLAGE", + "PLAGE", first=NA, + last=NA, whdim=2, ondisk, verbose) BPPARAM <- .check_open_parallelism(filtDataMatrix, BPPARAM, @@ -58,9 +60,9 @@ setMethod("gsva", signature(param="plageParam"), gs <- .geneSetsIndices2Names( indices=filtMappedGeneSets, names=rownames(filtDataMatrix)) + ## dropAssays=TRUE for consistency but doesn't apply here rval <- wrapData(get_exprData(param), plage_es, param, "es", - TRUE, gs) ## dropExistingAssays=TRUE for - ## consistency but doesn't apply here + first=NA, last=NA, whdim=2, dropAssays=TRUE, gs) if (verbose) cli_alert_success("Calculations finished") diff --git a/R/ssgsea.R b/R/ssgsea.R index eebe942..459f57e 100644 --- a/R/ssgsea.R +++ b/R/ssgsea.R @@ -32,10 +32,12 @@ setMethod("gsva", signature(param="ssgseaParam"), filtMappedGeneSets <- famGaGS[["filteredMappedGeneSets"]] maxmem <- .check_maxmem(param, maxmem=maxmem, verbose=verbose) - ondisk <- .check_ondisk(param, maxmem=maxmem, verbose=verbose) + ondisk <- .check_ondisk(param, maxmem=maxmem, first=NA, last=NA, + whdim=2, verbose=verbose) filtDataMatrix <- .check_sparse_load_input_expr(filtDataMatrix, - "ssGSEA", + "ssGSEA", first=NA, + 
last=NA, whdim=2, ondisk, verbose) BPPARAM <- .check_open_parallelism(filtDataMatrix, BPPARAM, @@ -64,9 +66,9 @@ setMethod("gsva", signature(param="ssgseaParam"), gs <- .geneSetsIndices2Names( indices=filtMappedGeneSets, names=rownames(filtDataMatrix)) + ## dropAssays=TRUE for consistency but doesn't apply here rval <- wrapData(get_exprData(param), ssgsea_es, param, "es", - TRUE, gs) ## dropExistingAssays=TRUE for - ## consistency but doesn't apply here + first=NA, last=NA, whdim=2, dropAssays=TRUE, gs) if (verbose) cli_alert_success("Calculations finished") diff --git a/R/utils.R b/R/utils.R index 53d040c..1c45d1a 100644 --- a/R/utils.R +++ b/R/utils.R @@ -53,59 +53,61 @@ setMethod("unwrapData", signature("SpatialExperiment"), }) +.wrapdata_nonSE <- function(dataMatrix, param, assay, first, last, whdim, + dropAssays, geneSets) { + stopifnot(!missing(param)) + stopifnot(!missing(assay)) + stopifnot(!missing(first)) + stopifnot(!missing(last)) + stopifnot(!missing(whdim)) + stopifnot(!missing(dropAssays)) + attr(dataMatrix, "gsvaParam") <- .gsvaParam_as_list(param) + attr(dataMatrix, "assay") <- assay + if (!is.na(first) || !is.na(last)) + attr(dataMatrix, "restrict") <- list(first=first, last=last, + whdim=whdim) + if (!missing(geneSets)) + attr(dataMatrix, "geneSets") <- geneSets + return(dataMatrix) +} + ## wrapData: put the resulting data and gene sets into the original data container type setMethod("wrapData", signature(container="matrix"), - function(container, dataMatrix, param, assay, dropAssays, geneSets) { - stopifnot(!missing(param)) - stopifnot(!missing(assay)) - stopifnot(!missing(dropAssays)) - attr(dataMatrix, "gsvaParam") <- .gsvaParam_as_list(param) - attr(dataMatrix, "assay") <- assay - if (!missing(geneSets)) - attr(dataMatrix, "geneSets") <- geneSets - return(dataMatrix) + function(container, dataMatrix, param, assay, first, last, whdim, + dropAssays, geneSets) { + .wrapdata_nonSE(dataMatrix, param, assay, first, last, whdim, + dropAssays, 
geneSets) }) setMethod("wrapData", signature(container="dgCMatrix"), - function(container, dataMatrix, param, assay, dropAssays, geneSets) { - stopifnot(!missing(param)) - stopifnot(!missing(assay)) - stopifnot(!missing(dropAssays)) - attr(dataMatrix, "gsvaParam") <- .gsvaParam_as_list(param) - attr(dataMatrix, "assay") <- assay - if (!missing(geneSets)) - attr(dataMatrix, "geneSets") <- geneSets - return(dataMatrix) + function(container, dataMatrix, param, assay, first, last, whdim, + dropAssays, geneSets) { + .wrapdata_nonSE(dataMatrix, param, assay, first, last, whdim, + dropAssays, geneSets) }) setMethod("wrapData", signature(container="SVT_SparseMatrix"), - function(container, dataMatrix, param, assay, dropAssays, geneSets) { - stopifnot(!missing(param)) - stopifnot(!missing(assay)) - stopifnot(!missing(dropAssays)) - attr(dataMatrix, "gsvaParam") <- .gsvaParam_as_list(param) - attr(dataMatrix, "assay") <- assay - if (!missing(geneSets)) - attr(dataMatrix, "geneSets") <- geneSets - return(dataMatrix) + function(container, dataMatrix, param, assay, first, last, whdim, + dropAssays, geneSets) { + .wrapdata_nonSE(dataMatrix, param, assay, first, last, whdim, + dropAssays, geneSets) }) setMethod("wrapData", signature(container="DelayedMatrix"), - function(container, dataMatrix, param, assay, dropAssays, geneSets) { - stopifnot(!missing(param)) - stopifnot(!missing(assay)) - stopifnot(!missing(dropAssays)) - attr(dataMatrix, "gsvaParam") <- .gsvaParam_as_list(param) - attr(dataMatrix, "assay") <- assay - if (!missing(geneSets)) - attr(dataMatrix, "geneSets") <- geneSets - return(dataMatrix) + function(container, dataMatrix, param, assay, first, last, whdim, + dropAssays, geneSets) { + .wrapdata_nonSE(dataMatrix, param, assay, first, last, whdim, + dropAssays, geneSets) }) setMethod("wrapData", signature(container="ExpressionSet"), - function(container, dataMatrix, param, assay, dropAssays, geneSets) { + function(container, dataMatrix, param, assay, first, last, 
whdim, + dropAssays, geneSets) { stopifnot(!missing(param)) stopifnot(!missing(assay)) + stopifnot(!missing(first)) + stopifnot(!missing(last)) + stopifnot(!missing(whdim)) stopifnot(!missing(dropAssays)) rval <- new("ExpressionSet", exprs=dataMatrix, phenoData=phenoData(container), @@ -113,6 +115,9 @@ setMethod("wrapData", signature(container="ExpressionSet"), annotation="") attr(rval, "gsvaParam") <- .gsvaParam_as_list(param) attr(rval, "assay") <- assay + if (!is.na(first) || !is.na(last)) + attr(rval, "restrict") <- list(first=first, last=last, + whdim=whdim) if (!missing(geneSets)) attr(rval, "geneSets") <- geneSets @@ -128,9 +133,13 @@ setMethod("wrapData", signature(container="ExpressionSet"), #' @importFrom IRanges CharacterList #' @importFrom S4Vectors SimpleList setMethod("wrapData", signature(container="SummarizedExperiment"), - function(container, dataMatrix, param, assay, dropAssays, geneSets) { + function(container, dataMatrix, param, assay, first, last, whdim, + dropAssays, geneSets) { stopifnot(!missing(param)) stopifnot(!missing(assay)) + stopifnot(!missing(first)) + stopifnot(!missing(last)) + stopifnot(!missing(whdim)) stopifnot(!missing(dropAssays)) rdata <- NULL adata <- SimpleList(dataMatrix) @@ -151,6 +160,9 @@ setMethod("wrapData", signature(container="SummarizedExperiment"), rowData=rdata, metadata=metadata(container)) metadata(rval)$gsvaParam <- .gsvaParam_as_list(param) + if (!is.na(first) || !is.na(last)) + metadata(rval)$restrict <- list(first=first, last=last, + whdim=whdim) if (!missing(geneSets)) ## row data has been replaced metadata(rval)$annotation <- NULL @@ -159,11 +171,15 @@ setMethod("wrapData", signature(container="SummarizedExperiment"), #' @importFrom IRanges CharacterList #' @importFrom S4Vectors SimpleList -#' @importFrom SingleCellExperiment SingleCellExperiment +#' @importFrom SingleCellExperiment SingleCellExperiment reducedDims altExps setMethod("wrapData", signature(container="SingleCellExperiment"), - 
function(container, dataMatrix, param, assay, dropAssays, geneSets) { + function(container, dataMatrix, param, assay, first, last, whdim, + dropAssays, geneSets) { stopifnot(!missing(param)) stopifnot(!missing(assay)) + stopifnot(!missing(first)) + stopifnot(!missing(last)) + stopifnot(!missing(whdim)) stopifnot(!missing(dropAssays)) rdata <- NULL adata <- SimpleList(dataMatrix) @@ -182,8 +198,13 @@ setMethod("wrapData", signature(container="SingleCellExperiment"), assays=adata, colData=colData(container), rowData=rdata, + reducedDims=reducedDims(container), + altExps=altExps(container), metadata=metadata(container)) metadata(rval)$gsvaParam <- .gsvaParam_as_list(param) + if (!is.na(first) || !is.na(last)) + metadata(rval)$restrict <- list(first=first, last=last, + whdim=whdim) if (!missing(geneSets)) ## row data has been replaced metadata(rval)$annotation <- NULL @@ -194,9 +215,13 @@ setMethod("wrapData", signature(container="SingleCellExperiment"), #' @importFrom S4Vectors SimpleList #' @importFrom SingleCellExperiment SingleCellExperiment setMethod("wrapData", signature(container="SpatialExperiment"), - function(container, dataMatrix, param, assay, dropAssays, geneSets) { + function(container, dataMatrix, param, assay, first, last, whdim, + dropAssays, geneSets) { stopifnot(!missing(param)) stopifnot(!missing(assay)) + stopifnot(!missing(first)) + stopifnot(!missing(last)) + stopifnot(!missing(whdim)) stopifnot(!missing(dropAssays)) rdata <- NULL adata <- SimpleList(dataMatrix) @@ -215,10 +240,15 @@ setMethod("wrapData", signature(container="SpatialExperiment"), assays=adata, colData=colData(container), rowData=rdata, + reducedDims=reducedDims(container), + altExps=altExps(container), metadata=metadata(container), imgData=imgData(container), spatialCoords=spatialCoords(container)) metadata(rval)$gsvaParam <- .gsvaParam_as_list(param) + if (!is.na(first) || !is.na(last)) + metadata(rval)$restrict <- list(first=first, last=last, + whdim=whdim) if 
(!missing(geneSets)) ## row data has been replaced metadata(rval)$annotation <- NULL @@ -269,7 +299,8 @@ setMethod("wrapData", signature(container="SpatialExperiment"), } } -.check_sparse_load_input_expr <- function(expr, method, ondisk, verbose) { +.check_sparse_load_input_expr <- function(expr, method, first, last, whdim, + ondisk, verbose) { if (method != "GSVA" && is_sparse(expr)) { msg <- paste("Input expression data is sparse, but the {method}", "algorithm does not deal with sparsity", @@ -278,6 +309,27 @@ setMethod("wrapData", signature(container="SpatialExperiment"), cli_alert_warning(msg) } + if (!is.na(first) || !is.na(last)) { + if (is.na(first)) + first <- 1 + if (is.na(last)) + last <- if (whdim == 1) nrow(expr) else ncol(expr) + if (whdim == 1) { + if (verbose) { + msg <- "Restricting to rows {first}:{last}" + cli_alert_info(msg) + } + expr <- expr[first:last, , drop=FALSE] + } else if (whdim == 2) { + if (verbose) { + msg <- "Restricting to columns {first}:{last}" + cli_alert_info(msg) + } + expr <- expr[, first:last, drop=FALSE] + } else + cli_abort(c("x"="Invalid internal value for 'whdim' argument.")) + } + if (is(expr, "DelayedMatrix") && !ondisk) { if (verbose) cli_alert_info("Loading input expression data into main memory") @@ -642,7 +694,8 @@ setMethod("wrapData", signature(container="SpatialExperiment"), #' @importFrom BiocGenerics type #' @importFrom S4Arrays is_sparse #' @importFrom memuse howbig -.check_ondisk <- function(param, assay=get_assay(param), maxmem, verbose) { +.check_ondisk <- function(param, assay=get_assay(param), first, last, whdim, + maxmem, verbose) { ondisk <- .get_ondisk(param) if (ondisk == "auto") { X <- unwrapData(get_exprData(param), assay) @@ -653,8 +706,21 @@ setMethod("wrapData", signature(container="SpatialExperiment"), rep <- "sparse" spa <- nzcount(param) / tot } - sze <- howbig(as.numeric(nrow(X)), as.numeric(ncol(X)), - representation=rep, sparsity=spa, type=type(X)) + sze <- 0 + if (is.na(first) && 
is.na(last)) + sze <- howbig(as.numeric(nrow(X)), as.numeric(ncol(X)), + representation=rep, sparsity=spa, type=type(X)) + else { + if (whdim == 1) { + sze <- howbig(as.numeric(last - first + 1), as.numeric(ncol(X)), + representation=rep, sparsity=spa, type=type(X)) + } else if (whdim == 2) { + sze <- howbig(as.numeric(nrow(X)), as.numeric(last - first + 1), + representation=rep, sparsity=spa, type=type(X)) + } else + cli_abort(c("x"="Invalid internal value for 'whdim' argument.")) + } + ondisk <- "no" if (as.numeric(sze) > maxmem) { ondisk <- "yes" @@ -738,6 +804,57 @@ setMethod("wrapData", signature(container="SpatialExperiment"), return((.isCharNonEmpty(x)) && (length(x) == 1)) } +.isNumNonEmpty <- function(x) { + return((!is.null(x)) && + (length(x) > 0) && + (is.numeric(x)) && + (!all(is.na(x))) && + (!all(is.nan(x)))) +} + +.isNumLength1 <- function(x) { + return((.isNumNonEmpty(x)) && (length(x) == 1)) +} + +#' @importFrom cli cli_abort +.check_first_last_values <- function(X, dimfun, dimname, first, last) { + dimfun <- match.fun(dimfun) + + if (all(is.na(first)) && all(is.na(last))) + return(list(first=NA_real_, last=NA_real_)) + + if (all(is.na(first))) + first <- 1 + if (all(is.na(last))) + last <- dimfun(X) + + if (.isNumLength1(first) && .isNumLength1(last)) { + if (first < 1 || last < 1 || first != as.integer(first) || + last != as.integer(last)) { + msg <- paste("arguments 'first' and 'last' must be", + "positive integers.") + cli_abort(c("x"=msg)) + } + if (first >= last) { + msg <- paste("argument 'first' must be smaller than", + "argument 'last'.") + cli_abort(c("x"=msg)) + } + } else { + msg <- paste("arguments 'first' and 'last' must be numeric vectors of", + "length 1.") + cli_abort(c("x"=msg)) + } + + if (first > dimfun(X) || last > dimfun(X)) { + msg <- paste("arguments 'first' and 'last' must be smaller than or equal", + "to the number of {dimname} of the input expression data.") + cli_abort(c("x"=msg)) + } + + return(list(first=first, 
last=last)) +} + ## annotation package checks .isAnnoPkgValid <- function(ap) { return(.isCharLength1(ap)) diff --git a/R/zscore.R b/R/zscore.R index 9ad73b7..9d936af 100644 --- a/R/zscore.R +++ b/R/zscore.R @@ -32,10 +32,12 @@ setMethod("gsva", signature(param="zscoreParam"), filtMappedGeneSets <- famGaGS[["filteredMappedGeneSets"]] maxmem <- .check_maxmem(param, maxmem=maxmem, verbose=verbose) - ondisk <- .check_ondisk(param, maxmem=maxmem, verbose=verbose) + ondisk <- .check_ondisk(param, maxmem=maxmem, first=NA, last=NA, + whdim=2, verbose=verbose) filtDataMatrix <- .check_sparse_load_input_expr(filtDataMatrix, - "Z-score", + "Z-score", first=NA, + last=NA, whdim=2, ondisk, verbose) BPPARAM <- .check_open_parallelism(filtDataMatrix, BPPARAM, @@ -58,9 +60,9 @@ setMethod("gsva", signature(param="zscoreParam"), gs <- .geneSetsIndices2Names( indices=filtMappedGeneSets, names=rownames(filtDataMatrix)) + ## dropAssays=TRUE for consistency but doesn't apply here rval <- wrapData(get_exprData(param), zscore_es, param, "es", - TRUE, gs) ## dropExistingAssays=TRUE for - ## consistency but doesn't apply here + first=NA, last=NA, whdim=2, dropAssays=TRUE, gs) if (verbose) cli_alert_success("Calculations finished") diff --git a/inst/unitTests/test_gsvaCcode.R b/inst/unitTests/test_gsvaCcode.R index 1a742ec..0fe0f54 100644 --- a/inst/unitTests/test_gsvaCcode.R +++ b/inst/unitTests/test_gsvaCcode.R @@ -22,10 +22,12 @@ test_gsvaCcode <- function() { gsvapar <- gsvaParam(y, geneSets) ## calculate GSVA ranks - gsvarankspar <- gsvaRanks(gsvapar, verbose=FALSE) - exprData <- GSVA:::get_exprData(gsvarankspar) - R <- GSVA:::unwrapData(exprData, get_assay(gsvarankspar)) - geneSetsIdx <- GSVA:::.filterAndMapGeneSets(param=gsvarankspar, + gsvarownorm <- gsvaRowNorm(gsvapar, verbose=FALSE) + gsvacolranks <- gsvaColRanks(gsvarownorm, verbose=FALSE) + param <- GSVA:::.pull_param(gsvacolranks) + exprData <- GSVA:::get_exprData(param) + R <- GSVA:::unwrapData(exprData, get_assay(param)) + 
geneSetsIdx <- GSVA:::.filterAndMapGeneSets(param=param, filteredDataMatrix=R, verbose=FALSE) @@ -35,12 +37,12 @@ test_gsvaCcode <- function() { GSVA:::.gsva_score_genesets_Rimp(geneSetsIdx, decOrdStat=rnkstats$dos, symRnkStat=rnkstats$srs, - maxDiff=GSVA:::.get_maxDiff(gsvarankspar), - absRanking=GSVA:::.get_absRanking(gsvarankspar), - tau=GSVA:::.get_tau(gsvarankspar), - any_na=anyNA(gsvarankspar), - na_use=GSVA:::.get_NAuse(gsvarankspar), - minSize=GSVA:::get_minSize(gsvarankspar)) + maxDiff=GSVA:::.get_maxDiff(param), + absRanking=GSVA:::.get_absRanking(param), + tau=GSVA:::.get_tau(param), + any_na=anyNA(param), + na_use=GSVA:::.get_NAuse(param), + minSize=GSVA:::get_minSize(param)) }, R=R) sco_R <- do.call("cbind", sco_R) @@ -48,13 +50,13 @@ test_gsvaCcode <- function() { wna_env <- new.env() assign("w", FALSE, envir=wna_env) sco_C <- GSVA:::.gsva_score_genesets(R, geneSetsIdx, is.integer(R[1, 1]), - sparse=GSVA:::.get_sparse(gsvarankspar), - maxDiff=GSVA:::.get_maxDiff(gsvarankspar), - absRanking=GSVA:::.get_absRanking(gsvarankspar), - tau=GSVA:::.get_tau(gsvarankspar), - any_na=anyNA(gsvarankspar), - na_use=GSVA:::.get_NAuse(gsvarankspar), - minSize=GSVA:::get_minSize(gsvarankspar), + sparse=GSVA:::.get_sparse(param), + maxDiff=GSVA:::.get_maxDiff(param), + absRanking=GSVA:::.get_absRanking(param), + tau=GSVA:::.get_tau(param), + any_na=anyNA(param), + na_use=GSVA:::.get_NAuse(param), + minSize=GSVA:::get_minSize(param), wna_env=wna_env, verbose=FALSE) ## both approaches to calculate GSVA scores must give @@ -73,15 +75,17 @@ test_gsvaCcode <- function() { gsvapar <- gsvaParam(y, geneSets, use="na.rm") ## calculate GSVA ranks - gsvarankspar <- gsvaRanks(gsvapar, verbose=FALSE) - exprData <- GSVA:::get_exprData(gsvarankspar) - R <- GSVA:::unwrapData(exprData, get_assay(gsvarankspar)) - geneSetsIdx <- GSVA:::.filterAndMapGeneSets(param=gsvarankspar, + gsvarownorm <- gsvaRowNorm(gsvapar, verbose=FALSE) + gsvacolranks <- gsvaColRanks(gsvarownorm, verbose=FALSE) 
+ param <- GSVA:::.pull_param(gsvacolranks) + exprData <- GSVA:::get_exprData(param) + R <- GSVA:::unwrapData(exprData, get_assay(param)) + geneSetsIdx <- GSVA:::.filterAndMapGeneSets(param=param, filteredDataMatrix=R, verbose=FALSE) sco_R <- lapply(as.list(1:ncol(R)), function(j, R) { - if (anyNA(gsvarankspar)) + if (anyNA(param)) rnkstats <- GSVA:::.ranks2stats_nas(R[, j], sparse=FALSE) else rnkstats <- GSVA:::.ranks2stats(R[, j], sparse=FALSE) @@ -90,12 +94,12 @@ test_gsvaCcode <- function() { sco_R <- GSVA:::.gsva_score_genesets_Rimp(geneSetsIdx, decOrdStat=rnkstats$dos, symRnkStat=rnkstats$srs, - maxDiff=GSVA:::.get_maxDiff(gsvarankspar), - absRanking=GSVA:::.get_absRanking(gsvarankspar), - tau=GSVA:::.get_tau(gsvarankspar), - any_na=anyNA(gsvarankspar), - na_use=GSVA:::.get_NAuse(gsvarankspar), - minSize=GSVA:::get_minSize(gsvarankspar)) + maxDiff=GSVA:::.get_maxDiff(param), + absRanking=GSVA:::.get_absRanking(param), + tau=GSVA:::.get_tau(param), + any_na=anyNA(param), + na_use=GSVA:::.get_NAuse(param), + minSize=GSVA:::get_minSize(param)) }, R=R) sco_R <- do.call("cbind", sco_R) @@ -103,13 +107,13 @@ test_gsvaCcode <- function() { wna_env <- new.env() assign("w", FALSE, envir=wna_env) sco_C <- GSVA:::.gsva_score_genesets(R, geneSetsIdx, is.integer(R[1, 1]), - sparse=GSVA:::.get_sparse(gsvarankspar), - maxDiff=GSVA:::.get_maxDiff(gsvarankspar), - absRanking=GSVA:::.get_absRanking(gsvarankspar), - tau=GSVA:::.get_tau(gsvarankspar), - any_na=anyNA(gsvarankspar), - na_use=GSVA:::.get_NAuse(gsvarankspar), - minSize=GSVA:::get_minSize(gsvarankspar), + sparse=GSVA:::.get_sparse(param), + maxDiff=GSVA:::.get_maxDiff(param), + absRanking=GSVA:::.get_absRanking(param), + tau=GSVA:::.get_tau(param), + any_na=anyNA(param), + na_use=GSVA:::.get_NAuse(param), + minSize=GSVA:::get_minSize(param), wna_env=wna_env, verbose=FALSE) ## both approaches to calculate GSVA scores must give @@ -120,15 +124,17 @@ test_gsvaCcode <- function() { gsvapar <- gsvaParam(y, geneSets, 
use="everything") ## calculate GSVA ranks - gsvarankspar <- gsvaRanks(gsvapar, verbose=FALSE) - exprData <- GSVA:::get_exprData(gsvarankspar) - R <- GSVA:::unwrapData(exprData, get_assay(gsvarankspar)) - geneSetsIdx <- GSVA:::.filterAndMapGeneSets(param=gsvarankspar, + gsvarownorm <- gsvaRowNorm(gsvapar, verbose=FALSE) + gsvacolranks <- gsvaColRanks(gsvarownorm, verbose=FALSE) + param <- GSVA:::.pull_param(gsvacolranks) + exprData <- GSVA:::get_exprData(param) + R <- GSVA:::unwrapData(exprData, get_assay(param)) + geneSetsIdx <- GSVA:::.filterAndMapGeneSets(param=param, filteredDataMatrix=R, verbose=FALSE) sco_R <- lapply(as.list(1:ncol(R)), function(j, R) { - if (anyNA(gsvarankspar)) + if (anyNA(param)) rnkstats <- GSVA:::.ranks2stats_nas(R[, j], sparse=FALSE) else rnkstats <- GSVA:::.ranks2stats(R[, j], sparse=FALSE) @@ -137,12 +143,12 @@ test_gsvaCcode <- function() { sco_R <- GSVA:::.gsva_score_genesets_Rimp(geneSetsIdx, decOrdStat=rnkstats$dos, symRnkStat=rnkstats$srs, - maxDiff=GSVA:::.get_maxDiff(gsvarankspar), - absRanking=GSVA:::.get_absRanking(gsvarankspar), - tau=GSVA:::.get_tau(gsvarankspar), - any_na=anyNA(gsvarankspar), - na_use=GSVA:::.get_NAuse(gsvarankspar), - minSize=GSVA:::get_minSize(gsvarankspar)) + maxDiff=GSVA:::.get_maxDiff(param), + absRanking=GSVA:::.get_absRanking(param), + tau=GSVA:::.get_tau(param), + any_na=anyNA(param), + na_use=GSVA:::.get_NAuse(param), + minSize=GSVA:::get_minSize(param)) }, R=R) sco_R <- do.call("cbind", sco_R) @@ -150,13 +156,13 @@ test_gsvaCcode <- function() { wna_env <- new.env() assign("w", FALSE, envir=wna_env) sco_C <- GSVA:::.gsva_score_genesets(R, geneSetsIdx, is.integer(R[1, 1]), - sparse=GSVA:::.get_sparse(gsvarankspar), - maxDiff=GSVA:::.get_maxDiff(gsvarankspar), - absRanking=GSVA:::.get_absRanking(gsvarankspar), - tau=GSVA:::.get_tau(gsvarankspar), - any_na=anyNA(gsvarankspar), - na_use=GSVA:::.get_NAuse(gsvarankspar), - minSize=GSVA:::get_minSize(gsvarankspar), + sparse=GSVA:::.get_sparse(param), + 
maxDiff=GSVA:::.get_maxDiff(param), + absRanking=GSVA:::.get_absRanking(param), + tau=GSVA:::.get_tau(param), + any_na=anyNA(param), + na_use=GSVA:::.get_NAuse(param), + minSize=GSVA:::get_minSize(param), wna_env=wna_env, verbose=FALSE) ## both approaches to calculate GSVA scores must give diff --git a/inst/unitTests/test_gsvaRanks.R b/inst/unitTests/test_gsvaRanks.R index d8584cc..2f459df 100644 --- a/inst/unitTests/test_gsvaRanks.R +++ b/inst/unitTests/test_gsvaRanks.R @@ -49,4 +49,24 @@ test_gsvaRanks <- function() { ggp <- gsvaEnrichment(gsvaranks, plot="ggplot") checkTrue(is(ggp, "ggplot")) checkTrue(identical(gsvaenrich$stats, ggp@data)) + + ## calculate again row-normalized expression values, + ## but this time in chunks + gsvarownorm1 <- gsvaRowNorm(gsvapar, first=1, last=5, verbose=FALSE) + gsvarownorm2 <- gsvaRowNorm(gsvapar, first=6, last=10, verbose=FALSE) + checkEqualsNumeric(gsvarownorm, rbind(gsvarownorm1, gsvarownorm2)) + checkException(gsvaRowNorm(gsvapar, first=10, last=6, verbose=FALSE)) + checkException(gsvaRowNorm(gsvapar, first=11, last=20, verbose=FALSE)) + + ## calculate again GSVA column ranks, but this time in chunks + gsvaranks1 <- gsvaColRanks(gsvarownorm, first=1, last=10, verbose=FALSE) + gsvaranks2 <- gsvaColRanks(gsvarownorm, first=11, last=20, verbose=FALSE) + gsvaranks3 <- gsvaColRanks(gsvarownorm, first=21, last=30, verbose=FALSE) + checkEqualsNumeric(gsvaranks, cbind(gsvaranks1, gsvaranks2, gsvaranks3)) + + ## calculate again GSVA scores from column ranks, but this time in chunks + gsva_es_c1 <- gsvaColScores(gsvaranks, first=1, last=10, verbose=FALSE) + gsva_es_c2 <- gsvaColScores(gsvaranks, first=11, last=20, verbose=FALSE) + gsva_es_c3 <- gsvaColScores(gsvaranks, first=21, last=30, verbose=FALSE) + checkEqualsNumeric(gsva_es1, cbind(gsva_es_c1, gsva_es_c2, gsva_es_c3)) } diff --git a/man/gsvaParam-class.Rd b/man/gsvaParam-class.Rd index f96d499..00af59d 100644 --- a/man/gsvaParam-class.Rd +++ b/man/gsvaParam-class.Rd @@ -5,9 
+5,9 @@ \alias{gsvaParam-class} \alias{gsvaRanksParam-class} \alias{gsvaParam} -\alias{anyNA,gsvaParam-method} \alias{geneSets<-,gsvaParam,GsvaGeneSets-method} \alias{geneSets<-} +\alias{anyNA,gsvaParam-method} \title{\code{gsvaParam} class} \usage{ gsvaParam( @@ -30,9 +30,9 @@ gsvaParam( verbose = TRUE ) -\S4method{anyNA}{gsvaParam}(x, recursive = FALSE) - \S4method{geneSets}{gsvaParam,GsvaGeneSets}(object) <- value + +\S4method{anyNA}{gsvaParam}(x, recursive = FALSE) } \arguments{ \item{exprData}{The expression data set. Must be one of the classes @@ -163,16 +163,16 @@ when \code{ondisk="no"} it will attempt to load all the data in main memory.} decisions made by the software during parameter object construction when \code{verbose=TRUE} (default) and remains silent otherwise.} -\item{x}{An object of class \code{\link[=gsvaParam-class]{gsvaParam}}.} - -\item{recursive}{Not used with \code{x} being an object of -class \code{\link[=gsvaParam-class]{gsvaParam}}.} - \item{object}{For the replacement method, an object of class \code{\link[=gsvaParam-class]{gsvaParam}}.} \item{value}{For the replacement method, an object of the classes supported by \code{\link[=GsvaGeneSets-class]{GsvaGeneSets}}.} + +\item{x}{An object of class \code{\link[=gsvaParam-class]{gsvaParam}}.} + +\item{recursive}{Not used with \code{x} being an object of +class \code{\link[=gsvaParam-class]{gsvaParam}}.} } \value{ A new \code{\link[=gsvaParam-class]{gsvaParam}} object. 
diff --git a/man/gsvaRanks.Rd b/man/gsvaRanks.Rd index 3177e15..ecb2d76 100644 --- a/man/gsvaRanks.Rd +++ b/man/gsvaRanks.Rd @@ -13,6 +13,8 @@ param, verbose = TRUE, dropExistingAssays = FALSE, + first = NA_real_, + last = NA_real_, BPPARAM = SerialParam(progressbar = verbose), maxmem = "auto" ) @@ -21,6 +23,8 @@ rowNormExprData, verbose = TRUE, dropExistingAssays = FALSE, + first = NA_real_, + last = NA_real_, BPPARAM = SerialParam(progressbar = verbose), maxmem = "auto" ) @@ -29,6 +33,8 @@ rankExprData, geneSets, verbose = TRUE, + first = NA_real_, + last = NA_real_, BPPARAM = SerialParam(progressbar = verbose), maxmem = "auto" ) @@ -50,6 +56,18 @@ will be stored as a new assay in the same input object. When \code{dropExistingAssays=TRUE}, any existing assay will be dropped before adding the new assay with the row-normalized expression values or the column ranks.} +\item{first}{Numeric vector of length 1. First row, in the case of +\code{gsvaRowNorm()}, or first column, in the case of \code{gsvaColRanks()} and +\code{gsvaColScores()}, to which calculations should be restricted. By default, +\code{first=NA_real_}, which implies that calculations start at the first row or +column of the input expression data.} + +\item{last}{Numeric vector of length 1. Last row, in the case of +\code{gsvaRowNorm()}, or last column, in the case of \code{gsvaColRanks()} and +\code{gsvaColScores()}, to which calculations should be restricted. 
By default, +\code{last=NA_real_}, which implies that calculations end at the last row or +column of the input expression data.} + \item{BPPARAM}{An object of class \code{BiocParallelParam} specifying parameters related to the parallel execution of some of the tasks and calculations within this function.} From f3fccf56ae5198a85be7b7a2b3c336374b4be9df Mon Sep 17 00:00:00 2001 From: Robert Castelo Date: Mon, 25 May 2026 20:11:02 +0200 Subject: [PATCH 08/12] Fixed saveHDF5GSVAranks() and loadHDF5GSVAranks() --- R/AllClasses.R | 19 ++- R/gsva.R | 18 ++- R/gsvaRanks_serialization.R | 164 +++++++++++++---------- inst/unitTests/test_ranksserialization.R | 46 ++++--- man/GsvaExprData-class.Rd | 20 ++- man/gsvaRanks_serialization.Rd | 39 ++++-- 6 files changed, 187 insertions(+), 119 deletions(-) diff --git a/R/AllClasses.R b/R/AllClasses.R index 700d70f..6cc5e67 100644 --- a/R/AllClasses.R +++ b/R/AllClasses.R @@ -6,14 +6,26 @@ #' Virtual superclass of expression data classes supported by `GSVA`. #' #' `GSVA` supports expression data matrices in a growing number of containers -#' and representations. This class union allows to store any of these in a slot -#' of another class as well as defining common methods for all of them. +#' and representations. This class union allows to store any of these in a slot +#' of another class as well as defining common methods for all of them. 
The +#' current list of supported classes is: +#' +#' * `matrix` +#' * `dgCMatrix` from package `Matrix` +#' * `SVT_SparseMatrix` from package `SparseArray` +#' * `DelayedMatrix` from package `DelayedArray` +#' * `HDF5Matrix` from package `HDF5Array` +#' * `ExpressionSet` from package `Biobase` +#' * `SummarizedExperiment` from package `SummarizedExperiment` +#' * `SingleCellExperiment` from package `SingleCellExperiment` +#' * `SpatialExperiment` from package `SpatialExperiment` #' #' @seealso #' [`matrix`], #' [`dgCMatrix`][Matrix::dgCMatrix-class], #' [`SVT_SparseMatrix`][SparseArray::SVT_SparseMatrix-class], #' [`DelayedMatrix`][DelayedArray::DelayedMatrix-class], +#' [`HDF5Array`][HDF5Array::HDF5Array-class], #' \code{\link[Biobase]{ExpressionSet}}, ### we are using the plain Rd above because ### #' [`ExpressionSet`][Biobase::ExpressionSet-class], @@ -22,8 +34,7 @@ ### ‘[Biobase:class.ExpressionSet]{ExpressionSet}’ #' [`SummarizedExperiment`][SummarizedExperiment::SummarizedExperiment-class], #' [`SingleCellExperiment`][SingleCellExperiment::SingleCellExperiment-class], -#' [`SpatialExperiment`][SpatialExperiment::SpatialExperiment-class], -#' [`HDF5Array`][HDF5Array::HDF5Array-class] +#' [`SpatialExperiment`][SpatialExperiment::SpatialExperiment-class] #' #' @importClassesFrom Matrix dgCMatrix #' @importClassesFrom Biobase ExpressionSet diff --git a/R/gsva.R b/R/gsva.R index 152095f..7daa762 100644 --- a/R/gsva.R +++ b/R/gsva.R @@ -967,10 +967,12 @@ setMethod("gsvaColRanks", signature(rowNormExprData="GsvaExprData"), dataMatrix <- .check_sparse_load_input_expr(dataMatrix, "GSVA", first, last, whdim=2, ondisk, verbose) + sparse <- .get_sparse(param) + if (sparse && !is_sparse(dataMatrix)) + sparse <- FALSE - gsvarnks <- .compute_gsva_ranks(Z=dataMatrix, - verbose=verbose, - BPPARAM=BPPARAM, + gsvarnks <- .compute_gsva_ranks(Z=dataMatrix, sparse=sparse, + verbose=verbose, BPPARAM=BPPARAM, maxmem=maxmem) rownames(gsvarnks) <- rownames(dataMatrix) @@ -1533,9 
+1535,13 @@ compute.col.ranks <- function(Z, ties.method="last", drop.sparsity=FALSE, #' @importFrom cli cli_alert_info #' @importFrom cli cli_progress_done cli_abort -.compute_gsva_ranks <- function(Z, verbose, BPPARAM=NULL, maxmem=Inf) { - if (verbose) - cli_alert_info("Calculating column ranks") +.compute_gsva_ranks <- function(Z, sparse, verbose, BPPARAM=NULL, maxmem=Inf) { + if (verbose) { + if (sparse) + cli_alert_info("Calculating sparse column ranks") + else + cli_alert_info("Calculating column ranks") + } ## here 'ties.method="last"' allows one to obtain the result ## from 'order()' based on ranks diff --git a/R/gsvaRanks_serialization.R b/R/gsvaRanks_serialization.R index 8e53ad3..0778daa 100644 --- a/R/gsvaRanks_serialization.R +++ b/R/gsvaRanks_serialization.R @@ -2,13 +2,16 @@ #' #' @description The functions `saveHDF5GSVAranks` and `loadHDF5GSVAranks` can #' be used to save and load the GSVA rank values to/from disk, respectively. -#' The `saveHDF5GSVAranks` function takes a `gsvaRanksParam` object and saves -#' the rank values along with the relevant metadata to a specified directory. -#' The `loadHDF5GSVAranks` function reads the saved data from the specified -#' directory and reconstructs the `gsvaRanksParam` object with the rank values +#' The `saveHDF5GSVAranks()` function takes the output of [`gsvaColRanks`] as +#' input, and saves the rank values along with the relevant metadata to a +#' specified directory. The `loadHDF5GSVAranks()` function reads the saved data +#' from the specified directory and returns an object with the GSVA rank values #' and their corresponding metadata. #' -#' @param x A [`gsvaRanksParam-class`] object to save to disk. +#' @param rankExprData A column-rank expression data set obtained with +#' [`gsvaColRanks`]. Must be one of the classes supported by +#' [`GsvaExprData-class`]. For a list of these classes, see its help page +#' using `help(GsvaExprData)`. 
#' #' @param dir The path to the directory where to save or load the GSVA ranks #' data. @@ -21,8 +24,13 @@ #' #' @return For `saveHDF5GSVAranks`, the path to the directory where the data #' has been saved is returned invisibly. For `loadHDF5GSVAranks`, a -#' `gsvaRanksParam` object is returned containing the loaded GSVA rank values -#' and their corresponding metadata. +#' an object is returned containing the loaded GSVA rank values and their +#' corresponding metadata. If the saved ranks were originally stored in a +#' [`SummarizedExperiment`][SummarizedExperiment::SummarizedExperiment] object +#' or one of its derived classes, then the returned object will be a +#' [`SummarizedExperiment`][SummarizedExperiment::SummarizedExperiment]. +#' Otherwise, the returned object will be a +#' [`DelayedMatrix`][DelayedArray::DelayedMatrix] object. #' #' @examples #' @@ -43,22 +51,25 @@ #' ## build GSVA parameter object #' gsvapar <- gsvaParam(y, geneSets) #' -#' ## calculate GSVA ranks -#' gsvarankspar <- gsvaRanks(gsvapar) +#' ## calculate row-normalized expression values +#' gsvarownorm <- gsvaRowNorm(gsvapar) +#' +#' ## calculate GSVA column ranks +#' gsvacolranks <- gsvaColRanks(gsvarownorm) #' #' ## calculate GSVA scores -#' es <- gsvaScores(gsvarankspar) +#' es <- gsvaColScores(gsvacolranks) #' #' ## save the GSVA ranks to disk #' dir <- tempfile() -#' saveHDF5GSVAranks(gsvarankspar, dir) +#' saveHDF5GSVAranks(gsvacolranks, dir) #' #' ## load the GSVA ranks from disk -#' loaded_gsvarankspar <- loadHDF5GSVAranks(dir) +#' loaded_gsvacolranks <- loadHDF5GSVAranks(dir) #' #' ## check that the loaded ranks provide the #' ## same scores as the original ranks -#' loaded_es <- gsvaScores(loaded_gsvarankspar) +#' loaded_es <- gsvaColScores(loaded_gsvacolranks) #' identical(es, loaded_es) #' #' @importFrom cli cli_abort @@ -69,35 +80,51 @@ #' @rdname gsvaRanks_serialization #' #' @export -saveHDF5GSVAranks <- function(x, dir, ...) 
{ - if (!inherits(x, "gsvaRanksParam")) - cli_abort("The input object in 'x' must be of class 'gsvaRanksParam'") - - edata <- get_exprData(x) - if (is(edata, "SummarizedExperiment")) { - an <- assayNames(edata) - if (!"gsvaranks" %in% an) - cli_abort("Cannot find the ranks in the input object 'x'") +saveHDF5GSVAranks <- function(rankExprData, dir, ...) { + if (!is(rankExprData, "GsvaExprData")) { + msg <- paste("The input object in 'rankExprData' must a subclass of", + "'GsvaExprData'. See 'help(GsvaExprData)' for details.") + cli_abort(msg) + } - for (a in an) - if (a != "gsvaranks") - assay(edata, a) <- NULL - } else { + se <- rankExprData + if (!is(se, "SummarizedExperiment")) { + param <- .pull_param(rankExprData, "gsvaranks") + first <- last <- NA_real_ + whdim <- NA_integer_ annot <- NULL - if (!is.null(attributes(edata)$annotation) && - is(attributes(edata)$annotation, "GeneIdentifierType")) { - annot <- attributes(edata)$annotation - attributes(edata)$annotation <- NULL + if (!is.null(attributes(rankExprData)$annotation) && + is(attributes(rankExprData)$annotation, "GeneIdentifierType")) { + annot <- attributes(rankExprData)$annotation + attributes(rankExprData)$annotation <- NULL + } + if (!is.null(attributes(rankExprData)$restrict)) { + first <- attributes(rankExprData)$restrict$first + last <- attributes(rankExprData)$restrict$last + whdim <- attributes(rankExprData)$restrict$whdim + attributes(rankExprData)$restrict <- NULL } - edata <- SummarizedExperiment(assays=list(gsvaranks=edata)) + se <- SummarizedExperiment(assays=list(dummy=rankExprData)) if (!is.null(annot)) - gsvaAnnotation(edata) <- annot - } + gsvaAnnotation(se) <- annot + se <- wrapData(se, rankExprData, param, "gsvaranks", first=first, + last=last, whdim=whdim, dropAssays=TRUE) + + } else { ## 'SummarizedExperiment' object, remove all assays except 'gsvaranks' + an <- assayNames(se) + if (!"gsvaranks" %in% an) + cli_abort("Cannot find the ranks in the input object given in the", + 
"'rankExprData' parameter.") + if (is.null(metadata(se)$gsvaParam)) + cli_abort("Cannot find the GSVA parameters in the metadata of the", + "input object given in the 'rankExprData' parameter.") - metadata(edata) <- c(metadata(edata), - list(gsvaRanksParam=.gsvaParam_as_list(x))) + for (a in an) ## remove all assays except the one with the ranks + if (a != "gsvaranks") + assay(se, a) <- NULL + } - saveHDF5SummarizedExperiment(edata, dir, ...) + saveHDF5SummarizedExperiment(se, dir, ...) invisible(dir) } @@ -111,47 +138,36 @@ saveHDF5GSVAranks <- function(x, dir, ...) { #' @export loadHDF5GSVAranks <- function(dir, ...) { - x <- loadHDF5SummarizedExperiment(dir, ...) - rnksmdata <- metadata(x)$gsvaRanksParam - if (is.null(rnksmdata)) { - msg <- "The given directory does not contain valid GSVA ranks data" + rankscontainer <- loadHDF5SummarizedExperiment(dir, ...) + + an <- assayNames(rankscontainer) + if (!"gsvaranks" %in% an) + cli_abort("Cannot find the ranks in the loaded object.") + + if (is.null(metadata(rankscontainer)$gsvaParam)) { + msg <- paste("Cannot find the GSVA parameters in the metadata of the", + "loaded GSVA ranks object.") cli_abort(c("x"=msg)) } - md <- metadata(x) - md$gsvaRanksParam <- NULL - metadata(x) <- md - if (is.null(rnksmdata$originalClassWasSE)) - cli_abort("Metadata is missing the original class information") - rnkscontainer <- x - if (!rnksmdata$originalClassWasSE) { - if (!"gsvaranks" %in% assayNames(x)) { - msg <- "The given directory does not contain valid GSVA ranks data" - cli_abort(c("x"=msg)) - } - rnkscontainer <- assay(x, "gsvaranks") - if (!is.null(gsvaAnnotation(x))) - gsvaAnnotation(rnkscontainer) <- gsvaAnnotation(x) + if (is.null(metadata(rankscontainer)$gsvaParam$originalClassWasSE)) { + msg <- paste("Cannot find the GSVA parameters in the metadata of the", + "loaded GSVA ranks object.") + cli_abort(c("x"=msg)) + } + + if (!metadata(rankscontainer)$gsvaParam$originalClassWasSE) { + gsvapar <- 
metadata(rankscontainer)$gsvaParam + restrict <- metadata(rankscontainer)$restrict + annotation <- metadata(rankscontainer)$annotation + rankscontainer <- unwrapData(rankscontainer, "gsvaranks") + attr(rankscontainer, "gsvaParam") <- gsvapar + attr(rankscontainer, "assay") <- "gsvaranks" + if (!is.null(annotation)) + attr(rankscontainer, "geneIdType") <- annotation + if (!is.null(restrict)) + attr(rankscontainer, "restrict") <- restrict } - new("gsvaRanksParam", - exprData=rnkscontainer, - geneSets=rnksmdata$geneSets, - assay=rnksmdata$assay, - annotation=rnksmdata$annotation, - minSize=rnksmdata$minSize, - maxSize=rnksmdata$maxSize, - kcdf=rnksmdata$kcdf, - kcdfNoneMinSampleSize=rnksmdata$kcdfNoneMinSampleSize, - tau=rnksmdata$tau, - maxDiff=rnksmdata$maxDiff, - absRanking=rnksmdata$absRanking, - sparse=rnksmdata$sparse, - checkNA=rnksmdata$checkNA, - didCheckNA=rnksmdata$didCheckNA, - anyNA=rnksmdata$anyNA, - use=rnksmdata$use, - filterRows=rnksmdata$filterRows, - nzcount=rnksmdata$nzcount, - ondisk=rnksmdata$ondisk) + return(rankscontainer) } diff --git a/inst/unitTests/test_ranksserialization.R b/inst/unitTests/test_ranksserialization.R index 86df5c4..9c719e1 100644 --- a/inst/unitTests/test_ranksserialization.R +++ b/inst/unitTests/test_ranksserialization.R @@ -2,49 +2,61 @@ test_ranksserialization <- function() { message("Running unit tests for ranks serialization") + suppressPackageStartupMessages(library(Matrix)) suppressPackageStartupMessages(library(GSEABase)) suppressPackageStartupMessages(library(SummarizedExperiment)) p <- 10 ## number of genes n <- 30 ## number of samples - nGrp1 <- 15 ## number of samples in group 1 - nGrp2 <- n - nGrp1 ## number of samples in group 2 ## consider three disjoint gene sets gsets <- list(gset1=paste0("g", 1:3), gset2=paste0("g", 4:6), gset3=paste0("g", 7:10)) - ## sample data from a normal distribution with mean 0 and st.dev. 
1 - y <- matrix(rnorm(n*p), nrow=p, ncol=n, - dimnames=list(paste("g", 1:p, sep="") , paste("s", 1:n, sep=""))) - se <- SummarizedExperiment(assays=list(counts=y)) - gsvaAnnotation(se) <- SymbolIdentifier("org.Hs.eg.db") + ## build a random sparse count matrix with 85% sparsity + cnt <- integer(n*p) + idx <- sample(length(cnt), size=round(length(cnt)*0.15)) ## 85% sparsity + cnt[idx] <- rpois(length(idx), lambda=2)+1 + cnt <- matrix(cnt, nrow=p, ncol=n, + dimnames=list(paste("g", 1:p, sep="") , paste("s", 1:n, sep=""))) + cnt <- Matrix(cnt, sparse=TRUE) + + se <- SummarizedExperiment(assays=list(counts=cnt)) ## build GSVA parameter object - gsvapar <- gsvaParam(se, gsets) + gsvapar <- gsvaParam(se, gsets, verbose=FALSE) + + ## calculate row-normalized expression values + gsvarownorm <- gsvaRowNorm(gsvapar, verbose=FALSE) - ## calculate GSVA ranks - gsvarankspar <- gsvaRanks(gsvapar) + ## calculate GSVA column ranks + gsvacolranks <- gsvaColRanks(gsvarownorm, verbose=FALSE) ## calculate GSVA scores - es <- gsvaScores(gsvarankspar) + es <- gsvaColScores(gsvacolranks, verbose=FALSE) ## save the GSVA ranks to disk dir <- tempfile() checkException(saveHDF5GSVAranks(gsvapar, dir)) - saveHDF5GSVAranks(gsvarankspar, dir) + saveHDF5GSVAranks(gsvacolranks, dir) ## load the GSVA ranks from disk - loaded_gsvarankspar <- loadHDF5GSVAranks(dir) + loaded_gsvacolranks <- loadHDF5GSVAranks(dir) ## check that the loaded ranks provide the ## same scores as the original ranks - loaded_es <- gsvaScores(loaded_gsvarankspar) + loaded_es <- gsvaColScores(loaded_gsvacolranks, verbose=FALSE) checkTrue(identical(es, loaded_es)) - ## tweak the input object to test the error handling of the loading function - assay(gsvarankspar@exprData, "gsvaranks") <- NULL - checkException(saveHDF5GSVAranks(gsvarankspar, dir)) + ## check it again saving and loading the ranks stored in a + ## non-'SummarizedExperiment' object + gsvapar <- gsvaParam(cnt, gsets, verbose=FALSE) + gsvarownorm <- 
gsvaRowNorm(gsvapar, verbose=FALSE) + gsvacolranks <- gsvaColRanks(gsvarownorm, verbose=FALSE) + saveHDF5GSVAranks(gsvacolranks, dir, replace=TRUE) + loaded_gsvacolranks <- loadHDF5GSVAranks(dir) + loaded_es <- gsvaColScores(loaded_gsvacolranks, verbose=FALSE) + checkEqualsNumeric(assay(es), loaded_es) } diff --git a/man/GsvaExprData-class.Rd b/man/GsvaExprData-class.Rd index 60e5188..ab85cbd 100644 --- a/man/GsvaExprData-class.Rd +++ b/man/GsvaExprData-class.Rd @@ -10,17 +10,29 @@ Virtual superclass of expression data classes supported by \code{GSVA}. } \details{ \code{GSVA} supports expression data matrices in a growing number of containers -and representations. This class union allows to store any of these in a slot -of another class as well as defining common methods for all of them. +and representations. This class union allows to store any of these in a slot +of another class as well as defining common methods for all of them. The +current list of supported classes is: +\itemize{ +\item \code{matrix} +\item \code{dgCMatrix} from package \code{Matrix} +\item \code{SVT_SparseMatrix} from package \code{SparseArray} +\item \code{DelayedMatrix} from package \code{DelayedArray} +\item \code{HDF5Matrix} from package \code{HDF5Array} +\item \code{ExpressionSet} from package \code{Biobase} +\item \code{SummarizedExperiment} from package \code{SummarizedExperiment} +\item \code{SingleCellExperiment} from package \code{SingleCellExperiment} +\item \code{SpatialExperiment} from package \code{SpatialExperiment} +} } \seealso{ \code{\link{matrix}}, \code{\link[Matrix:dgCMatrix-class]{dgCMatrix}}, \code{\link[SparseArray:SVT_SparseMatrix-class]{SVT_SparseMatrix}}, \code{\link[DelayedArray:DelayedMatrix-class]{DelayedMatrix}}, +\code{\link[HDF5Array:HDF5Array-class]{HDF5Array}}, \code{\link[Biobase]{ExpressionSet}}, \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}, 
\code{\link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment}}, -\code{\link[SpatialExperiment:SpatialExperiment-class]{SpatialExperiment}}, -\code{\link[HDF5Array:HDF5Array-class]{HDF5Array}} +\code{\link[SpatialExperiment:SpatialExperiment-class]{SpatialExperiment}} } diff --git a/man/gsvaRanks_serialization.Rd b/man/gsvaRanks_serialization.Rd index abb5133..96caff5 100644 --- a/man/gsvaRanks_serialization.Rd +++ b/man/gsvaRanks_serialization.Rd @@ -5,12 +5,15 @@ \alias{loadHDF5GSVAranks} \title{Save/load GSVA rank values to disk using HDF5 format} \usage{ -saveHDF5GSVAranks(x, dir, ...) +saveHDF5GSVAranks(rankExprData, dir, ...) loadHDF5GSVAranks(dir, ...) } \arguments{ -\item{x}{A \code{\link[=gsvaRanksParam-class]{gsvaRanksParam}} object to save to disk.} +\item{rankExprData}{A column-rank expression data set obtained with +\code{\link{gsvaColRanks}}. Must be one of the classes supported by +\code{\link[=GsvaExprData-class]{GsvaExprData}}. For a list of these classes, see its help page +using \code{help(GsvaExprData)}.} \item{dir}{The path to the directory where to save or load the GSVA ranks data.} @@ -24,16 +27,21 @@ respectively.} \value{ For \code{saveHDF5GSVAranks}, the path to the directory where the data has been saved is returned invisibly. For \code{loadHDF5GSVAranks}, a -\code{gsvaRanksParam} object is returned containing the loaded GSVA rank values -and their corresponding metadata. +an object is returned containing the loaded GSVA rank values and their +corresponding metadata. If the saved ranks were originally stored in a +\code{\link[SummarizedExperiment:SummarizedExperiment]{SummarizedExperiment}} object +or one of its derived classes, then the returned object will be a +\code{\link[SummarizedExperiment:SummarizedExperiment]{SummarizedExperiment}}. +Otherwise, the returned object will be a +\code{\link[DelayedArray:DelayedMatrix]{DelayedMatrix}} object. 
} \description{ The functions \code{saveHDF5GSVAranks} and \code{loadHDF5GSVAranks} can be used to save and load the GSVA rank values to/from disk, respectively. -The \code{saveHDF5GSVAranks} function takes a \code{gsvaRanksParam} object and saves -the rank values along with the relevant metadata to a specified directory. -The \code{loadHDF5GSVAranks} function reads the saved data from the specified -directory and reconstructs the \code{gsvaRanksParam} object with the rank values +The \code{saveHDF5GSVAranks()} function takes the output of \code{\link{gsvaColRanks}} as +input, and saves the rank values along with the relevant metadata to a +specified directory. The \code{loadHDF5GSVAranks()} function reads the saved data +from the specified directory and returns an object with the GSVA rank values and their corresponding metadata. } \examples{ @@ -55,22 +63,25 @@ y <- matrix(rnorm(n*p), nrow=p, ncol=n, ## build GSVA parameter object gsvapar <- gsvaParam(y, geneSets) -## calculate GSVA ranks -gsvarankspar <- gsvaRanks(gsvapar) +## calculate row-normalized expression values +gsvarownorm <- gsvaRowNorm(gsvapar) + +## calculate GSVA column ranks +gsvacolranks <- gsvaColRanks(gsvarownorm) ## calculate GSVA scores -es <- gsvaScores(gsvarankspar) +es <- gsvaColScores(gsvacolranks) ## save the GSVA ranks to disk dir <- tempfile() -saveHDF5GSVAranks(gsvarankspar, dir) +saveHDF5GSVAranks(gsvacolranks, dir) ## load the GSVA ranks from disk -loaded_gsvarankspar <- loadHDF5GSVAranks(dir) +loaded_gsvacolranks <- loadHDF5GSVAranks(dir) ## check that the loaded ranks provide the ## same scores as the original ranks -loaded_es <- gsvaScores(loaded_gsvarankspar) +loaded_es <- gsvaColScores(loaded_gsvacolranks) identical(es, loaded_es) } From 64d5f0527c9c494e893502a6fc35f91d3b35be84 Mon Sep 17 00:00:00 2001 From: Robert Castelo Date: Mon, 25 May 2026 21:00:16 +0200 Subject: [PATCH 09/12] Fix whdim argument in call to .check_ondisk() Co-authored-by: Copilot Autofix powered by AI 
<175728472+Copilot@users.noreply.github.com> --- R/gsva.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/gsva.R b/R/gsva.R index 7daa762..2b0a93b 100644 --- a/R/gsva.R +++ b/R/gsva.R @@ -961,7 +961,7 @@ setMethod("gsvaColRanks", signature(rowNormExprData="GsvaExprData"), maxmem <- .check_maxmem(param, assay="gsvarownr", maxmem=maxmem, verbose=verbose) ondisk <- .check_ondisk(param, assay="gsvarownr", first=first, - last=last, whdim=1, maxmem=maxmem, + last=last, whdim=2, maxmem=maxmem, verbose=verbose) dataMatrix <- .check_sparse_load_input_expr(dataMatrix, "GSVA", From 72e056b9031090e0fa478089fa50ce0335d04b0e Mon Sep 17 00:00:00 2001 From: Robert Castelo Date: Mon, 25 May 2026 21:01:52 +0200 Subject: [PATCH 10/12] Fix parameter whdim in call to .check_ondisk() Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- R/gsva.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/gsva.R b/R/gsva.R index 2b0a93b..8b1252a 100644 --- a/R/gsva.R +++ b/R/gsva.R @@ -1062,7 +1062,7 @@ setMethod("gsvaColScores", signature(rankExprData="GsvaExprData"), maxmem <- .check_maxmem(param, assay="gsvaranks", maxmem=maxmem, verbose=verbose) ondisk <- .check_ondisk(param, assay="gsvaranks", first=first, - last=last, whdim=1, maxmem=maxmem, + last=last, whdim=2, maxmem=maxmem, verbose=verbose) From 8178bcbba97d5f5cd0d2c27ae2b5137fa7eb5fc7 Mon Sep 17 00:00:00 2001 From: Robert Castelo Date: Mon, 25 May 2026 21:06:19 +0200 Subject: [PATCH 11/12] Missing comma in help page Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- R/gsva.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/gsva.R b/R/gsva.R index 8b1252a..acb3434 100644 --- a/R/gsva.R +++ b/R/gsva.R @@ -324,7 +324,7 @@ setMethod("gsva", signature(param="gsvaParam"), ### Non-topic package-anchored link(s) in Rd file 'gsvaParam-class.Rd': ### ‘[Biobase:class.ExpressionSet]{ExpressionSet}’ #' 
[`SummarizedExperiment`][SummarizedExperiment::SummarizedExperiment-class], -#' [`SingleCellExperiment`][SingleCellExperiment::SingleCellExperiment-class] +#' [`SingleCellExperiment`][SingleCellExperiment::SingleCellExperiment-class], #' [`SpatialExperiment`][SpatialExperiment::SpatialExperiment-class] #' #' @references Hänzelmann, S., Castelo, R. and Guinney, J. GSVA: Gene set From 85eac2a6a5d999fe0e3c2cc956ead5263a93a4e5 Mon Sep 17 00:00:00 2001 From: Robert Castelo Date: Mon, 25 May 2026 21:07:10 +0200 Subject: [PATCH 12/12] Fixed in the documentation --- R/gsvaRanks_serialization.R | 16 ++++++++++------ man/gsvaParam-class.Rd | 2 +- vignettes/GSVA_scRNAseq.Rmd | 2 +- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/R/gsvaRanks_serialization.R b/R/gsvaRanks_serialization.R index 0778daa..6a21f2b 100644 --- a/R/gsvaRanks_serialization.R +++ b/R/gsvaRanks_serialization.R @@ -112,12 +112,16 @@ saveHDF5GSVAranks <- function(rankExprData, dir, ...) { } else { ## 'SummarizedExperiment' object, remove all assays except 'gsvaranks' an <- assayNames(se) - if (!"gsvaranks" %in% an) - cli_abort("Cannot find the ranks in the input object given in the", - "'rankExprData' parameter.") - if (is.null(metadata(se)$gsvaParam)) - cli_abort("Cannot find the GSVA parameters in the metadata of the", - "input object given in the 'rankExprData' parameter.") + if (!"gsvaranks" %in% an) { + msg <- paste("Cannot find the ranks in the input object given in the", + "'rankExprData' parameter.") + cli_abort(c("x"=msg)) + } + if (is.null(metadata(se)$gsvaParam)) { + msg <- paste("Cannot find the GSVA parameters in the metadata of the", + "input object given in the 'rankExprData' parameter.") + cli_abort(c("x"=msg)) + } for (a in an) ## remove all assays except the one with the ranks if (a != "gsvaranks") diff --git a/man/gsvaParam-class.Rd b/man/gsvaParam-class.Rd index 00af59d..d1714eb 100644 --- a/man/gsvaParam-class.Rd +++ b/man/gsvaParam-class.Rd @@ -305,6 +305,6 @@ variation 
analysis for microarray and RNA-Seq data. \code{\link{matrix}}, \code{\link[Biobase]{ExpressionSet}}, \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}}, -\code{\link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment}} +\code{\link[SingleCellExperiment:SingleCellExperiment-class]{SingleCellExperiment}}, \code{\link[SpatialExperiment:SpatialExperiment-class]{SpatialExperiment}} } diff --git a/vignettes/GSVA_scRNAseq.Rmd b/vignettes/GSVA_scRNAseq.Rmd index 76d3148..c7fe456 100644 --- a/vignettes/GSVA_scRNAseq.Rmd +++ b/vignettes/GSVA_scRNAseq.Rmd @@ -75,7 +75,7 @@ specific support consists of the following features: typical single-cell data set will result in longer running times and larger memory consumption than running it in the default sparse regime for this type of data. - * The GSVA algorithm can be run either at once through a called to `gsva()` + * The GSVA algorithm can be run either at once through a call to `gsva()` with a parameter object or in three steps: (1) row normalization with `gsvaRowNorm()`; (2) column rank transformation with `gsvaColRanks()`; and (3) column enrichment scores calculation with `gsvaColScores()`.