From 8c469a0d4ec3c7ec2ed46b3c8dee8686322d1db8 Mon Sep 17 00:00:00 2001 From: Raphiel Date: Thu, 20 Aug 2020 11:46:44 -0400 Subject: [PATCH 1/3] Fixed Wedin bounds Using AJIVE MATLAB code as a guide, I performed two updates to the Wedin re-sampling scheme. First, I calculated angles from section 2.2.1 of Feng AJIVE paper by taking arcsin of block_wedin_samples where appropriate. Second, corrected sampling of perpindicular basis vectors, since there were two many before. --- R/ajive_decomposition.R | 7 ++++--- R/wedin_bound.R | 18 ++++++++++-------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/R/ajive_decomposition.R b/R/ajive_decomposition.R index 61f8e81..a43e94a 100644 --- a/R/ajive_decomposition.R +++ b/R/ajive_decomposition.R @@ -149,7 +149,8 @@ get_joint_scores <- function(blocks, block_svd, initial_signal_ranks, sv_thresho num_samples=n_wedin_samples) } - wedin_samples <- K - colSums(block_wedin_samples) + temp <- cbind(colSums(cos(block_wedin_samples)^2), 1) + wedin_samples <- apply(temp, 1, max) wedin_svsq_threshold <- quantile(wedin_samples, .05) rank_sel_results[['wedin']] <- list(block_wedin_samples=block_wedin_samples, @@ -254,7 +255,7 @@ get_final_decomposition <- function(X, joint_scores, sv_threshold, full=TRUE){ #' @param full Boolean. Do we compute the full J, I matrices or just the SVD (set to FALSE to save memory). get_individual_decomposition <- function(X, joint_scores, sv_threshold, full=TRUE){ - if(is.na(joint_scores)){ + if(is.na(joint_scores[1,1])){ indiv_decomposition <- get_svd(X) } else{ X_orthog <- (diag(dim(X)[1]) - joint_scores %*% t(joint_scores)) %*% X @@ -284,7 +285,7 @@ get_individual_decomposition <- function(X, joint_scores, sv_threshold, full=TRU #' @param full Boolean. Do we compute the full J, I matrices or just the SVD (set to FALSE to save memory). get_joint_decomposition <- function(X, joint_scores, full=TRUE){ - if(is.na(joint_scores)){ + if(is.na(joint_scores[1,1])){ joint_decomposition <- list(full= NA, rank=0, u=NA, d=NA, v=NA) return(joint_decomposition) } diff --git a/R/wedin_bound.R b/R/wedin_bound.R index fec0f8f..13d1a56 100644 --- a/R/wedin_bound.R +++ b/R/wedin_bound.R @@ -1,5 +1,5 @@ #' -#' Esimates the wedin bound for a data matrix with the resampling procedure. +#' Estimates the wedin bound for a data matrix with the resampling procedure. #' #' returns min(max(||E tilde{V}||, ||E^T tilde{U}||) / sigma_min(widetilde{A}), 1) from equation (7) #' @@ -15,16 +15,18 @@ get_wedin_bound_samples <- function(X, SVD, signal_rank, num_samples=1000){ U_sampled_norms <- wedin_bound_resampling(X=X, perp_basis=U_perp, right_vectors=FALSE, - num_samples=num_samples) + num_samples=num_samples, + signal_rank) V_perp <- SVD[['v']][ , -(1:signal_rank)] V_sampled_norms <- wedin_bound_resampling(X=X, perp_basis=V_perp, right_vectors=TRUE, - num_samples=num_samples) + num_samples=num_samples, + signal_rank) sigma_min <- SVD[['d']][signal_rank] - wedin_bound_samples <- mapply(function(u, v) min(max(u, v)/sigma_min, 1)^2, U_sampled_norms, V_sampled_norms) + wedin_bound_samples <- mapply(function(u, v) asin(min(max(u, v)/sigma_min, 1)), U_sampled_norms, V_sampled_norms) wedin_bound_samples } @@ -36,9 +38,10 @@ get_wedin_bound_samples <- function(X, SVD, signal_rank, num_samples=1000){ #' @param perp_basis Matrix. Either U_perp or V_perp: the remaining left/right singluar vectors of X after estimating the signal rank. #' @param right_vectors Boolean. Right multiplication or left multiplication. #' @param num_samples Integer. Number of vectors selected for resampling procedure. -wedin_bound_resampling <- function(X, perp_basis, right_vectors, num_samples=1000){ +#' @param signal_rank Integer. The estimated signal rank of X. +wedin_bound_resampling <- function(X, perp_basis, right_vectors, num_samples=1000, signal_rank){ - rank <- dim(perp_basis)[2] + rank <- signal_rank resampled_norms <- rep(0, num_samples) for(s in 1:num_samples){ @@ -57,8 +60,7 @@ wedin_bound_resampling <- function(X, perp_basis, right_vectors, num_samples=100 } # operator L2 norm - resampled_norms[s] <- norm(resampled_projection, - type='2') + resampled_norms[s] <- svd(resampled_projection)[['d']][1] } resampled_norms From 653d760dcbb7a3b4a89d7975241ab0a54064d80b Mon Sep 17 00:00:00 2001 From: Raphiel Date: Thu, 20 Aug 2020 20:32:28 -0400 Subject: [PATCH 2/3] fixed warning for is.na(joint_scores) --- R/ajive_decomposition.R | 6 +++--- DESCRIPTION => vignettes/DESCRIPTION | 0 2 files changed, 3 insertions(+), 3 deletions(-) rename DESCRIPTION => vignettes/DESCRIPTION (100%) diff --git a/R/ajive_decomposition.R b/R/ajive_decomposition.R index a43e94a..fa8c785 100644 --- a/R/ajive_decomposition.R +++ b/R/ajive_decomposition.R @@ -255,7 +255,7 @@ get_final_decomposition <- function(X, joint_scores, sv_threshold, full=TRUE){ #' @param full Boolean. Do we compute the full J, I matrices or just the SVD (set to FALSE to save memory). get_individual_decomposition <- function(X, joint_scores, sv_threshold, full=TRUE){ - if(is.na(joint_scores[1,1])){ + if(sum(is.na(joint_scores))>0){ indiv_decomposition <- get_svd(X) } else{ X_orthog <- (diag(dim(X)[1]) - joint_scores %*% t(joint_scores)) %*% X @@ -277,7 +277,7 @@ get_individual_decomposition <- function(X, joint_scores, sv_threshold, full=TRU indiv_decomposition } -#' Computes the joint matix for a data block. +#' Computes the joint matrix for a data block. #' #' #' @param X Matrix. The original data matrix. @@ -285,7 +285,7 @@ get_individual_decomposition <- function(X, joint_scores, sv_threshold, full=TRU #' @param full Boolean. Do we compute the full J, I matrices or just the SVD (set to FALSE to save memory). get_joint_decomposition <- function(X, joint_scores, full=TRUE){ - if(is.na(joint_scores[1,1])){ + if(sum(is.na(joint_scores))>0){ joint_decomposition <- list(full= NA, rank=0, u=NA, d=NA, v=NA) return(joint_decomposition) } diff --git a/DESCRIPTION b/vignettes/DESCRIPTION similarity index 100% rename from DESCRIPTION rename to vignettes/DESCRIPTION From 29b8b79d12534a1850c07410522b07798a07e8e1 Mon Sep 17 00:00:00 2001 From: Raphiel Date: Mon, 22 Feb 2021 13:47:39 -0500 Subject: [PATCH 3/3] Updated final decomposition to ensure that initial signal ranks are used to obtain PC scores --- R/ajive_decomposition.R | 16 ++++++++-------- R/wedin_bound.R | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/R/ajive_decomposition.R b/R/ajive_decomposition.R index fa8c785..bb86de3 100644 --- a/R/ajive_decomposition.R +++ b/R/ajive_decomposition.R @@ -31,7 +31,7 @@ ajive <- function(blocks, initial_signal_ranks, full=TRUE, n_wedin_samples=1000, } if(sum(sapply(blocks,function(X) any(is.na(X)))) > 0){ - stop('Some of the blocks has missing data -- ajive expects full data matrices.') + stop('Some of the blocks have missing data -- ajive expects full data matrices.') } # TODO: should we give the option to center the data? @@ -72,7 +72,7 @@ ajive <- function(blocks, initial_signal_ranks, full=TRUE, n_wedin_samples=1000, for(k in 1:K){ block_decomps[[k]] <- get_final_decomposition(X=blocks[[k]], joint_scores=joint_scores, - sv_threshold=sv_thresholds[k]) + signal_rank=initial_signal_ranks[k]) } jive_decomposition <- list(block_decomps=block_decomps) @@ -228,12 +228,12 @@ get_joint_scores <- function(blocks, block_svd, initial_signal_ranks, sv_thresho #' #' @param X Matrix. The original data matrix. #' @param joint_scores Matrix. The basis of the joint space (dimension n x joint_rank). -#' @param sv_threshold Numeric vector. The singular value thresholds from the initial signal rank estimates. +#' @param signal_rank Initial signal rank #' @param full Boolean. Do we compute the full J, I matrices or just the SVDs (set to FALSE to save memory).. -get_final_decomposition <- function(X, joint_scores, sv_threshold, full=TRUE){ +get_final_decomposition <- function(X, joint_scores, signal_rank, full=TRUE){ jive_decomposition <- list() - jive_decomposition[['individual']] <- get_individual_decomposition(X, joint_scores, sv_threshold, full) + jive_decomposition[['individual']] <- get_individual_decomposition(X, joint_scores, signal_rank, full) jive_decomposition[['joint']] <- get_joint_decomposition(X, joint_scores, full) @@ -251,9 +251,9 @@ get_final_decomposition <- function(X, joint_scores, sv_threshold, full=TRUE){ #' #' @param X Matrix. The original data matrix. #' @param joint_scores Matrix. The basis of the joint space (dimension n x joint_rank). -#' @param sv_threshold Numeric vector. The singular value thresholds from the initial signal rank estimates. +#' @param signal_rank Initial signal rank #' @param full Boolean. Do we compute the full J, I matrices or just the SVD (set to FALSE to save memory). -get_individual_decomposition <- function(X, joint_scores, sv_threshold, full=TRUE){ +get_individual_decomposition <- function(X, joint_scores, signal_rank, full=TRUE){ if(sum(is.na(joint_scores))>0){ indiv_decomposition <- get_svd(X) @@ -263,7 +263,7 @@ get_individual_decomposition <- function(X, joint_scores, sv_threshold, full=TRU } - indiv_rank <- sum(indiv_decomposition[['d']] > sv_threshold) + indiv_rank <- signal_rank - ncol(joint_scores) indiv_decomposition <- truncate_svd(decomposition=indiv_decomposition, rank=indiv_rank) diff --git a/R/wedin_bound.R b/R/wedin_bound.R index 13d1a56..fb1c73f 100644 --- a/R/wedin_bound.R +++ b/R/wedin_bound.R @@ -11,14 +11,14 @@ get_wedin_bound_samples <- function(X, SVD, signal_rank, num_samples=1000){ # resample for U and V - U_perp <- SVD[['u']][ , -(1:signal_rank)] + U_perp <- matrix(SVD[['u']][ , -(1:signal_rank)], nrow = nrow(SVD[['u']])) U_sampled_norms <- wedin_bound_resampling(X=X, perp_basis=U_perp, right_vectors=FALSE, num_samples=num_samples, signal_rank) - V_perp <- SVD[['v']][ , -(1:signal_rank)] + V_perp <- matrix(SVD[['v']][ , -(1:signal_rank)], nrow = nrow(SVD[['v']])) V_sampled_norms <- wedin_bound_resampling(X=X, perp_basis=V_perp, right_vectors=TRUE, @@ -60,7 +60,7 @@ wedin_bound_resampling <- function(X, perp_basis, right_vectors, num_samples=100 } # operator L2 norm - resampled_norms[s] <- svd(resampled_projection)[['d']][1] + resampled_norms[s] <- svd(resampled_projection, nu = 0, nv = 0)[['d']][1] } resampled_norms