From 288bd2cf5a5e48c23be1214e12633235229f3d69 Mon Sep 17 00:00:00 2001 From: Alberto Mercurio Date: Mon, 2 Feb 2026 00:07:22 +0100 Subject: [PATCH] Implement addition with UniformScaling --- src/core.jl | 3 ++ src/matrix_coo/matrix_coo.jl | 74 ++++++++++++++++++++++++++++ src/matrix_coo/matrix_coo_kernels.jl | 15 ++++++ src/matrix_csc/matrix_csc.jl | 51 +++++++++++++++++++ src/matrix_csc/matrix_csc_kernels.jl | 66 +++++++++++++++++++++++++ src/matrix_csr/matrix_csr.jl | 51 +++++++++++++++++++ src/matrix_csr/matrix_csr_kernels.jl | 66 +++++++++++++++++++++++++ test/shared/matrix_coo.jl | 49 ++++++++++++++++++ test/shared/matrix_csc.jl | 49 ++++++++++++++++++ test/shared/matrix_csr.jl | 49 ++++++++++++++++++ 10 files changed, 473 insertions(+) diff --git a/src/core.jl b/src/core.jl index 3ebda9d..4a79f46 100644 --- a/src/core.jl +++ b/src/core.jl @@ -86,9 +86,12 @@ function Base.:+(A::AbstractGenericSparseMatrix, B::DenseMatrix) end Base.:+(B::DenseMatrix, A::AbstractGenericSparseMatrix) = A + B +Base.:+(J::UniformScaling, A::AbstractGenericSparseMatrix) = A + J Base.:-(A::AbstractGenericSparseMatrix, B::Union{DenseMatrix, AbstractGenericSparseMatrix}) = A + (-B) +Base.:-(A::AbstractGenericSparseMatrix, J::UniformScaling) = A + (-J) Base.:-(B::DenseMatrix, A::AbstractGenericSparseMatrix) = B + (-A) +Base.:-(J::UniformScaling, A::AbstractGenericSparseMatrix) = J + (-A) LinearAlgebra.issymmetric(A::Transpose{<:Any, <:AbstractGenericSparseMatrix}) = issymmetric(parent(A)) LinearAlgebra.issymmetric(A::Adjoint{<:Any, <:AbstractGenericSparseMatrix}) = issymmetric(parent(A)) diff --git a/src/matrix_coo/matrix_coo.jl b/src/matrix_coo/matrix_coo.jl index 1441de8..9bdb82d 100644 --- a/src/matrix_coo/matrix_coo.jl +++ b/src/matrix_coo/matrix_coo.jl @@ -383,6 +383,80 @@ function Base.:+(A::GenericSparseMatrixCOO, B::GenericSparseMatrixCOO) return dropzeros(C) end +# Addition with UniformScaling +function Base.:+(A::GenericSparseMatrixCOO{Tv, Ti}, J::UniformScaling) where {Tv, Ti} + m, n = size(A) + m == n || throw(DimensionMismatch("Matrix must be square to add UniformScaling.")) + λ = J.λ + iszero(λ) && return copy(A) + + backend = get_backend(A) + + # For COO, we simply append diagonal entries and then sum duplicates + nnz_A = nnz(A) + nnz_concat = nnz_A + n + + Tv_new = promote_type(Tv, typeof(λ)) + + # Allocate arrays for concatenated data + rowind_concat = similar(getrowind(A), nnz_concat) + colind_concat = similar(getcolind(A), nnz_concat) + nzval_concat = similar(nonzeros(A), Tv_new, nnz_concat) + + # Copy A's entries + rowind_concat[1:nnz_A] .= getrowind(A) + colind_concat[1:nnz_A] .= getcolind(A) + nzval_concat[1:nnz_A] .= nonzeros(A) + + # Add diagonal entries + kernel_fill! = kernel_fill_diagonal_coo!(backend) + kernel_fill!(rowind_concat, colind_concat, nzval_concat, Tv_new(λ), nnz_A; ndrange = (n,)) + + # Sort by (row, col) using keys + keys = similar(rowind_concat, Ti, nnz_concat) + kernel_make_keys! = kernel_make_csc_keys!(backend) + kernel_make_keys!(keys, rowind_concat, colind_concat, m; ndrange = (nnz_concat,)) + + # Sort using AcceleratedKernels + perm = _sortperm_AK(keys) + + # Apply permutation to get sorted arrays + rowind_sorted = rowind_concat[perm] + colind_sorted = colind_concat[perm] + nzval_sorted = nzval_concat[perm] + + # Mark unique entries (first occurrence of each (row, col) pair) + keep_mask = similar(rowind_sorted, Bool, nnz_concat) + kernel_mark! = kernel_mark_unique_coo!(backend) + kernel_mark!(keep_mask, rowind_sorted, colind_sorted, nnz_concat; ndrange = (nnz_concat,)) + + # Compute write indices using cumsum + write_indices = _cumsum_AK(keep_mask) + nnz_final = @allowscalar write_indices[nnz_concat] + + # Allocate final arrays + rowind_C = similar(getrowind(A), nnz_final) + colind_C = similar(getcolind(A), nnz_final) + nzval_C = similar(nonzeros(A), Tv_new, nnz_final) + + # Compact: merge duplicates by summing + kernel_compact! = kernel_compact_coo!(backend) + kernel_compact!( + rowind_C, + colind_C, + nzval_C, + rowind_sorted, + colind_sorted, + nzval_sorted, + write_indices, + nnz_concat; + ndrange = (nnz_concat,), + ) + + C = GenericSparseMatrixCOO(m, n, rowind_C, colind_C, nzval_C) + return dropzeros(C) +end + # Addition with transpose/adjoint support for (wrapa, transa, conja, unwrapa, whereT1) in trans_adj_wrappers(:GenericSparseMatrixCOO) for (wrapb, transb, conjb, unwrapb, whereT2) in diff --git a/src/matrix_coo/matrix_coo_kernels.jl b/src/matrix_coo/matrix_coo_kernels.jl index 6d8e08a..bd60173 100644 --- a/src/matrix_coo/matrix_coo_kernels.jl +++ b/src/matrix_coo/matrix_coo_kernels.jl @@ -308,3 +308,18 @@ end end end end + +# Kernel for adding diagonal entries for UniformScaling addition +@kernel inbounds = true function kernel_fill_diagonal_coo!( + rowind, + colind, + nzval, + @Const(λ), + @Const(offset), # Starting index in arrays + ) + i = @index(Global) + + rowind[offset + i] = i + colind[offset + i] = i + nzval[offset + i] = λ +end diff --git a/src/matrix_csc/matrix_csc.jl b/src/matrix_csc/matrix_csc.jl index 3052d00..1030670 100644 --- a/src/matrix_csc/matrix_csc.jl +++ b/src/matrix_csc/matrix_csc.jl @@ -356,6 +356,57 @@ function Base.:+(A::GenericSparseMatrixCSC, B::GenericSparseMatrixCSC) return dropzeros(C) end +# Addition with UniformScaling +function Base.:+(A::GenericSparseMatrixCSC{Tv, Ti}, J::UniformScaling) where {Tv, Ti} + m, n = size(A) + m == n || throw(DimensionMismatch("Matrix must be square to add UniformScaling.")) + λ = J.λ + iszero(λ) && return copy(A) + + backend = get_backend(A) + + # Count which columns need new diagonal entries + needs_diag = similar(getcolptr(A), Bool, n) + kernel_count! = kernel_count_diag_entries_csc!(backend) + kernel_count!(needs_diag, getcolptr(A), getrowval(A); ndrange = (n,)) + + # Count extra entries needed + extra_per_col = similar(getcolptr(A), n) + extra_per_col .= ifelse.(needs_diag, one(Ti), zero(Ti)) + + # Build new colptr + old_nnz_per_col = similar(getcolptr(A), n) + old_nnz_per_col .= view(getcolptr(A), 2:(n + 1)) .- view(getcolptr(A), 1:n) + + new_nnz_per_col = old_nnz_per_col .+ extra_per_col + cumsum_nnz = _cumsum_AK(new_nnz_per_col) + + new_colptr = similar(getcolptr(A), n + 1) + new_colptr[1:1] .= one(Ti) + new_colptr[2:(n + 1)] .= cumsum_nnz .+ one(Ti) + + # Allocate new arrays + total_nnz = @allowscalar new_colptr[n + 1] - one(Ti) + Tv_new = promote_type(Tv, typeof(λ)) + new_rowval = similar(getrowval(A), total_nnz) + new_nzval = similar(nonzeros(A), Tv_new, total_nnz) + + # Fill the new arrays + kernel_add! = kernel_add_uniformscaling_csc!(backend) + kernel_add!( + new_rowval, + new_nzval, + new_colptr, + getcolptr(A), + getrowval(A), + nonzeros(A), + Tv_new(λ); + ndrange = (n,), + ) + + return GenericSparseMatrixCSC(m, n, new_colptr, new_rowval, new_nzval) +end + # Addition with transpose/adjoint support for (wrapa, transa, conja, unwrapa, whereT1) in trans_adj_wrappers(:GenericSparseMatrixCSC) for (wrapb, transb, conjb, unwrapb, whereT2) in diff --git a/src/matrix_csc/matrix_csc_kernels.jl b/src/matrix_csc/matrix_csc_kernels.jl index 11f2a77..e1594df 100644 --- a/src/matrix_csc/matrix_csc_kernels.jl +++ b/src/matrix_csc/matrix_csc_kernels.jl @@ -443,3 +443,69 @@ end end end end + +# Kernel for counting extra diagonal entries needed for UniformScaling addition +# For each column col, checks if the diagonal entry is not already stored +@kernel inbounds = true function kernel_count_diag_entries_csc!( + needs_diag, + @Const(colptr), + @Const(rowval), + ) + col = @index(Global) + + # Check if diagonal entry exists in this column + diag_found = false + for j in colptr[col]:(colptr[col + 1] - 1) + if rowval[j] == col + diag_found = true + break + end + end + + needs_diag[col] = !diag_found +end + +# Kernel for merging CSC matrix with diagonal (UniformScaling addition) +# Copies entries from A and inserts/adds λ to diagonal entries +@kernel inbounds = true function kernel_add_uniformscaling_csc!( + new_rowval, + new_nzval, + @Const(new_colptr), + @Const(old_colptr), + @Const(old_rowval), + @Const(old_nzval), + @Const(λ), + ) + col = @index(Global) + write_idx = new_colptr[col] + diag_written = false + + for j in old_colptr[col]:(old_colptr[col + 1] - 1) + row = old_rowval[j] + + # Insert diagonal before this entry if needed + if !diag_written && row > col + new_rowval[write_idx] = col + new_nzval[write_idx] = λ + write_idx += 1 + diag_written = true + end + + # Copy this entry, adding λ if it's the diagonal + new_rowval[write_idx] = row + if row == col + new_nzval[write_idx] = old_nzval[j] + λ + diag_written = true + else + new_nzval[write_idx] = old_nzval[j] + end + write_idx += 1 + end + + # If diagonal not yet written (all entries were before the diagonal position, + # or column had no entries), write it at the end + if !diag_written + new_rowval[write_idx] = col + new_nzval[write_idx] = λ + end +end diff --git a/src/matrix_csr/matrix_csr.jl b/src/matrix_csr/matrix_csr.jl index 7a98738..5cc9945 100644 --- a/src/matrix_csr/matrix_csr.jl +++ b/src/matrix_csr/matrix_csr.jl @@ -354,6 +354,57 @@ function Base.:+(A::GenericSparseMatrixCSR, B::GenericSparseMatrixCSR) return dropzeros(C) end +# Addition with UniformScaling +function Base.:+(A::GenericSparseMatrixCSR{Tv, Ti}, J::UniformScaling) where {Tv, Ti} + m, n = size(A) + m == n || throw(DimensionMismatch("Matrix must be square to add UniformScaling.")) + λ = J.λ + iszero(λ) && return copy(A) + + backend = get_backend(A) + + # Count which rows need new diagonal entries + needs_diag = similar(getrowptr(A), Bool, m) + kernel_count! = kernel_count_diag_entries_csr!(backend) + kernel_count!(needs_diag, getrowptr(A), getcolval(A); ndrange = (m,)) + + # Count extra entries needed + extra_per_row = similar(getrowptr(A), m) + extra_per_row .= ifelse.(needs_diag, one(Ti), zero(Ti)) + + # Build new rowptr + old_nnz_per_row = similar(getrowptr(A), m) + old_nnz_per_row .= view(getrowptr(A), 2:(m + 1)) .- view(getrowptr(A), 1:m) + + new_nnz_per_row = old_nnz_per_row .+ extra_per_row + cumsum_nnz = _cumsum_AK(new_nnz_per_row) + + new_rowptr = similar(getrowptr(A), m + 1) + new_rowptr[1:1] .= one(Ti) + new_rowptr[2:(m + 1)] .= cumsum_nnz .+ one(Ti) + + # Allocate new arrays + total_nnz = @allowscalar new_rowptr[m + 1] - one(Ti) + Tv_new = promote_type(Tv, typeof(λ)) + new_colval = similar(getcolval(A), total_nnz) + new_nzval = similar(nonzeros(A), Tv_new, total_nnz) + + # Fill the new arrays + kernel_add! = kernel_add_uniformscaling_csr!(backend) + kernel_add!( + new_colval, + new_nzval, + new_rowptr, + getrowptr(A), + getcolval(A), + nonzeros(A), + Tv_new(λ); + ndrange = (m,), + ) + + return GenericSparseMatrixCSR(m, n, new_rowptr, new_colval, new_nzval) +end + # Addition with transpose/adjoint support for (wrapa, transa, conja, unwrapa, whereT1) in trans_adj_wrappers(:GenericSparseMatrixCSR) for (wrapb, transb, conjb, unwrapb, whereT2) in diff --git a/src/matrix_csr/matrix_csr_kernels.jl b/src/matrix_csr/matrix_csr_kernels.jl index d849730..c6799d3 100644 --- a/src/matrix_csr/matrix_csr_kernels.jl +++ b/src/matrix_csr/matrix_csr_kernels.jl @@ -441,3 +441,69 @@ end end end end + +# Kernel for counting extra diagonal entries needed for UniformScaling addition +# For each row, checks if the diagonal entry is not already stored +@kernel inbounds = true function kernel_count_diag_entries_csr!( + needs_diag, + @Const(rowptr), + @Const(colval), + ) + row = @index(Global) + + # Check if diagonal entry exists in this row + diag_found = false + for j in rowptr[row]:(rowptr[row + 1] - 1) + if colval[j] == row + diag_found = true + break + end + end + + needs_diag[row] = !diag_found +end + +# Kernel for merging CSR matrix with diagonal (UniformScaling addition) +# Copies entries from A and inserts/adds λ to diagonal entries +@kernel inbounds = true function kernel_add_uniformscaling_csr!( + new_colval, + new_nzval, + @Const(new_rowptr), + @Const(old_rowptr), + @Const(old_colval), + @Const(old_nzval), + @Const(λ), + ) + row = @index(Global) + write_idx = new_rowptr[row] + diag_written = false + + for j in old_rowptr[row]:(old_rowptr[row + 1] - 1) + col = old_colval[j] + + # Insert diagonal before this entry if needed + if !diag_written && col > row + new_colval[write_idx] = row + new_nzval[write_idx] = λ + write_idx += 1 + diag_written = true + end + + # Copy this entry, adding λ if it's the diagonal + new_colval[write_idx] = col + if col == row + new_nzval[write_idx] = old_nzval[j] + λ + diag_written = true + else + new_nzval[write_idx] = old_nzval[j] + end + write_idx += 1 + end + + # If diagonal not yet written (all entries were before the diagonal position, + # or row had no entries), write it at the end + if !diag_written + new_colval[write_idx] = row + new_nzval[write_idx] = λ + end +end diff --git a/test/shared/matrix_coo.jl b/test/shared/matrix_coo.jl index 8803ea0..f23143a 100644 --- a/test/shared/matrix_coo.jl +++ b/test/shared/matrix_coo.jl @@ -233,6 +233,55 @@ function shared_test_linearalgebra_matrix_coo( end end + @testset "UniformScaling Addition" begin + for T in (float_types..., complex_types...) + # Test with square matrix + A_sq = sprand(T, 20, 20, 0.2) + dA_sq = adapt(op, GenericSparseMatrixCOO(A_sq)) + + # Test A + I (identity) + result_I = dA_sq + I + expected_I = A_sq + I + @test collect(SparseMatrixCSC(result_I)) ≈ collect(expected_I) + + # Test I + A (identity) + result_I2 = I + dA_sq + @test collect(SparseMatrixCSC(result_I2)) ≈ collect(expected_I) + + # Test with scaled identity + α = T <: Complex ? T(2.0 + 1.0im) : T(3.0) + result_αI = dA_sq + (α * I) + expected_αI = A_sq + (α * I) + @test collect(SparseMatrixCSC(result_αI)) ≈ collect(expected_αI) + + # Test subtraction + result_sub = dA_sq - (α * I) + expected_sub = A_sq - (α * I) + @test collect(SparseMatrixCSC(result_sub)) ≈ collect(expected_sub) + + # Test J - A + result_sub2 = (α * I) - dA_sq + expected_sub2 = (α * I) - A_sq + @test collect(SparseMatrixCSC(result_sub2)) ≈ collect(expected_sub2) + + # Test with non-square matrix throws DimensionMismatch + A_nonsq = sprand(T, 30, 20, 0.2) + dA_nonsq = adapt(op, GenericSparseMatrixCOO(A_nonsq)) + @test_throws DimensionMismatch dA_nonsq + I + + # Test with zero λ (should return copy) + result_zero = dA_sq + (zero(T) * I) + @test collect(SparseMatrixCSC(result_zero)) ≈ collect(A_sq) + + # Test with sparse matrix that has no diagonal entries + A_nodiag = sparse([1, 2], [2, 1], T[1, 2], 3, 3) + dA_nodiag = adapt(op, GenericSparseMatrixCOO(A_nodiag)) + result_nodiag = dA_nodiag + I + expected_nodiag = A_nodiag + I + @test collect(SparseMatrixCSC(result_nodiag)) ≈ collect(expected_nodiag) + end + end + @testset "Matrix-Scalar, Matrix-Vector and Matrix-Matrix multiplication" begin for T in (int_types..., float_types..., complex_types...) for (op_A, op_B) in Iterators.product( diff --git a/test/shared/matrix_csc.jl b/test/shared/matrix_csc.jl index 1edcfc2..eb58d09 100644 --- a/test/shared/matrix_csc.jl +++ b/test/shared/matrix_csc.jl @@ -231,6 +231,55 @@ function shared_test_linearalgebra_matrix_csc( end end + @testset "UniformScaling Addition" begin + for T in (float_types..., complex_types...) + # Test with square matrix + A_sq = sprand(T, 20, 20, 0.2) + dA_sq = adapt(op, GenericSparseMatrixCSC(A_sq)) + + # Test A + I (identity) + result_I = dA_sq + I + expected_I = A_sq + I + @test collect(SparseMatrixCSC(result_I)) ≈ collect(expected_I) + + # Test I + A (identity) + result_I2 = I + dA_sq + @test collect(SparseMatrixCSC(result_I2)) ≈ collect(expected_I) + + # Test with scaled identity + α = T <: Complex ? T(2.0 + 1.0im) : T(3.0) + result_αI = dA_sq + (α * I) + expected_αI = A_sq + (α * I) + @test collect(SparseMatrixCSC(result_αI)) ≈ collect(expected_αI) + + # Test subtraction + result_sub = dA_sq - (α * I) + expected_sub = A_sq - (α * I) + @test collect(SparseMatrixCSC(result_sub)) ≈ collect(expected_sub) + + # Test J - A + result_sub2 = (α * I) - dA_sq + expected_sub2 = (α * I) - A_sq + @test collect(SparseMatrixCSC(result_sub2)) ≈ collect(expected_sub2) + + # Test with non-square matrix throws DimensionMismatch + A_nonsq = sprand(T, 30, 20, 0.2) + dA_nonsq = adapt(op, GenericSparseMatrixCSC(A_nonsq)) + @test_throws DimensionMismatch dA_nonsq + I + + # Test with zero λ (should return copy) + result_zero = dA_sq + (zero(T) * I) + @test collect(SparseMatrixCSC(result_zero)) ≈ collect(A_sq) + + # Test with sparse matrix that has no diagonal entries + A_nodiag = sparse([1, 2], [2, 1], T[1, 2], 3, 3) + dA_nodiag = adapt(op, GenericSparseMatrixCSC(A_nodiag)) + result_nodiag = dA_nodiag + I + expected_nodiag = A_nodiag + I + @test collect(SparseMatrixCSC(result_nodiag)) ≈ collect(expected_nodiag) + end + end + @testset "Matrix-Scalar, Matrix-Vector and Matrix-Matrix multiplication" begin for T in (int_types..., float_types..., complex_types...) for (op_A, op_B) in Iterators.product( diff --git a/test/shared/matrix_csr.jl b/test/shared/matrix_csr.jl index bd95a62..1e26d12 100644 --- a/test/shared/matrix_csr.jl +++ b/test/shared/matrix_csr.jl @@ -229,6 +229,55 @@ function shared_test_linearalgebra_matrix_csr( end end + @testset "UniformScaling Addition" begin + for T in (float_types..., complex_types...) + # Test with square matrix + A_sq = sprand(T, 20, 20, 0.2) + dA_sq = adapt(op, GenericSparseMatrixCSR(A_sq)) + + # Test A + I (identity) + result_I = dA_sq + I + expected_I = A_sq + I + @test collect(SparseMatrixCSC(result_I)) ≈ collect(expected_I) + + # Test I + A (identity) + result_I2 = I + dA_sq + @test collect(SparseMatrixCSC(result_I2)) ≈ collect(expected_I) + + # Test with scaled identity + α = T <: Complex ? T(2.0 + 1.0im) : T(3.0) + result_αI = dA_sq + (α * I) + expected_αI = A_sq + (α * I) + @test collect(SparseMatrixCSC(result_αI)) ≈ collect(expected_αI) + + # Test subtraction + result_sub = dA_sq - (α * I) + expected_sub = A_sq - (α * I) + @test collect(SparseMatrixCSC(result_sub)) ≈ collect(expected_sub) + + # Test J - A + result_sub2 = (α * I) - dA_sq + expected_sub2 = (α * I) - A_sq + @test collect(SparseMatrixCSC(result_sub2)) ≈ collect(expected_sub2) + + # Test with non-square matrix throws DimensionMismatch + A_nonsq = sprand(T, 30, 20, 0.2) + dA_nonsq = adapt(op, GenericSparseMatrixCSR(A_nonsq)) + @test_throws DimensionMismatch dA_nonsq + I + + # Test with zero λ (should return copy) + result_zero = dA_sq + (zero(T) * I) + @test collect(SparseMatrixCSC(result_zero)) ≈ collect(A_sq) + + # Test with sparse matrix that has no diagonal entries + A_nodiag = sparse([1, 2], [2, 1], T[1, 2], 3, 3) + dA_nodiag = adapt(op, GenericSparseMatrixCSR(A_nodiag)) + result_nodiag = dA_nodiag + I + expected_nodiag = A_nodiag + I + @test collect(SparseMatrixCSC(result_nodiag)) ≈ collect(expected_nodiag) + end + end + @testset "Matrix-Vector multiplication" begin for T in (int_types..., float_types..., complex_types...) for (op_A, op_B) in Iterators.product(