From 288bd2cf5a5e48c23be1214e12633235229f3d69 Mon Sep 17 00:00:00 2001
From: Alberto Mercurio <alberto.mercurio96@gmail.com>
Date: Mon, 2 Feb 2026 00:07:22 +0100
Subject: [PATCH] Implement addition with UniformScaling

---
 src/core.jl                          |  3 ++
 src/matrix_coo/matrix_coo.jl         | 74 ++++++++++++++++++++++++++++
 src/matrix_coo/matrix_coo_kernels.jl | 15 ++++++
 src/matrix_csc/matrix_csc.jl         | 51 +++++++++++++++++++
 src/matrix_csc/matrix_csc_kernels.jl | 66 +++++++++++++++++++++++++
 src/matrix_csr/matrix_csr.jl         | 51 +++++++++++++++++++
 src/matrix_csr/matrix_csr_kernels.jl | 66 +++++++++++++++++++++++++
 test/shared/matrix_coo.jl            | 49 ++++++++++++++++++
 test/shared/matrix_csc.jl            | 49 ++++++++++++++++++
 test/shared/matrix_csr.jl            | 49 ++++++++++++++++++
 10 files changed, 473 insertions(+)

diff --git a/src/core.jl b/src/core.jl
index 3ebda9d..4a79f46 100644
--- a/src/core.jl
+++ b/src/core.jl
@@ -86,9 +86,12 @@ function Base.:+(A::AbstractGenericSparseMatrix, B::DenseMatrix)
 end
 
 Base.:+(B::DenseMatrix, A::AbstractGenericSparseMatrix) = A + B
+Base.:+(J::UniformScaling, A::AbstractGenericSparseMatrix) = A + J
 
 Base.:-(A::AbstractGenericSparseMatrix, B::Union{DenseMatrix, AbstractGenericSparseMatrix}) = A + (-B)
+Base.:-(A::AbstractGenericSparseMatrix, J::UniformScaling) = A + (-J)
 Base.:-(B::DenseMatrix, A::AbstractGenericSparseMatrix) = B + (-A)
+Base.:-(J::UniformScaling, A::AbstractGenericSparseMatrix) = J + (-A)
 
 LinearAlgebra.issymmetric(A::Transpose{<:Any, <:AbstractGenericSparseMatrix}) = issymmetric(parent(A))
 LinearAlgebra.issymmetric(A::Adjoint{<:Any, <:AbstractGenericSparseMatrix}) = issymmetric(parent(A))
diff --git a/src/matrix_coo/matrix_coo.jl b/src/matrix_coo/matrix_coo.jl
index 1441de8..9bdb82d 100644
--- a/src/matrix_coo/matrix_coo.jl
+++ b/src/matrix_coo/matrix_coo.jl
@@ -383,6 +383,80 @@ function Base.:+(A::GenericSparseMatrixCOO, B::GenericSparseMatrixCOO)
     return dropzeros(C)
 end
 
+# Addition with UniformScaling
+function Base.:+(A::GenericSparseMatrixCOO{Tv, Ti}, J::UniformScaling) where {Tv, Ti}
+    m, n = size(A)
+    m == n || throw(DimensionMismatch("Matrix must be square to add UniformScaling."))
+    λ = J.λ
+    iszero(λ) && return copy(A)
+
+    backend = get_backend(A)
+
+    # For COO, we simply append diagonal entries and then sum duplicates
+    nnz_A = nnz(A)
+    nnz_concat = nnz_A + n
+
+    Tv_new = promote_type(Tv, typeof(λ))
+
+    # Allocate arrays for concatenated data
+    rowind_concat = similar(getrowind(A), nnz_concat)
+    colind_concat = similar(getcolind(A), nnz_concat)
+    nzval_concat = similar(nonzeros(A), Tv_new, nnz_concat)
+
+    # Copy A's entries
+    rowind_concat[1:nnz_A] .= getrowind(A)
+    colind_concat[1:nnz_A] .= getcolind(A)
+    nzval_concat[1:nnz_A] .= nonzeros(A)
+
+    # Add diagonal entries
+    kernel_fill! = kernel_fill_diagonal_coo!(backend)
+    kernel_fill!(rowind_concat, colind_concat, nzval_concat, Tv_new(λ), nnz_A; ndrange = (n,))
+
+    # Sort by (row, col) using keys
+    keys = similar(rowind_concat, Ti, nnz_concat)
+    kernel_make_keys! = kernel_make_csc_keys!(backend)
+    kernel_make_keys!(keys, rowind_concat, colind_concat, m; ndrange = (nnz_concat,))
+
+    # Sort using AcceleratedKernels
+    perm = _sortperm_AK(keys)
+
+    # Apply permutation to get sorted arrays
+    rowind_sorted = rowind_concat[perm]
+    colind_sorted = colind_concat[perm]
+    nzval_sorted = nzval_concat[perm]
+
+    # Mark unique entries (first occurrence of each (row, col) pair)
+    keep_mask = similar(rowind_sorted, Bool, nnz_concat)
+    kernel_mark! = kernel_mark_unique_coo!(backend)
+    kernel_mark!(keep_mask, rowind_sorted, colind_sorted, nnz_concat; ndrange = (nnz_concat,))
+
+    # Compute write indices using cumsum
+    write_indices = _cumsum_AK(keep_mask)
+    nnz_final = @allowscalar write_indices[nnz_concat]
+
+    # Allocate final arrays
+    rowind_C = similar(getrowind(A), nnz_final)
+    colind_C = similar(getcolind(A), nnz_final)
+    nzval_C = similar(nonzeros(A), Tv_new, nnz_final)
+
+    # Compact: merge duplicates by summing
+    kernel_compact! = kernel_compact_coo!(backend)
+    kernel_compact!(
+        rowind_C,
+        colind_C,
+        nzval_C,
+        rowind_sorted,
+        colind_sorted,
+        nzval_sorted,
+        write_indices,
+        nnz_concat;
+        ndrange = (nnz_concat,),
+    )
+
+    C = GenericSparseMatrixCOO(m, n, rowind_C, colind_C, nzval_C)
+    return dropzeros(C)
+end
+
 # Addition with transpose/adjoint support
 for (wrapa, transa, conja, unwrapa, whereT1) in trans_adj_wrappers(:GenericSparseMatrixCOO)
     for (wrapb, transb, conjb, unwrapb, whereT2) in
diff --git a/src/matrix_coo/matrix_coo_kernels.jl b/src/matrix_coo/matrix_coo_kernels.jl
index 6d8e08a..bd60173 100644
--- a/src/matrix_coo/matrix_coo_kernels.jl
+++ b/src/matrix_coo/matrix_coo_kernels.jl
@@ -308,3 +308,18 @@ end
         end
     end
 end
+
+# Kernel for adding diagonal entries for UniformScaling addition
+@kernel inbounds = true function kernel_fill_diagonal_coo!(
+        rowind,
+        colind,
+        nzval,
+        @Const(λ),
+        @Const(offset),  # Starting index in arrays
+    )
+    i = @index(Global)
+
+    rowind[offset + i] = i
+    colind[offset + i] = i
+    nzval[offset + i] = λ
+end
diff --git a/src/matrix_csc/matrix_csc.jl b/src/matrix_csc/matrix_csc.jl
index 3052d00..1030670 100644
--- a/src/matrix_csc/matrix_csc.jl
+++ b/src/matrix_csc/matrix_csc.jl
@@ -356,6 +356,57 @@ function Base.:+(A::GenericSparseMatrixCSC, B::GenericSparseMatrixCSC)
     return dropzeros(C)
 end
 
+# Addition with UniformScaling
+function Base.:+(A::GenericSparseMatrixCSC{Tv, Ti}, J::UniformScaling) where {Tv, Ti}
+    m, n = size(A)
+    m == n || throw(DimensionMismatch("Matrix must be square to add UniformScaling."))
+    λ = J.λ
+    iszero(λ) && return copy(A)
+
+    backend = get_backend(A)
+
+    # Count which columns need new diagonal entries
+    needs_diag = similar(getcolptr(A), Bool, n)
+    kernel_count! = kernel_count_diag_entries_csc!(backend)
+    kernel_count!(needs_diag, getcolptr(A), getrowval(A); ndrange = (n,))
+
+    # Count extra entries needed
+    extra_per_col = similar(getcolptr(A), n)
+    extra_per_col .= ifelse.(needs_diag, one(Ti), zero(Ti))
+
+    # Build new colptr
+    old_nnz_per_col = similar(getcolptr(A), n)
+    old_nnz_per_col .= view(getcolptr(A), 2:(n + 1)) .- view(getcolptr(A), 1:n)
+
+    new_nnz_per_col = old_nnz_per_col .+ extra_per_col
+    cumsum_nnz = _cumsum_AK(new_nnz_per_col)
+
+    new_colptr = similar(getcolptr(A), n + 1)
+    new_colptr[1:1] .= one(Ti)
+    new_colptr[2:(n + 1)] .= cumsum_nnz .+ one(Ti)
+
+    # Allocate new arrays
+    total_nnz = @allowscalar new_colptr[n + 1] - one(Ti)
+    Tv_new = promote_type(Tv, typeof(λ))
+    new_rowval = similar(getrowval(A), total_nnz)
+    new_nzval = similar(nonzeros(A), Tv_new, total_nnz)
+
+    # Fill the new arrays
+    kernel_add! = kernel_add_uniformscaling_csc!(backend)
+    kernel_add!(
+        new_rowval,
+        new_nzval,
+        new_colptr,
+        getcolptr(A),
+        getrowval(A),
+        nonzeros(A),
+        Tv_new(λ);
+        ndrange = (n,),
+    )
+
+    return GenericSparseMatrixCSC(m, n, new_colptr, new_rowval, new_nzval)
+end
+
 # Addition with transpose/adjoint support
 for (wrapa, transa, conja, unwrapa, whereT1) in trans_adj_wrappers(:GenericSparseMatrixCSC)
     for (wrapb, transb, conjb, unwrapb, whereT2) in
diff --git a/src/matrix_csc/matrix_csc_kernels.jl b/src/matrix_csc/matrix_csc_kernels.jl
index 11f2a77..e1594df 100644
--- a/src/matrix_csc/matrix_csc_kernels.jl
+++ b/src/matrix_csc/matrix_csc_kernels.jl
@@ -443,3 +443,69 @@ end
         end
     end
 end
+
+# Kernel for counting extra diagonal entries needed for UniformScaling addition
+# For each column col, checks if the diagonal entry is not already stored
+@kernel inbounds = true function kernel_count_diag_entries_csc!(
+        needs_diag,
+        @Const(colptr),
+        @Const(rowval),
+    )
+    col = @index(Global)
+
+    # Check if diagonal entry exists in this column
+    diag_found = false
+    for j in colptr[col]:(colptr[col + 1] - 1)
+        if rowval[j] == col
+            diag_found = true
+            break
+        end
+    end
+
+    needs_diag[col] = !diag_found
+end
+
+# Kernel for merging CSC matrix with diagonal (UniformScaling addition)
+# Copies entries from A and inserts/adds λ to diagonal entries
+@kernel inbounds = true function kernel_add_uniformscaling_csc!(
+        new_rowval,
+        new_nzval,
+        @Const(new_colptr),
+        @Const(old_colptr),
+        @Const(old_rowval),
+        @Const(old_nzval),
+        @Const(λ),
+    )
+    col = @index(Global)
+    write_idx = new_colptr[col]
+    diag_written = false
+
+    for j in old_colptr[col]:(old_colptr[col + 1] - 1)
+        row = old_rowval[j]
+
+        # Insert diagonal before this entry if needed
+        if !diag_written && row > col
+            new_rowval[write_idx] = col
+            new_nzval[write_idx] = λ
+            write_idx += 1
+            diag_written = true
+        end
+
+        # Copy this entry, adding λ if it's the diagonal
+        new_rowval[write_idx] = row
+        if row == col
+            new_nzval[write_idx] = old_nzval[j] + λ
+            diag_written = true
+        else
+            new_nzval[write_idx] = old_nzval[j]
+        end
+        write_idx += 1
+    end
+
+    # If diagonal not yet written (all entries were before the diagonal position,
+    # or column had no entries), write it at the end
+    if !diag_written
+        new_rowval[write_idx] = col
+        new_nzval[write_idx] = λ
+    end
+end
diff --git a/src/matrix_csr/matrix_csr.jl b/src/matrix_csr/matrix_csr.jl
index 7a98738..5cc9945 100644
--- a/src/matrix_csr/matrix_csr.jl
+++ b/src/matrix_csr/matrix_csr.jl
@@ -354,6 +354,57 @@ function Base.:+(A::GenericSparseMatrixCSR, B::GenericSparseMatrixCSR)
     return dropzeros(C)
 end
 
+# Addition with UniformScaling
+function Base.:+(A::GenericSparseMatrixCSR{Tv, Ti}, J::UniformScaling) where {Tv, Ti}
+    m, n = size(A)
+    m == n || throw(DimensionMismatch("Matrix must be square to add UniformScaling."))
+    λ = J.λ
+    iszero(λ) && return copy(A)
+
+    backend = get_backend(A)
+
+    # Count which rows need new diagonal entries
+    needs_diag = similar(getrowptr(A), Bool, m)
+    kernel_count! = kernel_count_diag_entries_csr!(backend)
+    kernel_count!(needs_diag, getrowptr(A), getcolval(A); ndrange = (m,))
+
+    # Count extra entries needed
+    extra_per_row = similar(getrowptr(A), m)
+    extra_per_row .= ifelse.(needs_diag, one(Ti), zero(Ti))
+
+    # Build new rowptr
+    old_nnz_per_row = similar(getrowptr(A), m)
+    old_nnz_per_row .= view(getrowptr(A), 2:(m + 1)) .- view(getrowptr(A), 1:m)
+
+    new_nnz_per_row = old_nnz_per_row .+ extra_per_row
+    cumsum_nnz = _cumsum_AK(new_nnz_per_row)
+
+    new_rowptr = similar(getrowptr(A), m + 1)
+    new_rowptr[1:1] .= one(Ti)
+    new_rowptr[2:(m + 1)] .= cumsum_nnz .+ one(Ti)
+
+    # Allocate new arrays
+    total_nnz = @allowscalar new_rowptr[m + 1] - one(Ti)
+    Tv_new = promote_type(Tv, typeof(λ))
+    new_colval = similar(getcolval(A), total_nnz)
+    new_nzval = similar(nonzeros(A), Tv_new, total_nnz)
+
+    # Fill the new arrays
+    kernel_add! = kernel_add_uniformscaling_csr!(backend)
+    kernel_add!(
+        new_colval,
+        new_nzval,
+        new_rowptr,
+        getrowptr(A),
+        getcolval(A),
+        nonzeros(A),
+        Tv_new(λ);
+        ndrange = (m,),
+    )
+
+    return GenericSparseMatrixCSR(m, n, new_rowptr, new_colval, new_nzval)
+end
+
 # Addition with transpose/adjoint support
 for (wrapa, transa, conja, unwrapa, whereT1) in trans_adj_wrappers(:GenericSparseMatrixCSR)
     for (wrapb, transb, conjb, unwrapb, whereT2) in
diff --git a/src/matrix_csr/matrix_csr_kernels.jl b/src/matrix_csr/matrix_csr_kernels.jl
index d849730..c6799d3 100644
--- a/src/matrix_csr/matrix_csr_kernels.jl
+++ b/src/matrix_csr/matrix_csr_kernels.jl
@@ -441,3 +441,69 @@ end
         end
     end
 end
+
+# Kernel for counting extra diagonal entries needed for UniformScaling addition
+# For each row, checks if the diagonal entry is not already stored
+@kernel inbounds = true function kernel_count_diag_entries_csr!(
+        needs_diag,
+        @Const(rowptr),
+        @Const(colval),
+    )
+    row = @index(Global)
+
+    # Check if diagonal entry exists in this row
+    diag_found = false
+    for j in rowptr[row]:(rowptr[row + 1] - 1)
+        if colval[j] == row
+            diag_found = true
+            break
+        end
+    end
+
+    needs_diag[row] = !diag_found
+end
+
+# Kernel for merging CSR matrix with diagonal (UniformScaling addition)
+# Copies entries from A and inserts/adds λ to diagonal entries
+@kernel inbounds = true function kernel_add_uniformscaling_csr!(
+        new_colval,
+        new_nzval,
+        @Const(new_rowptr),
+        @Const(old_rowptr),
+        @Const(old_colval),
+        @Const(old_nzval),
+        @Const(λ),
+    )
+    row = @index(Global)
+    write_idx = new_rowptr[row]
+    diag_written = false
+
+    for j in old_rowptr[row]:(old_rowptr[row + 1] - 1)
+        col = old_colval[j]
+
+        # Insert diagonal before this entry if needed
+        if !diag_written && col > row
+            new_colval[write_idx] = row
+            new_nzval[write_idx] = λ
+            write_idx += 1
+            diag_written = true
+        end
+
+        # Copy this entry, adding λ if it's the diagonal
+        new_colval[write_idx] = col
+        if col == row
+            new_nzval[write_idx] = old_nzval[j] + λ
+            diag_written = true
+        else
+            new_nzval[write_idx] = old_nzval[j]
+        end
+        write_idx += 1
+    end
+
+    # If diagonal not yet written (all entries were before the diagonal position,
+    # or row had no entries), write it at the end
+    if !diag_written
+        new_colval[write_idx] = row
+        new_nzval[write_idx] = λ
+    end
+end
diff --git a/test/shared/matrix_coo.jl b/test/shared/matrix_coo.jl
index 8803ea0..f23143a 100644
--- a/test/shared/matrix_coo.jl
+++ b/test/shared/matrix_coo.jl
@@ -233,6 +233,55 @@ function shared_test_linearalgebra_matrix_coo(
         end
     end
 
+    @testset "UniformScaling Addition" begin
+        for T in (float_types..., complex_types...)
+            # Test with square matrix
+            A_sq = sprand(T, 20, 20, 0.2)
+            dA_sq = adapt(op, GenericSparseMatrixCOO(A_sq))
+
+            # Test A + I (identity)
+            result_I = dA_sq + I
+            expected_I = A_sq + I
+            @test collect(SparseMatrixCSC(result_I)) ≈ collect(expected_I)
+
+            # Test I + A (identity)
+            result_I2 = I + dA_sq
+            @test collect(SparseMatrixCSC(result_I2)) ≈ collect(expected_I)
+
+            # Test with scaled identity
+            α = T <: Complex ? T(2.0 + 1.0im) : T(3.0)
+            result_αI = dA_sq + (α * I)
+            expected_αI = A_sq + (α * I)
+            @test collect(SparseMatrixCSC(result_αI)) ≈ collect(expected_αI)
+
+            # Test subtraction
+            result_sub = dA_sq - (α * I)
+            expected_sub = A_sq - (α * I)
+            @test collect(SparseMatrixCSC(result_sub)) ≈ collect(expected_sub)
+
+            # Test J - A
+            result_sub2 = (α * I) - dA_sq
+            expected_sub2 = (α * I) - A_sq
+            @test collect(SparseMatrixCSC(result_sub2)) ≈ collect(expected_sub2)
+
+            # Test with non-square matrix throws DimensionMismatch
+            A_nonsq = sprand(T, 30, 20, 0.2)
+            dA_nonsq = adapt(op, GenericSparseMatrixCOO(A_nonsq))
+            @test_throws DimensionMismatch dA_nonsq + I
+
+            # Test with zero λ (should return copy)
+            result_zero = dA_sq + (zero(T) * I)
+            @test collect(SparseMatrixCSC(result_zero)) ≈ collect(A_sq)
+
+            # Test with sparse matrix that has no diagonal entries
+            A_nodiag = sparse([1, 2], [2, 1], T[1, 2], 3, 3)
+            dA_nodiag = adapt(op, GenericSparseMatrixCOO(A_nodiag))
+            result_nodiag = dA_nodiag + I
+            expected_nodiag = A_nodiag + I
+            @test collect(SparseMatrixCSC(result_nodiag)) ≈ collect(expected_nodiag)
+        end
+    end
+
     @testset "Matrix-Scalar, Matrix-Vector and Matrix-Matrix multiplication" begin
         for T in (int_types..., float_types..., complex_types...)
             for (op_A, op_B) in Iterators.product(
diff --git a/test/shared/matrix_csc.jl b/test/shared/matrix_csc.jl
index 1edcfc2..eb58d09 100644
--- a/test/shared/matrix_csc.jl
+++ b/test/shared/matrix_csc.jl
@@ -231,6 +231,55 @@ function shared_test_linearalgebra_matrix_csc(
         end
     end
 
+    @testset "UniformScaling Addition" begin
+        for T in (float_types..., complex_types...)
+            # Test with square matrix
+            A_sq = sprand(T, 20, 20, 0.2)
+            dA_sq = adapt(op, GenericSparseMatrixCSC(A_sq))
+
+            # Test A + I (identity)
+            result_I = dA_sq + I
+            expected_I = A_sq + I
+            @test collect(SparseMatrixCSC(result_I)) ≈ collect(expected_I)
+
+            # Test I + A (identity)
+            result_I2 = I + dA_sq
+            @test collect(SparseMatrixCSC(result_I2)) ≈ collect(expected_I)
+
+            # Test with scaled identity
+            α = T <: Complex ? T(2.0 + 1.0im) : T(3.0)
+            result_αI = dA_sq + (α * I)
+            expected_αI = A_sq + (α * I)
+            @test collect(SparseMatrixCSC(result_αI)) ≈ collect(expected_αI)
+
+            # Test subtraction
+            result_sub = dA_sq - (α * I)
+            expected_sub = A_sq - (α * I)
+            @test collect(SparseMatrixCSC(result_sub)) ≈ collect(expected_sub)
+
+            # Test J - A
+            result_sub2 = (α * I) - dA_sq
+            expected_sub2 = (α * I) - A_sq
+            @test collect(SparseMatrixCSC(result_sub2)) ≈ collect(expected_sub2)
+
+            # Test with non-square matrix throws DimensionMismatch
+            A_nonsq = sprand(T, 30, 20, 0.2)
+            dA_nonsq = adapt(op, GenericSparseMatrixCSC(A_nonsq))
+            @test_throws DimensionMismatch dA_nonsq + I
+
+            # Test with zero λ (should return copy)
+            result_zero = dA_sq + (zero(T) * I)
+            @test collect(SparseMatrixCSC(result_zero)) ≈ collect(A_sq)
+
+            # Test with sparse matrix that has no diagonal entries
+            A_nodiag = sparse([1, 2], [2, 1], T[1, 2], 3, 3)
+            dA_nodiag = adapt(op, GenericSparseMatrixCSC(A_nodiag))
+            result_nodiag = dA_nodiag + I
+            expected_nodiag = A_nodiag + I
+            @test collect(SparseMatrixCSC(result_nodiag)) ≈ collect(expected_nodiag)
+        end
+    end
+
     @testset "Matrix-Scalar, Matrix-Vector and Matrix-Matrix multiplication" begin
         for T in (int_types..., float_types..., complex_types...)
             for (op_A, op_B) in Iterators.product(
diff --git a/test/shared/matrix_csr.jl b/test/shared/matrix_csr.jl
index bd95a62..1e26d12 100644
--- a/test/shared/matrix_csr.jl
+++ b/test/shared/matrix_csr.jl
@@ -229,6 +229,55 @@ function shared_test_linearalgebra_matrix_csr(
         end
     end
 
+    @testset "UniformScaling Addition" begin
+        for T in (float_types..., complex_types...)
+            # Test with square matrix
+            A_sq = sprand(T, 20, 20, 0.2)
+            dA_sq = adapt(op, GenericSparseMatrixCSR(A_sq))
+
+            # Test A + I (identity)
+            result_I = dA_sq + I
+            expected_I = A_sq + I
+            @test collect(SparseMatrixCSC(result_I)) ≈ collect(expected_I)
+
+            # Test I + A (identity)
+            result_I2 = I + dA_sq
+            @test collect(SparseMatrixCSC(result_I2)) ≈ collect(expected_I)
+
+            # Test with scaled identity
+            α = T <: Complex ? T(2.0 + 1.0im) : T(3.0)
+            result_αI = dA_sq + (α * I)
+            expected_αI = A_sq + (α * I)
+            @test collect(SparseMatrixCSC(result_αI)) ≈ collect(expected_αI)
+
+            # Test subtraction
+            result_sub = dA_sq - (α * I)
+            expected_sub = A_sq - (α * I)
+            @test collect(SparseMatrixCSC(result_sub)) ≈ collect(expected_sub)
+
+            # Test J - A
+            result_sub2 = (α * I) - dA_sq
+            expected_sub2 = (α * I) - A_sq
+            @test collect(SparseMatrixCSC(result_sub2)) ≈ collect(expected_sub2)
+
+            # Test with non-square matrix throws DimensionMismatch
+            A_nonsq = sprand(T, 30, 20, 0.2)
+            dA_nonsq = adapt(op, GenericSparseMatrixCSR(A_nonsq))
+            @test_throws DimensionMismatch dA_nonsq + I
+
+            # Test with zero λ (should return copy)
+            result_zero = dA_sq + (zero(T) * I)
+            @test collect(SparseMatrixCSC(result_zero)) ≈ collect(A_sq)
+
+            # Test with sparse matrix that has no diagonal entries
+            A_nodiag = sparse([1, 2], [2, 1], T[1, 2], 3, 3)
+            dA_nodiag = adapt(op, GenericSparseMatrixCSR(A_nodiag))
+            result_nodiag = dA_nodiag + I
+            expected_nodiag = A_nodiag + I
+            @test collect(SparseMatrixCSC(result_nodiag)) ≈ collect(expected_nodiag)
+        end
+    end
+
     @testset "Matrix-Vector multiplication" begin
         for T in (int_types..., float_types..., complex_types...)
             for (op_A, op_B) in Iterators.product(