albertomercurio · albertomercurio · Feb 1, 2026 · Feb 1, 2026
diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
@@ -13,15 +13,17 @@ These guidelines give AI coding agents the minimum project-specific context to b
 - `docs/` (Documenter): `make.jl` sets up docs + doctests. `docs/src/index.md` auto-docs the module; adding docstrings automatically surfaces them.
 
 ## 3. Development Workflows
-- Run tests locally: `make test` from the repo root folder.
+- Run tests locally: `make test` from the repo root folder. Important: this runs with the latest stable Julia version, and might fail in some cases like code quality checks (Aqua, JET).
+- The tests are divided into GROUPs defined in `runtests.jl`. For example, to run only the tests for the CUDA backend, use `GROUP=CUDA make test`.
+- Avoid to run generic tests like `make test`. Instead, run tests for specific backends depending on the available hardware on your machine. For example prefer running `GROUP=CUDA make test` on a machine with an NVIDIA GPU or `GROUP=Metal make test` on an Apple Silicon machine.
 - Add a dependency: `julia --project -e 'using Pkg; Pkg.add("PackageName")'` from the repo root folder, then update `[compat]` manually with a bounded version.
 - Build docs locally: `make docs`.
 - Doctests: Any code block in docstrings marked for execution must pass CI doctest phase.
 - Format code: `make format`.
 - Always check in which directory you are running commands. Change directory to the repo root if needed.
 
 ## 4. Coding Conventions
-- Public API: add docstrings starting with a concise one-line summary, then details (Documenter picks them up).
+- Public API: add docstrings starting with a concise one-line summary, then details (Documenter picks them up). Write docstrings *only* for functions/types that are part of the package's public API, and not methods of other Modules (e.g., Base, LinearAlgebra, SparseArrays).
 - Keep internal helpers non-exported; prefix with an underscore only if truly private.
 - Avoid type piracy: only extend Base / external methods for types you own OR clearly justify in comments.
 - Prefer parametric methods that remain type-stable (JET will flag instability; address before committing).
@@ -32,7 +34,7 @@ These guidelines give AI coding agents the minimum project-specific context to b
 - Always test the same functionality on all the supported backends (CPU, GPU, etc.) to ensure consistent behavior and avoid repetitions when generating the problem. Make tests reusable across backends when possible using shared functions in the `test/shared` folder.
 - The test for different CPU/GPU architectures should be in a separate Julia environment, as each machine may not have all the backends available.
 - When you run tests locally, only tests for the CPU and CUDA backends, as the others are not always available.
-- All added code must pass Aqua + JET. If JET warns about type instability, refactor or add an inline comment explaining any intentional dynamic behavior.
+- All added code must pass Aqua + JET. If JET warns about type instability, refactor or add an inline comment explaining any intentional dynamic behavior. Usually code quality tests fail on the latest stable Julia version; prefer to run them on the latest LTS Julia version.
 
 ## 6. Documentation Patterns
 

diff --git a/src/GenericSparseArrays.jl b/src/GenericSparseArrays.jl
@@ -3,7 +3,7 @@ module GenericSparseArrays
 using LinearAlgebra
 import LinearAlgebra: wrap, copymutable_oftype, __normalize!, kron, Diagonal, issymmetric, ishermitian
 using SparseArrays
-import SparseArrays: SparseVector, SparseMatrixCSC
+import SparseArrays: SparseVector, SparseMatrixCSC, dropzeros, dropzeros!
 import SparseArrays: getcolptr, getrowval, getnzval, nonzeroinds
 import SparseArrays: _show_with_braille_patterns
 

diff --git a/src/conversions/conversions.jl b/src/conversions/conversions.jl
@@ -174,6 +174,15 @@ function GenericSparseMatrixCSC(A::GenericSparseMatrixCOO{Tv, Ti}) where {Tv, Ti
 
     backend = get_backend(A.nzval)
 
+    # Handle empty matrix case
+    if nnz_count == 0
+        colptr = similar(A.rowind, Ti, n + 1)
+        fill!(colptr, one(Ti))
+        rowind = similar(A.rowind, Ti, 0)
+        nzval = similar(A.nzval, Tv, 0)
+        return GenericSparseMatrixCSC(m, n, colptr, rowind, nzval)
+    end
+
     # Create keys for sorting: column first, then row
     keys = similar(A.rowind, Ti, nnz_count)
 
@@ -276,6 +285,15 @@ function GenericSparseMatrixCSR(A::GenericSparseMatrixCOO{Tv, Ti}) where {Tv, Ti
 
     backend = get_backend(A.nzval)
 
+    # Handle empty matrix case
+    if nnz_count == 0
+        rowptr = similar(A.rowind, Ti, m + 1)
+        fill!(rowptr, one(Ti))
+        colind = similar(A.rowind, Ti, 0)
+        nzval = similar(A.nzval, Tv, 0)
+        return GenericSparseMatrixCSR(m, n, rowptr, colind, nzval)
+    end
+
     # Create keys for sorting: row first, then column
     keys = similar(A.rowind, Ti, nnz_count)
 

diff --git a/src/core.jl b/src/core.jl
@@ -83,6 +83,9 @@ end
 
 Base.:+(B::DenseMatrix, A::AbstractGenericSparseMatrix) = A + B
 
+Base.:-(A::AbstractGenericSparseMatrix, B::Union{DenseMatrix, AbstractGenericSparseMatrix}) = A + (-B)
+Base.:-(B::DenseMatrix, A::AbstractGenericSparseMatrix) = B + (-A)
+
 LinearAlgebra.issymmetric(A::Transpose{<:Any, <:AbstractGenericSparseMatrix}) = issymmetric(parent(A))
 LinearAlgebra.issymmetric(A::Adjoint{<:Any, <:AbstractGenericSparseMatrix}) = issymmetric(parent(A))
 LinearAlgebra.ishermitian(A::Transpose{<:Any, <:AbstractGenericSparseMatrix}) = ishermitian(parent(A))

diff --git a/src/matrix_coo/matrix_coo.jl b/src/matrix_coo/matrix_coo.jl
@@ -391,7 +391,9 @@ function Base.:+(A::GenericSparseMatrixCOO, B::GenericSparseMatrixCOO)
         ndrange = (nnz_concat,),
     )
 
-    return GenericSparseMatrixCOO(m, n, rowind_C, colind_C, nzval_C)
+    C = GenericSparseMatrixCOO(m, n, rowind_C, colind_C, nzval_C)
+    dropzeros!(C)
+    return C
 end
 
 # Addition with transpose/adjoint support
@@ -508,7 +510,13 @@ for (wrapa, transa, conja, unwrapa, whereT1) in trans_adj_wrappers(:GenericSpars
                 ndrange = (nnz_concat,),
             )
 
-            return GenericSparseMatrixCOO(m, n, rowind_C, colind_C, nzval_C)
+            C = GenericSparseMatrixCOO(m, n, rowind_C, colind_C, nzval_C)
+            dropzeros!(C)
+            return C
+        end
+
+        @eval function Base.:-(A::$TypeA, B::$TypeB) where {$(whereT1(:T1)), $(whereT2(:T2))}
+            return A + (-B)
         end
     end
 end
@@ -751,3 +759,110 @@ function LinearAlgebra.ishermitian(A::GenericSparseMatrixCOO)
     m == n || return false
     return ishermitian(GenericSparseMatrixCSC(A))
 end
+
+# Helper function to filter COO entries by mask
+function _filter_by_mask!(new_rowind, new_colind, new_nzval, rowind, colind, nzval, mask)
+    backend = get_backend(nzval)
+
+    # Compute prefix sum of mask to get write indices
+    mask_int = similar(rowind, Int)
+    mask_int .= mask
+    write_indices = _cumsum_AK(mask_int)
+
+    kernel! = _kernel_filter_coo!(backend)
+    kernel!(new_rowind, new_colind, new_nzval, rowind, colind, nzval, mask, write_indices; ndrange = length(nzval))
+    return nothing
+end
+
+@kernel inbounds = true function _kernel_filter_coo!(
+        new_rowind,
+        new_colind,
+        new_nzval,
+        @Const(rowind),
+        @Const(colind),
+        @Const(nzval),
+        @Const(mask),
+        @Const(write_indices),
+    )
+    i = @index(Global)
+    if mask[i]
+        idx = write_indices[i]
+        new_rowind[idx] = rowind[i]
+        new_colind[idx] = colind[i]
+        new_nzval[idx] = nzval[i]
+    end
+end
+
+# Internal implementation that writes into the provided arrays
+function _dropzeros_impl_coo!(new_rowind, new_colind, new_nzval, A::GenericSparseMatrixCOO, mask)
+    rowind = getrowind(A)
+    colind = getcolind(A)
+    nzval = nonzeros(A)
+
+    # Copy non-zero entries using mask
+    _filter_by_mask!(new_rowind, new_colind, new_nzval, rowind, colind, nzval, mask)
+    return nothing
+end
+
+function SparseArrays.dropzeros!(A::GenericSparseMatrixCOO)
+    nzval = nonzeros(A)
+    rowind = getrowind(A)
+    colind = getcolind(A)
+
+    # Find non-zero mask
+    mask = nzval .!= zero(eltype(nzval))
+
+    # Count non-zeros
+    total_nnz = sum(mask)
+
+    if total_nnz == length(nzval)
+        # No zeros to drop
+        return A
+    end
+
+    # Allocate temporary arrays for new data
+    new_rowind = similar(rowind, total_nnz)
+    new_colind = similar(colind, total_nnz)
+    new_nzval = similar(nzval, total_nnz)
+
+    # Fill the new arrays
+    _dropzeros_impl_coo!(new_rowind, new_colind, new_nzval, A, mask)
+
+    # Copy back to original arrays
+    resize!(A.rowind, total_nnz)
+    copyto!(A.rowind, new_rowind)
+    resize!(A.colind, total_nnz)
+    copyto!(A.colind, new_colind)
+    resize!(A.nzval, total_nnz)
+    copyto!(A.nzval, new_nzval)
+
+    return A
+end
+
+function SparseArrays.dropzeros(A::GenericSparseMatrixCOO)
+    m, n = size(A)
+    nzval = nonzeros(A)
+    rowind = getrowind(A)
+    colind = getcolind(A)
+
+    # Find non-zero mask
+    mask = nzval .!= zero(eltype(nzval))
+
+    # Count non-zeros
+    total_nnz = sum(mask)
+
+    if total_nnz == length(nzval)
+        # No zeros to drop, return a copy
+        return copy(A)
+    end
+
+    # Allocate new arrays
+    new_rowind = similar(rowind, total_nnz)
+    new_colind = similar(colind, total_nnz)
+    new_nzval = similar(nzval, total_nnz)
+
+    # Fill the new arrays
+    _dropzeros_impl_coo!(new_rowind, new_colind, new_nzval, A, mask)
+
+    return GenericSparseMatrixCOO(m, n, new_rowind, new_colind, new_nzval)
+end
diff --git a/src/matrix_csc/matrix_csc.jl b/src/matrix_csc/matrix_csc.jl
@@ -364,7 +364,9 @@ function Base.:+(A::GenericSparseMatrixCSC, B::GenericSparseMatrixCSC)
         ndrange = (n,),
     )
 
-    return GenericSparseMatrixCSC(m, n, colptr_C, rowval_C, nzval_C)
+    C = GenericSparseMatrixCSC(m, n, colptr_C, rowval_C, nzval_C)
+    dropzeros!(C)
+    return C
 end
 
 # Addition with transpose/adjoint support
@@ -394,6 +396,10 @@ for (wrapa, transa, conja, unwrapa, whereT1) in trans_adj_wrappers(:GenericSpars
             # Convert back to CSC
             return GenericSparseMatrixCSC(result_csr)
         end
+
+        @eval function Base.:-(A::$TypeA, B::$TypeB) where {$(whereT1(:T1)), $(whereT2(:T2))}
+            return A + (-B)
+        end
     end
 end
 
@@ -606,3 +612,94 @@ function LinearAlgebra.ishermitian(A::GenericSparseMatrixCSC)
 
     return @allowscalar result[1]
 end
+
+# Internal implementation that writes into the provided arrays
+function _dropzeros_impl_csc!(new_colptr, new_rowval, new_nzval, A::GenericSparseMatrixCSC)
+    m, n = size(A)
+    backend = get_backend(A)
+    Ti = eltype(getcolptr(A))
+
+    # Count non-zeros per column
+    nnz_per_col = similar(getcolptr(A), n)
+    kernel_count! = kernel_count_nonzeros_csc!(backend)
+    kernel_count!(nnz_per_col, getcolptr(A), nonzeros(A); ndrange = (n,))
+
+    # Build new colptr
+    cumsum_nnz = _cumsum_AK(nnz_per_col)
+    fill!(view(new_colptr, 1:1), one(Ti))
+    view(new_colptr, 2:(n + 1)) .= cumsum_nnz .+ one(Ti)
+
+    # Get total number of non-zeros
+    total_nnz = @allowscalar new_colptr[end] - one(Ti)
+
+    if total_nnz > 0
+        # Copy non-zero entries
+        kernel_drop! = kernel_dropzeros_csc!(backend)
+        kernel_drop!(
+            new_rowval,
+            new_nzval,
+            new_colptr,
+            getcolptr(A),
+            rowvals(A),
+            nonzeros(A);
+            ndrange = (n,),
+        )
+    end
+
+    return total_nnz
+end
+
+function SparseArrays.dropzeros!(A::GenericSparseMatrixCSC)
+    m, n = size(A)
+    backend = get_backend(A)
+    Ti = eltype(getcolptr(A))
+
+    # Count non-zeros per column to determine new size
+    nnz_per_col = similar(getcolptr(A), n)
+    kernel_count! = kernel_count_nonzeros_csc!(backend)
+    kernel_count!(nnz_per_col, getcolptr(A), nonzeros(A); ndrange = (n,))
+
+    cumsum_nnz = _cumsum_AK(nnz_per_col)
+    total_nnz = @allowscalar cumsum_nnz[end]
+
+    # Allocate temporary arrays for new data
+    new_colptr = similar(getcolptr(A))
+    new_rowval = similar(rowvals(A), total_nnz)
+    new_nzval = similar(nonzeros(A), total_nnz)
+
+    # Fill the new arrays
+    _dropzeros_impl_csc!(new_colptr, new_rowval, new_nzval, A)
+
+    # Copy back to original arrays
+    copyto!(A.colptr, new_colptr)
+    resize!(A.rowval, total_nnz)
+    copyto!(A.rowval, new_rowval)
+    resize!(A.nzval, total_nnz)
+    copyto!(A.nzval, new_nzval)
+
+    return A
+end
+
+function SparseArrays.dropzeros(A::GenericSparseMatrixCSC)
+    m, n = size(A)
+    backend = get_backend(A)
+    Ti = eltype(getcolptr(A))
+
+    # Count non-zeros per column to determine new size
+    nnz_per_col = similar(getcolptr(A), n)
+    kernel_count! = kernel_count_nonzeros_csc!(backend)
+    kernel_count!(nnz_per_col, getcolptr(A), nonzeros(A); ndrange = (n,))
+
+    cumsum_nnz = _cumsum_AK(nnz_per_col)
+    total_nnz = @allowscalar cumsum_nnz[end]
+
+    # Allocate new arrays
+    new_colptr = similar(getcolptr(A))
+    new_rowval = similar(rowvals(A), total_nnz)
+    new_nzval = similar(nonzeros(A), total_nnz)
+
+    # Fill the new arrays
+    _dropzeros_impl_csc!(new_colptr, new_rowval, new_nzval, A)
+
+    return GenericSparseMatrixCSC(m, n, new_colptr, new_rowval, new_nzval)
+end
diff --git a/src/matrix_csc/matrix_csc_kernels.jl b/src/matrix_csc/matrix_csc_kernels.jl
@@ -407,3 +407,39 @@ end
         result[1] = false
     end
 end
+
+# Kernel to count non-zeros per column after filtering zeros
+@kernel inbounds = true function kernel_count_nonzeros_csc!(
+        nnz_per_col,
+        @Const(colptr),
+        @Const(nzval),
+    )
+    col = @index(Global)
+    count = 0
+    for j in colptr[col]:(colptr[col + 1] - 1)
+        if !iszero(nzval[j])
+            count += 1
+        end
+    end
+    nnz_per_col[col] = count
+end
+
+# Kernel to copy non-zero entries only (dropping zeros)
+@kernel inbounds = true function kernel_dropzeros_csc!(
+        new_rowval,
+        new_nzval,
+        @Const(new_colptr),
+        @Const(old_colptr),
+        @Const(old_rowval),
+        @Const(old_nzval),
+    )
+    col = @index(Global)
+    write_idx = new_colptr[col]
+    for j in old_colptr[col]:(old_colptr[col + 1] - 1)
+        if !iszero(old_nzval[j])
+            new_rowval[write_idx] = old_rowval[j]
+            new_nzval[write_idx] = old_nzval[j]
+            write_idx += 1
+        end
+    end
+end