Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions .github/copilot-instructions.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,17 @@ These guidelines give AI coding agents the minimum project-specific context to b
- `docs/` (Documenter): `make.jl` sets up docs + doctests. `docs/src/index.md` auto-docs the module; adding docstrings automatically surfaces them.

## 3. Development Workflows
- Run tests locally: `make test` from the repo root folder.
- Run tests locally: `make test` from the repo root folder. Important: this runs with the latest stable Julia version, and might fail in some cases like code quality checks (Aqua, JET).
- The tests are divided into GROUPs defined in `runtests.jl`. For example, to run only the tests for the CUDA backend, use `GROUP=CUDA make test`.
- Avoid to run generic tests like `make test`. Instead, run tests for specific backends depending on the available hardware on your machine. For example prefer running `GROUP=CUDA make test` on a machine with an NVIDIA GPU or `GROUP=Metal make test` on an Apple Silicon machine.
- Add a dependency: `julia --project -e 'using Pkg; Pkg.add("PackageName")'` from the repo root folder, then update `[compat]` manually with a bounded version.
- Build docs locally: `make docs`.
- Doctests: Any code block in docstrings marked for execution must pass CI doctest phase.
- Format code: `make format`.
- Always check in which directory you are running commands. Change directory to the repo root if needed.

## 4. Coding Conventions
- Public API: add docstrings starting with a concise one-line summary, then details (Documenter picks them up).
- Public API: add docstrings starting with a concise one-line summary, then details (Documenter picks them up). Write docstrings *only* for functions/types that are part of the package's public API, and not methods of other Modules (e.g., Base, LinearAlgebra, SparseArrays).
- Keep internal helpers non-exported; prefix with an underscore only if truly private.
- Avoid type piracy: only extend Base / external methods for types you own OR clearly justify in comments.
- Prefer parametric methods that remain type-stable (JET will flag instability; address before committing).
Expand All @@ -32,7 +34,7 @@ These guidelines give AI coding agents the minimum project-specific context to b
- Always test the same functionality on all the supported backends (CPU, GPU, etc.) to ensure consistent behavior and avoid repetitions when generating the problem. Make tests reusable across backends when possible using shared functions in the `test/shared` folder.
- The test for different CPU/GPU architectures should be in a separate Julia environment, as each machine may not have all the backends available.
- When you run tests locally, only tests for the CPU and CUDA backends, as the others are not always available.
- All added code must pass Aqua + JET. If JET warns about type instability, refactor or add an inline comment explaining any intentional dynamic behavior.
- All added code must pass Aqua + JET. If JET warns about type instability, refactor or add an inline comment explaining any intentional dynamic behavior. Usually code quality tests fail on the latest stable Julia version; prefer to run them on the latest LTS Julia version.

## 6. Documentation Patterns

Expand Down
2 changes: 1 addition & 1 deletion src/GenericSparseArrays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module GenericSparseArrays
using LinearAlgebra
import LinearAlgebra: wrap, copymutable_oftype, __normalize!, kron, Diagonal, issymmetric, ishermitian
using SparseArrays
import SparseArrays: SparseVector, SparseMatrixCSC
import SparseArrays: SparseVector, SparseMatrixCSC, dropzeros, dropzeros!
import SparseArrays: getcolptr, getrowval, getnzval, nonzeroinds
import SparseArrays: _show_with_braille_patterns

Expand Down
18 changes: 18 additions & 0 deletions src/conversions/conversions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,15 @@ function GenericSparseMatrixCSC(A::GenericSparseMatrixCOO{Tv, Ti}) where {Tv, Ti

backend = get_backend(A.nzval)

# Handle empty matrix case
if nnz_count == 0
colptr = similar(A.rowind, Ti, n + 1)
fill!(colptr, one(Ti))
rowind = similar(A.rowind, Ti, 0)
nzval = similar(A.nzval, Tv, 0)
return GenericSparseMatrixCSC(m, n, colptr, rowind, nzval)
end

# Create keys for sorting: column first, then row
keys = similar(A.rowind, Ti, nnz_count)

Expand Down Expand Up @@ -276,6 +285,15 @@ function GenericSparseMatrixCSR(A::GenericSparseMatrixCOO{Tv, Ti}) where {Tv, Ti

backend = get_backend(A.nzval)

# Handle empty matrix case
if nnz_count == 0
rowptr = similar(A.rowind, Ti, m + 1)
fill!(rowptr, one(Ti))
colind = similar(A.rowind, Ti, 0)
nzval = similar(A.nzval, Tv, 0)
return GenericSparseMatrixCSR(m, n, rowptr, colind, nzval)
end

# Create keys for sorting: row first, then column
keys = similar(A.rowind, Ti, nnz_count)

Expand Down
3 changes: 3 additions & 0 deletions src/core.jl
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ end

Base.:+(B::DenseMatrix, A::AbstractGenericSparseMatrix) = A + B

Base.:-(A::AbstractGenericSparseMatrix, B::Union{DenseMatrix, AbstractGenericSparseMatrix}) = A + (-B)
Base.:-(B::DenseMatrix, A::AbstractGenericSparseMatrix) = B + (-A)

LinearAlgebra.issymmetric(A::Transpose{<:Any, <:AbstractGenericSparseMatrix}) = issymmetric(parent(A))
LinearAlgebra.issymmetric(A::Adjoint{<:Any, <:AbstractGenericSparseMatrix}) = issymmetric(parent(A))
LinearAlgebra.ishermitian(A::Transpose{<:Any, <:AbstractGenericSparseMatrix}) = ishermitian(parent(A))
Expand Down
119 changes: 117 additions & 2 deletions src/matrix_coo/matrix_coo.jl
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,9 @@ function Base.:+(A::GenericSparseMatrixCOO, B::GenericSparseMatrixCOO)
ndrange = (nnz_concat,),
)

return GenericSparseMatrixCOO(m, n, rowind_C, colind_C, nzval_C)
C = GenericSparseMatrixCOO(m, n, rowind_C, colind_C, nzval_C)
dropzeros!(C)
return C
end

# Addition with transpose/adjoint support
Expand Down Expand Up @@ -508,7 +510,13 @@ for (wrapa, transa, conja, unwrapa, whereT1) in trans_adj_wrappers(:GenericSpars
ndrange = (nnz_concat,),
)

return GenericSparseMatrixCOO(m, n, rowind_C, colind_C, nzval_C)
C = GenericSparseMatrixCOO(m, n, rowind_C, colind_C, nzval_C)
dropzeros!(C)
return C
end

@eval function Base.:-(A::$TypeA, B::$TypeB) where {$(whereT1(:T1)), $(whereT2(:T2))}
return A + (-B)
end
end
end
Expand Down Expand Up @@ -751,3 +759,110 @@ function LinearAlgebra.ishermitian(A::GenericSparseMatrixCOO)
m == n || return false
return ishermitian(GenericSparseMatrixCSC(A))
end

# Helper function to filter COO entries by mask
function _filter_by_mask!(new_rowind, new_colind, new_nzval, rowind, colind, nzval, mask)
backend = get_backend(nzval)

# Compute prefix sum of mask to get write indices
mask_int = similar(rowind, Int)
mask_int .= mask
write_indices = _cumsum_AK(mask_int)

kernel! = _kernel_filter_coo!(backend)
kernel!(new_rowind, new_colind, new_nzval, rowind, colind, nzval, mask, write_indices; ndrange = length(nzval))
return nothing
end

@kernel inbounds = true function _kernel_filter_coo!(
new_rowind,
new_colind,
new_nzval,
@Const(rowind),
@Const(colind),
@Const(nzval),
@Const(mask),
@Const(write_indices),
)
i = @index(Global)
if mask[i]
idx = write_indices[i]
new_rowind[idx] = rowind[i]
new_colind[idx] = colind[i]
new_nzval[idx] = nzval[i]
end
end

# Internal implementation that writes into the provided arrays
function _dropzeros_impl_coo!(new_rowind, new_colind, new_nzval, A::GenericSparseMatrixCOO, mask)
rowind = getrowind(A)
colind = getcolind(A)
nzval = nonzeros(A)

# Copy non-zero entries using mask
_filter_by_mask!(new_rowind, new_colind, new_nzval, rowind, colind, nzval, mask)
return nothing
end

function SparseArrays.dropzeros!(A::GenericSparseMatrixCOO)
nzval = nonzeros(A)
rowind = getrowind(A)
colind = getcolind(A)

# Find non-zero mask
mask = nzval .!= zero(eltype(nzval))

# Count non-zeros
total_nnz = sum(mask)

if total_nnz == length(nzval)
# No zeros to drop
return A
end

# Allocate temporary arrays for new data
new_rowind = similar(rowind, total_nnz)
new_colind = similar(colind, total_nnz)
new_nzval = similar(nzval, total_nnz)

# Fill the new arrays
_dropzeros_impl_coo!(new_rowind, new_colind, new_nzval, A, mask)

# Copy back to original arrays
resize!(A.rowind, total_nnz)
copyto!(A.rowind, new_rowind)
resize!(A.colind, total_nnz)
copyto!(A.colind, new_colind)
resize!(A.nzval, total_nnz)
copyto!(A.nzval, new_nzval)

return A
end

function SparseArrays.dropzeros(A::GenericSparseMatrixCOO)
m, n = size(A)
nzval = nonzeros(A)
rowind = getrowind(A)
colind = getcolind(A)

# Find non-zero mask
mask = nzval .!= zero(eltype(nzval))

# Count non-zeros
total_nnz = sum(mask)

if total_nnz == length(nzval)
# No zeros to drop, return a copy
return copy(A)
end

# Allocate new arrays
new_rowind = similar(rowind, total_nnz)
new_colind = similar(colind, total_nnz)
new_nzval = similar(nzval, total_nnz)

# Fill the new arrays
_dropzeros_impl_coo!(new_rowind, new_colind, new_nzval, A, mask)

return GenericSparseMatrixCOO(m, n, new_rowind, new_colind, new_nzval)
end
99 changes: 98 additions & 1 deletion src/matrix_csc/matrix_csc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,9 @@ function Base.:+(A::GenericSparseMatrixCSC, B::GenericSparseMatrixCSC)
ndrange = (n,),
)

return GenericSparseMatrixCSC(m, n, colptr_C, rowval_C, nzval_C)
C = GenericSparseMatrixCSC(m, n, colptr_C, rowval_C, nzval_C)
dropzeros!(C)
return C
end

# Addition with transpose/adjoint support
Expand Down Expand Up @@ -394,6 +396,10 @@ for (wrapa, transa, conja, unwrapa, whereT1) in trans_adj_wrappers(:GenericSpars
# Convert back to CSC
return GenericSparseMatrixCSC(result_csr)
end

@eval function Base.:-(A::$TypeA, B::$TypeB) where {$(whereT1(:T1)), $(whereT2(:T2))}
return A + (-B)
end
end
end

Expand Down Expand Up @@ -606,3 +612,94 @@ function LinearAlgebra.ishermitian(A::GenericSparseMatrixCSC)

return @allowscalar result[1]
end

# Internal implementation that writes into the provided arrays
function _dropzeros_impl_csc!(new_colptr, new_rowval, new_nzval, A::GenericSparseMatrixCSC)
m, n = size(A)
backend = get_backend(A)
Ti = eltype(getcolptr(A))

# Count non-zeros per column
nnz_per_col = similar(getcolptr(A), n)
kernel_count! = kernel_count_nonzeros_csc!(backend)
kernel_count!(nnz_per_col, getcolptr(A), nonzeros(A); ndrange = (n,))

# Build new colptr
cumsum_nnz = _cumsum_AK(nnz_per_col)
fill!(view(new_colptr, 1:1), one(Ti))
view(new_colptr, 2:(n + 1)) .= cumsum_nnz .+ one(Ti)

# Get total number of non-zeros
total_nnz = @allowscalar new_colptr[end] - one(Ti)

if total_nnz > 0
# Copy non-zero entries
kernel_drop! = kernel_dropzeros_csc!(backend)
kernel_drop!(
new_rowval,
new_nzval,
new_colptr,
getcolptr(A),
rowvals(A),
nonzeros(A);
ndrange = (n,),
)
end

return total_nnz
end

function SparseArrays.dropzeros!(A::GenericSparseMatrixCSC)
m, n = size(A)
backend = get_backend(A)
Ti = eltype(getcolptr(A))

# Count non-zeros per column to determine new size
nnz_per_col = similar(getcolptr(A), n)
kernel_count! = kernel_count_nonzeros_csc!(backend)
kernel_count!(nnz_per_col, getcolptr(A), nonzeros(A); ndrange = (n,))

cumsum_nnz = _cumsum_AK(nnz_per_col)
total_nnz = @allowscalar cumsum_nnz[end]

# Allocate temporary arrays for new data
new_colptr = similar(getcolptr(A))
new_rowval = similar(rowvals(A), total_nnz)
new_nzval = similar(nonzeros(A), total_nnz)

# Fill the new arrays
_dropzeros_impl_csc!(new_colptr, new_rowval, new_nzval, A)

# Copy back to original arrays
copyto!(A.colptr, new_colptr)
resize!(A.rowval, total_nnz)
copyto!(A.rowval, new_rowval)
resize!(A.nzval, total_nnz)
copyto!(A.nzval, new_nzval)

return A
end

function SparseArrays.dropzeros(A::GenericSparseMatrixCSC)
m, n = size(A)
backend = get_backend(A)
Ti = eltype(getcolptr(A))

# Count non-zeros per column to determine new size
nnz_per_col = similar(getcolptr(A), n)
kernel_count! = kernel_count_nonzeros_csc!(backend)
kernel_count!(nnz_per_col, getcolptr(A), nonzeros(A); ndrange = (n,))

cumsum_nnz = _cumsum_AK(nnz_per_col)
total_nnz = @allowscalar cumsum_nnz[end]

# Allocate new arrays
new_colptr = similar(getcolptr(A))
new_rowval = similar(rowvals(A), total_nnz)
new_nzval = similar(nonzeros(A), total_nnz)

# Fill the new arrays
_dropzeros_impl_csc!(new_colptr, new_rowval, new_nzval, A)

return GenericSparseMatrixCSC(m, n, new_colptr, new_rowval, new_nzval)
end
36 changes: 36 additions & 0 deletions src/matrix_csc/matrix_csc_kernels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -407,3 +407,39 @@ end
result[1] = false
end
end

# Kernel to count non-zeros per column after filtering zeros
@kernel inbounds = true function kernel_count_nonzeros_csc!(
nnz_per_col,
@Const(colptr),
@Const(nzval),
)
col = @index(Global)
count = 0
for j in colptr[col]:(colptr[col + 1] - 1)
if !iszero(nzval[j])
count += 1
end
end
nnz_per_col[col] = count
end

# Kernel to copy non-zero entries only (dropping zeros)
@kernel inbounds = true function kernel_dropzeros_csc!(
new_rowval,
new_nzval,
@Const(new_colptr),
@Const(old_colptr),
@Const(old_rowval),
@Const(old_nzval),
)
col = @index(Global)
write_idx = new_colptr[col]
for j in old_colptr[col]:(old_colptr[col + 1] - 1)
if !iszero(old_nzval[j])
new_rowval[write_idx] = old_rowval[j]
new_nzval[write_idx] = old_nzval[j]
write_idx += 1
end
end
end
Loading
Loading