Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,18 @@ steps:
env:
CI_USE_OPENCL: "1"

- label: Julia 1.11 (Finch)
timeout_in_minutes: 90
<<: *test
plugins:
- JuliaCI/julia#v1:
version: "1.11"
- JuliaCI/julia-test#v1: ~
- JuliaCI/julia-coverage#v1:
codecov: true
env:
CI_TEST_FINCH: "1"

- label: Julia 1 - TimespanLogging
timeout_in_minutes: 20
<<: *test
Expand Down
15 changes: 14 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ Requires = "ae029012-a4dd-5104-9daa-d747884805df"
ScopedValues = "7e506255-f358-4e82-b7e4-beb19740aa63"
Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
SharedArrays = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
TaskLocalValues = "ed4db957-447d-4319-bfb6-7fa9ae7ecf34"
Expand All @@ -34,48 +33,62 @@ UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
[weakdeps]
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
AlgebraicMultigrid = "2169fc97-5a83-5252-b627-83903c6c433c"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
Colors = "5ae59095-9a9b-59fe-a467-6f913c188581"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
Finch = "9177782c-1635-4eb9-9bfb-d9dfa25e6bce"
IncompleteLU = "40713840-3770-5561-ab4c-a76e7d0d7895"
GraphViz = "f526b714-d49f-11e8-06ff-31ed36ee7ee0"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
Krylov = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7"
Metal = "dde4c033-4e86-420c-a63e-0dd931031962"
OpenCL = "08131aa3-fb12-5dee-8b74-c09406e224a2"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b"

[extensions]
AbstractFFTsExt = "AbstractFFTs"
AlgebraicMultigridExt = ["AlgebraicMultigrid", "SparseArrays"]
CUDAExt = "CUDA"
DistributionsExt = "Distributions"
FinchExt = "Finch"
GraphVizExt = "GraphViz"
GraphVizSimpleExt = "Colors"
IncompleteLUExt = ["IncompleteLU", "SparseArrays"]
IntelExt = "oneAPI"
JSON3Ext = "JSON3"
KrylovExt = "Krylov"
MetalExt = "Metal"
OpenCLExt = "OpenCL"
PlotsExt = ["DataFrames", "Plots"]
PythonExt = "PythonCall"
ROCExt = "AMDGPU"
SparseArraysExt = "SparseArrays"

[compat]
AMDGPU = "1, 2"
AbstractFFTs = "1.5.0"
Adapt = "4"
AlgebraicMultigrid = "1"
CUDA = "3, 4, 5"
Colors = "0.12, 0.13"
DataFrames = "1"
DataStructures = "0.18, 0.19"
DistributedNext = "1.0.0"
Distributions = "0.25"
FillArrays = "1.13.0"
Finch = "1.2.9"
GPUArraysCore = "0.2.0"
GraphViz = "0.2"
Graphs = "1"
IncompleteLU = "0.2"
JSON3 = "1"
KernelAbstractions = "0.9"
Krylov = "0.10"
MacroTools = "0.5"
MemPool = "0.4.12"
Metal = "1.1"
Expand Down
45 changes: 45 additions & 0 deletions ext/AlgebraicMultigridExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
module AlgebraicMultigridExt

import AlgebraicMultigrid
import SparseArrays
import Dagger
import Dagger: DMatrix
import LinearAlgebra

# Algebraic-multigrid preconditioner, built per diagonal tile on top of Dagger's
# block-preconditioner machinery (see `src/array/iterativesolvers.jl`). The AMG
# hierarchy's expensive setup runs once per tile at construction; each apply runs
# one V-cycle (a sequence of SpMVs, smoother sweeps, and a coarse solve).

_as_sparse(A::SparseArrays.SparseMatrixCSC) = A
_as_sparse(A::AbstractMatrix) = SparseArrays.sparse(A)

function _amg_operator(tile, method::Symbol; kwargs...)
S = _as_sparse(Dagger._tile_matrix(tile))
ml = if method === :ruge_stuben
AlgebraicMultigrid.ruge_stuben(S; kwargs...)
elseif method === :smoothed_aggregation
AlgebraicMultigrid.smoothed_aggregation(S; kwargs...)
else
throw(ArgumentError("AMGPreconditioner: unknown method $(method); use \
:ruge_stuben or :smoothed_aggregation"))
end
return AlgebraicMultigrid.aspreconditioner(ml)
end

function Dagger.AMGPreconditioner(A::DMatrix; method::Symbol=:ruge_stuben, kwargs...)
build = tile -> _amg_operator(tile, method; kwargs...)
return Dagger._build_block_preconditioner(Dagger.AMGPreconditioner, A,
"AMGPreconditioner", build)
end

# An AMG preconditioner applies a V-cycle via `ldiv!` (`\` is not defined).
Dagger._block_apply!(y, p::AlgebraicMultigrid.Preconditioner, x) =
(LinearAlgebra.ldiv!(y, p, x); return nothing)

# The hierarchy is read-only and pinned to its worker; it is never written and
# its internals (a `Vector{Level}`) aren't introspectable by datadeps, so treat
# it as non-aliasing rather than recursing into it.
Dagger.aliasing(::AlgebraicMultigrid.Preconditioner) = Dagger.NoAliasing()

end # module AlgebraicMultigridExt
184 changes: 184 additions & 0 deletions ext/FinchExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
module FinchExt

import Finch
import LinearAlgebra
import Dagger
import Dagger: Blocks, AutoBlocks, BlocksOrAuto, AssignmentType, DSparseArray, DSparseMatrix

Dagger._sparse_collect(A::Finch.Tensor) = Array(A)
# Finch's generic `similar` produces tensor formats that destabilize later
# `@finch`/`@einsum` kernels (observed as segfaults during tile moves), so
# allocate an empty COO-backed tile explicitly instead.
Dagger._sparse_similar(::Finch.Tensor, ::Type{T}, dims::Dims) where {T} =
Finch.fspzeros(T, dims...)
Dagger.maybe_wrap_tile(x::Finch.Tensor) = DSparseArray(x)

function Finch.fspzeros(p::Blocks, T::Type, dims::Dims; assignment::AssignmentType = :arbitrary)
d = Dagger.ArrayDomain(map(x->1:x, dims))
a = Dagger.AllocateArray(T, (T, _dims) -> DSparseMatrix{T}(Finch.fspzeros(T, _dims...)), false, d, Dagger.partition(p, d), p, assignment)
return Dagger._to_darray(a)
end
Finch.fspzeros(p::BlocksOrAuto, T::Type, dims::Integer...; assignment::AssignmentType = :arbitrary) =
Finch.fspzeros(p, T, dims; assignment)
Finch.fspzeros(p::BlocksOrAuto, dims::Integer...; assignment::AssignmentType = :arbitrary) =
Finch.fspzeros(p, Float64, dims; assignment)
Finch.fspzeros(p::BlocksOrAuto, dims::Dims; assignment::AssignmentType = :arbitrary) =
Finch.fspzeros(p, Float64, dims; assignment)
Finch.fspzeros(::AutoBlocks, T::Type, dims::Dims; assignment::AssignmentType = :arbitrary) =
Finch.fspzeros(Dagger.auto_blocks(dims), T, dims; assignment)

function Finch.fsprand(p::Blocks, T::Type, dims::Dims, sparsity::AbstractFloat; assignment::AssignmentType = :arbitrary)
d = Dagger.ArrayDomain(map(x->1:x, dims))
a = Dagger.AllocateArray(T, (T, _dims) -> DSparseMatrix{T}(Finch.fsprand(T, _dims..., sparsity)), false, d, Dagger.partition(p, d), p, assignment)
return Dagger._to_darray(a)
end
Finch.fsprand(p::BlocksOrAuto, T::Type, dims_and_sparsity::Real...; assignment::AssignmentType = :arbitrary) =
Finch.fsprand(p, T, dims_and_sparsity[1:end-1], dims_and_sparsity[end]; assignment)
Finch.fsprand(p::BlocksOrAuto, dims_and_sparsity::Real...; assignment::AssignmentType = :arbitrary) =
Finch.fsprand(p, Float64, dims_and_sparsity[1:end-1], dims_and_sparsity[end]; assignment)
Finch.fsprand(p::BlocksOrAuto, dims::Dims, sparsity::AbstractFloat; assignment::AssignmentType = :arbitrary) =
Finch.fsprand(p, Float64, dims, sparsity; assignment)
Finch.fsprand(::AutoBlocks, T::Type, dims::Dims, sparsity::AbstractFloat; assignment::AssignmentType = :arbitrary) =
Finch.fsprand(Dagger.auto_blocks(dims), T, dims, sparsity; assignment)

# Materialize a (possibly lazy/`SwizzleArray`) result into a concrete sparse
# `Tensor`. `@einsum` returns a lazy `SwizzleArray` for transposed-output
# patterns, which is not itself a `Finch.Tensor` and corrupts subsequent
# accumulation steps; copying through `@finch` produces a clean `SparseCOO`.
function _finch_materialize(X, ::Type{T}, dims) where {T}
out = Finch.fspzeros(T, dims...)
Finch.@finch begin
out .= 0
for j = _, i = _
if X[i, j] != 0
out[i, j] = X[i, j]
end
end
end
return out
end

# Finch tensors do not define `Base.copy`; build a fresh equivalent tensor.
Dagger._sparse_copy(mat::Finch.Tensor) = _finch_materialize(mat, eltype(mat), size(mat))

# Finch tensors do not support `setindex!`, so copy-buffering writeback into a
# Finch tile densifies, applies the (partial-range) copy, then re-sparsifies.
function Dagger._sparse_copyto_view!(mat::Finch.Tensor, Brange, src)
dense = Array(mat)
copyto!(view(dense, Brange), src)
return _finch_materialize(dense, eltype(mat), size(mat))
end

# Compute `C_out += alpha * op(A) * op(B)` via Finch's `@einsum`, choosing the
# right index pattern for each transpose option. We deliberately use *explicit*
# index patterns (`A[k,i]`) rather than `Finch.swizzle`, because feeding a
# swizzled tensor into `@einsum` makes it return a lazy `SwizzleArray` (rather
# than a materialized `Tensor`), which breaks subsequent accumulation steps.
# Adjoint ('C') additionally applies `conj` inside the einsum.
function _finch_einsum_matmul!(C_out, A, B, transA::Char, transB::Char, alpha)
aT = transA == 'T' || transA == 'C'
aC = transA == 'C'
bT = transB == 'T' || transB == 'C'
bC = transB == 'C'
if !aT && !bT
bC ? Finch.@einsum(C_out[i,j] += alpha * (A[i,k] * conj(B[k,j]))) :
Finch.@einsum(C_out[i,j] += alpha * (A[i,k] * B[k,j]))
elseif !aT && bT
bC ? Finch.@einsum(C_out[i,j] += alpha * (A[i,k] * conj(B[j,k]))) :
Finch.@einsum(C_out[i,j] += alpha * (A[i,k] * B[j,k]))
elseif aT && !bT
aC ? Finch.@einsum(C_out[i,j] += alpha * (conj(A[k,i]) * B[k,j])) :
Finch.@einsum(C_out[i,j] += alpha * (A[k,i] * B[k,j]))
else # aT && bT
if aC && bC
Finch.@einsum C_out[i,j] += alpha * (conj(A[k,i]) * conj(B[j,k]))
elseif aC
Finch.@einsum C_out[i,j] += alpha * (conj(A[k,i]) * B[j,k])
elseif bC
Finch.@einsum C_out[i,j] += alpha * (A[k,i] * conj(B[j,k]))
else
Finch.@einsum C_out[i,j] += alpha * (A[k,i] * B[j,k])
end
end
return C_out
end

function Dagger.matmatmul!(
C::DSparseMatrix,
transA::Char,
transB::Char,
A::Finch.Tensor,
B::Finch.Tensor,
alpha,
beta
)
if !isa(C.mat, Finch.Tensor)
# Not supported here, forward to generic matmatmul!
return Dagger.matmatmul!(C.mat, transA, transB, A, B, alpha, beta)
end
transA in ('N', 'T', 'C') || throw(ArgumentError("Invalid transA: $transA"))
transB in ('N', 'T', 'C') || throw(ArgumentError("Invalid transB: $transB"))

# `@einsum` cannot write into an existing sparse tensor in place, so we build
# a fresh output tensor and (re)assign it to the wrapper. `DSparseMatrix`
# hides this reallocation from Datadeps.
#
# N.B. BEWARE: `@einsum Cm[i,j] = ...` (assignment into an existing tensor)
# does not work; use a fresh `C_out` with `+=`.
Cm = C.mat
C_out = Finch.fspzeros(eltype(Cm), size(Cm)...)
C_out = _finch_einsum_matmul!(C_out, A, B, transA, transB, alpha)
# `@einsum` may return a lazy `SwizzleArray`; materialize to a concrete
# sparse `Tensor` so accumulation and later iterations stay well-typed.
C_out = _finch_materialize(C_out, eltype(Cm), size(Cm))
if beta != 0
C_out = C_out + beta * Cm
end
C.mat = C_out

return C
end

# Sparse matrix-vector multiply tile kernel: `C = alpha*op(A)*B + beta*C` with a
# Finch tensor `A` and dense vectors `B`/`C`. Finch's `@einsum` does not reliably
# accumulate into a dense output, so we densify the single sparse tile and use a
# dense BLAS-backed `mul!`; this is a per-tile densification, not the whole matrix.
function Dagger.matvecmul!(C::AbstractVector, transA::Char, A::Finch.Tensor, B::AbstractVector, alpha, beta)
Ad = Array(A)
opA = transA == 'N' ? Ad :
transA == 'C' ? adjoint(Ad) :
transA == 'T' ? transpose(Ad) :
throw(ArgumentError("Invalid transA: $transA"))
LinearAlgebra.mul!(C, opA, B, alpha, beta)
return C
end

# Tile transpose/symmetrization used by `copytri!`. Finch tensors don't support
# in-place `setindex!`, so we densify, build the result, and re-sparsify via
# `_finch_materialize`.
# - `uplo === nothing`: off-diagonal tile, return the conjugate transpose.
# - `uplo == 'U'`/`'L'`: diagonal tile, build the full Hermitian tile from the
# given triangle (matching the dense `copydiagtile!` semantics).
function Dagger.transpose_tile(B::Finch.Tensor, uplo=nothing)
_B = Array(B)
if uplo === nothing
C = adjoint(_B)
return _finch_materialize(C, eltype(_B), size(C))
end
n = LinearAlgebra.checksquare(_B)
C = zeros(eltype(_B), n, n)
if uplo == 'U'
for j in 1:n, i in 1:n
C[i, j] = i <= j ? _B[i, j] : conj(_B[j, i])
end
elseif uplo == 'L'
for j in 1:n, i in 1:n
C[i, j] = i >= j ? _B[i, j] : conj(_B[j, i])
end
else
throw(ArgumentError("uplo must be 'U' or 'L', got $uplo"))
end
return _finch_materialize(C, eltype(_B), size(C))
end

end # module FinchExt
36 changes: 36 additions & 0 deletions ext/IncompleteLUExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
module IncompleteLUExt

import IncompleteLU
import SparseArrays
import Dagger
import Dagger: DMatrix
import LinearAlgebra

# Block incomplete-LU preconditioner, built per diagonal tile on top of Dagger's
# block-preconditioner machinery (see `src/array/iterativesolvers.jl`). Each tile
# gets an ILU factorization (drop tolerance `τ`) once at construction; the apply
# is a forward/backward substitution per block.

_as_sparse(A::SparseArrays.SparseMatrixCSC) = A
_as_sparse(A::AbstractMatrix) = SparseArrays.sparse(A)

function _ilu_operator(tile; τ=0.001, kwargs...)
S = _as_sparse(Dagger._tile_matrix(tile))
return IncompleteLU.ilu(S; τ=τ, kwargs...)
end

function Dagger.BlockILUPreconditioner(A::DMatrix; kwargs...)
build = tile -> _ilu_operator(tile; kwargs...)
return Dagger._build_block_preconditioner(Dagger.BlockILUPreconditioner, A,
"BlockILUPreconditioner", build)
end

# An ILU factorization applies via `ldiv!` (`\` is not defined).
Dagger._block_apply!(y, F::IncompleteLU.ILUFactorization, x) =
(LinearAlgebra.ldiv!(y, F, x); return nothing)

# The factorization is read-only and pinned to its worker; treat it as
# non-aliasing rather than recursing into its internals.
Dagger.aliasing(::IncompleteLU.ILUFactorization) = Dagger.NoAliasing()

end # module IncompleteLUExt
36 changes: 36 additions & 0 deletions ext/KrylovExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
module KrylovExt

import Krylov
import Dagger
import Dagger: DVector, DMatrix
import LinearAlgebra

# Distributed iterative solvers, implemented on top of Krylov.jl.
#
# We never form `A⁻¹`: Krylov only needs `mul!(y, A, x)` (and `mul!(y, A', x)`
# for two-sided methods), which Dagger provides over `DVector`s via its
# distributed SpMV/`gemv_dagger!` path. The remaining requirements -- `dot`,
# `norm`, `axpy!`, `axpby!`, `rmul!`, `copyto!`, `fill!`, broadcasting -- are the
# distributed BLAS-1 ops in `Dagger`.
#
# Workspace vectors are allocated through a `KrylovConstructor` built from
# `similar(b)`, so every internal vector inherits `b`'s element type *and*
# partitioning (`similar` preserves both). This keeps all `mul!`/`dot`/`axpy!`
# operands on a compatible chunk layout, which the distributed kernels require.

function _dagger_krylov(method::Symbol, A, b::DVector; kwargs...)
kc = Krylov.KrylovConstructor(similar(b))
workspace = Krylov.krylov_workspace(Val(method), kc)
Krylov.krylov_solve!(workspace, A, b; kwargs...)
return Krylov.solution(workspace), Krylov.statistics(workspace)
end

Dagger.krylov_solve(method::Symbol, A, b::DVector; kwargs...) =
_dagger_krylov(method, A, b; kwargs...)

Dagger.cg(A, b::DVector; kwargs...) = _dagger_krylov(:cg, A, b; kwargs...)
Dagger.minres(A, b::DVector; kwargs...) = _dagger_krylov(:minres, A, b; kwargs...)
Dagger.gmres(A, b::DVector; kwargs...) = _dagger_krylov(:gmres, A, b; kwargs...)
Dagger.bicgstab(A, b::DVector; kwargs...) = _dagger_krylov(:bicgstab, A, b; kwargs...)

end # module KrylovExt
Loading
Loading