Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
AlgebraicMultigrid = "2169fc97-5a83-5252-b627-83903c6c433c"
BandedMatrices = "aae01518-5342-5314-be14-df237901396f"
BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
cuSOLVER = "887afef0-6a32-4de5-add4-7827692ba8fc"
CUDSS = "45b445bb-4962-46a0-9369-b4df9d0f772e"
CUSOLVERRF = "a8cc9031-bad2-4722-94f5-40deabb4245c"
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
Expand Down Expand Up @@ -66,7 +66,7 @@ LinearSolveAlgebraicMultigridExt = "AlgebraicMultigrid"
LinearSolveBLISExt = ["blis_jll", "LAPACK_jll"]
LinearSolveBandedMatricesExt = "BandedMatrices"
LinearSolveBlockDiagonalsExt = "BlockDiagonals"
LinearSolveCUDAExt = "CUDA"
LinearSolveCUDAExt = ["cuSOLVER"]
LinearSolveCUDSSExt = "CUDSS"
LinearSolveCUSOLVERRFExt = ["CUSOLVERRF", "SparseArrays"]
LinearSolveChainRulesCoreExt = "ChainRulesCore"
Expand Down Expand Up @@ -100,8 +100,8 @@ Aqua = "0.8"
ArrayInterface = "7.19"
BandedMatrices = "1.8"
BlockDiagonals = "0.2"
CUDA = "5.5, 6"
CUDSS = "0.6.3, 0.7"
cuSOLVER = "6"
CUDSS = "0.7"
CUSOLVERRF = "0.2.6"
ChainRulesCore = "1.25.1"
CliqueTrees = "1.13.1"
Expand Down
51 changes: 27 additions & 24 deletions ext/LinearSolveCUDAExt.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
module LinearSolveCUDAExt

using CUDA
using cuSOLVER
CUDACore = cuSOLVER.CUDACore
cuSPARSE = cuSOLVER.cuSPARSE

using LinearSolve: LinearSolve, is_cusparse, defaultalg, cudss_loaded, DefaultLinearSolver,
DefaultAlgorithmChoice, ALREADY_WARNED_CUDSS, LinearCache,
needs_concrete_A,
Expand All @@ -11,20 +14,20 @@ using LinearSolve: LinearSolve, is_cusparse, defaultalg, cudss_loaded, DefaultLi
using LinearSolve.LinearAlgebra, LinearSolve.SciMLBase, LinearSolve.ArrayInterface
using SciMLBase: AbstractSciMLOperator

LinearSolve.usecuda(x::Nothing) = CUDA.functional()
LinearSolve.usecuda(x::Nothing) = CUDACore.functional()

function LinearSolve.is_cusparse(
A::Union{
CUDA.CUSPARSE.CuSparseMatrixCSR, CUDA.CUSPARSE.CuSparseMatrixCSC,
cuSPARSE.CuSparseMatrixCSR, cuSPARSE.CuSparseMatrixCSC,
}
)
return true
end
LinearSolve.is_cusparse_csr(::CUDA.CUSPARSE.CuSparseMatrixCSR) = true
LinearSolve.is_cusparse_csc(::CUDA.CUSPARSE.CuSparseMatrixCSC) = true
LinearSolve.is_cusparse_csr(::cuSPARSE.CuSparseMatrixCSR) = true
LinearSolve.is_cusparse_csc(::cuSPARSE.CuSparseMatrixCSC) = true

function LinearSolve.defaultalg(
A::CUDA.CUSPARSE.CuSparseMatrixCSR{Tv, Ti}, b,
A::cuSPARSE.CuSparseMatrixCSR{Tv, Ti}, b,
assump::OperatorAssumptions{Bool}
) where {Tv, Ti}
return if LinearSolve.cudss_loaded(A)
Expand All @@ -39,7 +42,7 @@ function LinearSolve.defaultalg(
end

function LinearSolve.defaultalg(
A::CUDA.CUSPARSE.CuSparseMatrixCSC, b,
A::cuSPARSE.CuSparseMatrixCSC, b,
assump::OperatorAssumptions{Bool}
)
if LinearSolve.cudss_loaded(A)
Expand All @@ -50,7 +53,7 @@ function LinearSolve.defaultalg(
return LinearSolve.DefaultLinearSolver(LinearSolve.DefaultAlgorithmChoice.KrylovJL_GMRES)
end

function LinearSolve.error_no_cudss_lu(A::CUDA.CUSPARSE.CuSparseMatrixCSR)
function LinearSolve.error_no_cudss_lu(A::cuSPARSE.CuSparseMatrixCSR)
if !LinearSolve.cudss_loaded(A)
error("CUDSS.jl is required for LU Factorizations on CuSparseMatrixCSR. Please load this library.")
end
Expand All @@ -63,12 +66,12 @@ function SciMLBase.solve!(
)
if cache.isfresh
cacheval = LinearSolve.@get_cacheval(cache, :CudaOffloadLUFactorization)
fact = lu(CUDA.CuArray(cache.A))
fact = lu(CUDACore.CuArray(cache.A))
cache.cacheval = fact
cache.isfresh = false
end
fact = LinearSolve.@get_cacheval(cache, :CudaOffloadLUFactorization)
y = Array(ldiv!(CUDA.CuArray(cache.u), fact, CUDA.CuArray(cache.b)))
y = Array(ldiv!(CUDACore.CuArray(cache.u), fact, CUDACore.CuArray(cache.b)))
cache.u .= y
return SciMLBase.build_linear_solution(alg, y, nothing, cache)
end
Expand All @@ -79,28 +82,28 @@ function LinearSolve.init_cacheval(
assumptions::OperatorAssumptions
)
# Check if CUDA is functional before creating CUDA arrays
if !CUDA.functional()
if !CUDACore.functional()
return nothing
end

T = eltype(A)
noUnitT = typeof(zero(T))
luT = LinearAlgebra.lutype(noUnitT)
ipiv = CuVector{Int32}(undef, 0)
ipiv = CUDACore.CuVector{Int32}(undef, 0)
info = zero(LinearAlgebra.BlasInt)
return LU{luT}(CuMatrix{Float64}(undef, 0, 0), ipiv, info)
return LU{luT}(CUDACore.CuMatrix{Float64}(undef, 0, 0), ipiv, info)
end

function SciMLBase.solve!(
cache::LinearSolve.LinearCache, alg::CudaOffloadQRFactorization;
kwargs...
)
if cache.isfresh
fact = qr(CUDA.CuArray(cache.A))
fact = qr(CUDACore.CuArray(cache.A))
cache.cacheval = fact
cache.isfresh = false
end
y = Array(ldiv!(CUDA.CuArray(cache.u), cache.cacheval, CUDA.CuArray(cache.b)))
y = Array(ldiv!(CUDACore.CuArray(cache.u), cache.cacheval, CUDACore.CuArray(cache.b)))
cache.u .= y
return SciMLBase.build_linear_solution(alg, y, nothing, cache)
end
Expand All @@ -111,11 +114,11 @@ function LinearSolve.init_cacheval(
assumptions::OperatorAssumptions
)
# Check if CUDA is functional before creating CUDA arrays
if !CUDA.functional()
if !CUDACore.functional()
return nothing
end

return qr(CUDA.CuArray(A))
return qr(CUDACore.CuArray(A))
end

# Keep the deprecated CudaOffloadFactorization working by forwarding to QR
Expand All @@ -124,11 +127,11 @@ function SciMLBase.solve!(
kwargs...
)
if cache.isfresh
fact = qr(CUDA.CuArray(cache.A))
fact = qr(CUDACore.CuArray(cache.A))
cache.cacheval = fact
cache.isfresh = false
end
y = Array(ldiv!(CUDA.CuArray(cache.u), cache.cacheval, CUDA.CuArray(cache.b)))
y = Array(ldiv!(CUDACore.CuArray(cache.u), cache.cacheval, CUDACore.CuArray(cache.b)))
cache.u .= y
return SciMLBase.build_linear_solution(alg, y, nothing, cache)
end
Expand All @@ -138,19 +141,19 @@ function LinearSolve.init_cacheval(
maxiters::Int, abstol, reltol, verbose::Union{LinearVerbosity, Bool},
assumptions::OperatorAssumptions
)
return qr(CUDA.CuArray(A))
return qr(CUDACore.CuArray(A))
end

for AlgType in (SparspakFactorization, LinearSolve.QRFactorization)
@eval function LinearSolve.init_cacheval(
::$AlgType, A::CUDA.CUSPARSE.CuSparseMatrixCSR, b, u,
::$AlgType, A::cuSPARSE.CuSparseMatrixCSR, b, u,
Pl, Pr, maxiters::Int, abstol, reltol,
verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions
)
return nothing
end
@eval function LinearSolve.init_cacheval(
::$AlgType, A::CUDA.CUSPARSE.CuSparseMatrixCSC, b, u,
::$AlgType, A::cuSPARSE.CuSparseMatrixCSC, b, u,
Pl, Pr, maxiters::Int, abstol, reltol,
verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions
)
Expand All @@ -159,14 +162,14 @@ for AlgType in (SparspakFactorization, LinearSolve.QRFactorization)
end

function LinearSolve.init_cacheval(
::KLUFactorization, A::CUDA.CUSPARSE.CuSparseMatrixCSR, b, u,
::KLUFactorization, A::cuSPARSE.CuSparseMatrixCSR, b, u,
Pl, Pr, maxiters::Int, abstol, reltol, verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions
)
return nothing
end

function LinearSolve.init_cacheval(
::UMFPACKFactorization, A::CUDA.CUSPARSE.CuSparseMatrixCSR, b, u,
::UMFPACKFactorization, A::cuSPARSE.CuSparseMatrixCSR, b, u,
Pl, Pr, maxiters::Int, abstol, reltol, verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions
)
return nothing
Expand Down
2 changes: 1 addition & 1 deletion ext/LinearSolveCUDSSExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@ module LinearSolveCUDSSExt
using LinearSolve: LinearSolve, cudss_loaded
using CUDSS

LinearSolve.cudss_loaded(A::CUDSS.CUDA.CUSPARSE.CuSparseMatrixCSR) = true
LinearSolve.cudss_loaded(A::CUDSS.cuSPARSE.CuSparseMatrixCSR) = true

end
4 changes: 2 additions & 2 deletions lib/LinearSolveAutotune/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ LinearSolve = {path = "../.."}
Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
CPUSummary = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
cuSOLVER = "887afef0-6a32-4de5-add4-7827692ba8fc"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
FastLapackInterface = "29a986be-02c6-4525-aec4-84b980013641"
Expand Down Expand Up @@ -38,7 +38,7 @@ gh_cli_jll = "5d31d589-30fb-542f-b82d-10325e863e38"
Base64 = "1"
BenchmarkTools = "1"
CPUSummary = "0.2"
CUDA = "5"
cuSOLVER = "6"
DataFrames = "1"
Dates = "1"
FastLapackInterface = "2"
Expand Down
2 changes: 1 addition & 1 deletion lib/LinearSolveAutotune/src/LinearSolveAutotune.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ using CPUSummary
using RecursiveFactorization
using blis_jll
using LAPACK_jll
using CUDA
using cuSOLVER
using Metal
using FastLapackInterface

Expand Down
32 changes: 16 additions & 16 deletions lib/LinearSolveAutotune/src/gpu_detection.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,18 @@ function is_cuda_available()
if haskey(ENV, "CUDA_VISIBLE_DEVICES") ||
(Sys.islinux() && isfile("/proc/driver/nvidia/version")) ||
(Sys.iswindows() && success(`where nvidia-smi`))
@warn "CUDA hardware may be available but CUDA.jl extension is not loaded. Consider adding `using CUDA` to enable GPU algorithms."
@warn "CUDA hardware may be available but CUDA.jl extension is not loaded. Consider adding `using cuSOLVER` or `using CUDA` to enable GPU algorithms."
end
catch
# Silently continue if detection fails
end
return false
end

# Check if we have CUDA.jl loaded
# Check if we have cuSOLVER.jl loaded
try
CUDA = Base.get_extension(LinearSolve, :LinearSolveCUDAExt).CUDA
return CUDA.functional()
cuSOLVER = Base.get_extension(LinearSolve, :LinearSolveCUDAExt).cuSOLVER
return cuSOLVER.functional()
catch
return false
end
Expand Down Expand Up @@ -86,38 +86,38 @@ function get_cuda_gpu_info()
end

try
# Get CUDA module from the extension
CUDA = ext.CUDA
# Get CUDACore module from the extension
CUDACore = ext.CUDACore

# Check if CUDA is functional
if !CUDA.functional()
# Check if CUDACore is functional
if !CUDACore.functional()
return gpu_info
end

# Get device information
devices = collect(CUDA.devices())
devices = collect(CUDACore.devices())
num_devices = length(devices)

if num_devices > 0
gpu_info["gpu_count"] = num_devices

# Get information from the first GPU
first_device = devices[1]
gpu_info["gpu_type"] = CUDA.name(first_device)
gpu_info["gpu_type"] = CUDACore.name(first_device)

# Convert memory from bytes to GB
total_mem_bytes = CUDA.totalmem(first_device)
total_mem_bytes = CUDACore.totalmem(first_device)
gpu_info["gpu_memory_gb"] = round(total_mem_bytes / (1024^3), digits = 2)

# Get compute capability
capability = CUDA.capability(first_device)
capability = CUDACore.capability(first_device)
gpu_info["gpu_capability"] = "$(capability.major).$(capability.minor)"

# If multiple GPUs, list all types
if num_devices > 1
gpu_types = String[]
for dev in devices
push!(gpu_types, CUDA.name(dev))
push!(gpu_types, CUDACore.name(dev))
end
gpu_info["gpu_types"] = unique(gpu_types)
end
Expand Down Expand Up @@ -302,7 +302,7 @@ function get_package_versions()
"LinearSolve",
"LinearSolveAutotune",
"RecursiveFactorization",
"CUDA",
"cuSOLVER",
"Metal",
"MKL_jll",
"BLISBLAS",
Expand Down Expand Up @@ -626,9 +626,9 @@ function get_detailed_system_info()
system_data["cuda_loaded"] = false
system_data["metal_loaded"] = false
try
# Check if CUDA algorithms are actually available
# Check if cuSOLVER (CUDA algorithms) are actually available
if system_data["cuda_available"]
system_data["cuda_loaded"] = isdefined(Main, :CUDA) || haskey(Base.loaded_modules, Base.PkgId(Base.UUID("052768ef-5323-5732-b1bb-66c8b64840ba"), "CUDA"))
system_data["cuda_loaded"] = isdefined(Main, :cuSOLVER) || haskey(Base.loaded_modules, Base.PkgId(Base.UUID("887afef0-6a32-4de5-add4-7827692ba8fc"), "cuSOLVER"))
end
if system_data["metal_available"]
system_data["metal_loaded"] = isdefined(Main, :Metal) || haskey(Base.loaded_modules, Base.PkgId(Base.UUID("dde4c033-4e86-420c-a63e-0dd931031962"), "Metal"))
Expand Down
6 changes: 3 additions & 3 deletions test/gpu/Project.toml
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
[deps]
BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
CUDACore = "bd0ed864-bdfe-4181-a5ed-ce625a5fdea2"
cuSPARSE = "b26da814-b3bc-49ef-b0ee-c816305aa060"
CUDSS = "45b445bb-4962-46a0-9369-b4df9d0f772e"
CUSOLVERRF = "a8cc9031-bad2-4722-94f5-40deabb4245c"
cuSOLVER = "887afef0-6a32-4de5-add4-7827692ba8fc"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

Loading
Loading