Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
name = "ParallelParticleSwarms"
uuid = "ab63da0c-63b4-40fa-a3b7-d2cba5be6419"
version = "1.0.0"
authors = ["Utkarsh <rajpututkarsh530@gmail.com> and contributors"]
version = "1.0.0"

[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
DiffEqGPU = "071ae1c0-96b5-11e9-1965-c90190d839ea"
Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
LineSearch = "87fe0de2-c867-4266-b59a-2f0a94fc965b"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
NonlinearSolveBase = "be0214bd-f91f-a760-ac4e-3421ce2b2da0"
Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba"
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
QuasiMonteCarlo = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b"
Expand All @@ -26,14 +29,16 @@ Enzyme = "0.13"
ForwardDiff = "0.10, 1.3"
JET = "0.9, 0.10, 0.11"
KernelAbstractions = "0.9"
LineSearch = "0.1"
NonlinearSolveBase = "2.24"
Optimization = "4.1, 5.2"
PrecompileTools = "1"
QuasiMonteCarlo = "0.3"
Reexport = "1.2"
SafeTestsets = "0.1"
SciMLBase = "2.79"
Setfield = "1.1"
SimpleNonlinearSolve = "2.2"
SimpleNonlinearSolve = "2.12"
StaticArrays = "1.9"
julia = "1.10"

Expand Down
5 changes: 3 additions & 2 deletions src/ParallelParticleSwarms.jl
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
module ParallelParticleSwarms

using SciMLBase, StaticArrays, Setfield, KernelAbstractions
using QuasiMonteCarlo, Optimization, SimpleNonlinearSolve, ForwardDiff
using QuasiMonteCarlo, Optimization, SimpleNonlinearSolve, ForwardDiff, LineSearch
using NonlinearSolveBase: ImmutableNonlinearProblem
import Adapt
import Adapt: adapt
import Enzyme: autodiff_deferred, Active, Reverse, Const
import Enzyme: autodiff, autodiff_deferred, Active, Reverse, Const, Duplicated, make_zero!
import KernelAbstractions: @atomic, @atomicreplace, @atomicswap
using QuasiMonteCarlo
import DiffEqGPU: GPUTsit5, make_prob_compatible, vectorized_solve, vectorized_asolve
Expand Down
29 changes: 17 additions & 12 deletions src/bfgs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,21 @@ function SciMLBase.__solve(
abstol = nothing,
reltol = nothing,
maxiters = 1000,
linesearch = StrongWolfeLineSearch(),
kwargs...
)
f = Base.Fix2(prob.f.f, prob.p)
function _g(θ, _p = nothing)
return ForwardDiff.gradient(f, θ)
end
u0 = as_svector(prob.u0)
∇f = as_svector_grad(instantiate_gradient(prob.f.f, prob.f.adtype))
t0 = time()
nlprob = NonlinearProblem{false}(_g, prob.u0)
nlprob = ImmutableNonlinearProblem{false}(∇f, u0, prob.p)
nlsol = solve(
nlprob,
SimpleLimitedMemoryBroyden(; threshold = opt.threshold, linesearch = Val(true));
SimpleLimitedMemoryBroyden(; threshold = opt.threshold, linesearch);
maxiters,
abstol,
reltol
reltol,
grad_f = ∇f,
kwargs...
)
θ = nlsol.u
t1 = time()
Expand All @@ -31,26 +32,30 @@ function SciMLBase.__solve(
)
end

# `BFGS` here solves ∇f = 0 via `SimpleBroyden` (secant/quasi-Newton on the gradient).
function SciMLBase.__solve(
prob::SciMLBase.OptimizationProblem,
opt::BFGS,
args...;
abstol = nothing,
reltol = nothing,
maxiters = 1000,
linesearch = StrongWolfeLineSearch(),
kwargs...
)
f = Base.Fix2(prob.f.f, prob.p)
∇f = instantiate_gradient(f, prob.f.adtype)
u0 = as_svector(prob.u0)
∇f = as_svector_grad(instantiate_gradient(prob.f.f, prob.f.adtype))

t0 = time()
nlprob = NonlinearProblem{false}(∇f, prob.u0)
nlprob = ImmutableNonlinearProblem{false}(∇f, u0, prob.p)
nlsol = solve(
nlprob,
SimpleBroyden(; linesearch = Val(true));
SimpleBroyden(; linesearch);
maxiters,
abstol,
reltol
reltol,
grad_f = ∇f,
kwargs...
)
θ = nlsol.u
t1 = time()
Expand Down
143 changes: 95 additions & 48 deletions src/hybrid.jl
Original file line number Diff line number Diff line change
@@ -1,69 +1,116 @@
@kernel function simplebfgs_run!(nlprob, x0s, result, opt, maxiters, abstol, reltol)
using KernelAbstractions
using SciMLBase
using Optimization
using LineSearch
using SimpleNonlinearSolve
using NonlinearSolveBase: ImmutableNonlinearProblem
import SciMLBase: NonlinearFunction
import NonlinearSolveBase.Utils as NLBUtils

@inline (f::NonlinearFunction{false, G})(u, p) where {G} = f.f(u, p)

@inline NLBUtils.evaluate_f(prob::ImmutableNonlinearProblem, u) =
prob.f.f(u, prob.p)

@inline NLBUtils.evaluate_f!!(prob::ImmutableNonlinearProblem, fu, u) =
prob.f.f(u, prob.p)

@inline _unwrap_scalar(x::Real) = x
@inline _unwrap_scalar(x) = x[]

function _hybrid_bounds(prob, ::Val{d}, ::Type{T}) where {d, T}
lb = prob.lb === nothing ? nothing : SVector{d, T}(prob.lb)
ub = prob.ub === nothing ? nothing : SVector{d, T}(prob.ub)
return lb, ub
end

struct BoundedGrad{G, LB, UB}
raw::G
lb::LB
ub::UB
end

@inline (bg::BoundedGrad{G, Nothing, Nothing})(θ, p) where {G} = as_svector(bg.raw(θ, p))

@inline function (bg::BoundedGrad)(θ, p)
T = eltype(θ)
w = bg.ub .- bg.lb
in_box = all(isfinite, θ) &&
all(θ .>= bg.lb .- T(2) .* w) &&
all(θ .<= bg.ub .+ T(2) .* w)
g = in_box ? bg.raw(θ, p) : map(_ -> T(1.0e15), θ)
return as_svector(g)
end

@inline function _nlalg(local_opt::LBFGS, linesearch)
return SimpleLimitedMemoryBroyden(; threshold = local_opt.threshold, linesearch)
end
@inline _nlalg(::BFGS, linesearch) = SimpleBroyden(; linesearch)

@kernel function simplebfgs_run!(
grad_f, f_raw, p, x0s, result, result_fx, nlalg, maxiters, abstol, reltol
)
i = @index(Global, Linear)
nlcache = remake(nlprob; u0 = x0s[i])
sol = solve(nlcache, opt; maxiters, abstol, reltol)
@inbounds result[i] = sol.u
@inbounds x0 = as_svector(x0s[i])
nlprob = ImmutableNonlinearProblem{false}(NonlinearFunction{false}(grad_f), x0, p)
sol = SciMLBase.solve(nlprob, nlalg; maxiters, abstol, reltol, grad_f = grad_f)
u = as_svector(sol.u)
T = eltype(u)
v = f_raw(u, p)
@inbounds result[i] = u
@inbounds result_fx[i] = (isnan(v) | !isfinite(v)) ? T(Inf) : convert(T, v)
end

function SciMLBase.solve!(
cache::HybridPSOCache, opt::HybridPSO{Backend, LocalOpt}, args...;
abstol = nothing,
reltol = nothing,
maxiters = 100, local_maxiters = 10, kwargs...
) where {
Backend, LocalOpt <: Union{LBFGS, BFGS},
}
pso_cache = cache.pso_cache

sol_pso = solve!(pso_cache)
x0s = sol_pso.original
maxiters = 100,
local_maxiters = 50,
linesearch = StrongWolfeLineSearch(),
kwargs...
) where {Backend, LocalOpt <: Union{LBFGS, BFGS}}

backend = opt.backend
sol_pso = SciMLBase.solve!(cache.pso_cache; maxiters)
best_u = sol_pso.u
best_obj = _unwrap_scalar(sol_pso.objective)

prob = remake(cache.prob, lb = nothing, ub = nothing)
prob = cache.prob
f_raw = prob.f.f
p = prob.p
T = eltype(prob.u0)
d = length(prob.u0)
lb, ub = _hybrid_bounds(prob, Val(d), T)

result = cache.start_points
copyto!(result, x0s)
grad_f = as_svector_grad(BoundedGrad(ForwardDiffGradient(f_raw), lb, ub))

∇f = instantiate_gradient(prob.f.f, prob.f.adtype)
nlalg = _nlalg(opt.local_opt, linesearch)

kernel = simplebfgs_run!(backend)
nlprob = SimpleNonlinearSolve.ImmutableNonlinearProblem{false}(∇f, prob.u0, prob.p)

nlalg = opt.local_opt isa LBFGS ?
SimpleLimitedMemoryBroyden(;
threshold = opt.local_opt.threshold,
linesearch = Val(true)
) : SimpleBroyden(; linesearch = Val(true))
x0s = sol_pso.original
n = length(x0s)
result = similar(x0s)
result_fx = KernelAbstractions.allocate(opt.backend, T, n)

t0 = time()
kernel(
nlprob,
x0s,
result,
nlalg,
local_maxiters,
abstol,
reltol;
ndrange = length(x0s)
simplebfgs_run!(opt.backend)(
grad_f, f_raw, p,
x0s, result, result_fx,
nlalg, local_maxiters, abstol, reltol;
ndrange = n,
)
KernelAbstractions.synchronize(opt.backend)

sol_bfgs = (x -> prob.f(x, prob.p)).(result)
sol_bfgs = (x -> isnan(x) ? convert(eltype(prob.u0), Inf) : x).(sol_bfgs)

minobj, ind = findmin(sol_bfgs)
sol_u,
sol_obj = minobj > sol_pso.objective ? (sol_pso.u, sol_pso.objective) :
(view(result, ind), minobj)
t1 = time()

# @show sol_pso.stats.time

solve_time = (t1 - t0) + sol_pso.stats.time
fx_host = Array(result_fx)
minobj, ind = findmin(fx_host)
if minobj < best_obj
best_obj = minobj
best_u = Array(result)[ind]
end

solve_time = (time() - t0) + sol_pso.stats.time
return SciMLBase.build_solution(
SciMLBase.DefaultOptimizationCache(prob.f, prob.p), opt,
sol_u, sol_obj,
stats = Optimization.OptimizationStats(; time = solve_time)
best_u, best_obj;
stats = Optimization.OptimizationStats(; time = solve_time),
)
end
44 changes: 38 additions & 6 deletions src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -376,12 +376,44 @@ Based on the paper: Particle swarm optimization method for constrained optimizat
return penalty
end

#TODO: Possible migration to DifferentiationInterface.jl,
# however I cannot compile GPU-compatible gradients with Enzyme as Mar 2025
@inline function instantiate_gradient(f, adtype::AutoForwardDiff)
return (θ, p) -> ForwardDiff.gradient(x -> f(x, p), θ)
@inline function _enzyme_scalar_f(f, θ, p)
val = f(θ, p)
return val isa Number ? val : first(val)
end

@inline function instantiate_gradient(f, adtype::AutoEnzyme)
return (θ, p) -> autodiff_deferred(Reverse, Const(x -> f(x, p)), Active, Active(θ))[1][1]
struct ForwardDiffGradient{F}
f::F
end
@inline function (g::ForwardDiffGradient)(θ, p)
return ForwardDiff.gradient(x -> g.f(x, p), θ)
end

struct EnzymeGradient{F}
f::F
end
@inline function (g::EnzymeGradient)(θ, p)
θd = θ isa SVector ? MVector(θ) : θ
res = similar(θd)
make_zero!(res)
autodiff(
Reverse,
Const(_enzyme_scalar_f),
Active,
Const(g.f),
Duplicated(θd, res),
Const(p),
)
return θ isa SVector ? SVector(res) : res
end

@inline instantiate_gradient(f, ::AutoForwardDiff) = ForwardDiffGradient(f)
@inline instantiate_gradient(f, ::AutoEnzyme) = EnzymeGradient(f)

@inline as_svector(x::SVector) = x
@inline as_svector(x) = SVector(x)

struct SVectorGradient{G}
g::G
end
@inline (sg::SVectorGradient)(θ, p) = as_svector(sg.g(θ, p))
@inline as_svector_grad(g) = SVectorGradient(g)
Loading