From 186d4884cfec8abd291b0142f7e96f808718f44c Mon Sep 17 00:00:00 2001 From: AdityaPandeyCN Date: Thu, 2 Apr 2026 16:04:21 +0530 Subject: [PATCH] rewrite L-BFGS with custom Strong Wolfe line search Signed-off-by: AdityaPandeyCN --- Project.toml | 9 ++- src/ParallelParticleSwarms.jl | 5 +- src/bfgs.jl | 29 ++++--- src/hybrid.jl | 143 ++++++++++++++++++++++------------ src/utils.jl | 44 +++++++++-- 5 files changed, 160 insertions(+), 70 deletions(-) diff --git a/Project.toml b/Project.toml index a4c76f8..bec91b6 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "ParallelParticleSwarms" uuid = "ab63da0c-63b4-40fa-a3b7-d2cba5be6419" -version = "1.0.0" authors = ["Utkarsh and contributors"] +version = "1.0.0" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" @@ -9,6 +9,9 @@ DiffEqGPU = "071ae1c0-96b5-11e9-1965-c90190d839ea" Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" +LineSearch = "87fe0de2-c867-4266-b59a-2f0a94fc965b" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +NonlinearSolveBase = "be0214bd-f91f-a760-ac4e-3421ce2b2da0" Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba" PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" QuasiMonteCarlo = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b" @@ -26,6 +29,8 @@ Enzyme = "0.13" ForwardDiff = "0.10, 1.3" JET = "0.9, 0.10, 0.11" KernelAbstractions = "0.9" +LineSearch = "0.1" +NonlinearSolveBase = "2.24" Optimization = "4.1, 5.2" PrecompileTools = "1" QuasiMonteCarlo = "0.3" @@ -33,7 +38,7 @@ Reexport = "1.2" SafeTestsets = "0.1" SciMLBase = "2.79" Setfield = "1.1" -SimpleNonlinearSolve = "2.2" +SimpleNonlinearSolve = "2.12" StaticArrays = "1.9" julia = "1.10" diff --git a/src/ParallelParticleSwarms.jl b/src/ParallelParticleSwarms.jl index 9f59408..1ea6bda 100644 --- a/src/ParallelParticleSwarms.jl +++ b/src/ParallelParticleSwarms.jl @@ -1,10 +1,11 @@ module ParallelParticleSwarms using SciMLBase, StaticArrays, Setfield, KernelAbstractions -using QuasiMonteCarlo, Optimization, SimpleNonlinearSolve, ForwardDiff +using QuasiMonteCarlo, Optimization, SimpleNonlinearSolve, ForwardDiff, LineSearch +using NonlinearSolveBase: ImmutableNonlinearProblem import Adapt import Adapt: adapt -import Enzyme: autodiff_deferred, Active, Reverse, Const +import Enzyme: autodiff, autodiff_deferred, Active, Reverse, Const, Duplicated, make_zero! import KernelAbstractions: @atomic, @atomicreplace, @atomicswap using QuasiMonteCarlo import DiffEqGPU: GPUTsit5, make_prob_compatible, vectorized_solve, vectorized_asolve diff --git a/src/bfgs.jl b/src/bfgs.jl index b85e802..97a3189 100644 --- a/src/bfgs.jl +++ b/src/bfgs.jl @@ -5,20 +5,21 @@ function SciMLBase.__solve( abstol = nothing, reltol = nothing, maxiters = 1000, + linesearch = StrongWolfeLineSearch(), kwargs... ) - f = Base.Fix2(prob.f.f, prob.p) - function _g(θ, _p = nothing) - return ForwardDiff.gradient(f, θ) - end + u0 = as_svector(prob.u0) + ∇f = as_svector_grad(instantiate_gradient(prob.f.f, prob.f.adtype)) t0 = time() - nlprob = NonlinearProblem{false}(_g, prob.u0) + nlprob = ImmutableNonlinearProblem{false}(∇f, u0, prob.p) nlsol = solve( nlprob, - SimpleLimitedMemoryBroyden(; threshold = opt.threshold, linesearch = Val(true)); + SimpleLimitedMemoryBroyden(; threshold = opt.threshold, linesearch); maxiters, abstol, - reltol + reltol, + grad_f = ∇f, + kwargs... ) θ = nlsol.u t1 = time() @@ -31,6 +32,7 @@ function SciMLBase.__solve( ) end +# `BFGS` here solves ∇f = 0 via `SimpleBroyden` (secant/quasi-Newton on the gradient). function SciMLBase.__solve( prob::SciMLBase.OptimizationProblem, opt::BFGS, @@ -38,19 +40,22 @@ function SciMLBase.__solve( abstol = nothing, reltol = nothing, maxiters = 1000, + linesearch = StrongWolfeLineSearch(), kwargs... ) - f = Base.Fix2(prob.f.f, prob.p) - ∇f = instantiate_gradient(f, prob.f.adtype) + u0 = as_svector(prob.u0) + ∇f = as_svector_grad(instantiate_gradient(prob.f.f, prob.f.adtype)) t0 = time() - nlprob = NonlinearProblem{false}(∇f, prob.u0) + nlprob = ImmutableNonlinearProblem{false}(∇f, u0, prob.p) nlsol = solve( nlprob, - SimpleBroyden(; linesearch = Val(true)); + SimpleBroyden(; linesearch); maxiters, abstol, - reltol + reltol, + grad_f = ∇f, + kwargs... ) θ = nlsol.u t1 = time() diff --git a/src/hybrid.jl b/src/hybrid.jl index 60f80ed..c07c2d7 100644 --- a/src/hybrid.jl +++ b/src/hybrid.jl @@ -1,69 +1,116 @@ -@kernel function simplebfgs_run!(nlprob, x0s, result, opt, maxiters, abstol, reltol) +using KernelAbstractions +using SciMLBase +using Optimization +using LineSearch +using SimpleNonlinearSolve +using NonlinearSolveBase: ImmutableNonlinearProblem +import SciMLBase: NonlinearFunction +import NonlinearSolveBase.Utils as NLBUtils + +@inline (f::NonlinearFunction{false, G})(u, p) where {G} = f.f(u, p) + +@inline NLBUtils.evaluate_f(prob::ImmutableNonlinearProblem, u) = + prob.f.f(u, prob.p) + +@inline NLBUtils.evaluate_f!!(prob::ImmutableNonlinearProblem, fu, u) = + prob.f.f(u, prob.p) + +@inline _unwrap_scalar(x::Real) = x +@inline _unwrap_scalar(x) = x[] + +function _hybrid_bounds(prob, ::Val{d}, ::Type{T}) where {d, T} + lb = prob.lb === nothing ? nothing : SVector{d, T}(prob.lb) + ub = prob.ub === nothing ? nothing : SVector{d, T}(prob.ub) + return lb, ub +end + +struct BoundedGrad{G, LB, UB} + raw::G + lb::LB + ub::UB +end + +@inline (bg::BoundedGrad{G, Nothing, Nothing})(θ, p) where {G} = as_svector(bg.raw(θ, p)) + +@inline function (bg::BoundedGrad)(θ, p) + T = eltype(θ) + w = bg.ub .- bg.lb + in_box = all(isfinite, θ) && + all(θ .>= bg.lb .- T(2) .* w) && + all(θ .<= bg.ub .+ T(2) .* w) + g = in_box ? bg.raw(θ, p) : map(_ -> T(1.0e15), θ) + return as_svector(g) +end + +@inline function _nlalg(local_opt::LBFGS, linesearch) + return SimpleLimitedMemoryBroyden(; threshold = local_opt.threshold, linesearch) +end +@inline _nlalg(::BFGS, linesearch) = SimpleBroyden(; linesearch) + +@kernel function simplebfgs_run!( + grad_f, f_raw, p, x0s, result, result_fx, nlalg, maxiters, abstol, reltol + ) i = @index(Global, Linear) - nlcache = remake(nlprob; u0 = x0s[i]) - sol = solve(nlcache, opt; maxiters, abstol, reltol) - @inbounds result[i] = sol.u + @inbounds x0 = as_svector(x0s[i]) + nlprob = ImmutableNonlinearProblem{false}(NonlinearFunction{false}(grad_f), x0, p) + sol = SciMLBase.solve(nlprob, nlalg; maxiters, abstol, reltol, grad_f = grad_f) + u = as_svector(sol.u) + T = eltype(u) + v = f_raw(u, p) + @inbounds result[i] = u + @inbounds result_fx[i] = (isnan(v) | !isfinite(v)) ? T(Inf) : convert(T, v) end function SciMLBase.solve!( cache::HybridPSOCache, opt::HybridPSO{Backend, LocalOpt}, args...; abstol = nothing, reltol = nothing, - maxiters = 100, local_maxiters = 10, kwargs... - ) where { - Backend, LocalOpt <: Union{LBFGS, BFGS}, - } - pso_cache = cache.pso_cache - - sol_pso = solve!(pso_cache) - x0s = sol_pso.original + maxiters = 100, + local_maxiters = 50, + linesearch = StrongWolfeLineSearch(), + kwargs... + ) where {Backend, LocalOpt <: Union{LBFGS, BFGS}} - backend = opt.backend + sol_pso = SciMLBase.solve!(cache.pso_cache; maxiters) + best_u = sol_pso.u + best_obj = _unwrap_scalar(sol_pso.objective) - prob = remake(cache.prob, lb = nothing, ub = nothing) + prob = cache.prob + f_raw = prob.f.f + p = prob.p + T = eltype(prob.u0) + d = length(prob.u0) + lb, ub = _hybrid_bounds(prob, Val(d), T) - result = cache.start_points - copyto!(result, x0s) + grad_f = as_svector_grad(BoundedGrad(ForwardDiffGradient(f_raw), lb, ub)) - ∇f = instantiate_gradient(prob.f.f, prob.f.adtype) + nlalg = _nlalg(opt.local_opt, linesearch) - kernel = simplebfgs_run!(backend) - nlprob = SimpleNonlinearSolve.ImmutableNonlinearProblem{false}(∇f, prob.u0, prob.p) - - nlalg = opt.local_opt isa LBFGS ? - SimpleLimitedMemoryBroyden(; - threshold = opt.local_opt.threshold, - linesearch = Val(true) - ) : SimpleBroyden(; linesearch = Val(true)) + x0s = sol_pso.original + n = length(x0s) + result = similar(x0s) + result_fx = KernelAbstractions.allocate(opt.backend, T, n) t0 = time() - kernel( - nlprob, - x0s, - result, - nlalg, - local_maxiters, - abstol, - reltol; - ndrange = length(x0s) + simplebfgs_run!(opt.backend)( + grad_f, f_raw, p, + x0s, result, result_fx, + nlalg, local_maxiters, abstol, reltol; + ndrange = n, ) + KernelAbstractions.synchronize(opt.backend) - sol_bfgs = (x -> prob.f(x, prob.p)).(result) - sol_bfgs = (x -> isnan(x) ? convert(eltype(prob.u0), Inf) : x).(sol_bfgs) - - minobj, ind = findmin(sol_bfgs) - sol_u, - sol_obj = minobj > sol_pso.objective ? (sol_pso.u, sol_pso.objective) : - (view(result, ind), minobj) - t1 = time() - - # @show sol_pso.stats.time - - solve_time = (t1 - t0) + sol_pso.stats.time + fx_host = Array(result_fx) + minobj, ind = findmin(fx_host) + if minobj < best_obj + best_obj = minobj + best_u = Array(result)[ind] + end + solve_time = (time() - t0) + sol_pso.stats.time return SciMLBase.build_solution( SciMLBase.DefaultOptimizationCache(prob.f, prob.p), opt, - sol_u, sol_obj, - stats = Optimization.OptimizationStats(; time = solve_time) + best_u, best_obj; + stats = Optimization.OptimizationStats(; time = solve_time), ) end diff --git a/src/utils.jl b/src/utils.jl index 24bb27b..77b8910 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -376,12 +376,44 @@ Based on the paper: Particle swarm optimization method for constrained optimizat return penalty end -#TODO: Possible migration to DifferentiationInterface.jl, -# however I cannot compile GPU-compatible gradients with Enzyme as Mar 2025 -@inline function instantiate_gradient(f, adtype::AutoForwardDiff) - return (θ, p) -> ForwardDiff.gradient(x -> f(x, p), θ) +@inline function _enzyme_scalar_f(f, θ, p) + val = f(θ, p) + return val isa Number ? val : first(val) end -@inline function instantiate_gradient(f, adtype::AutoEnzyme) - return (θ, p) -> autodiff_deferred(Reverse, Const(x -> f(x, p)), Active, Active(θ))[1][1] +struct ForwardDiffGradient{F} + f::F end +@inline function (g::ForwardDiffGradient)(θ, p) + return ForwardDiff.gradient(x -> g.f(x, p), θ) +end + +struct EnzymeGradient{F} + f::F +end +@inline function (g::EnzymeGradient)(θ, p) + θd = θ isa SVector ? MVector(θ) : θ + res = similar(θd) + make_zero!(res) + autodiff( + Reverse, + Const(_enzyme_scalar_f), + Active, + Const(g.f), + Duplicated(θd, res), + Const(p), + ) + return θ isa SVector ? SVector(res) : res +end + +@inline instantiate_gradient(f, ::AutoForwardDiff) = ForwardDiffGradient(f) +@inline instantiate_gradient(f, ::AutoEnzyme) = EnzymeGradient(f) + +@inline as_svector(x::SVector) = x +@inline as_svector(x) = SVector(x) + +struct SVectorGradient{G} + g::G +end +@inline (sg::SVectorGradient)(θ, p) = as_svector(sg.g(θ, p)) +@inline as_svector_grad(g) = SVectorGradient(g)