using DiffEqBase, Flux
using ForwardDiff: Dual, partials, value
dual(x, p) = x
dual(x::Real, p) = Dual(x, p)
function mypartial(f, Δs, i, args::Vararg)
dargs = ntuple(j -> dual(args[j], i == j), length(args))
a = f(dargs...)
end
mypartial.(
DiffEqBase.calculate_residuals,
0.036217686f0,
1,
[6.051575f-9] |> gpu, # Remove this gpu call and it works fine
1.0f0,
1.1397732f0,
0.001f0,
0.001f0,
DiffEqBase.ODE_DEFAULT_NORM,
0.0f0
)[1]
InvalidIRError: compiling kernel broadcast_kernel(CUDA.CuKernelContext, CuDeviceArray{Dual{Nothing,Float32,1},1,1}, Base.Broadcast.Broadcasted{Nothing,Tuple{Base.OneTo{Int64}},typeof(mypartial),Tuple{CUDA.CuRefValue{typeof(DiffEqBase.calculate_residuals)},Float32,Int64,Base.Broadcast.Extruded{CuDeviceArray{Float32,1,1},Tuple{Bool},Tuple{Int64}},Float32,Float32,Float32,Float32,CUDA.CuRefValue{typeof(DiffEqBase.ODE_DEFAULT_NORM)},Float32}}, Int64) resulted in invalid LLVM IR
Reason: unsupported call to the Julia runtime (call to jl_f_tuple)
Stacktrace:
[1] _broadcast_getindex_evalf at broadcast.jl:648
[2] _broadcast_getindex at broadcast.jl:621
[3] getindex at broadcast.jl:575
[4] broadcast_kernel at /home/avikpal/.julia/packages/GPUArrays/ZxsKE/src/host/broadcast.jl:62
Reason: unsupported call to the Julia runtime (call to jl_f_apply_type)
Stacktrace:
[1] mypartial at In[103]:2
[2] _broadcast_getindex_evalf at broadcast.jl:648
[3] _broadcast_getindex at broadcast.jl:621
[4] getindex at broadcast.jl:575
[5] broadcast_kernel at /home/avikpal/.julia/packages/GPUArrays/ZxsKE/src/host/broadcast.jl:62
Reason: unsupported call to the Julia runtime (call to jl_new_structv)
Stacktrace:
[1] mypartial at In[103]:2
[2] _broadcast_getindex_evalf at broadcast.jl:648
[3] _broadcast_getindex at broadcast.jl:621
[4] getindex at broadcast.jl:575
[5] broadcast_kernel at /home/avikpal/.julia/packages/GPUArrays/ZxsKE/src/host/broadcast.jl:62
Reason: unsupported dynamic function invocation (call to ntuple)
Stacktrace:
[1] mypartial at In[103]:2
[2] _broadcast_getindex_evalf at broadcast.jl:648
[3] _broadcast_getindex at broadcast.jl:621
[4] getindex at broadcast.jl:575
[5] broadcast_kernel at /home/avikpal/.julia/packages/GPUArrays/ZxsKE/src/host/broadcast.jl:62
Reason: unsupported call to the Julia runtime (call to jl_f__apply_iterate)
Stacktrace:
[1] mypartial at In[103]:3
[2] _broadcast_getindex_evalf at broadcast.jl:648
[3] _broadcast_getindex at broadcast.jl:621
[4] getindex at broadcast.jl:575
[5] broadcast_kernel at /home/avikpal/.julia/packages/GPUArrays/ZxsKE/src/host/broadcast.jl:62
Stacktrace:
[1] check_ir(::GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget,CUDA.CUDACompilerParams}, ::LLVM.Module) at /home/avikpal/.julia/packages/GPUCompiler/uTpNx/src/validation.jl:123
[2] macro expansion at /home/avikpal/.julia/packages/GPUCompiler/uTpNx/src/driver.jl:239 [inlined]
[3] macro expansion at /home/avikpal/.julia/packages/TimerOutputs/ZmKD7/src/TimerOutput.jl:206 [inlined]
[4] codegen(::Symbol, ::GPUCompiler.CompilerJob; libraries::Bool, deferred_codegen::Bool, optimize::Bool, strip::Bool, validate::Bool, only_entry::Bool) at /home/avikpal/.julia/packages/GPUCompiler/uTpNx/src/driver.jl:237
[5] compile(::Symbol, ::GPUCompiler.CompilerJob; libraries::Bool, deferred_codegen::Bool, optimize::Bool, strip::Bool, validate::Bool, only_entry::Bool) at /home/avikpal/.julia/packages/GPUCompiler/uTpNx/src/driver.jl:39
[6] compile at /home/avikpal/.julia/packages/GPUCompiler/uTpNx/src/driver.jl:35 [inlined]
[7] cufunction_compile(::GPUCompiler.FunctionSpec; kwargs::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}) at /home/avikpal/.julia/packages/CUDA/BIYoG/src/compiler/execution.jl:310
[8] cufunction_compile(::GPUCompiler.FunctionSpec) at /home/avikpal/.julia/packages/CUDA/BIYoG/src/compiler/execution.jl:305
[9] check_cache(::Dict{UInt64,Any}, ::Any, ::Any, ::GPUCompiler.FunctionSpec{GPUArrays.var"#broadcast_kernel#12",Tuple{CUDA.CuKernelContext,CuDeviceArray{Dual{Nothing,Float32,1},1,1},Base.Broadcast.Broadcasted{Nothing,Tuple{Base.OneTo{Int64}},typeof(mypartial),Tuple{CUDA.CuRefValue{typeof(DiffEqBase.calculate_residuals)},Float32,Int64,Base.Broadcast.Extruded{CuDeviceArray{Float32,1,1},Tuple{Bool},Tuple{Int64}},Float32,Float32,Float32,Float32,CUDA.CuRefValue{typeof(DiffEqBase.ODE_DEFAULT_NORM)},Float32}},Int64}}, ::UInt64; kwargs::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}) at /home/avikpal/.julia/packages/GPUCompiler/uTpNx/src/cache.jl:40
[10] broadcast_kernel at /home/avikpal/.julia/packages/GPUArrays/ZxsKE/src/host/broadcast.jl:60 [inlined]
[11] cached_compilation at /home/avikpal/.julia/packages/GPUCompiler/uTpNx/src/cache.jl:65 [inlined]
[12] cufunction(::GPUArrays.var"#broadcast_kernel#12", ::Type{Tuple{CUDA.CuKernelContext,CuDeviceArray{Dual{Nothing,Float32,1},1,1},Base.Broadcast.Broadcasted{Nothing,Tuple{Base.OneTo{Int64}},typeof(mypartial),Tuple{CUDA.CuRefValue{typeof(DiffEqBase.calculate_residuals)},Float32,Int64,Base.Broadcast.Extruded{CuDeviceArray{Float32,1,1},Tuple{Bool},Tuple{Int64}},Float32,Float32,Float32,Float32,CUDA.CuRefValue{typeof(DiffEqBase.ODE_DEFAULT_NORM)},Float32}},Int64}}; name::Nothing, kwargs::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}) at /home/avikpal/.julia/packages/CUDA/BIYoG/src/compiler/execution.jl:297
[13] cufunction at /home/avikpal/.julia/packages/CUDA/BIYoG/src/compiler/execution.jl:294 [inlined]
[14] #launch_heuristic#853 at /home/avikpal/.julia/packages/CUDA/BIYoG/src/gpuarrays.jl:19 [inlined]
[15] launch_heuristic at /home/avikpal/.julia/packages/CUDA/BIYoG/src/gpuarrays.jl:17 [inlined]
[16] copyto! at /home/avikpal/.julia/packages/GPUArrays/ZxsKE/src/host/broadcast.jl:66 [inlined]
[17] copyto! at ./broadcast.jl:886 [inlined]
[18] copy at ./broadcast.jl:862 [inlined]
[19] materialize(::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1},Nothing,typeof(mypartial),Tuple{Base.RefValue{typeof(DiffEqBase.calculate_residuals)},Float32,Int64,CuArray{Float32,1},Float32,Float32,Float32,Float32,Base.RefValue{typeof(DiffEqBase.ODE_DEFAULT_NORM)},Float32}}) at ./broadcast.jl:837
[20] top-level scope at In[118]:1
[21] include_string(::Function, ::Module, ::String, ::String) at ./loading.jl:1091
MWE:
When used with Tracker, this function fails to compile on GPU
NOTE: The MWE reported is the extracted part from Tracker's backward pass (https://github.com/FluxML/Tracker.jl/blob/master/src/lib/array.jl#L546) which was producing the error.
Package Versions: