using Enzyme
using NNlib: logsoftmax
# Multiclass log-loss from raw scores, reduced over the class dimension (dim 1).
# Builds a one-hot mask by comparing the class indices 1:size(pred, 1) against the
# labels in `y`, multiplies it with `logsoftmax(pred; dims=1)`, and returns the
# negated sum over dim 1.
# The labels are laid out along dim 3, so `pred` is presumably K×1×N with
# N == length(y) (that is how `loss` calls it) — TODO confirm for other callers.
# NOTE(review): this is the reproducer from the accompanying report; the temporary
# one-hot array is kept on purpose so the failure still reproduces.
function mlogloss_core(pred, y)
# Class indices 1:size(pred, 1) as Int32, laid out along dim 1 (K×1×1).
classes = reshape(Int32(1):Int32(size(pred, 1)), :, 1, 1)
# Labels converted to Int32 and laid out along dim 3 (1×1×N).
y_idx = reshape(Int32.(y), 1, 1, :)
# Bool -> Float32 broadcast materialized as a temporary array; the report
# attributes the Enzyme internal error to this allocation.
y_oh = Float32.(classes .== y_idx)
# Mask the log-probabilities with the one-hot array, then sum over dim 1 and negate.
return -sum(y_oh .* logsoftmax(pred; dims=1); dims=1)
end
# Scalar loss of a linear model: computes the scores `W * x`, reshapes them to
# size(pred, 1) × 1 × size(pred, 2), and sums everything `mlogloss_core` returns.
# This is the function handed to `Enzyme.autodiff` in the reproducer.
function loss(W, x, y)
pred = W * x
# Insert a singleton middle dimension so the second axis of `pred` lands on dim 3,
# the same dimension `mlogloss_core` places the labels on.
return sum(mlogloss_core(reshape(pred, size(pred, 1), 1, size(pred, 2)), y))
end
# Reproducer driver: random Float32 weights (3×5) and inputs (5×32), plus 32 labels
# drawn from 1:3 and stored as UInt32.
W = randn(Float32, 3, 5)
# Zero array of the same shape and element type as W; paired with W in `Duplicated` below.
dW = zero(W)
x = randn(Float32, 5, 32)
y = UInt32.(rand(1:3, 32))
# Reverse-mode autodiff of `loss` with runtime activity enabled. W is passed as
# Duplicated(W, dW); x and y are passed as Const.
# NOTE(review): per the report, this is the call that fails with EnzymeInternalError
# when Julia runs with --check-bounds=yes and/or --code-coverage=user.
Enzyme.autodiff(
Enzyme.set_runtime_activity(Enzyme.Reverse),
loss,
Enzyme.Duplicated(W, dW),
Enzyme.Const(x),
Enzyme.Const(y),
)
ERROR: LoadError: EnzymeInternalError: Enzyme compilation failed due to an internal error.
Please open an issue with the code to reproduce and full error log on github.com/EnzymeAD/Enzyme.jl
To toggle more information for debugging (needed for bug reports), set Enzyme.Compiler.VERBOSE_ERRORS[] = true (default false)
Illegal replace ficticious phi for: %"Memory{Float32}[]_replacementJ" = phi ptr addrspace(10) , !dbg !221 of %"Memory{Float32}[]" = call noalias nonnull align 16 dereferenceable(16) "enzyme_ReadOnlyOrThrow" "enzyme_type"="{[-1]:Pointer, [-1,0]:Integer, [-1,1]:Integer, [-1,2]:Integer, [-1,3]:Integer, [-1,4]:Integer, [-1,5]:Integer, [-1,6]:Integer, [-1,7]:Integer, [-1,8]:Pointer, [-1,8,-1]:Float@float}" ptr addrspace(10) @jl_alloc_genericmemory_unchecked(ptr %ptls_load103, i64 %391, ptr nonnull inttoptr (i64 123728303995664 to ptr)) #128, !dbg !295
Stacktrace:
[1] GenericMemory
@ ./boot.jl:588
[2] new_as_memoryref
@ ./boot.jl:604
[3] Array
@ ./boot.jl:654
[4] Array
@ ./boot.jl:662
[5] similar
@ ./abstractarray.jl:867
[6] similar
@ ./abstractarray.jl:866
[7] similar
@ ./broadcast.jl:228
[8] similar
@ ./broadcast.jl:227
[9] OverrideBCMaterialize
@ ~/.julia/packages/Enzyme/9OkvN/src/compiler/interpreter.jl:933
[10] mlogloss_core
@ ~/Desktop/NeuroTabModels.jl/enzyme_mwe.jl:9
Stacktrace:
[1] julia_error(msg::String, val::Ptr{LLVM.API.LLVMOpaqueValue}, errtype::Enzyme.API.ErrorType, data::Ptr{Nothing}, data2::Ptr{LLVM.API.LLVMOpaqueValue}, B::Ptr{LLVM.API.LLVMOpaqueBuilder})
@ Enzyme.Compiler ~/.julia/packages/Enzyme/9OkvN/src/errors.jl:1281
[2] julia_error(cstr::Cstring, val::Ptr{LLVM.API.LLVMOpaqueValue}, errtype::Enzyme.API.ErrorType, data::Ptr{Nothing}, data2::Ptr{LLVM.API.LLVMOpaqueValue}, B::Ptr{LLVM.API.LLVMOpaqueBuilder})
@ Enzyme.Compiler ~/.julia/packages/Enzyme/9OkvN/src/errors.jl:997
[3] EnzymeCreatePrimalAndGradient(logic::Enzyme.Logic, todiff::LLVM.Function, retType::Enzyme.API.CDIFFE_TYPE, constant_args::Vector{Enzyme.API.CDIFFE_TYPE}, TA::Enzyme.TypeAnalysis, returnValue::Bool, dretUsed::Bool, mode::Enzyme.API.CDerivativeMode, runtimeActivity::Bool, strongZero::Bool, width::Int64, additionalArg::Ptr{Nothing}, forceAnonymousTape::Bool, typeInfo::Enzyme.FnTypeInfo, uncacheable_args::Vector{Bool}, augmented::Ptr{Nothing}, atomicAdd::Bool)
@ Enzyme.API ~/.julia/packages/Enzyme/9OkvN/src/api.jl:270
[4] macro expansion
@ ~/.julia/packages/Enzyme/9OkvN/src/compiler.jl:2783 [inlined]
[5] macro expansion
@ ~/.julia/packages/LLVM/Bd7kA/src/base.jl:113 [inlined]
[6] enzyme!(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget{GPUCompiler.NativeCompilerTarget}, Enzyme.Compiler.EnzymeCompilerParams{Enzyme.Compiler.PrimalCompilerParams}}, interp::Enzyme.Compiler.Interpreter.EnzymeInterpreter{Nothing}, mod::LLVM.Module, primalf::LLVM.Function, TT::Type, mode::Enzyme.API.CDerivativeMode, width::Int64, parallel::Bool, actualRetType::Type, wrap::Bool, modifiedBetween::NTuple{N, Bool} where N, returnPrimal::Bool, expectedTapeType::Type, loweredArgs::Set{Int64}, boxedArgs::Set{Int64}, removedRoots::Set{Int64})
@ Enzyme.Compiler ~/.julia/packages/Enzyme/9OkvN/src/compiler.jl:2656
[7] compile_unhooked(output::Symbol, job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget{GPUCompiler.NativeCompilerTarget}, Enzyme.Compiler.EnzymeCompilerParams{Enzyme.Compiler.PrimalCompilerParams}})
@ Enzyme.Compiler ~/.julia/packages/Enzyme/9OkvN/src/compiler.jl:5863
[8] #compile#99
@ ~/.julia/packages/GPUCompiler/Yuvf5/src/driver.jl:67 [inlined]
[9] compile
@ ~/.julia/packages/GPUCompiler/Yuvf5/src/driver.jl:55 [inlined]
[10] _thunk(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget{GPUCompiler.NativeCompilerTarget}, Enzyme.Compiler.EnzymeCompilerParams{Enzyme.Compiler.PrimalCompilerParams}}, postopt::Bool)
@ Enzyme.Compiler ~/.julia/packages/Enzyme/9OkvN/src/compiler.jl:6776
[11] _thunk
@ ~/.julia/packages/Enzyme/9OkvN/src/compiler.jl:6774 [inlined]
[12] cached_compilation
@ ~/.julia/packages/Enzyme/9OkvN/src/compiler.jl:6832 [inlined]
[13] thunkbase(mi::Core.MethodInstance, World::UInt64, FA::Type{<:Annotation}, A::Type{<:Annotation}, TT::Type, Mode::Enzyme.API.CDerivativeMode, width::Int64, ModifiedBetween::NTuple{N, Bool} where N, ReturnPrimal::Bool, ShadowInit::Bool, ABI::Type, ErrIfFuncWritten::Bool, RuntimeActivity::Bool, StrongZero::Bool, edges::Vector{Any})
@ Enzyme.Compiler ~/.julia/packages/Enzyme/9OkvN/src/compiler.jl:6948
[14] thunk_generator(world::UInt64, source::Union{LineNumberNode, Method}, FA::Type, A::Type, TT::Type, Mode::Enzyme.API.CDerivativeMode, Width::Int64, ModifiedBetween::NTuple{N, Bool} where N, ReturnPrimal::Bool, ShadowInit::Bool, ABI::Type, ErrIfFuncWritten::Bool, RuntimeActivity::Bool, StrongZero::Bool, self::Any, fakeworld::Any, fa::Type, a::Type, tt::Type, mode::Type, width::Type, modifiedbetween::Type, returnprimal::Type, shadowinit::Type, abi::Type, erriffuncwritten::Type, runtimeactivity::Type, strongzero::Type)
@ Enzyme.Compiler ~/.julia/packages/Enzyme/9OkvN/src/compiler.jl:7092
[15] autodiff
@ ~/.julia/packages/Enzyme/9OkvN/src/Enzyme.jl:509 [inlined]
[16] autodiff
@ ~/.julia/packages/Enzyme/9OkvN/src/Enzyme.jl:569 [inlined]
[17] autodiff(::ReverseMode{false, true, false, FFIABI, false, false}, ::typeof(loss_fn), ::Duplicated{@NamedTuple{W::Matrix{Float32}, b::Vector{Float32}}}, ::Const{@NamedTuple{}}, ::Const{Tuple{Matrix{Float32}, Vector{UInt32}}})
@ Enzyme ~/.julia/packages/Enzyme/9OkvN/src/Enzyme.jl:541
[18] top-level scope
@ ~/Desktop/NeuroTabModels.jl/enzyme_mwe.jl:27
[19] include(mod::Module, _path::String)
@ Base ./Base.jl:306
[20] exec_options(opts::Base.JLOptions)
@ Base ./client.jl:317
[21] _start()
@ Base ./client.jl:550
in expression starting at /home/aditya/Desktop/NeuroTabModels.jl/enzyme_mwe.jl:27
# Before (failing form): materialize a Float32 one-hot array from the Bool mask,
# then multiply it with the log-softmax output.
y_oh = Float32.(classes .== y_idx)
-sum(y_oh .* logsoftmax(pred; dims=1); dims=1)
# After (workaround): select the log-softmax entries with `ifelse` on the Bool mask,
# using a zero of the same element type elsewhere, so no Float32 one-hot array is built.
lsm = logsoftmax(pred; dims=1)
-sum(ifelse.(classes .== y_idx, lsm, zero(eltype(lsm))); dims=1)
On Julia 1.12.6 with Enzyme v0.13.140, reverse-mode autodiff of a function that materializes a `Bool → Float32` broadcast crashes inside `OverrideBCMaterialize` with `Illegal replace ficticious phi` for a `Memory{Float32}` allocation.

The crash only shows up when Julia is run with `--check-bounds=yes` and/or `--code-coverage=user`. In a plain REPL, running the same code passes. `Pkg.test` and `] test` set those flags by default, so this is what fails in CI.

MRE
Error Stacktrace
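I worked around the failure by removing the temporary `Float32.(Bool)` broadcast array. Replacing

    y_oh = Float32.(classes .== y_idx)
    -sum(y_oh .* logsoftmax(pred; dims=1); dims=1)

with

    lsm = logsoftmax(pred; dims=1)
    -sum(ifelse.(classes .== y_idx, lsm, zero(eltype(lsm))); dims=1)

avoids the intermediate `Memory{Float32}` allocation and runs cleanly under the same flags.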