From 6c898cbecc4dfc183f5f9d714235e1c81147151d Mon Sep 17 00:00:00 2001 From: Daniel Kats Date: Tue, 3 Feb 2026 10:40:35 +0100 Subject: [PATCH] Release v0.2.1 Fix type stability, add boundschecks --- CHANGELOG.md | 7 +++++ Project.toml | 3 +-- profile/jet.jl | 61 ++++++++++++++++++++++++++++++-------------- src/Buffers.jl | 12 ++++----- src/buffer.jl | 8 +++--- src/mbuffer.jl | 49 ++++++++++++++++++++--------------- src/threadsbuffer.jl | 41 +++++++++++++++++------------ 7 files changed, 114 insertions(+), 67 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 05331c9..cf5022b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Release notes +## Version [v0.2.1] - 2026.02.03 + +### Fixed + +* Improve type stability in `ThreadsMAllocBuffer`. +* Add `@boundscheck` and `@inline` to various functions to improve performance. + ## Version [v0.2.0] - 2025.02.03 ### Added diff --git a/Project.toml b/Project.toml index 078bf9a..99b1ce7 100644 --- a/Project.toml +++ b/Project.toml @@ -1,11 +1,10 @@ name = "Buffers" uuid = "d9ae4b7f-d04c-4b7f-92f7-4d9c2e17e1a4" authors = ["Daniel Kats "] -version = "0.2.0" +version = "0.2.1" [deps] PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [compat] PrecompileTools = "1" diff --git a/profile/jet.jl b/profile/jet.jl index e43949a..d7cae15 100644 --- a/profile/jet.jl +++ b/profile/jet.jl @@ -1,33 +1,56 @@ using Buffers +using LinearAlgebra using JET - -function test(lenbuf, d1, d2) - buf = Buffer(lenbuf) - A = alloc!(buf, d1,d2) - B = alloc!(buf, d1,d2) - A .= 1.0 - B .= 2.0 - test!(A) - #show(A) - #C = A + B - test2(A, B) - drop!(buf, B) - drop!(buf, A) -end - -function test2(A, B) +function jtest2(A, B) C = A + B end -function test!(A::AbstractArray{Float64,2}) +function mytest!(A::AbstractArray{Float64,2}) A .+= 0.5 end +function jtest(lenbuf, d1, d2) + buf = Buffer(lenbuf) + A1 = alloc!(buf, d1,d2) + B1 = alloc!(buf, d1,d2) + A1 .= 1.0 + B1 .= 2.0 + mytest!(A1) +
#show(A1) + #C = A1 + B1 + jtest2(A1, B1) + drop!(buf, B1) + drop!(buf, A1) + @threadsbuffer tbuf(1000) begin # 1000 elements buffer for nthreads() threads each + Threads.@threads for k = 1:20 + bbuf = reshape_buf!(tbuf, length(tbuf)) + reset!(tbuf) + A = alloc!(tbuf, 10, 10) # 10x10 tensor + B = alloc!(tbuf, 10, 10) # 10x10 tensor + A .= 1.0 + B .= 2.0 + + reset!(tbuf) + end + end + @buffer mbuf(100) begin + Am = alloc!(mbuf, 10, 10) + Bm = alloc!(mbuf, 10, 10) + Cm = alloc!(mbuf, 10, 10) + Am .= 1.0 + Bm .= 2.0 + mul!(Cm, Am, Bm) + drop!(mbuf, Bm, Am) + end + return +end + + function main() lenbuf = 1000 d1 = d2 = 10 -@report_opt target_modules=(@__MODULE__, Buffers) test(lenbuf, d1, d2) -#test(lenbuf, d1, d2) +@report_opt ignored_modules=(Base,Threads,) target_modules=(@__MODULE__, Buffers) jtest(lenbuf, d1, d2) +# jtest(lenbuf, d1, d2) end @time main() diff --git a/src/Buffers.jl b/src/Buffers.jl index 754996d..73f520d 100644 --- a/src/Buffers.jl +++ b/src/Buffers.jl @@ -77,7 +77,7 @@ julia> An = neuralyze(A) # tensor without origin julia> @tensor An[i,j,k] = B[i,j,l] * C[l,k] ``` """ -function alloc!(buf, dims...; extend=true) end +function alloc! end """ drop!(buf, tensor...) @@ -87,7 +87,7 @@ function alloc!(buf, dims...; extend=true) end Only last tensors can be dropped. For `ThreadsBuffer`, drop tensors from the buffer of the current thread. """ -function drop!(buf, tensor...) end +function drop! end """ reset!(buf) @@ -95,7 +95,7 @@ function drop!(buf, tensor...) end Reset buffer `buf` to the initial state. For `ThreadsBuffer`, reset the buffer of the current thread and release it. """ -function reset!(buf) end +function reset! end """ reshape_buf!(buf, dims...; offset=0, extend=true) @@ -121,21 +121,21 @@ julia> C = rand(10,20) julia> @tensor A[i,j,k] = B[i,j,l] * C[l,k] ``` """ -function reshape_buf!(buf, dims...; offset=0, extend=true) end +function reshape_buf! end """ isextendable(buf) Check if buffer `buf` is extendable. 
""" -function isextendable(buf) end +function isextendable end """ set_extendable!(buf, extend=true) Set buffer `buf` to be extendable or not. """ -function set_extendable!(buf, extend=true) end +function set_extendable! end """ neuralyze(tensor::AbstractArray) diff --git a/src/buffer.jl b/src/buffer.jl index cb413ad..dd004f1 100644 --- a/src/buffer.jl +++ b/src/buffer.jl @@ -33,8 +33,8 @@ function set_extendable!(buf::Buffer, extend::Bool=true) return end -function alloc!(buf::Buffer{T}, dims...) where {T} - @assert buf.offset[] >= 1 "Buffer is used with reshape_buf! and must be reset!" + Base.@propagate_inbounds function alloc!(buf::Buffer{T}, dims...) where {T} + @boundscheck(@assert buf.offset[] >= 1 "Buffer is used with reshape_buf! and must be reset!") start = buf.offset[] + 1 len = prod(dims) stop = start + len - 1 @@ -59,8 +59,8 @@ function drop!(buf::Buffer, tensor::AbstractArray...) end end -function reshape_buf!(buf::Buffer{T}, dims...; offset=0) where {T} - @assert buf.offset[] <= 1 "Buffer is used with alloc! and must be reset!" +Base.@propagate_inbounds function reshape_buf!(buf::Buffer{T}, dims...; offset=0) where {T} + @boundscheck(@assert buf.offset[] <= 1 "Buffer is used with alloc! and must be reset!") buf.offset[] = 0 len = prod(dims) start = offset + 2 diff --git a/src/mbuffer.jl b/src/mbuffer.jl index ae0df3c..acad9f6 100644 --- a/src/mbuffer.jl +++ b/src/mbuffer.jl @@ -67,12 +67,14 @@ function set_extendable!(buf::MAllocBuffer, extend::Bool=true) return end -function alloc!(buf::MAllocBuffer{T}, dims...) where {T} - @assert buf.offset[] >= 1 "Buffer is used with reshape_buf! and must be reset!" +Base.@propagate_inbounds function alloc!(buf::MAllocBuffer{T}, dims...) where {T} + @boundscheck(@assert buf.offset[] >= 1 "Buffer is used with reshape_buf! 
and must be reset!") start = buf.offset[] len = prod(dims) - if start + len > buf.data_length - error("Buffer overflow!") + @boundscheck begin + if start + len > buf.data_length + error("Buffer overflow!") + end end buf.offset[] += len return unsafe_wrap(Array, buf.data + start*sizeof(T), dims; own=false) @@ -88,13 +90,15 @@ function drop!(buf::MAllocBuffer{T}, tensor::AbstractArray...) where {T} end end -function reshape_buf!(buf::MAllocBuffer{T}, dims...; offset=0) where {T} - @assert buf.offset[] <= 1 "Buffer is used with alloc! and must be reset!" +Base.@propagate_inbounds function reshape_buf!(buf::MAllocBuffer{T}, dims...; offset=0) where {T} + @boundscheck(@assert buf.offset[] <= 1 "Buffer is used with alloc! and must be reset!") buf.offset[] = 0 len = prod(dims) start = offset + 1 - if start + len > buf.data_length - error("Buffer overflow!") + @boundscheck begin + if start + len > buf.data_length + error("Buffer overflow!") + end end return unsafe_wrap(Array, buf.data + start*sizeof(T), dims; own=false) end @@ -126,10 +130,13 @@ end macro buffer(specs, ex) buf, T, len = _parse_specs(specs) quote - $(esc(buf)) = MAllocBuffer{$(esc(T))}($(esc(len))) - $(esc(ex)) - free!($(esc(buf))) - $(esc(buf)) = nothing + let $(esc(buf)) = MAllocBuffer{$(esc(T))}($(esc(len))) + try + $(esc(ex)) + finally + free!($(esc(buf))) + end + end end end @@ -143,13 +150,15 @@ macro buffer(specs, specs2, ex) buf, T, len = _parse_specs(specs) buf2, T2, len2 = _parse_specs(specs2) quote - $(esc(buf)) = MAllocBuffer{$(esc(T))}($(esc(len))) - $(esc(buf2)) = MAllocBuffer{$(esc(T2))}($(esc(len2))) - $(esc(ex)) - free!($(esc(buf2))) - free!($(esc(buf))) - $(esc(buf2)) = nothing - $(esc(buf)) = nothing + let $(esc(buf)) = MAllocBuffer{$(esc(T))}($(esc(len))), + $(esc(buf2)) = MAllocBuffer{$(esc(T2))}($(esc(len2))) + try + $(esc(ex)) + finally + free!($(esc(buf2))) + free!($(esc(buf))) + end + end end end @@ -164,7 +173,7 @@ function _parse_specs(specs) T = specs.args[2] len = specs.args[3] 
else - "Invalid buffer specification!" + error("Invalid buffer specification!") end return name, T, len end \ No newline at end of file diff --git a/src/threadsbuffer.jl b/src/threadsbuffer.jl index dc88625..7e3074c 100644 --- a/src/threadsbuffer.jl +++ b/src/threadsbuffer.jl @@ -1,4 +1,3 @@ - abstract type AbstractThreadsBuffer end """ @@ -137,7 +136,7 @@ Return the buffer of the current thread. If the buffer is not available, wait until it is released. """ -function current_buffer(buf::AbstractThreadsBuffer) +@inline function current_buffer(buf::AbstractThreadsBuffer) return buf.buffers[current_buffer_index(buf)] end @@ -154,7 +153,7 @@ function set_extendable!(buf::AbstractThreadsBuffer, extend::Bool=true) return end -function alloc!(buf::AbstractThreadsBuffer, dims...) +@inline function alloc!(buf::AbstractThreadsBuffer, dims...) return alloc!(current_buffer(buf), dims...) end @@ -193,8 +192,13 @@ if the buffers were not released properly. function repair!(buf::AbstractThreadsBuffer) for i in 1:nbuffers(buf) reset!(buf.buffers[i]) - push!(buf.pool, i) end + lock(buf.condition) do + resize!(buf.pool, nbuffers(buf)) + buf.pool .= [1:nbuffers(buf);] + notify(buf.condition) + end + return end function reshape_buf!(buf::AbstractThreadsBuffer, dims...; offset=0) @@ -214,10 +218,13 @@ The specifications `specs` can be: macro threadsbuffer(specs, ex) buf, T, len, n = _parse_specs_tb(specs) quote - $(esc(buf)) = ThreadsMAllocBuffer{$(esc(T))}($(esc(len)), $(esc(n))) - $(esc(ex)) - free!($(esc(buf))) - $(esc(buf)) = nothing + let $(esc(buf)) = ThreadsMAllocBuffer{$(esc(T))}($(esc(len)), $(esc(n))) + try + $(esc(ex)) + finally + free!($(esc(buf))) + end + end end end @@ -230,13 +237,15 @@ macro threadsbuffer(specs, specs2, ex) buf, T, len, n = _parse_specs_tb(specs) buf2, T2, len2, n2 = _parse_specs_tb(specs2) quote - $(esc(buf)) = ThreadsMAllocBuffer{$(esc(T))}($(esc(len)), $(esc(n))) - $(esc(buf2)) = ThreadsMAllocBuffer{$(esc(T2))}($(esc(len2)), $(esc(n2))) - $(esc(ex)) 
- free!($(esc(buf2))) - free!($(esc(buf))) - $(esc(buf2)) = nothing - $(esc(buf)) = nothing + let $(esc(buf)) = ThreadsMAllocBuffer{$(esc(T))}($(esc(len)), $(esc(n))), + $(esc(buf2)) = ThreadsMAllocBuffer{$(esc(T2))}($(esc(len2)), $(esc(n2))) + try + $(esc(ex)) + finally + free!($(esc(buf2))) + free!($(esc(buf))) + end + end end end @@ -258,7 +267,7 @@ function _parse_specs_tb(specs) len = specs.args[3] n = specs.args[4] else - "Invalid buffer specification!" + error("Invalid buffer specification!") end return name, T, len, n end \ No newline at end of file