diff --git a/Project.toml b/Project.toml index b901066..01e1db6 100644 --- a/Project.toml +++ b/Project.toml @@ -1,9 +1,14 @@ name = "BLASFEO" uuid = "2d913c56-f67c-11e9-3afb-9119491b9f4c" -authors = ["Ian McInerney", "Imperial College London"] version = "0.1.0" +authors = ["Ian McInerney", "Imperial College London"] + +[deps] +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +blasfeo_jll = "6b574d4a-bb57-5a4b-b7e6-1c794c903646" [compat] +LinearAlgebra = "1.12.0" julia = "1" [extras] diff --git a/src/BLASFEO.jl b/src/BLASFEO.jl index e7d76f7..d32c6ed 100644 --- a/src/BLASFEO.jl +++ b/src/BLASFEO.jl @@ -1,5 +1,17 @@ module BLASFEO +using LinearAlgebra +using blasfeo_jll +include("vecs.jl") +include("mats.jl") +include("show.jl") +include("level1.jl") -greet() = print("Hello World!") +# Vectors +export BlasfeoDvec, BlasfeoSvec +# Matrices +export BlasfeoDmat, BlasfeoSmat + +# Level 1 +export axpy, axpby end # module diff --git a/src/level1.jl b/src/level1.jl new file mode 100644 index 0000000..a9fc235 --- /dev/null +++ b/src/level1.jl @@ -0,0 +1,75 @@ +for (type,flag) in [ + (:BlasfeoDvec, :d), + (:BlasfeoSvec, :s), + ] + # dot + blasfeo_dot = Symbol(:blasfeo_, flag, :dot) + @eval function LinearAlgebra.dot(a::$type, b::$type) + a_ptr=pointer_from_objref(a) + b_ptr=pointer_from_objref(b) + @ccall blasfeo.$blasfeo_dot( + a.m::Cint, a_ptr::Ptr{$type}, 0::Cint, + b_ptr::Ptr{$type}, 0::Cint, + )::eltype($type) + end + + # axpy! + blasfeo_axpy = Symbol(:blasfeo_, flag, :axpy) + @eval function LinearAlgebra.axpy!(α::T, x::$type, y::$type) where {T <: Real} + x_ptr=pointer_from_objref(x) + y_ptr=pointer_from_objref(y) + @ccall blasfeo.$blasfeo_axpy( + x.m::Cint, + α::Cdouble, x_ptr::Ptr{$type}, 0::Cint, + y_ptr::Ptr{$type}, 0::Cint, + y_ptr::Ptr{$type}, 0::Cint, + )::Cvoid + return y + end + + # 3 arg axpby + @eval function axpy(α::T, x::$type, y::$type, z::$type) where {T <: Real} + x_ptr=pointer_from_objref(x) + y_ptr=pointer_from_objref(y) + z_ptr=pointer_from_objref(z) + @ccall blasfeo.$blasfeo_axpy( + x.m::Cint, + α::Cdouble, x_ptr::Ptr{$type}, 0::Cint, + y_ptr::Ptr{$type}, 0::Cint, + z_ptr::Ptr{$type}, 0::Cint, + )::Cvoid + return z + end + + # axpby + blasfeo_axpby = Symbol(:blasfeo_, flag, :axpby) + @eval function LinearAlgebra.axpby!(α::T1, x::$type, β::T2, y::$type) where {T1 <: Real, T2 <: Real} + x_ptr=pointer_from_objref(x) + y_ptr=pointer_from_objref(y) + @ccall blasfeo.$blasfeo_axpby( + a.m::Cint, + α::Cdouble, x_ptr::Ptr{$type}, 0::Cint, + β::Cdouble, y_ptr::Ptr{$type}, 0::Cint, + y_ptr::Ptr{$type}, 0::Cint, + )::Cvoid + return y + end + + # 3 arg axpby + blasfeo_axpby = Symbol(:blasfeo_, flag, :axpby) + @eval function axpby(α::T1, x::$type, β::T2, y::$type, z::$type) where {T1 <: Real, T2 <: Real} + x_ptr=pointer_from_objref(x) + y_ptr=pointer_from_objref(y) + z_ptr=pointer_from_objref(z) + @ccall blasfeo.$blasfeo_axpby( + x.m::Cint, + α::Cdouble, x_ptr::Ptr{$type}, 0::Cint, + β::Cdouble, y_ptr::Ptr{$type}, 0::Cint, + z_ptr::Ptr{$type}, 0::Cint, + )::Cvoid + return z + end + + # :* + #@eval function Base.:* +end diff --git a/src/mats.jl b/src/mats.jl new file mode 100644 index 0000000..126db22 --- /dev/null +++ b/src/mats.jl @@ -0,0 +1,154 @@ +# NOTE(@anton): Both of these structs are treated as mutable and Julia manages their +# lifetimes itself. Blasfeo also offers a way to preallocate memory but +# we still need the bitstype struct and to manage that memory so I think +# may as well use `blasfeo_allocate_*` and `blasfeo_free_*` + +# bits clone of panel major `blasfeo_dmat` +# blasfeo_jll compiles with PANELMAJ so we only need this one and not the column major version +mutable struct BlasfeoDmat <: AbstractMatrix{Cdouble} + const mem::Ptr{Cdouble} # pointer to passed chunk of memory + const pA::Ptr{Cdouble} # pointer to a pm*pn array of doubles, the first is aligned to cache line size + const dA::Ptr{Cdouble} # pointer to a min(m,n) (or max???) array of doubles + const m::Cint # rows + const n::Cint # cols + const pm::Cint # packed number or rows + const cn::Cint # packed number or cols + const use_dA::Cint # flag to tell if dA can be used + const memsize::Cint # size of needed memory + + function BlasfeoDmat(m::Integer,n::Integer) + mat = new(C_NULL,C_NULL,C_NULL,0,0,0,0,0,0) + @ccall blasfeo.blasfeo_allocate_dmat(m::Cint, n::Cint, pointer_from_objref(mat)::Ptr{BlasfeoDmat})::Cvoid + function destructor(this) + @ccall blasfeo.blasfeo_free_dmat(pointer_from_objref(this)::Ptr{BlasfeoDmat})::Cvoid + end + return finalizer(destructor, mat) + end + + function BlasfeoDmat(other::Matrix{Cdouble}) + mat = new(C_NULL,C_NULL,C_NULL,0,0,0,0,0,0) + m,n = size(other) + @ccall blasfeo.blasfeo_allocate_dmat(m::Cint, n::Cint, pointer_from_objref(mat)::Ptr{BlasfeoDmat})::Cvoid + function destructor(this) + @ccall blasfeo.blasfeo_free_dmat(pointer_from_objref(this)::Ptr{BlasfeoDmat})::Cvoid + end + @ccall blasfeo.blasfeo_pack_dmat(m::Cint, n::Cint, + other::Ptr{Cdouble}, m::Cint, + pointer_from_objref(mat)::Ptr{BlasfeoDmat}, + 0::Cint, 0::Cint)::Cvoid + return finalizer(destructor, mat) + end +end + +# bits clone of panel major `blasfeo_smat` +# blasfeo_jll compiles with PANELMAJ so we only need this one and not the column major version +mutable struct BlasfeoSmat <: AbstractMatrix{Cfloat} + const mem::Ptr{Cfloat} # pointer to passed chunk of memory + const pA::Ptr{Cfloat} # pointer to a pm*pn array of doubles, the first is aligned to cache line size + const dA::Ptr{Cfloat} # pointer to a min(m,n) (or max???) array of doubles + const m::Cint # rows + const n::Cint # cols + const pm::Cint # packed number or rows + const cn::Cint # packed number or cols + const use_dA::Cint # flag to tell if dA can be used + const memsize::Cint # size of needed memory + + function BlasfeoSmat(m::Integer,n::Integer) + mat = new(C_NULL,C_NULL,C_NULL,0,0,0,0,0,0) + @ccall blasfeo.blasfeo_allocate_smat(m::Cint, n::Cint, pointer_from_objref(mat)::Ptr{BlasfeoSmat})::Cvoid + function destructor(this) + @ccall blasfeo.blasfeo_free_smat(pointer_from_objref(this)::Ptr{BlasfeoSmat})::Cvoid + end + return finalizer(destructor, mat) + end + + function BlasfeoSmat(other::Matrix{Cfloat}) + mat = new(C_NULL,C_NULL,C_NULL,0,0,0,0,0,0) + m,n = size(other) + @ccall blasfeo.blasfeo_allocate_smat(m::Cint, n::Cint, pointer_from_objref(mat)::Ptr{BlasfeoSmat})::Cvoid + function destructor(this) + @ccall blasfeo.blasfeo_free_smat(pointer_from_objref(this)::Ptr{BlasfeoSmat})::Cvoid + end + @ccall blasfeo.blasfeo_pack_smat(m::Cint, n::Cint, + other::Ptr{Cdouble}, m::Cint, + pointer_from_objref(mat)::Ptr{BlasfeoSmat}, + 0::Cint, 0::Cint)::Cvoid + return finalizer(destructor, mat) + end +end + +# basic matrix operations +for (type,flag) in [ + (:BlasfeoDmat, :d), + (:BlasfeoSmat, :s), + ] + # size + @eval Base.size(A::$type) = (A.m, A.n) + + # getindex + blasfeo_geex1 = Symbol(:blasfeo_, flag, :geex1) + @eval Base.getindex(A::$type, I::Vararg{Int, 2}) = @ccall blasfeo.$blasfeo_geex1(pointer_from_objref(A)::Ptr{$type}, (I[1]-1)::Cint, (I[2]-1)::Cint)::eltype($type) + + # setindex! + blasfeo_gein1 = Symbol(:blasfeo_, flag, :gein1) + @eval Base.setindex!(A::$type, v::T, I::Vararg{Int, 2}) where {T <: Real} = @ccall blasfeo.$blasfeo_gein1(v::eltype($type),pointer_from_objref(A)::Ptr{$type}, (I[1]-1)::Cint, (I[2]-1)::Cint)::eltype($type) + + # similar + @eval Base.similar(A::$type) = $type(A.m, A.n) + @eval Base.similar(A::$type,dims::Dims{2}) = $type(dims...) + + # copy + blasfeo_gecp = Symbol(:blasfeo_, flag, :gecp) + @eval function Base.copy(A::$type) + B = similar(A) + A_ptr = pointer_from_objref(A) + B_ptr = pointer_from_objref(B) + @ccall blasfeo.$blasfeo_gecp( + A.m::Cint, A.n::Cint, + A_ptr::Ptr{$type}, 0::Cint, 0::Cint, + B_ptr::Ptr{$type}, 0::Cint, 0::Cint + )::Cvoid + return B + end + + # fill! + blasfeo_gese = Symbol(:blasfeo_, flag, :gese) + @eval function Base.fill!(A::$type, b::T) where {T <: Real} + A_ptr = pointer_from_objref(A) + @ccall blasfeo.$blasfeo_gese( + A.m::Cint, A.n::Cint, + b::eltype($type), + A_ptr::Ptr{$type}, 0::Cint, 0::Cint, + )::Cvoid + return A + end + + # convert to Matrix and back + # TODO(@anton) maybe the performance overhead of this means maybe we want only explicit constructors? + blasfeo_unpack = Symbol(:blasfeo_unpack_, flag, :mat) + blasfeo_pack = Symbol(:blasfeo_pack_, flag, :mat) + @eval function Base.convert(::Type{Matrix{eltype($type)}}, A::$type) + A_ptr = pointer_from_objref(A) + B = Matrix{eltype($type)}(undef, size(A)) + @ccall blasfeo.$blasfeo_unpack( + A.m::Cint, A.n::Cint, + A_ptr::Ptr{$type}, + 0::Cint, 0::Cint, + B::Ptr{eltype($type)}, + A.m::Cint + )::Cvoid + return B + end + @eval function Base.convert(::Type{$type}, A::Matrix{eltype($type)}) + B = $type(size(A)...) + B_ptr = pointer_from_objref(B) + m,n = size(A) + @ccall blasfeo.$blasfeo_pack( + m::Cint, n::Cint, + A::Ptr{eltype($type)}, m::Cint, + B_ptr::Ptr{$type}, + 0::Cint, 0::Cint + )::Cvoid + return B + end +end diff --git a/src/show.jl b/src/show.jl new file mode 100644 index 0000000..add7b1f --- /dev/null +++ b/src/show.jl @@ -0,0 +1,58 @@ +# TODO(@anton) mxn or nxm + +function Base.show(io, mat::BlasfeoDmat) + println("$(mat.m)x$(mat.n) BlasfeoDmat:") + @ccall blasfeo.blasfeo_print_dmat(mat.m::Cint, mat.n::Cint, pointer_from_objref(mat)::Ptr{BlasfeoDmat}, 0::Cint, 0::Cint)::Cvoid +end + +function Base.show(io, mat::BlasfeoSmat) + println("$(mat.m)x$(mat.n) BlasfeoSmat:") + @ccall blasfeo.blasfeo_print_smat(mat.m::Cint, mat.n::Cint, pointer_from_objref(mat)::Ptr{BlasfeoSmat}, 0::Cint, 0::Cint)::Cvoid +end + +function Base.show(io, vec::BlasfeoDvec) + println("$(vec.m)-element BlasfeoDvec:") + @ccall blasfeo.blasfeo_print_dvec(vec.m::Cint, pointer_from_objref(vec)::Ptr{BlasfeoDvec}, 0::Cint)::Cvoid +end + +function Base.show(io, vec::BlasfeoSvec) + println("$(vec.m)-element BlasfeoSvec:") + @ccall blasfeo.blasfeo_print_svec(vec.m::Cint, pointer_from_objref(vec)::Ptr{BlasfeoSvec}, 0::Cint)::Cvoid +end + +# TODO(@anton) This is probably not needed, we just need `print_matrix`? this is horribly documented. +for (type,shortname) in [ + (:BlasfeoDvec, :dvec), + (:BlasfeoSvec, :svec), + ] + printer = Symbol(:blasfeo_print_,shortname) + @eval begin + function Base.show(io::IO, ::MIME"text/plain", vec::$type) + println("$(vec.m)-element $($type):") + @ccall blasfeo.$printer(vec.m::Cint, pointer_from_objref(vec)::Ptr{$type}, 0::Cint)::Cvoid + end + + function Base.show(io::IO, vec::$type) + println("$(vec.m)-element $($type):") + @ccall blasfeo.$printer(vec.m::Cint, pointer_from_objref(vec)::Ptr{$type}, 0::Cint)::Cvoid + end + end +end + +for (type,shortname) in [ + (:BlasfeoDmat, "dmat"), + (:BlasfeoSmat, "smat"), + ] + printer = Symbol(:blasfeo_print_,shortname) + @eval begin + function Base.show(io::IO, ::MIME"text/plain", mat::$type) + println("$(mat.m)x$(mat.n) $($type):") + @ccall blasfeo.$printer(mat.m::Cint, mat.n::Cint, pointer_from_objref(mat)::Ptr{$type}, 0::Cint, 0::Cint)::Cvoid + end + + function Base.show(io::IO, mat::$type) + println("$(mat.m)x$(mat.n) $($type):") + @ccall blasfeo.$printer(mat.m::Cint, mat.n::Cint, pointer_from_objref(mat)::Ptr{$type}, 0::Cint, 0::Cint)::Cvoid + end + end +end diff --git a/src/vecs.jl b/src/vecs.jl new file mode 100644 index 0000000..509809e --- /dev/null +++ b/src/vecs.jl @@ -0,0 +1,117 @@ +# NOTE(@anton): Both of these structs are treated as mutable and Julia manages their +# lifetimes itself. Blasfeo also offers a way to preallocate memory but +# we still need the bitstype struct and to manage that memory so I think +# may as well use `blasfeo_allocate_*` and `blasfeo_free_*` + +# bits clone of panel major `blasfeo_dvec` +mutable struct BlasfeoDvec <: AbstractVector{Cdouble} + mem::Ptr{Cdouble} # pointer to passed chunk of memory + pa::Ptr{Cdouble} # pointer to a pm array of doubles, the first is aligned to cache line size + m::Cint # size + pm::Cint # packed size + memsize::Cint # size of needed memory + + function BlasfeoDvec(m::T) where {T <: Integer} + vec = new(C_NULL,C_NULL,0,0,0) + @ccall blasfeo.blasfeo_allocate_dvec(m::Cint, pointer_from_objref(vec)::Ptr{BlasfeoDvec})::Cvoid + function destructor(this) + @ccall blasfeo.blasfeo_free_dvec(pointer_from_objref(this)::Ptr{BlasfeoDvec})::Cvoid + end + return finalizer(destructor, vec) + end + + function BlasfeoDvec(other::Vector{Cdouble}) + vec = new(C_NULL,C_NULL,0,0,0) + @ccall blasfeo.blasfeo_allocate_dvec(length(other)::Cint, pointer_from_objref(vec)::Ptr{BlasfeoDvec})::Cvoid + function destructor(this) + @ccall blasfeo.blasfeo_free_dvec(pointer_from_objref(this)::Ptr{BlasfeoDvec})::Cvoid + end + + @ccall blasfeo.blasfeo_pack_dvec(vec.m::Cint, + other::Ptr{Cdouble}, 1::Cint, + pointer_from_objref(vec)::Ptr{BlasfeoDvec}, + 0::Cint)::Cvoid + + return finalizer(destructor, vec) + end +end + +# bits clone of panel major `blasfeo_svec` +mutable struct BlasfeoSvec <: AbstractVector{Cfloat} + mem::Ptr{Cfloat} # pointer to passed chunk of memory + pa::Ptr{Cfloat} # pointer to a pm array of floats, the first is aligned to cache line size + m::Cint # size + pm::Cint # packed size + memsize::Cint # size of needed memory + + function BlasfeoSvec(m::T) where {T <: Integer} + vec = new(C_NULL,C_NULL,0,0,0) + @ccall blasfeo.blasfeo_allocate_svec(m::Cint, pointer_from_objref(vec)::Ptr{BlasfeoSvec})::Cvoid + function destructor(this) + @ccall blasfeo.blasfeo_free_svec(pointer_from_objref(this)::Ptr{BlasfeoSvec})::Cvoid + end + return finalizer(destructor, vec) + end + + function BlasfeoSvec(other::Vector{Cfloat}) + vec = new(C_NULL,C_NULL,0,0,0) + @ccall blasfeo.blasfeo_allocate_svec(length(other)::Cint, pointer_from_objref(vec)::Ptr{BlasfeoSvec})::Cvoid + function destructor(this) + @ccall blasfeo.blasfeo_free_svec(pointer_from_objref(this)::Ptr{BlasfeoSvec})::Cvoid + end + + @ccall blasfeo.blasfeo_pack_svec(vec.m::Cint, + other::Ptr{Cfloat}, 1::Cint, + pointer_from_objref(vec)::Ptr{BlasfeoSvec}, + 0::Cint)::Cvoid + + return finalizer(destructor, vec) + end +end + +# basic vector operations +for (type,flag) in [ + (:BlasfeoDvec, :d), + (:BlasfeoSvec, :s), + ] + # size + @eval Base.size(A::$type) = (A.m,) + + # getindex + blasfeo_vecex1 = Symbol(:blasfeo_, flag, :vecex1) + @eval Base.getindex(A::$type, I::Vararg{Int, 1}) = @ccall blasfeo.$blasfeo_vecex1(pointer_from_objref(A)::Ptr{$type}, (I[1]-1)::Cint)::eltype($type) + + # setindex! + blasfeo_vecin1 = Symbol(:blasfeo_, flag, :vecin1) + @eval Base.setindex!(A::$type, v::T, I::Vararg{Int, 1}) where {T <: Real} = @ccall blasfeo.$blasfeo_vecin1(v::eltype($type),pointer_from_objref(A)::Ptr{$type}, (I[1]-1)::Cint)::eltype($type) + + # similar + @eval Base.similar(A::$type) = $type(A.m) + @eval Base.similar(A::$type,dims::Dims{1}) = $type(dims...) + + # copy + blasfeo_veccp = Symbol(:blasfeo_, flag, :veccp) + @eval function Base.copy(A::$type) + B = similar(A) + A_ptr = pointer_from_objref(A) + B_ptr = pointer_from_objref(B) + @ccall blasfeo.$blasfeo_veccp( + A.m::Cint, + A_ptr::Ptr{$type}, 0::Cint, + B_ptr::Ptr{$type}, 0::Cint, + )::Cvoid + return B + end + + # fill! + blasfeo_vecse = Symbol(:blasfeo_, flag, :vecse) + @eval function Base.fill!(A::$type, b::T) where {T <: Real} + A_ptr = pointer_from_objref(A) + @ccall blasfeo.$blasfeo_vecse( + A.m::Cint, + b::eltype($type), + A_ptr::Ptr{$type}, 0::Cint, + )::Cvoid + return A + end +end diff --git a/test/mat/basics.jl b/test/mat/basics.jl new file mode 100644 index 0000000..d8b1e02 --- /dev/null +++ b/test/mat/basics.jl @@ -0,0 +1,27 @@ +@testset "Matrix Basic Operations" begin + @testset for MAT in (BlasfeoDmat, BlasfeoSmat) + A = rand(eltype(MAT), 100, 100) + A_blasfeo = MAT(A) + @test A == A_blasfeo + @test A[10,20] == A_blasfeo[10,20] + + A[10,20] = 30.0; A_blasfeo[10,20] = 30.0 + A[20,30] = 50; A_blasfeo[20,30] = 50 + @test A == A_blasfeo + + B_blasfeo = similar(A_blasfeo) + @test size(A_blasfeo) == size(B_blasfeo) + # Note(@anton) Blasfeo _always_ clears memory. + @test A_blasfeo != B_blasfeo + + C_blasfeo = copy(A_blasfeo) + @test A_blasfeo == C_blasfeo + + C_blasfeo[15,15] = 30.0 + @test C_blasfeo[15,15] == 30.0 + @test A_blasfeo != C_blasfeo + + fill!(C_blasfeo, 100.0) + @test all(C_blasfeo .== 100.0) + end +end diff --git a/test/runtests.jl b/test/runtests.jl index f3ad109..63df0b2 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,5 @@ using BLASFEO using Test -@testset "BLASFEO.jl" begin - # Write your own tests here. -end +include("mat/basics.jl") +include("vec/basics.jl") diff --git a/test/vec/basics.jl b/test/vec/basics.jl new file mode 100644 index 0000000..90084ee --- /dev/null +++ b/test/vec/basics.jl @@ -0,0 +1,27 @@ +@testset "Vector Basic Operations" begin + @testset for VEC in (BlasfeoDvec, BlasfeoSvec) + A = rand(eltype(VEC), 100) + A_blasfeo = VEC(A) + @test A == A_blasfeo + @test A[10] == A_blasfeo[10] + + A[10] = 10.0; A_blasfeo[10] = 10.0 + A[20] = 20; A_blasfeo[20] = 20 + @test A == A_blasfeo + + B_blasfeo = similar(A_blasfeo) + @test size(A_blasfeo) == size(B_blasfeo) + # Note(@anton) Blasfeo _always_ clears memory. + @test A_blasfeo != B_blasfeo + + C_blasfeo = copy(A_blasfeo) + @test A_blasfeo == C_blasfeo + + C_blasfeo[15] = 15.0 + @test C_blasfeo[15] == 15.0 + @test A_blasfeo != C_blasfeo + + fill!(C_blasfeo, 100.0) + @test all(C_blasfeo .== 100.0) + end +end