Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions arm
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Configure a cross build in build-m55 using the arm-none-eabi GCC toolchain, with
# ARGON_ENABLE_TESTING=ON and ARGON_TEST_CPU=M55. FETCHCONTENT_SOURCE_DIR_C++SPEC
# points CMake's FetchContent at a local checkout in ../cppspec.
CC=arm-none-eabi-gcc CXX=arm-none-eabi-g++ cmake -B build-m55 -DARGON_ENABLE_TESTING=ON -DARGON_TEST_CPU=M55 -DFETCHCONTENT_SOURCE_DIR_C++SPEC=../cppspec

# Build only the gather_spec target and, if the build succeeds, run the resulting
# binary under qemu-arm with the cortex-m55 CPU model.
# NOTE(review): user-mode qemu-arm normally does not need root; confirm whether `sudo` is required here.
cmake --build build-m55 --target gather_spec && sudo qemu-arm -cpu cortex-m55 build-m55/test/specs/gather_spec
21 changes: 21 additions & 0 deletions include/argon.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <type_traits>
#include "argon/argon_full.hpp"
#include "argon/argon_half.hpp"
#include "argon/helpers/argon_for.hpp"
#include "argon/store.hpp"
#include "argon/vector.hpp"
#include "arm_simd/helpers/multivector.hpp"
Expand Down Expand Up @@ -52,6 +53,16 @@ ace argon_type reinterpret(V in) {
return argon_type{simd::reinterpret<typename argon_type::vector_type>(in)};
}

/// @brief Convert a vector to another type by calling its `As` member template
/// @tparam ScalarType The destination type; passed to `As` and used as this function's return type
/// @tparam ArgonType The type of the input vector (deduced from `in`)
/// @param in The vector to convert
/// @return The result of `in.As<ScalarType>()`, returned as `ScalarType`
/// @note NOTE(review): the declared return type is `ScalarType` itself rather than a vector of
/// `ScalarType`; confirm that `As<ScalarType>()` yields a value convertible to it for the intended callers.
template <typename ScalarType, typename ArgonType>
ace ScalarType bit_cast(ArgonType in) {
return in.template As<ScalarType>();
}

/// @brief Load data to a set of vector lanes from a pointer with interleaving
/// @tparam lane The lane to load to
/// @tparam stride The stride
Expand Down Expand Up @@ -162,6 +173,16 @@ ace Argon<BranchType> ternary(Argon<CondType> condition, Argon<BranchType> true_
}
}

/// @brief Choose between two values of the same type according to a condition of equal size
/// @tparam BranchType The type of both branch values and of the result
/// @tparam CondType The type of the condition; constrained so that sizeof(CondType) == sizeof(BranchType)
/// @param condition The condition used to select between the two values
/// @param true_value The value supplied as the "true" operand
/// @param false_value The value supplied as the "false" operand
/// @return A `BranchType` built from whichever selection path is compiled in
/// @note NOTE(review): presumably the selection is per-lane for vector operands; confirm against
/// `Select` and the compiler's vector-extension `?:` semantics.
template <typename BranchType, typename CondType>
requires(sizeof(CondType) == sizeof(BranchType))
ace BranchType ternary(CondType condition, BranchType true_value, BranchType false_value) {
// Compile-time switch: only one of the two paths below is instantiated.
if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
// Apply `?:` directly to the branches' underlying vectors obtained via `.vec()`.
return condition ? true_value.vec() : false_value.vec();
} else {
// Wrap the condition in its matching Argon type and delegate to its `Select` member.
return helpers::ArgonFor_t<CondType>{condition}.Select(true_value, false_value);
}
}

/// @copydoc ternary
template <typename ValueType, typename CondType>
requires std::is_arithmetic_v<ValueType> &&
Expand Down
129 changes: 71 additions & 58 deletions include/argon/argon_full.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,28 @@
#define ace [[nodiscard]] [[gnu::always_inline]] inline
#endif

template <typename scalar_type>
class Argon : public argon::Vector<simd::Vec128_t<scalar_type>> {
using T = argon::Vector<simd::Vec128_t<scalar_type>>;
template <typename ScalarType>
class Argon : public argon::Vector<simd::Vec128_t<ScalarType>> {
using T = argon::Vector<simd::Vec128_t<ScalarType>>;

public:
using vector_type = simd::Vec128_t<scalar_type>;
using vector_type = simd::Vec128_t<ScalarType>;
using lane_type = const argon::Lane<vector_type>;

static_assert(simd::is_quadword_v<vector_type>);

static constexpr size_t bytes = 16;
static constexpr size_t lanes = bytes / sizeof(scalar_type);
static constexpr size_t lanes = bytes / sizeof(ScalarType);

using T::T;
using argon::Vector<vector_type>::Vector;
ace Argon(argon::Vector<vector_type> vec) : T{std::move(vec)} {};
ace Argon(std::array<scalar_type, 4> value_list) : T{T::Load(value_list.data())} {};
ace Argon(ArgonHalf<scalar_type> low, ArgonHalf<scalar_type> high) : T{Combine(low, high)} {};
ace Argon(std::array<ScalarType, 4> value_list) : T{T::Load(value_list.data())} {};
ace Argon(ArgonHalf<ScalarType> low, ArgonHalf<ScalarType> high) : T{Combine(low, high)} {};

// Lane-based constructors, compiled only when ARGON_PLATFORM_MVE is not defined.
#ifndef ARGON_PLATFORM_MVE
/// @brief Construct from a lane of a vector with this class's own `vector_type`
/// @param b The lane, forwarded unchanged to the base `argon::Vector` constructor
/// @note NOTE(review): presumably this fills every lane with the referenced lane's value; confirm in `argon::Vector`.
ace Argon(argon::Lane<vector_type> b) : T{b} {};
/// @brief Construct from compile-time lane 0 of a vector with this class's own `vector_type`
/// @param b The constant lane, forwarded unchanged to the base `argon::Vector` constructor
ace Argon(argon::ConstLane<0, vector_type> b) : T{b} {};
#endif

template <simd::is_vector_type intrinsic_type>
ace Argon(argon::Lane<intrinsic_type> b) : T{b} {};
Expand All @@ -46,142 +51,142 @@ class Argon : public argon::Vector<simd::Vec128_t<scalar_type>> {
}

#ifndef ARGON_PLATFORM_MVE
ace static Argon<scalar_type> Combine(ArgonHalf<scalar_type> low, ArgonHalf<scalar_type> high) {
ace static Argon<ScalarType> Combine(ArgonHalf<ScalarType> low, ArgonHalf<ScalarType> high) {
return simd::combine(low, high);
}

ace Argon<scalar_type> Reverse() {
ace Argon<ScalarType> Reverse() {
auto rev_half = this->Reverse64bit();
return Combine(rev_half.GetHigh(), rev_half.GetLow());
}

template <typename U>
requires argon::helpers::has_smaller_v<scalar_type> &&
std::is_same_v<U, typename argon::helpers::NextSmaller<scalar_type>::type>
ace Argon<scalar_type> MultiplyAddLong(ArgonHalf<U> b, ArgonHalf<U> c) {
requires argon::helpers::has_smaller_v<ScalarType> &&
std::is_same_v<U, typename argon::helpers::NextSmaller<ScalarType>::type>
ace Argon<ScalarType> MultiplyAddLong(ArgonHalf<U> b, ArgonHalf<U> c) {
return simd::multiply_add_long(this->vec_, b, c);
}
template <typename U, typename C>
requires argon::helpers::has_smaller_v<scalar_type> &&
std::is_same_v<C, simd::Vec64_t<argon::helpers::NextSmaller_t<scalar_type>>>
ace Argon<scalar_type> MultiplyAddLong(ArgonHalf<U> b, C c) {
requires argon::helpers::has_smaller_v<ScalarType> &&
std::is_same_v<C, simd::Vec64_t<argon::helpers::NextSmaller_t<ScalarType>>>
ace Argon<ScalarType> MultiplyAddLong(ArgonHalf<U> b, C c) {
return simd::multiply_add_long(this->vec_, b, c);
}

template <typename U>
requires argon::helpers::has_smaller_v<scalar_type> &&
std::is_same_v<U, typename argon::helpers::NextSmaller<scalar_type>::type>
ace Argon<scalar_type> MultiplyAddLong(ArgonHalf<U> b, U c) {
requires argon::helpers::has_smaller_v<ScalarType> &&
std::is_same_v<U, typename argon::helpers::NextSmaller<ScalarType>::type>
ace Argon<ScalarType> MultiplyAddLong(ArgonHalf<U> b, U c) {
return simd::multiply_add_long(this->vec_, b, c);
}

template <typename U>
requires argon::helpers::has_smaller_v<scalar_type> &&
std::is_same_v<U, typename argon::helpers::NextSmaller<scalar_type>::type>
ace Argon<scalar_type> MultiplyAddLong(ArgonHalf<U> b, typename ArgonHalf<U>::lane_type c) {
requires argon::helpers::has_smaller_v<ScalarType> &&
std::is_same_v<U, typename argon::helpers::NextSmaller<ScalarType>::type>
ace Argon<ScalarType> MultiplyAddLong(ArgonHalf<U> b, typename ArgonHalf<U>::lane_type c) {
return simd::multiply_add_long(this->vec_, b, c.vec(), c.lane());
}

template <typename U>
requires argon::helpers::has_smaller_v<scalar_type> &&
std::is_same_v<U, typename argon::helpers::NextSmaller<scalar_type>::type>
ace Argon<scalar_type> MultiplySubtractLong(ArgonHalf<U> b, ArgonHalf<U> c) {
requires argon::helpers::has_smaller_v<ScalarType> &&
std::is_same_v<U, typename argon::helpers::NextSmaller<ScalarType>::type>
ace Argon<ScalarType> MultiplySubtractLong(ArgonHalf<U> b, ArgonHalf<U> c) {
return simd::multiply_subtract_long(this->vec_, b, c);
}

template <typename U>
requires argon::helpers::has_smaller_v<scalar_type> &&
std::is_same_v<U, typename argon::helpers::NextSmaller<scalar_type>::type>
ace Argon<scalar_type> MultiplySubtractLong(ArgonHalf<U> b, U c) {
requires argon::helpers::has_smaller_v<ScalarType> &&
std::is_same_v<U, typename argon::helpers::NextSmaller<ScalarType>::type>
ace Argon<ScalarType> MultiplySubtractLong(ArgonHalf<U> b, U c) {
return simd::multiply_subtract_long(this->vec_, b, c);
}

template <typename U>
requires argon::helpers::has_smaller_v<scalar_type> &&
std::is_same_v<U, typename argon::helpers::NextSmaller<scalar_type>::type>
ace Argon<scalar_type> MultiplySubtractLong(ArgonHalf<U> b, typename ArgonHalf<U>::lane_type c) {
requires argon::helpers::has_smaller_v<ScalarType> &&
std::is_same_v<U, typename argon::helpers::NextSmaller<ScalarType>::type>
ace Argon<ScalarType> MultiplySubtractLong(ArgonHalf<U> b, typename ArgonHalf<U>::lane_type c) {
return simd::multiply_subtract_long(this->vec_, b, c.vec(), c.lane());
}

ace auto AddNarrow(Argon<scalar_type> b) const
requires argon::helpers::has_smaller_v<scalar_type>
ace auto AddNarrow(Argon<ScalarType> b) const
requires argon::helpers::has_smaller_v<ScalarType>
{
auto result = simd::add_narrow(this->vec_, b);
return argon::helpers::ArgonFor_t<decltype(result)>{result};
}

ace auto AddRoundNarrow(Argon<scalar_type> b) const
requires argon::helpers::has_smaller_v<scalar_type>
ace auto AddRoundNarrow(Argon<ScalarType> b) const
requires argon::helpers::has_smaller_v<ScalarType>
{
auto result = simd::add_round_narrow(this->vec_, b);
return argon::helpers::ArgonFor_t<decltype(result)>{result};
}

ace auto SubtractNarrow(Argon<scalar_type> b) const
requires argon::helpers::has_smaller_v<scalar_type>
ace auto SubtractNarrow(Argon<ScalarType> b) const
requires argon::helpers::has_smaller_v<ScalarType>
{
auto result = simd::subtract_narrow(this->vec_, b);
return argon::helpers::ArgonFor_t<decltype(result)>{result};
}

ace auto SubtractRoundNarrow(Argon<scalar_type> b) const
requires argon::helpers::has_smaller_v<scalar_type>
ace auto SubtractRoundNarrow(Argon<ScalarType> b) const
requires argon::helpers::has_smaller_v<ScalarType>
{
auto result = simd::subtract_round_narrow(this->vec_, b);
return argon::helpers::ArgonFor_t<decltype(result)>{result};
}

template <size_t n>
requires argon::helpers::has_smaller_v<scalar_type>
requires argon::helpers::has_smaller_v<ScalarType>
ace auto ShiftRightNarrow() const {
auto result = simd::shift_right_narrow<n>(this->vec_);
return argon::helpers::ArgonFor_t<decltype(result)>{result};
}

template <size_t n>
requires argon::helpers::has_smaller_v<scalar_type>
requires argon::helpers::has_smaller_v<ScalarType>
ace auto ShiftRightSaturateNarrow() const {
auto result = simd::shift_right_saturate_narrow<n>(this->vec_);
return argon::helpers::ArgonFor_t<decltype(result)>{result};
}

template <size_t n>
requires argon::helpers::has_smaller_v<scalar_type>
requires argon::helpers::has_smaller_v<ScalarType>
ace auto ShiftRightRoundSaturateNarrow() const {
auto result = simd::shift_right_round_saturate_narrow<n>(this->vec_);
return argon::helpers::ArgonFor_t<decltype(result)>{result};
}

template <size_t n>
requires argon::helpers::has_smaller_v<scalar_type>
requires argon::helpers::has_smaller_v<ScalarType>
ace auto ShiftRightRoundNarrow() const {
auto result = simd::shift_right_round_narrow<n>(this->vec_);
return argon::helpers::ArgonFor_t<decltype(result)>{result};
}

ace auto Narrow() const
requires argon::helpers::has_smaller_v<scalar_type>
requires argon::helpers::has_smaller_v<ScalarType>
{
auto result = simd::move_narrow(this->vec_);
return argon::helpers::ArgonFor_t<decltype(result)>{result};
}

ace auto SaturateNarrow() const
requires argon::helpers::has_smaller_v<scalar_type>
requires argon::helpers::has_smaller_v<ScalarType>
{
auto result = simd::move_saturate_narrow(this->vec_);
return argon::helpers::ArgonFor_t<decltype(result)>{result};
}

template <typename NextSmallerType>
requires argon::helpers::has_smaller_v<scalar_type> &&
std::is_same_v<NextSmallerType, argon::helpers::NextSmaller_t<scalar_type>>
ace Argon<scalar_type> MultiplyDoubleAddSaturateLong(ArgonHalf<NextSmallerType> b, ArgonHalf<NextSmallerType> c) {
requires argon::helpers::has_smaller_v<ScalarType> &&
std::is_same_v<NextSmallerType, argon::helpers::NextSmaller_t<ScalarType>>
ace Argon<ScalarType> MultiplyDoubleAddSaturateLong(ArgonHalf<NextSmallerType> b, ArgonHalf<NextSmallerType> c) {
return neon::multiply_double_add_saturate_long(this->vec_, b, c);
}

ace ArgonHalf<scalar_type> GetHigh() const { return simd::get_high(this->vec_); }
ace ArgonHalf<scalar_type> GetLow() const { return simd::get_low(this->vec_); }
ace ArgonHalf<ScalarType> GetHigh() const { return simd::get_high(this->vec_); }
ace ArgonHalf<ScalarType> GetLow() const { return simd::get_low(this->vec_); }
#endif

template <typename U>
Expand All @@ -201,13 +206,13 @@ class Argon : public argon::Vector<simd::Vec128_t<scalar_type>> {
}
}

ace Argon<scalar_type> Reverse() const {
Argon<scalar_type> rev = this->Reverse64bit(); // rev within dword
return Argon{rev.GetHigh(), rev.GetLow()}; // swap dwords
ace Argon<ScalarType> Reverse() const {
Argon<ScalarType> rev = this->Reverse64bit(); // rev within dword
return Argon{rev.GetHigh(), rev.GetLow()}; // swap dwords
}

template <typename CommutableOpType>
scalar_type Reduce(CommutableOpType op) {
ScalarType Reduce(CommutableOpType op) {
auto rev = this->SwapDoublewords();
auto sum = op(*this, rev);
if constexpr (lanes == 16) {
Expand All @@ -222,23 +227,23 @@ class Argon : public argon::Vector<simd::Vec128_t<scalar_type>> {
return sum[0];
}

scalar_type ReduceAdd() {
ScalarType ReduceAdd() {
#ifdef __aarch64__
return simd::reduce_add(this->vec_);
#else
return this->Reduce([](auto a, auto b) { return a + b; });
#endif
}

scalar_type ReduceMax() {
ScalarType ReduceMax() {
#ifdef __aarch64__
return simd::reduce_max(this->vec_);
#else
return this->Reduce([](auto a, auto b) { return std::max(a, b); });
#endif
}

scalar_type ReduceMin() {
ScalarType ReduceMin() {
#ifdef __aarch64__
return simd::reduce_min(this->vec_);
#else
Expand All @@ -248,7 +253,7 @@ class Argon : public argon::Vector<simd::Vec128_t<scalar_type>> {
}

#ifndef ARGON_PLATFORM_MVE
ace Argon<scalar_type> SwapDoublewords() { return Combine(GetHigh(), GetLow()); }
ace Argon<ScalarType> SwapDoublewords() { return Combine(GetHigh(), GetLow()); }
#endif
};

Expand All @@ -257,6 +262,14 @@ template <typename... arg_types>
// Argon(arg_types...) -> Argon<arg_types...[0]>;
Argon(arg_types...) -> Argon<std::tuple_element_t<0, std::tuple<arg_types...>>>;

// Deduction guides for constructing an Argon from a lane; compiled only when
// ARGON_PLATFORM_MVE is not defined.
#ifndef ARGON_PLATFORM_MVE
// Argon{lane}: deduce the element type as the scalar type of the lane's vector type.
template <typename VectorType>
Argon(argon::Lane<VectorType>) -> Argon<simd::Scalar_t<VectorType>>;

// Argon{const_lane}: same deduction for a compile-time lane, restricted to lane index 0.
template <typename VectorType>
Argon(argon::ConstLane<0, VectorType>) -> Argon<simd::Scalar_t<VectorType>>;
#endif

template <typename ScalarType>
requires std::is_scalar_v<ScalarType>
Argon(ScalarType) -> Argon<ScalarType>;
Expand Down
19 changes: 10 additions & 9 deletions include/argon/lane.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#pragma once
#include "arm_simd.hpp"
#include "arm_simd/helpers/scalar.hpp"
#include "arm_simd/helpers/vec64.hpp"
#include "features.h"
#include "helpers/argon_for.hpp"

Expand Down Expand Up @@ -64,17 +65,17 @@ class ConstLane {
ace scalar_type Get() const { return simd::get_lane<LaneIndex>(vec_); }

#if __ARM_ARCH >= 8
ace VectorType& vec() { return vec_; }
ace VectorType vec() { return vec_; }
ace const int lane() { return LaneIndex; }
#else
ace VectorType& vec() {
ace neon::Vec64_t<scalar_type> vec() {
if constexpr (simd::is_doubleword_v<VectorType>) {
return vec_;
} else if constexpr (simd::is_quadword_v<VectorType>) {
if (LaneIndex >= ArgonHalf<scalar_type>::lanes) {
return simd::get_high(vec());
if constexpr (LaneIndex >= ArgonHalf<scalar_type>::lanes) {
return simd::get_high(vec_);
} else {
return simd::get_low(vec());
return simd::get_low(vec_);
}
}
}
Expand Down Expand Up @@ -118,17 +119,17 @@ class Lane {
ace operator scalar_type() const { return Get(); }

#if __ARM_ARCH >= 8
ace VectorType& vec() { return vec_; }
ace VectorType vec() { return vec_; }
ace const int lane() { return lane_; }
#else
ace VectorType& vec() {
ace neon::Vec64_t<scalar_type> vec() {
if constexpr (simd::is_doubleword_v<VectorType>) {
return vec_;
} else if constexpr (simd::is_quadword_v<VectorType>) {
if (lane_ >= ArgonHalf<scalar_type>::lanes) {
return simd::get_high(vec());
return simd::get_high(vec_);
} else {
return simd::get_low(vec());
return simd::get_low(vec_);
}
}
}
Expand Down
Loading