Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/argon/argon_full.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#endif

template <typename ScalarType>
requires std::is_arithmetic_v<ScalarType>
class Argon : public argon::Vector<simd::Vec128_t<ScalarType>> {
using T = argon::Vector<simd::Vec128_t<ScalarType>>;

Expand Down
2 changes: 2 additions & 0 deletions include/argon/helpers/argon_for.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@

/// @brief Forward declaration of the ArgonHalf class template.
template <typename T>
class ArgonHalf;

/// @brief Forward declaration of the Argon class template, constrained to
/// arithmetic element types.
/// @note A redeclaration of a constrained template has to carry an equivalent
/// requires-clause, so the constraint is spelled out here as well as on the
/// class definition.
template <typename T>
requires std::is_arithmetic_v<T>
class Argon;

namespace argon::helpers {
Expand Down
32 changes: 16 additions & 16 deletions include/argon/store.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ ace void store(scalar_type* ptr, intrinsic_types... vectors) {
using intrinsic_type = typename std::tuple_element_t<0, std::tuple<intrinsic_types...>>;

constexpr size_t size = sizeof...(vectors);
const std::array<intrinsic_type, sizeof...(vectors)> vec_array = {vectors...};
constexpr std::array<intrinsic_type, size> vec_array = {std::move(vectors)...};

// Best case scenario: we know both length and stride
static_assert(0 < stride && stride < 5, "Stores can only be performed with a stride of 1, 2, 3, or 4");
Expand All @@ -119,26 +119,26 @@ ace void store(scalar_type* ptr, intrinsic_types... vectors) {

if constexpr (stride == 1) {
constexpr size_t tail_size = size % 4;
#pragma unroll
for (auto v : vec_array | std::views::chunk(4)) {
if (v.size() == 4) { // 4-element chunks
constexpr size_t head_size = size - tail_size;
size_t i = 0;
if constexpr (head_size > 0) {
for (; i < head_size; i += 4) {
using multi_type = simd::MultiVector_t<intrinsic_type, 4>;
simd::store1_x4(ptr, *(multi_type*)v.begin());
simd::store1_x4(ptr, *(multi_type*)&vec_array[i]);
ptr += (sizeof(intrinsic_type) / sizeof(*ptr)) * 4; // increment output pointer
} else {
if constexpr (tail_size == 1) { // 1-element tail
simd::store1(ptr, v.begin());
} else if constexpr (tail_size == 2) {
using tail_multi_type = simd::MultiVector_t<intrinsic_type, 2>;
simd::store1_x2(ptr, *(tail_multi_type*)v.begin());
} else if constexpr (tail_size == 3) {
using tail_multi_type = simd::MultiVector_t<intrinsic_type, 3>;
simd::store1_x3(ptr, *(tail_multi_type*)v.begin());
}
}
}
if constexpr (tail_size == 1) { // 1-element tail
simd::store1(ptr, &vec_array[i]);
} else if constexpr (tail_size == 2) {
using tail_multi_type = simd::MultiVector_t<intrinsic_type, 2>;
simd::store1_x2(ptr, *(tail_multi_type*)&vec_array[i]);
} else if constexpr (tail_size == 3) {
using tail_multi_type = simd::MultiVector_t<intrinsic_type, 3>;
simd::store1_x3(ptr, *(tail_multi_type*)&vec_array[i]);
}
} else {
#pragma unroll
#pragma GCC unroll size
for (auto v : vec_array | std::views::chunk(stride)) {
if constexpr (stride == 2) {
store_interleaved<2>(ptr, v.begin());
Expand Down
3 changes: 2 additions & 1 deletion include/argon/vectorize/load.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,8 @@ struct load : std::ranges::view_interface<load<ScalarType>> {

/// @brief Construct a load from a span
/// @param span The span to load data from.
load(const std::span<ScalarType> span) : start_{span.data()}, size_{vectorizeable_size(span.size()) / lanes} {}
load(const std::span<ScalarType> span)
: start_{span.data()}, size_{helpers::vectorizeable_size<ScalarType>(span.size()) / lanes} {}

private:
const ScalarType* start_;
Expand Down
9 changes: 5 additions & 4 deletions include/argon/vectorize/load_interleaved.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ struct load_interleaved : std::ranges::view_interface<load_interleaved<ScalarTyp
using value_type = std::array<Argon<ScalarType>, Stride>;

LoadInterleavedIterator() = default;
LoadInterleavedIterator(ScalarType* ptr) : ptr_{ptr} {}
LoadInterleavedIterator(const ScalarType* ptr) : ptr_{ptr} {}

std::array<Argon<ScalarType>, Stride> operator*() const {
return Argon<ScalarType>::template LoadInterleaved<Stride>(ptr_);
Expand Down Expand Up @@ -96,23 +96,24 @@ struct load_interleaved : std::ranges::view_interface<load_interleaved<ScalarTyp
friend LoadInterleavedIterator operator+(const int n, const LoadInterleavedIterator& it) { return it + n; }

private:
ScalarType* ptr_;
const ScalarType* ptr_;
};
static_assert(std::sized_sentinel_for<LoadInterleavedIterator, LoadInterleavedIterator>);
static_assert(std::bidirectional_iterator<LoadInterleavedIterator>);
static_assert(std::input_iterator<LoadInterleavedIterator>);

using iterator = LoadInterleavedIterator;
using sentinel = const ScalarType*;

iterator begin() { return start_; }
ScalarType* end() { return start_ + size_; }
const ScalarType* end() { return start_ + size_; }
size_t size() const { return size_ / (lanes * Stride); }

template <std::ranges::contiguous_range R>
load_interleaved(R&& r) : start_{&*std::ranges::begin(r)}, size_{vectorizeable_size(std::ranges::size(r))} {}

private:
ScalarType* start_;
const ScalarType* start_;
size_t size_;
};

Expand Down
4 changes: 2 additions & 2 deletions test/specs/vectorize/load_interleaved_spec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@ auto vectorize_load_interleaved = describe("vectorize_load_interleaved", ${
using element_type = int16_t;
std::array<element_type, 8> vals;
auto vec = argon::vectorize::load_interleaved(vals);
expect(std::is_same_v<decltype(vec.begin()), typename argon::vectorize::load_interleaved<element_type, 2>::iterator>).to_be_true();
expect(std::is_same_v<decltype(vec.begin()), std::ranges::iterator_t<argon::vectorize::load_interleaved<element_type, 2>>>).to_be_true();
});

it("returns an end sentinel pointer when end() is called", _{
using element_type = int16_t;
std::array<element_type, 8> vals;
auto vec = argon::vectorize::load_interleaved(vals);
expect(std::is_same_v<decltype(vec.end()), element_type*>).to_be_true();
expect(std::is_same_v<decltype(vec.end()), std::ranges::sentinel_t<argon::vectorize::load_interleaved<element_type, 2>>>).to_be_true();
});

it("can access all elements of vals", _{
Expand Down
4 changes: 2 additions & 2 deletions test/specs/vectorize/load_spec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@ auto vectorize_load = describe("vectorize_load", ${
using element_type = int16_t;
std::array<element_type, 8> vals;
auto vec = argon::vectorize::load(vals);
expect(std::is_same_v<decltype(vec.begin()), typename argon::vectorize::load<element_type>::iterator>).to_be_true();
expect(std::is_same_v<decltype(vec.begin()), std::ranges::iterator_t<argon::vectorize::load<element_type>>>).to_be_true();
});

it("returns an end sentinel pointer when end() is called", _{
using element_type = int16_t;
std::array<element_type, 8> vals;
auto vec = argon::vectorize::load(vals);
expect(std::is_same_v<decltype(vec.end()), typename argon::vectorize::load<element_type>::sentinel>).to_be_true();
expect(std::is_same_v<decltype(vec.end()), std::ranges::sentinel_t<argon::vectorize::load<element_type>>>).to_be_true();
});

it("can access all elements of vals", _{
Expand Down