diff --git a/libs/libpll b/libs/libpll index fb40c17..1ac5ceb 160000 --- a/libs/libpll +++ b/libs/libpll @@ -1 +1 @@ -Subproject commit fb40c1766ed24039969ae2367ba007c58e0ee2c7 +Subproject commit 1ac5cebae551b95bbe903f5be0014c1ecada4a58 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2ebf181..23f2408 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -8,7 +8,7 @@ endif() set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-undefined -Wl,dynamic_lookup") set(PLLMOD_INCLUDE_PATH ${PLLMODULES_LIBPLL_PATH}/src/) -set(PLLMODULES_COMPONENTS "optimize;algorithm;binary;msa;tree;util" CACHE STRING "pll-modules components to build") +set(PLLMODULES_COMPONENTS "optimize;algorithm;binary;msa;tree;util;dks" CACHE STRING "pll-modules components to build") foreach(module ${PLLMODULES_COMPONENTS}) MESSAGE(STATUS "Will compile pll-module ${module}") diff --git a/src/dks/CMakeLists.txt b/src/dks/CMakeLists.txt new file mode 100644 index 0000000..ef6556e --- /dev/null +++ b/src/dks/CMakeLists.txt @@ -0,0 +1,5 @@ +file(GLOB DKS_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) +add_pllmodules_lib(pllmoddks "${DKS_SOURCES}") +target_include_directories(pllmoddks_obj PRIVATE ${PLLMOD_INCLUDE_PATH} + ${CMAKE_CURRENT_SOURCE_DIR}/../msa + ) diff --git a/src/dks/benchmark.cpp b/src/dks/benchmark.cpp new file mode 100644 index 0000000..913acbe --- /dev/null +++ b/src/dks/benchmark.cpp @@ -0,0 +1,162 @@ +#include "benchmark.h" +#include "partition.h" +#include +#include +#include +#include + +namespace dks { + +msa_t convert_pll_msa_t(const pll_msa_t *pll_msa) { + msa_t msa; + msa.reserve(pll_msa->count); + for (int i = 0; i < pll_msa->count; i++) { + msa.emplace_back(pll_msa->length); + for (int j = 0; j < pll_msa->length; j++) { + msa[i][j] = pll_msa->sequence[i][j]; + } + } + return msa; +} + +inline benchmark_time_t weight_kernel_times(kernel_weight_t kw, + benchmark_result_t bmr) { + return kw[test_kernel_t::partial] * bmr[test_kernel_t::partial] + + kw[test_kernel_t::likelihood] * bmr[test_kernel_t::likelihood] + + kw[test_kernel_t::derivative] * bmr[test_kernel_t::derivative] + + kw[test_kernel_t::pmatrix] * bmr[test_kernel_t::pmatrix]; +} + +inline attributes_t best_attrib_time(const attributes_time_t &at) { + return std::max_element(at.begin(), at.end(), + [](const attributes_time_t::value_type &a, + const attributes_time_t::value_type &b) { + return a.second > b.second; + }) + ->first; +} + +kernel_weight_t suggest_weights(double sites, double states, double taxa) { + kernel_weight_t kw; + + kw[test_kernel_t::partial] = + 0.4866 * sites + 437.1470 * states + 6.5094 * taxa - 3557.8645; + kw[test_kernel_t::likelihood] = + 0.327 * sites + 28.952 * states + 1.147 * taxa + -43.042; + kw[test_kernel_t::derivative] = + 0.2174 * sites + 26.1509 * states + 0.7898 * taxa + -108.6298; + kw[test_kernel_t::pmatrix] = + 3.221e-03 * sites + 2.672e+01 * states + 7.741e-01 * taxa + -2.195e+02; + + for (auto &kv : kw) { + kv.second = kv.second < 0.0 ? 0.1 : kv.second; + } + return kw; +} + +kernel_weight_t suggest_weights(const msa_t &msa, unsigned int states) { + return suggest_weights(msa.size(), msa[0].size(), states); +} + +kernel_weight_t suggest_weights(const pllmod_msa_stats_t *msa, int sites, + int taxa) { + return suggest_weights(sites, taxa, msa->states); +} + +attributes_time_t +select_kernel_verbose(const model_t &model, + const std::vector> &clvs, + const msa_weight_t &weights, const kernel_weight_t &kw, + attributes_generator_t att_gen) { + attributes_time_t times; + for (attributes_t attribs = att_gen.next(); attribs != att_gen.end(); + attribs = att_gen.next()) { + + test_case_t tc(attribs); + times[attribs] = + weight_kernel_times(kw, tc.benchmark(clvs, weights, model)); + } + return times; +} + +attributes_time_t select_kernel_verbose(const model_t &model, const msa_t &msa, + const msa_weight_t &weights, + const pll_state_t *charmap, + const kernel_weight_t &kw, + attributes_generator_t att_gen) { + attributes_time_t times; + for (attributes_t attribs = att_gen.next(); attribs != att_gen.end(); + attribs = att_gen.next()) { + + test_case_t tc(attribs, charmap); + times[attribs] = weight_kernel_times(kw, tc.benchmark(msa, weights, model)); + } + return times; +} + +unsigned int select_kernel_auto(const pll_partition_t *pll_partition, + const pll_msa_t *pll_msa, + const pll_state_t *charmap, + const kernel_weight_t &kw, + attributes_generator_t gen) { + auto msa = convert_pll_msa_t(pll_msa); + model_t model{pll_partition}; + msa_weight_t weights(pll_partition->pattern_weights, + pll_partition->pattern_weights + pll_msa->length); + return best_attrib_time( + select_kernel_verbose(model, msa, weights, charmap, kw, gen)) + .pll_attributes(); +} + +unsigned int select_kernel_auto(const pll_partition_t *pll_partition, + const pll_msa_t *pll_msa, + const pll_state_t *charmap) { + auto kw = + suggest_weights(pll_msa->length, pll_msa->count, pll_partition->states); + attributes_generator_t gen; + return select_kernel_auto(pll_partition, pll_msa, charmap, kw, gen); +} + +unsigned int select_kernel_auto(const pll_partition_t *pll_partition, + const pll_msa_t *pll_msa, + const pll_state_t *charmap, + attributes_generator_t gen) { + auto kw = + suggest_weights(pll_msa->length, pll_msa->count, pll_partition->states); + return select_kernel_auto(pll_partition, pll_msa, charmap, kw, gen); +} + +unsigned int select_kernel_auto(const msa_t &msa, const msa_weight_t &weights, + const pll_state_t *charmap, unsigned int states, + unsigned int rate_cats, + attributes_generator_t gen) { + auto kw = suggest_weights(weights.size(), msa.size(), states); + model_t model{msa, states, rate_cats}; + auto result = select_kernel_verbose(model, msa, weights, charmap, kw, gen); + return best_attrib_time(result).pll_attributes(); +} + +unsigned int select_kernel_auto(const msa_t &msa, const msa_weight_t &weights, + const pll_state_t *charmap, unsigned int states, + unsigned int rate_cats) { + attributes_generator_t gen; + return select_kernel_auto(msa, weights, charmap, states, rate_cats, gen); +} + +unsigned int select_kernel_auto(const std::vector> &clvs, + const msa_weight_t &weights, + unsigned int states, unsigned int rate_cats, + attributes_generator_t gen) { + auto kw = suggest_weights(weights.size(), clvs.size(), states); + model_t model{clvs.size(), states, rate_cats}; + auto result = select_kernel_verbose(model, clvs, weights, kw, gen); + return best_attrib_time(result).pll_attributes(); +} + +unsigned int select_kernel_auto(const std::vector> &clvs, + const msa_weight_t &weights, + unsigned int states, unsigned int rate_cats) { + attributes_generator_t gen; + return select_kernel_auto(clvs, weights, states, rate_cats, gen); +} +} // namespace dks diff --git a/src/dks/benchmark.h b/src/dks/benchmark.h new file mode 100644 index 0000000..701f995 --- /dev/null +++ b/src/dks/benchmark.h @@ -0,0 +1,69 @@ +#ifndef DKS_BENCHMARK_H_ +#define DKS_BENCHMARK_H_ +#include "pll_msa.h" +#include "test_case.h" +#include + +namespace dks { +typedef std::unordered_map kernel_weight_t; +typedef std::unordered_map attributes_time_t; + +kernel_weight_t suggest_weights(const msa_t &msa); +kernel_weight_t suggest_weights(const pllmod_msa_stats_t *msa, int sites, + int taxa); + +attributes_time_t select_kernel_verbose(const model_t &model, const msa_t &msa, + const msa_weight_t &weights, + const pll_state_t *charmap, + const kernel_weight_t &kw, + attributes_generator_t att_gen); + +attributes_time_t +select_kernel_verbose(const model_t &model, + const std::vector> &clvs, + const msa_weight_t &weights, const kernel_weight_t &kw, + attributes_generator_t att_gen); + +attributes_t select_kernel_auto(const pll_partition_t *pll_partition, + const pll_msa_t *pll_msa); + +unsigned int select_kernel_auto(const msa_t &msa, const msa_weight_t &weights, + const pll_state_t *charmap, unsigned int states, + unsigned int rate_cats, + attributes_generator_t gen); + +unsigned int select_kernel_auto(const msa_t &msa, const msa_weight_t &weights, + const pll_state_t *charmap, unsigned int states, + unsigned int rate_cats); + +unsigned int select_kernel_auto(const std::vector> &clvs, + const msa_weight_t &weights, + unsigned int states, unsigned int rate_cats, + attributes_generator_t gen); + +unsigned int select_kernel_auto(const std::vector> &clvs, + const msa_weight_t &weights, + unsigned int states, unsigned int rate_cats); + +unsigned int select_kernel_auto(const pll_partition_t *pll_partition, + const pll_msa_t *pll_msa, + const pll_state_t *charmap, + const msa_weight_t &weights, + attributes_generator_t gen); + +unsigned int select_kernel_auto(const pll_partition_t *pll_partition, + const pll_msa_t *pll_msa, + const pll_state_t *charmap); + +unsigned int select_kernel_auto(const pll_partition_t *pll_partition, + const pll_msa_t *pll_msa, + const pll_state_t *charmap, + attributes_generator_t gen); + +unsigned int select_kernel_auto(const pll_partition_t *pll_partition, + const pll_msa_t *pll_msa, + const pll_state_t *charmap, + const msa_weight_t &weights); + +} // namespace dks +#endif// DKS_BENCHMARK_H_ diff --git a/src/dks/dks.h b/src/dks/dks.h new file mode 100644 index 0000000..3a81feb --- /dev/null +++ b/src/dks/dks.h @@ -0,0 +1,6 @@ +#ifndef DKS_H_ +#define DKS_H_ + +#include "benchmark.h" + +#endif diff --git a/src/dks/model.cpp b/src/dks/model.cpp new file mode 100644 index 0000000..cea5f46 --- /dev/null +++ b/src/dks/model.cpp @@ -0,0 +1,37 @@ +#include "model.h" +#include + +namespace dks { + +unsigned int model_t::submodels() const { return 1; } + +unsigned int model_t::rate_categories() const { return _rate_categories; } + +uint64_t model_t::states() const { return _states; } + +const double *model_t::subst_params_raw() const { return _subst_params.data(); } + +const std::vector &model_t::subst_params() const { + return _subst_params; +} + +const double *model_t::frequencies_raw() const { return _frequencies.data(); } + +const std::vector &model_t::frequencies() const { return _frequencies; } + +const tree_t &model_t::tree() const { return _tree; } + +std::vector model_t::make_operations() const { + auto traversal_nodes = _tree.full_traverse(); + std::vector operations(traversal_nodes.size()); + unsigned int operations_count = 0; + pll_utree_create_operations(traversal_nodes.data(), traversal_nodes.size(), + nullptr, nullptr, operations.data(), nullptr, + &operations_count); + + operations.resize(operations_count); + return operations; +} + +void model_t::reset_tree() { _tree = tree_t(_tree.tip_count()); } +} // namespace dks diff --git a/src/dks/model.h b/src/dks/model.h new file mode 100644 index 0000000..71b1598 --- /dev/null +++ b/src/dks/model.h @@ -0,0 +1,72 @@ +#ifndef DKS_MODEL_H_ +#define DKS_MODEL_H_ +#include "tree.h" +#include +#include +#include + +namespace dks { +typedef std::vector> msa_t; +typedef std::vector msa_weight_t; +class model_t { +public: + model_t(const msa_t &msa, unsigned int states) : model_t{msa, states, 0} {}; + + model_t(size_t tip_count, unsigned int states) + : model_t{tip_count, states, 0} {}; + + model_t(const msa_t &msa, unsigned int states, uint64_t seed) + : _tree{msa.size(), seed}, _states{states}, _rate_categories{1}, + _prop_invar{0.0}, _subst_params((_states - 1) * (_states - 2), 1.0), + _frequencies(_states, 1.0 / _states){}; + + model_t(size_t tip_count, unsigned int states, uint64_t seed) + : _tree{tip_count, seed}, _states{states}, _rate_categories{1}, + _prop_invar{0.0}, _subst_params{6, 1.0}, _frequencies{4, .25} {}; + + model_t(const pll_partition_t *pll_partition) + : model_t(pll_partition->tips, pll_partition->states, 0, + pll_partition->rate_cats, pll_partition->subst_params, + pll_partition->frequencies, *(pll_partition->prop_invar)){}; + + model_t(size_t tip_count, unsigned int states, size_t model_index, + unsigned int rate_categories, double **subst_params, + double **frequencies, double pinv) + : _tree{tree_t(tip_count)}, _states{states}, + _rate_categories{rate_categories}, _prop_invar{pinv}, + _subst_params{subst_params[model_index], + subst_params[model_index] + + (_states - 1) * (_states - 2)}, + _frequencies{frequencies[model_index], + frequencies[model_index] + _states} {}; + + model_t(size_t tip_count, unsigned int states, unsigned int rate_categories, + unsigned int seed) + : _tree{tip_count, seed}, _states{states}, + _rate_categories{rate_categories}, _prop_invar{0.0}, + _subst_params((_states - 1) * (_states - 2) / 2, 1.0), + _frequencies(_states, 1.0 / _states) {} + + unsigned int submodels() const; + unsigned int rate_categories() const; + uint64_t states() const; + const double *subst_params_raw() const; + const std::vector &subst_params() const; + const double *frequencies_raw() const; + const std::vector &frequencies() const; + const tree_t &tree() const; + + void reset_tree(); + + std::vector make_operations() const; + +private: + tree_t _tree; + unsigned int _states; + unsigned int _rate_categories; + double _prop_invar; + std::vector _subst_params; + std::vector _frequencies; +}; +} // namespace dks +#endif diff --git a/src/dks/partition.cpp b/src/dks/partition.cpp new file mode 100644 index 0000000..fd750a4 --- /dev/null +++ b/src/dks/partition.cpp @@ -0,0 +1,200 @@ +#include "partition.h" +#include + +namespace dks { + +constexpr unsigned int partition_t::_params_indices[]; +constexpr double partition_t::_rate_cats[]; + +partition_t::partition_t(const msa_t &msa, const model_t &model, + const msa_weight_t &weights, + const pll_state_t *charmap, unsigned int attributes) { + + unsigned int tip_count = msa.size(); + unsigned int inner_count = tip_count - 2; + + _partition = pll_partition_create(tip_count, // tips + inner_count, // clv_buffers + model.states(), // states + msa[0].size(), // sites + model.submodels(), // rate_matrices + 2 * tip_count - 3, // prob_matrices + model.rate_categories(), // rate_cats + inner_count, // scale_buffes + attributes // attributes + ); + initialize_tips(msa, charmap); + initialize_rates(model); + set_pattern_weights(weights); + update_probability_matrices(model.tree()); + alloc_sumtable(attributes); +} + +partition_t::partition_t(const std::vector> &clvs, + const model_t &model, const msa_weight_t &weights, + unsigned int attributes) { + + unsigned int tip_count = clvs.size(); + unsigned int inner_count = tip_count - 2; + + _partition = pll_partition_create(tip_count, // tips + inner_count, // clv_buffers + model.states(), // states + clvs[0].size(), // sites + model.submodels(), // rate_matrices + 2 * tip_count - 3, // prob_matrices + model.rate_categories(), // rate_cats + inner_count, // scale_buffes + attributes // attributes + ); + initialize_tips(clvs); + initialize_rates(model); + set_pattern_weights(weights); + update_probability_matrices(model.tree()); + alloc_sumtable(attributes); +} + +void partition_t::alloc_sumtable(unsigned int attribs) { + unsigned int align = PLL_ALIGNMENT_CPU; + switch (attribs & PLL_ATTRIB_ARCH_MASK) { + case PLL_ATTRIB_ARCH_SSE: + align = PLL_ALIGNMENT_SSE; + break; + case PLL_ATTRIB_ARCH_AVX: + case PLL_ATTRIB_ARCH_AVX2: + case PLL_ATTRIB_ARCH_AVX512: + align = PLL_ALIGNMENT_AVX; + break; + default: + break; + } + _sumtable = (double *)pll_aligned_alloc( + _partition->sites * _partition->rate_cats * _partition->states_padded * + sizeof(double), + align); +} + +void partition_t::initialize_tips(const msa_t &msa, + const pll_state_t *charmap) { + for (size_t tip_id = 0; tip_id < msa.size(); tip_id++) { + pll_set_tip_states(_partition, tip_id, charmap, msa[tip_id].data()); + } +} + +void partition_t::initialize_tips( + const std::vector> &clvs) { + for (size_t tip_id = 0; tip_id < clvs.size(); tip_id++) { + pll_set_tip_clv(_partition, tip_id, clvs[tip_id].data(), PLL_FALSE); + } +} + +void partition_t::initialize_rates(const model_t &model) { + for (size_t i = 0; i < model.submodels(); i++) { + pll_set_subst_params(_partition, i, model.subst_params_raw()); + pll_set_frequencies(_partition, i, model.frequencies_raw()); + } + pll_set_category_rates(_partition, _rate_cats); +} + +void partition_t::update_probability_matrices(const tree_t &tree) { + pll_update_prob_matrices( + _partition, _params_indices, tree.matrix_indices().data(), + tree.branch_lengths().data(), tree.branch_lengths().size()); +} + +void partition_t::update_partials(const std::vector &ops) { + pll_update_partials(_partition, ops.data(), ops.size()); +} + +void partition_t::update_site_repeats(const tree_t &tree) { + update_site_repeats(tree.make_operations()); +} + +void partition_t::update_site_repeats(const model_t &model) { + update_site_repeats(model.make_operations()); +} + +void partition_t::update_site_repeats(const std::vector &ops) { + for (size_t i = 0; i < ops.size(); i++) { + pll_update_repeats(_partition, &ops[i]); + } +} + +void partition_t::set_pattern_weights(const msa_weight_t &weights) { + pll_set_pattern_weights(_partition, weights.data()); +} + +void partition_t::update_partials(const tree_t &tree) { + update_partials(tree.make_operations()); +} + +void partition_t::update_partials(const model_t &model) { + update_partials(model.tree()); +} + +void partition_t::invalidate_prob_matrix() { + *_partition->eigen_decomp_valid = 0; +} + +double partition_t::loglh(const tree_t &tree) { + pll_unode_t *parent = tree.vroot(); + pll_unode_t *child = parent->back; + return pll_compute_edge_loglikelihood( + _partition, parent->clv_index, parent->scaler_index, child->clv_index, + child->scaler_index, parent->pmatrix_index, _params_indices, nullptr); +} + +double partition_t::loglh(const model_t &model) { return loglh(model.tree()); } + +std::vector partition_t::loglh_persite(const model_t &model, + size_t sites) { + pll_unode_t *parent = model.tree().vroot(); + pll_unode_t *child = parent->back; + + std::vector persites(sites, 0.0); + + pll_compute_edge_loglikelihood(_partition, parent->clv_index, + parent->scaler_index, child->clv_index, + child->scaler_index, parent->pmatrix_index, + _params_indices, persites.data()); + return persites; +} + +unsigned int partition_t::attributes() const { return _partition->attributes; } + +void partition_t::update_sumtable(const tree_t &tree) { + pll_unode_t *parent = tree.vroot(); + pll_unode_t *child = parent->back; + if (site_repeats()) { + update_partials(tree); + } + + pll_update_sumtable(_partition, parent->clv_index, child->clv_index, + parent->scaler_index, child->scaler_index, + _params_indices, _sumtable); +} + +derivative_t partition_t::compute_derivative(const tree_t &tree, double brlen) { + pll_unode_t *parent = tree.vroot(); + pll_unode_t *child = parent->back; + + derivative_t d; + pll_compute_likelihood_derivatives( + _partition, parent->scaler_index, child->scaler_index, brlen, + _params_indices, _sumtable, &d.first, &d.second); + + return d; +} + +bool partition_t::site_repeats() const { + return attributes() & PLL_ATTRIB_SITE_REPEATS; +} + +partition_t::~partition_t() { + pll_partition_destroy(_partition); + pll_aligned_free(_sumtable); +} + +const pll_partition_t *partition_t::partition_raw() const { return _partition; } + +} // namespace dks diff --git a/src/dks/partition.h b/src/dks/partition.h new file mode 100644 index 0000000..a663a2f --- /dev/null +++ b/src/dks/partition.h @@ -0,0 +1,56 @@ +#ifndef DKS_PARTITION_H_ +#define DKS_PARTITION_H_ +#include "model.h" +#include +#include +#include + +namespace dks { +typedef std::pair derivative_t; +class partition_t { +public: + partition_t(const msa_t &, const model_t &, const msa_weight_t &, + const pll_state_t *, unsigned int); + partition_t(const std::vector> &, const model_t &, + const msa_weight_t &, unsigned int); + ~partition_t(); + + void initialize_tips(const msa_t &, const pll_state_t *); + void initialize_tips(const std::vector> &); + + void initialize_rates(const model_t &model); + void set_pattern_weights(const msa_weight_t &); + void update_probability_matrices(const tree_t &tree); + + void update_partials(const std::vector &); + void update_partials(const tree_t &tree); + void update_partials(const model_t &model); + + void update_site_repeats(const std::vector &); + void update_site_repeats(const tree_t &tree); + void update_site_repeats(const model_t &model); + + void invalidate_prob_matrix(); + + void update_sumtable(const tree_t &tree); + derivative_t compute_derivative(const tree_t &tree, double brlen = 1.0); + + double loglh(const tree_t &tree); + double loglh(const model_t &model); + std::vector loglh_persite(const model_t &model, size_t sites); + + bool site_repeats() const; + unsigned int attributes() const; + + const pll_partition_t *partition_raw() const; + +private: + void alloc_sumtable(unsigned int attribs); + + pll_partition_t *_partition; + double *_sumtable; + constexpr static unsigned int _params_indices[] = {0}; + constexpr static double _rate_cats[] = {1.0}; +}; +} // namespace dks +#endif diff --git a/src/dks/test_case.cpp b/src/dks/test_case.cpp new file mode 100644 index 0000000..1d1585e --- /dev/null +++ b/src/dks/test_case.cpp @@ -0,0 +1,222 @@ +#include "partition.h" +#include "test_case.h" +#include +#include +#include +#include + +namespace dks { + +test_cpu_t test_cpu_from_attribs(uint32_t attribs) { + if ((attribs & PLL_ATTRIB_ARCH_MASK) == 0) { + return dks::test_cpu_t::none; + } + return static_cast(32 - + __builtin_clz(attribs & PLL_ATTRIB_ARCH_MASK)); +} + +int attributes_t::cpu_attrib() const { + if (simd == dks::test_cpu_t::avx2) { + return PLL_ATTRIB_ARCH_AVX2; + } + if (simd == dks::test_cpu_t::avx) { + return PLL_ATTRIB_ARCH_AVX; + } + if (simd == dks::test_cpu_t::sse) { + return PLL_ATTRIB_ARCH_SSE; + } + if (simd == dks::test_cpu_t::none) { + return PLL_ATTRIB_ARCH_CPU; + } + if (simd == dks::test_cpu_t::avx512) { + return PLL_ATTRIB_ARCH_AVX512; + } + throw std::runtime_error("Unrecognized CPU type"); +} + +int attributes_t::siterepeat_attrib() const { + return site_repeats ? PLL_ATTRIB_SITE_REPEATS : 0; +} + +int attributes_t::patterntip_attrib() const { + return pattern_tip ? PLL_ATTRIB_PATTERN_TIP : 0; +} + +int attributes_t::pll_attributes() const { + return cpu_attrib() | siterepeat_attrib() | patterntip_attrib(); +} + +attributes_t attributes_generator_t::next() { + while (!valid()) { + _state++; + if (_state > _max) { + return end(); + } + } + attributes_t attrib(_state & PLL_ATTRIB_PATTERN_TIP, + _state & PLL_ATTRIB_SITE_REPEATS, 0, + test_cpu_from_attribs(_state)); + _state++; + return attrib; +} + +attributes_t attributes_generator_t::end() { + return attributes_t(false, false, false, dks::test_cpu_t::invalid); +} + +benchmark_result_t test_case_t::run_benchmarks(partition_t &partition, + const model_t &model) { + benchmark_result_t br; + + br[test_kernel_t::partial] = benchmark_partials(partition, model); + br[test_kernel_t::likelihood] = benchmark_likelihood(partition, model); + br[test_kernel_t::pmatrix] = benchmark_pmatrix(partition, model); + br[test_kernel_t::derivative] = benchmark_derivative(partition, model); + + return br; +} + +benchmark_result_t +test_case_t::benchmark(const std::vector> &clvs, + const msa_weight_t &weights, const model_t &model) { + partition_t partition(clvs, model, weights, attributes()); + + return run_benchmarks(partition, model); +} + +benchmark_result_t test_case_t::benchmark(const msa_t &msa, + const msa_weight_t &weights, + const model_t &model) { + partition_t partition(msa, model, weights, _charmap, attributes()); + + return run_benchmarks(partition, model); +} + +benchmark_time_t test_case_t::benchmark_partials(partition_t &partition, + const model_t &model) { + for (size_t i = 0; i < _trials; i++) { + partition.update_partials(model); + } + auto t1 = std::chrono::high_resolution_clock::now(); + for (size_t i = 0; i < _trials; i++) { + partition.update_partials(model); + } + auto t2 = std::chrono::high_resolution_clock::now(); + return (t2 - t1) / _trials; +} + +benchmark_time_t test_case_t::benchmark_likelihood(partition_t &partition, + const model_t &model) { + partition.update_partials(model); + for (size_t i = 0; i < _trials; i++) { + partition.loglh(model); + } + auto t1 = std::chrono::high_resolution_clock::now(); + for (size_t i = 0; i < _trials; i++) { + partition.loglh(model); + } + auto t2 = std::chrono::high_resolution_clock::now(); + return (t2 - t1) / _trials; +} + +benchmark_time_t test_case_t::benchmark_pmatrix(partition_t &partition, + const model_t &model) { + for (size_t i = 0; i < _trials; i++) { + partition.update_probability_matrices(model.tree()); + } + auto t1 = std::chrono::high_resolution_clock::now(); + for (size_t i = 0; i < _trials; i++) { + partition.update_probability_matrices(model.tree()); + } + auto t2 = std::chrono::high_resolution_clock::now(); + return (t2 - t1) / _trials; +} + +benchmark_time_t test_case_t::benchmark_derivative(partition_t &partition, + const model_t &model) { + for (size_t i = 0; i < _trials; i++) { + partition.update_sumtable(model.tree()); + partition.compute_derivative(model.tree()); + } + auto t1 = std::chrono::high_resolution_clock::now(); + for (size_t i = 0; i < _trials; i++) { + partition.update_sumtable(model.tree()); + partition.compute_derivative(model.tree()); + } + auto t2 = std::chrono::high_resolution_clock::now(); + return (t2 - t1) / _trials; +} + +benchmark_time_t +test_case_t::benchmark_update_site_repeats(partition_t &partition, + const model_t &model) { + for (size_t i = 0; i < _trials; i++) { + partition.update_site_repeats(model); + } + auto t1 = std::chrono::high_resolution_clock::now(); + for (size_t i = 0; i < _trials; i++) { + partition.update_site_repeats(model); + } + auto t2 = std::chrono::high_resolution_clock::now(); + return (t2 - t1) / _trials; +} + +attributes_t test_case_t::attributes_struct() const { + return attributes_t(_pattern_tip, _site_repeats, _rate_scalers, _cpu); +} + +unsigned int test_case_t::attributes() const { + return cpu_attributes() | misc_attributes(); +} + +unsigned int test_case_t::misc_attributes() const { + return _pattern_tip ? PLL_ATTRIB_PATTERN_TIP + : 0 | _site_repeats + ? PLL_ATTRIB_SITE_REPEATS + : 0 | _rate_scalers ? PLL_ATTRIB_RATE_SCALERS : 0; +} + +unsigned int test_case_t::cpu_attributes() const { + if (test_cpu_t::none == _cpu) { + return PLL_ATTRIB_ARCH_CPU; + } + if (test_cpu_t::sse == _cpu) { + return PLL_ATTRIB_ARCH_SSE; + } + if (test_cpu_t::avx == _cpu) { + return PLL_ATTRIB_ARCH_AVX; + } + if (test_cpu_t::avx2 == _cpu) { + return PLL_ATTRIB_ARCH_AVX2; + } + if (test_cpu_t::avx512 == _cpu) { + return PLL_ATTRIB_ARCH_AVX512; + } + throw std::runtime_error("Unrecognized CPU type"); +} + +} // namespace dks + +std::ostream &operator<<(std::ostream &stream, const dks::test_cpu_t &cpu) { + if (cpu == dks::test_cpu_t::none) { + stream << "none"; + } else if (cpu == dks::test_cpu_t::sse) { + stream << "sse"; + } else if (cpu == dks::test_cpu_t::avx) { + stream << "avx"; + } else if (cpu == dks::test_cpu_t::avx2) { + stream << "avx2"; + } else if (cpu == dks::test_cpu_t::avx512) { + stream << "avx512"; + } + return stream; +} + +std::ostream &operator<<(std::ostream &stream, + const dks::attributes_t &attribs) { + stream << "{\"cpu\": \"" << attribs.simd << "\"" << std::boolalpha + << ", \"pattern tip\": \"" << attribs.pattern_tip << "\"" + << ", \"rate scalers\": \"" << attribs.rate_scalers << "\"" + << ", \"site repeats\": \"" << attribs.site_repeats << "\"}"; + return stream; +} diff --git a/src/dks/test_case.h b/src/dks/test_case.h new file mode 100644 index 0000000..9ed66ac --- /dev/null +++ b/src/dks/test_case.h @@ -0,0 +1,175 @@ +#ifndef DKS_TESTCASE_H_ +#define DKS_TESTCASE_H_ +#include "model.h" +#include "partition.h" +#include "tree.h" +#include +#include +#include +#include +#include + +namespace dks { +enum test_cpu_t { + none, + sse, + avx, + avx2, + avx512, + invalid, +}; + +test_cpu_t test_cpu_from_attribs(uint32_t attribs); + +enum test_kernel_t { + partial, + likelihood, + derivative, + pmatrix, +}; + +typedef std::chrono::duration benchmark_time_t; + +typedef std::unordered_map benchmark_result_t; + +struct attributes_t { + bool pattern_tip; + bool site_repeats; + bool rate_scalers; + test_cpu_t simd; + + attributes_t() = default; + + attributes_t(bool pt, bool sr, bool rs, test_cpu_t s) + : pattern_tip{pt}, site_repeats{sr}, rate_scalers{rs}, simd{s} {}; + + bool operator==(const attributes_t &other) const { + return pattern_tip == other.pattern_tip && + site_repeats == other.site_repeats && + rate_scalers == other.rate_scalers && simd == other.simd; + } + + bool operator!=(const attributes_t &other) const { return !(*this == other); } + + int cpu_attrib() const; + + int siterepeat_attrib() const; + + int patterntip_attrib() const; + + int pll_attributes() const; +}; + +class attributes_generator_t { +public: + attributes_generator_t() + : _off_flags{PLL_ATTRIB_AB_MASK | PLL_ATTRIB_AB_FLAG | + PLL_ATTRIB_RATE_SCALERS | PLL_ATTRIB_ARCH_AVX512}, + _on_flags{0}, _max{PLL_ATTRIB_MASK + 1}, _state{0} {}; + + attributes_t next(); + + attributes_t end(); + + void disable(int attribs) { _off_flags |= attribs; } + void enable(int attribs) { _on_flags |= attribs; } + +private: + inline bool check_cases() const { + // do some bit math to check that a bit is only set if the corrisponding + // flag is set + + return ((_off_flags & _state) | (_on_flags & ~_state)) & PLL_ATTRIB_MASK; + } + + inline bool check_xor(int attrib) const { + return __builtin_popcount(attrib & _state) <= 1; + } + + inline bool valid() const { + if (!check_xor(PLL_ATTRIB_SITE_REPEATS | PLL_ATTRIB_PATTERN_TIP)) { + return false; + } + if (!check_xor(PLL_ATTRIB_ARCH_MASK)) { + return false; + } + return !check_cases(); + } + + uint32_t _off_flags; + uint32_t _on_flags; + uint32_t _max; + uint32_t _state; +}; + +class test_case_t { +public: + test_case_t(test_cpu_t cpu, bool pt, bool sr, bool rs, uint64_t seed, + const pll_state_t *charmap) + : _charmap{charmap}, _cpu{cpu}, _trials{30}, _random_seed{seed}, + _pattern_tip{pt}, _site_repeats{sr}, _rate_scalers{rs} {} + + test_case_t(const attributes_t &attribs) + : test_case_t(attribs.simd, attribs.pattern_tip, attribs.site_repeats, + attribs.rate_scalers, 0, nullptr){}; + + test_case_t(const attributes_t &attribs, const pll_state_t *charmap) + : test_case_t(attribs.simd, attribs.pattern_tip, attribs.site_repeats, + attribs.rate_scalers, 0, charmap){}; + + benchmark_result_t benchmark(const msa_t &, const msa_weight_t &, + const model_t &); + benchmark_result_t benchmark(const std::vector> &, + const msa_weight_t &, const model_t &); + + benchmark_result_t run_benchmarks(partition_t &partition, + const model_t &model); + + benchmark_time_t benchmark_partials(partition_t &partition, + const model_t &model); + benchmark_time_t benchmark_likelihood(partition_t &partition, + const model_t &model); + benchmark_time_t benchmark_pmatrix(partition_t &partition, + const model_t &model); + benchmark_time_t benchmark_derivative(partition_t &partition, + const model_t &model); + benchmark_time_t benchmark_update_site_repeats(partition_t &partition, + const model_t &model); + attributes_t attributes_struct() const; + unsigned int attributes() const; + unsigned int cpu_attributes() const; + unsigned int misc_attributes() const; + +private: + const pll_state_t *_charmap; + test_cpu_t _cpu; + size_t _trials; + uint64_t _random_seed; + bool _pattern_tip; + bool _site_repeats; + bool _rate_scalers; +}; +} // namespace dks + +namespace std { +template <> struct hash { + typedef dks::attributes_t argument_type; + typedef size_t result_type; + result_type operator()(const argument_type &s) const noexcept { + return (s.pattern_tip << 0) ^ (s.site_repeats << 1) ^ + (s.rate_scalers << 2) ^ (s.simd << 3); + } +}; +template <> struct hash { + typedef dks::test_kernel_t argument_type; + typedef size_t result_type; + result_type operator()(const argument_type &s) const noexcept { + return static_cast(s); + } +}; +} // namespace std + +std::ostream &operator<<(std::ostream &stream, const dks::test_cpu_t &cpu); +std::ostream &operator<<(std::ostream &stream, + const dks::attributes_t &attribs); +#endif diff --git a/src/dks/tree.cpp b/src/dks/tree.cpp new file mode 100644 index 0000000..397c14e --- /dev/null +++ b/src/dks/tree.cpp @@ -0,0 +1,164 @@ +#include "pll.h" +#include "tree.h" +#include + +namespace dks { +int full_traverse_cb(pll_unode_t *n) { + // silence a warning + do { + (void)(n); + } while (0); + return PLL_SUCCESS; +} + +tree_t::tree_t(size_t tip_count, uint64_t random_seed) { + size_t inner_count = tip_count - 2; + + pll_unode_t *vroot = make_triplet(make_tip(), make_tip(), make_tip()); + + // generate a list of nodes + size_t node_buffer_size = tip_count + 3 * inner_count; + pll_unode_t **nodes = + (pll_unode_t **)malloc(sizeof(pll_unode_t *) * node_buffer_size); + + std::minstd_rand random_engine(random_seed); + + for (size_t i = 3; i < tip_count; i++) { + unsigned int node_count; + pll_utree_traverse(vroot, PLL_TREE_TRAVERSE_POSTORDER, full_traverse_cb, + nodes, &node_count); + + std::uniform_int_distribution<> roller(0, node_count - 1); + pll_unode_t *insert_node = nodes[roller(random_engine)]; + insert_tip(insert_node); + } + _tree = pll_utree_wraptree(vroot, tip_count); + pll_utree_reset_template_indices(vroot, tip_count); + update_internal_lists(); + + free(nodes); + if (!pll_utree_check_integrity(_tree)) { + pll_utree_destroy(_tree, nullptr); + throw std::runtime_error("Tree validation check failed"); + } +} + +tree_t::~tree_t() { pll_utree_destroy(_tree, nullptr); } + +pll_unode_t *tree_t::vroot() const { return _tree->vroot; } + +size_t tree_t::node_count() const { + return _tree->tip_count + _tree->inner_count; +} + +size_t tree_t::edge_count() const { return _tree->tip_count * 2 - 3; } + +size_t tree_t::tip_count() const { return _tree->tip_count; } + +void tree_t::insert_tip(pll_unode_t *insert_node) { + pll_unode_t *new_a = make_node(); + pll_unode_t *new_b = make_node(); + pll_unode_t *new_c = make_node(); + pll_unode_t *new_tip = make_tip(); + + pll_unode_t *saved_back = insert_node->back; + + pair_nodes(insert_node, new_a); + pair_nodes(saved_back, new_b); + pair_nodes(new_tip, new_c); + + make_circle(new_a, new_b, new_c); +} + +void tree_t::pair_nodes(pll_unode_t *a, pll_unode_t *b) { + a->back = b; + b->back = a; +} + +void tree_t::make_circle(pll_unode_t *a, pll_unode_t *b, pll_unode_t *c) { + a->next = b; + b->next = c; + c->next = a; +} + +pll_unode_t *tree_t::make_triplet(pll_unode_t *a, pll_unode_t *b, + pll_unode_t *c) { + pll_unode_t *inner_a = make_node(); + pll_unode_t *inner_b = make_node(); + pll_unode_t *inner_c = make_node(); + + pair_nodes(inner_a, a); + pair_nodes(inner_b, b); + pair_nodes(inner_c, c); + + make_circle(inner_a, inner_b, inner_c); + return inner_a; +} + +pll_unode_t *tree_t::make_node() { + pll_unode_t *a = (pll_unode_t *)malloc(sizeof(pll_unode_t)); + a->label = nullptr; + a->next = nullptr; + a->back = nullptr; + a->length = 0.1; + return a; +} + +pll_unode_t *tree_t::make_tip() { return make_node(); } + +const std::vector &tree_t::branch_lengths() const { + return _branch_lengths; +} + +const std::vector &tree_t::matrix_indices() const { + return _matrix_indices; +} + +std::vector tree_t::full_traverse() const { + unsigned int node_number = 0; + std::vector trav_nodes(node_count()); + if (!(pll_utree_traverse(_tree->vroot, PLL_TREE_TRAVERSE_POSTORDER, + full_traverse_cb, trav_nodes.data(), + &node_number))) { + throw std::runtime_error("Generic failure to traverse the tree"); + } + + return trav_nodes; +} + +void tree_t::fill_branch_lengths(const std::vector &nodes) { + if (_branch_lengths.size() != nodes.size()) { + _branch_lengths.resize(nodes.size(), 0.1); + } + for (size_t i = 0; i < nodes.size(); i++) { + _branch_lengths[i] = nodes[i]->length; + } +} + +void tree_t::fill_matrix_indices(const std::vector &nodes) { + if (_matrix_indices.size() != nodes.size()) { + _matrix_indices.resize(nodes.size(), 0); + } + for (size_t i = 0; i < nodes.size(); i++) { + _matrix_indices[i] = nodes[i]->pmatrix_index; + } +} + +void tree_t::update_internal_lists() { + auto nodes = full_traverse(); + fill_branch_lengths(nodes); + fill_matrix_indices(nodes); +} + +std::vector tree_t::make_operations() const { + auto traversal_nodes = full_traverse(); + std::vector operations(traversal_nodes.size()); + unsigned int operations_count = 0; + pll_utree_create_operations(traversal_nodes.data(), traversal_nodes.size(), + nullptr, nullptr, operations.data(), nullptr, + &operations_count); + + operations.resize(operations_count); + return operations; +} +} // namespace dks diff --git a/src/dks/tree.h b/src/dks/tree.h new file mode 100644 index 0000000..d2e194f --- /dev/null +++ b/src/dks/tree.h @@ -0,0 +1,41 @@ +#ifndef DKS_TREE_H_ +#define DKS_TREE_H_ +#include "pll.h" +#include +#include +#include + +namespace dks { +class tree_t { +public: + tree_t(size_t tip_count) : tree_t{tip_count, 0} {}; + tree_t(size_t tip_count, uint64_t random_seed); + pll_unode_t *vroot() const; + size_t node_count() const; + size_t edge_count() const; + size_t tip_count() const; + const std::vector &branch_lengths() const; + const std::vector &matrix_indices() const; + std::vector full_traverse() const; + std::vector make_operations() const; + ~tree_t(); + +private: + static void insert_tip(pll_unode_t *); + static void pair_nodes(pll_unode_t *, pll_unode_t *); + static void make_circle(pll_unode_t *, pll_unode_t *, pll_unode_t *); + static pll_unode_t *make_triplet(pll_unode_t *, pll_unode_t *, pll_unode_t *); + static pll_unode_t *make_node(); + static pll_unode_t *make_tip(); + + void fill_branch_lengths(const std::vector &); + void fill_matrix_indices(const std::vector &); + + void update_internal_lists(); + + pll_utree_t *_tree; + std::vector _branch_lengths; + std::vector _matrix_indices; +}; +} // namespace dks +#endif diff --git a/src/util/CMakeLists.txt b/src/util/CMakeLists.txt index 61bc93a..d56626f 100644 --- a/src/util/CMakeLists.txt +++ b/src/util/CMakeLists.txt @@ -5,3 +5,4 @@ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${PLLMOD_CFLAGS}") add_pllmodules_lib(pllmodutil "${UTIL_SOURCES}") +