Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
9b42c03
Initial commit with DKS
computations Apr 25, 2019
02c13a5
removed the old model
computations Apr 29, 2019
56ebfea
updated dks
computations Apr 30, 2019
6422a0e
removed reference to physical cpu count
computations Apr 30, 2019
6d0c384
updated formatting, removed main
computations Apr 30, 2019
1f1f7c9
changed the dks include to be a local reference
computations Apr 30, 2019
840e6df
fixed a typo
computations Apr 30, 2019
798f6c8
removed an inline that was causing linking errors
computations Apr 30, 2019
9bb64cf
started integration, added support for constraints
computations Apr 30, 2019
04c86c3
removed some extra libraries
computations Apr 30, 2019
d468992
changed the interface of some functions to incorporate the generator
computations Apr 30, 2019
a7fb33b
cleaned up some code
computations May 2, 2019
7a726bc
Converted the interface to be more "friendly" to pll-modules
computations May 2, 2019
79b1f11
reverting change relating to valid states in attrib generator
computations May 3, 2019
1c44118
added a 'raw' clv option
computations May 3, 2019
9d84e9f
fixed the benchmark header
computations May 3, 2019
8795a2c
reverting a reversion, formatting
computations May 3, 2019
5038931
adds more interfaces into dks
computations May 7, 2019
3794627
fixes a signed compare warning
computations May 8, 2019
6ac86e1
bumps libpll version up
computations May 8, 2019
186f5dd
Eliminates some dead code
computations May 8, 2019
d7b03e1
Replace `#pragma once` with real include guards
computations May 8, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ endif()
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-undefined -Wl,dynamic_lookup")
set(PLLMOD_INCLUDE_PATH ${PLLMODULES_LIBPLL_PATH}/src/)

set(PLLMODULES_COMPONENTS "optimize;algorithm;binary;msa;tree;util" CACHE STRING "pll-modules components to build")
set(PLLMODULES_COMPONENTS "optimize;algorithm;binary;msa;tree;util;dks" CACHE STRING "pll-modules components to build")

foreach(module ${PLLMODULES_COMPONENTS})
MESSAGE(STATUS "Will compile pll-module ${module}")
Expand Down
5 changes: 5 additions & 0 deletions src/dks/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Build the dks (kernel selection) pll-module from every .cpp file in this
# directory.
file(GLOB DKS_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
# add_pllmodules_lib is defined outside this file; presumably it creates the
# pllmoddks_obj target used below -- confirm against the parent CMakeLists.
add_pllmodules_lib(pllmoddks "${DKS_SOURCES}")
# The sources include libpll headers and "pll_msa.h" from the sibling msa
# module, so both directories are added to the include path.
target_include_directories(pllmoddks_obj PRIVATE ${PLLMOD_INCLUDE_PATH}
${CMAKE_CURRENT_SOURCE_DIR}/../msa
)
162 changes: 162 additions & 0 deletions src/dks/benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
#include "benchmark.h"
#include "partition.h"
#include <algorithm>
#include <cmath>
#include <unordered_map>
#include <utility>

namespace dks {

/**
 * Copies a libpll alignment into the dks `msa_t` representation.
 *
 * The result holds `pll_msa->count` rows, each `pll_msa->length` characters
 * long, where row `r`, column `c` equals `pll_msa->sequence[r][c]`.
 *
 * @param pll_msa alignment to copy; must not be null.
 * @return a deep copy of the alignment characters.
 */
msa_t convert_pll_msa_t(const pll_msa_t *pll_msa) {
  msa_t converted;
  converted.reserve(pll_msa->count);

  for (int row = 0; row < pll_msa->count; ++row) {
    // Append a row of the right length first, then fill it in place.
    converted.emplace_back(pll_msa->length);
    auto &target = converted.back();
    for (int col = 0; col < pll_msa->length; ++col) {
      target[col] = pll_msa->sequence[row][col];
    }
  }

  return converted;
}

/**
 * Combines the per-kernel benchmark results into a single weighted score.
 *
 * Each of the four kernels (partial, likelihood, derivative, pmatrix)
 * contributes `weight * measured time`; the contributions are summed.
 *
 * @param kw  weight of each kernel (taken by value because operator[] is used
 *            for the lookups).
 * @param bmr measured result of each kernel (by value for the same reason).
 * @return the weighted sum of the four kernel results.
 */
inline benchmark_time_t weight_kernel_times(kernel_weight_t kw,
                                            benchmark_result_t bmr) {
  const auto partial_cost =
      kw[test_kernel_t::partial] * bmr[test_kernel_t::partial];
  const auto likelihood_cost =
      kw[test_kernel_t::likelihood] * bmr[test_kernel_t::likelihood];
  const auto derivative_cost =
      kw[test_kernel_t::derivative] * bmr[test_kernel_t::derivative];
  const auto pmatrix_cost =
      kw[test_kernel_t::pmatrix] * bmr[test_kernel_t::pmatrix];

  return partial_cost + likelihood_cost + derivative_cost + pmatrix_cost;
}

/**
 * Picks the attribute set with the smallest recorded time.
 *
 * @param at map from attribute set to its (weighted) benchmark time; must not
 *           be empty, because the selected iterator is dereferenced
 *           unconditionally.
 * @return the key whose mapped time is the minimum.
 */
inline attributes_t best_attrib_time(const attributes_time_t &at) {
  // "lhs is faster than rhs", spelled with operator> on the times.
  const auto faster = [](const attributes_time_t::value_type &lhs,
                         const attributes_time_t::value_type &rhs) {
    return rhs.second > lhs.second;
  };

  const auto fastest = std::min_element(at.begin(), at.end(), faster);
  return fastest->first;
}

/**
 * Suggests a weight for each kernel from the size of the data set.
 *
 * Every weight is a linear function of the three inputs with fixed
 * coefficients. A weight that evaluates to a negative number is replaced by
 * 0.1 so no kernel ends up with a negative weight.
 *
 * @param sites  number of alignment sites.
 * @param states number of states of the model.
 * @param taxa   number of taxa.
 * @return one weight per kernel (partial, likelihood, derivative, pmatrix).
 */
kernel_weight_t suggest_weights(double sites, double states, double taxa) {
  // slope_sites * sites + slope_states * states + slope_taxa * taxa + offset
  const auto fit = [sites, states, taxa](double slope_sites,
                                         double slope_states,
                                         double slope_taxa, double offset) {
    return slope_sites * sites + slope_states * states + slope_taxa * taxa +
           offset;
  };

  kernel_weight_t suggested;
  suggested[test_kernel_t::partial] = fit(0.4866, 437.1470, 6.5094, -3557.8645);
  suggested[test_kernel_t::likelihood] = fit(0.327, 28.952, 1.147, -43.042);
  suggested[test_kernel_t::derivative] =
      fit(0.2174, 26.1509, 0.7898, -108.6298);
  suggested[test_kernel_t::pmatrix] =
      fit(3.221e-03, 2.672e+01, 7.741e-01, -2.195e+02);

  for (auto &entry : suggested) {
    if (entry.second < 0.0) {
      entry.second = 0.1;
    }
  }
  return suggested;
}

/**
 * Suggests kernel weights for an in-memory alignment.
 *
 * `msa` stores one row per taxon and one column per site, so the taxon count
 * is `msa.size()` and the site count is the length of a row. The values are
 * forwarded in the (sites, states, taxa) order that the three-argument
 * overload expects.
 *
 * @param msa    alignment, one inner vector per taxon; may be empty, in which
 *               case the site count is taken to be 0.
 * @param states number of states of the model.
 * @return one weight per kernel.
 */
kernel_weight_t suggest_weights(const msa_t &msa, unsigned int states) {
  const double taxa = static_cast<double>(msa.size());
  // Guard the row access: indexing msa[0] on an empty alignment is undefined.
  const double sites =
      msa.empty() ? 0.0 : static_cast<double>(msa.front().size());
  return suggest_weights(sites, static_cast<double>(states), taxa);
}

/**
 * Suggests kernel weights using the state count stored in MSA statistics.
 *
 * @param msa   statistics object; only `msa->states` is read. Must not be
 *              null.
 * @param sites number of alignment sites.
 * @param taxa  number of taxa.
 * @return one weight per kernel.
 */
kernel_weight_t suggest_weights(const pllmod_msa_stats_t *msa, int sites,
                                int taxa) {
  // The three-argument overload is declared as (sites, states, taxa); pass
  // the values in that order rather than (sites, taxa, states).
  return suggest_weights(static_cast<double>(sites),
                         static_cast<double>(msa->states),
                         static_cast<double>(taxa));
}

/**
 * Benchmarks every attribute set the generator yields, using precomputed
 * CLVs as tip data.
 *
 * @param model   model handed to each benchmark run.
 * @param clvs    conditional likelihood vectors handed to each benchmark run.
 * @param weights pattern weights handed to each benchmark run.
 * @param kw      kernel weights used to fold the per-kernel results into one
 *                number.
 * @param att_gen generator of attribute sets; taken by value and advanced
 *                until it returns its end() value.
 * @return map from each benchmarked attribute set to its weighted time.
 */
attributes_time_t
select_kernel_verbose(const model_t &model,
                      const std::vector<std::vector<double>> &clvs,
                      const msa_weight_t &weights, const kernel_weight_t &kw,
                      attributes_generator_t att_gen) {
  attributes_time_t weighted_times;

  for (attributes_t candidate = att_gen.next(); candidate != att_gen.end();
       candidate = att_gen.next()) {
    test_case_t trial(candidate);
    weighted_times[candidate] =
        weight_kernel_times(kw, trial.benchmark(clvs, weights, model));
  }

  return weighted_times;
}

/**
 * Benchmarks every attribute set the generator yields, using a character
 * alignment as tip data.
 *
 * @param model   model handed to each benchmark run.
 * @param msa     alignment handed to each benchmark run.
 * @param weights pattern weights handed to each benchmark run.
 * @param charmap character-to-state map given to each test case.
 * @param kw      kernel weights used to fold the per-kernel results into one
 *                number.
 * @param att_gen generator of attribute sets; taken by value and advanced
 *                until it returns its end() value.
 * @return map from each benchmarked attribute set to its weighted time.
 */
attributes_time_t select_kernel_verbose(const model_t &model, const msa_t &msa,
                                        const msa_weight_t &weights,
                                        const pll_state_t *charmap,
                                        const kernel_weight_t &kw,
                                        attributes_generator_t att_gen) {
  attributes_time_t weighted_times;

  for (attributes_t candidate = att_gen.next(); candidate != att_gen.end();
       candidate = att_gen.next()) {
    test_case_t trial(candidate, charmap);
    weighted_times[candidate] =
        weight_kernel_times(kw, trial.benchmark(msa, weights, model));
  }

  return weighted_times;
}

unsigned int select_kernel_auto(const pll_partition_t *pll_partition,
const pll_msa_t *pll_msa,
const pll_state_t *charmap,
const kernel_weight_t &kw,
attributes_generator_t gen) {
auto msa = convert_pll_msa_t(pll_msa);
model_t model{pll_partition};
msa_weight_t weights(pll_partition->pattern_weights,
pll_partition->pattern_weights + pll_msa->length);
return best_attrib_time(
select_kernel_verbose(model, msa, weights, charmap, kw, gen))
.pll_attributes();
}

/**
 * Selects the fastest kernel attributes for a partition using suggested
 * kernel weights and a default-constructed attribute generator.
 *
 * @param pll_partition partition supplying model parameters and weights.
 * @param pll_msa       alignment used as tip data.
 * @param charmap       character-to-state map for the alignment.
 * @return libpll attribute flags of the fastest attribute set.
 */
unsigned int select_kernel_auto(const pll_partition_t *pll_partition,
                                const pll_msa_t *pll_msa,
                                const pll_state_t *charmap) {
  // suggest_weights is declared as (sites, states, taxa): the alignment length
  // is the site count and the sequence count is the taxon count.
  const auto kw = suggest_weights(static_cast<double>(pll_msa->length),
                                  static_cast<double>(pll_partition->states),
                                  static_cast<double>(pll_msa->count));
  attributes_generator_t gen;
  return select_kernel_auto(pll_partition, pll_msa, charmap, kw, gen);
}

/**
 * Selects the fastest kernel attributes for a partition using suggested
 * kernel weights and a caller-supplied attribute generator.
 *
 * @param pll_partition partition supplying model parameters and weights.
 * @param pll_msa       alignment used as tip data.
 * @param charmap       character-to-state map for the alignment.
 * @param gen           generator of attribute sets to try.
 * @return libpll attribute flags of the fastest attribute set.
 */
unsigned int select_kernel_auto(const pll_partition_t *pll_partition,
                                const pll_msa_t *pll_msa,
                                const pll_state_t *charmap,
                                attributes_generator_t gen) {
  // suggest_weights is declared as (sites, states, taxa): the alignment length
  // is the site count and the sequence count is the taxon count.
  const auto kw = suggest_weights(static_cast<double>(pll_msa->length),
                                  static_cast<double>(pll_partition->states),
                                  static_cast<double>(pll_msa->count));
  return select_kernel_auto(pll_partition, pll_msa, charmap, kw, gen);
}

/**
 * Selects the fastest kernel attributes for an in-memory alignment.
 *
 * @param msa       alignment, one inner vector per taxon.
 * @param weights   pattern weights; its size is used as the site count.
 * @param charmap   character-to-state map for the alignment.
 * @param states    number of states of the model.
 * @param rate_cats number of rate categories requested by the caller.
 * @param gen       generator of attribute sets to try.
 * @return libpll attribute flags of the fastest attribute set.
 */
unsigned int select_kernel_auto(const msa_t &msa, const msa_weight_t &weights,
                                const pll_state_t *charmap, unsigned int states,
                                unsigned int rate_cats,
                                attributes_generator_t gen) {
  // suggest_weights is declared as (sites, states, taxa).
  const auto kw = suggest_weights(static_cast<double>(weights.size()),
                                  static_cast<double>(states),
                                  static_cast<double>(msa.size()));
  // NOTE(review): model_t has no (msa, states, rate_cats) constructor; the
  // third argument binds to the `seed` parameter of
  // model_t(const msa_t &, unsigned int, uint64_t), so rate_cats is used as the
  // tree seed and the model keeps a single rate category. Fixing this needs a
  // matching model_t constructor.
  model_t model{msa, states, rate_cats};
  const auto result =
      select_kernel_verbose(model, msa, weights, charmap, kw, gen);
  return best_attrib_time(result).pll_attributes();
}

unsigned int select_kernel_auto(const msa_t &msa, const msa_weight_t &weights,
const pll_state_t *charmap, unsigned int states,
unsigned int rate_cats) {
attributes_generator_t gen;
return select_kernel_auto(msa, weights, charmap, states, rate_cats, gen);
}

/**
 * Selects the fastest kernel attributes when the tip data is given as CLVs.
 *
 * @param clvs      conditional likelihood vectors; `clvs.size()` is used as
 *                  the tip/taxon count.
 * @param weights   pattern weights; its size is used as the site count.
 * @param states    number of states of the model.
 * @param rate_cats number of rate categories of the model.
 * @param gen       generator of attribute sets to try.
 * @return libpll attribute flags of the fastest attribute set.
 */
unsigned int select_kernel_auto(const std::vector<std::vector<double>> &clvs,
                                const msa_weight_t &weights,
                                unsigned int states, unsigned int rate_cats,
                                attributes_generator_t gen) {
  // suggest_weights is declared as (sites, states, taxa).
  const auto kw = suggest_weights(static_cast<double>(weights.size()),
                                  static_cast<double>(states),
                                  static_cast<double>(clvs.size()));
  // Use the (tip_count, states, rate_categories, seed) constructor. With only
  // three arguments, rate_cats would bind to the `seed` parameter of
  // model_t(size_t, unsigned int, uint64_t) and the rate-category count would
  // be ignored. Seed 0 matches the default used by model_t's two-argument
  // constructors.
  model_t model{clvs.size(), states, rate_cats, 0u};
  const auto result = select_kernel_verbose(model, clvs, weights, kw, gen);
  return best_attrib_time(result).pll_attributes();
}

unsigned int select_kernel_auto(const std::vector<std::vector<double>> &clvs,
const msa_weight_t &weights,
unsigned int states, unsigned int rate_cats) {
attributes_generator_t gen;
return select_kernel_auto(clvs, weights, states, rate_cats, gen);
}
} // namespace dks
69 changes: 69 additions & 0 deletions src/dks/benchmark.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#ifndef DKS_BENCHMARK_H_
#define DKS_BENCHMARK_H_
#include "pll_msa.h"
#include "test_case.h"
#include <unordered_map>

namespace dks {
/** Weight applied to the benchmark result of each kernel. */
typedef std::unordered_map<test_kernel_t, double> kernel_weight_t;
/** Weighted benchmark time recorded for each attribute set. */
typedef std::unordered_map<attributes_t, benchmark_time_t> attributes_time_t;

/**
 * Suggests kernel weights from the data-set dimensions. Negative suggestions
 * are replaced by 0.1.
 *
 * @param sites  number of alignment sites.
 * @param states number of states of the model.
 * @param taxa   number of taxa.
 */
kernel_weight_t suggest_weights(double sites, double states, double taxa);

/**
 * Suggests kernel weights for an in-memory alignment with the given number of
 * states.
 */
kernel_weight_t suggest_weights(const msa_t &msa, unsigned int states);

/**
 * Suggests kernel weights using the state count stored in `msa` together with
 * explicit site and taxon counts.
 */
kernel_weight_t suggest_weights(const pllmod_msa_stats_t *msa, int sites,
                                int taxa);

/**
 * Benchmarks every attribute set produced by `att_gen` on a character
 * alignment and returns the weighted time of each.
 */
attributes_time_t select_kernel_verbose(const model_t &model, const msa_t &msa,
                                        const msa_weight_t &weights,
                                        const pll_state_t *charmap,
                                        const kernel_weight_t &kw,
                                        attributes_generator_t att_gen);

/**
 * Benchmarks every attribute set produced by `att_gen` on CLV tip data and
 * returns the weighted time of each.
 */
attributes_time_t
select_kernel_verbose(const model_t &model,
                      const std::vector<std::vector<double>> &clvs,
                      const msa_weight_t &weights, const kernel_weight_t &kw,
                      attributes_generator_t att_gen);

// NOTE(review): no definition of this overload appears in benchmark.cpp;
// calling it will fail at link time unless it is defined elsewhere.
attributes_t select_kernel_auto(const pll_partition_t *pll_partition,
                                const pll_msa_t *pll_msa);

/**
 * Returns the libpll attribute flags of the fastest attribute set for an
 * in-memory alignment, trying the sets produced by `gen`.
 */
unsigned int select_kernel_auto(const msa_t &msa, const msa_weight_t &weights,
                                const pll_state_t *charmap, unsigned int states,
                                unsigned int rate_cats,
                                attributes_generator_t gen);

/** Same as above with a default-constructed attribute generator. */
unsigned int select_kernel_auto(const msa_t &msa, const msa_weight_t &weights,
                                const pll_state_t *charmap, unsigned int states,
                                unsigned int rate_cats);

/**
 * Returns the libpll attribute flags of the fastest attribute set for CLV tip
 * data, trying the sets produced by `gen`.
 */
unsigned int select_kernel_auto(const std::vector<std::vector<double>> &clvs,
                                const msa_weight_t &weights,
                                unsigned int states, unsigned int rate_cats,
                                attributes_generator_t gen);

/** Same as above with a default-constructed attribute generator. */
unsigned int select_kernel_auto(const std::vector<std::vector<double>> &clvs,
                                const msa_weight_t &weights,
                                unsigned int states, unsigned int rate_cats);

/**
 * Returns the libpll attribute flags of the fastest attribute set for a
 * libpll partition, scoring each set with the kernel weights `kw`.
 *
 * The fourth parameter is a kernel_weight_t, matching the definition in
 * benchmark.cpp.
 */
unsigned int select_kernel_auto(const pll_partition_t *pll_partition,
                                const pll_msa_t *pll_msa,
                                const pll_state_t *charmap,
                                const kernel_weight_t &kw,
                                attributes_generator_t gen);

/**
 * Same as above with suggested kernel weights and a default-constructed
 * attribute generator.
 */
unsigned int select_kernel_auto(const pll_partition_t *pll_partition,
                                const pll_msa_t *pll_msa,
                                const pll_state_t *charmap);

/** Same as above with suggested kernel weights and a caller-supplied generator. */
unsigned int select_kernel_auto(const pll_partition_t *pll_partition,
                                const pll_msa_t *pll_msa,
                                const pll_state_t *charmap,
                                attributes_generator_t gen);

// NOTE(review): no definition of this overload appears in benchmark.cpp;
// calling it will fail at link time unless it is defined elsewhere.
unsigned int select_kernel_auto(const pll_partition_t *pll_partition,
                                const pll_msa_t *pll_msa,
                                const pll_state_t *charmap,
                                const msa_weight_t &weights);

} // namespace dks
#endif// DKS_BENCHMARK_H_
6 changes: 6 additions & 0 deletions src/dks/dks.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#ifndef DKS_H_
#define DKS_H_

#include "benchmark.h"

#endif
37 changes: 37 additions & 0 deletions src/dks/model.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#include "model.h"
#include <pll.h>

namespace dks {

unsigned int model_t::submodels() const { return 1; }

unsigned int model_t::rate_categories() const { return _rate_categories; }

uint64_t model_t::states() const { return _states; }

const double *model_t::subst_params_raw() const { return _subst_params.data(); }

const std::vector<double> &model_t::subst_params() const {
return _subst_params;
}

const double *model_t::frequencies_raw() const { return _frequencies.data(); }

const std::vector<double> &model_t::frequencies() const { return _frequencies; }

const tree_t &model_t::tree() const { return _tree; }

std::vector<pll_operation_t> model_t::make_operations() const {
auto traversal_nodes = _tree.full_traverse();
std::vector<pll_operation_t> operations(traversal_nodes.size());
unsigned int operations_count = 0;
pll_utree_create_operations(traversal_nodes.data(), traversal_nodes.size(),
nullptr, nullptr, operations.data(), nullptr,
&operations_count);

operations.resize(operations_count);
return operations;
}

void model_t::reset_tree() { _tree = tree_t(_tree.tip_count()); }
} // namespace dks
72 changes: 72 additions & 0 deletions src/dks/model.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#ifndef DKS_MODEL_H_
#define DKS_MODEL_H_
#include "tree.h"
#include <memory>
#include <pll.h>
#include <vector>

namespace dks {
typedef std::vector<std::vector<char>> msa_t;
typedef std::vector<unsigned int> msa_weight_t;
/**
 * Evolutionary model used by the kernel benchmarks: a tree plus the number of
 * states, rate categories, proportion of invariant sites, substitution rates
 * and state frequencies.
 */
class model_t {
public:
  /** Model for `msa` with `states` states and tree seed 0. */
  model_t(const msa_t &msa, unsigned int states) : model_t{msa, states, 0} {}

  /** Model with `tip_count` tips, `states` states and tree seed 0. */
  model_t(size_t tip_count, unsigned int states)
      : model_t{tip_count, states, 0} {}

  /**
   * Model with one tip per row of `msa`, a single rate category, all
   * substitution rates set to 1.0 and uniform state frequencies.
   *
   * @param seed seed forwarded to the tree constructor.
   */
  model_t(const msa_t &msa, unsigned int states, uint64_t seed)
      : _tree{msa.size(), seed}, _states{states}, _rate_categories{1},
        _prop_invar{0.0}, _subst_params(subst_param_count(states), 1.0),
        _frequencies(states, 1.0 / states) {}

  /**
   * Model with `tip_count` tips, a single rate category, all substitution
   * rates set to 1.0 and uniform state frequencies.
   *
   * The vectors are built with the (count, value) constructor; brace
   * initialisation would select the initializer-list constructor and create
   * two-element vectors instead.
   *
   * @param seed seed forwarded to the tree constructor.
   */
  model_t(size_t tip_count, unsigned int states, uint64_t seed)
      : _tree{tip_count, seed}, _states{states}, _rate_categories{1},
        _prop_invar{0.0}, _subst_params(subst_param_count(states), 1.0),
        _frequencies(states, 1.0 / states) {}

  /**
   * Model copied from the first parameter set (index 0) of a libpll
   * partition: tips, states, rate categories, substitution rates, frequencies
   * and the first proportion-of-invariant-sites entry.
   */
  model_t(const pll_partition_t *pll_partition)
      : model_t(pll_partition->tips, pll_partition->states, 0,
                pll_partition->rate_cats, pll_partition->subst_params,
                pll_partition->frequencies, *(pll_partition->prop_invar)) {}

  /**
   * Model copied from raw parameter arrays.
   *
   * @param model_index  which row of `subst_params` / `frequencies` to copy.
   * @param subst_params array of rate arrays; states * (states - 1) / 2
   *                     values are copied from the selected row.
   * @param frequencies  array of frequency arrays; `states` values are copied
   *                     from the selected row.
   * @param pinv         proportion of invariant sites.
   */
  model_t(size_t tip_count, unsigned int states, size_t model_index,
          unsigned int rate_categories, double **subst_params,
          double **frequencies, double pinv)
      : _tree{tree_t(tip_count)}, _states{states},
        _rate_categories{rate_categories}, _prop_invar{pinv},
        _subst_params(subst_params[model_index],
                      subst_params[model_index] + subst_param_count(states)),
        _frequencies(frequencies[model_index],
                     frequencies[model_index] + states) {}

  /**
   * Model with `tip_count` tips and an explicit number of rate categories,
   * all substitution rates set to 1.0 and uniform state frequencies.
   *
   * @param seed seed forwarded to the tree constructor.
   */
  model_t(size_t tip_count, unsigned int states, unsigned int rate_categories,
          unsigned int seed)
      : _tree{tip_count, seed}, _states{states},
        _rate_categories{rate_categories}, _prop_invar{0.0},
        _subst_params(subst_param_count(states), 1.0),
        _frequencies(states, 1.0 / states) {}

  unsigned int submodels() const;
  unsigned int rate_categories() const;
  uint64_t states() const;
  const double *subst_params_raw() const;
  const std::vector<double> &subst_params() const;
  const double *frequencies_raw() const;
  const std::vector<double> &frequencies() const;
  const tree_t &tree() const;

  /** Replaces the tree with a newly constructed one of the same tip count. */
  void reset_tree();

  /** Builds the libpll operation list for a full traversal of the tree. */
  std::vector<pll_operation_t> make_operations() const;

private:
  /**
   * Number of substitution rates of a symmetric rate matrix over `states`
   * states: one per unordered pair of distinct states, the count libpll
   * stores per rate matrix.
   */
  static size_t subst_param_count(size_t states) {
    return states * (states - 1) / 2;
  }

  tree_t _tree;
  unsigned int _states;
  unsigned int _rate_categories;
  double _prop_invar;
  std::vector<double> _subst_params;
  std::vector<double> _frequencies;
};
} // namespace dks
#endif
Loading