diff --git a/src/misra_gries.cc b/src/misra_gries.cc
new file mode 100644
index 00000000..15ff3c02
--- /dev/null
+++ b/src/misra_gries.cc
@@ -0,0 +1,48 @@
+//
+// Created by Nikolai Karpov on 2019-02-24.
+//
+#include "streamingcc_include/misra_gries.h"
+
+#include <algorithm>
+
+namespace streamingcc {
+
+namespace integer {
+
+// Adds `weight` to the counter kept for `item`, creating it when absent.
+// If that leaves more than `capacity_` counters, the smallest counter is
+// dropped and its value is subtracted from every counter that remains.
+void MisraGriesInt::ProcessItem(const uint32_t item, const double weight) {
+  const auto found = std::find_if(
+      counters_.begin(), counters_.end(),
+      [item](const counter &c) { return c.first == item; });
+  if (found == counters_.end()) {
+    counters_.emplace_back(item, weight);
+  } else {
+    found->second += weight;
+  }
+  // Order by decreasing count so that back() is always the smallest counter.
+  std::sort(counters_.begin(),
+            counters_.end(),
+            [](const counter &a, const counter &b) { return a.second > b.second; });
+  if (counters_.size() > capacity_) {
+    const double smallest = counters_.back().second;
+    counters_.pop_back();
+    for (auto &c : counters_) {
+      c.second -= smallest;
+    }
+  }
+}
+
+// Returns the counter stored for `item`, or 0 when `item` has no counter.
+double MisraGriesInt::GetEstimation(const uint32_t item) const {
+  const auto found = std::find_if(
+      counters_.begin(), counters_.end(),
+      [item](const counter &c) { return c.first == item; });
+  return found != counters_.end() ? found->second : 0.;
+}
+
+} //namespace integer
+
+} //namespace streamingcc
+
diff --git a/src/space_saving.cc b/src/space_saving.cc
new file mode 100644
index 00000000..da3c706b
--- /dev/null
+++ b/src/space_saving.cc
@@ -0,0 +1,55 @@
+//
+// Created by nk-karpov on 2019-05-30.
+//
+
+#include "streamingcc_include/space_saving.h"
+
+#include <algorithm>
+
+namespace streamingcc {
+
+namespace integer {
+
+// Adds `weight` to the counter kept for `item`. An item without a counter
+// gets a new one; when all `capacity_` slots are taken the new counter starts
+// from the smallest stored value and that smallest counter is then evicted.
+void SpaceSavingInt::ProcessItem(const uint32_t item, const double weight) {
+  const auto found = std::find_if(
+      counters_.begin(), counters_.end(),
+      [item](const counter &c) { return c.first == item; });
+  if (found != counters_.end()) {
+    found->second += weight;
+  } else {
+    // counters_ is kept sorted in decreasing order, so back() is the minimum.
+    const double min_value = counters_.empty() ? 0. : counters_.back().second;
+    const bool full = counters_.size() == capacity_;
+    counters_.emplace_back(item, full ? min_value + weight : weight);
+  }
+  std::sort(counters_.begin(),
+            counters_.end(),
+            [](const counter &a, const counter &b) { return a.second > b.second; });
+  if (counters_.size() > capacity_) {
+    counters_.pop_back();
+  }
+}
+
+// Returns the counter stored for `item`. For an item without a counter the
+// result is the smallest stored counter once all `capacity_` slots are in
+// use, and 0 before that: nothing has been evicted yet at that point, so an
+// item without a counter has not been processed at all.
+double SpaceSavingInt::GetEstimation(const uint32_t item) const {
+  const auto found = std::find_if(
+      counters_.begin(), counters_.end(),
+      [item](const counter &c) { return c.first == item; });
+  if (found != counters_.end()) {
+    return found->second;
+  }
+  if (counters_.empty() || counters_.size() < capacity_) {
+    return 0.;
+  }
+  return counters_.back().second;
+}
+
+} //namespace integer
+
+} //namespace streamingcc
diff --git a/src/streamingcc b/src/streamingcc
index 7df65ed9..52b21895 100644
--- a/src/streamingcc
+++ b/src/streamingcc
@@ -8,6 +8,8 @@
 #include "streamingcc_include/f2.h"
 #include "streamingcc_include/count_min.h"
 #include "streamingcc_include/sampling.h"
+#include "streamingcc_include/misra_gries.h"
+#include "streamingcc_include/space_saving.h"
 #include "streamingcc_include/bloom_filter.h"
 #include "streamingcc_include/hyper_loglog.h"
 
diff --git a/src/streamingcc_include/misra_gries.h b/src/streamingcc_include/misra_gries.h
new file mode 100644
index 00000000..f9f27636
--- /dev/null
+++ b/src/streamingcc_include/misra_gries.h
@@ -0,0 +1,34 @@
+//
+// Created by nk-karpov on 2019-05-30.
+//
+
+#ifndef STREAMINGCC_MISRA_GRIES_H
+#define STREAMINGCC_MISRA_GRIES_H
+
+#include "../streamingcc_include/streaming_algorithm.h"
+
+#include <utility>
+#include <vector>
+
+namespace streamingcc {
+namespace integer {
+
+// Weighted frequency summary over uint32_t items that keeps at most
+// `capacity` (item, count) counters.
+class MisraGriesInt : public StreamingAlgorithmWeightedInt {
+ public:
+  explicit MisraGriesInt(const size_t capacity) : capacity_(capacity) {}
+  ~MisraGriesInt() override = default;
+  void ProcessItem(const uint32_t item, const double weight) override;
+  // Returns the counter stored for `item`, or 0 when it has none.
+  double GetEstimation(const uint32_t item) const;
+ private:
+  using counter = std::pair<uint32_t, double>;  // (item, count)
+  std::vector<counter> counters_;
+  size_t capacity_;  // maximum number of counters kept
+};
+
+}  // namespace integer
+}  // namespace streamingcc
+
+#endif //STREAMINGCC_MISRA_GRIES_H
diff --git a/src/streamingcc_include/space_saving.h b/src/streamingcc_include/space_saving.h
new file mode 100644
index 00000000..bb63c04f
--- /dev/null
+++ b/src/streamingcc_include/space_saving.h
@@ -0,0 +1,41 @@
+//
+// Created by nk-karpov on 2019-05-30.
+//
+
+#ifndef STREAMINGCC_SPACE_SAVING_H
+#define STREAMINGCC_SPACE_SAVING_H
+
+#include "../streamingcc_include/streaming_algorithm.h"
+
+#include <utility>
+#include <vector>
+
+namespace streamingcc {
+
+namespace integer {
+
+// Weighted frequency summary over uint32_t items that keeps at most
+// `capacity` (item, count) counters.
+class SpaceSavingInt : public StreamingAlgorithmWeightedInt {
+ public:
+  explicit SpaceSavingInt(const size_t capacity) : capacity_(capacity) {}
+  ~SpaceSavingInt() override = default;
+
+  // Adds `weight` to the count of `item`.
+  void ProcessItem(const uint32_t item, const double weight) override;
+
+  // Returns the counter stored for `item`; for an item without a counter
+  // the value is chosen by the implementation in space_saving.cc.
+  double GetEstimation(const uint32_t item) const;
+
+ private:
+  using counter = std::pair<uint32_t, double>;  // (item, count)
+  std::vector<counter> counters_;
+  size_t capacity_;  // maximum number of counters kept
+};
+
+} // namespace integer
+
+} // namespace streamingcc
+
+#endif //STREAMINGCC_SPACE_SAVING_H
diff --git a/tests/misra_gries_test.cc b/tests/misra_gries_test.cc
new file mode 100644
index 00000000..336d34c9
--- /dev/null
+++ b/tests/misra_gries_test.cc
@@ -0,0 +1,66 @@
+//
+// Created by nk-karpov on 2019-05-30.
+//
+
+#include "../src/streamingcc_include/misra_gries.h"
+
+#define BOOST_TEST_MODULE ClassTest
+#define BOOST_TEST_DYN_LINK
+
+#include <boost/test/unit_test.hpp>
+#include <vector>
+
+// Streams the heavy item 100 first, then items 0..99, and checks the bounds.
+BOOST_AUTO_TEST_CASE(MisraGriesInt_Test0) {
+  using streamingcc::integer::MisraGriesInt;
+  const auto k = 20;
+  MisraGriesInt mg(k);
+  std::vector<double> f(101);  // exact weight per item
+  auto total = 0.;
+  for (auto j = 0; j < 100; ++j) {
+    mg.ProcessItem(100, 0.5);
+    total += 0.5;
+    f[100] += 0.5;
+  }
+  for (auto i = 0; i < 100; ++i) {
+    for (auto j = 0; j < 10; ++j) {
+      mg.ProcessItem(i, 0.5);
+      total += 0.5;
+      f[i] += 0.5;
+    }
+  }
+  for (auto i = 0; i <= 100; ++i) {
+    const auto est = mg.GetEstimation(i);
+    BOOST_CHECK_MESSAGE(est <= f[i], "MisraGriesInt::GetEstimation = " << est);
+    BOOST_CHECK_MESSAGE(est >= f[i] - total / k, "MisraGriesInt::GetEstimation = " << est);
+  }
+}
+
+BOOST_AUTO_TEST_CASE(MisraGriesInt_Test1) {
+  using streamingcc::integer::MisraGriesInt;
+  const auto k = 20;
+  const auto val = 0.25;
+  MisraGriesInt mg(k);
+  std::vector<double> f(101);  // exact weight per item
+  auto total = 0.;
+  // Items 0..99 are streamed first, 10 updates each ...
+  for (auto i = 0; i < 100; ++i) {
+    for (auto j = 0; j < 10; ++j) {
+      mg.ProcessItem(i, val);
+      total += val;
+      f[i] += val;
+    }
+  }
+  // ... and the heavy item 100 arrives last with 100 updates.
+  for (auto j = 0; j < 100; ++j) {
+    mg.ProcessItem(100, val);
+    total += val;
+    f[100] += val;
+  }
+  // Every estimate must lie in [f[i] - total / k, f[i]].
+  for (auto i = 0; i <= 100; ++i) {
+    const auto est = mg.GetEstimation(i);
+    BOOST_CHECK_MESSAGE(est <= f[i], "MisraGriesInt::GetEstimation = " << est);
+    BOOST_CHECK_MESSAGE(est >= f[i] - total / k, "MisraGriesInt::GetEstimation = " << est);
+  }
+}
\ No newline at end of file
diff --git a/tests/space_saving_test.cc b/tests/space_saving_test.cc
new file mode 100644
index 00000000..535fa3f6
--- /dev/null
+++ b/tests/space_saving_test.cc
@@ -0,0 +1,63 @@
+//
+// Created by nk-karpov on 2019-05-30.
+//
+
+#include "../src/streamingcc_include/space_saving.h"
+
+#define BOOST_TEST_MODULE ClassTest
+#define BOOST_TEST_DYN_LINK
+
+#include <boost/test/unit_test.hpp>
+#include <vector>
+
+BOOST_AUTO_TEST_CASE(SpaceSavingInt_Test0) {
+  using streamingcc::integer::SpaceSavingInt;
+  const auto k = 101;  // as many counters as distinct items (0..100)
+  const auto val = 0.25;
+  SpaceSavingInt ss(k);
+  std::vector<double> f(101);  // exact weight per item
+  auto total = 0.;
+  for (auto j = 0; j < 100; ++j) {  // heavy item 100 first, 100 updates
+    ss.ProcessItem(100, val);
+    total += val;
+    f[100] += val;
+  }
+  for (auto i = 0; i < 100; ++i) {  // then items 0..99, 10 updates each
+    for (auto j = 0; j < 10; ++j) {
+      ss.ProcessItem(i, val);
+      total += val;
+      f[i] += val;
+    }
+  }
+  for (auto i = 0; i <= 100; ++i) {  // estimate in [f[i], f[i] + total / k]
+    const auto est = ss.GetEstimation(i);
+    BOOST_CHECK_MESSAGE(est >= f[i], "SpaceSavingInt::GetEstimation = " << est);
+    BOOST_CHECK_MESSAGE(est <= f[i] + total / k, "SpaceSavingInt::GetEstimation = " << est);
+  }
+}
+
+BOOST_AUTO_TEST_CASE(SpaceSavingInt_Test1) {
+  using streamingcc::integer::SpaceSavingInt;
+  const auto k = 101;  // as many counters as distinct items (0..100)
+  const auto val = 0.25;
+  SpaceSavingInt ss(k);
+  std::vector<double> f(101);  // exact weight per item
+  auto total = 0.;
+  for (auto i = 0; i < 100; ++i) {  // items 0..99 first, 10 updates each
+    for (auto j = 0; j < 10; ++j) {
+      ss.ProcessItem(i, val);
+      total += val;
+      f[i] += val;
+    }
+  }
+  for (auto j = 0; j < 100; ++j) {  // heavy item 100 last, 100 updates
+    ss.ProcessItem(100, val);
+    total += val;
+    f[100] += val;
+  }
+  for (auto i = 0; i <= 100; ++i) {  // estimate in [f[i], f[i] + total / k]
+    const auto est = ss.GetEstimation(i);
+    BOOST_CHECK_MESSAGE(est >= f[i], "SpaceSavingInt::GetEstimation = " << est);
+    BOOST_CHECK_MESSAGE(est <= f[i] + total / k, "SpaceSavingInt::GetEstimation = " << est);
+  }
+}
\ No newline at end of file