From c7c549211c80a126d749e9bd866aa9c019b74483 Mon Sep 17 00:00:00 2001 From: nk-karpov Date: Thu, 30 May 2019 20:59:51 +0300 Subject: [PATCH 1/4] add Misra-Gries + SpaceSaving --- src/MisraGries.cc | 44 ++++++++++++++++++++++++++ src/SpaceSaving.cc | 48 ++++++++++++++++++++++++++++ src/streamingcc | 2 ++ tests/MisraGries_test.cc | 66 +++++++++++++++++++++++++++++++++++++++ tests/SpaceSaving_test.cc | 63 +++++++++++++++++++++++++++++++++++++ 5 files changed, 223 insertions(+) create mode 100644 src/MisraGries.cc create mode 100644 src/SpaceSaving.cc create mode 100644 tests/MisraGries_test.cc create mode 100644 tests/SpaceSaving_test.cc diff --git a/src/MisraGries.cc b/src/MisraGries.cc new file mode 100644 index 00000000..5a5ab033 --- /dev/null +++ b/src/MisraGries.cc @@ -0,0 +1,44 @@ +// +// Created by Nikolai Karpov on 2019-02-24. +// +#include "streamingcc_include/MisraGries.h" + +namespace streamingcc { + +namespace integer { + +void MisraGriesInt::ProcessItem(const uint32_t item, const double weight) { + assert(weight >= 0.); + auto pred = [&](const counter &x) { return x.first == item; }; + auto ind = std::distance(counters_.begin(), std::find_if(counters_.begin(), counters_.end(), pred)); + if (ind == counters_.size()) { + counters_.emplace_back(counter(item, weight)); + } else { + counters_[ind].second += weight; + } + std::sort(counters_.begin(), + counters_.end(), + [](const counter &a, const counter &b) { return a.second > b.second; }); + if (counters_.size() > capacity_) { + auto val = counters_.back().second; + counters_.pop_back(); + for (auto &x : counters_) { + x.second -= val; + } + } +} + +double MisraGriesInt::GetEstimation(const uint32_t item) const { + auto pred = [&](const counter &x) { return x.first == item; }; + auto it = std::find_if(counters_.begin(), counters_.end(), pred); + if (it != counters_.end()) { + return it->second; + } else { + return 0.; + } +} + +} //namespace integer + +} //namespace streamingcc + diff --git a/src/SpaceSaving.cc b/src/SpaceSaving.cc new file mode 100644 index 00000000..08e49804 --- /dev/null +++ b/src/SpaceSaving.cc @@ -0,0 +1,48 @@ +// +// Created by nk-karpov on 2019-05-30. +// + +#include "streamingcc_include/SpaceSaving.h" + +#include + +namespace streamingcc { + +namespace integer { + +void SpaceSavingInt::ProcessItem(const uint32_t item, const double weight) { + assert(weight >= 0.); + auto pred = [&](const counter &x) { return x.first == item; }; + auto ind = std::distance(counters_.begin(), std::find_if(counters_.begin(), counters_.end(), pred)); + auto min_value = !counters_.empty() ? counters_.back().second : 0.; + if (ind == counters_.size()) { + if (counters_.size() == capacity_) { + counters_.emplace_back(counter(item, min_value + weight)); + } else { + counters_.emplace_back(counter(item, weight)); + } + } else { + counters_[ind].second += weight; + } + std::sort(counters_.begin(), + counters_.end(), + [](const counter &a, const counter &b) { return a.second > b.second; }); + if (counters_.size() > capacity_) { + counters_.pop_back(); + } +} + +double SpaceSavingInt::GetEstimation(const uint32_t item) const { + auto pred = [&](const counter &x) { return x.first == item; }; + auto it = std::find_if(counters_.begin(), counters_.end(), pred); + auto min_value = !counters_.empty() ? counters_.back().second : 0; + if (it != counters_.end()) { + return it->second; + } else { + return min_value; + } +} + +} //namespace integer + +} //namespace streamingcc \ No newline at end of file diff --git a/src/streamingcc b/src/streamingcc index d1cb9b81..fb3610f3 100644 --- a/src/streamingcc +++ b/src/streamingcc @@ -8,5 +8,7 @@ #include "streamingcc_include/f2.h" #include "streamingcc_include/count_min.h" #include "streamingcc_include/sampling.h" +#include "streamingcc_include/MisraGries.h" +#include "streamingcc_include/SpaceSaving.h" #endif // SRC_STREAMINGCC diff --git a/tests/MisraGries_test.cc b/tests/MisraGries_test.cc new file mode 100644 index 00000000..88e8e9ba --- /dev/null +++ b/tests/MisraGries_test.cc @@ -0,0 +1,66 @@ +// +// Created by nk-karpov on 2019-05-30. +// + + + +#include "../src/streamingcc_include/MisraGries.h" + +#define BOOST_TEST_MODULE ClassTest +#define BOOST_TEST_DYN_LINK + +#include + +BOOST_AUTO_TEST_CASE(MisraGriesInt_Test0) { + using streamingcc::integer::MisraGriesInt; + auto k = 20; + MisraGriesInt mg(k); + std::vector f(101); + auto total = 0.; + for (auto j = 0; j < 100; j++) { + mg.ProcessItem(100, 0.5); + total += 0.5; + f[100] += 0.5; + } + for (auto i = 0; i < 100; ++i) { + for (auto j = 0; j < 10; ++j) { + mg.ProcessItem(i, 0.5); + total += 0.5; + f[i] += 0.5; + } + } + for (auto i = 0; i <= 100; ++i) { + auto est = mg.GetEstimation(i); + BOOST_CHECK_MESSAGE(est <= f[i], "MisraGriesInt::GetEstimation = " << est); + BOOST_CHECK_MESSAGE(est >= f[i] - total / k, "MisraGriesInt::GetEstimation = " << est); + } +}; + +BOOST_AUTO_TEST_CASE(MisraGriesInt_Test1) { + using streamingcc::integer::MisraGriesInt; + auto k = 20; + auto val = 0.25; + MisraGriesInt mg(k); + std::vector f(101); + auto total = 0.; + + for (auto i = 0; i < 100; ++i) { + for (auto j = 0; j < 10; ++j) { + mg.ProcessItem(i, val); + total += val; + f[i] += val; + } + } + + for (auto j = 0; j < 100; j++) { + mg.ProcessItem(100, val); + total += val; + f[100] += val; + } + + for (auto i = 0; i <= 100; ++i) { + auto est = mg.GetEstimation(i); + BOOST_CHECK_MESSAGE(est <= f[i], "MisraGriesInt::GetEstimation = " << est); + BOOST_CHECK_MESSAGE(est >= f[i] - total / k, "MisraGriesInt::GetEstimation = " << est); + } +}; \ No newline at end of file diff --git a/tests/SpaceSaving_test.cc b/tests/SpaceSaving_test.cc new file mode 100644 index 00000000..ef1a125e --- /dev/null +++ b/tests/SpaceSaving_test.cc @@ -0,0 +1,63 @@ +// +// Created by nk-karpov on 2019-05-30. +// + + +#include "../src/streamingcc_include/SpaceSaving.h" + +#define BOOST_TEST_MODULE ClassTest +#define BOOST_TEST_DYN_LINK + +#include + +BOOST_AUTO_TEST_CASE(SpaceSavingInt_Test0) { + using streamingcc::integer::SpaceSavingInt; + auto k = 101; + auto val = 0.25; + SpaceSavingInt ss(k); + std::vector f(101); + auto total = 0.; + for (auto j = 0; j < 100; j++) { + ss.ProcessItem(100, val); + total += val; + f[100] += val; + } + for (auto i = 0; i < 100; ++i) { + for (auto j = 0; j < 10; ++j) { + ss.ProcessItem(i, val); + total += val; + f[i] += val; + } + } + for (auto i = 0; i <= 100; ++i) { + auto est = ss.GetEstimation(i); + BOOST_CHECK_MESSAGE(est >= f[i], "SpaceSavingInt::GetEstimation = " << est); + BOOST_CHECK_MESSAGE(est <= f[i] + total / k, "SpaceSavingInt::GetEstimation = " << est); + } +}; + +BOOST_AUTO_TEST_CASE(SpaceSavingInt_Test1) { + using streamingcc::integer::SpaceSavingInt; + auto k = 101; + auto val = 0.25; + SpaceSavingInt ss(k); + std::vector f(101); + auto total = 0.; + for (auto i = 0; i < 100; ++i) { + for (auto j = 0; j < 10; ++j) { + ss.ProcessItem(i, val); + total += val; + f[i] += val; + } + } + for (auto j = 0; j < 100; j++) { + ss.ProcessItem(100, val); + total += val; + f[100] += val; + } + for (auto i = 0; i <= 100; ++i) { + auto est = ss.GetEstimation(i); + BOOST_CHECK_MESSAGE(est >= f[i], "SpaceSavingInt::GetEstimation = " << est); + BOOST_CHECK_MESSAGE(est <= f[i] + total / k, "SpaceSavingInt::GetEstimation = " << est); + } +}; \ No newline at end of file From 282e76e69b0bda1ddf04eeafb16cf7a6bbe89fa5 Mon Sep 17 00:00:00 2001 From: nk-karpov Date: Thu, 30 May 2019 21:00:49 +0300 Subject: [PATCH 2/4] add Misra-Gries + SpaceSaving --- src/streamingcc_include/MisraGries.h | 34 +++++++++++++++++++++++++++ src/streamingcc_include/SpaceSaving.h | 31 ++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 src/streamingcc_include/MisraGries.h create mode 100644 src/streamingcc_include/SpaceSaving.h diff --git a/src/streamingcc_include/MisraGries.h b/src/streamingcc_include/MisraGries.h new file mode 100644 index 00000000..c5a2e35f --- /dev/null +++ b/src/streamingcc_include/MisraGries.h @@ -0,0 +1,34 @@ +// +// Created by nk-karpov on 2019-05-30. +// + +#ifndef STREAMINGCC_MISRAGRIES_H +#define STREAMINGCC_MISRAGRIES_H + +#include "../streamingcc_include/streaming_algorithm.h" + +#include +#include + +namespace streamingcc { + +namespace integer { + +class MisraGriesInt : public StreamingAlgorithmWeightedInt { + public: + explicit MisraGriesInt(const size_t capacity) + : capacity_(capacity) {} + void ProcessItem(const uint32_t item, const double weight) override; + double GetEstimation(const uint32_t item) const; + ~MisraGriesInt() override = default; + private: + using counter = std::pair; + std::vector counters_; + size_t capacity_; +}; + +} // namespace integer + +} // namespace streamingcc + +#endif //STREAMINGCC_MISRAGRIES_H diff --git a/src/streamingcc_include/SpaceSaving.h b/src/streamingcc_include/SpaceSaving.h new file mode 100644 index 00000000..ee25cabd --- /dev/null +++ b/src/streamingcc_include/SpaceSaving.h @@ -0,0 +1,31 @@ +// +// Created by nk-karpov on 2019-05-30. +// + +#ifndef STREAMINGCC_SPACESAVING_H +#define STREAMINGCC_SPACESAVING_H + +#include "../streamingcc_include/streaming_algorithm.h" + +namespace streamingcc { + +namespace integer { + +class SpaceSavingInt : public StreamingAlgorithmWeightedInt { + public: + explicit SpaceSavingInt(const size_t capacity) + : capacity_(capacity) {} + void ProcessItem(const uint32_t item, const double weight) override; + double GetEstimation(const uint32_t item) const; + ~SpaceSavingInt() override = default; + private: + using counter = std::pair; + std::vector counters_; + size_t capacity_; +}; + +} // namespace integer + +} // namespace streamingcc + +#endif //STREAMINGCC_SPACESAVING_H From 0f9c6def6d45af766565b3747afea6f60fe305e2 Mon Sep 17 00:00:00 2001 From: nk-karpov Date: Thu, 13 Jun 2019 12:20:42 +0300 Subject: [PATCH 3/4] rename --- src/{MisraGries.cc => misra_gries.cc} | 2 +- src/{SpaceSaving.cc => space_saving.cc} | 2 +- src/streamingcc | 4 ++-- src/streamingcc_include/{MisraGries.h => misra_gries.h} | 6 +++--- src/streamingcc_include/{SpaceSaving.h => space_saving.h} | 6 +++--- tests/{MisraGries_test.cc => misra_gries_test.cc} | 2 +- tests/{SpaceSaving_test.cc => space_saving_test.cc} | 2 +- 7 files changed, 12 insertions(+), 12 deletions(-) rename src/{MisraGries.cc => misra_gries.cc} (96%) rename src/{SpaceSaving.cc => space_saving.cc} (96%) rename src/streamingcc_include/{MisraGries.h => misra_gries.h} (86%) rename src/streamingcc_include/{SpaceSaving.h => space_saving.h} (85%) rename tests/{MisraGries_test.cc => misra_gries_test.cc} (96%) rename tests/{SpaceSaving_test.cc => space_saving_test.cc} (96%) diff --git a/src/MisraGries.cc b/src/misra_gries.cc similarity index 96% rename from src/MisraGries.cc rename to src/misra_gries.cc index 5a5ab033..5cf2a1ca 100644 --- a/src/MisraGries.cc +++ b/src/misra_gries.cc @@ -1,7 +1,7 @@ // // Created by Nikolai Karpov on 2019-02-24. // -#include "streamingcc_include/MisraGries.h" +#include "streamingcc_include/misra_gries.h" namespace streamingcc { diff --git a/src/SpaceSaving.cc b/src/space_saving.cc similarity index 96% rename from src/SpaceSaving.cc rename to src/space_saving.cc index 08e49804..63909090 100644 --- a/src/SpaceSaving.cc +++ b/src/space_saving.cc @@ -2,7 +2,7 @@ // Created by nk-karpov on 2019-05-30. // -#include "streamingcc_include/SpaceSaving.h" +#include "streamingcc_include/space_saving.h" #include diff --git a/src/streamingcc b/src/streamingcc index fb3610f3..d9c9fa86 100644 --- a/src/streamingcc +++ b/src/streamingcc @@ -8,7 +8,7 @@ #include "streamingcc_include/f2.h" #include "streamingcc_include/count_min.h" #include "streamingcc_include/sampling.h" -#include "streamingcc_include/MisraGries.h" -#include "streamingcc_include/SpaceSaving.h" +#include "streamingcc_include/misra_gries.h" +#include "streamingcc_include/space_saving.h" #endif // SRC_STREAMINGCC diff --git a/src/streamingcc_include/MisraGries.h b/src/streamingcc_include/misra_gries.h similarity index 86% rename from src/streamingcc_include/MisraGries.h rename to src/streamingcc_include/misra_gries.h index c5a2e35f..f9f27636 100644 --- a/src/streamingcc_include/MisraGries.h +++ b/src/streamingcc_include/misra_gries.h @@ -2,8 +2,8 @@ // Created by nk-karpov on 2019-05-30. // -#ifndef STREAMINGCC_MISRAGRIES_H -#define STREAMINGCC_MISRAGRIES_H +#ifndef STREAMINGCC_MISRA_GRIES_H +#define STREAMINGCC_MISRA_GRIES_H #include "../streamingcc_include/streaming_algorithm.h" @@ -31,4 +31,4 @@ class MisraGriesInt : public StreamingAlgorithmWeightedInt { } // namespace streamingcc -#endif //STREAMINGCC_MISRAGRIES_H +#endif //STREAMINGCC_MISRA_GRIES_H diff --git a/src/streamingcc_include/SpaceSaving.h b/src/streamingcc_include/space_saving.h similarity index 85% rename from src/streamingcc_include/SpaceSaving.h rename to src/streamingcc_include/space_saving.h index ee25cabd..bb63c04f 100644 --- a/src/streamingcc_include/SpaceSaving.h +++ b/src/streamingcc_include/space_saving.h @@ -2,8 +2,8 @@ // Created by nk-karpov on 2019-05-30. // -#ifndef STREAMINGCC_SPACESAVING_H -#define STREAMINGCC_SPACESAVING_H +#ifndef STREAMINGCC_SPACE_SAVING_H +#define STREAMINGCC_SPACE_SAVING_H #include "../streamingcc_include/streaming_algorithm.h" @@ -28,4 +28,4 @@ class SpaceSavingInt : public StreamingAlgorithmWeightedInt { } // namespace streamingcc -#endif //STREAMINGCC_SPACESAVING_H +#endif //STREAMINGCC_SPACE_SAVING_H diff --git a/tests/MisraGries_test.cc b/tests/misra_gries_test.cc similarity index 96% rename from tests/MisraGries_test.cc rename to tests/misra_gries_test.cc index 88e8e9ba..336d34c9 100644 --- a/tests/MisraGries_test.cc +++ b/tests/misra_gries_test.cc @@ -4,7 +4,7 @@ -#include "../src/streamingcc_include/MisraGries.h" +#include "../src/streamingcc_include/misra_gries.h" #define BOOST_TEST_MODULE ClassTest #define BOOST_TEST_DYN_LINK diff --git a/tests/SpaceSaving_test.cc b/tests/space_saving_test.cc similarity index 96% rename from tests/SpaceSaving_test.cc rename to tests/space_saving_test.cc index ef1a125e..535fa3f6 100644 --- a/tests/SpaceSaving_test.cc +++ b/tests/space_saving_test.cc @@ -3,7 +3,7 @@ // -#include "../src/streamingcc_include/SpaceSaving.h" +#include "../src/streamingcc_include/space_saving.h" #define BOOST_TEST_MODULE ClassTest #define BOOST_TEST_DYN_LINK From d8bf04c0914873cc92d881dbc098e955c185744a Mon Sep 17 00:00:00 2001 From: nk-karpov Date: Mon, 8 Jul 2019 18:37:42 +0300 Subject: [PATCH 4/4] asserts --- src/misra_gries.cc | 5 +++-- src/space_saving.cc | 5 ++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/misra_gries.cc b/src/misra_gries.cc index 5cf2a1ca..15ff3c02 100644 --- a/src/misra_gries.cc +++ b/src/misra_gries.cc @@ -3,15 +3,16 @@ // #include "streamingcc_include/misra_gries.h" +#include + namespace streamingcc { namespace integer { void MisraGriesInt::ProcessItem(const uint32_t item, const double weight) { - assert(weight >= 0.); auto pred = [&](const counter &x) { return x.first == item; }; auto ind = std::distance(counters_.begin(), std::find_if(counters_.begin(), counters_.end(), pred)); - if (ind == counters_.size()) { + if (ind == long(counters_.size())) { counters_.emplace_back(counter(item, weight)); } else { counters_[ind].second += weight; diff --git a/src/space_saving.cc b/src/space_saving.cc index 63909090..da3c706b 100644 --- a/src/space_saving.cc +++ b/src/space_saving.cc @@ -11,11 +11,10 @@ namespace streamingcc { namespace integer { void SpaceSavingInt::ProcessItem(const uint32_t item, const double weight) { - assert(weight >= 0.); auto pred = [&](const counter &x) { return x.first == item; }; auto ind = std::distance(counters_.begin(), std::find_if(counters_.begin(), counters_.end(), pred)); auto min_value = !counters_.empty() ? counters_.back().second : 0.; - if (ind == counters_.size()) { + if (ind == long(counters_.size())) { if (counters_.size() == capacity_) { counters_.emplace_back(counter(item, min_value + weight)); } else { @@ -45,4 +44,4 @@ double SpaceSavingInt::GetEstimation(const uint32_t item) const { } //namespace integer -} //namespace streamingcc \ No newline at end of file +} //namespace streamingcc