Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions src/misra_gries.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
//
// Created by Nikolai Karpov on 2019-02-24.
//
#include "streamingcc_include/misra_gries.h"

#include <cassert>

namespace streamingcc {

namespace integer {

// Feeds one weighted occurrence of `item` into the summary.
//
// The weight is added to the counter already holding `item`, or a new counter
// is appended when there is none. The counters are then re-sorted by
// descending value. If that leaves more than `capacity_` counters, the last
// (smallest) one is removed and its value is subtracted from every survivor.
void MisraGriesInt::ProcessItem(const uint32_t item, const double weight) {
  const auto matches_item = [&](const counter &entry) { return entry.first == item; };
  const auto found = std::find_if(counters_.begin(), counters_.end(), matches_item);
  if (found != counters_.end()) {
    found->second += weight;
  } else {
    counters_.emplace_back(item, weight);
  }

  // Keep the heaviest counter in front so that back() is always the smallest.
  const auto heavier_first = [](const counter &lhs, const counter &rhs) {
    return lhs.second > rhs.second;
  };
  std::sort(counters_.begin(), counters_.end(), heavier_first);

  if (counters_.size() <= capacity_) {
    return;
  }

  // Over capacity: evict the smallest counter and charge its value to the rest.
  const auto evicted_value = counters_.back().second;
  counters_.pop_back();
  for (auto &entry : counters_) {
    entry.second -= evicted_value;
  }
}

// Returns the value currently stored in the counter for `item`, or 0 when no
// counter holds that item.
double MisraGriesInt::GetEstimation(const uint32_t item) const {
  for (const auto &entry : counters_) {
    if (entry.first == item) {
      return entry.second;
    }
  }
  return 0.;
}

} //namespace integer

} //namespace streamingcc

47 changes: 47 additions & 0 deletions src/space_saving.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
//
// Created by nk-karpov on 2019-05-30.
//

#include "streamingcc_include/space_saving.h"

#include <cassert>

namespace streamingcc {

namespace integer {

// Feeds one weighted occurrence of `item` into the summary.
//
// A tracked item simply has `weight` added to its counter. An untracked item
// gets a new counter: when the summary already holds `capacity_` counters the
// newcomer starts from the value of the last (smallest) counter plus `weight`,
// otherwise it starts from `weight` alone. The counters are then re-sorted by
// descending value and, if there are more than `capacity_` of them, the last
// one is dropped.
void SpaceSavingInt::ProcessItem(const uint32_t item, const double weight) {
  const auto matches_item = [&](const counter &entry) { return entry.first == item; };
  const auto found = std::find_if(counters_.begin(), counters_.end(), matches_item);

  if (found != counters_.end()) {
    found->second += weight;
  } else {
    double initial_value = weight;
    if (counters_.size() == capacity_) {
      // The counters are kept sorted in descending order, so back() is the minimum.
      const double smallest = counters_.empty() ? 0. : counters_.back().second;
      initial_value = smallest + weight;
    }
    counters_.emplace_back(item, initial_value);
  }

  const auto heavier_first = [](const counter &lhs, const counter &rhs) {
    return lhs.second > rhs.second;
  };
  std::sort(counters_.begin(), counters_.end(), heavier_first);

  const bool over_capacity = counters_.size() > capacity_;
  if (over_capacity) {
    counters_.pop_back();
  }
}

double SpaceSavingInt::GetEstimation(const uint32_t item) const {
auto pred = [&](const counter &x) { return x.first == item; };
auto it = std::find_if(counters_.begin(), counters_.end(), pred);
auto min_value = !counters_.empty() ? counters_.back().second : 0;
if (it != counters_.end()) {
return it->second;
} else {
return min_value;
}
}

} //namespace integer

} //namespace streamingcc
2 changes: 2 additions & 0 deletions src/streamingcc
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#include "streamingcc_include/f2.h"
#include "streamingcc_include/count_min.h"
#include "streamingcc_include/sampling.h"
#include "streamingcc_include/misra_gries.h"
#include "streamingcc_include/space_saving.h"
#include "streamingcc_include/bloom_filter.h"
#include "streamingcc_include/hyper_loglog.h"

Expand Down
34 changes: 34 additions & 0 deletions src/streamingcc_include/misra_gries.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
//
// Created by nk-karpov on 2019-05-30.
//

#ifndef STREAMINGCC_MISRA_GRIES_H
#define STREAMINGCC_MISRA_GRIES_H

#include "../streamingcc_include/streaming_algorithm.h"

#include <algorithm>
#include <vector>

namespace streamingcc {

namespace integer {

// Weighted frequency summary over uint32_t items that keeps at most
// `capacity` (item, value) counters between calls to ProcessItem.
class MisraGriesInt : public StreamingAlgorithmWeightedInt {
 public:
  // `capacity` is the maximum number of counters retained after an update.
  explicit MisraGriesInt(const size_t capacity) : capacity_{capacity} {}
  ~MisraGriesInt() override = default;

  // Adds `weight` to the counter of `item`, evicting the smallest counter
  // when the capacity is exceeded.
  void ProcessItem(const uint32_t item, const double weight) override;

  // Returns the value stored for `item`, or 0 when it has no counter.
  double GetEstimation(const uint32_t item) const;

 private:
  // (item, accumulated value) pair.
  using counter = std::pair<uint32_t, double>;

  // Live counters; ProcessItem leaves them sorted by descending value.
  std::vector<counter> counters_;
  // Upper bound on counters_.size() after each ProcessItem call.
  size_t capacity_;
};

} // namespace integer

} // namespace streamingcc

#endif //STREAMINGCC_MISRA_GRIES_H
31 changes: 31 additions & 0 deletions src/streamingcc_include/space_saving.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
//
// Created by nk-karpov on 2019-05-30.
//

#ifndef STREAMINGCC_SPACE_SAVING_H
#define STREAMINGCC_SPACE_SAVING_H

#include "../streamingcc_include/streaming_algorithm.h"

#include <algorithm>
#include <utility>
#include <vector>

namespace streamingcc {

namespace integer {

// Weighted frequency summary over uint32_t items that keeps at most
// `capacity` (item, value) counters between calls to ProcessItem.
class SpaceSavingInt : public StreamingAlgorithmWeightedInt {
 public:
  // `capacity` is the maximum number of counters retained after an update.
  explicit SpaceSavingInt(const size_t capacity) : capacity_{capacity} {}
  ~SpaceSavingInt() override = default;

  // Adds `weight` to the counter of `item`; an untracked item entering a full
  // summary replaces the smallest counter and inherits its value.
  void ProcessItem(const uint32_t item, const double weight) override;

  // Returns the value stored for `item`, or a fallback derived from the
  // smallest counter when `item` has no counter.
  double GetEstimation(const uint32_t item) const;

 private:
  // (item, accumulated value) pair.
  using counter = std::pair<uint32_t, double>;

  // Live counters; ProcessItem leaves them sorted by descending value.
  std::vector<counter> counters_;
  // Upper bound on counters_.size() after each ProcessItem call.
  size_t capacity_;
};

} // namespace integer

} // namespace streamingcc

#endif //STREAMINGCC_SPACE_SAVING_H
66 changes: 66 additions & 0 deletions tests/misra_gries_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
//
// Created by nk-karpov on 2019-05-30.
//



#include "../src/streamingcc_include/misra_gries.h"

#define BOOST_TEST_MODULE ClassTest
#define BOOST_TEST_DYN_LINK

#include <boost/test/unit_test.hpp>

// Heavy item (100) arrives first, followed by 100 light items. Every estimate
// must lie within [f - total / k, f] of the exact weight f.
BOOST_AUTO_TEST_CASE(MisraGriesInt_Test0) {
  using streamingcc::integer::MisraGriesInt;
  auto k = 20;
  const auto weight = 0.5;
  MisraGriesInt mg(k);
  std::vector<double> f(101);  // exact accumulated weight per item
  auto total = 0.;

  // Sends `repetitions` copies of `item` to the sketch and to the exact tally.
  const auto feed = [&](const int item, const int repetitions) {
    for (auto rep = 0; rep < repetitions; ++rep) {
      mg.ProcessItem(item, weight);
      total += weight;
      f[item] += weight;
    }
  };

  feed(100, 100);
  for (auto item = 0; item < 100; ++item) {
    feed(item, 10);
  }

  for (auto i = 0; i <= 100; ++i) {
    auto est = mg.GetEstimation(i);
    BOOST_CHECK_MESSAGE(est <= f[i], "MisraGriesInt::GetEstimation = " << est);
    BOOST_CHECK_MESSAGE(est >= f[i] - total / k, "MisraGriesInt::GetEstimation = " << est);
  }
}

// 100 light items arrive first, followed by the heavy item (100). Every
// estimate must lie within [f - total / k, f] of the exact weight f.
BOOST_AUTO_TEST_CASE(MisraGriesInt_Test1) {
  using streamingcc::integer::MisraGriesInt;
  auto k = 20;
  auto val = 0.25;
  MisraGriesInt mg(k);
  std::vector<double> f(101);  // exact accumulated weight per item
  auto total = 0.;

  // Sends `repetitions` copies of `item` to the sketch and to the exact tally.
  const auto feed = [&](const int item, const int repetitions) {
    for (auto rep = 0; rep < repetitions; ++rep) {
      mg.ProcessItem(item, val);
      total += val;
      f[item] += val;
    }
  };

  for (auto item = 0; item < 100; ++item) {
    feed(item, 10);
  }
  feed(100, 100);

  for (auto i = 0; i <= 100; ++i) {
    auto est = mg.GetEstimation(i);
    BOOST_CHECK_MESSAGE(est <= f[i], "MisraGriesInt::GetEstimation = " << est);
    BOOST_CHECK_MESSAGE(est >= f[i] - total / k, "MisraGriesInt::GetEstimation = " << est);
  }
}
63 changes: 63 additions & 0 deletions tests/space_saving_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
//
// Created by nk-karpov on 2019-05-30.
//


#include "../src/streamingcc_include/space_saving.h"

#define BOOST_TEST_MODULE ClassTest
#define BOOST_TEST_DYN_LINK

#include <boost/test/unit_test.hpp>

// Heavy item (100) arrives first, followed by 100 light items. Every estimate
// must lie within [f, f + total / k] of the exact weight f.
BOOST_AUTO_TEST_CASE(SpaceSavingInt_Test0) {
  using streamingcc::integer::SpaceSavingInt;
  auto k = 101;
  auto val = 0.25;
  SpaceSavingInt ss(k);
  std::vector<double> f(101);  // exact accumulated weight per item
  auto total = 0.;

  // Sends `repetitions` copies of `item` to the sketch and to the exact tally.
  const auto feed = [&](const int item, const int repetitions) {
    for (auto rep = 0; rep < repetitions; ++rep) {
      ss.ProcessItem(item, val);
      total += val;
      f[item] += val;
    }
  };

  feed(100, 100);
  for (auto item = 0; item < 100; ++item) {
    feed(item, 10);
  }

  for (auto i = 0; i <= 100; ++i) {
    auto est = ss.GetEstimation(i);
    BOOST_CHECK_MESSAGE(est >= f[i], "SpaceSavingInt::GetEstimation = " << est);
    BOOST_CHECK_MESSAGE(est <= f[i] + total / k, "SpaceSavingInt::GetEstimation = " << est);
  }
}

// 100 light items arrive first, followed by the heavy item (100). Every
// estimate must lie within [f, f + total / k] of the exact weight f.
BOOST_AUTO_TEST_CASE(SpaceSavingInt_Test1) {
  using streamingcc::integer::SpaceSavingInt;
  auto k = 101;
  auto val = 0.25;
  SpaceSavingInt ss(k);
  std::vector<double> f(101);  // exact accumulated weight per item
  auto total = 0.;

  // Sends `repetitions` copies of `item` to the sketch and to the exact tally.
  const auto feed = [&](const int item, const int repetitions) {
    for (auto rep = 0; rep < repetitions; ++rep) {
      ss.ProcessItem(item, val);
      total += val;
      f[item] += val;
    }
  };

  for (auto item = 0; item < 100; ++item) {
    feed(item, 10);
  }
  feed(100, 100);

  for (auto i = 0; i <= 100; ++i) {
    auto est = ss.GetEstimation(i);
    BOOST_CHECK_MESSAGE(est >= f[i], "SpaceSavingInt::GetEstimation = " << est);
    BOOST_CHECK_MESSAGE(est <= f[i] + total / k, "SpaceSavingInt::GetEstimation = " << est);
  }
}