TheAlgorithms/C++ 1.0.0
All the algorithms implemented in C++
Loading...
Searching...
No Matches
bloom_filter.cpp
Go to the documentation of this file.
1
25#include <cassert>
26#include <functional>
27#include <initializer_list>
28#include <string>
29#include <vector>
30#include <iostream>
31
36namespace data_structures {
40class Bitset {
41 private:
42 std::vector<std::size_t> data;
43 static const std::size_t blockSize =
44 sizeof(std::size_t);
46 public:
47 explicit Bitset(std::size_t);
48 std::size_t size();
49 void add(std::size_t);
50 bool contains(std::size_t);
51};
52
57std::size_t Bitset::size() { return data.size(); }
58
63Bitset::Bitset(std::size_t initSize) : data(initSize) {}
64
71void Bitset::add(std::size_t x) {
72 std::size_t blockIndex = x / blockSize;
73 if (blockIndex >= data.size()) {
74 data.resize(blockIndex + 1);
75 }
76 data[blockIndex] |= 1 << (x % blockSize);
77}
78
86bool Bitset::contains(std::size_t x) {
87 std::size_t blockIndex = x / blockSize;
88 if (blockIndex >= data.size()) {
89 return false;
90 }
91 return data[blockIndex] & (1 << (x % blockSize));
92}
93
98template <typename T>
100 private:
102 std::vector<std::function<std::size_t(T)>>
104
105 public:
106 BloomFilter(std::size_t,
107 std::initializer_list<std::function<std::size_t(T)>>);
108 void add(T);
109 bool contains(T);
110};
111
120template <typename T>
122 std::size_t size,
123 std::initializer_list<std::function<std::size_t(T)>> funks)
124 : set(size), hashFunks(funks) {}
125
133template <typename T>
135 for (std::size_t i = 0; i < hashFunks.size(); i++) {
136 set.add(hashFunks[i](x) % (sizeof(std::size_t) * set.size()));
137 }
138}
139
148template <typename T>
150 for (std::size_t i = 0; i < hashFunks.size(); i++) {
151 if (set.contains(hashFunks[i](x) %
152 (sizeof(std::size_t) * set.size())) == false) {
153 return false;
154 }
155 }
156 return true;
157}
158
166static std::size_t hashDJB2(std::string const& s) {
167 std::size_t hash = 5381;
168 for (char c : s) {
169 hash = ((hash << 5) + hash) + c;
170 }
171 return hash;
172}
173
182static std::size_t hashStr(std::string const& s) {
183 std::size_t hash = 37;
184 std::size_t primeNum1 = 54059;
185 std::size_t primeNum2 = 76963;
186 for (char c : s) {
187 hash = (hash * primeNum1) ^ (c * primeNum2);
188 }
189 return hash;
190}
191
199std::size_t hashInt_1(int x) {
200 x = ((x >> 16) ^ x) * 0x45d9f3b;
201 x = ((x >> 16) ^ x) * 0x45d9f3b;
202 x = (x >> 16) ^ x;
203 return x;
204}
205
213std::size_t hashInt_2(int x) {
214 auto y = static_cast<std::size_t>(x);
215 y = (y ^ (y >> 30)) * static_cast<std::size_t>(0xbf58476d1ce4e5b9);
216 y = (y ^ (y >> 27)) * static_cast<std::size_t>(0x94d049bb133111eb);
217 y = y ^ (y >> 31);
218 return y;
219}
220} // namespace data_structures
221
229 std::vector<std::string> toCheck{"hello", "world", "!"};
230 std::vector<std::string> toFalse{"false", "world2", "!!!"};
231 for (const auto& x : toCheck) {
232 filter.add(x);
233 }
234 for (const auto& x : toFalse) {
235 assert(filter.contains(x) == false);
236 }
237 for (const auto& x : toCheck) {
238 assert(filter.contains(x));
239 }
240}
241
249 std::vector<int> toCheck{100, 200, 300, 50};
250 std::vector<int> toFalse{1, 2, 3, 4, 5, 6, 7, 8};
251 for (int x : toCheck) {
252 filter.add(x);
253 }
254 for (int x : toFalse) {
255 assert(filter.contains(x) == false);
256 }
257 for (int x : toCheck) {
258 assert(filter.contains(x));
259 }
260}
261
267static void test_bitset() {
269 std::vector<std::size_t> toCheck{0, 1, 5, 8, 63, 64, 67, 127};
270 for (auto x : toCheck) {
271 set.add(x);
272 assert(set.contains(x));
273 }
274 assert(set.contains(128) == false);
275 assert(set.contains(256) == false);
276}
277
282int main() {
283 // run self-test implementations
284
285 test_bitset(); // run test for bitset, because bloom filter is depending on it
288
289 std::cout << "All tests have successfully passed!\n";
290 return 0;
291}
static void test_bloom_filter_int()
Test for bloom filter with int as generic type.
static void test_bitset()
Test for bitset.
static void test_bloom_filter_string()
Test for bloom filter with string as generic type.
int main()
Main function.
Simple bitset implementation for bloom filter.
Bitset(std::size_t)
BitSet class constructor.
void add(std::size_t)
Turn bit on position x to 1s.
std::size_t size()
Utility function to return the size of the inner array.
bool contains(std::size_t)
Doest bitset contains element x.
static const std::size_t blockSize
std::vector< std::size_t > data
short info of this variable
Bloom filter template class.
bool contains(T)
Check element function for Bloom filter.
std::vector< std::function< std::size_t(T)> > hashFunks
hash functions for T type
void add(T)
Add function for Bloom filter.
BloomFilter(std::size_t, std::initializer_list< std::function< std::size_t(T)> >)
Constructor for Bloom filter.
Bitset set
inner bitset for elements
int data[MAX]
test data
for IO operations
static std::size_t hashDJB2(std::string const &s)
Function djb2 to get hash for the given string.
static std::size_t hashStr(std::string const &s)
Hash function, to get hash for the given string.
std::size_t hashInt_2(int x)
Hash function for test
std::size_t hashInt_1(int x)
Hash function for test