19 #ifndef INCLUDE_BLOOM_FILTER_HPP 20 #define INCLUDE_BLOOM_FILTER_HPP 33 static constexpr std::size_t bits_per_char = 0x08;
35 static const unsigned char bit_mask[bits_per_char] = {
64 maximum_size(maximum_size),
67 projected_element_count(projected_element_count),
68 false_positive_probability(false_positive_probability),
136 double min_m = std::numeric_limits<double>::infinity();
143 const double denominator = std::log(1.0 - std::pow(false_positive_probability, 1.0 / k));
145 const double curr_m = numerator / denominator;
160 optp.
table_size =
static_cast<unsigned long long int>(min_m);
192 projected_element_count_(0),
193 inserted_element_count_ (0),
195 desired_false_positive_probability_(0.0)
200 inserted_element_count_(0),
207 generate_unique_salt();
209 bit_table_.resize(table_size_ / bits_per_char, static_cast<unsigned char>(0x00));
214 this->operator=(filter);
224 (bit_table_.size() == f.
bit_table_.size() ) &&
229 (salt_ == f.
salt_ ) &&
266 return (0 == table_size_);
271 std::fill(bit_table_.begin(), bit_table_.end(),
static_cast<unsigned char>(0x00));
272 inserted_element_count_ = 0;
275 inline void insert(
const unsigned char* key_begin,
const std::size_t& length)
277 std::size_t bit_index = 0;
280 for (std::size_t i = 0; i < salt_.size(); ++i)
282 compute_indices(hash_ap(key_begin, length, salt_[i]), bit_index, bit);
284 bit_table_[bit_index / bits_per_char] |= bit_mask[bit];
287 ++inserted_element_count_;
290 template <
typename T>
294 insert(reinterpret_cast<const unsigned char*>(&t),
sizeof(T));
297 inline void insert(
const std::string& key)
299 insert(reinterpret_cast<const unsigned char*>(key.data()),key.size());
302 inline void insert(
const char* data,
const std::size_t& length)
304 insert(reinterpret_cast<const unsigned char*>(data),length);
307 template <
typename InputIterator>
308 inline void insert(
const InputIterator begin,
const InputIterator end)
310 InputIterator itr = begin;
318 inline virtual bool contains(
const unsigned char* key_begin,
const std::size_t length)
const 320 std::size_t bit_index = 0;
323 for (std::size_t i = 0; i < salt_.size(); ++i)
325 compute_indices(hash_ap(key_begin, length, salt_[i]), bit_index, bit);
327 if ((bit_table_[bit_index / bits_per_char] & bit_mask[bit]) != bit_mask[bit])
336 template <
typename T>
339 return contains(reinterpret_cast<const unsigned char*>(&t),static_cast<std::size_t>(
sizeof(T)));
344 return contains(reinterpret_cast<const unsigned char*>(key.c_str()),key.size());
347 inline bool contains(
const char* data,
const std::size_t& length)
const 349 return contains(reinterpret_cast<const unsigned char*>(data),length);
352 template <
typename InputIterator>
353 inline InputIterator
contains_all(
const InputIterator begin,
const InputIterator end)
const 355 InputIterator itr = begin;
370 template <
typename InputIterator>
371 inline InputIterator
contains_none(
const InputIterator begin,
const InputIterator end)
const 373 InputIterator itr = begin;
388 inline virtual unsigned long long int size()
const 395 return inserted_element_count_;
407 return std::pow(1.0 - std::exp(-1.0 * salt_.size() * inserted_element_count_ / size()), 1.0 * salt_.size());
419 for (std::size_t i = 0; i < bit_table_.size(); ++i)
437 for (std::size_t i = 0; i < bit_table_.size(); ++i)
455 for (std::size_t i = 0; i < bit_table_.size(); ++i)
464 inline const cell_type*
table()
const 466 return bit_table_.data();
476 inline virtual void compute_indices(
const bloom_type& hash, std::size_t& bit_index, std::size_t& bit)
const 478 bit_index = hash % table_size_;
479 bit = bit_index % bits_per_char;
490 const unsigned int predef_salt_count = 128;
492 static const bloom_type predef_salt[predef_salt_count] =
494 0xAAAAAAAA, 0x55555555, 0x33333333, 0xCCCCCCCC,
495 0x66666666, 0x99999999, 0xB5B5B5B5, 0x4B4B4B4B,
496 0xAA55AA55, 0x55335533, 0x33CC33CC, 0xCC66CC66,
497 0x66996699, 0x99B599B5, 0xB54BB54B, 0x4BAA4BAA,
498 0xAA33AA33, 0x55CC55CC, 0x33663366, 0xCC99CC99,
499 0x66B566B5, 0x994B994B, 0xB5AAB5AA, 0xAAAAAA33,
500 0x555555CC, 0x33333366, 0xCCCCCC99, 0x666666B5,
501 0x9999994B, 0xB5B5B5AA, 0xFFFFFFFF, 0xFFFF0000,
502 0xB823D5EB, 0xC1191CDF, 0xF623AEB3, 0xDB58499F,
503 0xC8D42E70, 0xB173F616, 0xA91A5967, 0xDA427D63,
504 0xB1E8A2EA, 0xF6C0D155, 0x4909FEA3, 0xA68CC6A7,
505 0xC395E782, 0xA26057EB, 0x0CD5DA28, 0x467C5492,
506 0xF15E6982, 0x61C6FAD3, 0x9615E352, 0x6E9E355A,
507 0x689B563E, 0x0C9831A8, 0x6753C18B, 0xA622689B,
508 0x8CA63C47, 0x42CC2884, 0x8E89919B, 0x6EDBD7D3,
509 0x15B6796C, 0x1D6FDFE4, 0x63FF9092, 0xE7401432,
510 0xEFFE9412, 0xAEAEDF79, 0x9F245A31, 0x83C136FC,
511 0xC3DA4A8C, 0xA5112C8C, 0x5271F491, 0x9A948DAB,
512 0xCEE59A8D, 0xB5F525AB, 0x59D13217, 0x24E7C331,
513 0x697C2103, 0x84B0A460, 0x86156DA9, 0xAEF2AC68,
514 0x23243DA5, 0x3F649643, 0x5FA495A8, 0x67710DF8,
515 0x9A6C499E, 0xDCFB0227, 0x46A43433, 0x1832B07A,
516 0xC46AFF3C, 0xB9C8FFF0, 0xC9500467, 0x34431BDF,
517 0xB652432B, 0xE367F12B, 0x427F4C1B, 0x224C006E,
518 0x2E7E5A89, 0x96F99AA5, 0x0BEB452A, 0x2FD87C39,
519 0x74B2E1FB, 0x222EFD24, 0xF357F60C, 0x440FCB1E,
520 0x8BBE030F, 0x6704DC29, 0x1144D12F, 0x948B1355,
521 0x6D8FD7E9, 0x1C11A014, 0xADD1592F, 0xFB3C712E,
522 0xFC77642F, 0xF9C4CE8C, 0x31312FB9, 0x08B0DD79,
523 0x318FA6E7, 0xC040D23D, 0xC0589AA7, 0x0CA5C075,
524 0xF874B172, 0x0CF914D5, 0x784D3280, 0x4E8CFEBC,
525 0xC569F575, 0xCDB2A091, 0x2CC016B4, 0x5C5F4421
528 if (salt_count_ <= predef_salt_count)
531 predef_salt + salt_count_,
532 std::back_inserter(salt_));
534 for (std::size_t i = 0; i < salt_.size(); ++i)
542 salt_[i] = salt_[i] * salt_[(i + 3) % salt_.size()] +
static_cast<bloom_type
>(random_seed_);
547 std::copy(predef_salt, predef_salt + predef_salt_count, std::back_inserter(salt_));
549 srand(static_cast<unsigned int>(random_seed_));
551 while (salt_.size() < salt_count_)
553 bloom_type current_salt =
static_cast<bloom_type
>(rand()) * static_cast<bloom_type>(rand());
555 if (0 == current_salt)
558 if (salt_.end() == std::find(salt_.begin(), salt_.end(), current_salt))
560 salt_.push_back(current_salt);
566 inline bloom_type
hash_ap(
const unsigned char* begin, std::size_t remaining_length, bloom_type hash)
const 568 const unsigned char* itr = begin;
569 unsigned int loop = 0;
571 while (remaining_length >= 8)
573 const unsigned int& i1 = *(
reinterpret_cast<const unsigned int*
>(itr)); itr +=
sizeof(
unsigned int);
574 const unsigned int& i2 = *(
reinterpret_cast<const unsigned int*
>(itr)); itr +=
sizeof(
unsigned int);
576 hash ^= (hash << 7) ^ i1 * (hash >> 3) ^
577 (~((hash << 11) + (i2 ^ (hash >> 5))));
579 remaining_length -= 8;
582 if (remaining_length)
584 if (remaining_length >= 4)
586 const unsigned int& i = *(
reinterpret_cast<const unsigned int*
>(itr));
589 hash ^= (hash << 7) ^ i * (hash >> 3);
591 hash ^= (~((hash << 11) + (i ^ (hash >> 5))));
595 remaining_length -= 4;
597 itr +=
sizeof(
unsigned int);
600 if (remaining_length >= 2)
602 const unsigned short& i = *(
reinterpret_cast<const unsigned short*
>(itr));
605 hash ^= (hash << 7) ^ i * (hash >> 3);
607 hash ^= (~((hash << 11) + (i ^ (hash >> 5))));
611 remaining_length -= 2;
613 itr +=
sizeof(
unsigned short);
616 if (remaining_length)
618 hash += ((*itr) ^ (hash * 0xA5A5A5A5)) + loop;
void insert(const unsigned char *key_begin, const std::size_t &length)
void insert(const char *data, const std::size_t &length)
virtual unsigned long long int size() const
bloom_filter operator|(const bloom_filter &a, const bloom_filter &b)
std::vector< unsigned char > bit_table_
void insert(const InputIterator begin, const InputIterator end)
std::vector< bloom_type > salt_
void generate_unique_salt()
unsigned long long int element_count() const
unsigned long long int random_seed_
unsigned long long int projected_element_count_
bloom_filter(const bloom_filter &filter)
double effective_fpp() const
optimal_parameters_t optimal_parameters
double false_positive_probability
bool operator!=(const optional< T > &left, const optional< T > &right)
unsigned long long int table_size_
bool contains(const std::string &key) const
double desired_false_positive_probability_
virtual bool compute_optimal_parameters()
bloom_filter operator^(const bloom_filter &a, const bloom_filter &b)
bool operator==(const optional< T > &left, const optional< T > &right)
unsigned long long int table_size
void insert(const std::string &key)
InputIterator contains_none(const InputIterator begin, const InputIterator end) const
bloom_filter operator&(const bloom_filter &a, const bloom_filter &b)
bloom_filter(const bloom_parameters &p)
unsigned long long int minimum_size
std::vector< unsigned char > table_type
unsigned int minimum_number_of_hashes
unsigned long long int projected_element_count
unsigned long long int maximum_size
const cell_type * table() const
unsigned long long int inserted_element_count_
typename impl::filter< Filter, list<>, List >::type filter
Get a list with all elements that do not pass a filter removed.
bloom_type hash_ap(const unsigned char *begin, std::size_t remaining_length, bloom_type hash) const
virtual void compute_indices(const bloom_type &hash, std::size_t &bit_index, std::size_t &bit) const
bool contains(const T &t) const
virtual bool contains(const unsigned char *key_begin, const std::size_t length) const
bool contains(const char *data, const std::size_t &length) const
bloom_parameters(unsigned long long int projected_element_count, double false_positive_probability, unsigned long long int maximum_size)
unsigned int number_of_hashes
unsigned long long int random_seed
void copy(const path &from, const path &to)
virtual ~bloom_parameters()
unsigned int maximum_number_of_hashes
InputIterator contains_all(const InputIterator begin, const InputIterator end) const