|
| 1 | +/** |
| 2 | + * MIT License |
| 3 | + * |
| 4 | + * Copyright (c) 2017 Tessil |
| 5 | + * |
| 6 | + * Permission is hereby granted, free of charge, to any person obtaining a copy |
| 7 | + * of this software and associated documentation files (the "Software"), to deal |
| 8 | + * in the Software without restriction, including without limitation the rights |
| 9 | + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 10 | + * copies of the Software, and to permit persons to whom the Software is |
| 11 | + * furnished to do so, subject to the following conditions: |
| 12 | + * |
| 13 | + * The above copyright notice and this permission notice shall be included in all |
| 14 | + * copies or substantial portions of the Software. |
| 15 | + * |
| 16 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 17 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 18 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 19 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 20 | + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 21 | + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 22 | + * SOFTWARE. |
| 23 | + */ |
| 24 | +#ifndef TSL_ROBIN_GROWTH_POLICY_H |
| 25 | +#define TSL_ROBIN_GROWTH_POLICY_H |
| 26 | + |
| 27 | + |
| 28 | +#include <algorithm> |
| 29 | +#include <array> |
| 30 | +#include <climits> |
| 31 | +#include <cmath> |
| 32 | +#include <cstddef> |
| 33 | +#include <iterator> |
| 34 | +#include <limits> |
| 35 | +#include <ratio> |
| 36 | +#include <stdexcept> |
| 37 | + |
| 38 | + |
| 39 | +namespace tsl { |
| 40 | +namespace rh { |
| 41 | + |
/**
 * Grow the hash table by a factor of GrowthFactor keeping the bucket count to a power of two. It allows
 * the table to use a mask operation instead of a modulo operation to map a hash to a bucket.
 *
 * GrowthFactor must be a power of two >= 2.
 */
template<std::size_t GrowthFactor>
class power_of_two_growth_policy {
public:
    /**
     * Called on the hash table creation and on rehash. The number of buckets for the table is passed in parameter.
     * This number is a minimum, the policy may update this value with a higher value if needed (but not lower).
     *
     * On return, min_bucket_count_in_out holds the bucket count actually selected: the requested value raised
     * to at least MIN_BUCKETS_SIZE and then rounded up to the next power of two.
     *
     * The constructor is explicit so that a std::size_t lvalue is never silently converted into a policy.
     *
     * @throws std::length_error if min_bucket_count_in_out is greater than max_bucket_count().
     */
    explicit power_of_two_growth_policy(std::size_t& min_bucket_count_in_out) {
        if(min_bucket_count_in_out > max_bucket_count()) {
            throw std::length_error("The hash table exceeds its maximum size.");
        }

        static_assert(MIN_BUCKETS_SIZE > 0, "MIN_BUCKETS_SIZE must be > 0.");
        // std::max takes its arguments by reference; going through a local copy avoids odr-using
        // the static member, which has no out-of-class definition.
        const std::size_t min_bucket_count = MIN_BUCKETS_SIZE;

        min_bucket_count_in_out = std::max(min_bucket_count, min_bucket_count_in_out);
        min_bucket_count_in_out = round_up_to_power_of_two(min_bucket_count_in_out);
        // The bucket count is a power of two, so count - 1 has every lower bit set.
        m_mask = min_bucket_count_in_out - 1;
    }

    /**
     * Return the bucket [0, bucket_count()) to which the hash belongs.
     */
    std::size_t bucket_for_hash(std::size_t hash) const noexcept {
        return hash & m_mask;
    }

    /**
     * Return the bucket count to use when the bucket array grows on rehash.
     *
     * @throws std::length_error if multiplying the current bucket count by GrowthFactor
     *         would exceed max_bucket_count().
     */
    std::size_t next_bucket_count() const {
        // Compare against max / GrowthFactor so the multiplication below can not overflow.
        if((m_mask + 1) > max_bucket_count() / GrowthFactor) {
            throw std::length_error("The hash table exceeds its maximum size.");
        }

        return (m_mask + 1) * GrowthFactor;
    }

    /**
     * Return the maximum number of buckets supported by the policy.
     */
    std::size_t max_bucket_count() const {
        // Largest power of two representable in a std::size_t.
        return (std::numeric_limits<std::size_t>::max() / 2) + 1;
    }

private:
    /**
     * Return the smallest power of two >= value (1 when value is 0).
     * The result is only meaningful when value <= max_bucket_count(); the constructor checks this
     * before calling.
     */
    static std::size_t round_up_to_power_of_two(std::size_t value) {
        if(is_power_of_two(value)) {
            return value;
        }

        if(value == 0) {
            return 1;
        }

        // Propagate the highest set bit of (value - 1) into every lower bit, then add one.
        --value;
        for(std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) {
            value |= value >> i;
        }

        return value + 1;
    }

    /**
     * Return true if value is a non-zero power of two.
     */
    static constexpr bool is_power_of_two(std::size_t value) {
        return value != 0 && (value & (value - 1)) == 0;
    }

protected:
    // Lower bound applied to every requested bucket count.
    static const std::size_t MIN_BUCKETS_SIZE = 2;
    static_assert(is_power_of_two(GrowthFactor) && GrowthFactor >= 2, "GrowthFactor must be a power of two >= 2.");

    // Current bucket count minus one; used as the bit mask in bucket_for_hash().
    std::size_t m_mask;
};
| 122 | + |
| 123 | + |
/**
 * Grow the hash table by GrowthFactor::num / GrowthFactor::den and use a modulo to map a hash
 * to a bucket. Slower but it can be useful if you want a slower growth.
 *
 * GrowthFactor must be a std::ratio-like type whose num / den is >= 1.1.
 */
template<class GrowthFactor = std::ratio<3, 2>>
class mod_growth_policy {
public:
    /**
     * Called on the hash table creation and on rehash. min_bucket_count_in_out is the minimum number
     * of buckets requested; on return it holds the count actually selected, which is the requested
     * value raised to at least MIN_BUCKETS_SIZE.
     *
     * The constructor is explicit so that a std::size_t lvalue is never silently converted into a policy.
     *
     * @throws std::length_error if min_bucket_count_in_out is greater than max_bucket_count().
     */
    explicit mod_growth_policy(std::size_t& min_bucket_count_in_out) {
        if(min_bucket_count_in_out > max_bucket_count()) {
            throw std::length_error("The hash table exceeds its maximum size.");
        }

        static_assert(MIN_BUCKETS_SIZE > 0, "MIN_BUCKETS_SIZE must be > 0.");
        // std::max takes its arguments by reference; going through a local copy avoids odr-using
        // the static member, which has no out-of-class definition.
        const std::size_t min_bucket_count = MIN_BUCKETS_SIZE;

        min_bucket_count_in_out = std::max(min_bucket_count, min_bucket_count_in_out);
        m_bucket_count = min_bucket_count_in_out;
    }

    /**
     * Return the bucket [0, bucket_count()) to which the hash belongs.
     */
    std::size_t bucket_for_hash(std::size_t hash) const noexcept {
        return hash % m_bucket_count;
    }

    /**
     * Return the bucket count to use when the bucket array grows on rehash: the current count
     * multiplied by the growth factor and rounded up, capped at max_bucket_count().
     *
     * @throws std::length_error if the current count already equals max_bucket_count(), or if the
     *         computed value is not a normal floating point number.
     */
    std::size_t next_bucket_count() const {
        if(m_bucket_count == max_bucket_count()) {
            throw std::length_error("The hash table exceeds its maximum size.");
        }

        const double grown_count = std::ceil(static_cast<double>(m_bucket_count) * REHASH_SIZE_MULTIPLICATION_FACTOR);
        if(!std::isnormal(grown_count)) {
            throw std::length_error("The hash table exceeds its maximum size.");
        }

        // Clamp before converting back to an integer so the conversion stays in range.
        if(grown_count > static_cast<double>(max_bucket_count())) {
            return max_bucket_count();
        }
        else {
            return static_cast<std::size_t>(grown_count);
        }
    }

    /**
     * Return the maximum number of buckets supported by the policy.
     */
    std::size_t max_bucket_count() const {
        return MAX_BUCKET_COUNT;
    }

private:
    // Lower bound applied to every requested bucket count.
    static const std::size_t MIN_BUCKETS_SIZE = 2;
    static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR = 1.0 * GrowthFactor::num / GrowthFactor::den;
    // Largest std::size_t value divided by the growth factor.
    static const std::size_t MAX_BUCKET_COUNT =
            static_cast<std::size_t>(static_cast<double>(
                    std::numeric_limits<std::size_t>::max() / REHASH_SIZE_MULTIPLICATION_FACTOR
            ));

    static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1, "Growth factor should be >= 1.1.");

    // Current number of buckets; divisor used by bucket_for_hash().
    std::size_t m_bucket_count;
};
| 181 | + |
| 182 | + |
| 183 | + |
namespace detail {

// Bucket counts selectable by the prime-based growth policy, listed in strictly increasing order.
static constexpr std::array<std::size_t, 39> PRIMES = {{
    5ul, 17ul, 29ul, 37ul, 53ul, 67ul, 79ul, 97ul,
    131ul, 193ul, 257ul, 389ul, 521ul, 769ul,
    1031ul, 1543ul, 2053ul, 3079ul, 6151ul,
    12289ul, 24593ul, 49157ul, 98317ul,
    196613ul, 393241ul, 786433ul,
    1572869ul, 3145739ul, 6291469ul,
    12582917ul, 25165843ul, 50331653ul,
    100663319ul, 201326611ul, 402653189ul, 805306457ul,
    1610612741ul, 3221225473ul, 4294967291ul
}};

// Reduce hash modulo PRIMES[IPrime]. The index is a template argument, so every instantiation
// divides by a value known at compile time.
template<unsigned int IPrime>
static constexpr std::size_t mod(std::size_t hash) {
    return hash % PRIMES[IPrime];
}

// Signature shared by all mod<IPrime> instantiations.
using mod_prime_fn = std::size_t (*)(std::size_t);

// MOD_PRIME[iprime](hash) returns hash % PRIMES[iprime]. Dispatching through this table lets each
// entry perform its modulo with a constant divisor, which the compiler can optimize better than a
// modulo by a run-time value.
static constexpr std::array<mod_prime_fn, 39> MOD_PRIME = {{
    &mod<0>,  &mod<1>,  &mod<2>,  &mod<3>,  &mod<4>,  &mod<5>,  &mod<6>,  &mod<7>,
    &mod<8>,  &mod<9>,  &mod<10>, &mod<11>, &mod<12>, &mod<13>, &mod<14>, &mod<15>,
    &mod<16>, &mod<17>, &mod<18>, &mod<19>, &mod<20>, &mod<21>, &mod<22>, &mod<23>,
    &mod<24>, &mod<25>, &mod<26>, &mod<27>, &mod<28>, &mod<29>, &mod<30>, &mod<31>,
    &mod<32>, &mod<33>, &mod<34>, &mod<35>, &mod<36>, &mod<37>, &mod<38>
}};

} // namespace detail
| 206 | + |
| 207 | +/** |
| 208 | + * Grow the hash table by using prime numbers as bucket count. Slower than tsl::rh::power_of_two_growth_policy in |
| 209 | + * general but will probably distribute the values around better in the buckets with a poor hash function. |
| 210 | + * |
| 211 | + * To allow the compiler to optimize the modulo operation, a lookup table is used with constant primes numbers. |
| 212 | + * |
| 213 | + * With a switch the code would look like: |
| 214 | + * \code |
| 215 | + * switch(iprime) { // iprime is the current prime of the hash table |
| 216 | + * case 0: hash % 5ul; |
| 217 | + * break; |
| 218 | + * case 1: hash % 17ul; |
| 219 | + * break; |
| 220 | + * case 2: hash % 29ul; |
| 221 | + * break; |
| 222 | + * ... |
| 223 | + * } |
| 224 | + * \endcode |
| 225 | + * |
| 226 | + * Due to the constant variable in the modulo the compiler is able to optimize the operation |
| 227 | + * by a series of multiplications, substractions and shifts. |
| 228 | + * |
| 229 | + * The 'hash % 5' could become something like 'hash - (hash * 0xCCCCCCCD) >> 34) * 5' in a 64 bits environement. |
| 230 | + */ |
| 231 | +class prime_growth_policy { |
| 232 | +public: |
| 233 | + prime_growth_policy(std::size_t& min_bucket_count_in_out) { |
| 234 | + auto it_prime = std::lower_bound(detail::PRIMES.begin(), |
| 235 | + detail::PRIMES.end(), min_bucket_count_in_out); |
| 236 | + if(it_prime == detail::PRIMES.end()) { |
| 237 | + throw std::length_error("The hash table exceeds its maxmimum size."); |
| 238 | + } |
| 239 | + |
| 240 | + m_iprime = static_cast<unsigned int>(std::distance(detail::PRIMES.begin(), it_prime)); |
| 241 | + min_bucket_count_in_out = *it_prime; |
| 242 | + } |
| 243 | + |
| 244 | + std::size_t bucket_for_hash(std::size_t hash) const noexcept { |
| 245 | + return detail::MOD_PRIME[m_iprime](hash); |
| 246 | + } |
| 247 | + |
| 248 | + std::size_t next_bucket_count() const { |
| 249 | + if(m_iprime + 1 >= detail::PRIMES.size()) { |
| 250 | + throw std::length_error("The hash table exceeds its maxmimum size."); |
| 251 | + } |
| 252 | + |
| 253 | + return detail::PRIMES[m_iprime + 1]; |
| 254 | + } |
| 255 | + |
| 256 | + std::size_t max_bucket_count() const { |
| 257 | + return detail::PRIMES.back(); |
| 258 | + } |
| 259 | + |
| 260 | +private: |
| 261 | + unsigned int m_iprime; |
| 262 | + |
| 263 | + static_assert(std::numeric_limits<decltype(m_iprime)>::max() >= detail::PRIMES.size(), |
| 264 | + "The type of m_iprime is not big enough."); |
| 265 | +}; |
| 266 | + |
| 267 | +} |
| 268 | +} |
| 269 | + |
| 270 | +#endif |
0 commit comments