From 37335efb4659f1389db145a485f8d663d343261f Mon Sep 17 00:00:00 2001 From: Yedidya Feldblum Date: Tue, 24 Oct 2017 14:12:03 -0700 Subject: [PATCH] Move folly/Hash.h to folly/hash/, leaving a shim Summary: [Folly] Move `folly/Hash.h` to `folly/hash/`, leaving a shim. Reviewed By: Orvid Differential Revision: D6132955 fbshipit-source-id: dc789e9c6daa28116be6a5d83c3cfbb40e247114 --- folly/Hash.h | 503 +---------------- folly/Makefile.am | 1 + folly/compression/test/CompressionTest.cpp | 2 +- .../test/ConcurrentHashMapTest.cpp | 2 +- folly/experimental/test/StringKeyedTest.cpp | 2 +- folly/hash/Hash.h | 519 ++++++++++++++++++ folly/hash/test/ChecksumTest.cpp | 2 +- folly/{ => hash}/test/HashBenchmark.cpp | 2 +- folly/{ => hash}/test/HashTest.cpp | 2 +- folly/test/AtomicHashArrayTest.cpp | 2 +- folly/test/ConcurrentSkipListBenchmark.cpp | 2 +- folly/test/ThreadCachedIntTest.cpp | 2 +- 12 files changed, 531 insertions(+), 510 deletions(-) create mode 100644 folly/hash/Hash.h rename folly/{ => hash}/test/HashBenchmark.cpp (99%) rename folly/{ => hash}/test/HashTest.cpp (99%) diff --git a/folly/Hash.h b/folly/Hash.h index 5253975a..916dfba8 100644 --- a/folly/Hash.h +++ b/folly/Hash.h @@ -16,504 +16,5 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -/* - * Various hashing functions. - */ - -namespace folly { namespace hash { - -// This is a general-purpose way to create a single hash from multiple -// hashable objects. hash_combine_generic takes a class Hasher implementing -// hash; hash_combine uses a default hasher StdHasher that uses std::hash. -// hash_combine_generic hashes each argument and combines those hashes in -// an order-dependent way to yield a new hash. - - -// This is the Hash128to64 function from Google's cityhash (available -// under the MIT License). We use it to reduce multiple 64 bit hashes -// into a single hash. -inline uint64_t hash_128_to_64(const uint64_t upper, const uint64_t lower) { - // Murmur-inspired hashing. - const uint64_t kMul = 0x9ddfea08eb382d69ULL; - uint64_t a = (lower ^ upper) * kMul; - a ^= (a >> 47); - uint64_t b = (upper ^ a) * kMul; - b ^= (b >> 47); - b *= kMul; - return b; -} - -// Never used, but gcc demands it. -template -inline size_t hash_combine_generic() { - return 0; -} - -template < - class Iter, - class Hash = std::hash::value_type>> -uint64_t hash_range(Iter begin, - Iter end, - uint64_t hash = 0, - Hash hasher = Hash()) { - for (; begin != end; ++begin) { - hash = hash_128_to_64(hash, hasher(*begin)); - } - return hash; -} - -inline uint32_t twang_32from64(uint64_t key); - -template -size_t hash_combine_generic(const T& t, const Ts&... ts) { - size_t seed = Hasher::hash(t); - if (sizeof...(ts) == 0) { - return seed; - } - size_t remainder = hash_combine_generic(ts...); - /* static */ if (sizeof(size_t) == sizeof(uint32_t)) { - return twang_32from64((uint64_t(seed) << 32) | remainder); - } else { - return static_cast(hash_128_to_64(seed, remainder)); - } -} - -// Simply uses std::hash to hash. Note that std::hash is not guaranteed -// to be a very good hash function; provided std::hash doesn't collide on -// the individual inputs, you are fine, but that won't be true for, say, -// strings or pairs -class StdHasher { - public: - template - static size_t hash(const T& t) { - return std::hash()(t); - } -}; - -template -size_t hash_combine(const T& t, const Ts&... ts) { - return hash_combine_generic(t, ts...); -} - -////////////////////////////////////////////////////////////////////// - -/* - * Thomas Wang 64 bit mix hash function - */ - -inline uint64_t twang_mix64(uint64_t key) { - key = (~key) + (key << 21); // key *= (1 << 21) - 1; key -= 1; - key = key ^ (key >> 24); - key = key + (key << 3) + (key << 8); // key *= 1 + (1 << 3) + (1 << 8) - key = key ^ (key >> 14); - key = key + (key << 2) + (key << 4); // key *= 1 + (1 << 2) + (1 << 4) - key = key ^ (key >> 28); - key = key + (key << 31); // key *= 1 + (1 << 31) - return key; -} - -/* - * Inverse of twang_mix64 - * - * Note that twang_unmix64 is significantly slower than twang_mix64. - */ - -inline uint64_t twang_unmix64(uint64_t key) { - // See the comments in jenkins_rev_unmix32 for an explanation as to how this - // was generated - key *= 4611686016279904257U; - key ^= (key >> 28) ^ (key >> 56); - key *= 14933078535860113213U; - key ^= (key >> 14) ^ (key >> 28) ^ (key >> 42) ^ (key >> 56); - key *= 15244667743933553977U; - key ^= (key >> 24) ^ (key >> 48); - key = (key + 1) * 9223367638806167551U; - return key; -} - -/* - * Thomas Wang downscaling hash function - */ - -inline uint32_t twang_32from64(uint64_t key) { - key = (~key) + (key << 18); - key = key ^ (key >> 31); - key = key * 21; - key = key ^ (key >> 11); - key = key + (key << 6); - key = key ^ (key >> 22); - return (uint32_t) key; -} - -/* - * Robert Jenkins' reversible 32 bit mix hash function - */ - -inline uint32_t jenkins_rev_mix32(uint32_t key) { - key += (key << 12); // key *= (1 + (1 << 12)) - key ^= (key >> 22); - key += (key << 4); // key *= (1 + (1 << 4)) - key ^= (key >> 9); - key += (key << 10); // key *= (1 + (1 << 10)) - key ^= (key >> 2); - // key *= (1 + (1 << 7)) * (1 + (1 << 12)) - key += (key << 7); - key += (key << 12); - return key; -} - -/* - * Inverse of jenkins_rev_mix32 - * - * Note that jenkinks_rev_unmix32 is significantly slower than - * jenkins_rev_mix32. - */ - -inline uint32_t jenkins_rev_unmix32(uint32_t key) { - // These are the modular multiplicative inverses (in Z_2^32) of the - // multiplication factors in jenkins_rev_mix32, in reverse order. They were - // computed using the Extended Euclidean algorithm, see - // http://en.wikipedia.org/wiki/Modular_multiplicative_inverse - key *= 2364026753U; - - // The inverse of a ^= (a >> n) is - // b = a - // for (int i = n; i < 32; i += n) { - // b ^= (a >> i); - // } - key ^= - (key >> 2) ^ (key >> 4) ^ (key >> 6) ^ (key >> 8) ^ - (key >> 10) ^ (key >> 12) ^ (key >> 14) ^ (key >> 16) ^ - (key >> 18) ^ (key >> 20) ^ (key >> 22) ^ (key >> 24) ^ - (key >> 26) ^ (key >> 28) ^ (key >> 30); - key *= 3222273025U; - key ^= (key >> 9) ^ (key >> 18) ^ (key >> 27); - key *= 4042322161U; - key ^= (key >> 22); - key *= 16773121U; - return key; -} - -/* - * Fowler / Noll / Vo (FNV) Hash - * http://www.isthe.com/chongo/tech/comp/fnv/ - */ - -const uint32_t FNV_32_HASH_START = 2166136261UL; -const uint64_t FNV_64_HASH_START = 14695981039346656037ULL; -const uint64_t FNVA_64_HASH_START = 14695981039346656037ULL; - -inline uint32_t fnv32(const char* buf, uint32_t hash = FNV_32_HASH_START) { - // forcing signed char, since other platforms can use unsigned - const signed char* s = reinterpret_cast(buf); - - for (; *s; ++s) { - hash += (hash << 1) + (hash << 4) + (hash << 7) + - (hash << 8) + (hash << 24); - hash ^= *s; - } - return hash; -} - -inline uint32_t fnv32_buf(const void* buf, - size_t n, - uint32_t hash = FNV_32_HASH_START) { - // forcing signed char, since other platforms can use unsigned - const signed char* char_buf = reinterpret_cast(buf); - - for (size_t i = 0; i < n; ++i) { - hash += (hash << 1) + (hash << 4) + (hash << 7) + - (hash << 8) + (hash << 24); - hash ^= char_buf[i]; - } - - return hash; -} - -inline uint32_t fnv32(const std::string& str, - uint32_t hash = FNV_32_HASH_START) { - return fnv32_buf(str.data(), str.size(), hash); -} - -inline uint64_t fnv64(const char* buf, uint64_t hash = FNV_64_HASH_START) { - // forcing signed char, since other platforms can use unsigned - const signed char* s = reinterpret_cast(buf); - - for (; *s; ++s) { - hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + - (hash << 8) + (hash << 40); - hash ^= *s; - } - return hash; -} - -inline uint64_t fnv64_buf(const void* buf, - size_t n, - uint64_t hash = FNV_64_HASH_START) { - // forcing signed char, since other platforms can use unsigned - const signed char* char_buf = reinterpret_cast(buf); - - for (size_t i = 0; i < n; ++i) { - hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + - (hash << 8) + (hash << 40); - hash ^= char_buf[i]; - } - return hash; -} - -inline uint64_t fnv64(const std::string& str, - uint64_t hash = FNV_64_HASH_START) { - return fnv64_buf(str.data(), str.size(), hash); -} - -inline uint64_t fnva64_buf(const void* buf, - size_t n, - uint64_t hash = FNVA_64_HASH_START) { - const uint8_t* char_buf = reinterpret_cast(buf); - - for (size_t i = 0; i < n; ++i) { - hash ^= char_buf[i]; - hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + - (hash << 8) + (hash << 40); - } - return hash; -} - -inline uint64_t fnva64(const std::string& str, - uint64_t hash = FNVA_64_HASH_START) { - return fnva64_buf(str.data(), str.size(), hash); -} - -/* - * Paul Hsieh: http://www.azillionmonkeys.com/qed/hash.html - */ - -#define get16bits(d) folly::loadUnaligned(d) - -inline uint32_t hsieh_hash32_buf(const void* buf, size_t len) { - // forcing signed char, since other platforms can use unsigned - const unsigned char* s = reinterpret_cast(buf); - uint32_t hash = static_cast(len); - uint32_t tmp; - size_t rem; - - if (len <= 0 || buf == nullptr) { - return 0; - } - - rem = len & 3; - len >>= 2; - - /* Main loop */ - for (;len > 0; len--) { - hash += get16bits (s); - tmp = (get16bits (s+2) << 11) ^ hash; - hash = (hash << 16) ^ tmp; - s += 2*sizeof (uint16_t); - hash += hash >> 11; - } - - /* Handle end cases */ - switch (rem) { - case 3: - hash += get16bits(s); - hash ^= hash << 16; - hash ^= s[sizeof (uint16_t)] << 18; - hash += hash >> 11; - break; - case 2: - hash += get16bits(s); - hash ^= hash << 11; - hash += hash >> 17; - break; - case 1: - hash += *s; - hash ^= hash << 10; - hash += hash >> 1; - } - - /* Force "avalanching" of final 127 bits */ - hash ^= hash << 3; - hash += hash >> 5; - hash ^= hash << 4; - hash += hash >> 17; - hash ^= hash << 25; - hash += hash >> 6; - - return hash; -}; - -#undef get16bits - -inline uint32_t hsieh_hash32(const char* s) { - return hsieh_hash32_buf(s, std::strlen(s)); -} - -inline uint32_t hsieh_hash32_str(const std::string& str) { - return hsieh_hash32_buf(str.data(), str.size()); -} - -////////////////////////////////////////////////////////////////////// - -} // namespace hash - -namespace detail { -struct integral_hasher { - template - size_t operator()(I const& i) const { - static_assert(sizeof(I) <= 8, "input type is too wide"); - if (sizeof(I) <= 4) { // the branch taken is known at compile time - auto const i32 = static_cast(i); // impl accident: sign-extends - auto const u32 = static_cast(i32); - return static_cast(hash::jenkins_rev_mix32(u32)); - } else { - auto const u64 = static_cast(i); - return static_cast(hash::twang_mix64(u64)); - } - } -}; -} // namespace detail - -template -struct hasher; - -struct Hash { - template - size_t operator()(const T& v) const { - return hasher()(v); - } - - template - size_t operator()(const T& t, const Ts&... ts) const { - return hash::hash_128_to_64((*this)(t), (*this)(ts...)); - } -}; - -template <> -struct hasher { - size_t operator()(bool key) const { - // Make sure that all the output bits depend on the input. - return key ? std::numeric_limits::max() : 0; - } -}; - -template <> -struct hasher : detail::integral_hasher {}; - -template <> -struct hasher : detail::integral_hasher {}; - -template <> -struct hasher : detail::integral_hasher {}; - -template <> -struct hasher : detail::integral_hasher {}; - -template <> -struct hasher : detail::integral_hasher {}; - -template <> -struct hasher : detail::integral_hasher {}; - -template <> -struct hasher : detail::integral_hasher {}; - -template <> -struct hasher : detail::integral_hasher {}; - -template <> -struct hasher : detail::integral_hasher {}; - -template <> -struct hasher : detail::integral_hasher {}; - -template <> // char is a different type from both signed char and unsigned char -struct hasher : detail::integral_hasher {}; - -template <> struct hasher { - size_t operator()(const std::string& key) const { - return static_cast( - hash::SpookyHashV2::Hash64(key.data(), key.size(), 0)); - } -}; - -template -struct hasher::value, void>::type> { - size_t operator()(T key) const { - return Hash()(static_cast::type>(key)); - } -}; - -template -struct hasher> { - size_t operator()(const std::pair& key) const { - return Hash()(key.first, key.second); - } -}; - -template -struct hasher> { - size_t operator() (const std::tuple& key) const { - return applyTuple(Hash(), key); - } -}; - -// recursion -template -struct TupleHasher { - size_t operator()(std::tuple const& key) const { - return hash::hash_combine( - TupleHasher()(key), - std::get(key)); - } -}; - -// base -template -struct TupleHasher<0, Ts...> { - size_t operator()(std::tuple const& key) const { - // we could do std::hash here directly, but hash_combine hides all the - // ugly templating implicitly - return hash::hash_combine(std::get<0>(key)); - } -}; - -} // namespace folly - -// Custom hash functions. -namespace std { - // Hash function for pairs. Requires default hash functions for both - // items in the pair. - template - struct hash > { - public: - size_t operator()(const std::pair& x) const { - return folly::hash::hash_combine(x.first, x.second); - } - }; - - // Hash function for tuples. Requires default hash functions for all types. - template - struct hash> { - size_t operator()(std::tuple const& key) const { - folly::TupleHasher< - std::tuple_size>::value - 1, // start index - Ts...> hasher; - - return hasher(key); - } - }; -} // namespace std +// shims: +#include diff --git a/folly/Makefile.am b/folly/Makefile.am index 7d28d8c0..bb515379 100644 --- a/folly/Makefile.am +++ b/folly/Makefile.am @@ -228,6 +228,7 @@ nobase_follyinclude_HEADERS = \ futures/test/TestExecutor.h \ hash/Checksum.h \ hash/detail/ChecksumDetail.h \ + hash/Hash.h \ hash/SpookyHashV1.h \ hash/SpookyHashV2.h \ gen/Base.h \ diff --git a/folly/compression/test/CompressionTest.cpp b/folly/compression/test/CompressionTest.cpp index 4bbf5027..b0a2b22b 100644 --- a/folly/compression/test/CompressionTest.cpp +++ b/folly/compression/test/CompressionTest.cpp @@ -27,10 +27,10 @@ #include #include -#include #include #include #include +#include #include #include diff --git a/folly/concurrency/test/ConcurrentHashMapTest.cpp b/folly/concurrency/test/ConcurrentHashMapTest.cpp index cf283c41..32b3d8ae 100644 --- a/folly/concurrency/test/ConcurrentHashMapTest.cpp +++ b/folly/concurrency/test/ConcurrentHashMapTest.cpp @@ -17,8 +17,8 @@ #include #include -#include #include +#include #include #include diff --git a/folly/experimental/test/StringKeyedTest.cpp b/folly/experimental/test/StringKeyedTest.cpp index b6744459..072500b5 100644 --- a/folly/experimental/test/StringKeyedTest.cpp +++ b/folly/experimental/test/StringKeyedTest.cpp @@ -25,8 +25,8 @@ #include -#include #include +#include #include #include diff --git a/folly/hash/Hash.h b/folly/hash/Hash.h new file mode 100644 index 00000000..5253975a --- /dev/null +++ b/folly/hash/Hash.h @@ -0,0 +1,519 @@ +/* + * Copyright 2017 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* + * Various hashing functions. + */ + +namespace folly { namespace hash { + +// This is a general-purpose way to create a single hash from multiple +// hashable objects. hash_combine_generic takes a class Hasher implementing +// hash; hash_combine uses a default hasher StdHasher that uses std::hash. +// hash_combine_generic hashes each argument and combines those hashes in +// an order-dependent way to yield a new hash. + + +// This is the Hash128to64 function from Google's cityhash (available +// under the MIT License). We use it to reduce multiple 64 bit hashes +// into a single hash. +inline uint64_t hash_128_to_64(const uint64_t upper, const uint64_t lower) { + // Murmur-inspired hashing. + const uint64_t kMul = 0x9ddfea08eb382d69ULL; + uint64_t a = (lower ^ upper) * kMul; + a ^= (a >> 47); + uint64_t b = (upper ^ a) * kMul; + b ^= (b >> 47); + b *= kMul; + return b; +} + +// Never used, but gcc demands it. +template +inline size_t hash_combine_generic() { + return 0; +} + +template < + class Iter, + class Hash = std::hash::value_type>> +uint64_t hash_range(Iter begin, + Iter end, + uint64_t hash = 0, + Hash hasher = Hash()) { + for (; begin != end; ++begin) { + hash = hash_128_to_64(hash, hasher(*begin)); + } + return hash; +} + +inline uint32_t twang_32from64(uint64_t key); + +template +size_t hash_combine_generic(const T& t, const Ts&... ts) { + size_t seed = Hasher::hash(t); + if (sizeof...(ts) == 0) { + return seed; + } + size_t remainder = hash_combine_generic(ts...); + /* static */ if (sizeof(size_t) == sizeof(uint32_t)) { + return twang_32from64((uint64_t(seed) << 32) | remainder); + } else { + return static_cast(hash_128_to_64(seed, remainder)); + } +} + +// Simply uses std::hash to hash. Note that std::hash is not guaranteed +// to be a very good hash function; provided std::hash doesn't collide on +// the individual inputs, you are fine, but that won't be true for, say, +// strings or pairs +class StdHasher { + public: + template + static size_t hash(const T& t) { + return std::hash()(t); + } +}; + +template +size_t hash_combine(const T& t, const Ts&... ts) { + return hash_combine_generic(t, ts...); +} + +////////////////////////////////////////////////////////////////////// + +/* + * Thomas Wang 64 bit mix hash function + */ + +inline uint64_t twang_mix64(uint64_t key) { + key = (~key) + (key << 21); // key *= (1 << 21) - 1; key -= 1; + key = key ^ (key >> 24); + key = key + (key << 3) + (key << 8); // key *= 1 + (1 << 3) + (1 << 8) + key = key ^ (key >> 14); + key = key + (key << 2) + (key << 4); // key *= 1 + (1 << 2) + (1 << 4) + key = key ^ (key >> 28); + key = key + (key << 31); // key *= 1 + (1 << 31) + return key; +} + +/* + * Inverse of twang_mix64 + * + * Note that twang_unmix64 is significantly slower than twang_mix64. + */ + +inline uint64_t twang_unmix64(uint64_t key) { + // See the comments in jenkins_rev_unmix32 for an explanation as to how this + // was generated + key *= 4611686016279904257U; + key ^= (key >> 28) ^ (key >> 56); + key *= 14933078535860113213U; + key ^= (key >> 14) ^ (key >> 28) ^ (key >> 42) ^ (key >> 56); + key *= 15244667743933553977U; + key ^= (key >> 24) ^ (key >> 48); + key = (key + 1) * 9223367638806167551U; + return key; +} + +/* + * Thomas Wang downscaling hash function + */ + +inline uint32_t twang_32from64(uint64_t key) { + key = (~key) + (key << 18); + key = key ^ (key >> 31); + key = key * 21; + key = key ^ (key >> 11); + key = key + (key << 6); + key = key ^ (key >> 22); + return (uint32_t) key; +} + +/* + * Robert Jenkins' reversible 32 bit mix hash function + */ + +inline uint32_t jenkins_rev_mix32(uint32_t key) { + key += (key << 12); // key *= (1 + (1 << 12)) + key ^= (key >> 22); + key += (key << 4); // key *= (1 + (1 << 4)) + key ^= (key >> 9); + key += (key << 10); // key *= (1 + (1 << 10)) + key ^= (key >> 2); + // key *= (1 + (1 << 7)) * (1 + (1 << 12)) + key += (key << 7); + key += (key << 12); + return key; +} + +/* + * Inverse of jenkins_rev_mix32 + * + * Note that jenkinks_rev_unmix32 is significantly slower than + * jenkins_rev_mix32. + */ + +inline uint32_t jenkins_rev_unmix32(uint32_t key) { + // These are the modular multiplicative inverses (in Z_2^32) of the + // multiplication factors in jenkins_rev_mix32, in reverse order. They were + // computed using the Extended Euclidean algorithm, see + // http://en.wikipedia.org/wiki/Modular_multiplicative_inverse + key *= 2364026753U; + + // The inverse of a ^= (a >> n) is + // b = a + // for (int i = n; i < 32; i += n) { + // b ^= (a >> i); + // } + key ^= + (key >> 2) ^ (key >> 4) ^ (key >> 6) ^ (key >> 8) ^ + (key >> 10) ^ (key >> 12) ^ (key >> 14) ^ (key >> 16) ^ + (key >> 18) ^ (key >> 20) ^ (key >> 22) ^ (key >> 24) ^ + (key >> 26) ^ (key >> 28) ^ (key >> 30); + key *= 3222273025U; + key ^= (key >> 9) ^ (key >> 18) ^ (key >> 27); + key *= 4042322161U; + key ^= (key >> 22); + key *= 16773121U; + return key; +} + +/* + * Fowler / Noll / Vo (FNV) Hash + * http://www.isthe.com/chongo/tech/comp/fnv/ + */ + +const uint32_t FNV_32_HASH_START = 2166136261UL; +const uint64_t FNV_64_HASH_START = 14695981039346656037ULL; +const uint64_t FNVA_64_HASH_START = 14695981039346656037ULL; + +inline uint32_t fnv32(const char* buf, uint32_t hash = FNV_32_HASH_START) { + // forcing signed char, since other platforms can use unsigned + const signed char* s = reinterpret_cast(buf); + + for (; *s; ++s) { + hash += (hash << 1) + (hash << 4) + (hash << 7) + + (hash << 8) + (hash << 24); + hash ^= *s; + } + return hash; +} + +inline uint32_t fnv32_buf(const void* buf, + size_t n, + uint32_t hash = FNV_32_HASH_START) { + // forcing signed char, since other platforms can use unsigned + const signed char* char_buf = reinterpret_cast(buf); + + for (size_t i = 0; i < n; ++i) { + hash += (hash << 1) + (hash << 4) + (hash << 7) + + (hash << 8) + (hash << 24); + hash ^= char_buf[i]; + } + + return hash; +} + +inline uint32_t fnv32(const std::string& str, + uint32_t hash = FNV_32_HASH_START) { + return fnv32_buf(str.data(), str.size(), hash); +} + +inline uint64_t fnv64(const char* buf, uint64_t hash = FNV_64_HASH_START) { + // forcing signed char, since other platforms can use unsigned + const signed char* s = reinterpret_cast(buf); + + for (; *s; ++s) { + hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + + (hash << 8) + (hash << 40); + hash ^= *s; + } + return hash; +} + +inline uint64_t fnv64_buf(const void* buf, + size_t n, + uint64_t hash = FNV_64_HASH_START) { + // forcing signed char, since other platforms can use unsigned + const signed char* char_buf = reinterpret_cast(buf); + + for (size_t i = 0; i < n; ++i) { + hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + + (hash << 8) + (hash << 40); + hash ^= char_buf[i]; + } + return hash; +} + +inline uint64_t fnv64(const std::string& str, + uint64_t hash = FNV_64_HASH_START) { + return fnv64_buf(str.data(), str.size(), hash); +} + +inline uint64_t fnva64_buf(const void* buf, + size_t n, + uint64_t hash = FNVA_64_HASH_START) { + const uint8_t* char_buf = reinterpret_cast(buf); + + for (size_t i = 0; i < n; ++i) { + hash ^= char_buf[i]; + hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + + (hash << 8) + (hash << 40); + } + return hash; +} + +inline uint64_t fnva64(const std::string& str, + uint64_t hash = FNVA_64_HASH_START) { + return fnva64_buf(str.data(), str.size(), hash); +} + +/* + * Paul Hsieh: http://www.azillionmonkeys.com/qed/hash.html + */ + +#define get16bits(d) folly::loadUnaligned(d) + +inline uint32_t hsieh_hash32_buf(const void* buf, size_t len) { + // forcing signed char, since other platforms can use unsigned + const unsigned char* s = reinterpret_cast(buf); + uint32_t hash = static_cast(len); + uint32_t tmp; + size_t rem; + + if (len <= 0 || buf == nullptr) { + return 0; + } + + rem = len & 3; + len >>= 2; + + /* Main loop */ + for (;len > 0; len--) { + hash += get16bits (s); + tmp = (get16bits (s+2) << 11) ^ hash; + hash = (hash << 16) ^ tmp; + s += 2*sizeof (uint16_t); + hash += hash >> 11; + } + + /* Handle end cases */ + switch (rem) { + case 3: + hash += get16bits(s); + hash ^= hash << 16; + hash ^= s[sizeof (uint16_t)] << 18; + hash += hash >> 11; + break; + case 2: + hash += get16bits(s); + hash ^= hash << 11; + hash += hash >> 17; + break; + case 1: + hash += *s; + hash ^= hash << 10; + hash += hash >> 1; + } + + /* Force "avalanching" of final 127 bits */ + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 4; + hash += hash >> 17; + hash ^= hash << 25; + hash += hash >> 6; + + return hash; +}; + +#undef get16bits + +inline uint32_t hsieh_hash32(const char* s) { + return hsieh_hash32_buf(s, std::strlen(s)); +} + +inline uint32_t hsieh_hash32_str(const std::string& str) { + return hsieh_hash32_buf(str.data(), str.size()); +} + +////////////////////////////////////////////////////////////////////// + +} // namespace hash + +namespace detail { +struct integral_hasher { + template + size_t operator()(I const& i) const { + static_assert(sizeof(I) <= 8, "input type is too wide"); + if (sizeof(I) <= 4) { // the branch taken is known at compile time + auto const i32 = static_cast(i); // impl accident: sign-extends + auto const u32 = static_cast(i32); + return static_cast(hash::jenkins_rev_mix32(u32)); + } else { + auto const u64 = static_cast(i); + return static_cast(hash::twang_mix64(u64)); + } + } +}; +} // namespace detail + +template +struct hasher; + +struct Hash { + template + size_t operator()(const T& v) const { + return hasher()(v); + } + + template + size_t operator()(const T& t, const Ts&... ts) const { + return hash::hash_128_to_64((*this)(t), (*this)(ts...)); + } +}; + +template <> +struct hasher { + size_t operator()(bool key) const { + // Make sure that all the output bits depend on the input. + return key ? std::numeric_limits::max() : 0; + } +}; + +template <> +struct hasher : detail::integral_hasher {}; + +template <> +struct hasher : detail::integral_hasher {}; + +template <> +struct hasher : detail::integral_hasher {}; + +template <> +struct hasher : detail::integral_hasher {}; + +template <> +struct hasher : detail::integral_hasher {}; + +template <> +struct hasher : detail::integral_hasher {}; + +template <> +struct hasher : detail::integral_hasher {}; + +template <> +struct hasher : detail::integral_hasher {}; + +template <> +struct hasher : detail::integral_hasher {}; + +template <> +struct hasher : detail::integral_hasher {}; + +template <> // char is a different type from both signed char and unsigned char +struct hasher : detail::integral_hasher {}; + +template <> struct hasher { + size_t operator()(const std::string& key) const { + return static_cast( + hash::SpookyHashV2::Hash64(key.data(), key.size(), 0)); + } +}; + +template +struct hasher::value, void>::type> { + size_t operator()(T key) const { + return Hash()(static_cast::type>(key)); + } +}; + +template +struct hasher> { + size_t operator()(const std::pair& key) const { + return Hash()(key.first, key.second); + } +}; + +template +struct hasher> { + size_t operator() (const std::tuple& key) const { + return applyTuple(Hash(), key); + } +}; + +// recursion +template +struct TupleHasher { + size_t operator()(std::tuple const& key) const { + return hash::hash_combine( + TupleHasher()(key), + std::get(key)); + } +}; + +// base +template +struct TupleHasher<0, Ts...> { + size_t operator()(std::tuple const& key) const { + // we could do std::hash here directly, but hash_combine hides all the + // ugly templating implicitly + return hash::hash_combine(std::get<0>(key)); + } +}; + +} // namespace folly + +// Custom hash functions. +namespace std { + // Hash function for pairs. Requires default hash functions for both + // items in the pair. + template + struct hash > { + public: + size_t operator()(const std::pair& x) const { + return folly::hash::hash_combine(x.first, x.second); + } + }; + + // Hash function for tuples. Requires default hash functions for all types. + template + struct hash> { + size_t operator()(std::tuple const& key) const { + folly::TupleHasher< + std::tuple_size>::value - 1, // start index + Ts...> hasher; + + return hasher(key); + } + }; +} // namespace std diff --git a/folly/hash/test/ChecksumTest.cpp b/folly/hash/test/ChecksumTest.cpp index e5e613c6..05ac6e59 100644 --- a/folly/hash/test/ChecksumTest.cpp +++ b/folly/hash/test/ChecksumTest.cpp @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include #include diff --git a/folly/test/HashBenchmark.cpp b/folly/hash/test/HashBenchmark.cpp similarity index 99% rename from folly/test/HashBenchmark.cpp rename to folly/hash/test/HashBenchmark.cpp index 8c586aa9..bb49a52b 100644 --- a/folly/test/HashBenchmark.cpp +++ b/folly/hash/test/HashBenchmark.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include +#include #include #include diff --git a/folly/test/HashTest.cpp b/folly/hash/test/HashTest.cpp similarity index 99% rename from folly/test/HashTest.cpp rename to folly/hash/test/HashTest.cpp index 9521aee7..68627002 100644 --- a/folly/test/HashTest.cpp +++ b/folly/hash/test/HashTest.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include +#include #include #include #include diff --git a/folly/test/AtomicHashArrayTest.cpp b/folly/test/AtomicHashArrayTest.cpp index 86d8ae5a..2ffb701b 100644 --- a/folly/test/AtomicHashArrayTest.cpp +++ b/folly/test/AtomicHashArrayTest.cpp @@ -21,8 +21,8 @@ #include #include -#include #include +#include #include #include diff --git a/folly/test/ConcurrentSkipListBenchmark.cpp b/folly/test/ConcurrentSkipListBenchmark.cpp index 51f9a9f7..89de974b 100644 --- a/folly/test/ConcurrentSkipListBenchmark.cpp +++ b/folly/test/ConcurrentSkipListBenchmark.cpp @@ -24,8 +24,8 @@ #include #include -#include #include +#include #include #include diff --git a/folly/test/ThreadCachedIntTest.cpp b/folly/test/ThreadCachedIntTest.cpp index f4a77d3f..8709abf6 100644 --- a/folly/test/ThreadCachedIntTest.cpp +++ b/folly/test/ThreadCachedIntTest.cpp @@ -24,7 +24,7 @@ #include #include -#include +#include #include #include #include -- 2.34.1