/*
- * Copyright 2012 Facebook, Inc.
+ * Copyright 2017 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
-#ifndef FOLLY_BASE_HASH_H_
-#define FOLLY_BASE_HASH_H_
+#pragma once
+#include <cstdint>
#include <cstring>
-#include <stdint.h>
+#include <limits>
#include <string>
+#include <tuple>
+#include <type_traits>
#include <utility>
-#include "folly/SpookyHash.h"
+#include <folly/Bits.h>
+#include <folly/functional/ApplyTuple.h>
+#include <folly/hash/SpookyHashV1.h>
+#include <folly/hash/SpookyHashV2.h>
/*
* Various hashing functions.
namespace folly { namespace hash {
// This is a general-purpose way to create a single hash from multiple
-// hashable objects. It relies on std::hash<T> being available for all
-// relevant types and combines those hashes in an order-dependent way
-// to yield a new hash.
+// hashable objects. hash_combine_generic takes a class Hasher implementing
+// hash<T>; hash_combine uses a default hasher StdHasher that uses std::hash.
+// hash_combine_generic hashes each argument and combines those hashes in
+// an order-dependent way to yield a new hash.
-// Never used, but gcc demands it.
-inline size_t hash_combine() {
- return 0;
-}
// This is the Hash128to64 function from Google's cityhash (available
// under the MIT License). We use it to reduce multiple 64 bit hashes
// into a single hash.
+//
+// Mixes the two 64-bit inputs into one 64-bit result. The inputs are not
+// interchangeable: `upper` is xor-ed in a second time after the first
+// multiply/shift round, so swapping the arguments generally gives a
+// different value.
-inline size_t hash_128_to_64(const size_t upper, const size_t lower) {
+inline uint64_t hash_128_to_64(const uint64_t upper, const uint64_t lower) {
// Murmur-inspired hashing.
- const size_t kMul = 0x9ddfea08eb382d69ULL;
- size_t a = (lower ^ upper) * kMul;
+ const uint64_t kMul = 0x9ddfea08eb382d69ULL;
+ uint64_t a = (lower ^ upper) * kMul;
a ^= (a >> 47);
- size_t b = (upper ^ a) * kMul;
+ uint64_t b = (upper ^ a) * kMul;
b ^= (b >> 47);
b *= kMul;
return b;
}
-template <typename T, typename... Ts>
-size_t hash_combine(const T& t, const Ts&... ts) {
- size_t seed = std::hash<T>()(t);
+// Zero-argument overload. Never used at run time, but gcc demands that the
+// recursive call with an empty argument pack resolves to something.
+template <class Hasher>
+inline size_t hash_combine_generic() {
+  return size_t(0);
+}
+
+// Folds the hashes of every element in [begin, end) into one 64-bit value.
+// Starting from `hash`, each element is hashed with `hasher` and mixed into
+// the running value via hash_128_to_64. An empty range returns `hash`
+// unchanged.
+template <
+    class Iter,
+    class Hash = std::hash<typename std::iterator_traits<Iter>::value_type>>
+uint64_t hash_range(
+    Iter begin,
+    Iter end,
+    uint64_t hash = 0,
+    Hash hasher = Hash()) {
+  while (begin != end) {
+    hash = hash_128_to_64(hash, hasher(*begin));
+    ++begin;
+  }
+  return hash;
+}
+
+// Forward declaration so that hash_combine_generic below can call it; the
+// definition is not visible in this part of the file.
+inline uint32_t twang_32from64(uint64_t key);
+
+// Hashes `t` with Hasher::hash and, when more arguments follow, combines that
+// hash with the recursively computed hash of the remaining arguments.
+template <class Hasher, typename T, typename... Ts>
+size_t hash_combine_generic(const T& t, const Ts&... ts) {
+ size_t seed = Hasher::hash(t);
if (sizeof...(ts) == 0) {
return seed;
}
- size_t remainder = hash_combine(ts...);
- return hash_128_to_64(seed, remainder);
+ size_t remainder = hash_combine_generic<Hasher>(ts...);
+ // When size_t is 32 bits wide, pack both hashes into one 64-bit word and
+ // reduce it with twang_32from64; otherwise mix them with hash_128_to_64.
+ /* static */ if (sizeof(size_t) == sizeof(uint32_t)) {
+ return twang_32from64((uint64_t(seed) << 32) | remainder);
+ } else {
+ return static_cast<size_t>(hash_128_to_64(seed, remainder));
+ }
+}
+
+// Hasher policy backed by std::hash. Note that std::hash is not guaranteed
+// to be a very good hash function: you are fine as long as std::hash does
+// not collide on the individual inputs, but that will not hold for, say,
+// strings or pairs.
+class StdHasher {
+ public:
+  // Returns std::hash<T>()(t).
+  template <typename T>
+  static size_t hash(const T& t) {
+    const size_t digest = std::hash<T>()(t);
+    return digest;
+  }
+};
+
+// Combines the hashes of all arguments, in order, hashing each individual
+// argument through StdHasher (i.e. std::hash).
+template <typename T, typename... Ts>
+size_t hash_combine(const T& t, const Ts&... ts) {
+  typedef StdHasher DefaultHasher;
+  return hash_combine_generic<DefaultHasher>(t, ts...);
}
//////////////////////////////////////////////////////////////////////
* http://www.isthe.com/chongo/tech/comp/fnv/
*/
-const uint32_t FNV_32_HASH_START = 216613626UL;
+// Default starting values for the FNV-style hashes below. The FNVA 64-bit
+// variant starts from the same value as the plain 64-bit one.
+const uint32_t FNV_32_HASH_START = 2166136261UL;
const uint64_t FNV_64_HASH_START = 14695981039346656037ULL;
+const uint64_t FNVA_64_HASH_START = 14695981039346656037ULL;
+
+inline uint32_t fnv32(const char* buf, uint32_t hash = FNV_32_HASH_START) {
+ // forcing signed char, since other platforms can use unsigned
+ const signed char* s = reinterpret_cast<const signed char*>(buf);
-inline uint32_t fnv32(const char* s,
- uint32_t hash = FNV_32_HASH_START) {
for (; *s; ++s) {
hash += (hash << 1) + (hash << 4) + (hash << 7) +
(hash << 8) + (hash << 24);
}
inline uint32_t fnv32_buf(const void* buf,
- int n,
+ size_t n,
uint32_t hash = FNV_32_HASH_START) {
- const char* char_buf = reinterpret_cast<const char*>(buf);
+ // forcing signed char, since other platforms can use unsigned
+ const signed char* char_buf = reinterpret_cast<const signed char*>(buf);
- for (int i = 0; i < n; ++i) {
+ for (size_t i = 0; i < n; ++i) {
hash += (hash << 1) + (hash << 4) + (hash << 7) +
(hash << 8) + (hash << 24);
hash ^= char_buf[i];
}
+// Hashes the str.size() bytes at str.data() with fnv32_buf, starting from
+// `hash`.
inline uint32_t fnv32(const std::string& str,
- uint64_t hash = FNV_32_HASH_START) {
+ uint32_t hash = FNV_32_HASH_START) {
return fnv32_buf(str.data(), str.size(), hash);
}
-inline uint64_t fnv64(const char* s,
- uint64_t hash = FNV_64_HASH_START) {
+inline uint64_t fnv64(const char* buf, uint64_t hash = FNV_64_HASH_START) {
+ // forcing signed char, since other platforms can use unsigned
+ const signed char* s = reinterpret_cast<const signed char*>(buf);
+
for (; *s; ++s) {
hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) +
(hash << 8) + (hash << 40);
}
inline uint64_t fnv64_buf(const void* buf,
- int n,
+ size_t n,
uint64_t hash = FNV_64_HASH_START) {
- const char* char_buf = reinterpret_cast<const char*>(buf);
+ // forcing signed char, since other platforms can use unsigned
+ const signed char* char_buf = reinterpret_cast<const signed char*>(buf);
- for (int i = 0; i < n; ++i) {
+ for (size_t i = 0; i < n; ++i) {
hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) +
(hash << 8) + (hash << 40);
hash ^= char_buf[i];
return fnv64_buf(str.data(), str.size(), hash);
}
+// Hashes the `n` bytes starting at `buf`, continuing from `hash`. For each
+// byte (read as an unsigned 8-bit value) the byte is xor-ed in first and the
+// running hash is then grown by a sum of its own shifts, which is the same
+// as multiplying it by 0x100000001b3 modulo 2^64.
+inline uint64_t fnva64_buf(
+    const void* buf,
+    size_t n,
+    uint64_t hash = FNVA_64_HASH_START) {
+  const uint8_t* const bytes = reinterpret_cast<const uint8_t*>(buf);
+
+  for (size_t idx = 0; idx != n; ++idx) {
+    hash ^= bytes[idx];
+    const uint64_t shifted = (hash << 1) + (hash << 4) + (hash << 5) +
+        (hash << 7) + (hash << 8) + (hash << 40);
+    hash += shifted;
+  }
+  return hash;
+}
+
+// Convenience overload: hashes the contents of `str` with fnva64_buf,
+// continuing from `hash`.
+inline uint64_t fnva64(
+    const std::string& str,
+    uint64_t hash = FNVA_64_HASH_START) {
+  const char* const bytes = str.data();
+  const size_t length = str.size();
+  return fnva64_buf(bytes, length, hash);
+}
+
/*
* Paul Hsieh: http://www.azillionmonkeys.com/qed/hash.html
*/
-#define get16bits(d) (*((const uint16_t*) (d)))
+// Reads a uint16_t starting at address d through folly::loadUnaligned instead
+// of dereferencing a casted pointer (presumably so that d need not be 2-byte
+// aligned -- confirm against folly/Bits.h).
+#define get16bits(d) folly::loadUnaligned<uint16_t>(d)
-inline uint32_t hsieh_hash32_buf(const void* buf, int len) {
- const char* s = reinterpret_cast<const char*>(buf);
- uint32_t hash = len;
+inline uint32_t hsieh_hash32_buf(const void* buf, size_t len) {
+ // forcing unsigned char, since plain char's signedness is platform-dependent
+ const unsigned char* s = reinterpret_cast<const unsigned char*>(buf);
+ uint32_t hash = static_cast<uint32_t>(len);
uint32_t tmp;
- int rem;
+ size_t rem;
- if (len <= 0 || buf == 0) {
+ if (len <= 0 || buf == nullptr) {
return 0;
}
} // namespace hash
-template<class Key>
+namespace detail {
+// Shared implementation for the built-in integer hasher specializations.
+// Values at most 4 bytes wide are converted through int32_t and mixed with
+// hash::jenkins_rev_mix32; wider values (at most 8 bytes) are mixed with
+// hash::twang_mix64.
+struct integral_hasher {
+  template <typename I>
+  size_t operator()(I const& i) const {
+    static_assert(sizeof(I) <= 8, "input type is too wide");
+    // sizeof(I) is a compile-time constant, so the path taken is known at
+    // compile time for any given I.
+    if (sizeof(I) > 4) {
+      uint64_t const wide = static_cast<uint64_t>(i);
+      return static_cast<size_t>(hash::twang_mix64(wide));
+    }
+    // impl accident: going through int32_t first sign-extends
+    int32_t const asSigned = static_cast<int32_t>(i);
+    uint32_t const asUnsigned = static_cast<uint32_t>(asSigned);
+    return static_cast<size_t>(hash::jenkins_rev_mix32(asUnsigned));
+  }
+};
+} // namespace detail
+
+// Primary template for folly's hash functor. It is only declared here, so
+// every supported key type needs a specialization. `Enable` is a hook for
+// enable_if-constrained partial specializations (used below for enum types).
+template <class Key, class Enable = void>
struct hasher;
-template<> struct hasher<int32_t> {
- size_t operator()(int32_t key) const {
- return hash::jenkins_rev_mix32(uint32_t(key));
+// Generic hash function object built on folly::hasher<T>.
+//
+//  - Hash()()             returns 0 (nothing to hash).
+//  - Hash()(v)            returns hasher<T>()(v).
+//  - Hash()(t, ts...)     hashes `t`, recursively hashes the remaining
+//                         arguments, and mixes the two results with
+//                         hash::hash_128_to_64, so argument order matters.
+struct Hash {
+  // Zero-argument form. Without it, invoking Hash with an empty argument
+  // list (e.g. when a std::tuple<> is expanded into a call) has no viable
+  // overload and fails to compile.
+  size_t operator()() const {
+    return 0;
+  }
+
+  template <class T>
+  size_t operator()(const T& v) const {
+    return hasher<T>()(v);
+  }
+
+  template <class T, class... Ts>
+  size_t operator()(const T& t, const Ts&... ts) const {
+    // hash_128_to_64 yields a 64-bit value; make the conversion to size_t
+    // explicit.
+    return static_cast<size_t>(
+        hash::hash_128_to_64((*this)(t), (*this)(ts...)));
}
};
-template<> struct hasher<uint32_t> {
- size_t operator()(uint32_t key) const {
- return hash::jenkins_rev_mix32(key);
+// Hashes a bool to one of two extreme values.
+template <>
+struct hasher<bool> {
+  size_t operator()(bool key) const {
+    // Make sure that all the output bits depend on the input: false maps to
+    // 0 and true to the largest size_t.
+    if (!key) {
+      return 0;
+    }
+    return std::numeric_limits<size_t>::max();
}
};
-template<> struct hasher<int64_t> {
- size_t operator()(int64_t key) const {
- return hash::twang_mix64(uint64_t(key));
+// hasher specializations for the built-in integer types. Each one simply
+// inherits its operator() from detail::integral_hasher.
+template <>
+struct hasher<unsigned long long> : detail::integral_hasher {};
+
+template <>
+struct hasher<signed long long> : detail::integral_hasher {};
+
+template <>
+struct hasher<unsigned long> : detail::integral_hasher {};
+
+template <>
+struct hasher<signed long> : detail::integral_hasher {};
+
+template <>
+struct hasher<unsigned int> : detail::integral_hasher {};
+
+template <>
+struct hasher<signed int> : detail::integral_hasher {};
+
+template <>
+struct hasher<unsigned short> : detail::integral_hasher {};
+
+template <>
+struct hasher<signed short> : detail::integral_hasher {};
+
+template <>
+struct hasher<unsigned char> : detail::integral_hasher {};
+
+template <>
+struct hasher<signed char> : detail::integral_hasher {};
+
+template <> // char is a different type from both signed char and unsigned char
+struct hasher<char> : detail::integral_hasher {};
+
+// Hashes the key.size() bytes at key.data() with hash::SpookyHashV2::Hash64,
+// passing 0 as the third argument (presumably the seed), and converts the
+// result to size_t.
+template <>
+struct hasher<std::string> {
+  size_t operator()(const std::string& key) const {
+    const auto digest =
+        hash::SpookyHashV2::Hash64(key.data(), key.size(), 0);
+    return static_cast<size_t>(digest);
}
};
-template<> struct hasher<uint64_t> {
- size_t operator()(uint64_t key) const {
- return hash::twang_mix64(key);
+// Enumerations are hashed by converting the value to its underlying integer
+// type and hashing that with Hash.
+template <class T>
+struct hasher<T, typename std::enable_if<std::is_enum<T>::value, void>::type> {
+  size_t operator()(T key) const {
+    typedef typename std::underlying_type<T>::type Underlying;
+    return Hash()(static_cast<Underlying>(key));
+  }
+};
+
+// Pairs are hashed by passing both members, first then second, to the
+// multi-argument form of Hash.
+template <class T1, class T2>
+struct hasher<std::pair<T1, T2>> {
+  size_t operator()(const std::pair<T1, T2>& key) const {
+    const T1& head = key.first;
+    const T2& tail = key.second;
+    return Hash()(head, tail);
+  }
+};
+
+// Tuples are hashed by handing the tuple to applyTuple together with a Hash
+// object, so the elements become the arguments of a single Hash call.
+template <typename... Ts>
+struct hasher<std::tuple<Ts...>> {
+  size_t operator()(const std::tuple<Ts...>& key) const {
+    const size_t digest = applyTuple(Hash(), key);
+    return digest;
+  }
+};
+
+// Recursive case: hash elements 0 .. index-1 first, then combine that result
+// with element `index` via hash::hash_combine.
+template <size_t index, typename... Ts>
+struct TupleHasher {
+  size_t operator()(std::tuple<Ts...> const& key) const {
+    const size_t prefix = TupleHasher<index - 1, Ts...>()(key);
+    return hash::hash_combine(prefix, std::get<index>(key));
+  }
+};
+
+// Base case: index 0 ends the recursion by hashing only the first element.
+template <typename... Ts>
+struct TupleHasher<0, Ts...> {
+  size_t operator()(std::tuple<Ts...> const& key) const {
+    // we could call std::hash here directly, but hash_combine hides all the
+    // ugly templating implicitly
+    const auto& first = std::get<0>(key);
+    return hash::hash_combine(first);
}
};
// Hash function for pairs. Requires default hash functions for both
// items in the pair.
template <typename T1, typename T2>
- class hash<std::pair<T1, T2> > {
- public:
+ struct hash<std::pair<T1, T2> > {
+ public:
+ // Combines the hashes of x.first and x.second, in that order, via
+ // folly::hash::hash_combine.
size_t operator()(const std::pair<T1, T2>& x) const {
return folly::hash::hash_combine(x.first, x.second);
}
};
-} // namespace std
-#endif
+  // Hash function for tuples. Requires default hash functions for all types.
+  // Hashing starts at the last element's index and folly::TupleHasher works
+  // its way down to index 0.
+  // NOTE(review): for an empty tuple the start index would wrap around --
+  // confirm std::tuple<> is never hashed through this specialization.
+  template <typename... Ts>
+  struct hash<std::tuple<Ts...>> {
+    size_t operator()(std::tuple<Ts...> const& key) const {
+      typedef folly::TupleHasher<
+          sizeof...(Ts) - 1, // start index
+          Ts...>
+          LastIndexHasher;
+
+      return LastIndexHasher()(key);
+    }
+  };
+} // namespace std