/*
- * Copyright 2012 Facebook, Inc.
+ * Copyright 2016 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
-#ifndef FOLLY_GROUPVARINT_H_
-#define FOLLY_GROUPVARINT_H_
+#pragma once
-#ifndef __GNUC__
-#error GroupVarint.h requires GCC
+#if !defined(__GNUC__) && !defined(_MSC_VER)
+#error GroupVarint.h requires GCC or MSVC
#endif
-#if !defined(__x86_64__) && !defined(__i386__)
-#error GroupVarint.h requires x86_64 or i386
-#endif
+#include <folly/Portability.h>
+
+#if FOLLY_X64 || defined(__i386__) || FOLLY_PPC64 || FOLLY_A64
+#define HAVE_GROUP_VARINT 1
#include <cstdint>
#include <limits>
-#include "folly/detail/GroupVarintDetail.h"
-#include "folly/Bits.h"
-#include "folly/Range.h"
+#include <folly/detail/GroupVarintDetail.h>
+#include <folly/Bits.h>
+#include <folly/Range.h>
+#include <folly/portability/Builtins.h>
#include <glog/logging.h>
-#ifdef __SSSE3__
-#include <x86intrin.h>
+#if FOLLY_SSE >= 3
+#include <nmmintrin.h>
namespace folly {
namespace detail {
-extern const __m128i groupVarintSSEMasks[];
+alignas(16) extern const uint64_t groupVarintSSEMasks[];
} // namespace detail
} // namespace folly
#endif
p += k2+1;
size_t k3 = b3key(k);
*d = loadUnaligned<uint32_t>(p) & kMask[k3];
- p += k3+1;
+ // p += k3+1;
return end;
}
return decode_simple(p, dest, dest+1, dest+2, dest+3);
}
-#ifdef __SSSE3__
+#if FOLLY_SSE >= 3
+ /**
+ * Just like the non-SSSE3 decode below, but with the additional constraint
+ * that we must be able to read at least 17 bytes from the input pointer, p:
+ * the key byte p[0] plus the full 16 bytes loaded from p+1, however many of
+ * them the group actually uses.
+ *
+ * groupVarintSSEMasks is declared as an array of uint64_t, so the 16-byte
+ * shuffle mask for a key starts at index key * 2. All 16 bytes of the
+ * shuffled result are stored to dest, which therefore needs room for four
+ * uint32_t values.
+ *
+ * Returns p advanced by detail::groupVarintLengths[key].
+ */
static const char* decode(const char* p, uint32_t* dest) {
uint8_t key = p[0];
__m128i val = _mm_loadu_si128((const __m128i*)(p+1));
- __m128i mask = detail::groupVarintSSEMasks[key];
+ __m128i mask =
+ _mm_load_si128((const __m128i*)&detail::groupVarintSSEMasks[key * 2]);
__m128i r = _mm_shuffle_epi8(val, mask);
_mm_storeu_si128((__m128i*)dest, r);
return p + detail::groupVarintLengths[key];
}
+ /**
+ * Just like decode_simple, but with the additional constraint that
+ * we must be able to read at least 17 bytes from the input pointer, p.
+ */
static const char* decode(const char* p, uint32_t* a, uint32_t* b,
uint32_t* c, uint32_t* d) {
uint8_t key = p[0];
__m128i val = _mm_loadu_si128((const __m128i*)(p+1));
- __m128i mask = detail::groupVarintSSEMasks[key];
+ __m128i mask =
+ _mm_load_si128((const __m128i*)&detail::groupVarintSSEMasks[key * 2]);
__m128i r = _mm_shuffle_epi8(val, mask);
// Extracting 32 bits at a time out of an XMM register is a SSE4 feature
-#ifdef __SSE4__
+#if FOLLY_SSE >= 4
*a = _mm_extract_epi32(r, 0);
*b = _mm_extract_epi32(r, 1);
*c = _mm_extract_epi32(r, 2);
typedef GroupVarint<T> Base;
typedef T type;
- GroupVarintDecoder() { }
+ GroupVarintDecoder() = default; // NOTE(review): fields are left indeterminate; presumably reset() before use
explicit GroupVarintDecoder(StringPiece data,
size_t maxCount = (size_t)-1)
: rrest_(data.end()),
p_(data.data()),
end_(data.end()),
+ limit_(end_), // safe: end_ is declared before limit_, so it is already initialized
pos_(0),
count_(0),
remaining_(maxCount) {
}
- void reset(StringPiece data, size_t maxCount=(size_t)-1) {
+ void reset(StringPiece data, size_t maxCount = (size_t)-1) {
rrest_ = data.end();
p_ = data.data();
end_ = data.end();
+ limit_ = end_;
pos_ = 0;
count_ = 0;
remaining_ = maxCount;
// The best way to ensure this is to ensure that data has at least
// Base::kMaxSize - 1 bytes readable *after* the end, otherwise we'll copy
// into a temporary buffer.
- if (rem < Base::kMaxSize) {
+ if (limit_ - p_ < Base::kMaxSize) {
memcpy(tmp_, p_, rem);
p_ = tmp_;
end_ = p_ + rem;
+ limit_ = tmp_ + sizeof(tmp_);
}
pos_ = 0;
const char* n = Base::decode(p_, buf_);
const char* rrest_;
const char* p_;
const char* end_;
- char tmp_[Base::kMaxSize];
+ const char* limit_;
+ char tmp_[2 * Base::kMaxSize];
type buf_[Base::kGroupSize];
size_t pos_;
size_t count_;
} // namespace folly
-#endif /* FOLLY_GROUPVARINT_H_ */
-
+#endif /* FOLLY_X64 || defined(__i386__) || FOLLY_PPC64 || FOLLY_A64 */