X-Git-Url: http://plrg.eecs.uci.edu/git/?p=folly.git;a=blobdiff_plain;f=folly%2FGroupVarint.h;h=0e8f5edd74dc89544f54e3385cb1ffbd24e62b21;hp=dc26919547da4f9fe5217408ef2c1e3ed9849a04;hb=43d3a315bbd54cb187021897b566ffb53290a44b;hpb=5d43d2ab506d7b02a54ef5a5e1d662a1911d1aa2 diff --git a/folly/GroupVarint.h b/folly/GroupVarint.h index dc269195..0e8f5edd 100644 --- a/folly/GroupVarint.h +++ b/folly/GroupVarint.h @@ -1,5 +1,5 @@ /* - * Copyright 2013 Facebook, Inc. + * Copyright 2016 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,28 +14,30 @@ * limitations under the License. */ -#ifndef FOLLY_GROUPVARINT_H_ -#define FOLLY_GROUPVARINT_H_ +#pragma once -#ifndef __GNUC__ -#error GroupVarint.h requires GCC +#if !defined(__GNUC__) && !defined(_MSC_VER) +#error GroupVarint.h requires GCC or MSVC #endif -#if defined(__x86_64__) || defined(__i386__) +#include + +#if FOLLY_X64 || defined(__i386__) || FOLLY_PPC64 || FOLLY_A64 #define HAVE_GROUP_VARINT 1 #include #include -#include "folly/detail/GroupVarintDetail.h" -#include "folly/Bits.h" -#include "folly/Range.h" +#include +#include +#include +#include #include -#ifdef __SSSE3__ -#include +#if FOLLY_SSE >= 3 +#include namespace folly { namespace detail { -extern const __m128i groupVarintSSEMasks[]; +alignas(16) extern const uint64_t groupVarintSSEMasks[]; } // namespace detail } // namespace folly #endif @@ -174,7 +176,7 @@ class GroupVarint : public detail::GroupVarintBase { p += k2+1; size_t k3 = b3key(k); *d = loadUnaligned(p) & kMask[k3]; - p += k3+1; + // p += k3+1; return end; } @@ -186,25 +188,35 @@ class GroupVarint : public detail::GroupVarintBase { return decode_simple(p, dest, dest+1, dest+2, dest+3); } -#ifdef __SSSE3__ +#if FOLLY_SSE >= 3 + /** + * Just like the non-SSSE3 decode below, but with the additional constraint + * that we must be able to read at least 17 bytes from the input pointer, p. + */ static const char* decode(const char* p, uint32_t* dest) { uint8_t key = p[0]; __m128i val = _mm_loadu_si128((const __m128i*)(p+1)); - __m128i mask = detail::groupVarintSSEMasks[key]; + __m128i mask = + _mm_load_si128((const __m128i*)&detail::groupVarintSSEMasks[key * 2]); __m128i r = _mm_shuffle_epi8(val, mask); _mm_storeu_si128((__m128i*)dest, r); return p + detail::groupVarintLengths[key]; } + /** + * Just like decode_simple, but with the additional constraint that + * we must be able to read at least 17 bytes from the input pointer, p. + */ static const char* decode(const char* p, uint32_t* a, uint32_t* b, uint32_t* c, uint32_t* d) { uint8_t key = p[0]; __m128i val = _mm_loadu_si128((const __m128i*)(p+1)); - __m128i mask = detail::groupVarintSSEMasks[key]; + __m128i mask = + _mm_load_si128((const __m128i*)&detail::groupVarintSSEMasks[key * 2]); __m128i r = _mm_shuffle_epi8(val, mask); // Extracting 32 bits at a time out of an XMM register is a SSE4 feature -#ifdef __SSE4__ +#if FOLLY_SSE >= 4 *a = _mm_extract_epi32(r, 0); *b = _mm_extract_epi32(r, 1); *c = _mm_extract_epi32(r, 2); @@ -502,22 +514,24 @@ class GroupVarintDecoder { typedef GroupVarint Base; typedef T type; - GroupVarintDecoder() { } + GroupVarintDecoder() = default; explicit GroupVarintDecoder(StringPiece data, size_t maxCount = (size_t)-1) : rrest_(data.end()), p_(data.data()), end_(data.end()), + limit_(end_), pos_(0), count_(0), remaining_(maxCount) { } - void reset(StringPiece data, size_t maxCount=(size_t)-1) { + void reset(StringPiece data, size_t maxCount = (size_t)-1) { rrest_ = data.end(); p_ = data.data(); end_ = data.end(); + limit_ = end_; pos_ = 0; count_ = 0; remaining_ = maxCount; @@ -540,10 +554,11 @@ class GroupVarintDecoder { // The best way to ensure this is to ensure that data has at least // Base::kMaxSize - 1 bytes readable *after* the end, otherwise we'll copy // into a temporary buffer. - if (rem < Base::kMaxSize) { + if (limit_ - p_ < Base::kMaxSize) { memcpy(tmp_, p_, rem); p_ = tmp_; end_ = p_ + rem; + limit_ = tmp_ + sizeof(tmp_); } pos_ = 0; const char* n = Base::decode(p_, buf_); @@ -591,7 +606,8 @@ class GroupVarintDecoder { const char* rrest_; const char* p_; const char* end_; - char tmp_[Base::kMaxSize]; + const char* limit_; + char tmp_[2 * Base::kMaxSize]; type buf_[Base::kGroupSize]; size_t pos_; size_t count_; @@ -603,6 +619,4 @@ typedef GroupVarintDecoder GroupVarint64Decoder; } // namespace folly -#endif /* defined(__x86_64__) || defined(__i386__) */ -#endif /* FOLLY_GROUPVARINT_H_ */ - +#endif /* FOLLY_X64 || defined(__i386__) || FOLLY_PPC64 */