X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=folly%2FGroupVarint.h;h=0e8f5edd74dc89544f54e3385cb1ffbd24e62b21;hb=782325fde921b859ef6abec57eaee880d7d0d938;hp=5c506a2f1b34897840474bf33e07d90d7329f4d1;hpb=779cbf35527d33c4073a3c8929e284e9a16b0e50;p=folly.git diff --git a/folly/GroupVarint.h b/folly/GroupVarint.h index 5c506a2f..0e8f5edd 100644 --- a/folly/GroupVarint.h +++ b/folly/GroupVarint.h @@ -1,5 +1,5 @@ /* - * Copyright 2014 Facebook, Inc. + * Copyright 2016 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,30 +14,30 @@ * limitations under the License. */ -#ifndef FOLLY_GROUPVARINT_H_ -#define FOLLY_GROUPVARINT_H_ +#pragma once -#ifndef __GNUC__ -#error GroupVarint.h requires GCC +#if !defined(__GNUC__) && !defined(_MSC_VER) +#error GroupVarint.h requires GCC or MSVC #endif -#include "folly/Portability.h" +#include -#if FOLLY_X64 || defined(__i386__) +#if FOLLY_X64 || defined(__i386__) || FOLLY_PPC64 || FOLLY_A64 #define HAVE_GROUP_VARINT 1 #include #include -#include "folly/detail/GroupVarintDetail.h" -#include "folly/Bits.h" -#include "folly/Range.h" +#include +#include +#include +#include #include -#ifdef __SSSE3__ -#include +#if FOLLY_SSE >= 3 +#include namespace folly { namespace detail { -extern const __m128i groupVarintSSEMasks[]; +alignas(16) extern const uint64_t groupVarintSSEMasks[]; } // namespace detail } // namespace folly #endif @@ -176,7 +176,7 @@ class GroupVarint : public detail::GroupVarintBase { p += k2+1; size_t k3 = b3key(k); *d = loadUnaligned(p) & kMask[k3]; - p += k3+1; + // p += k3+1; return end; } @@ -188,25 +188,35 @@ class GroupVarint : public detail::GroupVarintBase { return decode_simple(p, dest, dest+1, dest+2, dest+3); } -#ifdef __SSSE3__ +#if FOLLY_SSE >= 3 + /** + * Just like the non-SSSE3 decode below, but with the additional constraint + * that we must be able to read at least 17 bytes from the input pointer, p. + */ static const char* decode(const char* p, uint32_t* dest) { uint8_t key = p[0]; __m128i val = _mm_loadu_si128((const __m128i*)(p+1)); - __m128i mask = detail::groupVarintSSEMasks[key]; + __m128i mask = + _mm_load_si128((const __m128i*)&detail::groupVarintSSEMasks[key * 2]); __m128i r = _mm_shuffle_epi8(val, mask); _mm_storeu_si128((__m128i*)dest, r); return p + detail::groupVarintLengths[key]; } + /** + * Just like decode_simple, but with the additional constraint that + * we must be able to read at least 17 bytes from the input pointer, p. + */ static const char* decode(const char* p, uint32_t* a, uint32_t* b, uint32_t* c, uint32_t* d) { uint8_t key = p[0]; __m128i val = _mm_loadu_si128((const __m128i*)(p+1)); - __m128i mask = detail::groupVarintSSEMasks[key]; + __m128i mask = + _mm_load_si128((const __m128i*)&detail::groupVarintSSEMasks[key * 2]); __m128i r = _mm_shuffle_epi8(val, mask); // Extracting 32 bits at a time out of an XMM register is a SSE4 feature -#ifdef __SSE4__ +#if FOLLY_SSE >= 4 *a = _mm_extract_epi32(r, 0); *b = _mm_extract_epi32(r, 1); *c = _mm_extract_epi32(r, 2); @@ -504,7 +514,7 @@ class GroupVarintDecoder { typedef GroupVarint Base; typedef T type; - GroupVarintDecoder() { } + GroupVarintDecoder() = default; explicit GroupVarintDecoder(StringPiece data, size_t maxCount = (size_t)-1) @@ -609,6 +619,4 @@ typedef GroupVarintDecoder GroupVarint64Decoder; } // namespace folly -#endif /* FOLLY_X64 || defined(__i386__) */ -#endif /* FOLLY_GROUPVARINT_H_ */ - +#endif /* FOLLY_X64 || defined(__i386__) || FOLLY_PPC64 */