// Portable fallbacks from `struct Default` in folly/experimental/Instructions.h
// (the rest of the struct lies outside this excerpt).

// Clears the lowest set bit of `value`. Returns 0 when `value` is 0.
static FOLLY_ALWAYS_INLINE uint64_t blsr(uint64_t value) {
  return value & (value - 1);
}

// Software equivalent of the BMI1 BEXTR instruction: extracts `length` bits
// of `value` starting at bit `start` and returns them right-aligned.
//
// Only bits [0:63] can be extracted; all higher-order bits of the result are
// zero. If no bits are extracted (`length == 0` or `start > 63`) the result
// is 0. A `length` reaching past bit 63 is clamped to the bits that remain.
static FOLLY_ALWAYS_INLINE uint64_t
bextr(uint64_t value, uint32_t start, uint32_t length) {
  if (start > 63) {
    return 0ULL;
  }

  // `start <= 63` here, so `64 - start` cannot underflow. Comparing `length`
  // against the remaining width (rather than testing `start + length > 64`)
  // keeps the clamp correct when the 32-bit sum would wrap around, which
  // previously let an unclamped `length` reach the shift below (undefined
  // behaviour for shift counts >= 64).
  const uint32_t available = 64 - start;
  if (length > available) {
    length = available;
  }

  // A shift by 64 is undefined, so the full-width mask is special-cased.
  const uint64_t mask = (length == 64) ? ~0ULL : ((1ULL << length) - 1ULL);
  return (value >> start) & mask;
}
// Hardware-backed overrides touched by this patch. `popcount` belongs to
// `struct Nehalem`; `blsr` and `bextr` belong to `struct Haswell`. The
// remaining members of both structs lie outside this excerpt.

// Nehalem: counts the set bits of `value` with the POPCNT instruction.
static FOLLY_ALWAYS_INLINE uint64_t popcount(uint64_t value) {
// POPCNT is supported starting with Intel Nehalem, AMD K10.
#if defined(__GNUC__) || defined(__clang__)
  // GCC and Clang won't inline the intrinsics.
  uint64_t result;
  asm("popcntq %1, %0" : "=r"(result) : "r"(value));
  return result;
#else
  return uint64_t(_mm_popcnt_u64(value));
#endif
}

// Haswell: clears the lowest set bit of `value` with the BLSR instruction.
static FOLLY_ALWAYS_INLINE uint64_t blsr(uint64_t value) {
// BMI1 is supported starting with Intel Haswell, AMD Piledriver.
// BLSR combines two instructions into one and reduces register pressure.
#if defined(__GNUC__) || defined(__clang__)
  // GCC and Clang won't inline the intrinsics.
  uint64_t result;
  asm("blsrq %1, %0" : "=r"(result) : "r"(value));
  return result;
#else
  return _blsr_u64(value);
#endif
}

// Haswell: extracts `length` bits of `value` starting at bit `start` with the
// BEXTR instruction and returns them right-aligned. Only bits [0:63] can be
// extracted; if no bits are extracted the result is 0.
static FOLLY_ALWAYS_INLINE uint64_t
bextr(uint64_t value, uint32_t start, uint32_t length) {
  // BEXTR takes `start` and `length` as 8-bit fields. Truncating with
  // `& 0xFF` would turn start == 256 into 0 and length == 256 into 0, which
  // disagrees with the portable Default::bextr. Saturating to 255 keeps the
  // result the same for every in-range request and makes out-of-range ones
  // behave like "past the end" (start) or "everything that remains" (length).
  const uint32_t startField = start > 0xFF ? 0xFF : start;
  const uint32_t lengthField = length > 0xFF ? 0xFF : length;
#if defined(__GNUC__) || defined(__clang__)
  // GCC and Clang won't inline the intrinsics.
  // Encode parameters in `pattern` where `pattern[0:7]` is `start` and
  // `pattern[8:15]` is `length`.
  // Ref: Intel Advanced Vector Extensions Programming Reference
  const uint64_t pattern = uint64_t{startField} | (uint64_t{lengthField} << 8);
  uint64_t result;
  asm("bextrq %2, %1, %0" : "=r"(result) : "r"(value), "r"(pattern));
  return result;
#else
  return _bextr_u64(value, startField, lengthField);
#endif
}
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include + +using namespace folly; +using namespace folly::compression::instructions; + +TEST(Instructions, BitExtraction) { + uint64_t value = + 0b11111110'11011100'10111010'10011000'01110110'01010100'00110010'00010000; + + if (not Haswell::supported()) { + return; + } + + LOG(INFO) << "Testing Haswell on supported machine"; + + // Extract 4 bits a time, starting from bit 0 + uint64_t expected = 0; + for (int i = 0; i < 64 - 4; i += 4) { + EXPECT_EQ(expected, Default::bextr(value, i, 4)); + EXPECT_EQ(expected, Haswell::bextr(value, i, 4)); + ++expected; + } + + // Extract 8 bits a time, starting from bit 1 + uint64_t value2 = value << 1; + uint64_t lower = 0; + uint64_t upper = 1; + for (int i = 1; i < 64 - 8; i += 4) { + expected = (lower & 0xF) | ((upper & 0xF) << 4); + EXPECT_EQ(expected, Default::bextr(value2, i, 8)); + EXPECT_EQ(expected, Haswell::bextr(value2, i, 8)); + ++lower; + ++upper; + } + + // Extract 16 bits a time, starting from bit 2 + uint64_t value3 = value << 2; + uint64_t part0 = 0; + uint64_t part1 = 1; + uint64_t part2 = 2; + uint64_t part3 = 3; + for (int i = 2; i < 64 - 16; i += 4) { + expected = (part0 & 0xF) | ((part1 & 0xF) << 4) | ((part2 & 0xF) << 8) | + ((part3 & 0xF) << 12); + EXPECT_EQ(expected, Default::bextr(value3, i, 16)); + EXPECT_EQ(expected, Haswell::bextr(value3, i, 16)); + ++part0; + ++part1; + ++part2; + ++part3; + } + + // Extract 32 
bits + expected = 0b1011'1010'1001'1000'0111'0110'0101'0100; + EXPECT_EQ(expected, Default::bextr(value, 16, 32)); + EXPECT_EQ(expected, Haswell::bextr(value, 16, 32)); + + // Extract all 64 bits + EXPECT_EQ(value, Default::bextr(value, 0, 64)); + EXPECT_EQ(value, Haswell::bextr(value, 0, 64)); + + // Extract 0 bits + EXPECT_EQ(0, Default::bextr(value, 4, 0)); + EXPECT_EQ(0, Haswell::bextr(value, 4, 0)); + + // Make sure only up to 63-th bits will be extracted + EXPECT_EQ(0b1111, Default::bextr(value, 60, 5)); + EXPECT_EQ(0b1111, Haswell::bextr(value, 60, 5)); + + EXPECT_EQ(0, Default::bextr(value, 64, 8)); + EXPECT_EQ(0, Haswell::bextr(value, 64, 8)); + + EXPECT_EQ(value, Default::bextr(value, 0, 65)); + EXPECT_EQ(value, Haswell::bextr(value, 0, 65)); +}