Use intrinsics rather than inline assembly where possible
[folly.git] / folly / experimental / Select64.h
index 9c2e03f90aebc788e9cb992f36825092d8574d63..5d0fcaa5b51e69cec887dc4d6735cfc666f78d0f 100644 (file)
@@ -62,9 +62,15 @@ inline uint64_t select64(uint64_t x, uint64_t k) {
   return place + detail::kSelectInByte[((x >> place) & 0xFF) | (byteRank << 8)];
 }
 
+template <> // Declaration only; the definition of this specialization follows.
+uint64_t select64<compression::instructions::Haswell>(uint64_t x, uint64_t k)
+  FOLLY_TARGET_ATTRIBUTE("bmi,bmi2"); // NOTE(review): presumably a per-function target attribute enabling BMI/BMI2 -- confirm against the macro's definition.
+
 template <>
 inline uint64_t select64<compression::instructions::Haswell>(uint64_t x,
                                                              uint64_t k) {
+#if defined(__GNUC__) && !__GNUC_PREREQ(4, 9)
+  // GCC 4.8 doesn't support the intrinsics.
   uint64_t result = uint64_t(1) << k;
 
   asm("pdep %1, %0, %0\n\t"
@@ -73,6 +79,9 @@ inline uint64_t select64<compression::instructions::Haswell>(uint64_t x,
       : "r"(x));
 
   return result;
+#else
+  return _tzcnt_u64(_pdep_u64(x, 1ULL << k));
+#endif
 }
 
 } // namespace folly