Use intrinsics rather than inline assembly where possible
[folly.git] / folly / experimental / Select64.h
index bee6970b06d2b41c0b8d68cf6195269a732bf0a8..5d0fcaa5b51e69cec887dc4d6735cfc666f78d0f 100644 (file)
@@ -14,8 +14,7 @@
  * limitations under the License.
  */
 
-#ifndef FOLLY_EXPERIMENTAL_SELECT64_H
-#define FOLLY_EXPERIMENTAL_SELECT64_H
+#pragma once
 
 #include <glog/logging.h>
 
@@ -63,9 +62,15 @@ inline uint64_t select64(uint64_t x, uint64_t k) {
   return place + detail::kSelectInByte[((x >> place) & 0xFF) | (byteRank << 8)];
 }
 
+template <>
+uint64_t select64<compression::instructions::Haswell>(uint64_t x, uint64_t k)
+  FOLLY_TARGET_ATTRIBUTE("bmi,bmi2");
+
 template <>
 inline uint64_t select64<compression::instructions::Haswell>(uint64_t x,
                                                              uint64_t k) {
+#if defined(__GNUC__) && !__GNUC_PREREQ(4, 9)
+  // GCC older than 4.9 doesn't support these intrinsics; use inline asm.
   uint64_t result = uint64_t(1) << k;
 
   asm("pdep %1, %0, %0\n\t"
@@ -74,8 +79,9 @@ inline uint64_t select64<compression::instructions::Haswell>(uint64_t x,
       : "r"(x));
 
   return result;
+#else
+  return _tzcnt_u64(_pdep_u64(x, 1ULL << k));
+#endif
 }
 
 } // namespace folly
-
-#endif