qfind_first_byte_of may suffer from global initialization order
authorPhilip Pronin <philipp@fb.com>
Fri, 25 Jan 2013 00:36:11 +0000 (16:36 -0800)
committerJordan DeLong <jdelong@fb.com>
Mon, 4 Feb 2013 17:25:53 +0000 (09:25 -0800)
Summary: ##static## handling adds ~2 more ns overhead per call (and the first call is kinda slow), but now the logic is correct now. Also inlined ##qfind_first_byte_of##.

Test Plan: unittests

Reviewed By: tudorb@fb.com

FB internal diff: D687947

folly/Range.cpp
folly/Range.h
folly/test/RangeTest.cpp

index 6149f4ed29429e8c52c2e7ead04cd6e29cc4813f..94dc4fe9c9ea89619733381214ec7f2d83647204 100644 (file)
  * limitations under the License.
  */
 
-//
 // @author Mark Rabkin (mrabkin@fb.com)
 // @author Andrei Alexandrescu (andrei.alexandrescu@fb.com)
-//
 
 #include "folly/Range.h"
 
-#include "folly/CpuId.h"
 #include "folly/Likely.h"
 
 namespace folly {
@@ -86,39 +83,6 @@ size_t qfind_first_byte_of_needles16(const StringPiece& haystack,
   return StringPiece::npos;
 }
 
-size_t qfind_first_byte_of_sse42(const StringPiece& haystack,
-                                 const StringPiece& needles)
-  __attribute__ ((__target__("sse4.2"), noinline));
-
-size_t qfind_first_byte_of_sse42(const StringPiece& haystack,
-                                 const StringPiece& needles) {
-  if (UNLIKELY(needles.empty() || haystack.empty())) {
-    return StringPiece::npos;
-  } else if (needles.size() <= 16) {
-    // we can save some unnecessary load instructions by optimizing for
-    // the common case of needles.size() <= 16
-    return qfind_first_byte_of_needles16(haystack, needles);
-  }
-
-  size_t index = haystack.size();
-  for (size_t i = 0; i < haystack.size(); i += 16) {
-    size_t b = 16;
-    auto arr1 = __builtin_ia32_loaddqu(haystack.data() + i);
-    for (size_t j = 0; j < needles.size(); j += 16) {
-      auto arr2 = __builtin_ia32_loaddqu(needles.data() + j);
-      auto index = __builtin_ia32_pcmpestri128(arr2, needles.size() - j,
-                                               arr1, haystack.size() - i, 0);
-      b = std::min<size_t>(index, b);
-    }
-    if (b < 16) {
-      return i + b;
-    }
-  };
-  return StringPiece::npos;
-}
-
-typedef decltype(qfind_first_byte_of_sse42) Type_qfind_first_byte_of;
-
 // Aho, Hopcroft, and Ullman refer to this trick in "The Design and Analysis
 // of Computer Algorithms" (1974), but the best description is here:
 // http://research.swtch.com/sparse
@@ -163,6 +127,37 @@ size_t qfind_first_byte_of_byteset(const StringPiece& haystack,
   return StringPiece::npos;
 }
 
+size_t qfind_first_byte_of_sse42(const StringPiece& haystack,
+                                 const StringPiece& needles)
+  __attribute__ ((__target__("sse4.2"), noinline));
+
+size_t qfind_first_byte_of_sse42(const StringPiece& haystack,
+                                 const StringPiece& needles) {
+  if (UNLIKELY(needles.empty() || haystack.empty())) {
+    return StringPiece::npos;
+  } else if (needles.size() <= 16) {
+    // we can save some unnecessary load instructions by optimizing for
+    // the common case of needles.size() <= 16
+    return qfind_first_byte_of_needles16(haystack, needles);
+  }
+
+  size_t index = haystack.size();
+  for (size_t i = 0; i < haystack.size(); i += 16) {
+    size_t b = 16;
+    auto arr1 = __builtin_ia32_loaddqu(haystack.data() + i);
+    for (size_t j = 0; j < needles.size(); j += 16) {
+      auto arr2 = __builtin_ia32_loaddqu(needles.data() + j);
+      auto index = __builtin_ia32_pcmpestri128(arr2, needles.size() - j,
+                                               arr1, haystack.size() - i, 0);
+      b = std::min<size_t>(index, b);
+    }
+    if (b < 16) {
+      return i + b;
+    }
+  };
+  return StringPiece::npos;
+}
+
 size_t qfind_first_byte_of_nosse(const StringPiece& haystack,
                                  const StringPiece& needles) {
   if (UNLIKELY(needles.empty() || haystack.empty())) {
@@ -183,14 +178,5 @@ size_t qfind_first_byte_of_nosse(const StringPiece& haystack,
   return qfind_first_byte_of_memchr(haystack, needles);
 }
 
-auto const qfind_first_byte_of_fn =
-  folly::CpuId().sse42() ? qfind_first_byte_of_sse42
-                         : qfind_first_byte_of_nosse;
-
-size_t qfind_first_byte_of(const StringPiece& haystack,
-                           const StringPiece& needles) {
-  return qfind_first_byte_of_fn(haystack, needles);
-}
-
 }  // namespace detail
 }  // namespace folly
index 196e8d3dc7d29285f4b7bf31a54eeb7e326d17c5..d69d335ec9c504eccd4ef05afb6f6b295d37dcbf 100644 (file)
@@ -32,6 +32,7 @@
 #include <boost/utility/enable_if.hpp>
 #include <boost/type_traits.hpp>
 #include <bits/c++config.h>
+#include "folly/CpuId.h"
 #include "folly/Traits.h"
 
 namespace folly {
@@ -593,8 +594,21 @@ size_t qfind(const Range<T>& haystack,
 }
 
 namespace detail {
-size_t qfind_first_byte_of(const StringPiece& haystack,
-                           const StringPiece& needles);
+
+size_t qfind_first_byte_of_sse42(const StringPiece& haystack,
+                                 const StringPiece& needles);
+
+size_t qfind_first_byte_of_nosse(const StringPiece& haystack,
+                                 const StringPiece& needles);
+
+inline size_t qfind_first_byte_of(const StringPiece& haystack,
+                                  const StringPiece& needles) {
+  static auto const qfind_first_byte_of_fn =
+    folly::CpuId().sse42() ? qfind_first_byte_of_sse42
+                           : qfind_first_byte_of_nosse;
+  return qfind_first_byte_of_fn(haystack, needles);
+}
+
 } // namespace detail
 
 template <class T, class Comp>
index cb8aec8a1cf46d3671109318132156b5929d6fce..6b29725224a4f4b1330e666b0e6218d43409885b 100644 (file)
@@ -14,7 +14,6 @@
  * limitations under the License.
  */
 
-//
 // @author Kristina Holst (kholst@fb.com)
 // @author Andrei Alexandrescu (andrei.alexandrescu@fb.com)
 
@@ -25,6 +24,7 @@
 #include "folly/Range.h"
 
 namespace folly { namespace detail {
+
 // declaration of functions in Range.cpp
 size_t qfind_first_byte_of_memchr(const StringPiece& haystack,
                                   const StringPiece& needles);
@@ -32,9 +32,7 @@ size_t qfind_first_byte_of_memchr(const StringPiece& haystack,
 size_t qfind_first_byte_of_byteset(const StringPiece& haystack,
                                    const StringPiece& needles);
 
-size_t qfind_first_byte_of_nosse(const StringPiece& haystack,
-                                 const StringPiece& needles);
-}}
+}}  // namespaces
 
 using namespace folly;
 using namespace std;