-// Forward declaration of the SSE4.2 search routine. It exists so the GCC
-// function attributes can be attached: the function is compiled for the
-// "sse4.2" target and is marked as never to be inlined.
-size_t qfind_first_byte_of_sse42(const StringPiece& haystack,
-                                 const StringPiece& needles)
-    __attribute__((__target__("sse4.2"), __noinline__));
-
-/**
- * Finds the first byte of `haystack` that equals any byte of `needles`.
- *
- * @param haystack  bytes to search
- * @param needles   set of bytes to look for
- * @return index into `haystack` of the first matching byte, or
- *         StringPiece::npos when nothing matches or either argument is empty
- *
- * Needle sets of at most 16 bytes are handed to
- * qfind_first_byte_of_needles16(). Larger sets are handled here: the haystack
- * is walked in 16-byte blocks and each block is compared against every
- * 16-byte block of needles with PCMPESTRI (imm8 = 0: unsigned bytes,
- * "equal any", lowest matching index).
- *
- * Every 16-byte load stays inside the caller's buffers: a short haystack tail
- * is staged in a zero-padded local buffer, and the last needle block is
- * loaded from the final 16 bytes of `needles`.
- */
-size_t qfind_first_byte_of_sse42(const StringPiece& haystack,
-                                 const StringPiece& needles) {
-  if (UNLIKELY(needles.empty() || haystack.empty())) {
-    return StringPiece::npos;
-  } else if (needles.size() <= 16) {
-    // we can save some unnecessary load instructions by optimizing for
-    // the common case of needles.size() <= 16
-    return qfind_first_byte_of_needles16(haystack, needles);
-  }
-
-  const size_t kBlock = 16;
-  const size_t haystackSize = haystack.size();
-  const size_t needlesSize = needles.size();  // > kBlock from here on
-
-  // Staging area for a haystack tail shorter than one block, so that the
-  // unaligned 16-byte load never touches memory past haystack's end.
-  char paddedTail[16] = {0};
-
-  for (size_t i = 0; i < haystackSize; i += kBlock) {
-    const size_t remaining = haystackSize - i;
-    const char* block = haystack.data() + i;
-    if (remaining < kBlock) {
-      std::copy_n(block, remaining, paddedTail);
-      block = paddedTail;
-    }
-    // PCMPESTRI takes int lengths; clamp first so huge sizes cannot truncate.
-    const int blockLen = static_cast<int>(std::min(remaining, kBlock));
-    auto hayVec = __builtin_ia32_loaddqu(block);
-
-    size_t firstHit = kBlock;  // kBlock means "no match in this block"
-    for (size_t j = 0; j < needlesSize; j += kBlock) {
-      // For the final partial block step back so the load ends exactly at the
-      // end of needles. The overlap only repeats needles that were already
-      // tested, so the result is unchanged.
-      const size_t start = std::min(j, needlesSize - kBlock);
-      auto needleVec = __builtin_ia32_loaddqu(needles.data() + start);
-      auto hit = __builtin_ia32_pcmpestri128(needleVec, 16,
-                                             hayVec, blockLen, 0);
-      firstHit = std::min<size_t>(hit, firstHit);
-    }
-    if (firstHit < kBlock) {
-      return i + firstHit;
-    }
-  }
-  return StringPiece::npos;
-}
-
-// Alias for the function type (signature) of qfind_first_byte_of_sse42.
-using Type_qfind_first_byte_of = decltype(qfind_first_byte_of_sse42);
-