From 69b46104294b1cccaeafc2c88a67e9c7f9543962 Mon Sep 17 00:00:00 2001 From: Peter Griess Date: Sat, 21 Sep 2013 11:40:58 -0500 Subject: [PATCH] Be more careful when using SSE intrinsics. Summary: - Check FOLLY_HAVE_EMMINTRIN_H before #including it in Range.cpp. - The version of Clang that Mac OS X uses for Xcode is based on GCC 4.2. It turns out that this does not have a full complement of SSE2 builtins (e.g. __builtin_ia32_loaddqu is undefined, but __builtin_ia32_storedqu is). Work around this omission by not compiling the specialized code on this platform. @override-unit-failures Ignore unit test failures as two tests are just sitting in the postponed state forever. I investigated all the other unit test failures and they just look like flakey tests. Test Plan: - fbconfig -r folly && fbmake runtests - ./configure && make check on Ubuntu/FC/Mac Reviewed By: delong.j@fb.com FB internal diff: D999130 --- folly/Range.cpp | 12 ++++++++---- folly/Range.h | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/folly/Range.cpp b/folly/Range.cpp index c343a362..a36f350b 100644 --- a/folly/Range.cpp +++ b/folly/Range.cpp @@ -19,7 +19,9 @@ #include "folly/Range.h" +#if FOLLY_HAVE_EMMINTRIN_H #include <emmintrin.h> // __v16qi +#endif #include namespace folly { @@ -66,7 +68,9 @@ static_assert(kMinPageSize >= 16, (reinterpret_cast(addr) / kMinPageSize) -#if FOLLY_HAVE_EMMINTRIN_H +// Earlier versions of GCC (for example, Clang on Mac OS X, which is based on +// GCC 4.2) do not have a full complement of SSE builtins. 
+#if FOLLY_HAVE_EMMINTRIN_H && __GNUC_PREREQ(4, 6) inline size_t nextAlignedIndex(const char* arr) { auto firstPossible = reinterpret_cast(arr) + 1; return 1 + // add 1 because the index starts at 'arr' @@ -119,7 +123,7 @@ size_t qfind_first_byte_of_needles16(const StringPiece& haystack, } return StringPiece::npos; } -#endif // FOLLY_HAVE_EMMINTRIN_H +#endif // FOLLY_HAVE_EMMINTRIN_H && GCC 4.6+ // Aho, Hopcroft, and Ullman refer to this trick in "The Design and Analysis // of Computer Algorithms" (1974), but the best description is here: @@ -165,7 +169,7 @@ size_t qfind_first_byte_of_byteset(const StringPiece& haystack, return StringPiece::npos; } -#if FOLLY_HAVE_EMMINTRIN_H +#if FOLLY_HAVE_EMMINTRIN_H && __GNUC_PREREQ(4, 6) template inline size_t scanHaystackBlock(const StringPiece& haystack, @@ -254,7 +258,7 @@ size_t qfind_first_byte_of_sse42(const StringPiece& haystack, return StringPiece::npos; } -#endif // FOLLY_HAVE_EMMINTRIN_H +#endif // FOLLY_HAVE_EMMINTRIN_H && GCC 4.6+ size_t qfind_first_byte_of_nosse(const StringPiece& haystack, const StringPiece& needles) { diff --git a/folly/Range.h b/folly/Range.h index cabd9078..6925c6d4 100644 --- a/folly/Range.h +++ b/folly/Range.h @@ -628,7 +628,7 @@ namespace detail { size_t qfind_first_byte_of_nosse(const StringPiece& haystack, const StringPiece& needles); -#if FOLLY_HAVE_EMMINTRIN_H +#if FOLLY_HAVE_EMMINTRIN_H && __GNUC_PREREQ(4, 6) size_t qfind_first_byte_of_sse42(const StringPiece& haystack, const StringPiece& needles); -- 2.34.1