From c9520b48ae8100ae5fac48d067c04c8e9a86ac99 Mon Sep 17 00:00:00 2001
From: Chandler Carruth
Date: Wed, 18 Feb 2015 07:13:48 +0000
Subject: [PATCH] [x86] Rewrite the byte shift detection to not use boolean
 variables to track state.

I didn't like this in the code review because the pattern tends to be
error-prone, but I didn't see a clear way to rewrite it. It turns out
that there were bugs here; I found them when fuzz testing our shuffle
lowering for correctness on x86.

The core of the problem is that we need to test all of our
preconditions consistently for the same shift direction and the same
input vector. Instead of tracking that with boolean state, formulate
the check as two predicates (one of which doesn't depend on the input
vector at all), pass the direction and the input vector in as
arguments, and loop over the alternatives.

This fixes a pattern of very rare miscompiles coming out of this code,
which turned up in roughly 4 out of every 1 million v8 shuffles in my
fuzz testing. The new code has survived over half a million test runs
with no failures so far. I've also fuzzed every other function in the
lowering code with over 3.5 million test cases and not discovered any
other miscompiles.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@229642 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp        | 43 +++++++++++------------
 test/CodeGen/X86/vector-shuffle-128-v8.ll | 32 +++++++++++++++++
 2 files changed, 53 insertions(+), 22 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index cd343192038..6255cce0d2b 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -7888,40 +7888,39 @@ static SDValue lowerVectorShuffleAsByteShift(SDLoc DL, MVT VT, SDValue V1,
   // [ 5, 6, 7, zz, zz, zz, zz, zz]
   // [ -1, 5, 6, 7, zz, zz, zz, zz]
   // [ 1, 2, -1, -1, -1, -1, zz, zz]
-  auto MatchByteShift = [&](int Shift) -> SDValue {
-    bool MatchLeft = true, MatchRight = true;
-    for (int l = 0; l < NumElts; l += NumLaneElts) {
+
+  auto CheckZeros = [&](int Shift, bool LeftShift) {
+    for (int l = 0; l < NumElts; l += NumLaneElts)
       for (int i = 0; i < Shift; ++i)
-        MatchLeft &= Zeroable[l + i];
-      for (int i = NumLaneElts - Shift; i < NumLaneElts; ++i)
-        MatchRight &= Zeroable[l + i];
-    }
-    if (!(MatchLeft || MatchRight))
-      return SDValue();
+        if (!Zeroable[l + i + (LeftShift ? 0 : (NumLaneElts - Shift))])
+          return false;
 
-    bool MatchV1 = true, MatchV2 = true;
+    return true;
+  };
+
+  auto MatchByteShift = [&](int Shift, bool LeftShift, SDValue V) {
     for (int l = 0; l < NumElts; l += NumLaneElts) {
-      unsigned Pos = MatchLeft ? Shift + l : l;
-      unsigned Low = MatchLeft ? l : Shift + l;
+      unsigned Pos = LeftShift ? Shift + l : l;
+      unsigned Low = LeftShift ? l : Shift + l;
       unsigned Len = NumLaneElts - Shift;
-      MatchV1 &= isSequentialOrUndefInRange(Mask, Pos, Len, Low);
-      MatchV2 &= isSequentialOrUndefInRange(Mask, Pos, Len, Low + NumElts);
+      if (!isSequentialOrUndefInRange(Mask, Pos, Len,
+                                      Low + (V == V1 ? 0 : NumElts)))
+        return SDValue();
     }
-    if (!(MatchV1 || MatchV2))
-      return SDValue();
 
     int ByteShift = Shift * Scale;
-    unsigned Op = MatchRight ? X86ISD::VSRLDQ : X86ISD::VSHLDQ;
-    SDValue V = MatchV1 ? V1 : V2;
+    unsigned Op = LeftShift ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
     V = DAG.getNode(ISD::BITCAST, DL, ShiftVT, V);
-    V = DAG.getNode(Op, DL, ShiftVT, V,
-                    DAG.getConstant(ByteShift, MVT::i8));
+    V = DAG.getNode(Op, DL, ShiftVT, V, DAG.getConstant(ByteShift, MVT::i8));
     return DAG.getNode(ISD::BITCAST, DL, VT, V);
   };
 
   for (int Shift = 1; Shift < NumLaneElts; ++Shift)
-    if (SDValue S = MatchByteShift(Shift))
-      return S;
+    for (bool LeftShift : {true, false})
+      if (CheckZeros(Shift, LeftShift))
+        for (SDValue V : {V1, V2})
+          if (SDValue S = MatchByteShift(Shift, LeftShift, V))
+            return S;
 
   // no match
   return SDValue();
diff --git a/test/CodeGen/X86/vector-shuffle-128-v8.ll b/test/CodeGen/X86/vector-shuffle-128-v8.ll
index 9918d5e1d86..2d833467918 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v8.ll
@@ -2146,3 +2146,35 @@ define <8 x i16> @shuffle_v8i16_0123456z(<8 x i16> %a) {
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
   ret <8 x i16> %shuffle
 }
+
+define <8 x i16> @shuffle_v8i16_fu3ucc5u(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: shuffle_v8i16_fu3ucc5u:
+; SSE2: # BB#0:
+; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
+; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v8i16_fu3ucc5u:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; SSSE3-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
+; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_fu3ucc5u:
+; SSE41: # BB#0:
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6],xmm1[7]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,15,14,15,6,7,6,7,8,9,8,9,10,11,14,15]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v8i16_fu3ucc5u:
+; AVX: # BB#0:
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6],xmm1[7]
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,14,15,6,7,6,7,8,9,8,9,10,11,14,15]
+; AVX-NEXT: retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 15, i32 undef, i32 3, i32 undef, i32 12, i32 12, i32 5, i32 undef>
+  ret <8 x i16> %shuffle
+}
-- 
2.34.1
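
For illustration only (not part of the patch): below is a minimal standalone
sketch of the structure the commit message describes, in case it helps to
experiment with the matching logic outside of LLVM. One predicate (checkZeros)
looks only at which mask elements must become zero, a second predicate
(matchByteShift) checks a single combination of shift direction and input
vector, and a driver loop walks every (Shift, LeftShift, input) alternative so
both predicates are always evaluated for the same combination. The types and
names here (ShuffleMask, ZeroableSet, ByteShiftMatch, lowerAsByteShift) are
simplified stand-ins invented for this sketch; they are not the SelectionDAG
API or the exact helpers used in X86ISelLowering.cpp.

// For illustration only, not part of the patch. Simplified stand-in types;
// this is not the SelectionDAG API.
#include <array>
#include <cstdio>
#include <optional>

constexpr int NumElts = 8;     // a single 128-bit lane of v8i16
constexpr int NumLaneElts = 8; // == NumElts here: one lane only
constexpr int Scale = 2;       // bytes per mask element for i16

using ShuffleMask = std::array<int, NumElts>;  // -1 = undef, 0..7 = V1, 8..15 = V2
using ZeroableSet = std::array<bool, NumElts>; // true = element is known zero

struct ByteShiftMatch {
  bool LeftShift; // VSHLDQ/PSLLDQ if true, VSRLDQ/PSRLDQ if false
  int ByteShift;  // shift amount in bytes
  int Input;      // 0 = V1, 1 = V2
};

// Predicate 1: do the elements the shift fills with zeros line up with the
// zeroable elements of the mask? This does not depend on the input vector.
static bool checkZeros(const ZeroableSet &Zeroable, int Shift, bool LeftShift) {
  for (int i = 0; i < Shift; ++i)
    if (!Zeroable[i + (LeftShift ? 0 : NumLaneElts - Shift)])
      return false;
  return true;
}

// Predicate 2: do the remaining mask elements read a contiguous run from the
// chosen input, shifted in the requested direction?
static bool matchByteShift(const ShuffleMask &Mask, int Shift, bool LeftShift,
                           int Input) {
  int Pos = LeftShift ? Shift : 0;
  int Low = (LeftShift ? 0 : Shift) + Input * NumElts;
  int Len = NumLaneElts - Shift;
  for (int i = 0; i < Len; ++i)
    if (Mask[Pos + i] != -1 && Mask[Pos + i] != Low + i)
      return false;
  return true;
}

// Driver: loop over every (Shift, direction, input) alternative so the two
// predicates are always tested for the same combination.
static std::optional<ByteShiftMatch>
lowerAsByteShift(const ShuffleMask &Mask, const ZeroableSet &Zeroable) {
  for (int Shift = 1; Shift < NumLaneElts; ++Shift)
    for (bool LeftShift : {true, false})
      if (checkZeros(Zeroable, Shift, LeftShift))
        for (int Input : {0, 1})
          if (matchByteShift(Mask, Shift, LeftShift, Input))
            return ByteShiftMatch{LeftShift, Shift * Scale, Input};
  return std::nullopt;
}

int main() {
  // The "[ -1, 5, 6, 7, zz, zz, zz, zz ]" pattern from the comment block in
  // the hunk above: element 0 is undef, elements 4-7 read an all-zero V2.
  ShuffleMask Mask = {-1, 5, 6, 7, 12, 13, 14, 15};
  ZeroableSet Zeroable = {true, false, false, false, true, true, true, true};
  if (auto Match = lowerAsByteShift(Mask, Zeroable))
    std::printf("%s by %d bytes of V%d\n",
                Match->LeftShift ? "VSHLDQ" : "VSRLDQ", Match->ByteShift,
                Match->Input + 1);
  return 0;
}

Compiled as C++17, the sketch prints "VSRLDQ by 8 bytes of V1" for that
pattern; the byte count mirrors the ByteShift = Shift * Scale computation in
the patched code, with Scale being 2 bytes per element for v8i16.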