From 265bcb1e5b106a7c5db2bfcfb13cceffe0c413be Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Fri, 31 Aug 2012 20:12:31 +0000 Subject: [PATCH] Fix PR12359 - In addition to undefined, if V2 is zero vector, skip 2nd PSHUFB and POR as well as PSHUFB will zero elements with negative indices. Patch by Sriram Murali git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163018 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 8 +++++--- test/CodeGen/X86/pr12359.ll | 10 ++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) create mode 100644 test/CodeGen/X86/pr12359.ll diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f9184f693db..f4329d3bf9d 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5881,8 +5881,6 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, DebugLoc dl = SVOp->getDebugLoc(); ArrayRef MaskVals = SVOp->getMask(); - bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; - // If we have SSSE3, case 1 is generated when all result bytes come from // one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is // present, fall back to case 3. @@ -5906,7 +5904,11 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1, DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, &pshufbMask[0], 16)); - if (V2IsUndef) + + // As PSHUFB will zero elements with negative indices, it's safe to ignore + // the 2nd operand if it's undefined or zero. + if (V2.getOpcode() == ISD::UNDEF || + ISD::isBuildVectorAllZeros(V2.getNode())) return V1; // Calculate the shuffle mask for the second input, shuffle it, and diff --git a/test/CodeGen/X86/pr12359.ll b/test/CodeGen/X86/pr12359.ll new file mode 100644 index 00000000000..024b163fa71 --- /dev/null +++ b/test/CodeGen/X86/pr12359.ll @@ -0,0 +1,10 @@ +; RUN: llc -asm-verbose -mtriple=x86_64-unknown-unknown -mcpu=corei7 < %s | FileCheck %s +define <16 x i8> @shuf(<16 x i8> %inval1) { +entry: + %0 = shufflevector <16 x i8> %inval1, <16 x i8> zeroinitializer, <16 x i32> + ret <16 x i8> %0 +; CHECK: shuf +; CHECK: # BB#0: # %entry +; CHECK-NEXT: pshufb +; CHECK-NEXT: ret +} -- 2.34.1