[x86] PR24562: fix incorrect folding of PSHUFB nodes with a mask where all indices...

author Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>

Mon, 12 Oct 2015 11:25:41 +0000 (11:25 +0000)

committer Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>

Mon, 12 Oct 2015 11:25:41 +0000 (11:25 +0000)
author Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
Mon, 12 Oct 2015 11:25:41 +0000 (11:25 +0000)
committer Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
Mon, 12 Oct 2015 11:25:41 +0000 (11:25 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 9ffce63501b93c59c04e68e9996379393a627407..82f9fa7a5e845483db20a3123c7fbb748c885416 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -21991,10 +21991,22 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
    MVT RootVT = Root.getSimpleValueType();
    SDLoc DL(Root);
  
    MVT RootVT = Root.getSimpleValueType();
    SDLoc DL(Root);
  
-  // Just remove no-op shuffle masks.
    if (Mask.size() == 1) {
    if (Mask.size() == 1) {
-    DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Input),
-                  /*AddTo*/ true);
+    int Index = Mask[0];
+    assert((Index >= 0 || Index == SM_SentinelUndef ||
+            Index == SM_SentinelZero) &&
+           "Invalid shuffle index found!");
+
+    // We may end up with an accumulated mask of size 1 as a result of
+    // widening of shuffle operands (see function canWidenShuffleElements).
+    // If the only shuffle index is equal to SM_SentinelZero then propagate
+    // a zero vector. Otherwise, the combined shuffle mask is a no-op shuffle
+    // mask, and therefore the entire chain of shuffles can be folded away.
+    if (Index == SM_SentinelZero)
+      DCI.CombineTo(Root.getNode(), getZeroVector(RootVT, Subtarget, DAG, DL));
+    else
+      DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Input),
+                    /*AddTo*/ true);
      return true;
    }
  
      return true;
    }
  
diff --git a/test/CodeGen/X86/pr24562.ll b/test/CodeGen/X86/pr24562.ll

new file mode 100644 (file)

index 0000000..f2e134b
--- /dev/null
+++ b/test/CodeGen/X86/pr24562.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mattr=+ssse3 -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
+
+; The pshufb from function @pr24562 was wrongly folded into its first operand
+; as a result of a late target shuffle combine on the legalized selection dag.
+; 
+; Check that the pshufb is correctly folded to a zero vector.
+
+define <2 x i64> @pr24562() {
+; CHECK-LABEL: pr24562:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    xorps %xmm0, %xmm0
+; CHECK-NEXT:    retq
+entry:
+  %0 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) #2
+  %1 = bitcast <16 x i8> %0 to <2 x i64>
+  ret <2 x i64> %1
+}
+
+declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
author	Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
	Mon, 12 Oct 2015 11:25:41 +0000 (11:25 +0000)
committer	Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
	Mon, 12 Oct 2015 11:25:41 +0000 (11:25 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
test/CodeGen/X86/pr24562.ll	[new file with mode: 0644]	patch | blob