[X86] Fix a bug in the lowering of the mask of VSELECT.

author Quentin Colombet <qcolombet@apple.com>

Mon, 20 Oct 2014 23:13:30 +0000 (23:13 +0000)

committer Quentin Colombet <qcolombet@apple.com>

Mon, 20 Oct 2014 23:13:30 +0000 (23:13 +0000)
author Quentin Colombet <qcolombet@apple.com>
Mon, 20 Oct 2014 23:13:30 +0000 (23:13 +0000)
committer Quentin Colombet <qcolombet@apple.com>
Mon, 20 Oct 2014 23:13:30 +0000 (23:13 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index a8afe816d9027db475a00c8d6fdf1c8f683d832b..543a2fdc99f0a75ea35345565563cb817a156437 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -22598,7 +22598,12 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
      TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(),
                                            DCI.isBeforeLegalizeOps());
      if (TLO.ShrinkDemandedConstant(Cond, DemandedMask) ||
-        TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne, TLO))
+        (TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne,
+                                  TLO) &&
+         // Don't optimize vectors of constants. Those are handled by
+         // the generic code and all the bits must be properly set for
+         // the generic optimizer.
+         !ISD::isBuildVectorOfConstantSDNodes(TLO.New.getNode())))
        DCI.CommitTargetLoweringOpt(TLO);
    }
  
diff --git a/test/CodeGen/X86/vselect-avx.ll b/test/CodeGen/X86/vselect-avx.ll

new file mode 100644 (file)

index 0000000..2d7ccf3
--- /dev/null
+++ b/test/CodeGen/X86/vselect-avx.ll
@@ -0,0 +1,27 @@
+; RUN: llc %s -o - -mattr=+avx | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx"
+
+; For this test we used to optimize the <i1 true, i1 false, i1 false, i1 true>
+; mask into <i32 2147483648, i32 0, i32 0, i32 2147483648> because we thought
+; we would lower that into a blend where only the high bit is relevant.
+; However, since the whole mask is constant, this is simplified incorrectly
+; by the generic code, because it was expecting -1 in place of 2147483648.
+; 
+; The problem does not occur without AVX, because vselect of v4i32 is neither
+; legal nor custom.
+;
+; <rdar://problem/18675020>
+
+; CHECK-LABEL: test:
+; CHECK: vmovdqa {{.*#+}} xmm1 = [65533,124,125,14807]
+; CHECK: vmovdqa {{.*#+}} xmm1 = [65535,0,0,65535]
+; CHECK: ret
+define void @test(<4 x i16>* %a, <4 x i16>* %b) {
+body:
+  %predphi = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i16> <i16 -3, i16 545, i16 4385, i16 14807>, <4 x i16> <i16 123, i16 124, i16 125, i16 127>
+  %predphi42 = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> zeroinitializer
+  store <4 x i16> %predphi, <4 x i16>* %a, align 8
+  store <4 x i16> %predphi42, <4 x i16>* %b, align 8
+  ret void
+}
author	Quentin Colombet <qcolombet@apple.com>
	Mon, 20 Oct 2014 23:13:30 +0000 (23:13 +0000)
committer	Quentin Colombet <qcolombet@apple.com>
	Mon, 20 Oct 2014 23:13:30 +0000 (23:13 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
test/CodeGen/X86/vselect-avx.ll	[new file with mode: 0644]	patch \| blob