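Teach the DAGCombiner how to fold 'vselect' dag nodes according to two rules: (vselect (build_vector all_ones), N1, N2) -> N1 and (vselect (build_vector all_zeros), N1, N2) -> N2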

author Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>

Wed, 8 Jan 2014 18:33:04 +0000 (18:33 +0000)

committer Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>

Wed, 8 Jan 2014 18:33:04 +0000 (18:33 +0000)
author Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
Wed, 8 Jan 2014 18:33:04 +0000 (18:33 +0000)
committer Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
Wed, 8 Jan 2014 18:33:04 +0000 (18:33 +0000)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 8f4da5a567ab99479d231db1502fac1a531967cc..da9559ee87a7bc2becf3f32ddc10d660749494fe 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4402,6 +4402,13 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
      return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
    }
  
+  // Fold (vselect (build_vector all_ones), N1, N2) -> N1
+  if (ISD::isBuildVectorAllOnes(N0.getNode()))
+    return N1;
+  // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
+  if (ISD::isBuildVectorAllZeros(N0.getNode()))
+    return N2;
+
    return SDValue();
  }
  
diff --git a/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/test/CodeGen/AArch64/neon-bitwise-instructions.ll

index 699f458dcc29975c13315d892d698dd41c05e15a..7e5b6935b900fe14d6b1cc6da753aba029f52e00 100644 (file)
--- a/test/CodeGen/AArch64/neon-bitwise-instructions.ll
+++ b/test/CodeGen/AArch64/neon-bitwise-instructions.ll
@@ -40,16 +40,16 @@ define <16 x i8> @xor16xi8(<16 x i8> %a, <16 x i8> %b) {
  
  define <8 x i8> @bsl8xi8_const(<8 x i8> %a, <8 x i8> %b)  {
  ;CHECK:  bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-       %tmp1 = and <8 x i8> %a, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
-       %tmp2 = and <8 x i8> %b, < i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0 >
+       %tmp1 = and <8 x i8> %a, < i8 -1, i8 -1, i8 0, i8 0, i8 -1, i8 -1, i8 0, i8 0 >
+       %tmp2 = and <8 x i8> %b, < i8 0, i8 0, i8 -1, i8 -1, i8 0, i8 0, i8 -1, i8 -1 >
         %tmp3 = or <8 x i8> %tmp1, %tmp2
         ret <8 x i8> %tmp3
  }
  
  define <16 x i8> @bsl16xi8_const(<16 x i8> %a, <16 x i8> %b) {
-;CHECK:  bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-       %tmp1 = and <16 x i8> %a, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
-       %tmp2 = and <16 x i8> %b, < i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0 >
+;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+       %tmp1 = and <16 x i8> %a, < i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0 >
+       %tmp2 = and <16 x i8> %b, < i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1 >
         %tmp3 = or <16 x i8> %tmp1, %tmp2
         ret <16 x i8> %tmp3
  }
@@ -444,10 +444,11 @@ define <2 x i64> @orn2xi64(<2 x i64> %a, <2 x i64> %b)  {
    %tmp2 = or <2 x i64> %a, %tmp1
    ret <2 x i64> %tmp2
  }
+
  define <2 x i32> @bsl2xi32_const(<2 x i32> %a, <2 x i32> %b)  {
  ;CHECK:  bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-       %tmp1 = and <2 x i32> %a, < i32 -1, i32 -1 >
-       %tmp2 = and <2 x i32> %b, < i32 0, i32 0 >
+       %tmp1 = and <2 x i32> %a, < i32 -1, i32 0 >
+       %tmp2 = and <2 x i32> %b, < i32 0, i32 -1 >
         %tmp3 = or <2 x i32> %tmp1, %tmp2
         ret <2 x i32> %tmp3
  }
@@ -455,40 +456,40 @@ define <2 x i32> @bsl2xi32_const(<2 x i32> %a, <2 x i32> %b)  {
  
  define <4 x i16> @bsl4xi16_const(<4 x i16> %a, <4 x i16> %b)  {
  ;CHECK:  bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-       %tmp1 = and <4 x i16> %a, < i16 -1, i16 -1, i16 -1,i16 -1 >
-       %tmp2 = and <4 x i16> %b, < i16 0, i16 0,i16 0, i16 0 >
+       %tmp1 = and <4 x i16> %a, < i16 -1, i16 0, i16 -1,i16 0 >
+       %tmp2 = and <4 x i16> %b, < i16 0, i16 -1,i16 0, i16 -1 >
         %tmp3 = or <4 x i16> %tmp1, %tmp2
         ret <4 x i16> %tmp3
  }
  
  define <1 x i64> @bsl1xi64_const(<1 x i64> %a, <1 x i64> %b)  {
  ;CHECK:  bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-       %tmp1 = and <1 x i64> %a, < i64 -1 >
-       %tmp2 = and <1 x i64> %b, < i64 0 >
+       %tmp1 = and <1 x i64> %a, < i64 -16 >
+       %tmp2 = and <1 x i64> %b, < i64 15 >
         %tmp3 = or <1 x i64> %tmp1, %tmp2
         ret <1 x i64> %tmp3
  }
  
  define <4 x i32> @bsl4xi32_const(<4 x i32> %a, <4 x i32> %b)  {
  ;CHECK:  bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-       %tmp1 = and <4 x i32> %a, < i32 -1, i32 -1, i32 -1, i32 -1 >
-       %tmp2 = and <4 x i32> %b, < i32 0, i32 0, i32 0, i32 0 >
+       %tmp1 = and <4 x i32> %a, < i32 -1, i32 0, i32 -1, i32 0 >
+       %tmp2 = and <4 x i32> %b, < i32 0, i32 -1, i32 0, i32 -1 >
         %tmp3 = or <4 x i32> %tmp1, %tmp2
         ret <4 x i32> %tmp3
  }
  
  define <8 x i16> @bsl8xi16_const(<8 x i16> %a, <8 x i16> %b)  {
  ;CHECK:  bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-       %tmp1 = and <8 x i16> %a, < i16 -1, i16 -1, i16 -1,i16 -1, i16 -1, i16 -1, i16 -1,i16 -1 >
-       %tmp2 = and <8 x i16> %b, < i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0 >
+       %tmp1 = and <8 x i16> %a, < i16 -1, i16 -1, i16 0,i16 0, i16 -1, i16 -1, i16 0,i16 0 >
+       %tmp2 = and <8 x i16> %b, < i16 0, i16 0, i16 -1, i16 -1, i16 0, i16 0, i16 -1, i16 -1 >
         %tmp3 = or <8 x i16> %tmp1, %tmp2
         ret <8 x i16> %tmp3
  }
  
  define <2 x i64> @bsl2xi64_const(<2 x i64> %a, <2 x i64> %b)  {
  ;CHECK:  bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-       %tmp1 = and <2 x i64> %a, < i64 -1, i64 -1 >
-       %tmp2 = and <2 x i64> %b, < i64 0, i64 0 >
+       %tmp1 = and <2 x i64> %a, < i64 -1, i64 0 >
+       %tmp2 = and <2 x i64> %b, < i64 0, i64 -1 >
         %tmp3 = or <2 x i64> %tmp1, %tmp2
         ret <2 x i64> %tmp3
  }
diff --git a/test/CodeGen/X86/2011-10-19-widen_vselect.ll b/test/CodeGen/X86/2011-10-19-widen_vselect.ll

index e08c5b28c5ec55f13a147c5d71f0841aa40ecec3..222068dc579faacdbf2e80f069d8af706e6be57b 100644 (file)
--- a/test/CodeGen/X86/2011-10-19-widen_vselect.ll
+++ b/test/CodeGen/X86/2011-10-19-widen_vselect.ll
@@ -1,12 +1,10 @@
-; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
  
-target triple = "x86_64-unknown-linux-gnu"
-
-; Make sure that we don't crash when legalizng vselect and vsetcc and that
+; Make sure that we don't crash when legalizing vselect and vsetcc and that
  ; we are able to generate vector blend instructions.
  
-; CHECK: simple_widen
-; CHECK: blend
+; CHECK-LABEL: simple_widen
+; CHECK-NOT: blend
  ; CHECK: ret
  define void @simple_widen() {
  entry:
@@ -15,7 +13,7 @@ entry:
    ret void
  }
  
-; CHECK: complex_inreg_work
+; CHECK-LABEL: complex_inreg_work
  ; CHECK: blend
  ; CHECK: ret
  
@@ -27,8 +25,8 @@ entry:
    ret void
  }
  
-; CHECK: zero_test
-; CHECK: blend
+; CHECK-LABEL: zero_test
+; CHECK: xorps %xmm0, %xmm0
  ; CHECK: ret
  
  define void @zero_test() {
@@ -38,7 +36,7 @@ entry:
    ret void
  }
  
-; CHECK: full_test
+; CHECK-LABEL: full_test
  ; CHECK: blend
  ; CHECK: ret
  
diff --git a/test/CodeGen/X86/avx512-vselect-crash.ll b/test/CodeGen/X86/avx512-vselect-crash.ll

index 7cca51d5e2dc58503e4b4880bd53e1179611661f..9d652d36a5249e3d309d34e6dfc40f256717c4d4 100644 (file)
--- a/test/CodeGen/X86/avx512-vselect-crash.ll
+++ b/test/CodeGen/X86/avx512-vselect-crash.ll
@@ -1,7 +1,7 @@
  ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
  
  ; CHECK-LABEL: test
-; CHECK: vmovdqu32
+; CHECK: vpxord
  ; CHECK: ret
  define <16 x i32> @test() {
  entry:
diff --git a/test/CodeGen/X86/vselect.ll b/test/CodeGen/X86/vselect.ll

index d431f59c4b8532f011d54ee6cbe1124973840dd4..36c79838ff2cc0af90978411ffb0006dd27bb83a 100644 (file)
--- a/test/CodeGen/X86/vselect.ll
+++ b/test/CodeGen/X86/vselect.ll
@@ -130,4 +130,47 @@ define <8 x i16> @test13(<8 x i16> %a, <8 x i16> %b) {
  ; CHECK-NOT: psraw
  ; CHECK: ret
  
+; Fold (vselect (build_vector AllOnes), N1, N2) -> N1
+
+define <4 x float> @test14(<4 x float> %a, <4 x float> %b) {
+  %1 = select <4 x i1> <i1 true, i1 undef, i1 true, i1 undef>, <4 x float> %a, <4 x float> %b
+  ret <4 x float> %1
+}
+; CHECK-LABEL: test14
+; CHECK-NOT: psllw
+; CHECK-NOT: psraw
+; CHECK-NOT: pcmpeq
+; CHECK: ret
+
+define <8 x i16> @test15(<8 x i16> %a, <8 x i16> %b) {
+  %1 = select <8 x i1> <i1 true, i1 true, i1 true, i1 undef, i1 undef, i1 true, i1 true, i1 undef>, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %1
+}
+; CHECK-LABEL: test15
+; CHECK-NOT: psllw
+; CHECK-NOT: psraw
+; CHECK-NOT: pcmpeq
+; CHECK: ret
+
+; Fold (vselect (build_vector AllZeros), N1, N2) -> N2
+
+define <4 x float> @test16(<4 x float> %a, <4 x float> %b) {
+  %1 = select <4 x i1> <i1 false, i1 undef, i1 false, i1 undef>, <4 x float> %a, <4 x float> %b
+  ret <4 x float> %1
+} 
+; CHECK-LABEL: test16
+; CHECK-NOT: psllw
+; CHECK-NOT: psraw
+; CHECK-NOT: xorps
+; CHECK: ret 
+
+define <8 x i16> @test17(<8 x i16> %a, <8 x i16> %b) {
+  %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 undef, i1 undef, i1 false, i1 false, i1 undef>, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %1
+}
+; CHECK-LABEL: test17
+; CHECK-NOT: psllw
+; CHECK-NOT: psraw
+; CHECK-NOT: xorps
+; CHECK: ret
author	Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
	Wed, 8 Jan 2014 18:33:04 +0000 (18:33 +0000)
committer	Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>
	Wed, 8 Jan 2014 18:33:04 +0000 (18:33 +0000)
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
test/CodeGen/AArch64/neon-bitwise-instructions.ll		patch \| blob \| history
test/CodeGen/X86/2011-10-19-widen_vselect.ll		patch \| blob \| history
test/CodeGen/X86/avx512-vselect-crash.ll		patch \| blob \| history
test/CodeGen/X86/vselect.ll		patch \| blob \| history