From 638e97f13512b8424fac28e4ed79964300bcf8b0 Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Wed, 8 Jan 2014 18:33:04 +0000 Subject: [PATCH] Teach the DAGCombiner how to fold 'vselect' dag nodes according to the following two rules: 1) fold (vselect (build_vector AllOnes), A, B) -> A 2) fold (vselect (build_vector AllZeros), A, B) -> B git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@198777 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 7 +++ .../AArch64/neon-bitwise-instructions.ll | 35 +++++++-------- test/CodeGen/X86/2011-10-19-widen_vselect.ll | 18 ++++---- test/CodeGen/X86/avx512-vselect-crash.ll | 2 +- test/CodeGen/X86/vselect.ll | 43 +++++++++++++++++++ 5 files changed, 77 insertions(+), 28 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 8f4da5a567a..da9559ee87a 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4402,6 +4402,13 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); } + // Fold (vselect (build_vector all_ones), N1, N2) -> N1 + if (ISD::isBuildVectorAllOnes(N0.getNode())) + return N1; + // Fold (vselect (build_vector all_zeros), N1, N2) -> N2 + if (ISD::isBuildVectorAllZeros(N0.getNode())) + return N2; + return SDValue(); } diff --git a/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/test/CodeGen/AArch64/neon-bitwise-instructions.ll index 699f458dcc2..7e5b6935b90 100644 --- a/test/CodeGen/AArch64/neon-bitwise-instructions.ll +++ b/test/CodeGen/AArch64/neon-bitwise-instructions.ll @@ -40,16 +40,16 @@ define <16 x i8> @xor16xi8(<16 x i8> %a, <16 x i8> %b) { define <8 x i8> @bsl8xi8_const(<8 x i8> %a, <8 x i8> %b) { ;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b - %tmp1 = and <8 x i8> %a, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > - %tmp2 = and <8 x i8> %b, < i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0 > + %tmp1 = and <8 x i8> %a, < i8 -1, i8 -1, i8 0, i8 0, i8 -1, i8 -1, i8 0, i8 0 > + %tmp2 = and <8 x i8> %b, < i8 0, i8 0, i8 -1, i8 -1, i8 0, i8 0, i8 -1, i8 -1 > %tmp3 = or <8 x i8> %tmp1, %tmp2 ret <8 x i8> %tmp3 } define <16 x i8> @bsl16xi8_const(<16 x i8> %a, <16 x i8> %b) { -;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b - %tmp1 = and <16 x i8> %a, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > - %tmp2 = and <16 x i8> %b, < i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0 > +;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b + %tmp1 = and <16 x i8> %a, < i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0 > + %tmp2 = and <16 x i8> %b, < i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1 > %tmp3 = or <16 x i8> %tmp1, %tmp2 ret <16 x i8> %tmp3 } @@ -444,10 +444,11 @@ define <2 x i64> @orn2xi64(<2 x i64> %a, <2 x i64> %b) { %tmp2 = or <2 x i64> %a, %tmp1 ret <2 x i64> %tmp2 } + define <2 x i32> @bsl2xi32_const(<2 x i32> %a, <2 x i32> %b) { ;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b - %tmp1 = and <2 x i32> %a, < i32 -1, i32 -1 > - %tmp2 = and <2 x i32> %b, < i32 0, i32 0 > + %tmp1 = and <2 x i32> %a, < i32 -1, i32 0 > + %tmp2 = and <2 x i32> %b, < i32 0, i32 -1 > %tmp3 = or <2 x i32> %tmp1, %tmp2 ret <2 x i32> %tmp3 } @@ -455,40 +456,40 @@ define <2 x i32> @bsl2xi32_const(<2 x i32> %a, <2 x i32> %b) { define <4 x i16> @bsl4xi16_const(<4 x i16> %a, <4 x i16> %b) { ;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b - %tmp1 = and <4 x i16> %a, < i16 -1, i16 -1, i16 -1,i16 -1 > - %tmp2 = and <4 x i16> %b, < i16 0, i16 0,i16 0, i16 0 > + %tmp1 = and <4 x i16> %a, < i16 -1, i16 0, i16 -1,i16 0 > + %tmp2 = and <4 x i16> %b, < i16 0, i16 -1,i16 0, i16 -1 > %tmp3 = or <4 x i16> %tmp1, %tmp2 ret <4 x i16> %tmp3 } define <1 x i64> @bsl1xi64_const(<1 x i64> %a, <1 x i64> %b) { ;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b - %tmp1 = and <1 x i64> %a, < i64 -1 > - %tmp2 = and <1 x i64> %b, < i64 0 > + %tmp1 = and <1 x i64> %a, < i64 -16 > + %tmp2 = and <1 x i64> %b, < i64 15 > %tmp3 = or <1 x i64> %tmp1, %tmp2 ret <1 x i64> %tmp3 } define <4 x i32> @bsl4xi32_const(<4 x i32> %a, <4 x i32> %b) { ;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b - %tmp1 = and <4 x i32> %a, < i32 -1, i32 -1, i32 -1, i32 -1 > - %tmp2 = and <4 x i32> %b, < i32 0, i32 0, i32 0, i32 0 > + %tmp1 = and <4 x i32> %a, < i32 -1, i32 0, i32 -1, i32 0 > + %tmp2 = and <4 x i32> %b, < i32 0, i32 -1, i32 0, i32 -1 > %tmp3 = or <4 x i32> %tmp1, %tmp2 ret <4 x i32> %tmp3 } define <8 x i16> @bsl8xi16_const(<8 x i16> %a, <8 x i16> %b) { ;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b - %tmp1 = and <8 x i16> %a, < i16 -1, i16 -1, i16 -1,i16 -1, i16 -1, i16 -1, i16 -1,i16 -1 > - %tmp2 = and <8 x i16> %b, < i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0 > + %tmp1 = and <8 x i16> %a, < i16 -1, i16 -1, i16 0,i16 0, i16 -1, i16 -1, i16 0,i16 0 > + %tmp2 = and <8 x i16> %b, < i16 0, i16 0, i16 -1, i16 -1, i16 0, i16 0, i16 -1, i16 -1 > %tmp3 = or <8 x i16> %tmp1, %tmp2 ret <8 x i16> %tmp3 } define <2 x i64> @bsl2xi64_const(<2 x i64> %a, <2 x i64> %b) { ;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b - %tmp1 = and <2 x i64> %a, < i64 -1, i64 -1 > - %tmp2 = and <2 x i64> %b, < i64 0, i64 0 > + %tmp1 = and <2 x i64> %a, < i64 -1, i64 0 > + %tmp2 = and <2 x i64> %b, < i64 0, i64 -1 > %tmp3 = or <2 x i64> %tmp1, %tmp2 ret <2 x i64> %tmp3 } diff --git a/test/CodeGen/X86/2011-10-19-widen_vselect.ll b/test/CodeGen/X86/2011-10-19-widen_vselect.ll index e08c5b28c5e..222068dc579 100644 --- a/test/CodeGen/X86/2011-10-19-widen_vselect.ll +++ b/test/CodeGen/X86/2011-10-19-widen_vselect.ll @@ -1,12 +1,10 @@ -; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s -target triple = "x86_64-unknown-linux-gnu" - -; Make sure that we don't crash when legalizng vselect and vsetcc and that +; Make sure that we don't crash when legalizing vselect and vsetcc and that ; we are able to generate vector blend instructions. -; CHECK: simple_widen -; CHECK: blend +; CHECK-LABEL: simple_widen +; CHECK-NOT: blend ; CHECK: ret define void @simple_widen() { entry: @@ -15,7 +13,7 @@ entry: ret void } -; CHECK: complex_inreg_work +; CHECK-LABEL: complex_inreg_work ; CHECK: blend ; CHECK: ret @@ -27,8 +25,8 @@ entry: ret void } -; CHECK: zero_test -; CHECK: blend +; CHECK-LABEL: zero_test +; CHECK: xorps %xmm0, %xmm0 ; CHECK: ret define void @zero_test() { @@ -38,7 +36,7 @@ entry: ret void } -; CHECK: full_test +; CHECK-LABEL: full_test ; CHECK: blend ; CHECK: ret diff --git a/test/CodeGen/X86/avx512-vselect-crash.ll b/test/CodeGen/X86/avx512-vselect-crash.ll index 7cca51d5e2d..9d652d36a52 100644 --- a/test/CodeGen/X86/avx512-vselect-crash.ll +++ b/test/CodeGen/X86/avx512-vselect-crash.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s ; CHECK-LABEL: test -; CHECK: vmovdqu32 +; CHECK: vpxord ; CHECK: ret define <16 x i32> @test() { entry: diff --git a/test/CodeGen/X86/vselect.ll b/test/CodeGen/X86/vselect.ll index d431f59c4b8..36c79838ff2 100644 --- a/test/CodeGen/X86/vselect.ll +++ b/test/CodeGen/X86/vselect.ll @@ -130,4 +130,47 @@ define <8 x i16> @test13(<8 x i16> %a, <8 x i16> %b) { ; CHECK-NOT: psraw ; CHECK: ret +; Fold (vselect (build_vector AllOnes), N1, N2) -> N1 + +define <4 x float> @test14(<4 x float> %a, <4 x float> %b) { + %1 = select <4 x i1> , <4 x float> %a, <4 x float> %b + ret <4 x float> %1 +} +; CHECK-LABEL: test14 +; CHECK-NOT: psllw +; CHECK-NOT: psraw +; CHECK-NOT: pcmpeq +; CHECK: ret + +define <8 x i16> @test15(<8 x i16> %a, <8 x i16> %b) { + %1 = select <8 x i1> , <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %1 +} +; CHECK-LABEL: test15 +; CHECK-NOT: psllw +; CHECK-NOT: psraw +; CHECK-NOT: pcmpeq +; CHECK: ret + +; Fold (vselect (build_vector AllZeros), N1, N2) -> N2 + +define <4 x float> @test16(<4 x float> %a, <4 x float> %b) { + %1 = select <4 x i1> , <4 x float> %a, <4 x float> %b + ret <4 x float> %1 +} +; CHECK-LABEL: test16 +; CHECK-NOT: psllw +; CHECK-NOT: psraw +; CHECK-NOT: xorps +; CHECK: ret + +define <8 x i16> @test17(<8 x i16> %a, <8 x i16> %b) { + %1 = select <8 x i1> , <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %1 +} +; CHECK-LABEL: test17 +; CHECK-NOT: psllw +; CHECK-NOT: psraw +; CHECK-NOT: xorps +; CHECK: ret -- 2.34.1