From 9472b4fbf96fb840eb416acd00bbee173c62e8da Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sat, 8 Dec 2012 22:49:19 +0000
Subject: [PATCH] Teach DAG combine to handle vector logical operations with
 vectors of all 1s or all 0s. These cases can show up when vectors are split
 for legalizing. Fix some tests that were dependent on these cases not being
 combined.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169684 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 30 ++++++++++++++++++++++++
 test/CodeGen/X86/2011-11-30-or.ll        |  8 +++----
 test/CodeGen/X86/fold-pcmpeqd-2.ll       | 10 ++++----
 test/CodeGen/X86/sse2-blend.ll           | 26 ++++++++++----------
 4 files changed, 51 insertions(+), 23 deletions(-)

diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6cd07c2c729..4a17dc0d2c3 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2427,6 +2427,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   if (VT.isVector()) {
     SDValue FoldedVOp = SimplifyVBinOp(N);
     if (FoldedVOp.getNode()) return FoldedVOp;
+
+    // fold (and x, 0) -> 0, vector edition
+    if (ISD::isBuildVectorAllZeros(N0.getNode()))
+      return N0;
+    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+      return N1;
+
+    // fold (and x, -1) -> x, vector edition
+    if (ISD::isBuildVectorAllOnes(N0.getNode()))
+      return N1;
+    if (ISD::isBuildVectorAllOnes(N1.getNode()))
+      return N0;
   }
 
   // fold (and x, undef) -> 0
@@ -3025,6 +3037,18 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
   if (VT.isVector()) {
     SDValue FoldedVOp = SimplifyVBinOp(N);
     if (FoldedVOp.getNode()) return FoldedVOp;
+
+    // fold (or x, 0) -> x, vector edition
+    if (ISD::isBuildVectorAllZeros(N0.getNode()))
+      return N1;
+    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+      return N0;
+
+    // fold (or x, -1) -> -1, vector edition
+    if (ISD::isBuildVectorAllOnes(N0.getNode()))
+      return N0;
+    if (ISD::isBuildVectorAllOnes(N1.getNode()))
+      return N1;
   }
 
   // fold (or x, undef) -> -1
@@ -3334,6 +3358,12 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
   if (VT.isVector()) {
     SDValue FoldedVOp = SimplifyVBinOp(N);
     if (FoldedVOp.getNode()) return FoldedVOp;
+
+    // fold (xor x, 0) -> x, vector edition
+    if (ISD::isBuildVectorAllZeros(N0.getNode()))
+      return N1;
+    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+      return N0;
   }
 
   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
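Note on the combines above: each fold keys off ISD::isBuildVectorAllZeros /
ISD::isBuildVectorAllOnes, so it fires for any constant BUILD_VECTOR operand,
including the splat halves produced when a wide select mask is split during
legalization. As a minimal sketch in LLVM IR (hypothetical functions, not
taken from this patch), both of the following should now lower to a plain
move of %x instead of a vector logic instruction:

  ; fold (and x, -1) -> x: the mask is a build vector of all ones
  define <4 x i32> @and_allones(<4 x i32> %x) {
    %r = and <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
    ret <4 x i32> %r
  }

  ; fold (or x, 0) -> x: zeroinitializer is a build vector of all zeros
  define <4 x i32> @or_allzeros(<4 x i32> %x) {
    %r = or <4 x i32> %x, zeroinitializer
    ret <4 x i32> %r
  }
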
diff --git a/test/CodeGen/X86/2011-11-30-or.ll b/test/CodeGen/X86/2011-11-30-or.ll
index 0a949eb29b8..f66248bc5a7 100644
--- a/test/CodeGen/X86/2011-11-30-or.ll
+++ b/test/CodeGen/X86/2011-11-30-or.ll
@@ -11,12 +11,12 @@ target triple = "x86_64-apple-macosx10.6.6"
 define void @select_func() {
 entry:
   %c.lobit.i.i.i = ashr <8 x i16> , 
-  %a35 = bitcast <8 x i16> %c.lobit.i.i.i to <2 x i64>
   %and.i56.i.i.i = and <8 x i16> %c.lobit.i.i.i, 
   %and.i5.i.i.i = bitcast <8 x i16> %and.i56.i.i.i to <2 x i64>
-  %neg.i.i.i.i = xor <2 x i64> %a35, <i64 -1, i64 -1>
-  %and.i.i.i.i = and <2 x i64> zeroinitializer, %neg.i.i.i.i
-  %or.i.i.i.i = or <2 x i64> %and.i.i.i.i, %and.i5.i.i.i
+  %neg.i.i.i.i = xor <8 x i16> %c.lobit.i.i.i, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  %and.i.i.i = and <8 x i16> %neg.i.i.i.i, 
+  %and.i2.i.i.i = bitcast <8 x i16> %and.i.i.i to <2 x i64>
+  %or.i.i.i.i = or <2 x i64> %and.i2.i.i.i, %and.i5.i.i.i
   %a37 = bitcast <2 x i64> %or.i.i.i.i to <8 x i16>
   store <8 x i16> %a37, <8 x i16> addrspace(1)* undef, align 4
   ret void
diff --git a/test/CodeGen/X86/fold-pcmpeqd-2.ll b/test/CodeGen/X86/fold-pcmpeqd-2.ll
index 9cf4607cf5b..6cd27618de1 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-2.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-2.ll
@@ -43,21 +43,21 @@ forbody:		; preds = %forcond
 	%mul171.i = fmul <4 x float> %add167.i, %sub140.i		; <<4 x float>> [#uses=1]
 	%add172.i = fadd <4 x float> %mul171.i, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 >		; <<4 x float>> [#uses=1]
 	%bitcast176.i = bitcast <4 x float> %add172.i to <4 x i32>		; <<4 x i32>> [#uses=1]
-	%andnps178.i = and <4 x i32> %bitcast176.i, zeroinitializer		; <<4 x i32>> [#uses=1]
+	%andnps178.i = add <4 x i32> %bitcast176.i, zeroinitializer		; <<4 x i32>> [#uses=1]
 	%bitcast179.i = bitcast <4 x i32> %andnps178.i to <4 x float>		; <<4 x float>> [#uses=1]
 	%mul186.i = fmul <4 x float> %bitcast179.i, zeroinitializer		; <<4 x float>> [#uses=1]
 	%bitcast190.i = bitcast <4 x float> %mul186.i to <4 x i32>		; <<4 x i32>> [#uses=1]
-	%andnps192.i = and <4 x i32> %bitcast190.i, zeroinitializer		; <<4 x i32>> [#uses=1]
+	%andnps192.i = add <4 x i32> %bitcast190.i, zeroinitializer		; <<4 x i32>> [#uses=1]
 	%xorps.i = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
-	%orps203.i = or <4 x i32> %andnps192.i, %xorps.i		; <<4 x i32>> [#uses=1]
+	%orps203.i = add <4 x i32> %andnps192.i, %xorps.i		; <<4 x i32>> [#uses=1]
 	%bitcast204.i = bitcast <4 x i32> %orps203.i to <4 x float>		; <<4 x float>> [#uses=1]
 	%mul310 = fmul <4 x float> %bitcast204.i104, zeroinitializer		; <<4 x float>> [#uses=2]
 	%mul313 = fmul <4 x float> %bitcast204.i, zeroinitializer		; <<4 x float>> [#uses=1]
 	%cmpunord.i11 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i8 3) nounwind		; <<4 x float>> [#uses=1]
 	%bitcast6.i13 = bitcast <4 x float> %cmpunord.i11 to <4 x i32>		; <<4 x i32>> [#uses=2]
-	%andps.i14 = and <4 x i32> zeroinitializer, %bitcast6.i13		; <<4 x i32>> [#uses=1]
+	%andps.i14 = add <4 x i32> zeroinitializer, %bitcast6.i13		; <<4 x i32>> [#uses=1]
 	%not.i16 = xor <4 x i32> %bitcast6.i13, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
-	%andnps.i17 = and <4 x i32> zeroinitializer, %not.i16		; <<4 x i32>> [#uses=1]
+	%andnps.i17 = add <4 x i32> zeroinitializer, %not.i16		; <<4 x i32>> [#uses=1]
 	%orps.i18 = or <4 x i32> %andnps.i17, %andps.i14		; <<4 x i32>> [#uses=1]
 	%bitcast17.i19 = bitcast <4 x i32> %orps.i18 to <4 x float>		; <<4 x float>> [#uses=1]
 	%tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind		; <<4 x float>> [#uses=1]
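Like the 2011-11-30-or.ll hunk above, the fold-pcmpeqd-2.ll update rewrites
operations the new combines would now delete. It swaps the and/or opcodes for
add rather than regenerating CHECK lines: with the folds in place, and/or
against zeroinitializer or an all-ones vector evaluate to constants during
DAG combine, which would collapse the chains of values this test needs to
keep live. An add with the same operands survives, since this patch adds no
matching vector (add x, 0) fold. The idea in IR (illustrative values %x,
%dead, %live, not from the test):

  %dead = and <4 x i32> %x, zeroinitializer  ; now combines to zeroinitializer
  %live = add <4 x i32> %x, zeroinitializer  ; still reaches instruction selection
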
diff --git a/test/CodeGen/X86/sse2-blend.ll b/test/CodeGen/X86/sse2-blend.ll
index 2f4317bf294..67ce1be1352 100644
--- a/test/CodeGen/X86/sse2-blend.ll
+++ b/test/CodeGen/X86/sse2-blend.ll
@@ -28,33 +28,31 @@ define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) {
 
 ; Without forcing instructions, fall back to the preferred PS domain.
 ; CHECK: vsel_i64
-; CHECK: xorps
-; CHECK: andps
 ; CHECK: andnps
+; CHECK: andps
 ; CHECK: orps
 ; CHECK: ret
 
-define void@vsel_i64(<4 x i64>* %v1, <4 x i64>* %v2) {
-  %A = load <4 x i64>* %v1
-  %B = load <4 x i64>* %v2
-  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i64> %A, <4 x i64> %B
-  store <4 x i64 > %vsel, <4 x i64>* %v1
+define void@vsel_i64(<2 x i64>* %v1, <2 x i64>* %v2) {
+  %A = load <2 x i64>* %v1
+  %B = load <2 x i64>* %v2
+  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %A, <2 x i64> %B
+  store <2 x i64 > %vsel, <2 x i64>* %v1
   ret void
 }
 
 ; Without forcing instructions, fall back to the preferred PS domain.
 ; CHECK: vsel_double
-; CHECK: xorps
-; CHECK: andps
 ; CHECK: andnps
+; CHECK: andps
 ; CHECK: orps
 ; CHECK: ret
 
-define void@vsel_double(<4 x double>* %v1, <4 x double>* %v2) {
-  %A = load <4 x double>* %v1
-  %B = load <4 x double>* %v2
-  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x double> %A, <4 x double> %B
-  store <4 x double > %vsel, <4 x double>* %v1
+define void@vsel_double(<2 x double>* %v1, <2 x double>* %v2) {
+  %A = load <2 x double>* %v1
+  %B = load <2 x double>* %v2
+  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x double> %A, <2 x double> %B
+  store <2 x double > %vsel, <2 x double>* %v1
   ret void
 }
-- 
2.34.1
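The sse2-blend.ll changes show the motivation for the combines: a <4 x i64>
select is split for legalization into two <2 x i64> operations, and any half
whose mask is uniform becomes an all-ones or all-zeros BUILD_VECTOR that the
new folds reduce to a plain copy, so the old xorps/andps/andnps/orps patterns
no longer match. Narrowing the tests to <2 x i64> and <2 x double> with a
mixed mask keeps the and/andnot/or blend lowering in place. A sketch of the
wide form whose halves now simplify (hypothetical function, assuming a
<4 x i1> mask whose high half is all true):

  define <4 x i64> @wide_select(<4 x i64> %a, <4 x i64> %b) {
    ; after splitting, the high <2 x i64> half has an all-ones mask, so its
    ; (or (and a, -1), (and b, 0)) expansion folds away to %a's high half
    %r = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i64> %a, <4 x i64> %b
    ret <4 x i64> %r
  }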