From: Andrea Di Biagio Date: Fri, 21 Nov 2014 14:32:06 +0000 (+0000) Subject: [DAG] Teach how to turn a build_vector into a shuffle if some of the operands are... X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=607099b6972b59c9b786a147655c674299bae4de;p=oota-llvm.git [DAG] Teach how to turn a build_vector into a shuffle if some of the operands are zero. Before this patch, the DAGCombiner only tried to convert build_vector dag nodes into shuffles if all operands were either extract_vector_elt or undef. This patch improves that logic and teaches the DAGCombiner how to deal with build_vector dag nodes where one or more operands are zero. A build_vector dag node with some zero operands is turned into a shuffle only if the resulting shuffle mask is legal for the target. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222536 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 663fe04792c..201429fe754 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10557,26 +10557,37 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { return SDValue(); SDValue VecIn1, VecIn2; + bool UsesZeroVector = false; for (unsigned i = 0; i != NumInScalars; ++i) { + SDValue Op = N->getOperand(i); // Ignore undef inputs. - if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; + if (Op.getOpcode() == ISD::UNDEF) continue; + + // See if we can combine this build_vector into a blend with a zero vector. + if (!VecIn2.getNode() && ((Op.getOpcode() == ISD::Constant && + cast<ConstantSDNode>(Op.getNode())->isNullValue()) || + (Op.getOpcode() == ISD::ConstantFP && + cast<ConstantFPSDNode>(Op.getNode())->getValueAPF().isZero()))) { + UsesZeroVector = true; + continue; + } // If this input is something other than a EXTRACT_VECTOR_ELT with a // constant index, bail out. 
- if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT || - !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) { + if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + !isa<ConstantSDNode>(Op.getOperand(1))) { VecIn1 = VecIn2 = SDValue(nullptr, 0); break; } // We allow up to two distinct input vectors. - SDValue ExtractedFromVec = N->getOperand(i).getOperand(0); + SDValue ExtractedFromVec = Op.getOperand(0); if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) continue; if (!VecIn1.getNode()) { VecIn1 = ExtractedFromVec; - } else if (!VecIn2.getNode()) { + } else if (!VecIn2.getNode() && !UsesZeroVector) { VecIn2 = ExtractedFromVec; } else { // Too many inputs. @@ -10589,16 +10600,26 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (VecIn1.getNode()) { SmallVector<int, 8> Mask; for (unsigned i = 0; i != NumInScalars; ++i) { - if (N->getOperand(i).getOpcode() == ISD::UNDEF) { + unsigned Opcode = N->getOperand(i).getOpcode(); + if (Opcode == ISD::UNDEF) { Mask.push_back(-1); continue; } + // Operands can also be zero. + if (Opcode != ISD::EXTRACT_VECTOR_ELT) { + assert(UsesZeroVector && + (Opcode == ISD::Constant || Opcode == ISD::ConstantFP) && + "Unexpected node found!"); + Mask.push_back(NumInScalars+i); + continue; + } + // If extracting from the first vector, just use the index directly. SDValue Extract = N->getOperand(i); SDValue ExtVal = Extract.getOperand(1); + unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue(); if (Extract.getOperand(0) == VecIn1) { - unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue(); if (ExtIndex > VT.getVectorNumElements()) return SDValue(); @@ -10607,10 +10628,13 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { } // Otherwise, use InIdx + VecSize - unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue(); - Mask.push_back(Idx+NumInScalars); + Mask.push_back(NumInScalars+ExtIndex); } + // Avoid introducing illegal shuffles with zero. 
+ if (UsesZeroVector && !TLI.isVectorClearMaskLegal(Mask, VT)) + return SDValue(); + // We can't generate a shuffle node with mismatched input and output types. // Attempt to transform a single input vector to the correct type. if ((VT != VecIn1.getValueType())) { @@ -10634,8 +10658,12 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { VecIn1, DAG.getUNDEF(VecIn1.getValueType())); } - // If VecIn2 is unused then change it to undef. - VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); + if (UsesZeroVector) + VecIn2 = VT.isInteger() ? DAG.getConstant(0, VT) : + DAG.getConstantFP(0.0, VT); + else + // If VecIn2 is unused then change it to undef. + VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); // Check that we were able to transform all incoming values to the same // type. diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll index d5c6f74e4b4..2bdfafe8374 100644 --- a/test/CodeGen/X86/sse41.ll +++ b/test/CodeGen/X86/sse41.ll @@ -527,14 +527,14 @@ define <4 x float> @shuf_X00A(<4 x float> %x, <4 x float> %a) { ; X32: ## BB#0: ; X32-NEXT: xorps %xmm2, %xmm2 ; X32-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3] -; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0] +; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] ; X32-NEXT: retl ; ; X64-LABEL: shuf_X00A: ; X64: ## BB#0: ; X64-NEXT: xorps %xmm2, %xmm2 ; X64-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3] -; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0] +; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] ; X64-NEXT: retq %vecext = extractelement <4 x float> %x, i32 0 %vecinit = insertelement <4 x float> undef, float %vecext, i32 0 @@ -549,7 +549,7 @@ define <4 x float> @shuf_X00X(<4 x float> %x, <4 x float> %a) { ; X32: ## BB#0: ; X32-NEXT: xorps %xmm1, %xmm1 ; X32-NEXT: blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] -; X32-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],zero,zero,xmm0[0] +; X32-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[0] ; X32-NEXT: 
movaps %xmm1, %xmm0 ; X32-NEXT: retl ; @@ -557,7 +557,7 @@ define <4 x float> @shuf_X00X(<4 x float> %x, <4 x float> %a) { ; X64: ## BB#0: ; X64-NEXT: xorps %xmm1, %xmm1 ; X64-NEXT: blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] -; X64-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],zero,zero,xmm0[0] +; X64-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[0] ; X64-NEXT: movaps %xmm1, %xmm0 ; X64-NEXT: retq %vecext = extractelement <4 x float> %x, i32 0 @@ -572,8 +572,8 @@ define <4 x float> @shuf_X0YC(<4 x float> %x, <4 x float> %a) { ; X32-LABEL: shuf_X0YC: ; X32: ## BB#0: ; X32-NEXT: xorps %xmm2, %xmm2 -; X32-NEXT: blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3] -; X32-NEXT: insertps {{.*#+}} xmm2 = xmm2[0],zero,xmm0[1],zero +; X32-NEXT: blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1],xmm0[2,3] +; X32-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1],xmm0[1],zero ; X32-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm1[2] ; X32-NEXT: movaps %xmm2, %xmm0 ; X32-NEXT: retl @@ -581,8 +581,8 @@ define <4 x float> @shuf_X0YC(<4 x float> %x, <4 x float> %a) { ; X64-LABEL: shuf_X0YC: ; X64: ## BB#0: ; X64-NEXT: xorps %xmm2, %xmm2 -; X64-NEXT: blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3] -; X64-NEXT: insertps {{.*#+}} xmm2 = xmm2[0],zero,xmm0[1],zero +; X64-NEXT: blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1],xmm0[2,3] +; X64-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1],xmm0[1],zero ; X64-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm1[2] ; X64-NEXT: movaps %xmm2, %xmm0 ; X64-NEXT: retq @@ -741,8 +741,7 @@ define <4 x i32> @i32_shuf_X00X(<4 x i32> %x, <4 x i32> %a) { define <4 x i32> @i32_shuf_X0YC(<4 x i32> %x, <4 x i32> %a) { ; X32-LABEL: i32_shuf_X0YC: ; X32: ## BB#0: -; X32-NEXT: xorps %xmm2, %xmm2 -; X32-NEXT: blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3] +; X32-NEXT: pmovzxdq %xmm0, %xmm2 ; X32-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1],xmm0[1],zero ; X32-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm1[2] ; X32-NEXT: movaps %xmm2, %xmm0 @@ -750,8 +749,7 @@ define <4 x i32> @i32_shuf_X0YC(<4 x i32> %x, <4 x i32> %a) { 
; ; X64-LABEL: i32_shuf_X0YC: ; X64: ## BB#0: -; X64-NEXT: xorps %xmm2, %xmm2 -; X64-NEXT: blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3] +; X64-NEXT: pmovzxdq %xmm0, %xmm2 ; X64-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1],xmm0[1],zero ; X64-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm1[2] ; X64-NEXT: movaps %xmm2, %xmm0 @@ -1146,3 +1144,42 @@ entry: %vecinit3 = insertelement <4 x float> %vecinit2, float 0.000000e+00, i32 3 ret <4 x float> %vecinit3 } + +define <4 x float> @build_vector_to_shuffle_1(<4 x float> %A) { +; X32-LABEL: build_vector_to_shuffle_1: +; X32: ## BB#0: +; X32-NEXT: xorps %xmm1, %xmm1 +; X32-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3] +; X32-NEXT: retl +; +; X64-LABEL: build_vector_to_shuffle_1: +; X64: ## BB#0: +; X64-NEXT: xorps %xmm1, %xmm1 +; X64-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3] +; X64-NEXT: retq +entry: + %vecext = extractelement <4 x float> %A, i32 1 + %vecinit = insertelement <4 x float> zeroinitializer, float %vecext, i32 1 + %vecinit1 = insertelement <4 x float> %vecinit, float 0.0, i32 2 + %vecinit3 = shufflevector <4 x float> %vecinit1, <4 x float> %A, <4 x i32> <i32 0, i32 1, i32 2, i32 7> + ret <4 x float> %vecinit3 +} + +define <4 x float> @build_vector_to_shuffle_2(<4 x float> %A) { +; X32-LABEL: build_vector_to_shuffle_2: +; X32: ## BB#0: +; X32-NEXT: xorps %xmm1, %xmm1 +; X32-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3] +; X32-NEXT: retl +; +; X64-LABEL: build_vector_to_shuffle_2: +; X64: ## BB#0: +; X64-NEXT: xorps %xmm1, %xmm1 +; X64-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3] +; X64-NEXT: retq +entry: + %vecext = extractelement <4 x float> %A, i32 1 + %vecinit = insertelement <4 x float> zeroinitializer, float %vecext, i32 1 + %vecinit1 = insertelement <4 x float> %vecinit, float 0.0, i32 2 + ret <4 x float> %vecinit1 +}