From 41cda401577bdc8b2d357199671dd6e038f0c83a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 24 Feb 2015 22:08:56 +0000 Subject: [PATCH] Reapplied D7816 & rL230177 & rL230278 - with an additional fix to ensure that the smallest build vector input scalar type is always used. Additional (crash) test cases already committed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230388 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 110 +++++++++++++---------- test/CodeGen/X86/vector-zext.ll | 33 +++---- 2 files changed, 76 insertions(+), 67 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 51494d971d5..0437f59b4fe 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -385,7 +385,7 @@ namespace { bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl &StoreNodes, EVT MemVT, unsigned NumElem, bool IsConstantSrc, bool UseVector); - + /// Merge consecutive store operations into a wide store. /// This optimization uses wide integers or vectors when possible. /// \return True if some memory operations were changed. @@ -4913,7 +4913,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MST->getPointerInfo(), + getMachineMemOperand(MST->getPointerInfo(), MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, MST->getAAInfo(), MST->getRanges()); @@ -4925,7 +4925,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { DAG.getConstant(IncrementSize, Ptr.getValueType())); MMO = DAG.getMachineFunction().
- getMachineMemOperand(MST->getPointerInfo(), + getMachineMemOperand(MST->getPointerInfo(), MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment, MST->getAAInfo(), MST->getRanges()); @@ -4988,7 +4988,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MLD->getPointerInfo(), + getMachineMemOperand(MLD->getPointerInfo(), MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), MLD->getRanges()); @@ -5000,7 +5000,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { DAG.getConstant(IncrementSize, Ptr.getValueType())); MMO = DAG.getMachineFunction(). - getMachineMemOperand(MLD->getPointerInfo(), + getMachineMemOperand(MLD->getPointerInfo(), MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); @@ -9949,11 +9949,11 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( // Make sure we have something to merge. if (NumElem < 2) return false; - + int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; unsigned EarliestNodeUsed = 0; - + for (unsigned i=0; i < NumElem; ++i) { // Find a chain for the new wide-store operand. Notice that some // of the store nodes that we found may not be selected for inclusion @@ -9962,11 +9962,11 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) EarliestNodeUsed = i; } - + // The earliest Node in the DAG. LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; SDLoc DL(StoreNodes[0].MemNode); - + SDValue StoredVal; if (UseVector) { // Find a legal type for the vector store. @@ -9989,7 +9989,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( return false; Ops.push_back(Val); } - + // Build the extracted vector elements back into a vector. 
StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops); } @@ -10000,7 +10000,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( unsigned StoreBW = NumElem * ElementSizeBytes * 8; APInt StoreInt(StoreBW, 0); - + // Construct a single integer constant which is made of the smaller // constant inputs. bool IsLE = TLI.isLittleEndian(); @@ -10017,18 +10017,18 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( llvm_unreachable("Invalid constant element type"); } } - + // Create the new Load and Store operations. EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); StoredVal = DAG.getConstant(StoreInt, StoreTy); } - + SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), false, false, FirstInChain->getAlignment()); - + // Replace the first store with the new store CombineTo(EarliestOp, NewStore); // Erase all other stores. @@ -10050,7 +10050,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); deleteAndRecombine(St); } - + return true; } @@ -10071,7 +10071,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { bool IsConstantSrc = isa(StoredVal) || isa(StoredVal); bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT); - + if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc) return false; @@ -10281,7 +10281,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // consecutive loads). if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT) return false; - + // Find a legal type for the vector store. EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); if (TLI.isTypeLegal(Ty)) @@ -11197,7 +11197,7 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { // Just because the floating-point vector type is legal does not necessarily // mean that the corresponding integer vector type is. 
if (!isTypeLegal(NVT)) - return SDValue(); + return SDValue(); SmallVector Opnds; for (unsigned i = 0; i != NumInScalars; ++i) { @@ -11354,10 +11354,10 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements())) return SDValue(); - + // Try to replace VecIn1 with two extract_subvectors // No need to update the masks, they should still be correct. - VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, + VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, DAG.getConstant(VT.getVectorNumElements(), TLI.getVectorIdxTy())); VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, DAG.getConstant(0, TLI.getVectorIdxTy())); @@ -11430,36 +11430,56 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { } } + // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR. + // We have already tested above for an UNDEF only concatenation. // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...)) // -> (BUILD_VECTOR A, B, ..., C, D, ...) 
- if (N->getNumOperands() == 2 && - N->getOperand(0).getOpcode() == ISD::BUILD_VECTOR && - N->getOperand(1).getOpcode() == ISD::BUILD_VECTOR) { - EVT VT = N->getValueType(0); - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); + auto IsBuildVectorOrUndef = [](const SDValue &Op) { + return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode(); + }; + bool AllBuildVectorsOrUndefs = + std::all_of(N->op_begin(), N->op_end(), IsBuildVectorOrUndef); + if (AllBuildVectorsOrUndefs) { SmallVector Opnds; - unsigned BuildVecNumElts = N0.getNumOperands(); - - EVT SclTy0 = N0.getOperand(0)->getValueType(0); - EVT SclTy1 = N1.getOperand(0)->getValueType(0); - if (SclTy0.isFloatingPoint()) { - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(N0.getOperand(i)); - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(N1.getOperand(i)); - } else { + EVT SVT = VT.getScalarType(); + + EVT MinVT = SVT; + if (!SVT.isFloatingPoint()) { // If BUILD_VECTOR are from built from integer, they may have different - // operand types. Get the smaller type and truncate all operands to it. - EVT MinTy = SclTy0.bitsLE(SclTy1) ? SclTy0 : SclTy1; - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy, - N0.getOperand(i))); - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy, - N1.getOperand(i))); + // operand types. Get the smallest type and truncate all operands to it. + bool FoundMinVT = false; + for (const SDValue &Op : N->ops()) + if (ISD::BUILD_VECTOR == Op.getOpcode()) { + EVT OpSVT = Op.getOperand(0)->getValueType(0); + MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? 
OpSVT : MinVT; + FoundMinVT = true; + } + assert(FoundMinVT && "Concat vector type mismatch"); } + for (const SDValue &Op : N->ops()) { + EVT OpVT = Op.getValueType(); + unsigned NumElts = OpVT.getVectorNumElements(); + + if (ISD::UNDEF == Op.getOpcode()) + for (unsigned i = 0; i != NumElts; ++i) + Opnds.push_back(DAG.getUNDEF(MinVT)); + + if (ISD::BUILD_VECTOR == Op.getOpcode()) { + if (SVT.isFloatingPoint()) { + assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch"); + for (unsigned i = 0; i != NumElts; ++i) + Opnds.push_back(Op.getOperand(i)); + } else { + for (unsigned i = 0; i != NumElts; ++i) + Opnds.push_back( + DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i))); + } + } + } + + assert(VT.getVectorNumElements() == Opnds.size() && + "Concat vector type mismatch"); return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); } @@ -11971,7 +11991,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (!TLI.isShuffleMaskLegal(Mask, VT)) return SDValue(); - + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2) diff --git a/test/CodeGen/X86/vector-zext.ll b/test/CodeGen/X86/vector-zext.ll index 132e17fef4d..568687dfd17 100644 --- a/test/CodeGen/X86/vector-zext.ll +++ b/test/CodeGen/X86/vector-zext.ll @@ -358,22 +358,16 @@ define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ; ; AVX1-LABEL: shuf_zext_8i16_to_8i32: ; AVX1: # BB#0: # %entry -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] -; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] -; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; 
AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: shuf_zext_8i16_to_8i32: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1 -; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] -; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] -; AVX2-NEXT: vpunpcklwd{{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 +; AVX2-NEXT: # kill +; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX2-NEXT: retq entry: %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> @@ -410,22 +404,17 @@ define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ; ; AVX1-LABEL: shuf_zext_4i32_to_4i64: ; AVX1: # BB#0: # %entry -; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,1],xmm1[0,0] -; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2,1,3] -; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] -; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,0] -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 +; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero +; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1] +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,0] +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: shuf_zext_4i32_to_4i64: ; AVX2: # BB#0: # %entry ; AVX2-NEXT: # kill ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; AVX2-NEXT: xorl %eax, %eax -; AVX2-NEXT: vmovd %eax, %xmm1 -; AVX2-NEXT: 
vpbroadcastd %xmm1, %ymm1 -; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] ; AVX2-NEXT: retq entry: %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> -- 2.34.1