X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FSelectionDAG%2FDAGCombiner.cpp;h=fd96519e45573d0d6d661fc6d976862c5c5b7176;hb=d913d9d2c387de1072858c91feba78284dfd3e79;hp=5a6ea8e681078eeec6ec91d69ccef955b917c74d;hpb=c0319dd9c254d0854c94f407e3eefec57922880d;p=oota-llvm.git diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 5a6ea8e6810..fd96519e455 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -17,9 +17,9 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -327,6 +327,7 @@ namespace { SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); + SDValue CombineExtLoad(SDNode *N); SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); SDValue BuildSDIVPow2(SDNode *N); @@ -363,6 +364,28 @@ namespace { /// chain (aliasing node.) SDValue FindBetterChain(SDNode *N, SDValue Chain); + /// Holds a pointer to an LSBaseSDNode as well as information on where it + /// is located in a sequence of memory operations connected by a chain. + struct MemOpLink { + MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq): + MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { } + // Ptr to the mem node. + LSBaseSDNode *MemNode; + // Offset from the base ptr. + int64_t OffsetFromBase; + // What is the sequence number of this mem node. + // Lowest mem operand in the DAG starts at zero. + unsigned SequenceNum; + }; + + /// This is a helper function for MergeConsecutiveStores. When the source + /// elements of the consecutive stores are all constants or all extracted + /// vector elements, try to merge them into one larger store. + /// \return True if a merged store was created. + bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl &StoreNodes, + EVT MemVT, unsigned NumElem, + bool IsConstantSrc, bool UseVector); + /// Merge consecutive store operations into a wide store. /// This optimization uses wide integers or vectors when possible. /// \return True if some memory operations were changed. @@ -738,10 +761,9 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, if (SDNode *L = isConstantBuildVectorOrConstantInt(N0.getOperand(1))) { if (SDNode *R = isConstantBuildVectorOrConstantInt(N1)) { // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) - SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R); - if (!OpNode.getNode()) - return SDValue(); - return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); + if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R)) + return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); + return SDValue(); } if (N0.hasOneUse()) { // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one @@ -759,10 +781,9 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, if (SDNode *R = isConstantBuildVectorOrConstantInt(N1.getOperand(1))) { if (SDNode *L = isConstantBuildVectorOrConstantInt(N0)) { // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) - SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L); - if (!OpNode.getNode()) - return SDValue(); - return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); + if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L)) + return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); + return SDValue(); } if (N1.hasOneUse()) { // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one @@ -877,8 +898,8 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { if (LoadSDNode *LD = dyn_cast(Op)) { EVT MemVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) - ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD - : ISD::EXTLOAD) + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD + : ISD::EXTLOAD) : LD->getExtensionType(); Replace = true; return DAG.getExtLoad(ExtType, dl, PVT, @@ -1099,8 +1120,8 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { LoadSDNode *LD = cast(N); EVT MemVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) - ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD - : ISD::EXTLOAD) + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD + : ISD::EXTLOAD) : LD->getExtensionType(); SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, LD->getChain(), LD->getBasePtr(), @@ -1480,7 +1501,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { switch (Op.getOpcode()) { case ISD::EntryToken: // Entry tokens don't need to be added to the list. They are - // rededundant. + // redundant. Changed = true; break; @@ -1509,7 +1530,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { SDValue Result; - // If we've change things around then replace token factor. + // If we've changed things around then replace token factor. if (Changed) { if (Ops.empty()) { // The entry token is the only possible outcome. @@ -1519,8 +1540,11 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops); } - // Don't add users to work list. - return CombineTo(N, Result, false); + // Add users to worklist if AA is enabled, since it may introduce + // a lot of new chained token factors while removing memory deps. + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA + : DAG.getSubtarget().useAA(); + return CombineTo(N, Result, UseAA /*add to worklist*/); } return Result; @@ -2800,6 +2824,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // actually legal and isn't going to get expanded, else this is a false // optimisation. bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD, + Load->getValueType(0), Load->getMemoryVT()); // Resize the constant to the same size as the original memory access before @@ -2926,7 +2951,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, LN0->getMemOperand()); @@ -2946,7 +2971,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, LN0->getMemOperand()); @@ -2972,10 +2997,11 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); EVT LoadedVT = LN0->getMemoryVT(); + EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; if (ExtVT == LoadedVT && - (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { - EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; + (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, + ExtVT))) { SDValue NewLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, @@ -2990,7 +3016,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // Do not generate loads of non-round integer types since these can // be expensive (and would be wrong if the type is not byte sized). if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && - (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { + (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, + ExtVT))) { EVT PtrType = LN0->getOperand(1).getValueType(); unsigned Alignment = LN0->getAlignment(); @@ -3010,7 +3037,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) { AddToWorklist(NewPtr.getNode()); - EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; SDValue Load = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr, @@ -3447,12 +3473,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) { isa(N0.getOperand(1))) { ConstantSDNode *C1 = cast(N0.getOperand(1)); if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) { - SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1); - if (!COR.getNode()) - return SDValue(); - return DAG.getNode(ISD::AND, SDLoc(N), VT, - DAG.getNode(ISD::OR, SDLoc(N0), VT, - N0.getOperand(0), N1), COR); + if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)) + return DAG.getNode( + ISD::AND, SDLoc(N), VT, + DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR); + return SDValue(); } } // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) @@ -3528,6 +3553,17 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } } + // (or (and X, M), (and X, N)) -> (and X, (or M, N)) + if (N0.getOpcode() == ISD::AND && + N1.getOpcode() == ISD::AND && + N0.getOperand(0) == N1.getOperand(0) && + // Don't increase # computations. + (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { + SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, + N0.getOperand(1), N1.getOperand(1)); + return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), X); + } + // See if this is some rotate idiom. if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) return SDValue(Rot, 0); @@ -4068,8 +4104,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC && TLI.getBooleanContents(N00.getOperand(0).getValueType()) == TargetLowering::ZeroOrNegativeOneBooleanContent) { - SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV); - if (C.getNode()) + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); } } else { @@ -4615,6 +4650,43 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) { return SDValue(); } + +/// \brief Generate Min/Max node +static SDValue combineMinNumMaxNum(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS, + SDValue True, SDValue False, + ISD::CondCode CC, const TargetLowering &TLI, + SelectionDAG &DAG) { + if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) + return SDValue(); + + switch (CC) { + case ISD::SETOLT: + case ISD::SETOLE: + case ISD::SETLT: + case ISD::SETLE: + case ISD::SETULT: + case ISD::SETULE: { + unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM; + if (TLI.isOperationLegal(Opcode, VT)) + return DAG.getNode(Opcode, DL, VT, LHS, RHS); + return SDValue(); + } + case ISD::SETOGT: + case ISD::SETOGE: + case ISD::SETGT: + case ISD::SETGE: + case ISD::SETUGT: + case ISD::SETUGE: { + unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM; + if (TLI.isOperationLegal(Opcode, VT)) + return DAG.getNode(Opcode, DL, VT, LHS, RHS); + return SDValue(); + } + default: + return SDValue(); + } +} + SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -4694,6 +4766,28 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // fold selects based on a setcc into other things, such as min/max/abs if (N0.getOpcode() == ISD::SETCC) { + // select x, y (fcmp lt x, y) -> fminnum x, y + // select x, y (fcmp gt x, y) -> fmaxnum x, y + // + // This is OK if we don't care about what happens if either operand is a + // NaN. + // + + // FIXME: Instead of testing for UnsafeFPMath, this should be checking for + // no signed zeros as well as no nans. + const TargetOptions &Options = DAG.getTarget().Options; + if (Options.UnsafeFPMath && + VT.isFloatingPoint() && N0.hasOneUse() && + DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) { + ISD::CondCode CC = cast(N0.getOperand(2))->get(); + + SDValue FMinMax = + combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), + N1, N2, CC, TLI, DAG); + if (FMinMax) + return FMinMax; + } + if ((!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) || TLI.isOperationLegal(ISD::SELECT_CC, VT)) @@ -4785,7 +4879,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { MaskedStoreSDNode *MST = dyn_cast(N); SDValue Mask = MST->getMask(); - SDValue Data = MST->getData(); + SDValue Data = MST->getValue(); SDLoc DL(N); // If the MSTORE data type requires splitting and the mask is provided by a @@ -4828,7 +4922,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, MST->getAAInfo(), MST->getRanges()); - Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, MMO); + Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, + MST->isTruncatingStore()); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, @@ -4840,7 +4935,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { SecondHalfAlignment, MST->getAAInfo(), MST->getRanges()); - Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, MMO); + Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, + MST->isTruncatingStore()); AddToWorklist(Lo.getNode()); AddToWorklist(Hi.getNode()); @@ -4901,7 +4997,8 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), MLD->getRanges()); - Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, MMO); + Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO, + ISD::NON_EXTLOAD); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, @@ -4912,7 +5009,8 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); - Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, MMO); + Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO, + ISD::NON_EXTLOAD); AddToWorklist(Lo.getNode()); AddToWorklist(Hi.getNode()); @@ -5043,13 +5141,16 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { return N2; // cond always true -> true val else return N3; // cond always false -> false val - } - - // Fold to a simpler select_cc - if (SCC.getOpcode() == ISD::SETCC) + } else if (SCC->getOpcode() == ISD::UNDEF) { + // When the condition is UNDEF, just return the first operand. This is + // coherent the DAG creation, no setcc node is created in this case + return N2; + } else if (SCC.getOpcode() == ISD::SETCC) { + // Fold to a simpler select_cc return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0), SCC.getOperand(1), N2, N3, SCC.getOperand(2)); + } } // If we can fold this based on the true/false value, do so. @@ -5210,6 +5311,102 @@ void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl &SetCCs, } } +// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?). +SDValue DAGCombiner::CombineExtLoad(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT DstVT = N->getValueType(0); + EVT SrcVT = N0.getValueType(); + + assert((N->getOpcode() == ISD::SIGN_EXTEND || + N->getOpcode() == ISD::ZERO_EXTEND) && + "Unexpected node type (not an extend)!"); + + // fold (sext (load x)) to multiple smaller sextloads; same for zext. + // For example, on a target with legal v4i32, but illegal v8i32, turn: + // (v8i32 (sext (v8i16 (load x)))) + // into: + // (v8i32 (concat_vectors (v4i32 (sextload x)), + // (v4i32 (sextload (x + 16))))) + // Where uses of the original load, i.e.: + // (v8i16 (load x)) + // are replaced with: + // (v8i16 (truncate + // (v8i32 (concat_vectors (v4i32 (sextload x)), + // (v4i32 (sextload (x + 16))))))) + // + // This combine is only applicable to illegal, but splittable, vectors. + // All legal types, and illegal non-vector types, are handled elsewhere. + // This combine is controlled by TargetLowering::isVectorLoadExtDesirable. + // + if (N0->getOpcode() != ISD::LOAD) + return SDValue(); + + LoadSDNode *LN0 = cast(N0); + + if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) || + !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() || + !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0))) + return SDValue(); + + SmallVector SetCCs; + if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI)) + return SDValue(); + + ISD::LoadExtType ExtType = + N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD; + + // Try to split the vector types to get down to legal types. + EVT SplitSrcVT = SrcVT; + EVT SplitDstVT = DstVT; + while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) && + SplitSrcVT.getVectorNumElements() > 1) { + SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first; + SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first; + } + + if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT)) + return SDValue(); + + SDLoc DL(N); + const unsigned NumSplits = + DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements(); + const unsigned Stride = SplitSrcVT.getStoreSize(); + SmallVector Loads; + SmallVector Chains; + + SDValue BasePtr = LN0->getBasePtr(); + for (unsigned Idx = 0; Idx < NumSplits; Idx++) { + const unsigned Offset = Idx * Stride; + const unsigned Align = MinAlign(LN0->getAlignment(), Offset); + + SDValue SplitLoad = DAG.getExtLoad( + ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr, + LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, + LN0->isVolatile(), LN0->isNonTemporal(), LN0->isInvariant(), + Align, LN0->getAAInfo()); + + BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, + DAG.getConstant(Stride, BasePtr.getValueType())); + + Loads.push_back(SplitLoad.getValue(0)); + Chains.push_back(SplitLoad.getValue(1)); + } + + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads); + + CombineTo(N, NewValue); + + // Replace uses of the original load (before extension) + // with a truncate of the concatenated sextloaded vectors. + SDValue Trunc = + DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue); + CombineTo(N0.getNode(), Trunc, NewChain); + ExtendSetCCUses(SetCCs, Trunc, NewValue, DL, + (ISD::NodeType)N->getOpcode()); + return SDValue(N, 0); // Return N so it doesn't get rechecked! +} + SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -5276,17 +5473,18 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } // fold (sext (load x)) -> (sext (truncate (sextload x))) - // None of the supported targets knows how to perform load and sign extend - // on vectors in one instruction. We only perform this transformation on - // scalars. - if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && - ISD::isUNINDEXEDLoad(N0.getNode()) && - ((!LegalOperations && !cast(N0)->isVolatile()) || - TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) { + // Only generate vector extloads when 1) they're legal, and 2) they are + // deemed desirable by the target. + if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + ((!LegalOperations && !VT.isVector() && + !cast(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) { bool DoXform = true; SmallVector SetCCs; if (!N0.hasOneUse()) DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); + if (VT.isVector()) + DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); if (DoXform) { LoadSDNode *LN0 = cast(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, @@ -5303,6 +5501,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } } + // fold (sext (load x)) to multiple smaller sextloads. + // Only on illegal but splittable vectors. + if (SDValue ExtLoad = CombineExtLoad(N)) + return ExtLoad; + // fold (sext (sextload x)) -> (sext (truncate (sextload x))) // fold (sext ( extload x)) -> (sext (truncate (sextload x))) if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && @@ -5310,7 +5513,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { LoadSDNode *LN0 = cast(N0); EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) { + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) { SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, @@ -5330,7 +5533,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { N0.getOpcode() == ISD::XOR) && isa(N0.getOperand(0)) && N0.getOperand(1).getOpcode() == ISD::Constant && - TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) && + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) && (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { LoadSDNode *LN0 = cast(N0.getOperand(0)); if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) { @@ -5566,17 +5769,18 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } // fold (zext (load x)) -> (zext (truncate (zextload x))) - // None of the supported targets knows how to perform load and vector_zext - // on vectors in one instruction. We only perform this transformation on - // scalars. - if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && - ISD::isUNINDEXEDLoad(N0.getNode()) && - ((!LegalOperations && !cast(N0)->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) { + // Only generate vector extloads when 1) they're legal, and 2) they are + // deemed desirable by the target. + if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + ((!LegalOperations && !VT.isVector() && + !cast(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) { bool DoXform = true; SmallVector SetCCs; if (!N0.hasOneUse()) DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); + if (VT.isVector()) + DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); if (DoXform) { LoadSDNode *LN0 = cast(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, @@ -5594,13 +5798,18 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } } + // fold (zext (load x)) to multiple smaller zextloads. + // Only on illegal but splittable vectors. + if (SDValue ExtLoad = CombineExtLoad(N)) + return ExtLoad; + // fold (zext (and/or/xor (load x), cst)) -> // (and/or/xor (zextload x), (zext cst)) if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::XOR) && isa(N0.getOperand(0)) && N0.getOperand(1).getOpcode() == ISD::Constant && - TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) && + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) && (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { LoadSDNode *LN0 = cast(N0.getOperand(0)); if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) { @@ -5637,7 +5846,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { LoadSDNode *LN0 = cast(N0); EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) { + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, @@ -5799,7 +6008,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // scalars. if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && ISD::isUNINDEXEDLoad(N0.getNode()) && - TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType())) { + TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) { bool DoXform = true; SmallVector SetCCs; if (!N0.hasOneUse()) @@ -5829,7 +6038,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { LoadSDNode *LN0 = cast(N0); ISD::LoadExtType ExtType = LN0->getExtensionType(); EVT MemVT = LN0->getMemoryVT(); - if (!LegalOperations || TLI.isLoadExtLegal(ExtType, MemVT)) { + if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) { SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, LN0->getMemOperand()); @@ -5958,7 +6167,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { ExtVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() - N01->getZExtValue()); } - if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT)) + if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT)) return SDValue(); unsigned EVTBits = ExtVT.getSizeInBits(); @@ -6165,7 +6374,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ISD::isUNINDEXEDLoad(N0.getNode()) && EVT == cast(N0)->getMemoryVT() && ((!LegalOperations && !cast(N0)->isVolatile()) || - TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { LoadSDNode *LN0 = cast(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), @@ -6181,7 +6390,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { N0.hasOneUse() && EVT == cast(N0)->getMemoryVT() && ((!LegalOperations && !cast(N0)->isVolatile()) || - TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { LoadSDNode *LN0 = cast(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), @@ -6484,19 +6693,15 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // If the input is a constant, let getNode fold it. if (isa(N0) || isa(N0)) { - SDValue Res = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0); - if (Res.getNode() != N) { - if (!LegalOperations || - TLI.isOperationLegal(Res.getNode()->getOpcode(), VT)) - return Res; - - // Folding it resulted in an illegal node, and it's too late to - // do that. Clean up the old node and forego the transformation. - // Ideally this won't happen very often, because instcombine - // and the earlier dagcombine runs (where illegal nodes are - // permitted) should have folded most of them already. - deleteAndRecombine(Res.getNode()); - } + // If we can't allow illegal operations, we need to check that this is just + // a fp -> int or int -> conversion and that the resulting operation will + // be legal. + if (!LegalOperations || + (isa(N0) && VT.isFloatingPoint() && !VT.isVector() && + TLI.isOperationLegal(ISD::ConstantFP, VT)) || + (isa(N0) && VT.isInteger() && !VT.isVector() && + TLI.isOperationLegal(ISD::Constant, VT))) + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0); } // (conv (conv x, t1), t2) -> (conv x, t2) @@ -6895,6 +7100,58 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1.getOperand(0), N1.getOperand(1), N0); + + // When FP_EXTEND nodes are free on the target, and there is an opportunity + // to combine into FMA, arrange such nodes accordingly. + if (TLI.isFPExtFree(VT)) { + + // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N00.getOperand(1)), N1); + } + + // fold (fadd x, (fpext (fmul y, z)), z) -> (fma (fpext y), (fpext z), x) + // Note: Commutes FADD operands. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N10.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N10.getOperand(1)), N0); + } + } + + // More folding opportunities when target permits. + if (TLI.enableAggressiveFMAFusion(VT)) { + + // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) + if (N0.getOpcode() == ISD::FMA && + N0.getOperand(2).getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(ISD::FMA, SDLoc(N), VT, + N0.getOperand(2).getOperand(0), + N0.getOperand(2).getOperand(1), + N1)); + + // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) + if (N1->getOpcode() == ISD::FMA && + N1.getOperand(2).getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + N1.getOperand(0), N1.getOperand(1), + DAG.getNode(ISD::FMA, SDLoc(N), VT, + N1.getOperand(2).getOperand(0), + N1.getOperand(2).getOperand(1), + N0)); + } } return SDValue(); @@ -6988,6 +7245,107 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { DAG.getNode(ISD::FNEG, dl, VT, N00), N01, DAG.getNode(ISD::FNEG, dl, VT, N1)); } + + // When FP_EXTEND nodes are free on the target, and there is an opportunity + // to combine into FMA, arrange such nodes accordingly. + if (TLI.isFPExtFree(VT)) { + + // fold (fsub (fpext (fmul x, y)), z) + // -> (fma (fpext x), (fpext y), (fneg z)) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N00.getOperand(1)), + DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1)); + } + + // fold (fsub x, (fpext (fmul y, z))) + // -> (fma (fneg (fpext y)), (fpext z), x) + // Note: Commutes FSUB operands. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), + VT, N10.getOperand(0))), + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N10.getOperand(1)), + N0); + } + + // fold (fsub (fpext (fneg (fmul, x, y))), z) + // -> (fma (fneg (fpext x)), (fpext y), (fneg z)) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FNEG) { + SDValue N000 = N00.getOperand(0); + if (N000.getOpcode() == ISD::FMUL) { + return DAG.getNode(ISD::FMA, dl, VT, + DAG.getNode(ISD::FNEG, dl, VT, + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), + VT, N000.getOperand(0))), + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N000.getOperand(1)), + DAG.getNode(ISD::FNEG, dl, VT, N1)); + } + } + } + + // fold (fsub (fneg (fpext (fmul, x, y))), z) + // -> (fma (fneg (fpext x)), (fpext y), (fneg z)) + if (N0.getOpcode() == ISD::FNEG) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FP_EXTEND) { + SDValue N000 = N00.getOperand(0); + if (N000.getOpcode() == ISD::FMUL) { + return DAG.getNode(ISD::FMA, dl, VT, + DAG.getNode(ISD::FNEG, dl, VT, + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), + VT, N000.getOperand(0))), + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N000.getOperand(1)), + DAG.getNode(ISD::FNEG, dl, VT, N1)); + } + } + } + } + + // More folding opportunities when target permits. + if (TLI.enableAggressiveFMAFusion(VT)) { + + // fold (fsub (fma x, y, (fmul u, v)), z) + // -> (fma x, y (fma u, v, (fneg z))) + if (N0.getOpcode() == ISD::FMA && + N0.getOperand(2).getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(ISD::FMA, SDLoc(N), VT, + N0.getOperand(2).getOperand(0), + N0.getOperand(2).getOperand(1), + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N1))); + + // fold (fsub x, (fma y, z, (fmul u, v))) + // -> (fma (fneg y), z, (fma (fneg u), v, x)) + if (N1.getOpcode() == ISD::FMA && + N1.getOperand(2).getOpcode() == ISD::FMUL) { + SDValue N20 = N1.getOperand(2).getOperand(0); + SDValue N21 = N1.getOperand(2).getOperand(1); + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N1.getOperand(0)), + N1.getOperand(1), + DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N20), + N21, N0)); + } + } } return SDValue(); @@ -7324,7 +7682,8 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { } SDValue DAGCombiner::visitFSQRT(SDNode *N) { - if (DAG.getTarget().Options.UnsafeFPMath) { + if (DAG.getTarget().Options.UnsafeFPMath && + !TLI.isFsqrtCheap()) { // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5) if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) { EVT VT = RV.getValueType(); @@ -7531,11 +7890,16 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { // fold (fp_round (fp_round x)) -> (fp_round x) if (N0.getOpcode() == ISD::FP_ROUND) { - // This is a value preserving truncation if both round's are. - bool IsTrunc = N->getConstantOperandVal(1) == 1 && - N0.getNode()->getConstantOperandVal(1) == 1; - return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0), - DAG.getIntPtrConstant(IsTrunc)); + const bool NIsTrunc = N->getConstantOperandVal(1) == 1; + const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1; + // If the first fp_round isn't a value preserving truncation, it might + // introduce a tie in the second fp_round, that wouldn't occur in the + // single-step fp_round we want to fold to. + // In other words, double rounding isn't the same as rounding. + // Also, this is a value preserving truncation iff both fp_round's are. + if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0), + DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc)); } // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y) @@ -7593,7 +7957,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && - TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType())) { + TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) { LoadSDNode *LN0 = cast(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), @@ -9125,7 +9489,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { if (NotMaskLZ == 64) return Result; // All zero mask. // See if we have a continuous run of bits. If so, we have 0*1+0* - if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64) + if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64) return Result; // Adjust NotMaskLZ down to be from the actual size of the int instead of i64. @@ -9272,9 +9636,12 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1; unsigned NewBW = NextPowerOf2(MSB - ShAmt); EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); + // The narrowing should be profitable, the load/store operation should be + // legal (or custom) and the store size should be equal to the NewVT width. while (NewBW < BitWidth && - !(TLI.isOperationLegalOrCustom(Opc, NewVT) && - TLI.isNarrowingProfitable(VT, NewVT))) { + (NewVT.getStoreSizeInBits() != NewBW || + !TLI.isOperationLegalOrCustom(Opc, NewVT) || + !TLI.isNarrowingProfitable(VT, NewVT))) { NewBW = NextPowerOf2(NewBW); NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); } @@ -9474,19 +9841,116 @@ struct BaseIndexOffset { } }; -/// Holds a pointer to an LSBaseSDNode as well as information on where it -/// is located in a sequence of memory operations connected by a chain. -struct MemOpLink { - MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq): - MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { } - // Ptr to the mem node. - LSBaseSDNode *MemNode; - // Offset from the base ptr. - int64_t OffsetFromBase; - // What is the sequence number of this mem node. - // Lowest mem operand in the DAG starts at zero. - unsigned SequenceNum; -}; +bool DAGCombiner::MergeStoresOfConstantsOrVecElts( + SmallVectorImpl &StoreNodes, EVT MemVT, + unsigned NumElem, bool IsConstantSrc, bool UseVector) { + // Make sure we have something to merge. + if (NumElem < 2) + return false; + + int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned EarliestNodeUsed = 0; + + for (unsigned i=0; i < NumElem; ++i) { + // Find a chain for the new wide-store operand. Notice that some + // of the store nodes that we found may not be selected for inclusion + // in the wide store. The chain we use needs to be the chain of the + // earliest store node which is *used* and replaced by the wide store. + if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) + EarliestNodeUsed = i; + } + + // The earliest Node in the DAG. + LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; + SDLoc DL(StoreNodes[0].MemNode); + + SDValue StoredVal; + if (UseVector) { + // Find a legal type for the vector store. + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); + assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); + if (IsConstantSrc) { + // A vector store with a constant source implies that the constant is + // zero; we only handle merging stores of constant zeros because the zero + // can be materialized without a load. + // It may be beneficial to loosen this restriction to allow non-zero + // store merging. + StoredVal = DAG.getConstant(0, Ty); + } else { + SmallVector Ops; + for (unsigned i = 0; i < NumElem ; ++i) { + StoreSDNode *St = cast(StoreNodes[i].MemNode); + SDValue Val = St->getValue(); + // All of the operands of a BUILD_VECTOR must have the same type. + if (Val.getValueType() != MemVT) + return false; + Ops.push_back(Val); + } + + // Build the extracted vector elements back into a vector. + StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops); + } + } else { + // We should always use a vector store when merging extracted vector + // elements, so this path implies a store of constants. + assert(IsConstantSrc && "Merged vector elements should use vector store"); + + unsigned StoreBW = NumElem * ElementSizeBytes * 8; + APInt StoreInt(StoreBW, 0); + + // Construct a single integer constant which is made of the smaller + // constant inputs. + bool IsLE = TLI.isLittleEndian(); + for (unsigned i = 0; i < NumElem ; ++i) { + unsigned Idx = IsLE ? (NumElem - 1 - i) : i; + StoreSDNode *St = cast(StoreNodes[Idx].MemNode); + SDValue Val = St->getValue(); + StoreInt <<= ElementSizeBytes*8; + if (ConstantSDNode *C = dyn_cast(Val)) { + StoreInt |= C->getAPIntValue().zext(StoreBW); + } else if (ConstantFPSDNode *C = dyn_cast(Val)) { + StoreInt |= C->getValueAPF().bitcastToAPInt().zext(StoreBW); + } else { + llvm_unreachable("Invalid constant element type"); + } + } + + // Create the new Load and Store operations. + EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); + StoredVal = DAG.getConstant(StoreInt, StoreTy); + } + + SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal, + FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), + false, false, + FirstInChain->getAlignment()); + + // Replace the first store with the new store + CombineTo(EarliestOp, NewStore); + // Erase all other stores. + for (unsigned i = 0; i < NumElem ; ++i) { + if (StoreNodes[i].MemNode == EarliestOp) + continue; + StoreSDNode *St = cast(StoreNodes[i].MemNode); + // ReplaceAllUsesWith will replace all uses that existed when it was + // called, but graph optimizations may cause new ones to appear. For + // example, the case in pr14333 looks like + // + // St's chain -> St -> another store -> X + // + // And the only difference from St to the other store is the chain. + // When we change it's chain to be St's chain they become identical, + // get CSEed and the net result is that X is now a use of St. + // Since we know that St is redundant, just iterate. + while (!St->use_empty()) + DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); + deleteAndRecombine(St); + } + + return true; +} bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { EVT MemVT = St->getMemoryVT(); @@ -9499,11 +9963,14 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { return false; // Perform an early exit check. Do not bother looking at stored values that - // are not constants or loads. + // are not constants, loads, or extracted vector elements. SDValue StoredVal = St->getValue(); bool IsLoadSrc = isa(StoredVal); - if (!isa(StoredVal) && !isa(StoredVal) && - !IsLoadSrc) + bool IsConstantSrc = isa(StoredVal) || + isa(StoredVal); + bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT); + + if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc) return false; // Only look at ends of store sequences. @@ -9645,7 +10112,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; // Store the constants into memory as one consecutive store. - if (!IsLoadSrc) { + if (IsConstantSrc) { unsigned LastLegalType = 0; unsigned LastLegalVectorType = 0; bool NonZero = false; @@ -9694,85 +10161,33 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType; - // Make sure we have something to merge. - if (NumElem < 2) - return false; - - unsigned EarliestNodeUsed = 0; - for (unsigned i=0; i < NumElem; ++i) { - // Find a chain for the new wide-store operand. Notice that some - // of the store nodes that we found may not be selected for inclusion - // in the wide store. The chain we use needs to be the chain of the - // earliest store node which is *used* and replaced by the wide store. - if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) - EarliestNodeUsed = i; - } - - // The earliest Node in the DAG. - LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; - SDLoc DL(StoreNodes[0].MemNode); + return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, + true, UseVector); + } - SDValue StoredVal; - if (UseVector) { + // When extracting multiple vector elements, try to store them + // in one vector store rather than a sequence of scalar stores. + if (IsExtractVecEltSrc) { + unsigned NumElem = 0; + for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) { + StoreSDNode *St = cast(StoreNodes[i].MemNode); + SDValue StoredVal = St->getValue(); + // This restriction could be loosened. + // Bail out if any stored values are not elements extracted from a vector. + // It should be possible to handle mixed sources, but load sources need + // more careful handling (see the block of code below that handles + // consecutive loads). + if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return false; + // Find a legal type for the vector store. - EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); - assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); - StoredVal = DAG.getConstant(0, Ty); - } else { - unsigned StoreBW = NumElem * ElementSizeBytes * 8; - APInt StoreInt(StoreBW, 0); - - // Construct a single integer constant which is made of the smaller - // constant inputs. - bool IsLE = TLI.isLittleEndian(); - for (unsigned i = 0; i < NumElem ; ++i) { - unsigned Idx = IsLE ?(NumElem - 1 - i) : i; - StoreSDNode *St = cast(StoreNodes[Idx].MemNode); - SDValue Val = St->getValue(); - StoreInt<<=ElementSizeBytes*8; - if (ConstantSDNode *C = dyn_cast(Val)) { - StoreInt|=C->getAPIntValue().zext(StoreBW); - } else if (ConstantFPSDNode *C = dyn_cast(Val)) { - StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW); - } else { - assert(false && "Invalid constant element type"); - } - } - - // Create the new Load and Store operations. - EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); - StoredVal = DAG.getConstant(StoreInt, StoreTy); - } - - SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal, - FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), - false, false, - FirstInChain->getAlignment()); - - // Replace the first store with the new store - CombineTo(EarliestOp, NewStore); - // Erase all other stores. - for (unsigned i = 0; i < NumElem ; ++i) { - if (StoreNodes[i].MemNode == EarliestOp) - continue; - StoreSDNode *St = cast(StoreNodes[i].MemNode); - // ReplaceAllUsesWith will replace all uses that existed when it was - // called, but graph optimizations may cause new ones to appear. For - // example, the case in pr14333 looks like - // - // St's chain -> St -> another store -> X - // - // And the only difference from St to the other store is the chain. - // When we change it's chain to be St's chain they become identical, - // get CSEed and the net result is that X is now a use of St. - // Since we know that St is redundant, just iterate. - while (!St->use_empty()) - DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); - deleteAndRecombine(St); + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); + if (TLI.isTypeLegal(Ty)) + NumElem = i + 1; } - return true; + return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, + false, true); } // Below we handle the case of multiple consecutive stores that @@ -9870,9 +10285,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy); if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && - TLI.isLoadExtLegal(ISD::ZEXTLOAD, StoreTy) && - TLI.isLoadExtLegal(ISD::SEXTLOAD, StoreTy) && - TLI.isLoadExtLegal(ISD::EXTLOAD, StoreTy)) + TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) && + TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) && + TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy)) LastLegalIntegerType = i+1; } } @@ -10310,7 +10725,8 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( if (ResultVT.bitsGT(VecEltVT)) { // If the result type of vextract is wider than the load, then issue an // extending load instead. - ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, VecEltVT) + ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, + VecEltVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD; Load = DAG.getExtLoad( @@ -11132,7 +11548,8 @@ static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0, return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask()); } -// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat. +// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, +// or turn a shuffle of a single concat into simpler shuffle then concat. static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); @@ -11146,6 +11563,18 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements(); unsigned NumConcats = NumElts / NumElemsPerConcat; + // Special case: shuffle(concat(A,B)) can be more efficiently represented + // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high + // half vector elements. + if (NumElemsPerConcat * 2 == NumElts && N1.getOpcode() == ISD::UNDEF && + std::all_of(SVN->getMask().begin() + NumElemsPerConcat, + SVN->getMask().end(), [](int i) { return i == -1; })) { + N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1), + ArrayRef(SVN->getMask().begin(), NumElemsPerConcat)); + N1 = DAG.getUNDEF(ConcatVT); + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1); + } + // Look at every vector that's inserted. We're looking for exact // subvector-sized copies from a concatenated vector for (unsigned I = 0; I != NumConcats; ++I) { @@ -11244,7 +11673,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } // If it is a splat, check if the argument vector is another splat or a - // build_vector with all scalar elements the same. + // build_vector. if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { SDNode *V = N0.getNode(); @@ -11281,6 +11710,24 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // Splat of , return if (AllSame) return N0; + + // If the splatted element is a constant, just build the vector out of + // constants directly. + const SDValue &Splatted = V->getOperand(SVN->getSplatIndex()); + if (isa(Splatted) || isa(Splatted)) { + SmallVector Ops; + for (unsigned i = 0; i != NumElts; ++i) { + Ops.push_back(Splatted); + } + SDValue NewBV = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), + V->getValueType(0), Ops); + + // We may have jumped through bitcasts, so the type of the + // BUILD_VECTOR may not match the type of the shuffle. + if (V->getValueType(0) != VT) + NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV); + return NewBV; + } } }