X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FCodeGen%2FSelectionDAG%2FDAGCombiner.cpp;h=ab3a6631d3393ffb569189e53d443c792f23e6ef;hp=feeb1267e3535dd5f0efab2f33358cd78d07ecf1;hb=939b388b46f46ba2cb12cca94dfe1a87ac1b9a24;hpb=4f5829ac4df6d8d6b0d7976af15b4684dd15c336 diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index feeb1267e35..ab3a6631d33 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -245,6 +245,7 @@ namespace { SDValue visitUMULO(SDNode *N); SDValue visitSDIVREM(SDNode *N); SDValue visitUDIVREM(SDNode *N); + SDValue visitIMINMAX(SDNode *N); SDValue visitAND(SDNode *N); SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference); SDValue visitOR(SDNode *N); @@ -255,6 +256,7 @@ namespace { SDValue visitSRA(SDNode *N); SDValue visitSRL(SDNode *N); SDValue visitRotate(SDNode *N); + SDValue visitBSWAP(SDNode *N); SDValue visitCTLZ(SDNode *N); SDValue visitCTLZ_ZERO_UNDEF(SDNode *N); SDValue visitCTTZ(SDNode *N); @@ -268,6 +270,7 @@ namespace { SDValue visitZERO_EXTEND(SDNode *N); SDValue visitANY_EXTEND(SDNode *N); SDValue visitSIGN_EXTEND_INREG(SDNode *N); + SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N); SDValue visitTRUNCATE(SDNode *N); SDValue visitBITCAST(SDNode *N); SDValue visitBUILD_PAIR(SDNode *N); @@ -336,6 +339,7 @@ namespace { unsigned HiOp); SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); SDValue CombineExtLoad(SDNode *N); + SDValue combineRepeatedFPDivisors(SDNode *N); SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); SDValue BuildSDIVPow2(SDNode *N); @@ -386,6 +390,13 @@ namespace { unsigned SequenceNum; }; + /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a + /// constant build_vector of the stored constant values in Stores. + SDValue getMergedConstantVectorStore(SelectionDAG &DAG, + SDLoc SL, + ArrayRef Stores, + EVT Ty) const; + /// This is a helper function for MergeConsecutiveStores. When the source /// elements of the consecutive stores are all constants or all extracted /// vector elements, try to merge them into one larger store. @@ -394,6 +405,13 @@ namespace { EVT MemVT, unsigned NumElem, bool IsConstantSrc, bool UseVector); + /// This is a helper function for MergeConsecutiveStores. + /// Stores that may be merged are placed in StoreNodes. + /// Loads that may alias with those stores are placed in AliasLoadNodes. + void getStoreMergeAndAliasCandidates( + StoreSDNode* St, SmallVectorImpl &StoreNodes, + SmallVectorImpl &AliasLoadNodes); + /// Merge consecutive store operations into a wide store. /// This optimization uses wide integers or vectors when possible. /// \return True if some memory operations were changed. @@ -411,9 +429,7 @@ namespace { DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) { - auto *F = DAG.getMachineFunction().getFunction(); - ForCodeSize = F->hasFnAttribute(Attribute::OptimizeForSize) || - F->hasFnAttribute(Attribute::MinSize); + ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize(); } /// Runs the dag combiner on all nodes in the work list @@ -427,8 +443,9 @@ namespace { assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); if (LHSTy.isVector()) return LHSTy; - return LegalTypes ? TLI.getScalarShiftAmountTy(LHSTy) - : TLI.getPointerTy(); + auto &DL = DAG.getDataLayout(); + return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy) + : TLI.getPointerTy(DL); } /// This method returns true if we are running before type legalization or @@ -440,7 +457,7 @@ namespace { /// Convenience wrapper around TargetLowering::getSetCCResultType EVT getSetCCResultType(EVT VT) const { - return TLI.getSetCCResultType(*DAG.getContext(), VT); + return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); } }; } @@ -618,7 +635,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, // fold (fneg (fsub 0, B)) -> B if (ConstantFPSDNode *N0CFP = dyn_cast(Op.getOperand(0))) - if (N0CFP->getValueAPF().isZero()) + if (N0CFP->isZero()) return Op.getOperand(1); // fold (fneg (fsub A, B)) -> (fsub B, A) @@ -1176,8 +1193,8 @@ bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) { continue; if (N->use_empty()) { - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - Nodes.insert(N->getOperand(i).getNode()); + for (const SDValue &ChildN : N->op_values()) + Nodes.insert(ChildN.getNode()); removeFromWorklist(N); DAG.DeleteNode(N); @@ -1199,9 +1216,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) { LegalTypes = Level >= AfterLegalizeTypes; // Add all the dag nodes to the worklist. - for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = DAG.allnodes_end(); I != E; ++I) - AddToWorklist(I); + for (SDNode &Node : DAG.allnodes()) + AddToWorklist(&Node); // Create a dummy node (which is not added to allnodes), that adds a reference // to the root node, preventing it from being deleted, and tracking any @@ -1250,9 +1266,9 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // worklist as well. Because the worklist uniques things already, this // won't repeatedly process the same operand. CombinedNodes.insert(N); - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - if (!CombinedNodes.count(N->getOperand(i).getNode())) - AddToWorklist(N->getOperand(i).getNode()); + for (const SDValue &ChildN : N->op_values()) + if (!CombinedNodes.count(ChildN.getNode())) + AddToWorklist(ChildN.getNode()); SDValue RV = combine(N); @@ -1326,6 +1342,10 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::UMULO: return visitUMULO(N); case ISD::SDIVREM: return visitSDIVREM(N); case ISD::UDIVREM: return visitUDIVREM(N); + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: return visitIMINMAX(N); case ISD::AND: return visitAND(N); case ISD::OR: return visitOR(N); case ISD::XOR: return visitXOR(N); @@ -1334,6 +1354,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::SRL: return visitSRL(N); case ISD::ROTR: case ISD::ROTL: return visitRotate(N); + case ISD::BSWAP: return visitBSWAP(N); case ISD::CTLZ: return visitCTLZ(N); case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); case ISD::CTTZ: return visitCTTZ(N); @@ -1347,6 +1368,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); case ISD::ANY_EXTEND: return visitANY_EXTEND(N); case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); + case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N); case ISD::TRUNCATE: return visitTRUNCATE(N); case ISD::BITCAST: return visitBITCAST(N); case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); @@ -1452,12 +1474,9 @@ SDValue DAGCombiner::combine(SDNode *N) { if (isa(N0) || !isa(N1)) { SDValue Ops[] = {N1, N0}; SDNode *CSENode; - if (const BinaryWithFlagsSDNode *BinNode = - dyn_cast(N)) { + if (const auto *BinNode = dyn_cast(N)) { CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops, - BinNode->Flags.hasNoUnsignedWrap(), - BinNode->Flags.hasNoSignedWrap(), - BinNode->Flags.hasExact()); + &BinNode->Flags); } else { CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops); } @@ -1508,8 +1527,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { SDNode *TF = TFs[i]; // Check each of the operands. - for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) { - SDValue Op = TF->getOperand(i); + for (const SDValue &Op : TF->op_values()) { switch (Op.getOpcode()) { case ISD::EntryToken: @@ -1585,6 +1603,28 @@ static bool isNullConstant(SDValue V) { return Const != nullptr && Const->isNullValue(); } +static bool isNullFPConstant(SDValue V) { + ConstantFPSDNode *Const = dyn_cast(V); + return Const != nullptr && Const->isZero() && !Const->isNegative(); +} + +static bool isAllOnesConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast(V); + return Const != nullptr && Const->isAllOnesValue(); +} + +static bool isOneConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast(V); + return Const != nullptr && Const->isOne(); +} + +/// If \p N is a ContantSDNode with isOpaque() == false return it casted to a +/// ContantSDNode pointer else nullptr. +static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) { + ConstantSDNode *Const = dyn_cast(N); + return Const != nullptr && !Const->isOpaque() ? Const : nullptr; +} + SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -1608,8 +1648,8 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (N1.getOpcode() == ISD::UNDEF) return N1; // fold (add c1, c2) -> c1+c2 - ConstantSDNode *N0C = dyn_cast(N0); - ConstantSDNode *N1C = dyn_cast(N1); + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS @@ -1628,7 +1668,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { (uint64_t)N1C->getSExtValue()); // fold ((c1-A)+c2) -> (c1+c2)-A if (N1C && N0.getOpcode() == ISD::SUB) - if (ConstantSDNode *N0C = dyn_cast(N0.getOperand(0))) { + if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) { SDLoc DL(N); return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(N1C->getAPIntValue()+ @@ -1702,34 +1742,27 @@ SDValue DAGCombiner::visitADD(SDNode *N) { } // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) - if (N1.getOpcode() == ISD::SHL && - N1.getOperand(0).getOpcode() == ISD::SUB) - if (ConstantSDNode *C = - dyn_cast(N1.getOperand(0).getOperand(0))) - if (C->getAPIntValue() == 0) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, - DAG.getNode(ISD::SHL, SDLoc(N), VT, - N1.getOperand(0).getOperand(1), - N1.getOperand(1))); - if (N0.getOpcode() == ISD::SHL && - N0.getOperand(0).getOpcode() == ISD::SUB) - if (ConstantSDNode *C = - dyn_cast(N0.getOperand(0).getOperand(0))) - if (C->getAPIntValue() == 0) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, - DAG.getNode(ISD::SHL, SDLoc(N), VT, - N0.getOperand(0).getOperand(1), - N0.getOperand(1))); + if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB && + isNullConstant(N1.getOperand(0).getOperand(0))) + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, + DAG.getNode(ISD::SHL, SDLoc(N), VT, + N1.getOperand(0).getOperand(1), + N1.getOperand(1))); + if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB && + isNullConstant(N0.getOperand(0).getOperand(0))) + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, + DAG.getNode(ISD::SHL, SDLoc(N), VT, + N0.getOperand(0).getOperand(1), + N0.getOperand(1))); if (N1.getOpcode() == ISD::AND) { SDValue AndOp0 = N1.getOperand(0); - ConstantSDNode *AndOp1 = dyn_cast(N1->getOperand(1)); unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0); unsigned DestBits = VT.getScalarType().getSizeInBits(); // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) // and similar xforms where the inner op is either ~0 or 0. - if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) { + if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) { SDLoc DL(N); return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0); } @@ -1850,8 +1883,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N0 == N1) return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); // fold (sub c1, c2) -> c1-c2 - ConstantSDNode *N0C = dyn_cast(N0.getNode()); - ConstantSDNode *N1C = dyn_cast(N1.getNode()); + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, N0C, N1C); // fold (sub x, c) -> (add x, -c) @@ -1861,7 +1894,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { DAG.getConstant(-N1C->getAPIntValue(), DL, VT)); } // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) - if (N0C && N0C->isAllOnesValue()) + if (isAllOnesConstant(N0)) return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); // fold A-(A-B) -> B if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) @@ -1957,13 +1990,12 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) { } // fold (subc x, 0) -> x + no borrow - ConstantSDNode *N0C = dyn_cast(N0); if (isNullConstant(N1)) return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow - if (N0C && N0C->isAllOnesValue()) + if (isAllOnesConstant(N0)) return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0), DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); @@ -1994,6 +2026,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { bool N0IsConst = false; bool N1IsConst = false; + bool N1IsOpaqueConst = false; + bool N0IsOpaqueConst = false; APInt ConstValue0, ConstValue1; // fold vector ops if (VT.isVector()) { @@ -2004,15 +2038,19 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1); } else { N0IsConst = isa(N0); - if (N0IsConst) + if (N0IsConst) { ConstValue0 = cast(N0)->getAPIntValue(); + N0IsOpaqueConst = cast(N0)->isOpaque(); + } N1IsConst = isa(N1); - if (N1IsConst) + if (N1IsConst) { ConstValue1 = cast(N1)->getAPIntValue(); + N1IsOpaqueConst = cast(N1)->isOpaque(); + } } // fold (mul c1, c2) -> c1*c2 - if (N0IsConst && N1IsConst) + if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst) return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, N0.getNode(), N1.getNode()); @@ -2037,14 +2075,16 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { DAG.getConstant(0, DL, VT), N0); } // fold (mul x, (1 << c)) -> x << c - if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat) { + if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() && + IsFullSplat) { SDLoc DL(N); return DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ConstValue1.logBase2(), DL, getShiftAmountTy(N0.getValueType()))); } // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c - if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) { + if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() && + IsFullSplat) { unsigned Log2Val = (-ConstValue1).logBase2(); SDLoc DL(N); // FIXME: If the input is something that is easily negated (e.g. a @@ -2122,10 +2162,10 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // fold (sdiv c1, c2) -> c1/c2 ConstantSDNode *N0C = isConstOrConstSplat(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (N0C && N1C && N1C->isNullValue()) + if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::SDIV, SDLoc(N), VT, N0C, N1C); // fold (sdiv X, 1) -> X - if (N1C && N1C->getAPIntValue() == 1LL) + if (N1C && N1C->isOne()) return N0; // fold (sdiv X, -1) -> 0-X if (N1C && N1C->isAllOnesValue()) { @@ -2141,17 +2181,21 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { N0, N1); } + bool MinSize = DAG.getMachineFunction().getFunction()->optForMinSize(); // fold (sdiv X, pow2) -> simple ops after legalize - if (N1C && !N1C->isNullValue() && (N1C->getAPIntValue().isPowerOf2() || - (-N1C->getAPIntValue()).isPowerOf2())) { - // If dividing by powers of two is cheap, then don't perform the following - // fold. - if (TLI.isPow2SDivCheap()) + // FIXME: We check for the exact bit here because the generic lowering gives + // better results in that case. The target-specific lowering should learn how + // to handle exact sdivs efficiently. + if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && + !cast(N)->Flags.hasExact() && + (N1C->getAPIntValue().isPowerOf2() || + (-N1C->getAPIntValue()).isPowerOf2())) { + // If integer division is cheap, then don't perform the following fold. + if (TLI.isIntDivCheap(N->getValueType(0), MinSize)) return SDValue(); // Target-specific implementation of sdiv x, pow2. - SDValue Res = BuildSDIVPow2(N); - if (Res.getNode()) + if (SDValue Res = BuildSDIVPow2(N)) return Res; unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); @@ -2187,10 +2231,9 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // If integer divide is expensive and we satisfy the requirements, emit an // alternate sequence. - if (N1C && !TLI.isIntDivCheap()) { - SDValue Op = BuildSDIV(N); - if (Op.getNode()) return Op; - } + if (N1C && !TLI.isIntDivCheap(N->getValueType(0), MinSize)) + if (SDValue Op = BuildSDIV(N)) + return Op; // undef / X -> 0 if (N0.getOpcode() == ISD::UNDEF) @@ -2215,10 +2258,12 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { // fold (udiv c1, c2) -> c1/c2 ConstantSDNode *N0C = isConstOrConstSplat(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (N0C && N1C && !N1C->isNullValue()) - return DAG.FoldConstantArithmetic(ISD::UDIV, SDLoc(N), VT, N0C, N1C); + if (N0C && N1C) + if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, SDLoc(N), VT, + N0C, N1C)) + return Folded; // fold (udiv x, (1 << c)) -> x >>u c - if (N1C && N1C->getAPIntValue().isPowerOf2()) { + if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2()) { SDLoc DL(N); return DAG.getNode(ISD::SRL, DL, VT, N0, DAG.getConstant(N1C->getAPIntValue().logBase2(), DL, @@ -2226,7 +2271,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { } // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 if (N1.getOpcode() == ISD::SHL) { - if (ConstantSDNode *SHC = dyn_cast(N1.getOperand(0))) { + if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { if (SHC->getAPIntValue().isPowerOf2()) { EVT ADDVT = N1.getOperand(1).getValueType(); SDLoc DL(N); @@ -2240,11 +2285,12 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { } } } + // fold (udiv x, c) -> alternate - if (N1C && !TLI.isIntDivCheap()) { - SDValue Op = BuildUDIV(N); - if (Op.getNode()) return Op; - } + bool MinSize = DAG.getMachineFunction().getFunction()->optForMinSize(); + if (N1C && !TLI.isIntDivCheap(N->getValueType(0), MinSize)) + if (SDValue Op = BuildUDIV(N)) + return Op; // undef / X -> 0 if (N0.getOpcode() == ISD::UNDEF) @@ -2264,8 +2310,10 @@ SDValue DAGCombiner::visitSREM(SDNode *N) { // fold (srem c1, c2) -> c1%c2 ConstantSDNode *N0C = isConstOrConstSplat(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (N0C && N1C && !N1C->isNullValue()) - return DAG.FoldConstantArithmetic(ISD::SREM, SDLoc(N), VT, N0C, N1C); + if (N0C && N1C) + if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::SREM, SDLoc(N), VT, + N0C, N1C)) + return Folded; // If we know the sign bits of both operands are zero, strength reduce to a // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 if (!VT.isVector()) { @@ -2306,17 +2354,20 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { // fold (urem c1, c2) -> c1%c2 ConstantSDNode *N0C = isConstOrConstSplat(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (N0C && N1C && !N1C->isNullValue()) - return DAG.FoldConstantArithmetic(ISD::UREM, SDLoc(N), VT, N0C, N1C); + if (N0C && N1C) + if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UREM, SDLoc(N), VT, + N0C, N1C)) + return Folded; // fold (urem x, pow2) -> (and x, pow2-1) - if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2()) { + if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && + N1C->getAPIntValue().isPowerOf2()) { SDLoc DL(N); return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT)); } // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) if (N1.getOpcode() == ISD::SHL) { - if (ConstantSDNode *SHC = dyn_cast(N1.getOperand(0))) { + if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { if (SHC->getAPIntValue().isPowerOf2()) { SDLoc DL(N); SDValue Add = @@ -2357,7 +2408,6 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { SDValue DAGCombiner::visitMULHS(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N1C = dyn_cast(N1); EVT VT = N->getValueType(0); SDLoc DL(N); @@ -2365,7 +2415,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { if (isNullConstant(N1)) return N1; // fold (mulhs x, 1) -> (sra x, size(x)-1) - if (N1C && N1C->getAPIntValue() == 1) { + if (isOneConstant(N1)) { SDLoc DL(N); return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0, DAG.getConstant(N0.getValueType().getSizeInBits() - 1, @@ -2399,7 +2449,6 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { SDValue DAGCombiner::visitMULHU(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N1C = dyn_cast(N1); EVT VT = N->getValueType(0); SDLoc DL(N); @@ -2407,7 +2456,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { if (isNullConstant(N1)) return N1; // fold (mulhu x, 1) -> 0 - if (N1C && N1C->getAPIntValue() == 1) + if (isOneConstant(N1)) return DAG.getConstant(0, DL, N0.getValueType()); // fold (mulhu x, undef) -> 0 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) @@ -2485,8 +2534,8 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, } SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { - SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS); - if (Res.getNode()) return Res; + if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS)) + return Res; EVT VT = N->getValueType(0); SDLoc DL(N); @@ -2516,8 +2565,8 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { } SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { - SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU); - if (Res.getNode()) return Res; + if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU)) + return Res; EVT VT = N->getValueType(0); SDLoc DL(N); @@ -2567,15 +2616,39 @@ SDValue DAGCombiner::visitUMULO(SDNode *N) { } SDValue DAGCombiner::visitSDIVREM(SDNode *N) { - SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM); - if (Res.getNode()) return Res; + if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM)) + return Res; return SDValue(); } SDValue DAGCombiner::visitUDIVREM(SDNode *N) { - SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM); - if (Res.getNode()) return Res; + if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM)) + return Res; + + return SDValue(); +} + +SDValue DAGCombiner::visitIMINMAX(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; + + // fold (add c1, c2) -> c1+c2 + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); + if (N0C && N1C) + return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C); + + // canonicalize constant to RHS + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); return SDValue(); } @@ -2759,28 +2832,28 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, AddToWorklist(ORNode.getNode()); return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); } - // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) - if (cast(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { - SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ANDNode.getNode()); - return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); - } - // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) - if (cast(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { - SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ORNode.getNode()); - return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + if (isAllOnesConstant(LR)) { + // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) + if (Op1 == ISD::SETEQ) { + SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ANDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); + } + // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) + if (Op1 == ISD::SETGT) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } } } // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) if (LL == RL && isa(LR) && isa(RR) && Op0 == Op1 && LL.getValueType().isInteger() && - Op0 == ISD::SETNE && ((isNullConstant(LR) && - cast(RR)->isAllOnesValue()) || - (cast(LR)->isAllOnesValue() && - isNullConstant(RR)))) { + Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) || + (isAllOnesConstant(LR) && isNullConstant(RR)))) { SDLoc DL(N0); SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(), LL, DAG.getConstant(1, DL, @@ -2872,16 +2945,16 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } // fold (and c1, c2) -> c1&c2 - ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); ConstantSDNode *N1C = dyn_cast(N1); - if (N0C && N1C) + if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS if (isConstantIntBuildVectorOrConstantInt(N0) && !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); // fold (and x, -1) -> x - if (N1C && N1C->isAllOnesValue()) + if (isAllOnesConstant(N1)) return N0; // if (and x, c) is known to be zero, return 0 unsigned BitWidth = VT.getScalarType().getSizeInBits(); @@ -3065,7 +3138,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // For big endian targets, we need to add an offset to the pointer // to load the correct bytes. For little endian systems, we merely // need to read fewer bytes from the same pointer. - if (TLI.isBigEndian()) { + if (DAG.getDataLayout().isBigEndian()) { unsigned LVTStoreBytes = LoadedVT.getStoreSize(); unsigned EVTStoreBytes = ExtVT.getStoreSize(); unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; @@ -3095,10 +3168,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return Combined; // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) - if (N0.getOpcode() == N1.getOpcode()) { - SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); - if (Tmp.getNode()) return Tmp; - } + if (N0.getOpcode() == N1.getOpcode()) + if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) + return Tmp; // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) // fold (and (sra)) -> (and (srl)) when possible. @@ -3431,8 +3503,7 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) { ISD::CondCode Op0 = cast(CC0)->get(); ISD::CondCode Op1 = cast(CC1)->get(); - if (LR == RR && isa(LR) && Op0 == Op1 && - LL.getValueType().isInteger()) { + if (LR == RR && Op0 == Op1 && LL.getValueType().isInteger()) { // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { @@ -3443,8 +3514,7 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) { } // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) - if (cast(LR)->isAllOnesValue() && - (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { + if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), LR.getValueType(), LL, RL); AddToWorklist(ANDNode.getNode()); @@ -3470,26 +3540,29 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) { } // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. - if (N0.getOpcode() == ISD::AND && - N1.getOpcode() == ISD::AND && - N0.getOperand(1).getOpcode() == ISD::Constant && - N1.getOperand(1).getOpcode() == ISD::Constant && + if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND && // Don't increase # computations. (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { // We can only do this xform if we know that bits from X that are set in C2 // but not in C1 are already zero. Likewise for Y. - const APInt &LHSMask = - cast(N0.getOperand(1))->getAPIntValue(); - const APInt &RHSMask = - cast(N1.getOperand(1))->getAPIntValue(); - - if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && - DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { - SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, - N0.getOperand(0), N1.getOperand(0)); - SDLoc DL(LocReference); - return DAG.getNode(ISD::AND, DL, VT, X, - DAG.getConstant(LHSMask | RHSMask, DL, VT)); + if (const ConstantSDNode *N0O1C = + getAsNonOpaqueConstant(N0.getOperand(1))) { + if (const ConstantSDNode *N1O1C = + getAsNonOpaqueConstant(N1.getOperand(1))) { + // We can only do this xform if we know that bits from X that are set in + // C2 but not in C1 are already zero. Likewise for Y. + const APInt &LHSMask = N0O1C->getAPIntValue(); + const APInt &RHSMask = N1O1C->getAPIntValue(); + + if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && + DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { + SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, + N0.getOperand(0), N1.getOperand(0)); + SDLoc DL(LocReference); + return DAG.getNode(ISD::AND, DL, VT, X, + DAG.getConstant(LHSMask | RHSMask, DL, VT)); + } + } } } @@ -3595,9 +3668,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } // fold (or c1, c2) -> c1|c2 - ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); ConstantSDNode *N1C = dyn_cast(N1); - if (N0C && N1C) + if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS if (isConstantIntBuildVectorOrConstantInt(N0) && @@ -3607,7 +3680,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (isNullConstant(N1)) return N0; // fold (or x, -1) -> -1 - if (N1C && N1C->isAllOnesValue()) + if (isAllOnesConstant(N1)) return N1; // fold (or x, c) -> c iff (x & ~c) == 0 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) @@ -3617,11 +3690,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return Combined; // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) - SDValue BSwap = MatchBSwapHWord(N, N0, N1); - if (BSwap.getNode()) + if (SDValue BSwap = MatchBSwapHWord(N, N0, N1)) return BSwap; - BSwap = MatchBSwapHWordLow(N, N0, N1); - if (BSwap.getNode()) + if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1)) return BSwap; // reassociate or @@ -3642,10 +3713,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } } // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) - if (N0.getOpcode() == N1.getOpcode()) { - SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); - if (Tmp.getNode()) return Tmp; - } + if (N0.getOpcode() == N1.getOpcode()) + if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) + return Tmp; // See if this is some rotate idiom. if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) @@ -3939,8 +4009,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (N1.getOpcode() == ISD::UNDEF) return N1; // fold (xor c1, c2) -> c1^c2 - ConstantSDNode *N0C = dyn_cast(N0); - ConstantSDNode *N1C = dyn_cast(N1); + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS @@ -3976,7 +4046,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y))) - if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND && + if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND && N0.getNode()->hasOneUse() && isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){ SDValue V = N0.getOperand(0); @@ -3988,7 +4058,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc - if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 && + if (isOneConstant(N1) && VT == MVT::i1 && (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { @@ -4000,7 +4070,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } } // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants - if (N1C && N1C->isAllOnesValue() && + if (isAllOnesConstant(N1) && (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); if (isa(RHS) || isa(LHS)) { @@ -4021,15 +4091,13 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2)) if (N1C && N0.getOpcode() == ISD::XOR) { - ConstantSDNode *N00C = dyn_cast(N0.getOperand(0)); - ConstantSDNode *N01C = dyn_cast(N0.getOperand(1)); - if (N00C) { + if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) { SDLoc DL(N); return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), DAG.getConstant(N1C->getAPIntValue() ^ N00C->getAPIntValue(), DL, VT)); } - if (N01C) { + if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) { SDLoc DL(N); return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0), DAG.getConstant(N1C->getAPIntValue() ^ @@ -4058,21 +4126,17 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { // consistent result. // - Pushing the zero left requires shifting one bits in from the right. // A rotate left of ~1 is a nice way of achieving the desired result. - if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) - if (auto *N1C = dyn_cast(N1.getNode())) - if (N0.getOpcode() == ISD::SHL) - if (auto *ShlLHS = dyn_cast(N0.getOperand(0))) - if (N1C->isAllOnesValue() && ShlLHS->isOne()) { - SDLoc DL(N); - return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT), - N0.getOperand(1)); - } + if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL + && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) { + SDLoc DL(N); + return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT), + N0.getOperand(1)); + } // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) - if (N0.getOpcode() == N1.getOpcode()) { - SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); - if (Tmp.getNode()) return Tmp; - } + if (N0.getOpcode() == N1.getOpcode()) + if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) + return Tmp; // Simplify the expression using non-local knowledge. if (!VT.isVector() && @@ -4085,10 +4149,6 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { /// Handle transforms common to the three shifts, when the shift amount is a /// constant. SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { - // We can't and shouldn't fold opaque constants. - if (Amt->isOpaque()) - return SDValue(); - SDNode *LHS = N->getOperand(0).getNode(); if (!LHS->hasOneUse()) return SDValue(); @@ -4115,8 +4175,8 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { } // We require the RHS of the binop to be a constant and not opaque as well. - ConstantSDNode *BinOpCst = dyn_cast(LHS->getOperand(1)); - if (!BinOpCst || BinOpCst->isOpaque()) return SDValue(); + ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1)); + if (!BinOpCst) return SDValue(); // FIXME: disable this unless the input to the binop is a shift by a constant. // If it is not a shift, it pessimizes some common cases like: @@ -4169,15 +4229,17 @@ SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { SDValue N01 = N->getOperand(0).getOperand(1); if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) { - EVT TruncVT = N->getValueType(0); - SDValue N00 = N->getOperand(0).getOperand(0); - APInt TruncC = N01C->getAPIntValue(); - TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits()); - SDLoc DL(N); + if (!N01C->isOpaque()) { + EVT TruncVT = N->getValueType(0); + SDValue N00 = N->getOperand(0).getOperand(0); + APInt TruncC = N01C->getAPIntValue(); + TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits()); + SDLoc DL(N); - return DAG.getNode(ISD::AND, DL, TruncVT, - DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00), - DAG.getConstant(TruncC, DL, TruncVT)); + return DAG.getNode(ISD::AND, DL, TruncVT, + DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00), + DAG.getConstant(TruncC, DL, TruncVT)); + } } } @@ -4231,14 +4293,14 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } // fold (shl c1, c2) -> c1<(N0); - if (N0C && N1C) + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C); // fold (shl 0, x) -> 0 if (isNullConstant(N0)) return N0; // fold (shl x, c >= size(x)) -> undef - if (N1C && N1C->getZExtValue() >= OpSizeInBits) + if (N1C && N1C->getAPIntValue().uge(OpSizeInBits)) return DAG.getUNDEF(VT); // fold (shl x, 0) -> x if (N1C && N1C->isNullValue()) @@ -4325,6 +4387,22 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } } + // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2 + // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2 + if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) && + cast(N0)->Flags.hasExact()) { + if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { + uint64_t C1 = N0C1->getZExtValue(); + uint64_t C2 = N1C->getZExtValue(); + SDLoc DL(N); + if (C1 <= C2) + return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), + DAG.getConstant(C2 - C1, DL, N1.getValueType())); + return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), + DAG.getConstant(C1 - C2, DL, N1.getValueType())); + } + } + // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or // (and (srl x, (sub c1, c2), MASK) // Only fold this if the inner shift has no other uses -- if it does, folding @@ -4377,12 +4455,18 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1); } - if (N1C) { - SDValue NewSHL = visitShiftByConstant(N, N1C); - if (NewSHL.getNode()) - return NewSHL; + // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2) + if (N1C && N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse()) { + if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { + SDValue Folded = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, N0C1, N1C); + return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Folded); + } } + if (N1C && !N1C->isOpaque()) + if (SDValue NewSHL = visitShiftByConstant(N, N1C)) + return NewSHL; + return SDValue(); } @@ -4402,14 +4486,14 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { } // fold (sra c1, c2) -> (sra c1, c2) - ConstantSDNode *N0C = dyn_cast(N0); - if (N0C && N1C) + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C); // fold (sra 0, x) -> 0 if (isNullConstant(N0)) return N0; // fold (sra -1, x) -> -1 - if (N0C && N0C->isAllOnesValue()) + if (isAllOnesConstant(N0)) return N0; // fold (sra x, (setge c, size(x))) -> undef if (N1C && N1C->getZExtValue() >= OpSizeInBits) @@ -4526,11 +4610,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); - if (N1C) { - SDValue NewSRA = visitShiftByConstant(N, N1C); - if (NewSRA.getNode()) + if (N1C && !N1C->isOpaque()) + if (SDValue NewSRA = visitShiftByConstant(N, N1C)) return NewSRA; - } return SDValue(); } @@ -4551,8 +4633,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { } // fold (srl c1, c2) -> c1 >>u c2 - ConstantSDNode *N0C = dyn_cast(N0); - if (N0C && N1C) + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C); // fold (srl 0, x) -> 0 if (isNullConstant(N0)) @@ -4687,8 +4769,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))). if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { - SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); - if (NewOp1.getNode()) + if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1); } @@ -4697,15 +4778,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1C && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); - if (N1C) { - SDValue NewSRL = visitShiftByConstant(N, N1C); - if (NewSRL.getNode()) + if (N1C && !N1C->isOpaque()) + if (SDValue NewSRL = visitShiftByConstant(N, N1C)) return NewSRL; - } // Attempt to convert a srl of a load into a narrower zero-extending load. - SDValue NarrowLoad = ReduceLoadWidth(N); - if (NarrowLoad.getNode()) + if (SDValue NarrowLoad = ReduceLoadWidth(N)) return NarrowLoad; // Here is a common situation. We want to optimize: @@ -4740,12 +4818,25 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitBSWAP(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (bswap c1) -> c2 + if (isConstantIntBuildVectorOrConstantInt(N0)) + return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0); + // fold (bswap (bswap x)) -> x + if (N0.getOpcode() == ISD::BSWAP) + return N0->getOperand(0); + return SDValue(); +} + SDValue DAGCombiner::visitCTLZ(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // fold (ctlz c1) -> c2 - if (isa(N0)) + if (isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0); return SDValue(); } @@ -4755,7 +4846,7 @@ SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { EVT VT = N->getValueType(0); // fold (ctlz_zero_undef c1) -> c2 - if (isa(N0)) + if (isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0); return SDValue(); } @@ -4765,7 +4856,7 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) { EVT VT = N->getValueType(0); // fold (cttz c1) -> c2 - if (isa(N0)) + if (isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0); return SDValue(); } @@ -4775,7 +4866,7 @@ SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { EVT VT = N->getValueType(0); // fold (cttz_zero_undef c1) -> c2 - if (isa(N0)) + if (isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); return SDValue(); } @@ -4785,7 +4876,7 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) { EVT VT = N->getValueType(0); // fold (ctpop c1) -> c2 - if (isa(N0)) + if (isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0); return SDValue(); } @@ -4843,8 +4934,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { return !N0C->isNullValue() ? N1 : N2; } // fold (select C, 1, X) -> (or C, X) - ConstantSDNode *N1C = dyn_cast(N1); - if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1) + if (VT == MVT::i1 && isOneConstant(N1)) return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); // fold (select C, 0, 1) -> (xor C, 1) // We can't do this reliably if integer based booleans have different contents @@ -4855,14 +4945,13 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // undiscoverable (or not reasonably discoverable). For example, it could be // in another basic block or it could require searching a complicated // expression. - ConstantSDNode *N2C = dyn_cast(N2); if (VT.isInteger() && (VT0 == MVT::i1 || (VT0.isInteger() && TLI.getBooleanContents(false, false) == TLI.getBooleanContents(false, true) && TLI.getBooleanContents(false, false) == TargetLowering::ZeroOrOneBooleanContent)) && - isNullConstant(N1) && N2C && N2C->isOne()) { + isNullConstant(N1) && isOneConstant(N2)) { SDValue XORNode; if (VT == VT0) { SDLoc DL(N); @@ -4884,7 +4973,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2); } // fold (select C, X, 1) -> (or (not C), X) - if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) { + if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) { SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); AddToWorklist(NOTNode.getNode()); return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1); @@ -4894,81 +4983,58 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); // fold (select X, X, Y) -> (or X, Y) // fold (select X, 1, Y) -> (or X, Y) - if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1))) + if (VT == MVT::i1 && (N0 == N1 || isOneConstant(N1))) return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); // fold (select X, Y, X) -> (and X, Y) // fold (select X, Y, 0) -> (and X, Y) - if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0))) + if (VT == MVT::i1 && (N0 == N2 || isNullConstant(N2))) return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); // If we can fold this based on the true/false value, do so. if (SimplifySelectOps(N, N1, N2)) return SDValue(N, 0); // Don't revisit N. - // fold selects based on a setcc into other things, such as min/max/abs - if (N0.getOpcode() == ISD::SETCC) { - // select x, y (fcmp lt x, y) -> fminnum x, y - // select x, y (fcmp gt x, y) -> fmaxnum x, y - // - // This is OK if we don't care about what happens if either operand is a - // NaN. - // - - // FIXME: Instead of testing for UnsafeFPMath, this should be checking for - // no signed zeros as well as no nans. - const TargetOptions &Options = DAG.getTarget().Options; - if (Options.UnsafeFPMath && - VT.isFloatingPoint() && N0.hasOneUse() && - DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) { - ISD::CondCode CC = cast(N0.getOperand(2))->get(); - - SDValue FMinMax = - combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), - N1, N2, CC, TLI, DAG); - if (FMinMax) - return FMinMax; - } - - if ((!LegalOperations && - TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) || - TLI.isOperationLegal(ISD::SELECT_CC, VT)) - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1), - N1, N2, N0.getOperand(2)); - return SimplifySelect(SDLoc(N), N0, N1, N2); - } - if (VT0 == MVT::i1) { - if (TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { - // select (and Cond0, Cond1), X, Y - // -> select Cond0, (select Cond1, X, Y), Y - if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) { - SDValue Cond0 = N0->getOperand(0); - SDValue Cond1 = N0->getOperand(1); - SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), - N1.getValueType(), Cond1, N1, N2); + // The code in this block deals with the following 2 equivalences: + // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y)) + // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y) + // The target can specify its prefered form with the + // shouldNormalizeToSelectSequence() callback. However we always transform + // to the right anyway if we find the inner select exists in the DAG anyway + // and we always transform to the left side if we know that we can further + // optimize the combination of the conditions. + bool normalizeToSequence + = TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT); + // select (and Cond0, Cond1), X, Y + // -> select Cond0, (select Cond1, X, Y), Y + if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) { + SDValue Cond0 = N0->getOperand(0); + SDValue Cond1 = N0->getOperand(1); + SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), + N1.getValueType(), Cond1, N1, N2); + if (normalizeToSequence || !InnerSelect.use_empty()) return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, InnerSelect, N2); - } - // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y) - if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) { - SDValue Cond0 = N0->getOperand(0); - SDValue Cond1 = N0->getOperand(1); - SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), - N1.getValueType(), Cond1, N1, N2); + } + // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y) + if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) { + SDValue Cond0 = N0->getOperand(0); + SDValue Cond1 = N0->getOperand(1); + SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), + N1.getValueType(), Cond1, N1, N2); + if (normalizeToSequence || !InnerSelect.use_empty()) return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1, InnerSelect); - } } // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y - if (N1->getOpcode() == ISD::SELECT) { + if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) { SDValue N1_0 = N1->getOperand(0); SDValue N1_1 = N1->getOperand(1); SDValue N1_2 = N1->getOperand(2); if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) { // Create the actual and node if we can generate good code for it. - if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { + if (!normalizeToSequence) { SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(), N0, N1_0); return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And, @@ -4981,13 +5047,13 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { } } // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y - if (N2->getOpcode() == ISD::SELECT) { + if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) { SDValue N2_0 = N2->getOperand(0); SDValue N2_1 = N2->getOperand(1); SDValue N2_2 = N2->getOperand(2); if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) { // Create the actual or node if we can generate good code for it. - if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { + if (!normalizeToSequence) { SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(), N0, N2_0); return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or, @@ -5001,6 +5067,38 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { } } + // fold selects based on a setcc into other things, such as min/max/abs + if (N0.getOpcode() == ISD::SETCC) { + // select x, y (fcmp lt x, y) -> fminnum x, y + // select x, y (fcmp gt x, y) -> fmaxnum x, y + // + // This is OK if we don't care about what happens if either operand is a + // NaN. + // + + // FIXME: Instead of testing for UnsafeFPMath, this should be checking for + // no signed zeros as well as no nans. + const TargetOptions &Options = DAG.getTarget().Options; + if (Options.UnsafeFPMath && + VT.isFloatingPoint() && N0.hasOneUse() && + DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) { + ISD::CondCode CC = cast(N0.getOperand(2))->get(); + + if (SDValue FMinMax = combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), + N0.getOperand(1), N1, N2, CC, + TLI, DAG)) + return FMinMax; + } + + if ((!LegalOperations && + TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) || + TLI.isOperationLegal(ISD::SELECT_CC, VT)) + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1), + N1, N2, N0.getOperand(2)); + return SimplifySelect(SDLoc(N), N0, N1, N2); + } + return SDValue(); } @@ -5119,7 +5217,7 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) { std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL); MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MSC->getPointerInfo(), + getMachineMemOperand(MSC->getPointerInfo(), MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, MSC->getAAInfo(), MSC->getRanges()); @@ -5258,7 +5356,7 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) { std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MGT->getPointerInfo(), + getMachineMemOperand(MGT->getPointerInfo(), MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MGT->getAAInfo(), MGT->getRanges()); @@ -5455,8 +5553,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { if (N1.getOpcode() == ISD::CONCAT_VECTORS && N2.getOpcode() == ISD::CONCAT_VECTORS && ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { - SDValue CV = ConvertSelectToConcatVector(N, DAG); - if (CV.getNode()) + if (SDValue CV = ConvertSelectToConcatVector(N, DAG)) return CV; } @@ -5512,12 +5609,12 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) { SDLoc(N)); } -// tryToFoldExtendOfConstant - Try to fold a sext/zext/aext -// dag node into a ConstantSDNode or a build_vector of constants. -// This function is called by the DAGCombiner when visiting sext/zext/aext -// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). -// Vector extends are not folded if operations are legal; this is to -// avoid introducing illegal build_vector dag nodes. +/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or +/// a build_vector of constants. +/// This function is called by the DAGCombiner when visiting sext/zext/aext +/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). +/// Vector extends are not folded if operations are legal; this is to +/// avoid introducing illegal build_vector dag nodes. static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes, bool LegalOperations) { @@ -5526,7 +5623,8 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, EVT VT = N->getValueType(0); assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || - Opcode == ISD::ANY_EXTEND) && "Expected EXTEND dag node in input!"); + Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG) + && "Expected EXTEND dag node in input!"); // fold (sext c1) -> c1 // fold (zext c1) -> c1 @@ -5546,9 +5644,8 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, // We can fold this node into a build_vector. unsigned VTBits = SVT.getSizeInBits(); unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits(); - unsigned ShAmt = VTBits - EVTBits; SmallVector Elts; - unsigned NumElts = N0->getNumOperands(); + unsigned NumElts = VT.getVectorNumElements(); SDLoc DL(N); for (unsigned i=0; i != NumElts; ++i) { @@ -5559,14 +5656,13 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, } SDLoc DL(Op); - ConstantSDNode *CurrentND = cast(Op); - const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue()); - if (Opcode == ISD::SIGN_EXTEND) - Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(), - DL, SVT)); + // Get the constant value and if needed trunc it to the size of the type. + // Nodes like build_vector might have constants wider than the scalar type. + APInt C = cast(Op)->getAPIntValue().zextOrTrunc(EVTBits); + if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG) + Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT)); else - Elts.push_back(DAG.getConstant(C.shl(ShAmt).lshr(ShAmt).getZExtValue(), - DL, SVT)); + Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT)); } return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode(); @@ -5770,8 +5866,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (N0.getOpcode() == ISD::TRUNCATE) { // fold (sext (truncate (load x))) -> (sext (smaller load x)) // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n))) - SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); - if (NarrowLoad.getNode()) { + if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { SDNode* oye = N0.getNode()->getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); @@ -6053,8 +6148,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (truncate (load x))) -> (zext (smaller load x)) // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n))) if (N0.getOpcode() == ISD::TRUNCATE) { - SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); - if (NarrowLoad.getNode()) { + if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { SDNode* oye = N0.getNode()->getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); @@ -6066,32 +6160,45 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } // fold (zext (truncate x)) -> (and x, mask) - if (N0.getOpcode() == ISD::TRUNCATE && - (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) { - + if (N0.getOpcode() == ISD::TRUNCATE) { // fold (zext (truncate (load x))) -> (zext (smaller load x)) // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n))) - SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); - if (NarrowLoad.getNode()) { - SDNode* oye = N0.getNode()->getOperand(0).getNode(); + if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { + SDNode *oye = N0.getNode()->getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. AddToWorklist(oye); } - return SDValue(N, 0); // Return N so it doesn't get rechecked! + return SDValue(N, 0); // Return N so it doesn't get rechecked! } - SDValue Op = N0.getOperand(0); - if (Op.getValueType().bitsLT(VT)) { - Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op); - AddToWorklist(Op.getNode()); - } else if (Op.getValueType().bitsGT(VT)) { - Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); - AddToWorklist(Op.getNode()); + EVT SrcVT = N0.getOperand(0).getValueType(); + EVT MinVT = N0.getValueType(); + + // Try to mask before the extension to avoid having to generate a larger mask, + // possibly over several sub-vectors. + if (SrcVT.bitsLT(VT)) { + if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) && + TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) { + SDValue Op = N0.getOperand(0); + Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); + AddToWorklist(Op.getNode()); + return DAG.getZExtOrTrunc(Op, SDLoc(N), VT); + } + } + + if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) { + SDValue Op = N0.getOperand(0); + if (SrcVT.bitsLT(VT)) { + Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op); + AddToWorklist(Op.getNode()); + } else if (SrcVT.bitsGT(VT)) { + Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); + AddToWorklist(Op.getNode()); + } + return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); } - return DAG.getZeroExtendInReg(Op, SDLoc(N), - N0.getValueType().getScalarType()); } // Fold (zext (and (trunc x), cst)) -> (and x, cst), @@ -6311,8 +6418,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // fold (aext (truncate (load x))) -> (aext (smaller load x)) // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n))) if (N0.getOpcode() == ISD::TRUNCATE) { - SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); - if (NarrowLoad.getNode()) { + if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { SDNode* oye = N0.getNode()->getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); @@ -6472,15 +6578,14 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { // Only look at single-use SRLs. if (!V.getNode()->hasOneUse()) break; - if (ConstantSDNode *RHSC = dyn_cast(V.getOperand(1))) { + if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) { // See if we can recursively simplify the LHS. unsigned Amt = RHSC->getZExtValue(); // Watch out for shift count overflow though. if (Amt >= Mask.getBitWidth()) break; APInt NewMask = Mask << Amt; - SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask); - if (SimplifyLHS.getNode()) + if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask)) return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS, V.getOperand(1)); } @@ -6609,7 +6714,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // For big endian targets, we need to adjust the offset to the pointer to // load the correct bytes. - if (TLI.isBigEndian()) { + if (DAG.getDataLayout().isBigEndian()) { unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; @@ -6704,8 +6809,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // fold (sext_in_reg (load x)) -> (smaller sextload x) // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits)) - SDValue NarrowLoad = ReduceLoadWidth(N); - if (NarrowLoad.getNode()) + if (SDValue NarrowLoad = ReduceLoadWidth(N)) return NarrowLoad; // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24) @@ -6790,10 +6894,24 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(VT); + + if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, + LegalOperations)) + return SDValue(Res, 0); + + return SDValue(); +} + SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - bool isLE = TLI.isLittleEndian(); + bool isLE = DAG.getDataLayout().isLittleEndian(); // noop truncate if (N0.getValueType() == N->getValueType(0)) @@ -6846,7 +6964,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue EltNo = N0->getOperand(1); if (isa(EltNo) && isTypeLegal(NVT)) { int Elt = cast(EltNo)->getZExtValue(); - EVT IndexTy = TLI.getVectorIdxTy(); + EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N), @@ -6918,9 +7036,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // fold (truncate (load x)) -> (smaller load x) // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) { - SDValue Reduced = ReduceLoadWidth(N); - if (Reduced.getNode()) + if (SDValue Reduced = ReduceLoadWidth(N)) return Reduced; + // Handle the case where the load remains an extending load even // after truncation. if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) { @@ -7013,8 +7131,8 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { !LD2->isVolatile() && DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) { unsigned Align = LD1->getAlignment(); - unsigned NewAlign = TLI.getDataLayout()-> - getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); + unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( + VT.getTypeForEVT(*DAG.getContext())); if (NewAlign <= Align && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) @@ -7070,13 +7188,13 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // Do not change the width of a volatile load. !cast(N0)->isVolatile() && // Do not remove the cast if the types differ in endian layout. - TLI.hasBigEndianPartOrdering(N0.getValueType()) == - TLI.hasBigEndianPartOrdering(VT) && + TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) == + TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) { LoadSDNode *LN0 = cast(N0); - unsigned Align = TLI.getDataLayout()-> - getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); + unsigned Align = DAG.getDataLayout().getABITypeAlignment( + VT.getTypeForEVT(*DAG.getContext())); unsigned OrigAlign = LN0->getAlignment(); if (Align <= OrigAlign) { @@ -7159,11 +7277,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { } // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive. - if (N0.getOpcode() == ISD::BUILD_PAIR) { - SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT); - if (CombineLD.getNode()) + if (N0.getOpcode() == ISD::BUILD_PAIR) + if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT)) return CombineLD; - } // Remove double bitcasts from shuffles - this is often a legacy of // XformToShuffleWithZero being used to combine bitmaskings (of @@ -7176,10 +7292,10 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { ShuffleVectorSDNode *SVN = cast(N0); // If operands are a bitcast, peek through if it casts the original VT. - // If operands are a UNDEF or constant, just bitcast back to original VT. + // If operands are a constant, just bitcast back to original VT. auto PeekThroughBitcast = [&](SDValue Op) { if (Op.getOpcode() == ISD::BITCAST && - Op.getOperand(0)->getValueType(0) == VT) + Op.getOperand(0).getValueType() == VT) return SDValue(Op.getOperand(0)); if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) @@ -7244,8 +7360,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { DstEltVT, BV->getOperand(0))); SmallVector Ops; - for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { - SDValue Op = BV->getOperand(i); + for (SDValue Op : BV->op_values()) { // If the vector element type is not legal, the BUILD_VECTOR operands // are promoted and implicitly truncated. Make that explicit here. if (Op.getValueType() != SrcEltVT) @@ -7289,7 +7404,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { SmallVector Ops; for (unsigned i = 0, e = BV->getNumOperands(); i != e; i += NumInputsPerOutput) { - bool isLE = TLI.isLittleEndian(); + bool isLE = DAG.getDataLayout().isLittleEndian(); APInt NewBits = APInt(DstBitSize, 0); bool EltIsUndef = true; for (unsigned j = 0; j != NumInputsPerOutput; ++j) { @@ -7320,13 +7435,13 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { NumOutputsPerInput*BV->getNumOperands()); SmallVector Ops; - for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { - if (BV->getOperand(i).getOpcode() == ISD::UNDEF) { + for (const SDValue &Op : BV->op_values()) { + if (Op.getOpcode() == ISD::UNDEF) { Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT)); continue; } - APInt OpVal = cast(BV->getOperand(i))-> + APInt OpVal = cast(Op)-> getAPIntValue().zextOrTrunc(SrcBitSize); for (unsigned j = 0; j != NumOutputsPerInput; ++j) { @@ -7336,7 +7451,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { } // For big endian targets, swap the order of the pieces of each element. - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); } @@ -7373,6 +7488,14 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { bool Aggressive = TLI.enableAggressiveFMAFusion(VT); bool LookThroughFPExt = TLI.isFPExtFree(VT); + // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), + // prefer to fold the multiply with fewer uses. + if (Aggressive && N0.getOpcode() == ISD::FMUL && + N1.getOpcode() == ISD::FMUL) { + if (N0.getNode()->use_size() > N1.getNode()->use_size()) + std::swap(N0, N1); + } + // fold (fadd (fmul x, y), z) -> (fma x, y, z) if (N0.getOpcode() == ISD::FMUL && (Aggressive || N0->hasOneUse())) { @@ -7827,7 +7950,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { bool AllowNewConst = (Level < AfterLegalizeDAG); // fold (fadd A, 0) -> A - if (N1CFP && N1CFP->getValueAPF().isZero()) + if (N1CFP && N1CFP->isZero()) return N0; // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) @@ -7923,8 +8046,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } // enable-unsafe-fp-math // FADD -> FMA combines: - SDValue Fused = visitFADDForFMACombine(N); - if (Fused) { + if (SDValue Fused = visitFADDForFMACombine(N)) { AddToWorklist(Fused.getNode()); return Fused; } @@ -7958,11 +8080,11 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // If 'unsafe math' is enabled, fold lots of things. if (Options.UnsafeFPMath) { // (fsub A, 0) -> A - if (N1CFP && N1CFP->getValueAPF().isZero()) + if (N1CFP && N1CFP->isZero()) return N0; // (fsub 0, B) -> -B - if (N0CFP && N0CFP->getValueAPF().isZero()) { + if (N0CFP && N0CFP->isZero()) { if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) return GetNegatedExpression(N1, DAG, LegalOperations); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) @@ -7988,8 +8110,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } // FSUB -> FMA combines: - SDValue Fused = visitFSUBForFMACombine(N); - if (Fused) { + if (SDValue Fused = visitFSUBForFMACombine(N)) { AddToWorklist(Fused.getNode()); return Fused; } @@ -8028,7 +8149,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (Options.UnsafeFPMath) { // fold (fmul A, 0) -> 0 - if (N1CFP && N1CFP->getValueAPF().isZero()) + if (N1CFP && N1CFP->isZero()) return N1; // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) @@ -8041,7 +8162,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { auto *BV1 = dyn_cast(N1); auto *BV00 = dyn_cast(N00); auto *BV01 = dyn_cast(N01); - + // Check 1: Make sure that the first operand of the inner multiply is NOT // a constant. Otherwise, we may induce infinite looping. if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) { @@ -8059,7 +8180,9 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs // during an early run of DAGCombiner can prevent folding with fmuls // inserted during lowering. - if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) { + if (N0.getOpcode() == ISD::FADD && + (N0.getOperand(0) == N0.getOperand(1)) && + N0.hasOneUse()) { const SDValue Two = DAG.getConstantFP(2.0, DL, VT); SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1); return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts); @@ -8173,6 +8296,66 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { return SDValue(); } +// Combine multiple FDIVs with the same divisor into multiple FMULs by the +// reciprocal. +// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) +// Notice that this is not always beneficial. One reason is different target +// may have different costs for FDIV and FMUL, so sometimes the cost of two +// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason +// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL". +SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { + if (!DAG.getTarget().Options.UnsafeFPMath) + return SDValue(); + + // Skip if current node is a reciprocal. + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast(N0); + if (N0CFP && N0CFP->isExactlyValue(1.0)) + return SDValue(); + + // Exit early if the target does not want this transform or if there can't + // possibly be enough uses of the divisor to make the transform worthwhile. + SDValue N1 = N->getOperand(1); + unsigned MinUses = TLI.combineRepeatedFPDivisors(); + if (!MinUses || N1->use_size() < MinUses) + return SDValue(); + + // Find all FDIV users of the same divisor. + // Use a set because duplicates may be present in the user list. + SetVector Users; + for (auto *U : N1->uses()) + if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) + Users.insert(U); + + // Now that we have the actual number of divisor uses, make sure it meets + // the minimum threshold specified by the target. + if (Users.size() < MinUses) + return SDValue(); + + EVT VT = N->getValueType(0); + SDLoc DL(N); + SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); + // FIXME: This optimization requires some level of fast-math, so the + // created reciprocal node should at least have the 'allowReciprocal' + // fast-math-flag set. + SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1); + + // Dividend / Divisor -> Dividend * Reciprocal + for (auto *U : Users) { + SDValue Dividend = U->getOperand(0); + if (Dividend != FPOne) { + SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend, + Reciprocal); + CombineTo(U, NewNode); + } else if (U != Reciprocal.getNode()) { + // In the absence of fast-math-flags, this user node is always the + // same node as Reciprocal, but with FMF they may be different nodes. + CombineTo(U, Reciprocal); + } + } + return SDValue(N, 0); // N was replaced. +} + SDValue DAGCombiner::visitFDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -8273,44 +8456,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { } } - // Combine multiple FDIVs with the same divisor into multiple FMULs by the - // reciprocal. - // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) - // Notice that this is not always beneficial. One reason is different target - // may have different costs for FDIV and FMUL, so sometimes the cost of two - // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason - // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL". - if (Options.UnsafeFPMath) { - // Skip if current node is a reciprocal. - if (N0CFP && N0CFP->isExactlyValue(1.0)) - return SDValue(); - - SmallVector Users; - // Find all FDIV users of the same divisor. - for (SDNode::use_iterator UI = N1.getNode()->use_begin(), - UE = N1.getNode()->use_end(); - UI != UE; ++UI) { - SDNode *User = UI.getUse().getUser(); - if (User->getOpcode() == ISD::FDIV && User->getOperand(1) == N1) - Users.push_back(User); - } - - if (TLI.combineRepeatedFPDivisors(Users.size())) { - SDLoc DL(N); - SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); // floating point 1.0 - SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1); - - // Dividend / Divisor -> Dividend * Reciprocal - for (auto I = Users.begin(), E = Users.end(); I != E; ++I) { - if ((*I)->getOperand(0) != FPOne) { - SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(*I), VT, - (*I)->getOperand(0), Reciprocal); - DAG.ReplaceAllUsesWith(*I, NewNode.getNode()); - } - } - return SDValue(); - } - } + if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N)) + return CombineRepeatedDivisors; return SDValue(); } @@ -8330,30 +8477,29 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { } SDValue DAGCombiner::visitFSQRT(SDNode *N) { - if (DAG.getTarget().Options.UnsafeFPMath && - !TLI.isFsqrtCheap()) { - // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5) - if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) { - EVT VT = RV.getValueType(); - SDLoc DL(N); - RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV); - AddToWorklist(RV.getNode()); + if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap()) + return SDValue(); - // Unfortunately, RV is now NaN if the input was exactly 0. - // Select out this case and force the answer to 0. - SDValue Zero = DAG.getConstantFP(0.0, DL, VT); - SDValue ZeroCmp = - DAG.getSetCC(DL, TLI.getSetCCResultType(*DAG.getContext(), VT), - N->getOperand(0), Zero, ISD::SETEQ); - AddToWorklist(ZeroCmp.getNode()); - AddToWorklist(RV.getNode()); + // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5) + SDValue RV = BuildRsqrtEstimate(N->getOperand(0)); + if (!RV) + return SDValue(); - RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, - DL, VT, ZeroCmp, Zero, RV); - return RV; - } - } - return SDValue(); + EVT VT = RV.getValueType(); + SDLoc DL(N); + RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV); + AddToWorklist(RV.getNode()); + + // Unfortunately, RV is now NaN if the input was exactly 0. + // Select out this case and force the answer to 0. + SDValue Zero = DAG.getConstantFP(0.0, DL, VT); + EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, N->getOperand(0), Zero, ISD::SETEQ); + AddToWorklist(ZeroCmp.getNode()); + AddToWorklist(RV.getNode()); + + return DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, + ZeroCmp, Zero, RV); } SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { @@ -8747,7 +8893,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { } // (fneg (fmul c, x)) -> (fmul -c, x) - if (N0.getOpcode() == ISD::FMUL) { + if (N0.getOpcode() == ISD::FMUL && + (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) { ConstantFPSDNode *CFP1 = dyn_cast(N0.getOperand(1)); if (CFP1) { APFloat CVal = CFP1->getValueAPF(); @@ -8950,8 +9097,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { SDValue Op1 = TheXor->getOperand(1); if (Op0.getOpcode() == Op1.getOpcode()) { // Avoid missing important xor optimizations. - SDValue Tmp = visitXOR(TheXor); - if (Tmp.getNode()) { + if (SDValue Tmp = visitXOR(TheXor)) { if (Tmp.getNode() != TheXor) { DEBUG(dbgs() << "\nReplacing.8 "; TheXor->dump(&DAG); @@ -8973,12 +9119,11 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) { bool Equal = false; - if (ConstantSDNode *RHSCI = dyn_cast(Op0)) - if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() && - Op0.getOpcode() == ISD::XOR) { - TheXor = Op0.getNode(); - Equal = true; - } + if (isOneConstant(Op0) && Op0.hasOneUse() && + Op0.getOpcode() == ISD::XOR) { + TheXor = Op0.getNode(); + Equal = true; + } EVT SetCCVT = N1.getValueType(); if (LegalTypes) @@ -9033,14 +9178,18 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG, const TargetLowering &TLI) { EVT VT; + unsigned AS; + if (LoadSDNode *LD = dyn_cast(Use)) { if (LD->isIndexed() || LD->getBasePtr().getNode() != N) return false; VT = LD->getMemoryVT(); + AS = LD->getAddressSpace(); } else if (StoreSDNode *ST = dyn_cast(Use)) { if (ST->isIndexed() || ST->getBasePtr().getNode() != N) return false; VT = ST->getMemoryVT(); + AS = ST->getAddressSpace(); } else return false; @@ -9064,7 +9213,8 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, } else return false; - return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext())); + return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, + VT.getTypeForEVT(*DAG.getContext()), AS); } /// Try turning a load/store into a pre-indexed load/store when the base @@ -9634,8 +9784,8 @@ struct LoadedSlice { void addSliceGain(const LoadedSlice &LS) { // Each slice saves a truncate. const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo(); - if (!TLI.isTruncateFree(LS.Inst->getValueType(0), - LS.Inst->getOperand(0).getValueType())) + if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(), + LS.Inst->getValueType(0))) ++Truncates; // If there is a shift amount, this slice gets rid of it. if (LS.Shift) @@ -9789,8 +9939,7 @@ struct LoadedSlice { /// \pre DAG != nullptr. uint64_t getOffsetFromBase() const { assert(DAG && "Missing context."); - bool IsBigEndian = - DAG->getTargetLoweringInfo().getDataLayout()->isBigEndian(); + bool IsBigEndian = DAG->getDataLayout().isBigEndian(); assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported."); uint64_t Offset = Shift / 8; unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8; @@ -9873,7 +10022,7 @@ struct LoadedSlice { // Check if it will be merged with the load. // 1. Check the alignment constraint. - unsigned RequiredAlignment = TLI.getDataLayout()->getABITypeAlignment( + unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment( ResVT.getTypeForEVT(*DAG->getContext())); if (RequiredAlignment > getAlignment()) @@ -10154,8 +10303,8 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { return Result; // Fail. else { bool isOk = false; - for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i) - if (Chain->getOperand(i).getNode() == LD) { + for (const SDValue &ChainOp : Chain->op_values()) + if (ChainOp.getNode() == LD) { isOk = true; break; } @@ -10241,7 +10390,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair &MaskInfo, unsigned StOffset; unsigned NewAlign = St->getAlignment(); - if (DAG.getTargetLoweringInfo().isLittleEndian()) + if (DAG.getDataLayout().isLittleEndian()) StOffset = ByteShift; else StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes; @@ -10354,12 +10503,12 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { uint64_t PtrOff = ShAmt / 8; // For big endian targets, we need to adjust the offset to the pointer to // load the correct bytes. - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext()); - if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy)) + if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy)) return SDValue(); SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD), @@ -10423,7 +10572,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { unsigned LDAlign = LD->getAlignment(); unsigned STAlign = ST->getAlignment(); Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); - unsigned ABIAlign = TLI.getDataLayout()->getABITypeAlignment(IntVTTy); + unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy); if (LDAlign < ABIAlign || STAlign < ABIAlign) return SDValue(); @@ -10538,6 +10687,18 @@ struct BaseIndexOffset { }; } // namespace +SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG, + SDLoc SL, + ArrayRef Stores, + EVT Ty) const { + SmallVector BuildVector; + + for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) + BuildVector.push_back(cast(Stores[I].MemNode)->getValue()); + + return DAG.getNode(ISD::BUILD_VECTOR, SL, Ty, BuildVector); +} + bool DAGCombiner::MergeStoresOfConstantsOrVecElts( SmallVectorImpl &StoreNodes, EVT MemVT, unsigned NumElem, bool IsConstantSrc, bool UseVector) { @@ -10568,12 +10729,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); if (IsConstantSrc) { - // A vector store with a constant source implies that the constant is - // zero; we only handle merging stores of constant zeros because the zero - // can be materialized without a load. - // It may be beneficial to loosen this restriction to allow non-zero - // store merging. - StoredVal = DAG.getConstant(0, DL, Ty); + StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Ty); } else { SmallVector Ops; for (unsigned i = 0; i < NumElem ; ++i) { @@ -10593,28 +10749,28 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( // elements, so this path implies a store of constants. assert(IsConstantSrc && "Merged vector elements should use vector store"); - unsigned StoreBW = NumElem * ElementSizeBytes * 8; - APInt StoreInt(StoreBW, 0); + unsigned SizeInBits = NumElem * ElementSizeBytes * 8; + APInt StoreInt(SizeInBits, 0); // Construct a single integer constant which is made of the smaller // constant inputs. - bool IsLE = TLI.isLittleEndian(); + bool IsLE = DAG.getDataLayout().isLittleEndian(); for (unsigned i = 0; i < NumElem ; ++i) { unsigned Idx = IsLE ? (NumElem - 1 - i) : i; StoreSDNode *St = cast(StoreNodes[Idx].MemNode); SDValue Val = St->getValue(); - StoreInt <<= ElementSizeBytes*8; + StoreInt <<= ElementSizeBytes * 8; if (ConstantSDNode *C = dyn_cast(Val)) { - StoreInt |= C->getAPIntValue().zext(StoreBW); + StoreInt |= C->getAPIntValue().zext(SizeInBits); } else if (ConstantFPSDNode *C = dyn_cast(Val)) { - StoreInt |= C->getValueAPF().bitcastToAPInt().zext(StoreBW); + StoreInt |= C->getValueAPF().bitcastToAPInt().zext(SizeInBits); } else { llvm_unreachable("Invalid constant element type"); } } // Create the new Load and Store operations. - EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); + EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); StoredVal = DAG.getConstant(StoreInt, DL, StoreTy); } @@ -10649,73 +10805,25 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( return true; } -static bool allowableAlignment(const SelectionDAG &DAG, - const TargetLowering &TLI, EVT EVTTy, - unsigned AS, unsigned Align) { - if (TLI.allowsMisalignedMemoryAccesses(EVTTy, AS, Align)) - return true; - - Type *Ty = EVTTy.getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = TLI.getDataLayout()->getPrefTypeAlignment(Ty); - return (Align >= ABIAlignment); -} - -bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { - if (OptLevel == CodeGenOpt::None) - return false; - - EVT MemVT = St->getMemoryVT(); - int64_t ElementSizeBytes = MemVT.getSizeInBits()/8; - bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute( - Attribute::NoImplicitFloat); - - // This function cannot currently deal with non-byte-sized memory sizes. - if (ElementSizeBytes * 8 != MemVT.getSizeInBits()) - return false; - - // Don't merge vectors into wider inputs. - if (MemVT.isVector() || !MemVT.isSimple()) - return false; - - // Perform an early exit check. Do not bother looking at stored values that - // are not constants, loads, or extracted vector elements. - SDValue StoredVal = St->getValue(); - bool IsLoadSrc = isa(StoredVal); - bool IsConstantSrc = isa(StoredVal) || - isa(StoredVal); - bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT); - - if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc) - return false; - - // Only look at ends of store sequences. - SDValue Chain = SDValue(St, 0); - if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE) - return false; - +void DAGCombiner::getStoreMergeAndAliasCandidates( + StoreSDNode* St, SmallVectorImpl &StoreNodes, + SmallVectorImpl &AliasLoadNodes) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); // We must have a base and an offset. if (!BasePtr.Base.getNode()) - return false; + return; // Do not handle stores to undef base pointers. if (BasePtr.Base.getOpcode() == ISD::UNDEF) - return false; - - // Save the LoadSDNodes that we find in the chain. - // We need to make sure that these nodes do not interfere with - // any of the store nodes. - SmallVector AliasLoadNodes; - - // Save the StoreSDNodes that we find in the chain. - SmallVector StoreNodes; + return; // Walk up the chain and look for nodes with offsets from the same // base pointer. Stop when reaching an instruction with a different kind // or instruction which has a different base pointer. + EVT MemVT = St->getMemoryVT(); unsigned Seq = 0; StoreSDNode *Index = St; while (Index) { @@ -10772,6 +10880,50 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { } } } +} + +bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { + if (OptLevel == CodeGenOpt::None) + return false; + + EVT MemVT = St->getMemoryVT(); + int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; + bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute( + Attribute::NoImplicitFloat); + + // This function cannot currently deal with non-byte-sized memory sizes. + if (ElementSizeBytes * 8 != MemVT.getSizeInBits()) + return false; + + // Don't merge vectors into wider inputs. + if (MemVT.isVector() || !MemVT.isSimple()) + return false; + + // Perform an early exit check. Do not bother looking at stored values that + // are not constants, loads, or extracted vector elements. + SDValue StoredVal = St->getValue(); + bool IsLoadSrc = isa(StoredVal); + bool IsConstantSrc = isa(StoredVal) || + isa(StoredVal); + bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT); + + if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc) + return false; + + // Only look at ends of store sequences. + SDValue Chain = SDValue(St, 0); + if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE) + return false; + + // Save the LoadSDNodes that we find in the chain. + // We need to make sure that these nodes do not interfere with + // any of the store nodes. + SmallVector AliasLoadNodes; + + // Save the StoreSDNodes that we find in the chain. + SmallVector StoreNodes; + + getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes); // Check if there is anything to merge. if (StoreNodes.size() < 2) @@ -10818,6 +10970,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; unsigned FirstStoreAS = FirstInChain->getAddressSpace(); unsigned FirstStoreAlign = FirstInChain->getAlignment(); + LLVMContext &Context = *DAG.getContext(); + const DataLayout &DL = DAG.getDataLayout(); // Store the constants into memory as one consecutive store. if (IsConstantSrc) { @@ -10838,36 +10992,44 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { } // Find a legal type for the constant store. - unsigned StoreBW = (i+1) * ElementSizeBytes * 8; - EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); + unsigned SizeInBits = (i+1) * ElementSizeBytes * 8; + EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); if (TLI.isTypeLegal(StoreTy) && - allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, - FirstStoreAlign)) { + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign)) { LastLegalType = i+1; // Or check whether a truncstore is legal. - } else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == + } else if (TLI.getTypeAction(Context, StoreTy) == TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValueTy = - TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); + TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && - allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS, - FirstStoreAlign)) { + TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, + FirstStoreAS, FirstStoreAlign)) { LastLegalType = i + 1; } } // Find a legal type for the vector store. - EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); + EVT Ty = EVT::getVectorVT(Context, MemVT, i+1); if (TLI.isTypeLegal(Ty) && - allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign)) { + TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, + FirstStoreAlign)) { LastLegalVectorType = i + 1; } } - // We only use vectors if the constant is known to be zero and the - // function is not marked with the noimplicitfloat attribute. - if (NonZero || NoVectors) + + // We only use vectors if the constant is known to be zero or the target + // allows it and the function is not marked with the noimplicitfloat + // attribute. + if (NoVectors) { LastLegalVectorType = 0; + } else if (NonZero && !TLI.storeOfVectorConstantIsCheap(MemVT, + LastLegalVectorType, + FirstStoreAS)) { + LastLegalVectorType = 0; + } // Check if we found a legal integer type to store. if (LastLegalType == 0 && LastLegalVectorType == 0) @@ -10896,9 +11058,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { return false; // Find a legal type for the vector store. - EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); + EVT Ty = EVT::getVectorVT(Context, MemVT, i+1); if (TLI.isTypeLegal(Ty) && - allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign)) + TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, + FirstStoreAlign)) NumElem = i + 1; } @@ -10986,33 +11149,37 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { LastConsecutiveLoad = i; // Find a legal type for the vector store. - EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); + EVT StoreTy = EVT::getVectorVT(Context, MemVT, i+1); if (TLI.isTypeLegal(StoreTy) && - allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) && - allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) { + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, + FirstLoadAlign)) { LastLegalVectorType = i + 1; } // Find a legal type for the integer store. - unsigned StoreBW = (i+1) * ElementSizeBytes * 8; - StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); + unsigned SizeInBits = (i+1) * ElementSizeBytes * 8; + StoreTy = EVT::getIntegerVT(Context, SizeInBits); if (TLI.isTypeLegal(StoreTy) && - allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) && - allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, + FirstLoadAlign)) LastLegalIntegerType = i + 1; // Or check whether a truncstore and extload is legal. - else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == + else if (TLI.getTypeAction(Context, StoreTy) == TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValueTy = - TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy); + TLI.getTypeToTransformTo(Context, StoreTy); if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) && - allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS, - FirstStoreAlign) && - allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstLoadAS, - FirstLoadAlign)) + TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, + FirstStoreAS, FirstStoreAlign) && + TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, + FirstLoadAS, FirstLoadAlign)) LastLegalIntegerType = i+1; } } @@ -11047,10 +11214,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // to memory. EVT JointMemOpVT; if (UseVectorTy) { - JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); + JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem); } else { - unsigned StoreBW = NumElem * ElementSizeBytes * 8; - JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), StoreBW); + unsigned SizeInBits = NumElem * ElementSizeBytes * 8; + JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); } SDLoc LoadDL(LoadNodes[0].MemNode); @@ -11104,8 +11271,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { ST->isUnindexed()) { unsigned OrigAlign = ST->getAlignment(); EVT SVT = Value.getOperand(0).getValueType(); - unsigned Align = TLI.getDataLayout()-> - getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext())); + unsigned Align = DAG.getDataLayout().getABITypeAlignment( + SVT.getTypeForEVT(*DAG.getContext())); if (Align <= OrigAlign && ((!LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) @@ -11164,7 +11331,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32); SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32); - if (TLI.isBigEndian()) std::swap(Lo, Hi); + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); @@ -11210,8 +11378,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Try transforming a pair floating point load / store ops to integer // load / store ops. - SDValue NewST = TransformFPLoadStorePair(N); - if (NewST.getNode()) + if (SDValue NewST = TransformFPLoadStorePair(N)) return NewST; bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA @@ -11413,7 +11580,7 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( EVT ResultVT = EVE->getValueType(0); EVT VecEltVT = InVecVT.getVectorElementType(); unsigned Align = OriginalLoad->getAlignment(); - unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment( + unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( VecEltVT.getTypeForEVT(*DAG.getContext())); if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT)) @@ -11547,7 +11714,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // scalar_to_vector here as well. if (!LegalOperations) { - EVT IndexTy = TLI.getVectorIdxTy(); + EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec, DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy)); } @@ -11724,7 +11891,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { if (!ValidTypes) return SDValue(); - bool isLE = TLI.isLittleEndian(); + bool isLE = DAG.getDataLayout().isLittleEndian(); unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); assert(ElemRatio > 1 && "Invalid element size ratio"); SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType): @@ -11873,9 +12040,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (Op.getOpcode() == ISD::UNDEF) continue; // See if we can combine this build_vector into a blend with a zero vector. - if (!VecIn2.getNode() && (isNullConstant(Op) || - (Op.getOpcode() == ISD::ConstantFP && - cast(Op.getNode())->getValueAPF().isZero()))) { + if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) { UsesZeroVector = true; continue; } @@ -11980,10 +12145,13 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // Try to replace VecIn1 with two extract_subvectors // No need to update the masks, they should still be correct. - VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, - DAG.getConstant(VT.getVectorNumElements(), dl, TLI.getVectorIdxTy())); - VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, - DAG.getConstant(0, dl, TLI.getVectorIdxTy())); + VecIn2 = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, + DAG.getConstant(VT.getVectorNumElements(), dl, + TLI.getVectorIdxTy(DAG.getDataLayout()))); + VecIn1 = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); } else return SDValue(); } @@ -12050,7 +12218,7 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { } // If any of the operands is a floating point scalar bitcast to a vector, - // use floating point types throughout, and bitcast everything. + // use floating point types throughout, and bitcast everything. // Replace UNDEFs by another scalar UNDEF node, of the final desired type. if (AnyFP) { SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits()); @@ -12073,12 +12241,81 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops)); } -SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { - // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of - // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector - // inputs come from at most two distinct vectors, turn this into a shuffle - // node. +// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR +// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at +// most two distinct vectors the same size as the result, attempt to turn this +// into a legal shuffle. +static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + EVT OpVT = N->getOperand(0).getValueType(); + int NumElts = VT.getVectorNumElements(); + int NumOpElts = OpVT.getVectorNumElements(); + + SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT); + SmallVector Mask; + for (SDValue Op : N->ops()) { + // Peek through any bitcast. + while (Op.getOpcode() == ISD::BITCAST) + Op = Op.getOperand(0); + + // UNDEF nodes convert to UNDEF shuffle mask values. + if (Op.getOpcode() == ISD::UNDEF) { + Mask.append((unsigned)NumOpElts, -1); + continue; + } + + if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR) + return SDValue(); + + // What vector are we extracting the subvector from and at what index? + SDValue ExtVec = Op.getOperand(0); + if (ExtVec.getOpcode() == ISD::UNDEF) { + Mask.append((unsigned)NumOpElts, -1); + continue; + } + + EVT ExtVT = ExtVec.getValueType(); + if (!isa(Op.getOperand(1))) + return SDValue(); + int ExtIdx = cast(Op.getOperand(1))->getZExtValue(); + + // Ensure that we are extracting a subvector from a vector the same + // size as the result. + if (ExtVT.getSizeInBits() != VT.getSizeInBits()) + return SDValue(); + + // Scale the subvector index to account for any bitcast. + int NumExtElts = ExtVT.getVectorNumElements(); + if (0 == (NumExtElts % NumElts)) + ExtIdx /= (NumExtElts / NumElts); + else if (0 == (NumElts % NumExtElts)) + ExtIdx *= (NumElts / NumExtElts); + else + return SDValue(); + + // At most we can reference 2 inputs in the final shuffle. + if (SV0.getOpcode() == ISD::UNDEF || SV0 == ExtVec) { + SV0 = ExtVec; + for (int i = 0; i != NumOpElts; ++i) + Mask.push_back(i + ExtIdx); + } else if (SV1.getOpcode() == ISD::UNDEF || SV1 == ExtVec) { + SV1 = ExtVec; + for (int i = 0; i != NumOpElts; ++i) + Mask.push_back(i + ExtIdx + NumElts); + } else { + return SDValue(); + } + } + + if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT)) + return SDValue(); + + return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0), + DAG.getBitcast(VT, SV1), Mask); +} + +SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // If we only have one input vector, we don't need to do any concatenation. if (N->getNumOperands() == 1) return N->getOperand(0); @@ -12179,6 +12416,11 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (SDValue V = combineConcatVectorOfScalars(N, DAG)) return V; + // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE. + if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) + if (SDValue V = combineConcatVectorOfExtracts(N, DAG)) + return V; + // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR // nodes often generate nop CONCAT_VECTOR nodes. // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that @@ -12881,7 +13123,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { SDValue RHS = N->getOperand(1); SDLoc dl(N); - // Make sure we're not running after operation legalization where it + // Make sure we're not running after operation legalization where it // may have custom lowered the vector shuffles. if (LegalOperations) return SDValue(); @@ -12892,38 +13134,76 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { if (RHS.getOpcode() == ISD::BITCAST) RHS = RHS.getOperand(0); - if (RHS.getOpcode() == ISD::BUILD_VECTOR) { + if (RHS.getOpcode() != ISD::BUILD_VECTOR) + return SDValue(); + + EVT RVT = RHS.getValueType(); + unsigned NumElts = RHS.getNumOperands(); + + // Attempt to create a valid clear mask, splitting the mask into + // sub elements and checking to see if each is + // all zeros or all ones - suitable for shuffle masking. + auto BuildClearMask = [&](int Split) { + int NumSubElts = NumElts * Split; + int NumSubBits = RVT.getScalarSizeInBits() / Split; + SmallVector Indices; - unsigned NumElts = RHS.getNumOperands(); + for (int i = 0; i != NumSubElts; ++i) { + int EltIdx = i / Split; + int SubIdx = i % Split; + SDValue Elt = RHS.getOperand(EltIdx); + if (Elt.getOpcode() == ISD::UNDEF) { + Indices.push_back(-1); + continue; + } - for (unsigned i = 0; i != NumElts; ++i) { - SDValue Elt = RHS.getOperand(i); - if (const ConstantSDNode *EltC = dyn_cast(Elt)) { - if (EltC->isAllOnesValue()) - Indices.push_back(i); - else if (EltC->isNullValue()) - Indices.push_back(NumElts+i); - else - return SDValue(); - } else { + APInt Bits; + if (isa(Elt)) + Bits = cast(Elt)->getAPIntValue(); + else if (isa(Elt)) + Bits = cast(Elt)->getValueAPF().bitcastToAPInt(); + else return SDValue(); + + // Extract the sub element from the constant bit mask. + if (DAG.getDataLayout().isBigEndian()) { + Bits = Bits.lshr((Split - SubIdx - 1) * NumSubBits); + } else { + Bits = Bits.lshr(SubIdx * NumSubBits); } + + if (Split > 1) + Bits = Bits.trunc(NumSubBits); + + if (Bits.isAllOnesValue()) + Indices.push_back(i); + else if (Bits == 0) + Indices.push_back(i + NumSubElts); + else + return SDValue(); } // Let's see if the target supports this vector_shuffle. - EVT RVT = RHS.getValueType(); - if (!TLI.isVectorClearMaskLegal(Indices, RVT)) + EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits); + EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts); + if (!TLI.isVectorClearMaskLegal(Indices, ClearVT)) return SDValue(); - // Return the new VECTOR_SHUFFLE node. - EVT EltVT = RVT.getVectorElementType(); - SmallVector ZeroOps(RVT.getVectorNumElements(), - DAG.getConstant(0, dl, EltVT)); - SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, RVT, ZeroOps); - LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS); - SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); - return DAG.getNode(ISD::BITCAST, dl, VT, Shuf); - } + SDValue Zero = DAG.getConstant(0, dl, ClearVT); + return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, dl, + DAG.getBitcast(ClearVT, LHS), + Zero, &Indices[0])); + }; + + // Determine maximum split level (byte level masking). + int MaxSplit = 1; + if (RVT.getScalarSizeInBits() % 8 == 0) + MaxSplit = RVT.getScalarSizeInBits() / 8; + + for (int Split = 1; Split <= MaxSplit; ++Split) + if (RVT.getScalarSizeInBits() % Split == 0) + if (SDValue S = BuildClearMask(Split)) + return S; return SDValue(); } @@ -12936,9 +13216,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - if (SDValue Shuffle = XformToShuffleWithZero(N)) - return Shuffle; - // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold // this operation. if (LHS.getOpcode() == ISD::BUILD_VECTOR && @@ -12957,7 +13234,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV || N->getOpcode() == ISD::FDIV) { if (isNullConstant(RHSOp) || (RHSOp.getOpcode() == ISD::ConstantFP && - cast(RHSOp.getNode())->getValueAPF().isZero())) + cast(RHSOp.getNode())->isZero())) break; } @@ -12989,6 +13266,10 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), Ops); } + // Try to convert a constant mask AND into a shuffle clear mask. + if (SDValue Shuffle = XformToShuffleWithZero(N)) + return Shuffle; + // Type legalization might introduce new shuffles in the DAG. // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask))) // -> (shuffle (VBinOp (A, B)), Undef, Mask). @@ -13221,7 +13502,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, // Check to see if we can simplify the select into an fabs node if (ConstantFPSDNode *CFP = dyn_cast(N1)) { // Allow either -0.0 or 0.0 - if (CFP->getValueAPF().isZero()) { + if (CFP->isZero()) { // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs if ((CC == ISD::SETGE || CC == ISD::SETGT) && N0 == N2 && N3.getOpcode() == ISD::FNEG && @@ -13259,12 +13540,13 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, const_cast(TV->getConstantFPValue()) }; Type *FPTy = Elts[0]->getType(); - const DataLayout &TD = *TLI.getDataLayout(); + const DataLayout &TD = DAG.getDataLayout(); // Create a ConstantArray of the two constants. Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts); - SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(), - TD.getPrefTypeAlignment(FPTy)); + SDValue CPIdx = + DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()), + TD.getPrefTypeAlignment(FPTy)); unsigned Alignment = cast(CPIdx)->getAlignment(); // Get the offsets to the 0 and 1 element of the array so that we can @@ -13283,17 +13565,18 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset); AddToWorklist(CPIdx.getNode()); - return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), false, - false, false, Alignment); + return DAG.getLoad( + TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, Alignment); } } // Check to see if we can perform the "gzip trick", transforming // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A) - if (N1C && isNullConstant(N3) && CC == ISD::SETLT && - (N1C->isNullValue() || // (a < 0) ? b : 0 - (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0 + if (isNullConstant(N3) && CC == ISD::SETLT && + (isNullConstant(N1) || // (a < 0) ? b : 0 + (isOneConstant(N1) && N0 == N2))) { // (a < 1) ? a : 0 EVT XType = N0.getValueType(); EVT AType = N2.getValueType(); if (XType.bitsGE(AType)) { @@ -13368,7 +13651,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, // If the caller doesn't want us to simplify this into a zext of a compare, // don't do it. - if (NotExtCompare && N2C->getAPIntValue() == 1) + if (NotExtCompare && N2C->isOne()) return SDValue(); // Get a SetCC of the condition @@ -13396,7 +13679,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, AddToWorklist(SCC.getNode()); AddToWorklist(Temp.getNode()); - if (N2C->getAPIntValue() == 1) + if (N2C->isOne()) return Temp; // shl setcc result by log2 n2c @@ -13410,7 +13693,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, // Check to see if this is the equivalent of setcc // FIXME: Turn all of these into setcc if setcc if setcc is legal // otherwise, go ahead with the folds. - if (0 && isNullConstant(N3) && N2C && (N2C->getAPIntValue() == 1ULL)) { + if (0 && isNullConstant(N3) && isOneConstant(N2)) { EVT XType = N0.getValueType(); if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(XType))) { @@ -13442,7 +13725,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, getShiftAmountTy(XType))); } // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1)) - if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) { + if (isAllOnesConstant(N1) && CC == ISD::SETGT) { SDLoc DL(N0); SDValue Sign = DAG.getNode(ISD::SRL, DL, XType, N0, DAG.getConstant(XType.getSizeInBits() - 1, DL, @@ -13737,6 +14020,15 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { // If they are both volatile then they cannot be reordered. if (Op0->isVolatile() && Op1->isVolatile()) return true; + // If one operation reads from invariant memory, and the other may store, they + // cannot alias. These should really be checking the equivalent of mayWrite, + // but it only matters for memory nodes other than load /store. + if (Op0->isInvariant() && Op1->writeMem()) + return false; + + if (Op1->isInvariant() && Op0->writeMem()) + return false; + // Gather base node and offset information. SDValue Base1, Base2; int64_t Offset1, Offset2; @@ -13805,14 +14097,12 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { Op0->getSrcValueOffset() - MinOffset; int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) + Op1->getSrcValueOffset() - MinOffset; - AliasAnalysis::AliasResult AAResult = - AA.alias(AliasAnalysis::Location(Op0->getMemOperand()->getValue(), - Overlap1, - UseTBAA ? Op0->getAAInfo() : AAMDNodes()), - AliasAnalysis::Location(Op1->getMemOperand()->getValue(), - Overlap2, - UseTBAA ? Op1->getAAInfo() : AAMDNodes())); - if (AAResult == AliasAnalysis::NoAlias) + AliasResult AAResult = + AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap1, + UseTBAA ? Op0->getAAInfo() : AAMDNodes()), + MemoryLocation(Op1->getMemOperand()->getValue(), Overlap2, + UseTBAA ? Op1->getAAInfo() : AAMDNodes())); + if (AAResult == NoAlias) return false; } @@ -13838,8 +14128,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, // aliases list. If not, then continue up the chain looking for the next // candidate. while (!Chains.empty()) { - SDValue Chain = Chains.back(); - Chains.pop_back(); + SDValue Chain = Chains.pop_back_val(); // For TokenFactor nodes, look at each operand and only continue up the // chain until we find two aliases. If we've seen two aliases, assume we'll @@ -13946,7 +14235,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, UIE = M->use_end(); UI != UIE; ++UI) if (UI.getUse().getValueType() == MVT::Other && Visited.insert(*UI).second) { - if (isa(*UI) || isa(*UI)) { + if (isa(*UI)) { // We've not visited this use, and we care about it (it could have an // ordering dependency with the original node). Aliases.clear();