X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FCodeGen%2FSelectionDAG%2FDAGCombiner.cpp;h=66c1356152da93eca6940abbc9c4dc8fbeee7fb9;hp=fecbe9fec8dfc8a6fa565f7756e4868df01ae09b;hb=959b27677107891425e123121d46bfda0f398c9a;hpb=a631cd85ca010f303e64362a03bd8ecce8b3a2fc diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index fecbe9fec8d..66c1356152d 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -246,10 +246,11 @@ namespace { SDValue visitSDIVREM(SDNode *N); SDValue visitUDIVREM(SDNode *N); SDValue visitAND(SDNode *N); + SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference); SDValue visitOR(SDNode *N); + SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference); SDValue visitXOR(SDNode *N); SDValue SimplifyVBinOp(SDNode *N); - SDValue SimplifyVUnaryOp(SDNode *N); SDValue visitSHL(SDNode *N); SDValue visitSRA(SDNode *N); SDValue visitSRL(SDNode *N); @@ -302,9 +303,14 @@ namespace { SDValue visitCONCAT_VECTORS(SDNode *N); SDValue visitEXTRACT_SUBVECTOR(SDNode *N); SDValue visitVECTOR_SHUFFLE(SDNode *N); + SDValue visitSCALAR_TO_VECTOR(SDNode *N); SDValue visitINSERT_SUBVECTOR(SDNode *N); SDValue visitMLOAD(SDNode *N); SDValue visitMSTORE(SDNode *N); + SDValue visitFP_TO_FP16(SDNode *N); + + SDValue visitFADDForFMACombine(SDNode *N); + SDValue visitFSUBForFMACombine(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS); @@ -385,7 +391,7 @@ namespace { bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl &StoreNodes, EVT MemVT, unsigned NumElem, bool IsConstantSrc, bool UseVector); - + /// Merge consecutive store operations into a wide store. /// This optimization uses wide integers or vectors when possible. /// \return True if some memory operations were changed. @@ -586,7 +592,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, case ISD::ConstantFP: { APFloat V = cast(Op)->getValueAPF(); V.changeSign(); - return DAG.getConstantFP(V, Op.getValueType()); + return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType()); } case ISD::FADD: // FIXME: determine better conditions for this xform. @@ -703,13 +709,23 @@ static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { EltVT.getSizeInBits() >= SplatBitSize); } -// \brief Returns the SDNode if it is a constant BuildVector or constant. -static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) { +// \brief Returns the SDNode if it is a constant integer BuildVector +// or constant integer. +static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) { if (isa(N)) return N.getNode(); - BuildVectorSDNode *BV = dyn_cast(N); - if (BV && BV->isConstant()) - return BV; + if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) + return N.getNode(); + return nullptr; +} + +// \brief Returns the SDNode if it is a constant float BuildVector +// or constant float. 
+static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { + if (isa(N)) + return N.getNode(); + if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())) + return N.getNode(); return nullptr; } @@ -755,10 +771,10 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, SDValue N0, SDValue N1) { EVT VT = N0.getValueType(); if (N0.getOpcode() == Opc) { - if (SDNode *L = isConstantBuildVectorOrConstantInt(N0.getOperand(1))) { - if (SDNode *R = isConstantBuildVectorOrConstantInt(N1)) { + if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { + if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1)) { // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) - if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R)) + if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R)) return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); return SDValue(); } @@ -775,10 +791,10 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, } if (N1.getOpcode() == Opc) { - if (SDNode *R = isConstantBuildVectorOrConstantInt(N1.getOperand(1))) { - if (SDNode *L = isConstantBuildVectorOrConstantInt(N0)) { + if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) { + if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0)) { // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) - if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L)) + if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L)) return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); return SDValue(); } @@ -1180,11 +1196,6 @@ void DAGCombiner::Run(CombineLevel AtLevel) { LegalOperations = Level >= AfterLegalizeVectorOps; LegalTypes = Level >= AfterLegalizeTypes; - // Early exit if this basic block is in an optnone function. - if (DAG.getMachineFunction().getFunction()->hasFnAttribute( - Attribute::OptimizeNone)) - return; - // Add all the dag nodes to the worklist. 
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), E = DAG.allnodes_end(); I != E; ++I) @@ -1369,9 +1380,11 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); + case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N); case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N); case ISD::MLOAD: return visitMLOAD(N); case ISD::MSTORE: return visitMSTORE(N); + case ISD::FP_TO_FP16: return visitFP_TO_FP16(N); } return SDValue(); } @@ -1437,9 +1450,10 @@ SDValue DAGCombiner::combine(SDNode *N) { SDNode *CSENode; if (const BinaryWithFlagsSDNode *BinNode = dyn_cast(N)) { - CSENode = DAG.getNodeIfExists( - N->getOpcode(), N->getVTList(), Ops, BinNode->hasNoUnsignedWrap(), - BinNode->hasNoSignedWrap(), BinNode->isExact()); + CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops, + BinNode->Flags.hasNoUnsignedWrap(), + BinNode->Flags.hasNoSignedWrap(), + BinNode->Flags.hasExact()); } else { CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops); } @@ -1565,14 +1579,12 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast(N0); - ConstantSDNode *N1C = dyn_cast(N1); EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (add x, 0) -> x, vector edition if (ISD::isBuildVectorAllZeros(N1.getNode())) @@ -1587,10 +1599,13 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (N1.getOpcode() == ISD::UNDEF) return N1; // fold (add c1, c2) -> c1+c2 + ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N1C = dyn_cast(N1); if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C); + return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (N0C && !N1C) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0); // fold (add x, 0) -> x if (N1C && N1C->isNullValue()) @@ -1604,14 +1619,15 @@ SDValue DAGCombiner::visitADD(SDNode *N) { (uint64_t)N1C->getSExtValue()); // fold ((c1-A)+c2) -> (c1+c2)-A if (N1C && N0.getOpcode() == ISD::SUB) - if (ConstantSDNode *N0C = dyn_cast(N0.getOperand(0))) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, + if (ConstantSDNode *N0C = dyn_cast(N0.getOperand(0))) { + SDLoc DL(N); + return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(N1C->getAPIntValue()+ - N0C->getAPIntValue(), VT), + N0C->getAPIntValue(), DL, VT), N0.getOperand(1)); + } // reassociate add - SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1); - if (RADD.getNode()) + if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1)) return RADD; // fold ((0-A) + B) -> B-A if (N0.getOpcode() == ISD::SUB && isa(N0.getOperand(0)) && @@ -1727,7 +1743,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (TN->getVT() == MVT::i1) { SDLoc DL(N); SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), - DAG.getConstant(1, VT)); + DAG.getConstant(1, DL, VT)); return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt); } } @@ -1738,8 +1754,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) { SDValue DAGCombiner::visitADDC(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = 
N->getOperand(1); - ConstantSDNode *N0C = dyn_cast(N0); - ConstantSDNode *N1C = dyn_cast(N1); EVT VT = N0.getValueType(); // If the flag result is dead, turn this into an ADD. @@ -1749,6 +1763,8 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { SDLoc(N), MVT::Glue)); // canonicalize constant to RHS. + ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N1C = dyn_cast(N1); if (N0C && !N1C) return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0); @@ -1780,10 +1796,10 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue CarryIn = N->getOperand(2); - ConstantSDNode *N0C = dyn_cast(N0); - ConstantSDNode *N1C = dyn_cast(N1); // canonicalize constant to RHS + ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N1C = dyn_cast(N1); if (N0C && !N1C) return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(), N1, N0, CarryIn); @@ -1801,25 +1817,21 @@ static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations, bool LegalTypes) { if (!VT.isVector()) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, DL, VT); if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, DL, VT); return SDValue(); } SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast(N0.getNode()); - ConstantSDNode *N1C = dyn_cast(N1.getNode()); - ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr : - dyn_cast(N1.getOperand(1).getNode()); EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (sub x, 0) -> x, vector edition if (ISD::isBuildVectorAllZeros(N1.getNode())) @@ -1831,12 +1843,16 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N0 == N1) return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); // fold (sub c1, c2) -> c1-c2 + ConstantSDNode *N0C = dyn_cast(N0.getNode()); + ConstantSDNode *N1C = dyn_cast(N1.getNode()); if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C); + return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, N0C, N1C); // fold (sub x, c) -> (add x, -c) - if (N1C) - return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, - DAG.getConstant(-N1C->getAPIntValue(), VT)); + if (N1C) { + SDLoc DL(N); + return DAG.getNode(ISD::ADD, DL, VT, N0, + DAG.getConstant(-N1C->getAPIntValue(), DL, VT)); + } // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) if (N0C && N0C->isAllOnesValue()) return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); @@ -1850,10 +1866,13 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) return N0.getOperand(0); // fold C2-(A+C1) -> (C2-C1)-A + ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 
nullptr : + dyn_cast(N1.getOperand(1).getNode()); if (N1.getOpcode() == ISD::ADD && N0C && N1C1) { + SDLoc DL(N); SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(), - VT); - return DAG.getNode(ISD::SUB, SDLoc(N), VT, NewC, + DL, VT); + return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0)); } // fold ((A+(B+or-C))-B) -> A+or-C @@ -1894,7 +1913,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (GlobalAddressSDNode *GB = dyn_cast(N1)) if (GA->getGlobal() == GB->getGlobal()) return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(), - VT); + SDLoc(N), VT); } // sub X, (sextinreg Y i1) -> add X, (and Y 1) @@ -1903,7 +1922,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (TN->getVT() == MVT::i1) { SDLoc DL(N); SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), - DAG.getConstant(1, VT)); + DAG.getConstant(1, DL, VT)); return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt); } } @@ -1914,8 +1933,6 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue DAGCombiner::visitSUBC(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast(N0); - ConstantSDNode *N1C = dyn_cast(N1); EVT VT = N0.getValueType(); // If the flag result is dead, turn this into an SUB. @@ -1925,12 +1942,16 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) { MVT::Glue)); // fold (subc x, x) -> 0 + no borrow - if (N0 == N1) - return CombineTo(N, DAG.getConstant(0, VT), - DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), + if (N0 == N1) { + SDLoc DL(N); + return CombineTo(N, DAG.getConstant(0, DL, VT), + DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); + } // fold (subc x, 0) -> x + no borrow + ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N1C = dyn_cast(N1); if (N1C && N1C->isNullValue()) return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); @@ -1963,33 +1984,35 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // fold (mul x, undef) -> 0 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); bool N0IsConst = false; bool N1IsConst = false; APInt ConstValue0, ConstValue1; // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0); N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1); } else { - N0IsConst = dyn_cast(N0) != nullptr; - ConstValue0 = N0IsConst ? (dyn_cast(N0))->getAPIntValue() - : APInt(); - N1IsConst = dyn_cast(N1) != nullptr; - ConstValue1 = N1IsConst ? 
(dyn_cast(N1))->getAPIntValue() - : APInt(); + N0IsConst = isa(N0); + if (N0IsConst) + ConstValue0 = cast(N0)->getAPIntValue(); + N1IsConst = isa(N1); + if (N1IsConst) + ConstValue1 = cast(N1)->getAPIntValue(); } // fold (mul c1, c2) -> c1*c2 if (N0IsConst && N1IsConst) - return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0.getNode(), N1.getNode()); + return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, + N0.getNode(), N1.getNode()); - // canonicalize constant to RHS - if (N0IsConst && !N1IsConst) + // canonicalize constant to RHS (vector doesn't have to splat) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); // fold (mul x, 0) -> 0 if (N1IsConst && ConstValue1 == 0) @@ -2002,23 +2025,28 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { if (N1IsConst && ConstValue1 == 1 && IsFullSplat) return N0; // fold (mul x, -1) -> 0-x - if (N1IsConst && ConstValue1.isAllOnesValue()) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, - DAG.getConstant(0, VT), N0); + if (N1IsConst && ConstValue1.isAllOnesValue()) { + SDLoc DL(N); + return DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(0, DL, VT), N0); + } // fold (mul x, (1 << c)) -> x << c - if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat) - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, - DAG.getConstant(ConstValue1.logBase2(), + if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat) { + SDLoc DL(N); + return DAG.getNode(ISD::SHL, DL, VT, N0, + DAG.getConstant(ConstValue1.logBase2(), DL, getShiftAmountTy(N0.getValueType()))); + } // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) { unsigned Log2Val = (-ConstValue1).logBase2(); + SDLoc DL(N); // FIXME: If the input is something that is easily negated (e.g. a // single-use add), we should put the negate there. 
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, - DAG.getConstant(0, VT), - DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, - DAG.getConstant(Log2Val, + return DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(0, DL, VT), + DAG.getNode(ISD::SHL, DL, VT, N0, + DAG.getConstant(Log2Val, DL, getShiftAmountTy(N0.getValueType())))); } @@ -2069,8 +2097,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { N0.getOperand(1), N1)); // reassociate mul - SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1); - if (RMUL.getNode()) + if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1)) return RMUL; return SDValue(); @@ -2079,26 +2106,27 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = isConstOrConstSplat(N0); - ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold vector ops - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (sdiv c1, c2) -> c1/c2 + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C && !N1C->isNullValue()) - return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C); + return DAG.FoldConstantArithmetic(ISD::SDIV, SDLoc(N), VT, N0C, N1C); // fold (sdiv X, 1) -> X if (N1C && N1C->getAPIntValue() == 1LL) return N0; // fold (sdiv X, -1) -> 0-X - if (N1C && N1C->isAllOnesValue()) - return DAG.getNode(ISD::SUB, SDLoc(N), VT, - DAG.getConstant(0, VT), N0); + if (N1C && N1C->isAllOnesValue()) { + SDLoc DL(N); + return DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(0, DL, VT), N0); + } // If we know the sign bits of both operands are zero, strength reduce to a // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 if (!VT.isVector()) { @@ -2121,24 +2149,26 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return Res; unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); + SDLoc DL(N); // Splat the sign bit into the register SDValue SGN = - DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, - DAG.getConstant(VT.getScalarSizeInBits() - 1, + DAG.getNode(ISD::SRA, DL, VT, N0, + DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, getShiftAmountTy(N0.getValueType()))); AddToWorklist(SGN.getNode()); // Add (N0 < 0) ? abs2 - 1 : 0; SDValue SRL = - DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN, - DAG.getConstant(VT.getScalarSizeInBits() - lg2, + DAG.getNode(ISD::SRL, DL, VT, SGN, + DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL, getShiftAmountTy(SGN.getValueType()))); - SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL); + SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL); AddToWorklist(SRL.getNode()); AddToWorklist(ADD.getNode()); // Divide by pow2 - SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD, - DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType()))); + SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD, + DAG.getConstant(lg2, DL, + getShiftAmountTy(ADD.getValueType()))); // If we're dividing by a positive value, we're done. Otherwise, we must // negate the result. 
@@ -2146,10 +2176,10 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return SRA; AddToWorklist(SRA.getNode()); - return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA); + return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA); } - // if integer divide is expensive and we satisfy the requirements, emit an + // If integer divide is expensive and we satisfy the requirements, emit an // alternate sequence. if (N1C && !TLI.isIntDivCheap()) { SDValue Op = BuildSDIV(N); @@ -2158,7 +2188,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // undef / X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // X / undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2169,36 +2199,38 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue DAGCombiner::visitUDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = isConstOrConstSplat(N0); - ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold vector ops - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (udiv c1, c2) -> c1/c2 + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C && !N1C->isNullValue()) - return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C); + return DAG.FoldConstantArithmetic(ISD::UDIV, SDLoc(N), VT, N0C, N1C); // fold (udiv x, (1 << c)) -> x >>u c - if (N1C && N1C->getAPIntValue().isPowerOf2()) - return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, - DAG.getConstant(N1C->getAPIntValue().logBase2(), + if (N1C && N1C->getAPIntValue().isPowerOf2()) { + SDLoc DL(N); + return DAG.getNode(ISD::SRL, DL, VT, N0, + DAG.getConstant(N1C->getAPIntValue().logBase2(), DL, getShiftAmountTy(N0.getValueType()))); + } // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 if (N1.getOpcode() == ISD::SHL) { if (ConstantSDNode *SHC = dyn_cast(N1.getOperand(0))) { if (SHC->getAPIntValue().isPowerOf2()) { EVT ADDVT = N1.getOperand(1).getValueType(); - SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N), ADDVT, + SDLoc DL(N); + SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), DAG.getConstant(SHC->getAPIntValue() .logBase2(), - ADDVT)); + DL, ADDVT)); AddToWorklist(Add.getNode()); - return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add); + return DAG.getNode(ISD::SRL, DL, VT, N0, Add); } } } @@ -2210,7 +2242,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { // undef / X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // X / undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2221,13 +2253,13 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { SDValue DAGCombiner::visitSREM(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = isConstOrConstSplat(N0); - ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold (srem c1, c2) -> c1%c2 + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C && !N1C->isNullValue()) - return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C); + return DAG.FoldConstantArithmetic(ISD::SREM, SDLoc(N), VT, N0C, N1C); // If we know the sign bits of both operands are zero, strength reduce to a // urem instead. 
Handles (X & 0x0FFFFFFF) %s 16 -> X&15 if (!VT.isVector()) { @@ -2252,7 +2284,7 @@ SDValue DAGCombiner::visitSREM(SDNode *N) { // undef % X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // X % undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2263,27 +2295,30 @@ SDValue DAGCombiner::visitSREM(SDNode *N) { SDValue DAGCombiner::visitUREM(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = isConstOrConstSplat(N0); - ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold (urem c1, c2) -> c1%c2 + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C && !N1C->isNullValue()) - return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C); + return DAG.FoldConstantArithmetic(ISD::UREM, SDLoc(N), VT, N0C, N1C); // fold (urem x, pow2) -> (and x, pow2-1) - if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2()) - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, - DAG.getConstant(N1C->getAPIntValue()-1,VT)); + if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2()) { + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, N0, + DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT)); + } // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) if (N1.getOpcode() == ISD::SHL) { if (ConstantSDNode *SHC = dyn_cast(N1.getOperand(0))) { if (SHC->getAPIntValue().isPowerOf2()) { + SDLoc DL(N); SDValue Add = - DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, - DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), + DAG.getNode(ISD::ADD, DL, VT, N1, + DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT)); AddToWorklist(Add.getNode()); - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add); + return DAG.getNode(ISD::AND, DL, VT, N0, Add); } } } @@ -2305,7 +2340,7 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { // undef % X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // X % undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2324,13 +2359,16 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { if (N1C && N1C->isNullValue()) return N1; // fold (mulhs x, 1) -> (sra x, size(x)-1) - if (N1C && N1C->getAPIntValue() == 1) - return DAG.getNode(ISD::SRA, SDLoc(N), N0.getValueType(), N0, + if (N1C && N1C->getAPIntValue() == 1) { + SDLoc DL(N); + return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0, DAG.getConstant(N0.getValueType().getSizeInBits() - 1, + DL, getShiftAmountTy(N0.getValueType()))); + } // fold (mulhs x, undef) -> 0 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // If the type twice as wide is legal, transform the mulhs to a wider multiply // plus a shift. 
@@ -2343,7 +2381,8 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, - DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); + DAG.getConstant(SimpleSize, DL, + getShiftAmountTy(N1.getValueType()))); return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); } } @@ -2363,10 +2402,10 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { return N1; // fold (mulhu x, 1) -> 0 if (N1C && N1C->getAPIntValue() == 1) - return DAG.getConstant(0, N0.getValueType()); + return DAG.getConstant(0, DL, N0.getValueType()); // fold (mulhu x, undef) -> 0 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, DL, VT); // If the type twice as wide is legal, transform the mulhu to a wider multiply // plus a shift. @@ -2379,7 +2418,8 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, - DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); + DAG.getConstant(SimpleSize, DL, + getShiftAmountTy(N1.getValueType()))); return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); } } @@ -2445,8 +2485,8 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); - // If the type twice as wide is legal, transform the mulhu to a wider multiply - // plus a shift. + // If the type is twice as wide is legal, transform the mulhu to a wider + // multiply plus a shift. if (VT.isSimple() && !VT.isVector()) { MVT Simple = VT.getSimpleVT(); unsigned SimpleSize = Simple.getSizeInBits(); @@ -2457,7 +2497,8 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); // Compute the high part as N1. Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, - DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); + DAG.getConstant(SimpleSize, DL, + getShiftAmountTy(Lo.getValueType()))); Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); // Compute the low part as N0. Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); @@ -2475,8 +2516,8 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); - // If the type twice as wide is legal, transform the mulhu to a wider multiply - // plus a shift. + // If the type is twice as wide is legal, transform the mulhu to a wider + // multiply plus a shift. if (VT.isSimple() && !VT.isVector()) { MVT Simple = VT.getSimpleVT(); unsigned SimpleSize = Simple.getSizeInBits(); @@ -2487,7 +2528,8 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); // Compute the high part as N1. Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, - DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); + DAG.getConstant(SimpleSize, DL, + getShiftAmountTy(Lo.getValueType()))); Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); // Compute the low part as N0. Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); @@ -2643,7 +2685,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // build vector of all zeros that might be illegal at this stage. 
if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { if (!LegalTypes) - ShOp = DAG.getConstant(0, VT); + ShOp = DAG.getConstant(0, SDLoc(N), VT); else ShOp = SDValue(); } @@ -2664,7 +2706,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { ShOp = N0->getOperand(0); if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { if (!LegalTypes) - ShOp = DAG.getConstant(0, VT); + ShOp = DAG.getConstant(0, SDLoc(N), VT); else ShOp = SDValue(); } @@ -2685,19 +2727,122 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { return SDValue(); } +/// This contains all DAGCombine rules which reduce two values combined by +/// an And operation to a single value. This makes them reusable in the context +/// of visitSELECT(). Rules involving constants are not included as +/// visitSELECT() already handles those cases. +SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, + SDNode *LocReference) { + EVT VT = N1.getValueType(); + + // fold (and x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, SDLoc(LocReference), VT); + // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) + SDValue LL, LR, RL, RR, CC0, CC1; + if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ + ISD::CondCode Op0 = cast(CC0)->get(); + ISD::CondCode Op1 = cast(CC1)->get(); + + if (LR == RR && isa(LR) && Op0 == Op1 && + LL.getValueType().isInteger()) { + // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) + if (cast(LR)->isNullValue() && Op1 == ISD::SETEQ) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } + // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) + if (cast(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { + SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ANDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); + } + // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) + if (cast(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } + } + // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) + if (LL == RL && isa(LR) && isa(RR) && + Op0 == Op1 && LL.getValueType().isInteger() && + Op0 == ISD::SETNE && ((cast(LR)->isNullValue() && + cast(RR)->isAllOnesValue()) || + (cast(LR)->isAllOnesValue() && + cast(RR)->isNullValue()))) { + SDLoc DL(N0); + SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(), + LL, DAG.getConstant(1, DL, + LL.getValueType())); + AddToWorklist(ADDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode, + DAG.getConstant(2, DL, LL.getValueType()), + ISD::SETUGE); + } + // canonicalize equivalent to ll == rl + if (LL == RR && LR == RL) { + Op1 = ISD::getSetCCSwappedOperands(Op1); + std::swap(RL, RR); + } + if (LL == RL && LR == RR) { + bool isInteger = LL.getValueType().isInteger(); + ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); + if (Result != ISD::SETCC_INVALID && + (!LegalOperations || + (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && + TLI.isOperationLegal(ISD::SETCC, + getSetCCResultType(N0.getSimpleValueType()))))) + return 
DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), + LL, LR, Result); + } + } + + if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && + VT.getSizeInBits() <= 64) { + if (ConstantSDNode *ADDI = dyn_cast(N0.getOperand(1))) { + APInt ADDC = ADDI->getAPIntValue(); + if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { + // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal + // immediate for an add, but it is legal if its top c2 bits are set, + // transform the ADD so the immediate doesn't need to be materialized + // in a register. + if (ConstantSDNode *SRLI = dyn_cast(N1.getOperand(1))) { + APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), + SRLI->getZExtValue()); + if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { + ADDC |= Mask; + if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { + SDLoc DL(N0); + SDValue NewAdd = + DAG.getNode(ISD::ADD, DL, VT, + N0.getOperand(0), DAG.getConstant(ADDC, DL, VT)); + CombineTo(N0.getNode(), NewAdd); + // Return N so it doesn't get rechecked! + return SDValue(LocReference, 0); + } + } + } + } + } + } + + return SDValue(); +} + SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - SDValue LL, LR, RL, RR, CC0, CC1; - ConstantSDNode *N0C = dyn_cast(N0); - ConstantSDNode *N1C = dyn_cast(N1); EVT VT = N1.getValueType(); - unsigned BitWidth = VT.getScalarType().getSizeInBits(); // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (and x, 0) -> 0, vector edition if (ISD::isBuildVectorAllZeros(N0.getNode())) @@ -2705,13 +2850,13 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return DAG.getConstant( APInt::getNullValue( N0.getValueType().getScalarType().getSizeInBits()), - N0.getValueType()); + SDLoc(N), N0.getValueType()); if (ISD::isBuildVectorAllZeros(N1.getNode())) // do not return N1, because undef node may exist in N1 return DAG.getConstant( APInt::getNullValue( N1.getValueType().getScalarType().getSizeInBits()), - N1.getValueType()); + SDLoc(N), N1.getValueType()); // fold (and x, -1) -> x, vector edition if (ISD::isBuildVectorAllOnes(N0.getNode())) @@ -2720,25 +2865,25 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return N0; } - // fold (and x, undef) -> 0 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); // fold (and c1, c2) -> c1&c2 + ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N1C = dyn_cast(N1); if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C); + return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (N0C && !N1C) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); // fold (and x, -1) -> x if (N1C && N1C->isAllOnesValue()) return N0; // if (and x, c) is known to be zero, return 0 + unsigned BitWidth = VT.getScalarType().getSizeInBits(); if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnesValue(BitWidth))) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // reassociate and - SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1); - if (RAND.getNode()) + if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1)) return RAND; // fold (and (or x, C), D) -> D if (C & D) == D if (N1C && N0.getOpcode() == ISD::OR) @@ -2809,9 +2954,13 @@ SDValue 
DAGCombiner::visitAND(SDNode *N) { SplatBitSize = SplatBitSize * 2) SplatValue |= SplatValue.shl(SplatBitSize); - Constant = APInt::getAllOnesValue(BitWidth); - for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i) - Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); + // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a + // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value. + if (SplatBitSize % BitWidth == 0) { + Constant = APInt::getAllOnesValue(BitWidth); + for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i) + Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); + } } } @@ -2864,117 +3013,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } } - // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) - if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ - ISD::CondCode Op0 = cast(CC0)->get(); - ISD::CondCode Op1 = cast(CC1)->get(); - - if (LR == RR && isa(LR) && Op0 == Op1 && - LL.getValueType().isInteger()) { - // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) - if (cast(LR)->isNullValue() && Op1 == ISD::SETEQ) { - SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ORNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); - } - // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) - if (cast(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { - SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ANDNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); - } - // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) - if (cast(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { - SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), - LR.getValueType(), LL, RL); - AddToWorklist(ORNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); - } - } - // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) - if (LL == RL && isa(LR) && isa(RR) && - Op0 == Op1 && LL.getValueType().isInteger() && - Op0 == ISD::SETNE && ((cast(LR)->isNullValue() && - cast(RR)->isAllOnesValue()) || - (cast(LR)->isAllOnesValue() && - cast(RR)->isNullValue()))) { - SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(), - LL, DAG.getConstant(1, LL.getValueType())); - AddToWorklist(ADDNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ADDNode, - DAG.getConstant(2, LL.getValueType()), ISD::SETUGE); - } - // canonicalize equivalent to ll == rl - if (LL == RR && LR == RL) { - Op1 = ISD::getSetCCSwappedOperands(Op1); - std::swap(RL, RR); - } - if (LL == RL && LR == RR) { - bool isInteger = LL.getValueType().isInteger(); - ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); - if (Result != ISD::SETCC_INVALID && - (!LegalOperations || - (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && - TLI.isOperationLegal(ISD::SETCC, - getSetCCResultType(N0.getSimpleValueType()))))) - return DAG.getSetCC(SDLoc(N), N0.getValueType(), - LL, LR, Result); - } - } - - // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) - if (N0.getOpcode() == N1.getOpcode()) { - SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); - if (Tmp.getNode()) return Tmp; - } - - // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) - // fold (and (sra)) -> (and (srl)) when possible. 
- if (!VT.isVector() && - SimplifyDemandedBits(SDValue(N, 0))) - return SDValue(N, 0); - - // fold (zext_inreg (extload x)) -> (zextload x) - if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { - LoadSDNode *LN0 = cast(N0); - EVT MemVT = LN0->getMemoryVT(); - // If we zero all the possible extended bits, then we can turn this into - // a zextload if we are running before legalize or the operation is legal. - unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); - if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - MemVT.getScalarType().getSizeInBits())) && - ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, - LN0->getChain(), LN0->getBasePtr(), - MemVT, LN0->getMemOperand()); - AddToWorklist(N); - CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - } - // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use - if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && - N0.hasOneUse()) { - LoadSDNode *LN0 = cast(N0); - EVT MemVT = LN0->getMemoryVT(); - // If we zero all the possible extended bits, then we can turn this into - // a zextload if we are running before legalize or the operation is legal. - unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); - if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - MemVT.getScalarType().getSizeInBits())) && - ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, - LN0->getChain(), LN0->getBasePtr(), - MemVT, LN0->getMemOperand()); - AddToWorklist(N); - CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - } // fold (and (load x), 255) -> (zextload x, i8) // fold (and (extload x, i16), 255) -> (zextload x, i8) @@ -3025,8 +3063,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { unsigned LVTStoreBytes = LoadedVT.getStoreSize(); unsigned EVTStoreBytes = ExtVT.getStoreSize(); unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; - NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType, - NewPtr, DAG.getConstant(PtrOff, PtrType)); + SDLoc DL(LN0); + NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, + NewPtr, DAG.getConstant(PtrOff, DL, PtrType)); Alignment = MinAlign(Alignment, PtrOff); } @@ -3046,33 +3085,60 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } } - if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && - VT.getSizeInBits() <= 64) { - if (ConstantSDNode *ADDI = dyn_cast(N0.getOperand(1))) { - APInt ADDC = ADDI->getAPIntValue(); - if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { - // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal - // immediate for an add, but it is legal if its top c2 bits are set, - // transform the ADD so the immediate doesn't need to be materialized - // in a register. 
- if (ConstantSDNode *SRLI = dyn_cast(N1.getOperand(1))) { - APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), - SRLI->getZExtValue()); - if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { - ADDC |= Mask; - if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { - SDValue NewAdd = - DAG.getNode(ISD::ADD, SDLoc(N0), VT, - N0.getOperand(0), DAG.getConstant(ADDC, VT)); - CombineTo(N0.getNode(), NewAdd); - return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - } - } - } - } + if (SDValue Combined = visitANDLike(N0, N1, N)) + return Combined; + + // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) + if (N0.getOpcode() == N1.getOpcode()) { + SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); + if (Tmp.getNode()) return Tmp; } + // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) + // fold (and (sra)) -> (and (srl)) when possible. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + // fold (zext_inreg (extload x)) -> (zextload x) + if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { + LoadSDNode *LN0 = cast(N0); + EVT MemVT = LN0->getMemoryVT(); + // If we zero all the possible extended bits, then we can turn this into + // a zextload if we are running before legalize or the operation is legal. + unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); + if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, + BitWidth - MemVT.getScalarType().getSizeInBits())) && + ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, + LN0->getChain(), LN0->getBasePtr(), + MemVT, LN0->getMemOperand()); + AddToWorklist(N); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use + if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + N0.hasOneUse()) { + LoadSDNode *LN0 = cast(N0); + EVT MemVT = LN0->getMemoryVT(); + // If we zero all the possible extended bits, then we can turn this into + // a zextload if we are running before legalize or the operation is legal. + unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); + if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, + BitWidth - MemVT.getScalarType().getSizeInBits())) && + ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, + LN0->getChain(), LN0->getBasePtr(), + MemVT, LN0->getMemOperand()); + AddToWorklist(N); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ } + } // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const) if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) { SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), @@ -3183,9 +3249,12 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, } SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00); - if (OpSizeInBits > 16) - Res = DAG.getNode(ISD::SRL, SDLoc(N), VT, Res, - DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT))); + if (OpSizeInBits > 16) { + SDLoc DL(N); + Res = DAG.getNode(ISD::SRL, DL, VT, Res, + DAG.getConstant(OpSizeInBits - 16, DL, + getShiftAmountTy(VT))); + } return Res; } @@ -3319,37 +3388,129 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { return SDValue(); } - // Make sure the parts are all coming from the same node. - if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) - return SDValue(); + // Make sure the parts are all coming from the same node. + if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) + return SDValue(); + + SDLoc DL(N); + SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, + SDValue(Parts[0], 0)); + + // Result of the bswap should be rotated by 16. If it's not legal, then + // do (x << 16) | (x >> 16). + SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT)); + if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) + return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt); + if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) + return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt); + return DAG.getNode(ISD::OR, DL, VT, + DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt), + DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt)); +} + +/// This contains all DAGCombine rules which reduce two values combined by +/// an Or operation to a single value \see visitANDLike(). +SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) { + EVT VT = N1.getValueType(); + // fold (or x, undef) -> -1 + if (!LegalOperations && + (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) { + EVT EltVT = VT.isVector() ? 
VT.getVectorElementType() : VT; + return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), + SDLoc(LocReference), VT); + } + // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) + SDValue LL, LR, RL, RR, CC0, CC1; + if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ + ISD::CondCode Op0 = cast(CC0)->get(); + ISD::CondCode Op1 = cast(CC1)->get(); + + if (LR == RR && isa(LR) && Op0 == Op1 && + LL.getValueType().isInteger()) { + // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) + // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) + if (cast(LR)->isNullValue() && + (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } + // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) + // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) + if (cast(LR)->isAllOnesValue() && + (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { + SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), + LR.getValueType(), LL, RL); + AddToWorklist(ANDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); + } + } + // canonicalize equivalent to ll == rl + if (LL == RR && LR == RL) { + Op1 = ISD::getSetCCSwappedOperands(Op1); + std::swap(RL, RR); + } + if (LL == RL && LR == RR) { + bool isInteger = LL.getValueType().isInteger(); + ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger); + if (Result != ISD::SETCC_INVALID && + (!LegalOperations || + (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && + TLI.isOperationLegal(ISD::SETCC, + getSetCCResultType(N0.getValueType()))))) + return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), + LL, LR, Result); + } + } + + // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. + if (N0.getOpcode() == ISD::AND && + N1.getOpcode() == ISD::AND && + N0.getOperand(1).getOpcode() == ISD::Constant && + N1.getOperand(1).getOpcode() == ISD::Constant && + // Don't increase # computations. + (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { + // We can only do this xform if we know that bits from X that are set in C2 + // but not in C1 are already zero. Likewise for Y. + const APInt &LHSMask = + cast(N0.getOperand(1))->getAPIntValue(); + const APInt &RHSMask = + cast(N1.getOperand(1))->getAPIntValue(); - SDValue BSwap = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, - SDValue(Parts[0],0)); + if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && + DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { + SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, + N0.getOperand(0), N1.getOperand(0)); + SDLoc DL(LocReference); + return DAG.getNode(ISD::AND, DL, VT, X, + DAG.getConstant(LHSMask | RHSMask, DL, VT)); + } + } - // Result of the bswap should be rotated by 16. If it's not legal, then - // do (x << 16) | (x >> 16). 
- SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT)); - if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) - return DAG.getNode(ISD::ROTL, SDLoc(N), VT, BSwap, ShAmt); - if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) - return DAG.getNode(ISD::ROTR, SDLoc(N), VT, BSwap, ShAmt); - return DAG.getNode(ISD::OR, SDLoc(N), VT, - DAG.getNode(ISD::SHL, SDLoc(N), VT, BSwap, ShAmt), - DAG.getNode(ISD::SRL, SDLoc(N), VT, BSwap, ShAmt)); + // (or (and X, M), (and X, N)) -> (and X, (or M, N)) + if (N0.getOpcode() == ISD::AND && + N1.getOpcode() == ISD::AND && + N0.getOperand(0) == N1.getOperand(0) && + // Don't increase # computations. + (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { + SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, + N0.getOperand(1), N1.getOperand(1)); + return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X); + } + + return SDValue(); } SDValue DAGCombiner::visitOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - SDValue LL, LR, RL, RR, CC0, CC1; - ConstantSDNode *N0C = dyn_cast(N0); - ConstantSDNode *N1C = dyn_cast(N1); EVT VT = N1.getValueType(); // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (or x, 0) -> x, vector edition if (ISD::isBuildVectorAllZeros(N0.getNode())) @@ -3363,13 +3524,13 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return DAG.getConstant( APInt::getAllOnesValue( N0.getValueType().getScalarType().getSizeInBits()), - N0.getValueType()); + SDLoc(N), N0.getValueType()); if (ISD::isBuildVectorAllOnes(N1.getNode())) // do not return N1, because undef node may exist in N1 return DAG.getConstant( APInt::getAllOnesValue( N1.getValueType().getScalarType().getSizeInBits()), - N1.getValueType()); + SDLoc(N), N1.getValueType()); // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1) // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2) @@ -3428,17 +3589,14 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } } - // fold (or x, undef) -> -1 - if (!LegalOperations && - (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) { - EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; - return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); - } // fold (or c1, c2) -> c1|c2 + ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N1C = dyn_cast(N1); if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C); + return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (N0C && !N1C) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); // fold (or x, 0) -> x if (N1C && N1C->isNullValue()) @@ -3450,6 +3608,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) return N1; + if (SDValue Combined = visitORLike(N0, N1, N)) + return Combined; + // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) SDValue BSwap = MatchBSwapHWord(N, N0, N1); if (BSwap.getNode()) @@ -3459,8 +3620,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return BSwap; // reassociate or - SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1); - if (ROR.getNode()) + if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1)) return ROR; // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) // iff (c1 & c2) == 0. 
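Aside (not part of the patch): a minimal standalone sketch checking the bitwise identity behind the (or (and X, c1), c2) canonicalization noted above, namely (X & C1) | C2 == (X | C2) & (C1 | C2). The identity holds for all values; the combine merely restricts when it fires.

```cpp
#include <cassert>
#include <cstdint>

int main() {
  uint32_t Vals[] = {0u, 1u, 0x00FF00FFu, 0xDEADBEEFu, 0xFFFFFFFFu};
  for (uint32_t X : Vals)
    for (uint32_t C1 : Vals)
      for (uint32_t C2 : Vals)
        // (or (and X, c1), c2) == (and (or X, c2), (or c1, c2))
        assert(((X & C1) | C2) == ((X | C2) & (C1 | C2)));
}
```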
@@ -3468,97 +3628,20 @@ SDValue DAGCombiner::visitOR(SDNode *N) { isa(N0.getOperand(1))) { ConstantSDNode *C1 = cast(N0.getOperand(1)); if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) { - if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)) + if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, + N1C, C1)) return DAG.getNode( ISD::AND, SDLoc(N), VT, DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR); return SDValue(); } } - // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) - if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ - ISD::CondCode Op0 = cast(CC0)->get(); - ISD::CondCode Op1 = cast(CC1)->get(); - - if (LR == RR && isa(LR) && Op0 == Op1 && - LL.getValueType().isInteger()) { - // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) - // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) - if (cast(LR)->isNullValue() && - (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { - SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), - LR.getValueType(), LL, RL); - AddToWorklist(ORNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); - } - // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) - // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) - if (cast(LR)->isAllOnesValue() && - (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { - SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), - LR.getValueType(), LL, RL); - AddToWorklist(ANDNode.getNode()); - return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); - } - } - // canonicalize equivalent to ll == rl - if (LL == RR && LR == RL) { - Op1 = ISD::getSetCCSwappedOperands(Op1); - std::swap(RL, RR); - } - if (LL == RL && LR == RR) { - bool isInteger = LL.getValueType().isInteger(); - ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger); - if (Result != ISD::SETCC_INVALID && - (!LegalOperations || - (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && - TLI.isOperationLegal(ISD::SETCC, - getSetCCResultType(N0.getValueType()))))) - return DAG.getSetCC(SDLoc(N), N0.getValueType(), - LL, LR, Result); - } - } - // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) if (N0.getOpcode() == N1.getOpcode()) { SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); if (Tmp.getNode()) return Tmp; } - // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. - if (N0.getOpcode() == ISD::AND && - N1.getOpcode() == ISD::AND && - N0.getOperand(1).getOpcode() == ISD::Constant && - N1.getOperand(1).getOpcode() == ISD::Constant && - // Don't increase # computations. - (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { - // We can only do this xform if we know that bits from X that are set in C2 - // but not in C1 are already zero. Likewise for Y. - const APInt &LHSMask = - cast(N0.getOperand(1))->getAPIntValue(); - const APInt &RHSMask = - cast(N1.getOperand(1))->getAPIntValue(); - - if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && - DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { - SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, - N0.getOperand(0), N1.getOperand(0)); - return DAG.getNode(ISD::AND, SDLoc(N), VT, X, - DAG.getConstant(LHSMask | RHSMask, VT)); - } - } - - // (or (and X, M), (and X, N)) -> (and X, (or M, N)) - if (N0.getOpcode() == ISD::AND && - N1.getOpcode() == ISD::AND && - N0.getOperand(0) == N1.getOperand(0) && - // Don't increase # computations. 
- (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { - SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, - N0.getOperand(1), N1.getOperand(1)); - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), X); - } - // See if this is some rotate idiom. if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) return SDValue(Rot, 0); @@ -3786,7 +3869,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { Mask &= cast(RHSMask)->getAPIntValue() | LHSBits; } - Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT)); + Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, DL, VT)); } return Rot.getNode(); @@ -3828,15 +3911,12 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - SDValue LHS, RHS, CC; - ConstantSDNode *N0C = dyn_cast(N0); - ConstantSDNode *N1C = dyn_cast(N1); EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (xor x, 0) -> x, vector edition if (ISD::isBuildVectorAllZeros(N0.getNode())) @@ -3847,27 +3927,30 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { // fold (xor undef, undef) -> 0. This is a common idiom (misuse). if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // fold (xor x, undef) -> undef if (N0.getOpcode() == ISD::UNDEF) return N0; if (N1.getOpcode() == ISD::UNDEF) return N1; // fold (xor c1, c2) -> c1^c2 + ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N1C = dyn_cast(N1); if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C); + return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (N0C && !N1C) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); // fold (xor x, 0) -> x if (N1C && N1C->isNullValue()) return N0; // reassociate xor - SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1); - if (RXOR.getNode()) + if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1)) return RXOR; // fold !(x cc y) -> (x !cc y) + SDValue LHS, RHS, CC; if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) { bool isInt = LHS.getValueType().isInteger(); ISD::CondCode NotCC = ISD::getSetCCInverse(cast(CC)->get(), @@ -3892,8 +3975,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { N0.getNode()->hasOneUse() && isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){ SDValue V = N0.getOperand(0); - V = DAG.getNode(ISD::XOR, SDLoc(N0), V.getValueType(), V, - DAG.getConstant(1, V.getValueType())); + SDLoc DL(N0); + V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V, + DAG.getConstant(1, DL, V.getValueType())); AddToWorklist(V.getNode()); return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V); } @@ -3934,19 +4018,51 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (N1C && N0.getOpcode() == ISD::XOR) { ConstantSDNode *N00C = dyn_cast(N0.getOperand(0)); ConstantSDNode *N01C = dyn_cast(N0.getOperand(1)); - if (N00C) - return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(1), + if (N00C) { + SDLoc DL(N); + return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), DAG.getConstant(N1C->getAPIntValue() ^ - N00C->getAPIntValue(), VT)); - if (N01C) - return DAG.getNode(ISD::XOR, 
SDLoc(N), VT, N0.getOperand(0), + N00C->getAPIntValue(), DL, VT)); + } + if (N01C) { + SDLoc DL(N); + return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0), DAG.getConstant(N1C->getAPIntValue() ^ - N01C->getAPIntValue(), VT)); + N01C->getAPIntValue(), DL, VT)); + } } // fold (xor x, x) -> 0 if (N0 == N1) return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); + // fold (xor (shl 1, x), -1) -> (rotl ~1, x) + // Here is a concrete example of this equivalence: + // i16 x == 14 + // i16 shl == 1 << 14 == 16384 == 0b0100000000000000 + // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111 + // + // => + // + // i16 ~1 == 0b1111111111111110 + // i16 rol(~1, 14) == 0b1011111111111111 + // + // Some additional tips to help conceptualize this transform: + // - Try to see the operation as placing a single zero in a value of all ones. + // - There exists no value for x which would allow the result to contain zero. + // - Values of x larger than the bitwidth are undefined and do not require a + // consistent result. + // - Pushing the zero left requires shifting one bits in from the right. + // A rotate left of ~1 is a nice way of achieving the desired result. + if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) + if (auto *N1C = dyn_cast(N1.getNode())) + if (N0.getOpcode() == ISD::SHL) + if (auto *ShlLHS = dyn_cast(N0.getOperand(0))) + if (N1C->isAllOnesValue() && ShlLHS->isOne()) { + SDLoc DL(N); + return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT), + N0.getOperand(1)); + } + // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) if (N0.getOpcode() == N1.getOpcode()) { SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); @@ -4052,10 +4168,11 @@ SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { SDValue N00 = N->getOperand(0).getOperand(0); APInt TruncC = N01C->getAPIntValue(); TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits()); + SDLoc DL(N); - return DAG.getNode(ISD::AND, SDLoc(N), TruncVT, - DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, N00), - DAG.getConstant(TruncC, TruncVT)); + return DAG.getNode(ISD::AND, DL, TruncVT, + DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00), + DAG.getConstant(TruncC, DL, TruncVT)); } } @@ -4077,15 +4194,14 @@ SDValue DAGCombiner::visitRotate(SDNode *N) { SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast(N0); - ConstantSDNode *N1C = dyn_cast(N1); EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); // fold vector ops + ConstantSDNode *N1C = dyn_cast(N1); if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; BuildVectorSDNode *N1CV = dyn_cast(N1); // If setcc produces all-one true value then: @@ -4099,7 +4215,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC && TLI.getBooleanContents(N00.getOperand(0).getValueType()) == TargetLowering::ZeroOrNegativeOneBooleanContent) { - if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV)) + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, + N01CV, N1CV)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); } } else { @@ -4109,8 +4226,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } // fold (shl c1, c2) -> c1<(N0); if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C); + return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C); // 
fold (shl 0, x) -> 0 if (N0C && N0C->isNullValue()) return N0; @@ -4122,11 +4240,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return N0; // fold (shl undef, x) -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // if (shl x, c) is known to be zero, return 0 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnesValue(OpSizeInBits))) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { @@ -4143,10 +4261,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { uint64_t c1 = N0C1->getZExtValue(); uint64_t c2 = N1C->getZExtValue(); + SDLoc DL(N); if (c1 + c2 >= OpSizeInBits) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c1 + c2, N1.getValueType())); + return DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), + DAG.getConstant(c1 + c2, DL, N1.getValueType())); } } @@ -4166,12 +4285,13 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { EVT InnerShiftVT = N0Op0.getValueType(); uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); if (c2 >= OpSizeInBits - InnerShiftSize) { + SDLoc DL(N0); if (c1 + c2 >= OpSizeInBits) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SHL, SDLoc(N0), VT, - DAG.getNode(N0.getOpcode(), SDLoc(N0), VT, + return DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::SHL, DL, VT, + DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)), - DAG.getConstant(c1 + c2, N1.getValueType())); + DAG.getConstant(c1 + c2, DL, N1.getValueType())); } } } @@ -4189,8 +4309,10 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (c1 == c2) { SDValue NewOp0 = N0.getOperand(0); EVT CountVT = NewOp0.getOperand(1).getValueType(); - SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(), - NewOp0, DAG.getConstant(c2, CountVT)); + SDLoc DL(N); + SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(), + NewOp0, + DAG.getConstant(c2, DL, CountVT)); AddToWorklist(NewSHL.getNode()); return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL); } @@ -4211,25 +4333,30 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue Shift; if (c2 > c1) { Mask = Mask.shl(c2 - c1); - Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c2 - c1, N1.getValueType())); + SDLoc DL(N); + Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), + DAG.getConstant(c2 - c1, DL, N1.getValueType())); } else { Mask = Mask.lshr(c1 - c2); - Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c1 - c2, N1.getValueType())); + SDLoc DL(N); + Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), + DAG.getConstant(c1 - c2, DL, N1.getValueType())); } - return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift, - DAG.getConstant(Mask, VT)); + SDLoc DL(N0); + return DAG.getNode(ISD::AND, DL, VT, Shift, + DAG.getConstant(Mask, DL, VT)); } } } // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) { unsigned BitSize = VT.getScalarSizeInBits(); + SDLoc DL(N); SDValue HiBitsMask = DAG.getConstant(APInt::getHighBitsSet(BitSize, - BitSize - N1C->getZExtValue()), VT); - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), + BitSize - N1C->getZExtValue()), + DL, VT); + return 
DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask); } @@ -4257,22 +4384,22 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast(N0); - ConstantSDNode *N1C = dyn_cast(N1); EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); // fold vector ops + ConstantSDNode *N1C = dyn_cast(N1); if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; N1C = isConstOrConstSplat(N1); } // fold (sra c1, c2) -> (sra c1, c2) + ConstantSDNode *N0C = dyn_cast(N0); if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C); + return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C); // fold (sra 0, x) -> 0 if (N0C && N0C->isNullValue()) return N0; @@ -4305,8 +4432,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { unsigned Sum = N1C->getZExtValue() + C1->getZExtValue(); if (Sum >= OpSizeInBits) Sum = OpSizeInBits - 1; - return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(Sum, N1.getValueType())); + SDLoc DL(N); + return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), + DAG.getConstant(Sum, DL, N1.getValueType())); } } @@ -4338,14 +4466,15 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) && TLI.isTruncateFree(VT, TruncVT)) { - SDValue Amt = DAG.getConstant(ShiftAmt, - getShiftAmountTy(N0.getOperand(0).getValueType())); - SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), VT, - N0.getOperand(0), Amt); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), TruncVT, - Shift); - return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), - N->getValueType(0), Trunc); + SDLoc DL(N); + SDValue Amt = DAG.getConstant(ShiftAmt, DL, + getShiftAmountTy(N0.getOperand(0).getValueType())); + SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, + N0.getOperand(0), Amt); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, + Shift); + return DAG.getNode(ISD::SIGN_EXTEND, DL, + N->getValueType(0), Trunc); } } } @@ -4372,12 +4501,13 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { EVT LargeVT = N0Op0.getValueType(); if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) { + SDLoc DL(N); SDValue Amt = - DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), + DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL, getShiftAmountTy(N0Op0.getOperand(0).getValueType())); - SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT, + SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt); - return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA); + return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA); } } } @@ -4403,22 +4533,22 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast(N0); - ConstantSDNode *N1C = dyn_cast(N1); EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); // fold vector ops + ConstantSDNode *N1C = dyn_cast(N1); if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; N1C = isConstOrConstSplat(N1); } // fold (srl c1, c2) -> c1 >>u c2 + ConstantSDNode *N0C = dyn_cast(N0); if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C); + return 
DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C); // fold (srl 0, x) -> 0 if (N0C && N0C->isNullValue()) return N0; @@ -4431,17 +4561,18 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // if (srl x, c) is known to be zero, return 0 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnesValue(OpSizeInBits))) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, SDLoc(N), VT); // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) if (N1C && N0.getOpcode() == ISD::SRL) { if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) { uint64_t c1 = N01C->getZExtValue(); uint64_t c2 = N1C->getZExtValue(); + SDLoc DL(N); if (c1 + c2 >= OpSizeInBits) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(c1 + c2, N1.getValueType())); + return DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), + DAG.getConstant(c1 + c2, DL, N1.getValueType())); } } @@ -4457,12 +4588,14 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits(); // This is only valid if the OpSizeInBits + c1 = size of inner shift. if (c1 + OpSizeInBits == InnerShiftSize) { + SDLoc DL(N0); if (c1 + c2 >= InnerShiftSize) - return DAG.getConstant(0, VT); - return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, - DAG.getNode(ISD::SRL, SDLoc(N0), InnerShiftVT, + return DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::TRUNCATE, DL, VT, + DAG.getNode(ISD::SRL, DL, InnerShiftVT, N0.getOperand(0)->getOperand(0), - DAG.getConstant(c1 + c2, ShiftCountVT))); + DAG.getConstant(c1 + c2, DL, + ShiftCountVT))); } } @@ -4471,8 +4604,9 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { unsigned BitSize = N0.getScalarValueSizeInBits(); if (BitSize <= 64) { uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize; - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), - DAG.getConstant(~0ULL >> ShAmt, VT)); + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), + DAG.getConstant(~0ULL >> ShAmt, DL, VT)); } } @@ -4486,14 +4620,17 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { uint64_t ShiftAmt = N1C->getZExtValue(); - SDValue SmallShift = DAG.getNode(ISD::SRL, SDLoc(N0), SmallVT, + SDLoc DL0(N0); + SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT, N0.getOperand(0), - DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT))); + DAG.getConstant(ShiftAmt, DL0, + getShiftAmountTy(SmallVT))); AddToWorklist(SmallShift.getNode()); APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt); - return DAG.getNode(ISD::AND, SDLoc(N), VT, - DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift), - DAG.getConstant(Mask, VT)); + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, + DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift), + DAG.getConstant(Mask, DL, VT)); } } @@ -4512,12 +4649,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // If any of the input bits are KnownOne, then the input couldn't be all // zeros, thus the result of the srl will always be zero. - if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT); + if (KnownOne.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT); // If all of the bits input the to ctlz node are known to be zero, then // the result of the ctlz is "32" and the result of the shift is one. 
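To make the ctlz observation above concrete, here is a small self-contained sketch (not from the patch) showing that, for a 32-bit value, shifting the count-leading-zeros result right by 5 yields exactly the "x == 0" predicate, since only a zero input produces a count of 32:

#include <cassert>
#include <cstdint>

// Portable leading-zero count so the example needs no builtins; returns 32
// for a zero input, matching the defined ISD::CTLZ behaviour assumed above.
static unsigned ctlz32(uint32_t x) {
  unsigned n = 0;
  for (uint32_t bit = 0x80000000u; bit != 0 && (x & bit) == 0; bit >>= 1)
    ++n;
  return n;
}

int main() {
  for (uint32_t x : {0u, 1u, 2u, 0x8000u, 0xFFFFFFFFu})
    assert((ctlz32(x) >> 5) == (x == 0 ? 1u : 0u));
  return 0;
}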
APInt UnknownBits = ~KnownZero; - if (UnknownBits == 0) return DAG.getConstant(1, VT); + if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT); // Otherwise, check to see if there is exactly one bit input to the ctlz. if ((UnknownBits & (UnknownBits - 1)) == 0) { @@ -4529,13 +4666,16 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue Op = N0.getOperand(0); if (ShAmt) { - Op = DAG.getNode(ISD::SRL, SDLoc(N0), VT, Op, - DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType()))); + SDLoc DL(N0); + Op = DAG.getNode(ISD::SRL, DL, VT, Op, + DAG.getConstant(ShAmt, DL, + getShiftAmountTy(Op.getValueType()))); AddToWorklist(Op.getNode()); } - return DAG.getNode(ISD::XOR, SDLoc(N), VT, - Op, DAG.getConstant(1, VT)); + SDLoc DL(N); + return DAG.getNode(ISD::XOR, DL, VT, + Op, DAG.getConstant(1, DL, VT)); } } @@ -4686,9 +4826,6 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); - ConstantSDNode *N0C = dyn_cast(N0); - ConstantSDNode *N1C = dyn_cast(N1); - ConstantSDNode *N2C = dyn_cast(N2); EVT VT = N->getValueType(0); EVT VT0 = N0.getValueType(); @@ -4696,12 +4833,14 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (N1 == N2) return N1; // fold (select true, X, Y) -> X + ConstantSDNode *N0C = dyn_cast(N0); if (N0C && !N0C->isNullValue()) return N1; // fold (select false, X, Y) -> Y if (N0C && N0C->isNullValue()) return N2; // fold (select C, 1, X) -> (or C, X) + ConstantSDNode *N1C = dyn_cast(N1); if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1) return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); // fold (select C, 0, 1) -> (xor C, 1) @@ -4713,6 +4852,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // undiscoverable (or not reasonably discoverable). For example, it could be // in another basic block or it could require searching a complicated // expression. 
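The i1 select folds referenced above are ordinary boolean algebra; a tiny standalone check (illustrative only, not part of the patch):

#include <cassert>

int main() {
  for (unsigned C : {0u, 1u})
    for (unsigned X : {0u, 1u}) {
      assert((C ? 1u : X) == (C | X));   // (select C, 1, X) -> (or C, X)
      assert((C ? 0u : 1u) == (C ^ 1u)); // (select C, 0, 1) -> (xor C, 1)
    }
  return 0;
}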
+ ConstantSDNode *N2C = dyn_cast(N2); if (VT.isInteger() && (VT0 == MVT::i1 || (VT0.isInteger() && TLI.getBooleanContents(false, false) == @@ -4721,11 +4861,14 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { TargetLowering::ZeroOrOneBooleanContent)) && N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) { SDValue XORNode; - if (VT == VT0) - return DAG.getNode(ISD::XOR, SDLoc(N), VT0, - N0, DAG.getConstant(1, VT0)); - XORNode = DAG.getNode(ISD::XOR, SDLoc(N0), VT0, - N0, DAG.getConstant(1, VT0)); + if (VT == VT0) { + SDLoc DL(N); + return DAG.getNode(ISD::XOR, DL, VT0, + N0, DAG.getConstant(1, DL, VT0)); + } + SDLoc DL0(N0); + XORNode = DAG.getNode(ISD::XOR, DL0, VT0, + N0, DAG.getConstant(1, DL0, VT0)); AddToWorklist(XORNode.getNode()); if (VT.bitsGT(VT0)) return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode); @@ -4792,6 +4935,69 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { return SimplifySelect(SDLoc(N), N0, N1, N2); } + if (VT0 == MVT::i1) { + if (TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { + // select (and Cond0, Cond1), X, Y + // -> select Cond0, (select Cond1, X, Y), Y + if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) { + SDValue Cond0 = N0->getOperand(0); + SDValue Cond1 = N0->getOperand(1); + SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), + N1.getValueType(), Cond1, N1, N2); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, + InnerSelect, N2); + } + // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y) + if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) { + SDValue Cond0 = N0->getOperand(0); + SDValue Cond1 = N0->getOperand(1); + SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), + N1.getValueType(), Cond1, N1, N2); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1, + InnerSelect); + } + } + + // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y + if (N1->getOpcode() == ISD::SELECT) { + SDValue N1_0 = N1->getOperand(0); + SDValue N1_1 = N1->getOperand(1); + SDValue N1_2 = N1->getOperand(2); + if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) { + // Create the actual and node if we can generate good code for it. + if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { + SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(), + N0, N1_0); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And, + N1_1, N2); + } + // Otherwise see if we can optimize the "and" to a better pattern. + if (SDValue Combined = visitANDLike(N0, N1_0, N)) + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, + N1_1, N2); + } + } + // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y + if (N2->getOpcode() == ISD::SELECT) { + SDValue N2_0 = N2->getOperand(0); + SDValue N2_1 = N2->getOperand(1); + SDValue N2_2 = N2->getOperand(2); + if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) { + // Create the actual or node if we can generate good code for it. + if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { + SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(), + N0, N2_0); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or, + N1, N2_2); + } + // Otherwise see if we can optimize to a better pattern. 
+ if (SDValue Combined = visitORLike(N0, N2_0, N)) + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, + N1, N2_2); + } + } + } + return SDValue(); } @@ -4913,7 +5119,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MST->getPointerInfo(), + getMachineMemOperand(MST->getPointerInfo(), MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, MST->getAAInfo(), MST->getRanges()); @@ -4922,10 +5128,10 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); MMO = DAG.getMachineFunction(). - getMachineMemOperand(MST->getPointerInfo(), + getMachineMemOperand(MST->getPointerInfo(), MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment, MST->getAAInfo(), MST->getRanges()); @@ -4988,7 +5194,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MLD->getPointerInfo(), + getMachineMemOperand(MLD->getPointerInfo(), MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), MLD->getRanges()); @@ -4997,10 +5203,10 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); MMO = DAG.getMachineFunction(). - getMachineMemOperand(MLD->getPointerInfo(), + getMachineMemOperand(MLD->getPointerInfo(), MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); @@ -5056,7 +5262,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { EVT VT = LHS.getValueType(); SDValue Shift = DAG.getNode( ISD::SRA, DL, VT, LHS, - DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT)); + DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, DL, VT)); SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); AddToWorklist(Shift.getNode()); AddToWorklist(Add.getNode()); @@ -5064,6 +5270,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { } } + if (SimplifySelectOps(N, N1, N2)) + return SDValue(N, 0); // Don't revisit N. + // If the VSELECT result requires splitting and the mask is provided by a // SETCC, then split both nodes and its operands before legalization. 
This // prevents the type legalizer from unrolling SETCC into scalar comparisons @@ -5208,14 +5417,15 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, continue; } + SDLoc DL(Op); ConstantSDNode *CurrentND = cast(Op); const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue()); if (Opcode == ISD::SIGN_EXTEND) Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(), - SVT)); + DL, SVT)); else Elts.push_back(DAG.getConstant(C.shl(ShAmt).lshr(ShAmt).getZExtValue(), - SVT)); + DL, SVT)); } return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode(); @@ -5381,7 +5591,7 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { Align, LN0->getAAInfo()); BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, - DAG.getConstant(Stride, BasePtr.getValueType())); + DAG.getConstant(Stride, DL, BasePtr.getValueType())); Loads.push_back(SplitLoad.getValue(0)); Chains.push_back(SplitLoad.getValue(1)); @@ -5544,14 +5754,15 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { LN0->getMemOperand()); APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); Mask = Mask.sext(VT.getSizeInBits()); - SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT, - ExtLoad, DAG.getConstant(Mask, VT)); + SDLoc DL(N); + SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, + ExtLoad, DAG.getConstant(Mask, DL, VT)); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); CombineTo(N, And); CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -5594,11 +5805,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0) unsigned ElementWidth = VT.getScalarType().getSizeInBits(); + SDLoc DL(N); SDValue NegOne = - DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT); + DAG.getConstant(APInt::getAllOnesValue(ElementWidth), DL, VT); SDValue SCC = - SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), - NegOne, DAG.getConstant(0, VT), + SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), + NegOne, DAG.getConstant(0, DL, VT), cast(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; @@ -5610,7 +5822,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N0.getOperand(0), N0.getOperand(1), CC); return DAG.getSelect(DL, VT, SetCC, - NegOne, DAG.getConstant(0, VT)); + NegOne, DAG.getConstant(0, DL, VT)); } } } @@ -5759,8 +5971,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); - return DAG.getNode(ISD::AND, SDLoc(N), VT, - X, DAG.getConstant(Mask, VT)); + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, + X, DAG.getConstant(Mask, DL, VT)); } // fold (zext (load x)) -> (zext (truncate (zextload x))) @@ -5820,14 +6033,15 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { LN0->getMemOperand()); APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); - SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT, - ExtLoad, DAG.getConstant(Mask, VT)); + SDLoc DL(N); + SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, + ExtLoad, DAG.getConstant(Mask, DL, VT)); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); 
CombineTo(N, And); CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -5865,19 +6079,20 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors. // Only do this before legalize for now. EVT EltVT = VT.getVectorElementType(); + SDLoc DL(N); SmallVector OneOps(VT.getVectorNumElements(), - DAG.getConstant(1, EltVT)); + DAG.getConstant(1, DL, EltVT)); if (VT.getSizeInBits() == N0VT.getSizeInBits()) // We know that the # elements of the results is the same as the // # elements of the compare (and the # elements of the compare result // for that matter). Check to see that they are the same size. If so, // we know that the element size of the sext'd result matches the // element size of the compare operands. - return DAG.getNode(ISD::AND, SDLoc(N), VT, - DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), + return DAG.getNode(ISD::AND, DL, VT, + DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1), cast(N0.getOperand(2))->get()), - DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, + DAG.getNode(ISD::BUILD_VECTOR, DL, VT, OneOps)); // If the desired elements are smaller or larger than the source @@ -5890,18 +6105,19 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { EVT::getVectorVT(*DAG.getContext(), MatchingElementType, N0VT.getVectorNumElements()); SDValue VsetCC = - DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), + DAG.getSetCC(DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1), cast(N0.getOperand(2))->get()); - return DAG.getNode(ISD::AND, SDLoc(N), VT, - DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT), - DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, OneOps)); + return DAG.getNode(ISD::AND, DL, VT, + DAG.getSExtOrTrunc(VsetCC, DL, VT), + DAG.getNode(ISD::BUILD_VECTOR, DL, VT, OneOps)); } // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc + SDLoc DL(N); SDValue SCC = - SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), - DAG.getConstant(1, VT), DAG.getConstant(0, VT), + SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), + DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT), cast(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; } @@ -5993,8 +6209,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { } APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); - return DAG.getNode(ISD::AND, SDLoc(N), VT, - X, DAG.getConstant(Mask, VT)); + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, + X, DAG.getConstant(Mask, DL, VT)); } // fold (aext (load x)) -> (aext (truncate (extload x))) @@ -6077,9 +6294,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { } // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc + SDLoc DL(N); SDValue SCC = - SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), - DAG.getConstant(1, VT), DAG.getConstant(0, VT), + SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), + DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT), cast(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; @@ -6100,7 +6318,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { const APInt &CVal = CV->getAPIntValue(); APInt NewVal = CVal & Mask; if (NewVal != CVal) - return DAG.getConstant(NewVal, V.getValueType()); + return DAG.getConstant(NewVal, SDLoc(V), V.getValueType()); break; } case ISD::OR: @@ -6260,9 +6478,10 @@ 
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { uint64_t PtrOff = ShAmt / 8; unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); - SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), + SDLoc DL(LN0); + SDValue NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, LN0->getBasePtr(), - DAG.getConstant(PtrOff, PtrType)); + DAG.getConstant(PtrOff, DL, PtrType)); AddToWorklist(NewPtr.getNode()); SDValue Load; @@ -6291,11 +6510,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // no larger than the source) then the useful bits of the result are // zero; we can't simply return the shortened shift, because the result // of that operation is undefined. + SDLoc DL(N0); if (ShLeftAmt >= VT.getSizeInBits()) - Result = DAG.getConstant(0, VT); + Result = DAG.getConstant(0, DL, VT); else - Result = DAG.getNode(ISD::SHL, SDLoc(N0), VT, - Result, DAG.getConstant(ShLeftAmt, ShImmTy)); + Result = DAG.getNode(ISD::SHL, DL, VT, + Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy)); } // Return the new loaded value. @@ -6422,7 +6642,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ConstantSDNode *CurrentND = cast(Op); const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue()); Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(), - Op.getValueType())); + SDLoc(Op), Op.getValueType())); } return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts); @@ -6440,7 +6660,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (N0.getValueType() == N->getValueType(0)) return N0; // fold (truncate c1) -> c1 - if (isa(N0)) + if (isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0); // fold (truncate (truncate x)) -> (truncate x) if (N0.getOpcode() == ISD::TRUNCATE) @@ -6493,9 +6713,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, N0.getOperand(0)); + SDLoc DL(N); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - SDLoc(N), TrTy, V, - DAG.getConstant(Index, IndexTy)); + DL, TrTy, V, + DAG.getConstant(Index, DL, IndexTy)); } } @@ -6741,13 +6962,14 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { N0.getOperand(0)); AddToWorklist(NewConv.getNode()); + SDLoc DL(N); APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); if (N0.getOpcode() == ISD::FNEG) - return DAG.getNode(ISD::XOR, SDLoc(N), VT, - NewConv, DAG.getConstant(SignBit, VT)); + return DAG.getNode(ISD::XOR, DL, VT, + NewConv, DAG.getConstant(SignBit, DL, VT)); assert(N0.getOpcode() == ISD::FABS); - return DAG.getNode(ISD::AND, SDLoc(N), VT, - NewConv, DAG.getConstant(~SignBit, VT)); + return DAG.getNode(ISD::AND, DL, VT, + NewConv, DAG.getConstant(~SignBit, DL, VT)); } // fold (bitconvert (fcopysign cst, x)) -> @@ -6772,9 +6994,11 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { } else if (OrigXWidth > VTWidth) { // To get the sign bit in the right place, we have to shift it right // before truncating. 
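A standalone sketch of the shift-before-truncate step described in the comment above (not from the patch; the widths are the common f64-to-f32 case):

#include <cassert>
#include <cstdint>

int main() {
  // Bit pattern of the wider sign source with only the sign bit set.
  uint64_t X = 0x8000000000000000ull;
  unsigned OrigXWidth = 64, VTWidth = 32;

  // Shift right by the width difference, then truncate: the sign bit lands
  // in the top bit of the narrower type, where the following AND expects it.
  uint32_t Narrow = static_cast<uint32_t>(X >> (OrigXWidth - VTWidth));
  assert(Narrow == 0x80000000u);
  return 0;
}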
- X = DAG.getNode(ISD::SRL, SDLoc(X), + SDLoc DL(X); + X = DAG.getNode(ISD::SRL, DL, X.getValueType(), X, - DAG.getConstant(OrigXWidth-VTWidth, X.getValueType())); + DAG.getConstant(OrigXWidth-VTWidth, DL, + X.getValueType())); AddToWorklist(X.getNode()); X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); AddToWorklist(X.getNode()); @@ -6782,13 +7006,13 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); X = DAG.getNode(ISD::AND, SDLoc(X), VT, - X, DAG.getConstant(SignBit, VT)); + X, DAG.getConstant(SignBit, SDLoc(X), VT)); AddToWorklist(X.getNode()); SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, N0.getOperand(0)); Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT, - Cst, DAG.getConstant(~SignBit, VT)); + Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT)); AddToWorklist(Cst.getNode()); return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst); @@ -6802,6 +7026,51 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { return CombineLD; } + // Remove double bitcasts from shuffles - this is often a legacy of + // XformToShuffleWithZero being used to combine bitmaskings (of + // float vectors bitcast to integer vectors) into shuffles. + // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1) + if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() && + N0->getOpcode() == ISD::VECTOR_SHUFFLE && + VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() && + !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) { + ShuffleVectorSDNode *SVN = cast(N0); + + // If operands are a bitcast, peek through if it casts the original VT. + // If operands are a UNDEF or constant, just bitcast back to original VT. + auto PeekThroughBitcast = [&](SDValue Op) { + if (Op.getOpcode() == ISD::BITCAST && + Op.getOperand(0)->getValueType(0) == VT) + return SDValue(Op.getOperand(0)); + if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || + ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); + return SDValue(); + }; + + SDValue SV0 = PeekThroughBitcast(N0->getOperand(0)); + SDValue SV1 = PeekThroughBitcast(N0->getOperand(1)); + if (!(SV0 && SV1)) + return SDValue(); + + int MaskScale = + VT.getVectorNumElements() / N0.getValueType().getVectorNumElements(); + SmallVector NewMask; + for (int M : SVN->getMask()) + for (int i = 0; i != MaskScale; ++i) + NewMask.push_back(M < 0 ? -1 : M * MaskScale + i); + + bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); + if (!LegalMask) { + std::swap(SV0, SV1); + ShuffleVectorSDNode::commuteMask(NewMask); + LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); + } + + if (LegalMask) + return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask); + } + return SDValue(); } @@ -6870,6 +7139,8 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT); } + SDLoc DL(BV); + // Okay, we know the src/dst types are both integers of differing types. // Handling growing first. 
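For the growing case that follows, each wide output element is simply the concatenation of adjacent narrow input constants; a minimal sketch of two i32 elements folding into one i64 (illustrative only, little-endian element order assumed):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Elt0 = 0x11111111u, Elt1 = 0x22222222u;
  // Element 0 occupies the low half, element 1 the high half.
  uint64_t Wide = (static_cast<uint64_t>(Elt1) << 32) | Elt0;
  assert(Wide == 0x2222222211111111ull);
  return 0;
}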
assert(SrcEltVT.isInteger() && DstEltVT.isInteger()); @@ -6896,16 +7167,15 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { if (EltIsUndef) Ops.push_back(DAG.getUNDEF(DstEltVT)); else - Ops.push_back(DAG.getConstant(NewBits, DstEltVT)); + Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT)); } EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops); + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops); } // Finally, this must be the case where we are shrinking elements: each input // turns into multiple outputs. - bool isS2V = ISD::isScalarToVector(BV); unsigned NumOutputsPerInput = SrcBitSize/DstBitSize; EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, NumOutputsPerInput*BV->getNumOperands()); @@ -6922,11 +7192,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { for (unsigned j = 0; j != NumOutputsPerInput; ++j) { APInt ThisVal = OpVal.trunc(DstBitSize); - Ops.push_back(DAG.getConstant(ThisVal, DstEltVT)); - if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal) - // Simply turn this into a SCALAR_TO_VECTOR of the new type. - return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT, - Ops[0]); + Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT)); OpVal = OpVal.lshr(DstBitSize); } @@ -6935,23 +7201,47 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); } - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops); + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops); } -// Attempt different variants of (fadd (fmul a, b), c) -> fma or fmad -static SDValue performFaddFmulCombines(unsigned FusedOpcode, - bool Aggressive, - SDNode *N, - const TargetLowering &TLI, - SelectionDAG &DAG) { +/// Try to perform FMA combining on a given FADD node. +SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { + + + + SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); + SDLoc SL(N); + + const TargetOptions &Options = DAG.getTarget().Options; + bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast || + Options.UnsafeFPMath); + + // Floating-point multiply-add with intermediate rounding. + bool HasFMAD = (LegalOperations && + TLI.isOperationLegal(ISD::FMAD, VT)); + + // Floating-point multiply-add without intermediate rounding. + bool HasFMA = ((!LegalOperations || + TLI.isOperationLegalOrCustom(ISD::FMA, VT)) && + TLI.isFMAFasterThanFMulAndFAdd(VT) && + UnsafeFPMath); + + // No valid opcode, do not combine. + if (!HasFMAD && !HasFMA) + return SDValue(); + + // Always prefer FMAD to FMA for precision. + unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; + bool Aggressive = TLI.enableAggressiveFMAFusion(VT); + bool LookThroughFPExt = TLI.isFPExtFree(VT); // fold (fadd (fmul x, y), z) -> (fma x, y, z) if (N0.getOpcode() == ISD::FMUL && (Aggressive || N0->hasOneUse())) { - return DAG.getNode(FusedOpcode, SDLoc(N), VT, + return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1), N1); } @@ -6959,53 +7249,180 @@ static SDValue performFaddFmulCombines(unsigned FusedOpcode, // Note: Commutes FADD operands. if (N1.getOpcode() == ISD::FMUL && (Aggressive || N1->hasOneUse())) { - return DAG.getNode(FusedOpcode, SDLoc(N), VT, + return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0), N1.getOperand(1), N0); } + // Look through FP_EXTEND nodes to do more combining. 
+ if (UnsafeFPMath && LookThroughFPExt) { + // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FMUL) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(1)), N1); + } + + // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x) + // Note: Commutes FADD operands. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == ISD::FMUL) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(1)), N0); + } + } + // More folding opportunities when target permits. - if (Aggressive) { + if ((UnsafeFPMath || HasFMAD) && Aggressive) { // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) - if (N0.getOpcode() == ISD::FMA && + if (N0.getOpcode() == PreferredFusedOpcode && N0.getOperand(2).getOpcode() == ISD::FMUL) { - return DAG.getNode(FusedOpcode, SDLoc(N), VT, + return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1), - DAG.getNode(FusedOpcode, SDLoc(N), VT, + DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(2).getOperand(0), N0.getOperand(2).getOperand(1), N1)); } // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) - if (N1->getOpcode() == ISD::FMA && + if (N1->getOpcode() == PreferredFusedOpcode && N1.getOperand(2).getOpcode() == ISD::FMUL) { - return DAG.getNode(FusedOpcode, SDLoc(N), VT, + return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0), N1.getOperand(1), - DAG.getNode(FusedOpcode, SDLoc(N), VT, + DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(2).getOperand(0), N1.getOperand(2).getOperand(1), N0)); } + + if (UnsafeFPMath && LookThroughFPExt) { + // fold (fadd (fma x, y, (fpext (fmul u, v))), z) + // -> (fma x, y, (fma (fpext u), (fpext v), z)) + auto FoldFAddFMAFPExtFMul = [&] ( + SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y, + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, U), + DAG.getNode(ISD::FP_EXTEND, SL, VT, V), + Z)); + }; + if (N0.getOpcode() == PreferredFusedOpcode) { + SDValue N02 = N0.getOperand(2); + if (N02.getOpcode() == ISD::FP_EXTEND) { + SDValue N020 = N02.getOperand(0); + if (N020.getOpcode() == ISD::FMUL) + return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1), + N020.getOperand(0), N020.getOperand(1), + N1); + } + } + + // fold (fadd (fpext (fma x, y, (fmul u, v))), z) + // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. 
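As background for why these contractions are gated on UnsafeFPMath or an FMAD-capable target, a standalone C++ illustration (not from the patch) of the rounding difference between fused and unfused forms; build with FP contraction disabled (for example -ffp-contract=off) so the compiler does not fuse the second expression itself:

#include <cmath>
#include <cstdio>

int main() {
  // Chosen so that x*y is not exactly representable in double.
  double x = 1.0 + 0x1p-27;
  double y = 1.0 + 0x1p-27;
  double z = -1.0;

  double fused    = std::fma(x, y, z); // x*y + z rounded once
  double separate = x * y + z;         // x*y rounded, then the add rounded

  // Typically prints two different values; the fused result keeps the 2^-54 term.
  std::printf("fma:      %a\nseparate: %a\n", fused, separate);
  return 0;
}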
+ auto FoldFAddFPExtFMAFMul = [&] ( + SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, X), + DAG.getNode(ISD::FP_EXTEND, SL, VT, Y), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, U), + DAG.getNode(ISD::FP_EXTEND, SL, VT, V), + Z)); + }; + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == PreferredFusedOpcode) { + SDValue N002 = N00.getOperand(2); + if (N002.getOpcode() == ISD::FMUL) + return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1), + N002.getOperand(0), N002.getOperand(1), + N1); + } + } + + // fold (fadd x, (fma y, z, (fpext (fmul u, v))) + // -> (fma y, z, (fma (fpext u), (fpext v), x)) + if (N1.getOpcode() == PreferredFusedOpcode) { + SDValue N12 = N1.getOperand(2); + if (N12.getOpcode() == ISD::FP_EXTEND) { + SDValue N120 = N12.getOperand(0); + if (N120.getOpcode() == ISD::FMUL) + return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1), + N120.getOperand(0), N120.getOperand(1), + N0); + } + } + + // fold (fadd x, (fpext (fma y, z, (fmul u, v))) + // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == PreferredFusedOpcode) { + SDValue N102 = N10.getOperand(2); + if (N102.getOpcode() == ISD::FMUL) + return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1), + N102.getOperand(0), N102.getOperand(1), + N0); + } + } + } } return SDValue(); } -static SDValue performFsubFmulCombines(unsigned FusedOpcode, - bool Aggressive, - SDNode *N, - const TargetLowering &TLI, - SelectionDAG &DAG) { +/// Try to perform FMA combining on a given FSUB node. +SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { + + + SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); - SDLoc SL(N); + SDLoc SL(N); + + const TargetOptions &Options = DAG.getTarget().Options; + bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast || + Options.UnsafeFPMath); + + // Floating-point multiply-add with intermediate rounding. + bool HasFMAD = (LegalOperations && + TLI.isOperationLegal(ISD::FMAD, VT)); + + // Floating-point multiply-add without intermediate rounding. + bool HasFMA = ((!LegalOperations || + TLI.isOperationLegalOrCustom(ISD::FMA, VT)) && + TLI.isFMAFasterThanFMulAndFAdd(VT) && + UnsafeFPMath); + + // No valid opcode, do not combine. + if (!HasFMAD && !HasFMA) + return SDValue(); + + // Always prefer FMAD to FMA for precision. + unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; + bool Aggressive = TLI.enableAggressiveFMAFusion(VT); + bool LookThroughFPExt = TLI.isFPExtFree(VT); // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) if (N0.getOpcode() == ISD::FMUL && (Aggressive || N0->hasOneUse())) { - return DAG.getNode(FusedOpcode, SL, VT, + return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, N1)); } @@ -7014,7 +7431,7 @@ static SDValue performFsubFmulCombines(unsigned FusedOpcode, // Note: Commutes FSUB operands. 
if (N1.getOpcode() == ISD::FMUL && (Aggressive || N1->hasOneUse())) - return DAG.getNode(FusedOpcode, SL, VT, + return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1), N0); @@ -7025,41 +7442,214 @@ static SDValue performFsubFmulCombines(unsigned FusedOpcode, (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) { SDValue N00 = N0.getOperand(0).getOperand(0); SDValue N01 = N0.getOperand(0).getOperand(1); - return DAG.getNode(FusedOpcode, SL, VT, + return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, N00), N01, DAG.getNode(ISD::FNEG, SL, VT, N1)); } + // Look through FP_EXTEND nodes to do more combining. + if (UnsafeFPMath && LookThroughFPExt) { + // fold (fsub (fpext (fmul x, y)), z) + // -> (fma (fpext x), (fpext y), (fneg z)) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FMUL) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(1)), + DAG.getNode(ISD::FNEG, SL, VT, N1)); + } + + // fold (fsub x, (fpext (fmul y, z))) + // -> (fma (fneg (fpext y)), (fpext z), x) + // Note: Commutes FSUB operands. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == ISD::FMUL) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(0))), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(1)), + N0); + } + + // fold (fsub (fpext (fneg (fmul, x, y))), z) + // -> (fneg (fma (fpext x), (fpext y), z)) + // Note: This could be removed with appropriate canonicalization of the + // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the + // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent + // from implementing the canonicalization in visitFSUB. + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FNEG) { + SDValue N000 = N00.getOperand(0); + if (N000.getOpcode() == ISD::FMUL) { + return DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(1)), + N1)); + } + } + } + + // fold (fsub (fneg (fpext (fmul, x, y))), z) + // -> (fneg (fma (fpext x)), (fpext y), z) + // Note: This could be removed with appropriate canonicalization of the + // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the + // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent + // from implementing the canonicalization in visitFSUB. + if (N0.getOpcode() == ISD::FNEG) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FP_EXTEND) { + SDValue N000 = N00.getOperand(0); + if (N000.getOpcode() == ISD::FMUL) { + return DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(1)), + N1)); + } + } + } + + } + // More folding opportunities when target permits. 
- if (Aggressive) { + if ((UnsafeFPMath || HasFMAD) && Aggressive) { // fold (fsub (fma x, y, (fmul u, v)), z) // -> (fma x, y (fma u, v, (fneg z))) - if (N0.getOpcode() == FusedOpcode && + if (N0.getOpcode() == PreferredFusedOpcode && N0.getOperand(2).getOpcode() == ISD::FMUL) { - return DAG.getNode(FusedOpcode, SDLoc(N), VT, + return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1), - DAG.getNode(FusedOpcode, SDLoc(N), VT, + DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(2).getOperand(0), N0.getOperand(2).getOperand(1), - DAG.getNode(ISD::FNEG, SDLoc(N), VT, + DAG.getNode(ISD::FNEG, SL, VT, N1))); } // fold (fsub x, (fma y, z, (fmul u, v))) // -> (fma (fneg y), z, (fma (fneg u), v, x)) - if (N1.getOpcode() == FusedOpcode && + if (N1.getOpcode() == PreferredFusedOpcode && N1.getOperand(2).getOpcode() == ISD::FMUL) { SDValue N20 = N1.getOperand(2).getOperand(0); SDValue N21 = N1.getOperand(2).getOperand(1); - return DAG.getNode(FusedOpcode, SDLoc(N), VT, - DAG.getNode(ISD::FNEG, SDLoc(N), VT, + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1), - DAG.getNode(FusedOpcode, SDLoc(N), VT, - DAG.getNode(ISD::FNEG, SDLoc(N), VT, - N20), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, N20), + N21, N0)); } + + if (UnsafeFPMath && LookThroughFPExt) { + // fold (fsub (fma x, y, (fpext (fmul u, v))), z) + // -> (fma x, y (fma (fpext u), (fpext v), (fneg z))) + if (N0.getOpcode() == PreferredFusedOpcode) { + SDValue N02 = N0.getOperand(2); + if (N02.getOpcode() == ISD::FP_EXTEND) { + SDValue N020 = N02.getOperand(0); + if (N020.getOpcode() == ISD::FMUL) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N020.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N020.getOperand(1)), + DAG.getNode(ISD::FNEG, SL, VT, + N1))); + } + } + + // fold (fsub (fpext (fma x, y, (fmul u, v))), z) + // -> (fma (fpext x), (fpext y), + // (fma (fpext u), (fpext v), (fneg z))) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. 
+ if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == PreferredFusedOpcode) { + SDValue N002 = N00.getOperand(2); + if (N002.getOpcode() == ISD::FMUL) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(1)), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N002.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N002.getOperand(1)), + DAG.getNode(ISD::FNEG, SL, VT, + N1))); + } + } + + // fold (fsub x, (fma y, z, (fpext (fmul u, v)))) + // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x)) + if (N1.getOpcode() == PreferredFusedOpcode && + N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) { + SDValue N120 = N1.getOperand(2).getOperand(0); + if (N120.getOpcode() == ISD::FMUL) { + SDValue N1200 = N120.getOperand(0); + SDValue N1201 = N120.getOperand(1); + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), + N1.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, + VT, N1200)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N1201), + N0)); + } + } + + // fold (fsub x, (fpext (fma y, z, (fmul u, v)))) + // -> (fma (fneg (fpext y)), (fpext z), + // (fma (fneg (fpext u)), (fpext v), x)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. + if (N1.getOpcode() == ISD::FP_EXTEND && + N1.getOperand(0).getOpcode() == PreferredFusedOpcode) { + SDValue N100 = N1.getOperand(0).getOperand(0); + SDValue N101 = N1.getOperand(0).getOperand(1); + SDValue N102 = N1.getOperand(0).getOperand(2); + if (N102.getOpcode() == ISD::FMUL) { + SDValue N1020 = N102.getOperand(0); + SDValue N1021 = N102.getOperand(1); + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N100)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, N101), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, + VT, N1020)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N1021), + N0)); + } + } + } } return SDValue(); @@ -7074,10 +7664,9 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (fadd c1, c2) -> c1 + c2 if (N0CFP && N1CFP) @@ -7118,11 +7707,11 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // If allowed, fold (fadd (fneg x), x) -> 0.0 if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) - return DAG.getConstantFP(0.0, VT); + return DAG.getConstantFP(0.0, SDLoc(N), VT); // If allowed, fold (fadd x, (fneg x)) -> 0.0 if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) - return DAG.getConstantFP(0.0, VT); + return DAG.getConstantFP(0.0, SDLoc(N), VT); // We can fold chains of FADD's of the same value into multiplications. 
// This transform is not safe in general because we are reducing the number @@ -7134,20 +7723,22 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // (fadd (fmul x, c), x) -> (fmul x, c+1) if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, + SDLoc DL(N); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0), - DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, NewCFP); + DAG.getConstantFP(1.0, DL, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP); } // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, + SDLoc DL(N); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0), - DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, + DAG.getConstantFP(2.0, DL, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP); } } @@ -7158,20 +7749,22 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // (fadd x, (fmul x, c)) -> (fmul x, c+1) if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, + SDLoc DL(N); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0), - DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, NewCFP); + DAG.getConstantFP(1.0, DL, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP); } // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N0.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, + SDLoc DL(N); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0), - DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1.getOperand(0), NewCFP); + DAG.getConstantFP(2.0, DL, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP); } } @@ -7179,18 +7772,22 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { ConstantFPSDNode *CFP = dyn_cast(N0.getOperand(0)); // (fadd (fadd x, x), x) -> (fmul x, 3.0) if (!CFP && N0.getOperand(0) == N0.getOperand(1) && - (N0.getOperand(0) == N1)) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N1, DAG.getConstantFP(3.0, VT)); + (N0.getOperand(0) == N1)) { + SDLoc DL(N); + return DAG.getNode(ISD::FMUL, DL, VT, + N1, DAG.getConstantFP(3.0, DL, VT)); + } } if (N1.getOpcode() == ISD::FADD && AllowNewConst) { ConstantFPSDNode *CFP10 = dyn_cast(N1.getOperand(0)); // (fadd x, (fadd x, x)) -> (fmul x, 3.0) if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && - N1.getOperand(0) == N0) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0, DAG.getConstantFP(3.0, VT)); + N1.getOperand(0) == N0) { + SDLoc DL(N); + return DAG.getNode(ISD::FMUL, DL, VT, + N0, DAG.getConstantFP(3.0, DL, VT)); + } } // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0) @@ -7198,61 +7795,37 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(0) == N1.getOperand(0)) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0.getOperand(0), DAG.getConstantFP(4.0, VT)); - } - } // enable-unsafe-fp-math - - if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) { - // Assume if there is an fmad instruction that it should be aggressively - // used. 
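The guarded block here turns repeated additions of one value into a multiply ((fadd (fmul x, c), x) -> (fmul x, c+1), x+x+x -> x*3.0, and so on); the rewrites are exact only under reassociation, hence the unsafe-FP gate. A small standalone check of the scalar identities, with arbitrary values:

// Scalar form of the fadd strength reductions in this hunk. Only equivalent
// when fast-math style reassociation is allowed.
#include <cstdio>

int main() {
  float x = 1.5f, c = 4.0f;
  float a0 = x * c + x,         a1 = x * (c + 1.0f);  // (fadd (fmul x,c), x)
  float b0 = (x + x) + x,       b1 = x * 3.0f;        // (fadd (fadd x,x), x)
  float d0 = (x + x) + (x + x), d1 = x * 4.0f;        // (fadd (fadd x,x),(fadd x,x))
  std::printf("%g=%g  %g=%g  %g=%g\n", a0, a1, b0, b1, d0, d1);
  return 0;
}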
- if (SDValue Fused = performFaddFmulCombines(ISD::FMAD, true, N, TLI, DAG)) - return Fused; - } - - // FADD -> FMA combines: - if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && - TLI.isFMAFasterThanFMulAndFAdd(VT) && - (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { - - if (!TLI.isOperationLegal(ISD::FMAD, VT)) { - // Don't form FMA if we are preferring FMAD. - if (SDValue Fused - = performFaddFmulCombines(ISD::FMA, - TLI.enableAggressiveFMAFusion(VT), - N, TLI, DAG)) { - return Fused; + N0.getOperand(0) == N1.getOperand(0)) { + SDLoc DL(N); + return DAG.getNode(ISD::FMUL, DL, VT, + N0.getOperand(0), DAG.getConstantFP(4.0, DL, VT)); } } - // When FP_EXTEND nodes are free on the target, and there is an opportunity - // to combine into FMA, arrange such nodes accordingly. - if (TLI.isFPExtFree(VT)) { - - // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) - if (N0.getOpcode() == ISD::FP_EXTEND) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N00.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N00.getOperand(1)), N1); - } - - // fold (fadd x, (fpext (fmul y, z)), z) -> (fma (fpext y), (fpext z), x) - // Note: Commutes FADD operands. - if (N1.getOpcode() == ISD::FP_EXTEND) { - SDValue N10 = N1.getOperand(0); - if (N10.getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N10.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N10.getOperand(1)), N0); + // Canonicalize chains of adds to LHS to simplify the following transform. + if (N0.getOpcode() != ISD::FADD && N1.getOpcode() == ISD::FADD) + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0); + + // Convert a chain of 3 dependent operations into 2 independent operations + // and 1 dependent operation: + // (fadd N0: (fadd N00: (fadd z, w), N01: y), N1: x) -> + // (fadd N00: (fadd z, w), (fadd N1: x, N01: y)) + if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() && + N1.getOpcode() != ISD::FADD) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FADD) { + SDValue N01 = N0.getOperand(1); + SDValue NewAdd = DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N01); + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N00, NewAdd); } } + } // enable-unsafe-fp-math + + // FADD -> FMA combines: + SDValue Fused = visitFADDForFMACombine(N); + if (Fused) { + AddToWorklist(Fused.getNode()); + return Fused; } return SDValue(); @@ -7268,14 +7841,13 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (fsub c1, c2) -> c1-c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, N1); + return DAG.getNode(ISD::FSUB, dl, VT, N0, N1); // fold (fsub A, (fneg B)) -> (fadd A, B) if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) @@ -7298,7 +7870,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // (fsub x, x) -> 0.0 if (N0 == N1) - return DAG.getConstantFP(0.0f, VT); + return DAG.getConstantFP(0.0f, dl, VT); // (fsub x, (fadd x, y)) -> (fneg y) // (fsub x, (fadd y, x)) -> (fneg y) @@ -7314,96 +7886,11 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } } - if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, 
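The new reassociation above trades a three-deep dependent add chain for a two-deep tree: in ((z+w)+y)+x each add waits on the previous one, while in (z+w)+(x+y) the two inner adds are independent. A minimal standalone illustration (values arbitrary; equivalence assumes unsafe FP math):

// (fadd (fadd (fadd z, w), y), x)  ->  (fadd (fadd z, w), (fadd x, y))
// Same value under reassociation, but the right-hand tree exposes two
// independent adds, shortening the critical path from 3 to 2.
#include <cstdio>

int main() {
  float x = 1.0f, y = 2.0f, z = 3.0f, w = 4.0f;
  float chained  = ((z + w) + y) + x;   // serial chain of three adds
  float balanced = (z + w) + (x + y);   // (z+w) and (x+y) can run in parallel
  std::printf("chained = %g, balanced = %g\n", chained, balanced);
  return 0;
}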
VT)) { - // Assume if there is an fmad instruction that it should be aggressively - // used. - if (SDValue Fused = performFsubFmulCombines(ISD::FMAD, true, N, TLI, DAG)) - return Fused; - } - // FSUB -> FMA combines: - if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && - TLI.isFMAFasterThanFMulAndFAdd(VT) && - (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { - - if (!TLI.isOperationLegal(ISD::FMAD, VT)) { - // Don't form FMA if we are preferring FMAD. - - if (SDValue Fused - = performFsubFmulCombines(ISD::FMA, - TLI.enableAggressiveFMAFusion(VT), - N, TLI, DAG)) { - return Fused; - } - } - - // When FP_EXTEND nodes are free on the target, and there is an opportunity - // to combine into FMA, arrange such nodes accordingly. - if (TLI.isFPExtFree(VT)) { - // fold (fsub (fpext (fmul x, y)), z) - // -> (fma (fpext x), (fpext y), (fneg z)) - if (N0.getOpcode() == ISD::FP_EXTEND) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N00.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N00.getOperand(1)), - DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1)); - } - - // fold (fsub x, (fpext (fmul y, z))) - // -> (fma (fneg (fpext y)), (fpext z), x) - // Note: Commutes FSUB operands. - if (N1.getOpcode() == ISD::FP_EXTEND) { - SDValue N10 = N1.getOperand(0); - if (N10.getOpcode() == ISD::FMUL) - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - DAG.getNode(ISD::FNEG, SDLoc(N), VT, - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), - VT, N10.getOperand(0))), - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N10.getOperand(1)), - N0); - } - - // fold (fsub (fpext (fneg (fmul, x, y))), z) - // -> (fma (fneg (fpext x)), (fpext y), (fneg z)) - if (N0.getOpcode() == ISD::FP_EXTEND) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == ISD::FNEG) { - SDValue N000 = N00.getOperand(0); - if (N000.getOpcode() == ISD::FMUL) { - return DAG.getNode(ISD::FMA, dl, VT, - DAG.getNode(ISD::FNEG, dl, VT, - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), - VT, N000.getOperand(0))), - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N000.getOperand(1)), - DAG.getNode(ISD::FNEG, dl, VT, N1)); - } - } - } - - // fold (fsub (fneg (fpext (fmul, x, y))), z) - // -> (fma (fneg (fpext x)), (fpext y), (fneg z)) - if (N0.getOpcode() == ISD::FNEG) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == ISD::FP_EXTEND) { - SDValue N000 = N00.getOperand(0); - if (N000.getOpcode() == ISD::FMUL) { - return DAG.getNode(ISD::FMA, dl, VT, - DAG.getNode(ISD::FNEG, dl, VT, - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), - VT, N000.getOperand(0))), - DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, - N000.getOperand(1)), - DAG.getNode(ISD::FNEG, dl, VT, N1)); - } - } - } - } + SDValue Fused = visitFSUBForFMACombine(N); + if (Fused) { + AddToWorklist(Fused.getNode()); + return Fused; } return SDValue(); @@ -7420,15 +7907,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // fold vector ops if (VT.isVector()) { // This just handles C1 * C2 for vectors. Other vector folds are below. - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; - // Canonicalize vector constant to RHS. 
- if (N0.getOpcode() == ISD::BUILD_VECTOR && - N1.getOpcode() != ISD::BUILD_VECTOR) - if (auto *BV0 = dyn_cast(N0)) - if (BV0->isConstant()) - return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); } // fold (fmul c1, c2) -> c1*c2 @@ -7436,7 +7916,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1); // canonicalize constant to RHS - if (N0CFP && !N1CFP) + if (isConstantFPBuildVectorOrConstantFP(N0) && + !isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0); // fold (fmul A, 1.0) -> A @@ -7453,14 +7934,23 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // Fold scalars or any vector constants (not just splats). // This fold is done in general by InstCombine, but extra fmul insts // may have been generated during lowering. + SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); auto *BV1 = dyn_cast(N1); + auto *BV00 = dyn_cast(N00); auto *BV01 = dyn_cast(N01); - if ((N1CFP && isConstOrConstSplatFP(N01)) || - (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) { - SDLoc SL(N); - SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1); - return DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(0), MulConsts); + + // Check 1: Make sure that the first operand of the inner multiply is NOT + // a constant. Otherwise, we may induce infinite looping. + if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) { + // Check 2: Make sure that the second operand of the inner multiply and + // the second operand of the outer multiply are constants. + if ((N1CFP && isConstOrConstSplatFP(N01)) || + (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) { + SDLoc SL(N); + SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1); + return DAG.getNode(ISD::FMUL, SL, VT, N00, MulConsts); + } } } @@ -7470,9 +7960,9 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // inserted during lowering. 
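The FMUL hunk folds nested multiplies by constants, (fmul (fmul x, c1), c2) -> (fmul x, c1*c2), and the new "Check 1" refuses the fold when the inner multiply's first operand is itself a constant, presumably because the constant-to-RHS canonicalization just above could otherwise keep rewriting the result and the two combines would loop. A standalone scalar sketch of the fold itself (arbitrary values, fast-math assumption):

// (fmul (fmul x, c1), c2) -> (fmul x, c1*c2): fold the two constants into one.
#include <cstdio>

int main() {
  float x = 0.75f, c1 = 8.0f, c2 = 0.5f;
  float nested = (x * c1) * c2;   // as extra fmuls may appear during lowering
  float folded = x * (c1 * c2);   // single multiply by the combined constant
  std::printf("nested = %g, folded = %g\n", nested, folded);
  return 0;
}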
if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) { SDLoc SL(N); - const SDValue Two = DAG.getConstantFP(2.0, VT); + const SDValue Two = DAG.getConstantFP(2.0, SL, VT); SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, Two, N1); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), MulConsts); + return DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(0), MulConsts); } } @@ -7570,14 +8060,14 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (Options.UnsafeFPMath && N1CFP && N0 == N2) return DAG.getNode(ISD::FMUL, dl, VT, N0, DAG.getNode(ISD::FADD, dl, VT, - N1, DAG.getConstantFP(1.0, VT))); + N1, DAG.getConstantFP(1.0, dl, VT))); // (fma x, c, (fneg x)) -> (fmul x, (c-1)) if (Options.UnsafeFPMath && N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) return DAG.getNode(ISD::FMUL, dl, VT, N0, DAG.getNode(ISD::FADD, dl, VT, - N1, DAG.getConstantFP(-1.0, VT))); + N1, DAG.getConstantFP(-1.0, dl, VT))); return SDValue(); @@ -7593,10 +8083,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (fdiv c1, c2) -> c1/c2 if (N0CFP && N1CFP) @@ -7618,8 +8107,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || TLI.isFPImmLegal(Recip, VT))) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, - DAG.getConstantFP(Recip, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N0, + DAG.getConstantFP(Recip, DL, VT)); } // If this FDIV is part of a reciprocal square root, it may be folded @@ -7707,8 +8196,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { } if (TLI.combineRepeatedFPDivisors(Users.size())) { - SDValue FPOne = DAG.getConstantFP(1.0, VT); // floating point 1.0 - SDValue Reciprocal = DAG.getNode(ISD::FDIV, SDLoc(N), VT, FPOne, N1); + SDLoc DL(N); + SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); // floating point 1.0 + SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1); // Dividend / Divisor -> Dividend * Reciprocal for (auto I = Users.begin(), E = Users.end(); I != E; ++I) { @@ -7745,20 +8235,21 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5) if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) { EVT VT = RV.getValueType(); - RV = DAG.getNode(ISD::FMUL, SDLoc(N), VT, N->getOperand(0), RV); + SDLoc DL(N); + RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV); AddToWorklist(RV.getNode()); // Unfortunately, RV is now NaN if the input was exactly 0. // Select out this case and force the answer to 0. - SDValue Zero = DAG.getConstantFP(0.0, VT); + SDValue Zero = DAG.getConstantFP(0.0, DL, VT); SDValue ZeroCmp = - DAG.getSetCC(SDLoc(N), TLI.getSetCCResultType(*DAG.getContext(), VT), + DAG.getSetCC(DL, TLI.getSetCCResultType(*DAG.getContext(), VT), N->getOperand(0), Zero, ISD::SETEQ); AddToWorklist(ZeroCmp.getNode()); AddToWorklist(RV.getNode()); RV = DAG.getNode(VT.isVector() ? 
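Later in this chunk, when combineRepeatedFPDivisors fires, every division by a common divisor is rewritten as a multiply by a single reciprocal (Dividend / Divisor -> Dividend * Reciprocal). A standalone sketch of the effect (arbitrary values; a fast-math rewrite, since the reciprocal form can differ from true division in the last ulp unless the divisor is a power of two):

// One fdiv computes 1/b; the remaining divisions become multiplies.
#include <cstdio>

int main() {
  double a = 3.0, c = 5.0, d = 7.0, b = 4.0;
  double q0 = a / b, q1 = c / b, q2 = d / b;   // before: three divisions
  double r  = 1.0 / b;                         // Reciprocal = fdiv 1.0, b
  double p0 = a * r, p1 = c * r, p2 = d * r;   // after: three multiplies
  std::printf("%g %g %g  vs  %g %g %g\n", q0, q1, q2, p0, p1, p2);
  return 0;
}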
ISD::VSELECT : ISD::SELECT, - SDLoc(N), VT, ZeroCmp, Zero, RV); + DL, VT, ZeroCmp, Zero, RV); return RV; } } @@ -7817,12 +8308,11 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantSDNode *N0C = dyn_cast(N0); EVT VT = N->getValueType(0); EVT OpVT = N0.getValueType(); // fold (sint_to_fp c1) -> c1fp - if (N0C && + if (isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) @@ -7844,11 +8334,12 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { !VT.isVector() && (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { + SDLoc DL(N); SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1), - DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT), + DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), N0.getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops); + return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); } // fold (sint_to_fp (zext (setcc x, y, cc))) -> @@ -7857,11 +8348,12 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() && (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { + SDLoc DL(N); SDValue Ops[] = { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1), - DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT), + DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), N0.getOperand(0).getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops); + return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); } } @@ -7870,12 +8362,11 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantSDNode *N0C = dyn_cast(N0); EVT VT = N->getValueType(0); EVT OpVT = N0.getValueType(); // fold (uint_to_fp c1) -> c1fp - if (N0C && + if (isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) @@ -7897,11 +8388,12 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { if (N0.getOpcode() == ISD::SETCC && !VT.isVector() && (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { + SDLoc DL(N); SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1), - DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT), + DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), N0.getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops); + return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); } } @@ -7999,9 +8491,11 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { // single-step fp_round we want to fold to. // In other words, double rounding isn't the same as rounding. // Also, this is a value preserving truncation iff both fp_round's are. 
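The FSQRT hunk computes sqrt(X) as X * (1/sqrt(X)) from a reciprocal-square-root estimate, and its comments explain the catch: at X == 0 that product is 0 * inf = NaN, so a compare-and-select forces the answer back to 0.0. A simplified standalone sketch, with plain 1/sqrt standing in for the target estimate plus Newton refinement:

// sqrt(x) via x * rsqrt(x), with the zero-select the combine inserts.
#include <cmath>
#include <cstdio>

static double rsqrt(double x) { return 1.0 / std::sqrt(x); }

static double sqrt_via_rsqrt(double x) {
  double rv = x * rsqrt(x);        // NaN when x == 0 (0 * inf)
  return (x == 0.0) ? 0.0 : rv;    // the SELECT/VSELECT on ZeroCmp
}

int main() {
  for (double x : {0.0, 0.25, 2.0, 9.0})
    std::printf("x=%g  libm=%g  via-rsqrt=%g\n", x, std::sqrt(x), sqrt_via_rsqrt(x));
  return 0;
}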
- if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) - return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0), - DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc)); + if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) { + SDLoc DL(N); + return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0), + DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL)); + } } // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y) @@ -8024,8 +8518,9 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { // fold (fp_round_inreg c1fp) -> c1fp if (N0CFP && isTypeLegal(EVT)) { - SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT); - return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Round); + SDLoc DL(N); + SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT); + return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round); } return SDValue(); @@ -8033,7 +8528,6 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast(N0); EVT VT = N->getValueType(0); // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded. @@ -8042,9 +8536,14 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { return SDValue(); // fold (fp_extend c1fp) -> c1fp - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0); + // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op) + if (N0.getOpcode() == ISD::FP16_TO_FP && + TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal) + return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0)); + // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the // value of X. if (N0.getOpcode() == ISD::FP_ROUND @@ -8068,7 +8567,8 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::FP_ROUND, SDLoc(N0), - N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)), + N0.getValueType(), ExtLoad, + DAG.getIntPtrConstant(1, SDLoc(N0))), ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} @@ -8078,11 +8578,10 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue DAGCombiner::visitFCEIL(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast(N0); EVT VT = N->getValueType(0); // fold (fceil c1) -> fceil(c1) - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0); return SDValue(); @@ -8090,11 +8589,10 @@ SDValue DAGCombiner::visitFCEIL(SDNode *N) { SDValue DAGCombiner::visitFTRUNC(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast(N0); EVT VT = N->getValueType(0); // fold (ftrunc c1) -> ftrunc(c1) - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0); return SDValue(); @@ -8102,11 +8600,10 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) { SDValue DAGCombiner::visitFFLOOR(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast(N0); EVT VT = N->getValueType(0); // fold (ffloor c1) -> ffloor(c1) - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0); return SDValue(); @@ -8117,14 +8614,9 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVUnaryOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } - // Constant fold FNEG. - if (isa(N0)) - return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N->getOperand(0)); + if (isConstantFPBuildVectorOrConstantFP(N0)) + return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), &DAG.getTarget().Options)) @@ -8148,8 +8640,9 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { // For a scalar, just generate 0x80... SignMask = APInt::getSignBit(IntVT.getSizeInBits()); } - Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int, - DAG.getConstant(SignMask, IntVT)); + SDLoc DL0(N0); + Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int, + DAG.getConstant(SignMask, DL0, IntVT)); AddToWorklist(Int.getNode()); return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int); } @@ -8182,7 +8675,7 @@ SDValue DAGCombiner::visitFMINNUM(SDNode *N) { if (N0CFP && N1CFP) { const APFloat &C0 = N0CFP->getValueAPF(); const APFloat &C1 = N1CFP->getValueAPF(); - return DAG.getConstantFP(minnum(C0, C1), N->getValueType(0)); + return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), N->getValueType(0)); } if (N0CFP) { @@ -8203,7 +8696,7 @@ SDValue DAGCombiner::visitFMAXNUM(SDNode *N) { if (N0CFP && N1CFP) { const APFloat &C0 = N0CFP->getValueAPF(); const APFloat &C1 = N1CFP->getValueAPF(); - return DAG.getConstantFP(maxnum(C0, C1), N->getValueType(0)); + return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), N->getValueType(0)); } if (N0CFP) { @@ -8219,13 +8712,8 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVUnaryOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } - // fold (fabs c1) -> fabs(c1) - if (isa(N0)) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); // fold (fabs (fabs x)) -> (fabs x) @@ -8255,8 +8743,9 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { // For a scalar, just generate 0x7f... 
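The FNEG and FABS bodies around this point fall back to rewriting the operation as integer bit manipulation on the bitcast value: XOR with the sign bit for negation, AND with its complement for the absolute value. A standalone sketch of the bit-level identity, with memcpy standing in for ISD::BITCAST:

// fneg x -> bitcast (xor (bitcast x), 0x80000000)
// fabs x -> bitcast (and (bitcast x), 0x7fffffff)
#include <cstdint>
#include <cstdio>
#include <cstring>

static uint32_t to_bits(float f)    { uint32_t b; std::memcpy(&b, &f, 4); return b; }
static float    to_float(uint32_t b) { float f;   std::memcpy(&f, &b, 4); return f; }

int main() {
  float x = -3.5f;
  const uint32_t sign = 0x80000000u;
  std::printf("x=%g  fneg=%g  fabs=%g\n",
              x, to_float(to_bits(x) ^ sign), to_float(to_bits(x) & ~sign));
  return 0;
}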
SignMask = ~APInt::getSignBit(IntVT.getSizeInBits()); } - Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int, - DAG.getConstant(SignMask, IntVT)); + SDLoc DL(N0); + Int = DAG.getNode(ISD::AND, DL, IntVT, Int, + DAG.getConstant(SignMask, DL, IntVT)); AddToWorklist(Int.getNode()); return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int); } @@ -8326,13 +8815,14 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (AndConst.isPowerOf2() && cast(Op1)->getAPIntValue()==AndConst.logBase2()) { + SDLoc DL(N); SDValue SetCC = - DAG.getSetCC(SDLoc(N), + DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()), - Op0, DAG.getConstant(0, Op0.getValueType()), + Op0, DAG.getConstant(0, DL, Op0.getValueType()), ISD::SETNE); - SDValue NewBRCond = DAG.getNode(ISD::BRCOND, SDLoc(N), + SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other, Chain, SetCC, N2); // Don't add the new BRCond into the worklist or else SimplifySelectCC // will convert it back to (X & C1) >> C2. @@ -8448,11 +8938,11 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, if (LoadSDNode *LD = dyn_cast(Use)) { if (LD->isIndexed() || LD->getBasePtr().getNode() != N) return false; - VT = Use->getValueType(0); + VT = LD->getMemoryVT(); } else if (StoreSDNode *ST = dyn_cast(Use)) { if (ST->isIndexed() || ST->getBasePtr().getNode() != N) return false; - VT = ST->getValue().getValueType(); + VT = ST->getMemoryVT(); } else return false; @@ -8686,12 +9176,14 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1; else CNV = CNV - Offset1; + SDLoc DL(OtherUses[i]); + // We can now generate the new expression. - SDValue NewOp1 = DAG.getConstant(CNV, CN->getValueType(0)); + SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0)); SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0); SDValue NewUse = DAG.getNode(Opcode, - SDLoc(OtherUses[i]), + DL, OtherUses[i]->getValueType(0), NewOp1, NewOp2); DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse); deleteAndRecombine(OtherUses[i]); @@ -8840,7 +9332,7 @@ SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) { "Cannot split out indexing using opaque target constants"); if (Inc.getOpcode() == ISD::TargetConstant) { ConstantSDNode *ConstInc = cast(Inc); - Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), + Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc), ConstInc->getValueType(0)); } @@ -8941,7 +9433,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), Align, LD->getAAInfo()); - return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); + if (NewLoad.getNode() != N) + return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); } } } @@ -9106,9 +9599,6 @@ struct LoadedSlice { unsigned Shift = 0, SelectionDAG *DAG = nullptr) : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {} - LoadedSlice(const LoadedSlice &LS) - : Inst(LS.Inst), Origin(LS.Origin), Shift(LS.Shift), DAG(LS.DAG) {} - /// \brief Get the bits used in a chunk of bits \p BitWidth large. /// \return Result is \p BitWidth and has used bits set to 1 and /// not used bits set to 0. @@ -9235,8 +9725,9 @@ struct LoadedSlice { if (Offset) { // BaseAddr = BaseAddr + Offset. 
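Earlier in this chunk, visitBRCOND recognizes a branch whose condition is a single extracted bit, (srl (and X, 1<<k), k), and rewrites it as a setcc against zero on (and X, 1<<k), which is the same branch. A trivial standalone check of the scalar equivalence:

// branch on ((x & (1<<k)) >> k)  ==  branch on ((x & (1<<k)) != 0)
#include <cstdio>

static const char *same_branch(unsigned x, unsigned k) {
  bool shifted = (x & (1u << k)) >> k;    // original condition
  bool setcc   = (x & (1u << k)) != 0;    // rewritten condition
  return shifted == setcc ? "same" : "MISMATCH";
}

int main() {
  std::printf("%s %s\n", same_branch(0xA, 3), same_branch(0xA, 2));
  return 0;
}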
EVT ArithType = BaseAddr.getValueType(); - BaseAddr = DAG->getNode(ISD::ADD, SDLoc(Origin), ArithType, BaseAddr, - DAG->getConstant(Offset, ArithType)); + SDLoc DL(Origin); + BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr, + DAG->getConstant(Offset, DL, ArithType)); } // Create the type of the loaded slice according to its size. @@ -9642,10 +10133,12 @@ ShrinkLoadReplaceStoreWithStore(const std::pair &MaskInfo, // Okay, we can do this! Replace the 'St' store with a store of IVal that is // shifted by ByteShift and truncated down to NumBytes. - if (ByteShift) - IVal = DAG.getNode(ISD::SRL, SDLoc(IVal), IVal.getValueType(), IVal, - DAG.getConstant(ByteShift*8, + if (ByteShift) { + SDLoc DL(IVal); + IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal, + DAG.getConstant(ByteShift*8, DL, DC->getShiftAmountTy(IVal.getValueType()))); + } // Figure out the offset for the store and the alignment of the access. unsigned StOffset; @@ -9658,8 +10151,9 @@ ShrinkLoadReplaceStoreWithStore(const std::pair &MaskInfo, SDValue Ptr = St->getBasePtr(); if (StOffset) { - Ptr = DAG.getNode(ISD::ADD, SDLoc(IVal), Ptr.getValueType(), - Ptr, DAG.getConstant(StOffset, Ptr.getValueType())); + SDLoc DL(IVal); + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), + Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType())); NewAlign = MinAlign(NewAlign, StOffset); } @@ -9773,7 +10267,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD), Ptr.getValueType(), Ptr, - DAG.getConstant(PtrOff, Ptr.getValueType())); + DAG.getConstant(PtrOff, SDLoc(LD), + Ptr.getValueType())); SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr, LD->getPointerInfo().getWithOffset(PtrOff), @@ -9781,7 +10276,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { LD->isInvariant(), NewAlign, LD->getAAInfo()); SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD, - DAG.getConstant(NewImm, NewVT)); + DAG.getConstant(NewImm, SDLoc(Value), + NewVT)); SDValue NewST = DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr, ST->getPointerInfo().getWithOffset(PtrOff), @@ -9855,6 +10351,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { return SDValue(); } +namespace { /// Helper struct to parse and store a memory address as base + index + offset. /// We ignore sign extensions when it is safe to do so. /// The following two expressions are not equivalent. To differentiate we need @@ -9942,6 +10439,7 @@ struct BaseIndexOffset { return BaseIndexOffset(Base, Index, Off, IsIndexSignExt); } }; +} // namespace bool DAGCombiner::MergeStoresOfConstantsOrVecElts( SmallVectorImpl &StoreNodes, EVT MemVT, @@ -9949,24 +10447,24 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( // Make sure we have something to merge. if (NumElem < 2) return false; - + int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; - unsigned EarliestNodeUsed = 0; - + unsigned LatestNodeUsed = 0; + for (unsigned i=0; i < NumElem; ++i) { // Find a chain for the new wide-store operand. Notice that some // of the store nodes that we found may not be selected for inclusion // in the wide store. The chain we use needs to be the chain of the - // earliest store node which is *used* and replaced by the wide store. - if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) - EarliestNodeUsed = i; + // latest store node which is *used* and replaced by the wide store. 
+ if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum) + LatestNodeUsed = i; } - - // The earliest Node in the DAG. - LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; + + // The latest Node in the DAG. + LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; SDLoc DL(StoreNodes[0].MemNode); - + SDValue StoredVal; if (UseVector) { // Find a legal type for the vector store. @@ -9978,7 +10476,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( // can be materialized without a load. // It may be beneficial to loosen this restriction to allow non-zero // store merging. - StoredVal = DAG.getConstant(0, Ty); + StoredVal = DAG.getConstant(0, DL, Ty); } else { SmallVector Ops; for (unsigned i = 0; i < NumElem ; ++i) { @@ -9989,7 +10487,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( return false; Ops.push_back(Val); } - + // Build the extracted vector elements back into a vector. StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops); } @@ -10000,7 +10498,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( unsigned StoreBW = NumElem * ElementSizeBytes * 8; APInt StoreInt(StoreBW, 0); - + // Construct a single integer constant which is made of the smaller // constant inputs. bool IsLE = TLI.isLittleEndian(); @@ -10017,23 +10515,23 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( llvm_unreachable("Invalid constant element type"); } } - + // Create the new Load and Store operations. EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); - StoredVal = DAG.getConstant(StoreInt, StoreTy); + StoredVal = DAG.getConstant(StoreInt, DL, StoreTy); } - - SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal, + + SDValue NewStore = DAG.getStore(LatestOp->getChain(), DL, StoredVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), false, false, FirstInChain->getAlignment()); - - // Replace the first store with the new store - CombineTo(EarliestOp, NewStore); + + // Replace the last store with the new store + CombineTo(LatestOp, NewStore); // Erase all other stores. for (unsigned i = 0; i < NumElem ; ++i) { - if (StoreNodes[i].MemNode == EarliestOp) + if (StoreNodes[i].MemNode == LatestOp) continue; StoreSDNode *St = cast(StoreNodes[i].MemNode); // ReplaceAllUsesWith will replace all uses that existed when it was @@ -10050,11 +10548,14 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); deleteAndRecombine(St); } - + return true; } bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { + if (OptLevel == CodeGenOpt::None) + return false; + EVT MemVT = St->getMemoryVT(); int64_t ElementSizeBytes = MemVT.getSizeInBits()/8; bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute( @@ -10071,7 +10572,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { bool IsConstantSrc = isa(StoredVal) || isa(StoredVal); bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT); - + if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc) return false; @@ -10281,7 +10782,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // consecutive loads). if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT) return false; - + // Find a legal type for the vector store. EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); if (TLI.isTypeLegal(Ty)) @@ -10407,18 +10908,19 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (NumElem < 2) return false; - // The earliest Node in the DAG. 
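For constant sources, the merged wide store's value is just the narrow constants packed in memory order; the shift-and-or construction above inserts them in an order chosen by the target's endianness. The hunk also switches the store whose chain the new wide store reuses (and which CombineTo replaces) from the earliest to the latest of the merged stores. A byte-level standalone sketch of the packing for a little-endian target, with memcpy standing in for the stores:

// Four adjacent i8 constant stores vs. one merged i32 store.
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  const uint8_t c[4] = {0x11, 0x22, 0x33, 0x44};
  uint8_t narrow[4], wide[4];

  for (int i = 0; i < 4; ++i)          // before: four separate byte stores
    narrow[i] = c[i];

  uint32_t store_int = 0;              // after: one packed constant...
  for (int i = 3; i >= 0; --i)
    store_int = (store_int << 8) | c[i];
  std::memcpy(wide, &store_int, 4);    // ...and a single i32 store

  std::printf("%s\n", std::memcmp(narrow, wide, 4) == 0 ? "same bytes" : "differ");
  return 0;
}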
- unsigned EarliestNodeUsed = 0; - LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; + // The latest Node in the DAG. + unsigned LatestNodeUsed = 0; for (unsigned i=1; i StoreNodes[EarliestNodeUsed].SequenceNum) - EarliestNodeUsed = i; + // latest store node which is *used* and replaced by the wide store. + if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum) + LatestNodeUsed = i; } + LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; + // Find if it is better to use vectors or integers to load and store // to memory. EVT JointMemOpVT; @@ -10440,7 +10942,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { false, false, false, FirstLoad->getAlignment()); - SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad, + SDValue NewStore = DAG.getStore(LatestOp->getChain(), StoreDL, NewLoad, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), false, false, FirstInChain->getAlignment()); @@ -10458,12 +10960,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain()); } - // Replace the first store with the new store. - CombineTo(EarliestOp, NewStore); + // Replace the last store with the new store. + CombineTo(LatestOp, NewStore); // Erase all other stores. for (unsigned i = 0; i < NumElem ; ++i) { // Remove all Store nodes. - if (StoreNodes[i].MemNode == EarliestOp) + if (StoreNodes[i].MemNode == LatestOp) continue; StoreSDNode *St = cast(StoreNodes[i].MemNode); DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain()); @@ -10518,8 +11020,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { case MVT::f32: if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + ; Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). - bitcastToAPInt().getZExtValue(), MVT::i32); + bitcastToAPInt().getZExtValue(), SDLoc(CFP), + MVT::i32); return DAG.getStore(Chain, SDLoc(N), Tmp, Ptr, ST->getMemOperand()); } @@ -10528,8 +11032,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { + ; Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). - getZExtValue(), MVT::i64); + getZExtValue(), SDLoc(CFP), MVT::i64); return DAG.getStore(Chain, SDLoc(N), Tmp, Ptr, ST->getMemOperand()); } @@ -10540,8 +11045,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // argument passing. Since this is so common, custom legalize the // 64-bit integer store into two 32-bit stores. 
uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32); - SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32); + SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32); + SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32); if (TLI.isBigEndian()) std::swap(Lo, Hi); unsigned Alignment = ST->getAlignment(); @@ -10549,18 +11054,20 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { bool isNonTemporal = ST->isNonTemporal(); AAMDNodes AAInfo = ST->getAAInfo(); + SDLoc DL(N); + SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, ST->getAlignment(), AAInfo); - Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr, - DAG.getConstant(4, Ptr.getValueType())); + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(4, DL, Ptr.getValueType())); Alignment = MinAlign(Alignment, 4U); SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi, Ptr, ST->getPointerInfo().getWithOffset(4), isVolatile, isNonTemporal, Alignment, AAInfo); - return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, St0, St1); } @@ -10572,11 +11079,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Try to infer better alignment information than the store already has. if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { - if (Align > ST->getAlignment()) - return DAG.getTruncStore(Chain, SDLoc(N), Value, + if (Align > ST->getAlignment()) { + SDValue NewStore = + DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(), ST->getMemoryVT(), ST->isVolatile(), ST->isNonTemporal(), Align, ST->getAAInfo()); + if (NewStore.getNode() != N) + return CombineTo(ST, NewStore, true); + } } } @@ -10797,24 +11308,26 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( SDValue Offset; EVT PtrType = NewPtr.getValueType(); MachinePointerInfo MPI; + SDLoc DL(EVE); if (auto *ConstEltNo = dyn_cast(EltNo)) { int Elt = ConstEltNo->getZExtValue(); unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8; if (TLI.isBigEndian()) PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff; - Offset = DAG.getConstant(PtrOff, PtrType); + Offset = DAG.getConstant(PtrOff, DL, PtrType); MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff); } else { Offset = DAG.getNode( - ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo, - DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType())); + ISD::MUL, DL, EltNo.getValueType(), EltNo, + DAG.getConstant(VecEltVT.getStoreSize(), DL, EltNo.getValueType())); if (TLI.isBigEndian()) Offset = DAG.getNode( - ISD::SUB, SDLoc(EVE), EltNo.getValueType(), - DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset); + ISD::SUB, DL, EltNo.getValueType(), + DAG.getConstant(InVecVT.getStoreSize(), DL, EltNo.getValueType()), + Offset); MPI = OriginalLoad->getPointerInfo(); } - NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset); + NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset); // The replacement we need to do here is a little tricky: we need to // replace an extractelement of a load with a load. 
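The visitSTORE hunk above splits a store of an f64 constant into two i32 stores when 64-bit integer stores aren't available: the low word goes at the original address and the high word at address + 4, with the pair swapped on big-endian targets. A standalone sketch of the byte-level equivalence on a little-endian host:

// Store of an f64 constant vs. the Lo/Hi i32 store pair.
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  double d = 3.141592653589793;
  uint64_t val;
  std::memcpy(&val, &d, 8);                         // bitcastToAPInt()
  uint32_t lo = (uint32_t)(val & 0xFFFFFFFFu);      // Val & 0xFFFFFFFF
  uint32_t hi = (uint32_t)(val >> 32);              // Val >> 32

  uint8_t direct[8], split[8];
  std::memcpy(direct, &d, 8);                       // original f64 store
  std::memcpy(split + 0, &lo, 4);                   // St0 at offset 0
  std::memcpy(split + 4, &hi, 4);                   // St1 at Ptr + 4
  std::printf("%s\n", std::memcmp(direct, split, 8) == 0 ? "same bytes" : "differ");
  return 0;
}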
@@ -10924,8 +11437,8 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (!LegalOperations) { EVT IndexTy = TLI.getVectorIdxTy(); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, - SVInVec, DAG.getConstant(OrigElt, IndexTy)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec, + DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy)); } } @@ -11014,7 +11527,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast(InVec); Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; - EltNo = DAG.getConstant(Elt, EltNo.getValueType()); + EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType()); } } @@ -11104,7 +11617,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); assert(ElemRatio > 1 && "Invalid element size ratio"); SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType): - DAG.getConstant(0, SourceType); + DAG.getConstant(0, SDLoc(N), SourceType); unsigned NewBVElems = ElemRatio * VT.getVectorNumElements(); SmallVector Ops(NewBVElems, Filler); @@ -11197,7 +11710,7 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { // Just because the floating-point vector type is legal does not necessarily // mean that the corresponding integer vector type is. if (!isTypeLegal(NVT)) - return SDValue(); + return SDValue(); SmallVector Opnds; for (unsigned i = 0; i != NumInScalars; ++i) { @@ -11223,12 +11736,10 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (ISD::allOperandsUndef(N)) return DAG.getUNDEF(VT); - SDValue V = reduceBuildVecExtToExtBuildVec(N); - if (V.getNode()) + if (SDValue V = reduceBuildVecExtToExtBuildVec(N)) return V; - V = reduceBuildVecConvertToConvertBuildVec(N); - if (V.getNode()) + if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N)) return V; // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT @@ -11349,26 +11860,27 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) { // If the input vector is too large, try to split it. // We don't support having two input vectors that are too large. - if (VecIn2.getNode()) + // If the zero vector was used, we can not split the vector, + // since we'd need 3 inputs. + if (UsesZeroVector || VecIn2.getNode()) return SDValue(); if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements())) return SDValue(); - + // Try to replace VecIn1 with two extract_subvectors // No need to update the masks, they should still be correct. - VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, - DAG.getConstant(VT.getVectorNumElements(), TLI.getVectorIdxTy())); + VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, + DAG.getConstant(VT.getVectorNumElements(), dl, TLI.getVectorIdxTy())); VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, - DAG.getConstant(0, TLI.getVectorIdxTy())); - UsesZeroVector = false; + DAG.getConstant(0, dl, TLI.getVectorIdxTy())); } else return SDValue(); } if (UsesZeroVector) - VecIn2 = VT.isInteger() ? DAG.getConstant(0, VT) : - DAG.getConstantFP(0.0, VT); + VecIn2 = VT.isInteger() ? DAG.getConstant(0, dl, VT) : + DAG.getConstantFP(0.0, dl, VT); else // If VecIn2 is unused then change it to undef. VecIn2 = VecIn2.getNode() ? 
VecIn2 : DAG.getUNDEF(VT); @@ -11389,6 +11901,68 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { return SDValue(); } +static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT OpVT = N->getOperand(0).getValueType(); + + // If the operands are legal vectors, leave them alone. + if (TLI.isTypeLegal(OpVT)) + return SDValue(); + + SDLoc DL(N); + EVT VT = N->getValueType(0); + SmallVector Ops; + + EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits()); + SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT); + + // Keep track of what we encounter. + bool AnyInteger = false; + bool AnyFP = false; + for (const SDValue &Op : N->ops()) { + if (ISD::BITCAST == Op.getOpcode() && + !Op.getOperand(0).getValueType().isVector()) + Ops.push_back(Op.getOperand(0)); + else if (ISD::UNDEF == Op.getOpcode()) + Ops.push_back(ScalarUndef); + else + return SDValue(); + + // Note whether we encounter an integer or floating point scalar. + // If it's neither, bail out, it could be something weird like x86mmx. + EVT LastOpVT = Ops.back().getValueType(); + if (LastOpVT.isFloatingPoint()) + AnyFP = true; + else if (LastOpVT.isInteger()) + AnyInteger = true; + else + return SDValue(); + } + + // If any of the operands is a floating point scalar bitcast to a vector, + // use floating point types throughout, and bitcast everything. + // Replace UNDEFs by another scalar UNDEF node, of the final desired type. + if (AnyFP) { + SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits()); + ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT); + if (AnyInteger) { + for (SDValue &Op : Ops) { + if (Op.getValueType() == SVT) + continue; + if (Op.getOpcode() == ISD::UNDEF) + Op = ScalarUndef; + else + Op = DAG.getNode(ISD::BITCAST, DL, SVT, Op); + } + } + } + + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT, + VT.getSizeInBits() / SVT.getSizeInBits()); + return DAG.getNode(ISD::BITCAST, DL, VT, + DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops)); +} + SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector @@ -11404,9 +11978,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (ISD::allOperandsUndef(N)) return DAG.getUNDEF(VT); - // Optimize concat_vectors where one of the vectors is undef. - if (N->getNumOperands() == 2 && - N->getOperand(1)->getOpcode() == ISD::UNDEF) { + // Optimize concat_vectors where all but the first of the vectors are undef. 
+ if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) { + return Op.getOpcode() == ISD::UNDEF; + })) { SDValue In = N->getOperand(0); assert(In.getValueType().isVector() && "Must concat vectors"); @@ -11414,6 +11989,15 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (In->getOpcode() == ISD::BITCAST && !In->getOperand(0)->getValueType(0).isVector()) { SDValue Scalar = In->getOperand(0); + + // If the bitcast type isn't legal, it might be a trunc of a legal type; + // look through the trunc so we can still do the transform: + // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar) + if (Scalar->getOpcode() == ISD::TRUNCATE && + !TLI.isTypeLegal(Scalar.getValueType()) && + TLI.isTypeLegal(Scalar->getOperand(0).getValueType())) + Scalar = Scalar->getOperand(0); + EVT SclTy = Scalar->getValueType(0); if (!SclTy.isFloatingPoint() && !SclTy.isInteger()) @@ -11444,28 +12028,30 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { EVT SVT = VT.getScalarType(); EVT MinVT = SVT; - if (!SVT.isFloatingPoint()) + if (!SVT.isFloatingPoint()) { // If BUILD_VECTOR are from built from integer, they may have different - // operand types. Get the smaller type and truncate all operands to it. + // operand types. Get the smallest type and truncate all operands to it. + bool FoundMinVT = false; for (const SDValue &Op : N->ops()) if (ISD::BUILD_VECTOR == Op.getOpcode()) { EVT OpSVT = Op.getOperand(0)->getValueType(0); - MinVT = MinVT.bitsLE(OpSVT) ? MinVT : OpSVT; + MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT; + FoundMinVT = true; } + assert(FoundMinVT && "Concat vector type mismatch"); + } for (const SDValue &Op : N->ops()) { EVT OpVT = Op.getValueType(); unsigned NumElts = OpVT.getVectorNumElements(); if (ISD::UNDEF == Op.getOpcode()) - for (unsigned i = 0; i != NumElts; ++i) - Opnds.push_back(DAG.getUNDEF(MinVT)); + Opnds.append(NumElts, DAG.getUNDEF(MinVT)); if (ISD::BUILD_VECTOR == Op.getOpcode()) { if (SVT.isFloatingPoint()) { assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch"); - for (unsigned i = 0; i != NumElts; ++i) - Opnds.push_back(Op.getOperand(i)); + Opnds.append(Op->op_begin(), Op->op_begin() + NumElts); } else { for (unsigned i = 0; i != NumElts; ++i) Opnds.push_back( @@ -11479,6 +12065,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); } + // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR. + if (SDValue V = combineConcatVectorOfScalars(N, DAG)) + return V; + // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR // nodes often generate nop CONCAT_VECTOR nodes. // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that @@ -11540,7 +12130,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { // type. if (V->getOperand(0).getValueType() != NVT) return SDValue(); - unsigned Idx = dyn_cast(N->getOperand(1))->getZExtValue(); + unsigned Idx = N->getConstantOperandVal(1); unsigned NumElems = NVT.getVectorNumElements(); assert((Idx % NumElems) == 0 && "IDX in concat is not a multiple of the result vector length."); @@ -11843,7 +12433,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // We may have jumped through bitcasts, so the type of the // BUILD_VECTOR may not match the type of the shuffle. 
if (V->getValueType(0) != VT) - NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV); + NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV); return NewBV; } } @@ -11865,6 +12455,118 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return V; } + // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - + // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. + if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) { + SmallVector Ops; + for (int M : SVN->getMask()) { + SDValue Op = DAG.getUNDEF(VT.getScalarType()); + if (M >= 0) { + int Idx = M % NumElts; + SDValue &S = (M < (int)NumElts ? N0 : N1); + if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) { + Op = S.getOperand(Idx); + } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) { + if (Idx == 0) + Op = S.getOperand(0); + } else { + // Operand can't be combined - bail out. + break; + } + } + Ops.push_back(Op); + } + if (Ops.size() == VT.getVectorNumElements()) { + // BUILD_VECTOR requires all inputs to be of the same type, find the + // maximum type and extend them all. + EVT SVT = VT.getScalarType(); + if (SVT.isInteger()) + for (SDValue &Op : Ops) + SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); + if (SVT != VT.getScalarType()) + for (SDValue &Op : Ops) + Op = TLI.isZExtFree(Op.getValueType(), SVT) + ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT) + : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Ops); + } + } + + // If this shuffle only has a single input that is a bitcasted shuffle, + // attempt to merge the 2 shuffles and suitably bitcast the inputs/output + // back to their original types. + if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && + N1.getOpcode() == ISD::UNDEF && Level < AfterLegalizeVectorOps && + TLI.isTypeLegal(VT)) { + + // Peek through the bitcast only if there is one user. + SDValue BC0 = N0; + while (BC0.getOpcode() == ISD::BITCAST) { + if (!BC0.hasOneUse()) + break; + BC0 = BC0.getOperand(0); + } + + auto ScaleShuffleMask = [](ArrayRef Mask, int Scale) { + if (Scale == 1) + return SmallVector(Mask.begin(), Mask.end()); + + SmallVector NewMask; + for (int M : Mask) + for (int s = 0; s != Scale; ++s) + NewMask.push_back(M < 0 ? -1 : Scale * M + s); + return NewMask; + }; + + if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) { + EVT SVT = VT.getScalarType(); + EVT InnerVT = BC0->getValueType(0); + EVT InnerSVT = InnerVT.getScalarType(); + + // Determine which shuffle works with the smaller scalar type. + EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT; + EVT ScaleSVT = ScaleVT.getScalarType(); + + if (TLI.isTypeLegal(ScaleVT) && + 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) && + 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) { + + int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits(); + int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits(); + + // Scale the shuffle masks to the smaller scalar type. + ShuffleVectorSDNode *InnerSVN = cast(BC0); + SmallVector InnerMask = + ScaleShuffleMask(InnerSVN->getMask(), InnerScale); + SmallVector OuterMask = + ScaleShuffleMask(SVN->getMask(), OuterScale); + + // Merge the shuffle masks. + SmallVector NewMask; + for (int M : OuterMask) + NewMask.push_back(M < 0 ? -1 : InnerMask[M]); + + // Test for shuffle mask legality over both commutations. 
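When merging a shuffle with a shuffle seen through a bitcast, the ScaleShuffleMask helper above rewrites a mask expressed over wide elements into one over elements Scale times narrower: index M becomes the run M*Scale .. M*Scale+Scale-1, and undef (-1) stays undef. A standalone copy of that scaling logic with a hypothetical v4 -> v8 example:

// Scale a shuffle mask to a scalar type Scale times narrower.
#include <cstdio>
#include <vector>

static std::vector<int> scaleShuffleMask(const std::vector<int> &mask, int scale) {
  std::vector<int> out;
  for (int m : mask)
    for (int s = 0; s != scale; ++s)
      out.push_back(m < 0 ? -1 : scale * m + s);
  return out;
}

int main() {
  std::vector<int> mask = {2, 0, -1, 3};       // mask over 4 wide elements
  for (int m : scaleShuffleMask(mask, 2))      // mask over 8 narrow elements
    std::printf("%d ", m);                     // prints: 4 5 0 1 -1 -1 6 7
  std::printf("\n");
  return 0;
}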
+ SDValue SV0 = BC0->getOperand(0); + SDValue SV1 = BC0->getOperand(1); + bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT); + if (!LegalMask) { + std::swap(SV0, SV1); + ShuffleVectorSDNode::commuteMask(NewMask); + LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT); + } + + if (LegalMask) { + SV0 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV0); + SV1 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV1); + return DAG.getNode( + ISD::BITCAST, SDLoc(N), VT, + DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask)); + } + } + } + } + // Canonicalize shuffles according to rules: // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A) // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B) @@ -11974,20 +12676,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // Avoid introducing shuffles with illegal mask. if (!TLI.isShuffleMaskLegal(Mask, VT)) { - // Compute the commuted shuffle mask and test again. - for (unsigned i = 0; i != NumElts; ++i) { - int idx = Mask[i]; - if (idx < 0) - continue; - else if (idx < (int)NumElts) - Mask[i] = idx + NumElts; - else - Mask[i] = idx - NumElts; - } + ShuffleVectorSDNode::commuteMask(Mask); if (!TLI.isShuffleMaskLegal(Mask, VT)) return SDValue(); - + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2) @@ -12003,6 +12696,34 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { + SDValue InVal = N->getOperand(0); + EVT VT = N->getValueType(0); + + // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern + // with a VECTOR_SHUFFLE. + if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + SDValue InVec = InVal->getOperand(0); + SDValue EltNo = InVal->getOperand(1); + + // FIXME: We could support implicit truncation if the shuffle can be + // scaled to a smaller vector scalar type. + ConstantSDNode *C0 = dyn_cast(EltNo); + if (C0 && VT == InVec.getValueType() && + VT.getScalarType() == InVal.getValueType()) { + SmallVector NewMask(VT.getVectorNumElements(), -1); + int Elt = C0->getZExtValue(); + NewMask[0] = Elt; + + if (TLI.isShuffleMaskLegal(NewMask, VT)) + return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT), + NewMask); + } + } + + return SDValue(); +} + SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N2 = N->getOperand(2); @@ -12030,50 +12751,67 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) { + SDValue N0 = N->getOperand(0); + + // fold (fp_to_fp16 (fp16_to_fp op)) -> op + if (N0->getOpcode() == ISD::FP16_TO_FP) + return N0->getOperand(0); + + return SDValue(); +} + /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle /// with the destination vector and a zero vector. /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. 
==> /// vector_shuffle V, Zero, <0, 4, 2, 4> SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { EVT VT = N->getValueType(0); - SDLoc dl(N); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - if (N->getOpcode() == ISD::AND) { - if (RHS.getOpcode() == ISD::BITCAST) - RHS = RHS.getOperand(0); - if (RHS.getOpcode() == ISD::BUILD_VECTOR) { - SmallVector Indices; - unsigned NumElts = RHS.getNumOperands(); - for (unsigned i = 0; i != NumElts; ++i) { - SDValue Elt = RHS.getOperand(i); - if (!isa(Elt)) - return SDValue(); + SDLoc dl(N); - if (cast(Elt)->isAllOnesValue()) - Indices.push_back(i); - else if (cast(Elt)->isNullValue()) - Indices.push_back(NumElts+i); - else - return SDValue(); - } + // Make sure we're not running after operation legalization where it + // may have custom lowered the vector shuffles. + if (LegalOperations) + return SDValue(); + + if (N->getOpcode() != ISD::AND) + return SDValue(); + + if (RHS.getOpcode() == ISD::BITCAST) + RHS = RHS.getOperand(0); - // Let's see if the target supports this vector_shuffle and make sure - // we're not running after operation legalization where it may have - // custom lowered the vector shuffles. - EVT RVT = RHS.getValueType(); - if (LegalOperations || !TLI.isVectorClearMaskLegal(Indices, RVT)) + if (RHS.getOpcode() == ISD::BUILD_VECTOR) { + SmallVector Indices; + unsigned NumElts = RHS.getNumOperands(); + + for (unsigned i = 0; i != NumElts; ++i) { + SDValue Elt = RHS.getOperand(i); + if (!isa(Elt)) return SDValue(); - // Return the new VECTOR_SHUFFLE node. - EVT EltVT = RVT.getVectorElementType(); - SmallVector ZeroOps(RVT.getVectorNumElements(), - DAG.getConstant(0, EltVT)); - SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), RVT, ZeroOps); - LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS); - SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); - return DAG.getNode(ISD::BITCAST, dl, VT, Shuf); + if (cast(Elt)->isAllOnesValue()) + Indices.push_back(i); + else if (cast(Elt)->isNullValue()) + Indices.push_back(NumElts+i); + else + return SDValue(); } + + // Let's see if the target supports this vector_shuffle. + EVT RVT = RHS.getValueType(); + if (!TLI.isVectorClearMaskLegal(Indices, RVT)) + return SDValue(); + + // Return the new VECTOR_SHUFFLE node. + EVT EltVT = RVT.getVectorElementType(); + SmallVector ZeroOps(RVT.getVectorNumElements(), + DAG.getConstant(0, dl, EltVT)); + SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, RVT, ZeroOps); + LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS); + SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); + return DAG.getNode(ISD::BITCAST, dl, VT, Shuf); } return SDValue(); @@ -12086,8 +12824,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - SDValue Shuffle = XformToShuffleWithZero(N); - if (Shuffle.getNode()) return Shuffle; + + if (SDValue Shuffle = XformToShuffleWithZero(N)) + return Shuffle; // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold // this operation. @@ -12165,38 +12904,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { return SDValue(); } -/// Visit a binary vector operation, like FABS/FNEG. -SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) { - assert(N->getValueType(0).isVector() && - "SimplifyVUnaryOp only works on vectors!"); - - SDValue N0 = N->getOperand(0); - - if (N0.getOpcode() != ISD::BUILD_VECTOR) - return SDValue(); - - // Operand is a BUILD_VECTOR node, see if we can constant fold it. 
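XformToShuffleWithZero, restructured above, turns an AND with a per-lane all-ones/all-zeros constant into a shuffle against a zero vector: lanes whose mask element is all ones keep index i (pick from V), lanes whose mask element is zero get index NumElts+i (pick from the zero vector). A standalone lane-by-lane sketch of the example in the comment:

// and V, <0xffffffff, 0, 0xffffffff, 0>  ==  vector_shuffle V, Zero, <0,4,2,4>
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t V[4]    = {10, 20, 30, 40};
  uint32_t Zero[4] = {0, 0, 0, 0};
  int      Mask[4] = {0, 4, 2, 4};               // indices >= 4 select Zero

  for (int i = 0; i < 4; ++i) {
    uint32_t and_lane  = V[i] & ((i % 2 == 0) ? 0xFFFFFFFFu : 0u);
    uint32_t shuf_lane = Mask[i] < 4 ? V[Mask[i]] : Zero[Mask[i] - 4];
    std::printf("lane %d: and=%u shuffle=%u\n", i, and_lane, shuf_lane);
  }
  return 0;
}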
@@ -12086,8 +12824,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- SDValue Shuffle = XformToShuffleWithZero(N);
- if (Shuffle.getNode()) return Shuffle;
+
+ if (SDValue Shuffle = XformToShuffleWithZero(N))
+ return Shuffle;

// If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
// this operation.
@@ -12165,38 +12904,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
return SDValue();
}

-/// Visit a binary vector operation, like FABS/FNEG.
-SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) {
- assert(N->getValueType(0).isVector() &&
- "SimplifyVUnaryOp only works on vectors!");
-
- SDValue N0 = N->getOperand(0);
-
- if (N0.getOpcode() != ISD::BUILD_VECTOR)
- return SDValue();
-
- // Operand is a BUILD_VECTOR node, see if we can constant fold it.
- SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
- SDValue Op = N0.getOperand(i);
- if (Op.getOpcode() != ISD::UNDEF &&
- Op.getOpcode() != ISD::ConstantFP)
- break;
- EVT EltVT = Op.getValueType();
- SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(N0), EltVT, Op);
- if (FoldOp.getOpcode() != ISD::UNDEF &&
- FoldOp.getOpcode() != ISD::ConstantFP)
- break;
- Ops.push_back(FoldOp);
- AddToWorklist(FoldOp.getNode());
- }
-
- if (Ops.size() != N0.getNumOperands())
- return SDValue();
-
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N0.getValueType(), Ops);
-}
-
SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0,
SDValue N1, SDValue N2){
assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
@@ -12233,6 +12940,38 @@ SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0,

bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
SDValue RHS) {
+ // fold (select (setcc x, -0.0, *lt), NaN, (fsqrt x))
+ // The select + setcc is redundant, because fsqrt returns NaN for X < -0.
+ if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
+ if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
+ // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
+ SDValue Sqrt = RHS;
+ ISD::CondCode CC;
+ SDValue CmpLHS;
+ const ConstantFPSDNode *NegZero = nullptr;
+
+ if (TheSelect->getOpcode() == ISD::SELECT_CC) {
+ CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
+ CmpLHS = TheSelect->getOperand(0);
+ NegZero = isConstOrConstSplatFP(TheSelect->getOperand(1));
+ } else {
+ // SELECT or VSELECT
+ SDValue Cmp = TheSelect->getOperand(0);
+ if (Cmp.getOpcode() == ISD::SETCC) {
+ CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
+ CmpLHS = Cmp.getOperand(0);
+ NegZero = isConstOrConstSplatFP(Cmp.getOperand(1));
+ }
+ }
+ if (NegZero && NegZero->isNegative() && NegZero->isZero() &&
+ Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
+ CC == ISD::SETULT || CC == ISD::SETLT)) {
+ // We have: (select (setcc x, -0.0, *lt), NaN, (fsqrt x))
+ CombineTo(TheSelect, Sqrt);
+ return true;
+ }
+ }
+ }
// Cannot simplify select with vector condition
if (TheSelect->getOperand(0).getValueType().isVector()) return false;

@@ -12254,6 +12993,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
if (LHS.getOperand(0) != RHS.getOperand(0) ||
// Do not let this transformation reduce the number of volatile loads.
LLD->isVolatile() || RLD->isVolatile() ||
+ // FIXME: If either is a pre/post inc/dec load,
+ // we'd need to split out the address adjustment.
+ LLD->isIndexed() || RLD->isIndexed() ||
// If this is an EXTLOAD, the VT's must match.
LLD->getMemoryVT() != RLD->getMemoryVT() ||
// If this is an EXTLOAD, the kind of extension must match.
@@ -12421,9 +13163,9 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,

// Get the offsets to the 0 and 1 element of the array so that we can
// select between them.
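// NOTE (editorial illustration, not part of this patch): the code below is
// the two-entry constant-pool trick. TV and FV have been laid out as a
// two-element array in the constant pool; the setcc then picks an offset of
// either 0 or EltSize, which is added to CPIdx so that a single load replaces
// the floating-point select, roughly:
//   double Pool[2] = { /* slot 0 */, /* slot 1 */ };   // in the constant pool
//   Result = *(double *)((char *)Pool + (Cond ? One : Zero));
// Which of TV/FV lands in slot 0 depends on how the condition was normalized
// earlier in this function.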
- SDValue Zero = DAG.getIntPtrConstant(0); + SDValue Zero = DAG.getIntPtrConstant(0, DL); unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType()); - SDValue One = DAG.getIntPtrConstant(EltSize); + SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV)); SDValue Cond = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), @@ -12438,7 +13180,6 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), false, false, false, Alignment); - } } @@ -12452,10 +13193,10 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, if (XType.bitsGE(AType)) { // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a // single-bit constant. - if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) { + if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) { unsigned ShCtV = N2C->getAPIntValue().logBase2(); - ShCtV = XType.getSizeInBits()-ShCtV-1; - SDValue ShCt = DAG.getConstant(ShCtV, + ShCtV = XType.getSizeInBits() - ShCtV - 1; + SDValue ShCt = DAG.getConstant(ShCtV, SDLoc(N0), getShiftAmountTy(N0.getValueType())); SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), XType, N0, ShCt); @@ -12471,7 +13212,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), XType, N0, - DAG.getConstant(XType.getSizeInBits()-1, + DAG.getConstant(XType.getSizeInBits() - 1, + SDLoc(N0), getShiftAmountTy(N0.getValueType()))); AddToWorklist(Shift.getNode()); @@ -12500,14 +13242,14 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, // Shift the tested bit over the sign bit. APInt AndMask = ConstAndRHS->getAPIntValue(); SDValue ShlAmt = - DAG.getConstant(AndMask.countLeadingZeros(), + DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS), getShiftAmountTy(AndLHS.getValueType())); SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt); // Now arithmetic right shift it all the way over, so the result is either // all-ones, or zero. 
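// NOTE (worked example, editorial, not part of this patch): for an i8 AndMask
// of 0x10 the tested bit is bit 4, so countLeadingZeros() above is 3 and the
// SHL parks that bit in the sign position (bit 7). The SRA below by
// BitWidth - 1 = 7 then smears the sign bit across the value: the result is
// 0xFF when AndLHS had the bit set and 0x00 otherwise, which is the
// all-ones/zero mask the surrounding fold needs.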
SDValue ShrAmt = - DAG.getConstant(AndMask.getBitWidth()-1, + DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl), getShiftAmountTy(Shl.getValueType())); SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt); @@ -12556,7 +13298,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, // shl setcc result by log2 n2c return DAG.getNode( ISD::SHL, DL, N2.getValueType(), Temp, - DAG.getConstant(N2C->getAPIntValue().logBase2(), + DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp), getShiftAmountTy(Temp.getValueType()))); } } @@ -12581,24 +13323,28 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue Ctlz = DAG.getNode(ISD::CTLZ, SDLoc(N0), XType, N0); return DAG.getNode(ISD::SRL, DL, XType, Ctlz, DAG.getConstant(Log2_32(XType.getSizeInBits()), + SDLoc(Ctlz), getShiftAmountTy(Ctlz.getValueType()))); } // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1)) if (N1C && N1C->isNullValue() && CC == ISD::SETGT) { - SDValue NegN0 = DAG.getNode(ISD::SUB, SDLoc(N0), - XType, DAG.getConstant(0, XType), N0); - SDValue NotN0 = DAG.getNOT(SDLoc(N0), N0, XType); + SDLoc DL(N0); + SDValue NegN0 = DAG.getNode(ISD::SUB, DL, + XType, DAG.getConstant(0, DL, XType), N0); + SDValue NotN0 = DAG.getNOT(DL, N0, XType); return DAG.getNode(ISD::SRL, DL, XType, DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0), - DAG.getConstant(XType.getSizeInBits()-1, + DAG.getConstant(XType.getSizeInBits() - 1, DL, getShiftAmountTy(XType))); } // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1)) if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) { - SDValue Sign = DAG.getNode(ISD::SRL, SDLoc(N0), XType, N0, - DAG.getConstant(XType.getSizeInBits()-1, + SDLoc DL(N0); + SDValue Sign = DAG.getNode(ISD::SRL, DL, XType, N0, + DAG.getConstant(XType.getSizeInBits() - 1, DL, getShiftAmountTy(N0.getValueType()))); - return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType)); + return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, DL, + XType)); } } @@ -12621,11 +13367,12 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, EVT XType = N0.getValueType(); if (SubC && SubC->isNullValue() && XType.isInteger()) { - SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), XType, + SDLoc DL(N0); + SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, - DAG.getConstant(XType.getSizeInBits()-1, + DAG.getConstant(XType.getSizeInBits() - 1, DL, getShiftAmountTy(N0.getValueType()))); - SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), + SDValue Add = DAG.getNode(ISD::ADD, DL, XType, N0, Shift); AddToWorklist(Shift.getNode()); AddToWorklist(Add.getNode()); @@ -12726,7 +13473,7 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) { // does not require additional intermediate precision] EVT VT = Op.getValueType(); SDLoc DL(Op); - SDValue FPOne = DAG.getConstantFP(1.0, VT); + SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); AddToWorklist(Est.getNode()); @@ -12761,7 +13508,7 @@ SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); - SDValue ThreeHalves = DAG.getConstantFP(1.5, VT); + SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that // this entire sequence requires only one FP constant. 
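// NOTE (derivation sketch, editorial, not part of this patch): the sequence
// being built here is the standard Newton-Raphson refinement for
// f(X) = 1/X^2 - Arg, namely
//   Est' = Est * (1.5 - 0.5 * Arg * Est * Est)
// Rewriting 0.5 * Arg as (1.5 * Arg - Arg), call it HalfArg purely for this
// note, lets every iteration be expressed with only the ThreeHalves (1.5)
// constant created above:
//   Est' = Est * (1.5 - HalfArg * Est * Est)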
@@ -12797,8 +13544,8 @@ SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); - SDValue MinusThree = DAG.getConstantFP(-3.0, VT); - SDValue MinusHalf = DAG.getConstantFP(-0.5, VT); + SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); + SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT); // Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) {
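// NOTE (editorial, not part of this patch): this two-constant variant is
// algebraically the same refinement as the one-constant form above, just
// regrouped so the loop body only needs the MinusThree (-3.0) and MinusHalf
// (-0.5) constants:
//   -0.5 * Est * (-3.0 + Arg * Est * Est) == Est * (1.5 - 0.5 * Arg * Est * Est)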