X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FSelectionDAG%2FDAGCombiner.cpp;h=8ba06698942ed0c33e63bb9d7356fb0a325974de;hb=5b870aff81da0c07413f0241087bb3722954b83d;hp=7f2c91bc5c081023dc0ff75a6cc867366137007a;hpb=4ea480499c40cd7e28bf35cacda33ccbab2aab07;p=oota-llvm.git diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 7f2c91bc5c0..8ba06698942 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -31,13 +31,12 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include -#include using namespace llvm; STATISTIC(NodesCombined , "Number of dag nodes combined"); @@ -56,7 +55,7 @@ namespace { //------------------------------ DAGCombiner ---------------------------------// - class VISIBILITY_HIDDEN DAGCombiner { + class DAGCombiner { SelectionDAG &DAG; const TargetLowering &TLI; CombineLevel Level; @@ -120,7 +119,8 @@ namespace { /// it can be simplified or if things it uses can be simplified by bit /// propagation. If so, return true. bool SimplifyDemandedBits(SDValue Op) { - APInt Demanded = APInt::getAllOnesValue(Op.getValueSizeInBits()); + unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); + APInt Demanded = APInt::getAllOnesValue(BitWidth); return SimplifyDemandedBits(Op, Demanded); } @@ -215,12 +215,12 @@ namespace { SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, bool NotExtCompare = false); - SDValue SimplifySetCC(MVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, + SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, DebugLoc DL, bool foldBooleans = true); SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); - SDValue CombineConsecutiveLoads(SDNode *N, MVT VT); - SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, MVT); + SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); + SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); SDValue BuildUDIV(SDNode *N); SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL); @@ -238,14 +238,17 @@ namespace { /// overlap. bool isAlias(SDValue Ptr1, int64_t Size1, const Value *SrcValue1, int SrcValueOffset1, + unsigned SrcValueAlign1, SDValue Ptr2, int64_t Size2, - const Value *SrcValue2, int SrcValueOffset2) const; + const Value *SrcValue2, int SrcValueOffset2, + unsigned SrcValueAlign2) const; /// FindAliasInfo - Extracts the relevant alias information from the memory /// node. Returns true if the operand was a load. bool FindAliasInfo(SDNode *N, SDValue &Ptr, int64_t &Size, - const Value *&SrcValue, int &SrcValueOffset) const; + const Value *&SrcValue, int &SrcValueOffset, + unsigned &SrcValueAlignment) const; /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, /// looking for a better chain (aliasing node.) @@ -253,7 +256,7 @@ namespace { /// getShiftAmountTy - Returns a type large enough to hold any valid /// shift amount - before type legalization these can be huge. - MVT getShiftAmountTy() { + EVT getShiftAmountTy() { return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy(); } @@ -276,8 +279,7 @@ public: namespace { /// WorkListRemover - This class is a DAGUpdateListener that removes any deleted /// nodes from the worklist. -class VISIBILITY_HIDDEN WorkListRemover : - public SelectionDAG::DAGUpdateListener { +class WorkListRemover : public SelectionDAG::DAGUpdateListener { DAGCombiner &DC; public: explicit WorkListRemover(DAGCombiner &dc) : DC(dc) {} @@ -497,7 +499,7 @@ static bool isOneUseSetCC(SDValue N) { SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, SDValue N0, SDValue N1) { - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); if (N0.getOpcode() == Opc && isa(N0.getOperand(1))) { if (isa(N1)) { // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) @@ -539,11 +541,14 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, bool AddTo) { assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); ++NodesCombined; - DOUT << "\nReplacing.1 "; DEBUG(N->dump(&DAG)); - DOUT << "\nWith: "; DEBUG(To[0].getNode()->dump(&DAG)); - DOUT << " and " << NumTo-1 << " other values\n"; - DEBUG(for (unsigned i = 0, e = NumTo; i != e; ++i) - assert(N->getValueType(i) == To[i].getValueType() && + DEBUG(dbgs() << "\nReplacing.1 "; + N->dump(&DAG); + dbgs() << "\nWith: "; + To[0].getNode()->dump(&DAG); + dbgs() << " and " << NumTo-1 << " other values\n"; + for (unsigned i = 0, e = NumTo; i != e; ++i) + assert((!To[i].getNode() || + N->getValueType(i) == To[i].getValueType()) && "Cannot combine value to value of different type!")); WorkListRemover DeadNodes(*this); DAG.ReplaceAllUsesWith(N, To, &DeadNodes); @@ -614,9 +619,11 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { // Replace the old value with the new one. ++NodesCombined; - DOUT << "\nReplacing.2 "; DEBUG(TLO.Old.getNode()->dump(&DAG)); - DOUT << "\nWith: "; DEBUG(TLO.New.getNode()->dump(&DAG)); - DOUT << '\n'; + DEBUG(dbgs() << "\nReplacing.2 "; + TLO.Old.getNode()->dump(&DAG); + dbgs() << "\nWith: "; + TLO.New.getNode()->dump(&DAG); + dbgs() << '\n'); CommitTargetLoweringOpt(TLO); return true; @@ -682,9 +689,11 @@ void DAGCombiner::Run(CombineLevel AtLevel) { RV.getNode()->getOpcode() != ISD::DELETED_NODE && "Node was deleted but visit returned new node!"); - DOUT << "\nReplacing.3 "; DEBUG(N->dump(&DAG)); - DOUT << "\nWith: "; DEBUG(RV.getNode()->dump(&DAG)); - DOUT << '\n'; + DEBUG(dbgs() << "\nReplacing.3 "; + N->dump(&DAG); + dbgs() << "\nWith: "; + RV.getNode()->dump(&DAG); + dbgs() << '\n'); WorkListRemover DeadNodes(*this); if (N->getNumValues() == RV.getNode()->getNumValues()) DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes); @@ -879,7 +888,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { break; case ISD::TokenFactor: - if ((CombinerAA || Op.hasOneUse()) && + if (Op.hasOneUse() && std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) { // Queue up for processing. TFs.push_back(Op.getNode()); @@ -900,7 +909,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { } } } - + SDValue Result; // If we've change things around then replace token factor. @@ -924,9 +933,14 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { /// MERGE_VALUES can always be eliminated. SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { WorkListRemover DeadNodes(*this); - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i), - &DeadNodes); + // Replacing results may cause a different MERGE_VALUES to suddenly + // be CSE'd with N, and carry its uses with it. Iterate until no + // uses remain, to ensure that the node can be safely deleted. + do { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i), + &DeadNodes); + } while (!N->use_empty()); removeFromWorkList(N); DAG.DeleteNode(N); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -935,7 +949,7 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { static SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1, SelectionDAG &DAG) { - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); ConstantSDNode *N01C = dyn_cast(N01); @@ -959,7 +973,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { @@ -1050,7 +1064,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (VT.isInteger() && !VT.isVector()) { APInt LHSZero, LHSOne; APInt RHSZero, RHSOne; - APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()); + APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); if (LHSZero.getBoolValue()) { @@ -1074,6 +1088,26 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (Result.getNode()) return Result; } + // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) + if (N1.getOpcode() == ISD::SHL && + N1.getOperand(0).getOpcode() == ISD::SUB) + if (ConstantSDNode *C = + dyn_cast(N1.getOperand(0).getOperand(0))) + if (C->getAPIntValue() == 0) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, + DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + N1.getOperand(0).getOperand(1), + N1.getOperand(1))); + if (N0.getOpcode() == ISD::SHL && + N0.getOperand(0).getOpcode() == ISD::SUB) + if (ConstantSDNode *C = + dyn_cast(N0.getOperand(0).getOperand(0))) + if (C->getAPIntValue() == 0) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, + DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + N0.getOperand(0).getOperand(1), + N0.getOperand(1))); + return SDValue(); } @@ -1082,7 +1116,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); // If the flag result is dead, turn this into an ADD. if (N->hasNUsesOfValue(0, 1)) @@ -1102,7 +1136,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. APInt LHSZero, LHSOne; APInt RHSZero, RHSOne; - APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()); + APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); if (LHSZero.getBoolValue()) { @@ -1144,7 +1178,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0.getNode()); ConstantSDNode *N1C = dyn_cast(N1.getNode()); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { @@ -1162,6 +1196,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N1C) return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, DAG.getConstant(-N1C->getAPIntValue(), VT)); + // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + if (N0C && N0C->isAllOnesValue()) + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0); // fold (A+B)-A -> B if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1) return N0.getOperand(1); @@ -1217,7 +1254,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { @@ -1310,7 +1347,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0.getNode()); ConstantSDNode *N1C = dyn_cast(N1.getNode()); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) { @@ -1397,7 +1434,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0.getNode()); ConstantSDNode *N1C = dyn_cast(N1.getNode()); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) { @@ -1417,7 +1454,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { if (N1.getOpcode() == ISD::SHL) { if (ConstantSDNode *SHC = dyn_cast(N1.getOperand(0))) { if (SHC->getAPIntValue().isPowerOf2()) { - MVT ADDVT = N1.getOperand(1).getValueType(); + EVT ADDVT = N1.getOperand(1).getValueType(); SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT, N1.getOperand(1), DAG.getConstant(SHC->getAPIntValue() @@ -1449,7 +1486,7 @@ SDValue DAGCombiner::visitSREM(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (srem c1, c2) -> c1%c2 if (N0C && N1C && !N1C->isNullValue()) @@ -1491,7 +1528,7 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (urem c1, c2) -> c1%c2 if (N0C && N1C && !N1C->isNullValue()) @@ -1543,7 +1580,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (mulhs x, 0) -> 0 if (N1C && N1C->isNullValue()) @@ -1564,7 +1601,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (mulhu x, 0) -> 0 if (N1C && N1C->isNullValue()) @@ -1667,19 +1704,28 @@ SDValue DAGCombiner::visitUDIVREM(SDNode *N) { /// two operands of the same opcode, try to simplify it. SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); + // Bail early if none of these transforms apply. + if (N0.getNode()->getNumOperands() == 0) return SDValue(); + // For each of OP in AND/OR/XOR: // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) - // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) - if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND|| + // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) + // + // do not sink logical op inside of a vector extend, since it may combine + // into a vsetcc. + EVT Op0VT = N0.getOperand(0).getValueType(); + if ((N0.getOpcode() == ISD::ZERO_EXTEND || + N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND || - (N0.getOpcode() == ISD::TRUNCATE && - !TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) && - N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()) { + (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) && + !VT.isVector() && + Op0VT == N1.getOperand(0).getValueType() && + (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) { SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), N0.getOperand(0).getValueType(), N0.getOperand(0), N1.getOperand(0)); @@ -1711,7 +1757,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { SDValue LL, LR, RL, RR, CC0, CC1; ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N1.getValueType(); + EVT VT = N1.getValueType(); unsigned BitWidth = VT.getSizeInBits(); // fold vector ops @@ -1819,22 +1865,24 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); + // fold (zext_inreg (extload x)) -> (zextload x) if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { LoadSDNode *LN0 = cast(N0); - MVT EVT = LN0->getMemoryVT(); + EVT MemVT = LN0->getMemoryVT(); // If we zero all the possible extended bits, then we can turn this into // a zextload if we are running before legalize or the operation is legal. unsigned BitWidth = N1.getValueSizeInBits(); if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - EVT.getSizeInBits())) && + BitWidth - MemVT.getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) { + TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->getSrcValueOffset(), MemVT, + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); AddToWorkList(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -1844,19 +1892,20 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast(N0); - MVT EVT = LN0->getMemoryVT(); + EVT MemVT = LN0->getMemoryVT(); // If we zero all the possible extended bits, then we can turn this into // a zextload if we are running before legalize or the operation is legal. unsigned BitWidth = N1.getValueSizeInBits(); if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - EVT.getSizeInBits())) && + BitWidth - MemVT.getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) { + TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->getSrcValueOffset(), MemVT, + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); AddToWorkList(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -1865,48 +1914,71 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (and (load x), 255) -> (zextload x, i8) // fold (and (extload x, i16), 255) -> (zextload x, i8) - if (N1C && N0.getOpcode() == ISD::LOAD) { - LoadSDNode *LN0 = cast(N0); + // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) + if (N1C && (N0.getOpcode() == ISD::LOAD || + (N0.getOpcode() == ISD::ANY_EXTEND && + N0.getOperand(0).getOpcode() == ISD::LOAD))) { + bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND; + LoadSDNode *LN0 = HasAnyExt + ? cast(N0.getOperand(0)) + : cast(N0); if (LN0->getExtensionType() != ISD::SEXTLOAD && - LN0->isUnindexed() && N0.hasOneUse() && - // Do not change the width of a volatile load. - !LN0->isVolatile()) { - MVT EVT = MVT::Other; + LN0->isUnindexed() && N0.hasOneUse() && LN0->hasOneUse()) { uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); - if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())) - EVT = MVT::getIntegerVT(ActiveBits); - - MVT LoadedVT = LN0->getMemoryVT(); - - // Do not generate loads of non-round integer types since these can - // be expensive (and would be wrong if the type is not byte sized). - if (EVT != MVT::Other && LoadedVT.bitsGT(EVT) && EVT.isRound() && - (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) { - MVT PtrType = N0.getOperand(1).getValueType(); - - // For big endian targets, we need to add an offset to the pointer to - // load the correct bytes. For little endian systems, we merely need to - // read fewer bytes from the same pointer. - unsigned LVTStoreBytes = LoadedVT.getStoreSizeInBits()/8; - unsigned EVTStoreBytes = EVT.getStoreSizeInBits()/8; - unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; - unsigned Alignment = LN0->getAlignment(); - SDValue NewPtr = LN0->getBasePtr(); - - if (TLI.isBigEndian()) { - NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType, - NewPtr, DAG.getConstant(PtrOff, PtrType)); - Alignment = MinAlign(Alignment, PtrOff); + if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ + EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); + EVT LoadedVT = LN0->getMemoryVT(); + + if (ExtVT == LoadedVT && + (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { + EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; + + SDValue NewLoad = + DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, + LN0->getChain(), LN0->getBasePtr(), + LN0->getSrcValue(), LN0->getSrcValueOffset(), + ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); + AddToWorkList(N); + CombineTo(LN0, NewLoad, NewLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! } + + // Do not change the width of a volatile load. + // Do not generate loads of non-round integer types since these can + // be expensive (and would be wrong if the type is not byte sized). + if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && + (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { + EVT PtrType = LN0->getOperand(1).getValueType(); + + unsigned Alignment = LN0->getAlignment(); + SDValue NewPtr = LN0->getBasePtr(); + + // For big endian targets, we need to add an offset to the pointer + // to load the correct bytes. For little endian systems, we merely + // need to read fewer bytes from the same pointer. + if (TLI.isBigEndian()) { + unsigned LVTStoreBytes = LoadedVT.getStoreSize(); + unsigned EVTStoreBytes = ExtVT.getStoreSize(); + unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; + NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType, + NewPtr, DAG.getConstant(PtrOff, PtrType)); + Alignment = MinAlign(Alignment, PtrOff); + } - AddToWorkList(NewPtr.getNode()); - SDValue Load = - DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, LN0->getChain(), - NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(), - EVT, LN0->isVolatile(), Alignment); - AddToWorkList(N); - CombineTo(N0.getNode(), Load, Load.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! + AddToWorkList(NewPtr.getNode()); + + EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; + SDValue Load = + DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, + LN0->getChain(), NewPtr, + LN0->getSrcValue(), LN0->getSrcValueOffset(), + ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), + Alignment); + AddToWorkList(N); + CombineTo(LN0, Load, Load.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } } } } @@ -1920,7 +1992,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { SDValue LL, LR, RL, RR, CC0, CC1; ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N1.getValueType(); + EVT VT = N1.getValueType(); // fold vector ops if (VT.isVector()) { @@ -1929,8 +2001,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } // fold (or x, undef) -> -1 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(~0ULL, VT); + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) { + EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; + return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); + } // fold (or c1, c2) -> c1|c2 if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C); @@ -2060,7 +2134,7 @@ static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { // a rot[lr]. SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { // Must be a legal type. Expanded 'n promoted things won't work with rotates. - MVT VT = LHS.getValueType(); + EVT VT = LHS.getValueType(); if (!TLI.isTypeLegal(VT)) return 0; // The target must have at least one rotate flavor. @@ -2221,7 +2295,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue LHS, RHS, CC; ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { @@ -2389,7 +2463,7 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { !isa(BinOpLHSVal->getOperand(1))) return SDValue(); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // If this is a signed shift right, and the high bit is modified by the // logical operation, do not perform the transformation. The highBitSet @@ -2419,8 +2493,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N0.getValueType(); - unsigned OpSizeInBits = VT.getSizeInBits(); + EVT VT = N0.getValueType(); + unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); // fold (shl c1, c2) -> c1< (shl x, (and (trunc y), (trunc c))). if (N1.getOpcode() == ISD::TRUNCATE && @@ -2444,7 +2518,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { SDValue N101 = N1.getOperand(0).getOperand(1); if (ConstantSDNode *N101C = dyn_cast(N101)) { - MVT TruncVT = N1.getValueType(); + EVT TruncVT = N1.getValueType(); SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); TruncC.trunc(TruncVT.getSizeInBits()); @@ -2477,9 +2551,13 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { uint64_t c1 = cast(N0.getOperand(1))->getZExtValue(); if (c1 < VT.getSizeInBits()) { uint64_t c2 = N1C->getZExtValue(); + SDValue HiBitsMask = + DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(), + VT.getSizeInBits() - c1), + VT); SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, N0.getOperand(0), - DAG.getConstant(~0ULL << c1, VT)); + HiBitsMask); if (c2 > c1) return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask, DAG.getConstant(c2-c1, N1.getValueType())); @@ -2489,9 +2567,15 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } } // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) - if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) + if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) { + SDValue HiBitsMask = + DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(), + VT.getSizeInBits() - + N1C->getZExtValue()), + VT); return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0), - DAG.getConstant(~0ULL << N1C->getZExtValue(), VT)); + HiBitsMask); + } return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue(); } @@ -2501,7 +2585,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); + unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); // fold (sra c1, c2) -> (sra c1, c2) if (N0C && N1C) @@ -2513,7 +2598,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (N0C && N0C->isAllOnesValue()) return N0; // fold (sra x, (setge c, size(x))) -> undef - if (N1C && N1C->getZExtValue() >= VT.getSizeInBits()) + if (N1C && N1C->getZExtValue() >= OpSizeInBits) return DAG.getUNDEF(VT); // fold (sra x, 0) -> x if (N1C && N1C->isNullValue()) @@ -2521,18 +2606,22 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports // sext_inreg. if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) { - unsigned LowBits = VT.getSizeInBits() - (unsigned)N1C->getZExtValue(); - MVT EVT = MVT::getIntegerVT(LowBits); - if ((!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, EVT))) + unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue(); + EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits); + if (VT.isVector()) + ExtVT = EVT::getVectorVT(*DAG.getContext(), + ExtVT, VT.getVectorNumElements()); + if ((!LegalOperations || + TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT))) return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, - N0.getOperand(0), DAG.getValueType(EVT)); + N0.getOperand(0), DAG.getValueType(ExtVT)); } // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2)) if (N1C && N0.getOpcode() == ISD::SRA) { if (ConstantSDNode *C1 = dyn_cast(N0.getOperand(1))) { unsigned Sum = N1C->getZExtValue() + C1->getZExtValue(); - if (Sum >= VT.getSizeInBits()) Sum = VT.getSizeInBits()-1; + if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1; return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getConstant(Sum, N1C->getValueType(0))); } @@ -2548,15 +2637,14 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { const ConstantSDNode *N01C = dyn_cast(N0.getOperand(1)); if (N01C && N1C) { // Determine what the truncate's result bitsize and type would be. - unsigned VTValSize = VT.getSizeInBits(); - MVT TruncVT = - MVT::getIntegerVT(VTValSize - N1C->getZExtValue()); + EVT TruncVT = + EVT::getIntegerVT(*DAG.getContext(), OpSizeInBits - N1C->getZExtValue()); // Determine the residual right-shift amount. signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); // If the shift is not a no-op (in which case this should be just a sign // extend already), the truncated to type is legal, sign_extend is legal - // on that type, and the the truncate to that type is both legal and free, + // on that type, and the truncate to that type is both legal and free, // perform the transform. if ((ShiftAmt > 0) && TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) && @@ -2580,10 +2668,10 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { SDValue N101 = N1.getOperand(0).getOperand(1); if (ConstantSDNode *N101C = dyn_cast(N101)) { - MVT TruncVT = N1.getValueType(); + EVT TruncVT = N1.getValueType(); SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); - TruncC.trunc(TruncVT.getSizeInBits()); + TruncC.trunc(TruncVT.getScalarType().getSizeInBits()); return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT, @@ -2611,8 +2699,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N0.getValueType(); - unsigned OpSizeInBits = VT.getSizeInBits(); + EVT VT = N0.getValueType(); + unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); // fold (srl c1, c2) -> c1 >>u c2 if (N0C && N1C) @@ -2645,7 +2733,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl (anyextend x), c) -> (anyextend (srl x, c)) if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { // Shifting in all undef bits? - MVT SmallVT = N0.getOperand(0).getValueType(); + EVT SmallVT = N0.getOperand(0).getValueType(); if (N1C->getZExtValue() >= SmallVT.getSizeInBits()) return DAG.getUNDEF(VT); @@ -2666,7 +2754,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1C && N0.getOpcode() == ISD::CTLZ && N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) { APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()); + APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne); // If any of the input bits are KnownOne, then the input couldn't be all @@ -2704,7 +2792,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { SDValue N101 = N1.getOperand(0).getOperand(1); if (ConstantSDNode *N101C = dyn_cast(N101)) { - MVT TruncVT = N1.getValueType(); + EVT TruncVT = N1.getValueType(); SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); TruncC.trunc(TruncVT.getSizeInBits()); @@ -2723,12 +2811,47 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1C && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); - return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue(); + if (N1C) { + SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue()); + if (NewSRL.getNode()) + return NewSRL; + } + + // Here is a common situation. We want to optimize: + // + // %a = ... + // %b = and i32 %a, 2 + // %c = srl i32 %b, 1 + // brcond i32 %c ... + // + // into + // + // %a = ... + // %b = and %a, 2 + // %c = setcc eq %b, 0 + // brcond %c ... + // + // However when after the source operand of SRL is optimized into AND, the SRL + // itself may not be optimized further. Look for it and add the BRCOND into + // the worklist. + if (N->hasOneUse()) { + SDNode *Use = *N->use_begin(); + if (Use->getOpcode() == ISD::BRCOND) + AddToWorkList(Use); + else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) { + // Also look pass the truncate. + Use = *Use->use_begin(); + if (Use->getOpcode() == ISD::BRCOND) + AddToWorkList(Use); + } + } + + return SDValue(); } SDValue DAGCombiner::visitCTLZ(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (ctlz c1) -> c2 if (isa(N0)) @@ -2738,7 +2861,7 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) { SDValue DAGCombiner::visitCTTZ(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (cttz c1) -> c2 if (isa(N0)) @@ -2748,7 +2871,7 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) { SDValue DAGCombiner::visitCTPOP(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (ctpop c1) -> c2 if (isa(N0)) @@ -2763,8 +2886,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); ConstantSDNode *N2C = dyn_cast(N2); - MVT VT = N->getValueType(0); - MVT VT0 = N0.getValueType(); + EVT VT = N->getValueType(0); + EVT VT0 = N0.getValueType(); // fold (select C, X, X) -> X if (N1 == N2) @@ -2950,7 +3073,7 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (sext c1) -> c1 if (isa(N0)) @@ -2975,9 +3098,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // See if the value being truncated is already sign extended. If so, just // eliminate the trunc/sext pair. SDValue Op = N0.getOperand(0); - unsigned OpBits = Op.getValueType().getSizeInBits(); - unsigned MidBits = N0.getValueType().getSizeInBits(); - unsigned DestBits = VT.getSizeInBits(); + unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits(); + unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits(); + unsigned DestBits = VT.getScalarType().getSizeInBits(); unsigned NumSignBits = DAG.ComputeNumSignBits(Op); if (OpBits == DestBits) { @@ -3000,9 +3123,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // fold (sext (truncate x)) -> (sextinreg x). if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, N0.getValueType())) { - if (Op.getValueType().bitsLT(VT)) + if (OpBits < DestBits) Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op); - else if (Op.getValueType().bitsGT(VT)) + else if (OpBits > DestBits) Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op); return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op, DAG.getValueType(N0.getValueType())); @@ -3024,7 +3147,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), N0.getValueType(), - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), ExtLoad); @@ -3059,14 +3183,15 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast(N0); - MVT EVT = LN0->getMemoryVT(); + EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT)) { + TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) { SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->getSrcValueOffset(), MemVT, + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), @@ -3094,11 +3219,21 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc) + SDValue NegOne = + DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); SDValue SCC = SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), - DAG.getConstant(~0ULL, VT), DAG.getConstant(0, VT), + NegOne, DAG.getConstant(0, VT), cast(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; + if (!LegalOperations || + TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT))) + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, + DAG.getSetCC(N->getDebugLoc(), + TLI.getSetCCResultType(VT), + N0.getOperand(0), N0.getOperand(1), + cast(N0.getOperand(2))->get()), + NegOne, DAG.getConstant(0, VT)); } @@ -3113,7 +3248,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (zext c1) -> c1 if (isa(N0)) @@ -3137,14 +3272,18 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (truncate x)) -> (and x, mask) if (N0.getOpcode() == ISD::TRUNCATE && - (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) { + (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) && + (!TLI.isTruncateFree(N0.getOperand(0).getValueType(), + N0.getValueType()) || + !TLI.isZExtFree(N0.getValueType(), VT))) { SDValue Op = N0.getOperand(0); if (Op.getValueType().bitsLT(VT)) { Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op); } else if (Op.getValueType().bitsGT(VT)) { Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); } - return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), N0.getValueType()); + return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), + N0.getValueType().getScalarType()); } // Fold (zext (and (trunc x), cst)) -> (and x, cst), @@ -3182,7 +3321,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), N0.getValueType(), - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), ExtLoad); @@ -3217,14 +3357,15 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast(N0); - MVT EVT = LN0->getMemoryVT(); + EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT)) { + TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->getSrcValueOffset(), MemVT, + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), @@ -3243,12 +3384,34 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (SCC.getNode()) return SCC; } + // (zext (shl (zext x), cst)) -> (shl (zext x), cst) + if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) && + isa(N0.getOperand(1)) && + N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && + N0.hasOneUse()) { + if (N0.getOpcode() == ISD::SHL) { + // If the original shl may be shifting out bits, do not perform this + // transformation. + unsigned ShAmt = cast(N0.getOperand(1))->getZExtValue(); + unsigned KnownZeroBits = N0.getOperand(0).getValueType().getSizeInBits() - + N0.getOperand(0).getOperand(0).getValueType().getSizeInBits(); + if (ShAmt > KnownZeroBits) + return SDValue(); + } + DebugLoc dl = N->getDebugLoc(); + return DAG.getNode(N0.getOpcode(), dl, VT, + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)), + DAG.getNode(ISD::ZERO_EXTEND, dl, + N0.getOperand(1).getValueType(), + N0.getOperand(1))); + } + return SDValue(); } SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (aext c1) -> c1 if (isa(N0)) @@ -3316,7 +3479,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), N0.getValueType(), - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), ExtLoad); @@ -3353,12 +3517,13 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast(N0); - MVT EVT = LN0->getMemoryVT(); + EVT MemVT = LN0->getMemoryVT(); SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->getSrcValueOffset(), MemVT, + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), @@ -3423,31 +3588,32 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { unsigned Opc = N->getOpcode(); ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); - MVT EVT = VT; + EVT VT = N->getValueType(0); + EVT ExtVT = VT; // This transformation isn't valid for vector loads. if (VT.isVector()) return SDValue(); - // Special case: SIGN_EXTEND_INREG is basically truncating to EVT then + // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then // extended to VT. if (Opc == ISD::SIGN_EXTEND_INREG) { ExtType = ISD::SEXTLOAD; - EVT = cast(N->getOperand(1))->getVT(); - if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT)) + ExtVT = cast(N->getOperand(1))->getVT(); + if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT)) return SDValue(); } - unsigned EVTBits = EVT.getSizeInBits(); + unsigned EVTBits = ExtVT.getSizeInBits(); unsigned ShAmt = 0; - if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { + if (N0.getOpcode() == ISD::SRL && N0.hasOneUse() && ExtVT.isRound()) { if (ConstantSDNode *N01 = dyn_cast(N0.getOperand(1))) { ShAmt = N01->getZExtValue(); // Is the shift amount a multiple of size of VT? if ((ShAmt & (EVTBits-1)) == 0) { N0 = N0.getOperand(0); - if (N0.getValueType().getSizeInBits() <= EVTBits) + // Is the load width a multiple of size of VT? + if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0) return SDValue(); } } @@ -3455,18 +3621,18 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // Do not generate loads of non-round integer types since these can // be expensive (and would be wrong if the type is not byte sized). - if (isa(N0) && N0.hasOneUse() && EVT.isRound() && + if (isa(N0) && N0.hasOneUse() && ExtVT.isRound() && cast(N0)->getMemoryVT().getSizeInBits() > EVTBits && // Do not change the width of a volatile load. !cast(N0)->isVolatile()) { LoadSDNode *LN0 = cast(N0); - MVT PtrType = N0.getOperand(1).getValueType(); + EVT PtrType = N0.getOperand(1).getValueType(); // For big endian targets, we need to adjust the offset to the pointer to // load the correct bytes. if (TLI.isBigEndian()) { unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); - unsigned EVTStoreBits = EVT.getStoreSizeInBits(); + unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; } @@ -3480,10 +3646,11 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { SDValue Load = (ExtType == ISD::NON_EXTLOAD) ? DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, - LN0->isVolatile(), NewAlign) + LN0->isVolatile(), LN0->isNonTemporal(), NewAlign) : DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, - EVT, LN0->isVolatile(), NewAlign); + ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), + NewAlign); // Replace the old load's chain with the new load's chain. WorkListRemover DeadNodes(*this); @@ -3500,17 +3667,17 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - MVT VT = N->getValueType(0); - MVT EVT = cast(N1)->getVT(); - unsigned VTBits = VT.getSizeInBits(); - unsigned EVTBits = EVT.getSizeInBits(); + EVT VT = N->getValueType(0); + EVT EVT = cast(N1)->getVT(); + unsigned VTBits = VT.getScalarType().getSizeInBits(); + unsigned EVTBits = EVT.getScalarType().getSizeInBits(); // fold (sext_in_reg c1) -> c1 if (isa(N0) || N0.getOpcode() == ISD::UNDEF) return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1); // If the input is already sign extended, just drop the extension. - if (DAG.ComputeNumSignBits(N0) >= VT.getSizeInBits()-EVTBits+1) + if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1) return N0; // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 @@ -3525,7 +3692,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // if x is small enough. if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N00 = N0.getOperand(0); - if (N00.getValueType().getSizeInBits() < EVTBits) + if (N00.getValueType().getScalarType().getSizeInBits() < EVTBits) return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1); } @@ -3549,11 +3716,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above. if (N0.getOpcode() == ISD::SRL) { if (ConstantSDNode *ShAmt = dyn_cast(N0.getOperand(1))) - if (ShAmt->getZExtValue()+EVTBits <= VT.getSizeInBits()) { + if (ShAmt->getZExtValue()+EVTBits <= VTBits) { // We can turn this into an SRA iff the input to the SRL is already sign // extended enough. unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0)); - if (VT.getSizeInBits()-(ShAmt->getZExtValue()+EVTBits) < InSignBits) + if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits) return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0), N0.getOperand(1)); } @@ -3570,7 +3737,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), EVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -3586,7 +3754,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), EVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -3596,7 +3765,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // noop truncate if (N0.getValueType() == N->getValueType(0)) @@ -3619,7 +3788,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0)); else // if the source and dest are the same type, we can drop both the extend - // and the truncate + // and the truncate. return N0.getOperand(0); } @@ -3646,15 +3815,14 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) { /// CombineConsecutiveLoads - build_pair (load, load) -> load /// if load locations are consecutive. -SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) { +SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { assert(N->getOpcode() == ISD::BUILD_PAIR); LoadSDNode *LD1 = dyn_cast(getBuildPairElt(N, 0)); LoadSDNode *LD2 = dyn_cast(getBuildPairElt(N, 1)); if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse()) return SDValue(); - MVT LD1VT = LD1->getValueType(0); - const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + EVT LD1VT = LD1->getValueType(0); if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() && @@ -3662,16 +3830,16 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) { // If one is volatile it might be ok, but play conservative and bail out. !LD1->isVolatile() && !LD2->isVolatile() && - TLI.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1, MFI)) { + DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) { unsigned Align = LD1->getAlignment(); unsigned NewAlign = TLI.getTargetData()-> - getABITypeAlignment(VT.getTypeForMVT()); + getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); if (NewAlign <= Align && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(), LD1->getBasePtr(), LD1->getSrcValue(), - LD1->getSrcValueOffset(), false, Align); + LD1->getSrcValueOffset(), false, false, Align); } return SDValue(); @@ -3679,7 +3847,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) { SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // If the input is a BUILD_VECTOR with all constant elements, fold this now. // Only do this before legalize, since afterward the target may be depending @@ -3697,7 +3865,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { break; } - MVT DestEltVT = N->getValueType(0).getVectorElementType(); + EVT DestEltVT = N->getValueType(0).getVectorElementType(); assert(!DestEltVT.isVector() && "Element type of vector ValueType must not be vector!"); if (isSimple) @@ -3707,7 +3875,18 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { // If the input is a constant, let getNode fold it. if (isa(N0) || isa(N0)) { SDValue Res = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, N0); - if (Res.getNode() != N) return Res; + if (Res.getNode() != N) { + if (!LegalOperations || + TLI.isOperationLegal(Res.getNode()->getOpcode(), VT)) + return Res; + + // Folding it resulted in an illegal node, and it's too late to + // do that. Clean up the old node and forego the transformation. + // Ideally this won't happen very often, because instcombine + // and the earlier dagcombine runs (where illegal nodes are + // permitted) should have folded most of them already. + DAG.DeleteNode(Res.getNode()); + } } // (conv (conv x, t1), t2) -> (conv x, t2) @@ -3723,14 +3902,15 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) { LoadSDNode *LN0 = cast(N0); unsigned Align = TLI.getTargetData()-> - getABITypeAlignment(VT.getTypeForMVT()); + getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); unsigned OrigAlign = LN0->getAlignment(); if (Align <= OrigAlign) { SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), - LN0->isVolatile(), OrigAlign); + LN0->isVolatile(), LN0->isNonTemporal(), + OrigAlign); AddToWorkList(N); CombineTo(N0.getNode(), DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), @@ -3766,7 +3946,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { isa(N0.getOperand(0)) && VT.isInteger() && !VT.isVector()) { unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); - MVT IntXVT = MVT::getIntegerVT(OrigXWidth); + EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); if (TLI.isTypeLegal(IntXVT) || !LegalTypes) { SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), IntXVT, N0.getOperand(1)); @@ -3814,7 +3994,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { } SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); return CombineConsecutiveLoads(N, VT); } @@ -3822,8 +4002,8 @@ SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { /// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the /// destination element value type. SDValue DAGCombiner:: -ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { - MVT SrcEltVT = BV->getValueType(0).getVectorElementType(); +ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { + EVT SrcEltVT = BV->getValueType(0).getVectorElementType(); // If this is already the right type, we're done. if (SrcEltVT == DstEltVT) return SDValue(BV, 0); @@ -3845,7 +4025,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { DstEltVT, Op)); AddToWorkList(Ops.back().getNode()); } - MVT VT = MVT::getVectorVT(DstEltVT, + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, BV->getValueType(0).getVectorNumElements()); return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, &Ops[0], Ops.size()); @@ -3858,7 +4038,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { // Convert the input float vector to a int vector where the elements are the // same sizes. assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!"); - MVT IntVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits()); + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits()); BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).getNode(); SrcEltVT = IntVT; } @@ -3867,7 +4047,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { // convert to integer first, then to FP of the right size. if (DstEltVT.isFloatingPoint()) { assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!"); - MVT TmpVT = MVT::getIntegerVT(DstEltVT.getSizeInBits()); + EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits()); SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).getNode(); // Next, convert to FP elements of the same size. @@ -3903,7 +4083,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { Ops.push_back(DAG.getConstant(NewBits, DstEltVT)); } - MVT VT = MVT::getVectorVT(DstEltVT, Ops.size()); + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, &Ops[0], Ops.size()); } @@ -3912,7 +4092,8 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { // turns into multiple outputs. bool isS2V = ISD::isScalarToVector(BV); unsigned NumOutputsPerInput = SrcBitSize/DstBitSize; - MVT VT = MVT::getVectorVT(DstEltVT, NumOutputsPerInput*BV->getNumOperands()); + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, + NumOutputsPerInput*BV->getNumOperands()); SmallVector Ops; for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { @@ -3949,7 +4130,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) { @@ -3990,7 +4171,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) { @@ -4024,7 +4205,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) { @@ -4047,7 +4228,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0); - // fold (fmul X, (fneg 1.0)) -> (fneg X) + // fold (fmul X, -1.0) -> (fneg X) if (N1CFP && N1CFP->isExactlyValue(-1.0)) if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0); @@ -4079,7 +4260,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) { @@ -4112,7 +4293,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (frem c1, c2) -> fmod(c1,c2) if (N0CFP && N1CFP && VT != MVT::ppcf128) @@ -4126,7 +4307,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); if (N0CFP && N1CFP && VT != MVT::ppcf128) // Constant fold return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1); @@ -4174,8 +4355,8 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantSDNode *N0C = dyn_cast(N0); - MVT VT = N->getValueType(0); - MVT OpVT = N0.getValueType(); + EVT VT = N->getValueType(0); + EVT OpVT = N0.getValueType(); // fold (sint_to_fp c1) -> c1fp if (N0C && OpVT != MVT::ppcf128) @@ -4196,8 +4377,8 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantSDNode *N0C = dyn_cast(N0); - MVT VT = N->getValueType(0); - MVT OpVT = N0.getValueType(); + EVT VT = N->getValueType(0); + EVT OpVT = N0.getValueType(); // fold (uint_to_fp c1) -> c1fp if (N0C && OpVT != MVT::ppcf128) @@ -4218,7 +4399,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantFPSDNode *N0CFP = dyn_cast(N0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (fp_to_sint c1fp) -> c1 if (N0CFP) @@ -4230,7 +4411,7 @@ SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantFPSDNode *N0CFP = dyn_cast(N0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (fp_to_uint c1fp) -> c1 if (N0CFP && VT != MVT::ppcf128) @@ -4243,7 +4424,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast(N0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (fp_round c1fp) -> c1fp if (N0CFP && N0.getValueType() != MVT::ppcf128) @@ -4276,8 +4457,8 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); - MVT EVT = cast(N->getOperand(1))->getVT(); + EVT VT = N->getValueType(0); + EVT EVT = cast(N->getOperand(1))->getVT(); ConstantFPSDNode *N0CFP = dyn_cast(N0); // fold (fp_round_inreg c1fp) -> c1fp @@ -4292,7 +4473,7 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantFPSDNode *N0CFP = dyn_cast(N0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded. if (N->hasOneUse() && @@ -4325,7 +4506,8 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), N0.getValueType(), - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), @@ -4339,23 +4521,25 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); if (isNegatibleForFree(N0, LegalOperations)) return GetNegatedExpression(N0, DAG, LegalOperations); // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading // constant pool values. - if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() && - N0.getOperand(0).getValueType().isInteger() && - !N0.getOperand(0).getValueType().isVector()) { + if (N0.getOpcode() == ISD::BIT_CONVERT && + !VT.isVector() && + N0.getNode()->hasOneUse() && + N0.getOperand(0).getValueType().isInteger()) { SDValue Int = N0.getOperand(0); - MVT IntVT = Int.getValueType(); + EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int, DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); AddToWorkList(Int.getNode()); return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), - N->getValueType(0), Int); + VT, Int); } } @@ -4365,7 +4549,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue DAGCombiner::visitFABS(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantFPSDNode *N0CFP = dyn_cast(N0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (fabs c1) -> fabs(c1) if (N0CFP && VT != MVT::ppcf128) @@ -4384,7 +4568,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { N0.getOperand(0).getValueType().isInteger() && !N0.getOperand(0).getValueType().isVector()) { SDValue Int = N0.getOperand(0); - MVT IntVT = Int.getValueType(); + EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int, DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); @@ -4401,14 +4585,13 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { SDValue Chain = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); - ConstantSDNode *N1C = dyn_cast(N1); - // never taken branch, fold to chain - if (N1C && N1C->isNullValue()) - return Chain; - // unconditional branch - if (N1C && N1C->getAPIntValue() == 1) - return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other, Chain, N2); + // If N is a constant we could fold this into a fallthrough or unconditional + // branch. However that doesn't happen very often in normal code, because + // Instcombine/SimplifyCFG should have handled the available opportunities. + // If we did this folding here, it would be necessary to update the + // MachineBasicBlock CFG, which is awkward. + // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal // on the target. if (N1.getOpcode() == ISD::SETCC && @@ -4418,6 +4601,13 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { N1.getOperand(0), N1.getOperand(1), N2); } + SDNode *Trunc = 0; + if (N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) { + // Look pass truncate. + Trunc = N1.getNode(); + N1 = N1.getOperand(0); + } + if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) { // Match this pattern so that we can generate simpler code: // @@ -4429,7 +4619,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { // into // // %a = ... - // %b = and %a, 2 + // %b = and i32 %a, 2 // %c = setcc eq %b, 0 // brcond %c ... // @@ -4440,9 +4630,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { SDValue Op1 = N1.getOperand(1); if (Op0.getOpcode() == ISD::AND && - Op0.hasOneUse() && Op1.getOpcode() == ISD::Constant) { - SDValue AndOp0 = Op0.getOperand(0); SDValue AndOp1 = Op0.getOperand(1); if (AndOp1.getOpcode() == ISD::Constant) { @@ -4456,16 +4644,76 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { Op0, DAG.getConstant(0, Op0.getValueType()), ISD::SETNE); + SDValue NewBRCond = DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + MVT::Other, Chain, SetCC, N2); + // Don't add the new BRCond into the worklist or else SimplifySelectCC + // will convert it back to (X & C1) >> C2. + CombineTo(N, NewBRCond, false); + // Truncate is dead. + if (Trunc) { + removeFromWorkList(Trunc); + DAG.DeleteNode(Trunc); + } // Replace the uses of SRL with SETCC - DAG.ReplaceAllUsesOfValueWith(N1, SetCC); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes); removeFromWorkList(N1.getNode()); DAG.DeleteNode(N1.getNode()); - return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), - MVT::Other, Chain, SetCC, N2); + return SDValue(N, 0); // Return N so it doesn't get rechecked! } } } } + + // Transform br(xor(x, y)) -> br(x != y) + // Transform br(xor(xor(x,y), 1)) -> br (x == y) + if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) { + SDNode *TheXor = N1.getNode(); + SDValue Op0 = TheXor->getOperand(0); + SDValue Op1 = TheXor->getOperand(1); + if (Op0.getOpcode() == Op1.getOpcode()) { + // Avoid missing important xor optimizations. + SDValue Tmp = visitXOR(TheXor); + if (Tmp.getNode()) { + DEBUG(dbgs() << "\nReplacing.8 "; + TheXor->dump(&DAG); + dbgs() << "\nWith: "; + Tmp.getNode()->dump(&DAG); + dbgs() << '\n'); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N1, Tmp, &DeadNodes); + removeFromWorkList(TheXor); + DAG.DeleteNode(TheXor); + return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + MVT::Other, Chain, Tmp, N2); + } + } + + if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) { + bool Equal = false; + if (ConstantSDNode *RHSCI = dyn_cast(Op0)) + if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() && + Op0.getOpcode() == ISD::XOR) { + TheXor = Op0.getNode(); + Equal = true; + } + + EVT SetCCVT = N1.getValueType(); + if (LegalTypes) + SetCCVT = TLI.getSetCCResultType(SetCCVT); + SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(), + SetCCVT, + Op0, Op1, + Equal ? ISD::SETEQ : ISD::SETNE); + // Replace the uses of XOR with SETCC + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes); + removeFromWorkList(N1.getNode()); + DAG.DeleteNode(N1.getNode()); + return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + MVT::Other, Chain, SetCC, N2); + } + } return SDValue(); } @@ -4476,22 +4724,18 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { CondCodeSDNode *CC = cast(N->getOperand(1)); SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3); + // If N is a constant we could fold this into a fallthrough or unconditional + // branch. However that doesn't happen very often in normal code, because + // Instcombine/SimplifyCFG should have handled the available opportunities. + // If we did this folding here, it would be necessary to update the + // MachineBasicBlock CFG, which is awkward. + // Use SimplifySetCC to simplify SETCC's. SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()), CondLHS, CondRHS, CC->get(), N->getDebugLoc(), false); if (Simp.getNode()) AddToWorkList(Simp.getNode()); - ConstantSDNode *SCCC = dyn_cast_or_null(Simp.getNode()); - - // fold br_cc true, dest -> br dest (unconditional branch) - if (SCCC && !SCCC->isNullValue()) - return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other, - N->getOperand(0), N->getOperand(4)); - // fold br_cc false, dest -> unconditional fall through - if (SCCC && SCCC->isNullValue()) - return N->getOperand(0); - // fold to a simpler setcc if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, @@ -4514,7 +4758,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { bool isLoad = true; SDValue Ptr; - MVT VT; + EVT VT; if (LoadSDNode *LD = dyn_cast(N)) { if (LD->isIndexed()) return false; @@ -4602,9 +4846,11 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { BasePtr, Offset, AM); ++PreIndexedNodes; ++NodesCombined; - DOUT << "\nReplacing.4 "; DEBUG(N->dump(&DAG)); - DOUT << "\nWith: "; DEBUG(Result.getNode()->dump(&DAG)); - DOUT << '\n'; + DEBUG(dbgs() << "\nReplacing.4 "; + N->dump(&DAG); + dbgs() << "\nWith: "; + Result.getNode()->dump(&DAG); + dbgs() << '\n'); WorkListRemover DeadNodes(*this); if (isLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0), @@ -4639,7 +4885,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { bool isLoad = true; SDValue Ptr; - MVT VT; + EVT VT; if (LoadSDNode *LD = dyn_cast(N)) { if (LD->isIndexed()) return false; @@ -4675,7 +4921,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { SDValue Offset; ISD::MemIndexedMode AM = ISD::UNINDEXED; if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) { - if (Ptr == Offset) + if (Ptr == Offset && Op->getOpcode() == ISD::ADD) std::swap(BasePtr, Offset); if (Ptr != BasePtr) continue; @@ -4734,9 +4980,11 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { BasePtr, Offset, AM); ++PostIndexedNodes; ++NodesCombined; - DOUT << "\nReplacing.5 "; DEBUG(N->dump(&DAG)); - DOUT << "\nWith: "; DEBUG(Result.getNode()->dump(&DAG)); - DOUT << '\n'; + DEBUG(dbgs() << "\nReplacing.5 "; + N->dump(&DAG); + dbgs() << "\nWith: "; + Result.getNode()->dump(&DAG); + dbgs() << '\n'); WorkListRemover DeadNodes(*this); if (isLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0), @@ -4765,49 +5013,6 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { return false; } -/// InferAlignment - If we can infer some alignment information from this -/// pointer, return it. -static unsigned InferAlignment(SDValue Ptr, SelectionDAG &DAG) { - // If this is a direct reference to a stack slot, use information about the - // stack slot's alignment. - int FrameIdx = 1 << 31; - int64_t FrameOffset = 0; - if (FrameIndexSDNode *FI = dyn_cast(Ptr)) { - FrameIdx = FI->getIndex(); - } else if (Ptr.getOpcode() == ISD::ADD && - isa(Ptr.getOperand(1)) && - isa(Ptr.getOperand(0))) { - FrameIdx = cast(Ptr.getOperand(0))->getIndex(); - FrameOffset = Ptr.getConstantOperandVal(1); - } - - if (FrameIdx != (1 << 31)) { - // FIXME: Handle FI+CST. - const MachineFrameInfo &MFI = *DAG.getMachineFunction().getFrameInfo(); - if (MFI.isFixedObjectIndex(FrameIdx)) { - int64_t ObjectOffset = MFI.getObjectOffset(FrameIdx) + FrameOffset; - - // The alignment of the frame index can be determined from its offset from - // the incoming frame position. If the frame object is at offset 32 and - // the stack is guaranteed to be 16-byte aligned, then we know that the - // object is 16-byte aligned. - unsigned StackAlign = DAG.getTarget().getFrameInfo()->getStackAlignment(); - unsigned Align = MinAlign(ObjectOffset, StackAlign); - - // Finally, the frame object itself may have a known alignment. Factor - // the alignment + offset into a new alignment. For example, if we know - // the FI is 8 byte aligned, but the pointer is 4 off, we really have a - // 4-byte alignment of the resultant pointer. Likewise align 4 + 4-byte - // offset = 4-byte alignment, align 4 + 1-byte offset = align 1, etc. - unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx), - FrameOffset); - return std::max(Align, FIInfoAlign); - } - } - - return 0; -} - SDValue DAGCombiner::visitLOAD(SDNode *N) { LoadSDNode *LD = cast(N); SDValue Chain = LD->getChain(); @@ -4815,13 +5020,13 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Try to infer better alignment information than the load already has. if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { - if (unsigned Align = InferAlignment(Ptr, DAG)) { + if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > LD->getAlignment()) return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), LD->getValueType(0), Chain, Ptr, LD->getSrcValue(), LD->getSrcValueOffset(), LD->getMemoryVT(), - LD->isVolatile(), Align); + LD->isVolatile(), LD->isNonTemporal(), Align); } } @@ -4838,9 +5043,11 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // v3 = add v2, c // Now we replace use of chain2 with chain1. This makes the second load // isomorphic to the one we are deleting, and thus makes this load live. - DOUT << "\nReplacing.6 "; DEBUG(N->dump(&DAG)); - DOUT << "\nWith chain: "; DEBUG(Chain.getNode()->dump(&DAG)); - DOUT << "\n"; + DEBUG(dbgs() << "\nReplacing.6 "; + N->dump(&DAG); + dbgs() << "\nWith chain: "; + Chain.getNode()->dump(&DAG); + dbgs() << "\n"); WorkListRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes); @@ -4856,9 +5063,11 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) { SDValue Undef = DAG.getUNDEF(N->getValueType(0)); - DOUT << "\nReplacing.6 "; DEBUG(N->dump(&DAG)); - DOUT << "\nWith: "; DEBUG(Undef.getNode()->dump(&DAG)); - DOUT << " and 2 other values\n"; + DEBUG(dbgs() << "\nReplacing.7 "; + N->dump(&DAG); + dbgs() << "\nWith: "; + Undef.getNode()->dump(&DAG); + dbgs() << " and 2 other values\n"); WorkListRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), @@ -4899,7 +5108,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(), BetterChain, Ptr, LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile(), LD->getAlignment()); + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); } else { ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), LD->getValueType(0), @@ -4907,13 +5117,17 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { LD->getSrcValueOffset(), LD->getMemoryVT(), LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); } // Create token factor to keep old chain connected. SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, Chain, ReplLoad.getValue(1)); - + + // Make sure the new and old chains are cleaned up. + AddToWorkList(Token.getNode()); + // Replace uses with load result and token factor. Don't add users // to work list. return CombineTo(N, ReplLoad.getValue(0), Token, false); @@ -4940,7 +5154,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { SDValue Chain = ST->getChain(); SDValue Value = ST->getValue(); SDValue Ptr = ST->getBasePtr(); - MVT VT = Value.getValueType(); + EVT VT = Value.getValueType(); if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse()) return SDValue(); @@ -4967,12 +5181,12 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { unsigned ShAmt = Imm.countTrailingZeros(); unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1; unsigned NewBW = NextPowerOf2(MSB - ShAmt); - MVT NewVT = MVT::getIntegerVT(NewBW); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); while (NewBW < BitWidth && !(TLI.isOperationLegalOrCustom(Opc, NewVT) && TLI.isNarrowingProfitable(VT, NewVT))) { NewBW = NextPowerOf2(NewBW); - NewVT = MVT::getIntegerVT(NewBW); + NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); } if (NewBW >= BitWidth) return SDValue(); @@ -4994,7 +5208,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); if (NewAlign < - TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForMVT())) + TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForEVT(*DAG.getContext()))) return SDValue(); SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(), @@ -5003,13 +5217,14 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(), LD->getChain(), NewPtr, LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile(), NewAlign); + LD->isVolatile(), LD->isNonTemporal(), + NewAlign); SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD, DAG.getConstant(NewImm, NewVT)); SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(), NewVal, NewPtr, ST->getSrcValue(), ST->getSrcValueOffset(), - false, NewAlign); + false, false, NewAlign); AddToWorkList(NewPtr.getNode()); AddToWorkList(NewLD.getNode()); @@ -5033,12 +5248,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Try to infer better alignment information than the store already has. if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { - if (unsigned Align = InferAlignment(Ptr, DAG)) { + if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > ST->getAlignment()) return DAG.getTruncStore(Chain, N->getDebugLoc(), Value, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), ST->getMemoryVT(), - ST->isVolatile(), Align); + ST->isVolatile(), ST->isNonTemporal(), Align); } } @@ -5047,15 +5262,16 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() && ST->isUnindexed()) { unsigned OrigAlign = ST->getAlignment(); - MVT SVT = Value.getOperand(0).getValueType(); + EVT SVT = Value.getOperand(0).getValueType(); unsigned Align = TLI.getTargetData()-> - getABITypeAlignment(SVT.getTypeForMVT()); + getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext())); if (Align <= OrigAlign && ((!LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0), Ptr, ST->getSrcValue(), - ST->getSrcValueOffset(), ST->isVolatile(), OrigAlign); + ST->getSrcValueOffset(), ST->isVolatile(), + ST->isNonTemporal(), OrigAlign); } // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' @@ -5066,7 +5282,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // transform should not be done in this case. if (Value.getOpcode() != ISD::TargetConstantFP) { SDValue Tmp; - switch (CFP->getValueType(0).getSimpleVT()) { + switch (CFP->getValueType(0).getSimpleVT().SimpleTy) { default: llvm_unreachable("Unknown FP type"); case MVT::f80: // We don't do this for these yet. case MVT::f128: @@ -5081,7 +5297,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getStore(Chain, N->getDebugLoc(), Tmp, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), ST->isVolatile(), - ST->getAlignment()); + ST->isNonTemporal(), ST->getAlignment()); } break; case MVT::f64: @@ -5093,7 +5309,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getStore(Chain, N->getDebugLoc(), Tmp, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), ST->isVolatile(), - ST->getAlignment()); + ST->isNonTemporal(), ST->getAlignment()); } else if (!ST->isVolatile() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { // Many FP stores are not made apparent until after legalize, e.g. for @@ -5107,18 +5323,21 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { int SVOffset = ST->getSrcValueOffset(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), - isVolatile, ST->getAlignment()); + isVolatile, isNonTemporal, + ST->getAlignment()); Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr, DAG.getConstant(4, Ptr.getValueType())); SVOffset += 4; Alignment = MinAlign(Alignment, 4U); SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi, Ptr, ST->getSrcValue(), - SVOffset, isVolatile, Alignment); + SVOffset, isVolatile, isNonTemporal, + Alignment); return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, St0, St1); } @@ -5134,23 +5353,28 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // If there is a better chain. if (Chain != BetterChain) { - // Replace the chain to avoid dependency. SDValue ReplStore; + + // Replace the chain to avoid dependency. if (ST->isTruncatingStore()) { ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr, ST->getSrcValue(),ST->getSrcValueOffset(), - ST->getMemoryVT(), - ST->isVolatile(), ST->getAlignment()); + ST->getMemoryVT(), ST->isVolatile(), + ST->isNonTemporal(), ST->getAlignment()); } else { ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), - ST->isVolatile(), ST->getAlignment()); + ST->isVolatile(), ST->isNonTemporal(), + ST->getAlignment()); } // Create token to keep both nodes around. SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, Chain, ReplStore); + // Make sure the new and old chains are cleaned up. + AddToWorkList(Token.getNode()); + // Don't add users to work list. return CombineTo(N, Token, false); } @@ -5175,13 +5399,14 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), ST->getMemoryVT(), - ST->isVolatile(), ST->getAlignment()); + ST->isVolatile(), ST->isNonTemporal(), + ST->getAlignment()); // Otherwise, see if we can simplify the operation with // SimplifyDemandedBits, which only works if the value has a single use. if (SimplifyDemandedBits(Value, APInt::getLowBitsSet( - Value.getValueSizeInBits(), + Value.getValueType().getScalarType().getSizeInBits(), ST->getMemoryVT().getSizeInBits()))) return SDValue(N, 0); } @@ -5208,7 +5433,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0), Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), ST->getMemoryVT(), - ST->isVolatile(), ST->getAlignment()); + ST->isVolatile(), ST->isNonTemporal(), + ST->getAlignment()); } return ReduceLoadOpStoreWidth(N); @@ -5234,10 +5460,10 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { // BUILD_VECTOR with undef elements and the inserted element. if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF && isa(EltNo)) { - MVT VT = InVec.getValueType(); - MVT EVT = VT.getVectorElementType(); + EVT VT = InVec.getValueType(); + EVT EltVT = VT.getVectorElementType(); unsigned NElts = VT.getVectorNumElements(); - SmallVector Ops(NElts, DAG.getUNDEF(EVT)); + SmallVector Ops(NElts, DAG.getUNDEF(EltVT)); unsigned Elt = cast(EltNo)->getZExtValue(); if (Elt < Ops.size()) @@ -5253,12 +5479,16 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { SDValue InVec = N->getOperand(0); if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { - // If the operand is wider than the vector element type then it is implicitly - // truncated. Make that explicit here. - MVT EltVT = InVec.getValueType().getVectorElementType(); + // Check if the result type doesn't match the inserted element type. A + // SCALAR_TO_VECTOR may truncate the inserted element and the + // EXTRACT_VECTOR_ELT may widen the extracted vector. + EVT EltVT = InVec.getValueType().getVectorElementType(); SDValue InOp = InVec.getOperand(0); - if (InOp.getValueType() != EltVT) - return DAG.getNode(ISD::TRUNCATE, InVec.getDebugLoc(), EltVT, InOp); + EVT NVT = N->getValueType(0); + if (InOp.getValueType() != NVT) { + assert(InOp.getValueType().isInteger() && NVT.isInteger()); + return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT); + } return InOp; } @@ -5275,18 +5505,18 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { unsigned Elt = cast(EltNo)->getZExtValue(); bool NewLoad = false; bool BCNumEltsChanged = false; - MVT VT = InVec.getValueType(); - MVT EVT = VT.getVectorElementType(); - MVT LVT = EVT; + EVT VT = InVec.getValueType(); + EVT ExtVT = VT.getVectorElementType(); + EVT LVT = ExtVT; if (InVec.getOpcode() == ISD::BIT_CONVERT) { - MVT BCVT = InVec.getOperand(0).getValueType(); - if (!BCVT.isVector() || EVT.bitsGT(BCVT.getVectorElementType())) + EVT BCVT = InVec.getOperand(0).getValueType(); + if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) return SDValue(); if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) BCNumEltsChanged = true; InVec = InVec.getOperand(0); - EVT = BCVT.getVectorElementType(); + ExtVT = BCVT.getVectorElementType(); NewLoad = true; } @@ -5295,7 +5525,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast(InVec); } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && - InVec.getOperand(0).getValueType() == EVT && + InVec.getOperand(0).getValueType() == ExtVT && ISD::isNormalLoad(InVec.getOperand(0).getNode())) { LN0 = cast(InVec.getOperand(0)); } else if ((SVN = dyn_cast(InVec))) { @@ -5329,7 +5559,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // Check the resultant load doesn't need a higher alignment than the // original load. unsigned NewAlign = - TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForMVT()); + TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext())); if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT)) return SDValue(); @@ -5340,7 +5570,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { SDValue NewPtr = LN0->getBasePtr(); if (Elt) { unsigned PtrOff = LVT.getSizeInBits() * Elt / 8; - MVT PtrType = NewPtr.getValueType(); + EVT PtrType = NewPtr.getValueType(); if (TLI.isBigEndian()) PtrOff = VT.getSizeInBits() / 8 - PtrOff; NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr, @@ -5349,7 +5579,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(), - LN0->isVolatile(), Align); + LN0->isVolatile(), LN0->isNonTemporal(), Align); } return SDValue(); @@ -5357,8 +5587,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { unsigned NumInScalars = N->getNumOperands(); - MVT VT = N->getValueType(0); - MVT EltType = VT.getVectorElementType(); + EVT VT = N->getValueType(0); // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from @@ -5455,11 +5684,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return SDValue(); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); assert(N0.getValueType().getVectorNumElements() == NumElts && "Vector shuffle must be normalized in DAG"); @@ -5517,7 +5745,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> /// vector_shuffle V, Zero, <0, 4, 2, 4> SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); @@ -5540,14 +5768,14 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { } // Let's see if the target supports this vector_shuffle. - MVT RVT = RHS.getValueType(); + EVT RVT = RHS.getValueType(); if (!TLI.isVectorClearMaskLegal(Indices, RVT)) return SDValue(); // Return the new VECTOR_SHUFFLE node. - MVT EVT = RVT.getVectorElementType(); + EVT EltVT = RVT.getVectorElementType(); SmallVector ZeroOps(RVT.getVectorNumElements(), - DAG.getConstant(0, EVT)); + DAG.getConstant(0, EltVT)); SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), RVT, &ZeroOps[0], ZeroOps.size()); LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS); @@ -5566,10 +5794,10 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { // things. Simplifying them may result in a loss of legality. if (LegalOperations) return SDValue(); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); assert(VT.isVector() && "SimplifyVBinOp only works on vectors!"); - MVT EltType = VT.getVectorElementType(); + EVT EltType = VT.getVectorElementType(); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); SDValue Shuffle = XformToShuffleWithZero(N); @@ -5612,7 +5840,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { } if (Ops.size() == LHS.getNumOperands()) { - MVT VT = LHS.getValueType(); + EVT VT = LHS.getValueType(); return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, &Ops[0], Ops.size()); } @@ -5676,33 +5904,50 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, // If this is an EXTLOAD, the VT's must match. if (LLD->getMemoryVT() == RLD->getMemoryVT()) { - // FIXME: this conflates two src values, discarding one. This is not - // the right thing to do, but nothing uses srcvalues now. When they do, - // turn SrcValue into a list of locations. + // FIXME: this discards src value information. This is + // over-conservative. It would be beneficial to be able to remember + // both potential memory locations. Since we are discarding + // src value info, don't do the transformation if the memory + // locations are not in the default address space. + unsigned LLDAddrSpace = 0, RLDAddrSpace = 0; + if (const Value *LLDVal = LLD->getMemOperand()->getValue()) { + if (const PointerType *PT = dyn_cast(LLDVal->getType())) + LLDAddrSpace = PT->getAddressSpace(); + } + if (const Value *RLDVal = RLD->getMemOperand()->getValue()) { + if (const PointerType *PT = dyn_cast(RLDVal->getType())) + RLDAddrSpace = PT->getAddressSpace(); + } SDValue Addr; - if (TheSelect->getOpcode() == ISD::SELECT) { - // Check that the condition doesn't reach either load. If so, folding - // this will induce a cycle into the DAG. - if (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && - !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) { - Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(), - LLD->getBasePtr().getValueType(), - TheSelect->getOperand(0), LLD->getBasePtr(), - RLD->getBasePtr()); - } - } else { - // Check that the condition doesn't reach either load. If so, folding - // this will induce a cycle into the DAG. - if (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && - !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && - !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()) && - !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())) { - Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), - LLD->getBasePtr().getValueType(), - TheSelect->getOperand(0), - TheSelect->getOperand(1), - LLD->getBasePtr(), RLD->getBasePtr(), - TheSelect->getOperand(4)); + if (LLDAddrSpace == 0 && RLDAddrSpace == 0) { + if (TheSelect->getOpcode() == ISD::SELECT) { + // Check that the condition doesn't reach either load. If so, folding + // this will induce a cycle into the DAG. + if ((!LLD->hasAnyUseOfValue(1) || + !LLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) && + (!RLD->hasAnyUseOfValue(1) || + !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()))) { + Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(), + LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), LLD->getBasePtr(), + RLD->getBasePtr()); + } + } else { + // Check that the condition doesn't reach either load. If so, folding + // this will induce a cycle into the DAG. + if ((!LLD->hasAnyUseOfValue(1) || + (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && + !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()))) && + (!RLD->hasAnyUseOfValue(1) || + (!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && + !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())))) { + Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), + LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), + TheSelect->getOperand(1), + LLD->getBasePtr(), RLD->getBasePtr(), + TheSelect->getOperand(4)); + } } } @@ -5712,18 +5957,18 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, Load = DAG.getLoad(TheSelect->getValueType(0), TheSelect->getDebugLoc(), LLD->getChain(), - Addr,LLD->getSrcValue(), - LLD->getSrcValueOffset(), + Addr, 0, 0, LLD->isVolatile(), + LLD->isNonTemporal(), LLD->getAlignment()); } else { Load = DAG.getExtLoad(LLD->getExtensionType(), TheSelect->getDebugLoc(), TheSelect->getValueType(0), - LLD->getChain(), Addr, LLD->getSrcValue(), - LLD->getSrcValueOffset(), + LLD->getChain(), Addr, 0, 0, LLD->getMemoryVT(), LLD->isVolatile(), + LLD->isNonTemporal(), LLD->getAlignment()); } @@ -5751,7 +5996,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, // (x ? y : y) -> y. if (N2 == N3) return N2; - MVT VT = N2.getValueType(); + EVT VT = N2.getValueType(); ConstantSDNode *N1C = dyn_cast(N1.getNode()); ConstantSDNode *N2C = dyn_cast(N2.getNode()); ConstantSDNode *N3C = dyn_cast(N3.getNode()); @@ -5831,7 +6076,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, CstOffset); return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, PseudoSourceValue::getConstantPool(), 0, false, - Alignment); + false, Alignment); } } @@ -5843,8 +6088,8 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, N2.getValueType().isInteger() && (N1C->isNullValue() || // (a < 0) ? b : 0 (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0 - MVT XType = N0.getValueType(); - MVT AType = N2.getValueType(); + EVT XType = N0.getValueType(); + EVT AType = N2.getValueType(); if (XType.bitsGE(AType)) { // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a // single-bit constant. @@ -5923,7 +6168,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, // FIXME: Turn all of these into setcc if setcc if setcc is legal // otherwise, go ahead with the folds. if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) { - MVT XType = N0.getValueType(); + EVT XType = N0.getValueType(); if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) { SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC); @@ -5965,7 +6210,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, if (N1C && N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE) && N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1) && N2.getOperand(0) == N1 && N0.getValueType().isInteger()) { - MVT XType = N0.getValueType(); + EVT XType = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0, DAG.getConstant(XType.getSizeInBits()-1, getShiftAmountTy())); @@ -5980,7 +6225,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT && N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) { if (ConstantSDNode *SubC = dyn_cast(N3.getOperand(0))) { - MVT XType = N0.getValueType(); + EVT XType = N0.getValueType(); if (SubC->isNullValue() && XType.isInteger()) { SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0, @@ -5999,7 +6244,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, } /// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC. -SDValue DAGCombiner::SimplifySetCC(MVT VT, SDValue N0, +SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, DebugLoc DL, bool foldBooleans) { TargetLowering::DAGCombinerInfo @@ -6035,11 +6280,12 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { return S; } -/// FindBaseOffset - Return true if base is known not to alias with anything -/// but itself. Provides base object and offset as results. -static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset) { +/// FindBaseOffset - Return true if base is a frame index, which is known not +// to alias with anything but itself. Provides base object and offset as results. +static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, + GlobalValue *&GV, void *&CV) { // Assume it is a primitive operation. - Base = Ptr; Offset = 0; + Base = Ptr; Offset = 0; GV = 0; CV = 0; // If it's an adding a simple constant then integrate the offset. if (Base.getOpcode() == ISD::ADD) { @@ -6048,36 +6294,73 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset) { Offset += C->getZExtValue(); } } + + // Return the underlying GlobalValue, and update the Offset. Return false + // for GlobalAddressSDNode since the same GlobalAddress may be represented + // by multiple nodes with different offsets. + if (GlobalAddressSDNode *G = dyn_cast(Base)) { + GV = G->getGlobal(); + Offset += G->getOffset(); + return false; + } + // Return the underlying Constant value, and update the Offset. Return false + // for ConstantSDNodes since the same constant pool entry may be represented + // by multiple nodes with different offsets. + if (ConstantPoolSDNode *C = dyn_cast(Base)) { + CV = C->isMachineConstantPoolEntry() ? (void *)C->getMachineCPVal() + : (void *)C->getConstVal(); + Offset += C->getOffset(); + return false; + } // If it's any of the following then it can't alias with anything but itself. - return isa(Base) || - isa(Base) || - isa(Base); + return isa(Base); } /// isAlias - Return true if there is any possibility that the two addresses /// overlap. bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, const Value *SrcValue1, int SrcValueOffset1, + unsigned SrcValueAlign1, SDValue Ptr2, int64_t Size2, - const Value *SrcValue2, int SrcValueOffset2) const { + const Value *SrcValue2, int SrcValueOffset2, + unsigned SrcValueAlign2) const { // If they are the same then they must be aliases. if (Ptr1 == Ptr2) return true; // Gather base node and offset information. SDValue Base1, Base2; int64_t Offset1, Offset2; - bool KnownBase1 = FindBaseOffset(Ptr1, Base1, Offset1); - bool KnownBase2 = FindBaseOffset(Ptr2, Base2, Offset2); + GlobalValue *GV1, *GV2; + void *CV1, *CV2; + bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1); + bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2); - // If they have a same base address then... - if (Base1 == Base2) - // Check to see if the addresses overlap. + // If they have a same base address then check to see if they overlap. + if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2))) return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1); - // If we know both bases then they can't alias. - if (KnownBase1 && KnownBase2) return false; + // If we know what the bases are, and they aren't identical, then we know they + // cannot alias. + if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2)) + return false; + // If we know required SrcValue1 and SrcValue2 have relatively large alignment + // compared to the size and offset of the access, we may be able to prove they + // do not alias. This check is conservative for now to catch cases created by + // splitting vector types. + if ((SrcValueAlign1 == SrcValueAlign2) && + (SrcValueOffset1 != SrcValueOffset2) && + (Size1 == Size2) && (SrcValueAlign1 > Size1)) { + int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1; + int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1; + + // There is no overlap between these relatively aligned accesses of similar + // size, return no alias. + if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1) + return false; + } + if (CombinerGlobalAA) { // Use alias analysis information. int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2); @@ -6097,18 +6380,22 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, /// node. Returns true if the operand was a load. bool DAGCombiner::FindAliasInfo(SDNode *N, SDValue &Ptr, int64_t &Size, - const Value *&SrcValue, int &SrcValueOffset) const { + const Value *&SrcValue, + int &SrcValueOffset, + unsigned &SrcValueAlign) const { if (LoadSDNode *LD = dyn_cast(N)) { Ptr = LD->getBasePtr(); Size = LD->getMemoryVT().getSizeInBits() >> 3; SrcValue = LD->getSrcValue(); SrcValueOffset = LD->getSrcValueOffset(); + SrcValueAlign = LD->getOriginalAlignment(); return true; } else if (StoreSDNode *ST = dyn_cast(N)) { Ptr = ST->getBasePtr(); Size = ST->getMemoryVT().getSizeInBits() >> 3; SrcValue = ST->getSrcValue(); SrcValueOffset = ST->getSrcValueOffset(); + SrcValueAlign = ST->getOriginalAlignment(); } else { llvm_unreachable("FindAliasInfo expected a memory operand"); } @@ -6121,28 +6408,45 @@ bool DAGCombiner::FindAliasInfo(SDNode *N, void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, SmallVector &Aliases) { SmallVector Chains; // List of chains to visit. - std::set Visited; // Visited node set. + SmallPtrSet Visited; // Visited node set. // Get alias information for node. SDValue Ptr; - int64_t Size = 0; - const Value *SrcValue = 0; - int SrcValueOffset = 0; - bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset); + int64_t Size; + const Value *SrcValue; + int SrcValueOffset; + unsigned SrcValueAlign; + bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset, + SrcValueAlign); // Starting off. Chains.push_back(OriginalChain); - + unsigned Depth = 0; + // Look at each chain and determine if it is an alias. If so, add it to the // aliases list. If not, then continue up the chain looking for the next // candidate. while (!Chains.empty()) { SDValue Chain = Chains.back(); Chains.pop_back(); + + // For TokenFactor nodes, look at each operand and only continue up the + // chain until we find two aliases. If we've seen two aliases, assume we'll + // find more and revert to original chain since the xform is unlikely to be + // profitable. + // + // FIXME: The depth check could be made to return the last non-aliasing + // chain we found before we hit a tokenfactor rather than the original + // chain. + if (Depth > 6 || Aliases.size() == 2) { + Aliases.clear(); + Aliases.push_back(OriginalChain); + break; + } - // Don't bother if we've been before. - if (Visited.find(Chain.getNode()) != Visited.end()) continue; - Visited.insert(Chain.getNode()); + // Don't bother if we've been before. + if (!Visited.insert(Chain.getNode())) + continue; switch (Chain.getOpcode()) { case ISD::EntryToken: @@ -6153,35 +6457,40 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, case ISD::STORE: { // Get alias information for Chain. SDValue OpPtr; - int64_t OpSize = 0; - const Value *OpSrcValue = 0; - int OpSrcValueOffset = 0; + int64_t OpSize; + const Value *OpSrcValue; + int OpSrcValueOffset; + unsigned OpSrcValueAlign; bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize, - OpSrcValue, OpSrcValueOffset); + OpSrcValue, OpSrcValueOffset, + OpSrcValueAlign); // If chain is alias then stop here. if (!(IsLoad && IsOpLoad) && - isAlias(Ptr, Size, SrcValue, SrcValueOffset, - OpPtr, OpSize, OpSrcValue, OpSrcValueOffset)) { + isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign, + OpPtr, OpSize, OpSrcValue, OpSrcValueOffset, + OpSrcValueAlign)) { Aliases.push_back(Chain); } else { // Look further up the chain. Chains.push_back(Chain.getOperand(0)); - // Clean up old chain. - AddToWorkList(Chain.getNode()); + ++Depth; } break; } case ISD::TokenFactor: - // We have to check each of the operands of the token factor, so we queue - // then up. Adding the operands to the queue (stack) in reverse order - // maintains the original order and increases the likelihood that getNode - // will find a matching token factor (CSE.) + // We have to check each of the operands of the token factor for "small" + // token factors, so we queue them up. Adding the operands to the queue + // (stack) in reverse order maintains the original order and increases the + // likelihood that getNode will find a matching token factor (CSE.) + if (Chain.getNumOperands() > 16) { + Aliases.push_back(Chain); + break; + } for (unsigned n = Chain.getNumOperands(); n;) Chains.push_back(Chain.getOperand(--n)); - // Eliminate the token factor if we can. - AddToWorkList(Chain.getNode()); + ++Depth; break; default: @@ -6207,15 +6516,10 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { // If a single operand then chain to it. We don't need to revisit it. return Aliases[0]; } - + // Construct a custom tailored token factor. - SDValue NewChain = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, - &Aliases[0], Aliases.size()); - - // Make sure the old chain gets cleaned up. - if (NewChain != OldChain) AddToWorkList(OldChain.getNode()); - - return NewChain; + return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, + &Aliases[0], Aliases.size()); } // SelectionDAG::Combine - This is the entry point for the file.