X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FSelectionDAG%2FDAGCombiner.cpp;h=242fffd180284b8978a5ae1fa477ace21b6e3ca8;hb=3c87285af63bc6d9b93f8eefe46ce9fea063d66c;hp=b38b479cbe8dc024582d78fb55d4a9aecadb0ac9;hpb=559742c0eae0a7368eb5f67cde7a3599eaf82ade;p=oota-llvm.git diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b38b479cbe8..242fffd1802 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9,22 +9,6 @@ // // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run // both before and after the DAG is legalized. -// -// FIXME: Missing folds -// sdiv, udiv, srem, urem (X, const) where X is an integer can be expanded into -// a sequence of multiplies, shifts, and adds. This should be controlled by -// some kind of hint from the target that int div is expensive. -// various folds of mulh[s,u] by constants such as -1, powers of 2, etc. -// -// FIXME: select C, pow2, pow2 -> something smart -// FIXME: trunc(select X, Y, Z) -> select X, trunc(Y), trunc(Z) -// FIXME: Dead stores -> nuke -// FIXME: shr X, (and Y,31) -> shr X, Y (TRICKY!) -// FIXME: mul (x, const) -> shifts + adds -// FIXME: undef values -// FIXME: divide by zero is currently left unfolded. do we want to turn this -// into an undef? -// FIXME: select ne (select cc, 1, 0), 0, true, false -> select cc, true, false // //===----------------------------------------------------------------------===// @@ -101,6 +85,10 @@ namespace { WorkList.end()); } + /// visit - call the node-specific routine that knows how to fold each + /// particular type of node. + SDOperand visit(SDNode *N); + public: /// AddToWorkList - Add to the work list making sure it's instance is at the /// the back (next to be processed.) @@ -152,10 +140,10 @@ namespace { /// SimplifyDemandedBits - Check the specified integer node value to see if /// it can be simplified or if things it uses can be simplified by bit /// propagation. If so, return true. - bool SimplifyDemandedBits(SDOperand Op) { - TargetLowering::TargetLoweringOpt TLO(DAG); + bool SimplifyDemandedBits(SDOperand Op, uint64_t Demanded = ~0ULL) { + TargetLowering::TargetLoweringOpt TLO(DAG, AfterLegalize); uint64_t KnownZero, KnownOne; - uint64_t Demanded = MVT::getIntVTBitMask(Op.getValueType()); + Demanded &= MVT::getIntVTBitMask(Op.getValueType()); if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) return false; @@ -169,7 +157,7 @@ namespace { DOUT << '\n'; std::vector NowDead; - DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, NowDead); + DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &NowDead); // Push the new node and any (possibly new) users onto the worklist. AddToWorkList(TLO.New.Val); @@ -201,9 +189,10 @@ namespace { bool CombineToPostIndexedLoadStore(SDNode *N); - /// visit - call the node-specific routine that knows how to fold each - /// particular type of node. - SDOperand visit(SDNode *N); + /// combine - call the node-specific routine that knows how to fold each + /// particular type of node. If that doesn't do anything, try the + /// target-specific DAG combines. + SDOperand combine(SDNode *N); // Visitation implementation - Implement dag node combining for different // node types. The semantics are as follows: @@ -224,6 +213,10 @@ namespace { SDOperand visitUREM(SDNode *N); SDOperand visitMULHU(SDNode *N); SDOperand visitMULHS(SDNode *N); + SDOperand visitSMUL_LOHI(SDNode *N); + SDOperand visitUMUL_LOHI(SDNode *N); + SDOperand visitSDIVREM(SDNode *N); + SDOperand visitUDIVREM(SDNode *N); SDOperand visitAND(SDNode *N); SDOperand visitOR(SDNode *N); SDOperand visitXOR(SDNode *N); @@ -263,6 +256,7 @@ namespace { SDOperand visitLOAD(SDNode *N); SDOperand visitSTORE(SDNode *N); SDOperand visitINSERT_VECTOR_ELT(SDNode *N); + SDOperand visitEXTRACT_VECTOR_ELT(SDNode *N); SDOperand visitBUILD_VECTOR(SDNode *N); SDOperand visitCONCAT_VECTORS(SDNode *N); SDOperand visitVECTOR_SHUFFLE(SDNode *N); @@ -270,6 +264,8 @@ namespace { SDOperand XformToShuffleWithZero(SDNode *N); SDOperand ReassociateOps(unsigned Opc, SDOperand LHS, SDOperand RHS); + SDOperand visitShiftByConstant(SDNode *N, unsigned Amt); + bool SimplifySelectOps(SDNode *SELECT, SDOperand LHS, SDOperand RHS); SDOperand SimplifyBinOpWithSameOpcodeHands(SDNode *N); SDOperand SimplifySelect(SDOperand N0, SDOperand N1, SDOperand N2); @@ -278,12 +274,15 @@ namespace { bool NotExtCompare = false); SDOperand SimplifySetCC(MVT::ValueType VT, SDOperand N0, SDOperand N1, ISD::CondCode Cond, bool foldBooleans = true); + bool SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); SDOperand ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, MVT::ValueType); SDOperand BuildSDIV(SDNode *N); SDOperand BuildUDIV(SDNode *N); SDNode *MatchRotate(SDOperand LHS, SDOperand RHS); SDOperand ReduceLoadWidth(SDNode *N); + SDOperand GetDemandedBits(SDOperand V, uint64_t Mask); + /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, /// looking for aliasing nodes and adding them to the Aliases vector. void GatherAllAliases(SDNode *N, SDOperand OriginalChain, @@ -351,6 +350,10 @@ CombineTo(SDNode *N, SDOperand Res0, SDOperand Res1) { /// specified expression for the same cost as the expression itself, or 2 if we /// can compute the negated form more cheaply than the expression itself. static char isNegatibleForFree(SDOperand Op, unsigned Depth = 0) { + // No compile time optimizations on this type. + if (Op.getValueType() == MVT::ppcf128) + return 0; + // fneg is removable even if it has multiple uses. if (Op.getOpcode() == ISD::FNEG) return 2; @@ -410,9 +413,11 @@ static SDOperand GetNegatedExpression(SDOperand Op, SelectionDAG &DAG, assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); switch (Op.getOpcode()) { default: assert(0 && "Unknown code"); - case ISD::ConstantFP: - return DAG.getConstantFP(-cast(Op)->getValue(), - Op.getValueType()); + case ISD::ConstantFP: { + APFloat V = cast(Op)->getValueAPF(); + V.changeSign(); + return DAG.getConstantFP(V, Op.getValueType()); + } case ISD::FADD: // FIXME: determine better conditions for this xform. assert(UnsafeFPMath); @@ -432,7 +437,7 @@ static SDOperand GetNegatedExpression(SDOperand Op, SelectionDAG &DAG, // -(0-B) -> B if (ConstantFPSDNode *N0CFP = dyn_cast(Op.getOperand(0))) - if (N0CFP->getValue() == 0.0) + if (N0CFP->getValueAPF().isZero()) return Op.getOperand(1); // -(A-B) -> B-A @@ -552,10 +557,6 @@ void DAGCombiner::Run(bool RunningAfterLegalize) { // done. Set it to null to avoid confusion. DAG.setRoot(SDOperand()); - /// DagCombineInfo - Expose the DAG combiner to the target combiner impls. - TargetLowering::DAGCombinerInfo - DagCombineInfo(DAG, !RunningAfterLegalize, false, this); - // while the worklist isn't empty, inspect the node on the end of it and // try and combine it. while (!WorkList.empty()) { @@ -573,16 +574,7 @@ void DAGCombiner::Run(bool RunningAfterLegalize) { continue; } - SDOperand RV = visit(N); - - // If nothing happened, try a target-specific DAG combine. - if (RV.Val == 0) { - assert(N->getOpcode() != ISD::DELETED_NODE && - "Node was deleted but visit returned NULL!"); - if (N->getOpcode() >= ISD::BUILTIN_OP_END || - TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) - RV = TLI.PerformDAGCombine(N, DagCombineInfo); - } + SDOperand RV = combine(N); if (RV.Val) { ++NodesCombined; @@ -642,6 +634,10 @@ SDOperand DAGCombiner::visit(SDNode *N) { case ISD::UREM: return visitUREM(N); case ISD::MULHU: return visitMULHU(N); case ISD::MULHS: return visitMULHS(N); + case ISD::SMUL_LOHI: return visitSMUL_LOHI(N); + case ISD::UMUL_LOHI: return visitUMUL_LOHI(N); + case ISD::SDIVREM: return visitSDIVREM(N); + case ISD::UDIVREM: return visitUDIVREM(N); case ISD::AND: return visitAND(N); case ISD::OR: return visitOR(N); case ISD::XOR: return visitXOR(N); @@ -680,6 +676,7 @@ SDOperand DAGCombiner::visit(SDNode *N) { case ISD::LOAD: return visitLOAD(N); case ISD::STORE: return visitSTORE(N); case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N); + case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N); case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); @@ -687,6 +684,29 @@ SDOperand DAGCombiner::visit(SDNode *N) { return SDOperand(); } +SDOperand DAGCombiner::combine(SDNode *N) { + + SDOperand RV = visit(N); + + // If nothing happened, try a target-specific DAG combine. + if (RV.Val == 0) { + assert(N->getOpcode() != ISD::DELETED_NODE && + "Node was deleted but visit returned NULL!"); + + if (N->getOpcode() >= ISD::BUILTIN_OP_END || + TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) { + + // Expose the DAG combiner to the target combiner impls. + TargetLowering::DAGCombinerInfo + DagCombineInfo(DAG, !AfterLegalize, false, this); + + RV = TLI.PerformDAGCombine(N, DagCombineInfo); + } + } + + return RV; +} + /// getInputChainForNode - Given a node, return its input chain if it has one, /// otherwise return a null sd operand. static SDOperand getInputChainForNode(SDNode *N) { @@ -1272,15 +1292,17 @@ SDOperand DAGCombiner::visitSREM(SDNode *N) { DAG.MaskedValueIsZero(N0, SignBit)) return DAG.getNode(ISD::UREM, VT, N0, N1); - // Unconditionally lower X%C -> X-X/C*C. This allows the X/C logic to hack on - // the remainder operation. + // If X/C can be simplified by the division-by-constant logic, lower + // X%C to the equivalent of X-X/C*C. if (N1C && !N1C->isNullValue()) { SDOperand Div = DAG.getNode(ISD::SDIV, VT, N0, N1); - SDOperand Mul = DAG.getNode(ISD::MUL, VT, Div, N1); - SDOperand Sub = DAG.getNode(ISD::SUB, VT, N0, Mul); - AddToWorkList(Div.Val); - AddToWorkList(Mul.Val); - return Sub; + SDOperand OptimizedDiv = combine(Div.Val); + if (OptimizedDiv.Val && OptimizedDiv.Val != Div.Val) { + SDOperand Mul = DAG.getNode(ISD::MUL, VT, OptimizedDiv, N1); + SDOperand Sub = DAG.getNode(ISD::SUB, VT, N0, Mul); + AddToWorkList(Mul.Val); + return Sub; + } } // undef % X -> 0 @@ -1317,15 +1339,17 @@ SDOperand DAGCombiner::visitUREM(SDNode *N) { } } - // Unconditionally lower X%C -> X-X/C*C. This allows the X/C logic to hack on - // the remainder operation. + // If X/C can be simplified by the division-by-constant logic, lower + // X%C to the equivalent of X-X/C*C. if (N1C && !N1C->isNullValue()) { SDOperand Div = DAG.getNode(ISD::UDIV, VT, N0, N1); - SDOperand Mul = DAG.getNode(ISD::MUL, VT, Div, N1); - SDOperand Sub = DAG.getNode(ISD::SUB, VT, N0, Mul); - AddToWorkList(Div.Val); - AddToWorkList(Mul.Val); - return Sub; + SDOperand OptimizedDiv = combine(Div.Val); + if (OptimizedDiv.Val && OptimizedDiv.Val != Div.Val) { + SDOperand Mul = DAG.getNode(ISD::MUL, VT, OptimizedDiv, N1); + SDOperand Sub = DAG.getNode(ISD::SUB, VT, N0, Mul); + AddToWorkList(Mul.Val); + return Sub; + } } // undef % X -> 0 @@ -1378,6 +1402,101 @@ SDOperand DAGCombiner::visitMULHU(SDNode *N) { return SDOperand(); } +/// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that +/// compute two values. LoOp and HiOp give the opcodes for the two computations +/// that are being performed. Return true if a simplification was made. +/// +bool DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, + unsigned LoOp, unsigned HiOp) { + // If the high half is not needed, just compute the low half. + bool HiExists = N->hasAnyUseOfValue(1); + if (!HiExists && + (!AfterLegalize || + TLI.isOperationLegal(LoOp, N->getValueType(0)))) { + DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), + DAG.getNode(LoOp, N->getValueType(0), + N->op_begin(), + N->getNumOperands())); + return true; + } + + // If the low half is not needed, just compute the high half. + bool LoExists = N->hasAnyUseOfValue(0); + if (!LoExists && + (!AfterLegalize || + TLI.isOperationLegal(HiOp, N->getValueType(1)))) { + DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 1), + DAG.getNode(HiOp, N->getValueType(1), + N->op_begin(), + N->getNumOperands())); + return true; + } + + // If both halves are used, return as it is. + if (LoExists && HiExists) + return false; + + // If the two computed results can be simplified separately, separate them. + bool RetVal = false; + if (LoExists) { + SDOperand Lo = DAG.getNode(LoOp, N->getValueType(0), + N->op_begin(), N->getNumOperands()); + SDOperand LoOpt = combine(Lo.Val); + if (LoOpt.Val && LoOpt != Lo && + TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())) { + RetVal = true; + DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), LoOpt); + } else + DAG.DeleteNode(Lo.Val); + } + + if (HiExists) { + SDOperand Hi = DAG.getNode(HiOp, N->getValueType(1), + N->op_begin(), N->getNumOperands()); + SDOperand HiOpt = combine(Hi.Val); + if (HiOpt.Val && HiOpt != Hi && + TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())) { + RetVal = true; + DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 1), HiOpt); + } else + DAG.DeleteNode(Hi.Val); + } + + return RetVal; +} + +SDOperand DAGCombiner::visitSMUL_LOHI(SDNode *N) { + + if (SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS)) + return SDOperand(); + + return SDOperand(); +} + +SDOperand DAGCombiner::visitUMUL_LOHI(SDNode *N) { + + if (SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU)) + return SDOperand(); + + return SDOperand(); +} + +SDOperand DAGCombiner::visitSDIVREM(SDNode *N) { + + if (SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM)) + return SDOperand(); + + return SDOperand(); +} + +SDOperand DAGCombiner::visitUDIVREM(SDNode *N) { + + if (SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM)) + return SDOperand(); + + return SDOperand(); +} + /// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with /// two operands of the same opcode, try to simplify it. SDOperand DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { @@ -1585,17 +1704,21 @@ SDOperand DAGCombiner::visitAND(SDNode *N) { // For big endian targets, we need to add an offset to the pointer to // load the correct bytes. For little endian systems, we merely need to // read fewer bytes from the same pointer. - unsigned PtrOff = - (MVT::getSizeInBits(LoadedVT) - MVT::getSizeInBits(EVT)) / 8; + unsigned LVTStoreBytes = MVT::getStoreSizeInBits(LoadedVT)/8; + unsigned EVTStoreBytes = MVT::getStoreSizeInBits(EVT)/8; + unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; + unsigned Alignment = LN0->getAlignment(); SDOperand NewPtr = LN0->getBasePtr(); - if (!TLI.isLittleEndian()) + if (!TLI.isLittleEndian()) { NewPtr = DAG.getNode(ISD::ADD, PtrType, NewPtr, DAG.getConstant(PtrOff, PtrType)); + Alignment = MinAlign(Alignment, PtrOff); + } AddToWorkList(NewPtr.Val); SDOperand Load = DAG.getExtLoad(ISD::ZEXTLOAD, VT, LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(), EVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), Alignment); AddToWorkList(N); CombineTo(N0.Val, Load, Load.getValue(1)); return SDOperand(N, 0); // Return N so it doesn't get rechecked! @@ -1939,6 +2062,16 @@ SDOperand DAGCombiner::visitXOR(SDNode *N) { assert(0 && "Unhandled SetCC Equivalent!"); abort(); } + // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y))) + if (N1C && N1C->getValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND && + N0.Val->hasOneUse() && isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){ + SDOperand V = N0.getOperand(0); + V = DAG.getNode(ISD::XOR, V.getValueType(), V, + DAG.getConstant(1, V.getValueType())); + AddToWorkList(V.Val); + return DAG.getNode(ISD::ZERO_EXTEND, VT, V); + } + // fold !(x or y) -> (!x and !y) iff x or y are setcc if (N1C && N1C->getValue() == 1 && VT == MVT::i1 && (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { @@ -2000,6 +2133,77 @@ SDOperand DAGCombiner::visitXOR(SDNode *N) { return SDOperand(); } +/// visitShiftByConstant - Handle transforms common to the three shifts, when +/// the shift amount is a constant. +SDOperand DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { + SDNode *LHS = N->getOperand(0).Val; + if (!LHS->hasOneUse()) return SDOperand(); + + // We want to pull some binops through shifts, so that we have (and (shift)) + // instead of (shift (and)), likewise for add, or, xor, etc. This sort of + // thing happens with address calculations, so it's important to canonicalize + // it. + bool HighBitSet = false; // Can we transform this if the high bit is set? + + switch (LHS->getOpcode()) { + default: return SDOperand(); + case ISD::OR: + case ISD::XOR: + HighBitSet = false; // We can only transform sra if the high bit is clear. + break; + case ISD::AND: + HighBitSet = true; // We can only transform sra if the high bit is set. + break; + case ISD::ADD: + if (N->getOpcode() != ISD::SHL) + return SDOperand(); // only shl(add) not sr[al](add). + HighBitSet = false; // We can only transform sra if the high bit is clear. + break; + } + + // We require the RHS of the binop to be a constant as well. + ConstantSDNode *BinOpCst = dyn_cast(LHS->getOperand(1)); + if (!BinOpCst) return SDOperand(); + + + // FIXME: disable this for unless the input to the binop is a shift by a + // constant. If it is not a shift, it pessimizes some common cases like: + // + //void foo(int *X, int i) { X[i & 1235] = 1; } + //int bar(int *X, int i) { return X[i & 255]; } + SDNode *BinOpLHSVal = LHS->getOperand(0).Val; + if ((BinOpLHSVal->getOpcode() != ISD::SHL && + BinOpLHSVal->getOpcode() != ISD::SRA && + BinOpLHSVal->getOpcode() != ISD::SRL) || + !isa(BinOpLHSVal->getOperand(1))) + return SDOperand(); + + MVT::ValueType VT = N->getValueType(0); + + // If this is a signed shift right, and the high bit is modified + // by the logical operation, do not perform the transformation. + // The highBitSet boolean indicates the value of the high bit of + // the constant which would cause it to be modified for this + // operation. + if (N->getOpcode() == ISD::SRA) { + uint64_t BinOpRHSSign = BinOpCst->getValue() >> MVT::getSizeInBits(VT)-1; + if ((bool)BinOpRHSSign != HighBitSet) + return SDOperand(); + } + + // Fold the constants, shifting the binop RHS by the shift amount. + SDOperand NewRHS = DAG.getNode(N->getOpcode(), N->getValueType(0), + LHS->getOperand(1), N->getOperand(1)); + + // Create the new shift. + SDOperand NewShift = DAG.getNode(N->getOpcode(), VT, LHS->getOperand(0), + N->getOperand(1)); + + // Create the new binop. + return DAG.getNode(LHS->getOpcode(), VT, NewShift, NewRHS); +} + + SDOperand DAGCombiner::visitSHL(SDNode *N) { SDOperand N0 = N->getOperand(0); SDOperand N1 = N->getOperand(1); @@ -2054,7 +2258,8 @@ SDOperand DAGCombiner::visitSHL(SDNode *N) { if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) return DAG.getNode(ISD::AND, VT, N0.getOperand(0), DAG.getConstant(~0ULL << N1C->getValue(), VT)); - return SDOperand(); + + return N1C ? visitShiftByConstant(N, N1C->getValue()) : SDOperand(); } SDOperand DAGCombiner::visitSRA(SDNode *N) { @@ -2114,7 +2319,8 @@ SDOperand DAGCombiner::visitSRA(SDNode *N) { // If the sign bit is known to be zero, switch this to a SRL. if (DAG.MaskedValueIsZero(N0, MVT::getIntVTSignBit(VT))) return DAG.getNode(ISD::SRL, VT, N0, N1); - return SDOperand(); + + return N1C ? visitShiftByConstant(N, N1C->getValue()) : SDOperand(); } SDOperand DAGCombiner::visitSRL(SDNode *N) { @@ -2208,7 +2414,7 @@ SDOperand DAGCombiner::visitSRL(SDNode *N) { if (N1C && SimplifyDemandedBits(SDOperand(N, 0))) return SDOperand(N, 0); - return SDOperand(); + return N1C ? visitShiftByConstant(N, N1C->getValue()) : SDOperand(); } SDOperand DAGCombiner::visitCTLZ(SDNode *N) { @@ -2249,6 +2455,7 @@ SDOperand DAGCombiner::visitSELECT(SDNode *N) { ConstantSDNode *N1C = dyn_cast(N1); ConstantSDNode *N2C = dyn_cast(N2); MVT::ValueType VT = N->getValueType(0); + MVT::ValueType VT0 = N0.getValueType(); // fold select C, X, X -> X if (N1 == N2) @@ -2262,15 +2469,25 @@ SDOperand DAGCombiner::visitSELECT(SDNode *N) { // fold select C, 1, X -> C | X if (MVT::i1 == VT && N1C && N1C->getValue() == 1) return DAG.getNode(ISD::OR, VT, N0, N2); + // fold select C, 0, 1 -> ~C + if (MVT::isInteger(VT) && MVT::isInteger(VT0) && + N1C && N2C && N1C->isNullValue() && N2C->getValue() == 1) { + SDOperand XORNode = DAG.getNode(ISD::XOR, VT0, N0, DAG.getConstant(1, VT0)); + if (VT == VT0) + return XORNode; + AddToWorkList(XORNode.Val); + if (MVT::getSizeInBits(VT) > MVT::getSizeInBits(VT0)) + return DAG.getNode(ISD::ZERO_EXTEND, VT, XORNode); + return DAG.getNode(ISD::TRUNCATE, VT, XORNode); + } // fold select C, 0, X -> ~C & X - // FIXME: this should check for C type == X type, not i1? - if (MVT::i1 == VT && N1C && N1C->isNullValue()) { + if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) { SDOperand XORNode = DAG.getNode(ISD::XOR, VT, N0, DAG.getConstant(1, VT)); AddToWorkList(XORNode.Val); return DAG.getNode(ISD::AND, VT, XORNode, N2); } // fold select C, X, 1 -> ~C | X - if (MVT::i1 == VT && N2C && N2C->getValue() == 1) { + if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getValue() == 1) { SDOperand XORNode = DAG.getNode(ISD::XOR, VT, N0, DAG.getConstant(1, VT)); AddToWorkList(XORNode.Val); return DAG.getNode(ISD::OR, VT, XORNode, N1); @@ -2346,6 +2563,74 @@ SDOperand DAGCombiner::visitSETCC(SDNode *N) { cast(N->getOperand(2))->get()); } +// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: +// "fold ({s|z}ext (load x)) -> ({s|z}ext (truncate ({s|z}extload x)))" +// transformation. Returns true if extension are possible and the above +// mentioned transformation is profitable. +static bool ExtendUsesToFormExtLoad(SDNode *N, SDOperand N0, + unsigned ExtOpc, + SmallVector &ExtendNodes, + TargetLowering &TLI) { + bool HasCopyToRegUses = false; + bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType()); + for (SDNode::use_iterator UI = N0.Val->use_begin(), UE = N0.Val->use_end(); + UI != UE; ++UI) { + SDNode *User = *UI; + if (User == N) + continue; + // FIXME: Only extend SETCC N, N and SETCC N, c for now. + if (User->getOpcode() == ISD::SETCC) { + ISD::CondCode CC = cast(User->getOperand(2))->get(); + if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC)) + // Sign bits will be lost after a zext. + return false; + bool Add = false; + for (unsigned i = 0; i != 2; ++i) { + SDOperand UseOp = User->getOperand(i); + if (UseOp == N0) + continue; + if (!isa(UseOp)) + return false; + Add = true; + } + if (Add) + ExtendNodes.push_back(User); + } else { + for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { + SDOperand UseOp = User->getOperand(i); + if (UseOp == N0) { + // If truncate from extended type to original load type is free + // on this target, then it's ok to extend a CopyToReg. + if (isTruncFree && User->getOpcode() == ISD::CopyToReg) + HasCopyToRegUses = true; + else + return false; + } + } + } + } + + if (HasCopyToRegUses) { + bool BothLiveOut = false; + for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); + UI != UE; ++UI) { + SDNode *User = *UI; + for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { + SDOperand UseOp = User->getOperand(i); + if (UseOp.Val == N && UseOp.ResNo == 0) { + BothLiveOut = true; + break; + } + } + } + if (BothLiveOut) + // Both unextended and extended values are live out. There had better be + // good a reason for the transformation. + return ExtendNodes.size(); + } + return true; +} + SDOperand DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDOperand N0 = N->getOperand(0); MVT::ValueType VT = N->getValueType(0); @@ -2409,19 +2694,40 @@ SDOperand DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } // fold (sext (load x)) -> (sext (truncate (sextload x))) - if (ISD::isNON_EXTLoad(N0.Val) && N0.hasOneUse() && + if (ISD::isNON_EXTLoad(N0.Val) && (!AfterLegalize||TLI.isLoadXLegal(ISD::SEXTLOAD, N0.getValueType()))){ - LoadSDNode *LN0 = cast(N0); - SDOperand ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), - N0.getValueType(), - LN0->isVolatile(), - LN0->getAlignment()); - CombineTo(N, ExtLoad); - CombineTo(N0.Val, DAG.getNode(ISD::TRUNCATE, N0.getValueType(), ExtLoad), - ExtLoad.getValue(1)); - return SDOperand(N, 0); // Return N so it doesn't get rechecked! + bool DoXform = true; + SmallVector SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); + if (DoXform) { + LoadSDNode *LN0 = cast(N0); + SDOperand ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), + N0.getValueType(), + LN0->isVolatile(), + LN0->getAlignment()); + CombineTo(N, ExtLoad); + SDOperand Trunc = DAG.getNode(ISD::TRUNCATE, N0.getValueType(), ExtLoad); + CombineTo(N0.Val, Trunc, ExtLoad.getValue(1)); + // Extend SetCC uses if necessary. + for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { + SDNode *SetCC = SetCCs[i]; + SmallVector Ops; + for (unsigned j = 0; j != 2; ++j) { + SDOperand SOp = SetCC->getOperand(j); + if (SOp == Trunc) + Ops.push_back(ExtLoad); + else + Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND, VT, SOp)); + } + Ops.push_back(SetCC->getOperand(2)); + CombineTo(SetCC, DAG.getNode(ISD::SETCC, SetCC->getValueType(0), + &Ops[0], Ops.size())); + } + return SDOperand(N, 0); // Return N so it doesn't get rechecked! + } } // fold (sext (sextload x)) -> (sext (truncate (sextload x))) @@ -2505,19 +2811,40 @@ SDOperand DAGCombiner::visitZERO_EXTEND(SDNode *N) { } // fold (zext (load x)) -> (zext (truncate (zextload x))) - if (ISD::isNON_EXTLoad(N0.Val) && N0.hasOneUse() && + if (ISD::isNON_EXTLoad(N0.Val) && (!AfterLegalize||TLI.isLoadXLegal(ISD::ZEXTLOAD, N0.getValueType()))) { - LoadSDNode *LN0 = cast(N0); - SDOperand ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), - N0.getValueType(), - LN0->isVolatile(), - LN0->getAlignment()); - CombineTo(N, ExtLoad); - CombineTo(N0.Val, DAG.getNode(ISD::TRUNCATE, N0.getValueType(), ExtLoad), - ExtLoad.getValue(1)); - return SDOperand(N, 0); // Return N so it doesn't get rechecked! + bool DoXform = true; + SmallVector SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); + if (DoXform) { + LoadSDNode *LN0 = cast(N0); + SDOperand ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), + N0.getValueType(), + LN0->isVolatile(), + LN0->getAlignment()); + CombineTo(N, ExtLoad); + SDOperand Trunc = DAG.getNode(ISD::TRUNCATE, N0.getValueType(), ExtLoad); + CombineTo(N0.Val, Trunc, ExtLoad.getValue(1)); + // Extend SetCC uses if necessary. + for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { + SDNode *SetCC = SetCCs[i]; + SmallVector Ops; + for (unsigned j = 0; j != 2; ++j) { + SDOperand SOp = SetCC->getOperand(j); + if (SOp == Trunc) + Ops.push_back(ExtLoad); + else + Ops.push_back(DAG.getNode(ISD::ZERO_EXTEND, VT, SOp)); + } + Ops.push_back(SetCC->getOperand(2)); + CombineTo(SetCC, DAG.getNode(ISD::SETCC, SetCC->getValueType(0), + &Ops[0], Ops.size())); + } + return SDOperand(N, 0); // Return N so it doesn't get rechecked! + } } // fold (zext (zextload x)) -> (zext (truncate (zextload x))) @@ -2648,6 +2975,38 @@ SDOperand DAGCombiner::visitANY_EXTEND(SDNode *N) { return SDOperand(); } +/// GetDemandedBits - See if the specified operand can be simplified with the +/// knowledge that only the bits specified by Mask are used. If so, return the +/// simpler operand, otherwise return a null SDOperand. +SDOperand DAGCombiner::GetDemandedBits(SDOperand V, uint64_t Mask) { + switch (V.getOpcode()) { + default: break; + case ISD::OR: + case ISD::XOR: + // If the LHS or RHS don't contribute bits to the or, drop them. + if (DAG.MaskedValueIsZero(V.getOperand(0), Mask)) + return V.getOperand(1); + if (DAG.MaskedValueIsZero(V.getOperand(1), Mask)) + return V.getOperand(0); + break; + case ISD::SRL: + // Only look at single-use SRLs. + if (!V.Val->hasOneUse()) + break; + if (ConstantSDNode *RHSC = dyn_cast(V.getOperand(1))) { + // See if we can recursively simplify the LHS. + unsigned Amt = RHSC->getValue(); + Mask = (Mask << Amt) & MVT::getIntVTBitMask(V.getValueType()); + SDOperand SimplifyLHS = GetDemandedBits(V.getOperand(0), Mask); + if (SimplifyLHS.Val) { + return DAG.getNode(ISD::SRL, V.getValueType(), + SimplifyLHS, V.getOperand(1)); + } + } + } + return SDOperand(); +} + /// ReduceLoadWidth - If the result of a wider load is shifted to right of N /// bits and then truncated to a narrower type and where N is a multiple /// of number of bits of the narrower type, transform it to a narrower load @@ -2697,23 +3056,26 @@ SDOperand DAGCombiner::ReduceLoadWidth(SDNode *N) { MVT::ValueType PtrType = N0.getOperand(1).getValueType(); // For big endian targets, we need to adjust the offset to the pointer to // load the correct bytes. - if (!TLI.isLittleEndian()) - ShAmt = MVT::getSizeInBits(N0.getValueType()) - ShAmt - EVTBits; + if (!TLI.isLittleEndian()) { + unsigned LVTStoreBits = MVT::getStoreSizeInBits(N0.getValueType()); + unsigned EVTStoreBits = MVT::getStoreSizeInBits(EVT); + ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; + } uint64_t PtrOff = ShAmt / 8; + unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); SDOperand NewPtr = DAG.getNode(ISD::ADD, PtrType, LN0->getBasePtr(), DAG.getConstant(PtrOff, PtrType)); AddToWorkList(NewPtr.Val); SDOperand Load = (ExtType == ISD::NON_EXTLOAD) ? DAG.getLoad(VT, LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(), - LN0->isVolatile(), LN0->getAlignment()) + LN0->isVolatile(), NewAlign) : DAG.getExtLoad(ExtType, VT, LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(), EVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), NewAlign); AddToWorkList(N); if (CombineSRL) { - std::vector NowDead; - DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1), NowDead); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); CombineTo(N->getOperand(0).Val, Load); } else CombineTo(N0.Val, Load, Load.getValue(1)); @@ -2841,6 +3203,13 @@ SDOperand DAGCombiner::visitTRUNCATE(SDNode *N) { return N0.getOperand(0); } + // See if we can simplify the input to this truncate through knowledge that + // only the low bits are being used. For example "trunc (or (shl x, 8), y)" + // -> trunc y + SDOperand Shorter = GetDemandedBits(N0, MVT::getIntVTBitMask(VT)); + if (Shorter.Val) + return DAG.getNode(ISD::TRUNCATE, VT, Shorter); + // fold (truncate (load x)) -> (smaller load x) // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) return ReduceLoadWidth(N); @@ -2884,9 +3253,8 @@ SDOperand DAGCombiner::visitBIT_CONVERT(SDNode *N) { return DAG.getNode(ISD::BIT_CONVERT, VT, N0.getOperand(0)); // fold (conv (load x)) -> (load (conv*)x) - // If the resultant load doesn't need a higher alignment than the original! - if (ISD::isNON_EXTLoad(N0.Val) && N0.hasOneUse() && - ISD::isUNINDEXEDLoad(N0.Val) && + // If the resultant load doesn't need a higher alignment than the original! + if (ISD::isNormalLoad(N0.Val) && N0.hasOneUse() && TLI.isOperationLegal(ISD::LOAD, VT)) { LoadSDNode *LN0 = cast(N0); unsigned Align = TLI.getTargetMachine().getTargetData()-> @@ -3031,7 +3399,7 @@ SDOperand DAGCombiner::visitFADD(SDNode *N) { } // fold (fadd c1, c2) -> c1+c2 - if (N0CFP && N1CFP) + if (N0CFP && N1CFP && VT != MVT::ppcf128) return DAG.getNode(ISD::FADD, VT, N0, N1); // canonicalize constant to RHS if (N0CFP && !N1CFP) @@ -3066,10 +3434,10 @@ SDOperand DAGCombiner::visitFSUB(SDNode *N) { } // fold (fsub c1, c2) -> c1-c2 - if (N0CFP && N1CFP) + if (N0CFP && N1CFP && VT != MVT::ppcf128) return DAG.getNode(ISD::FSUB, VT, N0, N1); // fold (0-B) -> -B - if (UnsafeFPMath && N0CFP && N0CFP->getValue() == 0.0) { + if (UnsafeFPMath && N0CFP && N0CFP->getValueAPF().isZero()) { if (isNegatibleForFree(N1)) return GetNegatedExpression(N1, DAG); return DAG.getNode(ISD::FNEG, VT, N1); @@ -3095,7 +3463,7 @@ SDOperand DAGCombiner::visitFMUL(SDNode *N) { } // fold (fmul c1, c2) -> c1*c2 - if (N0CFP && N1CFP) + if (N0CFP && N1CFP && VT != MVT::ppcf128) return DAG.getNode(ISD::FMUL, VT, N0, N1); // canonicalize constant to RHS if (N0CFP && !N1CFP) @@ -3141,7 +3509,7 @@ SDOperand DAGCombiner::visitFDIV(SDNode *N) { } // fold (fdiv c1, c2) -> c1/c2 - if (N0CFP && N1CFP) + if (N0CFP && N1CFP && VT != MVT::ppcf128) return DAG.getNode(ISD::FDIV, VT, N0, N1); @@ -3167,7 +3535,7 @@ SDOperand DAGCombiner::visitFREM(SDNode *N) { MVT::ValueType VT = N->getValueType(0); // fold (frem c1, c2) -> fmod(c1,c2) - if (N0CFP && N1CFP) + if (N0CFP && N1CFP && VT != MVT::ppcf128) return DAG.getNode(ISD::FREM, VT, N0, N1); return SDOperand(); @@ -3180,18 +3548,14 @@ SDOperand DAGCombiner::visitFCOPYSIGN(SDNode *N) { ConstantFPSDNode *N1CFP = dyn_cast(N1); MVT::ValueType VT = N->getValueType(0); - if (N0CFP && N1CFP) // Constant fold + if (N0CFP && N1CFP && VT != MVT::ppcf128) // Constant fold return DAG.getNode(ISD::FCOPYSIGN, VT, N0, N1); if (N1CFP) { + const APFloat& V = N1CFP->getValueAPF(); // copysign(x, c1) -> fabs(x) iff ispos(c1) // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1) - union { - double d; - int64_t i; - } u; - u.d = N1CFP->getValue(); - if (u.i >= 0) + if (!V.isNegative()) return DAG.getNode(ISD::FABS, VT, N0); else return DAG.getNode(ISD::FNEG, VT, DAG.getNode(ISD::FABS, VT, N0)); @@ -3228,7 +3592,7 @@ SDOperand DAGCombiner::visitSINT_TO_FP(SDNode *N) { MVT::ValueType VT = N->getValueType(0); // fold (sint_to_fp c1) -> c1fp - if (N0C) + if (N0C && N0.getValueType() != MVT::ppcf128) return DAG.getNode(ISD::SINT_TO_FP, VT, N0); return SDOperand(); } @@ -3239,7 +3603,7 @@ SDOperand DAGCombiner::visitUINT_TO_FP(SDNode *N) { MVT::ValueType VT = N->getValueType(0); // fold (uint_to_fp c1) -> c1fp - if (N0C) + if (N0C && N0.getValueType() != MVT::ppcf128) return DAG.getNode(ISD::UINT_TO_FP, VT, N0); return SDOperand(); } @@ -3261,7 +3625,7 @@ SDOperand DAGCombiner::visitFP_TO_UINT(SDNode *N) { MVT::ValueType VT = N->getValueType(0); // fold (fp_to_uint c1fp) -> c1 - if (N0CFP) + if (N0CFP && VT != MVT::ppcf128) return DAG.getNode(ISD::FP_TO_UINT, VT, N0); return SDOperand(); } @@ -3272,7 +3636,7 @@ SDOperand DAGCombiner::visitFP_ROUND(SDNode *N) { MVT::ValueType VT = N->getValueType(0); // fold (fp_round c1fp) -> c1fp - if (N0CFP) + if (N0CFP && N0.getValueType() != MVT::ppcf128) return DAG.getNode(ISD::FP_ROUND, VT, N0); // fold (fp_round (fp_extend x)) -> x @@ -3297,7 +3661,7 @@ SDOperand DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { // fold (fp_round_inreg c1fp) -> c1fp if (N0CFP) { - SDOperand Round = DAG.getConstantFP(N0CFP->getValue(), EVT); + SDOperand Round = DAG.getConstantFP(N0CFP->getValueAPF(), EVT); return DAG.getNode(ISD::FP_EXTEND, VT, Round); } return SDOperand(); @@ -3309,7 +3673,7 @@ SDOperand DAGCombiner::visitFP_EXTEND(SDNode *N) { MVT::ValueType VT = N->getValueType(0); // fold (fp_extend c1fp) -> c1fp - if (N0CFP) + if (N0CFP && VT != MVT::ppcf128) return DAG.getNode(ISD::FP_EXTEND, VT, N0); // fold (fpext (load x)) -> (fpext (fpround (extload x))) @@ -3347,7 +3711,7 @@ SDOperand DAGCombiner::visitFABS(SDNode *N) { MVT::ValueType VT = N->getValueType(0); // fold (fabs c1) -> fabs(c1) - if (N0CFP) + if (N0CFP && VT != MVT::ppcf128) return DAG.getNode(ISD::FABS, VT, N0); // fold (fabs (fabs x)) -> (fabs x) if (N0.getOpcode() == ISD::FABS) @@ -3513,12 +3877,12 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { std::vector NowDead; if (isLoad) { DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result.getValue(0), - NowDead); + &NowDead); DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 1), Result.getValue(2), - NowDead); + &NowDead); } else { DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result.getValue(1), - NowDead); + &NowDead); } // Nodes can end up on the worklist more than once. Make sure we do @@ -3530,7 +3894,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // Replace the uses of Ptr with uses of the updated base value. DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0), - NowDead); + &NowDead); removeFromWorkList(Ptr.Val); for (unsigned i = 0, e = NowDead.size(); i != e; ++i) removeFromWorkList(NowDead[i]); @@ -3644,12 +4008,12 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { std::vector NowDead; if (isLoad) { DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result.getValue(0), - NowDead); + &NowDead); DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 1), Result.getValue(2), - NowDead); + &NowDead); } else { DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result.getValue(1), - NowDead); + &NowDead); } // Nodes can end up on the worklist more than once. Make sure we do @@ -3662,7 +4026,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { // Replace the uses of Use with uses of the updated base value. DAG.ReplaceAllUsesOfValueWith(SDOperand(Op, 0), Result.getValue(isLoad ? 1 : 0), - NowDead); + &NowDead); removeFromWorkList(Op); for (unsigned i = 0, e = NowDead.size(); i != e; ++i) removeFromWorkList(NowDead[i]); @@ -3725,8 +4089,8 @@ SDOperand DAGCombiner::visitLOAD(SDNode *N) { // Replace the chain to void dependency. if (LD->getExtensionType() == ISD::NON_EXTLOAD) { ReplLoad = DAG.getLoad(N->getValueType(0), BetterChain, Ptr, - LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile(), LD->getAlignment()); + LD->getSrcValue(), LD->getSrcValueOffset(), + LD->isVolatile(), LD->getAlignment()); } else { ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0), @@ -3779,9 +4143,14 @@ SDOperand DAGCombiner::visitSTORE(SDNode *N) { SDOperand Tmp; switch (CFP->getValueType(0)) { default: assert(0 && "Unknown FP type"); + case MVT::f80: // We don't do this for these yet. + case MVT::f128: + case MVT::ppcf128: + break; case MVT::f32: if (!AfterLegalize || TLI.isTypeLegal(MVT::i32)) { - Tmp = DAG.getConstant(FloatToBits(CFP->getValue()), MVT::i32); + Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). + convertToAPInt().getZExtValue(), MVT::i32); return DAG.getStore(Chain, Tmp, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), ST->isVolatile(), ST->getAlignment()); @@ -3789,15 +4158,16 @@ SDOperand DAGCombiner::visitSTORE(SDNode *N) { break; case MVT::f64: if (!AfterLegalize || TLI.isTypeLegal(MVT::i64)) { - Tmp = DAG.getConstant(DoubleToBits(CFP->getValue()), MVT::i64); + Tmp = DAG.getConstant(CFP->getValueAPF().convertToAPInt(). + getZExtValue(), MVT::i64); return DAG.getStore(Chain, Tmp, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), ST->isVolatile(), ST->getAlignment()); } else if (TLI.isTypeLegal(MVT::i32)) { - // Many FP stores are not make apparent until after legalize, e.g. for + // Many FP stores are not made apparent until after legalize, e.g. for // argument passing. Since this is so common, custom legalize the // 64-bit integer store into two 32-bit stores. - uint64_t Val = DoubleToBits(CFP->getValue()); + uint64_t Val = CFP->getValueAPF().convertToAPInt().getZExtValue(); SDOperand Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32); SDOperand Hi = DAG.getConstant(Val >> 32, MVT::i32); if (!TLI.isLittleEndian()) std::swap(Lo, Hi); @@ -3812,8 +4182,7 @@ SDOperand DAGCombiner::visitSTORE(SDNode *N) { Ptr = DAG.getNode(ISD::ADD, Ptr.getValueType(), Ptr, DAG.getConstant(4, Ptr.getValueType())); SVOffset += 4; - if (Alignment > 4) - Alignment = 4; + Alignment = MinAlign(Alignment, 4U); SDOperand St1 = DAG.getStore(Chain, Hi, Ptr, ST->getSrcValue(), SVOffset, isVolatile, Alignment); return DAG.getNode(ISD::TokenFactor, MVT::Other, St0, St1); @@ -3854,6 +4223,37 @@ SDOperand DAGCombiner::visitSTORE(SDNode *N) { if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) return SDOperand(N, 0); + // FIXME: is there such a thing as a truncating indexed store? + if (ST->isTruncatingStore() && ST->getAddressingMode() == ISD::UNINDEXED && + MVT::isInteger(Value.getValueType())) { + // See if we can simplify the input to this truncstore with knowledge that + // only the low bits are being used. For example: + // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" + SDOperand Shorter = + GetDemandedBits(Value, MVT::getIntVTBitMask(ST->getStoredVT())); + AddToWorkList(Value.Val); + if (Shorter.Val) + return DAG.getTruncStore(Chain, Shorter, Ptr, ST->getSrcValue(), + ST->getSrcValueOffset(), ST->getStoredVT(), + ST->isVolatile(), ST->getAlignment()); + + // Otherwise, see if we can simplify the operation with + // SimplifyDemandedBits, which only works if the value has a single use. + if (SimplifyDemandedBits(Value, MVT::getIntVTBitMask(ST->getStoredVT()))) + return SDOperand(N, 0); + } + + // If this is a load followed by a store to the same location, then the store + // is dead/noop. + if (LoadSDNode *Ld = dyn_cast(Value)) { + if (Chain.Val == Ld && Ld->getBasePtr() == Ptr && + ST->getAddressingMode() == ISD::UNINDEXED && + ST->getStoredVT() == Ld->getLoadedVT()) { + // The store is dead, remove it. + return Chain; + } + } + return SDOperand(); } @@ -3876,6 +4276,55 @@ SDOperand DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { return SDOperand(); } +SDOperand DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { + SDOperand InVec = N->getOperand(0); + SDOperand EltNo = N->getOperand(1); + + // (vextract (v4f32 s2v (f32 load $addr)), 0) -> (f32 load $addr) + // (vextract (v4i32 bc (v4f32 s2v (f32 load $addr))), 0) -> (i32 load $addr) + if (isa(EltNo)) { + unsigned Elt = cast(EltNo)->getValue(); + bool NewLoad = false; + if (Elt == 0) { + MVT::ValueType VT = InVec.getValueType(); + MVT::ValueType EVT = MVT::getVectorElementType(VT); + MVT::ValueType LVT = EVT; + unsigned NumElts = MVT::getVectorNumElements(VT); + if (InVec.getOpcode() == ISD::BIT_CONVERT) { + MVT::ValueType BCVT = InVec.getOperand(0).getValueType(); + if (!MVT::isVector(BCVT) || + NumElts != MVT::getVectorNumElements(BCVT)) + return SDOperand(); + InVec = InVec.getOperand(0); + EVT = MVT::getVectorElementType(BCVT); + NewLoad = true; + } + if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && + InVec.getOperand(0).getValueType() == EVT && + ISD::isNormalLoad(InVec.getOperand(0).Val) && + InVec.getOperand(0).hasOneUse()) { + LoadSDNode *LN0 = cast(InVec.getOperand(0)); + unsigned Align = LN0->getAlignment(); + if (NewLoad) { + // Check the resultant load doesn't need a higher alignment than the + // original load. + unsigned NewAlign = TLI.getTargetMachine().getTargetData()-> + getABITypeAlignment(MVT::getTypeForValueType(LVT)); + if (!TLI.isOperationLegal(ISD::LOAD, LVT) || NewAlign > Align) + return SDOperand(); + Align = NewAlign; + } + + return DAG.getLoad(LVT, LN0->getChain(), LN0->getBasePtr(), + LN0->getSrcValue(), LN0->getSrcValueOffset(), + LN0->isVolatile(), Align); + } + } + } + return SDOperand(); +} + + SDOperand DAGCombiner::visitBUILD_VECTOR(SDNode *N) { unsigned NumInScalars = N->getNumOperands(); MVT::ValueType VT = N->getValueType(0); @@ -4068,8 +4517,7 @@ SDOperand DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (!Base.Val) return N0; for (unsigned i = 0; i != NumElems; ++i) { - if (V->getOperand(i).getOpcode() != ISD::UNDEF && - V->getOperand(i) != Base) { + if (V->getOperand(i) != Base) { AllSame = false; break; } @@ -4200,7 +4648,7 @@ SDOperand DAGCombiner::SimplifyVBinOp(SDNode *N) { if ((RHSOp.getOpcode() == ISD::Constant && cast(RHSOp.Val)->isNullValue()) || (RHSOp.getOpcode() == ISD::ConstantFP && - !cast(RHSOp.Val)->getValue())) + cast(RHSOp.Val)->getValueAPF().isZero())) break; } Ops.push_back(DAG.getNode(N->getOpcode(), EltType, LHSOp, RHSOp)); @@ -4354,7 +4802,7 @@ SDOperand DAGCombiner::SimplifySelectCC(SDOperand N0, SDOperand N1, // Check to see if we can simplify the select into an fabs node if (ConstantFPSDNode *CFP = dyn_cast(N1)) { // Allow either -0.0 or 0.0 - if (CFP->getValue() == 0.0) { + if (CFP->getValueAPF().isZero()) { // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs if ((CC == ISD::SETGE || CC == ISD::SETGT) && N0 == N2 && N3.getOpcode() == ISD::FNEG && @@ -4599,8 +5047,9 @@ bool DAGCombiner::isAlias(SDOperand Ptr1, int64_t Size1, if (CombinerGlobalAA) { // Use alias analysis information. - int Overlap1 = Size1 + SrcValueOffset1; - int Overlap2 = Size2 + SrcValueOffset2; + int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2); + int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset; + int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset; AliasAnalysis::AliasResult AAResult = AA.alias(SrcValue1, Overlap1, SrcValue2, Overlap2); if (AAResult == AliasAnalysis::NoAlias)