X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FCodeGen%2FSelectionDAG%2FDAGCombiner.cpp;h=f119023d217b03ea185eb29990623f5f498b1469;hp=10fb334c4c6027fa090553ce86efdd9028488e3d;hb=20a42bb20d43b80e322c95dd99b64a5a4566fe08;hpb=9f51f8f7e70fb7fbaff887224ac4ebec3125ae45 diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 10fb334c4c6..f119023d217 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -924,6 +924,62 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); APInt KnownZero, KnownOne; + + // XXX-disabled: + auto Opcode = Op.getOpcode(); + if (Opcode == ISD::AND || Opcode == ISD::OR) { + auto* Op1 = Op.getOperand(0).getNode(); + auto* Op2 = Op.getOperand(1).getNode(); + auto* Op1C = dyn_cast(Op1); + auto* Op2C = dyn_cast(Op2); + + // and X, 0 + if (Opcode == ISD::AND && !Op1C && Op2C && Op2C->isNullValue()) { + return false; + } + + // or (and X, 0), Y + if (Opcode == ISD::OR) { + if (Op1->getOpcode() == ISD::AND) { + auto* Op11 = Op1->getOperand(0).getNode(); + auto* Op12 = Op1->getOperand(1).getNode(); + auto* Op11C = dyn_cast(Op11); + auto* Op12C = dyn_cast(Op12); + if (!Op11C && Op12C && Op12C->isNullValue()) { + return false; + } + } + if (Op1->getOpcode() == ISD::TRUNCATE) { + // or (trunc (and %0, 0)), Y + auto* Op11 = Op1->getOperand(0).getNode(); + if (Op11->getOpcode() == ISD::AND) { + auto* Op111 = Op11->getOperand(0).getNode(); + auto* Op112 = Op11->getOperand(1).getNode(); + auto* Op111C = dyn_cast(Op111); + auto* Op112C = dyn_cast(Op112); + if (!Op111C && Op112C && Op112C->isNullValue()) { + // or (and X, 0), Y + return false; + } + } + } + } + } + + // trunc (and X, 0) + if (Opcode == ISD::TRUNCATE) { + auto* Op1 = Op.getOperand(0).getNode(); + if (Op1->getOpcode() == ISD::AND) { + auto* Op11 = Op1->getOperand(0).getNode(); + auto* Op12 = Op1->getOperand(1).getNode(); + auto* Op11C = dyn_cast(Op11); + auto* Op12C = dyn_cast(Op12); + if (!Op11C && Op12C && Op12C->isNullValue()) { + return false; + } + } + } + if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) return false; @@ -3042,6 +3098,22 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (and c1, c2) -> c1&c2 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); ConstantSDNode *N1C = dyn_cast(N1); + + // XXX-disabled: (and x, 0) should not be folded. + // (and (and x, 0), y) shouldn't either. + if (!N0C && N1C && N1C->isNullValue()) { + return SDValue(); + } + if (!N0C) { + if (N0.getOpcode() == ISD::AND) { + auto* N01 = N0.getOperand(1).getNode(); + auto* N01C = dyn_cast(N01); + if (N01C && N01C->isNullValue()) { + return SDValue(); + } + } + } + if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS @@ -3888,10 +3960,10 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) { // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1. if (Neg.getOpcode() != ISD::SUB) - return 0; + return false; ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0)); if (!NegC) - return 0; + return false; SDValue NegOp1 = Neg.getOperand(1); // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with @@ -6843,9 +6915,13 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { uint64_t PtrOff = ShAmt / 8; unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); SDLoc DL(LN0); + // The original load itself didn't wrap, so an offset within it doesn't. + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); SDValue NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, LN0->getBasePtr(), - DAG.getConstant(PtrOff, DL, PtrType)); + DAG.getConstant(PtrOff, DL, PtrType), + &Flags); AddToWorklist(NewPtr.getNode()); SDValue Load; @@ -7244,6 +7320,12 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { return SDValue(); } +static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) { + // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi + // and Lo parts; on big-endian machines it doesn't. + return DAG.getDataLayout().isBigEndian() ? 1 : 0; +} + SDValue DAGCombiner::visitBITCAST(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -7310,6 +7392,15 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) + // + // For ppc_fp128: + // fold (bitcast (fneg x)) -> + // flipbit = signbit + // (xor (bitcast x) (build_pair flipbit, flipbit)) + // + // fold (bitcast (fabs x)) -> + // flipbit = (and (extract_element (bitcast x), 0), signbit) + // (xor (bitcast x) (build_pair flipbit, flipbit)) // This often reduces constant pool loads. if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) || (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) && @@ -7320,6 +7411,29 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { AddToWorklist(NewConv.getNode()); SDLoc DL(N); + if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { + assert(VT.getSizeInBits() == 128); + SDValue SignBit = DAG.getConstant( + APInt::getSignBit(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64); + SDValue FlipBit; + if (N0.getOpcode() == ISD::FNEG) { + FlipBit = SignBit; + AddToWorklist(FlipBit.getNode()); + } else { + assert(N0.getOpcode() == ISD::FABS); + SDValue Hi = + DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv, + DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG), + SDLoc(NewConv))); + AddToWorklist(Hi.getNode()); + FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit); + AddToWorklist(FlipBit.getNode()); + } + SDValue FlipBits = + DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit); + AddToWorklist(FlipBits.getNode()); + return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits); + } APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); if (N0.getOpcode() == ISD::FNEG) return DAG.getNode(ISD::XOR, DL, VT, @@ -7333,6 +7447,13 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // (or (and (bitconvert x), sign), (and cst, (not sign))) // Note that we don't handle (copysign x, cst) because this can always be // folded to an fneg or fabs. + // + // For ppc_fp128: + // fold (bitcast (fcopysign cst, x)) -> + // flipbit = (and (extract_element + // (xor (bitcast cst), (bitcast x)), 0), + // signbit) + // (xor (bitcast cst) (build_pair flipbit, flipbit)) if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() && isa(N0.getOperand(0)) && VT.isInteger() && !VT.isVector()) { @@ -7361,6 +7482,30 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { AddToWorklist(X.getNode()); } + if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { + APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2); + SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(0)), VT, + N0.getOperand(0)); + AddToWorklist(Cst.getNode()); + SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(1)), VT, + N0.getOperand(1)); + AddToWorklist(X.getNode()); + SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X); + AddToWorklist(XorResult.getNode()); + SDValue XorResult64 = DAG.getNode( + ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult, + DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG), + SDLoc(XorResult))); + AddToWorklist(XorResult64.getNode()); + SDValue FlipBit = + DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64, + DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64)); + AddToWorklist(FlipBit.getNode()); + SDValue FlipBits = + DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit); + AddToWorklist(FlipBits.getNode()); + return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits); + } APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); X = DAG.getNode(ISD::AND, SDLoc(X), VT, X, DAG.getConstant(SignBit, SDLoc(X), VT)); @@ -8722,20 +8867,21 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { ZeroCmp, Zero, RV); } +/// copysign(x, fp_extend(y)) -> copysign(x, y) +/// copysign(x, fp_round(y)) -> copysign(x, y) static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) { - // copysign(x, fp_extend(y)) -> copysign(x, y) - // copysign(x, fp_round(y)) -> copysign(x, y) - // Do not optimize out type conversion of f128 type yet. - // For some target like x86_64, configuration is changed - // to keep one f128 value in one SSE register, but - // instruction selection cannot handle FCOPYSIGN on - // SSE registers yet. SDValue N1 = N->getOperand(1); - EVT N1VT = N1->getValueType(0); - EVT N1Op0VT = N1->getOperand(0)->getValueType(0); - return (N1.getOpcode() == ISD::FP_EXTEND || - N1.getOpcode() == ISD::FP_ROUND) && - (N1VT == N1Op0VT || N1Op0VT != MVT::f128); + if ((N1.getOpcode() == ISD::FP_EXTEND || + N1.getOpcode() == ISD::FP_ROUND)) { + // Do not optimize out type conversion of f128 type yet. + // For some targets like x86_64, configuration is changed to keep one f128 + // value in one SSE register, but instruction selection cannot handle + // FCOPYSIGN on SSE registers yet. + EVT N1VT = N1->getValueType(0); + EVT N1Op0VT = N1->getOperand(0)->getValueType(0); + return (N1VT == N1Op0VT || N1Op0VT != MVT::f128); + } + return false; } SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {