X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FPowerPC%2FPPCISelLowering.cpp;h=a7744b8f7a4eb27516ebd4f64e5407bfc922b0ed;hb=c06441e5ea55d1e48a85e99ed2615ff4f459b4c2;hp=9170f61f0fe9ce6715fad2475c9ff94f3f37a4c4;hpb=5480c0469e5c0323ffb12f1ead2abd169d6cc0e7;p=oota-llvm.git diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 9170f61f0fe..a7744b8f7a4 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -32,32 +32,33 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" +#include "llvm/DerivedTypes.h" using namespace llvm; -static cl::opt EnablePPCPreinc("enable-ppc-preinc", +static cl::opt EnablePPCPreinc("enable-ppc-preinc", cl::desc("enable preincrement load/store generation on PPC (experimental)"), cl::Hidden); PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) : TargetLowering(TM), PPCSubTarget(*TM.getSubtargetImpl()) { - + setPow2DivIsCheap(); // Use _setjmp/_longjmp instead of setjmp/longjmp. setUseUnderscoreSetJmp(true); setUseUnderscoreLongJmp(true); - + // Set up the register classes. addRegisterClass(MVT::i32, PPC::GPRCRegisterClass); addRegisterClass(MVT::f32, PPC::F4RCRegisterClass); addRegisterClass(MVT::f64, PPC::F8RCRegisterClass); - + // PowerPC has an i16 but no i8 (or i1) SEXTLOAD setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand); setTruncStoreAction(MVT::f64, MVT::f32, Expand); - + // PowerPC has pre-inc load and store's. setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal); @@ -70,9 +71,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal); - // Shortening conversions involving ppcf128 get expanded (2 regs -> 1 reg) - setConvertAction(MVT::ppcf128, MVT::f64, Expand); - setConvertAction(MVT::ppcf128, MVT::f32, Expand); // This is used in the ppcf128->int sequence. Note it has different semantics // from FP_ROUND: that rounds to nearest, this rounds to zero. setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom); @@ -92,7 +90,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::SDIVREM, MVT::i32, Expand); setOperationAction(ISD::UDIVREM, MVT::i64, Expand); setOperationAction(ISD::SDIVREM, MVT::i64, Expand); - + // We don't support sin/cos/sqrt/fmod/pow setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); @@ -104,16 +102,16 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::FPOW , MVT::f32, Expand); setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); - + // If we're enabling GP optimizations, use hardware square root if (!TM.getSubtarget().hasFSQRT()) { setOperationAction(ISD::FSQRT, MVT::f64, Expand); setOperationAction(ISD::FSQRT, MVT::f32, Expand); } - + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); - + // PowerPC does not have BSWAP, CTPOP or CTTZ setOperationAction(ISD::BSWAP, MVT::i32 , Expand); setOperationAction(ISD::CTPOP, MVT::i32 , Expand); @@ -121,29 +119,29 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::BSWAP, MVT::i64 , Expand); setOperationAction(ISD::CTPOP, MVT::i64 , Expand); setOperationAction(ISD::CTTZ , MVT::i64 , Expand); - + // PowerPC does not have ROTR setOperationAction(ISD::ROTR, MVT::i32 , Expand); setOperationAction(ISD::ROTR, MVT::i64 , Expand); - + // PowerPC does not have Select setOperationAction(ISD::SELECT, MVT::i32, Expand); setOperationAction(ISD::SELECT, MVT::i64, Expand); setOperationAction(ISD::SELECT, MVT::f32, Expand); setOperationAction(ISD::SELECT, MVT::f64, Expand); - + // PowerPC wants to turn select_cc of FP into fsel when possible. setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); // PowerPC wants to optimize integer setcc a bit setOperationAction(ISD::SETCC, MVT::i32, Custom); - + // PowerPC does not have BRCOND which requires SetCC setOperationAction(ISD::BRCOND, MVT::Other, Expand); setOperationAction(ISD::BR_JT, MVT::Other, Expand); - + // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores. setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); @@ -162,14 +160,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // Support label based line numbers. setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); - + setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); - - - // We want to legalize GlobalAddress and ConstantPool nodes into the + + + // We want to legalize GlobalAddress and ConstantPool nodes into the // appropriate instructions to materialize the address. setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); @@ -179,7 +177,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); setOperationAction(ISD::ConstantPool, MVT::i64, Custom); setOperationAction(ISD::JumpTable, MVT::i64, Custom); - + // RET must be custom lowered, to meet ABI requirements. setOperationAction(ISD::RET , MVT::Other, Custom); @@ -191,24 +189,24 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); - + // VAARG is custom lowered with ELF 32 ABI if (TM.getSubtarget().isELF32_ABI()) setOperationAction(ISD::VAARG, MVT::Other, Custom); else setOperationAction(ISD::VAARG, MVT::Other, Expand); - + // Use the default implementation. setOperationAction(ISD::VACOPY , MVT::Other, Expand); setOperationAction(ISD::VAEND , MVT::Other, Expand); - setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom); // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); - + // Comparisons that require checking two conditions. setCondCodeAction(ISD::SETULT, MVT::f32, Expand); setCondCodeAction(ISD::SETULT, MVT::f64, Expand); @@ -222,7 +220,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setCondCodeAction(ISD::SETOLE, MVT::f64, Expand); setCondCodeAction(ISD::SETONE, MVT::f32, Expand); setCondCodeAction(ISD::SETONE, MVT::f64, Expand); - + if (TM.getSubtarget().has64BitSupport()) { // They also have instructions for converting between i64 and fp. setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); @@ -230,12 +228,12 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); - + // FIXME: disable this lowered code. This generates 64-bit register values, // and we don't model the fact that the top part is clobbered by calls. We // need to flag these together so that the value isn't live across a call. //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); - + // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); } else { @@ -269,7 +267,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // add/sub are legal for all supported vector VT's. setOperationAction(ISD::ADD , VT, Legal); setOperationAction(ISD::SUB , VT, Legal); - + // We promote all shuffles to v16i8. setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote); AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8); @@ -287,7 +285,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) AddPromotedToType (ISD::SELECT, VT, MVT::v4i32); setOperationAction(ISD::STORE, VT, Promote); AddPromotedToType (ISD::STORE, VT, MVT::v4i32); - + // No other operations are legal. setOperationAction(ISD::MUL , VT, Expand); setOperationAction(ISD::SDIV, VT, Expand); @@ -320,12 +318,12 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::LOAD , MVT::v4i32, Legal); setOperationAction(ISD::SELECT, MVT::v4i32, Expand); setOperationAction(ISD::STORE , MVT::v4i32, Legal); - + addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass); addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass); addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass); addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass); - + setOperationAction(ISD::MUL, MVT::v4f32, Legal); setOperationAction(ISD::MUL, MVT::v4i32, Custom); setOperationAction(ISD::MUL, MVT::v8i16, Custom); @@ -333,16 +331,16 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom); - + setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); } - + setShiftAmountType(MVT::i32); setBooleanContents(ZeroOrOneBooleanContent); - + if (TM.getSubtarget().isPPC64()) { setStackPointerRegisterToSaveRestore(PPC::X1); setExceptionPointerRegister(PPC::X3); @@ -352,13 +350,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setExceptionPointerRegister(PPC::R3); setExceptionSelectorRegister(PPC::R4); } - + // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::BR_CC); setTargetDAGCombine(ISD::BSWAP); - + // Darwin long double math library functions have $LDBL128 appended. if (TM.getSubtarget().isDarwin()) { setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128"); @@ -456,22 +454,21 @@ static bool isFloatingPointZero(SDValue Op) { /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return /// true if Op is undef or if it matches the specified value. -static bool isConstantOrUndef(SDValue Op, unsigned Val) { - return Op.getOpcode() == ISD::UNDEF || - cast(Op)->getZExtValue() == Val; +static bool isConstantOrUndef(int Op, int Val) { + return Op < 0 || Op == Val; } /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUHUM instruction. -bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) { +bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) { if (!isUnary) { for (unsigned i = 0; i != 16; ++i) - if (!isConstantOrUndef(N->getOperand(i), i*2+1)) + if (!isConstantOrUndef(N->getMaskElt(i), i*2+1)) return false; } else { for (unsigned i = 0; i != 8; ++i) - if (!isConstantOrUndef(N->getOperand(i), i*2+1) || - !isConstantOrUndef(N->getOperand(i+8), i*2+1)) + if (!isConstantOrUndef(N->getMaskElt(i), i*2+1) || + !isConstantOrUndef(N->getMaskElt(i+8), i*2+1)) return false; } return true; @@ -479,18 +476,18 @@ bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) { /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUWUM instruction. -bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) { +bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) { if (!isUnary) { for (unsigned i = 0; i != 16; i += 2) - if (!isConstantOrUndef(N->getOperand(i ), i*2+2) || - !isConstantOrUndef(N->getOperand(i+1), i*2+3)) + if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || + !isConstantOrUndef(N->getMaskElt(i+1), i*2+3)) return false; } else { for (unsigned i = 0; i != 8; i += 2) - if (!isConstantOrUndef(N->getOperand(i ), i*2+2) || - !isConstantOrUndef(N->getOperand(i+1), i*2+3) || - !isConstantOrUndef(N->getOperand(i+8), i*2+2) || - !isConstantOrUndef(N->getOperand(i+9), i*2+3)) + if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || + !isConstantOrUndef(N->getMaskElt(i+1), i*2+3) || + !isConstantOrUndef(N->getMaskElt(i+8), i*2+2) || + !isConstantOrUndef(N->getMaskElt(i+9), i*2+3)) return false; } return true; @@ -498,27 +495,28 @@ bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) { /// isVMerge - Common function, used to match vmrg* shuffles. /// -static bool isVMerge(SDNode *N, unsigned UnitSize, +static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart) { - assert(N->getOpcode() == ISD::BUILD_VECTOR && - N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!"); + assert(N->getValueType(0) == MVT::v16i8 && + "PPC only supports shuffles by bytes!"); assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && "Unsupported merge size!"); - + for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit - if (!isConstantOrUndef(N->getOperand(i*UnitSize*2+j), + if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j), LHSStart+j+i*UnitSize) || - !isConstantOrUndef(N->getOperand(i*UnitSize*2+UnitSize+j), + !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j), RHSStart+j+i*UnitSize)) return false; } - return true; + return true; } /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for /// a VRGL* instruction with the specified unit size (1,2 or 4 bytes). -bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) { +bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, + bool isUnary) { if (!isUnary) return isVMerge(N, UnitSize, 8, 24); return isVMerge(N, UnitSize, 8, 8); @@ -526,7 +524,8 @@ bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) { /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes). -bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) { +bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, + bool isUnary) { if (!isUnary) return isVMerge(N, UnitSize, 0, 16); return isVMerge(N, UnitSize, 0, 0); @@ -536,91 +535,90 @@ bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) { /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift /// amount, otherwise return -1. int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) { - assert(N->getOpcode() == ISD::BUILD_VECTOR && - N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!"); + assert(N->getValueType(0) == MVT::v16i8 && + "PPC only supports shuffles by bytes!"); + + ShuffleVectorSDNode *SVOp = cast(N); + // Find the first non-undef value in the shuffle mask. unsigned i; - for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i) + for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i) /*search*/; - + if (i == 16) return -1; // all undef. - - // Otherwise, check to see if the rest of the elements are consequtively + + // Otherwise, check to see if the rest of the elements are consecutively // numbered from this value. - unsigned ShiftAmt = cast(N->getOperand(i))->getZExtValue(); + unsigned ShiftAmt = SVOp->getMaskElt(i); if (ShiftAmt < i) return -1; ShiftAmt -= i; if (!isUnary) { - // Check the rest of the elements to see if they are consequtive. + // Check the rest of the elements to see if they are consecutive. for (++i; i != 16; ++i) - if (!isConstantOrUndef(N->getOperand(i), ShiftAmt+i)) + if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) return -1; } else { - // Check the rest of the elements to see if they are consequtive. + // Check the rest of the elements to see if they are consecutive. for (++i; i != 16; ++i) - if (!isConstantOrUndef(N->getOperand(i), (ShiftAmt+i) & 15)) + if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15)) return -1; } - return ShiftAmt; } /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a splat of a single element that is suitable for input to /// VSPLTB/VSPLTH/VSPLTW. -bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) { - assert(N->getOpcode() == ISD::BUILD_VECTOR && - N->getNumOperands() == 16 && +bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { + assert(N->getValueType(0) == MVT::v16i8 && (EltSize == 1 || EltSize == 2 || EltSize == 4)); - + // This is a splat operation if each element of the permute is the same, and // if the value doesn't reference the second vector. - unsigned ElementBase = 0; - SDValue Elt = N->getOperand(0); - if (ConstantSDNode *EltV = dyn_cast(Elt)) - ElementBase = EltV->getZExtValue(); - else - return false; // FIXME: Handle UNDEF elements too! - - if (cast(Elt)->getZExtValue() >= 16) - return false; + unsigned ElementBase = N->getMaskElt(0); - // Check that they are consequtive. - for (unsigned i = 1; i != EltSize; ++i) { - if (!isa(N->getOperand(i)) || - cast(N->getOperand(i))->getZExtValue() != i+ElementBase) + // FIXME: Handle UNDEF elements too! + if (ElementBase >= 16) + return false; + + // Check that the indices are consecutive, in the case of a multi-byte element + // splatted with a v16i8 mask. + for (unsigned i = 1; i != EltSize; ++i) + if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase)) return false; - } - - assert(isa(Elt) && "Invalid VECTOR_SHUFFLE mask!"); + for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { - if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; - assert(isa(N->getOperand(i)) && - "Invalid VECTOR_SHUFFLE mask!"); + if (N->getMaskElt(i) < 0) continue; for (unsigned j = 0; j != EltSize; ++j) - if (N->getOperand(i+j) != N->getOperand(j)) + if (N->getMaskElt(i+j) != N->getMaskElt(j)) return false; } - return true; } /// isAllNegativeZeroVector - Returns true if all elements of build_vector /// are -0.0. bool PPC::isAllNegativeZeroVector(SDNode *N) { - assert(N->getOpcode() == ISD::BUILD_VECTOR); - if (PPC::isSplatShuffleMask(N, N->getNumOperands())) - if (ConstantFPSDNode *CFP = dyn_cast(N)) + BuildVectorSDNode *BV = cast(N); + + APInt APVal, APUndef; + unsigned BitSize; + bool HasAnyUndefs; + + if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32)) + if (ConstantFPSDNode *CFP = dyn_cast(N->getOperand(0))) return CFP->getValueAPF().isNegZero(); + return false; } /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) { - assert(isSplatShuffleMask(N, EltSize)); - return cast(N->getOperand(0))->getZExtValue() / EltSize; + ShuffleVectorSDNode *SVOp = cast(N); + assert(isSplatShuffleMask(SVOp, EltSize)); + return SVOp->getMaskElt(0) / EltSize; } /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed @@ -639,31 +637,31 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval. SDValue UniquedVals[4]; assert(Multiple > 1 && Multiple <= 4 && "How can this happen?"); - + // See if all of the elements in the buildvector agree across. for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; // If the element isn't a constant, bail fully out. if (!isa(N->getOperand(i))) return SDValue(); - + if (UniquedVals[i&(Multiple-1)].getNode() == 0) UniquedVals[i&(Multiple-1)] = N->getOperand(i); else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i)) return SDValue(); // no match. } - + // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains // either constant or undef values that are identical for each chunk. See // if these chunks can form into a larger vspltis*. - + // Check to see if all of the leading entries are either 0 or -1. If // neither, then this won't fit into the immediate field. bool LeadingZero = true; bool LeadingOnes = true; for (unsigned i = 0; i != Multiple-1; ++i) { if (UniquedVals[i].getNode() == 0) continue; // Must have been undefs. - + LeadingZero &= cast(UniquedVals[i])->isNullValue(); LeadingOnes &= cast(UniquedVals[i])->isAllOnesValue(); } @@ -682,10 +680,10 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2) return DAG.getTargetConstant(Val, MVT::i32); } - + return SDValue(); } - + // Check to see if this buildvec has a single non-undef value in its elements. for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; @@ -694,31 +692,29 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { else if (OpVal != N->getOperand(i)) return SDValue(); } - + if (OpVal.getNode() == 0) return SDValue(); // All UNDEF: use implicit def. - - unsigned ValSizeInBytes = 0; + + unsigned ValSizeInBytes = EltSize; uint64_t Value = 0; if (ConstantSDNode *CN = dyn_cast(OpVal)) { Value = CN->getZExtValue(); - ValSizeInBytes = CN->getValueType(0).getSizeInBits()/8; } else if (ConstantFPSDNode *CN = dyn_cast(OpVal)) { assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!"); Value = FloatToBits(CN->getValueAPF().convertToFloat()); - ValSizeInBytes = 4; } // If the splat value is larger than the element value, then we can never do // this splat. The only case that we could fit the replicated bits into our // immediate field for would be zero, and we prefer to use vxor for it. if (ValSizeInBytes < ByteSize) return SDValue(); - + // If the element value is larger than the splat value, cut it in half and // check to see if the two halves are equal. Continue doing this until we // get to ByteSize. This allows us to handle 0x01010101 as 0x01. while (ValSizeInBytes > ByteSize) { ValSizeInBytes >>= 1; - + // If the top half equals the bottom half, we're still ok. if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) != (Value & ((1 << (8*ValSizeInBytes))-1))) @@ -728,7 +724,7 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { // Properly sign extend the value. int ShAmt = (4-ByteSize)*8; int MaskVal = ((int)Value << ShAmt) >> ShAmt; - + // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros. if (MaskVal == 0) return SDValue(); @@ -749,7 +745,7 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { static bool isIntS16Immediate(SDNode *N, short &Imm) { if (N->getOpcode() != ISD::Constant) return false; - + Imm = (short)cast(N)->getZExtValue(); if (N->getValueType(0) == MVT::i32) return Imm == (int32_t)cast(N)->getZExtValue(); @@ -766,21 +762,21 @@ static bool isIntS16Immediate(SDValue Op, short &Imm) { /// can be more efficiently represented with [r+imm]. bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { short imm = 0; if (N.getOpcode() == ISD::ADD) { if (isIntS16Immediate(N.getOperand(1), imm)) return false; // r+i if (N.getOperand(1).getOpcode() == PPCISD::Lo) return false; // r+i - + Base = N.getOperand(0); Index = N.getOperand(1); return true; } else if (N.getOpcode() == ISD::OR) { if (isIntS16Immediate(N.getOperand(1), imm)) return false; // r+i can fold it if we can. - + // If this is an or of disjoint bitfields, we can codegen this as an add // (for better address arithmetic) if the LHS and RHS of the OR are provably // disjoint. @@ -790,7 +786,7 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, APInt::getAllOnesValue(N.getOperand(0) .getValueSizeInBits()), LHSKnownZero, LHSKnownOne); - + if (LHSKnownZero.getBoolValue()) { DAG.ComputeMaskedBits(N.getOperand(1), APInt::getAllOnesValue(N.getOperand(1) @@ -805,7 +801,7 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, } } } - + return false; } @@ -813,11 +809,14 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, /// a signed 16-bit displacement [r+imm], and if it is not better /// represented as reg+reg. bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, - SDValue &Base, SelectionDAG &DAG){ + SDValue &Base, + SelectionDAG &DAG) const { + // FIXME dl should come from parent load or store, not from address + DebugLoc dl = N.getDebugLoc(); // If this can be more profitably realized as r+r, fail. if (SelectAddressRegReg(N, Disp, Base, DAG)) return false; - + if (N.getOpcode() == ISD::ADD) { short imm = 0; if (isIntS16Immediate(N.getOperand(1), imm)) { @@ -861,7 +860,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, } } else if (ConstantSDNode *CN = dyn_cast(N)) { // Loading from a constant address. - + // If this address fits entirely in a 16-bit sext immediate field, codegen // this as "d, 0" short Imm; @@ -875,17 +874,17 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, if (CN->getValueType(0) == MVT::i32 || (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) { int Addr = (int)CN->getZExtValue(); - + // Otherwise, break this down into an LIS + disp. Disp = DAG.getTargetConstant((short)Addr, MVT::i32); - + Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32); unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8; - Base = SDValue(DAG.getTargetNode(Opc, CN->getValueType(0), Base), 0); + Base = SDValue(DAG.getTargetNode(Opc, dl, CN->getValueType(0), Base), 0); return true; } } - + Disp = DAG.getTargetConstant(0, getPointerTy()); if (FrameIndexSDNode *FI = dyn_cast(N)) Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); @@ -898,13 +897,13 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, /// represented as an indexed [r+r] operation. bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { // Check to see if we can easily represent this as an [r+r] address. This // will fail if it thinks that the address is more profitably represented as // reg+imm, e.g. where imm = 0. if (SelectAddressRegReg(N, Base, Index, DAG)) return true; - + // If the operand is an addition, always emit this as [r+r], since this is // better (for code size, and execution, as the memop does the add for free) // than emitting an explicit add. @@ -913,7 +912,7 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, Index = N.getOperand(1); return true; } - + // Otherwise, do it the hard way, using R0 as the base register. Base = DAG.getRegister(PPC::R0, N.getValueType()); Index = N; @@ -925,11 +924,13 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, /// [r+imm*4]. Suitable for use by STD and friends. bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp, SDValue &Base, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { + // FIXME dl should come from the parent load or store, not the address + DebugLoc dl = N.getDebugLoc(); // If this can be more profitably realized as r+r, fail. if (SelectAddressRegReg(N, Disp, Base, DAG)) return false; - + if (N.getOpcode() == ISD::ADD) { short imm = 0; if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) { @@ -981,23 +982,22 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp, Base = DAG.getRegister(PPC::R0, CN->getValueType(0)); return true; } - + // Fold the low-part of 32-bit absolute addresses into addr mode. if (CN->getValueType(0) == MVT::i32 || (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) { int Addr = (int)CN->getZExtValue(); - + // Otherwise, break this down into an LIS + disp. Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32); - Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32); unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8; - Base = SDValue(DAG.getTargetNode(Opc, CN->getValueType(0), Base), 0); + Base = SDValue(DAG.getTargetNode(Opc, dl, CN->getValueType(0), Base),0); return true; } } } - + Disp = DAG.getTargetConstant(0, getPointerTy()); if (FrameIndexSDNode *FI = dyn_cast(N)) Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); @@ -1013,16 +1013,16 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp, bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { // Disabled by default for now. if (!EnablePPCPreinc) return false; - + SDValue Ptr; MVT VT; if (LoadSDNode *LD = dyn_cast(N)) { Ptr = LD->getBasePtr(); VT = LD->getMemoryVT(); - + } else if (StoreSDNode *ST = dyn_cast(N)) { ST = ST; Ptr = ST->getBasePtr(); @@ -1033,9 +1033,9 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, // PowerPC doesn't have preinc load/store instructions for vectors. if (VT.isVector()) return false; - + // TODO: Check reg+reg first. - + // LDU/STU use reg+imm*4, others use reg+imm. if (VT != MVT::i64) { // reg + imm @@ -1054,8 +1054,8 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, LD->getExtensionType() == ISD::SEXTLOAD && isa(Offset)) return false; - } - + } + AM = ISD::PRE_INC; return true; } @@ -1064,18 +1064,20 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, // LowerOperation implementation //===----------------------------------------------------------------------===// -SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, +SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) { MVT PtrVT = Op.getValueType(); ConstantPoolSDNode *CP = cast(Op); Constant *C = CP->getConstVal(); SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment()); SDValue Zero = DAG.getConstant(0, PtrVT); + // FIXME there isn't really any debug info here + DebugLoc dl = Op.getDebugLoc(); const TargetMachine &TM = DAG.getTarget(); - - SDValue Hi = DAG.getNode(PPCISD::Hi, PtrVT, CPI, Zero); - SDValue Lo = DAG.getNode(PPCISD::Lo, PtrVT, CPI, Zero); + + SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, CPI, Zero); + SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, CPI, Zero); // If this is a non-darwin platform, we don't support non-static relo models // yet. @@ -1083,16 +1085,17 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, !TM.getSubtarget().isDarwin()) { // Generate non-pic code that has direct accesses to the constant pool. // The address of the global is just (hi(&g)+lo(&g)). - return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo); + return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo); } - + if (TM.getRelocationModel() == Reloc::PIC_) { // With PIC, the first instruction is actually "GR+hi(&G)". - Hi = DAG.getNode(ISD::ADD, PtrVT, - DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi); + Hi = DAG.getNode(ISD::ADD, dl, PtrVT, + DAG.getNode(PPCISD::GlobalBaseReg, + DebugLoc::getUnknownLoc(), PtrVT), Hi); } - - Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo); + + Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo); return Lo; } @@ -1101,11 +1104,13 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { JumpTableSDNode *JT = cast(Op); SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); SDValue Zero = DAG.getConstant(0, PtrVT); - + // FIXME there isn't really any debug loc here + DebugLoc dl = Op.getDebugLoc(); + const TargetMachine &TM = DAG.getTarget(); - SDValue Hi = DAG.getNode(PPCISD::Hi, PtrVT, JTI, Zero); - SDValue Lo = DAG.getNode(PPCISD::Lo, PtrVT, JTI, Zero); + SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, JTI, Zero); + SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, JTI, Zero); // If this is a non-darwin platform, we don't support non-static relo models // yet. @@ -1113,40 +1118,40 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { !TM.getSubtarget().isDarwin()) { // Generate non-pic code that has direct accesses to the constant pool. // The address of the global is just (hi(&g)+lo(&g)). - return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo); + return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo); } - + if (TM.getRelocationModel() == Reloc::PIC_) { // With PIC, the first instruction is actually "GR+hi(&G)". - Hi = DAG.getNode(ISD::ADD, PtrVT, - DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi); + Hi = DAG.getNode(ISD::ADD, dl, PtrVT, + DAG.getNode(PPCISD::GlobalBaseReg, + DebugLoc::getUnknownLoc(), PtrVT), Hi); } - - Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo); + + Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo); return Lo; } -SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, +SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { assert(0 && "TLS not implemented for PPC."); return SDValue(); // Not reached } -SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, - SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, + SelectionDAG &DAG) { MVT PtrVT = Op.getValueType(); GlobalAddressSDNode *GSDN = cast(Op); GlobalValue *GV = GSDN->getGlobal(); SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset()); - // If it's a debug information descriptor, don't mess with it. - if (DAG.isVerifiedDebugInfoDesc(Op)) - return GA; SDValue Zero = DAG.getConstant(0, PtrVT); - + // FIXME there isn't really any debug info here + DebugLoc dl = GSDN->getDebugLoc(); + const TargetMachine &TM = DAG.getTarget(); - SDValue Hi = DAG.getNode(PPCISD::Hi, PtrVT, GA, Zero); - SDValue Lo = DAG.getNode(PPCISD::Lo, PtrVT, GA, Zero); + SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, GA, Zero); + SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, GA, Zero); // If this is a non-darwin platform, we don't support non-static relo models // yet. @@ -1154,28 +1159,30 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, !TM.getSubtarget().isDarwin()) { // Generate non-pic code that has direct accesses to globals. // The address of the global is just (hi(&g)+lo(&g)). - return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo); + return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo); } - + if (TM.getRelocationModel() == Reloc::PIC_) { // With PIC, the first instruction is actually "GR+hi(&G)". - Hi = DAG.getNode(ISD::ADD, PtrVT, - DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi); + Hi = DAG.getNode(ISD::ADD, dl, PtrVT, + DAG.getNode(PPCISD::GlobalBaseReg, + DebugLoc::getUnknownLoc(), PtrVT), Hi); } - - Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo); - + + Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo); + if (!TM.getSubtarget().hasLazyResolverStub(GV)) return Lo; - + // If the global is weak or external, we have to go through the lazy // resolution stub. - return DAG.getLoad(PtrVT, DAG.getEntryNode(), Lo, NULL, 0); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Lo, NULL, 0); } SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { ISD::CondCode CC = cast(Op.getOperand(2))->get(); - + DebugLoc dl = Op.getDebugLoc(); + // If we're comparing for equality to zero, expose the fact that this is // implented as a ctlz/srl pair on ppc, so that the dag combiner can // fold the new nodes. @@ -1185,21 +1192,21 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { SDValue Zext = Op.getOperand(0); if (VT.bitsLT(MVT::i32)) { VT = MVT::i32; - Zext = DAG.getNode(ISD::ZERO_EXTEND, VT, Op.getOperand(0)); - } + Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0)); + } unsigned Log2b = Log2_32(VT.getSizeInBits()); - SDValue Clz = DAG.getNode(ISD::CTLZ, VT, Zext); - SDValue Scc = DAG.getNode(ISD::SRL, VT, Clz, + SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext); + SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz, DAG.getConstant(Log2b, MVT::i32)); - return DAG.getNode(ISD::TRUNCATE, MVT::i32, Scc); + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc); } - // Leave comparisons against 0 and -1 alone for now, since they're usually + // Leave comparisons against 0 and -1 alone for now, since they're usually // optimized. FIXME: revisit this when we can custom lower all setcc // optimizations. if (C->isAllOnesValue() || C->isNullValue()) return SDValue(); } - + // If we have an integer seteq/setne, turn it into a compare against zero // by xor'ing the rhs with the lhs, which is faster than setting a // condition register, reading it back out, and masking the correct bit. The @@ -1208,9 +1215,9 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { MVT LHSVT = Op.getOperand(0).getValueType(); if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { MVT VT = Op.getValueType(); - SDValue Sub = DAG.getNode(ISD::XOR, LHSVT, Op.getOperand(0), + SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0), Op.getOperand(1)); - return DAG.getSetCC(VT, Sub, DAG.getConstant(0, LHSVT), CC); + return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC); } return SDValue(); } @@ -1221,7 +1228,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, unsigned VarArgsNumGPR, unsigned VarArgsNumFPR, const PPCSubtarget &Subtarget) { - + assert(0 && "VAARG in ELF32 ABI not implemented yet!"); return SDValue(); // Not reached } @@ -1231,13 +1238,14 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) { SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function SDValue Nest = Op.getOperand(3); // 'nest' parameter value + DebugLoc dl = Op.getDebugLoc(); MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = (PtrVT == MVT::i64); const Type *IntPtrTy = DAG.getTargetLoweringInfo().getTargetData()->getIntPtrType(); - TargetLowering::ArgListTy Args; + TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Ty = IntPtrTy; @@ -1250,18 +1258,18 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) { Entry.Node = FPtr; Args.push_back(Entry); Entry.Node = Nest; Args.push_back(Entry); - + // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) std::pair CallResult = LowerCallTo(Chain, Op.getValueType().getTypeForMVT(), false, false, false, false, CallingConv::C, false, DAG.getExternalSymbol("__trampoline_setup", PtrVT), - Args, DAG); + Args, DAG, dl); SDValue Ops[] = { CallResult.first, CallResult.second }; - return DAG.getMergeValues(Ops, 2); + return DAG.getMergeValues(Ops, 2, dl); } SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, @@ -1270,6 +1278,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, unsigned VarArgsNumGPR, unsigned VarArgsNumFPR, const PPCSubtarget &Subtarget) { + DebugLoc dl = Op.getDebugLoc(); if (Subtarget.isMachoABI()) { // vastart just stores the address of the VarArgsFrameIndex slot into the @@ -1277,7 +1286,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); const Value *SV = cast(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), FR, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); } // For ELF 32 ABI we follow the layout of the va_list struct. @@ -1307,13 +1316,13 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, SDValue ArgGPR = DAG.getConstant(VarArgsNumGPR, MVT::i8); SDValue ArgFPR = DAG.getConstant(VarArgsNumFPR, MVT::i8); - + MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - + SDValue StackOffsetFI = DAG.getFrameIndex(VarArgsStackOffset, PtrVT); SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); - + uint64_t FrameOffset = PtrVT.getSizeInBits()/8; SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT); @@ -1322,30 +1331,30 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, uint64_t FPROffset = 1; SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT); - + const Value *SV = cast(Op.getOperand(2))->getValue(); - + // Store first byte : number of int regs - SDValue firstStore = DAG.getStore(Op.getOperand(0), ArgGPR, + SDValue firstStore = DAG.getStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1), SV, 0); uint64_t nextOffset = FPROffset; - SDValue nextPtr = DAG.getNode(ISD::ADD, PtrVT, Op.getOperand(1), + SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), ConstFPROffset); - + // Store second byte : number of float regs SDValue secondStore = - DAG.getStore(firstStore, ArgFPR, nextPtr, SV, nextOffset); + DAG.getStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset); nextOffset += StackOffset; - nextPtr = DAG.getNode(ISD::ADD, PtrVT, nextPtr, ConstStackOffset); - + nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); + // Store second word : arguments given on stack SDValue thirdStore = - DAG.getStore(secondStore, StackOffsetFI, nextPtr, SV, nextOffset); + DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, SV, nextOffset); nextOffset += FrameOffset; - nextPtr = DAG.getNode(ISD::ADD, PtrVT, nextPtr, ConstFrameOffset); + nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset); // Store third word : arguments given in registers - return DAG.getStore(thirdStore, FR, nextPtr, SV, nextOffset); + return DAG.getStore(thirdStore, dl, FR, nextPtr, SV, nextOffset); } @@ -1361,8 +1370,8 @@ static const unsigned *GetFPR(const PPCSubtarget &Subtarget) { }; return FPR; } - - + + static const unsigned FPR[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8 @@ -1384,7 +1393,7 @@ static unsigned CalculateStackSlotSize(SDValue Arg, ISD::ArgFlagsTy Flags, } SDValue -PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, +PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex, int &VarArgsStackOffset, @@ -1399,7 +1408,8 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SmallVector ArgValues; SDValue Root = Op.getOperand(0); bool isVarArg = cast(Op.getOperand(2))->getZExtValue() != 0; - + DebugLoc dl = Op.getDebugLoc(); + MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = PtrVT == MVT::i64; bool isMachoABI = Subtarget.isMachoABI(); @@ -1421,9 +1431,9 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - + static const unsigned *FPR = GetFPR(Subtarget); - + static const unsigned VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 @@ -1434,13 +1444,13 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, const unsigned Num_VR_Regs = array_lengthof( VR); unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; - + const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32; - + // In 32-bit non-varargs functions, the stack space for vectors is after the // stack space for non-vectors. We do not use this space unless we have // too many vectors to fit in registers, something that only occurs in - // constructed examples:), but we have to walk the arglist to figure + // constructed examples:), but we have to walk the arglist to figure // that out...for the pathological case, compute VecArgOffset as the // start of the vector parameter area. Computing VecArgOffset is the // entire point of the following loop. @@ -1448,7 +1458,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, // to handle Elf here. unsigned VecArgOffset = ArgOffset; if (!isVarArg && !isPPC64) { - for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues()-1; ArgNo != e; + for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues()-1; ArgNo != e; ++ArgNo) { MVT ObjectVT = Op.getValue(ArgNo).getValueType(); unsigned ObjSize = ObjectVT.getSizeInBits()/8; @@ -1458,7 +1468,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, if (Flags.isByVal()) { // ObjSize is the true size, ArgSize rounded up to multiple of regs. ObjSize = Flags.getByValSize(); - unsigned ArgSize = + unsigned ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; VecArgOffset += ArgSize; continue; @@ -1491,7 +1501,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, // Add DAG nodes to load the arguments or copy them out of registers. On // entry to a function on PPC, the arguments start after the linkage area, // although the first ones are often in registers. - // + // // In the ELF 32 ABI, GPRs and stack are double word align: an argument // represented with two words (long long or double) must be copied to an // even GPR_idx value or to an even ArgOffset value. @@ -1508,7 +1518,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, ISD::ArgFlagsTy Flags = cast(Op.getOperand(ArgNo+3))->getArgFlags(); // See if next argument requires stack alignment in ELF - bool Align = Flags.isSplit(); + bool Align = Flags.isSplit(); unsigned CurArgOffset = ArgOffset; @@ -1551,8 +1561,8 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, if (GPR_idx != Num_GPR_Regs) { unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); RegInfo.addLiveIn(GPR[GPR_idx], VReg); - SDValue Val = DAG.getCopyFromReg(Root, VReg, PtrVT); - SDValue Store = DAG.getTruncStore(Val.getValue(1), Val, FIN, + SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT); + SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, NULL, 0, ObjSize==1 ? MVT::i8 : MVT::i16 ); MemOps.push_back(Store); ++GPR_idx; @@ -1571,8 +1581,8 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, RegInfo.addLiveIn(GPR[GPR_idx], VReg); int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - SDValue Val = DAG.getCopyFromReg(Root, VReg, PtrVT); - SDValue Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0); + SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); MemOps.push_back(Store); ++GPR_idx; if (isMachoABI) ArgOffset += PtrByteSize; @@ -1594,14 +1604,14 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, if (GPR_idx != Num_GPR_Regs) { unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); RegInfo.addLiveIn(GPR[GPR_idx], VReg); - ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32); + ArgVal = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32); ++GPR_idx; } else { needsLoad = true; ArgSize = PtrByteSize; } // Stack align in ELF - if (needsLoad && Align && isELF32_ABI) + if (needsLoad && Align && isELF32_ABI) ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize; // All int arguments reserve stack space in Macho ABI. if (isMachoABI || needsLoad) ArgOffset += PtrByteSize; @@ -1612,19 +1622,19 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, if (GPR_idx != Num_GPR_Regs) { unsigned VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); RegInfo.addLiveIn(GPR[GPR_idx], VReg); - ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64); + ArgVal = DAG.getCopyFromReg(Root, dl, VReg, MVT::i64); if (ObjectVT == MVT::i32) { // PPC64 passes i8, i16, and i32 values in i64 registers. Promote // value to MVT::i64 and then truncate to the correct register size. if (Flags.isSExt()) - ArgVal = DAG.getNode(ISD::AssertSext, MVT::i64, ArgVal, + ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal, DAG.getValueType(ObjectVT)); else if (Flags.isZExt()) - ArgVal = DAG.getNode(ISD::AssertZext, MVT::i64, ArgVal, + ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal, DAG.getValueType(ObjectVT)); - ArgVal = DAG.getNode(ISD::TRUNCATE, MVT::i32, ArgVal); + ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); } ++GPR_idx; @@ -1635,7 +1645,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, // All int arguments reserve stack space in Macho ABI. if (isMachoABI || needsLoad) ArgOffset += 8; break; - + case MVT::f32: case MVT::f64: // Every 4 bytes of argument space consumes one of the GPRs available for @@ -1652,12 +1662,12 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, else VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); RegInfo.addLiveIn(FPR[FPR_idx], VReg); - ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT); + ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT); ++FPR_idx; } else { needsLoad = true; } - + // Stack align in ELF if (needsLoad && Align && isELF32_ABI) ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize; @@ -1673,7 +1683,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, if (VR_idx != Num_VR_Regs) { unsigned VReg = RegInfo.createVirtualRegister(&PPC::VRRCRegClass); RegInfo.addLiveIn(VR[VR_idx], VReg); - ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT); + ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT); if (isVarArg) { while ((ArgOffset % 16) != 0) { ArgOffset += PtrByteSize; @@ -1699,7 +1709,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, } break; } - + // We need to load the argument to a virtual register if we determined above // that we ran out of physical registers of the appropriate type. if (needsLoad) { @@ -1707,9 +1717,9 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, CurArgOffset + (ArgSize - ObjSize), isImmutable); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0); + ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0); } - + ArgValues.push_back(ArgVal); } @@ -1735,39 +1745,39 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. if (isVarArg) { - + int depth; if (isELF32_ABI) { VarArgsNumGPR = GPR_idx; VarArgsNumFPR = FPR_idx; - + // Make room for Num_GPR_Regs, Num_FPR_Regs and for a possible frame // pointer. depth = -(Num_GPR_Regs * PtrVT.getSizeInBits()/8 + Num_FPR_Regs * MVT(MVT::f64).getSizeInBits()/8 + PtrVT.getSizeInBits()/8); - + VarArgsStackOffset = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, ArgOffset); } else depth = ArgOffset; - + VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, depth); SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); - + // In ELF 32 ABI, the fixed integer arguments of a variadic function are // stored to the VarArgsFrameIndex on the stack. if (isELF32_ABI) { for (GPR_idx = 0; GPR_idx != VarArgsNumGPR; ++GPR_idx) { SDValue Val = DAG.getRegister(GPR[GPR_idx], PtrVT); - SDValue Store = DAG.getStore(Root, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); - FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff); + FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } } @@ -1782,12 +1792,12 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); RegInfo.addLiveIn(GPR[GPR_idx], VReg); - SDValue Val = DAG.getCopyFromReg(Root, VReg, PtrVT); - SDValue Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0); + SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); - FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff); + FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } // In ELF 32 ABI, the double arguments are stored to the VarArgsFrameIndex @@ -1795,12 +1805,12 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, if (isELF32_ABI) { for (FPR_idx = 0; FPR_idx != VarArgsNumFPR; ++FPR_idx) { SDValue Val = DAG.getRegister(FPR[FPR_idx], MVT::f64); - SDValue Store = DAG.getStore(Root, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0); MemOps.push_back(Store); // Increment the address by eight for the next argument to store SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, PtrVT); - FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff); + FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } for (; FPR_idx != Num_FPR_Regs; ++FPR_idx) { @@ -1808,24 +1818,25 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); RegInfo.addLiveIn(FPR[FPR_idx], VReg); - SDValue Val = DAG.getCopyFromReg(Root, VReg, MVT::f64); - SDValue Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0); + SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::f64); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); MemOps.push_back(Store); // Increment the address by eight for the next argument to store SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, PtrVT); - FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff); + FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } } } - + if (!MemOps.empty()) - Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size()); + Root = DAG.getNode(ISD::TokenFactor, dl, + MVT::Other, &MemOps[0], MemOps.size()); ArgValues.push_back(Root); - + // Return the new list of results. - return DAG.getNode(ISD::MERGE_VALUES, Op.getNode()->getVTList(), + return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(), &ArgValues[0], ArgValues.size()); } @@ -1958,12 +1969,12 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(CallSDNode *TheCall, static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { ConstantSDNode *C = dyn_cast(Op); if (!C) return 0; - + int Addr = C->getZExtValue(); if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. (Addr << 6 >> 6) != Addr) return 0; // Top 6 bits have to be sext of immediate. - + return DAG.getConstant((int)C->getZExtValue() >> 2, DAG.getTargetLoweringInfo().getPointerTy()).getNode(); } @@ -1985,13 +1996,14 @@ static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVector &TailCallArgs, - SmallVector &MemOpChains) { + SmallVector &MemOpChains, + DebugLoc dl) { for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) { SDValue Arg = TailCallArgs[i].Arg; SDValue FIN = TailCallArgs[i].FrameIdxOp; int FI = TailCallArgs[i].FrameIdx; // Store relative to framepointer. - MemOpChains.push_back(DAG.getStore(Chain, Arg, FIN, + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN, PseudoSourceValue::getFixedStack(FI), 0)); } @@ -2006,7 +2018,8 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue OldFP, int SPDiff, bool isPPC64, - bool isMachoABI) { + bool isMachoABI, + DebugLoc dl) { if (SPDiff) { // Calculate the new stack slot for the return address. int SlotSize = isPPC64 ? 8 : 4; @@ -2020,10 +2033,10 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, MVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); - Chain = DAG.getStore(Chain, OldRetAddr, NewRetAddrFrIdx, + Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, PseudoSourceValue::getFixedStack(NewRetAddr), 0); SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); - Chain = DAG.getStore(Chain, OldFP, NewFramePtrIdx, + Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, PseudoSourceValue::getFixedStack(NewFPIdx), 0); } return Chain; @@ -2051,36 +2064,37 @@ CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, /// stack slot. Returns the chain as result and the loaded frame pointers in /// LROpOut/FPOpout. Used when tail calling. SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, - int SPDiff, - SDValue Chain, - SDValue &LROpOut, - SDValue &FPOpOut) { + int SPDiff, + SDValue Chain, + SDValue &LROpOut, + SDValue &FPOpOut, + DebugLoc dl) { if (SPDiff) { // Load the LR and FP stack slot for later adjusting. MVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32; LROpOut = getReturnAddrFrameIndex(DAG); - LROpOut = DAG.getLoad(VT, Chain, LROpOut, NULL, 0); + LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0); Chain = SDValue(LROpOut.getNode(), 1); FPOpOut = getFramePointerFrameIndex(DAG); - FPOpOut = DAG.getLoad(VT, Chain, FPOpOut, NULL, 0); + FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0); Chain = SDValue(FPOpOut.getNode(), 1); } return Chain; } /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified -/// by "Src" to address "Dst" of size "Size". Alignment information is +/// by "Src" to address "Dst" of size "Size". Alignment information is /// specified by the specific parameter attribute. The copy will be passed as /// a byval function parameter. /// Sometimes what we are copying is the end of a larger object, the part that /// does not fit in registers. -static SDValue +static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, - unsigned Size) { + unsigned Size, DebugLoc dl) { SDValue SizeNode = DAG.getConstant(Size, MVT::i32); - return DAG.getMemcpy(Chain, Dst, Src, SizeNode, Flags.getByValAlign(), false, - NULL, 0, NULL, 0); + return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), + false, NULL, 0, NULL, 0); } /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of @@ -2090,7 +2104,8 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVector &MemOpChains, - SmallVector& TailCallArguments) { + SmallVector& TailCallArguments, + DebugLoc dl) { MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); if (!isTailCall) { if (isVector) { @@ -2099,10 +2114,10 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, StackPtr = DAG.getRegister(PPC::X1, MVT::i64); else StackPtr = DAG.getRegister(PPC::R1, MVT::i32); - PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, + PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, DAG.getConstant(ArgOffset, PtrVT)); } - MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); // Calculate and remember argument location. } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset, TailCallArguments); @@ -2119,20 +2134,21 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, && CC == CallingConv::Fast && PerformTailCallOpt; SDValue Callee = TheCall->getCallee(); unsigned NumOps = TheCall->getNumArgs(); - + DebugLoc dl = TheCall->getDebugLoc(); + bool isMachoABI = Subtarget.isMachoABI(); bool isELF32_ABI = Subtarget.isELF32_ABI(); MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = PtrVT == MVT::i64; unsigned PtrByteSize = isPPC64 ? 8 : 4; - + MachineFunction &MF = DAG.getMachineFunction(); // args_to_use will accumulate outgoing args for the PPCISD::CALL case in // SelectExpr to use to put the arguments in the appropriate registers. std::vector args_to_use; - + // Mark this function as potentially containing a function that contains a // tail call. As a consequence the frame pointer will be used for dynamicalloc // and restoring the callers stack pointer in this functions epilog. This is @@ -2153,16 +2169,16 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, // Calculate by how many bytes the stack has to be adjusted in case of tail // call optimization. int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); - + // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); SDValue CallSeqStart = Chain; - + // Load the return address and frame pointer so it can be move somewhere else // later. SDValue LROp, FPOp; - Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp); + Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); // Set up a copy of the stack pointer for use loading and storing any // arguments that may not fit in the registers available for argument @@ -2172,14 +2188,14 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, StackPtr = DAG.getRegister(PPC::X1, MVT::i64); else StackPtr = DAG.getRegister(PPC::R1, MVT::i32); - + // Figure out which arguments are going to go in registers, and which in // memory. Also, if this is a vararg function, floating point operations // must be stored to our stack, and loaded into integer regs as well, if // any integer regs are available for argument passing. unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI); unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; - + static const unsigned GPR_32[] = { // 32-bit registers. PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, @@ -2189,7 +2205,7 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; static const unsigned *FPR = GetFPR(Subtarget); - + static const unsigned VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 @@ -2197,7 +2213,7 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, const unsigned NumGPRs = array_lengthof(GPR_32); const unsigned NumFPRs = isMachoABI ? 13 : 8; const unsigned NumVRs = array_lengthof( VR); - + const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32; std::vector > RegsToPass; @@ -2214,7 +2230,7 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, // PtrOff will be used to store the current argument to the stack if a // register cannot be found for it. SDValue PtrOff; - + // Stack align in ELF 32 if (isELF32_ABI && Align) PtrOff = DAG.getConstant(ArgOffset + ((ArgOffset/4) % 2) * PtrByteSize, @@ -2222,13 +2238,13 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, else PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); - PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff); + PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); // On PPC64, promote integers to 64-bit values. if (isPPC64 && Arg.getValueType() == MVT::i32) { // FIXME: Should this use ANY_EXTEND if neither sext nor zext? unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; - Arg = DAG.getNode(ExtOp, MVT::i64, Arg); + Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); } // FIXME Elf untested, what are alignment rules? @@ -2241,7 +2257,7 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, // Everything else is passed left-justified. MVT VT = (Size==1) ? MVT::i8 : MVT::i16; if (GPR_idx != NumGPRs) { - SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, PtrVT, Chain, Arg, + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, NULL, 0, VT); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); @@ -2249,10 +2265,10 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, ArgOffset += PtrByteSize; } else { SDValue Const = DAG.getConstant(4 - Size, PtrOff.getValueType()); - SDValue AddPtr = DAG.getNode(ISD::ADD, PtrVT, PtrOff, Const); + SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr, - CallSeqStart.getNode()->getOperand(0), - Flags, DAG, Size); + CallSeqStart.getNode()->getOperand(0), + Flags, DAG, Size, dl); // This must go outside the CALLSEQ_START..END. SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, CallSeqStart.getNode()->getOperand(1)); @@ -2267,8 +2283,8 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, // code assumes it is there, even if it could be put entirely into // registers. (This is not what the doc says.) SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, - CallSeqStart.getNode()->getOperand(0), - Flags, DAG, Size); + CallSeqStart.getNode()->getOperand(0), + Flags, DAG, Size, dl); // This must go outside the CALLSEQ_START..END. SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, CallSeqStart.getNode()->getOperand(1)); @@ -2277,9 +2293,9 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, // And copy the pieces of it that fit into registers. for (unsigned j=0; j CallSeqOps; - SDVTList CallSeqNodeTys = DAG.getVTList(MVT::Other, MVT::Flag); - CallSeqOps.push_back(Chain); - CallSeqOps.push_back(DAG.getIntPtrConstant(NumBytes, true)); - CallSeqOps.push_back(DAG.getIntPtrConstant(0, true)); - if (InFlag.getNode()) - CallSeqOps.push_back(InFlag); - Chain = DAG.getNode(ISD::CALLSEQ_END, CallSeqNodeTys, &CallSeqOps[0], - CallSeqOps.size()); + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(0, true), InFlag); InFlag = Chain.getValue(1); } @@ -2504,7 +2513,7 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, SmallVector Ops; unsigned CallOpc = isMachoABI? PPCISD::CALL_Macho : PPCISD::CALL_ELF; - + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol // node so that legalize doesn't hack it. @@ -2519,14 +2528,14 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair // to do the call, we can't use PPCISD::CALL. SDValue MTCTROps[] = {Chain, Callee, InFlag}; - Chain = DAG.getNode(PPCISD::MTCTR, NodeTys, MTCTROps, + Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps, 2 + (InFlag.getNode() != 0)); InFlag = Chain.getValue(1); - + // Copy the callee address into R12/X12 on darwin. if (isMachoABI) { unsigned Reg = Callee.getValueType() == MVT::i32 ? PPC::R12 : PPC::X12; - Chain = DAG.getCopyToReg(Chain, Reg, Callee, InFlag); + Chain = DAG.getCopyToReg(Chain, dl, Reg, Callee, InFlag); InFlag = Chain.getValue(1); } @@ -2553,7 +2562,7 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, // Add argument registers to the end of the list so that they are known live // into the call. for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) - Ops.push_back(DAG.getRegister(RegsToPass[i].first, + Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); // When performing tail call optimization the callee pops its arguments off @@ -2569,12 +2578,12 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, if (isTailCall) { assert(InFlag.getNode() && "Flag must be set. Depend on flag being set in LowerRET"); - Chain = DAG.getNode(PPCISD::TAILCALL, + Chain = DAG.getNode(PPCISD::TAILCALL, dl, TheCall->getVTList(), &Ops[0], Ops.size()); return SDValue(Chain.getNode(), Op.getResNo()); } - Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size()); + Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); InFlag = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), @@ -2588,13 +2597,14 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, unsigned CallerCC = DAG.getMachineFunction().getFunction()->getCallingConv(); CCState CCInfo(CallerCC, isVarArg, TM, RVLocs); CCInfo.AnalyzeCallResult(TheCall, RetCC_PPC); - + // Copy all of the result registers out of their specified physreg. for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { CCValAssign &VA = RVLocs[i]; MVT VT = VA.getValVT(); assert(VA.isRegLoc() && "Can only return in registers!"); - Chain = DAG.getCopyFromReg(Chain, VA.getLocReg(), VT, InFlag).getValue(1); + Chain = DAG.getCopyFromReg(Chain, dl, + VA.getLocReg(), VT, InFlag).getValue(1); ResultVals.push_back(Chain.getValue(0)); InFlag = Chain.getValue(2); } @@ -2602,22 +2612,23 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, // If the function returns void, just return the chain. if (RVLocs.empty()) return Chain; - + // Otherwise, merge everything together with a MERGE_VALUES node. ResultVals.push_back(Chain); - SDValue Res = DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(), + SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(), &ResultVals[0], ResultVals.size()); return Res.getValue(Op.getResNo()); } -SDValue PPCTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG, +SDValue PPCTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) { SmallVector RVLocs; unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); + DebugLoc dl = Op.getDebugLoc(); CCState CCInfo(CC, isVarArg, TM, RVLocs); CCInfo.AnalyzeReturn(Op.getNode(), RetCC_PPC); - + // If this is the first return lowered for this function, add the regs to the // liveout set for the function. if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { @@ -2652,30 +2663,32 @@ SDValue PPCTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG, for (unsigned i=3; i < TailCall.getNumOperands()-1; i++) { Operands.push_back(Chain.getOperand(i)); } - return DAG.getNode(PPCISD::TC_RETURN, MVT::Other, &Operands[0], + return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Operands[0], Operands.size()); } SDValue Flag; - + // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag); + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + Op.getOperand(i*2+1), Flag); Flag = Chain.getValue(1); } if (Flag.getNode()) - return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Chain, Flag); + return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain, Flag); else - return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Chain); + return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain); } SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) { // When we pop the dynamic allocation we need to restore the SP link. - + DebugLoc dl = Op.getDebugLoc(); + // Get the corect type for pointers. MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); @@ -2687,15 +2700,15 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, // Get the operands for the STACKRESTORE. SDValue Chain = Op.getOperand(0); SDValue SaveSP = Op.getOperand(1); - + // Load the old link SP. - SDValue LoadLinkSP = DAG.getLoad(PtrVT, Chain, StackPtr, NULL, 0); - + SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, NULL, 0); + // Restore the stack pointer. - Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), SP, SaveSP); - + Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP); + // Store the old link SP. - return DAG.getStore(Chain, LoadLinkSP, StackPtr, NULL, 0); + return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, NULL, 0); } @@ -2740,11 +2753,11 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { if (!FPSI) { // Find out what the fix offset of the frame pointer save area. int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isMachoABI); - + // Allocate the frame index for frame pointer save area. - FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset); + FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset); // Save the result. - FI->setFramePointerSaveIndex(FPSI); + FI->setFramePointerSaveIndex(FPSI); } return DAG.getFrameIndex(FPSI, PtrVT); } @@ -2755,18 +2768,19 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, // Get the inputs. SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); - + DebugLoc dl = Op.getDebugLoc(); + // Get the corect type for pointers. MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Negate the size. - SDValue NegSize = DAG.getNode(ISD::SUB, PtrVT, + SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT, DAG.getConstant(0, PtrVT), Size); // Construct a node for the frame pointer save index. SDValue FPSIdx = getFramePointerFrameIndex(DAG); // Build a DYNALLOC node. SDValue Ops[3] = { Chain, NegSize, FPSIdx }; SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other); - return DAG.getNode(PPCISD::DYNALLOC, VTs, Ops, 3); + return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3); } /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when @@ -2775,18 +2789,19 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { // Not FP? Not a fsel. if (!Op.getOperand(0).getValueType().isFloatingPoint() || !Op.getOperand(2).getValueType().isFloatingPoint()) - return SDValue(); - + return Op; + ISD::CondCode CC = cast(Op.getOperand(4))->get(); - + // Cannot handle SETEQ/SETNE. - if (CC == ISD::SETEQ || CC == ISD::SETNE) return SDValue(); - + if (CC == ISD::SETEQ || CC == ISD::SETNE) return Op; + MVT ResVT = Op.getValueType(); MVT CmpVT = Op.getOperand(0).getValueType(); SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); SDValue TV = Op.getOperand(2), FV = Op.getOperand(3); - + DebugLoc dl = Op.getDebugLoc(); + // If the RHS of the comparison is a 0.0, we don't need to do the // subtraction at all. if (isFloatingPointZero(RHS)) @@ -2798,65 +2813,66 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { case ISD::SETOGE: case ISD::SETGE: if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits - LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS); - return DAG.getNode(PPCISD::FSEL, ResVT, LHS, TV, FV); + LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); case ISD::SETUGT: case ISD::SETGT: std::swap(TV, FV); // fsel is natively setge, swap operands for setlt case ISD::SETOLE: case ISD::SETLE: if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits - LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS); - return DAG.getNode(PPCISD::FSEL, ResVT, - DAG.getNode(ISD::FNEG, MVT::f64, LHS), TV, FV); + LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, + DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV); } - + SDValue Cmp; switch (CC) { default: break; // SETUO etc aren't handled by fsel. case ISD::SETULT: case ISD::SETLT: - Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits - Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp); - return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV); + Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); case ISD::SETOGE: case ISD::SETGE: - Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits - Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp); - return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV); + Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); case ISD::SETUGT: case ISD::SETGT: - Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits - Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp); - return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV); + Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); case ISD::SETOLE: case ISD::SETLE: - Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits - Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp); - return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV); + Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); + return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); } - return SDValue(); + return Op; } // FIXME: Split this code up when LegalizeDAGTypes lands. -SDValue PPCTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG, + DebugLoc dl) { assert(Op.getOperand(0).getValueType().isFloatingPoint()); SDValue Src = Op.getOperand(0); if (Src.getValueType() == MVT::f32) - Src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Src); + Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); SDValue Tmp; switch (Op.getValueType().getSimpleVT()) { default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!"); case MVT::i32: - Tmp = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Src); + Tmp = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Src); break; case MVT::i64: - Tmp = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Src); + Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src); break; } @@ -2864,29 +2880,32 @@ SDValue PPCTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64); // Emit a store to the stack slot. - SDValue Chain = DAG.getStore(DAG.getEntryNode(), Tmp, FIPtr, NULL, 0); + SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, NULL, 0); // Result is a load from the stack slot. If loading 4 bytes, make sure to // add in a bias. if (Op.getValueType() == MVT::i32) - FIPtr = DAG.getNode(ISD::ADD, FIPtr.getValueType(), FIPtr, + FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, DAG.getConstant(4, FIPtr.getValueType())); - return DAG.getLoad(Op.getValueType(), Chain, FIPtr, NULL, 0); + return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, NULL, 0); } SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); // Don't handle ppc_fp128 here; let it be lowered to a libcall. if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) return SDValue(); if (Op.getOperand(0).getValueType() == MVT::i64) { - SDValue Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0)); - SDValue FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits); + SDValue Bits = DAG.getNode(ISD::BIT_CONVERT, dl, + MVT::f64, Op.getOperand(0)); + SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits); if (Op.getValueType() == MVT::f32) - FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP, DAG.getIntPtrConstant(0)); + FP = DAG.getNode(ISD::FP_ROUND, dl, + MVT::f32, FP, DAG.getIntPtrConstant(0)); return FP; } - + assert(Op.getOperand(0).getValueType() == MVT::i32 && "Unhandled SINT_TO_FP type in custom expander!"); // Since we only generate this in 64-bit mode, we can take advantage of @@ -2897,27 +2916,28 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { int FrameIdx = FrameInfo->CreateStackObject(8, 8); MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); - - SDValue Ext64 = DAG.getNode(PPCISD::EXTSW_32, MVT::i32, + + SDValue Ext64 = DAG.getNode(PPCISD::EXTSW_32, dl, MVT::i32, Op.getOperand(0)); - + // STD the extended value into the stack slot. MachineMemOperand MO(PseudoSourceValue::getFixedStack(FrameIdx), MachineMemOperand::MOStore, 0, 8, 8); - SDValue Store = DAG.getNode(PPCISD::STD_32, MVT::Other, + SDValue Store = DAG.getNode(PPCISD::STD_32, dl, MVT::Other, DAG.getEntryNode(), Ext64, FIdx, DAG.getMemOperand(MO)); // Load the value as a double. - SDValue Ld = DAG.getLoad(MVT::f64, Store, FIdx, NULL, 0); - + SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, NULL, 0); + // FCFID it and return it. - SDValue FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Ld); + SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld); if (Op.getValueType() == MVT::f32) - FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP, DAG.getIntPtrConstant(0)); + FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0)); return FP; } SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); /* The rounding mode is in bits 30:31 of FPSR, and has the following settings: @@ -2946,224 +2966,133 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { // Save FP Control Word to register NodeTys.push_back(MVT::f64); // return register NodeTys.push_back(MVT::Flag); // unused in this context - SDValue Chain = DAG.getNode(PPCISD::MFFS, NodeTys, &InFlag, 0); + SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0); // Save FP register to stack slot int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); - SDValue Store = DAG.getStore(DAG.getEntryNode(), Chain, + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot, NULL, 0); // Load FP Control Word from low 32 bits of stack slot. SDValue Four = DAG.getConstant(4, PtrVT); - SDValue Addr = DAG.getNode(ISD::ADD, PtrVT, StackSlot, Four); - SDValue CWD = DAG.getLoad(MVT::i32, Store, Addr, NULL, 0); + SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); + SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, NULL, 0); // Transform as necessary SDValue CWD1 = - DAG.getNode(ISD::AND, MVT::i32, + DAG.getNode(ISD::AND, dl, MVT::i32, CWD, DAG.getConstant(3, MVT::i32)); SDValue CWD2 = - DAG.getNode(ISD::SRL, MVT::i32, - DAG.getNode(ISD::AND, MVT::i32, - DAG.getNode(ISD::XOR, MVT::i32, + DAG.getNode(ISD::SRL, dl, MVT::i32, + DAG.getNode(ISD::AND, dl, MVT::i32, + DAG.getNode(ISD::XOR, dl, MVT::i32, CWD, DAG.getConstant(3, MVT::i32)), DAG.getConstant(3, MVT::i32)), DAG.getConstant(1, MVT::i32)); SDValue RetVal = - DAG.getNode(ISD::XOR, MVT::i32, CWD1, CWD2); + DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2); return DAG.getNode((VT.getSizeInBits() < 16 ? - ISD::TRUNCATE : ISD::ZERO_EXTEND), VT, RetVal); + ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal); } SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getValueType(); unsigned BitWidth = VT.getSizeInBits(); + DebugLoc dl = Op.getDebugLoc(); assert(Op.getNumOperands() == 3 && VT == Op.getOperand(1).getValueType() && "Unexpected SHL!"); - + // Expand into a bunch of logical ops. Note that these ops // depend on the PPC behavior for oversized shift amounts. SDValue Lo = Op.getOperand(0); SDValue Hi = Op.getOperand(1); SDValue Amt = Op.getOperand(2); MVT AmtVT = Amt.getValueType(); - - SDValue Tmp1 = DAG.getNode(ISD::SUB, AmtVT, + + SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, AmtVT), Amt); - SDValue Tmp2 = DAG.getNode(PPCISD::SHL, VT, Hi, Amt); - SDValue Tmp3 = DAG.getNode(PPCISD::SRL, VT, Lo, Tmp1); - SDValue Tmp4 = DAG.getNode(ISD::OR , VT, Tmp2, Tmp3); - SDValue Tmp5 = DAG.getNode(ISD::ADD, AmtVT, Amt, + SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt); + SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1); + SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3); + SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, DAG.getConstant(-BitWidth, AmtVT)); - SDValue Tmp6 = DAG.getNode(PPCISD::SHL, VT, Lo, Tmp5); - SDValue OutHi = DAG.getNode(ISD::OR, VT, Tmp4, Tmp6); - SDValue OutLo = DAG.getNode(PPCISD::SHL, VT, Lo, Amt); + SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5); + SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); + SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt); SDValue OutOps[] = { OutLo, OutHi }; - return DAG.getMergeValues(OutOps, 2); + return DAG.getMergeValues(OutOps, 2, dl); } SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); unsigned BitWidth = VT.getSizeInBits(); assert(Op.getNumOperands() == 3 && VT == Op.getOperand(1).getValueType() && "Unexpected SRL!"); - + // Expand into a bunch of logical ops. Note that these ops // depend on the PPC behavior for oversized shift amounts. SDValue Lo = Op.getOperand(0); SDValue Hi = Op.getOperand(1); SDValue Amt = Op.getOperand(2); MVT AmtVT = Amt.getValueType(); - - SDValue Tmp1 = DAG.getNode(ISD::SUB, AmtVT, + + SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, AmtVT), Amt); - SDValue Tmp2 = DAG.getNode(PPCISD::SRL, VT, Lo, Amt); - SDValue Tmp3 = DAG.getNode(PPCISD::SHL, VT, Hi, Tmp1); - SDValue Tmp4 = DAG.getNode(ISD::OR , VT, Tmp2, Tmp3); - SDValue Tmp5 = DAG.getNode(ISD::ADD, AmtVT, Amt, + SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); + SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); + SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); + SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, DAG.getConstant(-BitWidth, AmtVT)); - SDValue Tmp6 = DAG.getNode(PPCISD::SRL, VT, Hi, Tmp5); - SDValue OutLo = DAG.getNode(ISD::OR, VT, Tmp4, Tmp6); - SDValue OutHi = DAG.getNode(PPCISD::SRL, VT, Hi, Amt); + SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5); + SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); + SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt); SDValue OutOps[] = { OutLo, OutHi }; - return DAG.getMergeValues(OutOps, 2); + return DAG.getMergeValues(OutOps, 2, dl); } SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); MVT VT = Op.getValueType(); unsigned BitWidth = VT.getSizeInBits(); assert(Op.getNumOperands() == 3 && VT == Op.getOperand(1).getValueType() && "Unexpected SRA!"); - + // Expand into a bunch of logical ops, followed by a select_cc. SDValue Lo = Op.getOperand(0); SDValue Hi = Op.getOperand(1); SDValue Amt = Op.getOperand(2); MVT AmtVT = Amt.getValueType(); - - SDValue Tmp1 = DAG.getNode(ISD::SUB, AmtVT, + + SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, AmtVT), Amt); - SDValue Tmp2 = DAG.getNode(PPCISD::SRL, VT, Lo, Amt); - SDValue Tmp3 = DAG.getNode(PPCISD::SHL, VT, Hi, Tmp1); - SDValue Tmp4 = DAG.getNode(ISD::OR , VT, Tmp2, Tmp3); - SDValue Tmp5 = DAG.getNode(ISD::ADD, AmtVT, Amt, + SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); + SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); + SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); + SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, DAG.getConstant(-BitWidth, AmtVT)); - SDValue Tmp6 = DAG.getNode(PPCISD::SRA, VT, Hi, Tmp5); - SDValue OutHi = DAG.getNode(PPCISD::SRA, VT, Hi, Amt); - SDValue OutLo = DAG.getSelectCC(Tmp5, DAG.getConstant(0, AmtVT), + SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5); + SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt); + SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT), Tmp4, Tmp6, ISD::SETLE); SDValue OutOps[] = { OutLo, OutHi }; - return DAG.getMergeValues(OutOps, 2); + return DAG.getMergeValues(OutOps, 2, dl); } //===----------------------------------------------------------------------===// // Vector related lowering. // -// If this is a vector of constants or undefs, get the bits. A bit in -// UndefBits is set if the corresponding element of the vector is an -// ISD::UNDEF value. For undefs, the corresponding VectorBits values are -// zero. Return true if this is not an array of constants, false if it is. -// -static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2], - uint64_t UndefBits[2]) { - // Start with zero'd results. - VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0; - - unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits(); - for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { - SDValue OpVal = BV->getOperand(i); - - unsigned PartNo = i >= e/2; // In the upper 128 bits? - unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t. - - uint64_t EltBits = 0; - if (OpVal.getOpcode() == ISD::UNDEF) { - uint64_t EltUndefBits = ~0U >> (32-EltBitSize); - UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize); - continue; - } else if (ConstantSDNode *CN = dyn_cast(OpVal)) { - EltBits = CN->getZExtValue() & (~0U >> (32-EltBitSize)); - } else if (ConstantFPSDNode *CN = dyn_cast(OpVal)) { - assert(CN->getValueType(0) == MVT::f32 && - "Only one legal FP vector type!"); - EltBits = FloatToBits(CN->getValueAPF().convertToFloat()); - } else { - // Nonconstant element. - return true; - } - - VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize); - } - - //printf("%llx %llx %llx %llx\n", - // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]); - return false; -} - -// If this is a splat (repetition) of a value across the whole vector, return -// the smallest size that splats it. For example, "0x01010101010101..." is a -// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and -// SplatSize = 1 byte. -static bool isConstantSplat(const uint64_t Bits128[2], - const uint64_t Undef128[2], - unsigned &SplatBits, unsigned &SplatUndef, - unsigned &SplatSize) { - - // Don't let undefs prevent splats from matching. See if the top 64-bits are - // the same as the lower 64-bits, ignoring undefs. - if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0])) - return false; // Can't be a splat if two pieces don't match. - - uint64_t Bits64 = Bits128[0] | Bits128[1]; - uint64_t Undef64 = Undef128[0] & Undef128[1]; - - // Check that the top 32-bits are the same as the lower 32-bits, ignoring - // undefs. - if ((Bits64 & (~Undef64 >> 32)) != ((Bits64 >> 32) & ~Undef64)) - return false; // Can't be a splat if two pieces don't match. - - uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32); - uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32); - - // If the top 16-bits are different than the lower 16-bits, ignoring - // undefs, we have an i32 splat. - if ((Bits32 & (~Undef32 >> 16)) != ((Bits32 >> 16) & ~Undef32)) { - SplatBits = Bits32; - SplatUndef = Undef32; - SplatSize = 4; - return true; - } - - uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16); - uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16); - - // If the top 8-bits are different than the lower 8-bits, ignoring - // undefs, we have an i16 splat. - if ((Bits16 & (uint16_t(~Undef16) >> 8)) != ((Bits16 >> 8) & ~Undef16)) { - SplatBits = Bits16; - SplatUndef = Undef16; - SplatSize = 2; - return true; - } - - // Otherwise, we have an 8-bit splat. - SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8); - SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8); - SplatSize = 1; - return true; -} - /// BuildSplatI - Build a canonical splati of Val with an element size of /// SplatSize. Cast the result to VT. static SDValue BuildSplatI(int Val, unsigned SplatSize, MVT VT, - SelectionDAG &DAG) { + SelectionDAG &DAG, DebugLoc dl) { assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); static const MVT VTys[] = { // canonical VT to use for each size. @@ -3171,39 +3100,39 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, MVT VT, }; MVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1]; - + // Force vspltis[hw] -1 to vspltisb -1 to canonicalize. if (Val == -1) SplatSize = 1; - + MVT CanonicalVT = VTys[SplatSize-1]; - + // Build a canonical splat for this value. - SDValue Elt = DAG.getConstant(Val, CanonicalVT.getVectorElementType()); + SDValue Elt = DAG.getConstant(Val, MVT::i32); SmallVector Ops; Ops.assign(CanonicalVT.getVectorNumElements(), Elt); - SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, CanonicalVT, + SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, &Ops[0], Ops.size()); - return DAG.getNode(ISD::BIT_CONVERT, ReqVT, Res); + return DAG.getNode(ISD::BIT_CONVERT, dl, ReqVT, Res); } /// BuildIntrinsicOp - Return a binary operator intrinsic node with the /// specified intrinsic ID. static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, - SelectionDAG &DAG, - MVT DestVT = MVT::Other) { + SelectionDAG &DAG, DebugLoc dl, + MVT DestVT = MVT::Other) { if (DestVT == MVT::Other) DestVT = LHS.getValueType(); - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT, + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, DAG.getConstant(IID, MVT::i32), LHS, RHS); } /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the /// specified intrinsic ID. static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, - SDValue Op2, SelectionDAG &DAG, - MVT DestVT = MVT::Other) { + SDValue Op2, SelectionDAG &DAG, + DebugLoc dl, MVT DestVT = MVT::Other) { if (DestVT == MVT::Other) DestVT = Op0.getValueType(); - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT, + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2); } @@ -3211,17 +3140,16 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified /// amount. The result has the specified value type. static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, - MVT VT, SelectionDAG &DAG) { + MVT VT, SelectionDAG &DAG, DebugLoc dl) { // Force LHS/RHS to be the right type. - LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, LHS); - RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, RHS); + LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, LHS); + RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, RHS); - SDValue Ops[16]; + int Ops[16]; for (unsigned i = 0; i != 16; ++i) - Ops[i] = DAG.getConstant(i+Amt, MVT::i8); - SDValue T = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, LHS, RHS, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops,16)); - return DAG.getNode(ISD::BIT_CONVERT, VT, T); + Ops[i] = i + Amt; + SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T); } // If this is a case we can't handle, return null and let the default @@ -3229,177 +3157,175 @@ static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, // selects to a single instruction, return Op. Otherwise, if we can codegen // this case more efficiently than a constant pool load, lower it to the // sequence of ops that should be used. -SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, - SelectionDAG &DAG) { - // If this is a vector of constants or undefs, get the bits. A bit in - // UndefBits is set if the corresponding element of the vector is an - // ISD::UNDEF value. For undefs, the corresponding VectorBits values are - // zero. - uint64_t VectorBits[2]; - uint64_t UndefBits[2]; - if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)) - return SDValue(); // Not a constant vector. - - // If this is a splat (repetition) of a value across the whole vector, return - // the smallest size that splats it. For example, "0x01010101010101..." is a - // splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and - // SplatSize = 1 byte. - unsigned SplatBits, SplatUndef, SplatSize; - if (isConstantSplat(VectorBits, UndefBits, SplatBits, SplatUndef, SplatSize)){ - bool HasAnyUndefs = (UndefBits[0] | UndefBits[1]) != 0; - - // First, handle single instruction cases. - - // All zeros? - if (SplatBits == 0) { - // Canonicalize all zero vectors to be v4i32. - if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) { - SDValue Z = DAG.getConstant(0, MVT::i32); - Z = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Z, Z, Z, Z); - Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Z); - } - return Op; +SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); + BuildVectorSDNode *BVN = dyn_cast(Op.getNode()); + assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); + + // Check if this is a splat of a constant value. + APInt APSplatBits, APSplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, + HasAnyUndefs) || SplatBitSize > 32) + return SDValue(); + + unsigned SplatBits = APSplatBits.getZExtValue(); + unsigned SplatUndef = APSplatUndef.getZExtValue(); + unsigned SplatSize = SplatBitSize / 8; + + // First, handle single instruction cases. + + // All zeros? + if (SplatBits == 0) { + // Canonicalize all zero vectors to be v4i32. + if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) { + SDValue Z = DAG.getConstant(0, MVT::i32); + Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z); + Op = DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Z); } + return Op; + } + + // If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. + int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >> + (32-SplatBitSize)); + if (SextVal >= -16 && SextVal <= 15) + return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl); + + + // Two instruction sequences. + + // If this value is in the range [-32,30] and is even, use: + // tmp = VSPLTI[bhw], result = add tmp, tmp + if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) { + SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl); + Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res); + return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res); + } + + // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is + // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important + // for fneg/fabs. + if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) { + // Make -1 and vspltisw -1: + SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl); + + // Make the VSLW intrinsic, computing 0x8000_0000. + SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, + OnesV, DAG, dl); + + // xor by OnesV to invert it. + Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV); + return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res); + } + + // Check to see if this is a wide variety of vsplti*, binop self cases. + static const signed char SplatCsts[] = { + -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, + -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16 + }; - // If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. - int32_t SextVal= int32_t(SplatBits << (32-8*SplatSize)) >> (32-8*SplatSize); - if (SextVal >= -16 && SextVal <= 15) - return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG); - - - // Two instruction sequences. - - // If this value is in the range [-32,30] and is even, use: - // tmp = VSPLTI[bhw], result = add tmp, tmp - if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) { - SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG); - Res = DAG.getNode(ISD::ADD, Res.getValueType(), Res, Res); - return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res); + for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) { + // Indirect through the SplatCsts array so that we favor 'vsplti -1' for + // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1' + int i = SplatCsts[idx]; + + // Figure out what shift amount will be used by altivec if shifted by i in + // this splat size. + unsigned TypeShiftAmt = i & (SplatBitSize-1); + + // vsplti + shl self. + if (SextVal == (i << (int)TypeShiftAmt)) { + SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); + static const unsigned IIDs[] = { // Intrinsic to use for each size. + Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, + Intrinsic::ppc_altivec_vslw + }; + Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); + return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res); } - - // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is - // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important - // for fneg/fabs. - if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) { - // Make -1 and vspltisw -1: - SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG); - - // Make the VSLW intrinsic, computing 0x8000_0000. - SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, - OnesV, DAG); - - // xor by OnesV to invert it. - Res = DAG.getNode(ISD::XOR, MVT::v4i32, Res, OnesV); - return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res); + + // vsplti + srl self. + if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { + SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); + static const unsigned IIDs[] = { // Intrinsic to use for each size. + Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0, + Intrinsic::ppc_altivec_vsrw + }; + Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); + return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res); } - // Check to see if this is a wide variety of vsplti*, binop self cases. - unsigned SplatBitSize = SplatSize*8; - static const signed char SplatCsts[] = { - -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, - -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16 - }; - - for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) { - // Indirect through the SplatCsts array so that we favor 'vsplti -1' for - // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1' - int i = SplatCsts[idx]; - - // Figure out what shift amount will be used by altivec if shifted by i in - // this splat size. - unsigned TypeShiftAmt = i & (SplatBitSize-1); - - // vsplti + shl self. - if (SextVal == (i << (int)TypeShiftAmt)) { - SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG); - static const unsigned IIDs[] = { // Intrinsic to use for each size. - Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, - Intrinsic::ppc_altivec_vslw - }; - Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG); - return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res); - } - - // vsplti + srl self. - if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { - SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG); - static const unsigned IIDs[] = { // Intrinsic to use for each size. - Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0, - Intrinsic::ppc_altivec_vsrw - }; - Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG); - return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res); - } - - // vsplti + sra self. - if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { - SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG); - static const unsigned IIDs[] = { // Intrinsic to use for each size. - Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0, - Intrinsic::ppc_altivec_vsraw - }; - Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG); - return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res); - } - - // vsplti + rol self. - if (SextVal == (int)(((unsigned)i << TypeShiftAmt) | - ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) { - SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG); - static const unsigned IIDs[] = { // Intrinsic to use for each size. - Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0, - Intrinsic::ppc_altivec_vrlw - }; - Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG); - return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res); - } + // vsplti + sra self. + if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { + SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); + static const unsigned IIDs[] = { // Intrinsic to use for each size. + Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0, + Intrinsic::ppc_altivec_vsraw + }; + Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); + return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res); + } - // t = vsplti c, result = vsldoi t, t, 1 - if (SextVal == ((i << 8) | (i >> (TypeShiftAmt-8)))) { - SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG); - return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG); - } - // t = vsplti c, result = vsldoi t, t, 2 - if (SextVal == ((i << 16) | (i >> (TypeShiftAmt-16)))) { - SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG); - return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG); - } - // t = vsplti c, result = vsldoi t, t, 3 - if (SextVal == ((i << 24) | (i >> (TypeShiftAmt-24)))) { - SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG); - return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG); - } + // vsplti + rol self. + if (SextVal == (int)(((unsigned)i << TypeShiftAmt) | + ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) { + SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); + static const unsigned IIDs[] = { // Intrinsic to use for each size. + Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0, + Intrinsic::ppc_altivec_vrlw + }; + Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); + return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res); + } + + // t = vsplti c, result = vsldoi t, t, 1 + if (SextVal == ((i << 8) | (i >> (TypeShiftAmt-8)))) { + SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); + return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl); } - - // Three instruction sequences. - - // Odd, in range [17,31]: (vsplti C)-(vsplti -16). - if (SextVal >= 0 && SextVal <= 31) { - SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG); - SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG); - LHS = DAG.getNode(ISD::SUB, LHS.getValueType(), LHS, RHS); - return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), LHS); + // t = vsplti c, result = vsldoi t, t, 2 + if (SextVal == ((i << 16) | (i >> (TypeShiftAmt-16)))) { + SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); + return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl); } - // Odd, in range [-31,-17]: (vsplti C)+(vsplti -16). - if (SextVal >= -31 && SextVal <= 0) { - SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG); - SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG); - LHS = DAG.getNode(ISD::ADD, LHS.getValueType(), LHS, RHS); - return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), LHS); + // t = vsplti c, result = vsldoi t, t, 3 + if (SextVal == ((i << 24) | (i >> (TypeShiftAmt-24)))) { + SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); + return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl); } } - + + // Three instruction sequences. + + // Odd, in range [17,31]: (vsplti C)-(vsplti -16). + if (SextVal >= 0 && SextVal <= 31) { + SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl); + SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl); + LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS); + return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), LHS); + } + // Odd, in range [-31,-17]: (vsplti C)+(vsplti -16). + if (SextVal >= -31 && SextVal <= 0) { + SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl); + SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl); + LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS); + return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), LHS); + } + return SDValue(); } /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit /// the specified operations to build the shuffle. static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, - SDValue RHS, SelectionDAG &DAG) { + SDValue RHS, SelectionDAG &DAG, + DebugLoc dl) { unsigned OpNum = (PFEntry >> 26) & 0x0F; unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); - + enum { OP_COPY = 0, // Copy, used for things like to say it is <0,1,2,3> OP_VMRGHW, @@ -3412,18 +3338,18 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, OP_VSLDOI8, OP_VSLDOI12 }; - + if (OpNum == OP_COPY) { if (LHSID == (1*9+2)*9+3) return LHS; assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); return RHS; } - + SDValue OpLHS, OpRHS; - OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG); - OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG); - - unsigned ShufIdxs[16]; + OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); + OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); + + int ShufIdxs[16]; switch (OpNum) { default: assert(0 && "Unknown i32 permute!"); case OP_VMRGHW: @@ -3455,81 +3381,84 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, ShufIdxs[i] = (i&3)+12; break; case OP_VSLDOI4: - return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG); + return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl); case OP_VSLDOI8: - return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG); + return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl); case OP_VSLDOI12: - return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG); + return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl); } - SDValue Ops[16]; - for (unsigned i = 0; i != 16; ++i) - Ops[i] = DAG.getConstant(ShufIdxs[i], MVT::i8); - - return DAG.getNode(ISD::VECTOR_SHUFFLE, OpLHS.getValueType(), OpLHS, OpRHS, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops, 16)); + MVT VT = OpLHS.getValueType(); + OpLHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpLHS); + OpRHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpRHS); + SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T); } /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this /// is a shuffle we can handle in a single instruction, return it. Otherwise, /// return the code it can be lowered into. Worst case, it can always be /// lowered into a vperm. -SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, - SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, + SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); - SDValue PermMask = Op.getOperand(2); - + ShuffleVectorSDNode *SVOp = cast(Op); + MVT VT = Op.getValueType(); + // Cases that are handled by instructions that take permute immediates // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be // selected by the instruction selector. if (V2.getOpcode() == ISD::UNDEF) { - if (PPC::isSplatShuffleMask(PermMask.getNode(), 1) || - PPC::isSplatShuffleMask(PermMask.getNode(), 2) || - PPC::isSplatShuffleMask(PermMask.getNode(), 4) || - PPC::isVPKUWUMShuffleMask(PermMask.getNode(), true) || - PPC::isVPKUHUMShuffleMask(PermMask.getNode(), true) || - PPC::isVSLDOIShuffleMask(PermMask.getNode(), true) != -1 || - PPC::isVMRGLShuffleMask(PermMask.getNode(), 1, true) || - PPC::isVMRGLShuffleMask(PermMask.getNode(), 2, true) || - PPC::isVMRGLShuffleMask(PermMask.getNode(), 4, true) || - PPC::isVMRGHShuffleMask(PermMask.getNode(), 1, true) || - PPC::isVMRGHShuffleMask(PermMask.getNode(), 2, true) || - PPC::isVMRGHShuffleMask(PermMask.getNode(), 4, true)) { + if (PPC::isSplatShuffleMask(SVOp, 1) || + PPC::isSplatShuffleMask(SVOp, 2) || + PPC::isSplatShuffleMask(SVOp, 4) || + PPC::isVPKUWUMShuffleMask(SVOp, true) || + PPC::isVPKUHUMShuffleMask(SVOp, true) || + PPC::isVSLDOIShuffleMask(SVOp, true) != -1 || + PPC::isVMRGLShuffleMask(SVOp, 1, true) || + PPC::isVMRGLShuffleMask(SVOp, 2, true) || + PPC::isVMRGLShuffleMask(SVOp, 4, true) || + PPC::isVMRGHShuffleMask(SVOp, 1, true) || + PPC::isVMRGHShuffleMask(SVOp, 2, true) || + PPC::isVMRGHShuffleMask(SVOp, 4, true)) { return Op; } } - + // Altivec has a variety of "shuffle immediates" that take two vector inputs // and produce a fixed permutation. If any of these match, do not lower to // VPERM. - if (PPC::isVPKUWUMShuffleMask(PermMask.getNode(), false) || - PPC::isVPKUHUMShuffleMask(PermMask.getNode(), false) || - PPC::isVSLDOIShuffleMask(PermMask.getNode(), false) != -1 || - PPC::isVMRGLShuffleMask(PermMask.getNode(), 1, false) || - PPC::isVMRGLShuffleMask(PermMask.getNode(), 2, false) || - PPC::isVMRGLShuffleMask(PermMask.getNode(), 4, false) || - PPC::isVMRGHShuffleMask(PermMask.getNode(), 1, false) || - PPC::isVMRGHShuffleMask(PermMask.getNode(), 2, false) || - PPC::isVMRGHShuffleMask(PermMask.getNode(), 4, false)) + if (PPC::isVPKUWUMShuffleMask(SVOp, false) || + PPC::isVPKUHUMShuffleMask(SVOp, false) || + PPC::isVSLDOIShuffleMask(SVOp, false) != -1 || + PPC::isVMRGLShuffleMask(SVOp, 1, false) || + PPC::isVMRGLShuffleMask(SVOp, 2, false) || + PPC::isVMRGLShuffleMask(SVOp, 4, false) || + PPC::isVMRGHShuffleMask(SVOp, 1, false) || + PPC::isVMRGHShuffleMask(SVOp, 2, false) || + PPC::isVMRGHShuffleMask(SVOp, 4, false)) return Op; - + // Check to see if this is a shuffle of 4-byte values. If so, we can use our // perfect shuffle table to emit an optimal matching sequence. + SmallVector PermMask; + SVOp->getMask(PermMask); + unsigned PFIndexes[4]; bool isFourElementShuffle = true; for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number unsigned EltNo = 8; // Start out undef. for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. - if (PermMask.getOperand(i*4+j).getOpcode() == ISD::UNDEF) + if (PermMask[i*4+j] < 0) continue; // Undef, ignore it. - - unsigned ByteSource = - cast(PermMask.getOperand(i*4+j))->getZExtValue(); + + unsigned ByteSource = PermMask[i*4+j]; if ((ByteSource & 3) != j) { isFourElementShuffle = false; break; } - + if (EltNo == 8) { EltNo = ByteSource/4; } else if (EltNo != ByteSource/4) { @@ -3539,18 +3468,18 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, } PFIndexes[i] = EltNo; } - - // If this shuffle can be expressed as a shuffle of 4-byte elements, use the + + // If this shuffle can be expressed as a shuffle of 4-byte elements, use the // perfect shuffle vector to determine if it is cost effective to do this as // discrete instructions, or whether we should use a vperm. if (isFourElementShuffle) { // Compute the index in the perfect shuffle table. - unsigned PFTableIndex = + unsigned PFTableIndex = PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; - + unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; unsigned Cost = (PFEntry >> 30); - + // Determining when to avoid vperm is tricky. Many things affect the cost // of vperm, particularly how many times the perm mask needs to be computed. // For example, if the perm mask can be hoisted out of a loop or is already @@ -3559,38 +3488,34 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, // the loop requires an extra register. // // As a compromise, we only emit discrete instructions if the shuffle can be - // generated in 3 or fewer operations. When we have loop information + // generated in 3 or fewer operations. When we have loop information // available, if this block is within a loop, we should avoid using vperm // for 3-operation perms and use a constant pool load instead. - if (Cost < 3) - return GeneratePerfectShuffle(PFEntry, V1, V2, DAG); + if (Cost < 3) + return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); } - + // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant // vector that will get spilled to the constant pool. if (V2.getOpcode() == ISD::UNDEF) V2 = V1; - + // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except // that it is in input element units, not in bytes. Convert now. MVT EltVT = V1.getValueType().getVectorElementType(); unsigned BytesPerElement = EltVT.getSizeInBits()/8; - + SmallVector ResultMask; - for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) { - unsigned SrcElt; - if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF) - SrcElt = 0; - else - SrcElt = cast(PermMask.getOperand(i))->getZExtValue(); - + for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { + unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; + for (unsigned j = 0; j != BytesPerElement; ++j) ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, - MVT::i8)); + MVT::i32)); } - - SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, + + SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, &ResultMask[0], ResultMask.size()); - return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask); + return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask); } /// getAltivecCompareInfo - Given an intrinsic, return false if it is not an @@ -3618,7 +3543,7 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; - + // Normal Comparisons. case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; @@ -3639,23 +3564,24 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom /// lower, do it, otherwise return null. -SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, +SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { // If this is a lowered altivec predicate compare, CompareOpc is set to the // opcode number of the comparison. + DebugLoc dl = Op.getDebugLoc(); int CompareOpc; bool isDot; if (!getAltivecCompareInfo(Op, CompareOpc, isDot)) return SDValue(); // Don't custom lower most intrinsics. - + // If this is a non-dot comparison, make the VCMP node and we are done. if (!isDot) { - SDValue Tmp = DAG.getNode(PPCISD::VCMP, Op.getOperand(2).getValueType(), + SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(), Op.getOperand(1), Op.getOperand(2), DAG.getConstant(CompareOpc, MVT::i32)); - return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Tmp); + return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Tmp); } - + // Create the PPCISD altivec 'dot' comparison node. SDValue Ops[] = { Op.getOperand(2), // LHS @@ -3665,14 +3591,14 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, std::vector VTs; VTs.push_back(Op.getOperand(2).getValueType()); VTs.push_back(MVT::Flag); - SDValue CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops, 3); - + SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3); + // Now that we have the comparison, emit a copy from the CR to a GPR. // This is flagged to the above dot comparison. - SDValue Flags = DAG.getNode(PPCISD::MFCR, MVT::i32, + SDValue Flags = DAG.getNode(PPCISD::MFCR, dl, MVT::i32, DAG.getRegister(PPC::CR6, MVT::i32), - CompNode.getValue(1)); - + CompNode.getValue(1)); + // Unpack the result based on how the target uses it. unsigned BitNo; // Bit # of CR6. bool InvertBit; // Invert result? @@ -3691,89 +3617,91 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, BitNo = 2; InvertBit = true; break; } - + // Shift the bit into the low position. - Flags = DAG.getNode(ISD::SRL, MVT::i32, Flags, + Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags, DAG.getConstant(8-(3-BitNo), MVT::i32)); // Isolate the bit. - Flags = DAG.getNode(ISD::AND, MVT::i32, Flags, + Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags, DAG.getConstant(1, MVT::i32)); - + // If we are supposed to, toggle the bit. if (InvertBit) - Flags = DAG.getNode(ISD::XOR, MVT::i32, Flags, + Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags, DAG.getConstant(1, MVT::i32)); return Flags; } -SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, +SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); // Create a stack slot that is 16-byte aligned. MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16); MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); - + // Store the input value into Value#0 of the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, NULL, 0); // Load it out. - return DAG.getLoad(Op.getValueType(), Store, FIdx, NULL, 0); + return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, NULL, 0); } SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); if (Op.getValueType() == MVT::v4i32) { SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); - - SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG); - SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG); // +16 as shift amt. - + + SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl); + SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt. + SDValue RHSSwap = // = vrlw RHS, 16 - BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG); - + BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl); + // Shrinkify inputs to v8i16. - LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, LHS); - RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHS); - RHSSwap = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHSSwap); - + LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, LHS); + RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, RHS); + RHSSwap = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, RHSSwap); + // Low parts multiplied together, generating 32-bit results (we ignore the // top parts). SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, - LHS, RHS, DAG, MVT::v4i32); - + LHS, RHS, DAG, dl, MVT::v4i32); + SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, - LHS, RHSSwap, Zero, DAG, MVT::v4i32); + LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32); // Shift the high parts up 16 bits. - HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, Neg16, DAG); - return DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd); + HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, + Neg16, DAG, dl); + return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd); } else if (Op.getValueType() == MVT::v8i16) { SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); - - SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG); + + SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl); return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, - LHS, RHS, Zero, DAG); + LHS, RHS, Zero, DAG, dl); } else if (Op.getValueType() == MVT::v16i8) { SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); - + // Multiply the even 8-bit parts, producing 16-bit sums. SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, - LHS, RHS, DAG, MVT::v8i16); - EvenParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, EvenParts); - + LHS, RHS, DAG, dl, MVT::v8i16); + EvenParts = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, EvenParts); + // Multiply the odd 8-bit parts, producing 16-bit sums. SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, - LHS, RHS, DAG, MVT::v8i16); - OddParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, OddParts); - + LHS, RHS, DAG, dl, MVT::v8i16); + OddParts = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OddParts); + // Merge the results together. - SDValue Ops[16]; + int Ops[16]; for (unsigned i = 0; i != 8; ++i) { - Ops[i*2 ] = DAG.getConstant(2*i+1, MVT::i8); - Ops[i*2+1] = DAG.getConstant(2*i+1+16, MVT::i8); + Ops[i*2 ] = 2*i+1; + Ops[i*2+1] = 2*i+1+16; } - return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, EvenParts, OddParts, - DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops, 16)); + return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); } else { assert(0 && "Unknown mul to lower!"); abort(); @@ -3784,23 +3712,23 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) { /// SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { switch (Op.getOpcode()) { - default: assert(0 && "Wasn't expecting to be able to lower this!"); + default: assert(0 && "Wasn't expecting to be able to lower this!"); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); - case ISD::VASTART: + case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset, VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget); - - case ISD::VAARG: + + case ISD::VAARG: return LowerVAARG(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset, VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget); case ISD::FORMAL_ARGUMENTS: - return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex, + return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset, VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget); @@ -3812,7 +3740,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); - case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); + case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG, + Op.getDebugLoc()); case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); @@ -3827,7 +3756,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); - + // Frame & Return address. case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); @@ -3838,6 +3767,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { void PPCTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl&Results, SelectionDAG &DAG) { + DebugLoc dl = N->getDebugLoc(); switch (N->getOpcode()) { default: assert(false && "Do not know how to custom type legalize this operation!"); @@ -3845,9 +3775,11 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::FP_ROUND_INREG: { assert(N->getValueType(0) == MVT::ppcf128); assert(N->getOperand(0).getValueType() == MVT::ppcf128); - SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::f64, N->getOperand(0), + SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, + MVT::f64, N->getOperand(0), DAG.getIntPtrConstant(0)); - SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::f64, N->getOperand(0), + SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, + MVT::f64, N->getOperand(0), DAG.getIntPtrConstant(1)); // This sequence changes FPSCR to do round-to-zero, adds the two halves @@ -3858,7 +3790,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, NodeTys.push_back(MVT::f64); // Return register NodeTys.push_back(MVT::Flag); // Returns a flag for later insns - Result = DAG.getNode(PPCISD::MFFS, NodeTys, &InFlag, 0); + Result = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0); MFFSreg = Result.getValue(0); InFlag = Result.getValue(1); @@ -3866,14 +3798,14 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, NodeTys.push_back(MVT::Flag); // Returns a flag Ops[0] = DAG.getConstant(31, MVT::i32); Ops[1] = InFlag; - Result = DAG.getNode(PPCISD::MTFSB1, NodeTys, Ops, 2); + Result = DAG.getNode(PPCISD::MTFSB1, dl, NodeTys, Ops, 2); InFlag = Result.getValue(0); NodeTys.clear(); NodeTys.push_back(MVT::Flag); // Returns a flag Ops[0] = DAG.getConstant(30, MVT::i32); Ops[1] = InFlag; - Result = DAG.getNode(PPCISD::MTFSB0, NodeTys, Ops, 2); + Result = DAG.getNode(PPCISD::MTFSB0, dl, NodeTys, Ops, 2); InFlag = Result.getValue(0); NodeTys.clear(); @@ -3882,7 +3814,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, Ops[0] = Lo; Ops[1] = Hi; Ops[2] = InFlag; - Result = DAG.getNode(PPCISD::FADDRTZ, NodeTys, Ops, 3); + Result = DAG.getNode(PPCISD::FADDRTZ, dl, NodeTys, Ops, 3); FPreg = Result.getValue(0); InFlag = Result.getValue(1); @@ -3892,16 +3824,17 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, Ops[1] = MFFSreg; Ops[2] = FPreg; Ops[3] = InFlag; - Result = DAG.getNode(PPCISD::MTFSF, NodeTys, Ops, 4); + Result = DAG.getNode(PPCISD::MTFSF, dl, NodeTys, Ops, 4); FPreg = Result.getValue(0); // We know the low half is about to be thrown away, so just use something // convenient. - Results.push_back(DAG.getNode(ISD::BUILD_PAIR, MVT::ppcf128, FPreg, FPreg)); + Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128, + FPreg, FPreg)); return; } case ISD::FP_TO_SINT: - Results.push_back(LowerFP_TO_SINT(SDValue(N, 0), DAG)); + Results.push_back(LowerFP_TO_SINT(SDValue(N, 0), DAG, dl)); return; } } @@ -3913,7 +3846,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, MachineBasicBlock * PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, - bool is64bit, unsigned BinOpcode) { + bool is64bit, unsigned BinOpcode) const { // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); @@ -3926,6 +3859,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, unsigned ptrA = MI->getOperand(1).getReg(); unsigned ptrB = MI->getOperand(2).getReg(); unsigned incr = MI->getOperand(3).getReg(); + DebugLoc dl = MI->getDebugLoc(); MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); @@ -3951,14 +3885,14 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, // bne- loopMBB // fallthrough --> exitMBB BB = loopMBB; - BuildMI(BB, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) + BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) .addReg(ptrA).addReg(ptrB); if (BinOpcode) - BuildMI(BB, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); - BuildMI(BB, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) + BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); + BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) .addReg(TmpReg).addReg(ptrA).addReg(ptrB); - BuildMI(BB, TII->get(PPC::BCC)) - .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); + BuildMI(BB, dl, TII->get(PPC::BCC)) + .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); BB->addSuccessor(loopMBB); BB->addSuccessor(exitMBB); @@ -3969,10 +3903,10 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, } MachineBasicBlock * -PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, +PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, bool is8bit, // operation - unsigned BinOpcode) { + unsigned BinOpcode) const { // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); // In 64 bit mode we have to use 64 bits for addresses, even though the @@ -3990,6 +3924,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, unsigned ptrA = MI->getOperand(1).getReg(); unsigned ptrB = MI->getOperand(2).getReg(); unsigned incr = MI->getOperand(3).getReg(); + DebugLoc dl = MI->getDebugLoc(); MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); @@ -3998,7 +3933,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, exitMBB->transferSuccessors(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); - const TargetRegisterClass *RC = + const TargetRegisterClass *RC = is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass : (const TargetRegisterClass *) &PPC::GPRCRegClass; unsigned PtrReg = RegInfo.createVirtualRegister(RC); @@ -4042,61 +3977,61 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, if (ptrA!=PPC::R0) { Ptr1Reg = RegInfo.createVirtualRegister(RC); - BuildMI(BB, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) + BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) .addReg(ptrA).addReg(ptrB); } else { Ptr1Reg = ptrB; } - BuildMI(BB, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) + BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); - BuildMI(BB, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) + BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); if (is64bit) - BuildMI(BB, TII->get(PPC::RLDICR), PtrReg) + BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) .addReg(Ptr1Reg).addImm(0).addImm(61); else - BuildMI(BB, TII->get(PPC::RLWINM), PtrReg) + BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); - BuildMI(BB, TII->get(PPC::SLW), Incr2Reg) + BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg) .addReg(incr).addReg(ShiftReg); if (is8bit) - BuildMI(BB, TII->get(PPC::LI), Mask2Reg).addImm(255); + BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); else { - BuildMI(BB, TII->get(PPC::LI), Mask3Reg).addImm(0); - BuildMI(BB, TII->get(PPC::ORI), Mask2Reg).addReg(Mask3Reg).addImm(65535); + BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); + BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535); } - BuildMI(BB, TII->get(PPC::SLW), MaskReg) + BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) .addReg(Mask2Reg).addReg(ShiftReg); BB = loopMBB; - BuildMI(BB, TII->get(PPC::LWARX), TmpDestReg) + BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) .addReg(PPC::R0).addReg(PtrReg); if (BinOpcode) - BuildMI(BB, TII->get(BinOpcode), TmpReg) + BuildMI(BB, dl, TII->get(BinOpcode), TmpReg) .addReg(Incr2Reg).addReg(TmpDestReg); - BuildMI(BB, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg) + BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg) .addReg(TmpDestReg).addReg(MaskReg); - BuildMI(BB, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg) + BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg) .addReg(TmpReg).addReg(MaskReg); - BuildMI(BB, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg) + BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg) .addReg(Tmp3Reg).addReg(Tmp2Reg); - BuildMI(BB, TII->get(PPC::STWCX)) + BuildMI(BB, dl, TII->get(PPC::STWCX)) .addReg(Tmp4Reg).addReg(PPC::R0).addReg(PtrReg); - BuildMI(BB, TII->get(PPC::BCC)) - .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); + BuildMI(BB, dl, TII->get(PPC::BCC)) + .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); BB->addSuccessor(loopMBB); BB->addSuccessor(exitMBB); // exitMBB: // ... BB = exitMBB; - BuildMI(BB, TII->get(PPC::SRW), dest).addReg(TmpDestReg).addReg(ShiftReg); + BuildMI(BB, dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg).addReg(ShiftReg); return BB; } MachineBasicBlock * PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB) { + MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); // To "insert" these instructions we actually have to insert their @@ -4127,7 +4062,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); unsigned SelectPred = MI->getOperand(4).getImm(); - BuildMI(BB, TII->get(PPC::BCC)) + DebugLoc dl = MI->getDebugLoc(); + BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); F->insert(It, copy0MBB); F->insert(It, sinkMBB); @@ -4137,20 +4073,20 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Next, add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); - + // copy0MBB: // %FalseValue = ... // # fallthrough to sinkMBB BB = copy0MBB; - + // Update machine-CFG edges BB->addSuccessor(sinkMBB); - + // sinkMBB: // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... BB = sinkMBB; - BuildMI(BB, TII->get(PPC::PHI), MI->getOperand(0).getReg()) + BuildMI(BB, dl, TII->get(PPC::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); } @@ -4226,6 +4162,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, unsigned ptrB = MI->getOperand(2).getReg(); unsigned oldval = MI->getOperand(3).getReg(); unsigned newval = MI->getOperand(4).getReg(); + DebugLoc dl = MI->getDebugLoc(); MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); @@ -4254,26 +4191,26 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // st[wd]cx. dest, ptr // exitBB: BB = loop1MBB; - BuildMI(BB, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) + BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) .addReg(ptrA).addReg(ptrB); - BuildMI(BB, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0) + BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0) .addReg(oldval).addReg(dest); - BuildMI(BB, TII->get(PPC::BCC)) + BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); BB->addSuccessor(loop2MBB); BB->addSuccessor(midMBB); BB = loop2MBB; - BuildMI(BB, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) + BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) .addReg(newval).addReg(ptrA).addReg(ptrB); - BuildMI(BB, TII->get(PPC::BCC)) + BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); - BuildMI(BB, TII->get(PPC::B)).addMBB(exitMBB); + BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); BB->addSuccessor(loop1MBB); BB->addSuccessor(exitMBB); - + BB = midMBB; - BuildMI(BB, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) + BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) .addReg(dest).addReg(ptrA).addReg(ptrB); BB->addSuccessor(exitMBB); @@ -4293,6 +4230,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, unsigned ptrB = MI->getOperand(2).getReg(); unsigned oldval = MI->getOperand(3).getReg(); unsigned newval = MI->getOperand(4).getReg(); + DebugLoc dl = MI->getDebugLoc(); MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); @@ -4305,7 +4243,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, exitMBB->transferSuccessors(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); - const TargetRegisterClass *RC = + const TargetRegisterClass *RC = is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass : (const TargetRegisterClass *) &PPC::GPRCRegClass; unsigned PtrReg = RegInfo.createVirtualRegister(RC); @@ -4357,69 +4295,73 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // srw dest, tmpDest, shift if (ptrA!=PPC::R0) { Ptr1Reg = RegInfo.createVirtualRegister(RC); - BuildMI(BB, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) + BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) .addReg(ptrA).addReg(ptrB); } else { Ptr1Reg = ptrB; } - BuildMI(BB, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) + BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); - BuildMI(BB, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) + BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); if (is64bit) - BuildMI(BB, TII->get(PPC::RLDICR), PtrReg) + BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) .addReg(Ptr1Reg).addImm(0).addImm(61); else - BuildMI(BB, TII->get(PPC::RLWINM), PtrReg) + BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); - BuildMI(BB, TII->get(PPC::SLW), NewVal2Reg) + BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg) .addReg(newval).addReg(ShiftReg); - BuildMI(BB, TII->get(PPC::SLW), OldVal2Reg) + BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg) .addReg(oldval).addReg(ShiftReg); if (is8bit) - BuildMI(BB, TII->get(PPC::LI), Mask2Reg).addImm(255); + BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); else { - BuildMI(BB, TII->get(PPC::LI), Mask3Reg).addImm(0); - BuildMI(BB, TII->get(PPC::ORI), Mask2Reg).addReg(Mask3Reg).addImm(65535); + BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); + BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg) + .addReg(Mask3Reg).addImm(65535); } - BuildMI(BB, TII->get(PPC::SLW), MaskReg) + BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) .addReg(Mask2Reg).addReg(ShiftReg); - BuildMI(BB, TII->get(PPC::AND), NewVal3Reg) + BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg) .addReg(NewVal2Reg).addReg(MaskReg); - BuildMI(BB, TII->get(PPC::AND), OldVal3Reg) + BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg) .addReg(OldVal2Reg).addReg(MaskReg); BB = loop1MBB; - BuildMI(BB, TII->get(PPC::LWARX), TmpDestReg) + BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) .addReg(PPC::R0).addReg(PtrReg); - BuildMI(BB, TII->get(PPC::AND),TmpReg).addReg(TmpDestReg).addReg(MaskReg); - BuildMI(BB, TII->get(PPC::CMPW), PPC::CR0) + BuildMI(BB, dl, TII->get(PPC::AND),TmpReg) + .addReg(TmpDestReg).addReg(MaskReg); + BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0) .addReg(TmpReg).addReg(OldVal3Reg); - BuildMI(BB, TII->get(PPC::BCC)) + BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); BB->addSuccessor(loop2MBB); BB->addSuccessor(midMBB); BB = loop2MBB; - BuildMI(BB, TII->get(PPC::ANDC),Tmp2Reg).addReg(TmpDestReg).addReg(MaskReg); - BuildMI(BB, TII->get(PPC::OR),Tmp4Reg).addReg(Tmp2Reg).addReg(NewVal3Reg); - BuildMI(BB, TII->get(PPC::STWCX)).addReg(Tmp4Reg) + BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg) + .addReg(TmpDestReg).addReg(MaskReg); + BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg) + .addReg(Tmp2Reg).addReg(NewVal3Reg); + BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg) .addReg(PPC::R0).addReg(PtrReg); - BuildMI(BB, TII->get(PPC::BCC)) + BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); - BuildMI(BB, TII->get(PPC::B)).addMBB(exitMBB); + BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); BB->addSuccessor(loop1MBB); BB->addSuccessor(exitMBB); - + BB = midMBB; - BuildMI(BB, TII->get(PPC::STWCX)).addReg(TmpDestReg) + BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg) .addReg(PPC::R0).addReg(PtrReg); BB->addSuccessor(exitMBB); // exitMBB: // ... BB = exitMBB; - BuildMI(BB, TII->get(PPC::SRW),dest).addReg(TmpReg).addReg(ShiftReg); + BuildMI(BB, dl, TII->get(PPC::SRW),dest).addReg(TmpReg).addReg(ShiftReg); } else { assert(0 && "Unexpected instr type to insert"); } @@ -4436,6 +4378,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { TargetMachine &TM = getTargetMachine(); SelectionDAG &DAG = DCI.DAG; + DebugLoc dl = N->getDebugLoc(); switch (N->getOpcode()) { default: break; case PPCISD::SHL: @@ -4457,7 +4400,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, return N->getOperand(0); } break; - + case ISD::SINT_TO_FP: if (TM.getSubtarget().has64BitSupport()) { if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) { @@ -4468,16 +4411,16 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) { SDValue Val = N->getOperand(0).getOperand(0); if (Val.getValueType() == MVT::f32) { - Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val); + Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val); DCI.AddToWorklist(Val.getNode()); } - - Val = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Val); + + Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val); DCI.AddToWorklist(Val.getNode()); - Val = DAG.getNode(PPCISD::FCFID, MVT::f64, Val); + Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val); DCI.AddToWorklist(Val.getNode()); if (N->getValueType(0) == MVT::f32) { - Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val, + Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val, DAG.getIntPtrConstant(0)); DCI.AddToWorklist(Val.getNode()); } @@ -4498,18 +4441,18 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) { SDValue Val = N->getOperand(1).getOperand(0); if (Val.getValueType() == MVT::f32) { - Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val); + Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val); DCI.AddToWorklist(Val.getNode()); } - Val = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Val); + Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val); DCI.AddToWorklist(Val.getNode()); - Val = DAG.getNode(PPCISD::STFIWX, MVT::Other, N->getOperand(0), Val, + Val = DAG.getNode(PPCISD::STFIWX, dl, MVT::Other, N->getOperand(0), Val, N->getOperand(2), N->getOperand(3)); DCI.AddToWorklist(Val.getNode()); return Val; } - + // Turn STORE (BSWAP) -> sthbrx/stwbrx. if (N->getOperand(1).getOpcode() == ISD::BSWAP && N->getOperand(1).getNode()->hasOneUse() && @@ -4518,10 +4461,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, SDValue BSwapOp = N->getOperand(1).getOperand(0); // Do an any-extend to 32-bits if this is a half-word input. if (BSwapOp.getValueType() == MVT::i16) - BSwapOp = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, BSwapOp); + BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp); - return DAG.getNode(PPCISD::STBRX, MVT::Other, N->getOperand(0), BSwapOp, - N->getOperand(2), N->getOperand(3), + return DAG.getNode(PPCISD::STBRX, dl, MVT::Other, N->getOperand(0), + BSwapOp, N->getOperand(2), N->getOperand(3), DAG.getValueType(N->getOperand(1).getValueType())); } break; @@ -4543,13 +4486,13 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, MO, // MemOperand DAG.getValueType(N->getValueType(0)) // VT }; - SDValue BSLoad = DAG.getNode(PPCISD::LBRX, VTs, Ops, 4); + SDValue BSLoad = DAG.getNode(PPCISD::LBRX, dl, VTs, Ops, 4); - // If this is an i16 load, insert the truncate. + // If this is an i16 load, insert the truncate. SDValue ResVal = BSLoad; if (N->getValueType(0) == MVT::i16) - ResVal = DAG.getNode(ISD::TRUNCATE, MVT::i16, BSLoad); - + ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad); + // First, combine the bswap away. This makes the value produced by the // load dead. DCI.CombineTo(N, ResVal); @@ -4557,11 +4500,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // Next, combine the load away, we give it a bogus result value but a real // chain result. The result value is dead because the bswap is dead. DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1)); - + // Return N so it doesn't get rechecked! return SDValue(N, 0); } - + break; case PPCISD::VCMP: { // If a VCMPo node already exists with exactly the same operands as this @@ -4571,10 +4514,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, if (!N->getOperand(0).hasOneUse() && !N->getOperand(1).hasOneUse() && !N->getOperand(2).hasOneUse()) { - + // Scan all of the users of the LHS, looking for VCMPo's that match. SDNode *VCMPoNode = 0; - + SDNode *LHSN = N->getOperand(0).getNode(); for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end(); UI != E; ++UI) @@ -4585,17 +4528,17 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, VCMPoNode = *UI; break; } - + // If there is no VCMPo node, or if the flag value has a single use, don't // transform this. if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1)) break; - - // Look at the (necessarily single) use of the flag value. If it has a + + // Look at the (necessarily single) use of the flag value. If it has a // chain, this transformation is more complex. Note that multiple things // could use the value result, which we should ignore. SDNode *FlagUser = 0; - for (SDNode::use_iterator UI = VCMPoNode->use_begin(); + for (SDNode::use_iterator UI = VCMPoNode->use_begin(); FlagUser == 0; ++UI) { assert(UI != VCMPoNode->use_end() && "Didn't find user!"); SDNode *User = *UI; @@ -4606,7 +4549,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } } } - + // If the user is a MFCR instruction, we know this is safe. Otherwise we // give up for right now. if (FlagUser->getOpcode() == PPCISD::MFCR) @@ -4623,12 +4566,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, SDValue LHS = N->getOperand(2), RHS = N->getOperand(3); int CompareOpc; bool isDot; - + if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && isa(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && getAltivecCompareInfo(LHS, CompareOpc, isDot)) { assert(isDot && "Can't compare against a vector result!"); - + // If this is a comparison against something other than 0/1, then we know // that the condition is never/always true. unsigned Val = cast(RHS)->getZExtValue(); @@ -4636,12 +4579,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, if (CC == ISD::SETEQ) // Cond never true, remove branch. return N->getOperand(0); // Always !=, turn it into an unconditional branch. - return DAG.getNode(ISD::BR, MVT::Other, + return DAG.getNode(ISD::BR, dl, MVT::Other, N->getOperand(0), N->getOperand(4)); } - + bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); - + // Create the PPCISD altivec 'dot' comparison node. std::vector VTs; SDValue Ops[] = { @@ -4651,8 +4594,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, }; VTs.push_back(LHS.getOperand(2).getValueType()); VTs.push_back(MVT::Flag); - SDValue CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops, 3); - + SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3); + // Unpack the result based on how the target uses it. PPC::Predicate CompOpc; switch (cast(LHS.getOperand(1))->getZExtValue()) { @@ -4671,7 +4614,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, break; } - return DAG.getNode(PPCISD::COND_BRANCH, MVT::Other, N->getOperand(0), + return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0), DAG.getConstant(CompOpc, MVT::i32), DAG.getRegister(PPC::CR6, MVT::i32), N->getOperand(4), CompNode.getValue(1)); @@ -4679,7 +4622,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, break; } } - + return SDValue(); } @@ -4689,7 +4632,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, const APInt &Mask, - APInt &KnownZero, + APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { @@ -4720,7 +4663,7 @@ void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, case Intrinsic::ppc_altivec_vcmpgtuw_p: KnownZero = ~1U; // All bits but the low one are known to be zero. break; - } + } } } } @@ -4728,7 +4671,7 @@ void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, /// getConstraintType - Given a constraint, return the type of /// constraint it is for this target. -PPCTargetLowering::ConstraintType +PPCTargetLowering::ConstraintType PPCTargetLowering::getConstraintType(const std::string &Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { @@ -4744,7 +4687,7 @@ PPCTargetLowering::getConstraintType(const std::string &Constraint) const { return TargetLowering::getConstraintType(Constraint); } -std::pair +std::pair PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const { if (Constraint.size() == 1) { @@ -4761,13 +4704,13 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, else if (VT == MVT::f64) return std::make_pair(0U, PPC::F8RCRegisterClass); break; - case 'v': + case 'v': return std::make_pair(0U, PPC::VRRCRegisterClass); case 'y': // crrc return std::make_pair(0U, PPC::CRRCRegisterClass); } } - + return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } @@ -4817,7 +4760,7 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Letter, if ((int)Value > 0 && isPowerOf2_32(Value)) Result = DAG.getTargetConstant(Value, Op.getValueType()); break; - case 'O': // "O" is the constant zero. + case 'O': // "O" is the constant zero. if (Value == 0) Result = DAG.getTargetConstant(Value, Op.getValueType()); break; @@ -4829,31 +4772,31 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Letter, break; } } - + if (Result.getNode()) { Ops.push_back(Result); return; } - + // Handle standard constraint letters. TargetLowering::LowerAsmOperandForConstraint(Op, Letter, hasMemory, Ops, DAG); } // isLegalAddressingMode - Return true if the addressing mode represented // by AM is legal for this target, for a load/store of the specified type. -bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM, +bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const { // FIXME: PPC does not allow r+i addressing modes for vectors! - + // PPC allows a sign-extended 16-bit immediate field. if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) return false; - + // No global is ever allowed as a base. if (AM.BaseGV) return false; - - // PPC only support r+r, + + // PPC only support r+r, switch (AM.Scale) { case 0: // "r+i" or just "i", depending on HasBaseReg. break; @@ -4871,7 +4814,7 @@ bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM, // No other scales are supported. return false; } - + return true; } @@ -4884,11 +4827,12 @@ bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,const Type *Ty) const{ } bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const { - return false; + return false; } SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { - // Depths > 0 not supported yet! + DebugLoc dl = Op.getDebugLoc(); + // Depths > 0 not supported yet! if (cast(Op.getOperand(0))->getZExtValue() > 0) return SDValue(); @@ -4901,27 +4845,29 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { // Make sure the function really does not optimize away the store of the RA // to the stack. FuncInfo->setLRStoreRequired(); - return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0); + return DAG.getLoad(getPointerTy(), dl, + DAG.getEntryNode(), RetAddrFI, NULL, 0); } SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { - // Depths > 0 not supported yet! + DebugLoc dl = Op.getDebugLoc(); + // Depths > 0 not supported yet! if (cast(Op.getOperand(0))->getZExtValue() > 0) return SDValue(); - + MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = PtrVT == MVT::i64; - + MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool is31 = (NoFramePointerElim || MFI->hasVarSizedObjects()) + bool is31 = (NoFramePointerElim || MFI->hasVarSizedObjects()) && MFI->getStackSize(); if (isPPC64) - return DAG.getCopyFromReg(DAG.getEntryNode(), is31 ? PPC::X31 : PPC::X1, + return DAG.getCopyFromReg(DAG.getEntryNode(), dl, is31 ? PPC::X31 : PPC::X1, MVT::i64); else - return DAG.getCopyFromReg(DAG.getEntryNode(), is31 ? PPC::R31 : PPC::R1, + return DAG.getCopyFromReg(DAG.getEntryNode(), dl, is31 ? PPC::R31 : PPC::R1, MVT::i32); }