#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
+#include "PPCCallingConv.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;
-PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
- : TargetLowering(TM),
- Subtarget(*TM.getSubtargetImpl()) {
+PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
+ const PPCSubtarget &STI)
+ : TargetLowering(TM), Subtarget(STI) {
// Use _setjmp/_longjmp instead of setjmp/longjmp.
setUseUnderscoreSetJmp(true);
setUseUnderscoreLongJmp(true);
addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
// PowerPC has an i16 but no i8 (or i1) SEXTLOAD
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
+ }
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
if (Subtarget.useCRBits()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
if (ANDIGlueBug)
setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
- setTruncStoreAction(MVT::i64, MVT::i1, Expand);
- setTruncStoreAction(MVT::i32, MVT::i1, Expand);
- setTruncStoreAction(MVT::i16, MVT::i1, Expand);
- setTruncStoreAction(MVT::i8, MVT::i1, Expand);
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
+ setTruncStoreAction(VT, MVT::i1, Expand);
+ }
addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
}
// If we're enabling GP optimizations, use hardware square root
if (!Subtarget.hasFSQRT() &&
- !(TM.Options.UnsafeFPMath &&
- Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
+ !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
+ Subtarget.hasFRE()))
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
if (!Subtarget.hasFSQRT() &&
- !(TM.Options.UnsafeFPMath &&
- Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
+ !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
+ Subtarget.hasFRES()))
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
if (Subtarget.hasFCPSGN()) {
if (Subtarget.hasAltivec()) {
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
- for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
-
+ for (MVT VT : MVT::vector_valuetypes()) {
// add/sub are legal for all supported vector VT's.
setOperationAction(ISD::ADD , VT, Legal);
setOperationAction(ISD::SUB , VT, Legal);
+ // Vector instructions introduced in P8
+ if (Subtarget.hasP8Altivec()) {
+ setOperationAction(ISD::CTPOP, VT, Legal);
+ setOperationAction(ISD::CTLZ, VT, Legal);
+ }
+ else {
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ }
+
// We promote all shuffles to v16i8.
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
- setOperationAction(ISD::CTPOP, VT, Expand);
- setOperationAction(ISD::CTLZ, VT, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
- for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) {
- MVT::SimpleValueType InnerVT = (MVT::SimpleValueType)j;
+ for (MVT InnerVT : MVT::vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
}
- setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
- setLoadExtAction(ISD::EXTLOAD, VT, Expand);
}
// We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
}
+
+ if (Subtarget.hasP8Altivec())
+ addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
}
if (Subtarget.has64BitSupport())
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::SINT_TO_FP);
+ if (Subtarget.hasFPCVT())
+ setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::BR_CC);
// With 32 condition bits, we don't need to sink (and duplicate) compares
// aggressively in CodeGenPrep.
- if (Subtarget.useCRBits())
+ if (Subtarget.useCRBits()) {
setHasMultipleConditionRegisters();
+ setJumpIsExpensive();
+ }
setMinFunctionAlignment(2);
if (Subtarget.isDarwin())
setPrefFunctionAlignment(4);
+ switch (Subtarget.getDarwinDirective()) {
+ default: break;
+ case PPC::DIR_970:
+ case PPC::DIR_A2:
+ case PPC::DIR_E500mc:
+ case PPC::DIR_E5500:
+ case PPC::DIR_PWR4:
+ case PPC::DIR_PWR5:
+ case PPC::DIR_PWR5X:
+ case PPC::DIR_PWR6:
+ case PPC::DIR_PWR6X:
+ case PPC::DIR_PWR7:
+ case PPC::DIR_PWR8:
+ setPrefFunctionAlignment(4);
+ setPrefLoopAlignment(4);
+ break;
+ }
+
setInsertFencesForAtomic(true);
if (Subtarget.enableMachineScheduler())
computeRegisterProperties();
- // The Freescale cores does better with aggressive inlining of memcpy and
- // friends. Gcc uses same threshold of 128 bytes (= 32 word stores).
+ // The Freescale cores do better with aggressive inlining of memcpy and
+ // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
MaxStoresPerMemset = 32;
MaxStoresPerMemcpyOptSize = 8;
MaxStoresPerMemmove = 32;
MaxStoresPerMemmoveOptSize = 8;
-
- setPrefFunctionAlignment(4);
}
}
default: return nullptr;
case PPCISD::FSEL: return "PPCISD::FSEL";
case PPCISD::FCFID: return "PPCISD::FCFID";
+ case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
+ case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
+ case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
+ case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
+ case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
case PPCISD::FRE: return "PPCISD::FRE";
case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
case PPCISD::STFIWX: return "PPCISD::STFIWX";
case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
case PPCISD::VPERM: return "PPCISD::VPERM";
+ case PPCISD::CMPB: return "PPCISD::CMPB";
case PPCISD::Hi: return "PPCISD::Hi";
case PPCISD::Lo: return "PPCISD::Lo";
case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
- case PPCISD::LOAD: return "PPCISD::LOAD";
- case PPCISD::LOAD_TOC: return "PPCISD::LOAD_TOC";
case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
case PPCISD::SRL: return "PPCISD::SRL";
case PPCISD::SHL: return "PPCISD::SHL";
case PPCISD::CALL: return "PPCISD::CALL";
case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
- case PPCISD::CALL_TLS: return "PPCISD::CALL_TLS";
- case PPCISD::CALL_NOP_TLS: return "PPCISD::CALL_NOP_TLS";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
case PPCISD::BCTRL: return "PPCISD::BCTRL";
case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
case PPCISD::VCMPo: return "PPCISD::VCMPo";
case PPCISD::LBRX: return "PPCISD::LBRX";
case PPCISD::STBRX: return "PPCISD::STBRX";
+ case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
+ case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
case PPCISD::LARX: return "PPCISD::LARX";
case PPCISD::STCX: return "PPCISD::STCX";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
+ case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
+ case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
+ case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
+ case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
- bool IsLE = DAG.getSubtarget().getDataLayout()->isLittleEndian();
+ bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian();
if (ShuffleKind == 0) {
if (IsLE)
return false;
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG) {
- bool IsLE = DAG.getSubtarget().getDataLayout()->isLittleEndian();
+ bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian();
if (ShuffleKind == 0) {
if (IsLE)
return false;
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned ShuffleKind, SelectionDAG &DAG) {
- if (DAG.getSubtarget().getDataLayout()->isLittleEndian()) {
+ if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
if (ShuffleKind == 1) // unary
return isVMerge(N, UnitSize, 0, 0);
else if (ShuffleKind == 2) // swapped
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned ShuffleKind, SelectionDAG &DAG) {
- if (DAG.getSubtarget().getDataLayout()->isLittleEndian()) {
+ if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
if (ShuffleKind == 1) // unary
return isVMerge(N, UnitSize, 8, 8);
else if (ShuffleKind == 2) // swapped
if (ShiftAmt < i) return -1;
ShiftAmt -= i;
- bool isLE = DAG.getTarget().getSubtargetImpl()->getDataLayout()->
- isLittleEndian();
+ bool isLE = DAG.getTarget().getDataLayout()->isLittleEndian();
if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
// Check the rest of the elements to see if they are consecutive.
SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
assert(isSplatShuffleMask(SVOp, EltSize));
- if (DAG.getSubtarget().getDataLayout()->isLittleEndian())
+ if (DAG.getTarget().getDataLayout()->isLittleEndian())
return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
else
return SVOp->getMaskElt(0) / EltSize;
/// GetLabelAccessInfo - Return true if we should reference labels using a
/// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
-static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
- unsigned &LoOpFlags,
+static bool GetLabelAccessInfo(const TargetMachine &TM,
+ const PPCSubtarget &Subtarget,
+ unsigned &HiOpFlags, unsigned &LoOpFlags,
const GlobalValue *GV = nullptr) {
HiOpFlags = PPCII::MO_HA;
LoOpFlags = PPCII::MO_LO;
// If this is a reference to a global value that requires a non-lazy-ptr, make
// sure that instruction lowering adds it.
- if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) {
+ if (GV && Subtarget.hasLazyResolverStub(GV)) {
HiOpFlags |= PPCII::MO_NLP_FLAG;
LoOpFlags |= PPCII::MO_NLP_FLAG;
return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
}
+static void setUsesTOCBasePtr(MachineFunction &MF) {
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setUsesTOCBasePtr();
+}
+
+static void setUsesTOCBasePtr(SelectionDAG &DAG) {
+ setUsesTOCBasePtr(DAG.getMachineFunction());
+}
+
SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+ setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
}
unsigned MOHiFlag, MOLoFlag;
- bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+ bool isPIC =
+ GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
if (isPIC && Subtarget.isSVR4ABI()) {
SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+ setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
}
unsigned MOHiFlag, MOLoFlag;
- bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+ bool isPIC =
+ GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
if (isPIC && Subtarget.isSVR4ABI()) {
SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
// 64-bit SVR4 ABI code is always position-independent.
// The actual BlockAddress is stored in the TOC.
if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+ setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(BASDN), MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
}
unsigned MOHiFlag, MOLoFlag;
- bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+ bool isPIC =
+ GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
}
-// Generate a call to __tls_get_addr for the given GOT entry Op.
-std::pair<SDValue,SDValue>
-PPCTargetLowering::lowerTLSCall(SDValue Op, SDLoc dl,
- SelectionDAG &DAG) const {
-
- Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- Entry.Node = Op;
- Entry.Ty = IntPtrTy;
- Args.push_back(Entry);
-
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
- .setCallee(CallingConv::C, IntPtrTy,
- DAG.getTargetExternalSymbol("__tls_get_addr", getPointerTy()),
- std::move(Args), 0);
-
- return LowerCallTo(CLI);
-}
-
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
PPCII::MO_TLS);
SDValue GOTPtr;
if (is64bit) {
+ setUsesTOCBasePtr(DAG);
SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
PtrVT, GOTReg, TGA);
}
if (Model == TLSModel::GeneralDynamic) {
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- PPCII::MO_TLSGD);
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
SDValue GOTPtr;
if (is64bit) {
+ setUsesTOCBasePtr(DAG);
SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
GOTReg, TGA);
else
GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
}
- SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
- GOTPtr, TGA);
- std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
- return CallResult.first;
+ return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
+ GOTPtr, TGA, TGA);
}
if (Model == TLSModel::LocalDynamic) {
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- PPCII::MO_TLSLD);
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
SDValue GOTPtr;
if (is64bit) {
+ setUsesTOCBasePtr(DAG);
SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
GOTReg, TGA);
else
GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
}
- SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
- GOTPtr, TGA);
- std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
- SDValue TLSAddr = CallResult.first;
- SDValue Chain = CallResult.second;
- SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
- Chain, TLSAddr, TGA);
+ SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
+ PtrVT, GOTPtr, TGA, TGA);
+ SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
+ PtrVT, TLSAddr, TGA);
return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
}
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+ setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
}
unsigned MOHiFlag, MOLoFlag;
- bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV);
+ bool isPIC =
+ GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag, GV);
if (isPIC && Subtarget.isSVR4ABI()) {
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
/// EnsureStackAlignment - Round stack frame size up from NumBytes to
/// ensure minimum alignment required for target.
-static unsigned EnsureStackAlignment(const TargetMachine &Target,
+static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
unsigned NumBytes) {
- unsigned TargetAlign =
- Target.getSubtargetImpl()->getFrameLowering()->getStackAlignment();
+ unsigned TargetAlign = Lowering->getStackAlignment();
unsigned AlignMask = TargetAlign - 1;
NumBytes = (NumBytes + AlignMask) & ~AlignMask;
return NumBytes;
*DAG.getContext());
// Reserve space for the linkage area on the stack.
- unsigned LinkageSize = PPCFrameLowering::getLinkageSize(false, false, false);
+ unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
CCInfo.AllocateStack(LinkageSize, PtrByteSize);
CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
// call optimized function's reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
- MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
+ MinReservedArea =
+ EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
FuncInfo->setMinReservedArea(MinReservedArea);
SmallVector<SDValue, 8> MemOps;
MachineFrameInfo *MFI = MF.getFrameInfo();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ assert(!(CallConv == CallingConv::Fast && isVarArg) &&
+ "fastcc not supported on varargs functions");
+
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Potential tail calls could cause overwriting of argument stack slots.
bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
(CallConv == CallingConv::Fast));
unsigned PtrByteSize = 8;
-
- unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
- isELFv2ABI);
+ unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
static const MCPhysReg GPR[] = {
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
// although the first ones are often in registers.
unsigned ArgOffset = LinkageSize;
- unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
SmallVector<SDValue, 8> MemOps;
Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
unsigned CurArgIdx = 0;
std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
CurArgIdx = Ins[ArgNo].OrigArgIndex;
- /* Respect alignment of argument on the stack. */
- unsigned Align =
- CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
- ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
- unsigned CurArgOffset = ArgOffset;
+ // We re-align the argument offset for each argument, except when using the
+ // fast calling convention, when we need to make sure we do that only when
+ // we'll actually use a stack slot.
+ unsigned CurArgOffset, Align;
+ auto ComputeArgOffset = [&]() {
+ /* Respect alignment of argument on the stack. */
+ Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
+ ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
+ CurArgOffset = ArgOffset;
+ };
- /* Compute GPR index associated with argument offset. */
- GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
- GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
+ if (CallConv != CallingConv::Fast) {
+ ComputeArgOffset();
+
+ /* Compute GPR index associated with argument offset. */
+ GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+ GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
+ }
// FIXME the codegen can be much improved in some cases.
// We do not have to keep everything in memory.
if (Flags.isByVal()) {
+ if (CallConv == CallingConv::Fast)
+ ComputeArgOffset();
+
// ObjSize is the true size, ArgSize rounded up to multiple of registers.
ObjSize = Flags.getByValSize();
ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
InVals.push_back(Arg);
if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store;
// passed directly. Clang may use those instead of "byval" aggregate
// types to avoid forcing arguments to memory unnecessarily.
if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
// value to MVT::i64 and then truncate to the correct register size.
ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
} else {
+ if (CallConv == CallingConv::Fast)
+ ComputeArgOffset();
+
needsLoad = true;
ArgSize = PtrByteSize;
}
- ArgOffset += 8;
+ if (CallConv != CallingConv::Fast || needsLoad)
+ ArgOffset += 8;
break;
case MVT::f32:
if (ObjectVT == MVT::f32)
VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
else
- VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() ?
- &PPC::VSFRCRegClass :
- &PPC::F8RCRegClass);
+ VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
+ ? &PPC::VSFRCRegClass
+ : &PPC::F8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++FPR_idx;
- } else if (GPR_idx != Num_GPR_Regs) {
+ } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
+ // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
+ // once we support fp <-> gpr moves.
+
// This can only ever happen in the presence of f32 array types,
// since otherwise we never run out of FPRs before running out
// of GPRs.
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
if (ObjectVT == MVT::f32) {
ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
} else {
+ if (CallConv == CallingConv::Fast)
+ ComputeArgOffset();
+
needsLoad = true;
}
// When passing an array of floats, the array occupies consecutive
// space in the argument area; only round up to the next doubleword
// at the end of the array. Otherwise, each float takes 8 bytes.
- ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
- ArgOffset += ArgSize;
- if (Flags.isInConsecutiveRegsLast())
- ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ if (CallConv != CallingConv::Fast || needsLoad) {
+ ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
+ ArgOffset += ArgSize;
+ if (Flags.isInConsecutiveRegsLast())
+ ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ }
break;
case MVT::v4f32:
case MVT::v4i32:
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++VR_idx;
} else {
+ if (CallConv == CallingConv::Fast)
+ ComputeArgOffset();
+
needsLoad = true;
}
- ArgOffset += 16;
+ if (CallConv != CallingConv::Fast || needsLoad)
+ ArgOffset += 16;
break;
}
// call optimized functions' reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
- MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
+ MinReservedArea =
+ EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
FuncInfo->setMinReservedArea(MinReservedArea);
// If the function takes variable number of arguments, make a frame index for
bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
(CallConv == CallingConv::Fast));
unsigned PtrByteSize = isPPC64 ? 8 : 4;
-
- unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true,
- false);
+ unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
unsigned ArgOffset = LinkageSize;
// Area that is at least reserved in caller of this function.
unsigned MinReservedArea = ArgOffset;
// call optimized functions' reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
- MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
+ MinReservedArea =
+ EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
FuncInfo->setMinReservedArea(MinReservedArea);
// If the function takes variable number of arguments, make a frame index for
if (SPDiff) {
// Calculate the new stack slot for the return address.
int SlotSize = isPPC64 ? 8 : 4;
- int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64,
- isDarwinABI);
+ const PPCFrameLowering *FL =
+ MF.getSubtarget<PPCSubtarget>().getFrameLowering();
+ int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
NewRetAddrLoc, true);
EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
// When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
// slot as the FP is never overwritten.
if (isDarwinABI) {
- int NewFPLoc =
- SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
true);
SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
InFlag = Chain.getValue(1);
}
+// Is this global address that of a function that can be called by name? (as
+// opposed to something that must hold a descriptor for an indirect call).
+static bool isFunctionGlobalAddress(SDValue Callee) {
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
+ Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
+ return false;
+
+ return G->getGlobal()->getType()->getElementType()->isFunctionTy();
+ }
+
+ return false;
+}
+
static
unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
- SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall,
+ SDValue &Chain, SDValue CallSeqStart, SDLoc dl, int SPDiff,
+ bool isTailCall, bool IsPatchPoint,
SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
- const PPCSubtarget &Subtarget) {
+ ImmutableCallSite *CS, const PPCSubtarget &Subtarget) {
bool isPPC64 = Subtarget.isPPC64();
bool isSVR4ABI = Subtarget.isSVR4ABI();
needIndirectCall = false;
}
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ if (isFunctionGlobalAddress(Callee)) {
+ GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
+ // A call to a TLS address is actually an indirect call to a
+ // thread-specific pointer.
unsigned OpFlags = 0;
if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
(Subtarget.getTargetTriple().isMacOSX() &&
(Subtarget.getTargetTriple().isMacOSX() &&
Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) ||
(Subtarget.isTargetELF() && !isPPC64 &&
- DAG.getTarget().getRelocationModel() == Reloc::PIC_) ) {
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
needIndirectCall = false;
}
+ if (IsPatchPoint) {
+ // We'll form an invalid direct call when lowering a patchpoint; the full
+ // sequence for an indirect call is complicated, and many of the
+ // instructions introduced might have side effects (and, thus, can't be
+ // removed later). The call itself will be removed as soon as the
+ // argument/return lowering is complete, so the fact that it has the wrong
+ // kind of operands should not really matter.
+ needIndirectCall = false;
+ }
+
if (needIndirectCall) {
// Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
// to do the call, we can't use PPCISD::CALL.
// 6. On return of the callee, the TOC of the caller needs to be
// restored (this is done in FinishCall()).
//
- // All those operations are flagged together to ensure that no other
+ // The loads are scheduled at the beginning of the call sequence, and the
+ // register copies are flagged together to ensure that no other
// operations can be scheduled in between. E.g. without flagging the
- // operations together, a TOC access in the caller could be scheduled
- // between the load of the callee TOC and the branch to the callee, which
+ // copies together, a TOC access in the caller could be scheduled between
+ // the assignment of the callee TOC and the branch to the callee, which
// results in the TOC access going through the TOC of the callee instead
// of going through the TOC of the caller, which leads to incorrect code.
// Load the address of the function entry point from the function
// descriptor.
- SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue);
- SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs,
- makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
- Chain = LoadFuncPtr.getValue(1);
- InFlag = LoadFuncPtr.getValue(2);
+ SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
+ if (LDChain.getValueType() == MVT::Glue)
+ LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
+
+ bool LoadsInv = Subtarget.hasInvariantFunctionDescriptors();
+
+ MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr);
+ SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
+ false, false, LoadsInv, 8);
// Load environment pointer into r11.
- // Offset of the environment pointer within the function descriptor.
SDValue PtrOff = DAG.getIntPtrConstant(16);
-
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
- SDValue LoadEnvPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, Chain, AddPtr,
- InFlag);
- Chain = LoadEnvPtr.getValue(1);
- InFlag = LoadEnvPtr.getValue(2);
+ SDValue LoadEnvPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddPtr,
+ MPI.getWithOffset(16), false, false,
+ LoadsInv, 8);
+
+ SDValue TOCOff = DAG.getIntPtrConstant(8);
+ SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
+ SDValue TOCPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddTOC,
+ MPI.getWithOffset(8), false, false,
+ LoadsInv, 8);
+
+ setUsesTOCBasePtr(DAG);
+ SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
+ InFlag);
+ Chain = TOCVal.getValue(0);
+ InFlag = TOCVal.getValue(1);
SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
InFlag);
+
Chain = EnvVal.getValue(0);
InFlag = EnvVal.getValue(1);
- // Load TOC of the callee into r2. We are using a target-specific load
- // with r2 hard coded, because the result of a target-independent load
- // would never go directly into r2, since r2 is a reserved register (which
- // prevents the register allocator from allocating it), resulting in an
- // additional register being allocated and an unnecessary move instruction
- // being generated.
- VTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue TOCOff = DAG.getIntPtrConstant(8);
- SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
- SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain,
- AddTOC, InFlag);
- Chain = LoadTOCPtr.getValue(0);
- InFlag = LoadTOCPtr.getValue(1);
-
MTCTROps[0] = Chain;
MTCTROps[1] = LoadFuncPtr;
MTCTROps[2] = InFlag;
if (Callee.getNode()) {
Ops.push_back(Chain);
Ops.push_back(Callee);
-
- // If this is a call to __tls_get_addr, find the symbol whose address
- // is to be taken and add it to the list. This will be used to
- // generate __tls_get_addr(<sym>@tlsgd) or __tls_get_addr(<sym>@tlsld).
- // We find the symbol by walking the chain to the CopyFromReg, walking
- // back from the CopyFromReg to the ADDI_TLSGD_L or ADDI_TLSLD_L, and
- // pulling the symbol from that node.
- if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
- if (!strcmp(S->getSymbol(), "__tls_get_addr")) {
- assert(!needIndirectCall && "Indirect call to __tls_get_addr???");
- SDNode *AddI = Chain.getNode()->getOperand(2).getNode();
- SDValue TGTAddr = AddI->getOperand(1);
- assert(TGTAddr.getNode()->getOpcode() == ISD::TargetGlobalTLSAddress &&
- "Didn't find target global TLS address where we expected one");
- Ops.push_back(TGTAddr);
- CallOpc = PPCISD::CALL_TLS;
- }
}
// If this is a tail call add stack pointer delta.
if (isTailCall)
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
- // Direct calls in the ELFv2 ABI need the TOC register live into the call.
- if (Callee.getNode() && isELFv2ABI)
+ // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
+ // into the call.
+ if (isSVR4ABI && isPPC64 && !IsPatchPoint) {
+ setUsesTOCBasePtr(DAG);
Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
+ }
return CallOpc;
}
SDValue
PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
- bool isTailCall, bool isVarArg,
+ bool isTailCall, bool isVarArg, bool IsPatchPoint,
SelectionDAG &DAG,
SmallVector<std::pair<unsigned, SDValue>, 8>
&RegsToPass,
SDValue InFlag, SDValue Chain,
- SDValue &Callee,
+ SDValue CallSeqStart, SDValue &Callee,
int SPDiff, unsigned NumBytes,
const SmallVectorImpl<ISD::InputArg> &Ins,
- SmallVectorImpl<SDValue> &InVals) const {
+ SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite *CS) const {
- bool isELFv2ABI = Subtarget.isELFv2ABI();
std::vector<EVT> NodeTys;
SmallVector<SDValue, 8> Ops;
- unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
- isTailCall, RegsToPass, Ops, NodeTys,
- Subtarget);
+ unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
+ SPDiff, isTailCall, IsPatchPoint, RegsToPass,
+ Ops, NodeTys, CS, Subtarget);
// Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
// Add a register mask operand representing the call-preserved registers.
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
// stack frame. If caller and callee belong to the same module (and have the
// same TOC), the NOP will remain unchanged.
- if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64()) {
+ if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() &&
+ !IsPatchPoint) {
if (CallOpc == PPCISD::BCTRL) {
// This is a call through a function pointer.
// Restore the caller TOC from the save area into R2.
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
- unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
+ unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset);
SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
Ops.insert(std::next(Ops.begin()), AddTOC);
} else if ((CallOpc == PPCISD::CALL) &&
(!isLocalCall(Callee) ||
- DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_))
// Otherwise insert NOP for non-local calls.
CallOpc = PPCISD::CALL_NOP;
- } else if (CallOpc == PPCISD::CALL_TLS)
- // For 64-bit SVR4, TLS calls are always non-local.
- CallOpc = PPCISD::CALL_NOP_TLS;
}
Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
bool &isTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
bool isVarArg = CLI.IsVarArg;
+ bool IsPatchPoint = CLI.IsPatchPoint;
+ ImmutableCallSite *CS = CLI.CS;
if (isTailCall)
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
Ins, DAG);
- if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
+ if (!isTailCall && CS && CS->isMustTailCall())
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
if (Subtarget.isSVR4ABI()) {
if (Subtarget.isPPC64())
return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
- isTailCall, Outs, OutVals, Ins,
- dl, DAG, InVals);
+ isTailCall, IsPatchPoint, Outs, OutVals, Ins,
+ dl, DAG, InVals, CS);
else
return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
- isTailCall, Outs, OutVals, Ins,
- dl, DAG, InVals);
+ isTailCall, IsPatchPoint, Outs, OutVals, Ins,
+ dl, DAG, InVals, CS);
}
return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
- isTailCall, Outs, OutVals, Ins,
- dl, DAG, InVals);
+ isTailCall, IsPatchPoint, Outs, OutVals, Ins,
+ dl, DAG, InVals, CS);
}
SDValue
PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall,
+ bool isTailCall, bool IsPatchPoint,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
+ SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite *CS) const {
// See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
// of the 32-bit SVR4 ABI stack frame layout.
*DAG.getContext());
// Reserve space for the linkage area on the stack.
- CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false, false),
+ CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
PtrByteSize);
if (isVarArg) {
PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
false, TailCallArguments);
- return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
- RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
- Ins, InVals);
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
+ RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
+ NumBytes, Ins, InVals, CS);
}
// Copy an argument into memory, being careful to do this outside the
SDValue
PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall,
+ bool isTailCall, bool IsPatchPoint,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
+ SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite *CS) const {
bool isELFv2ABI = Subtarget.isELFv2ABI();
bool isLittleEndian = Subtarget.isLittleEndian();
CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
+ assert(!(CallConv == CallingConv::Fast && isVarArg) &&
+ "fastcc not supported on varargs functions");
+
// Count how many bytes are to be pushed on the stack, including the linkage
// area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
// reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
// area is 32 bytes reserved space for [SP][CR][LR][TOC].
- unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
- isELFv2ABI);
+ unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
unsigned NumBytes = LinkageSize;
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+ static const MCPhysReg GPR[] = {
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+ static const MCPhysReg *FPR = GetFPR();
+
+ static const MCPhysReg VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+ static const MCPhysReg VSRH[] = {
+ PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
+ PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
+ };
+
+ const unsigned NumGPRs = array_lengthof(GPR);
+ const unsigned NumFPRs = 13;
+ const unsigned NumVRs = array_lengthof(VR);
+
+ // When using the fast calling convention, we don't provide backing for
+ // arguments that will be in registers.
+ unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
// Add up all the space actually used.
for (unsigned i = 0; i != NumOps; ++i) {
EVT ArgVT = Outs[i].VT;
EVT OrigVT = Outs[i].ArgVT;
+ if (CallConv == CallingConv::Fast) {
+ if (Flags.isByVal())
+ NumGPRsUsed += (Flags.getByValSize()+7)/8;
+ else
+ switch (ArgVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i1:
+ case MVT::i32:
+ case MVT::i64:
+ if (++NumGPRsUsed <= NumGPRs)
+ continue;
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ if (++NumFPRsUsed <= NumFPRs)
+ continue;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ case MVT::v2f64:
+ case MVT::v2i64:
+ if (++NumVRsUsed <= NumVRs)
+ continue;
+ break;
+ }
+ }
+
/* Respect alignment of argument on the stack. */
unsigned Align =
CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
// Tail call needs the stack to be aligned.
if (getTargetMachine().Options.GuaranteedTailCallOpt &&
CallConv == CallingConv::Fast)
- NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes);
+ NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
// must be stored to our stack, and loaded into integer regs as well, if
// any integer regs are available for argument passing.
unsigned ArgOffset = LinkageSize;
- unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
-
- static const MCPhysReg GPR[] = {
- PPC::X3, PPC::X4, PPC::X5, PPC::X6,
- PPC::X7, PPC::X8, PPC::X9, PPC::X10,
- };
- static const MCPhysReg *FPR = GetFPR();
-
- static const MCPhysReg VR[] = {
- PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
- PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
- };
- static const MCPhysReg VSRH[] = {
- PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
- PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
- };
-
- const unsigned NumGPRs = array_lengthof(GPR);
- const unsigned NumFPRs = 13;
- const unsigned NumVRs = array_lengthof(VR);
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
EVT ArgVT = Outs[i].VT;
EVT OrigVT = Outs[i].ArgVT;
- /* Respect alignment of argument on the stack. */
- unsigned Align =
- CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
- ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
-
- /* Compute GPR index associated with argument offset. */
- GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
- GPR_idx = std::min(GPR_idx, NumGPRs);
-
// PtrOff will be used to store the current argument to the stack if a
// register cannot be found for it.
SDValue PtrOff;
- PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+ // We re-align the argument offset for each argument, except when using the
+ // fast calling convention, when we need to make sure we do that only when
+ // we'll actually use a stack slot.
+ auto ComputePtrOff = [&]() {
+ /* Respect alignment of argument on the stack. */
+ unsigned Align =
+ CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
+ ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
- PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+ PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+ };
+
+ if (CallConv != CallingConv::Fast) {
+ ComputePtrOff();
+
+ /* Compute GPR index associated with argument offset. */
+ GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+ GPR_idx = std::min(GPR_idx, NumGPRs);
+ }
// Promote integers to 64-bit values.
if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
if (Size == 0)
continue;
+ if (CallConv == CallingConv::Fast)
+ ComputePtrOff();
+
// All aggregates smaller than 8 bytes must be passed right-justified.
if (Size==1 || Size==2 || Size==4) {
EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
MachinePointerInfo(), VT,
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
ArgOffset += PtrByteSize;
continue;
MachinePointerInfo(),
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
// Done with this argument.
ArgOffset += PtrByteSize;
// passed directly. Clang may use those instead of "byval" aggregate
// types to avoid forcing arguments to memory unnecessarily.
if (GPR_idx != NumGPRs) {
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Arg));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
} else {
+ if (CallConv == CallingConv::Fast)
+ ComputePtrOff();
+
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, false, MemOpChains,
TailCallArguments, dl);
+ if (CallConv == CallingConv::Fast)
+ ArgOffset += PtrByteSize;
}
- ArgOffset += PtrByteSize;
+ if (CallConv != CallingConv::Fast)
+ ArgOffset += PtrByteSize;
break;
case MVT::f32:
case MVT::f64: {
// then the parameter save area. For now, put all arguments to vararg
// routines always in both locations (FPR *and* GPR or stack slot).
bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
+ bool NeededLoad = false;
// First load the argument into the next available FPR.
if (FPR_idx != NumFPRs)
// Next, load the argument into GPR or stack slot if needed.
if (!NeedGPROrStack)
;
- else if (GPR_idx != NumGPRs) {
+ else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
+ // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
+ // once we support fp <-> gpr moves.
+
// In the non-vararg case, this can only ever happen in the
// presence of f32 array types, since otherwise we never run
// out of FPRs before running out of GPRs.
ArgVal = SDValue();
if (ArgVal.getNode())
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx], ArgVal));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
} else {
+ if (CallConv == CallingConv::Fast)
+ ComputePtrOff();
+
// Single-precision floating-point values are mapped to the
// second (rightmost) word of the stack doubleword.
if (Arg.getValueType() == MVT::f32 &&
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, false, MemOpChains,
TailCallArguments, dl);
+
+ NeededLoad = true;
}
// When passing an array of floats, the array occupies consecutive
// space in the argument area; only round up to the next doubleword
// at the end of the array. Otherwise, each float takes 8 bytes.
- ArgOffset += (Arg.getValueType() == MVT::f32 &&
- Flags.isInConsecutiveRegs()) ? 4 : 8;
- if (Flags.isInConsecutiveRegsLast())
- ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ if (CallConv != CallingConv::Fast || NeededLoad) {
+ ArgOffset += (Arg.getValueType() == MVT::f32 &&
+ Flags.isInConsecutiveRegs()) ? 4 : 8;
+ if (Flags.isInConsecutiveRegsLast())
+ ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ }
break;
}
case MVT::v4f32:
RegsToPass.push_back(std::make_pair(VReg, Arg));
} else {
+ if (CallConv == CallingConv::Fast)
+ ComputePtrOff();
+
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, true, MemOpChains,
TailCallArguments, dl);
+ if (CallConv == CallingConv::Fast)
+ ArgOffset += 16;
}
- ArgOffset += 16;
+
+ if (CallConv != CallingConv::Fast)
+ ArgOffset += 16;
break;
}
}
// Check if this is an indirect call (MTCTR/BCTRL).
// See PrepareCall() for more information about calls through function
// pointers in the 64-bit SVR4 ABI.
- if (!isTailCall &&
- !dyn_cast<GlobalAddressSDNode>(Callee) &&
- !dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ if (!isTailCall && !IsPatchPoint &&
+ !isFunctionGlobalAddress(Callee) &&
+ !isa<ExternalSymbolSDNode>(Callee)) {
// Load r2 into a virtual register and store it to the TOC save area.
+ setUsesTOCBasePtr(DAG);
SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
// TOC save area offset.
- unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
+ unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset);
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
- Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
+ Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
+ MachinePointerInfo::getStack(TOCSaveOffset),
false, false, 0);
// In the ELFv2 ABI, R12 must contain the address of an indirect callee.
// This does not mean the MTCTR instruction must use R12; it's easier
// to model this as an extra parameter, so do that.
- if (isELFv2ABI)
+ if (isELFv2ABI && !IsPatchPoint)
RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
}
PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
FPOp, true, TailCallArguments);
- return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
- RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
- Ins, InVals);
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
+ RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
+ NumBytes, Ins, InVals, CS);
}
SDValue
PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall,
+ bool isTailCall, bool IsPatchPoint,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
+ SmallVectorImpl<SDValue> &InVals,
+ ImmutableCallSite *CS) const {
unsigned NumOps = Outs.size();
// Count how many bytes are to be pushed on the stack, including the linkage
// area, and parameter passing area. We start with 24/48 bytes, which is
// prereserved space for [SP][CR][LR][3 x unused].
- unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true,
- false);
+ unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
unsigned NumBytes = LinkageSize;
// Add up all the space actually used.
// Tail call needs the stack to be aligned.
if (getTargetMachine().Options.GuaranteedTailCallOpt &&
CallConv == CallingConv::Fast)
- NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes);
+ NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
// not mean the MTCTR instruction must use R12; it's easier to model this as
// an extra parameter, so do that.
if (!isTailCall &&
- !dyn_cast<GlobalAddressSDNode>(Callee) &&
- !dyn_cast<ExternalSymbolSDNode>(Callee) &&
+ !isFunctionGlobalAddress(Callee) &&
+ !isa<ExternalSymbolSDNode>(Callee) &&
!isBLACompatibleAddress(Callee, DAG))
RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
PPC::R12), Callee));
PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
FPOp, true, TailCallArguments);
- return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
- RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
- Ins, InVals);
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
+ RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
+ NumBytes, Ins, InVals, CS);
}
bool
PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool isPPC64 = Subtarget.isPPC64();
- bool isDarwinABI = Subtarget.isDarwinABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Get current frame pointer save index. The users of this index will be
// If the frame pointer save index hasn't been defined yet.
if (!RASI) {
// Find out what the fix offset of the frame pointer save area.
- int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
+ int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
// Allocate the frame index for frame pointer save area.
RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
// Save the result.
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool isPPC64 = Subtarget.isPPC64();
- bool isDarwinABI = Subtarget.isDarwinABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Get current frame pointer save index. The users of this index will be
// If the frame pointer save index hasn't been defined yet.
if (!FPSI) {
// Find out what the fix offset of the frame pointer save area.
- int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64,
- isDarwinABI);
-
+ int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
// Allocate the frame index for frame pointer save area.
FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
// Save the result.
return Op;
}
-// FIXME: Split this code up when LegalizeDAGTypes lands.
-SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
- SDLoc dl) const {
+void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
+ SelectionDAG &DAG,
+ SDLoc dl) const {
assert(Op.getOperand(0).getValueType().isFloatingPoint());
SDValue Src = Op.getOperand(0);
if (Src.getValueType() == MVT::f32)
switch (Op.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
case MVT::i32:
- Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
- (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ :
- PPCISD::FCTIDZ),
- dl, MVT::f64, Src);
+ Tmp = DAG.getNode(
+ Op.getOpcode() == ISD::FP_TO_SINT
+ ? PPCISD::FCTIWZ
+ : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
+ dl, MVT::f64, Src);
break;
case MVT::i64:
assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
if (Op.getValueType() == MVT::i32 && !i32Stack) {
FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
DAG.getConstant(4, FIPtr.getValueType()));
- MPI = MachinePointerInfo();
+ MPI = MPI.getWithOffset(4);
}
- return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MPI,
- false, false, false, 0);
+ RLI.Chain = Chain;
+ RLI.Ptr = FIPtr;
+ RLI.MPI = MPI;
+}
+
+SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
+ SDLoc dl) const {
+ ReuseLoadInfo RLI;
+ LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
+
+ return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI, false,
+ false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo,
+ RLI.Ranges);
+}
+
+// We're trying to insert a regular store, S, and then a load, L. If the
+// incoming value, O, is a load, we might just be able to have our load use the
+// address used by O. However, we don't know if anything else will store to
+// that address before we can load from it. To prevent this situation, we need
+// to insert our load, L, into the chain as a peer of O. To do this, we give L
+// the same chain operand as O, we create a token factor from the chain results
+// of O and L, and we replace all uses of O's chain result with that token
+// factor (see spliceIntoChain below for this last part).
+bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
+ ReuseLoadInfo &RLI,
+ SelectionDAG &DAG,
+ ISD::LoadExtType ET) const {
+ SDLoc dl(Op);
+ if (ET == ISD::NON_EXTLOAD &&
+ (Op.getOpcode() == ISD::FP_TO_UINT ||
+ Op.getOpcode() == ISD::FP_TO_SINT) &&
+ isOperationLegalOrCustom(Op.getOpcode(),
+ Op.getOperand(0).getValueType())) {
+
+ LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
+ return true;
+ }
+
+ LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
+ if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
+ LD->isNonTemporal())
+ return false;
+ if (LD->getMemoryVT() != MemVT)
+ return false;
+
+ RLI.Ptr = LD->getBasePtr();
+ if (LD->isIndexed() && LD->getOffset().getOpcode() != ISD::UNDEF) {
+ assert(LD->getAddressingMode() == ISD::PRE_INC &&
+ "Non-pre-inc AM on PPC?");
+ RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
+ LD->getOffset());
+ }
+
+ RLI.Chain = LD->getChain();
+ RLI.MPI = LD->getPointerInfo();
+ RLI.IsInvariant = LD->isInvariant();
+ RLI.Alignment = LD->getAlignment();
+ RLI.AAInfo = LD->getAAInfo();
+ RLI.Ranges = LD->getRanges();
+
+ RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
+ return true;
+}
+
+// Given the head of the old chain, ResChain, insert a token factor containing
+// it and NewResChain, and make users of ResChain now be users of that token
+// factor.
+void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
+ SDValue NewResChain,
+ SelectionDAG &DAG) const {
+ if (!ResChain)
+ return;
+
+ SDLoc dl(NewResChain);
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ NewResChain, DAG.getUNDEF(MVT::Other));
+ assert(TF.getNode() != NewResChain.getNode() &&
+ "A new TF really is required here");
+
+ DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
+ DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
}
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG) const {
SDLoc dl(Op);
// Don't handle ppc_fp128 here; let it be lowered to a libcall.
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
// If we have FCFIDS, then use it when converting to single-precision.
// Otherwise, convert to double-precision and then round.
- unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
- (Op.getOpcode() == ISD::UINT_TO_FP ?
- PPCISD::FCFIDUS : PPCISD::FCFIDS) :
- (Op.getOpcode() == ISD::UINT_TO_FP ?
- PPCISD::FCFIDU : PPCISD::FCFID);
- MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
- MVT::f32 : MVT::f64;
+ unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
+ ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
+ : PPCISD::FCFIDS)
+ : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
+ : PPCISD::FCFID);
+ MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
+ ? MVT::f32
+ : MVT::f64;
if (Op.getOperand(0).getValueType() == MVT::i64) {
SDValue SINT = Op.getOperand(0);
SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
}
- SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
+ ReuseLoadInfo RLI;
+ SDValue Bits;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
+ Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, false,
+ false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo,
+ RLI.Ranges);
+ spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
+ } else if (Subtarget.hasLFIWAX() &&
+ canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
+ RLI.Alignment, RLI.AAInfo, RLI.Ranges);
+ SDValue Ops[] = { RLI.Chain, RLI.Ptr };
+ Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
+ DAG.getVTList(MVT::f64, MVT::Other),
+ Ops, MVT::i32, MMO);
+ spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
+ } else if (Subtarget.hasFPCVT() &&
+ canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
+ RLI.Alignment, RLI.AAInfo, RLI.Ranges);
+ SDValue Ops[] = { RLI.Chain, RLI.Ptr };
+ Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
+ DAG.getVTList(MVT::f64, MVT::Other),
+ Ops, MVT::i32, MMO);
+ spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
+ } else if (((Subtarget.hasLFIWAX() &&
+ SINT.getOpcode() == ISD::SIGN_EXTEND) ||
+ (Subtarget.hasFPCVT() &&
+ SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
+ SINT.getOperand(0).getValueType() == MVT::i32) {
+ MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue Store =
+ DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
+ MachinePointerInfo::getFixedStack(FrameIdx),
+ false, false, 0);
+
+ assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
+ "Expected an i32 store");
+
+ RLI.Ptr = FIdx;
+ RLI.Chain = Store;
+ RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx);
+ RLI.Alignment = 4;
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
+ RLI.Alignment, RLI.AAInfo, RLI.Ranges);
+ SDValue Ops[] = { RLI.Chain, RLI.Ptr };
+ Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
+ PPCISD::LFIWZX : PPCISD::LFIWAX,
+ dl, DAG.getVTList(MVT::f64, MVT::Other),
+ Ops, MVT::i32, MMO);
+ } else
+ Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
+
SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
SDValue Ld;
if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
- int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
- SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
-
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
- MachinePointerInfo::getFixedStack(FrameIdx),
- false, false, 0);
+ ReuseLoadInfo RLI;
+ bool ReusingLoad;
+ if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
+ DAG))) {
+ int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
+ MachinePointerInfo::getFixedStack(FrameIdx),
+ false, false, 0);
+
+ assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
+ "Expected an i32 store");
+
+ RLI.Ptr = FIdx;
+ RLI.Chain = Store;
+ RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx);
+ RLI.Alignment = 4;
+ }
- assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
- "Expected an i32 store");
MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
- MachineMemOperand::MOLoad, 4, 4);
- SDValue Ops[] = { Store, FIdx };
+ MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
+ RLI.Alignment, RLI.AAInfo, RLI.Ranges);
+ SDValue Ops[] = { RLI.Chain, RLI.Ptr };
Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
PPCISD::LFIWZX : PPCISD::LFIWAX,
dl, DAG.getVTList(MVT::f64, MVT::Other),
Ops, MVT::i32, MMO);
+ if (ReusingLoad)
+ spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
} else {
assert(Subtarget.isPPC64() &&
"i32->FP without LFIWAX supported only on PPC64");
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
- SDLoc(Op));
+ SDLoc(Op));
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const {
- const TargetMachine &TM = getTargetMachine();
SDLoc dl(N);
switch (N->getOpcode()) {
default:
break;
}
case ISD::VAARG: {
- if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
- || TM.getSubtarget<PPCSubtarget>().isPPC64())
+ if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
return;
EVT VT = N->getValueType(0);
PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
bool is64bit, unsigned BinOpcode) const {
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *F = BB->getParent();
bool is8bit, // operation
unsigned BinOpcode) const {
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
// In 64 bit mode we have to use 64 bits for addresses, even though the
// lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
// registers without caring whether they're 32 or 64, but here we're
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI->getDebugLoc();
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
unsigned BufReg = MI->getOperand(1).getReg();
if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
+ setUsesTOCBasePtr(*MBB->getParent());
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
.addReg(PPC::X2)
.addImm(TOCOffset)
MIB = BuildMI(*thisMBB, MI, DL,
TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
- .addReg(BaseReg)
- .addImm(BPOffset)
- .addReg(BufReg);
+ .addReg(BaseReg)
+ .addImm(BPOffset)
+ .addReg(BufReg);
MIB.setMemRefs(MMOBegin, MMOEnd);
// Setup
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
- const PPCRegisterInfo *TRI =
- getTargetMachine().getSubtarget<PPCSubtarget>().getRegisterInfo();
+ const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
MIB.addRegMask(TRI->getNoPreservedMask());
BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
// mainMBB:
// mainDstReg = 0
- MIB = BuildMI(mainMBB, DL,
- TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
+ MIB =
+ BuildMI(mainMBB, DL,
+ TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
// Store IP
if (Subtarget.isPPC64()) {
PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI->getDebugLoc();
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
// Since FP is only updated here but NOT referenced, it's treated as GPR.
unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
- unsigned BP = (PVT == MVT::i64) ? PPC::X30 :
- (Subtarget.isSVR4ABI() &&
- MF->getTarget().getRelocationModel() == Reloc::PIC_ ?
- PPC::R29 : PPC::R30);
+ unsigned BP =
+ (PVT == MVT::i64)
+ ? PPC::X30
+ : (Subtarget.isSVR4ABI() &&
+ MF->getTarget().getRelocationModel() == Reloc::PIC_
+ ? PPC::R29
+ : PPC::R30);
MachineInstrBuilder MIB;
// Reload TOC
if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
+ setUsesTOCBasePtr(*MBB->getParent());
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
.addImm(TOCOffset)
.addReg(BufReg);
MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
+ if (MI->getOpcode() == TargetOpcode::STACKMAP ||
+ MI->getOpcode() == TargetOpcode::PATCHPOINT) {
+ if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() &&
+ MI->getOpcode() == TargetOpcode::PATCHPOINT) {
+ // Call lowering should have added an r2 operand to indicate a dependence
+ // on the TOC base pointer value. It can't however, because there is no
+ // way to mark the dependence as implicit there, and so the stackmap code
+ // will confuse it with a regular operand. Instead, add the dependence
+ // here.
+ setUsesTOCBasePtr(*BB->getParent());
+ MI->addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
+ }
+
+ return emitPatchPoint(MI, BB);
+ }
+
if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
return emitEHSjLjSetJmp(MI, BB);
return emitEHSjLjLongJmp(MI, BB);
}
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
// To "insert" these instructions we actually have to insert their
// control-flow patterns.
MachineFunction *F = BB->getParent();
if (Subtarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
- MI->getOpcode() == PPC::SELECT_CC_I8 ||
- MI->getOpcode() == PPC::SELECT_I4 ||
- MI->getOpcode() == PPC::SELECT_I8)) {
+ MI->getOpcode() == PPC::SELECT_CC_I8 ||
+ MI->getOpcode() == PPC::SELECT_I4 ||
+ MI->getOpcode() == PPC::SELECT_I8)) {
SmallVector<MachineOperand, 2> Cond;
if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
MI->getOpcode() == PPC::SELECT_CC_I8)
Cond.push_back(MI->getOperand(1));
DebugLoc dl = MI->getDebugLoc();
- const TargetInstrInfo *TII =
- getTargetMachine().getSubtargetImpl()->getInstrInfo();
TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(),
Cond, MI->getOperand(2).getReg(),
MI->getOperand(3).getReg());
BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
// Restore FPSCR value.
- BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
} else if (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
MI->getOpcode() == PPC::ANDIo_1_GT_BIT ||
MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
bool &UseOneConstNR) const {
EVT VT = Operand.getValueType();
if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
- (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
+ (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
(VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
(VT == MVT::v2f64 && Subtarget.hasVSX())) {
// Convergence is quadratic, so we essentially double the number of digits
unsigned &RefinementSteps) const {
EVT VT = Operand.getValueType();
if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
- (VT == MVT::f64 && Subtarget.hasFRE()) ||
+ (VT == MVT::f64 && Subtarget.hasFRE()) ||
(VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
(VT == MVT::v2f64 && Subtarget.hasVSX())) {
// Convergence is quadratic, so we essentially double the number of digits
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(N);
- assert(Subtarget.useCRBits() &&
- "Expecting to be tracking CR bits");
+ assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
// If we're tracking CR bits, we need to be careful that we don't have:
// trunc(binary-ops(zext(x), zext(y)))
// or
N->getValueType(0) != MVT::i64)
return SDValue();
- if (!((N->getOperand(0).getValueType() == MVT::i1 &&
- Subtarget.useCRBits()) ||
- (N->getOperand(0).getValueType() == MVT::i32 &&
- Subtarget.isPPC64())))
+ if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
+ (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
return SDValue();
if (N->getOperand(0).getOpcode() != ISD::AND &&
N->getOperand(0), ShiftCst), ShiftCst);
}
+SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ assert((N->getOpcode() == ISD::SINT_TO_FP ||
+ N->getOpcode() == ISD::UINT_TO_FP) &&
+ "Need an int -> FP conversion node here");
+
+ if (!Subtarget.has64BitSupport())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+ SDValue Op(N, 0);
+
+ // Don't handle ppc_fp128 here or i1 conversions.
+ if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
+ return SDValue();
+ if (Op.getOperand(0).getValueType() == MVT::i1)
+ return SDValue();
+
+ // For i32 intermediate values, unfortunately, the conversion functions
+ // leave the upper 32 bits of the value are undefined. Within the set of
+ // scalar instructions, we have no method for zero- or sign-extending the
+ // value. Thus, we cannot handle i32 intermediate values here.
+ if (Op.getOperand(0).getValueType() == MVT::i32)
+ return SDValue();
+
+ assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
+ "UINT_TO_FP is supported only with FPCVT");
+
+ // If we have FCFIDS, then use it when converting to single-precision.
+ // Otherwise, convert to double-precision and then round.
+ unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
+ ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
+ : PPCISD::FCFIDS)
+ : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
+ : PPCISD::FCFID);
+ MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
+ ? MVT::f32
+ : MVT::f64;
+
+ // If we're converting from a float, to an int, and back to a float again,
+ // then we don't need the store/load pair at all.
+ if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
+ Subtarget.hasFPCVT()) ||
+ (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
+ SDValue Src = Op.getOperand(0).getOperand(0);
+ if (Src.getValueType() == MVT::f32) {
+ Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
+ DCI.AddToWorklist(Src.getNode());
+ }
+
+ unsigned FCTOp =
+ Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
+ PPCISD::FCTIDUZ;
+
+ SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
+ SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
+
+ if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
+ FP = DAG.getNode(ISD::FP_ROUND, dl,
+ MVT::f32, FP, DAG.getIntPtrConstant(0));
+ DCI.AddToWorklist(FP.getNode());
+ }
+
+ return FP;
+ }
+
+ return SDValue();
+}
+
// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
// builtins) into loads with swaps.
SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
- const TargetMachine &TM = getTargetMachine();
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(N);
switch (N->getOpcode()) {
case ISD::SELECT_CC:
return DAGCombineTruncBoolExt(N, DCI);
case ISD::SINT_TO_FP:
- if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
- if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
- // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
- // We allow the src/dst to be either f32/f64, but the intermediate
- // type must be i64.
- if (N->getOperand(0).getValueType() == MVT::i64 &&
- N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
- SDValue Val = N->getOperand(0).getOperand(0);
- if (Val.getValueType() == MVT::f32) {
- Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
- DCI.AddToWorklist(Val.getNode());
- }
-
- Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val);
- DCI.AddToWorklist(Val.getNode());
- Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val);
- DCI.AddToWorklist(Val.getNode());
- if (N->getValueType(0) == MVT::f32) {
- Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val,
- DAG.getIntPtrConstant(0));
- DCI.AddToWorklist(Val.getNode());
- }
- return Val;
- } else if (N->getOperand(0).getValueType() == MVT::i32) {
- // If the intermediate type is i32, we can avoid the load/store here
- // too.
- }
- }
- }
- break;
+ case ISD::UINT_TO_FP:
+ return combineFPToIntToFP(N, DCI);
case ISD::STORE: {
// Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
- if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
- !cast<StoreSDNode>(N)->isTruncatingStore() &&
+ if (Subtarget.hasSTFIWX() && !cast<StoreSDNode>(N)->isTruncatingStore() &&
N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
N->getOperand(1).getValueType() == MVT::i32 &&
N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
N->getOperand(1).getNode()->hasOneUse() &&
(N->getOperand(1).getValueType() == MVT::i32 ||
N->getOperand(1).getValueType() == MVT::i16 ||
- (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
- TM.getSubtarget<PPCSubtarget>().isPPC64() &&
+ (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
N->getOperand(1).getValueType() == MVT::i64))) {
SDValue BSwapOp = N->getOperand(1).getOperand(0);
// Do an any-extend to 32-bits if this is a half-word input.
EVT VT = N->getOperand(1).getValueType();
if (VT.isSimple()) {
MVT StoreVT = VT.getSimpleVT();
- if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
- TM.getSubtarget<PPCSubtarget>().isLittleEndian() &&
+ if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
(StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
return expandVSXStoreForLE(N, DCI);
// For little endian, VSX loads require generating lxvd2x/xxswapd.
if (VT.isSimple()) {
MVT LoadVT = VT.getSimpleVT();
- if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
- TM.getSubtarget<PPCSubtarget>().isLittleEndian() &&
+ if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
(LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
return expandVSXLoadForLE(N, DCI);
Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
- if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
- TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
+ if (ISD::isNON_EXTLoad(N) && VT.isVector() && Subtarget.hasAltivec() &&
// P8 and later hardware should just use LOAD.
- !TM.getSubtarget<PPCSubtarget>().hasP8Vector() &&
- (VT == MVT::v16i8 || VT == MVT::v8i16 ||
- VT == MVT::v4i32 || VT == MVT::v4f32) &&
+ !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
+ VT == MVT::v4i32 || VT == MVT::v4f32) &&
LD->getAlignment() < ABIAlignment) {
// This is a type-legal unaligned Altivec load.
SDValue Chain = LD->getChain();
}
}
break;
- case ISD::INTRINSIC_WO_CHAIN: {
- bool isLittleEndian = Subtarget.isLittleEndian();
- Intrinsic::ID Intr = (isLittleEndian ?
- Intrinsic::ppc_altivec_lvsr :
- Intrinsic::ppc_altivec_lvsl);
- if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() == Intr &&
- N->getOperand(1)->getOpcode() == ISD::ADD) {
- SDValue Add = N->getOperand(1);
-
- if (DAG.MaskedValueIsZero(Add->getOperand(1),
- APInt::getAllOnesValue(4 /* 16 byte alignment */).zext(
- Add.getValueType().getScalarType().getSizeInBits()))) {
- SDNode *BasePtr = Add->getOperand(0).getNode();
- for (SDNode::use_iterator UI = BasePtr->use_begin(),
- UE = BasePtr->use_end(); UI != UE; ++UI) {
- if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
- cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
- Intr) {
- // We've found another LVSL/LVSR, and this address is an aligned
- // multiple of that one. The results will be the same, so use the
- // one we've just found instead.
-
- return SDValue(*UI, 0);
+ case ISD::INTRINSIC_WO_CHAIN: {
+ bool isLittleEndian = Subtarget.isLittleEndian();
+ Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
+ : Intrinsic::ppc_altivec_lvsl);
+ if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() == Intr &&
+ N->getOperand(1)->getOpcode() == ISD::ADD) {
+ SDValue Add = N->getOperand(1);
+
+ if (DAG.MaskedValueIsZero(
+ Add->getOperand(1),
+ APInt::getAllOnesValue(4 /* 16 byte alignment */)
+ .zext(
+ Add.getValueType().getScalarType().getSizeInBits()))) {
+ SDNode *BasePtr = Add->getOperand(0).getNode();
+ for (SDNode::use_iterator UI = BasePtr->use_begin(),
+ UE = BasePtr->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
+ Intr) {
+ // We've found another LVSL/LVSR, and this address is an aligned
+ // multiple of that one. The results will be the same, so use the
+ // one we've just found instead.
+
+ return SDValue(*UI, 0);
+ }
}
}
}
}
- }
break;
case ISD::INTRINSIC_W_CHAIN: {
// For little endian, VSX loads require generating lxvd2x/xxswapd.
- if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
- TM.getSubtarget<PPCSubtarget>().isLittleEndian()) {
+ if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) {
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
default:
break;
}
case ISD::INTRINSIC_VOID: {
// For little endian, VSX stores require generating xxswapd/stxvd2x.
- if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
- TM.getSubtarget<PPCSubtarget>().isLittleEndian()) {
+ if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) {
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
default:
break;
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
N->getOperand(0).hasOneUse() &&
(N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
- (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
- TM.getSubtarget<PPCSubtarget>().isPPC64() &&
+ (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
N->getValueType(0) == MVT::i64))) {
SDValue Load = N->getOperand(0);
LoadSDNode *LD = cast<LoadSDNode>(Load);
}
}
+unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
+ switch (Subtarget.getDarwinDirective()) {
+ default: break;
+ case PPC::DIR_970:
+ case PPC::DIR_PWR4:
+ case PPC::DIR_PWR5:
+ case PPC::DIR_PWR5X:
+ case PPC::DIR_PWR6:
+ case PPC::DIR_PWR6X:
+ case PPC::DIR_PWR7:
+ case PPC::DIR_PWR8: {
+ if (!ML)
+ break;
+
+ const PPCInstrInfo *TII = Subtarget.getInstrInfo();
+
+ // For small loops (between 5 and 8 instructions), align to a 32-byte
+ // boundary so that the entire loop fits in one instruction-cache line.
+ uint64_t LoopSize = 0;
+ for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
+ for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J)
+ LoopSize += TII->GetInstSizeInBytes(J);
+
+ if (LoopSize > 16 && LoopSize <= 32)
+ return 5;
+
+ break;
+ }
+ }
+
+ return TargetLowering::getPrefLoopAlignment(ML);
+}
/// getConstraintType - Given a constraint, return the type of
/// constraint it is for this target.
// the AsmName field from *RegisterInfo.td, then this would not be necessary.
if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
PPC::GPRCRegClass.contains(R.first)) {
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
return std::make_pair(TRI->getMatchingSuperReg(R.first,
PPC::sub_32, &PPC::G8RCRegClass),
&PPC::G8RCRegClass);
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
FuncInfo->setLRStoreRequired();
bool isPPC64 = Subtarget.isPPC64();
- bool isDarwinABI = Subtarget.isDarwinABI();
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset =
-
- DAG.getConstant(PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI),
- isPPC64? MVT::i64 : MVT::i32);
+ DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(),
+ isPPC64 ? MVT::i64 : MVT::i32);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, getPointerTy(),
FrameAddr, Offset),
bool is64Bit = isPPC64 && VT == MVT::i64;
unsigned Reg = StringSwitch<unsigned>(RegName)
.Case("r1", is64Bit ? PPC::X1 : PPC::R1)
- .Case("r2", isDarwinABI ? 0 : (is64Bit ? PPC::X2 : PPC::R2))
+ .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2)
.Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
(is64Bit ? PPC::X13 : PPC::R13))
.Default(0);
return NumBits1 == 64 && NumBits2 == 32;
}
+bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
+ // Generally speaking, zexts are not free, but they are free when they can be
+ // folded with other operations.
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
+ EVT MemVT = LD->getMemoryVT();
+ if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
+ (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
+ (LD->getExtensionType() == ISD::NON_EXTLOAD ||
+ LD->getExtensionType() == ISD::ZEXTLOAD))
+ return true;
+ }
+
+ // FIXME: Add other cases...
+ // - 32-bit shifts with a zext to i64
+ // - zext after ctlz, bswap, etc.
+ // - zext after and by a constant mask
+
+ return TargetLowering::isZExtFree(Val, VT2);
+}
+
+bool PPCTargetLowering::isFPExtFree(EVT VT) const {
+ assert(VT.isFloatingPoint());
+ return true;
+}
+
bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
return isInt<16>(Imm) || isUInt<16>(Imm);
}
return false;
}
+const MCPhysReg *
+PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
+ // LR is a callee-save register, but we must treat it as clobbered by any call
+ // site. Hence we include LR in the scratch registers, which are in turn added
+ // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
+ // to CTR, which is used by any indirect call.
+ static const MCPhysReg ScratchRegs[] = {
+ PPC::X12, PPC::LR8, PPC::CTR8, 0
+ };
+
+ return ScratchRegs;
+}
+
bool
PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
EVT VT , unsigned DefinedValues) const {