#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
-#include "PPCPredicates.h"
#include "PPCTargetMachine.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
ISD::ArgFlagsTy &ArgFlags,
CCState &State);
// Removed flag: pre-increment load/store generation used to be opt-in
// (-enable-ppc-preinc).
-static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc",
-cl::desc("enable preincrement load/store generation on PPC (experimental)"),
- cl::Hidden);
// Added flag: the polarity is flipped, so pre-increment generation is now
// attempted unless -disable-ppc-preinc is given (the pre-indexed addressing
// hook further down bails out early when this flag is set).
+static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
+cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
+
// Hidden flag -disable-ppc-ilp-pref; the code that reads it is not part of
// the lines shown here.
+static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
+cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
if (TM.getSubtargetImpl()->isDarwin())
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
: TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
+ const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
setPow2DivIsCheap();
// On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
// arguments are at least 4/8 bytes aligned.
- setMinStackArgumentAlignment(TM.getSubtarget<PPCSubtarget>().isPPC64() ? 8:4);
+ bool isPPC64 = Subtarget->isPPC64();
+ setMinStackArgumentAlignment(isPPC64 ? 8:4);
// Set up the register classes.
- addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
- addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
- addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);
+ addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
+ addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
+ addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
// PowerPC has an i16 but no i8 (or i1) SEXTLOAD
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
// from FP_ROUND: that rounds to nearest, this rounds to zero.
setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
+ // We do not currently implement these libm ops for PowerPC.
+ setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
+
// PowerPC has no SREM/UREM instructions
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FREM , MVT::f64, Expand);
setOperationAction(ISD::FPOW , MVT::f64, Expand);
+ setOperationAction(ISD::FMA , MVT::f64, Legal);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FPOW , MVT::f32, Expand);
+ setOperationAction(ISD::FMA , MVT::f32, Legal);
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
// If we're enabling GP optimizations, use hardware square root
- if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
+ if (!Subtarget->hasFSQRT()) {
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
}
setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
// PowerPC does not have ROTR
setOperationAction(ISD::ROTR, MVT::i32 , Expand);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
// TRAMPOLINE is custom lowered.
- setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
- // VAARG is custom lowered with the 32-bit SVR4 ABI.
- if ( TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
- && !TM.getSubtarget<PPCSubtarget>().isPPC64())
- setOperationAction(ISD::VAARG, MVT::Other, Custom);
- else
+ if (Subtarget->isSVR4ABI()) {
+ if (isPPC64) {
+ // VAARG always uses double-word chunks, so promote anything smaller.
+ setOperationAction(ISD::VAARG, MVT::i1, Promote);
+ AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::i8, Promote);
+ AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::i16, Promote);
+ AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::i32, Promote);
+ AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ } else {
+ // VAARG is custom lowered with the 32-bit SVR4 ABI.
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::i64, Custom);
+ }
+ } else
setOperationAction(ISD::VAARG, MVT::Other, Expand);
// Use the default implementation.
setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
- if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
+ if (Subtarget->has64BitSupport()) {
// They also have instructions for converting between i64 and fp.
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
}
- if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) {
+ if (Subtarget->use64BitRegs()) {
// 64-bit PowerPC implementations can support i64 types directly
- addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
+ addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
// BUILD_PAIR can't be handled natively, and should be expanded to shl/or
setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
// 64-bit PowerPC wants to expand i128 shifts itself.
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
}
- if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
+ if (Subtarget->hasAltivec()) {
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::CTLZ, VT, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
}
// We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
setOperationAction(ISD::STORE , MVT::v4i32, Legal);
- addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
- addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
- addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
- addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);
+ addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
+ addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
+ addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
+ addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
setOperationAction(ISD::MUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMA, MVT::v4f32, Legal);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
}
+ if (Subtarget->has64BitSupport()) {
+ setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
+ }
+
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
+
setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
- if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
+ if (isPPC64) {
setStackPointerRegisterToSaveRestore(PPC::X1);
setExceptionPointerRegister(PPC::X3);
setExceptionSelectorRegister(PPC::X4);
setTargetDAGCombine(ISD::BSWAP);
// Darwin long double math library functions have $LDBL128 appended.
- if (TM.getSubtarget<PPCSubtarget>().isDarwin()) {
+ if (Subtarget->isDarwin()) {
setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
}
+ setMinFunctionAlignment(2);
+ if (PPCSubTarget.isDarwin())
+ setPrefFunctionAlignment(4);
+
+ if (isPPC64 && Subtarget->isJITCodeModel())
+ // Temporary workaround for the inability of PPC64 JIT to handle jump
+ // tables.
+ setSupportJumpTables(false);
+
+ setInsertFencesForAtomic(true);
+
+ setSchedulingPreference(Sched::Hybrid);
+
computeRegisterProperties();
+
+ // The Freescale cores do better with aggressive inlining of memcpy and
+ // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
+ if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc ||
+ Subtarget->getDarwinDirective() == PPC::DIR_E5500) {
+ maxStoresPerMemset = 32;
+ maxStoresPerMemsetOptSize = 16;
+ maxStoresPerMemcpy = 32;
+ maxStoresPerMemcpyOptSize = 8;
+ maxStoresPerMemmove = 32;
+ maxStoresPerMemmoveOptSize = 8;
+
+ setPrefFunctionAlignment(4);
+ benefitFromCodePlacementOpt = true;
+ }
}
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
///
/// Rules implemented by the new version of the body, in order:
///   - Darwin subtargets: always 4.
///   - Vector types whose bit width is at least 128: 16.
///   - PPC64: 8.
///   - Everything else: 4.
-unsigned PPCTargetLowering::getByValTypeAlignment(const Type *Ty) const {
+unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
const TargetMachine &TM = getTargetMachine();
// Darwin passes everything on a 4-byte boundary.
if (TM.getSubtarget<PPCSubtarget>().isDarwin())
return 4;
- // FIXME SVR4 TBD
+
+ // Vectors that are 16 bytes (128 bits) or wider are passed on a 16-byte boundary.
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+ if (VTy->getBitWidth() >= 128)
+ return 16;
+
+ // Everything else is passed on an 8-byte boundary on PPC64 and a 4-byte boundary on PPC32.
+ if (PPCSubTarget.isPPC64())
+ return 8;
+
return 4;
}
case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";
case PPCISD::STD_32: return "PPCISD::STD_32";
case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4";
+ case PPCISD::CALL_NOP_SVR4: return "PPCISD::CALL_NOP_SVR4";
case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin";
case PPCISD::NOP: return "PPCISD::NOP";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
case PPCISD::MTFSF: return "PPCISD::MTFSF";
case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
+ case PPCISD::CR6SET: return "PPCISD::CR6SET";
+ case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
}
}
// Returns MVT::i32 for every VT; the argument is not inspected. The patch
// only changes the declared return type (MVT::SimpleValueType -> EVT).
-MVT::SimpleValueType PPCTargetLowering::getSetCCResultType(EVT VT) const {
+EVT PPCTargetLowering::getSetCCResultType(EVT VT) const {
return MVT::i32;
}
-/// getFunctionAlignment - Return the Log2 alignment of this function.
-unsigned PPCTargetLowering::getFunctionAlignment(const Function *F) const {
- if (getTargetMachine().getSubtarget<PPCSubtarget>().isDarwin())
- return F->hasFnAttr(Attribute::OptimizeForSize) ? 2 : 4;
- else
- return 2;
-}
-
//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//
}
// Properly sign extend the value.
- int ShAmt = (4-ByteSize)*8;
- int MaskVal = ((int)Value << ShAmt) >> ShAmt;
+ int MaskVal = SignExtend32(Value, ByteSize * 8);
// If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
if (MaskVal == 0) return SDValue();
// Finally, if this value fits in a 5 bit sext field, return it
- if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)
+ if (SignExtend32<5>(MaskVal) == MaskVal)
return DAG.getTargetConstant(MaskVal, MVT::i32);
return SDValue();
}
APInt LHSKnownZero, LHSKnownOne;
APInt RHSKnownZero, RHSKnownOne;
DAG.ComputeMaskedBits(N.getOperand(0),
- APInt::getAllOnesValue(N.getOperand(0)
- .getValueSizeInBits()),
LHSKnownZero, LHSKnownOne);
if (LHSKnownZero.getBoolValue()) {
DAG.ComputeMaskedBits(N.getOperand(1),
- APInt::getAllOnesValue(N.getOperand(1)
- .getValueSizeInBits()),
RHSKnownZero, RHSKnownOne);
// If all of the bits are known zero on the LHS or RHS, the add won't
// carry.
return true; // [r+i]
} else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
// Match LOAD (ADD (X, Lo(G))).
- assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
+ assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
&& "Cannot handle constant offsets yet!");
Disp = N.getOperand(1).getOperand(0); // The global address.
assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
+ Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
Disp.getOpcode() == ISD::TargetConstantPool ||
Disp.getOpcode() == ISD::TargetJumpTable);
Base = N.getOperand(0);
// (for better address arithmetic) if the LHS and RHS of the OR are
// provably disjoint.
APInt LHSKnownZero, LHSKnownOne;
- DAG.ComputeMaskedBits(N.getOperand(0),
- APInt::getAllOnesValue(N.getOperand(0)
- .getValueSizeInBits()),
- LHSKnownZero, LHSKnownOne);
+ DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
// If all of the bits are known zero on the LHS or RHS, the add won't
if (N.getOpcode() == ISD::ADD) {
short imm = 0;
if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
- Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
+ Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
} else {
return true; // [r+i]
} else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
// Match LOAD (ADD (X, Lo(G))).
- assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
+ assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
&& "Cannot handle constant offsets yet!");
Disp = N.getOperand(1).getOperand(0); // The global address.
assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
// (for better address arithmetic) if the LHS and RHS of the OR are
// provably disjoint.
APInt LHSKnownZero, LHSKnownOne;
- DAG.ComputeMaskedBits(N.getOperand(0),
- APInt::getAllOnesValue(N.getOperand(0)
- .getValueSizeInBits()),
- LHSKnownZero, LHSKnownOne);
+ DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
// If all of the bits are known zero on the LHS or RHS, the add won't
// carry.
short Imm;
if (isIntS16Immediate(CN, Imm)) {
Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
- Base = DAG.getRegister(PPC::R0, CN->getValueType(0));
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ CN->getValueType(0));
return true;
}
SDValue &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const {
- // Disabled by default for now.
- if (!EnablePPCPreinc) return false;
+ if (DisablePPCPreinc) return false;
SDValue Ptr;
EVT VT;
if (VT.isVector())
return false;
- // TODO: Check reg+reg first.
+ if (SelectAddressRegReg(Ptr, Offset, Base, DAG)) {
+ AM = ISD::PRE_INC;
+ return true;
+ }
// LDU/STU use reg+imm*4, others use reg+imm.
if (VT != MVT::i64) {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
const Constant *C = CP->getConstVal();
+ // 64-bit SVR4 ABI code is always position-independent.
+ // The actual address of the constant-pool entry is stored in the TOC.
+ if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
+ return DAG.getNode(PPCISD::TOC_ENTRY, CP->getDebugLoc(), MVT::i64, GA,
+ DAG.getRegister(PPC::X2, MVT::i64));
+ }
+
unsigned MOHiFlag, MOLoFlag;
bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
SDValue CPIHi =
EVT PtrVT = Op.getValueType();
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ // 64-bit SVR4 ABI code is always position-independent.
+ // The actual address of the jump table is stored in the TOC.
+ if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+ return DAG.getNode(PPCISD::TOC_ENTRY, JT->getDebugLoc(), MVT::i64, GA,
+ DAG.getRegister(PPC::X2, MVT::i64));
+ }
+
unsigned MOHiFlag, MOLoFlag;
bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
unsigned MOHiFlag, MOLoFlag;
bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
- SDValue TgtBAHi = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOHiFlag);
- SDValue TgtBALo = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOLoFlag);
+ SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
+ SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
}
// LowerGlobalTLSAddress - Lower a GlobalAddressSDNode for a thread-local
// global. Only the LocalExec TLS model is handled; any other model reaches
// llvm_unreachable. The result is PPCISD::Lo(TGALo, PPCISD::Hi(TGAHi, TLSReg)),
// where TGAHi/TGALo are target global addresses flagged MO_TPREL16_HA and
// MO_TPREL16_LO, and TLSReg is X13 (i64) on PPC64 or R2 (i32) otherwise.
+SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ DebugLoc dl = GA->getDebugLoc();
+ const GlobalValue *GV = GA->getGlobal();
+ EVT PtrVT = getPointerTy();
+ bool is64bit = PPCSubTarget.isPPC64();
+
+ TLSModel::Model model = getTargetMachine().getTLSModel(GV);
+
// High-adjusted and low halves of the same global, both with offset 0.
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TPREL16_HA);
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TPREL16_LO);
+
// Note: the model check happens after the two address nodes are created.
+ if (model != TLSModel::LocalExec)
+ llvm_unreachable("only local-exec TLS mode supported");
+ SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
+ is64bit ? MVT::i64 : MVT::i32);
+ SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
+ return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
+}
+
+
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
// extra load to get the address of the global.
if (MOHiFlag & PPCII::MO_NLP_FLAG)
Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
return Ptr;
}
/// LowerVAARG - Custom lowering of a VAARG node. Asserts that the subtarget is
/// not PPC64.
///
/// The body reads the va_list object at these offsets from VAListPtr:
///   +0  one byte, zero-extended: gpr index
///   +1  one byte, zero-extended: fpr index
///   +4  32-bit overflow-area pointer
///   +8  32-bit register-save-area pointer
/// The argument address comes from the register save area while the relevant
/// index (gpr for integer VTs, fpr otherwise) is below 8, and from the
/// overflow area otherwise. The bumped index is always stored back; the
/// overflow pointer is advanced only when the overflow area was used. The
/// returned value is a load of VT from the selected address.
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) const {
+ SDNode *Node = Op.getNode();
+ EVT VT = Node->getValueType(0);
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue InChain = Node->getOperand(0);
+ SDValue VAListPtr = Node->getOperand(1);
+ const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ DebugLoc dl = Node->getDebugLoc();
+
+ assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
+
+ // gpr_index: first byte of the va_list object
+ SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
+ VAListPtr, MachinePointerInfo(SV), MVT::i8,
+ false, false, 0);
+ InChain = GprIndex.getValue(1);
+
+ if (VT == MVT::i64) {
+ // CC64 is true when GprIndex is odd (its low bit is set)
+ SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
+ DAG.getConstant(1, MVT::i32));
+ SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
+ DAG.getConstant(0, MVT::i32), ISD::SETNE);
+ SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
+ DAG.getConstant(1, MVT::i32));
+ // Round an odd GprIndex up to the next even value; keep an even one as is
+ GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
+ GprIndex);
+ }
- llvm_unreachable("VAARG not yet implemented for the SVR4 ABI!");
- return SDValue(); // Not reached
+ // fpr index is 1 byte after gpr
+ SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
+ DAG.getConstant(1, MVT::i32));
+
+ // fpr_index: second byte of the va_list object
+ SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
+ FprPtr, MachinePointerInfo(SV), MVT::i8,
+ false, false, 0);
+ InChain = FprIndex.getValue(1);
+
// Addresses of the two pointer fields: reg save area at +8, overflow at +4.
+ SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
+ DAG.getConstant(8, MVT::i32));
+
+ SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
+ DAG.getConstant(4, MVT::i32));
+
+ // load the current overflow-area and register-save-area pointers
+ SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
+ MachinePointerInfo(), false, false,
+ false, 0);
+ InChain = OverflowArea.getValue(1);
+
+ SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
+ MachinePointerInfo(), false, false,
+ false, 0);
+ InChain = RegSaveArea.getValue(1);
+
+ // CC is true while the index is below 8 (use reg_save_area); otherwise overflow_area is used
+ SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
+ DAG.getConstant(8, MVT::i32), ISD::SETLT);
+
+ // byte offset into the save area: gpr_index * 4 for integers, fpr_index * 8 otherwise
+ SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
+ VT.isInteger() ? GprIndex : FprIndex,
+ DAG.getConstant(VT.isInteger() ? 4 : 8,
+ MVT::i32));
+
+ // OurReg = RegSaveArea + RegConstant
+ SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
+ RegConstant);
+
+ // Floating types are 32 bytes into RegSaveArea
+ if (VT.isFloatingPoint())
+ OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
+ DAG.getConstant(32, MVT::i32));
+
+ // increase {f,g}pr_index by 1 (or 2 if VT is i64)
+ SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ VT.isInteger() ? GprIndex : FprIndex,
+ DAG.getConstant(VT == MVT::i64 ? 2 : 1,
+ MVT::i32));
+
// The new index is written back as a single byte regardless of CC.
+ InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
+ VT.isInteger() ? VAListPtr : FprPtr,
+ MachinePointerInfo(SV),
+ MVT::i8, false, false, 0);
+
+ // determine if we should load from reg_save_area or overflow_area
+ SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
+
+ // increase overflow_area by 4/8 when the gpr/fpr index is 8 or more (CC false)
+ SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
+ DAG.getConstant(VT.isInteger() ? 4 : 8,
+ MVT::i32));
+
+ OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
+ OverflowAreaPlusN);
+
// Store the (possibly unchanged) overflow pointer back into the va_list.
+ InChain = DAG.getTruncStore(InChain, dl, OverflowArea,
+ OverflowAreaPtr,
+ MachinePointerInfo(),
+ MVT::i32, false, false, 0);
+
// Finally load the argument itself from the selected address.
+ return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(),
+ false, false, false, 0);
}
-SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op,
- SelectionDAG &DAG) const {
// LowerADJUST_TRAMPOLINE - Returns operand 0 of the node unchanged; DAG is
// not used.
+SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+ return Op.getOperand(0);
+}
+
+SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Trmp = Op.getOperand(1); // trampoline
SDValue FPtr = Op.getOperand(2); // nested function
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
bool isPPC64 = (PtrVT == MVT::i64);
- const Type *IntPtrTy =
- DAG.getTargetLoweringInfo().getTargetData()->getIntPtrType(
+ Type *IntPtrTy =
+ DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType(
*DAG.getContext());
TargetLowering::ArgListTy Args;
Entry.Node = Nest; Args.push_back(Entry);
// Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
- std::pair<SDValue, SDValue> CallResult =
- LowerCallTo(Chain, Op.getValueType().getTypeForEVT(*DAG.getContext()),
- false, false, false, false, 0, CallingConv::C, false,
- /*isReturnValueUsed=*/true,
+ TargetLowering::CallLoweringInfo CLI(Chain,
+ Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false, 0,
+ CallingConv::C,
+ /*isTailCall=*/false,
+ /*doesNotRet=*/false,
+ /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__trampoline_setup", PtrVT),
Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
- SDValue Ops[] =
- { CallResult.first, CallResult.second };
-
- return DAG.getMergeValues(Ops, 2, dl);
+ return CallResult.second;
}
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
- static const unsigned ArgRegs[] = {
+ static const uint16_t ArgRegs[] = {
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
- static const unsigned ArgRegs[] = {
+ static const uint16_t ArgRegs[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
};
/// GetFPR - Get the set of FP registers that should be allocated for arguments,
/// on Darwin.
-static const unsigned *GetFPR() {
- static const unsigned FPR[] = {
+static const uint16_t *GetFPR() {
+ static const uint16_t FPR[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
};
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals)
const {
- if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) {
- return LowerFormalArguments_SVR4(Chain, CallConv, isVarArg, Ins,
- dl, DAG, InVals);
+ if (PPCSubTarget.isSVR4ABI()) {
+ if (PPCSubTarget.isPPC64())
+ return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
+ dl, DAG, InVals);
+ else
+ return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
+ dl, DAG, InVals);
} else {
return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
dl, DAG, InVals);
}
SDValue
-PPCTargetLowering::LowerFormalArguments_SVR4(
+PPCTargetLowering::LowerFormalArguments_32SVR4(
SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg>
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Potential tail calls could cause overwriting of argument stack slots.
- bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast));
+ bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+ (CallConv == CallingConv::Fast));
unsigned PtrByteSize = 4;
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
- *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
// Reserve space for the linkage area on the stack.
CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
// Arguments stored in registers.
if (VA.isRegLoc()) {
- TargetRegisterClass *RC;
+ const TargetRegisterClass *RC;
EVT ValVT = VA.getValVT();
switch (ValVT.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("ValVT not supported by formal arguments Lowering");
case MVT::i32:
- RC = PPC::GPRCRegisterClass;
+ RC = &PPC::GPRCRegClass;
break;
case MVT::f32:
- RC = PPC::F4RCRegisterClass;
+ RC = &PPC::F4RCRegClass;
break;
case MVT::f64:
- RC = PPC::F8RCRegisterClass;
+ RC = &PPC::F8RCRegClass;
break;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v4f32:
- RC = PPC::VRRCRegisterClass;
+ RC = &PPC::VRRCRegClass;
break;
}
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
MachinePointerInfo(),
- false, false, 0));
+ false, false, false, 0));
}
}
// Aggregates passed by value are stored in the local variable space of the
// caller's stack frame, right above the parameter list area.
SmallVector<CCValAssign, 16> ByValArgLocs;
- CCState CCByValInfo(CallConv, isVarArg, getTargetMachine(),
- ByValArgLocs, *DAG.getContext());
+ CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ByValArgLocs, *DAG.getContext());
// Reserve stack space for the allocations in CCInfo.
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
- static const unsigned GPArgRegs[] = {
+ static const uint16_t GPArgRegs[] = {
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
- static const unsigned FPArgRegs[] = {
+ static const uint16_t FPArgRegs[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
};
return Chain;
}
+SDValue
+PPCTargetLowering::LowerFormalArguments_64SVR4(
+ SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // TODO: add description of PPC stack frame format, or at least some docs.
+ // Lowers the incoming formal arguments in Ins, pushing one value per argument onto InVals.
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ // Potential tail calls could cause overwriting of argument stack slots.
+ bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+ (CallConv == CallingConv::Fast));
+ unsigned PtrByteSize = 8; // Size in bytes of one GPR-sized argument slot.
+
+ unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
+ // Area that is at least reserved in caller of this function.
+ unsigned MinReservedArea = ArgOffset;
+
+ static const uint16_t GPR[] = {
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+
+ static const uint16_t *FPR = GetFPR();
+
+ static const uint16_t VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+
+ const unsigned Num_GPR_Regs = array_lengthof(GPR);
+ const unsigned Num_FPR_Regs = 13; // NOTE(review): assumes GetFPR() yields at least 13 registers -- confirm.
+ const unsigned Num_VR_Regs = array_lengthof(VR);
+
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+ // Add DAG nodes to load the arguments or copy them out of registers. On
+ // entry to a function on PPC, the arguments start after the linkage area,
+ // although the first ones are often in registers.
+
+ SmallVector<SDValue, 8> MemOps; // Stores emitted below; joined by a TokenFactor at the end.
+ unsigned nAltivecParamsAtEnd = 0; // Vector args of non-vararg functions; their space is added after the loop.
+ Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) {
+ SDValue ArgVal;
+ bool needsLoad = false;
+ EVT ObjectVT = Ins[ArgNo].VT;
+ unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+ unsigned ArgSize = ObjSize;
+ ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
+
+ unsigned CurArgOffset = ArgOffset; // Offset at which this argument starts.
+
+ // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
+ if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
+ ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
+ if (isVarArg) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += CalculateStackSlotSize(ObjectVT,
+ Flags,
+ PtrByteSize);
+ } else
+ nAltivecParamsAtEnd++;
+ } else
+ // Calculate min reserved area.
+ MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
+ Flags,
+ PtrByteSize);
+
+ // FIXME the codegen can be much improved in some cases.
+ // We do not have to keep everything in memory.
+ if (Flags.isByVal()) {
+ // ObjSize is the true size, ArgSize rounded up to multiple of registers.
+ ObjSize = Flags.getByValSize();
+ ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ // All aggregates smaller than 8 bytes must be passed right-justified.
+ if (ObjSize==1 || ObjSize==2) {
+ CurArgOffset = CurArgOffset + (4 - ObjSize); // NOTE(review): uses 4, not PtrByteSize (8), and only sizes 1 and 2 -- confirm for 64-bit.
+ }
+ // The value of the object is its address.
+ int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ InVals.push_back(FIN);
+ if (ObjSize==1 || ObjSize==2 || ObjSize==4) { // Handled with a single truncating store.
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg;
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ EVT ObjType = (ObjSize == 1 ? MVT::i8 :
+ (ObjSize == 2 ? MVT::i16 : MVT::i32));
+ SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(FuncArg,
+ CurArgOffset),
+ ObjType, false, false, 0);
+ MemOps.push_back(Store);
+ ++GPR_idx;
+ }
+
+ ArgOffset += PtrByteSize;
+
+ continue;
+ }
+ for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
+ // Store whatever pieces of the object are in registers
+ // to memory. ArgOffset will be the address of the beginning
+ // of the object.
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg;
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Shifted = Val;
+
+ // For 64-bit SVR4, small structs come in right-adjusted.
+ // Shift them left so the following logic works as expected.
+ if (ObjSize < 8) {
+ SDValue ShiftAmt = DAG.getConstant(64 - 8 * ObjSize, PtrVT);
+ Shifted = DAG.getNode(ISD::SHL, dl, PtrVT, Val, ShiftAmt);
+ }
+
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Shifted, FIN,
+ MachinePointerInfo(FuncArg, ArgOffset),
+ false, false, 0);
+ MemOps.push_back(Store);
+ ++GPR_idx;
+ ArgOffset += PtrByteSize;
+ } else {
+ ArgOffset += ArgSize - (ArgOffset-CurArgOffset); // Out of GPRs: advance past the rest of the object.
+ break;
+ }
+ }
+ continue;
+ }
+
+ switch (ObjectVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled argument type!");
+ case MVT::i32:
+ case MVT::i64:
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
+
+ if (ObjectVT == MVT::i32) {
+ // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
+ // value to MVT::i64 and then truncate to the correct register size.
+ if (Flags.isSExt())
+ ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
+ DAG.getValueType(ObjectVT));
+ else if (Flags.isZExt())
+ ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
+ DAG.getValueType(ObjectVT));
+
+ ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
+ }
+
+ ++GPR_idx;
+ } else {
+ needsLoad = true;
+ ArgSize = PtrByteSize; // The load below reads at CurArgOffset + (ArgSize - ObjSize).
+ }
+ ArgOffset += 8;
+ break;
+
+ case MVT::f32:
+ case MVT::f64:
+ // Every 8 bytes of argument space consumes one of the GPRs available for
+ // argument passing.
+ if (GPR_idx != Num_GPR_Regs) {
+ ++GPR_idx;
+ }
+ if (FPR_idx != Num_FPR_Regs) {
+ unsigned VReg;
+
+ if (ObjectVT == MVT::f32)
+ VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
+ else
+ VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
+
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ ++FPR_idx;
+ } else {
+ needsLoad = true;
+ }
+
+ ArgOffset += 8;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ // Note that vector arguments in registers don't reserve stack space,
+ // except in varargs functions.
+ if (VR_idx != Num_VR_Regs) {
+ unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ if (isVarArg) {
+ while ((ArgOffset % 16) != 0) {
+ ArgOffset += PtrByteSize;
+ if (GPR_idx != Num_GPR_Regs)
+ GPR_idx++;
+ }
+ ArgOffset += 16;
+ GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
+ }
+ ++VR_idx;
+ } else {
+ // Vectors are aligned.
+ ArgOffset = ((ArgOffset+15)/16)*16;
+ CurArgOffset = ArgOffset;
+ ArgOffset += 16;
+ needsLoad = true;
+ }
+ break;
+ }
+
+ // We need to load the argument to a virtual register if we determined
+ // above that we ran out of physical registers of the appropriate type.
+ if (needsLoad) {
+ int FI = MFI->CreateFixedObject(ObjSize,
+ CurArgOffset + (ArgSize - ObjSize),
+ isImmutable);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
+ false, false, false, 0);
+ }
+
+ InVals.push_back(ArgVal);
+ }
+
+ // Set the size that is at least reserved in caller of this function. Tail
+ // call optimized function's reserved stack space needs to be aligned so that
+ // taking the difference between two stack areas will result in an aligned
+ // stack.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); // Same query as FuncInfo at the top of the function.
+ // Add the Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += 16*nAltivecParamsAtEnd;
+ }
+ MinReservedArea =
+ std::max(MinReservedArea,
+ PPCFrameLowering::getMinCallFrameSize(true, true));
+ unsigned TargetAlign
+ = DAG.getMachineFunction().getTarget().getFrameLowering()->
+ getStackAlignment();
+ unsigned AlignMask = TargetAlign-1;
+ MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask; // Round up to the stack alignment.
+ FI->setMinReservedArea(MinReservedArea);
+
+ // If the function takes variable number of arguments, make a frame index for
+ // the start of the first vararg value... for expansion of llvm.va_start.
+ if (isVarArg) {
+ int Depth = ArgOffset; // Offset reached after walking all of Ins.
+
+ FuncInfo->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
+ Depth, true));
+ SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+
+ // If this function is vararg, store any remaining integer argument regs
+ // to their spots on the stack so that they may be loaded by dereferencing
+ // the result of va_next.
+ for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
+ MemOps.push_back(Store);
+ // Increment the address by the pointer size for the next argument to store
+ SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl,
+ MVT::Other, &MemOps[0], MemOps.size());
+
+ return Chain;
+}
+
SDValue
PPCTargetLowering::LowerFormalArguments_Darwin(
SDValue Chain,
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
bool isPPC64 = PtrVT == MVT::i64;
// Potential tail calls could cause overwriting of argument stack slots.
- bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast));
+ bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+ (CallConv == CallingConv::Fast));
unsigned PtrByteSize = isPPC64 ? 8 : 4;
unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
// Area that is at least reserved in caller of this function.
unsigned MinReservedArea = ArgOffset;
- static const unsigned GPR_32[] = { // 32-bit registers.
+ static const uint16_t GPR_32[] = { // 32-bit registers.
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
- static const unsigned GPR_64[] = { // 64-bit registers.
+ static const uint16_t GPR_64[] = { // 64-bit registers.
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const unsigned *FPR = GetFPR();
+ static const uint16_t *FPR = GetFPR();
- static const unsigned VR[] = {
+ static const uint16_t VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
- const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
+ const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
// In 32-bit non-varargs functions, the stack space for vectors is after the
// stack space for non-vectors. We do not use this space unless we have
for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
++ArgNo) {
EVT ObjectVT = Ins[ArgNo].VT;
- unsigned ObjSize = ObjectVT.getSizeInBits()/8;
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
if (Flags.isByVal()) {
// ObjSize is the true size, ArgSize rounded up to multiple of regs.
- ObjSize = Flags.getByValSize();
+ unsigned ObjSize = Flags.getByValSize();
unsigned ArgSize =
((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
VecArgOffset += ArgSize;
default: llvm_unreachable("Unhandled argument type!");
case MVT::i32:
case MVT::f32:
- VecArgOffset += isPPC64 ? 8 : 4;
+ VecArgOffset += 4;
break;
case MVT::i64: // PPC64
case MVT::f64:
+ // FIXME: We are guaranteed to be !isPPC64 at this point.
+ // Does MVT::i64 apply?
VecArgOffset += 8;
break;
case MVT::v4f32:
SmallVector<SDValue, 8> MemOps;
unsigned nAltivecParamsAtEnd = 0;
- for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
+ Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) {
SDValue ArgVal;
bool needsLoad = false;
EVT ObjectVT = Ins[ArgNo].VT;
InVals.push_back(FIN);
if (ObjSize==1 || ObjSize==2) {
if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+ unsigned VReg;
+ if (isPPC64)
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ else
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ EVT ObjType = (ObjSize == 1 ? MVT::i8 :
+ (ObjSize == 2 ? MVT::i16 : MVT::i32));
SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(),
- ObjSize==1 ? MVT::i8 : MVT::i16,
- false, false, 0);
+ MachinePointerInfo(FuncArg,
+ CurArgOffset),
+ ObjType, false, false, 0);
MemOps.push_back(Store);
++GPR_idx;
}
}
for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
// Store whatever pieces of the object are in registers
- // to memory. ArgVal will be address of the beginning of
- // the object.
+ // to memory. ArgOffset will be the address of the beginning
+ // of the object.
if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+ unsigned VReg;
+ if (isPPC64)
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ else
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(),
+ MachinePointerInfo(FuncArg, ArgOffset),
false, false, 0);
MemOps.push_back(Store);
++GPR_idx;
isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
}
InVals.push_back(ArgVal);
return Chain;
}
-/// CalculateParameterAndLinkageAreaSize - Get the size of the paramter plus
-/// linkage area for the Darwin ABI.
+/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus
+/// linkage area for the Darwin ABI, or the 64-bit SVR4 ABI.
static unsigned
CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
bool isPPC64,
PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
// Tail call needs the stack to be aligned.
- if (CC==CallingConv::Fast && GuaranteedTailCallOpt) {
- unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
- getStackAlignment();
+ if (CC == CallingConv::Fast && DAG.getTarget().Options.GuaranteedTailCallOpt){
+ unsigned TargetAlign = DAG.getMachineFunction().getTarget().
+ getFrameLowering()->getStackAlignment();
unsigned AlignMask = TargetAlign-1;
NumBytes = (NumBytes + AlignMask) & ~AlignMask;
}
}
/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
-/// adjusted to accomodate the arguments for the tailcall.
+/// adjusted to accommodate the arguments for the tailcall.
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
unsigned ParamSize) {
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const {
- if (!GuaranteedTailCallOpt)
+ if (!getTargetMachine().Options.GuaranteedTailCallOpt)
return false;
// Variable argument functions are not supported.
int Addr = C->getZExtValue();
if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
- (Addr << 6 >> 6) != Addr)
+ SignExtend32<26>(Addr) != Addr)
return 0; // Top 6 bits have to be sext of immediate.
return DAG.getConstant((int)C->getZExtValue() >> 2,
EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
LROpOut = getReturnAddrFrameIndex(DAG);
LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
Chain = SDValue(LROpOut.getNode(), 1);
// When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
if (isDarwinABI) {
FPOpOut = getFramePointerFrameIndex(DAG);
FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
Chain = SDValue(FPOpOut.getNode(), 1);
}
}
// Emit a sequence of copyto/copyfrom virtual registers for arguments that
// might overwrite each other in case of tail call optimization.
SmallVector<SDValue, 8> MemOpChains2;
- // Do not flag preceeding copytoreg stuff together with the following stuff.
+ // Do not flag preceding copytoreg stuff together with the following stuff.
InFlag = SDValue();
StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
MemOpChains2, dl);
if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
unsigned OpFlags = 0;
if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
- PPCSubTarget.getDarwinVers() < 9 &&
+ (PPCSubTarget.getTargetTriple().isMacOSX() &&
+ PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
(G->getGlobal()->isDeclaration() ||
G->getGlobal()->isWeakForLinker())) {
// PC-relative references to external symbols should go through $stub,
unsigned char OpFlags = 0;
if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
- PPCSubTarget.getDarwinVers() < 9) {
+ (PPCSubTarget.getTargetTriple().isMacOSX() &&
+ PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
// Thus for a call through a function pointer, the following actions need
// to be performed:
// 1. Save the TOC of the caller in the TOC save area of its stack
- // frame (this is done in LowerCall_Darwin()).
+ // frame (this is done in LowerCall_Darwin_Or_64SVR4()).
// 2. Load the address of the function entry point from the function
// descriptor.
// 3. Load the TOC of the callee from the function descriptor into r2.
Callee.setNode(0);
// Add CTR register as callee so a bctr can be emitted later.
if (isTailCall)
- Ops.push_back(DAG.getRegister(PPC::CTR, PtrVT));
+ Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
}
// If this is a direct call, pass the chain and the callee.
return CallOpc;
}
+/// isLocalCall - Return true when Callee is a global address node whose
+/// global is neither a declaration nor weak for the linker. Any other kind
+/// of callee node yields false.
+static bool isLocalCall(const SDValue &Callee) {
+  GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Callee);
+  if (!GA)
+    return false;
+
+  // A declaration has no body here; a weak definition may be replaced.
+  if (GA->getGlobal()->isDeclaration())
+    return false;
+  return !GA->getGlobal()->isWeakForLinker();
+}
+
SDValue
PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
SmallVectorImpl<SDValue> &InVals) const {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCRetInfo(CallConv, isVarArg, getTargetMachine(),
- RVLocs, *DAG.getContext());
+ CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
// Copy all of the result registers out of their specified physreg.
isTailCall, RegsToPass, Ops, NodeTys,
PPCSubTarget);
+ // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
+ if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
+ Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
+
// When performing tail call optimization the callee pops its arguments off
// the stack. Account for this here so these bytes can be pushed back on in
// PPCRegisterInfo::eliminateCallFramePseudoInstr.
int BytesCalleePops =
- (CallConv==CallingConv::Fast && GuaranteedTailCallOpt) ? NumBytes : 0;
+ (CallConv == CallingConv::Fast &&
+ getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
+
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
if (InFlag.getNode())
Ops.push_back(InFlag);
// to the liveout set for the function.
if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
- *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_PPC);
for (unsigned i = 0; i != RVLocs.size(); ++i)
DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size());
}
- Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
-
// Add a NOP immediately after the branch instruction when using the 64-bit
// SVR4 ABI. At link time, if caller and callee are in a different module and
// thus have a different TOC, the call will be replaced with a call to a stub
// which restores the TOC of the caller from the TOC save slot of the current
// stack frame. If caller and callee belong to the same module (and have the
// same TOC), the NOP will remain unchanged.
+
+ bool needsTOCRestore = false;
if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) {
- SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
if (CallOpc == PPCISD::BCTRL_SVR4) {
// This is a call through a function pointer.
// Restore the caller TOC from the save area into R2.
// since r2 is a reserved register (which prevents the register allocator
// from allocating it), resulting in an additional register being
// allocated and an unnecessary move instruction being generated.
- Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag);
- InFlag = Chain.getValue(1);
- } else {
- // Otherwise insert NOP.
- InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Glue, InFlag);
+ needsTOCRestore = true;
+ } else if ((CallOpc == PPCISD::CALL_SVR4) && !isLocalCall(Callee)) {
+ // Otherwise insert NOP for non-local calls.
+ CallOpc = PPCISD::CALL_NOP_SVR4;
}
}
+ Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ if (needsTOCRestore) {
+ SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
DAG.getIntPtrConstant(BytesCalleePops, true),
InFlag);
}
SDValue
-PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG; // Unpack CLI into the locals the code below uses.
+ DebugLoc &dl = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &isTailCall = CLI.IsTailCall; // Reference: the eligibility check below writes back into CLI.
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool isVarArg = CLI.IsVarArg;
+
if (isTailCall)
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
Ins, DAG);
if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
- return LowerCall_SVR4(Chain, Callee, CallConv, isVarArg,
- isTailCall, Outs, OutVals, Ins,
- dl, DAG, InVals);
+ return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
+ isTailCall, Outs, OutVals, Ins,
+ dl, DAG, InVals); // Taken only for SVR4 ABI on a non-PPC64 subtarget.
- return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
- isTailCall, Outs, OutVals, Ins,
- dl, DAG, InVals);
+ return LowerCall_Darwin_Or_64SVR4(Chain, Callee, CallConv, isVarArg,
+ isTailCall, Outs, OutVals, Ins,
+ dl, DAG, InVals); // Every other subtarget/ABI combination.
}
SDValue
-PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
- // See PPCTargetLowering::LowerFormalArguments_SVR4() for a description
+PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
// of the 32-bit SVR4 ABI stack frame layout.
assert((CallConv == CallingConv::C ||
// and restoring the callers stack pointer in this functions epilog. This is
// done because by tail calling the called function might overwrite the value
// in this function's (MF) stack pointer stack slot 0(SP).
- if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast)
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
// Count how many bytes are to be pushed on the stack, including the linkage
// Assign locations to all of the outgoing arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
- ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
// Reserve space for the linkage area on the stack.
CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
// Assign locations to all of the outgoing aggregate by value arguments.
SmallVector<CCValAssign, 16> ByValArgLocs;
- CCState CCByValInfo(CallConv, isVarArg, getTargetMachine(), ByValArgLocs,
- *DAG.getContext());
+ CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ByValArgLocs, *DAG.getContext());
// Reserve stack space for the allocations in CCInfo.
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
SmallVector<SDValue, 8> MemOpChains;
+ bool seenFloatArg = false;
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, j = 0, e = ArgLocs.size();
i != e;
}
if (VA.isRegLoc()) {
+ seenFloatArg |= VA.getLocVT().isFloatingPoint();
// Put argument in a physical register.
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else {
InFlag = Chain.getValue(1);
}
- // Set CR6 to true if this is a vararg call.
+ // Set CR bit 6 to true if this is a vararg call with floating args passed in
+ // registers.
if (isVarArg) {
- SDValue SetCR(DAG.getMachineNode(PPC::CRSET, dl, MVT::i32), 0);
- Chain = DAG.getCopyToReg(Chain, dl, PPC::CR1EQ, SetCR, InFlag);
+ SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, InFlag };
+
+ Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
+ dl, VTs, Ops, InFlag.getNode() ? 2 : 1);
+
InFlag = Chain.getValue(1);
}
}
SDValue
-PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
+PPCTargetLowering::LowerCall_Darwin_Or_64SVR4(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
+ bool isSVR4ABI = PPCSubTarget.isSVR4ABI();
+
unsigned NumOps = Outs.size();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// and restoring the callers stack pointer in this functions epilog. This is
// done because by tail calling the called function might overwrite the value
// in this function's (MF) stack pointer stack slot 0(SP).
- if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast)
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
unsigned nAltivecParamsAtEnd = 0;
unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
- static const unsigned GPR_32[] = { // 32-bit registers.
+ static const uint16_t GPR_32[] = { // 32-bit registers.
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
- static const unsigned GPR_64[] = { // 64-bit registers.
+ static const uint16_t GPR_64[] = { // 64-bit registers.
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const unsigned *FPR = GetFPR();
+ static const uint16_t *FPR = GetFPR();
- static const unsigned VR[] = {
+ static const uint16_t VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
const unsigned NumFPRs = 13;
const unsigned NumVRs = array_lengthof(VR);
- const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
+ const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
}
// FIXME memcpy is used way more than necessary. Correctness first.
+ // Note: "by value" is code for passing a structure by value, not
+ // basic types.
if (Flags.isByVal()) {
+ // Note: Size includes alignment padding, so
+ // struct x { short a; char b; }
+ // will have Size = 4. With #pragma pack(1), it will have Size = 3.
+ // These are the proper values we need for right-justifying the
+ // aggregate in a parameter register for 64-bit SVR4.
unsigned Size = Flags.getByValSize();
- if (Size==1 || Size==2) {
- // Very small objects are passed right-justified.
- // Everything else is passed left-justified.
- EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
+ // FOR DARWIN ONLY: Very small objects are passed right-justified.
+ // Everything else is passed left-justified.
+ // FOR 64-BIT SVR4: All aggregates smaller than 8 bytes must
+ // be passed right-justified.
+ if (Size==1 || Size==2 ||
+ (Size==4 && isSVR4ABI)) {
+ EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
if (GPR_idx != NumGPRs) {
SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
MachinePointerInfo(), VT,
// Copy entire object into memory. There are cases where gcc-generated
// code assumes it is there, even if it could be put entirely into
// registers. (This is not what the doc says.)
- SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
- CallSeqStart.getNode()->getOperand(0),
- Flags, DAG, dl);
- // This must go outside the CALLSEQ_START..END.
- SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
- CallSeqStart.getNode()->getOperand(1));
- DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode());
- Chain = CallSeqStart = NewCallSeqStart;
- // And copy the pieces of it that fit into registers.
+
+ // FIXME: The above statement is likely due to a misunderstanding of the
+ // documents. At least for 64-bit SVR4, all arguments must be copied
+ // into the parameter area BY THE CALLEE in the event that the callee
+ // takes the address of any formal argument. That has not yet been
+ // implemented. However, it is reasonable to use the stack area as a
+ // staging area for the register load.
+
+ // Skip this for small aggregates under 64-bit SVR4, as we will use
+ // the same slot for a right-justified copy, below.
+ if (Size >= 8 || !isSVR4ABI) {
+ SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
+ CallSeqStart.getNode()->getOperand(0),
+ Flags, DAG, dl);
+ // This must go outside the CALLSEQ_START..END.
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.getNode()->getOperand(1));
+ DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
+ NewCallSeqStart.getNode());
+ Chain = CallSeqStart = NewCallSeqStart;
+ }
+
+ // FOR 64-BIT SVR4: When a register is available, pass the
+ // aggregate right-justified.
+ if (isSVR4ABI && Size < 8 && GPR_idx != NumGPRs) {
+ // The easiest way to get this right-justified in a register
+ // is to copy the structure into the rightmost portion of a
+ // local variable slot, then load the whole slot into the
+ // register.
+ // FIXME: The memcpy seems to produce pretty awful code for
+ // small aggregates, particularly for packed ones.
+ // FIXME: It would be preferable to use the slot in the
+ // parameter save area instead of a new local variable.
+ SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
+ CallSeqStart.getNode()->getOperand(0),
+ Flags, DAG, dl);
+
+ // Place the memcpy outside the CALLSEQ_START..END.
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.getNode()->getOperand(1));
+ DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
+ NewCallSeqStart.getNode());
+ Chain = CallSeqStart = NewCallSeqStart;
+
+ // Load the slot into the register.
+ SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+
+ // Done with this argument.
+ ArgOffset += PtrByteSize;
+ continue;
+ }
+
+ // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
+ // copy the pieces of the object that fit into registers from the
+ // parameter save area.
for (unsigned j=0; j<Size; j+=PtrByteSize) {
SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
if (GPR_idx != NumGPRs) {
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
ArgOffset += PtrByteSize;
// Float varargs are always shadowed in available integer registers
if (GPR_idx != NumGPRs) {
SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false,
+ false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
}
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
}
if (VR_idx != NumVRs) {
SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
}
SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
DAG.getConstant(i, PtrVT));
SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
}
Ins, InVals);
}
+/// CanLowerReturn - Report whether the values described by Outs can be
+/// returned under the RetCC_PPC return convention. The verdict is exactly
+/// what CCState::CheckReturn yields for Outs.
+bool PPCTargetLowering::CanLowerReturn(
+    CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
+    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
+  // Location list handed to the CCState that performs the check.
+  SmallVector<CCValAssign, 16> ReturnLocs;
+  CCState RetState(CallConv, isVarArg, MF, getTargetMachine(), ReturnLocs,
+                   Context);
+  return RetState.CheckReturn(Outs, RetCC_PPC);
+}
+
SDValue
PPCTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
DebugLoc dl, SelectionDAG &DAG) const {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
- RVLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
// If this is the first return lowered for this function, add the regs to the
// Load the old link SP.
SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
// Restore the stack pointer.
Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
DAG.getConstant(4, FIPtr.getValueType()));
return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
}
SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
Ops, 4, MVT::i64, MMO);
// Load the value as a double.
SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
// FCFID it and return it.
SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
SDValue Four = DAG.getConstant(4, PtrVT);
SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
// Transform as necessary
SDValue CWD1 =
unsigned TypeShiftAmt = i & (SplatBitSize-1);
// vsplti + shl self.
- if (SextVal == (i << (int)TypeShiftAmt)) {
+ if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
static const unsigned IIDs[] = { // Intrinsic to use for each size.
Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
}
// t = vsplti c, result = vsldoi t, t, 1
- if (SextVal == ((i << 8) | (i < 0 ? 0xFF : 0))) {
+ if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
}
// t = vsplti c, result = vsldoi t, t, 2
- if (SextVal == ((i << 16) | (i < 0 ? 0xFFFF : 0))) {
+ if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
}
// t = vsplti c, result = vsldoi t, t, 3
- if (SextVal == ((i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
+ if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
}
// Check to see if this is a shuffle of 4-byte values. If so, we can use our
// perfect shuffle table to emit an optimal matching sequence.
- SmallVector<int, 16> PermMask;
- SVOp->getMask(PermMask);
+ ArrayRef<int> PermMask = SVOp->getMask();
unsigned PFIndexes[4];
bool isFourElementShuffle = true;
false, false, 0);
// Load it out.
return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
}
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
- case ISD::GlobalTLSAddress: llvm_unreachable("TLS not implemented for PPC");
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
- case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
+ case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
+ case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::VASTART:
return LowerVASTART(Op, DAG, PPCSubTarget);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
}
- return SDValue();
}
void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const {
+ const TargetMachine &TM = getTargetMachine();
DebugLoc dl = N->getDebugLoc();
switch (N->getOpcode()) {
default:
- assert(false && "Do not know how to custom type legalize this operation!");
+ llvm_unreachable("Do not know how to custom type legalize this operation!");
+ case ISD::VAARG: {
+ if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
+ || TM.getSubtarget<PPCSubtarget>().isPPC64())
+ return;
+
+ EVT VT = N->getValueType(0);
+
+ if (VT == MVT::i64) {
+ SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, PPCSubTarget);
+
+ Results.push_back(NewNode);
+ Results.push_back(NewNode.getValue(1));
+ }
return;
+ }
case ISD::FP_ROUND_INREG: {
assert(N->getValueType(0) == MVT::ppcf128);
assert(N->getOperand(0).getValueType() == MVT::ppcf128);
.addReg(TmpReg).addReg(MaskReg);
BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
.addReg(Tmp3Reg).addReg(Tmp2Reg);
- BuildMI(BB, dl, TII->get(PPC::STWCX))
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
.addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
MachineFunction *F = BB->getParent();
- if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
- MI->getOpcode() == PPC::SELECT_CC_I8 ||
- MI->getOpcode() == PPC::SELECT_CC_F4 ||
- MI->getOpcode() == PPC::SELECT_CC_F8 ||
- MI->getOpcode() == PPC::SELECT_CC_VRRC) {
+ if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ MI->getOpcode() == PPC::SELECT_CC_I8)) {
+ unsigned OpCode = MI->getOpcode() == PPC::SELECT_CC_I8 ?
+ PPC::ISEL8 : PPC::ISEL;
+ unsigned SelectPred = MI->getOperand(4).getImm();
+ DebugLoc dl = MI->getDebugLoc();
+
+ // The SelectPred is ((BI << 5) | BO) for a BCC
+ unsigned BO = SelectPred & 0xF;
+ assert((BO == 12 || BO == 4) && "invalid predicate BO field for isel");
+
+ unsigned TrueOpNo, FalseOpNo;
+ if (BO == 12) {
+ TrueOpNo = 2;
+ FalseOpNo = 3;
+ } else {
+ TrueOpNo = 3;
+ FalseOpNo = 2;
+ SelectPred = PPC::InvertPredicate((PPC::Predicate)SelectPred);
+ }
+
+ BuildMI(*BB, MI, dl, TII->get(OpCode), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(TrueOpNo).getReg())
+ .addReg(MI->getOperand(FalseOpNo).getReg())
+ .addImm(SelectPred).addReg(MI->getOperand(1).getReg());
+ } else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ MI->getOpcode() == PPC::SELECT_CC_I8 ||
+ MI->getOpcode() == PPC::SELECT_CC_F4 ||
+ MI->getOpcode() == PPC::SELECT_CC_F8 ||
+ MI->getOpcode() == PPC::SELECT_CC_VRRC) {
+
// The incoming instruction knows the destination vreg to set, the
// condition code register to branch on, the true/false values to
//===----------------------------------------------------------------------===//
void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const {
- KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
switch (Op.getOpcode()) {
default: break;
case PPCISD::LBRX: {
// but allow it at the lowest weight.
if (CallOperandVal == NULL)
return CW_Default;
- const Type *type = CallOperandVal->getType();
+ Type *type = CallOperandVal->getType();
// Look at the constraint type.
switch (*constraint) {
default:
case 'b': // R1-R31
case 'r': // R0-R31
if (VT == MVT::i64 && PPCSubTarget.isPPC64())
- return std::make_pair(0U, PPC::G8RCRegisterClass);
- return std::make_pair(0U, PPC::GPRCRegisterClass);
+ return std::make_pair(0U, &PPC::G8RCRegClass);
+ return std::make_pair(0U, &PPC::GPRCRegClass);
case 'f':
if (VT == MVT::f32)
- return std::make_pair(0U, PPC::F4RCRegisterClass);
- else if (VT == MVT::f64)
- return std::make_pair(0U, PPC::F8RCRegisterClass);
+ return std::make_pair(0U, &PPC::F4RCRegClass);
+ if (VT == MVT::f64)
+ return std::make_pair(0U, &PPC::F8RCRegClass);
break;
case 'v':
- return std::make_pair(0U, PPC::VRRCRegisterClass);
+ return std::make_pair(0U, &PPC::VRRCRegClass);
case 'y': // crrc
- return std::make_pair(0U, PPC::CRRCRegisterClass);
+ return std::make_pair(0U, &PPC::CRRCRegClass);
}
}
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
-void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Letter,
+void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
std::vector<SDValue>&Ops,
SelectionDAG &DAG) const {
SDValue Result(0,0);
+
+ // Only support length 1 constraints.
+ if (Constraint.length() > 1) return;
+
+ char Letter = Constraint[0];
switch (Letter) {
default: break;
case 'I':
}
// Handle standard constraint letters.
- TargetLowering::LowerAsmOperandForConstraint(Op, Letter, Ops, DAG);
+ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- const Type *Ty) const {
+ Type *Ty) const {
// FIXME: PPC does not allow r+i addressing modes for vectors!
// PPC allows a sign-extended 16-bit immediate field.
/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode for load / store of the
/// given type.
-bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,const Type *Ty) const{
+bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,Type *Ty) const{
// PPC allows a sign-extended 16-bit immediate field.
return (V > -(1 << 16) && V < (1 << 16)-1);
}
-bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
+bool PPCTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const {
return false;
}
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, getPointerTy(),
FrameAddr, Offset),
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
}
// Just load the return address off the stack.
SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- RetAddrFI, MachinePointerInfo(), false, false, 0);
+ RetAddrFI, MachinePointerInfo(), false, false, false, 0);
}
SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setFrameAddressIsTaken(true);
- bool is31 = (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()) &&
+ bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects()) &&
MFI->getStackSize() &&
- !MF.getFunction()->hasFnAttr(Attribute::Naked);
+ !MF.getFunction()->getFnAttributes().hasNakedAttr();
unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) :
(is31 ? PPC::R31 : PPC::R1);
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
PtrVT);
while (Depth--)
FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
- FrameAddr, MachinePointerInfo(), false, false, 0);
+ FrameAddr, MachinePointerInfo(), false, false,
+ false, 0);
return FrameAddr;
}
/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
/// means there isn't a need to check it against alignment requirement,
/// probably because the source does not need to be loaded. If
-/// 'NonScalarIntSafe' is true, that means it's safe to return a
+/// 'IsZeroVal' is true, that means it's safe to return a
/// non-scalar-integer type, e.g. empty string source, constant, or loaded
/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
/// constant so it does not need to be loaded.
/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
- bool NonScalarIntSafe,
+ bool IsZeroVal,
bool MemcpyStrSrc,
MachineFunction &MF) const {
if (this->PPCSubTarget.isPPC64()) {
return MVT::i32;
}
}
+
+/// isFMAFasterThanMulAndAdd - Tell the caller whether a fused multiply-add
+/// should be preferred over a separate mul followed by an add for type VT.
+/// When this returns true (and FMAs are legal) fmuladd intrinsics become
+/// FMAs; otherwise fmuladd is expanded to mul + add.
+///
+/// Only the simple types f32, f64 and v4f32 answer true; every other type,
+/// including any non-simple EVT, answers false.
+bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const {
+  if (VT.isSimple()) {
+    switch (VT.getSimpleVT().SimpleTy) {
+    default:
+      return false;
+    case MVT::f32:
+    case MVT::f64:
+    case MVT::v4f32:
+      return true;
+    }
+  }
+
+  // Non-simple value types never qualify.
+  return false;
+}
+
+/// getSchedulingPreference - Pick the scheduling preference for node N.
+/// Sched::ILP is used unless the DisableILPPref option
+/// (-disable-ppc-ilp-pref) is set, in which case the choice is delegated to
+/// TargetLowering::getSchedulingPreference.
+Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
+  return DisableILPPref ? TargetLowering::getSchedulingPreference(N)
+                        : Sched::ILP;
+}
+