#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
static cl::opt<bool>
-EnableARMFastISel("arm-fast-isel",
- cl::desc("Turn on experimental ARM fast-isel support"),
+DisableARMFastISel("disable-arm-fast-isel",
+ cl::desc("Turn off experimental ARM fast-isel support"),
cl::init(false), cl::Hidden);
namespace {
// Instruction selection routines.
private:
- virtual bool SelectLoad(const Instruction *I);
- virtual bool SelectStore(const Instruction *I);
- virtual bool SelectBranch(const Instruction *I);
- virtual bool SelectCmp(const Instruction *I);
- virtual bool SelectFPExt(const Instruction *I);
- virtual bool SelectFPTrunc(const Instruction *I);
- virtual bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode);
- virtual bool SelectSIToFP(const Instruction *I);
- virtual bool SelectFPToSI(const Instruction *I);
- virtual bool SelectSDiv(const Instruction *I);
- virtual bool SelectSRem(const Instruction *I);
- virtual bool SelectCall(const Instruction *I);
- virtual bool SelectSelect(const Instruction *I);
+ bool SelectLoad(const Instruction *I);
+ bool SelectStore(const Instruction *I);
+ bool SelectBranch(const Instruction *I);
+ bool SelectCmp(const Instruction *I);
+ bool SelectFPExt(const Instruction *I);
+ bool SelectFPTrunc(const Instruction *I);
+ bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode);
+ bool SelectSIToFP(const Instruction *I);
+ bool SelectFPToSI(const Instruction *I);
+ bool SelectSDiv(const Instruction *I);
+ bool SelectSRem(const Instruction *I);
+ bool SelectCall(const Instruction *I);
+ bool SelectSelect(const Instruction *I);
+ bool SelectRet(const Instruction *I);
// Utility routines.
private:
bool isTypeLegal(const Type *Ty, EVT &VT);
bool isLoadTypeLegal(const Type *Ty, EVT &VT);
- bool ARMEmitLoad(EVT VT, unsigned &ResultReg, unsigned Reg, int Offset);
- bool ARMEmitStore(EVT VT, unsigned SrcReg, unsigned Reg, int Offset);
- bool ARMLoadAlloca(const Instruction *I, EVT VT);
- bool ARMStoreAlloca(const Instruction *I, unsigned SrcReg, EVT VT);
- bool ARMComputeRegOffset(const Value *Obj, unsigned &Reg, int &Offset);
+ bool ARMEmitLoad(EVT VT, unsigned &ResultReg, unsigned Base, int Offset);
+ bool ARMEmitStore(EVT VT, unsigned SrcReg, unsigned Base, int Offset);
+ bool ARMComputeRegOffset(const Value *Obj, unsigned &Base, int &Offset);
+ void ARMSimplifyRegOffset(unsigned &Base, int &Offset, EVT VT);
unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
unsigned ARMMaterializeInt(const Constant *C, EVT VT);
unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
// Call handling routines.
private:
+ bool FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
+ unsigned &ResultReg);
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return);
bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
SmallVectorImpl<unsigned> &ArgRegs,
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::LDRcp), DestReg)
.addConstantPoolIndex(Idx)
- .addReg(0).addImm(0));
+ .addImm(0));
return DestReg;
}
if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
EVT VT;
- if (!isTypeLegal(AI->getType(), VT)) return false;
+ if (!isLoadTypeLegal(AI->getType(), VT)) return false;
DenseMap<const AllocaInst*, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
}
// Computes the Reg+Offset to get to an object.
-bool ARMFastISel::ARMComputeRegOffset(const Value *Obj, unsigned &Reg,
+bool ARMFastISel::ARMComputeRegOffset(const Value *Obj, unsigned &Base,
int &Offset) {
// Some boilerplate from the X86 FastISel.
const User *U = NULL;
switch (Opcode) {
default:
break;
+ case Instruction::BitCast: {
+ // Look through bitcasts.
+ return ARMComputeRegOffset(U->getOperand(0), Base, Offset);
+ }
+ case Instruction::IntToPtr: {
+ // Look past no-op inttoptrs.
+ if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return ARMComputeRegOffset(U->getOperand(0), Base, Offset);
+ break;
+ }
+ case Instruction::PtrToInt: {
+ // Look past no-op ptrtoints.
+ if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return ARMComputeRegOffset(U->getOperand(0), Base, Offset);
+ break;
+ }
+ case Instruction::GetElementPtr: {
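+    // Remember the incoming state; we restore it below if the GEP turns out
+    // to be unsupported.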
+ int SavedOffset = Offset;
+ unsigned SavedBase = Base;
+ int TmpOffset = Offset;
+
+ // Iterate through the GEP folding the constants into offsets where
+ // we can.
+ gep_type_iterator GTI = gep_type_begin(U);
+ for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
+ i != e; ++i, ++GTI) {
+ const Value *Op = *i;
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = TD.getStructLayout(STy);
+ unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
+ TmpOffset += SL->getElementOffset(Idx);
+ } else {
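+        // A sequential index: scale by the element size, peeling adds with
+        // constant operands so their constants fold into the offset.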
+ uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
+ SmallVector<const Value *, 4> Worklist;
+ Worklist.push_back(Op);
+ do {
+ Op = Worklist.pop_back_val();
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // Constant-offset addressing.
+ TmpOffset += CI->getSExtValue() * S;
+ } else if (isa<AddOperator>(Op) &&
+ isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
+ // An add with a constant operand. Fold the constant.
+ ConstantInt *CI =
+ cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+ TmpOffset += CI->getSExtValue() * S;
+ // Add the other operand back to the work list.
+ Worklist.push_back(cast<AddOperator>(Op)->getOperand(0));
+ } else
+ goto unsupported_gep;
+ } while (!Worklist.empty());
+ }
+ }
+
+ // Try to grab the base operand now.
+ Offset = TmpOffset;
+ if (ARMComputeRegOffset(U->getOperand(0), Base, Offset)) return true;
+
+ // We failed, restore everything and try the other options.
+ Offset = SavedOffset;
+ Base = SavedBase;
+
+ unsupported_gep:
+ break;
+ }
case Instruction::Alloca: {
- assert(false && "Alloca should have been handled earlier!");
- return false;
+ const AllocaInst *AI = cast<AllocaInst>(Obj);
+ unsigned Reg = TargetMaterializeAlloca(AI);
+
+ if (Reg == 0) return false;
+
+ Base = Reg;
+ return true;
}
}
- // FIXME: Handle global variables.
+ // Materialize the global variable's address into a reg which can
+ // then be used later to load the variable.
if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) {
- (void)GV;
- return false;
+ unsigned Tmp = ARMMaterializeGV(GV, TLI.getValueType(Obj->getType()));
+ if (Tmp == 0) return false;
+
+ Base = Tmp;
+ return true;
}
// Try to get this in a register if nothing else has worked.
- Reg = getRegForValue(Obj);
- if (Reg == 0) return false;
+ if (Base == 0) Base = getRegForValue(Obj);
+ return Base != 0;
+}
- // Since the offset may be too large for the load instruction
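+// Fold Base+Offset into a new base register when Offset does not fit the
+// immediate field of the load/store addressing mode for VT.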
+void ARMFastISel::ARMSimplifyRegOffset(unsigned &Base, int &Offset, EVT VT) {
+ assert(VT.isSimple() && "Non-simple types are invalid here!");
+
+ bool needsLowering = false;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ assert(false && "Unhandled load/store type!");
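+    // Fall through: in release builds treat an unhandled type as an
+    // integer-sized access.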
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ // Integer loads/stores handle 12-bit offsets.
+ needsLowering = ((Offset & 0xfff) != Offset);
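+      // (A negative offset also fails this check and gets folded below.)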
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ // Floating point operands handle 8-bit offsets.
+ needsLowering = ((Offset & 0xff) != Offset);
+ break;
+ }
+
+  // If the offset doesn't fit the load/store instruction's immediate field,
// get the reg+offset into a register.
- // TODO: Verify the additions work, otherwise we'll need to add the
- // offset instead of 0 to the instructions and do all sorts of operand
- // munging.
- // TODO: Optimize this somewhat.
- if (Offset != 0) {
+ if (needsLowering) {
ARMCC::CondCodes Pred = ARMCC::AL;
unsigned PredReg = 0;
+ TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass :
+ ARM::GPRRegisterClass;
+ unsigned BaseReg = createResultReg(RC);
+
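+    // Compute Base + Offset into the fresh register; the load/store will
+    // then use BaseReg with a zero offset.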
if (!isThumb)
emitARMRegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- Reg, Reg, Offset, Pred, PredReg,
+ BaseReg, Base, Offset, Pred, PredReg,
static_cast<const ARMBaseInstrInfo&>(TII));
else {
assert(AFI->isThumb2Function());
emitT2RegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- Reg, Reg, Offset, Pred, PredReg,
+ BaseReg, Base, Offset, Pred, PredReg,
static_cast<const ARMBaseInstrInfo&>(TII));
}
+ Offset = 0;
+ Base = BaseReg;
}
- return true;
-}
-
-bool ARMFastISel::ARMLoadAlloca(const Instruction *I, EVT VT) {
- Value *Op0 = I->getOperand(0);
-
- // Promote load/store types.
- if (VT == MVT::i8 || VT == MVT::i16) VT = MVT::i32;
-
- // Verify it's an alloca.
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(Op0)) {
- DenseMap<const AllocaInst*, int>::iterator SI =
- FuncInfo.StaticAllocaMap.find(AI);
-
- if (SI != FuncInfo.StaticAllocaMap.end()) {
- TargetRegisterClass* RC = TLI.getRegClassFor(VT);
- unsigned ResultReg = createResultReg(RC);
- TII.loadRegFromStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt,
- ResultReg, SI->second, RC,
- TM.getRegisterInfo());
- UpdateValueMap(I, ResultReg);
- return true;
- }
- }
- return false;
}
bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg,
- unsigned Reg, int Offset) {
+ unsigned Base, int Offset) {
assert(VT.isSimple() && "Non-simple types are invalid here!");
unsigned Opc;
// This is mostly going to be Neon/vector support.
return false;
case MVT::i16:
- Opc = isThumb ? ARM::t2LDRHi8 : ARM::LDRH;
+ Opc = isThumb ? ARM::t2LDRHi12 : ARM::LDRH;
RC = ARM::GPRRegisterClass;
- VT = MVT::i32;
break;
case MVT::i8:
- Opc = isThumb ? ARM::t2LDRBi8 : ARM::LDRB;
+ Opc = isThumb ? ARM::t2LDRBi12 : ARM::LDRBi12;
RC = ARM::GPRRegisterClass;
- VT = MVT::i32;
break;
case MVT::i32:
- Opc = isThumb ? ARM::t2LDRi8 : ARM::LDR;
+ Opc = isThumb ? ARM::t2LDRi12 : ARM::LDRi12;
RC = ARM::GPRRegisterClass;
break;
case MVT::f32:
ResultReg = createResultReg(RC);
- // For now with the additions above the offset should be zero - thus we
- // can always fit into an i8.
- assert(Offset == 0 && "Offset not zero!");
+ ARMSimplifyRegOffset(Base, Offset, VT);
- // The thumb and floating point instructions both take 2 operands, ARM takes
- // another register.
- if (isFloat || isThumb)
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(Opc), ResultReg)
- .addReg(Reg).addImm(Offset));
- else
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(Opc), ResultReg)
- .addReg(Reg).addReg(0).addImm(Offset));
+  // addrmode5 encodes its immediate as a word offset: the selection dag
+  // divides the byte offset by 4 and the instruction scales it back up.
+  // Do the same division here.
+ if (isFloat)
+ Offset /= 4;
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addReg(Base).addImm(Offset));
return true;
}
if (!isLoadTypeLegal(I->getType(), VT))
return false;
- // If we're an alloca we know we have a frame index and can emit the load
- // directly in short order.
- if (ARMLoadAlloca(I, VT))
- return true;
-
// Our register and offset with innocuous defaults.
- unsigned Reg = 0;
+ unsigned Base = 0;
int Offset = 0;
// See if we can handle this as Reg + Offset
- if (!ARMComputeRegOffset(I->getOperand(0), Reg, Offset))
+ if (!ARMComputeRegOffset(I->getOperand(0), Base, Offset))
return false;
unsigned ResultReg;
- if (!ARMEmitLoad(VT, ResultReg, Reg, Offset /* 0 */)) return false;
+ if (!ARMEmitLoad(VT, ResultReg, Base, Offset)) return false;
UpdateValueMap(I, ResultReg);
return true;
}
-bool ARMFastISel::ARMStoreAlloca(const Instruction *I, unsigned SrcReg, EVT VT){
- Value *Op1 = I->getOperand(1);
-
- // Promote load/store types.
- if (VT == MVT::i8 || VT == MVT::i16) VT = MVT::i32;
-
- // Verify it's an alloca.
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) {
- DenseMap<const AllocaInst*, int>::iterator SI =
- FuncInfo.StaticAllocaMap.find(AI);
-
- if (SI != FuncInfo.StaticAllocaMap.end()) {
- TargetRegisterClass* RC = TLI.getRegClassFor(VT);
- assert(SrcReg != 0 && "Nothing to store!");
- TII.storeRegToStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt,
- SrcReg, true /*isKill*/, SI->second, RC,
- TM.getRegisterInfo());
- return true;
- }
- }
- return false;
-}
-
bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg,
- unsigned DstReg, int Offset) {
+ unsigned Base, int Offset) {
unsigned StrOpc;
bool isFloat = false;
switch (VT.getSimpleVT().SimpleTy) {
default: return false;
case MVT::i1:
- case MVT::i8: StrOpc = isThumb ? ARM::t2STRBi8 : ARM::STRB; break;
- case MVT::i16: StrOpc = isThumb ? ARM::t2STRHi8 : ARM::STRH; break;
- case MVT::i32: StrOpc = isThumb ? ARM::t2STRi8 : ARM::STR; break;
+ case MVT::i8:
+ StrOpc = isThumb ? ARM::t2STRBi12 : ARM::STRB;
+ break;
+ case MVT::i16:
+ StrOpc = isThumb ? ARM::t2STRHi12 : ARM::STRH;
+ break;
+ case MVT::i32:
+ StrOpc = isThumb ? ARM::t2STRi12 : ARM::STR;
+ break;
case MVT::f32:
if (!Subtarget->hasVFP2()) return false;
StrOpc = ARM::VSTRS;
break;
}
+ ARMSimplifyRegOffset(Base, Offset, VT);
+
+  // addrmode5 encodes its immediate as a word offset: the selection dag
+  // divides the byte offset by 4 and the instruction scales it back up.
+  // Do the same division here.
+ if (isFloat)
+ Offset /= 4;
+
// The thumb addressing mode has operands swapped from the arm addressing
// mode, the floating point one only has two operands.
if (isFloat || isThumb)
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(StrOpc))
- .addReg(SrcReg).addReg(DstReg).addImm(Offset));
+ .addReg(SrcReg).addReg(Base).addImm(Offset));
else
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(StrOpc))
- .addReg(SrcReg).addReg(DstReg).addReg(0).addImm(Offset));
+ .addReg(SrcReg).addReg(Base).addReg(0).addImm(Offset));
return true;
}
if (SrcReg == 0)
return false;
- // If we're an alloca we know we have a frame index and can emit the store
- // quickly.
- if (ARMStoreAlloca(I, SrcReg, VT))
- return true;
-
// Our register and offset with innocuous defaults.
- unsigned Reg = 0;
+ unsigned Base = 0;
int Offset = 0;
// See if we can handle this as Reg + Offset
- if (!ARMComputeRegOffset(I->getOperand(1), Reg, Offset))
+ if (!ARMComputeRegOffset(I->getOperand(1), Base, Offset))
return false;
- if (!ARMEmitStore(VT, SrcReg, Reg, Offset /* 0 */)) return false;
+ if (!ARMEmitStore(VT, SrcReg, Base, Offset)) return false;
return true;
}
else if (VT == MVT::i128)
LC = RTLIB::SREM_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
-
+
return ARMEmitLibcall(I, LC);
}
// Call Handling Code
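+// Try the target-independent FastEmit_r hook to emit an extend of Src from
+// SrcVT to DstVT; on success ResultReg holds the extended value.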
+bool ARMFastISel::FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src,
+ EVT SrcVT, unsigned &ResultReg) {
+ unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
+ Src, /*TODO: Kill=*/false);
+
+  if (RR == 0) return false;
+
+  ResultReg = RR;
+  return true;
+}
+
// This is largely taken directly from CCAssignFnForNode - we don't support
// varargs in FastISel so that part has been removed.
// TODO: We may not support all of this.
switch (CC) {
default:
llvm_unreachable("Unsupported calling convention");
- case CallingConv::C:
case CallingConv::Fast:
+    // Ignore fastcc for now; the casts below silence unused-function
+    // warnings.
+ (void)RetFastCC_ARM_APCS;
+ (void)FastCC_ARM_APCS;
+ // Fallthrough
+ case CallingConv::C:
// Use target triple & subtarget features to do actual dispatch.
if (Subtarget->isAAPCS_ABI()) {
if (Subtarget->hasVFP2() &&
// Issue CALLSEQ_START
unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackDown))
- .addImm(NumBytes);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(AdjStackDown))
+ .addImm(NumBytes));
// Process the args.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
unsigned Arg = ArgRegs[VA.getValNo()];
EVT ArgVT = ArgVTs[VA.getValNo()];
+ // We don't handle NEON parameters yet.
+ if (VA.getLocVT().isVector() && VA.getLocVT().getSizeInBits() > 64)
+ return false;
+
// Handle arg promotion, etc.
switch (VA.getLocInfo()) {
case CCValAssign::Full: break;
- default:
- // TODO: Handle arg promotion.
- return false;
+ case CCValAssign::SExt: {
+ bool Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+        assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::ZExt: {
+ bool Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+        assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::AExt: {
+ bool Emitted = FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ if (!Emitted)
+ Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ if (!Emitted)
+ Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+
+        assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::BCvt: {
+ unsigned BC = FastEmit_r(ArgVT.getSimpleVT(),
+ VA.getLocVT().getSimpleVT(),
+ ISD::BIT_CONVERT, Arg, /*TODO: Kill=*/false);
+ assert(BC != 0 && "Failed to emit a bitcast!");
+ Arg = BC;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ default: llvm_unreachable("Unknown arg promotion!");
}
// Now copy/store arg to correct locations.
- if (VA.isRegLoc()) {
+ if (VA.isRegLoc() && !VA.needsCustom()) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
VA.getLocReg())
.addReg(Arg);
RegArgs.push_back(VA.getLocReg());
+ } else if (VA.needsCustom()) {
+ // TODO: We need custom lowering for vector (v2f64) args.
+ if (VA.getLocVT() != MVT::f64) return false;
+
+ CCValAssign &NextVA = ArgLocs[++i];
+
+ // TODO: Only handle register args for now.
+      if (!(VA.isRegLoc() && NextVA.isRegLoc())) return false;
+
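+      // VMOVRRD splits the f64 in Arg into the two GPRs assigned by the
+      // calling convention.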
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVRRD), VA.getLocReg())
+ .addReg(NextVA.getLocReg(), RegState::Define)
+ .addReg(Arg));
+ RegArgs.push_back(VA.getLocReg());
+ RegArgs.push_back(NextVA.getLocReg());
} else {
- // Need to store
- return false;
+ assert(VA.isMemLoc());
+ // Need to store on the stack.
+ unsigned Base = ARM::SP;
+ int Offset = VA.getLocMemOffset();
+
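+      // Reuse the store emission above to write the arg at SP+Offset.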
+ if (!ARMEmitStore(ArgVT, Arg, Base, Offset)) return false;
}
}
-
return true;
}
unsigned &NumBytes) {
// Issue CALLSEQ_END
unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp))
- .addImm(NumBytes).addImm(0);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(AdjStackUp))
+ .addImm(NumBytes).addImm(0));
// Now the return value.
if (RetVT.getSimpleVT().SimpleTy != MVT::isVoid) {
if (RVLocs.size() == 2 && RetVT.getSimpleVT().SimpleTy == MVT::f64) {
-    // For this move we copy into two registers and then move into the
-    // double fp reg we want.
+    // Build the f64 result directly from the two return registers with a
+    // VMOVDRR.
- // TODO: Are the copies necessary?
- TargetRegisterClass *CopyRC = TLI.getRegClassFor(MVT::i32);
- unsigned Copy1 = createResultReg(CopyRC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- Copy1).addReg(RVLocs[0].getLocReg());
- UsedRegs.push_back(RVLocs[0].getLocReg());
-
- unsigned Copy2 = createResultReg(CopyRC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- Copy2).addReg(RVLocs[1].getLocReg());
- UsedRegs.push_back(RVLocs[1].getLocReg());
-
EVT DestVT = RVLocs[0].getValVT();
TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
unsigned ResultReg = createResultReg(DstRC);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::VMOVDRR), ResultReg)
- .addReg(Copy1).addReg(Copy2));
+ .addReg(RVLocs[0].getLocReg())
+ .addReg(RVLocs[1].getLocReg()));
+
+ UsedRegs.push_back(RVLocs[0].getLocReg());
+ UsedRegs.push_back(RVLocs[1].getLocReg());
// Finally update the result.
UpdateValueMap(I, ResultReg);
} else {
- assert(RVLocs.size() == 1 && "Can't handle non-double multi-reg retvals!");
+    assert(RVLocs.size() == 1 &&
+           "Can't handle non-double multi-reg retvals!");
EVT CopyVT = RVLocs[0].getValVT();
TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
return true;
}
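+// Lower a 'ret' instruction: copy the return value into its ABI-assigned
+// register and emit the target return.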
+bool ARMFastISel::SelectRet(const Instruction *I) {
+ const ReturnInst *Ret = cast<ReturnInst>(I);
+ const Function &F = *I->getParent()->getParent();
+
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ if (F.isVarArg())
+ return false;
+
+ CallingConv::ID CC = F.getCallingConv();
+ if (Ret->getNumOperands() > 0) {
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
+ Outs, TLI);
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ValLocs;
+ CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext());
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */));
+
+ const Value *RV = Ret->getOperand(0);
+ unsigned Reg = getRegForValue(RV);
+ if (Reg == 0)
+ return false;
+
+ // Only handle a single return value for now.
+ if (ValLocs.size() != 1)
+ return false;
+
+ CCValAssign &VA = ValLocs[0];
+
+ // Don't bother handling odd stuff for now.
+ if (VA.getLocInfo() != CCValAssign::Full)
+ return false;
+ // Only handle register returns for now.
+ if (!VA.isRegLoc())
+ return false;
+ // TODO: For now, don't try to handle cases where getLocInfo()
+ // says Full but the types don't match.
+ if (VA.getValVT() != TLI.getValueType(RV->getType()))
+ return false;
+
+ // Make the copy.
+ unsigned SrcReg = Reg + VA.getValNo();
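+    // getRegForValue returns the first of any consecutive registers assigned
+    // to the value; with a single return value ValNo is 0.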
+ unsigned DstReg = VA.getLocReg();
+ const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
+ // Avoid a cross-class copy. This is very unlikely.
+ if (!SrcRC->contains(DstReg))
+ return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ DstReg).addReg(SrcReg);
+
+ // Mark the register as live out of the function.
+ MRI.addLiveOut(VA.getLocReg());
+ }
+
+ unsigned RetOpc = isThumb ? ARM::tBX_RET : ARM::BX_RET;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(RetOpc)));
+ return true;
+}
+
// A quick function that will emit a call for a named libcall in F with the
// vector of passed arguments for the Instruction in I. We can assume that we
// can emit a call for any libcall we can produce. This is an abridged version
// Check the calling convention.
ImmutableCallSite CS(CI);
CallingConv::ID CC = CS.getCallingConv();
+
// TODO: Avoid some calling conventions?
- if (CC != CallingConv::C) {
- // errs() << "Can't handle calling convention: " << CC << "\n";
- return false;
- }
// Let SDISel handle vararg functions.
const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
return SelectCall(I);
case Instruction::Select:
return SelectSelect(I);
+ case Instruction::Ret:
+ return SelectRet(I);
default: break;
}
return false;
// Completely untested on non-darwin.
const TargetMachine &TM = funcInfo.MF->getTarget();
const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
- if (Subtarget->isTargetDarwin() && EnableARMFastISel)
+ if (Subtarget->isTargetDarwin() && !DisableARMFastISel)
return new ARMFastISel(funcInfo);
return 0;
}