X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMFastISel.cpp;h=3eac44bc8d7098a08c87ac6277f4ac97069f15a0;hb=6b15639e267575a2c95f89d6b266e0fcd9231d91;hp=450a9bc605c53a5ffea1b26ef9c4b936d576b2fb;hpb=47650ece374315ce4ff5e483f6165ae37752f230;p=oota-llvm.git

diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 450a9bc605c..3eac44bc8d7 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -33,7 +33,9 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -46,19 +48,14 @@
 using namespace llvm;
 
 static cl::opt<bool>
-EnableARMFastISel("arm-fast-isel",
-                  cl::desc("Turn on experimental ARM fast-isel support"),
+DisableARMFastISel("disable-arm-fast-isel",
+                   cl::desc("Turn off experimental ARM fast-isel support"),
                    cl::init(false), cl::Hidden);
 
 namespace {
 
 class ARMFastISel : public FastISel {
 
-  typedef struct AddrBase {
-    unsigned Reg;
-    unsigned FrameIndex;
-  } AddrBase;
-
   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
   /// make the right decision when generating code for different targets.
   const ARMSubtarget *Subtarget;
@@ -122,28 +119,29 @@ class ARMFastISel : public FastISel {
 
     // Instruction selection routines.
   private:
-    virtual bool SelectLoad(const Instruction *I);
-    virtual bool SelectStore(const Instruction *I);
-    virtual bool SelectBranch(const Instruction *I);
-    virtual bool SelectCmp(const Instruction *I);
-    virtual bool SelectFPExt(const Instruction *I);
-    virtual bool SelectFPTrunc(const Instruction *I);
-    virtual bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode);
-    virtual bool SelectSIToFP(const Instruction *I);
-    virtual bool SelectFPToSI(const Instruction *I);
-    virtual bool SelectSDiv(const Instruction *I);
-    virtual bool SelectSRem(const Instruction *I);
-    virtual bool SelectCall(const Instruction *I);
-    virtual bool SelectSelect(const Instruction *I);
+    bool SelectLoad(const Instruction *I);
+    bool SelectStore(const Instruction *I);
+    bool SelectBranch(const Instruction *I);
+    bool SelectCmp(const Instruction *I);
+    bool SelectFPExt(const Instruction *I);
+    bool SelectFPTrunc(const Instruction *I);
+    bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode);
+    bool SelectSIToFP(const Instruction *I);
+    bool SelectFPToSI(const Instruction *I);
+    bool SelectSDiv(const Instruction *I);
+    bool SelectSRem(const Instruction *I);
+    bool SelectCall(const Instruction *I);
+    bool SelectSelect(const Instruction *I);
+    bool SelectRet(const Instruction *I);
 
     // Utility routines.
  private:
    bool isTypeLegal(const Type *Ty, EVT &VT);
    bool isLoadTypeLegal(const Type *Ty, EVT &VT);
-    bool ARMEmitLoad(EVT VT, unsigned &ResultReg, AddrBase Base, int Offset);
-    bool ARMEmitStore(EVT VT, unsigned SrcReg, AddrBase Base, int Offset);
-    bool ARMComputeRegOffset(const Value *Obj, AddrBase &Base, int &Offset);
-    void ARMSimplifyRegOffset(AddrBase &Base, int &Offset, EVT VT);
+    bool ARMEmitLoad(EVT VT, unsigned &ResultReg, unsigned Base, int Offset);
+    bool ARMEmitStore(EVT VT, unsigned SrcReg, unsigned Base, int Offset);
+    bool ARMComputeRegOffset(const Value *Obj, unsigned &Base, int &Offset);
+    void ARMSimplifyRegOffset(unsigned &Base, int &Offset, EVT VT);
    unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
    unsigned ARMMaterializeInt(const Constant *C, EVT VT);
    unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
@@ -152,6 +150,8 @@ class ARMFastISel : public FastISel {
 
    // Call handling routines.
  private:
+    bool FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
+                        unsigned &ResultReg);
    CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return);
    bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                         SmallVectorImpl<unsigned> &ArgRegs,
@@ -449,7 +449,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                           TII.get(ARM::LDRcp), DestReg)
                   .addConstantPoolIndex(Idx)
-                  .addReg(0).addImm(0));
+                  .addImm(0));
 
   return DestReg;
 }
@@ -521,7 +521,7 @@ unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
   if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
 
   EVT VT;
-  if (!isTypeLegal(AI->getType(), VT)) return false;
+  if (!isLoadTypeLegal(AI->getType(), VT)) return false;
 
   DenseMap<const AllocaInst*, int>::iterator SI =
     FuncInfo.StaticAllocaMap.find(AI);
@@ -565,7 +565,7 @@ bool ARMFastISel::isLoadTypeLegal(const Type *Ty, EVT &VT) {
 }
 
 // Computes the Reg+Offset to get to an object.
-bool ARMFastISel::ARMComputeRegOffset(const Value *Obj, AddrBase &Base,
+bool ARMFastISel::ARMComputeRegOffset(const Value *Obj, unsigned &Base,
                                       int &Offset) {
   // Some boilerplate from the X86 FastISel.
   const User *U = NULL;
@@ -610,7 +610,7 @@ bool ARMFastISel::ARMComputeRegOffset(const Value *Obj, AddrBase &Base,
     }
     case Instruction::GetElementPtr: {
       int SavedOffset = Offset;
-      AddrBase SavedBase = Base;
+      unsigned SavedBase = Base;
       int TmpOffset = Offset;
 
       // Iterate through the GEP folding the constants into offsets where
@@ -632,7 +632,7 @@ bool ARMFastISel::ARMComputeRegOffset(const Value *Obj, AddrBase &Base,
         if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
           // Constant-offset addressing.
           TmpOffset += CI->getSExtValue() * S;
-        } else if (0 && isa<AddOperator>(Op) &&
+        } else if (isa<AddOperator>(Op) &&
                    isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
           // An add with a constant operand. Fold the constant.
           ConstantInt *CI =
@@ -658,21 +658,13 @@ bool ARMFastISel::ARMComputeRegOffset(const Value *Obj, AddrBase &Base,
      break;
    }
    case Instruction::Alloca: {
-      // TODO: Fix this to do intermediate loads, etc.
-      if (Offset != 0) return false;
-
      const AllocaInst *AI = cast<AllocaInst>(Obj);
-      DenseMap<const AllocaInst*, int>::iterator SI =
-        FuncInfo.StaticAllocaMap.find(AI);
-      if (SI != FuncInfo.StaticAllocaMap.end()) {
-        Base.Reg = ARM::SP;
-        Base.FrameIndex = SI->second;
-        return true;
-      }
-      // Don't handle dynamic allocas.
-      assert(!FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Obj)) &&
-             "Alloca should have been handled earlier!");
-      return false;
+      unsigned Reg = TargetMaterializeAlloca(AI);
+
+      if (Reg == 0) return false;
+
+      Base = Reg;
+      return true;
    }
  }
 
@@ -682,20 +674,40 @@ bool ARMFastISel::ARMComputeRegOffset(const Value *Obj, unsigned &Base,
    unsigned Tmp = ARMMaterializeGV(GV, TLI.getValueType(Obj->getType()));
    if (Tmp == 0) return false;
 
-    Base.Reg = Tmp;
+    Base = Tmp;
    return true;
  }
 
  // Try to get this in a register if nothing else has worked.
-  if (Base.Reg == 0) Base.Reg = getRegForValue(Obj);
-  return Base.Reg != 0;
+  if (Base == 0) Base = getRegForValue(Obj);
+  return Base != 0;
 }
 
-void ARMFastISel::ARMSimplifyRegOffset(AddrBase &Base, int &Offset, EVT VT) {
+void ARMFastISel::ARMSimplifyRegOffset(unsigned &Base, int &Offset, EVT VT) {
 
-  // Since the offset may be too large for the load instruction
+  assert(VT.isSimple() && "Non-simple types are invalid here!");
+
+  bool needsLowering = false;
+  switch (VT.getSimpleVT().SimpleTy) {
+    default:
+      assert(false && "Unhandled load/store type!");
+    case MVT::i1:
+    case MVT::i8:
+    case MVT::i16:
+    case MVT::i32:
+      // Integer loads/stores handle 12-bit offsets.
+      needsLowering = ((Offset & 0xfff) != Offset);
+      break;
+    case MVT::f32:
+    case MVT::f64:
+      // Floating point operands handle 8-bit offsets.
+      needsLowering = ((Offset & 0xff) != Offset);
+      break;
+  }
+
+  // Since the offset is too large for the load/store instruction
   // get the reg+offset into a register.
-  if (Base.Reg != ARM::SP && Offset != 0) {
+  if (needsLowering) {
    ARMCC::CondCodes Pred = ARMCC::AL;
    unsigned PredReg = 0;
@@ -705,21 +717,21 @@ void ARMFastISel::ARMSimplifyRegOffset(AddrBase &Base, int &Offset, EVT VT) {
 
    if (!isThumb)
      emitARMRegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-                              BaseReg, Base.Reg, Offset, Pred, PredReg,
+                              BaseReg, Base, Offset, Pred, PredReg,
                              static_cast<const ARMBaseInstrInfo&>(TII));
    else {
      assert(AFI->isThumb2Function());
      emitT2RegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-                             BaseReg, Base.Reg, Offset, Pred, PredReg,
+                             BaseReg, Base, Offset, Pred, PredReg,
                             static_cast<const ARMBaseInstrInfo&>(TII));
    }
    Offset = 0;
-    Base.Reg = BaseReg;
+    Base = BaseReg;
  }
 }
 
 bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg,
-                              AddrBase Base, int Offset) {
+                              unsigned Base, int Offset) {
   assert(VT.isSimple() && "Non-simple types are invalid here!");
   unsigned Opc;
 
@@ -730,17 +742,15 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg,
    // This is mostly going to be Neon/vector support.
    return false;
  case MVT::i16:
-    Opc = isThumb ? ARM::t2LDRHi8 : ARM::LDRH;
+    Opc = isThumb ? ARM::t2LDRHi12 : ARM::LDRH;
    RC = ARM::GPRRegisterClass;
-    VT = MVT::i32;
    break;
  case MVT::i8:
-    Opc = isThumb ? ARM::t2LDRBi8 : ARM::LDRB;
+    Opc = isThumb ? ARM::t2LDRBi12 : ARM::LDRBi12;
    RC = ARM::GPRRegisterClass;
-    VT = MVT::i32;
    break;
  case MVT::i32:
-    Opc = isThumb ? ARM::t2LDRi8 : ARM::LDR;
+    Opc = isThumb ? ARM::t2LDRi12 : ARM::LDRi12;
    RC = ARM::GPRRegisterClass;
    break;
  case MVT::f32:
@@ -757,25 +767,16 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg,
 
   ResultReg = createResultReg(RC);
 
-  // For now with the additions above the offset should be zero - thus we
-  // can always fit into an i8.
-  assert((Base.Reg == ARM::SP || Offset == 0) &&
-         "Offset not zero and not a stack load!");
-
-  if (Base.Reg == ARM::SP && Offset == 0)
-    TII.loadRegFromStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt,
-                             ResultReg, Base.FrameIndex, RC,
-                             TM.getRegisterInfo());
-  // The thumb and floating point instructions both take 2 operands, ARM takes
-  // another register.
-  else if (isFloat || isThumb)
-    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-                            TII.get(Opc), ResultReg)
-                    .addReg(Base.Reg).addImm(Offset));
-  else
-    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-                            TII.get(Opc), ResultReg)
-                    .addReg(Base.Reg).addReg(0).addImm(Offset));
+  ARMSimplifyRegOffset(Base, Offset, VT);
+
+  // addrmode5 output depends on the selection dag addressing dividing the
+  // offset by 4 that it then later multiplies. Do this here as well.
+  if (isFloat)
+    Offset /= 4;
+
+  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                          TII.get(Opc), ResultReg)
+                  .addReg(Base).addImm(Offset));
   return true;
 }
 
@@ -786,15 +787,13 @@ bool ARMFastISel::SelectLoad(const Instruction *I) {
    return false;
 
  // Our register and offset with innocuous defaults.
-  AddrBase Base = { 0, 0 };
+  unsigned Base = 0;
  int Offset = 0;
 
  // See if we can handle this as Reg + Offset
  if (!ARMComputeRegOffset(I->getOperand(0), Base, Offset))
    return false;
 
-  ARMSimplifyRegOffset(Base, Offset, VT);
-
  unsigned ResultReg;
  if (!ARMEmitLoad(VT, ResultReg, Base, Offset)) return false;
 
@@ -803,24 +802,20 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg,
-                               AddrBase Base, int Offset) {
+                               unsigned Base, int Offset) {
   unsigned StrOpc;
   bool isFloat = false;
-  // VT is set here only for use in the alloca stores below - those are promoted
-  // to reg size always.
   switch (VT.getSimpleVT().SimpleTy) {
   default: return false;
   case MVT::i1:
   case MVT::i8:
-    VT = MVT::i32;
-    StrOpc = isThumb ? ARM::t2STRBi8 : ARM::STRB;
+    StrOpc = isThumb ? ARM::t2STRBi12 : ARM::STRB;
    break;
  case MVT::i16:
-    VT = MVT::i32;
-    StrOpc = isThumb ? ARM::t2STRHi8 : ARM::STRH;
+    StrOpc = isThumb ? ARM::t2STRHi12 : ARM::STRH;
    break;
  case MVT::i32:
-    StrOpc = isThumb ? ARM::t2STRi8 : ARM::STR;
+    StrOpc = isThumb ? ARM::t2STRi12 : ARM::STR;
    break;
  case MVT::f32:
    if (!Subtarget->hasVFP2()) return false;
@@ -834,20 +829,23 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg,
    break;
  }
 
-  if (Base.Reg == ARM::SP && Offset == 0)
-    TII.storeRegToStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt,
-                            SrcReg, true /*isKill*/, Base.FrameIndex,
-                            TLI.getRegClassFor(VT), TM.getRegisterInfo());
+  ARMSimplifyRegOffset(Base, Offset, VT);
+
+  // addrmode5 output depends on the selection dag addressing dividing the
+  // offset by 4 that it then later multiplies. Do this here as well.
+  if (isFloat)
+    Offset /= 4;
+
   // The thumb addressing mode has operands swapped from the arm addressing
   // mode, the floating point one only has two operands.
-  else if (isFloat || isThumb)
+  if (isFloat || isThumb)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(StrOpc))
-                    .addReg(SrcReg).addReg(Base.Reg).addImm(Offset));
+                    .addReg(SrcReg).addReg(Base).addImm(Offset));
  else
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(StrOpc))
-                    .addReg(SrcReg).addReg(Base.Reg).addReg(0).addImm(Offset));
+                    .addReg(SrcReg).addReg(Base).addReg(0).addImm(Offset));
 
  return true;
 }
@@ -867,15 +865,13 @@ bool ARMFastISel::SelectStore(const Instruction *I) {
    return false;
 
  // Our register and offset with innocuous defaults.
-  AddrBase Base = { 0, 0 };
+  unsigned Base = 0;
  int Offset = 0;
 
  // See if we can handle this as Reg + Offset
  if (!ARMComputeRegOffset(I->getOperand(1), Base, Offset))
    return false;
 
-  ARMSimplifyRegOffset(Base, Offset, VT);
-
  if (!ARMEmitStore(VT, SrcReg, Base, Offset)) return false;
 
  return true;
@@ -1242,6 +1238,18 @@ bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
 
 // Call Handling Code
 
+bool ARMFastISel::FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src,
+                                 EVT SrcVT, unsigned &ResultReg) {
+  unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
+                           Src, /*TODO: Kill=*/false);
+
+  if (RR != 0) {
+    ResultReg = RR;
+    return true;
+  } else
+    return false;
+}
+
 // This is largely taken directly from CCAssignFnForNode - we don't support
 // varargs in FastISel so that part has been removed.
 // TODO: We may not support all of this.
@@ -1249,8 +1257,12 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported calling convention");
-  case CallingConv::C:
  case CallingConv::Fast:
+    // Ignore fastcc. Silence compiler warnings.
+    (void)RetFastCC_ARM_APCS;
+    (void)FastCC_ARM_APCS;
+    // Fallthrough
+  case CallingConv::C:
    // Use target triple & subtarget features to do actual dispatch.
    if (Subtarget->isAAPCS_ABI()) {
      if (Subtarget->hasVFP2() &&
@@ -1295,27 +1307,85 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
    unsigned Arg = ArgRegs[VA.getValNo()];
    EVT ArgVT = ArgVTs[VA.getValNo()];
 
+    // We don't handle NEON parameters yet.
+    if (VA.getLocVT().isVector() && VA.getLocVT().getSizeInBits() > 64)
+      return false;
+
    // Handle arg promotion, etc.
    switch (VA.getLocInfo()) {
      case CCValAssign::Full: break;
-      default:
-        // TODO: Handle arg promotion.
-        return false;
+      case CCValAssign::SExt: {
+        bool Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
+                                      Arg, ArgVT, Arg);
+        assert(Emitted && "Failed to emit a sext!"); Emitted=Emitted;
+        Emitted = true;
+        ArgVT = VA.getLocVT();
+        break;
+      }
+      case CCValAssign::ZExt: {
+        bool Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
+                                      Arg, ArgVT, Arg);
+        assert(Emitted && "Failed to emit a zext!"); Emitted=Emitted;
+        Emitted = true;
+        ArgVT = VA.getLocVT();
+        break;
+      }
+      case CCValAssign::AExt: {
+        bool Emitted = FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
+                                      Arg, ArgVT, Arg);
+        if (!Emitted)
+          Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
+                                   Arg, ArgVT, Arg);
+        if (!Emitted)
+          Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
+                                   Arg, ArgVT, Arg);
+
+        assert(Emitted && "Failed to emit a aext!"); Emitted=Emitted;
+        ArgVT = VA.getLocVT();
+        break;
+      }
+      case CCValAssign::BCvt: {
+        unsigned BC = FastEmit_r(ArgVT.getSimpleVT(),
+                                 VA.getLocVT().getSimpleVT(),
+                                 ISD::BIT_CONVERT, Arg, /*TODO: Kill=*/false);
+        assert(BC != 0 && "Failed to emit a bitcast!");
+        Arg = BC;
+        ArgVT = VA.getLocVT();
+        break;
+      }
+      default: llvm_unreachable("Unknown arg promotion!");
    }
 
    // Now copy/store arg to correct locations.
-    // TODO: We need custom lowering for f64 args.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
              VA.getLocReg())
      .addReg(Arg);
      RegArgs.push_back(VA.getLocReg());
+    } else if (VA.needsCustom()) {
+      // TODO: We need custom lowering for vector (v2f64) args.
+      if (VA.getLocVT() != MVT::f64) return false;
+
+      CCValAssign &NextVA = ArgLocs[++i];
+
+      // TODO: Only handle register args for now.
+      if(!(VA.isRegLoc() && NextVA.isRegLoc())) return false;
+
+      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                              TII.get(ARM::VMOVRRD), VA.getLocReg())
+                      .addReg(NextVA.getLocReg(), RegState::Define)
+                      .addReg(Arg));
+      RegArgs.push_back(VA.getLocReg());
+      RegArgs.push_back(NextVA.getLocReg());
    } else {
-      // Need to store
-      return false;
+      assert(VA.isMemLoc());
+      // Need to store on the stack.
+      unsigned Base = ARM::SP;
+      int Offset = VA.getLocMemOffset();
+
+      if (!ARMEmitStore(ArgVT, Arg, Base, Offset)) return false;
    }
  }
-
  return true;
 }
@@ -1338,24 +1408,16 @@ bool ARMFastISel::FinishCall(EVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
  if (RVLocs.size() == 2 && RetVT.getSimpleVT().SimpleTy == MVT::f64) {
    // For this move we copy into two registers and then move into the
    // double fp reg we want.
-    // TODO: Are the copies necessary?
-    TargetRegisterClass *CopyRC = TLI.getRegClassFor(MVT::i32);
-    unsigned Copy1 = createResultReg(CopyRC);
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
-            Copy1).addReg(RVLocs[0].getLocReg());
-    UsedRegs.push_back(RVLocs[0].getLocReg());
-
-    unsigned Copy2 = createResultReg(CopyRC);
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
-            Copy2).addReg(RVLocs[1].getLocReg());
-    UsedRegs.push_back(RVLocs[1].getLocReg());
-
    EVT DestVT = RVLocs[0].getValVT();
    TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
    unsigned ResultReg = createResultReg(DstRC);
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::VMOVDRR), ResultReg)
-                    .addReg(Copy1).addReg(Copy2));
+                    .addReg(RVLocs[0].getLocReg())
+                    .addReg(RVLocs[1].getLocReg()));
+
+    UsedRegs.push_back(RVLocs[0].getLocReg());
+    UsedRegs.push_back(RVLocs[1].getLocReg());
 
    // Finally update the result.
    UpdateValueMap(I, ResultReg);
@@ -1377,6 +1439,69 @@ bool ARMFastISel::FinishCall(EVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
   return true;
 }
 
+bool ARMFastISel::SelectRet(const Instruction *I) {
+  const ReturnInst *Ret = cast<ReturnInst>(I);
+  const Function &F = *I->getParent()->getParent();
+
+  if (!FuncInfo.CanLowerReturn)
+    return false;
+
+  if (F.isVarArg())
+    return false;
+
+  CallingConv::ID CC = F.getCallingConv();
+  if (Ret->getNumOperands() > 0) {
+    SmallVector<ISD::OutputArg, 4> Outs;
+    GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
+                  Outs, TLI);
+
+    // Analyze operands of the call, assigning locations to each operand.
+    SmallVector<CCValAssign, 16> ValLocs;
+    CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext());
+    CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */));
+
+    const Value *RV = Ret->getOperand(0);
+    unsigned Reg = getRegForValue(RV);
+    if (Reg == 0)
+      return false;
+
+    // Only handle a single return value for now.
+    if (ValLocs.size() != 1)
+      return false;
+
+    CCValAssign &VA = ValLocs[0];
+
+    // Don't bother handling odd stuff for now.
+    if (VA.getLocInfo() != CCValAssign::Full)
+      return false;
+    // Only handle register returns for now.
+    if (!VA.isRegLoc())
+      return false;
+    // TODO: For now, don't try to handle cases where getLocInfo()
+    // says Full but the types don't match.
+    if (VA.getValVT() != TLI.getValueType(RV->getType()))
+      return false;
+
+    // Make the copy.
+    unsigned SrcReg = Reg + VA.getValNo();
+    unsigned DstReg = VA.getLocReg();
+    const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
+    // Avoid a cross-class copy. This is very unlikely.
+    if (!SrcRC->contains(DstReg))
+      return false;
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+            DstReg).addReg(SrcReg);
+
+    // Mark the register as live out of the function.
+    MRI.addLiveOut(VA.getLocReg());
+  }
+
+  unsigned RetOpc = isThumb ? ARM::tBX_RET : ARM::BX_RET;
+  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                          TII.get(RetOpc)));
+  return true;
+}
+
 // A quick function that will emit a call for a named libcall in F with the
 // vector of passed arguments for the Instruction in I. We can assume that we
 // can emit a call for any libcall we can produce. This is an abridged version
@@ -1472,11 +1597,8 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
  // Check the calling convention.
  ImmutableCallSite CS(CI);
  CallingConv::ID CC = CS.getCallingConv();
+
  // TODO: Avoid some calling conventions?
-  if (CC != CallingConv::C) {
-    // errs() << "Can't handle calling convention: " << CC << "\n";
-    return false;
-  }
 
  // Let SDISel handle vararg functions.
  const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
@@ -1607,6 +1729,8 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
    return SelectCall(I);
  case Instruction::Select:
    return SelectSelect(I);
+  case Instruction::Ret:
+    return SelectRet(I);
  default: break;
  }
  return false;
@@ -1617,7 +1741,7 @@ namespace llvm {
    // Completely untested on non-darwin.
    const TargetMachine &TM = funcInfo.MF->getTarget();
    const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
-    if (Subtarget->isTargetDarwin() && EnableARMFastISel)
+    if (Subtarget->isTargetDarwin() && !DisableARMFastISel)
      return new ARMFastISel(funcInfo);
    return 0;
  }