From 49d6fc02efc45932e4d889fa56bbbfeefafbdc85 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Tue, 12 Jun 2012 19:25:13 +0000 Subject: [PATCH] [arm-fast-isel] Add support for -arm-long-calls. Patch by Jush Lu . git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158368 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 98 ++++++++++++++----------- test/CodeGen/ARM/fast-isel-call.ll | 32 ++++++++ test/CodeGen/ARM/fast-isel-intrinsic.ll | 32 ++++++++ 3 files changed, 121 insertions(+), 41 deletions(-) diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index e5d5157dd57..a9b482ae2db 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -190,7 +190,7 @@ class ARMFastISel : public FastISel { unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT); unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg); unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg); - unsigned ARMSelectCallOp(const GlobalValue *GV); + unsigned ARMSelectCallOp(bool UseReg); // Call handling routines. private: @@ -202,6 +202,7 @@ class ARMFastISel : public FastISel { SmallVectorImpl &RegArgs, CallingConv::ID CC, unsigned &NumBytes); + unsigned getLibcallReg(const Twine &Name); bool FinishCall(MVT RetVT, SmallVectorImpl &UsedRegs, const Instruction *I, CallingConv::ID CC, unsigned &NumBytes); @@ -2108,8 +2109,17 @@ bool ARMFastISel::SelectRet(const Instruction *I) { return true; } -unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) { - return isThumb2 ? ARM::tBL : ARM::BL; +unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) { + if (UseReg) + return isThumb2 ? ARM::tBLXr : ARM::BLX; + else + return isThumb2 ? ARM::tBL : ARM::BL; +} + +unsigned ARMFastISel::getLibcallReg(const Twine &Name) { + GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false, + GlobalValue::ExternalLinkage, 0, Name); + return ARMMaterializeGV(GV, TLI.getValueType(GV->getType())); } // A quick function that will emit a call for a named libcall in F with the @@ -2130,9 +2140,6 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { else if (!isTypeLegal(RetTy, RetVT)) return false; - // TODO: For now if we have long calls specified we don't handle the call. - if (EnableARMLongCalls) return false; - // Can't handle non-double multi-reg retvals. if (RetVT != MVT::isVoid && RetVT != MVT::i32) { SmallVector RVLocs; @@ -2176,20 +2183,32 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) return false; + unsigned CalleeReg = 0; + if (EnableARMLongCalls) { + CalleeReg = getLibcallReg(TLI.getLibcallName(Call)); + if (CalleeReg == 0) return false; + } + // Issue the call. - MachineInstrBuilder MIB; - unsigned CallOpc = ARMSelectCallOp(NULL); - if (isThumb2) - // Explicitly adding the predicate here. - MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(CallOpc))) - .addExternalSymbol(TLI.getLibcallName(Call)); - else + unsigned CallOpc = ARMSelectCallOp(EnableARMLongCalls); + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(CallOpc)); + if (isThumb2) { // Explicitly adding the predicate here. - MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(CallOpc)) - .addExternalSymbol(TLI.getLibcallName(Call))); + AddDefaultPred(MIB); + if (EnableARMLongCalls) + MIB.addReg(CalleeReg); + else + MIB.addExternalSymbol(TLI.getLibcallName(Call)); + } else { + if (EnableARMLongCalls) + MIB.addReg(CalleeReg); + else + MIB.addExternalSymbol(TLI.getLibcallName(Call)); + // Explicitly adding the predicate here. + AddDefaultPred(MIB); + } // Add implicit physical register uses to the call. for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) MIB.addReg(RegArgs[i]); @@ -2237,9 +2256,6 @@ bool ARMFastISel::SelectCall(const Instruction *I, RetVT != MVT::i8 && RetVT != MVT::i1) return false; - // TODO: For now if we have long calls specified we don't handle the call. - if (EnableARMLongCalls) return false; - // Can't handle non-double multi-reg retvals. if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 && RetVT != MVT::i16 && RetVT != MVT::i32) { @@ -2306,43 +2322,43 @@ bool ARMFastISel::SelectCall(const Instruction *I, if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) return false; - // Issue the call. - MachineInstrBuilder MIB; + bool UseReg = false; const GlobalValue *GV = dyn_cast(Callee); - unsigned CallOpc = ARMSelectCallOp(GV); + if (!GV || EnableARMLongCalls) UseReg = true; + unsigned CalleeReg = 0; + if (UseReg) { + if (IntrMemName) + CalleeReg = getLibcallReg(IntrMemName); + else + CalleeReg = getRegForValue(Callee); - if (!GV){ - CallOpc = isThumb2 ? ARM::tBLXr : ARM::BLX; - CalleeReg = getRegForValue(Callee); if (CalleeReg == 0) return false; } - // Explicitly adding the predicate here. + // Issue the call. + unsigned CallOpc = ARMSelectCallOp(UseReg); + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(CallOpc)); if(isThumb2) { // Explicitly adding the predicate here. - MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(CallOpc))); - if (!GV) + AddDefaultPred(MIB); + if (UseReg) MIB.addReg(CalleeReg); else if (!IntrMemName) MIB.addGlobalAddress(GV, 0, 0); else MIB.addExternalSymbol(IntrMemName, 0); } else { - if (!GV) - MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(CallOpc)) - .addReg(CalleeReg)); + if (UseReg) + MIB.addReg(CalleeReg); else if (!IntrMemName) - // Explicitly adding the predicate here. - MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(CallOpc)) - .addGlobalAddress(GV, 0, 0)); + MIB.addGlobalAddress(GV, 0, 0); else - MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(CallOpc)) - .addExternalSymbol(IntrMemName, 0)); + MIB.addExternalSymbol(IntrMemName, 0); + + // Explicitly adding the predicate here. + AddDefaultPred(MIB); } // Add implicit physical register uses to the call. diff --git a/test/CodeGen/ARM/fast-isel-call.ll b/test/CodeGen/ARM/fast-isel-call.ll index 10d6746acfd..0f2475651cd 100644 --- a/test/CodeGen/ARM/fast-isel-call.ll +++ b/test/CodeGen/ARM/fast-isel-call.ll @@ -1,5 +1,7 @@ ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=THUMB-LONG define i32 @t0(i1 zeroext %a) nounwind { %1 = zext i1 %a to i32 @@ -99,6 +101,11 @@ entry: ; ARM: uxtb r9, r12 ; ARM: str r9, [sp, #4] ; ARM: bl _bar +; ARM-LONG: @t10 +; ARM-LONG: movw lr, :lower16:L_bar$non_lazy_ptr +; ARM-LONG: movt lr, :upper16:L_bar$non_lazy_ptr +; ARM-LONG: ldr lr, [lr] +; ARM-LONG: blx lr ; THUMB: @t10 ; THUMB: movs r0, #0 ; THUMB: movt r0, #0 @@ -121,6 +128,11 @@ entry: ; THUMB: uxtb.w r9, r12 ; THUMB: str.w r9, [sp, #4] ; THUMB: bl _bar +; THUMB-LONG: @t10 +; THUMB-LONG: movw lr, :lower16:L_bar$non_lazy_ptr +; THUMB-LONG: movt lr, :upper16:L_bar$non_lazy_ptr +; THUMB-LONG: ldr.w lr, [lr] +; THUMB-LONG: blx lr %call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70) ret i32 0 } @@ -146,3 +158,23 @@ define void @foo3() uwtable { %call = call i32 %1(i32 0) ret void } + +define i32 @LibCall(i32 %a, i32 %b) { +entry: +; ARM: LibCall +; ARM: bl ___udivsi3 +; ARM-LONG: LibCall +; ARM-LONG: movw r2, :lower16:L___udivsi3$non_lazy_ptr +; ARM-LONG: movt r2, :upper16:L___udivsi3$non_lazy_ptr +; ARM-LONG: ldr r2, [r2] +; ARM-LONG: blx r2 +; THUMB: LibCall +; THUMB: bl ___udivsi3 +; THUMB-LONG: LibCall +; THUMB-LONG: movw r2, :lower16:L___udivsi3$non_lazy_ptr +; THUMB-LONG: movt r2, :upper16:L___udivsi3$non_lazy_ptr +; THUMB-LONG: ldr r2, [r2] +; THUMB-LONG: blx r2 + %tmp1 = udiv i32 %a, %b ; [#uses=1] + ret i32 %tmp1 +} diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll index e6bdfa78d49..b73fceff6cd 100644 --- a/test/CodeGen/ARM/fast-isel-intrinsic.ll +++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll @@ -1,5 +1,7 @@ ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=THUMB-LONG @message1 = global [60 x i8] c"The LLVM Compiler Infrastructure\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 1 @temp = common global [60 x i8] zeroinitializer, align 1 @@ -13,6 +15,11 @@ define void @t1() nounwind ssp { ; ARM: movw r2, #10 ; ARM: uxtb r1, r1 ; ARM: bl _memset +; ARM-LONG: t1 +; ARM-LONG: movw r3, :lower16:L_memset$non_lazy_ptr +; ARM-LONG: movt r3, :upper16:L_memset$non_lazy_ptr +; ARM-LONG: ldr r3, [r3] +; ARM-LONG: blx r3 ; THUMB: t1 ; THUMB: movw r0, :lower16:_message1 ; THUMB: movt r0, :upper16:_message1 @@ -23,6 +30,11 @@ define void @t1() nounwind ssp { ; THUMB: movt r2, #0 ; THUMB: uxtb r1, r1 ; THUMB: bl _memset +; THUMB-LONG: t1 +; THUMB-LONG: movw r3, :lower16:L_memset$non_lazy_ptr +; THUMB-LONG: movt r3, :upper16:L_memset$non_lazy_ptr +; THUMB-LONG: ldr r3, [r3] +; THUMB-LONG: blx r3 call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i32 1, i1 false) ret void } @@ -41,6 +53,11 @@ define void @t2() nounwind ssp { ; ARM: mov r0, r1 ; ARM: ldr r1, [sp] @ 4-byte Reload ; ARM: bl _memcpy +; ARM-LONG: t2 +; ARM-LONG: movw r3, :lower16:L_memcpy$non_lazy_ptr +; ARM-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr +; ARM-LONG: ldr r3, [r3] +; ARM-LONG: blx r3 ; THUMB: t2 ; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr ; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr @@ -51,6 +68,11 @@ define void @t2() nounwind ssp { ; THUMB: movt r2, #0 ; THUMB: mov r0, r1 ; THUMB: bl _memcpy +; THUMB-LONG: t2 +; THUMB-LONG: movw r3, :lower16:L_memcpy$non_lazy_ptr +; THUMB-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr +; THUMB-LONG: ldr r3, [r3] +; THUMB-LONG: blx r3 call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 17, i32 1, i1 false) ret void } @@ -67,6 +89,11 @@ define void @t3() nounwind ssp { ; ARM: movw r2, #10 ; ARM: mov r0, r1 ; ARM: bl _memmove +; ARM-LONG: t3 +; ARM-LONG: movw r3, :lower16:L_memmove$non_lazy_ptr +; ARM-LONG: movt r3, :upper16:L_memmove$non_lazy_ptr +; ARM-LONG: ldr r3, [r3] +; ARM-LONG: blx r3 ; THUMB: t3 ; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr ; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr @@ -77,6 +104,11 @@ define void @t3() nounwind ssp { ; THUMB: movt r2, #0 ; THUMB: mov r0, r1 ; THUMB: bl _memmove +; THUMB-LONG: t3 +; THUMB-LONG: movw r3, :lower16:L_memmove$non_lazy_ptr +; THUMB-LONG: movt r3, :upper16:L_memmove$non_lazy_ptr +; THUMB-LONG: ldr r3, [r3] +; THUMB-LONG: blx r3 call void @llvm.memmove.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false) ret void } -- 2.34.1