From 7b7ff9e15230c9372b23beea18da051c3890f7d1 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Wed, 28 Oct 2015 22:51:16 +0000 Subject: [PATCH] ARM: teach backend about WatchOS and TvOS libcalls. The most substantial changes are again for watchOS: libcalls are hard-float if needed and sincos has a different calling convention. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@251571 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/TargetLibraryInfo.cpp | 10 +- lib/Target/ARM/ARMISelLowering.cpp | 69 ++++++++----- lib/Target/ARM/ARMSubtarget.cpp | 3 +- test/CodeGen/ARM/v7k-libcalls.ll | 154 +++++++++++++++++++++++++++++ test/CodeGen/ARM/v7k-sincos.ll | 16 +++ 5 files changed, 226 insertions(+), 26 deletions(-) create mode 100644 test/CodeGen/ARM/v7k-libcalls.ll create mode 100644 test/CodeGen/ARM/v7k-sincos.ll diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp index 39af09e70c2..d73cc1fa57d 100644 --- a/lib/Analysis/TargetLibraryInfo.cpp +++ b/lib/Analysis/TargetLibraryInfo.cpp @@ -74,13 +74,14 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, } // memset_pattern16 is only available on iOS 3.0 and Mac OS X 10.5 and later. + // All versions of watchOS support it. if (T.isMacOSX()) { if (T.isMacOSXVersionLT(10, 5)) TLI.setUnavailable(LibFunc::memset_pattern16); } else if (T.isiOS()) { if (T.isOSVersionLT(3, 0)) TLI.setUnavailable(LibFunc::memset_pattern16); - } else { + } else if (!T.isWatchOS()) { TLI.setUnavailable(LibFunc::memset_pattern16); } @@ -288,8 +289,12 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, } break; case Triple::IOS: + case Triple::WatchOS: TLI.setUnavailable(LibFunc::exp10l); - if (T.isOSVersionLT(7, 0)) { + if (!T.isWatchOS() && (T.isOSVersionLT(7, 0) || + (T.isOSVersionLT(9, 0) && + (T.getArch() == Triple::x86 || + T.getArch() == Triple::x86_64)))) { TLI.setUnavailable(LibFunc::exp10); TLI.setUnavailable(LibFunc::exp10f); } else { @@ -319,6 +324,7 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, case Triple::Darwin: case Triple::MacOSX: case Triple::IOS: + case Triple::WatchOS: case Triple::FreeBSD: case Triple::Linux: break; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 91edc12dc11..c8d34a23b48 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -242,6 +242,13 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setCmpLibcallCC(LC.Op, LC.Cond); } } + + // Set the correct calling convention for ARMv7k WatchOS. It's just + // AAPCS_VFP for functions as simple as libcalls. + if (Subtarget->isTargetWatchOS()) { + for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) + setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::ARM_AAPCS_VFP); + } } // These libcalls are not available in 32-bit. @@ -377,8 +384,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, } // Use divmod compiler-rt calls for iOS 5.0 and later. - if (Subtarget->getTargetTriple().isiOS() && - !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) { + if (Subtarget->isTargetWatchOS() || + (Subtarget->isTargetIOS() && + !Subtarget->getTargetTriple().isOSVersionLT(5, 0))) { setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4"); setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); } @@ -941,7 +949,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, if (Subtarget->hasSinCos()) { setLibcallName(RTLIB::SINCOS_F32, "sincosf"); setLibcallName(RTLIB::SINCOS_F64, "sincos"); - if (Subtarget->getTargetTriple().isiOS()) { + if (Subtarget->isTargetWatchOS()) { + setLibcallCallingConv(RTLIB::SINCOS_F32, CallingConv::ARM_AAPCS_VFP); + setLibcallCallingConv(RTLIB::SINCOS_F64, CallingConv::ARM_AAPCS_VFP); + } + if (Subtarget->isTargetIOS() || Subtarget->isTargetWatchOS()) { // For iOS, we don't want to the normal expansion of a libcall to // sincos. We want to issue a libcall to __sincos_stret. setOperationAction(ISD::FSINCOS, MVT::f64, Custom); @@ -6576,27 +6588,33 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { auto PtrVT = getPointerTy(DAG.getDataLayout()); MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Pair of floats / doubles used to pass the result. - StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr); - - // Create stack object for sret. + Type *RetTy = StructType::get(ArgTy, ArgTy, nullptr); auto &DL = DAG.getDataLayout(); - const uint64_t ByteSize = DL.getTypeAllocSize(RetTy); - const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy); - int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); - SDValue SRet = DAG.getFrameIndex(FrameIdx, getPointerTy(DL)); ArgListTy Args; - ArgListEntry Entry; - - Entry.Node = SRet; - Entry.Ty = RetTy->getPointerTo(); - Entry.isSExt = false; - Entry.isZExt = false; - Entry.isSRet = true; - Args.push_back(Entry); + bool ShouldUseSRet = Subtarget->isAPCS_ABI(); + SDValue SRet; + if (ShouldUseSRet) { + // Create stack object for sret. + const uint64_t ByteSize = DL.getTypeAllocSize(RetTy); + const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy); + int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); + SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL)); + + ArgListEntry Entry; + Entry.Node = SRet; + Entry.Ty = RetTy->getPointerTo(); + Entry.isSExt = false; + Entry.isZExt = false; + Entry.isSRet = true; + Args.push_back(Entry); + RetTy = Type::getVoidTy(*DAG.getContext()); + } + ArgListEntry Entry; Entry.Node = Arg; Entry.Ty = ArgTy; Entry.isSExt = false; @@ -6605,16 +6623,21 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { const char *LibcallName = (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret"; + RTLIB::Libcall LC = + (ArgVT == MVT::f64) ? RTLIB::SINCOS_F64 : RTLIB::SINCOS_F32; + CallingConv::ID CC = getLibcallCallingConv(LC); SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL)); TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) - .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), Callee, - std::move(Args), 0) - .setDiscardResult(); - + CLI.setDebugLoc(dl) + .setChain(DAG.getEntryNode()) + .setCallee(CC, RetTy, Callee, std::move(Args), 0) + .setDiscardResult(ShouldUseSRet); std::pair CallResult = LowerCallTo(CLI); + if (!ShouldUseSRet) + return CallResult.first; + SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo(), false, false, false, 0); diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index aea452cdbc5..84d3ebcc681 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -299,7 +299,8 @@ unsigned ARMSubtarget::getMispredictionPenalty() const { } bool ARMSubtarget::hasSinCos() const { - return getTargetTriple().isiOS() && !getTargetTriple().isOSVersionLT(7, 0); + return isTargetWatchOS() || + (isTargetIOS() && !getTargetTriple().isOSVersionLT(7, 0)); } bool ARMSubtarget::enableMachineScheduler() const { diff --git a/test/CodeGen/ARM/v7k-libcalls.ll b/test/CodeGen/ARM/v7k-libcalls.ll new file mode 100644 index 00000000000..a1dfb07ca61 --- /dev/null +++ b/test/CodeGen/ARM/v7k-libcalls.ll @@ -0,0 +1,154 @@ +; RUN: llc -mtriple=armv7k-apple-watchos2.0 -mcpu=cortex-a7 < %s | FileCheck %s + +define arm_aapcs_vfpcc float @t1(float %a, float %b) { +entry: +; CHECK: t1 +; CHECK-NOT: vmov +; CHECK: vadd.f32 + %a.addr = alloca float, align 4 + %b.addr = alloca float, align 4 + store float %a, float* %a.addr, align 4 + store float %b, float* %b.addr, align 4 + %0 = load float, float* %a.addr, align 4 + %1 = load float, float* %b.addr, align 4 + %add = fadd float %0, %1 + ret float %add +} + +define arm_aapcs_vfpcc double @t2(double %a, double %b) { +entry: +; CHECK: t2 +; CHECK-NOT: vmov +; CHECK: vadd.f64 + %a.addr = alloca double, align 8 + %b.addr = alloca double, align 8 + store double %a, double* %a.addr, align 8 + store double %b, double* %b.addr, align 8 + %0 = load double, double* %a.addr, align 8 + %1 = load double, double* %b.addr, align 8 + %add = fadd double %0, %1 + ret double %add +} + +define arm_aapcs_vfpcc i64 @t3(double %ti) { +entry: +; CHECK-LABEL: t3: +; CHECK-NOT: vmov +; CHECK: bl ___fixunsdfdi + %conv = fptoui double %ti to i64 + ret i64 %conv +} + +define arm_aapcs_vfpcc i64 @t4(double %ti) { +entry: +; CHECK-LABEL: t4: +; CHECK-NOT: vmov +; CHECK: bl ___fixdfdi + %conv = fptosi double %ti to i64 + ret i64 %conv +} + +define arm_aapcs_vfpcc double @t5(i64 %ti) { +entry: +; CHECK-LABEL: t5: +; CHECK: bl ___floatundidf +; CHECK-NOT: vmov +; CHECK: pop + %conv = uitofp i64 %ti to double + ret double %conv +} + +define arm_aapcs_vfpcc double @t6(i64 %ti) { +entry: +; CHECK-LABEL: t6: +; CHECK: bl ___floatdidf +; CHECK-NOT: vmov +; CHECK: pop + %conv = sitofp i64 %ti to double + ret double %conv +} + +define arm_aapcs_vfpcc float @t7(i64 %ti) { +entry: +; CHECK-LABEL: t7: +; CHECK: bl ___floatundisf +; CHECK-NOT: vmov +; CHECK: pop + %conv = uitofp i64 %ti to float + ret float %conv +} + +define arm_aapcs_vfpcc float @t8(i64 %ti) { +entry: +; CHECK-LABEL: t8: +; CHECK: bl ___floatdisf +; CHECK-NOT: vmov +; CHECK: pop + %conv = sitofp i64 %ti to float + ret float %conv +} + +define arm_aapcs_vfpcc double @t9(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, float %a, float %b) { +entry: +; CHECK-LABEL: t9: +; CHECK-NOT: vmov +; CHECK: vldr + %add = fadd float %a, %b + %conv = fpext float %add to double + ret double %conv +} + +define arm_aapcs_vfpcc double @t10(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %a, float %b, double %c) { +entry: +; CHECK-LABEL: t10: +; CHECK-NOT: vmov +; CHECK: vldr + %add = fadd double %a, %c + ret double %add +} + +define arm_aapcs_vfpcc float @t11(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, float %a, double %b, float %c) { +entry: +; CHECK-LABEL: t11: +; CHECK: vldr + %add = fadd float %a, %c + ret float %add +} + +; rdar://16039676 +define arm_aapcs_vfpcc double @t12(double %a, double %b) { +entry: +; CHECK-LABEL: t12: +; CHECK: vstr + %add = fadd double %a, %b + %sub = fsub double %a, %b + %call = tail call arm_aapcs_vfpcc double @x(double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double %add, float 0.000000e+00, double %sub) + ret double %call +} + +define arm_aapcs_vfpcc double @t13(double %x) { +entry: +; CHECK-LABEL: t13: +; CHECK-NOT: vmov +; CHECK: bl ___sincos_stret + %call = tail call arm_aapcs_vfpcc double @cos(double %x) + %call1 = tail call arm_aapcs_vfpcc double @sin(double %x) + %mul = fmul double %call, %call1 + ret double %mul +} + +define arm_aapcs_vfpcc double @t14(double %x) { +; CHECK-LABEL: t14: +; CHECK-NOT: vmov +; CHECK: b ___exp10 + %__exp10 = tail call double @__exp10(double %x) #1 + ret double %__exp10 +} + +declare arm_aapcs_vfpcc double @x(double, double, double, double, double, double, double, float, double) +declare arm_aapcs_vfpcc double @cos(double) #0 +declare arm_aapcs_vfpcc double @sin(double) #0 +declare double @__exp10(double) + +attributes #0 = { readnone } +attributes #1 = { readonly } diff --git a/test/CodeGen/ARM/v7k-sincos.ll b/test/CodeGen/ARM/v7k-sincos.ll new file mode 100644 index 00000000000..b89d4dc8120 --- /dev/null +++ b/test/CodeGen/ARM/v7k-sincos.ll @@ -0,0 +1,16 @@ +; RUN: llc -mtriple=thumbv7k-apple-watchos2.0 -o - %s | FileCheck %s + +declare double @sin(double) nounwind readnone +declare double @cos(double) nounwind readnone + +define double @test_stret(double %in) { +; CHECK-LABEL: test_stret: +; CHECK: blx ___sincos_stret +; CHECK-NOT: ldr +; CHECK: vadd.f64 d0, d0, d1 + + %sin = call double @sin(double %in) + %cos = call double @cos(double %in) + %sum = fadd double %sin, %cos + ret double %sum +} -- 2.34.1