[FastISel][AArch64] Add sqrt intrinsic support.
author: Juergen Ributzka <juergen@apple.com>
Thu, 31 Jul 2014 06:25:33 +0000 (06:25 +0000)
committer: Juergen Ributzka <juergen@apple.com>
Thu, 31 Jul 2014 06:25:33 +0000 (06:25 +0000)
Fixes <rdar://problem/17867067>.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@214388 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/AArch64/AArch64FastISel.cpp
test/CodeGen/AArch64/fast-isel-sqrt.ll [new file with mode: 0644]

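diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index 3d5fb66146ebe846abb923c8b6115fbd48b92662..647bb57deae12892bc1db3d0d64be1abec9de17d 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp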
@@ -1708,6 +1708,25 @@ bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
         .addImm(1);
     return true;
   }
+  case Intrinsic::sqrt: {
+    Type *RetTy = II->getCalledFunction()->getReturnType();
+
+    MVT VT;
+    if (!isTypeLegal(RetTy, VT))
+      return false;
+
+    unsigned Op0Reg = getRegForValue(II->getOperand(0));
+    if (!Op0Reg)
+      return false;
+    bool Op0IsKill = hasTrivialKill(II->getOperand(0));
+
+    unsigned ResultReg = FastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
+    if (!ResultReg)
+      return false;
+
+    UpdateValueMap(II, ResultReg);
+    return true;
+  }
   case Intrinsic::sadd_with_overflow:
   case Intrinsic::uadd_with_overflow:
   case Intrinsic::ssub_with_overflow:
diff --git a/test/CodeGen/AArch64/fast-isel-sqrt.ll b/test/CodeGen/AArch64/fast-isel-sqrt.ll
new file mode 100644
index 0000000..44ad8ea
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-sqrt.ll
@@ -0,0 +1,20 @@
+; RUN: llc -mtriple=arm64-apple-darwin                             < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-darwin -fast-isel -fast-isel-abort < %s | FileCheck %s
+
+define float @test_sqrt_f32(float %a) {
+; CHECK-LABEL: test_sqrt_f32
+; CHECK:       fsqrt s0, s0
+  %res = call float @llvm.sqrt.f32(float %a)
+  ret float %res
+}
+declare float @llvm.sqrt.f32(float) nounwind readnone
+
+define double @test_sqrt_f64(double %a) {
+; CHECK-LABEL: test_sqrt_f64
+; CHECK:       fsqrt d0, d0
+  %res = call double @llvm.sqrt.f64(double %a)
+  ret double %res
+}
+declare double @llvm.sqrt.f64(double) nounwind readnone
+
+