[FastISel][X86] Add support for the sqrt intrinsic.

author Juergen Ributzka <juergen@apple.com>

Wed, 11 Jun 2014 23:11:02 +0000 (23:11 +0000)

committer Juergen Ributzka <juergen@apple.com>

Wed, 11 Jun 2014 23:11:02 +0000 (23:11 +0000)
author Juergen Ributzka <juergen@apple.com>
Wed, 11 Jun 2014 23:11:02 +0000 (23:11 +0000)
committer Juergen Ributzka <juergen@apple.com>
Wed, 11 Jun 2014 23:11:02 +0000 (23:11 +0000)
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp

index 2391984964869757a0c0e318f67f8d1b6da122d2..329a96eb9fc4331310964222c46e0f4d53ca5f10 100644 (file)
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1781,6 +1781,58 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP));
      return true;
    }
+  case Intrinsic::sqrt: {
+    if (!Subtarget->hasSSE1())
+      return false;
+
+    Type *RetTy = I.getCalledFunction()->getReturnType();
+
+    MVT VT;
+    if (!isTypeLegal(RetTy, VT))
+      return false;
+
+    // Unfortunatelly we can't use FastEmit_r, because the AVX version of FSQRT
+    // is not generated by FastISel yet.
+    // FIXME: Update this code once tablegen can handle it.
+    static const unsigned SqrtOpc[2][2] = {
+      {X86::SQRTSSr, X86::VSQRTSSr},
+      {X86::SQRTSDr, X86::VSQRTSDr}
+    };
+    bool HasAVX = Subtarget->hasAVX();
+    unsigned Opc;
+    const TargetRegisterClass *RC;
+    switch (VT.SimpleTy) {
+    default: return false;
+    case MVT::f32: Opc = SqrtOpc[0][HasAVX]; RC = &X86::FR32RegClass; break;
+    case MVT::f64: Opc = SqrtOpc[1][HasAVX]; RC = &X86::FR64RegClass; break;
+    }
+
+    const Value *SrcVal = I.getArgOperand(0);
+    unsigned SrcReg = getRegForValue(SrcVal);
+
+    if (SrcReg == 0)
+      return false;
+
+    unsigned ImplicitDefReg = 0;
+    if (HasAVX) {
+      ImplicitDefReg = createResultReg(RC);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
+    }
+
+    unsigned ResultReg = createResultReg(RC);
+    MachineInstrBuilder MIB;
+    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
+                  ResultReg);
+
+    if (ImplicitDefReg)
+      MIB.addReg(ImplicitDefReg);
+
+    MIB.addReg(SrcReg);
+
+    UpdateValueMap(&I, ResultReg);
+    return true;
+  }
    case Intrinsic::sadd_with_overflow:
    case Intrinsic::uadd_with_overflow:
    case Intrinsic::ssub_with_overflow:
diff --git a/test/CodeGen/X86/sqrt.ll b/test/CodeGen/X86/sqrt.ll

new file mode 100644 (file)

index 0000000..be7c6e8
--- /dev/null
+++ b/test/CodeGen/X86/sqrt.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx,+sse2                             | FileCheck %s --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx,+sse2 -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx2,+avx                             | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx2,+avx -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=AVX
+
+define float @test_sqrt_f32(float %a) {
+; SSE2-LABEL: test_sqrt_f32
+; SSE2:       sqrtss %xmm0, %xmm0
+; AVX-LABEL:  test_sqrt_f32
+; AVX:        vsqrtss %xmm0, %xmm0
+  %res = call float @llvm.sqrt.f32(float %a)
+  ret float %res
+}
+declare float @llvm.sqrt.f32(float) nounwind readnone
+
+define double @test_sqrt_f64(double %a) {
+; SSE2-LABEL: test_sqrt_f64
+; SSE2:       sqrtsd %xmm0, %xmm0
+; AVX-LABEL:  test_sqrt_f64
+; AVX:        vsqrtsd %xmm0, %xmm0
+  %res = call double @llvm.sqrt.f64(double %a)
+  ret double %res
+}
+declare double @llvm.sqrt.f64(double) nounwind readnone
+
+
author	Juergen Ributzka <juergen@apple.com>
	Wed, 11 Jun 2014 23:11:02 +0000 (23:11 +0000)
committer	Juergen Ributzka <juergen@apple.com>
	Wed, 11 Jun 2014 23:11:02 +0000 (23:11 +0000)
lib/Target/X86/X86FastISel.cpp		patch \| blob \| history
test/CodeGen/X86/sqrt.ll	[new file with mode: 0644]	patch \| blob