Fix bug 9905: Failure in code selection for llvm intrinsics sqrt/exp (fix for FSQRT...

author Stepan Dyatkovskiy <stpworld@narod.ru>

Thu, 8 Dec 2011 07:55:03 +0000 (07:55 +0000)

committer Stepan Dyatkovskiy <stpworld@narod.ru>

Thu, 8 Dec 2011 07:55:03 +0000 (07:55 +0000)
author Stepan Dyatkovskiy <stpworld@narod.ru>
Thu, 8 Dec 2011 07:55:03 +0000 (07:55 +0000)
committer Stepan Dyatkovskiy <stpworld@narod.ru>
Thu, 8 Dec 2011 07:55:03 +0000 (07:55 +0000)
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp

index f09a1c7e4f7de9d82640377692a838a50c383a12..21a34d207484e2b03f2fc816eb3f274852647b3d 100644 (file)
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -468,13 +468,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
  
      // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
      // neither Neon nor VFP support any arithmetic operations on it.
  
      // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
      // neither Neon nor VFP support any arithmetic operations on it.
+    // The same applies to v4f32, except that vadd, vsub and vmul are natively
+    // supported for v4f32.
      setOperationAction(ISD::FADD, MVT::v2f64, Expand);
      setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
      setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
      setOperationAction(ISD::FADD, MVT::v2f64, Expand);
      setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
      setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+    // FIXME: Code duplication: FDIV and FREM are always expanded; see the
+    // ARMTargetLowering::addTypeForNEON method for details.
      setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
      setOperationAction(ISD::FREM, MVT::v2f64, Expand);
      setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
      setOperationAction(ISD::FREM, MVT::v2f64, Expand);
+    // FIXME: Create unittest.
+    // In other words, find a case where "copysign" appears in the DAG with
+    // vector operands.
      setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
      setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
+    // FIXME: Code duplication: SETCC has a custom operation action; see the
+    // ARMTargetLowering::addTypeForNEON method for details.
      setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
      setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
+    // FIXME: Create unittest for FNEG and for FABS.
      setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
      setOperationAction(ISD::FABS, MVT::v2f64, Expand);
      setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
      setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
      setOperationAction(ISD::FABS, MVT::v2f64, Expand);
      setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
@@ -487,11 +497,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
      setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
      setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
      setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
      setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
      setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
      setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
+    // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
      setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
      setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
      setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
      setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
      setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
      setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
      setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
      setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
      setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
      setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
+    
+    setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
+    setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
+    setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
+    setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
+    setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
+    setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
+    setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
+    setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
+    setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
+    setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
  
      // Neon does not support some operations on v1i64 and v2i64 types.
      setOperationAction(ISD::MUL, MVT::v1i64, Expand);
  
      // Neon does not support some operations on v1i64 and v2i64 types.
      setOperationAction(ISD::MUL, MVT::v1i64, Expand);
diff --git a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll

new file mode 100644 (file)

index 0000000..ec6d850
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
@@ -0,0 +1,302 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
+
+@A = global <4 x float> <float 0., float 1., float 2., float 3.>
+
+define void @test_sqrt(<4 x float>* %X) nounwind {
+
+; CHECK: test_sqrt:
+
+; CHECK:      movw    r1, :lower16:A
+; CHECK-NEXT: movt    r1, :upper16:A
+; CHECK:      vldmia  r1, {[[short0:s[0-9]+]], [[short1:s[0-9]+]], [[short2:s[0-9]+]], [[short3:s[0-9]+]]}
+; CHECK:      vsqrt.f32       {{s[0-9]+}}, [[short3]]
+; CHECK:      vsqrt.f32       {{s[0-9]+}}, [[short2]]
+; CHECK:      vsqrt.f32       {{s[0-9]+}}, [[short1]]
+; CHECK:      vsqrt.f32       {{s[0-9]+}}, [[short0]]
+; CHECK-NEXT: vstmia  {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) nounwind readonly
+
+
+define void @test_cos(<4 x float>* %X) nounwind {
+
+; CHECK: test_cos:
+
+; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:A
+; CHECK-NEXT: movt  [[reg0]], :upper16:A
+; CHECK:      vldmia [[reg0]], {{.*}}
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  cosf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  cosf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  cosf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  cosf
+
+; CHECK:      vstmia  {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.cos.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.cos.v4f32(<4 x float>) nounwind readonly
+
+define void @test_exp(<4 x float>* %X) nounwind {
+
+; CHECK: test_exp:
+
+; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:A
+; CHECK-NEXT: movt  [[reg0]], :upper16:A
+; CHECK:      vldmia [[reg0]], {{.*}}
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  expf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  expf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  expf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  expf
+
+; CHECK:      vstmia  {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.exp.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.exp.v4f32(<4 x float>) nounwind readonly
+
+define void @test_exp2(<4 x float>* %X) nounwind {
+
+; CHECK: test_exp2:
+
+; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:A
+; CHECK-NEXT: movt  [[reg0]], :upper16:A
+; CHECK:      vldmia [[reg0]], {{.*}}
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  exp2f
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  exp2f
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  exp2f
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  exp2f
+
+; CHECK:      vstmia  {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.exp2.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.exp2.v4f32(<4 x float>) nounwind readonly
+
+define void @test_log10(<4 x float>* %X) nounwind {
+
+; CHECK: test_log10:
+
+; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:A
+; CHECK-NEXT: movt  [[reg0]], :upper16:A
+; CHECK:      vldmia [[reg0]], {{.*}}
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  log10f
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  log10f
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  log10f
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  log10f
+
+; CHECK:      vstmia  {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.log10.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.log10.v4f32(<4 x float>) nounwind readonly
+
+define void @test_log(<4 x float>* %X) nounwind {
+
+; CHECK: test_log:
+
+; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:A
+; CHECK-NEXT: movt  [[reg0]], :upper16:A
+; CHECK:      vldmia [[reg0]], {{.*}}
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  logf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  logf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  logf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  logf
+
+; CHECK:      vstmia  {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.log.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.log.v4f32(<4 x float>) nounwind readonly
+
+define void @test_log2(<4 x float>* %X) nounwind {
+
+; CHECK: test_log2:
+
+; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:A
+; CHECK-NEXT: movt  [[reg0]], :upper16:A
+; CHECK:      vldmia [[reg0]], {{.*}}
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  log2f
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  log2f
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  log2f
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  log2f
+
+; CHECK:      vstmia  {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.log2.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.log2.v4f32(<4 x float>) nounwind readonly
+
+
+define void @test_pow(<4 x float>* %X) nounwind {
+
+; CHECK: test_pow:
+
+; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:A
+; CHECK-NEXT: movt  [[reg0]], :upper16:A
+; CHECK:      vldmia [[reg0]], {{.*}}
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  powf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  powf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  powf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  powf
+
+; CHECK:      vstmia  {{.*}}
+
+L.entry:
+
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.pow.v4f32(<4 x float> %0, <4 x float> <float 2., float 2., float 2., float 2.>)
+
+  store <4 x float> %1, <4 x float>* %X, align 16
+
+  ret void
+}
+
+declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>) nounwind readonly
+
+define void @test_powi(<4 x float>* %X) nounwind {
+
+; CHECK: test_powi:
+
+; CHECK:       movw  [[reg0:r[0-9]+]], :lower16:A
+; CHECK-NEXT:  movt  [[reg0]], :upper16:A
+; CHECK-NEXT:  vldmia  [[reg0]], {{.*}}
+; CHECK:       vmul.f32 {{.*}}
+
+; CHECK:      vstmia  {{.*}}
+
+L.entry:
+
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.powi.v4f32(<4 x float> %0, i32 2)
+
+  store <4 x float> %1, <4 x float>* %X, align 16
+
+  ret void
+}
+
+declare <4 x float> @llvm.powi.v4f32(<4 x float>, i32) nounwind readonly
+
+define void @test_sin(<4 x float>* %X) nounwind {
+
+; CHECK: test_sin:
+
+; CHECK:      movw  [[reg0:r[0-9]+]], :lower16:A
+; CHECK-NEXT: movt  [[reg0]], :upper16:A
+; CHECK:      vldmia [[reg0]], {{.*}}
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  sinf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  sinf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  sinf
+
+; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      bl  sinf
+
+; CHECK:      vstmia  {{.*}}
+
+L.entry:
+  %0 = load <4 x float>* @A, align 16
+  %1 = call <4 x float> @llvm.sin.v4f32(<4 x float> %0)
+  store <4 x float> %1, <4 x float>* %X, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.sin.v4f32(<4 x float>) nounwind readonly
+
author	Stepan Dyatkovskiy <stpworld@narod.ru>
	Thu, 8 Dec 2011 07:55:03 +0000 (07:55 +0000)
committer	Stepan Dyatkovskiy <stpworld@narod.ru>
	Thu, 8 Dec 2011 07:55:03 +0000 (07:55 +0000)
lib/Target/ARM/ARMISelLowering.cpp		patch \| blob \| history
test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll	[new file with mode: 0644]	patch \| blob