ARM: support direct f16 <-> f64 conversions

author Tim Northover <tnorthover@apple.com>

Thu, 17 Jul 2014 11:27:04 +0000 (11:27 +0000)

committer Tim Northover <tnorthover@apple.com>

Thu, 17 Jul 2014 11:27:04 +0000 (11:27 +0000)
author Tim Northover <tnorthover@apple.com>
Thu, 17 Jul 2014 11:27:04 +0000 (11:27 +0000)
committer Tim Northover <tnorthover@apple.com>
Thu, 17 Jul 2014 11:27:04 +0000 (11:27 +0000)
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp

index e74d0540eb0f67e321df13ab6e894008006d1c79..27048f9a10d358bdbf78e62187f6851c9483c299 100644 (file)
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -825,7 +825,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
        setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
        setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
      }
-    // Special handling for half-precision FP.
+
+    // v8 adds f64 <-> f16 conversion. Before that it should be expanded.
+    if (!Subtarget->hasV8Ops()) {
+      setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
+      setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
+    }
+
+    // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
      if (!Subtarget->hasFP16()) {
        setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
        setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td

index e709e59b3fa783e61dda6bb1fad706ba85c02cc3..55a6efcb4c04326ff045ab37f40f447b40e1b09d 100644 (file)
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -551,12 +551,6 @@ def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
                   /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
                   [/* For disassembly only; pattern left blank */]>;
  
-def : Pat<(fp_to_f16 SPR:$a),
-          (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
-
-def : Pat<(f16_to_fp GPR:$a),
-          (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
-
  def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
                   /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
                   [/* For disassembly only; pattern left blank */]>;
@@ -619,6 +613,19 @@ def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0,
    let Inst{5}     = Dm{4};
  }
  
+def : Pat<(fp_to_f16 SPR:$a),
+          (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
+
+def : Pat<(fp_to_f16 (f64 DPR:$a)),
+          (i32 (COPY_TO_REGCLASS (VCVTBDH DPR:$a), GPR))>;
+
+def : Pat<(f16_to_fp GPR:$a),
+          (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+def : Pat<(f64 (f16_to_fp GPR:$a)),
+          (VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+
  multiclass vcvt_inst<string opc, bits<2> rm> {
    let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in {
      def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0,
diff --git a/test/CodeGen/ARM/fp16.ll b/test/CodeGen/ARM/fp16.ll

index 7a99c175751cdf6be345d4af8d02d515da6fd2bc..d3f32556a093c05fc167aa28070eb2c43529b507 100644 (file)
--- a/test/CodeGen/ARM/fp16.ll
+++ b/test/CodeGen/ARM/fp16.ll
@@ -1,5 +1,6 @@
  ; RUN: llc < %s | FileCheck %s
  ; RUN: llc -mattr=+vfp3,+fp16 < %s | FileCheck --check-prefix=CHECK-FP16 %s
+; RUN: llc -mtriple=armv8-eabi < %s | FileCheck --check-prefix=CHECK-ARMV8 %s
  target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32"
  target triple = "armv7-eabi"
  
@@ -10,23 +11,61 @@ target triple = "armv7-eabi"
  define arm_aapcs_vfpcc void @foo() nounwind {
  ; CHECK-LABEL: foo:
  ; CHECK-FP16-LABEL: foo:
+; CHECK-ARMV8-LABEL: foo:
  entry:
    %0 = load i16* @x, align 2
    %1 = load i16* @y, align 2
    %2 = tail call float @llvm.convert.from.fp16.f32(i16 %0)
  ; CHECK: __gnu_h2f_ieee
  ; CHECK-FP16: vcvtb.f32.f16
+; CHECK-ARMV8: vcvtb.f32.f16
    %3 = tail call float @llvm.convert.from.fp16.f32(i16 %1)
  ; CHECK: __gnu_h2f_ieee
  ; CHECK-FP16: vcvtb.f32.f16
+; CHECK-ARMV8: vcvtb.f32.f16
    %4 = fadd float %2, %3
    %5 = tail call i16 @llvm.convert.to.fp16.f32(float %4)
  ; CHECK: __gnu_f2h_ieee
  ; CHECK-FP16: vcvtb.f16.f32
+; CHECK-ARMV8: vcvtb.f16.f32
    store i16 %5, i16* @x, align 2
    ret void
  }
  
+define arm_aapcs_vfpcc double @test_from_fp16(i16 %in) {
+; CHECK-LABEL: test_from_fp16:
+; CHECK-FP16-LABEL: test_from_fp16:
+; CHECK-ARMV8-LABEL: test_from_fp16:
+  %val = call double @llvm.convert.from.fp16.f64(i16 %in)
+; CHECK: bl __gnu_h2f_ieee
+; CHECK: vmov [[TMP:s[0-9]+]], r0
+; CHECK: vcvt.f64.f32 d0, [[TMP]]
+
+; CHECK-FP16: vmov [[TMP16:s[0-9]+]], r0
+; CHECK-FP16: vcvtb.f32.f16 [[TMP32:s[0-9]+]], [[TMP16]]
+; CHECK-FP16: vcvt.f64.f32 d0, [[TMP32]]
+
+; CHECK-ARMV8: vmov [[TMP:s[0-9]+]], r0
+; CHECK-ARMV8: vcvtb.f64.f16 d0, [[TMP]]
+  ret double %val
+}
+
+define arm_aapcs_vfpcc i16 @test_to_fp16(double %in) {
+; CHECK-LABEL: test_to_fp16:
+; CHECK-FP16-LABEL: test_to_fp16:
+; CHECK-ARMV8-LABEL: test_to_fp16:
+  %val = call i16 @llvm.convert.to.fp16.f64(double %in)
+; CHECK: bl __truncdfhf2
+
+; CHECK-FP16: bl __truncdfhf2
+
+; CHECK-ARMV8: vcvtb.f16.f64 [[TMP:s[0-9]+]], d0
+; CHECK-ARMV8: vmov r0, [[TMP]]
+  ret i16 %val
+}
+
  declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
+declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone
  
  declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
+declare i16 @llvm.convert.to.fp16.f64(double) nounwind readnone
author	Tim Northover <tnorthover@apple.com>
	Thu, 17 Jul 2014 11:27:04 +0000 (11:27 +0000)
committer	Tim Northover <tnorthover@apple.com>
	Thu, 17 Jul 2014 11:27:04 +0000 (11:27 +0000)
lib/Target/ARM/ARMISelLowering.cpp		patch \| blob \| history
lib/Target/ARM/ARMInstrVFP.td		patch \| blob \| history
test/CodeGen/ARM/fp16.ll		patch \| blob \| history