From 446f01b1d5e9840f2748fdcc0c9f70db6d72d640 Mon Sep 17 00:00:00 2001
From: Juergen Ributzka
Date: Wed, 10 Dec 2014 19:43:32 +0000
Subject: [PATCH] [AArch64] MachO large code-model: Materialize FP constants in code.

In the large code model we have to first get the address of the GOT entry,
load the address of the constant, and then load the constant itself.

To avoid these loads and the GOT entry altogether, this commit changes how FP
constants are materialized in the large code model. The constants are now
materialized in a GPR and then bitconverted/moved into the FPR.

Reviewed by Tim Northover

Fixes rdar://problem/16572564.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@223941 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AArch64/AArch64FastISel.cpp     | 18 ++++++++++++++++++
 lib/Target/AArch64/AArch64ISelLowering.cpp |  7 +++++++
 lib/Target/AArch64/AArch64InstrInfo.td     | 18 ++++++++++++++++++
 test/CodeGen/AArch64/fpimm.ll              | 23 ++++++++++++++++++++++-
 4 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index 3cad316c284..419fbc8be6c 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -367,6 +367,24 @@ unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
   }
 
+  // For the MachO large code model materialize the FP constant in code.
+  if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
+    unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
+    const TargetRegisterClass *RC = Is64Bit ?
+        &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+
+    unsigned TmpReg = createResultReg(RC);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
+        .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
+
+    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+            TII.get(TargetOpcode::COPY), ResultReg)
+        .addReg(TmpReg, getKillRegState(true));
+
+    return ResultReg;
+  }
+
   // Materialize via constant pool. MachineConstantPool wants an explicit
   // alignment.
   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 373831a5596..3d0f638c5eb 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -387,6 +387,13 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
     setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
   }
 
+  // Make floating-point constants legal for the large code model, so they
+  // don't become loads from the constant pool.
+  if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
+    setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+    setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+  }
+
   // AArch64 does not have floating-point extending loads, i1 sign-extending
   // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
   setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index cd71339189f..01a01e059a5 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -481,6 +481,24 @@ def trunc_imm : SDNodeXForm<imm, [{
 def : Pat<(i64 i64imm_32bit:$src),
           (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>;
 
+// Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model).
+def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
+return CurDAG->getTargetConstant(
+  N->getValueAPF().bitcastToAPInt().getZExtValue(), MVT::i32);
+}]>;
+
+def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
+return CurDAG->getTargetConstant(
+  N->getValueAPF().bitcastToAPInt().getZExtValue(), MVT::i64);
+}]>;
+
+
+def : Pat<(f32 fpimm:$in),
+          (COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>;
+def : Pat<(f64 fpimm:$in),
+          (COPY_TO_REGCLASS (MOVi64imm (bitcast_fpimm_to_i64 f64:$in)), FPR64)>;
+
+
 // Deal with the various forms of (ELF) large addressing with MOVZ/MOVK
 // sequences.
 def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2,
diff --git a/test/CodeGen/AArch64/fpimm.ll b/test/CodeGen/AArch64/fpimm.ll
index e59520c4dc9..b7db9182a39 100644
--- a/test/CodeGen/AArch64/fpimm.ll
+++ b/test/CodeGen/AArch64/fpimm.ll
@@ -1,4 +1,6 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -code-model=large -verify-machineinstrs < %s | FileCheck %s --check-prefix=LARGE
+; RUN: llc -mtriple=aarch64-apple-darwin -code-model=large -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s --check-prefix=LARGE
 
 @varf32 = global float 0.0
 @varf64 = global double 0.0
@@ -34,3 +36,22 @@ define void @check_double() {
 ; CHECK: ret
   ret void
 }
+
+; LARGE-LABEL: check_float2
+; LARGE: movz [[REG:w[0-9]+]], #0x4049, lsl #16
+; LARGE-NEXT: movk [[REG]], #0xfdb
+; LARGE-NEXT: fmov s0, [[REG]]
+define float @check_float2() {
+  ret float 3.14159274101257324218750
+}
+
+; LARGE-LABEL: check_double2
+; LARGE: movz [[REG:x[0-9]+]], #0x4009, lsl #48
+; LARGE-NEXT: movk [[REG]], #0x21fb, lsl #32
+; LARGE-NEXT: movk [[REG]], #0x5444, lsl #16
+; LARGE-NEXT: movk [[REG]], #0x2d18
+; LARGE-NEXT: fmov d0, [[REG]]
+define double @check_double2() {
+  ret double 3.1415926535897931159979634685441851615905761718750
+}
+
-- 
2.34.1
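
For reference, a minimal sketch of how to exercise the new lowering by hand,
mirroring the check_double2 test added above. The file name pi.ll and the
function name @pi are illustrative only; the triple, -code-model, and
-fast-isel flags are taken from the new RUN lines.

    ; pi.ll -- run either of:
    ;   llc -mtriple=aarch64-apple-darwin -code-model=large -o - pi.ll
    ;   llc -mtriple=aarch64-apple-darwin -code-model=large -fast-isel -fast-isel-abort -o - pi.ll
    ; Per the LARGE checks above, the constant should now be built in a GPR
    ; with a movz/movk sequence and transferred with fmov, instead of being
    ; loaded from a constant-pool entry through the GOT.
    define double @pi() {
      ret double 3.1415926535897931159979634685441851615905761718750
    }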