From 380fa65d5dabd9222e66d5c7d380e3ceaaea82ad Mon Sep 17 00:00:00 2001
From: Tim Northover <tnorthover@apple.com>
Date: Tue, 15 Apr 2014 13:59:57 +0000
Subject: [PATCH] AArch64/ARM64: copy patterns for fixed-point conversions

Code is mostly copied directly across, with a slight extension of the
ISelDAGToDAG function so that it can cope with the floating-point constants
being behind a litpool.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206285 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM64/ARM64ISelDAGToDAG.cpp | 52 ++++++++++++++++++++++
 lib/Target/ARM64/ARM64InstrFormats.td  | 61 +++++++++++++++++++-------
 test/CodeGen/AArch64/fcvt-fixed.ll     |  1 +
 3 files changed, 99 insertions(+), 15 deletions(-)
diff --git a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp b/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp
index 4938aaad036..956f61148ee 100644
--- a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp
+++ b/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp
@@ -14,6 +14,7 @@
 #define DEBUG_TYPE "arm64-isel"
 #include "ARM64TargetMachine.h"
 #include "MCTargetDesc/ARM64AddressingModes.h"
+#include "llvm/ADT/APSInt.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/IR/Function.h" // To access function attributes.
 #include "llvm/IR/GlobalValue.h"
@@ -179,6 +180,13 @@ private:
   bool isWorthFolding(SDValue V) const;
   bool SelectExtendedSHL(SDValue N, unsigned Size, SDValue &Offset,
                          SDValue &Imm);
+
+  template<unsigned RegWidth>
+  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
+    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
+  }
+
+  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
 };
 } // end anonymous namespace
 
@@ -1751,6 +1759,50 @@ SDNode *ARM64DAGToDAGISel::SelectLIBM(SDNode *N) {
   return CurDAG->getMachineNode(Opc, dl, VT, Ops);
 }
 
+bool
+ARM64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
+                                              unsigned RegWidth) {
+  APFloat FVal(0.0);
+  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
+    FVal = CN->getValueAPF();
+  else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
+    // Some otherwise illegal constants are allowed in this case.
+    if (LN->getOperand(1).getOpcode() != ARM64ISD::ADDlow ||
+        !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
+      return false;
+
+    ConstantPoolSDNode *CN =
+        dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
+    FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
+  } else
+    return false;
+
+  // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
+  // is between 1 and 32 for a destination w-register, or 1 and 64 for an
+  // x-register.
+  //
+  // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
+  // want THIS_NODE to be 2^fbits. This is much easier to deal with using
+  // integers.
+  bool IsExact;
+
+  // fbits is between 1 and 64 in the worst-case, which means the fmul
+  // could have 2^64 as an actual operand. Need 65 bits of precision.
+  APSInt IntVal(65, true);
+  FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
+
+  // N.b. isPowerOf2 also checks for > 0.
+  if (!IsExact || !IntVal.isPowerOf2()) return false;
+  unsigned FBits = IntVal.logBase2();
+
+  // Checks above should have guaranteed that we haven't lost information in
+  // finding FBits, but it must still be in range.
+  if (FBits == 0 || FBits > RegWidth) return false;
+
+  FixedPos = CurDAG->getTargetConstant(FBits, MVT::i32);
+  return true;
+}
+
 SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) {
   // Dump information about the Node being selected
   DEBUG(errs() << "Selecting: ");
diff --git a/lib/Target/ARM64/ARM64InstrFormats.td b/lib/Target/ARM64/ARM64InstrFormats.td
index 2279f9be82e..1e3b3bc31ce 100644
--- a/lib/Target/ARM64/ARM64InstrFormats.td
+++ b/lib/Target/ARM64/ARM64InstrFormats.td
@@ -304,17 +304,28 @@ def movk_symbol_g0 : Operand<i32> {
   let ParserMatchClass = MovKSymbolG0AsmOperand;
 }
 
-def fixedpoint32 : Operand<i32> {
+class fixedpoint_i32<ValueType FloatVT>
+  : Operand<FloatVT>,
+    ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<32>", [fpimm, ld]> {
   let EncoderMethod = "getFixedPointScaleOpValue";
   let DecoderMethod = "DecodeFixedPointScaleImm32";
   let ParserMatchClass = Imm1_32Operand;
 }
-def fixedpoint64 : Operand<i64> {
+
+class fixedpoint_i64<ValueType FloatVT>
+  : Operand<FloatVT>,
+    ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<64>", [fpimm, ld]> {
   let EncoderMethod = "getFixedPointScaleOpValue";
   let DecoderMethod = "DecodeFixedPointScaleImm64";
   let ParserMatchClass = Imm1_64Operand;
 }
 
+def fixedpoint_f32_i32 : fixedpoint_i32<f32>;
+def fixedpoint_f64_i32 : fixedpoint_i32<f64>;
+
+def fixedpoint_f32_i64 : fixedpoint_i64<f32>;
+def fixedpoint_f64_i64 : fixedpoint_i64<f64>;
+
 def vecshiftR8 : Operand<i32>, ImmLeaf<i32, [{
   return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9);
 }]> {
@@ -3086,9 +3097,9 @@ class BaseFPToIntegerUnscaled<bits<2> type, bits<2> rmode, bits<3> opcode,
 let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
 class BaseFPToInteger<bits<2> type, bits<2> rmode, bits<3> opcode,
                       RegisterClass srcType, RegisterClass dstType,
-                      Operand immType, string asm>
+                      Operand immType, string asm, list<dag> pattern>
     : I<(outs dstType:$Rd), (ins srcType:$Rn, immType:$scale),
-         asm, "\t$Rd, $Rn, $scale", "", []>,
+         asm, "\t$Rd, $Rn, $scale", "", pattern>,
       Sched<[WriteFCvt]> {
   bits<5> Rd;
   bits<5> Rn;
@@ -3132,30 +3143,38 @@ multiclass FPToIntegerUnscaled<bits<2> rmode, bits<3> opcode, string asm,
 }
 
 multiclass FPToIntegerScaled<bits<2> rmode, bits<3> opcode, string asm,
-           SDPatternOperator OpN> {
+                             SDPatternOperator OpN> {
   // Scaled single-precision to 32-bit
   def SWSri : BaseFPToInteger<0b00, rmode, opcode, FPR32, GPR32,
-                              fixedpoint32, asm> {
+                              fixedpoint_f32_i32, asm,
+              [(set GPR32:$Rd, (OpN (fmul FPR32:$Rn,
+                                          fixedpoint_f32_i32:$scale)))]> {
     let Inst{31} = 0; // 32-bit GPR flag
     let scale{5} = 1;
   }
 
   // Scaled single-precision to 64-bit
   def SXSri : BaseFPToInteger<0b00, rmode, opcode, FPR32, GPR64,
-                              fixedpoint64, asm> {
+                              fixedpoint_f32_i64, asm,
+              [(set GPR64:$Rd, (OpN (fmul FPR32:$Rn,
+                                          fixedpoint_f32_i64:$scale)))]> {
     let Inst{31} = 1; // 64-bit GPR flag
   }
 
   // Scaled double-precision to 32-bit
   def SWDri : BaseFPToInteger<0b01, rmode, opcode, FPR64, GPR32,
-                              fixedpoint32, asm> {
+                              fixedpoint_f64_i32, asm,
+              [(set GPR32:$Rd, (OpN (fmul FPR64:$Rn,
+                                          fixedpoint_f64_i32:$scale)))]> {
     let Inst{31} = 0; // 32-bit GPR flag
     let scale{5} = 1;
   }
 
   // Scaled double-precision to 64-bit
   def SXDri : BaseFPToInteger<0b01, rmode, opcode, FPR64, GPR64,
-                              fixedpoint64, asm> {
+                              fixedpoint_f64_i64, asm,
+              [(set GPR64:$Rd, (OpN (fmul FPR64:$Rn,
+                                          fixedpoint_f64_i64:$scale)))]> {
     let Inst{31} = 1; // 64-bit GPR flag
   }
 }
@@ -3167,9 +3186,9 @@ multiclass FPToIntegerScaled<bits<2> rmode, bits<3> opcode, string asm,
 let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
 class BaseIntegerToFP<bit isUnsigned,
                       RegisterClass srcType, RegisterClass dstType,
-                      Operand immType, string asm>
+                      Operand immType, string asm, list<dag> pattern>
     : I<(outs dstType:$Rd), (ins srcType:$Rn, immType:$scale),
-         asm, "\t$Rd, $Rn, $scale", "", []>,
+         asm, "\t$Rd, $Rn, $scale", "", pattern>,
       Sched<[WriteFCvt]> {
   bits<5> Rd;
   bits<5> Rn;
@@ -3222,24 +3241,36 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDNode node> {
   }
 
   // Scaled
-  def SWSri: BaseIntegerToFP<isUnsigned, GPR32, FPR32, fixedpoint32, asm> {
+  def SWSri: BaseIntegerToFP<isUnsigned, GPR32, FPR32, fixedpoint_f32_i32, asm,
+                             [(set FPR32:$Rd,
+                                   (fdiv (node GPR32:$Rn),
+                                         fixedpoint_f32_i32:$scale))]> {
     let Inst{31} = 0; // 32-bit GPR flag
     let Inst{22} = 0; // 32-bit FPR flag
     let scale{5} = 1;
   }
 
-  def SWDri: BaseIntegerToFP<isUnsigned, GPR32, FPR64, fixedpoint32, asm> {
+  def SWDri: BaseIntegerToFP<isUnsigned, GPR32, FPR64, fixedpoint_f64_i32, asm,
+                             [(set FPR64:$Rd,
+                                   (fdiv (node GPR32:$Rn),
+                                         fixedpoint_f64_i32:$scale))]> {
     let Inst{31} = 0; // 32-bit GPR flag
     let Inst{22} = 1; // 64-bit FPR flag
     let scale{5} = 1;
   }
 
-  def SXSri: BaseIntegerToFP<isUnsigned, GPR64, FPR32, fixedpoint64, asm> {
+  def SXSri: BaseIntegerToFP<isUnsigned, GPR64, FPR32, fixedpoint_f32_i64, asm,
+                             [(set FPR32:$Rd,
+                                   (fdiv (node GPR64:$Rn),
+                                         fixedpoint_f32_i64:$scale))]> {
     let Inst{31} = 1; // 64-bit GPR flag
     let Inst{22} = 0; // 32-bit FPR flag
   }
 
-  def SXDri: BaseIntegerToFP<isUnsigned, GPR64, FPR64, fixedpoint64, asm> {
+  def SXDri: BaseIntegerToFP<isUnsigned, GPR64, FPR64, fixedpoint_f64_i64, asm,
+                             [(set FPR64:$Rd,
+                                   (fdiv (node GPR64:$Rn),
+                                         fixedpoint_f64_i64:$scale))]> {
     let Inst{31} = 1; // 64-bit GPR flag
     let Inst{22} = 1; // 64-bit FPR flag
   }
diff --git a/test/CodeGen/AArch64/fcvt-fixed.ll b/test/CodeGen/AArch64/fcvt-fixed.ll
index b754b73517c..5d7c83ebfb3 100644
--- a/test/CodeGen/AArch64/fcvt-fixed.ll
+++ b/test/CodeGen/AArch64/fcvt-fixed.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-apple-ios7.0 -O0
 
 ; (The O0 test is to make sure FastISel still constrains its operands properly
-- 
2.34.1