setLibcallName(RTLIB::SREM_I64, "_allrem");
setLibcallName(RTLIB::UREM_I64, "_aullrem");
setLibcallName(RTLIB::MUL_I64, "_allmul");
- setLibcallName(RTLIB::FPTOUINT_F64_I64, "_ftol2");
- setLibcallName(RTLIB::FPTOUINT_F32_I64, "_ftol2");
setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
- setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::C);
- setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::C);
+
+ // The _ftol2 runtime function has an unusual calling conv, which
+ // is modeled by a special pseudo-instruction.
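+ // Clearing the libcall names keeps legalization from emitting an
+ // ordinary call to _ftol2; FP_TO_UINT is custom-lowered to that
+ // pseudo instead (see the isTargetFTOL() block below).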
+ setLibcallName(RTLIB::FPTOUINT_F64_I64, 0);
+ setLibcallName(RTLIB::FPTOUINT_F32_I64, 0);
+ setLibcallName(RTLIB::FPTOUINT_F64_I32, 0);
+ setLibcallName(RTLIB::FPTOUINT_F32_I32, 0);
}
if (Subtarget->isTargetDarwin()) {
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
}
+ if (isTargetFTOL()) {
+ // Use the _ftol2 runtime function, which has a pseudo-instruction
+ // to handle its weird calling convention.
+ setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
+ }
+
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
if (!X86ScalarSSEf64) {
setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
EVT DstTy = Op.getValueType();
- if (!IsSigned) {
+ if (!IsSigned && !isIntegerTypeFTOL(DstTy)) {
assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
DstTy = MVT::i64;
}
assert(DstTy.getSimpleVT() <= MVT::i64 &&
DstTy.getSimpleVT() >= MVT::i16 &&
- "Unknown FP_TO_SINT to lower!");
+ "Unknown FP_TO_INT to lower!");
// These are really Legal.
if (DstTy == MVT::i32 &&
isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
return std::make_pair(SDValue(), SDValue());
- // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
- // stack slot.
+ // We lower FP->int64 either into FISTP64 followed by a load from a temporary
+ // stack slot, or into the FTOL runtime function.
MachineFunction &MF = DAG.getMachineFunction();
unsigned MemSize = DstTy.getSizeInBits()/8;
int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
-
-
unsigned Opc;
- switch (DstTy.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
- case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
- case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
- case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
- }
+ if (!IsSigned && isIntegerTypeFTOL(DstTy))
+ Opc = X86ISD::WIN_FTOL;
+ else
+ switch (DstTy.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid FP_TO_INT to lower!");
+ case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
+ case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
+ case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
+ }
SDValue Chain = DAG.getEntryNode();
SDValue Value = Op.getOperand(0);
EVT TheVT = Op.getOperand(0).getValueType();
+ // FIXME: This causes a redundant load/store if the SSE-class value is
+ // already in memory, such as if it is on the call stack.
if (isScalarFPTypeInSSEReg(TheVT)) {
assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
Chain = DAG.getStore(Chain, DL, Value, StackSlot,
                     MachinePointerInfo::getFixedStack(SSFI),
                     false, false, 0);
}
MachineMemOperand *MMO =
  MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
                          MachineMemOperand::MOStore, MemSize, MemSize);
- // Build the FP_TO_INT*_IN_MEM
- SDValue Ops[] = { Chain, Value, StackSlot };
- SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
- Ops, 3, DstTy, MMO);
-
- return std::make_pair(FIST, StackSlot);
+ if (Opc != X86ISD::WIN_FTOL) {
+ // Build the FP_TO_INT*_IN_MEM
+ SDValue Ops[] = { Chain, Value, StackSlot };
+ SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
+ Ops, 3, DstTy, MMO);
+ return std::make_pair(FIST, StackSlot);
+ } else {
+ SDValue ftol = DAG.getNode(X86ISD::WIN_FTOL, DL,
+ DAG.getVTList(MVT::Other, MVT::Glue),
+ Chain, Value);
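+ // _ftol2 returns the 64-bit result in EDX:EAX. Copy both halves out of
+ // the call (glued so nothing clobbers them in between) and reassemble
+ // them into an i64 with BUILD_PAIR.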
+ SDValue eax = DAG.getCopyFromReg(ftol, DL, X86::EAX,
+ MVT::i32, ftol.getValue(1));
+ SDValue edx = DAG.getCopyFromReg(eax.getValue(1), DL, X86::EDX,
+ MVT::i32, eax.getValue(2));
+ SDValue pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, eax, edx);
+ return std::make_pair(pair, SDValue());
+ }
}
SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
// If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
if (FIST.getNode() == 0) return Op;
- // Load the result.
- return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
- FIST, StackSlot, MachinePointerInfo(),
- false, false, false, 0);
+ if (StackSlot.getNode())
+ // Load the result.
+ return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
+ FIST, StackSlot, MachinePointerInfo(),
+ false, false, false, 0);
+ else
+ // The node is the result.
+ return FIST;
}
SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
case ISD::SUBE:
// We don't want to expand or promote these.
return;
- case ISD::FP_TO_SINT: {
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: {
+ bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
+
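+ // Unsigned conversions only need custom result replacement when they
+ // will be lowered to _ftol2; any other FP_TO_UINT keeps the default
+ // handling.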
+ if (!IsSigned && !isIntegerTypeFTOL(SDValue(N, 0).getValueType()))
+ return;
+
std::pair<SDValue,SDValue> Vals =
- FP_TO_INTHelper(SDValue(N, 0), DAG, true);
+ FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned);
SDValue FIST = Vals.first, StackSlot = Vals.second;
if (FIST.getNode() != 0) {
EVT VT = N->getValueType(0);
// Return a load from the stack slot.
- Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot,
- MachinePointerInfo(),
- false, false, false, 0));
+ if (StackSlot.getNode() != 0)
+ Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot,
+ MachinePointerInfo(),
+ false, false, false, 0));
+ else
+ Results.push_back(FIST);
}
return;
}
case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
case X86ISD::MEMBARRIER: return "X86ISD::MEMBARRIER";
case X86ISD::SEG_ALLOCA: return "X86ISD::SEG_ALLOCA";
+ case X86ISD::WIN_FTOL: return "X86ISD::WIN_FTOL";
}
}
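For reference, the isTargetFTOL() and isIntegerTypeFTOL() predicates used above are assumed to be declared in X86ISelLowering.h roughly as follows; this is a sketch, since the exact subtarget guard does not appear in this excerpt:

    // Sketch: predicates assumed by the lowering code above.
    bool isTargetFTOL() const {
      // _ftol2 is provided by the MSVCRT, so only 32-bit Windows uses it.
      return Subtarget->isTargetWindows() && !Subtarget->is64Bit();
    }

    bool isIntegerTypeFTOL(EVT VT) const {
      // _ftol2 produces an i64; narrower results keep the FIST lowering.
      return isTargetFTOL() && VT == MVT::i64;
    }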
--- /dev/null
+++ b/test/CodeGen/X86/win_ftol.ll
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=FTOL
+; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT
+; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=COMPILERRT
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=COMPILERRT
+; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT
+; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s -check-prefix=COMPILERRT
+; RUN: llc < %s -mattr=-sse -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=FTOL_2
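+;
+; The FTOL_2 run disables SSE so that floating-point values are kept on the
+; x87 stack, which lets the checks below pin down the x87 stack discipline
+; around the __ftol2 calls.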
+
+; Win32 targets use the MSVCRT _ftol2 runtime function for fptoui to i64. This
+; function has a nonstandard calling convention: the input value is expected on
+; the x87 stack instead of the call stack, and is popped by the callee.
+; Mingw32 targets use the normal cdecl compiler-rt functions instead.
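+;
+; Illustratively (not a literal check), a 32-bit call site then looks like:
+;   fldl 8(%esp)      ; push the double argument onto the x87 stack
+;   calll __ftol2     ; callee pops the x87 argument; result in edx:eax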
+
+define i64 @double_ui64(double %x) nounwind {
+entry:
+; COMPILERRT: @double_ui64
+; COMPILERRT-NOT: calll __ftol2
+; FTOL: @double_ui64
+; FTOL: fldl
+; FTOL: calll __ftol2
+; FTOL-NOT: fstp
+ %0 = fptoui double %x to i64
+ ret i64 %0
+}
+
+define i64 @float_ui64(float %x) nounwind {
+entry:
+; COMPILERRT: @float_ui64
+; COMPILERRT-NOT: calll __ftol2
+; FTOL: @float_ui64
+; FTOL: flds
+; FTOL: calll __ftol2
+; FTOL-NOT: fstp
+ %0 = fptoui float %x to i64
+ ret i64 %0
+}
+
+define i64 @double_ui64_2(double %x, double %y, double %z) nounwind {
+; COMPILERRT: @double_ui64_2
+; FTOL: @double_ui64_2
+; FTOL_2: @double_ui64_2
+;; stack is empty
+; FTOL_2: fldl
+;; stack is %z
+; FTOL_2: fldl
+;; stack is %y %z
+; FTOL_2: fldl
+;; stack is %x %y %z
+; FTOL_2: fdiv %st(0), %st(1)
+;; stack is %x %1 %z
+; FTOL_2: fsubp %st(2)
+;; stack is %1 %2
+; FTOL_2: fxch
+; FTOL_2-NOT: fld
+; FTOL_2-NOT: fst
+;; stack is %2 %1
+; FTOL_2: calll __ftol2
+; FTOL_2-NOT: fxch
+; FTOL_2-NOT: fld
+; FTOL_2-NOT: fst
+; FTOL_2: calll __ftol2
+;; stack is empty
+
+ %1 = fdiv double %x, %y
+ %2 = fsub double %x, %z
+ %3 = fptoui double %1 to i64
+ %4 = fptoui double %2 to i64
+ %5 = sub i64 %3, %4
+ ret i64 %5
+}
+
+define i64 @double_ui64_3(double %x, double %y, double %z) nounwind {
+; COMPILERRT: @double_ui64_3
+; FTOL: @double_ui64_3
+; FTOL_2: @double_ui64_3
+;; stack is empty
+; FTOL_2: fldl
+;; stack is %z
+; FTOL_2: fldl
+;; stack is %y %z
+; FTOL_2: fldl
+;; stack is %x %y %z
+; FTOL_2: fdiv %st(0), %st(1)
+;; stack is %x %1 %z
+; FTOL_2: fsubp %st(2)
+;; stack is %1 %2
+; FTOL_2-NOT: fxch
+; FTOL_2-NOT: fld
+; FTOL_2-NOT: fst
+;; stack is %1 %2 (still)
+; FTOL_2: calll __ftol2
+; FTOL_2-NOT: fxch
+; FTOL_2-NOT: fld
+; FTOL_2-NOT: fst
+; FTOL_2: calll __ftol2
+;; stack is empty
+
+ %1 = fdiv double %x, %y
+ %2 = fsub double %x, %z
+ %3 = fptoui double %1 to i64
+ %4 = fptoui double %2 to i64
+ %5 = sub i64 %4, %3
+ ret i64 %5
+}
+
+define {double, i64} @double_ui64_4(double %x, double %y) nounwind {
+; COMPILERRT: @double_ui64_4
+; FTOL: @double_ui64_4
+; FTOL_2: @double_ui64_4
+;; stack is empty
+; FTOL_2: fldl
+;; stack is %y
+; FTOL_2: fldl
+;; stack is %x %y
+; FTOL_2: fxch
+;; stack is %y %x
+; FTOL_2: calll __ftol2
+;; stack is %x
+; FTOL_2: fld %st(0)
+;; stack is %x %x
+; FTOL_2: calll __ftol2
+;; stack is %x
+
+ %1 = fptoui double %x to i64
+ %2 = fptoui double %y to i64
+ %3 = sub i64 %1, %2
+ %4 = insertvalue {double, i64} undef, double %x, 0
+ %5 = insertvalue {double, i64} %4, i64 %3, 1
+ ret {double, i64} %5
+}