From: Dale Johannesen <dalej@apple.com>
Date: Tue, 21 Oct 2008 20:50:01 +0000 (+0000)
Subject: Add an SSE2 algorithm for uint64->f64 conversion.
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=1c15bf58a3700d40a6a88ac0ce14a2c99f111483;p=oota-llvm.git

Add an SSE2 algorithm for uint64->f64 conversion.
The same one Apple gcc uses, faster.  Also gets the
extreme case in gcc.c-torture/execute/ieee/rbug.c
correct which we weren't before; this is not
sufficient to get the test to pass though, there
is another bug.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@57926 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 53d12b224d9..96b427bd806 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -5473,6 +5473,22 @@ ExpandIntToFP(bool isSigned, MVT DestTy, SDValue Source) {
       Hi = Source;
     }
 
+    // Check to see if the target has a custom way to lower this.  If so, use it.
+    // (Note we've already expanded the operand in this case.)
+    switch (TLI.getOperationAction(ISD::UINT_TO_FP, SourceVT)) {
+    default: assert(0 && "This action not implemented for this operation!");
+    case TargetLowering::Legal:
+    case TargetLowering::Expand:
+      break;   // This case is handled below.
+    case TargetLowering::Custom: {
+      SDValue NV = TLI.LowerOperation(DAG.getNode(ISD::UINT_TO_FP, DestTy,
+                                                    Source), DAG);
+      if (NV.getNode())
+        return LegalizeOp(NV);
+      break;   // The target decided this was legal after all
+    }
+    }
+
     // If this is unsigned, and not supported, first perform the conversion to
     // signed, then adjust the result if the sign bit is set.
     SDValue SignedConv = ExpandIntToFP(true, DestTy, Source);
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index eee38406505..9ad02c8cb22 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -112,10 +112,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Expand);
     setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
   } else {
-    if (X86ScalarSSEf64)
+    if (X86ScalarSSEf64) {
+      // We have an impenetrably clever algorithm for ui64->double only.
+      setOperationAction(ISD::UINT_TO_FP   , MVT::i64  , Custom);
       // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
       setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Expand);
-    else
+    } else
       setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Promote);
   }
 
@@ -4686,6 +4688,70 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
   return Result;
 }
 
+SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+  MVT SrcVT = Op.getOperand(0).getValueType();
+  assert(SrcVT.getSimpleVT() == MVT::i64 && "Unknown UINT_TO_FP to lower!");
+  
+  // We only handle SSE2 f64 target here; caller can handle the rest.
+  if (Op.getValueType() != MVT::f64 || !X86ScalarSSEf64)
+    return SDValue();
+  
+  // Get a XMM-vector-sized stack slot.
+  unsigned Size = 128/8;
+  MachineFunction &MF = DAG.getMachineFunction();
+  int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
+  SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+
+  // Build some magic constants.
+  std::vector<Constant*>CV0;
+  CV0.push_back(ConstantInt::get(APInt(32, 0x45300000)));
+  CV0.push_back(ConstantInt::get(APInt(32, 0x43300000)));
+  CV0.push_back(ConstantInt::get(APInt(32, 0)));
+  CV0.push_back(ConstantInt::get(APInt(32, 0)));
+  Constant *C0 = ConstantVector::get(CV0);
+  SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 4);
+
+  std::vector<Constant*>CV1;
+  CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4530000000000000ULL))));
+  CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4330000000000000ULL))));
+  Constant *C1 = ConstantVector::get(CV1);
+  SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 4);
+
+  SmallVector<SDValue, 4> MaskVec;
+  MaskVec.push_back(DAG.getConstant(0, MVT::i32));
+  MaskVec.push_back(DAG.getConstant(4, MVT::i32));
+  MaskVec.push_back(DAG.getConstant(1, MVT::i32));
+  MaskVec.push_back(DAG.getConstant(5, MVT::i32));
+  SDValue UnpcklMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, &MaskVec[0],
+                                   MaskVec.size());
+  SmallVector<SDValue, 4> MaskVec2;
+  MaskVec2.push_back(DAG.getConstant(1, MVT::i64));
+  MaskVec2.push_back(DAG.getConstant(0, MVT::i64));
+  SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, &MaskVec2[0],
+                                 MaskVec2.size());
+
+  SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4i32,
+                            Op.getOperand(0).getOperand(1));
+  SDValue XR2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4i32,
+                            Op.getOperand(0).getOperand(0));
+  SDValue Unpck1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32,
+                                XR1, XR2, UnpcklMask);
+  SDValue CLod0 = DAG.getLoad(MVT::v4i32, DAG.getEntryNode(), CPIdx0,
+                         PseudoSourceValue::getConstantPool(), 0, false, 16);
+  SDValue Unpck2 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32,
+                                Unpck1, CLod0, UnpcklMask);
+  SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Unpck2);
+  SDValue CLod1 = DAG.getLoad(MVT::v2f64, CLod0.getValue(1), CPIdx1,
+                         PseudoSourceValue::getConstantPool(), 0, false, 16);
+  SDValue Sub = DAG.getNode(ISD::FSUB, MVT::v2f64, XR2F, CLod1);
+  // Add the halves; easiest way is to swap them into another reg first.
+  SDValue Shuf = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2f64,
+                             Sub, Sub, ShufMask);
+  SDValue Add = DAG.getNode(ISD::FADD, MVT::v2f64, Shuf, Sub);
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f64, Add,
+                     DAG.getIntPtrConstant(0));
+}
+
 std::pair<SDValue,SDValue> X86TargetLowering::
 FP_TO_SINTHelper(SDValue Op, SelectionDAG &DAG) {
   assert(Op.getValueType().getSimpleVT() <= MVT::i64 &&
@@ -6184,6 +6250,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
   case ISD::SRA_PARTS:
   case ISD::SRL_PARTS:          return LowerShift(Op, DAG);
   case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
+  case ISD::UINT_TO_FP:         return LowerUINT_TO_FP(Op, DAG);
   case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
   case ISD::FABS:               return LowerFABS(Op, DAG);
   case ISD::FNEG:               return LowerFNEG(Op, DAG);
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index b76ba845e51..0a2f4aa8922 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -558,6 +558,7 @@ namespace llvm {
     SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG);
     SDValue LowerShift(SDValue Op, SelectionDAG &DAG);
     SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG);
     SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG);
     SDValue LowerFABS(SDValue Op, SelectionDAG &DAG);
     SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG);