From: Dale Johannesen Date: Tue, 21 Oct 2008 20:50:01 +0000 (+0000) Subject: Add an SSE2 algorithm for uint64->f64 conversion. X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=1c15bf58a3700d40a6a88ac0ce14a2c99f111483;p=oota-llvm.git Add an SSE2 algorithm for uint64->f64 conversion. The same one Apple gcc uses, faster. Also gets the extreme case in gcc.c-torture/execute/ieee/rbug.c correct which we weren't before; this is not sufficient to get the test to pass though, there is another bug. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@57926 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 53d12b224d9..96b427bd806 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -5473,6 +5473,22 @@ ExpandIntToFP(bool isSigned, MVT DestTy, SDValue Source) { Hi = Source; } + // Check to see if the target has a custom way to lower this. If so, use it. + // (Note we've already expanded the operand in this case.) + switch (TLI.getOperationAction(ISD::UINT_TO_FP, SourceVT)) { + default: assert(0 && "This action not implemented for this operation!"); + case TargetLowering::Legal: + case TargetLowering::Expand: + break; // This case is handled below. + case TargetLowering::Custom: { + SDValue NV = TLI.LowerOperation(DAG.getNode(ISD::UINT_TO_FP, DestTy, + Source), DAG); + if (NV.getNode()) + return LegalizeOp(NV); + break; // The target decided this was legal after all + } + } + // If this is unsigned, and not supported, first perform the conversion to // signed, then adjust the result if the sign bit is set. 
SDValue SignedConv = ExpandIntToFP(true, DestTy, Source); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index eee38406505..9ad02c8cb22 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -112,10 +112,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand); setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); } else { - if (X86ScalarSSEf64) + if (X86ScalarSSEf64) { + // We have an impenetrably clever algorithm for ui64->double only. + setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP. setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand); - else + } else setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); } @@ -4686,6 +4688,70 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { return Result; } +SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) { + MVT SrcVT = Op.getOperand(0).getValueType(); + assert(SrcVT.getSimpleVT() == MVT::i64 && "Unknown UINT_TO_FP to lower!"); + + // We only handle SSE2 f64 target here; caller can handle the rest. + if (Op.getValueType() != MVT::f64 || !X86ScalarSSEf64) + return SDValue(); + + // Get a XMM-vector-sized stack slot. + unsigned Size = 128/8; + MachineFunction &MF = DAG.getMachineFunction(); + int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); + SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); + + // Build some magic constants. 
+ std::vector<Constant*>CV0; + CV0.push_back(ConstantInt::get(APInt(32, 0x45300000))); + CV0.push_back(ConstantInt::get(APInt(32, 0x43300000))); + CV0.push_back(ConstantInt::get(APInt(32, 0))); + CV0.push_back(ConstantInt::get(APInt(32, 0))); + Constant *C0 = ConstantVector::get(CV0); + SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 4); + + std::vector<Constant*>CV1; + CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4530000000000000ULL)))); + CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4330000000000000ULL)))); + Constant *C1 = ConstantVector::get(CV1); + SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 4); + + SmallVector<SDValue, 4> MaskVec; + MaskVec.push_back(DAG.getConstant(0, MVT::i32)); + MaskVec.push_back(DAG.getConstant(4, MVT::i32)); + MaskVec.push_back(DAG.getConstant(1, MVT::i32)); + MaskVec.push_back(DAG.getConstant(5, MVT::i32)); + SDValue UnpcklMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, &MaskVec[0], + MaskVec.size()); + SmallVector<SDValue, 4> MaskVec2; + MaskVec2.push_back(DAG.getConstant(1, MVT::i64)); + MaskVec2.push_back(DAG.getConstant(0, MVT::i64)); + SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, &MaskVec2[0], + MaskVec2.size()); + + SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4i32, + Op.getOperand(0).getOperand(1)); + SDValue XR2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4i32, + Op.getOperand(0).getOperand(0)); + SDValue Unpck1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, + XR1, XR2, UnpcklMask); + SDValue CLod0 = DAG.getLoad(MVT::v4i32, DAG.getEntryNode(), CPIdx0, + PseudoSourceValue::getConstantPool(), 0, false, 16); + SDValue Unpck2 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, + Unpck1, CLod0, UnpcklMask); + SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Unpck2); + SDValue CLod1 = DAG.getLoad(MVT::v2f64, CLod0.getValue(1), CPIdx1, + PseudoSourceValue::getConstantPool(), 0, false, 16); + SDValue Sub = DAG.getNode(ISD::FSUB, MVT::v2f64, XR2F, CLod1); + // Add the halves; easiest way is to swap them into another reg 
first. + SDValue Shuf = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2f64, + Sub, Sub, ShufMask); + SDValue Add = DAG.getNode(ISD::FADD, MVT::v2f64, Shuf, Sub); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f64, Add, + DAG.getIntPtrConstant(0)); +} + std::pair<SDValue,SDValue> X86TargetLowering:: FP_TO_SINTHelper(SDValue Op, SelectionDAG &DAG) { assert(Op.getValueType().getSimpleVT() <= MVT::i64 && @@ -6184,6 +6250,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::SRA_PARTS: case ISD::SRL_PARTS: return LowerShift(Op, DAG); case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); + case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); case ISD::FABS: return LowerFABS(Op, DAG); case ISD::FNEG: return LowerFNEG(Op, DAG); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index b76ba845e51..0a2f4aa8922 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -558,6 +558,7 @@ namespace llvm { SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG); SDValue LowerShift(SDValue Op, SelectionDAG &DAG); SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG); + SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG); SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG); SDValue LowerFABS(SDValue Op, SelectionDAG &DAG); SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG);