From 6fa2f9c63688ff240d58c80bc0e433bb27a53c85 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Sun, 9 Mar 2008 07:05:32 +0000 Subject: [PATCH] rename FpGETRESULT32 -> FpGET_ST0_32 etc. Add support for isel'ing value preserving FP roundings from one fp stack reg to another into a noop, instead of stack traffic. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@48093 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FloatingPoint.cpp | 10 ++--- lib/Target/X86/X86ISelDAGToDAG.cpp | 17 +++++--- lib/Target/X86/X86ISelLowering.cpp | 9 ++-- lib/Target/X86/X86ISelLowering.h | 8 ++-- lib/Target/X86/X86InstrFPStack.td | 64 +++++++++++++++++------------ 5 files changed, 62 insertions(+), 46 deletions(-) diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 3650825e2b5..8c1fb7bfe8a 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -214,7 +214,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { MachineInstr *PrevMI = 0; if (I != BB.begin()) - PrevMI = prior(I); + PrevMI = prior(I); ++NumFP; // Keep track of # of pseudo instrs DOUT << "\nFPInst:\t" << *MI; @@ -917,13 +917,13 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { MachineInstr *MI = I; switch (MI->getOpcode()) { default: assert(0 && "Unknown SpecialFP instruction!"); - case X86::FpGETRESULT32: // Appears immediately after a call returning FP type! - case X86::FpGETRESULT64: // Appears immediately after a call returning FP type! - case X86::FpGETRESULT80: + case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type! + case X86::FpGET_ST0_64:// Appears immediately after a call returning FP type! + case X86::FpGET_ST0_80:// Appears immediately after a call returning FP type! 
assert(StackTop == 0 && "Stack should be empty after a call!"); pushReg(getFPReg(MI->getOperand(0))); break; - case X86::FpGETRESULT80x2: + case X86::FpGET_ST0_ST1: assert(StackTop == 0 && "Stack should be empty after a call!"); pushReg(getFPReg(MI->getOperand(0))); pushReg(getFPReg(MI->getOperand(1))); diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 363f2bbc78f..6036dcbf2e0 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -486,10 +486,15 @@ void X86DAGToDAGISel::PreprocessForFPConvert(SelectionDAG &DAG) { if (SrcIsSSE && DstIsSSE) continue; - // If this is an FPStack extension (but not a truncation), it is a noop. - if (!SrcIsSSE && !DstIsSSE && N->getOpcode() == ISD::FP_EXTEND) - continue; - + if (!SrcIsSSE && !DstIsSSE) { + // If this is an FPStack extension, it is a noop. + if (N->getOpcode() == ISD::FP_EXTEND) + continue; + // If this is a value-preserving FPStack truncation, it is a noop. + if (N->getConstantOperandVal(1)) + continue; + } + // Here we could have an FP stack truncation or an FPStack <-> SSE convert. // FPStack has extload and truncstore. SSE can fold direct loads into other // operations. Based on this, decide what we want to do. 
@@ -1150,7 +1155,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) { case X86ISD::GlobalBaseReg: return getGlobalBaseReg(); - case X86ISD::FP_GET_RESULT2: { + case X86ISD::FP_GET_ST0_ST1: { SDOperand Chain = N.getOperand(0); SDOperand InFlag = N.getOperand(1); AddToISelQueue(Chain); @@ -1161,7 +1166,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) { Tys.push_back(MVT::Other); Tys.push_back(MVT::Flag); SDOperand Ops[] = { Chain, InFlag }; - SDNode *ResNode = CurDAG->getTargetNode(X86::FpGETRESULT80x2, Tys, + SDNode *ResNode = CurDAG->getTargetNode(X86::FpGET_ST0_ST1, Tys, Ops, 2); Chain = SDOperand(ResNode, 2); InFlag = SDOperand(ResNode, 3); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index be7f91c6ae6..5f68d71d7d5 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -917,9 +917,8 @@ LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall, if (isScalarFPTypeInSSEReg(GetResultTy)) GetResultTy = MVT::f80; SDVTList Tys = DAG.getVTList(GetResultTy, MVT::Other, MVT::Flag); - SDOperand GROps[] = { Chain, InFlag }; - SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2); + SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_ST0, Tys, GROps, 2); Chain = RetVal.getValue(1); InFlag = RetVal.getValue(2); @@ -969,7 +968,7 @@ LowerCallResultToTwoX87Regs(SDOperand Chain, SDOperand InFlag, const MVT::ValueType VTs[] = { MVT::f80, MVT::f80, MVT::Other, MVT::Flag }; SDVTList Tys = DAG.getVTList(VTs, 4); SDOperand Ops[] = { Chain, InFlag }; - SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT2, Tys, Ops, 2); + SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_ST0_ST1, Tys, Ops, 2); Chain = RetVal.getValue(2); SDOperand FIN = TheCall->getOperand(5); Chain = DAG.getStore(Chain, RetVal.getValue(1), FIN, NULL, 0); @@ -5564,8 +5563,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM"; case 
X86ISD::FLD: return "X86ISD::FLD"; case X86ISD::FST: return "X86ISD::FST"; - case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT"; - case X86ISD::FP_GET_RESULT2: return "X86ISD::FP_GET_RESULT2"; + case X86ISD::FP_GET_ST0: return "X86ISD::FP_GET_ST0"; + case X86ISD::FP_GET_ST0_ST1: return "X86ISD::FP_GET_ST0_ST1"; case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT"; case X86ISD::CALL: return "X86ISD::CALL"; case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index e0ddf63bf0f..9152c203f10 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -84,14 +84,14 @@ namespace llvm { /// as. FST, - /// FP_GET_RESULT - This corresponds to FpGETRESULT pseudo instruction + /// FP_GET_ST0 - This corresponds to FpGET_ST0 pseudo instruction /// which copies from ST(0) to the destination. It takes a chain and /// writes a RFP result and a chain. - FP_GET_RESULT, + FP_GET_ST0, - /// FP_GET_RESULT2 - Same as FP_GET_RESULT except it copies two values + /// FP_GET_ST0_ST1 - Same as FP_GET_ST0 except it copies two values /// ST(0) and ST(1). - FP_GET_RESULT2, + FP_GET_ST0_ST1, /// FP_SET_RESULT - This corresponds to FpSETRESULT pseudo instruction /// which copies the source operand to ST(0). 
It takes a chain+value and diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index 2110f75edc2..832ea6177c1 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -18,7 +18,8 @@ //===----------------------------------------------------------------------===// def SDTX86FpGet : SDTypeProfile<1, 0, [SDTCisFP<0>]>; -def SDTX86FpGet2 : SDTypeProfile<2, 0, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>; +def SDTX86FpGet2 : SDTypeProfile<2, 0, [SDTCisVT<0, f80>, + SDTCisVT<1, f80>]>; def SDTX86FpSet : SDTypeProfile<0, 1, [SDTCisFP<0>]>; def SDTX86Fld : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>, @@ -32,9 +33,7 @@ def SDTX86FpToIMem : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>; def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; -def X86fpget : SDNode<"X86ISD::FP_GET_RESULT", SDTX86FpGet, - [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>; -def X86fpget2 : SDNode<"X86ISD::FP_GET_RESULT2", SDTX86FpGet2, +def X86fpget_st0 : SDNode<"X86ISD::FP_GET_ST0", SDTX86FpGet, [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>; def X86fpset : SDNode<"X86ISD::FP_SET_RESULT", SDTX86FpSet, [SDNPHasChain, SDNPOutFlag]>; @@ -139,17 +138,15 @@ let isTerminator = 1 in // encoding and asm printing info). // Pseudo Instructions for FP stack return values. 
-def FpGETRESULT32 : FpI_<(outs RFP32:$dst), (ins), SpecialFP, - [(set RFP32:$dst, X86fpget)]>; // FPR = ST(0) - -def FpGETRESULT64 : FpI_<(outs RFP64:$dst), (ins), SpecialFP, - [(set RFP64:$dst, X86fpget)]>; // FPR = ST(0) - -def FpGETRESULT80 : FpI_<(outs RFP80:$dst), (ins), SpecialFP, - [(set RFP80:$dst, X86fpget)]>; // FPR = ST(0) +def FpGET_ST0_32 : FpI_<(outs RFP32:$dst), (ins), SpecialFP, + [(set RFP32:$dst, X86fpget_st0)]>; // FPR = ST(0) +def FpGET_ST0_64 : FpI_<(outs RFP64:$dst), (ins), SpecialFP, + [(set RFP64:$dst, X86fpget_st0)]>; // FPR = ST(0) +def FpGET_ST0_80 : FpI_<(outs RFP80:$dst), (ins), SpecialFP, + [(set RFP80:$dst, X86fpget_st0)]>; // FPR = ST(0) -def FpGETRESULT80x2 : FpI_<(outs RFP80:$dst1, RFP80:$dst2), (ins), SpecialFP, - []>; // FPR = ST(0), FPR = ST(1) +def FpGET_ST0_ST1 : FpI_<(outs RFP80:$dst1, RFP80:$dst2), (ins), SpecialFP, + []>; // FPR = ST(0), FPR = ST(1) let Defs = [ST0] in { @@ -174,15 +171,15 @@ class FpIf64 pattern> : // Register copies. Just copies, the shortening ones do not truncate. 
let neverHasSideEffects = 1 in { -def MOV_Fp3232 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), SpecialFP, []>; -def MOV_Fp3264 : FpIf32<(outs RFP64:$dst), (ins RFP32:$src), SpecialFP, []>; -def MOV_Fp6432 : FpIf32<(outs RFP32:$dst), (ins RFP64:$src), SpecialFP, []>; -def MOV_Fp6464 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), SpecialFP, []>; -def MOV_Fp8032 : FpIf32<(outs RFP32:$dst), (ins RFP80:$src), SpecialFP, []>; -def MOV_Fp3280 : FpIf32<(outs RFP80:$dst), (ins RFP32:$src), SpecialFP, []>; -def MOV_Fp8064 : FpIf64<(outs RFP64:$dst), (ins RFP80:$src), SpecialFP, []>; -def MOV_Fp6480 : FpIf64<(outs RFP80:$dst), (ins RFP64:$src), SpecialFP, []>; -def MOV_Fp8080 : FpI_<(outs RFP80:$dst), (ins RFP80:$src), SpecialFP, []>; + def MOV_Fp3232 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), SpecialFP, []>; + def MOV_Fp3264 : FpIf32<(outs RFP64:$dst), (ins RFP32:$src), SpecialFP, []>; + def MOV_Fp6432 : FpIf32<(outs RFP32:$dst), (ins RFP64:$src), SpecialFP, []>; + def MOV_Fp6464 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), SpecialFP, []>; + def MOV_Fp8032 : FpIf32<(outs RFP32:$dst), (ins RFP80:$src), SpecialFP, []>; + def MOV_Fp3280 : FpIf32<(outs RFP80:$dst), (ins RFP32:$src), SpecialFP, []>; + def MOV_Fp8064 : FpIf64<(outs RFP64:$dst), (ins RFP80:$src), SpecialFP, []>; + def MOV_Fp6480 : FpIf64<(outs RFP80:$dst), (ins RFP64:$src), SpecialFP, []>; + def MOV_Fp8080 : FpI_ <(outs RFP80:$dst), (ins RFP80:$src), SpecialFP, []>; } // Factoring for arithmetic. @@ -583,6 +580,21 @@ def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>; // Used to conv. i64 to f64 since there isn't a SSE version. 
def : Pat<(X86fildflag addr:$src, i64), (ILD_Fp64m64 addr:$src)>; -def : Pat<(f64 (fextend RFP32:$src)), (MOV_Fp3264 RFP32:$src)>, Requires<[FPStackf32]>; -def : Pat<(f80 (fextend RFP32:$src)), (MOV_Fp3280 RFP32:$src)>, Requires<[FPStackf32]>; -def : Pat<(f80 (fextend RFP64:$src)), (MOV_Fp6480 RFP64:$src)>, Requires<[FPStackf64]>; +// FP extensions map onto simple pseudo-value conversions if they are to/from +// the FP stack. +def : Pat<(f64 (fextend RFP32:$src)), (MOV_Fp3264 RFP32:$src)>, + Requires<[FPStackf32]>; +def : Pat<(f80 (fextend RFP32:$src)), (MOV_Fp3280 RFP32:$src)>, + Requires<[FPStackf32]>; +def : Pat<(f80 (fextend RFP64:$src)), (MOV_Fp6480 RFP64:$src)>, + Requires<[FPStackf64]>; + +// FP truncations map onto simple pseudo-value conversions if they are to/from +// the FP stack. We have validated that only value-preserving truncations make +// it through isel. +def : Pat<(f32 (fround RFP64:$src)), (MOV_Fp6432 RFP64:$src)>, + Requires<[FPStackf32]>; +def : Pat<(f32 (fround RFP80:$src)), (MOV_Fp8032 RFP80:$src)>, + Requires<[FPStackf32]>; +def : Pat<(f64 (fround RFP80:$src)), (MOV_Fp8064 RFP80:$src)>, + Requires<[FPStackf64]>; -- 2.34.1