Eliminate the FP_GET_ST0/FP_SET_ST0 target-specific dag nodes, just lower to CopyFromReg/CopyToReg instead.

author Chris Lattner <sabre@nondot.org>

Mon, 10 Mar 2008 21:08:41 +0000 (21:08 +0000)

committer Chris Lattner <sabre@nondot.org>

Mon, 10 Mar 2008 21:08:41 +0000 (21:08 +0000)
author Chris Lattner <sabre@nondot.org>
Mon, 10 Mar 2008 21:08:41 +0000 (21:08 +0000)
committer Chris Lattner <sabre@nondot.org>
Mon, 10 Mar 2008 21:08:41 +0000 (21:08 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index e2dc29c7369e8ce6e80864db78aa5bfc4a759d07..179f600d4964c948ae4718289f500948f365ce67 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -854,28 +854,18 @@ SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
    SDOperand Flag;
  
    // Copy the result values into the output registers.
-  if (RVLocs.size() != 1 || !RVLocs[0].isRegLoc() ||
-      RVLocs[0].getLocReg() != X86::ST0) {
-    for (unsigned i = 0; i != RVLocs.size(); ++i) {
-      CCValAssign &VA = RVLocs[i];
-      assert(VA.isRegLoc() && "Can only return in registers!");
-      Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1),
-                               Flag);
-      Flag = Chain.getValue(1);
-    }
-  } else {
-    // We need to handle a destination of ST0 specially, because it isn't really
-    // a register.
-    SDOperand Value = Op.getOperand(1);
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    CCValAssign &VA = RVLocs[i];
+    assert(VA.isRegLoc() && "Can only return in registers!");
+    SDOperand ValToCopy = Op.getOperand(i*2+1);
      
-    // an XMM register onto the fp-stack.  Do this with an FP_EXTEND to f80.
-    // This will get legalized into a load/store if it can't get optimized away.
-    if (isScalarFPTypeInSSEReg(RVLocs[0].getValVT()))
-      Value = DAG.getNode(ISD::FP_EXTEND, MVT::f80, Value);
+    // If this is a copy from an xmm register to ST(0), use an FPExtend to
+    // change the value to the FP stack register class.
+    if (RVLocs[i].getLocReg() == X86::ST0 &&
+        isScalarFPTypeInSSEReg(RVLocs[i].getValVT()))
+      ValToCopy = DAG.getNode(ISD::FP_EXTEND, MVT::f80, ValToCopy);
      
-    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
-    SDOperand Ops[] = { Chain, Value };
-    Chain = DAG.getNode(X86ISD::FP_SET_ST0, Tys, Ops, 2);
+    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), ValToCopy, Flag);
      Flag = Chain.getValue(1);
    }
    
@@ -905,37 +895,31 @@ LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
    SmallVector<SDOperand, 8> ResultVals;
    
    // Copy all of the result registers out of their specified physreg.
-  if (RVLocs.size() != 1 || RVLocs[0].getLocReg() != X86::ST0) {
-    for (unsigned i = 0; i != RVLocs.size(); ++i) {
-      Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
-                                 RVLocs[i].getValVT(), InFlag).getValue(1);
-      InFlag = Chain.getValue(2);
-      ResultVals.push_back(Chain.getValue(0));
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    MVT::ValueType CopyVT = RVLocs[i].getValVT();
+    
+    // If this is a call to a function that returns an fp value on the floating
+    // point stack, but where we prefer to use the value in xmm registers, copy
+    // it out as F80 and use a truncate to move it from fp stack reg to xmm reg.
+    if (RVLocs[i].getLocReg() == X86::ST0 &&
+        isScalarFPTypeInSSEReg(RVLocs[i].getValVT())) {
+      CopyVT = MVT::f80;
      }
-  } else {
-    // Copies from the FP stack are special, as ST0 isn't a valid register
-    // before the fp stackifier runs.
      
-    // Copy ST0 into an RFP register with FP_GET_RESULT.  If this will end up
-    // in an SSE register, copy it out as F80 and do a truncate, otherwise use
-    // the specified value type.
-    MVT::ValueType GetResultTy = RVLocs[0].getValVT();
-    if (isScalarFPTypeInSSEReg(GetResultTy))
-      GetResultTy = MVT::f80;
-    SDVTList Tys = DAG.getVTList(GetResultTy, MVT::Other, MVT::Flag);
-    SDOperand GROps[] = { Chain, InFlag };
-    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_ST0, Tys, GROps, 2);
-    Chain  = RetVal.getValue(1);
-    InFlag = RetVal.getValue(2);
-
-    // If we want the result in an SSE register, use an FP_TRUNCATE to get it
-    // there.
-    if (GetResultTy != RVLocs[0].getValVT())
-      RetVal = DAG.getNode(ISD::FP_ROUND, RVLocs[0].getValVT(), RetVal,
-                           // This truncation won't change the value.
-                           DAG.getIntPtrConstant(1));
+    Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
+                               CopyVT, InFlag).getValue(1);
+    SDOperand Val = Chain.getValue(0);
+    InFlag = Chain.getValue(2);
+
+    if (CopyVT != RVLocs[i].getValVT()) {
+      // Round the F80 to the right size, which also moves to the appropriate
+      // xmm register.
+      Val = DAG.getNode(ISD::FP_ROUND, RVLocs[i].getValVT(), Val,
+                        // This truncation won't change the value.
+                        DAG.getIntPtrConstant(1));
+    }
      
-    ResultVals.push_back(RetVal);
+    ResultVals.push_back(Val);
    }
    
    // Merge everything together with a MERGE_VALUES node.
@@ -5573,9 +5557,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
    case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
    case X86ISD::FLD:                return "X86ISD::FLD";
    case X86ISD::FST:                return "X86ISD::FST";
-  case X86ISD::FP_GET_ST0:         return "X86ISD::FP_GET_ST0";
    case X86ISD::FP_GET_ST0_ST1:     return "X86ISD::FP_GET_ST0_ST1";
-  case X86ISD::FP_SET_ST0:         return "X86ISD::FP_SET_ST0";
    case X86ISD::CALL:               return "X86ISD::CALL";
    case X86ISD::TAILCALL:           return "X86ISD::TAILCALL";
    case X86ISD::RDTSC_DAG:          return "X86ISD::RDTSC_DAG";
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h

index cb8d94d73cb1953719fa03f60a0953859371d9c9..fef9be312667bf6741e51c49ed4149874659c389 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -84,20 +84,10 @@ namespace llvm {
        /// as.
        FST,
  
-      /// FP_GET_ST0 - This corresponds to FpGET_ST0 pseudo instruction
-      /// which copies from ST(0) to the destination. It takes a chain and
-      /// writes a RFP result and a chain.
-      FP_GET_ST0,
-
        /// FP_GET_ST0_ST1 - Same as FP_GET_ST0 except it copies two values
        /// ST(0) and ST(1).
        FP_GET_ST0_ST1,
  
-      /// FP_SET_ST0 - This corresponds to FpSET_ST0 pseudo instruction
-      /// which copies the source operand to ST(0). It takes a chain+value and
-      /// returns a chain and a flag.
-      FP_SET_ST0,
-
        /// CALL/TAILCALL - These operations represent an abstract X86 call
        /// instruction, which includes a bunch of information.  In particular the
        /// operands of these node are:
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td

index 479aa183a007f471df22c5f3a27933f34db925b3..d37ecf80899df834130b1452d7e92e575744cc9c 100644 (file)
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -17,10 +17,8 @@
  // FPStack specific DAG Nodes.
  //===----------------------------------------------------------------------===//
  
-def SDTX86FpGet     : SDTypeProfile<1, 0, [SDTCisFP<0>]>;
  def SDTX86FpGet2    : SDTypeProfile<2, 0, [SDTCisVT<0, f80>, 
                                             SDTCisVT<1, f80>]>;
-def SDTX86FpSet     : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
  def SDTX86Fld       : SDTypeProfile<1, 2, [SDTCisFP<0>,
                                             SDTCisPtrTy<1>, 
                                             SDTCisVT<2, OtherVT>]>;
@@ -33,10 +31,6 @@ def SDTX86FpToIMem  : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
  
  def SDTX86CwdStore  : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
  
-def X86fpget_st0    : SDNode<"X86ISD::FP_GET_ST0", SDTX86FpGet,
-                             [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
-def X86fpset        : SDNode<"X86ISD::FP_SET_ST0", SDTX86FpSet,
-                             [SDNPHasChain, SDNPOutFlag]>;
  def X86fld          : SDNode<"X86ISD::FLD", SDTX86Fld,
                               [SDNPHasChain, SDNPMayLoad]>;
  def X86fst          : SDNode<"X86ISD::FST", SDTX86Fst,
@@ -138,26 +132,18 @@ let isTerminator = 1 in
  // encoding and asm printing info).
  
  // Pseudo Instructions for FP stack return values.
-def FpGET_ST0_32 : FpI_<(outs RFP32:$dst), (ins), SpecialFP,
-                        [(set RFP32:$dst, X86fpget_st0)]>;       // FPR = ST(0)
-def FpGET_ST0_64 : FpI_<(outs RFP64:$dst), (ins), SpecialFP,
-                        [(set RFP64:$dst, X86fpget_st0)]>;       // FPR = ST(0)
-def FpGET_ST0_80 : FpI_<(outs RFP80:$dst), (ins), SpecialFP,
-                        [(set RFP80:$dst, X86fpget_st0)]>;       // FPR = ST(0)
+def FpGET_ST0_32 : FpI_<(outs RFP32:$dst), (ins), SpecialFP, []>; // FPR = ST(0)
+def FpGET_ST0_64 : FpI_<(outs RFP64:$dst), (ins), SpecialFP, []>; // FPR = ST(0)
+def FpGET_ST0_80 : FpI_<(outs RFP80:$dst), (ins), SpecialFP, []>; // FPR = ST(0)
  
  def FpGET_ST0_ST1 : FpI_<(outs RFP80:$dst1, RFP80:$dst2), (ins), SpecialFP,
                           []>;                        // FPR = ST(0), FPR = ST(1)
  
  
  let Defs = [ST0] in {
-def FpSET_ST0_32 : FpI_<(outs), (ins RFP32:$src), SpecialFP,
-                        [(X86fpset RFP32:$src)]>;// ST(0) = FPR
-
-def FpSET_ST0_64 : FpI_<(outs), (ins RFP64:$src), SpecialFP,
-                        [(X86fpset RFP64:$src)]>;// ST(0) = FPR
-
-def FpSET_ST0_80 : FpI_<(outs), (ins RFP80:$src), SpecialFP,
-                        [(X86fpset RFP80:$src)]>;// ST(0) = FPR
+def FpSET_ST0_32 : FpI_<(outs), (ins RFP32:$src), SpecialFP, []>; // ST(0) = FPR
+def FpSET_ST0_64 : FpI_<(outs), (ins RFP64:$src), SpecialFP, []>; // ST(0) = FPR
+def FpSET_ST0_80 : FpI_<(outs), (ins RFP80:$src), SpecialFP, []>; // ST(0) = FPR
  }
  
  // FpIf32, FpIf64 - Floating Point Psuedo Instruction template.
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td

index cb376c03cb81bb230833370ea66ee10951ff2ce6..93d2a510fbc4fe8b067f1ad51f49530e44818a63 100644 (file)
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -481,14 +481,14 @@ def FR64 : RegisterClass<"X86", [f64], 64,
  // faster on common hardware.  In reality, this should be controlled by a
  // command line option or something.
  
-def RFP32 : RegisterClass<"X86", [f32], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
-def RFP64 : RegisterClass<"X86", [f64], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
-def RFP80 : RegisterClass<"X86", [f80], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
+def RFP32 : RegisterClass<"X86",[f32], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
+def RFP64 : RegisterClass<"X86",[f64], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
+def RFP80 : RegisterClass<"X86",[f80], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
  
  // Floating point stack registers (these are not allocatable by the
  // register allocator - the floating point stackifier is responsible
  // for transforming FPn allocations to STn registers)
-def RST : RegisterClass<"X86", [f80], 32,
+def RST : RegisterClass<"X86", [f80, f64, f32], 32,
                          [ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7]> {
      let MethodProtos = [{
      iterator allocation_order_end(const MachineFunction &MF) const;
author	Chris Lattner <sabre@nondot.org>
	Mon, 10 Mar 2008 21:08:41 +0000 (21:08 +0000)
committer	Chris Lattner <sabre@nondot.org>
	Mon, 10 Mar 2008 21:08:41 +0000 (21:08 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
lib/Target/X86/X86ISelLowering.h		patch | blob | history
lib/Target/X86/X86InstrFPStack.td		patch | blob | history
lib/Target/X86/X86RegisterInfo.td		patch | blob | history