PowerPC: Simplify FADD in round-to-zero mode.

author Ulrich Weigand <ulrich.weigand@de.ibm.com>

Tue, 26 Mar 2013 10:56:22 +0000 (10:56 +0000)

committer Ulrich Weigand <ulrich.weigand@de.ibm.com>

Tue, 26 Mar 2013 10:56:22 +0000 (10:56 +0000)
author Ulrich Weigand <ulrich.weigand@de.ibm.com>
Tue, 26 Mar 2013 10:56:22 +0000 (10:56 +0000)
committer Ulrich Weigand <ulrich.weigand@de.ibm.com>
Tue, 26 Mar 2013 10:56:22 +0000 (10:56 +0000)
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp

index 6631e4ab8f3c09829dfdb16478082799907e990c..5ff23d1cef2edb6cf8dd4b3ccfa7cc6db1825697 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -584,10 +584,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
    case PPCISD::STCX:            return "PPCISD::STCX";
    case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
    case PPCISD::MFFS:            return "PPCISD::MFFS";
-  case PPCISD::MTFSB0:          return "PPCISD::MTFSB0";
-  case PPCISD::MTFSB1:          return "PPCISD::MTFSB1";
    case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
-  case PPCISD::MTFSF:           return "PPCISD::MTFSF";
    case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
    case PPCISD::CR6SET:          return "PPCISD::CR6SET";
    case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
@@ -5667,50 +5664,8 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                               MVT::f64, N->getOperand(0),
                               DAG.getIntPtrConstant(1));
  
-    // This sequence changes FPSCR to do round-to-zero, adds the two halves
-    // of the long double, and puts FPSCR back the way it was.  We do not
-    // actually model FPSCR.
-    std::vector<EVT> NodeTys;
-    SDValue Ops[4], Result, MFFSreg, InFlag, FPreg;
-
-    NodeTys.push_back(MVT::f64);   // Return register
-    NodeTys.push_back(MVT::Glue);    // Returns a flag for later insns
-    Result = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
-    MFFSreg = Result.getValue(0);
-    InFlag = Result.getValue(1);
-
-    NodeTys.clear();
-    NodeTys.push_back(MVT::Glue);   // Returns a flag
-    Ops[0] = DAG.getConstant(31, MVT::i32);
-    Ops[1] = InFlag;
-    Result = DAG.getNode(PPCISD::MTFSB1, dl, NodeTys, Ops, 2);
-    InFlag = Result.getValue(0);
-
-    NodeTys.clear();
-    NodeTys.push_back(MVT::Glue);   // Returns a flag
-    Ops[0] = DAG.getConstant(30, MVT::i32);
-    Ops[1] = InFlag;
-    Result = DAG.getNode(PPCISD::MTFSB0, dl, NodeTys, Ops, 2);
-    InFlag = Result.getValue(0);
-
-    NodeTys.clear();
-    NodeTys.push_back(MVT::f64);    // result of add
-    NodeTys.push_back(MVT::Glue);   // Returns a flag
-    Ops[0] = Lo;
-    Ops[1] = Hi;
-    Ops[2] = InFlag;
-    Result = DAG.getNode(PPCISD::FADDRTZ, dl, NodeTys, Ops, 3);
-    FPreg = Result.getValue(0);
-    InFlag = Result.getValue(1);
-
-    NodeTys.clear();
-    NodeTys.push_back(MVT::f64);
-    Ops[0] = DAG.getConstant(1, MVT::i32);
-    Ops[1] = MFFSreg;
-    Ops[2] = FPreg;
-    Ops[3] = InFlag;
-    Result = DAG.getNode(PPCISD::MTFSF, dl, NodeTys, Ops, 4);
-    FPreg = Result.getValue(0);
+    // Add the two halves of the long double in round-to-zero mode.
+    SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
  
      // We know the low half is about to be thrown away, so just use something
      // convenient.
@@ -6523,6 +6478,30 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
      BB = exitMBB;
      BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
        .addReg(ShiftReg);
+  } else if (MI->getOpcode() == PPC::FADDrtz) {
+    // This pseudo performs an FADD with rounding mode temporarily forced
+    // to round-to-zero.  We emit this via custom inserter since the FPSCR
+    // is not modeled at the SelectionDAG level.
+    unsigned Dest = MI->getOperand(0).getReg();
+    unsigned Src1 = MI->getOperand(1).getReg();
+    unsigned Src2 = MI->getOperand(2).getReg();
+    DebugLoc dl   = MI->getDebugLoc();
+
+    MachineRegisterInfo &RegInfo = F->getRegInfo();
+    unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
+
+    // Save FPSCR value.
+    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
+
+    // Set rounding mode to round-to-zero.
+    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
+    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
+
+    // Perform addition.
+    BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
+
+    // Restore FPSCR value.
+    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
    } else {
      llvm_unreachable("Unexpected instr type to insert");
    }
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h

index da438a5aa7f2af1945e16f914969f5a4f578f31b..6296400ec719afcf9dc640c39f6f9b3918595b2a 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -142,26 +142,13 @@ namespace llvm {
        /// an optional input flag argument.
        COND_BRANCH,
  
-      // The following 5 instructions are used only as part of the
-      // long double-to-int conversion sequence.
-
-      /// OUTFLAG = MFFS F8RC - This moves the FPSCR (not modelled) into the
-      /// register.
-      MFFS,
-
-      /// OUTFLAG = MTFSB0 INFLAG - This clears a bit in the FPSCR.
-      MTFSB0,
-
-      /// OUTFLAG = MTFSB1 INFLAG - This sets a bit in the FPSCR.
-      MTFSB1,
-
-      /// F8RC, OUTFLAG = FADDRTZ F8RC, F8RC, INFLAG - This is an FADD done with
-      /// rounding towards zero.  It has flags added so it won't move past the
-      /// FPSCR-setting instructions.
+      /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding
+      /// towards zero.  Used only as part of the long double-to-int
+      /// conversion sequence.
        FADDRTZ,
  
-      /// MTFSF = F8RC, INFLAG - This moves the register into the FPSCR.
-      MTFSF,
+      /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
+      MFFS,
  
        /// LARX = This corresponds to PPC l{w|d}arx instrcution: load and
        /// reserve indexed. This is used to implement atomic operations.
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td

index c4fd1929502d3b5ee4720e0335a09991c2692a1c..129c5855aa01635654d04d1d63f05982b4ed050e 100644 (file)
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -676,14 +676,13 @@ class XFXForm_7_ext<bits<6> opcode, bits<10> xo, bits<10> spr,
  // This is probably 1.7.9, but I don't have the reference that uses this
  // numbering scheme...
  class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, 
-                      string cstr, InstrItinClass itin, list<dag>pattern>
+              InstrItinClass itin, list<dag>pattern>
    : I<opcode, OOL, IOL, asmstr, itin> {
    bits<8> FM;
    bits<5> rT;
  
    bit RC = 0;    // set by isDOT
    let Pattern = pattern;
-  let Constraints = cstr;
  
    let Inst{6} = 0;
    let Inst{7-14}  = FM;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td

index a847cd83ccc7d6ac8b64c1c13163f26d2b8cd8a5..aee41d3d7e0c2a1d7368708dbf1ddfd306ea1e76 100644 (file)
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -64,20 +64,13 @@ def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>;
  def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx,
                         [SDNPHasChain, SDNPMayStore]>;
  
-// This sequence is used for long double->int conversions.  It changes the
-// bits in the FPSCR which is not modelled.  
-def PPCmffs   : SDNode<"PPCISD::MFFS", SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>,
-                        [SDNPOutGlue]>;
-def PPCmtfsb0 : SDNode<"PPCISD::MTFSB0", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
-                       [SDNPInGlue, SDNPOutGlue]>;
-def PPCmtfsb1 : SDNode<"PPCISD::MTFSB1", SDTypeProfile<0, 1, [SDTCisInt<0>]>,
-                       [SDNPInGlue, SDNPOutGlue]>;
-def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp,
-                       [SDNPInGlue, SDNPOutGlue]>;
-def PPCmtfsf  : SDNode<"PPCISD::MTFSF", SDTypeProfile<1, 3, 
-                       [SDTCisVT<0, f64>, SDTCisInt<1>, SDTCisVT<2, f64>,
-                        SDTCisVT<3, f64>]>,
-                       [SDNPInGlue]>;
+// Extract FPSCR (not modeled at the DAG level).
+def PPCmffs   : SDNode<"PPCISD::MFFS",
+                       SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, []>;
+
+// Perform FADD in round-to-zero mode.
+def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>;
+
  
  def PPCfsel   : SDNode<"PPCISD::FSEL",  
     // Type constraint for fsel.
@@ -1288,27 +1281,23 @@ def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
                         "mfocrf $rT, $FXM", SprMFCR>,
              PPC970_DGroup_First, PPC970_Unit_CRU;
  
-// Instructions to manipulate FPSCR.  Only long double handling uses these.
-// FPSCR is not modelled; we use the SDNode Flag to keep things in order.
+// Pseudo instruction to perform FADD in round-to-zero mode.
+let usesCustomInserter = 1, Uses = [RM] in {
+  def FADDrtz: Pseudo<(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), "",
+                      [(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>;
+}
  
+// The above pseudo gets expanded to make use of the following instructions
+// to manipulate FPSCR.  Note that FPSCR is not modeled at the DAG level.
  let Uses = [RM], Defs = [RM] in { 
    def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM),
-                         "mtfsb0 $FM", IntMTFSB0,
-                        [(PPCmtfsb0 (i32 imm:$FM))]>,
+                        "mtfsb0 $FM", IntMTFSB0, []>,
                 PPC970_DGroup_Single, PPC970_Unit_FPU;
    def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
-                         "mtfsb1 $FM", IntMTFSB0,
-                        [(PPCmtfsb1 (i32 imm:$FM))]>,
+                        "mtfsb1 $FM", IntMTFSB0, []>,
                 PPC970_DGroup_Single, PPC970_Unit_FPU;
-  // MTFSF does not actually produce an FP result.  We pretend it copies
-  // input reg B to the output.  If we didn't do this it would look like the
-  // instruction had no outputs (because we aren't modelling the FPSCR) and
-  // it would be deleted.
-  def MTFSF  : XFLForm<63, 711, (outs F8RC:$FRA),
-                                (ins i32imm:$FM, F8RC:$rT, F8RC:$FRB),
-                         "mtfsf $FM, $rT", "$FRB = $FRA", IntMTFSB0,
-                         [(set f64:$FRA, (PPCmtfsf (i32 imm:$FM),
-                                                    f64:$rT, f64:$FRB))]>,
+  def MTFSF  : XFLForm<63, 711, (outs), (ins i32imm:$FM, F8RC:$rT),
+                       "mtfsf $FM, $rT", IntMTFSB0, []>,
                 PPC970_DGroup_Single, PPC970_Unit_FPU;
  }
  let Uses = [RM] in {
@@ -1316,11 +1305,6 @@ let Uses = [RM] in {
                           "mffs $rT", IntMFFS,
                           [(set f64:$rT, (PPCmffs))]>,
                 PPC970_DGroup_Single, PPC970_Unit_FPU;
-  def FADDrtz: AForm_2<63, 21,
-                      (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
-                      "fadd $FRT, $FRA, $FRB", FPAddSub,
-                      [(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>,
-               PPC970_DGroup_Single, PPC970_Unit_FPU;
  }
author	Ulrich Weigand <ulrich.weigand@de.ibm.com>
	Tue, 26 Mar 2013 10:56:22 +0000 (10:56 +0000)
committer	Ulrich Weigand <ulrich.weigand@de.ibm.com>
	Tue, 26 Mar 2013 10:56:22 +0000 (10:56 +0000)
lib/Target/PowerPC/PPCISelLowering.cpp		patch \| blob \| history
lib/Target/PowerPC/PPCISelLowering.h		patch \| blob \| history
lib/Target/PowerPC/PPCInstrFormats.td		patch \| blob \| history
lib/Target/PowerPC/PPCInstrInfo.td		patch \| blob \| history