Fix PR3241: Currently EmitCopyFromReg emits a copy from the physical register to...
authorEvan Cheng <evan.cheng@apple.com>
Mon, 12 Jan 2009 03:19:55 +0000 (03:19 +0000)
committerEvan Cheng <evan.cheng@apple.com>
Mon, 12 Jan 2009 03:19:55 +0000 (03:19 +0000)
Also future proof the scheduler to handle "normal" physical register dependencies. The code is not exercised yet.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@62074 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/CodeGen/ScheduleDAG.h
lib/CodeGen/ScheduleDAGEmit.cpp
lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
test/CodeGen/X86/pr3244.ll [new file with mode: 0644]

index 03d11e2d8287dd6e278d465447cffe7d0b144802..765c26a594134814f8bdc6ee2cb4baeb49d94988 100644 (file)
@@ -485,7 +485,7 @@ namespace llvm {
   protected:
     void AddMemOperand(MachineInstr *MI, const MachineMemOperand &MO);
 
-    void EmitCrossRCCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap);
+    void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap);
 
     /// ForceUnitLatencies - Return true if all scheduling edges should be given a
     /// latency value of one.  The default is to return false; schedulers may
index d10d670d346634a9ea5744516339541e5d77de23..1f40771e3bd1ae09653021c02563654cad2b2382 100644 (file)
@@ -36,7 +36,7 @@ void ScheduleDAG::EmitNoop() {
   TII->insertNoop(*BB, BB->end());
 }
 
-void ScheduleDAG::EmitCrossRCCopy(SUnit *SU,
+void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
                                   DenseMap<SUnit*, unsigned> &VRBaseMap) {
   for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
        I != E; ++I) {
@@ -49,12 +49,11 @@ void ScheduleDAG::EmitCrossRCCopy(SUnit *SU,
       unsigned Reg = 0;
       for (SUnit::const_succ_iterator II = SU->Succs.begin(),
              EE = SU->Succs.end(); II != EE; ++II) {
-        if (I->getReg()) {
-          Reg = I->getReg();
+        if (II->getReg()) {
+          Reg = II->getReg();
           break;
         }
       }
-      assert(I->getReg() && "Unknown physical register!");
       TII->copyRegToReg(*BB, BB->end(), Reg, VRI->second,
                         SU->CopyDstRC, SU->CopySrcRC);
     } else {
index 113dfb1751dcadb9bf585921f37d0326716e7173..b86492992c4ea6615f24c55562cac492e0569ed4 100644 (file)
@@ -28,7 +28,7 @@ using namespace llvm;
 
 STATISTIC(NumUnfolds,    "Number of nodes unfolded");
 STATISTIC(NumDups,       "Number of duplicated nodes");
-STATISTIC(NumCCCopies,   "Number of cross class copies");
+STATISTIC(NumPRCopies,   "Number of physical copies");
 
 static RegisterScheduler
   fastDAGScheduler("fast", "Fast suboptimal list scheduling",
@@ -93,10 +93,10 @@ private:
   void ReleasePred(SUnit *SU, SDep *PredEdge);
   void ScheduleNodeBottomUp(SUnit*, unsigned);
   SUnit *CopyAndMoveSuccessors(SUnit*);
-  void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned,
-                                  const TargetRegisterClass*,
-                                  const TargetRegisterClass*,
-                                  SmallVector<SUnit*, 2>&);
+  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+                                const TargetRegisterClass*,
+                                const TargetRegisterClass*,
+                                SmallVector<SUnit*, 2>&);
   bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
   void ListScheduleBottomUp();
 
@@ -361,17 +361,16 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
       DelDeps.push_back(std::make_pair(SuccSU, D));
     }
   }
-  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
+  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
     RemovePred(DelDeps[i].first, DelDeps[i].second);
-  }
 
   ++NumDups;
   return NewSU;
 }
 
-/// InsertCCCopiesAndMoveSuccs - Insert expensive cross register class copies
-/// and move all scheduled successors of the given SUnit to the last copy.
-void ScheduleDAGFast::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
+/// InsertCopiesAndMoveSuccs - Insert register copies and move all
+/// scheduled successors of the given SUnit to the last copy.
+void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                               const TargetRegisterClass *DestRC,
                                               const TargetRegisterClass *SrcRC,
                                                SmallVector<SUnit*, 2> &Copies) {
@@ -408,7 +407,7 @@ void ScheduleDAGFast::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
   Copies.push_back(CopyFromSU);
   Copies.push_back(CopyToSU);
 
-  ++NumCCCopies;
+  ++NumPRCopies;
 }
 
 /// getPhysicalRegisterVT - Returns the ValueType of the physical register
@@ -524,19 +523,22 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
         assert(LRegs.size() == 1 && "Can't handle this yet!");
         unsigned Reg = LRegs[0];
         SUnit *LRDef = LiveRegDefs[Reg];
-        SUnit *NewDef = CopyAndMoveSuccessors(LRDef);
+        MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+        const TargetRegisterClass *RC =
+          TRI->getPhysicalRegisterRegClass(Reg, VT);
+        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+        // If cross copy register class is null, then it must be possible copy
+        // the value directly. Do not try duplicate the def.
+        SUnit *NewDef = 0;
+        if (DestRC)
+          NewDef = CopyAndMoveSuccessors(LRDef);
+        else
+          DestRC = RC;
         if (!NewDef) {
-          // Issue expensive cross register class copies.
-          MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
-          const TargetRegisterClass *RC =
-            TRI->getPhysicalRegisterRegClass(Reg, VT);
-          const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
-          if (!DestRC) {
-            assert(false && "Don't know how to copy this physical register!");
-            abort();
-          }
+          // Issue copies, these can be expensive cross register class copies.
           SmallVector<SUnit*, 2> Copies;
-          InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
           DOUT << "Adding an edge from SU # " << TrySU->NodeNum
                << " to SU #" << Copies.front()->NodeNum << "\n";
           AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
index 65de7a5c8f8263a0d8b10df5fa45f952c014f056..bc5443eaba8fbd3eebc8ee7b876b7d0dfbba220e 100644 (file)
@@ -35,7 +35,7 @@ using namespace llvm;
 STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
 STATISTIC(NumUnfolds,    "Number of nodes unfolded");
 STATISTIC(NumDups,       "Number of duplicated nodes");
-STATISTIC(NumCCCopies,   "Number of cross class copies");
+STATISTIC(NumPRCopies,   "Number of physical register copies");
 
 static RegisterScheduler
   burrListDAGScheduler("list-burr",
@@ -121,10 +121,10 @@ private:
   void UnscheduleNodeBottomUp(SUnit*);
   void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
   SUnit *CopyAndMoveSuccessors(SUnit*);
-  void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned,
-                                  const TargetRegisterClass*,
-                                  const TargetRegisterClass*,
-                                  SmallVector<SUnit*, 2>&);
+  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+                                const TargetRegisterClass*,
+                                const TargetRegisterClass*,
+                                SmallVector<SUnit*, 2>&);
   bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
   void ListScheduleTopDown();
   void ListScheduleBottomUp();
@@ -517,11 +517,11 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
   return NewSU;
 }
 
-/// InsertCCCopiesAndMoveSuccs - Insert expensive cross register class copies
-/// and move all scheduled successors of the given SUnit to the last copy.
-void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
-                                              const TargetRegisterClass *DestRC,
-                                              const TargetRegisterClass *SrcRC,
+/// InsertCopiesAndMoveSuccs - Insert register copies and move all
+/// scheduled successors of the given SUnit to the last copy.
+void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
+                                               const TargetRegisterClass *DestRC,
+                                               const TargetRegisterClass *SrcRC,
                                                SmallVector<SUnit*, 2> &Copies) {
   SUnit *CopyFromSU = CreateNewSUnit(NULL);
   CopyFromSU->CopySrcRC = SrcRC;
@@ -546,9 +546,8 @@ void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
       DelDeps.push_back(std::make_pair(SuccSU, *I));
     }
   }
-  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
+  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
     RemovePred(DelDeps[i].first, DelDeps[i].second);
-  }
 
   AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
   AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
@@ -559,7 +558,7 @@ void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
   Copies.push_back(CopyFromSU);
   Copies.push_back(CopyToSU);
 
-  ++NumCCCopies;
+  ++NumPRCopies;
 }
 
 /// getPhysicalRegisterVT - Returns the ValueType of the physical register
@@ -705,27 +704,32 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
       }
 
       if (!CurSU) {
-        // Can't backtrack. Try duplicating the nodes that produces these
-        // "expensive to copy" values to break the dependency. In case even
-        // that doesn't work, insert cross class copies.
+        // Can't backtrack. If it's too expensive to copy the value, then try
+        // duplicate the nodes that produces these "too expensive to copy"
+        // values to break the dependency. In case even that doesn't work,
+        // insert cross class copies.
+        // If it's not too expensive, i.e. cost != -1, issue copies.
         SUnit *TrySU = NotReady[0];
         SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
         assert(LRegs.size() == 1 && "Can't handle this yet!");
         unsigned Reg = LRegs[0];
         SUnit *LRDef = LiveRegDefs[Reg];
-        SUnit *NewDef = CopyAndMoveSuccessors(LRDef);
+        MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+        const TargetRegisterClass *RC =
+          TRI->getPhysicalRegisterRegClass(Reg, VT);
+        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+        // If cross copy register class is null, then it must be possible copy
+        // the value directly. Do not try duplicate the def.
+        SUnit *NewDef = 0;
+        if (DestRC)
+          NewDef = CopyAndMoveSuccessors(LRDef);
+        else
+          DestRC = RC;
         if (!NewDef) {
-          // Issue expensive cross register class copies.
-          MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
-          const TargetRegisterClass *RC =
-            TRI->getPhysicalRegisterRegClass(Reg, VT);
-          const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
-          if (!DestRC) {
-            assert(false && "Don't know how to copy this physical register!");
-            abort();
-          }
+          // Issue copies, these can be expensive cross register class copies.
           SmallVector<SUnit*, 2> Copies;
-          InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
           DOUT << "Adding an edge from SU #" << TrySU->NodeNum
                << " to SU #" << Copies.front()->NodeNum << "\n";
           AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
index a8d904c2f67327cf248a3cf11ff6d2153425d3e9..c755086a8d5db0a6df0697b3d7b4e0eac1e1d2db 100644 (file)
@@ -39,11 +39,11 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
 
 /// CheckForPhysRegDependency - Check if the dependency between def and use of
 /// a specified operand is a physical register dependency. If so, returns the
-/// register.
+/// register and the cost of copying the register.
 static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
                                       const TargetRegisterInfo *TRI, 
                                       const TargetInstrInfo *TII,
-                                      unsigned &PhysReg) {
+                                      unsigned &PhysReg, int &Cost) {
   if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
     return;
 
@@ -55,8 +55,12 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
   if (Def->isMachineOpcode()) {
     const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
     if (ResNo >= II.getNumDefs() &&
-        II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg)
+        II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
       PhysReg = Reg;
+      const TargetRegisterClass *RC =
+        TRI->getPhysicalRegisterRegClass(Reg, Def->getValueType(ResNo));
+      Cost = RC->getCopyCost();
+    }
   }
 }
 
@@ -179,10 +183,18 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
         bool isChain = OpVT == MVT::Other;
 
         unsigned PhysReg = 0;
+        int Cost = 1;
         // Determine if this is a physical register dependency.
-        CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg);
+        CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
         assert((PhysReg == 0 || !isChain) &&
                "Chain dependence via physreg data?");
+        // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
+        // emits a copy from the physical register to a virtual register unless
+        // it requires a cross class copy (cost < 0). That means we are only
+        // treating "expensive to copy" register dependency as physical register
+        // dependency. This may change in the future though.
+        if (Cost >= 0)
+          PhysReg = 0;
         SU->addPred(SDep(OpSU, isChain ? SDep::Order : SDep::Data,
                          OpSU->Latency, PhysReg));
       }
@@ -252,10 +264,12 @@ unsigned ScheduleDAGSDNodes::ComputeMemOperandsEnd(SDNode *Node) {
 
 
 void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
-  if (SU->getNode())
-    SU->getNode()->dump(DAG);
-  else
-    cerr << "CROSS RC COPY ";
+  if (!SU->getNode()) {
+    cerr << "PHYS REG COPY\n";
+    return;
+  }
+
+  SU->getNode()->dump(DAG);
   cerr << "\n";
   SmallVector<SDNode *, 4> FlaggedNodes;
   for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode())
index ebe084d811f5bba7515de2697f856492b5fe6809..d6179651589c1b72faefae1f5362a3dcf7bd57e3 100644 (file)
@@ -629,6 +629,12 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
 
     // For pre-regalloc scheduling, create instructions corresponding to the
     // SDNode and any flagged SDNodes and append them to the block.
+    if (!SU->getNode()) {
+      // Emit a copy.
+      EmitPhysRegCopy(SU, CopyVRBaseMap);
+      continue;
+    }
+
     SmallVector<SDNode *, 4> FlaggedNodes;
     for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode())
       FlaggedNodes.push_back(N);
@@ -636,10 +642,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
       EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, VRBaseMap);
       FlaggedNodes.pop_back();
     }
-    if (!SU->getNode())
-      EmitCrossRCCopy(SU, CopyVRBaseMap);
-    else
-      EmitNode(SU->getNode(), SU->OrigNode != SU, VRBaseMap);
+    EmitNode(SU->getNode(), SU->OrigNode != SU, VRBaseMap);
   }
 
   return BB;
diff --git a/test/CodeGen/X86/pr3244.ll b/test/CodeGen/X86/pr3244.ll
new file mode 100644 (file)
index 0000000..0765f86
--- /dev/null
@@ -0,0 +1,26 @@
+; RUN: llvm-as < %s | llc -march=x86
+; PR3244
+
+@g_62 = external global i16             ; <i16*> [#uses=1]
+@g_487 = external global i32            ; <i32*> [#uses=1]
+
+define i32 @func_42(i32 %p_43, i32 %p_44, i32 %p_45, i32 %p_46) nounwind {
+entry:
+        %0 = load i16* @g_62, align 2           ; <i16> [#uses=1]
+        %1 = load i32* @g_487, align 4          ; <i32> [#uses=1]
+        %2 = trunc i16 %0 to i8         ; <i8> [#uses=1]
+        %3 = trunc i32 %1 to i8         ; <i8> [#uses=1]
+        %4 = tail call i32 (...)* @func_7(i64 -4455561449541442965, i32 1)
+nounwind             ; <i32> [#uses=1]
+        %5 = trunc i32 %4 to i8         ; <i8> [#uses=1]
+        %6 = mul i8 %3, %2              ; <i8> [#uses=1]
+        %7 = mul i8 %6, %5              ; <i8> [#uses=1]
+        %8 = sext i8 %7 to i16          ; <i16> [#uses=1]
+        %9 = tail call i32 @func_85(i16 signext %8, i32 1, i32 1) nounwind     
+        ; <i32> [#uses=0]
+        ret i32 undef
+}
+
+declare i32 @func_7(...)
+
+declare i32 @func_85(i16 signext, i32, i32)