R600: Make sure to schedule AR register uses and defs in the same clause
author Tom Stellard <thomas.stellard@amd.com>
Wed, 5 Jun 2013 03:43:06 +0000 (03:43 +0000)
committer Tom Stellard <thomas.stellard@amd.com>
Wed, 5 Jun 2013 03:43:06 +0000 (03:43 +0000)
Reviewed-by: vljn at ovi.com
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183294 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/R600/R600InstrInfo.cpp
lib/Target/R600/R600MachineScheduler.cpp
lib/Target/R600/R600MachineScheduler.h
test/CodeGen/R600/indirect-addressing.ll [new file with mode: 0644]

index 2a4a2459015b788646bcf2bf5e7f9eefc0468a7d..d915f40a6265f8ee3e086e6c567e1fc5fa0cac6f 100644 (file)
@@ -816,7 +816,8 @@ MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
 
   MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                       AddrReg, ValueReg)
-                                      .addReg(AMDGPU::AR_X, RegState::Implicit);
+                                      .addReg(AMDGPU::AR_X,
+                                           RegState::Implicit | RegState::Kill);
   setImmOperand(Mov, R600Operands::DST_REL, 1);
   return Mov;
 }
@@ -833,7 +834,8 @@ MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
   MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                       ValueReg,
                                       AddrReg)
-                                      .addReg(AMDGPU::AR_X, RegState::Implicit);
+                                      .addReg(AMDGPU::AR_X,
+                                           RegState::Implicit | RegState::Kill);
   setImmOperand(Mov, R600Operands::SRC0_REL, 1);
 
   return Mov;
index 8d61b8c610b25866fae8a0e599b8f54515298757..9469e0fc6b338d85f1667a3ba51f8c5346888d5d 100644 (file)
@@ -59,8 +59,16 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
   bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) &&
       (!Available[IDFetch].empty() || !Available[IDOther].empty());
 
-  if ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
-      (!AllowSwitchFromAlu && CurInstKind == IDAlu)) {
+  // We want to schedule AR defs as soon as possible to make sure they aren't
+  // put in a different ALU clause from their uses.
+  if (!SU && !UnscheduledARDefs.empty()) {
+      SU = UnscheduledARDefs[0];
+      UnscheduledARDefs.erase(UnscheduledARDefs.begin());
+      NextInstKind = IDAlu;
+  }
+
+  if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
+      (!AllowSwitchFromAlu && CurInstKind == IDAlu))) {
     // try to pick ALU
     SU = pickAlu();
     if (SU) {
@@ -84,6 +92,15 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
       NextInstKind = IDOther;
   }
 
+  // We want to schedule the AR uses as late as possible to make sure that
+  // the AR defs have been released.
+  if (!SU && !UnscheduledARUses.empty()) {
+      SU = UnscheduledARUses[0];
+      UnscheduledARUses.erase(UnscheduledARUses.begin());
+      NextInstKind = IDAlu;
+  }
+
+
   DEBUG(
       if (SU) {
         dbgs() << " ** Pick node **\n";
@@ -149,6 +166,21 @@ void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
   DEBUG(dbgs() << "Bottom Releasing ";SU->dump(DAG););
 
   int IK = getInstKind(SU);
+
+  // Check for AR register defs and uses; they are queued separately.
+  for (MachineInstr::const_mop_iterator I = SU->getInstr()->operands_begin(),
+                                        E = SU->getInstr()->operands_end();
+                                        I != E; ++I) {
+    if (I->isReg() && I->getReg() == AMDGPU::AR_X) {
+      if (I->isDef()) {
+        UnscheduledARDefs.push_back(SU);
+      } else {
+        UnscheduledARUses.push_back(SU);
+      }
+      return;
+    }
+  }
+
   // There is no export clause, we can schedule one as soon as its ready
   if (IK == IDOther)
     Available[IDOther].push_back(SU);
index 814ae9e546cddb11698be69cc107192afa5e149d..4dedf70d5f75764000c4770d7b105cd9360b27a4 100644 (file)
@@ -52,6 +52,8 @@ class R600SchedStrategy : public MachineSchedStrategy {
 
   std::vector<SUnit *> Available[IDLast], Pending[IDLast];
   std::vector<SUnit *> AvailableAlus[AluLast];
+  std::vector<SUnit *> UnscheduledARDefs;
+  std::vector<SUnit *> UnscheduledARUses;
 
   InstKind CurInstKind;
   int CurEmitted;
diff --git a/test/CodeGen/R600/indirect-addressing.ll b/test/CodeGen/R600/indirect-addressing.ll
new file mode 100644 (file)
index 0000000..7291cb4
--- /dev/null
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; This test checks that uses and defs of the AR register happen in the same
+; instruction clause.
+
+; CHECK: @mova_same_clause
+; CHECK: MOVA_INT
+; CHECK-NOT: ALU clause
+; CHECK: 0 + AR.x
+; CHECK: MOVA_INT
+; CHECK-NOT: ALU clause
+; CHECK: 0 + AR.x
+
+define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
+entry:
+  %stack = alloca [5 x i32], align 4
+  %0 = load i32 addrspace(1)* %in, align 4
+  %arrayidx1 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 %0
+  store i32 4, i32* %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %in, i32 1
+  %1 = load i32 addrspace(1)* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 %1
+  store i32 5, i32* %arrayidx3, align 4
+  %arrayidx10 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 0
+  %2 = load i32* %arrayidx10, align 4
+  store i32 %2, i32 addrspace(1)* %out, align 4
+  %arrayidx12 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 1
+  %3 = load i32* %arrayidx12
+  %arrayidx13 = getelementptr inbounds i32 addrspace(1)* %out, i32 1
+  store i32 %3, i32 addrspace(1)* %arrayidx13
+  ret void
+}