Added a late machine instruction copy propagation pass. This catches
authorEvan Cheng <evan.cheng@apple.com>
Sat, 7 Jan 2012 03:02:36 +0000 (03:02 +0000)
committerEvan Cheng <evan.cheng@apple.com>
Sat, 7 Jan 2012 03:02:36 +0000 (03:02 +0000)
opportunities that only present themselves after late optimizations
such as tail duplication .e.g.
## BB#1:
        movl    %eax, %ecx
        movl    %ecx, %eax
        ret

The register allocator also leaves some of them around (due to false
dep between copies from phi-elimination, etc.)

This required some changes in codegen passes. Post-ra scheduler and the
pseudo-instruction expansion passes have been moved after branch folding
and tail merging. They were before branch folding before because it did
not always update block livein's. That's fixed now. The pass change makes
independently since we want to properly schedule instructions after
branch folding / tail duplication.

rdar://10428165
rdar://10640363

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147716 91177308-0d34-0410-b5e6-96231b3b80d8

13 files changed:
include/llvm/CodeGen/Passes.h
include/llvm/InitializePasses.h
lib/CodeGen/AggressiveAntiDepBreaker.cpp
lib/CodeGen/BranchFolding.cpp
lib/CodeGen/CMakeLists.txt
lib/CodeGen/CriticalAntiDepBreaker.cpp
lib/CodeGen/LLVMTargetMachine.cpp
lib/CodeGen/MachineCopyPropagation.cpp [new file with mode: 0644]
lib/CodeGen/RegisterScavenging.cpp
lib/CodeGen/ScheduleDAGInstrs.cpp
lib/CodeGen/ScheduleDAGInstrs.h
test/CodeGen/ARM/code-placement.ll
test/CodeGen/X86/machine-cp.ll [new file with mode: 0644]

index b4f6cac858eb6f3dedc69049d5f20ed086d0815b..f077f2b8d663c3fc0df79467151b9cdc4fdbfc32 100644 (file)
@@ -193,6 +193,10 @@ namespace llvm {
   /// instructions.
   FunctionPass *createMachineSinkingPass();
 
+  /// createMachineCopyPropagationPass - This pass performs copy propagation on
+  /// machine instructions.
+  FunctionPass *createMachineCopyPropagationPass();
+
   /// createPeepholeOptimizerPass - This pass performs peephole optimizations -
   /// like extension and comparison eliminations.
   FunctionPass *createPeepholeOptimizerPass();
index ce87f16c2ec653965471364015cef0f233930493..ed044c0ad94f2170cbc8e1b2bc906b665b37940e 100644 (file)
@@ -79,6 +79,7 @@ void initializeCallGraphAnalysisGroup(PassRegistry&);
 void initializeCodeGenPreparePass(PassRegistry&);
 void initializeConstantMergePass(PassRegistry&);
 void initializeConstantPropagationPass(PassRegistry&);
+void initializeMachineCopyPropagationPass(PassRegistry&);
 void initializeCorrelatedValuePropagationPass(PassRegistry&);
 void initializeDAEPass(PassRegistry&);
 void initializeDAHPass(PassRegistry&);
index 6cf4571d730bf820776c4bedbc64cac59827b811..77958d9c95218af8ee7799193fe7ef50838dc483 100644 (file)
@@ -186,7 +186,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
   // callee-saved register that is not saved in the prolog.
   const MachineFrameInfo *MFI = MF.getFrameInfo();
   BitVector Pristine = MFI->getPristineRegs(BB);
-  for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
+  for (const unsigned *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
     unsigned Reg = *I;
     if (!IsReturnBlock && !Pristine.test(Reg)) continue;
     for (const unsigned *Alias = TRI->getOverlaps(Reg);
index 89894c37ee0e629718783050ad0344e8583e7c4f..29e8545030be63148538e75c8cff9f8e8082f879 100644 (file)
@@ -180,7 +180,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
   TRI = tri;
   MMI = mmi;
 
-  RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : NULL;
+  RS = new RegScavenger();
 
   // Fix CFG.  The later algorithms expect it to be right.
   bool MadeChange = false;
@@ -368,16 +368,14 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
 
 void BranchFolder::MaintainLiveIns(MachineBasicBlock *CurMBB,
                                    MachineBasicBlock *NewMBB) {
-  if (RS) {
-    RS->enterBasicBlock(CurMBB);
-    if (!CurMBB->empty())
-      RS->forward(prior(CurMBB->end()));
-    BitVector RegsLiveAtExit(TRI->getNumRegs());
-    RS->getRegsUsed(RegsLiveAtExit, false);
-    for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++)
-      if (RegsLiveAtExit[i])
-        NewMBB->addLiveIn(i);
-  }
+  RS->enterBasicBlock(CurMBB);
+  if (!CurMBB->empty())
+    RS->forward(prior(CurMBB->end()));
+  BitVector RegsLiveAtExit(TRI->getNumRegs());
+  RS->getRegsUsed(RegsLiveAtExit, false);
+  for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++)
+    if (RegsLiveAtExit[i])
+      NewMBB->addLiveIn(i);
 }
 
 /// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
index ce9d0d4d0d626f09651dd0fd4a5ef616d79e8082..7fa5e47f9ebc05cfb90a47779a3d048e124b1154 100644 (file)
@@ -40,6 +40,7 @@ add_llvm_library(LLVMCodeGen
   MachineBlockPlacement.cpp
   MachineBranchProbabilityInfo.cpp
   MachineCodeEmitter.cpp
+  MachineCopyPropagation.cpp
   MachineCSE.cpp
   MachineDominators.cpp
   MachineFunction.cpp
index 128143e70fb721f180b4a7f00f5b714b1a8c40ae..5c325396dbefeb54ce39a351d68b244d30672104 100644 (file)
@@ -102,7 +102,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
   // callee-saved register that is not saved in the prolog.
   const MachineFrameInfo *MFI = MF.getFrameInfo();
   BitVector Pristine = MFI->getPristineRegs(BB);
-  for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
+  for (const unsigned *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
     unsigned Reg = *I;
     if (!IsReturnBlock && !Pristine.test(Reg)) continue;
     Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
index 62227fd4d65005ad3126d06a2fe3e3e89bdfd876..7fd089a5db88ef8642325f69e3ec81a9e0fa981c 100644 (file)
@@ -452,23 +452,10 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
   if (addPostRegAlloc(PM))
     printAndVerify(PM, "After PostRegAlloc passes");
 
-  PM.add(createExpandPostRAPseudosPass());
-  printAndVerify(PM, "After ExpandPostRAPseudos");
-
   // Insert prolog/epilog code.  Eliminate abstract frame index references...
   PM.add(createPrologEpilogCodeInserter());
   printAndVerify(PM, "After PrologEpilogCodeInserter");
 
-  // Run pre-sched2 passes.
-  if (addPreSched2(PM))
-    printAndVerify(PM, "After PreSched2 passes");
-
-  // Second pass scheduler.
-  if (getOptLevel() != CodeGenOpt::None && !DisablePostRA) {
-    PM.add(createPostRAScheduler(getOptLevel()));
-    printAndVerify(PM, "After PostRAScheduler");
-  }
-
   // Branch folding must be run after regalloc and prolog/epilog insertion.
   if (getOptLevel() != CodeGenOpt::None && !DisableBranchFold) {
     PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
@@ -481,6 +468,26 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
     printNoVerify(PM, "After TailDuplicate");
   }
 
+  // Copy propagation.
+  if (getOptLevel() != CodeGenOpt::None) {
+    PM.add(createMachineCopyPropagationPass());
+    printNoVerify(PM, "After copy propagation pass");
+  }
+
+  // Expand pseudo instructions before second scheduling pass.
+  PM.add(createExpandPostRAPseudosPass());
+  printNoVerify(PM, "After ExpandPostRAPseudos");
+
+  // Run pre-sched2 passes.
+  if (addPreSched2(PM))
+    printNoVerify(PM, "After PreSched2 passes");
+
+  // Second pass scheduler.
+  if (getOptLevel() != CodeGenOpt::None && !DisablePostRA) {
+    PM.add(createPostRAScheduler(getOptLevel()));
+    printNoVerify(PM, "After PostRAScheduler");
+  }
+
   PM.add(createGCMachineCodeAnalysisPass());
 
   if (PrintGCInfo)
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
new file mode 100644 (file)
index 0000000..861025c
--- /dev/null
@@ -0,0 +1,240 @@
+//===- MachineCopyPropagation.cpp - Machine Copy Propagation Pass ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an extremely simple MachineInstr-level copy propagation pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "codegen-cp"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumDeletes, "Number of dead copies deleted");
+
+namespace {
+  class MachineCopyPropagation : public MachineFunctionPass {
+    const TargetRegisterInfo *TRI;
+    BitVector ReservedRegs;
+    
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    MachineCopyPropagation() : MachineFunctionPass(ID) {
+     initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  private:
+    void SourceNoLongerAvailable(unsigned Reg,
+                               DenseMap<unsigned, unsigned> &SrcMap,
+                               DenseMap<unsigned, MachineInstr*> &AvailCopyMap);
+    bool CopyPropagateBlock(MachineBasicBlock &MBB);
+  };
+}
+char MachineCopyPropagation::ID = 0;
+
+INITIALIZE_PASS(MachineCopyPropagation, "machine-cp",
+                "Machine Copy Propagation Pass", false, false)
+
+FunctionPass *llvm::createMachineCopyPropagationPass() {
+  return new MachineCopyPropagation();
+}
+
+void
+MachineCopyPropagation::SourceNoLongerAvailable(unsigned Reg,
+                              DenseMap<unsigned, unsigned> &SrcMap,
+                              DenseMap<unsigned, MachineInstr*> &AvailCopyMap) {
+  DenseMap<unsigned, unsigned>::iterator SI = SrcMap.find(Reg);
+  if (SI != SrcMap.end()) {
+    unsigned MappedDef = SI->second;
+    // Source of copy is no longer available for propagation.
+    if (AvailCopyMap.erase(MappedDef)) {
+      for (const unsigned *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR)
+        AvailCopyMap.erase(*SR);
+    }
+  }
+  for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+    SI = SrcMap.find(*AS);
+    if (SI != SrcMap.end()) {
+      unsigned MappedDef = SI->second;
+      if (AvailCopyMap.erase(MappedDef)) {
+        for (const unsigned *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR)
+          AvailCopyMap.erase(*SR);
+      }
+    }
+  }
+}
+
+bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
+  SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; // Candidates for deletion
+  DenseMap<unsigned, MachineInstr*> AvailCopyMap;   // Def -> available copies map
+  DenseMap<unsigned, MachineInstr*> CopyMap;        // Def -> copies map
+  DenseMap<unsigned, unsigned> SrcMap;              // Src -> Def map
+
+  bool Changed = false;
+  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) {
+    MachineInstr *MI = &*I;
+    ++I;
+
+    if (MI->isCopy()) {
+      unsigned Def = MI->getOperand(0).getReg();
+      unsigned Src = MI->getOperand(1).getReg();
+
+      if (TargetRegisterInfo::isVirtualRegister(Def) ||
+          TargetRegisterInfo::isVirtualRegister(Src))
+        report_fatal_error("MachineCopyPropagation should be run after"
+                           " register allocation!");
+
+      DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src);
+      if (CI != AvailCopyMap.end()) {
+        MachineInstr *CopyMI = CI->second;
+        unsigned SrcSrc = CopyMI->getOperand(1).getReg();
+        if (!ReservedRegs.test(Def) &&
+            (SrcSrc == Def || TRI->isSubRegister(SrcSrc, Def))) {
+          // The two copies cancel out and the source of the first copy
+          // hasn't been overridden, eliminate the second one. e.g.
+          //  %ECX<def> = COPY %EAX<kill>
+          //  ... nothing clobbered EAX.
+          //  %EAX<def> = COPY %ECX
+          // =>
+          //  %ECX<def> = COPY %EAX
+          CopyMI->getOperand(1).setIsKill(false);
+          MI->eraseFromParent();
+          Changed = true;
+          ++NumDeletes;
+          continue;
+        }
+      }
+
+      // If Src is defined by a previous copy, it cannot be eliminated.
+      CI = CopyMap.find(Src);
+      if (CI != CopyMap.end())
+        MaybeDeadCopies.remove(CI->second);
+      for (const unsigned *AS = TRI->getAliasSet(Src); *AS; ++AS) {
+        CI = CopyMap.find(*AS);
+        if (CI != CopyMap.end())
+          MaybeDeadCopies.remove(CI->second);
+      }
+
+      // Copy is now a candidate for deletion.
+      MaybeDeadCopies.insert(MI);
+
+      // If 'Src' is previously source of another copy, then this earlier copy's
+      // source is no longer available. e.g.
+      // %xmm9<def> = copy %xmm2
+      // ...
+      // %xmm2<def> = copy %xmm0
+      // ...
+      // %xmm2<def> = copy %xmm9
+      SourceNoLongerAvailable(Def, SrcMap, AvailCopyMap);
+
+      // Remember Def is defined by the copy.
+      CopyMap[Def] = MI;
+      AvailCopyMap[Def] = MI;
+      for (const unsigned *SR = TRI->getSubRegisters(Def); *SR; ++SR) {
+        CopyMap[*SR] = MI;
+        AvailCopyMap[*SR] = MI;
+      }
+
+      // Remember source that's copied to Def. Once it's clobbered, then
+      // it's no longer available for copy propagation.
+      SrcMap[Src] = Def;
+
+      continue;
+    }
+
+    // Not a copy.
+    SmallVector<unsigned, 2> Defs;
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (!MO.isReg())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (!Reg)
+        continue;
+
+      if (TargetRegisterInfo::isVirtualRegister(Reg))
+        report_fatal_error("MachineCopyPropagation should be run after"
+                           " register allocation!");
+
+      if (MO.isDef()) {
+        Defs.push_back(Reg);
+        continue;
+      }
+
+      // If 'Reg' is defined by a copy, the copy is no longer a candidate
+      // for elimination.
+      DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(Reg);
+      if (CI != CopyMap.end())
+        MaybeDeadCopies.remove(CI->second);
+      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+        CI = CopyMap.find(*AS);
+        if (CI != CopyMap.end())
+          MaybeDeadCopies.remove(CI->second);
+      }
+    }
+
+    for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+      unsigned Reg = Defs[i];
+
+      // No longer defined by a copy.
+      CopyMap.erase(Reg);
+      AvailCopyMap.erase(Reg);
+      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+        CopyMap.erase(*AS);
+        AvailCopyMap.erase(*AS);
+      }
+
+      // If 'Reg' is previously source of a copy, it is no longer available for
+      // copy propagation.
+      SourceNoLongerAvailable(Reg, SrcMap, AvailCopyMap);
+    }
+  }
+
+  // If MBB doesn't have successors, delete the copies whose defs are not used.
+  // If MBB does have successors, then conservative assume the defs are live-out
+  // since we don't want to trust live-in lists.
+  if (MBB.succ_empty()) {
+    for (SmallSetVector<MachineInstr*, 8>::iterator
+           DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
+         DI != DE; ++DI) {
+      if (!ReservedRegs.test((*DI)->getOperand(0).getReg())) {
+        (*DI)->eraseFromParent();
+        Changed = true;
+        ++NumDeletes;
+      }
+    }
+  }
+
+  return Changed;
+}
+
+bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
+  bool Changed = false;
+
+  TRI = MF.getTarget().getRegisterInfo();
+  ReservedRegs = TRI->getReservedRegs(MF);
+
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+    Changed |= CopyPropagateBlock(*I);
+
+  return Changed;
+}
index ca02aa1b81436721f72eec90325ecdf7451f432e..07cf0276912498cef05b6aeeaed62766742e7e81 100644 (file)
@@ -96,7 +96,7 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
 
     // Create callee-saved registers bitvector.
     CalleeSavedRegs.resize(NumPhysRegs);
-    const unsigned *CSRegs = TRI->getCalleeSavedRegs();
+    const unsigned *CSRegs = TRI->getCalleeSavedRegs(&MF);
     if (CSRegs != NULL)
       for (unsigned i = 0; CSRegs[i]; ++i)
         CalleeSavedRegs.set(CSRegs[i]);
index b02f3b6e1e8ddb142dc58b996dc1df2fa52281d8..a6556a51c83c55f7544d70d95f42647ae8a4106f 100644 (file)
@@ -136,13 +136,8 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
 void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
   LoopRegs.Deps.clear();
   if (MachineLoop *ML = MLI.getLoopFor(BB))
-    if (BB == ML->getLoopLatch()) {
-      MachineBasicBlock *Header = ML->getHeader();
-      for (MachineBasicBlock::livein_iterator I = Header->livein_begin(),
-           E = Header->livein_end(); I != E; ++I)
-        LoopLiveInRegs.insert(*I);
+    if (BB == ML->getLoopLatch())
       LoopRegs.VisitLoop(ML);
-    }
 }
 
 /// AddSchedBarrierDeps - Add dependencies from instructions in the current
index 666bdf548c718c4cdb788e92f682691c31972dfd..a6233d34821ab870420e9da51adae6d3a3646f1d 100644 (file)
@@ -120,11 +120,6 @@ namespace llvm {
     ///
     LoopDependencies LoopRegs;
 
-    /// LoopLiveInRegs - Track which regs are live into a loop, to help guide
-    /// back-edge-aware scheduling.
-    ///
-    SmallSet<unsigned, 8> LoopLiveInRegs;
-
   protected:
 
     /// DbgValues - Remember instruction that preceeds DBG_VALUE.
index 91ef65925221ff9064626b042940bc9f8b218cb9..487ec690ea5d8cf5a486578c9083d907ac119e3e 100644 (file)
@@ -12,9 +12,9 @@ entry:
   br i1 %0, label %bb2, label %bb
 
 bb:
-; CHECK: LBB0_2:
-; CHECK: bne LBB0_2
-; CHECK-NOT: b LBB0_2
+; CHECK: LBB0_1:
+; CHECK: bne LBB0_1
+; CHECK-NOT: b LBB0_1
 ; CHECK: bx lr
   %list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ]
   %next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ]
diff --git a/test/CodeGen/X86/machine-cp.ll b/test/CodeGen/X86/machine-cp.ll
new file mode 100644 (file)
index 0000000..54fa01c
--- /dev/null
@@ -0,0 +1,36 @@
+; RUN: llc -mtriple=x86_64-apple-macosx -mcpu=nocona < %s | FileCheck %s
+
+; After tail duplication, two copies in an early exit BB can be cancelled out.
+; rdar://10640363
+define i32 @t1(i32 %a, i32 %b) nounwind  {
+entry:
+; CHECK: t1:
+; CHECK: jne
+  %cmp1 = icmp eq i32 %b, 0
+  br i1 %cmp1, label %while.end, label %while.body
+
+; CHECK: BB
+; CHECK-NOT: mov
+; CHECK: ret
+
+while.body:                                       ; preds = %entry, %while.body
+  %a.addr.03 = phi i32 [ %b.addr.02, %while.body ], [ %a, %entry ]
+  %b.addr.02 = phi i32 [ %rem, %while.body ], [ %b, %entry ]
+  %rem = srem i32 %a.addr.03, %b.addr.02
+  %cmp = icmp eq i32 %rem, 0
+  br i1 %cmp, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %a.addr.0.lcssa = phi i32 [ %a, %entry ], [ %b.addr.02, %while.body ]
+  ret i32 %a.addr.0.lcssa
+}
+
+; Two movdqa (from phi-elimination) in the entry BB cancels out.
+; rdar://10428165
+define <8 x i16> @t2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+; CHECK: t2:
+; CHECK-NOT: movdqa
+  %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
+  ret <8 x i16> %tmp8
+}