LEA code size optimization pass (Part 2): Remove redundant LEA instructions.

author Andrey Turetskiy <andrey.turetskiy@gmail.com>

Wed, 13 Jan 2016 11:30:44 +0000 (11:30 +0000)

committer Andrey Turetskiy <andrey.turetskiy@gmail.com>

Wed, 13 Jan 2016 11:30:44 +0000 (11:30 +0000)
author Andrey Turetskiy <andrey.turetskiy@gmail.com>
Wed, 13 Jan 2016 11:30:44 +0000 (11:30 +0000)
committer Andrey Turetskiy <andrey.turetskiy@gmail.com>
Wed, 13 Jan 2016 11:30:44 +0000 (11:30 +0000)
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h

index e3d95284ee9fda6a1566afd490cad7262a77b74e..01e65b89f480cf44922af8bc4c2109150f9d5aac 100644 (file)
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -54,7 +54,8 @@ FunctionPass *createX86PadShortFunctions();
  /// instructions, in order to eliminate execution delays in some processors.
  FunctionPass *createX86FixupLEAs();
  
-/// Return a pass that removes redundant address recalculations.
+/// Return a pass that removes redundant LEA instructions and redundant address
+/// recalculations.
  FunctionPass *createX86OptimizeLEAs();
  
  /// Return a pass that optimizes the code-size of x86 call sequences. This is
diff --git a/lib/Target/X86/X86OptimizeLEAs.cpp b/lib/Target/X86/X86OptimizeLEAs.cpp

index fc753f5b04a7783c785abe6c4fc46fd31705d0af..45cc0aef1d9340df5aa6e02891fc8aee8fb1df4e 100644 (file)
--- a/lib/Target/X86/X86OptimizeLEAs.cpp
+++ b/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -9,8 +9,10 @@
  //
  // This file defines the pass that performs some optimizations with LEA
  // instructions in order to improve code size.
-// Currently, it does one thing:
-// 1) Address calculations in load and store instructions are replaced by
+// Currently, it does two things:
+// 1) If there are two LEA instructions calculating addresses which only differ
+//    by displacement inside a basic block, one of them is removed.
+// 2) Address calculations in load and store instructions are replaced by
  //    existing LEA def registers where possible.
  //
  //===----------------------------------------------------------------------===//
@@ -38,6 +40,7 @@ static cl::opt<bool> EnableX86LEAOpt("enable-x86-lea-opt", cl::Hidden,
                                       cl::init(false));
  
  STATISTIC(NumSubstLEAs, "Number of LEA instruction substitutions");
+STATISTIC(NumRedundantLEAs, "Number of redundant LEA instructions removed");
  
  namespace {
  class OptimizeLEAPass : public MachineFunctionPass {
@@ -71,6 +74,13 @@ private:
    /// \brief Returns true if the instruction is LEA.
    bool isLEA(const MachineInstr &MI);
  
+  /// \brief Returns true if the \p Last LEA instruction can be replaced by the
+  /// \p First. The difference between displacements of the addresses calculated
+  /// by these LEAs is returned in \p AddrDispShift. It'll be used for proper
+  /// replacement of the \p Last LEA's uses with the \p First's def register.
+  bool isReplaceable(const MachineInstr &First, const MachineInstr &Last,
+                     int64_t &AddrDispShift);
+
    /// \brief Returns true if two instructions have memory operands that only
    /// differ by displacement. The numbers of the first memory operands for both
    /// instructions are specified through \p N1 and \p N2. The address
@@ -88,6 +98,9 @@ private:
    /// \brief Removes redundant address calculations.
    bool removeRedundantAddrCalc(const SmallVectorImpl<MachineInstr *> &List);
  
+  /// \brief Removes LEAs which calculate similar addresses.
+  bool removeRedundantLEAs(SmallVectorImpl<MachineInstr *> &List);
+
    DenseMap<const MachineInstr *, unsigned> InstrPos;
  
    MachineRegisterInfo *MRI;
@@ -194,6 +207,69 @@ bool OptimizeLEAPass::isLEA(const MachineInstr &MI) {
           Opcode == X86::LEA64r || Opcode == X86::LEA64_32r;
  }
  
+// Check that the Last LEA can be replaced by the First LEA. To be so,
+// these requirements must be met:
+// 1) Addresses calculated by LEAs differ only by displacement.
+// 2) Def registers of LEAs belong to the same class.
+// 3) All uses of the Last LEA def register are replaceable, thus the
+//    register is used only as address base.
+bool OptimizeLEAPass::isReplaceable(const MachineInstr &First,
+                                    const MachineInstr &Last,
+                                    int64_t &AddrDispShift) {
+  assert(isLEA(First) && isLEA(Last) &&
+         "The function works only with LEA instructions");
+
+  // Compare instructions' memory operands.
+  if (!isSimilarMemOp(Last, 1, First, 1, AddrDispShift))
+    return false;
+
+  // Make sure that LEA def registers belong to the same class. There may be
+  // instructions (like MOV8mr_NOREX) which allow a limited set of registers to
+  // be used as their operands, so we must be sure that replacing one LEA
+  // with another won't lead to putting a wrong register in the instruction.
+  if (MRI->getRegClass(First.getOperand(0).getReg()) !=
+      MRI->getRegClass(Last.getOperand(0).getReg()))
+    return false;
+
+  // Loop over all uses of the Last LEA to check that its def register is
+  // used only as address base for memory accesses. If so, it can be
+  // replaced, otherwise - no.
+  for (auto &MO : MRI->use_operands(Last.getOperand(0).getReg())) {
+    MachineInstr &MI = *MO.getParent();
+
+    // Get the number of the first memory operand.
+    const MCInstrDesc &Desc = MI.getDesc();
+    int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, MI.getOpcode());
+
+    // If the use instruction has no memory operand - the LEA is not
+    // replaceable.
+    if (MemOpNo < 0)
+      return false;
+
+    MemOpNo += X86II::getOperandBias(Desc);
+
+    // If the address base of the use instruction is not the LEA def register -
+    // the LEA is not replaceable.
+    if (!isIdenticalOp(MI.getOperand(MemOpNo + X86::AddrBaseReg), MO))
+      return false;
+
+    // If the LEA def register is used as any other operand of the use
+    // instruction - the LEA is not replaceable.
+    for (unsigned i = 0; i < MI.getNumOperands(); i++)
+      if (i != (unsigned)(MemOpNo + X86::AddrBaseReg) &&
+          isIdenticalOp(MI.getOperand(i), MO))
+        return false;
+
+    // Check that the new address displacement will fit 4 bytes.
+    if (MI.getOperand(MemOpNo + X86::AddrDisp).isImm() &&
+        !isInt<32>(MI.getOperand(MemOpNo + X86::AddrDisp).getImm() +
+                   AddrDispShift))
+      return false;
+  }
+
+  return true;
+}
+
  // Check if MI1 and MI2 have memory operands which represent addresses that
  // differ only by displacement.
  bool OptimizeLEAPass::isSimilarMemOp(const MachineInstr &MI1, unsigned N1,
@@ -316,6 +392,81 @@ bool OptimizeLEAPass::removeRedundantAddrCalc(
    return Changed;
  }
  
+// Try to find similar LEAs in the list and replace one with another.
+bool
+OptimizeLEAPass::removeRedundantLEAs(SmallVectorImpl<MachineInstr *> &List) {
+  bool Changed = false;
+
+  // Loop over all LEA pairs.
+  auto I1 = List.begin();
+  while (I1 != List.end()) {
+    MachineInstr &First = **I1;
+    auto I2 = std::next(I1);
+    while (I2 != List.end()) {
+      MachineInstr &Last = **I2;
+      int64_t AddrDispShift;
+
+      // LEAs should be in occurence order in the list, so we can freely
+      // replace later LEAs with earlier ones.
+      assert(calcInstrDist(First, Last) > 0 &&
+             "LEAs must be in occurence order in the list");
+
+      // Check that the Last LEA instruction can be replaced by the First.
+      if (!isReplaceable(First, Last, AddrDispShift)) {
+        ++I2;
+        continue;
+      }
+
+      // Loop over all uses of the Last LEA and update their operands. Note that
+      // the correctness of this has already been checked in the isReplaceable
+      // function.
+      for (auto UI = MRI->use_begin(Last.getOperand(0).getReg()),
+                UE = MRI->use_end();
+           UI != UE;) {
+        MachineOperand &MO = *UI++;
+        MachineInstr &MI = *MO.getParent();
+
+        // Get the number of the first memory operand.
+        const MCInstrDesc &Desc = MI.getDesc();
+        int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, MI.getOpcode()) +
+                      X86II::getOperandBias(Desc);
+
+        // Update address base.
+        MO.setReg(First.getOperand(0).getReg());
+
+        // Update address disp.
+        MachineOperand *Op = &MI.getOperand(MemOpNo + X86::AddrDisp);
+        if (Op->isImm())
+          Op->setImm(Op->getImm() + AddrDispShift);
+        else if (Op->isGlobal())
+          Op->setOffset(Op->getOffset() + AddrDispShift);
+        else
+          llvm_unreachable("Invalid address displacement operand");
+      }
+
+      // Since we can possibly extend register lifetime, clear kill flags.
+      MRI->clearKillFlags(First.getOperand(0).getReg());
+
+      ++NumRedundantLEAs;
+      DEBUG(dbgs() << "OptimizeLEAs: Remove redundant LEA: "; Last.dump(););
+
+      // By this moment, all of the Last LEA's uses must be replaced. So we can
+      // freely remove it.
+      assert(MRI->use_empty(Last.getOperand(0).getReg()) &&
+             "The LEA's def register must have no uses");
+      Last.eraseFromParent();
+
+      // Erase removed LEA from the list.
+      I2 = List.erase(I2);
+
+      Changed = true;
+    }
+    ++I1;
+  }
+
+  return Changed;
+}
+
  bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
    bool Changed = false;
  
@@ -339,6 +490,11 @@ bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
      if (LEAs.empty())
        continue;
  
+    // Remove redundant LEA instructions. The optimization may have a negative
+    // effect on performance, so do it only for -Oz.
+    if (MF.getFunction()->optForMinSize())
+      Changed |= removeRedundantLEAs(LEAs);
+
      // Remove redundant address calculations.
      Changed |= removeRedundantAddrCalc(LEAs);
    }
diff --git a/test/CodeGen/X86/lea-opt.ll b/test/CodeGen/X86/lea-opt.ll

index 571f2d9084c4dcc2aa4d1d6353ec1dd652dc22aa..8096bfabd6cfe945520763925dfdd54276724b16 100644 (file)
--- a/test/CodeGen/X86/lea-opt.ll
+++ b/test/CodeGen/X86/lea-opt.ll
@@ -129,3 +129,41 @@ sw.epilog:                                        ; preds = %sw.bb.2, %sw.bb.1,
  ; CHECK:       movl ${{[1-4]+}}, ([[REG2]])
  ; CHECK:       movl ${{[1-4]+}}, ([[REG3]])
  }
+
+define void @test4(i64 %x) nounwind minsize {
+entry:
+  %a = getelementptr inbounds [65 x %struct.anon1], [65 x %struct.anon1]* @arr1, i64 0, i64 %x, i32 0
+  %tmp = load i32, i32* %a, align 4
+  %b = getelementptr inbounds [65 x %struct.anon1], [65 x %struct.anon1]* @arr1, i64 0, i64 %x, i32 1
+  %tmp1 = load i32, i32* %b, align 4
+  %sub = sub i32 %tmp, %tmp1
+  %c = getelementptr inbounds [65 x %struct.anon1], [65 x %struct.anon1]* @arr1, i64 0, i64 %x, i32 2
+  %tmp2 = load i32, i32* %c, align 4
+  %add = add nsw i32 %sub, %tmp2
+  switch i32 %add, label %sw.epilog [
+    i32 1, label %sw.bb.1
+    i32 2, label %sw.bb.2
+  ]
+
+sw.bb.1:                                          ; preds = %entry
+  store i32 111, i32* %b, align 4
+  store i32 222, i32* %c, align 4
+  br label %sw.epilog
+
+sw.bb.2:                                          ; preds = %entry
+  store i32 333, i32* %b, align 4
+  store i32 444, i32* %c, align 4
+  br label %sw.epilog
+
+sw.epilog:                                        ; preds = %sw.bb.2, %sw.bb.1, %entry
+  ret void
+; CHECK-LABEL: test4:
+; CHECK:       leaq arr1+4({{.*}}), [[REG2:%[a-z]+]]
+; CHECK:       movl -4([[REG2]]), {{.*}}
+; CHECK:       subl ([[REG2]]), {{.*}}
+; CHECK:       addl 4([[REG2]]), {{.*}}
+; CHECK:       movl ${{[1-4]+}}, ([[REG2]])
+; CHECK:       movl ${{[1-4]+}}, 4([[REG2]])
+; CHECK:       movl ${{[1-4]+}}, ([[REG2]])
+; CHECK:       movl ${{[1-4]+}}, 4([[REG2]])
+}
author	Andrey Turetskiy <andrey.turetskiy@gmail.com>
	Wed, 13 Jan 2016 11:30:44 +0000 (11:30 +0000)
committer	Andrey Turetskiy <andrey.turetskiy@gmail.com>
	Wed, 13 Jan 2016 11:30:44 +0000 (11:30 +0000)
lib/Target/X86/X86.h		patch \| blob \| history
lib/Target/X86/X86OptimizeLEAs.cpp		patch \| blob \| history
test/CodeGen/X86/lea-opt.ll		patch \| blob \| history