Allow MachineCSE to coalesce trivial subregister copies the same way that it coalesces normal copies.

author Andrew Trick <atrick@apple.com>

Mon, 16 Dec 2013 19:36:21 +0000 (19:36 +0000)

committer Andrew Trick <atrick@apple.com>

Mon, 16 Dec 2013 19:36:21 +0000 (19:36 +0000)
author Andrew Trick <atrick@apple.com>
Mon, 16 Dec 2013 19:36:21 +0000 (19:36 +0000)
committer Andrew Trick <atrick@apple.com>
Mon, 16 Dec 2013 19:36:21 +0000 (19:36 +0000)
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp

index 2e90f7472ed9efc5ac3d2645761b11a81bda3ebd..80982bca8ce8dd1fba768c36aac7acdfa93e4db0 100644 (file)
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -131,13 +131,18 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
      unsigned SrcReg = DefMI->getOperand(1).getReg();
      if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
        continue;
-    if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg())
+    if (DefMI->getOperand(0).getSubReg())
        continue;
-    if (!MRI->constrainRegClass(SrcReg, MRI->getRegClass(Reg)))
+    unsigned SrcSubReg = DefMI->getOperand(1).getSubReg();
+    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+    if (SrcSubReg)
+      RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
+                                         SrcSubReg);
+    if (!MRI->constrainRegClass(SrcReg, RC))
        continue;
      DEBUG(dbgs() << "Coalescing: " << *DefMI);
      DEBUG(dbgs() << "***     to: " << *MI);
-    MO.setReg(SrcReg);
+    MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
      MRI->clearKillFlags(SrcReg);
      DefMI->eraseFromParent();
      ++NumCoalesces;
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp

index b9a6b479c358ef50097dec84f8af36bc1b93f4d0..b94576029191fc416912544735506e9813749fee 100644 (file)
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1349,6 +1349,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
    unsigned LastCopiedReg = 0;
    SlotIndex LastCopyIdx;
    unsigned RegB = 0;
+  unsigned SubRegB = 0;
    for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
      unsigned SrcIdx = TiedPairs[tpi].first;
      unsigned DstIdx = TiedPairs[tpi].second;
@@ -1359,6 +1360,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
      // Grab RegB from the instruction because it may have changed if the
      // instruction was commuted.
      RegB = MI->getOperand(SrcIdx).getReg();
+    SubRegB = MI->getOperand(SrcIdx).getSubReg();
  
      if (RegA == RegB) {
        // The register is tied to multiple destinations (or else we would
@@ -1383,8 +1385,25 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
  #endif
  
      // Emit a copy.
-    BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
-            TII->get(TargetOpcode::COPY), RegA).addReg(RegB);
+    MachineInstrBuilder MIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+                                      TII->get(TargetOpcode::COPY), RegA);
+    // If this operand is folding a truncation, the truncation now moves to the
+    // copy so that the register classes remain valid for the operands.
+    MIB.addReg(RegB, 0, SubRegB);
+    const TargetRegisterClass *RC = MRI->getRegClass(RegB);
+    if (SubRegB) {
+      if (TargetRegisterInfo::isVirtualRegister(RegA)) {
+        assert(TRI->getMatchingSuperRegClass(MRI->getRegClass(RegB),
+                                             MRI->getRegClass(RegA), SubRegB) &&
+               "tied subregister must be a truncation");
+        // The superreg class will not be used to constrain the subreg class.
+        RC = 0;
+      }
+      else {
+        assert(TRI->getMatchingSuperReg(RegA, SubRegB, MRI->getRegClass(RegB))
+               && "tied subregister must be a truncation");
+      }
+    }
  
      // Update DistanceMap.
      MachineBasicBlock::iterator PrevMI = MI;
@@ -1404,7 +1423,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
        }
      }
  
-    DEBUG(dbgs() << "\t\tprepend:\t" << *PrevMI);
+    DEBUG(dbgs() << "\t\tprepend:\t" << *MIB);
  
      MachineOperand &MO = MI->getOperand(SrcIdx);
      assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() &&
@@ -1417,9 +1436,9 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
      // Make sure regA is a legal regclass for the SrcIdx operand.
      if (TargetRegisterInfo::isVirtualRegister(RegA) &&
          TargetRegisterInfo::isVirtualRegister(RegB))
-      MRI->constrainRegClass(RegA, MRI->getRegClass(RegB));
-
+      MRI->constrainRegClass(RegA, RC);
      MO.setReg(RegA);
+    MO.setSubReg(0);
  
      // Propagate SrcRegMap.
      SrcRegMap[RegA] = RegB;
@@ -1431,12 +1450,14 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
        // Replace other (un-tied) uses of regB with LastCopiedReg.
        for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
          MachineOperand &MO = MI->getOperand(i);
-        if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
+        if (MO.isReg() && MO.getReg() == RegB && MO.getSubReg() == SubRegB &&
+            MO.isUse()) {
            if (MO.isKill()) {
              MO.setIsKill(false);
              RemovedKillFlag = true;
            }
            MO.setReg(LastCopiedReg);
+          MO.setSubReg(0);
          }
        }
      }
diff --git a/test/CodeGen/X86/cmov.ll b/test/CodeGen/X86/cmov.ll

index 215b86267a47623feaa2f4e801208dbccd1c2e0e..d7c684a730da0b3081e69cd87b9a943086f90029 100644 (file)
--- a/test/CodeGen/X86/cmov.ll
+++ b/test/CodeGen/X86/cmov.ll
@@ -41,8 +41,8 @@ declare void @bar(i64) nounwind
  
  define void @test3(i64 %a, i64 %b, i1 %p) nounwind {
  ; CHECK-LABEL: test3:
-; CHECK:      cmovnel %edi, %esi
-; CHECK-NEXT: movl    %esi, %edi
+; CHECK:      cmov{{n?}}el %[[R1:e..]], %[[R2:e..]]
+; CHECK-NEXT: movl    %[[R2]], %[[R2]]
  
    %c = trunc i64 %a to i32
    %d = trunc i64 %b to i32
diff --git a/test/CodeGen/X86/cse-add-with-overflow.ll b/test/CodeGen/X86/cse-add-with-overflow.ll

new file mode 100644 (file)

index 0000000..ee4fbad
--- /dev/null
+++ b/test/CodeGen/X86/cse-add-with-overflow.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -mtriple=x86_64-darwin -mcpu=generic | FileCheck %s
+; rdar:15661073 simple example of redundant adds
+;
+; MachineCSE should coalesce trivial subregister copies.
+;
+; The extra movl+addl should be removed during MachineCSE.
+; CHECK-LABEL: redundantadd
+; CHECK: cmpq
+; CHECK: movq
+; CHECK-NOT: movl
+; CHECK: addl
+; CHECK-NOT: addl
+; CHECK: ret
+
+define i64 @redundantadd(i64* %a0, i64* %a1) {
+entry:
+  %tmp8 = load i64* %a0, align 8
+  %tmp12 = load i64* %a1, align 8
+  %tmp13 = icmp ult i64 %tmp12, -281474976710656
+  br i1 %tmp13, label %exit1, label %body
+
+exit1:
+  unreachable
+
+body:
+  %tmp14 = trunc i64 %tmp8 to i32
+  %tmp15 = trunc i64 %tmp12 to i32
+  %tmp16 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %tmp14, i32 %tmp15)
+  %tmp17 = extractvalue { i32, i1 } %tmp16, 1
+  br i1 %tmp17, label %exit2, label %return
+
+exit2:
+  unreachable
+
+return:
+  %tmp18 = add i64 %tmp12, %tmp8
+  %tmp19 = and i64 %tmp18, 4294967295
+  %tmp20 = or i64 %tmp19, -281474976710656
+  ret i64 %tmp20
+}
+
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)
author	Andrew Trick <atrick@apple.com>
	Mon, 16 Dec 2013 19:36:21 +0000 (19:36 +0000)
committer	Andrew Trick <atrick@apple.com>
	Mon, 16 Dec 2013 19:36:21 +0000 (19:36 +0000)
lib/CodeGen/MachineCSE.cpp		patch \| blob \| history
lib/CodeGen/TwoAddressInstructionPass.cpp		patch \| blob \| history
test/CodeGen/X86/cmov.ll		patch \| blob \| history
test/CodeGen/X86/cse-add-with-overflow.ll	[new file with mode: 0644]	patch \| blob