From: Jonas Paulsson <jonas.paulsson@ericsson.com>
Date: Thu, 8 Oct 2015 07:40:23 +0000 (+0000)
Subject: [SystemZ] SystemZElimCompare pass improved.
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=3d5f14c368671d3cb4df17d0d8913e98386d27e8;p=oota-llvm.git

[SystemZ] SystemZElimCompare pass improved.

Compare elimination extended to recognize load-and-test instructions used
for comparison and eliminate them the same way as with compare instructions.

Test case fp-cmp-05.ll updated to expect optimized results now also for z13.

The order of instruction shortening and compare elimination passes have been
changed so that opcodes do not have to be handled in both passes.

Reviewed by Ulrich Weigand.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@249666 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/SystemZ/SystemZElimCompare.cpp b/lib/Target/SystemZ/SystemZElimCompare.cpp
index ada17fc6308..d5f11eb9005 100644
--- a/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -155,6 +155,30 @@ Reference SystemZElimCompare::getRegReferences(MachineInstr *MI, unsigned Reg) {
   return Ref;
 }
 
+// Return true if this is a load and test which can be optimized the
+// same way as compare instruction.
+static bool isLoadAndTestAsCmp(MachineInstr *MI) {
+  // If we during isel used a load-and-test as a compare with 0, the
+  // def operand is dead.
+  return ((MI->getOpcode() == SystemZ::LTEBR ||
+	   MI->getOpcode() == SystemZ::LTDBR ||
+	   MI->getOpcode() == SystemZ::LTXBR) &&
+	  MI->getOperand(0).isDead());
+}
+
+// Return the source register of Compare, which is the unknown value
+// being tested.
+static unsigned getCompareSourceReg(MachineInstr *Compare) {
+  unsigned reg = 0;
+  if (Compare->isCompare())
+    reg = Compare->getOperand(0).getReg();
+  else if (isLoadAndTestAsCmp(Compare))
+    reg = Compare->getOperand(1).getReg();
+  assert (reg);
+
+  return reg;
+}
+
 // Compare compares the result of MI against zero.  If MI is an addition
 // of -1 and if CCUsers is a single branch on nonzero, eliminate the addition
 // and convert the branch to a BRCT(G).  Return true on success.
@@ -185,7 +209,7 @@ SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare,
   // We already know that there are no references to the register between
   // MI and Compare.  Make sure that there are also no references between
   // Compare and Branch.
-  unsigned SrcReg = Compare->getOperand(0).getReg();
+  unsigned SrcReg = getCompareSourceReg(Compare);
   MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch;
   for (++MBBI; MBBI != MBBE; ++MBBI)
     if (getRegReferences(MBBI, SrcReg))
@@ -305,6 +329,10 @@ static bool isCompareZero(MachineInstr *Compare) {
     return true;
 
   default:
+
+    if (isLoadAndTestAsCmp(Compare))
+      return true;
+
     return (Compare->getNumExplicitOperands() == 2 &&
             Compare->getOperand(1).isImm() &&
             Compare->getOperand(1).getImm() == 0);
@@ -322,7 +350,7 @@ optimizeCompareZero(MachineInstr *Compare,
     return false;
 
   // Search back for CC results that are based on the first operand.
-  unsigned SrcReg = Compare->getOperand(0).getReg();
+  unsigned SrcReg = getCompareSourceReg(Compare);
   MachineBasicBlock &MBB = *Compare->getParent();
   MachineBasicBlock::iterator MBBI = Compare, MBBE = MBB.begin();
   Reference CCRefs;
@@ -431,7 +459,7 @@ bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) {
   while (MBBI != MBB.begin()) {
     MachineInstr *MI = --MBBI;
     if (CompleteCCUsers &&
-        MI->isCompare() &&
+        (MI->isCompare() || isLoadAndTestAsCmp(MI)) &&
         (optimizeCompareZero(MI, CCUsers) ||
          fuseCompareAndBranch(MI, CCUsers))) {
       ++MBBI;
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index efa3f82cec4..22beaad2ab7 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -130,6 +130,13 @@ void SystemZPassConfig::addPreSched2() {
 }
 
 void SystemZPassConfig::addPreEmitPass() {
+
+  // Do instruction shortening before compare elimination because some
+  // vector instructions will be shortened into opcodes that compare
+  // elimination recognizes.
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(createSystemZShortenInstPass(getSystemZTargetMachine()), false);
+
   // We eliminate comparisons here rather than earlier because some
   // transformations can change the set of available CC values and we
   // generally want those transformations to have priority.  This is
@@ -155,8 +162,6 @@ void SystemZPassConfig::addPreEmitPass() {
   // preventing that would be a win or not.
   if (getOptLevel() != CodeGenOpt::None)
     addPass(createSystemZElimComparePass(getSystemZTargetMachine()), false);
-  if (getOptLevel() != CodeGenOpt::None)
-    addPass(createSystemZShortenInstPass(getSystemZTargetMachine()), false);
   addPass(createSystemZLongBranchPass(getSystemZTargetMachine()));
 }
 
diff --git a/test/CodeGen/SystemZ/fp-cmp-05.ll b/test/CodeGen/SystemZ/fp-cmp-05.ll
index 1d71a0fcec5..7e937d1adae 100644
--- a/test/CodeGen/SystemZ/fp-cmp-05.ll
+++ b/test/CodeGen/SystemZ/fp-cmp-05.ll
@@ -4,14 +4,15 @@
 ; handled by SystemZElimcompare, so for Z13 this is currently
 ; unimplemented.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s -check-prefix=CHECK-Z10
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 ; Load complement (sign-bit flipped).
 ; Test f32
 define float @f1(float %a, float %b, float %f) {
 ; CHECK-LABEL: f1:
-; CHECK-Z10: lcebr
-; CHECK-Z10-NEXT: je
+; CHECK: lcebr
+; CHECK-NEXT: je
   %neg = fsub float -0.0, %f
   %cond = fcmp oeq float %neg, 0.0
   %res = select i1 %cond, float %a, float %b
@@ -21,8 +22,8 @@ define float @f1(float %a, float %b, float %f) {
 ; Test f64
 define double @f2(double %a, double %b, double %f) {
 ; CHECK-LABEL: f2:
-; CHECK-Z10: lcdbr
-; CHECK-Z10-NEXT: je
+; CHECK: lcdbr
+; CHECK-NEXT: je
   %neg = fsub double -0.0, %f
   %cond = fcmp oeq double %neg, 0.0
   %res = select i1 %cond, double %a, double %b
@@ -34,8 +35,8 @@ define double @f2(double %a, double %b, double %f) {
 declare float @llvm.fabs.f32(float %f)
 define float @f3(float %a, float %b, float %f) {
 ; CHECK-LABEL: f3:
-; CHECK-Z10: lnebr
-; CHECK-Z10-NEXT: je
+; CHECK: lnebr
+; CHECK-NEXT: je
   %abs = call float @llvm.fabs.f32(float %f)
   %neg = fsub float -0.0, %abs
   %cond = fcmp oeq float %neg, 0.0
@@ -47,8 +48,8 @@ define float @f3(float %a, float %b, float %f) {
 declare double @llvm.fabs.f64(double %f)
 define double @f4(double %a, double %b, double %f) {
 ; CHECK-LABEL: f4:
-; CHECK-Z10: lndbr
-; CHECK-Z10-NEXT: je
+; CHECK: lndbr
+; CHECK-NEXT: je
   %abs = call double @llvm.fabs.f64(double %f)
   %neg = fsub double -0.0, %abs
   %cond = fcmp oeq double %neg, 0.0
@@ -60,8 +61,8 @@ define double @f4(double %a, double %b, double %f) {
 ; Test f32
 define float @f5(float %a, float %b, float %f) {
 ; CHECK-LABEL: f5:
-; CHECK-Z10: lpebr
-; CHECK-Z10-NEXT: je
+; CHECK: lpebr
+; CHECK-NEXT: je
   %abs = call float @llvm.fabs.f32(float %f)
   %cond = fcmp oeq float %abs, 0.0
   %res = select i1 %cond, float %a, float %b
@@ -71,8 +72,8 @@ define float @f5(float %a, float %b, float %f) {
 ; Test f64
 define double @f6(double %a, double %b, double %f) {
 ; CHECK-LABEL: f6:
-; CHECK-Z10: lpdbr
-; CHECK-Z10-NEXT: je
+; CHECK: lpdbr
+; CHECK-NEXT: je
   %abs = call double @llvm.fabs.f64(double %f)
   %cond = fcmp oeq double %abs, 0.0
   %res = select i1 %cond, double %a, double %b