[PowerPC] Add an MI SSA peephole pass.

author Bill Schmidt <wschmidt@linux.vnet.ibm.com>

Tue, 10 Nov 2015 21:38:26 +0000 (21:38 +0000)

committer Bill Schmidt <wschmidt@linux.vnet.ibm.com>

Tue, 10 Nov 2015 21:38:26 +0000 (21:38 +0000)
author Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Tue, 10 Nov 2015 21:38:26 +0000 (21:38 +0000)
committer Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Tue, 10 Nov 2015 21:38:26 +0000 (21:38 +0000)
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h

index ae8d8b4f5dfe0d4b3a22bec2ac3a22cfa0cb81b5..e157fd37c6e1d53734a9e183f57006c11515c3ab 100644 (file)
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -41,6 +41,7 @@ namespace llvm {
    FunctionPass *createPPCVSXCopyPass();
    FunctionPass *createPPCVSXFMAMutatePass();
    FunctionPass *createPPCVSXSwapRemovalPass();
    FunctionPass *createPPCVSXCopyPass();
    FunctionPass *createPPCVSXFMAMutatePass();
    FunctionPass *createPPCVSXSwapRemovalPass();
+  FunctionPass *createPPCMIPeepholePass();
    FunctionPass *createPPCBranchSelectionPass();
    FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
    FunctionPass *createPPCTLSDynamicCallPass();
    FunctionPass *createPPCBranchSelectionPass();
    FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
    FunctionPass *createPPCTLSDynamicCallPass();
diff --git a/lib/Target/PowerPC/PPCMIPeephole.cpp b/lib/Target/PowerPC/PPCMIPeephole.cpp

new file mode 100644 (file)

index 0000000..fe339d7
--- /dev/null
+++ b/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -0,0 +1,230 @@
+//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+//
+// This pass performs peephole optimizations to clean up ugly code
+// sequences at the MachineInstruction layer.  It runs at the end of
+// the SSA phases, following VSX swap removal.  A pass of dead code
+// elimination follows this one for quick clean-up of any dead
+// instructions introduced here.  Although we could do this as callbacks
+// from the generic peephole pass, this would have a couple of bad
+// effects:  it might remove optimization opportunities for VSX swap
+// removal, and it would miss cleanups made possible following VSX
+// swap removal.
+//
+//===---------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-mi-peepholes"
+
+namespace llvm {
+  void initializePPCMIPeepholePass(PassRegistry&);
+}
+
+namespace {
+
+struct PPCMIPeephole : public MachineFunctionPass {
+
+  static char ID;
+  const PPCInstrInfo *TII;
+  MachineFunction *MF;
+  MachineRegisterInfo *MRI;
+
+  PPCMIPeephole() : MachineFunctionPass(ID) {
+    initializePPCMIPeepholePass(*PassRegistry::getPassRegistry());
+  }
+
+private:
+  // Point MF, MRI and TII at the function that is about to be processed.
+  void initialize(MachineFunction &MFParm);
+
+  // Run the peepholes over MF; returns true if any instruction was changed.
+  bool simplifyCode(void);
+
+  // Find the "true" register represented by SrcReg (following chains
+  // of copies and subreg_to_reg operations).
+  unsigned lookThruCopyLike(unsigned SrcReg);
+
+public:
+  // Pass entry point: set up per-function state, then run the peepholes.
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    initialize(MF);
+    return simplifyCode();
+  }
+};
+
+// Record MFParm as MF, cache its MRI and PPC TII, and debug-dump the function.
+void PPCMIPeephole::initialize(MachineFunction &MFParm) {
+  MF = &MFParm;
+  MRI = &MF->getRegInfo();
+  TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
+  DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
+  DEBUG(MF->dump());
+}
+
+// Perform peephole optimizations.  Returns true if anything was changed.
+bool PPCMIPeephole::simplifyCode(void) {
+  bool Simplified = false;
+  MachineInstr* ToErase = nullptr;
+
+  for (MachineBasicBlock &MBB : *MF) {
+    for (MachineInstr &MI : MBB) {
+
+      // If the previous instruction was marked for elimination, remove
+      // it now (erasure is deferred until the walk has moved past it).
+      if (ToErase) {
+        ToErase->eraseFromParent();
+        ToErase = nullptr;
+      }
+
+      // Ignore debug instructions.
+      if (MI.isDebugValue())
+        continue;
+
+      // Per-opcode peepholes.
+      switch (MI.getOpcode()) {
+
+      default:
+        break;
+
+      case PPC::XXPERMDI: {
+        // Perform simplifications of 2x64 vector swaps and splats.
+        // A swap is identified by an immediate value of 2, and a splat
+        // is identified by an immediate value of 0 or 3.
+        int Immed = MI.getOperand(3).getImm();
+
+        if (Immed != 1) {
+
+          // For each of these simplifications, we need the two source
+          // regs to match.  Unfortunately, MachineCSE ignores COPY and
+          // SUBREG_TO_REG, so for example we can see
+          //   XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
+          // We have to look through chains of COPY and SUBREG_TO_REG
+          // to find the real source values for comparison.
+          unsigned TrueReg1 = lookThruCopyLike(MI.getOperand(1).getReg());
+          unsigned TrueReg2 = lookThruCopyLike(MI.getOperand(2).getReg());
+
+          if (TrueReg1 == TrueReg2
+              && TargetRegisterInfo::isVirtualRegister(TrueReg1)) {
+            MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
+
+            // If this is a splat or a swap fed by another splat, we
+            // can replace it with a copy.
+            if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
+              unsigned FeedImmed = DefMI->getOperand(3).getImm();
+              unsigned FeedReg1
+                = lookThruCopyLike(DefMI->getOperand(1).getReg());
+              unsigned FeedReg2
+                = lookThruCopyLike(DefMI->getOperand(2).getReg());
+
+              if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
+                DEBUG(dbgs()
+                      << "Optimizing splat/swap or splat/splat "
+                      "to splat/copy: ");
+                DEBUG(MI.dump());
+                BuildMI(MBB, &MI, MI.getDebugLoc(),
+                        TII->get(PPC::COPY), MI.getOperand(0).getReg())
+                  .addOperand(MI.getOperand(1));
+                ToErase = &MI;
+                Simplified = true;
+              }
+
+              // If this is a splat fed by a swap, we can simply modify
+              // the splat to splat the other value from the swap's input
+              // parameter.
+              else if ((Immed == 0 || Immed == 3)
+                       && FeedImmed == 2 && FeedReg1 == FeedReg2) {
+                DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
+                DEBUG(MI.dump());
+                MI.getOperand(1).setReg(DefMI->getOperand(1).getReg());
+                MI.getOperand(2).setReg(DefMI->getOperand(2).getReg());
+                MI.getOperand(3).setImm(3 - Immed);
+                Simplified = true;
+              }
+
+              // If this is a swap fed by a swap, we can replace it
+              // with a copy from the first swap's input.
+              else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
+                DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
+                DEBUG(MI.dump());
+                BuildMI(MBB, &MI, MI.getDebugLoc(),
+                        TII->get(PPC::COPY), MI.getOperand(0).getReg())
+                  .addOperand(DefMI->getOperand(1));
+                ToErase = &MI;
+                Simplified = true;
+              }
+            }
+          }
+        }
+        break;
+      }
+      }
+    }
+
+    // If the last instruction was marked for elimination,
+    // remove it now.
+    if (ToErase) {
+      ToErase->eraseFromParent();
+      ToErase = nullptr;
+    }
+  }
+
+  return Simplified;
+}
+
+// This is used to find the "true" source register for an
+// XXPERMDI instruction, since MachineCSE does not handle the
+// "copy-like" operations (Copy and SubregToReg).  Returns
+// the original SrcReg unless it is the target of a copy-like
+// operation, in which case we chain backwards through all
+// such operations to the ultimate source register.  If a
+// physical register is encountered, we stop the search.
+unsigned PPCMIPeephole::lookThruCopyLike(unsigned SrcReg) {
+
+  while (true) {
+
+    MachineInstr *MI = MRI->getVRegDef(SrcReg);
+    if (!MI->isCopyLike())
+      return SrcReg;
+
+    unsigned CopySrcReg;
+    if (MI->isCopy())
+      CopySrcReg = MI->getOperand(1).getReg();
+    else {
+      assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike");
+      CopySrcReg = MI->getOperand(2).getReg();
+    }
+
+    if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg))
+      return CopySrcReg;
+
+    SrcReg = CopySrcReg;
+  }
+}
+
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
+                      "PowerPC MI Peephole Optimization", false, false)
+INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
+                    "PowerPC MI Peephole Optimization", false, false)
+
+char PPCMIPeephole::ID = 0;
+FunctionPass*
+llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }
+
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp

index df687b2cade30471d23c92d5403ddbf9f75b7b6e..24a9ef0ef077189a14d480256710cf1f5ddf88dc 100644 (file)
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -42,6 +42,10 @@ static cl::
  opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden,
                                  cl::desc("Disable VSX Swap Removal for PPC"));
  
  opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden,
                                  cl::desc("Disable VSX Swap Removal for PPC"));
  
+static cl::
+opt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden,
+                            cl::desc("Disable machine peepholes for PPC"));
+
  static cl::opt<bool>
  EnableGEPOpt("ppc-gep-opt", cl::Hidden,
               cl::desc("Enable optimizations on complex GEPs"),
  static cl::opt<bool>
  EnableGEPOpt("ppc-gep-opt", cl::Hidden,
               cl::desc("Enable optimizations on complex GEPs"),
@@ -348,6 +352,12 @@ void PPCPassConfig::addMachineSSAOptimization() {
    if (TM->getTargetTriple().getArch() == Triple::ppc64le &&
        !DisableVSXSwapRemoval)
      addPass(createPPCVSXSwapRemovalPass());
    if (TM->getTargetTriple().getArch() == Triple::ppc64le &&
        !DisableVSXSwapRemoval)
      addPass(createPPCVSXSwapRemovalPass());
+  // Target-specific peephole cleanups performed after instruction
+  // selection.
+  if (!DisableMIPeephole) {
+    addPass(createPPCMIPeepholePass());
+    addPass(&DeadMachineInstructionElimID);
+  }
  }
  
  void PPCPassConfig::addPreRegAlloc() {
  }
  
  void PPCPassConfig::addPreRegAlloc() {
diff --git a/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll

index 35d501e40cbb486936715c556f53dc02900631e3..7e8991647aee41345504a427708b63d1aa50ce26 100644 (file)
--- a/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
+++ b/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
@@ -63,7 +63,7 @@ entry:
    ret <2 x i64> %splat.splat
  ; CHECK: mtvsrd {{[0-9]+}}, 3
  ; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
    ret <2 x i64> %splat.splat
  ; CHECK: mtvsrd {{[0-9]+}}, 3
  ; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
-; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]]
+; CHECK-LE: xxspltd [[REG1]], [[REG1]], 0
  }
  
  ; Function Attrs: nounwind
  }
  
  ; Function Attrs: nounwind
diff --git a/test/CodeGen/PowerPC/pr25157-peephole.ll b/test/CodeGen/PowerPC/pr25157-peephole.ll

new file mode 100644 (file)

index 0000000..c5bd49b
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr25157-peephole.ll
@@ -0,0 +1,61 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+
+; Verify peephole simplification of splats and swaps.  Bugpoint-reduced
+; test from Eric Schweitz.
+
+%struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625 = type <{ [28 x i8] }>
+%struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626 = type <{ [64 x i8] }>
+
+@.BSS38 = external global %struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625, align 32
+@_main1_2_ = external global %struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626, section ".comm", align 16
+
+define void @aercalc_() {
+L.entry:
+  br i1 undef, label %L.LB38_2426, label %L.LB38_2911
+
+L.LB38_2911:
+  br i1 undef, label %L.LB38_2140, label %L.LB38_2640
+
+L.LB38_2640:
+  unreachable
+
+L.LB38_2426:
+  br i1 undef, label %L.LB38_2438, label %L.LB38_2920
+
+L.LB38_2920:
+  br i1 undef, label %L.LB38_2438, label %L.LB38_2921
+
+L.LB38_2921:
+  br label %L.LB38_2140
+
+L.LB38_2140:
+  ret void
+
+L.LB38_2438:
+  br i1 undef, label %L.LB38_2451, label %L.LB38_2935
+
+L.LB38_2935:
+  br i1 undef, label %L.LB38_2451, label %L.LB38_2936
+
+L.LB38_2936:
+  unreachable
+
+L.LB38_2451:
+  br i1 undef, label %L.LB38_2452, label %L.LB38_2937
+
+L.LB38_2937:
+  unreachable
+
+L.LB38_2452:
+  %0 = load float, float* bitcast (i8* getelementptr inbounds (%struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625, %struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625* @.BSS38, i64 0, i32 0, i64 16) to float*), align 16
+  %1 = fpext float %0 to double
+  %2 = insertelement <2 x double> undef, double %1, i32 1
+  store <2 x double> %2, <2 x double>* bitcast (i8* getelementptr inbounds (%struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626, %struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626* @_main1_2_, i64 0, i32 0, i64 32) to <2 x double>*), align 16
+  unreachable
+}
+
+; CHECK-LABEL: @aercalc_
+; CHECK: lxsspx
+; CHECK: xxspltd
+; CHECK: stxvd2x
+; CHECK-NOT: xxswapd
diff --git a/test/CodeGen/PowerPC/swaps-le-5.ll b/test/CodeGen/PowerPC/swaps-le-5.ll

index 5cd739a0efa971b7c1c71f2e473848d73c4d394e..3e13bd16c23b4a2d29e1f7052e89ea508384e7b7 100644 (file)
--- a/test/CodeGen/PowerPC/swaps-le-5.ll
+++ b/test/CodeGen/PowerPC/swaps-le-5.ll
@@ -15,11 +15,11 @@ entry:
  }
  
  ; CHECK-LABEL: @bar0
  }
  
  ; CHECK-LABEL: @bar0
-; CHECK-DAG: xxswapd {{[0-9]+}}, 1
  ; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
  ; CHECK-DAG: xxspltd [[REG2:[0-9]+]]
  ; CHECK: xxpermdi [[REG3:[0-9]+]], [[REG2]], [[REG1]], 1
  ; CHECK: stxvd2x [[REG3]]
  ; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
  ; CHECK-DAG: xxspltd [[REG2:[0-9]+]]
  ; CHECK: xxpermdi [[REG3:[0-9]+]], [[REG2]], [[REG1]], 1
  ; CHECK: stxvd2x [[REG3]]
+; CHECK-NOT: xxswapd
  
  define void @bar1(double %y) {
  entry:
  
  define void @bar1(double %y) {
  entry:
@@ -30,11 +30,11 @@ entry:
  }
  
  ; CHECK-LABEL: @bar1
  }
  
  ; CHECK-LABEL: @bar1
-; CHECK-DAG: xxswapd {{[0-9]+}}, 1
  ; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
  ; CHECK-DAG: xxspltd [[REG2:[0-9]+]]
  ; CHECK: xxmrghd [[REG3:[0-9]+]], [[REG1]], [[REG2]]
  ; CHECK: stxvd2x [[REG3]]
  ; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
  ; CHECK-DAG: xxspltd [[REG2:[0-9]+]]
  ; CHECK: xxmrghd [[REG3:[0-9]+]], [[REG1]], [[REG2]]
  ; CHECK: stxvd2x [[REG3]]
+; CHECK-NOT: xxswapd
  
  define void @baz0() {
  entry:
  
  define void @baz0() {
  entry:
diff --git a/test/CodeGen/PowerPC/swaps-le-6.ll b/test/CodeGen/PowerPC/swaps-le-6.ll

index 365aeee2d8fc0dbab19bbb690ba3350ce849649c..df88322e4fd8e220d6d8e324172849f514d51a2a 100644 (file)
--- a/test/CodeGen/PowerPC/swaps-le-6.ll
+++ b/test/CodeGen/PowerPC/swaps-le-6.ll
@@ -20,8 +20,7 @@ entry:
  ; CHECK-LABEL: @bar0
  ; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
  ; CHECK-DAG: lxsdx [[REG2:[0-9]+]]
  ; CHECK-LABEL: @bar0
  ; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
  ; CHECK-DAG: lxsdx [[REG2:[0-9]+]]
-; CHECK: xxswapd [[REG3:[0-9]+]], [[REG2]]
-; CHECK: xxspltd [[REG4:[0-9]+]], [[REG3]], 1
+; CHECK: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
  ; CHECK: xxpermdi [[REG5:[0-9]+]], [[REG4]], [[REG1]], 1
  ; CHECK: stxvd2x [[REG5]]
  
  ; CHECK: xxpermdi [[REG5:[0-9]+]], [[REG4]], [[REG1]], 1
  ; CHECK: stxvd2x [[REG5]]
  
@@ -37,8 +36,7 @@ entry:
  ; CHECK-LABEL: @bar1
  ; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
  ; CHECK-DAG: lxsdx [[REG2:[0-9]+]]
  ; CHECK-LABEL: @bar1
  ; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
  ; CHECK-DAG: lxsdx [[REG2:[0-9]+]]
-; CHECK: xxswapd [[REG3:[0-9]+]], [[REG2]]
-; CHECK: xxspltd [[REG4:[0-9]+]], [[REG3]], 1
+; CHECK: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
  ; CHECK: xxmrghd [[REG5:[0-9]+]], [[REG1]], [[REG4]]
  ; CHECK: stxvd2x [[REG5]]
  
  ; CHECK: xxmrghd [[REG5:[0-9]+]], [[REG1]], [[REG4]]
  ; CHECK: stxvd2x [[REG5]]
  
diff --git a/test/CodeGen/PowerPC/vsx.ll b/test/CodeGen/PowerPC/vsx.ll

index b4b1d248d1a0ac21f007bf807214fe30729ebb20..b2eefb666760ea2e7e2510476f514a327c957049 100644 (file)
--- a/test/CodeGen/PowerPC/vsx.ll
+++ b/test/CodeGen/PowerPC/vsx.ll
@@ -1228,9 +1228,8 @@ define <2 x i32> @test80(i32 %v) {
  ; CHECK-LE-LABEL: @test80
  ; CHECK-LE-DAG: mtvsrd [[R1:[0-9]+]], 3
  ; CHECK-LE-DAG: addi [[R2:[0-9]+]], {{[0-9]+}}, .LCPI
  ; CHECK-LE-LABEL: @test80
  ; CHECK-LE-DAG: mtvsrd [[R1:[0-9]+]], 3
  ; CHECK-LE-DAG: addi [[R2:[0-9]+]], {{[0-9]+}}, .LCPI
-; CHECK-LE-DAG: xxswapd [[V1:[0-9]+]], [[R1]]
  ; CHECK-LE-DAG: lxvd2x [[V2:[0-9]+]], 0, [[R2]]
  ; CHECK-LE-DAG: lxvd2x [[V2:[0-9]+]], 0, [[R2]]
-; CHECK-LE-DAG: xxspltd 34, [[V1]]
+; CHECK-LE-DAG: xxspltd 34, [[R1]]
  ; CHECK-LE-DAG: xxswapd 35, [[V2]]
  ; CHECK-LE: vaddudm 2, 2, 3
  ; CHECK-LE: blr
  ; CHECK-LE-DAG: xxswapd 35, [[V2]]
  ; CHECK-LE: vaddudm 2, 2, 3
  ; CHECK-LE: blr
diff --git a/test/CodeGen/PowerPC/vsx_insert_extract_le.ll b/test/CodeGen/PowerPC/vsx_insert_extract_le.ll

index 6c89b1092bdfc310560b57fc0ae1103b4336d3cc..97e1548f965f479746ee03bd43dea37e51070f13 100644 (file)
--- a/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
+++ b/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
@@ -33,12 +33,8 @@ define double @teste0(<2 x double>* %p1) {
    %r = extractelement <2 x double> %v, i32 0
    ret double %r
  
    %r = extractelement <2 x double> %v, i32 0
    ret double %r
  
-; FIXME: Swap optimization will collapse this into lxvd2x 1, 0, 3.
-
  ; CHECK-LABEL: teste0
  ; CHECK-LABEL: teste0
-; CHECK: lxvd2x 0, 0, 3
-; CHECK: xxswapd 0, 0
-; CHECK: xxswapd 1, 0
+; CHECK: lxvd2x 1, 0, 3
  }
  
  define double @teste1(<2 x double>* %p1) {
  }
  
  define double @teste1(<2 x double>* %p1) {
diff --git a/test/CodeGen/PowerPC/vsx_shuffle_le.ll b/test/CodeGen/PowerPC/vsx_shuffle_le.ll

index dcfa0e78886769527bc5fa691169040e6c4db8f1..4f767c7ca78f7559d713d68c8941b1bdbb06bb9d 100644 (file)
--- a/test/CodeGen/PowerPC/vsx_shuffle_le.ll
+++ b/test/CodeGen/PowerPC/vsx_shuffle_le.ll
@@ -8,8 +8,7 @@ define <2 x double> @test00(<2 x double>* %p1, <2 x double>* %p2) {
  
  ; CHECK-LABEL: test00
  ; CHECK: lxvd2x 0, 0, 3
  
  ; CHECK-LABEL: test00
  ; CHECK: lxvd2x 0, 0, 3
-; CHECK: xxswapd 0, 0
-; CHECK: xxspltd 34, 0, 1
+; CHECK: xxspltd 34, 0, 0
  }
  
  define <2 x double> @test01(<2 x double>* %p1, <2 x double>* %p2) {
  }
  
  define <2 x double> @test01(<2 x double>* %p1, <2 x double>* %p2) {
@@ -58,9 +57,7 @@ define <2 x double> @test10(<2 x double>* %p1, <2 x double>* %p2) {
    ret <2 x double> %v3
  
  ; CHECK-LABEL: @test10
    ret <2 x double> %v3
  
  ; CHECK-LABEL: @test10
-; CHECK: lxvd2x 0, 0, 3
-; CHECK: xxswapd 0, 0
-; CHECK: xxswapd 34, 0
+; CHECK: lxvd2x 34, 0, 3
  }
  
  define <2 x double> @test11(<2 x double>* %p1, <2 x double>* %p2) {
  }
  
  define <2 x double> @test11(<2 x double>* %p1, <2 x double>* %p2) {
@@ -71,8 +68,7 @@ define <2 x double> @test11(<2 x double>* %p1, <2 x double>* %p2) {
  
  ; CHECK-LABEL: @test11
  ; CHECK: lxvd2x 0, 0, 3
  
  ; CHECK-LABEL: @test11
  ; CHECK: lxvd2x 0, 0, 3
-; CHECK: xxswapd 0, 0
-; CHECK: xxspltd 34, 0, 0
+; CHECK: xxspltd 34, 0, 1
  }
  
  define <2 x double> @test12(<2 x double>* %p1, <2 x double>* %p2) {
  }
  
  define <2 x double> @test12(<2 x double>* %p1, <2 x double>* %p2) {
@@ -139,8 +135,7 @@ define <2 x double> @test22(<2 x double>* %p1, <2 x double>* %p2) {
  
  ; CHECK-LABEL: @test22
  ; CHECK: lxvd2x 0, 0, 4
  
  ; CHECK-LABEL: @test22
  ; CHECK: lxvd2x 0, 0, 4
-; CHECK: xxswapd 0, 0
-; CHECK: xxspltd 34, 0, 1
+; CHECK: xxspltd 34, 0, 0
  }
  
  define <2 x double> @test23(<2 x double>* %p1, <2 x double>* %p2) {
  }
  
  define <2 x double> @test23(<2 x double>* %p1, <2 x double>* %p2) {
@@ -189,9 +184,7 @@ define <2 x double> @test32(<2 x double>* %p1, <2 x double>* %p2) {
    ret <2 x double> %v3
  
  ; CHECK-LABEL: @test32
    ret <2 x double> %v3
  
  ; CHECK-LABEL: @test32
-; CHECK: lxvd2x 0, 0, 4
-; CHECK: xxswapd 0, 0
-; CHECK: xxswapd 34, 0
+; CHECK: lxvd2x 34, 0, 4
  }
  
  define <2 x double> @test33(<2 x double>* %p1, <2 x double>* %p2) {
  }
  
  define <2 x double> @test33(<2 x double>* %p1, <2 x double>* %p2) {
@@ -202,6 +195,5 @@ define <2 x double> @test33(<2 x double>* %p1, <2 x double>* %p2) {
  
  ; CHECK-LABEL: @test33
  ; CHECK: lxvd2x 0, 0, 4
  
  ; CHECK-LABEL: @test33
  ; CHECK: lxvd2x 0, 0, 4
-; CHECK: xxswapd 0, 0
-; CHECK: xxspltd 34, 0, 0
+; CHECK: xxspltd 34, 0, 1
  }
  }
diff --git a/test/Transforms/PlaceSafepoints/finite-loops.ll b/test/Transforms/PlaceSafepoints/finite-loops.ll

index 9121e92896c8379c66c0e75bf525a2c43c5d5fc5..b98073d6a6e63d4a4104c9a2498feb858811c83b 100644 (file)
--- a/test/Transforms/PlaceSafepoints/finite-loops.ll
+++ b/test/Transforms/PlaceSafepoints/finite-loops.ll
@@ -11,6 +11,7 @@ define void @test1(i32) gc "statepoint-example" {
  ; CHECK: statepoint
  ; CHECK-LABEL: loop
  ; CHECK-NOT: statepoint
  ; CHECK: statepoint
  ; CHECK-LABEL: loop
  ; CHECK-NOT: statepoint
+; CHECK-LABEL: exit
  
  entry:
    br label %loop
  
  entry:
    br label %loop
@@ -32,6 +33,7 @@ define void @test2(i32) gc "statepoint-example" {
  ; CHECK: statepoint
  ; CHECK-LABEL: loop
  ; CHECK-NOT: statepoint
  ; CHECK: statepoint
  ; CHECK-LABEL: loop
  ; CHECK-NOT: statepoint
+; CHECK-LABEL: exit
  
  entry:
    br label %loop
  
  entry:
    br label %loop
@@ -56,6 +58,7 @@ define void @test3(i8 %upper) gc "statepoint-example" {
  ; CHECK: statepoint
  ; CHECK-LABEL: loop
  ; CHECK-NOT: statepoint
  ; CHECK: statepoint
  ; CHECK-LABEL: loop
  ; CHECK-NOT: statepoint
+; CHECK-LABEL: exit
  
  entry:
    br label %loop
  
  entry:
    br label %loop
@@ -77,12 +80,14 @@ define void @test4(i64 %upper) gc "statepoint-example" {
  ; CHECK: statepoint
  ; CHECK-LABEL: loop
  ; CHECK: statepoint
  ; CHECK: statepoint
  ; CHECK-LABEL: loop
  ; CHECK: statepoint
+; CHECK-LABEL: exit
  
  ; COUNTED-64-LABEL: test4
  ; COUNTED-64-LABEL: entry
  ; COUNTED-64: statepoint
  ; COUNTED-64-LABEL: loop
  ; COUNTED-64-NOT: statepoint
  
  ; COUNTED-64-LABEL: test4
  ; COUNTED-64-LABEL: entry
  ; COUNTED-64: statepoint
  ; COUNTED-64-LABEL: loop
  ; COUNTED-64-NOT: statepoint
+; COUNTED-64-LABEL: exit
  
  entry:
    br label %loop
  
  entry:
    br label %loop
@@ -105,12 +110,14 @@ define void @test5(i64 %upper) gc "statepoint-example" {
  ; CHECK: statepoint
  ; CHECK-LABEL: loop
  ; CHECK: statepoint
  ; CHECK: statepoint
  ; CHECK-LABEL: loop
  ; CHECK: statepoint
+; CHECK-LABEL: exit
  
  ; COUNTED-64-LABEL: test5
  ; COUNTED-64-LABEL: entry
  ; COUNTED-64: statepoint
  ; COUNTED-64-LABEL: loop
  ; COUNTED-64: statepoint
  
  ; COUNTED-64-LABEL: test5
  ; COUNTED-64-LABEL: entry
  ; COUNTED-64: statepoint
  ; COUNTED-64-LABEL: loop
  ; COUNTED-64: statepoint
+; COUNTED-64-LABEL: exit
  
  entry:
    br label %loop
  
  entry:
    br label %loop
author	Bill Schmidt <wschmidt@linux.vnet.ibm.com>
	Tue, 10 Nov 2015 21:38:26 +0000 (21:38 +0000)
committer	Bill Schmidt <wschmidt@linux.vnet.ibm.com>
	Tue, 10 Nov 2015 21:38:26 +0000 (21:38 +0000)
lib/Target/PowerPC/PPC.h		patch \| blob \| history
lib/Target/PowerPC/PPCMIPeephole.cpp	[new file with mode: 0644]	patch \| blob
lib/Target/PowerPC/PPCTargetMachine.cpp		patch \| blob \| history
test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll		patch \| blob \| history
test/CodeGen/PowerPC/pr25157-peephole.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/PowerPC/swaps-le-5.ll		patch \| blob \| history
test/CodeGen/PowerPC/swaps-le-6.ll		patch \| blob \| history
test/CodeGen/PowerPC/vsx.ll		patch \| blob \| history
test/CodeGen/PowerPC/vsx_insert_extract_le.ll		patch \| blob \| history
test/CodeGen/PowerPC/vsx_shuffle_le.ll		patch \| blob \| history
test/Transforms/PlaceSafepoints/finite-loops.ll		patch \| blob \| history