From 8f0f45882405651ce0062c631a8c1f5f44aefcd8 Mon Sep 17 00:00:00 2001
From: Chad Rosier <mcrosier@codeaurora.org>
Date: Wed, 7 May 2014 16:41:55 +0000
Subject: [PATCH] [ARM64][fast-isel] Disable target-specific optimizations at
 -O0.  Functionally, this patch disables the dead register elimination pass
 and the load/store pair optimization pass at -O0.  The ILP optimizations
 don't require the optimization level to be checked because the call to
 addILPOpts is predicated on the necessary check.  The AdvSIMDScalar pass is
 disabled by default at all optimization levels.  This patch leaves that pass
 disabled by default.

Also, move the command-line options into ARM64TargetMachine.cpp and add a few
additional flags to aid in debugging.  This fixes an issue with the
-debug-pass=Structure flag where passes were printed, but not actually run
(i.e., the AdvSIMDScalar pass).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@208223 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp  |  8 ----
 lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp |  6 ---
 lib/Target/ARM64/ARM64TargetMachine.cpp      | 47 +++++++++++++-------
 test/CodeGen/ARM64/abi_align.ll              |  4 +-
 test/CodeGen/ARM64/fast-isel-conversion.ll   |  4 +-
 5 files changed, 35 insertions(+), 34 deletions(-)
diff --git a/lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp b/lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp
index da280f8be08..5950a8f18e1 100644
--- a/lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp
+++ b/lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp
@@ -49,10 +49,6 @@ using namespace llvm;
 
 #define DEBUG_TYPE "arm64-simd-scalar"
 
-static cl::opt<bool>
-AdvSIMDScalar("arm64-simd-scalar",
-              cl::desc("enable use of AdvSIMD scalar integer instructions"),
-              cl::init(false), cl::Hidden);
 // Allow forcing all i64 operations with equivalent SIMD instructions to use
 // them. For stress-testing the transformation function.
 static cl::opt<bool>
@@ -368,10 +364,6 @@ bool ARM64AdvSIMDScalar::processMachineBasicBlock(MachineBasicBlock *MBB) {
 
 // runOnMachineFunction - Pass entry point from PassManager.
 bool ARM64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) {
-  // Early exit if pass disabled.
-  if (!AdvSIMDScalar)
-    return false;
-
   bool Changed = false;
   DEBUG(dbgs() << "***** ARM64AdvSIMDScalar *****\n");
 
diff --git a/lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp b/lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp
index da7a8cd3d90..40b39126fad 100644
--- a/lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp
+++ b/lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp
@@ -40,8 +40,6 @@ STATISTIC(NumPreFolded, "Number of pre-index updates folded");
 STATISTIC(NumUnscaledPairCreated,
           "Number of load/store from unscaled generated");
 
-static cl::opt<bool> DoLoadStoreOpt("arm64-load-store-opt", cl::init(true),
-                                    cl::Hidden);
 static cl::opt<unsigned> ScanLimit("arm64-load-store-scan-limit", cl::init(20),
                                    cl::Hidden);
 
@@ -923,10 +921,6 @@ bool ARM64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
 }
 
 bool ARM64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
-  // Early exit if pass disabled.
-  if (!DoLoadStoreOpt)
-    return false;
-
   const TargetMachine &TM = Fn.getTarget();
   TII = static_cast<const ARM64InstrInfo *>(TM.getInstrInfo());
   TRI = TM.getRegisterInfo();
diff --git a/lib/Target/ARM64/ARM64TargetMachine.cpp b/lib/Target/ARM64/ARM64TargetMachine.cpp
index b7e1beb8580..f5c187ceb27 100644
--- a/lib/Target/ARM64/ARM64TargetMachine.cpp
+++ b/lib/Target/ARM64/ARM64TargetMachine.cpp
@@ -20,24 +20,30 @@
 #include "llvm/Transforms/Scalar.h"
 using namespace llvm;
 
-static cl::opt<bool> EnableCCMP("arm64-ccmp",
-                                cl::desc("Enable the CCMP formation pass"),
-                                cl::init(true));
+static cl::opt<bool>
+EnableCCMP("arm64-ccmp", cl::desc("Enable the CCMP formation pass"),
+           cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+EnableEarlyIfConvert("arm64-early-ifcvt", cl::desc("Enable the early if "
+                     "converter pass"), cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+EnableStPairSuppress("arm64-stp-suppress", cl::desc("Suppress STP for ARM64"),
+                     cl::init(true), cl::Hidden);
 
-static cl::opt<bool> EnableStPairSuppress("arm64-stp-suppress", cl::Hidden,
-                                          cl::desc("Suppress STP for ARM64"),
-                                          cl::init(true));
+static cl::opt<bool>
+EnableAdvSIMDScalar("arm64-simd-scalar", cl::desc("Enable use of AdvSIMD scalar"
+                    " integer instructions"), cl::init(false), cl::Hidden);
 
 static cl::opt<bool>
-EnablePromoteConstant("arm64-promote-const", cl::Hidden,
-                      cl::desc("Enable the promote constant pass"),
-                      cl::init(true));
+EnablePromoteConstant("arm64-promote-const", cl::desc("Enable the promote "
+                      "constant pass"), cl::init(true), cl::Hidden);
 
 static cl::opt<bool>
-EnableCollectLOH("arm64-collect-loh", cl::Hidden,
-                 cl::desc("Enable the pass that emits the linker"
-                          " optimization hints (LOH)"),
-                 cl::init(true));
+EnableCollectLOH("arm64-collect-loh", cl::desc("Enable the pass that emits the"
+                 " linker optimization hints (LOH)"), cl::init(true),
+                 cl::Hidden);
 
 static cl::opt<bool>
 EnableDeadRegisterElimination("arm64-dead-def-elimination", cl::Hidden,
@@ -47,6 +53,10 @@ EnableDeadRegisterElimination("arm64-dead-def-elimination", cl::Hidden,
                                        " register"),
                               cl::init(true));
 
+static cl::opt<bool>
+EnableLoadStoreOpt("arm64-load-store-opt", cl::desc("Enable the load/store pair"
+                   " optimization pass"), cl::init(true), cl::Hidden);
+
 extern "C" void LLVMInitializeARM64Target() {
   // Register the target.
   RegisterTargetMachine<ARM64leTargetMachine> X(TheARM64leTarget);
@@ -159,7 +169,8 @@ bool ARM64PassConfig::addInstSelector() {
 bool ARM64PassConfig::addILPOpts() {
   if (EnableCCMP)
     addPass(createARM64ConditionalCompares());
-  addPass(&EarlyIfConverterID);
+  if (EnableEarlyIfConvert)
+    addPass(&EarlyIfConverterID);
   if (EnableStPairSuppress)
     addPass(createARM64StorePairSuppressPass());
   return true;
@@ -167,13 +178,14 @@ bool ARM64PassConfig::addILPOpts() {
 
 bool ARM64PassConfig::addPreRegAlloc() {
   // Use AdvSIMD scalar instructions whenever profitable.
-  addPass(createARM64AdvSIMDScalar());
+  if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar)
+    addPass(createARM64AdvSIMDScalar());
   return true;
 }
 
 bool ARM64PassConfig::addPostRegAlloc() {
   // Change dead register definitions to refer to the zero register.
-  if (EnableDeadRegisterElimination)
+  if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
     addPass(createARM64DeadRegisterDefinitions());
   return true;
 }
@@ -182,7 +194,8 @@ bool ARM64PassConfig::addPreSched2() {
   // Expand some pseudo instructions to allow proper scheduling.
   addPass(createARM64ExpandPseudoPass());
   // Use load/store pair instructions when possible.
-  addPass(createARM64LoadStoreOptimizationPass());
+  if (TM->getOptLevel() != CodeGenOpt::None && EnableLoadStoreOpt)
+    addPass(createARM64LoadStoreOptimizationPass());
   return true;
 }
 
diff --git a/test/CodeGen/ARM64/abi_align.ll b/test/CodeGen/ARM64/abi_align.ll
index 13a2a394f39..44c5a07ce39 100644
--- a/test/CodeGen/ARM64/abi_align.ll
+++ b/test/CodeGen/ARM64/abi_align.ll
@@ -490,7 +490,9 @@ entry:
 ; FAST: sub sp, sp, #48
 ; FAST: mov x[[ADDR:[0-9]+]], sp
 ; FAST: str {{w[0-9]+}}, [x[[ADDR]], #16]
-; FAST: stp {{x[0-9]+}}, {{x[0-9]+}}, [x[[ADDR]]]
+; Load/Store opt is disabled with -O0, so the i128 is split.
+; FAST: str {{x[0-9]+}}, [x[[ADDR]], #8]
+; FAST: str {{x[0-9]+}}, [x[[ADDR]]]
   %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16
   %call = tail call i32 @callee_i128_split(i32 1, i32 2, i32 3, i32 4, i32 5,
                                            i32 6, i32 7, i128 %0, i32 8) #5
diff --git a/test/CodeGen/ARM64/fast-isel-conversion.ll b/test/CodeGen/ARM64/fast-isel-conversion.ll
index 067979dd3d4..c5417de0ae9 100644
--- a/test/CodeGen/ARM64/fast-isel-conversion.ll
+++ b/test/CodeGen/ARM64/fast-isel-conversion.ll
@@ -60,7 +60,7 @@ entry:
 ; CHECK: mov x3, x0
 ; CHECK: ubfx x3, x3, #0, #32
 ; CHECK: str x3, [sp]
-; CHECK: ldr x0, [sp], #16
+; CHECK: ldr x0, [sp]
 ; CHECK: ret
   %a.addr = alloca i8, align 1
   %b.addr = alloca i16, align 2
@@ -117,7 +117,7 @@ entry:
 ; CHECK: mov x3, x0
 ; CHECK: sxtw x3, w3
 ; CHECK: str x3, [sp]
-; CHECK: ldr x0, [sp], #16
+; CHECK: ldr x0, [sp]
 ; CHECK: ret
   %a.addr = alloca i8, align 1
   %b.addr = alloca i16, align 2
-- 
2.34.1