Fixes the issue where manually added fake conditional branches were being removed

[oota-llvm.git] / lib / Target / AArch64 / AArch64TargetMachine.cpp
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp

index 8bf62882edd2d3c5fb0d7e989787426f147cbd39..8cc3ff227254001902674c9a69260609c48739d9 100644 (file)
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -85,7 +85,12 @@ EnableA53Fix835769("aarch64-fix-cortex-a53-835769", cl::Hidden,
  static cl::opt<bool>
  EnableGEPOpt("aarch64-gep-opt", cl::Hidden,
               cl::desc("Enable optimizations on complex GEPs"),
-             cl::init(true));
+             cl::init(false));
+
+// FIXME: Unify control over GlobalMerge.
+static cl::opt<cl::boolOrDefault>
+EnableGlobalMerge("aarch64-global-merge", cl::Hidden,
+                  cl::desc("Enable the global merge pass"));
  
  extern "C" void LLVMInitializeAArch64Target() {
    // Register the target.
@@ -105,9 +110,8 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
  }
  
  // Helper function to build a DataLayout string
-static std::string computeDataLayout(StringRef TT, bool LittleEndian) {
-  Triple Triple(TT);
-  if (Triple.isOSBinFormatMachO())
+static std::string computeDataLayout(const Triple &TT, bool LittleEndian) {
+  if (TT.isOSBinFormatMachO())
      return "e-m:o-i64:64-i128:128-n32:64-S128";
    if (LittleEndian)
      return "e-m:e-i64:64-i128:128-n32:64-S128";
@@ -116,19 +120,17 @@ static std::string computeDataLayout(StringRef TT, bool LittleEndian) {
  
  /// TargetMachine ctor - Create an AArch64 architecture model.
  ///
-AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
+AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
                                             StringRef CPU, StringRef FS,
                                             const TargetOptions &Options,
                                             Reloc::Model RM, CodeModel::Model CM,
                                             CodeGenOpt::Level OL,
                                             bool LittleEndian)
      // This nested ternary is horrible, but DL needs to be properly
-    // initialized
-    // before TLInfo is constructed.
+    // initialized before TLInfo is constructed.
      : LLVMTargetMachine(T, computeDataLayout(TT, LittleEndian), TT, CPU, FS,
                          Options, RM, CM, OL),
-      TLOF(createTLOF(Triple(getTargetTriple()))),
-      Subtarget(TT, CPU, FS, *this, LittleEndian),
+      TLOF(createTLOF(getTargetTriple())),
        isLittle(LittleEndian) {
    initAsmInfo();
  }
@@ -153,28 +155,27 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
      // creation will depend on the TM and the code generation flags on the
      // function that reside in TargetOptions.
      resetTargetOptions(F);
-    I = llvm::make_unique<AArch64Subtarget>(TargetTriple, CPU, FS, *this, isLittle);
+    I = llvm::make_unique<AArch64Subtarget>(TargetTriple, CPU, FS, *this,
+                                            isLittle);
    }
    return I.get();
  }
  
  void AArch64leTargetMachine::anchor() { }
  
-AArch64leTargetMachine::
-AArch64leTargetMachine(const Target &T, StringRef TT,
-                       StringRef CPU, StringRef FS, const TargetOptions &Options,
-                       Reloc::Model RM, CodeModel::Model CM,
-                       CodeGenOpt::Level OL)
-  : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
+AArch64leTargetMachine::AArch64leTargetMachine(
+    const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
+    const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
+    CodeGenOpt::Level OL)
+    : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
  
  void AArch64beTargetMachine::anchor() { }
  
-AArch64beTargetMachine::
-AArch64beTargetMachine(const Target &T, StringRef TT,
-                       StringRef CPU, StringRef FS, const TargetOptions &Options,
-                       Reloc::Model RM, CodeModel::Model CM,
-                       CodeGenOpt::Level OL)
-  : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
+AArch64beTargetMachine::AArch64beTargetMachine(
+    const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
+    const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
+    CodeGenOpt::Level OL)
+    : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
  
  namespace {
  /// AArch64 Code Generator Pass Configuration Options.
@@ -202,7 +203,7 @@ public:
  } // namespace
  
  TargetIRAnalysis AArch64TargetMachine::getTargetIRAnalysis() {
-  return TargetIRAnalysis([this](Function &F) {
+  return TargetIRAnalysis([this](const Function &F) {
      return TargetTransformInfo(AArch64TTIImpl(this, F));
    });
  }
@@ -215,6 +216,9 @@ void AArch64PassConfig::addIRPasses() {
    // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg
    // ourselves.
    addPass(createAtomicExpandPass(TM));
+  // XXX-update: Immediately add the -licm pass after the atomic expand pass to
+  // deal with manually introduced loop invariants (currently disabled below).
+//  addPass(createLICMPass());
  
    // Cmpxchg instructions are often used with a subsequent comparison to
    // determine whether it succeeded. We can exploit existing control-flow in
@@ -224,6 +228,10 @@ void AArch64PassConfig::addIRPasses() {
  
    TargetPassConfig::addIRPasses();
  
+  // Match interleaved memory accesses to ldN/stN intrinsics.
+  if (TM->getOptLevel() != CodeGenOpt::None)
+    addPass(createInterleavedAccessPass(TM));
+
    if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
      // Call SeparateConstOffsetFromGEP pass to extract constants within indices
      // and lower a GEP with multiple indices to either arithmetic operations or
@@ -247,8 +255,14 @@ bool AArch64PassConfig::addPreISel() {
    // FIXME: On AArch64, this depends on the type.
    // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes().
    // and the offset has to be a multiple of the related size in bytes.
-  if (TM->getOptLevel() != CodeGenOpt::None)
-    addPass(createGlobalMergePass(TM, 4095));
+  if ((TM->getOptLevel() != CodeGenOpt::None &&
+       EnableGlobalMerge == cl::BOU_UNSET) ||
+      EnableGlobalMerge == cl::BOU_TRUE) {
+    bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) &&
+                               (EnableGlobalMerge == cl::BOU_UNSET);
+    addPass(createGlobalMergePass(TM, 4095, OnlyOptimizeForSize));
+  }
+
    if (TM->getOptLevel() != CodeGenOpt::None)
      addPass(createAArch64AddressTypePromotionPass());
  
@@ -260,7 +274,7 @@ bool AArch64PassConfig::addInstSelector() {
  
    // For ELF, cleanup any local-dynamic TLS accesses (i.e. combine as many
    // references to _TLS_MODULE_BASE_ as possible.
-  if (Triple(TM->getTargetTriple()).isOSBinFormatELF() &&
+  if (TM->getTargetTriple().isOSBinFormatELF() &&
        getOptLevel() != CodeGenOpt::None)
      addPass(createAArch64CleanupLocalDynamicTLSPass());
  
@@ -315,6 +329,6 @@ void AArch64PassConfig::addPreEmitPass() {
    // range of their destination.
    addPass(createAArch64BranchRelaxation());
    if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
-      Triple(TM->getTargetTriple()).isOSBinFormatMachO())
+      TM->getTargetTriple().isOSBinFormatMachO())
      addPass(createAArch64CollectLOHPass());
  }