Add the ability to use GEPs for address sinking in CGP

author Hal Finkel <hfinkel@anl.gov>

Sat, 12 Apr 2014 00:59:48 +0000 (00:59 +0000)

committer Hal Finkel <hfinkel@anl.gov>

Sat, 12 Apr 2014 00:59:48 +0000 (00:59 +0000)
author Hal Finkel <hfinkel@anl.gov>
Sat, 12 Apr 2014 00:59:48 +0000 (00:59 +0000)
committer Hal Finkel <hfinkel@anl.gov>
Sat, 12 Apr 2014 00:59:48 +0000 (00:59 +0000)
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp

index 9765485b5d51de34011b763d3f85eb419113218b..91d702a56608e5958a16565fbbdf48b2990e36fc 100644 (file)
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -39,6 +39,7 @@
  #include "llvm/Support/raw_ostream.h"
  #include "llvm/Target/TargetLibraryInfo.h"
  #include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
  #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  #include "llvm/Transforms/Utils/BuildLibCalls.h"
  #include "llvm/Transforms/Utils/BypassSlowDivision.h"
@@ -70,6 +71,10 @@ static cl::opt<bool> DisableSelectToBranch(
    "disable-cgp-select2branch", cl::Hidden, cl::init(false),
    cl::desc("Disable select to branch conversion."));
  
+static cl::opt<bool> AddrSinkUsingGEPs(
+  "addr-sink-using-gep", cl::Hidden, cl::init(false),
+  cl::desc("Address sinking in CGP using GEPs."));
+
  static cl::opt<bool> EnableAndCmpSinking(
     "enable-andcmp-sinking", cl::Hidden, cl::init(true),
     cl::desc("Enable sinkinig and/cmp into branches."));
@@ -2423,6 +2428,127 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
                   << *MemoryInst);
      if (SunkAddr->getType() != Addr->getType())
        SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
+  } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
+               TM && TM->getSubtarget<TargetSubtargetInfo>().useAA())) {
+    // By default, we use the GEP-based method when AA is used later. This
+    // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
+    DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
+                 << *MemoryInst);
+    Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
+    Value *ResultPtr = 0, *ResultIndex = 0;
+
+    // First, find the pointer.
+    if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
+      ResultPtr = AddrMode.BaseReg;
+      AddrMode.BaseReg = 0;
+    }
+
+    if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
+      // We can't add more than one pointer together, nor can we scale a
+      // pointer (both of which seem meaningless).
+      if (ResultPtr || AddrMode.Scale != 1)
+        return false;
+
+      ResultPtr = AddrMode.ScaledReg;
+      AddrMode.Scale = 0;
+    }
+
+    if (AddrMode.BaseGV) {
+      if (ResultPtr)
+        return false;
+
+      ResultPtr = AddrMode.BaseGV;
+    }
+
+    // If the real base value actually came from an inttoptr, then the matcher
+    // will look through it and provide only the integer value. In that case,
+    // use it here.
+    if (!ResultPtr && AddrMode.BaseReg) {
+      ResultPtr =
+        Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr");
+      AddrMode.BaseReg = 0;
+    } else if (!ResultPtr && AddrMode.Scale == 1) {
+      ResultPtr =
+        Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr");
+      AddrMode.Scale = 0;
+    }
+
+    if (!ResultPtr &&
+        !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) {
+      SunkAddr = Constant::getNullValue(Addr->getType());
+    } else if (!ResultPtr) {
+      return false;
+    } else {
+      Type *I8PtrTy =
+        Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
+
+      // Start with the base register. Do this first so that subsequent address
+      // matching finds it last, which will prevent it from trying to match it
+      // as the scaled value in case it happens to be a mul. That would be
+      // problematic if we've sunk a different mul for the scale, because then
+      // we'd end up sinking both muls.
+      if (AddrMode.BaseReg) {
+        Value *V = AddrMode.BaseReg;
+        if (V->getType() != IntPtrTy)
+          V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
+
+        ResultIndex = V;
+      }
+
+      // Add the scale value.
+      if (AddrMode.Scale) {
+        Value *V = AddrMode.ScaledReg;
+        if (V->getType() == IntPtrTy) {
+          // done.
+        } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
+                   cast<IntegerType>(V->getType())->getBitWidth()) {
+          V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
+        } else {
+          // It is only safe to sign extend the BaseReg if we know that the math
+          // required to create it did not overflow before we extend it. Since
+          // the original IR value was tossed in favor of a constant back when
+          // the AddrMode was created we need to bail out gracefully if widths
+          // do not match instead of extending it.
+          Instruction *I = dyn_cast_or_null<Instruction>(ResultIndex);
+          if (I && (ResultIndex != AddrMode.BaseReg))
+            I->eraseFromParent();
+          return false;
+        }
+
+        if (AddrMode.Scale != 1)
+          V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
+                                "sunkaddr");
+        if (ResultIndex)
+          ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
+        else
+          ResultIndex = V;
+      }
+
+      // Add in the Base Offset if present.
+      if (AddrMode.BaseOffs) {
+        Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
+        if (ResultIndex) {
+         // We need to add this separately from the scale above to help with
+         // SDAG consecutive load/store merging.
+          if (ResultPtr->getType() != I8PtrTy)
+            ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
+          ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr");
+        }
+
+        ResultIndex = V;
+      }
+
+      if (!ResultIndex) {
+        SunkAddr = ResultPtr;
+      } else {
+        if (ResultPtr->getType() != I8PtrTy)
+          ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
+        SunkAddr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr");
+      }
+
+      if (SunkAddr->getType() != Addr->getType())
+        SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
+    }
    } else {
      DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
                   << *MemoryInst);
diff --git a/test/CodeGen/ARM/phi.ll b/test/CodeGen/ARM/phi.ll

index 94bced5200c62a68b9f2d84aabaaa6c13b675e90..5a8f623e6f506d9be1456642280b7b005d0c0eca 100644 (file)
--- a/test/CodeGen/ARM/phi.ll
+++ b/test/CodeGen/ARM/phi.ll
@@ -1,4 +1,5 @@
  ; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+v4t -addr-sink-using-gep=1 %s -o - | FileCheck %s
  
  ; <rdar://problem/8686347>
  
diff --git a/test/CodeGen/ARM64/dagcombiner-indexed-load.ll b/test/CodeGen/ARM64/dagcombiner-indexed-load.ll

index 6cea039ad855c1d1243424b3471335720475fb28..2e4b658f1c9c993f83b31f4bc870ca17151f9c8e 100644 (file)
--- a/test/CodeGen/ARM64/dagcombiner-indexed-load.ll
+++ b/test/CodeGen/ARM64/dagcombiner-indexed-load.ll
@@ -1,4 +1,5 @@
  ; RUN: llc -O3 < %s | FileCheck %s
+; RUN: llc -O3 -addr-sink-using-gep=1 < %s | FileCheck %s
  ; Test case for a DAG combiner bug where we combined an indexed load
  ; with an extension (sext, zext, or any) into a regular extended load,
  ; i.e., dropping the indexed value.
diff --git a/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll b/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll

index ccf52974a67bbc749c23fb48147ce65de72480ed..df83f8b191c64c53f9c52272c2f34c8e2a74c1d8 100644 (file)
--- a/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
+++ b/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
@@ -1,4 +1,5 @@
  ; RUN: llc -mcpu=g5 < %s | FileCheck %s
+; RUN: llc -mcpu=g5 -addr-sink-using-gep=1 < %s | FileCheck %s
  ;; Formerly crashed, see PR 1508
  target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
  target triple = "powerpc64-apple-darwin8"
diff --git a/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll b/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll

index e1f890192d127c7bcb72fe37d18596a23ec66d5d..4d7c3a185a8bfafe7e7eab86c1049cae83d048db 100644 (file)
--- a/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll
+++ b/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll
@@ -1,8 +1,14 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-darwin | \
-; RUN:   grep push | count 3
+; RUN: llc < %s -march=x86 -mtriple=i686-darwin | FileCheck %s
+; RUN: llc < %s -march=x86 -mtriple=i686-darwin -addr-sink-using-gep=1 | FileCheck %s
  
  define void @foo(i8** %buf, i32 %size, i32 %col, i8* %p) nounwind {
  entry:
+; CHECK-LABEL: @foo
+; CHECK: push
+; CHECK: push
+; CHECK: push
+; CHECK-NOT: push
+
         icmp sgt i32 %size, 0           ; <i1>:0 [#uses=1]
         br i1 %0, label %bb.preheader, label %return
  
diff --git a/test/CodeGen/X86/MergeConsecutiveStores.ll b/test/CodeGen/X86/MergeConsecutiveStores.ll

index 0ef3aa5b6f073cfb33ba816b6e441d021fae1ab6..f6d68520b7b4f5591750815a7b5c845810ea7efb 100644 (file)
--- a/test/CodeGen/X86/MergeConsecutiveStores.ll
+++ b/test/CodeGen/X86/MergeConsecutiveStores.ll
@@ -1,4 +1,5 @@
  ; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx < %s | FileCheck %s
+; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx -addr-sink-using-gep=1 < %s | FileCheck %s
  
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/CodeGen/X86/codegen-prepare-addrmode-sext.ll b/test/CodeGen/X86/codegen-prepare-addrmode-sext.ll

index e3d6b349d6e17a5cd78e301d813d70c296b2dc2c..78e1dd287f6e72f202831a1df250f3c5fe02e861 100644 (file)
--- a/test/CodeGen/X86/codegen-prepare-addrmode-sext.ll
+++ b/test/CodeGen/X86/codegen-prepare-addrmode-sext.ll
@@ -1,4 +1,5 @@
  ; RUN: opt -S -codegenprepare %s -o - | FileCheck %s
+; RUN: opt -S -codegenprepare -addr-sink-using-gep=1 %s -o - | FileCheck -check-prefix=CHECK-GEP %s
  ; This file tests the different cases what are involved when codegen prepare
  ; tries to get sign extension out of the way of addressing mode.
  ; This tests require an actual target as addressing mode decisions depends
@@ -281,6 +282,25 @@ define i8 @twoArgsNoPromotionRemove(i1 %arg1, i8 %arg2, i8* %base) {
  ; CHECK: [[ADDR2:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[BASE2]] to i32*
  ; CHECK: load i32* [[ADDR2]]
  ; CHECK: ret
+; CHECK-GEP-LABEL: @checkProfitability
+; CHECK-GEP-NOT: {{%[a-zA-Z_0-9-]+}} = sext i32 %arg1 to i64
+; CHECK-GEP-NOT: {{%[a-zA-Z_0-9-]+}} = sext i32 %arg2 to i64
+; CHECK-GEP: [[SHL:%[a-zA-Z_0-9-]+]] = shl nsw i32 %arg1, 1
+; CHECK-GEP: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SHL]], %arg2
+; CHECK-GEP: [[SEXTADD:%[a-zA-Z_0-9-]+]] = sext i32 [[ADD]] to i64
+; BB then
+; CHECK-GEP: [[BASE1:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[SEXTADD]] to i32*
+; CHECK-GEP: [[BCC1:%[a-zA-Z_0-9-]+]] = bitcast i32* [[BASE1]] to i8*
+; CHECK-GEP: [[FULL1:%[a-zA-Z_0-9-]+]] = getelementptr i8* [[BCC1]], i64 48
+; CHECK-GEP: [[ADDR1:%[a-zA-Z_0-9-]+]] = bitcast i8* [[FULL1]] to i32*
+; CHECK-GEP: load i32* [[ADDR1]]
+; BB else
+; CHECK-GEP: [[BASE2:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[SEXTADD]] to i32*
+; CHECK-GEP: [[BCC2:%[a-zA-Z_0-9-]+]] = bitcast i32* [[BASE2]] to i8*
+; CHECK-GEP: [[FULL2:%[a-zA-Z_0-9-]+]] = getelementptr i8* [[BCC2]], i64 48
+; CHECK-GEP: [[ADDR2:%[a-zA-Z_0-9-]+]] = bitcast i8* [[FULL2]] to i32*
+; CHECK-GEP: load i32* [[ADDR2]]
+; CHECK-GEP: ret
  define i32 @checkProfitability(i32 %arg1, i32 %arg2, i1 %test) {
    %shl = shl nsw i32 %arg1, 1
    %add1 = add nsw i32 %shl, %arg2
diff --git a/test/CodeGen/X86/codegen-prepare.ll b/test/CodeGen/X86/codegen-prepare.ll

index 316accfa41ac9ae5f79ad647f7121924dff1d616..4ff0f1c0ba24a12a4996a4d6731f77c26681efc9 100644 (file)
--- a/test/CodeGen/X86/codegen-prepare.ll
+++ b/test/CodeGen/X86/codegen-prepare.ll
@@ -1,4 +1,5 @@
  ; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -addr-sink-using-gep=1 | FileCheck %s
  
  ; Check that the CodeGenPrepare Pass
  ; does not wrongly rewrite the address computed by Instruction %4
diff --git a/test/CodeGen/X86/isel-sink.ll b/test/CodeGen/X86/isel-sink.ll

index 458f19dfc4f77f05787e7878d972f2a853aa8b1e..e4af9b67f95e69231a112558a248454350d8087a 100644 (file)
--- a/test/CodeGen/X86/isel-sink.ll
+++ b/test/CodeGen/X86/isel-sink.ll
@@ -1,4 +1,5 @@
  ; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -march=x86 -addr-sink-using-gep=1 | FileCheck %s
  
  define i32 @test(i32* %X, i32 %B) {
  ; CHECK-LABEL: test:
diff --git a/test/CodeGen/X86/merge_store.ll b/test/CodeGen/X86/merge_store.ll

index 940688c6252f7622869a7829bd8040fb1cd807e5..f98963d8e90eb827e2c00d874e6eaff2eafcb1b9 100644 (file)
--- a/test/CodeGen/X86/merge_store.ll
+++ b/test/CodeGen/X86/merge_store.ll
@@ -1,4 +1,5 @@
  ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -addr-sink-using-gep=1 | FileCheck %s
  
  define void @merge_store(i32* nocapture %a) {
  ; CHECK-LABEL: merge_store:
diff --git a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll

index c24eda3ddc8dcfb20a2f98e69c8d7c2f0e5570aa..f4edf092641f7b1cfacd1c89c4c102ac3dcfeaa0 100644 (file)
--- a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
+++ b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
@@ -1,4 +1,5 @@
  ; RUN: llc -O3 -mtriple=thumb-eabi -mcpu=cortex-a9 %s -o - | FileCheck %s -check-prefix=A9
+; RUN: llc -O3 -mtriple=thumb-eabi -mcpu=cortex-a9 -addr-sink-using-gep=1 %s -o - | FileCheck %s -check-prefix=A9
  
  ; @simple is the most basic chain of address induction variables. Chaining
  ; saves at least one register and avoids complex addressing and setup
diff --git a/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll b/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll

index e42b67fd35af2443a12887690ff058afbae70151..937791dca413aaba8952761044bfd8dff9ed2e9c 100644 (file)
--- a/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
@@ -1,5 +1,7 @@
  ; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 | FileCheck %s -check-prefix=X64
  ; RUN: llc < %s -O3 -march=x86 -mcpu=core2 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 -addr-sink-using-gep=1 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -O3 -march=x86 -mcpu=core2 -addr-sink-using-gep=1 | FileCheck %s -check-prefix=X32
  
  ; @simple is the most basic chain of address induction variables. Chaining
  ; saves at least one register and avoids complex addressing and setup
author	Hal Finkel <hfinkel@anl.gov>
	Sat, 12 Apr 2014 00:59:48 +0000 (00:59 +0000)
committer	Hal Finkel <hfinkel@anl.gov>
	Sat, 12 Apr 2014 00:59:48 +0000 (00:59 +0000)
lib/CodeGen/CodeGenPrepare.cpp		patch \| blob \| history
test/CodeGen/ARM/phi.ll		patch \| blob \| history
test/CodeGen/ARM64/dagcombiner-indexed-load.ll		patch \| blob \| history
test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll		patch \| blob \| history
test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll		patch \| blob \| history
test/CodeGen/X86/MergeConsecutiveStores.ll		patch \| blob \| history
test/CodeGen/X86/codegen-prepare-addrmode-sext.ll		patch \| blob \| history
test/CodeGen/X86/codegen-prepare.ll		patch \| blob \| history
test/CodeGen/X86/isel-sink.ll		patch \| blob \| history
test/CodeGen/X86/merge_store.ll		patch \| blob \| history
test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll		patch \| blob \| history
test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll		patch \| blob \| history