[InstCombine] Do an about-face on how LLVM canonicalizes (cast (load

author Chandler Carruth <chandlerc@gmail.com>

Sat, 18 Oct 2014 06:36:22 +0000 (06:36 +0000)

committer Chandler Carruth <chandlerc@gmail.com>

Sat, 18 Oct 2014 06:36:22 +0000 (06:36 +0000)
author Chandler Carruth <chandlerc@gmail.com>
Sat, 18 Oct 2014 06:36:22 +0000 (06:36 +0000)
committer Chandler Carruth <chandlerc@gmail.com>
Sat, 18 Oct 2014 06:36:22 +0000 (06:36 +0000)
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp

index 4aafc2e2cad7ab71030baab4f5ceb5b80dc6dac2..cfdfa00e80566407e05df2e39573e7ad57e9bd5d 100644 (file)
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -291,76 +291,58 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
    return visitAllocSite(AI);
  }
  
+/// \brief Combine loads to match the type of value their uses after looking
+/// through intervening bitcasts.
+///
+/// The core idea here is that if the result of a load is used in an operation,
+/// we should load the type most conducive to that operation. For example, when
+/// loading an integer and converting that immediately to a pointer, we should
+/// instead directly load a pointer.
+///
+/// However, this routine must never change the width of a load or the number of
+/// loads as that would introduce a semantic change. This combine is expected to
+/// be a semantic no-op which just allows loads to more closely model the types
+/// of their consuming operations.
+///
+/// Currently, we also refuse to change the precise type used for an atomic load
+/// or a volatile load. This is debatable, and might be reasonable to change
+/// later. However, it is risky in case some backend or other part of LLVM is
+/// relying on the exact type loaded to select appropriate atomic operations.
+static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
+  // FIXME: We could probably with some care handle both volatile and atomic
+  // loads here but it isn't clear that this is important.
+  if (!LI.isSimple())
+    return nullptr;
  
-/// InstCombineLoadCast - Fold 'load (cast P)' -> cast (load P)' when possible.
-static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
-                                        const DataLayout *DL) {
-  User *CI = cast<User>(LI.getOperand(0));
-  Value *CastOp = CI->getOperand(0);
-
-  PointerType *DestTy = cast<PointerType>(CI->getType());
-  Type *DestPTy = DestTy->getElementType();
-  if (PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {
-
-    // If the address spaces don't match, don't eliminate the cast.
-    if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
-      return nullptr;
-
-    Type *SrcPTy = SrcTy->getElementType();
-
-    if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
-         DestPTy->isVectorTy()) {
-      // If the source is an array, the code below will not succeed.  Check to
-      // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
-      // constants.
-      if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
-        if (Constant *CSrc = dyn_cast<Constant>(CastOp))
-          if (ASrcTy->getNumElements() != 0) {
-            Type *IdxTy = DL
-                        ? DL->getIntPtrType(SrcTy)
-                        : Type::getInt64Ty(SrcTy->getContext());
-            Value *Idx = Constant::getNullValue(IdxTy);
-            Value *Idxs[2] = { Idx, Idx };
-            CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs);
-            SrcTy = cast<PointerType>(CastOp->getType());
-            SrcPTy = SrcTy->getElementType();
-          }
-
-      if (IC.getDataLayout() &&
-          (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
-            SrcPTy->isVectorTy()) &&
-          // Do not allow turning this into a load of an integer, which is then
-          // casted to a pointer, this pessimizes pointer analysis a lot.
-          (SrcPTy->isPtrOrPtrVectorTy() ==
-           LI.getType()->isPtrOrPtrVectorTy()) &&
-          IC.getDataLayout()->getTypeSizeInBits(SrcPTy) ==
-               IC.getDataLayout()->getTypeSizeInBits(DestPTy)) {
-
-        // Okay, we are casting from one integer or pointer type to another of
-        // the same size.  Instead of casting the pointer before the load, cast
-        // the result of the loaded value.
-        LoadInst *NewLoad =
-          IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
-        NewLoad->setAlignment(LI.getAlignment());
-        NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
-        // Now cast the result of the load.
-        PointerType *OldTy = dyn_cast<PointerType>(NewLoad->getType());
-        PointerType *NewTy = dyn_cast<PointerType>(LI.getType());
-        if (OldTy && NewTy &&
-            OldTy->getAddressSpace() != NewTy->getAddressSpace()) {
-          return new AddrSpaceCastInst(NewLoad, LI.getType());
-        }
+  if (LI.use_empty())
+    return nullptr;
  
-        return new BitCastInst(NewLoad, LI.getType());
-      }
+  Value *Ptr = LI.getPointerOperand();
+  unsigned AS = LI.getPointerAddressSpace();
+
+  // Fold away bit casts of the loaded value by loading the desired type.
+  if (LI.hasOneUse())
+    if (auto *BC = dyn_cast<BitCastInst>(LI.user_back())) {
+      LoadInst *NewLoad = IC.Builder->CreateAlignedLoad(
+          IC.Builder->CreateBitCast(Ptr, BC->getDestTy()->getPointerTo(AS)),
+          LI.getAlignment(), LI.getName());
+      BC->replaceAllUsesWith(NewLoad);
+      IC.EraseInstFromFunction(*BC);
+      return &LI;
      }
-  }
+
+  // FIXME: We should also canonicalize loads of vectors when their elements are
+  // cast to other types.
    return nullptr;
  }
  
  Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
    Value *Op = LI.getOperand(0);
  
+  // Try to canonicalize the loaded type.
+  if (Instruction *Res = combineLoadToOperationType(*this, LI))
+    return Res;
+
    // Attempt to improve the alignment.
    if (DL) {
      unsigned KnownAlign =
@@ -376,11 +358,6 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
        LI.setAlignment(EffectiveLoadAlign);
    }
  
-  // load (cast X) --> cast (load X) iff safe.
-  if (isa<CastInst>(Op))
-    if (Instruction *Res = InstCombineLoadCast(*this, LI, DL))
-      return Res;
-
    // None of the following transforms are legal for volatile/atomic loads.
    // FIXME: Some of it is okay for atomic loads; needs refactoring.
    if (!LI.isSimple()) return nullptr;
@@ -419,12 +396,6 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
    }
  
-  // Instcombine load (constantexpr_cast global) -> cast (load global)
-  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
-    if (CE->isCast())
-      if (Instruction *Res = InstCombineLoadCast(*this, LI, DL))
-        return Res;
-
    if (Op->hasOneUse()) {
      // Change select and PHI nodes to select values instead of addresses: this
      // helps alias analysis out a lot, allows many others simplifications, and
diff --git a/test/Transforms/InstCombine/atomic.ll b/test/Transforms/InstCombine/atomic.ll

index ccee87433f3220ad0a3d72a30b1a9d383010141a..98cecefcc29463614911bd21db70f7ad34e6d38c 100644 (file)
--- a/test/Transforms/InstCombine/atomic.ll
+++ b/test/Transforms/InstCombine/atomic.ll
@@ -5,14 +5,6 @@ target triple = "x86_64-apple-macosx10.7.0"
  
  ; Check transforms involving atomic operations
  
-define i32* @test1(i8** %p) {
-; CHECK-LABEL: define i32* @test1(
-; CHECK: load atomic i8** %p monotonic, align 8
-  %c = bitcast i8** %p to i32**
-  %r = load atomic i32** %c monotonic, align 8
-  ret i32* %r
-}
-
  define i32 @test2(i32* %p) {
  ; CHECK-LABEL: define i32 @test2(
  ; CHECK: %x = load atomic i32* %p seq_cst, align 4
diff --git a/test/Transforms/InstCombine/bitcast-alias-function.ll b/test/Transforms/InstCombine/bitcast-alias-function.ll

index a6b56f94ffbf40fa48443ca286d51472abd943d9..bc36b25b6ded493014490b6fe9253011487878f4 100644 (file)
--- a/test/Transforms/InstCombine/bitcast-alias-function.ll
+++ b/test/Transforms/InstCombine/bitcast-alias-function.ll
@@ -90,7 +90,8 @@ entry:
  define void @bitcast_alias_scalar(float* noalias %source, float* noalias %dest) nounwind {
  entry:
  ; CHECK-LABEL: @bitcast_alias_scalar
-; CHECK: bitcast float %tmp to i32
+; CHECK: bitcast float* %source to i32*
+; CHECK: load i32*
  ; CHECK-NOT: fptoui
  ; CHECK-NOT: uitofp
  ; CHECK: bitcast i32 %call to float
@@ -104,7 +105,8 @@ entry:
  define void @bitcast_alias_vector(<2 x float>* noalias %source, <2 x float>* noalias %dest) nounwind {
  entry:
  ; CHECK-LABEL: @bitcast_alias_vector
-; CHECK: bitcast <2 x float> %tmp to <2 x i32>
+; CHECK: bitcast <2 x float>* %source to <2 x i32>*
+; CHECK: load <2 x i32>*
  ; CHECK-NOT: fptoui
  ; CHECK-NOT: uitofp
  ; CHECK: bitcast <2 x i32> %call to <2 x float>
@@ -118,7 +120,8 @@ entry:
  define void @bitcast_alias_vector_scalar_same_size(<2 x float>* noalias %source, <2 x float>* noalias %dest) nounwind {
  entry:
  ; CHECK-LABEL: @bitcast_alias_vector_scalar_same_size
-; CHECK: bitcast <2 x float> %tmp to i64
+; CHECK: bitcast <2 x float>* %source to i64*
+; CHECK: load i64*
  ; CHECK: %call = call i64 @func_i64
  ; CHECK: bitcast i64 %call to <2 x float>
    %tmp = load <2 x float>* %source, align 8
@@ -130,7 +133,8 @@ entry:
  define void @bitcast_alias_scalar_vector_same_size(i64* noalias %source, i64* noalias %dest) nounwind {
  entry:
  ; CHECK-LABEL: @bitcast_alias_scalar_vector_same_size
-; CHECK: bitcast i64 %tmp to <2 x float>
+; CHECK: bitcast i64* %source to <2 x float>*
+; CHECK: load <2 x float>*
  ; CHECK: call <2 x float> @func_v2f32
  ; CHECK: bitcast <2 x float> %call to i64
    %tmp = load i64* %source, align 8
@@ -142,7 +146,8 @@ entry:
  define void @bitcast_alias_vector_ptrs_same_size(<2 x i64*>* noalias %source, <2 x i64*>* noalias %dest) nounwind {
  entry:
  ; CHECK-LABEL: @bitcast_alias_vector_ptrs_same_size
-; CHECK: bitcast <2 x i64*> %tmp to <2 x i32*>
+; CHECK: bitcast <2 x i64*>* %source to <2 x i32*>*
+; CHECK: load <2 x i32*>*
  ; CHECK: call <2 x i32*> @func_v2i32p
  ; CHECK: bitcast <2 x i32*> %call to <2 x i64*>
    %tmp = load <2 x i64*>* %source, align 8
diff --git a/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll b/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll

index 7fac78a40f569f47b1293f4f3fa1a05bcac453dd..bb61f02c8e230af091c143710aabe9ae41abf7f0 100644 (file)
--- a/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
+++ b/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
@@ -161,12 +161,11 @@ define i32 @constant_fold_bitcast_itof_load() {
    ret i32 %a
  }
  
-define <4 x i32> @constant_fold_bitcast_vector_as() {
+define <4 x float> @constant_fold_bitcast_vector_as() {
  ; CHECK-LABEL: @constant_fold_bitcast_vector_as(
  ; CHECK: load <4 x float> addrspace(3)* @g_v4f_as3, align 16
-; CHECK: bitcast <4 x float> %1 to <4 x i32>
-  %a = load <4 x i32> addrspace(3)* bitcast (<4 x float> addrspace(3)* @g_v4f_as3 to <4 x i32> addrspace(3)*), align 4
-  ret <4 x i32> %a
+  %a = load <4 x float> addrspace(3)* bitcast (<4 x i32> addrspace(3)* bitcast (<4 x float> addrspace(3)* @g_v4f_as3 to <4 x i32> addrspace(3)*) to <4 x float> addrspace(3)*), align 4
+  ret <4 x float> %a
  }
  
  @i32_array_as3 = addrspace(3) global [10 x i32] zeroinitializer
diff --git a/test/Transforms/InstCombine/descale-zero.ll b/test/Transforms/InstCombine/descale-zero.ll

index 7990fdb3eca391ed52de2a164afaf374597be331..4656837cc05ed439956119da8c2ddefbea74bebc 100644 (file)
--- a/test/Transforms/InstCombine/descale-zero.ll
+++ b/test/Transforms/InstCombine/descale-zero.ll
@@ -5,8 +5,7 @@ target triple = "x86_64-apple-macosx10.10.0"
  
  define internal i8* @descale_zero() {
  entry:
-; CHECK: load i16** inttoptr (i64 48 to i16**), align 16
-; CHECK-NEXT: bitcast i16*
+; CHECK: load i8** inttoptr (i64 48 to i8**), align 16
  ; CHECK-NEXT: ret i8*
    %i16_ptr = load i16** inttoptr (i64 48 to i16**), align 16
    %num = load i64* inttoptr (i64 64 to i64*), align 64
diff --git a/test/Transforms/InstCombine/getelementptr.ll b/test/Transforms/InstCombine/getelementptr.ll

index b7daccc71af573ed3f41372911f6140dacb00dc6..bb4666219a71275c5688de71405ce602d25cb232 100644 (file)
--- a/test/Transforms/InstCombine/getelementptr.ll
+++ b/test/Transforms/InstCombine/getelementptr.ll
@@ -703,7 +703,7 @@ define void @test39(%struct.ham* %arg, i8 %arg1) nounwind {
  
  ; CHECK-LABEL: @test39(
  ; CHECK: getelementptr inbounds %struct.ham* %arg, i64 0, i32 2
-; CHECK: getelementptr inbounds i8* %tmp3, i64 -8
+; CHECK: getelementptr inbounds i8* %{{.+}}, i64 -8
  }
  
  define i1 @pr16483([1 x i8]* %a, [1 x i8]* %b) {
diff --git a/test/Transforms/InstCombine/load-addrspace-cast.ll b/test/Transforms/InstCombine/load-addrspace-cast.ll

deleted file mode 100644 (file)

index fd6339c..0000000
--- a/test/Transforms/InstCombine/load-addrspace-cast.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: opt -instcombine -S < %s | FileCheck %s
-target datalayout = "e-p:64:64:64-n8:16:32:64"
-
-define i32* @pointer_to_addrspace_pointer(i32 addrspace(1)** %x) nounwind {
-; CHECK-LABEL: @pointer_to_addrspace_pointer(
-; CHECK: load
-; CHECK: addrspacecast
-  %y = bitcast i32 addrspace(1)** %x to i32**
-  %z = load i32** %y
-  ret i32* %z
-}
-
author	Chandler Carruth <chandlerc@gmail.com>
	Sat, 18 Oct 2014 06:36:22 +0000 (06:36 +0000)
committer	Chandler Carruth <chandlerc@gmail.com>
	Sat, 18 Oct 2014 06:36:22 +0000 (06:36 +0000)
lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp		patch \| blob \| history
test/Transforms/InstCombine/atomic.ll		patch \| blob \| history
test/Transforms/InstCombine/bitcast-alias-function.ll		patch \| blob \| history
test/Transforms/InstCombine/constant-fold-address-space-pointer.ll		patch \| blob \| history
test/Transforms/InstCombine/descale-zero.ll		patch \| blob \| history
test/Transforms/InstCombine/getelementptr.ll		patch \| blob \| history
test/Transforms/InstCombine/load-addrspace-cast.ll	[deleted file]	patch \| blob \| history