From 333d5c9f518ee5e3a33f48be5fee2d2eaf8c8978 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Tue, 25 Nov 2014 10:09:51 +0000 Subject: [PATCH] [InstCombine] Change LLVM To canonicalize toward the value type being stored rather than the pointer type. This change is analogous to r220138 which changed the canonicalization for loads. The rationale is the same: memory does not have a type, operations (and thus the values they produce) have a type. We should match that type as closely as possible rather than reading some form of semantics into the pointer type. With this change, loads and stores should no longer be made with nonsensical types for the values that tehy load and store. This is particularly important when trying to match specific loaded and stored types in the process of doing other instcombines, which is what led me down this twisty maze of miscanonicalization. I've put quite some effort into looking through IR to find places where LLVM's optimizer was being unreasonably conservative in the face of mismatched load and store types, however it is possible (let's say, likely!) I have missed some. If you see regressions here, or from r220138, the likely cause is some part of LLVM failing to cope with load and store types differing. Test cases appreciated, it is important that we root all of these out of LLVM. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222748 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombineLoadStoreAlloca.cpp | 172 ++++++++---------- .../InstCombine/2011-06-13-nsw-alloca.ll | 3 +- .../InstCombine/bitcast-alias-function.ll | 15 +- test/Transforms/InstCombine/bitcast-store.ll | 10 +- test/Transforms/InstCombine/cast.ll | 9 - test/Transforms/InstCombine/getelementptr.ll | 4 +- test/Transforms/InstCombine/select.ll | 10 +- 7 files changed, 96 insertions(+), 127 deletions(-) diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index c0df914cb66..b24f0eafd5d 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -491,102 +491,81 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { return nullptr; } -/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P -/// when possible. This makes it generally easy to do alias analysis and/or -/// SROA/mem2reg of the memory object. -static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { - User *CI = cast(SI.getOperand(1)); - Value *CastOp = CI->getOperand(0); - - Type *DestPTy = CI->getType()->getPointerElementType(); - PointerType *SrcTy = dyn_cast(CastOp->getType()); - if (!SrcTy) return nullptr; - - Type *SrcPTy = SrcTy->getElementType(); +/// \brief Combine stores to match the type of value being stored. +/// +/// The core idea here is that the memory does not have any intrinsic type and +/// where we can we should match the type of a store to the type of value being +/// stored. +/// +/// However, this routine must never change the width of a store or the number of +/// stores as that would introduce a semantic change. This combine is expected to +/// be a semantic no-op which just allows stores to more closely model the types +/// of their incoming values. +/// +/// Currently, we also refuse to change the precise type used for an atomic or +/// volatile store. This is debatable, and might be reasonable to change later. +/// However, it is risky in case some backend or other part of LLVM is relying +/// on the exact type stored to select appropriate atomic operations. +/// +/// \returns true if the store was successfully combined away. This indicates +/// the caller must erase the store instruction. We have to let the caller erase +/// the store instruction sas otherwise there is no way to signal whether it was +/// combined or not: IC.EraseInstFromFunction returns a null pointer. +static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) { + // FIXME: We could probably with some care handle both volatile and atomic + // stores here but it isn't clear that this is important. + if (!SI.isSimple()) + return false; - if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy()) - return nullptr; + Value *Ptr = SI.getPointerOperand(); + Value *V = SI.getValueOperand(); + unsigned AS = SI.getPointerAddressSpace(); + SmallVector, 8> MD; + SI.getAllMetadata(MD); + + // Fold away bit casts of the stored value by storing the original type. + if (auto *BC = dyn_cast(V)) { + V = BC->getOperand(0); + StoreInst *NewStore = IC.Builder->CreateAlignedStore( + V, IC.Builder->CreateBitCast(Ptr, V->getType()->getPointerTo(AS)), + SI.getAlignment()); + for (const auto &MDPair : MD) { + unsigned ID = MDPair.first; + MDNode *N = MDPair.second; + // Note, essentially every kind of metadata should be preserved here! This + // routine is supposed to clone a store instruction changing *only its + // type*. The only metadata it makes sense to drop is metadata which is + // invalidated when the pointer type changes. This should essentially + // never be the case in LLVM, but we explicitly switch over only known + // metadata to be conservatively correct. If you are adding metadata to + // LLVM which pertains to stores, you almost certainly want to add it + // here. + switch (ID) { + case LLVMContext::MD_dbg: + case LLVMContext::MD_tbaa: + case LLVMContext::MD_prof: + case LLVMContext::MD_fpmath: + case LLVMContext::MD_tbaa_struct: + case LLVMContext::MD_alias_scope: + case LLVMContext::MD_noalias: + case LLVMContext::MD_nontemporal: + case LLVMContext::MD_mem_parallel_loop_access: + case LLVMContext::MD_nonnull: + // All of these directly apply. + NewStore->setMetadata(ID, N); + break; - /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep" - /// to its first element. This allows us to handle things like: - /// store i32 xxx, (bitcast {foo*, float}* %P to i32*) - /// on 32-bit hosts. - SmallVector NewGEPIndices; - - // If the source is an array, the code below will not succeed. Check to - // see if a trivial 'gep P, 0, 0' will help matters. Only do this for - // constants. - if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) { - // Index through pointer. - Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext())); - NewGEPIndices.push_back(Zero); - - while (1) { - if (StructType *STy = dyn_cast(SrcPTy)) { - if (!STy->getNumElements()) /* Struct can be empty {} */ - break; - NewGEPIndices.push_back(Zero); - SrcPTy = STy->getElementType(0); - } else if (ArrayType *ATy = dyn_cast(SrcPTy)) { - NewGEPIndices.push_back(Zero); - SrcPTy = ATy->getElementType(); - } else { + case LLVMContext::MD_invariant_load: + case LLVMContext::MD_range: break; } } - - SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace()); + return true; } - if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy()) - return nullptr; - - // If the pointers point into different address spaces don't do the - // transformation. - if (SrcTy->getAddressSpace() != CI->getType()->getPointerAddressSpace()) - return nullptr; - - // If the pointers point to values of different sizes don't do the - // transformation. - if (!IC.getDataLayout() || - IC.getDataLayout()->getTypeSizeInBits(SrcPTy) != - IC.getDataLayout()->getTypeSizeInBits(DestPTy)) - return nullptr; - - // If the pointers point to pointers to different address spaces don't do the - // transformation. It is not safe to introduce an addrspacecast instruction in - // this case since, depending on the target, addrspacecast may not be a no-op - // cast. - if (SrcPTy->isPointerTy() && DestPTy->isPointerTy() && - SrcPTy->getPointerAddressSpace() != DestPTy->getPointerAddressSpace()) - return nullptr; - - // Okay, we are casting from one integer or pointer type to another of - // the same size. Instead of casting the pointer before - // the store, cast the value to be stored. - Value *NewCast; - Instruction::CastOps opcode = Instruction::BitCast; - Type* CastSrcTy = DestPTy; - Type* CastDstTy = SrcPTy; - if (CastDstTy->isPointerTy()) { - if (CastSrcTy->isIntegerTy()) - opcode = Instruction::IntToPtr; - } else if (CastDstTy->isIntegerTy()) { - if (CastSrcTy->isPointerTy()) - opcode = Instruction::PtrToInt; - } - - // SIOp0 is a pointer to aggregate and this is a store to the first field, - // emit a GEP to index into its first field. - if (!NewGEPIndices.empty()) - CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices); - - Value *SIOp0 = SI.getOperand(0); - NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy, - SIOp0->getName()+".c"); - SI.setOperand(0, NewCast); - SI.setOperand(1, CastOp); - return &SI; + // FIXME: We should also canonicalize loads of vectors when their elements are + // cast to other types. + return false; } /// equivalentAddressValues - Test if A and B will obviously have the same @@ -622,6 +601,10 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { Value *Val = SI.getOperand(0); Value *Ptr = SI.getOperand(1); + // Try to canonicalize the stored type. + if (combineStoreToValueType(*this, SI)) + return EraseInstFromFunction(SI); + // Attempt to improve the alignment. if (DL) { unsigned KnownAlign = @@ -713,17 +696,6 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (isa(Val)) return EraseInstFromFunction(SI); - // If the pointer destination is a cast, see if we can fold the cast into the - // source instead. - if (isa(Ptr)) - if (Instruction *Res = InstCombineStoreToCast(*this, SI)) - return Res; - if (ConstantExpr *CE = dyn_cast(Ptr)) - if (CE->isCast()) - if (Instruction *Res = InstCombineStoreToCast(*this, SI)) - return Res; - - // If this store is the last instruction in the basic block (possibly // excepting debug info instructions), and if the block ends with an // unconditional branch, try to move it to the successor block. diff --git a/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll b/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll index 895b2603ad9..c8f0351858c 100644 --- a/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll +++ b/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll @@ -50,7 +50,8 @@ define void @fu2(i32 %parm) nounwind ssp { %7 = add i32 %6, 2048 ; CHECK: alloca i8 %8 = alloca i8, i32 %7 -; CHECK-NEXT: bitcast i8* +; CHECK-NEXT: bitcast double** +; CHECK-NEXT: store i8* %9 = bitcast i8* %8 to double* store double* %9, double** %ptr, align 4 br label %10 diff --git a/test/Transforms/InstCombine/bitcast-alias-function.ll b/test/Transforms/InstCombine/bitcast-alias-function.ll index bc36b25b6de..cfec09200db 100644 --- a/test/Transforms/InstCombine/bitcast-alias-function.ll +++ b/test/Transforms/InstCombine/bitcast-alias-function.ll @@ -94,7 +94,8 @@ entry: ; CHECK: load i32* ; CHECK-NOT: fptoui ; CHECK-NOT: uitofp -; CHECK: bitcast i32 %call to float +; CHECK: bitcast float* %dest to i32* +; CHECK: store i32 %tmp = load float* %source, align 8 %call = call float @alias_i32_to_f32(float %tmp) nounwind store float %call, float* %dest, align 8 @@ -109,7 +110,8 @@ entry: ; CHECK: load <2 x i32>* ; CHECK-NOT: fptoui ; CHECK-NOT: uitofp -; CHECK: bitcast <2 x i32> %call to <2 x float> +; CHECK: bitcast <2 x float>* %dest to <2 x i32>* +; CHECK: store <2 x i32> %tmp = load <2 x float>* %source, align 8 %call = call <2 x float> @alias_v2i32_to_v2f32(<2 x float> %tmp) nounwind store <2 x float> %call, <2 x float>* %dest, align 8 @@ -123,7 +125,8 @@ entry: ; CHECK: bitcast <2 x float>* %source to i64* ; CHECK: load i64* ; CHECK: %call = call i64 @func_i64 -; CHECK: bitcast i64 %call to <2 x float> +; CHECK: bitcast <2 x float>* %dest to i64* +; CHECK: store i64 %tmp = load <2 x float>* %source, align 8 %call = call <2 x float> @alias_v2f32_to_i64(<2 x float> %tmp) nounwind store <2 x float> %call, <2 x float>* %dest, align 8 @@ -136,7 +139,8 @@ entry: ; CHECK: bitcast i64* %source to <2 x float>* ; CHECK: load <2 x float>* ; CHECK: call <2 x float> @func_v2f32 -; CHECK: bitcast <2 x float> %call to i64 +; CHECK: bitcast i64* %dest to <2 x float>* +; CHECK: store <2 x float> %tmp = load i64* %source, align 8 %call = call i64 @alias_i64_to_v2f32(i64 %tmp) nounwind store i64 %call, i64* %dest, align 8 @@ -149,7 +153,8 @@ entry: ; CHECK: bitcast <2 x i64*>* %source to <2 x i32*>* ; CHECK: load <2 x i32*>* ; CHECK: call <2 x i32*> @func_v2i32p -; CHECK: bitcast <2 x i32*> %call to <2 x i64*> +; CHECK: bitcast <2 x i64*>* %dest to <2 x i32*>* +; CHECK: store <2 x i32*> %tmp = load <2 x i64*>* %source, align 8 %call = call <2 x i64*> @alias_v2i32p_to_v2i64p(<2 x i64*> %tmp) nounwind store <2 x i64*> %call, <2 x i64*>* %dest, align 8 diff --git a/test/Transforms/InstCombine/bitcast-store.ll b/test/Transforms/InstCombine/bitcast-store.ll index e46b5c82d9f..3081cb18716 100644 --- a/test/Transforms/InstCombine/bitcast-store.ll +++ b/test/Transforms/InstCombine/bitcast-store.ll @@ -10,11 +10,11 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 @G = external constant [5 x i8*] ; CHECK-LABEL: @foo -; CHECK: store i32 (...)** bitcast (i8** getelementptr inbounds ([5 x i8*]* @G, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 16, !tag !0 -define void @foo(%struct.A* %a) nounwind { +; CHECK: store i32 %x, i32* %{{.*}}, align 16, !noalias !0 +define void @foo(i32 %x, float* %p) nounwind { entry: - %0 = bitcast %struct.A* %a to i8*** - store i8** getelementptr inbounds ([5 x i8*]* @G, i64 0, i64 2), i8*** %0, align 16, !tag !0 + %x.cast = bitcast i32 %x to float + store float %x.cast, float* %p, align 16, !noalias !0 ret void } @@ -32,4 +32,4 @@ entry: ret void } -!0 = metadata !{metadata !"hello"} +!0 = metadata !{metadata !0} diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll index 578b16d9811..a6865557409 100644 --- a/test/Transforms/InstCombine/cast.ll +++ b/test/Transforms/InstCombine/cast.ll @@ -199,15 +199,6 @@ define i1 @test24(i1 %C) { ; CHECK: ret i1 true } -define void @test25(i32** %P) { - %c = bitcast i32** %P to float** ; [#uses=1] - ;; Fold cast into null - store float* null, float** %c - ret void -; CHECK: store i32* null, i32** %P -; CHECK: ret void -} - define i32 @test26(float %F) { ;; no need to cast from float->double. %c = fpext float %F to double ; [#uses=1] diff --git a/test/Transforms/InstCombine/getelementptr.ll b/test/Transforms/InstCombine/getelementptr.ll index bb4666219a7..94cc1803073 100644 --- a/test/Transforms/InstCombine/getelementptr.ll +++ b/test/Transforms/InstCombine/getelementptr.ll @@ -602,8 +602,8 @@ entry: %C = load i8** %B, align 8 ret i8* %C ; CHECK-LABEL: @test34( -; CHECK: %V.c = inttoptr i64 %V to i8* -; CHECK: ret i8* %V.c +; CHECK: %[[C:.*]] = inttoptr i64 %V to i8* +; CHECK: ret i8* %[[C]] } %t0 = type { i8*, [19 x i8] } diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll index f3fb76ee3e3..c8ae024c293 100644 --- a/test/Transforms/InstCombine/select.ll +++ b/test/Transforms/InstCombine/select.ll @@ -1395,11 +1395,11 @@ define i8* @test83(i1 %flag) { ; CHECK-LABEL: @test83( ; CHECK: %[[X:.*]] = alloca i8* ; CHECK-NEXT: %[[Y:.*]] = alloca i8* -; CHECK: %[[V:.*]] = load i64* %[[X]] -; CHECK-NEXT: %[[C1:.*]] = inttoptr i64 %[[V]] to i8* -; CHECK-NEXT: store i8* %[[C1]], i8** %[[Y]] -; CHECK-NEXT: %[[C2:.*]] = inttoptr i64 %[[V]] to i8* -; CHECK-NEXT: %[[S:.*]] = select i1 %flag, i8* %[[C2]], i8* %[[C1]] +; CHECK-DAG: %[[X2:.*]] = bitcast i8** %[[X]] to i64* +; CHECK-DAG: %[[Y2:.*]] = bitcast i8** %[[Y]] to i64* +; CHECK: %[[V:.*]] = load i64* %[[X2]] +; CHECK-NEXT: store i64 %[[V]], i64* %[[Y2]] +; CHECK-NEXT: %[[C:.*]] = inttoptr i64 %[[V]] to i8* ; CHECK-NEXT: ret i8* %[[S]] entry: %x = alloca i8* -- 2.34.1