//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for load, store and alloca.
//
//===----------------------------------------------------------------------===//

#include "InstCombine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
/// It will also generate an uncanonicalized form of slicing.
static cl::opt<bool>
StressLoadSlicing("instcombine-stress-load-slicing", cl::Hidden,
                  cl::desc("Bypass the profitability model of load "
                           "slicing"),
                  cl::init(false));

STATISTIC(NumDeadStore,    "Number of dead stores eliminated");
STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");

/// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
/// some part of a constant global variable.  This intentionally only accepts
/// constant expressions because we can't rewrite arbitrary instructions.
static bool pointsToConstantGlobal(Value *V) {
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
    return GV->isConstant();
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
    if (CE->getOpcode() == Instruction::BitCast ||
        CE->getOpcode() == Instruction::GetElementPtr)
      return pointsToConstantGlobal(CE->getOperand(0));
  return false;
}
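
// For illustration (assumed IR, not from the source): the recursion above
// lets constant-expression casts and GEPs of a constant global qualify, e.g.
//   getelementptr inbounds ([8 x i32]* @G, i64 0, i64 2)
// points to part of @G, so this returns true when @G is marked 'constant'.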

/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
/// pointer to an alloca.  Ignore any reads of the pointer, return false if we
/// see any stores or other unknown uses.  If we see pointer arithmetic, keep
/// track of whether it moves the pointer (with IsOffset) but otherwise traverse
/// the uses.  If we see a memcpy/memmove that targets an unoffseted pointer to
/// the alloca, and if the source pointer is a pointer to a constant global, we
/// can optimize this.
static bool
isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
                               SmallVectorImpl<Instruction *> &ToDelete,
                               bool IsOffset = false) {
  // We track lifetime intrinsics as we encounter them.  If we decide to go
  // ahead and replace the value with the global, this lets the caller quickly
  // eliminate the markers.

  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
    User *U = cast<Instruction>(*UI);

    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
      // Ignore non-volatile loads, they are always ok.
      if (!LI->isSimple()) return false;
      continue;
    }

    if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
      // If uses of the bitcast are ok, we are ok.
      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, ToDelete, IsOffset))
        return false;
      continue;
    }
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
      // If the GEP has all zero indices, it doesn't offset the pointer.  If it
      // doesn't, it does.
      if (!isOnlyCopiedFromConstantGlobal(
              GEP, TheCopy, ToDelete, IsOffset || !GEP->hasAllZeroIndices()))
        return false;
      continue;
    }

    if (CallSite CS = U) {
      // If this is the function being called then we treat it like a load and
      // ignore it.
      if (CS.isCallee(UI))
        continue;

      // If this is a readonly/readnone call site, then we know it is just a
      // load (but one that potentially returns the value itself), so we can
      // ignore it if we know that the value isn't captured.
      unsigned ArgNo = CS.getArgumentNo(UI);
      if (CS.onlyReadsMemory() &&
          (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
        continue;

      // If this is being passed as a byval argument, the caller is making a
      // copy, so it is only a read of the alloca.
      if (CS.isByValArgument(ArgNo))
        continue;
    }

    // Lifetime intrinsics can be handled by the caller.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
          II->getIntrinsicID() == Intrinsic::lifetime_end) {
        assert(II->use_empty() && "Lifetime markers have no result to use!");
        ToDelete.push_back(II);
        continue;
      }
    }

    // If this isn't our memcpy/memmove, reject it as something we can't
    // handle.
    MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
    if (MI == 0)
      return false;

    // If the transfer is using the alloca as a source of the transfer, then
    // ignore it since it is a load (unless the transfer is volatile).
    if (UI.getOperandNo() == 1) {
      if (MI->isVolatile()) return false;
      continue;
    }

    // If we already have seen a copy, reject the second one.
    if (TheCopy) return false;

    // If the pointer has been offset from the start of the alloca, we can't
    // safely handle this.
    if (IsOffset) return false;

    // If the memintrinsic isn't using the alloca as the dest, reject it.
    if (UI.getOperandNo() != 0) return false;

    // If the source of the memcpy/move is not a constant global, reject it.
    if (!pointsToConstantGlobal(MI->getSource()))
      return false;

    // Otherwise, the transform is safe.  Remember the copy instruction.
    TheCopy = MI;
  }
  return true;
}
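
// A sketch of the pattern this recognizes (illustrative IR, assumed shape):
//   %buf = alloca [16 x i8]
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %buf.cast,
//        i8* bitcast ([16 x i8]* @G_const to i8*), i64 16, i32 1, i1 false)
//   ; ... only loads of %buf afterwards ...
// If @G_const is a constant global, every use of %buf can read @G_const
// directly instead.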

/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
/// modified by a copy from a constant global.  If we can prove this, we can
/// replace any uses of the alloca with uses of the global directly.
static MemTransferInst *
isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
                               SmallVectorImpl<Instruction *> &ToDelete) {
  MemTransferInst *TheCopy = 0;
  if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
    return TheCopy;
  return 0;
}

Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
  // Ensure that the alloca array size argument has type intptr_t, so that
  // any casting is exposed early.
  if (TD) {
    Type *IntPtrTy = TD->getIntPtrType(AI.getType());
    if (AI.getArraySize()->getType() != IntPtrTy) {
      Value *V = Builder->CreateIntCast(AI.getArraySize(),
                                        IntPtrTy, false);
      AI.setOperand(0, V);
      return &AI;
    }
  }

  // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
  if (AI.isArrayAllocation()) {  // Check C != 1
    if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
      Type *NewTy =
        ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
      AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
      New->setAlignment(AI.getAlignment());

      // Scan to the end of the allocation instructions, to skip over a block of
      // allocas if possible...also skip interleaved debug info
      BasicBlock::iterator It = New;
      while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;

      // Now that It is pointing to the first non-allocation-inst in the block,
      // insert our getelementptr instruction...
      Type *IdxTy = TD
                  ? TD->getIntPtrType(AI.getType())
                  : Type::getInt64Ty(AI.getContext());
      Value *NullIdx = Constant::getNullValue(IdxTy);
      Value *Idx[2] = { NullIdx, NullIdx };
      Instruction *GEP =
        GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
      InsertNewInstBefore(GEP, *It);

      // Now make everything use the getelementptr instead of the original
      // allocation.
      return ReplaceInstUsesWith(AI, GEP);
    } else if (isa<UndefValue>(AI.getArraySize())) {
      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
    }
  }
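
  // For illustration (assumed IR, e.g. on a 64-bit target), the rewrite
  // above turns:
  //   %a = alloca i32, i32 4
  // into:
  //   %a = alloca [4 x i32]
  //   %a.sub = getelementptr inbounds [4 x i32]* %a, i64 0, i64 0
  // and all users of the original %a are redirected to %a.sub.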

  if (TD && AI.getAllocatedType()->isSized()) {
    // If the alignment is 0 (unspecified), assign it the preferred alignment.
    if (AI.getAlignment() == 0)
      AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType()));

    // Move all alloca's of zero byte objects to the entry block and merge them
    // together.  Note that we only do this for alloca's, because malloc should
    // allocate and return a unique pointer, even for a zero byte allocation.
    if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0) {
      // For a zero sized alloca there is no point in doing an array allocation.
      // This is helpful if the array size is a complicated expression not used
      // elsewhere.
      if (AI.isArrayAllocation()) {
        AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
        return &AI;
      }

      // Get the first instruction in the entry block.
      BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
      Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
      if (FirstInst != &AI) {
        // If the entry block doesn't start with a zero-size alloca then move
        // this one to the start of the entry block.  There is no problem with
        // dominance as the array size was forced to a constant earlier already.
        AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
        if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
            TD->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
          AI.moveBefore(FirstInst);
          return &AI;
        }

        // If the alignment of the entry block alloca is 0 (unspecified),
        // assign it the preferred alignment.
        if (EntryAI->getAlignment() == 0)
          EntryAI->setAlignment(
            TD->getPrefTypeAlignment(EntryAI->getAllocatedType()));
        // Replace this zero-sized alloca with the one at the start of the entry
        // block after ensuring that the address will be aligned enough for both
        // types.
        unsigned MaxAlign = std::max(EntryAI->getAlignment(),
                                     AI.getAlignment());
        EntryAI->setAlignment(MaxAlign);
        if (AI.getType() != EntryAI->getType())
          return new BitCastInst(EntryAI, AI.getType());
        return ReplaceInstUsesWith(AI, EntryAI);
      }
    }
  }

  if (AI.getAlignment()) {
    // Check to see if this allocation is only modified by a memcpy/memmove from
    // a constant global whose alignment is equal to or exceeds that of the
    // allocation.  If this is the case, we can change all users to use
    // the constant global instead.  This is commonly produced by the CFE by
    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
    // is only subsequently read.
    SmallVector<Instruction *, 4> ToDelete;
    if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
      unsigned SourceAlign = getOrEnforceKnownAlignment(Copy->getSource(),
                                                        AI.getAlignment(), TD);
      if (AI.getAlignment() <= SourceAlign) {
        DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
        DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
        for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
          EraseInstFromFunction(*ToDelete[i]);
        Constant *TheSrc = cast<Constant>(Copy->getSource());
        Instruction *NewI
          = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc,
                                                             AI.getType()));
        EraseInstFromFunction(*Copy);
        ++NumGlobalCopies;
        return NewI;
      }
    }
  }

  // At last, use the generic allocation site handler to aggressively remove
  // dead allocas.
  return visitAllocSite(AI);
}
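
// Sketch of the net effect of the constant-global path above (illustrative
// IR): with @init constant and at least as aligned as the alloca,
//   %A = alloca %T
//   call void @llvm.memcpy...(%A <- @init)
//   %v = load ... %A
// simplifies so that %v reads directly from @init and %A disappears.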

/// InstCombineLoadCast - Fold 'load (cast P)' -> 'cast (load P)' when possible.
static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
                                        const DataLayout *TD) {
  User *CI = cast<User>(LI.getOperand(0));
  Value *CastOp = CI->getOperand(0);

  PointerType *DestTy = cast<PointerType>(CI->getType());
  Type *DestPTy = DestTy->getElementType();
  if (PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {

    // If the address spaces don't match, don't eliminate the cast.
    if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
      return 0;

    Type *SrcPTy = SrcTy->getElementType();

    if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
        DestPTy->isVectorTy()) {
      // If the source is an array, the code below will not succeed.  Check to
      // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
      // constants.
      if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
        if (Constant *CSrc = dyn_cast<Constant>(CastOp))
          if (ASrcTy->getNumElements() != 0) {
            Type *IdxTy = TD
                        ? TD->getIntPtrType(SrcTy)
                        : Type::getInt64Ty(SrcTy->getContext());
            Value *Idx = Constant::getNullValue(IdxTy);
            Value *Idxs[2] = { Idx, Idx };
            CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs);
            SrcTy = cast<PointerType>(CastOp->getType());
            SrcPTy = SrcTy->getElementType();
          }

      if (IC.getDataLayout() &&
          (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
           SrcPTy->isVectorTy()) &&
          // Do not allow turning this into a load of an integer, which is then
          // casted to a pointer, this pessimizes pointer analysis a lot.
          (SrcPTy->isPointerTy() == LI.getType()->isPointerTy()) &&
          IC.getDataLayout()->getTypeSizeInBits(SrcPTy) ==
              IC.getDataLayout()->getTypeSizeInBits(DestPTy)) {

        // Okay, we are casting from one integer or pointer type to another of
        // the same size.  Instead of casting the pointer before the load, cast
        // the result of the loaded value.
        LoadInst *NewLoad =
          IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
        NewLoad->setAlignment(LI.getAlignment());
        NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
        // Now cast the result of the load.
        return new BitCastInst(NewLoad, LI.getType());
      }
    }
  }
  return 0;
}
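
// A sketch of the fold above (illustrative IR): since i32 and <2 x i16> have
// the same size,
//   %v = load <2 x i16>* (bitcast i32* %p to <2 x i16>*)
// becomes
//   %w = load i32* %p
//   %v = bitcast i32 %w to <2 x i16>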

namespace {
/// \brief Helper structure used to slice a load into smaller loads.
struct LoadedSlice {
  // The last instruction that represents the slice. This should be a
  // truncate instruction.
  Instruction *Inst;
  // The original load instruction.
  LoadInst *Origin;
  // The right shift amount in bits from the original load.
  unsigned Shift;

  LoadedSlice(Instruction *Inst = NULL, LoadInst *Origin = NULL,
              unsigned Shift = 0)
    : Inst(Inst), Origin(Origin), Shift(Shift) {}

  LoadedSlice(const LoadedSlice &LS) : Inst(LS.Inst), Origin(LS.Origin),
                                       Shift(LS.Shift) {}

  /// \brief Get the bits used in a chunk of bits \p BitWidth large.
  /// \return Result is \p BitWidth wide and has used bits set to 1 and
  ///         unused bits set to 0.
  APInt getUsedBits() const {
    // Reproduce the trunc(lshr) sequence:
    // - Start from the truncated value.
    // - Zero extend to the desired bit width.
    // - Shift left by the right shift amount.
    assert(Origin && "No original load to compare against.");
    unsigned BitWidth = Origin->getType()->getPrimitiveSizeInBits();
    assert(Inst && "This slice is not bound to an instruction");
    assert(Inst->getType()->getPrimitiveSizeInBits() <= BitWidth &&
           "Extracted slice is smaller than the whole type!");
    APInt UsedBits(Inst->getType()->getPrimitiveSizeInBits(), 0);
    UsedBits.setAllBits();
    UsedBits = UsedBits.zext(BitWidth);
    UsedBits <<= Shift;
    return UsedBits;
  }
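
  // Worked example (illustrative): for an i32 origin load used as
  // "trunc(lshr(%v, 16)) to i8", Inst is the i8 trunc and Shift is 16, so
  // getUsedBits() yields 0x00FF0000.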

  /// \brief Get the size of the slice to be loaded in bytes.
  unsigned getLoadedSize() const {
    unsigned SliceSize = getUsedBits().countPopulation();
    assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
    return SliceSize / 8;
  }

  /// \brief Get the offset in bytes of this slice in the original chunk of
  /// bits, whose layout is defined by \p IsBigEndian.
  uint64_t getOffsetFromBase(bool IsBigEndian) const {
    assert(!(Shift & 0x7) && "Shifts not aligned on bytes are not supported.");
    uint64_t Offset = Shift / 8;
    unsigned TySizeInBytes = Origin->getType()->getPrimitiveSizeInBits() / 8;
    assert(!(Origin->getType()->getPrimitiveSizeInBits() & 0x7) &&
           "The size of the original loaded type is not a multiple of a"
           " byte.");
    // If Offset is bigger than TySizeInBytes, it means we are loading all
    // zeros. This should have been optimized before in the process.
    assert(TySizeInBytes > Offset &&
           "Invalid shift amount for given loaded size");
    if (IsBigEndian)
      Offset = TySizeInBytes - Offset - getLoadedSize();
    return Offset;
  }
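
  // Worked example (illustrative): for an i32 origin (4 bytes), Shift == 16
  // and an 8-bit slice, the little-endian offset is 16/8 == 2, while the
  // big-endian offset is 4 - 2 - 1 == 1.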

  /// \brief Generate the sequence of instructions to load the slice
  /// represented by this object and redirect the uses of this slice to
  /// this new sequence of instructions.
  /// \pre this->Inst && this->Origin are valid Instructions.
  /// \return The last instruction of the sequence used to load the slice.
  Instruction *loadSlice(InstCombiner::BuilderTy &Builder,
                         bool IsBigEndian) const {
    assert(Inst && Origin && "Unable to replace a non-existing slice.");
    Value *BaseAddr = Origin->getOperand(0);
    unsigned Alignment = Origin->getAlignment();
    Builder.SetInsertPoint(Origin);
    // Assume we are looking at a chunk of bytes.
    // BaseAddr = (i8*)BaseAddr.
    BaseAddr = Builder.CreateBitCast(BaseAddr, Builder.getInt8PtrTy(),
                                     "raw_cast");
    // Get the offset in that chunk of bytes w.r.t. the endianness.
    uint64_t Offset = getOffsetFromBase(IsBigEndian);
    if (Offset) {
      APInt APOffset(64, Offset);
      // BaseAddr = BaseAddr + Offset.
      BaseAddr = Builder.CreateInBoundsGEP(BaseAddr, Builder.getInt(APOffset),
                                           "raw_idx");
    }

    // Create the type of the loaded slice according to its size.
    Type *SliceType =
      Type::getIntNTy(Origin->getContext(), getLoadedSize() * 8);

    // Bit cast the raw pointer to the pointer type of the slice.
    BaseAddr = Builder.CreateBitCast(BaseAddr, SliceType->getPointerTo(),
                                     "cast");

    // Compute the new alignment.
    if (Offset != 0)
      Alignment = MinAlign(Alignment, Alignment + Offset);

    // Create the load for the slice.
    Instruction *LastInst = Builder.CreateAlignedLoad(BaseAddr, Alignment,
                                                      Inst->getName()+".val");
    // If the final type is not the same as the loaded type, this means that
    // we have to pad with zero. Create a zero extend for that.
    Type *FinalType = Inst->getType();
    if (SliceType != FinalType)
      LastInst = cast<Instruction>(Builder.CreateZExt(LastInst, FinalType));

    // Update the IR to reflect the new access to the slice.
    Inst->replaceAllUsesWith(LastInst);

    return LastInst;
  }
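
  // Sketch of the emitted sequence (illustrative, little-endian, 16-bit
  // slice of an i32 at Shift == 16):
  //   %raw_cast = bitcast i32* %p to i8*
  //   %raw_idx  = getelementptr inbounds i8* %raw_cast, i64 2
  //   %cast     = bitcast i8* %raw_idx to i16*
  //   %val      = load i16* %cast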

  /// \brief Check if it would be profitable to expand this slice as an
  /// independent load.
  bool isProfitable() const {
    // Slicing is assumed to be profitable iff the chain leads to arithmetic
    // operations.
    SmallVector<const Instruction *, 8> Uses;
    Uses.push_back(Inst);
    do {
      const Instruction *Use = Uses.pop_back_val();
      for (Value::const_use_iterator UseIt = Use->use_begin(),
           UseItEnd = Use->use_end(); UseIt != UseItEnd; ++UseIt) {
        const Instruction *UseOfUse = cast<Instruction>(*UseIt);
        // Consider these instructions as arithmetic operations.
        if (isa<BinaryOperator>(UseOfUse) ||
            isa<CastInst>(UseOfUse) ||
            isa<PHINode>(UseOfUse) ||
            isa<GetElementPtrInst>(UseOfUse))
          return true;
        // No need to check if the Use has already been checked as we do not
        // insert any PHINode.
        Uses.push_back(UseOfUse);
      }
    } while (!Uses.empty());
    DEBUG(dbgs() << "IC: Not a profitable slice " << *Inst << '\n');
    return false;
  }
};
} // end anonymous namespace

/// \brief Check the profitability of all involved LoadedSlice.
/// Unless StressLoadSlicing is specified, this also returns false
/// when slicing is not in the canonical form.
/// The canonical form of a sliced load is (1) two loads,
/// which are (2) next to each other in memory.
///
/// FIXME: We may want to allow more slices to be created but
/// this means other passes should know how to deal with all those
/// slices.
/// FIXME: We may want to split loads to different types, e.g.,
/// int vs. float.
static bool
isSlicingProfitable(const SmallVectorImpl<LoadedSlice> &LoadedSlices,
                    const APInt &UsedBits) {
  unsigned NbOfSlices = LoadedSlices.size();

  if (!StressLoadSlicing && NbOfSlices != 2)
    return false;

  if (!StressLoadSlicing && !UsedBits.isAllOnesValue()) {
    // Get rid of the unused bits on the right.
    APInt MemoryLayout = UsedBits.lshr(UsedBits.countTrailingZeros());
    // Get rid of the unused bits on the left.
    if (MemoryLayout.countLeadingZeros())
      MemoryLayout = MemoryLayout.trunc(MemoryLayout.getActiveBits());
    // Check that the chunk of memory is completely used.
    if (!MemoryLayout.isAllOnesValue())
      return false;
  }

  unsigned NbOfProfitableSlices = 0;
  for (unsigned CurrSlice = 0; CurrSlice < NbOfSlices; ++CurrSlice) {
    if (LoadedSlices[CurrSlice].isProfitable())
      ++NbOfProfitableSlices;
    else if (!StressLoadSlicing)
      return false;
  }
  // In Stress mode, we may have zero profitable slices.
  // In non-Stress mode, all the slices are profitable at this point.
  return NbOfProfitableSlices > 0;
}
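
// For instance (illustrative): an i32 load split into bits [0,16) and
// [16,32) is canonical (two adjacent slices covering the whole chunk),
// whereas slices covering only bits [0,8) and [24,32) leave a hole and are
// rejected outside of stress mode.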

/// \brief If the given load, \p LI, is used only by trunc or trunc(lshr)
/// operations, split it into the various pieces being extracted.
///
/// This sort of thing is introduced by SROA.
/// This slicing takes care not to insert overlapping loads.
/// \pre LI is a simple load (i.e., not an atomic or volatile load).
static Instruction *sliceUpLoadInst(LoadInst &LI,
                                    InstCombiner::BuilderTy &Builder,
                                    const DataLayout &TD) {
  assert(LI.isSimple() && "We are trying to transform a non-simple load!");

  // FIXME: If we want to support floating point and vector types, we should
  // support bitcast and extract/insert element instructions.
  Type *LITy = LI.getType();
  if (!LITy->isIntegerTy()) return 0;

  // Keep track of already used bits to detect overlapping values.
  // In that case, we will just abort the transformation.
  APInt UsedBits(LITy->getPrimitiveSizeInBits(), 0);

  SmallVector<LoadedSlice, 4> LoadedSlices;

  // Check if this load is used as several smaller chunks of bits.
  // Basically, look for uses in trunc or trunc(lshr) and record a new chain
  // of computation for each trunc.
  for (Value::use_iterator UI = LI.use_begin(), UIEnd = LI.use_end();
       UI != UIEnd; ++UI) {
    Instruction *User = cast<Instruction>(*UI);
    unsigned Shift = 0;

    // Check if this is a trunc(lshr).
    if (User->getOpcode() == Instruction::LShr && User->hasOneUse() &&
        isa<ConstantInt>(User->getOperand(1))) {
      Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue();
      User = User->use_back();
    }

    // At this point, User is a TruncInst iff we encountered a trunc or
    // trunc(lshr).
    if (!isa<TruncInst>(User))
      return 0;

    // The width of the type must be a power of 2 and at least 8 bits.
    // Otherwise the load cannot be represented in LLVM IR.
    // Moreover, if we shifted by a non-8-bit multiple, the slice
    // will be across several bytes. We do not support that.
    unsigned Width = User->getType()->getPrimitiveSizeInBits();
    if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
      return 0;

    // Build the slice for this chain of computations.
    LoadedSlice LS(User, &LI, Shift);
    APInt CurrentUsedBits = LS.getUsedBits();

    // Check if this slice overlaps with another.
    if ((CurrentUsedBits & UsedBits) != 0)
      return 0;
    // Update the bits used globally.
    UsedBits |= CurrentUsedBits;

    // Record the slice.
    LoadedSlices.push_back(LS);
  }

  // Abort slicing if it does not seem to be profitable.
  if (!isSlicingProfitable(LoadedSlices, UsedBits))
    return 0;

  // Rewrite each chain to use an independent load.
  // By construction, each chain can be represented by a unique load.
  bool IsBigEndian = TD.isBigEndian();
  for (SmallVectorImpl<LoadedSlice>::const_iterator LSIt = LoadedSlices.begin(),
       LSItEnd = LoadedSlices.end(); LSIt != LSItEnd; ++LSIt) {
    Instruction *SliceInst = LSIt->loadSlice(Builder, IsBigEndian);
    (void)SliceInst;
    DEBUG(dbgs() << "IC: Replacing " << *LSIt->Inst << "\n"
                    "    with " << *SliceInst << '\n');
  }

  return 0; // Don't do anything with LI.
}
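
// End-to-end sketch of the slicing above (illustrative, little-endian):
//   %v  = load i32* %p
//   %lo = trunc i32 %v to i16
//   %s  = lshr i32 %v, 16
//   %hi = trunc i32 %s to i16
// becomes two independent i16 loads, one at %p and one 2 bytes past it.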

Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
  Value *Op = LI.getOperand(0);

  // Attempt to improve the alignment.
  if (TD) {
    unsigned KnownAlign =
      getOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()),
                                 TD);
    unsigned LoadAlign = LI.getAlignment();
    unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
      TD->getABITypeAlignment(LI.getType());

    if (KnownAlign > EffectiveLoadAlign)
      LI.setAlignment(KnownAlign);
    else if (LoadAlign == 0)
      LI.setAlignment(EffectiveLoadAlign);
  }

  // load (cast X) --> cast (load X) iff safe.
  if (isa<CastInst>(Op))
    if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
      return Res;

  // None of the following transforms are legal for volatile/atomic loads.
  // FIXME: Some of it is okay for atomic loads; needs refactoring.
  if (!LI.isSimple()) return 0;

  // Do really simple store-to-load forwarding and load CSE, to catch cases
  // where there are several consecutive memory accesses to the same location,
  // separated by a few arithmetic operations.
  BasicBlock::iterator BBI = &LI;
  if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI, 6))
    return ReplaceInstUsesWith(LI, AvailableVal);
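
  // For example (illustrative): given "store i32 %x, i32* %p" followed a few
  // instructions later by "%y = load i32* %p" with no intervening clobber,
  // the load is replaced by %x directly.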

  // load(gep null, ...) -> unreachable
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
    const Value *GEPI0 = GEPI->getOperand(0);
    // TODO: Consider a target hook for valid address spaces for this xform.
    if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0) {
      // Insert a new store to null instruction before the load to indicate
      // that this code is not reachable.  We do this instead of inserting
      // an unreachable instruction directly because we cannot modify the
      // CFG.
      new StoreInst(UndefValue::get(LI.getType()),
                    Constant::getNullValue(Op->getType()), &LI);
      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
    }
  }

  // load null/undef -> unreachable
  // TODO: Consider a target hook for valid address spaces for this xform.
  if (isa<UndefValue>(Op) ||
      (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
    // Insert a new store to null instruction before the load to indicate that
    // this code is not reachable.  We do this instead of inserting an
    // unreachable instruction directly because we cannot modify the CFG.
    new StoreInst(UndefValue::get(LI.getType()),
                  Constant::getNullValue(Op->getType()), &LI);
    return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
  }

  // Instcombine load (constantexpr_cast global) -> cast (load global)
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
    if (CE->isCast())
      if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
        return Res;

  if (Op->hasOneUse()) {
    // Change select and PHI nodes to select values instead of addresses: this
    // helps alias analysis out a lot, allows many other simplifications, and
    // exposes redundancy in the code.
    //
    // Note that we cannot do the transformation unless we know that the
    // introduced loads cannot trap!  Something like this is valid as long as
    // the condition is always false: load (select bool %C, int* null, int* %G),
    // but it would not be valid if we transformed it to load from null
    // unconditionally.
    //
    if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
      // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
      unsigned Align = LI.getAlignment();
      if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, TD) &&
          isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, TD)) {
        LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
                                           SI->getOperand(1)->getName()+".val");
        LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
                                           SI->getOperand(2)->getName()+".val");
        V1->setAlignment(Align);
        V2->setAlignment(Align);
        return SelectInst::Create(SI->getCondition(), V1, V2);
      }
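
      // For example (illustrative): when both %p and %q are known safe to
      // load from,
      //   %a = select i1 %c, i32* %p, i32* %q
      //   %v = load i32* %a
      // becomes
      //   %v1 = load i32* %p
      //   %v2 = load i32* %q
      //   %v  = select i1 %c, i32 %v1, i32 %v2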

      // load (select (cond, null, P)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(1)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(2));
          return &LI;
        }

      // load (select (cond, P, null)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(2)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(1));
          return &LI;
        }
    }
  }

  // Try to split a load into smaller non-overlapping loads to expose
  // independent chains of computation and get rid of trunc/lshr sequences of
  // code.  The data layout is required for that operation, as code generation
  // will change with respect to endianness.
  if (TD)
    return sliceUpLoadInst(LI, *Builder, *TD);

  return 0;
}

/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P
/// when possible.  This makes it generally easy to do alias analysis and/or
/// SROA/mem2reg of the memory object.
static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
  User *CI = cast<User>(SI.getOperand(1));
  Value *CastOp = CI->getOperand(0);

  Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
  PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
  if (SrcTy == 0) return 0;

  Type *SrcPTy = SrcTy->getElementType();

  if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
    return 0;

  /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
  /// to its first element.  This allows us to handle things like:
  ///   store i32 xxx, (bitcast {foo*, float}* %P to i32*)
  /// on 32-bit hosts.
  SmallVector<Value*, 4> NewGEPIndices;

  // If the source is an array, the code below will not succeed.  Check to
  // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
  // constants.
  if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) {
    // Index through pointer.
    Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
    NewGEPIndices.push_back(Zero);

    while (1) {
      if (StructType *STy = dyn_cast<StructType>(SrcPTy)) {
        if (!STy->getNumElements()) /* Struct can be empty {} */
          break;
        NewGEPIndices.push_back(Zero);
        SrcPTy = STy->getElementType(0);
      } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
        NewGEPIndices.push_back(Zero);
        SrcPTy = ATy->getElementType();
      } else {
        break;
      }
    }

    SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
  }

  if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
    return 0;

  // If the pointers point into different address spaces or if they point to
  // values with different sizes, we can't do the transformation.
  if (!IC.getDataLayout() ||
      SrcTy->getAddressSpace() !=
        cast<PointerType>(CI->getType())->getAddressSpace() ||
      IC.getDataLayout()->getTypeSizeInBits(SrcPTy) !=
      IC.getDataLayout()->getTypeSizeInBits(DestPTy))
    return 0;

  // Okay, we are casting from one integer or pointer type to another of
  // the same size.  Instead of casting the pointer before
  // the store, cast the value to be stored.
  Value *NewCast;
  Value *SIOp0 = SI.getOperand(0);
  Instruction::CastOps opcode = Instruction::BitCast;
  Type *CastSrcTy = SIOp0->getType();
  Type *CastDstTy = SrcPTy;
  if (CastDstTy->isPointerTy()) {
    if (CastSrcTy->isIntegerTy())
      opcode = Instruction::IntToPtr;
  } else if (CastDstTy->isIntegerTy()) {
    if (SIOp0->getType()->isPointerTy())
      opcode = Instruction::PtrToInt;
  }

  // SIOp0 is a pointer to aggregate and this is a store to the first field,
  // emit a GEP to index into its first field.
  if (!NewGEPIndices.empty())
    CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices);

  NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
                                   SIOp0->getName()+".c");
  SI.setOperand(0, NewCast);
  SI.setOperand(1, CastOp);
  return &SI;
}

/// equivalentAddressValues - Test if A and B will obviously have the same
/// value.  This includes recognizing that %t0 and %t1 will have the same
/// value in code like this:
///   %t0 = getelementptr \@a, 0, 3
///   store i32 0, i32* %t0
///   %t1 = getelementptr \@a, 0, 3
///   %t2 = load i32* %t1
///
static bool equivalentAddressValues(Value *A, Value *B) {
  // Test if the values are trivially equivalent.
  if (A == B) return true;

  // Test if the values come from identical arithmetic instructions.
  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
  // it's only used to compare two uses within the same basic block, which
  // means that they'll always either have the same value or one of them
  // will have an undefined value.
  if (isa<BinaryOperator>(A) ||
      isa<CastInst>(A) ||
      isa<PHINode>(A) ||
      isa<GetElementPtrInst>(A))
    if (Instruction *BI = dyn_cast<Instruction>(B))
      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
        return true;

  // Otherwise they may not be equivalent.
  return false;
}

Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
  Value *Val = SI.getOperand(0);
  Value *Ptr = SI.getOperand(1);

  // Attempt to improve the alignment.
  if (TD) {
    unsigned KnownAlign =
      getOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()),
                                 TD);
    unsigned StoreAlign = SI.getAlignment();
    unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
      TD->getABITypeAlignment(Val->getType());

    if (KnownAlign > EffectiveStoreAlign)
      SI.setAlignment(KnownAlign);
    else if (StoreAlign == 0)
      SI.setAlignment(EffectiveStoreAlign);
  }

  // Don't hack volatile/atomic stores.
  // FIXME: Some bits are legal for atomic stores; needs refactoring.
  if (!SI.isSimple()) return 0;

  // If the RHS is an alloca with a single use, zapify the store, making the
  // alloca dead.
  if (Ptr->hasOneUse()) {
    if (isa<AllocaInst>(Ptr))
      return EraseInstFromFunction(SI);
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
      if (isa<AllocaInst>(GEP->getOperand(0))) {
        if (GEP->getOperand(0)->hasOneUse())
          return EraseInstFromFunction(SI);
      }
    }
  }

  // Do really simple DSE, to catch cases where there are several consecutive
  // stores to the same location, separated by a few arithmetic operations.
  // This situation often occurs with bitfield accesses.
  BasicBlock::iterator BBI = &SI;
  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
       --ScanInsts) {
    --BBI;
    // Don't count debug info directives, lest they affect codegen,
    // and we skip pointer-to-pointer bitcasts, which are NOPs.
    if (isa<DbgInfoIntrinsic>(BBI) ||
        (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      ScanInsts++;
      continue;
    }

    if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
      // Prev store isn't volatile, and stores to the same location?
      if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
                                                        SI.getOperand(1))) {
        ++NumDeadStore;
        ++BBI;
        EraseInstFromFunction(*PrevSI);
        continue;
      }
      break;
    }

    // If this is a load, we have to stop.  However, if the loaded value is from
    // the pointer we're loading and is producing the pointer we're storing,
    // then *this* store is dead (X = load P; store X -> P).
    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
          LI->isSimple())
        return EraseInstFromFunction(SI);

      // Otherwise, this is a load from some other location.  Stores before it
      // may not be dead.
      break;
    }

    // Don't skip over loads or things that can modify memory.
    if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
      break;
  }

  // store X, null    -> turns into 'unreachable' in SimplifyCFG
  if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
    if (!isa<UndefValue>(Val)) {
      SI.setOperand(0, UndefValue::get(Val->getType()));
      if (Instruction *U = dyn_cast<Instruction>(Val))
        Worklist.Add(U);  // Dropped a use.
    }
    return 0;  // Do not modify these!
  }

  // store undef, Ptr -> noop
  if (isa<UndefValue>(Val))
    return EraseInstFromFunction(SI);

  // If the pointer destination is a cast, see if we can fold the cast into the
  // source instead.
  if (isa<CastInst>(Ptr))
    if (Instruction *Res = InstCombineStoreToCast(*this, SI))
      return Res;
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
    if (CE->isCast())
      if (Instruction *Res = InstCombineStoreToCast(*this, SI))
        return Res;

  // If this store is the last instruction in the basic block (possibly
  // excepting debug info instructions), and if the block ends with an
  // unconditional branch, try to move it to the successor block.
  BBI = &SI;
  do {
    ++BBI;
  } while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
    if (BI->isUnconditional())
      if (SimplifyStoreAtEndOfBlock(SI))
        return 0;  // xform done!

  return 0;
}

/// SimplifyStoreAtEndOfBlock - Turn things like:
///   if () { *P = v1; } else { *P = v2 }
/// into a phi node with a store in the successor.
///
/// Simplify things like:
///   *P = v1; if () { *P = v2; }
/// into a phi node with a store in the successor.
///
bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
  BasicBlock *StoreBB = SI.getParent();

  // Check to see if the successor block has exactly two incoming edges.  If
  // so, see if the other predecessor contains a store to the same location.
  // If so, insert a PHI node (if needed) and move the stores down.
  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);

  // Determine whether Dest has exactly two predecessors and, if so, compute
  // the other predecessor.
  pred_iterator PI = pred_begin(DestBB);
  BasicBlock *P = *PI;
  BasicBlock *OtherBB = 0;

  if (P != StoreBB)
    OtherBB = P;

  if (++PI == pred_end(DestBB))
    return false;

  P = *PI;
  if (P != StoreBB) {
    if (OtherBB)
      return false;
    OtherBB = P;
  }
  if (++PI != pred_end(DestBB))
    return false;

  // Bail out if all the relevant blocks aren't distinct (this can happen,
  // for example, if SI is in an infinite loop).
  if (StoreBB == DestBB || OtherBB == DestBB)
    return false;

  // Verify that the other block ends in a branch and is not otherwise empty.
  BasicBlock::iterator BBI = OtherBB->getTerminator();
  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
  if (!OtherBr || BBI == OtherBB->begin())
    return false;

  // If the other block ends in an unconditional branch, check for the 'if then
  // else' case.  There is an instruction before the branch.
  StoreInst *OtherStore = 0;
  if (OtherBr->isUnconditional()) {
    --BBI;
    // Skip over debugging info.
    while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      if (BBI==OtherBB->begin())
        return false;
      --BBI;
    }
    // If this isn't a store, isn't a store to the same location, or is not the
    // right kind of store, bail out.
    OtherStore = dyn_cast<StoreInst>(BBI);
    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
        !SI.isSameOperationAs(OtherStore))
      return false;
  } else {
    // Otherwise, the other block ended with a conditional branch. If one of the
    // destinations is StoreBB, then we have the if/then case.
    if (OtherBr->getSuccessor(0) != StoreBB &&
        OtherBr->getSuccessor(1) != StoreBB)
      return false;

    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
    // if/then triangle.  See if there is a store to the same ptr as SI that
    // lives in OtherBB.
    for (;; --BBI) {
      // Check to see if we find the matching store.
      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
        if (OtherStore->getOperand(1) != SI.getOperand(1) ||
            !SI.isSameOperationAs(OtherStore))
          return false;
        break;
      }
      // If we find something that may be using or overwriting the stored
      // value, or if we run out of instructions, we can't do the xform.
      if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
          BBI == OtherBB->begin())
        return false;
    }

    // In order to eliminate the store in OtherBr, we have to
    // make sure nothing reads or overwrites the stored value in
    // StoreBB.
    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
      // FIXME: This should really be AA driven.
      if (I->mayReadFromMemory() || I->mayWriteToMemory())
        return false;
    }
  }

  // Insert a PHI node now if we need it.
  Value *MergedVal = OtherStore->getOperand(0);
  if (MergedVal != SI.getOperand(0)) {
    PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
    PN->addIncoming(SI.getOperand(0), SI.getParent());
    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
    MergedVal = InsertNewInstBefore(PN, DestBB->front());
  }

  // Advance to a place where it is safe to insert the new store and
  // insert it.
  BBI = DestBB->getFirstInsertionPt();
  StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
                                   SI.isVolatile(),
                                   SI.getAlignment(),
                                   SI.getOrdering(),
                                   SI.getSynchScope());
  InsertNewInstBefore(NewSI, *BBI);
  NewSI->setDebugLoc(OtherStore->getDebugLoc());

  // If the two stores had the same TBAA tag, preserve it.
  if (MDNode *TBAATag = SI.getMetadata(LLVMContext::MD_tbaa))
    if ((TBAATag = MDNode::getMostGenericTBAA(TBAATag,
                               OtherStore->getMetadata(LLVMContext::MD_tbaa))))
      NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag);

  // Nuke the old stores.
  EraseInstFromFunction(SI);
  EraseInstFromFunction(*OtherStore);
  return true;
}