lib/CodeGen/CodeGenPrepare.cpp

   1 //===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This pass munges the code in the input function to better prepare it for
  11 // SelectionDAG-based code generation. This works around limitations in its
  12 // basic-block-at-a-time approach. It should eventually be removed.
  13 //
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "llvm/CodeGen/Passes.h"
  17 #include "llvm/ADT/DenseMap.h"
  18 #include "llvm/ADT/SmallSet.h"
  19 #include "llvm/ADT/Statistic.h"
  20 #include "llvm/Analysis/InstructionSimplify.h"
  21 #include "llvm/Analysis/TargetLibraryInfo.h"
  22 #include "llvm/Analysis/TargetTransformInfo.h"
  23 #include "llvm/IR/CallSite.h"
  24 #include "llvm/IR/Constants.h"
  25 #include "llvm/IR/DataLayout.h"
  26 #include "llvm/IR/DerivedTypes.h"
  27 #include "llvm/IR/Dominators.h"
  28 #include "llvm/IR/Function.h"
  29 #include "llvm/IR/GetElementPtrTypeIterator.h"
  30 #include "llvm/IR/IRBuilder.h"
  31 #include "llvm/IR/InlineAsm.h"
  32 #include "llvm/IR/Instructions.h"
  33 #include "llvm/IR/IntrinsicInst.h"
  34 #include "llvm/IR/MDBuilder.h"
  35 #include "llvm/IR/PatternMatch.h"
  36 #include "llvm/IR/Statepoint.h"
  37 #include "llvm/IR/ValueHandle.h"
  38 #include "llvm/IR/ValueMap.h"
  39 #include "llvm/Pass.h"
  40 #include "llvm/Support/CommandLine.h"
  41 #include "llvm/Support/Debug.h"
  42 #include "llvm/Support/raw_ostream.h"
  43 #include "llvm/Target/TargetLowering.h"
  44 #include "llvm/Target/TargetSubtargetInfo.h"
  45 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  46 #include "llvm/Transforms/Utils/BuildLibCalls.h"
  47 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
  48 #include "llvm/Transforms/Utils/Local.h"
  49 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
  50 using namespace llvm;
  51 using namespace llvm::PatternMatch;
  52
  53 #define DEBUG_TYPE "codegenprepare"
  54
  55 STATISTIC(NumBlocksElim, "Number of blocks eliminated");
  56 STATISTIC(NumPHIsElim,   "Number of trivial PHIs eliminated");
  57 STATISTIC(NumGEPsElim,   "Number of GEPs converted to casts");
  58 STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
  59                       "sunken Cmps");
  60 STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
  61                        "of sunken Casts");
  62 STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
  63                           "computations were sunk");
  64 STATISTIC(NumExtsMoved,  "Number of [s|z]ext instructions combined with loads");
  65 STATISTIC(NumExtUses,    "Number of uses of [s|z]ext instructions optimized");
  66 STATISTIC(NumRetsDup,    "Number of return instructions duplicated");
  67 STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
  68 STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
  69 STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches");
  70 STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
  71
  72 static cl::opt<bool> DisableBranchOpts(
  73   "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
  74   cl::desc("Disable branch optimizations in CodeGenPrepare"));
  75
  76 static cl::opt<bool>
  77     DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
  78                   cl::desc("Disable GC optimizations in CodeGenPrepare"));
  79
  80 static cl::opt<bool> DisableSelectToBranch(
  81   "disable-cgp-select2branch", cl::Hidden, cl::init(false),
  82   cl::desc("Disable select to branch conversion."));
  83
  84 static cl::opt<bool> AddrSinkUsingGEPs(
  85   "addr-sink-using-gep", cl::Hidden, cl::init(false),
  86   cl::desc("Address sinking in CGP using GEPs."));
  87
  88 static cl::opt<bool> EnableAndCmpSinking(
  89    "enable-andcmp-sinking", cl::Hidden, cl::init(true),
  90    cl::desc("Enable sinkinig and/cmp into branches."));
  91
  92 static cl::opt<bool> DisableStoreExtract(
  93     "disable-cgp-store-extract", cl::Hidden, cl::init(false),
  94     cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
  95
  96 static cl::opt<bool> StressStoreExtract(
  97     "stress-cgp-store-extract", cl::Hidden, cl::init(false),
  98     cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
  99
 100 static cl::opt<bool> DisableExtLdPromotion(
 101     "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
 102     cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
 103              "CodeGenPrepare"));
 104
 105 static cl::opt<bool> StressExtLdPromotion(
 106     "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
 107     cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
 108              "optimization in CodeGenPrepare"));
 109
 110 namespace {
 111 typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
 112 struct TypeIsSExt {
 113   Type *Ty;
 114   bool IsSExt;
 115   TypeIsSExt(Type *Ty, bool IsSExt) : Ty(Ty), IsSExt(IsSExt) {}
 116 };
 117 typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
 118 class TypePromotionTransaction;
 119
 120   class CodeGenPrepare : public FunctionPass {
 121     /// TLI - Keep a pointer of a TargetLowering to consult for determining
 122     /// transformation profitability.
 123     const TargetMachine *TM;
 124     const TargetLowering *TLI;
 125     const TargetTransformInfo *TTI;
 126     const TargetLibraryInfo *TLInfo;
 127
 128     /// CurInstIterator - As we scan instructions optimizing them, this is the
 129     /// next instruction to optimize.  Xforms that can invalidate this should
 130     /// update it.
 131     BasicBlock::iterator CurInstIterator;
 132
 133     /// Keeps track of non-local addresses that have been sunk into a block.
 134     /// This allows us to avoid inserting duplicate code for blocks with
 135     /// multiple load/stores of the same address.
 136     ValueMap<Value*, Value*> SunkAddrs;
 137
 138     /// Keeps track of all truncates inserted for the current function.
 139     SetOfInstrs InsertedTruncsSet;
 140     /// Keeps track of the type of the related instruction before their
 141     /// promotion for the current function.
 142     InstrToOrigTy PromotedInsts;
 143
 144     /// ModifiedDT - If CFG is modified in anyway.
 145     bool ModifiedDT;
 146
 147     /// OptSize - True if optimizing for size.
 148     bool OptSize;
 149
 150   public:
 151     static char ID; // Pass identification, replacement for typeid
 152     explicit CodeGenPrepare(const TargetMachine *TM = nullptr)
 153         : FunctionPass(ID), TM(TM), TLI(nullptr), TTI(nullptr) {
 154         initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
 155       }
 156     bool runOnFunction(Function &F) override;
 157
 158     const char *getPassName() const override { return "CodeGen Prepare"; }
 159
 160     void getAnalysisUsage(AnalysisUsage &AU) const override {
 161       AU.addPreserved<DominatorTreeWrapperPass>();
 162       AU.addRequired<TargetLibraryInfoWrapperPass>();
 163       AU.addRequired<TargetTransformInfoWrapperPass>();
 164     }
 165
 166   private:
 167     bool EliminateFallThrough(Function &F);
 168     bool EliminateMostlyEmptyBlocks(Function &F);
 169     bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
 170     void EliminateMostlyEmptyBlock(BasicBlock *BB);
 171     bool OptimizeBlock(BasicBlock &BB, bool& ModifiedDT);
 172     bool OptimizeInst(Instruction *I, bool& ModifiedDT);
 173     bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
 174     bool OptimizeInlineAsmInst(CallInst *CS);
 175     bool OptimizeCallInst(CallInst *CI, bool& ModifiedDT);
 176     bool MoveExtToFormExtLoad(Instruction *&I);
 177     bool OptimizeExtUses(Instruction *I);
 178     bool OptimizeSelectInst(SelectInst *SI);
 179     bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI);
 180     bool OptimizeExtractElementInst(Instruction *Inst);
 181     bool DupRetToEnableTailCallOpts(BasicBlock *BB);
 182     bool PlaceDbgValues(Function &F);
 183     bool sinkAndCmp(Function &F);
 184     bool ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI,
 185                         Instruction *&Inst,
 186                         const SmallVectorImpl<Instruction *> &Exts,
 187                         unsigned CreatedInstCost);
 188     bool splitBranchCondition(Function &F);
 189     bool simplifyOffsetableRelocate(Instruction &I);
 190   };
 191 }
 192
 193 char CodeGenPrepare::ID = 0;
 194 INITIALIZE_TM_PASS(CodeGenPrepare, "codegenprepare",
 195                    "Optimize for code generation", false, false)
 196
 197 FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) {
 198   return new CodeGenPrepare(TM);
 199 }
 200
 201 bool CodeGenPrepare::runOnFunction(Function &F) {
 202   if (skipOptnoneFunction(F))
 203     return false;
 204
 205   bool EverMadeChange = false;
 206   // Clear per function information.
 207   InsertedTruncsSet.clear();
 208   PromotedInsts.clear();
 209
 210   ModifiedDT = false;
 211   if (TM)
 212     TLI = TM->getSubtargetImpl(F)->getTargetLowering();
 213   TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
 214   TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
 215   OptSize = F.hasFnAttribute(Attribute::OptimizeForSize);
 216
 217   /// This optimization identifies DIV instructions that can be
 218   /// profitably bypassed and carried out with a shorter, faster divide.
 219   if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
 220     const DenseMap<unsigned int, unsigned int> &BypassWidths =
 221        TLI->getBypassSlowDivWidths();
 222     for (Function::iterator I = F.begin(); I != F.end(); I++)
 223       EverMadeChange |= bypassSlowDivision(F, I, BypassWidths);
 224   }
 225
 226   // Eliminate blocks that contain only PHI nodes and an
 227   // unconditional branch.
 228   EverMadeChange |= EliminateMostlyEmptyBlocks(F);
 229
 230   // llvm.dbg.value is far away from the value then iSel may not be able
 231   // handle it properly. iSel will drop llvm.dbg.value if it can not
 232   // find a node corresponding to the value.
 233   EverMadeChange |= PlaceDbgValues(F);
 234
 235   // If there is a mask, compare against zero, and branch that can be combined
 236   // into a single target instruction, push the mask and compare into branch
 237   // users. Do this before OptimizeBlock -> OptimizeInst ->
 238   // OptimizeCmpExpression, which perturbs the pattern being searched for.
 239   if (!DisableBranchOpts) {
 240     EverMadeChange |= sinkAndCmp(F);
 241     EverMadeChange |= splitBranchCondition(F);
 242   }
 243
 244   bool MadeChange = true;
 245   while (MadeChange) {
 246     MadeChange = false;
 247     for (Function::iterator I = F.begin(); I != F.end(); ) {
 248       BasicBlock *BB = I++;
 249       bool ModifiedDTOnIteration = false;
 250       MadeChange |= OptimizeBlock(*BB, ModifiedDTOnIteration);
 251
 252       // Restart BB iteration if the dominator tree of the Function was changed
 253       if (ModifiedDTOnIteration)
 254         break;
 255     }
 256     EverMadeChange |= MadeChange;
 257   }
 258
 259   SunkAddrs.clear();
 260
 261   if (!DisableBranchOpts) {
 262     MadeChange = false;
 263     SmallPtrSet<BasicBlock*, 8> WorkList;
 264     for (BasicBlock &BB : F) {
 265       SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB));
 266       MadeChange |= ConstantFoldTerminator(&BB, true);
 267       if (!MadeChange) continue;
 268
 269       for (SmallVectorImpl<BasicBlock*>::iterator
 270              II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
 271         if (pred_begin(*II) == pred_end(*II))
 272           WorkList.insert(*II);
 273     }
 274
 275     // Delete the dead blocks and any of their dead successors.
 276     MadeChange |= !WorkList.empty();
 277     while (!WorkList.empty()) {
 278       BasicBlock *BB = *WorkList.begin();
 279       WorkList.erase(BB);
 280       SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
 281
 282       DeleteDeadBlock(BB);
 283
 284       for (SmallVectorImpl<BasicBlock*>::iterator
 285              II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
 286         if (pred_begin(*II) == pred_end(*II))
 287           WorkList.insert(*II);
 288     }
 289
 290     // Merge pairs of basic blocks with unconditional branches, connected by
 291     // a single edge.
 292     if (EverMadeChange || MadeChange)
 293       MadeChange |= EliminateFallThrough(F);
 294
 295     EverMadeChange |= MadeChange;
 296   }
 297
 298   if (!DisableGCOpts) {
 299     SmallVector<Instruction *, 2> Statepoints;
 300     for (BasicBlock &BB : F)
 301       for (Instruction &I : BB)
 302         if (isStatepoint(I))
 303           Statepoints.push_back(&I);
 304     for (auto &I : Statepoints)
 305       EverMadeChange |= simplifyOffsetableRelocate(*I);
 306   }
 307
 308   return EverMadeChange;
 309 }
 310
 311 /// EliminateFallThrough - Merge basic blocks which are connected
 312 /// by a single edge, where one of the basic blocks has a single successor
 313 /// pointing to the other basic block, which has a single predecessor.
 314 bool CodeGenPrepare::EliminateFallThrough(Function &F) {
 315   bool Changed = false;
 316   // Scan all of the blocks in the function, except for the entry block.
 317   for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
 318     BasicBlock *BB = I++;
 319     // If the destination block has a single pred, then this is a trivial
 320     // edge, just collapse it.
 321     BasicBlock *SinglePred = BB->getSinglePredecessor();
 322
 323     // Don't merge if BB's address is taken.
 324     if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
 325
 326     BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
 327     if (Term && !Term->isConditional()) {
 328       Changed = true;
 329       DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n");
 330       // Remember if SinglePred was the entry block of the function.
 331       // If so, we will need to move BB back to the entry position.
 332       bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
 333       MergeBasicBlockIntoOnlyPred(BB, nullptr);
 334
 335       if (isEntry && BB != &BB->getParent()->getEntryBlock())
 336         BB->moveBefore(&BB->getParent()->getEntryBlock());
 337
 338       // We have erased a block. Update the iterator.
 339       I = BB;
 340     }
 341   }
 342   return Changed;
 343 }
 344
 345 /// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
 346 /// debug info directives, and an unconditional branch.  Passes before isel
 347 /// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
 348 /// isel.  Start by eliminating these blocks so we can split them the way we
 349 /// want them.
 350 bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
 351   bool MadeChange = false;
 352   // Note that this intentionally skips the entry block.
 353   for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
 354     BasicBlock *BB = I++;
 355
 356     // If this block doesn't end with an uncond branch, ignore it.
 357     BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
 358     if (!BI || !BI->isUnconditional())
 359       continue;
 360
 361     // If the instruction before the branch (skipping debug info) isn't a phi
 362     // node, then other stuff is happening here.
 363     BasicBlock::iterator BBI = BI;
 364     if (BBI != BB->begin()) {
 365       --BBI;
 366       while (isa<DbgInfoIntrinsic>(BBI)) {
 367         if (BBI == BB->begin())
 368           break;
 369         --BBI;
 370       }
 371       if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
 372         continue;
 373     }
 374
 375     // Do not break infinite loops.
 376     BasicBlock *DestBB = BI->getSuccessor(0);
 377     if (DestBB == BB)
 378       continue;
 379
 380     if (!CanMergeBlocks(BB, DestBB))
 381       continue;
 382
 383     EliminateMostlyEmptyBlock(BB);
 384     MadeChange = true;
 385   }
 386   return MadeChange;
 387 }
 388
 389 /// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
 390 /// single uncond branch between them, and BB contains no other non-phi
 391 /// instructions.
 392 bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
 393                                     const BasicBlock *DestBB) const {
 394   // We only want to eliminate blocks whose phi nodes are used by phi nodes in
 395   // the successor.  If there are more complex condition (e.g. preheaders),
 396   // don't mess around with them.
 397   BasicBlock::const_iterator BBI = BB->begin();
 398   while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
 399     for (const User *U : PN->users()) {
 400       const Instruction *UI = cast<Instruction>(U);
 401       if (UI->getParent() != DestBB || !isa<PHINode>(UI))
 402         return false;
 403       // If User is inside DestBB block and it is a PHINode then check
 404       // incoming value. If incoming value is not from BB then this is
 405       // a complex condition (e.g. preheaders) we want to avoid here.
 406       if (UI->getParent() == DestBB) {
 407         if (const PHINode *UPN = dyn_cast<PHINode>(UI))
 408           for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
 409             Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
 410             if (Insn && Insn->getParent() == BB &&
 411                 Insn->getParent() != UPN->getIncomingBlock(I))
 412               return false;
 413           }
 414       }
 415     }
 416   }
 417
 418   // If BB and DestBB contain any common predecessors, then the phi nodes in BB
 419   // and DestBB may have conflicting incoming values for the block.  If so, we
 420   // can't merge the block.
 421   const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
 422   if (!DestBBPN) return true;  // no conflict.
 423
 424   // Collect the preds of BB.
 425   SmallPtrSet<const BasicBlock*, 16> BBPreds;
 426   if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
 427     // It is faster to get preds from a PHI than with pred_iterator.
 428     for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
 429       BBPreds.insert(BBPN->getIncomingBlock(i));
 430   } else {
 431     BBPreds.insert(pred_begin(BB), pred_end(BB));
 432   }
 433
 434   // Walk the preds of DestBB.
 435   for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
 436     BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
 437     if (BBPreds.count(Pred)) {   // Common predecessor?
 438       BBI = DestBB->begin();
 439       while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
 440         const Value *V1 = PN->getIncomingValueForBlock(Pred);
 441         const Value *V2 = PN->getIncomingValueForBlock(BB);
 442
 443         // If V2 is a phi node in BB, look up what the mapped value will be.
 444         if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
 445           if (V2PN->getParent() == BB)
 446             V2 = V2PN->getIncomingValueForBlock(Pred);
 447
 448         // If there is a conflict, bail out.
 449         if (V1 != V2) return false;
 450       }
 451     }
 452   }
 453
 454   return true;
 455 }
 456
 457
 458 /// EliminateMostlyEmptyBlock - Eliminate a basic block that have only phi's and
 459 /// an unconditional branch in it.
 460 void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
 461   BranchInst *BI = cast<BranchInst>(BB->getTerminator());
 462   BasicBlock *DestBB = BI->getSuccessor(0);
 463
 464   DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);
 465
 466   // If the destination block has a single pred, then this is a trivial edge,
 467   // just collapse it.
 468   if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
 469     if (SinglePred != DestBB) {
 470       // Remember if SinglePred was the entry block of the function.  If so, we
 471       // will need to move BB back to the entry position.
 472       bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
 473       MergeBasicBlockIntoOnlyPred(DestBB, nullptr);
 474
 475       if (isEntry && BB != &BB->getParent()->getEntryBlock())
 476         BB->moveBefore(&BB->getParent()->getEntryBlock());
 477
 478       DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
 479       return;
 480     }
 481   }
 482
 483   // Otherwise, we have multiple predecessors of BB.  Update the PHIs in DestBB
 484   // to handle the new incoming edges it is about to have.
 485   PHINode *PN;
 486   for (BasicBlock::iterator BBI = DestBB->begin();
 487        (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
 488     // Remove the incoming value for BB, and remember it.
 489     Value *InVal = PN->removeIncomingValue(BB, false);
 490
 491     // Two options: either the InVal is a phi node defined in BB or it is some
 492     // value that dominates BB.
 493     PHINode *InValPhi = dyn_cast<PHINode>(InVal);
 494     if (InValPhi && InValPhi->getParent() == BB) {
 495       // Add all of the input values of the input PHI as inputs of this phi.
 496       for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
 497         PN->addIncoming(InValPhi->getIncomingValue(i),
 498                         InValPhi->getIncomingBlock(i));
 499     } else {
 500       // Otherwise, add one instance of the dominating value for each edge that
 501       // we will be adding.
 502       if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
 503         for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
 504           PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
 505       } else {
 506         for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
 507           PN->addIncoming(InVal, *PI);
 508       }
 509     }
 510   }
 511
 512   // The PHIs are now updated, change everything that refers to BB to use
 513   // DestBB and remove BB.
 514   BB->replaceAllUsesWith(DestBB);
 515   BB->eraseFromParent();
 516   ++NumBlocksElim;
 517
 518   DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
 519 }
 520
 521 // Computes a map of base pointer relocation instructions to corresponding
 522 // derived pointer relocation instructions given a vector of all relocate calls
 523 static void computeBaseDerivedRelocateMap(
 524     const SmallVectorImpl<User *> &AllRelocateCalls,
 525     DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> &
 526         RelocateInstMap) {
 527   // Collect information in two maps: one primarily for locating the base object
 528   // while filling the second map; the second map is the final structure holding
 529   // a mapping between Base and corresponding Derived relocate calls
 530   DenseMap<std::pair<unsigned, unsigned>, IntrinsicInst *> RelocateIdxMap;
 531   for (auto &U : AllRelocateCalls) {
 532     GCRelocateOperands ThisRelocate(U);
 533     IntrinsicInst *I = cast<IntrinsicInst>(U);
 534     auto K = std::make_pair(ThisRelocate.getBasePtrIndex(),
 535                             ThisRelocate.getDerivedPtrIndex());
 536     RelocateIdxMap.insert(std::make_pair(K, I));
 537   }
 538   for (auto &Item : RelocateIdxMap) {
 539     std::pair<unsigned, unsigned> Key = Item.first;
 540     if (Key.first == Key.second)
 541       // Base relocation: nothing to insert
 542       continue;
 543
 544     IntrinsicInst *I = Item.second;
 545     auto BaseKey = std::make_pair(Key.first, Key.first);
 546
 547     // We're iterating over RelocateIdxMap so we cannot modify it.
 548     auto MaybeBase = RelocateIdxMap.find(BaseKey);
 549     if (MaybeBase == RelocateIdxMap.end())
 550       // TODO: We might want to insert a new base object relocate and gep off
 551       // that, if there are enough derived object relocates.
 552       continue;
 553
 554     RelocateInstMap[MaybeBase->second].push_back(I);
 555   }
 556 }
 557
 558 // Accepts a GEP and extracts the operands into a vector provided they're all
 559 // small integer constants
 560 static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
 561                                           SmallVectorImpl<Value *> &OffsetV) {
 562   for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
 563     // Only accept small constant integer operands
 564     auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
 565     if (!Op || Op->getZExtValue() > 20)
 566       return false;
 567   }
 568
 569   for (unsigned i = 1; i < GEP->getNumOperands(); i++)
 570     OffsetV.push_back(GEP->getOperand(i));
 571   return true;
 572 }
 573
 574 // Takes a RelocatedBase (base pointer relocation instruction) and Targets to
 575 // replace, computes a replacement, and affects it.
 576 static bool
 577 simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase,
 578                           const SmallVectorImpl<IntrinsicInst *> &Targets) {
 579   bool MadeChange = false;
 580   for (auto &ToReplace : Targets) {
 581     GCRelocateOperands MasterRelocate(RelocatedBase);
 582     GCRelocateOperands ThisRelocate(ToReplace);
 583
 584     assert(ThisRelocate.getBasePtrIndex() == MasterRelocate.getBasePtrIndex() &&
 585            "Not relocating a derived object of the original base object");
 586     if (ThisRelocate.getBasePtrIndex() == ThisRelocate.getDerivedPtrIndex()) {
 587       // A duplicate relocate call. TODO: coalesce duplicates.
 588       continue;
 589     }
 590
 591     Value *Base = ThisRelocate.getBasePtr();
 592     auto Derived = dyn_cast<GetElementPtrInst>(ThisRelocate.getDerivedPtr());
 593     if (!Derived || Derived->getPointerOperand() != Base)
 594       continue;
 595
 596     SmallVector<Value *, 2> OffsetV;
 597     if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
 598       continue;
 599
 600     // Create a Builder and replace the target callsite with a gep
 601     IRBuilder<> Builder(ToReplace);
 602     Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
 603
 604     // If gc_relocate does not match the actual type, cast it to the right type.
 605     // In theory, there must be a bitcast after gc_relocate if the type does not
 606     // match, and we should reuse it to get the derived pointer. But it could be
 607     // cases like this:
 608     // bb1:
 609     //  ...
 610     //  %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
 611     //  br label %merge
 612     //
 613     // bb2:
 614     //  ...
 615     //  %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
 616     //  br label %merge
 617     //
 618     // merge:
 619     //  %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
 620     //  %cast = bitcast i8 addrspace(1)* %p1 in to i32 addrspace(1)*
 621     //
 622     // In this case, we can not find the bitcast any more. So we insert a new bitcast
 623     // no matter there is already one or not. In this way, we can handle all cases, and
 624     // the extra bitcast should be optimized away in later passes.
 625     Instruction *ActualRelocatedBase = RelocatedBase;
 626     if (RelocatedBase->getType() != Base->getType()) {
 627       ActualRelocatedBase =
 628           cast<Instruction>(Builder.CreateBitCast(RelocatedBase, Base->getType()));
 629       ActualRelocatedBase->removeFromParent();
 630       ActualRelocatedBase->insertAfter(cast<Instruction>(RelocatedBase));
 631     }
 632     Value *Replacement = Builder.CreateGEP(
 633         Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV));
 634     Instruction *ReplacementInst = cast<Instruction>(Replacement);
 635     ReplacementInst->removeFromParent();
 636     ReplacementInst->insertAfter(ActualRelocatedBase);
 637     Replacement->takeName(ToReplace);
 638     // If the newly generated derived pointer's type does not match the original derived
 639     // pointer's type, cast the new derived pointer to match it. Same reasoning as above.
 640     Instruction *ActualReplacement = ReplacementInst;
 641     if (ReplacementInst->getType() != ToReplace->getType()) {
 642       ActualReplacement =
 643           cast<Instruction>(Builder.CreateBitCast(ReplacementInst, ToReplace->getType()));
 644       ActualReplacement->removeFromParent();
 645       ActualReplacement->insertAfter(ReplacementInst);
 646     }
 647     ToReplace->replaceAllUsesWith(ActualReplacement);
 648     ToReplace->eraseFromParent();
 649
 650     MadeChange = true;
 651   }
 652   return MadeChange;
 653 }
 654
 655 // Turns this:
 656 //
 657 // %base = ...
 658 // %ptr = gep %base + 15
 659 // %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
 660 // %base' = relocate(%tok, i32 4, i32 4)
 661 // %ptr' = relocate(%tok, i32 4, i32 5)
 662 // %val = load %ptr'
 663 //
 664 // into this:
 665 //
 666 // %base = ...
 667 // %ptr = gep %base + 15
 668 // %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
 669 // %base' = gc.relocate(%tok, i32 4, i32 4)
 670 // %ptr' = gep %base' + 15
 671 // %val = load %ptr'
 672 bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
 673   bool MadeChange = false;
 674   SmallVector<User *, 2> AllRelocateCalls;
 675
 676   for (auto *U : I.users())
 677     if (isGCRelocate(dyn_cast<Instruction>(U)))
 678       // Collect all the relocate calls associated with a statepoint
 679       AllRelocateCalls.push_back(U);
 680
 681   // We need atleast one base pointer relocation + one derived pointer
 682   // relocation to mangle
 683   if (AllRelocateCalls.size() < 2)
 684     return false;
 685
 686   // RelocateInstMap is a mapping from the base relocate instruction to the
 687   // corresponding derived relocate instructions
 688   DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> RelocateInstMap;
 689   computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
 690   if (RelocateInstMap.empty())
 691     return false;
 692
 693   for (auto &Item : RelocateInstMap)
 694     // Item.first is the RelocatedBase to offset against
 695     // Item.second is the vector of Targets to replace
 696     MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
 697   return MadeChange;
 698 }
 699
 700 /// SinkCast - Sink the specified cast instruction into its user blocks
 701 static bool SinkCast(CastInst *CI) {
 702   BasicBlock *DefBB = CI->getParent();
 703
 704   /// InsertedCasts - Only insert a cast in each block once.
 705   DenseMap<BasicBlock*, CastInst*> InsertedCasts;
 706
 707   bool MadeChange = false;
 708   for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
 709        UI != E; ) {
 710     Use &TheUse = UI.getUse();
 711     Instruction *User = cast<Instruction>(*UI);
 712
 713     // Figure out which BB this cast is used in.  For PHI's this is the
 714     // appropriate predecessor block.
 715     BasicBlock *UserBB = User->getParent();
 716     if (PHINode *PN = dyn_cast<PHINode>(User)) {
 717       UserBB = PN->getIncomingBlock(TheUse);
 718     }
 719
 720     // Preincrement use iterator so we don't invalidate it.
 721     ++UI;
 722
 723     // If this user is in the same block as the cast, don't change the cast.
 724     if (UserBB == DefBB) continue;
 725
 726     // If we have already inserted a cast into this block, use it.
 727     CastInst *&InsertedCast = InsertedCasts[UserBB];
 728
 729     if (!InsertedCast) {
 730       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
 731       InsertedCast =
 732         CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
 733                          InsertPt);
 734     }
 735
 736     // Replace a use of the cast with a use of the new cast.
 737     TheUse = InsertedCast;
 738     MadeChange = true;
 739     ++NumCastUses;
 740   }
 741
 742   // If we removed all uses, nuke the cast.
 743   if (CI->use_empty()) {
 744     CI->eraseFromParent();
 745     MadeChange = true;
 746   }
 747
 748   return MadeChange;
 749 }
 750
 751 /// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
 752 /// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
 753 /// sink it into user blocks to reduce the number of virtual
 754 /// registers that must be created and coalesced.
 755 ///
 756 /// Return true if any changes are made.
 757 ///
 758 static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
 759   // If this is a noop copy,
 760   EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
 761   EVT DstVT = TLI.getValueType(CI->getType());
 762
 763   // This is an fp<->int conversion?
 764   if (SrcVT.isInteger() != DstVT.isInteger())
 765     return false;
 766
 767   // If this is an extension, it will be a zero or sign extension, which
 768   // isn't a noop.
 769   if (SrcVT.bitsLT(DstVT)) return false;
 770
 771   // If these values will be promoted, find out what they will be promoted
 772   // to.  This helps us consider truncates on PPC as noop copies when they
 773   // are.
 774   if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
 775       TargetLowering::TypePromoteInteger)
 776     SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
 777   if (TLI.getTypeAction(CI->getContext(), DstVT) ==
 778       TargetLowering::TypePromoteInteger)
 779     DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
 780
 781   // If, after promotion, these are the same types, this is a noop copy.
 782   if (SrcVT != DstVT)
 783     return false;
 784
 785   return SinkCast(CI);
 786 }
 787
 788 /// CombineUAddWithOverflow - try to combine CI into a call to the
 789 /// llvm.uadd.with.overflow intrinsic if possible.
 790 ///
 791 /// Return true if any changes were made.
 792 static bool CombineUAddWithOverflow(CmpInst *CI) {
 793   Value *A, *B;
 794   Instruction *AddI;
 795   if (!match(CI,
 796              m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI))))
 797     return false;
 798
 799   Type *Ty = AddI->getType();
 800   if (!isa<IntegerType>(Ty))
 801     return false;
 802
 803   // We don't want to move around uses of condition values this late, so we we
 804   // check if it is legal to create the call to the intrinsic in the basic
 805   // block containing the icmp:
 806
 807   if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse())
 808     return false;
 809
 810 #ifndef NDEBUG
 811   // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption
 812   // for now:
 813   if (AddI->hasOneUse())
 814     assert(*AddI->user_begin() == CI && "expected!");
 815 #endif
 816
 817   Module *M = CI->getParent()->getParent()->getParent();
 818   Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
 819
 820   auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
 821
 822   auto *UAddWithOverflow =
 823       CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
 824   auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt);
 825   auto *Overflow =
 826       ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt);
 827
 828   CI->replaceAllUsesWith(Overflow);
 829   AddI->replaceAllUsesWith(UAdd);
 830   CI->eraseFromParent();
 831   AddI->eraseFromParent();
 832   return true;
 833 }
 834
 835 /// SinkCmpExpression - Sink the given CmpInst into user blocks to reduce
 836 /// the number of virtual registers that must be created and coalesced.  This is
 837 /// a clear win except on targets with multiple condition code registers
 838 ///  (PowerPC), where it might lose; some adjustment may be wanted there.
 839 ///
 840 /// Return true if any changes are made.
 841 static bool SinkCmpExpression(CmpInst *CI) {
 842   BasicBlock *DefBB = CI->getParent();
 843
 844   /// InsertedCmp - Only insert a cmp in each block once.
 845   DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
 846
 847   bool MadeChange = false;
 848   for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
 849        UI != E; ) {
 850     Use &TheUse = UI.getUse();
 851     Instruction *User = cast<Instruction>(*UI);
 852
 853     // Preincrement use iterator so we don't invalidate it.
 854     ++UI;
 855
 856     // Don't bother for PHI nodes.
 857     if (isa<PHINode>(User))
 858       continue;
 859
 860     // Figure out which BB this cmp is used in.
 861     BasicBlock *UserBB = User->getParent();
 862
 863     // If this user is in the same block as the cmp, don't change the cmp.
 864     if (UserBB == DefBB) continue;
 865
 866     // If we have already inserted a cmp into this block, use it.
 867     CmpInst *&InsertedCmp = InsertedCmps[UserBB];
 868
 869     if (!InsertedCmp) {
 870       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
 871       InsertedCmp =
 872         CmpInst::Create(CI->getOpcode(),
 873                         CI->getPredicate(),  CI->getOperand(0),
 874                         CI->getOperand(1), "", InsertPt);
 875     }
 876
 877     // Replace a use of the cmp with a use of the new cmp.
 878     TheUse = InsertedCmp;
 879     MadeChange = true;
 880     ++NumCmpUses;
 881   }
 882
 883   // If we removed all uses, nuke the cmp.
 884   if (CI->use_empty()) {
 885     CI->eraseFromParent();
 886     MadeChange = true;
 887   }
 888
 889   return MadeChange;
 890 }
 891
 892 static bool OptimizeCmpExpression(CmpInst *CI) {
 893   if (SinkCmpExpression(CI))
 894     return true;
 895
 896   if (CombineUAddWithOverflow(CI))
 897     return true;
 898
 899   return false;
 900 }
 901
 902 /// isExtractBitsCandidateUse - Check if the candidates could
 903 /// be combined with shift instruction, which includes:
 904 /// 1. Truncate instruction
 905 /// 2. And instruction and the imm is a mask of the low bits:
 906 /// imm & (imm+1) == 0
 907 static bool isExtractBitsCandidateUse(Instruction *User) {
 908   if (!isa<TruncInst>(User)) {
 909     if (User->getOpcode() != Instruction::And ||
 910         !isa<ConstantInt>(User->getOperand(1)))
 911       return false;
 912
 913     const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
 914
 915     if ((Cimm & (Cimm + 1)).getBoolValue())
 916       return false;
 917   }
 918   return true;
 919 }
 920
 921 /// SinkShiftAndTruncate - sink both shift and truncate instruction
 922 /// to the use of truncate's BB.
 923 static bool
 924 SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
 925                      DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
 926                      const TargetLowering &TLI) {
 927   BasicBlock *UserBB = User->getParent();
 928   DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
 929   TruncInst *TruncI = dyn_cast<TruncInst>(User);
 930   bool MadeChange = false;
 931
 932   for (Value::user_iterator TruncUI = TruncI->user_begin(),
 933                             TruncE = TruncI->user_end();
 934        TruncUI != TruncE;) {
 935
 936     Use &TruncTheUse = TruncUI.getUse();
 937     Instruction *TruncUser = cast<Instruction>(*TruncUI);
 938     // Preincrement use iterator so we don't invalidate it.
 939
 940     ++TruncUI;
 941
 942     int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
 943     if (!ISDOpcode)
 944       continue;
 945
 946     // If the use is actually a legal node, there will not be an
 947     // implicit truncate.
 948     // FIXME: always querying the result type is just an
 949     // approximation; some nodes' legality is determined by the
 950     // operand or other means. There's no good way to find out though.
 951     if (TLI.isOperationLegalOrCustom(
 952             ISDOpcode, TLI.getValueType(TruncUser->getType(), true)))
 953       continue;
 954
 955     // Don't bother for PHI nodes.
 956     if (isa<PHINode>(TruncUser))
 957       continue;
 958
 959     BasicBlock *TruncUserBB = TruncUser->getParent();
 960
 961     if (UserBB == TruncUserBB)
 962       continue;
 963
 964     BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
 965     CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
 966
 967     if (!InsertedShift && !InsertedTrunc) {
 968       BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
 969       // Sink the shift
 970       if (ShiftI->getOpcode() == Instruction::AShr)
 971         InsertedShift =
 972             BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
 973       else
 974         InsertedShift =
 975             BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);
 976
 977       // Sink the trunc
 978       BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
 979       TruncInsertPt++;
 980
 981       InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
 982                                        TruncI->getType(), "", TruncInsertPt);
 983
 984       MadeChange = true;
 985
 986       TruncTheUse = InsertedTrunc;
 987     }
 988   }
 989   return MadeChange;
 990 }
 991
 992 /// OptimizeExtractBits - sink the shift *right* instruction into user blocks if
 993 /// the uses could potentially be combined with this shift instruction and
 994 /// generate BitExtract instruction. It will only be applied if the architecture
 995 /// supports BitExtract instruction. Here is an example:
 996 /// BB1:
 997 ///   %x.extract.shift = lshr i64 %arg1, 32
 998 /// BB2:
 999 ///   %x.extract.trunc = trunc i64 %x.extract.shift to i16
1000 /// ==>
1001 ///
1002 /// BB2:
1003 ///   %x.extract.shift.1 = lshr i64 %arg1, 32
1004 ///   %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
1005 ///
1006 /// CodeGen will recoginze the pattern in BB2 and generate BitExtract
1007 /// instruction.
1008 /// Return true if any changes are made.
1009 static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
1010                                 const TargetLowering &TLI) {
1011   BasicBlock *DefBB = ShiftI->getParent();
1012
1013   /// Only insert instructions in each block once.
1014   DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
1015
1016   bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(ShiftI->getType()));
1017
1018   bool MadeChange = false;
1019   for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
1020        UI != E;) {
1021     Use &TheUse = UI.getUse();
1022     Instruction *User = cast<Instruction>(*UI);
1023     // Preincrement use iterator so we don't invalidate it.
1024     ++UI;
1025
1026     // Don't bother for PHI nodes.
1027     if (isa<PHINode>(User))
1028       continue;
1029
1030     if (!isExtractBitsCandidateUse(User))
1031       continue;
1032
1033     BasicBlock *UserBB = User->getParent();
1034
1035     if (UserBB == DefBB) {
1036       // If the shift and truncate instruction are in the same BB. The use of
1037       // the truncate(TruncUse) may still introduce another truncate if not
1038       // legal. In this case, we would like to sink both shift and truncate
1039       // instruction to the BB of TruncUse.
1040       // for example:
1041       // BB1:
1042       // i64 shift.result = lshr i64 opnd, imm
1043       // trunc.result = trunc shift.result to i16
1044       //
1045       // BB2:
1046       //   ----> We will have an implicit truncate here if the architecture does
1047       //   not have i16 compare.
1048       // cmp i16 trunc.result, opnd2
1049       //
1050       if (isa<TruncInst>(User) && shiftIsLegal
1051           // If the type of the truncate is legal, no trucate will be
1052           // introduced in other basic blocks.
1053           && (!TLI.isTypeLegal(TLI.getValueType(User->getType()))))
1054         MadeChange =
1055             SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI);
1056
1057       continue;
1058     }
1059     // If we have already inserted a shift into this block, use it.
1060     BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
1061
1062     if (!InsertedShift) {
1063       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1064
1065       if (ShiftI->getOpcode() == Instruction::AShr)
1066         InsertedShift =
1067             BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
1068       else
1069         InsertedShift =
1070             BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);
1071
1072       MadeChange = true;
1073     }
1074
1075     // Replace a use of the shift with a use of the new shift.
1076     TheUse = InsertedShift;
1077   }
1078
1079   // If we removed all uses, nuke the shift.
1080   if (ShiftI->use_empty())
1081     ShiftI->eraseFromParent();
1082
1083   return MadeChange;
1084 }
1085
1086 //  ScalarizeMaskedLoad() translates masked load intrinsic, like
1087 // <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
1088 //                               <16 x i1> %mask, <16 x i32> %passthru)
1089 // to a chain of basic blocks, whith loading element one-by-one if
1090 // the appropriate mask bit is set
1091 //
1092 //  %1 = bitcast i8* %addr to i32*
1093 //  %2 = extractelement <16 x i1> %mask, i32 0
1094 //  %3 = icmp eq i1 %2, true
1095 //  br i1 %3, label %cond.load, label %else
1096 //
1097 //cond.load:                                        ; preds = %0
1098 //  %4 = getelementptr i32* %1, i32 0
1099 //  %5 = load i32* %4
1100 //  %6 = insertelement <16 x i32> undef, i32 %5, i32 0
1101 //  br label %else
1102 //
1103 //else:                                             ; preds = %0, %cond.load
1104 //  %res.phi.else = phi <16 x i32> [ %6, %cond.load ], [ undef, %0 ]
1105 //  %7 = extractelement <16 x i1> %mask, i32 1
1106 //  %8 = icmp eq i1 %7, true
1107 //  br i1 %8, label %cond.load1, label %else2
1108 //
1109 //cond.load1:                                       ; preds = %else
1110 //  %9 = getelementptr i32* %1, i32 1
1111 //  %10 = load i32* %9
1112 //  %11 = insertelement <16 x i32> %res.phi.else, i32 %10, i32 1
1113 //  br label %else2
1114 //
1115 //else2:                                            ; preds = %else, %cond.load1
1116 //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
1117 //  %12 = extractelement <16 x i1> %mask, i32 2
1118 //  %13 = icmp eq i1 %12, true
1119 //  br i1 %13, label %cond.load4, label %else5
1120 //
1121 static void ScalarizeMaskedLoad(CallInst *CI) {
1122   Value *Ptr  = CI->getArgOperand(0);
1123   Value *Src0 = CI->getArgOperand(3);
1124   Value *Mask = CI->getArgOperand(2);
1125   VectorType *VecType = dyn_cast<VectorType>(CI->getType());
1126   Type *EltTy = VecType->getElementType();
1127
1128   assert(VecType && "Unexpected return type of masked load intrinsic");
1129
1130   IRBuilder<> Builder(CI->getContext());
1131   Instruction *InsertPt = CI;
1132   BasicBlock *IfBlock = CI->getParent();
1133   BasicBlock *CondBlock = nullptr;
1134   BasicBlock *PrevIfBlock = CI->getParent();
1135   Builder.SetInsertPoint(InsertPt);
1136
1137   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
1138
1139   // Bitcast %addr fron i8* to EltTy*
1140   Type *NewPtrType =
1141     EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
1142   Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
1143   Value *UndefVal = UndefValue::get(VecType);
1144
1145   // The result vector
1146   Value *VResult = UndefVal;
1147
1148   PHINode *Phi = nullptr;
1149   Value *PrevPhi = UndefVal;
1150
1151   unsigned VectorWidth = VecType->getNumElements();
1152   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
1153
1154     // Fill the "else" block, created in the previous iteration
1155     //
1156     //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
1157     //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
1158     //  %to_load = icmp eq i1 %mask_1, true
1159     //  br i1 %to_load, label %cond.load, label %else
1160     //
1161     if (Idx > 0) {
1162       Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
1163       Phi->addIncoming(VResult, CondBlock);
1164       Phi->addIncoming(PrevPhi, PrevIfBlock);
1165       PrevPhi = Phi;
1166       VResult = Phi;
1167     }
1168
1169     Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
1170     Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
1171                                     ConstantInt::get(Predicate->getType(), 1));
1172
1173     // Create "cond" block
1174     //
1175     //  %EltAddr = getelementptr i32* %1, i32 0
1176     //  %Elt = load i32* %EltAddr
1177     //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
1178     //
1179     CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
1180     Builder.SetInsertPoint(InsertPt);
1181
1182     Value *Gep =
1183         Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
1184     LoadInst* Load = Builder.CreateLoad(Gep, false);
1185     VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
1186
1187     // Create "else" block, fill it in the next iteration
1188     BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
1189     Builder.SetInsertPoint(InsertPt);
1190     Instruction *OldBr = IfBlock->getTerminator();
1191     BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
1192     OldBr->eraseFromParent();
1193     PrevIfBlock = IfBlock;
1194     IfBlock = NewIfBlock;
1195   }
1196
1197   Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
1198   Phi->addIncoming(VResult, CondBlock);
1199   Phi->addIncoming(PrevPhi, PrevIfBlock);
1200   Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
1201   CI->replaceAllUsesWith(NewI);
1202   CI->eraseFromParent();
1203 }
1204
1205 //  ScalarizeMaskedStore() translates masked store intrinsic, like
1206 // void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
1207 //                               <16 x i1> %mask)
1208 // to a chain of basic blocks, that stores element one-by-one if
1209 // the appropriate mask bit is set
1210 //
1211 //   %1 = bitcast i8* %addr to i32*
1212 //   %2 = extractelement <16 x i1> %mask, i32 0
1213 //   %3 = icmp eq i1 %2, true
1214 //   br i1 %3, label %cond.store, label %else
1215 //
1216 // cond.store:                                       ; preds = %0
1217 //   %4 = extractelement <16 x i32> %val, i32 0
1218 //   %5 = getelementptr i32* %1, i32 0
1219 //   store i32 %4, i32* %5
1220 //   br label %else
1221 //
1222 // else:                                             ; preds = %0, %cond.store
1223 //   %6 = extractelement <16 x i1> %mask, i32 1
1224 //   %7 = icmp eq i1 %6, true
1225 //   br i1 %7, label %cond.store1, label %else2
1226 //
1227 // cond.store1:                                      ; preds = %else
1228 //   %8 = extractelement <16 x i32> %val, i32 1
1229 //   %9 = getelementptr i32* %1, i32 1
1230 //   store i32 %8, i32* %9
1231 //   br label %else2
1232 //   . . .
1233 static void ScalarizeMaskedStore(CallInst *CI) {
1234   Value *Ptr  = CI->getArgOperand(1);
1235   Value *Src = CI->getArgOperand(0);
1236   Value *Mask = CI->getArgOperand(3);
1237
1238   VectorType *VecType = dyn_cast<VectorType>(Src->getType());
1239   Type *EltTy = VecType->getElementType();
1240
1241   assert(VecType && "Unexpected data type in masked store intrinsic");
1242
1243   IRBuilder<> Builder(CI->getContext());
1244   Instruction *InsertPt = CI;
1245   BasicBlock *IfBlock = CI->getParent();
1246   Builder.SetInsertPoint(InsertPt);
1247   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
1248
1249   // Bitcast %addr fron i8* to EltTy*
1250   Type *NewPtrType =
1251     EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
1252   Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
1253
1254   unsigned VectorWidth = VecType->getNumElements();
1255   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
1256
1257     // Fill the "else" block, created in the previous iteration
1258     //
1259     //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
1260     //  %to_store = icmp eq i1 %mask_1, true
1261     //  br i1 %to_load, label %cond.store, label %else
1262     //
1263     Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
1264     Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
1265                                     ConstantInt::get(Predicate->getType(), 1));
1266
1267     // Create "cond" block
1268     //
1269     //  %OneElt = extractelement <16 x i32> %Src, i32 Idx
1270     //  %EltAddr = getelementptr i32* %1, i32 0
1271     //  %store i32 %OneElt, i32* %EltAddr
1272     //
1273     BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
1274     Builder.SetInsertPoint(InsertPt);
1275
1276     Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
1277     Value *Gep =
1278         Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
1279     Builder.CreateStore(OneElt, Gep);
1280
1281     // Create "else" block, fill it in the next iteration
1282     BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
1283     Builder.SetInsertPoint(InsertPt);
1284     Instruction *OldBr = IfBlock->getTerminator();
1285     BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
1286     OldBr->eraseFromParent();
1287     IfBlock = NewIfBlock;
1288   }
1289   CI->eraseFromParent();
1290 }
1291
1292 bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
1293   BasicBlock *BB = CI->getParent();
1294
1295   // Lower inline assembly if we can.
1296   // If we found an inline asm expession, and if the target knows how to
1297   // lower it to normal LLVM code, do so now.
1298   if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
1299     if (TLI->ExpandInlineAsm(CI)) {
1300       // Avoid invalidating the iterator.
1301       CurInstIterator = BB->begin();
1302       // Avoid processing instructions out of order, which could cause
1303       // reuse before a value is defined.
1304       SunkAddrs.clear();
1305       return true;
1306     }
1307     // Sink address computing for memory operands into the block.
1308     if (OptimizeInlineAsmInst(CI))
1309       return true;
1310   }
1311
1312   const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr;
1313
1314   // Align the pointer arguments to this call if the target thinks it's a good
1315   // idea
1316   unsigned MinSize, PrefAlign;
1317   if (TLI && TD && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
1318     for (auto &Arg : CI->arg_operands()) {
1319       // We want to align both objects whose address is used directly and
1320       // objects whose address is used in casts and GEPs, though it only makes
1321       // sense for GEPs if the offset is a multiple of the desired alignment and
1322       // if size - offset meets the size threshold.
1323       if (!Arg->getType()->isPointerTy())
1324         continue;
1325       APInt Offset(TD->getPointerSizeInBits(
1326                      cast<PointerType>(Arg->getType())->getAddressSpace()), 0);
1327       Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*TD, Offset);
1328       uint64_t Offset2 = Offset.getLimitedValue();
1329       if ((Offset2 & (PrefAlign-1)) != 0)
1330         continue;
1331       AllocaInst *AI;
1332       if ((AI = dyn_cast<AllocaInst>(Val)) &&
1333           AI->getAlignment() < PrefAlign &&
1334           TD->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
1335         AI->setAlignment(PrefAlign);
1336       // Global variables can only be aligned if they are defined in this
1337       // object (i.e. they are uniquely initialized in this object), and
1338       // over-aligning global variables that have an explicit section is
1339       // forbidden.
1340       GlobalVariable *GV;
1341       if ((GV = dyn_cast<GlobalVariable>(Val)) &&
1342           GV->hasUniqueInitializer() &&
1343           !GV->hasSection() &&
1344           GV->getAlignment() < PrefAlign &&
1345           TD->getTypeAllocSize(
1346             GV->getType()->getElementType()) >= MinSize + Offset2)
1347         GV->setAlignment(PrefAlign);
1348     }
1349     // If this is a memcpy (or similar) then we may be able to improve the
1350     // alignment
1351     if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
1352       unsigned Align = getKnownAlignment(MI->getDest(), *TD);
1353       if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
1354         Align = std::min(Align, getKnownAlignment(MTI->getSource(), *TD));
1355       if (Align > MI->getAlignment())
1356         MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align));
1357     }
1358   }
1359
1360   IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
1361   if (II) {
1362     switch (II->getIntrinsicID()) {
1363     default: break;
1364     case Intrinsic::objectsize: {
1365       // Lower all uses of llvm.objectsize.*
1366       bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
1367       Type *ReturnTy = CI->getType();
1368       Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
1369
1370       // Substituting this can cause recursive simplifications, which can
1371       // invalidate our iterator.  Use a WeakVH to hold onto it in case this
1372       // happens.
1373       WeakVH IterHandle(CurInstIterator);
1374
1375       replaceAndRecursivelySimplify(CI, RetVal,
1376                                     TLInfo, nullptr);
1377
1378       // If the iterator instruction was recursively deleted, start over at the
1379       // start of the block.
1380       if (IterHandle != CurInstIterator) {
1381         CurInstIterator = BB->begin();
1382         SunkAddrs.clear();
1383       }
1384       return true;
1385     }
1386     case Intrinsic::masked_load: {
1387       // Scalarize unsupported vector masked load
1388       if (!TTI->isLegalMaskedLoad(CI->getType(), 1)) {
1389         ScalarizeMaskedLoad(CI);
1390         ModifiedDT = true;
1391         return true;
1392       }
1393       return false;
1394     }
1395     case Intrinsic::masked_store: {
1396       if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType(), 1)) {
1397         ScalarizeMaskedStore(CI);
1398         ModifiedDT = true;
1399         return true;
1400       }
1401       return false;
1402     }
1403     }
1404
1405     if (TLI) {
1406       SmallVector<Value*, 2> PtrOps;
1407       Type *AccessTy;
1408       if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy))
1409         while (!PtrOps.empty())
1410           if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy))
1411             return true;
1412     }
1413   }
1414
1415   // From here on out we're working with named functions.
1416   if (!CI->getCalledFunction()) return false;
1417
1418   // Lower all default uses of _chk calls.  This is very similar
1419   // to what InstCombineCalls does, but here we are only lowering calls
1420   // to fortified library functions (e.g. __memcpy_chk) that have the default
1421   // "don't know" as the objectsize.  Anything else should be left alone.
1422   FortifiedLibCallSimplifier Simplifier(TLInfo, true);
1423   if (Value *V = Simplifier.optimizeCall(CI)) {
1424     CI->replaceAllUsesWith(V);
1425     CI->eraseFromParent();
1426     return true;
1427   }
1428   return false;
1429 }
1430
1431 /// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
1432 /// instructions to the predecessor to enable tail call optimizations. The
1433 /// case it is currently looking for is:
1434 /// @code
1435 /// bb0:
1436 ///   %tmp0 = tail call i32 @f0()
1437 ///   br label %return
1438 /// bb1:
1439 ///   %tmp1 = tail call i32 @f1()
1440 ///   br label %return
1441 /// bb2:
1442 ///   %tmp2 = tail call i32 @f2()
1443 ///   br label %return
1444 /// return:
1445 ///   %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
1446 ///   ret i32 %retval
1447 /// @endcode
1448 ///
1449 /// =>
1450 ///
1451 /// @code
1452 /// bb0:
1453 ///   %tmp0 = tail call i32 @f0()
1454 ///   ret i32 %tmp0
1455 /// bb1:
1456 ///   %tmp1 = tail call i32 @f1()
1457 ///   ret i32 %tmp1
1458 /// bb2:
1459 ///   %tmp2 = tail call i32 @f2()
1460 ///   ret i32 %tmp2
1461 /// @endcode
1462 bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) {
1463   if (!TLI)
1464     return false;
1465
1466   ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
1467   if (!RI)
1468     return false;
1469
1470   PHINode *PN = nullptr;
1471   BitCastInst *BCI = nullptr;
1472   Value *V = RI->getReturnValue();
1473   if (V) {
1474     BCI = dyn_cast<BitCastInst>(V);
1475     if (BCI)
1476       V = BCI->getOperand(0);
1477
1478     PN = dyn_cast<PHINode>(V);
1479     if (!PN)
1480       return false;
1481   }
1482
1483   if (PN && PN->getParent() != BB)
1484     return false;
1485
1486   // It's not safe to eliminate the sign / zero extension of the return value.
1487   // See llvm::isInTailCallPosition().
1488   const Function *F = BB->getParent();
1489   AttributeSet CallerAttrs = F->getAttributes();
1490   if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
1491       CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
1492     return false;
1493
1494   // Make sure there are no instructions between the PHI and return, or that the
1495   // return is the first instruction in the block.
1496   if (PN) {
1497     BasicBlock::iterator BI = BB->begin();
1498     do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
1499     if (&*BI == BCI)
1500       // Also skip over the bitcast.
1501       ++BI;
1502     if (&*BI != RI)
1503       return false;
1504   } else {
1505     BasicBlock::iterator BI = BB->begin();
1506     while (isa<DbgInfoIntrinsic>(BI)) ++BI;
1507     if (&*BI != RI)
1508       return false;
1509   }
1510
1511   /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
1512   /// call.
1513   SmallVector<CallInst*, 4> TailCalls;
1514   if (PN) {
1515     for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
1516       CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
1517       // Make sure the phi value is indeed produced by the tail call.
1518       if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
1519           TLI->mayBeEmittedAsTailCall(CI))
1520         TailCalls.push_back(CI);
1521     }
1522   } else {
1523     SmallPtrSet<BasicBlock*, 4> VisitedBBs;
1524     for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
1525       if (!VisitedBBs.insert(*PI).second)
1526         continue;
1527
1528       BasicBlock::InstListType &InstList = (*PI)->getInstList();
1529       BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
1530       BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
1531       do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
1532       if (RI == RE)
1533         continue;
1534
1535       CallInst *CI = dyn_cast<CallInst>(&*RI);
1536       if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI))
1537         TailCalls.push_back(CI);
1538     }
1539   }
1540
1541   bool Changed = false;
1542   for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
1543     CallInst *CI = TailCalls[i];
1544     CallSite CS(CI);
1545
1546     // Conservatively require the attributes of the call to match those of the
1547     // return. Ignore noalias because it doesn't affect the call sequence.
1548     AttributeSet CalleeAttrs = CS.getAttributes();
1549     if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
1550           removeAttribute(Attribute::NoAlias) !=
1551         AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
1552           removeAttribute(Attribute::NoAlias))
1553       continue;
1554
1555     // Make sure the call instruction is followed by an unconditional branch to
1556     // the return block.
1557     BasicBlock *CallBB = CI->getParent();
1558     BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
1559     if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
1560       continue;
1561
1562     // Duplicate the return into CallBB.
1563     (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
1564     ModifiedDT = Changed = true;
1565     ++NumRetsDup;
1566   }
1567
1568   // If we eliminated all predecessors of the block, delete the block now.
1569   if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
1570     BB->eraseFromParent();
1571
1572   return Changed;
1573 }
1574
1575 //===----------------------------------------------------------------------===//
1576 // Memory Optimization
1577 //===----------------------------------------------------------------------===//
1578
1579 namespace {
1580
1581 /// ExtAddrMode - This is an extended version of TargetLowering::AddrMode
1582 /// which holds actual Value*'s for register values.
1583 struct ExtAddrMode : public TargetLowering::AddrMode {
1584   Value *BaseReg;
1585   Value *ScaledReg;
1586   ExtAddrMode() : BaseReg(nullptr), ScaledReg(nullptr) {}
1587   void print(raw_ostream &OS) const;
1588   void dump() const;
1589
1590   bool operator==(const ExtAddrMode& O) const {
1591     return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) &&
1592            (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) &&
1593            (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale);
1594   }
1595 };
1596
1597 #ifndef NDEBUG
1598 static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
1599   AM.print(OS);
1600   return OS;
1601 }
1602 #endif
1603
1604 void ExtAddrMode::print(raw_ostream &OS) const {
1605   bool NeedPlus = false;
1606   OS << "[";
1607   if (BaseGV) {
1608     OS << (NeedPlus ? " + " : "")
1609        << "GV:";
1610     BaseGV->printAsOperand(OS, /*PrintType=*/false);
1611     NeedPlus = true;
1612   }
1613
1614   if (BaseOffs) {
1615     OS << (NeedPlus ? " + " : "")
1616        << BaseOffs;
1617     NeedPlus = true;
1618   }
1619
1620   if (BaseReg) {
1621     OS << (NeedPlus ? " + " : "")
1622        << "Base:";
1623     BaseReg->printAsOperand(OS, /*PrintType=*/false);
1624     NeedPlus = true;
1625   }
1626   if (Scale) {
1627     OS << (NeedPlus ? " + " : "")
1628        << Scale << "*";
1629     ScaledReg->printAsOperand(OS, /*PrintType=*/false);
1630   }
1631
1632   OS << ']';
1633 }
1634
1635 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1636 void ExtAddrMode::dump() const {
1637   print(dbgs());
1638   dbgs() << '\n';
1639 }
1640 #endif
1641
1642 /// \brief This class provides transaction based operation on the IR.
1643 /// Every change made through this class is recorded in the internal state and
1644 /// can be undone (rollback) until commit is called.
1645 class TypePromotionTransaction {
1646
1647   /// \brief This represents the common interface of the individual transaction.
1648   /// Each class implements the logic for doing one specific modification on
1649   /// the IR via the TypePromotionTransaction.
1650   class TypePromotionAction {
1651   protected:
1652     /// The Instruction modified.
1653     Instruction *Inst;
1654
1655   public:
1656     /// \brief Constructor of the action.
1657     /// The constructor performs the related action on the IR.
1658     TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
1659
1660     virtual ~TypePromotionAction() {}
1661
1662     /// \brief Undo the modification done by this action.
1663     /// When this method is called, the IR must be in the same state as it was
1664     /// before this action was applied.
1665     /// \pre Undoing the action works if and only if the IR is in the exact same
1666     /// state as it was directly after this action was applied.
1667     virtual void undo() = 0;
1668
1669     /// \brief Advocate every change made by this action.
1670     /// When the results on the IR of the action are to be kept, it is important
1671     /// to call this function, otherwise hidden information may be kept forever.
1672     virtual void commit() {
1673       // Nothing to be done, this action is not doing anything.
1674     }
1675   };
1676
1677   /// \brief Utility to remember the position of an instruction.
1678   class InsertionHandler {
1679     /// Position of an instruction.
1680     /// Either an instruction:
1681     /// - Is the first in a basic block: BB is used.
1682     /// - Has a previous instructon: PrevInst is used.
1683     union {
1684       Instruction *PrevInst;
1685       BasicBlock *BB;
1686     } Point;
1687     /// Remember whether or not the instruction had a previous instruction.
1688     bool HasPrevInstruction;
1689
1690   public:
1691     /// \brief Record the position of \p Inst.
1692     InsertionHandler(Instruction *Inst) {
1693       BasicBlock::iterator It = Inst;
1694       HasPrevInstruction = (It != (Inst->getParent()->begin()));
1695       if (HasPrevInstruction)
1696         Point.PrevInst = --It;
1697       else
1698         Point.BB = Inst->getParent();
1699     }
1700
1701     /// \brief Insert \p Inst at the recorded position.
1702     void insert(Instruction *Inst) {
1703       if (HasPrevInstruction) {
1704         if (Inst->getParent())
1705           Inst->removeFromParent();
1706         Inst->insertAfter(Point.PrevInst);
1707       } else {
1708         Instruction *Position = Point.BB->getFirstInsertionPt();
1709         if (Inst->getParent())
1710           Inst->moveBefore(Position);
1711         else
1712           Inst->insertBefore(Position);
1713       }
1714     }
1715   };
1716
1717   /// \brief Move an instruction before another.
1718   class InstructionMoveBefore : public TypePromotionAction {
1719     /// Original position of the instruction.
1720     InsertionHandler Position;
1721
1722   public:
1723     /// \brief Move \p Inst before \p Before.
1724     InstructionMoveBefore(Instruction *Inst, Instruction *Before)
1725         : TypePromotionAction(Inst), Position(Inst) {
1726       DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before << "\n");
1727       Inst->moveBefore(Before);
1728     }
1729
1730     /// \brief Move the instruction back to its original position.
1731     void undo() override {
1732       DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
1733       Position.insert(Inst);
1734     }
1735   };
1736
1737   /// \brief Set the operand of an instruction with a new value.
1738   class OperandSetter : public TypePromotionAction {
1739     /// Original operand of the instruction.
1740     Value *Origin;
1741     /// Index of the modified instruction.
1742     unsigned Idx;
1743
1744   public:
1745     /// \brief Set \p Idx operand of \p Inst with \p NewVal.
1746     OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
1747         : TypePromotionAction(Inst), Idx(Idx) {
1748       DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
1749                    << "for:" << *Inst << "\n"
1750                    << "with:" << *NewVal << "\n");
1751       Origin = Inst->getOperand(Idx);
1752       Inst->setOperand(Idx, NewVal);
1753     }
1754
1755     /// \brief Restore the original value of the instruction.
1756     void undo() override {
1757       DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
1758                    << "for: " << *Inst << "\n"
1759                    << "with: " << *Origin << "\n");
1760       Inst->setOperand(Idx, Origin);
1761     }
1762   };
1763
1764   /// \brief Hide the operands of an instruction.
1765   /// Do as if this instruction was not using any of its operands.
1766   class OperandsHider : public TypePromotionAction {
1767     /// The list of original operands.
1768     SmallVector<Value *, 4> OriginalValues;
1769
1770   public:
1771     /// \brief Remove \p Inst from the uses of the operands of \p Inst.
1772     OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
1773       DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
1774       unsigned NumOpnds = Inst->getNumOperands();
1775       OriginalValues.reserve(NumOpnds);
1776       for (unsigned It = 0; It < NumOpnds; ++It) {
1777         // Save the current operand.
1778         Value *Val = Inst->getOperand(It);
1779         OriginalValues.push_back(Val);
1780         // Set a dummy one.
1781         // We could use OperandSetter here, but that would implied an overhead
1782         // that we are not willing to pay.
1783         Inst->setOperand(It, UndefValue::get(Val->getType()));
1784       }
1785     }
1786
1787     /// \brief Restore the original list of uses.
1788     void undo() override {
1789       DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
1790       for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
1791         Inst->setOperand(It, OriginalValues[It]);
1792     }
1793   };
1794
1795   /// \brief Build a truncate instruction.
1796   class TruncBuilder : public TypePromotionAction {
1797     Value *Val;
1798   public:
1799     /// \brief Build a truncate instruction of \p Opnd producing a \p Ty
1800     /// result.
1801     /// trunc Opnd to Ty.
1802     TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
1803       IRBuilder<> Builder(Opnd);
1804       Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
1805       DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
1806     }
1807
1808     /// \brief Get the built value.
1809     Value *getBuiltValue() { return Val; }
1810
1811     /// \brief Remove the built instruction.
1812     void undo() override {
1813       DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
1814       if (Instruction *IVal = dyn_cast<Instruction>(Val))
1815         IVal->eraseFromParent();
1816     }
1817   };
1818
1819   /// \brief Build a sign extension instruction.
1820   class SExtBuilder : public TypePromotionAction {
1821     Value *Val;
1822   public:
1823     /// \brief Build a sign extension instruction of \p Opnd producing a \p Ty
1824     /// result.
1825     /// sext Opnd to Ty.
1826     SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
1827         : TypePromotionAction(InsertPt) {
1828       IRBuilder<> Builder(InsertPt);
1829       Val = Builder.CreateSExt(Opnd, Ty, "promoted");
1830       DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
1831     }
1832
1833     /// \brief Get the built value.
1834     Value *getBuiltValue() { return Val; }
1835
1836     /// \brief Remove the built instruction.
1837     void undo() override {
1838       DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
1839       if (Instruction *IVal = dyn_cast<Instruction>(Val))
1840         IVal->eraseFromParent();
1841     }
1842   };
1843
1844   /// \brief Build a zero extension instruction.
1845   class ZExtBuilder : public TypePromotionAction {
1846     Value *Val;
1847   public:
1848     /// \brief Build a zero extension instruction of \p Opnd producing a \p Ty
1849     /// result.
1850     /// zext Opnd to Ty.
1851     ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
1852         : TypePromotionAction(InsertPt) {
1853       IRBuilder<> Builder(InsertPt);
1854       Val = Builder.CreateZExt(Opnd, Ty, "promoted");
1855       DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
1856     }
1857
1858     /// \brief Get the built value.
1859     Value *getBuiltValue() { return Val; }
1860
1861     /// \brief Remove the built instruction.
1862     void undo() override {
1863       DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
1864       if (Instruction *IVal = dyn_cast<Instruction>(Val))
1865         IVal->eraseFromParent();
1866     }
1867   };
1868
1869   /// \brief Mutate an instruction to another type.
1870   class TypeMutator : public TypePromotionAction {
1871     /// Record the original type.
1872     Type *OrigTy;
1873
1874   public:
1875     /// \brief Mutate the type of \p Inst into \p NewTy.
1876     TypeMutator(Instruction *Inst, Type *NewTy)
1877         : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
1878       DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
1879                    << "\n");
1880       Inst->mutateType(NewTy);
1881     }
1882
1883     /// \brief Mutate the instruction back to its original type.
1884     void undo() override {
1885       DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
1886                    << "\n");
1887       Inst->mutateType(OrigTy);
1888     }
1889   };
1890
1891   /// \brief Replace the uses of an instruction by another instruction.
1892   class UsesReplacer : public TypePromotionAction {
1893     /// Helper structure to keep track of the replaced uses.
1894     struct InstructionAndIdx {
1895       /// The instruction using the instruction.
1896       Instruction *Inst;
1897       /// The index where this instruction is used for Inst.
1898       unsigned Idx;
1899       InstructionAndIdx(Instruction *Inst, unsigned Idx)
1900           : Inst(Inst), Idx(Idx) {}
1901     };
1902
1903     /// Keep track of the original uses (pair Instruction, Index).
1904     SmallVector<InstructionAndIdx, 4> OriginalUses;
1905     typedef SmallVectorImpl<InstructionAndIdx>::iterator use_iterator;
1906
1907   public:
1908     /// \brief Replace all the use of \p Inst by \p New.
1909     UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) {
1910       DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
1911                    << "\n");
1912       // Record the original uses.
1913       for (Use &U : Inst->uses()) {
1914         Instruction *UserI = cast<Instruction>(U.getUser());
1915         OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
1916       }
1917       // Now, we can replace the uses.
1918       Inst->replaceAllUsesWith(New);
1919     }
1920
1921     /// \brief Reassign the original uses of Inst to Inst.
1922     void undo() override {
1923       DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
1924       for (use_iterator UseIt = OriginalUses.begin(),
1925                         EndIt = OriginalUses.end();
1926            UseIt != EndIt; ++UseIt) {
1927         UseIt->Inst->setOperand(UseIt->Idx, Inst);
1928       }
1929     }
1930   };
1931
1932   /// \brief Remove an instruction from the IR.
1933   class InstructionRemover : public TypePromotionAction {
1934     /// Original position of the instruction.
1935     InsertionHandler Inserter;
1936     /// Helper structure to hide all the link to the instruction. In other
1937     /// words, this helps to do as if the instruction was removed.
1938     OperandsHider Hider;
1939     /// Keep track of the uses replaced, if any.
1940     UsesReplacer *Replacer;
1941
1942   public:
1943     /// \brief Remove all reference of \p Inst and optinally replace all its
1944     /// uses with New.
1945     /// \pre If !Inst->use_empty(), then New != nullptr
1946     InstructionRemover(Instruction *Inst, Value *New = nullptr)
1947         : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
1948           Replacer(nullptr) {
1949       if (New)
1950         Replacer = new UsesReplacer(Inst, New);
1951       DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
1952       Inst->removeFromParent();
1953     }
1954
1955     ~InstructionRemover() override { delete Replacer; }
1956
1957     /// \brief Really remove the instruction.
1958     void commit() override { delete Inst; }
1959
1960     /// \brief Resurrect the instruction and reassign it to the proper uses if
1961     /// new value was provided when build this action.
1962     void undo() override {
1963       DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
1964       Inserter.insert(Inst);
1965       if (Replacer)
1966         Replacer->undo();
1967       Hider.undo();
1968     }
1969   };
1970
1971 public:
1972   /// Restoration point.
1973   /// The restoration point is a pointer to an action instead of an iterator
1974   /// because the iterator may be invalidated but not the pointer.
1975   typedef const TypePromotionAction *ConstRestorationPt;
1976   /// Advocate every changes made in that transaction.
1977   void commit();
1978   /// Undo all the changes made after the given point.
1979   void rollback(ConstRestorationPt Point);
1980   /// Get the current restoration point.
1981   ConstRestorationPt getRestorationPoint() const;
1982
1983   /// \name API for IR modification with state keeping to support rollback.
1984   /// @{
1985   /// Same as Instruction::setOperand.
1986   void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
1987   /// Same as Instruction::eraseFromParent.
1988   void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
1989   /// Same as Value::replaceAllUsesWith.
1990   void replaceAllUsesWith(Instruction *Inst, Value *New);
1991   /// Same as Value::mutateType.
1992   void mutateType(Instruction *Inst, Type *NewTy);
1993   /// Same as IRBuilder::createTrunc.
1994   Value *createTrunc(Instruction *Opnd, Type *Ty);
1995   /// Same as IRBuilder::createSExt.
1996   Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
1997   /// Same as IRBuilder::createZExt.
1998   Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
1999   /// Same as Instruction::moveBefore.
2000   void moveBefore(Instruction *Inst, Instruction *Before);
2001   /// @}
2002
2003 private:
2004   /// The ordered list of actions made so far.
2005   SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
2006   typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt;
2007 };
2008
2009 void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
2010                                           Value *NewVal) {
2011   Actions.push_back(
2012       make_unique<TypePromotionTransaction::OperandSetter>(Inst, Idx, NewVal));
2013 }
2014
2015 void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
2016                                                 Value *NewVal) {
2017   Actions.push_back(
2018       make_unique<TypePromotionTransaction::InstructionRemover>(Inst, NewVal));
2019 }
2020
2021 void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
2022                                                   Value *New) {
2023   Actions.push_back(make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
2024 }
2025
2026 void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
2027   Actions.push_back(make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
2028 }
2029
2030 Value *TypePromotionTransaction::createTrunc(Instruction *Opnd,
2031                                              Type *Ty) {
2032   std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
2033   Value *Val = Ptr->getBuiltValue();
2034   Actions.push_back(std::move(Ptr));
2035   return Val;
2036 }
2037
2038 Value *TypePromotionTransaction::createSExt(Instruction *Inst,
2039                                             Value *Opnd, Type *Ty) {
2040   std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
2041   Value *Val = Ptr->getBuiltValue();
2042   Actions.push_back(std::move(Ptr));
2043   return Val;
2044 }
2045
2046 Value *TypePromotionTransaction::createZExt(Instruction *Inst,
2047                                             Value *Opnd, Type *Ty) {
2048   std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
2049   Value *Val = Ptr->getBuiltValue();
2050   Actions.push_back(std::move(Ptr));
2051   return Val;
2052 }
2053
2054 void TypePromotionTransaction::moveBefore(Instruction *Inst,
2055                                           Instruction *Before) {
2056   Actions.push_back(
2057       make_unique<TypePromotionTransaction::InstructionMoveBefore>(Inst, Before));
2058 }
2059
2060 TypePromotionTransaction::ConstRestorationPt
2061 TypePromotionTransaction::getRestorationPoint() const {
2062   return !Actions.empty() ? Actions.back().get() : nullptr;
2063 }
2064
2065 void TypePromotionTransaction::commit() {
2066   for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt;
2067        ++It)
2068     (*It)->commit();
2069   Actions.clear();
2070 }
2071
2072 void TypePromotionTransaction::rollback(
2073     TypePromotionTransaction::ConstRestorationPt Point) {
2074   while (!Actions.empty() && Point != Actions.back().get()) {
2075     std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
2076     Curr->undo();
2077   }
2078 }
2079
2080 /// \brief A helper class for matching addressing modes.
2081 ///
2082 /// This encapsulates the logic for matching the target-legal addressing modes.
2083 class AddressingModeMatcher {
2084   SmallVectorImpl<Instruction*> &AddrModeInsts;
2085   const TargetMachine &TM;
2086   const TargetLowering &TLI;
2087
2088   /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
2089   /// the memory instruction that we're computing this address for.
2090   Type *AccessTy;
2091   Instruction *MemoryInst;
2092
2093   /// AddrMode - This is the addressing mode that we're building up.  This is
2094   /// part of the return value of this addressing mode matching stuff.
2095   ExtAddrMode &AddrMode;
2096
2097   /// The truncate instruction inserted by other CodeGenPrepare optimizations.
2098   const SetOfInstrs &InsertedTruncs;
2099   /// A map from the instructions to their type before promotion.
2100   InstrToOrigTy &PromotedInsts;
2101   /// The ongoing transaction where every action should be registered.
2102   TypePromotionTransaction &TPT;
2103
2104   /// IgnoreProfitability - This is set to true when we should not do
2105   /// profitability checks.  When true, IsProfitableToFoldIntoAddressingMode
2106   /// always returns true.
2107   bool IgnoreProfitability;
2108
2109   AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI,
2110                         const TargetMachine &TM, Type *AT, Instruction *MI,
2111                         ExtAddrMode &AM, const SetOfInstrs &InsertedTruncs,
2112                         InstrToOrigTy &PromotedInsts,
2113                         TypePromotionTransaction &TPT)
2114       : AddrModeInsts(AMI), TM(TM),
2115         TLI(*TM.getSubtargetImpl(*MI->getParent()->getParent())
2116                  ->getTargetLowering()),
2117         AccessTy(AT), MemoryInst(MI), AddrMode(AM),
2118         InsertedTruncs(InsertedTruncs), PromotedInsts(PromotedInsts), TPT(TPT) {
2119     IgnoreProfitability = false;
2120   }
2121 public:
2122
2123   /// Match - Find the maximal addressing mode that a load/store of V can fold,
2124   /// give an access type of AccessTy.  This returns a list of involved
2125   /// instructions in AddrModeInsts.
2126   /// \p InsertedTruncs The truncate instruction inserted by other
2127   /// CodeGenPrepare
2128   /// optimizations.
2129   /// \p PromotedInsts maps the instructions to their type before promotion.
2130   /// \p The ongoing transaction where every action should be registered.
2131   static ExtAddrMode Match(Value *V, Type *AccessTy,
2132                            Instruction *MemoryInst,
2133                            SmallVectorImpl<Instruction*> &AddrModeInsts,
2134                            const TargetMachine &TM,
2135                            const SetOfInstrs &InsertedTruncs,
2136                            InstrToOrigTy &PromotedInsts,
2137                            TypePromotionTransaction &TPT) {
2138     ExtAddrMode Result;
2139
2140     bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy,
2141                                          MemoryInst, Result, InsertedTruncs,
2142                                          PromotedInsts, TPT).MatchAddr(V, 0);
2143     (void)Success; assert(Success && "Couldn't select *anything*?");
2144     return Result;
2145   }
2146 private:
2147   bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
2148   bool MatchAddr(Value *V, unsigned Depth);
2149   bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth,
2150                           bool *MovedAway = nullptr);
2151   bool IsProfitableToFoldIntoAddressingMode(Instruction *I,
2152                                             ExtAddrMode &AMBefore,
2153                                             ExtAddrMode &AMAfter);
2154   bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
2155   bool IsPromotionProfitable(unsigned NewCost, unsigned OldCost,
2156                              Value *PromotedOperand) const;
2157 };
2158
2159 /// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
2160 /// Return true and update AddrMode if this addr mode is legal for the target,
2161 /// false if not.
2162 bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
2163                                              unsigned Depth) {
2164   // If Scale is 1, then this is the same as adding ScaleReg to the addressing
2165   // mode.  Just process that directly.
2166   if (Scale == 1)
2167     return MatchAddr(ScaleReg, Depth);
2168
2169   // If the scale is 0, it takes nothing to add this.
2170   if (Scale == 0)
2171     return true;
2172
2173   // If we already have a scale of this value, we can add to it, otherwise, we
2174   // need an available scale field.
2175   if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
2176     return false;
2177
2178   ExtAddrMode TestAddrMode = AddrMode;
2179
2180   // Add scale to turn X*4+X*3 -> X*7.  This could also do things like
2181   // [A+B + A*7] -> [B+A*8].
2182   TestAddrMode.Scale += Scale;
2183   TestAddrMode.ScaledReg = ScaleReg;
2184
2185   // If the new address isn't legal, bail out.
2186   if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
2187     return false;
2188
2189   // It was legal, so commit it.
2190   AddrMode = TestAddrMode;
2191
2192   // Okay, we decided that we can add ScaleReg+Scale to AddrMode.  Check now
2193   // to see if ScaleReg is actually X+C.  If so, we can turn this into adding
2194   // X*Scale + C*Scale to addr mode.
2195   ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
2196   if (isa<Instruction>(ScaleReg) &&  // not a constant expr.
2197       match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
2198     TestAddrMode.ScaledReg = AddLHS;
2199     TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
2200
2201     // If this addressing mode is legal, commit it and remember that we folded
2202     // this instruction.
2203     if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
2204       AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
2205       AddrMode = TestAddrMode;
2206       return true;
2207     }
2208   }
2209
2210   // Otherwise, not (x+c)*scale, just return what we have.
2211   return true;
2212 }
2213
2214 /// MightBeFoldableInst - This is a little filter, which returns true if an
2215 /// addressing computation involving I might be folded into a load/store
2216 /// accessing it.  This doesn't need to be perfect, but needs to accept at least
2217 /// the set of instructions that MatchOperationAddr can.
2218 static bool MightBeFoldableInst(Instruction *I) {
2219   switch (I->getOpcode()) {
2220   case Instruction::BitCast:
2221   case Instruction::AddrSpaceCast:
2222     // Don't touch identity bitcasts.
2223     if (I->getType() == I->getOperand(0)->getType())
2224       return false;
2225     return I->getType()->isPointerTy() || I->getType()->isIntegerTy();
2226   case Instruction::PtrToInt:
2227     // PtrToInt is always a noop, as we know that the int type is pointer sized.
2228     return true;
2229   case Instruction::IntToPtr:
2230     // We know the input is intptr_t, so this is foldable.
2231     return true;
2232   case Instruction::Add:
2233     return true;
2234   case Instruction::Mul:
2235   case Instruction::Shl:
2236     // Can only handle X*C and X << C.
2237     return isa<ConstantInt>(I->getOperand(1));
2238   case Instruction::GetElementPtr:
2239     return true;
2240   default:
2241     return false;
2242   }
2243 }
2244
2245 /// \brief Check whether or not \p Val is a legal instruction for \p TLI.
2246 /// \note \p Val is assumed to be the product of some type promotion.
2247 /// Therefore if \p Val has an undefined state in \p TLI, this is assumed
2248 /// to be legal, as the non-promoted value would have had the same state.
2249 static bool isPromotedInstructionLegal(const TargetLowering &TLI, Value *Val) {
2250   Instruction *PromotedInst = dyn_cast<Instruction>(Val);
2251   if (!PromotedInst)
2252     return false;
2253   int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
2254   // If the ISDOpcode is undefined, it was undefined before the promotion.
2255   if (!ISDOpcode)
2256     return true;
2257   // Otherwise, check if the promoted instruction is legal or not.
2258   return TLI.isOperationLegalOrCustom(
2259       ISDOpcode, TLI.getValueType(PromotedInst->getType()));
2260 }
2261
/// \brief Helper class to perform type promotion.
2263 class TypePromotionHelper {
2264   /// \brief Utility function to check whether or not a sign or zero extension
2265   /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
2266   /// either using the operands of \p Inst or promoting \p Inst.
2267   /// The type of the extension is defined by \p IsSExt.
2268   /// In other words, check if:
2269   /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
2270   /// #1 Promotion applies:
2271   /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
2272   /// #2 Operand reuses:
2273   /// ext opnd1 to ConsideredExtType.
2274   /// \p PromotedInsts maps the instructions to their type before promotion.
2275   static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
2276                             const InstrToOrigTy &PromotedInsts, bool IsSExt);
2277
2278   /// \brief Utility function to determine if \p OpIdx should be promoted when
2279   /// promoting \p Inst.
2280   static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
2281     if (isa<SelectInst>(Inst) && OpIdx == 0)
2282       return false;
2283     return true;
2284   }
2285
2286   /// \brief Utility function to promote the operand of \p Ext when this
2287   /// operand is a promotable trunc or sext or zext.
2288   /// \p PromotedInsts maps the instructions to their type before promotion.
2289   /// \p CreatedInstsCost[out] contains the cost of all instructions
2290   /// created to promote the operand of Ext.
2291   /// Newly added extensions are inserted in \p Exts.
2292   /// Newly added truncates are inserted in \p Truncs.
2293   /// Should never be called directly.
2294   /// \return The promoted value which is used instead of Ext.
2295   static Value *promoteOperandForTruncAndAnyExt(
2296       Instruction *Ext, TypePromotionTransaction &TPT,
2297       InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
2298       SmallVectorImpl<Instruction *> *Exts,
2299       SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
2300
2301   /// \brief Utility function to promote the operand of \p Ext when this
2302   /// operand is promotable and is not a supported trunc or sext.
2303   /// \p PromotedInsts maps the instructions to their type before promotion.
2304   /// \p CreatedInstsCost[out] contains the cost of all the instructions
2305   /// created to promote the operand of Ext.
2306   /// Newly added extensions are inserted in \p Exts.
2307   /// Newly added truncates are inserted in \p Truncs.
2308   /// Should never be called directly.
2309   /// \return The promoted value which is used instead of Ext.
2310   static Value *promoteOperandForOther(Instruction *Ext,
2311                                        TypePromotionTransaction &TPT,
2312                                        InstrToOrigTy &PromotedInsts,
2313                                        unsigned &CreatedInstsCost,
2314                                        SmallVectorImpl<Instruction *> *Exts,
2315                                        SmallVectorImpl<Instruction *> *Truncs,
2316                                        const TargetLowering &TLI, bool IsSExt);
2317
2318   /// \see promoteOperandForOther.
2319   static Value *signExtendOperandForOther(
2320       Instruction *Ext, TypePromotionTransaction &TPT,
2321       InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
2322       SmallVectorImpl<Instruction *> *Exts,
2323       SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
2324     return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
2325                                   Exts, Truncs, TLI, true);
2326   }
2327
2328   /// \see promoteOperandForOther.
2329   static Value *zeroExtendOperandForOther(
2330       Instruction *Ext, TypePromotionTransaction &TPT,
2331       InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
2332       SmallVectorImpl<Instruction *> *Exts,
2333       SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
2334     return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
2335                                   Exts, Truncs, TLI, false);
2336   }
2337
2338 public:
2339   /// Type for the utility function that promotes the operand of Ext.
2340   typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT,
2341                            InstrToOrigTy &PromotedInsts,
2342                            unsigned &CreatedInstsCost,
2343                            SmallVectorImpl<Instruction *> *Exts,
2344                            SmallVectorImpl<Instruction *> *Truncs,
2345                            const TargetLowering &TLI);
  /// \brief Given a sign/zero extend instruction \p Ext, return the appropriate
  /// action to promote the operand of \p Ext instead of using Ext.
  /// \return NULL if no promotable action is possible with the current
  /// extension.
  /// \p InsertedTruncs keeps track of all the truncate instructions inserted by
  /// the other CodeGenPrepare optimizations. This information is important
  /// because we do not want to promote these instructions, as CodeGenPrepare
  /// will reinsert them later, thus creating an infinite loop: create/remove.
  /// \p PromotedInsts maps the instructions to their type before promotion.
2355   static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedTruncs,
2356                           const TargetLowering &TLI,
2357                           const InstrToOrigTy &PromotedInsts);
2358 };
2359
2360 bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
2361                                         Type *ConsideredExtType,
2362                                         const InstrToOrigTy &PromotedInsts,
2363                                         bool IsSExt) {
2364   // The promotion helper does not know how to deal with vector types yet.
2365   // To be able to fix that, we would need to fix the places where we
2366   // statically extend, e.g., constants and such.
2367   if (Inst->getType()->isVectorTy())
2368     return false;
2369
2370   // We can always get through zext.
2371   if (isa<ZExtInst>(Inst))
2372     return true;
2373
2374   // sext(sext) is ok too.
2375   if (IsSExt && isa<SExtInst>(Inst))
2376     return true;
2377
2378   // We can get through binary operator, if it is legal. In other words, the
2379   // binary operator must have a nuw or nsw flag.
2380   const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
2381   if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
2382       ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
2383        (IsSExt && BinOp->hasNoSignedWrap())))
2384     return true;
2385
2386   // Check if we can do the following simplification.
2387   // ext(trunc(opnd)) --> ext(opnd)
2388   if (!isa<TruncInst>(Inst))
2389     return false;
2390
2391   Value *OpndVal = Inst->getOperand(0);
2392   // Check if we can use this operand in the extension.
2393   // If the type is larger than the result type of the extension,
2394   // we cannot.
2395   if (!OpndVal->getType()->isIntegerTy() ||
2396       OpndVal->getType()->getIntegerBitWidth() >
2397           ConsideredExtType->getIntegerBitWidth())
2398     return false;
2399
2400   // If the operand of the truncate is not an instruction, we will not have
2401   // any information on the dropped bits.
2402   // (Actually we could for constant but it is not worth the extra logic).
2403   Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
2404   if (!Opnd)
2405     return false;
2406
2407   // Check if the source of the type is narrow enough.
2408   // I.e., check that trunc just drops extended bits of the same kind of
2409   // the extension.
2410   // #1 get the type of the operand and check the kind of the extended bits.
2411   const Type *OpndType;
2412   InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
2413   if (It != PromotedInsts.end() && It->second.IsSExt == IsSExt)
2414     OpndType = It->second.Ty;
2415   else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
2416     OpndType = Opnd->getOperand(0)->getType();
2417   else
2418     return false;
2419
2420   // #2 check that the truncate just drop extended bits.
2421   if (Inst->getType()->getIntegerBitWidth() >= OpndType->getIntegerBitWidth())
2422     return true;
2423
2424   return false;
2425 }
2426
2427 TypePromotionHelper::Action TypePromotionHelper::getAction(
2428     Instruction *Ext, const SetOfInstrs &InsertedTruncs,
2429     const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
2430   assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
2431          "Unexpected instruction type");
2432   Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
2433   Type *ExtTy = Ext->getType();
2434   bool IsSExt = isa<SExtInst>(Ext);
2435   // If the operand of the extension is not an instruction, we cannot
2436   // get through.
2437   // If it, check we can get through.
2438   if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
2439     return nullptr;
2440
2441   // Do not promote if the operand has been added by codegenprepare.
2442   // Otherwise, it means we are undoing an optimization that is likely to be
2443   // redone, thus causing potential infinite loop.
2444   if (isa<TruncInst>(ExtOpnd) && InsertedTruncs.count(ExtOpnd))
2445     return nullptr;
2446
2447   // SExt or Trunc instructions.
2448   // Return the related handler.
2449   if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
2450       isa<ZExtInst>(ExtOpnd))
2451     return promoteOperandForTruncAndAnyExt;
2452
2453   // Regular instruction.
2454   // Abort early if we will have to insert non-free instructions.
2455   if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
2456     return nullptr;
2457   return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
2458 }
2459
2460 Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
2461     llvm::Instruction *SExt, TypePromotionTransaction &TPT,
2462     InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
2463     SmallVectorImpl<Instruction *> *Exts,
2464     SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
2465   // By construction, the operand of SExt is an instruction. Otherwise we cannot
2466   // get through it and this method should not be called.
2467   Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
2468   Value *ExtVal = SExt;
2469   bool HasMergedNonFreeExt = false;
2470   if (isa<ZExtInst>(SExtOpnd)) {
2471     // Replace s|zext(zext(opnd))
2472     // => zext(opnd).
2473     HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
2474     Value *ZExt =
2475         TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
2476     TPT.replaceAllUsesWith(SExt, ZExt);
2477     TPT.eraseInstruction(SExt);
2478     ExtVal = ZExt;
2479   } else {
2480     // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
2481     // => z|sext(opnd).
2482     TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
2483   }
2484   CreatedInstsCost = 0;
2485
2486   // Remove dead code.
2487   if (SExtOpnd->use_empty())
2488     TPT.eraseInstruction(SExtOpnd);
2489
2490   // Check if the extension is still needed.
2491   Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
2492   if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
2493     if (ExtInst) {
2494       if (Exts)
2495         Exts->push_back(ExtInst);
2496       CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
2497     }
2498     return ExtVal;
2499   }
2500
2501   // At this point we have: ext ty opnd to ty.
2502   // Reassign the uses of ExtInst to the opnd and remove ExtInst.
2503   Value *NextVal = ExtInst->getOperand(0);
2504   TPT.eraseInstruction(ExtInst, NextVal);
2505   return NextVal;
2506 }
2507
2508 Value *TypePromotionHelper::promoteOperandForOther(
2509     Instruction *Ext, TypePromotionTransaction &TPT,
2510     InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
2511     SmallVectorImpl<Instruction *> *Exts,
2512     SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
2513     bool IsSExt) {
2514   // By construction, the operand of Ext is an instruction. Otherwise we cannot
2515   // get through it and this method should not be called.
2516   Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
2517   CreatedInstsCost = 0;
2518   if (!ExtOpnd->hasOneUse()) {
2519     // ExtOpnd will be promoted.
2520     // All its uses, but Ext, will need to use a truncated value of the
2521     // promoted version.
2522     // Create the truncate now.
2523     Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
2524     if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
2525       ITrunc->removeFromParent();
2526       // Insert it just after the definition.
2527       ITrunc->insertAfter(ExtOpnd);
2528       if (Truncs)
2529         Truncs->push_back(ITrunc);
2530     }
2531
2532     TPT.replaceAllUsesWith(ExtOpnd, Trunc);
2533     // Restore the operand of Ext (which has been replace by the previous call
2534     // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
2535     TPT.setOperand(Ext, 0, ExtOpnd);
2536   }
2537
2538   // Get through the Instruction:
2539   // 1. Update its type.
2540   // 2. Replace the uses of Ext by Inst.
2541   // 3. Extend each operand that needs to be extended.
2542
2543   // Remember the original type of the instruction before promotion.
2544   // This is useful to know that the high bits are sign extended bits.
2545   PromotedInsts.insert(std::pair<Instruction *, TypeIsSExt>(
2546       ExtOpnd, TypeIsSExt(ExtOpnd->getType(), IsSExt)));
2547   // Step #1.
2548   TPT.mutateType(ExtOpnd, Ext->getType());
2549   // Step #2.
2550   TPT.replaceAllUsesWith(Ext, ExtOpnd);
2551   // Step #3.
2552   Instruction *ExtForOpnd = Ext;
2553
2554   DEBUG(dbgs() << "Propagate Ext to operands\n");
2555   for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
2556        ++OpIdx) {
2557     DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
2558     if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
2559         !shouldExtOperand(ExtOpnd, OpIdx)) {
2560       DEBUG(dbgs() << "No need to propagate\n");
2561       continue;
2562     }
2563     // Check if we can statically extend the operand.
2564     Value *Opnd = ExtOpnd->getOperand(OpIdx);
2565     if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
2566       DEBUG(dbgs() << "Statically extend\n");
2567       unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
2568       APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
2569                             : Cst->getValue().zext(BitWidth);
2570       TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
2571       continue;
2572     }
2573     // UndefValue are typed, so we have to statically sign extend them.
2574     if (isa<UndefValue>(Opnd)) {
2575       DEBUG(dbgs() << "Statically extend\n");
2576       TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
2577       continue;
2578     }
2579
2580     // Otherwise we have to explicity sign extend the operand.
2581     // Check if Ext was reused to extend an operand.
2582     if (!ExtForOpnd) {
2583       // If yes, create a new one.
2584       DEBUG(dbgs() << "More operands to ext\n");
2585       Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType())
2586         : TPT.createZExt(Ext, Opnd, Ext->getType());
2587       if (!isa<Instruction>(ValForExtOpnd)) {
2588         TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
2589         continue;
2590       }
2591       ExtForOpnd = cast<Instruction>(ValForExtOpnd);
2592     }
2593     if (Exts)
2594       Exts->push_back(ExtForOpnd);
2595     TPT.setOperand(ExtForOpnd, 0, Opnd);
2596
2597     // Move the sign extension before the insertion point.
2598     TPT.moveBefore(ExtForOpnd, ExtOpnd);
2599     TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd);
2600     CreatedInstsCost += !TLI.isExtFree(ExtForOpnd);
2601     // If more sext are required, new instructions will have to be created.
2602     ExtForOpnd = nullptr;
2603   }
2604   if (ExtForOpnd == Ext) {
2605     DEBUG(dbgs() << "Extension is useless now\n");
2606     TPT.eraseInstruction(Ext);
2607   }
2608   return ExtOpnd;
2609 }
2610
2611 /// IsPromotionProfitable - Check whether or not promoting an instruction
2612 /// to a wider type was profitable.
2613 /// \p NewCost gives the cost of extension instructions created by the
2614 /// promotion.
2615 /// \p OldCost gives the cost of extension instructions before the promotion
2616 /// plus the number of instructions that have been
2617 /// matched in the addressing mode the promotion.
2618 /// \p PromotedOperand is the value that has been promoted.
2619 /// \return True if the promotion is profitable, false otherwise.
2620 bool AddressingModeMatcher::IsPromotionProfitable(
2621     unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
2622   DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n');
2623   // The cost of the new extensions is greater than the cost of the
2624   // old extension plus what we folded.
2625   // This is not profitable.
2626   if (NewCost > OldCost)
2627     return false;
2628   if (NewCost < OldCost)
2629     return true;
2630   // The promotion is neutral but it may help folding the sign extension in
2631   // loads for instance.
2632   // Check that we did not create an illegal instruction.
2633   return isPromotedInstructionLegal(TLI, PromotedOperand);
2634 }
2635
2636 /// MatchOperationAddr - Given an instruction or constant expr, see if we can
2637 /// fold the operation into the addressing mode.  If so, update the addressing
2638 /// mode and return true, otherwise return false without modifying AddrMode.
2639 /// If \p MovedAway is not NULL, it contains the information of whether or
2640 /// not AddrInst has to be folded into the addressing mode on success.
2641 /// If \p MovedAway == true, \p AddrInst will not be part of the addressing
2642 /// because it has been moved away.
2643 /// Thus AddrInst must not be added in the matched instructions.
2644 /// This state can happen when AddrInst is a sext, since it may be moved away.
2645 /// Therefore, AddrInst may not be valid when MovedAway is true and it must
2646 /// not be referenced anymore.
2647 bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
2648                                                unsigned Depth,
2649                                                bool *MovedAway) {
2650   // Avoid exponential behavior on extremely deep expression trees.
2651   if (Depth >= 5) return false;
2652
2653   // By default, all matched instructions stay in place.
2654   if (MovedAway)
2655     *MovedAway = false;
2656
2657   switch (Opcode) {
2658   case Instruction::PtrToInt:
2659     // PtrToInt is always a noop, as we know that the int type is pointer sized.
2660     return MatchAddr(AddrInst->getOperand(0), Depth);
2661   case Instruction::IntToPtr:
2662     // This inttoptr is a no-op if the integer type is pointer sized.
2663     if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
2664         TLI.getPointerTy(AddrInst->getType()->getPointerAddressSpace()))
2665       return MatchAddr(AddrInst->getOperand(0), Depth);
2666     return false;
2667   case Instruction::BitCast:
2668   case Instruction::AddrSpaceCast:
2669     // BitCast is always a noop, and we can handle it as long as it is
2670     // int->int or pointer->pointer (we don't want int<->fp or something).
2671     if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
2672          AddrInst->getOperand(0)->getType()->isIntegerTy()) &&
2673         // Don't touch identity bitcasts.  These were probably put here by LSR,
2674         // and we don't want to mess around with them.  Assume it knows what it
2675         // is doing.
2676         AddrInst->getOperand(0)->getType() != AddrInst->getType())
2677       return MatchAddr(AddrInst->getOperand(0), Depth);
2678     return false;
2679   case Instruction::Add: {
2680     // Check to see if we can merge in the RHS then the LHS.  If so, we win.
2681     ExtAddrMode BackupAddrMode = AddrMode;
2682     unsigned OldSize = AddrModeInsts.size();
2683     // Start a transaction at this point.
2684     // The LHS may match but not the RHS.
2685     // Therefore, we need a higher level restoration point to undo partially
2686     // matched operation.
2687     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
2688         TPT.getRestorationPoint();
2689
2690     if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
2691         MatchAddr(AddrInst->getOperand(0), Depth+1))
2692       return true;
2693
2694     // Restore the old addr mode info.
2695     AddrMode = BackupAddrMode;
2696     AddrModeInsts.resize(OldSize);
2697     TPT.rollback(LastKnownGood);
2698
2699     // Otherwise this was over-aggressive.  Try merging in the LHS then the RHS.
2700     if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
2701         MatchAddr(AddrInst->getOperand(1), Depth+1))
2702       return true;
2703
2704     // Otherwise we definitely can't merge the ADD in.
2705     AddrMode = BackupAddrMode;
2706     AddrModeInsts.resize(OldSize);
2707     TPT.rollback(LastKnownGood);
2708     break;
2709   }
2710   //case Instruction::Or:
2711   // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
2712   //break;
2713   case Instruction::Mul:
2714   case Instruction::Shl: {
2715     // Can only handle X*C and X << C.
2716     ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
2717     if (!RHS)
2718       return false;
2719     int64_t Scale = RHS->getSExtValue();
2720     if (Opcode == Instruction::Shl)
2721       Scale = 1LL << Scale;
2722
2723     return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
2724   }
2725   case Instruction::GetElementPtr: {
2726     // Scan the GEP.  We check it if it contains constant offsets and at most
2727     // one variable offset.
2728     int VariableOperand = -1;
2729     unsigned VariableScale = 0;
2730
2731     int64_t ConstantOffset = 0;
2732     const DataLayout *TD = TLI.getDataLayout();
2733     gep_type_iterator GTI = gep_type_begin(AddrInst);
2734     for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
2735       if (StructType *STy = dyn_cast<StructType>(*GTI)) {
2736         const StructLayout *SL = TD->getStructLayout(STy);
2737         unsigned Idx =
2738           cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
2739         ConstantOffset += SL->getElementOffset(Idx);
2740       } else {
2741         uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType());
2742         if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
2743           ConstantOffset += CI->getSExtValue()*TypeSize;
2744         } else if (TypeSize) {  // Scales of zero don't do anything.
2745           // We only allow one variable index at the moment.
2746           if (VariableOperand != -1)
2747             return false;
2748
2749           // Remember the variable index.
2750           VariableOperand = i;
2751           VariableScale = TypeSize;
2752         }
2753       }
2754     }
2755
2756     // A common case is for the GEP to only do a constant offset.  In this case,
2757     // just add it to the disp field and check validity.
2758     if (VariableOperand == -1) {
2759       AddrMode.BaseOffs += ConstantOffset;
2760       if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
2761         // Check to see if we can fold the base pointer in too.
2762         if (MatchAddr(AddrInst->getOperand(0), Depth+1))
2763           return true;
2764       }
2765       AddrMode.BaseOffs -= ConstantOffset;
2766       return false;
2767     }
2768
2769     // Save the valid addressing mode in case we can't match.
2770     ExtAddrMode BackupAddrMode = AddrMode;
2771     unsigned OldSize = AddrModeInsts.size();
2772
2773     // See if the scale and offset amount is valid for this target.
2774     AddrMode.BaseOffs += ConstantOffset;
2775
2776     // Match the base operand of the GEP.
2777     if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) {
2778       // If it couldn't be matched, just stuff the value in a register.
2779       if (AddrMode.HasBaseReg) {
2780         AddrMode = BackupAddrMode;
2781         AddrModeInsts.resize(OldSize);
2782         return false;
2783       }
2784       AddrMode.HasBaseReg = true;
2785       AddrMode.BaseReg = AddrInst->getOperand(0);
2786     }
2787
2788     // Match the remaining variable portion of the GEP.
2789     if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
2790                           Depth)) {
2791       // If it couldn't be matched, try stuffing the base into a register
2792       // instead of matching it, and retrying the match of the scale.
2793       AddrMode = BackupAddrMode;
2794       AddrModeInsts.resize(OldSize);
2795       if (AddrMode.HasBaseReg)
2796         return false;
2797       AddrMode.HasBaseReg = true;
2798       AddrMode.BaseReg = AddrInst->getOperand(0);
2799       AddrMode.BaseOffs += ConstantOffset;
2800       if (!MatchScaledValue(AddrInst->getOperand(VariableOperand),
2801                             VariableScale, Depth)) {
2802         // If even that didn't work, bail.
2803         AddrMode = BackupAddrMode;
2804         AddrModeInsts.resize(OldSize);
2805         return false;
2806       }
2807     }
2808
2809     return true;
2810   }
2811   case Instruction::SExt:
2812   case Instruction::ZExt: {
2813     Instruction *Ext = dyn_cast<Instruction>(AddrInst);
2814     if (!Ext)
2815       return false;
2816
2817     // Try to move this ext out of the way of the addressing mode.
2818     // Ask for a method for doing so.
2819     TypePromotionHelper::Action TPH =
2820         TypePromotionHelper::getAction(Ext, InsertedTruncs, TLI, PromotedInsts);
2821     if (!TPH)
2822       return false;
2823
2824     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
2825         TPT.getRestorationPoint();
2826     unsigned CreatedInstsCost = 0;
2827     unsigned ExtCost = !TLI.isExtFree(Ext);
2828     Value *PromotedOperand =
2829         TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
2830     // SExt has been moved away.
2831     // Thus either it will be rematched later in the recursive calls or it is
2832     // gone. Anyway, we must not fold it into the addressing mode at this point.
2833     // E.g.,
2834     // op = add opnd, 1
2835     // idx = ext op
2836     // addr = gep base, idx
2837     // is now:
2838     // promotedOpnd = ext opnd            <- no match here
2839     // op = promoted_add promotedOpnd, 1  <- match (later in recursive calls)
2840     // addr = gep base, op                <- match
2841     if (MovedAway)
2842       *MovedAway = true;
2843
2844     assert(PromotedOperand &&
2845            "TypePromotionHelper should have filtered out those cases");
2846
2847     ExtAddrMode BackupAddrMode = AddrMode;
2848     unsigned OldSize = AddrModeInsts.size();
2849
2850     if (!MatchAddr(PromotedOperand, Depth) ||
2851         // The total of the new cost is equals to the cost of the created
2852         // instructions.
2853         // The total of the old cost is equals to the cost of the extension plus
2854         // what we have saved in the addressing mode.
2855         !IsPromotionProfitable(CreatedInstsCost,
2856                                ExtCost + (AddrModeInsts.size() - OldSize),
2857                                PromotedOperand)) {
2858       AddrMode = BackupAddrMode;
2859       AddrModeInsts.resize(OldSize);
2860       DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
2861       TPT.rollback(LastKnownGood);
2862       return false;
2863     }
2864     return true;
2865   }
2866   }
2867   return false;
2868 }
2869
2870 /// MatchAddr - If we can, try to add the value of 'Addr' into the current
2871 /// addressing mode.  If Addr can't be added to AddrMode this returns false and
2872 /// leaves AddrMode unmodified.  This assumes that Addr is either a pointer type
2873 /// or intptr_t for the target.
2874 ///
2875 bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
2876   // Start a transaction at this point that we will rollback if the matching
2877   // fails.
2878   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
2879       TPT.getRestorationPoint();
2880   if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
2881     // Fold in immediates if legal for the target.
2882     AddrMode.BaseOffs += CI->getSExtValue();
2883     if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
2884       return true;
2885     AddrMode.BaseOffs -= CI->getSExtValue();
2886   } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
2887     // If this is a global variable, try to fold it into the addressing mode.
2888     if (!AddrMode.BaseGV) {
2889       AddrMode.BaseGV = GV;
2890       if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
2891         return true;
2892       AddrMode.BaseGV = nullptr;
2893     }
2894   } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
2895     ExtAddrMode BackupAddrMode = AddrMode;
2896     unsigned OldSize = AddrModeInsts.size();
2897
2898     // Check to see if it is possible to fold this operation.
2899     bool MovedAway = false;
2900     if (MatchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
2901       // This instruction may have been move away. If so, there is nothing
2902       // to check here.
2903       if (MovedAway)
2904         return true;
2905       // Okay, it's possible to fold this.  Check to see if it is actually
2906       // *profitable* to do so.  We use a simple cost model to avoid increasing
2907       // register pressure too much.
2908       if (I->hasOneUse() ||
2909           IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
2910         AddrModeInsts.push_back(I);
2911         return true;
2912       }
2913
2914       // It isn't profitable to do this, roll back.
2915       //cerr << "NOT FOLDING: " << *I;
2916       AddrMode = BackupAddrMode;
2917       AddrModeInsts.resize(OldSize);
2918       TPT.rollback(LastKnownGood);
2919     }
2920   } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
2921     if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
2922       return true;
2923     TPT.rollback(LastKnownGood);
2924   } else if (isa<ConstantPointerNull>(Addr)) {
2925     // Null pointer gets folded without affecting the addressing mode.
2926     return true;
2927   }
2928
2929   // Worse case, the target should support [reg] addressing modes. :)
2930   if (!AddrMode.HasBaseReg) {
2931     AddrMode.HasBaseReg = true;
2932     AddrMode.BaseReg = Addr;
2933     // Still check for legality in case the target supports [imm] but not [i+r].
2934     if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
2935       return true;
2936     AddrMode.HasBaseReg = false;
2937     AddrMode.BaseReg = nullptr;
2938   }
2939
2940   // If the base register is already taken, see if we can do [r+r].
2941   if (AddrMode.Scale == 0) {
2942     AddrMode.Scale = 1;
2943     AddrMode.ScaledReg = Addr;
2944     if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
2945       return true;
2946     AddrMode.Scale = 0;
2947     AddrMode.ScaledReg = nullptr;
2948   }
2949   // Couldn't match.
2950   TPT.rollback(LastKnownGood);
2951   return false;
2952 }
2953
2954 /// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified
2955 /// inline asm call are due to memory operands.  If so, return true, otherwise
2956 /// return false.
2957 static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
2958                                     const TargetMachine &TM) {
2959   const Function *F = CI->getParent()->getParent();
2960   const TargetLowering *TLI = TM.getSubtargetImpl(*F)->getTargetLowering();
2961   const TargetRegisterInfo *TRI = TM.getSubtargetImpl(*F)->getRegisterInfo();
2962   TargetLowering::AsmOperandInfoVector TargetConstraints =
2963       TLI->ParseConstraints(TRI, ImmutableCallSite(CI));
2964   for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
2965     TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
2966
2967     // Compute the constraint code and ConstraintType to use.
2968     TLI->ComputeConstraintToUse(OpInfo, SDValue());
2969
2970     // If this asm operand is our Value*, and if it isn't an indirect memory
2971     // operand, we can't fold it!
2972     if (OpInfo.CallOperandVal == OpVal &&
2973         (OpInfo.ConstraintType != TargetLowering::C_Memory ||
2974          !OpInfo.isIndirect))
2975       return false;
2976   }
2977
2978   return true;
2979 }
2980
2981 /// FindAllMemoryUses - Recursively walk all the uses of I until we find a
2982 /// memory use.  If we find an obviously non-foldable instruction, return true.
2983 /// Add the ultimately found memory instructions to MemoryUses.
2984 static bool FindAllMemoryUses(
2985     Instruction *I,
2986     SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
2987     SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetMachine &TM) {
2988   // If we already considered this instruction, we're done.
2989   if (!ConsideredInsts.insert(I).second)
2990     return false;
2991
2992   // If this is an obviously unfoldable instruction, bail out.
2993   if (!MightBeFoldableInst(I))
2994     return true;
2995
2996   // Loop over all the uses, recursively processing them.
2997   for (Use &U : I->uses()) {
2998     Instruction *UserI = cast<Instruction>(U.getUser());
2999
3000     if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
3001       MemoryUses.push_back(std::make_pair(LI, U.getOperandNo()));
3002       continue;
3003     }
3004
3005     if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
3006       unsigned opNo = U.getOperandNo();
3007       if (opNo == 0) return true; // Storing addr, not into addr.
3008       MemoryUses.push_back(std::make_pair(SI, opNo));
3009       continue;
3010     }
3011
3012     if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
3013       InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
3014       if (!IA) return true;
3015
3016       // If this is a memory operand, we're cool, otherwise bail out.
3017       if (!IsOperandAMemoryOperand(CI, IA, I, TM))
3018         return true;
3019       continue;
3020     }
3021
3022     if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TM))
3023       return true;
3024   }
3025
3026   return false;
3027 }
3028
3029 /// ValueAlreadyLiveAtInst - Retrn true if Val is already known to be live at
3030 /// the use site that we're folding it into.  If so, there is no cost to
3031 /// include it in the addressing mode.  KnownLive1 and KnownLive2 are two values
3032 /// that we know are live at the instruction already.
3033 bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
3034                                                    Value *KnownLive2) {
3035   // If Val is either of the known-live values, we know it is live!
3036   if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
3037     return true;
3038
3039   // All values other than instructions and arguments (e.g. constants) are live.
3040   if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
3041
3042   // If Val is a constant sized alloca in the entry block, it is live, this is
3043   // true because it is just a reference to the stack/frame pointer, which is
3044   // live for the whole function.
3045   if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
3046     if (AI->isStaticAlloca())
3047       return true;
3048
3049   // Check to see if this value is already used in the memory instruction's
3050   // block.  If so, it's already live into the block at the very least, so we
3051   // can reasonably fold it.
3052   return Val->isUsedInBasicBlock(MemoryInst->getParent());
3053 }
3054
3055 /// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
3056 /// mode of the machine to fold the specified instruction into a load or store
3057 /// that ultimately uses it.  However, the specified instruction has multiple
3058 /// uses.  Given this, it may actually increase register pressure to fold it
3059 /// into the load.  For example, consider this code:
3060 ///
3061 ///     X = ...
3062 ///     Y = X+1
3063 ///     use(Y)   -> nonload/store
3064 ///     Z = Y+1
3065 ///     load Z
3066 ///
3067 /// In this case, Y has multiple uses, and can be folded into the load of Z
3068 /// (yielding load [X+2]).  However, doing this will cause both "X" and "X+1" to
3069 /// be live at the use(Y) line.  If we don't fold Y into load Z, we use one
3070 /// fewer register.  Since Y can't be folded into "use(Y)" we don't increase the
3071 /// number of computations either.
3072 ///
3073 /// Note that this (like most of CodeGenPrepare) is just a rough heuristic.  If
3074 /// X was live across 'load Z' for other reasons, we actually *would* want to
3075 /// fold the addressing mode in the Z case.  This would make Y die earlier.
3076 bool AddressingModeMatcher::
3077 IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
3078                                      ExtAddrMode &AMAfter) {
3079   if (IgnoreProfitability) return true;
3080
3081   // AMBefore is the addressing mode before this instruction was folded into it,
3082   // and AMAfter is the addressing mode after the instruction was folded.  Get
3083   // the set of registers referenced by AMAfter and subtract out those
3084   // referenced by AMBefore: this is the set of values which folding in this
3085   // address extends the lifetime of.
3086   //
3087   // Note that there are only two potential values being referenced here,
3088   // BaseReg and ScaleReg (global addresses are always available, as are any
3089   // folded immediates).
3090   Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
3091
3092   // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
3093   // lifetime wasn't extended by adding this instruction.
3094   if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
3095     BaseReg = nullptr;
3096   if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
3097     ScaledReg = nullptr;
3098
3099   // If folding this instruction (and it's subexprs) didn't extend any live
3100   // ranges, we're ok with it.
3101   if (!BaseReg && !ScaledReg)
3102     return true;
3103
3104   // If all uses of this instruction are ultimately load/store/inlineasm's,
3105   // check to see if their addressing modes will include this instruction.  If
3106   // so, we can fold it into all uses, so it doesn't matter if it has multiple
3107   // uses.
3108   SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
3109   SmallPtrSet<Instruction*, 16> ConsideredInsts;
3110   if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TM))
3111     return false;  // Has a non-memory, non-foldable use!
3112
3113   // Now that we know that all uses of this instruction are part of a chain of
3114   // computation involving only operations that could theoretically be folded
3115   // into a memory use, loop over each of these uses and see if they could
3116   // *actually* fold the instruction.
3117   SmallVector<Instruction*, 32> MatchedAddrModeInsts;
3118   for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
3119     Instruction *User = MemoryUses[i].first;
3120     unsigned OpNo = MemoryUses[i].second;
3121
3122     // Get the access type of this use.  If the use isn't a pointer, we don't
3123     // know what it accesses.
3124     Value *Address = User->getOperand(OpNo);
3125     if (!Address->getType()->isPointerTy())
3126       return false;
3127     Type *AddressAccessTy = Address->getType()->getPointerElementType();
3128
3129     // Do a match against the root of this address, ignoring profitability. This
3130     // will tell us if the addressing mode for the memory operation will
3131     // *actually* cover the shared instruction.
3132     ExtAddrMode Result;
3133     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
3134         TPT.getRestorationPoint();
3135     AddressingModeMatcher Matcher(MatchedAddrModeInsts, TM, AddressAccessTy,
3136                                   MemoryInst, Result, InsertedTruncs,
3137                                   PromotedInsts, TPT);
3138     Matcher.IgnoreProfitability = true;
3139     bool Success = Matcher.MatchAddr(Address, 0);
3140     (void)Success; assert(Success && "Couldn't select *anything*?");
3141
3142     // The match was to check the profitability, the changes made are not
3143     // part of the original matcher. Therefore, they should be dropped
3144     // otherwise the original matcher will not present the right state.
3145     TPT.rollback(LastKnownGood);
3146
3147     // If the match didn't cover I, then it won't be shared by it.
3148     if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
3149                   I) == MatchedAddrModeInsts.end())
3150       return false;
3151
3152     MatchedAddrModeInsts.clear();
3153   }
3154
3155   return true;
3156 }
3157
3158 } // end anonymous namespace
3159
3160 /// IsNonLocalValue - Return true if V is an instruction that lives in a basic
3161 /// block other than BB.  Non-instruction values are never non-local.
3162 static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
3163   Instruction *Inst = dyn_cast<Instruction>(V);
3164   const bool DefinedElsewhere = Inst && Inst->getParent() != BB;
3165   return DefinedElsewhere;
3166 }
3167
3168 /// OptimizeMemoryInst - Load and Store Instructions often have
3169 /// addressing modes that can do significant amounts of computation.  As such,
3170 /// instruction selection will try to get the load or store to do as much
3171 /// computation as possible for the program.  The problem is that isel can only
3172 /// see within a single block.  As such, we sink as much legal addressing mode
3173 /// stuff into MemoryInst's block as possible.  Addr is the address operand of
3174 /// MemoryInst to optimize and AccessTy is the type given to the address
3175 /// matcher.  Returns true if MemoryInst was changed to use a sunk address.
3176 /// Used for both load/store and inline asms with memory operands.
3177 bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
3178                                         Type *AccessTy) {
3179   Value *Repl = Addr;
3180
3181   // Try to collapse single-value PHI nodes.  This is necessary to undo
3182   // unprofitable PRE transformations.
3183   SmallVector<Value*, 8> worklist;
3184   SmallPtrSet<Value*, 16> Visited;
3185   worklist.push_back(Addr);
3186
3187   // Use a worklist to iteratively look through PHI nodes, and ensure that
3188   // the addressing modes obtained from the non-PHI roots of the graph
3189   // are equivalent.
3190   Value *Consensus = nullptr;
3191   unsigned NumUsesConsensus = 0;
3192   bool IsNumUsesConsensusValid = false;
3193   SmallVector<Instruction*, 16> AddrModeInsts;
3194   ExtAddrMode AddrMode;
3195   TypePromotionTransaction TPT;
3196   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
3197       TPT.getRestorationPoint();
3198   while (!worklist.empty()) {
3199     Value *V = worklist.back();
3200     worklist.pop_back();
3201
3202     // Break use-def graph loops; revisiting any value abandons the search.
3203     if (!Visited.insert(V).second) {
3204       Consensus = nullptr;
3205       break;
3206     }
3207
3208     // For a PHI node, push all of its incoming values.
3209     if (PHINode *P = dyn_cast<PHINode>(V)) {
3210       for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
3211         worklist.push_back(P->getIncomingValue(i));
3212       continue;
3213     }
3214
3215     // For non-PHIs, determine the addressing mode being computed.
3216     SmallVector<Instruction*, 16> NewAddrModeInsts;
3217     ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
3218         V, AccessTy, MemoryInst, NewAddrModeInsts, *TM, InsertedTruncsSet,
3219         PromotedInsts, TPT);
3220
3221     // This check is broken into two cases with very similar code to avoid using
3222     // getNumUses() as much as possible. Some values have a lot of uses, so
3223     // calling getNumUses() unconditionally caused a significant compile-time
3224     // regression.
3225     if (!Consensus) {
3226       Consensus = V;
3227       AddrMode = NewAddrMode;
3228       AddrModeInsts = NewAddrModeInsts;
3229       continue;
3230     } else if (NewAddrMode == AddrMode) {
3231       if (!IsNumUsesConsensusValid) {
3232         NumUsesConsensus = Consensus->getNumUses();
3233         IsNumUsesConsensusValid = true;
3234       }
3235
3236       // Ensure that the obtained addressing mode is equivalent to that obtained
3237       // for all other roots of the PHI traversal.  Also, when choosing one
3238       // such root as representative, select the one with the most uses in order
3239       // to keep the cost modeling heuristics in AddressingModeMatcher
3240       // applicable.
3241       unsigned NumUses = V->getNumUses();
3242       if (NumUses > NumUsesConsensus) {
3243         Consensus = V;
3244         NumUsesConsensus = NumUses;
3245         AddrModeInsts = NewAddrModeInsts;
3246       }
3247       continue;
3248     }
3249     // This root computes a different addressing mode: there is no consensus.
3250     Consensus = nullptr;
3251     break;
3252   }
3253
3254   // If the addressing mode couldn't be determined, or if multiple different
3255   // ones were determined, bail out now and undo what the matching changed.
3256   if (!Consensus) {
3257     TPT.rollback(LastKnownGood);
3258     return false;
3259   }
3260   TPT.commit();
3261
3262   // Check to see if any of the instructions subsumed by this addr mode are
3263   // non-local to MemoryInst's BB.
3264   bool AnyNonLocal = false;
3265   for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
3266     if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
3267       AnyNonLocal = true;
3268       break;
3269     }
3270   }
3271
3272   // If all the instructions matched are already in this BB, don't do anything.
3273   if (!AnyNonLocal) {
3274     DEBUG(dbgs() << "CGP: Found      local addrmode: " << AddrMode << "\n");
3275     return false;
3276   }
3277
3278   // Build the computation with a builder positioned on this user.  Since our
3279   // caller is scanning from the top of the BB to the bottom, reuses of the
3280   // expr are guaranteed to happen later.
3281   IRBuilder<> Builder(MemoryInst);
3282
3283   // We have determined the addressing expression we want to use and know that
3284   // it has to be sunk into this block.  Check whether that was already done
3285   // for some other load/store instr in this block and, if so, reuse it.
3286   // SunkAddr aliases the SunkAddrs entry, so assigning it updates the cache.
3287   Value *&SunkAddr = SunkAddrs[Addr];
3288   if (SunkAddr) {
3289     DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
3290                  << *MemoryInst << "\n");
3291     if (SunkAddr->getType() != Addr->getType())
3292       SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
3293   } else if (AddrSinkUsingGEPs ||
3294              (!AddrSinkUsingGEPs.getNumOccurrences() && TM &&
3295               TM->getSubtargetImpl(*MemoryInst->getParent()->getParent())
3296                   ->useAA())) {
3297     // By default, we use the GEP-based method when AA is used later. This
3298     // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
3299     DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
3300                  << *MemoryInst << "\n");
3301     Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
3302     Value *ResultPtr = nullptr, *ResultIndex = nullptr;
3303
3304     // First, find the pointer.
3305     if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
3306       ResultPtr = AddrMode.BaseReg;
3307       AddrMode.BaseReg = nullptr;
3308     }
3309
3310     if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
3311       // We can't add more than one pointer together, nor can we scale a
3312       // pointer (both of which seem meaningless).
3313       if (ResultPtr || AddrMode.Scale != 1)
3314         return false;
3315
3316       ResultPtr = AddrMode.ScaledReg;
3317       AddrMode.Scale = 0;
3318     }
3319
3320     if (AddrMode.BaseGV) {
3321       if (ResultPtr)
3322         return false;
3323
3324       ResultPtr = AddrMode.BaseGV;
3325     }
3326
3327     // If the real base value actually came from an inttoptr, then the matcher
3328     // will look through it and provide only the integer value. In that case,
3329     // use it here.
3330     if (!ResultPtr && AddrMode.BaseReg) {
3331       ResultPtr =
3332         Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr");
3333       AddrMode.BaseReg = nullptr;
3334     } else if (!ResultPtr && AddrMode.Scale == 1) {
3335       ResultPtr =
3336         Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr");
3337       AddrMode.Scale = 0;
3338     }
3339
3340     if (!ResultPtr &&
3341         !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) {
3342       SunkAddr = Constant::getNullValue(Addr->getType());
3343     } else if (!ResultPtr) {
3344       return false;
3345     } else {
3346       Type *I8PtrTy =
3347           Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
3348       Type *I8Ty = Builder.getInt8Ty();
3349
3350       // Start with the base register. Do this first so that subsequent address
3351       // matching finds it last, which will prevent it from trying to match it
3352       // as the scaled value in case it happens to be a mul. That would be
3353       // problematic if we've sunk a different mul for the scale, because then
3354       // we'd end up sinking both muls.
3355       if (AddrMode.BaseReg) {
3356         Value *V = AddrMode.BaseReg;
3357         if (V->getType() != IntPtrTy)
3358           V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
3359
3360         ResultIndex = V;
3361       }
3362
3363       // Add the scale value.
3364       if (AddrMode.Scale) {
3365         Value *V = AddrMode.ScaledReg;
3366         if (V->getType() == IntPtrTy) {
3367           // done.
3368         } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
3369                    cast<IntegerType>(V->getType())->getBitWidth()) {
3370           V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
3371         } else {
3372           // It is only safe to sign extend the BaseReg if we know that the math
3373           // required to create it did not overflow before we extend it. Since
3374           // the original IR value was tossed in favor of a constant back when
3375           // the AddrMode was created we need to bail out gracefully if widths
3376           // do not match instead of extending it.
3377           Instruction *I = dyn_cast_or_null<Instruction>(ResultIndex);
3378           if (I && (ResultIndex != AddrMode.BaseReg))
3379             I->eraseFromParent();
3380           return false;
3381         }
3382
3383         if (AddrMode.Scale != 1)
3384           V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
3385                                 "sunkaddr");
3386         if (ResultIndex)
3387           ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
3388         else
3389           ResultIndex = V;
3390       }
3391
3392       // Add in the Base Offset if present.
3393       if (AddrMode.BaseOffs) {
3394         Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
3395         if (ResultIndex) {
3396           // We need to add this separately from the scale above to help with
3397           // SDAG consecutive load/store merging.
3398           if (ResultPtr->getType() != I8PtrTy)
3399             ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
3400           ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
3401         }
3402
3403         ResultIndex = V;
3404       }
3405
3406       if (!ResultIndex) {
3407         SunkAddr = ResultPtr;
3408       } else {
3409         if (ResultPtr->getType() != I8PtrTy)
3410           ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
3411         SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
3412       }
3413
3414       if (SunkAddr->getType() != Addr->getType())
3415         SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
3416     }
3417   } else {
3418     DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
3419                  << *MemoryInst << "\n");
3420     Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
3421     Value *Result = nullptr;
3422
3423     // Start with the base register. Do this first so that subsequent address
3424     // matching finds it last, which will prevent it from trying to match it
3425     // as the scaled value in case it happens to be a mul. That would be
3426     // problematic if we've sunk a different mul for the scale, because then
3427     // we'd end up sinking both muls.
3428     if (AddrMode.BaseReg) {
3429       Value *V = AddrMode.BaseReg;
3430       if (V->getType()->isPointerTy())
3431         V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
3432       if (V->getType() != IntPtrTy)
3433         V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
3434       Result = V;
3435     }
3436
3437     // Add the scale value.
3438     if (AddrMode.Scale) {
3439       Value *V = AddrMode.ScaledReg;
3440       if (V->getType() == IntPtrTy) {
3441         // done.
3442       } else if (V->getType()->isPointerTy()) {
3443         V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
3444       } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
3445                  cast<IntegerType>(V->getType())->getBitWidth()) {
3446         V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
3447       } else {
3448         // It is only safe to sign extend the BaseReg if we know that the math
3449         // required to create it did not overflow before we extend it. Since
3450         // the original IR value was tossed in favor of a constant back when
3451         // the AddrMode was created we need to bail out gracefully if widths
3452         // do not match instead of extending it.
3453         Instruction *I = dyn_cast_or_null<Instruction>(Result);
3454         if (I && (Result != AddrMode.BaseReg))
3455           I->eraseFromParent();
3456         return false;
3457       }
3458       if (AddrMode.Scale != 1)
3459         V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
3460                               "sunkaddr");
3461       if (Result)
3462         Result = Builder.CreateAdd(Result, V, "sunkaddr");
3463       else
3464         Result = V;
3465     }
3466
3467     // Add in the BaseGV if present.
3468     if (AddrMode.BaseGV) {
3469       Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
3470       if (Result)
3471         Result = Builder.CreateAdd(Result, V, "sunkaddr");
3472       else
3473         Result = V;
3474     }
3475
3476     // Add in the Base Offset if present.
3477     if (AddrMode.BaseOffs) {
3478       Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
3479       if (Result)
3480         Result = Builder.CreateAdd(Result, V, "sunkaddr");
3481       else
3482         Result = V;
3483     }
3484
3485     if (!Result)
3486       SunkAddr = Constant::getNullValue(Addr->getType());
3487     else
3488       SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
3489   }
3490   // Make MemoryInst use the sunk address in place of the original one.
3491   MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
3492
3493   // If we have no uses, recursively delete the value and all dead instructions
3494   // using it.
3495   if (Repl->use_empty()) {
3496     // This can cause recursive deletion, which can invalidate our iterator.
3497     // Use a WeakVH to hold onto it in case this happens.
3498     WeakVH IterHandle(CurInstIterator);
3499     BasicBlock *BB = CurInstIterator->getParent();
3500
3501     RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
3502
3503     if (IterHandle != CurInstIterator) {
3504       // If the iterator instruction was recursively deleted, start over at the
3505       // start of the block.
3506       CurInstIterator = BB->begin();
3507       SunkAddrs.clear();
3508     }
3509   }
3510   ++NumMemoryInsts;
3511   return true;
3512 }
3513
3514 /// OptimizeInlineAsmInst - If there are any memory operands, use
3515 /// OptimizeMemoryInst to sink their address computing into the block when
3516 /// possible / profitable.
3517 bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
3518   bool MadeChange = false;
3519
3520   const TargetRegisterInfo *TRI =
3521       TM->getSubtargetImpl(*CS->getParent()->getParent())->getRegisterInfo();
3522   TargetLowering::AsmOperandInfoVector
3523     TargetConstraints = TLI->ParseConstraints(TRI, CS);
3524   unsigned ArgNo = 0;
3525   for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
3526     TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
3527
3528     // Compute the constraint code and ConstraintType to use.
3529     TLI->ComputeConstraintToUse(OpInfo, SDValue());
3530
3531     if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
3532         OpInfo.isIndirect) {
3533       Value *OpVal = CS->getArgOperand(ArgNo++);
3534       MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType());
3535     } else if (OpInfo.Type == InlineAsm::isInput)
3536       ArgNo++;
3537   }
3538
3539   return MadeChange;
3540 }
3541
3542 /// \brief Check if all the uses of \p Inst are equivalent (or free) zero or
3543 /// sign extensions.
3544 static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) {
3545   assert(!Inst->use_empty() && "Input must have at least one use");
3546   const Instruction *FirstUser = cast<Instruction>(*Inst->user_begin());
3547   bool IsSExt = isa<SExtInst>(FirstUser);
3548   Type *ExtTy = FirstUser->getType();
3549   for (const User *U : Inst->users()) {
3550     const Instruction *UI = cast<Instruction>(U);
3551     if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
3552       return false;
3553     Type *CurTy = UI->getType();
3554     // Same input and output types: Same instruction after CSE.
3555     if (CurTy == ExtTy)
3556       continue;
3557
3558     // If IsSExt is true, we are in this situation:
3559     // a = Inst
3560     // b = sext ty1 a to ty2
3561     // c = sext ty1 a to ty3
3562     // Assuming ty2 is shorter than ty3, this could be turned into:
3563     // a = Inst
3564     // b = sext ty1 a to ty2
3565     // c = sext ty2 b to ty3
3566     // However, the last sext is not free.
3567     if (IsSExt)
3568       return false;
3569
3570     // This is a ZExt, maybe this is free to extend from one type to another.
3571     // In that case, we would not account for a different use.
3572     Type *NarrowTy;
3573     Type *LargeTy;
3574     if (ExtTy->getScalarType()->getIntegerBitWidth() >
3575         CurTy->getScalarType()->getIntegerBitWidth()) {
3576       NarrowTy = CurTy;
3577       LargeTy = ExtTy;
3578     } else {
3579       NarrowTy = ExtTy;
3580       LargeTy = CurTy;
3581     }
3582
3583     if (!TLI.isZExtFree(NarrowTy, LargeTy))
3584       return false;
3585   }
3586   // All uses are the same or can be derived from one another for free.
3587   return true;
3588 }
3589
3590 /// \brief Try to form ExtLd by promoting \p Exts until they reach a
3591 /// load instruction.
3592 /// If an ext(load) can be formed, it is returned via \p LI for the load
3593 /// and \p Inst for the extension.
3594 /// Otherwise LI == nullptr and Inst == nullptr.
3595 /// When some promotion happened, \p TPT contains the proper state to
3596 /// revert them.
3597 ///
3598 /// \return true when promoting was necessary to expose the ext(load)
3599 /// opportunity, false otherwise.
3600 ///
3601 /// Example:
3602 /// \code
3603 /// %ld = load i32* %addr
3604 /// %add = add nuw i32 %ld, 4
3605 /// %zext = zext i32 %add to i64
3606 /// \endcode
3607 /// =>
3608 /// \code
3609 /// %ld = load i32* %addr
3610 /// %zext = zext i32 %ld to i64
3611 /// %add = add nuw i64 %zext, 4
3612 /// \encode
3613 /// Thanks to the promotion, we can match zext(load i32*) to i64.
3614 bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
3615                                     LoadInst *&LI, Instruction *&Inst,
3616                                     const SmallVectorImpl<Instruction *> &Exts,
3617                                     unsigned CreatedInstsCost = 0) {
3618   // Iterate over all the extensions to see if one form an ext(load).
3619   for (auto I : Exts) {
3620     // Check if we directly have ext(load).
3621     if ((LI = dyn_cast<LoadInst>(I->getOperand(0)))) {
3622       Inst = I;
3623       // No promotion happened here.
3624       return false;
3625     }
3626     // Check whether or not we want to do any promotion.
3627     if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion)
3628       continue;
3629     // Get the action to perform the promotion.
3630     TypePromotionHelper::Action TPH = TypePromotionHelper::getAction(
3631         I, InsertedTruncsSet, *TLI, PromotedInsts);
3632     // Check if we can promote.
3633     if (!TPH)
3634       continue;
3635     // Save the current state.
3636     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
3637         TPT.getRestorationPoint();
3638     SmallVector<Instruction *, 4> NewExts;
3639     unsigned NewCreatedInstsCost = 0;
3640     unsigned ExtCost = !TLI->isExtFree(I);
3641     // Promote.
3642     Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
3643                              &NewExts, nullptr, *TLI);
3644     assert(PromotedVal &&
3645            "TypePromotionHelper should have filtered out those cases");
3646
3647     // We would be able to merge only one extension in a load.
3648     // Therefore, if we have more than 1 new extension we heuristically
3649     // cut this search path, because it means we degrade the code quality.
3650     // With exactly 2, the transformation is neutral, because we will merge
3651     // one extension but leave one. However, we optimistically keep going,
3652     // because the new extension may be removed too.
3653     long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
3654     TotalCreatedInstsCost -= ExtCost;
3655     if (!StressExtLdPromotion &&
3656         (TotalCreatedInstsCost > 1 ||
3657          !isPromotedInstructionLegal(*TLI, PromotedVal))) {
3658       // The promotion is not profitable, rollback to the previous state.
3659       TPT.rollback(LastKnownGood);
3660       continue;
3661     }
3662     // The promotion is profitable.
3663     // Check if it exposes an ext(load).
3664     (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost);
3665     if (LI && (StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
3666                // If we have created a new extension, i.e., now we have two
3667                // extensions. We must make sure one of them is merged with
3668                // the load, otherwise we may degrade the code quality.
3669                (LI->hasOneUse() || hasSameExtUse(LI, *TLI))))
3670       // Promotion happened.
3671       return true;
3672     // If this does not help to expose an ext(load) then, rollback.
3673     TPT.rollback(LastKnownGood);
3674   }
3675   // None of the extension can form an ext(load).
3676   LI = nullptr;
3677   Inst = nullptr;
3678   return false;
3679 }
3680
3681 /// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
3682 /// basic block as the load, unless conditions are unfavorable. This allows
3683 /// SelectionDAG to fold the extend into the load.
3684 /// \p I[in/out] the extension may be modified during the process if some
3685 /// promotions apply.
3686 ///
3687 bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) {
3688   // Try to promote a chain of computation if it allows to form
3689   // an extended load.
3690   TypePromotionTransaction TPT;
3691   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
3692     TPT.getRestorationPoint();
3693   SmallVector<Instruction *, 1> Exts;
3694   Exts.push_back(I);
3695   // Look for a load being extended.
3696   LoadInst *LI = nullptr;
3697   Instruction *OldExt = I;
3698   bool HasPromoted = ExtLdPromotion(TPT, LI, I, Exts);
3699   if (!LI || !I) {
3700     assert(!HasPromoted && !LI && "If we did not match any load instruction "
3701                                   "the code must remain the same");
3702     I = OldExt;
3703     return false;
3704   }
3705
3706   // If they're already in the same block, there's nothing to do.
3707   // Make the cheap checks first if we did not promote.
3708   // If we promoted, we need to check if it is indeed profitable.
3709   if (!HasPromoted && LI->getParent() == I->getParent())
3710     return false;
3711
3712   EVT VT = TLI->getValueType(I->getType());
3713   EVT LoadVT = TLI->getValueType(LI->getType());
3714
3715   // If the load has other users and the truncate is not free, this probably
3716   // isn't worthwhile.
3717   if (!LI->hasOneUse() && TLI &&
3718       (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) &&
3719       !TLI->isTruncateFree(I->getType(), LI->getType())) {
3720     I = OldExt;
3721     TPT.rollback(LastKnownGood);
3722     return false;
3723   }
3724
3725   // Check whether the target supports casts folded into loads.
3726   unsigned LType;
3727   if (isa<ZExtInst>(I))
3728     LType = ISD::ZEXTLOAD;
3729   else {
3730     assert(isa<SExtInst>(I) && "Unexpected ext type!");
3731     LType = ISD::SEXTLOAD;
3732   }
3733   if (TLI && !TLI->isLoadExtLegal(LType, VT, LoadVT)) {
3734     I = OldExt;
3735     TPT.rollback(LastKnownGood);
3736     return false;
3737   }
3738
3739   // Move the extend into the same block as the load, so that SelectionDAG
3740   // can fold it.
3741   TPT.commit();
3742   I->removeFromParent();
3743   I->insertAfter(LI);
3744   ++NumExtsMoved;
3745   return true;
3746 }
3747
3748 bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
3749   BasicBlock *DefBB = I->getParent();
3750
3751   // If the result of a {s|z}ext and its source are both live out, rewrite all
3752   // other uses of the source with result of extension.
3753   Value *Src = I->getOperand(0);
3754   if (Src->hasOneUse())
3755     return false;
3756
3757   // Only do this xform if truncating is free.
3758   if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
3759     return false;
3760
3761   // Only safe to perform the optimization if the source is also defined in
3762   // this block.
3763   if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
3764     return false;
3765
3766   bool DefIsLiveOut = false;
3767   for (User *U : I->users()) {
3768     Instruction *UI = cast<Instruction>(U);
3769
3770     // Figure out which BB this ext is used in.
3771     BasicBlock *UserBB = UI->getParent();
3772     if (UserBB == DefBB) continue;
3773     DefIsLiveOut = true;
3774     break;
3775   }
3776   if (!DefIsLiveOut)
3777     return false;
3778
3779   // Make sure none of the uses are PHI nodes.
3780   for (User *U : Src->users()) {
3781     Instruction *UI = cast<Instruction>(U);
3782     BasicBlock *UserBB = UI->getParent();
3783     if (UserBB == DefBB) continue;
3784     // Be conservative. We don't want this xform to end up introducing
3785     // reloads just before load / store instructions.
3786     if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
3787       return false;
3788   }
3789
3790   // InsertedTruncs - Only insert one trunc in each block once.
3791   DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
3792
3793   bool MadeChange = false;
3794   for (Use &U : Src->uses()) {
3795     Instruction *User = cast<Instruction>(U.getUser());
3796
3797     // Figure out which BB this ext is used in.
3798     BasicBlock *UserBB = User->getParent();
3799     if (UserBB == DefBB) continue;
3800
3801     // Both src and def are live in this block. Rewrite the use.
3802     Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
3803
3804     if (!InsertedTrunc) {
3805       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
3806       InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
3807       InsertedTruncsSet.insert(InsertedTrunc);
3808     }
3809
3810     // Replace a use of the {s|z}ext source with a use of the result.
3811     U = InsertedTrunc;
3812     ++NumExtUses;
3813     MadeChange = true;
3814   }
3815
3816   return MadeChange;
3817 }
3818
3819 /// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be
3820 /// turned into an explicit branch.
3821 static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
3822   // FIXME: This should use the same heuristics as IfConversion to determine
3823   // whether a select is better represented as a branch.  This requires that
3824   // branch probability metadata is preserved for the select, which is not the
3825   // case currently.
3826
3827   CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
3828
3829   // If the branch is predicted right, an out of order CPU can avoid blocking on
3830   // the compare.  Emit cmovs on compares with a memory operand as branches to
3831   // avoid stalls on the load from memory.  If the compare has more than one use
3832   // there's probably another cmov or setcc around so it's not worth emitting a
3833   // branch.
3834   if (!Cmp)
3835     return false;
3836
3837   Value *CmpOp0 = Cmp->getOperand(0);
3838   Value *CmpOp1 = Cmp->getOperand(1);
3839
3840   // We check that the memory operand has one use to avoid uses of the loaded
3841   // value directly after the compare, making branches unprofitable.
3842   return Cmp->hasOneUse() &&
3843          ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
3844           (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()));
3845 }
3846
3847
3848 /// If we have a SelectInst that will likely profit from branch prediction,
3849 /// turn it into a branch.
3850 bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
3851   bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
3852
3853   // Can we convert the 'select' to CF ?
3854   if (DisableSelectToBranch || OptSize || !TLI || VectorCond)
3855     return false;
3856
3857   TargetLowering::SelectSupportKind SelectKind;
3858   if (VectorCond)
3859     SelectKind = TargetLowering::VectorMaskSelect;
3860   else if (SI->getType()->isVectorTy())
3861     SelectKind = TargetLowering::ScalarCondVectorVal;
3862   else
3863     SelectKind = TargetLowering::ScalarValSelect;
3864
3865   // Do we have efficient codegen support for this kind of 'selects' ?
3866   if (TLI->isSelectSupported(SelectKind)) {
3867     // We have efficient codegen support for the select instruction.
3868     // Check if it is profitable to keep this 'select'.
3869     if (!TLI->isPredictableSelectExpensive() ||
3870         !isFormingBranchFromSelectProfitable(SI))
3871       return false;
3872   }
3873
3874   ModifiedDT = true;
3875
3876   // First, we split the block containing the select into 2 blocks.
3877   BasicBlock *StartBlock = SI->getParent();
3878   BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI));
3879   BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
3880
3881   // Create a new block serving as the landing pad for the branch.
3882   BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid",
3883                                              NextBlock->getParent(), NextBlock);
3884
3885   // Move the unconditional branch from the block with the select in it into our
3886   // landing pad block.
3887   StartBlock->getTerminator()->eraseFromParent();
3888   BranchInst::Create(NextBlock, SmallBlock);
3889
3890   // Insert the real conditional branch based on the original condition.
3891   BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI);
3892
3893   // The select itself is replaced with a PHI Node.
3894   PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin());
3895   PN->takeName(SI);
3896   PN->addIncoming(SI->getTrueValue(), StartBlock);
3897   PN->addIncoming(SI->getFalseValue(), SmallBlock);
3898   SI->replaceAllUsesWith(PN);
3899   SI->eraseFromParent();
3900
3901   // Instruct OptimizeBlock to skip to the next block.
3902   CurInstIterator = StartBlock->end();
3903   ++NumSelectsExpanded;
3904   return true;
3905 }
3906
3907 static bool isBroadcastShuffle(ShuffleVectorInst *SVI) {
3908   SmallVector<int, 16> Mask(SVI->getShuffleMask());
3909   int SplatElem = -1;
3910   for (unsigned i = 0; i < Mask.size(); ++i) {
3911     if (SplatElem != -1 && Mask[i] != -1 && Mask[i] != SplatElem)
3912       return false;
3913     SplatElem = Mask[i];
3914   }
3915
3916   return true;
3917 }
3918
3919 /// Some targets have expensive vector shifts if the lanes aren't all the same
3920 /// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases
3921 /// it's often worth sinking a shufflevector splat down to its use so that
3922 /// codegen can spot all lanes are identical.
3923 bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
3924   BasicBlock *DefBB = SVI->getParent();
3925
3926   // Only do this xform if variable vector shifts are particularly expensive.
3927   if (!TLI || !TLI->isVectorShiftByScalarCheap(SVI->getType()))
3928     return false;
3929
3930   // We only expect better codegen by sinking a shuffle if we can recognise a
3931   // constant splat.
3932   if (!isBroadcastShuffle(SVI))
3933     return false;
3934
3935   // InsertedShuffles - Only insert a shuffle in each block once.
3936   DenseMap<BasicBlock*, Instruction*> InsertedShuffles;
3937
3938   bool MadeChange = false;
3939   for (User *U : SVI->users()) {
3940     Instruction *UI = cast<Instruction>(U);
3941
3942     // Figure out which BB this ext is used in.
3943     BasicBlock *UserBB = UI->getParent();
3944     if (UserBB == DefBB) continue;
3945
3946     // For now only apply this when the splat is used by a shift instruction.
3947     if (!UI->isShift()) continue;
3948
3949     // Everything checks out, sink the shuffle if the user's block doesn't
3950     // already have a copy.
3951     Instruction *&InsertedShuffle = InsertedShuffles[UserBB];
3952
3953     if (!InsertedShuffle) {
3954       BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
3955       InsertedShuffle = new ShuffleVectorInst(SVI->getOperand(0),
3956                                               SVI->getOperand(1),
3957                                               SVI->getOperand(2), "", InsertPt);
3958     }
3959
3960     UI->replaceUsesOfWith(SVI, InsertedShuffle);
3961     MadeChange = true;
3962   }
3963
3964   // If we removed all uses, nuke the shuffle.
3965   if (SVI->use_empty()) {
3966     SVI->eraseFromParent();
3967     MadeChange = true;
3968   }
3969
3970   return MadeChange;
3971 }
3972
3973 namespace {
3974 /// \brief Helper class to promote a scalar operation to a vector one.
3975 /// This class is used to move downward extractelement transition.
3976 /// E.g.,
3977 /// a = vector_op <2 x i32>
3978 /// b = extractelement <2 x i32> a, i32 0
3979 /// c = scalar_op b
3980 /// store c
3981 ///
3982 /// =>
3983 /// a = vector_op <2 x i32>
3984 /// c = vector_op a (equivalent to scalar_op on the related lane)
3985 /// * d = extractelement <2 x i32> c, i32 0
3986 /// * store d
3987 /// Assuming both extractelement and store can be combine, we get rid of the
3988 /// transition.
3989 class VectorPromoteHelper {
3990   /// Used to perform some checks on the legality of vector operations.
3991   const TargetLowering &TLI;
3992
3993   /// Used to estimated the cost of the promoted chain.
3994   const TargetTransformInfo &TTI;
3995
3996   /// The transition being moved downwards.
3997   Instruction *Transition;
3998   /// The sequence of instructions to be promoted.
3999   SmallVector<Instruction *, 4> InstsToBePromoted;
4000   /// Cost of combining a store and an extract.
4001   unsigned StoreExtractCombineCost;
4002   /// Instruction that will be combined with the transition.
4003   Instruction *CombineInst;
4004
4005   /// \brief The instruction that represents the current end of the transition.
4006   /// Since we are faking the promotion until we reach the end of the chain
4007   /// of computation, we need a way to get the current end of the transition.
4008   Instruction *getEndOfTransition() const {
4009     if (InstsToBePromoted.empty())
4010       return Transition;
4011     return InstsToBePromoted.back();
4012   }
4013
4014   /// \brief Return the index of the original value in the transition.
4015   /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
4016   /// c, is at index 0.
4017   unsigned getTransitionOriginalValueIdx() const {
4018     assert(isa<ExtractElementInst>(Transition) &&
4019            "Other kind of transitions are not supported yet");
4020     return 0;
4021   }
4022
4023   /// \brief Return the index of the index in the transition.
4024   /// E.g., for "extractelement <2 x i32> c, i32 0" the index
4025   /// is at index 1.
4026   unsigned getTransitionIdx() const {
4027     assert(isa<ExtractElementInst>(Transition) &&
4028            "Other kind of transitions are not supported yet");
4029     return 1;
4030   }
4031
4032   /// \brief Get the type of the transition.
4033   /// This is the type of the original value.
4034   /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
4035   /// transition is <2 x i32>.
4036   Type *getTransitionType() const {
4037     return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
4038   }
4039
4040   /// \brief Promote \p ToBePromoted by moving \p Def downward through.
4041   /// I.e., we have the following sequence:
4042   /// Def = Transition <ty1> a to <ty2>
4043   /// b = ToBePromoted <ty2> Def, ...
4044   /// =>
4045   /// b = ToBePromoted <ty1> a, ...
4046   /// Def = Transition <ty1> ToBePromoted to <ty2>
4047   void promoteImpl(Instruction *ToBePromoted);
4048
4049   /// \brief Check whether or not it is profitable to promote all the
4050   /// instructions enqueued to be promoted.
4051   bool isProfitableToPromote() {
4052     Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
4053     unsigned Index = isa<ConstantInt>(ValIdx)
4054                          ? cast<ConstantInt>(ValIdx)->getZExtValue()
4055                          : -1;
4056     Type *PromotedType = getTransitionType();
4057
4058     StoreInst *ST = cast<StoreInst>(CombineInst);
4059     unsigned AS = ST->getPointerAddressSpace();
4060     unsigned Align = ST->getAlignment();
4061     // Check if this store is supported.
4062     if (!TLI.allowsMisalignedMemoryAccesses(
4063             TLI.getValueType(ST->getValueOperand()->getType()), AS, Align)) {
4064       // If this is not supported, there is no way we can combine
4065       // the extract with the store.
4066       return false;
4067     }
4068
4069     // The scalar chain of computation has to pay for the transition
4070     // scalar to vector.
4071     // The vector chain has to account for the combining cost.
4072     uint64_t ScalarCost =
4073         TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index);
4074     uint64_t VectorCost = StoreExtractCombineCost;
4075     for (const auto &Inst : InstsToBePromoted) {
4076       // Compute the cost.
4077       // By construction, all instructions being promoted are arithmetic ones.
4078       // Moreover, one argument is a constant that can be viewed as a splat
4079       // constant.
4080       Value *Arg0 = Inst->getOperand(0);
4081       bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
4082                             isa<ConstantFP>(Arg0);
4083       TargetTransformInfo::OperandValueKind Arg0OVK =
4084           IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
4085                          : TargetTransformInfo::OK_AnyValue;
4086       TargetTransformInfo::OperandValueKind Arg1OVK =
4087           !IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
4088                           : TargetTransformInfo::OK_AnyValue;
4089       ScalarCost += TTI.getArithmeticInstrCost(
4090           Inst->getOpcode(), Inst->getType(), Arg0OVK, Arg1OVK);
4091       VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
4092                                                Arg0OVK, Arg1OVK);
4093     }
4094     DEBUG(dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
4095                  << ScalarCost << "\nVector: " << VectorCost << '\n');
4096     return ScalarCost > VectorCost;
4097   }
4098
4099   /// \brief Generate a constant vector with \p Val with the same
4100   /// number of elements as the transition.
4101   /// \p UseSplat defines whether or not \p Val should be replicated
4102   /// accross the whole vector.
4103   /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
4104   /// otherwise we generate a vector with as many undef as possible:
4105   /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only
4106   /// used at the index of the extract.
4107   Value *getConstantVector(Constant *Val, bool UseSplat) const {
4108     unsigned ExtractIdx = UINT_MAX;
4109     if (!UseSplat) {
4110       // If we cannot determine where the constant must be, we have to
4111       // use a splat constant.
4112       Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
4113       if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
4114         ExtractIdx = CstVal->getSExtValue();
4115       else
4116         UseSplat = true;
4117     }
4118
4119     unsigned End = getTransitionType()->getVectorNumElements();
4120     if (UseSplat)
4121       return ConstantVector::getSplat(End, Val);
4122
4123     SmallVector<Constant *, 4> ConstVec;
4124     UndefValue *UndefVal = UndefValue::get(Val->getType());
4125     for (unsigned Idx = 0; Idx != End; ++Idx) {
4126       if (Idx == ExtractIdx)
4127         ConstVec.push_back(Val);
4128       else
4129         ConstVec.push_back(UndefVal);
4130     }
4131     return ConstantVector::get(ConstVec);
4132   }
4133
4134   /// \brief Check if promoting to a vector type an operand at \p OperandIdx
4135   /// in \p Use can trigger undefined behavior.
4136   static bool canCauseUndefinedBehavior(const Instruction *Use,
4137                                         unsigned OperandIdx) {
4138     // This is not safe to introduce undef when the operand is on
4139     // the right hand side of a division-like instruction.
4140     if (OperandIdx != 1)
4141       return false;
4142     switch (Use->getOpcode()) {
4143     default:
4144       return false;
4145     case Instruction::SDiv:
4146     case Instruction::UDiv:
4147     case Instruction::SRem:
4148     case Instruction::URem:
4149       return true;
4150     case Instruction::FDiv:
4151     case Instruction::FRem:
4152       return !Use->hasNoNaNs();
4153     }
4154     llvm_unreachable(nullptr);
4155   }
4156
4157 public:
4158   VectorPromoteHelper(const TargetLowering &TLI, const TargetTransformInfo &TTI,
4159                       Instruction *Transition, unsigned CombineCost)
4160       : TLI(TLI), TTI(TTI), Transition(Transition),
4161         StoreExtractCombineCost(CombineCost), CombineInst(nullptr) {
4162     assert(Transition && "Do not know how to promote null");
4163   }
4164
4165   /// \brief Check if we can promote \p ToBePromoted to \p Type.
4166   bool canPromote(const Instruction *ToBePromoted) const {
4167     // We could support CastInst too.
4168     return isa<BinaryOperator>(ToBePromoted);
4169   }
4170
4171   /// \brief Check if it is profitable to promote \p ToBePromoted
4172   /// by moving downward the transition through.
4173   bool shouldPromote(const Instruction *ToBePromoted) const {
4174     // Promote only if all the operands can be statically expanded.
4175     // Indeed, we do not want to introduce any new kind of transitions.
4176     for (const Use &U : ToBePromoted->operands()) {
4177       const Value *Val = U.get();
4178       if (Val == getEndOfTransition()) {
4179         // If the use is a division and the transition is on the rhs,
4180         // we cannot promote the operation, otherwise we may create a
4181         // division by zero.
4182         if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
4183           return false;
4184         continue;
4185       }
4186       if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
4187           !isa<ConstantFP>(Val))
4188         return false;
4189     }
4190     // Check that the resulting operation is legal.
4191     int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
4192     if (!ISDOpcode)
4193       return false;
4194     return StressStoreExtract ||
4195            TLI.isOperationLegalOrCustom(
4196                ISDOpcode, TLI.getValueType(getTransitionType(), true));
4197   }
4198
4199   /// \brief Check whether or not \p Use can be combined
4200   /// with the transition.
4201   /// I.e., is it possible to do Use(Transition) => AnotherUse?
4202   bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
4203
4204   /// \brief Record \p ToBePromoted as part of the chain to be promoted.
4205   void enqueueForPromotion(Instruction *ToBePromoted) {
4206     InstsToBePromoted.push_back(ToBePromoted);
4207   }
4208
4209   /// \brief Set the instruction that will be combined with the transition.
4210   void recordCombineInstruction(Instruction *ToBeCombined) {
4211     assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
4212     CombineInst = ToBeCombined;
4213   }
4214
4215   /// \brief Promote all the instructions enqueued for promotion if it is
4216   /// is profitable.
4217   /// \return True if the promotion happened, false otherwise.
4218   bool promote() {
4219     // Check if there is something to promote.
4220     // Right now, if we do not have anything to combine with,
4221     // we assume the promotion is not profitable.
4222     if (InstsToBePromoted.empty() || !CombineInst)
4223       return false;
4224
4225     // Check cost.
4226     if (!StressStoreExtract && !isProfitableToPromote())
4227       return false;
4228
4229     // Promote.
4230     for (auto &ToBePromoted : InstsToBePromoted)
4231       promoteImpl(ToBePromoted);
4232     InstsToBePromoted.clear();
4233     return true;
4234   }
4235 };
4236 } // End of anonymous namespace.
4237
4238 void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
4239   // At this point, we know that all the operands of ToBePromoted but Def
4240   // can be statically promoted.
4241   // For Def, we need to use its parameter in ToBePromoted:
4242   // b = ToBePromoted ty1 a
4243   // Def = Transition ty1 b to ty2
4244   // Move the transition down.
4245   // 1. Replace all uses of the promoted operation by the transition.
4246   // = ... b => = ... Def.
4247   assert(ToBePromoted->getType() == Transition->getType() &&
4248          "The type of the result of the transition does not match "
4249          "the final type");
4250   ToBePromoted->replaceAllUsesWith(Transition);
4251   // 2. Update the type of the uses.
4252   // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
4253   Type *TransitionTy = getTransitionType();
4254   ToBePromoted->mutateType(TransitionTy);
4255   // 3. Update all the operands of the promoted operation with promoted
4256   // operands.
4257   // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
4258   for (Use &U : ToBePromoted->operands()) {
4259     Value *Val = U.get();
4260     Value *NewVal = nullptr;
4261     if (Val == Transition)
4262       NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
4263     else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
4264              isa<ConstantFP>(Val)) {
4265       // Use a splat constant if it is not safe to use undef.
4266       NewVal = getConstantVector(
4267           cast<Constant>(Val),
4268           isa<UndefValue>(Val) ||
4269               canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
4270     } else
4271       llvm_unreachable("Did you modified shouldPromote and forgot to update "
4272                        "this?");
4273     ToBePromoted->setOperand(U.getOperandNo(), NewVal);
4274   }
4275   Transition->removeFromParent();
4276   Transition->insertAfter(ToBePromoted);
4277   Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
4278 }
4279
4280 /// Some targets can do store(extractelement) with one instruction.
4281 /// Try to push the extractelement towards the stores when the target
4282 /// has this feature and this is profitable.
4283 bool CodeGenPrepare::OptimizeExtractElementInst(Instruction *Inst) {
4284   unsigned CombineCost = UINT_MAX;
4285   if (DisableStoreExtract || !TLI ||
4286       (!StressStoreExtract &&
4287        !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
4288                                        Inst->getOperand(1), CombineCost)))
4289     return false;
4290
4291   // At this point we know that Inst is a vector to scalar transition.
4292   // Try to move it down the def-use chain, until:
4293   // - We can combine the transition with its single use
4294   //   => we got rid of the transition.
4295   // - We escape the current basic block
4296   //   => we would need to check that we are moving it at a cheaper place and
4297   //      we do not do that for now.
4298   BasicBlock *Parent = Inst->getParent();
4299   DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
4300   VectorPromoteHelper VPH(*TLI, *TTI, Inst, CombineCost);
4301   // If the transition has more than one use, assume this is not going to be
4302   // beneficial.
4303   while (Inst->hasOneUse()) {
4304     Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
4305     DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
4306
4307     if (ToBePromoted->getParent() != Parent) {
4308       DEBUG(dbgs() << "Instruction to promote is in a different block ("
4309                    << ToBePromoted->getParent()->getName()
4310                    << ") than the transition (" << Parent->getName() << ").\n");
4311       return false;
4312     }
4313
4314     if (VPH.canCombine(ToBePromoted)) {
4315       DEBUG(dbgs() << "Assume " << *Inst << '\n'
4316                    << "will be combined with: " << *ToBePromoted << '\n');
4317       VPH.recordCombineInstruction(ToBePromoted);
4318       bool Changed = VPH.promote();
4319       NumStoreExtractExposed += Changed;
4320       return Changed;
4321     }
4322
4323     DEBUG(dbgs() << "Try promoting.\n");
4324     if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
4325       return false;
4326
4327     DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
4328
4329     VPH.enqueueForPromotion(ToBePromoted);
4330     Inst = ToBePromoted;
4331   }
4332   return false;
4333 }
4334
4335 bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
4336   if (PHINode *P = dyn_cast<PHINode>(I)) {
4337     // It is possible for very late stage optimizations (such as SimplifyCFG)
4338     // to introduce PHI nodes too late to be cleaned up.  If we detect such a
4339     // trivial PHI, go ahead and zap it here.
4340     const DataLayout &DL = I->getModule()->getDataLayout();
4341     if (Value *V = SimplifyInstruction(P, DL, TLInfo, nullptr)) {
4342       P->replaceAllUsesWith(V);
4343       P->eraseFromParent();
4344       ++NumPHIsElim;
4345       return true;
4346     }
4347     return false;
4348   }
4349
4350   if (CastInst *CI = dyn_cast<CastInst>(I)) {
4351     // If the source of the cast is a constant, then this should have
4352     // already been constant folded.  The only reason NOT to constant fold
4353     // it is if something (e.g. LSR) was careful to place the constant
4354     // evaluation in a block other than then one that uses it (e.g. to hoist
4355     // the address of globals out of a loop).  If this is the case, we don't
4356     // want to forward-subst the cast.
4357     if (isa<Constant>(CI->getOperand(0)))
4358       return false;
4359
4360     if (TLI && OptimizeNoopCopyExpression(CI, *TLI))
4361       return true;
4362
4363     if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
4364       /// Sink a zext or sext into its user blocks if the target type doesn't
4365       /// fit in one register
4366       if (TLI && TLI->getTypeAction(CI->getContext(),
4367                                     TLI->getValueType(CI->getType())) ==
4368                      TargetLowering::TypeExpandInteger) {
4369         return SinkCast(CI);
4370       } else {
4371         bool MadeChange = MoveExtToFormExtLoad(I);
4372         return MadeChange | OptimizeExtUses(I);
4373       }
4374     }
4375     return false;
4376   }
4377
4378   if (CmpInst *CI = dyn_cast<CmpInst>(I))
4379     if (!TLI || !TLI->hasMultipleConditionRegisters())
4380       return OptimizeCmpExpression(CI);
4381
4382   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
4383     if (TLI)
4384       return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
4385     return false;
4386   }
4387
4388   if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
4389     if (TLI)
4390       return OptimizeMemoryInst(I, SI->getOperand(1),
4391                                 SI->getOperand(0)->getType());
4392     return false;
4393   }
4394
4395   BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
4396
4397   if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
4398                 BinOp->getOpcode() == Instruction::LShr)) {
4399     ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
4400     if (TLI && CI && TLI->hasExtractBitsInsn())
4401       return OptimizeExtractBits(BinOp, CI, *TLI);
4402
4403     return false;
4404   }
4405
4406   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
4407     if (GEPI->hasAllZeroIndices()) {
4408       /// The GEP operand must be a pointer, so must its result -> BitCast
4409       Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
4410                                         GEPI->getName(), GEPI);
4411       GEPI->replaceAllUsesWith(NC);
4412       GEPI->eraseFromParent();
4413       ++NumGEPsElim;
4414       OptimizeInst(NC, ModifiedDT);
4415       return true;
4416     }
4417     return false;
4418   }
4419
4420   if (CallInst *CI = dyn_cast<CallInst>(I))
4421     return OptimizeCallInst(CI, ModifiedDT);
4422
4423   if (SelectInst *SI = dyn_cast<SelectInst>(I))
4424     return OptimizeSelectInst(SI);
4425
4426   if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
4427     return OptimizeShuffleVectorInst(SVI);
4428
4429   if (isa<ExtractElementInst>(I))
4430     return OptimizeExtractElementInst(I);
4431
4432   return false;
4433 }
4434
4435 // In this pass we look for GEP and cast instructions that are used
4436 // across basic blocks and rewrite them to improve basic-block-at-a-time
4437 // selection.
4438 bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
4439   SunkAddrs.clear();
4440   bool MadeChange = false;
4441
4442   CurInstIterator = BB.begin();
4443   while (CurInstIterator != BB.end()) {
4444     MadeChange |= OptimizeInst(CurInstIterator++, ModifiedDT);
4445     if (ModifiedDT)
4446       return true;
4447   }
4448   MadeChange |= DupRetToEnableTailCallOpts(&BB);
4449
4450   return MadeChange;
4451 }
4452
4453 // llvm.dbg.value is far away from the value then iSel may not be able
4454 // handle it properly. iSel will drop llvm.dbg.value if it can not
4455 // find a node corresponding to the value.
4456 bool CodeGenPrepare::PlaceDbgValues(Function &F) {
4457   bool MadeChange = false;
4458   for (BasicBlock &BB : F) {
4459     Instruction *PrevNonDbgInst = nullptr;
4460     for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
4461       Instruction *Insn = BI++;
4462       DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
4463       // Leave dbg.values that refer to an alloca alone. These
4464       // instrinsics describe the address of a variable (= the alloca)
4465       // being taken.  They should not be moved next to the alloca
4466       // (and to the beginning of the scope), but rather stay close to
4467       // where said address is used.
4468       if (!DVI || (DVI->getValue() && isa<AllocaInst>(DVI->getValue()))) {
4469         PrevNonDbgInst = Insn;
4470         continue;
4471       }
4472
4473       Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
4474       if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
4475         DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
4476         DVI->removeFromParent();
4477         if (isa<PHINode>(VI))
4478           DVI->insertBefore(VI->getParent()->getFirstInsertionPt());
4479         else
4480           DVI->insertAfter(VI);
4481         MadeChange = true;
4482         ++NumDbgValueMoved;
4483       }
4484     }
4485   }
4486   return MadeChange;
4487 }
4488
4489 // If there is a sequence that branches based on comparing a single bit
4490 // against zero that can be combined into a single instruction, and the
4491 // target supports folding these into a single instruction, sink the
4492 // mask and compare into the branch uses. Do this before OptimizeBlock ->
4493 // OptimizeInst -> OptimizeCmpExpression, which perturbs the pattern being
4494 // searched for.
4495 bool CodeGenPrepare::sinkAndCmp(Function &F) {
4496   if (!EnableAndCmpSinking)
4497     return false;
4498   if (!TLI || !TLI->isMaskAndBranchFoldingLegal())
4499     return false;
4500   bool MadeChange = false;
4501   for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
4502     BasicBlock *BB = I++;
4503
4504     // Does this BB end with the following?
4505     //   %andVal = and %val, #single-bit-set
4506     //   %icmpVal = icmp %andResult, 0
4507     //   br i1 %cmpVal label %dest1, label %dest2"
4508     BranchInst *Brcc = dyn_cast<BranchInst>(BB->getTerminator());
4509     if (!Brcc || !Brcc->isConditional())
4510       continue;
4511     ICmpInst *Cmp = dyn_cast<ICmpInst>(Brcc->getOperand(0));
4512     if (!Cmp || Cmp->getParent() != BB)
4513       continue;
4514     ConstantInt *Zero = dyn_cast<ConstantInt>(Cmp->getOperand(1));
4515     if (!Zero || !Zero->isZero())
4516       continue;
4517     Instruction *And = dyn_cast<Instruction>(Cmp->getOperand(0));
4518     if (!And || And->getOpcode() != Instruction::And || And->getParent() != BB)
4519       continue;
4520     ConstantInt* Mask = dyn_cast<ConstantInt>(And->getOperand(1));
4521     if (!Mask || !Mask->getUniqueInteger().isPowerOf2())
4522       continue;
4523     DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB->dump());
4524
4525     // Push the "and; icmp" for any users that are conditional branches.
4526     // Since there can only be one branch use per BB, we don't need to keep
4527     // track of which BBs we insert into.
4528     for (Value::use_iterator UI = Cmp->use_begin(), E = Cmp->use_end();
4529          UI != E; ) {
4530       Use &TheUse = *UI;
4531       // Find brcc use.
4532       BranchInst *BrccUser = dyn_cast<BranchInst>(*UI);
4533       ++UI;
4534       if (!BrccUser || !BrccUser->isConditional())
4535         continue;
4536       BasicBlock *UserBB = BrccUser->getParent();
4537       if (UserBB == BB) continue;
4538       DEBUG(dbgs() << "found Brcc use\n");
4539
4540       // Sink the "and; icmp" to use.
4541       MadeChange = true;
4542       BinaryOperator *NewAnd =
4543         BinaryOperator::CreateAnd(And->getOperand(0), And->getOperand(1), "",
4544                                   BrccUser);
4545       CmpInst *NewCmp =
4546         CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), NewAnd, Zero,
4547                         "", BrccUser);
4548       TheUse = NewCmp;
4549       ++NumAndCmpsMoved;
4550       DEBUG(BrccUser->getParent()->dump());
4551     }
4552   }
4553   return MadeChange;
4554 }
4555
4556 /// \brief Retrieve the probabilities of a conditional branch. Returns true on
4557 /// success, or returns false if no or invalid metadata was found.
4558 static bool extractBranchMetadata(BranchInst *BI,
4559                                   uint64_t &ProbTrue, uint64_t &ProbFalse) {
4560   assert(BI->isConditional() &&
4561          "Looking for probabilities on unconditional branch?");
4562   auto *ProfileData = BI->getMetadata(LLVMContext::MD_prof);
4563   if (!ProfileData || ProfileData->getNumOperands() != 3)
4564     return false;
4565
4566   const auto *CITrue =
4567       mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1));
4568   const auto *CIFalse =
4569       mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(2));
4570   if (!CITrue || !CIFalse)
4571     return false;
4572
4573   ProbTrue = CITrue->getValue().getZExtValue();
4574   ProbFalse = CIFalse->getValue().getZExtValue();
4575
4576   return true;
4577 }
4578
/// \brief Scale down both weights to fit into uint32_t.
///
/// Both weights are divided by the same factor, chosen so that the larger of
/// the two does not exceed UINT32_MAX afterwards. Weights that already fit are
/// divided by one, i.e. left unchanged.
///
/// \param NewTrue  In/out: first weight, scaled in place.
/// \param NewFalse In/out: second weight, scaled in place.
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
  uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
  // The divisor must be 64 bits wide: NewMax / UINT32_MAX can be as large as
  // 2^32 + 1, so "quotient + 1" stored in a uint32_t could wrap around to a
  // divisor that is too small, or to zero (division by zero).
  uint64_t Scale = (NewMax / UINT32_MAX) + 1;
  NewTrue = NewTrue / Scale;
  NewFalse = NewFalse / Scale;
}
4586
4587 /// \brief Some targets prefer to split a conditional branch like:
4588 /// \code
4589 ///   %0 = icmp ne i32 %a, 0
4590 ///   %1 = icmp ne i32 %b, 0
4591 ///   %or.cond = or i1 %0, %1
4592 ///   br i1 %or.cond, label %TrueBB, label %FalseBB
4593 /// \endcode
4594 /// into multiple branch instructions like:
4595 /// \code
4596 ///   bb1:
4597 ///     %0 = icmp ne i32 %a, 0
4598 ///     br i1 %0, label %TrueBB, label %bb2
4599 ///   bb2:
4600 ///     %1 = icmp ne i32 %b, 0
4601 ///     br i1 %1, label %TrueBB, label %FalseBB
4602 /// \endcode
4603 /// This usually allows instruction selection to do even further optimizations
4604 /// and combine the compare with the branch instruction. Currently this is
4605 /// applied for targets which have "cheap" jump instructions.
4606 ///
4607 /// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
4608 ///
4609 bool CodeGenPrepare::splitBranchCondition(Function &F) {
4610   if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive())
4611     return false;
4612
4613   bool MadeChange = false;
4614   for (auto &BB : F) {
4615     // Does this BB end with the following?
4616     //   %cond1 = icmp|fcmp|binary instruction ...
4617     //   %cond2 = icmp|fcmp|binary instruction ...
4618     //   %cond.or = or|and i1 %cond1, cond2
4619     //   br i1 %cond.or label %dest1, label %dest2"
4620     BinaryOperator *LogicOp;
4621     BasicBlock *TBB, *FBB;
4622     if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB)))
4623       continue;
4624
4625     unsigned Opc;
4626     Value *Cond1, *Cond2;
4627     if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)),
4628                              m_OneUse(m_Value(Cond2)))))
4629       Opc = Instruction::And;
4630     else if (match(LogicOp, m_Or(m_OneUse(m_Value(Cond1)),
4631                                  m_OneUse(m_Value(Cond2)))))
4632       Opc = Instruction::Or;
4633     else
4634       continue;
4635
4636     if (!match(Cond1, m_CombineOr(m_Cmp(), m_BinOp())) ||
4637         !match(Cond2, m_CombineOr(m_Cmp(), m_BinOp()))   )
4638       continue;
4639
4640     DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
4641
4642     // Create a new BB.
4643     auto *InsertBefore = std::next(Function::iterator(BB))
4644         .getNodePtrUnchecked();
4645     auto TmpBB = BasicBlock::Create(BB.getContext(),
4646                                     BB.getName() + ".cond.split",
4647                                     BB.getParent(), InsertBefore);
4648
4649     // Update original basic block by using the first condition directly by the
4650     // branch instruction and removing the no longer needed and/or instruction.
4651     auto *Br1 = cast<BranchInst>(BB.getTerminator());
4652     Br1->setCondition(Cond1);
4653     LogicOp->eraseFromParent();
4654
4655     // Depending on the conditon we have to either replace the true or the false
4656     // successor of the original branch instruction.
4657     if (Opc == Instruction::And)
4658       Br1->setSuccessor(0, TmpBB);
4659     else
4660       Br1->setSuccessor(1, TmpBB);
4661
4662     // Fill in the new basic block.
4663     auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
4664     if (auto *I = dyn_cast<Instruction>(Cond2)) {
4665       I->removeFromParent();
4666       I->insertBefore(Br2);
4667     }
4668
4669     // Update PHI nodes in both successors. The original BB needs to be
4670     // replaced in one succesor's PHI nodes, because the branch comes now from
4671     // the newly generated BB (NewBB). In the other successor we need to add one
4672     // incoming edge to the PHI nodes, because both branch instructions target
4673     // now the same successor. Depending on the original branch condition
4674     // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
4675     // we perfrom the correct update for the PHI nodes.
4676     // This doesn't change the successor order of the just created branch
4677     // instruction (or any other instruction).
4678     if (Opc == Instruction::Or)
4679       std::swap(TBB, FBB);
4680
4681     // Replace the old BB with the new BB.
4682     for (auto &I : *TBB) {
4683       PHINode *PN = dyn_cast<PHINode>(&I);
4684       if (!PN)
4685         break;
4686       int i;
4687       while ((i = PN->getBasicBlockIndex(&BB)) >= 0)
4688         PN->setIncomingBlock(i, TmpBB);
4689     }
4690
4691     // Add another incoming edge form the new BB.
4692     for (auto &I : *FBB) {
4693       PHINode *PN = dyn_cast<PHINode>(&I);
4694       if (!PN)
4695         break;
4696       auto *Val = PN->getIncomingValueForBlock(&BB);
4697       PN->addIncoming(Val, TmpBB);
4698     }
4699
4700     // Update the branch weights (from SelectionDAGBuilder::
4701     // FindMergedConditions).
4702     if (Opc == Instruction::Or) {
4703       // Codegen X | Y as:
4704       // BB1:
4705       //   jmp_if_X TBB
4706       //   jmp TmpBB
4707       // TmpBB:
4708       //   jmp_if_Y TBB
4709       //   jmp FBB
4710       //
4711
4712       // We have flexibility in setting Prob for BB1 and Prob for NewBB.
4713       // The requirement is that
4714       //   TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
4715       //     = TrueProb for orignal BB.
4716       // Assuming the orignal weights are A and B, one choice is to set BB1's
4717       // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
4718       // assumes that
4719       //   TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
4720       // Another choice is to assume TrueProb for BB1 equals to TrueProb for
4721       // TmpBB, but the math is more complicated.
4722       uint64_t TrueWeight, FalseWeight;
4723       if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) {
4724         uint64_t NewTrueWeight = TrueWeight;
4725         uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
4726         scaleWeights(NewTrueWeight, NewFalseWeight);
4727         Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
4728                          .createBranchWeights(TrueWeight, FalseWeight));
4729
4730         NewTrueWeight = TrueWeight;
4731         NewFalseWeight = 2 * FalseWeight;
4732         scaleWeights(NewTrueWeight, NewFalseWeight);
4733         Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
4734                          .createBranchWeights(TrueWeight, FalseWeight));
4735       }
4736     } else {
4737       // Codegen X & Y as:
4738       // BB1:
4739       //   jmp_if_X TmpBB
4740       //   jmp FBB
4741       // TmpBB:
4742       //   jmp_if_Y TBB
4743       //   jmp FBB
4744       //
4745       //  This requires creation of TmpBB after CurBB.
4746
4747       // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
4748       // The requirement is that
4749       //   FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
4750       //     = FalseProb for orignal BB.
4751       // Assuming the orignal weights are A and B, one choice is to set BB1's
4752       // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
4753       // assumes that
4754       //   FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
4755       uint64_t TrueWeight, FalseWeight;
4756       if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) {
4757         uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
4758         uint64_t NewFalseWeight = FalseWeight;
4759         scaleWeights(NewTrueWeight, NewFalseWeight);
4760         Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
4761                          .createBranchWeights(TrueWeight, FalseWeight));
4762
4763         NewTrueWeight = 2 * TrueWeight;
4764         NewFalseWeight = FalseWeight;
4765         scaleWeights(NewTrueWeight, NewFalseWeight);
4766         Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
4767                          .createBranchWeights(TrueWeight, FalseWeight));
4768       }
4769     }
4770
4771     // Note: No point in getting fancy here, since the DT info is never
4772     // available to CodeGenPrepare.
4773     ModifiedDT = true;
4774
4775     MadeChange = true;
4776
4777     DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
4778           TmpBB->dump());
4779   }
4780   return MadeChange;
4781 }