const TargetLowering *TLI;
const TargetTransformInfo *TTI;
const TargetLibraryInfo *TLInfo;
- DominatorTree *DT;
/// CurInstIterator - As we scan instructions optimizing them, this is the
/// next instruction to optimize. Xforms that can invalidate this should
/// update it.
BasicBlock::iterator CurInstIterator;
/// Keeps track of the type of the related instruction before their
/// promotion for the current function.
InstrToOrigTy PromotedInsts;
- /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to
- /// be updated.
+ /// ModifiedDT - True if the CFG has been modified in any way.
bool ModifiedDT;
/// OptSize - True if optimizing for size.
bool ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI,
Instruction *&Inst,
const SmallVectorImpl<Instruction *> &Exts,
- unsigned CreatedInst);
+ unsigned CreatedInstsCost);
bool splitBranchCondition(Function &F);
bool simplifyOffsetableRelocate(Instruction &I);
};
TLI = TM->getSubtargetImpl(F)->getTargetLowering();
TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- DominatorTreeWrapperPass *DTWP =
- getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DT = DTWP ? &DTWP->getDomTree() : nullptr;
OptSize = F.hasFnAttribute(Attribute::OptimizeForSize);
/// This optimization identifies DIV instructions that can be
/// profitably bypassed and carried out with a shorter, faster divide.
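/// For instance (an illustrative summary of bypassSlowDivision, not code
/// from this patch): on a target where 64-bit division is slow, a udiv on
/// i64 operands can be rewritten as a runtime check on the operands' high
/// bits plus a faster 32-bit divide for the common small-value case.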
MadeChange |= OptimizeBlock(*BB, ModifiedDTOnIteration);
// Restart BB iteration if the dominator tree of the Function was changed
- ModifiedDT |= ModifiedDTOnIteration;
if (ModifiedDTOnIteration)
break;
}
if (EverMadeChange || MadeChange)
MadeChange |= EliminateFallThrough(F);
- if (MadeChange)
- ModifiedDT = true;
EverMadeChange |= MadeChange;
}
EverMadeChange |= simplifyOffsetableRelocate(*I);
}
- if (ModifiedDT && DT)
- DT->recalculate(F);
-
return EverMadeChange;
}
// Remember if SinglePred was the entry block of the function.
// If so, we will need to move BB back to the entry position.
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
- MergeBasicBlockIntoOnlyPred(BB, DT);
+ MergeBasicBlockIntoOnlyPred(BB, nullptr);
if (isEntry && BB != &BB->getParent()->getEntryBlock())
BB->moveBefore(&BB->getParent()->getEntryBlock());
// Remember if SinglePred was the entry block of the function. If so, we
// will need to move BB back to the entry position.
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
- MergeBasicBlockIntoOnlyPred(DestBB, DT);
+ MergeBasicBlockIntoOnlyPred(DestBB, nullptr);
if (isEntry && BB != &BB->getParent()->getEntryBlock())
BB->moveBefore(&BB->getParent()->getEntryBlock());
// The PHIs are now updated, change everything that refers to BB to use
// DestBB and remove BB.
BB->replaceAllUsesWith(DestBB);
- if (DT && !ModifiedDT) {
- BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock();
- BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
- BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
- DT->changeImmediateDominator(DestBB, NewIDom);
- DT->eraseNode(BB);
- }
BB->eraseFromParent();
++NumBlocksElim;
for (auto &U : AllRelocateCalls) {
GCRelocateOperands ThisRelocate(U);
IntrinsicInst *I = cast<IntrinsicInst>(U);
- auto K = std::make_pair(ThisRelocate.basePtrIndex(),
- ThisRelocate.derivedPtrIndex());
+ auto K = std::make_pair(ThisRelocate.getBasePtrIndex(),
+ ThisRelocate.getDerivedPtrIndex());
RelocateIdxMap.insert(std::make_pair(K, I));
}
for (auto &Item : RelocateIdxMap) {
GCRelocateOperands MasterRelocate(RelocatedBase);
GCRelocateOperands ThisRelocate(ToReplace);
- assert(ThisRelocate.basePtrIndex() == MasterRelocate.basePtrIndex() &&
+ assert(ThisRelocate.getBasePtrIndex() == MasterRelocate.getBasePtrIndex() &&
"Not relocating a derived object of the original base object");
- if (ThisRelocate.basePtrIndex() == ThisRelocate.derivedPtrIndex()) {
+ if (ThisRelocate.getBasePtrIndex() == ThisRelocate.getDerivedPtrIndex()) {
// A duplicate relocate call. TODO: coalesce duplicates.
continue;
}
- Value *Base = ThisRelocate.basePtr();
- auto Derived = dyn_cast<GetElementPtrInst>(ThisRelocate.derivedPtr());
+ Value *Base = ThisRelocate.getBasePtr();
+ auto Derived = dyn_cast<GetElementPtrInst>(ThisRelocate.getDerivedPtr());
if (!Derived || Derived->getPointerOperand() != Base)
continue;
SmallVector<Value *, 2> OffsetV;
if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
  continue;
// Create a Builder and replace the target callsite with a gep
- IRBuilder<> Builder(ToReplace);
+ assert(RelocatedBase->getNextNode() &&
+        "Should always have one since it's not a terminator");
+
+ // Insert after RelocatedBase
+ IRBuilder<> Builder(RelocatedBase->getNextNode());
Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
- Value *Replacement =
- Builder.CreateGEP(RelocatedBase, makeArrayRef(OffsetV));
+
+ // If gc_relocate does not match the actual type, cast it to the right type.
+ // In theory, there must be a bitcast after gc_relocate if the type does not
+ // match, and we should reuse it to get the derived pointer. But there are
+ // cases like this:
+ // bb1:
+ // ...
+ // %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
+ // br label %merge
+ //
+ // bb2:
+ // ...
+ // %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
+ // br label %merge
+ //
+ // merge:
+ // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
+ // %cast = bitcast i8 addrspace(1)* %p1 to i32 addrspace(1)*
+ //
+ // In this case, we can no longer find the bitcast. So we insert a new
+ // bitcast whether one already exists or not. This way we handle all cases,
+ // and the extra bitcast should be optimized away by later passes.
+ Instruction *ActualRelocatedBase = RelocatedBase;
+ if (RelocatedBase->getType() != Base->getType()) {
+ ActualRelocatedBase =
+ cast<Instruction>(Builder.CreateBitCast(RelocatedBase, Base->getType()));
+ }
+ Value *Replacement = Builder.CreateGEP(
+ Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV));
Instruction *ReplacementInst = cast<Instruction>(Replacement);
- ReplacementInst->removeFromParent();
- ReplacementInst->insertAfter(RelocatedBase);
Replacement->takeName(ToReplace);
- ToReplace->replaceAllUsesWith(Replacement);
+ // If the newly generated derived pointer's type does not match the original derived
+ // pointer's type, cast the new derived pointer to match it. Same reasoning as above.
+ Instruction *ActualReplacement = ReplacementInst;
+ if (ReplacementInst->getType() != ToReplace->getType()) {
+ ActualReplacement =
+ cast<Instruction>(Builder.CreateBitCast(ReplacementInst, ToReplace->getType()));
+ }
+ ToReplace->replaceAllUsesWith(ActualReplacement);
ToReplace->eraseFromParent();
MadeChange = true;
InsertedCast =
CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
InsertPt);
- MadeChange = true;
}
// Replace a use of the cast with a use of the new cast.
TheUse = InsertedCast;
+ MadeChange = true;
++NumCastUses;
}
return SinkCast(CI);
}
-/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
+/// CombineUAddWithOverflow - try to combine CI into a call to the
+/// llvm.uadd.with.overflow intrinsic if possible.
+///
+/// Return true if any changes were made.
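+///
+/// Illustrative sketch of the rewrite (IR value names are invented):
+///   %add = add i32 %a, %b
+///   %cmp = icmp ult i32 %add, %a    ; unsigned overflow check
+/// becomes
+///   %uadd.overflow = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+///   %uadd = extractvalue { i32, i1 } %uadd.overflow, 0
+///   %overflow = extractvalue { i32, i1 } %uadd.overflow, 1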
+static bool CombineUAddWithOverflow(CmpInst *CI) {
+ Value *A, *B;
+ Instruction *AddI;
+ if (!match(CI,
+ m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI))))
+ return false;
+
+ Type *Ty = AddI->getType();
+ if (!isa<IntegerType>(Ty))
+ return false;
+
+ // We don't want to move around uses of condition values this late, so we
+ // check if it is legal to create the call to the intrinsic in the basic
+ // block containing the icmp:
+
+ if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse())
+ return false;
+
+#ifndef NDEBUG
+ // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption
+ // for now:
+ if (AddI->hasOneUse())
+ assert(*AddI->user_begin() == CI && "expected!");
+#endif
+
+ Module *M = CI->getParent()->getParent()->getParent();
+ Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
+
+ auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
+
+ auto *UAddWithOverflow =
+ CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
+ auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt);
+ auto *Overflow =
+ ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt);
+
+ CI->replaceAllUsesWith(Overflow);
+ AddI->replaceAllUsesWith(UAdd);
+ CI->eraseFromParent();
+ AddI->eraseFromParent();
+ return true;
+}
+
+/// SinkCmpExpression - Sink the given CmpInst into user blocks to reduce
/// the number of virtual registers that must be created and coalesced. This is
/// a clear win except on targets with multiple condition code registers
/// (PowerPC), where it might lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
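+///
+/// Hedged sketch (hypothetical IR): a compare such as
+///   %cmp = icmp eq i32 %x, 0
+/// defined in one block but used in other blocks is re-created in each user
+/// block, so its i1 result no longer needs to live across block boundaries.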
-static bool OptimizeCmpExpression(CmpInst *CI) {
+static bool SinkCmpExpression(CmpInst *CI) {
BasicBlock *DefBB = CI->getParent();
/// InsertedCmp - Only insert a cmp in each block once.
CmpInst::Create(CI->getOpcode(),
CI->getPredicate(), CI->getOperand(0),
CI->getOperand(1), "", InsertPt);
- MadeChange = true;
}
// Replace a use of the cmp with a use of the new cmp.
TheUse = InsertedCmp;
+ MadeChange = true;
++NumCmpUses;
}
// If we removed all uses, nuke the cmp.
- if (CI->use_empty())
+ if (CI->use_empty()) {
CI->eraseFromParent();
+ MadeChange = true;
+ }
return MadeChange;
}
+static bool OptimizeCmpExpression(CmpInst *CI) {
+ if (SinkCmpExpression(CI))
+ return true;
+
+ if (CombineUAddWithOverflow(CI))
+ return true;
+
+ return false;
+}
+
/// isExtractBitsCandidateUse - Check if the candidates could
/// be combined with a shift instruction, which includes:
/// 1. Truncate instruction
/// 2. And instruction, where the immediate is a mask of the low bits:
///    imm & (imm+1) == 0
//
CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
Builder.SetInsertPoint(InsertPt);
-
- Value* Gep = Builder.CreateInBoundsGEP(FirstEltPtr, Builder.getInt32(Idx));
+
+ Value *Gep =
+ Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
LoadInst* Load = Builder.CreateLoad(Gep, false);
VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
Builder.SetInsertPoint(InsertPt);
Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
- Value* Gep = Builder.CreateInBoundsGEP(FirstEltPtr, Builder.getInt32(Idx));
+ Value *Gep =
+ Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
Builder.CreateStore(OneElt, Gep);
// Create "else" block, fill it in the next iteration
return true;
}
+ const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr;
+
+ // Align the pointer arguments to this call if the target thinks it's a good
+ // idea
+ unsigned MinSize, PrefAlign;
+ if (TLI && TD && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
+ for (auto &Arg : CI->arg_operands()) {
+ // We want to align both objects whose address is used directly and
+ // objects whose address is used in casts and GEPs, though it only makes
+ // sense for GEPs if the offset is a multiple of the desired alignment and
+ // if size - offset meets the size threshold.
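+ // Illustrative numbers (not from this patch): with PrefAlign = 16 and
+ // MinSize = 32, a GEP at constant offset 16 into a 64-byte alloca
+ // qualifies, since 16 % 16 == 0 and 64 - 16 >= 32, so the alloca's
+ // alignment is raised to 16 below.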
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ APInt Offset(TD->getPointerSizeInBits(
+ cast<PointerType>(Arg->getType())->getAddressSpace()), 0);
+ Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*TD, Offset);
+ uint64_t Offset2 = Offset.getLimitedValue();
+ if ((Offset2 & (PrefAlign-1)) != 0)
+ continue;
+ AllocaInst *AI;
+ if ((AI = dyn_cast<AllocaInst>(Val)) &&
+ AI->getAlignment() < PrefAlign &&
+ TD->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
+ AI->setAlignment(PrefAlign);
+ // Global variables can only be aligned if they are defined in this
+ // object (i.e. they are uniquely initialized in this object), and
+ // over-aligning global variables that have an explicit section is
+ // forbidden.
+ GlobalVariable *GV;
+ if ((GV = dyn_cast<GlobalVariable>(Val)) &&
+ GV->hasUniqueInitializer() &&
+ !GV->hasSection() &&
+ GV->getAlignment() < PrefAlign &&
+ TD->getTypeAllocSize(
+ GV->getType()->getElementType()) >= MinSize + Offset2)
+ GV->setAlignment(PrefAlign);
+ }
+ // If this is a memcpy (or similar) then we may be able to improve the
+ // alignment
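+ // For instance, if the destination is now known to be 16-byte aligned and
+ // the source 32-byte aligned, the intrinsic's alignment can be raised to
+ // min(16, 32) = 16.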
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
+ unsigned Align = getKnownAlignment(MI->getDest(), *TD);
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
+ Align = std::min(Align, getKnownAlignment(MTI->getSource(), *TD));
+ if (Align > MI->getAlignment())
+ MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align));
+ }
+ }
+
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (II) {
switch (II->getIntrinsicID()) {
WeakVH IterHandle(CurInstIterator);
replaceAndRecursivelySimplify(CI, RetVal,
- TLInfo, ModifiedDT ? nullptr : DT);
+ TLInfo, nullptr);
// If the iterator instruction was recursively deleted, start over at the
// start of the block.
Inst->removeFromParent();
}
- ~InstructionRemover() { delete Replacer; }
+ ~InstructionRemover() override { delete Replacer; }
/// \brief Really remove the instruction.
void commit() override { delete Inst; }
ExtAddrMode &AMBefore,
ExtAddrMode &AMAfter);
bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
- bool IsPromotionProfitable(unsigned MatchedSize, unsigned SizeWithPromotion,
+ bool IsPromotionProfitable(unsigned NewCost, unsigned OldCost,
Value *PromotedOperand) const;
};
/// \brief Utility function to promote the operand of \p Ext when this
/// operand is a promotable trunc or sext or zext.
/// \p PromotedInsts maps the instructions to their type before promotion.
- /// \p CreatedInsts[out] contains how many non-free instructions have been
+ /// \p CreatedInstsCost[out] contains the cost of all instructions
/// created to promote the operand of Ext.
/// Newly added extensions are inserted in \p Exts.
/// Newly added truncates are inserted in \p Truncs.
/// \return The promoted value which is used instead of Ext.
static Value *promoteOperandForTruncAndAnyExt(
Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs);
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
/// \brief Utility function to promote the operand of \p Ext when this
/// operand is promotable and is not a supported trunc or sext.
/// \p PromotedInsts maps the instructions to their type before promotion.
- /// \p CreatedInsts[out] contains how many non-free instructions have been
+ /// \p CreatedInstsCost[out] contains the cost of all the instructions
/// created to promote the operand of Ext.
/// Newly added extensions are inserted in \p Exts.
/// Newly added truncates are inserted in \p Truncs.
/// Should never be called directly.
/// \return The promoted value which is used instead of Ext.
- static Value *
- promoteOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
- SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs, bool IsSExt);
+ static Value *promoteOperandForOther(Instruction *Ext,
+ TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInstsCost,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs,
+ const TargetLowering &TLI, bool IsSExt);
/// \see promoteOperandForOther.
- static Value *
- signExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts,
- SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs) {
- return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts,
- Truncs, true);
+ static Value *signExtendOperandForOther(
+ Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
+ return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
+ Exts, Truncs, TLI, true);
}
/// \see promoteOperandForOther.
- static Value *
- zeroExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts,
- SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs) {
- return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts,
- Truncs, false);
+ static Value *zeroExtendOperandForOther(
+ Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
+ return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
+ Exts, Truncs, TLI, false);
}
public:
/// Type for the utility function that promotes the operand of Ext.
typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInstsCost,
SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs);
+ SmallVectorImpl<Instruction *> *Truncs,
+ const TargetLowering &TLI);
/// \brief Given a sign/zero extend instruction \p Ext, return the appropriate
/// action to promote the operand of \p Ext instead of using Ext.
/// \return NULL if no promotable action is possible with the current
Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
llvm::Instruction *SExt, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs) {
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
// By construction, the operand of SExt is an instruction. Otherwise we cannot
// get through it and this method should not be called.
Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
Value *ExtVal = SExt;
+ bool HasMergedNonFreeExt = false;
if (isa<ZExtInst>(SExtOpnd)) {
// Replace s|zext(zext(opnd))
// => zext(opnd).
+ HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
Value *ZExt =
TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
TPT.replaceAllUsesWith(SExt, ZExt);
// => z|sext(opnd).
TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
}
- CreatedInsts = 0;
+ CreatedInstsCost = 0;
// Remove dead code.
if (SExtOpnd->use_empty())
// Check if the extension is still needed.
Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
- if (ExtInst && Exts)
- Exts->push_back(ExtInst);
+ if (ExtInst) {
+ if (Exts)
+ Exts->push_back(ExtInst);
+ CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
+ }
return ExtVal;
}
Value *TypePromotionHelper::promoteOperandForOther(
Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs, bool IsSExt) {
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
+ bool IsSExt) {
// By construction, the operand of Ext is an instruction. Otherwise we cannot
// get through it and this method should not be called.
Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
- CreatedInsts = 0;
+ CreatedInstsCost = 0;
if (!ExtOpnd->hasOneUse()) {
// ExtOpnd will be promoted.
// All its uses, but Ext, will need to use a truncated value of the
continue;
}
ExtForOpnd = cast<Instruction>(ValForExtOpnd);
- ++CreatedInsts;
}
if (Exts)
Exts->push_back(ExtForOpnd);
// Move the sign extension before the insertion point.
TPT.moveBefore(ExtForOpnd, ExtOpnd);
TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd);
+ CreatedInstsCost += !TLI.isExtFree(ExtForOpnd);
// If more sext are required, new instructions will have to be created.
ExtForOpnd = nullptr;
}
/// IsPromotionProfitable - Check whether or not promoting an instruction
/// to a wider type was profitable.
-/// \p MatchedSize gives the number of instructions that have been matched
-/// in the addressing mode after the promotion was applied.
-/// \p SizeWithPromotion gives the number of created instructions for
-/// the promotion plus the number of instructions that have been
-/// matched in the addressing mode before the promotion.
+/// \p NewCost gives the cost of extension instructions created by the
+/// promotion.
+/// \p OldCost gives the cost of extension instructions before the promotion
+/// plus the number of instructions that have been matched in the addressing
+/// mode thanks to the promotion.
/// \p PromotedOperand is the value that has been promoted.
/// \return True if the promotion is profitable, false otherwise.
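+/// For example (illustrative costs): if the promotion creates one non-free
+/// extension (NewCost = 1) while removing an extension of cost 1 and folding
+/// one extra instruction into the addressing mode (OldCost = 2), then
+/// NewCost < OldCost and the promotion is deemed profitable.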
-bool
-AddressingModeMatcher::IsPromotionProfitable(unsigned MatchedSize,
- unsigned SizeWithPromotion,
- Value *PromotedOperand) const {
- // We folded less instructions than what we created to promote the operand.
+bool AddressingModeMatcher::IsPromotionProfitable(
+ unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
+ DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n');
+ // The cost of the new extensions is greater than the cost of the
+ // old extension plus what we folded.
// This is not profitable.
- if (MatchedSize < SizeWithPromotion)
+ if (NewCost > OldCost)
return false;
- if (MatchedSize > SizeWithPromotion)
+ if (NewCost < OldCost)
return true;
// The promotion is neutral but it may help folding the sign extension in
// loads for instance.
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
- unsigned CreatedInsts = 0;
+ unsigned CreatedInstsCost = 0;
+ unsigned ExtCost = !TLI.isExtFree(Ext);
Value *PromotedOperand =
- TPH(Ext, TPT, PromotedInsts, CreatedInsts, nullptr, nullptr);
+ TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
// SExt has been moved away.
// Thus either it will be rematched later in the recursive calls or it is
// gone. Anyway, we must not fold it into the addressing mode at this point.
unsigned OldSize = AddrModeInsts.size();
if (!MatchAddr(PromotedOperand, Depth) ||
- !IsPromotionProfitable(AddrModeInsts.size(), OldSize + CreatedInsts,
+ // The total new cost equals the cost of the created instructions.
+ // The total old cost equals the cost of the extension plus what we have
+ // saved in the addressing mode.
+ !IsPromotionProfitable(CreatedInstsCost,
+ ExtCost + (AddrModeInsts.size() - OldSize),
PromotedOperand)) {
AddrMode = BackupAddrMode;
AddrModeInsts.resize(OldSize);
// For a PHI node, push all of its incoming values.
if (PHINode *P = dyn_cast<PHINode>(V)) {
- for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
- worklist.push_back(P->getIncomingValue(i));
+ for (Value *IncValue : P->incoming_values())
+ worklist.push_back(IncValue);
continue;
}
return false;
} else {
Type *I8PtrTy =
- Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
+ Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
+ Type *I8Ty = Builder.getInt8Ty();
// Start with the base register. Do this first so that subsequent address
// matching finds it last, which will prevent it from trying to match it
// against the other address matching criteria.
if (ResultPtr->getType() != I8PtrTy)
ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
- ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr");
+ ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
}
ResultIndex = V;
} else {
if (ResultPtr->getType() != I8PtrTy)
ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
- SunkAddr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr");
+ SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
}
if (SunkAddr->getType() != Addr->getType())
bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
LoadInst *&LI, Instruction *&Inst,
const SmallVectorImpl<Instruction *> &Exts,
- unsigned CreatedInsts = 0) {
+ unsigned CreatedInstsCost = 0) {
// Iterate over all the extensions to see if one of them forms an ext(load).
for (auto I : Exts) {
// Check if we directly have ext(load).
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
SmallVector<Instruction *, 4> NewExts;
- unsigned NewCreatedInsts = 0;
+ unsigned NewCreatedInstsCost = 0;
+ unsigned ExtCost = !TLI->isExtFree(I);
// Promote.
- Value *PromotedVal =
- TPH(I, TPT, PromotedInsts, NewCreatedInsts, &NewExts, nullptr);
+ Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
+ &NewExts, nullptr, *TLI);
assert(PromotedVal &&
"TypePromotionHelper should have filtered out those cases");
// With exactly 2, the transformation is neutral, because we will merge
// one extension but leave one. However, we optimistically keep going,
// because the new extension may be removed too.
- unsigned TotalCreatedInsts = CreatedInsts + NewCreatedInsts;
+ long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
+ TotalCreatedInstsCost -= ExtCost;
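+ // Illustrative accounting: starting from CreatedInstsCost = 0, a promotion
+ // that creates one non-free extension (NewCreatedInstsCost = 1) while
+ // making the original non-free extension mergeable (ExtCost = 1) totals
+ // 0 + 1 - 1 = 0, within the "at most 1" budget checked below.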
if (!StressExtLdPromotion &&
- (TotalCreatedInsts > 1 ||
+ (TotalCreatedInstsCost > 1 ||
!isPromotedInstructionLegal(*TLI, PromotedVal))) {
// The promotion is not profitable, rollback to the previous state.
TPT.rollback(LastKnownGood);
}
// The promotion is profitable.
// Check if it exposes an ext(load).
- (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInsts);
- if (LI && (StressExtLdPromotion || NewCreatedInsts == 0 ||
+ (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost);
+ if (LI && (StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
// If we have created a new extension, i.e., now we have two
// extensions. We must make sure one of them is merged with
// the load, otherwise we may degrade the code quality.
// to introduce PHI nodes too late to be cleaned up. If we detect such a
// trivial PHI, go ahead and zap it here.
const DataLayout &DL = I->getModule()->getDataLayout();
- if (Value *V = SimplifyInstruction(P, DL, TLInfo, DT)) {
+ if (Value *V = SimplifyInstruction(P, DL, TLInfo, nullptr)) {
P->replaceAllUsesWith(V);
P->eraseFromParent();
++NumPHIsElim;
}
}
- // Request DOM Tree update.
// Note: No point in getting fancy here, since the DT info is never
- // available to CodeGenPrepare and the existing update code is broken
- // anyways.
+ // available to CodeGenPrepare.
ModifiedDT = true;
MadeChange = true;