Feeding isSafeToSpeculativelyExecute its DataLayout pointer

[oota-llvm.git] / lib / Transforms / InstCombine / InstructionCombining.cpp
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp

index 0cab81b1bbe3ae1aa5bc6ff790c21c2d90156ee9..46e3bfc7e44e07a74565d3a986a9238d2bb949f9 100644 (file)
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -33,7 +33,6 @@
  //
  //===----------------------------------------------------------------------===//
  
-#define DEBUG_TYPE "instcombine"
  #include "llvm/Transforms/Scalar.h"
  #include "InstCombine.h"
  #include "llvm-c/Initialization.h"
@@ -43,6 +42,7 @@
  #include "llvm/Analysis/ConstantFolding.h"
  #include "llvm/Analysis/InstructionSimplify.h"
  #include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ValueTracking.h"
  #include "llvm/IR/CFG.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/GetElementPtrTypeIterator.h"
@@ -58,6 +58,8 @@
  using namespace llvm;
  using namespace llvm::PatternMatch;
  
+#define DEBUG_TYPE "instcombine"
+
  STATISTIC(NumCombined , "Number of insts combined");
  STATISTIC(NumConstProp, "Number of constant folds");
  STATISTIC(NumDeadInst , "Number of dead inst eliminated");
@@ -394,6 +396,127 @@ static bool RightDistributesOverLeft(Instruction::BinaryOps LOp,
    return false;
  }
  
+/// Returns the identity value for the given opcode (or null), which is used to
+/// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
+static Value *getIdentityValue(Instruction::BinaryOps OpCode, Value *V) {
+  if (isa<Constant>(V))  // Constant operands get no identity partner.
+    return nullptr;
+
+  if (OpCode == Instruction::Mul)  // X == X * 1, typed like V.
+    return ConstantInt::get(V->getType(), 1);
+
+  // TODO: We can handle other cases e.g. Instruction::And, Instruction::Or etc.
+
+  return nullptr;
+}
+
+/// Splits Op into an opcode and its two operands (LHS, RHS) for factorization.
+/// A SHL by a constant is returned as a MUL, e.g. SHL(X, 2) ==> MUL(X, 4).
+static Instruction::BinaryOps
+getBinOpsForFactorization(BinaryOperator *Op, Value *&LHS, Value *&RHS) {
+  if (!Op)  // No binary operator: LHS and RHS are left untouched.
+    return Instruction::BinaryOpsEnd;
+
+  if (Op->getOpcode() == Instruction::Shl) {
+    if (Constant *CST = dyn_cast<Constant>(Op->getOperand(1))) {
+      // The multiplier is really 1 << CST.
+      RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), CST);
+      LHS = Op->getOperand(0);
+      return Instruction::Mul;
+    }
+  }
+
+  // TODO: We can add other conversions e.g. shr => div etc.
+
+  LHS = Op->getOperand(0);
+  RHS = Op->getOperand(1);
+  return Op->getOpcode();
+}
+
+/// This tries to simplify binary operation I by factorizing out common terms
+/// (e.g. "(A*B)+(A*C)" -> "A*(B+C)"); returns null if nothing was factored.
+static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
+                               const DataLayout *DL, BinaryOperator &I,
+                               Instruction::BinaryOps InnerOpcode, Value *A,
+                               Value *B, Value *C, Value *D) {
+
+  // If any of A, B, C, D are null, we can not factor I, return early.
+  // All four are checked: callers may pass a null identity value as B or D.
+  if (!A || !C || !B || !D)
+    return nullptr;
+
+  Value *SimplifiedInst = nullptr;
+  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
+
+  // Does "X op' Y" always equal "Y op' X"?
+  bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
+
+  // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
+  if (LeftDistributesOverRight(InnerOpcode, TopLevelOpcode))
+    // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
+    // commutative case, "(A op' B) op (C op' A)"?
+    if (A == C || (InnerCommutative && A == D)) {
+      if (A != C)
+        std::swap(C, D);
+      // Consider forming "A op' (B op D)".
+      // If "B op D" simplifies then it can be formed with no cost.
+      Value *V = SimplifyBinOp(TopLevelOpcode, B, D, DL);
+      // If "B op D" doesn't simplify then only go on if both of the existing
+      // operations "A op' B" and "C op' D" will be zapped as no longer used.
+      if (!V && LHS->hasOneUse() && RHS->hasOneUse())
+        V = Builder->CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
+      if (V) {
+        SimplifiedInst = Builder->CreateBinOp(InnerOpcode, A, V);
+      }
+    }
+
+  // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
+  if (!SimplifiedInst && RightDistributesOverLeft(TopLevelOpcode, InnerOpcode))
+    // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
+    // commutative case, "(A op' B) op (B op' D)"?
+    if (B == D || (InnerCommutative && B == C)) {
+      if (B != D)
+        std::swap(C, D);
+      // Consider forming "(A op C) op' B".
+      // If "A op C" simplifies then it can be formed with no cost.
+      Value *V = SimplifyBinOp(TopLevelOpcode, A, C, DL);
+
+      // If "A op C" doesn't simplify then only go on if both of the existing
+      // operations "A op' B" and "C op' D" will be zapped as no longer used.
+      if (!V && LHS->hasOneUse() && RHS->hasOneUse())
+        V = Builder->CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
+      if (V) {
+        SimplifiedInst = Builder->CreateBinOp(InnerOpcode, V, B);
+      }
+    }
+
+  if (SimplifiedInst) {
+    ++NumFactor;
+    SimplifiedInst->takeName(&I);
+
+    // Keep NSW on SimplifiedInst only if I and each BinaryOperator operand of I
+    // that can overflow carry NSW (others are skipped). TODO: Check for NUW.
+    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(SimplifiedInst)) {
+      if (isa<OverflowingBinaryOperator>(SimplifiedInst)) {
+        bool HasNSW = false;
+        if (isa<OverflowingBinaryOperator>(&I))
+          HasNSW = I.hasNoSignedWrap();
+
+        if (BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS))
+          if (isa<OverflowingBinaryOperator>(Op0))
+            HasNSW &= Op0->hasNoSignedWrap();
+
+        if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS))
+          if (isa<OverflowingBinaryOperator>(Op1))
+            HasNSW &= Op1->hasNoSignedWrap();
+        BO->setHasNoSignedWrap(HasNSW);
+      }
+    }
+  }
+  return SimplifiedInst;
+}
+
  /// SimplifyUsingDistributiveLaws - This tries to simplify binary operations
  /// which some other binary operation distributes over either by factorizing
  /// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this
@@ -403,65 +526,33 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
    Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
    BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
    BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
-  Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); // op
  
    // Factorization.
-  if (Op0 && Op1 && Op0->getOpcode() == Op1->getOpcode()) {
-    // The instruction has the form "(A op' B) op (C op' D)".  Try to factorize
-    // a common term.
-    Value *A = Op0->getOperand(0), *B = Op0->getOperand(1);
-    Value *C = Op1->getOperand(0), *D = Op1->getOperand(1);
-    Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
+  Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
+  Instruction::BinaryOps LHSOpcode = getBinOpsForFactorization(Op0, A, B);
+  Instruction::BinaryOps RHSOpcode = getBinOpsForFactorization(Op1, C, D);
+
+  // The instruction has the form "(A op' B) op (C op' D)".  Try to factorize
+  // a common term.
+  if (LHSOpcode == RHSOpcode) {
+    if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, C, D))
+      return V;
+  }
  
-    // Does "X op' Y" always equal "Y op' X"?
-    bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
-
-    // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
-    if (LeftDistributesOverRight(InnerOpcode, TopLevelOpcode))
-      // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
-      // commutative case, "(A op' B) op (C op' A)"?
-      if (A == C || (InnerCommutative && A == D)) {
-        if (A != C)
-          std::swap(C, D);
-        // Consider forming "A op' (B op D)".
-        // If "B op D" simplifies then it can be formed with no cost.
-        Value *V = SimplifyBinOp(TopLevelOpcode, B, D, DL);
-        // If "B op D" doesn't simplify then only go on if both of the existing
-        // operations "A op' B" and "C op' D" will be zapped as no longer used.
-        if (!V && Op0->hasOneUse() && Op1->hasOneUse())
-          V = Builder->CreateBinOp(TopLevelOpcode, B, D, Op1->getName());
-        if (V) {
-          ++NumFactor;
-          V = Builder->CreateBinOp(InnerOpcode, A, V);
-          V->takeName(&I);
-          return V;
-        }
-      }
+  // The instruction has the form "(A op' B) op (C)".  Try to factorize common
+  // term.
+  if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, RHS,
+                                  getIdentityValue(LHSOpcode, RHS)))
+    return V;
  
-    // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
-    if (RightDistributesOverLeft(TopLevelOpcode, InnerOpcode))
-      // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
-      // commutative case, "(A op' B) op (B op' D)"?
-      if (B == D || (InnerCommutative && B == C)) {
-        if (B != D)
-          std::swap(C, D);
-        // Consider forming "(A op C) op' B".
-        // If "A op C" simplifies then it can be formed with no cost.
-        Value *V = SimplifyBinOp(TopLevelOpcode, A, C, DL);
-        // If "A op C" doesn't simplify then only go on if both of the existing
-        // operations "A op' B" and "C op' D" will be zapped as no longer used.
-        if (!V && Op0->hasOneUse() && Op1->hasOneUse())
-          V = Builder->CreateBinOp(TopLevelOpcode, A, C, Op0->getName());
-        if (V) {
-          ++NumFactor;
-          V = Builder->CreateBinOp(InnerOpcode, V, B);
-          V->takeName(&I);
-          return V;
-        }
-      }
-  }
+  // The instruction has the form "(A) op (C op' D)".  Try to factorize common
+  // term.
+  if (Value *V = tryFactorization(Builder, DL, I, RHSOpcode, LHS,
+                                  getIdentityValue(RHSOpcode, LHS), C, D))
+    return V;
  
    // Expansion.
+  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
    if (Op0 && RightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
      // The instruction has the form "(A op' B) op C".  See if expanding it out
      // to "(A op C) op' (B op C)" results in simplifications.
@@ -512,7 +603,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
        }
    }
  
-  return 0;
+  return nullptr;
  }
  
  // dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction
@@ -530,7 +621,7 @@ Value *InstCombiner::dyn_castNegVal(Value *V) const {
      if (C->getType()->getElementType()->isIntegerTy())
        return ConstantExpr::getNeg(C);
  
-  return 0;
+  return nullptr;
  }
  
  // dyn_castFNegVal - Given a 'fsub' instruction, return the RHS of the
@@ -549,7 +640,7 @@ Value *InstCombiner::dyn_castFNegVal(Value *V, bool IgnoreZeroSign) const {
      if (C->getType()->getElementType()->isFloatingPointTy())
        return ConstantExpr::getFNeg(C);
  
-  return 0;
+  return nullptr;
  }
  
  static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
@@ -595,13 +686,13 @@ static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
  // not have a second operand.
  Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
    // Don't modify shared select instructions
-  if (!SI->hasOneUse()) return 0;
+  if (!SI->hasOneUse()) return nullptr;
    Value *TV = SI->getOperand(1);
    Value *FV = SI->getOperand(2);
  
    if (isa<Constant>(TV) || isa<Constant>(FV)) {
      // Bool selects with constant operands can be folded to logical ops.
-    if (SI->getType()->isIntegerTy(1)) return 0;
+    if (SI->getType()->isIntegerTy(1)) return nullptr;
  
      // If it's a bitcast involving vectors, make sure it has the same number of
      // elements on both sides.
@@ -610,10 +701,10 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
        VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy());
  
        // Verify that either both or neither are vectors.
-      if ((SrcTy == NULL) != (DestTy == NULL)) return 0;
+      if ((SrcTy == nullptr) != (DestTy == nullptr)) return nullptr;
        // If vectors, verify that they have the same number of elements.
        if (SrcTy && SrcTy->getNumElements() != DestTy->getNumElements())
-        return 0;
+        return nullptr;
      }
  
      Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this);
@@ -622,7 +713,7 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
      return SelectInst::Create(SI->getCondition(),
                                SelectTrueVal, SelectFalseVal);
    }
-  return 0;
+  return nullptr;
  }
  
  
@@ -634,7 +725,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
    PHINode *PN = cast<PHINode>(I.getOperand(0));
    unsigned NumPHIValues = PN->getNumIncomingValues();
    if (NumPHIValues == 0)
-    return 0;
+    return nullptr;
  
    // We normally only transform phis with a single use.  However, if a PHI has
    // multiple uses and they are all the same operation, we can fold *all* of the
@@ -644,7 +735,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
      for (User *U : PN->users()) {
        Instruction *UI = cast<Instruction>(U);
        if (UI != &I && !I.isIdenticalTo(UI))
-        return 0;
+        return nullptr;
      }
      // Otherwise, we can replace *all* users with the new PHI we form.
    }
@@ -654,14 +745,14 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
    // remember the BB it is in.  If there is more than one or if *it* is a PHI,
    // bail out.  We don't do arbitrary constant expressions here because moving
    // their computation can be expensive without a cost model.
-  BasicBlock *NonConstBB = 0;
+  BasicBlock *NonConstBB = nullptr;
    for (unsigned i = 0; i != NumPHIValues; ++i) {
      Value *InVal = PN->getIncomingValue(i);
      if (isa<Constant>(InVal) && !isa<ConstantExpr>(InVal))
        continue;
  
-    if (isa<PHINode>(InVal)) return 0;  // Itself a phi.
-    if (NonConstBB) return 0;  // More than one non-const value.
+    if (isa<PHINode>(InVal)) return nullptr;  // Itself a phi.
+    if (NonConstBB) return nullptr;  // More than one non-const value.
  
      NonConstBB = PN->getIncomingBlock(i);
  
@@ -669,22 +760,22 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
      // insert a computation after it without breaking the edge.
      if (InvokeInst *II = dyn_cast<InvokeInst>(InVal))
        if (II->getParent() == NonConstBB)
-        return 0;
+        return nullptr;
  
      // If the incoming non-constant value is in I's block, we will remove one
      // instruction, but insert another equivalent one, leading to infinite
      // instcombine.
      if (NonConstBB == I.getParent())
-      return 0;
+      return nullptr;
    }
  
    // If there is exactly one non-constant value, we can insert a copy of the
    // operation in that block.  However, if this is a critical edge, we would be
    // inserting the computation one some other paths (e.g. inside a loop).  Only
    // do this if the pred block is unconditionally branching into the phi block.
-  if (NonConstBB != 0) {
+  if (NonConstBB != nullptr) {
      BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
-    if (!BI || !BI->isUnconditional()) return 0;
+    if (!BI || !BI->isUnconditional()) return nullptr;
    }
  
    // Okay, we can do the transformation: create the new PHI node.
@@ -708,7 +799,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
        BasicBlock *ThisBB = PN->getIncomingBlock(i);
        Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
        Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
-      Value *InV = 0;
+      Value *InV = nullptr;
        // Beware of ConstantExpr:  it may eventually evaluate to getNullValue,
        // even if currently isNullValue gives false.
        Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i));
@@ -722,7 +813,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
    } else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) {
      Constant *C = cast<Constant>(I.getOperand(1));
      for (unsigned i = 0; i != NumPHIValues; ++i) {
-      Value *InV = 0;
+      Value *InV = nullptr;
        if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
          InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
        else if (isa<ICmpInst>(CI))
@@ -736,7 +827,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
    } else if (I.getNumOperands() == 2) {
      Constant *C = cast<Constant>(I.getOperand(1));
      for (unsigned i = 0; i != NumPHIValues; ++i) {
-      Value *InV = 0;
+      Value *InV = nullptr;
        if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
          InV = ConstantExpr::get(I.getOpcode(), InC, C);
        else
@@ -776,11 +867,11 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
    assert(PtrTy->isPtrOrPtrVectorTy());
  
    if (!DL)
-    return 0;
+    return nullptr;
  
    Type *Ty = PtrTy->getPointerElementType();
    if (!Ty->isSized())
-    return 0;
+    return nullptr;
  
    // Start with the index over the outer type.  Note that the type size
    // might be zero (even if the offset isn't zero) if the indexed type
@@ -806,7 +897,7 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
    while (Offset) {
      // Indexing into tail padding between struct/array elements.
      if (uint64_t(Offset*8) >= DL->getTypeSizeInBits(Ty))
-      return 0;
+      return nullptr;
  
      if (StructType *STy = dyn_cast<StructType>(Ty)) {
        const StructLayout *SL = DL->getStructLayout(STy);
@@ -827,7 +918,7 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
        Ty = AT->getElementType();
      } else {
        // Otherwise, we can't index into the middle of this atomic type, bail.
-      return 0;
+      return nullptr;
      }
    }
  
@@ -859,7 +950,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
  
    // If Scale is zero then it does not divide Val.
    if (Scale.isMinValue())
-    return 0;
+    return nullptr;
  
    // Look through chains of multiplications, searching for a constant that is
    // divisible by Scale.  For example, descaling X*(Y*(Z*4)) by a factor of 4
@@ -902,7 +993,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
        APInt::sdivrem(CI->getValue(), Scale, Quotient, Remainder);
        if (!Remainder.isMinValue())
          // Not divisible by Scale.
-        return 0;
+        return nullptr;
        // Replace with the quotient in the parent.
        Op = ConstantInt::get(CI->getType(), Quotient);
        NoSignedWrap = true;
@@ -915,7 +1006,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
          // Multiplication.
          NoSignedWrap = BO->hasNoSignedWrap();
          if (RequireNoSignedWrap && !NoSignedWrap)
-          return 0;
+          return nullptr;
  
          // There are three cases for multiplication: multiplication by exactly
          // the scale, multiplication by a constant different to the scale, and
@@ -934,7 +1025,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
  
            // Otherwise drill down into the constant.
            if (!Op->hasOneUse())
-            return 0;
+            return nullptr;
  
            Parent = std::make_pair(BO, 1);
            continue;
@@ -943,7 +1034,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
          // Multiplication by something else. Drill down into the left-hand side
          // since that's where the reassociate pass puts the good stuff.
          if (!Op->hasOneUse())
-          return 0;
+          return nullptr;
  
          Parent = std::make_pair(BO, 0);
          continue;
@@ -954,7 +1045,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
          // Multiplication by a power of 2.
          NoSignedWrap = BO->hasNoSignedWrap();
          if (RequireNoSignedWrap && !NoSignedWrap)
-          return 0;
+          return nullptr;
  
          Value *LHS = BO->getOperand(0);
          int32_t Amt = cast<ConstantInt>(BO->getOperand(1))->
@@ -968,7 +1059,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
            break;
          }
          if (Amt < logScale || !Op->hasOneUse())
-          return 0;
+          return nullptr;
  
          // Multiplication by more than the scale.  Reduce the multiplying amount
          // by the scale in the parent.
@@ -979,7 +1070,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
      }
  
      if (!Op->hasOneUse())
-      return 0;
+      return nullptr;
  
      if (CastInst *Cast = dyn_cast<CastInst>(Op)) {
        if (Cast->getOpcode() == Instruction::SExt) {
@@ -993,7 +1084,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
          // Scale and the multiplication Y * SmallScale should not overflow.
          if (SmallScale.sext(Scale.getBitWidth()) != Scale)
            // SmallScale does not sign-extend to Scale.
-          return 0;
+          return nullptr;
          assert(SmallScale.exactLogBase2() == logScale);
          // Require that Y * SmallScale must not overflow.
          RequireNoSignedWrap = true;
@@ -1012,7 +1103,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
          // trunc (Y * sext Scale) does not, so nsw flags need to be cleared
          // from this point up in the expression (see later).
          if (RequireNoSignedWrap)
-          return 0;
+          return nullptr;
  
          // Drill down through the cast.
          unsigned LargeSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
@@ -1026,7 +1117,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
      }
  
      // Unsupported expression, bail out.
-    return 0;
+    return nullptr;
    }
  
    // We know that we can successfully descale, so from here on we can safely
@@ -1082,6 +1173,108 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
    } while (1);
  }
  
+/// \brief Creates a binary operation with Inst's opcode on LHS and RHS, copying
+/// Inst's nsw/nuw and exact flags whenever the result is a BinaryOperator.
+static Value *CreateBinOpAsGiven(BinaryOperator &Inst, Value *LHS, Value *RHS,
+                                 InstCombiner::BuilderTy *B) {
+  Value *BORes = B->CreateBinOp(Inst.getOpcode(), LHS, RHS);
+  if (BinaryOperator *NewBO = dyn_cast<BinaryOperator>(BORes)) {
+    if (isa<OverflowingBinaryOperator>(NewBO)) {
+      NewBO->setHasNoSignedWrap(Inst.hasNoSignedWrap());
+      NewBO->setHasNoUnsignedWrap(Inst.hasNoUnsignedWrap());
+    }
+    if (isa<PossiblyExactOperator>(NewBO))
+      NewBO->setIsExact(Inst.isExact());
+  }
+  return BORes;
+}
+
+/// \brief Applies binary-operator transformations specific to vector types.
+/// \param Inst Binary operator to transform.
+/// \return Pointer to node that must replace the original binary operator, or
+///         null pointer if no transformation was made.
+Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
+  if (!Inst.getType()->isVectorTy()) return nullptr;
+
+  // It may not be safe to reorder shuffles and things like div, urem, etc.
+  // because we may trap when executing those ops on unknown vector elements.
+  // See PR20059.
+  if (!isSafeToSpeculativelyExecute(&Inst, DL)) return nullptr;
+
+  unsigned VWidth = cast<VectorType>(Inst.getType())->getNumElements();
+  Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
+  assert(cast<VectorType>(LHS->getType())->getNumElements() == VWidth);
+  assert(cast<VectorType>(RHS->getType())->getNumElements() == VWidth);
+
+  // If both arguments of binary operation are shuffles, which use the same
+  // mask and shuffle within a single vector, it is worthwhile to move the
+  // shuffle after binary operation:
+  //   Op(shuffle(v1, m), shuffle(v2, m)) -> shuffle(Op(v1, v2), m)
+  if (isa<ShuffleVectorInst>(LHS) && isa<ShuffleVectorInst>(RHS)) {
+    ShuffleVectorInst *LShuf = cast<ShuffleVectorInst>(LHS);
+    ShuffleVectorInst *RShuf = cast<ShuffleVectorInst>(RHS);
+    if (isa<UndefValue>(LShuf->getOperand(1)) &&
+        isa<UndefValue>(RShuf->getOperand(1)) &&
+        LShuf->getOperand(0)->getType() == RShuf->getOperand(0)->getType() &&
+        LShuf->getMask() == RShuf->getMask()) {
+      Value *NewBO = CreateBinOpAsGiven(Inst, LShuf->getOperand(0),
+          RShuf->getOperand(0), Builder);
+      Value *Res = Builder->CreateShuffleVector(NewBO,
+          UndefValue::get(NewBO->getType()), LShuf->getMask());
+      return Res;
+    }
+  }
+
+  // If one argument is a shuffle within one vector and the other is a constant
+  // vector, try moving the shuffle after the binary operation.
+  ShuffleVectorInst *Shuffle = nullptr;
+  Constant *C1 = nullptr;
+  if (isa<ShuffleVectorInst>(LHS)) Shuffle = cast<ShuffleVectorInst>(LHS);
+  if (isa<ShuffleVectorInst>(RHS)) Shuffle = cast<ShuffleVectorInst>(RHS);
+  if (isa<Constant>(LHS)) C1 = cast<Constant>(LHS);
+  if (isa<Constant>(RHS)) C1 = cast<Constant>(RHS);
+  if (Shuffle && C1 &&
+      (isa<ConstantVector>(C1) || isa<ConstantDataVector>(C1)) &&
+      isa<UndefValue>(Shuffle->getOperand(1)) &&
+      Shuffle->getType() == Shuffle->getOperand(0)->getType()) {
+    SmallVector<int, 16> ShMask = Shuffle->getShuffleMask();
+    // Find constant C2 that has property:
+    //   shuffle(C2, ShMask) = C1
+    // If such constant does not exist (example: ShMask=<0,0> and C1=<1,2>)
+    // reorder is not possible.
+    SmallVector<Constant*, 16> C2M(VWidth,
+                               UndefValue::get(C1->getType()->getScalarType()));
+    bool MayChange = true;  // Cleared if ShMask reuses a filled C2 lane.
+    for (unsigned I = 0; I < VWidth; ++I) {
+      if (ShMask[I] >= 0) {
+        assert(ShMask[I] < (int)VWidth);
+        if (!isa<UndefValue>(C2M[ShMask[I]])) {
+          MayChange = false;
+          break;
+        }
+        C2M[ShMask[I]] = C1->getAggregateElement(I);
+      }
+    }
+    if (MayChange) {
+      Constant *C2 = ConstantVector::get(C2M);
+      Value *NewLHS, *NewRHS;
+      if (isa<Constant>(LHS)) {
+        NewLHS = C2;
+        NewRHS = Shuffle->getOperand(0);
+      } else {
+        NewLHS = Shuffle->getOperand(0);
+        NewRHS = C2;
+      }
+      Value *NewBO = CreateBinOpAsGiven(Inst, NewLHS, NewRHS, Builder);
+      Value *Res = Builder->CreateShuffleVector(NewBO,
+          UndefValue::get(Inst.getType()), Shuffle->getMask());
+      return Res;
+    }
+  }
+
+  return nullptr;
+}
+
  Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
    SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
  
@@ -1124,13 +1317,98 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
      if (MadeChange) return &GEP;
    }
  
+  // Check to see if the inputs to the PHI node are getelementptr instructions.
+  if (PHINode *PN = dyn_cast<PHINode>(PtrOp)) {
+    GetElementPtrInst *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
+    if (!Op1)
+      return nullptr;
+
+    signed DI = -1;
+
+    for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
+      GetElementPtrInst *Op2 = dyn_cast<GetElementPtrInst>(*I);
+      if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands())
+        return nullptr;
+
+      // Keep track of the type as we walk the GEP.
+      Type *CurTy = Op1->getOperand(0)->getType()->getScalarType();
+
+      for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
+        if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType())
+          return nullptr;
+
+        if (Op1->getOperand(J) != Op2->getOperand(J)) {
+          if (DI == -1) {
+            // We have not seen any differences in the GEPs feeding the PHI
+            // yet, so we record this one if it is allowed to be a
+            // variable.
+
+            // The first two arguments can vary for any GEP, the rest have to be
+            // static for struct slots
+            if (J > 1 && CurTy->isStructTy())
+              return nullptr;
+
+            DI = J;
+          } else {
+            // The GEP is different by more than one input. While this could be
+            // extended to support GEPs that vary by more than one variable it
+            // doesn't make sense since it greatly increases the complexity and
+            // would result in an R+R+R addressing mode which no backend
+            // directly supports and would need to be broken into several
+            // simpler instructions anyway.
+            return nullptr;
+          }
+        }
+
+        // Sink down a layer of the type for the next iteration.
+        if (J > 0) {
+          if (CompositeType *CT = dyn_cast<CompositeType>(CurTy)) {
+            CurTy = CT->getTypeAtIndex(Op1->getOperand(J));
+          } else {
+            CurTy = nullptr;
+          }
+        }
+      }
+    }
+
+    GetElementPtrInst *NewGEP = cast<GetElementPtrInst>(Op1->clone());
+
+    if (DI == -1) {
+      // All the GEPs feeding the PHI are identical. Clone one down into our
+      // BB so that it can be merged with the current GEP.
+      GEP.getParent()->getInstList().insert(GEP.getParent()->getFirstNonPHI(),
+                                            NewGEP);
+    } else {
+      // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
+      // into the current block so it can be merged, and create a new PHI to
+      // set that index.
+      Instruction *InsertPt = Builder->GetInsertPoint();
+      Builder->SetInsertPoint(PN);
+      PHINode *NewPN = Builder->CreatePHI(Op1->getOperand(DI)->getType(),
+                                          PN->getNumOperands());
+      Builder->SetInsertPoint(InsertPt);
+
+      for (auto &I : PN->operands())
+        NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI),
+                           PN->getIncomingBlock(I));
+
+      NewGEP->setOperand(DI, NewPN);
+      GEP.getParent()->getInstList().insert(GEP.getParent()->getFirstNonPHI(),
+                                            NewGEP);
+      NewGEP->setOperand(DI, NewPN);
+    }
+
+    GEP.setOperand(0, NewGEP);
+    PtrOp = NewGEP;
+  }
+
    // Combine Indices - If the source pointer to this getelementptr instruction
    // is a getelementptr instruction, combine the indices of the two
    // getelementptr instructions into a single instruction.
    //
    if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) {
      if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
-      return 0;
+      return nullptr;
  
      // Note that if our source is a gep chain itself then we wait for that
      // chain to be resolved before we perform this transformation.  This
@@ -1138,7 +1416,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
      if (GEPOperator *SrcGEP =
            dyn_cast<GEPOperator>(Src->getOperand(0)))
        if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
-        return 0;   // Wait until our source is folded to completion.
+        return nullptr;   // Wait until our source is folded to completion.
  
      SmallVector<Value*, 8> Indices;
  
@@ -1166,7 +1444,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
          // intptr_t).  Just avoid transforming this until the input has been
          // normalized.
          if (SO1->getType() != GO1->getType())
-          return 0;
+          return nullptr;
          Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
        }
  
@@ -1216,7 +1494,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
  
    // We do not handle pointer-vector geps here.
    if (!StrippedPtrTy)
-    return 0;
+    return nullptr;
  
    if (StrippedPtr != PtrOp) {
      bool HasZeroPointerIndex = false;
@@ -1241,7 +1519,15 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
            GetElementPtrInst *Res =
              GetElementPtrInst::Create(StrippedPtr, Idx, GEP.getName());
            Res->setIsInBounds(GEP.isInBounds());
-          return Res;
+          if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace())
+            return Res;
+          // Insert Res, and create an addrspacecast.
+          // e.g.,
+          // GEP (addrspacecast i8 addrspace(1)* X to [0 x i8]*), i32 0, ...
+          // ->
+          // %0 = GEP i8 addrspace(1)* X, ...
+          // addrspacecast i8 addrspace(1)* %0 to i8*
+          return new AddrSpaceCastInst(Builder->Insert(Res), GEP.getType());
          }
  
          if (ArrayType *XATy =
@@ -1253,8 +1539,24 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
              // to an array of the same type as the destination pointer
              // array.  Because the array type is never stepped over (there
              // is a leading zero) we can fold the cast into this GEP.
-            GEP.setOperand(0, StrippedPtr);
-            return &GEP;
+            if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) {
+              GEP.setOperand(0, StrippedPtr);
+              return &GEP;
+            }
+            // Cannot replace the base pointer directly because StrippedPtr's
+            // address space is different. Instead, create a new GEP followed by
+            // an addrspacecast.
+            // e.g.,
+            // GEP (addrspacecast [10 x i8] addrspace(1)* X to [0 x i8]*),
+            //   i32 0, ...
+            // ->
+            // %0 = GEP [10 x i8] addrspace(1)* X, ...
+            // addrspacecast i8 addrspace(1)* %0 to i8*
+            SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end());
+            Value *NewGEP = GEP.isInBounds() ?
+              Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) :
+              Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
+            return new AddrSpaceCastInst(NewGEP, GEP.getType());
            }
          }
        }
@@ -1360,7 +1662,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
    }
  
    if (!DL)
-    return 0;
+    return nullptr;
  
    /// See if we can simplify:
    ///   X = bitcast A* to B*
@@ -1412,7 +1714,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
      }
    }
  
-  return 0;
+  return nullptr;
  }
  
  static bool
@@ -1527,7 +1829,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
      }
      return EraseInstFromFunction(MI);
    }
-  return 0;
+  return nullptr;
  }
  
  /// \brief Move the call to free before a NULL test.
@@ -1556,30 +1858,30 @@ tryToMoveFreeBeforeNullTest(CallInst &FI) {
    //        would duplicate the call to free in each predecessor and it may
    //        not be profitable even for code size.
    if (!PredBB)
-    return 0;
+    return nullptr;
  
    // Validate constraint #2: Does this block contains only the call to
    //                         free and an unconditional branch?
    // FIXME: We could check if we can speculate everything in the
    //        predecessor block
    if (FreeInstrBB->size() != 2)
-    return 0;
+    return nullptr;
    BasicBlock *SuccBB;
    if (!match(FreeInstrBB->getTerminator(), m_UnconditionalBr(SuccBB)))
-    return 0;
+    return nullptr;
  
    // Validate the rest of constraint #1 by matching on the pred branch.
    TerminatorInst *TI = PredBB->getTerminator();
    BasicBlock *TrueBB, *FalseBB;
    ICmpInst::Predicate Pred;
    if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Op), m_Zero()), TrueBB, FalseBB)))
-    return 0;
+    return nullptr;
    if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
-    return 0;
+    return nullptr;
  
    // Validate constraint #3: Ensure the null case just falls through.
    if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
-    return 0;
+    return nullptr;
    assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
           "Broken CFG: missing edge from predecessor to successor");
  
@@ -1614,14 +1916,14 @@ Instruction *InstCombiner::visitFree(CallInst &FI) {
      if (Instruction *I = tryToMoveFreeBeforeNullTest(FI))
        return I;
  
-  return 0;
+  return nullptr;
  }
  
  
  
  Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
    // Change br (not X), label True, label False to: br X, label False, True
-  Value *X = 0;
+  Value *X = nullptr;
    BasicBlock *TrueDest;
    BasicBlock *FalseDest;
    if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) &&
@@ -1664,7 +1966,7 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
        return &BI;
      }
  
-  return 0;
+  return nullptr;
  }
  
  Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
@@ -1688,7 +1990,7 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
          return &SI;
        }
    }
-  return 0;
+  return nullptr;
  }
  
  Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
@@ -1705,7 +2007,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
        // first index
        return ExtractValueInst::Create(C2, EV.getIndices().slice(1));
      }
-    return 0; // Can't handle other constants
+    return nullptr; // Can't handle other constants
    }
  
    if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
@@ -1838,7 +2140,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
    // and if again single-use then via load (gep (gep)) to load (gep).
    // However, double extracts from e.g. function arguments or return values
    // aren't handled yet.
-  return 0;
+  return nullptr;
  }
  
  enum Personality_Type {
@@ -1894,7 +2196,7 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
    // Simplify the list of clauses, eg by removing repeated catch clauses
    // (these are often created by inlining).
    bool MakeNewInstruction = false; // If true, recreate using the following:
-  SmallVector<Value *, 16> NewClauses; // - Clauses for the new instruction;
+  SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
    bool CleanupFlag = LI.isCleanup();   // - The new instruction is a cleanup.
  
    SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
@@ -1902,8 +2204,8 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
      bool isLastClause = i + 1 == e;
      if (LI.isCatch(i)) {
        // A catch clause.
-      Value *CatchClause = LI.getClause(i);
-      Constant *TypeInfo = cast<Constant>(CatchClause->stripPointerCasts());
+      Constant *CatchClause = LI.getClause(i);
+      Constant *TypeInfo = CatchClause->stripPointerCasts();
  
        // If we already saw this clause, there is no point in having a second
        // copy of it.
@@ -1932,7 +2234,7 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
        // equal (for example if one represents a C++ class, and the other some
        // class derived from it).
        assert(LI.isFilter(i) && "Unsupported landingpad clause!");
-      Value *FilterClause = LI.getClause(i);
+      Constant *FilterClause = LI.getClause(i);
        ArrayType *FilterType = cast<ArrayType>(FilterClause->getType());
        unsigned NumTypeInfos = FilterType->getNumElements();
  
@@ -1976,8 +2278,8 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
          // catch-alls.  If so, the filter can be discarded.
          bool SawCatchAll = false;
          for (unsigned j = 0; j != NumTypeInfos; ++j) {
-          Value *Elt = Filter->getOperand(j);
-          Constant *TypeInfo = cast<Constant>(Elt->stripPointerCasts());
+          Constant *Elt = Filter->getOperand(j);
+          Constant *TypeInfo = Elt->stripPointerCasts();
            if (isCatchAll(Personality, TypeInfo)) {
              // This element is a catch-all.  Bail out, noting this fact.
              SawCatchAll = true;
@@ -2082,7 +2384,7 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
          continue;
        // If Filter is a subset of LFilter, i.e. every element of Filter is also
        // an element of LFilter, then discard LFilter.
-      SmallVectorImpl<Value *>::iterator J = NewClauses.begin() + j;
+      SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
        // If Filter is empty then it is a subset of LFilter.
        if (!FElts) {
          // Discard LFilter.
@@ -2177,7 +2479,7 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
      return &LI;
    }
  
-  return 0;
+  return nullptr;
  }
  
  
@@ -2270,7 +2572,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
          for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end();
               i != e; ++i) {
            ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
-          if (CE == 0) continue;
+          if (CE == nullptr) continue;
  
            Constant*& FoldRes = FoldedConstants[CE];
            if (!FoldRes)
@@ -2374,7 +2676,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
  
    while (!Worklist.isEmpty()) {
      Instruction *I = Worklist.RemoveOne();
-    if (I == 0) continue;  // skip null values.
+    if (I == nullptr) continue;  // skip null values.
  
      // Check to see if we can DCE the instruction.
      if (isInstructionTriviallyDead(I, TLI)) {
@@ -2516,7 +2818,7 @@ bool InstCombiner::runOnFunction(Function &F) {
      return false;
  
    DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
-  DL = DLP ? &DLP->getDataLayout() : 0;
+  DL = DLP ? &DLP->getDataLayout() : nullptr;
    TLI = &getAnalysis<TargetLibraryInfo>();
    // Minimizing size?
    MinimizeSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
@@ -2543,7 +2845,7 @@ bool InstCombiner::runOnFunction(Function &F) {
    while (DoOneIteration(F, Iteration++))
      EverMadeChange = true;
  
-  Builder = 0;
+  Builder = nullptr;
    return EverMadeChange;
  }