[InstCombine] insert a new shuffle in a safe place (PR25999)

[oota-llvm.git] / lib / Transforms / InstCombine / InstCombineVectorOps.cpp
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp

index 501d87ca4ada70d8b6f5cbca553522b68c543e9f..5cde31a9162e894b8dfbcc5be843c39d7a1ea48b 100644 (file)
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -12,16 +12,20 @@
  //
  //===----------------------------------------------------------------------===//
  
-#define DEBUG_TYPE "instcombine"
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/VectorUtils.h"
  #include "llvm/IR/PatternMatch.h"
  using namespace llvm;
  using namespace PatternMatch;
  
-/// CheapToScalarize - Return true if the value is cheaper to scalarize than it
-/// is to leave as a vector operation.  isConstant indicates whether we're
-/// extracting one known element.  If false we're extracting a variable index.
-static bool CheapToScalarize(Value *V, bool isConstant) {
+#define DEBUG_TYPE "instcombine"
+
+/// Return true if the value is cheaper to scalarize than it is to leave as a
+/// vector operation. isConstant indicates whether we're extracting one known
+/// element. If false we're extracting a variable index.
+static bool cheapToScalarize(Value *V, bool isConstant) {
    if (Constant *C = dyn_cast<Constant>(V)) {
      if (isConstant) return true;
  
@@ -46,75 +50,25 @@ static bool CheapToScalarize(Value *V, bool isConstant) {
      return true;
    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I))
      if (BO->hasOneUse() &&
-        (CheapToScalarize(BO->getOperand(0), isConstant) ||
-         CheapToScalarize(BO->getOperand(1), isConstant)))
+        (cheapToScalarize(BO->getOperand(0), isConstant) ||
+         cheapToScalarize(BO->getOperand(1), isConstant)))
        return true;
    if (CmpInst *CI = dyn_cast<CmpInst>(I))
      if (CI->hasOneUse() &&
-        (CheapToScalarize(CI->getOperand(0), isConstant) ||
-         CheapToScalarize(CI->getOperand(1), isConstant)))
+        (cheapToScalarize(CI->getOperand(0), isConstant) ||
+         cheapToScalarize(CI->getOperand(1), isConstant)))
        return true;
  
    return false;
  }
  
-/// FindScalarElement - Given a vector and an element number, see if the scalar
-/// value is already around as a register, for example if it were inserted then
-/// extracted from the vector.
-static Value *FindScalarElement(Value *V, unsigned EltNo) {
-  assert(V->getType()->isVectorTy() && "Not looking at a vector?");
-  VectorType *VTy = cast<VectorType>(V->getType());
-  unsigned Width = VTy->getNumElements();
-  if (EltNo >= Width)  // Out of range access.
-    return UndefValue::get(VTy->getElementType());
-
-  if (Constant *C = dyn_cast<Constant>(V))
-    return C->getAggregateElement(EltNo);
-
-  if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
-    // If this is an insert to a variable element, we don't know what it is.
-    if (!isa<ConstantInt>(III->getOperand(2)))
-      return 0;
-    unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
-
-    // If this is an insert to the element we are looking for, return the
-    // inserted value.
-    if (EltNo == IIElt)
-      return III->getOperand(1);
-
-    // Otherwise, the insertelement doesn't modify the value, recurse on its
-    // vector input.
-    return FindScalarElement(III->getOperand(0), EltNo);
-  }
-
-  if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
-    unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements();
-    int InEl = SVI->getMaskValue(EltNo);
-    if (InEl < 0)
-      return UndefValue::get(VTy->getElementType());
-    if (InEl < (int)LHSWidth)
-      return FindScalarElement(SVI->getOperand(0), InEl);
-    return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth);
-  }
-
-  // Extract a value from a vector add operation with a constant zero.
-  Value *Val = 0; Constant *Con = 0;
-  if (match(V, m_Add(m_Value(Val), m_Constant(Con)))) {
-    if (Con->getAggregateElement(EltNo)->isNullValue())
-      return FindScalarElement(Val, EltNo);
-  }
-
-  // Otherwise, we don't know.
-  return 0;
-}
-
  // If we have a PHI node with a vector type that has only 2 uses: feed
  // itself and be an operand of extractelement at a constant location,
  // try to replace the PHI of the vector type with a PHI of a scalar type.
  Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
    // Verify that the PHI node has exactly 2 uses. Otherwise return NULL.
    if (!PN->hasNUses(2))
-    return NULL;
+    return nullptr;
  
    // If so, it's known at this point that one operand is PHI and the other is
    // an extractelement node. Find the PHI user that is not the extractelement
@@ -128,8 +82,8 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
    // and that it is a binary operation which is cheap to scalarize.
    // otherwise return NULL.
    if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
-      !(isa<BinaryOperator>(PHIUser)) || !CheapToScalarize(PHIUser, true))
-    return NULL;
+      !(isa<BinaryOperator>(PHIUser)) || !cheapToScalarize(PHIUser, true))
+    return nullptr;
  
    // Create a scalar PHI node that will replace the vector PHI node
    // just before the current PHI node.
@@ -143,7 +97,7 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
      // If the operand is the PHI induction variable:
      if (PHIInVal == PHIUser) {
        // Scalarize the binary operation. Its first operand is the
-      // scalar PHI and the second operand is extracted from the other
+      // scalar PHI, and the second operand is extracted from the other
        // vector operand.
        BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
        unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
@@ -161,8 +115,7 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
        Instruction *pos = dyn_cast<Instruction>(PHIInVal);
        BasicBlock::iterator InsertPos;
        if (pos && !isa<PHINode>(pos)) {
-        InsertPos = pos;
-        ++InsertPos;
+        InsertPos = ++pos->getIterator();
        } else {
          InsertPos = inBB->getFirstInsertionPt();
        }
@@ -176,10 +129,14 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
  }
  
  Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
+  if (Value *V = SimplifyExtractElementInst(
+          EI.getVectorOperand(), EI.getIndexOperand(), DL, TLI, DT, AC))
+    return ReplaceInstUsesWith(EI, V);
+
    // If vector val is constant with all elements the same, replace EI with
    // that element.  We handle a known element # below.
    if (Constant *C = dyn_cast<Constant>(EI.getOperand(0)))
-    if (CheapToScalarize(C, false))
+    if (cheapToScalarize(C, false))
        return ReplaceInstUsesWith(EI, C->getAggregateElement(0U));
  
    // If extracting a specified index from the vector, see if we can recursively
@@ -188,10 +145,8 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
      unsigned IndexVal = IdxC->getZExtValue();
      unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
  
-    // If this is extracting an invalid index, turn this into undef, to avoid
-    // crashing the code below.
-    if (IndexVal >= VectorWidth)
-      return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+    // InstSimplify handles cases where the index is invalid.
+    assert(IndexVal < VectorWidth);
  
      // This instruction only demands the single element from the input vector.
      // If the input vector has a single use, simplify it based on this use
@@ -200,23 +155,20 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
        APInt UndefElts(VectorWidth, 0);
        APInt DemandedMask(VectorWidth, 0);
        DemandedMask.setBit(IndexVal);
-      if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0),
-                                                DemandedMask, UndefElts)) {
+      if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), DemandedMask,
+                                                UndefElts)) {
          EI.setOperand(0, V);
          return &EI;
        }
      }
  
-    if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal))
-      return ReplaceInstUsesWith(EI, Elt);
-
-    // If the this extractelement is directly using a bitcast from a vector of
+    // If this extractelement is directly using a bitcast from a vector of
      // the same number of elements, see if we can find the source element from
      // it.  In this case, we will end up needing to bitcast the scalars.
      if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
        if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
          if (VT->getNumElements() == VectorWidth)
-          if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
+          if (Value *Elt = findScalarElement(BCI->getOperand(0), IndexVal))
              return new BitCastInst(Elt, EI.getType());
      }
  
@@ -231,10 +183,10 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
  
    if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
      // Push extractelement into predecessor operation if legal and
-    // profitable to do so
+    // profitable to do so.
      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
        if (I->hasOneUse() &&
-          CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
+          cheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
          Value *newEI0 =
            Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
                                          EI.getName()+".lhs");
@@ -277,8 +229,9 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
                                                             SrcIdx, false));
        }
      } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
-      // Canonicalize extractelement(cast) -> cast(extractelement)
-      // bitcasts can change the number of vector elements and they cost nothing
+      // Canonicalize extractelement(cast) -> cast(extractelement).
+      // Bitcasts can change the number of vector elements, and they cost
+      // nothing.
        if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
          Value *EE = Builder->CreateExtractElement(CI->getOperand(0),
                                                    EI.getIndexOperand());
@@ -292,7 +245,8 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
          // fight the vectorizer.
  
          // If we are extracting an element from a vector select or a select on
-        // vectors, a select on the scalars extracted from the vector arguments.
+        // vectors, create a select on the scalars extracted from the vector
+        // arguments.
          Value *TrueVal = SI->getTrueValue();
          Value *FalseVal = SI->getFalseValue();
  
@@ -319,13 +273,12 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
        }
      }
    }
-  return 0;
+  return nullptr;
  }
  
-/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns
-/// elements from either LHS or RHS, return the shuffle mask and true.
-/// Otherwise, return false.
-static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
+/// If V is a shuffle of values that ONLY returns elements from either LHS or
+/// RHS, return the shuffle mask and true. Otherwise, return false.
+static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
                                           SmallVectorImpl<Constant*> &Mask) {
    assert(LHS->getType() == RHS->getType() &&
           "Invalid CollectSingleShuffleElements");
@@ -360,9 +313,9 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
      unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
  
      if (isa<UndefValue>(ScalarOp)) {  // inserting undef into vector.
-      // Okay, we can handle this if the vector we are insertinting into is
+      // We can handle this if the vector we are inserting into is
        // transitively ok.
-      if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+      if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
          // If so, update the mask to reflect the inserted undef.
          Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
          return true;
@@ -375,9 +328,9 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
  
          // This must be extracting from either LHS or RHS.
          if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
-          // Okay, we can handle this if the vector we are insertinting into is
+          // We can handle this if the vector we are inserting into is
            // transitively ok.
-          if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+          if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
              // If so, update the mask to reflect the inserted value.
              if (EI->getOperand(0) == LHS) {
                Mask[InsertedIdx % NumElts] =
@@ -399,10 +352,62 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
    return false;
  }
  
+/// If we have insertion into a vector that is wider than the vector that we
+/// are extracting from, try to widen the source vector to allow a single
+/// shufflevector to replace one or more insert/extract pairs.
+static void replaceExtractElements(InsertElementInst *InsElt,
+                                   ExtractElementInst *ExtElt,
+                                   InstCombiner &IC) {
+  VectorType *InsVecType = InsElt->getType();
+  VectorType *ExtVecType = ExtElt->getVectorOperandType();
+  unsigned NumInsElts = InsVecType->getVectorNumElements();
+  unsigned NumExtElts = ExtVecType->getVectorNumElements();
+
+  // The inserted-to vector must be wider than the extracted-from vector.
+  if (InsVecType->getElementType() != ExtVecType->getElementType() ||
+      NumExtElts >= NumInsElts)
+    return;
+
+  // Create a shuffle mask to widen the extracted-from vector using undefined
+  // values. The mask selects all of the values of the original vector followed
+  // by as many undefined values as needed to create a vector of the same length
+  // as the inserted-to vector.
+  SmallVector<Constant *, 16> ExtendMask;
+  IntegerType *IntType = Type::getInt32Ty(InsElt->getContext());
+  for (unsigned i = 0; i < NumExtElts; ++i)
+    ExtendMask.push_back(ConstantInt::get(IntType, i));
+  for (unsigned i = NumExtElts; i < NumInsElts; ++i)
+    ExtendMask.push_back(UndefValue::get(IntType));
+
+  Value *ExtVecOp = ExtElt->getVectorOperand();
+  auto *WideVec = new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType),
+                                        ConstantVector::get(ExtendMask));
+
+  // Insert the new shuffle after the vector operand of the extract is defined
+  // (as long as it's not a PHI) or at the start of the basic block of the
+  // extract, so any subsequent extracts in the same basic block can use it.
+  // TODO: Insert before the earliest ExtractElementInst that is replaced.
+  auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
+  if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
+    WideVec->insertAfter(ExtVecOpInst);
+  else
+    IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());
+
+  // Replace same-block extracts of the narrow vector with extracts of the wide
+  // vector; each goes right after the old one so its index is already defined.
+  for (User *U : ExtVecOp->users()) {
+    ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
+    if (!OldExt || OldExt->getParent() != WideVec->getParent())
+      continue;
+    auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
+    NewExt->insertAfter(OldExt);
+    IC.ReplaceInstUsesWith(*OldExt, NewExt);
+  }
+}
  
  /// We are building a shuffle to create V, which is a sequence of insertelement,
  /// extractelement pairs. If PermittedRHS is set, then we must either use it or
-/// not rely on the second vector source. Return an std::pair containing the
+/// not rely on the second vector source. Return a std::pair containing the
  /// left and right vectors of the proposed shuffle (or 0), and set the Mask
  /// parameter as required.
  ///
@@ -410,9 +415,10 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
  /// often been chosen carefully to be efficiently implementable on the target.
  typedef std::pair<Value *, Value *> ShuffleOps;
  
-static ShuffleOps CollectShuffleElements(Value *V,
+static ShuffleOps collectShuffleElements(Value *V,
                                           SmallVectorImpl<Constant *> &Mask,
-                                         Value *PermittedRHS) {
+                                         Value *PermittedRHS,
+                                         InstCombiner &IC) {
    assert(V->getType()->isVectorTy() && "Invalid shuffle!");
    unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
  
@@ -441,12 +447,16 @@ static ShuffleOps CollectShuffleElements(Value *V,
  
          // Either the extracted from or inserted into vector must be RHSVec,
          // otherwise we'd end up with a shuffle of three inputs.
-        if (EI->getOperand(0) == PermittedRHS || PermittedRHS == 0) {
+        if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
            Value *RHS = EI->getOperand(0);
-          ShuffleOps LR = CollectShuffleElements(VecOp, Mask, RHS);
-          assert(LR.second == 0 || LR.second == RHS);
+          ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC);
+          assert(LR.second == nullptr || LR.second == RHS);
  
            if (LR.first->getType() != RHS->getType()) {
+            // Although we are giving up for now, see if we can create extracts
+            // that match the inserts for another round of combining.
+            replaceExtractElements(IEI, EI, IC);
+
              // We tried our best, but we can't find anything compatible with RHS
              // further up the chain. Return a trivial shuffle.
              for (unsigned i = 0; i < NumElts; ++i)
@@ -476,19 +486,54 @@ static ShuffleOps CollectShuffleElements(Value *V,
          // If this insertelement is a chain that comes from exactly these two
          // vectors, return the vector and the effective shuffle.
          if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
-            CollectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
+            collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
                                           Mask))
            return std::make_pair(EI->getOperand(0), PermittedRHS);
        }
      }
    }
  
-  // Otherwise, can't do anything fancy.  Return an identity vector.
+  // Otherwise, we can't do anything fancy. Return an identity vector.
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
    return std::make_pair(V, nullptr);
  }
  
+/// Try to find redundant insertvalue instructions, like the following ones:
+///  %0 = insertvalue { i8, i32 } undef, i8 %x, 0
+///  %1 = insertvalue { i8, i32 } %0,    i8 %y, 0
+/// Here the second instruction inserts a value at the same indices as the
+/// first one, making the first one redundant.
+/// It should be transformed to:
+///  %0 = insertvalue { i8, i32 } undef, i8 %y, 0
+Instruction *InstCombiner::visitInsertValueInst(InsertValueInst &I) {
+  bool IsRedundant = false;
+  ArrayRef<unsigned int> FirstIndices = I.getIndices();
+
+  // Walk the chain of insertvalue instructions hanging off I: each link must
+  // have exactly one use, and that use must be an insertvalue whose operand 0
+  // is that link. If one of the first 10 such users writes the same indices as
+  // I, then I is redundant.
+  Value *V = &I;
+  unsigned Depth = 0;
+  while (V->hasOneUse() && Depth < 10) {
+    User *U = V->user_back();
+    auto UserInsInst = dyn_cast<InsertValueInst>(U);
+    if (!UserInsInst || U->getOperand(0) != V)
+      break;
+    if (UserInsInst->getIndices() == FirstIndices) {
+      IsRedundant = true;
+      break;
+    }
+    V = UserInsInst;
+    Depth++;
+  }
+
+  if (IsRedundant)
+    return ReplaceInstUsesWith(I, I.getOperand(0));
+  return nullptr;
+}
+
  Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
    Value *VecOp    = IE.getOperand(0);
    Value *ScalarOp = IE.getOperand(1);
@@ -524,13 +569,14 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
        // (and any insertelements it points to), into one big shuffle.
        if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.user_back())) {
          SmallVector<Constant*, 16> Mask;
-        ShuffleOps LR = CollectShuffleElements(&IE, Mask, 0);
+        ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this);
  
          // The proposed shuffle may be trivial, in which case we shouldn't
          // perform the combine.
          if (LR.first != &IE && LR.second != &IE) {
            // We now have a shuffle of LHS, RHS, Mask.
-          if (LR.second == 0) LR.second = UndefValue::get(LR.first->getType());
+          if (LR.second == nullptr)
+            LR.second = UndefValue::get(LR.first->getType());
            return new ShuffleVectorInst(LR.first, LR.second,
                                         ConstantVector::get(Mask));
          }
@@ -547,7 +593,7 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
      return &IE;
    }
  
-  return 0;
+  return nullptr;
  }
  
  /// Return true if we can evaluate the specified expression tree if the vector
@@ -599,8 +645,8 @@ static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask,
      case Instruction::FPTrunc:
      case Instruction::FPExt:
      case Instruction::GetElementPtr: {
-      for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
-        if (!CanEvaluateShuffled(I->getOperand(i), Mask, Depth-1))
+      for (Value *Operand : I->operands()) {
+        if (!CanEvaluateShuffled(Operand, Mask, Depth-1))
            return false;
        }
        return true;
@@ -628,7 +674,7 @@ static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask,
  
  /// Rebuild a new instruction just like 'I' but with the new operands given.
  /// In the event of type mismatch, the type of the operands is correct.
-static Value *BuildNew(Instruction *I, ArrayRef<Value*> NewOps) {
+static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) {
    // We don't want to use the IRBuilder here because we want the replacement
    // instructions to appear next to 'I', not the builder's insertion point.
    switch (I->getOpcode()) {
@@ -695,7 +741,8 @@ static Value *BuildNew(Instruction *I, ArrayRef<Value*> NewOps) {
      case Instruction::GetElementPtr: {
        Value *Ptr = NewOps[0];
        ArrayRef<Value*> Idx = NewOps.slice(1);
-      GetElementPtrInst *GEP = GetElementPtrInst::Create(Ptr, Idx, "", I);
+      GetElementPtrInst *GEP = GetElementPtrInst::Create(
+          cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I);
        GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
        return GEP;
      }
@@ -770,7 +817,7 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
          NeedsRebuild |= (V != I->getOperand(i));
        }
        if (NeedsRebuild) {
-        return BuildNew(I, NewOps);
+        return buildNew(I, NewOps);
        }
        return I;
      }
@@ -802,10 +849,46 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
    llvm_unreachable("failed to reorder elements of vector instruction!");
  }
  
+static void recognizeIdentityMask(const SmallVectorImpl<int> &Mask,
+                                  bool &isLHSID, bool &isRHSID) {
+  isLHSID = isRHSID = true;
+
+  for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+    if (Mask[i] < 0) continue;  // Ignore undef values.
+    // LHS identity: every defined lane i must select element i.
+    isLHSID &= (Mask[i] == (int)i);
+
+    // RHS identity: every defined lane i must select element e + i.
+    isRHSID &= (Mask[i]-e == i);
+  }
+}
+
+// Returns true if Mask selects a contiguous, in-order run of LHS elements with
+// no undef (negative) lanes, for example:
+//                 +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+//   Input:        |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
+//   Shuffles to:  |EE|FF|GG|HH|
+//                 +--+--+--+--+
+static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
+                                       SmallVector<int, 16> &Mask) {
+  unsigned LHSElems =
+      cast<VectorType>(SVI.getOperand(0)->getType())->getNumElements();
+  unsigned MaskElems = Mask.size();
+  unsigned BegIdx = Mask.front();
+  unsigned EndIdx = Mask.back();
+  if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
+    return false;
+  for (unsigned I = 0; I != MaskElems; ++I)
+    if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
+      return false;
+  return true;
+}
+
  Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
    Value *LHS = SVI.getOperand(0);
    Value *RHS = SVI.getOperand(1);
    SmallVector<int, 16> Mask = SVI.getShuffleMask();
+  Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
  
    bool MadeChange = false;
  
@@ -841,18 +924,17 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
      SmallVector<Constant*, 16> Elts;
      for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) {
        if (Mask[i] < 0) {
-        Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
+        Elts.push_back(UndefValue::get(Int32Ty));
          continue;
        }
  
        if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
            (Mask[i] <  (int)e && isa<UndefValue>(LHS))) {
          Mask[i] = -1;     // Turn into undef.
-        Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
+        Elts.push_back(UndefValue::get(Int32Ty));
        } else {
          Mask[i] = Mask[i] % e;  // Force to LHS.
-        Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()),
-                                        Mask[i]));
+        Elts.push_back(ConstantInt::get(Int32Ty, Mask[i]));
        }
      }
      SVI.setOperand(0, SVI.getOperand(1));
@@ -865,16 +947,8 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
  
    if (VWidth == LHSWidth) {
      // Analyze the shuffle, are the LHS or RHS and identity shuffles?
-    bool isLHSID = true, isRHSID = true;
-
-    for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
-      if (Mask[i] < 0) continue;  // Ignore undef values.
-      // Is this an identity shuffle of the LHS value?
-      isLHSID &= (Mask[i] == (int)i);
-
-      // Is this an identity shuffle of the RHS value?
-      isRHSID &= (Mask[i]-e == i);
-    }
+    bool isLHSID, isRHSID;
+    recognizeIdentityMask(Mask, isLHSID, isRHSID);
  
      // Eliminate identity shuffles.
      if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
@@ -886,6 +960,95 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
      return ReplaceInstUsesWith(SVI, V);
    }
  
+  // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
+  // a non-vector type. We can instead bitcast the original vector followed by
+  // an extract of the desired element:
+  //
+  //   %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
+  //                         <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  //   %1 = bitcast <4 x i8> %sroa to i32
+  // Becomes:
+  //   %bc = bitcast <16 x i8> %in to <4 x i32>
+  //   %ext = extractelement <4 x i32> %bc, i32 0
+  //
+  // If the shuffle is extracting a contiguous range of values from the input
+  // vector then each use which is a bitcast of the extracted size can be
+  // replaced. This will work if the vector types are compatible, and the begin
+  // index is aligned to a value in the casted vector type. If the begin index
+  // isn't aligned then we can shuffle the original vector (keeping the same
+  // vector type) before extracting.
+  //
+  // This code will bail out if the target type is fundamentally incompatible
+  // with vectors of the source type.
+  //
+  // Example of <16 x i8>, target type i32:
+  // Index range [4,8):         v-----------v Will work.
+  //                +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+  //     <16 x i8>: |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |
+  //     <4 x i32>: |           |           |           |           |
+  //                +-----------+-----------+-----------+-----------+
+  // Index range [6,10):              ^-----------^ Needs an extra shuffle.
+  // Target type i40:           ^--------------^ Won't work, bail.
+  if (isShuffleExtractingFromLHS(SVI, Mask)) {
+    Value *V = LHS;
+    unsigned MaskElems = Mask.size();
+    unsigned BegIdx = Mask.front();
+    VectorType *SrcTy = cast<VectorType>(V->getType());
+    unsigned VecBitWidth = SrcTy->getBitWidth();
+    unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
+    assert(SrcElemBitWidth && "vector elements must have a bitwidth");
+    unsigned SrcNumElems = SrcTy->getNumElements();
+    SmallVector<BitCastInst *, 8> BCs;
+    DenseMap<Type *, Value *> NewBCs;
+    for (User *U : SVI.users())
+      if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
+        if (!BC->use_empty())
+          // Only visit bitcasts that weren't previously handled.
+          BCs.push_back(BC);
+    for (BitCastInst *BC : BCs) {
+      Type *TgtTy = BC->getDestTy();
+      unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
+      if (!TgtElemBitWidth)
+        continue;
+      unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
+      bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
+      bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
+      if (!VecBitWidthsEqual)
+        continue;
+      if (!VectorType::isValidElementType(TgtTy))
+        continue;
+      VectorType *CastSrcTy = VectorType::get(TgtTy, TgtNumElems);
+      if (!BegIsAligned) {
+        // Shuffle the input so [0,MaskElems) contains the output, and
+        // [MaskElems,SrcNumElems) is undef.
+        SmallVector<Constant *, 16> ShuffleMask(SrcNumElems,
+                                                UndefValue::get(Int32Ty));
+        for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
+          ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx);
+        V = Builder->CreateShuffleVector(V, UndefValue::get(V->getType()),
+                                         ConstantVector::get(ShuffleMask),
+                                         SVI.getName() + ".extract");
+        BegIdx = 0;
+      }
+      unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
+      assert(SrcElemsPerTgtElem);
+      BegIdx /= SrcElemsPerTgtElem;
+      bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end();
+      auto *NewBC =
+          BCAlreadyExists
+              ? NewBCs[CastSrcTy]
+              : Builder->CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
+      if (!BCAlreadyExists)
+        NewBCs[CastSrcTy] = NewBC;
+      auto *Ext = Builder->CreateExtractElement(
+          NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
+      // The shufflevector isn't being replaced: the bitcast that used it
+      // is. InstCombine will visit the newly-created instructions.
+      ReplaceInstUsesWith(*BC, Ext);
+      MadeChange = true;
+    }
+  }
+
    // If the LHS is a shufflevector itself, see if we can combine it with this
    // one without producing an unusual shuffle.
    // Cases that might be simplified:
@@ -933,16 +1096,16 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
    ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
    if (LHSShuffle)
      if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS))
-      LHSShuffle = NULL;
+      LHSShuffle = nullptr;
    if (RHSShuffle)
      if (!isa<UndefValue>(RHSShuffle->getOperand(1)))
-      RHSShuffle = NULL;
+      RHSShuffle = nullptr;
    if (!LHSShuffle && !RHSShuffle)
-    return MadeChange ? &SVI : 0;
+    return MadeChange ? &SVI : nullptr;
  
-  Value* LHSOp0 = NULL;
-  Value* LHSOp1 = NULL;
-  Value* RHSOp0 = NULL;
+  Value* LHSOp0 = nullptr;
+  Value* LHSOp1 = nullptr;
+  Value* RHSOp0 = nullptr;
    unsigned LHSOp0Width = 0;
    unsigned RHSOp0Width = 0;
    if (LHSShuffle) {
@@ -974,11 +1137,11 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
    // case 4
    if (LHSOp0 == RHSOp0) {
      newLHS = LHSOp0;
-    newRHS = NULL;
+    newRHS = nullptr;
    }
  
    if (newLHS == LHS && newRHS == RHS)
-    return MadeChange ? &SVI : 0;
+    return MadeChange ? &SVI : nullptr;
  
    SmallVector<int, 16> LHSMask;
    SmallVector<int, 16> RHSMask;
@@ -1038,7 +1201,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
        // If newRHS == newLHS, we want to remap any references from newRHS to
        // newLHS so that we can properly identify splats that may occur due to
        // obfuscation across the two vectors.
-      if (eltMask >= 0 && newRHS != NULL && newLHS != newRHS)
+      if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
          eltMask += newLHSWidth;
      }
  
@@ -1056,7 +1219,6 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
    // or is a splat, do the replacement.
    if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
      SmallVector<Constant*, 16> Elts;
-    Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
      for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
        if (newMask[i] < 0) {
          Elts.push_back(UndefValue::get(Int32Ty));
@@ -1064,10 +1226,17 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
          Elts.push_back(ConstantInt::get(Int32Ty, newMask[i]));
        }
      }
-    if (newRHS == NULL)
+    if (!newRHS)
        newRHS = UndefValue::get(newLHS->getType());
      return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts));
    }
  
-  return MadeChange ? &SVI : 0;
+  // If the result mask is an identity, replace uses of this instruction with
+  // the corresponding argument.
+  bool isLHSID, isRHSID;
+  recognizeIdentityMask(newMask, isLHSID, isRHSID);
+  if (isLHSID && VWidth == LHSOp0Width) return ReplaceInstUsesWith(SVI, newLHS);
+  if (isRHSID && VWidth == RHSOp0Width) return ReplaceInstUsesWith(SVI, newRHS);
+
+  return MadeChange ? &SVI : nullptr;
  }