//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
-/// CheapToScalarize - Return true if the value is cheaper to scalarize than it
-/// is to leave as a vector operation. isConstant indicates whether we're
-/// extracting one known element. If false we're extracting a variable index.
-static bool CheapToScalarize(Value *V, bool isConstant) {
+/// Return true if the value is cheaper to scalarize than it is to leave as a
+/// vector operation. isConstant indicates whether we're extracting one known
+/// element. If false we're extracting a variable index.
+static bool cheapToScalarize(Value *V, bool isConstant) {
if (Constant *C = dyn_cast<Constant>(V)) {
if (isConstant) return true;
return true;
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I))
if (BO->hasOneUse() &&
- (CheapToScalarize(BO->getOperand(0), isConstant) ||
- CheapToScalarize(BO->getOperand(1), isConstant)))
+ (cheapToScalarize(BO->getOperand(0), isConstant) ||
+ cheapToScalarize(BO->getOperand(1), isConstant)))
return true;
if (CmpInst *CI = dyn_cast<CmpInst>(I))
if (CI->hasOneUse() &&
- (CheapToScalarize(CI->getOperand(0), isConstant) ||
- CheapToScalarize(CI->getOperand(1), isConstant)))
+ (cheapToScalarize(CI->getOperand(0), isConstant) ||
+ cheapToScalarize(CI->getOperand(1), isConstant)))
return true;
return false;
}
-/// FindScalarElement - Given a vector and an element number, see if the scalar
-/// value is already around as a register, for example if it were inserted then
-/// extracted from the vector.
-static Value *FindScalarElement(Value *V, unsigned EltNo) {
- assert(V->getType()->isVectorTy() && "Not looking at a vector?");
- VectorType *VTy = cast<VectorType>(V->getType());
- unsigned Width = VTy->getNumElements();
- if (EltNo >= Width) // Out of range access.
- return UndefValue::get(VTy->getElementType());
-
- if (Constant *C = dyn_cast<Constant>(V))
- return C->getAggregateElement(EltNo);
-
- if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
- // If this is an insert to a variable element, we don't know what it is.
- if (!isa<ConstantInt>(III->getOperand(2)))
- return nullptr;
- unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
-
- // If this is an insert to the element we are looking for, return the
- // inserted value.
- if (EltNo == IIElt)
- return III->getOperand(1);
-
- // Otherwise, the insertelement doesn't modify the value, recurse on its
- // vector input.
- return FindScalarElement(III->getOperand(0), EltNo);
- }
-
- if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
- unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements();
- int InEl = SVI->getMaskValue(EltNo);
- if (InEl < 0)
- return UndefValue::get(VTy->getElementType());
- if (InEl < (int)LHSWidth)
- return FindScalarElement(SVI->getOperand(0), InEl);
- return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth);
- }
-
- // Extract a value from a vector add operation with a constant zero.
- Value *Val = nullptr; Constant *Con = nullptr;
- if (match(V, m_Add(m_Value(Val), m_Constant(Con)))) {
- if (Con->getAggregateElement(EltNo)->isNullValue())
- return FindScalarElement(Val, EltNo);
- }
-
- // Otherwise, we don't know.
- return nullptr;
-}
-
// If we have a PHI node with a vector type that has only 2 uses: feed
// itself and be an operand of extractelement at a constant location,
// try to replace the PHI of the vector type with a PHI of a scalar type.
// and that it is a binary operation which is cheap to scalarize.
// otherwise return NULL.
if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
- !(isa<BinaryOperator>(PHIUser)) || !CheapToScalarize(PHIUser, true))
+ !(isa<BinaryOperator>(PHIUser)) || !cheapToScalarize(PHIUser, true))
return nullptr;
// Create a scalar PHI node that will replace the vector PHI node
// If the operand is the PHI induction variable:
if (PHIInVal == PHIUser) {
// Scalarize the binary operation. Its first operand is the
- // scalar PHI and the second operand is extracted from the other
+ // scalar PHI, and the second operand is extracted from the other
// vector operand.
BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
Instruction *pos = dyn_cast<Instruction>(PHIInVal);
BasicBlock::iterator InsertPos;
if (pos && !isa<PHINode>(pos)) {
- InsertPos = pos;
- ++InsertPos;
+ InsertPos = ++pos->getIterator();
} else {
InsertPos = inBB->getFirstInsertionPt();
}
}
Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
+ if (Value *V = SimplifyExtractElementInst(
+ EI.getVectorOperand(), EI.getIndexOperand(), DL, TLI, DT, AC))
+ return ReplaceInstUsesWith(EI, V);
+
// If vector val is constant with all elements the same, replace EI with
// that element. We handle a known element # below.
if (Constant *C = dyn_cast<Constant>(EI.getOperand(0)))
- if (CheapToScalarize(C, false))
+ if (cheapToScalarize(C, false))
return ReplaceInstUsesWith(EI, C->getAggregateElement(0U));
// If extracting a specified index from the vector, see if we can recursively
unsigned IndexVal = IdxC->getZExtValue();
unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
- // If this is extracting an invalid index, turn this into undef, to avoid
- // crashing the code below.
- if (IndexVal >= VectorWidth)
- return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+ // InstSimplify handles cases where the index is invalid.
+ assert(IndexVal < VectorWidth);
// This instruction only demands the single element from the input vector.
// If the input vector has a single use, simplify it based on this use
APInt UndefElts(VectorWidth, 0);
APInt DemandedMask(VectorWidth, 0);
DemandedMask.setBit(IndexVal);
- if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0),
- DemandedMask, UndefElts)) {
+ if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), DemandedMask,
+ UndefElts)) {
EI.setOperand(0, V);
return &EI;
}
}
- if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal))
- return ReplaceInstUsesWith(EI, Elt);
-
- // If the this extractelement is directly using a bitcast from a vector of
+ // If this extractelement is directly using a bitcast from a vector of
// the same number of elements, see if we can find the source element from
// it. In this case, we will end up needing to bitcast the scalars.
if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
if (VT->getNumElements() == VectorWidth)
- if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
+ if (Value *Elt = findScalarElement(BCI->getOperand(0), IndexVal))
return new BitCastInst(Elt, EI.getType());
}
if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
// Push extractelement into predecessor operation if legal and
- // profitable to do so
+ // profitable to do so.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
if (I->hasOneUse() &&
- CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
+ cheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
Value *newEI0 =
Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
EI.getName()+".lhs");
SrcIdx, false));
}
} else if (CastInst *CI = dyn_cast<CastInst>(I)) {
- // Canonicalize extractelement(cast) -> cast(extractelement)
- // bitcasts can change the number of vector elements and they cost nothing
+ // Canonicalize extractelement(cast) -> cast(extractelement).
+ // Bitcasts can change the number of vector elements, and they cost
+ // nothing.
if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
Value *EE = Builder->CreateExtractElement(CI->getOperand(0),
EI.getIndexOperand());
// fight the vectorizer.
// If we are extracting an element from a vector select or a select on
- // vectors, a select on the scalars extracted from the vector arguments.
+ // vectors, create a select on the scalars extracted from the vector
+ // arguments.
Value *TrueVal = SI->getTrueValue();
Value *FalseVal = SI->getFalseValue();
return nullptr;
}
-/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns
-/// elements from either LHS or RHS, return the shuffle mask and true.
-/// Otherwise, return false.
-static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
+/// If V is a shuffle of values that ONLY returns elements from either LHS or
+/// RHS, return the shuffle mask and true. Otherwise, return false.
+static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
SmallVectorImpl<Constant*> &Mask) {
assert(LHS->getType() == RHS->getType() &&
"Invalid CollectSingleShuffleElements");
unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
- // Okay, we can handle this if the vector we are insertinting into is
+ // We can handle this if the vector we are inserting into is
// transitively ok.
- if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+ if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
// If so, update the mask to reflect the inserted undef.
Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
return true;
// This must be extracting from either LHS or RHS.
if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
- // Okay, we can handle this if the vector we are insertinting into is
+ // We can handle this if the vector we are inserting into is
// transitively ok.
- if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
+ if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
// If so, update the mask to reflect the inserted value.
if (EI->getOperand(0) == LHS) {
Mask[InsertedIdx % NumElts] =
/// We are building a shuffle to create V, which is a sequence of insertelement,
/// extractelement pairs. If PermittedRHS is set, then we must either use it or
-/// not rely on the second vector source. Return an std::pair containing the
+/// not rely on the second vector source. Return a std::pair containing the
/// left and right vectors of the proposed shuffle (or 0), and set the Mask
/// parameter as required.
///
/// often been chosen carefully to be efficiently implementable on the target.
typedef std::pair<Value *, Value *> ShuffleOps;
-static ShuffleOps CollectShuffleElements(Value *V,
+static ShuffleOps collectShuffleElements(Value *V,
SmallVectorImpl<Constant *> &Mask,
Value *PermittedRHS) {
assert(V->getType()->isVectorTy() && "Invalid shuffle!");
// otherwise we'd end up with a shuffle of three inputs.
if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
Value *RHS = EI->getOperand(0);
- ShuffleOps LR = CollectShuffleElements(VecOp, Mask, RHS);
+ ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS);
assert(LR.second == nullptr || LR.second == RHS);
if (LR.first->getType() != RHS->getType()) {
// If this insertelement is a chain that comes from exactly these two
// vectors, return the vector and the effective shuffle.
if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
- CollectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
+ collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
Mask))
return std::make_pair(EI->getOperand(0), PermittedRHS);
}
}
}
- // Otherwise, can't do anything fancy. Return an identity vector.
+ // Otherwise, we can't do anything fancy. Return an identity vector.
for (unsigned i = 0; i != NumElts; ++i)
Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
return std::make_pair(V, nullptr);
// (and any insertelements it points to), into one big shuffle.
if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.user_back())) {
SmallVector<Constant*, 16> Mask;
- ShuffleOps LR = CollectShuffleElements(&IE, Mask, nullptr);
+ ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr);
// The proposed shuffle may be trivial, in which case we shouldn't
// perform the combine.
case Instruction::FPTrunc:
case Instruction::FPExt:
case Instruction::GetElementPtr: {
- for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
- if (!CanEvaluateShuffled(I->getOperand(i), Mask, Depth-1))
+ for (Value *Operand : I->operands()) {
+ if (!CanEvaluateShuffled(Operand, Mask, Depth-1))
return false;
}
return true;
/// Rebuild a new instruction just like 'I' but with the new operands given.
/// In the event of type mismatch, the type of the operands is correct.
-static Value *BuildNew(Instruction *I, ArrayRef<Value*> NewOps) {
+static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) {
// We don't want to use the IRBuilder here because we want the replacement
// instructions to appear next to 'I', not the builder's insertion point.
switch (I->getOpcode()) {
case Instruction::GetElementPtr: {
Value *Ptr = NewOps[0];
ArrayRef<Value*> Idx = NewOps.slice(1);
- GetElementPtrInst *GEP = GetElementPtrInst::Create(Ptr, Idx, "", I);
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I);
GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
return GEP;
}
NeedsRebuild |= (V != I->getOperand(i));
}
if (NeedsRebuild) {
- return BuildNew(I, NewOps);
+ return buildNew(I, NewOps);
}
return I;
}
llvm_unreachable("failed to reorder elements of vector instruction!");
}
-static void RecognizeIdentityMask(const SmallVectorImpl<int> &Mask,
+static void recognizeIdentityMask(const SmallVectorImpl<int> &Mask,
bool &isLHSID, bool &isRHSID) {
isLHSID = isRHSID = true;
}
}
+// Returns true if the shuffle is extracting a contiguous range of values from
+// LHS, for example:
+// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+// Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
+// Shuffles to: |EE|FF|GG|HH|
+// +--+--+--+--+
+static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
+ SmallVector<int, 16> &Mask) {
+ unsigned LHSElems =
+ cast<VectorType>(SVI.getOperand(0)->getType())->getNumElements();
+ unsigned MaskElems = Mask.size();
+ unsigned BegIdx = Mask.front();
+ unsigned EndIdx = Mask.back();
+ if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
+ return false;
+ for (unsigned I = 0; I != MaskElems; ++I)
+ if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
+ return false;
+ return true;
+}
+
Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
Value *LHS = SVI.getOperand(0);
Value *RHS = SVI.getOperand(1);
SmallVector<int, 16> Mask = SVI.getShuffleMask();
+ Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
bool MadeChange = false;
SmallVector<Constant*, 16> Elts;
for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) {
if (Mask[i] < 0) {
- Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
+ Elts.push_back(UndefValue::get(Int32Ty));
continue;
}
if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
(Mask[i] < (int)e && isa<UndefValue>(LHS))) {
Mask[i] = -1; // Turn into undef.
- Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
+ Elts.push_back(UndefValue::get(Int32Ty));
} else {
Mask[i] = Mask[i] % e; // Force to LHS.
- Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()),
- Mask[i]));
+ Elts.push_back(ConstantInt::get(Int32Ty, Mask[i]));
}
}
SVI.setOperand(0, SVI.getOperand(1));
if (VWidth == LHSWidth) {
// Analyze the shuffle, are the LHS or RHS and identity shuffles?
bool isLHSID, isRHSID;
- RecognizeIdentityMask(Mask, isLHSID, isRHSID);
+ recognizeIdentityMask(Mask, isLHSID, isRHSID);
// Eliminate identity shuffles.
if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
return ReplaceInstUsesWith(SVI, V);
}
+ // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
+ // a non-vector type. We can instead bitcast the original vector followed by
+ // an extract of the desired element:
+ //
+ // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
+ // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // %1 = bitcast <4 x i8> %sroa to i32
+ // Becomes:
+ // %bc = bitcast <16 x i8> %in to <4 x i32>
+ // %ext = extractelement <4 x i32> %bc, i32 0
+ //
+ // If the shuffle is extracting a contiguous range of values from the input
+ // vector then each use which is a bitcast of the extracted size can be
+ // replaced. This will work if the vector types are compatible, and the begin
+ // index is aligned to a value in the casted vector type. If the begin index
+ // isn't aligned then we can shuffle the original vector (keeping the same
+ // vector type) before extracting.
+ //
+ // This code will bail out if the target type is fundamentally incompatible
+ // with vectors of the source type.
+ //
+ // Example of <16 x i8>, target type i32:
+ // Index range [4,8): v-----------v Will work.
+ // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ // <16 x i8>: | | | | | | | | | | | | | | | | |
+ // <4 x i32>: | | | | |
+ // +-----------+-----------+-----------+-----------+
+ // Index range [6,10): ^-----------^ Needs an extra shuffle.
+ // Target type i40: ^--------------^ Won't work, bail.
+ if (isShuffleExtractingFromLHS(SVI, Mask)) {
+ Value *V = LHS;
+ unsigned MaskElems = Mask.size();
+ unsigned BegIdx = Mask.front();
+ VectorType *SrcTy = cast<VectorType>(V->getType());
+ unsigned VecBitWidth = SrcTy->getBitWidth();
+ unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
+ assert(SrcElemBitWidth && "vector elements must have a bitwidth");
+ unsigned SrcNumElems = SrcTy->getNumElements();
+ SmallVector<BitCastInst *, 8> BCs;
+ DenseMap<Type *, Value *> NewBCs;
+ for (User *U : SVI.users())
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
+ if (!BC->use_empty())
+ // Only visit bitcasts that weren't previously handled.
+ BCs.push_back(BC);
+ for (BitCastInst *BC : BCs) {
+ Type *TgtTy = BC->getDestTy();
+ unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
+ if (!TgtElemBitWidth)
+ continue;
+ unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
+ bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
+ bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
+ if (!VecBitWidthsEqual)
+ continue;
+ if (!VectorType::isValidElementType(TgtTy))
+ continue;
+ VectorType *CastSrcTy = VectorType::get(TgtTy, TgtNumElems);
+ if (!BegIsAligned) {
+ // Shuffle the input so [0,NumElements) contains the output, and
+ // [NumElems,SrcNumElems) is undef.
+ SmallVector<Constant *, 16> ShuffleMask(SrcNumElems,
+ UndefValue::get(Int32Ty));
+ for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
+ ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx);
+ V = Builder->CreateShuffleVector(V, UndefValue::get(V->getType()),
+ ConstantVector::get(ShuffleMask),
+ SVI.getName() + ".extract");
+ BegIdx = 0;
+ }
+ unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
+ assert(SrcElemsPerTgtElem);
+ BegIdx /= SrcElemsPerTgtElem;
+ bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end();
+ auto *NewBC =
+ BCAlreadyExists
+ ? NewBCs[CastSrcTy]
+ : Builder->CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
+ if (!BCAlreadyExists)
+ NewBCs[CastSrcTy] = NewBC;
+ auto *Ext = Builder->CreateExtractElement(
+ NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
+ // The shufflevector isn't being replaced: the bitcast that used it
+ // is. InstCombine will visit the newly-created instructions.
+ ReplaceInstUsesWith(*BC, Ext);
+ MadeChange = true;
+ }
+ }
+
// If the LHS is a shufflevector itself, see if we can combine it with this
// one without producing an unusual shuffle.
// Cases that might be simplified:
// or is a splat, do the replacement.
if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
SmallVector<Constant*, 16> Elts;
- Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
if (newMask[i] < 0) {
Elts.push_back(UndefValue::get(Int32Ty));
// If the result mask is an identity, replace uses of this instruction with
// corresponding argument.
- if (VWidth == LHSWidth) {
- bool isLHSID, isRHSID;
- RecognizeIdentityMask(newMask, isLHSID, isRHSID);
- if (isLHSID) return ReplaceInstUsesWith(SVI, newLHS);
- if (isRHSID) return ReplaceInstUsesWith(SVI, newRHS);
- }
+ bool isLHSID, isRHSID;
+ recognizeIdentityMask(newMask, isLHSID, isRHSID);
+ if (isLHSID && VWidth == LHSOp0Width) return ReplaceInstUsesWith(SVI, newLHS);
+ if (isRHSID && VWidth == RHSOp0Width) return ReplaceInstUsesWith(SVI, newRHS);
return MadeChange ? &SVI : nullptr;
}