[InstCombine] canonicalize (bitcast (extractelement X)) --> (extractelement (bitcast X))

[oota-llvm.git] / lib / Transforms / InstCombine / InstCombineCasts.cpp
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp

index ad934804c412e204dc80cb86fb5940198058df0e..b90e4d846bc72e61da3a6fbdd0129fc059f485fe 100644 (file)
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -501,7 +501,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
    // Transform trunc(lshr (sext A), Cst) to ashr A, Cst to eliminate type
    // conversion.
    // It works because bits coming from sign extension have the same value as
-  // sign bit of the original value; performing ashr instead of lshr
+  // the sign bit of the original value; performing ashr instead of lshr
    // generates bits of the same value as the sign bit.
    if (Src->hasOneUse() &&
        match(Src, m_LShr(m_SExt(m_Value(A)), m_ConstantInt(Cst))) &&
@@ -509,7 +509,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
      const unsigned ASize = A->getType()->getPrimitiveSizeInBits();
      // This optimization can be only performed when zero bits generated by
      // the original lshr aren't pulled into the value after truncation, so we
-    // can only shift by values smaller then the size of destination type (in
+    // can only shift by values smaller than the size of destination type (in
      // bits).
      if (Cst->getValue().ult(ASize)) {
        Value *Shift = Builder->CreateAShr(A, Cst->getZExtValue());
@@ -1210,7 +1210,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
    if (Instruction *I = commonCastTransforms(CI))
      return I;
    // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
-  // simpilify this expression to avoid one or more of the trunc/extend
+  // simplify this expression to avoid one or more of the trunc/extend
    // operations if we can do so without changing the numerical results.
    //
    // The exact manner in which the widths of the operands interact to limit
@@ -1715,62 +1715,78 @@ static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
    return Result;
  }
  
+/// Canonicalize a scalar bitcast of an extracted element into a bitcast of the
+/// whole vector followed by an extractelement; return nullptr if not possible.
+/// The backend tends to handle vector bitcasts better than scalar ones because
+/// vector registers, unlike scalar int/FP ones, are usually not type-specific.
+static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast,
+                                              InstCombiner &IC,
+                                              const DataLayout &DL) {
+  // TODO: Create and use a pattern matcher for ExtractElementInst.
+  auto *ExtElt = dyn_cast<ExtractElementInst>(BitCast.getOperand(0));
+  if (!ExtElt || !ExtElt->hasOneUse())
+    return nullptr;
+
+  // The destination must be a valid vector element type; otherwise we cannot
+  // form the vector type that the source vector is bitcast to.
+  Type *DestType = BitCast.getType();
+  if (!VectorType::isValidElementType(DestType))
+    return nullptr;
+
+  unsigned NumElts = ExtElt->getVectorOperandType()->getNumElements();
+  auto *NewVecType = VectorType::get(DestType, NumElts);
+  auto *NewBC = IC.Builder->CreateBitCast(ExtElt->getVectorOperand(),
+                                          NewVecType, "bc");
+  return ExtractElementInst::Create(NewBC, ExtElt->getIndexOperand());
+}
+
+static Instruction *foldVecTruncToExtElt(Value *VecInput, Type *DestTy,
+                                         unsigned ShiftAmt, InstCombiner &IC,
+                                         const DataLayout &DL) {
+  VectorType *VecTy = cast<VectorType>(VecInput->getType());
+  unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
+  unsigned VecWidth = VecTy->getPrimitiveSizeInBits();
+
+  if ((VecWidth % DestWidth != 0) || (ShiftAmt % DestWidth != 0))
+    return nullptr;
+
+  // If the vector's element type differs from DestTy, bitcast the vector to
+  // <NumVecElts x DestTy> so that the extracted element has the result type.
+  unsigned NumVecElts = VecWidth / DestWidth;
+  if (VecTy->getElementType() != DestTy) {
+    VecTy = VectorType::get(DestTy, NumVecElts);
+    VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
+  }
+
+  unsigned Elt = ShiftAmt / DestWidth; // ShiftAmt in units of DestWidth bits.
+  if (DL.isBigEndian()) // Big-endian: count lanes from the other end.
+    Elt = NumVecElts - 1 - Elt;
+
+  return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
+}
  
  /// See if we can optimize an integer->float/double bitcast.
  /// The various long double bitcasts can't get in here.
  static Instruction *optimizeIntToFloatBitCast(BitCastInst &CI, InstCombiner &IC,
                                                const DataLayout &DL) {
    Value *Src = CI.getOperand(0);
-  Type *DestTy = CI.getType();
+  Type *DstTy = CI.getType();
  
    // If this is a bitcast from int to float, check to see if the int is an
    // extraction from a vector.
    Value *VecInput = nullptr;
    // bitcast(trunc(bitcast(somevector)))
    if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) &&
-      isa<VectorType>(VecInput->getType())) {
-    VectorType *VecTy = cast<VectorType>(VecInput->getType());
-    unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
-
-    if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) {
-      // If the element type of the vector doesn't match the result type,
-      // bitcast it to be a vector type we can extract from.
-      if (VecTy->getElementType() != DestTy) {
-        VecTy = VectorType::get(DestTy,
-                                VecTy->getPrimitiveSizeInBits() / DestWidth);
-        VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
-      }
-
-      unsigned Elt = 0;
-      if (DL.isBigEndian())
-        Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1;
-      return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
-    }
-  }
+      isa<VectorType>(VecInput->getType()))
+    return foldVecTruncToExtElt(VecInput, DstTy, /*ShiftAmt=*/0, IC, DL);
  
    // bitcast(trunc(lshr(bitcast(somevector), cst))
    ConstantInt *ShAmt = nullptr;
    if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)),
                                  m_ConstantInt(ShAmt)))) &&
-      isa<VectorType>(VecInput->getType())) {
-    VectorType *VecTy = cast<VectorType>(VecInput->getType());
-    unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
-    if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0 &&
-        ShAmt->getZExtValue() % DestWidth == 0) {
-      // If the element type of the vector doesn't match the result type,
-      // bitcast it to be a vector type we can extract from.
-      if (VecTy->getElementType() != DestTy) {
-        VecTy = VectorType::get(DestTy,
-                                VecTy->getPrimitiveSizeInBits() / DestWidth);
-        VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
-      }
+      isa<VectorType>(VecInput->getType()))
+    return foldVecTruncToExtElt(VecInput, DstTy, ShAmt->getZExtValue(), IC, DL);
  
-      unsigned Elt = ShAmt->getZExtValue() / DestWidth;
-      if (DL.isBigEndian())
-        Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1 - Elt;
-      return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
-    }
-  }
    return nullptr;
  }
  
@@ -1895,6 +1911,9 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
      }
    }
  
+  if (Instruction *I = canonicalizeBitCastExtElt(CI, *this, DL))
+    return I;
+
    if (SrcTy->isPointerTy())
      return commonPointerCastTransforms(CI);
    return commonCastTransforms(CI);