[InstCombine] Adding "\n" to debug output. NFC.

[oota-llvm.git] / lib / Transforms / InstCombine / InstCombineCasts.cpp
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp

index 768616f423383c49d7a8b700222621a6787a436e..da835a19232200e321fb957d83b6a7a57993df77 100644 (file)
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -83,7 +83,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
    PointerType *PTy = cast<PointerType>(CI.getType());
  
    BuilderTy AllocaBuilder(*Builder);
-  AllocaBuilder.SetInsertPoint(AI.getParent(), &AI);
+  AllocaBuilder.SetInsertPoint(&AI);
  
    // Get the type really allocated and the type casted to.
    Type *AllocElTy = AI.getAllocatedType();
@@ -430,6 +430,50 @@ static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
    return false;
  }
  
+/// Given a vector that is bitcast to an integer, optionally logically
+/// right-shifted, and truncated, convert it to an extractelement.
+/// Example (big endian):
+///   trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
+///   --->
+///   extractelement <4 x i32> %X, 1
+static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC,
+                                         const DataLayout &DL) {
+  Value *TruncOp = Trunc.getOperand(0);
+  Type *DestType = Trunc.getType();
+  if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
+    return nullptr;
+
+  Value *VecInput = nullptr;
+  ConstantInt *ShiftVal = nullptr;
+  if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)),
+                                  m_LShr(m_BitCast(m_Value(VecInput)),
+                                         m_ConstantInt(ShiftVal)))) ||
+      !isa<VectorType>(VecInput->getType()))
+    return nullptr;
+
+  VectorType *VecType = cast<VectorType>(VecInput->getType());
+  unsigned VecWidth = VecType->getPrimitiveSizeInBits();
+  unsigned DestWidth = DestType->getPrimitiveSizeInBits();
+  unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0;
+
+  if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
+    return nullptr;
+
+  // If the element type of the vector doesn't match the result type,
+  // bitcast it to a vector type that we can extract from.
+  unsigned NumVecElts = VecWidth / DestWidth;
+  if (VecType->getElementType() != DestType) {
+    VecType = VectorType::get(DestType, NumVecElts);
+    VecInput = IC.Builder->CreateBitCast(VecInput, VecType, "bc");
+  }
+
+  unsigned Elt = ShiftAmount / DestWidth;
+  if (DL.isBigEndian())
+    Elt = NumVecElts - 1 - Elt;
+
+  return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
+}
+
  Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
    if (Instruction *Result = commonCastTransforms(CI))
      return Result;
@@ -469,7 +513,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
  
    // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0), likewise for vector.
    if (DestTy->getScalarSizeInBits() == 1) {
-    Constant *One = ConstantInt::get(Src->getType(), 1);
+    Constant *One = ConstantInt::get(SrcTy, 1);
      Src = Builder->CreateAnd(Src, One);
      Value *Zero = Constant::getNullValue(Src->getType());
      return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
@@ -488,26 +532,49 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
      // If the shift amount is larger than the size of A, then the result is
      // known to be zero because all the input bits got shifted out.
      if (Cst->getZExtValue() >= ASize)
-      return ReplaceInstUsesWith(CI, Constant::getNullValue(CI.getType()));
+      return ReplaceInstUsesWith(CI, Constant::getNullValue(DestTy));
  
      // Since we're doing an lshr and a zero extend, and know that the shift
      // amount is smaller than ASize, it is always safe to do the shift in A's
      // type, then zero extend or truncate to the result.
      Value *Shift = Builder->CreateLShr(A, Cst->getZExtValue());
      Shift->takeName(Src);
-    return CastInst::CreateIntegerCast(Shift, CI.getType(), false);
+    return CastInst::CreateIntegerCast(Shift, DestTy, false);
+  }
+
+  // Transform trunc(lshr (sext A), Cst) to ashr A, Cst to eliminate type
+  // conversion.
+  // It works because bits coming from sign extension have the same value as
+  // the sign bit of the original value; performing ashr instead of lshr
+  // generates bits of the same value as the sign bit.
+  if (Src->hasOneUse() &&
+      match(Src, m_LShr(m_SExt(m_Value(A)), m_ConstantInt(Cst))) &&
+      cast<Instruction>(Src)->getOperand(0)->hasOneUse()) {
+    const unsigned ASize = A->getType()->getPrimitiveSizeInBits();
+    // This optimization can be only performed when zero bits generated by
+    // the original lshr aren't pulled into the value after truncation, so we
+    // can only shift by values smaller than the size of destination type (in
+    // bits).
+    if (Cst->getValue().ult(ASize)) {
+      Value *Shift = Builder->CreateAShr(A, Cst->getZExtValue());
+      Shift->takeName(Src);
+      return CastInst::CreateIntegerCast(Shift, CI.getType(), true);
+    }
    }
  
    // Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest
    // type isn't non-native.
-  if (Src->hasOneUse() && isa<IntegerType>(Src->getType()) &&
-      ShouldChangeType(Src->getType(), CI.getType()) &&
+  if (Src->hasOneUse() && isa<IntegerType>(SrcTy) &&
+      ShouldChangeType(SrcTy, DestTy) &&
        match(Src, m_And(m_Value(A), m_ConstantInt(Cst)))) {
-    Value *NewTrunc = Builder->CreateTrunc(A, CI.getType(), A->getName()+".tr");
+    Value *NewTrunc = Builder->CreateTrunc(A, DestTy, A->getName() + ".tr");
      return BinaryOperator::CreateAnd(NewTrunc,
-                                     ConstantExpr::getTrunc(Cst, CI.getType()));
+                                     ConstantExpr::getTrunc(Cst, DestTy));
    }
  
+  if (Instruction *I = foldVecTruncToExtElt(CI, *this, DL))
+    return I;
+
    return nullptr;
  }
  
@@ -792,7 +859,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
  
      // Okay, we can transform this!  Insert the new expression now.
      DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
-          " to avoid zero extend: " << CI);
+          " to avoid zero extend: " << CI << '\n');
      Value *Res = EvaluateInDifferentType(Src, DestTy, false);
      assert(Res->getType() == DestTy);
  
@@ -1081,7 +1148,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
        canEvaluateSExtd(Src, DestTy)) {
      // Okay, we can transform this!  Insert the new expression now.
      DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
-          " to avoid sign extend: " << CI);
+          " to avoid sign extend: " << CI << '\n');
      Value *Res = EvaluateInDifferentType(Src, DestTy, true);
      assert(Res->getType() == DestTy);
  
@@ -1190,7 +1257,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
    if (Instruction *I = commonCastTransforms(CI))
      return I;
    // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
-  // simpilify this expression to avoid one or more of the trunc/extend
+  // simplify this expression to avoid one or more of the trunc/extend
    // operations if we can do so without changing the numerical results.
    //
    // The exact manner in which the widths of the operands interact to limit
@@ -1330,9 +1397,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
          Value *InnerTrunc = Builder->CreateFPTrunc(II->getArgOperand(0),
                                                     CI.getType());
          Type *IntrinsicType[] = { CI.getType() };
-        Function *Overload =
-          Intrinsic::getDeclaration(CI.getParent()->getParent()->getParent(),
-                                    II->getIntrinsicID(), IntrinsicType);
+        Function *Overload = Intrinsic::getDeclaration(
+            CI.getModule(), II->getIntrinsicID(), IntrinsicType);
  
          Value *Args[] = { InnerTrunc };
          return CallInst::Create(Overload, Args, II->getName());
@@ -1695,63 +1761,29 @@ static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
    return Result;
  }
  
-
-/// See if we can optimize an integer->float/double bitcast.
-/// The various long double bitcasts can't get in here.
-static Instruction *optimizeIntToFloatBitCast(BitCastInst &CI, InstCombiner &IC,
+/// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
+/// vector followed by extract element. The backend tends to handle bitcasts of
+/// vectors better than bitcasts of scalars because vector registers are
+/// usually not type-specific like scalar integer or scalar floating-point.
+static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast,
+                                              InstCombiner &IC,
                                                const DataLayout &DL) {
-  Value *Src = CI.getOperand(0);
-  Type *DestTy = CI.getType();
-
-  // If this is a bitcast from int to float, check to see if the int is an
-  // extraction from a vector.
-  Value *VecInput = nullptr;
-  // bitcast(trunc(bitcast(somevector)))
-  if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) &&
-      isa<VectorType>(VecInput->getType())) {
-    VectorType *VecTy = cast<VectorType>(VecInput->getType());
-    unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
-
-    if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) {
-      // If the element type of the vector doesn't match the result type,
-      // bitcast it to be a vector type we can extract from.
-      if (VecTy->getElementType() != DestTy) {
-        VecTy = VectorType::get(DestTy,
-                                VecTy->getPrimitiveSizeInBits() / DestWidth);
-        VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
-      }
-
-      unsigned Elt = 0;
-      if (DL.isBigEndian())
-        Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1;
-      return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
-    }
-  }
+  // TODO: Create and use a pattern matcher for ExtractElementInst.
+  auto *ExtElt = dyn_cast<ExtractElementInst>(BitCast.getOperand(0));
+  if (!ExtElt || !ExtElt->hasOneUse())
+    return nullptr;
  
-  // bitcast(trunc(lshr(bitcast(somevector), cst))
-  ConstantInt *ShAmt = nullptr;
-  if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)),
-                                m_ConstantInt(ShAmt)))) &&
-      isa<VectorType>(VecInput->getType())) {
-    VectorType *VecTy = cast<VectorType>(VecInput->getType());
-    unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
-    if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0 &&
-        ShAmt->getZExtValue() % DestWidth == 0) {
-      // If the element type of the vector doesn't match the result type,
-      // bitcast it to be a vector type we can extract from.
-      if (VecTy->getElementType() != DestTy) {
-        VecTy = VectorType::get(DestTy,
-                                VecTy->getPrimitiveSizeInBits() / DestWidth);
-        VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
-      }
+  // The bitcast must be to a vectorizable type, otherwise we can't make a new
+  // type to extract from.
+  Type *DestType = BitCast.getType();
+  if (!VectorType::isValidElementType(DestType))
+    return nullptr;
  
-      unsigned Elt = ShAmt->getZExtValue() / DestWidth;
-      if (DL.isBigEndian())
-        Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1 - Elt;
-      return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
-    }
-  }
-  return nullptr;
+  unsigned NumElts = ExtElt->getVectorOperandType()->getNumElements();
+  auto *NewVecType = VectorType::get(DestType, NumElts);
+  auto *NewBC = IC.Builder->CreateBitCast(ExtElt->getVectorOperand(),
+                                          NewVecType, "bc");
+  return ExtractElementInst::Create(NewBC, ExtElt->getIndexOperand());
  }
  
  Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
@@ -1797,11 +1829,6 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
      }
    }
  
-  // Try to optimize int -> float bitcasts.
-  if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy))
-    if (Instruction *I = optimizeIntToFloatBitCast(CI, *this, DL))
-      return I;
-
    if (VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
      if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) {
        Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType());
@@ -1875,6 +1902,9 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
      }
    }
  
+  if (Instruction *I = canonicalizeBitCastExtElt(CI, *this, DL))
+    return I;
+
    if (SrcTy->isPointerTy())
      return commonPointerCastTransforms(CI);
    return commonCastTransforms(CI);