Remove PHINode::reserveOperandSpace(). Instead, add a parameter to

[oota-llvm.git] / lib / Transforms / Scalar / ScalarReplAggregates.cpp
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp

index d4c953514ce3246dd72197124387c7e8457dc6a6..42246ff3216249572518494db3091b439d072170 100644 (file)
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -219,7 +219,7 @@ namespace {
  /// optimization, which scans the uses of an alloca and determines if it can
  /// rewrite it in terms of a single new alloca that can be mem2reg'd.
  class ConvertToScalarInfo {
-  /// AllocaSize - The size of the alloca being considered.
+  /// AllocaSize - The size of the alloca being considered in bytes.
    unsigned AllocaSize;
    const TargetData &TD;
  
@@ -238,19 +238,22 @@ class ConvertToScalarInfo {
    /// also declared as a vector, we do want to promote to a vector.
    bool HadAVector;
  
+  /// HadNonMemTransferAccess - True if there is at least one access to the 
+  /// alloca that is not a MemTransferInst.  We don't want to turn structs into
+  /// large integers unless there is some potential for optimization.
+  bool HadNonMemTransferAccess;
+
  public:
    explicit ConvertToScalarInfo(unsigned Size, const TargetData &td)
-    : AllocaSize(Size), TD(td) {
-    IsNotTrivial = false;
-    VectorTy = 0;
-    HadAVector = false;
-  }
+    : AllocaSize(Size), TD(td), IsNotTrivial(false), VectorTy(0),
+      HadAVector(false), HadNonMemTransferAccess(false) { }
  
    AllocaInst *TryConvert(AllocaInst *AI);
  
  private:
    bool CanConvertToScalar(Value *V, uint64_t Offset);
-  void MergeInType(const Type *In, uint64_t Offset);
+  void MergeInType(const Type *In, uint64_t Offset, bool IsLoadOrStore);
+  bool MergeInVectorType(const VectorType *VInTy, uint64_t Offset);
    void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
  
    Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType,
@@ -282,9 +285,14 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
            << *VectorTy << '\n');
      NewTy = VectorTy;  // Use the vector type.
    } else {
+    unsigned BitWidth = AllocaSize * 8;
+    if (!HadAVector && !HadNonMemTransferAccess &&
+        !TD.fitsInLegalInteger(BitWidth))
+      return 0;
+
      DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
      // Create and insert the integer alloca.
-    NewTy = IntegerType::get(AI->getContext(), AllocaSize*8);
+    NewTy = IntegerType::get(AI->getContext(), BitWidth);
    }
    AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
    ConvertUsesToScalar(AI, NewAI, 0);
@@ -294,16 +302,21 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
  /// MergeInType - Add the 'In' type to the accumulated vector type (VectorTy)
  /// so far at the offset specified by Offset (which is specified in bytes).
  ///
-/// There are two cases we handle here:
+/// There are three cases we handle here:
  ///   1) A union of vector types of the same size and potentially its elements.
  ///      Here we turn element accesses into insert/extract element operations.
  ///      This promotes a <4 x float> with a store of float to the third element
  ///      into a <4 x float> that uses insert element.
-///   2) A fully general blob of memory, which we turn into some (potentially
+///   2) A union of vector types with power-of-2 size differences, e.g. a float,
+///      <2 x float> and <4 x float>.  Here we turn element accesses into insert
+///      and extract element operations, and <2 x float> accesses into a cast to
+///      <2 x double>, an extract, and a cast back to <2 x float>.
+///   3) A fully general blob of memory, which we turn into some (potentially
  ///      large) integer type with extract and insert operations where the loads
  ///      and stores would mutate the memory.  We mark this by setting VectorTy
  ///      to VoidTy.
-void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
+void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset,
+                                      bool IsLoadOrStore) {
    // If we already decided to turn this into a blob of integer memory, there is
    // nothing to be done.
    if (VectorTy && VectorTy->isVoidTy())
@@ -314,26 +327,19 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
    // If the In type is a vector that is the same size as the alloca, see if it
    // matches the existing VecTy.
    if (const VectorType *VInTy = dyn_cast<VectorType>(In)) {
-    // Remember if we saw a vector type.
-    HadAVector = true;
-
-    if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
-      // If we're storing/loading a vector of the right size, allow it as a
-      // vector.  If this the first vector we see, remember the type so that
-      // we know the element size.  If this is a subsequent access, ignore it
-      // even if it is a differing type but the same size.  Worst case we can
-      // bitcast the resultant vectors.
-      if (VectorTy == 0)
-        VectorTy = VInTy;
+    if (MergeInVectorType(VInTy, Offset))
        return;
-    }
    } else if (In->isFloatTy() || In->isDoubleTy() ||
               (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
                isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
+    // Full width accesses can be ignored, because they can always be turned
+    // into bitcasts.
+    unsigned EltSize = In->getPrimitiveSizeInBits()/8;
+    if (IsLoadOrStore && EltSize == AllocaSize)
+      return;
      // If we're accessing something that could be an element of a vector, see
      // if the implied vector agrees with what we already have and if Offset is
      // compatible with it.
-    unsigned EltSize = In->getPrimitiveSizeInBits()/8;
      if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
          (VectorTy == 0 ||
           cast<VectorType>(VectorTy)->getElementType()
@@ -349,6 +355,77 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
    VectorTy = Type::getVoidTy(In->getContext());
  }
  
+/// MergeInVectorType - Handles the vector case of MergeInType, returning true
+/// if the type was successfully merged and false otherwise.
+bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy,
+                                            uint64_t Offset) {
+  // Remember if we saw a vector type.
+  HadAVector = true;
+
+  // TODO: Support nonzero offsets?
+  if (Offset != 0)
+    return false;
+
+  // Only allow vectors that are a power-of-2 away from the size of the alloca.
+  if (!isPowerOf2_64(AllocaSize / (VInTy->getBitWidth() / 8)))
+    return false;
+
+  // If this the first vector we see, remember the type so that we know the
+  // element size.
+  if (!VectorTy) {
+    VectorTy = VInTy;
+    return true;
+  }
+
+  unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();
+  unsigned InBitWidth = VInTy->getBitWidth();
+
+  // Vectors of the same size can be converted using a simple bitcast.
+  if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8))
+    return true;
+
+  const Type *ElementTy = cast<VectorType>(VectorTy)->getElementType();
+  const Type *InElementTy = cast<VectorType>(VInTy)->getElementType();
+
+  // Do not allow mixed integer and floating-point accesses from vectors of
+  // different sizes.
+  if (ElementTy->isFloatingPointTy() != InElementTy->isFloatingPointTy())
+    return false;
+
+  if (ElementTy->isFloatingPointTy()) {
+    // Only allow floating-point vectors of different sizes if they have the
+    // same element type.
+    // TODO: This could be loosened a bit, but would anything benefit?
+    if (ElementTy != InElementTy)
+      return false;
+
+    // There are no arbitrary-precision floating-point types, which limits the
+    // number of legal vector types with larger element types that we can form
+    // to bitcast and extract a subvector.
+    // TODO: We could support some more cases with mixed fp128 and double here.
+    if (!(BitWidth == 64 || BitWidth == 128) ||
+        !(InBitWidth == 64 || InBitWidth == 128))
+      return false;
+  } else {
+    assert(ElementTy->isIntegerTy() && "Vector elements must be either integer "
+                                       "or floating-point.");
+    unsigned BitWidth = ElementTy->getPrimitiveSizeInBits();
+    unsigned InBitWidth = InElementTy->getPrimitiveSizeInBits();
+
+    // Do not allow integer types smaller than a byte or types whose widths are
+    // not a multiple of a byte.
+    if (BitWidth < 8 || InBitWidth < 8 ||
+        BitWidth % 8 != 0 || InBitWidth % 8 != 0)
+      return false;
+  }
+
+  // Pick the largest of the two vector types.
+  if (InBitWidth > BitWidth)
+    VectorTy = VInTy;
+
+  return true;
+}
+
  /// CanConvertToScalar - V is a pointer.  If we can convert the pointee and all
  /// its accesses to a single vector type, return true and set VecTy to
  /// the new type.  If we could convert the alloca into a single promotable
@@ -369,7 +446,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
        // Don't touch MMX operations.
        if (LI->getType()->isX86_MMXTy())
          return false;
-      MergeInType(LI->getType(), Offset);
+      HadNonMemTransferAccess = true;
+      MergeInType(LI->getType(), Offset, true);
        continue;
      }
  
@@ -379,7 +457,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
        // Don't touch MMX operations.
        if (SI->getOperand(0)->getType()->isX86_MMXTy())
          return false;
-      MergeInType(SI->getOperand(0)->getType(), Offset);
+      HadNonMemTransferAccess = true;
+      MergeInType(SI->getOperand(0)->getType(), Offset, true);
        continue;
      }
  
@@ -403,6 +482,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
        if (!CanConvertToScalar(GEP, Offset+GEPOffset))
          return false;
        IsNotTrivial = true;  // Can't be mem2reg'd.
+      HadNonMemTransferAccess = true;
        continue;
      }
  
@@ -414,6 +494,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
            !isa<ConstantInt>(MSI->getLength()))
          return false;
        IsNotTrivial = true;  // Can't be mem2reg'd.
+      HadNonMemTransferAccess = true;
        continue;
      }
  
@@ -529,9 +610,9 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
        // If the source and destination are both to the same alloca, then this is
        // a noop copy-to-self, just delete it.  Otherwise, emit a load and store
        // as appropriate.
-      AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, 0));
+      AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, &TD, 0));
  
-      if (GetUnderlyingObject(MTI->getSource(), 0) != OrigAI) {
+      if (GetUnderlyingObject(MTI->getSource(), &TD, 0) != OrigAI) {
          // Dest must be OrigAI, change this to be a load from the original
          // pointer (bitcasted), then a store to our new alloca.
          assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
@@ -547,7 +628,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
          LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
          SrcVal->setAlignment(MTI->getAlignment());
          Builder.CreateStore(SrcVal, NewAI);
-      } else if (GetUnderlyingObject(MTI->getDest(), 0) != OrigAI) {
+      } else if (GetUnderlyingObject(MTI->getDest(), &TD, 0) != OrigAI) {
          // Src must be OrigAI, change this to be a load from NewAI then a store
          // through the original dest pointer (bitcasted).
          assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
@@ -575,6 +656,26 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
    }
  }
  
+/// getScaledElementType - Gets a scaled element type for a partial vector
+/// access of an alloca. The input type must be an integer or float, and
+/// the resulting type must be an integer, float or double.
+static const Type *getScaledElementType(const Type *OldTy,
+                                        unsigned NewBitWidth) {
+  assert((OldTy->isIntegerTy() || OldTy->isFloatTy()) && "Partial vector "
+         "accesses must be scaled from integer or float elements.");
+
+  LLVMContext &Context = OldTy->getContext();
+
+  if (OldTy->isIntegerTy())
+    return Type::getIntNTy(Context, NewBitWidth);
+  if (NewBitWidth == 32)
+    return Type::getFloatTy(Context);
+  if (NewBitWidth == 64)
+    return Type::getDoubleTy(Context);
+
+  llvm_unreachable("Invalid type for a partial vector access of an alloca!");
+}
+
  /// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
  /// or vector value FromVal, extracting the bits from the offset specified by
  /// Offset.  This returns the value, which is of type ToType.
@@ -595,9 +696,31 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
    // If the result alloca is a vector type, this is either an element
    // access or a bitcast to another vector type of the same size.
    if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) {
-    if (ToType->isVectorTy())
+    unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
+    if (ToTypeSize == AllocaSize)
        return Builder.CreateBitCast(FromVal, ToType, "tmp");
  
+    if (ToType->isVectorTy()) {
+      assert(isPowerOf2_64(AllocaSize / ToTypeSize) &&
+             "Partial vector access of an alloca must have a power-of-2 size "
+             "ratio.");
+      assert(Offset == 0 && "Can't extract a value of a smaller vector type "
+                            "from a nonzero offset.");
+
+      const Type *ToElementTy = cast<VectorType>(ToType)->getElementType();
+      const Type *CastElementTy = getScaledElementType(ToElementTy,
+                                                       ToTypeSize * 8);
+      unsigned NumCastVectorElements = AllocaSize / ToTypeSize;
+
+      LLVMContext &Context = FromVal->getContext();
+      const Type *CastTy = VectorType::get(CastElementTy,
+                                           NumCastVectorElements);
+      Value *Cast = Builder.CreateBitCast(FromVal, CastTy, "tmp");
+      Value *Extract = Builder.CreateExtractElement(Cast, ConstantInt::get(
+                                        Type::getInt32Ty(Context), 0), "tmp");
+      return Builder.CreateBitCast(Extract, ToType, "tmp");
+    }
+
      // Otherwise it must be an element access.
      unsigned Elt = 0;
      if (Offset) {
@@ -717,6 +840,27 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
      if (ValSize == VecSize)
        return Builder.CreateBitCast(SV, AllocaType, "tmp");
  
+    if (SV->getType()->isVectorTy() && isPowerOf2_64(VecSize / ValSize)) {
+      assert(Offset == 0 && "Can't insert a value of a smaller vector type at "
+                            "a nonzero offset.");
+
+      const Type *ToElementTy =
+        cast<VectorType>(SV->getType())->getElementType();
+      const Type *CastElementTy = getScaledElementType(ToElementTy, ValSize);
+      unsigned NumCastVectorElements = VecSize / ValSize;
+
+      LLVMContext &Context = SV->getContext();
+      const Type *OldCastTy = VectorType::get(CastElementTy,
+                                              NumCastVectorElements);
+      Value *OldCast = Builder.CreateBitCast(Old, OldCastTy, "tmp");
+
+      Value *SVCast = Builder.CreateBitCast(SV, CastElementTy, "tmp");
+      Value *Insert =
+        Builder.CreateInsertElement(OldCast, SVCast, ConstantInt::get(
+                                    Type::getInt32Ty(Context), 0), "tmp");
+      return Builder.CreateBitCast(Insert, AllocaType, "tmp");
+    }
+
      uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
  
      // Must be an element insertion.
@@ -1083,7 +1227,8 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
      }
      
      const Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
-    PHINode *NewPN = PHINode::Create(LoadTy, PN->getName()+".ld", PN);
+    PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(),
+                                     PN->getName()+".ld", PN);
  
      // Get the TBAA tag and alignment to use from one of the loads.  It doesn't
      // matter which one we get and if any differ, it doesn't matter.
@@ -1916,7 +2061,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
            if (EltTy != ValTy) {
              unsigned NumElts = cast<VectorType>(ValTy)->getNumElements();
              SmallVector<Constant*, 16> Elts(NumElts, StoreVal);
-            StoreVal = ConstantVector::get(&Elts[0], NumElts);
+            StoreVal = ConstantVector::get(Elts);
            }
          }
          new StoreInst(StoreVal, EltPtr, MI);