Rework the rewriting of loads and stores for vector and integer allocas

author Chandler Carruth <chandlerc@gmail.com>

Tue, 20 Nov 2012 01:12:50 +0000 (01:12 +0000)

committer Chandler Carruth <chandlerc@gmail.com>

Tue, 20 Nov 2012 01:12:50 +0000 (01:12 +0000)
author Chandler Carruth <chandlerc@gmail.com>
Tue, 20 Nov 2012 01:12:50 +0000 (01:12 +0000)
committer Chandler Carruth <chandlerc@gmail.com>
Tue, 20 Nov 2012 01:12:50 +0000 (01:12 +0000)
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp

index b0b618860411fc4ea138852cdc90ca20d29209c9..15103d784bc7e9a618b97c31ebf2fe92e56dbc81 100644 (file)
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -1382,11 +1382,7 @@ class SROA : public FunctionPass {
    /// \brief A collection of instructions to delete.
    /// We try to batch deletions to simplify code and make things a bit more
    /// efficient.
    /// \brief A collection of instructions to delete.
    /// We try to batch deletions to simplify code and make things a bit more
    /// efficient.
-  SmallVector<Instruction *, 8> DeadInsts;
-
-  /// \brief A set to prevent repeatedly marking an instruction split into many
-  /// uses as dead. Only used to guard insertion into DeadInsts.
-  SmallPtrSet<Instruction *, 4> DeadSplitInsts;
+  SetVector<Instruction *, SmallVector<Instruction *, 8> > DeadInsts;
  
    /// \brief Post-promotion worklist.
    ///
  
    /// \brief Post-promotion worklist.
    ///
@@ -1573,7 +1569,7 @@ private:
      do {
        LoadInst *LI = Loads.pop_back_val();
        LI->replaceAllUsesWith(NewPN);
      do {
        LoadInst *LI = Loads.pop_back_val();
        LI->replaceAllUsesWith(NewPN);
-      Pass.DeadInsts.push_back(LI);
+      Pass.DeadInsts.insert(LI);
      } while (!Loads.empty());
  
      // Inject loads into all of the pred blocks.
      } while (!Loads.empty());
  
      // Inject loads into all of the pred blocks.
@@ -1717,7 +1713,7 @@ private:
  
        DEBUG(dbgs() << "          speculated to: " << *V << "\n");
        LI->replaceAllUsesWith(V);
  
        DEBUG(dbgs() << "          speculated to: " << *V << "\n");
        LI->replaceAllUsesWith(V);
-      Pass.DeadInsts.push_back(LI);
+      Pass.DeadInsts.insert(LI);
      }
    }
  };
      }
    }
  };
@@ -2134,8 +2130,13 @@ static bool isVectorPromotionViable(const DataLayout &TD,
      } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) {
        // Disable vector promotion when there are loads or stores of an FCA.
        return false;
      } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) {
        // Disable vector promotion when there are loads or stores of an FCA.
        return false;
-    } else if (!isa<LoadInst>(I->U->getUser()) &&
-               !isa<StoreInst>(I->U->getUser())) {
+    } else if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
+      if (LI->isVolatile())
+        return false;
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
+      if (SI->isVolatile())
+        return false;
+    } else {
        return false;
      }
    }
        return false;
      }
    }
@@ -2241,18 +2242,23 @@ static bool isIntegerWideningViable(const DataLayout &TD,
  static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
                               IntegerType *Ty, uint64_t Offset,
                               const Twine &Name) {
  static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
                               IntegerType *Ty, uint64_t Offset,
                               const Twine &Name) {
+  DEBUG(dbgs() << "       start: " << *V << "\n");
    IntegerType *IntTy = cast<IntegerType>(V->getType());
    assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&
           "Element extends past full value");
    uint64_t ShAmt = 8*Offset;
    if (DL.isBigEndian())
      ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset);
    IntegerType *IntTy = cast<IntegerType>(V->getType());
    assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&
           "Element extends past full value");
    uint64_t ShAmt = 8*Offset;
    if (DL.isBigEndian())
      ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset);
-  if (ShAmt)
+  if (ShAmt) {
      V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
      V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
+    DEBUG(dbgs() << "     shifted: " << *V << "\n");
+  }
    assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
           "Cannot extract to a larger integer!");
    assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
           "Cannot extract to a larger integer!");
-  if (Ty != IntTy)
+  if (Ty != IntTy) {
      V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
      V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
+    DEBUG(dbgs() << "     trunced: " << *V << "\n");
+  }
    return V;
  }
  
    return V;
  }
  
@@ -2262,20 +2268,27 @@ static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old,
    IntegerType *Ty = cast<IntegerType>(V->getType());
    assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
           "Cannot insert a larger integer!");
    IntegerType *Ty = cast<IntegerType>(V->getType());
    assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
           "Cannot insert a larger integer!");
-  if (Ty != IntTy)
+  DEBUG(dbgs() << "       start: " << *V << "\n");
+  if (Ty != IntTy) {
      V = IRB.CreateZExt(V, IntTy, Name + ".ext");
      V = IRB.CreateZExt(V, IntTy, Name + ".ext");
+    DEBUG(dbgs() << "    extended: " << *V << "\n");
+  }
    assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&
           "Element store outside of alloca store");
    uint64_t ShAmt = 8*Offset;
    if (DL.isBigEndian())
      ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset);
    assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&
           "Element store outside of alloca store");
    uint64_t ShAmt = 8*Offset;
    if (DL.isBigEndian())
      ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset);
-  if (ShAmt)
+  if (ShAmt) {
      V = IRB.CreateShl(V, ShAmt, Name + ".shift");
      V = IRB.CreateShl(V, ShAmt, Name + ".shift");
+    DEBUG(dbgs() << "     shifted: " << *V << "\n");
+  }
  
    if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
      APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
      Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
  
    if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
      APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
      Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
+    DEBUG(dbgs() << "      masked: " << *Old << "\n");
      V = IRB.CreateOr(Old, V, Name + ".insert");
      V = IRB.CreateOr(Old, V, Name + ".insert");
+    DEBUG(dbgs() << "    inserted: " << *V << "\n");
    }
    return V;
  }
    }
    return V;
  }
@@ -2442,30 +2455,21 @@ private:
    void deleteIfTriviallyDead(Value *V) {
      Instruction *I = cast<Instruction>(V);
      if (isInstructionTriviallyDead(I))
    void deleteIfTriviallyDead(Value *V) {
      Instruction *I = cast<Instruction>(V);
      if (isInstructionTriviallyDead(I))
-      Pass.DeadInsts.push_back(I);
+      Pass.DeadInsts.insert(I);
    }
  
    }
  
-  bool rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) {
-    Value *Result;
+  Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) {
+    Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+                                     getName(".load"));
      if (LI.getType() == VecTy->getElementType() ||
          BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
      if (LI.getType() == VecTy->getElementType() ||
          BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
-      Result = IRB.CreateExtractElement(
-        IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")),
-        getIndex(IRB, BeginOffset), getName(".extract"));
-    } else {
-      Result = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
-                                     getName(".load"));
+      V = IRB.CreateExtractElement(V, getIndex(IRB, BeginOffset),
+                                   getName(".extract"));
      }
      }
-    if (Result->getType() != LI.getType())
-      Result = convertValue(TD, IRB, Result, LI.getType());
-    LI.replaceAllUsesWith(Result);
-    Pass.DeadInsts.push_back(&LI);
-
-    DEBUG(dbgs() << "          to: " << *Result << "\n");
-    return true;
+    return V;
    }
  
    }
  
-  bool rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) {
+  Value *rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) {
      assert(IntTy && "We cannot insert an integer to the alloca");
      assert(!LI.isVolatile());
      Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
      assert(IntTy && "We cannot insert an integer to the alloca");
      assert(!LI.isVolatile());
      Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
@@ -2473,12 +2477,10 @@ private:
      V = convertValue(TD, IRB, V, IntTy);
      assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
      uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
      V = convertValue(TD, IRB, V, IntTy);
      assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
      uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
-    V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset,
-                       getName(".extract"));
-    LI.replaceAllUsesWith(V);
-    Pass.DeadInsts.push_back(&LI);
-    DEBUG(dbgs() << "          to: " << *V << "\n");
-    return true;
+    if (Offset > 0 || EndOffset < NewAllocaEndOffset)
+      V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset,
+                         getName(".extract"));
+    return V;
    }
  
    bool visitLoadInst(LoadInst &LI) {
    }
  
    bool visitLoadInst(LoadInst &LI) {
@@ -2488,7 +2490,29 @@ private:
      IRBuilder<> IRB(&LI);
  
      uint64_t Size = EndOffset - BeginOffset;
      IRBuilder<> IRB(&LI);
  
      uint64_t Size = EndOffset - BeginOffset;
-    if (Size < TD.getTypeStoreSize(LI.getType())) {
+    bool IsSplitIntLoad = Size < TD.getTypeStoreSize(LI.getType());
+    Type *TargetTy = IsSplitIntLoad ? Type::getIntNTy(LI.getContext(), Size * 8)
+                                    : LI.getType();
+    bool IsPtrAdjusted = false;
+    Value *V;
+    if (VecTy) {
+      V = rewriteVectorizedLoadInst(IRB, LI, OldOp);
+    } else if (IntTy && LI.getType()->isIntegerTy()) {
+      V = rewriteIntegerLoad(IRB, LI);
+    } else if (BeginOffset == NewAllocaBeginOffset &&
+               canConvertValue(TD, NewAllocaTy, LI.getType())) {
+      V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+                                LI.isVolatile(), getName(".load"));
+    } else {
+      Type *LTy = TargetTy->getPointerTo();
+      V = IRB.CreateAlignedLoad(getAdjustedAllocaPtr(IRB, LTy),
+                                getPartitionTypeAlign(TargetTy),
+                                LI.isVolatile(), getName(".load"));
+      IsPtrAdjusted = true;
+    }
+    V = convertValue(TD, IRB, V, TargetTy);
+
+    if (IsSplitIntLoad) {
        assert(!LI.isVolatile());
        assert(LI.getType()->isIntegerTy() &&
               "Only integer type loads and stores are split");
        assert(!LI.isVolatile());
        assert(LI.getType()->isIntegerTy() &&
               "Only integer type loads and stores are split");
@@ -2498,21 +2522,8 @@ private:
        assert(LI.getType()->getIntegerBitWidth() ==
               TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) &&
               "Only alloca-wide loads can be split and recomposed");
        assert(LI.getType()->getIntegerBitWidth() ==
               TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) &&
               "Only alloca-wide loads can be split and recomposed");
-      IntegerType *NarrowTy = Type::getIntNTy(LI.getContext(), Size * 8);
-      bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) &&
-                           canConvertValue(TD, NewAllocaTy, NarrowTy);
-      Value *V;
        // Move the insertion point just past the load so that we can refer to it.
        IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI)));
        // Move the insertion point just past the load so that we can refer to it.
        IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI)));
-      if (IsConvertable)
-        V = convertValue(TD, IRB,
-                         IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
-                                               getName(".load")),
-                         NarrowTy);
-      else
-        V = IRB.CreateAlignedLoad(
-          getAdjustedAllocaPtr(IRB, NarrowTy->getPointerTo()),
-          getPartitionTypeAlign(NarrowTy), getName(".load"));
        // Create a placeholder value with the same type as LI to use as the
        // basis for the new value. This allows us to replace the uses of LI with
        // the computed value, and then replace the placeholder with LI, leaving
        // Create a placeholder value with the same type as LI to use as the
        // basis for the new value. This allows us to replace the uses of LI with
        // the computed value, and then replace the placeholder with LI, leaving
@@ -2524,104 +2535,40 @@ private:
        LI.replaceAllUsesWith(V);
        Placeholder->replaceAllUsesWith(&LI);
        delete Placeholder;
        LI.replaceAllUsesWith(V);
        Placeholder->replaceAllUsesWith(&LI);
        delete Placeholder;
-      if (Pass.DeadSplitInsts.insert(&LI))
-        Pass.DeadInsts.push_back(&LI);
-      DEBUG(dbgs() << "          to: " << *V << "\n");
-      return IsConvertable;
-    }
-
-    if (VecTy)
-      return rewriteVectorizedLoadInst(IRB, LI, OldOp);
-    if (IntTy && LI.getType()->isIntegerTy())
-      return rewriteIntegerLoad(IRB, LI);
-
-    if (BeginOffset == NewAllocaBeginOffset &&
-        canConvertValue(TD, NewAllocaTy, LI.getType())) {
-      Value *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
-                                           LI.isVolatile(), getName(".load"));
-      Value *NewV = convertValue(TD, IRB, NewLI, LI.getType());
-      LI.replaceAllUsesWith(NewV);
-      Pass.DeadInsts.push_back(&LI);
-
-      DEBUG(dbgs() << "          to: " << *NewLI << "\n");
-      return !LI.isVolatile();
+    } else {
+      LI.replaceAllUsesWith(V);
      }
  
      }
  
-    assert(!IntTy && "Invalid load found with int-op widening enabled");
-
-    Value *NewPtr = getAdjustedAllocaPtr(IRB,
-                                         LI.getPointerOperand()->getType());
-    LI.setOperand(0, NewPtr);
-    LI.setAlignment(getPartitionTypeAlign(LI.getType()));
-    DEBUG(dbgs() << "          to: " << LI << "\n");
-
+    Pass.DeadInsts.insert(&LI);
      deleteIfTriviallyDead(OldOp);
      deleteIfTriviallyDead(OldOp);
-    return NewPtr == &NewAI && !LI.isVolatile();
-  }
-
-  bool rewriteWideStoreInst(IRBuilder<> &IRB, StoreInst &SI, Type *ValueTy,
-                            unsigned Size) {
-    assert(!SI.isVolatile());
-    assert(ValueTy->isIntegerTy() &&
-           "Only integer type loads and stores are split");
-    assert(ValueTy->getIntegerBitWidth() ==
-           TD.getTypeStoreSizeInBits(ValueTy) &&
-           "Non-byte-multiple bit width");
-    assert(ValueTy->getIntegerBitWidth() ==
-           TD.getTypeSizeInBits(OldAI.getAllocatedType()) &&
-           "Only alloca-wide stores can be split and recomposed");
-    IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
-    Value *V = extractInteger(TD, IRB, SI.getValueOperand(), NarrowTy,
-                              BeginOffset, getName(".extract"));
-    StoreInst *NewSI;
-    bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) &&
-      canConvertValue(TD, NarrowTy, NewAllocaTy);
-    if (IsConvertable)
-      NewSI = IRB.CreateAlignedStore(convertValue(TD, IRB, V, NewAllocaTy),
-                                     &NewAI, NewAI.getAlignment());
-    else
-      NewSI = IRB.CreateAlignedStore(
-        V, getAdjustedAllocaPtr(IRB, NarrowTy->getPointerTo()),
-        getPartitionTypeAlign(NarrowTy));
-    (void)NewSI;
-    if (Pass.DeadSplitInsts.insert(&SI))
-      Pass.DeadInsts.push_back(&SI);
-
-    DEBUG(dbgs() << "          to: " << *NewSI << "\n");
-    return IsConvertable;
+    DEBUG(dbgs() << "          to: " << *V << "\n");
+    return !LI.isVolatile() && !IsPtrAdjusted;
    }
  
    }
  
-  bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, StoreInst &SI,
-                                  Value *OldOp) {
-    Value *V = SI.getValueOperand();
-    Type *ValueTy = V->getType();
-    if (ValueTy == ElementTy ||
+  bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, Value *V,
+                                  StoreInst &SI, Value *OldOp) {
+    if (V->getType() == ElementTy ||
          BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
          BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
-      if (ValueTy != ElementTy)
+      if (V->getType() != ElementTy)
          V = convertValue(TD, IRB, V, ElementTy);
        LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
                                             getName(".load"));
        V = IRB.CreateInsertElement(LI, V, getIndex(IRB, BeginOffset),
                                    getName(".insert"));
          V = convertValue(TD, IRB, V, ElementTy);
        LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
                                             getName(".load"));
        V = IRB.CreateInsertElement(LI, V, getIndex(IRB, BeginOffset),
                                    getName(".insert"));
-    } else if (ValueTy != VecTy) {
-      uint64_t Size = EndOffset - BeginOffset;
-      if (Size < TD.getTypeStoreSize(ValueTy))
-        return rewriteWideStoreInst(IRB, SI, ValueTy, Size);
-
+    } else if (V->getType() != VecTy) {
        V = convertValue(TD, IRB, V, VecTy);
      }
      StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
        V = convertValue(TD, IRB, V, VecTy);
      }
      StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
-    Pass.DeadInsts.push_back(&SI);
+    Pass.DeadInsts.insert(&SI);
  
      (void)Store;
      DEBUG(dbgs() << "          to: " << *Store << "\n");
      return true;
    }
  
  
      (void)Store;
      DEBUG(dbgs() << "          to: " << *Store << "\n");
      return true;
    }
  
-  bool rewriteIntegerStore(IRBuilder<> &IRB, StoreInst &SI) {
+  bool rewriteIntegerStore(IRBuilder<> &IRB, Value *V, StoreInst &SI) {
      assert(IntTy && "We cannot extract an integer from the alloca");
      assert(!SI.isVolatile());
      assert(IntTy && "We cannot extract an integer from the alloca");
      assert(!SI.isVolatile());
-    Value *V = SI.getValueOperand();
      if (TD.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) {
        Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
                                           getName(".oldload"));
      if (TD.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) {
        Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
                                           getName(".oldload"));
@@ -2633,7 +2580,7 @@ private:
      }
      V = convertValue(TD, IRB, V, NewAllocaTy);
      StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
      }
      V = convertValue(TD, IRB, V, NewAllocaTy);
      StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
-    Pass.DeadInsts.push_back(&SI);
+    Pass.DeadInsts.insert(&SI);
      (void)Store;
      DEBUG(dbgs() << "          to: " << *Store << "\n");
      return true;
      (void)Store;
      DEBUG(dbgs() << "          to: " << *Store << "\n");
      return true;
@@ -2645,46 +2592,53 @@ private:
      assert(OldOp == OldPtr);
      IRBuilder<> IRB(&SI);
  
      assert(OldOp == OldPtr);
      IRBuilder<> IRB(&SI);
  
-    if (VecTy)
-      return rewriteVectorizedStoreInst(IRB, SI, OldOp);
-    Type *ValueTy = SI.getValueOperand()->getType();
-
-    uint64_t Size = EndOffset - BeginOffset;
-    if (Size < TD.getTypeStoreSize(ValueTy))
-      return rewriteWideStoreInst(IRB, SI, ValueTy, Size);
-
-    if (IntTy && ValueTy->isIntegerTy())
-      return rewriteIntegerStore(IRB, SI);
+    Value *V = SI.getValueOperand();
  
      // Strip all inbounds GEPs and pointer casts to try to dig out any root
      // alloca that should be re-examined after promoting this alloca.
  
      // Strip all inbounds GEPs and pointer casts to try to dig out any root
      // alloca that should be re-examined after promoting this alloca.
-    if (ValueTy->isPointerTy())
-      if (AllocaInst *AI = dyn_cast<AllocaInst>(SI.getValueOperand()
-                                                  ->stripInBoundsOffsets()))
+    if (V->getType()->isPointerTy())
+      if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets()))
          Pass.PostPromotionWorklist.insert(AI);
  
          Pass.PostPromotionWorklist.insert(AI);
  
-    if (BeginOffset == NewAllocaBeginOffset &&
-        canConvertValue(TD, ValueTy, NewAllocaTy)) {
-      Value *NewV = convertValue(TD, IRB, SI.getValueOperand(), NewAllocaTy);
-      StoreInst *NewSI = IRB.CreateAlignedStore(NewV, &NewAI, NewAI.getAlignment(),
-                                                SI.isVolatile());
-      (void)NewSI;
-      Pass.DeadInsts.push_back(&SI);
-
-      DEBUG(dbgs() << "          to: " << *NewSI << "\n");
-      return !SI.isVolatile();
+    uint64_t Size = EndOffset - BeginOffset;
+    if (Size < TD.getTypeStoreSize(V->getType())) {
+      assert(!SI.isVolatile());
+      assert(V->getType()->isIntegerTy() &&
+             "Only integer type loads and stores are split");
+      assert(V->getType()->getIntegerBitWidth() ==
+             TD.getTypeStoreSizeInBits(V->getType()) &&
+             "Non-byte-multiple bit width");
+      assert(V->getType()->getIntegerBitWidth() ==
+             TD.getTypeSizeInBits(OldAI.getAllocatedType()) &&
+             "Only alloca-wide stores can be split and recomposed");
+      IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
+      V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset,
+                         getName(".extract"));
      }
  
      }
  
-    assert(!IntTy && "Invalid store found with int-op widening enabled");
-
-    Value *NewPtr = getAdjustedAllocaPtr(IRB,
-                                         SI.getPointerOperand()->getType());
-    SI.setOperand(1, NewPtr);
-    SI.setAlignment(getPartitionTypeAlign(SI.getValueOperand()->getType()));
-    DEBUG(dbgs() << "          to: " << SI << "\n");
+    if (VecTy)
+      return rewriteVectorizedStoreInst(IRB, V, SI, OldOp);
+    if (IntTy && V->getType()->isIntegerTy())
+      return rewriteIntegerStore(IRB, V, SI);
  
  
+    StoreInst *NewSI;
+    if (BeginOffset == NewAllocaBeginOffset &&
+        canConvertValue(TD, V->getType(), NewAllocaTy)) {
+      V = convertValue(TD, IRB, V, NewAllocaTy);
+      NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
+                                     SI.isVolatile());
+    } else {
+      Value *NewPtr = getAdjustedAllocaPtr(IRB, V->getType()->getPointerTo());
+      NewSI = IRB.CreateAlignedStore(V, NewPtr,
+                                     getPartitionTypeAlign(V->getType()),
+                                     SI.isVolatile());
+    }
+    (void)NewSI;
+    Pass.DeadInsts.insert(&SI);
      deleteIfTriviallyDead(OldOp);
      deleteIfTriviallyDead(OldOp);
-    return NewPtr == &NewAI && !SI.isVolatile();
+
+    DEBUG(dbgs() << "          to: " << *NewSI << "\n");
+    return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile();
    }
  
    bool visitMemSetInst(MemSetInst &II) {
    }
  
    bool visitMemSetInst(MemSetInst &II) {
@@ -2704,8 +2658,7 @@ private:
      }
  
      // Record this instruction for deletion.
      }
  
      // Record this instruction for deletion.
-    if (Pass.DeadSplitInsts.insert(&II))
-      Pass.DeadInsts.push_back(&II);
+    Pass.DeadInsts.insert(&II);
  
      Type *AllocaTy = NewAI.getAllocatedType();
      Type *ScalarTy = AllocaTy->getScalarType();
  
      Type *AllocaTy = NewAI.getAllocatedType();
      Type *ScalarTy = AllocaTy->getScalarType();
@@ -2861,8 +2814,7 @@ private:
        return false;
      }
      // Record this instruction for deletion.
        return false;
      }
      // Record this instruction for deletion.
-    if (Pass.DeadSplitInsts.insert(&II))
-      Pass.DeadInsts.push_back(&II);
+    Pass.DeadInsts.insert(&II);
  
      bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset &&
                           EndOffset == NewAllocaEndOffset;
  
      bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset &&
                           EndOffset == NewAllocaEndOffset;
@@ -2972,8 +2924,7 @@ private:
      assert(II.getArgOperand(1) == OldPtr);
  
      // Record this instruction for deletion.
      assert(II.getArgOperand(1) == OldPtr);
  
      // Record this instruction for deletion.
-    if (Pass.DeadSplitInsts.insert(&II))
-      Pass.DeadInsts.push_back(&II);
+    Pass.DeadInsts.insert(&II);
  
      ConstantInt *Size
        = ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
  
      ConstantInt *Size
        = ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
@@ -3542,7 +3493,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
         DI != DE; ++DI) {
      Changed = true;
      (*DI)->replaceAllUsesWith(UndefValue::get((*DI)->getType()));
         DI != DE; ++DI) {
      Changed = true;
      (*DI)->replaceAllUsesWith(UndefValue::get((*DI)->getType()));
-    DeadInsts.push_back(*DI);
+    DeadInsts.insert(*DI);
    }
    for (AllocaPartitioning::dead_op_iterator DO = P.dead_op_begin(),
                                              DE = P.dead_op_end();
    }
    for (AllocaPartitioning::dead_op_iterator DO = P.dead_op_begin(),
                                              DE = P.dead_op_end();
@@ -3553,7 +3504,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
      if (Instruction *OldI = dyn_cast<Instruction>(OldV))
        if (isInstructionTriviallyDead(OldI)) {
          Changed = true;
      if (Instruction *OldI = dyn_cast<Instruction>(OldV))
        if (isInstructionTriviallyDead(OldI)) {
          Changed = true;
-        DeadInsts.push_back(OldI);
+        DeadInsts.insert(OldI);
        }
    }
  
        }
    }
  
@@ -3574,7 +3525,6 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
  /// We also record the alloca instructions deleted here so that they aren't
  /// subsequently handed to mem2reg to promote.
  void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
  /// We also record the alloca instructions deleted here so that they aren't
  /// subsequently handed to mem2reg to promote.
  void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
-  DeadSplitInsts.clear();
    while (!DeadInsts.empty()) {
      Instruction *I = DeadInsts.pop_back_val();
      DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
    while (!DeadInsts.empty()) {
      Instruction *I = DeadInsts.pop_back_val();
      DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
@@ -3586,7 +3536,7 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
          // Zero out the operand and see if it becomes trivially dead.
          *OI = 0;
          if (isInstructionTriviallyDead(U))
          // Zero out the operand and see if it becomes trivially dead.
          *OI = 0;
          if (isInstructionTriviallyDead(U))
-          DeadInsts.push_back(U);
+          DeadInsts.insert(U);
        }
  
      if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
        }
  
      if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll

index 110950f76a1f4329fb861a4a7840999b587e046e..b363eefb3f9d9ca1e320b40d686009e2329e951d 100644 (file)
--- a/test/Transforms/SROA/basictest.ll
+++ b/test/Transforms/SROA/basictest.ll
@@ -1100,12 +1100,12 @@ entry:
    %imag = getelementptr inbounds { float, float }* %retval, i32 0, i32 1
    store float %phi.real, float* %real
    store float %phi.imag, float* %imag
    %imag = getelementptr inbounds { float, float }* %retval, i32 0, i32 1
    store float %phi.real, float* %real
    store float %phi.imag, float* %imag
+  ; CHECK-NEXT: %[[real_convert:.*]] = bitcast float %[[real]] to i32
    ; CHECK-NEXT: %[[imag_convert:.*]] = bitcast float %[[imag]] to i32
    ; CHECK-NEXT: %[[imag_ext:.*]] = zext i32 %[[imag_convert]] to i64
    ; CHECK-NEXT: %[[imag_shift:.*]] = shl i64 %[[imag_ext]], 32
    ; CHECK-NEXT: %[[imag_mask:.*]] = and i64 undef, 4294967295
    ; CHECK-NEXT: %[[imag_insert:.*]] = or i64 %[[imag_mask]], %[[imag_shift]]
    ; CHECK-NEXT: %[[imag_convert:.*]] = bitcast float %[[imag]] to i32
    ; CHECK-NEXT: %[[imag_ext:.*]] = zext i32 %[[imag_convert]] to i64
    ; CHECK-NEXT: %[[imag_shift:.*]] = shl i64 %[[imag_ext]], 32
    ; CHECK-NEXT: %[[imag_mask:.*]] = and i64 undef, 4294967295
    ; CHECK-NEXT: %[[imag_insert:.*]] = or i64 %[[imag_mask]], %[[imag_shift]]
-  ; CHECK-NEXT: %[[real_convert:.*]] = bitcast float %[[real]] to i32
    ; CHECK-NEXT: %[[real_ext:.*]] = zext i32 %[[real_convert]] to i64
    ; CHECK-NEXT: %[[real_mask:.*]] = and i64 %[[imag_insert]], -4294967296
    ; CHECK-NEXT: %[[real_insert:.*]] = or i64 %[[real_mask]], %[[real_ext]]
    ; CHECK-NEXT: %[[real_ext:.*]] = zext i32 %[[real_convert]] to i64
    ; CHECK-NEXT: %[[real_mask:.*]] = and i64 %[[imag_insert]], -4294967296
    ; CHECK-NEXT: %[[real_insert:.*]] = or i64 %[[real_mask]], %[[real_ext]]
diff --git a/test/Transforms/SROA/vector-extract.ll b/test/Transforms/SROA/vector-extract.ll

deleted file mode 100644 (file)

index 2ca6168..0000000
--- a/test/Transforms/SROA/vector-extract.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: opt < %s -sroa -S | FileCheck %s
-; rdar://12713675
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-
-define <2 x i16> @test1(i64 %x) nounwind ssp {
-; CHECK: @test1
-entry:
-  %tmp = alloca i64, align 8
-  br i1 undef, label %bb1, label %bb2
-; CHECK-NOT: alloca
-
-bb1:
-  store i64 %x, i64* %tmp, align 8
-; CHECK-NOT: store
-  %0 = bitcast i64* %tmp to <2 x i16>*
-  %1 = load <2 x i16>* %0, align 8
-; CHECK-NOT: load
-; CHECK: trunc i64 %x to i32
-; CHECK: bitcast i32
-  ret <2 x i16> %1
-
-bb2:
-  ret <2 x i16> < i16 0, i16 0 >
-}
diff --git a/test/Transforms/SROA/vector-promotion.ll b/test/Transforms/SROA/vector-promotion.ll

index 02e084bf1129bb0403677c4596cee130a8683fa4..ea28f5d1a647aada3f11e05a4647aa023e93ae78 100644 (file)
--- a/test/Transforms/SROA/vector-promotion.ll
+++ b/test/Transforms/SROA/vector-promotion.ll
@@ -220,3 +220,48 @@ entry:
    ret i32 %load
  ; CHECK: ret i32
  }
    ret i32 %load
  ; CHECK: ret i32
  }
+
+define <2 x i8> @PR14349.1(i32 %x) {
+; CEHCK: @PR14349.1
+; The first testcase for broken SROA rewriting of split integer loads and
+; stores due to smaller vector loads and stores. This particular test ensures
+; that we can rewrite a split store of an integer to a store of a vector.
+entry:
+  %a = alloca i32
+; CHECK-NOT: alloca
+
+  store i32 %x, i32* %a
+; CHECK-NOT: store
+
+  %cast = bitcast i32* %a to <2 x i8>*
+  %vec = load <2 x i8>* %cast
+; CHECK-NOT: load
+
+  ret <2 x i8> %vec
+; CHECK: %[[trunc:.*]] = trunc i32 %x to i16
+; CHECK: %[[cast:.*]] = bitcast i16 %[[trunc]] to <2 x i8>
+; CHECK: ret <2 x i8> %[[cast]]
+}
+
+define i32 @PR14349.2(<2 x i8> %x) {
+; CEHCK: @PR14349.2
+; The first testcase for broken SROA rewriting of split integer loads and
+; stores due to smaller vector loads and stores. This particular test ensures
+; that we can rewrite a split load of an integer to a load of a vector.
+entry:
+  %a = alloca i32
+; CHECK-NOT: alloca
+
+  %cast = bitcast i32* %a to <2 x i8>*
+  store <2 x i8> %x, <2 x i8>* %cast
+; CHECK-NOT: store
+
+  %int = load i32* %a
+; CHECK-NOT: load
+
+  ret i32 %int
+; CHECK: %[[cast:.*]] = bitcast <2 x i8> %x to i16
+; CHECK: %[[trunc:.*]] = zext i16 %[[cast]] to i32
+; CHECK: %[[insert:.*]] = or i32 %{{.*}}, %[[trunc]]
+; CHECK: ret i32 %[[insert]]
+}
author	Chandler Carruth <chandlerc@gmail.com>
	Tue, 20 Nov 2012 01:12:50 +0000 (01:12 +0000)
committer	Chandler Carruth <chandlerc@gmail.com>
	Tue, 20 Nov 2012 01:12:50 +0000 (01:12 +0000)
lib/Transforms/Scalar/SROA.cpp		patch \| blob \| history
test/Transforms/SROA/basictest.ll		patch \| blob \| history
test/Transforms/SROA/vector-extract.ll	[deleted file]	patch \| blob \| history
test/Transforms/SROA/vector-promotion.ll		patch \| blob \| history