#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
LoopInfo *LI;
/// Dominator Tree.
DominatorTree *DT;
+ /// Alias Analysis.
+ AliasAnalysis *AA;
/// Data Layout.
const DataLayout *DL;
/// Target Library Info.
LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, const DataLayout *DL,
DominatorTree *DT, TargetLibraryInfo *TLI,
- Function *F)
+ AliasAnalysis *AA, Function *F)
: NumLoads(0), NumStores(0), NumPredStores(0), TheLoop(L), SE(SE), DL(DL),
- DT(DT), TLI(TLI), TheFunction(F), Induction(nullptr),
+ DT(DT), TLI(TLI), AA(AA), TheFunction(F), Induction(nullptr),
WidestIndTy(nullptr), HasFunNoNaNAttr(false), MaxSafeDepDistBytes(-1U) {
}
Ends.clear();
IsWritePtr.clear();
DependencySetId.clear();
+ AliasSetId.clear();
}
/// Insert a pointer and calculate the start and end SCEVs.
void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr,
- unsigned DepSetId, ValueToValueMap &Strides);
+ unsigned DepSetId, unsigned ASId, ValueToValueMap &Strides);
/// This flag indicates if we need to add the runtime check.
bool Need;
/// Holds the id of the set of pointers that could be dependent because of a
/// shared underlying object.
SmallVector<unsigned, 2> DependencySetId;
+ /// Holds the id of the disjoint alias set to which this pointer belongs.
+ SmallVector<unsigned, 2> AliasSetId;
};
/// A struct for saving information about induction variables.
DominatorTree *DT;
/// Target Library Info.
TargetLibraryInfo *TLI;
+ /// Alias analysis.
+ AliasAnalysis *AA;
/// Parent function
Function *TheFunction;
DominatorTree *DT;
BlockFrequencyInfo *BFI;
TargetLibraryInfo *TLI;
+ AliasAnalysis *AA;
bool DisableUnrolling;
bool AlwaysVectorize;
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
BFI = &getAnalysis<BlockFrequencyInfo>();
TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+ AA = &getAnalysis<AliasAnalysis>();
// Compute some weights outside of the loop over the loops. Compute this
// using a BranchProbability to re-use its scaling math.
}
// Check if it is legal to vectorize the loop.
- LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, F);
+ LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, AA, F);
if (!LVL.canVectorize()) {
DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
emitMissedWarning(F, L, Hints);
AU.addRequired<LoopInfo>();
AU.addRequired<ScalarEvolution>();
AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<AliasAnalysis>();
AU.addPreserved<LoopInfo>();
AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<AliasAnalysis>();
}
};
void LoopVectorizationLegality::RuntimePointerCheck::insert(
ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId,
- ValueToValueMap &Strides) {
+ unsigned ASId, ValueToValueMap &Strides) {
// Get the stride replaced scev.
const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
Ends.push_back(ScEnd);
IsWritePtr.push_back(WritePtr);
DependencySetId.push_back(DepSetId);
+ AliasSetId.push_back(ASId);
}
Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
// Only need to check pointers between two different dependency sets.
if (PtrRtCheck->DependencySetId[i] == PtrRtCheck->DependencySetId[j])
continue;
+ // Only need to check pointers in the same alias set.
+ if (PtrRtCheck->AliasSetId[i] != PtrRtCheck->AliasSetId[j])
+ continue;
unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace();
/// \brief Set of potential dependent memory accesses.
typedef EquivalenceClasses<MemAccessInfo> DepCandidates;
/// \brief Construct the analysis over the dependence candidate set \p DA.
///
/// In the form taking \p AA, the alias analysis is stored and also
/// dereferenced to construct the alias set tracker (AST), so \p AA must not
/// be null. IsRTCheckNeeded starts out false.
- AccessAnalysis(const DataLayout *Dl, DepCandidates &DA) :
- DL(Dl), DepCands(DA), AreAllWritesIdentified(true),
- AreAllReadsIdentified(true), IsRTCheckNeeded(false) {}
+ AccessAnalysis(const DataLayout *Dl, AliasAnalysis *AA, DepCandidates &DA) :
+ DL(Dl), AA(AA), AST(*AA), DepCands(DA), IsRTCheckNeeded(false) {}
/// \brief Register a load and whether it is only read from.
///
/// The pointer is recorded in Accesses as a non-write access; when
/// \p IsReadOnly is set it is additionally recorded in ReadOnlyPtr. In the
/// AliasAnalysis::Location form, the pointer is taken from \p Loc and is also
/// added to the alias set tracker with an unknown size and the location's
/// TBAA tag.
- void addLoad(Value *Ptr, bool IsReadOnly) {
+ void addLoad(AliasAnalysis::Location &Loc, bool IsReadOnly) {
+ Value *Ptr = const_cast<Value*>(Loc.Ptr);
+ AST.add(Ptr, AliasAnalysis::UnknownSize, Loc.TBAATag);
Accesses.insert(MemAccessInfo(Ptr, false));
if (IsReadOnly)
ReadOnlyPtr.insert(Ptr);
}
/// \brief Register a store.
///
/// The pointer is recorded in Accesses as a write access. In the
/// AliasAnalysis::Location form, the pointer is taken from \p Loc and is also
/// added to the alias set tracker with an unknown size and the location's
/// TBAA tag.
- void addStore(Value *Ptr) {
+ void addStore(AliasAnalysis::Location &Loc) {
+ Value *Ptr = const_cast<Value*>(Loc.Ptr);
+ AST.add(Ptr, AliasAnalysis::UnknownSize, Loc.TBAATag);
Accesses.insert(MemAccessInfo(Ptr, true));
}
/// \brief Goes over all memory accesses, checks whether a runtime (RT) check
/// is needed and builds sets of dependent accesses. The work itself is done
/// by processMemAccesses().
void buildDependenceSets() {
- // Process read-write pointers first.
- processMemAccesses(false);
- // Next, process read pointers.
- processMemAccesses(true);
+ processMemAccesses();
}
/// \brief Returns the current value of the IsRTCheckNeeded flag.
bool isRTCheckNeeded() { return IsRTCheckNeeded; }
private:
typedef SetVector<MemAccessInfo> PtrAccessSet;
- typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap;
- /// \brief Go over all memory access or only the deferred ones if
- /// \p UseDeferred is true and check whether runtime pointer checks are needed
- /// and build sets of dependency check candidates.
- void processMemAccesses(bool UseDeferred);
+ /// \brief Go over all memory accesses, check whether runtime pointer checks
+ /// are needed, and build sets of dependency check candidates.
+ void processMemAccesses();
/// Set of all accesses.
PtrAccessSet Accesses;
- /// Set of access to check after all writes have been processed.
- PtrAccessSet DeferredAccesses;
-
- /// Map of pointers to last access encountered.
- UnderlyingObjToAccessMap ObjToLastAccess;
-
/// Set of accesses that need a further dependence check.
MemAccessInfoSet CheckDeps;
/// Set of pointers that are read only.
SmallPtrSet<Value*, 16> ReadOnlyPtr;
- /// Set of underlying objects already written to.
- SmallPtrSet<Value*, 16> WriteObjects;
-
const DataLayout *DL;
+ AliasAnalysis *AA;
+
+ /// An alias set tracker to partition the access set by underlying object and
+ /// intrinsic property (such as TBAA metadata).
+ AliasSetTracker AST;
/// Sets of potentially dependent accesses - members of one set share an
/// underlying pointer. The set "CheckDeps" identifies which sets really need a
/// dependence check.
DepCandidates &DepCands;
- bool AreAllWritesIdentified;
- bool AreAllReadsIdentified;
bool IsRTCheckNeeded;
};
ValueToValueMap &StridesMap, bool ShouldCheckStride) {
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
- unsigned NumReadPtrChecks = 0;
- unsigned NumWritePtrChecks = 0;
bool CanDoRT = true;
bool IsDepCheckNeeded = isDependencyCheckNeeded();
- // We assign consecutive id to access from different dependence sets.
- // Accesses within the same set don't need a runtime check.
- unsigned RunningDepId = 1;
- DenseMap<Value *, unsigned> DepSetId;
-
- for (PtrAccessSet::iterator AI = Accesses.begin(), AE = Accesses.end();
- AI != AE; ++AI) {
- const MemAccessInfo &Access = *AI;
- Value *Ptr = Access.getPointer();
- bool IsWrite = Access.getInt();
-
- // Just add write checks if we have both.
- if (!IsWrite && Accesses.count(MemAccessInfo(Ptr, true)))
- continue;
+ NumComparisons = 0;
- if (IsWrite)
- ++NumWritePtrChecks;
- else
- ++NumReadPtrChecks;
-
- if (hasComputableBounds(SE, StridesMap, Ptr) &&
- // When we run after a failing dependency check we have to make sure we
- // don't have wrapping pointers.
- (!ShouldCheckStride ||
- isStridedPtr(SE, DL, Ptr, TheLoop, StridesMap) == 1)) {
- // The id of the dependence set.
- unsigned DepId;
-
- if (IsDepCheckNeeded) {
- Value *Leader = DepCands.getLeaderValue(Access).getPointer();
- unsigned &LeaderId = DepSetId[Leader];
- if (!LeaderId)
- LeaderId = RunningDepId++;
- DepId = LeaderId;
- } else
- // Each access has its own dependence set.
- DepId = RunningDepId++;
-
- RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, StridesMap);
-
- DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *Ptr << '\n');
- } else {
- CanDoRT = false;
+ // We assign consecutive ids to accesses from different alias sets.
+ // Accesses in different alias sets do not need to be checked.
+ unsigned ASId = 1;
+ for (auto &AS : AST) {
+ unsigned NumReadPtrChecks = 0;
+ unsigned NumWritePtrChecks = 0;
+
+ // We assign consecutive id to access from different dependence sets.
+ // Accesses within the same set don't need a runtime check.
+ unsigned RunningDepId = 1;
+ DenseMap<Value *, unsigned> DepSetId;
+
+ for (auto A : AS) {
+ Value *Ptr = A.getValue();
+ bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
+ MemAccessInfo Access(Ptr, IsWrite);
+
+ if (IsWrite)
+ ++NumWritePtrChecks;
+ else
+ ++NumReadPtrChecks;
+
+ if (hasComputableBounds(SE, StridesMap, Ptr) &&
+ // When we run after a failing dependency check we have to make sure we
+ // don't have wrapping pointers.
+ (!ShouldCheckStride ||
+ isStridedPtr(SE, DL, Ptr, TheLoop, StridesMap) == 1)) {
+ // The id of the dependence set.
+ unsigned DepId;
+
+ if (IsDepCheckNeeded) {
+ Value *Leader = DepCands.getLeaderValue(Access).getPointer();
+ unsigned &LeaderId = DepSetId[Leader];
+ if (!LeaderId)
+ LeaderId = RunningDepId++;
+ DepId = LeaderId;
+ } else
+ // Each access has its own dependence set.
+ DepId = RunningDepId++;
+
+ RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
+
+ DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *Ptr << '\n');
+ } else {
+ CanDoRT = false;
+ }
}
- }
- if (IsDepCheckNeeded && CanDoRT && RunningDepId == 2)
- NumComparisons = 0; // Only one dependence set.
- else {
- NumComparisons = (NumWritePtrChecks * (NumReadPtrChecks +
- NumWritePtrChecks - 1));
+ if (IsDepCheckNeeded && CanDoRT && RunningDepId == 2)
+ NumComparisons += 0; // Only one dependence set.
+ else {
+ NumComparisons += (NumWritePtrChecks * (NumReadPtrChecks +
+ NumWritePtrChecks - 1));
+ }
+
+ ++ASId;
}
// If the pointers that we would use for the bounds comparison have different
// Only need to check pointers between two different dependency sets.
if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j])
continue;
+ // Only need to check pointers in the same alias set.
+ if (RtCheck.AliasSetId[i] != RtCheck.AliasSetId[j])
+ continue;
Value *PtrI = RtCheck.Pointers[i];
Value *PtrJ = RtCheck.Pointers[j];
return CanDoRT;
}
-static bool isFunctionScopeIdentifiedObject(Value *Ptr) {
- return isNoAliasArgument(Ptr) || isNoAliasCall(Ptr) || isa<AllocaInst>(Ptr);
-}
-
-void AccessAnalysis::processMemAccesses(bool UseDeferred) {
+void AccessAnalysis::processMemAccesses() {
// We process the set twice: first we process read-write pointers, last we
// process read-only pointers. This allows us to skip dependence tests for
// read-only pointers.
- PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
- for (PtrAccessSet::iterator AI = S.begin(), AE = S.end(); AI != AE; ++AI) {
- const MemAccessInfo &Access = *AI;
- Value *Ptr = Access.getPointer();
- bool IsWrite = Access.getInt();
-
- DepCands.insert(Access);
-
- // Memorize read-only pointers for later processing and skip them in the
- // first round (they need to be checked after we have seen all write
- // pointers). Note: we also mark pointer that are not consecutive as
- // "read-only" pointers (so that we check "a[b[i]] +="). Hence, we need the
- // second check for "!IsWrite".
- bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
- if (!UseDeferred && IsReadOnlyPtr) {
- DeferredAccesses.insert(Access);
- continue;
- }
+ DEBUG(dbgs() << "LV: Processing memory accesses...\n");
+ DEBUG(dbgs() << " AST: "; AST.dump());
+ DEBUG(dbgs() << "LV: Accesses:\n");
+ DEBUG({
+ for (auto A : Accesses)
+ dbgs() << "\t" << *A.getPointer() << " (" <<
+ (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
+ "read-only" : "read")) << ")\n";
+ });
+
+ // The AliasSetTracker has nicely partitioned our pointers by metadata
+ // compatibility and potential for underlying-object overlap. As a result, we
+ // only need to check for potential pointer dependencies within each alias
+ // set.
+ for (auto &AS : AST) {
+ // Note that both the alias-set tracker and the alias sets themselves use
+ // linked lists internally and so the iteration order here is deterministic
+ // (matching the original instruction order within each set).
+
+ bool SetHasWrite = false;
+
+ // Map of pointers to last access encountered.
+ typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap;
+ UnderlyingObjToAccessMap ObjToLastAccess;
+
+ // Set of accesses to check after all writes have been processed.
+ PtrAccessSet DeferredAccesses;
+
+ // Iterate over each alias set twice, once to process read/write pointers,
+ // and then to process read-only pointers.
+ for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
+ bool UseDeferred = SetIteration > 0;
+ PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
+
+ for (auto A : AS) {
+ Value *Ptr = A.getValue();
+ bool IsWrite = S.count(MemAccessInfo(Ptr, true));
+
+ // If we're using the deferred access set, then it contains only reads.
+ bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
+ if (UseDeferred && !IsReadOnlyPtr)
+ continue;
+ // Otherwise, the pointer must be in the PtrAccessSet, either as a read
+ // or a write.
+ assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
+ S.count(MemAccessInfo(Ptr, false))) &&
+ "Alias-set pointer not in the access set?");
+
+ MemAccessInfo Access(Ptr, IsWrite);
+ DepCands.insert(Access);
+
+ // Memorize read-only pointers for later processing and skip them in the
+ // first round (they need to be checked after we have seen all write
+ // pointers). Note: we also mark pointer that are not consecutive as
+ // "read-only" pointers (so that we check "a[b[i]] +="). Hence, we need
+ // the second check for "!IsWrite".
+ if (!UseDeferred && IsReadOnlyPtr) {
+ DeferredAccesses.insert(Access);
+ continue;
+ }
- bool NeedDepCheck = false;
- // Check whether there is the possibility of dependency because of
- // underlying objects being the same.
- typedef SmallVector<Value*, 16> ValueVector;
- ValueVector TempObjects;
- GetUnderlyingObjects(Ptr, TempObjects, DL);
- for (ValueVector::iterator UI = TempObjects.begin(), UE = TempObjects.end();
- UI != UE; ++UI) {
- Value *UnderlyingObj = *UI;
-
- // If this is a write then it needs to be an identified object. If this a
- // read and all writes (so far) are identified function scope objects we
- // don't need an identified underlying object but only an Argument (the
- // next write is going to invalidate this assumption if it is
- // unidentified).
- // This is a micro-optimization for the case where all writes are
- // identified and we have one argument pointer.
- // Otherwise, we do need a runtime check.
- if ((IsWrite && !isFunctionScopeIdentifiedObject(UnderlyingObj)) ||
- (!IsWrite && (!AreAllWritesIdentified ||
- !isa<Argument>(UnderlyingObj)) &&
- !isIdentifiedObject(UnderlyingObj))) {
- DEBUG(dbgs() << "LV: Found an unidentified " <<
- (IsWrite ? "write" : "read" ) << " ptr: " << *UnderlyingObj <<
- "\n");
- IsRTCheckNeeded = (IsRTCheckNeeded ||
- !isIdentifiedObject(UnderlyingObj) ||
- !AreAllReadsIdentified);
+ // If this is a write - check other reads and writes for conflicts. If
+ // this is a read, only check other writes for conflicts (but only if
+ // there is no other write to the ptr - this is an optimization to
+ // catch "a[i] = a[i] + " without having to do a dependence check).
+ if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
+ CheckDeps.insert(Access);
+ IsRTCheckNeeded = true;
+ }
if (IsWrite)
- AreAllWritesIdentified = false;
- if (!IsWrite)
- AreAllReadsIdentified = false;
+ SetHasWrite = true;
+
+ // Create sets of pointers connected by a shared alias set and
+ // underlying object.
+ typedef SmallVector<Value*, 16> ValueVector;
+ ValueVector TempObjects;
+ GetUnderlyingObjects(Ptr, TempObjects, DL);
+ for (Value *UnderlyingObj : TempObjects) {
+ UnderlyingObjToAccessMap::iterator Prev =
+ ObjToLastAccess.find(UnderlyingObj);
+ if (Prev != ObjToLastAccess.end())
+ DepCands.unionSets(Access, Prev->second);
+
+ ObjToLastAccess[UnderlyingObj] = Access;
+ }
}
-
- // If this is a write - check other reads and writes for conflicts. If
- // this is a read only check other writes for conflicts (but only if there
- // is no other write to the ptr - this is an optimization to catch "a[i] =
- // a[i] + " without having to do a dependence check).
- if ((IsWrite || IsReadOnlyPtr) && WriteObjects.count(UnderlyingObj))
- NeedDepCheck = true;
-
- if (IsWrite)
- WriteObjects.insert(UnderlyingObj);
-
- // Create sets of pointers connected by shared underlying objects.
- UnderlyingObjToAccessMap::iterator Prev =
- ObjToLastAccess.find(UnderlyingObj);
- if (Prev != ObjToLastAccess.end())
- DepCands.unionSets(Access, Prev->second);
-
- ObjToLastAccess[UnderlyingObj] = Access;
}
-
- if (NeedDepCheck)
- CheckDeps.insert(Access);
}
}
if (!AIsWrite && !BIsWrite)
return false;
+ // We cannot check pointers in different address spaces.
+ if (APtr->getType()->getPointerAddressSpace() !=
+ BPtr->getType()->getPointerAddressSpace())
+ return true;
+
const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, APtr);
const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, BPtr);
}
AccessAnalysis::DepCandidates DependentAccesses;
- AccessAnalysis Accesses(DL, DependentAccesses);
+ AccessAnalysis Accesses(DL, AA, DependentAccesses);
// Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
// multiple times on the same object. If the ptr is accessed twice, once
// list. At this phase it is only a 'write' list.
if (Seen.insert(Ptr)) {
++NumReadWrites;
- Accesses.addStore(Ptr);
+
+ AliasAnalysis::Location Loc = AA->getLocation(ST);
+ // The TBAA metadata could have a control dependency on the predication
+ // condition, so we cannot rely on it when determining whether or not we
+ // need runtime pointer checks.
+ if (blockNeedsPredication(ST->getParent()))
+ Loc.TBAATag = nullptr;
+
+ Accesses.addStore(Loc);
}
}
++NumReads;
IsReadOnlyPtr = true;
}
- Accesses.addLoad(Ptr, IsReadOnlyPtr);
+
+ AliasAnalysis::Location Loc = AA->getLocation(LD);
+ // The TBAA metadata could have a control dependency on the predication
+ // condition, so we cannot rely on it when determining whether or not we
+ // need runtime pointer checks.
+ if (blockNeedsPredication(LD->getParent()))
+ Loc.TBAATag = nullptr;
+
+ Accesses.addLoad(Loc, IsReadOnlyPtr);
}
// If we write (or read-write) to a single destination and there are no
static const char lv_name[] = "Loop Vectorization";
INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
--- /dev/null
+; RUN: opt < %s -tbaa -basicaa -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -simplifycfg -S | FileCheck %s
+; RUN: opt < %s -basicaa -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -simplifycfg -S | FileCheck %s --check-prefix=CHECK-NOTBAA
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @test1(i32* nocapture %a, float* nocapture readonly %b) #0 {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !tbaa !0
+ %conv = fptosi float %0 to i32
+ %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
+ store i32 %conv, i32* %arrayidx2, align 4, !tbaa !4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1600
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret i32 0
+
+; TBAA partitions the accesses in this loop, so it can be vectorized without
+; runtime checks.
+
+; CHECK-LABEL: @test1
+; CHECK: entry:
+; CHECK-NEXT: br label %vector.body
+; CHECK: vector.body:
+
+; CHECK: load <4 x float>* %{{.*}}, align 4, !tbaa
+; CHECK: store <4 x i32> %{{.*}}, <4 x i32>* %{{.*}}, align 4, !tbaa
+
+; CHECK: ret i32 0
+
+; CHECK-NOTBAA-LABEL: @test1
+; CHECK-NOTBAA: icmp uge i32*
+
+; CHECK-NOTBAA: load <4 x float>* %{{.*}}, align 4, !tbaa
+; CHECK-NOTBAA: store <4 x i32> %{{.*}}, <4 x i32>* %{{.*}}, align 4, !tbaa
+
+; CHECK-NOTBAA: ret i32 0
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test2(i32* nocapture readonly %a, float* nocapture readonly %b, float* nocapture %c) #0 {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !tbaa !0
+ %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %1 = load i32* %arrayidx2, align 4, !tbaa !4
+ %conv = sitofp i32 %1 to float
+ %mul = fmul float %0, %conv
+ %arrayidx4 = getelementptr inbounds float* %c, i64 %indvars.iv
+ store float %mul, float* %arrayidx4, align 4, !tbaa !0
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1600
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret i32 0
+
+; This test is like the first, except here there is still one runtime check
+; required. Without TBAA, however, two checks are required.
+
+; CHECK-LABEL: @test2
+; CHECK: icmp uge float*
+; CHECK: icmp uge float*
+; CHECK-NOT: icmp uge i32*
+
+; CHECK: load <4 x float>* %{{.*}}, align 4, !tbaa
+; CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 4, !tbaa
+
+; CHECK: ret i32 0
+
+; CHECK-NOTBAA-LABEL: @test2
+; CHECK-NOTBAA: icmp uge float*
+; CHECK-NOTBAA: icmp uge float*
+; CHECK-NOTBAA-DAG: icmp uge float*
+; CHECK-NOTBAA-DAG: icmp uge i32*
+
+; CHECK-NOTBAA: load <4 x float>* %{{.*}}, align 4, !tbaa
+; CHECK-NOTBAA: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 4, !tbaa
+
+; CHECK-NOTBAA: ret i32 0
+}
+
+attributes #0 = { nounwind uwtable }
+
+!0 = metadata !{metadata !1, metadata !1, i64 0}
+!1 = metadata !{metadata !"float", metadata !2, i64 0}
+!2 = metadata !{metadata !"omnipotent char", metadata !3, i64 0}
+!3 = metadata !{metadata !"Simple C/C++ TBAA"}
+!4 = metadata !{metadata !5, metadata !5, i64 0}
+!5 = metadata !{metadata !"int", metadata !2, i64 0}
+