#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionTracker.h"
+#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
BoUpSLP(Function *Func, ScalarEvolution *Se, const DataLayout *Dl,
TargetTransformInfo *Tti, TargetLibraryInfo *TLi, AliasAnalysis *Aa,
- LoopInfo *Li, DominatorTree *Dt)
+ LoopInfo *Li, DominatorTree *Dt, AssumptionTracker *AT)
: NumLoadsWantToKeepOrder(0), NumLoadsWantToChangeOrder(0),
F(Func), SE(Se), DL(Dl), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt),
- Builder(Se->getContext()) {}
+ Builder(Se->getContext()) {
+ CodeMetrics::collectEphemeralValues(F, AT, EphValues);
+ }
/// \brief Vectorize the tree that starts with the elements in \p VL.
/// Returns the vectorized root.
/// This list holds pairs of (Internal Scalar : External User).
UserList ExternalUses;
+ /// Values used only by @llvm.assume calls.
+ SmallPtrSet<const Value *, 32> EphValues;
+
/// Holds all of the instructions that we gathered.
SetVector<Instruction *> GatherSeq;
/// A list of blocks that we are going to CSE.
// We now know that this is a vector of instructions of the same type from
// the same block.
+ // Don't vectorize ephemeral values.
+ for (unsigned i = 0, e = VL.size(); i != e; ++i) {
+ if (EphValues.count(VL[i])) {
+ DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] <<
+ ") is ephemeral.\n");
+ newTreeEntry(VL, false);
+ return;
+ }
+ }
+
// Check if this is a duplicate of another entry.
if (ScalarToTreeEntry.count(VL[0])) {
int Idx = ScalarToTreeEntry[VL[0]];
if (!ExtractCostCalculated.insert(I->Scalar))
continue;
+ // Uses by ephemeral values are free (because the ephemeral value will be
+ // removed prior to code generation, and so the extraction will be
+ // removed as well).
+ if (EphValues.count(I->User))
+ continue;
+
VectorType *VecTy = VectorType::get(I->Scalar->getType(), BundleWidth);
ExtractCost += TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy,
I->Lane);
AliasAnalysis *AA;
LoopInfo *LI;
DominatorTree *DT;
+ AssumptionTracker *AT;
bool runOnFunction(Function &F) override {
if (skipOptnoneFunction(F))
AA = &getAnalysis<AliasAnalysis>();
LI = &getAnalysis<LoopInfo>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ AT = &getAnalysis<AssumptionTracker>();
StoreRefs.clear();
bool Changed = false;
// Use the bottom up slp vectorizer to construct chains that start with
// store instructions.
- BoUpSLP R(&F, SE, DL, TTI, TLI, AA, LI, DT);
+ BoUpSLP R(&F, SE, DL, TTI, TLI, AA, LI, DT, AT);
// Scan the blocks in the function in post order.
for (po_iterator<BasicBlock*> it = po_begin(&F.getEntryBlock()),
void getAnalysisUsage(AnalysisUsage &AU) const override {
FunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<AssumptionTracker>();
AU.addRequired<ScalarEvolution>();
AU.addRequired<AliasAnalysis>();
AU.addRequired<TargetTransformInfo>();
INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false)
ret <4 x float> %rd
}
+declare void @llvm.assume(i1) nounwind
+
+; This entire tree is ephemeral, don't vectorize any of it.
+define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: @simple_select_eph(
+; CHECK-NOT: icmp ne <4 x i32>
+; CHECK-NOT: select <4 x i1>
+ %c0 = extractelement <4 x i32> %c, i32 0
+ %c1 = extractelement <4 x i32> %c, i32 1
+ %c2 = extractelement <4 x i32> %c, i32 2
+ %c3 = extractelement <4 x i32> %c, i32 3
+ %a0 = extractelement <4 x float> %a, i32 0
+ %a1 = extractelement <4 x float> %a, i32 1
+ %a2 = extractelement <4 x float> %a, i32 2
+ %a3 = extractelement <4 x float> %a, i32 3
+ %b0 = extractelement <4 x float> %b, i32 0
+ %b1 = extractelement <4 x float> %b, i32 1
+ %b2 = extractelement <4 x float> %b, i32 2
+ %b3 = extractelement <4 x float> %b, i32 3
+ %cmp0 = icmp ne i32 %c0, 0
+ %cmp1 = icmp ne i32 %c1, 0
+ %cmp2 = icmp ne i32 %c2, 0
+ %cmp3 = icmp ne i32 %c3, 0
+ %s0 = select i1 %cmp0, float %a0, float %b0
+ %s1 = select i1 %cmp1, float %a1, float %b1
+ %s2 = select i1 %cmp2, float %a2, float %b2
+ %s3 = select i1 %cmp3, float %a3, float %b3
+ %ra = insertelement <4 x float> undef, float %s0, i32 0
+ %rb = insertelement <4 x float> %ra, float %s1, i32 1
+ %rc = insertelement <4 x float> %rb, float %s2, i32 2
+ %rd = insertelement <4 x float> %rc, float %s3, i32 3
+ %q0 = extractelement <4 x float> %rd, i32 0
+ %q1 = extractelement <4 x float> %rd, i32 1
+ %q2 = extractelement <4 x float> %rd, i32 2
+ %q3 = extractelement <4 x float> %rd, i32 3
+ %q4 = fadd float %q0, %q1
+ %q5 = fadd float %q2, %q3
+ %q6 = fadd float %q4, %q5
+ %qi = fcmp olt float %q6, %q5
+ call void @llvm.assume(i1 %qi)
+ ret <4 x float> undef
+}
+
; Insert in an order different from the vector indices to make sure it
; doesn't matter
define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {