From 722b0a4d293b16eebaed94ae65d5f11743cbcea5 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Sun, 23 Jun 2013 21:57:27 +0000 Subject: [PATCH] SLP Vectorizer: Fix a bug in the code that does CSE on the generated gather sequences. Make sure that we don't replace one sequence with another (RAUW) unless the replacement dominates the sequence being replaced. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184674 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 14 ++++-- test/Transforms/SLPVectorizer/X86/cse.ll | 51 ++++++++++++++++++++++ 2 files changed, 61 insertions(+), 4 deletions(-) diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 838cb9599cb..5bc3d852e79 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -127,8 +127,9 @@ public: static const int MAX_COST = INT_MIN; FuncSLP(Function *Func, ScalarEvolution *Se, DataLayout *Dl, - TargetTransformInfo *Tti, AliasAnalysis *Aa, LoopInfo *Li) : - F(Func), SE(Se), DL(Dl), TTI(Tti), AA(Aa), LI(Li), + TargetTransformInfo *Tti, AliasAnalysis *Aa, LoopInfo *Li, + DominatorTree *Dt) : + F(Func), SE(Se), DL(Dl), TTI(Tti), AA(Aa), LI(Li), DT(Dt), Builder(Se->getContext()) { for (Function::iterator it = F->begin(), e = F->end(); it != e; ++it) { BasicBlock *BB = it; @@ -255,6 +256,7 @@ public: TargetTransformInfo *TTI; AliasAnalysis *AA; LoopInfo *LI; + DominatorTree *DT; /// Instruction builder to construct the vectorized tree. IRBuilder<> Builder; }; @@ -1197,7 +1199,8 @@ void FuncSLP::optimizeGatherSequence() { // visited instructions. 
for (SmallPtrSet::iterator v = Visited.begin(), ve = Visited.end(); v != ve; ++v) { - if (Insert->isIdenticalTo(*v)) { + if (Insert->isIdenticalTo(*v) && + DT->dominates((*v)->getParent(), Insert->getParent())) { Insert->replaceAllUsesWith(*v); break; } @@ -1224,6 +1227,7 @@ struct SLPVectorizer : public FunctionPass { TargetTransformInfo *TTI; AliasAnalysis *AA; LoopInfo *LI; + DominatorTree *DT; virtual bool runOnFunction(Function &F) { SE = &getAnalysis(); @@ -1231,6 +1235,7 @@ struct SLPVectorizer : public FunctionPass { TTI = &getAnalysis(); AA = &getAnalysis(); LI = &getAnalysis(); + DT = &getAnalysis(); StoreRefs.clear(); bool Changed = false; @@ -1244,7 +1249,7 @@ struct SLPVectorizer : public FunctionPass { // Use the bollom up slp vectorizer to construct chains that start with // he store instructions. - FuncSLP R(&F, SE, DL, TTI, AA, LI); + FuncSLP R(&F, SE, DL, TTI, AA, LI, DT); for (Function::iterator it = F.begin(), e = F.end(); it != e; ++it) { BasicBlock *BB = it; @@ -1274,6 +1279,7 @@ struct SLPVectorizer : public FunctionPass { AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); } private: diff --git a/test/Transforms/SLPVectorizer/X86/cse.ll b/test/Transforms/SLPVectorizer/X86/cse.ll index 6321b007683..d286798d704 100644 --- a/test/Transforms/SLPVectorizer/X86/cse.ll +++ b/test/Transforms/SLPVectorizer/X86/cse.ll @@ -83,3 +83,54 @@ entry: ret i32 undef } +; int test2(double *G, int k) { +; if (k) { +; G[0] = 1+G[5]*4; +; G[1] = 6+G[6]*3; +; } else { +; G[2] = 7+G[5]*4; +; G[3] = 8+G[6]*3; +; } +; } + +; We can't merge the gather sequences because one does not dominate the other. 
+; CHECK: test2 +; CHECK: insertelement +; CHECK: insertelement +; CHECK: insertelement +; CHECK: insertelement +; CHECK: ret +define i32 @test2(double* nocapture %G, i32 %k) { + %1 = icmp eq i32 %k, 0 + %2 = getelementptr inbounds double* %G, i64 5 + %3 = load double* %2, align 8 + %4 = fmul double %3, 4.000000e+00 + br i1 %1, label %12, label %5 + +;