From 4db6728b3a29b51add70c4f2553c726d30a1988a Mon Sep 17 00:00:00 2001 From: Wei Mi Date: Thu, 30 Jul 2015 17:40:39 +0000 Subject: [PATCH] [SLP vectorizer]: Choose the best consecutive candidate to pair with a store instruction. The patch changes the SLPVectorizer::vectorizeStores to choose the immediate succeeding or preceding candidate for a store instruction when it has multiple consecutive candidates. In this way it has better chance to find more slp vectorization opportunities. Differential Revision: http://reviews.llvm.org/D10445 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@243666 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 25 +++++++++---- test/Transforms/SLPVectorizer/X86/pr23510.ll | 38 ++++++++++++++++++++ 2 files changed, 56 insertions(+), 7 deletions(-) create mode 100644 test/Transforms/SLPVectorizer/X86/pr23510.ll diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index acc8d91adea..b215a256af0 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3262,15 +3262,26 @@ bool SLPVectorizer::vectorizeStores(ArrayRef Stores, // Do a quadratic search on all of the given stores and find // all of the pairs of stores that follow each other. + SmallVector IndexQueue; for (unsigned i = 0, e = Stores.size(); i < e; ++i) { - for (unsigned j = 0; j < e; ++j) { - if (i == j) - continue; - const DataLayout &DL = Stores[i]->getModule()->getDataLayout(); - if (R.isConsecutiveAccess(Stores[i], Stores[j], DL)) { - Tails.insert(Stores[j]); + const DataLayout &DL = Stores[i]->getModule()->getDataLayout(); + IndexQueue.clear(); + // If a store has multiple consecutive store candidates, search Stores + // array according to the sequence: from i+1 to e, then from i-1 to 0. + // This is because usually pairing with immediate succeeding or preceding + // candidate create the best chance to find slp vectorization opportunity. + unsigned j = 0; + for (j = i + 1; j < e; ++j) + IndexQueue.push_back(j); + for (j = i; j > 0; --j) + IndexQueue.push_back(j - 1); + + for (auto &k : IndexQueue) { + if (R.isConsecutiveAccess(Stores[i], Stores[k], DL)) { + Tails.insert(Stores[k]); Heads.insert(Stores[i]); - ConsecutiveChain[Stores[i]] = Stores[j]; + ConsecutiveChain[Stores[i]] = Stores[k]; + break; } } } diff --git a/test/Transforms/SLPVectorizer/X86/pr23510.ll b/test/Transforms/SLPVectorizer/X86/pr23510.ll new file mode 100644 index 00000000000..efdb0ecd999 --- /dev/null +++ b/test/Transforms/SLPVectorizer/X86/pr23510.ll @@ -0,0 +1,38 @@ +; PR23510 +; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK-LABEL: @_Z3fooPml( +; CHECK: lshr <2 x i64> +; CHECK: lshr <2 x i64> + +@total = global i64 0, align 8 + +define void @_Z3fooPml(i64* nocapture %a, i64 %i) { +entry: + %tmp = load i64, i64* %a, align 8 + %shr = lshr i64 %tmp, 4 + store i64 %shr, i64* %a, align 8 + %arrayidx1 = getelementptr inbounds i64, i64* %a, i64 1 + %tmp1 = load i64, i64* %arrayidx1, align 8 + %shr2 = lshr i64 %tmp1, 4 + store i64 %shr2, i64* %arrayidx1, align 8 + %arrayidx3 = getelementptr inbounds i64, i64* %a, i64 %i + %tmp2 = load i64, i64* %arrayidx3, align 8 + %tmp3 = load i64, i64* @total, align 8 + %add = add i64 %tmp3, %tmp2 + store i64 %add, i64* @total, align 8 + %tmp4 = load i64, i64* %a, align 8 + %shr5 = lshr i64 %tmp4, 4 + store i64 %shr5, i64* %a, align 8 + %tmp5 = load i64, i64* %arrayidx1, align 8 + %shr7 = lshr i64 %tmp5, 4 + store i64 %shr7, i64* %arrayidx1, align 8 + %tmp6 = load i64, i64* %arrayidx3, align 8 + %tmp7 = load i64, i64* @total, align 8 + %add9 = add i64 %tmp7, %tmp6 + store i64 %add9, i64* @total, align 8 + ret void +} -- 2.34.1