From a9779bfbc9ab0cf3f157453fd0afd110b04a9fdc Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Tue, 30 Oct 2012 20:17:37 +0000 Subject: [PATCH] BBVectorize: Cache fixed-order pairs instead of recomputing pointer info. Instead of recomputing relative pointer information just prior to fusing, cache this information (which also needs to be computed during the candidate-pair selection process). This cuts down on the total number of SE queries made, and also is a necessary intermediate step on the road toward including shuffle costs in the pair selection procedure. No functionality change is intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167049 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/BBVectorize.cpp | 85 ++++++++++-------------- 1 file changed, 34 insertions(+), 51 deletions(-) diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 32a18f21085..051606b6d80 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -216,6 +216,7 @@ namespace { bool getCandidatePairs(BasicBlock &BB, BasicBlock::iterator &Start, std::multimap &CandidatePairs, + DenseSet &FixedOrderPairs, DenseMap &CandidatePairCostSavings, std::vector &PairableInsts, bool NonPow2Len); @@ -237,13 +238,14 @@ namespace { void fuseChosenPairs(BasicBlock &BB, std::vector &PairableInsts, - DenseMap& ChosenPairs); + DenseMap& ChosenPairs, + DenseSet &FixedOrderPairs); bool isInstVectorizable(Instruction *I, bool &IsSimpleLoadStore); bool areInstsCompatible(Instruction *I, Instruction *J, bool IsSimpleLoadStore, bool NonPow2Len, - int &CostSavings); + int &CostSavings, int &FixedOrder); bool trackUsesOfI(DenseSet &Users, AliasSetTracker &WriteSet, Instruction *I, @@ -332,10 +334,6 @@ namespace { DenseMap &ChosenPairs, std::multimap &LoadMoveSet); - void collectPtrInfo(std::vector &PairableInsts, - DenseMap &ChosenPairs, - DenseSet &LowPtrInsts); - bool canMoveUsesOfIAfterJ(BasicBlock &BB, std::multimap &LoadMoveSet, Instruction *I, Instruction *J); @@ -648,12 +646,15 @@ namespace { std::vector AllPairableInsts; DenseMap AllChosenPairs; + DenseSet AllFixedOrderPairs; do { std::vector PairableInsts; std::multimap CandidatePairs; + DenseSet FixedOrderPairs; DenseMap CandidatePairCostSavings; ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs, + FixedOrderPairs, CandidatePairCostSavings, PairableInsts, NonPow2Len); if (PairableInsts.empty()) continue; @@ -690,6 +691,14 @@ namespace { AllPairableInsts.insert(AllPairableInsts.end(), PairableInsts.begin(), PairableInsts.end()); AllChosenPairs.insert(ChosenPairs.begin(), ChosenPairs.end()); + + for (DenseMap::iterator I = ChosenPairs.begin(), + IE = ChosenPairs.end(); I != IE; ++I) { + if (FixedOrderPairs.count(*I)) + AllFixedOrderPairs.insert(*I); + else if (FixedOrderPairs.count(ValuePair(I->second, I->first))) + AllFixedOrderPairs.insert(ValuePair(I->second, I->first)); + } } while (ShouldContinue); if (AllChosenPairs.empty()) return false; @@ -702,7 +711,7 @@ namespace { // replaced with a vector_extract on the result. Subsequent optimization // passes should coalesce the build/extract combinations. - fuseChosenPairs(BB, AllPairableInsts, AllChosenPairs); + fuseChosenPairs(BB, AllPairableInsts, AllChosenPairs, AllFixedOrderPairs); // It is important to cleanup here so that future iterations of this // function have less work to do. @@ -816,11 +825,12 @@ namespace { // in the use tree of I. bool BBVectorize::areInstsCompatible(Instruction *I, Instruction *J, bool IsSimpleLoadStore, bool NonPow2Len, - int &CostSavings) { + int &CostSavings, int &FixedOrder) { DEBUG(if (DebugInstructionExamination) dbgs() << "BBV: looking at " << *I << " <-> " << *J << "\n"); CostSavings = 0; + FixedOrder = 0; // Loads and stores can be merged if they have different alignments, // but are otherwise the same. @@ -846,6 +856,7 @@ namespace { if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, IAddressSpace, JAddressSpace, OffsetInElmts) && abs64(OffsetInElmts) == 1) { + FixedOrder = (int) OffsetInElmts; unsigned BottomAlignment = IAlignment; if (OffsetInElmts < 0) BottomAlignment = JAlignment; @@ -992,6 +1003,7 @@ namespace { bool BBVectorize::getCandidatePairs(BasicBlock &BB, BasicBlock::iterator &Start, std::multimap &CandidatePairs, + DenseSet &FixedOrderPairs, DenseMap &CandidatePairCostSavings, std::vector &PairableInsts, bool NonPow2Len) { BasicBlock::iterator E = BB.end(); @@ -1029,9 +1041,9 @@ namespace { // J does not use I, and comes before the first use of I, so it can be // merged with I if the instructions are compatible. - int CostSavings; + int CostSavings, FixedOrder; if (!areInstsCompatible(I, J, IsSimpleLoadStore, NonPow2Len, - CostSavings)) continue; + CostSavings, FixedOrder)) continue; // J is a candidate for merging with I. if (!PairableInsts.size() || @@ -1044,6 +1056,11 @@ namespace { CandidatePairCostSavings.insert(ValuePairWithCost(ValuePair(I, J), CostSavings)); + if (FixedOrder == 1) + FixedOrderPairs.insert(ValuePair(I, J)); + else if (FixedOrder == -1) + FixedOrderPairs.insert(ValuePair(J, I)); + // The next call to this function must start after the last instruction // selected during this invocation. if (JAfterStart) { @@ -2341,37 +2358,6 @@ namespace { } } - // As with the aliasing information, SCEV can also change because of - // vectorization. This information is used to compute relative pointer - // offsets; the necessary information will be cached here prior to - // fusion. - void BBVectorize::collectPtrInfo(std::vector &PairableInsts, - DenseMap &ChosenPairs, - DenseSet &LowPtrInsts) { - for (std::vector::iterator PI = PairableInsts.begin(), - PIE = PairableInsts.end(); PI != PIE; ++PI) { - DenseMap::iterator P = ChosenPairs.find(*PI); - if (P == ChosenPairs.end()) continue; - - Instruction *I = cast(P->first); - Instruction *J = cast(P->second); - - if (!isa(I) && !isa(I)) - continue; - - Value *IPtr, *JPtr; - unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace; - int64_t OffsetInElmts; - if (!getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, - IAddressSpace, JAddressSpace, - OffsetInElmts) || abs64(OffsetInElmts) != 1) - llvm_unreachable("Pre-fusion pointer analysis failed"); - - Value *LowPI = (OffsetInElmts > 0) ? I : J; - LowPtrInsts.insert(LowPI); - } - } - // When the first instruction in each pair is cloned, it will inherit its // parent's metadata. This metadata must be combined with that of the other // instruction in a safe way. @@ -2405,7 +2391,8 @@ namespace { // second member). void BBVectorize::fuseChosenPairs(BasicBlock &BB, std::vector &PairableInsts, - DenseMap &ChosenPairs) { + DenseMap &ChosenPairs, + DenseSet &FixedOrderPairs) { LLVMContext& Context = BB.getContext(); // During the vectorization process, the order of the pairs to be fused @@ -2423,9 +2410,6 @@ namespace { std::multimap LoadMoveSet; collectLoadMoveSet(BB, PairableInsts, ChosenPairs, LoadMoveSet); - DenseSet LowPtrInsts; - collectPtrInfo(PairableInsts, ChosenPairs, LowPtrInsts); - DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n"); for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) { @@ -2465,12 +2449,11 @@ namespace { continue; } - bool FlipMemInputs = false; - if (isa(I) || isa(I)) - FlipMemInputs = (LowPtrInsts.find(I) == LowPtrInsts.end()); + // If the pair must have the other order, then flip it. + bool FlipPairOrder = FixedOrderPairs.count(ValuePair(J, I)); Instruction *L = I, *H = J; - if (FlipMemInputs) + if (FlipPairOrder) std::swap(H, L); unsigned NumOperands = I->getNumOperands(); @@ -2492,10 +2475,10 @@ namespace { // If we've flipped the memory inputs, make sure that we take the correct // alignment. - if (FlipMemInputs) { + if (FlipPairOrder) { if (isa(K)) cast(K)->setAlignment(cast(J)->getAlignment()); - else + else if (isa(K)) cast(K)->setAlignment(cast(J)->getAlignment()); } -- 2.34.1