X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTransforms%2FVectorize%2FLoopVectorize.cpp;h=c5b8b5b073d6779f110d4ace19c05f14620ebe33;hp=1a6021cb3c88d3804c2ea4c4f04869448e1e564f;hb=aaaedd7f8f7d68b41ad3845d050365c03163942f;hpb=2c4a333422f4fb220f0afcb1ba316940cb92db31 diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 1a6021cb3c8..c5b8b5b073d 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -144,7 +144,7 @@ static cl::opt MaximizeBandwidth( /// ... static cl::opt EnableMemAccessVersioning( "enable-mem-access-versioning", cl::init(true), cl::Hidden, - cl::desc("Enable symblic stride memory access versioning")); + cl::desc("Enable symbolic stride memory access versioning")); static cl::opt EnableInterleavedMemAccesses( "enable-interleaved-mem-accesses", cl::init(false), cl::Hidden, @@ -325,7 +325,7 @@ public: // can be validly truncated to. The cost model has assumed this truncation // will happen when vectorizing. void vectorize(LoopVectorizationLegality *L, - DenseMap MinimumBitWidths) { + MapVector MinimumBitWidths) { MinBWs = MinimumBitWidths; Legal = L; // Create a new empty loop. Unlink the old loop and connect the new one. @@ -546,7 +546,7 @@ protected: /// Map of scalar integer values to the smallest bitwidth they can be legally /// represented as. The vector equivalents of these values should be truncated /// to this type. - DenseMap MinBWs; + MapVector MinBWs; LoopVectorizationLegality *Legal; // Record whether runtime check is added. @@ -648,7 +648,8 @@ static void propagateMetadata(Instruction *To, const Instruction *From) { } /// \brief Propagate known metadata from one instruction to a vector of others. 
-static void propagateMetadata(SmallVectorImpl &To, const Instruction *From) { +static void propagateMetadata(SmallVectorImpl &To, + const Instruction *From) { for (Value *V : To) if (Instruction *I = dyn_cast(V)) propagateMetadata(I, From); @@ -1229,6 +1230,9 @@ public: /// Returns True if V is an induction variable in this loop. bool isInductionVariable(const Value *V); + /// Returns True if PN is a reduction variable in this loop. + bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); } + /// Return true if the block BB needs to be predicated in order for the loop /// to be vectorized. bool blockNeedsPredication(BasicBlock *BB); @@ -1501,7 +1505,7 @@ public: /// Map of scalar integer values to the smallest bitwidth they can be legally /// represented as. The vector equivalents of these values should be truncated /// to this type. - DenseMap MinBWs; + MapVector MinBWs; /// The loop that we evaluate. Loop *TheLoop; @@ -2100,7 +2104,6 @@ InnerLoopVectorizer::getVectorValue(Value *V) { // If this scalar is unknown, assume that it is a constant or that it is // loop invariant. Broadcast V and save the value for future uses. Value *B = getBroadcastInstrs(V); - return WidenMap.splat(V, B); } @@ -2524,7 +2527,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { } } -void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredicateStore) { +void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, + bool IfPredicateStore) { assert(!Instr->getType()->isAggregateType() && "Can't handle vectors"); // Holds vector parameters or scalars, in case of uniform vals. 
SmallVector Params; @@ -2586,7 +2590,8 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic Value *Cmp = nullptr; if (IfPredicateStore) { Cmp = Builder.CreateExtractElement(Cond[Part], Builder.getInt32(Width)); - Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Cmp, ConstantInt::get(Cmp->getType(), 1)); + Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Cmp, + ConstantInt::get(Cmp->getType(), 1)); } Instruction *Cloned = Instr->clone(); @@ -2654,7 +2659,8 @@ Value *InnerLoopVectorizer::getOrCreateTripCount(Loop *L) { IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); // Find the loop boundaries. const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(OrigLoop); - assert(BackedgeTakenCount != SE->getCouldNotCompute() && "Invalid loop count"); + assert(BackedgeTakenCount != SE->getCouldNotCompute() && + "Invalid loop count"); Type *IdxTy = Legal->getWidestInductionType(); @@ -3307,7 +3313,7 @@ void InnerLoopVectorizer::vectorizeLoop() { assert(RdxPhi && "Unable to recover vectorized PHI"); // Find the reduction variable descriptor. - assert(Legal->getReductionVars()->count(RdxPhi) && + assert(Legal->isReductionVariable(RdxPhi) && "Unable to find the reduction variable"); RecurrenceDescriptor RdxDesc = (*Legal->getReductionVars())[RdxPhi]; @@ -3602,12 +3608,12 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) { return BlockMask; } -void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, - InnerLoopVectorizer::VectorParts &Entry, - unsigned UF, unsigned VF, PhiVector *PV) { +void InnerLoopVectorizer::widenPHIInstruction( + Instruction *PN, InnerLoopVectorizer::VectorParts &Entry, unsigned UF, + unsigned VF, PhiVector *PV) { PHINode* P = cast(PN); // Handle reduction variables: - if (Legal->getReductionVars()->count(P)) { + if (Legal->isReductionVariable(P)) { for (unsigned part = 0; part < UF; ++part) { // This is phase one of vectorizing PHIs. Type *VecTy = (VF == 1) ? 
PN->getType() : @@ -3669,7 +3675,8 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, case InductionDescriptor::IK_NoInduction: llvm_unreachable("Unknown induction"); case InductionDescriptor::IK_IntInduction: { - assert(P->getType() == II.getStartValue()->getType() && "Types must match"); + assert(P->getType() == II.getStartValue()->getType() && + "Types must match"); // Handle other induction variables that are now based on the // canonical one. Value *V = Induction; @@ -3854,9 +3861,10 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction, CI->getType()); Value *Broadcasted = getBroadcastInstrs(ScalarCast); - InductionDescriptor II = Legal->getInductionVars()->lookup(OldInduction); - Constant *Step = - ConstantInt::getSigned(CI->getType(), II.getStepValue()->getSExtValue()); + InductionDescriptor II = + Legal->getInductionVars()->lookup(OldInduction); + Constant *Step = ConstantInt::getSigned( + CI->getType(), II.getStepValue()->getSExtValue()); for (unsigned Part = 0; Part < UF; ++Part) Entry[Part] = getStepVector(Broadcasted, VF * Part, Step); propagateMetadata(Entry, &*it); @@ -4522,8 +4530,8 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB, if (++NumPredStores > NumberOfStoresToPredicate || !isSafePtr || !isSinglePredecessor) { - // Build a masked store if it is legal for the target, otherwise scalarize - // the block. + // Build a masked store if it is legal for the target, otherwise + // scalarize the block. bool isLegalMaskedOp = isLegalMaskedStore(SI->getValueOperand()->getType(), SI->getPointerOperand()); @@ -4883,7 +4891,7 @@ LoopVectorizationCostModel::getSmallestAndWidestTypes() { // Examine PHI nodes that are reduction variables. Update the type to // account for the recurrence type. 
if (PHINode *PN = dyn_cast(it)) { - if (!Legal->getReductionVars()->count(PN)) + if (!Legal->isReductionVariable(PN)) continue; RecurrenceDescriptor RdxDesc = (*Legal->getReductionVars())[PN]; T = RdxDesc.getRecurrenceType(); @@ -5050,8 +5058,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, } // Interleave if this is a large loop (small loops are already dealt with by - // this - // point) that could benefit from interleaving. + // this point) that could benefit from interleaving. bool HasReductions = (Legal->getReductionVars()->size() > 0); if (TTI.enableAggressiveInterleaving(HasReductions)) { DEBUG(dbgs() << "LV: Interleaving to expose ILP.\n"); @@ -5181,7 +5188,7 @@ LoopVectorizationCostModel::calculateRegisterUsage( continue; } - // Count the number of live interals. + // Count the number of live intervals. unsigned RegUsage = 0; for (auto Inst : OpenIntervals) RegUsage += GetRegUsage(Inst->getType(), VFs[j]); @@ -5409,8 +5416,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { case Instruction::ICmp: case Instruction::FCmp: { Type *ValTy = I->getOperand(0)->getType(); - if (VF > 1 && MinBWs.count(dyn_cast(I->getOperand(0)))) - ValTy = IntegerType::get(ValTy->getContext(), MinBWs[I]); + Instruction *Op0AsInstruction = dyn_cast(I->getOperand(0)); + auto It = MinBWs.find(Op0AsInstruction); + if (VF > 1 && It != MinBWs.end()) + ValTy = IntegerType::get(ValTy->getContext(), It->second); VectorTy = ToVectorTy(ValTy, VF); return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy); }