From 108f92e376fd5f7c7c5c4e22dd10c1a5b279b1e3 Mon Sep 17 00:00:00 2001 From: Devang Patel Date: Tue, 2 Sep 2008 22:18:08 +0000 Subject: [PATCH] If all IV uses are extending integer IV then change the type of IV itself, if possible. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@55674 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/LoopStrengthReduce.cpp | 201 ++++++++++++++++++ .../LoopStrengthReduce/2008-09-02-IVType.ll | 57 +++++ 2 files changed, 258 insertions(+) create mode 100644 test/Transforms/LoopStrengthReduce/2008-09-02-IVType.ll diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 9c1a95331e3..83231c1ae81 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -46,6 +46,7 @@ STATISTIC(NumInserted, "Number of PHIs inserted"); STATISTIC(NumVariable, "Number of PHIs with variable strides"); STATISTIC(NumEliminated, "Number of strides eliminated"); STATISTIC(NumShadow, "Number of Shadow IVs optimized"); +STATISTIC(NumIVType, "Number of IV types optimized"); namespace { @@ -184,6 +185,10 @@ private: /// inside the loop then try to eliminate the cast opeation. void OptimizeShadowIV(Loop *L); + /// OptimizeIVType - If IV is always sext'ed or zext'ed then + /// change the type of IV, if possible. + void OptimizeIVType(Loop *L); + bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse, const SCEVHandle *&CondStride); bool RequiresTypeConversion(const Type *Ty, const Type *NewTy); @@ -1808,6 +1813,200 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { } } +/// suitableExtInstruction - Helper function used by OptimizeIVType. +/// If I is a suitable SEXT or ZEXT instruction then return type +/// to which I is extended to. Otherwise return NULL. +const Type *suitableExtInstruction(Instruction *I, bool isSigned, + const Type *ExtType) { + + const Type *DestType = NULL; + if (ZExtInst *ZI = dyn_cast(I)) + DestType = ZI->getDestTy(); + else if (SExtInst *SI = dyn_cast(I)) { + // If the inital value is signed then this is not suitable for + // OptimizeIVType transformation. + if (isSigned) + return NULL; + DestType = SI->getDestTy(); + } + + if (!DestType) return NULL; + + if (!ExtType) + return DestType; + + // If another use of IV extended to some other type then the IV is not + // suitable for OptimizeIVType transformation. + if (ExtType != DestType) + return NULL; + + return DestType; +} + +/// suitableIVIncr - Helper function used by OptimizeIVType. If I is +/// a suitable binary operator whose all uses are either SEXT or ZEXT +/// then return the type to which all uses are extended to. Otherwise +/// return NULL. +const Type *suitableIVIncr(Instruction *I, + Instruction *PHI, bool isSigned, + const Type *ExtType) { + + BinaryOperator *Incr = dyn_cast(I); + if (!Incr) return NULL; + + if (Incr->getOpcode() != Instruction::Add) + return NULL; + + ConstantInt *C = NULL; + if (Incr->getOperand(0) == PHI) + C = dyn_cast(Incr->getOperand(1)); + else if (Incr->getOperand(1) == PHI) + C = dyn_cast(Incr->getOperand(0)); + + if (!C) return NULL; + + const Type *RExtType = NULL; + for (Value::use_iterator IncUI = Incr->use_begin(), + IncUE = Incr->use_end(); IncUI != IncUE; ++IncUI) { + + Instruction *U2 = dyn_cast(*IncUI); + if (U2 == PHI) + continue; + const Type *DestType = suitableExtInstruction(U2, isSigned, ExtType); + if (!DestType) + return NULL; + + if (!RExtType) + RExtType = DestType; + + if (DestType != RExtType) + return NULL; + } + + return RExtType; + +} + +/// getNewPHIIncrement - Create a new increment instruction for newPHI +/// using type Ty based on increment instruction Incr. +/// Helper function used by OptimizeIVType. +BinaryOperator *getNewPHIIncrement(BinaryOperator *Incr, PHINode *PHI, + PHINode *NewPHI, const Type *Ty) { + ConstantInt *C = NULL; + if (Incr->getOperand(0) == PHI) + C = dyn_cast(Incr->getOperand(1)); + else if (Incr->getOperand(1) == PHI) + C = dyn_cast(Incr->getOperand(0)); + + assert (C && "Unexpected Incr operand!"); + return BinaryOperator::Create(Incr->getOpcode(), NewPHI, + ConstantInt::get(Ty, C->getZExtValue()), + "IV.next", Incr); +} + +/// OptimizeIVType - If IV is always sext'ed or zext'ed then +/// change the type of IV, if possible. +void LoopStrengthReduce::OptimizeIVType(Loop *L) { + + BasicBlock *LPH = L->getLoopPreheader(); + SmallVector PHIs; + for (BasicBlock::iterator BI = L->getHeader()->begin(), + BE = L->getHeader()->end(); BI != BE; ++BI) { + if (PHINode *PHI = dyn_cast(BI)) + PHIs.push_back(PHI); + else + break; + } + + while(!PHIs.empty()) { + PHINode *PHI = PHIs.back(); PHIs.pop_back(); + if (PHI->getNumIncomingValues() != 2) continue; + + unsigned Entry = 0, Latch = 1; + if (PHI->getIncomingBlock(0) != LPH) { + Entry = 1; + Latch = 0; + } + + ConstantInt *CInit = dyn_cast(PHI->getIncomingValue(Entry)); + if (!CInit) return; + + bool signedInit = CInit->getValue().isNegative(); + + bool TransformPhi = true; + const Type *ExtType = NULL; + BinaryOperator *Incr = NULL; + SmallVector PHIUses; + + // Collect all IV uses. + for (Value::use_iterator UI = PHI->use_begin(), + UE = PHI->use_end(); UI != UE; ++UI) { + Instruction *Use = dyn_cast(*UI); + if (!Use) { + TransformPhi = false; + break; + } + + ExtType = suitableIVIncr(Use, PHI, signedInit, ExtType); + if (ExtType) { + Incr = cast(Use); + continue; + } + ExtType = suitableExtInstruction(Use, signedInit, ExtType); + if (ExtType) { + PHIUses.push_back(Use); + continue; + } + + TransformPhi = false; + break; + } + + if (!TransformPhi || Incr == false || PHIUses.empty()) + continue; + + // Apply transformation. Extend IV type and eliminate SEXT or ZEXT + // instructions. + NumIVType++; + + PHINode *NewPH = PHINode::Create(ExtType, "IV", PHI); + ConstantInt *NewCInit = ConstantInt::get(ExtType, CInit->getZExtValue()); + BinaryOperator *NewIncr = getNewPHIIncrement(Incr, PHI, NewPH, ExtType); + + NewPH->addIncoming(NewCInit, PHI->getIncomingBlock(Entry)); + NewPH->addIncoming(NewIncr, PHI->getIncomingBlock(Latch)); + + // Replace all SEXT or ZEXT uses with new IV directly. + while (!PHIUses.empty()) { + Instruction *Use = PHIUses.back(); PHIUses.pop_back(); + SE->deleteValueFromRecords(Use); + Use->replaceAllUsesWith(NewPH); + Use->eraseFromParent(); + } + + // Replace all uses of IV increment with new increment. + SmallVector IncrUses; + for (Value::use_iterator UI2 = Incr->use_begin(), + UE2 = Incr->use_end(); UI2 != UE2; ++UI2) + IncrUses.push_back(cast(*UI2)); + + while (!IncrUses.empty()) { + Instruction *Use = IncrUses.back(); IncrUses.pop_back(); + if (Use == PHI) continue; + SE->deleteValueFromRecords(Use); + Use->replaceAllUsesWith(NewIncr); + Use->eraseFromParent(); + } + + // Remove old PHI and increment instruction. + SE->deleteValueFromRecords(PHI); + PHI->removeIncomingValue(Entry); + PHI->removeIncomingValue(Latch); + SE->deleteValueFromRecords(Incr); + Incr->eraseFromParent(); + } +} + // OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar // uses in the loop, look to see if we can eliminate some, in favor of using // common indvars for the different uses. @@ -1877,6 +2076,8 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { UIntPtrTy = TD->getIntPtrType(); Changed = false; + OptimizeIVType(L); + // Find all uses of induction variables in this loop, and catagorize // them by stride. Start by finding all of the PHI nodes in the header for // this loop. If they are induction variables, inspect their uses. diff --git a/test/Transforms/LoopStrengthReduce/2008-09-02-IVType.ll b/test/Transforms/LoopStrengthReduce/2008-09-02-IVType.ll new file mode 100644 index 00000000000..772d9dda0f5 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/2008-09-02-IVType.ll @@ -0,0 +1,57 @@ +; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | grep sext | count 1 +; ModuleID = '' + %struct.App1Marker = type <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }> + %struct.ComponentInstanceRecord = type <{ [1 x i32] }> + %struct.DCPredictors = type { [5 x i16] } + %struct.DecodeTable = type { i16, i16, i16, i16, i8**, i8** } + %struct.ICMDataProcRecord = type <{ i16 (i8**, i32, i32)*, i32 }> + %struct.JPEGBitStream = type { i8*, i32, i32, i32, i32, i32, %struct.App1Marker*, i8*, i32, i16, i16, i32 } + %struct.JPEGGlobals = type { [2048 x i8], %struct.JPEGBitStream, i8*, i32, i32, %struct.ComponentInstanceRecord*, %struct.ComponentInstanceRecord*, i32, %struct.OpaqueQTMLMutex*, %struct.Rect, i32, i32, %struct.SharedGlobals, %struct.DCPredictors, i8, i8, void (i8*, i16**, i32, %struct.YUVGeneralParams*)*, %struct.YUVGeneralParams, i16, i16, i32, [5 x i16*], [5 x %struct.DecodeTable*], [5 x %struct.DecodeTable*], [5 x i8], [5 x i8], [4 x [65 x i16]], [4 x %struct.DecodeTable], [4 x %struct.DecodeTable], [4 x i8*], [4 x i8*], i16, i16, i32, i8**, i8**, i8**, i8**, i8**, i8**, i8**, i8**, i8**, i8**, [18 x i8], [18 x i8], [18 x i8], [18 x i8], i32, i32, i8**, i8**, i8, i8, i8, i8, i16, i16, %struct.App1Marker*, i8, i8, i8, i8, i32**, i8*, i16*, i8*, i16*, i8, [3 x i8], i32, [3 x i32], [3 x i32], [3 x i32], [3 x i32], [3 x i32], [3 x i16*], [3 x i16*], [3 x i8**], [3 x %struct.DecodeTable*], [3 x %struct.DecodeTable*], [3 x i32], i32, [3 x i16*], i32, i32, i32, [3 x i32], i8, i8, i8, i8, %struct.ICMDataProcRecord*, i32, i32, i8**, i8**, i8**, i8**, i32, i32, i8*, i32, i32, i16*, i16*, i8*, i32, i32, i32, i32, i32, i32, i32, [16 x <2 x i64>], [1280 x i8], i8 } + %struct.OpaqueQTMLMutex = type opaque + %struct.Rect = type { i16, i16, i16, i16 } + %struct.SharedDGlobals = type { %struct.DecodeTable, %struct.DecodeTable, %struct.DecodeTable, %struct.DecodeTable } + %struct.SharedEGlobals = type { i8**, i8**, i8**, i8** } + %struct.SharedGlobals = type { %struct.SharedEGlobals*, %struct.SharedDGlobals* } + %struct.YUVGeneralParams = type { i16*, i8*, i8*, i8*, i8*, i8*, void (i8*, i16**, i32, %struct.YUVGeneralParams*)*, i16, i16, i16, [6 x i8], void (i8*, i16**, i32, %struct.YUVGeneralParams*)*, i16, i16 } +@llvm.used = appending global [1 x i8*] [ i8* bitcast (i16 (%struct.JPEGGlobals*)* @ExtractBufferedBlocksIgnored to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define i16 @ExtractBufferedBlocksIgnored(%struct.JPEGGlobals* %globp) signext nounwind { +entry: + %tmp4311 = getelementptr %struct.JPEGGlobals* %globp, i32 0, i32 70 ; [#uses=1] + %tmp4412 = load i32* %tmp4311, align 16 ; [#uses=2] + %tmp4613 = icmp sgt i32 %tmp4412, 0 ; [#uses=1] + br i1 %tmp4613, label %bb, label %bb49 + +bb: ; preds = %bb28, %entry + %component.09 = phi i16 [ 0, %entry ], [ %tmp37, %bb28 ] ; [#uses=2] + %tmp12 = sext i16 %component.09 to i32 ; [#uses=2] + %tmp6 = getelementptr %struct.JPEGGlobals* %globp, i32 0, i32 77, i32 %tmp12 ; [#uses=2] + %tmp7 = load i16** %tmp6, align 4 ; [#uses=2] + %tmp235 = getelementptr %struct.JPEGGlobals* %globp, i32 0, i32 71, i32 %tmp12 ; [#uses=1] + %tmp246 = load i32* %tmp235, align 4 ; [#uses=2] + %tmp267 = icmp sgt i32 %tmp246, 0 ; [#uses=1] + br i1 %tmp267, label %bb8, label %bb28 + +bb8: ; preds = %bb8, %bb + %indvar = phi i32 [ 0, %bb ], [ %indvar.next2, %bb8 ] ; [#uses=3] + %theDCTBufferIter.01.rec = shl i32 %indvar, 6 ; [#uses=1] + %tmp10.rec = add i32 %theDCTBufferIter.01.rec, 64 ; [#uses=1] + %tmp10 = getelementptr i16* %tmp7, i32 %tmp10.rec ; [#uses=1] + %i.02 = trunc i32 %indvar to i16 ; [#uses=1] + %tmp13 = add i16 %i.02, 1 ; [#uses=1] + %phitmp = sext i16 %tmp13 to i32 ; [#uses=1] + %tmp26 = icmp slt i32 %phitmp, %tmp246 ; [#uses=1] + %indvar.next2 = add i32 %indvar, 1 ; [#uses=1] + br i1 %tmp26, label %bb8, label %bb28 + +bb28: ; preds = %bb8, %bb + %theDCTBufferIter.0.lcssa = phi i16* [ %tmp7, %bb ], [ %tmp10, %bb8 ] ; [#uses=1] + store i16* %theDCTBufferIter.0.lcssa, i16** %tmp6, align 4 + %tmp37 = add i16 %component.09, 1 ; [#uses=2] + %phitmp15 = sext i16 %tmp37 to i32 ; [#uses=1] + %tmp46 = icmp slt i32 %phitmp15, %tmp4412 ; [#uses=1] + br i1 %tmp46, label %bb, label %bb49 + +bb49: ; preds = %bb28, %entry + ret i16 0 +} -- 2.34.1