From d22a849282c45bbf7eb1734c274294d81e49e3a8 Mon Sep 17 00:00:00 2001
From: Devang Patel
Date: Tue, 9 Sep 2008 21:41:07 +0000
Subject: [PATCH] if loop induction variable is always sign or zero extended then extend the type of induction variable.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@56017 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Scalar/IndVarSimplify.cpp      | 148 +++++++++++++++++-
 .../IndVarsSimplify/2008-09-02-IVType.ll      |  58 +++++++++
 2 files changed, 205 insertions(+), 1 deletion(-)
 create mode 100644 test/Transforms/IndVarsSimplify/2008-09-02-IVType.ll

diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index bfda9cda170..76a07d4601f 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -93,6 +93,8 @@ namespace {
     void RewriteLoopExitValues(Loop *L, SCEV *IterationCount);
 
     void DeleteTriviallyDeadInstructions(std::set<Instruction*> &Insts);
+
+    void OptimizeCanonicalIVType(Loop *L);
   };
 }
 
@@ -597,7 +599,151 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
 #endif
 
   DeleteTriviallyDeadInstructions(DeadInsts);
-
+  OptimizeCanonicalIVType(L);
   assert(L->isLCSSAForm());
   return Changed;
 }
+
+/// OptimizeCanonicalIVType - If loop induction variable is always
+/// sign or zero extended then extend the type of induction
+/// variable.
+///
+/// The rewrite is only attempted when the loop iteration count is a
+/// non-negative constant that fits in the current induction variable type.
+/// NOTE(review): this returns void, so the caller's Changed flag does not
+/// reflect a rewrite performed here.
+void IndVarSimplify::OptimizeCanonicalIVType(Loop *L) {
+  PHINode *PH = L->getCanonicalInductionVariable();
+  if (!PH) return;
+
+  // Check loop iteration count.
+  SCEVHandle IC = SE->getIterationCount(L);
+  if (isa<SCEVCouldNotCompute>(IC)) return;
+  SCEVConstant *IterationCount = dyn_cast<SCEVConstant>(IC);
+  if (!IterationCount) return;
+
+  unsigned IncomingEdge = L->contains(PH->getIncomingBlock(0));
+  unsigned BackEdge     = IncomingEdge^1;
+
+  // Check IV uses. If all IV uses are either SEXT or ZEXT (except
+  // IV increment instruction) then this IV is suitable for this
+  // transformation.
+  bool isSEXT = false;
+  BinaryOperator *Incr = NULL;
+  const Type *NewType = NULL;
+  for(Value::use_iterator UI = PH->use_begin(), UE = PH->use_end();
+      UI != UE; ++UI) {
+    const Type *CandidateType = NULL;
+    if (ZExtInst *ZI = dyn_cast<ZExtInst>(UI))
+      CandidateType = ZI->getDestTy();
+    else if (SExtInst *SI = dyn_cast<SExtInst>(UI)) {
+      CandidateType = SI->getDestTy();
+      isSEXT = true;
+    }
+    else if ((Incr = dyn_cast<BinaryOperator>(UI))) {
+      // Validate IV increment instruction.
+      if (PH->getIncomingValue(BackEdge) == Incr)
+        continue;
+    }
+    if (!CandidateType) {
+      NewType = NULL;
+      break;
+    }
+    if (!NewType)
+      NewType = CandidateType;
+    else if (NewType != CandidateType) {
+      NewType = NULL;
+      break;
+    }
+  }
+
+  // If the IV uses are not suitable, avoid this transformation.
+  if (!NewType || !Incr)
+    return;
+
+  // IV increment instruction must have exactly two uses, one is loop exit
+  // condition and second is the IV (phi node) itself. Any other user would
+  // still refer to the increment when it is erased below.
+  if (!Incr->hasNUses(2)) return;
+  ICmpInst *Exit = NULL;
+  for(Value::use_iterator II = Incr->use_begin(), IE = Incr->use_end();
+      II != IE; ++II) {
+    if (PH == *II)  continue;
+    Exit = dyn_cast<ICmpInst>(*II);
+    break;
+  }
+  if (!Exit) return;
+
+  // The exit condition must compare the increment against a constant.
+  // Remember which operand holds it so the rewritten compare keeps the
+  // same operand order.
+  bool ConstOnLHS = true;
+  ConstantInt *EV = dyn_cast<ConstantInt>(Exit->getOperand(0));
+  if (!EV) {
+    ConstOnLHS = false;
+    EV = dyn_cast<ConstantInt>(Exit->getOperand(1));
+  }
+  if (!EV) return;
+
+  // Check iteration count max value to avoid loops that wrap around IV.
+  APInt ICount = IterationCount->getValue()->getValue();
+  if (ICount.isNegative()) return;
+  uint32_t BW = PH->getType()->getPrimitiveSizeInBits();
+  APInt Max = (isSEXT ? APInt::getSignedMaxValue(BW) : APInt::getMaxValue(BW));
+  if (ICount.getZExtValue() > Max.getZExtValue()) return;
+
+  // Extend IV type.
+
+  SCEVExpander Rewriter(*SE, *LI);
+  Value *NewIV = Rewriter.getOrInsertCanonicalInductionVariable(L,NewType);
+  PHINode *NewPH = cast<PHINode>(NewIV);
+  // Look the new increment up by block so this does not rely on the new phi
+  // listing its incoming blocks in the same order as the old one.
+  Instruction *NewIncr = cast<Instruction>(
+      NewPH->getIncomingValueForBlock(PH->getIncomingBlock(BackEdge)));
+
+  // Replace all SEXT or ZEXT uses.
+  SmallVector<Instruction *, 4> PHUses;
+  for(Value::use_iterator UI = PH->use_begin(), UE = PH->use_end();
+      UI != UE; ++UI) {
+    Instruction *I = cast<Instruction>(UI);
+    PHUses.push_back(I);
+  }
+  while (!PHUses.empty()){
+    Instruction *Use = PHUses.back(); PHUses.pop_back();
+    if (Incr == Use) continue;
+
+    SE->deleteValueFromRecords(Use);
+    Use->replaceAllUsesWith(NewIV);
+    Use->eraseFromParent();
+  }
+
+  // Replace exit condition. The constant is widened according to the
+  // signedness of the compare so the new compare means the same thing.
+  uint32_t NewBW = NewType->getPrimitiveSizeInBits();
+  APInt NewEV(EV->getValue());
+  if (Exit->isSignedPredicate())
+    NewEV = NewEV.sext(NewBW);
+  else
+    NewEV = NewEV.zext(NewBW);
+  Value *NEV = ConstantInt::get(NewEV);
+  Value *LHS = NewIncr;
+  Value *RHS = NEV;
+  if (ConstOnLHS) {
+    LHS = NEV;
+    RHS = NewIncr;
+  }
+  Instruction *NE = new ICmpInst(Exit->getPredicate(), LHS, RHS, "new.exit",
+                                 Exit->getParent()->getTerminator());
+  SE->deleteValueFromRecords(Exit);
+  Exit->replaceAllUsesWith(NE);
+  Exit->eraseFromParent();
+
+  // Remove old IV and increment instructions.
+  SE->deleteValueFromRecords(PH);
+  PH->removeIncomingValue((unsigned)0);
+  PH->removeIncomingValue((unsigned)0);
+  SE->deleteValueFromRecords(Incr);
+  Incr->eraseFromParent();
+}
+
diff --git a/test/Transforms/IndVarsSimplify/2008-09-02-IVType.ll b/test/Transforms/IndVarsSimplify/2008-09-02-IVType.ll
new file mode 100644
index 00000000000..8111cbe3a48
--- /dev/null
+++ b/test/Transforms/IndVarsSimplify/2008-09-02-IVType.ll
@@ -0,0 +1,58 @@
+; RUN: llvm-as < %s | opt -indvars | llvm-dis | grep sext | count 1
+; ModuleID = ''
+
+  %struct.App1Marker = type <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }>
+  %struct.ComponentInstanceRecord = type <{ [1 x i32] }>
+  %struct.DCPredictors = type { [5 x i16] }
+  %struct.DecodeTable = type { i16, i16, i16, i16, i8**, i8** }
+  %struct.ICMDataProcRecord = type <{ i16 (i8**, i32, i32)*, i32 }>
+  %struct.JPEGBitStream = type { i8*, i32, i32, i32, i32, i32, %struct.App1Marker*, i8*, i32, i16, i16, i32 }
+  %struct.JPEGGlobals = type { [2048 x i8], %struct.JPEGBitStream, i8*, i32, i32, %struct.ComponentInstanceRecord*, %struct.ComponentInstanceRecord*, i32, %struct.OpaqueQTMLMutex*, %struct.Rect, i32, i32, %struct.SharedGlobals, %struct.DCPredictors, i8, i8, void (i8*, i16**, i32, %struct.YUVGeneralParams*)*, %struct.YUVGeneralParams, i16, i16, i32, [5 x i16*], [5 x %struct.DecodeTable*], [5 x %struct.DecodeTable*], [5 x i8], [5 x i8], [4 x [65 x i16]], [4 x %struct.DecodeTable], [4 x %struct.DecodeTable], [4 x i8*], [4 x i8*], i16, i16, i32, i8**, i8**, i8**, i8**, i8**, i8**, i8**, i8**, i8**, i8**, [18 x i8], [18 x i8], [18 x i8], [18 x i8], i32, i32, i8**, i8**, i8, i8, i8, i8, i16, i16, %struct.App1Marker*, i8, i8, i8, i8, i32**, i8*, i16*, i8*, i16*, i8, [3 x i8], i32, [3 x i32], [3 x i32], [3 x i32], [3 x i32], [3 x i32], [3 x i16*], [3 x i16*], [3 x i8**], [3 x %struct.DecodeTable*], [3 x %struct.DecodeTable*], [3 x i32], i32, [3 x i16*], i32, i32, i32, [3 x i32], i8, i8, i8, i8, %struct.ICMDataProcRecord*, i32, i32, i8**, i8**, i8**, i8**, i32, i32, i8*, i32, i32, i16*, i16*, i8*, i32, i32, i32, i32, i32, i32, i32, [16 x <2 x i64>], [1280 x i8], i8 }
+  %struct.OpaqueQTMLMutex = type opaque
+  %struct.Rect = type { i16, i16, i16, i16 }
+  %struct.SharedDGlobals = type { %struct.DecodeTable, %struct.DecodeTable, %struct.DecodeTable, %struct.DecodeTable }
+  %struct.SharedEGlobals = type { i8**, i8**, i8**, i8** }
+  %struct.SharedGlobals = type { %struct.SharedEGlobals*, %struct.SharedDGlobals* }
+  %struct.YUVGeneralParams = type { i16*, i8*, i8*, i8*, i8*, i8*, void (i8*, i16**, i32, %struct.YUVGeneralParams*)*, i16, i16, i16, [6 x i8], void (i8*, i16**, i32, %struct.YUVGeneralParams*)*, i16, i16 }
+@llvm.used = appending global [1 x i8*] [ i8* bitcast (i16 (%struct.JPEGGlobals*)* @ExtractBufferedBlocksIgnored to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define i16 @ExtractBufferedBlocksIgnored(%struct.JPEGGlobals* %globp) signext nounwind {
+entry:
+  %tmp4311 = getelementptr %struct.JPEGGlobals* %globp, i32 0, i32 70 ; [#uses=1]
+  %tmp4412 = load i32* %tmp4311, align 16 ; [#uses=2]
+  %tmp4613 = icmp sgt i32 %tmp4412, 0 ; [#uses=1]
+  br i1 %tmp4613, label %bb, label %bb49
+
+bb: ; preds = %bb28, %entry
+  %component.09 = phi i16 [ 0, %entry ], [ %tmp37, %bb28 ] ; [#uses=2]
+  %tmp12 = sext i16 %component.09 to i32 ; [#uses=2]
+  %tmp6 = getelementptr %struct.JPEGGlobals* %globp, i32 0, i32 77, i32 %tmp12 ; [#uses=2]
+  %tmp7 = load i16** %tmp6, align 4 ; [#uses=2]
+  %tmp235 = getelementptr %struct.JPEGGlobals* %globp, i32 0, i32 71, i32 %tmp12 ; [#uses=1]
+  %tmp246 = load i32* %tmp235, align 4 ; [#uses=2]
+  %tmp267 = icmp sgt i32 %tmp246, 0 ; [#uses=1]
+  br i1 %tmp267, label %bb8, label %bb28
+
+bb8: ; preds = %bb8, %bb
+  %indvar = phi i32 [ 0, %bb ], [ %indvar.next2, %bb8 ] ; [#uses=3]
+  %theDCTBufferIter.01.rec = shl i32 %indvar, 6 ; [#uses=1]
+  %tmp10.rec = add i32 %theDCTBufferIter.01.rec, 64 ; [#uses=1]
+  %tmp10 = getelementptr i16* %tmp7, i32 %tmp10.rec ; [#uses=1]
+  %i.02 = trunc i32 %indvar to i16 ; [#uses=1]
+  %tmp13 = add i16 %i.02, 1 ; [#uses=1]
+  %phitmp = sext i16 %tmp13 to i32 ; [#uses=1]
+  %tmp26 = icmp slt i32 %phitmp, %tmp246 ; [#uses=1]
+  %indvar.next2 = add i32 %indvar, 1 ; [#uses=1]
+  br i1 %tmp26, label %bb8, label %bb28
+
+bb28: ; preds = %bb8, %bb
+  %theDCTBufferIter.0.lcssa = phi i16* [ %tmp7, %bb ], [ %tmp10, %bb8 ] ; [#uses=1]
+  store i16* %theDCTBufferIter.0.lcssa, i16** %tmp6, align 4
+  %tmp37 = add i16 %component.09, 1 ; [#uses=2]
+  %phitmp15 = sext i16 %tmp37 to i32 ; [#uses=1]
+  %tmp46 = icmp slt i32 %phitmp15, 42 ; [#uses=1]
+  br i1 %tmp46, label %bb, label %bb49
+
+bb49: ; preds = %bb28, %entry
+  ret i16 0
+}
-- 
2.34.1