From 6cfbf12d7726f7c26583286bafd50ce1a6a461e0 Mon Sep 17 00:00:00 2001 From: Erik Eckstein Date: Thu, 27 Nov 2014 15:13:14 +0000 Subject: [PATCH] reinstate r222872: Peephole optimization in switch table lookup: reuse the guarding table comparison if possible. Fixed missing dominance check. Original commit message: This optimization tries to reuse the generated compare instruction, if there is a comparison against the default value after the switch. Example: if (idx < tablesize) r = table[idx]; // table does not contain default_value else r = default_value; if (r != default_value) ... Is optimized to: cond = idx < tablesize; if (cond) r = table[idx]; else r = default_value; if (cond) ... Jump threading will then eliminate the second if(cond). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222891 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyCFG.cpp | 107 ++++++++++++++- .../SimplifyCFG/X86/switch_to_lookup_table.ll | 127 ++++++++++++++++++ 2 files changed, 227 insertions(+), 7 deletions(-) diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 92fd56a52af..daa576cfbdc 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -73,6 +73,7 @@ STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping"); STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); STATISTIC(NumLookupTablesHoles, "Number of switch instructions turned into lookup tables (holes checked)"); +STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares"); STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block"); STATISTIC(NumSpeculations, "Number of speculative executed instructions"); @@ -3982,6 +3983,89 @@ static bool ShouldBuildLookupTable(SwitchInst *SI, return SI->getNumCases() * 10 >= TableSize * 4; } +/// Try to reuse the switch table index compare. Following pattern: +/// \code +/// if (idx < tablesize) +/// r = table[idx]; // table does not contain default_value +/// else +/// r = default_value; +/// if (r != default_value) +/// ... +/// \endcode +/// Is optimized to: +/// \code +/// cond = idx < tablesize; +/// if (cond) +/// r = table[idx]; +/// else +/// r = default_value; +/// if (cond) +/// ... +/// \endcode +/// Jump threading will then eliminate the second if(cond). +static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, + BranchInst *RangeCheckBranch, Constant *DefaultValue, + const SmallVectorImpl >& Values) { + + ICmpInst *CmpInst = dyn_cast(PhiUser); + if (!CmpInst) + return; + + // We require that the compare is in the same block as the phi so that jump + // threading can do its work afterwards. + if (CmpInst->getParent() != PhiBlock) + return; + + Constant *CmpOp1 = dyn_cast(CmpInst->getOperand(1)); + if (!CmpOp1) + return; + + Value *RangeCmp = RangeCheckBranch->getCondition(); + Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType()); + Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType()); + + // Check if the compare with the default value is constant true or false. + Constant *DefaultConst = ConstantExpr::getICmp(CmpInst->getPredicate(), + DefaultValue, CmpOp1, true); + if (DefaultConst != TrueConst && DefaultConst != FalseConst) + return; + + // Check if the compare with the case values is distinct from the default + // compare result. + for (auto ValuePair : Values) { + Constant *CaseConst = ConstantExpr::getICmp(CmpInst->getPredicate(), + ValuePair.second, CmpOp1, true); + if (!CaseConst || CaseConst == DefaultConst) + return; + assert((CaseConst == TrueConst || CaseConst == FalseConst) && + "Expect true or false as compare result."); + } + + // Check if the branch instruction dominates the phi node. It's a simple + // dominance check, but sufficient for our needs. + // Although this check is invariant in the calling loops, it's better to do it + // at this late stage. Practically we do it at most once for a switch. + BasicBlock *BranchBlock = RangeCheckBranch->getParent(); + for (auto PI = pred_begin(PhiBlock), E = pred_end(PhiBlock); PI != E; ++PI) { + BasicBlock *Pred = *PI; + if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock) + return; + } + + if (DefaultConst == FalseConst) { + // The compare yields the same result. We can replace it. + CmpInst->replaceAllUsesWith(RangeCmp); + ++NumTableCmpReuses; + } else { + // The compare yields the same result, just inverted. We can replace it. + Value *InvertedTableCmp = BinaryOperator::CreateXor(RangeCmp, + ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp", + RangeCheckBranch); + CmpInst->replaceAllUsesWith(InvertedTableCmp); + ++NumTableCmpReuses; + } +} + /// SwitchToLookupTable - If the switch is only used to initialize one or more /// phi nodes in a common successor block with different constant values, /// replace the switch with lookup tables. @@ -4058,11 +4142,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, // If the table has holes, we need a constant result for the default case // or a bitmask that fits in a register. SmallVector, 4> DefaultResultsList; - bool HasDefaultResults = false; - if (TableHasHoles) { - HasDefaultResults = GetCaseResults(SI, nullptr, SI->getDefaultDest(), + bool HasDefaultResults = GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResultsList, DL); - } bool NeedMask = (TableHasHoles && !HasDefaultResults); if (NeedMask) { @@ -4106,6 +4187,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, // lookup table BB. Otherwise, check if the condition value is within the case // range. If it is so, branch to the new BB. Otherwise branch to SI's default // destination. + BranchInst *RangeCheckBranch = nullptr; + const bool GeneratingCoveredLookupTable = MaxTableSize == TableSize; if (GeneratingCoveredLookupTable) { Builder.CreateBr(LookupBB); @@ -4116,7 +4199,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, } else { Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get( MinCaseVal->getType(), TableSize)); - Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); + RangeCheckBranch = Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); } // Populate the BB that does the lookups. @@ -4167,11 +4250,11 @@ static bool SwitchToLookupTable(SwitchInst *SI, bool ReturnedEarly = false; for (size_t I = 0, E = PHIs.size(); I != E; ++I) { PHINode *PHI = PHIs[I]; + const ResultListTy &ResultList = ResultLists[PHI]; // If using a bitmask, use any value to fill the lookup table holes. Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI]; - SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultLists[PHI], - DV, DL); + SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL); Value *Result = Table.BuildLookup(TableIndex, Builder); @@ -4184,6 +4267,16 @@ static bool SwitchToLookupTable(SwitchInst *SI, break; } + // Do a small peephole optimization: re-use the switch table compare if + // possible. + if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) { + BasicBlock *PhiBlock = PHI->getParent(); + // Search for compare instructions which use the phi. + for (auto *User : PHI->users()) { + reuseTableCompare(User, PhiBlock, RangeCheckBranch, DV, ResultList); + } + } + PHI->addIncoming(Result, LookupBB); } diff --git a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll index 5d9ecbf7077..95c9cc539d2 100644 --- a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll +++ b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll @@ -1077,3 +1077,130 @@ return: ; CHECK-NEXT: ret i8 %switch.idx.cast } +; Reuse the inverted table range compare. +define i32 @reuse_cmp1(i32 %x) { +entry: + switch i32 %x, label %sw.default [ + i32 0, label %sw.bb + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + i32 3, label %sw.bb3 + ] +sw.bb: br label %sw.epilog +sw.bb1: br label %sw.epilog +sw.bb2: br label %sw.epilog +sw.bb3: br label %sw.epilog +sw.default: br label %sw.epilog +sw.epilog: + %r.0 = phi i32 [ 0, %sw.default ], [ 13, %sw.bb3 ], [ 12, %sw.bb2 ], [ 11, %sw.bb1 ], [ 10, %sw.bb ] + %cmp = icmp eq i32 %r.0, 0 ; This compare can be "replaced". + br i1 %cmp, label %if.then, label %if.end +if.then: br label %return +if.end: br label %return +return: + %retval.0 = phi i32 [ 100, %if.then ], [ %r.0, %if.end ] + ret i32 %retval.0 +; CHECK-LABEL: @reuse_cmp1( +; CHECK: entry: +; CHECK-NEXT: %switch.tableidx = sub i32 %x, 0 +; CHECK-NEXT: [[C:%.+]] = icmp ult i32 %switch.tableidx, 4 +; CHECK-NEXT: %inverted.cmp = xor i1 [[C]], true +; CHECK: [[R:%.+]] = select i1 %inverted.cmp, i32 100, i32 {{.*}} +; CHECK-NEXT: ret i32 [[R]] +} + +; Reuse the table range compare. +define i32 @reuse_cmp2(i32 %x) { +entry: + switch i32 %x, label %sw.default [ + i32 0, label %sw.bb + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + i32 3, label %sw.bb3 + ] +sw.bb: br label %sw.epilog +sw.bb1: br label %sw.epilog +sw.bb2: br label %sw.epilog +sw.bb3: br label %sw.epilog +sw.default: br label %sw.epilog +sw.epilog: + %r.0 = phi i32 [ 4, %sw.default ], [ 3, %sw.bb3 ], [ 2, %sw.bb2 ], [ 1, %sw.bb1 ], [ 0, %sw.bb ] + %cmp = icmp ne i32 %r.0, 4 ; This compare can be "replaced". + br i1 %cmp, label %if.then, label %if.end +if.then: br label %return +if.end: br label %return +return: + %retval.0 = phi i32 [ %r.0, %if.then ], [ 100, %if.end ] + ret i32 %retval.0 +; CHECK-LABEL: @reuse_cmp2( +; CHECK: entry: +; CHECK-NEXT: %switch.tableidx = sub i32 %x, 0 +; CHECK-NEXT: [[C:%.+]] = icmp ult i32 %switch.tableidx, 4 +; CHECK: [[R:%.+]] = select i1 [[C]], i32 {{.*}}, i32 100 +; CHECK-NEXT: ret i32 [[R]] +} + +; Cannot reuse the table range compare, because the default value is the same +; as one of the case values. +define i32 @no_reuse_cmp(i32 %x) { +entry: + switch i32 %x, label %sw.default [ + i32 0, label %sw.bb + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + i32 3, label %sw.bb3 + ] +sw.bb: br label %sw.epilog +sw.bb1: br label %sw.epilog +sw.bb2: br label %sw.epilog +sw.bb3: br label %sw.epilog +sw.default: br label %sw.epilog +sw.epilog: + %r.0 = phi i32 [ 12, %sw.default ], [ 13, %sw.bb3 ], [ 12, %sw.bb2 ], [ 11, %sw.bb1 ], [ 10, %sw.bb ] + %cmp = icmp ne i32 %r.0, 0 + br i1 %cmp, label %if.then, label %if.end +if.then: br label %return +if.end: br label %return +return: + %retval.0 = phi i32 [ %r.0, %if.then ], [ 100, %if.end ] + ret i32 %retval.0 +; CHECK-LABEL: @no_reuse_cmp( +; CHECK: [[S:%.+]] = select +; CHECK-NEXT: %cmp = icmp ne i32 [[S]], 0 +; CHECK-NEXT: [[R:%.+]] = select i1 %cmp, i32 [[S]], i32 100 +; CHECK-NEXT: ret i32 [[R]] +} + +; Cannot reuse the table range compare, because the phi at the switch merge +; point is not dominated by the switch. +define i32 @no_reuse_cmp2(i32 %x, i32 %y) { +entry: + %ec = icmp ne i32 %y, 0 + br i1 %ec, label %switch.entry, label %sw.epilog +switch.entry: + switch i32 %x, label %sw.default [ + i32 0, label %sw.bb + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + i32 3, label %sw.bb3 + ] +sw.bb: br label %sw.epilog +sw.bb1: br label %sw.epilog +sw.bb2: br label %sw.epilog +sw.bb3: br label %sw.epilog +sw.default: br label %sw.epilog +sw.epilog: + %r.0 = phi i32 [100, %entry], [ 0, %sw.default ], [ 13, %sw.bb3 ], [ 12, %sw.bb2 ], [ 11, %sw.bb1 ], [ 10, %sw.bb ] + %cmp = icmp eq i32 %r.0, 0 ; This compare can be "replaced". + br i1 %cmp, label %if.then, label %if.end +if.then: br label %return +if.end: br label %return +return: + %retval.0 = phi i32 [ 100, %if.then ], [ %r.0, %if.end ] + ret i32 %retval.0 +; CHECK-LABEL: @no_reuse_cmp2( +; CHECK: %r.0 = phi +; CHECK-NEXT: %cmp = icmp eq i32 %r.0, 0 +; CHECK-NEXT: [[R:%.+]] = select i1 %cmp +; CHECK-NEXT: ret i32 [[R]] +} -- 2.34.1