From: James Molloy Date: Wed, 11 Feb 2015 12:15:41 +0000 (+0000) Subject: [SimplifyCFG] Swap to using TargetTransformInfo for cost X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=4de471dd0aabb0ddb72b29c4b04729ddb88e7315 [SimplifyCFG] Swap to using TargetTransformInfo for cost analysis. We're already using TTI in SimplifyCFG, so remove the hard-baked "cheapness" heuristic and use TTI directly. Generally NFC intended, but we're using a slightly different heuristic now so there is a slight test churn. Test changes: * combine-comparisons-by-cse.ll: Removed unneeded branch check. * 2014-08-04-muls-it.ll: Test now doesn't branch but emits muleq. * coalesce-subregs.ll: Superfluous block check. * 2008-01-02-hoist-fp-add.ll: fadd is safe to speculate. Change to udiv. * PhiBlockMerge.ll: Superfluous CFG checking code. Main checks still present. * select-gep.ll: A variable GEP is not expensive, just TCC_Basic, according to the TTI. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@228826 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 364eeca70d7..b65fd38b836 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -216,45 +216,15 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, } /// ComputeSpeculationCost - Compute an abstract "cost" of speculating the -/// given instruction, which is assumed to be safe to speculate. 1 means -/// cheap, 2 means less cheap, and UINT_MAX means prohibitively expensive. -static unsigned ComputeSpeculationCost(const User *I, const DataLayout *DL) { +/// given instruction, which is assumed to be safe to speculate. TCC_Free means +/// cheap, TCC_Basic means less cheap, and TCC_Expensive means prohibitively +/// expensive. +static unsigned ComputeSpeculationCost(const User *I, const DataLayout *DL, + const TargetTransformInfo &TTI) { assert(isSafeToSpeculativelyExecute(I, DL) && "Instruction is not safe to speculatively execute!"); - switch (Operator::getOpcode(I)) { - default: - // In doubt, be conservative. - return UINT_MAX; - case Instruction::GetElementPtr: - // GEPs are cheap if all indices are constant. - if (!cast(I)->hasAllConstantIndices()) - return UINT_MAX; - return 1; - case Instruction::ExtractValue: - case Instruction::Load: - case Instruction::Add: - case Instruction::Sub: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::ICmp: - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::BitCast: - case Instruction::ExtractElement: - case Instruction::InsertElement: - return 1; // These are all cheap. - - case Instruction::Call: - case Instruction::Select: - return 2; - } + return TTI.getUserCost(I); } - /// DominatesMergePoint - If we have a merge point of an "if condition" as /// accepted above, return true if the specified value dominates the block. We /// don't handle the true generality of domination here, just a special case @@ -275,7 +245,8 @@ static unsigned ComputeSpeculationCost(const User *I, const DataLayout *DL) { static bool DominatesMergePoint(Value *V, BasicBlock *BB, SmallPtrSetImpl *AggressiveInsts, unsigned &CostRemaining, - const DataLayout *DL) { + const DataLayout *DL, + const TargetTransformInfo &TTI) { Instruction *I = dyn_cast(V); if (!I) { // Non-instructions all dominate instructions, but not all constantexprs @@ -311,7 +282,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, if (!isSafeToSpeculativelyExecute(I, DL)) return false; - unsigned Cost = ComputeSpeculationCost(I, DL); + unsigned Cost = ComputeSpeculationCost(I, DL, TTI); if (Cost > CostRemaining) return false; @@ -321,7 +292,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // Okay, we can only really hoist these out if their operands do // not take us over the cost threshold. for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) - if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, DL)) + if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, DL, TTI)) return false; // Okay, it's safe to do this! Remember this instruction. AggressiveInsts->insert(I); @@ -1489,7 +1460,8 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, /// /// \returns true if the conditional block is removed. static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, - const DataLayout *DL) { + const DataLayout *DL, + const TargetTransformInfo &TTI) { // Be conservative for now. FP select instruction can often be expensive. Value *BrCond = BI->getCondition(); if (isa(BrCond)) @@ -1538,7 +1510,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, EndBB)))) return false; if (!SpeculatedStoreValue && - ComputeSpeculationCost(I, DL) > PHINodeFoldingThreshold) + ComputeSpeculationCost(I, DL, TTI) > PHINodeFoldingThreshold * + TargetTransformInfo::TCC_Basic) return false; // Store the store speculation candidate. @@ -1597,9 +1570,11 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE, DL)) || (OrigCE && !isSafeToSpeculativelyExecute(OrigCE, DL))) return false; - unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, DL) : 0; - unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, DL) : 0; - if (OrigCost + ThenCost > 2 * PHINodeFoldingThreshold) + unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, DL, TTI) : 0; + unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, DL, TTI) : 0; + unsigned MaxCost = 2 * PHINodeFoldingThreshold * + TargetTransformInfo::TCC_Basic; + if (OrigCost + ThenCost > MaxCost) return false; // Account for the cost of an unfolded ConstantExpr which could end up @@ -1804,7 +1779,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *DL) { /// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry /// PHI node, see if we can eliminate it. -static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) { +static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL, + const TargetTransformInfo &TTI) { // Ok, this is a two entry PHI node. Check to see if this is a simple "if // statement", which has a very simple dominance structure. Basically, we // are trying to find the condition that is being branched on, which @@ -1835,6 +1811,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) { SmallPtrSet AggressiveInsts; unsigned MaxCostVal0 = PHINodeFoldingThreshold, MaxCostVal1 = PHINodeFoldingThreshold; + MaxCostVal0 *= TargetTransformInfo::TCC_Basic; + MaxCostVal1 *= TargetTransformInfo::TCC_Basic; for (BasicBlock::iterator II = BB->begin(); isa(II);) { PHINode *PN = cast(II++); @@ -1845,9 +1823,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) { } if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts, - MaxCostVal0, DL) || + MaxCostVal0, DL, TTI) || !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts, - MaxCostVal1, DL)) + MaxCostVal1, DL, TTI)) return false; } @@ -4471,7 +4449,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator(); if (Succ0TI->getNumSuccessors() == 1 && Succ0TI->getSuccessor(0) == BI->getSuccessor(1)) - if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), DL)) + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), DL, TTI)) return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; } } else if (BI->getSuccessor(1)->getSinglePredecessor()) { @@ -4480,7 +4458,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator(); if (Succ1TI->getNumSuccessors() == 1 && Succ1TI->getSuccessor(0) == BI->getSuccessor(0)) - if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), DL)) + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), DL, TTI)) return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true; } @@ -4608,7 +4586,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { // eliminate it, do so now. if (PHINode *PN = dyn_cast(BB->begin())) if (PN->getNumIncomingValues() == 2) - Changed |= FoldTwoEntryPHINode(PN, DL); + Changed |= FoldTwoEntryPHINode(PN, DL, TTI); Builder.SetInsertPoint(BB->getTerminator()); if (BranchInst *BI = dyn_cast(BB->getTerminator())) { diff --git a/test/CodeGen/AArch64/arm64-promote-const.ll b/test/CodeGen/AArch64/arm64-promote-const.ll index 94fd8e33b89..c4c32c1916d 100644 --- a/test/CodeGen/AArch64/arm64-promote-const.ll +++ b/test/CodeGen/AArch64/arm64-promote-const.ll @@ -63,49 +63,23 @@ entry: ret <16 x i8> %add.i9 } -; Two different uses of the sane constant in two different basic blocks, +; Two different uses of the same constant in two different basic blocks, ; one dominates the other define <16 x i8> @test3(<16 x i8> %arg, i32 %path) { ; PROMOTED-LABEL: test3: ; In stress mode, constant vector are promoted ; Since, the constant is the same as the previous function, ; the same address must be used -; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV1]]@PAGE -; PROMOTED-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTV1]]@PAGEOFF] -; Destination register is defined by ABI -; PROMOTED-NEXT: add.16b v0, v0, v[[REGNUM]] -; PROMOTED-NEXT: cbnz w0, [[LABEL:LBB.*]] -; Next BB -; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV2:__PromotedConst[0-9]+]]@PAGE -; PROMOTED-NEXT: ldr q[[REGNUM]], {{\[}}[[PAGEADDR]], [[CSTV2]]@PAGEOFF] -; Next BB -; PROMOTED-NEXT: [[LABEL]]: -; PROMOTED-NEXT: mul.16b [[DESTV:v[0-9]+]], v0, v[[REGNUM]] -; PROMOTED-NEXT: add.16b v0, v0, [[DESTV]] -; PROMOTED-NEXT: ret +; PROMOTED: ldr +; PROMOTED: ldr +; PROMOTED-NOT: ldr +; PROMOTED: ret ; REGULAR-LABEL: test3: -; Regular mode does not elimitate common sub expression by its own. -; In other words, the same loads appears several times. -; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL1:lCP.*]]@PAGE -; REGULAR-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTLABEL1]]@PAGEOFF] -; Destination register is defined by ABI -; REGULAR-NEXT: add.16b v0, v0, v[[REGNUM]] -; REGULAR-NEXT: cbz w0, [[LABELelse:LBB.*]] -; Next BB -; Redundant load -; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL1]]@PAGE -; REGULAR-NEXT: ldr q[[REGNUM]], {{\[}}[[PAGEADDR]], [[CSTLABEL1]]@PAGEOFF] -; REGULAR-NEXT: b [[LABELend:LBB.*]] -; Next BB -; REGULAR-NEXT: [[LABELelse]] -; REGULAR-NEXT: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL2:lCP.*]]@PAGE -; REGULAR-NEXT: ldr q[[REGNUM]], {{\[}}[[PAGEADDR]], [[CSTLABEL2]]@PAGEOFF] -; Next BB -; REGULAR-NEXT: [[LABELend]]: -; REGULAR-NEXT: mul.16b [[DESTV:v[0-9]+]], v0, v[[REGNUM]] -; REGULAR-NEXT: add.16b v0, v0, [[DESTV]] -; REGULAR-NEXT: ret +; REGULAR: ldr +; REGULAR: ldr +; REGULAR-NOT: ldr +; REGULAR: ret entry: %add.i = add <16 x i8> %arg, %tobool = icmp eq i32 %path, 0 @@ -132,33 +106,14 @@ define <16 x i8> @test4(<16 x i8> %arg, i32 %path) { ; In stress mode, constant vector are promoted ; Since, the constant is the same as the previous function, ; the same address must be used -; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV1]]@PAGE -; PROMOTED-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTV1]]@PAGEOFF] -; Destination register is defined by ABI -; PROMOTED-NEXT: add.16b v0, v0, v[[REGNUM]] -; PROMOTED-NEXT: cbz w0, [[LABEL:LBB.*]] -; Next BB -; PROMOTED: mul.16b v0, v0, v[[REGNUM]] -; Next BB -; PROMOTED-NEXT: [[LABEL]]: -; PROMOTED-NEXT: ret - +; PROMOTED: ldr +; PROMOTED-NOT: ldr +; PROMOTED: ret ; REGULAR-LABEL: test4: -; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL3:lCP.*]]@PAGE -; REGULAR-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTLABEL3]]@PAGEOFF] -; Destination register is defined by ABI -; REGULAR-NEXT: add.16b v0, v0, v[[REGNUM]] -; REGULAR-NEXT: cbz w0, [[LABEL:LBB.*]] -; Next BB -; Redundant expression -; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL3]]@PAGE -; REGULAR-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTLABEL3]]@PAGEOFF] -; Destination register is defined by ABI -; REGULAR-NEXT: mul.16b v0, v0, v[[REGNUM]] -; Next BB -; REGULAR-NEXT: [[LABEL]]: -; REGULAR-NEXT: ret +; REGULAR: ldr +; REGULAR-NOT: ldr +; REGULAR: ret entry: %add.i = add <16 x i8> %arg, %tobool = icmp eq i32 %path, 0 diff --git a/test/CodeGen/AArch64/combine-comparisons-by-cse.ll b/test/CodeGen/AArch64/combine-comparisons-by-cse.ll index df8dc87176c..3686a1f9ed0 100644 --- a/test/CodeGen/AArch64/combine-comparisons-by-cse.ll +++ b/test/CodeGen/AArch64/combine-comparisons-by-cse.ll @@ -366,7 +366,6 @@ define i32 @fcmpri(i32 %argc, i8** nocapture readonly %argv) { ; CHECK-LABEL-DAG: .LBB9_3 ; CHECK: cmp w19, #0 ; CHECK: fcmp d8, #0.0 -; CHECK: b.gt .LBB9_5 ; CHECK-NOT: cmp w19, #1 ; CHECK-NOT: b.ge .LBB9_5 diff --git a/test/CodeGen/ARM/2014-08-04-muls-it.ll b/test/CodeGen/ARM/2014-08-04-muls-it.ll index 4636bff880a..5ba1347433d 100644 --- a/test/CodeGen/ARM/2014-08-04-muls-it.ll +++ b/test/CodeGen/ARM/2014-08-04-muls-it.ll @@ -17,9 +17,7 @@ if.end: ; preds = %if.then, %entry ; CHECK-LABEL: function ; CHECK: cmp r0, r1 -; CHECK: bne [[LABEL:[.*]]] ; CHECK-NOT: mulseq r0, r0, r0 -; CHECK: [[LABEL]] -; CHECK: muls r0, r0, r0 +; CHECK: muleq r0, r0, r0 ; CHECK: bx lr diff --git a/test/CodeGen/ARM/coalesce-subregs.ll b/test/CodeGen/ARM/coalesce-subregs.ll index e7bd5f41bb4..e4e3315fcfd 100644 --- a/test/CodeGen/ARM/coalesce-subregs.ll +++ b/test/CodeGen/ARM/coalesce-subregs.ll @@ -293,7 +293,6 @@ bb: ; CHECK: adjustCopiesBackFrom ; The shuffle in if.else3 must be preserved even though adjustCopiesBackFrom ; is tempted to remove it. -; CHECK: %if.else3 ; CHECK: vorr d define internal void @adjustCopiesBackFrom(<2 x i64>* noalias nocapture sret %agg.result, <2 x i64> %in) { entry: diff --git a/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll b/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll index cf29b715979..8e156373998 100644 --- a/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll +++ b/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll @@ -1,27 +1,27 @@ -; The phi should not be eliminated in this case, because the fp op could trap. +; The phi should not be eliminated in this case, because the divide op could trap. ; RUN: opt < %s -simplifycfg -S | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i686-apple-darwin8" -@G = weak global double 0.000000e+00, align 8 ; [#uses=2] +@G = weak global i32 0, align 8 ; [#uses=2] -define void @test(i32 %X, i32 %Y, double %Z) { +define void @test(i32 %X, i32 %Y, i32 %Z) { entry: %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] - %tmp = load double* @G, align 8 ; [#uses=2] + %tmp = load i32* @G, align 8 ; [#uses=2] %tmp3 = icmp eq i32 %X, %Y ; [#uses=1] %tmp34 = zext i1 %tmp3 to i8 ; [#uses=1] %toBool = icmp ne i8 %tmp34, 0 ; [#uses=1] br i1 %toBool, label %cond_true, label %cond_next cond_true: ; preds = %entry - %tmp7 = fadd double %tmp, %Z ; [#uses=1] + %tmp7 = udiv i32 %tmp, %Z ; [#uses=1] br label %cond_next cond_next: ; preds = %cond_true, %entry -; CHECK: = phi double - %F.0 = phi double [ %tmp, %entry ], [ %tmp7, %cond_true ] ; [#uses=1] - store double %F.0, double* @G, align 8 +; CHECK: = phi i32 + %F.0 = phi i32 [ %tmp, %entry ], [ %tmp7, %cond_true ] ; [#uses=1] + store i32 %F.0, i32* @G, align 8 ret void } diff --git a/test/Transforms/SimplifyCFG/PhiBlockMerge.ll b/test/Transforms/SimplifyCFG/PhiBlockMerge.ll index 36b52f52d49..555082921b9 100644 --- a/test/Transforms/SimplifyCFG/PhiBlockMerge.ll +++ b/test/Transforms/SimplifyCFG/PhiBlockMerge.ll @@ -4,9 +4,7 @@ ; define i32 @test(i1 %a, i1 %b) { -; CHECK: br i1 %a br i1 %a, label %M, label %O -; CHECK: O: O: ; preds = %0 ; CHECK: select i1 %b, i32 0, i32 1 ; CHECK-NOT: phi @@ -18,9 +16,9 @@ N: ; preds = %Q, %O %Wp = phi i32 [ 0, %O ], [ 1, %Q ] ; [#uses=1] br label %M M: ; preds = %N, %0 -; CHECK: %W = phi i32 %W = phi i32 [ %Wp, %N ], [ 2, %0 ] ; [#uses=1] %R = add i32 %W, 1 ; [#uses=1] ret i32 %R +; CHECK: ret } diff --git a/test/Transforms/SimplifyCFG/select-gep.ll b/test/Transforms/SimplifyCFG/select-gep.ll index 96c214cbc81..43e46ca7efc 100644 --- a/test/Transforms/SimplifyCFG/select-gep.ll +++ b/test/Transforms/SimplifyCFG/select-gep.ll @@ -1,27 +1,8 @@ ; RUN: opt -S -simplifycfg < %s | FileCheck %s -define i8* @test1(i8* %x, i64 %y) nounwind { -entry: - %tmp1 = load i8* %x, align 1 - %cmp = icmp eq i8 %tmp1, 47 - br i1 %cmp, label %if.then, label %if.end - -if.then: - %incdec.ptr = getelementptr inbounds i8* %x, i64 %y - br label %if.end - -if.end: - %x.addr = phi i8* [ %incdec.ptr, %if.then ], [ %x, %entry ] - ret i8* %x.addr - -; CHECK-LABEL: @test1( -; CHECK-NOT: select -; CHECK: ret i8* %x.addr -} - %ST = type { i8, i8 } -define i8* @test2(%ST* %x, i8* %y) nounwind { +define i8* @test1(%ST* %x, i8* %y) nounwind { entry: %cmp = icmp eq %ST* %x, null br i1 %cmp, label %if.then, label %if.end @@ -34,7 +15,7 @@ if.end: %x.addr = phi i8* [ %incdec.ptr, %if.then ], [ %y, %entry ] ret i8* %x.addr -; CHECK-LABEL: @test2( +; CHECK-LABEL: @test1( ; CHECK: %incdec.ptr.y = select i1 %cmp, i8* %incdec.ptr, i8* %y ; CHECK: ret i8* %incdec.ptr.y }