From: Hal Finkel Date: Wed, 11 Sep 2013 21:20:40 +0000 (+0000) Subject: Implement TTI getUnrollingPreferences for PowerPC X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=a5d756ca39c8566a0995a3f748812befe9021ef0;p=oota-llvm.git Implement TTI getUnrollingPreferences for PowerPC The PowerPC A2 core greatly benefits from aggressive concatenation unrolling; use the new getUnrollingPreferences to enable this by default when targeting the PPC A2 core. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190549 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 2504ba70c25..8879630270e 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -77,6 +77,7 @@ public: /// \name Scalar TTI Implementations /// @{ virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const; + virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const; /// @} @@ -129,6 +130,14 @@ PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const { return PSK_Software; } +void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const { + if (ST->getDarwinDirective() == PPC::DIR_A2) { + // The A2 is in-order with a deep pipeline, and concatenation unrolling + // helps expose latency-hiding opportunities to the instruction scheduler. + UP.Partial = UP.Runtime = true; + } +} + unsigned PPCTTI::getNumberOfRegisters(bool Vector) const { if (Vector && !ST->hasAltivec()) return 0; diff --git a/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll b/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll new file mode 100644 index 00000000000..17c91e5c07b --- /dev/null +++ b/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll @@ -0,0 +1,48 @@ +; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll | FileCheck %s +define void @unroll_opt_for_size() nounwind optsize { +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %inc, %loop ] + %inc = add i32 %iv, 1 + %exitcnd = icmp uge i32 %inc, 1024 + br i1 %exitcnd, label %exit, label %loop + +exit: + ret void +} + +; CHECK-LABEL: @unroll_opt_for_size +; CHECK: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK: icmp + +define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly { +entry: + %cmp1 = icmp eq i32 %n, 0 + br i1 %cmp1, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %add = add nsw i32 %0, %sum.02 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + ret i32 %sum.0.lcssa +} + +; CHECK-LABEL: @test +; CHECK: unr.cmp{{.*}}: +; CHECK: for.body.unr{{.*}}: +; CHECK: for.body: +; CHECK: br i1 %exitcond.7, label %for.end.loopexit{{.*}}, label %for.body + diff --git a/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg b/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg new file mode 100644 index 00000000000..2e463005586 --- /dev/null +++ b/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg @@ -0,0 +1,4 @@ +targets = set(config.root.targets_to_build.split()) +if not 'PowerPC' in targets: + config.unsupported = True +