From: David Goodwin Date: Thu, 22 Oct 2009 23:19:17 +0000 (+0000) Subject: Allow the target to select the level of anti-dependence breaking that should be perfo... X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=4c3715c2e5e17d7216a96ac2baf9720630f04408 Allow the target to select the level of anti-dependence breaking that should be performed by the post-RA scheduler. The default is none. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@84911 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/Target/TargetSubtarget.h b/include/llvm/Target/TargetSubtarget.h index 5edb86f7701..fd107e074cb 100644 --- a/include/llvm/Target/TargetSubtarget.h +++ b/include/llvm/Target/TargetSubtarget.h @@ -33,6 +33,10 @@ class TargetSubtarget { protected: // Can only create subclasses... TargetSubtarget(); public: + // AntiDepBreakMode - Type of anti-dependence breaking that should + // be performed before post-RA scheduling. + typedef enum { ANTIDEP_NONE, ANTIDEP_CRITICAL, ANTIDEP_ALL } AntiDepBreakMode; + virtual ~TargetSubtarget(); /// getSpecialAddressLatency - For targets where it is beneficial to @@ -43,8 +47,10 @@ public: // enablePostRAScheduler - If the target can benefit from post-regalloc // scheduling and the specified optimization level meets the requirement - // return true to enable post-register-allocation scheduling. - virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel) const { + // return true to enable post-register-allocation scheduling. + virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, + AntiDepBreakMode& mode) const { + mode = ANTIDEP_NONE; return false; } diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 4da5496c079..8fdbe9b1d74 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -128,6 +128,9 @@ namespace { /// AA - AliasAnalysis for making memory reference queries. AliasAnalysis *AA; + /// AntiDepMode - Anti-dependence breaking mode + TargetSubtarget::AntiDepBreakMode AntiDepMode; + /// Classes - For live regs that are only used in one register class in a /// live range, the register class. If the register is not live, the /// corresponding value is null. If the register is live but used in @@ -156,10 +159,11 @@ namespace { const MachineLoopInfo &MLI, const MachineDominatorTree &MDT, ScheduleHazardRecognizer *HR, - AliasAnalysis *aa) + AliasAnalysis *aa, + TargetSubtarget::AntiDepBreakMode adm) : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), AllocatableSet(TRI->getAllocatableSet(MF)), - HazardRec(HR), AA(aa) {} + HazardRec(HR), AA(aa), AntiDepMode(adm) {} ~SchedulePostRATDList() { delete HazardRec; @@ -234,16 +238,23 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { AA = &getAnalysis(); // Check for explicit enable/disable of post-ra scheduling. + TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE; if (EnablePostRAScheduler.getPosition() > 0) { if (!EnablePostRAScheduler) return false; } else { // Check that post-RA scheduling is enabled for this target. const TargetSubtarget &ST = Fn.getTarget().getSubtarget(); - if (!ST.enablePostRAScheduler(OptLevel)) + if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode)) return false; } + // Check for antidep breaking override... + if (EnableAntiDepBreaking.getPosition() > 0) { + AntiDepMode = (EnableAntiDepBreaking) ? + TargetSubtarget::ANTIDEP_CRITICAL : TargetSubtarget::ANTIDEP_NONE; + } + DEBUG(errs() << "PostRAScheduler\n"); const MachineLoopInfo &MLI = getAnalysis(); @@ -253,7 +264,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { (ScheduleHazardRecognizer *)new ExactHazardRecognizer(InstrItins) : (ScheduleHazardRecognizer *)new SimpleHazardRecognizer(); - SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, AA); + SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, AA, AntiDepMode); // Loop over all of the basic blocks for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); @@ -393,7 +404,7 @@ void SchedulePostRATDList::Schedule() { // Build the scheduling graph. BuildSchedGraph(AA); - if (EnableAntiDepBreaking) { + if (AntiDepMode != TargetSubtarget::ANTIDEP_NONE) { if (BreakAntiDependencies()) { // We made changes. Update the dependency graph. // Theoretically we could update the graph in place: diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index bc5768e63a2..74781593a0d 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -128,7 +128,9 @@ protected: /// enablePostRAScheduler - True at 'More' optimization except /// for Thumb1. - bool enablePostRAScheduler(CodeGenOpt::Level OptLevel) const { + bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, + TargetSubtarget::AntiDepBreakMode& mode) const { + mode = TargetSubtarget::ANTIDEP_NONE; return PostRAScheduler && OptLevel >= CodeGenOpt::Default; } diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 27e43333f8c..0e4cfde7788 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -218,7 +218,9 @@ public: /// enablePostRAScheduler - X86 target is enabling post-alloc scheduling /// at 'More' optimization level. - bool enablePostRAScheduler(CodeGenOpt::Level OptLevel) const { + bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, + TargetSubtarget::AntiDepBreakMode& mode) const { + mode = TargetSubtarget::ANTIDEP_NONE; return OptLevel >= CodeGenOpt::Default; } }; diff --git a/test/CodeGen/X86/2007-01-08-InstrSched.ll b/test/CodeGen/X86/2007-01-08-InstrSched.ll index 81f0a1d7244..1eae2d2f209 100644 --- a/test/CodeGen/X86/2007-01-08-InstrSched.ll +++ b/test/CodeGen/X86/2007-01-08-InstrSched.ll @@ -11,11 +11,11 @@ define float @foo(float %x) nounwind { %tmp14 = fadd float %tmp12, %tmp7 ret float %tmp14 -; CHECK: mulss LCPI1_3(%rip) -; CHECK-NEXT: mulss LCPI1_0(%rip) +; CHECK: mulss LCPI1_0(%rip) ; CHECK-NEXT: mulss LCPI1_1(%rip) -; CHECK-NEXT: mulss LCPI1_2(%rip) ; CHECK-NEXT: addss +; CHECK: mulss LCPI1_3(%rip) +; CHECK-NEXT: mulss LCPI1_2(%rip) ; CHECK-NEXT: addss ; CHECK-NEXT: addss ; CHECK-NEXT: ret diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll index 58fe28b09fe..a3faada6615 100644 --- a/test/CodeGen/X86/sse2.ll +++ b/test/CodeGen/X86/sse2.ll @@ -10,10 +10,10 @@ define void @t1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { ; CHECK: t1: ; CHECK: movl 8(%esp), %eax -; CHECK-NEXT: movl 4(%esp), %ecx ; CHECK-NEXT: movapd (%eax), %xmm0 +; CHECK-NEXT: movl 4(%esp), %eax ; CHECK-NEXT: movlpd 12(%esp), %xmm0 -; CHECK-NEXT: movapd %xmm0, (%ecx) +; CHECK-NEXT: movapd %xmm0, (%eax) ; CHECK-NEXT: ret } @@ -26,9 +26,9 @@ define void @t2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { ; CHECK: t2: ; CHECK: movl 8(%esp), %eax -; CHECK-NEXT: movl 4(%esp), %ecx ; CHECK-NEXT: movapd (%eax), %xmm0 +; CHECK-NEXT: movl 4(%esp), %eax ; CHECK-NEXT: movhpd 12(%esp), %xmm0 -; CHECK-NEXT: movapd %xmm0, (%ecx) +; CHECK-NEXT: movapd %xmm0, (%eax) ; CHECK-NEXT: ret } diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll index 6319cb887af..f43e5a34aea 100644 --- a/test/CodeGen/X86/sse3.ll +++ b/test/CodeGen/X86/sse3.ll @@ -168,11 +168,11 @@ define internal void @t10() nounwind { ret void ; X64: t10: ; X64: pextrw $4, %xmm0, %eax -; X64: pextrw $6, %xmm0, %edx ; X64: movlhps %xmm1, %xmm1 ; X64: pshuflw $8, %xmm1, %xmm1 ; X64: pinsrw $2, %eax, %xmm1 -; X64: pinsrw $3, %edx, %xmm1 +; X64: pextrw $6, %xmm0, %eax +; X64: pinsrw $3, %eax, %xmm1 } diff --git a/test/CodeGen/X86/vshift-1.ll b/test/CodeGen/X86/vshift-1.ll index ae845e0a33d..edcff6a7e7b 100644 --- a/test/CodeGen/X86/vshift-1.ll +++ b/test/CodeGen/X86/vshift-1.ll @@ -63,7 +63,7 @@ entry: ; CHECK: shift3b: ; CHECK: movzwl ; CHECK: movd -; CHECK-NEXT: psllw +; CHECK: psllw %0 = insertelement <8 x i16> undef, i16 %amt, i32 0 %1 = insertelement <8 x i16> %0, i16 %amt, i32 1 %2 = insertelement <8 x i16> %0, i16 %amt, i32 2 diff --git a/test/CodeGen/X86/vshift-2.ll b/test/CodeGen/X86/vshift-2.ll index 36feb11603d..be8972f2f4e 100644 --- a/test/CodeGen/X86/vshift-2.ll +++ b/test/CodeGen/X86/vshift-2.ll @@ -63,7 +63,7 @@ entry: ; CHECK: shift3b: ; CHECK: movzwl ; CHECK: movd -; CHECK-NEXT: psrlw +; CHECK: psrlw %0 = insertelement <8 x i16> undef, i16 %amt, i32 0 %1 = insertelement <8 x i16> %0, i16 %amt, i32 1 %2 = insertelement <8 x i16> %0, i16 %amt, i32 2 diff --git a/test/CodeGen/X86/vshift-3.ll b/test/CodeGen/X86/vshift-3.ll index 20d3f48a1a6..bdb5af133e8 100644 --- a/test/CodeGen/X86/vshift-3.ll +++ b/test/CodeGen/X86/vshift-3.ll @@ -52,7 +52,7 @@ entry: ; CHECK: shift3b: ; CHECK: movzwl ; CHECK: movd -; CHECK-NEXT: psraw +; CHECK: psraw %0 = insertelement <8 x i16> undef, i16 %amt, i32 0 %1 = insertelement <8 x i16> %0, i16 %amt, i32 1 %2 = insertelement <8 x i16> %0, i16 %amt, i32 2 diff --git a/test/CodeGen/X86/vshift-5.ll b/test/CodeGen/X86/vshift-5.ll index a543f382b51..675e33ffb33 100644 --- a/test/CodeGen/X86/vshift-5.ll +++ b/test/CodeGen/X86/vshift-5.ll @@ -6,7 +6,7 @@ define void @shift5a(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind { entry: ; CHECK: shift5a: ; CHECK: movd -; CHECK-NEXT: pslld +; CHECK: pslld %amt = load i32* %pamt %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0 %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer @@ -20,7 +20,7 @@ define void @shift5b(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind { entry: ; CHECK: shift5b: ; CHECK: movd -; CHECK-NEXT: psrad +; CHECK: psrad %amt = load i32* %pamt %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0 %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer