From 88a589c4b39830bbeed23654521ef2f77bb87abe Mon Sep 17 00:00:00 2001 From: David Goodwin Date: Tue, 25 Aug 2009 17:03:05 +0000 Subject: [PATCH] Fixup register kills after scheduling. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@80002 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/PostRASchedulerList.cpp | 115 +++++++++++++++++---- test/CodeGen/ARM/2009-08-21-PostRAKill.ll | 40 +++++++ test/CodeGen/ARM/2009-08-21-PostRAKill2.ll | 38 +++++++ test/CodeGen/ARM/2009-08-21-PostRAKill3.ll | 31 ++++++ 4 files changed, 203 insertions(+), 21 deletions(-) create mode 100644 test/CodeGen/ARM/2009-08-21-PostRAKill.ll create mode 100644 test/CodeGen/ARM/2009-08-21-PostRAKill2.ll create mode 100644 test/CodeGen/ARM/2009-08-21-PostRAKill3.ll diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 5042415c2cf..a74d29db8c7 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -40,6 +40,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" #include +#include using namespace llvm; STATISTIC(NumNoops, "Number of noops inserted"); @@ -140,6 +141,11 @@ namespace { /// Schedule - Schedule the instruction range using list scheduling. /// void Schedule(); + + /// FixupKills - Fix register kill flags that have been made + /// invalid due to scheduling + /// + void FixupKills(MachineBasicBlock *MBB); /// Observe - Update liveness information to account for the current /// instruction, which will not be scheduled. @@ -150,6 +156,11 @@ namespace { /// void FinishBlock(); + /// GenerateLivenessForKills - If true then generate Def/Kill + /// information for use in updating register kill. If false then + /// generate Def/Kill information for anti-dependence breaking. + bool GenerateLivenessForKills; + private: void PrescanInstruction(MachineInstr *MI); void ScanInstruction(MachineInstr *MI, unsigned Count); @@ -202,6 +213,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); MBB != MBBe; ++MBB) { // Initialize register live-range state for scheduling in this block. + Scheduler.GenerateLivenessForKills = false; Scheduler.StartBlock(MBB); // Schedule each sequence of instructions not interrupted by a label @@ -228,6 +240,12 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { // Clean up register live-range state. Scheduler.FinishBlock(); + + // Initialize register live-range state again and update register kills + Scheduler.GenerateLivenessForKills = true; + Scheduler.StartBlock(MBB); + Scheduler.FixupKills(MBB); + Scheduler.FinishBlock(); } return true; @@ -287,26 +305,28 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) { } } - // Consider callee-saved registers as live-out, since we're running after - // prologue/epilogue insertion so there's no way to add additional - // saved registers. - // - // TODO: If the callee saves and restores these, then we can potentially - // use them between the save and the restore. To do that, we could scan - // the exit blocks to see which of these registers are defined. - // Alternatively, callee-saved registers that aren't saved and restored - // could be marked live-in in every block. - for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) { - unsigned Reg = *I; - Classes[Reg] = reinterpret_cast(-1); - KillIndices[Reg] = BB->size(); - DefIndices[Reg] = ~0u; - // Repeat, for all aliases. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; - Classes[AliasReg] = reinterpret_cast(-1); - KillIndices[AliasReg] = BB->size(); - DefIndices[AliasReg] = ~0u; + if (!GenerateLivenessForKills) { + // Consider callee-saved registers as live-out, since we're running after + // prologue/epilogue insertion so there's no way to add additional + // saved registers. + // + // TODO: If the callee saves and restores these, then we can potentially + // use them between the save and the restore. To do that, we could scan + // the exit blocks to see which of these registers are defined. + // Alternatively, callee-saved registers that aren't saved and restored + // could be marked live-in in every block. + for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) { + unsigned Reg = *I; + Classes[Reg] = reinterpret_cast(-1); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = ~0u; + // Repeat, for all aliases. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + Classes[AliasReg] = reinterpret_cast(-1); + KillIndices[AliasReg] = BB->size(); + DefIndices[AliasReg] = ~0u; + } } } } @@ -467,11 +487,17 @@ void SchedulePostRATDList::ScanInstruction(MachineInstr *MI, Classes[SubregReg] = 0; RegRefs.erase(SubregReg); } - // Conservatively mark super-registers as unusable. + // Conservatively mark super-registers as unusable. If + // initializing for kill updating, then mark all supers as defined + // as well. for (const unsigned *Super = TRI->getSuperRegisters(Reg); *Super; ++Super) { unsigned SuperReg = *Super; Classes[SuperReg] = reinterpret_cast(-1); + if (GenerateLivenessForKills) { + DefIndices[SuperReg] = Count; + KillIndices[SuperReg] = ~0u; + } } } for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -753,6 +779,53 @@ bool SchedulePostRATDList::BreakAntiDependencies() { return Changed; } +/// FixupKills - Fix the register kill flags, they may have been made +/// incorrect by instruction reordering. +/// +void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { + DEBUG(errs() << "Fixup kills for BB ID#" << MBB->getNumber() << '\n'); + + std::set killedRegs; + BitVector ReservedRegs = TRI->getReservedRegs(MF); + + unsigned Count = MBB->size(); + for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin(); + I != E; --Count) { + MachineInstr *MI = --I; + + // After regalloc, IMPLICIT_DEF instructions aren't safe to treat as + // dependence-breaking. In the case of an INSERT_SUBREG, the IMPLICIT_DEF + // is left behind appearing to clobber the super-register, while the + // subregister needs to remain live. So we just ignore them. + if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) + continue; + + PrescanInstruction(MI); + ScanInstruction(MI, Count); + + // Examine all used registers and set kill flag. When a register + // is used multiple times we only set the kill flag on the first + // use. + killedRegs.clear(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) continue; + unsigned Reg = MO.getReg(); + if ((Reg == 0) || ReservedRegs.test(Reg)) continue; + + bool kill = ((KillIndices[Reg] == Count) && + (killedRegs.find(Reg) == killedRegs.end())); + if (MO.isKill() != kill) { + MO.setIsKill(kill); + DEBUG(errs() << "Fixed " << MO << " in "); + DEBUG(MI->dump()); + } + + killedRegs.insert(Reg); + } + } +} + //===----------------------------------------------------------------------===// // Top-Down Scheduling //===----------------------------------------------------------------------===// diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill.ll new file mode 100644 index 00000000000..08038b0e81b --- /dev/null +++ b/test/CodeGen/ARM/2009-08-21-PostRAKill.ll @@ -0,0 +1,40 @@ +; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 -mcpu=cortex-a8 -disable-post-RA-scheduler=0 -avoid-hazards + +; ModuleID = '' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64" +target triple = "armv7-apple-darwin9" + +%struct.tree = type { i32, double, double, %struct.tree*, %struct.tree*, %struct.tree*, %struct.tree* } +@g = common global %struct.tree* null + +define arm_apcscc %struct.tree* @tsp(%struct.tree* %t, i32 %nproc) nounwind { +entry: + %t.idx51.val.i = load double* null ; [#uses=1] + br i1 undef, label %bb4.i, label %bb.i + +bb.i: ; preds = %entry + unreachable + +bb4.i: ; preds = %entry + %0 = load %struct.tree** @g, align 4 ; <%struct.tree*> [#uses=2] + %.idx45.i = getelementptr %struct.tree* %0, i32 0, i32 1 ; [#uses=1] + %.idx45.val.i = load double* %.idx45.i ; [#uses=1] + %.idx46.i = getelementptr %struct.tree* %0, i32 0, i32 2 ; [#uses=1] + %.idx46.val.i = load double* %.idx46.i ; [#uses=1] + %1 = fsub double 0.000000e+00, %.idx45.val.i ; [#uses=2] + %2 = fmul double %1, %1 ; [#uses=1] + %3 = fsub double %t.idx51.val.i, %.idx46.val.i ; [#uses=2] + %4 = fmul double %3, %3 ; [#uses=1] + %5 = fadd double %2, %4 ; [#uses=1] + %6 = tail call double @llvm.sqrt.f64(double %5) nounwind ; [#uses=1] + br i1 undef, label %bb7.i4, label %bb6.i + +bb6.i: ; preds = %bb4.i + br label %bb7.i4 + +bb7.i4: ; preds = %bb6.i, %bb4.i + %tton1.0.i = phi double [ %6, %bb6.i ], [ undef, %bb4.i ] ; [#uses=0] + unreachable +} + +declare double @llvm.sqrt.f64(double) nounwind readonly diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill2.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill2.ll new file mode 100644 index 00000000000..e32bca98fd4 --- /dev/null +++ b/test/CodeGen/ARM/2009-08-21-PostRAKill2.ll @@ -0,0 +1,38 @@ +; RUN: llvm-as < %s | llc -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -disable-post-RA-scheduler=0 -avoid-hazards + +; ModuleID = '' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64" +target triple = "armv7-apple-darwin9" + +%struct.anon = type { [3 x double], double, %struct.node*, [64 x %struct.bnode*], [64 x %struct.bnode*] } +%struct.bnode = type { i16, double, [3 x double], i32, i32, [3 x double], [3 x double], [3 x double], double, %struct.bnode*, %struct.bnode* } +%struct.icstruct = type { [3 x i32], i16 } +%struct.node = type { i16, double, [3 x double], i32, i32 } + +declare arm_apcscc double @floor(double) nounwind readnone + +define void @intcoord(%struct.icstruct* noalias nocapture sret %agg.result, i1 %a, double %b) { +entry: + br i1 %a, label %bb3, label %bb1 + +bb1: ; preds = %entry + unreachable + +bb3: ; preds = %entry + br i1 %a, label %bb7, label %bb5 + +bb5: ; preds = %bb3 + unreachable + +bb7: ; preds = %bb3 + br i1 %a, label %bb11, label %bb9 + +bb9: ; preds = %bb7 + %0 = tail call arm_apcscc double @floor(double %b) nounwind readnone ; [#uses=0] + br label %bb11 + +bb11: ; preds = %bb9, %bb7 + %1 = getelementptr %struct.icstruct* %agg.result, i32 0, i32 0, i32 0 ; [#uses=1] + store i32 0, i32* %1 + ret void +} diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll new file mode 100644 index 00000000000..ddc16dec0d1 --- /dev/null +++ b/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll @@ -0,0 +1,31 @@ +; RUN: llvm-as < %s | llc -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -disable-post-RA-scheduler=0 -avoid-hazards + +; ModuleID = '' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64" +target triple = "armv7-apple-darwin9" + +%struct.Hosp = type { i32, i32, i32, %struct.List, %struct.List, %struct.List, %struct.List } +%struct.List = type { %struct.List*, %struct.Patient*, %struct.List* } +%struct.Patient = type { i32, i32, i32, %struct.Village* } +%struct.Village = type { [4 x %struct.Village*], %struct.Village*, %struct.List, %struct.Hosp, i32, i32 } + +define arm_apcscc %struct.Village* @alloc_tree(i32 %level, i32 %label, %struct.Village* %back, i1 %p) nounwind { +entry: + br i1 %p, label %bb8, label %bb1 + +bb1: ; preds = %entry + %0 = malloc %struct.Village ; <%struct.Village*> [#uses=3] + %exp2 = call double @ldexp(double 1.000000e+00, i32 %level) nounwind ; [#uses=1] + %.c = fptosi double %exp2 to i32 ; [#uses=1] + store i32 %.c, i32* null + %1 = getelementptr %struct.Village* %0, i32 0, i32 3, i32 6, i32 0 ; <%struct.List**> [#uses=1] + store %struct.List* null, %struct.List** %1 + %2 = getelementptr %struct.Village* %0, i32 0, i32 3, i32 6, i32 2 ; <%struct.List**> [#uses=1] + store %struct.List* null, %struct.List** %2 + ret %struct.Village* %0 + +bb8: ; preds = %entry + ret %struct.Village* null +} + +declare double @ldexp(double, i32) -- 2.34.1