From c54f6348861517398f17e85f41b30c4dd079fc3d Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Fri, 24 Feb 2012 01:19:29 +0000 Subject: [PATCH] Switch ARM target to register masks. I'll let the buildbots determine the compile time improvements from this change, but 464.h264ref has 5% faster codegen at -O2. This patch does cause some assembly changes. Branch folding can make different decisions about calls with dead return values. CriticalAntiDepBreaker may choose different registers because its liveness tracking is affected. MachineCopyPropagation may sometimes leave a dead copy behind. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@151331 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 8 ++++++++ lib/Target/ARM/ARMISelLowering.cpp | 6 ++++++ lib/Target/ARM/ARMInstrInfo.td | 24 ++++++------------------ lib/Target/ARM/ARMInstrThumb.td | 18 ++++-------------- lib/Target/ARM/ARMInstrThumb2.td | 4 +--- 5 files changed, 25 insertions(+), 35 deletions(-) diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 458bb1d2d94..c7622f8ba5e 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -2151,6 +2151,10 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) MIB.addReg(RegArgs[i]); + // Add a register mask with the call-preserved registers. + // Proper defs for return values will be added by setPhysRegsDeadExcept(). + MIB.addRegMask(TRI.getCallPreservedMask(CC)); + // Finish off the call including any return values. SmallVector<unsigned, 4> UsedRegs; if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false; @@ -2283,6 +2287,10 @@ bool ARMFastISel::SelectCall(const Instruction *I, for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) MIB.addReg(RegArgs[i]); + // Add a register mask with the call-preserved registers. + // Proper defs for return values will be added by setPhysRegsDeadExcept(). 
+ MIB.addRegMask(TRI.getCallPreservedMask(CC)); + // Finish off the call including any return values. SmallVector<unsigned, 4> UsedRegs; if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 94ddf861067..aa51cedb632 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1600,6 +1600,12 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); + // Add a register mask operand representing the call-preserved registers. + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + if (InFlag.getNode()) Ops.push_back(InFlag); diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 798cf52fd04..dae09c70dd4 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -1898,18 +1898,13 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { } } -// All calls clobber the non-callee saved registers. SP is marked as -// a use to prevent stack-pointer assignments that appear immediately -// before calls from potentially appearing dead. +// SP is marked as a use to prevent stack-pointer assignments that appear +// immediately before calls from potentially appearing dead. let isCall = 1, - // On non-IOS platforms R9 is callee-saved. // FIXME: Do we really need a non-predicated version? If so, it should // at least be a pseudo instruction expanding to the predicated version // at MC lowering time. 
- Defs = [R0, R1, R2, R3, R12, LR, - Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, - CPSR, FPSCR], - Uses = [SP] in { + Defs = [LR], Uses = [SP] in { def BL : ABXI<0b1011, (outs), (ins bl_target:$func, variable_ops), IIC_Br, "bl\t$func", [(ARMcall tglobaladdr:$func)]>, @@ -1964,10 +1959,7 @@ let isCall = 1, // On IOS R9 is call-clobbered. // R7 is marked as a use to prevent frame-pointer assignments from being // moved above / below calls. - Defs = [R0, R1, R2, R3, R9, R12, LR, - Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, - CPSR, FPSCR], - Uses = [R7, SP] in { + Defs = [LR], Uses = [R7, SP] in { def BLr9 : ARMPseudoExpand<(outs), (ins bl_target:$func, variable_ops), 4, IIC_Br, [(ARMcall tglobaladdr:$func)], (BL bl_target:$func)>, @@ -2071,9 +2063,7 @@ def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { // IOS versions. - let Defs = [R0, R1, R2, R3, R9, R12, - Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, PC], - Uses = [SP] in { + let Uses = [SP] in { def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops), IIC_Br, []>, Requires<[IsIOS]>; @@ -2093,9 +2083,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { } // Non-IOS versions (the difference is R9). - let Defs = [R0, R1, R2, R3, R12, - Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, PC], - Uses = [SP] in { + let Uses = [SP] in { def TCRETURNdiND : PseudoInst<(outs), (ins i32imm:$dst, variable_ops), IIC_Br, []>, Requires<[IsNotIOS]>; diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 9019e8cd803..ba1791b8f74 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -406,10 +406,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in { // potentially appearing dead. let isCall = 1, // On non-IOS platforms R9 is callee-saved. 
- Defs = [R0, R1, R2, R3, R12, LR, - Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, - CPSR, FPSCR], - Uses = [SP] in { + Defs = [LR], Uses = [SP] in { // Also used for Thumb2 def tBL : TIx2<0b11110, 0b11, 1, (outs), (ins pred:$p, t_bltarget:$func, variable_ops), IIC_Br, @@ -460,10 +457,7 @@ let isCall = 1, // On IOS R9 is call-clobbered. // R7 is marked as a use to prevent frame-pointer assignments from being // moved above / below calls. - Defs = [R0, R1, R2, R3, R9, R12, LR, - Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, - CPSR, FPSCR], - Uses = [R7, SP] in { + Defs = [LR], Uses = [R7, SP] in { // Also used for Thumb2 def tBLr9 : tPseudoExpand<(outs), (ins pred:$p, t_bltarget:$func, variable_ops), 4, IIC_Br, [(ARMtcall tglobaladdr:$func)], @@ -529,9 +523,7 @@ let isBranch = 1, isTerminator = 1 in // Tail calls let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { // IOS versions. - let Defs = [R0, R1, R2, R3, R9, R12, - Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, PC], - Uses = [SP] in { + let Uses = [SP] in { // tTAILJMPd: IOS version uses a Thumb2 branch (no Thumb1 tail calls // on IOS), so it's in ARMInstrThumb2.td. def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), @@ -540,9 +532,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { Requires<[IsThumb, IsIOS]>; } // Non-IOS versions (the difference is R9). - let Defs = [R0, R1, R2, R3, R12, - Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, PC], - Uses = [SP] in { + let Uses = [SP] in { def tTAILJMPdND : tPseudoExpand<(outs), (ins t_brtarget:$dst, pred:$p, variable_ops), 4, IIC_Br, [], diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 555d8763d44..542996d7c3e 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3260,9 +3260,7 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br, // it goes here. 
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { // IOS version. - let Defs = [R0, R1, R2, R3, R9, R12, PC, - Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15], - Uses = [SP] in + let Uses = [SP] in def tTAILJMPd: tPseudoExpand<(outs), (ins uncondbrtarget:$dst, pred:$p, variable_ops), 4, IIC_Br, [], -- 2.34.1