From 1cc0c4325b069d6faa5d7a9c9ae1ce92758aaa36 Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Fri, 6 Nov 2015 21:00:13 +0000 Subject: [PATCH] [ShrinkWrapping] Teach shrink-wrapping how to analyze RegMask. Previously we were conservatively assuming that RegMask operands clobber callee saved registers. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@252341 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/ShrinkWrap.cpp | 46 +++++++++++++++---- test/CodeGen/X86/x86-shrink-wrapping.ll | 59 +++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 8 deletions(-) diff --git a/lib/CodeGen/ShrinkWrap.cpp b/lib/CodeGen/ShrinkWrap.cpp index dff0973c924..8261caa4f84 100644 --- a/lib/CodeGen/ShrinkWrap.cpp +++ b/lib/CodeGen/ShrinkWrap.cpp @@ -43,9 +43,11 @@ // points must be in the same loop. // Property #3 is ensured via the MachineBlockFrequencyInfo. // -// If this pass found points matching all this properties, then +// If this pass found points matching all these properties, then // MachineFrameInfo is updated this that information. //===----------------------------------------------------------------------===// +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" // To check for profitability. #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -117,12 +119,32 @@ class ShrinkWrap : public MachineFunctionPass { unsigned FrameDestroyOpcode; /// Entry block. const MachineBasicBlock *Entry; + typedef SmallSetVector SetOfRegs; + /// Registers that need to be saved for the current function. + mutable SetOfRegs CurrentCSRs; + /// Current MachineFunction. + MachineFunction *MachineFunc; /// \brief Check if \p MI uses or defines a callee-saved register or /// a frame index. If this is the case, this means \p MI must happen /// after Save and before Restore. 
bool useOrDefCSROrFI(const MachineInstr &MI) const; + const SetOfRegs &getCurrentCSRs() const { + if (CurrentCSRs.empty()) { + BitVector SavedRegs; + const TargetFrameLowering *TFI = + MachineFunc->getSubtarget().getFrameLowering(); + + TFI->determineCalleeSaves(*MachineFunc, SavedRegs, nullptr); + + for (int Reg = SavedRegs.find_first(); Reg != -1; + Reg = SavedRegs.find_next(Reg)) + CurrentCSRs.insert((unsigned)Reg); + } + return CurrentCSRs; + } + /// \brief Update the Save and Restore points such that \p MBB is in /// the region that is dominated by Save and post-dominated by Restore /// and Save and Restore still match the safe point definition. @@ -144,6 +166,8 @@ class ShrinkWrap : public MachineFunctionPass { FrameSetupOpcode = TII.getCallFrameSetupOpcode(); FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); Entry = &MF.front(); + CurrentCSRs.clear(); + MachineFunc = &MF; ++NumFunc; } @@ -199,20 +223,26 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI) const { return true; } for (const MachineOperand &MO : MI.operands()) { - bool UseCSR = false; + bool UseOrDefCSR = false; if (MO.isReg()) { unsigned PhysReg = MO.getReg(); if (!PhysReg) continue; assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && "Unallocated register?!"); - UseCSR = RCI.getLastCalleeSavedAlias(PhysReg); + UseOrDefCSR = RCI.getLastCalleeSavedAlias(PhysReg); + } else if (MO.isRegMask()) { + // Check if this regmask clobbers any of the CSRs. + for (unsigned Reg : getCurrentCSRs()) { + if (MO.clobbersPhysReg(Reg)) { + UseOrDefCSR = true; + break; + } + } } - // TODO: Handle regmask more accurately. - // For now, be conservative about them. 
- if (UseCSR || MO.isFI() || MO.isRegMask()) { - DEBUG(dbgs() << "Use or define CSR(" << UseCSR << ") or FI(" << MO.isFI() - << "): " << MI << '\n'); + if (UseOrDefCSR || MO.isFI()) { + DEBUG(dbgs() << "Use or define CSR(" << UseOrDefCSR << ") or FI(" + << MO.isFI() << "): " << MI << '\n'); return true; } } diff --git a/test/CodeGen/X86/x86-shrink-wrapping.ll b/test/CodeGen/X86/x86-shrink-wrapping.ll index 5d4e63b329f..52e094b5417 100644 --- a/test/CodeGen/X86/x86-shrink-wrapping.ll +++ b/test/CodeGen/X86/x86-shrink-wrapping.ll @@ -729,3 +729,62 @@ loop2b: ; preds = %loop1 end: ret void } + +; Check that we just don't bail out on RegMask. +; In this case, the RegMask does not touch a CSR so we are good to go! +; CHECK-LABEL: regmask: +; +; Compare the arguments and jump to exit. +; No prologue needed. +; ENABLE: cmpl %esi, %edi +; ENABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]] +; +; Prologue code. +; (What we push does not matter. It should be some random scratch register.) +; CHECK: pushq +; +; Compare the arguments and jump to exit. +; After the prologue is set. +; DISABLE: cmpl %esi, %edi +; DISABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]] +; +; CHECK: nop +; Set the first argument to zero. +; CHECK: xorl %edi, %edi +; Set the second argument to addr. +; CHECK-NEXT: movq %rdx, %rsi +; CHECK-NEXT: callq _doSomething +; CHECK-NEXT: popq +; CHECK-NEXT: retq +; +; CHECK: [[EXIT_LABEL]]: +; Set the first argument to 6. +; CHECK-NEXT: movl $6, %edi +; Set the second argument to addr. +; CHECK-NEXT: movq %rdx, %rsi +; +; Without shrink-wrapping, we need to restore the stack before +; making the tail call. +; Epilogue code. +; DISABLE-NEXT: popq +; +; CHECK-NEXT: jmp _doSomething +define i32 @regmask(i32 %a, i32 %b, i32* %addr) { + %tmp2 = icmp slt i32 %a, %b + br i1 %tmp2, label %true, label %false + +true: + ; Clobber a CSR so that we check something on the regmask + ; of the tail call. 
+ tail call void asm sideeffect "nop", "~{ebx}"() + %tmp4 = call i32 @doSomething(i32 0, i32* %addr) + br label %end + +false: + %tmp5 = tail call i32 @doSomething(i32 6, i32* %addr) + br label %end + +end: + %tmp.0 = phi i32 [ %tmp4, %true ], [ %tmp5, %false ] + ret i32 %tmp.0 +} -- 2.34.1