//
// This file defines the pass which inserts x86 AVX vzeroupper instructions
// before calls to SSE encoded functions. This avoids transition latency
-// penalty when tranfering control between AVX encoded instructions and old
+// penalty when transferring control between AVX encoded instructions and old
// SSE encoding mode.
//
//===----------------------------------------------------------------------===//
}
/// processBasicBlock - Loop over all of the instructions in the basic block,
-/// inserting vzero upper instructions before function calls.
+/// inserting vzeroupper instructions before function calls.
void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
// Start by assuming that the block PASS_THROUGH, which implies no unguarded
// If the call won't clobber any YMM register, skip it as well. It usually
// happens on helper function calls (such as '_chkstk', '_ftol2') where
// standard calling convention is not used (RegMask is not used to mark
- // register clobbered and register usage (def/imp-def/use) is well-dfined
+ // register clobbered and register usage (def/imp-def/use) is well-defined
// and explicitly specified.
if (MI->isCall() && !callClobbersAnyYmmReg(MI))
continue;
}
/// runOnMachineFunction - Loop over all of the basic blocks, inserting
-/// vzero upper instructions before function calls.
+/// vzeroupper instructions before function calls.
bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
- if (MF.getTarget().getSubtarget<X86Subtarget>().hasAVX512())
+ const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
+ if (!ST.hasAVX() || ST.hasAVX512())
return false;
- TII = MF.getTarget().getInstrInfo();
+ TII = ST.getInstrInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
EverMadeChange = false;
+ bool FnHasLiveInYmm = checkFnHasLiveInYmm(MRI);
+
// Fast check: if the function doesn't use any ymm registers, we don't need
// to insert any VZEROUPPER instructions. This is constant-time, so it is
// cheap in the common case of no ymm use.
- bool YMMUsed = false;
- const TargetRegisterClass *RC = &X86::VR256RegClass;
- for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end();
- i != e; i++) {
- if (!MRI.reg_nodbg_empty(*i)) {
- YMMUsed = true;
- break;
+ bool YMMUsed = FnHasLiveInYmm;
+ if (!YMMUsed) {
+ const TargetRegisterClass *RC = &X86::VR256RegClass;
+ for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end(); i != e;
+ i++) {
+ if (!MRI.reg_nodbg_empty(*i)) {
+ YMMUsed = true;
+ break;
+ }
}
}
if (!YMMUsed) {
// Process all blocks. This will compute block exit states, record the first
// unguarded call in each block, and add successors of dirty blocks to the
// DirtySuccessors list.
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
- processBasicBlock(*I);
+ for (MachineBasicBlock &MBB : MF)
+ processBasicBlock(MBB);
// If any YMM regs are live in to this function, add the entry block to the
// DirtySuccessors list
- if (checkFnHasLiveInYmm(MRI))
+ if (FnHasLiveInYmm)
addDirtySuccessor(MF.front());
// Re-visit all blocks that are successors of EXITS_DIRTY bsocks. Add