1 //===-- X86RegisterInfo.cpp - X86 Register Information --------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains the X86 implementation of the TargetRegisterInfo class.
11 // This file is responsible for the frame pointer elimination optimization
14 //===----------------------------------------------------------------------===//
16 #include "X86RegisterInfo.h"
17 #include "X86FrameLowering.h"
18 #include "X86InstrBuilder.h"
19 #include "X86MachineFunctionInfo.h"
20 #include "X86Subtarget.h"
21 #include "X86TargetMachine.h"
22 #include "llvm/ADT/BitVector.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/CodeGen/MachineFrameInfo.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineFunctionPass.h"
27 #include "llvm/CodeGen/MachineInstrBuilder.h"
28 #include "llvm/CodeGen/MachineModuleInfo.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/IR/Constants.h"
31 #include "llvm/IR/Function.h"
32 #include "llvm/IR/Type.h"
33 #include "llvm/MC/MCAsmInfo.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/ErrorHandling.h"
36 #include "llvm/Target/TargetFrameLowering.h"
37 #include "llvm/Target/TargetInstrInfo.h"
38 #include "llvm/Target/TargetMachine.h"
39 #include "llvm/Target/TargetOptions.h"
43 #define GET_REGINFO_TARGET_DESC
44 #include "X86GenRegisterInfo.inc"
// Hidden command-line option "x86-use-base-pointer", initialised to true.
// hasBasePointer() reads this flag before deciding whether a base pointer
// register is needed.
47 EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
48 cl::desc("Enable use of a base pointer for complex stack frames"));
// Constructs the X86 register info for target triple TT.
// The generated base class is given RIP on 64-bit triples and EIP otherwise
// (as both its first and last argument), together with the two DWARF register
// flavours returned by X86_MC::getDwarfRegFlavour(TT, false/true).
50 X86RegisterInfo::X86RegisterInfo(const Triple &TT)
51 : X86GenRegisterInfo((TT.isArch64Bit() ? X86::RIP : X86::EIP),
52 X86_MC::getDwarfRegFlavour(TT, false),
53 X86_MC::getDwarfRegFlavour(TT, true),
54 (TT.isArch64Bit() ? X86::RIP : X86::EIP)) {
55 X86_MC::InitLLVM2SEHRegisterMapping(this);
57 // Cache some information.
58 Is64Bit = TT.isArch64Bit();
59 IsWin64 = Is64Bit && TT.isOSWindows();
61 // Use a callee-saved register as the base pointer. These registers must
62 // not conflict with any ABI requirements. For example, in 32-bit mode PIC
63 // requires GOT in the EBX register before function calls via PLT GOT pointer.
66 // This matches the simplified 32-bit pointer code in the data layout
68 // FIXME: Should use the data layout?
// Every environment except GNUX32 selects the 64-bit RSP/RBP/RBX here; GNUX32
// selects ESP/EBP/EBX.
69 bool Use64BitReg = TT.getEnvironment() != Triple::GNUX32;
70 StackPtr = Use64BitReg ? X86::RSP : X86::ESP;
71 FramePtr = Use64BitReg ? X86::RBP : X86::EBP;
72 BasePtr = Use64BitReg ? X86::RBX : X86::EBX;
// Hook answering whether liveness should be tracked after register allocation
// for MF; the passes named below depend on that liveness information.
82 X86RegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
83 // ExeDepsFixer and PostRAScheduler require liveness.
// Returns the SEH register number for register i, which is taken directly
// from getEncodingValue(i).
88 X86RegisterInfo::getSEHRegNum(unsigned i) const {
89 return getEncodingValue(i);
// Forwards to the TableGen-generated getSubClassWithSubReg for RC, after
// substituting sub_8bit_hi for sub_8bit when not compiling for 64-bit mode.
92 const TargetRegisterClass *
93 X86RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
95 // The sub_8bit sub-register index is more constrained in 32-bit mode.
96 // It behaves just like the sub_8bit_hi index.
97 if (!Is64Bit && Idx == X86::sub_8bit)
98 Idx = X86::sub_8bit_hi;
100 // Forward to TableGen's default version.
101 return X86GenRegisterInfo::getSubClassWithSubReg(RC, Idx);
// Forwards to the TableGen-generated getMatchingSuperRegClass(A, B, SubIdx).
// Outside 64-bit mode, a sub_8bit query first narrows A to the class returned
// by the generated getSubClassWithSubReg(A, sub_8bit_hi).
104 const TargetRegisterClass *
105 X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
106 const TargetRegisterClass *B,
107 unsigned SubIdx) const {
108 // The sub_8bit sub-register index is more constrained in 32-bit mode.
109 if (!Is64Bit && SubIdx == X86::sub_8bit) {
110 A = X86GenRegisterInfo::getSubClassWithSubReg(A, X86::sub_8bit_hi);
114 return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, SubIdx);
// Walks the super-classes of RC looking for one of the register classes listed
// in the switch below whose spill size equals that of RC. GR8_NOREX is
// special-cased up front.
117 const TargetRegisterClass *
118 X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
119 const MachineFunction &MF) const {
120 // Don't allow super-classes of GR8_NOREX. This class is only used after
121 // extracting sub_8bit_hi sub-registers. The H sub-registers cannot be copied
122 // to the full GR8 register class in 64-bit mode, so we cannot allow the
123 // register class inflation.
125 // The GR8_NOREX class is always used in a way that won't be constrained to a
126 // sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the
128 if (RC == &X86::GR8_NOREXRegClass)
// Start from RC itself and iterate over its super-class list.
131 const TargetRegisterClass *Super = RC;
132 TargetRegisterClass::sc_iterator I = RC->getSuperClasses();
134 switch (Super->getID()) {
135 case X86::GR8RegClassID:
136 case X86::GR16RegClassID:
137 case X86::GR32RegClassID:
138 case X86::GR64RegClassID:
139 case X86::FR32RegClassID:
140 case X86::FR64RegClassID:
141 case X86::RFP32RegClassID:
142 case X86::RFP64RegClassID:
143 case X86::RFP80RegClassID:
144 case X86::VR128RegClassID:
145 case X86::VR256RegClassID:
146 // Don't return a super-class that would shrink the spill size.
147 // That can happen with the vector and float classes.
148 if (Super->getSize() == RC->getSize())
// Returns the register class to use for pointer values of the requested Kind.
// For kinds 0-3 the 64-bit variant is returned when the subtarget reports
// isTarget64BitLP64() and the 32-bit variant otherwise. Kind 4 defers to
// getGPRsForTailCall(MF). Any other kind reaches llvm_unreachable.
156 const TargetRegisterClass *
157 X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
158 unsigned Kind) const {
159 const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
161 default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
162 case 0: // Normal GPRs.
163 if (Subtarget.isTarget64BitLP64())
164 return &X86::GR64RegClass;
165 return &X86::GR32RegClass;
166 case 1: // Normal GPRs except the stack pointer (for encoding reasons).
167 if (Subtarget.isTarget64BitLP64())
168 return &X86::GR64_NOSPRegClass;
169 return &X86::GR32_NOSPRegClass;
170 case 2: // NOREX GPRs.
171 if (Subtarget.isTarget64BitLP64())
172 return &X86::GR64_NOREXRegClass;
173 return &X86::GR32_NOREXRegClass;
174 case 3: // NOREX GPRs except the stack pointer (for encoding reasons).
175 if (Subtarget.isTarget64BitLP64())
176 return &X86::GR64_NOREX_NOSPRegClass;
177 return &X86::GR32_NOREX_NOSPRegClass;
178 case 4: // Available for tailcall (not callee-saved GPRs).
179 return getGPRsForTailCall(MF);
// Returns the GPR register class usable for tail calls in MF. The candidates
// are GR64_TCW64, GR64_TC, GR32 and GR32_TC.
183 const TargetRegisterClass *
184 X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const {
185 const Function *F = MF.getFunction();
// Win64 targets, and functions explicitly using the X86_64_Win64 calling
// convention, get the Win64 tail-call class.
186 if (IsWin64 || (F && F->getCallingConv() == CallingConv::X86_64_Win64))
187 return &X86::GR64_TCW64RegClass;
189 return &X86::GR64_TCRegClass;
// True only when a Function is available and it uses the HiPE convention.
191 bool hasHipeCC = (F ? F->getCallingConv() == CallingConv::HiPE : false);
193 return &X86::GR32RegClass;
194 return &X86::GR32_TCRegClass;
// Returns the class to use for cross-class copies of RC. For the CCR class
// a general-purpose class (GR64 or GR32) is substituted.
197 const TargetRegisterClass *
198 X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
199 if (RC == &X86::CCRRegClass) {
201 return &X86::GR64RegClass;
203 return &X86::GR32RegClass;
// Returns a register-pressure limit for class RC in MF, selected by switching
// on the class ID.
209 X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
210 MachineFunction &MF) const {
211 const X86FrameLowering *TFI = getFrameLowering(MF);
// FPDiff is 1 when the function has a frame pointer and 0 otherwise.
213 unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
214 switch (RC->getID()) {
217 case X86::GR32RegClassID:
219 case X86::GR64RegClassID:
221 case X86::VR128RegClassID:
222 return Is64Bit ? 10 : 4;
223 case X86::VR64RegClassID:
// Returns the callee-saved register list for MF, chosen from the function's
// calling convention together with the subtarget's SSE/AVX/AVX-512 support,
// the cached Is64Bit/IsWin64 flags and whether the function calls EH return.
// MF must be non-null.
229 X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
// Check MF before its first dereference; the assertion previously sat after
// the getSubtarget()/getMMI() calls below and therefore could never fire.
236 assert(MF && "MachineFunction required");
230 const X86Subtarget &Subtarget = MF->getSubtarget<X86Subtarget>();
231 bool HasSSE = Subtarget.hasSSE1();
232 bool HasAVX = Subtarget.hasAVX();
233 bool HasAVX512 = Subtarget.hasAVX512();
234 bool CallsEHReturn = MF->getMMI().callsEHReturn();
237 switch (MF->getFunction()->getCallingConv()) {
238 case CallingConv::GHC:
239 case CallingConv::HiPE:
240 return CSR_NoRegs_SaveList;
241 case CallingConv::AnyReg:
243 return CSR_64_AllRegs_AVX_SaveList;
244 return CSR_64_AllRegs_SaveList;
245 case CallingConv::PreserveMost:
246 return CSR_64_RT_MostRegs_SaveList;
247 case CallingConv::PreserveAll:
249 return CSR_64_RT_AllRegs_AVX_SaveList;
250 return CSR_64_RT_AllRegs_SaveList;
251 case CallingConv::CXX_FAST_TLS:
// A function marked split-CSR uses the "PE" variant of the Darwin TLS list.
253 return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR() ?
254 CSR_64_CXX_TLS_Darwin_PE_SaveList : CSR_64_TLS_Darwin_SaveList;
256 case CallingConv::Intel_OCL_BI: {
// Checked from the most capable vector extension down; the Win64 variant is
// tested before the generic 64-bit one at each level.
257 if (HasAVX512 && IsWin64)
258 return CSR_Win64_Intel_OCL_BI_AVX512_SaveList;
259 if (HasAVX512 && Is64Bit)
260 return CSR_64_Intel_OCL_BI_AVX512_SaveList;
261 if (HasAVX && IsWin64)
262 return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
263 if (HasAVX && Is64Bit)
264 return CSR_64_Intel_OCL_BI_AVX_SaveList;
265 if (!HasAVX && !IsWin64 && Is64Bit)
266 return CSR_64_Intel_OCL_BI_SaveList;
269 case CallingConv::HHVM:
270 return CSR_64_HHVM_SaveList;
271 case CallingConv::Cold:
273 return CSR_64_MostRegs_SaveList;
275 case CallingConv::X86_64_Win64:
276 return CSR_Win64_SaveList;
277 case CallingConv::X86_64_SysV:
279 return CSR_64EHRet_SaveList;
280 return CSR_64_SaveList;
281 case CallingConv::X86_INTR:
284 return CSR_64_AllRegs_AVX_SaveList;
286 return CSR_64_AllRegs_SaveList;
289 return CSR_32_AllRegs_SSE_SaveList;
291 return CSR_32_AllRegs_SaveList;
299 return CSR_Win64_SaveList;
301 return CSR_64EHRet_SaveList;
302 return CSR_64_SaveList;
305 return CSR_32EHRet_SaveList;
306 return CSR_32_SaveList;
// Returns the callee-saved registers that are preserved via copies. The
// Darwin CXX TLS "ViaCopy" list is returned only for functions that use the
// CXX_FAST_TLS calling convention and are marked split-CSR. MF must be non-null.
309 const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy(
310 const MachineFunction *MF) const {
311 assert(MF && "Invalid MachineFunction pointer.");
312 if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
313 MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR())
314 return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList;
// Returns the register mask of registers preserved across a call made with
// calling convention CC, chosen from CC together with the subtarget's
// SSE/AVX/AVX-512 support and the cached Is64Bit/IsWin64 flags. The case
// structure parallels getCalleeSavedRegs(), returning *_RegMask tables instead
// of *_SaveList tables.
319 X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
320 CallingConv::ID CC) const {
321 const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
322 bool HasSSE = Subtarget.hasSSE1();
323 bool HasAVX = Subtarget.hasAVX();
324 bool HasAVX512 = Subtarget.hasAVX512();
327 case CallingConv::GHC:
328 case CallingConv::HiPE:
329 return CSR_NoRegs_RegMask;
330 case CallingConv::AnyReg:
332 return CSR_64_AllRegs_AVX_RegMask;
333 return CSR_64_AllRegs_RegMask;
334 case CallingConv::PreserveMost:
335 return CSR_64_RT_MostRegs_RegMask;
336 case CallingConv::PreserveAll:
338 return CSR_64_RT_AllRegs_AVX_RegMask;
339 return CSR_64_RT_AllRegs_RegMask;
340 case CallingConv::CXX_FAST_TLS:
342 return CSR_64_TLS_Darwin_RegMask;
344 case CallingConv::Intel_OCL_BI: {
// Checked from the most capable vector extension down; the Win64 variant is
// tested before the generic 64-bit one at each level.
345 if (HasAVX512 && IsWin64)
346 return CSR_Win64_Intel_OCL_BI_AVX512_RegMask;
347 if (HasAVX512 && Is64Bit)
348 return CSR_64_Intel_OCL_BI_AVX512_RegMask;
349 if (HasAVX && IsWin64)
350 return CSR_Win64_Intel_OCL_BI_AVX_RegMask;
351 if (HasAVX && Is64Bit)
352 return CSR_64_Intel_OCL_BI_AVX_RegMask;
353 if (!HasAVX && !IsWin64 && Is64Bit)
354 return CSR_64_Intel_OCL_BI_RegMask;
357 case CallingConv::HHVM:
358 return CSR_64_HHVM_RegMask;
359 case CallingConv::Cold:
361 return CSR_64_MostRegs_RegMask;
363 case CallingConv::X86_64_Win64:
364 return CSR_Win64_RegMask;
365 case CallingConv::X86_64_SysV:
366 return CSR_64_RegMask;
367 case CallingConv::X86_INTR:
370 return CSR_64_AllRegs_AVX_RegMask;
372 return CSR_64_AllRegs_RegMask;
375 return CSR_32_AllRegs_SSE_RegMask;
377 return CSR_32_AllRegs_RegMask;
383 // Unlike getCalleeSavedRegs(), we don't have MMI so we can't check
387 return CSR_Win64_RegMask;
388 return CSR_64_RegMask;
390 return CSR_32_RegMask;
// Returns the register mask for the "no callee-saved registers" set
// (CSR_NoRegs_RegMask).
394 X86RegisterInfo::getNoPreservedMask() const {
395 return CSR_NoRegs_RegMask;
// Returns the preserved-register mask used for Darwin TLS calls
// (CSR_64_TLS_Darwin_RegMask).
398 const uint32_t *X86RegisterInfo::getDarwinTLSCallPreservedMask() const {
399 return CSR_64_TLS_Darwin_RegMask;
// Builds the set of reserved registers for MF as a BitVector sized to
// getNumRegs(). Covered here: the stack pointer, instruction pointer,
// frame pointer (when the function has one), base pointer (when one is
// needed), the segment registers, the x87 stack registers, and registers that
// are unavailable in the current mode or without AVX-512.
402 BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
403 BitVector Reserved(getNumRegs());
404 const X86FrameLowering *TFI = getFrameLowering(MF);
406 // Set the stack-pointer register and its aliases as reserved.
407 for (MCSubRegIterator I(X86::RSP, this, /*IncludeSelf=*/true); I.isValid();
411 // Set the instruction pointer register and its aliases as reserved.
412 for (MCSubRegIterator I(X86::RIP, this, /*IncludeSelf=*/true); I.isValid();
416 // Set the frame-pointer register and its aliases as reserved if needed.
417 if (TFI->hasFP(MF)) {
418 for (MCSubRegIterator I(X86::RBP, this, /*IncludeSelf=*/true); I.isValid();
423 // Set the base-pointer register and its aliases as reserved if needed.
424 if (hasBasePointer(MF)) {
// The base register must survive calls under the function's own calling
// convention; a convention whose mask clobbers it is rejected.
425 CallingConv::ID CC = MF.getFunction()->getCallingConv();
426 const uint32_t *RegMask = getCallPreservedMask(MF, CC);
427 if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister()))
// The two literals below are concatenated by the compiler, so the second
// starts with a space to keep "with" and "this" apart in the message.
429 "Stack realignment in presence of dynamic allocas is not supported with"
430 " this calling convention.");
// Iterate from the 64-bit form of the base register, including itself.
432 unsigned BasePtr = getX86SubSuperRegister(getBaseRegister(), 64);
433 for (MCSubRegIterator I(BasePtr, this, /*IncludeSelf=*/true);
438 // Mark the segment registers as reserved.
439 Reserved.set(X86::CS);
440 Reserved.set(X86::SS);
441 Reserved.set(X86::DS);
442 Reserved.set(X86::ES);
443 Reserved.set(X86::FS);
444 Reserved.set(X86::GS);
446 // Mark the floating point stack registers as reserved.
447 for (unsigned n = 0; n != 8; ++n)
448 Reserved.set(X86::ST0 + n);
450 // Reserve the registers that only exist in 64-bit mode.
452 // These 8-bit registers are part of the x86-64 extension even though their
453 // super-registers are old 32-bits.
454 Reserved.set(X86::SIL);
455 Reserved.set(X86::DIL);
456 Reserved.set(X86::BPL);
457 Reserved.set(X86::SPL);
459 for (unsigned n = 0; n != 8; ++n) {
461 for (MCRegAliasIterator AI(X86::R8 + n, this, true); AI.isValid(); ++AI)
465 for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid(); ++AI)
// XMM16-XMM31 and their aliases are visited unless the target is 64-bit with
// AVX-512.
469 if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) {
470 for (unsigned n = 16; n != 32; ++n) {
471 for (MCRegAliasIterator AI(X86::XMM0 + n, this, true); AI.isValid(); ++AI)
// Clears the bits for EFLAGS, RIP, EIP and IP in a stack-map live-out mask.
// Mask is an array of 32-bit words in which register Reg is represented by
// bit (Reg % 32) of word Mask[Reg / 32]. In builds with assertions enabled,
// EFLAGS being marked live-out triggers an assertion before the bits are
// cleared.
479 void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
480 // Check if the EFLAGS register is marked as live-out. This shouldn't happen,
481 // because the calling convention defines the EFLAGS register as NOT
484 // Unfortunately the EFLAGS show up as live-out after branch folding. Adding
485 // an assert to track this and clear the register afterwards to avoid
486 // unnecessary crashes during release builds.
487 assert(!(Mask[X86::EFLAGS / 32] & (1U << (X86::EFLAGS % 32))) &&
488 "EFLAGS are not live-out from a patchpoint.");
490 // Also clean other registers that don't need preserving (IP).
491 for (auto Reg : {X86::EFLAGS, X86::RIP, X86::EIP, X86::IP})
492 Mask[Reg / 32] &= ~(1U << (Reg % 32));
495 //===----------------------------------------------------------------------===//
496 // Stack Frame Processing methods
497 //===----------------------------------------------------------------------===//
// Returns true when the frame has variable-sized objects or an opaque stack
// pointer adjustment, i.e. when the stack pointer cannot be used to address
// stack objects.
499 static bool CantUseSP(const MachineFrameInfo *MFI) {
500 return MFI->hasVarSizedObjects() || MFI->hasOpaqueSPAdjustment();
// Decides whether MF needs a dedicated base pointer register. The
// EnableBasePointer command-line flag is consulted first; otherwise a base
// pointer is required exactly when the stack needs realignment (frame pointer
// unusable) and CantUseSP() reports the stack pointer as unusable too.
503 bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
504 const MachineFrameInfo *MFI = MF.getFrameInfo();
506 if (!EnableBasePointer)
509 // When we need stack realignment, we can't address the stack from the frame
510 // pointer. When we have dynamic allocas or stack-adjusting inline asm, we
511 // can't address variables from the stack pointer. MS inline asm can
512 // reference locals while also adjusting the stack pointer. When we can't
513 // use both the SP and the FP, we need a separate base pointer register.
514 bool CantUseFP = needsStackRealignment(MF);
515 return CantUseFP && CantUseSP(MFI);
// Reports whether the stack of MF can still be realigned. The generic
// TargetRegisterInfo check is applied first; the frame pointer must then
// still be reservable, and the final answer shown here depends on whether the
// base pointer can be reserved as well.
518 bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
519 if (!TargetRegisterInfo::canRealignStack(MF))
522 const MachineFrameInfo *MFI = MF.getFrameInfo();
523 const MachineRegisterInfo *MRI = &MF.getRegInfo();
525 // Stack realignment requires a frame pointer. If we already started
526 // register allocation with frame pointer elimination, it is too late now.
527 if (!MRI->canReserveReg(FramePtr))
530 // If a base pointer is necessary. Check that it isn't too late to reserve
533 return MRI->canReserveReg(BasePtr);
// Not expected to be called on X86: the body is an unconditional
// llvm_unreachable.
537 bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
538 unsigned Reg, int &FrameIdx) const {
539 // Since X86 defines assignCalleeSavedSpillSlots, which always returns true,
540 // this function is neither used nor tested.
541 llvm_unreachable("Unused function on X86. Otherwise need a test case.");
// Rewrites the frame-index operand at position FIOperandNum of the
// instruction at II into a concrete base register and folds the frame
// object's offset into the instruction's displacement. LOCAL_ESCAPE,
// STACKMAP and PATCHPOINT use different operand layouts and are handled
// separately below.
545 X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
546 int SPAdj, unsigned FIOperandNum,
547 RegScavenger *RS) const {
548 MachineInstr &MI = *II;
549 MachineFunction &MF = *MI.getParent()->getParent();
550 const X86FrameLowering *TFI = getFrameLowering(MF);
551 int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
// Memory-operand tail-call opcodes are flagged because they execute after the
// frame pointer has been popped (see the offset computation further down).
554 unsigned Opc = MI.getOpcode();
555 bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm ||
556 Opc == X86::TCRETURNmi || Opc == X86::TCRETURNmi64;
// Choose the base register. Negative frame indices are addressed from the
// frame pointer whenever a base pointer or stack realignment is in play.
558 if (hasBasePointer(MF))
559 BasePtr = (FrameIndex < 0 ? FramePtr : getBaseRegister());
560 else if (needsStackRealignment(MF))
561 BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr);
565 BasePtr = (TFI->hasFP(MF) ? FramePtr : StackPtr);
567 // LOCAL_ESCAPE uses a single offset, with no register. It only works in the
568 // simple FP case, and doesn't work with stack realignment. On 32-bit, the
569 // offset is from the traditional base pointer location. On 64-bit, the
570 // offset is from the SP at the end of the prologue, not the FP location. This
571 // matches the behavior of llvm.frameaddress.
572 unsigned IgnoredFrameReg;
573 if (Opc == TargetOpcode::LOCAL_ESCAPE) {
574 MachineOperand &FI = MI.getOperand(FIOperandNum);
576 Offset = TFI->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
577 FI.ChangeToImmediate(Offset);
581 // For LEA64_32r when BasePtr is 32-bits (X32) we can use full-size 64-bit
582 // register as source operand, semantic is the same and destination is
583 // 32-bits. It saves one byte per lea in code since 0x67 prefix is avoided.
584 if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr))
585 BasePtr = getX86SubSuperRegister(BasePtr, 64);
587 // This must be part of a four operand memory reference. Replace the
588 // FrameIndex with base register with EBP. Add an offset to the offset.
589 MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
591 // Now add the frame object offset to the offset from EBP.
594 // Tail call jmp happens after FP is popped.
595 const MachineFrameInfo *MFI = MF.getFrameInfo();
596 FIOffset = MFI->getObjectOffset(FrameIndex) - TFI->getOffsetOfLocalArea();
598 FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
600 if (BasePtr == StackPtr)
603 // The frame index format for stackmaps and patchpoints is different from the
604 // X86 format. It only has a FI and an offset.
605 if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
606 assert(BasePtr == FramePtr && "Expected the FP as base register");
607 int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
608 MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
// The displacement is the operand three slots after the frame-index operand;
// it is either an immediate or a symbolic operand carrying an offset.
612 if (MI.getOperand(FIOperandNum+3).isImm()) {
613 // Offset is a 32-bit integer.
614 int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
615 int Offset = FIOffset + Imm;
616 assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
617 "Requesting 64-bit offset in 32-bit immediate!");
618 MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
620 // Offset is symbolic. This is extremely rare.
621 uint64_t Offset = FIOffset +
622 (uint64_t)MI.getOperand(FIOperandNum+3).getOffset();
623 MI.getOperand(FIOperandNum + 3).setOffset(Offset);
// Returns the register used to address the frame of MF: FramePtr when the
// function has a frame pointer, StackPtr otherwise.
627 unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
628 const X86FrameLowering *TFI = getFrameLowering(MF);
629 return TFI->hasFP(MF) ? FramePtr : StackPtr;
// Returns the frame register of MF sized to match the pointer width: on
// 64-bit ILP32 subtargets the register from getFrameRegister() is converted
// to its 32-bit form via getX86SubSuperRegister.
633 X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
634 const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
635 unsigned FrameReg = getFrameRegister(MF);
636 if (Subtarget.isTarget64BitILP32())
637 FrameReg = getX86SubSuperRegister(FrameReg, 32);
// Maps a SIMD register to a ZMM register. An XMM or YMM register in the
// 0..31 range is translated by adding its offset from XMM0/YMM0 to ZMM0;
// the ZMM0..ZMM31 range is tested as well. A register that matches none of
// the handled ranges reaches llvm_unreachable.
641 unsigned llvm::get512BitSuperRegister(unsigned Reg) {
642 if (Reg >= X86::XMM0 && Reg <= X86::XMM31)
643 return X86::ZMM0 + (Reg - X86::XMM0);
644 if (Reg >= X86::YMM0 && Reg <= X86::YMM31)
645 return X86::ZMM0 + (Reg - X86::YMM0);
646 if (Reg >= X86::ZMM0 && Reg <= X86::ZMM31)
648 llvm_unreachable("Unexpected SIMD register");