{ X86::MOVSX64rr8, X86::MOVSX64rm8, 0 },
{ X86::MOVUPDrr, X86::MOVUPDrm, TB_ALIGN_16 },
{ X86::MOVUPSrr, X86::MOVUPSrm, 0 },
- { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 },
{ X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, TB_ALIGN_16 },
{ X86::MOVZX16rr8, X86::MOVZX16rm8, 0 },
{ X86::MOVZX32rr16, X86::MOVZX32rm16, 0 },
{ X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, 0 },
{ X86::VMOVUPDrr, X86::VMOVUPDrm, 0 },
{ X86::VMOVUPSrr, X86::VMOVUPSrm, 0 },
- { X86::VMOVZQI2PQIrr, X86::VMOVZQI2PQIrm, 0 },
  { X86::VMOVZPQILo2PQIrr, X86::VMOVZPQILo2PQIrm, TB_ALIGN_16 },
{ X86::VPABSBrr128, X86::VPABSBrm128, 0 },
{ X86::VPABSDrr128, X86::VPABSDrm128, 0 },
bool IsIntrinOpcode;
isFMA3(Opc, &IsIntrinOpcode);
- unsigned GroupsNum;
+ size_t GroupsNum;
const unsigned (*OpcodeGroups)[3];
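+  // array_lengthof is the type-safe form of sizeof(A)/sizeof(A[0]): it only
+  // accepts real arrays, so a pointer passed by mistake fails to compile
+  // instead of yielding a bogus element count.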
if (IsIntrinOpcode) {
- GroupsNum = sizeof(IntrinOpcodeGroups) / sizeof(IntrinOpcodeGroups[0]);
+ GroupsNum = array_lengthof(IntrinOpcodeGroups);
OpcodeGroups = IntrinOpcodeGroups;
} else {
- GroupsNum = sizeof(RegularOpcodeGroups) / sizeof(RegularOpcodeGroups[0]);
+ GroupsNum = array_lengthof(RegularOpcodeGroups);
OpcodeGroups = RegularOpcodeGroups;
}
const unsigned *FoundOpcodesGroup = nullptr;
- unsigned FormIndex;
+ size_t FormIndex;
// Look for the input opcode in the corresponding opcodes table.
- unsigned GroupIndex = 0;
- for (; GroupIndex < GroupsNum && !FoundOpcodesGroup; GroupIndex++) {
- for (FormIndex = 0; FormIndex < FormsNum; FormIndex++) {
+ for (size_t GroupIndex = 0; GroupIndex < GroupsNum && !FoundOpcodesGroup;
+ ++GroupIndex) {
+ for (FormIndex = 0; FormIndex < FormsNum; ++FormIndex) {
      if (OpcodeGroups[GroupIndex][FormIndex] == Opc) {
        FoundOpcodesGroup = OpcodeGroups[GroupIndex];
        break;
      }
    }
  }

  // The input opcode does not match any of the opcodes from the tables;
  // such unsupported FMA opcodes must be handled specially by the caller.
  if (FoundOpcodesGroup == nullptr)
    return 0;
-inline static bool MaskRegClassContains(unsigned Reg) {
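+// All the VK* mask register classes contain the same eight k-registers;
+// matching any of them identifies Reg as a mask register.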
+static bool MaskRegClassContains(unsigned Reg) {
return X86::VK8RegClass.contains(Reg) ||
X86::VK16RegClass.contains(Reg) ||
X86::VK32RegClass.contains(Reg) ||
X86::VK64RegClass.contains(Reg) ||
X86::VK1RegClass.contains(Reg);
}
+
+static bool GRRegClassContains(unsigned Reg) {
+ return X86::GR64RegClass.contains(Reg) ||
+ X86::GR32RegClass.contains(Reg) ||
+ X86::GR16RegClass.contains(Reg) ||
+ X86::GR8RegClass.contains(Reg);
+}
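+
+// KMOVB copies between a mask register and the low byte of a GPR. It is only
+// available with AVX512DQ, and its GPR operand is encoded as a 32-bit
+// register, hence the getX86SubSuperRegister widening below.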
static
-unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg) {
+unsigned copyPhysRegOpcode_AVX512_DQ(unsigned& DestReg, unsigned& SrcReg) {
+ if (MaskRegClassContains(SrcReg) && X86::GR8RegClass.contains(DestReg)) {
+ DestReg = getX86SubSuperRegister(DestReg, MVT::i32);
+ return X86::KMOVBrk;
+ }
+ if (MaskRegClassContains(DestReg) && X86::GR8RegClass.contains(SrcReg)) {
+ SrcReg = getX86SubSuperRegister(SrcReg, MVT::i32);
+ return X86::KMOVBkr;
+ }
+ return 0;
+}
+
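+// KMOVD/KMOVQ copies between mask registers and 32/64-bit GPRs, as well as
+// 64-bit mask-to-mask copies, are only available with AVX512BW.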
+static
+unsigned copyPhysRegOpcode_AVX512_BW(unsigned& DestReg, unsigned& SrcReg) {
+ if (MaskRegClassContains(SrcReg) && MaskRegClassContains(DestReg))
+ return X86::KMOVQkk;
+ if (MaskRegClassContains(SrcReg) && X86::GR32RegClass.contains(DestReg))
+ return X86::KMOVDrk;
+ if (MaskRegClassContains(SrcReg) && X86::GR64RegClass.contains(DestReg))
+ return X86::KMOVQrk;
+ if (MaskRegClassContains(DestReg) && X86::GR32RegClass.contains(SrcReg))
+ return X86::KMOVDkr;
+ if (MaskRegClassContains(DestReg) && X86::GR64RegClass.contains(SrcReg))
+ return X86::KMOVQkr;
+ return 0;
+}
+
+static
+unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg,
+                                  const X86Subtarget &Subtarget) {
+ if (Subtarget.hasDQI())
+ if (auto Opc = copyPhysRegOpcode_AVX512_DQ(DestReg, SrcReg))
+ return Opc;
+ if (Subtarget.hasBWI())
+ if (auto Opc = copyPhysRegOpcode_AVX512_BW(DestReg, SrcReg))
+ return Opc;
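+  // Without DQI/BWI we fall through to the AVX512F-only cases below, where
+  // mask copies go through the 16-bit KMOVW forms.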
if (X86::VR128XRegClass.contains(DestReg, SrcReg) ||
X86::VR256XRegClass.contains(DestReg, SrcReg) ||
X86::VR512RegClass.contains(DestReg, SrcReg)) {
SrcReg = get512BitSuperRegister(SrcReg);
return X86::VMOVAPSZrr;
}
- if (MaskRegClassContains(DestReg) &&
- MaskRegClassContains(SrcReg))
+ if (MaskRegClassContains(DestReg) && MaskRegClassContains(SrcReg))
return X86::KMOVWkk;
- if (MaskRegClassContains(DestReg) &&
- (X86::GR32RegClass.contains(SrcReg) ||
- X86::GR16RegClass.contains(SrcReg) ||
- X86::GR8RegClass.contains(SrcReg))) {
+ if (MaskRegClassContains(DestReg) && GRRegClassContains(SrcReg)) {
SrcReg = getX86SubSuperRegister(SrcReg, MVT::i32);
return X86::KMOVWkr;
}
- if ((X86::GR32RegClass.contains(DestReg) ||
- X86::GR16RegClass.contains(DestReg) ||
- X86::GR8RegClass.contains(DestReg)) &&
- MaskRegClassContains(SrcReg)) {
+ if (GRRegClassContains(DestReg) && MaskRegClassContains(SrcReg)) {
DestReg = getX86SubSuperRegister(DestReg, MVT::i32);
return X86::KMOVWrk;
}
else if (X86::VR64RegClass.contains(DestReg, SrcReg))
Opc = X86::MMX_MOVQ64rr;
else if (HasAVX512)
- Opc = copyPhysRegOpcode_AVX512(DestReg, SrcReg);
+ Opc = copyPhysRegOpcode_AVX512(DestReg, SrcReg, Subtarget);
else if (X86::VR128RegClass.contains(DestReg, SrcReg))
Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr;
else if (X86::VR256RegClass.contains(DestReg, SrcReg))
int Pop = is64 ? X86::POP64r : X86::POP32r;
int AX = is64 ? X86::RAX : X86::EAX;
- bool AXDead = (Reg == AX) ||
- (MachineBasicBlock::LQR_Dead ==
- MBB.computeRegisterLiveness(&getRegisterInfo(), AX, MI));
+ bool AXDead = (Reg == AX);
+ // FIXME: The above could figure out that AX is dead in more cases with:
+ // || (MachineBasicBlock::LQR_Dead ==
+ // MBB.computeRegisterLiveness(&getRegisterInfo(), AX, MI));
+ //
+  // Unfortunately this is slightly broken, see PR24535 and the likely
+  // related PR25033, PR24991, PR24992 and PR25201. These issues seem to
+ // showcase sub-register / super-register confusion: a previous kill
+ // of AH but no kill of AL leads computeRegisterLiveness to
+ // erroneously conclude that AX is dead.
+ //
+ // Once fixed, also update cmpxchg-clobber-flags.ll and
+ // peephole-na-phys-copy-folding.ll.
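+  //
+  // A sketch of the failure mode described above (hypothetical sequence):
+  //   ... = OP %ah<kill>   ; AH is killed here
+  //   ...                  ; AL, defined earlier, is still live
+  //   <query point>        ; computeRegisterLiveness(AX) answers LQR_Dead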
if (!AXDead)
BuildMI(MBB, MI, DL, get(Push)).addReg(AX, getKillRegState(true));
for (unsigned i = 0; i != NumAddrOps; ++i) {
const MachineOperand &MO = MOs[i];
if (i == 3 && PtrOffset != 0) {
- assert((MO.isImm() || MO.isGlobal()) &&
- "Unexpected memory operand type");
- if (MO.isImm()) {
- MIB.addImm(MO.getImm() + PtrOffset);
- } else {
- MIB.addGlobalAddress(MO.getGlobal(), MO.getOffset() + PtrOffset,
- MO.getTargetFlags());
- }
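+      // addDisp re-adds the operand with its displacement adjusted by
+      // PtrOffset; it handles both the immediate and the global-address
+      // forms that were special-cased above.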
+ MIB.addDisp(MO, PtrOffset);
} else {
MIB.addOperand(MO);
}
case X86::DIVSSrr_Int: case X86::VDIVSSrr_Int:
case X86::MULSSrr_Int: case X86::VMULSSrr_Int:
case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int:
+ case X86::VFMADDSSr132r_Int: case X86::VFNMADDSSr132r_Int:
+ case X86::VFMADDSSr213r_Int: case X86::VFNMADDSSr213r_Int:
+ case X86::VFMADDSSr231r_Int: case X86::VFNMADDSSr231r_Int:
+ case X86::VFMSUBSSr132r_Int: case X86::VFNMSUBSSr132r_Int:
+ case X86::VFMSUBSSr213r_Int: case X86::VFNMSUBSSr213r_Int:
+ case X86::VFMSUBSSr231r_Int: case X86::VFNMSUBSSr231r_Int:
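+  // The scalar FMA intrinsic forms get the same treatment as the arithmetic
+  // _Int forms above: they read only the low element of their operands.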
return false;
default:
return true;
case X86::DIVSDrr_Int: case X86::VDIVSDrr_Int:
case X86::MULSDrr_Int: case X86::VMULSDrr_Int:
case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int:
+ case X86::VFMADDSDr132r_Int: case X86::VFNMADDSDr132r_Int:
+ case X86::VFMADDSDr213r_Int: case X86::VFNMADDSDr213r_Int:
+ case X86::VFMADDSDr231r_Int: case X86::VFNMADDSDr231r_Int:
+ case X86::VFMSUBSDr132r_Int: case X86::VFNMSUBSDr132r_Int:
+ case X86::VFMSUBSDr213r_Int: case X86::VFNMSUBSDr213r_Int:
+ case X86::VFMSUBSDr231r_Int: case X86::VFNMSUBSDr231r_Int:
return false;
default:
return true;
// domains, but they require a bit more work than just switching opcodes.
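+// Rows in these tables are indexed by execution domain minus one
+// (1 = PackedSingle, 2 = PackedDouble, 3 = PackedInt), hence domain-1 below.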
static const uint16_t *lookup(unsigned opcode, unsigned domain) {
- for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
- if (ReplaceableInstrs[i][domain-1] == opcode)
- return ReplaceableInstrs[i];
+ for (const uint16_t (&Row)[3] : ReplaceableInstrs)
+ if (Row[domain-1] == opcode)
+ return Row;
return nullptr;
}
static const uint16_t *lookupAVX2(unsigned opcode, unsigned domain) {
- for (unsigned i = 0, e = array_lengthof(ReplaceableInstrsAVX2); i != e; ++i)
- if (ReplaceableInstrsAVX2[i][domain-1] == opcode)
- return ReplaceableInstrsAVX2[i];
+ for (const uint16_t (&Row)[3] : ReplaceableInstrsAVX2)
+ if (Row[domain-1] == opcode)
+ return Row;
return nullptr;
}