X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86InstrInfo.cpp;h=b5cbee566f2409276b87ea8ba1989aa3397929db;hp=cc38c393b13ed48427b50cfefda009f64a096fd8;hb=c31aaa5a3fcef2851898fb30c61c16a70564079a;hpb=c848b1bbcf88ab5d8318d990612fb1fda206ea3d diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index cc38c393b13..b5cbee566f2 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -26,8 +26,10 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -97,14 +99,11 @@ struct X86OpTblEntry { // Pin the vtable to this file. void X86InstrInfo::anchor() {} -X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) - : X86GenInstrInfo((tm.getSubtarget().is64Bit() - ? X86::ADJCALLSTACKDOWN64 - : X86::ADJCALLSTACKDOWN32), - (tm.getSubtarget().is64Bit() - ? X86::ADJCALLSTACKUP64 - : X86::ADJCALLSTACKUP32)), - TM(tm), RI(tm) { +X86InstrInfo::X86InstrInfo(X86Subtarget &STI) + : X86GenInstrInfo( + (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64 : X86::ADJCALLSTACKDOWN32), + (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64 : X86::ADJCALLSTACKUP32)), + Subtarget(STI), RI(STI) { static const X86OpTblEntry OpTbl2Addr[] = { { X86::ADC32ri, X86::ADC32mi, 0 }, @@ -379,7 +378,39 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VMOVUPDYrr, X86::VMOVUPDYmr, TB_FOLDED_STORE }, { X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE }, // AVX-512 foldable instructions - { X86::VMOVPDI2DIZrr,X86::VMOVPDI2DIZmr, TB_FOLDED_STORE } + { X86::VMOVPDI2DIZrr, X86::VMOVPDI2DIZmr, TB_FOLDED_STORE }, + { X86::VMOVAPDZrr, X86::VMOVAPDZmr, TB_FOLDED_STORE | TB_ALIGN_64 }, + { X86::VMOVAPSZrr, X86::VMOVAPSZmr, TB_FOLDED_STORE | TB_ALIGN_64 }, + { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zmr, TB_FOLDED_STORE | TB_ALIGN_64 }, + { X86::VMOVDQA64Zrr, X86::VMOVDQA64Zmr, TB_FOLDED_STORE | TB_ALIGN_64 }, + { X86::VMOVUPDZrr, X86::VMOVUPDZmr, TB_FOLDED_STORE }, + { X86::VMOVUPSZrr, X86::VMOVUPSZmr, TB_FOLDED_STORE }, + { X86::VMOVDQU8Zrr, X86::VMOVDQU8Zmr, TB_FOLDED_STORE }, + { X86::VMOVDQU16Zrr, X86::VMOVDQU16Zmr, TB_FOLDED_STORE }, + { X86::VMOVDQU32Zrr, X86::VMOVDQU32Zmr, TB_FOLDED_STORE }, + { X86::VMOVDQU64Zrr, X86::VMOVDQU64Zmr, TB_FOLDED_STORE }, + // AVX-512 foldable instructions (256-bit versions) + { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 }, + { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 }, + { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256mr, TB_FOLDED_STORE | TB_ALIGN_32 }, + { X86::VMOVDQA64Z256rr, X86::VMOVDQA64Z256mr, TB_FOLDED_STORE | TB_ALIGN_32 }, + { X86::VMOVUPDZ256rr, X86::VMOVUPDZ256mr, TB_FOLDED_STORE }, + { X86::VMOVUPSZ256rr, X86::VMOVUPSZ256mr, TB_FOLDED_STORE }, + { X86::VMOVDQU8Z256rr, X86::VMOVDQU8Z256mr, TB_FOLDED_STORE }, + { X86::VMOVDQU16Z256rr, X86::VMOVDQU16Z256mr, TB_FOLDED_STORE }, + { X86::VMOVDQU32Z256rr, X86::VMOVDQU32Z256mr, TB_FOLDED_STORE }, + { X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256mr, TB_FOLDED_STORE }, + // AVX-512 foldable instructions (128-bit versions) + { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128mr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::VMOVAPSZ128rr, X86::VMOVAPSZ128mr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128mr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::VMOVDQA64Z128rr, X86::VMOVDQA64Z128mr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::VMOVUPDZ128rr, X86::VMOVUPDZ128mr, TB_FOLDED_STORE }, + { X86::VMOVUPSZ128rr, X86::VMOVUPSZ128mr, TB_FOLDED_STORE }, + { X86::VMOVDQU8Z128rr, X86::VMOVDQU8Z128mr, TB_FOLDED_STORE }, + { X86::VMOVDQU16Z128rr, X86::VMOVDQU16Z128mr, TB_FOLDED_STORE }, + { X86::VMOVDQU32Z128rr, X86::VMOVDQU32Z128mr, TB_FOLDED_STORE }, + { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128mr, TB_FOLDED_STORE } }; for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) { @@ -535,6 +566,13 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 }, { X86::VPERMILPDYri, X86::VPERMILPDYmi, 0 }, { X86::VPERMILPSYri, X86::VPERMILPSYmi, 0 }, + { X86::VRCPPSYr, X86::VRCPPSYm, 0 }, + { X86::VRCPPSYr_Int, X86::VRCPPSYm_Int, 0 }, + { X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 }, + { X86::VSQRTPDYr, X86::VSQRTPDYm, 0 }, + { X86::VSQRTPSYr, X86::VSQRTPSYm, 0 }, + { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE }, + { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE }, // AVX2 foldable instructions { X86::VPABSBrr256, X86::VPABSBrm256, 0 }, @@ -543,13 +581,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VPSHUFDYri, X86::VPSHUFDYmi, 0 }, { X86::VPSHUFHWYri, X86::VPSHUFHWYmi, 0 }, { X86::VPSHUFLWYri, X86::VPSHUFLWYmi, 0 }, - { X86::VRCPPSYr, X86::VRCPPSYm, 0 }, - { X86::VRCPPSYr_Int, X86::VRCPPSYm_Int, 0 }, - { X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 }, - { X86::VSQRTPDYr, X86::VSQRTPDYm, 0 }, - { X86::VSQRTPSYr, X86::VSQRTPSYm, 0 }, - { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE }, - { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE }, // BMI/BMI2/LZCNT/POPCNT/TBM foldable instructions { X86::BEXTR32rr, X86::BEXTR32rm, 0 }, @@ -603,18 +634,46 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) // AVX-512 foldable instructions { X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 }, { X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 }, - { X86::VMOVDQA32rr, X86::VMOVDQA32rm, TB_ALIGN_64 }, - { X86::VMOVDQA64rr, X86::VMOVDQA64rm, TB_ALIGN_64 }, - { X86::VMOVDQU32rr, X86::VMOVDQU32rm, 0 }, - { X86::VMOVDQU64rr, X86::VMOVDQU64rm, 0 }, + { X86::VMOVAPDZrr, X86::VMOVAPDZrm, TB_ALIGN_64 }, + { X86::VMOVAPSZrr, X86::VMOVAPSZrm, TB_ALIGN_64 }, + { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zrm, TB_ALIGN_64 }, + { X86::VMOVDQA64Zrr, X86::VMOVDQA64Zrm, TB_ALIGN_64 }, + { X86::VMOVDQU8Zrr, X86::VMOVDQU8Zrm, 0 }, + { X86::VMOVDQU16Zrr, X86::VMOVDQU16Zrm, 0 }, + { X86::VMOVDQU32Zrr, X86::VMOVDQU32Zrm, 0 }, + { X86::VMOVDQU64Zrr, X86::VMOVDQU64Zrm, 0 }, + { X86::VMOVUPDZrr, X86::VMOVUPDZrm, 0 }, + { X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0 }, { X86::VPABSDZrr, X86::VPABSDZrm, 0 }, { X86::VPABSQZrr, X86::VPABSQZrm, 0 }, + // AVX-512 foldable instructions (256-bit versions) + { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 }, + { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256rm, TB_ALIGN_32 }, + { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256rm, TB_ALIGN_32 }, + { X86::VMOVDQA64Z256rr, X86::VMOVDQA64Z256rm, TB_ALIGN_32 }, + { X86::VMOVDQU8Z256rr, X86::VMOVDQU8Z256rm, 0 }, + { X86::VMOVDQU16Z256rr, X86::VMOVDQU16Z256rm, 0 }, + { X86::VMOVDQU32Z256rr, X86::VMOVDQU32Z256rm, 0 }, + { X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256rm, 0 }, + { X86::VMOVUPDZ256rr, X86::VMOVUPDZ256rm, 0 }, + { X86::VMOVUPSZ256rr, X86::VMOVUPSZ256rm, 0 }, + // AVX-512 foldable instructions (256-bit versions) + { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 }, + { X86::VMOVAPSZ128rr, X86::VMOVAPSZ128rm, TB_ALIGN_16 }, + { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128rm, TB_ALIGN_16 }, + { X86::VMOVDQA64Z128rr, X86::VMOVDQA64Z128rm, TB_ALIGN_16 }, + { X86::VMOVDQU8Z128rr, X86::VMOVDQU8Z128rm, 0 }, + { X86::VMOVDQU16Z128rr, X86::VMOVDQU16Z128rm, 0 }, + { X86::VMOVDQU32Z128rr, X86::VMOVDQU32Z128rm, 0 }, + { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128rm, 0 }, + { X86::VMOVUPDZ128rr, X86::VMOVUPDZ128rm, 0 }, + { X86::VMOVUPSZ128rr, X86::VMOVUPSZ128rm, 0 }, // AES foldable instructions { X86::AESIMCrr, X86::AESIMCrm, TB_ALIGN_16 }, { X86::AESKEYGENASSIST128rr, X86::AESKEYGENASSIST128rm, TB_ALIGN_16 }, { X86::VAESIMCrr, X86::VAESIMCrm, TB_ALIGN_16 }, - { X86::VAESKEYGENASSIST128rr, X86::VAESKEYGENASSIST128rm, TB_ALIGN_16 }, + { X86::VAESKEYGENASSIST128rr, X86::VAESKEYGENASSIST128rm, TB_ALIGN_16 } }; for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { @@ -1472,7 +1531,7 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, case X86::MOVSX32rr8: case X86::MOVZX32rr8: case X86::MOVSX64rr8: - if (!TM.getSubtarget().is64Bit()) + if (!Subtarget.is64Bit()) // It's not always legal to reference the low 8-bit of the larger // register in 32-bit mode. return false; @@ -1513,12 +1572,14 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, /// operand and follow operands form a reference to the stack frame. bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op, int &FrameIndex) const { - if (MI->getOperand(Op).isFI() && MI->getOperand(Op+1).isImm() && - MI->getOperand(Op+2).isReg() && MI->getOperand(Op+3).isImm() && - MI->getOperand(Op+1).getImm() == 1 && - MI->getOperand(Op+2).getReg() == 0 && - MI->getOperand(Op+3).getImm() == 0) { - FrameIndex = MI->getOperand(Op).getIndex(); + if (MI->getOperand(Op+X86::AddrBaseReg).isFI() && + MI->getOperand(Op+X86::AddrScaleAmt).isImm() && + MI->getOperand(Op+X86::AddrIndexReg).isReg() && + MI->getOperand(Op+X86::AddrDisp).isImm() && + MI->getOperand(Op+X86::AddrScaleAmt).getImm() == 1 && + MI->getOperand(Op+X86::AddrIndexReg).getReg() == 0 && + MI->getOperand(Op+X86::AddrDisp).getImm() == 0) { + FrameIndex = MI->getOperand(Op+X86::AddrBaseReg).getIndex(); return true; } return false; @@ -1682,15 +1743,16 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, case X86::FsMOVAPSrm: case X86::FsMOVAPDrm: { // Loads from constant pools are trivially rematerializable. - if (MI->getOperand(1).isReg() && - MI->getOperand(2).isImm() && - MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && + if (MI->getOperand(1+X86::AddrBaseReg).isReg() && + MI->getOperand(1+X86::AddrScaleAmt).isImm() && + MI->getOperand(1+X86::AddrIndexReg).isReg() && + MI->getOperand(1+X86::AddrIndexReg).getReg() == 0 && MI->isInvariantLoad(AA)) { - unsigned BaseReg = MI->getOperand(1).getReg(); + unsigned BaseReg = MI->getOperand(1+X86::AddrBaseReg).getReg(); if (BaseReg == 0 || BaseReg == X86::RIP) return true; // Allow re-materialization of PIC load. - if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal()) + if (!ReMatPICStubLoad && MI->getOperand(1+X86::AddrDisp).isGlobal()) return false; const MachineFunction &MF = *MI->getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -1701,13 +1763,14 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, case X86::LEA32r: case X86::LEA64r: { - if (MI->getOperand(2).isImm() && - MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && - !MI->getOperand(4).isReg()) { + if (MI->getOperand(1+X86::AddrScaleAmt).isImm() && + MI->getOperand(1+X86::AddrIndexReg).isReg() && + MI->getOperand(1+X86::AddrIndexReg).getReg() == 0 && + !MI->getOperand(1+X86::AddrDisp).isReg()) { // lea fi#, lea GV, etc. are all rematerializable. - if (!MI->getOperand(1).isReg()) + if (!MI->getOperand(1+X86::AddrBaseReg).isReg()) return true; - unsigned BaseReg = MI->getOperand(1).getReg(); + unsigned BaseReg = MI->getOperand(1+X86::AddrBaseReg).getReg(); if (BaseReg == 0) return true; // Allow re-materialization of lea PICBase + x. @@ -1724,12 +1787,8 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, return true; } -/// isSafeToClobberEFLAGS - Return true if it's safe insert an instruction that -/// would clobber the EFLAGS condition register. Note the result may be -/// conservative. If it cannot definitely determine the safety after visiting -/// a few instructions in each direction it assumes it's not safe. -static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) { +bool X86InstrInfo::isSafeToClobberEFLAGS(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { MachineBasicBlock::iterator E = MBB.end(); // For compile time consideration, if we are not able to determine the @@ -1950,7 +2009,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo(); unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass); unsigned Opc, leaInReg; - if (TM.getSubtarget().is64Bit()) { + if (Subtarget.is64Bit()) { Opc = X86::LEA64_32r; leaInReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass); } else { @@ -2006,7 +2065,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, // just a single insert_subreg. addRegReg(MIB, leaInReg, true, leaInReg, false); } else { - if (TM.getSubtarget().is64Bit()) + if (Subtarget.is64Bit()) leaInReg2 = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass); else leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass); @@ -2076,38 +2135,10 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, // we have better subtarget support, enable the 16-bit LEA generation here. // 16-bit LEA is also slow on Core2. bool DisableLEA16 = true; - bool is64Bit = TM.getSubtarget().is64Bit(); + bool is64Bit = Subtarget.is64Bit(); unsigned MIOpc = MI->getOpcode(); switch (MIOpc) { - case X86::SHUFPSrri: { - assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!"); - if (!TM.getSubtarget().hasSSE2()) return nullptr; - - unsigned B = MI->getOperand(1).getReg(); - unsigned C = MI->getOperand(2).getReg(); - if (B != C) return nullptr; - unsigned M = MI->getOperand(3).getImm(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri)) - .addOperand(Dest).addOperand(Src).addImm(M); - break; - } - case X86::SHUFPDrri: { - assert(MI->getNumOperands() == 4 && "Unknown shufpd instruction!"); - if (!TM.getSubtarget().hasSSE2()) return nullptr; - - unsigned B = MI->getOperand(1).getReg(); - unsigned C = MI->getOperand(2).getReg(); - if (B != C) return nullptr; - unsigned M = MI->getOperand(3).getImm(); - - // Convert to PSHUFD mask. - M = ((M & 1) << 1) | ((M & 1) << 3) | ((M & 2) << 4) | ((M & 2) << 6)| 0x44; - - NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri)) - .addOperand(Dest).addOperand(Src).addImm(M); - break; - } case X86::SHL64ri: { assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!"); unsigned ShAmt = getTruncatedShiftCount(MI, 2); @@ -2672,8 +2703,7 @@ static X86::CondCode getSwappedCondition(X86::CondCode CC) { /// getSETFromCond - Return a set opcode for the given condition and /// whether it has memory operand. -static unsigned getSETFromCond(X86::CondCode CC, - bool HasMemoryOperand) { +unsigned X86::getSETFromCond(CondCode CC, bool HasMemoryOperand) { static const uint16_t Opc[16][2] = { { X86::SETAr, X86::SETAm }, { X86::SETAEr, X86::SETAEm }, @@ -2693,14 +2723,14 @@ static unsigned getSETFromCond(X86::CondCode CC, { X86::SETSr, X86::SETSm } }; - assert(CC < 16 && "Can only handle standard cond codes"); + assert(CC <= LAST_VALID_COND && "Can only handle standard cond codes"); return Opc[CC][HasMemoryOperand ? 1 : 0]; } /// getCMovFromCond - Return a cmov opcode for the given condition, /// register size in bytes, and operand type. -static unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes, - bool HasMemoryOperand) { +unsigned X86::getCMovFromCond(CondCode CC, unsigned RegBytes, + bool HasMemoryOperand) { static const uint16_t Opc[32][3] = { { X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr }, { X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr }, @@ -2976,7 +3006,7 @@ canInsertSelect(const MachineBasicBlock &MBB, unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const { // Not all subtargets have cmov instructions. - if (!TM.getSubtarget().hasCMov()) + if (!Subtarget.hasCMov()) return false; if (Cond.size() != 1) return false; @@ -3027,8 +3057,7 @@ static bool isHReg(unsigned Reg) { // Try and copy between VR128/VR64 and GR64 registers. static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, - const X86Subtarget& Subtarget) { - + const X86Subtarget &Subtarget) { // SrcReg(VR128) -> DestReg(GR64) // SrcReg(VR64) -> DestReg(GR64) @@ -3071,6 +3100,8 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, inline static bool MaskRegClassContains(unsigned Reg) { return X86::VK8RegClass.contains(Reg) || X86::VK16RegClass.contains(Reg) || + X86::VK32RegClass.contains(Reg) || + X86::VK64RegClass.contains(Reg) || X86::VK1RegClass.contains(Reg); } static @@ -3107,8 +3138,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { // First deal with the normal symmetric copies. - bool HasAVX = TM.getSubtarget().hasAVX(); - bool HasAVX512 = TM.getSubtarget().hasAVX512(); + bool HasAVX = Subtarget.hasAVX(); + bool HasAVX512 = Subtarget.hasAVX512(); unsigned Opc = 0; if (X86::GR64RegClass.contains(DestReg, SrcReg)) Opc = X86::MOV64rr; @@ -3120,7 +3151,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copying to or from a physical H register on x86-64 requires a NOREX // move. Otherwise use a normal move. if ((isHReg(DestReg) || isHReg(SrcReg)) && - TM.getSubtarget().is64Bit()) { + Subtarget.is64Bit()) { Opc = X86::MOV8rr_NOREX; // Both operands must be encodable without an REX prefix. assert(X86::GR8_NOREXRegClass.contains(SrcReg, DestReg) && @@ -3137,7 +3168,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, else if (X86::VR256RegClass.contains(DestReg, SrcReg)) Opc = X86::VMOVAPSYrr; if (!Opc) - Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, TM.getSubtarget()); + Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget); if (Opc) { BuildMI(MBB, MI, DL, get(Opc), DestReg) @@ -3147,7 +3178,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Moving EFLAGS to / from another register requires a push and a pop. // Notice that we have to adjust the stack if we don't want to clobber the - // first frame index. See X86FrameLowering.cpp - colobbersTheStack. + // first frame index. See X86FrameLowering.cpp - clobbersTheStack. if (SrcReg == X86::EFLAGS) { if (X86::GR64RegClass.contains(DestReg)) { BuildMI(MBB, MI, DL, get(X86::PUSHF64)); @@ -3183,9 +3214,9 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, static unsigned getLoadStoreRegOpcode(unsigned Reg, const TargetRegisterClass *RC, bool isStackAligned, - const TargetMachine &TM, + const X86Subtarget &STI, bool load) { - if (TM.getSubtarget().hasAVX512()) { + if (STI.hasAVX512()) { if (X86::VK8RegClass.hasSubClassEq(RC) || X86::VK16RegClass.hasSubClassEq(RC)) return load ? X86::KMOVWkm : X86::KMOVWmk; @@ -3197,13 +3228,13 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, return load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr; } - bool HasAVX = TM.getSubtarget().hasAVX(); + bool HasAVX = STI.hasAVX(); switch (RC->getSize()) { default: llvm_unreachable("Unknown spill size"); case 1: assert(X86::GR8RegClass.hasSubClassEq(RC) && "Unknown 1-byte regclass"); - if (TM.getSubtarget().is64Bit()) + if (STI.is64Bit()) // Copying to or from a physical H register on x86-64 requires a NOREX // move. Otherwise use a normal move. if (isHReg(Reg) || X86::GR8_ABCD_HRegClass.hasSubClassEq(RC)) @@ -3270,16 +3301,16 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, static unsigned getStoreRegOpcode(unsigned SrcReg, const TargetRegisterClass *RC, bool isStackAligned, - TargetMachine &TM) { - return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, TM, false); + const X86Subtarget &STI) { + return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, STI, false); } static unsigned getLoadRegOpcode(unsigned DestReg, const TargetRegisterClass *RC, bool isStackAligned, - const TargetMachine &TM) { - return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, TM, true); + const X86Subtarget &STI) { + return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, STI, true); } void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, @@ -3291,9 +3322,12 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() && "Stack slot too small for store"); unsigned Alignment = std::max(RC->getSize(), 16); - bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) || - RI.canRealignStack(MF); - unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); + bool isAligned = (MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->getStackAlignment() >= Alignment) || + RI.canRealignStack(MF); + unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget); DebugLoc DL = MBB.findDebugLoc(MI); addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx) .addReg(SrcReg, getKillRegState(isKill)); @@ -3309,7 +3343,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, unsigned Alignment = std::max(RC->getSize(), 16); bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= Alignment; - unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); + unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget); DebugLoc DL; MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); for (unsigned i = 0, e = Addr.size(); i != e; ++i) @@ -3327,9 +3361,12 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) const { const MachineFunction &MF = *MBB.getParent(); unsigned Alignment = std::max(RC->getSize(), 16); - bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) || - RI.canRealignStack(MF); - unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); + bool isAligned = (MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->getStackAlignment() >= Alignment) || + RI.canRealignStack(MF); + unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget); DebugLoc DL = MBB.findDebugLoc(MI); addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx); } @@ -3343,7 +3380,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, unsigned Alignment = std::max(RC->getSize(), 16); bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= Alignment; - unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); + unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget); DebugLoc DL; MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); for (unsigned i = 0, e = Addr.size(); i != e; ++i) @@ -3555,6 +3592,26 @@ inline static bool isDefConvertible(MachineInstr *MI) { } } +/// isUseDefConvertible - check whether the use can be converted +/// to remove a comparison against zero. +static X86::CondCode isUseDefConvertible(MachineInstr *MI) { + switch (MI->getOpcode()) { + default: return X86::COND_INVALID; + case X86::LZCNT16rr: case X86::LZCNT16rm: + case X86::LZCNT32rr: case X86::LZCNT32rm: + case X86::LZCNT64rr: case X86::LZCNT64rm: + return X86::COND_B; + case X86::POPCNT16rr:case X86::POPCNT16rm: + case X86::POPCNT32rr:case X86::POPCNT32rm: + case X86::POPCNT64rr:case X86::POPCNT64rm: + return X86::COND_E; + case X86::TZCNT16rr: case X86::TZCNT16rm: + case X86::TZCNT32rr: case X86::TZCNT32rm: + case X86::TZCNT64rr: case X86::TZCNT64rm: + return X86::COND_B; + } +} + /// optimizeCompareInstr - Check if there exists an earlier instruction that /// operates on the same source operands and sets flags in the same way as /// Compare; remove Compare if possible. @@ -3621,10 +3678,35 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // If we are comparing against zero, check whether we can use MI to update // EFLAGS. If MI is not in the same BB as CmpInstr, do not optimize. bool IsCmpZero = (SrcReg2 == 0 && CmpValue == 0); - if (IsCmpZero && (MI->getParent() != CmpInstr->getParent() || - !isDefConvertible(MI))) + if (IsCmpZero && MI->getParent() != CmpInstr->getParent()) return false; + // If we have a use of the source register between the def and our compare + // instruction we can eliminate the compare iff the use sets EFLAGS in the + // right way. + bool ShouldUpdateCC = false; + X86::CondCode NewCC = X86::COND_INVALID; + if (IsCmpZero && !isDefConvertible(MI)) { + // Scan forward from the use until we hit the use we're looking for or the + // compare instruction. + for (MachineBasicBlock::iterator J = MI;; ++J) { + // Do we have a convertible instruction? + NewCC = isUseDefConvertible(J); + if (NewCC != X86::COND_INVALID && J->getOperand(1).isReg() && + J->getOperand(1).getReg() == SrcReg) { + assert(J->definesRegister(X86::EFLAGS) && "Must be an EFLAGS def!"); + ShouldUpdateCC = true; // Update CC later on. + // This is not a def of SrcReg, but still a def of EFLAGS. Keep going + // with the new def. + MI = Def = J; + break; + } + + if (J == I) + return false; + } + } + // We are searching for an earlier instruction that can make CmpInstr // redundant and that instruction will be saved in Sub. MachineInstr *Sub = nullptr; @@ -3696,7 +3778,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, continue; // EFLAGS is used by this instruction. - X86::CondCode OldCC; + X86::CondCode OldCC = X86::COND_INVALID; bool OpcIsSET = false; if (IsCmpZero || IsSwapped) { // We decode the condition code from opcode. @@ -3722,13 +3804,28 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // CF and OF are used, we can't perform this optimization. return false; } + + // If we're updating the condition code check if we have to reverse the + // condition. + if (ShouldUpdateCC) + switch (OldCC) { + default: + return false; + case X86::COND_E: + break; + case X86::COND_NE: + NewCC = GetOppositeBranchCondition(NewCC); + break; + } } else if (IsSwapped) { // If we have SUB(r1, r2) and CMP(r2, r1), the condition code needs // to be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc. // We swap the condition code and synthesize the new opcode. - X86::CondCode NewCC = getSwappedCondition(OldCC); + NewCC = getSwappedCondition(OldCC); if (NewCC == X86::COND_INVALID) return false; + } + if ((ShouldUpdateCC || IsSwapped) && NewCC != OldCC) { // Synthesize the new opcode. bool HasMemoryOperand = Instr.hasOneMemOperand(); unsigned NewOpc; @@ -3810,10 +3907,10 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, /// operand at the use. We fold the load instructions if load defines a virtual /// register, the virtual register is used once in the same BB, and the /// instructions in-between do not load or store, and have no side effects. -MachineInstr* X86InstrInfo:: -optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI, - unsigned &FoldAsLoadDefReg, - MachineInstr *&DefMI) const { +MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr *MI, + const MachineRegisterInfo *MRI, + unsigned &FoldAsLoadDefReg, + MachineInstr *&DefMI) const { if (FoldAsLoadDefReg == 0) return nullptr; // To be conservative, if there exists another load, clear the load candidate. @@ -3829,55 +3926,35 @@ optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI, if (!DefMI->isSafeToMove(this, nullptr, SawStore)) return nullptr; - // We try to commute MI if possible. - unsigned IdxEnd = (MI->isCommutable()) ? 2 : 1; - for (unsigned Idx = 0; Idx < IdxEnd; Idx++) { - // Collect information about virtual register operands of MI. - unsigned SrcOperandId = 0; - bool FoundSrcOperand = false; - for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (Reg != FoldAsLoadDefReg) - continue; - // Do not fold if we have a subreg use or a def or multiple uses. - if (MO.getSubReg() || MO.isDef() || FoundSrcOperand) - return nullptr; - - SrcOperandId = i; - FoundSrcOperand = true; - } - if (!FoundSrcOperand) return nullptr; - - // Check whether we can fold the def into SrcOperandId. - SmallVector Ops; - Ops.push_back(SrcOperandId); - MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI); - if (FoldMI) { - FoldAsLoadDefReg = 0; - return FoldMI; - } - - if (Idx == 1) { - // MI was changed but it didn't help, commute it back! - commuteInstruction(MI, false); + // Collect information about virtual register operands of MI. + unsigned SrcOperandId = 0; + bool FoundSrcOperand = false; + for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (Reg != FoldAsLoadDefReg) + continue; + // Do not fold if we have a subreg use or a def or multiple uses. + if (MO.getSubReg() || MO.isDef() || FoundSrcOperand) return nullptr; - } - // Check whether we can commute MI and enable folding. - if (MI->isCommutable()) { - MachineInstr *NewMI = commuteInstruction(MI, false); - // Unable to commute. - if (!NewMI) return nullptr; - if (NewMI != MI) { - // New instruction. It doesn't need to be kept. - NewMI->eraseFromParent(); - return nullptr; - } - } + SrcOperandId = i; + FoundSrcOperand = true; } + if (!FoundSrcOperand) + return nullptr; + + // Check whether we can fold the def into SrcOperandId. + SmallVector Ops; + Ops.push_back(SrcOperandId); + MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI); + if (FoldMI) { + FoldAsLoadDefReg = 0; + return FoldMI; + } + return nullptr; } @@ -3903,8 +3980,30 @@ static bool Expand2AddrUndef(MachineInstrBuilder &MIB, return true; } +// LoadStackGuard has so far only been implemented for 64-bit MachO. Different +// code sequence is needed for other targets. +static void expandLoadStackGuard(MachineInstrBuilder &MIB, + const TargetInstrInfo &TII) { + MachineBasicBlock &MBB = *MIB->getParent(); + DebugLoc DL = MIB->getDebugLoc(); + unsigned Reg = MIB->getOperand(0).getReg(); + const GlobalValue *GV = + cast((*MIB->memoperands_begin())->getValue()); + unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant; + MachineMemOperand *MMO = MBB.getParent()-> + getMachineMemOperand(MachinePointerInfo::getGOT(), Flag, 8, 8); + MachineBasicBlock::iterator I = MIB; + + BuildMI(MBB, I, DL, TII.get(X86::MOV64rm), Reg).addReg(X86::RIP).addImm(1) + .addReg(0).addGlobalAddress(GV, 0, X86II::MO_GOTPCREL).addReg(0) + .addMemOperand(MMO); + MIB->setDebugLoc(DL); + MIB->setDesc(TII.get(X86::MOV64rm)); + MIB.addReg(Reg, RegState::Kill).addImm(1).addReg(0).addImm(0).addReg(0); +} + bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { - bool HasAVX = TM.getSubtarget().hasAVX(); + bool HasAVX = Subtarget.hasAVX(); MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); switch (MI->getOpcode()) { case X86::MOV32r0: @@ -3937,6 +4036,9 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { case X86::KSET0W: return Expand2AddrUndef(MIB, get(X86::KXORWrr)); case X86::KSET1B: case X86::KSET1W: return Expand2AddrUndef(MIB, get(X86::KXNORWrr)); + case TargetOpcode::LOAD_STACK_GUARD: + expandLoadStackGuard(MIB, *this); + return true; } return false; } @@ -4012,10 +4114,11 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned i, const SmallVectorImpl &MOs, - unsigned Size, unsigned Align) const { + unsigned Size, unsigned Align, + bool AllowCommute) const { const DenseMap > *OpcodeTablePtr = nullptr; - bool isCallRegIndirect = TM.getSubtarget().callRegIndirect(); + bool isCallRegIndirect = Subtarget.callRegIndirect(); bool isTwoAddrFold = false; // Atom favors register form of call. So, we do not fold loads into calls @@ -4080,8 +4183,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4) return nullptr; // If this is a 64-bit load, but the spill slot is 32, then we can do - // a 32-bit load which is implicitly zero-extended. This likely is due - // to liveintervalanalysis remat'ing a load from stack slot. + // a 32-bit load which is implicitly zero-extended. This likely is + // due to live interval analysis remat'ing a load from stack slot. if (MI->getOperand(0).getSubReg() || MI->getOperand(1).getSubReg()) return nullptr; Opcode = X86::MOV32rm; @@ -4100,8 +4203,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // to a 32-bit one. unsigned DstReg = NewMI->getOperand(0).getReg(); if (TargetRegisterInfo::isPhysicalRegister(DstReg)) - NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, - X86::sub_32bit)); + NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, X86::sub_32bit)); else NewMI->getOperand(0).setSubReg(X86::sub_32bit); } @@ -4109,6 +4211,65 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, } } + // If the instruction and target operand are commutable, commute the + // instruction and try again. + if (AllowCommute) { + unsigned OriginalOpIdx = i, CommuteOpIdx1, CommuteOpIdx2; + if (findCommutedOpIndices(MI, CommuteOpIdx1, CommuteOpIdx2)) { + bool HasDef = MI->getDesc().getNumDefs(); + unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0; + unsigned Reg1 = MI->getOperand(CommuteOpIdx1).getReg(); + unsigned Reg2 = MI->getOperand(CommuteOpIdx2).getReg(); + bool Tied0 = + 0 == MI->getDesc().getOperandConstraint(CommuteOpIdx1, MCOI::TIED_TO); + bool Tied1 = + 0 == MI->getDesc().getOperandConstraint(CommuteOpIdx2, MCOI::TIED_TO); + + // If either of the commutable operands are tied to the destination + // then we can not commute + fold. + if ((HasDef && Reg0 == Reg1 && Tied0) || + (HasDef && Reg0 == Reg2 && Tied1)) + return nullptr; + + if ((CommuteOpIdx1 == OriginalOpIdx) || + (CommuteOpIdx2 == OriginalOpIdx)) { + MachineInstr *CommutedMI = commuteInstruction(MI, false); + if (!CommutedMI) { + // Unable to commute. + return nullptr; + } + if (CommutedMI != MI) { + // New instruction. We can't fold from this. + CommutedMI->eraseFromParent(); + return nullptr; + } + + // Attempt to fold with the commuted version of the instruction. + unsigned CommuteOp = + (CommuteOpIdx1 == OriginalOpIdx ? CommuteOpIdx2 : CommuteOpIdx1); + NewMI = foldMemoryOperandImpl(MF, MI, CommuteOp, MOs, Size, Align, + /*AllowCommute=*/false); + if (NewMI) + return NewMI; + + // Folding failed again - undo the commute before returning. + MachineInstr *UncommutedMI = commuteInstruction(MI, false); + if (!UncommutedMI) { + // Unable to commute. + return nullptr; + } + if (UncommutedMI != MI) { + // New instruction. It doesn't need to be kept. + UncommutedMI->eraseFromParent(); + return nullptr; + } + + // Return here to prevent duplicate fuse failure report. + return nullptr; + } + } + } + // No fusion if (PrintFailedFusing && !MI->isCopy()) dbgs() << "We failed to fuse operand " << i << " in " << *MI; @@ -4256,7 +4417,7 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum, if (X86::VR128RegClass.contains(Reg)) { // These instructions are all floating point domain, so xorps is the best // choice. - bool HasAVX = TM.getSubtarget().hasAVX(); + bool HasAVX = Subtarget.hasAVX(); unsigned Opc = HasAVX ? X86::VXORPSrr : X86::XORPSrr; BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(Opc), Reg) .addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef); @@ -4292,7 +4453,10 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, // If the function stack isn't realigned we don't want to fold instructions // that need increased alignment. if (!RI.needsStackRealignment(MF)) - Alignment = std::min(Alignment, TM.getFrameLowering()->getStackAlignment()); + Alignment = std::min(Alignment, MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->getStackAlignment()); if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { unsigned NewOpc = 0; unsigned RCSize = 0; @@ -4315,7 +4479,27 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, SmallVector MOs; MOs.push_back(MachineOperand::CreateFI(FrameIndex)); - return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment); + return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, + Size, Alignment, /*AllowCommute=*/true); +} + +static bool isPartialRegisterLoad(const MachineInstr &LoadMI, + const MachineFunction &MF) { + unsigned Opc = LoadMI.getOpcode(); + unsigned RegSize = + MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg())->getSize(); + + if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm) && RegSize > 4) + // These instructions only load 32 bits, we can't fold them if the + // destination register is wider than 32 bits (4 bytes). + return true; + + if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm) && RegSize > 8) + // These instructions only load 64 bits, we can't fold them if the + // destination register is wider than 64 bits (8 bytes). + return true; + + return false; } MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, @@ -4325,8 +4509,11 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // If loading from a FrameIndex, fold directly from the FrameIndex. unsigned NumOps = LoadMI->getDesc().getNumOperands(); int FrameIndex; - if (isLoadFromStackSlot(LoadMI, FrameIndex)) + if (isLoadFromStackSlot(LoadMI, FrameIndex)) { + if (isPartialRegisterLoad(*LoadMI, MF)) + return nullptr; return foldMemoryOperandImpl(MF, MI, Ops, FrameIndex); + } // Check switch flag if (NoFusing) return nullptr; @@ -4393,14 +4580,14 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // Create a constant-pool entry and operands to load from it. // Medium and large mode can't fold loads this way. - if (TM.getCodeModel() != CodeModel::Small && - TM.getCodeModel() != CodeModel::Kernel) + if (MF.getTarget().getCodeModel() != CodeModel::Small && + MF.getTarget().getCodeModel() != CodeModel::Kernel) return nullptr; // x86-32 PIC requires a PIC base register for constant pools. unsigned PICBase = 0; - if (TM.getRelocationModel() == Reloc::PIC_) { - if (TM.getSubtarget().is64Bit()) + if (MF.getTarget().getRelocationModel() == Reloc::PIC_) { + if (Subtarget.is64Bit()) PICBase = X86::RIP; else // FIXME: PICBase = getGlobalBaseReg(&MF); @@ -4437,19 +4624,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, break; } default: { - if ((LoadMI->getOpcode() == X86::MOVSSrm || - LoadMI->getOpcode() == X86::VMOVSSrm) && - MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize() - > 4) - // These instructions only load 32 bits, we can't fold them if the - // destination register is wider than 32 bits (4 bytes). - return nullptr; - if ((LoadMI->getOpcode() == X86::MOVSDrm || - LoadMI->getOpcode() == X86::VMOVSDrm) && - MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize() - > 8) - // These instructions only load 64 bits, we can't fold them if the - // destination register is wider than 64 bits (8 bytes). + if (isPartialRegisterLoad(*LoadMI, MF)) return nullptr; // Folding a normal load. Just copy the load's address operands. @@ -4458,7 +4633,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, break; } } - return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment); + return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, + /*Size=*/0, Alignment, /*AllowCommute=*/true); } @@ -4540,7 +4716,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF); if (!MI->hasOneMemOperand() && RC == &X86::VR128RegClass && - !TM.getSubtarget().isUnalignedMemAccessFast()) + !Subtarget.isUnalignedMemAccessFast()) // Without memoperands, loadRegFromAddr and storeRegToStackSlot will // conservatively assume the address is unaligned. That's bad for // performance. @@ -4688,13 +4864,13 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, cast(N)->memoperands_end()); if (!(*MMOs.first) && RC == &X86::VR128RegClass && - !TM.getSubtarget().isUnalignedMemAccessFast()) + !Subtarget.isUnalignedMemAccessFast()) // Do not introduce a slow unaligned load. return false; unsigned Alignment = RC->getSize() == 32 ? 32 : 16; bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= Alignment; - Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl, + Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, Subtarget), dl, VT, MVT::Other, AddrOps); NewNodes.push_back(Load); @@ -4731,15 +4907,15 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, cast(N)->memoperands_end()); if (!(*MMOs.first) && RC == &X86::VR128RegClass && - !TM.getSubtarget().isUnalignedMemAccessFast()) + !Subtarget.isUnalignedMemAccessFast()) // Do not introduce a slow unaligned store. return false; unsigned Alignment = RC->getSize() == 32 ? 32 : 16; bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= Alignment; - SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC, - isAligned, TM), - dl, MVT::Other, AddrOps); + SDNode *Store = + DAG.getMachineNode(getStoreRegOpcode(0, DstRC, isAligned, Subtarget), + dl, MVT::Other, AddrOps); NewNodes.push_back(Store); // Preserve memory reference information. @@ -4900,7 +5076,7 @@ bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, default: // XMM registers. In 64-bit mode we can be a bit more aggressive since we // have 16 of them to play with. - if (TM.getSubtargetImpl()->is64Bit()) { + if (Subtarget.is64Bit()) { if (NumLoads >= 3) return false; } else if (NumLoads) { @@ -4926,7 +5102,7 @@ bool X86InstrInfo::shouldScheduleAdjacent(MachineInstr* First, // Check if this processor supports macro-fusion. Since this is a minor // heuristic, we haven't specifically reserved a feature. hasAVX is a decent // proxy for SandyBridge+. - if (!TM.getSubtarget().hasAVX()) + if (!Subtarget.hasAVX()) return false; enum { @@ -4978,6 +5154,7 @@ bool X86InstrInfo::shouldScheduleAdjacent(MachineInstr* First, case X86::TEST16rm: case X86::TEST32rm: case X86::TEST64rm: + case X86::TEST8ri_NOREX: case X86::AND16i16: case X86::AND16ri: case X86::AND16ri8: @@ -5108,7 +5285,7 @@ isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { /// TODO: Eliminate this and move the code to X86MachineFunctionInfo. /// unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { - assert(!TM.getSubtarget().is64Bit() && + assert(!Subtarget.is64Bit() && "X86-64 PIC uses RIP relative addressing"); X86MachineFunctionInfo *X86FI = MF->getInfo(); @@ -5211,7 +5388,7 @@ static const uint16_t *lookupAVX2(unsigned opcode, unsigned domain) { std::pair X86InstrInfo::getExecutionDomain(const MachineInstr *MI) const { uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3; - bool hasAVX2 = TM.getSubtarget().hasAVX2(); + bool hasAVX2 = Subtarget.hasAVX2(); uint16_t validDomains = 0; if (domain && lookup(MI->getOpcode(), domain)) validDomains = 0xe; @@ -5226,7 +5403,7 @@ void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { assert(dom && "Not an SSE instruction"); const uint16_t *table = lookup(MI->getOpcode(), dom); if (!table) { // try the other table - assert((TM.getSubtarget().hasAVX2() || Domain < 3) && + assert((Subtarget.hasAVX2() || Domain < 3) && "256-bit vector operations only available in AVX2"); table = lookupAVX2(MI->getOpcode(), dom); } @@ -5239,6 +5416,16 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { NopInst.setOpcode(X86::NOOP); } +void X86InstrInfo::getUnconditionalBranch( + MCInst &Branch, const MCSymbolRefExpr *BranchTarget) const { + Branch.setOpcode(X86::JMP_4); + Branch.addOperand(MCOperand::CreateExpr(BranchTarget)); +} + +void X86InstrInfo::getTrap(MCInst &MI) const { + MI.setOpcode(X86::TRAP); +} + bool X86InstrInfo::isHighLatencyDef(int opc) const { switch (opc) { default: return false; @@ -5335,8 +5522,10 @@ namespace { const X86TargetMachine *TM = static_cast(&MF.getTarget()); - assert(!TM->getSubtarget().is64Bit() && - "X86-64 PIC uses RIP relative addressing"); + // Don't do anything if this is 64-bit as 64-bit PIC + // uses RIP relative addressing. + if (TM->getSubtarget().is64Bit()) + return false; // Only emit a global base reg in PIC mode. if (TM->getRelocationModel() != Reloc::PIC_) @@ -5354,7 +5543,7 @@ namespace { MachineBasicBlock::iterator MBBI = FirstMBB.begin(); DebugLoc DL = FirstMBB.findDebugLoc(MBBI); MachineRegisterInfo &RegInfo = MF.getRegInfo(); - const X86InstrInfo *TII = TM->getInstrInfo(); + const X86InstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo(); unsigned PC; if (TM->getSubtarget().isPICStyleGOT()) @@ -5391,7 +5580,7 @@ namespace { char CGBR::ID = 0; FunctionPass* -llvm::createGlobalBaseRegPass() { return new CGBR(); } +llvm::createX86GlobalBaseRegPass() { return new CGBR(); } namespace { struct LDTLSCleanup : public MachineFunctionPass { @@ -5452,7 +5641,7 @@ namespace { const X86TargetMachine *TM = static_cast(&MF->getTarget()); const bool is64Bit = TM->getSubtarget().is64Bit(); - const X86InstrInfo *TII = TM->getInstrInfo(); + const X86InstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo(); // Insert a Copy from TLSBaseAddrReg to RAX/EAX. MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(), @@ -5473,7 +5662,7 @@ namespace { const X86TargetMachine *TM = static_cast(&MF->getTarget()); const bool is64Bit = TM->getSubtarget().is64Bit(); - const X86InstrInfo *TII = TM->getInstrInfo(); + const X86InstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo(); // Create a virtual register for the TLS base address. MachineRegisterInfo &RegInfo = MF->getRegInfo();