X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86InstrInfo.cpp;h=d03d76ad1f092734ad6529836a9a794326e3b518;hb=b96e8338171ed4cb23a3b14d94db2361b0d544e8;hp=6450f2af428d53d0f27c0be8ad3a6f5414bed40b;hpb=d285beabff6d4880da6badd1a574adfa572811b7;p=oota-llvm.git diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 6450f2af428..d03d76ad1f0 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -28,6 +28,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/LLVMContext.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -36,11 +37,13 @@ #include "llvm/Target/TargetOptions.h" #include +using namespace llvm; + +#define DEBUG_TYPE "x86-instr-info" + #define GET_INSTRINFO_CTOR_DTOR #include "X86GenInstrInfo.inc" -using namespace llvm; - static cl::opt NoFusing("disable-spill-fusing", cl::desc("Disable fusing of spill code into instructions")); @@ -95,14 +98,11 @@ struct X86OpTblEntry { // Pin the vtable to this file. void X86InstrInfo::anchor() {} -X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) - : X86GenInstrInfo((tm.getSubtarget().is64Bit() - ? X86::ADJCALLSTACKDOWN64 - : X86::ADJCALLSTACKDOWN32), - (tm.getSubtarget().is64Bit() - ? X86::ADJCALLSTACKUP64 - : X86::ADJCALLSTACKUP32)), - TM(tm), RI(tm) { +X86InstrInfo::X86InstrInfo(X86Subtarget &STI) + : X86GenInstrInfo( + (STI.is64Bit() ? X86::ADJCALLSTACKDOWN64 : X86::ADJCALLSTACKDOWN32), + (STI.is64Bit() ? X86::ADJCALLSTACKUP64 : X86::ADJCALLSTACKUP32)), + Subtarget(STI), RI(STI) { static const X86OpTblEntry OpTbl2Addr[] = { { X86::ADC32ri, X86::ADC32mi, 0 }, @@ -1470,7 +1470,7 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, case X86::MOVSX32rr8: case X86::MOVZX32rr8: case X86::MOVSX64rr8: - if (!TM.getSubtarget().is64Bit()) + if (!Subtarget.is64Bit()) // It's not always legal to reference the low 8-bit of the larger // register in 32-bit mode. return false; @@ -1511,12 +1511,14 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, /// operand and follow operands form a reference to the stack frame. bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op, int &FrameIndex) const { - if (MI->getOperand(Op).isFI() && MI->getOperand(Op+1).isImm() && - MI->getOperand(Op+2).isReg() && MI->getOperand(Op+3).isImm() && - MI->getOperand(Op+1).getImm() == 1 && - MI->getOperand(Op+2).getReg() == 0 && - MI->getOperand(Op+3).getImm() == 0) { - FrameIndex = MI->getOperand(Op).getIndex(); + if (MI->getOperand(Op+X86::AddrBaseReg).isFI() && + MI->getOperand(Op+X86::AddrScaleAmt).isImm() && + MI->getOperand(Op+X86::AddrIndexReg).isReg() && + MI->getOperand(Op+X86::AddrDisp).isImm() && + MI->getOperand(Op+X86::AddrScaleAmt).getImm() == 1 && + MI->getOperand(Op+X86::AddrIndexReg).getReg() == 0 && + MI->getOperand(Op+X86::AddrDisp).getImm() == 0) { + FrameIndex = MI->getOperand(Op+X86::AddrBaseReg).getIndex(); return true; } return false; @@ -1680,15 +1682,16 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, case X86::FsMOVAPSrm: case X86::FsMOVAPDrm: { // Loads from constant pools are trivially rematerializable. - if (MI->getOperand(1).isReg() && - MI->getOperand(2).isImm() && - MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && + if (MI->getOperand(1+X86::AddrBaseReg).isReg() && + MI->getOperand(1+X86::AddrScaleAmt).isImm() && + MI->getOperand(1+X86::AddrIndexReg).isReg() && + MI->getOperand(1+X86::AddrIndexReg).getReg() == 0 && MI->isInvariantLoad(AA)) { - unsigned BaseReg = MI->getOperand(1).getReg(); + unsigned BaseReg = MI->getOperand(1+X86::AddrBaseReg).getReg(); if (BaseReg == 0 || BaseReg == X86::RIP) return true; // Allow re-materialization of PIC load. - if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal()) + if (!ReMatPICStubLoad && MI->getOperand(1+X86::AddrDisp).isGlobal()) return false; const MachineFunction &MF = *MI->getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -1699,13 +1702,14 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, case X86::LEA32r: case X86::LEA64r: { - if (MI->getOperand(2).isImm() && - MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && - !MI->getOperand(4).isReg()) { + if (MI->getOperand(1+X86::AddrScaleAmt).isImm() && + MI->getOperand(1+X86::AddrIndexReg).isReg() && + MI->getOperand(1+X86::AddrIndexReg).getReg() == 0 && + !MI->getOperand(1+X86::AddrDisp).isReg()) { // lea fi#, lea GV, etc. are all rematerializable. - if (!MI->getOperand(1).isReg()) + if (!MI->getOperand(1+X86::AddrBaseReg).isReg()) return true; - unsigned BaseReg = MI->getOperand(1).getReg(); + unsigned BaseReg = MI->getOperand(1+X86::AddrBaseReg).getReg(); if (BaseReg == 0) return true; // Allow re-materialization of lea PICBase + x. @@ -1722,12 +1726,8 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, return true; } -/// isSafeToClobberEFLAGS - Return true if it's safe insert an instruction that -/// would clobber the EFLAGS condition register. Note the result may be -/// conservative. If it cannot definitely determine the safety after visiting -/// a few instructions in each direction it assumes it's not safe. -static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) { +bool X86InstrInfo::isSafeToClobberEFLAGS(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { MachineBasicBlock::iterator E = MBB.end(); // For compile time consideration, if we are not able to determine the @@ -1948,7 +1948,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo(); unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass); unsigned Opc, leaInReg; - if (TM.getSubtarget().is64Bit()) { + if (Subtarget.is64Bit()) { Opc = X86::LEA64_32r; leaInReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass); } else { @@ -1998,13 +1998,13 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, unsigned Src2 = MI->getOperand(2).getReg(); bool isKill2 = MI->getOperand(2).isKill(); unsigned leaInReg2 = 0; - MachineInstr *InsMI2 = 0; + MachineInstr *InsMI2 = nullptr; if (Src == Src2) { // ADD16rr %reg1028, %reg1028 // just a single insert_subreg. addRegReg(MIB, leaInReg, true, leaInReg, false); } else { - if (TM.getSubtarget().is64Bit()) + if (Subtarget.is64Bit()) leaInReg2 = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass); else leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass); @@ -2062,29 +2062,29 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, // convert them to equivalent lea if the condition code register def's // are dead! if (hasLiveCondCodeDef(MI)) - return 0; + return nullptr; MachineFunction &MF = *MI->getParent()->getParent(); // All instructions input are two-addr instructions. Get the known operands. const MachineOperand &Dest = MI->getOperand(0); const MachineOperand &Src = MI->getOperand(1); - MachineInstr *NewMI = NULL; + MachineInstr *NewMI = nullptr; // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When // we have better subtarget support, enable the 16-bit LEA generation here. // 16-bit LEA is also slow on Core2. bool DisableLEA16 = true; - bool is64Bit = TM.getSubtarget().is64Bit(); + bool is64Bit = Subtarget.is64Bit(); unsigned MIOpc = MI->getOpcode(); switch (MIOpc) { case X86::SHUFPSrri: { assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!"); - if (!TM.getSubtarget().hasSSE2()) return 0; + if (!Subtarget.hasSSE2()) return nullptr; unsigned B = MI->getOperand(1).getReg(); unsigned C = MI->getOperand(2).getReg(); - if (B != C) return 0; + if (B != C) return nullptr; unsigned M = MI->getOperand(3).getImm(); NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri)) .addOperand(Dest).addOperand(Src).addImm(M); @@ -2092,11 +2092,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, } case X86::SHUFPDrri: { assert(MI->getNumOperands() == 4 && "Unknown shufpd instruction!"); - if (!TM.getSubtarget().hasSSE2()) return 0; + if (!Subtarget.hasSSE2()) return nullptr; unsigned B = MI->getOperand(1).getReg(); unsigned C = MI->getOperand(2).getReg(); - if (B != C) return 0; + if (B != C) return nullptr; unsigned M = MI->getOperand(3).getImm(); // Convert to PSHUFD mask. @@ -2109,13 +2109,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::SHL64ri: { assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!"); unsigned ShAmt = getTruncatedShiftCount(MI, 2); - if (!isTruncatedShiftCountForLEA(ShAmt)) return 0; + if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr; // LEA can't handle RSP. if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) && !MF.getRegInfo().constrainRegClass(Src.getReg(), &X86::GR64_NOSPRegClass)) - return 0; + return nullptr; NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) .addOperand(Dest) @@ -2125,7 +2125,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::SHL32ri: { assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!"); unsigned ShAmt = getTruncatedShiftCount(MI, 2); - if (!isTruncatedShiftCountForLEA(ShAmt)) return 0; + if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr; unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; @@ -2135,7 +2135,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill, isUndef, ImplicitOp)) - return 0; + return nullptr; MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addOperand(Dest) @@ -2151,10 +2151,10 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::SHL16ri: { assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!"); unsigned ShAmt = getTruncatedShiftCount(MI, 2); - if (!isTruncatedShiftCountForLEA(ShAmt)) return 0; + if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr; if (DisableLEA16) - return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; + return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : nullptr; NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) .addOperand(Dest) .addReg(0).addImm(1 << ShAmt).addOperand(Src).addImm(0).addReg(0); @@ -2163,7 +2163,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, default: { switch (MIOpc) { - default: return 0; + default: return nullptr; case X86::INC64r: case X86::INC32r: case X86::INC64_32r: { @@ -2175,7 +2175,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill, isUndef, ImplicitOp)) - return 0; + return nullptr; MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addOperand(Dest) @@ -2189,7 +2189,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::INC16r: case X86::INC64_16r: if (DisableLEA16) - return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; + return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) + : nullptr; assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!"); NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) .addOperand(Dest).addOperand(Src), 1); @@ -2206,7 +2207,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill, isUndef, ImplicitOp)) - return 0; + return nullptr; MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addOperand(Dest) @@ -2221,7 +2222,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::DEC16r: case X86::DEC64_16r: if (DisableLEA16) - return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; + return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) + : nullptr; assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!"); NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) .addOperand(Dest).addOperand(Src), -1); @@ -2242,7 +2244,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true, SrcReg, isKill, isUndef, ImplicitOp)) - return 0; + return nullptr; const MachineOperand &Src2 = MI->getOperand(2); bool isKill2, isUndef2; @@ -2250,7 +2252,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/ false, SrcReg2, isKill2, isUndef2, ImplicitOp2)) - return 0; + return nullptr; MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addOperand(Dest); @@ -2272,7 +2274,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::ADD16rr: case X86::ADD16rr_DB: { if (DisableLEA16) - return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; + return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) + : nullptr; assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); unsigned Src2 = MI->getOperand(2).getReg(); bool isKill2 = MI->getOperand(2).isKill(); @@ -2311,7 +2314,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true, SrcReg, isKill, isUndef, ImplicitOp)) - return 0; + return nullptr; MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addOperand(Dest) @@ -2327,7 +2330,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::ADD16ri_DB: case X86::ADD16ri8_DB: if (DisableLEA16) - return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; + return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) + : nullptr; assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) .addOperand(Dest).addOperand(Src), @@ -2337,7 +2341,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, } } - if (!NewMI) return 0; + if (!NewMI) return nullptr; if (LV) { // Update live variables if (Src.isKill()) @@ -2666,8 +2670,7 @@ static X86::CondCode getSwappedCondition(X86::CondCode CC) { /// getSETFromCond - Return a set opcode for the given condition and /// whether it has memory operand. -static unsigned getSETFromCond(X86::CondCode CC, - bool HasMemoryOperand) { +unsigned X86::getSETFromCond(CondCode CC, bool HasMemoryOperand) { static const uint16_t Opc[16][2] = { { X86::SETAr, X86::SETAm }, { X86::SETAEr, X86::SETAEm }, @@ -2687,14 +2690,14 @@ static unsigned getSETFromCond(X86::CondCode CC, { X86::SETSr, X86::SETSm } }; - assert(CC < 16 && "Can only handle standard cond codes"); + assert(CC <= LAST_VALID_COND && "Can only handle standard cond codes"); return Opc[CC][HasMemoryOperand ? 1 : 0]; } /// getCMovFromCond - Return a cmov opcode for the given condition, /// register size in bytes, and operand type. -static unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes, - bool HasMemoryOperand) { +unsigned X86::getCMovFromCond(CondCode CC, unsigned RegBytes, + bool HasMemoryOperand) { static const uint16_t Opc[32][3] = { { X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr }, { X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr }, @@ -2789,11 +2792,11 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, std::next(I)->eraseFromParent(); Cond.clear(); - FBB = 0; + FBB = nullptr; // Delete the JMP if it's equivalent to a fall-through. if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { - TBB = 0; + TBB = nullptr; I->eraseFromParent(); I = MBB.end(); UnCondBrIter = MBB.end(); @@ -2970,7 +2973,7 @@ canInsertSelect(const MachineBasicBlock &MBB, unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const { // Not all subtargets have cmov instructions. - if (!TM.getSubtarget().hasCMov()) + if (!Subtarget.hasCMov()) return false; if (Cond.size() != 1) return false; @@ -3021,8 +3024,7 @@ static bool isHReg(unsigned Reg) { // Try and copy between VR128/VR64 and GR64 registers. static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, - const X86Subtarget& Subtarget) { - + const X86Subtarget &Subtarget) { // SrcReg(VR128) -> DestReg(GR64) // SrcReg(VR64) -> DestReg(GR64) @@ -3065,6 +3067,8 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, inline static bool MaskRegClassContains(unsigned Reg) { return X86::VK8RegClass.contains(Reg) || X86::VK16RegClass.contains(Reg) || + X86::VK32RegClass.contains(Reg) || + X86::VK64RegClass.contains(Reg) || X86::VK1RegClass.contains(Reg); } static @@ -3101,8 +3105,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { // First deal with the normal symmetric copies. - bool HasAVX = TM.getSubtarget().hasAVX(); - bool HasAVX512 = TM.getSubtarget().hasAVX512(); + bool HasAVX = Subtarget.hasAVX(); + bool HasAVX512 = Subtarget.hasAVX512(); unsigned Opc = 0; if (X86::GR64RegClass.contains(DestReg, SrcReg)) Opc = X86::MOV64rr; @@ -3114,7 +3118,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copying to or from a physical H register on x86-64 requires a NOREX // move. Otherwise use a normal move. if ((isHReg(DestReg) || isHReg(SrcReg)) && - TM.getSubtarget().is64Bit()) { + Subtarget.is64Bit()) { Opc = X86::MOV8rr_NOREX; // Both operands must be encodable without an REX prefix. assert(X86::GR8_NOREXRegClass.contains(SrcReg, DestReg) && @@ -3131,7 +3135,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, else if (X86::VR256RegClass.contains(DestReg, SrcReg)) Opc = X86::VMOVAPSYrr; if (!Opc) - Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, TM.getSubtarget()); + Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget); if (Opc) { BuildMI(MBB, MI, DL, get(Opc), DestReg) @@ -3177,9 +3181,9 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, static unsigned getLoadStoreRegOpcode(unsigned Reg, const TargetRegisterClass *RC, bool isStackAligned, - const TargetMachine &TM, + const X86Subtarget &STI, bool load) { - if (TM.getSubtarget().hasAVX512()) { + if (STI.hasAVX512()) { if (X86::VK8RegClass.hasSubClassEq(RC) || X86::VK16RegClass.hasSubClassEq(RC)) return load ? X86::KMOVWkm : X86::KMOVWmk; @@ -3191,13 +3195,13 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, return load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr; } - bool HasAVX = TM.getSubtarget().hasAVX(); + bool HasAVX = STI.hasAVX(); switch (RC->getSize()) { default: llvm_unreachable("Unknown spill size"); case 1: assert(X86::GR8RegClass.hasSubClassEq(RC) && "Unknown 1-byte regclass"); - if (TM.getSubtarget().is64Bit()) + if (STI.is64Bit()) // Copying to or from a physical H register on x86-64 requires a NOREX // move. Otherwise use a normal move. if (isHReg(Reg) || X86::GR8_ABCD_HRegClass.hasSubClassEq(RC)) @@ -3264,16 +3268,16 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, static unsigned getStoreRegOpcode(unsigned SrcReg, const TargetRegisterClass *RC, bool isStackAligned, - TargetMachine &TM) { - return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, TM, false); + const X86Subtarget &STI) { + return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, STI, false); } static unsigned getLoadRegOpcode(unsigned DestReg, const TargetRegisterClass *RC, bool isStackAligned, - const TargetMachine &TM) { - return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, TM, true); + const X86Subtarget &STI) { + return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, STI, true); } void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, @@ -3285,9 +3289,10 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() && "Stack slot too small for store"); unsigned Alignment = std::max(RC->getSize(), 16); - bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) || - RI.canRealignStack(MF); - unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); + bool isAligned = + (MF.getTarget().getFrameLowering()->getStackAlignment() >= Alignment) || + RI.canRealignStack(MF); + unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget); DebugLoc DL = MBB.findDebugLoc(MI); addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx) .addReg(SrcReg, getKillRegState(isKill)); @@ -3303,7 +3308,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, unsigned Alignment = std::max(RC->getSize(), 16); bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= Alignment; - unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); + unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget); DebugLoc DL; MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); for (unsigned i = 0, e = Addr.size(); i != e; ++i) @@ -3321,9 +3326,10 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) const { const MachineFunction &MF = *MBB.getParent(); unsigned Alignment = std::max(RC->getSize(), 16); - bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) || - RI.canRealignStack(MF); - unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); + bool isAligned = + (MF.getTarget().getFrameLowering()->getStackAlignment() >= Alignment) || + RI.canRealignStack(MF); + unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget); DebugLoc DL = MBB.findDebugLoc(MI); addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx); } @@ -3337,7 +3343,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, unsigned Alignment = std::max(RC->getSize(), 16); bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= Alignment; - unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); + unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget); DebugLoc DL; MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); for (unsigned i = 0, e = Addr.size(); i != e; ++i) @@ -3549,6 +3555,26 @@ inline static bool isDefConvertible(MachineInstr *MI) { } } +/// isUseDefConvertible - check whether the use can be converted +/// to remove a comparison against zero. +static X86::CondCode isUseDefConvertible(MachineInstr *MI) { + switch (MI->getOpcode()) { + default: return X86::COND_INVALID; + case X86::LZCNT16rr: case X86::LZCNT16rm: + case X86::LZCNT32rr: case X86::LZCNT32rm: + case X86::LZCNT64rr: case X86::LZCNT64rm: + return X86::COND_B; + case X86::POPCNT16rr:case X86::POPCNT16rm: + case X86::POPCNT32rr:case X86::POPCNT32rm: + case X86::POPCNT64rr:case X86::POPCNT64rm: + return X86::COND_E; + case X86::TZCNT16rr: case X86::TZCNT16rm: + case X86::TZCNT32rr: case X86::TZCNT32rm: + case X86::TZCNT64rr: case X86::TZCNT64rm: + return X86::COND_B; + } +} + /// optimizeCompareInstr - Check if there exists an earlier instruction that /// operates on the same source operands and sets flags in the same way as /// Compare; remove Compare if possible. @@ -3615,13 +3641,38 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // If we are comparing against zero, check whether we can use MI to update // EFLAGS. If MI is not in the same BB as CmpInstr, do not optimize. bool IsCmpZero = (SrcReg2 == 0 && CmpValue == 0); - if (IsCmpZero && (MI->getParent() != CmpInstr->getParent() || - !isDefConvertible(MI))) + if (IsCmpZero && MI->getParent() != CmpInstr->getParent()) return false; + // If we have a use of the source register between the def and our compare + // instruction we can eliminate the compare iff the use sets EFLAGS in the + // right way. + bool ShouldUpdateCC = false; + X86::CondCode NewCC = X86::COND_INVALID; + if (IsCmpZero && !isDefConvertible(MI)) { + // Scan forward from the use until we hit the use we're looking for or the + // compare instruction. + for (MachineBasicBlock::iterator J = MI;; ++J) { + // Do we have a convertible instruction? + NewCC = isUseDefConvertible(J); + if (NewCC != X86::COND_INVALID && J->getOperand(1).isReg() && + J->getOperand(1).getReg() == SrcReg) { + assert(J->definesRegister(X86::EFLAGS) && "Must be an EFLAGS def!"); + ShouldUpdateCC = true; // Update CC later on. + // This is not a def of SrcReg, but still a def of EFLAGS. Keep going + // with the new def. + MI = Def = J; + break; + } + + if (J == I) + return false; + } + } + // We are searching for an earlier instruction that can make CmpInstr // redundant and that instruction will be saved in Sub. - MachineInstr *Sub = NULL; + MachineInstr *Sub = nullptr; const TargetRegisterInfo *TRI = &getRegisterInfo(); // We iterate backward, starting from the instruction before CmpInstr and @@ -3634,7 +3685,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, RE = CmpInstr->getParent() == MI->getParent() ? MachineBasicBlock::reverse_iterator(++Def) /* points to MI */ : CmpInstr->getParent()->rend(); - MachineInstr *Movr0Inst = 0; + MachineInstr *Movr0Inst = nullptr; for (; RI != RE; ++RI) { MachineInstr *Instr = &*RI; // Check whether CmpInstr can be made redundant by the current instruction. @@ -3690,7 +3741,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, continue; // EFLAGS is used by this instruction. - X86::CondCode OldCC; + X86::CondCode OldCC = X86::COND_INVALID; bool OpcIsSET = false; if (IsCmpZero || IsSwapped) { // We decode the condition code from opcode. @@ -3716,13 +3767,28 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // CF and OF are used, we can't perform this optimization. return false; } + + // If we're updating the condition code check if we have to reverse the + // condition. + if (ShouldUpdateCC) + switch (OldCC) { + default: + return false; + case X86::COND_E: + break; + case X86::COND_NE: + NewCC = GetOppositeBranchCondition(NewCC); + break; + } } else if (IsSwapped) { // If we have SUB(r1, r2) and CMP(r2, r1), the condition code needs // to be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc. // We swap the condition code and synthesize the new opcode. - X86::CondCode NewCC = getSwappedCondition(OldCC); + NewCC = getSwappedCondition(OldCC); if (NewCC == X86::COND_INVALID) return false; + } + if ((ShouldUpdateCC || IsSwapped) && NewCC != OldCC) { // Synthesize the new opcode. bool HasMemoryOperand = Instr.hasOneMemOperand(); unsigned NewOpc; @@ -3809,19 +3875,19 @@ optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI, unsigned &FoldAsLoadDefReg, MachineInstr *&DefMI) const { if (FoldAsLoadDefReg == 0) - return 0; + return nullptr; // To be conservative, if there exists another load, clear the load candidate. if (MI->mayLoad()) { FoldAsLoadDefReg = 0; - return 0; + return nullptr; } // Check whether we can move DefMI here. DefMI = MRI->getVRegDef(FoldAsLoadDefReg); assert(DefMI); bool SawStore = false; - if (!DefMI->isSafeToMove(this, 0, SawStore)) - return 0; + if (!DefMI->isSafeToMove(this, nullptr, SawStore)) + return nullptr; // We try to commute MI if possible. unsigned IdxEnd = (MI->isCommutable()) ? 2 : 1; @@ -3838,12 +3904,12 @@ optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI, continue; // Do not fold if we have a subreg use or a def or multiple uses. if (MO.getSubReg() || MO.isDef() || FoundSrcOperand) - return 0; + return nullptr; SrcOperandId = i; FoundSrcOperand = true; } - if (!FoundSrcOperand) return 0; + if (!FoundSrcOperand) return nullptr; // Check whether we can fold the def into SrcOperandId. SmallVector Ops; @@ -3857,22 +3923,22 @@ optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI, if (Idx == 1) { // MI was changed but it didn't help, commute it back! commuteInstruction(MI, false); - return 0; + return nullptr; } // Check whether we can commute MI and enable folding. if (MI->isCommutable()) { MachineInstr *NewMI = commuteInstruction(MI, false); // Unable to commute. - if (!NewMI) return 0; + if (!NewMI) return nullptr; if (NewMI != MI) { // New instruction. It doesn't need to be kept. NewMI->eraseFromParent(); - return 0; + return nullptr; } } } - return 0; + return nullptr; } /// Expand2AddrUndef - Expand a single-def pseudo instruction to a two-addr @@ -3898,7 +3964,7 @@ static bool Expand2AddrUndef(MachineInstrBuilder &MIB, } bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { - bool HasAVX = TM.getSubtarget().hasAVX(); + bool HasAVX = Subtarget.hasAVX(); MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); switch (MI->getOpcode()) { case X86::MOV32r0: @@ -4007,15 +4073,16 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned i, const SmallVectorImpl &MOs, unsigned Size, unsigned Align) const { - const DenseMap > *OpcodeTablePtr = 0; - bool isCallRegIndirect = TM.getSubtarget().callRegIndirect(); + const DenseMap > *OpcodeTablePtr = nullptr; + bool isCallRegIndirect = Subtarget.callRegIndirect(); bool isTwoAddrFold = false; // Atom favors register form of call. So, we do not fold loads into calls // when X86Subtarget is Atom. if (isCallRegIndirect && (MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r)) { - return NULL; + return nullptr; } unsigned NumOps = MI->getDesc().getNumOperands(); @@ -4026,9 +4093,9 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding. if (MI->getOpcode() == X86::ADD32ri && MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS) - return NULL; + return nullptr; - MachineInstr *NewMI = NULL; + MachineInstr *NewMI = nullptr; // Folding a memory location into the two-address part of a two-address // instruction is different than folding it other places. It requires // replacing the *two* registers with the memory location. @@ -4063,7 +4130,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, unsigned Opcode = I->second.first; unsigned MinAlign = (I->second.second & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT; if (Align < MinAlign) - return NULL; + return nullptr; bool NarrowToMOV32rm = false; if (Size) { unsigned RCSize = getRegClass(MI->getDesc(), i, &RI, MF)->getSize(); @@ -4071,12 +4138,12 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // Check if it's safe to fold the load. If the size of the object is // narrower than the load width, then it's not. if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4) - return NULL; + return nullptr; // If this is a 64-bit load, but the spill slot is 32, then we can do // a 32-bit load which is implicitly zero-extended. This likely is due // to liveintervalanalysis remat'ing a load from stack slot. if (MI->getOperand(0).getSubReg() || MI->getOperand(1).getSubReg()) - return NULL; + return nullptr; Opcode = X86::MOV32rm; NarrowToMOV32rm = true; } @@ -4105,7 +4172,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // No fusion if (PrintFailedFusing && !MI->isCopy()) dbgs() << "We failed to fuse operand " << i << " in " << *MI; - return NULL; + return nullptr; } /// hasPartialRegUpdate - Return true for all instructions that only update @@ -4249,7 +4316,7 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum, if (X86::VR128RegClass.contains(Reg)) { // These instructions are all floating point domain, so xorps is the best // choice. - bool HasAVX = TM.getSubtarget().hasAVX(); + bool HasAVX = Subtarget.hasAVX(); unsigned Opc = HasAVX ? X86::VXORPSrr : X86::XORPSrr; BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(Opc), Reg) .addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef); @@ -4270,14 +4337,14 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, const SmallVectorImpl &Ops, int FrameIndex) const { // Check switch flag - if (NoFusing) return NULL; + if (NoFusing) return nullptr; // Unless optimizing for size, don't fold to avoid partial // register update stalls if (!MF.getFunction()->getAttributes(). hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) && hasPartialRegUpdate(MI->getOpcode())) - return 0; + return nullptr; const MachineFrameInfo *MFI = MF.getFrameInfo(); unsigned Size = MFI->getObjectSize(FrameIndex); @@ -4285,12 +4352,13 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, // If the function stack isn't realigned we don't want to fold instructions // that need increased alignment. if (!RI.needsStackRealignment(MF)) - Alignment = std::min(Alignment, TM.getFrameLowering()->getStackAlignment()); + Alignment = std::min( + Alignment, MF.getTarget().getFrameLowering()->getStackAlignment()); if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { unsigned NewOpc = 0; unsigned RCSize = 0; switch (MI->getOpcode()) { - default: return NULL; + default: return nullptr; case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break; case X86::TEST16rr: NewOpc = X86::CMP16ri8; RCSize = 2; break; case X86::TEST32rr: NewOpc = X86::CMP32ri8; RCSize = 4; break; @@ -4299,12 +4367,12 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, // Check if it's safe to fold the load. If the size of the object is // narrower than the load width, then it's not. if (Size < RCSize) - return NULL; + return nullptr; // Change to CMPXXri r, 0 first. MI->setDesc(get(NewOpc)); MI->getOperand(1).ChangeToImmediate(0); } else if (Ops.size() != 1) - return NULL; + return nullptr; SmallVector MOs; MOs.push_back(MachineOperand::CreateFI(FrameIndex)); @@ -4322,14 +4390,14 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, return foldMemoryOperandImpl(MF, MI, Ops, FrameIndex); // Check switch flag - if (NoFusing) return NULL; + if (NoFusing) return nullptr; // Unless optimizing for size, don't fold to avoid partial // register update stalls if (!MF.getFunction()->getAttributes(). hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) && hasPartialRegUpdate(MI->getOpcode())) - return 0; + return nullptr; // Determine the alignment of the load. unsigned Alignment = 0; @@ -4352,12 +4420,12 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, Alignment = 4; break; default: - return 0; + return nullptr; } if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { unsigned NewOpc = 0; switch (MI->getOpcode()) { - default: return NULL; + default: return nullptr; case X86::TEST8rr: NewOpc = X86::CMP8ri; break; case X86::TEST16rr: NewOpc = X86::CMP16ri8; break; case X86::TEST32rr: NewOpc = X86::CMP32ri8; break; @@ -4367,12 +4435,12 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MI->setDesc(get(NewOpc)); MI->getOperand(1).ChangeToImmediate(0); } else if (Ops.size() != 1) - return NULL; + return nullptr; // Make sure the subregisters match. // Otherwise we risk changing the size of the load. if (LoadMI->getOperand(0).getSubReg() != MI->getOperand(Ops[0]).getSubReg()) - return NULL; + return nullptr; SmallVector MOs; switch (LoadMI->getOpcode()) { @@ -4386,21 +4454,21 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // Create a constant-pool entry and operands to load from it. // Medium and large mode can't fold loads this way. - if (TM.getCodeModel() != CodeModel::Small && - TM.getCodeModel() != CodeModel::Kernel) - return NULL; + if (MF.getTarget().getCodeModel() != CodeModel::Small && + MF.getTarget().getCodeModel() != CodeModel::Kernel) + return nullptr; // x86-32 PIC requires a PIC base register for constant pools. unsigned PICBase = 0; - if (TM.getRelocationModel() == Reloc::PIC_) { - if (TM.getSubtarget().is64Bit()) + if (MF.getTarget().getRelocationModel() == Reloc::PIC_) { + if (Subtarget.is64Bit()) PICBase = X86::RIP; else // FIXME: PICBase = getGlobalBaseReg(&MF); // This doesn't work for several reasons. // 1. GlobalBaseReg may have been spilled. // 2. It may not be live at MI. - return NULL; + return nullptr; } // Create a constant-pool entry. @@ -4436,14 +4504,14 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, > 4) // These instructions only load 32 bits, we can't fold them if the // destination register is wider than 32 bits (4 bytes). - return NULL; + return nullptr; if ((LoadMI->getOpcode() == X86::MOVSDrm || LoadMI->getOpcode() == X86::VMOVSDrm) && MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize() > 8) // These instructions only load 64 bits, we can't fold them if the // destination register is wider than 64 bits (8 bytes). - return NULL; + return nullptr; // Folding a normal load. Just copy the load's address operands. for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i) @@ -4489,7 +4557,8 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, // Folding a memory location into the two-address part of a two-address // instruction is different than folding it other places. It requires // replacing the *two* registers with the memory location. - const DenseMap > *OpcodeTablePtr = 0; + const DenseMap > *OpcodeTablePtr = nullptr; if (isTwoAddr && NumOps >= 2 && OpNum < 2) { OpcodeTablePtr = &RegOp2MemOpTable2Addr; } else if (OpNum == 0) { // If operand 0 @@ -4532,7 +4601,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF); if (!MI->hasOneMemOperand() && RC == &X86::VR128RegClass && - !TM.getSubtarget().isUnalignedMemAccessFast()) + !Subtarget.isUnalignedMemAccessFast()) // Without memoperands, loadRegFromAddr and storeRegToStackSlot will // conservatively assume the address is unaligned. That's bad for // performance. @@ -4671,7 +4740,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, AddrOps.push_back(Chain); // Emit the load instruction. - SDNode *Load = 0; + SDNode *Load = nullptr; if (FoldedLoad) { EVT VT = *RC->vt_begin(); std::pair(N)->memoperands_end()); if (!(*MMOs.first) && RC == &X86::VR128RegClass && - !TM.getSubtarget().isUnalignedMemAccessFast()) + !Subtarget.isUnalignedMemAccessFast()) // Do not introduce a slow unaligned load. return false; unsigned Alignment = RC->getSize() == 32 ? 32 : 16; bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= Alignment; - Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl, + Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, Subtarget), dl, VT, MVT::Other, AddrOps); NewNodes.push_back(Load); @@ -4696,7 +4765,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, // Emit the data processing instruction. std::vector VTs; - const TargetRegisterClass *DstRC = 0; + const TargetRegisterClass *DstRC = nullptr; if (MCID.getNumDefs() > 0) { DstRC = getRegClass(MCID, 0, &RI, MF); VTs.push_back(*DstRC->vt_begin()); @@ -4723,15 +4792,15 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, cast(N)->memoperands_end()); if (!(*MMOs.first) && RC == &X86::VR128RegClass && - !TM.getSubtarget().isUnalignedMemAccessFast()) + !Subtarget.isUnalignedMemAccessFast()) // Do not introduce a slow unaligned store. return false; unsigned Alignment = RC->getSize() == 32 ? 32 : 16; bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= Alignment; - SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC, - isAligned, TM), - dl, MVT::Other, AddrOps); + SDNode *Store = + DAG.getMachineNode(getStoreRegOpcode(0, DstRC, isAligned, Subtarget), + dl, MVT::Other, AddrOps); NewNodes.push_back(Store); // Preserve memory reference information. @@ -4892,7 +4961,7 @@ bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, default: // XMM registers. In 64-bit mode we can be a bit more aggressive since we // have 16 of them to play with. - if (TM.getSubtargetImpl()->is64Bit()) { + if (Subtarget.is64Bit()) { if (NumLoads >= 3) return false; } else if (NumLoads) { @@ -4918,7 +4987,7 @@ bool X86InstrInfo::shouldScheduleAdjacent(MachineInstr* First, // Check if this processor supports macro-fusion. Since this is a minor // heuristic, we haven't specifically reserved a feature. hasAVX is a decent // proxy for SandyBridge+. - if (!TM.getSubtarget().hasAVX()) + if (!Subtarget.hasAVX()) return false; enum { @@ -4970,6 +5039,7 @@ bool X86InstrInfo::shouldScheduleAdjacent(MachineInstr* First, case X86::TEST16rm: case X86::TEST32rm: case X86::TEST64rm: + case X86::TEST8ri_NOREX: case X86::AND16i16: case X86::AND16ri: case X86::AND16ri8: @@ -5100,7 +5170,7 @@ isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { /// TODO: Eliminate this and move the code to X86MachineFunctionInfo. /// unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { - assert(!TM.getSubtarget().is64Bit() && + assert(!Subtarget.is64Bit() && "X86-64 PIC uses RIP relative addressing"); X86MachineFunctionInfo *X86FI = MF->getInfo(); @@ -5190,20 +5260,20 @@ static const uint16_t *lookup(unsigned opcode, unsigned domain) { for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i) if (ReplaceableInstrs[i][domain-1] == opcode) return ReplaceableInstrs[i]; - return 0; + return nullptr; } static const uint16_t *lookupAVX2(unsigned opcode, unsigned domain) { for (unsigned i = 0, e = array_lengthof(ReplaceableInstrsAVX2); i != e; ++i) if (ReplaceableInstrsAVX2[i][domain-1] == opcode) return ReplaceableInstrsAVX2[i]; - return 0; + return nullptr; } std::pair X86InstrInfo::getExecutionDomain(const MachineInstr *MI) const { uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3; - bool hasAVX2 = TM.getSubtarget().hasAVX2(); + bool hasAVX2 = Subtarget.hasAVX2(); uint16_t validDomains = 0; if (domain && lookup(MI->getOpcode(), domain)) validDomains = 0xe; @@ -5218,7 +5288,7 @@ void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { assert(dom && "Not an SSE instruction"); const uint16_t *table = lookup(MI->getOpcode(), dom); if (!table) { // try the other table - assert((TM.getSubtarget().hasAVX2() || Domain < 3) && + assert((Subtarget.hasAVX2() || Domain < 3) && "256-bit vector operations only available in AVX2"); table = lookupAVX2(MI->getOpcode(), dom); } @@ -5231,6 +5301,16 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { NopInst.setOpcode(X86::NOOP); } +void X86InstrInfo::getUnconditionalBranch( + MCInst &Branch, const MCSymbolRefExpr *BranchTarget) const { + Branch.setOpcode(X86::JMP_4); + Branch.addOperand(MCOperand::CreateExpr(BranchTarget)); +} + +void X86InstrInfo::getTrap(MCInst &MI) const { + MI.setOpcode(X86::TRAP); +} + bool X86InstrInfo::isHighLatencyDef(int opc) const { switch (opc) { default: return false; @@ -5327,8 +5407,10 @@ namespace { const X86TargetMachine *TM = static_cast(&MF.getTarget()); - assert(!TM->getSubtarget().is64Bit() && - "X86-64 PIC uses RIP relative addressing"); + // Don't do anything if this is 64-bit as 64-bit PIC + // uses RIP relative addressing. + if (TM->getSubtarget().is64Bit()) + return false; // Only emit a global base reg in PIC mode. if (TM->getRelocationModel() != Reloc::PIC_) @@ -5383,7 +5465,7 @@ namespace { char CGBR::ID = 0; FunctionPass* -llvm::createGlobalBaseRegPass() { return new CGBR(); } +llvm::createX86GlobalBaseRegPass() { return new CGBR(); } namespace { struct LDTLSCleanup : public MachineFunctionPass {