X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86InstrInfo.cpp;h=3112dc7382a4bdf15765845794244bafbe7db0bc;hb=22fee2dff4c43b551aefa44a96ca74fcade6bfac;hp=b1ac36f206ebcc05b267929c918e5e97096b077c;hpb=afcd543a55c8cc6859443d9a63d52d9e8615770c;p=oota-llvm.git diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index b1ac36f206e..3112dc7382a 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -13,7 +13,6 @@ #include "X86InstrInfo.h" #include "X86.h" -#include "X86GenInstrInfo.inc" #include "X86InstrBuilder.h" #include "X86MachineFunctionInfo.h" #include "X86Subtarget.h" @@ -34,9 +33,11 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" #include "llvm/MC/MCAsmInfo.h" - #include +#define GET_INSTRINFO_MC_DESC +#include "X86GenInstrInfo.inc" + using namespace llvm; static cl::opt @@ -55,6 +56,11 @@ ReMatPICStubLoad("remat-pic-stub-load", X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) : TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)), TM(tm), RI(tm, *this) { + enum { + TB_NOT_REVERSABLE = 1U << 31, + TB_FLAGS = TB_NOT_REVERSABLE + }; + static const unsigned OpTbl2Addr[][2] = { { X86::ADC32ri, X86::ADC32mi }, { X86::ADC32ri8, X86::ADC32mi8 }, @@ -64,13 +70,22 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::ADC64rr, X86::ADC64mr }, { X86::ADD16ri, X86::ADD16mi }, { X86::ADD16ri8, X86::ADD16mi8 }, + { X86::ADD16ri_DB, X86::ADD16mi | TB_NOT_REVERSABLE }, + { X86::ADD16ri8_DB, X86::ADD16mi8 | TB_NOT_REVERSABLE }, { X86::ADD16rr, X86::ADD16mr }, + { X86::ADD16rr_DB, X86::ADD16mr | TB_NOT_REVERSABLE }, { X86::ADD32ri, X86::ADD32mi }, { X86::ADD32ri8, X86::ADD32mi8 }, + { X86::ADD32ri_DB, X86::ADD32mi | TB_NOT_REVERSABLE }, + { X86::ADD32ri8_DB, X86::ADD32mi8 | TB_NOT_REVERSABLE }, { X86::ADD32rr, X86::ADD32mr }, + { X86::ADD32rr_DB, X86::ADD32mr | TB_NOT_REVERSABLE }, { X86::ADD64ri32, X86::ADD64mi32 }, { X86::ADD64ri8, X86::ADD64mi8 }, + { X86::ADD64ri32_DB,X86::ADD64mi32 | TB_NOT_REVERSABLE }, + { X86::ADD64ri8_DB, X86::ADD64mi8 | TB_NOT_REVERSABLE }, { X86::ADD64rr, X86::ADD64mr }, + { X86::ADD64rr_DB, X86::ADD64mr | TB_NOT_REVERSABLE }, { X86::ADD8ri, X86::ADD8mi }, { X86::ADD8rr, X86::ADD8mr }, { X86::AND16ri, X86::AND16mi }, @@ -215,16 +230,21 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) { unsigned RegOp = OpTbl2Addr[i][0]; - unsigned MemOp = OpTbl2Addr[i][1]; - if (!RegOp2MemOpTable2Addr.insert(std::make_pair((unsigned*)RegOp, - std::make_pair(MemOp,0))).second) - assert(false && "Duplicated entries?"); + unsigned MemOp = OpTbl2Addr[i][1] & ~TB_FLAGS; + assert(!RegOp2MemOpTable2Addr.count(RegOp) && "Duplicated entries?"); + RegOp2MemOpTable2Addr[RegOp] = std::make_pair(MemOp, 0U); + + // If this is not a reversible operation (because there is a many->one) + // mapping, don't insert the reverse of the operation into MemOp2RegOpTable. + if (OpTbl2Addr[i][1] & TB_NOT_REVERSABLE) + continue; + // Index 0, folded load and store, no alignment requirement. unsigned AuxInfo = 0 | (1 << 4) | (1 << 5); - if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, - std::make_pair(RegOp, - AuxInfo))).second) - assert(false && "Duplicated entries in unfolding maps?"); + + assert(!MemOp2RegOpTable.count(MemOp) && + "Duplicated entries in unfolding maps?"); + MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo); } // If the third value is 1, then it's folding either a load or a store. @@ -251,8 +271,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::DIV64r, X86::DIV64m, 1, 0 }, { X86::DIV8r, X86::DIV8m, 1, 0 }, { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 }, - { X86::FsMOVAPDrr, X86::MOVSDmr, 0, 0 }, - { X86::FsMOVAPSrr, X86::MOVSSmr, 0, 0 }, + { X86::FsMOVAPDrr, X86::MOVSDmr | TB_NOT_REVERSABLE , 0, 0 }, + { X86::FsMOVAPSrr, X86::MOVSSmr | TB_NOT_REVERSABLE , 0, 0 }, { X86::IDIV16r, X86::IDIV16m, 1, 0 }, { X86::IDIV32r, X86::IDIV32m, 1, 0 }, { X86::IDIV64r, X86::IDIV64m, 1, 0 }, @@ -267,7 +287,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::MOV16rr, X86::MOV16mr, 0, 0 }, { X86::MOV32ri, X86::MOV32mi, 0, 0 }, { X86::MOV32rr, X86::MOV32mr, 0, 0 }, - { X86::MOV32rr_TC, X86::MOV32mr_TC, 0, 0 }, { X86::MOV64ri32, X86::MOV64mi32, 0, 0 }, { X86::MOV64rr, X86::MOV64mr, 0, 0 }, { X86::MOV8ri, X86::MOV8mi, 0, 0 }, @@ -311,19 +330,22 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) }; for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) { - unsigned RegOp = OpTbl0[i][0]; - unsigned MemOp = OpTbl0[i][1]; - unsigned Align = OpTbl0[i][3]; - if (!RegOp2MemOpTable0.insert(std::make_pair((unsigned*)RegOp, - std::make_pair(MemOp,Align))).second) - assert(false && "Duplicated entries?"); + unsigned RegOp = OpTbl0[i][0]; + unsigned MemOp = OpTbl0[i][1] & ~TB_FLAGS; unsigned FoldedLoad = OpTbl0[i][2]; + unsigned Align = OpTbl0[i][3]; + assert(!RegOp2MemOpTable0.count(RegOp) && "Duplicated entries?"); + RegOp2MemOpTable0[RegOp] = std::make_pair(MemOp, Align); + + // If this is not a reversible operation (because there is a many->one) + // mapping, don't insert the reverse of the operation into MemOp2RegOpTable. + if (OpTbl0[i][1] & TB_NOT_REVERSABLE) + continue; + // Index 0, folded load or store. unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5); - if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr) - if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, - std::make_pair(RegOp, AuxInfo))).second) - assert(false && "Duplicated entries in unfolding maps?"); + assert(!MemOp2RegOpTable.count(MemOp) && "Duplicated entries?"); + MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo); } static const unsigned OpTbl1[][3] = { @@ -341,16 +363,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 }, { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 }, { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 }, - { X86::FsMOVAPDrr, X86::MOVSDrm, 0 }, - { X86::FsMOVAPSrr, X86::MOVSSrm, 0 }, + { X86::FsMOVAPDrr, X86::MOVSDrm | TB_NOT_REVERSABLE , 0 }, + { X86::FsMOVAPSrr, X86::MOVSSrm | TB_NOT_REVERSABLE , 0 }, { X86::IMUL16rri, X86::IMUL16rmi, 0 }, { X86::IMUL16rri8, X86::IMUL16rmi8, 0 }, { X86::IMUL32rri, X86::IMUL32rmi, 0 }, { X86::IMUL32rri8, X86::IMUL32rmi8, 0 }, { X86::IMUL64rri32, X86::IMUL64rmi32, 0 }, { X86::IMUL64rri8, X86::IMUL64rmi8, 0 }, - { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 }, - { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 }, { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 }, { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 }, { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, 16 }, @@ -379,7 +399,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 }, { X86::MOV16rr, X86::MOV16rm, 0 }, { X86::MOV32rr, X86::MOV32rm, 0 }, - { X86::MOV32rr_TC, X86::MOV32rm_TC, 0 }, { X86::MOV64rr, X86::MOV64rm, 0 }, { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 }, { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 }, @@ -438,25 +457,31 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { unsigned RegOp = OpTbl1[i][0]; - unsigned MemOp = OpTbl1[i][1]; + unsigned MemOp = OpTbl1[i][1] & ~TB_FLAGS; unsigned Align = OpTbl1[i][2]; - if (!RegOp2MemOpTable1.insert(std::make_pair((unsigned*)RegOp, - std::make_pair(MemOp,Align))).second) - assert(false && "Duplicated entries?"); + assert(!RegOp2MemOpTable1.count(RegOp) && "Duplicate entries"); + RegOp2MemOpTable1[RegOp] = std::make_pair(MemOp, Align); + + // If this is not a reversible operation (because there is a many->one) + // mapping, don't insert the reverse of the operation into MemOp2RegOpTable. + if (OpTbl1[i][1] & TB_NOT_REVERSABLE) + continue; + // Index 1, folded load unsigned AuxInfo = 1 | (1 << 4); - if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr) - if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, - std::make_pair(RegOp, AuxInfo))).second) - assert(false && "Duplicated entries in unfolding maps?"); + assert(!MemOp2RegOpTable.count(MemOp) && "Duplicate entries"); + MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo); } static const unsigned OpTbl2[][3] = { { X86::ADC32rr, X86::ADC32rm, 0 }, { X86::ADC64rr, X86::ADC64rm, 0 }, { X86::ADD16rr, X86::ADD16rm, 0 }, + { X86::ADD16rr_DB, X86::ADD16rm | TB_NOT_REVERSABLE, 0 }, { X86::ADD32rr, X86::ADD32rm, 0 }, + { X86::ADD32rr_DB, X86::ADD32rm | TB_NOT_REVERSABLE, 0 }, { X86::ADD64rr, X86::ADD64rm, 0 }, + { X86::ADD64rr_DB, X86::ADD64rm | TB_NOT_REVERSABLE, 0 }, { X86::ADD8rr, X86::ADD8rm, 0 }, { X86::ADDPDrr, X86::ADDPDrm, 16 }, { X86::ADDPSrr, X86::ADDPSrm, 16 }, @@ -543,6 +568,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::IMUL16rr, X86::IMUL16rm, 0 }, { X86::IMUL32rr, X86::IMUL32rm, 0 }, { X86::IMUL64rr, X86::IMUL64rm, 0 }, + { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 }, + { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 }, { X86::MAXPDrr, X86::MAXPDrm, 16 }, { X86::MAXPDrr_Int, X86::MAXPDrm_Int, 16 }, { X86::MAXPSrr, X86::MAXPSrm, 16 }, @@ -651,16 +678,22 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) { unsigned RegOp = OpTbl2[i][0]; - unsigned MemOp = OpTbl2[i][1]; + unsigned MemOp = OpTbl2[i][1] & ~TB_FLAGS; unsigned Align = OpTbl2[i][2]; - if (!RegOp2MemOpTable2.insert(std::make_pair((unsigned*)RegOp, - std::make_pair(MemOp,Align))).second) - assert(false && "Duplicated entries?"); + + assert(!RegOp2MemOpTable2.count(RegOp) && "Duplicate entry!"); + RegOp2MemOpTable2[RegOp] = std::make_pair(MemOp, Align); + + // If this is not a reversible operation (because there is a many->one) + // mapping, don't insert the reverse of the operation into MemOp2RegOpTable. + if (OpTbl2[i][1] & TB_NOT_REVERSABLE) + continue; + // Index 2, folded load unsigned AuxInfo = 2 | (1 << 4); - if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, - std::make_pair(RegOp, AuxInfo))).second) - assert(false && "Duplicated entries in unfolding maps?"); + assert(!MemOp2RegOpTable.count(MemOp) && + "Duplicated entries in unfolding maps?"); + MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo); } } @@ -741,9 +774,7 @@ static bool isFrameLoadOpcode(int Opcode) { case X86::MOV8rm: case X86::MOV16rm: case X86::MOV32rm: - case X86::MOV32rm_TC: case X86::MOV64rm: - case X86::MOV64rm_TC: case X86::LD_Fp64m: case X86::MOVSSrm: case X86::MOVSDrm: @@ -764,9 +795,7 @@ static bool isFrameStoreOpcode(int Opcode) { case X86::MOV8mr: case X86::MOV16mr: case X86::MOV32mr: - case X86::MOV32mr_TC: case X86::MOV64mr: - case X86::MOV64mr_TC: case X86::ST_FpP64m: case X86::MOVSSmr: case X86::MOVSDmr: @@ -781,7 +810,7 @@ static bool isFrameStoreOpcode(int Opcode) { return false; } -unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, +unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { if (isFrameLoadOpcode(MI->getOpcode())) if (MI->getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex)) @@ -789,7 +818,7 @@ unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, return 0; } -unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, +unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, int &FrameIndex) const { if (isFrameLoadOpcode(MI->getOpcode())) { unsigned Reg; @@ -889,7 +918,6 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, case X86::MOVSDrm: case X86::MOVAPSrm: case X86::MOVUPSrm: - case X86::MOVUPSrm_Int: case X86::MOVAPDrm: case X86::MOVDQArm: case X86::MMX_MOVD64rm: @@ -919,10 +947,10 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, isPICBase = true; } return isPICBase; - } + } return false; } - + case X86::LEA32r: case X86::LEA64r: { if (MI->getOperand(2).isImm() && @@ -1097,9 +1125,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo(); unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass); unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass); - + // Build and insert into an implicit UNDEF value. This is OK because - // well be shifting and then extracting the lower 16-bits. + // well be shifting and then extracting the lower 16-bits. // This has the potential to cause partial register stall. e.g. // movw (%rbp,%rcx,2), %dx // leal -65(%rdx), %esi @@ -1133,9 +1161,12 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, break; case X86::ADD16ri: case X86::ADD16ri8: - addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm()); + case X86::ADD16ri_DB: + case X86::ADD16ri8_DB: + addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm()); break; - case X86::ADD16rr: { + case X86::ADD16rr: + case X86::ADD16rr_DB: { unsigned Src2 = MI->getOperand(2).getReg(); bool isKill2 = MI->getOperand(2).isKill(); unsigned leaInReg2 = 0; @@ -1147,7 +1178,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, } else { leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass); // Build and insert into an implicit UNDEF value. This is OK because - // well be shifting and then extracting the lower 16-bits. + // well be shifting and then extracting the lower 16-bits. BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2); InsMI2 = BuildMI(*MFI, MIB, MI->getDebugLoc(), get(TargetOpcode::COPY)) @@ -1214,7 +1245,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::SHUFPSrri: { assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!"); if (!TM.getSubtarget().hasSSE2()) return 0; - + unsigned B = MI->getOperand(1).getReg(); unsigned C = MI->getOperand(2).getReg(); if (B != C) return 0; @@ -1348,18 +1379,27 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, Src, isKill, -1); break; case X86::ADD64rr: - case X86::ADD32rr: { + case X86::ADD64rr_DB: + case X86::ADD32rr: + case X86::ADD32rr_DB: { assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); - unsigned Opc = MIOpc == X86::ADD64rr ? X86::LEA64r - : (is64Bit ? X86::LEA64_32r : X86::LEA32r); + unsigned Opc; + TargetRegisterClass *RC; + if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB) { + Opc = X86::LEA64r; + RC = X86::GR64_NOSPRegisterClass; + } else { + Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; + RC = X86::GR32_NOSPRegisterClass; + } + + unsigned Src2 = MI->getOperand(2).getReg(); bool isKill2 = MI->getOperand(2).isKill(); // LEA can't handle RSP. if (TargetRegisterInfo::isVirtualRegister(Src2) && - !MF.getRegInfo().constrainRegClass(Src2, - MIOpc == X86::ADD64rr ? X86::GR64_NOSPRegisterClass : - X86::GR32_NOSPRegisterClass)) + !MF.getRegInfo().constrainRegClass(Src2, RC)) return 0; NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc)) @@ -1370,7 +1410,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, LV->replaceKillInstruction(Src2, MI, NewMI); break; } - case X86::ADD16rr: { + case X86::ADD16rr: + case X86::ADD16rr_DB: { if (DisableLEA16) return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); @@ -1386,6 +1427,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, } case X86::ADD64ri32: case X86::ADD64ri8: + case X86::ADD64ri32_DB: + case X86::ADD64ri8_DB: assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) .addReg(Dest, RegState::Define | @@ -1393,7 +1436,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, Src, isKill, MI->getOperand(2).getImm()); break; case X86::ADD32ri: - case X86::ADD32ri8: { + case X86::ADD32ri8: + case X86::ADD32ri_DB: + case X86::ADD32ri8_DB: { assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) @@ -1404,6 +1449,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, } case X86::ADD16ri: case X86::ADD16ri8: + case X86::ADD16ri_DB: + case X86::ADD16ri8_DB: if (DisableLEA16) return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); @@ -1425,7 +1472,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, LV->replaceKillInstruction(Dest, MI, NewMI); } - MFI->insert(MBBI, NewMI); // Insert the new inst + MFI->insert(MBBI, NewMI); // Insert the new inst return NewMI; } @@ -1644,18 +1691,18 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) { } bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { - const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.isTerminator()) return false; - + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.isTerminator()) return false; + // Conditional branch is a special case. - if (TID.isBranch() && !TID.isBarrier()) + if (MCID.isBranch() && !MCID.isBarrier()) return true; - if (!TID.isPredicable()) + if (!MCID.isPredicable()) return true; return !isPredicated(MI); } -bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, +bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, @@ -1744,7 +1791,6 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, .addMBB(UnCondBrIter->getOperand(0).getMBB()); BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_4)) .addMBB(TargetBB); - MBB.addSuccessor(TargetBB); OldInst->eraseFromParent(); UnCondBrIter->eraseFromParent(); @@ -1816,7 +1862,7 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { I = MBB.end(); ++Count; } - + return Count; } @@ -1970,70 +2016,48 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, bool isStackAligned, const TargetMachine &TM, bool load) { - switch (RC->getID()) { + switch (RC->getSize()) { default: - llvm_unreachable("Unknown regclass"); - case X86::GR64RegClassID: - case X86::GR64_NOSPRegClassID: - return load ? X86::MOV64rm : X86::MOV64mr; - case X86::GR32RegClassID: - case X86::GR32_NOSPRegClassID: - case X86::GR32_ADRegClassID: - return load ? X86::MOV32rm : X86::MOV32mr; - case X86::GR16RegClassID: - return load ? X86::MOV16rm : X86::MOV16mr; - case X86::GR8RegClassID: - // Copying to or from a physical H register on x86-64 requires a NOREX - // move. Otherwise use a normal move. - if (isHReg(Reg) && - TM.getSubtarget().is64Bit()) - return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX; - else - return load ? X86::MOV8rm : X86::MOV8mr; - case X86::GR64_ABCDRegClassID: - return load ? X86::MOV64rm : X86::MOV64mr; - case X86::GR32_ABCDRegClassID: - return load ? X86::MOV32rm : X86::MOV32mr; - case X86::GR16_ABCDRegClassID: - return load ? X86::MOV16rm : X86::MOV16mr; - case X86::GR8_ABCD_LRegClassID: - return load ? X86::MOV8rm :X86::MOV8mr; - case X86::GR8_ABCD_HRegClassID: + llvm_unreachable("Unknown spill size"); + case 1: + assert(X86::GR8RegClass.hasSubClassEq(RC) && "Unknown 1-byte regclass"); if (TM.getSubtarget().is64Bit()) - return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX; - else - return load ? X86::MOV8rm : X86::MOV8mr; - case X86::GR64_NOREXRegClassID: - case X86::GR64_NOREX_NOSPRegClassID: - return load ? X86::MOV64rm : X86::MOV64mr; - case X86::GR32_NOREXRegClassID: - return load ? X86::MOV32rm : X86::MOV32mr; - case X86::GR16_NOREXRegClassID: - return load ? X86::MOV16rm : X86::MOV16mr; - case X86::GR8_NOREXRegClassID: + // Copying to or from a physical H register on x86-64 requires a NOREX + // move. Otherwise use a normal move. + if (isHReg(Reg) || X86::GR8_ABCD_HRegClass.hasSubClassEq(RC)) + return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX; return load ? X86::MOV8rm : X86::MOV8mr; - case X86::GR64_TCRegClassID: - return load ? X86::MOV64rm_TC : X86::MOV64mr_TC; - case X86::GR32_TCRegClassID: - return load ? X86::MOV32rm_TC : X86::MOV32mr_TC; - case X86::RFP80RegClassID: + case 2: + assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass"); + return load ? X86::MOV16rm : X86::MOV16mr; + case 4: + if (X86::GR32RegClass.hasSubClassEq(RC)) + return load ? X86::MOV32rm : X86::MOV32mr; + if (X86::FR32RegClass.hasSubClassEq(RC)) + return load ? X86::MOVSSrm : X86::MOVSSmr; + if (X86::RFP32RegClass.hasSubClassEq(RC)) + return load ? X86::LD_Fp32m : X86::ST_Fp32m; + llvm_unreachable("Unknown 4-byte regclass"); + case 8: + if (X86::GR64RegClass.hasSubClassEq(RC)) + return load ? X86::MOV64rm : X86::MOV64mr; + if (X86::FR64RegClass.hasSubClassEq(RC)) + return load ? X86::MOVSDrm : X86::MOVSDmr; + if (X86::VR64RegClass.hasSubClassEq(RC)) + return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr; + if (X86::RFP64RegClass.hasSubClassEq(RC)) + return load ? X86::LD_Fp64m : X86::ST_Fp64m; + llvm_unreachable("Unknown 8-byte regclass"); + case 10: + assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass"); return load ? X86::LD_Fp80m : X86::ST_FpP80m; - case X86::RFP64RegClassID: - return load ? X86::LD_Fp64m : X86::ST_Fp64m; - case X86::RFP32RegClassID: - return load ? X86::LD_Fp32m : X86::ST_Fp32m; - case X86::FR32RegClassID: - return load ? X86::MOVSSrm : X86::MOVSSmr; - case X86::FR64RegClassID: - return load ? X86::MOVSDrm : X86::MOVSDmr; - case X86::VR128RegClassID: + case 16: + assert(X86::VR128RegClass.hasSubClassEq(RC) && "Unknown 16-byte regclass"); // If stack is realigned we can use aligned stores. if (isStackAligned) return load ? X86::MOVAPSrm : X86::MOVAPSmr; else return load ? X86::MOVUPSrm : X86::MOVUPSmr; - case X86::VR64RegClassID: - return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr; } } @@ -2060,7 +2084,8 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, const MachineFunction &MF = *MBB.getParent(); assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() && "Stack slot too small for store"); - bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF); + bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= 16) || + RI.canRealignStack(MF); unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); DebugLoc DL = MBB.findDebugLoc(MI); addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx) @@ -2092,7 +2117,8 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { const MachineFunction &MF = *MBB.getParent(); - bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF); + bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= 16) || + RI.canRealignStack(MF); unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); DebugLoc DL = MBB.findDebugLoc(MI); addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx); @@ -2114,76 +2140,6 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, NewMIs.push_back(MIB); } -bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector &CSI, - const TargetRegisterInfo *TRI) const { - if (CSI.empty()) - return false; - - DebugLoc DL = MBB.findDebugLoc(MI); - - bool is64Bit = TM.getSubtarget().is64Bit(); - bool isWin64 = TM.getSubtarget().isTargetWin64(); - unsigned SlotSize = is64Bit ? 8 : 4; - - MachineFunction &MF = *MBB.getParent(); - unsigned FPReg = RI.getFrameRegister(MF); - X86MachineFunctionInfo *X86FI = MF.getInfo(); - unsigned CalleeFrameSize = 0; - - unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r; - for (unsigned i = CSI.size(); i != 0; --i) { - unsigned Reg = CSI[i-1].getReg(); - // Add the callee-saved register as live-in. It's killed at the spill. - MBB.addLiveIn(Reg); - if (Reg == FPReg) - // X86RegisterInfo::emitPrologue will handle spilling of frame register. - continue; - if (!X86::VR128RegClass.contains(Reg) && !isWin64) { - CalleeFrameSize += SlotSize; - BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill); - } else { - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), - RC, &RI); - } - } - - X86FI->setCalleeSavedFrameSize(CalleeFrameSize); - return true; -} - -bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector &CSI, - const TargetRegisterInfo *TRI) const { - if (CSI.empty()) - return false; - - DebugLoc DL = MBB.findDebugLoc(MI); - - MachineFunction &MF = *MBB.getParent(); - unsigned FPReg = RI.getFrameRegister(MF); - bool is64Bit = TM.getSubtarget().is64Bit(); - bool isWin64 = TM.getSubtarget().isTargetWin64(); - unsigned Opc = is64Bit ? X86::POP64r : X86::POP32r; - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - if (Reg == FPReg) - // X86RegisterInfo::emitEpilogue will handle restoring of frame register. - continue; - if (!X86::VR128RegClass.contains(Reg) && !isWin64) { - BuildMI(MBB, MI, DL, get(Opc), Reg); - } else { - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), - RC, &RI); - } - } - return true; -} - MachineInstr* X86InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, uint64_t Offset, @@ -2210,7 +2166,7 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, MIB.addOperand(MOs[i]); if (NumAddrOps < 4) // FrameIndex only addOffset(MIB, 0); - + // Loop over the rest of the ri operands, converting them over. unsigned NumOps = MI->getDesc().getNumOperands()-2; for (unsigned i = 0; i != NumOps; ++i) { @@ -2231,7 +2187,7 @@ static MachineInstr *FuseInst(MachineFunction &MF, MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), MI->getDebugLoc(), true); MachineInstrBuilder MIB(NewMI); - + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (i == OpNo) { @@ -2267,11 +2223,17 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned i, const SmallVectorImpl &MOs, unsigned Size, unsigned Align) const { - const DenseMap > *OpcodeTablePtr=NULL; + const DenseMap > *OpcodeTablePtr = 0; bool isTwoAddrFold = false; unsigned NumOps = MI->getDesc().getNumOperands(); bool isTwoAddr = NumOps > 1 && - MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1; + MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1; + + // FIXME: AsmPrinter doesn't know how to handle + // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding. + if (MI->getOpcode() == X86::ADD32ri && + MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS) + return NULL; MachineInstr *NewMI = NULL; // Folding a memory location into the two-address part of a two-address @@ -2280,7 +2242,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, if (isTwoAddr && NumOps >= 2 && i < 2 && MI->getOperand(0).isReg() && MI->getOperand(1).isReg() && - MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) { + MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) { OpcodeTablePtr = &RegOp2MemOpTable2Addr; isTwoAddrFold = true; } else if (i == 0) { // If operand 0 @@ -2294,19 +2256,19 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI); if (NewMI) return NewMI; - + OpcodeTablePtr = &RegOp2MemOpTable0; } else if (i == 1) { OpcodeTablePtr = &RegOp2MemOpTable1; } else if (i == 2) { OpcodeTablePtr = &RegOp2MemOpTable2; } - + // If table selected... if (OpcodeTablePtr) { // Find the Opcode to fuse - DenseMap >::const_iterator I = - OpcodeTablePtr->find((unsigned*)MI->getOpcode()); + DenseMap >::const_iterator I = + OpcodeTablePtr->find(MI->getOpcode()); if (I != OpcodeTablePtr->end()) { unsigned Opcode = I->second.first; unsigned MinAlign = I->second.second; @@ -2314,7 +2276,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, return NULL; bool NarrowToMOV32rm = false; if (Size) { - unsigned RCSize = MI->getDesc().OpInfo[i].getRegClass(&RI)->getSize(); + unsigned RCSize = getRegClass(MI->getDesc(), i, &RI)->getSize(); if (Size < RCSize) { // Check if it's safe to fold the load. If the size of the object is // narrower than the load width, then it's not. @@ -2349,8 +2311,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, return NewMI; } } - - // No fusion + + // No fusion if (PrintFailedFusing && !MI->isCopy()) dbgs() << "We failed to fuse operand " << i << " in " << *MI; return NULL; @@ -2361,7 +2323,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, const SmallVectorImpl &Ops, int FrameIndex) const { - // Check switch flag + // Check switch flag if (NoFusing) return NULL; if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) @@ -2413,7 +2375,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, const SmallVectorImpl &Ops, MachineInstr *LoadMI) const { - // Check switch flag + // Check switch flag if (NoFusing) return NULL; if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) @@ -2453,13 +2415,15 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, Alignment = 16; break; case X86::FsFLD0SD: + case X86::VFsFLD0SD: Alignment = 8; break; case X86::FsFLD0SS: + case X86::VFsFLD0SS: Alignment = 4; break; default: - llvm_unreachable("Don't know how to fold this instruction!"); + return 0; } if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { unsigned NewOpc = 0; @@ -2519,9 +2483,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineConstantPool &MCP = *MF.getConstantPool(); const Type *Ty; unsigned Opc = LoadMI->getOpcode(); - if (Opc == X86::FsFLD0SS) + if (Opc == X86::FsFLD0SS || Opc == X86::VFsFLD0SS) Ty = Type::getFloatTy(MF.getFunction()->getContext()); - else if (Opc == X86::FsFLD0SD) + else if (Opc == X86::FsFLD0SD || Opc == X86::VFsFLD0SD) Ty = Type::getDoubleTy(MF.getFunction()->getContext()); else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY) Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8); @@ -2554,17 +2518,23 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, const SmallVectorImpl &Ops) const { - // Check switch flag + // Check switch flag if (NoFusing) return 0; if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { switch (MI->getOpcode()) { default: return false; - case X86::TEST8rr: + case X86::TEST8rr: case X86::TEST16rr: case X86::TEST32rr: case X86::TEST64rr: return true; + case X86::ADD32ri: + // FIXME: AsmPrinter doesn't know how to handle + // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding. + if (MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS) + return false; + break; } } @@ -2575,21 +2545,20 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, unsigned Opc = MI->getOpcode(); unsigned NumOps = MI->getDesc().getNumOperands(); bool isTwoAddr = NumOps > 1 && - MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1; + MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1; // Folding a memory location into the two-address part of a two-address // instruction is different than folding it other places. It requires // replacing the *two* registers with the memory location. - const DenseMap > *OpcodeTablePtr=NULL; - if (isTwoAddr && NumOps >= 2 && OpNum < 2) { + const DenseMap > *OpcodeTablePtr = 0; + if (isTwoAddr && NumOps >= 2 && OpNum < 2) { OpcodeTablePtr = &RegOp2MemOpTable2Addr; } else if (OpNum == 0) { // If operand 0 switch (Opc) { case X86::MOV8r0: case X86::MOV16r0: case X86::MOV32r0: - case X86::MOV64r0: - return true; + case X86::MOV64r0: return true; default: break; } OpcodeTablePtr = &RegOp2MemOpTable0; @@ -2598,22 +2567,17 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, } else if (OpNum == 2) { OpcodeTablePtr = &RegOp2MemOpTable2; } - - if (OpcodeTablePtr) { - // Find the Opcode to fuse - DenseMap >::const_iterator I = - OpcodeTablePtr->find((unsigned*)Opc); - if (I != OpcodeTablePtr->end()) - return true; - } + + if (OpcodeTablePtr && OpcodeTablePtr->count(Opc)) + return true; return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops); } bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, unsigned Reg, bool UnfoldLoad, bool UnfoldStore, SmallVectorImpl &NewMIs) const { - DenseMap >::const_iterator I = - MemOp2RegOpTable.find((unsigned*)MI->getOpcode()); + DenseMap >::const_iterator I = + MemOp2RegOpTable.find(MI->getOpcode()); if (I == MemOp2RegOpTable.end()) return false; unsigned Opc = I->second.first; @@ -2627,9 +2591,8 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, return false; UnfoldStore &= FoldedStore; - const TargetInstrDesc &TID = get(Opc); - const TargetOperandInfo &TOI = TID.OpInfo[Index]; - const TargetRegisterClass *RC = TOI.getRegClass(&RI); + const MCInstrDesc &MCID = get(Opc); + const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI); if (!MI->hasOneMemOperand() && RC == &X86::VR128RegClass && !TM.getSubtarget().isUnalignedMemAccessFast()) @@ -2671,9 +2634,9 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, } // Emit the data processing instruction. - MachineInstr *DataMI = MF.CreateMachineInstr(TID, MI->getDebugLoc(), true); + MachineInstr *DataMI = MF.CreateMachineInstr(MCID, MI->getDebugLoc(), true); MachineInstrBuilder MIB(DataMI); - + if (FoldedStore) MIB.addReg(Reg, RegState::Define); for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i) @@ -2724,7 +2687,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, // Emit the store instruction. if (UnfoldStore) { - const TargetRegisterClass *DstRC = TID.OpInfo[0].getRegClass(&RI); + const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI); std::pair MMOs = MF.extractStoreMemRefs(MI->memoperands_begin(), @@ -2741,17 +2704,17 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, if (!N->isMachineOpcode()) return false; - DenseMap >::const_iterator I = - MemOp2RegOpTable.find((unsigned*)N->getMachineOpcode()); + DenseMap >::const_iterator I = + MemOp2RegOpTable.find(N->getMachineOpcode()); if (I == MemOp2RegOpTable.end()) return false; unsigned Opc = I->second.first; unsigned Index = I->second.second & 0xf; bool FoldedLoad = I->second.second & (1 << 4); bool FoldedStore = I->second.second & (1 << 5); - const TargetInstrDesc &TID = get(Opc); - const TargetRegisterClass *RC = TID.OpInfo[Index].getRegClass(&RI); - unsigned NumDefs = TID.NumDefs; + const MCInstrDesc &MCID = get(Opc); + const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI); + unsigned NumDefs = MCID.NumDefs; std::vector AddrOps; std::vector BeforeOps; std::vector AfterOps; @@ -2795,13 +2758,13 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, // Emit the data processing instruction. std::vector VTs; const TargetRegisterClass *DstRC = 0; - if (TID.getNumDefs() > 0) { - DstRC = TID.OpInfo[0].getRegClass(&RI); + if (MCID.getNumDefs() > 0) { + DstRC = getRegClass(MCID, 0, &RI); VTs.push_back(*DstRC->vt_begin()); } for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { EVT VT = N->getValueType(i); - if (VT != MVT::Other && i >= (unsigned)TID.getNumDefs()) + if (VT != MVT::Other && i >= (unsigned)MCID.getNumDefs()) VTs.push_back(VT); } if (Load) @@ -2842,8 +2805,8 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, bool UnfoldLoad, bool UnfoldStore, unsigned *LoadRegIndex) const { - DenseMap >::const_iterator I = - MemOp2RegOpTable.find((unsigned*)Opc); + DenseMap >::const_iterator I = + MemOp2RegOpTable.find(Opc); if (I == MemOp2RegOpTable.end()) return 0; bool FoldedLoad = I->second.second & (1 << 4); @@ -2881,11 +2844,9 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case X86::FsMOVAPDrm: case X86::MOVAPSrm: case X86::MOVUPSrm: - case X86::MOVUPSrm_Int: case X86::MOVAPDrm: case X86::MOVDQArm: case X86::MOVDQUrm: - case X86::MOVDQUrm_Int: break; } switch (Opc2) { @@ -2905,11 +2866,9 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case X86::FsMOVAPDrm: case X86::MOVAPSrm: case X86::MOVUPSrm: - case X86::MOVUPSrm_Int: case X86::MOVAPDrm: case X86::MOVDQArm: case X86::MOVDQUrm: - case X86::MOVDQUrm_Int: break; } @@ -3121,6 +3080,45 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { NopInst.setOpcode(X86::NOOP); } +bool X86InstrInfo::isHighLatencyDef(int opc) const { + switch (opc) { + default: return false; + case X86::DIVSDrm: + case X86::DIVSDrm_Int: + case X86::DIVSDrr: + case X86::DIVSDrr_Int: + case X86::DIVSSrm: + case X86::DIVSSrm_Int: + case X86::DIVSSrr: + case X86::DIVSSrr_Int: + case X86::SQRTPDm: + case X86::SQRTPDm_Int: + case X86::SQRTPDr: + case X86::SQRTPDr_Int: + case X86::SQRTPSm: + case X86::SQRTPSm_Int: + case X86::SQRTPSr: + case X86::SQRTPSr_Int: + case X86::SQRTSDm: + case X86::SQRTSDm_Int: + case X86::SQRTSDr: + case X86::SQRTSDr_Int: + case X86::SQRTSSm: + case X86::SQRTSSm_Int: + case X86::SQRTSSr: + case X86::SQRTSSr_Int: + return true; + } +} + +bool X86InstrInfo:: +hasHighOperandLatency(const InstrItineraryData *ItinData, + const MachineRegisterInfo *MRI, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, unsigned UseIdx) const { + return isHighLatencyDef(DefMI->getOpcode()); +} + namespace { /// CGBR - Create Global Base Reg pass. This initializes the PIC /// global base register for x86-32. @@ -3158,11 +3156,11 @@ namespace { PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass); else PC = GlobalBaseReg; - + // Operand of MovePCtoStack is completely ignored by asm printer. It's // only used in JIT code emission as displacement to pc. BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0); - + // If we're using vanilla 'GOT' PIC style, we should use relative addressing // not to pc, but to _GLOBAL_OFFSET_TABLE_ external. if (TM->getSubtarget().isPICStyleGOT()) {