X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86InstrInfo.cpp;h=d91e48d2737f0ae6f3b5d384813afb3a2553054f;hp=2323f5790ab0f6d1534600b828e9a3bf4bcdeaaf;hb=9edf7deb37f0f97664f279040fa15d89f32e23d9;hpb=8d1f0dd983a29d4e8d6c840085538fc9ba7427a1

diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 2323f5790ab..d91e48d2737 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -27,6 +27,7 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInst.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -597,7 +598,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::PMULHUWrr,       X86::PMULHUWrm, 16 },
     { X86::PMULHWrr,        X86::PMULHWrm, 16 },
     { X86::PMULLDrr,        X86::PMULLDrm, 16 },
-    { X86::PMULLDrr_int,    X86::PMULLDrm_int, 16 },
     { X86::PMULLWrr,        X86::PMULLWrm, 16 },
     { X86::PMULUDQrr,       X86::PMULUDQrm, 16 },
     { X86::PORrr,           X86::PORrm, 16 },
@@ -744,17 +744,17 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
   case X86::MOVZX32rr8:
   case X86::MOVSX64rr8:
   case X86::MOVZX64rr8:
-    SubIdx = 1;
+    SubIdx = X86::sub_8bit;
     break;
   case X86::MOVSX32rr16:
   case X86::MOVZX32rr16:
   case X86::MOVSX64rr16:
   case X86::MOVZX64rr16:
-    SubIdx = 3;
+    SubIdx = X86::sub_16bit;
     break;
   case X86::MOVSX64rr32:
   case X86::MOVZX64rr32:
-    SubIdx = 4;
+    SubIdx = X86::sub_32bit;
     break;
   }
   return true;
@@ -1064,13 +1064,8 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I,
                                  unsigned DestReg, unsigned SubIdx,
                                  const MachineInstr *Orig,
-                                 const TargetRegisterInfo *TRI) const {
-  DebugLoc DL = MBB.findDebugLoc(I);
-
-  if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
-    DestReg = TRI->getSubReg(DestReg, SubIdx);
-    SubIdx = 0;
-  }
+                                 const TargetRegisterInfo &TRI) const {
+  DebugLoc DL = Orig->getDebugLoc();
 
   // MOV32r0 etc. are implemented with xor which clobbers condition code.
   // Re-materialize them as movri instructions to avoid side effects.
@@ -1098,14 +1093,13 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
 
   if (Clone) {
     MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
-    MI->getOperand(0).setReg(DestReg);
     MBB.insert(I, MI);
   } else {
-    BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0);
+    BuildMI(MBB, I, DL, get(Opc)).addOperand(Orig->getOperand(0)).addImm(0);
   }
 
   MachineInstr *NewMI = prior(I);
-  NewMI->getOperand(0).setSubReg(SubIdx);
+  NewMI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
 }
 
 /// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that
@@ -1154,7 +1148,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
     BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg)
       .addReg(leaInReg)
       .addReg(Src, getKillRegState(isKill))
-      .addImm(X86::SUBREG_16BIT);
+      .addImm(X86::sub_16bit);
 
     MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
                                       get(Opc), leaOutReg);
@@ -1198,7 +1192,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
       BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg2)
         .addReg(leaInReg2)
         .addReg(Src2, getKillRegState(isKill2))
-        .addImm(X86::SUBREG_16BIT);
+        .addImm(X86::sub_16bit);
       addRegReg(MIB, leaInReg, true, leaInReg2, true);
     }
     if (LV && isKill2 && InsMI2)
@@ -1212,7 +1206,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
   BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
     .addReg(Dest, RegState::Define | getDeadRegState(isDead))
     .addReg(leaOutReg, RegState::Kill)
-    .addImm(X86::SUBREG_16BIT);
+    .addImm(X86::sub_16bit);
 
   if (LV) {
     // Update live variables
@@ -1685,8 +1679,11 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
   // Start from the bottom of the block and work up, examining the
   // terminator instructions.
   MachineBasicBlock::iterator I = MBB.end();
+  MachineBasicBlock::iterator UnCondBrIter = MBB.end();
   while (I != MBB.begin()) {
     --I;
+    if (I->isDebugValue())
+      continue;
 
     // Working from the bottom, when we see a non-terminator instruction, we're
     // done.
@@ -1700,6 +1697,8 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
 
     // Handle unconditional branches.
     if (I->getOpcode() == X86::JMP_4) {
+      UnCondBrIter = I;
+
       if (!AllowModify) {
         TBB = I->getOperand(0).getMBB();
         continue;
@@ -1717,10 +1716,11 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
       TBB = 0;
       I->eraseFromParent();
       I = MBB.end();
+      UnCondBrIter = MBB.end();
       continue;
     }
 
-    // TBB is used to indicate the unconditinal destination.
+    // TBB is used to indicate the unconditional destination.
     TBB = I->getOperand(0).getMBB();
     continue;
   }
@@ -1732,6 +1732,45 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
 
     // Working from the bottom, handle the first conditional branch.
     if (Cond.empty()) {
+      MachineBasicBlock *TargetBB = I->getOperand(0).getMBB();
+      if (AllowModify && UnCondBrIter != MBB.end() &&
+          MBB.isLayoutSuccessor(TargetBB)) {
+        // If we can modify the code and it ends in something like:
+        //
+        //     jCC L1
+        //     jmp L2
+        //   L1:
+        //     ...
+        //   L2:
+        //
+        // Then we can change this to:
+        //
+        //     jnCC L2
+        //   L1:
+        //     ...
+        //   L2:
+        //
+        // Which is a bit more efficient.
+        // We conditionally jump to the fall-through block.
+        BranchCode = GetOppositeBranchCondition(BranchCode);
+        unsigned JNCC = GetCondBranchFromCond(BranchCode);
+        MachineBasicBlock::iterator OldInst = I;
+
+        BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(JNCC))
+          .addMBB(UnCondBrIter->getOperand(0).getMBB());
+        BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_4))
+          .addMBB(TargetBB);
+        MBB.addSuccessor(TargetBB);
+
+        OldInst->eraseFromParent();
+        UnCondBrIter->eraseFromParent();
+
+        // Restart the analysis.
+        UnCondBrIter = MBB.end();
+        I = MBB.end();
+        continue;
+      }
+
       FBB = TBB;
       TBB = I->getOperand(0).getMBB();
       Cond.push_back(MachineOperand::CreateImm(BranchCode));
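Editor's note: as a standalone sketch of the rewrite performed in the hunk above (hypothetical enum and struct, not LLVM's API), the idea is simply to invert the condition and retarget the conditional branch at the old unconditional destination when the conditional branch jumps over it to the fall-through block.

  // Illustrative only; the real code works on MachineInstrs as shown above.
  enum Cond { EQ, NE, LT, GE, LE, GT };

  static Cond invert(Cond C) {
    switch (C) {
    case EQ: return NE;  case NE: return EQ;
    case LT: return GE;  case GE: return LT;
    case LE: return GT;  case GT: return LE;
    }
    return C;
  }

  struct Branch { Cond C; int Target; };

  // Before: "jCC L1; jmp L2" where L1 is the layout fall-through block.
  // After:  a single "jnCC L2"; control falls through to L1 otherwise.
  static Branch combine(Branch CondBr, int UncondTarget) {
    return Branch{ invert(CondBr.C), UncondTarget };
  }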
@@ -1783,6 +1822,8 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
 
   while (I != MBB.begin()) {
     --I;
+    if (I->isDebugValue())
+      continue;
     if (I->getOpcode() != X86::JMP_4 &&
         GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
       break;
@@ -1800,7 +1841,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                            MachineBasicBlock *FBB,
                            const SmallVectorImpl<MachineOperand> &Cond) const {
   // FIXME this should probably have a DebugLoc operand
-  DebugLoc dl = DebugLoc::getUnknownLoc();
+  DebugLoc dl;
   // Shouldn't be a fall through.
   assert(TBB && "InsertBranch must not be told to insert a fallthrough");
   assert((Cond.size() == 1 || Cond.size() == 0) &&
@@ -1854,8 +1895,8 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MI,
                                 unsigned DestReg, unsigned SrcReg,
                                 const TargetRegisterClass *DestRC,
-                                const TargetRegisterClass *SrcRC) const {
-  DebugLoc DL = MBB.findDebugLoc(MI);
+                                const TargetRegisterClass *SrcRC,
+                                DebugLoc DL) const {
 
   // Determine if DstRC and SrcRC have a common superclass in common.
   const TargetRegisterClass *CommonRC = DestRC;
@@ -1946,12 +1987,12 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
     if (SrcReg != X86::EFLAGS)
       return false;
     if (DestRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) {
-      BuildMI(MBB, MI, DL, get(X86::PUSHFQ64));
+      BuildMI(MBB, MI, DL, get(X86::PUSHF64));
       BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
       return true;
     } else if (DestRC == &X86::GR32RegClass ||
               DestRC == &X86::GR32_NOSPRegClass) {
-      BuildMI(MBB, MI, DL, get(X86::PUSHFD));
+      BuildMI(MBB, MI, DL, get(X86::PUSHF32));
      BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
      return true;
    }
@@ -1960,12 +2001,12 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
       return false;
     if (SrcRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) {
       BuildMI(MBB, MI, DL, get(X86::PUSH64r)).addReg(SrcReg);
-      BuildMI(MBB, MI, DL, get(X86::POPFQ));
+      BuildMI(MBB, MI, DL, get(X86::POPF64));
       return true;
     } else if (SrcRC == &X86::GR32RegClass ||
               DestRC == &X86::GR32_NOSPRegClass) {
       BuildMI(MBB, MI, DL, get(X86::PUSH32r)).addReg(SrcReg);
-      BuildMI(MBB, MI, DL, get(X86::POPFD));
+      BuildMI(MBB, MI, DL, get(X86::POPF32));
       return true;
     }
   }
@@ -2086,7 +2127,8 @@ static unsigned getStoreRegOpcode(unsigned SrcReg,
 void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MI,
                                        unsigned SrcReg, bool isKill, int FrameIdx,
-                                       const TargetRegisterClass *RC) const {
+                                       const TargetRegisterClass *RC,
+                                       const TargetRegisterInfo *TRI) const {
   const MachineFunction &MF = *MBB.getParent();
   bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
   unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
@@ -2104,7 +2146,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
                                   SmallVectorImpl<MachineInstr*> &NewMIs) const {
   bool isAligned = (*MMOBegin)->getAlignment() >= 16;
   unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
-  DebugLoc DL = DebugLoc::getUnknownLoc();
+  DebugLoc DL;
   MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
   for (unsigned i = 0, e = Addr.size(); i != e; ++i)
     MIB.addOperand(Addr[i]);
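Editor's note: both spill helpers above key the opcode choice off the same alignment test. A simplified, illustrative version of that choice for 128-bit vector registers follows (hypothetical helper; the real code dispatches on the register class through getStoreRegOpcode and getLoadRegOpcode).

  // Sketch only: aligned SSE moves (MOVAPS) fault on under-aligned stack
  // slots, so the unaligned forms (MOVUPS) are used unless the stack is,
  // or can be forced to be, 16-byte aligned.
  static unsigned pickVR128SpillOpcode(bool IsStore, bool IsAligned) {
    if (IsStore)
      return IsAligned ? X86::MOVAPSmr : X86::MOVUPSmr;
    return IsAligned ? X86::MOVAPSrm : X86::MOVUPSrm;
  }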
@@ -2183,7 +2225,8 @@ static unsigned getLoadRegOpcode(unsigned DestReg,
 void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MI,
                                         unsigned DestReg, int FrameIdx,
-                                        const TargetRegisterClass *RC) const{
+                                        const TargetRegisterClass *RC,
+                                        const TargetRegisterInfo *TRI) const {
   const MachineFunction &MF = *MBB.getParent();
   bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
   unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
@@ -2199,7 +2242,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
                                    SmallVectorImpl<MachineInstr*> &NewMIs) const {
   bool isAligned = (*MMOBegin)->getAlignment() >= 16;
   unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
-  DebugLoc DL = DebugLoc::getUnknownLoc();
+  DebugLoc DL;
   MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
   for (unsigned i = 0, e = Addr.size(); i != e; ++i)
     MIB.addOperand(Addr[i]);
@@ -2209,7 +2252,8 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
 
 bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                              MachineBasicBlock::iterator MI,
-                                        const std::vector<CalleeSavedInfo> &CSI) const {
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                             const TargetRegisterInfo *TRI) const {
   if (CSI.empty())
     return false;
@@ -2227,17 +2271,17 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
   unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r;
   for (unsigned i = CSI.size(); i != 0; --i) {
     unsigned Reg = CSI[i-1].getReg();
-    const TargetRegisterClass *RegClass = CSI[i-1].getRegClass();
     // Add the callee-saved register as live-in. It's killed at the spill.
     MBB.addLiveIn(Reg);
     if (Reg == FPReg)
       // X86RegisterInfo::emitPrologue will handle spilling of frame register.
       continue;
-    if (RegClass != &X86::VR128RegClass && !isWin64) {
+    if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
       CalleeFrameSize += SlotSize;
       BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill);
     } else {
-      storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass);
+      storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
+                          &X86::VR128RegClass, &RI);
     }
   }
 
@@ -2247,7 +2291,8 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
 
 bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator MI,
-                                        const std::vector<CalleeSavedInfo> &CSI) const {
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                               const TargetRegisterInfo *TRI) const {
   if (CSI.empty())
     return false;
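Editor's note: the spill hunk above, and the matching restore hunk that follows, apply one simple policy. Summarized as an illustrative helper, not code from the patch:

  // Sketch only: GPR callee-saves use push/pop; XMM registers have no
  // push/pop encoding, and the Win64 path in this code spills everything to
  // its assigned frame slot, so both of those cases go through
  // storeRegToStackSlot / loadRegFromStackSlot instead.
  static bool useSimplePushPop(unsigned Reg, bool IsWin64) {
    return !X86::VR128RegClass.contains(Reg) && !IsWin64;
  }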
@@ -2263,16 +2308,29 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
     if (Reg == FPReg)
       // X86RegisterInfo::emitEpilogue will handle restoring of frame register.
       continue;
-    const TargetRegisterClass *RegClass = CSI[i].getRegClass();
-    if (RegClass != &X86::VR128RegClass && !isWin64) {
+    if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
       BuildMI(MBB, MI, DL, get(Opc), Reg);
     } else {
-      loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass);
+      loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
+                           &X86::VR128RegClass, &RI);
     }
   }
   return true;
 }
 
+MachineInstr*
+X86InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
+                                       int FrameIx, uint64_t Offset,
+                                       const MDNode *MDPtr,
+                                       DebugLoc DL) const {
+  X86AddressMode AM;
+  AM.BaseType = X86AddressMode::FrameIndexBase;
+  AM.Base.FrameIndex = FrameIx;
+  MachineInstrBuilder MIB = BuildMI(MF, DL, get(X86::DBG_VALUE));
+  addFullAddress(MIB, AM).addImm(Offset).addMetadata(MDPtr);
+  return &*MIB;
+}
+
 static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
                                      const SmallVectorImpl<MachineOperand> &MOs,
                                      MachineInstr *MI,
@@ -2418,9 +2476,9 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
       unsigned DstReg = NewMI->getOperand(0).getReg();
       if (TargetRegisterInfo::isPhysicalRegister(DstReg))
         NewMI->getOperand(0).setReg(RI.getSubReg(DstReg,
-                                                 4/*x86_subreg_32bit*/));
+                                                 X86::sub_32bit));
       else
-        NewMI->getOperand(0).setSubReg(4/*x86_subreg_32bit*/);
+        NewMI->getOperand(0).setSubReg(X86::sub_32bit);
     }
     return NewMI;
   }
@@ -2466,9 +2524,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
     switch (MI->getOpcode()) {
     default: return NULL;
     case X86::TEST8rr:  NewOpc = X86::CMP8ri; RCSize = 1; break;
-    case X86::TEST16rr: NewOpc = X86::CMP16ri; RCSize = 2; break;
-    case X86::TEST32rr: NewOpc = X86::CMP32ri; RCSize = 4; break;
-    case X86::TEST64rr: NewOpc = X86::CMP64ri32; RCSize = 8; break;
+    case X86::TEST16rr: NewOpc = X86::CMP16ri8; RCSize = 2; break;
+    case X86::TEST32rr: NewOpc = X86::CMP32ri8; RCSize = 4; break;
+    case X86::TEST64rr: NewOpc = X86::CMP64ri8; RCSize = 8; break;
     }
     // Check if it's safe to fold the load. If the size of the object is
     // narrower than the load width, then it's not.
@@ -2515,7 +2573,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
     Alignment = (*LoadMI->memoperands_begin())->getAlignment();
   else
     switch (LoadMI->getOpcode()) {
-    case X86::V_SET0:
+    case X86::V_SET0PS:
+    case X86::V_SET0PD:
+    case X86::V_SET0PI:
     case X86::V_SETALLONES:
       Alignment = 16;
       break;
@@ -2533,9 +2593,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
     switch (MI->getOpcode()) {
     default: return NULL;
     case X86::TEST8rr:  NewOpc = X86::CMP8ri; break;
-    case X86::TEST16rr: NewOpc = X86::CMP16ri; break;
-    case X86::TEST32rr: NewOpc = X86::CMP32ri; break;
-    case X86::TEST64rr: NewOpc = X86::CMP64ri32; break;
+    case X86::TEST16rr: NewOpc = X86::CMP16ri8; break;
+    case X86::TEST32rr: NewOpc = X86::CMP32ri8; break;
+    case X86::TEST64rr: NewOpc = X86::CMP64ri8; break;
     }
     // Change to CMPXXri r, 0 first.
     MI->setDesc(get(NewOpc));
@@ -2545,11 +2605,13 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
 
   SmallVector MOs;
   switch (LoadMI->getOpcode()) {
-  case X86::V_SET0:
+  case X86::V_SET0PS:
+  case X86::V_SET0PD:
+  case X86::V_SET0PI:
   case X86::V_SETALLONES:
   case X86::FsFLD0SD:
   case X86::FsFLD0SS: {
-    // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
+    // Folding a V_SET0P? or V_SETALLONES as a load, to ease register pressure.
     // Create a constant-pool entry and operands to load from it.
 
     // Medium and large mode can't fold loads this way.
@@ -2579,7 +2641,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
       Ty = Type::getDoubleTy(MF.getFunction()->getContext());
     else
       Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
-    Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ?
+    const Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ?
                     Constant::getAllOnesValue(Ty) : Constant::getNullValue(Ty);
     unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
@@ -2741,16 +2803,22 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
   switch (DataMI->getOpcode()) {
   default: break;
   case X86::CMP64ri32:
+  case X86::CMP64ri8:
   case X86::CMP32ri:
+  case X86::CMP32ri8:
   case X86::CMP16ri:
+  case X86::CMP16ri8:
   case X86::CMP8ri: {
     MachineOperand &MO0 = DataMI->getOperand(0);
     MachineOperand &MO1 = DataMI->getOperand(1);
     if (MO1.getImm() == 0) {
       switch (DataMI->getOpcode()) {
       default: break;
+      case X86::CMP64ri8:
       case X86::CMP64ri32: NewOpc = X86::TEST64rr; break;
+      case X86::CMP32ri8:
       case X86::CMP32ri:   NewOpc = X86::TEST32rr; break;
+      case X86::CMP16ri8:
       case X86::CMP16ri:   NewOpc = X86::TEST16rr; break;
       case X86::CMP8ri:    NewOpc = X86::TEST8rr; break;
       }
@@ -2954,10 +3022,6 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
       Load1->getOperand(2) == Load2->getOperand(2)) {
     if (cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue() != 1)
       return false;
-    SDValue Op2 = Load1->getOperand(2);
-    if (!isa<RegisterSDNode>(Op2) ||
-        cast<RegisterSDNode>(Op2)->getReg() != 0)
-      return 0;
 
     // Now let's examine the displacements.
     if (isa<ConstantSDNode>(Load1->getOperand(3)) &&
@@ -3403,6 +3467,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
     }
   case TargetOpcode::DBG_LABEL:
   case TargetOpcode::EH_LABEL:
+  case TargetOpcode::DBG_VALUE:
     break;
   case TargetOpcode::IMPLICIT_DEF:
   case TargetOpcode::KILL:
@@ -3600,7 +3665,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
     std::string msg;
     raw_string_ostream Msg(msg);
     Msg << "Cannot determine size: " << MI;
-    llvm_report_error(Msg.str());
+    report_fatal_error(Msg.str());
   }
 
 
@@ -3658,3 +3723,56 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
   X86FI->setGlobalBaseReg(GlobalBaseReg);
   return GlobalBaseReg;
 }
+
+// These are the replaceable SSE instructions. Some of these have Int variants
+// that we don't include here. We don't want to replace instructions selected
+// by intrinsics.
+static const unsigned ReplaceableInstrs[][3] = {
+  //PackedSingle     PackedDouble    PackedInt
+  { X86::MOVAPSmr,   X86::MOVAPDmr,  X86::MOVDQAmr  },
+  { X86::MOVAPSrm,   X86::MOVAPDrm,  X86::MOVDQArm  },
+  { X86::MOVAPSrr,   X86::MOVAPDrr,  X86::MOVDQArr  },
+  { X86::MOVUPSmr,   X86::MOVUPDmr,  X86::MOVDQUmr  },
+  { X86::MOVUPSrm,   X86::MOVUPDrm,  X86::MOVDQUrm  },
+  { X86::MOVNTPSmr,  X86::MOVNTPDmr, X86::MOVNTDQmr },
+  { X86::ANDNPSrm,   X86::ANDNPDrm,  X86::PANDNrm   },
+  { X86::ANDNPSrr,   X86::ANDNPDrr,  X86::PANDNrr   },
+  { X86::ANDPSrm,    X86::ANDPDrm,   X86::PANDrm    },
+  { X86::ANDPSrr,    X86::ANDPDrr,   X86::PANDrr    },
+  { X86::ORPSrm,     X86::ORPDrm,    X86::PORrm     },
+  { X86::ORPSrr,     X86::ORPDrr,    X86::PORrr     },
+  { X86::V_SET0PS,   X86::V_SET0PD,  X86::V_SET0PI  },
+  { X86::XORPSrm,    X86::XORPDrm,   X86::PXORrm    },
+  { X86::XORPSrr,    X86::XORPDrr,   X86::PXORrr    },
+};
+
+// FIXME: Some shuffle and unpack instructions have equivalents in different
+// domains, but they require a bit more work than just switching opcodes.
+
+static const unsigned *lookup(unsigned opcode, unsigned domain) {
+  for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
+    if (ReplaceableInstrs[i][domain-1] == opcode)
+      return ReplaceableInstrs[i];
+  return 0;
+}
+
+std::pair<uint16_t, uint16_t>
+X86InstrInfo::GetSSEDomain(const MachineInstr *MI) const {
+  uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
+  return std::make_pair(domain,
+                        domain && lookup(MI->getOpcode(), domain) ? 0xe : 0);
+}
+
+void X86InstrInfo::SetSSEDomain(MachineInstr *MI, unsigned Domain) const {
+  assert(Domain>0 && Domain<4 && "Invalid execution domain");
+  uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
+  assert(dom && "Not an SSE instruction");
+  const unsigned *table = lookup(MI->getOpcode(), dom);
+  assert(table && "Cannot change domain");
+  MI->setDesc(get(table[Domain-1]));
+}
+
+/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+  NopInst.setOpcode(X86::NOOP);
+}
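Editor's note: a hypothetical usage sketch of the two hooks added above, not part of the patch. TII is assumed to be an X86InstrInfo pointer, MI a MachineInstr pointer, and WantedDomain one of 1 (PackedSingle), 2 (PackedDouble), or 3 (PackedInt).

  // Query the instruction's SSE execution domain and, when the opcode has
  // equivalents in the other domains (the 0xe bit mask), switch it over.
  std::pair<uint16_t, uint16_t> D = TII->GetSSEDomain(MI);
  uint16_t Dom = D.first;      // 0 when MI is not an SSE instruction
  uint16_t Others = D.second;  // 0xe when the opcode is in ReplaceableInstrs
  if (Dom && (Others & (1u << WantedDomain)))
    TII->SetSSEDomain(MI, WantedDomain);  // rewrites MI to the equivalent opcode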