diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 73026bbfda4..3c882eaa14e 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -15,19 +15,23 @@
 #include "X86.h"
 #include "X86RegisterInfo.h"
 #include "X86InstrBuilder.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
 #include "llvm/Constants.h"
+#include "llvm/Function.h"
 #include "llvm/Type.h"
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
 #include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/ADT/STLExtras.h"
-#include <iostream>
-
 using namespace llvm;
 
 namespace {
@@ -41,31 +45,54 @@ namespace {
                 cl::Hidden);
 }
 
-X86RegisterInfo::X86RegisterInfo()
-  : X86GenRegisterInfo(X86::ADJCALLSTACKDOWN, X86::ADJCALLSTACKUP) {}
+X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
+                                 const TargetInstrInfo &tii)
+  : X86GenRegisterInfo(X86::ADJCALLSTACKDOWN, X86::ADJCALLSTACKUP),
+    TM(tm), TII(tii) {
+  // Cache some information.
+  const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+  Is64Bit = Subtarget->is64Bit();
+  if (Is64Bit) {
+    SlotSize = 8;
+    StackPtr = X86::RSP;
+    FramePtr = X86::RBP;
+  } else {
+    SlotSize = 4;
+    StackPtr = X86::ESP;
+    FramePtr = X86::EBP;
+  }
+}
 
 void X86RegisterInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator MI,
                                           unsigned SrcReg, int FrameIdx,
                                           const TargetRegisterClass *RC) const {
   unsigned Opc;
-  if (RC == &X86::R32RegClass) {
+  if (RC == &X86::GR64RegClass) {
+    Opc = X86::MOV64mr;
+  } else if (RC == &X86::GR32RegClass) {
     Opc = X86::MOV32mr;
-  } else if (RC == &X86::R8RegClass) {
-    Opc = X86::MOV8mr;
-  } else if (RC == &X86::R16RegClass) {
+  } else if (RC == &X86::GR16RegClass) {
     Opc = X86::MOV16mr;
+  } else if (RC == &X86::GR8RegClass) {
+    Opc = X86::MOV8mr;
+  } else if (RC == &X86::GR32_RegClass) {
+    Opc = X86::MOV32_mr;
+  } else if (RC == &X86::GR16_RegClass) {
+    Opc = X86::MOV16_mr;
   } else if (RC == &X86::RFPRegClass || RC == &X86::RSTRegClass) {
     Opc = X86::FpST64m;
-  } else if (RC == &X86::V4F4RegClass) {
+  } else if (RC == &X86::FR32RegClass) {
     Opc = X86::MOVSSmr;
-  } else if (RC == &X86::V2F8RegClass) {
+  } else if (RC == &X86::FR64RegClass) {
     Opc = X86::MOVSDmr;
+  } else if (RC == &X86::VR128RegClass) {
+    Opc = X86::MOVAPSmr;
   } else {
     assert(0 && "Unknown regclass");
     abort();
  }
-  addFrameReference(BuildMI(MBB, MI, Opc, 5), FrameIdx).addReg(SrcReg);
+  addFrameReference(BuildMI(MBB, MI, TII.get(Opc)), FrameIdx).addReg(SrcReg);
 }
 
 void X86RegisterInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
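
The store above (and the load below) build a complete x86 memory reference out of an abstract stack slot. In this era of LLVM an x86 address is four machine operands — base, scale, index register, displacement — and addFrameReference (from X86InstrBuilder.h) fills them in with the frame index standing in as the base. A minimal sketch of what that helper does, assuming it matches the X86InstrBuilder.h of this revision:

    // Roughly what addFrameReference(MIB, FI) expands to; the frame index
    // acts as the base until eliminateFrameIndex (later in this file)
    // rewrites it to [EBP/ESP + offset].
    MIB.addFrameIndex(FI)   // base: abstract stack slot
       .addImm(1)           // scale
       .addReg(0)           // no index register
       .addImm(0);          // displacement
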
@@ -73,23 +100,31 @@ void X86RegisterInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                            unsigned DestReg, int FrameIdx,
                                            const TargetRegisterClass *RC) const{
   unsigned Opc;
-  if (RC == &X86::R32RegClass) {
+  if (RC == &X86::GR64RegClass) {
+    Opc = X86::MOV64rm;
+  } else if (RC == &X86::GR32RegClass) {
     Opc = X86::MOV32rm;
-  } else if (RC == &X86::R8RegClass) {
-    Opc = X86::MOV8rm;
-  } else if (RC == &X86::R16RegClass) {
+  } else if (RC == &X86::GR16RegClass) {
     Opc = X86::MOV16rm;
+  } else if (RC == &X86::GR8RegClass) {
+    Opc = X86::MOV8rm;
+  } else if (RC == &X86::GR32_RegClass) {
+    Opc = X86::MOV32_rm;
+  } else if (RC == &X86::GR16_RegClass) {
+    Opc = X86::MOV16_rm;
   } else if (RC == &X86::RFPRegClass || RC == &X86::RSTRegClass) {
     Opc = X86::FpLD64m;
-  } else if (RC == &X86::V4F4RegClass) {
+  } else if (RC == &X86::FR32RegClass) {
     Opc = X86::MOVSSrm;
-  } else if (RC == &X86::V2F8RegClass) {
+  } else if (RC == &X86::FR64RegClass) {
     Opc = X86::MOVSDrm;
+  } else if (RC == &X86::VR128RegClass) {
+    Opc = X86::MOVAPSrm;
   } else {
     assert(0 && "Unknown regclass");
     abort();
   }
-  addFrameReference(BuildMI(MBB, MI, Opc, 4, DestReg), FrameIdx);
+  addFrameReference(BuildMI(MBB, MI, TII.get(Opc), DestReg), FrameIdx);
 }
 
 void X86RegisterInfo::copyRegToReg(MachineBasicBlock &MBB,
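
A mechanical change that repeats through the whole patch is visible in the last changed line of this hunk: the old BuildMI interface took a raw opcode plus an explicit operand count, while the new one takes the instruction descriptor returned by TII.get(), so operand counts and constraints come from the target's instruction table. Side by side (a sketch, using the reload above):

    // Before: opcode plus a hand-maintained operand count.
    addFrameReference(BuildMI(MBB, MI, X86::MOV32rm, 4, DestReg), FrameIdx);
    // After: the TargetInstrInfo descriptor carries that information.
    addFrameReference(BuildMI(MBB, MI, TII.get(X86::MOV32rm), DestReg), FrameIdx);
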
@@ -97,314 +132,749 @@ void X86RegisterInfo::copyRegToReg(MachineBasicBlock &MBB,
                                    unsigned DestReg, unsigned SrcReg,
                                    const TargetRegisterClass *RC) const {
   unsigned Opc;
-  if (RC == &X86::R32RegClass) {
+  if (RC == &X86::GR64RegClass) {
+    Opc = X86::MOV64rr;
+  } else if (RC == &X86::GR32RegClass) {
     Opc = X86::MOV32rr;
-  } else if (RC == &X86::R8RegClass) {
-    Opc = X86::MOV8rr;
-  } else if (RC == &X86::R16RegClass) {
+  } else if (RC == &X86::GR16RegClass) {
     Opc = X86::MOV16rr;
+  } else if (RC == &X86::GR8RegClass) {
+    Opc = X86::MOV8rr;
+  } else if (RC == &X86::GR32_RegClass) {
+    Opc = X86::MOV32_rr;
+  } else if (RC == &X86::GR16_RegClass) {
+    Opc = X86::MOV16_rr;
   } else if (RC == &X86::RFPRegClass || RC == &X86::RSTRegClass) {
     Opc = X86::FpMOV;
-  } else if (RC == &X86::V4F4RegClass) {
-    Opc = X86::MOVSSrr;
-  } else if (RC == &X86::V2F8RegClass) {
-    Opc = X86::MOVSDrr;
+  } else if (RC == &X86::FR32RegClass) {
+    Opc = X86::FsMOVAPSrr;
+  } else if (RC == &X86::FR64RegClass) {
+    Opc = X86::FsMOVAPDrr;
+  } else if (RC == &X86::VR128RegClass) {
+    Opc = X86::MOVAPSrr;
   } else {
     assert(0 && "Unknown regclass");
     abort();
   }
-  BuildMI(MBB, MI, Opc, 1, DestReg).addReg(SrcReg);
+  BuildMI(MBB, MI, TII.get(Opc), DestReg).addReg(SrcReg);
 }
 
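
Note that scalar FP copies now use FsMOVAPSrr/FsMOVAPDrr — MOVAPS/MOVAPD restricted to scalar values — instead of MOVSSrr/MOVSDrr. The likely rationale (an assumption here, not stated by the patch): a register-to-register MOVSS only writes the low 32 bits and therefore reads the old destination, creating a false dependency, while MOVAPS rewrites the whole register:

    // movss  %xmm1, %xmm0   ; merges into %xmm0, depends on old %xmm0
    // movaps %xmm1, %xmm0   ; replaces all 128 bits, no false dependency
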
-unsigned X86RegisterInfo::isLoadFromStackSlot(MachineInstr *MI,
-                                              int &FrameIndex) const {
-  switch (MI->getOpcode()) {
-  default: break;
-  case X86::MOV8rm:
-  case X86::MOV16rm:
-  case X86::MOV32rm:
-  case X86::FpLD64m:
-  case X86::MOVSSrm:
-  case X86::MOVSDrm:
-    if (MI->getOperand(1).isFrameIndex() && MI->getOperand(2).isImmediate() &&
-        MI->getOperand(3).isRegister() && MI->getOperand(4).isImmediate() &&
-        MI->getOperand(2).getImmedValue() == 1 &&
-        MI->getOperand(3).getReg() == 0 &&
-        MI->getOperand(4).getImmedValue() == 0) {
-      FrameIndex = MI->getOperand(1).getFrameIndex();
-      return MI->getOperand(0).getReg();
-    }
-    break;
+static MachineInstr *FuseTwoAddrInst(unsigned Opcode, unsigned FrameIndex,
+                                     MachineInstr *MI,
+                                     const TargetInstrInfo &TII) {
+  unsigned NumOps = TII.getNumOperands(MI->getOpcode())-2;
+  // Create the base instruction with the memory operand as the first part.
+  MachineInstrBuilder MIB = addFrameReference(BuildMI(TII.get(Opcode)),
+                                              FrameIndex);
+
+  // Loop over the rest of the ri operands, converting them over.
+  for (unsigned i = 0; i != NumOps; ++i) {
+    MachineOperand &MO = MI->getOperand(i+2);
+    if (MO.isReg())
+      MIB = MIB.addReg(MO.getReg(), false, MO.isImplicit());
+    else if (MO.isImm())
+      MIB = MIB.addImm(MO.getImm());
+    else if (MO.isGlobalAddress())
+      MIB = MIB.addGlobalAddress(MO.getGlobal(), MO.getOffset());
+    else if (MO.isJumpTableIndex())
+      MIB = MIB.addJumpTableIndex(MO.getJumpTableIndex());
+    else
+      assert(0 && "Unknown operand type!");
   }
-  return 0;
+  return MIB;
 }
-
-static MachineInstr *MakeMInst(unsigned Opcode, unsigned FrameIndex,
-                               MachineInstr *MI) {
-  return addFrameReference(BuildMI(Opcode, 4), FrameIndex);
+static MachineInstr *FuseInst(unsigned Opcode, unsigned OpNo,
+                              unsigned FrameIndex, MachineInstr *MI,
+                              const TargetInstrInfo &TII) {
+  MachineInstrBuilder MIB = BuildMI(TII.get(Opcode));
+
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (i == OpNo) {
+      assert(MO.isReg() && "Expected to fold into reg operand!");
+      MIB = addFrameReference(MIB, FrameIndex);
+    } else if (MO.isReg())
+      MIB = MIB.addReg(MO.getReg(), MO.isDef(), MO.isImplicit());
+    else if (MO.isImm())
+      MIB = MIB.addImm(MO.getImm());
+    else if (MO.isGlobalAddress())
+      MIB = MIB.addGlobalAddress(MO.getGlobal(), MO.getOffset());
+    else if (MO.isJumpTableIndex())
+      MIB = MIB.addJumpTableIndex(MO.getJumpTableIndex());
+    else
+      assert(0 && "Unknown operand for FuseInst!");
+  }
+  return MIB;
 }
 
-static MachineInstr *MakeMRInst(unsigned Opcode, unsigned FrameIndex,
+static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII,
+                                unsigned Opcode, unsigned FrameIndex,
                                 MachineInstr *MI) {
-  return addFrameReference(BuildMI(Opcode, 5), FrameIndex)
-    .addReg(MI->getOperand(1).getReg());
+  return addFrameReference(BuildMI(TII.get(Opcode)), FrameIndex).addImm(0);
 }
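
A concrete illustration of the two folding helpers, with assumed operands. For a two-address instruction such as ADD32rr, whose def (operand 0) is tied to its first use (operand 1), FuseTwoAddrInst replaces that tied pair with the memory reference and copies the remaining sources across; FuseInst instead swaps the memory reference in at exactly one operand position:

    // FuseTwoAddrInst(X86::ADD32mr, FI, MI, TII) applied to
    //     ADD32rr %EAX<def>, %EAX, %EBX
    // produces
    //     ADD32mr <fi#FI>, 1, %noreg, 0, %EBX
    //
    // FuseInst(X86::MOV32rm, 1, FI, MI, TII) applied to
    //     MOV32rr %EAX<def>, %ECX
    // produces
    //     MOV32rm %EAX<def>, <fi#FI>, 1, %noreg, 0
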
 
-static MachineInstr *MakeMRIInst(unsigned Opcode, unsigned FrameIndex,
-                                 MachineInstr *MI) {
-  return addFrameReference(BuildMI(Opcode, 6), FrameIndex)
-    .addReg(MI->getOperand(1).getReg())
-    .addZImm(MI->getOperand(2).getImmedValue());
-}
-static MachineInstr *MakeMIInst(unsigned Opcode, unsigned FrameIndex,
-                                MachineInstr *MI) {
-  if (MI->getOperand(1).isImmediate())
-    return addFrameReference(BuildMI(Opcode, 5), FrameIndex)
-      .addZImm(MI->getOperand(1).getImmedValue());
-  else if (MI->getOperand(1).isGlobalAddress())
-    return addFrameReference(BuildMI(Opcode, 5), FrameIndex)
-      .addGlobalAddress(MI->getOperand(1).getGlobal());
-  assert(0 && "Unknown operand for MakeMI!");
-  return 0;
+//===----------------------------------------------------------------------===//
+// Efficient Lookup Table Support
+//===----------------------------------------------------------------------===//
+
+namespace {
+  /// TableEntry - Maps the 'from' opcode to a fused form of the 'to' opcode.
+  ///
+  struct TableEntry {
+    unsigned from;    // Original opcode.
+    unsigned to;      // New opcode.
+
+    // less operators used by STL search.
+    bool operator<(const TableEntry &TE) const { return from < TE.from; }
+    friend bool operator<(const TableEntry &TE, unsigned V) {
+      return TE.from < V;
+    }
+    friend bool operator<(unsigned V, const TableEntry &TE) {
+      return V < TE.from;
+    }
+  };
 }
 
-static MachineInstr *MakeRMInst(unsigned Opcode, unsigned FrameIndex,
-                                MachineInstr *MI) {
-  const MachineOperand& op = MI->getOperand(0);
-  return addFrameReference(BuildMI(Opcode, 5, op.getReg(), op.getUseType()),
-                           FrameIndex);
+/// TableIsSorted - Return true if the table is in 'from' opcode order.
+///
+static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) {
+  for (unsigned i = 1; i != NumEntries; ++i)
+    if (!(Table[i-1] < Table[i])) {
+      cerr << "Entries out of order " << Table[i-1].from
+           << " " << Table[i].from << "\n";
+      return false;
+    }
+  return true;
 }
 
-static MachineInstr *MakeRMIInst(unsigned Opcode, unsigned FrameIndex,
-                                 MachineInstr *MI) {
-  const MachineOperand& op = MI->getOperand(0);
-  return addFrameReference(BuildMI(Opcode, 6, op.getReg(), op.getUseType()),
-                           FrameIndex).addZImm(MI->getOperand(2).getImmedValue());
+/// TableLookup - Return the table entry matching the specified opcode.
+/// Otherwise return NULL.
+static const TableEntry *TableLookup(const TableEntry *Table, unsigned N,
+                                     unsigned Opcode) {
+  const TableEntry *I = std::lower_bound(Table, Table+N, Opcode);
+  if (I != Table+N && I->from == Opcode)
+    return I;
+  return NULL;
 }
 
+#define ARRAY_SIZE(TABLE)  \
+  (sizeof(TABLE)/sizeof(TABLE[0]))
+
+#ifdef NDEBUG
+#define ASSERT_SORTED(TABLE)
+#else
+#define ASSERT_SORTED(TABLE)                                            \
+  { static bool TABLE##Checked = false;                                 \
+    if (!TABLE##Checked) {                                              \
+      assert(TableIsSorted(TABLE, ARRAY_SIZE(TABLE)) &&                 \
+             "All lookup tables must be sorted for efficient access!"); \
+      TABLE##Checked = true;                                            \
+    }                                                                   \
+  }
+#endif
+
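
The pattern used below keeps each opcode table sorted so a binary search can find entries, with ASSERT_SORTED performing a one-time debug-build check. A self-contained sketch of the same idiom (names hypothetical):

    #include <algorithm>

    struct Entry {
      unsigned from, to;
      // Heterogeneous compare so lower_bound can search by opcode directly.
      friend bool operator<(const Entry &E, unsigned V) { return E.from < V; }
    };

    // Must stay sorted by 'from', or lower_bound silently misses entries.
    static const Entry Map[] = { { 1, 10 }, { 4, 40 }, { 9, 90 } };

    static unsigned lookup(unsigned Opc) {
      const Entry *E = std::lower_bound(Map, Map + 3, Opc);
      return (E != Map + 3 && E->from == Opc) ? E->to : 0;  // 0 = no match
    }
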
 
-MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr* MI,
+MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
                                                  unsigned i,
                                                  int FrameIndex) const {
+  // Check switch flag
   if (NoFusing) return NULL;
 
-  /// FIXME: This should obviously be autogenerated by tablegen when patterns
-  /// are available!
-  MachineBasicBlock& MBB = *MI->getParent();
-  if (i == 0) {
-    switch(MI->getOpcode()) {
-    case X86::XCHG8rr:  return MakeMRInst(X86::XCHG8mr ,FrameIndex, MI);
-    case X86::XCHG16rr: return MakeMRInst(X86::XCHG16mr,FrameIndex, MI);
-    case X86::XCHG32rr: return MakeMRInst(X86::XCHG32mr,FrameIndex, MI);
-    case X86::MOV8rr:   return MakeMRInst(X86::MOV8mr , FrameIndex, MI);
-    case X86::MOV16rr:  return MakeMRInst(X86::MOV16mr, FrameIndex, MI);
-    case X86::MOV32rr:  return MakeMRInst(X86::MOV32mr, FrameIndex, MI);
-    case X86::MOV8ri:   return MakeMIInst(X86::MOV8mi , FrameIndex, MI);
-    case X86::MOV16ri:  return MakeMIInst(X86::MOV16mi, FrameIndex, MI);
-    case X86::MOV32ri:  return MakeMIInst(X86::MOV32mi, FrameIndex, MI);
-    case X86::MUL8r:    return MakeMInst( X86::MUL8m , FrameIndex, MI);
-    case X86::MUL16r:   return MakeMInst( X86::MUL16m, FrameIndex, MI);
-    case X86::MUL32r:   return MakeMInst( X86::MUL32m, FrameIndex, MI);
-    case X86::IMUL8r:   return MakeMInst( X86::IMUL8m , FrameIndex, MI);
-    case X86::IMUL16r:  return MakeMInst( X86::IMUL16m, FrameIndex, MI);
-    case X86::IMUL32r:  return MakeMInst( X86::IMUL32m, FrameIndex, MI);
-    case X86::DIV8r:    return MakeMInst( X86::DIV8m , FrameIndex, MI);
-    case X86::DIV16r:   return MakeMInst( X86::DIV16m, FrameIndex, MI);
-    case X86::DIV32r:   return MakeMInst( X86::DIV32m, FrameIndex, MI);
-    case X86::IDIV8r:   return MakeMInst( X86::IDIV8m , FrameIndex, MI);
-    case X86::IDIV16r:  return MakeMInst( X86::IDIV16m, FrameIndex, MI);
-    case X86::IDIV32r:  return MakeMInst( X86::IDIV32m, FrameIndex, MI);
-    case X86::NEG8r:    return MakeMInst( X86::NEG8m , FrameIndex, MI);
-    case X86::NEG16r:   return MakeMInst( X86::NEG16m, FrameIndex, MI);
-    case X86::NEG32r:   return MakeMInst( X86::NEG32m, FrameIndex, MI);
-    case X86::NOT8r:    return MakeMInst( X86::NOT8m , FrameIndex, MI);
-    case X86::NOT16r:   return MakeMInst( X86::NOT16m, FrameIndex, MI);
-    case X86::NOT32r:   return MakeMInst( X86::NOT32m, FrameIndex, MI);
-    case X86::INC8r:    return MakeMInst( X86::INC8m , FrameIndex, MI);
-    case X86::INC16r:   return MakeMInst( X86::INC16m, FrameIndex, MI);
-    case X86::INC32r:   return MakeMInst( X86::INC32m, FrameIndex, MI);
-    case X86::DEC8r:    return MakeMInst( X86::DEC8m , FrameIndex, MI);
-    case X86::DEC16r:   return MakeMInst( X86::DEC16m, FrameIndex, MI);
-    case X86::DEC32r:   return MakeMInst( X86::DEC32m, FrameIndex, MI);
-    case X86::ADD8rr:   return MakeMRInst(X86::ADD8mr , FrameIndex, MI);
-    case X86::ADD16rr:  return MakeMRInst(X86::ADD16mr, FrameIndex, MI);
-    case X86::ADD32rr:  return MakeMRInst(X86::ADD32mr, FrameIndex, MI);
-    case X86::ADC32rr:  return MakeMRInst(X86::ADC32mr, FrameIndex, MI);
-    case X86::ADC32ri:  return MakeMIInst(X86::ADC32mi, FrameIndex, MI);
-    case X86::ADD8ri:   return MakeMIInst(X86::ADD8mi , FrameIndex, MI);
-    case X86::ADD16ri:  return MakeMIInst(X86::ADD16mi, FrameIndex, MI);
-    case X86::ADD32ri:  return MakeMIInst(X86::ADD32mi, FrameIndex, MI);
-    case X86::SUB8rr:   return MakeMRInst(X86::SUB8mr , FrameIndex, MI);
-    case X86::SUB16rr:  return MakeMRInst(X86::SUB16mr, FrameIndex, MI);
-    case X86::SUB32rr:  return MakeMRInst(X86::SUB32mr, FrameIndex, MI);
-    case X86::SBB32rr:  return MakeMRInst(X86::SBB32mr, FrameIndex, MI);
-    case X86::SBB8ri:   return MakeMIInst(X86::SBB8mi, FrameIndex, MI);
-    case X86::SBB16ri:  return MakeMIInst(X86::SBB16mi, FrameIndex, MI);
-    case X86::SBB32ri:  return MakeMIInst(X86::SBB32mi, FrameIndex, MI);
-    case X86::SUB8ri:   return MakeMIInst(X86::SUB8mi , FrameIndex, MI);
-    case X86::SUB16ri:  return MakeMIInst(X86::SUB16mi, FrameIndex, MI);
-    case X86::SUB32ri:  return MakeMIInst(X86::SUB32mi, FrameIndex, MI);
-    case X86::AND8rr:   return MakeMRInst(X86::AND8mr , FrameIndex, MI);
-    case X86::AND16rr:  return MakeMRInst(X86::AND16mr, FrameIndex, MI);
-    case X86::AND32rr:  return MakeMRInst(X86::AND32mr, FrameIndex, MI);
-    case X86::AND8ri:   return MakeMIInst(X86::AND8mi , FrameIndex, MI);
-    case X86::AND16ri:  return MakeMIInst(X86::AND16mi, FrameIndex, MI);
-    case X86::AND32ri:  return MakeMIInst(X86::AND32mi, FrameIndex, MI);
-    case X86::OR8rr:    return MakeMRInst(X86::OR8mr , FrameIndex, MI);
-    case X86::OR16rr:   return MakeMRInst(X86::OR16mr, FrameIndex, MI);
-    case X86::OR32rr:   return MakeMRInst(X86::OR32mr, FrameIndex, MI);
-    case X86::OR8ri:    return MakeMIInst(X86::OR8mi , FrameIndex, MI);
-    case X86::OR16ri:   return MakeMIInst(X86::OR16mi, FrameIndex, MI);
-    case X86::OR32ri:   return MakeMIInst(X86::OR32mi, FrameIndex, MI);
-    case X86::XOR8rr:   return MakeMRInst(X86::XOR8mr , FrameIndex, MI);
-    case X86::XOR16rr:  return MakeMRInst(X86::XOR16mr, FrameIndex, MI);
-    case X86::XOR32rr:  return MakeMRInst(X86::XOR32mr, FrameIndex, MI);
-    case X86::XOR8ri:   return MakeMIInst(X86::XOR8mi , FrameIndex, MI);
-    case X86::XOR16ri:  return MakeMIInst(X86::XOR16mi, FrameIndex, MI);
-    case X86::XOR32ri:  return MakeMIInst(X86::XOR32mi, FrameIndex, MI);
-    case X86::SHL8rCL:  return MakeMInst( X86::SHL8mCL ,FrameIndex, MI);
-    case X86::SHL16rCL: return MakeMInst( X86::SHL16mCL,FrameIndex, MI);
-    case X86::SHL32rCL: return MakeMInst( X86::SHL32mCL,FrameIndex, MI);
-    case X86::SHL8ri:   return MakeMIInst(X86::SHL8mi , FrameIndex, MI);
-    case X86::SHL16ri:  return MakeMIInst(X86::SHL16mi, FrameIndex, MI);
-    case X86::SHL32ri:  return MakeMIInst(X86::SHL32mi, FrameIndex, MI);
-    case X86::SHR8rCL:  return MakeMInst( X86::SHR8mCL ,FrameIndex, MI);
-    case X86::SHR16rCL: return MakeMInst( X86::SHR16mCL,FrameIndex, MI);
-    case X86::SHR32rCL: return MakeMInst( X86::SHR32mCL,FrameIndex, MI);
-    case X86::SHR8ri:   return MakeMIInst(X86::SHR8mi , FrameIndex, MI);
-    case X86::SHR16ri:  return MakeMIInst(X86::SHR16mi, FrameIndex, MI);
-    case X86::SHR32ri:  return MakeMIInst(X86::SHR32mi, FrameIndex, MI);
-    case X86::SAR8rCL:  return MakeMInst( X86::SAR8mCL ,FrameIndex, MI);
-    case X86::SAR16rCL: return MakeMInst( X86::SAR16mCL,FrameIndex, MI);
-    case X86::SAR32rCL: return MakeMInst( X86::SAR32mCL,FrameIndex, MI);
-    case X86::SAR8ri:   return MakeMIInst(X86::SAR8mi , FrameIndex, MI);
-    case X86::SAR16ri:  return MakeMIInst(X86::SAR16mi, FrameIndex, MI);
-    case X86::SAR32ri:  return MakeMIInst(X86::SAR32mi, FrameIndex, MI);
-    case X86::ROL8rCL:  return MakeMInst( X86::ROL8mCL ,FrameIndex, MI);
-    case X86::ROL16rCL: return MakeMInst( X86::ROL16mCL,FrameIndex, MI);
-    case X86::ROL32rCL: return MakeMInst( X86::ROL32mCL,FrameIndex, MI);
-    case X86::ROL8ri:   return MakeMIInst(X86::ROL8mi , FrameIndex, MI);
-    case X86::ROL16ri:  return MakeMIInst(X86::ROL16mi, FrameIndex, MI);
-    case X86::ROL32ri:  return MakeMIInst(X86::ROL32mi, FrameIndex, MI);
-    case X86::ROR8rCL:  return MakeMInst( X86::ROR8mCL ,FrameIndex, MI);
-    case X86::ROR16rCL: return MakeMInst( X86::ROR16mCL,FrameIndex, MI);
-    case X86::ROR32rCL: return MakeMInst( X86::ROR32mCL,FrameIndex, MI);
-    case X86::ROR8ri:   return MakeMIInst(X86::ROR8mi , FrameIndex, MI);
-    case X86::ROR16ri:  return MakeMIInst(X86::ROR16mi, FrameIndex, MI);
-    case X86::ROR32ri:  return MakeMIInst(X86::ROR32mi, FrameIndex, MI);
-    case X86::SHLD32rrCL:return MakeMRInst( X86::SHLD32mrCL,FrameIndex, MI);
-    case X86::SHLD32rri8:return MakeMRIInst(X86::SHLD32mri8,FrameIndex, MI);
-    case X86::SHRD32rrCL:return MakeMRInst( X86::SHRD32mrCL,FrameIndex, MI);
-    case X86::SHRD32rri8:return MakeMRIInst(X86::SHRD32mri8,FrameIndex, MI);
-    case X86::SHLD16rrCL:return MakeMRInst( X86::SHLD16mrCL,FrameIndex, MI);
-    case X86::SHLD16rri8:return MakeMRIInst(X86::SHLD16mri8,FrameIndex, MI);
-    case X86::SHRD16rrCL:return MakeMRInst( X86::SHRD16mrCL,FrameIndex, MI);
-    case X86::SHRD16rri8:return MakeMRIInst(X86::SHRD16mri8,FrameIndex, MI);
-    case X86::SETBr:    return MakeMInst( X86::SETBm, FrameIndex, MI);
-    case X86::SETAEr:   return MakeMInst( X86::SETAEm, FrameIndex, MI);
-    case X86::SETEr:    return MakeMInst( X86::SETEm, FrameIndex, MI);
-    case X86::SETNEr:   return MakeMInst( X86::SETNEm, FrameIndex, MI);
-    case X86::SETBEr:   return MakeMInst( X86::SETBEm, FrameIndex, MI);
-    case X86::SETAr:    return MakeMInst( X86::SETAm, FrameIndex, MI);
-    case X86::SETSr:    return MakeMInst( X86::SETSm, FrameIndex, MI);
-    case X86::SETNSr:   return MakeMInst( X86::SETNSm, FrameIndex, MI);
-    case X86::SETPr:    return MakeMInst( X86::SETPm, FrameIndex, MI);
-    case X86::SETNPr:   return MakeMInst( X86::SETNPm, FrameIndex, MI);
-    case X86::SETLr:    return MakeMInst( X86::SETLm, FrameIndex, MI);
-    case X86::SETGEr:   return MakeMInst( X86::SETGEm, FrameIndex, MI);
-    case X86::SETLEr:   return MakeMInst( X86::SETLEm, FrameIndex, MI);
-    case X86::SETGr:    return MakeMInst( X86::SETGm, FrameIndex, MI);
-    case X86::TEST8rr:  return MakeMRInst(X86::TEST8mr ,FrameIndex, MI);
-    case X86::TEST16rr: return MakeMRInst(X86::TEST16mr,FrameIndex, MI);
-    case X86::TEST32rr: return MakeMRInst(X86::TEST32mr,FrameIndex, MI);
-    case X86::TEST8ri:  return MakeMIInst(X86::TEST8mi ,FrameIndex, MI);
-    case X86::TEST16ri: return MakeMIInst(X86::TEST16mi,FrameIndex, MI);
-    case X86::TEST32ri: return MakeMIInst(X86::TEST32mi,FrameIndex, MI);
-    case X86::CMP8rr:   return MakeMRInst(X86::CMP8mr , FrameIndex, MI);
-    case X86::CMP16rr:  return MakeMRInst(X86::CMP16mr, FrameIndex, MI);
-    case X86::CMP32rr:  return MakeMRInst(X86::CMP32mr, FrameIndex, MI);
-    case X86::CMP8ri:   return MakeMIInst(X86::CMP8mi , FrameIndex, MI);
-    case X86::CMP16ri:  return MakeMIInst(X86::CMP16mi, FrameIndex, MI);
-    case X86::CMP32ri:  return MakeMIInst(X86::CMP32mi, FrameIndex, MI);
+  // Table (and size) to search
+  const TableEntry *OpcodeTablePtr = NULL;
+  unsigned OpcodeTableSize = 0;
+  bool isTwoAddrFold = false;
+  unsigned NumOps = TII.getNumOperands(MI->getOpcode());
+  bool isTwoAddr = NumOps > 1 &&
+    MI->getInstrDescriptor()->getOperandConstraint(1, TOI::TIED_TO) != -1;
+
+  MachineInstr *NewMI = NULL;
+  // Folding a memory location into the two-address part of a two-address
+  // instruction is different than folding it other places.  It requires
+  // replacing the *two* registers with the memory location.
+  if (isTwoAddr && NumOps >= 2 && i < 2 &&
+      MI->getOperand(0).isReg() &&
+      MI->getOperand(1).isReg() &&
+      MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
+    static const TableEntry OpcodeTable[] = {
+      { X86::ADC32ri, X86::ADC32mi },
+      { X86::ADC32ri8, X86::ADC32mi8 },
+      { X86::ADC32rr, X86::ADC32mr },
+      { X86::ADC64ri32, X86::ADC64mi32 },
+      { X86::ADC64ri8, X86::ADC64mi8 },
+      { X86::ADC64rr, X86::ADC64mr },
+      { X86::ADD16ri, X86::ADD16mi },
+      { X86::ADD16ri8, X86::ADD16mi8 },
+      { X86::ADD16rr, X86::ADD16mr },
+      { X86::ADD32ri, X86::ADD32mi },
+      { X86::ADD32ri8, X86::ADD32mi8 },
+      { X86::ADD32rr, X86::ADD32mr },
+      { X86::ADD64ri32, X86::ADD64mi32 },
+      { X86::ADD64ri8, X86::ADD64mi8 },
+      { X86::ADD64rr, X86::ADD64mr },
+      { X86::ADD8ri, X86::ADD8mi },
+      { X86::ADD8rr, X86::ADD8mr },
+      { X86::AND16ri, X86::AND16mi },
+      { X86::AND16ri8, X86::AND16mi8 },
+      { X86::AND16rr, X86::AND16mr },
+      { X86::AND32ri, X86::AND32mi },
+      { X86::AND32ri8, X86::AND32mi8 },
+      { X86::AND32rr, X86::AND32mr },
+      { X86::AND64ri32, X86::AND64mi32 },
+      { X86::AND64ri8, X86::AND64mi8 },
+      { X86::AND64rr, X86::AND64mr },
+      { X86::AND8ri, X86::AND8mi },
+      { X86::AND8rr, X86::AND8mr },
+      { X86::DEC16r, X86::DEC16m },
+      { X86::DEC32r, X86::DEC32m },
+      { X86::DEC64_16r, X86::DEC16m },
+      { X86::DEC64_32r, X86::DEC32m },
+      { X86::DEC64r, X86::DEC64m },
+      { X86::DEC8r, X86::DEC8m },
+      { X86::INC16r, X86::INC16m },
+      { X86::INC32r, X86::INC32m },
+      { X86::INC64_16r, X86::INC16m },
+      { X86::INC64_32r, X86::INC32m },
+      { X86::INC64r, X86::INC64m },
+      { X86::INC8r, X86::INC8m },
+      { X86::NEG16r, X86::NEG16m },
+      { X86::NEG32r, X86::NEG32m },
+      { X86::NEG64r, X86::NEG64m },
+      { X86::NEG8r, X86::NEG8m },
+      { X86::NOT16r, X86::NOT16m },
+      { X86::NOT32r, X86::NOT32m },
+      { X86::NOT64r, X86::NOT64m },
+      { X86::NOT8r, X86::NOT8m },
+      { X86::OR16ri, X86::OR16mi },
+      { X86::OR16ri8, X86::OR16mi8 },
+      { X86::OR16rr, X86::OR16mr },
+      { X86::OR32ri, X86::OR32mi },
+      { X86::OR32ri8, X86::OR32mi8 },
+      { X86::OR32rr, X86::OR32mr },
+      { X86::OR64ri32, X86::OR64mi32 },
+      { X86::OR64ri8, X86::OR64mi8 },
+      { X86::OR64rr, X86::OR64mr },
+      { X86::OR8ri, X86::OR8mi },
+      { X86::OR8rr, X86::OR8mr },
+      { X86::ROL16r1, X86::ROL16m1 },
+      { X86::ROL16rCL, X86::ROL16mCL },
+      { X86::ROL16ri, X86::ROL16mi },
+      { X86::ROL32r1, X86::ROL32m1 },
+      { X86::ROL32rCL, X86::ROL32mCL },
+      { X86::ROL32ri, X86::ROL32mi },
+      { X86::ROL64r1, X86::ROL64m1 },
+      { X86::ROL64rCL, X86::ROL64mCL },
+      { X86::ROL64ri, X86::ROL64mi },
+      { X86::ROL8r1, X86::ROL8m1 },
+      { X86::ROL8rCL, X86::ROL8mCL },
+      { X86::ROL8ri, X86::ROL8mi },
+      { X86::ROR16r1, X86::ROR16m1 },
+      { X86::ROR16rCL, X86::ROR16mCL },
+      { X86::ROR16ri, X86::ROR16mi },
+      { X86::ROR32r1, X86::ROR32m1 },
+      { X86::ROR32rCL, X86::ROR32mCL },
+      { X86::ROR32ri, X86::ROR32mi },
+      { X86::ROR64r1, X86::ROR64m1 },
+      { X86::ROR64rCL, X86::ROR64mCL },
+      { X86::ROR64ri, X86::ROR64mi },
+      { X86::ROR8r1, X86::ROR8m1 },
+      { X86::ROR8rCL, X86::ROR8mCL },
+      { X86::ROR8ri, X86::ROR8mi },
+      { X86::SAR16r1, X86::SAR16m1 },
+      { X86::SAR16rCL, X86::SAR16mCL },
+      { X86::SAR16ri, X86::SAR16mi },
+      { X86::SAR32r1, X86::SAR32m1 },
+      { X86::SAR32rCL, X86::SAR32mCL },
+      { X86::SAR32ri, X86::SAR32mi },
+      { X86::SAR64r1, X86::SAR64m1 },
+      { X86::SAR64rCL, X86::SAR64mCL },
+      { X86::SAR64ri, X86::SAR64mi },
+      { X86::SAR8r1, X86::SAR8m1 },
+      { X86::SAR8rCL, X86::SAR8mCL },
+      { X86::SAR8ri, X86::SAR8mi },
+      { X86::SBB32ri, X86::SBB32mi },
+      { X86::SBB32ri8, X86::SBB32mi8 },
+      { X86::SBB32rr, X86::SBB32mr },
+      { X86::SBB64ri32, X86::SBB64mi32 },
+      { X86::SBB64ri8, X86::SBB64mi8 },
+      { X86::SBB64rr, X86::SBB64mr },
+      { X86::SHL16r1, X86::SHL16m1 },
+      { X86::SHL16rCL, X86::SHL16mCL },
+      { X86::SHL16ri, X86::SHL16mi },
+      { X86::SHL32r1, X86::SHL32m1 },
+      { X86::SHL32rCL, X86::SHL32mCL },
+      { X86::SHL32ri, X86::SHL32mi },
+      { X86::SHL64r1, X86::SHL64m1 },
+      { X86::SHL64rCL, X86::SHL64mCL },
+      { X86::SHL64ri, X86::SHL64mi },
+      { X86::SHL8r1, X86::SHL8m1 },
+      { X86::SHL8rCL, X86::SHL8mCL },
+      { X86::SHL8ri, X86::SHL8mi },
+      { X86::SHLD16rrCL, X86::SHLD16mrCL },
+      { X86::SHLD16rri8, X86::SHLD16mri8 },
+      { X86::SHLD32rrCL, X86::SHLD32mrCL },
+      { X86::SHLD32rri8, X86::SHLD32mri8 },
+      { X86::SHLD64rrCL, X86::SHLD64mrCL },
+      { X86::SHLD64rri8, X86::SHLD64mri8 },
+      { X86::SHR16r1, X86::SHR16m1 },
+      { X86::SHR16rCL, X86::SHR16mCL },
+      { X86::SHR16ri, X86::SHR16mi },
+      { X86::SHR32r1, X86::SHR32m1 },
+      { X86::SHR32rCL, X86::SHR32mCL },
+      { X86::SHR32ri, X86::SHR32mi },
+      { X86::SHR64r1, X86::SHR64m1 },
+      { X86::SHR64rCL, X86::SHR64mCL },
+      { X86::SHR64ri, X86::SHR64mi },
+      { X86::SHR8r1, X86::SHR8m1 },
+      { X86::SHR8rCL, X86::SHR8mCL },
+      { X86::SHR8ri, X86::SHR8mi },
+      { X86::SHRD16rrCL, X86::SHRD16mrCL },
+      { X86::SHRD16rri8, X86::SHRD16mri8 },
+      { X86::SHRD32rrCL, X86::SHRD32mrCL },
+      { X86::SHRD32rri8, X86::SHRD32mri8 },
+      { X86::SHRD64rrCL, X86::SHRD64mrCL },
+      { X86::SHRD64rri8, X86::SHRD64mri8 },
+      { X86::SUB16ri, X86::SUB16mi },
+      { X86::SUB16ri8, X86::SUB16mi8 },
+      { X86::SUB16rr, X86::SUB16mr },
+      { X86::SUB32ri, X86::SUB32mi },
+      { X86::SUB32ri8, X86::SUB32mi8 },
+      { X86::SUB32rr, X86::SUB32mr },
+      { X86::SUB64ri32, X86::SUB64mi32 },
+      { X86::SUB64ri8, X86::SUB64mi8 },
+      { X86::SUB64rr, X86::SUB64mr },
+      { X86::SUB8ri, X86::SUB8mi },
+      { X86::SUB8rr, X86::SUB8mr },
+      { X86::XOR16ri, X86::XOR16mi },
+      { X86::XOR16ri8, X86::XOR16mi8 },
+      { X86::XOR16rr, X86::XOR16mr },
+      { X86::XOR32ri, X86::XOR32mi },
+      { X86::XOR32ri8, X86::XOR32mi8 },
+      { X86::XOR32rr, X86::XOR32mr },
+      { X86::XOR64ri32, X86::XOR64mi32 },
+      { X86::XOR64ri8, X86::XOR64mi8 },
+      { X86::XOR64rr, X86::XOR64mr },
+      { X86::XOR8ri, X86::XOR8mi },
+      { X86::XOR8rr, X86::XOR8mr }
+    };
+    ASSERT_SORTED(OpcodeTable);
+    OpcodeTablePtr = OpcodeTable;
+    OpcodeTableSize = ARRAY_SIZE(OpcodeTable);
+    isTwoAddrFold = true;
+  } else if (i == 0) { // If operand 0
+    if (MI->getOpcode() == X86::MOV16r0)
+      NewMI = MakeM0Inst(TII, X86::MOV16mi, FrameIndex, MI);
+    else if (MI->getOpcode() == X86::MOV32r0)
+      NewMI = MakeM0Inst(TII, X86::MOV32mi, FrameIndex, MI);
+    else if (MI->getOpcode() == X86::MOV64r0)
+      NewMI = MakeM0Inst(TII, X86::MOV64mi32, FrameIndex, MI);
+    else if (MI->getOpcode() == X86::MOV8r0)
+      NewMI = MakeM0Inst(TII, X86::MOV8mi, FrameIndex, MI);
+    if (NewMI) {
+      NewMI->copyKillDeadInfo(MI);
+      return NewMI;
     }
+
+    static const TableEntry OpcodeTable[] = {
+      { X86::CMP16ri, X86::CMP16mi },
+      { X86::CMP16ri8, X86::CMP16mi8 },
+      { X86::CMP32ri, X86::CMP32mi },
+      { X86::CMP32ri8, X86::CMP32mi8 },
+      { X86::CMP8ri, X86::CMP8mi },
+      { X86::DIV16r, X86::DIV16m },
+      { X86::DIV32r, X86::DIV32m },
+      { X86::DIV64r, X86::DIV64m },
+      { X86::DIV8r, X86::DIV8m },
+      { X86::FsMOVAPDrr, X86::MOVSDmr },
+      { X86::FsMOVAPSrr, X86::MOVSSmr },
+      { X86::IDIV16r, X86::IDIV16m },
+      { X86::IDIV32r, X86::IDIV32m },
+      { X86::IDIV64r, X86::IDIV64m },
+      { X86::IDIV8r, X86::IDIV8m },
+      { X86::IMUL16r, X86::IMUL16m },
+      { X86::IMUL32r, X86::IMUL32m },
+      { X86::IMUL64r, X86::IMUL64m },
+      { X86::IMUL8r, X86::IMUL8m },
+      { X86::MOV16ri, X86::MOV16mi },
+      { X86::MOV16rr, X86::MOV16mr },
+      { X86::MOV32ri, X86::MOV32mi },
+      { X86::MOV32rr, X86::MOV32mr },
+      { X86::MOV64ri32, X86::MOV64mi32 },
+      { X86::MOV64rr, X86::MOV64mr },
+      { X86::MOV8ri, X86::MOV8mi },
+      { X86::MOV8rr, X86::MOV8mr },
+      { X86::MOVAPDrr, X86::MOVAPDmr },
+      { X86::MOVAPSrr, X86::MOVAPSmr },
+      { X86::MOVPDI2DIrr, X86::MOVPDI2DImr },
+      { X86::MOVPQIto64rr,X86::MOVPQIto64mr },
+      { X86::MOVPS2SSrr, X86::MOVPS2SSmr },
+      { X86::MOVSDrr, X86::MOVSDmr },
+      { X86::MOVSSrr, X86::MOVSSmr },
+      { X86::MOVUPDrr, X86::MOVUPDmr },
+      { X86::MOVUPSrr, X86::MOVUPSmr },
+      { X86::MUL16r, X86::MUL16m },
+      { X86::MUL32r, X86::MUL32m },
+      { X86::MUL64r, X86::MUL64m },
+      { X86::MUL8r, X86::MUL8m },
+      { X86::SETAEr, X86::SETAEm },
+      { X86::SETAr, X86::SETAm },
+      { X86::SETBEr, X86::SETBEm },
+      { X86::SETBr, X86::SETBm },
+      { X86::SETEr, X86::SETEm },
+      { X86::SETGEr, X86::SETGEm },
+      { X86::SETGr, X86::SETGm },
+      { X86::SETLEr, X86::SETLEm },
+      { X86::SETLr, X86::SETLm },
+      { X86::SETNEr, X86::SETNEm },
+      { X86::SETNPr, X86::SETNPm },
+      { X86::SETNSr, X86::SETNSm },
+      { X86::SETPr, X86::SETPm },
+      { X86::SETSr, X86::SETSm },
+      { X86::TEST16ri, X86::TEST16mi },
+      { X86::TEST32ri, X86::TEST32mi },
+      { X86::TEST64ri32, X86::TEST64mi32 },
+      { X86::TEST8ri, X86::TEST8mi },
+      { X86::XCHG16rr, X86::XCHG16mr },
+      { X86::XCHG32rr, X86::XCHG32mr },
+      { X86::XCHG64rr, X86::XCHG64mr },
+      { X86::XCHG8rr, X86::XCHG8mr }
+    };
+    ASSERT_SORTED(OpcodeTable);
+    OpcodeTablePtr = OpcodeTable;
+    OpcodeTableSize = ARRAY_SIZE(OpcodeTable);
   } else if (i == 1) {
-    switch(MI->getOpcode()) {
-    case X86::XCHG8rr:  return MakeRMInst(X86::XCHG8rm ,FrameIndex, MI);
-    case X86::XCHG16rr: return MakeRMInst(X86::XCHG16rm,FrameIndex, MI);
-    case X86::XCHG32rr: return MakeRMInst(X86::XCHG32rm,FrameIndex, MI);
-    case X86::MOV8rr:   return MakeRMInst(X86::MOV8rm , FrameIndex, MI);
-    case X86::MOV16rr:  return MakeRMInst(X86::MOV16rm, FrameIndex, MI);
-    case X86::MOV32rr:  return MakeRMInst(X86::MOV32rm, FrameIndex, MI);
-    case X86::CMOVB16rr: return MakeRMInst(X86::CMOVB16rm , FrameIndex, MI);
-    case X86::CMOVB32rr: return MakeRMInst(X86::CMOVB32rm , FrameIndex, MI);
-    case X86::CMOVAE16rr: return MakeRMInst(X86::CMOVAE16rm , FrameIndex, MI);
-    case X86::CMOVAE32rr: return MakeRMInst(X86::CMOVAE32rm , FrameIndex, MI);
-    case X86::CMOVE16rr: return MakeRMInst(X86::CMOVE16rm , FrameIndex, MI);
-    case X86::CMOVE32rr: return MakeRMInst(X86::CMOVE32rm , FrameIndex, MI);
-    case X86::CMOVNE16rr:return MakeRMInst(X86::CMOVNE16rm, FrameIndex, MI);
-    case X86::CMOVNE32rr:return MakeRMInst(X86::CMOVNE32rm, FrameIndex, MI);
-    case X86::CMOVBE16rr:return MakeRMInst(X86::CMOVBE16rm, FrameIndex, MI);
-    case X86::CMOVBE32rr:return MakeRMInst(X86::CMOVBE32rm, FrameIndex, MI);
-    case X86::CMOVA16rr:return MakeRMInst(X86::CMOVA16rm, FrameIndex, MI);
-    case X86::CMOVA32rr:return MakeRMInst(X86::CMOVA32rm, FrameIndex, MI);
-    case X86::CMOVS16rr: return MakeRMInst(X86::CMOVS16rm , FrameIndex, MI);
-    case X86::CMOVS32rr: return MakeRMInst(X86::CMOVS32rm , FrameIndex, MI);
-    case X86::CMOVNS16rr: return MakeRMInst(X86::CMOVNS16rm , FrameIndex, MI);
-    case X86::CMOVNS32rr: return MakeRMInst(X86::CMOVNS32rm , FrameIndex, MI);
-    case X86::CMOVP16rr: return MakeRMInst(X86::CMOVP16rm , FrameIndex, MI);
-    case X86::CMOVP32rr: return MakeRMInst(X86::CMOVP32rm , FrameIndex, MI);
-    case X86::CMOVNP16rr: return MakeRMInst(X86::CMOVNP16rm , FrameIndex, MI);
-    case X86::CMOVNP32rr: return MakeRMInst(X86::CMOVNP32rm , FrameIndex, MI);
-    case X86::CMOVL16rr: return MakeRMInst(X86::CMOVL16rm , FrameIndex, MI);
-    case X86::CMOVL32rr: return MakeRMInst(X86::CMOVL32rm , FrameIndex, MI);
-    case X86::CMOVGE16rr: return MakeRMInst(X86::CMOVGE16rm , FrameIndex, MI);
-    case X86::CMOVGE32rr: return MakeRMInst(X86::CMOVGE32rm , FrameIndex, MI);
-    case X86::CMOVLE16rr: return MakeRMInst(X86::CMOVLE16rm , FrameIndex, MI);
-    case X86::CMOVLE32rr: return MakeRMInst(X86::CMOVLE32rm , FrameIndex, MI);
-    case X86::CMOVG16rr: return MakeRMInst(X86::CMOVG16rm , FrameIndex, MI);
-    case X86::CMOVG32rr: return MakeRMInst(X86::CMOVG32rm , FrameIndex, MI);
-    case X86::ADD8rr:   return MakeRMInst(X86::ADD8rm , FrameIndex, MI);
-    case X86::ADD16rr:  return MakeRMInst(X86::ADD16rm, FrameIndex, MI);
-    case X86::ADD32rr:  return MakeRMInst(X86::ADD32rm, FrameIndex, MI);
-    case X86::ADC32rr:  return MakeRMInst(X86::ADC32rm, FrameIndex, MI);
-    case X86::SUB8rr:   return MakeRMInst(X86::SUB8rm , FrameIndex, MI);
-    case X86::SUB16rr:  return MakeRMInst(X86::SUB16rm, FrameIndex, MI);
-    case X86::SUB32rr:  return MakeRMInst(X86::SUB32rm, FrameIndex, MI);
-    case X86::SBB32rr:  return MakeRMInst(X86::SBB32rm, FrameIndex, MI);
-    case X86::AND8rr:   return MakeRMInst(X86::AND8rm , FrameIndex, MI);
-    case X86::AND16rr:  return MakeRMInst(X86::AND16rm, FrameIndex, MI);
-    case X86::AND32rr:  return MakeRMInst(X86::AND32rm, FrameIndex, MI);
-    case X86::OR8rr:    return MakeRMInst(X86::OR8rm , FrameIndex, MI);
-    case X86::OR16rr:   return MakeRMInst(X86::OR16rm, FrameIndex, MI);
-    case X86::OR32rr:   return MakeRMInst(X86::OR32rm, FrameIndex, MI);
-    case X86::XOR8rr:   return MakeRMInst(X86::XOR8rm , FrameIndex, MI);
-    case X86::XOR16rr:  return MakeRMInst(X86::XOR16rm, FrameIndex, MI);
-    case X86::XOR32rr:  return MakeRMInst(X86::XOR32rm, FrameIndex, MI);
-    case X86::TEST8rr:  return MakeRMInst(X86::TEST8rm ,FrameIndex, MI);
-    case X86::TEST16rr: return MakeRMInst(X86::TEST16rm,FrameIndex, MI);
-    case X86::TEST32rr: return MakeRMInst(X86::TEST32rm,FrameIndex, MI);
-    case X86::IMUL16rr: return MakeRMInst(X86::IMUL16rm,FrameIndex, MI);
-    case X86::IMUL32rr: return MakeRMInst(X86::IMUL32rm,FrameIndex, MI);
-    case X86::IMUL16rri: return MakeRMIInst(X86::IMUL16rmi, FrameIndex, MI);
-    case X86::IMUL32rri: return MakeRMIInst(X86::IMUL32rmi, FrameIndex, MI);
-    case X86::CMP8rr:   return MakeRMInst(X86::CMP8rm , FrameIndex, MI);
-    case X86::CMP16rr:  return MakeRMInst(X86::CMP16rm, FrameIndex, MI);
-    case X86::CMP32rr:  return MakeRMInst(X86::CMP32rm, FrameIndex, MI);
-    case X86::MOVSX16rr8:return MakeRMInst(X86::MOVSX16rm8 , FrameIndex, MI);
-    case X86::MOVSX32rr8:return MakeRMInst(X86::MOVSX32rm8, FrameIndex, MI);
-    case X86::MOVSX32rr16:return MakeRMInst(X86::MOVSX32rm16, FrameIndex, MI);
-    case X86::MOVZX16rr8:return MakeRMInst(X86::MOVZX16rm8 , FrameIndex, MI);
-    case X86::MOVZX32rr8: return MakeRMInst(X86::MOVZX32rm8, FrameIndex, MI);
-    case X86::MOVZX32rr16:return MakeRMInst(X86::MOVZX32rm16, FrameIndex, MI);
+    static const TableEntry OpcodeTable[] = {
+      { X86::CMP16rr, X86::CMP16rm },
+      { X86::CMP32rr, X86::CMP32rm },
+      { X86::CMP64ri32, X86::CMP64mi32 },
+      { X86::CMP64ri8, X86::CMP64mi8 },
+      { X86::CMP64rr, X86::CMP64rm },
+      { X86::CMP8rr, X86::CMP8rm },
+      { X86::CMPPDrri, X86::CMPPDrmi },
+      { X86::CMPPSrri, X86::CMPPSrmi },
+      { X86::CMPSDrr, X86::CMPSDrm },
+      { X86::CMPSSrr, X86::CMPSSrm },
+      { X86::CVTSD2SSrr, X86::CVTSD2SSrm },
+      { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm },
+      { X86::CVTSI2SDrr, X86::CVTSI2SDrm },
+      { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm },
+      { X86::CVTSI2SSrr, X86::CVTSI2SSrm },
+      { X86::CVTSS2SDrr, X86::CVTSS2SDrm },
+      { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm },
+      { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm },
+      { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm },
+      { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm },
+      { X86::FsMOVAPDrr, X86::MOVSDrm },
+      { X86::FsMOVAPSrr, X86::MOVSSrm },
+      { X86::IMUL16rri, X86::IMUL16rmi },
+      { X86::IMUL16rri8, X86::IMUL16rmi8 },
+      { X86::IMUL32rri, X86::IMUL32rmi },
+      { X86::IMUL32rri8, X86::IMUL32rmi8 },
+      { X86::IMUL64rr, X86::IMUL64rm },
+      { X86::IMUL64rri32, X86::IMUL64rmi32 },
+      { X86::IMUL64rri8, X86::IMUL64rmi8 },
+      { X86::Int_CMPSDrr, X86::Int_CMPSDrm },
+      { X86::Int_CMPSSrr, X86::Int_CMPSSrm },
+      { X86::Int_COMISDrr, X86::Int_COMISDrm },
+      { X86::Int_COMISSrr, X86::Int_COMISSrm },
+      { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm },
+      { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm },
+      { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm },
+      { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm },
+      { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm },
+      { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm },
+      { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm },
+      { X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm },
+      { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm },
+      { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm },
+      { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm },
+      { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm },
+      { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm },
+      { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm },
+      { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm },
+      { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm },
+      { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm },
+      { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm },
+      { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm },
+      { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm },
+      { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm },
+      { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm },
+      { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm },
+      { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm },
+      { X86::MOV16rr, X86::MOV16rm },
+      { X86::MOV32rr, X86::MOV32rm },
+      { X86::MOV64rr, X86::MOV64rm },
+      { X86::MOV64toPQIrr, X86::MOV64toPQIrm },
+      { X86::MOV8rr, X86::MOV8rm },
+      { X86::MOVAPDrr, X86::MOVAPDrm },
+      { X86::MOVAPSrr, X86::MOVAPSrm },
+      { X86::MOVDDUPrr, X86::MOVDDUPrm },
+      { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm },
+      { X86::MOVSD2PDrr, X86::MOVSD2PDrm },
+      { X86::MOVSDrr, X86::MOVSDrm },
+      { X86::MOVSHDUPrr, X86::MOVSHDUPrm },
+      { X86::MOVSLDUPrr, X86::MOVSLDUPrm },
+      { X86::MOVSS2PSrr, X86::MOVSS2PSrm },
+      { X86::MOVSSrr, X86::MOVSSrm },
+      { X86::MOVSX16rr8, X86::MOVSX16rm8 },
+      { X86::MOVSX32rr16, X86::MOVSX32rm16 },
+      { X86::MOVSX32rr8, X86::MOVSX32rm8 },
+      { X86::MOVSX64rr16, X86::MOVSX64rm16 },
+      { X86::MOVSX64rr32, X86::MOVSX64rm32 },
+      { X86::MOVSX64rr8, X86::MOVSX64rm8 },
+      { X86::MOVUPDrr, X86::MOVUPDrm },
+      { X86::MOVUPSrr, X86::MOVUPSrm },
+      { X86::MOVZX16rr8, X86::MOVZX16rm8 },
+      { X86::MOVZX32rr16, X86::MOVZX32rm16 },
+      { X86::MOVZX32rr8, X86::MOVZX32rm8 },
+      { X86::MOVZX64rr16, X86::MOVZX64rm16 },
+      { X86::MOVZX64rr8, X86::MOVZX64rm8 },
+      { X86::PSHUFDri, X86::PSHUFDmi },
+      { X86::PSHUFHWri, X86::PSHUFHWmi },
+      { X86::PSHUFLWri, X86::PSHUFLWmi },
+      { X86::PsMOVZX64rr32, X86::PsMOVZX64rm32 },
+      { X86::TEST16rr, X86::TEST16rm },
+      { X86::TEST32rr, X86::TEST32rm },
+      { X86::TEST64rr, X86::TEST64rm },
+      { X86::TEST8rr, X86::TEST8rm },
+      // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
+      { X86::UCOMISDrr, X86::UCOMISDrm },
+      { X86::UCOMISSrr, X86::UCOMISSrm },
+      { X86::XCHG16rr, X86::XCHG16rm },
+      { X86::XCHG32rr, X86::XCHG32rm },
+      { X86::XCHG64rr, X86::XCHG64rm },
+      { X86::XCHG8rr, X86::XCHG8rm }
+    };
+    ASSERT_SORTED(OpcodeTable);
+    OpcodeTablePtr = OpcodeTable;
+    OpcodeTableSize = ARRAY_SIZE(OpcodeTable);
+  } else if (i == 2) {
+    static const TableEntry OpcodeTable[] = {
+      { X86::ADC32rr, X86::ADC32rm },
+      { X86::ADC64rr, X86::ADC64rm },
+      { X86::ADD16rr, X86::ADD16rm },
+      { X86::ADD32rr, X86::ADD32rm },
+      { X86::ADD64rr, X86::ADD64rm },
+      { X86::ADD8rr, X86::ADD8rm },
+      { X86::ADDPDrr, X86::ADDPDrm },
+      { X86::ADDPSrr, X86::ADDPSrm },
+      { X86::ADDSDrr, X86::ADDSDrm },
+      { X86::ADDSSrr, X86::ADDSSrm },
+      { X86::ADDSUBPDrr, X86::ADDSUBPDrm },
+      { X86::ADDSUBPSrr, X86::ADDSUBPSrm },
+      { X86::AND16rr, X86::AND16rm },
+      { X86::AND32rr, X86::AND32rm },
+      { X86::AND64rr, X86::AND64rm },
+      { X86::AND8rr, X86::AND8rm },
+      { X86::ANDNPDrr, X86::ANDNPDrm },
+      { X86::ANDNPSrr, X86::ANDNPSrm },
+      { X86::ANDPDrr, X86::ANDPDrm },
+      { X86::ANDPSrr, X86::ANDPSrm },
+      { X86::CMOVA16rr, X86::CMOVA16rm },
+      { X86::CMOVA32rr, X86::CMOVA32rm },
+      { X86::CMOVA64rr, X86::CMOVA64rm },
+      { X86::CMOVAE16rr, X86::CMOVAE16rm },
+      { X86::CMOVAE32rr, X86::CMOVAE32rm },
+      { X86::CMOVAE64rr, X86::CMOVAE64rm },
+      { X86::CMOVB16rr, X86::CMOVB16rm },
+      { X86::CMOVB32rr, X86::CMOVB32rm },
+      { X86::CMOVB64rr, X86::CMOVB64rm },
+      { X86::CMOVBE16rr, X86::CMOVBE16rm },
+      { X86::CMOVBE32rr, X86::CMOVBE32rm },
+      { X86::CMOVBE64rr, X86::CMOVBE64rm },
+      { X86::CMOVE16rr, X86::CMOVE16rm },
+      { X86::CMOVE32rr, X86::CMOVE32rm },
+      { X86::CMOVE64rr, X86::CMOVE64rm },
+      { X86::CMOVG16rr, X86::CMOVG16rm },
+      { X86::CMOVG32rr, X86::CMOVG32rm },
+      { X86::CMOVG64rr, X86::CMOVG64rm },
+      { X86::CMOVGE16rr, X86::CMOVGE16rm },
+      { X86::CMOVGE32rr, X86::CMOVGE32rm },
+      { X86::CMOVGE64rr, X86::CMOVGE64rm },
+      { X86::CMOVL16rr, X86::CMOVL16rm },
+      { X86::CMOVL32rr, X86::CMOVL32rm },
+      { X86::CMOVL64rr, X86::CMOVL64rm },
+      { X86::CMOVLE16rr, X86::CMOVLE16rm },
+      { X86::CMOVLE32rr, X86::CMOVLE32rm },
+      { X86::CMOVLE64rr, X86::CMOVLE64rm },
+      { X86::CMOVNE16rr, X86::CMOVNE16rm },
+      { X86::CMOVNE32rr, X86::CMOVNE32rm },
+      { X86::CMOVNE64rr, X86::CMOVNE64rm },
+      { X86::CMOVNP16rr, X86::CMOVNP16rm },
+      { X86::CMOVNP32rr, X86::CMOVNP32rm },
+      { X86::CMOVNP64rr, X86::CMOVNP64rm },
+      { X86::CMOVNS16rr, X86::CMOVNS16rm },
+      { X86::CMOVNS32rr, X86::CMOVNS32rm },
+      { X86::CMOVNS64rr, X86::CMOVNS64rm },
+      { X86::CMOVP16rr, X86::CMOVP16rm },
+      { X86::CMOVP32rr, X86::CMOVP32rm },
+      { X86::CMOVP64rr, X86::CMOVP64rm },
+      { X86::CMOVS16rr, X86::CMOVS16rm },
+      { X86::CMOVS32rr, X86::CMOVS32rm },
+      { X86::CMOVS64rr, X86::CMOVS64rm },
+      { X86::DIVPDrr, X86::DIVPDrm },
+      { X86::DIVPSrr, X86::DIVPSrm },
+      { X86::DIVSDrr, X86::DIVSDrm },
+      { X86::DIVSSrr, X86::DIVSSrm },
+      { X86::HADDPDrr, X86::HADDPDrm },
+      { X86::HADDPSrr, X86::HADDPSrm },
+      { X86::HSUBPDrr, X86::HSUBPDrm },
+      { X86::HSUBPSrr, X86::HSUBPSrm },
+      { X86::IMUL16rr, X86::IMUL16rm },
+      { X86::IMUL32rr, X86::IMUL32rm },
+      { X86::MAXPDrr, X86::MAXPDrm },
+      { X86::MAXPSrr, X86::MAXPSrm },
+      { X86::MINPDrr, X86::MINPDrm },
+      { X86::MINPSrr, X86::MINPSrm },
+      { X86::MULPDrr, X86::MULPDrm },
+      { X86::MULPSrr, X86::MULPSrm },
+      { X86::MULSDrr, X86::MULSDrm },
+      { X86::MULSSrr, X86::MULSSrm },
+      { X86::OR16rr, X86::OR16rm },
+      { X86::OR32rr, X86::OR32rm },
+      { X86::OR64rr, X86::OR64rm },
+      { X86::OR8rr, X86::OR8rm },
+      { X86::ORPDrr, X86::ORPDrm },
+      { X86::ORPSrr, X86::ORPSrm },
+      { X86::PACKSSDWrr, X86::PACKSSDWrm },
+      { X86::PACKSSWBrr, X86::PACKSSWBrm },
+      { X86::PACKUSWBrr, X86::PACKUSWBrm },
+      { X86::PADDBrr, X86::PADDBrm },
+      { X86::PADDDrr, X86::PADDDrm },
+      { X86::PADDSBrr, X86::PADDSBrm },
+      { X86::PADDSWrr, X86::PADDSWrm },
+      { X86::PADDWrr, X86::PADDWrm },
+      { X86::PANDNrr, X86::PANDNrm },
+      { X86::PANDrr, X86::PANDrm },
+      { X86::PAVGBrr, X86::PAVGBrm },
+      { X86::PAVGWrr, X86::PAVGWrm },
+      { X86::PCMPEQBrr, X86::PCMPEQBrm },
+      { X86::PCMPEQDrr, X86::PCMPEQDrm },
+      { X86::PCMPEQWrr, X86::PCMPEQWrm },
+      { X86::PCMPGTBrr, X86::PCMPGTBrm },
+      { X86::PCMPGTDrr, X86::PCMPGTDrm },
+      { X86::PCMPGTWrr, X86::PCMPGTWrm },
+      { X86::PINSRWrri, X86::PINSRWrmi },
+      { X86::PMADDWDrr, X86::PMADDWDrm },
+      { X86::PMAXSWrr, X86::PMAXSWrm },
+      { X86::PMAXUBrr, X86::PMAXUBrm },
+      { X86::PMINSWrr, X86::PMINSWrm },
+      { X86::PMINUBrr, X86::PMINUBrm },
+      { X86::PMULHUWrr, X86::PMULHUWrm },
+      { X86::PMULHWrr, X86::PMULHWrm },
+      { X86::PMULLWrr, X86::PMULLWrm },
+      { X86::PMULUDQrr, X86::PMULUDQrm },
+      { X86::PORrr, X86::PORrm },
+      { X86::PSADBWrr, X86::PSADBWrm },
+      { X86::PSLLDrr, X86::PSLLDrm },
+      { X86::PSLLQrr, X86::PSLLQrm },
+      { X86::PSLLWrr, X86::PSLLWrm },
+      { X86::PSRADrr, X86::PSRADrm },
+      { X86::PSRAWrr, X86::PSRAWrm },
+      { X86::PSRLDrr, X86::PSRLDrm },
+      { X86::PSRLQrr, X86::PSRLQrm },
+      { X86::PSRLWrr, X86::PSRLWrm },
+      { X86::PSUBBrr, X86::PSUBBrm },
+      { X86::PSUBDrr, X86::PSUBDrm },
+      { X86::PSUBSBrr, X86::PSUBSBrm },
+      { X86::PSUBSWrr, X86::PSUBSWrm },
+      { X86::PSUBWrr, X86::PSUBWrm },
+      { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm },
+      { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm },
+      { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm },
+      { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm },
+      { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm },
+      { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm },
+      { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm },
+      { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm },
+      { X86::PXORrr, X86::PXORrm },
+      { X86::RCPPSr, X86::RCPPSm },
+      { X86::RSQRTPSr, X86::RSQRTPSm },
+      { X86::SBB32rr, X86::SBB32rm },
+      { X86::SBB64rr, X86::SBB64rm },
+      { X86::SHUFPDrri, X86::SHUFPDrmi },
+      { X86::SHUFPSrri, X86::SHUFPSrmi },
+      { X86::SQRTPDr, X86::SQRTPDm },
+      { X86::SQRTPSr, X86::SQRTPSm },
+      { X86::SQRTSDr, X86::SQRTSDm },
+      { X86::SQRTSSr, X86::SQRTSSm },
+      { X86::SUB16rr, X86::SUB16rm },
+      { X86::SUB32rr, X86::SUB32rm },
+      { X86::SUB64rr, X86::SUB64rm },
+      { X86::SUB8rr, X86::SUB8rm },
+      { X86::SUBPDrr, X86::SUBPDrm },
+      { X86::SUBPSrr, X86::SUBPSrm },
+      { X86::SUBSDrr, X86::SUBSDrm },
+      { X86::SUBSSrr, X86::SUBSSrm },
+      // FIXME: TEST*rr -> swapped operand of TEST*mr.
+      { X86::UNPCKHPDrr, X86::UNPCKHPDrm },
+      { X86::UNPCKHPSrr, X86::UNPCKHPSrm },
+      { X86::UNPCKLPDrr, X86::UNPCKLPDrm },
+      { X86::UNPCKLPSrr, X86::UNPCKLPSrm },
+      { X86::XOR16rr, X86::XOR16rm },
+      { X86::XOR32rr, X86::XOR32rm },
+      { X86::XOR64rr, X86::XOR64rm },
+      { X86::XOR8rr, X86::XOR8rm },
+      { X86::XORPDrr, X86::XORPDrm },
+      { X86::XORPSrr, X86::XORPSrm }
+    };
+    ASSERT_SORTED(OpcodeTable);
+    OpcodeTablePtr = OpcodeTable;
+    OpcodeTableSize = ARRAY_SIZE(OpcodeTable);
+  }
+
+  // If table selected...
+  if (OpcodeTablePtr) {
+    // Find the Opcode to fuse
+    unsigned fromOpcode = MI->getOpcode();
+    // Lookup fromOpcode in table
+    if (const TableEntry *Entry = TableLookup(OpcodeTablePtr, OpcodeTableSize,
+                                              fromOpcode)) {
+      if (isTwoAddrFold)
+        NewMI = FuseTwoAddrInst(Entry->to, FrameIndex, MI, TII);
+      else
+        NewMI = FuseInst(Entry->to, i, FrameIndex, MI, TII);
+      NewMI->copyKillDeadInfo(MI);
+      return NewMI;
     }
   }
+
+  // No fusion
   if (PrintFailedFusing)
-    std::cerr << "We failed to fuse: " << *MI;
+    cerr << "We failed to fuse ("
+         << ((i == 1) ? "r" : "s") << "): " << *MI;
   return NULL;
 }
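
foldMemoryOperand is the target-side half of a bargain with the spiller: once the register allocator has assigned operand i of MI to a stack slot, it first asks the target to rewrite the instruction to address the slot directly, and only emits an explicit reload or store when NULL comes back. A sketch of the calling convention (caller and helper names assumed; the real call sites live in the allocator/spiller, not in this file):

    // OpNo: which operand was spilled; Slot: its frame index.
    if (MachineInstr *Folded = RegInfo.foldMemoryOperand(MI, OpNo, Slot)) {
      // Success: one fused instruction, no separate load/store needed.
      replaceInstruction(MI, Folded);      // hypothetical helper
    } else {
      insertReloadBefore(MI, OpNo, Slot);  // hypothetical fallback
    }
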
+
+const unsigned *X86RegisterInfo::getCalleeSaveRegs() const {
+  static const unsigned CalleeSaveRegs32Bit[] = {
+    X86::ESI, X86::EDI, X86::EBX, X86::EBP,  0
+  };
+  static const unsigned CalleeSaveRegs64Bit[] = {
+    X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
+  };
+
+  return Is64Bit ? CalleeSaveRegs64Bit : CalleeSaveRegs32Bit;
+}
+
+const TargetRegisterClass* const*
+X86RegisterInfo::getCalleeSaveRegClasses() const {
+  static const TargetRegisterClass * const CalleeSaveRegClasses32Bit[] = {
+    &X86::GR32RegClass, &X86::GR32RegClass,
+    &X86::GR32RegClass, &X86::GR32RegClass,  0
+  };
+  static const TargetRegisterClass * const CalleeSaveRegClasses64Bit[] = {
+    &X86::GR64RegClass, &X86::GR64RegClass,
+    &X86::GR64RegClass, &X86::GR64RegClass,
+    &X86::GR64RegClass, &X86::GR64RegClass, 0
+  };
+
+  return Is64Bit ? CalleeSaveRegClasses64Bit : CalleeSaveRegClasses32Bit;
+}
+
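
Both arrays end in a 0 sentinel rather than carrying an explicit length; generic frame-lowering code walks them like C strings. A sketch of the consuming loop (consumer name assumed):

    for (unsigned i = 0; CalleeSaveRegs[i]; ++i)
      spillCalleeSavedReg(CalleeSaveRegs[i], CalleeSaveRegClasses[i]);
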
 //===----------------------------------------------------------------------===//
 // Stack Frame Processing methods
 //===----------------------------------------------------------------------===//
 
@@ -413,8 +883,10 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr* MI,
 // pointer register.  This is true if the function has variable sized allocas or
 // if frame pointer elimination is disabled.
 //
-static bool hasFP(MachineFunction &MF) {
-  return NoFramePointerElim || MF.getFrameInfo()->hasVarSizedObjects();
+static bool hasFP(const MachineFunction &MF) {
+  return (NoFramePointerElim ||
+          MF.getFrameInfo()->hasVarSizedObjects() ||
+          MF.getInfo<X86FunctionInfo>()->getForceFramePointer());
 }
 
 void X86RegisterInfo::
@@ -435,16 +907,19 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
     MachineInstr *New = 0;
     if (Old->getOpcode() == X86::ADJCALLSTACKDOWN) {
-      New=BuildMI(X86::SUB32ri, 1, X86::ESP, MachineOperand::UseAndDef)
-            .addZImm(Amount);
+      New=BuildMI(TII.get(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri), StackPtr)
+            .addReg(StackPtr).addImm(Amount);
     } else {
       assert(Old->getOpcode() == X86::ADJCALLSTACKUP);
       // factor out the amount the callee already popped.
       unsigned CalleeAmt = Old->getOperand(1).getImmedValue();
       Amount -= CalleeAmt;
-      if (Amount)
-        New = BuildMI(X86::ADD32ri, 1, X86::ESP,
-                      MachineOperand::UseAndDef).addZImm(Amount);
+      if (Amount) {
+        unsigned Opc = (Amount < 128) ?
+          (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) :
+          (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri);
+        New = BuildMI(TII.get(Opc), StackPtr).addReg(StackPtr).addImm(Amount);
+      }
     }
 
     // Replace the pseudo instruction with a new instruction...
@@ -455,9 +930,11 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
     // something off the stack pointer, add it back.  We do this until we have
     // more advanced stack pointer tracking ability.
     if (unsigned CalleeAmt = I->getOperand(1).getImmedValue()) {
+      unsigned Opc = (CalleeAmt < 128) ?
+        (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) :
+        (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri);
       MachineInstr *New =
-        BuildMI(X86::SUB32ri, 1, X86::ESP,
-                MachineOperand::UseAndDef).addZImm(CalleeAmt);
+        BuildMI(TII.get(Opc), StackPtr).addReg(StackPtr).addImm(CalleeAmt);
       MBB.insert(I, New);
     }
   }
 }
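
The effect of eliminateCallFramePseudoInstr is easiest to see on a concrete call. Assuming a 32-bit function whose call frames are adjusted around each call (rather than folded into the fixed frame on entry), a callee taking 12 bytes of arguments and popping 4 of them itself lowers as:

    // ADJCALLSTACKDOWN 12      -->   sub $12, %esp
    // call foo                       call foo
    // ADJCALLSTACKUP 12, 4     -->   add $8, %esp    ; 12 - 4 popped by foo
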
@@ -475,28 +952,27 @@ void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II) const{
   }
 
   int FrameIndex = MI.getOperand(i).getFrameIndex();
-  // This must be part of a four operand memory reference.  Replace the
-  // FrameIndex with base register with EBP.  Add add an offset to the offset.
-  MI.SetMachineOperandReg(i, hasFP(MF) ? X86::EBP : X86::ESP);
+  // FrameIndex with base register with EBP.  Add an offset to the offset.
+  MI.getOperand(i).ChangeToRegister(hasFP(MF) ? FramePtr : StackPtr, false);
 
   // Now add the frame object offset to the offset from EBP.
   int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
-               MI.getOperand(i+3).getImmedValue()+4;
+               MI.getOperand(i+3).getImmedValue()+SlotSize;
 
   if (!hasFP(MF))
     Offset += MF.getFrameInfo()->getStackSize();
   else
-    Offset += 4;  // Skip the saved EBP
+    Offset += SlotSize;  // Skip the saved EBP
 
-  MI.SetMachineOperandConst(i+3, MachineOperand::MO_SignExtendedImmed, Offset);
+  MI.getOperand(i+3).ChangeToImmediate(Offset);
 }
 
 void X86RegisterInfo::
 processFunctionBeforeFrameFinalized(MachineFunction &MF) const{
   if (hasFP(MF)) {
     // Create a frame entry for the EBP register that must be saved.
-    int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, -8);
+    int FrameIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize,SlotSize * -2);
     assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
            "Slot for EBP register must be last in order to be found!");
   }
 }
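
The offset arithmetic in eliminateFrameIndex can be followed with assumed numbers. Take the 32-bit frame-pointer case (SlotSize = 4), an object at frame offset -12, and a zero displacement already on the instruction:

    // Offset = -12 + 0 + SlotSize  = -8   (the +SlotSize bias in the code)
    // Offset += SlotSize           = -4   (skip the saved EBP)
    // => the memory operand becomes [%EBP + -4]
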
@@ -506,57 +982,83 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
   MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB
   MachineBasicBlock::iterator MBBI = MBB.begin();
   MachineFrameInfo *MFI = MF.getFrameInfo();
+  unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+  const Function* Fn = MF.getFunction();
+  const X86Subtarget* Subtarget = &MF.getTarget().getSubtarget<X86Subtarget>();
   MachineInstr *MI;
-
+
   // Get the number of bytes to allocate from the FrameInfo
   unsigned NumBytes = MFI->getStackSize();
-  if (hasFP(MF)) {
-    // Get the offset of the stack slot for the EBP register... which is
-    // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
-    int EBPOffset = MFI->getObjectOffset(MFI->getObjectIndexBegin())+4;
+  if (MFI->hasCalls() || MF.getFrameInfo()->hasVarSizedObjects()) {
+    // When we have no frame pointer, we reserve argument space for call sites
+    // in the function immediately on entry to the current function.  This
+    // eliminates the need for add/sub ESP brackets around call sites.
+    //
+    if (!hasFP(MF))
+      NumBytes += MFI->getMaxCallFrameSize();
+
+    // Round the size to a multiple of the alignment (don't forget the 4/8 byte
+    // offset though).
+    NumBytes = ((NumBytes+SlotSize)+Align-1)/Align*Align - SlotSize;
+  }
 
-  if (NumBytes) {  // adjust stack pointer: ESP -= numbytes
-    MI= BuildMI(X86::SUB32ri, 1, X86::ESP, MachineOperand::UseAndDef)
-          .addZImm(NumBytes);
+  // Update frame info to pretend that this is part of the stack...
+  MFI->setStackSize(NumBytes);
+
+  if (NumBytes) {  // adjust stack pointer: ESP -= numbytes
+    if (NumBytes >= 4096 && Subtarget->isTargetCygwin()) {
+      // Function prologue calls _alloca to probe the stack when allocating
+      // more than 4k bytes in one go. Touching the stack at 4K increments is
+      // necessary to ensure that the guard pages used by the OS virtual memory
+      // manager are allocated in correct sequence.
+      MI = BuildMI(TII.get(X86::MOV32ri), X86::EAX).addImm(NumBytes);
+      MBB.insert(MBBI, MI);
+      MI = BuildMI(TII.get(X86::CALLpcrel32)).addExternalSymbol("_alloca");
+      MBB.insert(MBBI, MI);
+    } else {
+      unsigned Opc = (NumBytes < 128) ?
+        (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) :
+        (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri);
+      MI= BuildMI(TII.get(Opc), StackPtr).addReg(StackPtr).addImm(NumBytes);
       MBB.insert(MBBI, MI);
     }
+  }
 
+  if (hasFP(MF)) {
+    // Get the offset of the stack slot for the EBP register... which is
+    // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
+    int EBPOffset = MFI->getObjectOffset(MFI->getObjectIndexBegin())+SlotSize;
+    // Update the frame offset adjustment.
+    MFI->setOffsetAdjustment(SlotSize-NumBytes);
+
     // Save EBP into the appropriate stack slot...
-    MI = addRegOffset(BuildMI(X86::MOV32mr, 5),    // mov [ESP-<offset>], EBP
-                      X86::ESP, EBPOffset+NumBytes).addReg(X86::EBP);
+    // mov [ESP-<offset>], EBP
+    MI = addRegOffset(BuildMI(TII.get(Is64Bit ? X86::MOV64mr : X86::MOV32mr)),
+                      StackPtr, EBPOffset+NumBytes).addReg(FramePtr);
     MBB.insert(MBBI, MI);
 
     // Update EBP with the new base value...
-    if (NumBytes == 4)    // mov EBP, ESP
-      MI = BuildMI(X86::MOV32rr, 2, X86::EBP).addReg(X86::ESP);
+    if (NumBytes == SlotSize)    // mov EBP, ESP
+      MI = BuildMI(TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr).
+        addReg(StackPtr);
     else                  // lea EBP, [ESP+StackSize]
-      MI = addRegOffset(BuildMI(X86::LEA32r, 5, X86::EBP), X86::ESP,NumBytes-4);
+      MI = addRegOffset(BuildMI(TII.get(Is64Bit ? X86::LEA64r : X86::LEA32r),
+                                FramePtr), StackPtr, NumBytes-SlotSize);
 
     MBB.insert(MBBI, MI);
+  }
 
-  } else {
-    if (MFI->hasCalls()) {
-      // When we have no frame pointer, we reserve argument space for call sites
-      // in the function immediately on entry to the current function.  This
-      // eliminates the need for add/sub ESP brackets around call sites.
-      //
-      NumBytes += MFI->getMaxCallFrameSize();
-
-      // Round the size to a multiple of the alignment (don't forget the 4 byte
-      // offset though).
-      unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
-      NumBytes = ((NumBytes+4)+Align-1)/Align*Align - 4;
-    }
-
-    // Update frame info to pretend that this is part of the stack...
-    MFI->setStackSize(NumBytes);
+  // If it's main() on Cygwin\Mingw32 we should align stack as well
+  if (Fn->hasExternalLinkage() && Fn->getName() == "main" &&
+      Subtarget->isTargetCygwin()) {
+    MI= BuildMI(TII.get(X86::AND32ri), X86::ESP).addReg(X86::ESP).addImm(-Align);
+    MBB.insert(MBBI, MI);
 
-    if (NumBytes) {
-      // adjust stack pointer: ESP -= numbytes
-      MI= BuildMI(X86::SUB32ri, 1, X86::ESP, MachineOperand::UseAndDef)
-            .addZImm(NumBytes);
-      MBB.insert(MBBI, MI);
-    }
+    // Probe the stack
+    MI = BuildMI(TII.get(X86::MOV32ri), X86::EAX).addImm(Align);
+    MBB.insert(MBBI, MI);
+    MI = BuildMI(TII.get(X86::CALLpcrel32)).addExternalSymbol("_alloca");
+    MBB.insert(MBBI, MI);
   }
 }
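
The rounding expression in the prologue is worth evaluating once by hand. With assumed values NumBytes = 20, SlotSize = 4 and Align = 16:

    // NumBytes = ((20 + 4) + 16 - 1) / 16 * 16 - 4
    //          = (39 / 16) * 16 - 4
    //          = 32 - 4 = 28
    // 28 bytes of locals plus the 4-byte slot already occupied on entry
    // gives 32, a multiple of the alignment.
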
+
+unsigned X86RegisterInfo::getRARegister() const {
+  return X86::ST0;  // use a non-register register
+}
+
+unsigned X86RegisterInfo::getFrameRegister(MachineFunction &MF) const {
+  return hasFP(MF) ? FramePtr : StackPtr;
+}
+
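getFrameRegister encodes the usual x86 convention: frame-relative addressing (for example in debug info) is based on EBP/RBP only when the function actually keeps a frame pointer, and falls back to ESP/RSP otherwise. A minimal sketch of that policy (illustrative only; the struct and register strings are stand-ins, not LLVM types):

// Toy model of the frame-register choice made by getFrameRegister above.
#include <cassert>
#include <cstring>

struct MachineFunctionModel { bool HasFramePointer; bool Is64Bit; };

static const char *frameRegister(const MachineFunctionModel &MF) {
  if (MF.HasFramePointer)
    return MF.Is64Bit ? "RBP" : "EBP";  // fixed base across the function
  return MF.Is64Bit ? "RSP" : "ESP";    // otherwise address off the SP
}

int main() {
  assert(std::strcmp(frameRegister({true, false}), "EBP") == 0);
  assert(std::strcmp(frameRegister({false, true}), "RSP") == 0);
  return 0;
}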
+namespace llvm {
+unsigned getX86SubSuperRegister(unsigned Reg, MVT::ValueType VT, bool High) {
+  switch (VT) {
+  default: return Reg;
+  case MVT::i8:
+    if (High) {
+      switch (Reg) {
+      default: return 0;
+      case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+        return X86::AH;
+      case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+        return X86::DH;
+      case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+        return X86::CH;
+      case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+        return X86::BH;
+      }
+    } else {
+      switch (Reg) {
+      default: return 0;
+      case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+        return X86::AL;
+      case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+        return X86::DL;
+      case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+        return X86::CL;
+      case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+        return X86::BL;
+      case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+        return X86::SIL;
+      case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+        return X86::DIL;
+      case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+        return X86::BPL;
+      case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+        return X86::SPL;
+      case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+        return X86::R8B;
+      case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+        return X86::R9B;
+      case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+        return X86::R10B;
+      case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+        return X86::R11B;
+      case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+        return X86::R12B;
+      case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+        return X86::R13B;
+      case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+        return X86::R14B;
+      case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+        return X86::R15B;
+      }
+    }
+  case MVT::i16:
+    switch (Reg) {
+    default: return Reg;
+    case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+      return X86::AX;
+    case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+      return X86::DX;
+    case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+      return X86::CX;
+    case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+      return X86::BX;
+    case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+      return X86::SI;
+    case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+      return X86::DI;
+    case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+      return X86::BP;
+    case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+      return X86::SP;
+    case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+      return X86::R8W;
+    case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+      return X86::R9W;
+    case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+      return X86::R10W;
+    case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+      return X86::R11W;
+    case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+      return X86::R12W;
+    case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+      return X86::R13W;
+    case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+      return X86::R14W;
+    case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+      return X86::R15W;
+    }
+  case MVT::i32:
+    switch (Reg) {
+    default: return Reg;
+    case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+      return X86::EAX;
+    case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+      return X86::EDX;
+    case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+      return X86::ECX;
+    case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+      return X86::EBX;
+    case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+      return X86::ESI;
+    case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+      return X86::EDI;
+    case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+      return X86::EBP;
+    case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+      return X86::ESP;
+    case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+      return X86::R8D;
+    case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+      return X86::R9D;
+    case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+      return X86::R10D;
+    case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+      return X86::R11D;
+    case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+      return X86::R12D;
+    case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+      return X86::R13D;
+    case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+      return X86::R14D;
+    case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+      return X86::R15D;
+    }
+  case MVT::i64:
+    switch (Reg) {
+    default: return Reg;
+    case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+      return X86::RAX;
+    case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+      return X86::RDX;
+    case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+      return X86::RCX;
+    case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+      return X86::RBX;
+    case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+      return X86::RSI;
+    case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+      return X86::RDI;
+    case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+      return X86::RBP;
+    case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+      return X86::RSP;
+    case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+      return X86::R8;
+    case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+      return X86::R9;
+    case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+      return X86::R10;
+    case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+      return X86::R11;
+    case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+      return X86::R12;
+    case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+      return X86::R13;
+    case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+      return X86::R14;
+    case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+      return X86::R15;
+    }
+  }
+
+  return Reg;
+}
+}
+
 #include "X86GenRegisterInfo.inc"
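getX86SubSuperRegister above is one large switch, but the behavior is a pure lookup: every general-purpose register belongs to a family, and the function returns the family member of the requested width, with the wrinkle that only the four legacy families (A, B, C, D) have a high-8 member, so the High path yields 0 for everything else. A table-driven equivalent in miniature (illustrative only; toy string names and a subset of families, not LLVM's register enums):

// Toy, table-driven equivalent of the sub/super-register lookup above.
#include <cassert>
#include <cstring>

// One row per register family: {8-bit low, 8-bit high or null, 16, 32, 64}.
struct GPRFamily { const char *Names[5]; };

static const GPRFamily Families[] = {
  {{"AL",   "AH",    "AX",   "EAX",  "RAX"}},
  {{"BL",   "BH",    "BX",   "EBX",  "RBX"}},
  {{"SIL",  nullptr, "SI",   "ESI",  "RSI"}},   // no high-8 register
  {{"R10B", nullptr, "R10W", "R10D", "R10"}},   // no high-8 register
};

// Find the family containing Reg, then return the member at WidthIdx
// (0 = i8 low, 1 = i8 high, 2 = i16, 3 = i32, 4 = i64); null if absent.
static const char *subSuperReg(const char *Reg, int WidthIdx) {
  for (const GPRFamily &F : Families)
    for (const char *N : F.Names)
      if (N && std::strcmp(N, Reg) == 0)
        return F.Names[WidthIdx];
  return nullptr;
}

int main() {
  assert(std::strcmp(subSuperReg("EAX", 1), "AH") == 0);  // high-8 of EAX
  assert(std::strcmp(subSuperReg("SIL", 2), "SI") == 0);  // i16 super of SIL
  assert(subSuperReg("ESI", 1) == nullptr);               // ESI has no high-8
  return 0;
}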