{ X86::BT64ri8, X86::BT64mi8, 1, 0 },
{ X86::CALL32r, X86::CALL32m, 1, 0 },
{ X86::CALL64r, X86::CALL64m, 1, 0 },
+ { X86::WINCALL64r, X86::WINCALL64m, 1, 0 },
{ X86::CMP16ri, X86::CMP16mi, 1, 0 },
{ X86::CMP16ri8, X86::CMP16mi8, 1, 0 },
{ X86::CMP16rr, X86::CMP16mr, 1, 0 },
{ X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, 16 },
{ X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, 16 },
{ X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 },
- { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm, 0 },
- { X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm, 0 },
+ { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 },
+ { X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
{ X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
{ X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
{ X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
{ X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
{ X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm, 0 },
{ X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm, 0 },
- { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm, 16 },
- { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm, 16 },
+ { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, 16 },
+ { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, 16 },
{ X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
{ X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 },
{ X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 },
{ X86::CMOVB16rr, X86::CMOVB16rm, 0 },
{ X86::CMOVB32rr, X86::CMOVB32rm, 0 },
{ X86::CMOVB64rr, X86::CMOVB64rm, 0 },
- { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 },
- { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 },
- { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 },
+ { X86::CMOVBErr16, X86::CMOVBErm16, 0 },
+ { X86::CMOVBErr32, X86::CMOVBErm32, 0 },
+ { X86::CMOVBErr64, X86::CMOVBErm64, 0 },
{ X86::CMOVE16rr, X86::CMOVE16rm, 0 },
{ X86::CMOVE32rr, X86::CMOVE32rm, 0 },
{ X86::CMOVE64rr, X86::CMOVE64rm, 0 },
case X86::CMOVNE16rr:
case X86::CMOVNE32rr:
case X86::CMOVNE64rr:
- case X86::CMOVBE16rr:
- case X86::CMOVBE32rr:
- case X86::CMOVBE64rr:
+ case X86::CMOVBErr16:
+ case X86::CMOVBErr32:
+ case X86::CMOVBErr64:
case X86::CMOVA16rr:
case X86::CMOVA32rr:
case X86::CMOVA64rr:
case X86::CMOVNE16rr: Opc = X86::CMOVE16rr; break;
case X86::CMOVNE32rr: Opc = X86::CMOVE32rr; break;
case X86::CMOVNE64rr: Opc = X86::CMOVE64rr; break;
- case X86::CMOVBE16rr: Opc = X86::CMOVA16rr; break;
- case X86::CMOVBE32rr: Opc = X86::CMOVA32rr; break;
- case X86::CMOVBE64rr: Opc = X86::CMOVA64rr; break;
- case X86::CMOVA16rr: Opc = X86::CMOVBE16rr; break;
- case X86::CMOVA32rr: Opc = X86::CMOVBE32rr; break;
- case X86::CMOVA64rr: Opc = X86::CMOVBE64rr; break;
+ case X86::CMOVBErr16: Opc = X86::CMOVA16rr; break;
+ case X86::CMOVBErr32: Opc = X86::CMOVA32rr; break;
+ case X86::CMOVBErr64: Opc = X86::CMOVA64rr; break;
+ case X86::CMOVA16rr: Opc = X86::CMOVBErr16; break;
+ case X86::CMOVA32rr: Opc = X86::CMOVBErr32; break;
+ case X86::CMOVA64rr: Opc = X86::CMOVBErr64; break;
case X86::CMOVL16rr: Opc = X86::CMOVGE16rr; break;
case X86::CMOVL32rr: Opc = X86::CMOVGE32rr; break;
case X86::CMOVL64rr: Opc = X86::CMOVGE64rr; break;
return X86::GR8_ABCD_HRegClass.contains(Reg);
}
+// Try and copy between VR128/VR64 and GR64 registers.
+static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg) {
+ // SrcReg(VR128) -> DestReg(GR64)
+ // SrcReg(VR64) -> DestReg(GR64)
+ // SrcReg(GR64) -> DestReg(VR128)
+ // SrcReg(GR64) -> DestReg(VR64)
+
+ if (X86::GR64RegClass.contains(DestReg)) {
+ if (X86::VR128RegClass.contains(SrcReg)) {
+ // Copy from a VR128 register to a GR64 register.
+ return X86::MOVPQIto64rr;
+ } else if (X86::VR64RegClass.contains(SrcReg)) {
+ // Copy from a VR64 register to a GR64 register.
+ return X86::MOVSDto64rr;
+ }
+ } else if (X86::GR64RegClass.contains(SrcReg)) {
+ // Copy from a GR64 register to a VR128 register.
+ if (X86::VR128RegClass.contains(DestReg))
+ return X86::MOV64toPQIrr;
+ // Copy from a GR64 register to a VR64 register.
+ else if (X86::VR64RegClass.contains(DestReg))
+ return X86::MOV64toSDrr;
+ }
+
+ return 0;
+}
+
void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
Opc = X86::MOVAPSrr;
else if (X86::VR64RegClass.contains(DestReg, SrcReg))
Opc = X86::MMX_MOVQ64rr;
+ else
+ Opc = CopyToFromAsymmetricReg(DestReg, SrcReg);
if (Opc) {
BuildMI(MBB, MI, DL, get(Opc), DestReg)
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
- assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() &&
- "Stack slot too small for load");
bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
DebugLoc DL = MBB.findDebugLoc(MI);
case X86::Int_CVTSS2SDrr:
case X86::RCPSSr:
case X86::RCPSSr_Int:
- case X86::ROUNDSDr_Int:
- case X86::ROUNDSSr_Int:
+ case X86::ROUNDSDr:
+ case X86::ROUNDSSr:
case X86::RSQRTSSr:
case X86::RSQRTSSr_Int:
case X86::SQRTSSr:
case X86::Int_CVTSS2SDrr:
case X86::RCPSSr:
case X86::RCPSSr_Int:
- case X86::ROUNDSDr_Int:
- case X86::ROUNDSSr_Int:
+ case X86::ROUNDSDr:
+ case X86::ROUNDSSr:
case X86::RSQRTSSr:
case X86::RSQRTSSr_Int:
case X86::SQRTSSr:
Alignment = (*LoadMI->memoperands_begin())->getAlignment();
else
switch (LoadMI->getOpcode()) {
+ case X86::AVX_SET0PSY:
+ case X86::AVX_SET0PDY:
+ Alignment = 32;
+ break;
case X86::V_SET0PS:
case X86::V_SET0PD:
case X86::V_SET0PI:
case X86::V_SETALLONES:
+ case X86::AVX_SET0PS:
+ case X86::AVX_SET0PD:
+ case X86::AVX_SET0PI:
Alignment = 16;
break;
case X86::FsFLD0SD:
} else if (Ops.size() != 1)
return NULL;
+ // Make sure the subregisters match.
+ // Otherwise we risk changing the size of the load.
+ if (LoadMI->getOperand(0).getSubReg() != MI->getOperand(Ops[0]).getSubReg())
+ return NULL;
+
SmallVector<MachineOperand,X86::AddrNumOperands> MOs;
switch (LoadMI->getOpcode()) {
case X86::V_SET0PS:
case X86::V_SET0PD:
case X86::V_SET0PI:
case X86::V_SETALLONES:
+ case X86::AVX_SET0PS:
+ case X86::AVX_SET0PD:
+ case X86::AVX_SET0PI:
+ case X86::AVX_SET0PSY:
+ case X86::AVX_SET0PDY:
case X86::FsFLD0SD:
case X86::FsFLD0SS: {
// Folding a V_SET0P? or V_SETALLONES as a load, to ease register pressure.
// Create a constant-pool entry.
MachineConstantPool &MCP = *MF.getConstantPool();
const Type *Ty;
- if (LoadMI->getOpcode() == X86::FsFLD0SS)
+ unsigned Opc = LoadMI->getOpcode();
+ if (Opc == X86::FsFLD0SS)
Ty = Type::getFloatTy(MF.getFunction()->getContext());
- else if (LoadMI->getOpcode() == X86::FsFLD0SD)
+ else if (Opc == X86::FsFLD0SD)
Ty = Type::getDoubleTy(MF.getFunction()->getContext());
+ else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
+ Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
else
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
const Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ?
case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11:
case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15:
+ case X86::CR8: case X86::CR9: case X86::CR10: case X86::CR11:
+ case X86::CR12: case X86::CR13: case X86::CR14: case X86::CR15:
return true;
}
return false;
// that we don't include here. We don't want to replace instructions selected
// by intrinsics.
static const unsigned ReplaceableInstrs[][3] = {
- //PackedInt PackedSingle PackedDouble
+ //PackedSingle PackedDouble PackedInt
{ X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr },
{ X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm },
{ X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr },
{ X86::V_SET0PS, X86::V_SET0PD, X86::V_SET0PI },
{ X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
{ X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
+ // AVX 128-bit support
+ { X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr },
+ { X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm },
+ { X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr },
+ { X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr },
+ { X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm },
+ { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
+ { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm },
+ { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr },
+ { X86::VANDPSrm, X86::VANDPDrm, X86::VPANDrm },
+ { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr },
+ { X86::VORPSrm, X86::VORPDrm, X86::VPORrm },
+ { X86::VORPSrr, X86::VORPDrr, X86::VPORrr },
+ { X86::AVX_SET0PS, X86::AVX_SET0PD, X86::AVX_SET0PI },
+ { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm },
+ { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr },
};
// FIXME: Some shuffle and unpack instructions have equivalents in different
/// global base register for x86-32.
struct CGBR : public MachineFunctionPass {
static char ID;
- CGBR() : MachineFunctionPass(&ID) {}
+ CGBR() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF) {
const X86TargetMachine *TM =
if (TM->getRelocationModel() != Reloc::PIC_)
return false;
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
+
+ // If we didn't need a GlobalBaseReg, don't insert code.
+ if (GlobalBaseReg == 0)
+ return false;
+
// Insert the set of GlobalBaseReg into the first MBB of the function
MachineBasicBlock &FirstMBB = MF.front();
MachineBasicBlock::iterator MBBI = FirstMBB.begin();
if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT())
PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
else
- PC = TII->getGlobalBaseReg(&MF);
+ PC = GlobalBaseReg;
// Operand of MovePCtoStack is completely ignored by asm printer. It's
// only used in JIT code emission as displacement to pc.
// If we're using vanilla 'GOT' PIC style, we should use relative addressing
// not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) {
- unsigned GlobalBaseReg = TII->getGlobalBaseReg(&MF);
// Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
.addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",