X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86InstrInfo.cpp;h=14382d7a67462e70cad0326d533861c13dd1053b;hb=d350e4757e649dce07f7492115bd9d19c71426bf;hp=61266227db2f4361f9e00868d00199529cd62a51;hpb=134d8eec8789184c7a7290ee101ca3d6f62f384a;p=oota-llvm.git diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 61266227db2..14382d7a674 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -235,6 +235,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::BT64ri8, X86::BT64mi8, 1, 0 }, { X86::CALL32r, X86::CALL32m, 1, 0 }, { X86::CALL64r, X86::CALL64m, 1, 0 }, + { X86::WINCALL64r, X86::WINCALL64m, 1, 0 }, { X86::CMP16ri, X86::CMP16mi, 1, 0 }, { X86::CMP16ri8, X86::CMP16mi8, 1, 0 }, { X86::CMP16rr, X86::CMP16mr, 1, 0 }, @@ -359,8 +360,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, 16 }, { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, 16 }, { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 }, - { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm, 0 }, - { X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm, 0 }, + { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 }, + { X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 }, { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 }, { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 }, { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 }, @@ -369,8 +370,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 }, { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm, 0 }, { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm, 0 }, - { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm, 16 }, - { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm, 16 }, + { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, 16 }, + { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, 16 }, { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 }, { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 }, { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 }, @@ -481,9 +482,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::CMOVB16rr, X86::CMOVB16rm, 0 }, { X86::CMOVB32rr, X86::CMOVB32rm, 0 }, { X86::CMOVB64rr, X86::CMOVB64rm, 0 }, - { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 }, - { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 }, - { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 }, + { X86::CMOVBErr16, X86::CMOVBErm16, 0 }, + { X86::CMOVBErr32, X86::CMOVBErm32, 0 }, + { X86::CMOVBErr64, X86::CMOVBErm64, 0 }, { X86::CMOVE16rr, X86::CMOVE16rm, 0 }, { X86::CMOVE32rr, X86::CMOVE32rm, 0 }, { X86::CMOVE64rr, X86::CMOVE64rm, 0 }, @@ -787,7 +788,7 @@ static bool isFrameStoreOpcode(int Opcode) { unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { if (isFrameLoadOpcode(MI->getOpcode())) - if (isFrameOperand(MI, 1, FrameIndex)) + if (MI->getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex)) return MI->getOperand(0).getReg(); return 0; } @@ -826,7 +827,8 @@ bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI, unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const { if (isFrameStoreOpcode(MI->getOpcode())) - if (isFrameOperand(MI, 0, FrameIndex)) + if (MI->getOperand(X86::AddrNumOperands).getSubReg() == 0 && + isFrameOperand(MI, 0, FrameIndex)) return MI->getOperand(X86::AddrNumOperands).getReg(); return 0; } @@ -1443,9 +1445,9 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr: - case X86::CMOVBE16rr: - case X86::CMOVBE32rr: - case X86::CMOVBE64rr: + case X86::CMOVBErr16: + case X86::CMOVBErr32: + case X86::CMOVBErr64: case X86::CMOVA16rr: case X86::CMOVA32rr: case X86::CMOVA64rr: @@ -1494,12 +1496,12 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { case X86::CMOVNE16rr: Opc = X86::CMOVE16rr; break; case X86::CMOVNE32rr: Opc = X86::CMOVE32rr; break; case X86::CMOVNE64rr: Opc = X86::CMOVE64rr; break; - case X86::CMOVBE16rr: Opc = X86::CMOVA16rr; break; - case X86::CMOVBE32rr: Opc = X86::CMOVA32rr; break; - case X86::CMOVBE64rr: Opc = X86::CMOVA64rr; break; - case X86::CMOVA16rr: Opc = X86::CMOVBE16rr; break; - case X86::CMOVA32rr: Opc = X86::CMOVBE32rr; break; - case X86::CMOVA64rr: Opc = X86::CMOVBE64rr; break; + case X86::CMOVBErr16: Opc = X86::CMOVA16rr; break; + case X86::CMOVBErr32: Opc = X86::CMOVA32rr; break; + case X86::CMOVBErr64: Opc = X86::CMOVA64rr; break; + case X86::CMOVA16rr: Opc = X86::CMOVBErr16; break; + case X86::CMOVA32rr: Opc = X86::CMOVBErr32; break; + case X86::CMOVA64rr: Opc = X86::CMOVBErr64; break; case X86::CMOVL16rr: Opc = X86::CMOVGE16rr; break; case X86::CMOVL32rr: Opc = X86::CMOVGE32rr; break; case X86::CMOVL64rr: Opc = X86::CMOVGE64rr; break; @@ -1843,6 +1845,33 @@ static bool isHReg(unsigned Reg) { return X86::GR8_ABCD_HRegClass.contains(Reg); } +// Try and copy between VR128/VR64 and GR64 registers. +static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg) { + // SrcReg(VR128) -> DestReg(GR64) + // SrcReg(VR64) -> DestReg(GR64) + // SrcReg(GR64) -> DestReg(VR128) + // SrcReg(GR64) -> DestReg(VR64) + + if (X86::GR64RegClass.contains(DestReg)) { + if (X86::VR128RegClass.contains(SrcReg)) { + // Copy from a VR128 register to a GR64 register. + return X86::MOVPQIto64rr; + } else if (X86::VR64RegClass.contains(SrcReg)) { + // Copy from a VR64 register to a GR64 register. + return X86::MOVSDto64rr; + } + } else if (X86::GR64RegClass.contains(SrcReg)) { + // Copy from a GR64 register to a VR128 register. + if (X86::VR128RegClass.contains(DestReg)) + return X86::MOV64toPQIrr; + // Copy from a GR64 register to a VR64 register. + else if (X86::VR64RegClass.contains(DestReg)) + return X86::MOV64toSDrr; + } + + return 0; +} + void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL, unsigned DestReg, unsigned SrcReg, @@ -1867,6 +1896,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = X86::MOVAPSrr; else if (X86::VR64RegClass.contains(DestReg, SrcReg)) Opc = X86::MMX_MOVQ64rr; + else + Opc = CopyToFromAsymmetricReg(DestReg, SrcReg); if (Opc) { BuildMI(MBB, MI, DL, get(Opc), DestReg) @@ -1998,6 +2029,8 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { const MachineFunction &MF = *MBB.getParent(); + assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() && + "Stack slot too small for store"); bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF); unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); DebugLoc DL = MBB.findDebugLoc(MI); @@ -2310,8 +2343,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, case X86::Int_CVTSS2SDrr: case X86::RCPSSr: case X86::RCPSSr_Int: - case X86::ROUNDSDr_Int: - case X86::ROUNDSSr_Int: + case X86::ROUNDSDr: + case X86::ROUNDSSr: case X86::RSQRTSSr: case X86::RSQRTSSr_Int: case X86::SQRTSSr: @@ -2362,8 +2395,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, case X86::Int_CVTSS2SDrr: case X86::RCPSSr: case X86::RCPSSr_Int: - case X86::ROUNDSDr_Int: - case X86::ROUNDSSr_Int: + case X86::ROUNDSDr: + case X86::ROUNDSSr: case X86::RSQRTSSr: case X86::RSQRTSSr_Int: case X86::SQRTSSr: @@ -2377,10 +2410,17 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, Alignment = (*LoadMI->memoperands_begin())->getAlignment(); else switch (LoadMI->getOpcode()) { + case X86::AVX_SET0PSY: + case X86::AVX_SET0PDY: + Alignment = 32; + break; case X86::V_SET0PS: case X86::V_SET0PD: case X86::V_SET0PI: case X86::V_SETALLONES: + case X86::AVX_SET0PS: + case X86::AVX_SET0PD: + case X86::AVX_SET0PI: Alignment = 16; break; case X86::FsFLD0SD: @@ -2407,12 +2447,22 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, } else if (Ops.size() != 1) return NULL; + // Make sure the subregisters match. + // Otherwise we risk changing the size of the load. + if (LoadMI->getOperand(0).getSubReg() != MI->getOperand(Ops[0]).getSubReg()) + return NULL; + SmallVector MOs; switch (LoadMI->getOpcode()) { case X86::V_SET0PS: case X86::V_SET0PD: case X86::V_SET0PI: case X86::V_SETALLONES: + case X86::AVX_SET0PS: + case X86::AVX_SET0PD: + case X86::AVX_SET0PI: + case X86::AVX_SET0PSY: + case X86::AVX_SET0PDY: case X86::FsFLD0SD: case X86::FsFLD0SS: { // Folding a V_SET0P? or V_SETALLONES as a load, to ease register pressure. @@ -2439,10 +2489,13 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // Create a constant-pool entry. MachineConstantPool &MCP = *MF.getConstantPool(); const Type *Ty; - if (LoadMI->getOpcode() == X86::FsFLD0SS) + unsigned Opc = LoadMI->getOpcode(); + if (Opc == X86::FsFLD0SS) Ty = Type::getFloatTy(MF.getFunction()->getContext()); - else if (LoadMI->getOpcode() == X86::FsFLD0SD) + else if (Opc == X86::FsFLD0SD) Ty = Type::getDoubleTy(MF.getFunction()->getContext()); + else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY) + Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8); else Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4); const Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ? @@ -2940,6 +2993,8 @@ bool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) { case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15: case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11: case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15: + case X86::CR8: case X86::CR9: case X86::CR10: case X86::CR11: + case X86::CR12: case X86::CR13: case X86::CR14: case X86::CR15: return true; } return false; @@ -2972,7 +3027,7 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { // that we don't include here. We don't want to replace instructions selected // by intrinsics. static const unsigned ReplaceableInstrs[][3] = { - //PackedInt PackedSingle PackedDouble + //PackedSingle PackedDouble PackedInt { X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr }, { X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm }, { X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr }, @@ -2988,6 +3043,22 @@ static const unsigned ReplaceableInstrs[][3] = { { X86::V_SET0PS, X86::V_SET0PD, X86::V_SET0PI }, { X86::XORPSrm, X86::XORPDrm, X86::PXORrm }, { X86::XORPSrr, X86::XORPDrr, X86::PXORrr }, + // AVX 128-bit support + { X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr }, + { X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm }, + { X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr }, + { X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr }, + { X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm }, + { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr }, + { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm }, + { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr }, + { X86::VANDPSrm, X86::VANDPDrm, X86::VPANDrm }, + { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr }, + { X86::VORPSrm, X86::VORPDrm, X86::VPORrm }, + { X86::VORPSrr, X86::VORPDrr, X86::VPORrr }, + { X86::AVX_SET0PS, X86::AVX_SET0PD, X86::AVX_SET0PI }, + { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm }, + { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr }, }; // FIXME: Some shuffle and unpack instructions have equivalents in different @@ -3026,7 +3097,7 @@ namespace { /// global base register for x86-32. struct CGBR : public MachineFunctionPass { static char ID; - CGBR() : MachineFunctionPass(&ID) {} + CGBR() : MachineFunctionPass(ID) {} virtual bool runOnMachineFunction(MachineFunction &MF) { const X86TargetMachine *TM = @@ -3039,6 +3110,13 @@ namespace { if (TM->getRelocationModel() != Reloc::PIC_) return false; + X86MachineFunctionInfo *X86FI = MF.getInfo(); + unsigned GlobalBaseReg = X86FI->getGlobalBaseReg(); + + // If we didn't need a GlobalBaseReg, don't insert code. + if (GlobalBaseReg == 0) + return false; + // Insert the set of GlobalBaseReg into the first MBB of the function MachineBasicBlock &FirstMBB = MF.front(); MachineBasicBlock::iterator MBBI = FirstMBB.begin(); @@ -3050,7 +3128,7 @@ namespace { if (TM->getSubtarget().isPICStyleGOT()) PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass); else - PC = TII->getGlobalBaseReg(&MF); + PC = GlobalBaseReg; // Operand of MovePCtoStack is completely ignored by asm printer. It's // only used in JIT code emission as displacement to pc. @@ -3059,7 +3137,6 @@ namespace { // If we're using vanilla 'GOT' PIC style, we should use relative addressing // not to pc, but to _GLOBAL_OFFSET_TABLE_ external. if (TM->getSubtarget().isPICStyleGOT()) { - unsigned GlobalBaseReg = TII->getGlobalBaseReg(&MF); // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg) .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",