X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86Schedule.td;h=a261356afe6a6aa9d2e6747e62f89f8b68bf8b03;hb=41ab11ccf0d228f0f442b6852caa196360b65d31;hp=f670f28b443f91ed398fcf19186024a666cf7025;hpb=d05b46115f5049b7b094d4049aa88f09f7d6b65a;p=oota-llvm.git diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index f670f28b443..a261356afe6 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -7,9 +7,131 @@ // //===----------------------------------------------------------------------===// +// InstrSchedModel annotations for out-of-order CPUs. +// +// These annotations are independent of the itinerary classes defined below. + +// Instructions with folded loads need to read the memory operand immediately, +// but other register operands don't have to be read until the load is ready. +// These operands are marked with ReadAfterLd. +def ReadAfterLd : SchedRead; + +// Instructions with both a load and a store folded are modeled as a folded +// load + WriteRMW. +def WriteRMW : SchedWrite; + +// Most instructions can fold loads, so almost every SchedWrite comes in two +// variants: With and without a folded load. +// An X86FoldableSchedWrite holds a reference to the corresponding SchedWrite +// with a folded load. +class X86FoldableSchedWrite : SchedWrite { + // The SchedWrite to use when a load is folded into the instruction. + SchedWrite Folded; +} + +// Multiclass that produces a linked pair of SchedWrites. +multiclass X86SchedWritePair { + // Register-Memory operation. + def Ld : SchedWrite; + // Register-Register operation. + def NAME : X86FoldableSchedWrite { + let Folded = !cast(NAME#"Ld"); + } +} + +// Arithmetic. +defm WriteALU : X86SchedWritePair; // Simple integer ALU op. +defm WriteIMul : X86SchedWritePair; // Integer multiplication. +def WriteIMulH : SchedWrite; // Integer multiplication, high part. +defm WriteIDiv : X86SchedWritePair; // Integer division. +def WriteLEA : SchedWrite; // LEA instructions can't fold loads. + +// Integer shifts and rotates. +defm WriteShift : X86SchedWritePair; + +// Loads, stores, and moves, not folded with other operations. +def WriteLoad : SchedWrite; +def WriteStore : SchedWrite; +def WriteMove : SchedWrite; + +// Idioms that clear a register, like xorps %xmm0, %xmm0. +// These can often bypass execution ports completely. +def WriteZero : SchedWrite; + +// Branches don't produce values, so they have no latency, but they still +// consume resources. Indirect branches can fold loads. +defm WriteJump : X86SchedWritePair; + +// Floating point. This covers both scalar and vector operations. +defm WriteFAdd : X86SchedWritePair; // Floating point add/sub/compare. +defm WriteFMul : X86SchedWritePair; // Floating point multiplication. +defm WriteFDiv : X86SchedWritePair; // Floating point division. +defm WriteFSqrt : X86SchedWritePair; // Floating point square root. +defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal estimate. +defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate. +defm WriteFMA : X86SchedWritePair; // Fused Multiply Add. +defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles. +defm WriteFBlend : X86SchedWritePair; // Floating point vector blends. +defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends. + +// FMA Scheduling helper class. +class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } + +// Vector integer operations. +defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals. +defm WriteVecShift : X86SchedWritePair; // Vector integer shifts. +defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply. +defm WriteShuffle : X86SchedWritePair; // Vector shuffles. +defm WriteBlend : X86SchedWritePair; // Vector blends. +defm WriteVarBlend : X86SchedWritePair; // Vector variable blends. +defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD. + +// Vector bitwise operations. +// These are often used on both floating point and integer vectors. +defm WriteVecLogic : X86SchedWritePair; // Vector and/or/xor. + +// Conversion between integer and float. +defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer. +defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float. +defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion. + +// Strings instructions. +// Packed Compare Implicit Length Strings, Return Mask +defm WritePCmpIStrM : X86SchedWritePair; +// Packed Compare Explicit Length Strings, Return Mask +defm WritePCmpEStrM : X86SchedWritePair; +// Packed Compare Implicit Length Strings, Return Index +defm WritePCmpIStrI : X86SchedWritePair; +// Packed Compare Explicit Length Strings, Return Index +defm WritePCmpEStrI : X86SchedWritePair; + +// AES instructions. +defm WriteAESDecEnc : X86SchedWritePair; // Decryption, encryption. +defm WriteAESIMC : X86SchedWritePair; // InvMixColumn. +defm WriteAESKeyGen : X86SchedWritePair; // Key Generation. + +// Carry-less multiplication instructions. +defm WriteCLMul : X86SchedWritePair; + +// Catch-all for expensive system instructions. +def WriteSystem : SchedWrite; + +// AVX2. +defm WriteFShuffle256 : X86SchedWritePair; // Fp 256-bit width vector shuffles. +defm WriteShuffle256 : X86SchedWritePair; // 256-bit width vector shuffles. +defm WriteVarVecShift : X86SchedWritePair; // Variable vector shifts. + +// Old microcoded instructions that nobody use. +def WriteMicrocoded : SchedWrite; + +// Fence instructions. +def WriteFence : SchedWrite; + +// Nop, not very useful expect it provides a model for nops! +def WriteNop : SchedWrite; + //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for X86 -def IIC_DEFAULT : InstrItinClass; def IIC_ALU_MEM : InstrItinClass; def IIC_ALU_NONMEM : InstrItinClass; def IIC_LEA : InstrItinClass; @@ -55,9 +177,12 @@ def IIC_IDIV64 : InstrItinClass; // neg/not/inc/dec def IIC_UNARY_REG : InstrItinClass; def IIC_UNARY_MEM : InstrItinClass; -// add/sub/and/or/xor/adc/sbc/cmp/test +// add/sub/and/or/xor/sbc/cmp/test def IIC_BIN_MEM : InstrItinClass; def IIC_BIN_NONMEM : InstrItinClass; +// adc/sbc +def IIC_BIN_CARRY_MEM : InstrItinClass; +def IIC_BIN_CARRY_NONMEM : InstrItinClass; // shift/rotate def IIC_SR : InstrItinClass; // shift double @@ -164,11 +289,11 @@ def IIC_SSE_INTSH_P_RR : InstrItinClass; def IIC_SSE_INTSH_P_RM : InstrItinClass; def IIC_SSE_INTSH_P_RI : InstrItinClass; -def IIC_SSE_CMPP_RR : InstrItinClass; -def IIC_SSE_CMPP_RM : InstrItinClass; +def IIC_SSE_INTSHDQ_P_RI : InstrItinClass; def IIC_SSE_SHUFP : InstrItinClass; -def IIC_SSE_PSHUF : InstrItinClass; +def IIC_SSE_PSHUF_RI : InstrItinClass; +def IIC_SSE_PSHUF_MI : InstrItinClass; def IIC_SSE_UNPCK : InstrItinClass; @@ -181,10 +306,19 @@ def IIC_SSE_PINSRW : InstrItinClass; def IIC_SSE_PABS_RR : InstrItinClass; def IIC_SSE_PABS_RM : InstrItinClass; -def IIC_SSE_SQRTP_RR : InstrItinClass; -def IIC_SSE_SQRTP_RM : InstrItinClass; -def IIC_SSE_SQRTS_RR : InstrItinClass; -def IIC_SSE_SQRTS_RM : InstrItinClass; +def IIC_SSE_SQRTPS_RR : InstrItinClass; +def IIC_SSE_SQRTPS_RM : InstrItinClass; +def IIC_SSE_SQRTSS_RR : InstrItinClass; +def IIC_SSE_SQRTSS_RM : InstrItinClass; +def IIC_SSE_SQRTPD_RR : InstrItinClass; +def IIC_SSE_SQRTPD_RM : InstrItinClass; +def IIC_SSE_SQRTSD_RR : InstrItinClass; +def IIC_SSE_SQRTSD_RM : InstrItinClass; + +def IIC_SSE_RSQRTPS_RR : InstrItinClass; +def IIC_SSE_RSQRTPS_RM : InstrItinClass; +def IIC_SSE_RSQRTSS_RR : InstrItinClass; +def IIC_SSE_RSQRTSS_RM : InstrItinClass; def IIC_SSE_RCPP_RR : InstrItinClass; def IIC_SSE_RCPP_RM : InstrItinClass; @@ -226,7 +360,8 @@ def IIC_SSE_PSIGN_RM : InstrItinClass; def IIC_SSE_PMADD : InstrItinClass; def IIC_SSE_PMULHRSW : InstrItinClass; -def IIC_SSE_PALIGNR : InstrItinClass; +def IIC_SSE_PALIGNRR : InstrItinClass; +def IIC_SSE_PALIGNRM : InstrItinClass; def IIC_SSE_MWAIT : InstrItinClass; def IIC_SSE_MONITOR : InstrItinClass; @@ -402,8 +537,8 @@ def IIC_PUSH_REG : InstrItinClass; def IIC_PUSH_F : InstrItinClass; def IIC_PUSH_A : InstrItinClass; def IIC_BSWAP : InstrItinClass; -def IIC_BSF : InstrItinClass; -def IIC_BSR : InstrItinClass; +def IIC_BIT_SCAN_MEM : InstrItinClass; +def IIC_BIT_SCAN_REG : InstrItinClass; def IIC_MOVS : InstrItinClass; def IIC_STOS : InstrItinClass; def IIC_SCAS : InstrItinClass; @@ -450,12 +585,66 @@ def IIC_BOUND : InstrItinClass; def IIC_ARPL_REG : InstrItinClass; def IIC_ARPL_MEM : InstrItinClass; def IIC_MOVBE : InstrItinClass; +def IIC_AES : InstrItinClass; +def IIC_BLEND_MEM : InstrItinClass; +def IIC_BLEND_NOMEM : InstrItinClass; +def IIC_CBW : InstrItinClass; +def IIC_CRC32_REG : InstrItinClass; +def IIC_CRC32_MEM : InstrItinClass; +def IIC_SSE_DPPD_RR : InstrItinClass; +def IIC_SSE_DPPD_RM : InstrItinClass; +def IIC_SSE_DPPS_RR : InstrItinClass; +def IIC_SSE_DPPS_RM : InstrItinClass; +def IIC_MMX_EMMS : InstrItinClass; +def IIC_SSE_EXTRACTPS_RR : InstrItinClass; +def IIC_SSE_EXTRACTPS_RM : InstrItinClass; +def IIC_SSE_INSERTPS_RR : InstrItinClass; +def IIC_SSE_INSERTPS_RM : InstrItinClass; +def IIC_SSE_MPSADBW_RR : InstrItinClass; +def IIC_SSE_MPSADBW_RM : InstrItinClass; +def IIC_SSE_PMULLD_RR : InstrItinClass; +def IIC_SSE_PMULLD_RM : InstrItinClass; +def IIC_SSE_ROUNDPS_REG : InstrItinClass; +def IIC_SSE_ROUNDPS_MEM : InstrItinClass; +def IIC_SSE_ROUNDPD_REG : InstrItinClass; +def IIC_SSE_ROUNDPD_MEM : InstrItinClass; +def IIC_SSE_POPCNT_RR : InstrItinClass; +def IIC_SSE_POPCNT_RM : InstrItinClass; +def IIC_SSE_PCLMULQDQ_RR : InstrItinClass; +def IIC_SSE_PCLMULQDQ_RM : InstrItinClass; def IIC_NOP : InstrItinClass; //===----------------------------------------------------------------------===// // Processor instruction itineraries. -def GenericItineraries : ProcessorItineraries<[], [], []>; +// IssueWidth is analogous to the number of decode units. Core and its +// descendents, including Nehalem and SandyBridge have 4 decoders. +// Resources beyond the decoder operate on micro-ops and are bufferred +// so adjacent micro-ops don't directly compete. +// +// MicroOpBufferSize > 1 indicates that RAW dependencies can be +// decoded in the same cycle. The value 32 is a reasonably arbitrary +// number of in-flight instructions. +// +// HighLatency=10 is optimistic. X86InstrInfo::isHighLatencyDef +// indicates high latency opcodes. Alternatively, InstrItinData +// entries may be included here to define specific operand +// latencies. Since these latencies are not used for pipeline hazards, +// they do not need to be exact. +// +// The GenericModel contains no instruction itineraries. +def GenericModel : SchedMachineModel { + let IssueWidth = 4; + let MicroOpBufferSize = 32; + let LoadLatency = 4; + let HighLatency = 10; + let PostRAScheduler = 0; +} include "X86ScheduleAtom.td" +include "X86SchedSandyBridge.td" +include "X86SchedHaswell.td" +include "X86ScheduleSLM.td" +include "X86ScheduleBtVer2.td" +