X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86Schedule.td;h=a261356afe6a6aa9d2e6747e62f89f8b68bf8b03;hb=53d8ef00d82460b9c8ce08617d91bbce8313d4a3;hp=625a05cb8da67f8f6e5df9ffd7957853c05c7618;hpb=a5ce5f36d3a1e312304e8312ca64a1342f5f55a6;p=oota-llvm.git diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index 625a05cb8da..a261356afe6 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -42,6 +42,7 @@ multiclass X86SchedWritePair { // Arithmetic. defm WriteALU : X86SchedWritePair; // Simple integer ALU op. defm WriteIMul : X86SchedWritePair; // Integer multiplication. +def WriteIMulH : SchedWrite; // Integer multiplication, high part. defm WriteIDiv : X86SchedWritePair; // Integer division. def WriteLEA : SchedWrite; // LEA instructions can't fold loads. @@ -62,12 +63,16 @@ def WriteZero : SchedWrite; defm WriteJump : X86SchedWritePair; // Floating point. This covers both scalar and vector operations. -defm WriteFAdd : X86SchedWritePair; // Floating point add/sub/compare. -defm WriteFMul : X86SchedWritePair; // Floating point multiplication. -defm WriteFDiv : X86SchedWritePair; // Floating point division. -defm WriteFSqrt : X86SchedWritePair; // Floating point square root. -defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal. -defm WriteFMA : X86SchedWritePair; // Fused Multiply Add. +defm WriteFAdd : X86SchedWritePair; // Floating point add/sub/compare. +defm WriteFMul : X86SchedWritePair; // Floating point multiplication. +defm WriteFDiv : X86SchedWritePair; // Floating point division. +defm WriteFSqrt : X86SchedWritePair; // Floating point square root. +defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal estimate. +defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate. +defm WriteFMA : X86SchedWritePair; // Fused Multiply Add. +defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles. +defm WriteFBlend : X86SchedWritePair; // Floating point vector blends. +defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends. // FMA Scheduling helper class. class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } @@ -76,23 +81,55 @@ class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals. defm WriteVecShift : X86SchedWritePair; // Vector integer shifts. defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply. +defm WriteShuffle : X86SchedWritePair; // Vector shuffles. +defm WriteBlend : X86SchedWritePair; // Vector blends. +defm WriteVarBlend : X86SchedWritePair; // Vector variable blends. +defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD. // Vector bitwise operations. // These are often used on both floating point and integer vectors. defm WriteVecLogic : X86SchedWritePair; // Vector and/or/xor. -defm WriteShuffle : X86SchedWritePair; // Vector shuffles and blends. // Conversion between integer and float. defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer. defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float. defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion. +// Strings instructions. +// Packed Compare Implicit Length Strings, Return Mask +defm WritePCmpIStrM : X86SchedWritePair; +// Packed Compare Explicit Length Strings, Return Mask +defm WritePCmpEStrM : X86SchedWritePair; +// Packed Compare Implicit Length Strings, Return Index +defm WritePCmpIStrI : X86SchedWritePair; +// Packed Compare Explicit Length Strings, Return Index +defm WritePCmpEStrI : X86SchedWritePair; + +// AES instructions. +defm WriteAESDecEnc : X86SchedWritePair; // Decryption, encryption. +defm WriteAESIMC : X86SchedWritePair; // InvMixColumn. +defm WriteAESKeyGen : X86SchedWritePair; // Key Generation. + +// Carry-less multiplication instructions. +defm WriteCLMul : X86SchedWritePair; + // Catch-all for expensive system instructions. def WriteSystem : SchedWrite; +// AVX2. +defm WriteFShuffle256 : X86SchedWritePair; // Fp 256-bit width vector shuffles. +defm WriteShuffle256 : X86SchedWritePair; // 256-bit width vector shuffles. +defm WriteVarVecShift : X86SchedWritePair; // Variable vector shifts. + // Old microcoded instructions that nobody use. def WriteMicrocoded : SchedWrite; +// Fence instructions. +def WriteFence : SchedWrite; + +// Nop, not very useful expect it provides a model for nops! +def WriteNop : SchedWrite; + //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for X86 def IIC_ALU_MEM : InstrItinClass; @@ -140,9 +177,12 @@ def IIC_IDIV64 : InstrItinClass; // neg/not/inc/dec def IIC_UNARY_REG : InstrItinClass; def IIC_UNARY_MEM : InstrItinClass; -// add/sub/and/or/xor/adc/sbc/cmp/test +// add/sub/and/or/xor/sbc/cmp/test def IIC_BIN_MEM : InstrItinClass; def IIC_BIN_NONMEM : InstrItinClass; +// adc/sbc +def IIC_BIN_CARRY_MEM : InstrItinClass; +def IIC_BIN_CARRY_NONMEM : InstrItinClass; // shift/rotate def IIC_SR : InstrItinClass; // shift double @@ -249,11 +289,11 @@ def IIC_SSE_INTSH_P_RR : InstrItinClass; def IIC_SSE_INTSH_P_RM : InstrItinClass; def IIC_SSE_INTSH_P_RI : InstrItinClass; -def IIC_SSE_CMPP_RR : InstrItinClass; -def IIC_SSE_CMPP_RM : InstrItinClass; +def IIC_SSE_INTSHDQ_P_RI : InstrItinClass; def IIC_SSE_SHUFP : InstrItinClass; -def IIC_SSE_PSHUF : InstrItinClass; +def IIC_SSE_PSHUF_RI : InstrItinClass; +def IIC_SSE_PSHUF_MI : InstrItinClass; def IIC_SSE_UNPCK : InstrItinClass; @@ -275,6 +315,11 @@ def IIC_SSE_SQRTPD_RM : InstrItinClass; def IIC_SSE_SQRTSD_RR : InstrItinClass; def IIC_SSE_SQRTSD_RM : InstrItinClass; +def IIC_SSE_RSQRTPS_RR : InstrItinClass; +def IIC_SSE_RSQRTPS_RM : InstrItinClass; +def IIC_SSE_RSQRTSS_RR : InstrItinClass; +def IIC_SSE_RSQRTSS_RM : InstrItinClass; + def IIC_SSE_RCPP_RR : InstrItinClass; def IIC_SSE_RCPP_RM : InstrItinClass; def IIC_SSE_RCPS_RR : InstrItinClass; @@ -315,7 +360,8 @@ def IIC_SSE_PSIGN_RM : InstrItinClass; def IIC_SSE_PMADD : InstrItinClass; def IIC_SSE_PMULHRSW : InstrItinClass; -def IIC_SSE_PALIGNR : InstrItinClass; +def IIC_SSE_PALIGNRR : InstrItinClass; +def IIC_SSE_PALIGNRM : InstrItinClass; def IIC_SSE_MWAIT : InstrItinClass; def IIC_SSE_MONITOR : InstrItinClass; @@ -491,8 +537,8 @@ def IIC_PUSH_REG : InstrItinClass; def IIC_PUSH_F : InstrItinClass; def IIC_PUSH_A : InstrItinClass; def IIC_BSWAP : InstrItinClass; -def IIC_BSF : InstrItinClass; -def IIC_BSR : InstrItinClass; +def IIC_BIT_SCAN_MEM : InstrItinClass; +def IIC_BIT_SCAN_REG : InstrItinClass; def IIC_MOVS : InstrItinClass; def IIC_STOS : InstrItinClass; def IIC_SCAS : InstrItinClass; @@ -539,13 +585,40 @@ def IIC_BOUND : InstrItinClass; def IIC_ARPL_REG : InstrItinClass; def IIC_ARPL_MEM : InstrItinClass; def IIC_MOVBE : InstrItinClass; +def IIC_AES : InstrItinClass; +def IIC_BLEND_MEM : InstrItinClass; +def IIC_BLEND_NOMEM : InstrItinClass; +def IIC_CBW : InstrItinClass; +def IIC_CRC32_REG : InstrItinClass; +def IIC_CRC32_MEM : InstrItinClass; +def IIC_SSE_DPPD_RR : InstrItinClass; +def IIC_SSE_DPPD_RM : InstrItinClass; +def IIC_SSE_DPPS_RR : InstrItinClass; +def IIC_SSE_DPPS_RM : InstrItinClass; +def IIC_MMX_EMMS : InstrItinClass; +def IIC_SSE_EXTRACTPS_RR : InstrItinClass; +def IIC_SSE_EXTRACTPS_RM : InstrItinClass; +def IIC_SSE_INSERTPS_RR : InstrItinClass; +def IIC_SSE_INSERTPS_RM : InstrItinClass; +def IIC_SSE_MPSADBW_RR : InstrItinClass; +def IIC_SSE_MPSADBW_RM : InstrItinClass; +def IIC_SSE_PMULLD_RR : InstrItinClass; +def IIC_SSE_PMULLD_RM : InstrItinClass; +def IIC_SSE_ROUNDPS_REG : InstrItinClass; +def IIC_SSE_ROUNDPS_MEM : InstrItinClass; +def IIC_SSE_ROUNDPD_REG : InstrItinClass; +def IIC_SSE_ROUNDPD_MEM : InstrItinClass; +def IIC_SSE_POPCNT_RR : InstrItinClass; +def IIC_SSE_POPCNT_RM : InstrItinClass; +def IIC_SSE_PCLMULQDQ_RR : InstrItinClass; +def IIC_SSE_PCLMULQDQ_RM : InstrItinClass; def IIC_NOP : InstrItinClass; //===----------------------------------------------------------------------===// // Processor instruction itineraries. -// IssueWidth is analagous to the number of decode units. Core and its +// IssueWidth is analogous to the number of decode units. Core and its // descendents, including Nehalem and SandyBridge have 4 decoders. // Resources beyond the decoder operate on micro-ops and are bufferred // so adjacent micro-ops don't directly compete. @@ -560,14 +633,18 @@ def IIC_NOP : InstrItinClass; // latencies. Since these latencies are not used for pipeline hazards, // they do not need to be exact. // -// The GenericModel contains no instruciton itineraries. +// The GenericModel contains no instruction itineraries. def GenericModel : SchedMachineModel { let IssueWidth = 4; let MicroOpBufferSize = 32; let LoadLatency = 4; let HighLatency = 10; + let PostRAScheduler = 0; } include "X86ScheduleAtom.td" include "X86SchedSandyBridge.td" include "X86SchedHaswell.td" +include "X86ScheduleSLM.td" +include "X86ScheduleBtVer2.td" +