Enabling 3DNow! prefetch instruction for a few AMD processors: bobcat, jaguar,
[oota-llvm.git] / lib / Target / X86 / X86.td
index 8ad0bc08ac57f9b53fc0b4a3032a4b65871341a9..b41a9c9608265fe26d68220423e0ee88a337d5a9 100644 (file)
@@ -50,10 +50,10 @@ def FeatureSSE3    : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
 def FeatureSSSE3   : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
                                       "Enable SSSE3 instructions",
                                       [FeatureSSE3]>;
-def FeatureSSE41   : SubtargetFeature<"sse41", "X86SSELevel", "SSE41",
+def FeatureSSE41   : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
                                       "Enable SSE 4.1 instructions",
                                       [FeatureSSSE3]>;
-def FeatureSSE42   : SubtargetFeature<"sse42", "X86SSELevel", "SSE42",
+def FeatureSSE42   : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
                                       "Enable SSE 4.2 instructions",
                                       [FeatureSSE41]>;
 def Feature3DNow   : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
@@ -68,7 +68,7 @@ def Feature3DNowA  : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
 def Feature64Bit   : SubtargetFeature<"64bit", "HasX86_64", "true",
                                       "Support 64-bit instructions",
                                       [FeatureCMOV]>;
-def FeatureCMPXCHG16B : SubtargetFeature<"cmpxchg16b", "HasCmpxchg16b", "true",
+def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
                                       "64-bit with cmpxchg16b",
                                       [Feature64Bit]>;
 def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
@@ -86,6 +86,19 @@ def FeatureAVX     : SubtargetFeature<"avx", "X86SSELevel", "AVX",
 def FeatureAVX2    : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
                                       "Enable AVX2 instructions",
                                       [FeatureAVX]>;
+def FeatureAVX512   : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F",
+                                      "Enable AVX-512 instructions",
+                                      [FeatureAVX2]>;
+def FeatureERI      : SubtargetFeature<"avx512er", "HasERI", "true",
+                      "Enable AVX-512 Exponential and Reciprocal Instructions",
+                                      [FeatureAVX512]>;
+def FeatureCDI      : SubtargetFeature<"avx512cd", "HasCDI", "true",
+                      "Enable AVX-512 Conflict Detection Instructions",
+                                      [FeatureAVX512]>;
+def FeaturePFI      : SubtargetFeature<"avx512pf", "HasPFI", "true",
+                      "Enable AVX-512 PreFetch Instructions",
+                                      [FeatureAVX512]>;
+
 def FeaturePCLMUL  : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
                          "Enable packed carry-less multiplication instructions",
                                [FeatureSSE2]>;
@@ -104,12 +117,15 @@ def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
 def FeatureAES     : SubtargetFeature<"aes", "HasAES", "true",
                                       "Enable AES instructions",
                                       [FeatureSSE2]>;
+def FeatureTBM     : SubtargetFeature<"tbm", "HasTBM", "true",
+                                      "Enable TBM instructions">;
 def FeatureMOVBE   : SubtargetFeature<"movbe", "HasMOVBE", "true",
                                       "Support MOVBE instruction">;
-def FeatureRDRAND  : SubtargetFeature<"rdrand", "HasRDRAND", "true",
+def FeatureRDRAND  : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
                                       "Support RDRAND instruction">;
 def FeatureF16C    : SubtargetFeature<"f16c", "HasF16C", "true",
-                       "Support 16-bit floating point conversion instructions">;
+                       "Support 16-bit floating point conversion instructions",
+                       [FeatureAVX]>;
 def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
                                        "Support FS/GS Base instructions">;
 def FeatureLZCNT   : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
@@ -120,11 +136,30 @@ def FeatureBMI2    : SubtargetFeature<"bmi2", "HasBMI2", "true",
                                       "Support BMI2 instructions">;
 def FeatureRTM     : SubtargetFeature<"rtm", "HasRTM", "true",
                                       "Support RTM instructions">;
+def FeatureHLE     : SubtargetFeature<"hle", "HasHLE", "true",
+                                      "Support HLE">;
+def FeatureADX     : SubtargetFeature<"adx", "HasADX", "true",
+                                      "Support ADX instructions">;
+def FeatureSHA     : SubtargetFeature<"sha", "HasSHA", "true",
+                                      "Enable SHA instructions",
+                                      [FeatureSSE2]>;
+def FeaturePRFCHW  : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
+                                      "Support PRFCHW instructions">;
+def FeatureRDSEED  : SubtargetFeature<"rdseed", "HasRDSEED", "true",
+                                      "Support RDSEED instruction">;
 def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
                                      "Use LEA for adjusting the stack pointer">;
 def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb",
-                          "HasSlowDivide", "true",
-                          "Use small divide for positive values less than 256">;
+                                     "HasSlowDivide", "true",
+                                     "Use small divide for positive values less than 256">;
+def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
+                                     "PadShortFunctions", "true",
+                                     "Pad short functions">;
+def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect",
+                                     "CallRegIndirect", "true",
+                                     "Call register indirect">;
+def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
+                                   "LEA instruction needs inputs at AG stage">;
 
 //===----------------------------------------------------------------------===//
 // X86 processors supported.
@@ -134,13 +169,12 @@ include "X86Schedule.td"
 
 def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom",
                     "Intel Atom processors">;
+def ProcIntelSLM  : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM",
+                    "Intel Silvermont processors">;
 
 class Proc<string Name, list<SubtargetFeature> Features>
  : ProcessorModel<Name, GenericModel, Features>;
 
-class AtomProc<string Name, list<SubtargetFeature> Features>
- : ProcessorModel<Name, AtomModel, Features>;
-
 def : Proc<"generic",         []>;
 def : Proc<"i386",            []>;
 def : Proc<"i486",            []>;
@@ -155,47 +189,81 @@ def : Proc<"pentium3m",       [FeatureSSE1, FeatureSlowBTMem]>;
 def : Proc<"pentium-m",       [FeatureSSE2, FeatureSlowBTMem]>;
 def : Proc<"pentium4",        [FeatureSSE2]>;
 def : Proc<"pentium4m",       [FeatureSSE2, FeatureSlowBTMem]>;
-def : Proc<"x86-64",          [FeatureSSE2, Feature64Bit, FeatureSlowBTMem]>;
-def : Proc<"yonah",           [FeatureSSE3, FeatureSlowBTMem]>;
-def : Proc<"prescott",        [FeatureSSE3, FeatureSlowBTMem]>;
-def : Proc<"nocona",          [FeatureSSE3, FeatureCMPXCHG16B,
-                               FeatureSlowBTMem]>;
-def : Proc<"core2",           [FeatureSSSE3, FeatureCMPXCHG16B,
+def : Proc<"x86-64",          [FeatureSSE2, Feature64Bit, FeatureSlowBTMem,
+                               FeatureFastUAMem]>;
+// Intel Core Duo.
+def : ProcessorModel<"yonah", SandyBridgeModel,
+                     [FeatureSSE3, FeatureSlowBTMem]>;
+
+// NetBurst.
+def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>;
+def : Proc<"nocona",   [FeatureSSE3, FeatureCMPXCHG16B, FeatureSlowBTMem]>;
+
+// Intel Core 2 Solo/Duo.
+def : ProcessorModel<"core2", SandyBridgeModel,
+                     [FeatureSSSE3, FeatureCMPXCHG16B, FeatureSlowBTMem]>;
+def : ProcessorModel<"penryn", SandyBridgeModel,
+                     [FeatureSSE41, FeatureCMPXCHG16B, FeatureSlowBTMem]>;
+
+// Atom.
+def : ProcessorModel<"atom", AtomModel,
+                     [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B,
+                      FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
+                      FeatureSlowDivide,
+                      FeatureCallRegIndirect,
+                      FeatureLEAUsesAG,
+                      FeaturePadShortFunctions]>;
+
+// Atom Silvermont.
+def : ProcessorModel<"slm",  SLMModel, [ProcIntelSLM,
+                               FeatureSSE42, FeatureCMPXCHG16B,
+                               FeatureMOVBE, FeaturePOPCNT,
+                               FeaturePCLMUL, FeatureAES,
+                               FeatureCallRegIndirect,
+                               FeaturePRFCHW,
                                FeatureSlowBTMem]>;
-def : Proc<"penryn",          [FeatureSSE41, FeatureCMPXCHG16B,
-                               FeatureSlowBTMem]>;
-def : AtomProc<"atom",        [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B,
-                               FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
-                               FeatureSlowDivide]>;
 // "Arrandale" along with corei3 and corei5
-def : Proc<"corei7",          [FeatureSSE42, FeatureCMPXCHG16B,
-                               FeatureSlowBTMem, FeatureFastUAMem,
-                               FeaturePOPCNT, FeatureAES]>;
-def : Proc<"nehalem",         [FeatureSSE42,  FeatureCMPXCHG16B,
-                               FeatureSlowBTMem, FeatureFastUAMem,
-                               FeaturePOPCNT]>;
+def : ProcessorModel<"corei7", SandyBridgeModel,
+                     [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem,
+                      FeatureFastUAMem, FeaturePOPCNT, FeatureAES]>;
+
+def : ProcessorModel<"nehalem", SandyBridgeModel,
+                     [FeatureSSE42,  FeatureCMPXCHG16B, FeatureSlowBTMem,
+                      FeatureFastUAMem, FeaturePOPCNT]>;
 // Westmere is a similar machine to nehalem with some additional features.
 // Westmere is the corei3/i5/i7 path from nehalem to sandybridge
-def : Proc<"westmere",        [FeatureSSE42, FeatureCMPXCHG16B,
-                               FeatureSlowBTMem, FeatureFastUAMem,
-                               FeaturePOPCNT, FeatureAES, FeaturePCLMUL]>;
+def : ProcessorModel<"westmere", SandyBridgeModel,
+                     [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem,
+                      FeatureFastUAMem, FeaturePOPCNT, FeatureAES,
+                      FeaturePCLMUL]>;
 // Sandy Bridge
 // SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
 // rather than a superset.
-def : Proc<"corei7-avx",      [FeatureAVX, FeatureCMPXCHG16B, FeaturePOPCNT,
-                               FeatureAES, FeaturePCLMUL]>;
+def : ProcessorModel<"corei7-avx", SandyBridgeModel,
+                     [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem,
+                      FeaturePOPCNT, FeatureAES, FeaturePCLMUL]>;
 // Ivy Bridge
-def : Proc<"core-avx-i",      [FeatureAVX, FeatureCMPXCHG16B, FeaturePOPCNT,
-                               FeatureAES, FeaturePCLMUL,
-                               FeatureRDRAND, FeatureF16C, FeatureFSGSBase]>;
+def : ProcessorModel<"core-avx-i", SandyBridgeModel,
+                     [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem,
+                      FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND,
+                      FeatureF16C, FeatureFSGSBase]>;
 
 // Haswell
-def : Proc<"core-avx2",       [FeatureAVX2, FeatureCMPXCHG16B, FeaturePOPCNT,
-                               FeatureAES, FeaturePCLMUL, FeatureRDRAND,
-                               FeatureF16C, FeatureFSGSBase,
-                               FeatureMOVBE, FeatureLZCNT, FeatureBMI,
-                               FeatureBMI2, FeatureFMA,
-                               FeatureRTM]>;
+def : ProcessorModel<"core-avx2", HaswellModel,
+                     [FeatureAVX2, FeatureCMPXCHG16B, FeatureFastUAMem,
+                      FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND,
+                      FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT,
+                      FeatureBMI, FeatureBMI2, FeatureFMA, FeatureRTM,
+                      FeatureHLE]>;
+
+// KNL
+// FIXME: define KNL model
+def : ProcessorModel<"knl", HaswellModel,
+                     [FeatureAVX512, FeatureERI, FeatureCDI, FeaturePFI,
+                      FeatureCMPXCHG16B, FeatureFastUAMem, FeaturePOPCNT,
+                      FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
+                      FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
+                      FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE]>;
 
 def : Proc<"k6",              [FeatureMMX]>;
 def : Proc<"k6-2",            [Feature3DNow]>;
@@ -224,16 +292,22 @@ def : Proc<"amdfam10",        [FeatureSSE4A,
                                FeaturePOPCNT, FeatureSlowBTMem]>;
 // Bobcat
 def : Proc<"btver1",          [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B,
+                               FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT]>;
+// Jaguar
+def : Proc<"btver2",          [FeatureAVX, FeatureSSE4A, FeatureCMPXCHG16B,
+                               FeaturePRFCHW, FeatureAES, FeaturePCLMUL,
+                               FeatureBMI, FeatureF16C, FeatureMOVBE,
                                FeatureLZCNT, FeaturePOPCNT]>;
 // Bulldozer
 def : Proc<"bdver1",          [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
-                               FeatureAES, FeaturePCLMUL,
+                               FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
                                FeatureLZCNT, FeaturePOPCNT]>;
-// Enhanced Bulldozer
+// Piledriver
 def : Proc<"bdver2",          [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
-                               FeatureAES, FeaturePCLMUL,
+                               FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
                                FeatureF16C, FeatureLZCNT,
-                               FeaturePOPCNT, FeatureBMI, FeatureFMA]>;
+                               FeaturePOPCNT, FeatureBMI,  FeatureTBM,
+                               FeatureFMA]>;
 def : Proc<"geode",           [Feature3DNowA]>;
 
 def : Proc<"winchip-c6",      [FeatureMMX]>;
@@ -273,6 +347,9 @@ def ATTAsmParser : AsmParser {
 def ATTAsmParserVariant : AsmParserVariant {
   int Variant = 0;
 
+  // Variant name.
+  string Name = "att";
+
   // Discard comments in assembly strings.
   string CommentDelimiter = "#";
 
@@ -283,6 +360,9 @@ def ATTAsmParserVariant : AsmParserVariant {
 def IntelAsmParserVariant : AsmParserVariant {
   int Variant = 1;
 
+  // Variant name.
+  string Name = "intel";
+
   // Discard comments in assembly strings.
   string CommentDelimiter = ";";