Address a FIXME and update the fast unaligned memory feature for newer
authorChandler Carruth <chandlerc@gmail.com>
Mon, 10 Dec 2012 09:18:44 +0000 (09:18 +0000)
committerChandler Carruth <chandlerc@gmail.com>
Mon, 10 Dec 2012 09:18:44 +0000 (09:18 +0000)
Intel chips.

The model number rules were determined by inspecting Intel's
documentation for their newer chip model numbers. My understanding is
that all of the newer Intel chips have fast unaligned memory access, but
if anyone is concerned about a particular chip, just shout.

No tests updated; it's not clear we have dedicated tests for the chips'
various features, but if anyone would like tests (or can point me at
some existing ones), I'm happy to oblige.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169730 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/X86/X86.td
lib/Target/X86/X86Subtarget.cpp

index 8ad0bc08ac57f9b53fc0b4a3032a4b65871341a9..e3c22d9c3b8c417c80a23c14659de46529e1561e 100644 (file)
@@ -182,17 +182,17 @@ def : Proc<"westmere",        [FeatureSSE42, FeatureCMPXCHG16B,
 // Sandy Bridge
 // SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
 // rather than a superset.
-def : Proc<"corei7-avx",      [FeatureAVX, FeatureCMPXCHG16B, FeaturePOPCNT,
-                               FeatureAES, FeaturePCLMUL]>;
+def : Proc<"corei7-avx",      [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem,
+                               FeaturePOPCNT, FeatureAES, FeaturePCLMUL]>;
 // Ivy Bridge
-def : Proc<"core-avx-i",      [FeatureAVX, FeatureCMPXCHG16B, FeaturePOPCNT,
-                               FeatureAES, FeaturePCLMUL,
+def : Proc<"core-avx-i",      [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem,
+                               FeaturePOPCNT, FeatureAES, FeaturePCLMUL,
                                FeatureRDRAND, FeatureF16C, FeatureFSGSBase]>;
 
 // Haswell
-def : Proc<"core-avx2",       [FeatureAVX2, FeatureCMPXCHG16B, FeaturePOPCNT,
-                               FeatureAES, FeaturePCLMUL, FeatureRDRAND,
-                               FeatureF16C, FeatureFSGSBase,
+def : Proc<"core-avx2",       [FeatureAVX2, FeatureCMPXCHG16B, FeatureFastUAMem,
+                               FeaturePOPCNT, FeatureAES, FeaturePCLMUL,
+                               FeatureRDRAND, FeatureF16C, FeatureFSGSBase,
                                FeatureMOVBE, FeatureLZCNT, FeatureBMI,
                                FeatureBMI2, FeatureFMA,
                                FeatureRTM]>;
index 34f9fad6969dc664f4aa53af96544d0c9b7820c9..bdce4d0221bc59c3b1bcca8b6969dff106c575fd 100644 (file)
@@ -234,12 +234,20 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
       ToggleFeature(X86::FeatureSlowBTMem);
     }
 
-    // If it's Nehalem, unaligned memory access is fast.
-    // Include Westmere and Sandy Bridge as well.
-    // FIXME: add later processors.
-    if (IsIntel && ((Family == 6 && Model == 26) ||
-        (Family == 6 && Model == 44) ||
-        (Family == 6 && Model == 42))) {
+    // If it's an Intel chip since Nehalem and not an Atom chip, unaligned
+    // memory access is fast. We hard code model numbers here because they
+    // aren't strictly increasing for Intel chips it seems.
+    if (IsIntel &&
+        ((Family == 6 && Model == 0x1E) || // Nehalem: Clarksfield, Lynnfield,
+                                           //          Jasper Froest
+         (Family == 6 && Model == 0x2A) || // Nehalem: Bloomfield, Nehalem-EP
+         (Family == 6 && Model == 0x2E) || // Nehalem: Nehalem-EX
+         (Family == 6 && Model == 0x25) || // Westmere: Arrandale, Clarksdale
+         (Family == 6 && Model == 0x2C) || // Westmere: Gulftown, Westmere-EP
+         (Family == 6 && Model == 0x2F) || // Westmere: Westmere-EX
+         (Family == 6 && Model == 0x2A) || // SandyBridge
+         (Family == 6 && Model == 0x2D) || // SandyBridge: SandyBridge-E*
+         (Family == 6 && Model == 0x3A))) {// IvyBridge
       IsUAMemFast = true;
       ToggleFeature(X86::FeatureFastUAMem);
     }