From 6113df3d73d60f2da45604dc3fd63aff6b9cc827 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 21 Aug 2015 20:39:17 +0000 Subject: [PATCH] remove 'FeatureSlowUAMem' from AMD CPUs based on 10H micro-arch or later See discussion in D12154 ( http://reviews.llvm.org/D12154 ), AMD Software Optimization Guides for 10H/12H/15H/16H, and Agner Fog's experimental data. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@245733 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86.td | 18 +++++++----------- test/CodeGen/X86/slow-unaligned-mem.ll | 14 +++++++------- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index d081614a821..d00a1113e2e 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -433,21 +433,19 @@ def : Proc<"opteron-sse3", [FeatureSlowUAMem, FeatureSSE3, Feature3DNowA, def : Proc<"athlon64-sse3", [FeatureSlowUAMem, FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B, FeatureSlowBTMem, FeatureSlowSHLD]>; -def : Proc<"amdfam10", [FeatureSlowUAMem, FeatureSSE4A, +def : Proc<"amdfam10", [FeatureSSE4A, Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT, FeatureSlowBTMem, FeatureSlowSHLD]>; -def : Proc<"barcelona", [FeatureSlowUAMem, FeatureSSE4A, +def : Proc<"barcelona", [FeatureSSE4A, Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT, FeatureSlowBTMem, FeatureSlowSHLD]>; -// FIXME: We should remove 'FeatureSlowUAMem' from AMD chips under here. - // Bobcat def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B, FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT, - FeatureSlowSHLD, FeatureSlowUAMem]>; + FeatureSlowSHLD]>; // Jaguar def : ProcessorModel<"btver2", BtVer2Model, @@ -461,15 +459,13 @@ def : ProcessorModel<"btver2", BtVer2Model, def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, FeaturePCLMUL, FeatureAVX, FeatureSSE4A, FeatureLZCNT, - FeaturePOPCNT, FeatureSlowSHLD, - FeatureSlowUAMem]>; + FeaturePOPCNT, FeatureSlowSHLD]>; // Piledriver def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, FeaturePCLMUL, FeatureAVX, FeatureSSE4A, FeatureF16C, FeatureLZCNT, FeaturePOPCNT, FeatureBMI, - FeatureTBM, FeatureFMA, FeatureSlowSHLD, - FeatureSlowUAMem]>; + FeatureTBM, FeatureFMA, FeatureSlowSHLD]>; // Steamroller def : Proc<"bdver3", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, @@ -477,7 +473,7 @@ def : Proc<"bdver3", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAVX, FeatureSSE4A, FeatureF16C, FeatureLZCNT, FeaturePOPCNT, FeatureBMI, FeatureTBM, FeatureFMA, FeatureSlowSHLD, - FeatureFSGSBase, FeatureSlowUAMem]>; + FeatureFSGSBase]>; // Excavator def : Proc<"bdver4", [FeatureAVX2, FeatureXOP, FeatureFMA4, @@ -485,7 +481,7 @@ def : Proc<"bdver4", [FeatureAVX2, FeatureXOP, FeatureFMA4, FeaturePCLMUL, FeatureF16C, FeatureLZCNT, FeaturePOPCNT, FeatureBMI, FeatureBMI2, FeatureTBM, FeatureFMA, FeatureSSE4A, - FeatureFSGSBase, FeatureSlowUAMem]>; + FeatureFSGSBase]>; def : Proc<"geode", [FeatureSlowUAMem, Feature3DNowA]>; diff --git a/test/CodeGen/X86/slow-unaligned-mem.ll b/test/CodeGen/X86/slow-unaligned-mem.ll index e865ca16ca1..5c8166b63a3 100644 --- a/test/CodeGen/X86/slow-unaligned-mem.ll +++ b/test/CodeGen/X86/slow-unaligned-mem.ll @@ -39,14 +39,14 @@ ; AMD chips with fast unaligned memory accesses ; FIXME: These are wrong except for btver2. -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=amdfam10 2>&1 | FileCheck %s --check-prefix=SLOW -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=barcelona 2>&1 | FileCheck %s --check-prefix=SLOW -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefix=SLOW +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=amdfam10 2>&1 | FileCheck %s --check-prefix=FAST +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=barcelona 2>&1 | FileCheck %s --check-prefix=FAST +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefix=FAST ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver2 2>&1 | FileCheck %s --check-prefix=FAST -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver1 2>&1 | FileCheck %s --check-prefix=SLOW -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver2 2>&1 | FileCheck %s --check-prefix=SLOW -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3 2>&1 | FileCheck %s --check-prefix=SLOW -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4 2>&1 | FileCheck %s --check-prefix=SLOW +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver1 2>&1 | FileCheck %s --check-prefix=FAST +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver2 2>&1 | FileCheck %s --check-prefix=FAST +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3 2>&1 | FileCheck %s --check-prefix=FAST +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4 2>&1 | FileCheck %s --check-prefix=FAST ; Other chips with slow unaligned memory accesses -- 2.34.1