#define DEBUG_TYPE "subtarget"
#include "X86Subtarget.h"
#include "X86InstrInfo.h"
-#include "llvm/GlobalValue.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Host.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
/// ClassifyBlockAddressReference - Classify a blockaddress reference for the
/// current subtarget according to how we should reference it in a non-pcrel
/// context.
/// Returns X86II::MO_GOTOFF when isPICStyleGOT() holds and 0 in every other
/// case.
-unsigned char X86Subtarget::
-ClassifyBlockAddressReference() const {
+unsigned char X86Subtarget::ClassifyBlockAddressReference() const {
if (isPICStyleGOT()) // 32-bit ELF targets.
return X86II::MO_GOTOFF;
return 0; // Any other PIC style: the result is 0.
}
+bool X86Subtarget::hasSinCos() const { // True only when all three checks below hold.
+ return getTargetTriple().isMacOSX() && // The target triple reports Mac OS X,
+ !getTargetTriple().isMacOSXVersionLT(10, 9) && // isMacOSXVersionLT(10, 9) is false for it,
+ is64Bit(); // and is64Bit() is true. NOTE(review): presumably gates a combined sin/cos routine; confirm against callers.
+}
+
/// IsLegalToCallImmediateAddr - Report whether this subtarget permits a call
/// whose destination is an immediate address. ELF targets always do; any
/// other target does only when TM uses the static relocation model.
bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {
  // ELF targets qualify regardless of the relocation model.
  if (isTargetELF())
    return true;
  // Everything else depends on the relocation model being static.
  return TM.getRelocationModel() == Reloc::Static;
}
-/// getSpecialAddressLatency - For targets where it is beneficial to
-/// backschedule instructions that compute addresses, return a value
-/// indicating the number of scheduling cycles of backscheduling that
-/// should be attempted.
-unsigned X86Subtarget::getSpecialAddressLatency() const {
- // For x86 out-of-order targets, back-schedule address computations so
- // that loads and stores aren't blocked.
- // This value was chosen arbitrarily.
- return 200;
+static bool OSHasAVXSupport() { // True when bits 1 and 2 of the xgetbv low word are both set; false when no x86 macro below is defined.
+#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\
+ || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+#if defined(__GNUC__)
+ // Check xgetbv; this uses a .byte sequence instead of the instruction
+ // directly because older assemblers do not include support for xgetbv and
+ // there is no easy way to conditionally compile based on the assembler used.
+ int rEAX, rEDX; // Outputs of the asm below ("=a" and "=d"); only rEAX is examined afterwards.
+ __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (rEAX), "=d" (rEDX) : "c" (0)); // The "c"(0) input places 0 in ECX for the instruction.
+#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
+ unsigned long long rEAX = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); // MSVC path: same query through the _xgetbv intrinsic.
+#else
+ int rEAX = 0; // Ensures we return false
+#endif
+ return (rEAX & 6) == 6; // Mask 6 == bits 1 and 2, both required. NOTE(review): presumably the XCR0 XMM and YMM state-enable bits; confirm against the Intel SDM.
+#else
+ return false; // None of the x86 macros above is defined, so nothing is queried.
+#endif
}
void X86Subtarget::AutoDetectSubtargetFeatures() {
if ((ECX >> 9) & 1) { X86SSELevel = SSSE3; ToggleFeature(X86::FeatureSSSE3);}
if ((ECX >> 19) & 1) { X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41);}
if ((ECX >> 20) & 1) { X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42);}
- if ((ECX >> 28) & 1) { X86SSELevel = AVX; ToggleFeature(X86::FeatureAVX); }
+ if (((ECX >> 27) & 1) && ((ECX >> 28) & 1) && OSHasAVXSupport()) {
+ X86SSELevel = AVX; ToggleFeature(X86::FeatureAVX);
+ }
bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
ToggleFeature(X86::FeatureSlowBTMem);
}
- // If it's Nehalem, unaligned memory access is fast.
- // Include Westmere and Sandy Bridge as well.
- // FIXME: add later processors.
- if (IsIntel && ((Family == 6 && Model == 26) ||
- (Family == 6 && Model == 44) ||
- (Family == 6 && Model == 42))) {
+ // If it's an Intel chip since Nehalem and not an Atom chip, unaligned
+ // memory access is fast. We hard code model numbers here because they
+ // aren't strictly increasing for Intel chips it seems.
+ if (IsIntel &&
+ ((Family == 6 && Model == 0x1E) || // Nehalem: Clarksfield, Lynnfield,
+ // Jasper Forest
+ (Family == 6 && Model == 0x1A) || // Nehalem: Bloomfield, Nehalem-EP
+ (Family == 6 && Model == 0x2E) || // Nehalem: Nehalem-EX
+ (Family == 6 && Model == 0x25) || // Westmere: Arrandale, Clarksdale
+ (Family == 6 && Model == 0x2C) || // Westmere: Gulftown, Westmere-EP
+ (Family == 6 && Model == 0x2F) || // Westmere: Westmere-EX
+ (Family == 6 && Model == 0x2A) || // SandyBridge
+ (Family == 6 && Model == 0x2D) || // SandyBridge: SandyBridge-E*
+ (Family == 6 && Model == 0x3A))) {// IvyBridge
IsUAMemFast = true;
ToggleFeature(X86::FeatureFastUAMem);
}
HasLZCNT = true;
ToggleFeature(X86::FeatureLZCNT);
}
+ if (IsIntel && ((ECX >> 8) & 0x1)) {
+ HasPRFCHW = true;
+ ToggleFeature(X86::FeaturePRFCHW);
+ }
if (IsAMD) {
if ((ECX >> 6) & 0x1) {
HasSSE4A = true;
HasBMI = true;
ToggleFeature(X86::FeatureBMI);
}
+ if ((EBX >> 4) & 0x1) {
+ HasHLE = true;
+ ToggleFeature(X86::FeatureHLE);
+ }
if (IsIntel && ((EBX >> 5) & 0x1)) {
X86SSELevel = AVX2;
ToggleFeature(X86::FeatureAVX2);
HasBMI2 = true;
ToggleFeature(X86::FeatureBMI2);
}
+ if (IsIntel && ((EBX >> 11) & 0x1)) {
+ HasRTM = true;
+ ToggleFeature(X86::FeatureRTM);
+ }
+ if (IsIntel && ((EBX >> 19) & 0x1)) {
+ HasADX = true;
+ ToggleFeature(X86::FeatureADX);
+ }
+ if (IsIntel && ((EBX >> 18) & 0x1)) {
+ HasRDSEED = true;
+ ToggleFeature(X86::FeatureRDSEED);
+ }
}
}
}
-X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS,
- unsigned StackAlignOverride, bool is64Bit)
- : X86GenSubtargetInfo(TT, CPU, FS)
- , X86ProcFamily(Others)
- , PICStyle(PICStyles::None)
- , X86SSELevel(NoMMXSSE)
- , X863DNowLevel(NoThreeDNow)
- , HasCMov(false)
- , HasX86_64(false)
- , HasPOPCNT(false)
- , HasSSE4A(false)
- , HasAES(false)
- , HasPCLMUL(false)
- , HasFMA(false)
- , HasFMA4(false)
- , HasXOP(false)
- , HasMOVBE(false)
- , HasRDRAND(false)
- , HasF16C(false)
- , HasFSGSBase(false)
- , HasLZCNT(false)
- , HasBMI(false)
- , HasBMI2(false)
- , IsBTMemSlow(false)
- , IsUAMemFast(false)
- , HasVectorUAMem(false)
- , HasCmpxchg16b(false)
- , UseLeaForSP(false)
- , HasSlowDivide(false)
- , PostRAScheduler(false)
- , stackAlignment(4)
- // FIXME: this is a known good value for Yonah. How about others?
- , MaxInlineSizeThreshold(128)
- , TargetTriple(TT)
- , In64BitMode(is64Bit) {
- // Determine default and user specified characteristics
+void X86Subtarget::resetSubtargetFeatures(const MachineFunction *MF) { // Re-derives the subtarget state from MF's function-level "target-cpu" / "target-features" attributes.
+ AttributeSet FnAttrs = MF->getFunction()->getAttributes(); // MF is dereferenced without a null check.
+ Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
+ "target-cpu");
+ Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
+ "target-features");
+ std::string CPU =
+ !CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : ""; // "" when hasAttribute(Attribute::None) is true.
+ std::string FS =
+ !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : ""; // Same fallback for the feature string.
+ if (!FS.empty()) { // Only a non-empty feature string triggers a reset; a "target-cpu" value alone does not.
+ initializeEnvironment(); // Restore the baseline values before re-applying CPU and FS.
+ resetSubtargetFeatures(CPU, FS); // Delegates to the (StringRef, StringRef) overload.
+ }
+}
+
+void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
std::string CPUName = CPU;
if (!FS.empty() || !CPU.empty()) {
if (CPUName.empty()) {
assert((!In64BitMode || HasX86_64) &&
"64-bit code requested on a subtarget that doesn't support it!");
- // Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both
+ // Stack alignment is 16 bytes on Darwin, Linux and Solaris (both
// 32 and 64 bit) and for all 64-bit targets.
if (StackAlignOverride)
stackAlignment = StackAlignOverride;
- else if (isTargetDarwin() || isTargetFreeBSD() || isTargetLinux() ||
- isTargetSolaris() || In64BitMode)
+ else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() ||
+ In64BitMode)
stackAlignment = 16;
}
+void X86Subtarget::initializeEnvironment() { // Puts every field assigned below back to its baseline value.
+ X86SSELevel = NoMMXSSE; // Both level fields start at their "No..." enumerator.
+ X863DNowLevel = NoThreeDNow;
+ HasCMov = false; // Every Has*/Is*/Use* flag from here on is cleared.
+ HasX86_64 = false;
+ HasPOPCNT = false;
+ HasSSE4A = false;
+ HasAES = false;
+ HasPCLMUL = false;
+ HasFMA = false;
+ HasFMA4 = false;
+ HasXOP = false;
+ HasMOVBE = false;
+ HasRDRAND = false;
+ HasF16C = false;
+ HasFSGSBase = false;
+ HasLZCNT = false;
+ HasBMI = false;
+ HasBMI2 = false;
+ HasRTM = false;
+ HasHLE = false;
+ HasADX = false;
+ HasPRFCHW = false;
+ HasRDSEED = false;
+ IsBTMemSlow = false;
+ IsUAMemFast = false;
+ HasVectorUAMem = false;
+ HasCmpxchg16b = false;
+ UseLeaForSP = false;
+ HasSlowDivide = false;
+ PostRAScheduler = false;
+ PadShortFunctions = false;
+ CallRegIndirect = false;
+ LEAUsesAG = false;
+ stackAlignment = 4; // Baseline only; later code in this file may replace it with StackAlignOverride or 16.
+ // FIXME: this is a known good value for Yonah. How about others?
+ MaxInlineSizeThreshold = 128;
+} // X86ProcFamily, PICStyle, TargetTriple, StackAlignOverride and In64BitMode are not assigned here; the constructor's initializer list sets them.
+
+X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, // Builds the subtarget from triple TT, CPU name CPU and feature string FS.
+ const std::string &FS,
+ unsigned StackAlignOverride, bool is64Bit)
+ : X86GenSubtargetInfo(TT, CPU, FS) // The base class receives the same three strings.
+ , X86ProcFamily(Others)
+ , PICStyle(PICStyles::None)
+ , TargetTriple(TT)
+ , StackAlignOverride(StackAlignOverride) // Member initialised from the same-named parameter.
+ , In64BitMode(is64Bit) {
+ initializeEnvironment(); // Baseline values for the fields not covered by the initializer list.
+ resetSubtargetFeatures(CPU, FS); // Then apply the requested CPU and feature string.
+}
+
bool X86Subtarget::enablePostRAScheduler(
CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,