def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
"Support SSE 4a instructions">;
+def FeatureAVX : SubtargetFeature<"avx", "HasAVX", "true",
+ "Enable AVX instructions">;
+def FeatureFMA3 : SubtargetFeature<"fma3", "HasFMA3", "true",
+ "Enable three-operand fused multiply-add">;
+def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
+ "Enable four-operand fused multiply-add">;
+
//===----------------------------------------------------------------------===//
// X86 processors supported.
//===----------------------------------------------------------------------===//
def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem]>;
+// Sandy Bridge does not have FMA
+def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit]>;
def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;
def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
+def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
+def HasAVX : Predicate<"Subtarget->hasAVX()">;
+def HasFMA3 : Predicate<"Subtarget->hasFMA3()">;
+def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
def In32BitMode : Predicate<"!Subtarget->is64Bit()">;
bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
+
+ HasFMA3 = IsIntel && ((ECX >> 12) & 0x1);
+ HasAVX = ((ECX >> 28) & 0x1);
+
if (IsIntel || IsAMD) {
// Determine if bit test memory instructions are slow.
unsigned Family = 0;
X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
HasX86_64 = (EDX >> 29) & 0x1;
HasSSE4A = IsAMD && ((ECX >> 6) & 0x1);
+ HasFMA4 = IsAMD && ((ECX >> 16) & 0x1);
}
}
, X86SSELevel(NoMMXSSE)
, X863DNowLevel(NoThreeDNow)
, HasX86_64(false)
+ , HasSSE4A(false)
+ , HasAVX(false)
+ , HasFMA3(false)
+ , HasFMA4(false)
, IsBTMemSlow(false)
, DarwinVers(0)
, IsLinux(false)
///
bool HasX86_64;
- /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
- bool IsBTMemSlow;
-
/// HasSSE4A - True if the processor supports SSE4A instructions.
bool HasSSE4A;
+ /// HasAVX - Target has AVX instructions
+ bool HasAVX;
+
+ /// HasFMA3 - Target has 3-operand fused multiply-add
+ bool HasFMA3;
+
+ /// HasFMA4 - Target has 4-operand fused multiply-add
+ bool HasFMA4;
+
+ /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
+ bool IsBTMemSlow;
+
/// DarwinVers - Nonzero if this is a darwin platform: the numeric
/// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
unsigned char DarwinVers; // Is any darwin-x86 platform.
bool hasSSE4A() const { return HasSSE4A; }
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
+ bool hasAVX() const { return HasAVX; }
+ bool hasFMA3() const { return HasFMA3; }
+ bool hasFMA4() const { return HasFMA4; }
bool isBTMemSlow() const { return IsBTMemSlow; }