From: Craig Topper Date: Sun, 16 Oct 2011 00:21:51 +0000 (+0000) Subject: Add X86 feature detection support for BMI instructions. Added new cpuid function... X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=4145c49aa0e0e70a51a395bdd4107a464d05e592;p=oota-llvm.git Add X86 feature detection support for BMI instructions. Added new cpuid function for accessing leafs with sub leafs specified in ECX. Also added code to keep track of the max cpuid level supported in both basic and extended leaves and qualified the existing cpuid calls and the new call to leaf 7. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142089 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index f98d5e331fe..7cdd5b1b20f 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -107,6 +107,74 @@ bool X86_MC::GetCpuIDAndInfo(unsigned value, unsigned *rEAX, return true; } +/// GetCpuIDAndInfoEx - Execute the specified cpuid with subleaf and return the +/// 4 values in the specified arguments. If we can't run cpuid on the host, +/// return true. +bool X86_MC::GetCpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX, + unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { +#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64) + #if defined(__GNUC__) + // gcc desn't know cpuid would clobber ebx/rbx. Preseve it manually. + asm ("movq\t%%rbx, %%rsi\n\t" + "cpuid\n\t" + "xchgq\t%%rbx, %%rsi\n\t" + : "=a" (*rEAX), + "=S" (*rEBX), + "=c" (*rECX), + "=d" (*rEDX) + : "a" (value), + "c" (subleaf)); + return false; + #elif defined(_MSC_VER) + // can't use __cpuidex because it isn't available in all supported versions + // of MSC + __asm { + mov eax,value + mov ecx,subleaf + cpuid + mov rsi,rEAX + mov dword ptr [rsi],eax + mov rsi,rEBX + mov dword ptr [rsi],ebx + mov rsi,rECX + mov dword ptr [rsi],ecx + mov rsi,rEDX + mov dword ptr [rsi],edx + } + return false; + #endif +#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86) + #if defined(__GNUC__) + asm ("movl\t%%ebx, %%esi\n\t" + "cpuid\n\t" + "xchgl\t%%ebx, %%esi\n\t" + : "=a" (*rEAX), + "=S" (*rEBX), + "=c" (*rECX), + "=d" (*rEDX) + : "a" (value), + "c" (subleaf)); + return false; + #elif defined(_MSC_VER) + __asm { + mov eax,value + mov ecx,subleaf + cpuid + mov esi,rEAX + mov dword ptr [esi],eax + mov esi,rEBX + mov dword ptr [esi],ebx + mov esi,rECX + mov dword ptr [esi],ecx + mov esi,rEDX + mov dword ptr [esi],edx + } + return false; + #endif +#endif + return true; +} + void X86_MC::DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model) { Family = (EAX >> 8) & 0xf; // Bits 8 - 11 diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h index c144c513de1..a4e0b5af9e1 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h @@ -54,6 +54,11 @@ namespace X86_MC { /// the specified arguments. If we can't run cpuid on the host, return true. bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, unsigned *rECX, unsigned *rEDX); + /// GetCpuIDAndInfoEx - Execute the specified cpuid with subleaf and return + /// the 4 values in the specified arguments. If we can't run cpuid on the + /// host, return true. + bool GetCpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX, + unsigned *rEBX, unsigned *rECX, unsigned *rEDX); void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model); diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 7064dd06fa3..670073a07bb 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -177,16 +177,18 @@ unsigned X86Subtarget::getSpecialAddressLatency() const { void X86Subtarget::AutoDetectSubtargetFeatures() { unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; + unsigned MaxLevel; union { unsigned u[3]; char c[12]; } text; - - if (X86_MC::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1)) + + if (X86_MC::GetCpuIDAndInfo(0, &MaxLevel, text.u+0, text.u+2, text.u+1) || + MaxLevel < 1) return; X86_MC::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); - + if ((EDX >> 15) & 1) { HasCMov = true; ToggleFeature(X86::FeatureCMOV); } if ((EDX >> 23) & 1) { X86SSELevel = MMX; ToggleFeature(X86::FeatureMMX); } if ((EDX >> 25) & 1) { X86SSELevel = SSE1; ToggleFeature(X86::FeatureSSE1); } @@ -245,27 +247,41 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { ToggleFeature(X86::FeatureSlowBTMem); } // If it's Nehalem, unaligned memory access is fast. + // FIXME: Nehalem is family 6. Also include Westmere and later processors? if (Family == 15 && Model == 26) { IsUAMemFast = true; ToggleFeature(X86::FeatureFastUAMem); } - X86_MC::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); - if ((EDX >> 29) & 0x1) { - HasX86_64 = true; - ToggleFeature(X86::Feature64Bit); - } - if ((ECX >> 5) & 0x1) { - HasLZCNT = true; - ToggleFeature(X86::FeatureLZCNT); - } - if (IsAMD && ((ECX >> 6) & 0x1)) { - HasSSE4A = true; - ToggleFeature(X86::FeatureSSE4A); + unsigned MaxExtLevel; + X86_MC::GetCpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); + + if (MaxExtLevel >= 0x80000001) { + X86_MC::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); + if ((EDX >> 29) & 0x1) { + HasX86_64 = true; + ToggleFeature(X86::Feature64Bit); + } + if ((ECX >> 5) & 0x1) { + HasLZCNT = true; + ToggleFeature(X86::FeatureLZCNT); + } + if (IsAMD && ((ECX >> 6) & 0x1)) { + HasSSE4A = true; + ToggleFeature(X86::FeatureSSE4A); + } + if (IsAMD && ((ECX >> 16) & 0x1)) { + HasFMA4 = true; + ToggleFeature(X86::FeatureFMA4); + } } - if (IsAMD && ((ECX >> 16) & 0x1)) { - HasFMA4 = true; - ToggleFeature(X86::FeatureFMA4); + } + + if (IsIntel && MaxLevel >= 7) { + X86_MC::GetCpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); + if ((EBX >> 3) & 0x1) { + HasBMI = true; + ToggleFeature(X86::FeatureBMI); } } }