Using popcount should check the popcount feature flag not the SSE41 feature flag.
author Craig Topper <craig.topper@gmail.com>
Sun, 8 Sep 2013 00:47:31 +0000 (00:47 +0000)
committer Craig Topper <craig.topper@gmail.com>
Sun, 8 Sep 2013 00:47:31 +0000 (00:47 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190258 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/X86/X86TargetTransformInfo.cpp

index 5f81d33872024f4a2aaa926bb09a307b68693f83..935a6da5d55f694d4d89d555b9b5db5cb5fd41f2 100644 (file)
@@ -127,8 +127,8 @@ X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const {
   assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
   // TODO: Currently the __builtin_popcount() implementation using SSE3
   //   instructions is inefficient. Once the problem is fixed, we should
-  //   call ST->hasSSE3() instead of ST->hasSSE4().
-  return ST->hasSSE41() ? PSK_FastHardware : PSK_Software;
+  //   call ST->hasSSE3() instead of ST->hasPOPCNT().
+  return ST->hasPOPCNT() ? PSK_FastHardware : PSK_Software;
 }
 
 unsigned X86TTI::getNumberOfRegisters(bool Vector) const {