[AVX512] Fix copy-and-paste bugs in vpermil

[oota-llvm.git] / lib / Target / X86 / README-MMX.txt
diff --git a/lib/Target/X86/README-MMX.txt b/lib/Target/X86/README-MMX.txt

index 57c7c3f48c890e5da219b881c7de62cd1ed6dcee..a6c8616b6d2c6b77a897de92552ef3111b20d25b 100644 (file)
--- a/lib/Target/X86/README-MMX.txt
+++ b/lib/Target/X86/README-MMX.txt
@@ -43,27 +43,29 @@ _qux:
  
  //===---------------------------------------------------------------------===//
  
-int main() {
-  __m64 A[1] = { _mm_cvtsi32_si64(1)  };
-  __m64 B[1] = { _mm_cvtsi32_si64(10) };
-  __m64 sum = _mm_cvtsi32_si64(0);
+We generate crappy code for this:
  
-  sum = __builtin_ia32_paddq(__builtin_ia32_paddq(A[0], B[0]), sum);
-
-  printf("Sum = %d\n", _mm_cvtsi64_si32(sum));
-  return 0;
+__m64 t() {
+  return _mm_cvtsi32_si64(1);
  }
  
-Generates:
-
-        movl $11, %eax
-###     movd %eax, %mm0
-###     movq %mm0, 8(%esp)
-###     movl 8(%esp), %eax
-        movl %eax, 4(%esp)
-        movl $_str, (%esp)
-        call L_printf$stub
-        xorl %eax, %eax
-        addl $28, %esp
-
-These instructions are unnecessary.
+_t:
+       subl    $12, %esp
+       movl    $1, %eax
+       movd    %eax, %mm0
+       movq    %mm0, (%esp)
+       movl    (%esp), %eax
+       movl    4(%esp), %edx
+       addl    $12, %esp
+       ret
+
+The extra stack traffic is covered in the previous entry. The other problem is
+that we materialize constants in MMX registers poorly. With -m64 we generate:
+
+       movl    $1, %eax
+       movd    %eax, %mm0
+       movd    %mm0, %rax
+       ret
+
+We should be using a constant pool load instead:
+       movq    LC0(%rip), %rax