[X86] Use ArrayRef. NFC

[oota-llvm.git] / lib / Target / X86 / README-MMX.txt
diff --git a/lib/Target/X86/README-MMX.txt b/lib/Target/X86/README-MMX.txt

index b4886aed2739af8d1a9893ab005aa1820e801863..a6c8616b6d2c6b77a897de92552ef3111b20d25b 100644 (file)
--- a/lib/Target/X86/README-MMX.txt
+++ b/lib/Target/X86/README-MMX.txt
@@ -4,56 +4,68 @@
  
  //===---------------------------------------------------------------------===//
  
-We should compile 
+This:
  
  #include <mmintrin.h>
  
-extern __m64 C;
-
-void baz(__v2si *A, __v2si *B)
-{
-  *A = __builtin_ia32_psllq(*B, C);
-  _mm_empty();
+__v2si qux(int A) {
+  return (__v2si){ 0, A };
  }
  
-to:
+is compiled into:
+
+_qux:
+        subl $28, %esp
+        movl 32(%esp), %eax
+        movd %eax, %mm0
+        movq %mm0, (%esp)
+        movl (%esp), %eax
+        movl %eax, 20(%esp)
+        movq %mm0, 8(%esp)
+        movl 12(%esp), %eax
+        movl %eax, 16(%esp)
+        movq 16(%esp), %mm0
+        addl $28, %esp
+        ret
+
+Yuck!
+
+GCC gives us:
+
+_qux:
+        subl    $12, %esp
+        movl    16(%esp), %eax
+        movl    20(%esp), %edx
+        movl    $0, (%eax)
+        movl    %edx, 4(%eax)
+        addl    $12, %esp
+        ret     $4
+
+//===---------------------------------------------------------------------===//
+
+We generate crappy code for this:
+
+__m64 t() {
+  return _mm_cvtsi32_si64(1);
+}
  
-.globl _baz
-_baz:
-       call    L3
-"L00000000001$pb":
-L3:
-       popl    %ecx
+_t:
         subl    $12, %esp
-       movl    20(%esp), %eax
-       movq    (%eax), %mm0
-       movl    L_C$non_lazy_ptr-"L00000000001$pb"(%ecx), %eax
-       movq    (%eax), %mm1
-       movl    16(%esp), %eax
-       psllq   %mm1, %mm0
-       movq    %mm0, (%eax)
-       emms
+       movl    $1, %eax
+       movd    %eax, %mm0
+       movq    %mm0, (%esp)
+       movl    (%esp), %eax
+       movl    4(%esp), %edx
         addl    $12, %esp
         ret
  
-not:
-
-_baz:
-       subl $12, %esp
-       call "L1$pb"
-"L1$pb":
-       popl %eax
-       movl L_C$non_lazy_ptr-"L1$pb"(%eax), %eax
-       movl (%eax), %ecx
-       movl %ecx, (%esp)
-       movl 4(%eax), %eax
-       movl %eax, 4(%esp)
-       movl 20(%esp), %eax
-       movq (%eax), %mm0
-       movq (%esp), %mm1
-       psllq %mm1, %mm0
-       movl 16(%esp), %eax
-       movq %mm0, (%eax)
-       emms
-       addl $12, %esp
+The extra stack traffic is covered in the previous entry. The other reason is
+that we are not smart about materializing constants in MMX registers. With -m64 we get:
+
+       movl    $1, %eax
+       movd    %eax, %mm0
+       movd    %mm0, %rax
         ret
+
+We should be using a constant-pool load instead:
+       movq    LC0(%rip), %rax