lib/Target/X86/README-MMX.txt

   1 //===---------------------------------------------------------------------===//
   2 // Random ideas for the X86 backend: MMX-specific stuff.
   3 //===---------------------------------------------------------------------===//
   4
   5 //===---------------------------------------------------------------------===//
   6
   7 This:
   8
   9 #include <mmintrin.h>
  10
  11 __v2si qux(int A) {
  12   return (__v2si){ 0, A };
  13 }
  14
  15 is compiled into:
  16
  17 _qux:
  18         subl $28, %esp
  19         movl 32(%esp), %eax
  20         movd %eax, %mm0
  21         movq %mm0, (%esp)
  22         movl (%esp), %eax
  23         movl %eax, 20(%esp)
  24         movq %mm0, 8(%esp)
  25         movl 12(%esp), %eax
  26         movl %eax, 16(%esp)
  27         movq 16(%esp), %mm0
  28         addl $28, %esp
  29         ret
  30
  31 Yuck!
  32
  33 GCC gives us:
  34
  35 _qux:
  36         subl    $12, %esp
  37         movl    16(%esp), %eax
  38         movl    20(%esp), %edx
  39         movl    $0, (%eax)
  40         movl    %edx, 4(%eax)
  41         addl    $12, %esp
  42         ret     $4
  43
  44 //===---------------------------------------------------------------------===//
  45
  46 We generate crappy code for this:
  47
  48 __m64 t() {
  49   return _mm_cvtsi32_si64(1);
  50 }
  51
  52 _t:
  53         subl    $12, %esp
  54         movl    $1, %eax
  55         movd    %eax, %mm0
  56         movq    %mm0, (%esp)
  57         movl    (%esp), %eax
  58         movl    4(%esp), %edx
  59         addl    $12, %esp
  60         ret
  61
  62 The extra stack traffic is covered in the previous entry. The other problem is
  63 that we are not smart about materializing constants in MMX registers. With -m64 we get:
  64
  65         movl    $1, %eax
  66         movd    %eax, %mm0
  67         movd    %mm0, %rax
  68         ret
  69
  70 We should be using a constant-pool load instead:
  71         movq    LC0(%rip), %rax