//===---------------------------------------------------------------------===// // Random ideas for the X86 backend: MMX-specific stuff. //===---------------------------------------------------------------------===// //===---------------------------------------------------------------------===// This: #include __v2si qux(int A) { return (__v2si){ 0, A }; } is compiled into: _qux: subl $28, %esp movl 32(%esp), %eax movd %eax, %mm0 movq %mm0, (%esp) movl (%esp), %eax movl %eax, 20(%esp) movq %mm0, 8(%esp) movl 12(%esp), %eax movl %eax, 16(%esp) movq 16(%esp), %mm0 addl $28, %esp ret Yuck! GCC gives us: _qux: subl $12, %esp movl 16(%esp), %eax movl 20(%esp), %edx movl $0, (%eax) movl %edx, 4(%eax) addl $12, %esp ret $4 //===---------------------------------------------------------------------===// int main() { __m64 A[1] = { _mm_cvtsi32_si64(1) }; __m64 B[1] = { _mm_cvtsi32_si64(10) }; __m64 sum = _mm_cvtsi32_si64(0); sum = __builtin_ia32_paddq(__builtin_ia32_paddq(A[0], B[0]), sum); printf("Sum = %d\n", _mm_cvtsi64_si32(sum)); return 0; } Generates: movl $11, %eax ### movd %eax, %mm0 ### movq %mm0, 8(%esp) ### movl 8(%esp), %eax movl %eax, 4(%esp) movl $_str, (%esp) call L_printf$stub xorl %eax, %eax addl $28, %esp These instructions are unnecessary.