A lo/hi mul has higher latency than an imul r,ri, e.g. 5 cycles compared to 3
author Cameron Zwarich <zwarich@apple.com>
Mon, 21 Feb 2011 01:29:32 +0000 (01:29 +0000)
committer Cameron Zwarich <zwarich@apple.com>
Mon, 21 Feb 2011 01:29:32 +0000 (01:29 +0000)
on Core 2 and Nehalem, so the code we generate is better than GCC's here.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126100 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/X86/README.txt

index ed3bff150bcf24ee8bb3f00223805e0dd126f434..1e1660dbca0108e363911b0e115fc641defbb81b 100644 (file)
@@ -1878,38 +1878,3 @@ _add32carry:
        ret
 
 //===---------------------------------------------------------------------===//
-
-This:
-char t(char c) {
-  return c/3;
-}
-
-Compiles to: $clang t.c -S -o - -O3 -mkernel -fomit-frame-pointer
-
-_t:                                     ## @t
-       movslq  %edi, %rax
-       imulq   $1431655766, %rax, %rax ## imm = 0x55555556
-       movq    %rax, %rcx
-       shrq    $63, %rcx
-       shrq    $32, %rax
-       addl    %ecx, %eax
-       movsbl  %al, %eax
-       ret
-
-GCC gets:
-
-_t:
-       movl    $86, %eax
-       imulb   %dil
-       shrw    $8, %ax
-       sarb    $7, %dil
-       subb    %dil, %al
-       movsbl  %al,%eax
-       ret
-
-which is nicer.  This also happens for int, not just char.
-
-//===---------------------------------------------------------------------===//
-
-
-