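A lo/hi mul has higher latency than an imul r,ri (e.g. 5 cycles compared to 3), so the README entry removed below, which calls GCC's one-operand imulb sequence "nicer" than the imulq-with-immediate sequence, is dropped.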

author Cameron Zwarich <zwarich@apple.com>

Mon, 21 Feb 2011 01:29:32 +0000 (01:29 +0000)

committer Cameron Zwarich <zwarich@apple.com>

Mon, 21 Feb 2011 01:29:32 +0000 (01:29 +0000)
author Cameron Zwarich <zwarich@apple.com>
Mon, 21 Feb 2011 01:29:32 +0000 (01:29 +0000)
committer Cameron Zwarich <zwarich@apple.com>
Mon, 21 Feb 2011 01:29:32 +0000 (01:29 +0000)
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt

index ed3bff150bcf24ee8bb3f00223805e0dd126f434..1e1660dbca0108e363911b0e115fc641defbb81b 100644 (file)
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1878,38 +1878,3 @@ _add32carry:
         ret
  
  //===---------------------------------------------------------------------===//
-
-This:
-char t(char c) {
-  return c/3;
-}
-
-Compiles to: $clang t.c -S -o - -O3 -mkernel -fomit-frame-pointer
-
-_t:                                     ## @t
-       movslq  %edi, %rax
-       imulq   $1431655766, %rax, %rax ## imm = 0x55555556
-       movq    %rax, %rcx
-       shrq    $63, %rcx
-       shrq    $32, %rax
-       addl    %ecx, %eax
-       movsbl  %al, %eax
-       ret
-
-GCC gets:
-
-_t:
-       movl    $86, %eax
-       imulb   %dil
-       shrw    $8, %ax
-       sarb    $7, %dil
-       subb    %dil, %al
-       movsbl  %al,%eax
-       ret
-
-which is nicer.  This also happens for int, not just char.
-
-//===---------------------------------------------------------------------===//
-
-
-
author	Cameron Zwarich <zwarich@apple.com>
	Mon, 21 Feb 2011 01:29:32 +0000 (01:29 +0000)
committer	Cameron Zwarich <zwarich@apple.com>
	Mon, 21 Feb 2011 01:29:32 +0000 (01:29 +0000)