transfer some notes from my email to somewhere useful.
author Chris Lattner <sabre@nondot.org>
Mon, 16 Jan 2006 17:53:00 +0000 (17:53 +0000)
committer Chris Lattner <sabre@nondot.org>
Mon, 16 Jan 2006 17:53:00 +0000 (17:53 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@25361 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/PowerPC/README.txt
lib/Target/X86/README.txt

index 84536ef2deba2b971e8d38c8ae09c068c42fbff2..ffae6110a8686cf08b4f59ecaaad3092170c2717 100644 (file)
@@ -225,3 +225,29 @@ struct foo { double X, Y; };
 void xxx(struct foo F);
 void bar() { struct foo R = { 1.0, 2.0 }; xxx(R); }
 
+===-------------------------------------------------------------------------===
+
+For this:
+
+int h(int i, int j, int k) {
+ return (i==0||j==0||k == 0);
+}
+
+We currently emit this:
+
+_h:
+        cntlzw r2, r3
+        cntlzw r3, r4
+        cntlzw r4, r5
+        srwi r2, r2, 5
+        srwi r3, r3, 5
+        srwi r4, r4, 5
+        or r2, r3, r2
+        or r3, r2, r4
+        blr
+
+The ctlz/shift instructions are created by the isel, so the dag combiner doesn't
+have a chance to pull the shifts through the or's (eliminating two 
+instructions).  SETCC nodes should be custom lowered in this case, not expanded
+by the isel.
+
index 4869c5e956dfd8b968d8fbada4f863ebbe10d0d2..cb206f3b04e2c16f0550311ad6f61aaf9d1a2f57 100644 (file)
@@ -54,6 +54,10 @@ fxch ->              fucomi
 fucomi                 jl X
 jg X
 
+Ideas:
+http://gcc.gnu.org/ml/gcc-patches/2004-11/msg02410.html
+
+
 //===---------------------------------------------------------------------===//
 
 Improvements to the multiply -> shift/add algorithm:
@@ -121,3 +125,44 @@ Model X86 EFLAGS as a real register to avoid redundant cmp / test. e.g.
        setg %al
        testb %al, %al  # unnecessary
        jne .BB7
+
+//===---------------------------------------------------------------------===//
+
+Count leading zeros and count trailing zeros:
+
+int clz(int X) { return __builtin_clz(X); }
+int ctz(int X) { return __builtin_ctz(X); }
+
+$ gcc t.c -S -o - -O3  -fomit-frame-pointer -masm=intel
+clz:
+        bsr     %eax, DWORD PTR [%esp+4]
+        xor     %eax, 31
+        ret
+ctz:
+        bsf     %eax, DWORD PTR [%esp+4]
+        ret
+
+However, check that these are defined for 0 and 32.  Our intrinsics are; GCC's
+aren't.
+
+//===---------------------------------------------------------------------===//
+
+Use push/pop instructions in prolog/epilog sequences instead of stores off 
+ESP (certain code size win, perf win on some [which?] processors).
+
+//===---------------------------------------------------------------------===//
+
+Only use inc/neg/not instructions on processors where they are faster than
+add/sub/xor.  They are slower on the P4 because they update only some of the
+processor flags.
+
+//===---------------------------------------------------------------------===//
+
+Open code rint, floor, ceil, trunc:
+http://gcc.gnu.org/ml/gcc-patches/2004-08/msg02006.html
+http://gcc.gnu.org/ml/gcc-patches/2004-08/msg02011.html
+
+//===---------------------------------------------------------------------===//
+
+Combine: a = sin(x), b = cos(x) into a,b = sincos(x).
+