Remove extra whitespace. NFC.

[oota-llvm.git] / lib / Target / README.txt
diff --git a/lib/Target/README.txt b/lib/Target/README.txt

index 4e382e8f8ec1aa49dbfe5150e98918857a0e3b7d..7e9888cc13e80b48466cb5cca8997819eb07cb92 100644 (file)
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -2,22 +2,6 @@ Target Independent Opportunities:
  
  //===---------------------------------------------------------------------===//
  
-With the recent changes to make the implicit def/use set explicit in
-machineinstrs, we should change the target descriptions for 'call' instructions
-so that the .td files don't list all the call-clobbered registers as implicit
-defs.  Instead, these should be added by the code generator (e.g. on the dag).
-
-This has a number of uses:
-
-1. PPC32/64 and X86 32/64 can avoid having multiple copies of call instructions
-   for their different impdef sets.
-2. Targets with multiple calling convs (e.g. x86) which have different clobber
-   sets don't need copies of call instructions.
-3. 'Interprocedural register allocation' can be done to reduce the clobber sets
-   of calls.
-
-//===---------------------------------------------------------------------===//
-
  We should recognize various "overflow detection" idioms and translate them into
  llvm.uadd.with.overflow and similar intrinsics.  Here is a multiply idiom:
  
@@ -109,44 +93,6 @@ This requires reassociating to forms of expressions that are already available,
  something that reassoc doesn't think about yet.
  
  
-//===---------------------------------------------------------------------===//
-
-This function: (derived from GCC PR19988)
-double foo(double x, double y) {
-  return ((x + 0.1234 * y) * (x + -0.1234 * y));
-}
-
-compiles to:
-_foo:
-       movapd  %xmm1, %xmm2
-       mulsd   LCPI1_1(%rip), %xmm1
-       mulsd   LCPI1_0(%rip), %xmm2
-       addsd   %xmm0, %xmm1
-       addsd   %xmm0, %xmm2
-       movapd  %xmm1, %xmm0
-       mulsd   %xmm2, %xmm0
-       ret
-
-Reassociate should be able to turn it into:
-
-double foo(double x, double y) {
-  return ((x + 0.1234 * y) * (x - 0.1234 * y));
-}
-
-Which allows the multiply by constant to be CSE'd, producing:
-
-_foo:
-       mulsd   LCPI1_0(%rip), %xmm1
-       movapd  %xmm1, %xmm2
-       addsd   %xmm0, %xmm2
-       subsd   %xmm1, %xmm0
-       mulsd   %xmm2, %xmm0
-       ret
-
-This doesn't need -ffast-math support at all.  This is particularly bad because
-the llvm-gcc frontend is canonicalizing the later into the former, but clang
-doesn't have this problem.
-
  //===---------------------------------------------------------------------===//
  
  These two functions should generate the same code on big-endian systems:
@@ -160,7 +106,7 @@ for 1,2,4,8 bytes.
  //===---------------------------------------------------------------------===//
  
  It would be nice to revert this patch:
-http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20060213/031986.html
+http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20060213/031986.html
  
  And teach the dag combiner enough to simplify the code expanded before 
  legalize.  It seems plausible that this knowledge would let it simplify other
@@ -168,7 +114,7 @@ stuff too.
  
  //===---------------------------------------------------------------------===//
  
-For vector types, TargetData.cpp::getTypeInfo() returns alignment that is equal
+For vector types, DataLayout.cpp::getTypeInfo() returns alignment that is equal
  to the type size. It works but can be overly conservative as the alignment of
  specific vector types is target dependent.
  
@@ -278,22 +224,7 @@ unsigned countbits_slow(unsigned v) {
      c += v & 1;
    return c;
  }
-unsigned countbits_fast(unsigned v){
-  unsigned c;
-  for (c = 0; v; c++)
-    v &= v - 1; // clear the least significant bit set
-  return c;
-}
  
-BITBOARD = unsigned long long
-int PopCnt(register BITBOARD a) {
-  register int c=0;
-  while(a) {
-    c++;
-    a &= a - 1;
-  }
-  return c;
-}
  unsigned int popcount(unsigned int input) {
    unsigned int count = 0;
    for (unsigned int i =  0; i < 4 * 8; i++)
@@ -802,7 +733,7 @@ f (unsigned long a, unsigned long b, unsigned long c)
    return ((a & (c - 1)) != 0) | ((b & (c - 1)) != 0);
  }
  Both should combine to ((a|b) & (c-1)) != 0.  Currently not optimized with
-"clang -emit-llvm-bc | opt -std-compile-opts".
+"clang -emit-llvm-bc | opt -O3".
  
  //===---------------------------------------------------------------------===//
  
@@ -815,7 +746,7 @@ void clear_pmd_range(unsigned long start, unsigned long end)
  }
  The expression should optimize to something like
  "!((start|end)&~PMD_MASK)". Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts".
+-emit-llvm-bc | opt -O3".
  
  //===---------------------------------------------------------------------===//
  
@@ -834,7 +765,7 @@ int f(int x, int y)
   return (abs(x)) >= 0;
  }
  This should optimize to x == INT_MIN. (With -fwrapv.)  Currently not
-optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+optimized with "clang -emit-llvm-bc | opt -O3".
  
  //===---------------------------------------------------------------------===//
  
@@ -872,92 +803,117 @@ rshift_gt (unsigned int a)
  
  All should simplify to a single comparison.  All of these are
  currently not optimized with "clang -emit-llvm-bc | opt
--std-compile-opts".
+-O3".
  
  //===---------------------------------------------------------------------===//
  
  From GCC Bug 32605:
  int c(int* x) {return (char*)x+2 == (char*)x;}
  Should combine to 0.  Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts" (although llc can optimize it).
+-emit-llvm-bc | opt -O3" (although llc can optimize it).
  
  //===---------------------------------------------------------------------===//
  
  int a(unsigned b) {return ((b << 31) | (b << 30)) >> 31;}
  Should be combined to  "((b >> 1) | b) & 1".  Currently not optimized
-with "clang -emit-llvm-bc | opt -std-compile-opts".
+with "clang -emit-llvm-bc | opt -O3".
  
  //===---------------------------------------------------------------------===//
  
  unsigned a(unsigned x, unsigned y) { return x | (y & 1) | (y & 2);}
  Should combine to "x | (y & 3)".  Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts".
+-emit-llvm-bc | opt -O3".
  
  //===---------------------------------------------------------------------===//
  
  int a(int a, int b, int c) {return (~a & c) | ((c|a) & b);}
  Should fold to "(~a & c) | (a & b)".  Currently not optimized with
-"clang -emit-llvm-bc | opt -std-compile-opts".
+"clang -emit-llvm-bc | opt -O3".
  
  //===---------------------------------------------------------------------===//
  
  int a(int a,int b) {return (~(a|b))|a;}
  Should fold to "a|~b".  Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts".
+-emit-llvm-bc | opt -O3".
  
  //===---------------------------------------------------------------------===//
  
  int a(int a, int b) {return (a&&b) || (a&&!b);}
  Should fold to "a".  Currently not optimized with "clang -emit-llvm-bc
-| opt -std-compile-opts".
+| opt -O3".
  
  //===---------------------------------------------------------------------===//
  
  int a(int a, int b, int c) {return (a&&b) || (!a&&c);}
  Should fold to "a ? b : c", or at least something sane.  Currently not
-optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+optimized with "clang -emit-llvm-bc | opt -O3".
  
  //===---------------------------------------------------------------------===//
  
  int a(int a, int b, int c) {return (a&&b) || (a&&c) || (a&&b&&c);}
  Should fold to a && (b || c).  Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts".
+-emit-llvm-bc | opt -O3".
  
  //===---------------------------------------------------------------------===//
  
  int a(int x) {return x | ((x & 8) ^ 8);}
  Should combine to x | 8.  Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts".
+-emit-llvm-bc | opt -O3".
  
  //===---------------------------------------------------------------------===//
  
  int a(int x) {return x ^ ((x & 8) ^ 8);}
  Should also combine to x | 8.  Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts".
+-emit-llvm-bc | opt -O3".
  
  //===---------------------------------------------------------------------===//
  
  int a(int x) {return ((x | -9) ^ 8) & x;}
  Should combine to x & -9.  Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts".
+-emit-llvm-bc | opt -O3".
  
  //===---------------------------------------------------------------------===//
  
  unsigned a(unsigned a) {return a * 0x11111111 >> 28 & 1;}
  Should combine to "a * 0x88888888 >> 31".  Currently not optimized
-with "clang -emit-llvm-bc | opt -std-compile-opts".
+with "clang -emit-llvm-bc | opt -O3".
  
  //===---------------------------------------------------------------------===//
  
  unsigned a(char* x) {if ((*x & 32) == 0) return b();}
  There's an unnecessary zext in the generated code with "clang
--emit-llvm-bc | opt -std-compile-opts".
+-emit-llvm-bc | opt -O3".
  
  //===---------------------------------------------------------------------===//
  
  unsigned a(unsigned long long x) {return 40 * (x >> 1);}
  Should combine to "20 * (((unsigned)x) & -2)".  Currently not
-optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+optimized with "clang -emit-llvm-bc | opt -O3".
+
+//===---------------------------------------------------------------------===//
+
+int g(int x) { return (x - 10) < 0; }
+Should combine to "x <= 9" (the sub has nsw).  Currently not
+optimized with "clang -emit-llvm-bc | opt -O3".
+
+//===---------------------------------------------------------------------===//
+
+int g(int x) { return (x + 10) < 0; }
+Should combine to "x < -10" (the add has nsw).  Currently not
+optimized with "clang -emit-llvm-bc | opt -O3".
+
+//===---------------------------------------------------------------------===//
+
+int f(int i, int j) { return i < j + 1; }
+int g(int i, int j) { return j > i - 1; }
+Should combine to "i <= j" (the add/sub has nsw).  Currently not
+optimized with "clang -emit-llvm-bc | opt -O3".
+
+//===---------------------------------------------------------------------===//
+
+unsigned f(unsigned x) { return ((x & 7) + 1) & 15; }
+The & 15 part should be optimized away; it doesn't change the result. Currently
+not optimized with "clang -emit-llvm-bc | opt -O3".
  
  //===---------------------------------------------------------------------===//
  
@@ -1169,7 +1125,7 @@ There are many load PRE testcases in testsuite/gcc.dg/tree-ssa/loadpre* in the
  GCC testsuite, ones we don't get yet are (checked through loadpre25):
  
  [CRIT EDGE BREAKING]
-loadpre3.c predcom-4.c
+predcom-4.c
  
  [PRE OF READONLY CALL]
  loadpre5.c
@@ -1312,7 +1268,8 @@ int foo (void) {
  ..
    else if (strchr ("<>", *intel_parser.op_string)
  
-Those should be turned into a switch.
+Those should be turned into a switch.  SimplifyLibCalls only gets the second
+case.
  
  //===---------------------------------------------------------------------===//
  
@@ -1762,7 +1719,6 @@ case it choses instead to keep the max operation obvious.
  
  //===---------------------------------------------------------------------===//
  
-Switch lowering generates less than ideal code for the following switch:
  define void @a(i32 %x) nounwind {
  entry:
    switch i32 %x, label %if.end [
@@ -1783,19 +1739,15 @@ declare void @foo()
  Generated code on x86-64 (other platforms give similar results):
  a:
         cmpl    $5, %edi
-       ja      .LBB0_2
-       movl    %edi, %eax
-       movl    $47, %ecx
-       btq     %rax, %rcx
-       jb      .LBB0_3
+       ja      .LBB0_2
+       cmpl    $4, %edi
+       jne     .LBB0_3
  .LBB0_2:
         ret
  .LBB0_3:
         jmp     foo  # TAILCALL
  
-The movl+movl+btq+jb could be simplified to a cmpl+jne.
-
-Or, if we wanted to be really clever, we could simplify the whole thing to
+If we wanted to be really clever, we could simplify the whole thing to
  something like the following, which eliminates a branch:
         xorl    $1, %edi
         cmpl    $4, %edi
@@ -1892,44 +1844,6 @@ we remove checking in code like
  
  //===---------------------------------------------------------------------===//
  
-This code (from Benchmarks/Dhrystone/dry.c):
-
-define i32 @Func1(i32, i32) nounwind readnone optsize ssp {
-entry:
-  %sext = shl i32 %0, 24
-  %conv = ashr i32 %sext, 24
-  %sext6 = shl i32 %1, 24
-  %conv4 = ashr i32 %sext6, 24
-  %cmp = icmp eq i32 %conv, %conv4
-  %. = select i1 %cmp, i32 10000, i32 0
-  ret i32 %.
-}
-
-Should be simplified into something like:
-
-define i32 @Func1(i32, i32) nounwind readnone optsize ssp {
-entry:
-  %sext = shl i32 %0, 24
-  %conv = and i32 %sext, 0xFF000000
-  %sext6 = shl i32 %1, 24
-  %conv4 = and i32 %sext6, 0xFF000000
-  %cmp = icmp eq i32 %conv, %conv4
-  %. = select i1 %cmp, i32 10000, i32 0
-  ret i32 %.
-}
-
-and then to:
-
-define i32 @Func1(i32, i32) nounwind readnone optsize ssp {
-entry:
-  %conv = and i32 %0, 0xFF
-  %conv4 = and i32 %1, 0xFF
-  %cmp = icmp eq i32 %conv, %conv4
-  %. = select i1 %cmp, i32 10000, i32 0
-  ret i32 %.
-}
-//===---------------------------------------------------------------------===//
-
  clang -O3 currently compiles this code
  
  int g(unsigned int a) {
@@ -2353,3 +2267,13 @@ which can do this in a single operation (instruction or libcall).  It is
  probably best to do this in the code generator.
  
  //===---------------------------------------------------------------------===//
+
+unsigned foo(unsigned x, unsigned y) { return (x & y) == 0 || x == 0; }
+should fold to (x & y) == 0.
+
+//===---------------------------------------------------------------------===//
+
+unsigned foo(unsigned x, unsigned y) { return x > y && x != 0; }
+should fold to x > y.
+
+//===---------------------------------------------------------------------===//