Refactoring SimplifyLibCalls to remove static initializers and generally cleaning...

[oota-llvm.git] / lib / Target / README.txt
diff --git a/lib/Target/README.txt b/lib/Target/README.txt

index 4cc95340890d4cf997d9a91192ef435e4aacb2d1..bf8d465d0aeebe9db2b1a2e5375fef188fa8853c 100644 (file)
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -2,22 +2,6 @@ Target Independent Opportunities:
  
  //===---------------------------------------------------------------------===//
  
-With the recent changes to make the implicit def/use set explicit in
-machineinstrs, we should change the target descriptions for 'call' instructions
-so that the .td files don't list all the call-clobbered registers as implicit
-defs.  Instead, these should be added by the code generator (e.g. on the dag).
-
-This has a number of uses:
-
-1. PPC32/64 and X86 32/64 can avoid having multiple copies of call instructions
-   for their different impdef sets.
-2. Targets with multiple calling convs (e.g. x86) which have different clobber
-   sets don't need copies of call instructions.
-3. 'Interprocedural register allocation' can be done to reduce the clobber sets
-   of calls.
-
-//===---------------------------------------------------------------------===//
-
  We should recognized various "overflow detection" idioms and translate them into
  llvm.uadd.with.overflow and similar intrinsics.  Here is a multiply idiom:
  
@@ -109,44 +93,6 @@ This requires reassociating to forms of expressions that are already available,
  something that reassoc doesn't think about yet.
  
  
-//===---------------------------------------------------------------------===//
-
-This function: (derived from GCC PR19988)
-double foo(double x, double y) {
-  return ((x + 0.1234 * y) * (x + -0.1234 * y));
-}
-
-compiles to:
-_foo:
-       movapd  %xmm1, %xmm2
-       mulsd   LCPI1_1(%rip), %xmm1
-       mulsd   LCPI1_0(%rip), %xmm2
-       addsd   %xmm0, %xmm1
-       addsd   %xmm0, %xmm2
-       movapd  %xmm1, %xmm0
-       mulsd   %xmm2, %xmm0
-       ret
-
-Reassociate should be able to turn it into:
-
-double foo(double x, double y) {
-  return ((x + 0.1234 * y) * (x - 0.1234 * y));
-}
-
-Which allows the multiply by constant to be CSE'd, producing:
-
-_foo:
-       mulsd   LCPI1_0(%rip), %xmm1
-       movapd  %xmm1, %xmm2
-       addsd   %xmm0, %xmm2
-       subsd   %xmm1, %xmm0
-       mulsd   %xmm2, %xmm0
-       ret
-
-This doesn't need -ffast-math support at all.  This is particularly bad because
-the llvm-gcc frontend is canonicalizing the later into the former, but clang
-doesn't have this problem.
-
  //===---------------------------------------------------------------------===//
  
  These two functions should generate the same code on big-endian systems:
@@ -168,7 +114,7 @@ stuff too.
  
  //===---------------------------------------------------------------------===//
  
-For vector types, TargetData.cpp::getTypeInfo() returns alignment that is equal
+For vector types, DataLayout.cpp::getTypeInfo() returns alignment that is equal
  to the type size. It works but can be overly conservative as the alignment of
  specific vector types are target dependent.
  
@@ -278,22 +224,7 @@ unsigned countbits_slow(unsigned v) {
      c += v & 1;
    return c;
  }
-unsigned countbits_fast(unsigned v){
-  unsigned c;
-  for (c = 0; v; c++)
-    v &= v - 1; // clear the least significant bit set
-  return c;
-}
  
-BITBOARD = unsigned long long
-int PopCnt(register BITBOARD a) {
-  register int c=0;
-  while(a) {
-    c++;
-    a &= a - 1;
-  }
-  return c;
-}
  unsigned int popcount(unsigned int input) {
    unsigned int count = 0;
    for (unsigned int i =  0; i < 4 * 8; i++)
@@ -961,6 +892,31 @@ optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
  
  //===---------------------------------------------------------------------===//
  
+int g(int x) { return (x - 10) < 0; }
+Should combine to "x <= 9" (the sub has nsw).  Currently not
+optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int g(int x) { return (x + 10) < 0; }
+Should combine to "x < -10" (the add has nsw).  Currently not
+optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int f(int i, int j) { return i < j + 1; }
+int g(int i, int j) { return j > i - 1; }
+Should combine to "i <= j" (the add/sub has nsw).  Currently not
+optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+unsigned f(unsigned x) { return ((x & 7) + 1) & 15; }
+The & 15 part should be optimized away, it doesn't change the result. Currently
+not optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
  This was noticed in the entryblock for grokdeclarator in 403.gcc:
  
          %tmp = icmp eq i32 %decl_context, 4          
@@ -1169,7 +1125,7 @@ There are many load PRE testcases in testsuite/gcc.dg/tree-ssa/loadpre* in the
  GCC testsuite, ones we don't get yet are (checked through loadpre25):
  
  [CRIT EDGE BREAKING]
-loadpre3.c predcom-4.c
+predcom-4.c
  
  [PRE OF READONLY CALL]
  loadpre5.c
@@ -2348,3 +2304,13 @@ which can do this in a single operation (instruction or libcall).  It is
  probably best to do this in the code generator.
  
  //===---------------------------------------------------------------------===//
+
+unsigned foo(unsigned x, unsigned y) { return (x & y) == 0 || x == 0; }
+should fold to (x & y) == 0.
+
+//===---------------------------------------------------------------------===//
+
+unsigned foo(unsigned x, unsigned y) { return x > y && x != 0; }
+should fold to x > y.
+
+//===---------------------------------------------------------------------===//