revert r79631

[oota-llvm.git] / lib / Target / README.txt
diff --git a/lib/Target/README.txt b/lib/Target/README.txt

index c679fd83c0daccfa44ae210592d635c4fb3352bb..97546c4db595db64db3b9a49bec8058a860a2a26 100644 (file)
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -125,8 +125,7 @@ specific vector types are target dependent.
  
  //===---------------------------------------------------------------------===//
  
-We should add 'unaligned load/store' nodes, and produce them from code like
-this:
+We should produce an unaligned load from code like this:
  
  v4sf example(float *P) {
    return (v4sf){P[0], P[1], P[2], P[3] };
@@ -167,52 +166,14 @@ if anyone cared enough about sincos.
  
  //===---------------------------------------------------------------------===//
  
-Scalar Repl cannot currently promote this testcase to 'ret long cst':
-
-        %struct.X = type { i32, i32 }
-        %struct.Y = type { %struct.X }
-
-define i64 @bar() {
-        %retval = alloca %struct.Y, align 8
-        %tmp12 = getelementptr %struct.Y* %retval, i32 0, i32 0, i32 0
-        store i32 0, i32* %tmp12
-        %tmp15 = getelementptr %struct.Y* %retval, i32 0, i32 0, i32 1
-        store i32 1, i32* %tmp15
-        %retval.upgrd.1 = bitcast %struct.Y* %retval to i64*
-        %retval.upgrd.2 = load i64* %retval.upgrd.1
-        ret i64 %retval.upgrd.2
-}
-
-it should be extended to do so.
-
-//===---------------------------------------------------------------------===//
-
--scalarrepl should promote this to be a vector scalar.
-
-        %struct..0anon = type { <4 x float> }
-
-define void @test1(<4 x float> %V, float* %P) {
-        %u = alloca %struct..0anon, align 16
-        %tmp = getelementptr %struct..0anon* %u, i32 0, i32 0
-        store <4 x float> %V, <4 x float>* %tmp
-        %tmp1 = bitcast %struct..0anon* %u to [4 x float]*
-        %tmp.upgrd.1 = getelementptr [4 x float]* %tmp1, i32 0, i32 1
-        %tmp.upgrd.2 = load float* %tmp.upgrd.1
-        %tmp3 = mul float %tmp.upgrd.2, 2.000000e+00
-        store float %tmp3, float* %P
-        ret void
-}
-
-//===---------------------------------------------------------------------===//
-
  Turn this into a single byte store with no load (the other 3 bytes are
  unmodified):
  
-void %test(uint* %P) {
-       %tmp = load uint* %P
-        %tmp14 = or uint %tmp, 3305111552
-        %tmp15 = and uint %tmp14, 3321888767
-        store uint %tmp15, uint* %P
+define void @test(i32* %P) {
+       %tmp = load i32* %P
+        %tmp14 = or i32 %tmp, 3305111552
+        %tmp15 = and i32 %tmp14, 3321888767
+        store i32 %tmp15, i32* %P
          ret void
  }
  
@@ -236,13 +197,6 @@ _bar:   addic r3,r3,-1
  
  //===---------------------------------------------------------------------===//
  
-Legalize should lower cttz like this:
-  cttz(x) = popcnt((x-1) & ~x)
-
-on targets that have popcnt but not cttz.  itanium, what else?
-
-//===---------------------------------------------------------------------===//
-
  quantum_sigma_x in 462.libquantum contains the following loop:
  
        for(i=0; i<reg->size; i++)
@@ -374,11 +328,6 @@ when it is declared U32.
  
  //===---------------------------------------------------------------------===//
  
-Promote for i32 bswap can use i64 bswap + shr.  Useful on targets with 64-bit
-regs and bswap, like itanium.
-
-//===---------------------------------------------------------------------===//
-
  LSR should know what GPR types a target has.  This code:
  
  volatile short X, Y; // globals
@@ -634,32 +583,6 @@ once.
  
  //===---------------------------------------------------------------------===//
  
-We should extend parameter attributes to capture more information about
-pointer parameters for alias analysis.  Some ideas:
-
-1. Add a "nocapture" attribute, which indicates that the callee does not store
-   the address of the parameter into a global or any other memory location
-   visible to the callee.  This can be used to make basicaa and other analyses
-   more powerful.  It is true for things like memcpy, strcat, and many other
-   things, including structs passed by value, most C++ references, etc.
-2. Generalize readonly to be set on parameters.  This is important mod/ref 
-   info for the function, which is important for basicaa and others.  It can
-   also be used by the inliner to avoid inserting a memcpy for byval 
-   arguments when the function is inlined.
-
-These attributes can be inferred by various analysis passes such as the 
-globalsmodrefaa pass.  Note that getting #2 right is actually really tricky.
-Consider this code:
-
-struct S;  S G;
-void caller(S byvalarg) { G.field = 1; ... }
-void callee() { caller(G); }
-
-The fact that the caller does not modify its byval arg is not enough; we need
-to know that it doesn't modify G either.  This is very tricky.
-
-//===---------------------------------------------------------------------===//
-
  We should add an FRINT node to the DAG to model targets that have legal
  implementations of ceil/floor/rint.
  
@@ -814,16 +737,6 @@ be done safely if "b" isn't modified between the strlen and memcpy of course.
  
  //===---------------------------------------------------------------------===//
  
-We should be able to evaluate this loop:
-
-int test(int x_offs) {
-  while (x_offs > 4)
-     x_offs -= 4;
-  return x_offs;
-}
-
-//===---------------------------------------------------------------------===//
-
  Reassociate should turn things like:
  
  int factorial(int X) {
@@ -909,23 +822,6 @@ multiply hi's into a comparison against the mullo.
  
  //===---------------------------------------------------------------------===//
  
-SROA is not promoting the union on the stack in this example, we should end
-up with no allocas.
-
-union vec2d {
-    double e[2];
-    double v __attribute__((vector_size(16)));
-};
-typedef union vec2d vec2d;
-
-static vec2d a={{1,2}}, b={{3,4}};
-    
-vec2d foo () {
-    return (vec2d){ .v = a.v + b.v * (vec2d){{5,5}}.v };
-}
-
-//===---------------------------------------------------------------------===//
-
  Better mod/ref analysis for scanf would allow us to eliminate the vtable and a
  bunch of other stuff from this example (see PR1604): 
  
@@ -1215,16 +1111,6 @@ optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
  
  //===---------------------------------------------------------------------===//
  
-We would like to do the following transform in the instcombiner:
-
-  -X/C -> X/-C
-
-However, this isn't valid if (-X) overflows. We can implement this when we
-have the concept of a "C signed subtraction" operator that is undefined
-on overflow.
-
-//===---------------------------------------------------------------------===//
-
  This was noticed in the entryblock for grokdeclarator in 403.gcc:
  
          %tmp = icmp eq i32 %decl_context, 4          
@@ -1683,3 +1569,111 @@ Always foldable for odd constants, what is the rule for even?
  
  //===---------------------------------------------------------------------===//
  
+PR 3381: GEP to field of size 0 inside a struct could be turned into GEP
+for next field in struct (which is at same address).
+
+For example: store of float into { {{}}, float } could be turned into a store to
+the float directly.
+
+//===---------------------------------------------------------------------===//
+
+#include <math.h>
+double foo(double a) {    return sin(a); }
+
+This compiles into the following on x86-64 Linux:
+foo:
+       subq    $8, %rsp
+       call    sin
+       addq    $8, %rsp
+       ret
+vs:
+
+foo:
+        jmp sin
+
+//===---------------------------------------------------------------------===//
+
+The arg promotion pass should make use of nocapture to make its alias analysis
+stuff much more precise.
+
+//===---------------------------------------------------------------------===//
+
+The following functions should be optimized to use a select instead of a
+branch (from gcc PR40072):
+
+char char_int(int m) {if(m>7) return 0; return m;}
+int int_char(char m) {if(m>7) return 0; return m;}
+
+//===---------------------------------------------------------------------===//
+
+Instcombine should replace the load with a constant in:
+
+  static const char x[4] = {'a', 'b', 'c', 'd'};
+  
+  unsigned int y(void) {
+    return *(unsigned int *)x;
+  }
+
+It currently only does this transformation when the size of the constant 
+is the same as the size of the integer (so, try x[5]) and the last byte 
+is a null (making it a C string). There's no need for these restrictions.
+
+//===---------------------------------------------------------------------===//
+
+InstCombine's "turn load from constant into constant" optimization should be
+more aggressive in the presence of bitcasts.  For example, because of unions,
+this code:
+
+union vec2d {
+    double e[2];
+    double v __attribute__((vector_size(16)));
+};
+typedef union vec2d vec2d;
+
+static vec2d a={{1,2}}, b={{3,4}};
+    
+vec2d foo () {
+    return (vec2d){ .v = a.v + b.v * (vec2d){{5,5}}.v };
+}
+
+Compiles into:
+
+@a = internal constant %0 { [2 x double] 
+           [double 1.000000e+00, double 2.000000e+00] }, align 16
+@b = internal constant %0 { [2 x double]
+           [double 3.000000e+00, double 4.000000e+00] }, align 16
+...
+define void @foo(%struct.vec2d* noalias nocapture sret %agg.result) nounwind {
+entry:
+       %0 = load <2 x double>* getelementptr (%struct.vec2d* 
+           bitcast (%0* @a to %struct.vec2d*), i32 0, i32 0), align 16
+       %1 = load <2 x double>* getelementptr (%struct.vec2d* 
+           bitcast (%0* @b to %struct.vec2d*), i32 0, i32 0), align 16
+
+
+Instcombine should be able to optimize away the loads (and thus the globals).
+
+
+//===---------------------------------------------------------------------===//
+
+I saw this constant expression in real code after llvm-g++ -O2:
+
+declare extern_weak i32 @0(i64)
+
+define void @foo() {
+  br i1 icmp eq (i32 zext (i1 icmp ne (i32 (i64)* @0, i32 (i64)* null) to i32),
+i32 0), label %cond_true, label %cond_false
+cond_true:
+  ret void
+cond_false:
+  ret void
+}
+
+That branch expression should be reduced to:
+
+  i1 icmp eq (i32 (i64)* @0, i32 (i64)* null)
+
+It's probably not a perf issue, I just happened to see it while examining
+something else and didn't want to forget about it.
+
+//===---------------------------------------------------------------------===//