revert r79631

[oota-llvm.git] / lib / Target / README.txt
diff --git a/lib/Target/README.txt b/lib/Target/README.txt

index 8399115e598aebe2011952f596bce12f830c9f1f..97546c4db595db64db3b9a49bec8058a860a2a26 100644 (file)
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -125,8 +125,7 @@ specific vector types are target dependent.
  
  //===---------------------------------------------------------------------===//
  
-We should add 'unaligned load/store' nodes, and produce them from code like
-this:
+We should produce an unaligned load from code like this:
  
  v4sf example(float *P) {
    return (v4sf){P[0], P[1], P[2], P[3] };
@@ -170,11 +169,11 @@ if anyone cared enough about sincos.
  Turn this into a single byte store with no load (the other 3 bytes are
  unmodified):
  
-void %test(uint* %P) {
-       %tmp = load uint* %P
-        %tmp14 = or uint %tmp, 3305111552
-        %tmp15 = and uint %tmp14, 3321888767
-        store uint %tmp15, uint* %P
+define void @test(i32* %P) {
+       %tmp = load i32* %P
+        %tmp14 = or i32 %tmp, 3305111552
+        %tmp15 = and i32 %tmp14, 3321888767
+        store i32 %tmp15, i32* %P
          ret void
  }
  
@@ -198,13 +197,6 @@ _bar:   addic r3,r3,-1
  
  //===---------------------------------------------------------------------===//
  
-Legalize should lower ctlz like this:
-  ctlz(x) = popcnt((x-1) & ~x)
-
-on targets that have popcnt but not ctlz.  itanium, what else?
-
-//===---------------------------------------------------------------------===//
-
  quantum_sigma_x in 462.libquantum contains the following loop:
  
        for(i=0; i<reg->size; i++)
@@ -336,11 +328,6 @@ when it is declared U32.
  
  //===---------------------------------------------------------------------===//
  
-Promote for i32 bswap can use i64 bswap + shr.  Useful on targets with 64-bit
-regs and bswap, like itanium.
-
-//===---------------------------------------------------------------------===//
-
  LSR should know what GPR types a target has.  This code:
  
  volatile short X, Y; // globals
@@ -750,16 +737,6 @@ be done safely if "b" isn't modified between the strlen and memcpy of course.
  
  //===---------------------------------------------------------------------===//
  
-We should be able to evaluate this loop:
-
-int test(int x_offs) {
-  while (x_offs > 4)
-     x_offs -= 4;
-  return x_offs;
-}
-
-//===---------------------------------------------------------------------===//
-
  Reassociate should turn things like:
  
  int factorial(int X) {
@@ -1134,16 +1111,6 @@ optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
  
  //===---------------------------------------------------------------------===//
  
-We would like to do the following transform in the instcombiner:
-
-  -X/C -> X/-C
-
-However, this isn't valid if (-X) overflows. We can implement this when we
-have the concept of a "C signed subtraction" operator that is undefined
-on overflow.
-
-//===---------------------------------------------------------------------===//
-
  This was noticed in the entryblock for grokdeclarator in 403.gcc:
  
          %tmp = icmp eq i32 %decl_context, 4          
@@ -1687,4 +1654,26 @@ entry:
  Instcombine should be able to optimize away the loads (and thus the globals).
  
  
+//===---------------------------------------------------------------------===//
+
+I saw this constant expression in real code after llvm-g++ -O2:
+
+declare extern_weak i32 @0(i64)
+
+define void @foo() {
+  br i1 icmp eq (i32 zext (i1 icmp ne (i32 (i64)* @0, i32 (i64)* null) to i32),
+i32 0), label %cond_true, label %cond_false
+cond_true:
+  ret void
+cond_false:
+  ret void
+}
+
+That branch expression should be reduced to:
+
+  i1 icmp eq (i32 (i64)* @0, i32 (i64)* null)
+
+It's probably not a perf issue; I just happened to see it while examining
+something else and didn't want to forget about it.
+
  //===---------------------------------------------------------------------===//