update a bunch of entries.

author Chris Lattner <sabre@nondot.org>

Sun, 2 Jan 2011 18:31:38 +0000 (18:31 +0000)

committer Chris Lattner <sabre@nondot.org>

Sun, 2 Jan 2011 18:31:38 +0000 (18:31 +0000)
author Chris Lattner <sabre@nondot.org>
Sun, 2 Jan 2011 18:31:38 +0000 (18:31 +0000)
committer Chris Lattner <sabre@nondot.org>
Sun, 2 Jan 2011 18:31:38 +0000 (18:31 +0000)
diff --git a/lib/Target/README.txt b/lib/Target/README.txt

index f047d087cbf8823787fe8bc78484376347e8c4ce..fa5e6010cfc53615ee80af09a125a5f551bf73e8 100644 (file)
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -2,38 +2,6 @@ Target Independent Opportunities:
  
  //===---------------------------------------------------------------------===//
  
-We should recognize idioms for add-with-carry and turn it into the appropriate
-intrinsics.  This example:
-
-unsigned add32carry(unsigned sum, unsigned x) {
- unsigned z = sum + x;
- if (sum + x < x)
-     z++;
- return z;
-}
-
-Compiles to: clang t.c -S -o - -O3 -fomit-frame-pointer -m64 -mkernel
-
-_add32carry:                            ## @add32carry
-       addl    %esi, %edi
-       cmpl    %esi, %edi
-       sbbl    %eax, %eax
-       andl    $1, %eax
-       addl    %edi, %eax
-       ret
-
-with clang, but to:
-
-_add32carry:
-       leal    (%rsi,%rdi), %eax
-       cmpl    %esi, %eax
-       adcl    $0, %eax
-       ret
-
-with gcc.
-
-//===---------------------------------------------------------------------===//
-
  Dead argument elimination should be enhanced to handle cases when an argument is
  dead to an externally visible function.  Though the argument can't be removed
  from the externally visible function, the caller doesn't need to pass it in.
@@ -82,6 +50,9 @@ unsigned int mul(unsigned int a,unsigned int b) {
    return a*b;
  }
  
+The legalization code for mul-with-overflow needs to be made more robust before
+this can be implemented, though.
+
  //===---------------------------------------------------------------------===//
  
  Get the C front-end to expand hypot(x,y) -> llvm.sqrt(x*x+y*y) when errno and
@@ -92,41 +63,6 @@ right).
  
  //===---------------------------------------------------------------------===//
  
-Solve this DAG isel folding deficiency:
-
-int X, Y;
-
-void fn1(void)
-{
-  X = X | (Y << 3);
-}
-
-compiles to
-
-fn1:
-       movl Y, %eax
-       shll $3, %eax
-       orl X, %eax
-       movl %eax, X
-       ret
-
-The problem is the store's chain operand is not the load X but rather
-a TokenFactor of the load X and load Y, which prevents the folding.
-
-There are two ways to fix this:
-
-1. The dag combiner can start using alias analysis to realize that y/x
-   don't alias, making the store to X not dependent on the load from Y.
-2. The generated isel could be made smarter in the case it can't
-   disambiguate the pointers.
-
-Number 1 is the preferred solution.
-
-This has been "fixed" by a TableGen hack. But that is a short term workaround
-which will be removed once the proper fix is made.
-
-//===---------------------------------------------------------------------===//
-
  On targets with expensive 64-bit multiply, we could LSR this:
  
  for (i = ...; ++i) {
@@ -339,14 +275,6 @@ unsigned long reverse(unsigned v) {
      return v ^ (t >> 8);
  }
  
-Neither is this (very standard idiom):
-
-int f(int n)
-{
-  return (((n) << 24) | (((n) & 0xff00) << 8) 
-       | (((n) >> 8) & 0xff00) | ((n) >> 24));
-}
-
  //===---------------------------------------------------------------------===//
  
  [LOOP RECOGNITION]
@@ -382,9 +310,7 @@ unsigned int popcount(unsigned int input) {
    return count;
  }
  
-This is a form of idiom recognition for loops, the same thing that could be
-useful for recognizing memset/memcpy.  This sort of thing should be added to the
-loop idiom pass.
+This sort of thing should be added to the loop idiom pass.
  
  //===---------------------------------------------------------------------===//
  
@@ -639,46 +565,21 @@ struct THotKey { short Key; bool Control; bool Shift; bool Alt; };
  extern THotKey m_HotKey;
  THotKey GetHotKey () { return m_HotKey; }
  
-into (-O3 -fno-exceptions -static -fomit-frame-pointer):
-
-__Z9GetHotKeyv:
-       pushl   %esi
-       movl    8(%esp), %eax
-       movb    _m_HotKey+3, %cl
-       movb    _m_HotKey+4, %dl
-       movb    _m_HotKey+2, %ch
-       movw    _m_HotKey, %si
-       movw    %si, (%eax)
-       movb    %ch, 2(%eax)
-       movb    %cl, 3(%eax)
-       movb    %dl, 4(%eax)
-       popl    %esi
-       ret     $4
-
-GCC produces:
-
-__Z9GetHotKeyv:
-       movl    _m_HotKey, %edx
-       movl    4(%esp), %eax
-       movl    %edx, (%eax)
-       movzwl  _m_HotKey+4, %edx
-       movw    %dx, 4(%eax)
-       ret     $4
-
-The LLVM IR contains the needed alignment info, so we should be able to 
-merge the loads and stores into 4-byte loads:
-
-       %struct.THotKey = type { i16, i8, i8, i8 }
-define void @_Z9GetHotKeyv(%struct.THotKey* sret  %agg.result) nounwind  {
-...
-       %tmp2 = load i16* getelementptr (@m_HotKey, i32 0, i32 0), align 8
-       %tmp5 = load i8* getelementptr (@m_HotKey, i32 0, i32 1), align 2
-       %tmp8 = load i8* getelementptr (@m_HotKey, i32 0, i32 2), align 1
-       %tmp11 = load i8* getelementptr (@m_HotKey, i32 0, i32 3), align 2
-
-Alternatively, we should use a small amount of base-offset alias analysis
-to make it so the scheduler doesn't need to hold all the loads in regs at
-once.
+into (-m64 -O3 -fno-exceptions -static -fomit-frame-pointer):
+
+__Z9GetHotKeyv:                         ## @_Z9GetHotKeyv
+       movq    _m_HotKey@GOTPCREL(%rip), %rax
+       movzwl  (%rax), %ecx
+       movzbl  2(%rax), %edx
+       shlq    $16, %rdx
+       orq     %rcx, %rdx
+       movzbl  3(%rax), %ecx
+       shlq    $24, %rcx
+       orq     %rdx, %rcx
+       movzbl  4(%rax), %eax
+       shlq    $32, %rax
+       orq     %rcx, %rax
+       ret
  
  //===---------------------------------------------------------------------===//
  
@@ -764,20 +665,6 @@ etc.  On X86, we miss a bunch of 'rotate by variable' cases because the rotate
  matching code in dag combine doesn't look through truncates aggressively 
  enough.  Here are some testcases reduced from GCC PR17886:
  
-unsigned long long f(unsigned long long x, int y) {
-  return (x << y) | (x >> 64-y); 
-} 
-unsigned f2(unsigned x, int y){
-  return (x << y) | (x >> 32-y); 
-} 
-unsigned long long f3(unsigned long long x){
-  int y = 9;
-  return (x << y) | (x >> 64-y); 
-} 
-unsigned f4(unsigned x){
-  int y = 10;
-  return (x << y) | (x >> 32-y); 
-}
  unsigned long long f5(unsigned long long x, unsigned long long y) {
    return (x << 8) | ((y >> 48) & 0xffull);
  }
@@ -796,11 +683,6 @@ unsigned long long f6(unsigned long long x, unsigned long long y, int z) {
    }
  }
  
-On X86-64, we only handle f2/f3/f4 right.  On x86-32, a few of these 
-generate truly horrible code, instead of using shld and friends.  On
-ARM, we end up with calls to L___lshrdi3/L___ashldi3 in f, which is
-badness.  PPC64 misses f, f5 and f6.  CellSPU aborts in isel.
-
  //===---------------------------------------------------------------------===//
  
  This (and similar related idioms):
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt

index 4005431b75a3c450293199e36f1864bf2b8dac27..551d9f08526be96a42526114d125401b6275229c 100644 (file)
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1507,6 +1507,8 @@ loop, the value comes into the loop as two values, and
  RegsForValue::getCopyFromRegs doesn't know how to put an AssertSext on the
  constructed BUILD_PAIR which represents the cast value.
  
+This can be handled by making CodeGenPrepare sink the cast.
+
  //===---------------------------------------------------------------------===//
  
  Test instructions can be eliminated by using EFLAGS values from arithmetic
@@ -1847,3 +1849,38 @@ _foo:
  0 is the only unsigned number < 1.
  
  //===---------------------------------------------------------------------===//
+
+This code:
+
+%0 = type { i32, i1 }
+
+define i32 @add32carry(i32 %sum, i32 %x) nounwind readnone ssp {
+entry:
+  %uadd = tail call %0 @llvm.uadd.with.overflow.i32(i32 %sum, i32 %x)
+  %cmp = extractvalue %0 %uadd, 1
+  %inc = zext i1 %cmp to i32
+  %add = add i32 %x, %sum
+  %z.0 = add i32 %add, %inc
+  ret i32 %z.0
+}
+
+declare %0 @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
+
+compiles to:
+
+_add32carry:                            ## @add32carry
+       addl    %esi, %edi
+       sbbl    %ecx, %ecx
+       movl    %edi, %eax
+       subl    %ecx, %eax
+       ret
+
+But it could be:
+
+_add32carry:
+       leal    (%rsi,%rdi), %eax
+       cmpl    %esi, %eax
+       adcl    $0, %eax
+       ret
+
+//===---------------------------------------------------------------------===//
author	Chris Lattner <sabre@nondot.org>
	Sun, 2 Jan 2011 18:31:38 +0000 (18:31 +0000)
committer	Chris Lattner <sabre@nondot.org>
	Sun, 2 Jan 2011 18:31:38 +0000 (18:31 +0000)
lib/Target/README.txt		patch \| blob \| history
lib/Target/X86/README.txt		patch \| blob \| history