lib/Target/README.txt

   1 Target Independent Opportunities:
   2
   3 ===-------------------------------------------------------------------------===
   4
   5 FreeBench/mason contains code like this:
   6
   7 static p_type m0u(p_type p) {
   8   int m[]={0, 8, 1, 2, 16, 5, 13, 7, 14, 9, 3, 4, 11, 12, 15, 10, 17, 6};
   9   p_type pu;
  10   pu.a = m[p.a];
  11   pu.b = m[p.b];
  12   pu.c = m[p.c];
  13   return pu;
  14 }
  15
  16 We currently compile this into a memcpy from a static array into 'm', then
  17 a bunch of loads from m.  It would be better to avoid the memcpy and just do
  18 loads from the static array.
  19
  20 //===---------------------------------------------------------------------===//
  21
  22 Make the PPC branch selector target independent
  23
  24 //===---------------------------------------------------------------------===//
  25
  26 Get the C front-end to expand hypot(x,y) -> llvm.sqrt(x*x+y*y) when errno and
  27 precision don't matter (-ffast-math).  Misc/mandel will like this. :)
  28
  29 //===---------------------------------------------------------------------===//
  30
  31 Solve this DAG isel folding deficiency:
  32
  33 int X, Y;
  34
  35 void fn1(void)
  36 {
  37   X = X | (Y << 3);
  38 }
  39
  40 compiles to
  41
  42 fn1:
  43         movl Y, %eax
  44         shll $3, %eax
  45         orl X, %eax
  46         movl %eax, X
  47         ret
  48
  49 The problem is the store's chain operand is not the load X but rather
  50 a TokenFactor of the load X and load Y, which prevents the folding.
  51
  52 There are two ways to fix this:
  53
  54 1. The dag combiner can start using alias analysis to realize that X and Y
  55    don't alias, making the store to X not dependent on the load from Y.
  56 2. The generated isel could be made smarter in the case it can't
  57    disambiguate the pointers.
  58
  59 Number 1 is the preferred solution.
  60
  61 This has been "fixed" by a TableGen hack, but that is a short-term workaround
  62 which will be removed once the proper fix is made.
  63
  64 //===---------------------------------------------------------------------===//
  65
  66 Turn this into a signed shift right in instcombine:
  67
  68 int f(unsigned x) {
  69   return x >> 31 ? -1 : 0;
  70 }
  71
  72 http://gcc.gnu.org/bugzilla/show_bug.cgi?id=25600
  73 http://gcc.gnu.org/ml/gcc-patches/2006-02/msg01492.html
  74
  75 //===---------------------------------------------------------------------===//
  76
  77 On targets with expensive 64-bit multiply, we could LSR this:
  78
  79 for (i = ...; ++i) {
  80    x = 1ULL << i;
  81
  82 into:
  83  long long tmp = 1;
  84  for (i = ...; ++i, tmp+=tmp)
  85    x = tmp;
  86
  87 This would be a win on ppc32, but not x86 or ppc64.
  88
  89 //===---------------------------------------------------------------------===//
  90
  91 Shrink: (setlt (loadi32 P), 0) -> (setlt (loadi8 Phi), 0)
  92
  93 //===---------------------------------------------------------------------===//
  94
  95 Reassociate should turn: X*X*X*X -> t=(X*X) (t*t) to eliminate a multiply.
  96
  97 //===---------------------------------------------------------------------===//
  98
  99 Interesting? testcase for add/shift/mul reassoc:
 100
 101 int bar(int x, int y) {
 102   return x*x*x+y+x*x*x*x*x*y*y*y*y;
 103 }
 104 int foo(int z, int n) {
 105   return bar(z, n) + bar(2*z, 2*n);
 106 }
 107
 108 //===---------------------------------------------------------------------===//
 109
 110 These two functions should generate the same code on big-endian systems:
 111
 112 int g(int *j,int *l)  {  return memcmp(j,l,4);  }
 113 int h(int *j, int *l) {  return *j - *l; }
 114
 115 This could be done in SelectionDAGISel.cpp, along with other special cases,
 116 for sizes of 1, 2, 4, and 8 bytes.
 117
 118 //===---------------------------------------------------------------------===//
 119
 120 This code:
 121 int rot(unsigned char b) { int a = ((b>>1) ^ (b<<7)) & 0xff; return a; }
 122
 123 Can be improved in two ways:
 124
 125 1. The instcombiner should eliminate the type conversions.
 126 2. The X86 backend should turn this into a rotate by one bit.
 127
 128 //===---------------------------------------------------------------------===//
 129
 130 Add LSR exit value substitution. It'll probably be a win for Ackermann, etc.
 131
 132 //===---------------------------------------------------------------------===//
 133
 134 It would be nice to revert this patch:
 135 http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20060213/031986.html
 136
 137 And teach the dag combiner enough to simplify the code expanded before
 138 legalize.  It seems plausible that this knowledge would let it simplify other
 139 stuff too.
 140
 141 //===---------------------------------------------------------------------===//
 142
 143 For packed types, TargetData.cpp::getTypeInfo() returns an alignment that is
 144 equal to the type size. This works but can be overly conservative, as the
 145 alignment of specific packed types is target dependent.
 146
 147 //===---------------------------------------------------------------------===//
 148
 149 We should add 'unaligned load/store' nodes, and produce them from code like
 150 this:
 151
 152 v4sf example(float *P) {
 153   return (v4sf){P[0], P[1], P[2], P[3] };
 154 }
 155
 156 //===---------------------------------------------------------------------===//
 157
 158 We should constant fold packed type casts at the LLVM level, regardless of the
 159 cast.  Currently we cannot fold some casts because we don't have TargetData
 160 information in the constant folder, so we don't know the endianness of the
 161 target!
 162
 163 //===---------------------------------------------------------------------===//
 164
 165 Consider this:
 166
 167 unsigned short swap_16(unsigned short v) { return (v>>8) | (v<<8); }
 168
 169 Compiled with the ppc backend:
 170
 171 _swap_16:
 172         slwi r2, r3, 8
 173         srwi r3, r3, 8
 174         or r2, r3, r2
 175         rlwinm r3, r2, 0, 16, 31
 176         blr
 177
 178 The rlwinm (an AND with 65535) is dead.  The dag combiner should propagate
 179 known bits better than that to see this.
 180
 181 //===---------------------------------------------------------------------===//
 182
 183 Add support for conditional increments, and other related patterns.  Instead
 184 of:
 185
 186         movl 136(%esp), %eax
 187         cmpl $0, %eax
 188         je LBB16_2      #cond_next
 189 LBB16_1:        #cond_true
 190         incl _foo
 191 LBB16_2:        #cond_next
 192
 193 emit:
 194         movl    _foo, %eax
 195         cmpl    $1, %edi
 196         sbbl    $-1, %eax
 197         movl    %eax, _foo
 198
 199 //===---------------------------------------------------------------------===//
 200
 201 Combine: a = sin(x), b = cos(x) into a,b = sincos(x).
 202
 203 Expand these to calls of sin/cos and stores:
 204       double sincos(double x, double *sin, double *cos);
 205       float sincosf(float x, float *sin, float *cos);
 206       long double sincosl(long double x, long double *sin, long double *cos);
 207
 208 Doing so could allow SROA of the destination pointers.  See also:
 209 http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17687
 210
 211 //===---------------------------------------------------------------------===//
 212
 213 Scalar Repl cannot currently promote this testcase to 'ret long cst':
 214
 215         %struct.X = type { int, int }
 216         %struct.Y = type { %struct.X }
 217 ulong %bar() {
 218         %retval = alloca %struct.Y, align 8             ; <%struct.Y*> [#uses=3]
 219         %tmp12 = getelementptr %struct.Y* %retval, int 0, uint 0, uint 0                ; <int*> [#uses=1]
 220         store int 0, int* %tmp12
 221         %tmp15 = getelementptr %struct.Y* %retval, int 0, uint 0, uint 1                ; <int*> [#uses=1]
 222         store int 1, int* %tmp15
 223         %retval = cast %struct.Y* %retval to ulong*             ; <ulong*> [#uses=1]
 224         %retval = load ulong* %retval           ; <ulong> [#uses=1]
 225         ret ulong %retval
 226 }
 227
 228 It should be extended to do so.
 229
 230 //===---------------------------------------------------------------------===//
 231
 232 Turn this into a single byte store with no load (the other 3 bytes are
 233 unmodified):
 234
 235 void %test(uint* %P) {
 236         %tmp = load uint* %P
 237         %tmp14 = or uint %tmp, 3305111552
 238         %tmp15 = and uint %tmp14, 3321888767
 239         store uint %tmp15, uint* %P
 240         ret void
 241 }
 242
 243 //===---------------------------------------------------------------------===//
 244
 245 dag/inst combine "clz(x)>>5 -> x==0" for 32-bit x.
 246
 247 Compile:
 248
 249 int bar(int x)
 250 {
 251   int t = __builtin_clz(x);
 252   return -(t>>5);
 253 }
 254
 255 to:
 256
 257 _bar:   addic r3,r3,-1
 258         subfe r3,r3,r3
 259         blr
 260
 261