test/CodeGen/X86/fold-and-shift.ll

   1 ; RUN: llc < %s -march=x86 | FileCheck %s
   2
   3 define i32 @t1(i8* %X, i32 %i) {
   4 ; CHECK-LABEL: t1:
   5 ; CHECK-NOT: and
   6 ; CHECK: movzbl
   7 ; CHECK: movl (%{{...}},%{{...}},4),
   8 ; CHECK: ret
   9 ; Byte offset (%i << 2) & 1020 equals (%i & 255) * 4, so a byte zero-extend plus a 4-scaled load suffices; no mask instruction is expected.
  10 entry:
  11   %tmp2 = shl i32 %i, 2                         ; %i * 4
  12   %tmp4 = and i32 %tmp2, 1020                   ; 1020 = 255 << 2, keeps bits 2..9 only
  13   %tmp7 = getelementptr i8, i8* %X, i32 %tmp4   ; i8 elements: the index is used as a byte offset
  14   %tmp78 = bitcast i8* %tmp7 to i32*
  15   %tmp9 = load i32, i32* %tmp78                 ; 32-bit load from the computed address
  16   ret i32 %tmp9
  17 }
  18
  19 define i32 @t2(i16* %X, i32 %i) {
  20 ; CHECK-LABEL: t2:
  21 ; CHECK-NOT: and
  22 ; CHECK: movzwl
  23 ; CHECK: movl (%{{...}},%{{...}},4),
  24 ; CHECK: ret
  25 ; The i16 gep doubles the index, so the byte offset is 2 * ((%i << 1) & 131070) = (%i & 65535) * 4: a 16-bit zero-extend plus a 4-scaled load, no mask.
  26 entry:
  27   %tmp2 = shl i32 %i, 1                           ; %i * 2
  28   %tmp4 = and i32 %tmp2, 131070                   ; 131070 = 65535 << 1, keeps bits 1..16 only
  29   %tmp7 = getelementptr i16, i16* %X, i32 %tmp4   ; i16 elements: the index is scaled by 2 bytes
  30   %tmp78 = bitcast i16* %tmp7 to i32*
  31   %tmp9 = load i32, i32* %tmp78                   ; 32-bit load from the computed address
  32   ret i32 %tmp9
  33 }
  34
  35 define i32 @t3(i16* %i.ptr, i32* %arr) {
  36 ; This case is tricky. The lshr followed by a gep will produce an lshr followed
  37 ; by an and to remove the low bits. This can be simplified by doing the lshr by
  38 ; a greater constant and using the addressing mode to scale the result back up.
  39 ; To make matters worse, because of the two-phase zext of %i and their reuse in
  40 ; the function, the DAG can get confusing trying to re-use both of them and
  41 ; prevent easy analysis of the mask in order to match this.
  42 ; CHECK-LABEL: t3:
  43 ; CHECK-NOT: and
  44 ; CHECK: shrl
  45 ; CHECK: addl (%{{...}},%{{...}},4),
  46 ; CHECK: ret
  47 ; Note that %i.zext feeds both the shift that forms the index and the final add.
  48 entry:
  49   %i = load i16, i16* %i.ptr
  50   %i.zext = zext i16 %i to i32                                   ; only the low 16 bits can be set
  51   %index = lshr i32 %i.zext, 11                                  ; leaves the top 5 bits of the 16-bit value
  52   %val.ptr = getelementptr inbounds i32, i32* %arr, i32 %index   ; i32 elements: the index is scaled by 4 bytes
  53   %val = load i32, i32* %val.ptr
  54   %sum = add i32 %val, %i.zext                                   ; second use of the zero-extended value
  55   ret i32 %sum
  56 }
  57
  58 define i32 @t4(i16* %i.ptr, i32* %arr) {
  59 ; A version of @t3 that has more zero extends and more re-use of intermediate
  60 ; values. This exercises slightly different bits of canonicalization.
  61 ; CHECK-LABEL: t4:
  62 ; CHECK-NOT: and
  63 ; CHECK: shrl
  64 ; CHECK: addl (%{{...}},%{{...}},4),
  65 ; CHECK: ret
  66 ; Relative to @t3: the index is zero-extended to i64 before the gep, and %index itself is also added into the result.
  67 entry:
  68   %i = load i16, i16* %i.ptr
  69   %i.zext = zext i16 %i to i32                                        ; only the low 16 bits can be set
  70   %index = lshr i32 %i.zext, 11                                       ; leaves the top 5 bits of the 16-bit value
  71   %index.zext = zext i32 %index to i64                                ; extra zero-extend of the index
  72   %val.ptr = getelementptr inbounds i32, i32* %arr, i64 %index.zext   ; i32 elements: the index is scaled by 4 bytes
  73   %val = load i32, i32* %val.ptr
  74   %sum.1 = add i32 %val, %i.zext                                      ; reuses the zero-extended input
  75   %sum.2 = add i32 %sum.1, %index                                     ; reuses the 32-bit shifted index
  76   ret i32 %sum.2
  77 }