test/CodeGen/ARM/vdiv_combine.ll

   1 ; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
   2
   3 @in = global float 0x400921FA00000000, align 4 ; float global; no use of it is visible in this file
   4 @iin = global i32 -1023, align 4 ; negative input loaded by the sitofp tests (t1, t3, t4, t5, t6)
   5 @uin = global i32 1023, align 4 ; positive input loaded by the uitofp test (t2)
   6
   7 declare void @foo_int32x4_t(<4 x i32>) ; external function; no call to it is visible in this file
   8
   9 ; Test signed conversion: sitofp then fdiv by 8.0 must not leave a vdiv/vmul.
  10 ; CHECK-LABEL: t1:
  11 ; CHECK-NOT: {{vdiv|vmul}}
  12 define void @t1() nounwind {
  13 entry:
  14   %tmp = load i32, i32* @iin, align 4
  15   %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0
  16   %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1 ; both lanes now hold %tmp
  17   %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
  18   %div.i = fdiv <2 x float> %vcvt.i, <float 8.000000e+00, float 8.000000e+00> ; same divisor (8.0) in every lane
  19   tail call void @foo_float32x2_t(<2 x float> %div.i) nounwind ; hand the quotient to an external function so it is used
  20   ret void
  21 }
  22
  23 declare void @foo_float32x2_t(<2 x float>) ; external function that t1-t5 pass their <2 x float> quotient to
  24
  25 ; Test unsigned conversion: uitofp then fdiv by 8.0 must not leave a vdiv/vmul.
  26 ; CHECK-LABEL: t2:
  27 ; CHECK-NOT: {{vdiv|vmul}}
  28 define void @t2() nounwind {
  29 entry:
  30   %tmp = load i32, i32* @uin, align 4
  31   %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0
  32   %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1 ; both lanes now hold %tmp
  33   %vcvt.i = uitofp <2 x i32> %vecinit2.i to <2 x float>
  34   %div.i = fdiv <2 x float> %vcvt.i, <float 8.000000e+00, float 8.000000e+00> ; same divisor (8.0) in every lane
  35   tail call void @foo_float32x2_t(<2 x float> %div.i) nounwind ; hand the quotient to an external function so it is used
  36   ret void
  37 }
  38
  39 ; Test which should not fold because the divisor is not a power of 2.
  40 ; CHECK-LABEL: t3:
  41 ; CHECK: {{vdiv|vmul}}
  42 define void @t3() nounwind {
  43 entry:
  44   %tmp = load i32, i32* @iin, align 4
  45   %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0
  46   %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1 ; both lanes now hold %tmp
  47   %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
  48   %div.i = fdiv <2 x float> %vcvt.i, <float 0x401B333340000000, float 0x401B333340000000> ; divisor is roughly 6.8 in every lane
  49   tail call void @foo_float32x2_t(<2 x float> %div.i) nounwind ; hand the quotient to an external function so it is used
  50   ret void
  51 }
  52
  53 ; Test which should not fold because the power of 2 is out of range.
  54 ; CHECK-LABEL: t4:
  55 ; CHECK: {{vdiv|vmul}}
  56 define void @t4() nounwind {
  57 entry:
  58   %tmp = load i32, i32* @iin, align 4
  59   %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0
  60   %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1 ; both lanes now hold %tmp
  61   %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
  62   %div.i = fdiv <2 x float> %vcvt.i, <float 0x4200000000000000, float 0x4200000000000000> ; divisor is 2^33 in every lane
  63   tail call void @foo_float32x2_t(<2 x float> %div.i) nounwind ; hand the quotient to an external function so it is used
  64   ret void
  65 }
  66
  67 ; Test case where the divisor is the max power of 2 (i.e., 2^32); no vdiv/vmul may remain.
  68 ; CHECK-LABEL: t5:
  69 ; CHECK-NOT: {{vdiv|vmul}}
  70 define void @t5() nounwind {
  71 entry:
  72   %tmp = load i32, i32* @iin, align 4
  73   %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0
  74   %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1 ; both lanes now hold %tmp
  75   %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
  76   %div.i = fdiv <2 x float> %vcvt.i, <float 0x41F0000000000000, float 0x41F0000000000000> ; divisor is 2^32 in every lane
  77   tail call void @foo_float32x2_t(<2 x float> %div.i) nounwind ; hand the quotient to an external function so it is used
  78   ret void
  79 }
  80
  81 ; Test quadword: the <4 x i32> form of t1; no vdiv/vmul may remain.
  82 ; CHECK-LABEL: t6:
  83 ; CHECK-NOT: {{vdiv|vmul}}
  84 define void @t6() nounwind {
  85 entry:
  86   %tmp = load i32, i32* @iin, align 4
  87   %vecinit.i = insertelement <4 x i32> undef, i32 %tmp, i32 0
  88   %vecinit2.i = insertelement <4 x i32> %vecinit.i, i32 %tmp, i32 1
  89   %vecinit4.i = insertelement <4 x i32> %vecinit2.i, i32 %tmp, i32 2
  90   %vecinit6.i = insertelement <4 x i32> %vecinit4.i, i32 %tmp, i32 3 ; all four lanes now hold %tmp
  91   %vcvt.i = sitofp <4 x i32> %vecinit6.i to <4 x float>
  92   %div.i = fdiv <4 x float> %vcvt.i, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00> ; same divisor (8.0) in every lane
  93   tail call void @foo_float32x4_t(<4 x float> %div.i) nounwind ; hand the quotient to an external function so it is used
  94   ret void
  95 }
  96
  97 declare void @foo_float32x4_t(<4 x float>) ; external function that t6 passes its <4 x float> quotient to
  98
  99 define <4 x float> @fix_unsigned_i16_to_float(<4 x i16> %in) {
 100 ; CHECK-LABEL: fix_unsigned_i16_to_float:
 101 ; CHECK: vmovl.u16 [[TMP:q[0-9]+]], {{d[0-9]+}}
 102 ; CHECK: vcvt.f32.u32 {{q[0-9]+}}, [[TMP]], #1
 103 ; Unsigned i16 lanes divided by 2.0: expect a widening vmovl.u16 feeding a vcvt with immediate #1.
 104   %as.fp = uitofp <4 x i16> %in to <4 x float>
 105   %halved = fdiv <4 x float> %as.fp, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
 106   ret <4 x float> %halved
 107 }
 108
 109 define <4 x float> @fix_signed_i16_to_float(<4 x i16> %in) {
 110 ; CHECK-LABEL: fix_signed_i16_to_float:
 111 ; CHECK: vmovl.s16 [[TMP:q[0-9]+]], {{d[0-9]+}}
 112 ; CHECK: vcvt.f32.s32 {{q[0-9]+}}, [[TMP]], #1
 113 ; Signed i16 lanes divided by 2.0: expect a widening vmovl.s16 feeding a vcvt with immediate #1.
 114   %as.fp = sitofp <4 x i16> %in to <4 x float>
 115   %halved = fdiv <4 x float> %as.fp, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
 116   ret <4 x float> %halved
 117 }
 118
 119 define <2 x float> @fix_i64_to_float(<2 x i64> %in) {
 120 ; CHECK-LABEL: fix_i64_to_float:
 121 ; CHECK: bl
 122 ; CHECK: bl
 123 ; i64 source lanes: the expected output has two bl calls (presumably one conversion call per lane).
 124   %as.fp = uitofp <2 x i64> %in to <2 x float>
 125   %halved = fdiv <2 x float> %as.fp, <float 2.000000e+00, float 2.000000e+00>
 126   ret <2 x float> %halved
 127 }
 128
 129 define <2 x double> @fix_i64_to_double(<2 x i64> %in) {
 130 ; CHECK-LABEL: fix_i64_to_double:
 131 ; CHECK: bl
 132 ; CHECK: bl
 133 ; i64 lanes to double: the expected output has two bl calls (presumably one conversion call per lane).
 134   %as.fp = uitofp <2 x i64> %in to <2 x double>
 135   %halved = fdiv <2 x double> %as.fp, <double 2.000000e+00, double 2.000000e+00>
 136   ret <2 x double> %halved
 137 }
 138
 139 ; An 8-lane vector must not be combined; this only verifies that compilation does not crash.
 140 ; CHECK-LABEL: test7
 141 define <8 x float> @test7(<8 x i32> %in) nounwind {
 142 entry:
 143   %as.fp = sitofp <8 x i32> %in to <8 x float>
 144   %quot = fdiv <8 x float> %as.fp, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
 145   ret <8 x float> %quot
 146 }
 147
 148 ; A splat divisor with one undef lane can still be combined.
 149 ; CHECK-LABEL: test8
 150 ; CHECK: vcvt.f32.s32 q{{[0-9]+}}, q{{[0-9]+}}, #1
 151 define <4 x float> @test8(<4 x i32> %in) {
 152   %as.fp = sitofp <4 x i32> %in to <4 x float>
 153   %halved = fdiv <4 x float> %as.fp, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float undef>
 154   ret <4 x float> %halved
 155 }