test/CodeGen/ARM/vdup.ll

   1 ; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon -verify-machineinstrs %s -o - \
   2 ; RUN:  | FileCheck %s
   3
   4 define <8 x i8> @v_dup8(i8 %A) nounwind {
     ; Splat %A into every lane of an <8 x i8> vector, one insertelement per
     ; lane; the FileCheck directives below expect a vdup.8 in the output.
   5 ;CHECK-LABEL: v_dup8:
   6 ;CHECK: vdup.8
   7         %ins0 = insertelement <8 x i8> zeroinitializer, i8 %A, i32 0
   8         %ins1 = insertelement <8 x i8> %ins0, i8 %A, i32 1
   9         %ins2 = insertelement <8 x i8> %ins1, i8 %A, i32 2
  10         %ins3 = insertelement <8 x i8> %ins2, i8 %A, i32 3
  11         %ins4 = insertelement <8 x i8> %ins3, i8 %A, i32 4
  12         %ins5 = insertelement <8 x i8> %ins4, i8 %A, i32 5
  13         %ins6 = insertelement <8 x i8> %ins5, i8 %A, i32 6
  14         %ins7 = insertelement <8 x i8> %ins6, i8 %A, i32 7
  15         ret <8 x i8> %ins7
  16 }
  17
  18 define <4 x i16> @v_dup16(i16 %A) nounwind {
     ; Splat %A into all four lanes of a <4 x i16> vector with an insertelement
     ; chain; the directives below expect a vdup.16.
  19 ;CHECK-LABEL: v_dup16:
  20 ;CHECK: vdup.16
  21         %ins0 = insertelement <4 x i16> zeroinitializer, i16 %A, i32 0
  22         %ins1 = insertelement <4 x i16> %ins0, i16 %A, i32 1
  23         %ins2 = insertelement <4 x i16> %ins1, i16 %A, i32 2
  24         %ins3 = insertelement <4 x i16> %ins2, i16 %A, i32 3
  25         ret <4 x i16> %ins3
  26 }
  27
  28 define <2 x i32> @v_dup32(i32 %A) nounwind {
     ; Splat %A into both lanes of a <2 x i32> vector; a vdup.32 is expected.
  29 ;CHECK-LABEL: v_dup32:
  30 ;CHECK: vdup.32
  31         %ins0 = insertelement <2 x i32> zeroinitializer, i32 %A, i32 0
  32         %ins1 = insertelement <2 x i32> %ins0, i32 %A, i32 1
  33         ret <2 x i32> %ins1
  34 }
  35
  36 define <2 x float> @v_dupfloat(float %A) nounwind {
     ; Floating-point counterpart of the two-lane splat: both lanes of a
     ; <2 x float> vector receive %A; a vdup.32 is expected.
  37 ;CHECK-LABEL: v_dupfloat:
  38 ;CHECK: vdup.32
  39         %ins0 = insertelement <2 x float> zeroinitializer, float %A, i32 0
  40         %ins1 = insertelement <2 x float> %ins0, float %A, i32 1
  41         ret <2 x float> %ins1
  42 }
  43
  44 define <16 x i8> @v_dupQ8(i8 %A) nounwind {
     ; Splat %A into all sixteen lanes of a <16 x i8> vector, one insertelement
     ; per lane; the directives below expect a vdup.8.
  45 ;CHECK-LABEL: v_dupQ8:
  46 ;CHECK: vdup.8
  47         %ins0 = insertelement <16 x i8> zeroinitializer, i8 %A, i32 0
  48         %ins1 = insertelement <16 x i8> %ins0, i8 %A, i32 1
  49         %ins2 = insertelement <16 x i8> %ins1, i8 %A, i32 2
  50         %ins3 = insertelement <16 x i8> %ins2, i8 %A, i32 3
  51         %ins4 = insertelement <16 x i8> %ins3, i8 %A, i32 4
  52         %ins5 = insertelement <16 x i8> %ins4, i8 %A, i32 5
  53         %ins6 = insertelement <16 x i8> %ins5, i8 %A, i32 6
  54         %ins7 = insertelement <16 x i8> %ins6, i8 %A, i32 7
  55         %ins8 = insertelement <16 x i8> %ins7, i8 %A, i32 8
  56         %ins9 = insertelement <16 x i8> %ins8, i8 %A, i32 9
  57         %ins10 = insertelement <16 x i8> %ins9, i8 %A, i32 10
  58         %ins11 = insertelement <16 x i8> %ins10, i8 %A, i32 11
  59         %ins12 = insertelement <16 x i8> %ins11, i8 %A, i32 12
  60         %ins13 = insertelement <16 x i8> %ins12, i8 %A, i32 13
  61         %ins14 = insertelement <16 x i8> %ins13, i8 %A, i32 14
  62         %ins15 = insertelement <16 x i8> %ins14, i8 %A, i32 15
  63         ret <16 x i8> %ins15
  64 }
  65
  66 define <8 x i16> @v_dupQ16(i16 %A) nounwind {
     ; Splat %A into all eight lanes of an <8 x i16> vector with an
     ; insertelement chain; a vdup.16 is expected.
  67 ;CHECK-LABEL: v_dupQ16:
  68 ;CHECK: vdup.16
  69         %ins0 = insertelement <8 x i16> zeroinitializer, i16 %A, i32 0
  70         %ins1 = insertelement <8 x i16> %ins0, i16 %A, i32 1
  71         %ins2 = insertelement <8 x i16> %ins1, i16 %A, i32 2
  72         %ins3 = insertelement <8 x i16> %ins2, i16 %A, i32 3
  73         %ins4 = insertelement <8 x i16> %ins3, i16 %A, i32 4
  74         %ins5 = insertelement <8 x i16> %ins4, i16 %A, i32 5
  75         %ins6 = insertelement <8 x i16> %ins5, i16 %A, i32 6
  76         %ins7 = insertelement <8 x i16> %ins6, i16 %A, i32 7
  77         ret <8 x i16> %ins7
  78 }
  79
  80 define <4 x i32> @v_dupQ32(i32 %A) nounwind {
     ; Splat %A into all four lanes of a <4 x i32> vector; a vdup.32 is
     ; expected.
  81 ;CHECK-LABEL: v_dupQ32:
  82 ;CHECK: vdup.32
  83         %ins0 = insertelement <4 x i32> zeroinitializer, i32 %A, i32 0
  84         %ins1 = insertelement <4 x i32> %ins0, i32 %A, i32 1
  85         %ins2 = insertelement <4 x i32> %ins1, i32 %A, i32 2
  86         %ins3 = insertelement <4 x i32> %ins2, i32 %A, i32 3
  87         ret <4 x i32> %ins3
  88 }
  89
  90 define <4 x float> @v_dupQfloat(float %A) nounwind {
     ; Splat %A into all four lanes of a <4 x float> vector; a vdup.32 is
     ; expected.
  91 ;CHECK-LABEL: v_dupQfloat:
  92 ;CHECK: vdup.32
  93         %ins0 = insertelement <4 x float> zeroinitializer, float %A, i32 0
  94         %ins1 = insertelement <4 x float> %ins0, float %A, i32 1
  95         %ins2 = insertelement <4 x float> %ins1, float %A, i32 2
  96         %ins3 = insertelement <4 x float> %ins2, float %A, i32 3
  97         ret <4 x float> %ins3
  98 }
  99
 100 ; Check to make sure it works with shuffles, too.
 101
 102 define <8 x i8> @v_shuffledup8(i8 %A) nounwind {
     ; Put %A in lane 0 of an undef <8 x i8> vector, then broadcast that lane
     ; with an all-zero shuffle mask; a vdup.8 is expected.
 103 ;CHECK-LABEL: v_shuffledup8:
 104 ;CHECK: vdup.8
 105         %seed = insertelement <8 x i8> undef, i8 %A, i32 0
 106         %splat = shufflevector <8 x i8> %seed, <8 x i8> undef, <8 x i32> zeroinitializer
 107         ret <8 x i8> %splat
 108 }
 109
 110 define <4 x i16> @v_shuffledup16(i16 %A) nounwind {
     ; Put %A in lane 0 of an undef <4 x i16> vector, then broadcast that lane
     ; with an all-zero shuffle mask; a vdup.16 is expected.
 111 ;CHECK-LABEL: v_shuffledup16:
 112 ;CHECK: vdup.16
 113         %seed = insertelement <4 x i16> undef, i16 %A, i32 0
 114         %splat = shufflevector <4 x i16> %seed, <4 x i16> undef, <4 x i32> zeroinitializer
 115         ret <4 x i16> %splat
 116 }
 117
 118 define <2 x i32> @v_shuffledup32(i32 %A) nounwind {
     ; Put %A in lane 0 of an undef <2 x i32> vector, then broadcast that lane
     ; with an all-zero shuffle mask; a vdup.32 is expected.
 119 ;CHECK-LABEL: v_shuffledup32:
 120 ;CHECK: vdup.32
 121         %seed = insertelement <2 x i32> undef, i32 %A, i32 0
 122         %splat = shufflevector <2 x i32> %seed, <2 x i32> undef, <2 x i32> zeroinitializer
 123         ret <2 x i32> %splat
 124 }
 125
 126 define <2 x float> @v_shuffledupfloat(float %A) nounwind {
     ; Put %A in lane 0 of an undef <2 x float> vector, then broadcast that
     ; lane with an all-zero shuffle mask; a vdup.32 is expected.
 127 ;CHECK-LABEL: v_shuffledupfloat:
 128 ;CHECK: vdup.32
 129         %seed = insertelement <2 x float> undef, float %A, i32 0
 130         %splat = shufflevector <2 x float> %seed, <2 x float> undef, <2 x i32> zeroinitializer
 131         ret <2 x float> %splat
 132 }
 133
 134 define <16 x i8> @v_shuffledupQ8(i8 %A) nounwind {
     ; Put %A in lane 0 of an undef <16 x i8> vector, then broadcast that lane
     ; with an all-zero shuffle mask; a vdup.8 is expected.
 135 ;CHECK-LABEL: v_shuffledupQ8:
 136 ;CHECK: vdup.8
 137         %seed = insertelement <16 x i8> undef, i8 %A, i32 0
 138         %splat = shufflevector <16 x i8> %seed, <16 x i8> undef, <16 x i32> zeroinitializer
 139         ret <16 x i8> %splat
 140 }
 141
 142 define <8 x i16> @v_shuffledupQ16(i16 %A) nounwind {
     ; Put %A in lane 0 of an undef <8 x i16> vector, then broadcast that lane
     ; with an all-zero shuffle mask; a vdup.16 is expected.
 143 ;CHECK-LABEL: v_shuffledupQ16:
 144 ;CHECK: vdup.16
 145         %seed = insertelement <8 x i16> undef, i16 %A, i32 0
 146         %splat = shufflevector <8 x i16> %seed, <8 x i16> undef, <8 x i32> zeroinitializer
 147         ret <8 x i16> %splat
 148 }
 149
 150 define <4 x i32> @v_shuffledupQ32(i32 %A) nounwind {
     ; Put %A in lane 0 of an undef <4 x i32> vector, then broadcast that lane
     ; with an all-zero shuffle mask; a vdup.32 is expected.
 151 ;CHECK-LABEL: v_shuffledupQ32:
 152 ;CHECK: vdup.32
 153         %seed = insertelement <4 x i32> undef, i32 %A, i32 0
 154         %splat = shufflevector <4 x i32> %seed, <4 x i32> undef, <4 x i32> zeroinitializer
 155         ret <4 x i32> %splat
 156 }
 157
 158 define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
     ; Put %A in lane 0 of an undef <4 x float> vector, then broadcast that
     ; lane with an all-zero shuffle mask; a vdup.32 is expected.
 159 ;CHECK-LABEL: v_shuffledupQfloat:
 160 ;CHECK: vdup.32
 161         %seed = insertelement <4 x float> undef, float %A, i32 0
 162         %splat = shufflevector <4 x float> %seed, <4 x float> undef, <4 x i32> zeroinitializer
 163         ret <4 x float> %splat
 164 }
 165
 166 define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
     ; Load an <8 x i8> vector through %A and broadcast its lane 1 to all
     ; eight result lanes; a vdup.8 is expected.
 167 ;CHECK-LABEL: vduplane8:
 168 ;CHECK: vdup.8
 169         %vec = load <8 x i8>, <8 x i8>* %A
 170         %dup = shufflevector <8 x i8> %vec, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
 171         ret <8 x i8> %dup
 172 }
 173
 174 define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
     ; Load a <4 x i16> vector through %A and broadcast its lane 1 to all four
     ; result lanes; a vdup.16 is expected.
 175 ;CHECK-LABEL: vduplane16:
 176 ;CHECK: vdup.16
 177         %vec = load <4 x i16>, <4 x i16>* %A
 178         %dup = shufflevector <4 x i16> %vec, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
 179         ret <4 x i16> %dup
 180 }
 181
 182 define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
     ; Load a <2 x i32> vector through %A and broadcast its lane 1 to both
     ; result lanes; a vdup.32 is expected.
 183 ;CHECK-LABEL: vduplane32:
 184 ;CHECK: vdup.32
 185         %vec = load <2 x i32>, <2 x i32>* %A
 186         %dup = shufflevector <2 x i32> %vec, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
 187         ret <2 x i32> %dup
 188 }
 189
 190 define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind {
     ; Load a <2 x float> vector through %A and broadcast its lane 1 to both
     ; result lanes; a vdup.32 is expected.
 191 ;CHECK-LABEL: vduplanefloat:
 192 ;CHECK: vdup.32
 193         %vec = load <2 x float>, <2 x float>* %A
 194         %dup = shufflevector <2 x float> %vec, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
 195         ret <2 x float> %dup
 196 }
 197
 198 define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
     ; Load an <8 x i8> vector through %A and broadcast its lane 1 into a
     ; result that is twice as wide (<16 x i8>); a vdup.8 is expected.
 199 ;CHECK-LABEL: vduplaneQ8:
 200 ;CHECK: vdup.8
 201         %vec = load <8 x i8>, <8 x i8>* %A
 202         %dup = shufflevector <8 x i8> %vec, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
 203         ret <16 x i8> %dup
 204 }
 205
 206 define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
     ; Load a <4 x i16> vector through %A and broadcast its lane 1 into a
     ; result that is twice as wide (<8 x i16>); a vdup.16 is expected.
 207 ;CHECK-LABEL: vduplaneQ16:
 208 ;CHECK: vdup.16
 209         %vec = load <4 x i16>, <4 x i16>* %A
 210         %dup = shufflevector <4 x i16> %vec, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
 211         ret <8 x i16> %dup
 212 }
 213
 214 define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
     ; Load a <2 x i32> vector through %A and broadcast its lane 1 into a
     ; result that is twice as wide (<4 x i32>); a vdup.32 is expected.
 215 ;CHECK-LABEL: vduplaneQ32:
 216 ;CHECK: vdup.32
 217         %vec = load <2 x i32>, <2 x i32>* %A
 218         %dup = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
 219         ret <4 x i32> %dup
 220 }
 221
 222 define <4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind {
     ; Load a <2 x float> vector through %A and broadcast its lane 1 into a
     ; result that is twice as wide (<4 x float>); a vdup.32 is expected.
 223 ;CHECK-LABEL: vduplaneQfloat:
 224 ;CHECK: vdup.32
 225         %vec = load <2 x float>, <2 x float>* %A
 226         %dup = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
 227         ret <4 x float> %dup
 228 }
 229
 230 define <2 x i64> @foo(<2 x i64> %arg0_int64x1_t) nounwind readnone {
     ; Broadcast lane 1 of a <2 x i64> argument into both result lanes.
     ; No instruction pattern is pinned for this function. The label directive
     ; below only anchors it, so that directives written for other functions
     ; cannot be satisfied by text emitted for this one.
     ;CHECK-LABEL: foo:
 231 entry:
 232   %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
 233   ret <2 x i64> %0
 234 }
 235
 236 define <2 x i64> @bar(<2 x i64> %arg0_int64x1_t) nounwind readnone {
     ; Broadcast lane 0 of a <2 x i64> argument into both result lanes.
     ; No instruction pattern is pinned for this function. The label directive
     ; below only anchors it, so that directives written for other functions
     ; cannot be satisfied by text emitted for this one.
     ;CHECK-LABEL: bar:
 237 entry:
 238   %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
 239   ret <2 x i64> %0
 240 }
 241
 242 define <2 x double> @baz(<2 x double> %arg0_int64x1_t) nounwind readnone {
     ; Broadcast lane 1 of a <2 x double> argument into both result lanes.
     ; No instruction pattern is pinned for this function. The label directive
     ; below only anchors it, so that directives written for other functions
     ; cannot be satisfied by text emitted for this one.
     ;CHECK-LABEL: baz:
 243 entry:
 244   %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 1, i32 1>
 245   ret <2 x double> %0
 246 }
 247
 248 define <2 x double> @qux(<2 x double> %arg0_int64x1_t) nounwind readnone {
     ; Broadcast lane 0 of a <2 x double> argument into both result lanes.
     ; No instruction pattern is pinned for this function. The label directive
     ; below only anchors it, so that directives written for other functions
     ; cannot be satisfied by text emitted for this one.
     ;CHECK-LABEL: qux:
 249 entry:
 250   %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 0, i32 0>
 251   ret <2 x double> %0
 252 }
 253
 254 ; Radar 7373643
     ; A splat of the constant -128 is stored through %ptr. The directives
     ; expect a vmov.i8 and then a vstr, with no vdup.8 between the two.
 255 ;CHECK-LABEL: redundantVdup:
 256 ;CHECK: vmov.i8
 257 ;CHECK-NOT: vdup.8
 258 ;CHECK: vstr
 259 define void @redundantVdup(<8 x i8>* %ptr) nounwind {
 260   %seed = insertelement <8 x i8> undef, i8 -128, i32 0
 261   %splat = shufflevector <8 x i8> %seed, <8 x i8> undef, <8 x i32> zeroinitializer
 262   store <8 x i8> %splat, <8 x i8>* %ptr, align 8
 263   ret void
 264 }
 265
 266 define <4 x i32> @tdupi(i32 %x, i32 %y) {
     ; Partial splat: lanes 0-2 receive %x and lane 3 receives %y. The
     ; directives still expect a vdup.32.
 267 ;CHECK-LABEL: tdupi:
 268 ;CHECK: vdup.32
 269   %x0 = insertelement <4 x i32> undef, i32 %x, i32 0
 270   %x1 = insertelement <4 x i32> %x0, i32 %x, i32 1
 271   %x2 = insertelement <4 x i32> %x1, i32 %x, i32 2
 272   %mixed = insertelement <4 x i32> %x2, i32 %y, i32 3
 273   ret <4 x i32> %mixed
 274 }
 275
 276 define <4 x float> @tdupf(float %x, float %y) {
     ; Partial splat: lanes 0-2 receive %x and lane 3 receives %y. The
     ; directives still expect a vdup.32.
 277 ;CHECK-LABEL: tdupf:
 278 ;CHECK: vdup.32
 279   %x0 = insertelement <4 x float> undef, float %x, i32 0
 280   %x1 = insertelement <4 x float> %x0, float %x, i32 1
 281   %x2 = insertelement <4 x float> %x1, float %x, i32 2
 282   %mixed = insertelement <4 x float> %x2, float %y, i32 3
 283   ret <4 x float> %mixed
 284 }
 285
 286 ; This test checks that when splatting an element from a vector into another,
 287 ; the value isn't moved out to GPRs first.
     ; Lane 1 of %invec fills lanes 0-2 of the result; lane 3 is the constant 255.
 288 define <4 x i32> @tduplane(<4 x i32> %invec) {
 289 ;CHECK-LABEL: tduplane:
 290 ;CHECK-NOT: vmov {{.*}}, d16[1]
 291 ;CHECK: vdup.32 {{.*}}, d16[1]
 292   %elt = extractelement <4 x i32> %invec, i32 1
 293   %e0 = insertelement <4 x i32> undef, i32 %elt, i32 0
 294   %e1 = insertelement <4 x i32> %e0, i32 %elt, i32 1
 295   %e2 = insertelement <4 x i32> %e1, i32 %elt, i32 2
 296   %res = insertelement <4 x i32> %e2, i32 255, i32 3
 297   ret <4 x i32> %res
 298 }
 299
 300 define <2 x float> @check_f32(<4 x float> %v) nounwind {
     ; Take element 3 of a <4 x float> vector and splat it into a <2 x float>
     ; result. The directive expects a vdup.32 whose source is lane [1] of a
     ; d register.
 301 ;CHECK-LABEL: check_f32:
 302 ;CHECK: vdup.32 {{.*}}, d{{..}}[1]
 303   %elt = extractelement <4 x float> %v, i32 3
 304   %e0 = insertelement  <2 x float> undef, float %elt, i32 0
 305   %e1 = insertelement  <2 x float> %e0, float %elt, i32 1
 306   ret <2 x float> %e1
 307 }
 308
 309 define <2 x i32> @check_i32(<4 x i32> %v) nounwind {
     ; Take element 3 of a <4 x i32> vector and splat it into a <2 x i32>
     ; result. The directive expects a vdup.32 whose source is lane [1] of a
     ; d register.
 310 ;CHECK-LABEL: check_i32:
 311 ;CHECK: vdup.32 {{.*}}, d{{..}}[1]
 312   %elt = extractelement <4 x i32> %v, i32 3
 313   %e0 = insertelement  <2 x i32> undef, i32 %elt, i32 0
 314   %e1 = insertelement  <2 x i32> %e0, i32 %elt, i32 1
 315   ret <2 x i32> %e1
 316 }
 317
 318 define <4 x i16> @check_i16(<8 x i16> %v) nounwind {
     ; Take element 3 of an <8 x i16> vector and write it to lanes 0 and 1 of
     ; a <4 x i16> result (lanes 2 and 3 stay undef). The directive expects a
     ; vdup.16 whose source is lane [3] of a d register.
 319 ;CHECK-LABEL: check_i16:
 320 ;CHECK: vdup.16 {{.*}}, d{{..}}[3]
 321   %elt = extractelement <8 x i16> %v, i32 3
 322   %e0 = insertelement  <4 x i16> undef, i16 %elt, i32 0
 323   %e1 = insertelement  <4 x i16> %e0, i16 %elt, i32 1
 324   ret <4 x i16> %e1
 325 }
 326
 327 define <8 x i8> @check_i8(<16 x i8> %v) nounwind {
     ; Take element 3 of a <16 x i8> vector and write it to lanes 0 and 1 of
     ; an <8 x i8> result (the remaining lanes stay undef). The directive
     ; expects a vdup.8 whose source is lane [3] of a d register.
 328 ;CHECK-LABEL: check_i8:
 329 ;CHECK: vdup.8 {{.*}}, d{{..}}[3]
 330   %elt = extractelement <16 x i8> %v, i32 3
 331   %e0 = insertelement  <8  x i8> undef, i8 %elt, i32 0
 332   %e1 = insertelement  <8  x i8> %e0, i8 %elt, i32 1
 333   ret <8 x i8> %e1
 334 }
 335
 336 ; Check that an SPR splat produces a vdup (the <4 x float> variants below expect vld1.16 instead).
 337
 338 define <2 x float> @check_spr_splat2(<2 x float> %p, i16 %q) {
     ; Convert %q to float, splat it across a <2 x float> vector and subtract
     ; %p from the splat. The directive expects a vdup.32 writing a d register.
 339 ;CHECK-LABEL: check_spr_splat2:
 340 ;CHECK: vdup.32 d
 341   %q.fp = sitofp i16 %q to float
 342   %seed = insertelement <2 x float> undef, float %q.fp, i32 0
 343   %splat = shufflevector <2 x float> %seed, <2 x float> undef, <2 x i32> zeroinitializer
 344   %diff = fsub <2 x float> %splat, %p
 345   ret <2 x float> %diff
 346 }
 347
 348 define <4 x float> @check_spr_splat4(<4 x float> %p, i16 %q) {
     ; Convert %q to float, splat it across a <4 x float> vector from lane 0
     ; and subtract %p from the splat. The directive expects a vld1.16.
 349 ;CHECK-LABEL: check_spr_splat4:
 350 ;CHECK: vld1.16
 351   %q.fp = sitofp i16 %q to float
 352   %seed = insertelement <4 x float> undef, float %q.fp, i32 0
 353   %splat = shufflevector <4 x float> %seed, <4 x float> undef, <4 x i32> zeroinitializer
 354   %diff = fsub <4 x float> %splat, %p
 355   ret <4 x float> %diff
 356 }
 357 ; Same codegen as above test; scalar is splatted using vld1, so shuffle index is irrelevant.
     ; Here the converted scalar is inserted at lane 1 and the shuffle mask
     ; selects lane 1 for every result lane.
 358 define <4 x float> @check_spr_splat4_lane1(<4 x float> %p, i16 %q) {
 359 ;CHECK-LABEL: check_spr_splat4_lane1:
 360 ;CHECK: vld1.16
 361   %q.fp = sitofp i16 %q to float
 362   %seed = insertelement <4 x float> undef, float %q.fp, i32 1
 363   %splat = shufflevector <4 x float> %seed, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
 364   %diff = fsub <4 x float> %splat, %p
 365   ret <4 x float> %diff
 366 }
 367
 368 ; Also make sure we don't crash on variable-index extractelements, where we
 369 ; almost could have generated a vdup.
 370
 371 define <8 x i8> @check_i8_varidx(<16 x i8> %v, i32 %idx) {
     ; The extracted lane is selected by the run-time value %idx, and the
     ; element is written to lanes 0 and 1 of the result. The directives
     ; expect a vst1.64 to an address copied from sp, indexed by a value
     ; loaded from the frame, followed by a vld1.8 from that same register.
 372 ; CHECK-LABEL: check_i8_varidx:
 373 ; CHECK: mov r[[FP:[0-9]+]], sp
 374 ; CHECK: ldr r[[IDX:[0-9]+]], [r[[FP]], #4]
 375 ; CHECK: mov r[[SPCOPY:[0-9]+]], sp
 376 ; CHECK: vst1.64 {d{{.*}}, d{{.*}}}, [r[[SPCOPY]]:128], r[[IDX]]
 377 ; CHECK: vld1.8 {d{{.*}}[]}, [r[[SPCOPY]]]
 378   %elt = extractelement <16 x i8> %v, i32 %idx
 379   %e0 = insertelement  <8 x i8> undef, i8 %elt, i32 0
 380   %e1 = insertelement  <8 x i8> %e0, i8 %elt, i32 1
 381   ret <8 x i8> %e1
 382 }