1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
5 ; Check constant loads of every 128-bit and 256-bit vector type
6 ; for size optimization using splat ops available with AVX and AVX2.
8 ; There is no AVX broadcast from double to 128-bit vector because movddup has been around since SSE3 (grrr).
; #0 = optsize (see attributes at end of file). Even at optsize the splatted
; <1.0, 1.0> constant is loaded with movddup (duplicate low 64-bit element).
9 define <2 x double> @splat_v2f64(<2 x double> %x) #0 {
10 ; CHECK-LABEL: splat_v2f64:
12 ; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
13 ; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
15 %add = fadd <2 x double> %x, <double 1.0, double 1.0>
; #1 = minsize. The 256-bit f64 splat uses vbroadcastsd from memory on both
; AVX and AVX2 (shared CHECK prefix), loading the constant once.
19 define <4 x double> @splat_v4f64(<4 x double> %x) #1 {
20 ; CHECK-LABEL: splat_v4f64:
22 ; CHECK-NEXT: vbroadcastsd {{.*}}(%rip), %ymm1
23 ; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
25 %add = fadd <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0>
; #0 = optsize. 128-bit f32 splat: vbroadcastss loads the 4-byte constant
; once instead of materializing a full 16-byte vector constant.
29 define <4 x float> @splat_v4f32(<4 x float> %x) #0 {
30 ; CHECK-LABEL: splat_v4f32:
32 ; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
33 ; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
35 %add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
; #1 = minsize. 256-bit f32 splat: same vbroadcastss form as the 128-bit
; case, just with a ymm destination; identical on AVX and AVX2.
39 define <8 x float> @splat_v8f32(<8 x float> %x) #1 {
40 ; CHECK-LABEL: splat_v8f32:
42 ; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
43 ; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0
45 %add = fadd <8 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
49 ; AVX can't do integer splats, so fake it: use vmovddup to splat 64-bit value.
50 ; We also generate vmovddup for AVX2 because it's one byte smaller than vpbroadcastq.
; #1 = minsize. Shared CHECK prefix: both AVX and AVX2 pick the FP-domain
; vmovddup for the i64 splat (per the size note above), then vpaddq.
51 define <2 x i64> @splat_v2i64(<2 x i64> %x) #1 {
52 ; CHECK-LABEL: splat_v2i64:
54 ; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
55 ; CHECK-NEXT: vpaddq %xmm1, %xmm0, %xmm0
57 %add = add <2 x i64> %x, <i64 1, i64 1>
61 ; AVX can't do 256-bit integer ops, so we split this into two 128-bit vectors,
62 ; and then we fake it: use vmovddup to splat 64-bit value.
; #0 = optsize. First test where AVX and AVX2 diverge: AVX has no 256-bit
; integer add, so the vector is split into two 128-bit halves sharing one
; vmovddup'd splat; AVX2 does a single vpbroadcastq + 256-bit vpaddq.
63 define <4 x i64> @splat_v4i64(<4 x i64> %x) #0 {
64 ; AVX-LABEL: splat_v4i64:
66 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
67 ; AVX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
68 ; AVX-NEXT: vpaddq %xmm2, %xmm1, %xmm1
69 ; AVX-NEXT: vpaddq %xmm2, %xmm0, %xmm0
70 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
73 ; AVX2-LABEL: splat_v4i64:
75 ; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm1
76 ; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
78 %add = add <4 x i64> %x, <i64 1, i64 1, i64 1, i64 1>
82 ; AVX can't do integer splats, so fake it: use vbroadcastss to splat 32-bit value.
; #1 = minsize. AVX fakes the i32 splat with the FP-domain vbroadcastss;
; AVX2 has a native integer broadcast (vpbroadcastd).
83 define <4 x i32> @splat_v4i32(<4 x i32> %x) #1 {
84 ; AVX-LABEL: splat_v4i32:
86 ; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
87 ; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
90 ; AVX2-LABEL: splat_v4i32:
92 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
93 ; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
95 %add = add <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
99 ; AVX can't do integer splats, so fake it: use vbroadcastss to splat 32-bit value.
; #0 = optsize. 256-bit i32 version: AVX splits into two 128-bit halves that
; share a single vbroadcastss'd splat; AVX2 uses vpbroadcastd + 256-bit vpaddd.
100 define <8 x i32> @splat_v8i32(<8 x i32> %x) #0 {
101 ; AVX-LABEL: splat_v8i32:
103 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
104 ; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
105 ; AVX-NEXT: vpaddd %xmm2, %xmm1, %xmm1
106 ; AVX-NEXT: vpaddd %xmm2, %xmm0, %xmm0
107 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
110 ; AVX2-LABEL: splat_v8i32:
112 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
113 ; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
115 %add = add <8 x i32> %x, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
119 ; AVX can't do integer splats, and there's no broadcast fakery for 16-bit. Could use pshuflw, etc?
; #1 = minsize. With no 16-bit broadcast available, AVX folds the full
; constant pool load into vpaddw's memory operand; AVX2 uses vpbroadcastw.
120 define <8 x i16> @splat_v8i16(<8 x i16> %x) #1 {
121 ; AVX-LABEL: splat_v8i16:
123 ; AVX-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
126 ; AVX2-LABEL: splat_v8i16:
128 ; AVX2-NEXT: vpbroadcastw {{.*}}(%rip), %xmm1
129 ; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
131 %add = add <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
135 ; AVX can't do integer splats, and there's no broadcast fakery for 16-bit. Could use pshuflw, etc?
; #0 = optsize. 256-bit i16 version: AVX materializes the full 16-byte
; constant with vmovdqa (no 16-bit broadcast) and adds it to both halves;
; AVX2 uses a single vpbroadcastw + 256-bit vpaddw.
136 define <16 x i16> @splat_v16i16(<16 x i16> %x) #0 {
137 ; AVX-LABEL: splat_v16i16:
139 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
140 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1]
141 ; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1
142 ; AVX-NEXT: vpaddw %xmm2, %xmm0, %xmm0
143 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
146 ; AVX2-LABEL: splat_v16i16:
148 ; AVX2-NEXT: vpbroadcastw {{.*}}(%rip), %ymm1
149 ; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0
151 %add = add <16 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
155 ; AVX can't do integer splats, and there's no broadcast fakery for 8-bit. Could use pshufb, etc?
; #1 = minsize. Mirrors the v8i16 case for 8-bit elements: AVX folds the
; constant load into vpaddb; AVX2 uses the native vpbroadcastb.
156 define <16 x i8> @splat_v16i8(<16 x i8> %x) #1 {
157 ; AVX-LABEL: splat_v16i8:
159 ; AVX-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
162 ; AVX2-LABEL: splat_v16i8:
164 ; AVX2-NEXT: vpbroadcastb {{.*}}(%rip), %xmm1
165 ; AVX2-NEXT: vpaddb %xmm1, %xmm0, %xmm0
167 %add = add <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
171 ; AVX can't do integer splats, and there's no broadcast fakery for 8-bit. Could use pshufb, etc?
; #0 = optsize. 256-bit i8 version: AVX loads the full all-ones-bytes
; constant with vmovdqa and adds it to both 128-bit halves; AVX2 uses a
; single vpbroadcastb + 256-bit vpaddb.
172 define <32 x i8> @splat_v32i8(<32 x i8> %x) #0 {
173 ; AVX-LABEL: splat_v32i8:
175 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
176 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
177 ; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1
178 ; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
179 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
182 ; AVX2-LABEL: splat_v32i8:
184 ; AVX2-NEXT: vpbroadcastb {{.*}}(%rip), %ymm1
185 ; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0
187 %add = add <32 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
191 ; PR23259: Verify that ISel doesn't crash with a 'fatal error in backend'
192 ; due to a missing AVX pattern to select a v2i64 X86ISD::BROADCAST of a
193 ; loadi64 with multiple uses.
; Global read by @pr23259 below; <3 x i64> deliberately exercises a
; non-power-of-two vector that gets widened during legalization.
195 @A = common global <3 x i64> zeroinitializer, align 32
; Crash-regression test (PR23259): the interesting property is that this
; compiles at all under minsize (#1); no instruction sequence is pinned here.
197 define <8 x i64> @pr23259() #1 {
199 %0 = load <4 x i64>, <4 x i64>* bitcast (<3 x i64>* @A to <4 x i64>*), align 32
200 %1 = shufflevector <4 x i64> %0, <4 x i64> undef, <3 x i32> <i32 undef, i32 undef, i32 2>
201 %shuffle = shufflevector <3 x i64> <i64 1, i64 undef, i64 undef>, <3 x i64> %1, <8 x i32> <i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
202 ret <8 x i64> %shuffle
; The tests alternate between the two size attributes so both optsize and
; minsize code paths are covered for each splat width.
205 attributes #0 = { optsize }
206 attributes #1 = { minsize }