test/CodeGen/X86/avx-isa-check.ll

   1 ; Check the AVX2 instructions that are disabled when AVX512VL/AVX512BW is present.
   2
   3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=core-avx2 -mattr=+avx2                 -o /dev/null
   4 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl                                    -o /dev/null
   5 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl  -mattr=+avx512vl                  -o /dev/null
   6 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl  -mattr=+avx512bw                  -o /dev/null
   7 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl  -mattr=+avx512vl -mattr=+avx512bw -o /dev/null
   8 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=skx                                    -o /dev/null
   9
  10 define <4 x i64> @vpand_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  11   ; An integer add ahead of the AND forces the execution domain.
  12   %a.inc = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  13   %masked = and <4 x i64> %a.inc, %b
  14   ret <4 x i64> %masked
  15 }
  16
  17 define <2 x i64> @vpand_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  18   ; An integer add ahead of the AND forces the execution domain.
  19   %a.inc = add <2 x i64> %a, <i64 1, i64 1>
  20   %masked = and <2 x i64> %a.inc, %b
  21   ret <2 x i64> %masked
  22 }
  23
  24 define <4 x i64> @vpandn_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  25   ; An integer add ahead of the logic ops forces the execution domain; %b is not used.
  26   %a.inc = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  27   %inverted = xor <4 x i64> %a.inc, <i64 -1, i64 -1, i64 -1, i64 -1>
  28   %res = and <4 x i64> %a, %inverted
  29   ret <4 x i64> %res
  30 }
  31
  32 define <2 x i64> @vpandn_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  33   ; An integer add ahead of the logic ops forces the execution domain; %b is not used.
  34   %a.inc = add <2 x i64> %a, <i64 1, i64 1>
  35   %inverted = xor <2 x i64> %a.inc, <i64 -1, i64 -1>
  36   %res = and <2 x i64> %a, %inverted
  37   ret <2 x i64> %res
  38 }
  39
  40 define <4 x i64> @vpor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  41   ; An integer add ahead of the OR forces the execution domain.
  42   %a.inc = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  43   %merged = or <4 x i64> %a.inc, %b
  44   ret <4 x i64> %merged
  45 }
  46
  47 define <4 x i64> @vpxor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  48   ; An integer add ahead of the XOR forces the execution domain.
  49   %a.inc = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  50   %toggled = xor <4 x i64> %a.inc, %b
  51   ret <4 x i64> %toggled
  52 }
  53
  54 define <2 x i64> @vpor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  55   ; An integer add ahead of the OR forces the execution domain.
  56   %a.inc = add <2 x i64> %a, <i64 1, i64 1>
  57   %merged = or <2 x i64> %a.inc, %b
  58   ret <2 x i64> %merged
  59 }
  60
  61 define <2 x i64> @vpxor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  62   ; An integer add ahead of the XOR forces the execution domain.
  63   %a.inc = add <2 x i64> %a, <i64 1, i64 1>
  64   %toggled = xor <2 x i64> %a.inc, %b
  65   ret <2 x i64> %toggled
  66 }
  67
  68 define <4 x i64> @test_vpaddq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {  ; lane-wise i64 add, 4 lanes
  69   %sum = add <4 x i64> %i, %j
  70   ret <4 x i64> %sum
  71 }
  72
  73 define <8 x i32> @test_vpaddd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {  ; lane-wise i32 add, 8 lanes
  74   %sum = add <8 x i32> %i, %j
  75   ret <8 x i32> %sum
  76 }
  77
  78 define <16 x i16> @test_vpaddw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {  ; lane-wise i16 add, 16 lanes
  79   %sum = add <16 x i16> %i, %j
  80   ret <16 x i16> %sum
  81 }
  82
  83 define <32 x i8> @test_vpaddb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {  ; lane-wise i8 add, 32 lanes
  84   %sum = add <32 x i8> %i, %j
  85   ret <32 x i8> %sum
  86 }
  87
  88 define <4 x i64> @test_vpsubq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {  ; lane-wise i64 subtract, 4 lanes
  89   %diff = sub <4 x i64> %i, %j
  90   ret <4 x i64> %diff
  91 }
  92
  93 define <8 x i32> @test_vpsubd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {  ; lane-wise i32 subtract, 8 lanes
  94   %diff = sub <8 x i32> %i, %j
  95   ret <8 x i32> %diff
  96 }
  97
  98 define <16 x i16> @test_vpsubw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {  ; lane-wise i16 subtract, 16 lanes
  99   %diff = sub <16 x i16> %i, %j
 100   ret <16 x i16> %diff
 101 }
 102
 103 define <32 x i8> @test_vpsubb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {  ; lane-wise i8 subtract, 32 lanes
 104   %diff = sub <32 x i8> %i, %j
 105   ret <32 x i8> %diff
 106 }
 107
 108 define <16 x i16> @test_vpmullw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {  ; lane-wise i16 multiply, 16 lanes
 109   %prod = mul <16 x i16> %i, %j
 110   ret <16 x i16> %prod
 111 }
 112
 113 define <8 x i32> @test_vpcmpgtd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {  ; signed %i < %j per lane, sign-extended to an i32 mask
 114   %lt = icmp slt <8 x i32> %i, %j
 115   %mask = sext <8 x i1> %lt to <8 x i32>
 116   ret <8 x i32> %mask
 117 }
 118
 119 define <32 x i8> @test_vpcmpeqb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {  ; %i == %j per lane, sign-extended to an i8 mask
 120   %eq = icmp eq <32 x i8> %i, %j
 121   %mask = sext <32 x i1> %eq to <32 x i8>
 122   ret <32 x i8> %mask
 123 }
 124
 125 define <16 x i16> @test_vpcmpeqw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {  ; %i == %j per lane, sign-extended to an i16 mask
 126   %eq = icmp eq <16 x i16> %i, %j
 127   %mask = sext <16 x i1> %eq to <16 x i16>
 128   ret <16 x i16> %mask
 129 }
 130
 131 define <32 x i8> @test_vpcmpgtb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {  ; signed %i < %j per lane, sign-extended to an i8 mask
 132   %lt = icmp slt <32 x i8> %i, %j
 133   %mask = sext <32 x i1> %lt to <32 x i8>
 134   ret <32 x i8> %mask
 135 }
 136
 137 define <16 x i16> @test_vpcmpgtw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {  ; signed %i < %j per lane, sign-extended to an i16 mask
 138   %lt = icmp slt <16 x i16> %i, %j
 139   %mask = sext <16 x i1> %lt to <16 x i16>
 140   ret <16 x i16> %mask
 141 }
 142
 143 define <8 x i32> @test_vpcmpeqd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {  ; %i == %j per lane, sign-extended to an i32 mask
 144   %eq = icmp eq <8 x i32> %i, %j
 145   %mask = sext <8 x i1> %eq to <8 x i32>
 146   ret <8 x i32> %mask
 147 }
 148
 149 define <2 x i64> @test_vpaddq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {  ; lane-wise i64 add, 2 lanes
 150   %sum = add <2 x i64> %i, %j
 151   ret <2 x i64> %sum
 152 }
 153
 154 define <4 x i32> @test_vpaddd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {  ; lane-wise i32 add, 4 lanes
 155   %sum = add <4 x i32> %i, %j
 156   ret <4 x i32> %sum
 157 }
 158
 159 define <8 x i16> @test_vpaddw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {  ; lane-wise i16 add, 8 lanes
 160   %sum = add <8 x i16> %i, %j
 161   ret <8 x i16> %sum
 162 }
 163
 164 define <16 x i8> @test_vpaddb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {  ; lane-wise i8 add, 16 lanes
 165   %sum = add <16 x i8> %i, %j
 166   ret <16 x i8> %sum
 167 }
 168
 169 define <2 x i64> @test_vpsubq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {  ; lane-wise i64 subtract, 2 lanes
 170   %diff = sub <2 x i64> %i, %j
 171   ret <2 x i64> %diff
 172 }
 173
 174 define <4 x i32> @test_vpsubd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {  ; lane-wise i32 subtract, 4 lanes
 175   %diff = sub <4 x i32> %i, %j
 176   ret <4 x i32> %diff
 177 }
 178
 179 define <8 x i16> @test_vpsubw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {  ; lane-wise i16 subtract, 8 lanes
 180   %diff = sub <8 x i16> %i, %j
 181   ret <8 x i16> %diff
 182 }
 183
 184 define <16 x i8> @test_vpsubb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {  ; lane-wise i8 subtract, 16 lanes
 185   %diff = sub <16 x i8> %i, %j
 186   ret <16 x i8> %diff
 187 }
 188
 189 define <8 x i16> @test_vpmullw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {  ; lane-wise i16 multiply, 8 lanes
 190   %prod = mul <8 x i16> %i, %j
 191   ret <8 x i16> %prod
 192 }
 193
 194 define <8 x i16> @test_vpcmpgtw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {  ; signed %i < %j per lane, sign-extended to an i16 mask
 195   %lt = icmp slt <8 x i16> %i, %j
 196   %mask = sext <8 x i1> %lt to <8 x i16>
 197   ret <8 x i16> %mask
 198 }
 199
 200 define <16 x i8> @test_vpcmpgtb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {  ; signed %i < %j per lane, sign-extended to an i8 mask
 201   %lt = icmp slt <16 x i8> %i, %j
 202   %mask = sext <16 x i1> %lt to <16 x i8>
 203   ret <16 x i8> %mask
 204 }
 205
 206 define <8 x i16> @test_vpcmpeqw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {  ; %i == %j per lane, sign-extended to an i16 mask
 207   %eq = icmp eq <8 x i16> %i, %j
 208   %mask = sext <8 x i1> %eq to <8 x i16>
 209   ret <8 x i16> %mask
 210 }
 211
 212 define <16 x i8> @test_vpcmpeqb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {  ; %i == %j per lane, sign-extended to an i8 mask
 213   %eq = icmp eq <16 x i8> %i, %j
 214   %mask = sext <16 x i1> %eq to <16 x i8>
 215   ret <16 x i8> %mask
 216 }
 217
 218 define <8 x i16> @shuffle_v8i16_vpalignr(<8 x i16> %a, <8 x i16> %b) {  ; result = %a[4..7] followed by %b[0..3]
 219   %perm = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
 220   ret <8 x i16> %perm
 221 }
 222
 223 define <16 x i16> @shuffle_v16i16_vpalignr(<16 x i16> %a, <16 x i16> %b) {  ; result = %b[7], %a[0..6], %b[15], %a[8..14]
 224   %perm = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
 225   ret <16 x i16> %perm
 226 }
 227
 228 define <16 x i8> @shuffle_v16i8_vpalignr(<16 x i8> %a, <16 x i8> %b) {  ; result = %b[15] followed by %a[0..14]
 229   %perm = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
 230   ret <16 x i8> %perm
 231 }
 232
 233 define <32 x i8> @shuffle_v32i8_vpalignr(<32 x i8> %a, <32 x i8> %b) {  ; lanes 1..15 = %a[0..14], lane 16 = %b[31], every other lane undef
 234   %perm = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 235   ret <32 x i8> %perm
 236 }
 237
 238 define <2 x i64> @shuffle_v2i64_vpalignr(<2 x i64> %a, <2 x i64> %b) {  ; result = %a[1], %b[0]
 239   %perm = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
 240   ret <2 x i64> %perm
 241 }
 242
 243 define <4 x i32> @shuffle_v4i32_vpalignr(<4 x i32> %a, <4 x i32> %b) {  ; result = %b[3] followed by %a[0..2]
 244   %perm = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
 245   ret <4 x i32> %perm
 246 }
 247
 248 define <8 x i32> @shuffle_v8i32_vpalignr(<8 x i32> %a, <8 x i32> %b) {  ; result = %b[3], %a[0..2], %b[7], %a[4..6]
 249   %perm = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
 250   ret <8 x i32> %perm
 251 }
 252
 253 define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {  ; result = %b[1], %a[1], %b[2], %a[3]
 254   %perm = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
 255   ret <4 x double> %perm
 256 }
 257
 258 define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {  ; net result = %a[1], 0.0 (built via a float-typed half swap)
 259   %zero.hi = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>  ; = 0.0, %a[1]
 260   %as.f32 = bitcast <2 x double> %zero.hi to <4 x float>
 261   %swapped = shufflevector <4 x float> %as.f32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>  ; exchange the two 64-bit halves
 262   %as.f64 = bitcast <4 x float> %swapped to <2 x double>
 263   ret <2 x double> %as.f64
 264 }
 265
 266 define <16 x i16> @shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24(<16 x i16> %a) {  ; lane 7 = %a[0], lane 15 = %a[8], all other lanes zero
 267   %perm = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24>
 268   ret <16 x i16> %perm
 269 }
 270
 271 define i64 @extract_v2i64(<2 x i64> %x, i64* %dst) {  ; returns lane 0; lane 1 goes to memory
 272   %lane0 = extractelement <2 x i64> %x, i32 0
 273   %lane1 = extractelement <2 x i64> %x, i32 1
 274   store i64 %lane1, i64* %dst, align 1  ; byte-aligned store
 275   ret i64 %lane0
 276 }
 277
 278 define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) {  ; returns lane 1; lane 3 goes to memory
 279   %lane1 = extractelement <4 x i32> %x, i32 1
 280   %lane3 = extractelement <4 x i32> %x, i32 3
 281   store i32 %lane3, i32* %dst, align 1  ; byte-aligned store
 282   ret i32 %lane1
 283 }
 284
 285 define i16 @extract_v8i16(<8 x i16> %x, i16* %dst) {  ; returns lane 1; lane 3 goes to memory
 286   %lane1 = extractelement <8 x i16> %x, i32 1
 287   %lane3 = extractelement <8 x i16> %x, i32 3
 288   store i16 %lane3, i16* %dst, align 1  ; byte-aligned store
 289   ret i16 %lane1
 290 }
 291
 292 define i8 @extract_v16i8(<16 x i8> %x, i8* %dst) {  ; returns lane 1; lane 3 goes to memory
 293   %lane1 = extractelement <16 x i8> %x, i32 1
 294   %lane3 = extractelement <16 x i8> %x, i32 3
 295   store i8 %lane3, i8* %dst, align 1
 296   ret i8 %lane1
 297 }
 298
 299 define <2 x i64> @insert_v2i64(<2 x i64> %x, i64 %y, i64* %ptr) {  ; lane 1 <- value loaded from %ptr, lane 0 <- %y
 300   %val = load i64, i64* %ptr
 301   %r1 = insertelement <2 x i64> %x, i64 %val, i32 1
 302   %r2 = insertelement <2 x i64> %r1, i64 %y, i32 0  ; index must be 0 or 1 for <2 x i64>; an out-of-range index makes the result poison
 303   ret <2 x i64> %r2
 304 }
 305
 306 define <4 x i32> @insert_v4i32(<4 x i32> %x, i32 %y, i32* %ptr) {  ; lane 1 <- value loaded from %ptr, lane 3 <- %y
 307   %loaded = load i32, i32* %ptr
 308   %with.mem = insertelement <4 x i32> %x, i32 %loaded, i32 1
 309   %with.reg = insertelement <4 x i32> %with.mem, i32 %y, i32 3
 310   ret <4 x i32> %with.reg
 311 }
 312
 313 define <8 x i16> @insert_v8i16(<8 x i16> %x, i16 %y, i16* %ptr) {  ; lane 1 <- value loaded from %ptr, lane 5 <- %y
 314   %loaded = load i16, i16* %ptr
 315   %with.mem = insertelement <8 x i16> %x, i16 %loaded, i32 1
 316   %with.reg = insertelement <8 x i16> %with.mem, i16 %y, i32 5
 317   ret <8 x i16> %with.reg
 318 }
 319
 320 define <16 x i8> @insert_v16i8(<16 x i8> %x, i8 %y, i8* %ptr) {  ; lane 3 <- value loaded from %ptr, lane 10 <- %y
 321   %loaded = load i8, i8* %ptr
 322   %with.mem = insertelement <16 x i8> %x, i8 %loaded, i32 3
 323   %with.reg = insertelement <16 x i8> %with.mem, i8 %y, i32 10
 324   ret <16 x i8> %with.reg
 325 }
 326
 327 define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) {  ; result = %a[0], %b[0], %b[1], %a[1]
 328   %perm = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
 329   ret <4 x i32> %perm
 330 }
 331
 332 define <4 x i32> @shuffle_v4i32_0142(<4 x i32> %a, <4 x i32> %b) {  ; result = %a[0], %a[1], %b[0], %a[2]
 333   %perm = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
 334   ret <4 x i32> %perm
 335 }
 336
 337 define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) {  ; repeats the byte pair %a[0], %a[1] eight times; %b is not selected
 338   %perm = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
 339   ret <16 x i8> %perm
 340 }
 341
 342 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {  ; every lane = %a[0]; %b is not selected
 343   %splat = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
 344   ret <16 x i16> %splat
 345 }
 346
 347 define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
 348 ; 256-bit vmovshdup test: each odd lane of %a is duplicated into the even lane below it.
 349   %dup.odd = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
 350   ret <8 x float> %dup.odd
 351 }
 352
 353 define <4 x float> @shuffle_v4f32_1133(<4 x float> %a, <4 x float> %b) {
 354 ; 128-bit vmovshdup test: each odd lane of %a is duplicated into the even lane below it.
 355   %dup.odd = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
 356   ret <4 x float> %dup.odd
 357 }
 358
 359 define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
 360 ; 256-bit vmovsldup test: each even lane of %a is duplicated into the odd lane above it.
 361   %dup.even = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
 362   ret <8 x float> %dup.even
 363 }
 364
 365 define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {
 366 ; 128-bit vmovsldup test: each even lane of %a is duplicated into the odd lane above it.
 367   %dup.even = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
 368   ret <4 x float> %dup.even
 369 }
 370
 371 define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {  ; result = loaded double, %b[1]
 372   %scalar = load double, double* %ptr
 373   %vec = insertelement <2 x double> undef, double %scalar, i32 0
 374   %blend = shufflevector <2 x double> %vec, <2 x double> %b, <2 x i32> <i32 0, i32 3>
 375   ret <2 x double> %blend
 376 }
 377
 378 define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {  ; result = %b[0], loaded double
 379   %scalar = load double, double* %ptr
 380   %vec = insertelement <2 x double> undef, double %scalar, i32 0
 381   %blend = shufflevector <2 x double> %vec, <2 x double> %b, <2 x i32> <i32 2, i32 0>
 382   ret <2 x double> %blend
 383 }
 384
 385 define void @store_floats(<4 x float> %x, i64* %p) {  ; stores the low two lanes of %x + %x as one i64
 386   %doubled = fadd <4 x float> %x, %x
 387   %low.pair = shufflevector <4 x float> %doubled, <4 x float> undef, <2 x i32> <i32 0, i32 1>
 388   %bits = bitcast <2 x float> %low.pair to i64
 389   store i64 %bits, i64* %p
 390   ret void
 391 }
 392
 393 define void @store_double(<2 x double> %x, i64* %p) {  ; stores lane 0 of %x + %x as an i64 bit pattern
 394   %doubled = fadd <2 x double> %x, %x
 395   %lane0 = extractelement <2 x double> %doubled, i32 0
 396   %bits = bitcast double %lane0 to i64
 397   store i64 %bits, i64* %p
 398   ret void
 399 }
 400
 401 define void @store_h_double(<2 x double> %x, i64* %p) {  ; stores lane 1 of %x + %x as an i64 bit pattern
 402   %doubled = fadd <2 x double> %x, %x
 403   %lane1 = extractelement <2 x double> %doubled, i32 1
 404   %bits = bitcast double %lane1 to i64
 405   store i64 %bits, i64* %p
 406   ret void
 407 }
 408
 409 define <2 x double> @test39(double* %ptr) nounwind {  ; loads one double and splats it to both lanes
 410   %scalar = load double, double* %ptr
 411   %vec = insertelement <2 x double> undef, double %scalar, i32 0
 412   %splat = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
 413   ret <2 x double> %splat
 414 }
 415
 416 define <2 x double> @test40(<2 x double>* %ptr) nounwind {  ; loads a whole vector and splats its lane 0
 417   %vec = load <2 x double>, <2 x double>* %ptr
 418   %splat = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
 419   ret <2 x double> %splat
 420 }
 421
 422 define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {  ; both lanes = %a[0]; %b is not selected
 423   %splat = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
 424   ret <2 x double> %splat
 425 }
 426
 427 define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {  ; result = %a[0], %a[0], %a[2], %a[2]; %b is not selected
 428   %dup.even = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
 429   ret <4 x double> %dup.even
 430 }
 431
 432 define <8 x i32> @ashr_v8i32(<8 x i32> %a, <8 x i32> %b) {  ; per-lane arithmetic right shift by a variable amount
 433   %res = ashr <8 x i32> %a, %b
 434   ret <8 x i32> %res
 435 }
 436
 437 define <8 x i32> @lshr_v8i32(<8 x i32> %a, <8 x i32> %b) {  ; per-lane logical right shift by a variable amount
 438   %res = lshr <8 x i32> %a, %b
 439   ret <8 x i32> %res
 440 }
 441
 442 define <8 x i32> @shl_v8i32(<8 x i32> %a, <8 x i32> %b) {  ; per-lane left shift by a variable amount
 443   %res = shl <8 x i32> %a, %b
 444   ret <8 x i32> %res
 445 }
 446
 447 define <8 x i32> @ashr_const_v8i32(<8 x i32> %a) {  ; arithmetic right shift of every lane by 3
 448   %res = ashr <8 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
 449   ret <8 x i32> %res
 450 }
 451
 452 define <8 x i32> @lshr_const_v8i32(<8 x i32> %a) {  ; logical right shift of every lane by 3
 453   %res = lshr <8 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
 454   ret <8 x i32> %res
 455 }
 456
 457 define <8 x i32> @shl_const_v8i32(<8 x i32> %a) {  ; left shift of every lane by 3
 458   %res = shl <8 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
 459   ret <8 x i32> %res
 460 }
 461
 462 define <4 x i64> @ashr_v4i64(<4 x i64> %a, <4 x i64> %b) {  ; per-lane arithmetic right shift by a variable amount
 463   %res = ashr <4 x i64> %a, %b
 464   ret <4 x i64> %res
 465 }
 466
 467 define <4 x i64> @lshr_v4i64(<4 x i64> %a, <4 x i64> %b) {  ; per-lane logical right shift by a variable amount
 468   %res = lshr <4 x i64> %a, %b
 469   ret <4 x i64> %res
 470 }
 471
 472 define <4 x i64> @shl_v4i64(<4 x i64> %a, <4 x i64> %b) {  ; per-lane left shift by a variable amount
 473   %res = shl <4 x i64> %a, %b
 474   ret <4 x i64> %res
 475 }
 476
 477 define <4 x i64> @ashr_const_v4i64(<4 x i64> %a) {  ; arithmetic right shift of every lane by 3
 478   %res = ashr <4 x i64> %a, <i64 3, i64 3, i64 3, i64 3>
 479   ret <4 x i64> %res
 480 }
 481
 482 define <4 x i64> @lshr_const_v4i64(<4 x i64> %a) {  ; logical right shift of every lane by 3
 483   %res = lshr <4 x i64> %a, <i64 3, i64 3, i64 3, i64 3>
 484   ret <4 x i64> %res
 485 }
 486
 487 define <4 x i64> @shl_const_v4i64(<4 x i64> %a) {  ; left shift of every lane by 3
 488   %res = shl <4 x i64> %a, <i64 3, i64 3, i64 3, i64 3>
 489   ret <4 x i64> %res
 490 }
 491
 492 define <16 x i16> @ashr_v16i16(<16 x i16> %a, <16 x i16> %b) {  ; per-lane arithmetic right shift by a variable amount
 493   %res = ashr <16 x i16> %a, %b
 494   ret <16 x i16> %res
 495 }
 496
 497 define <16 x i16> @lshr_v16i16(<16 x i16> %a, <16 x i16> %b) {  ; per-lane logical right shift by a variable amount
 498   %res = lshr <16 x i16> %a, %b
 499   ret <16 x i16> %res
 500 }
 501
 502 define <16 x i16> @shl_v16i16(<16 x i16> %a, <16 x i16> %b) {  ; per-lane left shift by a variable amount
 503   %res = shl <16 x i16> %a, %b
 504   ret <16 x i16> %res
 505 }
 506
 507 define <16 x i16> @ashr_const_v16i16(<16 x i16> %a) {  ; arithmetic right shift of every lane by 3
 508   %res = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 509   ret <16 x i16> %res
 510 }
 511
 512 define <16 x i16> @lshr_const_v16i16(<16 x i16> %a) {  ; logical right shift of every lane by 3
 513   %res = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 514   ret <16 x i16> %res
 515 }
 516
 517 define <16 x i16> @shl_const_v16i16(<16 x i16> %a) {  ; left shift of every lane by 3
 518   %res = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 519   ret <16 x i16> %res
 520 }
 521
 522 define <4 x i32> @ashr_v4i32(<4 x i32> %a, <4 x i32> %b) {  ; per-lane arithmetic right shift by a variable amount
 523   %res = ashr <4 x i32> %a, %b
 524   ret <4 x i32> %res
 525 }
 526
 527 define <4 x i32> @shl_const_v4i32(<4 x i32> %a) {  ; left shift of every lane by 3
 528   %res = shl <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
 529   ret <4 x i32> %res
 530 }
 531
 532 define <2 x i64> @ashr_v2i64(<2 x i64> %a, <2 x i64> %b) {  ; per-lane arithmetic right shift by a variable amount
 533   %res = ashr <2 x i64> %a, %b
 534   ret <2 x i64> %res
 535 }
 536
 537 define <2 x i64> @shl_const_v2i64(<2 x i64> %a) {  ; left shift of both lanes by 3
 538   %res = shl <2 x i64> %a, <i64 3, i64 3>
 539   ret <2 x i64> %res
 540 }
 541
 542 define <8 x i16> @ashr_v8i16(<8 x i16> %a, <8 x i16> %b) {  ; per-lane arithmetic right shift by a variable amount
 543   %res = ashr <8 x i16> %a, %b
 544   ret <8 x i16> %res
 545 }
 546
 547 define <8 x i16> @lshr_v8i16(<8 x i16> %a, <8 x i16> %b) {  ; per-lane logical right shift by a variable amount
 548   %res = lshr <8 x i16> %a, %b
 549   ret <8 x i16> %res
 550 }
 551
 552 define <8 x i16> @shl_v8i16(<8 x i16> %a, <8 x i16> %b) {  ; per-lane left shift by a variable amount
 553   %res = shl <8 x i16> %a, %b
 554   ret <8 x i16> %res
 555 }
 556
 557 define <8 x i16> @ashr_const_v8i16(<8 x i16> %a) {  ; arithmetic right shift of every lane by 3
 558   %res = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 559   ret <8 x i16> %res
 560 }
 561
 562 define <8 x i16> @lshr_const_v8i16(<8 x i16> %a) {  ; logical right shift of every lane by 3
 563   %res = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 564   ret <8 x i16> %res
 565 }
 566
 567 define <8 x i16> @shl_const_v8i16(<8 x i16> %a) {  ; left shift of every lane by 3
 568   %res = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 569   ret <8 x i16> %res
 570 }