test/CodeGen/X86/avx-isa-check.ll

   1 ; Check the AVX2 instructions that are disabled when AVX512VL/AVX512BW is present.
   2
   3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=core-avx2 -mattr=+avx2                 -o /dev/null
   4 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl                                    -o /dev/null
   5 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl  -mattr=+avx512vl                  -o /dev/null
   6 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl  -mattr=+avx512bw                  -o /dev/null
   7 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl  -mattr=+avx512vl -mattr=+avx512bw -o /dev/null
   8 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=skx                                    -o /dev/null
   9
  10 define <4 x i64> @vpand_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
       ; Returns (%a + 1) & %b, lane-wise on <4 x i64>.
       ; Presumably meant to select a 256-bit vpand (inferred from the name only).
  11   ; Force the execution domain with an add.
  12   %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  13   %x = and <4 x i64> %a2, %b
  14   ret <4 x i64> %x
  15 }
  16
  17 define <2 x i64> @vpand_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
       ; Returns (%a + 1) & %b, lane-wise on <2 x i64>.
       ; Presumably meant to select a 128-bit vpand (inferred from the name only).
  18   ; Force the execution domain with an add.
  19   %a2 = add <2 x i64> %a, <i64 1, i64 1>
  20   %x = and <2 x i64> %a2, %b
  21   ret <2 x i64> %x
  22 }
  23
  24 define <4 x i64> @vpandn_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
       ; Returns %a & ~(%a + 1), lane-wise on <4 x i64>: the add result is inverted
       ; by XOR with all-ones and then ANDed with the original %a.
       ; NOTE(review): %b is never read in this body -- confirm that is intentional.
  25   ; Force the execution domain with an add.
  26   %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  27   %y = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
  28   %x = and <4 x i64> %a, %y
  29   ret <4 x i64> %x
  30 }
  31
  32 define <2 x i64> @vpandn_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
       ; Returns %a & ~(%a + 1), lane-wise on <2 x i64>: the add result is inverted
       ; by XOR with all-ones and then ANDed with the original %a.
       ; NOTE(review): %b is never read in this body -- confirm that is intentional.
  33   ; Force the execution domain with an add.
  34   %a2 = add <2 x i64> %a, <i64 1, i64 1>
  35   %y = xor <2 x i64> %a2, <i64 -1, i64 -1>
  36   %x = and <2 x i64> %a, %y
  37   ret <2 x i64> %x
  38 }
  39
  40 define <4 x i64> @vpor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
       ; Returns (%a + 1) | %b, lane-wise on <4 x i64>.
       ; Presumably meant to select a 256-bit vpor (inferred from the name only).
  41   ; Force the execution domain with an add.
  42   %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  43   %x = or <4 x i64> %a2, %b
  44   ret <4 x i64> %x
  45 }
  46
  47 define <4 x i64> @vpxor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
       ; Returns (%a + 1) ^ %b, lane-wise on <4 x i64>.
       ; Presumably meant to select a 256-bit vpxor (inferred from the name only).
  48   ; Force the execution domain with an add.
  49   %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  50   %x = xor <4 x i64> %a2, %b
  51   ret <4 x i64> %x
  52 }
  53
  54 define <2 x i64> @vpor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
       ; Returns (%a + 1) | %b, lane-wise on <2 x i64>.
       ; Presumably meant to select a 128-bit vpor (inferred from the name only).
  55   ; Force the execution domain with an add.
  56   %a2 = add <2 x i64> %a, <i64 1, i64 1>
  57   %x = or <2 x i64> %a2, %b
  58   ret <2 x i64> %x
  59 }
  60
  61 define <2 x i64> @vpxor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
       ; Returns (%a + 1) ^ %b, lane-wise on <2 x i64>.
       ; Presumably meant to select a 128-bit vpxor (inferred from the name only).
  62   ; Force the execution domain with an add.
  63   %a2 = add <2 x i64> %a, <i64 1, i64 1>
  64   %x = xor <2 x i64> %a2, %b
  65   ret <2 x i64> %x
  66 }
  67
  68 define <4 x i64> @test_vpaddq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
       ; Lane-wise %i + %j on <4 x i64>; the name suggests vpaddq is expected (assumption).
  69   %x = add <4 x i64> %i, %j
  70   ret <4 x i64> %x
  71 }
  72
  73 define <8 x i32> @test_vpaddd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
       ; Lane-wise %i + %j on <8 x i32>; the name suggests vpaddd is expected (assumption).
  74   %x = add <8 x i32> %i, %j
  75   ret <8 x i32> %x
  76 }
  77
  78 define <16 x i16> @test_vpaddw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
       ; Lane-wise %i + %j on <16 x i16>; the name suggests vpaddw is expected (assumption).
  79   %x = add <16 x i16> %i, %j
  80   ret <16 x i16> %x
  81 }
  82
  83 define <32 x i8> @test_vpaddb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
       ; Lane-wise %i + %j on <32 x i8>; the name suggests vpaddb is expected (assumption).
  84   %x = add <32 x i8> %i, %j
  85   ret <32 x i8> %x
  86 }
  87
  88 define <4 x i64> @test_vpsubq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
       ; Lane-wise %i - %j on <4 x i64>; the name suggests vpsubq is expected (assumption).
  89   %x = sub <4 x i64> %i, %j
  90   ret <4 x i64> %x
  91 }
  92
  93 define <8 x i32> @test_vpsubd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
       ; Lane-wise %i - %j on <8 x i32>; the name suggests vpsubd is expected (assumption).
  94   %x = sub <8 x i32> %i, %j
  95   ret <8 x i32> %x
  96 }
  97
  98 define <16 x i16> @test_vpsubw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
       ; Lane-wise %i - %j on <16 x i16>; the name suggests vpsubw is expected (assumption).
  99   %x = sub <16 x i16> %i, %j
 100   ret <16 x i16> %x
 101 }
 102
 103 define <32 x i8> @test_vpsubb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
       ; Lane-wise %i - %j on <32 x i8>; the name suggests vpsubb is expected (assumption).
 104   %x = sub <32 x i8> %i, %j
 105   ret <32 x i8> %x
 106 }
 107
 108 define <16 x i16> @test_vpmullw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
       ; Lane-wise %i * %j on <16 x i16>; the name suggests vpmullw is expected (assumption).
 109   %x = mul <16 x i16> %i, %j
 110   ret <16 x i16> %x
 111 }
 112
 113 define <8 x i32> @test_vpcmpgtd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
       ; Lane-wise signed %i < %j on <8 x i32>; the sext widens each i1 result to a
       ; full lane (all-ones when true, zero when false).
       ; NOTE(review): the name says "gt" but the predicate is slt -- presumably the
       ; compare is expected to be matched with swapped operands; confirm.
 114   %bincmp = icmp slt <8 x i32> %i, %j
 115   %x = sext <8 x i1> %bincmp to <8 x i32>
 116   ret <8 x i32> %x
 117 }
 118
 119 define <32 x i8> @test_vpcmpeqb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
       ; Lane-wise %i == %j on <32 x i8>; the sext widens each i1 result to a full
       ; lane (all-ones when equal, zero otherwise).
 120   %bincmp = icmp eq <32 x i8> %i, %j
 121   %x = sext <32 x i1> %bincmp to <32 x i8>
 122   ret <32 x i8> %x
 123 }
 124
 125 define <16 x i16> @test_vpcmpeqw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
       ; Lane-wise %i == %j on <16 x i16>; the sext widens each i1 result to a full
       ; lane (all-ones when equal, zero otherwise).
 126   %bincmp = icmp eq <16 x i16> %i, %j
 127   %x = sext <16 x i1> %bincmp to <16 x i16>
 128   ret <16 x i16> %x
 129 }
 130
 131 define <32 x i8> @test_vpcmpgtb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
       ; Lane-wise signed %i < %j on <32 x i8>; the sext widens each i1 result to a
       ; full lane (all-ones when true, zero when false).
       ; NOTE(review): the name says "gt" but the predicate is slt -- presumably the
       ; compare is expected to be matched with swapped operands; confirm.
 132   %bincmp = icmp slt <32 x i8> %i, %j
 133   %x = sext <32 x i1> %bincmp to <32 x i8>
 134   ret <32 x i8> %x
 135 }
 136
 137 define <16 x i16> @test_vpcmpgtw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
       ; Lane-wise signed %i < %j on <16 x i16>; the sext widens each i1 result to a
       ; full lane (all-ones when true, zero when false).
       ; NOTE(review): the name says "gt" but the predicate is slt -- presumably the
       ; compare is expected to be matched with swapped operands; confirm.
 138   %bincmp = icmp slt <16 x i16> %i, %j
 139   %x = sext <16 x i1> %bincmp to <16 x i16>
 140   ret <16 x i16> %x
 141 }
 142
 143 define <8 x i32> @test_vpcmpeqd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
       ; Lane-wise %i == %j on <8 x i32>; the sext widens each i1 result to a full
       ; lane (all-ones when equal, zero otherwise).
 144   %bincmp = icmp eq <8 x i32> %i, %j
 145   %x = sext <8 x i1> %bincmp to <8 x i32>
 146   ret <8 x i32> %x
 147 }
 148
 149 define <2 x i64> @test_vpaddq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
       ; Lane-wise %i + %j on <2 x i64>; the name suggests vpaddq is expected (assumption).
 150   %x = add <2 x i64> %i, %j
 151   ret <2 x i64> %x
 152 }
 153
 154 define <4 x i32> @test_vpaddd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
       ; Lane-wise %i + %j on <4 x i32>; the name suggests vpaddd is expected (assumption).
 155   %x = add <4 x i32> %i, %j
 156   ret <4 x i32> %x
 157 }
 158
 159 define <8 x i16> @test_vpaddw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
       ; Lane-wise %i + %j on <8 x i16>; the name suggests vpaddw is expected (assumption).
 160   %x = add <8 x i16> %i, %j
 161   ret <8 x i16> %x
 162 }
 163
 164 define <16 x i8> @test_vpaddb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
       ; Lane-wise %i + %j on <16 x i8>; the name suggests vpaddb is expected (assumption).
 165   %x = add <16 x i8> %i, %j
 166   ret <16 x i8> %x
 167 }
 168
 169 define <2 x i64> @test_vpsubq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
       ; Lane-wise %i - %j on <2 x i64>; the name suggests vpsubq is expected (assumption).
 170   %x = sub <2 x i64> %i, %j
 171   ret <2 x i64> %x
 172 }
 173
 174 define <4 x i32> @test_vpsubd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
       ; Lane-wise %i - %j on <4 x i32>; the name suggests vpsubd is expected (assumption).
 175   %x = sub <4 x i32> %i, %j
 176   ret <4 x i32> %x
 177 }
 178
 179 define <8 x i16> @test_vpsubw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
       ; Lane-wise %i - %j on <8 x i16>; the name suggests vpsubw is expected (assumption).
 180   %x = sub <8 x i16> %i, %j
 181   ret <8 x i16> %x
 182 }
 183
 184 define <16 x i8> @test_vpsubb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
       ; Lane-wise %i - %j on <16 x i8>; the name suggests vpsubb is expected (assumption).
 185   %x = sub <16 x i8> %i, %j
 186   ret <16 x i8> %x
 187 }
 188
 189 define <8 x i16> @test_vpmullw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
       ; Lane-wise %i * %j on <8 x i16>; the name suggests vpmullw is expected (assumption).
 190   %x = mul <8 x i16> %i, %j
 191   ret <8 x i16> %x
 192 }
 193
 194 define <8 x i16> @test_vpcmpgtw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
       ; Lane-wise signed %i < %j on <8 x i16>; the sext widens each i1 result to a
       ; full lane (all-ones when true, zero when false).
       ; NOTE(review): the name says "gt" but the predicate is slt -- presumably the
       ; compare is expected to be matched with swapped operands; confirm.
 195   %bincmp = icmp slt <8 x i16> %i, %j
 196   %x = sext <8 x i1> %bincmp to <8 x i16>
 197   ret <8 x i16> %x
 198 }
 199
 200 define <16 x i8> @test_vpcmpgtb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
       ; Lane-wise signed %i < %j on <16 x i8>; the sext widens each i1 result to a
       ; full lane (all-ones when true, zero when false).
       ; NOTE(review): the name says "gt" but the predicate is slt -- presumably the
       ; compare is expected to be matched with swapped operands; confirm.
 201   %bincmp = icmp slt <16 x i8> %i, %j
 202   %x = sext <16 x i1> %bincmp to <16 x i8>
 203   ret <16 x i8> %x
 204 }
 205
 206 define <8 x i16> @test_vpcmpeqw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
       ; Lane-wise %i == %j on <8 x i16>; the sext widens each i1 result to a full
       ; lane (all-ones when equal, zero otherwise).
 207   %bincmp = icmp eq <8 x i16> %i, %j
 208   %x = sext <8 x i1> %bincmp to <8 x i16>
 209   ret <8 x i16> %x
 210 }
 211
 212 define <16 x i8> @test_vpcmpeqb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
       ; Lane-wise %i == %j on <16 x i8>; the sext widens each i1 result to a full
       ; lane (all-ones when equal, zero otherwise).
 213   %bincmp = icmp eq <16 x i8> %i, %j
 214   %x = sext <16 x i1> %bincmp to <16 x i8>
 215   ret <16 x i8> %x
 216 }
 217
 218 define <8 x i16> @shuffle_v8i16_vpalignr(<8 x i16> %a, <8 x i16> %b) {
       ; Result = %a[4..7] followed by %b[0..3] (mask indices 8..11 select from %b).
       ; The name suggests this is meant to be matched as vpalignr (assumption).
 219   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
 220   ret <8 x i16> %shuffle
 221 }
 222
 223 define <16 x i16> @shuffle_v16i16_vpalignr(<16 x i16> %a, <16 x i16> %b) {
       ; Per 8-element half: the last element of %b's half, then the first seven
       ; elements of %a's half, i.e. %b[7], %a[0..6], %b[15], %a[8..14].
       ; The name suggests this is meant to be matched as vpalignr (assumption).
 224   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
 225   ret <16 x i16> %shuffle
 226 }
 227
 228 define <16 x i8> @shuffle_v16i8_vpalignr(<16 x i8> %a, <16 x i8> %b) {
       ; Result = %b[15] followed by %a[0..14] (mask index 31 selects the last byte of %b).
       ; The name suggests this is meant to be matched as vpalignr (assumption).
 229   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
 230   ret <16 x i8> %shuffle
 231 }
 232
 233 define <32 x i8> @shuffle_v32i8_vpalignr(<32 x i8> %a, <32 x i8> %b) {
       ; Lane 0 is undef, lanes 1..15 take %a[0..14], lane 16 takes %b[31]
       ; (mask index 63), and the remaining lanes are undef.
       ; The name suggests this is meant to be matched as vpalignr (assumption).
 234   %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 235   ret <32 x i8> %shuffle
 236 }
 237
 238 define <2 x i64> @shuffle_v2i64_vpalignr(<2 x i64> %a, <2 x i64> %b) {
       ; Result = <%a[1], %b[0]> (mask index 2 selects the first element of %b).
       ; The name suggests this is meant to be matched as vpalignr (assumption).
 239   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
 240   ret <2 x i64> %shuffle
 241 }
 242
 243 define <4 x i32> @shuffle_v4i32_vpalignr(<4 x i32> %a, <4 x i32> %b) {
       ; Result = %b[3] followed by %a[0..2] (mask index 7 selects the last element of %b).
       ; The name suggests this is meant to be matched as vpalignr (assumption).
 244   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
 245   ret <4 x i32> %shuffle
 246 }
 247
 248 define <8 x i32> @shuffle_v8i32_vpalignr(<8 x i32> %a, <8 x i32> %b) {
       ; Per 4-element half: the last element of %b's half, then the first three
       ; elements of %a's half, i.e. %b[3], %a[0..2], %b[7], %a[4..6].
       ; The name suggests this is meant to be matched as vpalignr (assumption).
 249   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
 250   ret <8 x i32> %shuffle
 251 }
 252
 253 define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
       ; Result = <%b[1], %a[1], %b[2], %a[3]>; the digits in the name are the mask
       ; indices (4..7 select from %b).
 254   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
 255   ret <4 x double> %shuffle
 256 }
 257
 258 define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
       ; Builds <0.0, %a[1]> with a double shuffle against zero, then, viewed as
       ; <4 x float>, swaps the two float pairs (the two 64-bit halves) and casts
       ; back, so the returned vector is <%a[1], 0.0>.
 259   %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
 260   %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
 261   %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
 262   %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
 263   ret <2 x double> %bitcast64
 264 }
 265
 266 define <16 x i16> @shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24(<16 x i16> %a) {
       ; The first shuffle operand is the zero vector, so mask index 0 yields zero;
       ; indices 16 and 24 select %a[0] and %a[8]. Result: lane 7 = %a[0],
       ; lane 15 = %a[8], every other lane zero.
 267   %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24>
 268   ret <16 x i16> %shuffle
 269 }
 270
 271 define i64 @extract_v2i64(<2 x i64> %x, i64* %dst) {
       ; Returns element 0 of %x and stores element 1 to %dst.
       ; The store is declared with align 1, i.e. no alignment is assumed for %dst.
 272   %r1 = extractelement <2 x i64> %x, i32 0
 273   %r2 = extractelement <2 x i64> %x, i32 1
 274   store i64 %r2, i64* %dst, align 1
 275   ret i64 %r1
 276 }
 277
 278 define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) {
       ; Returns element 1 of %x and stores element 3 to %dst.
       ; The store is declared with align 1, i.e. no alignment is assumed for %dst.
 279   %r1 = extractelement <4 x i32> %x, i32 1
 280   %r2 = extractelement <4 x i32> %x, i32 3
 281   store i32 %r2, i32* %dst, align 1
 282   ret i32 %r1
 283 }
 284
 285 define i16 @extract_v8i16(<8 x i16> %x, i16* %dst) {
       ; Returns element 1 of %x and stores element 3 to %dst.
       ; The store is declared with align 1, i.e. no alignment is assumed for %dst.
 286   %r1 = extractelement <8 x i16> %x, i32 1
 287   %r2 = extractelement <8 x i16> %x, i32 3
 288   store i16 %r2, i16* %dst, align 1
 289   ret i16 %r1
 290 }
 291
 292 define i8 @extract_v16i8(<16 x i8> %x, i8* %dst) {
       ; Returns element 1 of %x and stores element 3 to %dst (align 1).
 293   %r1 = extractelement <16 x i8> %x, i32 1
 294   %r2 = extractelement <16 x i8> %x, i32 3
 295   store i8 %r2, i8* %dst, align 1
 296   ret i8 %r1
 297 }
 298
 299 define <2 x i64> @insert_v2i64(<2 x i64> %x, i64 %y , i64* %ptr) {
       ; Inserts an i64 loaded from %ptr into lane 1 of %x, then inserts %y into
       ; lane 0, exercising both the memory-operand and register-operand forms.
       ; Lane indices must stay below 2: an out-of-range insertelement index makes
       ; the whole result undefined, which would leave nothing for llc to select.
 300   %val = load i64, i64* %ptr
 301   %r1 = insertelement <2 x i64> %x, i64 %val, i32 1
 302   %r2 = insertelement <2 x i64> %r1, i64 %y, i32 0
 303   ret <2 x i64> %r2
 304 }
 305
 306 define <4 x i32> @insert_v4i32(<4 x i32> %x, i32 %y, i32* %ptr) {
       ; Inserts an i32 loaded from %ptr into lane 1 of %x, then inserts %y into
       ; lane 3, and returns the updated vector.
 307   %val = load i32, i32* %ptr
 308   %r1 = insertelement <4 x i32> %x, i32 %val, i32 1
 309   %r2 = insertelement <4 x i32> %r1, i32 %y, i32 3
 310   ret <4 x i32> %r2
 311 }
 312
 313 define <8 x i16> @insert_v8i16(<8 x i16> %x, i16 %y, i16* %ptr) {
       ; Inserts an i16 loaded from %ptr into lane 1 of %x, then inserts %y into
       ; lane 5, and returns the updated vector.
 314   %val = load i16, i16* %ptr
 315   %r1 = insertelement <8 x i16> %x, i16 %val, i32 1
 316   %r2 = insertelement <8 x i16> %r1, i16 %y, i32 5
 317   ret <8 x i16> %r2
 318 }
 319
 320 define <16 x i8> @insert_v16i8(<16 x i8> %x, i8 %y, i8* %ptr) {
       ; Inserts an i8 loaded from %ptr into lane 3 of %x, then inserts %y into
       ; lane 10, and returns the updated vector.
 321   %val = load i8, i8* %ptr
 322   %r1 = insertelement <16 x i8> %x, i8 %val, i32 3
 323   %r2 = insertelement <16 x i8> %r1, i8 %y, i32 10
 324   ret <16 x i8> %r2
 325 }
 326
 327 define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) {
       ; Result = <%a[0], %b[0], %b[1], %a[1]>; the digits in the name are the mask
       ; indices (4..7 select from %b).
 328   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
 329   ret <4 x i32> %shuffle
 330 }
 331
 332 define <4 x i32> @shuffle_v4i32_0142(<4 x i32> %a, <4 x i32> %b) {
       ; Result = <%a[0], %a[1], %b[0], %a[2]>; the digits in the name are the mask
       ; indices (4 selects the first element of %b).
 333  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
 334   ret <4 x i32> %shuffle
 335 }
 336
 337 define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) {
       ; Repeats the byte pair %a[0], %a[1] eight times across the result.
       ; No mask index reaches 16 or above, so %b does not contribute.
 338   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
 339   ret <16 x i8> %shuffle
 340 }
 341
 342 define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
       ; Copies %a[0] into all 16 lanes (every mask index is 0); %b does not contribute.
 343   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
 344   ret <16 x i16> %shuffle
 345 }
 346