test/CodeGen/X86/avx-isa-check.ll

   1 ; Check the AVX2 instructions that are disabled when AVX512VL/AVX512BW are present.
   2
   3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=core-avx2 -mattr=+avx2                 -o /dev/null
   4 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl                                    -o /dev/null
   5 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl  -mattr=+avx512vl                  -o /dev/null
   6 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl  -mattr=+avx512bw                  -o /dev/null
   7 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl  -mattr=+avx512vl -mattr=+avx512bw -o /dev/null
   8 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=skx                                    -o /dev/null
   9
  10 define <4 x i64> @vpand_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp { ; returns (%a + 1) & %b, lane-wise on <4 x i64>; name suggests 256-bit VPAND is the intended selection
  11   ; Force the execution domain with an add.
  12   %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1> ; %a2 = %a + splat(1)
  13   %x = and <4 x i64> %a2, %b ; the bitwise AND this function exists to exercise
  14   ret <4 x i64> %x
  15 }
  16
  17 define <2 x i64> @vpand_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp { ; returns (%a + 1) & %b, lane-wise on <2 x i64>; name suggests 128-bit VPAND is the intended selection
  18   ; Force the execution domain with an add.
  19   %a2 = add <2 x i64> %a, <i64 1, i64 1> ; %a2 = %a + splat(1)
  20   %x = and <2 x i64> %a2, %b ; the bitwise AND this function exists to exercise
  21   ret <2 x i64> %x
  22 }
  23
  24 define <4 x i64> @vpandn_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp { ; returns %a & ~(%a + 1) on <4 x i64>; NOTE: %b is never referenced in the body
  25   ; Force the execution domain with an add.
  26   %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1> ; %a2 = %a + splat(1)
  27   %y = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1> ; xor with all-ones = bitwise NOT of %a2
  28   %x = and <4 x i64> %a, %y ; AND with a negated operand; presumably the and-not (VPANDN) pattern the name refers to - confirm
  29   ret <4 x i64> %x
  30 }
  31
  32 define <2 x i64> @vpandn_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp { ; returns %a & ~(%a + 1) on <2 x i64>; NOTE: %b is never referenced in the body
  33   ; Force the execution domain with an add.
  34   %a2 = add <2 x i64> %a, <i64 1, i64 1> ; %a2 = %a + splat(1)
  35   %y = xor <2 x i64> %a2, <i64 -1, i64 -1> ; xor with all-ones = bitwise NOT of %a2
  36   %x = and <2 x i64> %a, %y ; AND with a negated operand; presumably the and-not (VPANDN) pattern the name refers to - confirm
  37   ret <2 x i64> %x
  38 }
  39
  40 define <4 x i64> @vpor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp { ; returns (%a + 1) | %b, lane-wise on <4 x i64>; name suggests 256-bit VPOR is the intended selection
  41   ; Force the execution domain with an add.
  42   %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1> ; %a2 = %a + splat(1)
  43   %x = or <4 x i64> %a2, %b ; the bitwise OR this function exists to exercise
  44   ret <4 x i64> %x
  45 }
  46
  47 define <4 x i64> @vpxor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp { ; returns (%a + 1) ^ %b, lane-wise on <4 x i64>; name suggests 256-bit VPXOR is the intended selection
  48   ; Force the execution domain with an add.
  49   %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1> ; %a2 = %a + splat(1)
  50   %x = xor <4 x i64> %a2, %b ; the bitwise XOR this function exists to exercise
  51   ret <4 x i64> %x
  52 }
  53
  54 define <2 x i64> @vpor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp { ; returns (%a + 1) | %b, lane-wise on <2 x i64>; name suggests 128-bit VPOR is the intended selection
  55   ; Force the execution domain with an add.
  56   %a2 = add <2 x i64> %a, <i64 1, i64 1> ; %a2 = %a + splat(1)
  57   %x = or <2 x i64> %a2, %b ; the bitwise OR this function exists to exercise
  58   ret <2 x i64> %x
  59 }
  60
  61 define <2 x i64> @vpxor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp { ; returns (%a + 1) ^ %b, lane-wise on <2 x i64>; name suggests 128-bit VPXOR is the intended selection
  62   ; Force the execution domain with an add.
  63   %a2 = add <2 x i64> %a, <i64 1, i64 1> ; %a2 = %a + splat(1)
  64   %x = xor <2 x i64> %a2, %b ; the bitwise XOR this function exists to exercise
  65   ret <2 x i64> %x
  66 }
  67
  68 define <4 x i64> @test_vpaddq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone { ; lane-wise %i + %j on <4 x i64>; name suggests 256-bit VPADDQ is the intended selection
  69   %x = add <4 x i64> %i, %j
  70   ret <4 x i64> %x
  71 }
  72
  73 define <8 x i32> @test_vpaddd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone { ; lane-wise %i + %j on <8 x i32>; name suggests 256-bit VPADDD is the intended selection
  74   %x = add <8 x i32> %i, %j
  75   ret <8 x i32> %x
  76 }
  77
  78 define <16 x i16> @test_vpaddw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone { ; lane-wise %i + %j on <16 x i16>; name suggests 256-bit VPADDW is the intended selection
  79   %x = add <16 x i16> %i, %j
  80   ret <16 x i16> %x
  81 }
  82
  83 define <32 x i8> @test_vpaddb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone { ; lane-wise %i + %j on <32 x i8>; name suggests 256-bit VPADDB is the intended selection
  84   %x = add <32 x i8> %i, %j
  85   ret <32 x i8> %x
  86 }
  87
  88 define <4 x i64> @test_vpsubq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone { ; lane-wise %i - %j on <4 x i64>; name suggests 256-bit VPSUBQ is the intended selection
  89   %x = sub <4 x i64> %i, %j
  90   ret <4 x i64> %x
  91 }
  92
  93 define <8 x i32> @test_vpsubd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone { ; lane-wise %i - %j on <8 x i32>; name suggests 256-bit VPSUBD is the intended selection
  94   %x = sub <8 x i32> %i, %j
  95   ret <8 x i32> %x
  96 }
  97
  98 define <16 x i16> @test_vpsubw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone { ; lane-wise %i - %j on <16 x i16>; name suggests 256-bit VPSUBW is the intended selection
  99   %x = sub <16 x i16> %i, %j
 100   ret <16 x i16> %x
 101 }
 102
 103 define <32 x i8> @test_vpsubb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone { ; lane-wise %i - %j on <32 x i8>; name suggests 256-bit VPSUBB is the intended selection
 104   %x = sub <32 x i8> %i, %j
 105   ret <32 x i8> %x
 106 }
 107
 108 define <16 x i16> @test_vpmullw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone { ; lane-wise %i * %j on <16 x i16> (result truncated to 16 bits per lane); name suggests 256-bit VPMULLW
 109   %x = mul <16 x i16> %i, %j
 110   ret <16 x i16> %x
 111 }
 112
 113 define <8 x i32> @test_vpcmpgtd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone { ; per-lane signed %i < %j on <8 x i32>, returned as a full-width lane mask
 114   %bincmp = icmp slt <8 x i32> %i, %j ; NOTE(review): slt, not sgt - presumably matched as a greater-than compare with swapped operands; confirm
 115   %x = sext <8 x i1> %bincmp to <8 x i32> ; true -> -1 (all ones), false -> 0
 116   ret <8 x i32> %x
 117 }
 118
 119 define <32 x i8> @test_vpcmpeqb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone { ; per-lane %i == %j on <32 x i8>, returned as a full-width lane mask; name suggests 256-bit VPCMPEQB
 120   %bincmp = icmp eq <32 x i8> %i, %j
 121   %x = sext <32 x i1> %bincmp to <32 x i8> ; true -> -1 (all ones), false -> 0
 122   ret <32 x i8> %x
 123 }
 124
 125 define <16 x i16> @test_vpcmpeqw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone { ; per-lane %i == %j on <16 x i16>, returned as a full-width lane mask; name suggests 256-bit VPCMPEQW
 126   %bincmp = icmp eq <16 x i16> %i, %j
 127   %x = sext <16 x i1> %bincmp to <16 x i16> ; true -> -1 (all ones), false -> 0
 128   ret <16 x i16> %x
 129 }
 130
 131 define <32 x i8> @test_vpcmpgtb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone { ; per-lane signed %i < %j on <32 x i8>, returned as a full-width lane mask
 132   %bincmp = icmp slt <32 x i8> %i, %j ; NOTE(review): slt, not sgt - presumably matched as a greater-than compare with swapped operands; confirm
 133   %x = sext <32 x i1> %bincmp to <32 x i8> ; true -> -1 (all ones), false -> 0
 134   ret <32 x i8> %x
 135 }
 136
 137 define <16 x i16> @test_vpcmpgtw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone { ; per-lane signed %i < %j on <16 x i16>, returned as a full-width lane mask
 138   %bincmp = icmp slt <16 x i16> %i, %j ; NOTE(review): slt, not sgt - presumably matched as a greater-than compare with swapped operands; confirm
 139   %x = sext <16 x i1> %bincmp to <16 x i16> ; true -> -1 (all ones), false -> 0
 140   ret <16 x i16> %x
 141 }
 142
 143 define <8 x i32> @test_vpcmpeqd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone { ; per-lane %i == %j on <8 x i32>, returned as a full-width lane mask; name suggests 256-bit VPCMPEQD
 144   %bincmp = icmp eq <8 x i32> %i, %j
 145   %x = sext <8 x i1> %bincmp to <8 x i32> ; true -> -1 (all ones), false -> 0
 146   ret <8 x i32> %x
 147 }
 148
 149 define <2 x i64> @test_vpaddq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone { ; lane-wise %i + %j on <2 x i64>; name suggests 128-bit VPADDQ is the intended selection
 150   %x = add <2 x i64> %i, %j
 151   ret <2 x i64> %x
 152 }
 153
 154 define <4 x i32> @test_vpaddd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone { ; lane-wise %i + %j on <4 x i32>; name suggests 128-bit VPADDD is the intended selection
 155   %x = add <4 x i32> %i, %j
 156   ret <4 x i32> %x
 157 }
 158
 159 define <8 x i16> @test_vpaddw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone { ; lane-wise %i + %j on <8 x i16>; name suggests 128-bit VPADDW is the intended selection
 160   %x = add <8 x i16> %i, %j
 161   ret <8 x i16> %x
 162 }
 163
 164 define <16 x i8> @test_vpaddb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone { ; lane-wise %i + %j on <16 x i8>; name suggests 128-bit VPADDB is the intended selection
 165   %x = add <16 x i8> %i, %j
 166   ret <16 x i8> %x
 167 }
 168
 169 define <2 x i64> @test_vpsubq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone { ; lane-wise %i - %j on <2 x i64>; name suggests 128-bit VPSUBQ is the intended selection
 170   %x = sub <2 x i64> %i, %j
 171   ret <2 x i64> %x
 172 }
 173
 174 define <4 x i32> @test_vpsubd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone { ; lane-wise %i - %j on <4 x i32>; name suggests 128-bit VPSUBD is the intended selection
 175   %x = sub <4 x i32> %i, %j
 176   ret <4 x i32> %x
 177 }
 178
 179 define <8 x i16> @test_vpsubw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone { ; lane-wise %i - %j on <8 x i16>; name suggests 128-bit VPSUBW is the intended selection
 180   %x = sub <8 x i16> %i, %j
 181   ret <8 x i16> %x
 182 }
 183
 184 define <16 x i8> @test_vpsubb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone { ; lane-wise %i - %j on <16 x i8>; name suggests 128-bit VPSUBB is the intended selection
 185   %x = sub <16 x i8> %i, %j
 186   ret <16 x i8> %x
 187 }
 188
 189 define <8 x i16> @test_vpmullw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone { ; lane-wise %i * %j on <8 x i16> (result truncated to 16 bits per lane); name suggests 128-bit VPMULLW
 190   %x = mul <8 x i16> %i, %j
 191   ret <8 x i16> %x
 192 }
 193
 194 define <8 x i16> @test_vpcmpgtw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone { ; per-lane signed %i < %j on <8 x i16>, returned as a full-width lane mask
 195   %bincmp = icmp slt <8 x i16> %i, %j ; NOTE(review): slt, not sgt - presumably matched as a greater-than compare with swapped operands; confirm
 196   %x = sext <8 x i1> %bincmp to <8 x i16> ; true -> -1 (all ones), false -> 0
 197   ret <8 x i16> %x
 198 }
 199
 200 define <16 x i8> @test_vpcmpgtb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone { ; per-lane signed %i < %j on <16 x i8>, returned as a full-width lane mask
 201   %bincmp = icmp slt <16 x i8> %i, %j ; NOTE(review): slt, not sgt - presumably matched as a greater-than compare with swapped operands; confirm
 202   %x = sext <16 x i1> %bincmp to <16 x i8> ; true -> -1 (all ones), false -> 0
 203   ret <16 x i8> %x
 204 }
 205
 206 define <8 x i16> @test_vpcmpeqw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone { ; per-lane %i == %j on <8 x i16>, returned as a full-width lane mask; name suggests 128-bit VPCMPEQW
 207   %bincmp = icmp eq <8 x i16> %i, %j
 208   %x = sext <8 x i1> %bincmp to <8 x i16> ; true -> -1 (all ones), false -> 0
 209   ret <8 x i16> %x
 210 }
 211
 212 define <16 x i8> @test_vpcmpeqb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone { ; per-lane %i == %j on <16 x i8>, returned as a full-width lane mask; name suggests 128-bit VPCMPEQB
 213   %bincmp = icmp eq <16 x i8> %i, %j
 214   %x = sext <16 x i1> %bincmp to <16 x i8> ; true -> -1 (all ones), false -> 0
 215   ret <16 x i8> %x
 216 }
 217
 218 define <8 x i16> @shuffle_v8i16_vpalignr(<8 x i16> %a, <8 x i16> %b) { ; two-input <8 x i16> shuffle; name suggests a VPALIGNR-style byte alignment is the intended selection
 219   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> ; result = %a[4..7] followed by %b[0..3] (mask indices 8+ select from %b)
 220   ret <8 x i16> %shuffle
 221 }
 222
 223 define <16 x i16> @shuffle_v16i16_vpalignr(<16 x i16> %a, <16 x i16> %b) { ; two-input <16 x i16> shuffle with the same pattern repeated in each 8-element half; name suggests VPALIGNR
 224   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14> ; result = %b[7], %a[0..6], %b[15], %a[8..14] (mask indices 16+ select from %b)
 225   ret <16 x i16> %shuffle
 226 }
 227
 228 define <16 x i8> @shuffle_v16i8_vpalignr(<16 x i8> %a, <16 x i8> %b) { ; two-input <16 x i8> shuffle; name suggests a VPALIGNR-style byte alignment is the intended selection
 229   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14> ; result = %b[15] followed by %a[0..14] (mask indices 16+ select from %b)
 230   ret <16 x i8> %shuffle
 231 }
 232
 233 define <32 x i8> @shuffle_v32i8_vpalignr(<32 x i8> %a, <32 x i8> %b) { ; two-input <32 x i8> shuffle with a mostly-undef mask; name suggests VPALIGNR is the intended selection
 234   %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; element 0 undef, elements 1..15 = %a[0..14], element 16 = %b[31] (mask index 63), elements 17..31 undef
 235   ret <32 x i8> %shuffle
 236 }
 237
 238 define <2 x i64> @shuffle_v2i64_vpalignr(<2 x i64> %a, <2 x i64> %b) { ; two-input <2 x i64> shuffle; name suggests a VPALIGNR-style alignment is the intended selection
 239   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2> ; result = %a[1], %b[0] (mask index 2 selects the first element of %b)
 240   ret <2 x i64> %shuffle
 241 }
 242
 243 define <4 x i32> @shuffle_v4i32_vpalignr(<4 x i32> %a, <4 x i32> %b) { ; two-input <4 x i32> shuffle; name suggests a VPALIGNR-style alignment is the intended selection
 244   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 0, i32 1, i32 2> ; result = %b[3] followed by %a[0..2] (mask indices 4+ select from %b)
 245   ret <4 x i32> %shuffle
 246 }
 247
 248 define <8 x i32> @shuffle_v8i32_vpalignr(<8 x i32> %a, <8 x i32> %b) { ; two-input <8 x i32> shuffle with the same pattern repeated in each 4-element half; name suggests VPALIGNR
 249   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6> ; result = %b[3], %a[0..2], %b[7], %a[4..6] (mask indices 8+ select from %b)
 250   ret <8 x i32> %shuffle
 251 }
 252
 253 define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) { ; two-input <4 x double> shuffle; the "5163" suffix is the shuffle mask
 254   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3> ; result = %b[1], %a[1], %b[2], %a[3] (mask indices 4+ select from %b)
 255   ret <4 x double> %shuffle
 256 }
 257
 258 define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) { ; returns <%a[1], 0.0>, built from two shuffles separated by bitcasts ("1z" = element 1, then zero)
 259   %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1> ; <0.0, %a[1]>: mask index 2 selects element 0 of the zero vector
 260   %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float> ; reinterpret the same 128 bits as four floats
 261   %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1> ; swap the two 64-bit halves (float pairs 2,3 and 0,1)
 262   %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double> ; back to two doubles: <%a[1], 0.0>
 263   ret <2 x double> %bitcast64
 264 }