; test/CodeGen/X86/avx512-insert-extract.ll

   1 ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck --check-prefix=KNL --check-prefix=CHECK %s
   2 ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX --check-prefix=CHECK %s
   3
   4 ;CHECK-LABEL: test1:
   5 ;CHECK: vinsertps
   6 ;CHECK: vinsertf32x4
   7 ;CHECK: ret
   8 define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
   9   %rrr = load float* %br
  10   %rrr2 = insertelement <16 x float> %x, float %rrr, i32 1
  11   %rrr3 = insertelement <16 x float> %rrr2, float %y, i32 14
  12   ret <16 x float> %rrr3
  13 }
  14
  15 ;CHECK-LABEL: test2:
  16 ;CHECK: vinsertf32x4 $0
  17 ;CHECK: vextractf32x4 $3
  18 ;CHECK: vinsertf32x4 $3
  19 ;CHECK: ret
  20 define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
  21   %rrr = load double* %br
  22   %rrr2 = insertelement <8 x double> %x, double %rrr, i32 1
  23   %rrr3 = insertelement <8 x double> %rrr2, double %y, i32 6
  24   ret <8 x double> %rrr3
  25 }
  26
  27 ;CHECK-LABEL: test3:
  28 ;CHECK: vextractf32x4 $1
  29 ;CHECK: vinsertf32x4 $0
  30 ;CHECK: ret
  31 define <16 x float> @test3(<16 x float> %x) nounwind {
  32   %eee = extractelement <16 x float> %x, i32 4
  33   %rrr2 = insertelement <16 x float> %x, float %eee, i32 1
  34   ret <16 x float> %rrr2
  35 }
  36
  37 ;CHECK-LABEL: test4:
  38 ;CHECK: vextracti32x4 $2
  39 ;CHECK: vinserti32x4 $0
  40 ;CHECK: ret
  41 define <8 x i64> @test4(<8 x i64> %x) nounwind {
  42   %eee = extractelement <8 x i64> %x, i32 4
  43   %rrr2 = insertelement <8 x i64> %x, i64 %eee, i32 1
  44   ret <8 x i64> %rrr2
  45 }
  46
  47 ;CHECK-LABEL: test5:
  48 ;CHECK: vextractps
  49 ;CHECK: ret
  50 define i32 @test5(<4 x float> %x) nounwind {
  51   %ef = extractelement <4 x float> %x, i32 3
  52   %ei = bitcast float %ef to i32
  53   ret i32 %ei
  54 }
  55
  56 ;CHECK-LABEL: test6:
  57 ;CHECK: vextractps {{.*}}, (%rdi)
  58 ;CHECK: ret
  59 define void @test6(<4 x float> %x, float* %out) nounwind {
  60   %ef = extractelement <4 x float> %x, i32 3
  61   store float %ef, float* %out, align 4
  62   ret void
  63 }
  64
  65 ;CHECK-LABEL: test7
  66 ;CHECK: vmovd
  67 ;CHECK: vpermps %zmm
  68 ;CHECK: ret
  69 define float @test7(<16 x float> %x, i32 %ind) nounwind {
  70   %e = extractelement <16 x float> %x, i32 %ind
  71   ret float %e
  72 }
  73
  74 ;CHECK-LABEL: test8
  75 ;CHECK: vmovq
  76 ;CHECK: vpermpd %zmm
  77 ;CHECK: ret
  78 define double @test8(<8 x double> %x, i32 %ind) nounwind {
  79   %e = extractelement <8 x double> %x, i32 %ind
  80   ret double %e
  81 }
  82
  83 ;CHECK-LABEL: test9
  84 ;CHECK: vmovd
  85 ;CHECK: vpermps %ymm
  86 ;CHECK: ret
  87 define float @test9(<8 x float> %x, i32 %ind) nounwind {
  88   %e = extractelement <8 x float> %x, i32 %ind
  89   ret float %e
  90 }
  91
  92 ;CHECK-LABEL: test10
  93 ;CHECK: vmovd
  94 ;CHECK: vpermd %zmm
  95 ;CHECK: vmovd  %xmm0, %eax
  96 ;CHECK: ret
  97 define i32 @test10(<16 x i32> %x, i32 %ind) nounwind {
  98   %e = extractelement <16 x i32> %x, i32 %ind
  99   ret i32 %e
 100 }
 101
 102 ;CHECK-LABEL: test11
 103 ;CHECK: vpcmpltud
 104 ;CHECK: kshiftlw $11
 105 ;CHECK: kshiftrw $15
 106 ;CHECK: kortestw
 107 ;CHECK: je
 108 ;CHECK: ret
 109 ;CHECK: ret
 110 define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) {
 111   %cmp_res = icmp ult <16 x i32> %a, %b
 112   %ia = extractelement <16 x i1> %cmp_res, i32 4
 113   br i1 %ia, label %A, label %B
 114   A:
 115     ret <16 x i32>%b
 116   B:
 117    %c = add <16 x i32>%b, %a
 118    ret <16 x i32>%c
 119 }
 120
 121 ;CHECK-LABEL: test12
 122 ;CHECK: vpcmpgtq
 123 ;CHECK: kshiftlw $15
 124 ;CHECK: kshiftrw $15
 125 ;CHECK: kortestw
 126 ;CHECK: ret
 127
 128 define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {
 129
 130   %cmpvector_func.i = icmp slt <16 x i64> %a, %b
 131   %extract24vector_func.i = extractelement <16 x i1> %cmpvector_func.i, i32 0
 132   %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
 133   ret i64 %res
 134 }
 135
 136 ;CHECK-LABEL: test13
 137 ;CHECK: cmpl
 138 ;CHECK: sbbl
 139 ;CHECK: orl $65532
 140 ;CHECK: ret
 141 define i16 @test13(i32 %a, i32 %b) {
 142   %cmp_res = icmp ult i32 %a, %b
 143   %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %cmp_res, i32 0
 144   %res = bitcast <16 x i1> %maskv to i16
 145   ret i16 %res
 146 }
 147
 148 ;CHECK-LABEL: test14
 149 ;CHECK: vpcmpgtq
 150 ;CHECK: kshiftlw $11
 151 ;CHECK: kshiftrw $15
 152 ;CHECK: kortestw
 153 ;CHECK: ret
 154
 155 define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
 156
 157   %cmpvector_func.i = icmp slt <8 x i64> %a, %b
 158   %extract24vector_func.i = extractelement <8 x i1> %cmpvector_func.i, i32 4
 159   %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
 160   ret i64 %res
 161 }
 162
 163 ;CHECK-LABEL: test15
 164 ;CHECK: kshiftlw
 165 ;CHECK: kmovw
 166 ;CHECK: ret
 167 define i16 @test15(i1 *%addr) {
 168   %x = load i1 * %addr, align 128
 169   %x1 = insertelement <16 x i1> undef, i1 %x, i32 10
 170   %x2 = bitcast <16 x i1>%x1 to i16
 171   ret i16 %x2
 172 }
 173
 174 ;CHECK-LABEL: test16
 175 ;CHECK: kshiftlw
 176 ;CHECK: kshiftrw
 177 ;CHECK: korw
 178 ;CHECK: ret
 179 define i16 @test16(i1 *%addr, i16 %a) {
 180   %x = load i1 * %addr, align 128
 181   %a1 = bitcast i16 %a to <16 x i1>
 182   %x1 = insertelement <16 x i1> %a1, i1 %x, i32 10
 183   %x2 = bitcast <16 x i1>%x1 to i16
 184   ret i16 %x2
 185 }
 186
 187 ;CHECK-LABEL: test17
 188 ;CHECK: kshiftlw
 189 ;CHECK: kshiftrw
 190 ;KNL: korw
 191 ;SKX: korb
 192 ;CHECK: ret
 193 define i8 @test17(i1 *%addr, i8 %a) {
 194   %x = load i1 * %addr, align 128
 195   %a1 = bitcast i8 %a to <8 x i1>
 196   %x1 = insertelement <8 x i1> %a1, i1 %x, i32 4
 197   %x2 = bitcast <8 x i1>%x1 to i8
 198   ret i8 %x2
 199 }
 200