; test/CodeGen/X86/avx512-insert-extract.ll

; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck --check-prefix=KNL --check-prefix=CHECK %s
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX --check-prefix=CHECK %s

   4 ;CHECK-LABEL: test1:
   5 ;CHECK: vinsertps
   6 ;CHECK: vinsertf32x4
   7 ;CHECK: ret
   8 define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
   9   %rrr = load float, float* %br
  10   %rrr2 = insertelement <16 x float> %x, float %rrr, i32 1
  11   %rrr3 = insertelement <16 x float> %rrr2, float %y, i32 14
  12   ret <16 x float> %rrr3
  13 }
  14
  15 define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
  16 ; KNL-LABEL: test2:
  17 ; KNL:       ## BB#0:
  18 ; KNL-NEXT:    vmovhpd (%rdi), %xmm0, %xmm2
  19 ; KNL-NEXT:    vinsertf32x4 $0, %xmm2, %zmm0, %zmm0
  20 ; KNL-NEXT:    vextractf32x4 $3, %zmm0, %xmm2
  21 ; KNL-NEXT:    vmovsd %xmm1, %xmm2, %xmm1
  22 ; KNL-NEXT:    vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
  23 ; KNL-NEXT:    retq
  24 ;
  25 ; SKX-LABEL: test2:
  26 ; SKX:       ## BB#0:
  27 ; SKX-NEXT:    vmovhpd (%rdi), %xmm0, %xmm2
  28 ; SKX-NEXT:    vinsertf64x2 $0, %xmm2, %zmm0, %zmm0
  29 ; SKX-NEXT:    vextractf64x2 $3, %zmm0, %xmm2
  30 ; SKX-NEXT:    vmovsd %xmm1, %xmm2, %xmm1
  31 ; SKX-NEXT:    vinsertf64x2 $3, %xmm1, %zmm0, %zmm0
  32 ; SKX-NEXT:    retq
  33   %rrr = load double, double* %br
  34   %rrr2 = insertelement <8 x double> %x, double %rrr, i32 1
  35   %rrr3 = insertelement <8 x double> %rrr2, double %y, i32 6
  36   ret <8 x double> %rrr3
  37 }
  38
  39 ;CHECK-LABEL: test3:
  40 ;CHECK: vextractf32x4 $1
  41 ;CHECK: vinsertf32x4 $0
  42 ;CHECK: ret
  43 define <16 x float> @test3(<16 x float> %x) nounwind {
  44   %eee = extractelement <16 x float> %x, i32 4
  45   %rrr2 = insertelement <16 x float> %x, float %eee, i32 1
  46   ret <16 x float> %rrr2
  47 }
  48
  49 define <8 x i64> @test4(<8 x i64> %x) nounwind {
  50 ; KNL-LABEL: test4:
  51 ; KNL:       ## BB#0:
  52 ; KNL-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
  53 ; KNL-NEXT:    vmovq %xmm1, %rax
  54 ; KNL-NEXT:    vpinsrq $1, %rax, %xmm0, %xmm1
  55 ; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
  56 ; KNL-NEXT:    retq
  57 ;
  58 ; SKX-LABEL: test4:
  59 ; SKX:       ## BB#0:
  60 ; SKX-NEXT:    vextracti64x2 $2, %zmm0, %xmm1
  61 ; SKX-NEXT:    vmovq %xmm1, %rax
  62 ; SKX-NEXT:    vpinsrq $1, %rax, %xmm0, %xmm1
  63 ; SKX-NEXT:    vinserti64x2 $0, %xmm1, %zmm0, %zmm0
  64 ; SKX-NEXT:    retq
  65   %eee = extractelement <8 x i64> %x, i32 4
  66   %rrr2 = insertelement <8 x i64> %x, i64 %eee, i32 1
  67   ret <8 x i64> %rrr2
  68 }
  69
  70 ;CHECK-LABEL: test5:
  71 ;CHECK: vextractps
  72 ;CHECK: ret
  73 define i32 @test5(<4 x float> %x) nounwind {
  74   %ef = extractelement <4 x float> %x, i32 3
  75   %ei = bitcast float %ef to i32
  76   ret i32 %ei
  77 }
  78
  79 ;CHECK-LABEL: test6:
  80 ;CHECK: vextractps {{.*}}, (%rdi)
  81 ;CHECK: ret
  82 define void @test6(<4 x float> %x, float* %out) nounwind {
  83   %ef = extractelement <4 x float> %x, i32 3
  84   store float %ef, float* %out, align 4
  85   ret void
  86 }
  87
  88 ;CHECK-LABEL: test7
  89 ;CHECK: vmovd
  90 ;CHECK: vpermps %zmm
  91 ;CHECK: ret
  92 define float @test7(<16 x float> %x, i32 %ind) nounwind {
  93   %e = extractelement <16 x float> %x, i32 %ind
  94   ret float %e
  95 }
  96
  97 ;CHECK-LABEL: test8
  98 ;CHECK: vmovq
  99 ;CHECK: vpermpd %zmm
 100 ;CHECK: ret
 101 define double @test8(<8 x double> %x, i32 %ind) nounwind {
 102   %e = extractelement <8 x double> %x, i32 %ind
 103   ret double %e
 104 }
 105
 106 ;CHECK-LABEL: test9
 107 ;CHECK: vmovd
 108 ;CHECK: vpermps %ymm
 109 ;CHECK: ret
 110 define float @test9(<8 x float> %x, i32 %ind) nounwind {
 111   %e = extractelement <8 x float> %x, i32 %ind
 112   ret float %e
 113 }
 114
 115 ;CHECK-LABEL: test10
 116 ;CHECK: vmovd
 117 ;CHECK: vpermd %zmm
 118 ;CHECK: vmovd  %xmm0, %eax
 119 ;CHECK: ret
 120 define i32 @test10(<16 x i32> %x, i32 %ind) nounwind {
 121   %e = extractelement <16 x i32> %x, i32 %ind
 122   ret i32 %e
 123 }
 124
 125 ;CHECK-LABEL: test11
 126 ;CHECK: vpcmpltud
 127 ;CHECK: kshiftlw $11
 128 ;CHECK: kshiftrw $15
 129 ;CHECK: testb
 130 ;CHECK: je
 131 ;CHECK: ret
 132 ;CHECK: ret
 133 define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) {
 134   %cmp_res = icmp ult <16 x i32> %a, %b
 135   %ia = extractelement <16 x i1> %cmp_res, i32 4
 136   br i1 %ia, label %A, label %B
 137   A:
 138     ret <16 x i32>%b
 139   B:
 140    %c = add <16 x i32>%b, %a
 141    ret <16 x i32>%c
 142 }
 143
 144 ;CHECK-LABEL: test12
 145 ;CHECK: vpcmpgtq
 146 ;CHECK: kshiftlw $15
 147 ;CHECK: kshiftrw $15
 148 ;CHECK: testb
 149 ;CHECK: ret
 150
 151 define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {
 152
 153   %cmpvector_func.i = icmp slt <16 x i64> %a, %b
 154   %extract24vector_func.i = extractelement <16 x i1> %cmpvector_func.i, i32 0
 155   %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
 156   ret i64 %res
 157 }
 158
 159 ;CHECK-LABEL: test13
 160 ;CHECK: cmpl    %esi, %edi
 161 ;CHECK: setb    %al
 162 ;CHECK: andl    $1, %eax
 163 ;CHECK: kmovw   %eax, %k0
 164 ;CHECK: movw    $-4
 165 ;CHECK: korw
 166 define i16 @test13(i32 %a, i32 %b) {
 167   %cmp_res = icmp ult i32 %a, %b
 168   %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %cmp_res, i32 0
 169   %res = bitcast <16 x i1> %maskv to i16
 170   ret i16 %res
 171 }
 172
 173 ;CHECK-LABEL: test14
 174 ;CHECK: vpcmpgtq
 175 ;KNL: kshiftlw $11
 176 ;KNL: kshiftrw $15
 177 ;KNL: testb
 178 ;SKX: kshiftlb $3
 179 ;SKX: kshiftrb $7
 180 ;SKX: testb
 181 ;CHECK: ret
 182
 183 define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
 184
 185   %cmpvector_func.i = icmp slt <8 x i64> %a, %b
 186   %extract24vector_func.i = extractelement <8 x i1> %cmpvector_func.i, i32 4
 187   %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
 188   ret i64 %res
 189 }
 190
 191 ;CHECK-LABEL: test15
 192 ;CHECK: movb (%rdi), %al
 193 ;CHECK: andb $1, %al
 194 ;CHECK: movw    $-1, %ax
 195 ;CHECK: cmovew
 196 define i16 @test15(i1 *%addr) {
 197   %x = load i1 , i1 * %addr, align 1
 198   %x1 = insertelement <16 x i1> undef, i1 %x, i32 10
 199   %x2 = bitcast <16 x i1>%x1 to i16
 200   ret i16 %x2
 201 }
 202
 203 ;CHECK-LABEL: test16
 204 ;CHECK: movb (%rdi), %al
 205 ;CHECK: andw $1, %ax
 206 ;CHECK: kmovw
 207 ;CHECK: kshiftlw        $10
 208 ;CHECK: korw
 209 ;CHECK: ret
 210 define i16 @test16(i1 *%addr, i16 %a) {
 211   %x = load i1 , i1 * %addr, align 128
 212   %a1 = bitcast i16 %a to <16 x i1>
 213   %x1 = insertelement <16 x i1> %a1, i1 %x, i32 10
 214   %x2 = bitcast <16 x i1>%x1 to i16
 215   ret i16 %x2
 216 }
 217
 218 ;CHECK-LABEL: test17
 219 ;KNL: movb (%rdi), %al
 220 ;KNL: andw $1, %ax
 221 ;KNL: kshiftlw $4
 222 ;KNL: korw
 223 ;SKX: kshiftlb $4
 224 ;SKX: korb
 225 ;CHECK: ret
 226 define i8 @test17(i1 *%addr, i8 %a) {
 227   %x = load i1 , i1 * %addr, align 128
 228   %a1 = bitcast i8 %a to <8 x i1>
 229   %x1 = insertelement <8 x i1> %a1, i1 %x, i32 4
 230   %x2 = bitcast <8 x i1>%x1 to i8
 231   ret i8 %x2
 232 }
 233