; CHECK: ## BB#0:
; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vgatherpf0qps (%rdi,%zmm0,4) {%k1}
+; CHECK-NEXT: kxorw %k0, %k0, %k1
; CHECK-NEXT: vgatherpf1qps (%rdi,%zmm0,4) {%k1}
+; CHECK-NEXT: movb $1, %al
+; CHECK-NEXT: kmovb %eax, %k1
; CHECK-NEXT: vscatterpf0qps (%rdi,%zmm0,2) {%k1}
+; CHECK-NEXT: movb $120, %al
+; CHECK-NEXT: kmovb %eax, %k1
; CHECK-NEXT: vscatterpf1qps (%rdi,%zmm0,2) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 0)
- call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 1)
- call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 0)
- call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 1)
+ call void @llvm.x86.avx512.gatherpf.qps.512(i8 0, <8 x i64> %ind, i8* %base, i32 4, i32 1)
+ call void @llvm.x86.avx512.scatterpf.qps.512(i8 1, <8 x i64> %ind, i8* %base, i32 2, i32 0)
+ call void @llvm.x86.avx512.scatterpf.qps.512(i8 120, <8 x i64> %ind, i8* %base, i32 2, i32 1)
ret void
}
-
declare <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double>, i8*, <2 x i64>, i8, i32)
; NOTE(review): this body looks truncated by the diff — the function is
; declared to return <2 x double> but ends with `ret void`, which is
; invalid IR (the verifier will reject a void return from a non-void
; function). The original intrinsic call and its CHECK lines appear to
; have been lost; restore them rather than landing this stub.
define <2 x double>@test_int_x86_avx512_gather3div2_df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
ret void
}
+; Exercises vpscatterdd mask materialization for four mask shapes:
+;  - i8 -1  -> all-ones mask via kxnorw
+;  - i8  0  -> all-zero mask via kxorw
+;  - i8  1  -> single-lane immediate mask loaded through movb/kmovb
+;  - i8 96  -> multi-lane immediate mask loaded through movb/kmovb
+; The CHECK-NEXT lines pin the exact instruction sequence; keep the call
+; order in sync with them.
+define void @scatter_mask_test(i8* %x0, <8 x i32> %x2, <8 x i32> %x3) {
+; CHECK-LABEL: scatter_mask_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kxnorw %k0, %k0, %k1
+; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
+; CHECK-NEXT: kxorw %k0, %k0, %k1
+; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
+; CHECK-NEXT: movb $1, %al
+; CHECK-NEXT: kmovb %eax, %k1
+; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
+; CHECK-NEXT: movb $96, %al
+; CHECK-NEXT: kmovb %eax, %k1
+; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 -1, <8 x i32> %x2, <8 x i32> %x3, i32 2)
+ call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 0, <8 x i32> %x2, <8 x i32> %x3, i32 4)
+ call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 1, <8 x i32> %x2, <8 x i32> %x3, i32 2)
+ call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 96, <8 x i32> %x2, <8 x i32> %x3, i32 4)
+ ret void
+}
+
+; Exercises vgatherdps mask materialization for four mask shapes:
+;  - i16 -1  -> all-ones mask via kxnorw
+;  - i16  0  -> all-zero mask via kxorw
+;  - i16  1  -> single-lane immediate mask loaded through movw/kmovw
+;  - i16 220 -> multi-lane immediate mask loaded through movw/kmovw
+; Each gather's passthru %src is copied to a fresh zmm so the four results
+; stay live; the fadd chain consumes all of them so none is eliminated as
+; dead code. CHECK-NEXT lines pin the exact instruction sequence.
+define <16 x float> @gather_mask_test(<16 x i32> %ind, <16 x float> %src, i8* %base) {
+; CHECK-LABEL: gather_mask_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kxnorw %k0, %k0, %k1
+; CHECK-NEXT: vmovaps %zmm1, %zmm2
+; CHECK-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm2 {%k1}
+; CHECK-NEXT: kxorw %k0, %k0, %k1
+; CHECK-NEXT: vmovaps %zmm1, %zmm3
+; CHECK-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm3 {%k1}
+; CHECK-NEXT: movw $1, %ax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm1, %zmm4
+; CHECK-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm4 {%k1}
+; CHECK-NEXT: movw $220, %ax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
+; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm0
+; CHECK-NEXT: vaddps %zmm4, %zmm1, %zmm1
+; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 -1, i32 4)
+ %res1 = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 0, i32 4)
+ %res2 = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 1, i32 4)
+ %res3 = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 220, i32 4)
+
+ %res4 = fadd <16 x float> %res, %res1
+ %res5 = fadd <16 x float> %res3, %res2
+ %res6 = fadd <16 x float> %res5, %res4
+ ret <16 x float> %res6
+}