GCCBuiltin<"__builtin_ia32_vperm2f128_si256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermt_d_512:
+ GCCBuiltin<"__builtin_ia32_vpermt2vard512_mask">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
+ llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermt_q_512:
+ GCCBuiltin<"__builtin_ia32_vpermt2varq512_mask">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
+ llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermt_ps_512:
+ GCCBuiltin<"__builtin_ia32_vpermt2varps512_mask">,
+ Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty,
+ llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_vpermt_pd_512:
+ GCCBuiltin<"__builtin_ia32_vpermt2varpd512_mask">,
+ Intrinsic<[llvm_v8f64_ty], [llvm_v8i64_ty,
+ llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>;
+
}
// Vector blend
return true;
}
+// Match for INSERTI64x4 INSERTF64x4 instructions (src0[0], src1[0]) or
+// (src1[0], src0[1]), manipulation with 256-bit sub-vectors
+static bool isINSERT64x4Mask(ArrayRef<int> Mask, MVT VT, unsigned int *Imm) {
+ if (!VT.is512BitVector())
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned HalfSize = NumElts/2;
+ if (isSequentialOrUndefInRange(Mask, 0, HalfSize, 0)) {
+ if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, NumElts)) {
+ *Imm = 1;
+ return true;
+ }
+ }
+ if (isSequentialOrUndefInRange(Mask, 0, HalfSize, NumElts)) {
+ if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, HalfSize)) {
+ *Imm = 0;
+ return true;
+ }
+ }
+ return false;
+}
+
/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
getShuffleSHUFImmediate(SVOp), DAG);
}
+ unsigned Idx;
+ if (VT.is512BitVector() && isINSERT64x4Mask(M, VT, &Idx))
+ return Insert256BitVector(V1, Extract256BitVector(V2, 0, DAG, dl),
+ Idx*(NumElems/2), DAG, dl);
+
// Handle VPERM2F128/VPERM2I128 permutations
if (isVPERM2X128Mask(M, VT, HasFp256))
return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
X86VPermv3, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_3src<0x7F, "vpermt2pd", VR512, memopv8f64, i512mem,
X86VPermv3, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+def : Pat<(v16f32 (int_x86_avx512_mask_vpermt_ps_512 (v16i32 VR512:$idx),
+ (v16f32 VR512:$src1), (v16f32 VR512:$src2), (i16 -1))),
+ (VPERMT2PSrr VR512:$src1, VR512:$idx, VR512:$src2)>;
+
+def : Pat<(v16i32 (int_x86_avx512_mask_vpermt_d_512 (v16i32 VR512:$idx),
+ (v16i32 VR512:$src1), (v16i32 VR512:$src2), (i16 -1))),
+ (VPERMT2Drr VR512:$src1, VR512:$idx, VR512:$src2)>;
+
+def : Pat<(v8f64 (int_x86_avx512_mask_vpermt_pd_512 (v8i64 VR512:$idx),
+ (v8f64 VR512:$src1), (v8f64 VR512:$src2), (i8 -1))),
+ (VPERMT2PDrr VR512:$src1, VR512:$idx, VR512:$src2)>;
+
+def : Pat<(v8i64 (int_x86_avx512_mask_vpermt_q_512 (v8i64 VR512:$idx),
+ (v8i64 VR512:$src1), (v8i64 VR512:$src2), (i8 -1))),
+ (VPERMT2Qrr VR512:$src1, VR512:$idx, VR512:$src2)>;
//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
//
ret void
}
-declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8 )
\ No newline at end of file
+declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8 )
+
+define <16 x float> @test_vpermt2ps(<16 x float>%x, <16 x float>%y, <16 x i32>%perm) {
+; CHECK: vpermt2ps {{.*}}encoding: [0x62,0xf2,0x6d,0x48,0x7f,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>%perm, <16 x float>%x, <16 x float>%y, i16 -1)
+ ret <16 x float> %res
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
%res = shufflevector <4 x i32> %a, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <16 x i32> %res
}
+
+; CHECK-LABEL: @test28
+; CHECK: vinserti64x4 $1
+; CHECK: ret
+define <16 x i32> @test28(<16 x i32>%x, <16 x i32>%y) {
+ %res = shufflevector <16 x i32>%x, <16 x i32>%y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
+ i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+ ret <16 x i32> %res
+}
+
+; CHECK-LABEL: @test29
+; CHECK: vinserti64x4 $0
+; CHECK: ret
+define <16 x i32> @test29(<16 x i32>%x, <16 x i32>%y) {
+ %res = shufflevector <16 x i32>%x, <16 x i32>%y, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
+ i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i32> %res
+}
+