AVX512: Implemented intrinsics for vshuff32x4, vshuff64x2, vshufi64x2, vshufi32x4
[oota-llvm.git] / lib / Target / X86 / X86IntrinsicsInfo.h
index 1383fa373068285a9b0222f8b9bcb512498fd91c..32ed518835f5b1db1a5766716e378a58cc170258 100644 (file)
@@ -19,13 +19,17 @@ namespace llvm {
 enum IntrinsicType {
   INTR_NO_TYPE,
   GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX,
-  INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP,
+  INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_2OP_IMM8, INTR_TYPE_3OP, INTR_TYPE_4OP,
   CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
-  INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
-  INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM, FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK,
+  INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM,
+  INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
+  INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM, INTR_TYPE_3OP_IMM8_MASK,
+  FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK,
   VPERM_3OP_MASKZ,
-  INTR_TYPE_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
-  EXPAND_FROM_MEM, BLEND
+  INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM,
+  COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
+  TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
+  EXPAND_FROM_MEM, BLEND, INSERT_SUBVEC
 };
 
 struct IntrinsicData {
@@ -138,6 +142,42 @@ static const IntrinsicData IntrinsicsWithChain[] = {
                      EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
   X86_INTRINSIC_DATA(avx512_mask_expand_load_q_512,
                      EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_256, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_512, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_dw_mem_128, TRUNCATE_TO_MEM_VI16,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_dw_mem_256, TRUNCATE_TO_MEM_VI16,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_dw_mem_512, TRUNCATE_TO_MEM_VI16,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_qb_mem_128, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_qb_mem_256, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_qb_mem_512, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_qd_mem_128, TRUNCATE_TO_MEM_VI32,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_qd_mem_256, TRUNCATE_TO_MEM_VI32,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_qd_mem_512, TRUNCATE_TO_MEM_VI32,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_qw_mem_128, TRUNCATE_TO_MEM_VI16,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_qw_mem_256, TRUNCATE_TO_MEM_VI16,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_qw_mem_512, TRUNCATE_TO_MEM_VI16,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_wb_mem_128, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_wb_mem_256, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_wb_mem_512, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNC, 0),
   X86_INTRINSIC_DATA(avx512_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0),
   X86_INTRINSIC_DATA(avx512_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0),
   X86_INTRINSIC_DATA(avx512_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0),
@@ -415,7 +455,18 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
                      X86ISD::COMPRESS, 0),
   X86_INTRINSIC_DATA(avx512_mask_compress_q_512,  COMPRESS_EXPAND_IN_REG,
                      X86ISD::COMPRESS, 0),
-
+  X86_INTRINSIC_DATA(avx512_mask_conflict_d_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::CONFLICT, 0),
+  X86_INTRINSIC_DATA(avx512_mask_conflict_d_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::CONFLICT, 0),
+  X86_INTRINSIC_DATA(avx512_mask_conflict_d_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::CONFLICT, 0),
+  X86_INTRINSIC_DATA(avx512_mask_conflict_q_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::CONFLICT, 0),
+  X86_INTRINSIC_DATA(avx512_mask_conflict_q_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::CONFLICT, 0),
+  X86_INTRINSIC_DATA(avx512_mask_conflict_q_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::CONFLICT, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvtdq2pd_128, INTR_TYPE_1OP_MASK,
                      X86ISD::CVTDQ2PD, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvtdq2pd_256, INTR_TYPE_1OP_MASK,
@@ -572,7 +623,12 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
                      ISD::UINT_TO_FP, 0),
   X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_512, INTR_TYPE_1OP_MASK,
                      ISD::UINT_TO_FP, ISD::UINT_TO_FP),
-
+  X86_INTRINSIC_DATA(avx512_mask_dbpsadbw_128, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::DBPSADBW, 0),
+  X86_INTRINSIC_DATA(avx512_mask_dbpsadbw_256, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::DBPSADBW, 0),
+  X86_INTRINSIC_DATA(avx512_mask_dbpsadbw_512, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::DBPSADBW, 0),
   X86_INTRINSIC_DATA(avx512_mask_div_pd_128, INTR_TYPE_2OP_MASK, ISD::FDIV, 0),
   X86_INTRINSIC_DATA(avx512_mask_div_pd_256, INTR_TYPE_2OP_MASK, ISD::FDIV, 0),
   X86_INTRINSIC_DATA(avx512_mask_div_pd_512, INTR_TYPE_2OP_MASK, ISD::FDIV,
@@ -621,6 +677,62 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
                      X86ISD::FGETEXP_RND, 0),
   X86_INTRINSIC_DATA(avx512_mask_getexp_ps_512, INTR_TYPE_1OP_MASK_RM,
                      X86ISD::FGETEXP_RND, 0),
+  X86_INTRINSIC_DATA(avx512_mask_getexp_sd, INTR_TYPE_SCALAR_MASK_RM,
+                     X86ISD::FGETEXP_RND, 0),
+  X86_INTRINSIC_DATA(avx512_mask_getexp_ss, INTR_TYPE_SCALAR_MASK_RM,
+                     X86ISD::FGETEXP_RND, 0),
+  X86_INTRINSIC_DATA(avx512_mask_getmant_pd_128, INTR_TYPE_2OP_MASK_RM,
+                     X86ISD::VGETMANT, 0),
+  X86_INTRINSIC_DATA(avx512_mask_getmant_pd_256, INTR_TYPE_2OP_MASK_RM,
+                     X86ISD::VGETMANT, 0),
+  X86_INTRINSIC_DATA(avx512_mask_getmant_pd_512, INTR_TYPE_2OP_MASK_RM,
+                     X86ISD::VGETMANT, 0),
+  X86_INTRINSIC_DATA(avx512_mask_getmant_ps_128, INTR_TYPE_2OP_MASK_RM,
+                     X86ISD::VGETMANT, 0),
+  X86_INTRINSIC_DATA(avx512_mask_getmant_ps_256, INTR_TYPE_2OP_MASK_RM,
+                     X86ISD::VGETMANT, 0),
+  X86_INTRINSIC_DATA(avx512_mask_getmant_ps_512, INTR_TYPE_2OP_MASK_RM,
+                     X86ISD::VGETMANT, 0),
+  X86_INTRINSIC_DATA(avx512_mask_getmant_sd, INTR_TYPE_3OP_SCALAR_MASK_RM,
+                     X86ISD::VGETMANT, 0),
+  X86_INTRINSIC_DATA(avx512_mask_getmant_ss, INTR_TYPE_3OP_SCALAR_MASK_RM,
+                     X86ISD::VGETMANT, 0),
+  X86_INTRINSIC_DATA(avx512_mask_insertf32x4_256, INSERT_SUBVEC,
+                     ISD::INSERT_SUBVECTOR, 0),
+  X86_INTRINSIC_DATA(avx512_mask_insertf32x4_512, INSERT_SUBVEC,
+                     ISD::INSERT_SUBVECTOR, 0),
+  X86_INTRINSIC_DATA(avx512_mask_insertf32x8_512, INSERT_SUBVEC,
+                     ISD::INSERT_SUBVECTOR, 0),
+  X86_INTRINSIC_DATA(avx512_mask_insertf64x2_256, INSERT_SUBVEC,
+                     ISD::INSERT_SUBVECTOR, 0),
+  X86_INTRINSIC_DATA(avx512_mask_insertf64x2_512, INSERT_SUBVEC,
+                     ISD::INSERT_SUBVECTOR, 0),
+  X86_INTRINSIC_DATA(avx512_mask_insertf64x4_512, INSERT_SUBVEC,
+                     ISD::INSERT_SUBVECTOR, 0),
+  X86_INTRINSIC_DATA(avx512_mask_inserti32x4_256, INSERT_SUBVEC,
+                     ISD::INSERT_SUBVECTOR, 0),
+  X86_INTRINSIC_DATA(avx512_mask_inserti32x4_512, INSERT_SUBVEC,
+                     ISD::INSERT_SUBVECTOR, 0),
+  X86_INTRINSIC_DATA(avx512_mask_inserti32x8_512, INSERT_SUBVEC,
+                     ISD::INSERT_SUBVECTOR, 0),
+  X86_INTRINSIC_DATA(avx512_mask_inserti64x2_256, INSERT_SUBVEC,
+                     ISD::INSERT_SUBVECTOR, 0),
+  X86_INTRINSIC_DATA(avx512_mask_inserti64x2_512, INSERT_SUBVEC,
+                     ISD::INSERT_SUBVECTOR, 0),
+  X86_INTRINSIC_DATA(avx512_mask_inserti64x4_512, INSERT_SUBVEC,
+                     ISD::INSERT_SUBVECTOR, 0),
+  X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_128, INTR_TYPE_1OP_MASK,
+                     ISD::CTLZ, 0),
+  X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_256, INTR_TYPE_1OP_MASK,
+                     ISD::CTLZ, 0),
+  X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_512, INTR_TYPE_1OP_MASK,
+                     ISD::CTLZ, 0),
+  X86_INTRINSIC_DATA(avx512_mask_lzcnt_q_128, INTR_TYPE_1OP_MASK,
+                     ISD::CTLZ, 0),
+  X86_INTRINSIC_DATA(avx512_mask_lzcnt_q_256, INTR_TYPE_1OP_MASK,
+                     ISD::CTLZ, 0),
+  X86_INTRINSIC_DATA(avx512_mask_lzcnt_q_512, INTR_TYPE_1OP_MASK,
+                     ISD::CTLZ, 0),
   X86_INTRINSIC_DATA(avx512_mask_max_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
   X86_INTRINSIC_DATA(avx512_mask_max_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
   X86_INTRINSIC_DATA(avx512_mask_max_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX,
@@ -711,6 +823,12 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_mask_paddus_w_128, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
   X86_INTRINSIC_DATA(avx512_mask_paddus_w_256, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
   X86_INTRINSIC_DATA(avx512_mask_paddus_w_512, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_palignr_128, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::PALIGNR, 0),
+  X86_INTRINSIC_DATA(avx512_mask_palignr_256, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::PALIGNR, 0),
+  X86_INTRINSIC_DATA(avx512_mask_palignr_512, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::PALIGNR, 0),
   X86_INTRINSIC_DATA(avx512_mask_pand_d_128, INTR_TYPE_2OP_MASK, ISD::AND, 0),
   X86_INTRINSIC_DATA(avx512_mask_pand_d_256, INTR_TYPE_2OP_MASK, ISD::AND, 0),
   X86_INTRINSIC_DATA(avx512_mask_pand_d_512, INTR_TYPE_2OP_MASK, ISD::AND, 0),
@@ -813,6 +931,114 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_mask_pminu_w_128, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
   X86_INTRINSIC_DATA(avx512_mask_pminu_w_256, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
   X86_INTRINSIC_DATA(avx512_mask_pminu_w_512, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_db_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_db_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_db_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_dw_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_dw_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_dw_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_qb_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_qb_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_qb_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_qd_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_qd_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_qd_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_qw_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_qw_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_qw_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_wb_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_wb_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmov_wb_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNC, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_db_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_db_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_db_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_db_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_db_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_db_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_128, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_256, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_512, INTR_TYPE_1OP_MASK,
+                     X86ISD::VTRUNCUS, 0),
   X86_INTRINSIC_DATA(avx512_mask_pmul_dq_128, INTR_TYPE_2OP_MASK,
                      X86ISD::PMULDQ, 0),
   X86_INTRINSIC_DATA(avx512_mask_pmul_dq_256, INTR_TYPE_2OP_MASK,
@@ -897,6 +1123,54 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_mask_psubus_w_128, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
   X86_INTRINSIC_DATA(avx512_mask_psubus_w_256, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
   X86_INTRINSIC_DATA(avx512_mask_psubus_w_512, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_punpckhb_w_128, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKH, 0),
+  X86_INTRINSIC_DATA(avx512_mask_punpckhb_w_256, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKH, 0),
+  X86_INTRINSIC_DATA(avx512_mask_punpckhb_w_512, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKH, 0),
+  X86_INTRINSIC_DATA(avx512_mask_punpckhd_q_128, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKH, 0),
+  X86_INTRINSIC_DATA(avx512_mask_punpckhd_q_256, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKH, 0),
+  X86_INTRINSIC_DATA(avx512_mask_punpckhd_q_512, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKH, 0),
+  X86_INTRINSIC_DATA(avx512_mask_punpckhqd_q_128, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKH, 0),
+  X86_INTRINSIC_DATA(avx512_mask_punpckhqd_q_256, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKH, 0),
+  X86_INTRINSIC_DATA(avx512_mask_punpckhqd_q_512, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKH, 0),
+  X86_INTRINSIC_DATA(avx512_mask_punpckhw_d_128, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKH, 0),
+  X86_INTRINSIC_DATA(avx512_mask_punpckhw_d_256, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKH, 0),
+  X86_INTRINSIC_DATA(avx512_mask_punpckhw_d_512, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKH, 0),
+  X86_INTRINSIC_DATA(avx512_mask_punpcklb_w_128, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKL, 0),
+  X86_INTRINSIC_DATA(avx512_mask_punpcklb_w_256, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKL, 0),
+  X86_INTRINSIC_DATA(avx512_mask_punpcklb_w_512, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKL, 0),
+  X86_INTRINSIC_DATA(avx512_mask_punpckld_q_128, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKL, 0),
+  X86_INTRINSIC_DATA(avx512_mask_punpckld_q_256, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKL, 0),
+  X86_INTRINSIC_DATA(avx512_mask_punpckld_q_512, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKL, 0),
+  X86_INTRINSIC_DATA(avx512_mask_punpcklqd_q_128, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKL, 0),
+  X86_INTRINSIC_DATA(avx512_mask_punpcklqd_q_256, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKL, 0),
+  X86_INTRINSIC_DATA(avx512_mask_punpcklqd_q_512, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKL, 0),
+  X86_INTRINSIC_DATA(avx512_mask_punpcklw_d_128, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKL, 0),
+  X86_INTRINSIC_DATA(avx512_mask_punpcklw_d_256, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKL, 0),
+  X86_INTRINSIC_DATA(avx512_mask_punpcklw_d_512, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKL, 0),
   X86_INTRINSIC_DATA(avx512_mask_pxor_d_128, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
   X86_INTRINSIC_DATA(avx512_mask_pxor_d_256, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
   X86_INTRINSIC_DATA(avx512_mask_pxor_d_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
@@ -945,6 +1219,34 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
                      X86ISD::SCALEF, 0),
   X86_INTRINSIC_DATA(avx512_mask_scalef_ss, INTR_TYPE_SCALAR_MASK_RM,
                      X86ISD::SCALEF, 0),
+  X86_INTRINSIC_DATA(avx512_mask_shuf_f32x4, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::SHUF128, 0),
+  X86_INTRINSIC_DATA(avx512_mask_shuf_f32x4_256, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::SHUF128, 0),
+  X86_INTRINSIC_DATA(avx512_mask_shuf_f64x2, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::SHUF128, 0),
+  X86_INTRINSIC_DATA(avx512_mask_shuf_f64x2_256, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::SHUF128, 0),
+  X86_INTRINSIC_DATA(avx512_mask_shuf_i32x4, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::SHUF128, 0),
+  X86_INTRINSIC_DATA(avx512_mask_shuf_i32x4_256, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::SHUF128, 0),
+  X86_INTRINSIC_DATA(avx512_mask_shuf_i64x2, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::SHUF128, 0),
+  X86_INTRINSIC_DATA(avx512_mask_shuf_i64x2_256, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::SHUF128, 0),
+  X86_INTRINSIC_DATA(avx512_mask_shuf_pd_128, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::SHUFP, 0),
+  X86_INTRINSIC_DATA(avx512_mask_shuf_pd_256, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::SHUFP, 0),
+  X86_INTRINSIC_DATA(avx512_mask_shuf_pd_512, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::SHUFP, 0),
+  X86_INTRINSIC_DATA(avx512_mask_shuf_ps_128, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::SHUFP, 0),
+  X86_INTRINSIC_DATA(avx512_mask_shuf_ps_256, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::SHUFP, 0),
+  X86_INTRINSIC_DATA(avx512_mask_shuf_ps_512, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::SHUFP, 0),
   X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
   X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
   X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_512, INTR_TYPE_1OP_MASK_RM, ISD::FSQRT,
@@ -977,9 +1279,42 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_mask_ucmp_w_128,    CMP_MASK_CC,  X86ISD::CMPMU, 0),
   X86_INTRINSIC_DATA(avx512_mask_ucmp_w_256,    CMP_MASK_CC,  X86ISD::CMPMU, 0),
   X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512,    CMP_MASK_CC,  X86ISD::CMPMU, 0),
-  X86_INTRINSIC_DATA(avx512_mask_valign_d_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0),
-  X86_INTRINSIC_DATA(avx512_mask_valign_q_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0),
-
+  X86_INTRINSIC_DATA(avx512_mask_unpckh_pd_128, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKH, 0),
+  X86_INTRINSIC_DATA(avx512_mask_unpckh_pd_256, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKH, 0),
+  X86_INTRINSIC_DATA(avx512_mask_unpckh_pd_512, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKH, 0),
+  X86_INTRINSIC_DATA(avx512_mask_unpckh_ps_128, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKH, 0),
+  X86_INTRINSIC_DATA(avx512_mask_unpckh_ps_256, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKH, 0),
+  X86_INTRINSIC_DATA(avx512_mask_unpckh_ps_512, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKH, 0),
+  X86_INTRINSIC_DATA(avx512_mask_unpckl_pd_128, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKL, 0),
+  X86_INTRINSIC_DATA(avx512_mask_unpckl_pd_256, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKL, 0),
+  X86_INTRINSIC_DATA(avx512_mask_unpckl_pd_512, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKL, 0),
+  X86_INTRINSIC_DATA(avx512_mask_unpckl_ps_128, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKL, 0),
+  X86_INTRINSIC_DATA(avx512_mask_unpckl_ps_256, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKL, 0),
+  X86_INTRINSIC_DATA(avx512_mask_unpckl_ps_512, INTR_TYPE_2OP_MASK,
+                     X86ISD::UNPCKL, 0),
+  X86_INTRINSIC_DATA(avx512_mask_valign_d_128, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::VALIGN, 0),
+  X86_INTRINSIC_DATA(avx512_mask_valign_d_256, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::VALIGN, 0),
+  X86_INTRINSIC_DATA(avx512_mask_valign_d_512, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::VALIGN, 0),
+  X86_INTRINSIC_DATA(avx512_mask_valign_q_128, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::VALIGN, 0),
+  X86_INTRINSIC_DATA(avx512_mask_valign_q_256, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::VALIGN, 0),
+  X86_INTRINSIC_DATA(avx512_mask_valign_q_512, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::VALIGN, 0),
   X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0),
   X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0),
   X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_512, FMA_OP_MASK, X86ISD::FMADD,
@@ -1016,7 +1351,6 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_512, FMA_OP_MASK, X86ISD::FNMSUB,
                      X86ISD::FNMSUB_RND),
 
-
   X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK,
                     X86ISD::VPERMIV3, 0),
   X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERM_3OP_MASK,
@@ -1154,6 +1488,9 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
                      X86ISD::VPERMV3, 0),
   X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_512, VPERM_3OP_MASKZ,
                      X86ISD::VPERMV3, 0),
+  X86_INTRINSIC_DATA(avx512_psad_bw_512, INTR_TYPE_2OP, X86ISD::PSADBW, 0),
+  X86_INTRINSIC_DATA(avx512_psll_dq_512, INTR_TYPE_2OP_IMM8, X86ISD::VSHLDQ, 0),
+  X86_INTRINSIC_DATA(avx512_psrl_dq_512, INTR_TYPE_2OP_IMM8, X86ISD::VSRLDQ, 0),
   X86_INTRINSIC_DATA(avx512_rcp28_pd,   INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
   X86_INTRINSIC_DATA(avx512_rcp28_ps,   INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
   X86_INTRINSIC_DATA(avx512_rcp28_sd,   INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0),