From: Chandler Carruth Date: Thu, 23 Jul 2015 08:03:44 +0000 (+0000) Subject: Revert r242990: "AVX-512: Implemented encoding , DAG lowering and ..." X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=045195799377589cfc6d40195ca9f6db8903deb6 Revert r242990: "AVX-512: Implemented encoding , DAG lowering and ..." This commit broke the build. Numerous build bots broken, and it was blocking my progress so reverting. It should be trivial to reproduce -- enable the BPF backend and it should fail when running llvm-tblgen. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@242992 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 7362c409c84..352a592bbd8 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -5816,550 +5816,6 @@ let TargetPrefix = "x86" in { llvm_i8_ty], [IntrReadArgMem]>; } - -// truncate -let TargetPrefix = "x86" in { - def int_x86_avx512_mask_pmov_qb_128 : - GCCBuiltin<"__builtin_ia32_pmovqb128_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmov_qb_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovqb128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovs_qb_128 : - GCCBuiltin<"__builtin_ia32_pmovsqb128_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovs_qb_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovsqb128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovus_qb_128 : - GCCBuiltin<"__builtin_ia32_pmovusqb128_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovus_qb_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovusqb128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmov_qb_256 : - GCCBuiltin<"__builtin_ia32_pmovqb256_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmov_qb_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovqb256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovs_qb_256 : - GCCBuiltin<"__builtin_ia32_pmovsqb256_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovs_qb_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovsqb256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovus_qb_256 : - GCCBuiltin<"__builtin_ia32_pmovusqb256_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovus_qb_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovusqb256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmov_qb_512 : - GCCBuiltin<"__builtin_ia32_pmovqb512_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmov_qb_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovqb512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovs_qb_512 : - GCCBuiltin<"__builtin_ia32_pmovsqb512_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovs_qb_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovsqb512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovus_qb_512 : - GCCBuiltin<"__builtin_ia32_pmovusqb512_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovus_qb_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovusqb512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmov_qw_128 : - GCCBuiltin<"__builtin_ia32_pmovqw128_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmov_qw_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovqw128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovs_qw_128 : - GCCBuiltin<"__builtin_ia32_pmovsqw128_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovs_qw_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovsqw128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovus_qw_128 : - GCCBuiltin<"__builtin_ia32_pmovusqw128_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovus_qw_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovusqw128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmov_qw_256 : - GCCBuiltin<"__builtin_ia32_pmovqw256_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmov_qw_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovqw256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovs_qw_256 : - GCCBuiltin<"__builtin_ia32_pmovsqw256_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovs_qw_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovsqw256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovus_qw_256 : - GCCBuiltin<"__builtin_ia32_pmovusqw256_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovus_qw_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovusqw256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmov_qw_512 : - GCCBuiltin<"__builtin_ia32_pmovqw512_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmov_qw_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovqw512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovs_qw_512 : - GCCBuiltin<"__builtin_ia32_pmovsqw512_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovs_qw_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovsqw512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovus_qw_512 : - GCCBuiltin<"__builtin_ia32_pmovusqw512_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovus_qw_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovusqw512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmov_qd_128 : - GCCBuiltin<"__builtin_ia32_pmovqd128_mask">, - Intrinsic<[llvm_v4i32_ty], - [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmov_qd_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovqd128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovs_qd_128 : - GCCBuiltin<"__builtin_ia32_pmovsqd128_mask">, - Intrinsic<[llvm_v4i32_ty], - [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovs_qd_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovsqd128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovus_qd_128 : - GCCBuiltin<"__builtin_ia32_pmovusqd128_mask">, - Intrinsic<[llvm_v4i32_ty], - [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovus_qd_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovusqd128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmov_qd_256 : - GCCBuiltin<"__builtin_ia32_pmovqd256_mask">, - Intrinsic<[llvm_v4i32_ty], - [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmov_qd_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovqd256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovs_qd_256 : - GCCBuiltin<"__builtin_ia32_pmovsqd256_mask">, - Intrinsic<[llvm_v4i32_ty], - [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovs_qd_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovsqd256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovus_qd_256 : - GCCBuiltin<"__builtin_ia32_pmovusqd256_mask">, - Intrinsic<[llvm_v4i32_ty], - [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovus_qd_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovusqd256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmov_qd_512 : - GCCBuiltin<"__builtin_ia32_pmovqd512_mask">, - Intrinsic<[llvm_v8i32_ty], - [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmov_qd_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovqd512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovs_qd_512 : - GCCBuiltin<"__builtin_ia32_pmovsqd512_mask">, - Intrinsic<[llvm_v8i32_ty], - [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovs_qd_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovsqd512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovus_qd_512 : - GCCBuiltin<"__builtin_ia32_pmovusqd512_mask">, - Intrinsic<[llvm_v8i32_ty], - [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovus_qd_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovusqd512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmov_db_128 : - GCCBuiltin<"__builtin_ia32_pmovdb128_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmov_db_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovdb128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovs_db_128 : - GCCBuiltin<"__builtin_ia32_pmovsdb128_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovs_db_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovsdb128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovus_db_128 : - GCCBuiltin<"__builtin_ia32_pmovusdb128_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovus_db_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovusdb128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmov_db_256 : - GCCBuiltin<"__builtin_ia32_pmovdb256_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmov_db_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovdb256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovs_db_256 : - GCCBuiltin<"__builtin_ia32_pmovsdb256_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovs_db_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovsdb256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovus_db_256 : - GCCBuiltin<"__builtin_ia32_pmovusdb256_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovus_db_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovusdb256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmov_db_512 : - GCCBuiltin<"__builtin_ia32_pmovdb512_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmov_db_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovdb512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovs_db_512 : - GCCBuiltin<"__builtin_ia32_pmovsdb512_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovs_db_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovsdb512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovus_db_512 : - GCCBuiltin<"__builtin_ia32_pmovusdb512_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovus_db_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovusdb512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmov_dw_128 : - GCCBuiltin<"__builtin_ia32_pmovdw128_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmov_dw_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovdw128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovs_dw_128 : - GCCBuiltin<"__builtin_ia32_pmovsdw128_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovs_dw_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovsdw128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovus_dw_128 : - GCCBuiltin<"__builtin_ia32_pmovusdw128_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovus_dw_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovusdw128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmov_dw_256 : - GCCBuiltin<"__builtin_ia32_pmovdw256_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmov_dw_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovdw256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovs_dw_256 : - GCCBuiltin<"__builtin_ia32_pmovsdw256_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovs_dw_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovsdw256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovus_dw_256 : - GCCBuiltin<"__builtin_ia32_pmovusdw256_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovus_dw_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovusdw256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmov_dw_512 : - GCCBuiltin<"__builtin_ia32_pmovdw512_mask">, - Intrinsic<[llvm_v16i16_ty], - [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmov_dw_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovdw512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovs_dw_512 : - GCCBuiltin<"__builtin_ia32_pmovsdw512_mask">, - Intrinsic<[llvm_v16i16_ty], - [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovs_dw_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovsdw512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovus_dw_512 : - GCCBuiltin<"__builtin_ia32_pmovusdw512_mask">, - Intrinsic<[llvm_v16i16_ty], - [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovus_dw_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovusdw512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmov_wb_128 : - GCCBuiltin<"__builtin_ia32_pmovwb128_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmov_wb_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovwb128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovs_wb_128 : - GCCBuiltin<"__builtin_ia32_pmovswb128_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovs_wb_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovswb128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovus_wb_128 : - GCCBuiltin<"__builtin_ia32_pmovuswb128_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovus_wb_mem_128 : - GCCBuiltin<"__builtin_ia32_pmovuswb128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmov_wb_256 : - GCCBuiltin<"__builtin_ia32_pmovwb256_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmov_wb_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovwb256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovs_wb_256 : - GCCBuiltin<"__builtin_ia32_pmovswb256_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovs_wb_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovswb256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovus_wb_256 : - GCCBuiltin<"__builtin_ia32_pmovuswb256_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovus_wb_mem_256 : - GCCBuiltin<"__builtin_ia32_pmovuswb256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmov_wb_512 : - GCCBuiltin<"__builtin_ia32_pmovwb512_mask">, - Intrinsic<[llvm_v32i8_ty], - [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmov_wb_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovwb512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovs_wb_512 : - GCCBuiltin<"__builtin_ia32_pmovswb512_mask">, - Intrinsic<[llvm_v32i8_ty], - [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovs_wb_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovswb512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], - [IntrReadWriteArgMem]>; - def int_x86_avx512_mask_pmovus_wb_512 : - GCCBuiltin<"__builtin_ia32_pmovuswb512_mask">, - Intrinsic<[llvm_v32i8_ty], - [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pmovus_wb_mem_512 : - GCCBuiltin<"__builtin_ia32_pmovuswb512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], - [IntrReadWriteArgMem]>; -} // Misc. let TargetPrefix = "x86" in { def int_x86_avx512_mask_cmp_ps_512 : diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td index fe5cee8d731..6c7eef14715 100644 --- a/include/llvm/Target/TargetSelectionDAG.td +++ b/include/llvm/Target/TargetSelectionDAG.td @@ -493,10 +493,9 @@ def atomic_load : SDNode<"ISD::ATOMIC_LOAD", SDTAtomicLoad, def atomic_store : SDNode<"ISD::ATOMIC_STORE", SDTAtomicStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; -// Do not use mld, mst directly. Use masked_store masked_load, masked_truncstore -def mst : SDNode<"ISD::MSTORE", SDTMaskedStore, +def masked_store : SDNode<"ISD::MSTORE", SDTMaskedStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; -def mld : SDNode<"ISD::MLOAD", SDTMaskedLoad, +def masked_load : SDNode<"ISD::MLOAD", SDTMaskedLoad, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def masked_scatter : SDNode<"ISD::MSCATTER", SDTMaskedScatter, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; @@ -681,12 +680,6 @@ def load : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{ return cast(N)->getExtensionType() == ISD::NON_EXTLOAD; }]>; -// masked load fragments. -def masked_load : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (mld node:$src1, node:$src2, node:$src3), [{ - return cast(N)->getExtensionType() == ISD::NON_EXTLOAD; -}]>; - // extending load fragments. def extload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{ return cast(N)->getExtensionType() == ISD::EXTLOAD; @@ -798,12 +791,6 @@ def store : PatFrag<(ops node:$val, node:$ptr), return !cast(N)->isTruncatingStore(); }]>; -// masked store fragments. -def masked_store : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (mst node:$src1, node:$src2, node:$src3), [{ - return !cast(N)->isTruncatingStore(); -}]>; - // truncstore fragments. def truncstore : PatFrag<(ops node:$val, node:$ptr), (unindexedstore node:$val, node:$ptr), [{ @@ -830,21 +817,6 @@ def truncstoref64 : PatFrag<(ops node:$val, node:$ptr), return cast(N)->getMemoryVT() == MVT::f64; }]>; -def truncstorevi8 : PatFrag<(ops node:$val, node:$ptr), - (truncstore node:$val, node:$ptr), [{ - return cast(N)->getMemoryVT().getScalarType() == MVT::i8; -}]>; - -def truncstorevi16 : PatFrag<(ops node:$val, node:$ptr), - (truncstore node:$val, node:$ptr), [{ - return cast(N)->getMemoryVT().getScalarType() == MVT::i16; -}]>; - -def truncstorevi32 : PatFrag<(ops node:$val, node:$ptr), - (truncstore node:$val, node:$ptr), [{ - return cast(N)->getMemoryVT().getScalarType() == MVT::i32; -}]>; - // indexed store fragments. def istore : PatFrag<(ops node:$val, node:$base, node:$offset), (ist node:$val, node:$base, node:$offset), [{ @@ -919,27 +891,6 @@ def post_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset), return cast(N)->getMemoryVT() == MVT::f32; }]>; -// masked truncstore fragments -def masked_truncstore : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (mst node:$src1, node:$src2, node:$src3), [{ - return cast(N)->isTruncatingStore(); -}]>; -def masked_truncstorevi8 : - PatFrag<(ops node:$src1, node:$src2, node:$src3), - (masked_truncstore node:$src1, node:$src2, node:$src3), [{ - return cast(N)->getMemoryVT().getScalarType() == MVT::i8; -}]>; -def masked_truncstorevi16 : - PatFrag<(ops node:$src1, node:$src2, node:$src3), - (masked_truncstore node:$src1, node:$src2, node:$src3), [{ - return cast(N)->getMemoryVT().getScalarType() == MVT::i16; -}]>; -def masked_truncstorevi32 : - PatFrag<(ops node:$src1, node:$src2, node:$src3), - (masked_truncstore node:$src1, node:$src2, node:$src3), [{ - return cast(N)->getMemoryVT().getScalarType() == MVT::i32; -}]>; - // setcc convenience fragments. def setoeq : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETOEQ)>; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 16aa96e8663..dc73cb2392f 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1348,24 +1348,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal); setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal); - setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal); - setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal); - setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal); - setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal); - setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal); - if (Subtarget->hasVLX()){ - setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal); - setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal); - setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal); - setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal); - setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal); - - setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal); - setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal); - setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal); - setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal); - setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal); - } setOperationAction(ISD::TRUNCATE, MVT::i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom); setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom); @@ -1574,7 +1556,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::VSELECT, MVT::v64i8, Legal); setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom); setOperationAction(ISD::SMAX, MVT::v64i8, Legal); setOperationAction(ISD::SMAX, MVT::v32i16, Legal); @@ -1585,11 +1566,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UMIN, MVT::v64i8, Legal); setOperationAction(ISD::UMIN, MVT::v32i16, Legal); - setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal); - setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal); - if (Subtarget->hasVLX()) - setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal); - for (int i = MVT::v32i8; i != MVT::v8i64; ++i) { const MVT VT = (MVT::SimpleValueType)i; @@ -12509,8 +12485,10 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { Subtarget->hasDQI() && Subtarget->hasVLX()) return Op; // legal, will go to VPMOVB2M, VPMOVQ2M } + if (InVT.is512BitVector() || VT.getVectorElementType() == MVT::i1) { + if (VT.getVectorElementType().getSizeInBits() >=8) + return DAG.getNode(X86ISD::VTRUNC, DL, VT, In); - if (VT.getVectorElementType() == MVT::i1) { assert(VT.getVectorElementType() == MVT::i1 && "Unexpected vector type"); unsigned NumElts = InVT.getVectorNumElements(); assert ((NumElts == 8 || NumElts == 16) && "Unexpected vector type"); @@ -12526,11 +12504,6 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(X86ISD::TESTM, DL, VT, And, And); } - // vpmovqb/w/d, vpmovdb/w, vpmovwb - if (((!InVT.is512BitVector() && Subtarget->hasVLX()) || InVT.is512BitVector()) && - (InVT.getVectorElementType() != MVT::i16 || Subtarget->hasBWI())) - return DAG.getNode(X86ISD::VTRUNC, DL, VT, In); - if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) { // On AVX2, v4i64 -> v4i32 becomes VPERMD. if (Subtarget->hasInt256()) { @@ -15247,7 +15220,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT, /// \brief Return (and \p Op, \p Mask) for compare instructions or /// (vselect \p Mask, \p Op, \p PreservedSrc) for others along with the -/// necessary casting or extending for \p Mask when lowering masking intrinsics +/// necessary casting for \p Mask when lowering masking intrinsics. static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, SDValue PreservedSrc, const X86Subtarget *Subtarget, @@ -15255,8 +15228,8 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, EVT VT = Op.getValueType(); EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, VT.getVectorNumElements()); - SDValue VMask = SDValue(); - unsigned OpcodeSelect = ISD::VSELECT; + EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + Mask.getValueType().getSizeInBits()); SDLoc dl(Op); assert(MaskVT.isSimple() && "invalid mask type"); @@ -15264,20 +15237,11 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, if (isAllOnes(Mask)) return Op; - if (MaskVT.bitsGT(Mask.getValueType())) { - EVT newMaskVT = EVT::getIntegerVT(*DAG.getContext(), - MaskVT.getSizeInBits()); - VMask = DAG.getBitcast(MaskVT, - DAG.getNode(ISD::ANY_EXTEND, dl, newMaskVT, Mask)); - } else { - EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - Mask.getValueType().getSizeInBits()); - // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements - // are extracted by EXTRACT_SUBVECTOR. - VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, - DAG.getBitcast(BitcastVT, Mask), - DAG.getIntPtrConstant(0, dl)); - } + // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements + // are extracted by EXTRACT_SUBVECTOR. + SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, + DAG.getBitcast(BitcastVT, Mask), + DAG.getIntPtrConstant(0, dl)); switch (Op.getOpcode()) { default: break; @@ -15286,18 +15250,10 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, case X86ISD::CMPM: case X86ISD::CMPMU: return DAG.getNode(ISD::AND, dl, VT, Op, VMask); - case X86ISD::VTRUNC: - case X86ISD::VTRUNCS: - case X86ISD::VTRUNCUS: - // We can't use ISD::VSELECT here because it is not always "Legal" - // for the destination type. For example vpmovqb require only AVX512 - // and vselect that can operate on byte element type require BWI - OpcodeSelect = X86ISD::SELECT; - break; } if (PreservedSrc.getOpcode() == ISD::UNDEF) PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl); - return DAG.getNode(OpcodeSelect, dl, VT, VMask, Op, PreservedSrc); + return DAG.getNode(ISD::VSELECT, dl, VT, VMask, Op, PreservedSrc); } /// \brief Creates an SDNode for a predicated scalar operation. @@ -16155,45 +16111,6 @@ static SDValue LowerSEHRESTOREFRAME(SDValue Op, const X86Subtarget *Subtarget, return Chain; } -/// \brief Lower intrinsics for TRUNCATE_TO_MEM case -/// return truncate Store/MaskedStore Node -static SDValue LowerINTRINSIC_TRUNCATE_TO_MEM(const SDValue & Op, - SelectionDAG &DAG, - MVT ElementType) { - SDLoc dl(Op); - SDValue Mask = Op.getOperand(4); - SDValue DataToTruncate = Op.getOperand(3); - SDValue Addr = Op.getOperand(2); - SDValue Chain = Op.getOperand(0); - - EVT VT = DataToTruncate.getValueType(); - EVT SVT = EVT::getVectorVT(*DAG.getContext(), - ElementType, VT.getVectorNumElements()); - - if (isAllOnes(Mask)) // return just a truncate store - return DAG.getTruncStore(Chain, dl, DataToTruncate, Addr, - MachinePointerInfo(), SVT, false, false, - SVT.getScalarSizeInBits()/8); - - EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), - MVT::i1, VT.getVectorNumElements()); - EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - Mask.getValueType().getSizeInBits()); - // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements - // are extracted by EXTRACT_SUBVECTOR. - SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, - DAG.getBitcast(BitcastVT, Mask), - DAG.getIntPtrConstant(0, dl)); - - MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MachinePointerInfo(), - MachineMemOperand::MOStore, SVT.getStoreSize(), - SVT.getScalarSizeInBits()/8); - - return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, - VMask, SVT, MMO, true); -} - static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { unsigned IntNo = cast(Op.getOperand(1))->getZExtValue(); @@ -16327,12 +16244,6 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, MachinePointerInfo(), false, false, VT.getScalarSizeInBits()/8); } - case TRUNCATE_TO_MEM_VI8: - return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i8); - case TRUNCATE_TO_MEM_VI16: - return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i16); - case TRUNCATE_TO_MEM_VI32: - return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i32); case EXPAND_FROM_MEM: { SDLoc dl(Op); SDValue Mask = Op.getOperand(4); @@ -19043,8 +18954,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VZEXT: return "X86ISD::VZEXT"; case X86ISD::VSEXT: return "X86ISD::VSEXT"; case X86ISD::VTRUNC: return "X86ISD::VTRUNC"; - case X86ISD::VTRUNCS: return "X86ISD::VTRUNCS"; - case X86ISD::VTRUNCUS: return "X86ISD::VTRUNCUS"; + case X86ISD::VTRUNCM: return "X86ISD::VTRUNCM"; case X86ISD::VINSERT: return "X86ISD::VINSERT"; case X86ISD::VFPEXT: return "X86ISD::VFPEXT"; case X86ISD::VFPROUND: return "X86ISD::VFPROUND"; @@ -24183,15 +24093,6 @@ static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG, unsigned FromSz = VT.getVectorElementType().getSizeInBits(); unsigned ToSz = StVT.getVectorElementType().getSizeInBits(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - - // The truncating store is legal in some cases. For example - // vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw - // are designated for truncate store. - // In this case we don't need any further transformations. - if (TLI.isTruncStoreLegal(VT, StVT)) - return SDValue(); - // From, To sizes and ElemCount must be pow of two assert (isPowerOf2_32(NumElems * FromSz * ToSz) && "Unexpected size for truncating masked store"); @@ -24303,13 +24204,6 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, unsigned FromSz = VT.getVectorElementType().getSizeInBits(); unsigned ToSz = StVT.getVectorElementType().getSizeInBits(); - // The truncating store is legal in some cases. For example - // vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw - // are designated for truncate store. - // In this case we don't need any further transformations. - if (TLI.isTruncStoreLegal(VT, StVT)) - return SDValue(); - // From, To sizes and ElemCount must be pow of two if (!isPowerOf2_32(NumElems * FromSz * ToSz)) return SDValue(); // We are going to use the original vector elt for storing. diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 3d058e8da13..7fab1938682 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -282,8 +282,9 @@ namespace llvm { // Vector integer truncate. VTRUNC, - // Vector integer truncate with unsigned/signed saturation. - VTRUNCUS, VTRUNCS, + + // Vector integer truncate with mask. + VTRUNCM, // Vector FP extend. VFPEXT, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 5fd38ddb902..215dcebe661 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -5571,217 +5571,82 @@ defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", f64x_info>, VEX_W // Integer truncate and extend operations //------------------------------------------------- -multiclass avx512_trunc_common opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo SrcInfo, X86VectorVTInfo DestInfo, - X86MemOperand x86memop> { - - defm rr : AVX512_maskable, - EVEX, T8XS; - - // for intrinsic patter match - def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask, - (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))), - undef)), - (!cast(NAME#SrcInfo.ZSuffix##rrkz) DestInfo.KRCWM:$mask , - SrcInfo.RC:$src1)>; - - def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask, - (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))), - DestInfo.ImmAllZerosV)), - (!cast(NAME#SrcInfo.ZSuffix##rrkz) DestInfo.KRCWM:$mask , - SrcInfo.RC:$src1)>; - - def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask, - (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))), - DestInfo.RC:$src0)), - (!cast(NAME#SrcInfo.ZSuffix##rrk) DestInfo.RC:$src0, - DestInfo.KRCWM:$mask , - SrcInfo.RC:$src1)>; - - let mayStore = 1 in { - def mr : AVX512XS8I opc, string OpcodeStr, + RegisterClass dstRC, RegisterClass srcRC, + RegisterClass KRC, X86MemOperand x86memop> { + def rr : AVX512XS8I, EVEX; - def mrk : AVX512XS8I, EVEX, EVEX_K; - }//mayStore = 1 -} - -multiclass avx512_trunc_mr_lowering { - - def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst), - (!cast(NAME#SrcInfo.ZSuffix##mr) - addr:$dst, SrcInfo.RC:$src)>; - - def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask, - (SrcInfo.VT SrcInfo.RC:$src)), - (!cast(NAME#SrcInfo.ZSuffix##mrk) - addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>; -} - -multiclass avx512_trunc_sat_mr_lowering { - - def: Pat<(!cast("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix# - DestInfo.Suffix#"_mem_"#SrcInfo.Size) - addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), SrcInfo.MRC:$mask), - (!cast(NAME#SrcInfo.ZSuffix##mrk) addr:$ptr, - (COPY_TO_REGCLASS SrcInfo.MRC:$mask, SrcInfo.KRCWM), - (SrcInfo.VT SrcInfo.RC:$src))>; - - def: Pat<(!cast("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix# - DestInfo.Suffix#"_mem_"#SrcInfo.Size) - addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), -1), - (!cast(NAME#SrcInfo.ZSuffix##mr) addr:$ptr, - (SrcInfo.VT SrcInfo.RC:$src))>; -} - -multiclass avx512_trunc opc, string OpcodeStr, SDNode OpNode, - AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128, - X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ, - X86MemOperand x86memopZ128, X86MemOperand x86memopZ256, - X86MemOperand x86memopZ, PatFrag truncFrag, PatFrag mtruncFrag, - Predicate prd = HasAVX512>{ - - let Predicates = [HasVLX, prd] in { - defm Z128: avx512_trunc_common, - avx512_trunc_mr_lowering, EVEX_V128; - - defm Z256: avx512_trunc_common, - avx512_trunc_mr_lowering, EVEX_V256; - } - let Predicates = [prd] in - defm Z: avx512_trunc_common, - avx512_trunc_mr_lowering, EVEX_V512; -} - -multiclass avx512_trunc_sat opc, string OpcodeStr, SDNode OpNode, - AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128, - X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ, - X86MemOperand x86memopZ128, X86MemOperand x86memopZ256, - X86MemOperand x86memopZ, string sat, Predicate prd = HasAVX512>{ - - let Predicates = [HasVLX, prd] in { - defm Z128: avx512_trunc_common, - avx512_trunc_sat_mr_lowering, EVEX_V128; - - defm Z256: avx512_trunc_common, - avx512_trunc_sat_mr_lowering, EVEX_V256; - } - let Predicates = [prd] in - defm Z: avx512_trunc_common, - avx512_trunc_sat_mr_lowering, EVEX_V512; -} - -multiclass avx512_trunc_qb opc, string OpcodeStr, SDNode OpNode> { - defm NAME: avx512_trunc, EVEX_CD8<8, CD8VO>; -} -multiclass avx512_trunc_sat_qb opc, string sat, SDNode OpNode> { - defm NAME: avx512_trunc_sat, EVEX_CD8<8, CD8VO>; -} - -multiclass avx512_trunc_qw opc, string OpcodeStr, SDNode OpNode> { - defm NAME: avx512_trunc, EVEX_CD8<16, CD8VQ>; -} -multiclass avx512_trunc_sat_qw opc, string sat, SDNode OpNode> { - defm NAME: avx512_trunc_sat, EVEX_CD8<16, CD8VQ>; -} -multiclass avx512_trunc_qd opc, string OpcodeStr, SDNode OpNode> { - defm NAME: avx512_trunc, EVEX_CD8<32, CD8VH>; -} -multiclass avx512_trunc_sat_qd opc, string sat, SDNode OpNode> { - defm NAME: avx512_trunc_sat, EVEX_CD8<32, CD8VH>; -} + def rrkz : AVX512XS8I, EVEX, EVEX_KZ; -multiclass avx512_trunc_db opc, string OpcodeStr, SDNode OpNode> { - defm NAME: avx512_trunc, EVEX_CD8<8, CD8VQ>; -} -multiclass avx512_trunc_sat_db opc, string sat, SDNode OpNode> { - defm NAME: avx512_trunc_sat, EVEX_CD8<8, CD8VQ>; -} + def mr : AVX512XS8I, EVEX; -multiclass avx512_trunc_dw opc, string OpcodeStr, SDNode OpNode> { - defm NAME: avx512_trunc, EVEX_CD8<16, CD8VH>; -} -multiclass avx512_trunc_sat_dw opc, string sat, SDNode OpNode> { - defm NAME: avx512_trunc_sat, EVEX_CD8<16, CD8VH>; -} + def mrk : AVX512XS8I, EVEX, EVEX_K; -multiclass avx512_trunc_wb opc, string OpcodeStr, SDNode OpNode> { - defm NAME: avx512_trunc, EVEX_CD8<16, CD8VH>; } -multiclass avx512_trunc_sat_wb opc, string sat, SDNode OpNode> { - defm NAME: avx512_trunc_sat, EVEX_CD8<16, CD8VH>; -} - -defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", X86vtrunc>; -defm VPMOVSQB : avx512_trunc_sat_qb<0x22, "s", X86vtruncs>; -defm VPMOVUSQB : avx512_trunc_sat_qb<0x12, "us", X86vtruncus>; - -defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", X86vtrunc>; -defm VPMOVSQW : avx512_trunc_sat_qw<0x24, "s", X86vtruncs>; -defm VPMOVUSQW : avx512_trunc_sat_qw<0x14, "us", X86vtruncus>; - -defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", X86vtrunc>; -defm VPMOVSQD : avx512_trunc_sat_qd<0x25, "s", X86vtruncs>; -defm VPMOVUSQD : avx512_trunc_sat_qd<0x15, "us", X86vtruncus>; - -defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", X86vtrunc>; -defm VPMOVSDB : avx512_trunc_sat_db<0x21, "s", X86vtruncs>; -defm VPMOVUSDB : avx512_trunc_sat_db<0x11, "us", X86vtruncus>; - -defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", X86vtrunc>; -defm VPMOVSDW : avx512_trunc_sat_dw<0x23, "s", X86vtruncs>; -defm VPMOVUSDW : avx512_trunc_sat_dw<0x13, "us", X86vtruncus>; +defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM, + i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; +defm VPMOVSQB : avx512_trunc_sat<0x22, "vpmovsqb", VR128X, VR512, VK8WM, + i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; +defm VPMOVUSQB : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM, + i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; +defm VPMOVQW : avx512_trunc_sat<0x34, "vpmovqw", VR128X, VR512, VK8WM, + i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>; +defm VPMOVSQW : avx512_trunc_sat<0x24, "vpmovsqw", VR128X, VR512, VK8WM, + i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>; +defm VPMOVUSQW : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM, + i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>; +defm VPMOVQD : avx512_trunc_sat<0x35, "vpmovqd", VR256X, VR512, VK8WM, + i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>; +defm VPMOVSQD : avx512_trunc_sat<0x25, "vpmovsqd", VR256X, VR512, VK8WM, + i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>; +defm VPMOVUSQD : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM, + i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>; +defm VPMOVDW : avx512_trunc_sat<0x33, "vpmovdw", VR256X, VR512, VK16WM, + i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>; +defm VPMOVSDW : avx512_trunc_sat<0x23, "vpmovsdw", VR256X, VR512, VK16WM, + i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>; +defm VPMOVUSDW : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM, + i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>; +defm VPMOVDB : avx512_trunc_sat<0x31, "vpmovdb", VR128X, VR512, VK16WM, + i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>; +defm VPMOVSDB : avx512_trunc_sat<0x21, "vpmovsdb", VR128X, VR512, VK16WM, + i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>; +defm VPMOVUSDB : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM, + i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>; + +def : Pat<(v16i8 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQBrr VR512:$src)>; +def : Pat<(v8i16 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQWrr VR512:$src)>; +def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDWrr VR512:$src)>; +def : Pat<(v16i8 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDBrr VR512:$src)>; +def : Pat<(v8i32 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQDrr VR512:$src)>; + +def : Pat<(v16i8 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))), + (VPMOVDBrrkz VK16WM:$mask, VR512:$src)>; +def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))), + (VPMOVDWrrkz VK16WM:$mask, VR512:$src)>; +def : Pat<(v8i16 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))), + (VPMOVQWrrkz VK8WM:$mask, VR512:$src)>; +def : Pat<(v8i32 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))), + (VPMOVQDrrkz VK8WM:$mask, VR512:$src)>; -defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc>; -defm VPMOVSWB : avx512_trunc_sat_wb<0x20, "s", X86vtruncs>; -defm VPMOVUSWB : avx512_trunc_sat_wb<0x10, "us", X86vtruncus>; multiclass avx512_extend_common opc, string OpcodeStr, X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index bf999dce047..401b3267368 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -114,17 +114,19 @@ def X86vsext : SDNode<"X86ISD::VSEXT", SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>]>>; -def SDTVtrunc : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, - SDTCisInt<0>, SDTCisInt<1>, - SDTCisOpSmallerThanOp<0, 1>]>; - -def X86vtrunc : SDNode<"X86ISD::VTRUNC", SDTVtrunc>; -def X86vtruncs : SDNode<"X86ISD::VTRUNCS", SDTVtrunc>; -def X86vtruncus : SDNode<"X86ISD::VTRUNCUS", SDTVtrunc>; - +def X86vtrunc : SDNode<"X86ISD::VTRUNC", + SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisInt<0>, SDTCisInt<1>, + SDTCisOpSmallerThanOp<0, 1>]>>; def X86trunc : SDNode<"X86ISD::TRUNC", SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<0, 1>]>>; + +def X86vtruncm : SDNode<"X86ISD::VTRUNCM", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisInt<0>, SDTCisInt<1>, + SDTCisVec<2>, SDTCisInt<2>, + SDTCisOpSmallerThanOp<0, 2>]>>; def X86vfpext : SDNode<"X86ISD::VFPEXT", SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, SDTCisFP<0>, SDTCisFP<1>, diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index a8ad8deec5d..1383fa37306 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -21,12 +21,10 @@ enum IntrinsicType { GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX, INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP, CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI, - INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, - INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM, + INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM, INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM, FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK, VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, - TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, EXPAND_FROM_MEM, BLEND }; @@ -140,42 +138,6 @@ static const IntrinsicData IntrinsicsWithChain[] = { EXPAND_FROM_MEM, X86ISD::EXPAND, 0), X86_INTRINSIC_DATA(avx512_mask_expand_load_q_512, EXPAND_FROM_MEM, X86ISD::EXPAND, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_256, TRUNCATE_TO_MEM_VI8, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_512, TRUNCATE_TO_MEM_VI8, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_dw_mem_128, TRUNCATE_TO_MEM_VI16, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_dw_mem_256, TRUNCATE_TO_MEM_VI16, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_dw_mem_512, TRUNCATE_TO_MEM_VI16, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_qb_mem_128, TRUNCATE_TO_MEM_VI8, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_qb_mem_256, TRUNCATE_TO_MEM_VI8, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_qb_mem_512, TRUNCATE_TO_MEM_VI8, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_qd_mem_128, TRUNCATE_TO_MEM_VI32, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_qd_mem_256, TRUNCATE_TO_MEM_VI32, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_qd_mem_512, TRUNCATE_TO_MEM_VI32, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_qw_mem_128, TRUNCATE_TO_MEM_VI16, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_qw_mem_256, TRUNCATE_TO_MEM_VI16, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_qw_mem_512, TRUNCATE_TO_MEM_VI16, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_wb_mem_128, TRUNCATE_TO_MEM_VI8, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_wb_mem_256, TRUNCATE_TO_MEM_VI8, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_wb_mem_512, TRUNCATE_TO_MEM_VI8, - X86ISD::VTRUNC, 0), X86_INTRINSIC_DATA(avx512_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0), X86_INTRINSIC_DATA(avx512_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0), X86_INTRINSIC_DATA(avx512_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0), @@ -851,114 +813,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_pminu_w_128, INTR_TYPE_2OP_MASK, ISD::UMIN, 0), X86_INTRINSIC_DATA(avx512_mask_pminu_w_256, INTR_TYPE_2OP_MASK, ISD::UMIN, 0), X86_INTRINSIC_DATA(avx512_mask_pminu_w_512, INTR_TYPE_2OP_MASK, ISD::UMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_db_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_db_256, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_db_512, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_dw_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_dw_256, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_dw_512, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_qb_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_qb_256, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_qb_512, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_qd_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_qd_256, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_qd_512, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_qw_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_qw_256, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_qw_512, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_wb_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_wb_256, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_wb_512, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNC, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_db_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_db_256, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_db_512, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_256, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_512, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_256, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_512, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_256, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_512, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_256, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_512, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_256, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_512, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_db_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_db_256, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_db_512, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_256, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_512, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_256, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_512, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_256, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_512, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_256, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_512, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_128, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_256, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), - X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_512, INTR_TYPE_1OP_MASK, - X86ISD::VTRUNCUS, 0), X86_INTRINSIC_DATA(avx512_mask_pmul_dq_128, INTR_TYPE_2OP_MASK, X86ISD::PMULDQ, 0), X86_INTRINSIC_DATA(avx512_mask_pmul_dq_256, INTR_TYPE_2OP_MASK, diff --git a/test/CodeGen/X86/avx512-ext.ll b/test/CodeGen/X86/avx512-ext.ll deleted file mode 100644 index aa1dd4928c3..00000000000 --- a/test/CodeGen/X86/avx512-ext.ll +++ /dev/null @@ -1,937 +0,0 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX - - ;SKX-LABEL: zext_8x8mem_to_8x16: -;SKX: ## BB#0: -;SKX-NEXT: vpmovw2m %xmm0, %k1 -;SKX-NEXT: vpmovzxbw (%rdi), %xmm0 {%k1} {z} -;SKX-NEXT: retq -define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { - %a = load <8 x i8>,<8 x i8> *%i,align 1 - %x = zext <8 x i8> %a to <8 x i16> - %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer - ret <8 x i16> %ret -} - -;SKX-LABEL: sext_8x8mem_to_8x16: -;SKX: ## BB#0: -;SKX-NEXT: vpmovw2m %xmm0, %k1 -;SKX-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} -;SKX-NEXT: retq -define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { - %a = load <8 x i8>,<8 x i8> *%i,align 1 - %x = sext <8 x i8> %a to <8 x i16> - %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer - ret <8 x i16> %ret -} - -;SKX-LABEL: zext_16x8mem_to_16x16: -;SKX: ## BB#0: -;SKX-NEXT: vpmovb2m %xmm0, %k1 -;SKX-NEXT: vpmovzxbw (%rdi), %ymm0 {%k1} {z} -;SKX-NEXT: retq -define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { - %a = load <16 x i8>,<16 x i8> *%i,align 1 - %x = zext <16 x i8> %a to <16 x i16> - %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer - ret <16 x i16> %ret -} - -;SKX-LABEL: sext_16x8mem_to_16x16: -;SKX: ## BB#0: -;SKX-NEXT: vpmovb2m %xmm0, %k1 -;SKX-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} -;SKX-NEXT: retq -define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { - %a = load <16 x i8>,<16 x i8> *%i,align 1 - %x = sext <16 x i8> %a to <16 x i16> - %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer - ret <16 x i16> %ret -} - -;SKX-LABEL: zext_16x8_to_16x16: -;SKX: ## BB#0: -;SKX-NEXT: vpmovzxbw %xmm0, %ymm0 -;SKX-NEXT: retq -define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone { - %x = zext <16 x i8> %a to <16 x i16> - ret <16 x i16> %x -} - -;SKX-LABEL: zext_16x8_to_16x16_mask: -;SKX: ## BB#0: -;SKX-NEXT: vpmovb2m %xmm1, %k1 -;SKX-NEXT: vpmovzxbw %xmm0, %ymm0 {%k1} {z} -;SKX-NEXT: retq -define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone { - %x = zext <16 x i8> %a to <16 x i16> - %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer - ret <16 x i16> %ret -} - -;SKX-LABEL: sext_16x8_to_16x16: -;SKX: ## BB#0: -;SKX-NEXT: vpmovsxbw %xmm0, %ymm0 -;SKX-NEXT: retq -define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone { - %x = sext <16 x i8> %a to <16 x i16> - ret <16 x i16> %x -} - -;SKX-LABEL: sext_16x8_to_16x16_mask: -;SKX: ## BB#0: -;SKX-NEXT: vpmovb2m %xmm1, %k1 -;SKX-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} -;SKX-NEXT: retq -define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone { - %x = sext <16 x i8> %a to <16 x i16> - %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer - ret <16 x i16> %ret -} - -;SKX-LABEL: zext_32x8mem_to_32x16: -;SKX: ## BB#0: -;SKX-NEXT: vpmovb2m %ymm0, %k1 -;SKX-NEXT: vpmovzxbw (%rdi), %zmm0 {%k1} {z} -;SKX-NEXT: retq -define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone { - %a = load <32 x i8>,<32 x i8> *%i,align 1 - %x = zext <32 x i8> %a to <32 x i16> - %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer - ret <32 x i16> %ret -} - -;SKX-LABEL: sext_32x8mem_to_32x16: -;SKX: ## BB#0: -;SKX-NEXT: vpmovb2m %ymm0, %k1 -;SKX-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} -;SKX-NEXT: retq -define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone { - %a = load <32 x i8>,<32 x i8> *%i,align 1 - %x = sext <32 x i8> %a to <32 x i16> - %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer - ret <32 x i16> %ret -} - -;SKX-LABEL: zext_32x8_to_32x16: -;SKX: ## BB#0: -;SKX-NEXT: vpmovzxbw %ymm0, %zmm0 -;SKX-NEXT: retq -define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone { - %x = zext <32 x i8> %a to <32 x i16> - ret <32 x i16> %x -} - -;SKX-LABEL: zext_32x8_to_32x16_mask: -;SKX: ## BB#0: -;SKX-NEXT: vpmovb2m %ymm1, %k1 -;SKX-NEXT: vpmovzxbw %ymm0, %zmm0 {%k1} {z} -;SKX-NEXT: retq -define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone { - %x = zext <32 x i8> %a to <32 x i16> - %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer - ret <32 x i16> %ret -} - -;SKX-LABEL: sext_32x8_to_32x16: -;SKX: ## BB#0: -;SKX-NEXT: vpmovsxbw %ymm0, %zmm0 -;SKX-NEXT: retq -define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone { - %x = sext <32 x i8> %a to <32 x i16> - ret <32 x i16> %x -} - -;SKX-LABEL: sext_32x8_to_32x16_mask: -;SKX: ## BB#0: -;SKX-NEXT: vpmovb2m %ymm1, %k1 -;SKX-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} -;SKX-NEXT: retq -define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone { - %x = sext <32 x i8> %a to <32 x i16> - %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer - ret <32 x i16> %ret -} - -;SKX-LABEL: zext_4x8mem_to_4x32: -;SKX: ## BB#0: -;SKX-NEXT: vpmovd2m %xmm0, %k1 -;SKX-NEXT: vpmovzxbd (%rdi), %xmm0 {%k1} {z} -;SKX-NEXT: retq -define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { - %a = load <4 x i8>,<4 x i8> *%i,align 1 - %x = zext <4 x i8> %a to <4 x i32> - %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer - ret <4 x i32> %ret -} - -;SKX-LABEL: sext_4x8mem_to_4x32: -;SKX: ## BB#0: -;SKX-NEXT: vpmovd2m %xmm0, %k1 -;SKX-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} -;SKX-NEXT: retq -define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { - %a = load <4 x i8>,<4 x i8> *%i,align 1 - %x = sext <4 x i8> %a to <4 x i32> - %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer - ret <4 x i32> %ret -} - -;SKX-LABEL: zext_8x8mem_to_8x32: -;SKX: ## BB#0: -;SKX-NEXT: vpmovw2m %xmm0, %k1 -;SKX-NEXT: vpmovzxbd (%rdi), %ymm0 {%k1} {z} -;SKX-NEXT: retq -define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { - %a = load <8 x i8>,<8 x i8> *%i,align 1 - %x = zext <8 x i8> %a to <8 x i32> - %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer - ret <8 x i32> %ret -} - -;SKX-LABEL: sext_8x8mem_to_8x32: -;SKX: ## BB#0: -;SKX-NEXT: vpmovw2m %xmm0, %k1 -;SKX-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} -;SKX-NEXT: retq -define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { - %a = load <8 x i8>,<8 x i8> *%i,align 1 - %x = sext <8 x i8> %a to <8 x i32> - %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer - ret <8 x i32> %ret -} - -;KNL-LABEL: zext_16x8mem_to_16x32: -;KNL: vpmovzxbd (%rdi), %zmm0 {%k1} {z} -;KNL-NEXT: retq -define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { - %a = load <16 x i8>,<16 x i8> *%i,align 1 - %x = zext <16 x i8> %a to <16 x i32> - %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer - ret <16 x i32> %ret -} - -;KNL-LABEL: sext_16x8mem_to_16x32: -;KNL: vpmovsxbd (%rdi), %zmm0 {%k1} {z} -;KNL-NEXT: retq -define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { - %a = load <16 x i8>,<16 x i8> *%i,align 1 - %x = sext <16 x i8> %a to <16 x i32> - %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer - ret <16 x i32> %ret -} - -;KNL-LABEL: zext_16x8_to_16x32_mask: -;KNL: vpmovzxbd %xmm0, %zmm0 {%k1} {z} -;KNL-NEXT: retq -define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone { - %x = zext <16 x i8> %a to <16 x i32> - %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer - ret <16 x i32> %ret -} - -;KNL-LABEL: sext_16x8_to_16x32_mask: -;KNL: vpmovsxbd %xmm0, %zmm0 {%k1} {z} -;KNL-NEXT: retq -define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone { - %x = sext <16 x i8> %a to <16 x i32> - %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer - ret <16 x i32> %ret -} - -; KNL-LABEL: zext_16x8_to_16x32 -; KNL: vpmovzxbd {{.*}}%zmm -; KNL: ret -define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone { - %x = zext <16 x i8> %i to <16 x i32> - ret <16 x i32> %x -} - -; KNL-LABEL: sext_16x8_to_16x32 -; KNL: vpmovsxbd {{.*}}%zmm -; KNL: ret -define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone { - %x = sext <16 x i8> %i to <16 x i32> - ret <16 x i32> %x -} - -;SKX-LABEL: zext_2x8mem_to_2x64: -;SKX: ## BB#0: -;SKX-NEXT: vpmovq2m %xmm0, %k1 -;SKX-NEXT: vpmovzxbq (%rdi), %xmm0 {%k1} {z} -;SKX-NEXT: retq -define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone { - %a = load <2 x i8>,<2 x i8> *%i,align 1 - %x = zext <2 x i8> %a to <2 x i64> - %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer - ret <2 x i64> %ret -} -;SKX-LABEL: sext_2x8mem_to_2x64mask: -;SKX: ## BB#0: -;SKX-NEXT: vpmovq2m %xmm0, %k1 -;SKX-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} -;SKX-NEXT: retq -define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone { - %a = load <2 x i8>,<2 x i8> *%i,align 1 - %x = sext <2 x i8> %a to <2 x i64> - %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer - ret <2 x i64> %ret -} -;SKX-LABEL: sext_2x8mem_to_2x64: -;SKX: ## BB#0: -;SKX-NEXT: vpmovsxbq (%rdi), %xmm0 -;SKX-NEXT: retq -define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8> *%i) nounwind readnone { - %a = load <2 x i8>,<2 x i8> *%i,align 1 - %x = sext <2 x i8> %a to <2 x i64> - ret <2 x i64> %x -} - -;SKX-LABEL: zext_4x8mem_to_4x64: -;SKX: ## BB#0: -;SKX-NEXT: vpmovd2m %xmm0, %k1 -;SKX-NEXT: vpmovzxbq (%rdi), %ymm0 {%k1} {z} -;SKX-NEXT: retq -define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { - %a = load <4 x i8>,<4 x i8> *%i,align 1 - %x = zext <4 x i8> %a to <4 x i64> - %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer - ret <4 x i64> %ret -} - -;SKX-LABEL: sext_4x8mem_to_4x64mask: -;SKX: ## BB#0: -;SKX-NEXT: vpmovd2m %xmm0, %k1 -;SKX-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} -;SKX-NEXT: retq -define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { - %a = load <4 x i8>,<4 x i8> *%i,align 1 - %x = sext <4 x i8> %a to <4 x i64> - %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer - ret <4 x i64> %ret -} - -;SKX-LABEL: sext_4x8mem_to_4x64: -;SKX: ## BB#0: -;SKX-NEXT: vpmovsxbq (%rdi), %ymm0 -;SKX-NEXT: retq -define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone { - %a = load <4 x i8>,<4 x i8> *%i,align 1 - %x = sext <4 x i8> %a to <4 x i64> - ret <4 x i64> %x -} - -;KNL-LABEL: zext_8x8mem_to_8x64: -;KNL: vpmovzxbq (%rdi), %zmm0 {%k1} {z} -;KNL-NEXT: retq -define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { - %a = load <8 x i8>,<8 x i8> *%i,align 1 - %x = zext <8 x i8> %a to <8 x i64> - %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer - ret <8 x i64> %ret -} - -;KNL-LABEL: sext_8x8mem_to_8x64mask: -;KNL: vpmovsxbq (%rdi), %zmm0 {%k1} {z} -;KNL-NEXT: retq -define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { - %a = load <8 x i8>,<8 x i8> *%i,align 1 - %x = sext <8 x i8> %a to <8 x i64> - %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer - ret <8 x i64> %ret -} - -;KNL-LABEL: sext_8x8mem_to_8x64: -;KNL: vpmovsxbq (%rdi), %zmm0 -;KNL-NEXT: retq -define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone { - %a = load <8 x i8>,<8 x i8> *%i,align 1 - %x = sext <8 x i8> %a to <8 x i64> - ret <8 x i64> %x -} - -;SKX-LABEL: zext_4x16mem_to_4x32: -;SKX: ## BB#0: -;SKX-NEXT: vpmovd2m %xmm0, %k1 -;SKX-NEXT: vpmovzxwd (%rdi), %xmm0 {%k1} {z} -;SKX-NEXT: retq -define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { - %a = load <4 x i16>,<4 x i16> *%i,align 1 - %x = zext <4 x i16> %a to <4 x i32> - %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer - ret <4 x i32> %ret -} - -;SKX-LABEL: sext_4x16mem_to_4x32mask: -;SKX: ## BB#0: -;SKX-NEXT: vpmovd2m %xmm0, %k1 -;SKX-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} -;SKX-NEXT: retq -define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { - %a = load <4 x i16>,<4 x i16> *%i,align 1 - %x = sext <4 x i16> %a to <4 x i32> - %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer - ret <4 x i32> %ret -} - -;SKX-LABEL: sext_4x16mem_to_4x32: -;SKX: ## BB#0: -;SKX-NEXT: vpmovsxwd (%rdi), %xmm0 -;SKX-NEXT: retq -define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind readnone { - %a = load <4 x i16>,<4 x i16> *%i,align 1 - %x = sext <4 x i16> %a to <4 x i32> - ret <4 x i32> %x -} - - -;SKX-LABEL: zext_8x16mem_to_8x32: -;SKX: ## BB#0: -;SKX-NEXT: vpmovw2m %xmm0, %k1 -;SKX-NEXT: vpmovzxwd (%rdi), %ymm0 {%k1} {z} -;SKX-NEXT: retq -define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { - %a = load <8 x i16>,<8 x i16> *%i,align 1 - %x = zext <8 x i16> %a to <8 x i32> - %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer - ret <8 x i32> %ret -} - -;SKX-LABEL: sext_8x16mem_to_8x32mask: -;SKX: ## BB#0: -;SKX-NEXT: vpmovw2m %xmm0, %k1 -;SKX-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} -;SKX-NEXT: retq -define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { - %a = load <8 x i16>,<8 x i16> *%i,align 1 - %x = sext <8 x i16> %a to <8 x i32> - %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer - ret <8 x i32> %ret -} - -;SKX-LABEL: sext_8x16mem_to_8x32: -;SKX: ## BB#0: -;SKX-NEXT: vpmovsxwd (%rdi), %ymm0 -;SKX-NEXT: retq -define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readnone { - %a = load <8 x i16>,<8 x i16> *%i,align 1 - %x = sext <8 x i16> %a to <8 x i32> - ret <8 x i32> %x -} - -;SKX-LABEL: zext_8x16_to_8x32mask: -;SKX: ## BB#0: -;SKX-NEXT: vpmovw2m %xmm1, %k1 -;SKX-NEXT: vpmovzxwd %xmm0, %ymm0 {%k1} {z} -;SKX-NEXT: retq -define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone { - %x = zext <8 x i16> %a to <8 x i32> - %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer - ret <8 x i32> %ret -} - -;SKX-LABEL: zext_8x16_to_8x32: -;SKX: ## BB#0: -;SKX-NEXT: vpmovzxwd %xmm0, %ymm0 -;SKX-NEXT: retq -define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone { - %x = zext <8 x i16> %a to <8 x i32> - ret <8 x i32> %x -} - -;SKX-LABEL: zext_16x16mem_to_16x32: -;KNL-LABEL: zext_16x16mem_to_16x32: -;SKX: ## BB#0: -;SKX-NEXT: vpmovb2m %xmm0, %k1 -;SKX-NEXT: vpmovzxwd (%rdi), %zmm0 {%k1} {z} -;KNL: vpmovzxwd (%rdi), %zmm0 {%k1} {z} -;SKX-NEXT: retq -define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone { - %a = load <16 x i16>,<16 x i16> *%i,align 1 - %x = zext <16 x i16> %a to <16 x i32> - %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer - ret <16 x i32> %ret -} - -;SKX-LABEL: sext_16x16mem_to_16x32mask: -;KNL-LABEL: sext_16x16mem_to_16x32mask: -;SKX: ## BB#0: -;SKX-NEXT: vpmovb2m %xmm0, %k1 -;SKX-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} -;KNL: vpmovsxwd (%rdi), %zmm0 {%k1} {z} -;SKX-NEXT: retq -define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone { - %a = load <16 x i16>,<16 x i16> *%i,align 1 - %x = sext <16 x i16> %a to <16 x i32> - %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer - ret <16 x i32> %ret -} - -;SKX-LABEL: sext_16x16mem_to_16x32: -;KNL-LABEL: sext_16x16mem_to_16x32: -;SKX: ## BB#0: -;SKX-NEXT: vpmovsxwd (%rdi), %zmm0 -;KNL: vpmovsxwd (%rdi), %zmm0 -;SKX-NEXT: retq -define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone { - %a = load <16 x i16>,<16 x i16> *%i,align 1 - %x = sext <16 x i16> %a to <16 x i32> - ret <16 x i32> %x -} -;SKX-LABEL: zext_16x16_to_16x32mask: -;KNL-LABEL: zext_16x16_to_16x32mask: -;SKX: ## BB#0: -;SKX-NEXT: vpmovb2m %xmm1, %k1 -;SKX-NEXT: vpmovzxwd %ymm0, %zmm0 {%k1} {z} -;KNL: vpmovzxwd %ymm0, %zmm0 {%k1} {z} -;SKX-NEXT: retq -define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone { - %x = zext <16 x i16> %a to <16 x i32> - %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer - ret <16 x i32> %ret -} - -;SKX-LABEL: zext_16x16_to_16x32: -;KNL-LABEL: zext_16x16_to_16x32: -;SKX: ## BB#0: -;SKX-NEXT: vpmovzxwd %ymm0, %zmm0 -;KNL: vpmovzxwd %ymm0, %zmm0 -;SKX-NEXT: retq -define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone { - %x = zext <16 x i16> %a to <16 x i32> - ret <16 x i32> %x -} - -;SKX-LABEL: zext_2x16mem_to_2x64: -;SKX: ## BB#0: -;SKX-NEXT: vpmovq2m %xmm0, %k1 -;SKX-NEXT: vpmovzxwq (%rdi), %xmm0 {%k1} {z} -;SKX-NEXT: retq -define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone { - %a = load <2 x i16>,<2 x i16> *%i,align 1 - %x = zext <2 x i16> %a to <2 x i64> - %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer - ret <2 x i64> %ret -} - -;SKX-LABEL: sext_2x16mem_to_2x64mask: -;SKX: ## BB#0: -;SKX-NEXT: vpmovq2m %xmm0, %k1 -;SKX-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} -;SKX-NEXT: retq -define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone { - %a = load <2 x i16>,<2 x i16> *%i,align 1 - %x = sext <2 x i16> %a to <2 x i64> - %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer - ret <2 x i64> %ret -} - -;SKX-LABEL: sext_2x16mem_to_2x64: -;SKX: ## BB#0: -;SKX-NEXT: vpmovsxwq (%rdi), %xmm0 -;SKX-NEXT: retq -define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16> *%i) nounwind readnone { - %a = load <2 x i16>,<2 x i16> *%i,align 1 - %x = sext <2 x i16> %a to <2 x i64> - ret <2 x i64> %x -} - -;SKX-LABEL: zext_4x16mem_to_4x64: -;SKX: ## BB#0: -;SKX-NEXT: vpmovd2m %xmm0, %k1 -;SKX-NEXT: vpmovzxwq (%rdi), %ymm0 {%k1} {z} -;SKX-NEXT: retq -define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { - %a = load <4 x i16>,<4 x i16> *%i,align 1 - %x = zext <4 x i16> %a to <4 x i64> - %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer - ret <4 x i64> %ret -} - -;SKX-LABEL: sext_4x16mem_to_4x64mask: -;SKX: ## BB#0: -;SKX-NEXT: vpmovd2m %xmm0, %k1 -;SKX-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} -;SKX-NEXT: retq -define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { - %a = load <4 x i16>,<4 x i16> *%i,align 1 - %x = sext <4 x i16> %a to <4 x i64> - %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer - ret <4 x i64> %ret -} - -;SKX-LABEL: sext_4x16mem_to_4x64: -;SKX: ## BB#0: -;SKX-NEXT: vpmovsxwq (%rdi), %ymm0 -;SKX-NEXT: retq -define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readnone { - %a = load <4 x i16>,<4 x i16> *%i,align 1 - %x = sext <4 x i16> %a to <4 x i64> - ret <4 x i64> %x -} - -;SKX-LABEL: zext_8x16mem_to_8x64: -;KNL-LABEL: zext_8x16mem_to_8x64: -;SKX: ## BB#0: -;SKX-NEXT: vpmovw2m %xmm0, %k1 -;SKX-NEXT: vpmovzxwq (%rdi), %zmm0 {%k1} {z} -;KNL: vpmovzxwq (%rdi), %zmm0 {%k1} {z} -;SKX-NEXT: retq -define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { - %a = load <8 x i16>,<8 x i16> *%i,align 1 - %x = zext <8 x i16> %a to <8 x i64> - %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer - ret <8 x i64> %ret -} - -;SKX-LABEL: sext_8x16mem_to_8x64mask: -;KNL-LABEL: sext_8x16mem_to_8x64mask: -;SKX: ## BB#0: -;SKX-NEXT: vpmovw2m %xmm0, %k1 -;SKX-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} -;KNL: vpmovsxwq (%rdi), %zmm0 {%k1} {z} -;SKX-NEXT: retq -define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { - %a = load <8 x i16>,<8 x i16> *%i,align 1 - %x = sext <8 x i16> %a to <8 x i64> - %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer - ret <8 x i64> %ret -} - -;SKX-LABEL: sext_8x16mem_to_8x64: -;KNL-LABEL: sext_8x16mem_to_8x64: -;SKX: ## BB#0: -;SKX-NEXT: vpmovsxwq (%rdi), %zmm0 -;KNL: vpmovsxwq (%rdi), %zmm0 -;SKX-NEXT: retq -define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone { - %a = load <8 x i16>,<8 x i16> *%i,align 1 - %x = sext <8 x i16> %a to <8 x i64> - ret <8 x i64> %x -} - -;SKX-LABEL: zext_8x16_to_8x64mask: -;KNL-LABEL: zext_8x16_to_8x64mask: -;SKX: ## BB#0: -;SKX-NEXT: vpmovw2m %xmm1, %k1 -;SKX-NEXT: vpmovzxwq %xmm0, %zmm0 {%k1} {z} -;KNL: vpmovzxwq %xmm0, %zmm0 {%k1} {z} -;SKX-NEXT: retq -define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone { - %x = zext <8 x i16> %a to <8 x i64> - %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer - ret <8 x i64> %ret -} - -;SKX-LABEL: zext_8x16_to_8x64: -;KNL-LABEL: zext_8x16_to_8x64: -;SKX: ## BB#0: -;SKX-NEXT: vpmovzxwq %xmm0, %zmm0 -;KNL: vpmovzxwq %xmm0, %zmm0 -;SKX-NEXT: retq -; KNL: ret -define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone { - %ret = zext <8 x i16> %a to <8 x i64> - ret <8 x i64> %ret -} - -;SKX-LABEL: zext_2x32mem_to_2x64: -;SKX: ## BB#0: -;SKX-NEXT: vpmovq2m %xmm0, %k1 -;SKX-NEXT: vpmovzxdq (%rdi), %xmm0 {%k1} {z} -;SKX-NEXT: retq -define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone { - %a = load <2 x i32>,<2 x i32> *%i,align 1 - %x = zext <2 x i32> %a to <2 x i64> - %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer - ret <2 x i64> %ret -} - -;SKX-LABEL: sext_2x32mem_to_2x64mask: -;SKX: ## BB#0: -;SKX-NEXT: vpmovq2m %xmm0, %k1 -;SKX-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} -;SKX-NEXT: retq -define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone { - %a = load <2 x i32>,<2 x i32> *%i,align 1 - %x = sext <2 x i32> %a to <2 x i64> - %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer - ret <2 x i64> %ret -} - -;SKX-LABEL: sext_2x32mem_to_2x64: -;SKX: ## BB#0: -;SKX-NEXT: vpmovsxdq (%rdi), %xmm0 -;SKX-NEXT: retq -define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32> *%i) nounwind readnone { - %a = load <2 x i32>,<2 x i32> *%i,align 1 - %x = sext <2 x i32> %a to <2 x i64> - ret <2 x i64> %x -} - -;SKX-LABEL: zext_4x32mem_to_4x64: -;SKX: ## BB#0: -;SKX-NEXT: vpmovd2m %xmm0, %k1 -;SKX-NEXT: vpmovzxdq (%rdi), %ymm0 {%k1} {z} -;SKX-NEXT: retq -define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone { - %a = load <4 x i32>,<4 x i32> *%i,align 1 - %x = zext <4 x i32> %a to <4 x i64> - %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer - ret <4 x i64> %ret -} - -;SKX-LABEL: sext_4x32mem_to_4x64mask: -;SKX: ## BB#0: -;SKX-NEXT: vpmovd2m %xmm0, %k1 -;SKX-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} -;SKX-NEXT: retq -define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone { - %a = load <4 x i32>,<4 x i32> *%i,align 1 - %x = sext <4 x i32> %a to <4 x i64> - %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer - ret <4 x i64> %ret -} - -;SKX-LABEL: sext_4x32mem_to_4x64: -;SKX: ## BB#0: -;SKX-NEXT: vpmovsxdq (%rdi), %ymm0 -;SKX-NEXT: retq -define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone { - %a = load <4 x i32>,<4 x i32> *%i,align 1 - %x = sext <4 x i32> %a to <4 x i64> - ret <4 x i64> %x -} - -;SKX-LABEL: sext_4x32_to_4x64: -;SKX: ## BB#0: -;SKX-NEXT: vpmovsxdq %xmm0, %ymm0 -;SKX-NEXT: retq -define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone { - %x = sext <4 x i32> %a to <4 x i64> - ret <4 x i64> %x -} - -;SKX-LABEL: zext_4x32_to_4x64mask: -;SKX: ## BB#0: -;SKX-NEXT: vpmovd2m %xmm1, %k1 -;SKX-NEXT: vpmovzxdq %xmm0, %ymm0 {%k1} {z} -;SKX-NEXT: retq -define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone { - %x = zext <4 x i32> %a to <4 x i64> - %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer - ret <4 x i64> %ret -} - -;SKX-LABEL: zext_8x32mem_to_8x64: -;SKX: ## BB#0: -;SKX-NEXT: vpmovw2m %xmm0, %k1 -;SKX-NEXT: vpmovzxdq (%rdi), %zmm0 {%k1} {z} -;SKX-NEXT: retq -define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone { - %a = load <8 x i32>,<8 x i32> *%i,align 1 - %x = zext <8 x i32> %a to <8 x i64> - %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer - ret <8 x i64> %ret -} - -;SKX-LABEL: sext_8x32mem_to_8x64mask: -;SKX: ## BB#0: -;SKX-NEXT: vpmovw2m %xmm0, %k1 -;SKX-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} -;SKX-NEXT: retq -define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone { - %a = load <8 x i32>,<8 x i32> *%i,align 1 - %x = sext <8 x i32> %a to <8 x i64> - %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer - ret <8 x i64> %ret -} - -;SKX-LABEL: sext_8x32mem_to_8x64: -;KNL-LABEL: sext_8x32mem_to_8x64: -;SKX: ## BB#0: -;SKX-NEXT: vpmovsxdq (%rdi), %zmm0 -;KNL: vpmovsxdq (%rdi), %zmm0 -;SKX-NEXT: retq -define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone { - %a = load <8 x i32>,<8 x i32> *%i,align 1 - %x = sext <8 x i32> %a to <8 x i64> - ret <8 x i64> %x -} - -;SKX-LABEL: sext_8x32_to_8x64: -;KNL-LABEL: sext_8x32_to_8x64: -;SKX: ## BB#0: -;SKX-NEXT: vpmovsxdq %ymm0, %zmm0 -;KNL: vpmovsxdq %ymm0, %zmm0 -;SKX-NEXT: retq -define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone { - %x = sext <8 x i32> %a to <8 x i64> - ret <8 x i64> %x -} - -;SKX-LABEL: zext_8x32_to_8x64mask: -;KNL-LABEL: zext_8x32_to_8x64mask: -;SKX: ## BB#0: -;SKX-NEXT: vpmovw2m %xmm1, %k1 -;SKX-NEXT: vpmovzxdq %ymm0, %zmm0 {%k1} {z} -;KNL: vpmovzxdq %ymm0, %zmm0 {%k1} {z} -;SKX-NEXT: retq -define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone { - %x = zext <8 x i32> %a to <8 x i64> - %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer - ret <8 x i64> %ret -} -;KNL-LABEL: fptrunc_test -;KNL: vcvtpd2ps {{.*}}%zmm -;KNL: ret -define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone { - %b = fptrunc <8 x double> %a to <8 x float> - ret <8 x float> %b -} - -;KNL-LABEL: fpext_test -;KNL: vcvtps2pd {{.*}}%zmm -;KNL: ret -define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone { - %b = fpext <8 x float> %a to <8 x double> - ret <8 x double> %b -} - -; KNL-LABEL: zext_16i1_to_16xi32 -; KNL: vpbroadcastd LCP{{.*}}(%rip), %zmm0 {%k1} {z} -; KNL: ret -define <16 x i32> @zext_16i1_to_16xi32(i16 %b) { - %a = bitcast i16 %b to <16 x i1> - %c = zext <16 x i1> %a to <16 x i32> - ret <16 x i32> %c -} - -; KNL-LABEL: zext_8i1_to_8xi64 -; KNL: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z} -; KNL: ret -define <8 x i64> @zext_8i1_to_8xi64(i8 %b) { - %a = bitcast i8 %b to <8 x i1> - %c = zext <8 x i1> %a to <8 x i64> - ret <8 x i64> %c -} - -; KNL-LABEL: trunc_16i8_to_16i1 -; KNL: vpmovsxbd -; KNL: vpandd -; KNL: vptestmd -; KNL: ret -; SKX-LABEL: trunc_16i8_to_16i1 -; SKX: vpmovb2m %xmm -define i16 @trunc_16i8_to_16i1(<16 x i8> %a) { - %mask_b = trunc <16 x i8>%a to <16 x i1> - %mask = bitcast <16 x i1> %mask_b to i16 - ret i16 %mask -} - -; KNL-LABEL: trunc_16i32_to_16i1 -; KNL: vpandd -; KNL: vptestmd -; KNL: ret -; SKX-LABEL: trunc_16i32_to_16i1 -; SKX: vpmovd2m %zmm -define i16 @trunc_16i32_to_16i1(<16 x i32> %a) { - %mask_b = trunc <16 x i32>%a to <16 x i1> - %mask = bitcast <16 x i1> %mask_b to i16 - ret i16 %mask -} - -; SKX-LABEL: trunc_4i32_to_4i1 -; SKX: vpmovd2m %xmm -; SKX: kandw -; SKX: vpmovm2d -define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) { - %mask_a = trunc <4 x i32>%a to <4 x i1> - %mask_b = trunc <4 x i32>%b to <4 x i1> - %a_and_b = and <4 x i1>%mask_a, %mask_b - %res = sext <4 x i1>%a_and_b to <4 x i32> - ret <4 x i32>%res -} - -; KNL-LABEL: trunc_8i16_to_8i1 -; KNL: vpmovsxwq -; KNL: vpandq LCP{{.*}}(%rip){1to8} -; KNL: vptestmq -; KNL: ret - -; SKX-LABEL: trunc_8i16_to_8i1 -; SKX: vpmovw2m %xmm -define i8 @trunc_8i16_to_8i1(<8 x i16> %a) { - %mask_b = trunc <8 x i16>%a to <8 x i1> - %mask = bitcast <8 x i1> %mask_b to i8 - ret i8 %mask -} - -; KNL-LABEL: sext_8i1_8i32 -; KNL: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z} -; SKX: vpmovm2d -; KNL: ret -define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind { - %x = icmp slt <8 x i32> %a1, %a2 - %x1 = xor <8 x i1>%x, - %y = sext <8 x i1> %x1 to <8 x i32> - ret <8 x i32> %y -} - - -; KNL-LABEL: trunc_i32_to_i1 -; KNL: movw $-4, %ax -; KNL: kmovw %eax, %k1 -; KNL: korw -define i16 @trunc_i32_to_i1(i32 %a) { - %a_i = trunc i32 %a to i1 - %maskv = insertelement <16 x i1> , i1 %a_i, i32 0 - %res = bitcast <16 x i1> %maskv to i16 - ret i16 %res -} - -; KNL-LABEL: sext_8i1_8i16 -; SKX: vpmovm2w -; KNL: ret -define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind { - %x = icmp slt <8 x i32> %a1, %a2 - %y = sext <8 x i1> %x to <8 x i16> - ret <8 x i16> %y -} - -; KNL-LABEL: sext_16i1_16i32 -; SKX: vpmovm2d -; KNL: ret -define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind { - %x = icmp slt <16 x i32> %a1, %a2 - %y = sext <16 x i1> %x to <16 x i32> - ret <16 x i32> %y -} - -; KNL-LABEL: sext_8i1_8i64 -; SKX: vpmovm2q -; KNL: ret -define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind { - %x = icmp slt <8 x i32> %a1, %a2 - %y = sext <8 x i1> %x to <8 x i64> - ret <8 x i64> %y -} - -; KNL-LABEL: @extload_v8i64 -; KNL: vpmovsxbq -define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) { - %sign_load = load <8 x i8>, <8 x i8>* %a - %c = sext <8 x i8> %sign_load to <8 x i64> - store <8 x i64> %c, <8 x i64>* %res - ret void -} - -;SKX-LABEL: test21: -;SKX: vmovdqu16 %zmm0, %zmm3 {%k1} -;SKX-NEXT: kshiftrq $32, %k1, %k1 -;SKX-NEXT: vmovdqu16 %zmm1, %zmm2 {%k1} -define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { - %ret = select <64 x i1> %mask, <64 x i16> %x, <64 x i16> zeroinitializer - ret <64 x i16> %ret -} - diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index 7c30063ce28..6e50fda7467 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -3119,396 +3119,6 @@ define <16 x float>@test_int_x86_avx512_mask_scalef_ps_512(<16 x float> %x0, <16 ret <16 x float> %res2 } -declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64>, <16 x i8>, i8) - -define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_512: -; CHECK: vpmovqb %zmm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovqb %zmm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovqb %zmm0, %xmm0 - %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1) - %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) - %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) - %res3 = add <16 x i8> %res0, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 -} - -declare void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64>, i8) - -define void @test_int_x86_avx512_mask_pmov_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_512: -; CHECK: vpmovqb %zmm0, (%rdi) -; CHECK: vpmovqb %zmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) - ret void -} - -declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64>, <16 x i8>, i8) - -define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_512: -; CHECK: vpmovsqb %zmm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovsqb %zmm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovsqb %zmm0, %xmm0 - %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1) - %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) - %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) - %res3 = add <16 x i8> %res0, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64>, i8) - -define void @test_int_x86_avx512_mask_pmovs_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_512: -; CHECK: vpmovsqb %zmm0, (%rdi) -; CHECK: vpmovsqb %zmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) - ret void -} - -declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64>, <16 x i8>, i8) - -define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_512: -; CHECK: vpmovusqb %zmm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovusqb %zmm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovusqb %zmm0, %xmm0 - %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1) - %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) - %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) - %res3 = add <16 x i8> %res0, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64>, i8) - -define void @test_int_x86_avx512_mask_pmovus_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_512: -; CHECK: vpmovusqb %zmm0, (%rdi) -; CHECK: vpmovusqb %zmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) - ret void -} - -declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8) - -define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_512: -; CHECK: vpmovqw %zmm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovqw %zmm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovqw %zmm0, %xmm0 - %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1) - %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) - %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) - %res3 = add <8 x i16> %res0, %res1 - %res4 = add <8 x i16> %res3, %res2 - ret <8 x i16> %res4 -} - -declare void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64>, i8) - -define void @test_int_x86_avx512_mask_pmov_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_512: -; CHECK: vpmovqw %zmm0, (%rdi) -; CHECK: vpmovqw %zmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) - ret void -} - -declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64>, <8 x i16>, i8) - -define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_512: -; CHECK: vpmovsqw %zmm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovsqw %zmm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovsqw %zmm0, %xmm0 - %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1) - %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) - %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) - %res3 = add <8 x i16> %res0, %res1 - %res4 = add <8 x i16> %res3, %res2 - ret <8 x i16> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64>, i8) - -define void @test_int_x86_avx512_mask_pmovs_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_512: -; CHECK: vpmovsqw %zmm0, (%rdi) -; CHECK: vpmovsqw %zmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) - ret void -} - -declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64>, <8 x i16>, i8) - -define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_512: -; CHECK: vpmovusqw %zmm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovusqw %zmm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovusqw %zmm0, %xmm0 - %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1) - %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) - %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) - %res3 = add <8 x i16> %res0, %res1 - %res4 = add <8 x i16> %res3, %res2 - ret <8 x i16> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64>, i8) - -define void @test_int_x86_avx512_mask_pmovus_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_512: -; CHECK: vpmovusqw %zmm0, (%rdi) -; CHECK: vpmovusqw %zmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) - ret void -} - -declare <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64>, <8 x i32>, i8) - -define <8 x i32>@test_int_x86_avx512_mask_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_512: -; CHECK: vpmovqd %zmm0, %ymm1 {%k1} -; CHECK-NEXT: vpmovqd %zmm0, %ymm2 {%k1} {z} -; CHECK-NEXT: vpmovqd %zmm0, %ymm0 - %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1) - %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) - %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2) - %res3 = add <8 x i32> %res0, %res1 - %res4 = add <8 x i32> %res3, %res2 - ret <8 x i32> %res4 -} - -declare void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64>, i8) - -define void @test_int_x86_avx512_mask_pmov_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_512: -; CHECK: vpmovqd %zmm0, (%rdi) -; CHECK: vpmovqd %zmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) - ret void -} - -declare <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64>, <8 x i32>, i8) - -define <8 x i32>@test_int_x86_avx512_mask_pmovs_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_512: -; CHECK: vpmovsqd %zmm0, %ymm1 {%k1} -; CHECK-NEXT: vpmovsqd %zmm0, %ymm2 {%k1} {z} -; CHECK-NEXT: vpmovsqd %zmm0, %ymm0 - %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1) - %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) - %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2) - %res3 = add <8 x i32> %res0, %res1 - %res4 = add <8 x i32> %res3, %res2 - ret <8 x i32> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64>, i8) - -define void @test_int_x86_avx512_mask_pmovs_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_512: -; CHECK: vpmovsqd %zmm0, (%rdi) -; CHECK: vpmovsqd %zmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) - ret void -} - -declare <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64>, <8 x i32>, i8) - -define <8 x i32>@test_int_x86_avx512_mask_pmovus_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_512: -; CHECK: vpmovusqd %zmm0, %ymm1 {%k1} -; CHECK-NEXT: vpmovusqd %zmm0, %ymm2 {%k1} {z} -; CHECK-NEXT: vpmovusqd %zmm0, %ymm0 - %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1) - %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) - %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2) - %res3 = add <8 x i32> %res0, %res1 - %res4 = add <8 x i32> %res3, %res2 - ret <8 x i32> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64>, i8) - -define void @test_int_x86_avx512_mask_pmovus_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_512: -; CHECK: vpmovusqd %zmm0, (%rdi) -; CHECK: vpmovusqd %zmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2) - ret void -} - -declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16) - -define <16 x i8>@test_int_x86_avx512_mask_pmov_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_512: -; CHECK: vpmovdb %zmm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovdb %zmm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovdb %zmm0, %xmm0 - %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1) - %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) - %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2) - %res3 = add <16 x i8> %res0, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 -} - -declare void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32>, i16) - -define void @test_int_x86_avx512_mask_pmov_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_512: -; CHECK: vpmovdb %zmm0, (%rdi) -; CHECK: vpmovdb %zmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) - call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2) - ret void -} - -declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32>, <16 x i8>, i16) - -define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_512: -; CHECK: vpmovsdb %zmm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovsdb %zmm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovsdb %zmm0, %xmm0 - %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1) - %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) - %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2) - %res3 = add <16 x i8> %res0, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32>, i16) - -define void @test_int_x86_avx512_mask_pmovs_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_512: -; CHECK: vpmovsdb %zmm0, (%rdi) -; CHECK: vpmovsdb %zmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) - call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2) - ret void -} - -declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32>, <16 x i8>, i16) - -define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_512: -; CHECK: vpmovusdb %zmm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovusdb %zmm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovusdb %zmm0, %xmm0 - %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1) - %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) - %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2) - %res3 = add <16 x i8> %res0, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32>, i16) - -define void @test_int_x86_avx512_mask_pmovus_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_512: -; CHECK: vpmovusdb %zmm0, (%rdi) -; CHECK: vpmovusdb %zmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) - call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2) - ret void -} - -declare <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32>, <16 x i16>, i16) - -define <16 x i16>@test_int_x86_avx512_mask_pmov_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_512: -; CHECK: vpmovdw %zmm0, %ymm1 {%k1} -; CHECK-NEXT: vpmovdw %zmm0, %ymm2 {%k1} {z} -; CHECK-NEXT: vpmovdw %zmm0, %ymm0 - %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1) - %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) - %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2) - %res3 = add <16 x i16> %res0, %res1 - %res4 = add <16 x i16> %res3, %res2 - ret <16 x i16> %res4 -} - -declare void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32>, i16) - -define void @test_int_x86_avx512_mask_pmov_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_512: -; CHECK: vpmovdw %zmm0, (%rdi) -; CHECK: vpmovdw %zmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) - call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2) - ret void -} - -declare <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32>, <16 x i16>, i16) - -define <16 x i16>@test_int_x86_avx512_mask_pmovs_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_512: -; CHECK: vpmovsdw %zmm0, %ymm1 {%k1} -; CHECK-NEXT: vpmovsdw %zmm0, %ymm2 {%k1} {z} -; CHECK-NEXT: vpmovsdw %zmm0, %ymm0 - %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1) - %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) - %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2) - %res3 = add <16 x i16> %res0, %res1 - %res4 = add <16 x i16> %res3, %res2 - ret <16 x i16> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32>, i16) - -define void @test_int_x86_avx512_mask_pmovs_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_512: -; CHECK: vpmovsdw %zmm0, (%rdi) -; CHECK: vpmovsdw %zmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) - call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2) - ret void -} - -declare <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32>, <16 x i16>, i16) - -define <16 x i16>@test_int_x86_avx512_mask_pmovus_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_512: -; CHECK: vpmovusdw %zmm0, %ymm1 {%k1} -; CHECK-NEXT: vpmovusdw %zmm0, %ymm2 {%k1} {z} -; CHECK-NEXT: vpmovusdw %zmm0, %ymm0 - %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1) - %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) - %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2) - %res3 = add <16 x i16> %res0, %res1 - %res4 = add <16 x i16> %res3, %res2 - ret <16 x i16> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32>, i16) - -define void @test_int_x86_avx512_mask_pmovus_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_512: -; CHECK: vpmovusdw %zmm0, (%rdi) -; CHECK: vpmovusdw %zmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) - call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2) - ret void -} - declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8) define <8 x double>@test_int_x86_avx512_mask_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) { diff --git a/test/CodeGen/X86/avx512-trunc-ext.ll b/test/CodeGen/X86/avx512-trunc-ext.ll new file mode 100644 index 00000000000..f25458972e4 --- /dev/null +++ b/test/CodeGen/X86/avx512-trunc-ext.ll @@ -0,0 +1,961 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX + + +; KNL-LABEL: trunc_16x32_to_16x8 +; KNL: vpmovdb +; KNL: ret +define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) nounwind readnone { + %x = trunc <16 x i32> %i to <16 x i8> + ret <16 x i8> %x +} + +; KNL-LABEL: trunc_8x64_to_8x16 +; KNL: vpmovqw +; KNL: ret +define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) nounwind readnone { + %x = trunc <8 x i64> %i to <8 x i16> + ret <8 x i16> %x +} + +;SKX-LABEL: zext_8x8mem_to_8x16: +;SKX: ## BB#0: +;SKX-NEXT: vpmovw2m %xmm0, %k1 +;SKX-NEXT: vpmovzxbw (%rdi), %xmm0 {%k1} {z} +;SKX-NEXT: retq +define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { + %a = load <8 x i8>,<8 x i8> *%i,align 1 + %x = zext <8 x i8> %a to <8 x i16> + %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer + ret <8 x i16> %ret +} + +;SKX-LABEL: sext_8x8mem_to_8x16: +;SKX: ## BB#0: +;SKX-NEXT: vpmovw2m %xmm0, %k1 +;SKX-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} +;SKX-NEXT: retq +define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { + %a = load <8 x i8>,<8 x i8> *%i,align 1 + %x = sext <8 x i8> %a to <8 x i16> + %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer + ret <8 x i16> %ret +} + +;SKX-LABEL: zext_16x8mem_to_16x16: +;SKX: ## BB#0: +;SKX-NEXT: vpmovb2m %xmm0, %k1 +;SKX-NEXT: vpmovzxbw (%rdi), %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { + %a = load <16 x i8>,<16 x i8> *%i,align 1 + %x = zext <16 x i8> %a to <16 x i16> + %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer + ret <16 x i16> %ret +} + +;SKX-LABEL: sext_16x8mem_to_16x16: +;SKX: ## BB#0: +;SKX-NEXT: vpmovb2m %xmm0, %k1 +;SKX-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { + %a = load <16 x i8>,<16 x i8> *%i,align 1 + %x = sext <16 x i8> %a to <16 x i16> + %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer + ret <16 x i16> %ret +} + +;SKX-LABEL: zext_16x8_to_16x16: +;SKX: ## BB#0: +;SKX-NEXT: vpmovzxbw %xmm0, %ymm0 +;SKX-NEXT: retq +define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone { + %x = zext <16 x i8> %a to <16 x i16> + ret <16 x i16> %x +} + +;SKX-LABEL: zext_16x8_to_16x16_mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovb2m %xmm1, %k1 +;SKX-NEXT: vpmovzxbw %xmm0, %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone { + %x = zext <16 x i8> %a to <16 x i16> + %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer + ret <16 x i16> %ret +} + +;SKX-LABEL: sext_16x8_to_16x16: +;SKX: ## BB#0: +;SKX-NEXT: vpmovsxbw %xmm0, %ymm0 +;SKX-NEXT: retq +define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone { + %x = sext <16 x i8> %a to <16 x i16> + ret <16 x i16> %x +} + +;SKX-LABEL: sext_16x8_to_16x16_mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovb2m %xmm1, %k1 +;SKX-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone { + %x = sext <16 x i8> %a to <16 x i16> + %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer + ret <16 x i16> %ret +} + +;SKX-LABEL: zext_32x8mem_to_32x16: +;SKX: ## BB#0: +;SKX-NEXT: vpmovb2m %ymm0, %k1 +;SKX-NEXT: vpmovzxbw (%rdi), %zmm0 {%k1} {z} +;SKX-NEXT: retq +define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone { + %a = load <32 x i8>,<32 x i8> *%i,align 1 + %x = zext <32 x i8> %a to <32 x i16> + %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer + ret <32 x i16> %ret +} + +;SKX-LABEL: sext_32x8mem_to_32x16: +;SKX: ## BB#0: +;SKX-NEXT: vpmovb2m %ymm0, %k1 +;SKX-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} +;SKX-NEXT: retq +define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone { + %a = load <32 x i8>,<32 x i8> *%i,align 1 + %x = sext <32 x i8> %a to <32 x i16> + %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer + ret <32 x i16> %ret +} + +;SKX-LABEL: zext_32x8_to_32x16: +;SKX: ## BB#0: +;SKX-NEXT: vpmovzxbw %ymm0, %zmm0 +;SKX-NEXT: retq +define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone { + %x = zext <32 x i8> %a to <32 x i16> + ret <32 x i16> %x +} + +;SKX-LABEL: zext_32x8_to_32x16_mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovb2m %ymm1, %k1 +;SKX-NEXT: vpmovzxbw %ymm0, %zmm0 {%k1} {z} +;SKX-NEXT: retq +define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone { + %x = zext <32 x i8> %a to <32 x i16> + %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer + ret <32 x i16> %ret +} + +;SKX-LABEL: sext_32x8_to_32x16: +;SKX: ## BB#0: +;SKX-NEXT: vpmovsxbw %ymm0, %zmm0 +;SKX-NEXT: retq +define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone { + %x = sext <32 x i8> %a to <32 x i16> + ret <32 x i16> %x +} + +;SKX-LABEL: sext_32x8_to_32x16_mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovb2m %ymm1, %k1 +;SKX-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} +;SKX-NEXT: retq +define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone { + %x = sext <32 x i8> %a to <32 x i16> + %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer + ret <32 x i16> %ret +} + +;SKX-LABEL: zext_4x8mem_to_4x32: +;SKX: ## BB#0: +;SKX-NEXT: vpmovd2m %xmm0, %k1 +;SKX-NEXT: vpmovzxbd (%rdi), %xmm0 {%k1} {z} +;SKX-NEXT: retq +define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { + %a = load <4 x i8>,<4 x i8> *%i,align 1 + %x = zext <4 x i8> %a to <4 x i32> + %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer + ret <4 x i32> %ret +} + +;SKX-LABEL: sext_4x8mem_to_4x32: +;SKX: ## BB#0: +;SKX-NEXT: vpmovd2m %xmm0, %k1 +;SKX-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} +;SKX-NEXT: retq +define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { + %a = load <4 x i8>,<4 x i8> *%i,align 1 + %x = sext <4 x i8> %a to <4 x i32> + %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer + ret <4 x i32> %ret +} + +;SKX-LABEL: zext_8x8mem_to_8x32: +;SKX: ## BB#0: +;SKX-NEXT: vpmovw2m %xmm0, %k1 +;SKX-NEXT: vpmovzxbd (%rdi), %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { + %a = load <8 x i8>,<8 x i8> *%i,align 1 + %x = zext <8 x i8> %a to <8 x i32> + %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer + ret <8 x i32> %ret +} + +;SKX-LABEL: sext_8x8mem_to_8x32: +;SKX: ## BB#0: +;SKX-NEXT: vpmovw2m %xmm0, %k1 +;SKX-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { + %a = load <8 x i8>,<8 x i8> *%i,align 1 + %x = sext <8 x i8> %a to <8 x i32> + %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer + ret <8 x i32> %ret +} + +;KNL-LABEL: zext_16x8mem_to_16x32: +;KNL: vpmovzxbd (%rdi), %zmm0 {%k1} {z} +;KNL-NEXT: retq +define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { + %a = load <16 x i8>,<16 x i8> *%i,align 1 + %x = zext <16 x i8> %a to <16 x i32> + %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer + ret <16 x i32> %ret +} + +;KNL-LABEL: sext_16x8mem_to_16x32: +;KNL: vpmovsxbd (%rdi), %zmm0 {%k1} {z} +;KNL-NEXT: retq +define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { + %a = load <16 x i8>,<16 x i8> *%i,align 1 + %x = sext <16 x i8> %a to <16 x i32> + %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer + ret <16 x i32> %ret +} + +;KNL-LABEL: zext_16x8_to_16x32_mask: +;KNL: vpmovzxbd %xmm0, %zmm0 {%k1} {z} +;KNL-NEXT: retq +define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone { + %x = zext <16 x i8> %a to <16 x i32> + %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer + ret <16 x i32> %ret +} + +;KNL-LABEL: sext_16x8_to_16x32_mask: +;KNL: vpmovsxbd %xmm0, %zmm0 {%k1} {z} +;KNL-NEXT: retq +define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone { + %x = sext <16 x i8> %a to <16 x i32> + %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer + ret <16 x i32> %ret +} + +; KNL-LABEL: zext_16x8_to_16x32 +; KNL: vpmovzxbd {{.*}}%zmm +; KNL: ret +define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone { + %x = zext <16 x i8> %i to <16 x i32> + ret <16 x i32> %x +} + +; KNL-LABEL: sext_16x8_to_16x32 +; KNL: vpmovsxbd {{.*}}%zmm +; KNL: ret +define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone { + %x = sext <16 x i8> %i to <16 x i32> + ret <16 x i32> %x +} + +;SKX-LABEL: zext_2x8mem_to_2x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovq2m %xmm0, %k1 +;SKX-NEXT: vpmovzxbq (%rdi), %xmm0 {%k1} {z} +;SKX-NEXT: retq +define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone { + %a = load <2 x i8>,<2 x i8> *%i,align 1 + %x = zext <2 x i8> %a to <2 x i64> + %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer + ret <2 x i64> %ret +} +;SKX-LABEL: sext_2x8mem_to_2x64mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovq2m %xmm0, %k1 +;SKX-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} +;SKX-NEXT: retq +define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone { + %a = load <2 x i8>,<2 x i8> *%i,align 1 + %x = sext <2 x i8> %a to <2 x i64> + %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer + ret <2 x i64> %ret +} +;SKX-LABEL: sext_2x8mem_to_2x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovsxbq (%rdi), %xmm0 +;SKX-NEXT: retq +define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8> *%i) nounwind readnone { + %a = load <2 x i8>,<2 x i8> *%i,align 1 + %x = sext <2 x i8> %a to <2 x i64> + ret <2 x i64> %x +} + +;SKX-LABEL: zext_4x8mem_to_4x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovd2m %xmm0, %k1 +;SKX-NEXT: vpmovzxbq (%rdi), %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { + %a = load <4 x i8>,<4 x i8> *%i,align 1 + %x = zext <4 x i8> %a to <4 x i64> + %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer + ret <4 x i64> %ret +} + +;SKX-LABEL: sext_4x8mem_to_4x64mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovd2m %xmm0, %k1 +;SKX-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { + %a = load <4 x i8>,<4 x i8> *%i,align 1 + %x = sext <4 x i8> %a to <4 x i64> + %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer + ret <4 x i64> %ret +} + +;SKX-LABEL: sext_4x8mem_to_4x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovsxbq (%rdi), %ymm0 +;SKX-NEXT: retq +define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone { + %a = load <4 x i8>,<4 x i8> *%i,align 1 + %x = sext <4 x i8> %a to <4 x i64> + ret <4 x i64> %x +} + +;KNL-LABEL: zext_8x8mem_to_8x64: +;KNL: vpmovzxbq (%rdi), %zmm0 {%k1} {z} +;KNL-NEXT: retq +define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { + %a = load <8 x i8>,<8 x i8> *%i,align 1 + %x = zext <8 x i8> %a to <8 x i64> + %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer + ret <8 x i64> %ret +} + +;KNL-LABEL: sext_8x8mem_to_8x64mask: +;KNL: vpmovsxbq (%rdi), %zmm0 {%k1} {z} +;KNL-NEXT: retq +define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { + %a = load <8 x i8>,<8 x i8> *%i,align 1 + %x = sext <8 x i8> %a to <8 x i64> + %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer + ret <8 x i64> %ret +} + +;KNL-LABEL: sext_8x8mem_to_8x64: +;KNL: vpmovsxbq (%rdi), %zmm0 +;KNL-NEXT: retq +define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone { + %a = load <8 x i8>,<8 x i8> *%i,align 1 + %x = sext <8 x i8> %a to <8 x i64> + ret <8 x i64> %x +} + +;SKX-LABEL: zext_4x16mem_to_4x32: +;SKX: ## BB#0: +;SKX-NEXT: vpmovd2m %xmm0, %k1 +;SKX-NEXT: vpmovzxwd (%rdi), %xmm0 {%k1} {z} +;SKX-NEXT: retq +define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { + %a = load <4 x i16>,<4 x i16> *%i,align 1 + %x = zext <4 x i16> %a to <4 x i32> + %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer + ret <4 x i32> %ret +} + +;SKX-LABEL: sext_4x16mem_to_4x32mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovd2m %xmm0, %k1 +;SKX-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} +;SKX-NEXT: retq +define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { + %a = load <4 x i16>,<4 x i16> *%i,align 1 + %x = sext <4 x i16> %a to <4 x i32> + %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer + ret <4 x i32> %ret +} + +;SKX-LABEL: sext_4x16mem_to_4x32: +;SKX: ## BB#0: +;SKX-NEXT: vpmovsxwd (%rdi), %xmm0 +;SKX-NEXT: retq +define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind readnone { + %a = load <4 x i16>,<4 x i16> *%i,align 1 + %x = sext <4 x i16> %a to <4 x i32> + ret <4 x i32> %x +} + + +;SKX-LABEL: zext_8x16mem_to_8x32: +;SKX: ## BB#0: +;SKX-NEXT: vpmovw2m %xmm0, %k1 +;SKX-NEXT: vpmovzxwd (%rdi), %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { + %a = load <8 x i16>,<8 x i16> *%i,align 1 + %x = zext <8 x i16> %a to <8 x i32> + %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer + ret <8 x i32> %ret +} + +;SKX-LABEL: sext_8x16mem_to_8x32mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovw2m %xmm0, %k1 +;SKX-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { + %a = load <8 x i16>,<8 x i16> *%i,align 1 + %x = sext <8 x i16> %a to <8 x i32> + %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer + ret <8 x i32> %ret +} + +;SKX-LABEL: sext_8x16mem_to_8x32: +;SKX: ## BB#0: +;SKX-NEXT: vpmovsxwd (%rdi), %ymm0 +;SKX-NEXT: retq +define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readnone { + %a = load <8 x i16>,<8 x i16> *%i,align 1 + %x = sext <8 x i16> %a to <8 x i32> + ret <8 x i32> %x +} + +;SKX-LABEL: zext_8x16_to_8x32mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovw2m %xmm1, %k1 +;SKX-NEXT: vpmovzxwd %xmm0, %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone { + %x = zext <8 x i16> %a to <8 x i32> + %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer + ret <8 x i32> %ret +} + +;SKX-LABEL: zext_8x16_to_8x32: +;SKX: ## BB#0: +;SKX-NEXT: vpmovzxwd %xmm0, %ymm0 +;SKX-NEXT: retq +define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone { + %x = zext <8 x i16> %a to <8 x i32> + ret <8 x i32> %x +} + +;SKX-LABEL: zext_16x16mem_to_16x32: +;KNL-LABEL: zext_16x16mem_to_16x32: +;SKX: ## BB#0: +;SKX-NEXT: vpmovb2m %xmm0, %k1 +;SKX-NEXT: vpmovzxwd (%rdi), %zmm0 {%k1} {z} +;KNL: vpmovzxwd (%rdi), %zmm0 {%k1} {z} +;SKX-NEXT: retq +define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone { + %a = load <16 x i16>,<16 x i16> *%i,align 1 + %x = zext <16 x i16> %a to <16 x i32> + %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer + ret <16 x i32> %ret +} + +;SKX-LABEL: sext_16x16mem_to_16x32mask: +;KNL-LABEL: sext_16x16mem_to_16x32mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovb2m %xmm0, %k1 +;SKX-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} +;KNL: vpmovsxwd (%rdi), %zmm0 {%k1} {z} +;SKX-NEXT: retq +define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone { + %a = load <16 x i16>,<16 x i16> *%i,align 1 + %x = sext <16 x i16> %a to <16 x i32> + %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer + ret <16 x i32> %ret +} + +;SKX-LABEL: sext_16x16mem_to_16x32: +;KNL-LABEL: sext_16x16mem_to_16x32: +;SKX: ## BB#0: +;SKX-NEXT: vpmovsxwd (%rdi), %zmm0 +;KNL: vpmovsxwd (%rdi), %zmm0 +;SKX-NEXT: retq +define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone { + %a = load <16 x i16>,<16 x i16> *%i,align 1 + %x = sext <16 x i16> %a to <16 x i32> + ret <16 x i32> %x +} +;SKX-LABEL: zext_16x16_to_16x32mask: +;KNL-LABEL: zext_16x16_to_16x32mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovb2m %xmm1, %k1 +;SKX-NEXT: vpmovzxwd %ymm0, %zmm0 {%k1} {z} +;KNL: vpmovzxwd %ymm0, %zmm0 {%k1} {z} +;SKX-NEXT: retq +define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone { + %x = zext <16 x i16> %a to <16 x i32> + %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer + ret <16 x i32> %ret +} + +;SKX-LABEL: zext_16x16_to_16x32: +;KNL-LABEL: zext_16x16_to_16x32: +;SKX: ## BB#0: +;SKX-NEXT: vpmovzxwd %ymm0, %zmm0 +;KNL: vpmovzxwd %ymm0, %zmm0 +;SKX-NEXT: retq +define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone { + %x = zext <16 x i16> %a to <16 x i32> + ret <16 x i32> %x +} + +;SKX-LABEL: zext_2x16mem_to_2x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovq2m %xmm0, %k1 +;SKX-NEXT: vpmovzxwq (%rdi), %xmm0 {%k1} {z} +;SKX-NEXT: retq +define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone { + %a = load <2 x i16>,<2 x i16> *%i,align 1 + %x = zext <2 x i16> %a to <2 x i64> + %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer + ret <2 x i64> %ret +} + +;SKX-LABEL: sext_2x16mem_to_2x64mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovq2m %xmm0, %k1 +;SKX-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} +;SKX-NEXT: retq +define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone { + %a = load <2 x i16>,<2 x i16> *%i,align 1 + %x = sext <2 x i16> %a to <2 x i64> + %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer + ret <2 x i64> %ret +} + +;SKX-LABEL: sext_2x16mem_to_2x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovsxwq (%rdi), %xmm0 +;SKX-NEXT: retq +define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16> *%i) nounwind readnone { + %a = load <2 x i16>,<2 x i16> *%i,align 1 + %x = sext <2 x i16> %a to <2 x i64> + ret <2 x i64> %x +} + +;SKX-LABEL: zext_4x16mem_to_4x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovd2m %xmm0, %k1 +;SKX-NEXT: vpmovzxwq (%rdi), %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { + %a = load <4 x i16>,<4 x i16> *%i,align 1 + %x = zext <4 x i16> %a to <4 x i64> + %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer + ret <4 x i64> %ret +} + +;SKX-LABEL: sext_4x16mem_to_4x64mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovd2m %xmm0, %k1 +;SKX-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { + %a = load <4 x i16>,<4 x i16> *%i,align 1 + %x = sext <4 x i16> %a to <4 x i64> + %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer + ret <4 x i64> %ret +} + +;SKX-LABEL: sext_4x16mem_to_4x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovsxwq (%rdi), %ymm0 +;SKX-NEXT: retq +define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readnone { + %a = load <4 x i16>,<4 x i16> *%i,align 1 + %x = sext <4 x i16> %a to <4 x i64> + ret <4 x i64> %x +} + +;SKX-LABEL: zext_8x16mem_to_8x64: +;KNL-LABEL: zext_8x16mem_to_8x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovw2m %xmm0, %k1 +;SKX-NEXT: vpmovzxwq (%rdi), %zmm0 {%k1} {z} +;KNL: vpmovzxwq (%rdi), %zmm0 {%k1} {z} +;SKX-NEXT: retq +define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { + %a = load <8 x i16>,<8 x i16> *%i,align 1 + %x = zext <8 x i16> %a to <8 x i64> + %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer + ret <8 x i64> %ret +} + +;SKX-LABEL: sext_8x16mem_to_8x64mask: +;KNL-LABEL: sext_8x16mem_to_8x64mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovw2m %xmm0, %k1 +;SKX-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} +;KNL: vpmovsxwq (%rdi), %zmm0 {%k1} {z} +;SKX-NEXT: retq +define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { + %a = load <8 x i16>,<8 x i16> *%i,align 1 + %x = sext <8 x i16> %a to <8 x i64> + %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer + ret <8 x i64> %ret +} + +;SKX-LABEL: sext_8x16mem_to_8x64: +;KNL-LABEL: sext_8x16mem_to_8x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovsxwq (%rdi), %zmm0 +;KNL: vpmovsxwq (%rdi), %zmm0 +;SKX-NEXT: retq +define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone { + %a = load <8 x i16>,<8 x i16> *%i,align 1 + %x = sext <8 x i16> %a to <8 x i64> + ret <8 x i64> %x +} + +;SKX-LABEL: zext_8x16_to_8x64mask: +;KNL-LABEL: zext_8x16_to_8x64mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovw2m %xmm1, %k1 +;SKX-NEXT: vpmovzxwq %xmm0, %zmm0 {%k1} {z} +;KNL: vpmovzxwq %xmm0, %zmm0 {%k1} {z} +;SKX-NEXT: retq +define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone { + %x = zext <8 x i16> %a to <8 x i64> + %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer + ret <8 x i64> %ret +} + +;SKX-LABEL: zext_8x16_to_8x64: +;KNL-LABEL: zext_8x16_to_8x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovzxwq %xmm0, %zmm0 +;KNL: vpmovzxwq %xmm0, %zmm0 +;SKX-NEXT: retq +; KNL: ret +define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone { + %ret = zext <8 x i16> %a to <8 x i64> + ret <8 x i64> %ret +} + +;SKX-LABEL: zext_2x32mem_to_2x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovq2m %xmm0, %k1 +;SKX-NEXT: vpmovzxdq (%rdi), %xmm0 {%k1} {z} +;SKX-NEXT: retq +define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone { + %a = load <2 x i32>,<2 x i32> *%i,align 1 + %x = zext <2 x i32> %a to <2 x i64> + %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer + ret <2 x i64> %ret +} + +;SKX-LABEL: sext_2x32mem_to_2x64mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovq2m %xmm0, %k1 +;SKX-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} +;SKX-NEXT: retq +define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone { + %a = load <2 x i32>,<2 x i32> *%i,align 1 + %x = sext <2 x i32> %a to <2 x i64> + %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer + ret <2 x i64> %ret +} + +;SKX-LABEL: sext_2x32mem_to_2x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovsxdq (%rdi), %xmm0 +;SKX-NEXT: retq +define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32> *%i) nounwind readnone { + %a = load <2 x i32>,<2 x i32> *%i,align 1 + %x = sext <2 x i32> %a to <2 x i64> + ret <2 x i64> %x +} + +;SKX-LABEL: zext_4x32mem_to_4x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovd2m %xmm0, %k1 +;SKX-NEXT: vpmovzxdq (%rdi), %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone { + %a = load <4 x i32>,<4 x i32> *%i,align 1 + %x = zext <4 x i32> %a to <4 x i64> + %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer + ret <4 x i64> %ret +} + +;SKX-LABEL: sext_4x32mem_to_4x64mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovd2m %xmm0, %k1 +;SKX-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone { + %a = load <4 x i32>,<4 x i32> *%i,align 1 + %x = sext <4 x i32> %a to <4 x i64> + %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer + ret <4 x i64> %ret +} + +;SKX-LABEL: sext_4x32mem_to_4x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovsxdq (%rdi), %ymm0 +;SKX-NEXT: retq +define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone { + %a = load <4 x i32>,<4 x i32> *%i,align 1 + %x = sext <4 x i32> %a to <4 x i64> + ret <4 x i64> %x +} + +;SKX-LABEL: sext_4x32_to_4x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovsxdq %xmm0, %ymm0 +;SKX-NEXT: retq +define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone { + %x = sext <4 x i32> %a to <4 x i64> + ret <4 x i64> %x +} + +;SKX-LABEL: zext_4x32_to_4x64mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovd2m %xmm1, %k1 +;SKX-NEXT: vpmovzxdq %xmm0, %ymm0 {%k1} {z} +;SKX-NEXT: retq +define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone { + %x = zext <4 x i32> %a to <4 x i64> + %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer + ret <4 x i64> %ret +} + +;SKX-LABEL: zext_8x32mem_to_8x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovw2m %xmm0, %k1 +;SKX-NEXT: vpmovzxdq (%rdi), %zmm0 {%k1} {z} +;SKX-NEXT: retq +define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone { + %a = load <8 x i32>,<8 x i32> *%i,align 1 + %x = zext <8 x i32> %a to <8 x i64> + %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer + ret <8 x i64> %ret +} + +;SKX-LABEL: sext_8x32mem_to_8x64mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovw2m %xmm0, %k1 +;SKX-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} +;SKX-NEXT: retq +define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone { + %a = load <8 x i32>,<8 x i32> *%i,align 1 + %x = sext <8 x i32> %a to <8 x i64> + %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer + ret <8 x i64> %ret +} + +;SKX-LABEL: sext_8x32mem_to_8x64: +;KNL-LABEL: sext_8x32mem_to_8x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovsxdq (%rdi), %zmm0 +;KNL: vpmovsxdq (%rdi), %zmm0 +;SKX-NEXT: retq +define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone { + %a = load <8 x i32>,<8 x i32> *%i,align 1 + %x = sext <8 x i32> %a to <8 x i64> + ret <8 x i64> %x +} + +;SKX-LABEL: sext_8x32_to_8x64: +;KNL-LABEL: sext_8x32_to_8x64: +;SKX: ## BB#0: +;SKX-NEXT: vpmovsxdq %ymm0, %zmm0 +;KNL: vpmovsxdq %ymm0, %zmm0 +;SKX-NEXT: retq +define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone { + %x = sext <8 x i32> %a to <8 x i64> + ret <8 x i64> %x +} + +;SKX-LABEL: zext_8x32_to_8x64mask: +;KNL-LABEL: zext_8x32_to_8x64mask: +;SKX: ## BB#0: +;SKX-NEXT: vpmovw2m %xmm1, %k1 +;SKX-NEXT: vpmovzxdq %ymm0, %zmm0 {%k1} {z} +;KNL: vpmovzxdq %ymm0, %zmm0 {%k1} {z} +;SKX-NEXT: retq +define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone { + %x = zext <8 x i32> %a to <8 x i64> + %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer + ret <8 x i64> %ret +} +;KNL-LABEL: fptrunc_test +;KNL: vcvtpd2ps {{.*}}%zmm +;KNL: ret +define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone { + %b = fptrunc <8 x double> %a to <8 x float> + ret <8 x float> %b +} + +;KNL-LABEL: fpext_test +;KNL: vcvtps2pd {{.*}}%zmm +;KNL: ret +define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone { + %b = fpext <8 x float> %a to <8 x double> + ret <8 x double> %b +} + +; KNL-LABEL: zext_16i1_to_16xi32 +; KNL: vpbroadcastd LCP{{.*}}(%rip), %zmm0 {%k1} {z} +; KNL: ret +define <16 x i32> @zext_16i1_to_16xi32(i16 %b) { + %a = bitcast i16 %b to <16 x i1> + %c = zext <16 x i1> %a to <16 x i32> + ret <16 x i32> %c +} + +; KNL-LABEL: zext_8i1_to_8xi64 +; KNL: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z} +; KNL: ret +define <8 x i64> @zext_8i1_to_8xi64(i8 %b) { + %a = bitcast i8 %b to <8 x i1> + %c = zext <8 x i1> %a to <8 x i64> + ret <8 x i64> %c +} + +; KNL-LABEL: trunc_16i8_to_16i1 +; KNL: vpmovsxbd +; KNL: vpandd +; KNL: vptestmd +; KNL: ret +; SKX-LABEL: trunc_16i8_to_16i1 +; SKX: vpmovb2m %xmm +define i16 @trunc_16i8_to_16i1(<16 x i8> %a) { + %mask_b = trunc <16 x i8>%a to <16 x i1> + %mask = bitcast <16 x i1> %mask_b to i16 + ret i16 %mask +} + +; KNL-LABEL: trunc_16i32_to_16i1 +; KNL: vpandd +; KNL: vptestmd +; KNL: ret +; SKX-LABEL: trunc_16i32_to_16i1 +; SKX: vpmovd2m %zmm +define i16 @trunc_16i32_to_16i1(<16 x i32> %a) { + %mask_b = trunc <16 x i32>%a to <16 x i1> + %mask = bitcast <16 x i1> %mask_b to i16 + ret i16 %mask +} + +; SKX-LABEL: trunc_4i32_to_4i1 +; SKX: vpmovd2m %xmm +; SKX: kandw +; SKX: vpmovm2d +define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) { + %mask_a = trunc <4 x i32>%a to <4 x i1> + %mask_b = trunc <4 x i32>%b to <4 x i1> + %a_and_b = and <4 x i1>%mask_a, %mask_b + %res = sext <4 x i1>%a_and_b to <4 x i32> + ret <4 x i32>%res +} + +; KNL-LABEL: trunc_8i16_to_8i1 +; KNL: vpmovsxwq +; KNL: vpandq LCP{{.*}}(%rip){1to8} +; KNL: vptestmq +; KNL: ret + +; SKX-LABEL: trunc_8i16_to_8i1 +; SKX: vpmovw2m %xmm +define i8 @trunc_8i16_to_8i1(<8 x i16> %a) { + %mask_b = trunc <8 x i16>%a to <8 x i1> + %mask = bitcast <8 x i1> %mask_b to i8 + ret i8 %mask +} + +; KNL-LABEL: sext_8i1_8i32 +; KNL: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z} +; SKX: vpmovm2d +; KNL: ret +define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind { + %x = icmp slt <8 x i32> %a1, %a2 + %x1 = xor <8 x i1>%x, + %y = sext <8 x i1> %x1 to <8 x i32> + ret <8 x i32> %y +} + +; KNL-LABEL: trunc_v16i32_to_v16i16 +; KNL: vpmovdw +; KNL: ret +define <16 x i16> @trunc_v16i32_to_v16i16(<16 x i32> %x) { + %1 = trunc <16 x i32> %x to <16 x i16> + ret <16 x i16> %1 +} + +; KNL-LABEL: trunc_i32_to_i1 +; KNL: movw $-4, %ax +; KNL: kmovw %eax, %k1 +; KNL: korw +define i16 @trunc_i32_to_i1(i32 %a) { + %a_i = trunc i32 %a to i1 + %maskv = insertelement <16 x i1> , i1 %a_i, i32 0 + %res = bitcast <16 x i1> %maskv to i16 + ret i16 %res +} + +; KNL-LABEL: sext_8i1_8i16 +; SKX: vpmovm2w +; KNL: ret +define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind { + %x = icmp slt <8 x i32> %a1, %a2 + %y = sext <8 x i1> %x to <8 x i16> + ret <8 x i16> %y +} + +; KNL-LABEL: sext_16i1_16i32 +; SKX: vpmovm2d +; KNL: ret +define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind { + %x = icmp slt <16 x i32> %a1, %a2 + %y = sext <16 x i1> %x to <16 x i32> + ret <16 x i32> %y +} + +; KNL-LABEL: sext_8i1_8i64 +; SKX: vpmovm2q +; KNL: ret +define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind { + %x = icmp slt <8 x i32> %a1, %a2 + %y = sext <8 x i1> %x to <8 x i64> + ret <8 x i64> %y +} + +; KNL-LABEL: @extload_v8i64 +; KNL: vpmovsxbq +define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) { + %sign_load = load <8 x i8>, <8 x i8>* %a + %c = sext <8 x i8> %sign_load to <8 x i64> + store <8 x i64> %c, <8 x i64>* %res + ret void +} + +;SKX-LABEL: test21: +;SKX: vmovdqu16 %zmm0, %zmm3 {%k1} +;SKX-NEXT: kshiftrq $32, %k1, %k1 +;SKX-NEXT: vmovdqu16 %zmm1, %zmm2 {%k1} +define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { + %ret = select <64 x i1> %mask, <64 x i16> %x, <64 x i16> zeroinitializer + ret <64 x i16> %ret +} + diff --git a/test/CodeGen/X86/avx512-trunc.ll b/test/CodeGen/X86/avx512-trunc.ll deleted file mode 100644 index 9205feda7eb..00000000000 --- a/test/CodeGen/X86/avx512-trunc.ll +++ /dev/null @@ -1,364 +0,0 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX - - attributes #0 = { nounwind } - -; KNL-LABEL: trunc_16x32_to_16x8 -; KNL: vpmovdb -; KNL: ret -define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) #0 { - %x = trunc <16 x i32> %i to <16 x i8> - ret <16 x i8> %x -} - -; KNL-LABEL: trunc_8x64_to_8x16 -; KNL: vpmovqw -; KNL: ret -define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) #0 { - %x = trunc <8 x i64> %i to <8 x i16> - ret <8 x i16> %x -} - -; KNL-LABEL: trunc_v16i32_to_v16i16 -; KNL: vpmovdw -; KNL: ret -define <16 x i16> @trunc_v16i32_to_v16i16(<16 x i32> %x) #0 { - %1 = trunc <16 x i32> %x to <16 x i16> - ret <16 x i16> %1 -} - -define <8 x i8> @trunc_qb_512(<8 x i64> %i) #0 { -; SKX-LABEL: trunc_qb_512: -; SKX: ## BB#0: -; SKX-NEXT: vpmovqw %zmm0, %xmm0 -; SKX-NEXT: retq - %x = trunc <8 x i64> %i to <8 x i8> - ret <8 x i8> %x -} - -define void @trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) #0 { -; SKX-LABEL: trunc_qb_512_mem: -; SKX: ## BB#0: -; SKX-NEXT: vpmovqb %zmm0, (%rdi) -; SKX-NEXT: retq - %x = trunc <8 x i64> %i to <8 x i8> - store <8 x i8> %x, <8 x i8>* %res - ret void -} - -define <4 x i8> @trunc_qb_256(<4 x i64> %i) #0 { -; SKX-LABEL: trunc_qb_256: -; SKX: ## BB#0: -; SKX-NEXT: vpmovqd %ymm0, %xmm0 -; SKX-NEXT: retq - %x = trunc <4 x i64> %i to <4 x i8> - ret <4 x i8> %x -} - -define void @trunc_qb_256_mem(<4 x i64> %i, <4 x i8>* %res) #0 { -; SKX-LABEL: trunc_qb_256_mem: -; SKX: ## BB#0: -; SKX-NEXT: vpmovqb %ymm0, (%rdi) -; SKX-NEXT: retq - %x = trunc <4 x i64> %i to <4 x i8> - store <4 x i8> %x, <4 x i8>* %res - ret void -} - -define <2 x i8> @trunc_qb_128(<2 x i64> %i) #0 { -; SKX-LABEL: trunc_qb_128: -; SKX: ## BB#0: -; SKX-NEXT: retq - %x = trunc <2 x i64> %i to <2 x i8> - ret <2 x i8> %x -} - -define void @trunc_qb_128_mem(<2 x i64> %i, <2 x i8>* %res) #0 { -; SKX-LABEL: trunc_qb_128_mem: -; SKX: ## BB#0: -; SKX-NEXT: vpmovqb %xmm0, (%rdi) -; SKX-NEXT: retq - %x = trunc <2 x i64> %i to <2 x i8> - store <2 x i8> %x, <2 x i8>* %res - ret void -} - -define <8 x i16> @trunc_qw_512(<8 x i64> %i) #0 { -; SKX-LABEL: trunc_qw_512: -; SKX: ## BB#0: -; SKX-NEXT: vpmovqw %zmm0, %xmm0 -; SKX-NEXT: retq - %x = trunc <8 x i64> %i to <8 x i16> - ret <8 x i16> %x -} - -define void @trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) #0 { -; SKX-LABEL: trunc_qw_512_mem: -; SKX: ## BB#0: -; SKX-NEXT: vpmovqw %zmm0, (%rdi) -; SKX-NEXT: retq - %x = trunc <8 x i64> %i to <8 x i16> - store <8 x i16> %x, <8 x i16>* %res - ret void -} - -define <4 x i16> @trunc_qw_256(<4 x i64> %i) #0 { -; SKX-LABEL: trunc_qw_256: -; SKX: ## BB#0: -; SKX-NEXT: vpmovqd %ymm0, %xmm0 -; SKX-NEXT: retq - %x = trunc <4 x i64> %i to <4 x i16> - ret <4 x i16> %x -} - -define void @trunc_qw_256_mem(<4 x i64> %i, <4 x i16>* %res) #0 { -; SKX-LABEL: trunc_qw_256_mem: -; SKX: ## BB#0: -; SKX-NEXT: vpmovqw %ymm0, (%rdi) -; SKX-NEXT: retq - %x = trunc <4 x i64> %i to <4 x i16> - store <4 x i16> %x, <4 x i16>* %res - ret void -} - -define <2 x i16> @trunc_qw_128(<2 x i64> %i) #0 { -; SKX-LABEL: trunc_qw_128: -; SKX: ## BB#0: -; SKX-NEXT: retq - %x = trunc <2 x i64> %i to <2 x i16> - ret <2 x i16> %x -} - -define void @trunc_qw_128_mem(<2 x i64> %i, <2 x i16>* %res) #0 { -; SKX-LABEL: trunc_qw_128_mem: -; SKX: ## BB#0: -; SKX-NEXT: vpmovqw %xmm0, (%rdi) -; SKX-NEXT: retq - %x = trunc <2 x i64> %i to <2 x i16> - store <2 x i16> %x, <2 x i16>* %res - ret void -} - -define <8 x i32> @trunc_qd_512(<8 x i64> %i) #0 { -; SKX-LABEL: trunc_qd_512: -; SKX: ## BB#0: -; SKX-NEXT: vpmovqd %zmm0, %ymm0 -; SKX-NEXT: retq - %x = trunc <8 x i64> %i to <8 x i32> - ret <8 x i32> %x -} - -define void @trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) #0 { -; SKX-LABEL: trunc_qd_512_mem: -; SKX: ## BB#0: -; SKX-NEXT: vpmovqd %zmm0, (%rdi) -; SKX-NEXT: retq - %x = trunc <8 x i64> %i to <8 x i32> - store <8 x i32> %x, <8 x i32>* %res - ret void -} - -define <4 x i32> @trunc_qd_256(<4 x i64> %i) #0 { -; SKX-LABEL: trunc_qd_256: -; SKX: ## BB#0: -; SKX-NEXT: vpmovqd %ymm0, %xmm0 -; SKX-NEXT: retq - %x = trunc <4 x i64> %i to <4 x i32> - ret <4 x i32> %x -} - -define void @trunc_qd_256_mem(<4 x i64> %i, <4 x i32>* %res) #0 { -; SKX-LABEL: trunc_qd_256_mem: -; SKX: ## BB#0: -; SKX-NEXT: vpmovqd %ymm0, (%rdi) -; SKX-NEXT: retq - %x = trunc <4 x i64> %i to <4 x i32> - store <4 x i32> %x, <4 x i32>* %res - ret void -} - -define <2 x i32> @trunc_qd_128(<2 x i64> %i) #0 { -; SKX-LABEL: trunc_qd_128: -; SKX: ## BB#0: -; SKX-NEXT: retq - %x = trunc <2 x i64> %i to <2 x i32> - ret <2 x i32> %x -} - -define void @trunc_qd_128_mem(<2 x i64> %i, <2 x i32>* %res) #0 { -; SKX-LABEL: trunc_qd_128_mem: -; SKX: ## BB#0: -; SKX-NEXT: vpmovqd %xmm0, (%rdi) -; SKX-NEXT: retq - %x = trunc <2 x i64> %i to <2 x i32> - store <2 x i32> %x, <2 x i32>* %res - ret void -} - -define <16 x i8> @trunc_db_512(<16 x i32> %i) #0 { -; SKX-LABEL: trunc_db_512: -; SKX: ## BB#0: -; SKX-NEXT: vpmovdb %zmm0, %xmm0 -; SKX-NEXT: retq - %x = trunc <16 x i32> %i to <16 x i8> - ret <16 x i8> %x -} - -define void @trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) #0 { -; SKX-LABEL: trunc_db_512_mem: -; SKX: ## BB#0: -; SKX-NEXT: vpmovdb %zmm0, (%rdi) -; SKX-NEXT: retq - %x = trunc <16 x i32> %i to <16 x i8> - store <16 x i8> %x, <16 x i8>* %res - ret void -} - -define <8 x i8> @trunc_db_256(<8 x i32> %i) #0 { -; SKX-LABEL: trunc_db_256: -; SKX: ## BB#0: -; SKX-NEXT: vpmovdw %ymm0, %xmm0 -; SKX-NEXT: retq - %x = trunc <8 x i32> %i to <8 x i8> - ret <8 x i8> %x -} - -define void @trunc_db_256_mem(<8 x i32> %i, <8 x i8>* %res) #0 { -; SKX-LABEL: trunc_db_256_mem: -; SKX: ## BB#0: -; SKX-NEXT: vpmovdb %ymm0, (%rdi) -; SKX-NEXT: retq - %x = trunc <8 x i32> %i to <8 x i8> - store <8 x i8> %x, <8 x i8>* %res - ret void -} - -define <4 x i8> @trunc_db_128(<4 x i32> %i) #0 { -; SKX-LABEL: trunc_db_128: -; SKX: ## BB#0: -; SKX-NEXT: retq - %x = trunc <4 x i32> %i to <4 x i8> - ret <4 x i8> %x -} - -define void @trunc_db_128_mem(<4 x i32> %i, <4 x i8>* %res) #0 { -; SKX-LABEL: trunc_db_128_mem: -; SKX: ## BB#0: -; SKX-NEXT: vpmovdb %xmm0, (%rdi) -; SKX-NEXT: retq - %x = trunc <4 x i32> %i to <4 x i8> - store <4 x i8> %x, <4 x i8>* %res - ret void -} - -define <16 x i16> @trunc_dw_512(<16 x i32> %i) #0 { -; SKX-LABEL: trunc_dw_512: -; SKX: ## BB#0: -; SKX-NEXT: vpmovdw %zmm0, %ymm0 -; SKX-NEXT: retq - %x = trunc <16 x i32> %i to <16 x i16> - ret <16 x i16> %x -} - -define void @trunc_dw_512_mem(<16 x i32> %i, <16 x i16>* %res) #0 { -; SKX-LABEL: trunc_dw_512_mem: -; SKX: ## BB#0: -; SKX-NEXT: vpmovdw %zmm0, (%rdi) -; SKX-NEXT: retq - %x = trunc <16 x i32> %i to <16 x i16> - store <16 x i16> %x, <16 x i16>* %res - ret void -} - -define <8 x i16> @trunc_dw_256(<8 x i32> %i) #0 { -; SKX-LABEL: trunc_dw_256: -; SKX: ## BB#0: -; SKX-NEXT: vpmovdw %ymm0, %xmm0 -; SKX-NEXT: retq - %x = trunc <8 x i32> %i to <8 x i16> - ret <8 x i16> %x -} - -define void @trunc_dw_256_mem(<8 x i32> %i, <8 x i16>* %res) #0 { -; SKX-LABEL: trunc_dw_256_mem: -; SKX: ## BB#0: -; SKX-NEXT: vpmovdw %ymm0, (%rdi) -; SKX-NEXT: retq - %x = trunc <8 x i32> %i to <8 x i16> - store <8 x i16> %x, <8 x i16>* %res - ret void -} - -define <4 x i16> @trunc_dw_128(<4 x i32> %i) #0 { -; SKX-LABEL: trunc_dw_128: -; SKX: ## BB#0: -; SKX-NEXT: retq - %x = trunc <4 x i32> %i to <4 x i16> - ret <4 x i16> %x -} - -define void @trunc_dw_128_mem(<4 x i32> %i, <4 x i16>* %res) #0 { -; SKX-LABEL: trunc_dw_128_mem: -; SKX: ## BB#0: -; SKX-NEXT: vpmovdw %xmm0, (%rdi) -; SKX-NEXT: retq - %x = trunc <4 x i32> %i to <4 x i16> - store <4 x i16> %x, <4 x i16>* %res - ret void -} - -define <32 x i8> @trunc_wb_512(<32 x i16> %i) #0 { -; SKX-LABEL: trunc_wb_512: -; SKX: ## BB#0: -; SKX-NEXT: vpmovwb %zmm0, %ymm0 -; SKX-NEXT: retq - %x = trunc <32 x i16> %i to <32 x i8> - ret <32 x i8> %x -} - -define void @trunc_wb_512_mem(<32 x i16> %i, <32 x i8>* %res) #0 { -; SKX-LABEL: trunc_wb_512_mem: -; SKX: ## BB#0: -; SKX-NEXT: vpmovwb %zmm0, (%rdi) -; SKX-NEXT: retq - %x = trunc <32 x i16> %i to <32 x i8> - store <32 x i8> %x, <32 x i8>* %res - ret void -} - -define <16 x i8> @trunc_wb_256(<16 x i16> %i) #0 { -; SKX-LABEL: trunc_wb_256: -; SKX: ## BB#0: -; SKX-NEXT: vpmovwb %ymm0, %xmm0 -; SKX-NEXT: retq - %x = trunc <16 x i16> %i to <16 x i8> - ret <16 x i8> %x -} - -define void @trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) #0 { -; SKX-LABEL: trunc_wb_256_mem: -; SKX: ## BB#0: -; SKX-NEXT: vpmovwb %ymm0, (%rdi) -; SKX-NEXT: retq - %x = trunc <16 x i16> %i to <16 x i8> - store <16 x i8> %x, <16 x i8>* %res - ret void -} - -define <8 x i8> @trunc_wb_128(<8 x i16> %i) #0 { -; SKX-LABEL: trunc_wb_128: -; SKX: ## BB#0: -; SKX-NEXT: retq - %x = trunc <8 x i16> %i to <8 x i8> - ret <8 x i8> %x -} - -define void @trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) #0 { -; SKX-LABEL: trunc_wb_128_mem: -; SKX: ## BB#0: -; SKX-NEXT: vpmovwb %xmm0, (%rdi) -; SKX-NEXT: retq - %x = trunc <8 x i16> %i to <8 x i8> - store <8 x i8> %x, <8 x i8>* %res - ret void -} diff --git a/test/CodeGen/X86/avx512bw-intrinsics.ll b/test/CodeGen/X86/avx512bw-intrinsics.ll index 5ad28ab5ab5..b2b417df2f1 100644 --- a/test/CodeGen/X86/avx512bw-intrinsics.ll +++ b/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -1008,84 +1008,6 @@ define <32 x i16>@test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i ret <32 x i16> %res2 } -declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x i8>, i32) - -define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_512: -; CHECK: vpmovwb %zmm0, %ymm1 {%k1} -; CHECK-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z} -; CHECK-NEXT: vpmovwb %zmm0, %ymm0 - %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) - %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) - %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2) - %res3 = add <32 x i8> %res0, %res1 - %res4 = add <32 x i8> %res3, %res2 - ret <32 x i8> %res4 -} - -declare void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16>, i32) - -define void @test_int_x86_avx512_mask_pmov_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512: -; CHECK: vpmovwb %zmm0, (%rdi) -; CHECK: vpmovwb %zmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1) - call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2) - ret void -} - -declare <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16>, <32 x i8>, i32) - -define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_512: -; CHECK: vpmovswb %zmm0, %ymm1 {%k1} -; CHECK-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z} -; CHECK-NEXT: vpmovswb %zmm0, %ymm0 - %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) - %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) - %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2) - %res3 = add <32 x i8> %res0, %res1 - %res4 = add <32 x i8> %res3, %res2 - ret <32 x i8> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16>, i32) - -define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512: -; CHECK: vpmovswb %zmm0, (%rdi) -; CHECK: vpmovswb %zmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1) - call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2) - ret void -} - -declare <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16>, <32 x i8>, i32) - -define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_512: -; CHECK: vpmovuswb %zmm0, %ymm1 {%k1} -; CHECK-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z} -; CHECK-NEXT: vpmovuswb %zmm0, %ymm0 - %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) - %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) - %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2) - %res3 = add <32 x i8> %res0, %res1 - %res4 = add <32 x i8> %res3, %res2 - ret <32 x i8> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16>, i32) - -define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512: -; CHECK: vpmovuswb %zmm0, (%rdi) -; CHECK: vpmovuswb %zmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1) - call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2) - ret void -} - declare <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8>, <64 x i8>, <32 x i16>, i32) define <32 x i16>@test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) { diff --git a/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/test/CodeGen/X86/avx512bwvl-intrinsics.ll index ee76ae2a8a3..2373dc089ae 100644 --- a/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ b/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -3876,162 +3876,6 @@ define <16 x i16>@test_int_x86_avx512_mask_pmulhr_sw_256(<16 x i16> %x0, <16 x i ret <16 x i16> %res2 } -declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16>, <16 x i8>, i8) - -define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_128: -; CHECK: vpmovwb %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovwb %xmm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovwb %xmm0, %xmm0 - %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1) - %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) - %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2) - %res3 = add <16 x i8> %res0, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 -} - -declare void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16>, i8) - -define void @test_int_x86_avx512_mask_pmov_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_128: -; CHECK: vpmovwb %xmm0, (%rdi) -; CHECK: vpmovwb %xmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2) - ret void -} - -declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16>, <16 x i8>, i8) - -define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_128: -; CHECK: vpmovswb %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovswb %xmm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovswb %xmm0, %xmm0 - %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1) - %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) - %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2) - %res3 = add <16 x i8> %res0, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16>, i8) - -define void @test_int_x86_avx512_mask_pmovs_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_128: -; CHECK: vpmovswb %xmm0, (%rdi) -; CHECK: vpmovswb %xmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2) - ret void -} - -declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16>, <16 x i8>, i8) - -define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_128: -; CHECK: vpmovuswb %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovuswb %xmm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovuswb %xmm0, %xmm0 - %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1) - %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) - %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2) - %res3 = add <16 x i8> %res0, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16>, i8) - -define void @test_int_x86_avx512_mask_pmovus_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_128: -; CHECK: vpmovuswb %xmm0, (%rdi) -; CHECK: vpmovuswb %xmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2) - ret void -} - -declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16>, <16 x i8>, i16) - -define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_256: -; CHECK: vpmovwb %ymm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovwb %ymm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovwb %ymm0, %xmm0 - %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1) - %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) - %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2) - %res3 = add <16 x i8> %res0, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 -} - -declare void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16>, i16) - -define void @test_int_x86_avx512_mask_pmov_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_256: -; CHECK: vpmovwb %ymm0, (%rdi) -; CHECK: vpmovwb %ymm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1) - call void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2) - ret void -} - -declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16>, <16 x i8>, i16) - -define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_256: -; CHECK: vpmovswb %ymm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovswb %ymm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovswb %ymm0, %xmm0 - %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1) - %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) - %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2) - %res3 = add <16 x i8> %res0, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16>, i16) - -define void @test_int_x86_avx512_mask_pmovs_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_256: -; CHECK: vpmovswb %ymm0, (%rdi) -; CHECK: vpmovswb %ymm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1) - call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2) - ret void -} - -declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16>, <16 x i8>, i16) - -define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_256: -; CHECK: vpmovuswb %ymm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovuswb %ymm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovuswb %ymm0, %xmm0 - %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1) - %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) - %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2) - %res3 = add <16 x i8> %res0, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16>, i16) - -define void @test_int_x86_avx512_mask_pmovus_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_256: -; CHECK: vpmovuswb %ymm0, (%rdi) -; CHECK: vpmovuswb %ymm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1) - call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2) - ret void -} - declare <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16>, <8 x i16>, <4 x i32>, i8) define <4 x i32>@test_int_x86_avx512_mask_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) { diff --git a/test/CodeGen/X86/avx512vl-intrinsics.ll b/test/CodeGen/X86/avx512vl-intrinsics.ll index 7812148de1c..46ee51f47b6 100644 --- a/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -3005,786 +3005,6 @@ define <8 x float>@test_int_x86_avx512_mask_scalef_ps_256(<8 x float> %x0, <8 x ret <8 x float> %res2 } -declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64>, <16 x i8>, i8) - -define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_128: -; CHECK: vpmovqb %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovqb %xmm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovqb %xmm0, %xmm0 - %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1) - %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) - %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) - %res3 = add <16 x i8> %res0, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 -} - -declare void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64>, i8) - -define void @test_int_x86_avx512_mask_pmov_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_128: -; CHECK: vpmovqb %xmm0, (%rdi) -; CHECK: vpmovqb %xmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) - ret void -} - -declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64>, <16 x i8>, i8) - -define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_128: -; CHECK: vpmovsqb %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovsqb %xmm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovsqb %xmm0, %xmm0 - %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1) - %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) - %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) - %res3 = add <16 x i8> %res0, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64>, i8) - -define void @test_int_x86_avx512_mask_pmovs_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_128: -; CHECK: vpmovsqb %xmm0, (%rdi) -; CHECK: vpmovsqb %xmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) - ret void -} - -declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64>, <16 x i8>, i8) - -define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_128: -; CHECK: vpmovusqb %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovusqb %xmm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovusqb %xmm0, %xmm0 - %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1) - %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) - %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) - %res3 = add <16 x i8> %res0, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64>, i8) - -define void @test_int_x86_avx512_mask_pmovus_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_128: -; CHECK: vpmovusqb %xmm0, (%rdi) -; CHECK: vpmovusqb %xmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) - ret void -} - -declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64>, <16 x i8>, i8) - -define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_256: -; CHECK: vpmovqb %ymm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovqb %ymm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovqb %ymm0, %xmm0 - %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1) - %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) - %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) - %res3 = add <16 x i8> %res0, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 -} - -declare void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64>, i8) - -define void @test_int_x86_avx512_mask_pmov_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_256: -; CHECK: vpmovqb %ymm0, (%rdi) -; CHECK: vpmovqb %ymm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) - ret void -} - -declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64>, <16 x i8>, i8) - -define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_256: -; CHECK: vpmovsqb %ymm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovsqb %ymm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovsqb %ymm0, %xmm0 - %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1) - %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) - %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) - %res3 = add <16 x i8> %res0, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64>, i8) - -define void @test_int_x86_avx512_mask_pmovs_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_256: -; CHECK: vpmovsqb %ymm0, (%rdi) -; CHECK: vpmovsqb %ymm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) - ret void -} - -declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64>, <16 x i8>, i8) - -define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_256: -; CHECK: vpmovusqb %ymm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovusqb %ymm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovusqb %ymm0, %xmm0 - %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1) - %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) - %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) - %res3 = add <16 x i8> %res0, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64>, i8) - -define void @test_int_x86_avx512_mask_pmovus_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_256: -; CHECK: vpmovusqb %ymm0, (%rdi) -; CHECK: vpmovusqb %ymm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) - ret void -} - -declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64>, <8 x i16>, i8) - -define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_128: -; CHECK: vpmovqw %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovqw %xmm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovqw %xmm0, %xmm0 - %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1) - %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) - %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) - %res3 = add <8 x i16> %res0, %res1 - %res4 = add <8 x i16> %res3, %res2 - ret <8 x i16> %res4 -} - -declare void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64>, i8) - -define void @test_int_x86_avx512_mask_pmov_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_128: -; CHECK: vpmovqw %xmm0, (%rdi) -; CHECK: vpmovqw %xmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) - ret void -} - -declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64>, <8 x i16>, i8) - -define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_128: -; CHECK: vpmovsqw %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovsqw %xmm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovsqw %xmm0, %xmm0 - %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1) - %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) - %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) - %res3 = add <8 x i16> %res0, %res1 - %res4 = add <8 x i16> %res3, %res2 - ret <8 x i16> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64>, i8) - -define void @test_int_x86_avx512_mask_pmovs_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_128: -; CHECK: vpmovsqw %xmm0, (%rdi) -; CHECK: vpmovsqw %xmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) - ret void -} - -declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64>, <8 x i16>, i8) - -define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_128: -; CHECK: vpmovusqw %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovusqw %xmm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovusqw %xmm0, %xmm0 - %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1) - %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) - %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) - %res3 = add <8 x i16> %res0, %res1 - %res4 = add <8 x i16> %res3, %res2 - ret <8 x i16> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64>, i8) - -define void @test_int_x86_avx512_mask_pmovus_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_128: -; CHECK: vpmovusqw %xmm0, (%rdi) -; CHECK: vpmovusqw %xmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) - ret void -} - -declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64>, <8 x i16>, i8) - -define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_256: -; CHECK: vpmovqw %ymm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovqw %ymm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovqw %ymm0, %xmm0 - %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1) - %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) - %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) - %res3 = add <8 x i16> %res0, %res1 - %res4 = add <8 x i16> %res3, %res2 - ret <8 x i16> %res4 -} - -declare void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64>, i8) - -define void @test_int_x86_avx512_mask_pmov_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_256: -; CHECK: vpmovqw %ymm0, (%rdi) -; CHECK: vpmovqw %ymm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) - ret void -} - -declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64>, <8 x i16>, i8) - -define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_256: -; CHECK: vpmovsqw %ymm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovsqw %ymm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovsqw %ymm0, %xmm0 - %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1) - %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) - %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) - %res3 = add <8 x i16> %res0, %res1 - %res4 = add <8 x i16> %res3, %res2 - ret <8 x i16> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64>, i8) - -define void @test_int_x86_avx512_mask_pmovs_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_256: -; CHECK: vpmovsqw %ymm0, (%rdi) -; CHECK: vpmovsqw %ymm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) - ret void -} - -declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64>, <8 x i16>, i8) - -define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_256: -; CHECK: vpmovusqw %ymm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovusqw %ymm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovusqw %ymm0, %xmm0 - %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1) - %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) - %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) - %res3 = add <8 x i16> %res0, %res1 - %res4 = add <8 x i16> %res3, %res2 - ret <8 x i16> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64>, i8) - -define void @test_int_x86_avx512_mask_pmovus_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_256: -; CHECK: vpmovusqw %ymm0, (%rdi) -; CHECK: vpmovusqw %ymm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) - ret void -} - -declare <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64>, <4 x i32>, i8) - -define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_128: -; CHECK: vpmovqd %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovqd %xmm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovqd %xmm0, %xmm0 - %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1) - %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) - %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2) - %res3 = add <4 x i32> %res0, %res1 - %res4 = add <4 x i32> %res3, %res2 - ret <4 x i32> %res4 -} - -declare void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64>, i8) - -define void @test_int_x86_avx512_mask_pmov_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_128: -; CHECK: vpmovqd %xmm0, (%rdi) -; CHECK: vpmovqd %xmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) - ret void -} - -declare <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64>, <4 x i32>, i8) - -define <4 x i32>@test_int_x86_avx512_mask_pmovs_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_128: -; CHECK: vpmovsqd %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovsqd %xmm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovsqd %xmm0, %xmm0 - %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1) - %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) - %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2) - %res3 = add <4 x i32> %res0, %res1 - %res4 = add <4 x i32> %res3, %res2 - ret <4 x i32> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64>, i8) - -define void @test_int_x86_avx512_mask_pmovs_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_128: -; CHECK: vpmovsqd %xmm0, (%rdi) -; CHECK: vpmovsqd %xmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) - ret void -} - -declare <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64>, <4 x i32>, i8) - -define <4 x i32>@test_int_x86_avx512_mask_pmovus_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_128: -; CHECK: vpmovusqd %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovusqd %xmm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovusqd %xmm0, %xmm0 - %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1) - %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) - %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2) - %res3 = add <4 x i32> %res0, %res1 - %res4 = add <4 x i32> %res3, %res2 - ret <4 x i32> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64>, i8) - -define void @test_int_x86_avx512_mask_pmovus_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_128: -; CHECK: vpmovusqd %xmm0, (%rdi) -; CHECK: vpmovusqd %xmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) - ret void -} - -declare <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64>, <4 x i32>, i8) - -define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_256: -; CHECK: vpmovqd %ymm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovqd %ymm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovqd %ymm0, %xmm0 - %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1) - %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) - %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2) - %res3 = add <4 x i32> %res0, %res1 - %res4 = add <4 x i32> %res3, %res2 - ret <4 x i32> %res4 -} - -declare void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64>, i8) - -define void @test_int_x86_avx512_mask_pmov_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_256: -; CHECK: vpmovqd %ymm0, (%rdi) -; CHECK: vpmovqd %ymm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) - ret void -} - -declare <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64>, <4 x i32>, i8) - -define <4 x i32>@test_int_x86_avx512_mask_pmovs_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_256: -; CHECK: vpmovsqd %ymm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovsqd %ymm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovsqd %ymm0, %xmm0 - %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1) - %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) - %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2) - %res3 = add <4 x i32> %res0, %res1 - %res4 = add <4 x i32> %res3, %res2 - ret <4 x i32> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64>, i8) - -define void @test_int_x86_avx512_mask_pmovs_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_256: -; CHECK: vpmovsqd %ymm0, (%rdi) -; CHECK: vpmovsqd %ymm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) - ret void -} - -declare <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64>, <4 x i32>, i8) - -define <4 x i32>@test_int_x86_avx512_mask_pmovus_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_256: -; CHECK: vpmovusqd %ymm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovusqd %ymm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovusqd %ymm0, %xmm0 - %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1) - %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) - %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2) - %res3 = add <4 x i32> %res0, %res1 - %res4 = add <4 x i32> %res3, %res2 - ret <4 x i32> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64>, i8) - -define void @test_int_x86_avx512_mask_pmovus_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_256: -; CHECK: vpmovusqd %ymm0, (%rdi) -; CHECK: vpmovusqd %ymm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) - ret void -} - -declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32>, <16 x i8>, i8) - -define <16 x i8>@test_int_x86_avx512_mask_pmov_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_128: -; CHECK: vpmovdb %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovdb %xmm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovdb %xmm0, %xmm0 - %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1) - %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) - %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2) - %res3 = add <16 x i8> %res0, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 -} - -declare void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32>, i8) - -define void @test_int_x86_avx512_mask_pmov_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_128: -; CHECK: vpmovdb %xmm0, (%rdi) -; CHECK: vpmovdb %xmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2) - ret void -} - -declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32>, <16 x i8>, i8) - -define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_128: -; CHECK: vpmovsdb %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovsdb %xmm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovsdb %xmm0, %xmm0 - %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1) - %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) - %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2) - %res3 = add <16 x i8> %res0, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32>, i8) - -define void @test_int_x86_avx512_mask_pmovs_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_128: -; CHECK: vpmovsdb %xmm0, (%rdi) -; CHECK: vpmovsdb %xmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2) - ret void -} - -declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32>, <16 x i8>, i8) - -define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_128: -; CHECK: vpmovusdb %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovusdb %xmm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovusdb %xmm0, %xmm0 - %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1) - %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) - %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2) - %res3 = add <16 x i8> %res0, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32>, i8) - -define void @test_int_x86_avx512_mask_pmovus_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_128: -; CHECK: vpmovusdb %xmm0, (%rdi) -; CHECK: vpmovusdb %xmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2) - ret void -} - -declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32>, <16 x i8>, i8) - -define <16 x i8>@test_int_x86_avx512_mask_pmov_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_256: -; CHECK: vpmovdb %ymm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovdb %ymm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovdb %ymm0, %xmm0 - %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1) - %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) - %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2) - %res3 = add <16 x i8> %res0, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 -} - -declare void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32>, i8) - -define void @test_int_x86_avx512_mask_pmov_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_256: -; CHECK: vpmovdb %ymm0, (%rdi) -; CHECK: vpmovdb %ymm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2) - ret void -} - -declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32>, <16 x i8>, i8) - -define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_256: -; CHECK: vpmovsdb %ymm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovsdb %ymm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovsdb %ymm0, %xmm0 - %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1) - %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) - %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2) - %res3 = add <16 x i8> %res0, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32>, i8) - -define void @test_int_x86_avx512_mask_pmovs_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_256: -; CHECK: vpmovsdb %ymm0, (%rdi) -; CHECK: vpmovsdb %ymm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2) - ret void -} - -declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32>, <16 x i8>, i8) - -define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_256: -; CHECK: vpmovusdb %ymm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovusdb %ymm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovusdb %ymm0, %xmm0 - %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1) - %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) - %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2) - %res3 = add <16 x i8> %res0, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32>, i8) - -define void @test_int_x86_avx512_mask_pmovus_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_256: -; CHECK: vpmovusdb %ymm0, (%rdi) -; CHECK: vpmovusdb %ymm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2) - ret void -} - -declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32>, <8 x i16>, i8) - -define <8 x i16>@test_int_x86_avx512_mask_pmov_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_128: -; CHECK: vpmovdw %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovdw %xmm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovdw %xmm0, %xmm0 - %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1) - %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) - %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2) - %res3 = add <8 x i16> %res0, %res1 - %res4 = add <8 x i16> %res3, %res2 - ret <8 x i16> %res4 -} - -declare void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32>, i8) - -define void @test_int_x86_avx512_mask_pmov_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_128: -; CHECK: vpmovdw %xmm0, (%rdi) -; CHECK: vpmovdw %xmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2) - ret void -} - -declare <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32>, <8 x i16>, i8) - -define <8 x i16>@test_int_x86_avx512_mask_pmovs_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_128: -; CHECK: vpmovsdw %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovsdw %xmm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovsdw %xmm0, %xmm0 - %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1) - %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) - %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2) - %res3 = add <8 x i16> %res0, %res1 - %res4 = add <8 x i16> %res3, %res2 - ret <8 x i16> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32>, i8) - -define void @test_int_x86_avx512_mask_pmovs_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_128: -; CHECK: vpmovsdw %xmm0, (%rdi) -; CHECK: vpmovsdw %xmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2) - ret void -} - -declare <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32>, <8 x i16>, i8) - -define <8 x i16>@test_int_x86_avx512_mask_pmovus_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_128: -; CHECK: vpmovusdw %xmm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovusdw %xmm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovusdw %xmm0, %xmm0 - %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1) - %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) - %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2) - %res3 = add <8 x i16> %res0, %res1 - %res4 = add <8 x i16> %res3, %res2 - ret <8 x i16> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32>, i8) - -define void @test_int_x86_avx512_mask_pmovus_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_128: -; CHECK: vpmovusdw %xmm0, (%rdi) -; CHECK: vpmovusdw %xmm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2) - ret void -} - -declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32>, <8 x i16>, i8) - -define <8 x i16>@test_int_x86_avx512_mask_pmov_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_256: -; CHECK: vpmovdw %ymm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovdw %ymm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovdw %ymm0, %xmm0 - %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1) - %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) - %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2) - %res3 = add <8 x i16> %res0, %res1 - %res4 = add <8 x i16> %res3, %res2 - ret <8 x i16> %res4 -} - -declare void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32>, i8) - -define void @test_int_x86_avx512_mask_pmov_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_256: -; CHECK: vpmovdw %ymm0, (%rdi) -; CHECK: vpmovdw %ymm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2) - ret void -} - -declare <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32>, <8 x i16>, i8) - -define <8 x i16>@test_int_x86_avx512_mask_pmovs_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_256: -; CHECK: vpmovsdw %ymm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovsdw %ymm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovsdw %ymm0, %xmm0 - %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1) - %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) - %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2) - %res3 = add <8 x i16> %res0, %res1 - %res4 = add <8 x i16> %res3, %res2 - ret <8 x i16> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32>, i8) - -define void @test_int_x86_avx512_mask_pmovs_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_256: -; CHECK: vpmovsdw %ymm0, (%rdi) -; CHECK: vpmovsdw %ymm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2) - ret void -} - -declare <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32>, <8 x i16>, i8) - -define <8 x i16>@test_int_x86_avx512_mask_pmovus_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_256: -; CHECK: vpmovusdw %ymm0, %xmm1 {%k1} -; CHECK-NEXT: vpmovusdw %ymm0, %xmm2 {%k1} {z} -; CHECK-NEXT: vpmovusdw %ymm0, %xmm0 - %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1) - %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) - %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2) - %res3 = add <8 x i16> %res0, %res1 - %res4 = add <8 x i16> %res3, %res2 - ret <8 x i16> %res4 -} - -declare void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32>, i8) - -define void @test_int_x86_avx512_mask_pmovus_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_256: -; CHECK: vpmovusdw %ymm0, (%rdi) -; CHECK: vpmovusdw %ymm0, (%rdi) {%k1} - call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1) - call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2) - ret void -} - declare <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32>, <2 x double>, i8) define <2 x double>@test_int_x86_avx512_mask_cvt_dq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) { diff --git a/test/CodeGen/X86/masked_memop.ll b/test/CodeGen/X86/masked_memop.ll index f51d4fa103e..6c16e634a59 100644 --- a/test/CodeGen/X86/masked_memop.ll +++ b/test/CodeGen/X86/masked_memop.ll @@ -190,13 +190,10 @@ define void @test14(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) { ; AVX2-LABEL: test15 ; AVX2: vpmaskmovd -; SKX-LABEL: test15: -; SKX: ## BB#0: -; SKX-NEXT: vpandq {{.*}}(%rip), %xmm0, %xmm0 -; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k1 -; SKX-NEXT: vpmovqd %xmm1, (%rdi) {%k1} -; SKX-NEXT: retq +; SKX-LABEL: test15 +; SKX: kshiftl +; SKX: kshiftr +; SKX: vmovdqu32 {{.*}}{%k1} define void @test15(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) { %mask = icmp eq <2 x i32> %trigger, zeroinitializer call void @llvm.masked.store.v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask) diff --git a/test/MC/X86/x86-64-avx512bw.s b/test/MC/X86/x86-64-avx512bw.s index 9e10ca42c3b..8d72b086a6c 100644 --- a/test/MC/X86/x86-64-avx512bw.s +++ b/test/MC/X86/x86-64-avx512bw.s @@ -3668,126 +3668,6 @@ // CHECK: encoding: [0x62,0x62,0x7d,0x48,0x1d,0xb2,0xc0,0xdf,0xff,0xff] vpabsw -8256(%rdx), %zmm30 -// CHECK: vpmovwb %zmm27, %ymm22 -// CHECK: encoding: [0x62,0x22,0x7e,0x48,0x30,0xde] - vpmovwb %zmm27, %ymm22 - -// CHECK: vpmovwb %zmm27, %ymm22 {%k1} -// CHECK: encoding: [0x62,0x22,0x7e,0x49,0x30,0xde] - vpmovwb %zmm27, %ymm22 {%k1} - -// CHECK: vpmovwb %zmm27, %ymm22 {%k1} {z} -// CHECK: encoding: [0x62,0x22,0x7e,0xc9,0x30,0xde] - vpmovwb %zmm27, %ymm22 {%k1} {z} - -// CHECK: vpmovwb %zmm22, (%rcx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x30,0x31] - vpmovwb %zmm22, (%rcx) - -// CHECK: vpmovwb %zmm22, (%rcx) {%k4} -// CHECK: encoding: [0x62,0xe2,0x7e,0x4c,0x30,0x31] - vpmovwb %zmm22, (%rcx) {%k4} - -// CHECK: vpmovwb %zmm22, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0xa2,0x7e,0x48,0x30,0xb4,0xf0,0x23,0x01,0x00,0x00] - vpmovwb %zmm22, 291(%rax,%r14,8) - -// CHECK: vpmovwb %zmm22, 4064(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x30,0x72,0x7f] - vpmovwb %zmm22, 4064(%rdx) - -// CHECK: vpmovwb %zmm22, 4096(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x30,0xb2,0x00,0x10,0x00,0x00] - vpmovwb %zmm22, 4096(%rdx) - -// CHECK: vpmovwb %zmm22, -4096(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x30,0x72,0x80] - vpmovwb %zmm22, -4096(%rdx) - -// CHECK: vpmovwb %zmm22, -4128(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x30,0xb2,0xe0,0xef,0xff,0xff] - vpmovwb %zmm22, -4128(%rdx) - -// CHECK: vpmovswb %zmm18, %ymm23 -// CHECK: encoding: [0x62,0xa2,0x7e,0x48,0x20,0xd7] - vpmovswb %zmm18, %ymm23 - -// CHECK: vpmovswb %zmm18, %ymm23 {%k2} -// CHECK: encoding: [0x62,0xa2,0x7e,0x4a,0x20,0xd7] - vpmovswb %zmm18, %ymm23 {%k2} - -// CHECK: vpmovswb %zmm18, %ymm23 {%k2} {z} -// CHECK: encoding: [0x62,0xa2,0x7e,0xca,0x20,0xd7] - vpmovswb %zmm18, %ymm23 {%k2} {z} - -// CHECK: vpmovswb %zmm24, (%rcx) -// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x20,0x01] - vpmovswb %zmm24, (%rcx) - -// CHECK: vpmovswb %zmm24, (%rcx) {%k7} -// CHECK: encoding: [0x62,0x62,0x7e,0x4f,0x20,0x01] - vpmovswb %zmm24, (%rcx) {%k7} - -// CHECK: vpmovswb %zmm24, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0x22,0x7e,0x48,0x20,0x84,0xf0,0x23,0x01,0x00,0x00] - vpmovswb %zmm24, 291(%rax,%r14,8) - -// CHECK: vpmovswb %zmm24, 4064(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x20,0x42,0x7f] - vpmovswb %zmm24, 4064(%rdx) - -// CHECK: vpmovswb %zmm24, 4096(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x20,0x82,0x00,0x10,0x00,0x00] - vpmovswb %zmm24, 4096(%rdx) - -// CHECK: vpmovswb %zmm24, -4096(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x20,0x42,0x80] - vpmovswb %zmm24, -4096(%rdx) - -// CHECK: vpmovswb %zmm24, -4128(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x20,0x82,0xe0,0xef,0xff,0xff] - vpmovswb %zmm24, -4128(%rdx) - -// CHECK: vpmovuswb %zmm22, %ymm28 -// CHECK: encoding: [0x62,0x82,0x7e,0x48,0x10,0xf4] - vpmovuswb %zmm22, %ymm28 - -// CHECK: vpmovuswb %zmm22, %ymm28 {%k3} -// CHECK: encoding: [0x62,0x82,0x7e,0x4b,0x10,0xf4] - vpmovuswb %zmm22, %ymm28 {%k3} - -// CHECK: vpmovuswb %zmm22, %ymm28 {%k3} {z} -// CHECK: encoding: [0x62,0x82,0x7e,0xcb,0x10,0xf4] - vpmovuswb %zmm22, %ymm28 {%k3} {z} - -// CHECK: vpmovuswb %zmm27, (%rcx) -// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x10,0x19] - vpmovuswb %zmm27, (%rcx) - -// CHECK: vpmovuswb %zmm27, (%rcx) {%k2} -// CHECK: encoding: [0x62,0x62,0x7e,0x4a,0x10,0x19] - vpmovuswb %zmm27, (%rcx) {%k2} - -// CHECK: vpmovuswb %zmm27, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0x22,0x7e,0x48,0x10,0x9c,0xf0,0x23,0x01,0x00,0x00] - vpmovuswb %zmm27, 291(%rax,%r14,8) - -// CHECK: vpmovuswb %zmm27, 4064(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x10,0x5a,0x7f] - vpmovuswb %zmm27, 4064(%rdx) - -// CHECK: vpmovuswb %zmm27, 4096(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x10,0x9a,0x00,0x10,0x00,0x00] - vpmovuswb %zmm27, 4096(%rdx) - -// CHECK: vpmovuswb %zmm27, -4096(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x10,0x5a,0x80] - vpmovuswb %zmm27, -4096(%rdx) - -// CHECK: vpmovuswb %zmm27, -4128(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x10,0x9a,0xe0,0xef,0xff,0xff] - vpmovuswb %zmm27, -4128(%rdx) - // CHECK: vpmulhuw %zmm21, %zmm24, %zmm21 // CHECK: encoding: [0x62,0xa1,0x3d,0x40,0xe4,0xed] vpmulhuw %zmm21, %zmm24, %zmm21 diff --git a/test/MC/X86/x86-64-avx512bw_vl.s b/test/MC/X86/x86-64-avx512bw_vl.s index 24b8a56efe6..dcc9db577eb 100644 --- a/test/MC/X86/x86-64-avx512bw_vl.s +++ b/test/MC/X86/x86-64-avx512bw_vl.s @@ -6583,486 +6583,6 @@ // CHECK: encoding: [0x62,0xe2,0x6d,0x20,0x00,0x9a,0xe0,0xef,0xff,0xff] vpshufb -4128(%rdx), %ymm18, %ymm19 -// CHECK: vpmovwb %xmm28, %xmm27 -// CHECK: encoding: [0x62,0x02,0x7e,0x08,0x30,0xe3] - vpmovwb %xmm28, %xmm27 - -// CHECK: vpmovwb %xmm28, %xmm27 {%k2} -// CHECK: encoding: [0x62,0x02,0x7e,0x0a,0x30,0xe3] - vpmovwb %xmm28, %xmm27 {%k2} - -// CHECK: vpmovwb %xmm28, %xmm27 {%k2} {z} -// CHECK: encoding: [0x62,0x02,0x7e,0x8a,0x30,0xe3] - vpmovwb %xmm28, %xmm27 {%k2} {z} - -// CHECK: vpmovwb %ymm26, %xmm26 -// CHECK: encoding: [0x62,0x02,0x7e,0x28,0x30,0xd2] - vpmovwb %ymm26, %xmm26 - -// CHECK: vpmovwb %ymm26, %xmm26 {%k4} -// CHECK: encoding: [0x62,0x02,0x7e,0x2c,0x30,0xd2] - vpmovwb %ymm26, %xmm26 {%k4} - -// CHECK: vpmovwb %ymm26, %xmm26 {%k4} {z} -// CHECK: encoding: [0x62,0x02,0x7e,0xac,0x30,0xd2] - vpmovwb %ymm26, %xmm26 {%k4} {z} - -// CHECK: vpmovwb %xmm23, (%rcx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0x39] - vpmovwb %xmm23,(%rcx) - -// CHECK: vpmovwb %xmm23, (%rcx) {%k6} -// CHECK: encoding: [0x62,0xe2,0x7e,0x0e,0x30,0x39] - vpmovwb %xmm23,(%rcx) {%k6} - -// CHECK: vpmovwb %xmm23, 4660(%rax,%r14,8) -// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x30,0xbc,0xf0,0x34,0x12,0x00,0x00] - vpmovwb %xmm23,4660(%rax,%r14,8) - -// CHECK: vpmovwb %xmm23, 1016(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0x7a,0x7f] - vpmovwb %xmm23, 1016(%rdx) - -// CHECK: vpmovwb %xmm23, 1024(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0xba,0x00,0x04,0x00,0x00] - vpmovwb %xmm23, 1024(%rdx) - -// CHECK: vpmovwb %xmm23, -1024(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0x7a,0x80] - vpmovwb %xmm23,-1024(%rdx) - -// CHECK: vpmovwb %xmm23, -1032(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0xba,0xf8,0xfb,0xff,0xff] - vpmovwb %xmm23,-1032(%rdx) - -// CHECK: vpmovwb %ymm21, (%rcx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0x29] - vpmovwb %ymm21,(%rcx) - -// CHECK: vpmovwb %ymm21, (%rcx) {%k5} -// CHECK: encoding: [0x62,0xe2,0x7e,0x2d,0x30,0x29] - vpmovwb %ymm21,(%rcx) {%k5} - -// CHECK: vpmovwb %ymm21, 4660(%rax,%r14,8) -// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x30,0xac,0xf0,0x34,0x12,0x00,0x00] - vpmovwb %ymm21, 4660(%rax,%r14,8) - -// CHECK: vpmovwb %ymm21, 2032(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0x6a,0x7f] - vpmovwb %ymm21, 2032(%rdx) - -// CHECK: vpmovwb %ymm21, 2048(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0xaa,0x00,0x08,0x00,0x00] - vpmovwb %ymm21, 2048(%rdx) - -// CHECK: vpmovwb %ymm21, -2048(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0x6a,0x80] - vpmovwb %ymm21,-2048(%rdx) - -// CHECK: vpmovwb %ymm21, -2064(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0xaa,0xf0,0xf7,0xff,0xff] - vpmovwb %ymm21, -2064(%rdx) - -// CHECK: vpmovswb %xmm19, %xmm17 -// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x20,0xd9] - vpmovswb %xmm19, %xmm17 - -// CHECK: vpmovswb %xmm19, %xmm17 {%k1} -// CHECK: encoding: [0x62,0xa2,0x7e,0x09,0x20,0xd9] - vpmovswb %xmm19, %xmm17 {%k1} - -// CHECK: vpmovswb %xmm19, %xmm17 {%k1} {z} -// CHECK: encoding: [0x62,0xa2,0x7e,0x89,0x20,0xd9] - vpmovswb %xmm19, %xmm17 {%k1} {z} - -// CHECK: vpmovswb %ymm19, %xmm21 -// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x20,0xdd] - vpmovswb %ymm19, %xmm21 - -// CHECK: vpmovswb %ymm19, %xmm21 {%k4} -// CHECK: encoding: [0x62,0xa2,0x7e,0x2c,0x20,0xdd] - vpmovswb %ymm19, %xmm21 {%k4} - -// CHECK: vpmovswb %ymm19, %xmm21 {%k4} {z} -// CHECK: encoding: [0x62,0xa2,0x7e,0xac,0x20,0xdd] - vpmovswb %ymm19, %xmm21 {%k4} {z} - -// CHECK: vpmovswb %xmm18, (%rcx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x20,0x11] - vpmovswb %xmm18,(%rcx) - -// CHECK: vpmovswb %xmm18, (%rcx) {%k2} -// CHECK: encoding: [0x62,0xe2,0x7e,0x0a,0x20,0x11] - vpmovswb %xmm18,(%rcx) {%k2} - -// CHECK: vpmovswb %xmm18, 4660(%rax,%r14,8) -// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x20,0x94,0xf0,0x34,0x12,0x00,0x00] - vpmovswb %xmm18, 4660(%rax,%r14,8) - -// CHECK: vpmovswb %xmm18, 1016(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x20,0x52,0x7f] - vpmovswb %xmm18, 1016(%rdx) - -// CHECK: vpmovswb %xmm18, 1024(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x20,0x92,0x00,0x04,0x00,0x00] - vpmovswb %xmm18, 1024(%rdx) - -// CHECK: vpmovswb %xmm18, -1024(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x20,0x52,0x80] - vpmovswb %xmm18, -1024(%rdx) - -// CHECK: vpmovswb %xmm18, -1032(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x20,0x92,0xf8,0xfb,0xff,0xff] - vpmovswb %xmm18, -1032(%rdx) - -// CHECK: vpmovswb %ymm23, (%rcx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x20,0x39] - vpmovswb %ymm23,(%rcx) - -// CHECK: vpmovswb %ymm23, (%rcx) {%k2} -// CHECK: encoding: [0x62,0xe2,0x7e,0x2a,0x20,0x39] - vpmovswb %ymm23,(%rcx) {%k2} - -// CHECK: vpmovswb %ymm23, 4660(%rax,%r14,8) -// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x20,0xbc,0xf0,0x34,0x12,0x00,0x00] - vpmovswb %ymm23, 4660(%rax,%r14,8) - -// CHECK: vpmovswb %ymm23, 2032(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x20,0x7a,0x7f] - vpmovswb %ymm23, 2032(%rdx) - -// CHECK: vpmovswb %ymm23, 2048(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x20,0xba,0x00,0x08,0x00,0x00] - vpmovswb %ymm23, 2048(%rdx) - -// CHECK: vpmovswb %ymm23, -2048(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x20,0x7a,0x80] - vpmovswb %ymm23, -2048(%rdx) - -// CHECK: vpmovswb %ymm23, -2064(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x20,0xba,0xf0,0xf7,0xff,0xff] - vpmovswb %ymm23, -2064(%rdx) - -// CHECK: vpmovuswb %xmm17, %xmm26 -// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x10,0xca] - vpmovuswb %xmm17, %xmm26 - -// CHECK: vpmovuswb %xmm17, %xmm26 {%k6} -// CHECK: encoding: [0x62,0x82,0x7e,0x0e,0x10,0xca] - vpmovuswb %xmm17, %xmm26 {%k6} - -// CHECK: vpmovuswb %xmm17, %xmm26 {%k6} {z} -// CHECK: encoding: [0x62,0x82,0x7e,0x8e,0x10,0xca] - vpmovuswb %xmm17, %xmm26 {%k6} {z} - -// CHECK: vpmovuswb %ymm26, %xmm17 -// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x10,0xd1] - vpmovuswb %ymm26, %xmm17 - -// CHECK: vpmovuswb %ymm26, %xmm17 {%k2} -// CHECK: encoding: [0x62,0x22,0x7e,0x2a,0x10,0xd1] - vpmovuswb %ymm26, %xmm17 {%k2} - -// CHECK: vpmovuswb %ymm26, %xmm17 {%k2} {z} -// CHECK: encoding: [0x62,0x22,0x7e,0xaa,0x10,0xd1] - vpmovuswb %ymm26, %xmm17 {%k2} {z} - -// CHECK: vpmovuswb %xmm19, (%rcx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x10,0x19] - vpmovuswb %xmm19,(%rcx) - -// CHECK: vpmovuswb %xmm19, (%rcx) {%k1} -// CHECK: encoding: [0x62,0xe2,0x7e,0x09,0x10,0x19] - vpmovuswb %xmm19,(%rcx) {%k1} - -// CHECK: vpmovuswb %xmm19, 4660(%rax,%r14,8) -// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x10,0x9c,0xf0,0x34,0x12,0x00,0x00] - vpmovuswb %xmm19, 4660(%rax,%r14,8) - -// CHECK: vpmovuswb %xmm19, 1016(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x10,0x5a,0x7f] - vpmovuswb %xmm19, 1016(%rdx) - -// CHECK: vpmovuswb %xmm19, 1024(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x10,0x9a,0x00,0x04,0x00,0x00] - vpmovuswb %xmm19, 1024(%rdx) - -// CHECK: vpmovuswb %xmm19, -1024(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x10,0x5a,0x80] - vpmovuswb %xmm19, -1024(%rdx) - -// CHECK: vpmovuswb %xmm19, -1032(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x10,0x9a,0xf8,0xfb,0xff,0xff] - vpmovuswb %xmm19, -1032(%rdx) - -// CHECK: vpmovuswb %ymm23, (%rcx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x10,0x39] - vpmovuswb %ymm23,(%rcx) - -// CHECK: vpmovuswb %ymm23, (%rcx) {%k6} -// CHECK: encoding: [0x62,0xe2,0x7e,0x2e,0x10,0x39] - vpmovuswb %ymm23,(%rcx) {%k6} - -// CHECK: vpmovuswb %ymm23, 4660(%rax,%r14,8) -// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x10,0xbc,0xf0,0x34,0x12,0x00,0x00] - vpmovuswb %ymm23, 4660(%rax,%r14,8) - -// CHECK: vpmovuswb %ymm23, 2032(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x10,0x7a,0x7f] - vpmovuswb %ymm23, 2032(%rdx) - -// CHECK: vpmovuswb %ymm23, 2048(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x10,0xba,0x00,0x08,0x00,0x00] - vpmovuswb %ymm23, 2048(%rdx) - -// CHECK: vpmovuswb %ymm23, -2048(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x10,0x7a,0x80] - vpmovuswb %ymm23, -2048(%rdx) - -// CHECK: vpmovuswb %ymm23, -2064(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x10,0xba,0xf0,0xf7,0xff,0xff] - vpmovuswb %ymm23, -2064(%rdx) - -// CHECK: vpmovwb %xmm17, %xmm21 -// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x30,0xcd] - vpmovwb %xmm17, %xmm21 - -// CHECK: vpmovwb %xmm17, %xmm21 {%k1} -// CHECK: encoding: [0x62,0xa2,0x7e,0x09,0x30,0xcd] - vpmovwb %xmm17, %xmm21 {%k1} - -// CHECK: vpmovwb %xmm17, %xmm21 {%k1} {z} -// CHECK: encoding: [0x62,0xa2,0x7e,0x89,0x30,0xcd] - vpmovwb %xmm17, %xmm21 {%k1} {z} - -// CHECK: vpmovwb %ymm23, %xmm26 -// CHECK: encoding: [0x62,0x82,0x7e,0x28,0x30,0xfa] - vpmovwb %ymm23, %xmm26 - -// CHECK: vpmovwb %ymm23, %xmm26 {%k7} -// CHECK: encoding: [0x62,0x82,0x7e,0x2f,0x30,0xfa] - vpmovwb %ymm23, %xmm26 {%k7} - -// CHECK: vpmovwb %ymm23, %xmm26 {%k7} {z} -// CHECK: encoding: [0x62,0x82,0x7e,0xaf,0x30,0xfa] - vpmovwb %ymm23, %xmm26 {%k7} {z} - -// CHECK: vpmovwb %xmm21, (%rcx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0x29] - vpmovwb %xmm21, (%rcx) - -// CHECK: vpmovwb %xmm21, (%rcx) {%k2} -// CHECK: encoding: [0x62,0xe2,0x7e,0x0a,0x30,0x29] - vpmovwb %xmm21, (%rcx) {%k2} - -// CHECK: vpmovwb %xmm21, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x30,0xac,0xf0,0x23,0x01,0x00,0x00] - vpmovwb %xmm21, 291(%rax,%r14,8) - -// CHECK: vpmovwb %xmm21, 1016(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0x6a,0x7f] - vpmovwb %xmm21, 1016(%rdx) - -// CHECK: vpmovwb %xmm21, 1024(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0xaa,0x00,0x04,0x00,0x00] - vpmovwb %xmm21, 1024(%rdx) - -// CHECK: vpmovwb %xmm21, -1024(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0x6a,0x80] - vpmovwb %xmm21, -1024(%rdx) - -// CHECK: vpmovwb %xmm21, -1032(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0xaa,0xf8,0xfb,0xff,0xff] - vpmovwb %xmm21, -1032(%rdx) - -// CHECK: vpmovwb %ymm20, (%rcx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0x21] - vpmovwb %ymm20, (%rcx) - -// CHECK: vpmovwb %ymm20, (%rcx) {%k4} -// CHECK: encoding: [0x62,0xe2,0x7e,0x2c,0x30,0x21] - vpmovwb %ymm20, (%rcx) {%k4} - -// CHECK: vpmovwb %ymm20, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x30,0xa4,0xf0,0x23,0x01,0x00,0x00] - vpmovwb %ymm20, 291(%rax,%r14,8) - -// CHECK: vpmovwb %ymm20, 2032(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0x62,0x7f] - vpmovwb %ymm20, 2032(%rdx) - -// CHECK: vpmovwb %ymm20, 2048(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0xa2,0x00,0x08,0x00,0x00] - vpmovwb %ymm20, 2048(%rdx) - -// CHECK: vpmovwb %ymm20, -2048(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0x62,0x80] - vpmovwb %ymm20, -2048(%rdx) - -// CHECK: vpmovwb %ymm20, -2064(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0xa2,0xf0,0xf7,0xff,0xff] - vpmovwb %ymm20, -2064(%rdx) - -// CHECK: vpmovswb %xmm20, %xmm24 -// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x20,0xe0] - vpmovswb %xmm20, %xmm24 - -// CHECK: vpmovswb %xmm20, %xmm24 {%k4} -// CHECK: encoding: [0x62,0x82,0x7e,0x0c,0x20,0xe0] - vpmovswb %xmm20, %xmm24 {%k4} - -// CHECK: vpmovswb %xmm20, %xmm24 {%k4} {z} -// CHECK: encoding: [0x62,0x82,0x7e,0x8c,0x20,0xe0] - vpmovswb %xmm20, %xmm24 {%k4} {z} - -// CHECK: vpmovswb %ymm18, %xmm27 -// CHECK: encoding: [0x62,0x82,0x7e,0x28,0x20,0xd3] - vpmovswb %ymm18, %xmm27 - -// CHECK: vpmovswb %ymm18, %xmm27 {%k1} -// CHECK: encoding: [0x62,0x82,0x7e,0x29,0x20,0xd3] - vpmovswb %ymm18, %xmm27 {%k1} - -// CHECK: vpmovswb %ymm18, %xmm27 {%k1} {z} -// CHECK: encoding: [0x62,0x82,0x7e,0xa9,0x20,0xd3] - vpmovswb %ymm18, %xmm27 {%k1} {z} - -// CHECK: vpmovswb %xmm24, (%rcx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x20,0x01] - vpmovswb %xmm24, (%rcx) - -// CHECK: vpmovswb %xmm24, (%rcx) {%k3} -// CHECK: encoding: [0x62,0x62,0x7e,0x0b,0x20,0x01] - vpmovswb %xmm24, (%rcx) {%k3} - -// CHECK: vpmovswb %xmm24, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x20,0x84,0xf0,0x23,0x01,0x00,0x00] - vpmovswb %xmm24, 291(%rax,%r14,8) - -// CHECK: vpmovswb %xmm24, 1016(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x20,0x42,0x7f] - vpmovswb %xmm24, 1016(%rdx) - -// CHECK: vpmovswb %xmm24, 1024(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x20,0x82,0x00,0x04,0x00,0x00] - vpmovswb %xmm24, 1024(%rdx) - -// CHECK: vpmovswb %xmm24, -1024(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x20,0x42,0x80] - vpmovswb %xmm24, -1024(%rdx) - -// CHECK: vpmovswb %xmm24, -1032(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x20,0x82,0xf8,0xfb,0xff,0xff] - vpmovswb %xmm24, -1032(%rdx) - -// CHECK: vpmovswb %ymm27, (%rcx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x20,0x19] - vpmovswb %ymm27, (%rcx) - -// CHECK: vpmovswb %ymm27, (%rcx) {%k7} -// CHECK: encoding: [0x62,0x62,0x7e,0x2f,0x20,0x19] - vpmovswb %ymm27, (%rcx) {%k7} - -// CHECK: vpmovswb %ymm27, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x20,0x9c,0xf0,0x23,0x01,0x00,0x00] - vpmovswb %ymm27, 291(%rax,%r14,8) - -// CHECK: vpmovswb %ymm27, 2032(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x20,0x5a,0x7f] - vpmovswb %ymm27, 2032(%rdx) - -// CHECK: vpmovswb %ymm27, 2048(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x20,0x9a,0x00,0x08,0x00,0x00] - vpmovswb %ymm27, 2048(%rdx) - -// CHECK: vpmovswb %ymm27, -2048(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x20,0x5a,0x80] - vpmovswb %ymm27, -2048(%rdx) - -// CHECK: vpmovswb %ymm27, -2064(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x20,0x9a,0xf0,0xf7,0xff,0xff] - vpmovswb %ymm27, -2064(%rdx) - -// CHECK: vpmovuswb %xmm19, %xmm23 -// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x10,0xdf] - vpmovuswb %xmm19, %xmm23 - -// CHECK: vpmovuswb %xmm19, %xmm23 {%k4} -// CHECK: encoding: [0x62,0xa2,0x7e,0x0c,0x10,0xdf] - vpmovuswb %xmm19, %xmm23 {%k4} - -// CHECK: vpmovuswb %xmm19, %xmm23 {%k4} {z} -// CHECK: encoding: [0x62,0xa2,0x7e,0x8c,0x10,0xdf] - vpmovuswb %xmm19, %xmm23 {%k4} {z} - -// CHECK: vpmovuswb %ymm23, %xmm28 -// CHECK: encoding: [0x62,0x82,0x7e,0x28,0x10,0xfc] - vpmovuswb %ymm23, %xmm28 - -// CHECK: vpmovuswb %ymm23, %xmm28 {%k6} -// CHECK: encoding: [0x62,0x82,0x7e,0x2e,0x10,0xfc] - vpmovuswb %ymm23, %xmm28 {%k6} - -// CHECK: vpmovuswb %ymm23, %xmm28 {%k6} {z} -// CHECK: encoding: [0x62,0x82,0x7e,0xae,0x10,0xfc] - vpmovuswb %ymm23, %xmm28 {%k6} {z} - -// CHECK: vpmovuswb %xmm25, (%rcx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x10,0x09] - vpmovuswb %xmm25, (%rcx) - -// CHECK: vpmovuswb %xmm25, (%rcx) {%k3} -// CHECK: encoding: [0x62,0x62,0x7e,0x0b,0x10,0x09] - vpmovuswb %xmm25, (%rcx) {%k3} - -// CHECK: vpmovuswb %xmm25, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x10,0x8c,0xf0,0x23,0x01,0x00,0x00] - vpmovuswb %xmm25, 291(%rax,%r14,8) - -// CHECK: vpmovuswb %xmm25, 1016(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x10,0x4a,0x7f] - vpmovuswb %xmm25, 1016(%rdx) - -// CHECK: vpmovuswb %xmm25, 1024(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x10,0x8a,0x00,0x04,0x00,0x00] - vpmovuswb %xmm25, 1024(%rdx) - -// CHECK: vpmovuswb %xmm25, -1024(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x10,0x4a,0x80] - vpmovuswb %xmm25, -1024(%rdx) - -// CHECK: vpmovuswb %xmm25, -1032(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x10,0x8a,0xf8,0xfb,0xff,0xff] - vpmovuswb %xmm25, -1032(%rdx) - -// CHECK: vpmovuswb %ymm28, (%rcx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x10,0x21] - vpmovuswb %ymm28, (%rcx) - -// CHECK: vpmovuswb %ymm28, (%rcx) {%k2} -// CHECK: encoding: [0x62,0x62,0x7e,0x2a,0x10,0x21] - vpmovuswb %ymm28, (%rcx) {%k2} - -// CHECK: vpmovuswb %ymm28, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x10,0xa4,0xf0,0x23,0x01,0x00,0x00] - vpmovuswb %ymm28, 291(%rax,%r14,8) - -// CHECK: vpmovuswb %ymm28, 2032(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x10,0x62,0x7f] - vpmovuswb %ymm28, 2032(%rdx) - -// CHECK: vpmovuswb %ymm28, 2048(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x10,0xa2,0x00,0x08,0x00,0x00] - vpmovuswb %ymm28, 2048(%rdx) - -// CHECK: vpmovuswb %ymm28, -2048(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x10,0x62,0x80] - vpmovuswb %ymm28, -2048(%rdx) - -// CHECK: vpmovuswb %ymm28, -2064(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x10,0xa2,0xf0,0xf7,0xff,0xff] - vpmovuswb %ymm28, -2064(%rdx) - // CHECK: vpmulhuw %xmm18, %xmm21, %xmm24 // CHECK: encoding: [0x62,0x21,0x55,0x00,0xe4,0xc2] vpmulhuw %xmm18, %xmm21, %xmm24 diff --git a/test/MC/X86/x86-64-avx512f_vl.s b/test/MC/X86/x86-64-avx512f_vl.s index 9280be94716..eca2ffbfc09 100644 --- a/test/MC/X86/x86-64-avx512f_vl.s +++ b/test/MC/X86/x86-64-avx512f_vl.s @@ -16285,1206 +16285,6 @@ vaddpd {rz-sae}, %zmm2, %zmm1, %zmm1 // CHECK: encoding: [0x62,0x62,0x4d,0x30,0x2c,0x8a,0xfc,0xfd,0xff,0xff] vscalefps -516(%rdx){1to8}, %ymm22, %ymm25 -// CHECK: vpmovqb %xmm29, %xmm24 -// CHECK: encoding: [0x62,0x02,0x7e,0x08,0x32,0xe8] - vpmovqb %xmm29, %xmm24 - -// CHECK: vpmovqb %xmm29, %xmm24 {%k4} -// CHECK: encoding: [0x62,0x02,0x7e,0x0c,0x32,0xe8] - vpmovqb %xmm29, %xmm24 {%k4} - -// CHECK: vpmovqb %xmm29, %xmm24 {%k4} {z} -// CHECK: encoding: [0x62,0x02,0x7e,0x8c,0x32,0xe8] - vpmovqb %xmm29, %xmm24 {%k4} {z} - -// CHECK: vpmovqb %ymm29, %xmm17 -// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x32,0xe9] - vpmovqb %ymm29, %xmm17 - -// CHECK: vpmovqb %ymm29, %xmm17 {%k3} -// CHECK: encoding: [0x62,0x22,0x7e,0x2b,0x32,0xe9] - vpmovqb %ymm29, %xmm17 {%k3} - -// CHECK: vpmovqb %ymm29, %xmm17 {%k3} {z} -// CHECK: encoding: [0x62,0x22,0x7e,0xab,0x32,0xe9] - vpmovqb %ymm29, %xmm17 {%k3} {z} - -// CHECK: vpmovqb %xmm27, (%rcx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x32,0x19] - vpmovqb %xmm27, (%rcx) - -// CHECK: vpmovqb %xmm27, (%rcx) {%k2} -// CHECK: encoding: [0x62,0x62,0x7e,0x0a,0x32,0x19] - vpmovqb %xmm27, (%rcx) {%k2} - -// CHECK: vpmovqb %xmm27, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x32,0x9c,0xf0,0x23,0x01,0x00,0x00] - vpmovqb %xmm27, 291(%rax,%r14,8) - -// CHECK: vpmovqb %xmm27, 254(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x32,0x5a,0x7f] - vpmovqb %xmm27, 254(%rdx) - -// CHECK: vpmovqb %xmm27, 256(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x32,0x9a,0x00,0x01,0x00,0x00] - vpmovqb %xmm27, 256(%rdx) - -// CHECK: vpmovqb %xmm27, -256(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x32,0x5a,0x80] - vpmovqb %xmm27, -256(%rdx) - -// CHECK: vpmovqb %xmm27, -258(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x32,0x9a,0xfe,0xfe,0xff,0xff] - vpmovqb %xmm27, -258(%rdx) - -// CHECK: vpmovqb %ymm28, (%rcx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x32,0x21] - vpmovqb %ymm28, (%rcx) - -// CHECK: vpmovqb %ymm28, (%rcx) {%k7} -// CHECK: encoding: [0x62,0x62,0x7e,0x2f,0x32,0x21] - vpmovqb %ymm28, (%rcx) {%k7} - -// CHECK: vpmovqb %ymm28, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x32,0xa4,0xf0,0x23,0x01,0x00,0x00] - vpmovqb %ymm28, 291(%rax,%r14,8) - -// CHECK: vpmovqb %ymm28, 508(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x32,0x62,0x7f] - vpmovqb %ymm28, 508(%rdx) - -// CHECK: vpmovqb %ymm28, 512(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x32,0xa2,0x00,0x02,0x00,0x00] - vpmovqb %ymm28, 512(%rdx) - -// CHECK: vpmovqb %ymm28, -512(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x32,0x62,0x80] - vpmovqb %ymm28, -512(%rdx) - -// CHECK: vpmovqb %ymm28, -516(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x32,0xa2,0xfc,0xfd,0xff,0xff] - vpmovqb %ymm28, -516(%rdx) - -// CHECK: vpmovsqb %xmm19, %xmm26 -// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x22,0xda] - vpmovsqb %xmm19, %xmm26 - -// CHECK: vpmovsqb %xmm19, %xmm26 {%k1} -// CHECK: encoding: [0x62,0x82,0x7e,0x09,0x22,0xda] - vpmovsqb %xmm19, %xmm26 {%k1} - -// CHECK: vpmovsqb %xmm19, %xmm26 {%k1} {z} -// CHECK: encoding: [0x62,0x82,0x7e,0x89,0x22,0xda] - vpmovsqb %xmm19, %xmm26 {%k1} {z} - -// CHECK: vpmovsqb %ymm20, %xmm20 -// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x22,0xe4] - vpmovsqb %ymm20, %xmm20 - -// CHECK: vpmovsqb %ymm20, %xmm20 {%k6} -// CHECK: encoding: [0x62,0xa2,0x7e,0x2e,0x22,0xe4] - vpmovsqb %ymm20, %xmm20 {%k6} - -// CHECK: vpmovsqb %ymm20, %xmm20 {%k6} {z} -// CHECK: encoding: [0x62,0xa2,0x7e,0xae,0x22,0xe4] - vpmovsqb %ymm20, %xmm20 {%k6} {z} - -// CHECK: vpmovsqb %xmm25, (%rcx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x22,0x09] - vpmovsqb %xmm25, (%rcx) - -// CHECK: vpmovsqb %xmm25, (%rcx) {%k7} -// CHECK: encoding: [0x62,0x62,0x7e,0x0f,0x22,0x09] - vpmovsqb %xmm25, (%rcx) {%k7} - -// CHECK: vpmovsqb %xmm25, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x22,0x8c,0xf0,0x23,0x01,0x00,0x00] - vpmovsqb %xmm25, 291(%rax,%r14,8) - -// CHECK: vpmovsqb %xmm25, 254(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x22,0x4a,0x7f] - vpmovsqb %xmm25, 254(%rdx) - -// CHECK: vpmovsqb %xmm25, 256(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x22,0x8a,0x00,0x01,0x00,0x00] - vpmovsqb %xmm25, 256(%rdx) - -// CHECK: vpmovsqb %xmm25, -256(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x22,0x4a,0x80] - vpmovsqb %xmm25, -256(%rdx) - -// CHECK: vpmovsqb %xmm25, -258(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x22,0x8a,0xfe,0xfe,0xff,0xff] - vpmovsqb %xmm25, -258(%rdx) - -// CHECK: vpmovsqb %ymm17, (%rcx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x22,0x09] - vpmovsqb %ymm17, (%rcx) - -// CHECK: vpmovsqb %ymm17, (%rcx) {%k4} -// CHECK: encoding: [0x62,0xe2,0x7e,0x2c,0x22,0x09] - vpmovsqb %ymm17, (%rcx) {%k4} - -// CHECK: vpmovsqb %ymm17, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x22,0x8c,0xf0,0x23,0x01,0x00,0x00] - vpmovsqb %ymm17, 291(%rax,%r14,8) - -// CHECK: vpmovsqb %ymm17, 508(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x22,0x4a,0x7f] - vpmovsqb %ymm17, 508(%rdx) - -// CHECK: vpmovsqb %ymm17, 512(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x22,0x8a,0x00,0x02,0x00,0x00] - vpmovsqb %ymm17, 512(%rdx) - -// CHECK: vpmovsqb %ymm17, -512(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x22,0x4a,0x80] - vpmovsqb %ymm17, -512(%rdx) - -// CHECK: vpmovsqb %ymm17, -516(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x22,0x8a,0xfc,0xfd,0xff,0xff] - vpmovsqb %ymm17, -516(%rdx) - -// CHECK: vpmovusqb %xmm22, %xmm28 -// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x12,0xf4] - vpmovusqb %xmm22, %xmm28 - -// CHECK: vpmovusqb %xmm22, %xmm28 {%k2} -// CHECK: encoding: [0x62,0x82,0x7e,0x0a,0x12,0xf4] - vpmovusqb %xmm22, %xmm28 {%k2} - -// CHECK: vpmovusqb %xmm22, %xmm28 {%k2} {z} -// CHECK: encoding: [0x62,0x82,0x7e,0x8a,0x12,0xf4] - vpmovusqb %xmm22, %xmm28 {%k2} {z} - -// CHECK: vpmovusqb %ymm23, %xmm22 -// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x12,0xfe] - vpmovusqb %ymm23, %xmm22 - -// CHECK: vpmovusqb %ymm23, %xmm22 {%k7} -// CHECK: encoding: [0x62,0xa2,0x7e,0x2f,0x12,0xfe] - vpmovusqb %ymm23, %xmm22 {%k7} - -// CHECK: vpmovusqb %ymm23, %xmm22 {%k7} {z} -// CHECK: encoding: [0x62,0xa2,0x7e,0xaf,0x12,0xfe] - vpmovusqb %ymm23, %xmm22 {%k7} {z} - -// CHECK: vpmovusqb %xmm26, (%rcx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x12,0x11] - vpmovusqb %xmm26, (%rcx) - -// CHECK: vpmovusqb %xmm26, (%rcx) {%k5} -// CHECK: encoding: [0x62,0x62,0x7e,0x0d,0x12,0x11] - vpmovusqb %xmm26, (%rcx) {%k5} - -// CHECK: vpmovusqb %xmm26, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x12,0x94,0xf0,0x23,0x01,0x00,0x00] - vpmovusqb %xmm26, 291(%rax,%r14,8) - -// CHECK: vpmovusqb %xmm26, 254(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x12,0x52,0x7f] - vpmovusqb %xmm26, 254(%rdx) - -// CHECK: vpmovusqb %xmm26, 256(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x12,0x92,0x00,0x01,0x00,0x00] - vpmovusqb %xmm26, 256(%rdx) - -// CHECK: vpmovusqb %xmm26, -256(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x12,0x52,0x80] - vpmovusqb %xmm26, -256(%rdx) - -// CHECK: vpmovusqb %xmm26, -258(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x12,0x92,0xfe,0xfe,0xff,0xff] - vpmovusqb %xmm26, -258(%rdx) - -// CHECK: vpmovusqb %ymm30, (%rcx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x12,0x31] - vpmovusqb %ymm30, (%rcx) - -// CHECK: vpmovusqb %ymm30, (%rcx) {%k2} -// CHECK: encoding: [0x62,0x62,0x7e,0x2a,0x12,0x31] - vpmovusqb %ymm30, (%rcx) {%k2} - -// CHECK: vpmovusqb %ymm30, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x12,0xb4,0xf0,0x23,0x01,0x00,0x00] - vpmovusqb %ymm30, 291(%rax,%r14,8) - -// CHECK: vpmovusqb %ymm30, 508(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x12,0x72,0x7f] - vpmovusqb %ymm30, 508(%rdx) - -// CHECK: vpmovusqb %ymm30, 512(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x12,0xb2,0x00,0x02,0x00,0x00] - vpmovusqb %ymm30, 512(%rdx) - -// CHECK: vpmovusqb %ymm30, -512(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x12,0x72,0x80] - vpmovusqb %ymm30, -512(%rdx) - -// CHECK: vpmovusqb %ymm30, -516(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x12,0xb2,0xfc,0xfd,0xff,0xff] - vpmovusqb %ymm30, -516(%rdx) - -// CHECK: vpmovqw %xmm18, %xmm19 -// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x34,0xd3] - vpmovqw %xmm18, %xmm19 - -// CHECK: vpmovqw %xmm18, %xmm19 {%k4} -// CHECK: encoding: [0x62,0xa2,0x7e,0x0c,0x34,0xd3] - vpmovqw %xmm18, %xmm19 {%k4} - -// CHECK: vpmovqw %xmm18, %xmm19 {%k4} {z} -// CHECK: encoding: [0x62,0xa2,0x7e,0x8c,0x34,0xd3] - vpmovqw %xmm18, %xmm19 {%k4} {z} - -// CHECK: vpmovqw %ymm22, %xmm19 -// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x34,0xf3] - vpmovqw %ymm22, %xmm19 - -// CHECK: vpmovqw %ymm22, %xmm19 {%k5} -// CHECK: encoding: [0x62,0xa2,0x7e,0x2d,0x34,0xf3] - vpmovqw %ymm22, %xmm19 {%k5} - -// CHECK: vpmovqw %ymm22, %xmm19 {%k5} {z} -// CHECK: encoding: [0x62,0xa2,0x7e,0xad,0x34,0xf3] - vpmovqw %ymm22, %xmm19 {%k5} {z} - -// CHECK: vpmovqw %xmm21, (%rcx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x34,0x29] - vpmovqw %xmm21, (%rcx) - -// CHECK: vpmovqw %xmm21, (%rcx) {%k3} -// CHECK: encoding: [0x62,0xe2,0x7e,0x0b,0x34,0x29] - vpmovqw %xmm21, (%rcx) {%k3} - -// CHECK: vpmovqw %xmm21, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x34,0xac,0xf0,0x23,0x01,0x00,0x00] - vpmovqw %xmm21, 291(%rax,%r14,8) - -// CHECK: vpmovqw %xmm21, 508(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x34,0x6a,0x7f] - vpmovqw %xmm21, 508(%rdx) - -// CHECK: vpmovqw %xmm21, 512(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x34,0xaa,0x00,0x02,0x00,0x00] - vpmovqw %xmm21, 512(%rdx) - -// CHECK: vpmovqw %xmm21, -512(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x34,0x6a,0x80] - vpmovqw %xmm21, -512(%rdx) - -// CHECK: vpmovqw %xmm21, -516(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x34,0xaa,0xfc,0xfd,0xff,0xff] - vpmovqw %xmm21, -516(%rdx) - -// CHECK: vpmovqw %ymm28, (%rcx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x34,0x21] - vpmovqw %ymm28, (%rcx) - -// CHECK: vpmovqw %ymm28, (%rcx) {%k6} -// CHECK: encoding: [0x62,0x62,0x7e,0x2e,0x34,0x21] - vpmovqw %ymm28, (%rcx) {%k6} - -// CHECK: vpmovqw %ymm28, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x34,0xa4,0xf0,0x23,0x01,0x00,0x00] - vpmovqw %ymm28, 291(%rax,%r14,8) - -// CHECK: vpmovqw %ymm28, 1016(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x34,0x62,0x7f] - vpmovqw %ymm28, 1016(%rdx) - -// CHECK: vpmovqw %ymm28, 1024(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x34,0xa2,0x00,0x04,0x00,0x00] - vpmovqw %ymm28, 1024(%rdx) - -// CHECK: vpmovqw %ymm28, -1024(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x34,0x62,0x80] - vpmovqw %ymm28, -1024(%rdx) - -// CHECK: vpmovqw %ymm28, -1032(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x34,0xa2,0xf8,0xfb,0xff,0xff] - vpmovqw %ymm28, -1032(%rdx) - -// CHECK: vpmovsqw %xmm18, %xmm26 -// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x24,0xd2] - vpmovsqw %xmm18, %xmm26 - -// CHECK: vpmovsqw %xmm18, %xmm26 {%k7} -// CHECK: encoding: [0x62,0x82,0x7e,0x0f,0x24,0xd2] - vpmovsqw %xmm18, %xmm26 {%k7} - -// CHECK: vpmovsqw %xmm18, %xmm26 {%k7} {z} -// CHECK: encoding: [0x62,0x82,0x7e,0x8f,0x24,0xd2] - vpmovsqw %xmm18, %xmm26 {%k7} {z} - -// CHECK: vpmovsqw %ymm20, %xmm28 -// CHECK: encoding: [0x62,0x82,0x7e,0x28,0x24,0xe4] - vpmovsqw %ymm20, %xmm28 - -// CHECK: vpmovsqw %ymm20, %xmm28 {%k4} -// CHECK: encoding: [0x62,0x82,0x7e,0x2c,0x24,0xe4] - vpmovsqw %ymm20, %xmm28 {%k4} - -// CHECK: vpmovsqw %ymm20, %xmm28 {%k4} {z} -// CHECK: encoding: [0x62,0x82,0x7e,0xac,0x24,0xe4] - vpmovsqw %ymm20, %xmm28 {%k4} {z} - -// CHECK: vpmovsqw %xmm30, (%rcx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x24,0x31] - vpmovsqw %xmm30, (%rcx) - -// CHECK: vpmovsqw %xmm30, (%rcx) {%k4} -// CHECK: encoding: [0x62,0x62,0x7e,0x0c,0x24,0x31] - vpmovsqw %xmm30, (%rcx) {%k4} - -// CHECK: vpmovsqw %xmm30, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x24,0xb4,0xf0,0x23,0x01,0x00,0x00] - vpmovsqw %xmm30, 291(%rax,%r14,8) - -// CHECK: vpmovsqw %xmm30, 508(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x24,0x72,0x7f] - vpmovsqw %xmm30, 508(%rdx) - -// CHECK: vpmovsqw %xmm30, 512(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x24,0xb2,0x00,0x02,0x00,0x00] - vpmovsqw %xmm30, 512(%rdx) - -// CHECK: vpmovsqw %xmm30, -512(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x24,0x72,0x80] - vpmovsqw %xmm30, -512(%rdx) - -// CHECK: vpmovsqw %xmm30, -516(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x24,0xb2,0xfc,0xfd,0xff,0xff] - vpmovsqw %xmm30, -516(%rdx) - -// CHECK: vpmovsqw %ymm21, (%rcx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x24,0x29] - vpmovsqw %ymm21, (%rcx) - -// CHECK: vpmovsqw %ymm21, (%rcx) {%k5} -// CHECK: encoding: [0x62,0xe2,0x7e,0x2d,0x24,0x29] - vpmovsqw %ymm21, (%rcx) {%k5} - -// CHECK: vpmovsqw %ymm21, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x24,0xac,0xf0,0x23,0x01,0x00,0x00] - vpmovsqw %ymm21, 291(%rax,%r14,8) - -// CHECK: vpmovsqw %ymm21, 1016(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x24,0x6a,0x7f] - vpmovsqw %ymm21, 1016(%rdx) - -// CHECK: vpmovsqw %ymm21, 1024(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x24,0xaa,0x00,0x04,0x00,0x00] - vpmovsqw %ymm21, 1024(%rdx) - -// CHECK: vpmovsqw %ymm21, -1024(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x24,0x6a,0x80] - vpmovsqw %ymm21, -1024(%rdx) - -// CHECK: vpmovsqw %ymm21, -1032(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x24,0xaa,0xf8,0xfb,0xff,0xff] - vpmovsqw %ymm21, -1032(%rdx) - -// CHECK: vpmovusqw %xmm20, %xmm29 -// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x14,0xe5] - vpmovusqw %xmm20, %xmm29 - -// CHECK: vpmovusqw %xmm20, %xmm29 {%k1} -// CHECK: encoding: [0x62,0x82,0x7e,0x09,0x14,0xe5] - vpmovusqw %xmm20, %xmm29 {%k1} - -// CHECK: vpmovusqw %xmm20, %xmm29 {%k1} {z} -// CHECK: encoding: [0x62,0x82,0x7e,0x89,0x14,0xe5] - vpmovusqw %xmm20, %xmm29 {%k1} {z} - -// CHECK: vpmovusqw %ymm21, %xmm20 -// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x14,0xec] - vpmovusqw %ymm21, %xmm20 - -// CHECK: vpmovusqw %ymm21, %xmm20 {%k5} -// CHECK: encoding: [0x62,0xa2,0x7e,0x2d,0x14,0xec] - vpmovusqw %ymm21, %xmm20 {%k5} - -// CHECK: vpmovusqw %ymm21, %xmm20 {%k5} {z} -// CHECK: encoding: [0x62,0xa2,0x7e,0xad,0x14,0xec] - vpmovusqw %ymm21, %xmm20 {%k5} {z} - -// CHECK: vpmovusqw %xmm18, (%rcx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x14,0x11] - vpmovusqw %xmm18, (%rcx) - -// CHECK: vpmovusqw %xmm18, (%rcx) {%k1} -// CHECK: encoding: [0x62,0xe2,0x7e,0x09,0x14,0x11] - vpmovusqw %xmm18, (%rcx) {%k1} - -// CHECK: vpmovusqw %xmm18, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x14,0x94,0xf0,0x23,0x01,0x00,0x00] - vpmovusqw %xmm18, 291(%rax,%r14,8) - -// CHECK: vpmovusqw %xmm18, 508(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x14,0x52,0x7f] - vpmovusqw %xmm18, 508(%rdx) - -// CHECK: vpmovusqw %xmm18, 512(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x14,0x92,0x00,0x02,0x00,0x00] - vpmovusqw %xmm18, 512(%rdx) - -// CHECK: vpmovusqw %xmm18, -512(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x14,0x52,0x80] - vpmovusqw %xmm18, -512(%rdx) - -// CHECK: vpmovusqw %xmm18, -516(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x14,0x92,0xfc,0xfd,0xff,0xff] - vpmovusqw %xmm18, -516(%rdx) - -// CHECK: vpmovusqw %ymm18, (%rcx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x14,0x11] - vpmovusqw %ymm18, (%rcx) - -// CHECK: vpmovusqw %ymm18, (%rcx) {%k2} -// CHECK: encoding: [0x62,0xe2,0x7e,0x2a,0x14,0x11] - vpmovusqw %ymm18, (%rcx) {%k2} - -// CHECK: vpmovusqw %ymm18, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x14,0x94,0xf0,0x23,0x01,0x00,0x00] - vpmovusqw %ymm18, 291(%rax,%r14,8) - -// CHECK: vpmovusqw %ymm18, 1016(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x14,0x52,0x7f] - vpmovusqw %ymm18, 1016(%rdx) - -// CHECK: vpmovusqw %ymm18, 1024(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x14,0x92,0x00,0x04,0x00,0x00] - vpmovusqw %ymm18, 1024(%rdx) - -// CHECK: vpmovusqw %ymm18, -1024(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x14,0x52,0x80] - vpmovusqw %ymm18, -1024(%rdx) - -// CHECK: vpmovusqw %ymm18, -1032(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x14,0x92,0xf8,0xfb,0xff,0xff] - vpmovusqw %ymm18, -1032(%rdx) - -// CHECK: vpmovqd %xmm25, %xmm21 -// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x35,0xcd] - vpmovqd %xmm25, %xmm21 - -// CHECK: vpmovqd %xmm25, %xmm21 {%k5} -// CHECK: encoding: [0x62,0x22,0x7e,0x0d,0x35,0xcd] - vpmovqd %xmm25, %xmm21 {%k5} - -// CHECK: vpmovqd %xmm25, %xmm21 {%k5} {z} -// CHECK: encoding: [0x62,0x22,0x7e,0x8d,0x35,0xcd] - vpmovqd %xmm25, %xmm21 {%k5} {z} - -// CHECK: vpmovqd %ymm22, %xmm21 -// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x35,0xf5] - vpmovqd %ymm22, %xmm21 - -// CHECK: vpmovqd %ymm22, %xmm21 {%k6} -// CHECK: encoding: [0x62,0xa2,0x7e,0x2e,0x35,0xf5] - vpmovqd %ymm22, %xmm21 {%k6} - -// CHECK: vpmovqd %ymm22, %xmm21 {%k6} {z} -// CHECK: encoding: [0x62,0xa2,0x7e,0xae,0x35,0xf5] - vpmovqd %ymm22, %xmm21 {%k6} {z} - -// CHECK: vpmovqd %xmm29, (%rcx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x35,0x29] - vpmovqd %xmm29, (%rcx) - -// CHECK: vpmovqd %xmm29, (%rcx) {%k6} -// CHECK: encoding: [0x62,0x62,0x7e,0x0e,0x35,0x29] - vpmovqd %xmm29, (%rcx) {%k6} - -// CHECK: vpmovqd %xmm29, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x35,0xac,0xf0,0x23,0x01,0x00,0x00] - vpmovqd %xmm29, 291(%rax,%r14,8) - -// CHECK: vpmovqd %xmm29, 1016(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x35,0x6a,0x7f] - vpmovqd %xmm29, 1016(%rdx) - -// CHECK: vpmovqd %xmm29, 1024(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x35,0xaa,0x00,0x04,0x00,0x00] - vpmovqd %xmm29, 1024(%rdx) - -// CHECK: vpmovqd %xmm29, -1024(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x35,0x6a,0x80] - vpmovqd %xmm29, -1024(%rdx) - -// CHECK: vpmovqd %xmm29, -1032(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x35,0xaa,0xf8,0xfb,0xff,0xff] - vpmovqd %xmm29, -1032(%rdx) - -// CHECK: vpmovqd %ymm30, (%rcx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x35,0x31] - vpmovqd %ymm30, (%rcx) - -// CHECK: vpmovqd %ymm30, (%rcx) {%k2} -// CHECK: encoding: [0x62,0x62,0x7e,0x2a,0x35,0x31] - vpmovqd %ymm30, (%rcx) {%k2} - -// CHECK: vpmovqd %ymm30, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x35,0xb4,0xf0,0x23,0x01,0x00,0x00] - vpmovqd %ymm30, 291(%rax,%r14,8) - -// CHECK: vpmovqd %ymm30, 2032(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x35,0x72,0x7f] - vpmovqd %ymm30, 2032(%rdx) - -// CHECK: vpmovqd %ymm30, 2048(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x35,0xb2,0x00,0x08,0x00,0x00] - vpmovqd %ymm30, 2048(%rdx) - -// CHECK: vpmovqd %ymm30, -2048(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x35,0x72,0x80] - vpmovqd %ymm30, -2048(%rdx) - -// CHECK: vpmovqd %ymm30, -2064(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x35,0xb2,0xf0,0xf7,0xff,0xff] - vpmovqd %ymm30, -2064(%rdx) - -// CHECK: vpmovsqd %xmm21, %xmm21 -// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x25,0xed] - vpmovsqd %xmm21, %xmm21 - -// CHECK: vpmovsqd %xmm21, %xmm21 {%k2} -// CHECK: encoding: [0x62,0xa2,0x7e,0x0a,0x25,0xed] - vpmovsqd %xmm21, %xmm21 {%k2} - -// CHECK: vpmovsqd %xmm21, %xmm21 {%k2} {z} -// CHECK: encoding: [0x62,0xa2,0x7e,0x8a,0x25,0xed] - vpmovsqd %xmm21, %xmm21 {%k2} {z} - -// CHECK: vpmovsqd %ymm29, %xmm29 -// CHECK: encoding: [0x62,0x02,0x7e,0x28,0x25,0xed] - vpmovsqd %ymm29, %xmm29 - -// CHECK: vpmovsqd %ymm29, %xmm29 {%k4} -// CHECK: encoding: [0x62,0x02,0x7e,0x2c,0x25,0xed] - vpmovsqd %ymm29, %xmm29 {%k4} - -// CHECK: vpmovsqd %ymm29, %xmm29 {%k4} {z} -// CHECK: encoding: [0x62,0x02,0x7e,0xac,0x25,0xed] - vpmovsqd %ymm29, %xmm29 {%k4} {z} - -// CHECK: vpmovsqd %xmm17, (%rcx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x25,0x09] - vpmovsqd %xmm17, (%rcx) - -// CHECK: vpmovsqd %xmm17, (%rcx) {%k2} -// CHECK: encoding: [0x62,0xe2,0x7e,0x0a,0x25,0x09] - vpmovsqd %xmm17, (%rcx) {%k2} - -// CHECK: vpmovsqd %xmm17, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x25,0x8c,0xf0,0x23,0x01,0x00,0x00] - vpmovsqd %xmm17, 291(%rax,%r14,8) - -// CHECK: vpmovsqd %xmm17, 1016(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x25,0x4a,0x7f] - vpmovsqd %xmm17, 1016(%rdx) - -// CHECK: vpmovsqd %xmm17, 1024(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x25,0x8a,0x00,0x04,0x00,0x00] - vpmovsqd %xmm17, 1024(%rdx) - -// CHECK: vpmovsqd %xmm17, -1024(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x25,0x4a,0x80] - vpmovsqd %xmm17, -1024(%rdx) - -// CHECK: vpmovsqd %xmm17, -1032(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x25,0x8a,0xf8,0xfb,0xff,0xff] - vpmovsqd %xmm17, -1032(%rdx) - -// CHECK: vpmovsqd %ymm23, (%rcx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x25,0x39] - vpmovsqd %ymm23, (%rcx) - -// CHECK: vpmovsqd %ymm23, (%rcx) {%k5} -// CHECK: encoding: [0x62,0xe2,0x7e,0x2d,0x25,0x39] - vpmovsqd %ymm23, (%rcx) {%k5} - -// CHECK: vpmovsqd %ymm23, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x25,0xbc,0xf0,0x23,0x01,0x00,0x00] - vpmovsqd %ymm23, 291(%rax,%r14,8) - -// CHECK: vpmovsqd %ymm23, 2032(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x25,0x7a,0x7f] - vpmovsqd %ymm23, 2032(%rdx) - -// CHECK: vpmovsqd %ymm23, 2048(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x25,0xba,0x00,0x08,0x00,0x00] - vpmovsqd %ymm23, 2048(%rdx) - -// CHECK: vpmovsqd %ymm23, -2048(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x25,0x7a,0x80] - vpmovsqd %ymm23, -2048(%rdx) - -// CHECK: vpmovsqd %ymm23, -2064(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x25,0xba,0xf0,0xf7,0xff,0xff] - vpmovsqd %ymm23, -2064(%rdx) - -// CHECK: vpmovusqd %xmm21, %xmm25 -// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x15,0xe9] - vpmovusqd %xmm21, %xmm25 - -// CHECK: vpmovusqd %xmm21, %xmm25 {%k5} -// CHECK: encoding: [0x62,0x82,0x7e,0x0d,0x15,0xe9] - vpmovusqd %xmm21, %xmm25 {%k5} - -// CHECK: vpmovusqd %xmm21, %xmm25 {%k5} {z} -// CHECK: encoding: [0x62,0x82,0x7e,0x8d,0x15,0xe9] - vpmovusqd %xmm21, %xmm25 {%k5} {z} - -// CHECK: vpmovusqd %ymm21, %xmm20 -// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x15,0xec] - vpmovusqd %ymm21, %xmm20 - -// CHECK: vpmovusqd %ymm21, %xmm20 {%k2} -// CHECK: encoding: [0x62,0xa2,0x7e,0x2a,0x15,0xec] - vpmovusqd %ymm21, %xmm20 {%k2} - -// CHECK: vpmovusqd %ymm21, %xmm20 {%k2} {z} -// CHECK: encoding: [0x62,0xa2,0x7e,0xaa,0x15,0xec] - vpmovusqd %ymm21, %xmm20 {%k2} {z} - -// CHECK: vpmovusqd %xmm18, (%rcx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x15,0x11] - vpmovusqd %xmm18, (%rcx) - -// CHECK: vpmovusqd %xmm18, (%rcx) {%k1} -// CHECK: encoding: [0x62,0xe2,0x7e,0x09,0x15,0x11] - vpmovusqd %xmm18, (%rcx) {%k1} - -// CHECK: vpmovusqd %xmm18, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x15,0x94,0xf0,0x23,0x01,0x00,0x00] - vpmovusqd %xmm18, 291(%rax,%r14,8) - -// CHECK: vpmovusqd %xmm18, 1016(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x15,0x52,0x7f] - vpmovusqd %xmm18, 1016(%rdx) - -// CHECK: vpmovusqd %xmm18, 1024(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x15,0x92,0x00,0x04,0x00,0x00] - vpmovusqd %xmm18, 1024(%rdx) - -// CHECK: vpmovusqd %xmm18, -1024(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x15,0x52,0x80] - vpmovusqd %xmm18, -1024(%rdx) - -// CHECK: vpmovusqd %xmm18, -1032(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x15,0x92,0xf8,0xfb,0xff,0xff] - vpmovusqd %xmm18, -1032(%rdx) - -// CHECK: vpmovusqd %ymm29, (%rcx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x15,0x29] - vpmovusqd %ymm29, (%rcx) - -// CHECK: vpmovusqd %ymm29, (%rcx) {%k6} -// CHECK: encoding: [0x62,0x62,0x7e,0x2e,0x15,0x29] - vpmovusqd %ymm29, (%rcx) {%k6} - -// CHECK: vpmovusqd %ymm29, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x15,0xac,0xf0,0x23,0x01,0x00,0x00] - vpmovusqd %ymm29, 291(%rax,%r14,8) - -// CHECK: vpmovusqd %ymm29, 2032(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x15,0x6a,0x7f] - vpmovusqd %ymm29, 2032(%rdx) - -// CHECK: vpmovusqd %ymm29, 2048(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x15,0xaa,0x00,0x08,0x00,0x00] - vpmovusqd %ymm29, 2048(%rdx) - -// CHECK: vpmovusqd %ymm29, -2048(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x15,0x6a,0x80] - vpmovusqd %ymm29, -2048(%rdx) - -// CHECK: vpmovusqd %ymm29, -2064(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x15,0xaa,0xf0,0xf7,0xff,0xff] - vpmovusqd %ymm29, -2064(%rdx) - -// CHECK: vpmovdb %xmm21, %xmm30 -// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x31,0xee] - vpmovdb %xmm21, %xmm30 - -// CHECK: vpmovdb %xmm21, %xmm30 {%k3} -// CHECK: encoding: [0x62,0x82,0x7e,0x0b,0x31,0xee] - vpmovdb %xmm21, %xmm30 {%k3} - -// CHECK: vpmovdb %xmm21, %xmm30 {%k3} {z} -// CHECK: encoding: [0x62,0x82,0x7e,0x8b,0x31,0xee] - vpmovdb %xmm21, %xmm30 {%k3} {z} - -// CHECK: vpmovdb %ymm21, %xmm23 -// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x31,0xef] - vpmovdb %ymm21, %xmm23 - -// CHECK: vpmovdb %ymm21, %xmm23 {%k4} -// CHECK: encoding: [0x62,0xa2,0x7e,0x2c,0x31,0xef] - vpmovdb %ymm21, %xmm23 {%k4} - -// CHECK: vpmovdb %ymm21, %xmm23 {%k4} {z} -// CHECK: encoding: [0x62,0xa2,0x7e,0xac,0x31,0xef] - vpmovdb %ymm21, %xmm23 {%k4} {z} - -// CHECK: vpmovdb %xmm29, (%rcx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x31,0x29] - vpmovdb %xmm29, (%rcx) - -// CHECK: vpmovdb %xmm29, (%rcx) {%k3} -// CHECK: encoding: [0x62,0x62,0x7e,0x0b,0x31,0x29] - vpmovdb %xmm29, (%rcx) {%k3} - -// CHECK: vpmovdb %xmm29, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x31,0xac,0xf0,0x23,0x01,0x00,0x00] - vpmovdb %xmm29, 291(%rax,%r14,8) - -// CHECK: vpmovdb %xmm29, 508(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x31,0x6a,0x7f] - vpmovdb %xmm29, 508(%rdx) - -// CHECK: vpmovdb %xmm29, 512(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x31,0xaa,0x00,0x02,0x00,0x00] - vpmovdb %xmm29, 512(%rdx) - -// CHECK: vpmovdb %xmm29, -512(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x31,0x6a,0x80] - vpmovdb %xmm29, -512(%rdx) - -// CHECK: vpmovdb %xmm29, -516(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x31,0xaa,0xfc,0xfd,0xff,0xff] - vpmovdb %xmm29, -516(%rdx) - -// CHECK: vpmovdb %ymm26, (%rcx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x31,0x11] - vpmovdb %ymm26, (%rcx) - -// CHECK: vpmovdb %ymm26, (%rcx) {%k6} -// CHECK: encoding: [0x62,0x62,0x7e,0x2e,0x31,0x11] - vpmovdb %ymm26, (%rcx) {%k6} - -// CHECK: vpmovdb %ymm26, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x31,0x94,0xf0,0x23,0x01,0x00,0x00] - vpmovdb %ymm26, 291(%rax,%r14,8) - -// CHECK: vpmovdb %ymm26, 1016(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x31,0x52,0x7f] - vpmovdb %ymm26, 1016(%rdx) - -// CHECK: vpmovdb %ymm26, 1024(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x31,0x92,0x00,0x04,0x00,0x00] - vpmovdb %ymm26, 1024(%rdx) - -// CHECK: vpmovdb %ymm26, -1024(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x31,0x52,0x80] - vpmovdb %ymm26, -1024(%rdx) - -// CHECK: vpmovdb %ymm26, -1032(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x31,0x92,0xf8,0xfb,0xff,0xff] - vpmovdb %ymm26, -1032(%rdx) - -// CHECK: vpmovsdb %xmm27, %xmm30 -// CHECK: encoding: [0x62,0x02,0x7e,0x08,0x21,0xde] - vpmovsdb %xmm27, %xmm30 - -// CHECK: vpmovsdb %xmm27, %xmm30 {%k1} -// CHECK: encoding: [0x62,0x02,0x7e,0x09,0x21,0xde] - vpmovsdb %xmm27, %xmm30 {%k1} - -// CHECK: vpmovsdb %xmm27, %xmm30 {%k1} {z} -// CHECK: encoding: [0x62,0x02,0x7e,0x89,0x21,0xde] - vpmovsdb %xmm27, %xmm30 {%k1} {z} - -// CHECK: vpmovsdb %ymm27, %xmm26 -// CHECK: encoding: [0x62,0x02,0x7e,0x28,0x21,0xda] - vpmovsdb %ymm27, %xmm26 - -// CHECK: vpmovsdb %ymm27, %xmm26 {%k3} -// CHECK: encoding: [0x62,0x02,0x7e,0x2b,0x21,0xda] - vpmovsdb %ymm27, %xmm26 {%k3} - -// CHECK: vpmovsdb %ymm27, %xmm26 {%k3} {z} -// CHECK: encoding: [0x62,0x02,0x7e,0xab,0x21,0xda] - vpmovsdb %ymm27, %xmm26 {%k3} {z} - -// CHECK: vpmovsdb %xmm30, (%rcx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x21,0x31] - vpmovsdb %xmm30, (%rcx) - -// CHECK: vpmovsdb %xmm30, (%rcx) {%k3} -// CHECK: encoding: [0x62,0x62,0x7e,0x0b,0x21,0x31] - vpmovsdb %xmm30, (%rcx) {%k3} - -// CHECK: vpmovsdb %xmm30, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x21,0xb4,0xf0,0x23,0x01,0x00,0x00] - vpmovsdb %xmm30, 291(%rax,%r14,8) - -// CHECK: vpmovsdb %xmm30, 508(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x21,0x72,0x7f] - vpmovsdb %xmm30, 508(%rdx) - -// CHECK: vpmovsdb %xmm30, 512(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x21,0xb2,0x00,0x02,0x00,0x00] - vpmovsdb %xmm30, 512(%rdx) - -// CHECK: vpmovsdb %xmm30, -512(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x21,0x72,0x80] - vpmovsdb %xmm30, -512(%rdx) - -// CHECK: vpmovsdb %xmm30, -516(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x21,0xb2,0xfc,0xfd,0xff,0xff] - vpmovsdb %xmm30, -516(%rdx) - -// CHECK: vpmovsdb %ymm25, (%rcx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x21,0x09] - vpmovsdb %ymm25, (%rcx) - -// CHECK: vpmovsdb %ymm25, (%rcx) {%k5} -// CHECK: encoding: [0x62,0x62,0x7e,0x2d,0x21,0x09] - vpmovsdb %ymm25, (%rcx) {%k5} - -// CHECK: vpmovsdb %ymm25, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x21,0x8c,0xf0,0x23,0x01,0x00,0x00] - vpmovsdb %ymm25, 291(%rax,%r14,8) - -// CHECK: vpmovsdb %ymm25, 1016(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x21,0x4a,0x7f] - vpmovsdb %ymm25, 1016(%rdx) - -// CHECK: vpmovsdb %ymm25, 1024(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x21,0x8a,0x00,0x04,0x00,0x00] - vpmovsdb %ymm25, 1024(%rdx) - -// CHECK: vpmovsdb %ymm25, -1024(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x21,0x4a,0x80] - vpmovsdb %ymm25, -1024(%rdx) - -// CHECK: vpmovsdb %ymm25, -1032(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x21,0x8a,0xf8,0xfb,0xff,0xff] - vpmovsdb %ymm25, -1032(%rdx) - -// CHECK: vpmovusdb %xmm29, %xmm30 -// CHECK: encoding: [0x62,0x02,0x7e,0x08,0x11,0xee] - vpmovusdb %xmm29, %xmm30 - -// CHECK: vpmovusdb %xmm29, %xmm30 {%k7} -// CHECK: encoding: [0x62,0x02,0x7e,0x0f,0x11,0xee] - vpmovusdb %xmm29, %xmm30 {%k7} - -// CHECK: vpmovusdb %xmm29, %xmm30 {%k7} {z} -// CHECK: encoding: [0x62,0x02,0x7e,0x8f,0x11,0xee] - vpmovusdb %xmm29, %xmm30 {%k7} {z} - -// CHECK: vpmovusdb %ymm17, %xmm23 -// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x11,0xcf] - vpmovusdb %ymm17, %xmm23 - -// CHECK: vpmovusdb %ymm17, %xmm23 {%k6} -// CHECK: encoding: [0x62,0xa2,0x7e,0x2e,0x11,0xcf] - vpmovusdb %ymm17, %xmm23 {%k6} - -// CHECK: vpmovusdb %ymm17, %xmm23 {%k6} {z} -// CHECK: encoding: [0x62,0xa2,0x7e,0xae,0x11,0xcf] - vpmovusdb %ymm17, %xmm23 {%k6} {z} - -// CHECK: vpmovusdb %xmm26, (%rcx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x11,0x11] - vpmovusdb %xmm26, (%rcx) - -// CHECK: vpmovusdb %xmm26, (%rcx) {%k7} -// CHECK: encoding: [0x62,0x62,0x7e,0x0f,0x11,0x11] - vpmovusdb %xmm26, (%rcx) {%k7} - -// CHECK: vpmovusdb %xmm26, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x11,0x94,0xf0,0x23,0x01,0x00,0x00] - vpmovusdb %xmm26, 291(%rax,%r14,8) - -// CHECK: vpmovusdb %xmm26, 508(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x11,0x52,0x7f] - vpmovusdb %xmm26, 508(%rdx) - -// CHECK: vpmovusdb %xmm26, 512(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x11,0x92,0x00,0x02,0x00,0x00] - vpmovusdb %xmm26, 512(%rdx) - -// CHECK: vpmovusdb %xmm26, -512(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x11,0x52,0x80] - vpmovusdb %xmm26, -512(%rdx) - -// CHECK: vpmovusdb %xmm26, -516(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x11,0x92,0xfc,0xfd,0xff,0xff] - vpmovusdb %xmm26, -516(%rdx) - -// CHECK: vpmovusdb %ymm25, (%rcx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x11,0x09] - vpmovusdb %ymm25, (%rcx) - -// CHECK: vpmovusdb %ymm25, (%rcx) {%k6} -// CHECK: encoding: [0x62,0x62,0x7e,0x2e,0x11,0x09] - vpmovusdb %ymm25, (%rcx) {%k6} - -// CHECK: vpmovusdb %ymm25, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x11,0x8c,0xf0,0x23,0x01,0x00,0x00] - vpmovusdb %ymm25, 291(%rax,%r14,8) - -// CHECK: vpmovusdb %ymm25, 1016(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x11,0x4a,0x7f] - vpmovusdb %ymm25, 1016(%rdx) - -// CHECK: vpmovusdb %ymm25, 1024(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x11,0x8a,0x00,0x04,0x00,0x00] - vpmovusdb %ymm25, 1024(%rdx) - -// CHECK: vpmovusdb %ymm25, -1024(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x11,0x4a,0x80] - vpmovusdb %ymm25, -1024(%rdx) - -// CHECK: vpmovusdb %ymm25, -1032(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x11,0x8a,0xf8,0xfb,0xff,0xff] - vpmovusdb %ymm25, -1032(%rdx) - -// CHECK: vpmovdw %xmm25, %xmm17 -// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x33,0xc9] - vpmovdw %xmm25, %xmm17 - -// CHECK: vpmovdw %xmm25, %xmm17 {%k5} -// CHECK: encoding: [0x62,0x22,0x7e,0x0d,0x33,0xc9] - vpmovdw %xmm25, %xmm17 {%k5} - -// CHECK: vpmovdw %xmm25, %xmm17 {%k5} {z} -// CHECK: encoding: [0x62,0x22,0x7e,0x8d,0x33,0xc9] - vpmovdw %xmm25, %xmm17 {%k5} {z} - -// CHECK: vpmovdw %ymm19, %xmm25 -// CHECK: encoding: [0x62,0x82,0x7e,0x28,0x33,0xd9] - vpmovdw %ymm19, %xmm25 - -// CHECK: vpmovdw %ymm19, %xmm25 {%k4} -// CHECK: encoding: [0x62,0x82,0x7e,0x2c,0x33,0xd9] - vpmovdw %ymm19, %xmm25 {%k4} - -// CHECK: vpmovdw %ymm19, %xmm25 {%k4} {z} -// CHECK: encoding: [0x62,0x82,0x7e,0xac,0x33,0xd9] - vpmovdw %ymm19, %xmm25 {%k4} {z} - -// CHECK: vpmovdw %xmm21, (%rcx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x33,0x29] - vpmovdw %xmm21, (%rcx) - -// CHECK: vpmovdw %xmm21, (%rcx) {%k2} -// CHECK: encoding: [0x62,0xe2,0x7e,0x0a,0x33,0x29] - vpmovdw %xmm21, (%rcx) {%k2} - -// CHECK: vpmovdw %xmm21, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x33,0xac,0xf0,0x23,0x01,0x00,0x00] - vpmovdw %xmm21, 291(%rax,%r14,8) - -// CHECK: vpmovdw %xmm21, 1016(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x33,0x6a,0x7f] - vpmovdw %xmm21, 1016(%rdx) - -// CHECK: vpmovdw %xmm21, 1024(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x33,0xaa,0x00,0x04,0x00,0x00] - vpmovdw %xmm21, 1024(%rdx) - -// CHECK: vpmovdw %xmm21, -1024(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x33,0x6a,0x80] - vpmovdw %xmm21, -1024(%rdx) - -// CHECK: vpmovdw %xmm21, -1032(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x33,0xaa,0xf8,0xfb,0xff,0xff] - vpmovdw %xmm21, -1032(%rdx) - -// CHECK: vpmovdw %ymm22, (%rcx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x33,0x31] - vpmovdw %ymm22, (%rcx) - -// CHECK: vpmovdw %ymm22, (%rcx) {%k6} -// CHECK: encoding: [0x62,0xe2,0x7e,0x2e,0x33,0x31] - vpmovdw %ymm22, (%rcx) {%k6} - -// CHECK: vpmovdw %ymm22, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x33,0xb4,0xf0,0x23,0x01,0x00,0x00] - vpmovdw %ymm22, 291(%rax,%r14,8) - -// CHECK: vpmovdw %ymm22, 2032(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x33,0x72,0x7f] - vpmovdw %ymm22, 2032(%rdx) - -// CHECK: vpmovdw %ymm22, 2048(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x33,0xb2,0x00,0x08,0x00,0x00] - vpmovdw %ymm22, 2048(%rdx) - -// CHECK: vpmovdw %ymm22, -2048(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x33,0x72,0x80] - vpmovdw %ymm22, -2048(%rdx) - -// CHECK: vpmovdw %ymm22, -2064(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x33,0xb2,0xf0,0xf7,0xff,0xff] - vpmovdw %ymm22, -2064(%rdx) - -// CHECK: vpmovsdw %xmm18, %xmm18 -// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x23,0xd2] - vpmovsdw %xmm18, %xmm18 - -// CHECK: vpmovsdw %xmm18, %xmm18 {%k6} -// CHECK: encoding: [0x62,0xa2,0x7e,0x0e,0x23,0xd2] - vpmovsdw %xmm18, %xmm18 {%k6} - -// CHECK: vpmovsdw %xmm18, %xmm18 {%k6} {z} -// CHECK: encoding: [0x62,0xa2,0x7e,0x8e,0x23,0xd2] - vpmovsdw %xmm18, %xmm18 {%k6} {z} - -// CHECK: vpmovsdw %ymm18, %xmm20 -// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x23,0xd4] - vpmovsdw %ymm18, %xmm20 - -// CHECK: vpmovsdw %ymm18, %xmm20 {%k2} -// CHECK: encoding: [0x62,0xa2,0x7e,0x2a,0x23,0xd4] - vpmovsdw %ymm18, %xmm20 {%k2} - -// CHECK: vpmovsdw %ymm18, %xmm20 {%k2} {z} -// CHECK: encoding: [0x62,0xa2,0x7e,0xaa,0x23,0xd4] - vpmovsdw %ymm18, %xmm20 {%k2} {z} - -// CHECK: vpmovsdw %xmm29, (%rcx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x23,0x29] - vpmovsdw %xmm29, (%rcx) - -// CHECK: vpmovsdw %xmm29, (%rcx) {%k1} -// CHECK: encoding: [0x62,0x62,0x7e,0x09,0x23,0x29] - vpmovsdw %xmm29, (%rcx) {%k1} - -// CHECK: vpmovsdw %xmm29, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x23,0xac,0xf0,0x23,0x01,0x00,0x00] - vpmovsdw %xmm29, 291(%rax,%r14,8) - -// CHECK: vpmovsdw %xmm29, 1016(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x23,0x6a,0x7f] - vpmovsdw %xmm29, 1016(%rdx) - -// CHECK: vpmovsdw %xmm29, 1024(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x23,0xaa,0x00,0x04,0x00,0x00] - vpmovsdw %xmm29, 1024(%rdx) - -// CHECK: vpmovsdw %xmm29, -1024(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x23,0x6a,0x80] - vpmovsdw %xmm29, -1024(%rdx) - -// CHECK: vpmovsdw %xmm29, -1032(%rdx) -// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x23,0xaa,0xf8,0xfb,0xff,0xff] - vpmovsdw %xmm29, -1032(%rdx) - -// CHECK: vpmovsdw %ymm19, (%rcx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x23,0x19] - vpmovsdw %ymm19, (%rcx) - -// CHECK: vpmovsdw %ymm19, (%rcx) {%k6} -// CHECK: encoding: [0x62,0xe2,0x7e,0x2e,0x23,0x19] - vpmovsdw %ymm19, (%rcx) {%k6} - -// CHECK: vpmovsdw %ymm19, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x23,0x9c,0xf0,0x23,0x01,0x00,0x00] - vpmovsdw %ymm19, 291(%rax,%r14,8) - -// CHECK: vpmovsdw %ymm19, 2032(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x23,0x5a,0x7f] - vpmovsdw %ymm19, 2032(%rdx) - -// CHECK: vpmovsdw %ymm19, 2048(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x23,0x9a,0x00,0x08,0x00,0x00] - vpmovsdw %ymm19, 2048(%rdx) - -// CHECK: vpmovsdw %ymm19, -2048(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x23,0x5a,0x80] - vpmovsdw %ymm19, -2048(%rdx) - -// CHECK: vpmovsdw %ymm19, -2064(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x23,0x9a,0xf0,0xf7,0xff,0xff] - vpmovsdw %ymm19, -2064(%rdx) - -// CHECK: vpmovusdw %xmm18, %xmm18 -// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x13,0xd2] - vpmovusdw %xmm18, %xmm18 - -// CHECK: vpmovusdw %xmm18, %xmm18 {%k2} -// CHECK: encoding: [0x62,0xa2,0x7e,0x0a,0x13,0xd2] - vpmovusdw %xmm18, %xmm18 {%k2} - -// CHECK: vpmovusdw %xmm18, %xmm18 {%k2} {z} -// CHECK: encoding: [0x62,0xa2,0x7e,0x8a,0x13,0xd2] - vpmovusdw %xmm18, %xmm18 {%k2} {z} - -// CHECK: vpmovusdw %ymm25, %xmm28 -// CHECK: encoding: [0x62,0x02,0x7e,0x28,0x13,0xcc] - vpmovusdw %ymm25, %xmm28 - -// CHECK: vpmovusdw %ymm25, %xmm28 {%k4} -// CHECK: encoding: [0x62,0x02,0x7e,0x2c,0x13,0xcc] - vpmovusdw %ymm25, %xmm28 {%k4} - -// CHECK: vpmovusdw %ymm25, %xmm28 {%k4} {z} -// CHECK: encoding: [0x62,0x02,0x7e,0xac,0x13,0xcc] - vpmovusdw %ymm25, %xmm28 {%k4} {z} - -// CHECK: vpmovusdw %xmm20, (%rcx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x13,0x21] - vpmovusdw %xmm20, (%rcx) - -// CHECK: vpmovusdw %xmm20, (%rcx) {%k6} -// CHECK: encoding: [0x62,0xe2,0x7e,0x0e,0x13,0x21] - vpmovusdw %xmm20, (%rcx) {%k6} - -// CHECK: vpmovusdw %xmm20, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x13,0xa4,0xf0,0x23,0x01,0x00,0x00] - vpmovusdw %xmm20, 291(%rax,%r14,8) - -// CHECK: vpmovusdw %xmm20, 1016(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x13,0x62,0x7f] - vpmovusdw %xmm20, 1016(%rdx) - -// CHECK: vpmovusdw %xmm20, 1024(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x13,0xa2,0x00,0x04,0x00,0x00] - vpmovusdw %xmm20, 1024(%rdx) - -// CHECK: vpmovusdw %xmm20, -1024(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x13,0x62,0x80] - vpmovusdw %xmm20, -1024(%rdx) - -// CHECK: vpmovusdw %xmm20, -1032(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x13,0xa2,0xf8,0xfb,0xff,0xff] - vpmovusdw %xmm20, -1032(%rdx) - -// CHECK: vpmovusdw %ymm23, (%rcx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x13,0x39] - vpmovusdw %ymm23, (%rcx) - -// CHECK: vpmovusdw %ymm23, (%rcx) {%k1} -// CHECK: encoding: [0x62,0xe2,0x7e,0x29,0x13,0x39] - vpmovusdw %ymm23, (%rcx) {%k1} - -// CHECK: vpmovusdw %ymm23, 291(%rax,%r14,8) -// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x13,0xbc,0xf0,0x23,0x01,0x00,0x00] - vpmovusdw %ymm23, 291(%rax,%r14,8) - -// CHECK: vpmovusdw %ymm23, 2032(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x13,0x7a,0x7f] - vpmovusdw %ymm23, 2032(%rdx) - -// CHECK: vpmovusdw %ymm23, 2048(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x13,0xba,0x00,0x08,0x00,0x00] - vpmovusdw %ymm23, 2048(%rdx) - -// CHECK: vpmovusdw %ymm23, -2048(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x13,0x7a,0x80] - vpmovusdw %ymm23, -2048(%rdx) - -// CHECK: vpmovusdw %ymm23, -2064(%rdx) -// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x13,0xba,0xf0,0xf7,0xff,0xff] - vpmovusdw %ymm23, -2064(%rdx) - // CHECK: vrndscalepd $171, %xmm28, %xmm29 // CHECK: encoding: [0x62,0x03,0xfd,0x08,0x09,0xec,0xab] vrndscalepd $0xab, %xmm28, %xmm29