From 633f98bdfa266871dcd17ab27af1594c6cc31d9e Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Sun, 3 Nov 2013 13:46:31 +0000 Subject: [PATCH] AVX-512: added VPCONFLICT instruction and intrinsics, added EVEX_KZ to tablegen git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193959 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsX86.td | 28 ++++++++ .../X86DisassemblerDecoderCommon.h | 50 ++++++++++++- lib/Target/X86/X86InstrAVX512.td | 70 +++++++++++++++++++ test/CodeGen/X86/avx512-intrinsics.ll | 23 ++++++ utils/TableGen/X86DisassemblerTables.cpp | 35 +++++++++- utils/TableGen/X86RecognizableInstr.cpp | 7 +- utils/TableGen/X86RecognizableInstr.h | 2 + 7 files changed, 209 insertions(+), 6 deletions(-) diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index c24fb0638f4..07e46815ebf 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -2995,6 +2995,34 @@ let TargetPrefix = "x86" in { []>; } +// AVX-512 conflict detection +let TargetPrefix = "x86" in { + def int_x86_avx512_conflict_d_512 : GCCBuiltin<"__builtin_ia32_condlictd512">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty], + []>; + def int_x86_avx512_conflict_d_mask_512 : + GCCBuiltin<"__builtin_ia32_mask_condlictd512">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, + llvm_v16i1_ty, llvm_v16i32_ty], + []>; + def int_x86_avx512_conflict_d_maskz_512: + GCCBuiltin<"__builtin_ia32_maskz_condlictd512">, + Intrinsic<[llvm_v16i32_ty], [llvm_v16i1_ty, llvm_v16i32_ty], + []>; + + def int_x86_avx512_conflict_q_512 : GCCBuiltin<"__builtin_ia32_condlictq512">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty], + []>; + def int_x86_avx512_conflict_q_mask_512 : + GCCBuiltin<"__builtin_ia32_mask_condlictq512">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, + llvm_v8i1_ty, llvm_v8i64_ty], + []>; + def int_x86_avx512_conflict_q_maskz_512: + GCCBuiltin<"__builtin_ia32_maskz_condlictq512">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i1_ty, llvm_v8i64_ty], + []>; +} // Misc. let TargetPrefix = "x86" in { def int_x86_avx512_mskblend_ps_512 : GCCBuiltin<"__builtin_ia32_mskblendps512">, diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index 86a90ee49d3..dd1719c64d7 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -221,7 +221,55 @@ enum attributeBits { ENUM_ENTRY(IC_EVEX_L2_W_K_B, 3, "requires EVEX_B, EVEX_K, L2 and W") \ ENUM_ENTRY(IC_EVEX_L2_W_XS_K_B, 4, "requires EVEX_B, EVEX_K, L2, W and XS prefix") \ ENUM_ENTRY(IC_EVEX_L2_W_XD_K_B, 4, "requires EVEX_B, EVEX_K, L2, W and XD prefix") \ - ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_K_B, 4, "requires EVEX_B, EVEX_K, L2, W and OpSize") + ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_K_B, 4, "requires EVEX_B, EVEX_K, L2, W and OpSize") \ + ENUM_ENTRY(IC_EVEX_KZ_B, 1, "requires EVEX_B and EVEX_KZ prefix") \ + ENUM_ENTRY(IC_EVEX_XS_KZ_B, 2, "requires EVEX_B, EVEX_KZ and the XS prefix") \ + ENUM_ENTRY(IC_EVEX_XD_KZ_B, 2, "requires EVEX_B, EVEX_KZ and the XD prefix") \ + ENUM_ENTRY(IC_EVEX_OPSIZE_KZ_B, 2, "requires EVEX_B, EVEX_KZ and the OpSize prefix") \ + ENUM_ENTRY(IC_EVEX_W_KZ_B, 3, "requires EVEX_B, EVEX_KZ and the W prefix") \ + ENUM_ENTRY(IC_EVEX_W_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ, W, and XS prefix") \ + ENUM_ENTRY(IC_EVEX_W_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ, W, and XD prefix") \ + ENUM_ENTRY(IC_EVEX_W_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, W, and OpSize") \ + ENUM_ENTRY(IC_EVEX_L_KZ_B, 3, "requires EVEX_B, EVEX_KZ and the L prefix") \ + ENUM_ENTRY(IC_EVEX_L_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ and the L and XS prefix")\ + ENUM_ENTRY(IC_EVEX_L_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ and the L and XD prefix")\ + ENUM_ENTRY(IC_EVEX_L_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L, and OpSize") \ + ENUM_ENTRY(IC_EVEX_L_W_KZ_B, 3, "requires EVEX_B, EVEX_KZ, L and W") \ + ENUM_ENTRY(IC_EVEX_L_W_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L, W and XS prefix") \ + ENUM_ENTRY(IC_EVEX_L_W_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L, W and XD prefix") \ + ENUM_ENTRY(IC_EVEX_L_W_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L, W and OpSize") \ + ENUM_ENTRY(IC_EVEX_L2_KZ_B, 3, "requires EVEX_B, EVEX_KZ and the L2 prefix") \ + ENUM_ENTRY(IC_EVEX_L2_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ and the L2 and XS prefix")\ + ENUM_ENTRY(IC_EVEX_L2_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ and the L2 and XD prefix")\ + ENUM_ENTRY(IC_EVEX_L2_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L2, and OpSize") \ + ENUM_ENTRY(IC_EVEX_L2_W_KZ_B, 3, "requires EVEX_B, EVEX_KZ, L2 and W") \ + ENUM_ENTRY(IC_EVEX_L2_W_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L2, W and XS prefix") \ + ENUM_ENTRY(IC_EVEX_L2_W_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L2, W and XD prefix") \ + ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L2, W and OpSize") \ + ENUM_ENTRY(IC_EVEX_KZ, 1, "requires an EVEX_KZ prefix") \ + ENUM_ENTRY(IC_EVEX_XS_KZ, 2, "requires EVEX_KZ and the XS prefix") \ + ENUM_ENTRY(IC_EVEX_XD_KZ, 2, "requires EVEX_KZ and the XD prefix") \ + ENUM_ENTRY(IC_EVEX_OPSIZE_KZ, 2, "requires EVEX_KZ and the OpSize prefix") \ + ENUM_ENTRY(IC_EVEX_W_KZ, 3, "requires EVEX_KZ and the W prefix") \ + ENUM_ENTRY(IC_EVEX_W_XS_KZ, 4, "requires EVEX_KZ, W, and XS prefix") \ + ENUM_ENTRY(IC_EVEX_W_XD_KZ, 4, "requires EVEX_KZ, W, and XD prefix") \ + ENUM_ENTRY(IC_EVEX_W_OPSIZE_KZ, 4, "requires EVEX_KZ, W, and OpSize") \ + ENUM_ENTRY(IC_EVEX_L_KZ, 3, "requires EVEX_KZ and the L prefix") \ + ENUM_ENTRY(IC_EVEX_L_XS_KZ, 4, "requires EVEX_KZ and the L and XS prefix")\ + ENUM_ENTRY(IC_EVEX_L_XD_KZ, 4, "requires EVEX_KZ and the L and XD prefix")\ + ENUM_ENTRY(IC_EVEX_L_OPSIZE_KZ, 4, "requires EVEX_KZ, L, and OpSize") \ + ENUM_ENTRY(IC_EVEX_L_W_KZ, 3, "requires EVEX_KZ, L and W") \ + ENUM_ENTRY(IC_EVEX_L_W_XS_KZ, 4, "requires EVEX_KZ, L, W and XS prefix") \ + ENUM_ENTRY(IC_EVEX_L_W_XD_KZ, 4, "requires EVEX_KZ, L, W and XD prefix") \ + ENUM_ENTRY(IC_EVEX_L_W_OPSIZE_KZ, 4, "requires EVEX_KZ, L, W and OpSize") \ + ENUM_ENTRY(IC_EVEX_L2_KZ, 3, "requires EVEX_KZ and the L2 prefix") \ + ENUM_ENTRY(IC_EVEX_L2_XS_KZ, 4, "requires EVEX_KZ and the L2 and XS prefix")\ + ENUM_ENTRY(IC_EVEX_L2_XD_KZ, 4, "requires EVEX_KZ and the L2 and XD prefix")\ + ENUM_ENTRY(IC_EVEX_L2_OPSIZE_KZ, 4, "requires EVEX_KZ, L2, and OpSize") \ + ENUM_ENTRY(IC_EVEX_L2_W_KZ, 3, "requires EVEX_KZ, L2 and W") \ + ENUM_ENTRY(IC_EVEX_L2_W_XS_KZ, 4, "requires EVEX_KZ, L2, W and XS prefix") \ + ENUM_ENTRY(IC_EVEX_L2_W_XD_KZ, 4, "requires EVEX_KZ, L2, W and XD prefix") \ + ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ, 4, "requires EVEX_KZ, L2, W and OpSize") #define ENUM_ENTRY(n, r, d) n, typedef enum { diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 5e854da1a0c..6ce5c38dd2a 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -3397,3 +3397,73 @@ defm VPABSD : avx512_vpabs<0x1E, "vpabsd", VR512, i512mem>, EVEX_V512, defm VPABSQ : avx512_vpabs<0x1F, "vpabsq", VR512, i512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +multiclass avx512_conflict opc, string OpcodeStr, + RegisterClass RC, RegisterClass KRC, PatFrag memop_frag, + X86MemOperand x86memop, PatFrag scalar_mfrag, + X86MemOperand x86scalar_mop, string BrdcstStr, + Intrinsic Int, Intrinsic maskInt, Intrinsic maskzInt> { + def rr : AVX5128I, EVEX; + def rm : AVX5128I, EVEX; + def rmb : AVX5128I, EVEX, EVEX_B; + def rrkz : AVX5128I, EVEX, EVEX_KZ; + def rmkz : AVX5128I, + EVEX, EVEX_KZ; + def rmbkz : AVX5128I, EVEX, EVEX_KZ, EVEX_B; + + let Constraints = "$src1 = $dst" in { + def rrk : AVX5128I, EVEX, EVEX_K; + def rmk : AVX5128I, EVEX, EVEX_K; + def rmbk : AVX5128I, EVEX, EVEX_K, EVEX_B; + } +} + +let Predicates = [HasCDI] in { +defm VPCONFLICTD : avx512_conflict<0xC4, "vpconflictd", VR512, VK16WM, + memopv16i32, i512mem, loadi32, i32mem, "{1to16}", + int_x86_avx512_conflict_d_512, + int_x86_avx512_conflict_d_mask_512, + int_x86_avx512_conflict_d_maskz_512>, + EVEX_V512, EVEX_CD8<32, CD8VF>; + +defm VPCONFLICTQ : avx512_conflict<0xC4, "vpconflictq", VR512, VK8WM, + memopv8i64, i512mem, loadi64, i64mem, "{1to8}", + int_x86_avx512_conflict_q_512, + int_x86_avx512_conflict_q_mask_512, + int_x86_avx512_conflict_q_maskz_512>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +} diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index 8a512487069..0570b21e78e 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -317,3 +317,26 @@ define <8 x i64> @test_x86_pmins_q(<8 x i64> %a0, <8 x i64> %a1) { ret <8 x i64> %res } declare <8 x i64> @llvm.x86.avx512.pmins.q(<8 x i64>, <8 x i64>) nounwind readonly + +define <16 x i32> @test_conflict_d(<16 x i32> %a) { + ; CHECK: vpconflictd + %res = call <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32> %a) + ret <16 x i32> %res +} +declare <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32>) nounwind readonly + +define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) { + ; CHECK: vpconflictd %zmm0, %zmm0 {%k1} {z} + %vmask = bitcast i16 %mask to <16 x i1> + %res = call <16 x i32> @llvm.x86.avx512.conflict.d.maskz.512(<16 x i1> %vmask, <16 x i32> %a) + ret <16 x i32> %res +} +declare <16 x i32> @llvm.x86.avx512.conflict.d.maskz.512(<16 x i1>,<16 x i32>) nounwind readonly + +define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { + ; CHECK: vpconflictq {{.*}} {%k1} + %vmask = bitcast i8 %mask to <8 x i1> + %res = call <8 x i64> @llvm.x86.avx512.conflict.q.mask.512(<8 x i64> %b, <8 x i1> %vmask, <8 x i64> %a) + ret <8 x i64> %res +} +declare <8 x i64> @llvm.x86.avx512.conflict.q.mask.512(<8 x i64>, <8 x i1>,<8 x i64>) nounwind readonly diff --git a/utils/TableGen/X86DisassemblerTables.cpp b/utils/TableGen/X86DisassemblerTables.cpp index 026870e65bc..bdb47931859 100644 --- a/utils/TableGen/X86DisassemblerTables.cpp +++ b/utils/TableGen/X86DisassemblerTables.cpp @@ -128,7 +128,7 @@ static inline bool inheritsFrom(InstructionContext child, inheritsFrom(child, IC_EVEX_L_W_XD); case IC_EVEX_OPSIZE: return inheritsFrom(child, IC_EVEX_W_OPSIZE) || - inheritsFrom(child, IC_EVEX_W_OPSIZE); + inheritsFrom(child, IC_EVEX_L_W_OPSIZE); case IC_EVEX_W: case IC_EVEX_W_XS: case IC_EVEX_W_XD: @@ -176,10 +176,24 @@ static inline bool inheritsFrom(InstructionContext child, case IC_EVEX_L_XD_K: case IC_EVEX_L_OPSIZE_K: return false; + case IC_EVEX_W_KZ: + case IC_EVEX_W_XS_KZ: + case IC_EVEX_W_XD_KZ: + case IC_EVEX_W_OPSIZE_KZ: + return false; + case IC_EVEX_L_KZ: + case IC_EVEX_L_XS_KZ: + case IC_EVEX_L_XD_KZ: + case IC_EVEX_L_OPSIZE_KZ: + return false; case IC_EVEX_L_W_K: case IC_EVEX_L_W_XS_K: case IC_EVEX_L_W_XD_K: case IC_EVEX_L_W_OPSIZE_K: + case IC_EVEX_L_W_KZ: + case IC_EVEX_L_W_XS_KZ: + case IC_EVEX_L_W_XD_KZ: + case IC_EVEX_L_W_OPSIZE_KZ: return false; case IC_EVEX_L2_K: case IC_EVEX_L2_B: @@ -187,12 +201,25 @@ static inline bool inheritsFrom(InstructionContext child, case IC_EVEX_L2_XD_K: case IC_EVEX_L2_OPSIZE_K: case IC_EVEX_L2_OPSIZE_B: + case IC_EVEX_L2_OPSIZE_K_B: + case IC_EVEX_L2_KZ: + case IC_EVEX_L2_XS_KZ: + case IC_EVEX_L2_XD_KZ: + case IC_EVEX_L2_OPSIZE_KZ: + case IC_EVEX_L2_OPSIZE_KZ_B: return false; case IC_EVEX_L2_W_K: + case IC_EVEX_L2_W_B: case IC_EVEX_L2_W_XS_K: case IC_EVEX_L2_W_XD_K: case IC_EVEX_L2_W_OPSIZE_K: case IC_EVEX_L2_W_OPSIZE_B: + case IC_EVEX_L2_W_OPSIZE_K_B: + case IC_EVEX_L2_W_KZ: + case IC_EVEX_L2_W_XS_KZ: + case IC_EVEX_L2_W_XD_KZ: + case IC_EVEX_L2_W_OPSIZE_KZ: + case IC_EVEX_L2_W_OPSIZE_KZ_B: return false; default: llvm_unreachable("Unknown instruction class"); @@ -213,7 +240,8 @@ static inline bool outranks(InstructionContext upper, #define ENUM_ENTRY(n, r, d) r, #define ENUM_ENTRY_K_B(n, r, d) ENUM_ENTRY(n, r, d) \ - ENUM_ENTRY(n##_K_B, r, d) ENUM_ENTRY(n##_K, r, d) ENUM_ENTRY(n##_B, r, d) + ENUM_ENTRY(n##_K_B, r, d) ENUM_ENTRY(n##_KZ_B, r, d) \ + ENUM_ENTRY(n##_KZ, r, d) ENUM_ENTRY(n##_K, r, d) ENUM_ENTRY(n##_B, r, d) static int ranks[IC_max] = { INSTRUCTION_CONTEXTS }; @@ -235,7 +263,8 @@ static inline const char* stringForContext(InstructionContext insnContext) { llvm_unreachable("Unhandled instruction class"); #define ENUM_ENTRY(n, r, d) case n: return #n; break; #define ENUM_ENTRY_K_B(n, r, d) ENUM_ENTRY(n, r, d) ENUM_ENTRY(n##_K_B, r, d)\ - ENUM_ENTRY(n##_K, r, d) ENUM_ENTRY(n##_B, r, d) + ENUM_ENTRY(n##_KZ, r, d) ENUM_ENTRY(n##_K, r, d) ENUM_ENTRY(n##_B, r, d)\ + ENUM_ENTRY(n##_KZ_B, r, d) INSTRUCTION_CONTEXTS #undef ENUM_ENTRY #undef ENUM_ENTRY_K_B diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp index b9666f131c0..708e72d36e1 100644 --- a/utils/TableGen/X86RecognizableInstr.cpp +++ b/utils/TableGen/X86RecognizableInstr.cpp @@ -244,6 +244,7 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables, HasEVEXPrefix = Rec->getValueAsBit("hasEVEXPrefix"); HasEVEX_L2Prefix = Rec->getValueAsBit("hasEVEX_L2"); HasEVEX_K = Rec->getValueAsBit("hasEVEX_K"); + HasEVEX_KZ = Rec->getValueAsBit("hasEVEX_Z"); HasEVEX_B = Rec->getValueAsBit("hasEVEX_B"); HasLockPrefix = Rec->getValueAsBit("hasLockPrefix"); IsCodeGenOnly = Rec->getValueAsBit("isCodeGenOnly"); @@ -304,8 +305,10 @@ void RecognizableInstr::processInstr(DisassemblerTables &tables, recogInstr.emitDecodePath(tables); } -#define EVEX_KB(n) (HasEVEX_K && HasEVEX_B? n##_K_B : \ - (HasEVEX_K? n##_K : (HasEVEX_B ? n##_B : n))) +#define EVEX_KB(n) (HasEVEX_KZ && HasEVEX_B ? n##_KZ_B : \ + (HasEVEX_K && HasEVEX_B ? n##_K_B : \ + (HasEVEX_KZ ? n##_KZ : \ + (HasEVEX_K? n##_K : (HasEVEX_B ? n##_B : n))))) InstructionContext RecognizableInstr::insnContext() const { InstructionContext insnContext; diff --git a/utils/TableGen/X86RecognizableInstr.h b/utils/TableGen/X86RecognizableInstr.h index 7e1d362e6a2..4d4686e0739 100644 --- a/utils/TableGen/X86RecognizableInstr.h +++ b/utils/TableGen/X86RecognizableInstr.h @@ -72,6 +72,8 @@ private: bool HasEVEX_L2Prefix; /// The hasEVEX_K field from the record bool HasEVEX_K; + /// The hasEVEX_KZ field from the record + bool HasEVEX_KZ; /// The hasEVEX_B field from the record bool HasEVEX_B; /// The hasLockPrefix field from the record -- 2.34.1