From 3922da8ae8fab29de6416eeeebf21208b1491557 Mon Sep 17 00:00:00 2001 From: Robert Khasanov Date: Wed, 23 Jul 2014 14:49:42 +0000 Subject: [PATCH] [SKX] Enabling mask instructions: encoding, lowering KMOVB, KMOVW, KMOVD, KMOVQ, KNOTB, KNOTW, KNOTD, KNOTQ Reviewed by Elena Demikhovsky git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213757 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 9 ++ lib/Target/X86/X86InstrAVX512.td | 124 +++++++++++++++++++----- lib/Target/X86/X86InstrInfo.cpp | 2 + test/CodeGen/X86/avx512-mask-op.ll | 40 +++++++- test/MC/X86/avx512-encodings.s | 48 +++++++++ utils/TableGen/X86RecognizableInstr.cpp | 4 + 6 files changed, 199 insertions(+), 28 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index e9f48a5a58c..cba145208da 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1505,6 +1505,11 @@ void X86TargetLowering::resetOperationActions() { } }// has AVX-512 + if (!TM.Options.UseSoftFloat && Subtarget->hasBWI()) { + addRegisterClass(MVT::v32i1, &X86::VK32RegClass); + addRegisterClass(MVT::v64i1, &X86::VK64RegClass); + } + // SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion // of this type with custom code. 
for (int VT = MVT::FIRST_VECTOR_VALUETYPE; @@ -2312,6 +2317,10 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, RC = &X86::VK8RegClass; else if (RegVT == MVT::v16i1) RC = &X86::VK16RegClass; + else if (RegVT == MVT::v32i1) + RC = &X86::VK32RegClass; + else if (RegVT == MVT::v64i1) + RC = &X86::VK64RegClass; else llvm_unreachable("Unknown argument type!"); diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 92d536356d4..4956be5c66c 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1031,14 +1031,14 @@ def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1), // multiclass avx512_mask_mov opc_kk, bits<8> opc_km, bits<8> opc_mk, string OpcodeStr, RegisterClass KRC, - ValueType vt, X86MemOperand x86memop> { + ValueType vvt, ValueType ivt, X86MemOperand x86memop> { let hasSideEffects = 0 in { def kk : I; let mayLoad = 1 in def km : I; + [(set KRC:$dst, (vvt (bitconvert (ivt (load addr:$src)))))]>; let mayStore = 1 in def mk : I; @@ -1056,33 +1056,79 @@ multiclass avx512_mask_mov_gpr opc_kr, bits<8> opc_rk, } } -let Predicates = [HasAVX512] in { - defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>, - VEX, PS; - defm KMOVW : avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>, +let Predicates = [HasDQI] in + defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8, + i8mem>, + avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>, + VEX, PD; + +let Predicates = [HasAVX512] in + defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16, + i16mem>, + avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>, VEX, PS; + +let Predicates = [HasBWI] in { + defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32, + i32mem>, VEX, PD, VEX_W; + defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>, + VEX, XD; } +let Predicates = [HasBWI] in { + defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64, 
+ i64mem>, VEX, PS, VEX_W; + defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>, + VEX, XD, VEX_W; +} + +// GR from/to mask register +let Predicates = [HasDQI] in { + def : Pat<(v8i1 (bitconvert (i8 GR8:$src))), + (KMOVBkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit))>; + def : Pat<(i8 (bitconvert (v8i1 VK8:$src))), + (EXTRACT_SUBREG (KMOVBrk VK8:$src), sub_8bit)>; +} let Predicates = [HasAVX512] in { - // GR16 from/to 16-bit mask def : Pat<(v16i1 (bitconvert (i16 GR16:$src))), (KMOVWkr (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit))>; def : Pat<(i16 (bitconvert (v16i1 VK16:$src))), (EXTRACT_SUBREG (KMOVWrk VK16:$src), sub_16bit)>; +} +let Predicates = [HasBWI] in { + def : Pat<(v32i1 (bitconvert (i32 GR32:$src))), (KMOVDkr GR32:$src)>; + def : Pat<(i32 (bitconvert (v32i1 VK32:$src))), (KMOVDrk VK32:$src)>; +} +let Predicates = [HasBWI] in { + def : Pat<(v64i1 (bitconvert (i64 GR64:$src))), (KMOVQkr GR64:$src)>; + def : Pat<(i64 (bitconvert (v64i1 VK64:$src))), (KMOVQrk VK64:$src)>; +} - // Store kreg in memory - def : Pat<(store (v16i1 VK16:$src), addr:$dst), +// Load/store kreg +let Predicates = [HasDQI] in { + def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst), + (KMOVBmk addr:$dst, VK8:$src)>; +} +let Predicates = [HasAVX512] in { + def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst), (KMOVWmk addr:$dst, VK16:$src)>; - - def : Pat<(store VK8:$src, addr:$dst), + def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst), (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK8:$src, VK16))>; - def : Pat<(i1 (load addr:$src)), (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK1)>; - - def : Pat<(v8i1 (load addr:$src)), + def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))), (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK8)>; +} +let Predicates = [HasBWI] in { + def : Pat<(store (i32 (bitconvert (v32i1 VK32:$src))), addr:$dst), + (KMOVDmk addr:$dst, VK32:$src)>; +} +let Predicates = [HasBWI] in { + def : Pat<(store (i64 (bitconvert (v64i1 VK64:$src))), 
addr:$dst), + (KMOVQmk addr:$dst, VK64:$src)>; +} +let Predicates = [HasAVX512] in { def : Pat<(i1 (trunc (i32 GR32:$src))), (COPY_TO_REGCLASS (KMOVWkr (AND32ri $src, (i32 1))), VK1)>; @@ -1094,7 +1140,7 @@ let Predicates = [HasAVX512] in { (COPY_TO_REGCLASS (KMOVWkr (AND32ri (SUBREG_TO_REG (i32 0), $src, sub_16bit), (i32 1))), VK1)>; - + def : Pat<(i32 (zext VK1:$src)), (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>; def : Pat<(i8 (zext VK1:$src)), @@ -1113,6 +1159,14 @@ let Predicates = [HasAVX512] in { def : Pat<(v8i1 (scalar_to_vector VK1:$src)), (COPY_TO_REGCLASS VK1:$src, VK8)>; } +let Predicates = [HasBWI] in { + def : Pat<(v32i1 (scalar_to_vector VK1:$src)), + (COPY_TO_REGCLASS VK1:$src, VK32)>; + def : Pat<(v64i1 (scalar_to_vector VK1:$src)), + (COPY_TO_REGCLASS VK1:$src, VK64)>; +} + + // With AVX-512 only, 8-bit mask is promoted to 16-bit mask. let Predicates = [HasAVX512] in { // GR from/to 8-bit mask without native support @@ -1129,26 +1183,38 @@ let Predicates = [HasAVX512] in { (COPY_TO_REGCLASS VK16:$src, VK1)>; def : Pat<(i1 (X86Vextract VK8:$src, (iPTR 0))), (COPY_TO_REGCLASS VK8:$src, VK1)>; - +} +let Predicates = [HasBWI] in { + def : Pat<(i1 (X86Vextract VK32:$src, (iPTR 0))), + (COPY_TO_REGCLASS VK32:$src, VK1)>; + def : Pat<(i1 (X86Vextract VK64:$src, (iPTR 0))), + (COPY_TO_REGCLASS VK64:$src, VK1)>; } // Mask unary operation // - KNOT multiclass avx512_mask_unop opc, string OpcodeStr, - RegisterClass KRC, SDPatternOperator OpNode> { - let Predicates = [HasAVX512] in + RegisterClass KRC, SDPatternOperator OpNode, + Predicate prd> { + let Predicates = [prd] in def rr : I; } -multiclass avx512_mask_unop_w opc, string OpcodeStr, - SDPatternOperator OpNode> { - defm W : avx512_mask_unop, - VEX, PS; +multiclass avx512_mask_unop_all opc, string OpcodeStr, + SDPatternOperator OpNode> { + defm B : avx512_mask_unop, VEX, PD; + defm W : avx512_mask_unop, VEX, PS; + defm D : avx512_mask_unop, VEX, PD, VEX_W; + defm Q : avx512_mask_unop, 
VEX, PS, VEX_W; } -defm KNOT : avx512_mask_unop_w<0x44, "knot", not>; +defm KNOT : avx512_mask_unop_all<0x44, "knot", not>; multiclass avx512_mask_unop_int { let Predicates = [HasAVX512] in @@ -1159,14 +1225,24 @@ multiclass avx512_mask_unop_int { } defm : avx512_mask_unop_int<"knot", "KNOT">; +let Predicates = [HasDQI] in +def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)), (KNOTBrr VK8:$src1)>; +let Predicates = [HasAVX512] in def : Pat<(xor VK16:$src1, (v16i1 immAllOnesV)), (KNOTWrr VK16:$src1)>; +let Predicates = [HasBWI] in +def : Pat<(xor VK32:$src1, (v32i1 immAllOnesV)), (KNOTDrr VK32:$src1)>; +let Predicates = [HasBWI] in +def : Pat<(xor VK64:$src1, (v64i1 immAllOnesV)), (KNOTQrr VK64:$src1)>; + +// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit +let Predicates = [HasAVX512] in { def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)), (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)), VK8)>; -// With AVX-512, 8-bit mask is promoted to 16-bit mask. def : Pat<(not VK8:$src), (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>; +} // Mask binary operation // - KAND, KANDN, KOR, KXNOR, KXOR diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 0d3afc43c2b..d03d76ad1f0 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -3067,6 +3067,8 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, inline static bool MaskRegClassContains(unsigned Reg) { return X86::VK8RegClass.contains(Reg) || X86::VK16RegClass.contains(Reg) || + X86::VK32RegClass.contains(Reg) || + X86::VK64RegClass.contains(Reg) || X86::VK1RegClass.contains(Reg); } static diff --git a/test/CodeGen/X86/avx512-mask-op.ll b/test/CodeGen/X86/avx512-mask-op.ll index dd33ffdb640..24a266b3644 100644 --- a/test/CodeGen/X86/avx512-mask-op.ll +++ b/test/CodeGen/X86/avx512-mask-op.ll @@ -5,8 +5,10 @@ define i16 @mask16(i16 %x) { %m1 = xor <16 x i1> %m0, %ret = bitcast <16 x i1> %m1 to i16 ret i16 
%ret -; CHECK: mask16 -; CHECK: knotw +; CHECK-LABEL: mask16 +; CHECK: kmovw +; CHECK-NEXT: knotw +; CHECK-NEXT: kmovw ; CHECK: ret } @@ -15,8 +17,38 @@ define i8 @mask8(i8 %x) { %m1 = xor <8 x i1> %m0, %ret = bitcast <8 x i1> %m1 to i8 ret i8 %ret -; CHECK: mask8 -; CHECK: knotw +; CHECK-LABEL: mask8 +; CHECK: kmovw +; CHECK-NEXT: knotw +; CHECK-NEXT: kmovw +; CHECK: ret +} + +define void @mask16_mem(i16* %ptr) { + %x = load i16* %ptr, align 4 + %m0 = bitcast i16 %x to <16 x i1> + %m1 = xor <16 x i1> %m0, + %ret = bitcast <16 x i1> %m1 to i16 + store i16 %ret, i16* %ptr, align 4 + ret void +; CHECK-LABEL: mask16_mem +; CHECK: kmovw (%rdi), %k{{[0-7]}} +; CHECK-NEXT: knotw +; CHECK-NEXT: kmovw %k{{[0-7]}}, (%rdi) +; CHECK: ret +} + +define void @mask8_mem(i8* %ptr) { + %x = load i8* %ptr, align 4 + %m0 = bitcast i8 %x to <8 x i1> + %m1 = xor <8 x i1> %m0, + %ret = bitcast <8 x i1> %m1 to i8 + store i8 %ret, i8* %ptr, align 4 + ret void +; CHECK-LABEL: mask8_mem +; CHECK: kmovw (%rdi), %k{{[0-7]}} +; CHECK-NEXT: knotw +; CHECK-NEXT: kmovw %k{{[0-7]}}, (%rdi) ; CHECK: ret } diff --git a/test/MC/X86/avx512-encodings.s b/test/MC/X86/avx512-encodings.s index 187b51264c4..e7eabab5f93 100644 --- a/test/MC/X86/avx512-encodings.s +++ b/test/MC/X86/avx512-encodings.s @@ -2489,6 +2489,54 @@ // CHECK: encoding: [0x62,0x71,0x24,0x50,0x5c,0xb2,0xfc,0xfd,0xff,0xff] vsubps -516(%rdx){1to16}, %zmm27, %zmm14 +// CHECK: knotw %k6, %k3 +// CHECK: encoding: [0xc5,0xf8,0x44,0xde] + knotw %k6, %k3 + +// CHECK: kmovw %k5, %k4 +// CHECK: encoding: [0xc5,0xf8,0x90,0xe5] + kmovw %k5, %k4 + +// CHECK: kmovw (%rcx), %k4 +// CHECK: encoding: [0xc5,0xf8,0x90,0x21] + kmovw (%rcx), %k4 + +// CHECK: kmovw 291(%rax,%r14,8), %k4 +// CHECK: encoding: [0xc4,0xa1,0x78,0x90,0xa4,0xf0,0x23,0x01,0x00,0x00] + kmovw 291(%rax,%r14,8), %k4 + +// CHECK: kmovw %k4, (%rcx) +// CHECK: encoding: [0xc5,0xf8,0x91,0x21] + kmovw %k4, (%rcx) + +// CHECK: kmovw %k4, 291(%rax,%r14,8) +// CHECK: encoding: 
[0xc4,0xa1,0x78,0x91,0xa4,0xf0,0x23,0x01,0x00,0x00] + kmovw %k4, 291(%rax,%r14,8) + +// CHECK: kmovw %eax, %k3 +// CHECK: encoding: [0xc5,0xf8,0x92,0xd8] + kmovw %eax, %k3 + +// CHECK: kmovw %ebp, %k3 +// CHECK: encoding: [0xc5,0xf8,0x92,0xdd] + kmovw %ebp, %k3 + +// CHECK: kmovw %r13d, %k3 +// CHECK: encoding: [0xc4,0xc1,0x78,0x92,0xdd] + kmovw %r13d, %k3 + +// CHECK: kmovw %k2, %eax +// CHECK: encoding: [0xc5,0xf8,0x93,0xc2] + kmovw %k2, %eax + +// CHECK: kmovw %k2, %ebp +// CHECK: encoding: [0xc5,0xf8,0x93,0xea] + kmovw %k2, %ebp + +// CHECK: kmovw %k2, %r13d +// CHECK: encoding: [0xc5,0x78,0x93,0xea] + kmovw %k2, %r13d + // CHECK: vpmovqb %zmm2, %xmm3 // CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x32,0xd3] vpmovqb %zmm2, %xmm3 diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp index b7bd822d31d..2261dc877a9 100644 --- a/utils/TableGen/X86RecognizableInstr.cpp +++ b/utils/TableGen/X86RecognizableInstr.cpp @@ -1059,6 +1059,8 @@ RecognizableInstr::rmRegisterEncodingFromString(const std::string &s, ENCODING("VK1", ENCODING_RM) ENCODING("VK8", ENCODING_RM) ENCODING("VK16", ENCODING_RM) + ENCODING("VK32", ENCODING_RM) + ENCODING("VK64", ENCODING_RM) errs() << "Unhandled R/M register encoding " << s << "\n"; llvm_unreachable("Unhandled R/M register encoding"); } @@ -1087,6 +1089,8 @@ RecognizableInstr::roRegisterEncodingFromString(const std::string &s, ENCODING("VK1", ENCODING_REG) ENCODING("VK8", ENCODING_REG) ENCODING("VK16", ENCODING_REG) + ENCODING("VK32", ENCODING_REG) + ENCODING("VK64", ENCODING_REG) ENCODING("VK1WM", ENCODING_REG) ENCODING("VK8WM", ENCODING_REG) ENCODING("VK16WM", ENCODING_REG) -- 2.34.1