[X86][AVX512CD] add mask broadcast intrinsics
author Asaf Badouh <asaf.badouh@intel.com>
Wed, 18 Nov 2015 09:42:45 +0000 (09:42 +0000)
committer Asaf Badouh <asaf.badouh@intel.com>
Wed, 18 Nov 2015 09:42:45 +0000 (09:42 +0000)
Differential Revision: http://reviews.llvm.org/D14573

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253450 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/IR/IntrinsicsX86.td
lib/Target/X86/Disassembler/X86Disassembler.cpp
lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
lib/Target/X86/X86ISelLowering.cpp
lib/Target/X86/X86ISelLowering.h
lib/Target/X86/X86InstrAVX512.td
lib/Target/X86/X86InstrFragmentsSIMD.td
lib/Target/X86/X86IntrinsicsInfo.h
test/CodeGen/X86/avx512cd-intrinsics.ll [new file with mode: 0644]
test/CodeGen/X86/avx512cdvl-intrinsics.ll

index 2b5577c..df3f993 100644 (file)
@@ -4955,6 +4955,24 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 
   def int_x86_avx512_pbroadcastq_i64_512 :
          Intrinsic<[llvm_v8i64_ty], [llvm_i64_ty], [IntrNoMem]>;
+   def int_x86_avx512_broadcastmw_512 :
+          GCCBuiltin<"__builtin_ia32_broadcastmw512">,
+          Intrinsic<[llvm_v16i32_ty], [llvm_i16_ty], [IntrNoMem]>;
+   def int_x86_avx512_broadcastmw_256 :
+          GCCBuiltin<"__builtin_ia32_broadcastmw256">,
+          Intrinsic<[llvm_v8i32_ty], [llvm_i16_ty], [IntrNoMem]>;
+   def int_x86_avx512_broadcastmw_128 :
+          GCCBuiltin<"__builtin_ia32_broadcastmw128">,
+          Intrinsic<[llvm_v4i32_ty], [llvm_i16_ty], [IntrNoMem]>;
+   def int_x86_avx512_broadcastmb_512 :
+          GCCBuiltin<"__builtin_ia32_broadcastmb512">,
+          Intrinsic<[llvm_v8i64_ty], [llvm_i8_ty], [IntrNoMem]>;
+   def int_x86_avx512_broadcastmb_256 :
+          GCCBuiltin<"__builtin_ia32_broadcastmb256">,
+          Intrinsic<[llvm_v4i64_ty], [llvm_i8_ty], [IntrNoMem]>;
+   def int_x86_avx512_broadcastmb_128 :
+          GCCBuiltin<"__builtin_ia32_broadcastmb128">,
+          Intrinsic<[llvm_v2i64_ty], [llvm_i8_ty], [IntrNoMem]>;
 }
 
 // Vector sign and zero extend
index 63929b1..ce8fcf1 100644 (file)
@@ -833,8 +833,12 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
   case TYPE_XMM256:
   case TYPE_XMM512:
   case TYPE_VK1:
+  case TYPE_VK2:
+  case TYPE_VK4:
   case TYPE_VK8:
   case TYPE_VK16:
+  case TYPE_VK32:
+  case TYPE_VK64:
   case TYPE_DEBUGREG:
   case TYPE_CONTROLREG:
   case TYPE_BNDR:
index a607315..6140c5a 100644 (file)
@@ -1488,8 +1488,12 @@ static int readModRM(struct InternalInstruction* insn) {
     case TYPE_XMM:                                        \
       return prefix##_XMM0 + index;                       \
     case TYPE_VK1:                                        \
+    case TYPE_VK2:                                        \
+    case TYPE_VK4:                                        \
     case TYPE_VK8:                                        \
     case TYPE_VK16:                                       \
+    case TYPE_VK32:                                       \
+    case TYPE_VK64:                                       \
       if (index > 7)                                      \
         *valid = 0;                                       \
       return prefix##_K0 + index;                         \
index 2be3551..31401f2 100644 (file)
@@ -16463,6 +16463,12 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
                                               DataToCompress),
                                   Mask, PassThru, Subtarget, DAG);
     }
+    case BROADCASTM: {
+      SDValue Mask = Op.getOperand(1);
+      MVT MaskVT = MVT::getVectorVT(MVT::i1, Mask.getSimpleValueType().getSizeInBits());
+      Mask = DAG.getBitcast(MaskVT, Mask);
+      return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Mask);
+    }
     case BLEND: {
       SDValue Mask = Op.getOperand(3);
       MVT VT = Op.getSimpleValueType();
@@ -20108,6 +20114,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::UNPCKL:             return "X86ISD::UNPCKL";
   case X86ISD::UNPCKH:             return "X86ISD::UNPCKH";
   case X86ISD::VBROADCAST:         return "X86ISD::VBROADCAST";
+  case X86ISD::VBROADCASTM:        return "X86ISD::VBROADCASTM";
   case X86ISD::SUBV_BROADCAST:     return "X86ISD::SUBV_BROADCAST";
   case X86ISD::VEXTRACT:           return "X86ISD::VEXTRACT";
   case X86ISD::VPERMILPV:          return "X86ISD::VPERMILPV";
index c800f16..c1b6328 100644 (file)
@@ -403,6 +403,8 @@ namespace llvm {
       VFPCLASS, 
       // Broadcast scalar to vector
       VBROADCAST,
+      // Broadcast mask to vector
+      VBROADCASTM,
       // Broadcast subvector to vector
       SUBV_BROADCAST,
       // Insert/Extract vector element
index 0069dd0..96da043 100644 (file)
@@ -1113,30 +1113,27 @@ def : Pat<(v8f64 (X86VBroadcast FR64X:$src)),
 //===----------------------------------------------------------------------===//
 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
 //---
-
-multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
-                       RegisterClass KRC> {
-let Predicates = [HasCDI] in
-def Zrr : AVX512XS8I<opc, MRMSrcReg, (outs VR512:$dst), (ins KRC:$src),
-                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                  []>, EVEX, EVEX_V512;
-
-let Predicates = [HasCDI, HasVLX] in {
-def Z128rr : AVX512XS8I<opc, MRMSrcReg, (outs VR128X:$dst), (ins KRC:$src),
-                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                  []>, EVEX, EVEX_V128;
-def Z256rr : AVX512XS8I<opc, MRMSrcReg, (outs VR256X:$dst), (ins KRC:$src),
+multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
+                                  X86VectorVTInfo _, RegisterClass KRC> {
+  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                  []>, EVEX, EVEX_V256;
+                  [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>, EVEX;
 }
+
+multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr, 
+                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
+  let Predicates = [HasCDI] in
+    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
+  let Predicates = [HasCDI, HasVLX] in {
+    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
+    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
+  }
 }
 
-let Predicates = [HasCDI] in {
 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
-                                             VK16>;
+                                               avx512vl_i32_info, VK16>;
 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
-                                             VK8>, VEX_W;
-}
+                                               avx512vl_i64_info, VK8>, VEX_W;
 
 //===----------------------------------------------------------------------===//
 // -- VPERM2I - 3 source operands form --
index 90710bf..68891fa 100644 (file)
@@ -295,7 +295,8 @@ def SDTFPUnaryOpImmRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                               SDTCisInt<2>, SDTCisInt<3>]>;
 
 def SDTVBroadcast  : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
-def SDTVBroadcastm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>]>;
+def SDTVBroadcastm : SDTypeProfile<1, 1, [SDTCisVec<0>,
+                                          SDTCisInt<0>, SDTCisInt<1>]>;
 
 def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                              SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>;
@@ -387,6 +388,7 @@ def X86SubV32x2Broadcast : SDNode<"X86ISD::SUBV_BROADCAST",
                     SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>, []>;
 
 def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
+def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>;
 def X86Vinsert   : SDNode<"X86ISD::VINSERT",  SDTypeProfile<1, 3,
                               [SDTCisSameAs<0, 1>, SDTCisPtrTy<3>]>, []>;
 def X86Vextract   : SDNode<"X86ISD::VEXTRACT",  SDTypeProfile<1, 2,
index 337c46c..b4df8da 100644 (file)
@@ -30,7 +30,7 @@ enum IntrinsicType {
   COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
   TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
   EXPAND_FROM_MEM, BLEND, INSERT_SUBVEC,
-  TERLOG_OP_MASK, TERLOG_OP_MASKZ
+  TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM
 };
 
 struct IntrinsicData {
@@ -315,6 +315,12 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
   X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
   X86_INTRINSIC_DATA(avx2_vperm2i128, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
+  X86_INTRINSIC_DATA(avx512_broadcastmb_128, BROADCASTM, X86ISD::VBROADCASTM, 0),
+  X86_INTRINSIC_DATA(avx512_broadcastmb_256, BROADCASTM, X86ISD::VBROADCASTM, 0),
+  X86_INTRINSIC_DATA(avx512_broadcastmb_512, BROADCASTM, X86ISD::VBROADCASTM, 0),
+  X86_INTRINSIC_DATA(avx512_broadcastmw_128, BROADCASTM, X86ISD::VBROADCASTM, 0),
+  X86_INTRINSIC_DATA(avx512_broadcastmw_256, BROADCASTM, X86ISD::VBROADCASTM, 0),
+  X86_INTRINSIC_DATA(avx512_broadcastmw_512, BROADCASTM, X86ISD::VBROADCASTM, 0),
   X86_INTRINSIC_DATA(avx512_cvtsi2sd32,  INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
   X86_INTRINSIC_DATA(avx512_cvtsi2sd64,  INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
   X86_INTRINSIC_DATA(avx512_cvtsi2ss32,  INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
diff --git a/test/CodeGen/X86/avx512cd-intrinsics.ll b/test/CodeGen/X86/avx512cd-intrinsics.ll
new file mode 100644 (file)
index 0000000..29f17bb
--- /dev/null
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd | FileCheck %s\r
+\r
+define <16 x i32> @test_x86_vbroadcastmw_512(i16 %a0) {\r
+  ; CHECK: test_x86_vbroadcastmw_512\r
+  ; CHECK: vpbroadcastmw2d %k0, %zmm0\r
+  %res = call <16 x i32> @llvm.x86.avx512.broadcastmw.512(i16 %a0) ; \r
+  ret <16 x i32> %res\r
+}\r
+declare <16 x i32> @llvm.x86.avx512.broadcastmw.512(i16)\r
+\r
+define <8 x i64> @test_x86_broadcastmb_512(i8 %a0) {\r
+  ; CHECK: test_x86_broadcastmb_512\r
+  ; CHECK: vpbroadcastmb2q %k0, %zmm0\r
+  %res = call <8 x i64> @llvm.x86.avx512.broadcastmb.512(i8 %a0) ; \r
+  ret <8 x i64> %res\r
+}\r
+declare <8 x i64> @llvm.x86.avx512.broadcastmb.512(i8)\r
+\r
index b82782c..14e91e1 100644 (file)
@@ -146,3 +146,34 @@ define <4 x i64>@test_int_x86_avx512_mask_vpconflict_q_256(<4 x i64> %x0, <4 x i
   ret <4 x i64> %res2
 }
 
+define <8 x i32> @test_x86_vbroadcastmw_256(i16 %a0) {
+  ; CHECK: test_x86_vbroadcastmw_256
+  ; CHECK: vpbroadcastmw2d %k0, %ymm0
+  %res = call <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16 %a0) ; 
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16)
+
+define <4 x i32> @test_x86_vbroadcastmw_128(i16 %a0) {
+  ; CHECK: test_x86_vbroadcastmw_128
+  ; CHECK: vpbroadcastmw2d %k0, %xmm0
+  %res = call <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16 %a0) ; 
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16)
+
+define <4 x i64> @test_x86_broadcastmb_256(i8 %a0) {
+  ; CHECK: test_x86_broadcastmb_256
+  ; CHECK: vpbroadcastmb2q %k0, %ymm0
+  %res = call <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8 %a0) ; 
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8)
+
+define <2 x i64> @test_x86_broadcastmb_128(i8 %a0) {
+  ; CHECK: test_x86_broadcastmb_128
+  ; CHECK: vpbroadcastmb2q %k0, %xmm0
+  %res = call <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8 %a0) ; 
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8)