[X86][AVX512DQ] add scalar fpclass
author Asaf Badouh <asaf.badouh@intel.com>
Sun, 18 Oct 2015 11:04:38 +0000 (11:04 +0000)
committer Asaf Badouh <asaf.badouh@intel.com>
Sun, 18 Oct 2015 11:04:38 +0000 (11:04 +0000)
Differential Revision: http://reviews.llvm.org/D13769

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@250650 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/IR/IntrinsicsX86.td
lib/Target/X86/X86ISelLowering.cpp
lib/Target/X86/X86InstrAVX512.td
lib/Target/X86/X86InstrFragmentsSIMD.td
lib/Target/X86/X86IntrinsicsInfo.h
test/CodeGen/X86/avx512dq-intrinsics.ll
test/MC/X86/x86-64-avx512dq.s

index 938c02c933f26efedb772297748ec002137e48c3..1851e49de06ad729ff11028b3407932ada4db7ac 100644 (file)
@@ -1705,6 +1705,14 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
          GCCBuiltin<"__builtin_ia32_fpclassps512_mask">,
           Intrinsic<[llvm_i16_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_i16_ty],
           [IntrNoMem]>;
+  def int_x86_avx512_mask_fpclass_sd : // Scalar f64 fpclass: (v2f64 src, i32 imm, i8 mask) -> i8.
+         GCCBuiltin<"__builtin_ia32_fpclasssd">,
+          Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_i32_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+  def int_x86_avx512_mask_fpclass_ss : // Scalar f32 fpclass: (v4f32 src, i32 imm, i8 mask) -> i8.
+         GCCBuiltin<"__builtin_ia32_fpclassss">,
+          Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i8_ty],
+          [IntrNoMem]>;
 }
 
 // Vector extract sign mask
index b1bafba047c82882d75cb20461029bae59e382c4..a1137cf8bef81bcf8193237c980114de74cc0779 100644 (file)
@@ -16012,6 +16012,8 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
 
   if (Op.getOpcode() == X86ISD::FSETCC)
     return DAG.getNode(ISD::AND, dl, VT, Op, IMask);
+  if (Op.getOpcode() == X86ISD::VFPCLASS)
+    return DAG.getNode(ISD::OR, dl, VT, Op, IMask);
 
   if (PreservedSrc.getOpcode() == ISD::UNDEF)
     PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
@@ -16357,6 +16359,15 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
                                  DAG.getIntPtrConstant(0, dl));
        return DAG.getBitcast(Op.getValueType(), Res);
     }
+    case FPCLASSS: { // Scalar fpclass intrinsic: operands are (source, immediate, mask); result is i8.
+      SDValue Src1 = Op.getOperand(1);
+      SDValue Imm = Op.getOperand(2);
+      SDValue Mask = Op.getOperand(3);
+      SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MVT::i1, Src1, Imm); // i1 node built from the table's Opc0
+      SDValue FPclassMask = getScalarMaskingNode(FPclass, Mask, // NOTE(review): that helper ORs VFPCLASS with the mask; confirm OR (not AND) is intended
+        DAG.getTargetConstant(0, dl, MVT::i1), Subtarget, DAG);
+      return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i8, FPclassMask); // widen i1 to i8; NOTE(review): sign- vs zero-extension, confirm expected value for "true"
+    }
     case CMP_MASK:
     case CMP_MASK_CC: {
       // Comparison intrinsics with masks.
index 31a7e95df151b67bdf5d85b4ce669115d743fb83..a5a904873e531f85ab95d27e687d5e198deb39ed 100644 (file)
@@ -1803,6 +1803,42 @@ def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
 
 // ----------------------------------------------------------------
 // FPClass
+// Scalar fpclass: mask = op(reg_scalar, imm) / op(mem_scalar, imm).
+// Emits four variants per type: rr, rrk (with $mask), rm, rmk (with $mask).
+multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                                 X86VectorVTInfo _, Predicate prd> {
+  let Predicates = [prd] in {
+      def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), // register source, no mask operand
+                      (ins _.RC:$src1, i32u8imm:$src2),
+                      OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst | $dst, $src1, $src2}",
+                      [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
+                              (i32 imm:$src2)))], NoItinerary>;
+      def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), // register source with a $mask input (EVEX_K)
+                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
+                      OpcodeStr##_.Suffix#
+                      "\t{$src2, $src1, $dst {${mask}} | $dst {${mask}}, $src1, $src2}",
+                      [(set _.KRC:$dst,(or _.KRCWM:$mask, // NOTE(review): pattern ORs $mask with the result; confirm vs. 'and'
+                                      (OpNode (_.VT _.RC:$src1),
+                                      (i32 imm:$src2))))], NoItinerary>, EVEX_K;
+    let mayLoad = 1, AddedComplexity = 20 in { // memory-source variants
+      def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), // memory source, no mask operand
+                      (ins _.MemOp:$src1, i32u8imm:$src2),
+                      OpcodeStr##_.Suffix##
+                                "\t{$src2, $src1, $dst | $dst, $src1, $src2}",
+                      [(set _.KRC:$dst,
+                            (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
+                                    (i32 imm:$src2)))], NoItinerary>;
+      def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), // memory source with a $mask input (EVEX_K)
+                      (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
+                      OpcodeStr##_.Suffix##
+                      "\t{$src2, $src1, $dst {${mask}} | $dst {${mask}}, $src1, $src2}",
+                      [(set _.KRC:$dst,(or _.KRCWM:$mask, // same OR-with-$mask pattern as rrk
+                          (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
+                              (i32 imm:$src2))))], NoItinerary>, EVEX_K;
+    }
+  }
+}
+
 //handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm)
 //                                  fpclass(reg_vec, mem_vec, imm)
 //                                  fpclass(reg_vec, broadcast(eltVt), imm)
@@ -1873,15 +1909,19 @@ multiclass avx512_vector_fpclass_all<string OpcodeStr,
 }
 
 multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
-                                 SDNode OpNode, Predicate prd>{
+             bits<8> opcScalar, SDNode VecOpNode, SDNode ScalarOpNode, Predicate prd>{ // opcVec/VecOpNode feed PS/PD; opcScalar/ScalarOpNode feed SS/SD
   defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec, 
-                                      OpNode, prd, "{l}">, EVEX_CD8<32, CD8VF>;
+                                      VecOpNode, prd, "{l}">, EVEX_CD8<32, CD8VF>;
   defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec, 
-                                      OpNode, prd, "{q}">,EVEX_CD8<64, CD8VF> , VEX_W;
+                                      VecOpNode, prd, "{q}">,EVEX_CD8<64, CD8VF> , VEX_W;
+  defm SS : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode, // scalar form instantiated with f32x_info
+                                      f32x_info, prd>, EVEX_CD8<32, CD8VT1>;
+  defm SD : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode, // scalar form instantiated with f64x_info (VEX_W)
+                                      f64x_info, prd>, EVEX_CD8<64, CD8VT1>, VEX_W;
 }
 
-defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, X86Vfpclass, HasDQI>,
-                                      AVX512AIi8Base,EVEX;
+defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass, // 0x66 = opcVec, 0x67 = opcScalar
+                                      X86Vfpclasss, HasDQI>, AVX512AIi8Base,EVEX;
 
 //-----------------------------------------------------------------
 // Mask register copy, including
index ca67a2cff7243089ceeff54b4f6fa4108b4f75c4..3eb5a6e528105a3587ce2c2d8c1ca32ed3666ed1 100644 (file)
@@ -376,6 +376,8 @@ def X86VGetMant    : SDNode<"X86ISD::VGETMANT",  SDTFPUnaryOpImmRound>;
 def X86Vfpclass    : SDNode<"X86ISD::VFPCLASS", 
                        SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>,
                                             SDTCisVec<1>, SDTCisInt<2>]>, []>;
+def X86Vfpclasss   : SDNode<"X86ISD::VFPCLASS", SDTypeProfile<1, 2, [SDTCisInt<0>, // same opcode as X86Vfpclass, but no vector constraints: int result, FP source, int immediate
+                              SDTCisFP<1>, SDTCisInt<2>]>,[]>;
 
 def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST",
                     SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
index 74d6e4e6d73da8aa9697b5463da04be89fc4896a..2d8d9364c860722561c9655e3d846e4fea244bc1 100644 (file)
@@ -18,7 +18,7 @@ namespace llvm {
 
 enum IntrinsicType {
   INTR_NO_TYPE,
-  GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX, FPCLASS,
+  GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX, FPCLASS, FPCLASSS,
   INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_2OP_IMM8, INTR_TYPE_3OP, INTR_TYPE_4OP,
   CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, VSHIFT_MASK, COMI,
   INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM,
@@ -688,6 +688,8 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_mask_fpclass_ps_128, FPCLASS, X86ISD::VFPCLASS, 0), 
   X86_INTRINSIC_DATA(avx512_mask_fpclass_ps_256, FPCLASS, X86ISD::VFPCLASS, 0),
   X86_INTRINSIC_DATA(avx512_mask_fpclass_ps_512, FPCLASS, X86ISD::VFPCLASS, 0), 
+  X86_INTRINSIC_DATA(avx512_mask_fpclass_sd, FPCLASSS, X86ISD::VFPCLASS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_fpclass_ss, FPCLASSS, X86ISD::VFPCLASS, 0),
   X86_INTRINSIC_DATA(avx512_mask_getexp_pd_128, INTR_TYPE_1OP_MASK_RM,
                      X86ISD::FGETEXP_RND, 0),
   X86_INTRINSIC_DATA(avx512_mask_getexp_pd_256, INTR_TYPE_1OP_MASK_RM,
index 7348229d35aae05f6c7a6e81a851d2c9e433ca1b..b36f1ef52c18834aa1efb25d490dff01f9d3394b 100644 (file)
@@ -467,3 +467,37 @@ define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0, i16 %x1) {
        %res2 = add i16 %res, %res1
        ret i16 %res2
 }
+
+declare i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double>, i32, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_sd
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vfpclasssd
+; CHECK: %k0 {%k1}
+; CHECK: vfpclasssd
+; CHECK: %k0
+define i8 @test_int_x86_avx512_mask_fpclass_sd(<2 x double> %x0, i8 %x1) { ; one call masked by %x1 (imm 2), one with mask -1 (imm 4); returns their sum
+  %res = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 2, i8 %x1)
+  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 4, i8 -1)
+  %res2 = add i8 %res, %res1
+  ret i8 %res2
+}
+
+declare i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float>, i32, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_ss
+; CHECK-NOT: call 
+; CHECK: kmovw 
+; CHECK: vfpclassss
+; CHECK: %k0
+; CHECK: {%k1}
+; CHECK: kmovw
+; CHECK: vfpclassss
+; CHECK: %k0
+define i8 @test_int_x86_avx512_mask_fpclass_ss(<4 x float> %x0, i8 %x1) { ; one call masked by %x1, one with mask -1 (both imm 4); returns their sum
+  %res = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 %x1)
+  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 -1)
+  %res2 = add i8 %res, %res1
+  ret i8 %res2
+}
index fef90032f6888d33a10d4a73356845aba02861e6..5c9f579e97c8f4578cd9ae5d1faf989e95957205 100644 (file)
 // CHECK: vcvtuqq2ps -1032(%rdx){1to8}, %ymm25
 // CHECK:  encoding: [0x62,0x61,0xff,0x58,0x7a,0x8a,0xf8,0xfb,0xff,0xff]
           vcvtuqq2ps -1032(%rdx){1to8}, %ymm25
+
+// CHECK: vfpclasssd $171, %xmm28, %k4
+// CHECK:  encoding: [0x62,0x93,0xfd,0x08,0x67,0xe4,0xab]
+          vfpclasssd $0xab, %xmm28, %k4
+
+// CHECK: vfpclasssd $171, %xmm28, %k4 {%k3}
+// CHECK:  encoding: [0x62,0x93,0xfd,0x0b,0x67,0xe4,0xab]
+          vfpclasssd $0xab, %xmm28, %k4 {%k3}
+
+// CHECK: vfpclasssd $123,  %xmm28, %k4
+// CHECK:  encoding: [0x62,0x93,0xfd,0x08,0x67,0xe4,0x7b]
+          vfpclasssd $0x7b, %xmm28, %k4
+
+// CHECK: vfpclasssd $123, (%rcx), %k4
+// CHECK:  encoding: [0x62,0xf3,0xfd,0x08,0x67,0x21,0x7b]
+          vfpclasssd $0x7b,(%rcx), %k4
+
+// CHECK: vfpclasssd $123, 291(%rax,%r14,8), %k4
+// CHECK:  encoding: [0x62,0xb3,0xfd,0x08,0x67,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vfpclasssd $0x7b,291(%rax,%r14,8), %k4
+
+// CHECK: vfpclasssd $123, 1016(%rdx), %k4
+// CHECK:  encoding: [0x62,0xf3,0xfd,0x08,0x67,0x62,0x7f,0x7b]
+          vfpclasssd $0x7b,1016(%rdx), %k4
+
+// CHECK: vfpclasssd $123, 1024(%rdx), %k4
+// CHECK:  encoding: [0x62,0xf3,0xfd,0x08,0x67,0xa2,0x00,0x04,0x00,0x00,0x7b]
+          vfpclasssd $0x7b,1024(%rdx), %k4
+
+// CHECK: vfpclasssd $123, -1024(%rdx), %k4
+// CHECK:  encoding: [0x62,0xf3,0xfd,0x08,0x67,0x62,0x80,0x7b]
+          vfpclasssd $0x7b,-1024(%rdx), %k4
+
+// CHECK: vfpclasssd $123, -1032(%rdx), %k4
+// CHECK:  encoding: [0x62,0xf3,0xfd,0x08,0x67,0xa2,0xf8,0xfb,0xff,0xff,0x7b]
+          vfpclasssd $0x7b,-1032(%rdx), %k4
+
+// CHECK: vfpclassss $171, %xmm26, %k5
+// CHECK:  encoding: [0x62,0x93,0x7d,0x08,0x67,0xea,0xab]
+          vfpclassss $0xab, %xmm26, %k5
+
+// CHECK: vfpclassss $171, %xmm26, %k5 {%k4}
+// CHECK:  encoding: [0x62,0x93,0x7d,0x0c,0x67,0xea,0xab]
+          vfpclassss $0xab, %xmm26, %k5 {%k4}
+
+// CHECK: vfpclassss $123,  %xmm26, %k5
+// CHECK:  encoding: [0x62,0x93,0x7d,0x08,0x67,0xea,0x7b]
+          vfpclassss $0x7b, %xmm26, %k5
+
+// CHECK: vfpclassss $123, (%rcx), %k5
+// CHECK:  encoding: [0x62,0xf3,0x7d,0x08,0x67,0x29,0x7b]
+          vfpclassss $0x7b,(%rcx), %k5
+
+// CHECK: vfpclassss $123, 291(%rax,%r14,8), %k5
+// CHECK:  encoding: [0x62,0xb3,0x7d,0x08,0x67,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vfpclassss $0x7b,291(%rax,%r14,8), %k5
+
+// CHECK: vfpclassss $123, 508(%rdx), %k5
+// CHECK:  encoding: [0x62,0xf3,0x7d,0x08,0x67,0x6a,0x7f,0x7b]
+          vfpclassss $0x7b,508(%rdx), %k5
+
+// CHECK: vfpclassss $123, 512(%rdx), %k5
+// CHECK:  encoding: [0x62,0xf3,0x7d,0x08,0x67,0xaa,0x00,0x02,0x00,0x00,0x7b]
+          vfpclassss $0x7b,512(%rdx), %k5
+
+// CHECK: vfpclassss $123, -512(%rdx), %k5
+// CHECK:  encoding: [0x62,0xf3,0x7d,0x08,0x67,0x6a,0x80,0x7b]
+          vfpclassss $0x7b,-512(%rdx), %k5
+
+// CHECK: vfpclassss $123, -516(%rdx), %k5
+// CHECK:  encoding: [0x62,0xf3,0x7d,0x08,0x67,0xaa,0xfc,0xfd,0xff,0xff,0x7b]
+          vfpclassss $0x7b,-516(%rdx), %k5
+
+// CHECK: vfpclasssd $171, %xmm20, %k3
+// CHECK:  encoding: [0x62,0xb3,0xfd,0x08,0x67,0xdc,0xab]
+          vfpclasssd $0xab, %xmm20, %k3
+
+// CHECK: vfpclasssd $171, %xmm20, %k3 {%k6}
+// CHECK:  encoding: [0x62,0xb3,0xfd,0x0e,0x67,0xdc,0xab]
+          vfpclasssd $0xab, %xmm20, %k3 {%k6}
+
+// CHECK: vfpclasssd $123, %xmm20, %k3
+// CHECK:  encoding: [0x62,0xb3,0xfd,0x08,0x67,0xdc,0x7b]
+          vfpclasssd $0x7b, %xmm20, %k3
+
+// CHECK: vfpclasssd $123, (%rcx), %k3
+// CHECK:  encoding: [0x62,0xf3,0xfd,0x08,0x67,0x19,0x7b]
+          vfpclasssd $0x7b,(%rcx), %k3
+
+// CHECK: vfpclasssd $123, 4660(%rax,%r14,8), %k3
+// CHECK:  encoding: [0x62,0xb3,0xfd,0x08,0x67,0x9c,0xf0,0x34,0x12,0x00,0x00,0x7b]
+          vfpclasssd $0x7b,4660(%rax,%r14,8), %k3
+
+// CHECK: vfpclasssd $123, 1016(%rdx), %k3
+// CHECK:  encoding: [0x62,0xf3,0xfd,0x08,0x67,0x5a,0x7f,0x7b]
+          vfpclasssd $0x7b,1016(%rdx), %k3
+
+// CHECK: vfpclasssd $123, 1024(%rdx), %k3
+// CHECK:  encoding: [0x62,0xf3,0xfd,0x08,0x67,0x9a,0x00,0x04,0x00,0x00,0x7b]
+          vfpclasssd $0x7b,1024(%rdx), %k3
+
+// CHECK: vfpclasssd $123, -1024(%rdx), %k3
+// CHECK:  encoding: [0x62,0xf3,0xfd,0x08,0x67,0x5a,0x80,0x7b]
+          vfpclasssd $0x7b,-1024(%rdx), %k3
+
+// CHECK: vfpclasssd $123, -1032(%rdx), %k3
+// CHECK:  encoding: [0x62,0xf3,0xfd,0x08,0x67,0x9a,0xf8,0xfb,0xff,0xff,0x7b]
+          vfpclasssd $0x7b,-1032(%rdx), %k3
+
+// CHECK: vfpclassss $171, %xmm28, %k4
+// CHECK:  encoding: [0x62,0x93,0x7d,0x08,0x67,0xe4,0xab]
+          vfpclassss $0xab, %xmm28, %k4
+
+// CHECK: vfpclassss $171, %xmm28, %k4 {%k6}
+// CHECK:  encoding: [0x62,0x93,0x7d,0x0e,0x67,0xe4,0xab]
+          vfpclassss $0xab, %xmm28, %k4 {%k6}
+
+// CHECK: vfpclassss $123,  %xmm28, %k4
+// CHECK:  encoding: [0x62,0x93,0x7d,0x08,0x67,0xe4,0x7b]
+          vfpclassss $0x7b, %xmm28, %k4
+
+// CHECK: vfpclassss $123, (%rcx), %k4
+// CHECK:  encoding: [0x62,0xf3,0x7d,0x08,0x67,0x21,0x7b]
+          vfpclassss $0x7b,(%rcx), %k4
+
+// CHECK: vfpclassss $123, 4660(%rax,%r14,8), %k4
+// CHECK:  encoding: [0x62,0xb3,0x7d,0x08,0x67,0xa4,0xf0,0x34,0x12,0x00,0x00,0x7b]
+          vfpclassss $0x7b,4660(%rax,%r14,8), %k4
+
+// CHECK: vfpclassss $123, 508(%rdx), %k4
+// CHECK:  encoding: [0x62,0xf3,0x7d,0x08,0x67,0x62,0x7f,0x7b]
+          vfpclassss $0x7b,508(%rdx), %k4
+
+// CHECK: vfpclassss $123, 512(%rdx), %k4
+// CHECK:  encoding: [0x62,0xf3,0x7d,0x08,0x67,0xa2,0x00,0x02,0x00,0x00,0x7b]
+          vfpclassss $0x7b,512(%rdx), %k4
+
+// CHECK: vfpclassss $123, -512(%rdx), %k4
+// CHECK:  encoding: [0x62,0xf3,0x7d,0x08,0x67,0x62,0x80,0x7b]
+          vfpclassss $0x7b,-512(%rdx), %k4
+
+// CHECK: vfpclassss $123, -516(%rdx), %k4
+// CHECK:  encoding: [0x62,0xf3,0x7d,0x08,0x67,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
+          vfpclassss $0x7b,-516(%rdx), %k4