[X86][AVX512] add reduce/range/scalef/rndScale
authorAsaf Badouh <asaf.badouh@intel.com>
Wed, 22 Jul 2015 12:00:43 +0000 (12:00 +0000)
committerAsaf Badouh <asaf.badouh@intel.com>
Wed, 22 Jul 2015 12:00:43 +0000 (12:00 +0000)
include encoding and intrinsics

Differential Revision: http://reviews.llvm.org/D11222

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@242896 91177308-0d34-0410-b5e6-96231b3b80d8

14 files changed:
include/llvm/IR/IntrinsicsX86.td
lib/Target/X86/X86ISelLowering.cpp
lib/Target/X86/X86ISelLowering.h
lib/Target/X86/X86InstrAVX512.td
lib/Target/X86/X86InstrFragmentsSIMD.td
lib/Target/X86/X86IntrinsicsInfo.h
test/CodeGen/X86/avx512-intrinsics.ll
test/CodeGen/X86/avx512dq-intrinsics.ll
test/CodeGen/X86/avx512dqvl-intrinsics.ll
test/CodeGen/X86/avx512vl-intrinsics.ll
test/MC/X86/avx512-encodings.s
test/MC/X86/x86-64-avx512dq.s
test/MC/X86/x86-64-avx512dq_vl.s
test/MC/X86/x86-64-avx512f_vl.s

index 8eb254f07937cbdb3495c7878893ecafc0bfd3c7..352a592bbd84eff40e4e73ebb8e0250442cfcd8b 100644 (file)
@@ -4221,12 +4221,60 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
           [llvm_v8i64_ty, llvm_v8f32_ty,  llvm_i8_ty,  llvm_i32_ty],
           [IntrNoMem]>;
 
-  def int_x86_avx512_mask_rndscale_ps_512: GCCBuiltin<"__builtin_ia32_rndscaleps_mask">,
+  def int_x86_avx512_mask_rndscale_pd_128 : GCCBuiltin<"__builtin_ia32_rndscalepd_128_mask">,
+        Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty,
+                                     llvm_v2f64_ty,  llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_rndscale_pd_256 : GCCBuiltin<"__builtin_ia32_rndscalepd_256_mask">,
+        Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty, 
+                                     llvm_v4f64_ty,  llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_rndscale_pd_512 : GCCBuiltin<"__builtin_ia32_rndscalepd_mask">,
+        Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty,
+                                     llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_rndscale_ps_128 : GCCBuiltin<"__builtin_ia32_rndscaleps_128_mask">,
+        Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty,
+                                     llvm_v4f32_ty,  llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_rndscale_ps_256 : GCCBuiltin<"__builtin_ia32_rndscaleps_256_mask">,
+        Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty, 
+                                     llvm_v8f32_ty,  llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_rndscale_ps_512 : GCCBuiltin<"__builtin_ia32_rndscaleps_mask">,
         Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty,
                                      llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_avx512_mask_rndscale_pd_512: GCCBuiltin<"__builtin_ia32_rndscalepd_mask">,
+  def int_x86_avx512_mask_reduce_pd_128 : GCCBuiltin<"__builtin_ia32_reducepd128_mask">,
+        Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty, 
+                                     llvm_v2f64_ty,  llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_reduce_pd_256 : GCCBuiltin<"__builtin_ia32_reducepd256_mask">,
+        Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty,
+                                     llvm_v4f64_ty,  llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_reduce_pd_512 : GCCBuiltin<"__builtin_ia32_reducepd512_mask">,
         Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty,
                                      llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_reduce_ps_128 : GCCBuiltin<"__builtin_ia32_reduceps128_mask">,
+        Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, 
+                                     llvm_v4f32_ty,  llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_reduce_ps_256 : GCCBuiltin<"__builtin_ia32_reduceps256_mask">,
+        Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty,
+                                     llvm_v8f32_ty,  llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_reduce_ps_512 : GCCBuiltin<"__builtin_ia32_reduceps512_mask">,
+        Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty,
+                                     llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_x86_avx512_mask_range_pd_128 : GCCBuiltin<"__builtin_ia32_rangepd128_mask">,
+        Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, 
+                                    llvm_v2f64_ty,  llvm_i8_ty], [IntrNoMem]>;
+def int_x86_avx512_mask_range_pd_256 : GCCBuiltin<"__builtin_ia32_rangepd256_mask">,
+        Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty,
+                                    llvm_v4f64_ty,  llvm_i8_ty], [IntrNoMem]>;
+def int_x86_avx512_mask_range_pd_512 : GCCBuiltin<"__builtin_ia32_rangepd512_mask">,
+        Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty,
+                                    llvm_v8f64_ty,  llvm_i8_ty,  llvm_i32_ty], [IntrNoMem]>;
+def int_x86_avx512_mask_range_ps_128 : GCCBuiltin<"__builtin_ia32_rangeps128_mask">,
+        Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty,
+                                    llvm_v4f32_ty,  llvm_i8_ty], [IntrNoMem]>;
+def int_x86_avx512_mask_range_ps_256 : GCCBuiltin<"__builtin_ia32_rangeps256_mask">,
+        Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty, 
+                                    llvm_v8f32_ty,  llvm_i8_ty], [IntrNoMem]>;
+def int_x86_avx512_mask_range_ps_512 : GCCBuiltin<"__builtin_ia32_rangeps512_mask">,
+        Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty,
+                                     llvm_v16f32_ty,  llvm_i16_ty,  llvm_i32_ty], [IntrNoMem]>;
 }
 
 // Vector load with broadcast
@@ -4508,7 +4556,28 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
           Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
                                       llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
                                      [IntrNoMem]>;
-
+  def int_x86_avx512_mask_range_ss : GCCBuiltin<"__builtin_ia32_rangess_mask">,
+          Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
+                                     llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
+                                     [IntrNoMem]>;
+  def int_x86_avx512_mask_range_sd : GCCBuiltin<"__builtin_ia32_rangesd_mask">,
+          Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
+                                      llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
+                                     [IntrNoMem]>;
+  def int_x86_avx512_mask_reduce_ss : GCCBuiltin<"__builtin_ia32_reducess_mask">,
+          Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
+                                     llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
+                                     [IntrNoMem]>;
+  def int_x86_avx512_mask_reduce_sd : GCCBuiltin<"__builtin_ia32_reducesd_mask">,
+          Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
+                                      llvm_i8_ty, llvm_i32_ty, llvm_i32_ty],
+                                     [IntrNoMem]>;
+  def int_x86_avx512_mask_scalef_sd : GCCBuiltin<"__builtin_ia32_scalefsd_round">,
+          Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, 
+                                      llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_scalef_ss : GCCBuiltin<"__builtin_ia32_scalefss_round">,
+          Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, 
+                                      llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_scalef_pd_128 : GCCBuiltin<"__builtin_ia32_scalefpd128_mask">,
           Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, 
                     llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
index 4b904bbcd63244dff6cde72ab4414884db73cdae..ea05a2f8bcd1b7a2901a739a3acd960358af16e8 100644 (file)
@@ -15460,6 +15460,24 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
                                               Src1, Src2, Rnd),
                                   Mask, PassThru, Subtarget, DAG);
     }
+    case INTR_TYPE_3OP_MASK_RM: {
+      SDValue Src1 = Op.getOperand(1);
+      SDValue Src2 = Op.getOperand(2);
+      SDValue Imm = Op.getOperand(3);
+      SDValue PassThru = Op.getOperand(4);
+      SDValue Mask = Op.getOperand(5);
+      // These intrinsics come in two forms: with or without a rounding mode.
+      // If the intrinsic has a rounding-mode operand (7 operands in total),
+      // use it; otherwise set the rounding mode to "current".
+      SDValue Rnd;
+      if (Op.getNumOperands() == 7)
+        Rnd = Op.getOperand(6);
+      else
+        Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
+      return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
+        Src1, Src2, Imm, Rnd),
+        Mask, PassThru, Subtarget, DAG);
+    }
     case INTR_TYPE_3OP_MASK: {
       SDValue Src1 = Op.getOperand(1);
       SDValue Src2 = Op.getOperand(2);
@@ -19039,7 +19057,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::FNMSUB_RND:         return "X86ISD::FNMSUB_RND";
   case X86ISD::FMADDSUB_RND:       return "X86ISD::FMADDSUB_RND";
   case X86ISD::FMSUBADD_RND:       return "X86ISD::FMSUBADD_RND";
-  case X86ISD::RNDSCALE:           return "X86ISD::RNDSCALE";
+  case X86ISD::VRNDSCALE:          return "X86ISD::VRNDSCALE";
+  case X86ISD::VREDUCE:            return "X86ISD::VREDUCE";
   case X86ISD::PCMPESTRI:          return "X86ISD::PCMPESTRI";
   case X86ISD::PCMPISTRI:          return "X86ISD::PCMPISTRI";
   case X86ISD::XTEST:              return "X86ISD::XTEST";
index 47b3ce0d4ec9432db751d6335a167d482a74498f..7fab19386827ff82d9c85b21c0c3efec608098fa 100644 (file)
@@ -386,6 +386,10 @@ namespace llvm {
       VFIXUPIMM,
       //Range Restriction Calculation For Packed Pairs of Float32/64 values
       VRANGE,
+      // Reduce - Perform Reduction Transformation on scalar/packed FP
+      VREDUCE,
+      // RndScale - Round FP Values To Include A Given Number Of Fraction Bits
+      VRNDSCALE,
       // Broadcast scalar to vector
       VBROADCAST,
       // Broadcast subvector to vector
@@ -419,7 +423,6 @@ namespace llvm {
       FNMSUB_RND,
       FMADDSUB_RND,
       FMSUBADD_RND,
-      RNDSCALE,
 
       // Compress and expand
       COMPRESS,
index 58334a86b6655f7b8ec3927049ebdddef02199d1..215dcebe6610dfe69214a1509228b3ff39f86bfc 100644 (file)
@@ -3394,7 +3394,7 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
 }
 
 multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
-                         SDNode VecNode, OpndItins itins, bit IsCommutable> {
+                         SDNode VecNode, OpndItins itins, bit IsCommutable = 0> {
 
   defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
@@ -3569,13 +3569,34 @@ multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
   }//let mayLoad = 1
 }
 
-multiclass avx512_fp_scalef_all<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                            X86VectorVTInfo _> {
+  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
+                  "$src2, $src1", "$src1, $src2",
+                  (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>;
+  let mayLoad = 1 in {
+    defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
+                    "$src2, $src1", "$src1, $src2",
+                    (OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>;
+  }//let mayLoad = 1
+}
+
+multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode> {
   defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v16f32_info>, 
              avx512_fp_round_packed<opc, OpcodeStr, OpNode, v16f32_info>,
                               EVEX_V512, EVEX_CD8<32, CD8VF>;
   defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v8f64_info>, 
              avx512_fp_round_packed<opc, OpcodeStr, OpNode, v8f64_info>,
                               EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+  defm SSZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNode, f32x_info>,
+                avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNode, SSE_ALU_ITINS_S.s>,
+                              EVEX_4V,EVEX_CD8<32, CD8VT1>;
+  defm SDZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNode, f64x_info>,
+                avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNode, SSE_ALU_ITINS_S.d>,
+                              EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
+
   // Define only if AVX512VL feature is present.
   let Predicates = [HasVLX] in {
     defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v4f32x_info>,
@@ -3588,7 +3609,7 @@ multiclass avx512_fp_scalef_all<bits<8> opc, string OpcodeStr, SDNode OpNode> {
                                    EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
   }
 }
-defm VSCALEF : avx512_fp_scalef_all<0x2C, "vscalef", X86scalef>, T8PD;
+defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef>, T8PD;
 
 //===----------------------------------------------------------------------===//
 // AVX-512  VPTESTM instructions
@@ -5481,47 +5502,6 @@ let Predicates = [HasAVX512] in {
             (VSQRTSDZm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
 }
 
-
-multiclass avx512_rndscale<bits<8> opc, string OpcodeStr,
-                            X86MemOperand x86memop, RegisterClass RC,
-                            PatFrag mem_frag, Domain d> {
-let ExeDomain = d in {
-  // Intrinsic operation, reg.
-  // Vector intrinsic operation, reg
-  def r : AVX512AIi8<opc, MRMSrcReg,
-                    (outs RC:$dst), (ins RC:$src1, i32u8imm:$src2),
-                    !strconcat(OpcodeStr,
-                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                    []>, EVEX;
-
-  // Vector intrinsic operation, mem
-  def m : AVX512AIi8<opc, MRMSrcMem,
-                    (outs RC:$dst), (ins x86memop:$src1, i32u8imm:$src2),
-                    !strconcat(OpcodeStr,
-                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                    []>, EVEX;
-} // ExeDomain
-}
-
-defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512,
-                                loadv16f32, SSEPackedSingle>, EVEX_V512,
-                                EVEX_CD8<32, CD8VF>;
-
-def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512 (v16f32 VR512:$src1),
-                   imm:$src2, (v16f32 VR512:$src1), (i16 -1),
-                   FROUND_CURRENT)),
-                   (VRNDSCALEPSZr VR512:$src1, imm:$src2)>;
-
-
-defm VRNDSCALEPDZ : avx512_rndscale<0x09, "vrndscalepd", f512mem, VR512,
-                                loadv8f64, SSEPackedDouble>, EVEX_V512,
-                                VEX_W, EVEX_CD8<64, CD8VF>;
-
-def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512 (v8f64 VR512:$src1),
-                  imm:$src2, (v8f64 VR512:$src1), (i8 -1),
-                  FROUND_CURRENT)),
-                   (VRNDSCALEPDZr VR512:$src1, imm:$src2)>;
-
 multiclass
 avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
 
@@ -5529,20 +5509,20 @@ avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
   defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                            "$src3, $src2, $src1", "$src1, $src2, $src3",
-                           (_.VT (X86RndScale (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+                           (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                             (i32 imm:$src3), (i32 FROUND_CURRENT)))>;
 
   defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
-                         "{sae}, $src3, $src2, $src1", "$src1, $src2, $src3, {sae}",
-                         (_.VT (X86RndScale (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+                         "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
+                         (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                          (i32 imm:$src3), (i32 FROUND_NO_EXC)))>, EVEX_B;
 
   let mayLoad = 1 in
   defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3), OpcodeStr,
                          "$src3, $src2, $src1", "$src1, $src2, $src3",
-                         (_.VT (X86RndScale (_.VT _.RC:$src1),
+                         (_.VT (X86RndScales (_.VT _.RC:$src1),
                           (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
                           (i32 imm:$src3), (i32 FROUND_CURRENT)))>;
   }
@@ -5587,29 +5567,6 @@ defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", f32x_info>,
 defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", f64x_info>, VEX_W,
                                 AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VT1>;
 
-let Predicates = [HasAVX512] in {
-def : Pat<(v16f32 (ffloor VR512:$src)),
-          (VRNDSCALEPSZr VR512:$src, (i32 0x1))>;
-def : Pat<(v16f32 (fnearbyint VR512:$src)),
-          (VRNDSCALEPSZr VR512:$src, (i32 0xC))>;
-def : Pat<(v16f32 (fceil VR512:$src)),
-          (VRNDSCALEPSZr VR512:$src, (i32 0x2))>;
-def : Pat<(v16f32 (frint VR512:$src)),
-          (VRNDSCALEPSZr VR512:$src, (i32 0x4))>;
-def : Pat<(v16f32 (ftrunc VR512:$src)),
-          (VRNDSCALEPSZr VR512:$src, (i32 0x3))>;
-
-def : Pat<(v8f64 (ffloor VR512:$src)),
-          (VRNDSCALEPDZr VR512:$src, (i32 0x1))>;
-def : Pat<(v8f64 (fnearbyint VR512:$src)),
-          (VRNDSCALEPDZr VR512:$src, (i32 0xC))>;
-def : Pat<(v8f64 (fceil VR512:$src)),
-          (VRNDSCALEPDZr VR512:$src, (i32 0x2))>;
-def : Pat<(v8f64 (frint VR512:$src)),
-          (VRNDSCALEPDZr VR512:$src, (i32 0x4))>;
-def : Pat<(v8f64 (ftrunc VR512:$src)),
-          (VRNDSCALEPDZr VR512:$src, (i32 0x3))>;
-}
 //-------------------------------------------------
 // Integer truncate and extend operations
 //-------------------------------------------------
@@ -6321,6 +6278,62 @@ defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", avx512vl_f32_info>,
 defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>,
                                          EVEX, VEX_W;
 
+//handle instruction  reg_vec1 = op(reg_vec,imm)
+//                               op(mem_vec,imm)
+//                               op(broadcast(eltVt),imm)
+//all instructions are created with FROUND_CURRENT
+multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                                                            X86VectorVTInfo _>{
+  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+                      (ins _.RC:$src1, i32u8imm:$src2),
+                      OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
+                      (OpNode (_.VT _.RC:$src1),
+                              (i32 imm:$src2),
+                              (i32 FROUND_CURRENT))>;
+  let mayLoad = 1 in {
+    defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+                      (ins _.MemOp:$src1, i32u8imm:$src2),
+                      OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
+                      (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
+                              (i32 imm:$src2),
+                              (i32 FROUND_CURRENT))>;
+    defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+                      (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
+                      OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
+                      "${src1}"##_.BroadcastStr##", $src2",
+                      (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
+                              (i32 imm:$src2),
+                              (i32 FROUND_CURRENT))>, EVEX_B;
+  }
+}
+
+//handle instruction  reg_vec1 = op(reg_vec,imm),{sae}
+multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
+                                             SDNode OpNode, X86VectorVTInfo _>{
+  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+                      (ins _.RC:$src1, i32u8imm:$src2),
+                      OpcodeStr##_.Suffix, "$src2,{sae}, $src1",
+                      "$src1, {sae}, $src2",
+                      (OpNode (_.VT _.RC:$src1),
+                              (i32 imm:$src2),
+                              (i32 FROUND_NO_EXC))>, EVEX_B;
+}
+
+multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
+            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, Predicate prd>{
+  let Predicates = [prd] in {
+    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, _.info512>,
+                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNode, _.info512>,
+                                  EVEX_V512;
+  }
+  let Predicates = [prd, HasVLX] in {
+    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, _.info128>,
+                                  EVEX_V128;
+    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, _.info256>,
+                                  EVEX_V256;
+  }
+}
+
 //handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
 //                               op(reg_vec2,mem_vec,imm)
 //                               op(reg_vec2,broadcast(eltVt),imm)
@@ -6328,27 +6341,27 @@ defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>,
 multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                                             X86VectorVTInfo _>{
   defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
-                      (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
+                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                       OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                       (OpNode (_.VT _.RC:$src1),
                               (_.VT _.RC:$src2),
-                              (i8 imm:$src3),
+                              (i32 imm:$src3),
                               (i32 FROUND_CURRENT))>;
   let mayLoad = 1 in {
     defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
-                      (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
+                      (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                       OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                       (OpNode (_.VT _.RC:$src1),
                               (_.VT (bitconvert (_.LdFrag addr:$src2))),
-                              (i8 imm:$src3),
+                              (i32 imm:$src3),
                               (i32 FROUND_CURRENT))>;
     defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
-                      (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
+                      (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                       OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                       "$src1, ${src2}"##_.BroadcastStr##", $src3",
                       (OpNode (_.VT _.RC:$src1),
                               (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
-                              (i8 imm:$src3),
+                              (i32 imm:$src3),
                               (i32 FROUND_CURRENT))>, EVEX_B;
   }
 }
@@ -6388,20 +6401,20 @@ multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                                            X86VectorVTInfo _> {
 
   defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
-                      (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
+                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                       OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                       (OpNode (_.VT _.RC:$src1),
                               (_.VT _.RC:$src2),
-                              (i8 imm:$src3),
+                              (i32 imm:$src3),
                               (i32 FROUND_CURRENT))>;
   let mayLoad = 1 in {
     defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
-                      (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
+                      (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                       OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                       (OpNode (_.VT _.RC:$src1),
                               (_.VT (scalar_to_vector
                                         (_.ScalarLdFrag addr:$src2))),
-                              (i8 imm:$src3),
+                              (i32 imm:$src3),
                               (i32 FROUND_CURRENT))>;
 
     let isAsmParserOnly = 1 in {
@@ -6417,18 +6430,25 @@ multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
 multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                              SDNode OpNode, X86VectorVTInfo _>{
   defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
-                      (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
+                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                       OpcodeStr, "$src3,{sae}, $src2, $src1",
                       "$src1, $src2,{sae}, $src3",
                       (OpNode (_.VT _.RC:$src1),
                               (_.VT _.RC:$src2),
-                              (i8 imm:$src3),
+                              (i32 imm:$src3),
                               (i32 FROUND_NO_EXC))>, EVEX_B;
 }
 //handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
 multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr,
                                              SDNode OpNode, X86VectorVTInfo _> {
-  defm NAME: avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNode, _>;
+  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
+                      OpcodeStr, "$src3,{sae}, $src2, $src1",
+                      "$src1, $src2,{sae}, $src3",
+                      (OpNode (_.VT _.RC:$src1),
+                              (_.VT _.RC:$src2),
+                              (i32 imm:$src3),
+                              (i32 FROUND_NO_EXC))>, EVEX_B;
 }
 
 multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
@@ -6466,6 +6486,14 @@ multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
   }
 }
 
+multiclass avx512_common_fp_sae_packed_imm_all<string OpcodeStr, bits<8> opcPs,
+             bits<8> opcPd, SDNode OpNode, Predicate prd>{
+  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr,  avx512vl_f32_info, opcPs, 
+                                            OpNode, prd>, EVEX_CD8<32, CD8VF>;
+  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr,  avx512vl_f64_info, opcPd, 
+                                            OpNode, prd>,EVEX_CD8<64, CD8VF> , VEX_W;
+}
+
 defm VFIXUPIMMPD : avx512_common_fp_sae_packed_imm<"vfixupimmpd",
                               avx512vl_f64_info, 0x54, X86VFixupimm, HasAVX512>,
       AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
@@ -6480,6 +6508,9 @@ defm VFIXUPIMMSS: avx512_common_fp_sae_scalar_imm<"vfixupimmss", f32x_info,
                                                  0x55, X86VFixupimm, HasAVX512>,
       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
 
+defm VREDUCE : avx512_common_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56, X86VReduce, HasDQI>,AVX512AIi8Base,EVEX;
+defm VRNDSCALE : avx512_common_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09, X86VRndScale, HasAVX512>,AVX512AIi8Base, EVEX;
+
 defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
                                                        0x50, X86VRange, HasDQI>,
       AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
@@ -6494,6 +6525,12 @@ defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
                                                  0x51, X86VRange, HasDQI>,
       AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
 
+defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
+                                                 0x57, X86Reduces, HasDQI>,
+      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
+defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
+                                                 0x57, X86Reduces, HasDQI>,
+      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
 
 multiclass avx512_shuff_packed_128<string OpcodeStr, AVX512VLVectorVTInfo _,
                                        bits<8> opc, SDNode OpNode = X86Shuf128>{
@@ -6505,6 +6542,29 @@ multiclass avx512_shuff_packed_128<string OpcodeStr, AVX512VLVectorVTInfo _,
      defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
   }
 }
+let Predicates = [HasAVX512] in {
+def : Pat<(v16f32 (ffloor VR512:$src)),
+          (VRNDSCALEPSZrri VR512:$src, (i32 0x1))>;
+def : Pat<(v16f32 (fnearbyint VR512:$src)),
+          (VRNDSCALEPSZrri VR512:$src, (i32 0xC))>;
+def : Pat<(v16f32 (fceil VR512:$src)),
+          (VRNDSCALEPSZrri VR512:$src, (i32 0x2))>;
+def : Pat<(v16f32 (frint VR512:$src)),
+          (VRNDSCALEPSZrri VR512:$src, (i32 0x4))>;
+def : Pat<(v16f32 (ftrunc VR512:$src)),
+          (VRNDSCALEPSZrri VR512:$src, (i32 0x3))>;
+
+def : Pat<(v8f64 (ffloor VR512:$src)),
+          (VRNDSCALEPDZrri VR512:$src, (i32 0x1))>;
+def : Pat<(v8f64 (fnearbyint VR512:$src)),
+          (VRNDSCALEPDZrri VR512:$src, (i32 0xC))>;
+def : Pat<(v8f64 (fceil VR512:$src)),
+          (VRNDSCALEPDZrri VR512:$src, (i32 0x2))>;
+def : Pat<(v8f64 (frint VR512:$src)),
+          (VRNDSCALEPDZrri VR512:$src, (i32 0x4))>;
+def : Pat<(v8f64 (ftrunc VR512:$src)),
+          (VRNDSCALEPDZrri VR512:$src, (i32 0x3))>;
+}
 
 defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4",avx512vl_f32_info, 0x23>,
       AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
index abb5314e4b754099e35d8b6ce886fe3ff97c0096..401b3267368a8823ce44f085d35382af1f8a53a9 100644 (file)
@@ -232,6 +232,8 @@ def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                  SDTCisSameAs<0,2>, SDTCisInt<3>]>;
 def SDTFPBinOpImmRound: SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                              SDTCisSameAs<0,2>, SDTCisInt<3>, SDTCisInt<4>]>;
+def SDTFPUnaryOpImmRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+                              SDTCisInt<2>, SDTCisInt<3>]>;
 
 def SDTVBroadcast  : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
 def SDTVBroadcastm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>]>;
@@ -302,6 +304,8 @@ def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
 
 def X86VFixupimm       : SDNode<"X86ISD::VFIXUPIMM", SDTFPBinOpImmRound>;
 def X86VRange          : SDNode<"X86ISD::VRANGE", SDTFPBinOpImmRound>;
+def X86VReduce          : SDNode<"X86ISD::VREDUCE", SDTFPUnaryOpImmRound>;
+def X86VRndScale       : SDNode<"X86ISD::VRNDSCALE", SDTFPUnaryOpImmRound>;
 
 def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST",
                     SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
@@ -346,7 +350,8 @@ def X86exp2      : SDNode<"X86ISD::EXP2",     STDFp1SrcRm>;
 
 def X86rsqrt28s  : SDNode<"X86ISD::RSQRT28",  STDFp2SrcRm>;
 def X86rcp28s    : SDNode<"X86ISD::RCP28",    STDFp2SrcRm>;
-def X86RndScale  : SDNode<"X86ISD::RNDSCALE", STDFp3SrcRm>;
+def X86RndScales : SDNode<"X86ISD::VRNDSCALE", STDFp3SrcRm>;
+def X86Reduces   : SDNode<"X86ISD::VREDUCE", STDFp3SrcRm>;
 
 def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
                                          SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>,
index 7321a7849126e93ff8fccf0229bf18d5b4fece2f..1383fa373068285a9b0222f8b9bcb512498fd91c 100644 (file)
@@ -22,7 +22,7 @@ enum IntrinsicType {
   INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP,
   CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
   INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
-  INTR_TYPE_3OP_MASK, FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK,
+  INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM, FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK,
   VPERM_3OP_MASKZ,
   INTR_TYPE_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
   EXPAND_FROM_MEM, BLEND
@@ -903,10 +903,32 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_mask_pxor_q_128, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
   X86_INTRINSIC_DATA(avx512_mask_pxor_q_256, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
   X86_INTRINSIC_DATA(avx512_mask_pxor_q_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
+  X86_INTRINSIC_DATA(avx512_mask_range_pd_128, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_range_pd_256, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_range_pd_512, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_range_ps_128, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_range_ps_256, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_range_ps_512, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_range_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VRANGE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_range_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VRANGE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_reduce_pd_128, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_reduce_pd_256, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_reduce_pd_512, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_reduce_ps_128, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_reduce_ps_256, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_reduce_ps_512, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_reduce_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VREDUCE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_reduce_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VREDUCE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_rndscale_pd_128, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_rndscale_pd_256, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_rndscale_pd_512, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_rndscale_ps_128, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_rndscale_ps_256, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_rndscale_ps_512, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0),
   X86_INTRINSIC_DATA(avx512_mask_rndscale_sd,   INTR_TYPE_SCALAR_MASK_RM,
-                     X86ISD::RNDSCALE, 0),
+                     X86ISD::VRNDSCALE, 0),
   X86_INTRINSIC_DATA(avx512_mask_rndscale_ss,   INTR_TYPE_SCALAR_MASK_RM,
-                     X86ISD::RNDSCALE, 0),
+                     X86ISD::VRNDSCALE, 0),
   X86_INTRINSIC_DATA(avx512_mask_scalef_pd_128, INTR_TYPE_2OP_MASK_RM,
                      X86ISD::SCALEF, 0),
   X86_INTRINSIC_DATA(avx512_mask_scalef_pd_256, INTR_TYPE_2OP_MASK_RM,
@@ -919,6 +941,10 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
                      X86ISD::SCALEF, 0),
   X86_INTRINSIC_DATA(avx512_mask_scalef_ps_512, INTR_TYPE_2OP_MASK_RM,
                      X86ISD::SCALEF, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scalef_sd, INTR_TYPE_SCALAR_MASK_RM,
+                     X86ISD::SCALEF, 0),
+  X86_INTRINSIC_DATA(avx512_mask_scalef_ss, INTR_TYPE_SCALAR_MASK_RM,
+                     X86ISD::SCALEF, 0),
   X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
   X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
   X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_512, INTR_TYPE_1OP_MASK_RM, ISD::FSQRT,
index d0b0a0075d9c09c3a72b2408b346e39f3eaf44c5..6e50fda746726ac7afd301be83b42d1e40c26c3b 100644 (file)
@@ -3352,3 +3352,29 @@ define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_512(<16 x float> %x0, <16
   ret <16 x i32> %res2
 }
 
+
+declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32)
+; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ss
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vscalefss {{.*}}{%k1} 
+; CHECK: vscalefss     {rn-sae}
+define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
+  %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4)
+  %res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 8)
+  %res2 = fadd <4 x float> %res, %res1
+  ret <4 x float> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32)
+; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_sd
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vscalefsd {{.*}}{%k1} 
+; CHECK: vscalefsd     {rn-sae}
+define <2 x double>@test_int_x86_avx512_mask_scalef_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
+  %res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4)
+  %res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 8)
+  %res2 = fadd <2 x double> %res, %res1
+  ret <2 x double> %res2
+}
index 04cfeadb134a198ae464692553701ad71a139e35..67a88e155701bc9eeca8c65f37723275aa3df8cf 100644 (file)
@@ -1,3 +1,4 @@
+
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq | FileCheck %s
 
 declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double>, <8 x i64>, i8, i32)
@@ -192,3 +193,125 @@ define <8 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_512(<8 x i64> %x0, <8 x f
   ret <8 x float> %res2
 }
 
+declare <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
+; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_pd_512
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vreducepd {{.*}}{%k1}
+; CHECK: vreducepd
+; CHECK: {sae}
+define <8 x double>@test_int_x86_avx512_mask_reduce_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
+  %res = call <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double> %x0, i32 8, <8 x double> %x2, i8 %x3, i32 4)
+  %res1 = call <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double> %x0, i32 4, <8 x double> %x2, i8 -1, i32 8)
+  %res2 = fadd <8 x double> %res, %res1
+  ret <8 x double> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
+; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_ps_512
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vreduceps
+; CHECK: {sae}
+; CHECK: {%k1}
+; CHECK: vreduceps
+define <16 x float>@test_int_x86_avx512_mask_reduce_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
+  %res = call <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float> %x0, i32 44, <16 x float> %x2, i16 %x3, i32 8)
+  %res1 = call <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 4)
+  %res2 = fadd <16 x float> %res, %res1
+  ret <16 x float> %res2
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8, i32)
+; CHECK-LABEL: @test_int_x86_avx512_mask_range_pd_512
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vrangepd
+; CHECK: {%k1}
+; CHECK: vrangepd
+; CHECK: {sae}
+define <8 x double>@test_int_x86_avx512_mask_range_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
+  %res = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %x0, <8 x double> %x1, i32 8, <8 x double> %x3, i8 %x4, i32 4)
+  %res1 = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %x0, <8 x double> %x1, i32 4, <8 x double> %x3, i8 -1, i32 8)
+  %res2 = fadd <8 x double> %res, %res1
+  ret <8 x double> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16, i32)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_range_ps_512
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vrangeps
+; CHECK: {%k1}
+; CHECK: vrangeps
+; CHECK: {sae}
+define <16 x float>@test_int_x86_avx512_mask_range_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
+  %res = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %x0, <16 x float> %x1, i32 88, <16 x float> %x3, i16 %x4, i32 4)
+  %res1 = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %x0, <16 x float> %x1, i32 4, <16 x float> %x3, i16 -1, i32 8)
+  %res2 = fadd <16 x float> %res, %res1
+  ret <16 x float> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32, i32)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_ss
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vreducess
+; CHECK: {%k1}
+; CHECK: vreducess
+; CHECK: {sae}
+define <4 x float>@test_int_x86_avx512_mask_reduce_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
+  %res = call <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 4)
+  %res1 = call <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 4, i32 8)
+  %res2 = fadd <4 x float> %res, %res1
+  ret <4 x float> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32, i32)
+; CHECK-LABEL: @test_int_x86_avx512_mask_range_ss
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vrangess
+; CHECK: {sae}
+; CHECK: {%k1}
+; CHECK: vrangess
+; CHECK: {sae} 
+define <4 x float>@test_int_x86_avx512_mask_range_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
+  %res = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 8)
+  %res1 = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 4, i32 8)
+  %res2 = fadd <4 x float> %res, %res1
+  ret <4 x float> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_sd
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vreducesd
+; CHECK: {%k1}
+; CHECK: vreducesd
+; CHECK: {sae} 
+define <2 x double>@test_int_x86_avx512_mask_reduce_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
+  %res = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4)
+  %res1 = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8)
+  %res2 = fadd <2 x double> %res, %res1
+  ret <2 x double> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32)
+; CHECK-LABEL: @test_int_x86_avx512_mask_range_sd
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vrangesd
+; CHECK: {%k1}
+; CHECK: vrangesd
+; CHECK: {sae} 
+define <2 x double>@test_int_x86_avx512_mask_range_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
+  %res = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4)
+  %res1 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8)
+  %res2 = fadd <2 x double> %res, %res1
+  ret <2 x double> %res2
+}
index a41560ca4656a45dfd7d69105a8365f51ccdcd30..2fcfac0f1bb01cc468ec5de09becd7d1665d3107 100644 (file)
@@ -1537,3 +1537,114 @@ define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_256(<4 x float> %x0, <4 x
   %res2 = add <4 x i64> %res, %res1
   ret <4 x i64> %res2
 }
+
+declare <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double>, i32, <2 x double>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_pd_128
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vreducepd {{.*}}{%k1} 
+; CHECK: vreducepd
+define <2 x double>@test_int_x86_avx512_mask_reduce_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
+  %res = call <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double> %x0, i32 4, <2 x double> %x2, i8 %x3)
+  %res1 = call <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double> %x0, i32 8, <2 x double> %x2, i8 -1)
+  %res2 = fadd <2 x double> %res, %res1
+  ret <2 x double> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double>, i32, <4 x double>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_pd_256
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vreducepd {{.*}}{%k1} 
+; CHECK: vreducepd
+define <4 x double>@test_int_x86_avx512_mask_reduce_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
+  %res = call <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double> %x0, i32 4, <4 x double> %x2, i8 %x3)
+  %res1 = call <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double> %x0, i32 0, <4 x double> %x2, i8 -1)
+  %res2 = fadd <4 x double> %res, %res1
+  ret <4 x double> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float>, i32, <4 x float>, i8)
+; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_ps_128
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vreduceps {{.*}}{%k1} 
+; CHECK: vreduceps
+define <4 x float>@test_int_x86_avx512_mask_reduce_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
+  %res = call <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float> %x0, i32 4, <4 x float> %x2, i8 %x3)
+  %res1 = call <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float> %x0, i32 88, <4 x float> %x2, i8 -1)
+  %res2 = fadd <4 x float> %res, %res1
+  ret <4 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float>, i32, <8 x float>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_ps_256
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vreduceps {{.*}}{%k1} 
+; CHECK: vreduceps
+define <8 x float>@test_int_x86_avx512_mask_reduce_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
+  %res = call <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 %x3)
+  %res1 = call <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 -1)
+  %res2 = fadd <8 x float> %res, %res1
+  ret <8 x float> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double>, <2 x double>, i32, <2 x double>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_range_pd_128
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vrangepd {{.*}}{%k1} 
+; CHECK: vrangepd
+define <2 x double>@test_int_x86_avx512_mask_range_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
+  %res = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %x0, <2 x double> %x1, i32 4, <2 x double> %x3, i8 %x4)
+  %res1 = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %x0, <2 x double> %x1, i32 8, <2 x double> %x3, i8 -1)
+  %res2 = fadd <2 x double> %res, %res1
+  ret <2 x double> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_range_pd_256
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vrangepd {{.*}}{%k1} 
+; CHECK: vrangepd
+define <4 x double>@test_int_x86_avx512_mask_range_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
+  %res = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %x0, <4 x double> %x1, i32 4, <4 x double> %x3, i8 %x4)
+  %res1 = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %x0, <4 x double> %x1, i32 88, <4 x double> %x3, i8 -1)
+  %res2 = fadd <4 x double> %res, %res1
+  ret <4 x double> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float>, <4 x float>, i32, <4 x float>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_range_ps_128
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vrangeps {{.*}}{%k1} 
+; CHECK: vrangeps
+define <4 x float>@test_int_x86_avx512_mask_range_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
+  %res = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %x0, <4 x float> %x1, i32 4, <4 x float> %x3, i8 %x4)
+  %res1 = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %x0, <4 x float> %x1, i32 88, <4 x float> %x3, i8 -1)
+  %res2 = fadd <4 x float> %res, %res1
+  ret <4 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_range_ps_256
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vrangeps {{.*}}{%k1} 
+; CHECK: vrangeps
+define <8 x float>@test_int_x86_avx512_mask_range_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
+  %res = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %x0, <8 x float> %x1, i32 4, <8 x float> %x3, i8 %x4)
+  %res1 = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %x0, <8 x float> %x1, i32 88, <8 x float> %x3, i8 -1)
+  %res2 = fadd <8 x float> %res, %res1
+  ret <8 x float> %res2
+}
index 72e1ac6e6f7c2bedb1d8b4fc04522b377483f7da..46ee51f47b62ddaddcac34ffd28f5900c5482bbc 100644 (file)
@@ -3481,3 +3481,55 @@ define <8 x float>@test_int_x86_avx512_mask_cvt_udq2ps_256(<8 x i32> %x0, <8 x f
   ret <8 x float> %res2
 }
 
+declare <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double>, i32, <2 x double>, i8)
+; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_pd_128
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vrndscalepd {{.*}}{%k1} 
+; CHECK: vrndscalepd
+define <2 x double>@test_int_x86_avx512_mask_rndscale_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
+  %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %x0, i32 4, <2 x double> %x2, i8 %x3)
+  %res1 = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %x0, i32 88, <2 x double> %x2, i8 -1)
+  %res2 = fadd <2 x double> %res, %res1
+  ret <2 x double> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double>, i32, <4 x double>, i8)
+; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_pd_256
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vrndscalepd {{.*}}{%k1} 
+; CHECK: vrndscalepd
+define <4 x double>@test_int_x86_avx512_mask_rndscale_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
+  %res = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %x0, i32 4, <4 x double> %x2, i8 %x3)
+  %res1 = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %x0, i32 88, <4 x double> %x2, i8 -1)
+  %res2 = fadd <4 x double> %res, %res1
+  ret <4 x double> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float>, i32, <4 x float>, i8)
+; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_ps_128
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vrndscaleps {{.*}}{%k1} 
+; CHECK: vrndscaleps
+define <4 x float>@test_int_x86_avx512_mask_rndscale_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
+  %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %x0, i32 88, <4 x float> %x2, i8 %x3)
+  %res1 = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %x0, i32 4, <4 x float> %x2, i8 -1)
+  %res2 = fadd <4 x float> %res, %res1
+  ret <4 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float>, i32, <8 x float>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_ps_256
+; CHECK-NOT: call 
+; CHECK: kmov 
+; CHECK: vrndscaleps {{.*}}{%k1} 
+; CHECK: vrndscaleps
+define <8 x float>@test_int_x86_avx512_mask_rndscale_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
+  %res = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %x0, i32 5, <8 x float> %x2, i8 %x3)
+  %res1 = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %x0, i32 66, <8 x float> %x2, i8 -1)
+  %res2 = fadd <8 x float> %res, %res1
+  ret <8 x float> %res2
+}
index 3bb7a5bcd2c34e947bcda146659dc09eff786176..dc0e626d44039ebb7f19b30903cf5494bc6c9e92 100644 (file)
@@ -12846,6 +12846,342 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
 // CHECK:  encoding: [0x62,0xf1,0xfd,0x58,0x5a,0xaa,0xf8,0xfb,0xff,0xff]
           vcvtpd2ps -1032(%rdx){1to8}, %ymm5
 
+// CHECK: vscalefsd %xmm21, %xmm22, %xmm21
+// CHECK:  encoding: [0x62,0xa2,0xcd,0x00,0x2d,0xed]
+          vscalefsd %xmm21, %xmm22, %xmm21
+
+// CHECK: vscalefsd %xmm21, %xmm22, %xmm21 {%k2}
+// CHECK:  encoding: [0x62,0xa2,0xcd,0x02,0x2d,0xed]
+          vscalefsd %xmm21, %xmm22, %xmm21 {%k2}
+
+// CHECK: vscalefsd %xmm21, %xmm22, %xmm21 {%k2} {z}
+// CHECK:  encoding: [0x62,0xa2,0xcd,0x82,0x2d,0xed]
+          vscalefsd %xmm21, %xmm22, %xmm21 {%k2} {z}
+
+// CHECK: vscalefsd {rn-sae}, %xmm21, %xmm22, %xmm21
+// CHECK:  encoding: [0x62,0xa2,0xcd,0x10,0x2d,0xed]
+          vscalefsd {rn-sae}, %xmm21, %xmm22, %xmm21
+
+// CHECK: vscalefsd {ru-sae}, %xmm21, %xmm22, %xmm21
+// CHECK:  encoding: [0x62,0xa2,0xcd,0x50,0x2d,0xed]
+          vscalefsd {ru-sae}, %xmm21, %xmm22, %xmm21
+
+// CHECK: vscalefsd {rd-sae}, %xmm21, %xmm22, %xmm21
+// CHECK:  encoding: [0x62,0xa2,0xcd,0x30,0x2d,0xed]
+          vscalefsd {rd-sae}, %xmm21, %xmm22, %xmm21
+
+// CHECK: vscalefsd {rz-sae}, %xmm21, %xmm22, %xmm21
+// CHECK:  encoding: [0x62,0xa2,0xcd,0x70,0x2d,0xed]
+          vscalefsd {rz-sae}, %xmm21, %xmm22, %xmm21
+
+// CHECK: vscalefsd (%rcx), %xmm22, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0xcd,0x00,0x2d,0x29]
+          vscalefsd (%rcx), %xmm22, %xmm21
+
+// CHECK: vscalefsd 291(%rax,%r14,8), %xmm22, %xmm21
+// CHECK:  encoding: [0x62,0xa2,0xcd,0x00,0x2d,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vscalefsd 291(%rax,%r14,8), %xmm22, %xmm21
+
+// CHECK: vscalefsd 1016(%rdx), %xmm22, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0xcd,0x00,0x2d,0x6a,0x7f]
+          vscalefsd 1016(%rdx), %xmm22, %xmm21
+
+// CHECK: vscalefsd 1024(%rdx), %xmm22, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0xcd,0x00,0x2d,0xaa,0x00,0x04,0x00,0x00]
+          vscalefsd 1024(%rdx), %xmm22, %xmm21
+
+// CHECK: vscalefsd -1024(%rdx), %xmm22, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0xcd,0x00,0x2d,0x6a,0x80]
+          vscalefsd -1024(%rdx), %xmm22, %xmm21
+
+// CHECK: vscalefsd -1032(%rdx), %xmm22, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0xcd,0x00,0x2d,0xaa,0xf8,0xfb,0xff,0xff]
+          vscalefsd -1032(%rdx), %xmm22, %xmm21
+
+// CHECK: vscalefss %xmm23, %xmm15, %xmm13
+// CHECK:  encoding: [0x62,0x32,0x05,0x08,0x2d,0xef]
+          vscalefss %xmm23, %xmm15, %xmm13
+
+// CHECK: vscalefss %xmm23, %xmm15, %xmm13 {%k3}
+// CHECK:  encoding: [0x62,0x32,0x05,0x0b,0x2d,0xef]
+          vscalefss %xmm23, %xmm15, %xmm13 {%k3}
+
+// CHECK: vscalefss %xmm23, %xmm15, %xmm13 {%k3} {z}
+// CHECK:  encoding: [0x62,0x32,0x05,0x8b,0x2d,0xef]
+          vscalefss %xmm23, %xmm15, %xmm13 {%k3} {z}
+
+// CHECK: vscalefss {rn-sae}, %xmm23, %xmm15, %xmm13
+// CHECK:  encoding: [0x62,0x32,0x05,0x18,0x2d,0xef]
+          vscalefss {rn-sae}, %xmm23, %xmm15, %xmm13
+
+// CHECK: vscalefss {ru-sae}, %xmm23, %xmm15, %xmm13
+// CHECK:  encoding: [0x62,0x32,0x05,0x58,0x2d,0xef]
+          vscalefss {ru-sae}, %xmm23, %xmm15, %xmm13
+
+// CHECK: vscalefss {rd-sae}, %xmm23, %xmm15, %xmm13
+// CHECK:  encoding: [0x62,0x32,0x05,0x38,0x2d,0xef]
+          vscalefss {rd-sae}, %xmm23, %xmm15, %xmm13
+
+// CHECK: vscalefss {rz-sae}, %xmm23, %xmm15, %xmm13
+// CHECK:  encoding: [0x62,0x32,0x05,0x78,0x2d,0xef]
+          vscalefss {rz-sae}, %xmm23, %xmm15, %xmm13
+
+// CHECK: vscalefss (%rcx), %xmm15, %xmm13
+// CHECK:  encoding: [0x62,0x72,0x05,0x08,0x2d,0x29]
+          vscalefss (%rcx), %xmm15, %xmm13
+
+// CHECK: vscalefss 291(%rax,%r14,8), %xmm15, %xmm13
+// CHECK:  encoding: [0x62,0x32,0x05,0x08,0x2d,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vscalefss 291(%rax,%r14,8), %xmm15, %xmm13
+
+// CHECK: vscalefss 508(%rdx), %xmm15, %xmm13
+// CHECK:  encoding: [0x62,0x72,0x05,0x08,0x2d,0x6a,0x7f]
+          vscalefss 508(%rdx), %xmm15, %xmm13
+
+// CHECK: vscalefss 512(%rdx), %xmm15, %xmm13
+// CHECK:  encoding: [0x62,0x72,0x05,0x08,0x2d,0xaa,0x00,0x02,0x00,0x00]
+          vscalefss 512(%rdx), %xmm15, %xmm13
+
+// CHECK: vscalefss -512(%rdx), %xmm15, %xmm13
+// CHECK:  encoding: [0x62,0x72,0x05,0x08,0x2d,0x6a,0x80]
+          vscalefss -512(%rdx), %xmm15, %xmm13
+
+// CHECK: vscalefss -516(%rdx), %xmm15, %xmm13
+// CHECK:  encoding: [0x62,0x72,0x05,0x08,0x2d,0xaa,0xfc,0xfd,0xff,0xff]
+          vscalefss -516(%rdx), %xmm15, %xmm13
+
+// CHECK: vrndscalepd $171, %zmm7, %zmm22
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x48,0x09,0xf7,0xab]
+          vrndscalepd $0xab, %zmm7, %zmm22
+
+// CHECK: vrndscalepd $171, %zmm7, %zmm22 {%k1}
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x49,0x09,0xf7,0xab]
+          vrndscalepd $0xab, %zmm7, %zmm22 {%k1}
+
+// CHECK: vrndscalepd $171, %zmm7, %zmm22 {%k1} {z}
+// CHECK:  encoding: [0x62,0xe3,0xfd,0xc9,0x09,0xf7,0xab]
+          vrndscalepd $0xab, %zmm7, %zmm22 {%k1} {z}
+
+// CHECK: vrndscalepd $171,{sae}, %zmm7, %zmm22
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x18,0x09,0xf7,0xab]
+          vrndscalepd $0xab,{sae}, %zmm7, %zmm22
+
+// CHECK: vrndscalepd $123, %zmm7, %zmm22
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x48,0x09,0xf7,0x7b]
+          vrndscalepd $0x7b, %zmm7, %zmm22
+
+// CHECK: vrndscalepd $123,{sae}, %zmm7, %zmm22
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x18,0x09,0xf7,0x7b]
+          vrndscalepd $0x7b,{sae}, %zmm7, %zmm22
+
+// CHECK: vrndscalepd $123, (%rcx), %zmm22
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x48,0x09,0x31,0x7b]
+          vrndscalepd $0x7b, (%rcx), %zmm22
+
+// CHECK: vrndscalepd $123, 291(%rax,%r14,8), %zmm22
+// CHECK:  encoding: [0x62,0xa3,0xfd,0x48,0x09,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vrndscalepd $0x7b, 291(%rax,%r14,8), %zmm22
+
+// CHECK: vrndscalepd $123, (%rcx){1to8}, %zmm22
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x58,0x09,0x31,0x7b]
+          vrndscalepd $0x7b, (%rcx){1to8}, %zmm22
+
+// CHECK: vrndscalepd $123, 8128(%rdx), %zmm22
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x48,0x09,0x72,0x7f,0x7b]
+          vrndscalepd $0x7b, 8128(%rdx), %zmm22
+
+// CHECK: vrndscalepd $123, 8192(%rdx), %zmm22
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x48,0x09,0xb2,0x00,0x20,0x00,0x00,0x7b]
+          vrndscalepd $0x7b, 8192(%rdx), %zmm22
+
+// CHECK: vrndscalepd $123, -8192(%rdx), %zmm22
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x48,0x09,0x72,0x80,0x7b]
+          vrndscalepd $0x7b, -8192(%rdx), %zmm22
+
+// CHECK: vrndscalepd $123, -8256(%rdx), %zmm22
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x48,0x09,0xb2,0xc0,0xdf,0xff,0xff,0x7b]
+          vrndscalepd $0x7b, -8256(%rdx), %zmm22
+
+// CHECK: vrndscalepd $123, 1016(%rdx){1to8}, %zmm22
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x58,0x09,0x72,0x7f,0x7b]
+          vrndscalepd $0x7b, 1016(%rdx){1to8}, %zmm22
+
+// CHECK: vrndscalepd $123, 1024(%rdx){1to8}, %zmm22
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x58,0x09,0xb2,0x00,0x04,0x00,0x00,0x7b]
+          vrndscalepd $0x7b, 1024(%rdx){1to8}, %zmm22
+
+// CHECK: vrndscalepd $123, -1024(%rdx){1to8}, %zmm22
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x58,0x09,0x72,0x80,0x7b]
+          vrndscalepd $0x7b, -1024(%rdx){1to8}, %zmm22
+
+// CHECK: vrndscalepd $123, -1032(%rdx){1to8}, %zmm22
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x58,0x09,0xb2,0xf8,0xfb,0xff,0xff,0x7b]
+          vrndscalepd $0x7b, -1032(%rdx){1to8}, %zmm22
+
+// CHECK: vrndscaleps $171, %zmm7, %zmm13
+// CHECK:  encoding: [0x62,0x73,0x7d,0x48,0x08,0xef,0xab]
+          vrndscaleps $0xab, %zmm7, %zmm13
+
+// CHECK: vrndscaleps $171, %zmm7, %zmm13 {%k1}
+// CHECK:  encoding: [0x62,0x73,0x7d,0x49,0x08,0xef,0xab]
+          vrndscaleps $0xab, %zmm7, %zmm13 {%k1}
+
+// CHECK: vrndscaleps $171, %zmm7, %zmm13 {%k1} {z}
+// CHECK:  encoding: [0x62,0x73,0x7d,0xc9,0x08,0xef,0xab]
+          vrndscaleps $0xab, %zmm7, %zmm13 {%k1} {z}
+
+// CHECK: vrndscaleps $171,{sae}, %zmm7, %zmm13
+// CHECK:  encoding: [0x62,0x73,0x7d,0x18,0x08,0xef,0xab]
+          vrndscaleps $0xab,{sae}, %zmm7, %zmm13
+
+// CHECK: vrndscaleps $123, %zmm7, %zmm13
+// CHECK:  encoding: [0x62,0x73,0x7d,0x48,0x08,0xef,0x7b]
+          vrndscaleps $0x7b, %zmm7, %zmm13
+
+// CHECK: vrndscaleps $123,{sae}, %zmm7, %zmm13
+// CHECK:  encoding: [0x62,0x73,0x7d,0x18,0x08,0xef,0x7b]
+          vrndscaleps $0x7b,{sae}, %zmm7, %zmm13
+
+// CHECK: vrndscaleps $123, (%rcx), %zmm13
+// CHECK:  encoding: [0x62,0x73,0x7d,0x48,0x08,0x29,0x7b]
+          vrndscaleps $0x7b, (%rcx), %zmm13
+
+// CHECK: vrndscaleps $123, 291(%rax,%r14,8), %zmm13
+// CHECK:  encoding: [0x62,0x33,0x7d,0x48,0x08,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vrndscaleps $0x7b, 291(%rax,%r14,8), %zmm13
+
+// CHECK: vrndscaleps $123, (%rcx){1to16}, %zmm13
+// CHECK:  encoding: [0x62,0x73,0x7d,0x58,0x08,0x29,0x7b]
+          vrndscaleps $0x7b, (%rcx){1to16}, %zmm13
+
+// CHECK: vrndscaleps $123, 8128(%rdx), %zmm13
+// CHECK:  encoding: [0x62,0x73,0x7d,0x48,0x08,0x6a,0x7f,0x7b]
+          vrndscaleps $0x7b, 8128(%rdx), %zmm13
+
+// CHECK: vrndscaleps $123, 8192(%rdx), %zmm13
+// CHECK:  encoding: [0x62,0x73,0x7d,0x48,0x08,0xaa,0x00,0x20,0x00,0x00,0x7b]
+          vrndscaleps $0x7b, 8192(%rdx), %zmm13
+
+// CHECK: vrndscaleps $123, -8192(%rdx), %zmm13
+// CHECK:  encoding: [0x62,0x73,0x7d,0x48,0x08,0x6a,0x80,0x7b]
+          vrndscaleps $0x7b, -8192(%rdx), %zmm13
+
+// CHECK: vrndscaleps $123, -8256(%rdx), %zmm13
+// CHECK:  encoding: [0x62,0x73,0x7d,0x48,0x08,0xaa,0xc0,0xdf,0xff,0xff,0x7b]
+          vrndscaleps $0x7b, -8256(%rdx), %zmm13
+
+// CHECK: vrndscaleps $123, 508(%rdx){1to16}, %zmm13
+// CHECK:  encoding: [0x62,0x73,0x7d,0x58,0x08,0x6a,0x7f,0x7b]
+          vrndscaleps $0x7b, 508(%rdx){1to16}, %zmm13
+
+// CHECK: vrndscaleps $123, 512(%rdx){1to16}, %zmm13
+// CHECK:  encoding: [0x62,0x73,0x7d,0x58,0x08,0xaa,0x00,0x02,0x00,0x00,0x7b]
+          vrndscaleps $0x7b, 512(%rdx){1to16}, %zmm13
+
+// CHECK: vrndscaleps $123, -512(%rdx){1to16}, %zmm13
+// CHECK:  encoding: [0x62,0x73,0x7d,0x58,0x08,0x6a,0x80,0x7b]
+          vrndscaleps $0x7b, -512(%rdx){1to16}, %zmm13
+
+// CHECK: vrndscaleps $123, -516(%rdx){1to16}, %zmm13
+// CHECK:  encoding: [0x62,0x73,0x7d,0x58,0x08,0xaa,0xfc,0xfd,0xff,0xff,0x7b]
+          vrndscaleps $0x7b, -516(%rdx){1to16}, %zmm13
+
+// CHECK: vrndscalesd $171, %xmm15, %xmm12, %xmm25
+// CHECK:  encoding: [0x62,0x43,0x9d,0x08,0x0b,0xcf,0xab]
+          vrndscalesd $0xab, %xmm15, %xmm12, %xmm25
+
+// CHECK: vrndscalesd $171, %xmm15, %xmm12, %xmm25 {%k6}
+// CHECK:  encoding: [0x62,0x43,0x9d,0x0e,0x0b,0xcf,0xab]
+          vrndscalesd $0xab, %xmm15, %xmm12, %xmm25 {%k6}
+
+// CHECK: vrndscalesd $171, %xmm15, %xmm12, %xmm25 {%k6} {z}
+// CHECK:  encoding: [0x62,0x43,0x9d,0x8e,0x0b,0xcf,0xab]
+          vrndscalesd $0xab, %xmm15, %xmm12, %xmm25 {%k6} {z}
+
+// CHECK: vrndscalesd $171, {sae}, %xmm15, %xmm12, %xmm25
+// CHECK:  encoding: [0x62,0x43,0x9d,0x18,0x0b,0xcf,0xab]
+          vrndscalesd $0xab,{sae}, %xmm15, %xmm12, %xmm25
+
+// CHECK: vrndscalesd $123, %xmm15, %xmm12, %xmm25
+// CHECK:  encoding: [0x62,0x43,0x9d,0x08,0x0b,0xcf,0x7b]
+          vrndscalesd $0x7b, %xmm15, %xmm12, %xmm25
+
+// CHECK: vrndscalesd $123, {sae}, %xmm15, %xmm12, %xmm25
+// CHECK:  encoding: [0x62,0x43,0x9d,0x18,0x0b,0xcf,0x7b]
+          vrndscalesd $0x7b,{sae}, %xmm15, %xmm12, %xmm25
+
+// CHECK: vrndscalesd $123, (%rcx), %xmm12, %xmm25
+// CHECK:  encoding: [0x62,0x63,0x9d,0x08,0x0b,0x09,0x7b]
+          vrndscalesd $0x7b, (%rcx), %xmm12, %xmm25
+
+// CHECK: vrndscalesd $123, 291(%rax,%r14,8), %xmm12, %xmm25
+// CHECK:  encoding: [0x62,0x23,0x9d,0x08,0x0b,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vrndscalesd $0x7b, 291(%rax,%r14,8), %xmm12, %xmm25
+
+// CHECK: vrndscalesd $123, 1016(%rdx), %xmm12, %xmm25
+// CHECK:  encoding: [0x62,0x63,0x9d,0x08,0x0b,0x4a,0x7f,0x7b]
+          vrndscalesd $0x7b, 1016(%rdx), %xmm12, %xmm25
+
+// CHECK: vrndscalesd $123, 1024(%rdx), %xmm12, %xmm25
+// CHECK:  encoding: [0x62,0x63,0x9d,0x08,0x0b,0x8a,0x00,0x04,0x00,0x00,0x7b]
+          vrndscalesd $0x7b, 1024(%rdx), %xmm12, %xmm25
+
+// CHECK: vrndscalesd $123, -1024(%rdx), %xmm12, %xmm25
+// CHECK:  encoding: [0x62,0x63,0x9d,0x08,0x0b,0x4a,0x80,0x7b]
+          vrndscalesd $0x7b, -1024(%rdx), %xmm12, %xmm25
+
+// CHECK: vrndscalesd $123, -1032(%rdx), %xmm12, %xmm25
+// CHECK:  encoding: [0x62,0x63,0x9d,0x08,0x0b,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
+          vrndscalesd $0x7b, -1032(%rdx), %xmm12, %xmm25
+
+// CHECK: vrndscaless $171, %xmm17, %xmm11, %xmm11
+// CHECK:  encoding: [0x62,0x33,0x25,0x08,0x0a,0xd9,0xab]
+          vrndscaless $0xab, %xmm17, %xmm11, %xmm11
+
+// CHECK: vrndscaless $171, %xmm17, %xmm11, %xmm11 {%k3}
+// CHECK:  encoding: [0x62,0x33,0x25,0x0b,0x0a,0xd9,0xab]
+          vrndscaless $0xab, %xmm17, %xmm11, %xmm11 {%k3}
+
+// CHECK: vrndscaless $171, %xmm17, %xmm11, %xmm11 {%k3} {z}
+// CHECK:  encoding: [0x62,0x33,0x25,0x8b,0x0a,0xd9,0xab]
+          vrndscaless $0xab, %xmm17, %xmm11, %xmm11 {%k3} {z}
+
+// CHECK: vrndscaless $171, {sae}, %xmm17, %xmm11, %xmm11
+// CHECK:  encoding: [0x62,0x33,0x25,0x18,0x0a,0xd9,0xab]
+          vrndscaless $0xab,{sae}, %xmm17, %xmm11, %xmm11
+
+// CHECK: vrndscaless $123, %xmm17, %xmm11, %xmm11
+// CHECK:  encoding: [0x62,0x33,0x25,0x08,0x0a,0xd9,0x7b]
+          vrndscaless $0x7b, %xmm17, %xmm11, %xmm11
+
+// CHECK: vrndscaless $123, {sae}, %xmm17, %xmm11, %xmm11
+// CHECK:  encoding: [0x62,0x33,0x25,0x18,0x0a,0xd9,0x7b]
+          vrndscaless $0x7b,{sae}, %xmm17, %xmm11, %xmm11
+
+// CHECK: vrndscaless $123, (%rcx), %xmm11, %xmm11
+// CHECK:  encoding: [0x62,0x73,0x25,0x08,0x0a,0x19,0x7b]
+          vrndscaless $0x7b, (%rcx), %xmm11, %xmm11
+
+// CHECK: vrndscaless $123, 291(%rax,%r14,8), %xmm11, %xmm11
+// CHECK:  encoding: [0x62,0x33,0x25,0x08,0x0a,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vrndscaless $0x7b, 291(%rax,%r14,8), %xmm11, %xmm11
+
+// CHECK: vrndscaless $123, 508(%rdx), %xmm11, %xmm11
+// CHECK:  encoding: [0x62,0x73,0x25,0x08,0x0a,0x5a,0x7f,0x7b]
+          vrndscaless $0x7b, 508(%rdx), %xmm11, %xmm11
+
+// CHECK: vrndscaless $123, 512(%rdx), %xmm11, %xmm11
+// CHECK:  encoding: [0x62,0x73,0x25,0x08,0x0a,0x9a,0x00,0x02,0x00,0x00,0x7b]
+          vrndscaless $0x7b, 512(%rdx), %xmm11, %xmm11
+
+// CHECK: vrndscaless $123, -512(%rdx), %xmm11, %xmm11
+// CHECK:  encoding: [0x62,0x73,0x25,0x08,0x0a,0x5a,0x80,0x7b]
+          vrndscaless $0x7b, -512(%rdx), %xmm11, %xmm11
+
+// CHECK: vrndscaless $123, -516(%rdx), %xmm11, %xmm11
+// CHECK:  encoding: [0x62,0x73,0x25,0x08,0x0a,0x9a,0xfc,0xfd,0xff,0xff,0x7b]
+          vrndscaless $0x7b, -516(%rdx), %xmm11, %xmm11
+
 // CHECK: vfmadd132ss %xmm22, %xmm17, %xmm30
 // CHECK:  encoding: [0x62,0x22,0x75,0x00,0x99,0xf6]
           vfmadd132ss %xmm22, %xmm17, %xmm30
index 4b26f7a0b80eb43adb1f08a2e2d49fb74b07d4dc..d4e847557bc21910950b5da3a2618accc04fa9e0 100644 (file)
 // CHECK:  encoding: [0x62,0x63,0x3d,0x00,0x51,0x8a,0xfc,0xfd,0xff,0xff,0x7b]
           vrangess $0x7b,-516(%rdx), %xmm24, %xmm25
 
+// CHECK: vreducepd $171, %zmm19, %zmm19
+// CHECK:  encoding: [0x62,0xa3,0xfd,0x48,0x56,0xdb,0xab]
+          vreducepd $0xab, %zmm19, %zmm19
+
+// CHECK: vreducepd $171, %zmm19, %zmm19 {%k6}
+// CHECK:  encoding: [0x62,0xa3,0xfd,0x4e,0x56,0xdb,0xab]
+          vreducepd $0xab, %zmm19, %zmm19 {%k6}
+
+// CHECK: vreducepd $171, %zmm19, %zmm19 {%k6} {z}
+// CHECK:  encoding: [0x62,0xa3,0xfd,0xce,0x56,0xdb,0xab]
+          vreducepd $0xab, %zmm19, %zmm19 {%k6} {z}
+
+// CHECK: vreducepd $171,{sae}, %zmm19, %zmm19
+// CHECK:  encoding: [0x62,0xa3,0xfd,0x18,0x56,0xdb,0xab]
+          vreducepd $0xab,{sae}, %zmm19, %zmm19
+
+// CHECK: vreducepd $123, %zmm19, %zmm19
+// CHECK:  encoding: [0x62,0xa3,0xfd,0x48,0x56,0xdb,0x7b]
+          vreducepd $0x7b, %zmm19, %zmm19
+
+// CHECK: vreducepd $123,{sae}, %zmm19, %zmm19
+// CHECK:  encoding: [0x62,0xa3,0xfd,0x18,0x56,0xdb,0x7b]
+          vreducepd $0x7b,{sae}, %zmm19, %zmm19
+
+// CHECK: vreducepd $123, (%rcx), %zmm19
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x48,0x56,0x19,0x7b]
+          vreducepd $0x7b,(%rcx), %zmm19
+
+// CHECK: vreducepd $123, 291(%rax,%r14,8), %zmm19
+// CHECK:  encoding: [0x62,0xa3,0xfd,0x48,0x56,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vreducepd $0x7b,291(%rax,%r14,8), %zmm19
+
+// CHECK: vreducepd $123, (%rcx){1to8}, %zmm19
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x58,0x56,0x19,0x7b]
+          vreducepd $0x7b,(%rcx){1to8}, %zmm19
+
+// CHECK: vreducepd $123, 8128(%rdx), %zmm19
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x48,0x56,0x5a,0x7f,0x7b]
+          vreducepd $0x7b,8128(%rdx), %zmm19
+
+// CHECK: vreducepd $123, 8192(%rdx), %zmm19
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x48,0x56,0x9a,0x00,0x20,0x00,0x00,0x7b]
+          vreducepd $0x7b,8192(%rdx), %zmm19
+
+// CHECK: vreducepd $123, -8192(%rdx), %zmm19
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x48,0x56,0x5a,0x80,0x7b]
+          vreducepd $0x7b,-8192(%rdx), %zmm19
+
+// CHECK: vreducepd $123, -8256(%rdx), %zmm19
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x48,0x56,0x9a,0xc0,0xdf,0xff,0xff,0x7b]
+          vreducepd $0x7b,-8256(%rdx), %zmm19
+
+// CHECK: vreducepd $123, 1016(%rdx){1to8}, %zmm19
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x58,0x56,0x5a,0x7f,0x7b]
+          vreducepd $0x7b,1016(%rdx){1to8}, %zmm19
+
+// CHECK: vreducepd $123, 1024(%rdx){1to8}, %zmm19
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x58,0x56,0x9a,0x00,0x04,0x00,0x00,0x7b]
+          vreducepd $0x7b,1024(%rdx){1to8}, %zmm19
+
+// CHECK: vreducepd $123, -1024(%rdx){1to8}, %zmm19
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x58,0x56,0x5a,0x80,0x7b]
+          vreducepd $0x7b,-1024(%rdx){1to8}, %zmm19
+
+// CHECK: vreducepd $123, -1032(%rdx){1to8}, %zmm19
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x58,0x56,0x9a,0xf8,0xfb,0xff,0xff,0x7b]
+          vreducepd $0x7b,-1032(%rdx){1to8}, %zmm19
+
+// CHECK: vreduceps $171, %zmm29, %zmm19
+// CHECK:  encoding: [0x62,0x83,0x7d,0x48,0x56,0xdd,0xab]
+          vreduceps $0xab, %zmm29, %zmm19
+
+// CHECK: vreduceps $171, %zmm29, %zmm19 {%k3}
+// CHECK:  encoding: [0x62,0x83,0x7d,0x4b,0x56,0xdd,0xab]
+          vreduceps $0xab, %zmm29, %zmm19 {%k3}
+
+// CHECK: vreduceps $171, %zmm29, %zmm19 {%k3} {z}
+// CHECK:  encoding: [0x62,0x83,0x7d,0xcb,0x56,0xdd,0xab]
+          vreduceps $0xab, %zmm29, %zmm19 {%k3} {z}
+
+// CHECK: vreduceps $171,{sae}, %zmm29, %zmm19
+// CHECK:  encoding: [0x62,0x83,0x7d,0x18,0x56,0xdd,0xab]
+          vreduceps $0xab,{sae}, %zmm29, %zmm19
+
+// CHECK: vreduceps $123, %zmm29, %zmm19
+// CHECK:  encoding: [0x62,0x83,0x7d,0x48,0x56,0xdd,0x7b]
+          vreduceps $0x7b, %zmm29, %zmm19
+
+// CHECK: vreduceps $123,{sae}, %zmm29, %zmm19
+// CHECK:  encoding: [0x62,0x83,0x7d,0x18,0x56,0xdd,0x7b]
+          vreduceps $0x7b,{sae}, %zmm29, %zmm19
+
+// CHECK: vreduceps $123, (%rcx), %zmm19
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x48,0x56,0x19,0x7b]
+          vreduceps $0x7b,(%rcx), %zmm19
+
+// CHECK: vreduceps $123, 291(%rax,%r14,8), %zmm19
+// CHECK:  encoding: [0x62,0xa3,0x7d,0x48,0x56,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vreduceps $0x7b,291(%rax,%r14,8), %zmm19
+
+// CHECK: vreduceps $123, (%rcx){1to16}, %zmm19
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x58,0x56,0x19,0x7b]
+          vreduceps $0x7b,(%rcx){1to16}, %zmm19
+
+// CHECK: vreduceps $123, 8128(%rdx), %zmm19
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x48,0x56,0x5a,0x7f,0x7b]
+          vreduceps $0x7b,8128(%rdx), %zmm19
+
+// CHECK: vreduceps $123, 8192(%rdx), %zmm19
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x48,0x56,0x9a,0x00,0x20,0x00,0x00,0x7b]
+          vreduceps $0x7b,8192(%rdx), %zmm19
+
+// CHECK: vreduceps $123, -8192(%rdx), %zmm19
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x48,0x56,0x5a,0x80,0x7b]
+          vreduceps $0x7b,-8192(%rdx), %zmm19
+
+// CHECK: vreduceps $123, -8256(%rdx), %zmm19
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x48,0x56,0x9a,0xc0,0xdf,0xff,0xff,0x7b]
+          vreduceps $0x7b,-8256(%rdx), %zmm19
+
+// CHECK: vreduceps $123, 508(%rdx){1to16}, %zmm19
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x58,0x56,0x5a,0x7f,0x7b]
+          vreduceps $0x7b,508(%rdx){1to16}, %zmm19
+
+// CHECK: vreduceps $123, 512(%rdx){1to16}, %zmm19
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x58,0x56,0x9a,0x00,0x02,0x00,0x00,0x7b]
+          vreduceps $0x7b,512(%rdx){1to16}, %zmm19
+
+// CHECK: vreduceps $123, -512(%rdx){1to16}, %zmm19
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x58,0x56,0x5a,0x80,0x7b]
+          vreduceps $0x7b,-512(%rdx){1to16}, %zmm19
+
+// CHECK: vreduceps $123, -516(%rdx){1to16}, %zmm19
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x58,0x56,0x9a,0xfc,0xfd,0xff,0xff,0x7b]
+          vreduceps $0x7b,-516(%rdx){1to16}, %zmm19
+
+// CHECK: vreducesd $171, %xmm25, %xmm17, %xmm17
+// CHECK:  encoding: [0x62,0x83,0xf5,0x00,0x57,0xc9,0xab]
+          vreducesd $0xab, %xmm25, %xmm17, %xmm17
+
+// CHECK: vreducesd $171, %xmm25, %xmm17, %xmm17 {%k6}
+// CHECK:  encoding: [0x62,0x83,0xf5,0x06,0x57,0xc9,0xab]
+          vreducesd $0xab, %xmm25, %xmm17, %xmm17 {%k6}
+
+// CHECK: vreducesd $171, %xmm25, %xmm17, %xmm17 {%k6} {z}
+// CHECK:  encoding: [0x62,0x83,0xf5,0x86,0x57,0xc9,0xab]
+          vreducesd $0xab, %xmm25, %xmm17, %xmm17 {%k6} {z}
+
+// CHECK: vreducesd $171,{sae}, %xmm25, %xmm17, %xmm17
+// CHECK:  encoding: [0x62,0x83,0xf5,0x10,0x57,0xc9,0xab]
+          vreducesd $0xab,{sae}, %xmm25, %xmm17, %xmm17
+
+// CHECK: vreducesd $123, %xmm25, %xmm17, %xmm17
+// CHECK:  encoding: [0x62,0x83,0xf5,0x00,0x57,0xc9,0x7b]
+          vreducesd $0x7b, %xmm25, %xmm17, %xmm17
+
+// CHECK: vreducesd $123,{sae}, %xmm25, %xmm17, %xmm17
+// CHECK:  encoding: [0x62,0x83,0xf5,0x10,0x57,0xc9,0x7b]
+          vreducesd $0x7b,{sae}, %xmm25, %xmm17, %xmm17
+
+// CHECK: vreducesd $123, (%rcx), %xmm17, %xmm17
+// CHECK:  encoding: [0x62,0xe3,0xf5,0x00,0x57,0x09,0x7b]
+          vreducesd $0x7b,(%rcx), %xmm17, %xmm17
+
+// CHECK: vreducesd $123, 291(%rax,%r14,8), %xmm17, %xmm17
+// CHECK:  encoding: [0x62,0xa3,0xf5,0x00,0x57,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vreducesd $0x7b,291(%rax,%r14,8), %xmm17, %xmm17
+
+// CHECK: vreducesd $123, 1016(%rdx), %xmm17, %xmm17
+// CHECK:  encoding: [0x62,0xe3,0xf5,0x00,0x57,0x4a,0x7f,0x7b]
+          vreducesd $0x7b,1016(%rdx), %xmm17, %xmm17
+
+// CHECK: vreducesd $123, 1024(%rdx), %xmm17, %xmm17
+// CHECK:  encoding: [0x62,0xe3,0xf5,0x00,0x57,0x8a,0x00,0x04,0x00,0x00,0x7b]
+          vreducesd $0x7b,1024(%rdx), %xmm17, %xmm17
+
+// CHECK: vreducesd $123, -1024(%rdx), %xmm17, %xmm17
+// CHECK:  encoding: [0x62,0xe3,0xf5,0x00,0x57,0x4a,0x80,0x7b]
+          vreducesd $0x7b,-1024(%rdx), %xmm17, %xmm17
+
+// CHECK: vreducesd $123, -1032(%rdx), %xmm17, %xmm17
+// CHECK:  encoding: [0x62,0xe3,0xf5,0x00,0x57,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
+          vreducesd $0x7b,-1032(%rdx), %xmm17, %xmm17
+
+// CHECK: vreducess $171, %xmm21, %xmm29, %xmm30
+// CHECK:  encoding: [0x62,0x23,0x15,0x00,0x57,0xf5,0xab]
+          vreducess $0xab, %xmm21, %xmm29, %xmm30
+
+// CHECK: vreducess $171, %xmm21, %xmm29, %xmm30 {%k1}
+// CHECK:  encoding: [0x62,0x23,0x15,0x01,0x57,0xf5,0xab]
+          vreducess $0xab, %xmm21, %xmm29, %xmm30 {%k1}
+
+// CHECK: vreducess $171, %xmm21, %xmm29, %xmm30 {%k1} {z}
+// CHECK:  encoding: [0x62,0x23,0x15,0x81,0x57,0xf5,0xab]
+          vreducess $0xab, %xmm21, %xmm29, %xmm30 {%k1} {z}
+
+// CHECK: vreducess $171,{sae}, %xmm21, %xmm29, %xmm30
+// CHECK:  encoding: [0x62,0x23,0x15,0x10,0x57,0xf5,0xab]
+          vreducess $0xab,{sae}, %xmm21, %xmm29, %xmm30
+
+// CHECK: vreducess $123, %xmm21, %xmm29, %xmm30
+// CHECK:  encoding: [0x62,0x23,0x15,0x00,0x57,0xf5,0x7b]
+          vreducess $0x7b, %xmm21, %xmm29, %xmm30
+
+// CHECK: vreducess $123,{sae}, %xmm21, %xmm29, %xmm30
+// CHECK:  encoding: [0x62,0x23,0x15,0x10,0x57,0xf5,0x7b]
+          vreducess $0x7b,{sae}, %xmm21, %xmm29, %xmm30
+
+// CHECK: vreducess $123, (%rcx), %xmm29, %xmm30
+// CHECK:  encoding: [0x62,0x63,0x15,0x00,0x57,0x31,0x7b]
+          vreducess $0x7b,(%rcx), %xmm29, %xmm30
+
+// CHECK: vreducess $123, 291(%rax,%r14,8), %xmm29, %xmm30
+// CHECK:  encoding: [0x62,0x23,0x15,0x00,0x57,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vreducess $0x7b,291(%rax,%r14,8), %xmm29, %xmm30
+
+// CHECK: vreducess $123, 508(%rdx), %xmm29, %xmm30
+// CHECK:  encoding: [0x62,0x63,0x15,0x00,0x57,0x72,0x7f,0x7b]
+          vreducess $0x7b,508(%rdx), %xmm29, %xmm30
+
+// CHECK: vreducess $123, 512(%rdx), %xmm29, %xmm30
+// CHECK:  encoding: [0x62,0x63,0x15,0x00,0x57,0xb2,0x00,0x02,0x00,0x00,0x7b]
+          vreducess $0x7b,512(%rdx), %xmm29, %xmm30
+
+// CHECK: vreducess $123, -512(%rdx), %xmm29, %xmm30
+// CHECK:  encoding: [0x62,0x63,0x15,0x00,0x57,0x72,0x80,0x7b]
+          vreducess $0x7b,-512(%rdx), %xmm29, %xmm30
+
+// CHECK: vreducess $123, -516(%rdx), %xmm29, %xmm30
+// CHECK:  encoding: [0x62,0x63,0x15,0x00,0x57,0xb2,0xfc,0xfd,0xff,0xff,0x7b]
+          vreducess $0x7b,-516(%rdx), %xmm29, %xmm30
+
+// CHECK: vreducepd $171, %zmm28, %zmm18
+// CHECK:  encoding: [0x62,0x83,0xfd,0x48,0x56,0xd4,0xab]
+          vreducepd $0xab, %zmm28, %zmm18
+
+// CHECK: vreducepd $171, %zmm28, %zmm18 {%k5}
+// CHECK:  encoding: [0x62,0x83,0xfd,0x4d,0x56,0xd4,0xab]
+          vreducepd $0xab, %zmm28, %zmm18 {%k5}
+
+// CHECK: vreducepd $171, %zmm28, %zmm18 {%k5} {z}
+// CHECK:  encoding: [0x62,0x83,0xfd,0xcd,0x56,0xd4,0xab]
+          vreducepd $0xab, %zmm28, %zmm18 {%k5} {z}
+
+// CHECK: vreducepd $171,{sae}, %zmm28, %zmm18
+// CHECK:  encoding: [0x62,0x83,0xfd,0x18,0x56,0xd4,0xab]
+          vreducepd $0xab,{sae}, %zmm28, %zmm18
+
+// CHECK: vreducepd $123, %zmm28, %zmm18
+// CHECK:  encoding: [0x62,0x83,0xfd,0x48,0x56,0xd4,0x7b]
+          vreducepd $0x7b, %zmm28, %zmm18
+
+// CHECK: vreducepd $123,{sae}, %zmm28, %zmm18
+// CHECK:  encoding: [0x62,0x83,0xfd,0x18,0x56,0xd4,0x7b]
+          vreducepd $0x7b,{sae}, %zmm28, %zmm18
+
+// CHECK: vreducepd $123, (%rcx), %zmm18
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x48,0x56,0x11,0x7b]
+          vreducepd $0x7b,(%rcx), %zmm18
+
+// CHECK: vreducepd $123, 4660(%rax,%r14,8), %zmm18
+// CHECK:  encoding: [0x62,0xa3,0xfd,0x48,0x56,0x94,0xf0,0x34,0x12,0x00,0x00,0x7b]
+          vreducepd $0x7b,4660(%rax,%r14,8), %zmm18
+
+// CHECK: vreducepd $123, (%rcx){1to8}, %zmm18
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x58,0x56,0x11,0x7b]
+          vreducepd $0x7b,(%rcx){1to8}, %zmm18
+
+// CHECK: vreducepd $123, 8128(%rdx), %zmm18
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x48,0x56,0x52,0x7f,0x7b]
+          vreducepd $0x7b,8128(%rdx), %zmm18
+
+// CHECK: vreducepd $123, 8192(%rdx), %zmm18
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x48,0x56,0x92,0x00,0x20,0x00,0x00,0x7b]
+          vreducepd $0x7b,8192(%rdx), %zmm18
+
+// CHECK: vreducepd $123, -8192(%rdx), %zmm18
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x48,0x56,0x52,0x80,0x7b]
+          vreducepd $0x7b,-8192(%rdx), %zmm18
+
+// CHECK: vreducepd $123, -8256(%rdx), %zmm18
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x48,0x56,0x92,0xc0,0xdf,0xff,0xff,0x7b]
+          vreducepd $0x7b,-8256(%rdx), %zmm18
+
+// CHECK: vreducepd $123, 1016(%rdx){1to8}, %zmm18
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x58,0x56,0x52,0x7f,0x7b]
+          vreducepd $0x7b,1016(%rdx){1to8}, %zmm18
+
+// CHECK: vreducepd $123, 1024(%rdx){1to8}, %zmm18
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x58,0x56,0x92,0x00,0x04,0x00,0x00,0x7b]
+          vreducepd $0x7b,1024(%rdx){1to8}, %zmm18
+
+// CHECK: vreducepd $123, -1024(%rdx){1to8}, %zmm18
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x58,0x56,0x52,0x80,0x7b]
+          vreducepd $0x7b,-1024(%rdx){1to8}, %zmm18
+
+// CHECK: vreducepd $123, -1032(%rdx){1to8}, %zmm18
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x58,0x56,0x92,0xf8,0xfb,0xff,0xff,0x7b]
+          vreducepd $0x7b,-1032(%rdx){1to8}, %zmm18
+
+// CHECK: vreduceps $171, %zmm25, %zmm26
+// CHECK:  encoding: [0x62,0x03,0x7d,0x48,0x56,0xd1,0xab]
+          vreduceps $0xab, %zmm25, %zmm26
+
+// CHECK: vreduceps $171, %zmm25, %zmm26 {%k3}
+// CHECK:  encoding: [0x62,0x03,0x7d,0x4b,0x56,0xd1,0xab]
+          vreduceps $0xab, %zmm25, %zmm26 {%k3}
+
+// CHECK: vreduceps $171, %zmm25, %zmm26 {%k3} {z}
+// CHECK:  encoding: [0x62,0x03,0x7d,0xcb,0x56,0xd1,0xab]
+          vreduceps $0xab, %zmm25, %zmm26 {%k3} {z}
+
+// CHECK: vreduceps $171,{sae}, %zmm25, %zmm26
+// CHECK:  encoding: [0x62,0x03,0x7d,0x18,0x56,0xd1,0xab]
+          vreduceps $0xab,{sae}, %zmm25, %zmm26
+
+// CHECK: vreduceps $123, %zmm25, %zmm26
+// CHECK:  encoding: [0x62,0x03,0x7d,0x48,0x56,0xd1,0x7b]
+          vreduceps $0x7b, %zmm25, %zmm26
+
+// CHECK: vreduceps $123,{sae}, %zmm25, %zmm26
+// CHECK:  encoding: [0x62,0x03,0x7d,0x18,0x56,0xd1,0x7b]
+          vreduceps $0x7b,{sae}, %zmm25, %zmm26
+
+// CHECK: vreduceps $123, (%rcx), %zmm26
+// CHECK:  encoding: [0x62,0x63,0x7d,0x48,0x56,0x11,0x7b]
+          vreduceps $0x7b,(%rcx), %zmm26
+
+// CHECK: vreduceps $123, 4660(%rax,%r14,8), %zmm26
+// CHECK:  encoding: [0x62,0x23,0x7d,0x48,0x56,0x94,0xf0,0x34,0x12,0x00,0x00,0x7b]
+          vreduceps $0x7b,4660(%rax,%r14,8), %zmm26
+
+// CHECK: vreduceps $123, (%rcx){1to16}, %zmm26
+// CHECK:  encoding: [0x62,0x63,0x7d,0x58,0x56,0x11,0x7b]
+          vreduceps $0x7b,(%rcx){1to16}, %zmm26
+
+// CHECK: vreduceps $123, 8128(%rdx), %zmm26
+// CHECK:  encoding: [0x62,0x63,0x7d,0x48,0x56,0x52,0x7f,0x7b]
+          vreduceps $0x7b,8128(%rdx), %zmm26
+
+// CHECK: vreduceps $123, 8192(%rdx), %zmm26
+// CHECK:  encoding: [0x62,0x63,0x7d,0x48,0x56,0x92,0x00,0x20,0x00,0x00,0x7b]
+          vreduceps $0x7b,8192(%rdx), %zmm26
+
+// CHECK: vreduceps $123, -8192(%rdx), %zmm26
+// CHECK:  encoding: [0x62,0x63,0x7d,0x48,0x56,0x52,0x80,0x7b]
+          vreduceps $0x7b,-8192(%rdx), %zmm26
+
+// CHECK: vreduceps $123, -8256(%rdx), %zmm26
+// CHECK:  encoding: [0x62,0x63,0x7d,0x48,0x56,0x92,0xc0,0xdf,0xff,0xff,0x7b]
+          vreduceps $0x7b,-8256(%rdx), %zmm26
+
+// CHECK: vreduceps $123, 508(%rdx){1to16}, %zmm26
+// CHECK:  encoding: [0x62,0x63,0x7d,0x58,0x56,0x52,0x7f,0x7b]
+          vreduceps $0x7b,508(%rdx){1to16}, %zmm26
+
+// CHECK: vreduceps $123, 512(%rdx){1to16}, %zmm26
+// CHECK:  encoding: [0x62,0x63,0x7d,0x58,0x56,0x92,0x00,0x02,0x00,0x00,0x7b]
+          vreduceps $0x7b,512(%rdx){1to16}, %zmm26
+
+// CHECK: vreduceps $123, -512(%rdx){1to16}, %zmm26
+// CHECK:  encoding: [0x62,0x63,0x7d,0x58,0x56,0x52,0x80,0x7b]
+          vreduceps $0x7b,-512(%rdx){1to16}, %zmm26
+
+// CHECK: vreduceps $123, -516(%rdx){1to16}, %zmm26
+// CHECK:  encoding: [0x62,0x63,0x7d,0x58,0x56,0x92,0xfc,0xfd,0xff,0xff,0x7b]
+          vreduceps $0x7b,-516(%rdx){1to16}, %zmm26
+
+// CHECK: vreducesd $171, %xmm24, %xmm19, %xmm25
+// CHECK:  encoding: [0x62,0x03,0xe5,0x00,0x57,0xc8,0xab]
+          vreducesd $0xab, %xmm24, %xmm19, %xmm25
+
+// CHECK: vreducesd $171, %xmm24, %xmm19, %xmm25 {%k3}
+// CHECK:  encoding: [0x62,0x03,0xe5,0x03,0x57,0xc8,0xab]
+          vreducesd $0xab, %xmm24, %xmm19, %xmm25 {%k3}
+
+// CHECK: vreducesd $171, %xmm24, %xmm19, %xmm25 {%k3} {z}
+// CHECK:  encoding: [0x62,0x03,0xe5,0x83,0x57,0xc8,0xab]
+          vreducesd $0xab, %xmm24, %xmm19, %xmm25 {%k3} {z}
+
+// CHECK: vreducesd $171,{sae}, %xmm24, %xmm19, %xmm25
+// CHECK:  encoding: [0x62,0x03,0xe5,0x10,0x57,0xc8,0xab]
+          vreducesd $0xab,{sae}, %xmm24, %xmm19, %xmm25
+
+// CHECK: vreducesd $123, %xmm24, %xmm19, %xmm25
+// CHECK:  encoding: [0x62,0x03,0xe5,0x00,0x57,0xc8,0x7b]
+          vreducesd $0x7b, %xmm24, %xmm19, %xmm25
+
+// CHECK: vreducesd $123,{sae}, %xmm24, %xmm19, %xmm25
+// CHECK:  encoding: [0x62,0x03,0xe5,0x10,0x57,0xc8,0x7b]
+          vreducesd $0x7b,{sae}, %xmm24, %xmm19, %xmm25
+
+// CHECK: vreducesd $123, (%rcx), %xmm19, %xmm25
+// CHECK:  encoding: [0x62,0x63,0xe5,0x00,0x57,0x09,0x7b]
+          vreducesd $0x7b,(%rcx), %xmm19, %xmm25
+
+// CHECK: vreducesd $123, 4660(%rax,%r14,8), %xmm19, %xmm25
+// CHECK:  encoding: [0x62,0x23,0xe5,0x00,0x57,0x8c,0xf0,0x34,0x12,0x00,0x00,0x7b]
+          vreducesd $0x7b,4660(%rax,%r14,8), %xmm19, %xmm25
+
+// CHECK: vreducesd $123, 1016(%rdx), %xmm19, %xmm25
+// CHECK:  encoding: [0x62,0x63,0xe5,0x00,0x57,0x4a,0x7f,0x7b]
+          vreducesd $0x7b,1016(%rdx), %xmm19, %xmm25
+
+// CHECK: vreducesd $123, 1024(%rdx), %xmm19, %xmm25
+// CHECK:  encoding: [0x62,0x63,0xe5,0x00,0x57,0x8a,0x00,0x04,0x00,0x00,0x7b]
+          vreducesd $0x7b,1024(%rdx), %xmm19, %xmm25
+
+// CHECK: vreducesd $123, -1024(%rdx), %xmm19, %xmm25
+// CHECK:  encoding: [0x62,0x63,0xe5,0x00,0x57,0x4a,0x80,0x7b]
+          vreducesd $0x7b,-1024(%rdx), %xmm19, %xmm25
+
+// CHECK: vreducesd $123, -1032(%rdx), %xmm19, %xmm25
+// CHECK:  encoding: [0x62,0x63,0xe5,0x00,0x57,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
+          vreducesd $0x7b,-1032(%rdx), %xmm19, %xmm25
+
+// CHECK: vreducess $171, %xmm21, %xmm24, %xmm30
+// CHECK:  encoding: [0x62,0x23,0x3d,0x00,0x57,0xf5,0xab]
+          vreducess $0xab, %xmm21, %xmm24, %xmm30
+
+// CHECK: vreducess $171, %xmm21, %xmm24, %xmm30 {%k2}
+// CHECK:  encoding: [0x62,0x23,0x3d,0x02,0x57,0xf5,0xab]
+          vreducess $0xab, %xmm21, %xmm24, %xmm30 {%k2}
+
+// CHECK: vreducess $171, %xmm21, %xmm24, %xmm30 {%k2} {z}
+// CHECK:  encoding: [0x62,0x23,0x3d,0x82,0x57,0xf5,0xab]
+          vreducess $0xab, %xmm21, %xmm24, %xmm30 {%k2} {z}
+
+// CHECK: vreducess $171,{sae}, %xmm21, %xmm24, %xmm30
+// CHECK:  encoding: [0x62,0x23,0x3d,0x10,0x57,0xf5,0xab]
+          vreducess $0xab,{sae}, %xmm21, %xmm24, %xmm30
+
+// CHECK: vreducess $123, %xmm21, %xmm24, %xmm30
+// CHECK:  encoding: [0x62,0x23,0x3d,0x00,0x57,0xf5,0x7b]
+          vreducess $0x7b, %xmm21, %xmm24, %xmm30
+
+// CHECK: vreducess $123,{sae}, %xmm21, %xmm24, %xmm30
+// CHECK:  encoding: [0x62,0x23,0x3d,0x10,0x57,0xf5,0x7b]
+          vreducess $0x7b,{sae}, %xmm21, %xmm24, %xmm30
+
+// CHECK: vreducess $123, (%rcx), %xmm24, %xmm30
+// CHECK:  encoding: [0x62,0x63,0x3d,0x00,0x57,0x31,0x7b]
+          vreducess $0x7b,(%rcx), %xmm24, %xmm30
+
+// CHECK: vreducess $123, 4660(%rax,%r14,8), %xmm24, %xmm30
+// CHECK:  encoding: [0x62,0x23,0x3d,0x00,0x57,0xb4,0xf0,0x34,0x12,0x00,0x00,0x7b]
+          vreducess $0x7b,4660(%rax,%r14,8), %xmm24, %xmm30
+
+// CHECK: vreducess $123, 508(%rdx), %xmm24, %xmm30
+// CHECK:  encoding: [0x62,0x63,0x3d,0x00,0x57,0x72,0x7f,0x7b]
+          vreducess $0x7b,508(%rdx), %xmm24, %xmm30
+
+// CHECK: vreducess $123, 512(%rdx), %xmm24, %xmm30
+// CHECK:  encoding: [0x62,0x63,0x3d,0x00,0x57,0xb2,0x00,0x02,0x00,0x00,0x7b]
+          vreducess $0x7b,512(%rdx), %xmm24, %xmm30
+
+// CHECK: vreducess $123, -512(%rdx), %xmm24, %xmm30
+// CHECK:  encoding: [0x62,0x63,0x3d,0x00,0x57,0x72,0x80,0x7b]
+          vreducess $0x7b,-512(%rdx), %xmm24, %xmm30
+
+// CHECK: vreducess $123, -516(%rdx), %xmm24, %xmm30
+// CHECK:  encoding: [0x62,0x63,0x3d,0x00,0x57,0xb2,0xfc,0xfd,0xff,0xff,0x7b]
+          vreducess $0x7b,-516(%rdx), %xmm24, %xmm30
+
 // CHECK: vcvtpd2qq %zmm29, %zmm18
 // CHECK:  encoding: [0x62,0x81,0xfd,0x48,0x7b,0xd5]
           vcvtpd2qq %zmm29, %zmm18
index 17c37c08335cfc175840894e54245e21c9a0ed64..e9ffd493fadc905586104ff2eb945e192dea9f06 100644 (file)
 // CHECK:  encoding: [0x62,0x63,0x45,0x30,0x50,0x82,0xfc,0xfd,0xff,0xff,0x7b]
           vrangeps $0x7b,-516(%rdx){1to8}, %ymm23, %ymm24
 
+// CHECK: vreducepd $171, %xmm17, %xmm18
+// CHECK:  encoding: [0x62,0xa3,0xfd,0x08,0x56,0xd1,0xab]
+          vreducepd $0xab, %xmm17, %xmm18
+
+// CHECK: vreducepd $171, %xmm17, %xmm18 {%k3}
+// CHECK:  encoding: [0x62,0xa3,0xfd,0x0b,0x56,0xd1,0xab]
+          vreducepd $0xab, %xmm17, %xmm18 {%k3}
+
+// CHECK: vreducepd $171, %xmm17, %xmm18 {%k3} {z}
+// CHECK:  encoding: [0x62,0xa3,0xfd,0x8b,0x56,0xd1,0xab]
+          vreducepd $0xab, %xmm17, %xmm18 {%k3} {z}
+
+// CHECK: vreducepd $123, %xmm17, %xmm18
+// CHECK:  encoding: [0x62,0xa3,0xfd,0x08,0x56,0xd1,0x7b]
+          vreducepd $0x7b, %xmm17, %xmm18
+
+// CHECK: vreducepd $123, (%rcx), %xmm18
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x08,0x56,0x11,0x7b]
+          vreducepd $0x7b,(%rcx), %xmm18
+
+// CHECK: vreducepd $123, 291(%rax,%r14,8), %xmm18
+// CHECK:  encoding: [0x62,0xa3,0xfd,0x08,0x56,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vreducepd $0x7b,291(%rax,%r14,8), %xmm18
+
+// CHECK: vreducepd $171, %xmm28, %xmm25
+// CHECK:  encoding: [0x62,0x03,0xfd,0x08,0x56,0xcc,0xab]
+          vreducepd $0xab, %xmm28, %xmm25
+
+// CHECK: vreducepd $171, %xmm28, %xmm25 {%k4}
+// CHECK:  encoding: [0x62,0x03,0xfd,0x0c,0x56,0xcc,0xab]
+          vreducepd $0xab, %xmm28, %xmm25 {%k4}
+
+// CHECK: vreducepd $171, %xmm28, %xmm25 {%k4} {z}
+// CHECK:  encoding: [0x62,0x03,0xfd,0x8c,0x56,0xcc,0xab]
+          vreducepd $0xab, %xmm28, %xmm25 {%k4} {z}
+
+// CHECK: vreducepd $123, %xmm28, %xmm25
+// CHECK:  encoding: [0x62,0x03,0xfd,0x08,0x56,0xcc,0x7b]
+          vreducepd $0x7b, %xmm28, %xmm25
+
+// CHECK: vreducepd $123, (%rcx), %xmm25
+// CHECK:  encoding: [0x62,0x63,0xfd,0x08,0x56,0x09,0x7b]
+          vreducepd $0x7b,(%rcx), %xmm25
+
+// CHECK: vreducepd $123, 4660(%rax,%r14,8), %xmm25
+// CHECK:  encoding: [0x62,0x23,0xfd,0x08,0x56,0x8c,0xf0,0x34,0x12,0x00,0x00,0x7b]
+          vreducepd $0x7b,4660(%rax,%r14,8), %xmm25
+
+// CHECK: vreducepd $123, (%rcx){1to2}, %xmm25
+// CHECK:  encoding: [0x62,0x63,0xfd,0x18,0x56,0x09,0x7b]
+          vreducepd $0x7b,(%rcx){1to2}, %xmm25
+
+// CHECK: vreducepd $123, 2032(%rdx), %xmm25
+// CHECK:  encoding: [0x62,0x63,0xfd,0x08,0x56,0x4a,0x7f,0x7b]
+          vreducepd $0x7b,2032(%rdx), %xmm25
+
+// CHECK: vreducepd $123, 2048(%rdx), %xmm25
+// CHECK:  encoding: [0x62,0x63,0xfd,0x08,0x56,0x8a,0x00,0x08,0x00,0x00,0x7b]
+          vreducepd $0x7b,2048(%rdx), %xmm25
+
+// CHECK: vreducepd $123, -2048(%rdx), %xmm25
+// CHECK:  encoding: [0x62,0x63,0xfd,0x08,0x56,0x4a,0x80,0x7b]
+          vreducepd $0x7b,-2048(%rdx), %xmm25
+
+// CHECK: vreducepd $123, -2064(%rdx), %xmm25
+// CHECK:  encoding: [0x62,0x63,0xfd,0x08,0x56,0x8a,0xf0,0xf7,0xff,0xff,0x7b]
+          vreducepd $0x7b,-2064(%rdx), %xmm25
+
+// CHECK: vreducepd $123, 1016(%rdx){1to2}, %xmm25
+// CHECK:  encoding: [0x62,0x63,0xfd,0x18,0x56,0x4a,0x7f,0x7b]
+          vreducepd $0x7b,1016(%rdx){1to2}, %xmm25
+
+// CHECK: vreducepd $123, 1024(%rdx){1to2}, %xmm25
+// CHECK:  encoding: [0x62,0x63,0xfd,0x18,0x56,0x8a,0x00,0x04,0x00,0x00,0x7b]
+          vreducepd $0x7b,1024(%rdx){1to2}, %xmm25
+
+// CHECK: vreducepd $123, -1024(%rdx){1to2}, %xmm25
+// CHECK:  encoding: [0x62,0x63,0xfd,0x18,0x56,0x4a,0x80,0x7b]
+          vreducepd $0x7b,-1024(%rdx){1to2}, %xmm25
+
+// CHECK: vreducepd $123, -1032(%rdx){1to2}, %xmm25
+// CHECK:  encoding: [0x62,0x63,0xfd,0x18,0x56,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
+          vreducepd $0x7b,-1032(%rdx){1to2}, %xmm25
+
+// CHECK: vreducepd $171, %ymm17, %ymm28
+// CHECK:  encoding: [0x62,0x23,0xfd,0x28,0x56,0xe1,0xab]
+          vreducepd $0xab, %ymm17, %ymm28
+
+// CHECK: vreducepd $171, %ymm17, %ymm28 {%k4}
+// CHECK:  encoding: [0x62,0x23,0xfd,0x2c,0x56,0xe1,0xab]
+          vreducepd $0xab, %ymm17, %ymm28 {%k4}
+
+// CHECK: vreducepd $171, %ymm17, %ymm28 {%k4} {z}
+// CHECK:  encoding: [0x62,0x23,0xfd,0xac,0x56,0xe1,0xab]
+          vreducepd $0xab, %ymm17, %ymm28 {%k4} {z}
+
+// CHECK: vreducepd $123, %ymm17, %ymm28
+// CHECK:  encoding: [0x62,0x23,0xfd,0x28,0x56,0xe1,0x7b]
+          vreducepd $0x7b, %ymm17, %ymm28
+
+// CHECK: vreducepd $123, (%rcx), %ymm28
+// CHECK:  encoding: [0x62,0x63,0xfd,0x28,0x56,0x21,0x7b]
+          vreducepd $0x7b,(%rcx), %ymm28
+
+// CHECK: vreducepd $123, 4660(%rax,%r14,8), %ymm28
+// CHECK:  encoding: [0x62,0x23,0xfd,0x28,0x56,0xa4,0xf0,0x34,0x12,0x00,0x00,0x7b]
+          vreducepd $0x7b,4660(%rax,%r14,8), %ymm28
+
+// CHECK: vreducepd $123, (%rcx){1to4}, %ymm28
+// CHECK:  encoding: [0x62,0x63,0xfd,0x38,0x56,0x21,0x7b]
+          vreducepd $0x7b,(%rcx){1to4}, %ymm28
+
+// CHECK: vreducepd $123, 4064(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x63,0xfd,0x28,0x56,0x62,0x7f,0x7b]
+          vreducepd $0x7b,4064(%rdx), %ymm28
+
+// CHECK: vreducepd $123, 4096(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x63,0xfd,0x28,0x56,0xa2,0x00,0x10,0x00,0x00,0x7b]
+          vreducepd $0x7b,4096(%rdx), %ymm28
+
+// CHECK: vreducepd $123, -4096(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x63,0xfd,0x28,0x56,0x62,0x80,0x7b]
+          vreducepd $0x7b,-4096(%rdx), %ymm28
+
+// CHECK: vreducepd $123, -4128(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x63,0xfd,0x28,0x56,0xa2,0xe0,0xef,0xff,0xff,0x7b]
+          vreducepd $0x7b,-4128(%rdx), %ymm28
+
+// CHECK: vreducepd $123, 1016(%rdx){1to4}, %ymm28
+// CHECK:  encoding: [0x62,0x63,0xfd,0x38,0x56,0x62,0x7f,0x7b]
+          vreducepd $0x7b,1016(%rdx){1to4}, %ymm28
+
+// CHECK: vreducepd $123, 1024(%rdx){1to4}, %ymm28
+// CHECK:  encoding: [0x62,0x63,0xfd,0x38,0x56,0xa2,0x00,0x04,0x00,0x00,0x7b]
+          vreducepd $0x7b,1024(%rdx){1to4}, %ymm28
+
+// CHECK: vreducepd $123, -1024(%rdx){1to4}, %ymm28
+// CHECK:  encoding: [0x62,0x63,0xfd,0x38,0x56,0x62,0x80,0x7b]
+          vreducepd $0x7b,-1024(%rdx){1to4}, %ymm28
+
+// CHECK: vreducepd $123, -1032(%rdx){1to4}, %ymm28
+// CHECK:  encoding: [0x62,0x63,0xfd,0x38,0x56,0xa2,0xf8,0xfb,0xff,0xff,0x7b]
+          vreducepd $0x7b,-1032(%rdx){1to4}, %ymm28
+
+// CHECK: vreduceps $171, %xmm21, %xmm29
+// CHECK:  encoding: [0x62,0x23,0x7d,0x08,0x56,0xed,0xab]
+          vreduceps $0xab, %xmm21, %xmm29
+
+// CHECK: vreduceps $171, %xmm21, %xmm29 {%k7}
+// CHECK:  encoding: [0x62,0x23,0x7d,0x0f,0x56,0xed,0xab]
+          vreduceps $0xab, %xmm21, %xmm29 {%k7}
+
+// CHECK: vreduceps $171, %xmm21, %xmm29 {%k7} {z}
+// CHECK:  encoding: [0x62,0x23,0x7d,0x8f,0x56,0xed,0xab]
+          vreduceps $0xab, %xmm21, %xmm29 {%k7} {z}
+
+// CHECK: vreduceps $123, %xmm21, %xmm29
+// CHECK:  encoding: [0x62,0x23,0x7d,0x08,0x56,0xed,0x7b]
+          vreduceps $0x7b, %xmm21, %xmm29
+
+// CHECK: vreduceps $123, (%rcx), %xmm29
+// CHECK:  encoding: [0x62,0x63,0x7d,0x08,0x56,0x29,0x7b]
+          vreduceps $0x7b,(%rcx), %xmm29
+
+// CHECK: vreduceps $123, 4660(%rax,%r14,8), %xmm29
+// CHECK:  encoding: [0x62,0x23,0x7d,0x08,0x56,0xac,0xf0,0x34,0x12,0x00,0x00,0x7b]
+          vreduceps $0x7b,4660(%rax,%r14,8), %xmm29
+
+// CHECK: vreduceps $123, (%rcx){1to4}, %xmm29
+// CHECK:  encoding: [0x62,0x63,0x7d,0x18,0x56,0x29,0x7b]
+          vreduceps $0x7b,(%rcx){1to4}, %xmm29
+
+// CHECK: vreduceps $123, 2032(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x63,0x7d,0x08,0x56,0x6a,0x7f,0x7b]
+          vreduceps $0x7b,2032(%rdx), %xmm29
+
+// CHECK: vreduceps $123, 2048(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x63,0x7d,0x08,0x56,0xaa,0x00,0x08,0x00,0x00,0x7b]
+          vreduceps $0x7b,2048(%rdx), %xmm29
+
+// CHECK: vreduceps $123, -2048(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x63,0x7d,0x08,0x56,0x6a,0x80,0x7b]
+          vreduceps $0x7b,-2048(%rdx), %xmm29
+
+// CHECK: vreduceps $123, -2064(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x63,0x7d,0x08,0x56,0xaa,0xf0,0xf7,0xff,0xff,0x7b]
+          vreduceps $0x7b,-2064(%rdx), %xmm29
+
+// CHECK: vreduceps $123, 508(%rdx){1to4}, %xmm29
+// CHECK:  encoding: [0x62,0x63,0x7d,0x18,0x56,0x6a,0x7f,0x7b]
+          vreduceps $0x7b,508(%rdx){1to4}, %xmm29
+
+// CHECK: vreduceps $123, 512(%rdx){1to4}, %xmm29
+// CHECK:  encoding: [0x62,0x63,0x7d,0x18,0x56,0xaa,0x00,0x02,0x00,0x00,0x7b]
+          vreduceps $0x7b,512(%rdx){1to4}, %xmm29
+
+// CHECK: vreduceps $123, -512(%rdx){1to4}, %xmm29
+// CHECK:  encoding: [0x62,0x63,0x7d,0x18,0x56,0x6a,0x80,0x7b]
+          vreduceps $0x7b,-512(%rdx){1to4}, %xmm29
+
+// CHECK: vreduceps $123, -516(%rdx){1to4}, %xmm29
+// CHECK:  encoding: [0x62,0x63,0x7d,0x18,0x56,0xaa,0xfc,0xfd,0xff,0xff,0x7b]
+          vreduceps $0x7b,-516(%rdx){1to4}, %xmm29
+
+// CHECK: vreduceps $171, %ymm23, %ymm25
+// CHECK:  encoding: [0x62,0x23,0x7d,0x28,0x56,0xcf,0xab]
+          vreduceps $0xab, %ymm23, %ymm25
+
+// CHECK: vreduceps $171, %ymm23, %ymm25 {%k3}
+// CHECK:  encoding: [0x62,0x23,0x7d,0x2b,0x56,0xcf,0xab]
+          vreduceps $0xab, %ymm23, %ymm25 {%k3}
+
+// CHECK: vreduceps $171, %ymm23, %ymm25 {%k3} {z}
+// CHECK:  encoding: [0x62,0x23,0x7d,0xab,0x56,0xcf,0xab]
+          vreduceps $0xab, %ymm23, %ymm25 {%k3} {z}
+
+// CHECK: vreduceps $123, %ymm23, %ymm25
+// CHECK:  encoding: [0x62,0x23,0x7d,0x28,0x56,0xcf,0x7b]
+          vreduceps $0x7b, %ymm23, %ymm25
+
+// CHECK: vreduceps $123, (%rcx), %ymm25
+// CHECK:  encoding: [0x62,0x63,0x7d,0x28,0x56,0x09,0x7b]
+          vreduceps $0x7b,(%rcx), %ymm25
+
+// CHECK: vreduceps $123, 4660(%rax,%r14,8), %ymm25
+// CHECK:  encoding: [0x62,0x23,0x7d,0x28,0x56,0x8c,0xf0,0x34,0x12,0x00,0x00,0x7b]
+          vreduceps $0x7b,4660(%rax,%r14,8), %ymm25
+
+// CHECK: vreduceps $123, (%rcx){1to8}, %ymm25
+// CHECK:  encoding: [0x62,0x63,0x7d,0x38,0x56,0x09,0x7b]
+          vreduceps $0x7b,(%rcx){1to8}, %ymm25
+
+// CHECK: vreduceps $123, 4064(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x63,0x7d,0x28,0x56,0x4a,0x7f,0x7b]
+          vreduceps $0x7b,4064(%rdx), %ymm25
+
+// CHECK: vreduceps $123, 4096(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x63,0x7d,0x28,0x56,0x8a,0x00,0x10,0x00,0x00,0x7b]
+          vreduceps $0x7b,4096(%rdx), %ymm25
+
+// CHECK: vreduceps $123, -4096(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x63,0x7d,0x28,0x56,0x4a,0x80,0x7b]
+          vreduceps $0x7b,-4096(%rdx), %ymm25
+
+// CHECK: vreduceps $123, -4128(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x63,0x7d,0x28,0x56,0x8a,0xe0,0xef,0xff,0xff,0x7b]
+          vreduceps $0x7b,-4128(%rdx), %ymm25
+
+// CHECK: vreduceps $123, 508(%rdx){1to8}, %ymm25
+// CHECK:  encoding: [0x62,0x63,0x7d,0x38,0x56,0x4a,0x7f,0x7b]
+          vreduceps $0x7b,508(%rdx){1to8}, %ymm25
+
+// CHECK: vreduceps $123, 512(%rdx){1to8}, %ymm25
+// CHECK:  encoding: [0x62,0x63,0x7d,0x38,0x56,0x8a,0x00,0x02,0x00,0x00,0x7b]
+          vreduceps $0x7b,512(%rdx){1to8}, %ymm25
+
+// CHECK: vreduceps $123, -512(%rdx){1to8}, %ymm25
+// CHECK:  encoding: [0x62,0x63,0x7d,0x38,0x56,0x4a,0x80,0x7b]
+          vreduceps $0x7b,-512(%rdx){1to8}, %ymm25
+
+// CHECK: vreduceps $123, -516(%rdx){1to8}, %ymm25
+// CHECK:  encoding: [0x62,0x63,0x7d,0x38,0x56,0x8a,0xfc,0xfd,0xff,0xff,0x7b]
+          vreduceps $0x7b,-516(%rdx){1to8}, %ymm25
+
+// CHECK: vreducepd $123, (%rcx){1to2}, %xmm18
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x18,0x56,0x11,0x7b]
+          vreducepd $0x7b,(%rcx){1to2}, %xmm18
+
+// CHECK: vreducepd $123, 2032(%rdx), %xmm18
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x08,0x56,0x52,0x7f,0x7b]
+          vreducepd $0x7b,2032(%rdx), %xmm18
+
+// CHECK: vreducepd $123, 2048(%rdx), %xmm18
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x08,0x56,0x92,0x00,0x08,0x00,0x00,0x7b]
+          vreducepd $0x7b,2048(%rdx), %xmm18
+
+// CHECK: vreducepd $123, -2048(%rdx), %xmm18
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x08,0x56,0x52,0x80,0x7b]
+          vreducepd $0x7b,-2048(%rdx), %xmm18
+
+// CHECK: vreducepd $123, -2064(%rdx), %xmm18
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x08,0x56,0x92,0xf0,0xf7,0xff,0xff,0x7b]
+          vreducepd $0x7b,-2064(%rdx), %xmm18
+
+// CHECK: vreducepd $123, 1016(%rdx){1to2}, %xmm18
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x18,0x56,0x52,0x7f,0x7b]
+          vreducepd $0x7b,1016(%rdx){1to2}, %xmm18
+
+// CHECK: vreducepd $123, 1024(%rdx){1to2}, %xmm18
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x18,0x56,0x92,0x00,0x04,0x00,0x00,0x7b]
+          vreducepd $0x7b,1024(%rdx){1to2}, %xmm18
+
+// CHECK: vreducepd $123, -1024(%rdx){1to2}, %xmm18
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x18,0x56,0x52,0x80,0x7b]
+          vreducepd $0x7b,-1024(%rdx){1to2}, %xmm18
+
+// CHECK: vreducepd $123, -1032(%rdx){1to2}, %xmm18
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x18,0x56,0x92,0xf8,0xfb,0xff,0xff,0x7b]
+          vreducepd $0x7b,-1032(%rdx){1to2}, %xmm18
+
+// CHECK: vreducepd $171, %ymm29, %ymm25
+// CHECK:  encoding: [0x62,0x03,0xfd,0x28,0x56,0xcd,0xab]
+          vreducepd $0xab, %ymm29, %ymm25
+
+// CHECK: vreducepd $171, %ymm29, %ymm25 {%k1}
+// CHECK:  encoding: [0x62,0x03,0xfd,0x29,0x56,0xcd,0xab]
+          vreducepd $0xab, %ymm29, %ymm25 {%k1}
+
+// CHECK: vreducepd $171, %ymm29, %ymm25 {%k1} {z}
+// CHECK:  encoding: [0x62,0x03,0xfd,0xa9,0x56,0xcd,0xab]
+          vreducepd $0xab, %ymm29, %ymm25 {%k1} {z}
+
+// CHECK: vreducepd $123, %ymm29, %ymm25
+// CHECK:  encoding: [0x62,0x03,0xfd,0x28,0x56,0xcd,0x7b]
+          vreducepd $0x7b, %ymm29, %ymm25
+
+// CHECK: vreducepd $123, (%rcx), %ymm25
+// CHECK:  encoding: [0x62,0x63,0xfd,0x28,0x56,0x09,0x7b]
+          vreducepd $0x7b,(%rcx), %ymm25
+
+// CHECK: vreducepd $123, 291(%rax,%r14,8), %ymm25
+// CHECK:  encoding: [0x62,0x23,0xfd,0x28,0x56,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vreducepd $0x7b,291(%rax,%r14,8), %ymm25
+
+// CHECK: vreducepd $123, (%rcx){1to4}, %ymm25
+// CHECK:  encoding: [0x62,0x63,0xfd,0x38,0x56,0x09,0x7b]
+          vreducepd $0x7b,(%rcx){1to4}, %ymm25
+
+// CHECK: vreducepd $123, 4064(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x63,0xfd,0x28,0x56,0x4a,0x7f,0x7b]
+          vreducepd $0x7b,4064(%rdx), %ymm25
+
+// CHECK: vreducepd $123, 4096(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x63,0xfd,0x28,0x56,0x8a,0x00,0x10,0x00,0x00,0x7b]
+          vreducepd $0x7b,4096(%rdx), %ymm25
+
+// CHECK: vreducepd $123, -4096(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x63,0xfd,0x28,0x56,0x4a,0x80,0x7b]
+          vreducepd $0x7b,-4096(%rdx), %ymm25
+
+// CHECK: vreducepd $123, -4128(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x63,0xfd,0x28,0x56,0x8a,0xe0,0xef,0xff,0xff,0x7b]
+          vreducepd $0x7b,-4128(%rdx), %ymm25
+
+// CHECK: vreducepd $123, 1016(%rdx){1to4}, %ymm25
+// CHECK:  encoding: [0x62,0x63,0xfd,0x38,0x56,0x4a,0x7f,0x7b]
+          vreducepd $0x7b,1016(%rdx){1to4}, %ymm25
+
+// CHECK: vreducepd $123, 1024(%rdx){1to4}, %ymm25
+// CHECK:  encoding: [0x62,0x63,0xfd,0x38,0x56,0x8a,0x00,0x04,0x00,0x00,0x7b]
+          vreducepd $0x7b,1024(%rdx){1to4}, %ymm25
+
+// CHECK: vreducepd $123, -1024(%rdx){1to4}, %ymm25
+// CHECK:  encoding: [0x62,0x63,0xfd,0x38,0x56,0x4a,0x80,0x7b]
+          vreducepd $0x7b,-1024(%rdx){1to4}, %ymm25
+
+// CHECK: vreducepd $123, -1032(%rdx){1to4}, %ymm25
+// CHECK:  encoding: [0x62,0x63,0xfd,0x38,0x56,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
+          vreducepd $0x7b,-1032(%rdx){1to4}, %ymm25
+
+// CHECK: vreduceps $171, %xmm23, %xmm20
+// CHECK:  encoding: [0x62,0xa3,0x7d,0x08,0x56,0xe7,0xab]
+          vreduceps $0xab, %xmm23, %xmm20
+
+// CHECK: vreduceps $171, %xmm23, %xmm20 {%k7}
+// CHECK:  encoding: [0x62,0xa3,0x7d,0x0f,0x56,0xe7,0xab]
+          vreduceps $0xab, %xmm23, %xmm20 {%k7}
+
+// CHECK: vreduceps $171, %xmm23, %xmm20 {%k7} {z}
+// CHECK:  encoding: [0x62,0xa3,0x7d,0x8f,0x56,0xe7,0xab]
+          vreduceps $0xab, %xmm23, %xmm20 {%k7} {z}
+
+// CHECK: vreduceps $123, %xmm23, %xmm20
+// CHECK:  encoding: [0x62,0xa3,0x7d,0x08,0x56,0xe7,0x7b]
+          vreduceps $0x7b, %xmm23, %xmm20
+
+// CHECK: vreduceps $123, (%rcx), %xmm20
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x08,0x56,0x21,0x7b]
+          vreduceps $0x7b,(%rcx), %xmm20
+
+// CHECK: vreduceps $123, 291(%rax,%r14,8), %xmm20
+// CHECK:  encoding: [0x62,0xa3,0x7d,0x08,0x56,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vreduceps $0x7b,291(%rax,%r14,8), %xmm20
+
+// CHECK: vreduceps $123, (%rcx){1to4}, %xmm20
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x18,0x56,0x21,0x7b]
+          vreduceps $0x7b,(%rcx){1to4}, %xmm20
+
+// CHECK: vreduceps $123, 2032(%rdx), %xmm20
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x08,0x56,0x62,0x7f,0x7b]
+          vreduceps $0x7b,2032(%rdx), %xmm20
+
+// CHECK: vreduceps $123, 2048(%rdx), %xmm20
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x08,0x56,0xa2,0x00,0x08,0x00,0x00,0x7b]
+          vreduceps $0x7b,2048(%rdx), %xmm20
+
+// CHECK: vreduceps $123, -2048(%rdx), %xmm20
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x08,0x56,0x62,0x80,0x7b]
+          vreduceps $0x7b,-2048(%rdx), %xmm20
+
+// CHECK: vreduceps $123, -2064(%rdx), %xmm20
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x08,0x56,0xa2,0xf0,0xf7,0xff,0xff,0x7b]
+          vreduceps $0x7b,-2064(%rdx), %xmm20
+
+// CHECK: vreduceps $123, 508(%rdx){1to4}, %xmm20
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x18,0x56,0x62,0x7f,0x7b]
+          vreduceps $0x7b,508(%rdx){1to4}, %xmm20
+
+// CHECK: vreduceps $123, 512(%rdx){1to4}, %xmm20
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x18,0x56,0xa2,0x00,0x02,0x00,0x00,0x7b]
+          vreduceps $0x7b,512(%rdx){1to4}, %xmm20
+
+// CHECK: vreduceps $123, -512(%rdx){1to4}, %xmm20
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x18,0x56,0x62,0x80,0x7b]
+          vreduceps $0x7b,-512(%rdx){1to4}, %xmm20
+
+// CHECK: vreduceps $123, -516(%rdx){1to4}, %xmm20
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x18,0x56,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
+          vreduceps $0x7b,-516(%rdx){1to4}, %xmm20
+
+// CHECK: vreduceps $171, %ymm22, %ymm26
+// CHECK:  encoding: [0x62,0x23,0x7d,0x28,0x56,0xd6,0xab]
+          vreduceps $0xab, %ymm22, %ymm26
+
+// CHECK: vreduceps $171, %ymm22, %ymm26 {%k6}
+// CHECK:  encoding: [0x62,0x23,0x7d,0x2e,0x56,0xd6,0xab]
+          vreduceps $0xab, %ymm22, %ymm26 {%k6}
+
+// CHECK: vreduceps $171, %ymm22, %ymm26 {%k6} {z}
+// CHECK:  encoding: [0x62,0x23,0x7d,0xae,0x56,0xd6,0xab]
+          vreduceps $0xab, %ymm22, %ymm26 {%k6} {z}
+
+// CHECK: vreduceps $123, %ymm22, %ymm26
+// CHECK:  encoding: [0x62,0x23,0x7d,0x28,0x56,0xd6,0x7b]
+          vreduceps $0x7b, %ymm22, %ymm26
+
+// CHECK: vreduceps $123, (%rcx), %ymm26
+// CHECK:  encoding: [0x62,0x63,0x7d,0x28,0x56,0x11,0x7b]
+          vreduceps $0x7b,(%rcx), %ymm26
+
+// CHECK: vreduceps $123, 291(%rax,%r14,8), %ymm26
+// CHECK:  encoding: [0x62,0x23,0x7d,0x28,0x56,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vreduceps $0x7b,291(%rax,%r14,8), %ymm26
+
+// CHECK: vreduceps $123, (%rcx){1to8}, %ymm26
+// CHECK:  encoding: [0x62,0x63,0x7d,0x38,0x56,0x11,0x7b]
+          vreduceps $0x7b,(%rcx){1to8}, %ymm26
+
+// CHECK: vreduceps $123, 4064(%rdx), %ymm26
+// CHECK:  encoding: [0x62,0x63,0x7d,0x28,0x56,0x52,0x7f,0x7b]
+          vreduceps $0x7b,4064(%rdx), %ymm26
+
+// CHECK: vreduceps $123, 4096(%rdx), %ymm26
+// CHECK:  encoding: [0x62,0x63,0x7d,0x28,0x56,0x92,0x00,0x10,0x00,0x00,0x7b]
+          vreduceps $0x7b,4096(%rdx), %ymm26
+
+// CHECK: vreduceps $123, -4096(%rdx), %ymm26
+// CHECK:  encoding: [0x62,0x63,0x7d,0x28,0x56,0x52,0x80,0x7b]
+          vreduceps $0x7b,-4096(%rdx), %ymm26
+
+// CHECK: vreduceps $123, -4128(%rdx), %ymm26
+// CHECK:  encoding: [0x62,0x63,0x7d,0x28,0x56,0x92,0xe0,0xef,0xff,0xff,0x7b]
+          vreduceps $0x7b,-4128(%rdx), %ymm26
+
+// CHECK: vreduceps $123, 508(%rdx){1to8}, %ymm26
+// CHECK:  encoding: [0x62,0x63,0x7d,0x38,0x56,0x52,0x7f,0x7b]
+          vreduceps $0x7b,508(%rdx){1to8}, %ymm26
+
+// CHECK: vreduceps $123, 512(%rdx){1to8}, %ymm26
+// CHECK:  encoding: [0x62,0x63,0x7d,0x38,0x56,0x92,0x00,0x02,0x00,0x00,0x7b]
+          vreduceps $0x7b,512(%rdx){1to8}, %ymm26
+
+// CHECK: vreduceps $123, -512(%rdx){1to8}, %ymm26
+// CHECK:  encoding: [0x62,0x63,0x7d,0x38,0x56,0x52,0x80,0x7b]
+          vreduceps $0x7b,-512(%rdx){1to8}, %ymm26
+
+// CHECK: vreduceps $123, -516(%rdx){1to8}, %ymm26
+// CHECK:  encoding: [0x62,0x63,0x7d,0x38,0x56,0x92,0xfc,0xfd,0xff,0xff,0x7b]
+          vreduceps $0x7b,-516(%rdx){1to8}, %ymm26
+
 // CHECK: vcvtpd2qq %xmm22, %xmm24
 // CHECK:  encoding: [0x62,0x21,0xfd,0x08,0x7b,0xc6]
           vcvtpd2qq %xmm22, %xmm24
index c746e6627f7a57bde2f4b0785b009249686d23ca..eca2ffbfc09f8029090c60354ab196f6b31ae306 100644 (file)
@@ -16285,6 +16285,246 @@ vaddpd  {rz-sae}, %zmm2, %zmm1, %zmm1
 // CHECK:  encoding: [0x62,0x62,0x4d,0x30,0x2c,0x8a,0xfc,0xfd,0xff,0xff]
           vscalefps -516(%rdx){1to8}, %ymm22, %ymm25
 
+// CHECK: vrndscalepd $171, %xmm28, %xmm29
+// CHECK:  encoding: [0x62,0x03,0xfd,0x08,0x09,0xec,0xab]
+          vrndscalepd $0xab, %xmm28, %xmm29
+
+// CHECK: vrndscalepd $171, %xmm28, %xmm29 {%k4}
+// CHECK:  encoding: [0x62,0x03,0xfd,0x0c,0x09,0xec,0xab]
+          vrndscalepd $0xab, %xmm28, %xmm29 {%k4}
+
+// CHECK: vrndscalepd $171, %xmm28, %xmm29 {%k4} {z}
+// CHECK:  encoding: [0x62,0x03,0xfd,0x8c,0x09,0xec,0xab]
+          vrndscalepd $0xab, %xmm28, %xmm29 {%k4} {z}
+
+// CHECK: vrndscalepd $123, %xmm28, %xmm29
+// CHECK:  encoding: [0x62,0x03,0xfd,0x08,0x09,0xec,0x7b]
+          vrndscalepd $0x7b, %xmm28, %xmm29
+
+// CHECK: vrndscalepd $123, (%rcx), %xmm29
+// CHECK:  encoding: [0x62,0x63,0xfd,0x08,0x09,0x29,0x7b]
+          vrndscalepd $0x7b, (%rcx), %xmm29
+
+// CHECK: vrndscalepd $123, 291(%rax,%r14,8), %xmm29
+// CHECK:  encoding: [0x62,0x23,0xfd,0x08,0x09,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vrndscalepd $0x7b, 291(%rax,%r14,8), %xmm29
+
+// CHECK: vrndscalepd $123, (%rcx){1to2}, %xmm29
+// CHECK:  encoding: [0x62,0x63,0xfd,0x18,0x09,0x29,0x7b]
+          vrndscalepd $0x7b, (%rcx){1to2}, %xmm29
+
+// CHECK: vrndscalepd $123, 2032(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x63,0xfd,0x08,0x09,0x6a,0x7f,0x7b]
+          vrndscalepd $0x7b, 2032(%rdx), %xmm29
+
+// CHECK: vrndscalepd $123, 2048(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x63,0xfd,0x08,0x09,0xaa,0x00,0x08,0x00,0x00,0x7b]
+          vrndscalepd $0x7b, 2048(%rdx), %xmm29
+
+// CHECK: vrndscalepd $123, -2048(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x63,0xfd,0x08,0x09,0x6a,0x80,0x7b]
+          vrndscalepd $0x7b, -2048(%rdx), %xmm29
+
+// CHECK: vrndscalepd $123, -2064(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x63,0xfd,0x08,0x09,0xaa,0xf0,0xf7,0xff,0xff,0x7b]
+          vrndscalepd $0x7b, -2064(%rdx), %xmm29
+
+// CHECK: vrndscalepd $123, 1016(%rdx){1to2}, %xmm29
+// CHECK:  encoding: [0x62,0x63,0xfd,0x18,0x09,0x6a,0x7f,0x7b]
+          vrndscalepd $0x7b, 1016(%rdx){1to2}, %xmm29
+
+// CHECK: vrndscalepd $123, 1024(%rdx){1to2}, %xmm29
+// CHECK:  encoding: [0x62,0x63,0xfd,0x18,0x09,0xaa,0x00,0x04,0x00,0x00,0x7b]
+          vrndscalepd $0x7b, 1024(%rdx){1to2}, %xmm29
+
+// CHECK: vrndscalepd $123, -1024(%rdx){1to2}, %xmm29
+// CHECK:  encoding: [0x62,0x63,0xfd,0x18,0x09,0x6a,0x80,0x7b]
+          vrndscalepd $0x7b, -1024(%rdx){1to2}, %xmm29
+
+// CHECK: vrndscalepd $123, -1032(%rdx){1to2}, %xmm29
+// CHECK:  encoding: [0x62,0x63,0xfd,0x18,0x09,0xaa,0xf8,0xfb,0xff,0xff,0x7b]
+          vrndscalepd $0x7b, -1032(%rdx){1to2}, %xmm29
+
+// CHECK: vrndscalepd $171, %ymm22, %ymm17
+// CHECK:  encoding: [0x62,0xa3,0xfd,0x28,0x09,0xce,0xab]
+          vrndscalepd $0xab, %ymm22, %ymm17
+
+// CHECK: vrndscalepd $171, %ymm22, %ymm17 {%k7}
+// CHECK:  encoding: [0x62,0xa3,0xfd,0x2f,0x09,0xce,0xab]
+          vrndscalepd $0xab, %ymm22, %ymm17 {%k7}
+
+// CHECK: vrndscalepd $171, %ymm22, %ymm17 {%k7} {z}
+// CHECK:  encoding: [0x62,0xa3,0xfd,0xaf,0x09,0xce,0xab]
+          vrndscalepd $0xab, %ymm22, %ymm17 {%k7} {z}
+
+// CHECK: vrndscalepd $123, %ymm22, %ymm17
+// CHECK:  encoding: [0x62,0xa3,0xfd,0x28,0x09,0xce,0x7b]
+          vrndscalepd $0x7b, %ymm22, %ymm17
+
+// CHECK: vrndscalepd $123, (%rcx), %ymm17
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x28,0x09,0x09,0x7b]
+          vrndscalepd $0x7b, (%rcx), %ymm17
+
+// CHECK: vrndscalepd $123, 291(%rax,%r14,8), %ymm17
+// CHECK:  encoding: [0x62,0xa3,0xfd,0x28,0x09,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vrndscalepd $0x7b, 291(%rax,%r14,8), %ymm17
+
+// CHECK: vrndscalepd $123, (%rcx){1to4}, %ymm17
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x38,0x09,0x09,0x7b]
+          vrndscalepd $0x7b, (%rcx){1to4}, %ymm17
+
+// CHECK: vrndscalepd $123, 4064(%rdx), %ymm17
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x28,0x09,0x4a,0x7f,0x7b]
+          vrndscalepd $0x7b, 4064(%rdx), %ymm17
+
+// CHECK: vrndscalepd $123, 4096(%rdx), %ymm17
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x28,0x09,0x8a,0x00,0x10,0x00,0x00,0x7b]
+          vrndscalepd $0x7b, 4096(%rdx), %ymm17
+
+// CHECK: vrndscalepd $123, -4096(%rdx), %ymm17
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x28,0x09,0x4a,0x80,0x7b]
+          vrndscalepd $0x7b, -4096(%rdx), %ymm17
+
+// CHECK: vrndscalepd $123, -4128(%rdx), %ymm17
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x28,0x09,0x8a,0xe0,0xef,0xff,0xff,0x7b]
+          vrndscalepd $0x7b, -4128(%rdx), %ymm17
+
+// CHECK: vrndscalepd $123, 1016(%rdx){1to4}, %ymm17
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x38,0x09,0x4a,0x7f,0x7b]
+          vrndscalepd $0x7b, 1016(%rdx){1to4}, %ymm17
+
+// CHECK: vrndscalepd $123, 1024(%rdx){1to4}, %ymm17
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x38,0x09,0x8a,0x00,0x04,0x00,0x00,0x7b]
+          vrndscalepd $0x7b, 1024(%rdx){1to4}, %ymm17
+
+// CHECK: vrndscalepd $123, -1024(%rdx){1to4}, %ymm17
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x38,0x09,0x4a,0x80,0x7b]
+          vrndscalepd $0x7b, -1024(%rdx){1to4}, %ymm17
+
+// CHECK: vrndscalepd $123, -1032(%rdx){1to4}, %ymm17
+// CHECK:  encoding: [0x62,0xe3,0xfd,0x38,0x09,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
+          vrndscalepd $0x7b, -1032(%rdx){1to4}, %ymm17
+
+// CHECK: vrndscaleps $171, %xmm26, %xmm22
+// CHECK:  encoding: [0x62,0x83,0x7d,0x08,0x08,0xf2,0xab]
+          vrndscaleps $0xab, %xmm26, %xmm22
+
+// CHECK: vrndscaleps $171, %xmm26, %xmm22 {%k4}
+// CHECK:  encoding: [0x62,0x83,0x7d,0x0c,0x08,0xf2,0xab]
+          vrndscaleps $0xab, %xmm26, %xmm22 {%k4}
+
+// CHECK: vrndscaleps $171, %xmm26, %xmm22 {%k4} {z}
+// CHECK:  encoding: [0x62,0x83,0x7d,0x8c,0x08,0xf2,0xab]
+          vrndscaleps $0xab, %xmm26, %xmm22 {%k4} {z}
+
+// CHECK: vrndscaleps $123, %xmm26, %xmm22
+// CHECK:  encoding: [0x62,0x83,0x7d,0x08,0x08,0xf2,0x7b]
+          vrndscaleps $0x7b, %xmm26, %xmm22
+
+// CHECK: vrndscaleps $123, (%rcx), %xmm22
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x08,0x08,0x31,0x7b]
+          vrndscaleps $0x7b, (%rcx), %xmm22
+
+// CHECK: vrndscaleps $123, 291(%rax,%r14,8), %xmm22
+// CHECK:  encoding: [0x62,0xa3,0x7d,0x08,0x08,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vrndscaleps $0x7b, 291(%rax,%r14,8), %xmm22
+
+// CHECK: vrndscaleps $123, (%rcx){1to4}, %xmm22
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x18,0x08,0x31,0x7b]
+          vrndscaleps $0x7b, (%rcx){1to4}, %xmm22
+
+// CHECK: vrndscaleps $123, 2032(%rdx), %xmm22
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x08,0x08,0x72,0x7f,0x7b]
+          vrndscaleps $0x7b, 2032(%rdx), %xmm22
+
+// CHECK: vrndscaleps $123, 2048(%rdx), %xmm22
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x08,0x08,0xb2,0x00,0x08,0x00,0x00,0x7b]
+          vrndscaleps $0x7b, 2048(%rdx), %xmm22
+
+// CHECK: vrndscaleps $123, -2048(%rdx), %xmm22
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x08,0x08,0x72,0x80,0x7b]
+          vrndscaleps $0x7b, -2048(%rdx), %xmm22
+
+// CHECK: vrndscaleps $123, -2064(%rdx), %xmm22
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x08,0x08,0xb2,0xf0,0xf7,0xff,0xff,0x7b]
+          vrndscaleps $0x7b, -2064(%rdx), %xmm22
+
+// CHECK: vrndscaleps $123, 508(%rdx){1to4}, %xmm22
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x18,0x08,0x72,0x7f,0x7b]
+          vrndscaleps $0x7b, 508(%rdx){1to4}, %xmm22
+
+// CHECK: vrndscaleps $123, 512(%rdx){1to4}, %xmm22
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x18,0x08,0xb2,0x00,0x02,0x00,0x00,0x7b]
+          vrndscaleps $0x7b, 512(%rdx){1to4}, %xmm22
+
+// CHECK: vrndscaleps $123, -512(%rdx){1to4}, %xmm22
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x18,0x08,0x72,0x80,0x7b]
+          vrndscaleps $0x7b, -512(%rdx){1to4}, %xmm22
+
+// CHECK: vrndscaleps $123, -516(%rdx){1to4}, %xmm22
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x18,0x08,0xb2,0xfc,0xfd,0xff,0xff,0x7b]
+          vrndscaleps $0x7b, -516(%rdx){1to4}, %xmm22
+
+// CHECK: vrndscaleps $171, %ymm17, %ymm19
+// CHECK:  encoding: [0x62,0xa3,0x7d,0x28,0x08,0xd9,0xab]
+          vrndscaleps $0xab, %ymm17, %ymm19
+
+// CHECK: vrndscaleps $171, %ymm17, %ymm19 {%k7}
+// CHECK:  encoding: [0x62,0xa3,0x7d,0x2f,0x08,0xd9,0xab]
+          vrndscaleps $0xab, %ymm17, %ymm19 {%k7}
+
+// CHECK: vrndscaleps $171, %ymm17, %ymm19 {%k7} {z}
+// CHECK:  encoding: [0x62,0xa3,0x7d,0xaf,0x08,0xd9,0xab]
+          vrndscaleps $0xab, %ymm17, %ymm19 {%k7} {z}
+
+// CHECK: vrndscaleps $123, %ymm17, %ymm19
+// CHECK:  encoding: [0x62,0xa3,0x7d,0x28,0x08,0xd9,0x7b]
+          vrndscaleps $0x7b, %ymm17, %ymm19
+
+// CHECK: vrndscaleps $123, (%rcx), %ymm19
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x28,0x08,0x19,0x7b]
+          vrndscaleps $0x7b, (%rcx), %ymm19
+
+// CHECK: vrndscaleps $123, 291(%rax,%r14,8), %ymm19
+// CHECK:  encoding: [0x62,0xa3,0x7d,0x28,0x08,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vrndscaleps $0x7b, 291(%rax,%r14,8), %ymm19
+
+// CHECK: vrndscaleps $123, (%rcx){1to8}, %ymm19
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x38,0x08,0x19,0x7b]
+          vrndscaleps $0x7b, (%rcx){1to8}, %ymm19
+
+// CHECK: vrndscaleps $123, 4064(%rdx), %ymm19
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x28,0x08,0x5a,0x7f,0x7b]
+          vrndscaleps $0x7b, 4064(%rdx), %ymm19
+
+// CHECK: vrndscaleps $123, 4096(%rdx), %ymm19
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x28,0x08,0x9a,0x00,0x10,0x00,0x00,0x7b]
+          vrndscaleps $0x7b, 4096(%rdx), %ymm19
+
+// CHECK: vrndscaleps $123, -4096(%rdx), %ymm19
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x28,0x08,0x5a,0x80,0x7b]
+          vrndscaleps $0x7b, -4096(%rdx), %ymm19
+
+// CHECK: vrndscaleps $123, -4128(%rdx), %ymm19
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x28,0x08,0x9a,0xe0,0xef,0xff,0xff,0x7b]
+          vrndscaleps $0x7b, -4128(%rdx), %ymm19
+
+// CHECK: vrndscaleps $123, 508(%rdx){1to8}, %ymm19
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x38,0x08,0x5a,0x7f,0x7b]
+          vrndscaleps $0x7b, 508(%rdx){1to8}, %ymm19
+
+// CHECK: vrndscaleps $123, 512(%rdx){1to8}, %ymm19
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x38,0x08,0x9a,0x00,0x02,0x00,0x00,0x7b]
+          vrndscaleps $0x7b, 512(%rdx){1to8}, %ymm19
+
+// CHECK: vrndscaleps $123, -512(%rdx){1to8}, %ymm19
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x38,0x08,0x5a,0x80,0x7b]
+          vrndscaleps $0x7b, -512(%rdx){1to8}, %ymm19
+
+// CHECK: vrndscaleps $123, -516(%rdx){1to8}, %ymm19
+// CHECK:  encoding: [0x62,0xe3,0x7d,0x38,0x08,0x9a,0xfc,0xfd,0xff,0xff,0x7b]
+          vrndscaleps $0x7b, -516(%rdx){1to8}, %ymm19
+
 // CHECK: vcvtps2pd %xmm27, %xmm20
 // CHECK:  encoding: [0x62,0x81,0x7c,0x08,0x5a,0xe3]
           vcvtps2pd %xmm27, %xmm20