AVX-512: Implemented cvtsi2ss/d cvtusi2ss/d instructions with round control for KNL.
authorIgor Breger <igor.breger@intel.com>
Sun, 14 Jun 2015 12:44:55 +0000 (12:44 +0000)
committerIgor Breger <igor.breger@intel.com>
Sun, 14 Jun 2015 12:44:55 +0000 (12:44 +0000)
Added intrinsics for cvtsi2ss/d instructions.
Added tests for intrinsics and encoding.

Differential Revision: http://reviews.llvm.org/D10430

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@239694 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/IR/IntrinsicsX86.td
lib/Target/X86/X86ISelLowering.cpp
lib/Target/X86/X86ISelLowering.h
lib/Target/X86/X86InstrAVX512.td
lib/Target/X86/X86InstrFragmentsSIMD.td
lib/Target/X86/X86IntrinsicsInfo.h
test/CodeGen/X86/avx512-intrinsics.ll
test/MC/X86/avx512-encodings.s

index cb0f53d71b8da534bb9309667850e78746331713..fd9ec43be6f6a2a7a9fc33b90171f4eca30b3786 100644 (file)
@@ -2998,6 +2998,20 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_avx512_cvtusi642sd : GCCBuiltin<"__builtin_ia32_cvtusi642sd">,
               Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
                          llvm_i64_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_cvtsi2ss32 : GCCBuiltin<"__builtin_ia32_cvtsi2ss32">,
+                Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+                           llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_cvtsi2ss64 : GCCBuiltin<"__builtin_ia32_cvtsi2ss64">,
+                Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+                           llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_cvtsi2sd32 : GCCBuiltin<"__builtin_ia32_cvtsi2sd32">,
+                Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
+                           llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_cvtsi2sd64 : GCCBuiltin<"__builtin_ia32_cvtsi2sd64">,
+                Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
+                           llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
 }
 
 // Pack ops.
index 70221521e7cb5a7664c37948e67d0ade629ea3ff..9f1415dbdf137e062fc530fc82997ccc55d43b64 100644 (file)
@@ -18578,6 +18578,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::FGETEXP_RND:        return "X86ISD::FGETEXP_RND";
   case X86ISD::ADDS:               return "X86ISD::ADDS";
   case X86ISD::SUBS:               return "X86ISD::SUBS";
+  case X86ISD::SINT_TO_FP_RND:     return "X86ISD::SINT_TO_FP_RND";
+  case X86ISD::UINT_TO_FP_RND:     return "X86ISD::UINT_TO_FP_RND";
   }
   return nullptr;
 }
index b5d062f72b24aaea17a57456e2f4cb9be52f2eb1..efda861e2ebc7414a21977cdc586cbe4a2e2565d 100644 (file)
@@ -417,6 +417,10 @@ namespace llvm {
       COMPRESS,
       EXPAND,
 
+      //Convert Unsigned/Integer to Scalar Floating-Point Value
+      //with rounding mode
+      SINT_TO_FP_RND,
+      UINT_TO_FP_RND,
       // Save xmm argument registers to the stack, according to %al. An operator
       // is needed so that this can be expanded with control flow.
       VASTART_SAVE_XMM_REGS,
index b0e1880ab51341c62de964afd49f099ee8b94fef..2817d576d921b8a91cf6af57d3c1fc6ff7fd90e7 100644 (file)
@@ -4205,15 +4205,32 @@ let hasSideEffects = 0 in {
 } // hasSideEffects = 0
 }
 
+multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
+                    X86VectorVTInfo DstVT, X86MemOperand x86memop, string asm> {
+  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
+              (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
+              !strconcat(asm,"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
+              [(set DstVT.RC:$dst,
+                    (OpNode (DstVT.VT DstVT.RC:$src1),
+                             SrcRC:$src2,
+                             (i32 imm:$rc)))]>, EVEX_4V, EVEX_B, EVEX_RC;
+}
+
+multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
+                    X86VectorVTInfo DstVT, X86MemOperand x86memop, string asm> {
+  defm NAME : avx512_vcvtsi_round<opc, OpNode, SrcRC, DstVT, x86memop, asm>,
+              avx512_vcvtsi<opc, SrcRC, DstVT.FRC, x86memop, asm>, VEX_LIG;
+}
+
 let Predicates = [HasAVX512] in {
-defm VCVTSI2SSZ   : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}">,
-                                  XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
-defm VCVTSI642SSZ : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}">,
-                                  XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
-defm VCVTSI2SDZ   : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}">,
-                                  XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
-defm VCVTSI642SDZ : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}">,
-                                  XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR32,
+           v4f32x_info, i32mem, "cvtsi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64,
+           v4f32x_info, i64mem, "cvtsi2ss{q}">, XS, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VCVTSI2SDZ  : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR32,
+           v2f64x_info, i32mem, "cvtsi2sd{l}">, XD, EVEX_CD8<32, CD8VT1>;
+defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64,
+           v2f64x_info, i64mem, "cvtsi2sd{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
 
 def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
           (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
@@ -4233,14 +4250,14 @@ def : Pat<(f64 (sint_to_fp GR32:$src)),
 def : Pat<(f64 (sint_to_fp GR64:$src)),
           (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
 
-defm VCVTUSI2SSZ   : avx512_vcvtsi<0x7B, GR32, FR32X, i32mem, "cvtusi2ss{l}">,
-                                  XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
-defm VCVTUSI642SSZ : avx512_vcvtsi<0x7B, GR64, FR32X, i64mem, "cvtusi2ss{q}">,
-                                  XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86SuintToFpRnd, GR32,
+          v4f32x_info, i32mem, "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86SuintToFpRnd, GR64,
+          v4f32x_info, i64mem, "cvtusi2ss{q}">, XS, VEX_W, EVEX_CD8<64, CD8VT1>;
 defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, GR32, FR64X, i32mem, "cvtusi2sd{l}">,
-                                  XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
-defm VCVTUSI642SDZ : avx512_vcvtsi<0x7B, GR64, FR64X, i64mem, "cvtusi2sd{q}">,
-                                  XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+          XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86SuintToFpRnd, GR64,
+          v2f64x_info, i64mem, "cvtusi2sd{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
 
 def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
           (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
index dfe58ef8067b45f807a6d4da7004c77a26d3a362..f158d1233d967e2db82ee08c3c9cc46cb5741204 100644 (file)
@@ -350,6 +350,12 @@ def X86expand  : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 3,
                               [SDTCisSameAs<0, 3>,
                                SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>;
 
+def SDTintToFPRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisFP<0>,
+                               SDTCisSameAs<0,1>, SDTCisInt<2>, SDTCisInt<3>]>;
+
+def X86SintToFpRnd   : SDNode<"X86ISD::SINT_TO_FP_RND",  SDTintToFPRound>;
+def X86SuintToFpRnd  : SDNode<"X86ISD::UINT_TO_FP_RND",  SDTintToFPRound>;
+
 //===----------------------------------------------------------------------===//
 // SSE Complex Patterns
 //===----------------------------------------------------------------------===//
index 0268066c2ba19b98197ca0915fc5e49b28dbdbf8..86fa0cf84845fb3d18f477776978523d96603715 100644 (file)
@@ -242,6 +242,10 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
   X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
   X86_INTRINSIC_DATA(avx2_vperm2i128, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
+  X86_INTRINSIC_DATA(avx512_cvtsi2sd32, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
+  X86_INTRINSIC_DATA(avx512_cvtsi2sd64, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
+  X86_INTRINSIC_DATA(avx512_cvtsi2ss32, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
+  X86_INTRINSIC_DATA(avx512_cvtsi2ss64, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
   X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
   X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
   X86_INTRINSIC_DATA(avx512_mask_add_pd_128, INTR_TYPE_2OP_MASK, ISD::FADD, 0),
index 9387192f8aa44d90d2edfc4e8274d120a9fdf52c..22a1e58e697683a1fc961ec67ebb5ee657dbdc11 100644 (file)
@@ -2807,3 +2807,43 @@ define <2 x double> @test_max_sd(<2 x double> %a0, <2 x double> %a1) {
   %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
   ret <2 x double> %res
 }
+
+define <2 x double> @test_x86_avx512_cvtsi2sd32(<2 x double> %a, i32 %b) {
+; CHECK-LABEL: test_x86_avx512_cvtsi2sd32:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vcvtsi2sdl %edi, {rz-sae}, %xmm0, %xmm0 
+; CHECK-NEXT:    retq 
+  %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double> %a, i32 %b, i32 3) ; <<<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double>, i32, i32) nounwind readnone
+
+define <2 x double> @test_x86_avx512_cvtsi2sd64(<2 x double> %a, i64 %b) {
+; CHECK-LABEL: test_x86_avx512_cvtsi2sd64:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vcvtsi2sdq %rdi, {rz-sae}, %xmm0, %xmm0 
+; CHECK-NEXT:    retq 
+  %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %a, i64 %b, i32 3) ; <<<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double>, i64, i32) nounwind readnone
+
+define <4 x float> @test_x86_avx512_cvtsi2ss32(<4 x float> %a, i32 %b) {
+; CHECK-LABEL: test_x86_avx512_cvtsi2ss32:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vcvtsi2ssl %edi, {rz-sae}, %xmm0, %xmm0 
+; CHECK-NEXT:    retq 
+  %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %a, i32 %b, i32 3) ; <<<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float>, i32, i32) nounwind readnone
+
+define <4 x float> @test_x86_avx512_cvtsi2ss64(<4 x float> %a, i64 %b) {
+; CHECK-LABEL: test_x86_avx512_cvtsi2ss64:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vcvtsi2ssq %rdi, {rz-sae}, %xmm0, %xmm0 
+; CHECK-NEXT:    retq 
+  %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %a, i64 %b, i32 3) ; <<<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float>, i64, i32) nounwind readnone
index ca0fccb2e3efad2be49838086b09dd6007a89f6c..83789c78d6ea80fd27bf6c15f82ec3e8b04849f5 100644 (file)
@@ -8812,4 +8812,496 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
 // CHECK:  encoding: [0x62,0xe2,0x1d,0x50,0x36,0xb2,0xfc,0xfd,0xff,0xff]
           vpermd -516(%rdx){1to16}, %zmm28, %zmm22
 
+// CHECK:  vcvtsi2sdl %eax, %xmm10, %xmm7
+// CHECK:  encoding: [0xc5,0xab,0x2a,0xf8]
+          vcvtsi2sd %eax, %xmm10, %xmm7
+
+// CHECK: vcvtsi2sdl %ebp, %xmm10, %xmm7
+// CHECK:  encoding: [0xc5,0xab,0x2a,0xfd]
+          vcvtsi2sd %ebp, %xmm10, %xmm7
+
+// CHECK: vcvtsi2sdl %r13d, %xmm10, %xmm7
+// CHECK:  encoding: [0xc4,0xc1,0x2b,0x2a,0xfd]
+          vcvtsi2sd %r13d, %xmm10, %xmm7
+
+// CHECK: vcvtsi2sdl (%rcx), %xmm10, %xmm7
+// CHECK:  encoding: [0xc5,0xab,0x2a,0x39]
+          vcvtsi2sdl (%rcx), %xmm10, %xmm7
+
+// CHECK: vcvtsi2sdl 291(%rax,%r14,8), %xmm10, %xmm7
+// CHECK:  encoding: [0xc4,0xa1,0x2b,0x2a,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vcvtsi2sdl 291(%rax,%r14,8), %xmm10, %xmm7
+
+// CHECK: vcvtsi2sdl 508(%rdx), %xmm10, %xmm7
+// CHECK:  encoding: [0xc5,0xab,0x2a,0xba,0xfc,0x01,0x00,0x00]
+          vcvtsi2sdl 508(%rdx), %xmm10, %xmm7
+
+// CHECK: vcvtsi2sdl 512(%rdx), %xmm10, %xmm7
+// CHECK:  encoding: [0xc5,0xab,0x2a,0xba,0x00,0x02,0x00,0x00]
+          vcvtsi2sdl 512(%rdx), %xmm10, %xmm7
+
+// CHECK: vcvtsi2sdl -512(%rdx), %xmm10, %xmm7
+// CHECK:  encoding: [0xc5,0xab,0x2a,0xba,0x00,0xfe,0xff,0xff]
+          vcvtsi2sdl -512(%rdx), %xmm10, %xmm7
+
+// CHECK: vcvtsi2sdl -516(%rdx), %xmm10, %xmm7
+// CHECK:  encoding: [0xc5,0xab,0x2a,0xba,0xfc,0xfd,0xff,0xff]
+          vcvtsi2sdl -516(%rdx), %xmm10, %xmm7
+// CHECK: vcvtsi2sdq %rax, %xmm12, %xmm29
+// CHECK:  encoding: [0x62,0x61,0x9f,0x08,0x2a,0xe8]
+          vcvtsi2sd %rax, %xmm12, %xmm29
 
+// CHECK: vcvtsi2sdq %rax,  {rn-sae}, %xmm12, %xmm29
+// CHECK:  encoding: [0x62,0x61,0x9f,0x18,0x2a,0xe8]
+          vcvtsi2sd %rax,  {rn-sae}, %xmm12, %xmm29
+
+// CHECK: vcvtsi2sdq %rax,  {ru-sae}, %xmm12, %xmm29
+// CHECK:  encoding: [0x62,0x61,0x9f,0x58,0x2a,0xe8]
+          vcvtsi2sd %rax,  {ru-sae}, %xmm12, %xmm29
+
+// CHECK: vcvtsi2sdq %rax,  {rd-sae}, %xmm12, %xmm29
+// CHECK:  encoding: [0x62,0x61,0x9f,0x38,0x2a,0xe8]
+          vcvtsi2sd %rax,  {rd-sae}, %xmm12, %xmm29
+
+// CHECK: vcvtsi2sdq %rax,  {rz-sae}, %xmm12, %xmm29
+// CHECK:  encoding: [0x62,0x61,0x9f,0x78,0x2a,0xe8]
+          vcvtsi2sd %rax,  {rz-sae}, %xmm12, %xmm29
+
+// CHECK: vcvtsi2sdq %r8, %xmm12, %xmm29
+// CHECK:  encoding: [0x62,0x41,0x9f,0x08,0x2a,0xe8]
+          vcvtsi2sd %r8, %xmm12, %xmm29
+
+// CHECK: vcvtsi2sdq %r8,  {rn-sae}, %xmm12, %xmm29
+// CHECK:  encoding: [0x62,0x41,0x9f,0x18,0x2a,0xe8]
+          vcvtsi2sd %r8,  {rn-sae}, %xmm12, %xmm29
+
+// CHECK: vcvtsi2sdq %r8,  {ru-sae}, %xmm12, %xmm29
+// CHECK:  encoding: [0x62,0x41,0x9f,0x58,0x2a,0xe8]
+          vcvtsi2sd %r8,  {ru-sae}, %xmm12, %xmm29
+
+// CHECK: vcvtsi2sdq %r8,  {rd-sae}, %xmm12, %xmm29
+// CHECK:  encoding: [0x62,0x41,0x9f,0x38,0x2a,0xe8]
+          vcvtsi2sd %r8,  {rd-sae}, %xmm12, %xmm29
+
+// CHECK: vcvtsi2sdq %r8,  {rz-sae}, %xmm12, %xmm29
+// CHECK:  encoding: [0x62,0x41,0x9f,0x78,0x2a,0xe8]
+          vcvtsi2sd %r8,  {rz-sae}, %xmm12, %xmm29
+
+// CHECK: vcvtsi2sdq (%rcx), %xmm12, %xmm29
+// CHECK:  encoding: [0x62,0x61,0x9f,0x08,0x2a,0x29]
+          vcvtsi2sdq (%rcx), %xmm12, %xmm29
+
+// CHECK: vcvtsi2sdq 291(%rax,%r14,8), %xmm12, %xmm29
+// CHECK:  encoding: [0x62,0x21,0x9f,0x08,0x2a,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vcvtsi2sdq 291(%rax,%r14,8), %xmm12, %xmm29
+
+// CHECK: vcvtsi2sdq 1016(%rdx), %xmm12, %xmm29
+// CHECK:  encoding: [0x62,0x61,0x9f,0x08,0x2a,0x6a,0x7f]
+          vcvtsi2sdq 1016(%rdx), %xmm12, %xmm29
+
+// CHECK: vcvtsi2sdq 1024(%rdx), %xmm12, %xmm29
+// CHECK:  encoding: [0x62,0x61,0x9f,0x08,0x2a,0xaa,0x00,0x04,0x00,0x00]
+          vcvtsi2sdq 1024(%rdx), %xmm12, %xmm29
+
+// CHECK: vcvtsi2sdq -1024(%rdx), %xmm12, %xmm29
+// CHECK:  encoding: [0x62,0x61,0x9f,0x08,0x2a,0x6a,0x80]
+          vcvtsi2sdq -1024(%rdx), %xmm12, %xmm29
+
+// CHECK: vcvtsi2sdq -1032(%rdx), %xmm12, %xmm29
+// CHECK:  encoding: [0x62,0x61,0x9f,0x08,0x2a,0xaa,0xf8,0xfb,0xff,0xff]
+          vcvtsi2sdq -1032(%rdx), %xmm12, %xmm29
+
+// CHECK: vcvtsi2ssl %eax, %xmm10, %xmm15
+// CHECK:  encoding: [0xc5,0x2a,0x2a,0xf8]
+          vcvtsi2ss %eax, %xmm10, %xmm15
+
+// CHECK: vcvtsi2ssl %eax,  {rn-sae}, %xmm10, %xmm15
+// CHECK:  encoding: [0x62,0x71,0x2e,0x18,0x2a,0xf8]
+          vcvtsi2ss %eax,  {rn-sae}, %xmm10, %xmm15
+
+// CHECK: vcvtsi2ssl %eax,  {ru-sae}, %xmm10, %xmm15
+// CHECK:  encoding: [0x62,0x71,0x2e,0x58,0x2a,0xf8]
+          vcvtsi2ss %eax,  {ru-sae}, %xmm10, %xmm15
+
+// CHECK: vcvtsi2ssl %eax,  {rd-sae}, %xmm10, %xmm15
+// CHECK:  encoding: [0x62,0x71,0x2e,0x38,0x2a,0xf8]
+          vcvtsi2ss %eax,  {rd-sae}, %xmm10, %xmm15
+
+// CHECK: vcvtsi2ssl %eax,  {rz-sae}, %xmm10, %xmm15
+// CHECK:  encoding: [0x62,0x71,0x2e,0x78,0x2a,0xf8]
+          vcvtsi2ss %eax,  {rz-sae}, %xmm10, %xmm15
+
+// CHECK: vcvtsi2ssl %ebp, %xmm10, %xmm15
+// CHECK:  encoding: [0xc5,0x2a,0x2a,0xfd]
+          vcvtsi2ss %ebp, %xmm10, %xmm15
+
+// CHECK: vcvtsi2ssl %ebp,  {rn-sae}, %xmm10, %xmm15
+// CHECK:  encoding: [0x62,0x71,0x2e,0x18,0x2a,0xfd]
+          vcvtsi2ss %ebp,  {rn-sae}, %xmm10, %xmm15
+
+// CHECK: vcvtsi2ssl %ebp,  {ru-sae}, %xmm10, %xmm15
+// CHECK:  encoding: [0x62,0x71,0x2e,0x58,0x2a,0xfd]
+          vcvtsi2ss %ebp,  {ru-sae}, %xmm10, %xmm15
+
+// CHECK: vcvtsi2ssl %ebp,  {rd-sae}, %xmm10, %xmm15
+// CHECK:  encoding: [0x62,0x71,0x2e,0x38,0x2a,0xfd]
+          vcvtsi2ss %ebp,  {rd-sae}, %xmm10, %xmm15
+
+// CHECK: vcvtsi2ssl %ebp,  {rz-sae}, %xmm10, %xmm15
+// CHECK:  encoding: [0x62,0x71,0x2e,0x78,0x2a,0xfd]
+          vcvtsi2ss %ebp,  {rz-sae}, %xmm10, %xmm15
+
+// CHECK: vcvtsi2ssl %r13d, %xmm10, %xmm15
+// CHECK:  encoding: [0xc4,0x41,0x2a,0x2a,0xfd]
+          vcvtsi2ss %r13d, %xmm10, %xmm15
+
+// CHECK: vcvtsi2ssl %r13d,  {rn-sae}, %xmm10, %xmm15
+// CHECK:  encoding: [0x62,0x51,0x2e,0x18,0x2a,0xfd]
+          vcvtsi2ss %r13d,  {rn-sae}, %xmm10, %xmm15
+
+// CHECK: vcvtsi2ssl %r13d,  {ru-sae}, %xmm10, %xmm15
+// CHECK:  encoding: [0x62,0x51,0x2e,0x58,0x2a,0xfd]
+          vcvtsi2ss %r13d,  {ru-sae}, %xmm10, %xmm15
+
+// CHECK: vcvtsi2ssl %r13d,  {rd-sae}, %xmm10, %xmm15
+// CHECK:  encoding: [0x62,0x51,0x2e,0x38,0x2a,0xfd]
+          vcvtsi2ss %r13d,  {rd-sae}, %xmm10, %xmm15
+
+// CHECK: vcvtsi2ssl %r13d,  {rz-sae}, %xmm10, %xmm15
+// CHECK:  encoding: [0x62,0x51,0x2e,0x78,0x2a,0xfd]
+          vcvtsi2ss %r13d,  {rz-sae}, %xmm10, %xmm15
+
+// CHECK: vcvtsi2ssl (%rcx), %xmm10, %xmm15
+// CHECK:  encoding: [0xc5,0x2a,0x2a,0x39]
+          vcvtsi2ssl (%rcx), %xmm10, %xmm15
+
+// CHECK: vcvtsi2ssl 291(%rax,%r14,8), %xmm10, %xmm15
+// CHECK:  encoding: [0xc4,0x21,0x2a,0x2a,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vcvtsi2ssl 291(%rax,%r14,8), %xmm10, %xmm15
+
+// CHECK: vcvtsi2ssl 508(%rdx), %xmm10, %xmm15
+// CHECK:  encoding: [0xc5,0x2a,0x2a,0xba,0xfc,0x01,0x00,0x00]
+          vcvtsi2ssl 508(%rdx), %xmm10, %xmm15
+
+// CHECK: vcvtsi2ssl 512(%rdx), %xmm10, %xmm15
+// CHECK:  encoding: [0xc5,0x2a,0x2a,0xba,0x00,0x02,0x00,0x00]
+          vcvtsi2ssl 512(%rdx), %xmm10, %xmm15
+
+// CHECK: vcvtsi2ssl -512(%rdx), %xmm10, %xmm15
+// CHECK:  encoding: [0xc5,0x2a,0x2a,0xba,0x00,0xfe,0xff,0xff]
+          vcvtsi2ssl -512(%rdx), %xmm10, %xmm15
+
+// CHECK: vcvtsi2ssl -516(%rdx), %xmm10, %xmm15
+// CHECK:  encoding: [0xc5,0x2a,0x2a,0xba,0xfc,0xfd,0xff,0xff]
+          vcvtsi2ssl -516(%rdx), %xmm10, %xmm15
+// CHECK: vcvtsi2ssq %rax, %xmm10, %xmm16
+// CHECK:  encoding: [0x62,0xe1,0xae,0x08,0x2a,0xc0]
+          vcvtsi2ss %rax, %xmm10, %xmm16
+
+// CHECK: vcvtsi2ssq %rax,  {rn-sae}, %xmm10, %xmm16
+// CHECK:  encoding: [0x62,0xe1,0xae,0x18,0x2a,0xc0]
+          vcvtsi2ss %rax,  {rn-sae}, %xmm10, %xmm16
+
+// CHECK: vcvtsi2ssq %rax,  {ru-sae}, %xmm10, %xmm16
+// CHECK:  encoding: [0x62,0xe1,0xae,0x58,0x2a,0xc0]
+          vcvtsi2ss %rax,  {ru-sae}, %xmm10, %xmm16
+
+// CHECK: vcvtsi2ssq %rax,  {rd-sae}, %xmm10, %xmm16
+// CHECK:  encoding: [0x62,0xe1,0xae,0x38,0x2a,0xc0]
+          vcvtsi2ss %rax,  {rd-sae}, %xmm10, %xmm16
+
+// CHECK: vcvtsi2ssq %rax,  {rz-sae}, %xmm10, %xmm16
+// CHECK:  encoding: [0x62,0xe1,0xae,0x78,0x2a,0xc0]
+          vcvtsi2ss %rax,  {rz-sae}, %xmm10, %xmm16
+
+// CHECK: vcvtsi2ssq %r8, %xmm10, %xmm16
+// CHECK:  encoding: [0x62,0xc1,0xae,0x08,0x2a,0xc0]
+          vcvtsi2ss %r8, %xmm10, %xmm16
+
+// CHECK: vcvtsi2ssq %r8,  {rn-sae}, %xmm10, %xmm16
+// CHECK:  encoding: [0x62,0xc1,0xae,0x18,0x2a,0xc0]
+          vcvtsi2ss %r8,  {rn-sae}, %xmm10, %xmm16
+
+// CHECK: vcvtsi2ssq %r8,  {ru-sae}, %xmm10, %xmm16
+// CHECK:  encoding: [0x62,0xc1,0xae,0x58,0x2a,0xc0]
+          vcvtsi2ss %r8,  {ru-sae}, %xmm10, %xmm16
+
+// CHECK: vcvtsi2ssq %r8,  {rd-sae}, %xmm10, %xmm16
+// CHECK:  encoding: [0x62,0xc1,0xae,0x38,0x2a,0xc0]
+          vcvtsi2ss %r8,  {rd-sae}, %xmm10, %xmm16
+
+// CHECK: vcvtsi2ssq %r8,  {rz-sae}, %xmm10, %xmm16
+// CHECK:  encoding: [0x62,0xc1,0xae,0x78,0x2a,0xc0]
+          vcvtsi2ss %r8,  {rz-sae}, %xmm10, %xmm16
+
+// CHECK: vcvtsi2ssq (%rcx), %xmm10, %xmm16
+// CHECK:  encoding: [0x62,0xe1,0xae,0x08,0x2a,0x01]
+          vcvtsi2ssq (%rcx), %xmm10, %xmm16
+
+// CHECK: vcvtsi2ssq 291(%rax,%r14,8), %xmm10, %xmm16
+// CHECK:  encoding: [0x62,0xa1,0xae,0x08,0x2a,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vcvtsi2ssq 291(%rax,%r14,8), %xmm10, %xmm16
+
+// CHECK: vcvtsi2ssq 1016(%rdx), %xmm10, %xmm16
+// CHECK:  encoding: [0x62,0xe1,0xae,0x08,0x2a,0x42,0x7f]
+          vcvtsi2ssq 1016(%rdx), %xmm10, %xmm16
+
+// CHECK: vcvtsi2ssq 1024(%rdx), %xmm10, %xmm16
+// CHECK:  encoding: [0x62,0xe1,0xae,0x08,0x2a,0x82,0x00,0x04,0x00,0x00]
+          vcvtsi2ssq 1024(%rdx), %xmm10, %xmm16
+
+// CHECK: vcvtsi2ssq -1024(%rdx), %xmm10, %xmm16
+// CHECK:  encoding: [0x62,0xe1,0xae,0x08,0x2a,0x42,0x80]
+          vcvtsi2ssq -1024(%rdx), %xmm10, %xmm16
+
+// CHECK: vcvtsi2ssq -1032(%rdx), %xmm10, %xmm16
+// CHECK:  encoding: [0x62,0xe1,0xae,0x08,0x2a,0x82,0xf8,0xfb,0xff,0xff]
+          vcvtsi2ssq -1032(%rdx), %xmm10, %xmm16
+
+// CHECK:  vcvtusi2sdl  %eax, %xmm1, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0x77,0x08,0x7b,0xd8]
+          vcvtusi2sd %eax, %xmm1, %xmm19
+
+// CHECK:  vcvtusi2sdl  %ebp, %xmm1, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0x77,0x08,0x7b,0xdd]
+          vcvtusi2sd %ebp, %xmm1, %xmm19
+
+// CHECK:  vcvtusi2sdl  %r13d, %xmm1, %xmm19
+// CHECK:  encoding: [0x62,0xc1,0x77,0x08,0x7b,0xdd]
+          vcvtusi2sd %r13d, %xmm1, %xmm19
+
+// CHECK: vcvtusi2sdl (%rcx), %xmm1, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0x77,0x08,0x7b,0x19]
+          vcvtusi2sdl (%rcx), %xmm1, %xmm19
+
+// CHECK: vcvtusi2sdl 291(%rax,%r14,8), %xmm1, %xmm19
+// CHECK:  encoding: [0x62,0xa1,0x77,0x08,0x7b,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vcvtusi2sdl 291(%rax,%r14,8), %xmm1, %xmm19
+
+// CHECK: vcvtusi2sdl 508(%rdx), %xmm1, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0x77,0x08,0x7b,0x5a,0x7f]
+          vcvtusi2sdl 508(%rdx), %xmm1, %xmm19
+
+// CHECK: vcvtusi2sdl 512(%rdx), %xmm1, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0x77,0x08,0x7b,0x9a,0x00,0x02,0x00,0x00]
+          vcvtusi2sdl 512(%rdx), %xmm1, %xmm19
+
+// CHECK: vcvtusi2sdl -512(%rdx), %xmm1, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0x77,0x08,0x7b,0x5a,0x80]
+          vcvtusi2sdl -512(%rdx), %xmm1, %xmm19
+
+// CHECK: vcvtusi2sdl -516(%rdx), %xmm1, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0x77,0x08,0x7b,0x9a,0xfc,0xfd,0xff,0xff]
+          vcvtusi2sdl -516(%rdx), %xmm1, %xmm19
+
+// CHECK: vcvtusi2sdq %rax, %xmm26, %xmm14
+// CHECK:  encoding: [0x62,0x71,0xaf,0x00,0x7b,0xf0]
+          vcvtusi2sd %rax, %xmm26, %xmm14
+
+// CHECK: vcvtusi2sdq %rax, {rn-sae}, %xmm26, %xmm14
+// CHECK:  encoding: [0x62,0x71,0xaf,0x10,0x7b,0xf0]
+          vcvtusi2sd %rax, {rn-sae}, %xmm26, %xmm14
+
+// CHECK: vcvtusi2sdq %rax, {ru-sae}, %xmm26, %xmm14
+// CHECK:  encoding: [0x62,0x71,0xaf,0x50,0x7b,0xf0]
+          vcvtusi2sd %rax, {ru-sae}, %xmm26, %xmm14
+
+// CHECK: vcvtusi2sdq %rax, {rd-sae}, %xmm26, %xmm14
+// CHECK:  encoding: [0x62,0x71,0xaf,0x30,0x7b,0xf0]
+          vcvtusi2sd %rax, {rd-sae}, %xmm26, %xmm14
+
+// CHECK: vcvtusi2sdq %rax, {rz-sae}, %xmm26, %xmm14
+// CHECK:  encoding: [0x62,0x71,0xaf,0x70,0x7b,0xf0]
+          vcvtusi2sd %rax, {rz-sae}, %xmm26, %xmm14
+
+// CHECK: vcvtusi2sdq %r8, %xmm26, %xmm14
+// CHECK:  encoding: [0x62,0x51,0xaf,0x00,0x7b,0xf0]
+          vcvtusi2sd %r8, %xmm26, %xmm14
+
+// CHECK: vcvtusi2sdq %r8, {rn-sae}, %xmm26, %xmm14
+// CHECK:  encoding: [0x62,0x51,0xaf,0x10,0x7b,0xf0]
+          vcvtusi2sd %r8, {rn-sae}, %xmm26, %xmm14
+
+// CHECK:  vcvtusi2sdq  %r8, {ru-sae}, %xmm26, %xmm14
+// CHECK:  encoding: [0x62,0x51,0xaf,0x50,0x7b,0xf0]
+          vcvtusi2sd %r8, {ru-sae}, %xmm26, %xmm14
+
+// CHECK:  vcvtusi2sdq  %r8, {rd-sae}, %xmm26, %xmm14
+// CHECK:  encoding: [0x62,0x51,0xaf,0x30,0x7b,0xf0]
+          vcvtusi2sd %r8, {rd-sae}, %xmm26, %xmm14
+
+// CHECK:  vcvtusi2sdq  %r8, {rz-sae}, %xmm26, %xmm14
+// CHECK:  encoding: [0x62,0x51,0xaf,0x70,0x7b,0xf0]
+          vcvtusi2sd %r8, {rz-sae}, %xmm26, %xmm14
+
+// CHECK: vcvtusi2sdq (%rcx), %xmm26, %xmm14
+// CHECK:  encoding: [0x62,0x71,0xaf,0x00,0x7b,0x31]
+          vcvtusi2sdq (%rcx), %xmm26, %xmm14
+
+// CHECK: vcvtusi2sdq 291(%rax,%r14,8), %xmm26, %xmm14
+// CHECK:  encoding: [0x62,0x31,0xaf,0x00,0x7b,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vcvtusi2sdq 291(%rax,%r14,8), %xmm26, %xmm14
+
+// CHECK: vcvtusi2sdq 1016(%rdx), %xmm26, %xmm14
+// CHECK:  encoding: [0x62,0x71,0xaf,0x00,0x7b,0x72,0x7f]
+          vcvtusi2sdq 1016(%rdx), %xmm26, %xmm14
+
+// CHECK: vcvtusi2sdq 1024(%rdx), %xmm26, %xmm14
+// CHECK:  encoding: [0x62,0x71,0xaf,0x00,0x7b,0xb2,0x00,0x04,0x00,0x00]
+          vcvtusi2sdq 1024(%rdx), %xmm26, %xmm14
+
+// CHECK: vcvtusi2sdq -1024(%rdx), %xmm26, %xmm14
+// CHECK:  encoding: [0x62,0x71,0xaf,0x00,0x7b,0x72,0x80]
+          vcvtusi2sdq -1024(%rdx), %xmm26, %xmm14
+
+// CHECK: vcvtusi2sdq -1032(%rdx), %xmm26, %xmm14
+// CHECK:  encoding: [0x62,0x71,0xaf,0x00,0x7b,0xb2,0xf8,0xfb,0xff,0xff]
+          vcvtusi2sdq -1032(%rdx), %xmm26, %xmm14
+
+// CHECK: vcvtusi2ssl %eax, %xmm26, %xmm5
+// CHECK:  encoding: [0x62,0xf1,0x2e,0x00,0x7b,0xe8]
+          vcvtusi2ss %eax, %xmm26, %xmm5
+
+// CHECK:  vcvtusi2ssl  %eax, {rn-sae}, %xmm26, %xmm5
+// CHECK:  encoding: [0x62,0xf1,0x2e,0x10,0x7b,0xe8]
+          vcvtusi2ss %eax, {rn-sae}, %xmm26, %xmm5
+
+// CHECK:  vcvtusi2ssl  %eax, {ru-sae}, %xmm26, %xmm5
+// CHECK:  encoding: [0x62,0xf1,0x2e,0x50,0x7b,0xe8]
+          vcvtusi2ss %eax, {ru-sae}, %xmm26, %xmm5
+
+// CHECK:  vcvtusi2ssl  %eax, {rd-sae}, %xmm26, %xmm5
+// CHECK:  encoding: [0x62,0xf1,0x2e,0x30,0x7b,0xe8]
+          vcvtusi2ss %eax, {rd-sae}, %xmm26, %xmm5
+
+// CHECK:  vcvtusi2ssl  %eax, {rz-sae}, %xmm26, %xmm5
+// CHECK:  encoding: [0x62,0xf1,0x2e,0x70,0x7b,0xe8]
+          vcvtusi2ss %eax, {rz-sae}, %xmm26, %xmm5
+
+// CHECK:  vcvtusi2ssl  %ebp, %xmm26, %xmm5
+// CHECK:  encoding: [0x62,0xf1,0x2e,0x00,0x7b,0xed]
+          vcvtusi2ss %ebp, %xmm26, %xmm5
+
+// CHECK:  vcvtusi2ssl  %ebp, {rn-sae}, %xmm26, %xmm5
+// CHECK:  encoding: [0x62,0xf1,0x2e,0x10,0x7b,0xed]
+          vcvtusi2ss %ebp, {rn-sae}, %xmm26, %xmm5
+
+// CHECK:  vcvtusi2ssl  %ebp, {ru-sae}, %xmm26, %xmm5
+// CHECK:  encoding: [0x62,0xf1,0x2e,0x50,0x7b,0xed]
+          vcvtusi2ss %ebp, {ru-sae}, %xmm26, %xmm5
+
+// CHECK:  vcvtusi2ssl  %ebp, {rd-sae}, %xmm26, %xmm5
+// CHECK:  encoding: [0x62,0xf1,0x2e,0x30,0x7b,0xed]
+          vcvtusi2ss %ebp, {rd-sae}, %xmm26, %xmm5
+
+// CHECK:  vcvtusi2ssl  %ebp, {rz-sae}, %xmm26, %xmm5
+// CHECK:  encoding: [0x62,0xf1,0x2e,0x70,0x7b,0xed]
+          vcvtusi2ss %ebp, {rz-sae}, %xmm26, %xmm5
+
+// CHECK:  vcvtusi2ssl  %r13d, %xmm26, %xmm5
+// CHECK:  encoding: [0x62,0xd1,0x2e,0x00,0x7b,0xed]
+          vcvtusi2ss %r13d, %xmm26, %xmm5
+
+// CHECK:  vcvtusi2ssl  %r13d, {rn-sae}, %xmm26, %xmm5
+// CHECK:  encoding: [0x62,0xd1,0x2e,0x10,0x7b,0xed]
+          vcvtusi2ss %r13d, {rn-sae}, %xmm26, %xmm5
+
+// CHECK:  vcvtusi2ssl  %r13d, {ru-sae}, %xmm26, %xmm5
+// CHECK:  encoding: [0x62,0xd1,0x2e,0x50,0x7b,0xed]
+          vcvtusi2ss %r13d, {ru-sae}, %xmm26, %xmm5
+
+// CHECK:  vcvtusi2ssl  %r13d, {rd-sae}, %xmm26, %xmm5
+// CHECK:  encoding: [0x62,0xd1,0x2e,0x30,0x7b,0xed]
+          vcvtusi2ss %r13d, {rd-sae}, %xmm26, %xmm5
+
+// CHECK:  vcvtusi2ssl  %r13d, {rz-sae}, %xmm26, %xmm5
+// CHECK:  encoding: [0x62,0xd1,0x2e,0x70,0x7b,0xed]
+          vcvtusi2ss %r13d, {rz-sae}, %xmm26, %xmm5
+
+// CHECK: vcvtusi2ssl (%rcx), %xmm26, %xmm5
+// CHECK:  encoding: [0x62,0xf1,0x2e,0x00,0x7b,0x29]
+          vcvtusi2ssl (%rcx), %xmm26, %xmm5
+
+// CHECK: vcvtusi2ssl 291(%rax,%r14,8), %xmm26, %xmm5
+// CHECK:  encoding: [0x62,0xb1,0x2e,0x00,0x7b,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vcvtusi2ssl 291(%rax,%r14,8), %xmm26, %xmm5
+
+// CHECK: vcvtusi2ssl 508(%rdx), %xmm26, %xmm5
+// CHECK:  encoding: [0x62,0xf1,0x2e,0x00,0x7b,0x6a,0x7f]
+          vcvtusi2ssl 508(%rdx), %xmm26, %xmm5
+
+// CHECK: vcvtusi2ssl 512(%rdx), %xmm26, %xmm5
+// CHECK:  encoding: [0x62,0xf1,0x2e,0x00,0x7b,0xaa,0x00,0x02,0x00,0x00]
+          vcvtusi2ssl 512(%rdx), %xmm26, %xmm5
+
+// CHECK: vcvtusi2ssl -512(%rdx), %xmm26, %xmm5
+// CHECK:  encoding: [0x62,0xf1,0x2e,0x00,0x7b,0x6a,0x80]
+          vcvtusi2ssl -512(%rdx), %xmm26, %xmm5
+
+// CHECK: vcvtusi2ssl -516(%rdx), %xmm26, %xmm5
+// CHECK:  encoding: [0x62,0xf1,0x2e,0x00,0x7b,0xaa,0xfc,0xfd,0xff,0xff]
+          vcvtusi2ssl -516(%rdx), %xmm26, %xmm5
+
+// CHECK:  vcvtusi2ssq  %rax, %xmm22, %xmm14
+// CHECK:  encoding: [0x62,0x71,0xce,0x00,0x7b,0xf0]
+          vcvtusi2ss %rax, %xmm22, %xmm14
+
+// CHECK:  vcvtusi2ssq  %rax, {rn-sae}, %xmm22, %xmm14
+// CHECK:  encoding: [0x62,0x71,0xce,0x10,0x7b,0xf0]
+          vcvtusi2ss %rax, {rn-sae}, %xmm22, %xmm14
+
+// CHECK:  vcvtusi2ssq  %rax, {ru-sae}, %xmm22, %xmm14
+// CHECK:  encoding: [0x62,0x71,0xce,0x50,0x7b,0xf0]
+          vcvtusi2ss %rax, {ru-sae}, %xmm22, %xmm14
+
+// CHECK:  vcvtusi2ssq  %rax, {rd-sae}, %xmm22, %xmm14
+// CHECK:  encoding: [0x62,0x71,0xce,0x30,0x7b,0xf0]
+          vcvtusi2ss %rax, {rd-sae}, %xmm22, %xmm14
+
+// CHECK:  vcvtusi2ssq  %rax, {rz-sae}, %xmm22, %xmm14
+// CHECK:  encoding: [0x62,0x71,0xce,0x70,0x7b,0xf0]
+          vcvtusi2ss %rax, {rz-sae}, %xmm22, %xmm14
+
+// CHECK:  vcvtusi2ssq  %r8, %xmm22, %xmm14
+// CHECK:  encoding: [0x62,0x51,0xce,0x00,0x7b,0xf0]
+          vcvtusi2ss %r8, %xmm22, %xmm14
+
+// CHECK:  vcvtusi2ssq  %r8, {rn-sae}, %xmm22, %xmm14
+// CHECK:  encoding: [0x62,0x51,0xce,0x10,0x7b,0xf0]
+          vcvtusi2ss %r8, {rn-sae}, %xmm22, %xmm14
+
+// CHECK:  vcvtusi2ssq  %r8, {ru-sae}, %xmm22, %xmm14
+// CHECK:  encoding: [0x62,0x51,0xce,0x50,0x7b,0xf0]
+          vcvtusi2ss %r8, {ru-sae}, %xmm22, %xmm14
+
+// CHECK:  vcvtusi2ssq  %r8, {rd-sae}, %xmm22, %xmm14
+// CHECK:  encoding: [0x62,0x51,0xce,0x30,0x7b,0xf0]
+          vcvtusi2ss %r8, {rd-sae}, %xmm22, %xmm14
+
+// CHECK: vcvtusi2ssq %r8, {rz-sae}, %xmm22, %xmm14
+// CHECK:  encoding: [0x62,0x51,0xce,0x70,0x7b,0xf0]
+          vcvtusi2ss %r8, {rz-sae}, %xmm22, %xmm14
+
+// CHECK: vcvtusi2ssq (%rcx), %xmm22, %xmm14
+// CHECK:  encoding: [0x62,0x71,0xce,0x00,0x7b,0x31]
+          vcvtusi2ssq (%rcx), %xmm22, %xmm14
+
+// CHECK: vcvtusi2ssq 291(%rax,%r14,8), %xmm22, %xmm14
+// CHECK:  encoding: [0x62,0x31,0xce,0x00,0x7b,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vcvtusi2ssq 291(%rax,%r14,8), %xmm22, %xmm14
+
+// CHECK: vcvtusi2ssq 1016(%rdx), %xmm22, %xmm14
+// CHECK:  encoding: [0x62,0x71,0xce,0x00,0x7b,0x72,0x7f]
+          vcvtusi2ssq 1016(%rdx), %xmm22, %xmm14
+
+// CHECK: vcvtusi2ssq 1024(%rdx), %xmm22, %xmm14
+// CHECK:  encoding: [0x62,0x71,0xce,0x00,0x7b,0xb2,0x00,0x04,0x00,0x00]
+          vcvtusi2ssq 1024(%rdx), %xmm22, %xmm14
+
+// CHECK: vcvtusi2ssq -1024(%rdx), %xmm22, %xmm14
+// CHECK:  encoding: [0x62,0x71,0xce,0x00,0x7b,0x72,0x80]
+          vcvtusi2ssq -1024(%rdx), %xmm22, %xmm14
+
+// CHECK: vcvtusi2ssq -1032(%rdx), %xmm22, %xmm14
+// CHECK:  encoding: [0x62,0x71,0xce,0x00,0x7b,0xb2,0xf8,0xfb,0xff,0xff]
+          vcvtusi2ssq -1032(%rdx), %xmm22, %xmm14