From 6539dc6e6cb247de6960b2b1b3b8b01badb90728 Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Thu, 24 Jun 2010 00:32:06 +0000 Subject: [PATCH] Add AVX CMP{SS,SD}{rr,rm} instructions and encoding testcases git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@106705 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrFormats.td | 12 +++ lib/Target/X86/X86InstrSSE.td | 79 +++++++++------ test/MC/AsmParser/X86/x86_32-encoding.s | 128 ++++++++++++++++++++++++ test/MC/AsmParser/X86/x86_64-encoding.s | 128 ++++++++++++++++++++++++ 4 files changed, 317 insertions(+), 30 deletions(-) diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 5e8c8258c2c..c492853adb0 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -228,6 +228,18 @@ class SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm); } +// SIi8 - SSE 1 & 2 scalar instructions +class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : Ii8<o, F, outs, ins, asm, pattern> { + let Predicates = !if(hasVEXPrefix /* VEX_4V */, + !if(!eq(Prefix, 11 /* XD */), [HasAVX, HasSSE2], [HasAVX, HasSSE1]), + !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2])); + + // AVX instructions have a 'v' prefix in the mnemonic + let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm); +} + // PI - SSE 1 & 2 packed instructions class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, Domain d> diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 3ea7ca99434..1afda4a2180 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -725,42 +725,61 @@ multiclass sse12_cmp_alt; } -// Comparison instructions -let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in { - def CMPSSrr : SSIi8<0xC2, MRMSrcReg, - (outs FR32:$dst), (ins FR32:$src1, FR32:$src, SSECC:$cc), - "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>; +multiclass 
sse12_cmp_scalar { + def rr : SIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src, sse_imm_op:$cc), + asm, []>; let mayLoad = 1 in - def CMPSSrm : SSIi8<0xC2, MRMSrcMem, - (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, SSECC:$cc), - "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>; + def rm : SIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, x86memop:$src, sse_imm_op:$cc), + asm, []>; +} - def CMPSDrr : SDIi8<0xC2, MRMSrcReg, - (outs FR64:$dst), (ins FR64:$src1, FR64:$src, SSECC:$cc), - "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>; +// FIXME: rename instructions to only use the class above +multiclass sse12_cmp_scalar_alt { + def rr_alt : SIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src, sse_imm_op:$src2), + asm, []>; let mayLoad = 1 in - def CMPSDrm : SDIi8<0xC2, MRMSrcMem, - (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, SSECC:$cc), - "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>; + def rm_alt : SIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, x86memop:$src, sse_imm_op:$src2), + asm, []>; +} -// Accept explicit immediate argument form instead of comparison code. 
-let isAsmParserOnly = 1 in { - def CMPSSrr_alt : SSIi8<0xC2, MRMSrcReg, - (outs FR32:$dst), (ins FR32:$src1, FR32:$src, i8imm:$src2), - "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>; - let mayLoad = 1 in - def CMPSSrm_alt : SSIi8<0xC2, MRMSrcMem, - (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, i8imm:$src2), - "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>; +let neverHasSideEffects = 1, isAsmParserOnly = 1 in { + defm VCMPSS : sse12_cmp_scalar, + XS, VEX_4V; + defm VCMPSD : sse12_cmp_scalar, + XD, VEX_4V; - def CMPSDrr_alt : SDIi8<0xC2, MRMSrcReg, - (outs FR64:$dst), (ins FR64:$src1, FR64:$src, i8imm:$src2), - "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>; - let mayLoad = 1 in - def CMPSDrm_alt : SDIi8<0xC2, MRMSrcMem, - (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, i8imm:$src2), - "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>; + // Accept explicit immediate argument form instead of comparison code. + let isAsmParserOnly = 1 in { + defm VCMPSS : sse12_cmp_scalar_alt, + XS, VEX_4V; + defm VCMPSD : sse12_cmp_scalar_alt, + XD, VEX_4V; + } } + +let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in { + defm CMPSS : sse12_cmp_scalar, XS; + defm CMPSD : sse12_cmp_scalar, XD; + + // Accept explicit immediate argument form instead of comparison code. 
+ let isAsmParserOnly = 1 in { + defm CMPSS : sse12_cmp_scalar_alt, XS; + defm CMPSD : sse12_cmp_scalar_alt, XD; + } } let Defs = [EFLAGS] in { diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s index 286911cc845..f9127f4bd06 100644 --- a/test/MC/AsmParser/X86/x86_32-encoding.s +++ b/test/MC/AsmParser/X86/x86_32-encoding.s @@ -10526,3 +10526,131 @@ // CHECK: encoding: [0xc5,0xf9,0x50,0xc2] vmovmskpd %xmm2, %eax +// CHECK: vcmpss $0, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x00] + vcmpeqss %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $2, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x02] + vcmpless %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $1, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x01] + vcmpltss %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $4, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x04] + vcmpneqss %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $6, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x06] + vcmpnless %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $5, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x05] + vcmpnltss %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $7, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x07] + vcmpordss %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $3, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x03] + vcmpunordss %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpss $0, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x00] + vcmpeqss -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpss $2, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x02] + vcmpless -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpss $1, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x01] + vcmpltss -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpss $4, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: 
[0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x04] + vcmpneqss -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpss $6, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x06] + vcmpnless -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpss $5, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x05] + vcmpnltss -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpss $7, -4(%ebx,%ecx,8), %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordss -4(%ebx,%ecx,8), %xmm6, %xmm2 + +// CHECK: vcmpss $3, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x03] + vcmpunordss -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpsd $0, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x00] + vcmpeqsd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $2, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x02] + vcmplesd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $1, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x01] + vcmpltsd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $4, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x04] + vcmpneqsd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $6, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x06] + vcmpnlesd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $5, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x05] + vcmpnltsd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $7, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x07] + vcmpordsd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $3, %xmm1, %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x03] + vcmpunordsd %xmm1, %xmm2, %xmm3 + +// CHECK: vcmpsd $0, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x00] + vcmpeqsd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpsd $2, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x02] + vcmplesd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// 
CHECK: vcmpsd $1, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x01] + vcmpltsd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpsd $4, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x04] + vcmpneqsd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpsd $6, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x06] + vcmpnlesd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpsd $5, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x05] + vcmpnltsd -4(%ebx,%ecx,8), %xmm2, %xmm3 + +// CHECK: vcmpsd $7, -4(%ebx,%ecx,8), %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordsd -4(%ebx,%ecx,8), %xmm6, %xmm2 + +// CHECK: vcmpsd $3, -4(%ebx,%ecx,8), %xmm2, %xmm3 +// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x03] + vcmpunordsd -4(%ebx,%ecx,8), %xmm2, %xmm3 + diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s index 91d95c05c5b..2eca58df4d1 100644 --- a/test/MC/AsmParser/X86/x86_64-encoding.s +++ b/test/MC/AsmParser/X86/x86_64-encoding.s @@ -582,3 +582,131 @@ pshufb CPI1_0(%rip), %xmm1 // CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x03] vcmpunordpd -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: vcmpss $0, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x00] + vcmpeqss %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $2, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x02] + vcmpless %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $1, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x01] + vcmpltss %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $4, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x04] + vcmpneqss %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $6, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x06] + vcmpnless %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $5, %xmm11, 
%xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x05] + vcmpnltss %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $7, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x07] + vcmpordss %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $3, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x03] + vcmpunordss %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpss $0, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x00] + vcmpeqss -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpss $2, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x02] + vcmpless -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpss $1, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x01] + vcmpltss -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpss $4, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x04] + vcmpneqss -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpss $6, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x06] + vcmpnless -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpss $5, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x05] + vcmpnltss -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpss $7, -4(%rbx,%rcx,8), %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordss -4(%rbx,%rcx,8), %xmm6, %xmm2 + +// CHECK: vcmpss $3, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x03] + vcmpunordss -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpsd $0, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x00] + vcmpeqsd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $2, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x02] + vcmplesd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $1, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: 
[0xc4,0x41,0x1b,0xc2,0xeb,0x01] + vcmpltsd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $4, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x04] + vcmpneqsd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $6, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x06] + vcmpnlesd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $5, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x05] + vcmpnltsd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $7, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x07] + vcmpordsd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $3, %xmm11, %xmm12, %xmm13 +// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x03] + vcmpunordsd %xmm11, %xmm12, %xmm13 + +// CHECK: vcmpsd $0, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x00] + vcmpeqsd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpsd $2, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x02] + vcmplesd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpsd $1, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x01] + vcmpltsd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpsd $4, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x04] + vcmpneqsd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpsd $6, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x06] + vcmpnlesd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpsd $5, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x05] + vcmpnltsd -4(%rbx,%rcx,8), %xmm12, %xmm13 + +// CHECK: vcmpsd $7, -4(%rbx,%rcx,8), %xmm6, %xmm2 +// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x07] + vcmpordsd -4(%rbx,%rcx,8), %xmm6, %xmm2 + +// CHECK: vcmpsd $3, -4(%rbx,%rcx,8), %xmm12, %xmm13 +// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x03] + vcmpunordsd -4(%rbx,%rcx,8), %xmm12, 
%xmm13 + -- 2.34.1