IIC_SSE_MOVU_P_RR, IIC_SSE_MOVU_P_RM
>;
+def SSE_DPPD_ITINS : OpndItins<
+ IIC_SSE_DPPD_RR, IIC_SSE_DPPD_RM
+>;
+
+def SSE_DPPS_ITINS : OpndItins<
+ IIC_SSE_DPPS_RR, IIC_SSE_DPPD_RM
+>;
+
+def DEFAULT_ITINS : OpndItins<
+ IIC_ALU_NONMEM, IIC_ALU_MEM
+>;
+
+def SSE_EXTRACT_ITINS : OpndItins<
+ IIC_SSE_EXTRACTPS_RR, IIC_SSE_EXTRACTPS_RM
+>;
+
+def SSE_INSERT_ITINS : OpndItins<
+ IIC_SSE_INSERTPS_RR, IIC_SSE_INSERTPS_RM
+>;
+
+def SSE_MPSADBW_ITINS : OpndItins<
+ IIC_SSE_MPSADBW_RR, IIC_SSE_MPSADBW_RM
+>;
+
+def SSE_PMULLD_ITINS : OpndItins<
+ IIC_SSE_PMULLD_RR, IIC_SSE_PMULLD_RM
+>;
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 Instructions Classes
//===----------------------------------------------------------------------===//
defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmpsd, f64, loadf64,
"cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
"cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SSE_ALU_F32S>, // same latency as 32 bit compare
+ SSE_ALU_F64S>,
XD;
}
SSE_ALU_F32S>, XS;
defm Int_CMPSD : sse12_cmp_scalar_int<f64mem, SSECC, int_x86_sse2_cmp_sd,
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
- SSE_ALU_F32S>, // same latency as f32
+ SSE_ALU_F64S>,
XD;
}
// sse12_cmp_packed - sse 1 & 2 compare packed instructions
multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
Operand CC, Intrinsic Int, string asm,
- string asm_alt, Domain d> {
+ string asm_alt, Domain d,
+ OpndItins itins = SSE_ALU_F32P> {
def rri : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
[(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))],
- IIC_SSE_CMPP_RR, d>,
+ itins.rr, d>,
Sched<[WriteFAdd]>;
def rmi : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
[(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))],
- IIC_SSE_CMPP_RM, d>,
+ itins.rm, d>,
Sched<[WriteFAddLd, ReadAfterLd]>;
// Accept explicit immediate argument form instead of comparison code.
let neverHasSideEffects = 1 in {
def rri_alt : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
- asm_alt, [], IIC_SSE_CMPP_RR, d>, Sched<[WriteFAdd]>;
+ asm_alt, [], itins.rr, d>, Sched<[WriteFAdd]>;
def rmi_alt : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
- asm_alt, [], IIC_SSE_CMPP_RM, d>,
+ asm_alt, [], itins.rm, d>,
Sched<[WriteFAddLd, ReadAfterLd]>;
}
}
defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse_cmp_ps,
"cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
"cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SSEPackedSingle>, TB;
+ SSEPackedSingle, SSE_ALU_F32P>, TB;
defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse2_cmp_pd,
"cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
"cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SSEPackedDouble>, TB, OpSize;
+ SSEPackedDouble, SSE_ALU_F64P>, TB, OpSize;
}
let Predicates = [HasAVX] in {
defm PAVGW : PDI_binop_all_int<0xE3, "pavgw", int_x86_sse2_pavg_w,
int_x86_avx2_pavg_w, SSE_INTALU_ITINS_P, 1>;
defm PSADBW : PDI_binop_all_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
- int_x86_avx2_psad_bw, SSE_INTALU_ITINS_P, 1>;
+ int_x86_avx2_psad_bw, SSE_PMADD, 1>;
let Predicates = [HasAVX] in
defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128,
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
"pslldq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))]>;
+ (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))],
+ IIC_SSE_INTSHDQ_P_RI>;
def PSRLDQri : PDIi8<0x73, MRM3r,
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
"psrldq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))]>;
+ (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))],
+ IIC_SSE_INTSHDQ_P_RI>;
// PSRADQri doesn't exist in SSE[1-3].
}
} // Constraints = "$src1 = $dst"
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
- IIC_SSE_PSHUF>, VEX, Sched<[WriteShuffle]>;
+ IIC_SSE_PSHUF_RI>, VEX, Sched<[WriteShuffle]>;
def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, i8imm:$src2),
!strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
- (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX,
+ (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX,
Sched<[WriteShuffleLd]>;
}
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode VR256:$src1, (i8 imm:$src2))))],
- IIC_SSE_PSHUF>, VEX, VEX_L, Sched<[WriteShuffle]>;
+ IIC_SSE_PSHUF_RI>, VEX, VEX_L, Sched<[WriteShuffle]>;
def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
(ins i256mem:$src1, i8imm:$src2),
!strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode (bitconvert (memopv4i64 addr:$src1)),
- (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L,
+ (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX, VEX_L,
Sched<[WriteShuffleLd]>;
}
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
- IIC_SSE_PSHUF>, Sched<[WriteShuffle]>;
+ IIC_SSE_PSHUF_RI>, Sched<[WriteShuffle]>;
def mi : Ii8<0x70, MRMSrcMem,
(outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
- (i8 imm:$src2))))], IIC_SSE_PSHUF>,
+ (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>,
Sched<[WriteShuffleLd]>;
}
}
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffle]>;
+ [], IIC_SSE_PALIGNRR>, OpSize, Sched<[WriteShuffle]>;
let mayLoad = 1 in
def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>;
+ [], IIC_SSE_PALIGNRM>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
}
// SSE4.1 - Packed Move with Sign/Zero Extend
//===----------------------------------------------------------------------===//
-multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId,
+ OpndItins itins = DEFAULT_ITINS> {
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
+ [(set VR128:$dst, (IntId VR128:$src))], itins.rr>, OpSize;
def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
- (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>,
- OpSize;
+ (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))],
+ itins.rm>, OpSize;
}
multiclass SS41I_binop_rm_int16_y<bits<8> opc, string OpcodeStr,
def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (IntId (load addr:$src)))]>, OpSize;
+ [(set VR256:$dst, (IntId (load addr:$src)))]>,
+ OpSize;
}
let Predicates = [HasAVX] in {
-defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>,
- VEX;
-defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", int_x86_sse41_pmovsxwd>,
- VEX;
-defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq", int_x86_sse41_pmovsxdq>,
- VEX;
-defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw", int_x86_sse41_pmovzxbw>,
- VEX;
-defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd", int_x86_sse41_pmovzxwd>,
- VEX;
-defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", int_x86_sse41_pmovzxdq>,
- VEX;
+defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw",
+ int_x86_sse41_pmovsxbw>, VEX;
+defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd",
+ int_x86_sse41_pmovsxwd>, VEX;
+defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq",
+ int_x86_sse41_pmovsxdq>, VEX;
+defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw",
+ int_x86_sse41_pmovzxbw>, VEX;
+defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd",
+ int_x86_sse41_pmovzxwd>, VEX;
+defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq",
+ int_x86_sse41_pmovzxdq>, VEX;
}
let Predicates = [HasAVX2] in {
int_x86_avx2_pmovzxdq>, VEX, VEX_L;
}
-defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>;
-defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>;
-defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>;
-defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw>;
-defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>;
-defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq>;
+defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw, SSE_INTALU_ITINS_P>;
+defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd, SSE_INTALU_ITINS_P>;
+defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq, SSE_INTALU_ITINS_P>;
+defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw, SSE_INTALU_ITINS_P>;
+defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd, SSE_INTALU_ITINS_P>;
+defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq, SSE_INTALU_ITINS_P>;
let Predicates = [HasAVX] in {
// Common patterns involving scalar load.
}
-multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId,
+ OpndItins itins = DEFAULT_ITINS> {
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
+ [(set VR128:$dst, (IntId VR128:$src))], itins.rr>, OpSize;
def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
- (IntId (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>,
+ (IntId (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))],
+ itins.rm>,
OpSize;
}
int_x86_avx2_pmovzxwq>, VEX, VEX_L;
}
-defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>;
-defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>;
-defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>;
-defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>;
+defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd,
+ SSE_INTALU_ITINS_P>;
+defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq,
+ SSE_INTALU_ITINS_P>;
+defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd,
+ SSE_INTALU_ITINS_P>;
+defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq,
+ SSE_INTALU_ITINS_P>;
let Predicates = [HasAVX] in {
// Common patterns involving scalar load
(PMOVZXWQrm addr:$src)>;
}
-multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId,
+ OpndItins itins = DEFAULT_ITINS> {
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq",
int_x86_avx2_pmovzxbq>, VEX, VEX_L;
}
-defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
-defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
+defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq,
+ SSE_INTALU_ITINS_P>;
+defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq,
+ SSE_INTALU_ITINS_P>;
let Predicates = [HasAVX2] in {
def : Pat<(v16i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWYrr VR128:$src)>;
/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
/// destination
-multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
+multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr,
+ OpndItins itins = DEFAULT_ITINS> {
def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
(ins VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set GR32:$dst,
- (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
+ (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))],
+ itins.rr>,
OpSize;
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
- addr:$dst)]>, OpSize;
+ addr:$dst)], itins.rm>, OpSize;
}
let ExeDomain = SSEPackedSingle in {
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, OpSize, VEX;
}
- defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
+ defm EXTRACTPS : SS41I_extractf32<0x17, "extractps", SSE_EXTRACT_ITINS>;
}
// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
// are optimized inserts that won't zero arbitrary elements in the destination
// vector. The next one matches the intrinsic and could zero arbitrary elements
// in the target vector.
-multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
+multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1,
+ OpndItins itins = DEFAULT_ITINS> {
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, u32u8imm:$src3),
!if(Is2Addr,
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
- (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>,
+ (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))], itins.rr>,
OpSize;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f32mem:$src2, u32u8imm:$src3),
[(set VR128:$dst,
(X86insrtps VR128:$src1,
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
- imm:$src3))]>, OpSize;
+ imm:$src3))], itins.rm>, OpSize;
}
let ExeDomain = SSEPackedSingle in {
let Predicates = [HasAVX] in
defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
- defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
+ defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1, SSE_INSERT_ITINS>;
}
//===----------------------------------------------------------------------===//
(outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>,
+ [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))],
+ IIC_SSE_ROUNDPS_REG>,
OpSize;
// Vector intrinsic operation, mem
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
- (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
+ (V4F32Int (mem_frag32 addr:$src1),imm:$src2))],
+ IIC_SSE_ROUNDPS_MEM>,
OpSize;
} // ExeDomain = SSEPackedSingle
(outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>,
+ [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))],
+ IIC_SSE_ROUNDPS_REG>,
OpSize;
// Vector intrinsic operation, mem
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
- (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
+ (V2F64Int (mem_frag64 addr:$src1),imm:$src2))],
+ IIC_SSE_ROUNDPS_REG>,
OpSize;
} // ExeDomain = SSEPackedDouble
}
let Defs = [EFLAGS], Predicates = [HasPOPCNT] in {
def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"popcnt{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)]>,
+ [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)],
+ IIC_SSE_POPCNT_RR>,
OpSize, XS;
def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"popcnt{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, (ctpop (loadi16 addr:$src))),
- (implicit EFLAGS)]>, OpSize, XS;
+ (implicit EFLAGS)], IIC_SSE_POPCNT_RM>, OpSize, XS;
def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"popcnt{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)]>,
+ [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)],
+ IIC_SSE_POPCNT_RR>,
XS;
def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"popcnt{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (ctpop (loadi32 addr:$src))),
- (implicit EFLAGS)]>, XS;
+ (implicit EFLAGS)], IIC_SSE_POPCNT_RM>, XS;
def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"popcnt{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)]>,
+ [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)],
+ IIC_SSE_POPCNT_RR>,
XS;
def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"popcnt{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (ctpop (loadi64 addr:$src))),
- (implicit EFLAGS)]>, XS;
+ (implicit EFLAGS)], IIC_SSE_POPCNT_RM>, XS;
}
/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
- Intrinsic IntId128, bit Is2Addr = 1> {
+ Intrinsic IntId128, bit Is2Addr = 1,
+ OpndItins itins = DEFAULT_ITINS> {
let isCommutable = 1 in
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, OpSize;
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))],
+ itins.rr>, OpSize;
def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(IntId128 VR128:$src1,
- (bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src2))))],
+ itins.rm>, OpSize;
}
/// SS41I_binop_rm_int_y - Simple SSE 4.1 binary operator
/// SS48I_binop_rm - Simple SSE41 binary operator.
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
- X86MemOperand x86memop, bit Is2Addr = 1> {
+ X86MemOperand x86memop, bit Is2Addr = 1,
+ OpndItins itins = DEFAULT_ITINS> {
let isCommutable = 1 in
def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2),
let isCommutable = 0 in
defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
defm PMINSB : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128,
- memopv2i64, i128mem>;
+ memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMINSD : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128,
- memopv2i64, i128mem>;
+ memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMINUD : SS48I_binop_rm<0x3B, "pminud", X86umin, v4i32, VR128,
- memopv2i64, i128mem>;
+ memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", X86umin, v8i16, VR128,
- memopv2i64, i128mem>;
+ memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", X86smax, v16i8, VR128,
- memopv2i64, i128mem>;
+ memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", X86smax, v4i32, VR128,
- memopv2i64, i128mem>;
+ memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", X86umax, v4i32, VR128,
- memopv2i64, i128mem>;
+ memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", X86umax, v8i16, VR128,
- memopv2i64, i128mem>;
- defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
+ memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
+ defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq,
+ 1, SSE_INTMUL_ITINS_P>;
}
let Predicates = [HasAVX] in {
let Constraints = "$src1 = $dst" in {
defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128,
- memopv2i64, i128mem>;
+ memopv2i64, i128mem, 1, SSE_PMULLD_ITINS>;
defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128,
- memopv2i64, i128mem>;
+ memopv2i64, i128mem, 1, SSE_INTALUQ_ITINS_P>;
}
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
- X86MemOperand x86memop, bit Is2Addr = 1> {
+ X86MemOperand x86memop, bit Is2Addr = 1,
+ OpndItins itins = DEFAULT_ITINS> {
let isCommutable = 1 in
def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, u32u8imm:$src3),
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))], itins.rr>,
OpSize;
def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst,
(IntId RC:$src1,
- (bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
+ (bitconvert (memop_frag addr:$src2)), imm:$src3))], itins.rm>,
OpSize;
}
let isCommutable = 0 in {
let ExeDomain = SSEPackedSingle in
defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
- VR128, memopv4f32, f128mem>;
+ VR128, memopv4f32, f128mem,
+ 1, SSE_INTALU_ITINS_P>;
let ExeDomain = SSEPackedDouble in
defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
- VR128, memopv2f64, f128mem>;
+ VR128, memopv2f64, f128mem,
+ 1, SSE_INTALU_ITINS_P>;
defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
- VR128, memopv2i64, i128mem>;
+ VR128, memopv2i64, i128mem,
+ 1, SSE_INTALU_ITINS_P>;
defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
- VR128, memopv2i64, i128mem>;
+ VR128, memopv2i64, i128mem,
+ 1, SSE_INTMUL_ITINS_P>;
}
let ExeDomain = SSEPackedSingle in
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
- VR128, memopv4f32, f128mem>;
+ VR128, memopv4f32, f128mem, 1,
+ SSE_DPPS_ITINS>;
let ExeDomain = SSEPackedDouble in
defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
- VR128, memopv2f64, f128mem>;
+ VR128, memopv2f64, f128mem, 1,
+ SSE_DPPD_ITINS>;
}
/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators
/// SS41I_ternary_int - SSE 4.1 ternary operator
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
- X86MemOperand x86memop, Intrinsic IntId> {
+ X86MemOperand x86memop, Intrinsic IntId,
+ OpndItins itins = DEFAULT_ITINS> {
def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>,
- OpSize;
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))],
+ itins.rr>, OpSize;
def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, x86memop:$src2),
"\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst,
(IntId VR128:$src1,
- (bitconvert (mem_frag addr:$src2)), XMM0))]>, OpSize;
+ (bitconvert (mem_frag addr:$src2)), XMM0))],
+ itins.rm>, OpSize;
}
}
"crc32{b}\t{$src2, $src1|$src1, $src2}",
[(set GR32:$dst,
(int_x86_sse42_crc32_32_8 GR32:$src1,
- (load addr:$src2)))]>;
+ (load addr:$src2)))], IIC_CRC32_MEM>;
def CRC32r32r8 : SS42FI<0xF0, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR8:$src2),
"crc32{b}\t{$src2, $src1|$src1, $src2}",
[(set GR32:$dst,
- (int_x86_sse42_crc32_32_8 GR32:$src1, GR8:$src2))]>;
+ (int_x86_sse42_crc32_32_8 GR32:$src1, GR8:$src2))],
+ IIC_CRC32_REG>;
def CRC32r32m16 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i16mem:$src2),
"crc32{w}\t{$src2, $src1|$src1, $src2}",
[(set GR32:$dst,
(int_x86_sse42_crc32_32_16 GR32:$src1,
- (load addr:$src2)))]>,
+ (load addr:$src2)))], IIC_CRC32_MEM>,
OpSize;
def CRC32r32r16 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR16:$src2),
"crc32{w}\t{$src2, $src1|$src1, $src2}",
[(set GR32:$dst,
- (int_x86_sse42_crc32_32_16 GR32:$src1, GR16:$src2))]>,
+ (int_x86_sse42_crc32_32_16 GR32:$src1, GR16:$src2))],
+ IIC_CRC32_REG>,
OpSize;
def CRC32r32m32 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
"crc32{l}\t{$src2, $src1|$src1, $src2}",
[(set GR32:$dst,
(int_x86_sse42_crc32_32_32 GR32:$src1,
- (load addr:$src2)))]>;
+ (load addr:$src2)))], IIC_CRC32_MEM>;
def CRC32r32r32 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"crc32{l}\t{$src2, $src1|$src1, $src2}",
[(set GR32:$dst,
- (int_x86_sse42_crc32_32_32 GR32:$src1, GR32:$src2))]>;
+ (int_x86_sse42_crc32_32_32 GR32:$src1, GR32:$src2))],
+ IIC_CRC32_REG>;
def CRC32r64m8 : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i8mem:$src2),
"crc32{b}\t{$src2, $src1|$src1, $src2}",
[(set GR64:$dst,
(int_x86_sse42_crc32_64_8 GR64:$src1,
- (load addr:$src2)))]>,
+ (load addr:$src2)))], IIC_CRC32_MEM>,
REX_W;
def CRC32r64r8 : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR8:$src2),
"crc32{b}\t{$src2, $src1|$src1, $src2}",
[(set GR64:$dst,
- (int_x86_sse42_crc32_64_8 GR64:$src1, GR8:$src2))]>,
+ (int_x86_sse42_crc32_64_8 GR64:$src1, GR8:$src2))],
+ IIC_CRC32_REG>,
REX_W;
def CRC32r64m64 : SS42FI<0xF1, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"crc32{q}\t{$src2, $src1|$src1, $src2}",
[(set GR64:$dst,
(int_x86_sse42_crc32_64_64 GR64:$src1,
- (load addr:$src2)))]>,
+ (load addr:$src2)))], IIC_CRC32_MEM>,
REX_W;
def CRC32r64r64 : SS42FI<0xF1, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"crc32{q}\t{$src2, $src1|$src1, $src2}",
[(set GR64:$dst,
- (int_x86_sse42_crc32_64_64 GR64:$src1, GR64:$src2))]>,
+ (int_x86_sse42_crc32_64_64 GR64:$src1, GR64:$src2))],
+ IIC_CRC32_REG>,
REX_W;
}
+//===----------------------------------------------------------------------===//
+// SHA-NI Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId,
+ bit UsesXMM0 = 0> {
+ def rr : I<Opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [!if(UsesXMM0,
+ (set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)),
+ (set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>, T8;
+
+ def rm : I<Opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [!if(UsesXMM0,
+ (set VR128:$dst, (IntId VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)), XMM0)),
+ (set VR128:$dst, (IntId VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)))))]>, T8;
+}
+
+let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
+ def SHA1RNDS4rri : Ii8<0xCC, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (int_x86_sha1rnds4 VR128:$src1, VR128:$src2,
+ (i8 imm:$src3)))]>, TA;
+ def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (int_x86_sha1rnds4 VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)),
+ (i8 imm:$src3)))]>, TA;
+
+ defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", int_x86_sha1nexte>;
+ defm SHA1MSG1 : SHAI_binop<0xC9, "sha1msg1", int_x86_sha1msg1>;
+ defm SHA1MSG2 : SHAI_binop<0xCA, "sha1msg2", int_x86_sha1msg2>;
+
+ let Uses=[XMM0] in
+ defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2", int_x86_sha256rnds2, 1>;
+
+ defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1", int_x86_sha256msg1>;
+ defm SHA256MSG2 : SHAI_binop<0xCD, "sha256msg2", int_x86_sha256msg2>;
+}
+
+// Aliases with explicit %xmm0
+def : InstAlias<"sha256rnds2\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
+ (SHA256RNDS2rr VR128:$dst, VR128:$src2)>;
+def : InstAlias<"sha256rnds2\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
+ (SHA256RNDS2rm VR128:$dst, i128mem:$src2)>;
+
//===----------------------------------------------------------------------===//
// AES-NI Instructions
//===----------------------------------------------------------------------===//
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
- (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>;
+ (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))],
+ IIC_SSE_PCLMULQDQ_RR>;
def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1,
- (memopv2i64 addr:$src2), imm:$src3))]>;
+ (memopv2i64 addr:$src2), imm:$src3))],
+ IIC_SSE_PCLMULQDQ_RM>;
} // Constraints = "$src1 = $dst"
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (memopv4f32 addr:$src2),
+def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2),
(iPTR imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (memopv2f64 addr:$src2),
+def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
(iPTR imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2),
+def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
(iPTR imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1),
- (bc_v4i32 (memopv2i64 addr:$src2)),
+ (bc_v4i32 (loadv2i64 addr:$src2)),
(iPTR imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1),
- (bc_v16i8 (memopv2i64 addr:$src2)),
+ (bc_v16i8 (loadv2i64 addr:$src2)),
(iPTR imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1),
- (bc_v8i16 (memopv2i64 addr:$src2)),
+ (bc_v8i16 (loadv2i64 addr:$src2)),
(iPTR imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
(v4f64 VR256:$src1),
(EXTRACT_get_vextract128_imm VR128:$ext)))>;
-def : Pat<(alignedstore (v4f32 (vextract128_extract:$ext (v8f32 VR256:$src1),
- (iPTR imm))), addr:$dst),
+def : Pat<(store (v4f32 (vextract128_extract:$ext (v8f32 VR256:$src1),
+ (iPTR imm))), addr:$dst),
(VEXTRACTF128mr addr:$dst, VR256:$src1,
(EXTRACT_get_vextract128_imm VR128:$ext))>;
-def : Pat<(alignedstore (v2f64 (vextract128_extract:$ext (v4f64 VR256:$src1),
- (iPTR imm))), addr:$dst),
+def : Pat<(store (v2f64 (vextract128_extract:$ext (v4f64 VR256:$src1),
+ (iPTR imm))), addr:$dst),
(VEXTRACTF128mr addr:$dst, VR256:$src1,
(EXTRACT_get_vextract128_imm VR128:$ext))>;
}
TA, OpSize, VEX;
}
-let Predicates = [HasAVX, HasF16C] in {
+let Predicates = [HasF16C] in {
defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, int_x86_vcvtph2ps_128>;
defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>, VEX_L;
defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>;
(VINSERTI128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2),
+def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
(iPTR imm)),
(VINSERTI128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1),
- (bc_v4i32 (memopv2i64 addr:$src2)),
+ (bc_v4i32 (loadv2i64 addr:$src2)),
(iPTR imm)),
(VINSERTI128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1),
- (bc_v16i8 (memopv2i64 addr:$src2)),
+ (bc_v16i8 (loadv2i64 addr:$src2)),
(iPTR imm)),
(VINSERTI128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1),
- (bc_v8i16 (memopv2i64 addr:$src2)),
+ (bc_v8i16 (loadv2i64 addr:$src2)),
(iPTR imm)),
(VINSERTI128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
(v32i8 VR256:$src1),
(EXTRACT_get_vextract128_imm VR128:$ext)))>;
-def : Pat<(alignedstore (v2i64 (vextract128_extract:$ext (v4i64 VR256:$src1),
- (iPTR imm))), addr:$dst),
+def : Pat<(store (v2i64 (vextract128_extract:$ext (v4i64 VR256:$src1),
+ (iPTR imm))), addr:$dst),
(VEXTRACTI128mr addr:$dst, VR256:$src1,
(EXTRACT_get_vextract128_imm VR128:$ext))>;
-def : Pat<(alignedstore (v4i32 (vextract128_extract:$ext (v8i32 VR256:$src1),
- (iPTR imm))), addr:$dst),
+def : Pat<(store (v4i32 (vextract128_extract:$ext (v8i32 VR256:$src1),
+ (iPTR imm))), addr:$dst),
(VEXTRACTI128mr addr:$dst, VR256:$src1,
(EXTRACT_get_vextract128_imm VR128:$ext))>;
-def : Pat<(alignedstore (v8i16 (vextract128_extract:$ext (v16i16 VR256:$src1),
- (iPTR imm))), addr:$dst),
+def : Pat<(store (v8i16 (vextract128_extract:$ext (v16i16 VR256:$src1),
+ (iPTR imm))), addr:$dst),
(VEXTRACTI128mr addr:$dst, VR256:$src1,
(EXTRACT_get_vextract128_imm VR128:$ext))>;
-def : Pat<(alignedstore (v16i8 (vextract128_extract:$ext (v32i8 VR256:$src1),
- (iPTR imm))), addr:$dst),
+def : Pat<(store (v16i8 (vextract128_extract:$ext (v32i8 VR256:$src1),
+ (iPTR imm))), addr:$dst),
(VEXTRACTI128mr addr:$dst, VR256:$src1,
(EXTRACT_get_vextract128_imm VR128:$ext))>;
}