// SSE 1 & 2 - Move FP Scalar Instructions
//
// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
-// register copies because it's a partial register update; FsMOVAPSrr/FsMOVAPDrr
-// is used instead. Register-to-register movss/movsd is not modeled as an
-// INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable
-// in terms of a copy, and just mentioned, we don't use movss/movsd for copies.
+// register copies because it's a partial register update; Register-to-register
+// movss/movsd is not modeled as an INSERT_SUBREG because INSERT_SUBREG requires
+// that the insert be implementable in terms of a copy, and just mentioned, we
+// don't use movss/movsd for copies.
//===----------------------------------------------------------------------===//
multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt,
(MOVUPSmr addr:$dst, VR128:$src)>;
}
-// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
-// bits are disregarded. FIXME: Set encoding to pseudo!
-let neverHasSideEffects = 1, SchedRW = [WriteMove] in {
-def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
- "movaps\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>, VEX;
-def FsVMOVAPDrr : VPDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
- "movapd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>, VEX;
-def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
- "movaps\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>;
-def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
- "movapd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>;
-}
-
// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
// bits are disregarded. FIXME: Set encoding to pseudo!
let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in {
"shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
memopv8f32, SSEPackedSingle>, TB, VEX_4V, VEX_L;
defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
- "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
+ "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
memopv2f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
- "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
+ "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
memopv4f64, SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L;
let Constraints = "$src1 = $dst" in {
XS, Requires<[UseSSE2]>, Sched<[WriteLoad]>;
let Predicates = [UseAVX], AddedComplexity = 20 in {
- def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
- (VMOVZQI2PQIrm addr:$src)>;
def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
(VMOVZQI2PQIrm addr:$src)>;
def : Pat<(v2i64 (X86vzload addr:$src)),
}
let Predicates = [UseSSE2], AddedComplexity = 20 in {
- def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
- (MOVZQI2PQIrm addr:$src)>;
def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
(MOVZQI2PQIrm addr:$src)>;
def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
let AddedComplexity = 20 in {
let Predicates = [UseAVX] in {
- def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
- (VMOVZPQILo2PQIrm addr:$src)>;
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
(VMOVZPQILo2PQIrr VR128:$src)>;
}
let Predicates = [UseSSE2] in {
- def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
- (MOVZPQILo2PQIrm addr:$src)>;
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
(MOVZPQILo2PQIrr VR128:$src)>;
}
IIC_SSE_MOVDQ>, VEX, VEX_W;
} // SchedRW
-// Instructions for the disassembler
-// xr = XMM register
-// xm = mem64
-
-let SchedRW = [WriteMove] in {
-let Predicates = [UseAVX] in
-def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
-def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, XS;
-} // SchedRW
-
//===---------------------------------------------------------------------===//
// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//
}
let ExeDomain = SSEPackedSingle in {
- let Predicates = [HasAVX] in
+ let Predicates = [UseAVX] in
defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1, SSE_INSERT_ITINS>;
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (memopv4f32 addr:$src2),
+def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2),
(iPTR imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (memopv2f64 addr:$src2),
+def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
(iPTR imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2),
+def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
(iPTR imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1),
- (bc_v4i32 (memopv2i64 addr:$src2)),
+ (bc_v4i32 (loadv2i64 addr:$src2)),
(iPTR imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1),
- (bc_v16i8 (memopv2i64 addr:$src2)),
+ (bc_v16i8 (loadv2i64 addr:$src2)),
(iPTR imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1),
- (bc_v8i16 (memopv2i64 addr:$src2)),
+ (bc_v8i16 (loadv2i64 addr:$src2)),
(iPTR imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
(v4f64 VR256:$src1),
(EXTRACT_get_vextract128_imm VR128:$ext)))>;
-def : Pat<(alignedstore (v4f32 (vextract128_extract:$ext (v8f32 VR256:$src1),
- (iPTR imm))), addr:$dst),
+def : Pat<(store (v4f32 (vextract128_extract:$ext (v8f32 VR256:$src1),
+ (iPTR imm))), addr:$dst),
(VEXTRACTF128mr addr:$dst, VR256:$src1,
(EXTRACT_get_vextract128_imm VR128:$ext))>;
-def : Pat<(alignedstore (v2f64 (vextract128_extract:$ext (v4f64 VR256:$src1),
- (iPTR imm))), addr:$dst),
+def : Pat<(store (v2f64 (vextract128_extract:$ext (v4f64 VR256:$src1),
+ (iPTR imm))), addr:$dst),
(VEXTRACTF128mr addr:$dst, VR256:$src1,
(EXTRACT_get_vextract128_imm VR128:$ext))>;
}
(VINSERTI128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2),
+def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
(iPTR imm)),
(VINSERTI128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1),
- (bc_v4i32 (memopv2i64 addr:$src2)),
+ (bc_v4i32 (loadv2i64 addr:$src2)),
(iPTR imm)),
(VINSERTI128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1),
- (bc_v16i8 (memopv2i64 addr:$src2)),
+ (bc_v16i8 (loadv2i64 addr:$src2)),
(iPTR imm)),
(VINSERTI128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1),
- (bc_v8i16 (memopv2i64 addr:$src2)),
+ (bc_v8i16 (loadv2i64 addr:$src2)),
(iPTR imm)),
(VINSERTI128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
(v32i8 VR256:$src1),
(EXTRACT_get_vextract128_imm VR128:$ext)))>;
-def : Pat<(alignedstore (v2i64 (vextract128_extract:$ext (v4i64 VR256:$src1),
- (iPTR imm))), addr:$dst),
+def : Pat<(store (v2i64 (vextract128_extract:$ext (v4i64 VR256:$src1),
+ (iPTR imm))), addr:$dst),
(VEXTRACTI128mr addr:$dst, VR256:$src1,
(EXTRACT_get_vextract128_imm VR128:$ext))>;
-def : Pat<(alignedstore (v4i32 (vextract128_extract:$ext (v8i32 VR256:$src1),
- (iPTR imm))), addr:$dst),
+def : Pat<(store (v4i32 (vextract128_extract:$ext (v8i32 VR256:$src1),
+ (iPTR imm))), addr:$dst),
(VEXTRACTI128mr addr:$dst, VR256:$src1,
(EXTRACT_get_vextract128_imm VR128:$ext))>;
-def : Pat<(alignedstore (v8i16 (vextract128_extract:$ext (v16i16 VR256:$src1),
- (iPTR imm))), addr:$dst),
+def : Pat<(store (v8i16 (vextract128_extract:$ext (v16i16 VR256:$src1),
+ (iPTR imm))), addr:$dst),
(VEXTRACTI128mr addr:$dst, VR256:$src1,
(EXTRACT_get_vextract128_imm VR128:$ext))>;
-def : Pat<(alignedstore (v16i8 (vextract128_extract:$ext (v32i8 VR256:$src1),
- (iPTR imm))), addr:$dst),
+def : Pat<(store (v16i8 (vextract128_extract:$ext (v32i8 VR256:$src1),
+ (iPTR imm))), addr:$dst),
(VEXTRACTI128mr addr:$dst, VR256:$src1,
(EXTRACT_get_vextract128_imm VR128:$ext))>;
}