(VMOVHPSrm VR128:$src1, addr:$src2)>;
// VMOVHPD patterns
-
+
// FIXME: Instead of X86Unpckl, there should be an X86Movlhpd here. The problem
// is during lowering, where it's not possible to recognize the load fold
// because it has two uses through a bitcast. One use disappears at isel time
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
}
-let Predicates = [HasAVX] in {
- // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
- // problem is during lowering, where it's not possible to recognize the load
- // fold cause it has two uses through a bitcast. One use disappears at isel
- // time and the fold opportunity reappears.
- def : Pat<(v2f64 (X86Movddup VR128:$src)),
- (VUNPCKLPDrr VR128:$src, VR128:$src)>;
-}
-
-let Predicates = [UseSSE2] in {
- // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
- // problem is during lowering, where it's not possible to recognize the load
- // fold cause it has two uses through a bitcast. One use disappears at isel
- // time and the fold opportunity reappears.
- def : Pat<(v2f64 (X86Movddup VR128:$src)),
- (UNPCKLPDrr VR128:$src, VR128:$src)>;
-}
-
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Extract Floating-Point Sign mask
//===----------------------------------------------------------------------===//
//===---------------------------------------------------------------------===//
multiclass sse3_replicate_dfp<string OpcodeStr> {
-let hasSideEffects = 0 in
def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [], IIC_SSE_MOV_LH>, Sched<[WriteFShuffle]>;
+ [(set VR128:$dst, (v2f64 (X86Movddup VR128:$src)))],
+ IIC_SSE_MOV_LH>, Sched<[WriteFShuffle]>;
def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
defm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem>;
// AVX2 Patterns
-multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, SDNode ExtOp> {
+multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy, SDNode ExtOp> {
// Register-Register patterns
def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))),
(!cast<I>(OpcPrefix#BWYrr) VR128:$src)>;
def : Pat<(v4i64 (ExtOp (v8i32 VR256:$src))),
(!cast<I>(OpcPrefix#DQYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ // Simple Register-Memory patterns
+ def : Pat<(v16i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
+ (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
+ def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
+ (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
+ def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
+ (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
+
+ def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
+ (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
+ def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
+ (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
+
+ def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)),
+ (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
+
// AVX2 Register-Memory patterns
def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
}
let Predicates = [HasAVX2] in {
- defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", X86vsext>;
- defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", X86vzext>;
+ defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", X86vsext>;
+ defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", X86vzext>;
}
// SSE4.1/AVX patterns.
-multiclass SS41I_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
- PatFrag ExtLoad16> {
+multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
+ SDNode ExtOp, PatFrag ExtLoad16> {
def : Pat<(v8i16 (ExtOp (v16i8 VR128:$src))),
(!cast<I>(OpcPrefix#BWrr) VR128:$src)>;
def : Pat<(v4i32 (ExtOp (v16i8 VR128:$src))),
def : Pat<(v2i64 (ExtOp (v4i32 VR128:$src))),
(!cast<I>(OpcPrefix#DQrr) VR128:$src)>;
+ def : Pat<(v8i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
+ (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
+ def : Pat<(v4i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
+ (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
+ def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
+ (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
+
+ def : Pat<(v4i32 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
+ (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
+ def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
+ (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
+
+ def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)),
+ (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
+
def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BWrm) addr:$src)>;
def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
}
let Predicates = [HasAVX] in {
- defm : SS41I_pmovx_patterns<"VPMOVSX", X86vsext, extloadi32i16>;
- defm : SS41I_pmovx_patterns<"VPMOVZX", X86vzext, loadi16_anyext>;
+ defm : SS41I_pmovx_patterns<"VPMOVSX", "s", X86vsext, extloadi32i16>;
+ defm : SS41I_pmovx_patterns<"VPMOVZX", "z", X86vzext, loadi16_anyext>;
}
let Predicates = [UseSSE41] in {
- defm : SS41I_pmovx_patterns<"PMOVSX", X86vsext, extloadi32i16>;
- defm : SS41I_pmovx_patterns<"PMOVZX", X86vzext, loadi16_anyext>;
+ defm : SS41I_pmovx_patterns<"PMOVSX", "s", X86vsext, extloadi32i16>;
+ defm : SS41I_pmovx_patterns<"PMOVZX", "z", X86vzext, loadi16_anyext>;
}
//===----------------------------------------------------------------------===//
(loadv2f64 (add addr:$src, (iPTR 16))),
(iPTR 2)),
(VMOVUPDYrm addr:$src)>;
-
+
def : Pat<(insert_subvector
(v32i8 (insert_subvector
undef, (bc_v16i8 (loadv2i64 addr:$src)), (iPTR 0))),
(bc_v16i8 (loadv2i64 (add addr:$src, (iPTR 16)))),
(iPTR 16)),
(VMOVDQUYrm addr:$src)>;
-
+
def : Pat<(insert_subvector
(v16i16 (insert_subvector
undef, (bc_v8i16 (loadv2i64 addr:$src)), (iPTR 0))),
(bc_v8i16 (loadv2i64 (add addr:$src, (iPTR 16)))),
(iPTR 8)),
(VMOVDQUYrm addr:$src)>;
-
+
def : Pat<(insert_subvector
(v8i32 (insert_subvector
undef, (bc_v4i32 (loadv2i64 addr:$src)), (iPTR 0))),