//
//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MMX specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+// Low word of MMX to GPR.
+def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1,
+ [SDTCisVT<0, i32>, SDTCisVT<1, x86mmx>]>>;
+// GPR to low word of MMX.
+def MMX_X86movw2d : SDNode<"X86ISD::MMX_MOVW2D", SDTypeProfile<1, 1,
+ [SDTCisVT<0, x86mmx>, SDTCisVT<1, i32>]>>;
+
//===----------------------------------------------------------------------===//
// MMX Pattern Fragments
//===----------------------------------------------------------------------===//
def load_mmx : PatFrag<(ops node:$ptr), (x86mmx (load node:$ptr))>;
+def load_mvmmx : PatFrag<(ops node:$ptr),
+ (x86mmx (MMX_X86movw2d (load node:$ptr)))>;
def bc_mmx : PatFrag<(ops node:$in), (x86mmx (bitconvert node:$in))>;
//===----------------------------------------------------------------------===//
def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>;
+def SDTFPBinOpRound : SDTypeProfile<1, 3, [ // fadd_round, fmul_round, etc.
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>, SDTCisInt<3>]>;
+
def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
+def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
+ SDTCisSameAs<1,2>, SDTCisSameAs<1,3>, SDTCisInt<4>]>;
def STDFp1SrcRm : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>,
SDTCisVec<0>, SDTCisInt<2>]>;
def STDFp2SrcRm : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
SDTCisVec<0>, SDTCisInt<3>]>;
+def STDFp3SrcRm : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
+ SDTCisVec<0>, SDTCisInt<3>, SDTCisInt<4>]>;
def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>;
def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>;
def X86Addsub : SDNode<"X86ISD::ADDSUB", SDTFPBinOp>;
+def X86faddRnd : SDNode<"X86ISD::FADD_RND", SDTFPBinOpRound>;
+def X86fsubRnd : SDNode<"X86ISD::FSUB_RND", SDTFPBinOpRound>;
+def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>;
+def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>;
+def X86fmaxRnd : SDNode<"X86ISD::FMAX", SDTFPBinOpRound>;
+def X86fminRnd : SDNode<"X86ISD::FMIN", SDTFPBinOpRound>;
+
def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>;
def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFma>;
def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFma>;
def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFma>;
+def X86FmaddRnd : SDNode<"X86ISD::FMADD_RND", SDTFmaRound>;
+def X86FnmaddRnd : SDNode<"X86ISD::FNMADD_RND", SDTFmaRound>;
+def X86FmsubRnd : SDNode<"X86ISD::FMSUB_RND", SDTFmaRound>;
+def X86FnmsubRnd : SDNode<"X86ISD::FNMSUB_RND", SDTFmaRound>;
+def X86FmaddsubRnd : SDNode<"X86ISD::FMADDSUB_RND", SDTFmaRound>;
+def X86FmsubaddRnd : SDNode<"X86ISD::FMSUBADD_RND", SDTFmaRound>;
+
def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", STDFp1SrcRm>;
def X86rcp28 : SDNode<"X86ISD::RCP28", STDFp1SrcRm>;
def X86exp2 : SDNode<"X86ISD::EXP2", STDFp1SrcRm>;
def X86rsqrt28s : SDNode<"X86ISD::RSQRT28", STDFp2SrcRm>;
def X86rcp28s : SDNode<"X86ISD::RCP28", STDFp2SrcRm>;
+def X86RndScale : SDNode<"X86ISD::RNDSCALE", STDFp3SrcRm>;
def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>,
def X86pcmpistri : SDNode<"X86ISD::PCMPISTRI", SDT_PCMPISTRI>;
def X86pcmpestri : SDNode<"X86ISD::PCMPESTRI", SDT_PCMPESTRI>;
+def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
+ SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>;
+def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 3>,
+ SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>;
+
//===----------------------------------------------------------------------===//
// SSE Complex Patterns
//===----------------------------------------------------------------------===//
def extloadv4f32 : PatFrag<(ops node:$ptr), (v4f64 (extloadvf32 node:$ptr))>;
def extloadv8f32 : PatFrag<(ops node:$ptr), (v8f64 (extloadvf32 node:$ptr))>;
+// These are needed to match a scalar load that is used in a vector-only
+// math instruction such as the FP logical ops: andps, andnps, orps, xorps.
+// The memory operand is required to be a 128-bit load, so it must be converted
+// from a vector to a scalar.
+def loadf32_128 : PatFrag<(ops node:$ptr),
+ (f32 (vector_extract (loadv4f32 node:$ptr), (iPTR 0)))>;
+def loadf64_128 : PatFrag<(ops node:$ptr),
+ (f64 (vector_extract (loadv2f64 node:$ptr), (iPTR 0)))>;
+
// Like 'store', but always requires 128-bit vector alignment.
def alignedstore : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
// setting a feature bit in the processor (on startup, for example).
// Opteron 10h and later implement such a feature.
def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return Subtarget->hasVectorUAMem()
+ return Subtarget->hasSSEUnalignedMem()
|| cast<LoadSDNode>(N)->getAlignment() >= 16;
}]>;
-def memop4 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return Subtarget->hasVectorUAMem()
- || cast<LoadSDNode>(N)->getAlignment() >= 4;
-}]>;
-
-def memop8 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return Subtarget->hasVectorUAMem()
- || cast<LoadSDNode>(N)->getAlignment() >= 8;
-}]>;
-
def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>;
def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
-// 256-bit memop pattern fragments
-// NOTE: all 256-bit integer vector loads are promoted to v4i64
-def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>;
-def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>;
-def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>;
+// These are needed to match a scalar memop that is used in a vector-only
+// math instruction such as the FP logical ops: andps, andnps, orps, xorps.
+// The memory operand is required to be a 128-bit load, so it must be converted
+// from a vector to a scalar.
+def memopfsf32_128 : PatFrag<(ops node:$ptr),
+ (f32 (vector_extract (memopv4f32 node:$ptr), (iPTR 0)))>;
+def memopfsf64_128 : PatFrag<(ops node:$ptr),
+ (f64 (vector_extract (memopv2f64 node:$ptr), (iPTR 0)))>;
-// 512-bit memop pattern fragments
-def memopv16f32 : PatFrag<(ops node:$ptr), (v16f32 (memop4 node:$ptr))>;
-def memopv8f64 : PatFrag<(ops node:$ptr), (v8f64 (memop8 node:$ptr))>;
-def memopv16i32 : PatFrag<(ops node:$ptr), (v16i32 (memop4 node:$ptr))>;
-def memopv8i64 : PatFrag<(ops node:$ptr), (v8i64 (memop8 node:$ptr))>;
// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
// 16-byte boundary.
return X86::isVINSERT256Index(N);
}], INSERT_get_vinsert256_imm>;
+def masked_load_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_load node:$src1, node:$src2, node:$src3), [{
+ if (auto *Load = dyn_cast<MaskedLoadSDNode>(N))
+ return Load->getAlignment() >= 16;
+ return false;
+}]>;
+
+def masked_load_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_load node:$src1, node:$src2, node:$src3), [{
+ if (auto *Load = dyn_cast<MaskedLoadSDNode>(N))
+ return Load->getAlignment() >= 32;
+ return false;
+}]>;
+
+def masked_load_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_load node:$src1, node:$src2, node:$src3), [{
+ if (auto *Load = dyn_cast<MaskedLoadSDNode>(N))
+ return Load->getAlignment() >= 64;
+ return false;
+}]>;
+
+def masked_load_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_load node:$src1, node:$src2, node:$src3), [{
+ return isa<MaskedLoadSDNode>(N);
+}]>;
+
+def masked_store_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_store node:$src1, node:$src2, node:$src3), [{
+ if (auto *Store = dyn_cast<MaskedStoreSDNode>(N))
+ return Store->getAlignment() >= 16;
+ return false;
+}]>;
+
+def masked_store_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_store node:$src1, node:$src2, node:$src3), [{
+ if (auto *Store = dyn_cast<MaskedStoreSDNode>(N))
+ return Store->getAlignment() >= 32;
+ return false;
+}]>;
+
+def masked_store_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_store node:$src1, node:$src2, node:$src3), [{
+ if (auto *Store = dyn_cast<MaskedStoreSDNode>(N))
+ return Store->getAlignment() >= 64;
+ return false;
+}]>;
+
+def masked_store_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_store node:$src1, node:$src2, node:$src3), [{
+ return isa<MaskedStoreSDNode>(N);
+}]>;
+