if (NumElems != 4) return false;
// Each half must refer to only one of the vector.
- SDOperand Elt = N->getOperand(0);
- assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
- for (unsigned i = 1; i < NumElems / 2; ++i) {
+ for (unsigned i = 0; i < 2; ++i) {
assert(isa<ConstantSDNode>(N->getOperand(i)) &&
"Invalid VECTOR_SHUFFLE mask!");
- if (cast<ConstantSDNode>(N->getOperand(i))->getValue() !=
- cast<ConstantSDNode>(Elt)->getValue())
- return false;
+ unsigned Val = cast<ConstantSDNode>(N->getOperand(i))->getValue();
+ if (Val >= 4) return false;
}
- Elt = N->getOperand(NumElems / 2);
- assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
- for (unsigned i = NumElems / 2 + 1; i < NumElems; ++i) {
+ for (unsigned i = 2; i < 4; ++i) {
assert(isa<ConstantSDNode>(N->getOperand(i)) &&
"Invalid VECTOR_SHUFFLE mask!");
- if (cast<ConstantSDNode>(N->getOperand(i))->getValue() !=
- cast<ConstantSDNode>(Elt)->getValue())
- return false;
+ unsigned Val = cast<ConstantSDNode>(N->getOperand(i))->getValue();
+ if (Val < 4) return false;
}
return true;
unsigned NumElems = PermMask.getNumOperands();
// Splat && PSHUFD's 2nd vector must be undef.
- if (X86::isSplatMask(PermMask.Val) ||
- ((MVT::isInteger(VT) &&
- (X86::isPSHUFDMask(PermMask.Val) ||
- X86::isPSHUFHWMask(PermMask.Val) ||
- X86::isPSHUFLWMask(PermMask.Val))))) {
+ if (X86::isSplatMask(PermMask.Val)) {
if (V2.getOpcode() != ISD::UNDEF)
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
// Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL*.
return SDOperand();
- if (NumElems == 2 ||
- X86::isSHUFPMask(PermMask.Val)) {
+ if (NumElems == 2)
return NormalizeVectorShuffle(V1, V2, PermMask, VT, DAG);
+
+ // If VT is integer, try PSHUF* first, then SHUFP*.
+ if (MVT::isInteger(VT)) {
+ if (X86::isPSHUFDMask(PermMask.Val) ||
+ X86::isPSHUFHWMask(PermMask.Val) ||
+ X86::isPSHUFLWMask(PermMask.Val)) {
+ if (V2.getOpcode() != ISD::UNDEF)
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
+ DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
+ return SDOperand();
+ }
+
+ if (X86::isSHUFPMask(PermMask.Val))
+ return NormalizeVectorShuffle(V1, V2, PermMask, VT, DAG);
+ } else {
+ // Floating point cases in the other order.
+ if (X86::isSHUFPMask(PermMask.Val))
+ return NormalizeVectorShuffle(V1, V2, PermMask, VT, DAG);
+ if (X86::isPSHUFDMask(PermMask.Val) ||
+ X86::isPSHUFHWMask(PermMask.Val) ||
+ X86::isPSHUFLWMask(PermMask.Val)) {
+ if (V2.getOpcode() != ISD::UNDEF)
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
+ DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
+ return SDOperand();
+ }
}
assert(0 && "Unexpected VECTOR_SHUFFLE to lower");
return X86::isPSHUFLWMask(N);
}], SHUFFLE_get_pshuflw_imm>;
// Only use PSHUF* for v4f32 if SHUFP does not match; SHUFP* is preferred
// for floating-point shuffles (see the lowering code), so these PatLeafs
// reject any mask SHUFP* can already handle.
def PSHUFD_fp_shuffle_mask : PatLeaf<(build_vector), [{
  return !X86::isSHUFPMask(N) &&
         X86::isPSHUFDMask(N);
}], SHUFFLE_get_shuf_imm>;

def PSHUFHW_fp_shuffle_mask : PatLeaf<(build_vector), [{
  return !X86::isSHUFPMask(N) &&
         X86::isPSHUFHWMask(N);
}], SHUFFLE_get_pshufhw_imm>;

def PSHUFLW_fp_shuffle_mask : PatLeaf<(build_vector), [{
  return !X86::isSHUFPMask(N) &&
         X86::isPSHUFLWMask(N);
}], SHUFFLE_get_pshuflw_imm>;

// Any build_vector mask that SHUFP{S|D} can encode directly in its imm8.
def SHUFP_shuffle_mask : PatLeaf<(build_vector), [{
  return X86::isSHUFPMask(N);
}], SHUFFLE_get_shuf_imm>;
// Only use SHUFP for v4i32 if PSHUF* do not match: PSHUF* is preferred for
// integer shuffles, so accept only masks none of the PSHUF forms can handle.
def SHUFP_int_shuffle_mask : PatLeaf<(build_vector), [{
  return !X86::isPSHUFDMask(N) &&
         !X86::isPSHUFHWMask(N) &&
         !X86::isPSHUFLWMask(N) &&
         X86::isSHUFPMask(N);
}], SHUFFLE_get_shuf_imm>;
//===----------------------------------------------------------------------===//
// SSE2 with ImmT == Imm8 and XD prefix.
// PSHUFLW reg-reg: shuffle the low four i16 elements per the imm8 mask.
// Asm mnemonic is lower-case "pshuflw" (was mis-cased "pshufLw").
// NOTE(review): MRMDestReg looks suspicious for a read-src/write-dst shuffle
// (MRMSrcReg is typical) — verify against the encoding tables.
def PSHUFLWrr : Ii8<0x70, MRMDestReg,
                    (ops VR128:$dst, VR128:$src1, i32i8imm:$src2),
                    "pshuflw {$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst, (v8i16 (vector_shuffle
                                              VR128:$src1, (undef),
                                              PSHUFLW_shuffle_mask:$src2)))]>,
                XD, Requires<[HasSSE2]>;
// PSHUFLW reg-mem form; asm mnemonic fixed to lower-case "pshuflw".
// NOTE(review): this def continues past the visible chunk (trailing comma
// preserved) — the closing "XD, Requires<[HasSSE2]>;" lies outside this view.
def PSHUFLWrm : Ii8<0x70, MRMDestMem,
                    (ops VR128:$dst, i128mem:$src1, i32i8imm:$src2),
                    "pshuflw {$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst, (v8i16 (vector_shuffle
                                              (bc_v8i16 (loadv2i64 addr:$src1)), (undef),
                                              PSHUFLW_shuffle_mask:$src2)))]>,
(v4f32 (SHUFPSrr VR128:$src, VR128:$src, SSE_splat_mask:$sm))>,
Requires<[HasSSE1]>;
// Shuffle v4i32 with SHUFP* if others do not match. SHUFP_int_shuffle_mask
// only accepts masks no PSHUF* form can handle, so PSHUF* patterns win first.
def : Pat<(vector_shuffle (v4i32 VR128:$src1), (v4i32 VR128:$src2),
                          SHUFP_int_shuffle_mask:$sm),
          (v4i32 (SHUFPSrr VR128:$src1, VR128:$src2,
                           SHUFP_int_shuffle_mask:$sm))>, Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v4i32 VR128:$src1), (load addr:$src2),
                          SHUFP_int_shuffle_mask:$sm),
          (v4i32 (SHUFPSrm VR128:$src1, addr:$src2,
                           SHUFP_int_shuffle_mask:$sm))>, Requires<[HasSSE2]>;

// Shuffle v4f32 with PSHUF* if others do not match. The *_fp_shuffle_mask
// PatLeafs reject masks SHUFP* can handle, so SHUFP* patterns win first.
def : Pat<(vector_shuffle (v4f32 VR128:$src1), (undef),
                          PSHUFD_fp_shuffle_mask:$sm),
          (v4f32 (PSHUFDrr VR128:$src1, PSHUFD_fp_shuffle_mask:$sm))>,
      Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (loadv4f32 addr:$src1), (undef),
                          PSHUFD_fp_shuffle_mask:$sm),
          (v4f32 (PSHUFDrm addr:$src1, PSHUFD_fp_shuffle_mask:$sm))>,
      Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v4f32 VR128:$src1), (undef),
                          PSHUFHW_fp_shuffle_mask:$sm),
          (v4f32 (PSHUFHWrr VR128:$src1, PSHUFHW_fp_shuffle_mask:$sm))>,
      Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (loadv4f32 addr:$src1), (undef),
                          PSHUFHW_fp_shuffle_mask:$sm),
          (v4f32 (PSHUFHWrm addr:$src1, PSHUFHW_fp_shuffle_mask:$sm))>,
      Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (v4f32 VR128:$src1), (undef),
                          PSHUFLW_fp_shuffle_mask:$sm),
          (v4f32 (PSHUFLWrr VR128:$src1, PSHUFLW_fp_shuffle_mask:$sm))>,
      Requires<[HasSSE2]>;
def : Pat<(vector_shuffle (loadv4f32 addr:$src1), (undef),
                          PSHUFLW_fp_shuffle_mask:$sm),
          (v4f32 (PSHUFLWrm addr:$src1, PSHUFLW_fp_shuffle_mask:$sm))>,
      Requires<[HasSSE2]>;
// Logical ops
def : Pat<(and (bc_v4i32 (v4f32 VR128:$src1)), (loadv4i32 addr:$src2)),