From 6fa583d78701390079db0cc4d944823af06023c6 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 21 Nov 2011 08:26:50 +0000 Subject: [PATCH] Lowering for v32i8 to VPUNPCKLBW/VPUNPCKHBW when AVX2 is enabled. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@145028 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 16 ++++- lib/Target/X86/X86ISelLowering.h | 2 + lib/Target/X86/X86InstrFragmentsSIMD.td | 2 + lib/Target/X86/X86InstrSSE.td | 94 ++++--------------------- test/CodeGen/X86/avx2-unpack.ll | 14 ++++ 5 files changed, 44 insertions(+), 84 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4f7bf15a79c..a5bfe1ac8fe 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2852,6 +2852,7 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: case X86ISD::VPUNPCKLWDY: + case X86ISD::VPUNPCKLBWY: case X86ISD::VPUNPCKLDQY: case X86ISD::VPUNPCKLQDQY: case X86ISD::UNPCKHPS: @@ -2863,6 +2864,7 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::PUNPCKHDQ: case X86ISD::PUNPCKHQDQ: case X86ISD::VPUNPCKHWDY: + case X86ISD::VPUNPCKHBWY: case X86ISD::VPUNPCKHDQY: case X86ISD::VPUNPCKHQDQY: case X86ISD::VPERMILPS: @@ -2939,6 +2941,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: case X86ISD::VPUNPCKLWDY: + case X86ISD::VPUNPCKLBWY: case X86ISD::VPUNPCKLDQY: case X86ISD::VPUNPCKLQDQY: case X86ISD::UNPCKHPS: @@ -2950,6 +2953,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::PUNPCKHDQ: case X86ISD::PUNPCKHQDQ: case X86ISD::VPUNPCKHWDY: + case X86ISD::VPUNPCKHBWY: case X86ISD::VPUNPCKHDQY: case X86ISD::VPUNPCKHQDQY: return DAG.getNode(Opc, dl, VT, V1, V2); @@ -3569,7 +3573,7 @@ static bool isUNPCKLMask(const SmallVectorImpl &Mask, EVT VT, "Unsupported vector type for unpckh"); if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 && - (!HasAVX2 || NumElts != 16)) + (!HasAVX2 || (NumElts != 16 && NumElts != 32))) return false; // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate @@ -3619,7 +3623,7 @@ static bool isUNPCKHMask(const SmallVectorImpl &Mask, EVT VT, "Unsupported vector type for unpckh"); if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 && - (!HasAVX2 || NumElts != 16)) + (!HasAVX2 || (NumElts != 16 && NumElts != 32))) return false; // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate @@ -4639,6 +4643,7 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, case X86ISD::PUNPCKHWD: case X86ISD::PUNPCKHDQ: case X86ISD::PUNPCKHQDQ: + case X86ISD::VPUNPCKHBWY: case X86ISD::VPUNPCKHWDY: case X86ISD::VPUNPCKHDQY: case X86ISD::VPUNPCKHQDQY: @@ -4654,6 +4659,7 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: + case X86ISD::VPUNPCKLBWY: case X86ISD::VPUNPCKLWDY: case X86ISD::VPUNPCKLDQY: case X86ISD::VPUNPCKLQDQY: @@ -6595,6 +6601,7 @@ static inline unsigned getUNPCKLOpcode(EVT VT, bool HasAVX2) { case MVT::v16i8: return X86ISD::PUNPCKLBW; case MVT::v8i16: return X86ISD::PUNPCKLWD; case MVT::v16i16: return X86ISD::VPUNPCKLWDY; + case MVT::v32i8: return X86ISD::VPUNPCKLBWY; default: llvm_unreachable("Unknown type for unpckl"); } @@ -6618,6 +6625,7 @@ static inline unsigned getUNPCKHOpcode(EVT VT, bool HasAVX2) { case MVT::v16i8: return X86ISD::PUNPCKHBW; case MVT::v8i16: return X86ISD::PUNPCKHWD; case MVT::v16i16: return X86ISD::VPUNPCKHWDY; + case MVT::v32i8: return X86ISD::VPUNPCKHBWY; default: llvm_unreachable("Unknown type for unpckh"); } @@ -11270,6 +11278,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PUNPCKLWD: return "X86ISD::PUNPCKLWD"; case X86ISD::PUNPCKLDQ: return "X86ISD::PUNPCKLDQ"; case X86ISD::PUNPCKLQDQ: return "X86ISD::PUNPCKLQDQ"; + case X86ISD::VPUNPCKLBWY: return "X86ISD::VPUNPCKLBWY"; case X86ISD::VPUNPCKLWDY: return "X86ISD::VPUNPCKLWDY"; case X86ISD::VPUNPCKLDQY: return "X86ISD::VPUNPCKLDQY"; case X86ISD::VPUNPCKLQDQY: return "X86ISD::VPUNPCKLQDQY"; @@ -11277,6 +11286,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PUNPCKHWD: return "X86ISD::PUNPCKHWD"; case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ"; case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ"; + case X86ISD::VPUNPCKHBWY: return "X86ISD::VPUNPCKHBWY"; case X86ISD::VPUNPCKHWDY: return "X86ISD::VPUNPCKHWDY"; case X86ISD::VPUNPCKHDQY: return "X86ISD::VPUNPCKHDQY"; case X86ISD::VPUNPCKHQDQY: return "X86ISD::VPUNPCKHQDQY"; @@ -14867,6 +14877,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PUNPCKHWD: case X86ISD::PUNPCKHDQ: case X86ISD::PUNPCKHQDQ: + case X86ISD::VPUNPCKHBWY: case X86ISD::VPUNPCKHWDY: case X86ISD::VPUNPCKHDQY: case X86ISD::VPUNPCKHQDQY: @@ -14878,6 +14889,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: + case X86ISD::VPUNPCKLBWY: case X86ISD::VPUNPCKLWDY: case X86ISD::VPUNPCKLDQY: case X86ISD::VPUNPCKLQDQY: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 7bb4da65813..36cb1526fd4 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -285,6 +285,7 @@ namespace llvm { PUNPCKLWD, PUNPCKLDQ, PUNPCKLQDQ, + VPUNPCKLBWY, VPUNPCKLWDY, VPUNPCKLDQY, VPUNPCKLQDQY, @@ -292,6 +293,7 @@ namespace llvm { PUNPCKHWD, PUNPCKHDQ, PUNPCKHQDQ, + VPUNPCKHBWY, VPUNPCKHWDY, VPUNPCKHDQY, VPUNPCKHQDQY, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index c4d311f8d42..7e8bc044632 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -144,6 +144,7 @@ def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>; def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>; def X86Punpckldq : SDNode<"X86ISD::PUNPCKLDQ", SDTShuff2Op>; def X86Punpcklqdq : SDNode<"X86ISD::PUNPCKLQDQ", SDTShuff2Op>; +def X86Punpcklbwy : SDNode<"X86ISD::VPUNPCKLBWY", SDTShuff2Op>; def X86Punpcklwdy : SDNode<"X86ISD::VPUNPCKLWDY", SDTShuff2Op>; def X86Punpckldqy : SDNode<"X86ISD::VPUNPCKLDQY", SDTShuff2Op>; def X86Punpcklqdqy : SDNode<"X86ISD::VPUNPCKLQDQY", SDTShuff2Op>; @@ -152,6 +153,7 @@ def X86Punpckhbw : SDNode<"X86ISD::PUNPCKHBW", SDTShuff2Op>; def X86Punpckhwd : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>; def X86Punpckhdq : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>; def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>; +def X86Punpckhbwy : SDNode<"X86ISD::VPUNPCKHBWY", SDTShuff2Op>; def X86Punpckhwdy : SDNode<"X86ISD::VPUNPCKHWDY", SDTShuff2Op>; def X86Punpckhdqy : SDNode<"X86ISD::VPUNPCKHDQY", SDTShuff2Op>; def X86Punpckhqdqy : SDNode<"X86ISD::VPUNPCKHQDQY", SDTShuff2Op>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index cfb8c850c80..94bd8251ef1 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -4204,19 +4204,8 @@ let Predicates = [HasAVX] in { bc_v8i16, 0>, VEX_4V; defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckldq, bc_v4i32, 0>, VEX_4V; - - /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen - /// knew to collapse (bitconvert VT to VT) into its operand. - def VPUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1, - VR128:$src2)))]>, VEX_4V; - def VPUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1, - (memopv2i64 addr:$src2))))]>, VEX_4V; + defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Punpcklqdq, + bc_v2i64, 0>, VEX_4V; defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckhbw, bc_v16i8, 0>, VEX_4V; @@ -4224,99 +4213,40 @@ let Predicates = [HasAVX] in { bc_v8i16, 0>, VEX_4V; defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckhdq, bc_v4i32, 0>, VEX_4V; - - /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen - /// knew to collapse (bitconvert VT to VT) into its operand. - def VPUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1, - VR128:$src2)))]>, VEX_4V; - def VPUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1, - (memopv2i64 addr:$src2))))]>, VEX_4V; + defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Punpckhqdq, + bc_v2i64, 0>, VEX_4V; } let Predicates = [HasAVX2] in { - defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpcklbw, + defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpcklbwy, bc_v32i8>, VEX_4V; defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Punpcklwdy, bc_v16i16>, VEX_4V; defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckldqy, bc_v8i32>, VEX_4V; + defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Punpcklqdqy, + bc_v4i64>, VEX_4V; - /// FIXME: we could eliminate this and use sse2_unpack_y instead if tblgen - /// knew to collapse (bitconvert VT to VT) into its operand. - def VPUNPCKLQDQYrr : PDI<0x6C, MRMSrcReg, - (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), - "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, (v4i64 (X86Punpcklqdqy VR256:$src1, - VR256:$src2)))]>, VEX_4V; - def VPUNPCKLQDQYrm : PDI<0x6C, MRMSrcMem, - (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), - "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, (v4i64 (X86Punpcklqdqy VR256:$src1, - (memopv4i64 addr:$src2))))]>, VEX_4V; - - defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckhbw, + defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckhbwy, bc_v32i8>, VEX_4V; defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Punpckhwdy, bc_v16i16>, VEX_4V; defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckhdqy, bc_v8i32>, VEX_4V; - - /// FIXME: we could eliminate this and use sse2_unpack_y instead if tblgen - /// knew to collapse (bitconvert VT to VT) into its operand. - def VPUNPCKHQDQYrr : PDI<0x6D, MRMSrcReg, - (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), - "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, (v4i64 (X86Punpckhqdqy VR256:$src1, - VR256:$src2)))]>, VEX_4V; - def VPUNPCKHQDQYrm : PDI<0x6D, MRMSrcMem, - (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), - "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, (v4i64 (X86Punpckhqdqy VR256:$src1, - (memopv4i64 addr:$src2))))]>, VEX_4V; + defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Punpckhqdqy, + bc_v4i64>, VEX_4V; } let Constraints = "$src1 = $dst" in { defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw, bc_v16i8>; defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd, bc_v8i16>; defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckldq, bc_v4i32>; - - /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen - /// knew to collapse (bitconvert VT to VT) into its operand. - def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "punpcklqdq\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2i64 (X86Punpcklqdq VR128:$src1, VR128:$src2)))]>; - def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "punpcklqdq\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2i64 (X86Punpcklqdq VR128:$src1, - (memopv2i64 addr:$src2))))]>; + defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Punpcklqdq, bc_v2i64>; defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckhbw, bc_v16i8>; defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckhwd, bc_v8i16>; defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckhdq, bc_v4i32>; - - /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen - /// knew to collapse (bitconvert VT to VT) into its operand. - def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "punpckhqdq\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)))]>; - def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "punpckhqdq\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2i64 (X86Punpckhqdq VR128:$src1, - (memopv2i64 addr:$src2))))]>; + defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Punpckhqdq, bc_v2i64>; } } // ExeDomain = SSEPackedInt diff --git a/test/CodeGen/X86/avx2-unpack.ll b/test/CodeGen/X86/avx2-unpack.ll index 51c0f16789c..aa973089ef2 100644 --- a/test/CodeGen/X86/avx2-unpack.ll +++ b/test/CodeGen/X86/avx2-unpack.ll @@ -41,3 +41,17 @@ entry: %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src2, <16 x i32> ret <16 x i16> %shuffle.i } + +; CHECK: vpunpckhbw +define <32 x i8> @unpackhbw(<32 x i8> %src1, <32 x i8> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src2, <32 x i32> + ret <32 x i8> %shuffle.i +} + +; CHECK: vpunpcklbw +define <32 x i8> @unpacklbw(<32 x i8> %src1, <32 x i8> %src2) nounwind uwtable readnone ssp { +entry: + %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src2, <32 x i32> + ret <32 x i8> %shuffle.i +} -- 2.34.1