From c0fc62c2f9b10f05bc055f78480d7e393393682f Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Thu, 6 Feb 2014 09:54:51 +0000 Subject: [PATCH] X86: deduplicate V[SZ]EXT_MOVL and V[SZ]EXT nodes I believe VZEXT_MOVL means "zero all vector elements except the first" (and should have identical input & output types) whereas VZEXT means "zero extend each element of a vector (discarding higher elements if necessary)". For example: (v4i32 (vzext (v16i8 ...))) should zero extend the low 4 bytes of the incoming vector to 32-bits, discarding higher bytes. However, somewhere in the past, these two concepts had become confused, even leading to a nonsensical VSEXT_MOVL. This re-merges the nodes where appropriate (all VSEXT_MOVL -> VSEXT, VZEXT_MOVL -> VZEXT when it's an actual extension). rdar://problem/15981990 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@200918 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 9 ++++--- lib/Target/X86/X86ISelLowering.h | 5 +--- lib/Target/X86/X86InstrFragmentsSIMD.td | 10 -------- lib/Target/X86/X86InstrSSE.td | 32 ++----------------------- 4 files changed, 7 insertions(+), 49 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5dca225593f..a945284fa6e 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -9032,7 +9032,7 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, return SDValue(); if (Subtarget->hasInt256()) - return DAG.getNode(X86ISD::VZEXT_MOVL, dl, VT, In); + return DAG.getNode(X86ISD::VZEXT, dl, VT, In); SDValue ZeroVec = getZeroVector(InVT, Subtarget, DAG, dl); SDValue Undef = DAG.getUNDEF(InVT); @@ -10617,7 +10617,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget, return SDValue(); if (Subtarget->hasInt256()) - return DAG.getNode(X86ISD::VSEXT_MOVL, dl, VT, In); + return DAG.getNode(X86ISD::VSEXT, dl, VT, In); // Optimize vectors in AVX mode // Sign extend v8i16 to v8i32 and @@ -10646,8 +10646,8 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget, MVT HalfVT = MVT::getVectorVT(VT.getScalarType(), VT.getVectorNumElements()/2); - OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo); - OpHi = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpHi); + OpLo = DAG.getNode(X86ISD::VSEXT, dl, HalfVT, OpLo); + OpHi = DAG.getNode(X86ISD::VSEXT, dl, HalfVT, OpHi); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); } @@ -14017,7 +14017,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG"; case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG"; case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL"; - case X86ISD::VSEXT_MOVL: return "X86ISD::VSEXT_MOVL"; case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD"; case X86ISD::VZEXT: return "X86ISD::VZEXT"; case X86ISD::VSEXT: return "X86ISD::VSEXT"; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index a8939a25955..5d00e26e7c2 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -245,12 +245,9 @@ namespace llvm { /// the list of operands. TC_RETURN, - // VZEXT_MOVL - Vector move low and zero extend. + // VZEXT_MOVL - Vector move to low scalar and zero higher vector elements. VZEXT_MOVL, - // VSEXT_MOVL - Vector move low and sign extend. - VSEXT_MOVL, - // VZEXT - Vector integer zero-extend. VZEXT, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 551b446c81f..c029bef6639 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -87,16 +87,6 @@ def X86insrtps : SDNode<"X86ISD::INSERTPS", def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL", SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>; -def X86vzmovly : SDNode<"X86ISD::VZEXT_MOVL", - SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, - SDTCisInt<0>, SDTCisInt<1>, - SDTCisOpSmallerThanOp<1, 0> ]>>; - -def X86vsmovl : SDNode<"X86ISD::VSEXT_MOVL", - SDTypeProfile<1, 1, - [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<0>, SDTCisInt<1>, - SDTCisOpSmallerThanOp<1, 0>]>>; - def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index af1352352c5..34b3b855bf1 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -5824,34 +5824,6 @@ let Predicates = [UseSSE41] in { (PMOVZXDQrm addr:$src)>; } -let Predicates = [HasAVX2] in { - let AddedComplexity = 15 in { - def : Pat<(v4i64 (X86vzmovly (v4i32 VR128:$src))), - (VPMOVZXDQYrr VR128:$src)>; - def : Pat<(v8i32 (X86vzmovly (v8i16 VR128:$src))), - (VPMOVZXWDYrr VR128:$src)>; - def : Pat<(v16i16 (X86vzmovly (v16i8 VR128:$src))), - (VPMOVZXBWYrr VR128:$src)>; - } - - def : Pat<(v4i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQYrr VR128:$src)>; - def : Pat<(v8i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDYrr VR128:$src)>; - def : Pat<(v16i16 (X86vsmovl (v16i8 VR128:$src))), (VPMOVSXBWYrr VR128:$src)>; -} - -let Predicates = [HasAVX] in { - def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>; - def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>; - def : Pat<(v8i16 (X86vsmovl (v16i8 VR128:$src))), (VPMOVSXBWrr VR128:$src)>; -} - -let Predicates = [UseSSE41] in { - def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>; - def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>; - def : Pat<(v8i16 (X86vsmovl (v16i8 VR128:$src))), (PMOVSXBWrr VR128:$src)>; -} - - multiclass SS41I_binop_rm_int4 opc, string OpcodeStr, Intrinsic IntId, OpndItins itins = DEFAULT_ITINS> { def rr : SS48I; - def : Pat<(v8i32 (X86vsmovl (v8i16 (bitconvert (v2i64 (load addr:$src)))))), + def : Pat<(v8i32 (X86vsext (v8i16 (bitconvert (v2i64 (load addr:$src)))))), (VPMOVSXWDYrm addr:$src)>; - def : Pat<(v4i64 (X86vsmovl (v4i32 (bitconvert (v2i64 (load addr:$src)))))), + def : Pat<(v4i64 (X86vsext (v4i32 (bitconvert (v2i64 (load addr:$src)))))), (VPMOVSXDQYrm addr:$src)>; def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2i64 -- 2.34.1