From 6e56e2c602084bd7ab4af54e3c1a8700a9320f97 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Tue, 7 Nov 2006 22:14:24 +0000 Subject: [PATCH] Fixed a bug which causes x86 be to incorrectly match shuffle v, undef, <2, ?, 3, ?> to movhlps It should match to unpckhps instead. Added proper matching code for shuffle v, undef, <2, 3, 2, 3> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@31519 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 18 +++++++++++++++++- lib/Target/X86/X86ISelLowering.h | 5 +++++ lib/Target/X86/X86InstrSSE.td | 12 ++++++++---- 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ac7327f9e95..55022dab247 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2582,6 +2582,22 @@ bool X86::isMOVHLPSMask(SDNode *N) { isUndefOrEqual(N->getOperand(3), 3); } +/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form +/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, +/// <2, 3, 2, 3> +bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + if (N->getNumOperands() != 4) + return false; + + // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3 + return isUndefOrEqual(N->getOperand(0), 2) && + isUndefOrEqual(N->getOperand(1), 3) && + isUndefOrEqual(N->getOperand(2), 2) && + isUndefOrEqual(N->getOperand(3), 3); +} + /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. 
bool X86::isMOVLPMask(SDNode *N) { @@ -3724,7 +3740,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &IdxVec[0], IdxVec.size()); Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), - Vec, Vec, Mask); + Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec, DAG.getConstant(0, getPointerTy())); } else if (MVT::getSizeInBits(VT) == 64) { diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index dde123c02f0..68088ad2654 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -186,6 +186,11 @@ namespace llvm { /// specifies a shuffle of elements that is suitable for input to MOVHLPS. bool isMOVHLPSMask(SDNode *N); + /// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form + /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, + /// <2, 3, 2, 3> + bool isMOVHLPS_v_undef_Mask(SDNode *N); + /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. 
bool isMOVLPMask(SDNode *N); diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 8154d1730ad..b49beaf5b2f 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -110,6 +110,10 @@ def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{ return X86::isMOVHLPSMask(N); }]>; +def MOVHLPS_v_undef_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isMOVHLPS_v_undef_Mask(N); +}]>; + def MOVHP_shuffle_mask : PatLeaf<(build_vector), [{ return X86::isMOVHPMask(N); }]>; @@ -1987,16 +1991,16 @@ def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2, MOVHLPS_shuffle_mask)), (MOVHLPSrr VR128:$src1, VR128:$src2)>; -// vector_shuffle v1, undef <2, 3, ?, ?> using MOVHLPS +// vector_shuffle v1, undef <2, 3, 2, 3> using MOVHLPS def : Pat<(v4f32 (vector_shuffle VR128:$src1, (undef), - UNPCKH_shuffle_mask)), + MOVHLPS_v_undef_shuffle_mask)), (MOVHLPSrr VR128:$src1, VR128:$src1)>; def : Pat<(v4i32 (vector_shuffle VR128:$src1, (undef), - UNPCKH_shuffle_mask)), + MOVHLPS_v_undef_shuffle_mask)), (MOVHLPSrr VR128:$src1, VR128:$src1)>; } - let AddedComplexity = 20 in { +let AddedComplexity = 20 in { // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS // vector_shuffle v1, (load v2) <0, 1, 4, 5> using MOVHPS def : Pat<(v4f32 (vector_shuffle VR128:$src1, (loadv4f32 addr:$src2), -- 2.34.1