From: Chandler Carruth Date: Wed, 9 Jul 2014 12:36:54 +0000 (+0000) Subject: [x86] Fix a bug in my new zext-vector-inreg DAG trickery where we were X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=4c27c85cde1657246ad9291af15899f249026bc7;p=oota-llvm.git [x86] Fix a bug in my new zext-vector-inreg DAG trickery where we were not widening the input type to the node sufficiently to let the ext take place in a register. This would in turn result in a mysterious bitcast assertion failure downstream. The first change here is to add back the helpful assert I had in an earlier version of the code to catch this immediately. The next change is to add support to the type legalization to detect when we have widened the operand either too little or too much (for whatever reason) and find a size-matched legal vector type to convert it to first. This can also fail, so we get a new fallback path, but that seems OK. With this, we no longer crash on vec_cast2.ll when using widening. I've also added the CHECK lines for the zero-extend cases here. We still need to support sign-extend and trunc (or something) to get plausible code for the other two thirds of this test, which is one of the regression tests that showed the most scalarization when widening was force-enabled. Slowly closing in on widening being a viable legalization strategy without it resorting to scalarization at every turn. 
=] git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212614 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 4af854ec0c9..c50625e2080 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2425,6 +2425,39 @@ SDValue DAGTypeLegalizer::WidenVecOp_ZERO_EXTEND(SDNode *N) { InOp.getValueType().getVectorNumElements() && "Input wasn't widened!"); + // We may need to further widen the operand until it has the same total + // vector size as the result. + EVT InVT = InOp.getValueType(); + if (InVT.getSizeInBits() != VT.getSizeInBits()) { + EVT InEltVT = InVT.getVectorElementType(); + for (int i = MVT::FIRST_VECTOR_VALUETYPE, e = MVT::LAST_VECTOR_VALUETYPE; i < e; ++i) { + EVT FixedVT = (MVT::SimpleValueType)i; + EVT FixedEltVT = FixedVT.getVectorElementType(); + if (TLI.isTypeLegal(FixedVT) && + FixedVT.getSizeInBits() == VT.getSizeInBits() && + FixedEltVT == InEltVT) { + assert(FixedVT.getVectorNumElements() >= VT.getVectorNumElements() && + "Not enough elements in the fixed type for the operand!"); + assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() && + "We can't have the same type as we started with!"); + if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements()) + InOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FixedVT, + DAG.getUNDEF(FixedVT), InOp, + DAG.getConstant(0, TLI.getVectorIdxTy())); + else + InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp, + DAG.getConstant(0, TLI.getVectorIdxTy())); + break; + } + } + InVT = InOp.getValueType(); + if (InVT.getSizeInBits() != VT.getSizeInBits()) + // We couldn't find a legal vector type that was a widening of the input + // and could be extended in-register to the result type, so we have to + // scalarize. 
+ return WidenVecOp_Convert(N); + } + // Use a special DAG node to represent the operation of zero extending the // low lanes. return DAG.getZeroExtendVectorInReg(InOp, DL, VT); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 9b65d90383b..119b0255e7c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1034,6 +1034,9 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) { SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { assert(VT.isVector() && "This DAG node is restricted to vector types."); + assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && + "The sizes of the input and result must match in order to perform the " + "extend in-register."); assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && "The destination vector type must have fewer lanes than the input."); return getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, Op); diff --git a/test/CodeGen/X86/vec_cast2.ll b/test/CodeGen/X86/vec_cast2.ll index 5f6e7a853a3..c7f1554cdd4 100644 --- a/test/CodeGen/X86/vec_cast2.ll +++ b/test/CodeGen/X86/vec_cast2.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx | FileCheck %s +; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-WIDE ;CHECK-LABEL: foo1_8: ;CHECK: vcvtdq2ps @@ -19,6 +20,10 @@ define <4 x float> @foo1_4(<4 x i8> %src) { ;CHECK-LABEL: foo2_8: ;CHECK: vcvtdq2ps ;CHECK: ret +; +;CHECK-WIDE-LABEL: foo2_8: +;CHECK-WIDE: vcvtdq2ps %ymm{{.*}}, %ymm{{.*}} +;CHECK-WIDE: ret define <8 x float> @foo2_8(<8 x i8> %src) { %res = uitofp <8 x i8> %src to <8 x float> ret <8 x float> %res @@ -27,6 +32,10 @@ define <8 x float> @foo2_8(<8 x i8> %src) { ;CHECK-LABEL: foo2_4: ;CHECK: vcvtdq2ps ;CHECK: ret +; +;CHECK-WIDE-LABEL: foo2_4: 
+;CHECK-WIDE: vcvtdq2ps %xmm{{.*}}, %xmm{{.*}} +;CHECK-WIDE: ret define <4 x float> @foo2_4(<4 x i8> %src) { %res = uitofp <4 x i8> %src to <4 x float> ret <4 x float> %res