From: Chandler Carruth <chandlerc@gmail.com>
Date: Wed, 9 Jul 2014 12:36:54 +0000 (+0000)
Subject: [x86] Fix a bug in my new zext-vector-inreg DAG trickery where we were
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=4c27c85cde1657246ad9291af15899f249026bc7;p=oota-llvm.git

[x86] Fix a bug in my new zext-vector-inreg DAG trickery where we were
not widening the input type to the node sufficiently to let the ext take
place in a register.

This would in turn result in a mysterious bitcast assertion failure
downstream. First change here is to add back the helpful assert I had in
an earlier version of the code to catch this immediately.

Next change is to add support to the type legalization to detect when we
have widened the operand either too little or too much (for whatever
reason) and find a size-matched legal vector type to convert it to
first. This can also fail so we get a new fallback path, but that seems
OK.

With this, we no longer crash on vec_cast2.ll when using widening. I've
also added the CHECK lines for the zero-extend cases here. We still need
to support sign-extend and trunc (or something) to get plausible code
for the other two thirds of this test which is one of the regression
tests that showed the most scalarization when widening was
force-enabled. Slowly closing in on widening being a viable legalization
strategy without it resorting to scalarization at every turn. =]

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212614 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 4af854ec0c9..c50625e2080 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2425,6 +2425,39 @@ SDValue DAGTypeLegalizer::WidenVecOp_ZERO_EXTEND(SDNode *N) {
              InOp.getValueType().getVectorNumElements() &&
          "Input wasn't widened!");
 
+  // We may need to further widen the operand until it has the same total
+  // vector size as the result.
+  EVT InVT = InOp.getValueType();
+  if (InVT.getSizeInBits() != VT.getSizeInBits()) {
+    EVT InEltVT = InVT.getVectorElementType();
+    for (int i = MVT::FIRST_VECTOR_VALUETYPE, e = MVT::LAST_VECTOR_VALUETYPE; i < e; ++i) {
+      EVT FixedVT = (MVT::SimpleValueType)i;
+      EVT FixedEltVT = FixedVT.getVectorElementType();
+      if (TLI.isTypeLegal(FixedVT) &&
+          FixedVT.getSizeInBits() == VT.getSizeInBits() &&
+          FixedEltVT == InEltVT) {
+        assert(FixedVT.getVectorNumElements() >= VT.getVectorNumElements() &&
+               "Not enough elements in the fixed type for the operand!");
+        assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() &&
+               "We can't have the same type as we started with!");
+        if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements())
+          InOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FixedVT,
+                             DAG.getUNDEF(FixedVT), InOp,
+                             DAG.getConstant(0, TLI.getVectorIdxTy()));
+        else
+          InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp,
+                             DAG.getConstant(0, TLI.getVectorIdxTy()));
+        break;
+      }
+    }
+    InVT = InOp.getValueType();
+    if (InVT.getSizeInBits() != VT.getSizeInBits())
+      // We couldn't find a legal vector type that was a widening of the input
+      // and could be extended in-register to the result type, so we have to
+      // scalarize.
+      return WidenVecOp_Convert(N);
+  }
+
   // Use a special DAG node to represent the operation of zero extending the
   // low lanes.
   return DAG.getZeroExtendVectorInReg(InOp, DL, VT);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 9b65d90383b..119b0255e7c 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1034,6 +1034,9 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) {
 
 SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) {
   assert(VT.isVector() && "This DAG node is restricted to vector types.");
+  assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() &&
+         "The sizes of the input and result must match in order to perform the "
+         "extend in-register.");
   assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
          "The destination vector type must have fewer lanes than the input.");
   return getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, Op);
diff --git a/test/CodeGen/X86/vec_cast2.ll b/test/CodeGen/X86/vec_cast2.ll
index 5f6e7a853a3..c7f1554cdd4 100644
--- a/test/CodeGen/X86/vec_cast2.ll
+++ b/test/CodeGen/X86/vec_cast2.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-WIDE
 
 ;CHECK-LABEL: foo1_8:
 ;CHECK: vcvtdq2ps
@@ -19,6 +20,10 @@ define <4 x float> @foo1_4(<4 x i8> %src) {
 ;CHECK-LABEL: foo2_8:
 ;CHECK: vcvtdq2ps
 ;CHECK: ret
+;
+;CHECK-WIDE-LABEL: foo2_8:
+;CHECK-WIDE: vcvtdq2ps %ymm{{.*}}, %ymm{{.*}}
+;CHECK-WIDE: ret
 define <8 x float> @foo2_8(<8 x i8> %src) {
   %res = uitofp <8 x i8> %src to <8 x float>
   ret <8 x float> %res
@@ -27,6 +32,10 @@ define <8 x float> @foo2_8(<8 x i8> %src) {
 ;CHECK-LABEL: foo2_4:
 ;CHECK: vcvtdq2ps
 ;CHECK: ret
+;
+;CHECK-WIDE-LABEL: foo2_4:
+;CHECK-WIDE: vcvtdq2ps %xmm{{.*}}, %xmm{{.*}}
+;CHECK-WIDE: ret
 define <4 x float> @foo2_4(<4 x i8> %src) {
   %res = uitofp <4 x i8> %src to <4 x float>
   ret <4 x float> %res