From d5391478340c6e63b28d03c29ea9fde580b38e93 Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Sun, 23 Nov 2014 13:09:06 +0000 Subject: [PATCH] [X86] Fixes bug in build_vector v4x32 lowering r222375 made some improvements to build_vector lowering of v4x32 and v4xf32 into an insertps, but it missed a case where: 1. A single extracted element is used twice. 2. The lower of the two non-zero indexes should be preserved, and the higher should be used for the dest mask. This caused a crash, since the source value for the insertps ends up uninitialized. Differential Revision: http://reviews.llvm.org/D6377 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222635 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 9 ++++++--- test/CodeGen/X86/sse41.ll | 17 +++++++++++++++++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 90e88171dcc..0347a517444 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5771,7 +5771,8 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG, // We only know how to deal with build_vector nodes where elements are either // zeroable or extract_vector_elt with constant index. 
SDValue FirstNonZero; - for (int i=0; i < 4; ++i) { + unsigned FirstNonZeroIdx; + for (unsigned i=0; i < 4; ++i) { if (Zeroable[i]) continue; SDValue Elt = Op->getOperand(i); @@ -5782,8 +5783,10 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG, MVT VT = Elt.getOperand(0).getSimpleValueType(); if (!VT.is128BitVector()) return SDValue(); - if (!FirstNonZero.getNode()) + if (!FirstNonZero.getNode()) { FirstNonZero = Elt; + FirstNonZeroIdx = i; + } } assert(FirstNonZero.getNode() && "Unexpected build vector of all zeros!"); @@ -5822,7 +5825,7 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG, return SDValue(); SDValue V2 = Elt.getOperand(0); - if (Elt == FirstNonZero) + if (Elt == FirstNonZero && EltIdx == FirstNonZeroIdx) V1 = SDValue(); bool CanFold = true; diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll index 2bdfafe8374..71c1c9ed33c 100644 --- a/test/CodeGen/X86/sse41.ll +++ b/test/CodeGen/X86/sse41.ll @@ -1145,6 +1145,23 @@ entry: ret <4 x float> %vecinit3 } +define <4 x float> @insertps_10(<4 x float> %A) +{ +; X32-LABEL: insertps_10: +; X32: ## BB#0: +; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[0],zero +; X32-NEXT: retl +; +; X64-LABEL: insertps_10: +; X64: ## BB#0: +; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[0],zero +; X64-NEXT: retq + %vecext = extractelement <4 x float> %A, i32 0 + %vecbuild1 = insertelement <4 x float> , float %vecext, i32 0 + %vecbuild2 = insertelement <4 x float> %vecbuild1, float %vecext, i32 2 + ret <4 x float> %vecbuild2 +} + define <4 x float> @build_vector_to_shuffle_1(<4 x float> %A) { ; X32-LABEL: build_vector_to_shuffle_1: ; X32: ## BB#0: -- 2.34.1