From: Filipe Cabecinhas Date: Mon, 13 Oct 2014 16:16:16 +0000 (+0000) Subject: Fix a broadcast related regression on the vector shuffle lowering. X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=40251eb0b0eacb93d8510611a80af84e3a52110d;p=oota-llvm.git Fix a broadcast related regression on the vector shuffle lowering. Summary: Test by Robert Lougher! Reviewers: chandlerc Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D5745 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219617 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ed201e88d3a..a8afe816d90 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -7895,10 +7895,42 @@ static SDValue lowerVectorShuffleAsBroadcast(MVT VT, SDLoc DL, SDValue V, "a sorted mask where the broadcast " "comes from V1."); - // Check if this is a broadcast of a scalar. We special case lowering for - // scalars so that we can more effectively fold with loads. + // Go up the chain of (vector) values to try and find a scalar load that + // we can combine with the broadcast. + for (;;) { + switch (V.getOpcode()) { + case ISD::CONCAT_VECTORS: { + int OperandSize = Mask.size() / V.getNumOperands(); + V = V.getOperand(BroadcastIdx / OperandSize); + BroadcastIdx %= OperandSize; + continue; + } + + case ISD::INSERT_SUBVECTOR: { + SDValue VOuter = V.getOperand(0), VInner = V.getOperand(1); + auto ConstantIdx = dyn_cast(V.getOperand(2)); + if (!ConstantIdx) + break; + + int BeginIdx = (int)ConstantIdx->getZExtValue(); + int EndIdx = + BeginIdx + (int)VInner.getValueType().getVectorNumElements(); + if (BroadcastIdx >= BeginIdx && BroadcastIdx < EndIdx) { + BroadcastIdx -= BeginIdx; + V = VInner; + } else { + V = VOuter; + } + continue; + } + } + break; + } + + // Check if this is a broadcast of a scalar. We special case lowering + // for scalars so that we can more effectively fold with loads. if (V.getOpcode() == ISD::BUILD_VECTOR || - (V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) { + (V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) { V = V.getOperand(BroadcastIdx); // If the scalar isn't a load we can't broadcast from it in AVX1, only with diff --git a/test/CodeGen/X86/vector-shuffle-256-v4.ll b/test/CodeGen/X86/vector-shuffle-256-v4.ll index 64cdee75db8..2fa7e4d81f9 100644 --- a/test/CodeGen/X86/vector-shuffle-256-v4.ll +++ b/test/CodeGen/X86/vector-shuffle-256-v4.ll @@ -734,3 +734,29 @@ define <4 x i64> @splat_mem_v4i64(i64* %ptr) { %shuffle = shufflevector <4 x i64> %v, <4 x i64> undef, <4 x i32> ret <4 x i64> %shuffle } + +define <4 x double> @splat_mem_v4f64_2(double* %p) { +; ALL-LABEL: splat_mem_v4f64_2: +; ALL: # BB#0: +; ALL-NEXT: vbroadcastsd (%rdi), %ymm0 +; ALL-NEXT: retq + %1 = load double* %p + %2 = insertelement <2 x double> undef, double %1, i32 0 + %3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> zeroinitializer + ret <4 x double> %3 +} + +define <4 x double> @splat_v4f64(<2 x double> %r) { +; AVX1-LABEL: splat_v4f64: +; AVX1: # BB#0: +; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0] +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: splat_v4f64: +; AVX2: # BB#0: +; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0 +; AVX2-NEXT: retq + %1 = shufflevector <2 x double> %r, <2 x double> undef, <4 x i32> zeroinitializer + ret <4 x double> %1 +} diff --git a/test/CodeGen/X86/vector-shuffle-256-v8.ll b/test/CodeGen/X86/vector-shuffle-256-v8.ll index c5f3e93283c..e57a4b81a2b 100644 --- a/test/CodeGen/X86/vector-shuffle-256-v8.ll +++ b/test/CodeGen/X86/vector-shuffle-256-v8.ll @@ -1579,3 +1579,29 @@ define <8 x i32> @shuffle_v8i32_6caa87e5(<8 x i32> %a, <8 x i32> %b) { %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> ret <8 x i32> %shuffle } + +define <8 x float> @splat_mem_v8f32_2(float* %p) { +; ALL-LABEL: splat_mem_v8f32_2: +; ALL: # BB#0: +; ALL-NEXT: vbroadcastss (%rdi), %ymm0 +; ALL-NEXT: retq + %1 = load float* %p + %2 = insertelement <4 x float> undef, float %1, i32 0 + %3 = shufflevector <4 x float> %2, <4 x float> undef, <8 x i32> zeroinitializer + ret <8 x float> %3 +} + +define <8 x float> @splat_v8f32(<4 x float> %r) { +; AVX1-LABEL: splat_v8f32: +; AVX1: # BB#0: +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: splat_v8f32: +; AVX2: # BB#0: +; AVX2-NEXT: vbroadcastss %xmm0, %ymm0 +; AVX2-NEXT: retq + %1 = shufflevector <4 x float> %r, <4 x float> undef, <8 x i32> zeroinitializer + ret <8 x float> %1 +}