From 0b94b5f52b11092a69267159dfe0df3acdfcabd7 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Tue, 17 Jan 2012 09:13:19 +0000 Subject: [PATCH] Fix 11769. In CanXFormVExtractWithShuffleIntoLoad we assumed that EXTRACT_VECTOR_ELT can be later handled by the DAGCombiner. However, in some cases on AVX, the EXTRACT_VECTOR_ELT is legalized to EXTRACT_SUBVECTOR + EXTRACT_VECTOR_ELT, which currently is not handled by the DAGCombiner. In this patch I added a check that we only extract from the XMM part. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148298 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 7 +++++++ test/CodeGen/X86/avx-shuffle.ll | 13 ++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 043110867f0..8f69b0e314f 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6241,6 +6241,13 @@ bool CanXFormVExtractWithShuffleIntoLoad(SDValue V, SelectionDAG &DAG, int Idx = (Elt > NumElems) ? -1 : SVOp->getMaskElt(Elt); V = (Idx < (int)NumElems) ? V.getOperand(0) : V.getOperand(1); + // If we are accessing the upper part of a YMM register + // then the EXTRACT_VECTOR_ELT is likely to be legalized to a sequence of + // EXTRACT_SUBVECTOR + EXTRACT_VECTOR_ELT, which are not detected at this point + // because the legalization of N did not happen yet. + if (Idx >= NumElems/2 && VT.getSizeInBits() == 256) + return false; + // Skip one more bit_convert if necessary if (V.getOpcode() == ISD::BITCAST) V = V.getOperand(0); diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll index a059a4d3575..232fc785120 100644 --- a/test/CodeGen/X86/avx-shuffle.ll +++ b/test/CodeGen/X86/avx-shuffle.ll @@ -60,7 +60,18 @@ entry: define <16 x i16> @test7(<4 x i16> %a) nounwind { ; CHECK: test7 - %b = shufflevector <4 x i16> %a, <4 x i16> undef, <16 x i32> +; CHECK: ret ret <16 x i16> %b } + +; CHECK: test8 +define void @test8() { +entry: + %0 = load <16 x i64> addrspace(1)* null, align 128 + %1 = shufflevector <16 x i64> , <16 x i64> %0, <16 x i32> + %2 = shufflevector <16 x i64> %1, <16 x i64> %0, <16 x i32> + store <16 x i64> %2, <16 x i64> addrspace(1)* undef, align 128 +; CHECK: ret + ret void +} -- 2.34.1