From 0787274b70e718af737bd3c0f427574acd829810 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Tue, 23 Oct 2012 21:40:15 +0000 Subject: [PATCH] Fix PR14161 - Check index being extracted to be constant 0 before simplifying. Otherwise, retain the original sequence. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166504 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 5 +++- test/CodeGen/X86/pr14161.ll | 38 ++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 test/CodeGen/X86/pr14161.ll diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a3973ed963f..f73b9d64b94 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6630,9 +6630,12 @@ X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const { .getOperand(0).getValueType().getSizeInBits() == SignificantBits) { // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x) SDValue V = V1.getOperand(0).getOperand(0).getOperand(0); + ConstantSDNode *CIdx = + dyn_cast<ConstantSDNode>(V1.getOperand(0).getOperand(0).getOperand(1)); // If it's foldable, i.e. normal load with single use, we will let code // selection to fold it. Otherwise, we will short the conversion sequence. 
- if (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse()) + if (CIdx && CIdx->getZExtValue() == 0 && + (!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse())) V1 = DAG.getNode(ISD::BITCAST, DL, V1.getValueType(), V); } diff --git a/test/CodeGen/X86/pr14161.ll b/test/CodeGen/X86/pr14161.ll new file mode 100644 index 00000000000..ff4532eac3a --- /dev/null +++ b/test/CodeGen/X86/pr14161.ll @@ -0,0 +1,38 @@ +; RUN: llc < %s -mtriple=x86_64-linux-pc -mcpu=corei7 | FileCheck %s + +declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) + +define <2 x i16> @good(<4 x i32>*, <4 x i8>*) { +entry: + %2 = load <4 x i32>* %0, align 16 + %3 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %2, <4 x i32> ) + %4 = extractelement <4 x i32> %3, i32 0 + %5 = extractelement <4 x i32> %3, i32 1 + %6 = extractelement <4 x i32> %3, i32 2 + %7 = extractelement <4 x i32> %3, i32 3 + %8 = bitcast i32 %4 to <2 x i16> + %9 = bitcast i32 %5 to <2 x i16> + ret <2 x i16> %8 +; CHECK: good +; CHECK: pminud +; CHECK-NEXT: pmovzxwq +; CHECK: ret +} + +define <2 x i16> @bad(<4 x i32>*, <4 x i8>*) { +entry: + %2 = load <4 x i32>* %0, align 16 + %3 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %2, <4 x i32> ) + %4 = extractelement <4 x i32> %3, i32 0 + %5 = extractelement <4 x i32> %3, i32 1 + %6 = extractelement <4 x i32> %3, i32 2 + %7 = extractelement <4 x i32> %3, i32 3 + %8 = bitcast i32 %4 to <2 x i16> + %9 = bitcast i32 %5 to <2 x i16> + ret <2 x i16> %9 +; CHECK: bad +; CHECK: pminud +; CHECK: pextrd +; CHECK: pmovzxwq +; CHECK: ret +} -- 2.34.1