From: Alexey Samsonov
Date: Tue, 27 Jan 2015 21:34:11 +0000 (+0000)
Subject: Revert "[x86] Combine x86mmx/i64 to v2i64 conversion to use scalar_to_vector"
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=00b7a940e71ed6550a32fc69a5f11840a48ace64;p=oota-llvm.git

Revert "[x86] Combine x86mmx/i64 to v2i64 conversion to use scalar_to_vector"

This reverts commits r226953 and r226974.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227248 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 8b82ae9a42d..968e47ac658 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -24761,8 +24761,6 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
   LoadSDNode *Ld = cast<LoadSDNode>(N);
   EVT RegVT = Ld->getValueType(0);
   EVT MemVT = Ld->getMemoryVT();
-  SDValue Ptr = Ld->getBasePtr();
-  SDValue Chain = Ld->getChain();
   SDLoc dl(Ld);
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 
@@ -24801,33 +24799,6 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
     return DCI.CombineTo(N, NewVec, TF, true);
   }
 
-  // Conversion from x86mmx/i64 to v2i64 types is often done via stack
-  // store/load. Under certain conditions we can bypass the memory access and
-  // combine this load to use a scalar_to_vector instead. This leads to
-  // a reduction in the stack use, redundant emission of shuffles and create
-  // isel matching candidates for movq2dq instructions.
-  if (RegVT == MVT::v2i64 && Subtarget->hasSSE2() && Ext == ISD::EXTLOAD &&
-      !Ld->isVolatile() && ISD::isNON_TRUNCStore(Chain.getNode())) {
-
-    // If this load is directly stored, get the original source value.
-    StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
-    EVT SrcTy = PrevST->getValue().getValueType();
-    if (PrevST->getBasePtr() != Ptr ||
-        !(SrcTy == MVT::i64 || SrcTy == MVT::x86mmx))
-      return SDValue();
-    SDValue SrcVal = Chain.getOperand(1);
-
-    // On 32bit systems, we can't store 64bit integers, use f64 instead.
-    bool Usef64 = TLI.isTypeLegal(MVT::f64) && !Subtarget->is64Bit();
-    if (Usef64)
-      SrcVal = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SrcVal);
-    SrcVal = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, Usef64 ? MVT::v2f64 : RegVT,
-                         SrcVal);
-
-    return DCI.CombineTo(N, Usef64 ?
-        DAG.getNode(ISD::BITCAST, dl, RegVT, SrcVal) : SrcVal, Chain);
-  }
-
   return SDValue();
 }
 
diff --git a/test/CodeGen/X86/2012-01-18-vbitcast.ll b/test/CodeGen/X86/2012-01-18-vbitcast.ll
index efba66be97e..9eb59e41ef7 100644
--- a/test/CodeGen/X86/2012-01-18-vbitcast.ll
+++ b/test/CodeGen/X86/2012-01-18-vbitcast.ll
@@ -1,15 +1,14 @@
 ; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mtriple=x86_64-pc-win32 | FileCheck %s
 
-; CHECK-LABEL: vcast:
+;CHECK-LABEL: vcast:
 define <2 x i32> @vcast(<2 x float> %a, <2 x float> %b) {
-; CHECK-NOT: pmovzxdq
-; CHECK-NOT: pmovzxdq
-; CHECK: movdqa (%{{.*}}), %[[R0:xmm[0-9]+]]
+;CHECK: pmovzxdq
+;CHECK: pmovzxdq
   %af = bitcast <2 x float> %a to <2 x i32>
   %bf = bitcast <2 x float> %b to <2 x i32>
-; CHECK-NEXT: psubq (%{{.*}}), %[[R0]]
   %x = sub <2 x i32> %af, %bf
-; CHECK: ret
+;CHECK: psubq
   ret <2 x i32> %x
+;CHECK: ret
 }
diff --git a/test/CodeGen/X86/lower-bitcast.ll b/test/CodeGen/X86/lower-bitcast.ll
index 5fad82497b9..edb8433ec30 100644
--- a/test/CodeGen/X86/lower-bitcast.ll
+++ b/test/CodeGen/X86/lower-bitcast.ll
@@ -68,13 +68,12 @@ define i64 @test4(i64 %A) {
   %2 = bitcast <2 x i32> %add to i64
   ret i64 %2
 }
-; FIXME: At the moment we still produce the sequence paddd+pshufd.
+; FIXME: At the moment we still produce the sequence pshufd+paddd+pshufd.
 ; Ideally, we should fold that sequence into a single paddd. This is fixed with
 ; the widening legalization.
 ;
 ; CHECK-LABEL: test4
-; CHECK: movd
-; CHECK-NOT: pshufd
+; CHECK: pshufd
 ; CHECK-NEXT: paddd
 ; CHECK-NEXT: pshufd
 ; CHECK: ret
diff --git a/test/CodeGen/X86/mmx-movq2dq.ll b/test/CodeGen/X86/mmx-movq2dq.ll
deleted file mode 100644
index 9f46da53bd7..00000000000
--- a/test/CodeGen/X86/mmx-movq2dq.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | FileCheck %s -check-prefix=X86-32
-; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | FileCheck %s -check-prefix=X86-64
-
-; X86-32-LABEL: test0
-; X86-64-LABEL: test0
-define i32 @test0(<1 x i64>* %v4) {
-  %v5 = load <1 x i64>* %v4, align 8
-  %v12 = bitcast <1 x i64> %v5 to <4 x i16>
-  %v13 = bitcast <4 x i16> %v12 to x86_mmx
-  ; X86-32: pshufw $238
-  ; X86-32-NOT: movq
-  ; X86-32-NOT: movsd
-  ; X86-32: movq2dq
-  ; X86-64: pshufw $238
-  ; X86-64-NOT: movq
-  ; X86-64-NOT: pshufd
-  ; X86-64: movq2dq
-  ; X86-64-NEXT: movd
-  %v14 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %v13, i8 -18)
-  %v15 = bitcast x86_mmx %v14 to <4 x i16>
-  %v16 = bitcast <4 x i16> %v15 to <1 x i64>
-  %v17 = extractelement <1 x i64> %v16, i32 0
-  %v18 = bitcast i64 %v17 to <2 x i32>
-  %v19 = extractelement <2 x i32> %v18, i32 0
-  %v20 = add i32 %v19, 32
-  ret i32 %v20
-}
-
-declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8)
diff --git a/test/CodeGen/X86/widen_load-2.ll b/test/CodeGen/X86/widen_load-2.ll
index 0d5380eb3ac..c6bd96421d7 100644
--- a/test/CodeGen/X86/widen_load-2.ll
+++ b/test/CodeGen/X86/widen_load-2.ll
@@ -78,7 +78,8 @@ define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp
 ; CHECK-NEXT: paddd %[[R0]], %[[R1]]
 ; CHECK-NEXT: pextrw $4, %[[R1]], 4(%{{.*}})
 ; CHECK-NEXT: pshufb {{.*}}, %[[R1]]
-; CHECK-NEXT: movd %[[R1]], (%{{.*}})
+; CHECK-NEXT: pmovzxdq %[[R1]], %[[R0]]
+; CHECK-NEXT: movd %[[R0]], (%{{.*}})
 %a = load %i16vec3* %ap, align 16
 %b = load %i16vec3* %bp, align 16
 %x = add %i16vec3 %a, %b