From b82f8a28e8768d1acdb565adbda9ac8b241471a9 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 26 Mar 2014 16:30:54 +0000 Subject: [PATCH] Revert "X86 memcpy lowering: use "rep movs" even when esi is used as base pointer" (r204174) > For functions where esi is used as base pointer, we would previously fall ba > from lowering memcpy with "rep movs" because that clobbers esi. > > With this patch, we just store esi in another physical register, and restore > it afterwards. This adds a little bit of register preassure, but the more > efficient memcpy should be worth it. > > Differential Revision: http://llvm-reviews.chandlerc.com/D2968 This didn't work. I was ending up with code like this: lea edi,[esi+38h] mov ecx,0Fh mov edx,esi mov esi,ebx rep movs dword ptr es:[edi],dword ptr [esi] lea ecx,[esi+74h] <-- Ooops, we're now using esi before restoring it from edx. add ebx,3Ch mov esi,edx I guess if we want to do this we need stronger glue or something, or doing the expansion much later. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204829 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86SelectionDAGInfo.cpp | 42 ++++++------------- .../X86/inline-asm-sp-clobber-memcpy.ll | 4 +- test/CodeGen/X86/stack-align-memcpy.ll | 9 +--- 3 files changed, 16 insertions(+), 39 deletions(-) diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index 3b3ab2a83a1..b9c620fddc4 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -200,11 +200,13 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SrcPtrInfo.getAddrSpace() >= 256) return SDValue(); - // If ESI is used as a base pointer, we must preserve it when doing rep movs. + // ESI might be used as a base pointer, in that case we can't simply overwrite + // the register. Fall back to generic code. const X86RegisterInfo *TRI = static_cast(DAG.getTarget().getRegisterInfo()); - bool PreserveESI = TRI->hasBasePointer(DAG.getMachineFunction()) && - TRI->getBaseRegister() == X86::ESI; + if (TRI->hasBasePointer(DAG.getMachineFunction()) && + TRI->getBaseRegister() == X86::ESI) + return SDValue(); MVT AVT; if (Align & 1) @@ -223,45 +225,27 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Count = DAG.getIntPtrConstant(CountVal); unsigned BytesLeft = SizeVal % UBytes; - - if (PreserveESI) { - // Save ESI to a physical register. (We cannot use a virtual register - // because if it is spilled we wouldn't be able to reload it.) - // We don't glue this because the register dependencies are explicit. - Chain = DAG.getCopyToReg(Chain, dl, X86::EDX, - DAG.getRegister(X86::ESI, MVT::i32)); - } - - SDValue InGlue(0, 0); + SDValue InFlag(0, 0); Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX : X86::ECX, - Count, InGlue); - InGlue = Chain.getValue(1); + Count, InFlag); + InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : X86::EDI, - Dst, InGlue); - InGlue = Chain.getValue(1); + Dst, InFlag); + InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI : X86::ESI, - Src, InGlue); - InGlue = Chain.getValue(1); + Src, InFlag); + InFlag = Chain.getValue(1); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue Ops[] = { Chain, DAG.getValueType(AVT), InGlue }; - // FIXME: Make X86rep_movs explicitly use FCX, RDI, RSI instead of glue. + SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops, array_lengthof(Ops)); - if (PreserveESI) { - InGlue = RepMovs.getValue(1); - RepMovs = DAG.getCopyToReg(RepMovs, dl, X86::ESI, - DAG.getRegister(X86::EDX, MVT::i32), InGlue); - } - SmallVector Results; Results.push_back(RepMovs); - - if (BytesLeft) { // Handle the last 1 - 7 bytes. unsigned Offset = SizeVal - BytesLeft; diff --git a/test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll b/test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll index f64ccb84683..b55571bcba0 100644 --- a/test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll +++ b/test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll @@ -13,7 +13,5 @@ define void @test1(%struct.foo* nocapture %x, i32 %y, i8* %z) nounwind { ; CHECK-LABEL: test1: ; CHECK: movl %esp, %esi -; CHECK: movl %esi, %edx -; CHECK: rep;movsl -; CHECK: movl %edx, %esi +; CHECK-NOT: rep;movsl } diff --git a/test/CodeGen/X86/stack-align-memcpy.ll b/test/CodeGen/X86/stack-align-memcpy.ll index d6ea8b31475..0cc3aa84889 100644 --- a/test/CodeGen/X86/stack-align-memcpy.ll +++ b/test/CodeGen/X86/stack-align-memcpy.ll @@ -15,9 +15,7 @@ define void @test1(%struct.foo* nocapture %x, i32 %y) nounwind { ; CHECK-LABEL: test1: ; CHECK: andl $-16, %esp ; CHECK: movl %esp, %esi -; CHECK: movl %esi, %edx -; CHECK: rep;movsl -; CHECK: movl %edx, %esi +; CHECK-NOT: rep;movsl } ; PR19012 @@ -30,9 +28,7 @@ define void @test2(%struct.foo* nocapture %x, i32 %y, i8* %z) nounwind { ; CHECK-LABEL: test2: ; CHECK: movl %esp, %esi -; CHECK: movl %esi, %edx -; CHECK: rep;movsl -; CHECK: movl %edx, %esi +; CHECK-NOT: rep;movsl } ; Check that we do use rep movs if we make the alloca static. @@ -43,6 +39,5 @@ define void @test3(%struct.foo* nocapture %x, i32 %y, i8* %z) nounwind { ret void ; CHECK-LABEL: test3: -; CHECK-NOT: movl %esi, %edx ; CHECK: rep;movsl } -- 2.34.1