From: Rafael Espindola Date: Fri, 19 Oct 2007 14:35:17 +0000 (+0000) Subject: split LowerMEMCPY into LowerMEMCPYCall and LowerMEMCPYInline in the ARM backend. X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=7b73a5d6dead3fa765cdde8316a19ac8930cc60d;p=oota-llvm.git split LowerMEMCPY into LowerMEMCPYCall and LowerMEMCPYInline in the ARM backend. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@43176 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 15a84091688..99d1d24d070 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1288,40 +1288,73 @@ static SDOperand LowerSRx(SDOperand Op, SelectionDAG &DAG, } SDOperand ARMTargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) { - SDOperand Chain = Op.getOperand(0); - SDOperand Dest = Op.getOperand(1); - SDOperand Src = Op.getOperand(2); - SDOperand Count = Op.getOperand(3); - unsigned Align = - (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue(); + SDOperand ChainOp = Op.getOperand(0); + SDOperand DestOp = Op.getOperand(1); + SDOperand SourceOp = Op.getOperand(2); + SDOperand CountOp = Op.getOperand(3); + SDOperand AlignOp = Op.getOperand(4); + SDOperand AlwaysInlineOp = Op.getOperand(5); + + bool AlwaysInline = (bool)cast<ConstantSDNode>(AlwaysInlineOp)->getValue(); + unsigned Align = (unsigned)cast<ConstantSDNode>(AlignOp)->getValue(); if (Align == 0) Align = 1; - ConstantSDNode *I = dyn_cast<ConstantSDNode>(Count); - // Just call memcpy if: - // not 4-byte aligned - // size is unknown - // size is >= the threshold. 
- if ((Align & 3) != 0 || - !I || - I->getValue() >= 64 || - (I->getValue() & 3) != 0) { - MVT::ValueType IntPtr = getPointerTy(); - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Ty = getTargetData()->getIntPtrType(); - Entry.Node = Op.getOperand(1); Args.push_back(Entry); - Entry.Node = Op.getOperand(2); Args.push_back(Entry); - Entry.Node = Op.getOperand(3); Args.push_back(Entry); - std::pair<SDOperand,SDOperand> CallResult = + // If size is unknown, call memcpy. + ConstantSDNode *I = dyn_cast<ConstantSDNode>(CountOp); + if (!I) { + assert(!AlwaysInline && "Cannot inline copy of unknown size"); + return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG); + } + unsigned Size = I->getValue(); + + if (AlwaysInline) + return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align, DAG); + + // The libc version is likely to be faster for the following cases. It can + // use the address value and run time information about the CPU. + // With glibc 2.6.1 on a core 2, copying an array of 100M longs was 30% faster + + // If not DWORD aligned, call memcpy. + if ((Align & 3) != 0) + return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG); + + // If size is more than the threshold, call memcpy. 
+ // if (Size > Subtarget->getMinRepStrSizeThreshold()) + if (Size >= 64) + return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG); + + return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align, DAG); +} + +SDOperand ARMTargetLowering::LowerMEMCPYCall(SDOperand Chain, + SDOperand Dest, + SDOperand Source, + SDOperand Count, + SelectionDAG &DAG) { + MVT::ValueType IntPtr = getPointerTy(); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = getTargetData()->getIntPtrType(); + Entry.Node = Dest; Args.push_back(Entry); + Entry.Node = Source; Args.push_back(Entry); + Entry.Node = Count; Args.push_back(Entry); + std::pair<SDOperand,SDOperand> CallResult = LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false, DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG); - return CallResult.second; - } + return CallResult.second; +} + +SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain, + SDOperand Dest, + SDOperand Source, + unsigned Size, + unsigned Align, + SelectionDAG &DAG) { - // Otherwise do repeated 4-byte loads and stores. To be improved. - assert((I->getValue() & 3) == 0); + // Do repeated 4-byte loads and stores. To be improved. + assert((Size& 3) == 0); assert((Align & 3) == 0); - unsigned NumMemOps = I->getValue() >> 2; + unsigned NumMemOps = Size >> 2; unsigned EmittedNumMemOps = 0; unsigned SrcOff = 0, DstOff = 0; MVT::ValueType VT = MVT::i32; @@ -1337,7 +1370,7 @@ SDOperand ARMTargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) { unsigned i; for (i=0; i