From: Rafael Espindola Date: Fri, 19 Oct 2007 10:41:11 +0000 (+0000) Subject: Add support for byval function whose argument is not 32 bit aligned. X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=5c0d6ed325417baa5d119af9c2b6790231d8565f;p=oota-llvm.git Add support for byval function whose argument is not 32 bit aligned. To do this it is necessary to add an "always inline" argument to the memcpy node. For completeness I have also added this argument to the memmove and memset nodes. I have also added getMem* functions, because the extra argument makes it cumbersome to use getNode and because I get confused by it :-) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@43172 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index 76ed7f6f661..6a66d030345 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -307,7 +307,19 @@ public: SDOperand N5); SDOperand getNode(unsigned Opcode, SDVTList VTs, const SDOperand *Ops, unsigned NumOps); - + + SDOperand getMemcpy(SDOperand Chain, SDOperand Dest, SDOperand Src, + SDOperand Size, SDOperand Align, + SDOperand AlwaysInline); + + SDOperand getMemmove(SDOperand Chain, SDOperand Dest, SDOperand Src, + SDOperand Size, SDOperand Align, + SDOperand AlwaysInline); + + SDOperand getMemset(SDOperand Chain, SDOperand Dest, SDOperand Src, + SDOperand Size, SDOperand Align, + SDOperand AlwaysInline); + /// getSetCC - Helper function to make it easier to build SetCC's if you just /// have an ISD::CondCode instead of an SDOperand. /// diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 19f144653f6..8a0ff29a4c0 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -485,10 +485,10 @@ namespace ISD { // it returns an output chain. 
STACKRESTORE, - // MEMSET/MEMCPY/MEMMOVE - The first operand is the chain, and the rest - // correspond to the operands of the LLVM intrinsic functions. The only - // result is a token chain. The alignment argument is guaranteed to be a - // Constant node. + // MEMSET/MEMCPY/MEMMOVE - The first operand is the chain. The following + // correspond to the operands of the LLVM intrinsic functions and the last + // one is AlwaysInline. The only result is a token chain. The alignment + // argument is guaranteed to be a Constant node. MEMSET, MEMMOVE, MEMCPY, diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 9516dff6cb8..1ad8e802585 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2506,18 +2506,31 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) { break; } + SDOperand Tmp6; + switch (getTypeAction(Node->getOperand(5).getValueType())) { // bool + case Expand: assert(0 && "Cannot expand this yet!"); + case Legal: + Tmp6 = LegalizeOp(Node->getOperand(5)); + break; + case Promote: + Tmp6 = PromoteOp(Node->getOperand(5)); + break; + } + switch (TLI.getOperationAction(Node->getOpcode(), MVT::Other)) { default: assert(0 && "This action not implemented for this operation!"); case TargetLowering::Custom: isCustom = true; // FALLTHROUGH - case TargetLowering::Legal: - Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, Tmp3, Tmp4, Tmp5); + case TargetLowering::Legal: { + SDOperand Ops[] = { Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6 }; + Result = DAG.UpdateNodeOperands(Result, Ops, 6); if (isCustom) { Tmp1 = TLI.LowerOperation(Result, DAG); if (Tmp1.Val) Result = Tmp1; } break; + } case TargetLowering::Expand: { // Otherwise, the target does not support this operation. Lower the // operation to an explicit libcall as appropriate. 
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index e6ce2eb9951..09e8b579a0f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2269,6 +2269,30 @@ SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT, return getNode(Opcode, VT, Ops, 5); } +SDOperand SelectionDAG::getMemcpy(SDOperand Chain, SDOperand Dest, + SDOperand Src, SDOperand Size, + SDOperand Align, + SDOperand AlwaysInline) { + SDOperand Ops[] = { Chain, Dest, Src, Size, Align, AlwaysInline }; + return getNode(ISD::MEMCPY, MVT::Other, Ops, 6); +} + +SDOperand SelectionDAG::getMemmove(SDOperand Chain, SDOperand Dest, + SDOperand Src, SDOperand Size, + SDOperand Align, + SDOperand AlwaysInline) { + SDOperand Ops[] = { Chain, Dest, Src, Size, Align, AlwaysInline }; + return getNode(ISD::MEMMOVE, MVT::Other, Ops, 6); +} + +SDOperand SelectionDAG::getMemset(SDOperand Chain, SDOperand Dest, + SDOperand Src, SDOperand Size, + SDOperand Align, + SDOperand AlwaysInline) { + SDOperand Ops[] = { Chain, Dest, Src, Size, Align, AlwaysInline }; + return getNode(ISD::MEMSET, MVT::Other, Ops, 6); +} + SDOperand SelectionDAG::getLoad(MVT::ValueType VT, SDOperand Chain, SDOperand Ptr, const Value *SV, int SVOffset, diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index a5b6300f6e3..f69b0959400 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -4367,7 +4367,22 @@ void SelectionDAGLowering::visitMemIntrinsic(CallInst &I, unsigned Op) { } } - DAG.setRoot(DAG.getNode(Op, MVT::Other, getRoot(), Op1, Op2, Op3, Op4)); + SDOperand AlwaysInline = DAG.getConstant(0, MVT::i1); + SDOperand Node; + switch(Op) { + default: + assert(0 && "Unknown Op"); + case ISD::MEMCPY: + Node = DAG.getMemcpy(getRoot(), Op1, Op2, Op3, Op4, AlwaysInline); + break; + case ISD::MEMMOVE: + Node = 
DAG.getMemmove(getRoot(), Op1, Op2, Op3, Op4, AlwaysInline); + break; + case ISD::MEMSET: + Node = DAG.getMemset(getRoot(), Op1, Op2, Op3, Op4, AlwaysInline); + break; + } + DAG.setRoot(Node); } //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 9c9c6a41e22..2d16c3c63c6 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1246,9 +1246,10 @@ X86TargetLowering::LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG, SDOperand AlignNode = DAG.getConstant(Align, MVT::i32); SDOperand SizeNode = DAG.getConstant(Size, MVT::i32); + SDOperand AlwaysInline = DAG.getConstant(1, MVT::i1); - return DAG.getNode(ISD::MEMCPY, MVT::Other, Chain, PtrOff, Arg, SizeNode, - AlignNode); + return DAG.getMemcpy(Chain, PtrOff, Arg, SizeNode, AlignNode, + AlwaysInline); } else { return DAG.getStore(Chain, Arg, PtrOff, NULL, 0); } @@ -4472,9 +4473,23 @@ SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) { SDOperand SourceOp = Op.getOperand(2); SDOperand CountOp = Op.getOperand(3); SDOperand AlignOp = Op.getOperand(4); + SDOperand AlwaysInlineOp = Op.getOperand(5); + + bool AlwaysInline = (bool)cast(AlwaysInlineOp)->getValue(); unsigned Align = (unsigned)cast(AlignOp)->getValue(); if (Align == 0) Align = 1; + // If size is unknown, call memcpy. + ConstantSDNode *I = dyn_cast(CountOp); + if (!I) { + assert(!AlwaysInline && "Cannot inline copy of unknown size"); + return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG); + } + unsigned Size = I->getValue(); + + if (AlwaysInline) + return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align, DAG); + // The libc version is likely to be faster for the following cases. It can // use the address value and run time information about the CPU. 
// With glibc 2.6.1 on a core 2, coping an array of 100M longs was 30% faster @@ -4483,13 +4498,7 @@ SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) { if ((Align & 3) != 0) return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG); - // If size is unknown, call memcpy. - ConstantSDNode *I = dyn_cast(CountOp); - if (!I) - return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG); - // If size is more than the threshold, call memcpy. - unsigned Size = I->getValue(); if (Size > Subtarget->getMinRepStrSizeThreshold()) return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG); diff --git a/test/CodeGen/X86/byval3.ll b/test/CodeGen/X86/byval3.ll index f4942869687..b3794eccb46 100644 --- a/test/CodeGen/X86/byval3.ll +++ b/test/CodeGen/X86/byval3.ll @@ -1,4 +1,5 @@ ; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsl | count 2 +; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2 %struct.s = type { i32, i32, i32, i32, i32, i32 } diff --git a/test/CodeGen/X86/byval4.ll b/test/CodeGen/X86/byval4.ll new file mode 100644 index 00000000000..9ce635b562d --- /dev/null +++ b/test/CodeGen/X86/byval4.ll @@ -0,0 +1,28 @@ +; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsl | count 2 +; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsw | count 2 + +%struct.s = type { i16, i16, i16, i16, i16, i16 } + + +define void @g(i16 signext %a1, i16 signext %a2, i16 signext %a3, + i16 signext %a4, i16 signext %a5, i16 signext %a6) { +entry: + %a = alloca %struct.s, align 16 + %tmp = getelementptr %struct.s* %a, i32 0, i32 0 + store i16 %a1, i16* %tmp, align 16 + %tmp2 = getelementptr %struct.s* %a, i32 0, i32 1 + store i16 %a2, i16* %tmp2, align 16 + %tmp4 = getelementptr %struct.s* %a, i32 0, i32 2 + store i16 %a3, i16* %tmp4, align 16 + %tmp6 = getelementptr %struct.s* %a, i32 0, i32 3 + store i16 %a4, i16* %tmp6, align 16 + %tmp8 = getelementptr %struct.s* %a, i32 0, i32 4 + store i16 %a5, i16* %tmp8, align 16 + %tmp10 = 
getelementptr %struct.s* %a, i32 0, i32 5 + store i16 %a6, i16* %tmp10, align 16 + call void @f( %struct.s* %a byval ) + call void @f( %struct.s* %a byval ) + ret void +} + +declare void @f(%struct.s* byval)