From c85e1716f0e45e4c18a9ef2fbe431a51ac3a4252 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Thu, 11 Oct 2007 19:40:01 +0000 Subject: [PATCH] Added tail call optimization to the x86 back end. It can be enabled by passing -tailcallopt to llc. The optimization is performed if the following conditions are satisfied: * caller/callee are fastcc * elf/pic is disabled OR elf/pic enabled + callee is in module + callee has visibility protected or hidden git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@42870 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetLowering.h | 9 + include/llvm/Target/TargetOptions.h | 5 + lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 48 ++ lib/Target/TargetMachine.cpp | 7 + lib/Target/X86/README.txt | 80 +++ lib/Target/X86/X86CallingConv.td | 50 ++ lib/Target/X86/X86ISelLowering.cpp | 511 ++++++++++++++++-- lib/Target/X86/X86ISelLowering.h | 24 +- lib/Target/X86/X86InstrInfo.cpp | 2 + lib/Target/X86/X86InstrInfo.td | 43 +- lib/Target/X86/X86InstrX86-64.td | 35 ++ lib/Target/X86/X86MachineFunctionInfo.h | 15 +- lib/Target/X86/X86RegisterInfo.cpp | 129 ++++- test/CodeGen/X86/tailcall1.ll | 11 + test/CodeGen/X86/tailcallpic1.ll | 12 + test/CodeGen/X86/tailcallpic2.ll | 12 + 16 files changed, 928 insertions(+), 65 deletions(-) create mode 100644 test/CodeGen/X86/tailcall1.ll create mode 100644 test/CodeGen/X86/tailcallpic1.ll create mode 100644 test/CodeGen/X86/tailcallpic2.ll diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 8586d7f0916..1352eaff732 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -860,6 +860,15 @@ public: /// implement this. The default implementation of this aborts. virtual SDOperand LowerOperation(SDOperand Op, SelectionDAG &DAG); + /// IsEligibleForTailCallOptimization - Check whether the call is eligible for + /// tail call optimization. Target which want to do tail call optimization + /// should implement this function. + virtual bool IsEligibleForTailCallOptimization(SDOperand Call, + SDOperand Ret, + SelectionDAG &DAG) const { + return false; + } + /// CustomPromoteOperation - This callback is invoked for operations that are /// unsupported by the target, are registered to use 'custom' lowering, and /// whose type needs to be promoted. diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h index 7421f96b7bc..dd544324420 100644 --- a/include/llvm/Target/TargetOptions.h +++ b/include/llvm/Target/TargetOptions.h @@ -73,6 +73,11 @@ namespace llvm { /// ExceptionHandling - This flag indicates that exception information should /// be emitted. extern bool ExceptionHandling; + + /// PerformTailCallOpt - This flag is enabled when the -tailcallopt is + /// specified on the commandline. When the flag is on, the target will perform + /// tail call optimization (pop the caller's stack) providing it supports it. 
+ extern bool PerformTailCallOpt; } // End llvm namespace #endif diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 5f321654d86..b1bf475c473 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -4444,6 +4444,48 @@ static void copyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB, } } +/// CheckDAGForTailCallsAndFixThem - This Function looks for CALL nodes in the +/// DAG and fixes their tailcall attribute operand +static void CheckDAGForTailCallsAndFixThem(SelectionDAG &DAG, + TargetLowering& TLI) { + SDNode * Ret = NULL; + SDOperand Terminator = DAG.getRoot(); + + // Find RET node. + if (Terminator.getOpcode() == ISD::RET) { + Ret = Terminator.Val; + } + + // Fix tail call attribute of CALL nodes. + for (SelectionDAG::allnodes_iterator BE = DAG.allnodes_begin(), + BI = prior(DAG.allnodes_end()); BI != BE; --BI) { + if (BI->getOpcode() == ISD::CALL) { + SDOperand OpRet(Ret, 0); + SDOperand OpCall(static_cast(BI), 0); + bool isMarkedTailCall = + cast(OpCall.getOperand(3))->getValue() != 0; + // If CALL node has tail call attribute set to true and the call is not + // eligible (no RET or the target rejects) the attribute is fixed to + // false. The TargetLowering::IsEligibleForTailCallOptimization function + // must correctly identify tail call optimizable calls. + if (isMarkedTailCall && + (Ret==NULL || + !TLI.IsEligibleForTailCallOptimization(OpCall, OpRet, DAG))) { + SmallVector Ops; + unsigned idx=0; + for(SDNode::op_iterator I =OpCall.Val->op_begin(), + E=OpCall.Val->op_end(); I!=E; I++, idx++) { + if (idx!=3) + Ops.push_back(*I); + else + Ops.push_back(DAG.getConstant(false, TLI.getPointerTy())); + } + DAG.UpdateNodeOperands(OpCall, Ops.begin(), Ops.size()); + } + } + } +} + void SelectionDAGISel::BuildSelectionDAG(SelectionDAG &DAG, BasicBlock *LLVMBB, std::vector > &PHINodesToUpdate, FunctionLoweringInfo &FuncInfo) { @@ -4621,6 +4663,12 @@ void SelectionDAGISel::BuildSelectionDAG(SelectionDAG &DAG, BasicBlock *LLVMBB, // Make sure the root of the DAG is up-to-date. DAG.setRoot(SDL.getRoot()); + + // Check whether calls in this block are real tail calls. Fix up CALL nodes + // with correct tailcall attribute so that the target can rely on the tailcall + // attribute indicating whether the call is really eligible for tail call + // optimization. 
+  CheckDAGForTailCallsAndFixThem(DAG, TLI);
 }
 
 void SelectionDAGISel::CodeGenAndEmitDAG(SelectionDAG &DAG) {
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 6c00a3f492b..9caea11dd39 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -33,6 +33,7 @@ namespace llvm {
   bool ExceptionHandling;
   Reloc::Model RelocationModel;
   CodeModel::Model CMModel;
+  bool PerformTailCallOpt;
 }
 namespace {
   cl::opt PrintCode("print-machineinstrs",
@@ -116,6 +117,12 @@ namespace {
       clEnumValN(CodeModel::Large, "large",
                  " Large code model"),
       clEnumValEnd));
+
+  cl::opt
+  EnablePerformTailCallOpt("tailcallopt",
+    cl::desc("Turn on tail call optimization."),
+    cl::location(PerformTailCallOpt),
+    cl::init(false));
 }
 //---------------------------------------------------------------------------
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 9bafff73d5c..0d4dce32d83 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1368,3 +1368,83 @@ L7:
 L5:
 //===---------------------------------------------------------------------===//
+Tail call optimization improvements: Tail call optimization currently
+pushes all arguments to the top of the stack (their normal place if
+this were not a tail-call-optimized function call) before moving them
+to their actual stack slots. This is done to prevent overwriting
+parameters (see example below) that might still be needed, since the
+arguments of the callee overwrite the caller's arguments.
+
+  example:
+
+int callee(int32, int64);
+int caller(int32 arg1, int32 arg2) {
+  int64 local = arg2 * 2;
+  return callee(arg2, (int64)local);
+}
+
+[arg1]          [!arg2 no longer valid since we moved local onto it]
+[arg2]      ->  [(int64)
+[RETADDR]        local  ]
+
+Moving arg1 onto the stack slot of the callee function would overwrite
+arg2 of the caller.
+
+Possible optimizations:
+
+ - only push those arguments to the top of the stack that are actual
+   parameters of the caller function and have no local value in the
+   caller
+
+   in the above example local does not need to be pushed onto the top
+   of the stack as it is definitely not a parameter of the caller
+
+ - analyze the actual parameters of the callee to see which would
+   overwrite a caller parameter which is used by the callee and only
+   push them onto the top of the stack
+
+   int callee (int32 arg1, int32 arg2);
+   int caller (int32 arg1, int32 arg2) {
+     return callee(arg1,arg2);
+   }
+
+   here we don't need to write any variables to the top of the stack
+   since they don't overwrite each other
+
+   int callee (int32 arg1, int32 arg2);
+   int caller (int32 arg1, int32 arg2) {
+     return callee(arg2,arg1);
+   }
+
+   here we need to push the arguments because they overwrite each other
+
+
+code for lowering directly onto the caller's arguments:
++  SmallVector, 8> RegsToPass;
++  SmallVector MemOpChains;
++
++  SDOperand FramePtr;
++  SDOperand PtrOff;
++  SDOperand FIN;
++  int FI = 0;
++  // Walk the register/memloc assignments, inserting copies/loads.
++  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
++    CCValAssign &VA = ArgLocs[i];
++    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
++
++    ....
++
++    if (VA.isRegLoc()) {
++      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
++    } else {
++      assert(VA.isMemLoc());
++      // create frame index
++      int32_t Offset = VA.getLocMemOffset()+FPDiff;
++      uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8;
++      FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
++      FIN = DAG.getFrameIndex(FI, MVT::i32);
++      // store relative to framepointer
++      MemOpChains.push_back(DAG.getStore(Chain, Arg, FIN, NULL, 0));
++    }
++  }
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 9c2d95a1991..f23e5806563 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -127,6 +127,40 @@ def CC_X86_64_C : CallingConv<[
   CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
 ]>;
 
+// Tail call convention (fast): one register is reserved for the target
+// address, namely R9.
+def CC_X86_64_TailCall : CallingConv<[
+  // Promote i8/i16 arguments to i32.
+  CCIfType<[i8, i16], CCPromoteToType>,
+
+  CCIfStruct>,
+
+  // The first 6 integer arguments are passed in integer registers.
+  CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D]>>,
+  CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>,
+
+  // The first 8 FP/Vector arguments are passed in XMM registers.
+  CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+           CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>,
+
+  // The first 8 MMX vector arguments are passed in GPRs.
+  CCIfType<[v8i8, v4i16, v2i32, v1i64],
+           CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>,
+
+  // The 'nest' parameter, if any, is passed in R10.
+  CCIfNest>,
+
+  // Integer/FP values get stored in stack slots that are 8 bytes in size and
+  // 8-byte aligned if there are no more registers to hold them.
+  CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
+
+  // Vectors get 16-byte stack slots that are 16-byte aligned.
+  CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+
+  // __m64 vectors get 8-byte stack slots that are 8-byte aligned.
+  CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
+]>;
+
 //===----------------------------------------------------------------------===//
 // X86 C Calling Convention
@@ -173,6 +207,22 @@ def CC_X86_32_C : CallingConv<[
   CCDelegateTo
 ]>;
 
+/// Same as the C calling convention, except that ECX is not free: it is used
+/// for storing a potential pointer to the tail-called function.
+def CC_X86_32_TailCall : CallingConv<[
+  // Promote i8/i16 arguments to i32.
+  CCIfType<[i8, i16], CCPromoteToType>,
+
+  // The 'nest' parameter, if any, is passed in ECX.
+  CCIfNest>,
+
+  // The first 3 integer arguments, if marked 'inreg' and if the call is not
+  // a vararg call, are passed in integer registers.
+  CCIfNotVarArg>>>,
+
+  // Otherwise, same as everything else.
+  CCDelegateTo
+]>;
 
def CC_X86_32_FastCall : CallingConv<[
  // Promote i8/i16 arguments to i32.
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1917a6a291f..8767d8d33b9 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -32,6 +32,8 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SSARegMap.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ParameterAttributes.h" @@ -43,6 +45,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) X86ScalarSSEf64 = Subtarget->hasSSE2(); X86ScalarSSEf32 = Subtarget->hasSSE1(); X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP; + RegInfo = TM.getRegisterInfo(); @@ -641,6 +644,19 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) //===----------------------------------------------------------------------===// #include "X86GenCallingConv.inc" + +/// GetPossiblePreceedingTailCall - Get preceeding X86ISD::TAILCALL node if it +/// exists skip possible ISD:TokenFactor. +static SDOperand GetPossiblePreceedingTailCall(SDOperand Chain) { + if (Chain.getOpcode()==X86ISD::TAILCALL) { + return Chain; + } else if (Chain.getOpcode()==ISD::TokenFactor) { + if (Chain.getNumOperands() && + Chain.getOperand(0).getOpcode()==X86ISD::TAILCALL) + return Chain.getOperand(0); + } + return Chain; +} /// LowerRET - Lower an ISD::RET node. SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) { @@ -651,8 +667,7 @@ SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) { bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs); CCInfo.AnalyzeReturn(Op.Val, RetCC_X86); - - + // If this is the first return lowered for this function, add the regs to the // liveout set for the function. if (DAG.getMachineFunction().liveout_empty()) { @@ -660,10 +675,38 @@ SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) { if (RVLocs[i].isRegLoc()) DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg()); } - SDOperand Chain = Op.getOperand(0); - SDOperand Flag; + // Handle tail call return. + Chain = GetPossiblePreceedingTailCall(Chain); + if (Chain.getOpcode() == X86ISD::TAILCALL) { + SDOperand TailCall = Chain; + SDOperand TargetAddress = TailCall.getOperand(1); + SDOperand StackAdjustment = TailCall.getOperand(2); + assert ( ((TargetAddress.getOpcode() == ISD::Register && + (cast(TargetAddress)->getReg() == X86::ECX || + cast(TargetAddress)->getReg() == X86::R9)) || + TargetAddress.getOpcode() == ISD::TargetExternalSymbol || + TargetAddress.getOpcode() == ISD::TargetGlobalAddress) && + "Expecting an global address, external symbol, or register"); + assert( StackAdjustment.getOpcode() == ISD::Constant && + "Expecting a const value"); + + SmallVector Operands; + Operands.push_back(Chain.getOperand(0)); + Operands.push_back(TargetAddress); + Operands.push_back(StackAdjustment); + // Copy registers used by the call. Last operand is a flag so it is not + // copied. + for(unsigned i=3; i < TailCall.getNumOperands()-1;i++) { + Operands.push_back(Chain.getOperand(i)); + } + return DAG.getNode(X86ISD::TC_RETURN, MVT::Other, &Operands[0], Operands.size()); + } + + // Regular return. + SDOperand Flag; + // Copy the result values into the output registers. 
if (RVLocs.size() != 1 || !RVLocs[0].isRegLoc() || RVLocs[0].getLocReg() != X86::ST0) { @@ -684,7 +727,7 @@ SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) { if ((X86ScalarSSEf32 && RVLocs[0].getValVT()==MVT::f32) || (X86ScalarSSEf64 && RVLocs[0].getValVT()==MVT::f64)) { SDOperand MemLoc; - + // If this is a load into a scalarsse value, don't store the loaded value // back to the stack, only to reload it: just replace the scalar-sse load. if (ISD::isNON_EXTLoad(Value.Val) && @@ -784,12 +827,14 @@ LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall, //===----------------------------------------------------------------------===// -// C & StdCall Calling Convention implementation +// C & StdCall & Fast Calling Convention implementation //===----------------------------------------------------------------------===// // StdCall calling convention seems to be standard for many Windows' API // routines and around. It differs from C calling convention just a little: // callee should clean up the stack, not caller. Symbols should be also // decorated in some fancy way :) It doesn't support any vector arguments. +// For info on fast calling convention see Fast Calling Convention (tail call) +// implementation LowerX86_32FastCCCallTo. /// AddLiveIn - This helper function adds the specified physical register to the /// MachineFunction as a live in value. It also creates a corresponding virtual @@ -802,6 +847,9 @@ static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg, return VReg; } +// align stack arguments according to platform alignment needed for tail calls +unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG& DAG); + SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG, const CCValAssign &VA, MachineFrameInfo *MFI, @@ -826,13 +874,17 @@ SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG, MachineFrameInfo *MFI = MF.getFrameInfo(); SDOperand Root = Op.getOperand(0); bool isVarArg = cast(Op.getOperand(2))->getValue() != 0; - + unsigned CC = MF.getFunction()->getCallingConv(); // Assign locations to all of the incoming arguments. SmallVector ArgLocs; - CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg, + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); - CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C); - + // Check for possible tail call calling convention. + if (CC == CallingConv::Fast && PerformTailCallOpt) + CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_TailCall); + else + CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C); + SmallVector ArgValues; unsigned LastVal = ~0U; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { @@ -877,6 +929,9 @@ SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG, } unsigned StackSize = CCInfo.getNextStackOffset(); + // align stack specially for tail calls + if (CC==CallingConv::Fast) + StackSize = GetAlignedArgumentStackSize(StackSize,DAG); ArgValues.push_back(Root); @@ -885,7 +940,12 @@ SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG, if (isVarArg) VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize); - if (isStdCall && !isVarArg) { + // Tail call calling convention (CallingConv::Fast) does not support varargs. 
+ assert( !(isVarArg && CC == CallingConv::Fast) && + "CallingConv::Fast does not support varargs."); + + if (isStdCall && !isVarArg && + (CC==CallingConv::Fast && PerformTailCallOpt || CC!=CallingConv::Fast)) { BytesToPopOnReturn = StackSize; // Callee pops everything.. BytesCallerReserves = 0; } else { @@ -914,17 +974,21 @@ SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG, unsigned CC) { SDOperand Chain = Op.getOperand(0); bool isVarArg = cast(Op.getOperand(2))->getValue() != 0; - bool isTailCall = cast(Op.getOperand(3))->getValue() != 0; SDOperand Callee = Op.getOperand(4); unsigned NumOps = (Op.getNumOperands() - 5) / 2; - + // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); - CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C); + if(CC==CallingConv::Fast && PerformTailCallOpt) + CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_TailCall); + else + CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C); // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); + if (CC==CallingConv::Fast) + NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG); Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy())); @@ -1023,19 +1087,21 @@ SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG, if (InFlag.Val) Ops.push_back(InFlag); - - Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL, - NodeTys, &Ops[0], Ops.size()); + + Chain = DAG.getNode(X86ISD::CALL, NodeTys, &Ops[0], Ops.size()); InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. unsigned NumBytesForCalleeToPush = 0; - if (CC == CallingConv::X86_StdCall) { + if (CC == CallingConv::X86_StdCall || + (CC == CallingConv::Fast && PerformTailCallOpt)) { if (isVarArg) NumBytesForCalleeToPush = isSRet ? 4 : 0; else NumBytesForCalleeToPush = NumBytes; + assert(!(isVarArg && CC==CallingConv::Fast) && + "CallingConv::Fast does not support varargs."); } else { // If this is is a call to a struct-return function, the callee // pops the hidden struct pointer, so we have to push it back. @@ -1132,7 +1198,8 @@ X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) { if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) { // Make sure the instruction takes 8n+4 bytes to make sure the start of the - // arguments and the arguments after the retaddr has been pushed are aligned. + // arguments and the arguments after the retaddr has been pushed are + // aligned. if ((StackSize & 7) == 0) StackSize += 4; } @@ -1194,7 +1261,8 @@ SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG, if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) { // Make sure the instruction takes 8n+4 bytes to make sure the start of the - // arguments and the arguments after the retaddr has been pushed are aligned. + // arguments and the arguments after the retaddr has been pushed are + // aligned. if ((NumBytes & 7) == 0) NumBytes += 4; } @@ -1292,8 +1360,8 @@ SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG, if (InFlag.Val) Ops.push_back(InFlag); - // FIXME: Do not generate X86ISD::TAILCALL for now. - Chain = DAG.getNode(isTailCall ? 
X86ISD::TAILCALL : X86ISD::CALL,
+  assert(isTailCall==false && "no tail call here");
+  Chain = DAG.getNode(X86ISD::CALL,
                       NodeTys, &Ops[0], Ops.size());
   InFlag = Chain.getValue(1);
@@ -1312,6 +1380,314 @@ SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
   return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
 }
 
+//===----------------------------------------------------------------------===//
+// Fast Calling Convention (tail call) implementation
+//===----------------------------------------------------------------------===//
+
+// Like the StdCall convention, the callee cleans up the arguments, except
+// that ECX is reserved for storing the address of the tail-called function.
+// Only 2 registers are free for argument passing (inreg). Tail call
+// optimization is performed provided:
+//  * tailcallopt is enabled
+//  * caller/callee are fastcc
+//  * elf/pic is disabled OR
+//  * elf/pic enabled + callee is in module + callee has
+//    visibility protected or hidden
+// To ensure the stack is aligned according to the platform ABI, pass
+// tail-call-align-stack. This makes sure that the argument delta is always
+// a multiple of the stack alignment. (Dynamic linkers need this - Darwin's
+// dyld, for example.)
+// If the tail-called callee has more arguments than the caller, the caller
+// needs to make sure that there is room to move the RETADDR to. This is
+// achieved by reserving an area the size of the argument delta right after
+// the original RETADDR, but before the saved frame pointer or the spilled
+// registers, e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4):
+// stack layout:
+//    arg1
+//    arg2
+//    RETADDR
+//    [ new RETADDR
+//      move area ]
+//    (possible EBP)
+//    ESI
+//    EDI
+//    local1 ..
+
+/// GetAlignedArgumentStackSize - Align the stack size, e.g. to 16n + 12 bytes
+/// for a 16-byte alignment requirement.
+unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
+                                                        SelectionDAG& DAG) {
+  if (PerformTailCallOpt) {
+    MachineFunction &MF = DAG.getMachineFunction();
+    const TargetMachine &TM = MF.getTarget();
+    const TargetFrameInfo &TFI = *TM.getFrameInfo();
+    unsigned StackAlignment = TFI.getStackAlignment();
+    uint64_t AlignMask = StackAlignment - 1;
+    int64_t Offset = StackSize;
+    unsigned SlotSize = Subtarget->is64Bit() ? 8 : 4;
+    if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
+      // Number smaller than 12 so just add the difference.
+      Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
+    } else {
+      // Mask out lower bits, add stackalignment once plus the 12 bytes.
+      Offset = ((~AlignMask) & Offset) + StackAlignment +
+               (StackAlignment-SlotSize);
+    }
+    StackSize = Offset;
+  }
+  return StackSize;
+}
+
+/// IsEligibleForTailCallOptimization - Check to see whether the next
+/// instruction following the call is a return. A call is eligible if the
+/// caller/callee calling conventions match (currently only fastcc supports
+/// tail calls) and the CALL is immediately followed by a RET.
+bool X86TargetLowering::IsEligibleForTailCallOptimization(SDOperand Call,
+                                                          SDOperand Ret,
+                                                          SelectionDAG& DAG) const {
+  bool IsEligible = false;
+
+  // Check whether the CALL node immediately precedes the RET node and whether
+  // the return uses the result of the node or is a void return.
+ if ((Ret.getNumOperands() == 1 && + (Ret.getOperand(0)== SDOperand(Call.Val,1) || + Ret.getOperand(0)== SDOperand(Call.Val,0))) || + (Ret.getOperand(0)== SDOperand(Call.Val,Call.Val->getNumValues()-1) && + Ret.getOperand(1)== SDOperand(Call.Val,0))) { + MachineFunction &MF = DAG.getMachineFunction(); + unsigned CallerCC = MF.getFunction()->getCallingConv(); + unsigned CalleeCC = cast(Call.getOperand(1))->getValue(); + if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) { + SDOperand Callee = Call.getOperand(4); + // On elf/pic %ebx needs to be livein. + if(getTargetMachine().getRelocationModel() == Reloc::PIC_ && + Subtarget->isPICStyleGOT()) { + // Can only do local tail calls with PIC. + GlobalValue * GV = 0; + GlobalAddressSDNode *G = dyn_cast(Callee); + if(G != 0 && + (GV = G->getGlobal()) && + (GV->hasHiddenVisibility() || GV->hasProtectedVisibility())) + IsEligible=true; + } else { + IsEligible=true; + } + } + } + return IsEligible; +} + +SDOperand X86TargetLowering::LowerX86_TailCallTo(SDOperand Op, + SelectionDAG &DAG, + unsigned CC) { + SDOperand Chain = Op.getOperand(0); + bool isVarArg = cast(Op.getOperand(2))->getValue() != 0; + bool isTailCall = cast(Op.getOperand(3))->getValue() != 0; + SDOperand Callee = Op.getOperand(4); + bool is64Bit = Subtarget->is64Bit(); + + assert(isTailCall && PerformTailCallOpt && "Should only emit tail calls."); + + // Analyze operands of the call, assigning locations to each operand. + SmallVector ArgLocs; + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); + if (is64Bit) + CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_TailCall); + else + CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_TailCall); + + + // Lower arguments at fp - stackoffset + fpdiff. + MachineFunction &MF = DAG.getMachineFunction(); + + unsigned NumBytesToBePushed = + GetAlignedArgumentStackSize(CCInfo.getNextStackOffset(), DAG); + + unsigned NumBytesCallerPushed = + MF.getInfo()->getBytesToPopOnReturn(); + int FPDiff = NumBytesCallerPushed - NumBytesToBePushed; + + // Set the delta of movement of the returnaddr stackslot. + // But only set if delta is greater than previous delta. + if (FPDiff < (MF.getInfo()->getTCReturnAddrDelta())) + MF.getInfo()->setTCReturnAddrDelta(FPDiff); + + // Adjust the ret address stack slot. + if (FPDiff) { + MVT::ValueType VT = is64Bit ? MVT::i64 : MVT::i32; + SDOperand RetAddrFrIdx = getReturnAddressFrameIndex(DAG); + RetAddrFrIdx = + DAG.getLoad(VT, DAG.getEntryNode(),RetAddrFrIdx, NULL, 0); + // Emit a store of the saved ret value to the new location. + int SlotSize = is64Bit ? 8 : 4; + int NewReturnAddrFI = + MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize); + SDOperand NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT); + Chain = DAG.getStore(Chain,RetAddrFrIdx, NewRetAddrFrIdx, NULL, 0); + } + + Chain = DAG. + getCALLSEQ_START(Chain, DAG.getConstant(NumBytesToBePushed, getPointerTy())); + + SmallVector, 8> RegsToPass; + SmallVector MemOpChains; + SmallVector MemOpChains2; + SDOperand FramePtr, StackPtr; + SDOperand PtrOff; + SDOperand FIN; + int FI = 0; + + // Walk the register/memloc assignments, inserting copies/loads. Lower + // arguments first to the stack slot where they would normally - in case of a + // normal function call - be. + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + SDOperand Arg = Op.getOperand(5+2*VA.getValNo()); + + // Promote the value if needed. 
+    switch (VA.getLocInfo()) {
+    default: assert(0 && "Unknown loc info!");
+    case CCValAssign::Full: break;
+    case CCValAssign::SExt:
+      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::ZExt:
+      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::AExt:
+      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
+      break;
+    }
+
+    if (VA.isRegLoc()) {
+      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+    } else {
+      assert(VA.isMemLoc());
+      if (StackPtr.Val == 0)
+        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
+
+      MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
+                                             Arg));
+    }
+  }
+
+  if (!MemOpChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
+                        &MemOpChains[0], MemOpChains.size());
+
+  // Build a sequence of copy-to-reg nodes chained together with token chain
+  // and flag operands which copy the outgoing args into registers.
+  SDOperand InFlag;
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
+                             InFlag);
+    InFlag = Chain.getValue(1);
+  }
+  InFlag = SDOperand();
+  // Copy arguments from their temporary stack slots to the stack slots of the
+  // tail-called function. This needs to be done because if we lowered the
+  // arguments directly to their real stack slots we might end up overwriting
+  // each other.
+  // TODO: To make this more efficient (sometimes saving a store/load) we could
+  // analyze the arguments and emit this store/load/store sequence only for
+  // arguments which would be overwritten otherwise.
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+    if (!VA.isRegLoc()) {
+      SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
+      unsigned Flags = cast(FlagsOp)->getValue();
+
+      // Get source stack slot.
+      SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
+      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
+      // Create frame index.
+      int32_t Offset = VA.getLocMemOffset()+FPDiff;
+      uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8;
+      FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
+      FIN = DAG.getFrameIndex(FI, MVT::i32);
+      if (Flags & ISD::ParamFlags::ByVal) {
+        // Copy relative to framepointer.
+        unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
+                               ISD::ParamFlags::ByValAlignOffs);
+
+        unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
+                        ISD::ParamFlags::ByValSizeOffs;
+
+        SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
+        SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
+        // Copy relative to framepointer.
+        MemOpChains2.push_back(DAG.getNode(ISD::MEMCPY, MVT::Other, Chain, FIN,
+                                           PtrOff, SizeNode, AlignNode));
+      } else {
+        SDOperand LoadedArg = DAG.getLoad(VA.getValVT(), Chain, PtrOff, NULL,0);
+        // Store relative to framepointer.
+        MemOpChains2.push_back(DAG.getStore(Chain, LoadedArg, FIN, NULL, 0));
+      }
+    }
+  }
+
+  if (!MemOpChains2.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
+                        &MemOpChains2[0], MemOpChains2.size());
+
+  // ELF / PIC requires the GOT pointer in the EBX register before function
+  // calls via the PLT.
+  // This does not work with tail calls since EBX is not restored correctly by
+  // the tail caller. TODO: true at least for x86 - verify for x86-64.
+
+  // If the callee is a GlobalAddress node (quite common, every direct call is)
+  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast(Callee)) { + // We should use extra load for direct calls to dllimported functions in + // non-JIT mode. + if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(), + getTargetMachine(), true)) + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); + } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); + else { + assert(Callee.getOpcode() == ISD::LOAD && + "Function destination must be loaded into virtual register"); + unsigned Opc = is64Bit ? X86::R9 : X86::ECX; + + Chain = DAG.getCopyToReg(Chain, + DAG.getRegister(Opc, getPointerTy()) , + Callee,InFlag); + Callee = DAG.getRegister(Opc, getPointerTy()); + // Add register as live out. + DAG.getMachineFunction().addLiveOut(Opc); + } + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); + SmallVector Ops; + + Ops.push_back(Chain); + Ops.push_back(DAG.getConstant(NumBytesToBePushed, getPointerTy())); + Ops.push_back(DAG.getConstant(0, getPointerTy())); + if (InFlag.Val) + Ops.push_back(InFlag); + Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + + // Returns a chain & a flag for retval copy to use. + NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); + Ops.clear(); + Ops.push_back(Chain); + Ops.push_back(Callee); + Ops.push_back(DAG.getConstant(FPDiff, MVT::i32)); + // Add argument registers to the end of the list so that they are known live + // into the call. + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + if (InFlag.Val) + Ops.push_back(InFlag); + assert(InFlag.Val && + "Flag must be set. Depend on flag being set in LowerRET"); + Chain = DAG.getNode(X86ISD::TAILCALL, + Op.Val->getVTList(), &Ops[0], Ops.size()); + + return SDOperand(Chain.Val, Op.ResNo); +} //===----------------------------------------------------------------------===// // X86-64 C Calling Convention implementation @@ -1323,6 +1699,7 @@ X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) { MachineFrameInfo *MFI = MF.getFrameInfo(); SDOperand Root = Op.getOperand(0); bool isVarArg = cast(Op.getOperand(2))->getValue() != 0; + unsigned CC= MF.getFunction()->getCallingConv(); static const unsigned GPR64ArgRegs[] = { X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 @@ -1335,9 +1712,12 @@ X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) { // Assign locations to all of the incoming arguments. SmallVector ArgLocs; - CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg, + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); - CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C); + if (CC == CallingConv::Fast && PerformTailCallOpt) + CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_TailCall); + else + CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C); SmallVector ArgValues; unsigned LastVal = ~0U; @@ -1398,10 +1778,14 @@ X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) { } unsigned StackSize = CCInfo.getNextStackOffset(); + if (CC==CallingConv::Fast) + StackSize =GetAlignedArgumentStackSize(StackSize, DAG); // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. 
if (isVarArg) { + assert(CC!=CallingConv::Fast + && "Var arg not supported with calling convention fastcc"); unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6); unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8); @@ -1446,10 +1830,14 @@ X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) { } ArgValues.push_back(Root); - - BytesToPopOnReturn = 0; // Callee pops nothing. - BytesCallerReserves = StackSize; - + // Tail call convention (fastcc) needs callee pop. + if (CC == CallingConv::Fast && PerformTailCallOpt){ + BytesToPopOnReturn = StackSize; // Callee pops everything. + BytesCallerReserves = 0; + } else { + BytesToPopOnReturn = 0; // Callee pops nothing. + BytesCallerReserves = StackSize; + } X86MachineFunctionInfo *FuncInfo = MF.getInfo(); FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn); @@ -1463,16 +1851,21 @@ X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG, unsigned CC) { SDOperand Chain = Op.getOperand(0); bool isVarArg = cast(Op.getOperand(2))->getValue() != 0; - bool isTailCall = cast(Op.getOperand(3))->getValue() != 0; SDOperand Callee = Op.getOperand(4); // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); - CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C); + if (CC==CallingConv::Fast) + CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_TailCall); + else + CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C); // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); + if (CC == CallingConv::Fast) + NumBytes = GetAlignedArgumentStackSize(NumBytes,DAG); + Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy())); SmallVector, 8> RegsToPass; @@ -1526,6 +1919,9 @@ X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG, } if (isVarArg) { + assert ( CallingConv::Fast != CC && + "Var args not supported with calling convention fastcc"); + // From AMD64 ABI document: // For calls that may call functions that use varargs or stdargs // (prototype-less calls or calls to functions containing ellipsis (...) in @@ -1574,17 +1970,22 @@ X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG, if (InFlag.Val) Ops.push_back(InFlag); - // FIXME: Do not generate X86ISD::TAILCALL for now. - Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL, + Chain = DAG.getNode(X86ISD::CALL, NodeTys, &Ops[0], Ops.size()); InFlag = Chain.getValue(1); - + int NumBytesForCalleeToPush = 0; + if (CC==CallingConv::Fast) { + NumBytesForCalleeToPush = NumBytes; // Callee pops everything + + } else { + NumBytesForCalleeToPush = 0; // Callee pops nothing. + } // Returns a flag for retval copy to use. NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); Ops.clear(); Ops.push_back(Chain); Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); - Ops.push_back(DAG.getConstant(0, getPointerTy())); + Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy())); Ops.push_back(InFlag); Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size()); InFlag = Chain.getValue(1); @@ -3106,10 +3507,14 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { // SHUFPS the element to the lowest double word, then movss. 
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); SmallVector IdxVec; - IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT))); - IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); - IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); - IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); + IdxVec. + push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT))); + IdxVec. + push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); + IdxVec. + push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); + IdxVec. + push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &IdxVec[0], IdxVec.size()); Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), @@ -3128,7 +3533,8 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); SmallVector IdxVec; IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT))); - IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); + IdxVec. + push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &IdxVec[0], IdxVec.size()); Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), @@ -3777,17 +4183,23 @@ SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) { } SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) { - unsigned CallingConv= cast(Op.getOperand(1))->getValue(); - - if (Subtarget->is64Bit()) - return LowerX86_64CCCCallTo(Op, DAG, CallingConv); + unsigned CallingConv = cast(Op.getOperand(1))->getValue(); + bool isTailCall = cast(Op.getOperand(3))->getValue() != 0; + + if (Subtarget->is64Bit()) + if(CallingConv==CallingConv::Fast && isTailCall && PerformTailCallOpt) + return LowerX86_TailCallTo(Op, DAG, CallingConv); + else + return LowerX86_64CCCCallTo(Op, DAG, CallingConv); else switch (CallingConv) { default: assert(0 && "Unsupported calling convention"); case CallingConv::Fast: - // TODO: Implement fastcc - // Falls through + if (isTailCall && PerformTailCallOpt) + return LowerX86_TailCallTo(Op, DAG, CallingConv); + else + return LowerCCCCallTo(Op,DAG, CallingConv); case CallingConv::C: case CallingConv::X86_StdCall: return LowerCCCCallTo(Op, DAG, CallingConv); @@ -3855,8 +4267,7 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) { default: assert(0 && "Unsupported calling convention"); case CallingConv::Fast: - // TODO: implement fastcc. 
- + return LowerCCCArguments(Op,DAG, true); // Falls through case CallingConv::C: return LowerCCCArguments(Op, DAG); @@ -4176,7 +4587,8 @@ X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) { SDOperand TheOp = Op.getOperand(0); SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheOp, 1); if (Subtarget->is64Bit()) { - SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1)); + SDOperand Copy1 = + DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1)); SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::RDX, MVT::i64, Copy1.getValue(2)); SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, Copy2, @@ -4612,6 +5024,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER"; case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; + case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN"; } } @@ -4885,7 +5298,7 @@ static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) { i %= NumElems; if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) { return (i == 0) - ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT)); + ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT)); } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) { SDOperand Idx = PermMask.getOperand(i); if (Idx.getOpcode() == ISD::UNDEF) diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index aa579d69f34..7123adaad27 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -181,7 +181,14 @@ namespace llvm { TLSADDR, THREAD_POINTER, // Exception Handling helpers - EH_RETURN + EH_RETURN, + + // tail call return + // oeprand #0 chain + // operand #1 callee (register or absolute) + // operand #2 stack adjustment + // operand #3 optional in flag + TC_RETURN }; } @@ -285,6 +292,7 @@ namespace llvm { unsigned VarArgsFPOffset; // X86-64 vararg func fp reg offset. int BytesToPopOnReturn; // Number of arg bytes ret should pop. int BytesCallerReserves; // Number of arg bytes caller makes. + public: explicit X86TargetLowering(TargetMachine &TM); @@ -364,6 +372,14 @@ namespace llvm { virtual bool isVectorClearMaskLegal(std::vector &BVOps, MVT::ValueType EVT, SelectionDAG &DAG) const; + + /// IsEligibleForTailCallOptimization - Check whether the call is eligible + /// for tail call optimization. Target which want to do tail call + /// optimization should implement this function. + virtual bool IsEligibleForTailCallOptimization(SDOperand Call, + SDOperand Ret, + SelectionDAG &DAG) const; + private: /// Subtarget - Keep a pointer to the X86Subtarget around so that we can /// make the right decision when generating code for different targets. @@ -372,7 +388,7 @@ namespace llvm { /// X86StackPtr - X86 physical register used as stack ptr. unsigned X86StackPtr; - + /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 /// floating point ops. /// When SSE is available, use it for f32 operations. @@ -402,6 +418,10 @@ namespace llvm { SDOperand LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG); SDOperand LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG,unsigned CC); + // fast calling convention (tail call) implementation for 32/64bit + SDOperand LowerX86_TailCallTo(SDOperand Op, + SelectionDAG & DAG, unsigned CC); + unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG &DAG); // Fast and FastCall Calling Convention implementation. 
SDOperand LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG); SDOperand LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG, unsigned CC); diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 986aa0bc806..9d5e6371199 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -706,6 +706,8 @@ bool X86InstrInfo::BlockHasNoFallThrough(MachineBasicBlock &MBB) const { if (MBB.empty()) return false; switch (MBB.back().getOpcode()) { + case X86::TCRETURNri: + case X86::TCRETURNdi: case X86::RET: // Return. case X86::RETI: case X86::TAILJMPd: diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 641eb2f7cc6..a6bd3fbbbde 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -55,6 +55,8 @@ def SDT_X86TLSTP : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>; def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>; + def X86shld : SDNode<"X86ISD::SHLD", SDTIntShiftDOp>; def X86shrd : SDNode<"X86ISD::SHRD", SDTIntShiftDOp>; @@ -73,7 +75,7 @@ def X86callseq_start : [SDNPHasChain, SDNPOutFlag]>; def X86callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_X86CallSeqEnd, - [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; def X86call : SDNode<"X86ISD::CALL", SDT_X86Call, [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>; @@ -99,6 +101,8 @@ def X86TLStp : SDNode<"X86ISD::THREAD_POINTER", SDT_X86TLSTP, []>; def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET, [SDNPHasChain]>; +def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET, + [SDNPHasChain, SDNPOptInFlag]>; //===----------------------------------------------------------------------===// // X86 Operand Definitions. @@ -356,15 +360,30 @@ let isCall = 1 in } // Tail call stuff. + +def TAILCALL : I<0, Pseudo, (outs), (ins ), + "#TAILCALL", + []>; + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in +def TCRETURNdi : I<0, Pseudo, (outs), (ins i32imm:$dst, i32imm:$offset), + "#TC_RETURN $dst $offset", + []>; + let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in - def TAILJMPd : IBr<0xE9, (ins i32imm:$dst), "jmp\t${dst:call} # TAIL CALL", +def TCRETURNri : I<0, Pseudo, (outs), (ins GR32:$dst, i32imm:$offset), + "#TC_RETURN $dst $offset", []>; + let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in - def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp\t{*}$dst # TAIL CALL", + def TAILJMPd : IBr<0xE9, (ins i32imm:$dst), "jmp\t${dst:call} # TAILCALL", []>; +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in + def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst # TAILCALL", + []>; let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), - "jmp\t{*}$dst # TAIL CALL", []>; + "jmp\t{*}$dst # TAILCALL", []>; //===----------------------------------------------------------------------===// // Miscellaneous Instructions... 
@@ -2507,13 +2526,23 @@ def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst), (MOV32mi addr:$dst, texternalsym:$src)>; // Calls +// tailcall stuff def : Pat<(X86tailcall GR32:$dst), - (CALL32r GR32:$dst)>; + (TAILCALL)>; def : Pat<(X86tailcall (i32 tglobaladdr:$dst)), - (CALLpcrel32 tglobaladdr:$dst)>; + (TAILCALL)>; def : Pat<(X86tailcall (i32 texternalsym:$dst)), - (CALLpcrel32 texternalsym:$dst)>; + (TAILCALL)>; + +def : Pat<(X86tcret GR32:$dst, imm:$off), + (TCRETURNri GR32:$dst, imm:$off)>; + +def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off), + (TCRETURNdi texternalsym:$dst, imm:$off)>; + +def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off), + (TCRETURNdi texternalsym:$dst, imm:$off)>; def : Pat<(X86call (i32 tglobaladdr:$dst)), (CALLpcrel32 tglobaladdr:$dst)>; diff --git a/lib/Target/X86/X86InstrX86-64.td b/lib/Target/X86/X86InstrX86-64.td index f6f48a21d47..f501b5ec558 100644 --- a/lib/Target/X86/X86InstrX86-64.td +++ b/lib/Target/X86/X86InstrX86-64.td @@ -102,6 +102,23 @@ let isCall = 1 in "call\t{*}$dst", []>; } + + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in +def TCRETURNdi64 : I<0, Pseudo, (outs), (ins i64imm:$dst, i32imm:$offset), + "#TC_RETURN $dst $offset", + []>; + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in +def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64:$dst, i32imm:$offset), + "#TC_RETURN $dst $offset", + []>; + + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in + def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst # TAILCALL", + []>; + // Branches let isBranch = 1, isTerminator = 1, isBarrier = 1 in { def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst", @@ -1105,6 +1122,24 @@ def : Pat<(X86tailcall (i64 texternalsym:$dst)), def : Pat<(X86tailcall GR64:$dst), (CALL64r GR64:$dst)>; + +// tailcall stuff +def : Pat<(X86tailcall GR32:$dst), + (TAILCALL)>; +def : Pat<(X86tailcall (i64 tglobaladdr:$dst)), + (TAILCALL)>; +def : Pat<(X86tailcall (i64 texternalsym:$dst)), + (TAILCALL)>; + +def : Pat<(X86tcret GR64:$dst, imm:$off), + (TCRETURNri64 GR64:$dst, imm:$off)>; + +def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off), + (TCRETURNdi64 texternalsym:$dst, imm:$off)>; + +def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off), + (TCRETURNdi64 texternalsym:$dst, imm:$off)>; + // Comparisons. // TEST R,R is smaller than CMP R,0 diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h index e50a104f216..05972c66c27 100644 --- a/lib/Target/X86/X86MachineFunctionInfo.h +++ b/lib/Target/X86/X86MachineFunctionInfo.h @@ -47,18 +47,26 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { // FrameIndex for return slot. 
int ReturnAddrIndex; + + // Delta the ReturnAddr stack slot is moved + // Used for creating an area before the register spill area on the stack + // the returnaddr can be savely move to this area + int TailCallReturnAddrDelta; + public: X86MachineFunctionInfo() : ForceFramePointer(false), CalleeSavedFrameSize(0), BytesToPopOnReturn(0), DecorationStyle(None), - ReturnAddrIndex(0) {} + ReturnAddrIndex(0), + TailCallReturnAddrDelta(0){} X86MachineFunctionInfo(MachineFunction &MF) : ForceFramePointer(false), CalleeSavedFrameSize(0), BytesToPopOnReturn(0), DecorationStyle(None), - ReturnAddrIndex(0) {} + ReturnAddrIndex(0), + TailCallReturnAddrDelta(0) {} bool getForceFramePointer() const { return ForceFramePointer;} void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; } @@ -74,6 +82,9 @@ public: int getRAIndex() const { return ReturnAddrIndex; } void setRAIndex(int Index) { ReturnAddrIndex = Index; } + + int getTCReturnAddrDelta() const { return TailCallReturnAddrDelta; } + void setTCReturnAddrDelta(int delta) {TailCallReturnAddrDelta = delta;} }; } // End llvm namespace diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 83cb03c76f8..f017d4020ae 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -1436,18 +1436,42 @@ void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (!hasFP(MF)) Offset += MF.getFrameInfo()->getStackSize(); - else + else { Offset += SlotSize; // Skip the saved EBP - + // Skip the RETADDR move area + X86MachineFunctionInfo *X86FI = MF.getInfo(); + int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); + if (TailCallReturnAddrDelta < 0) Offset -= TailCallReturnAddrDelta; + } + MI.getOperand(i+3).ChangeToImmediate(Offset); } void X86RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) const{ + X86MachineFunctionInfo *X86FI = MF.getInfo(); + int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); + if (TailCallReturnAddrDelta < 0) { + // create RETURNADDR area + // arg + // arg + // RETADDR + // { ... + // RETADDR area + // ... + // } + // [EBP] + MF.getFrameInfo()-> + CreateFixedObject(-TailCallReturnAddrDelta, + (-1*SlotSize)+TailCallReturnAddrDelta); + } if (hasFP(MF)) { + assert((TailCallReturnAddrDelta <= 0) && + "The Delta should always be zero or negative"); // Create a frame entry for the EBP register that must be saved. int FrameIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, - (int)SlotSize * -2); + (int)SlotSize * -2+ + TailCallReturnAddrDelta); assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() && "Slot for EBP register must be last in order to be found!"); } @@ -1530,6 +1554,41 @@ void mergeSPUpdatesDown(MachineBasicBlock &MBB, } } +/// mergeSPUpdates - Checks the instruction before/after the passed +/// instruction. If it is an ADD/SUB instruction it is deleted +/// argument and the stack adjustment is returned as a positive value for ADD +/// and a negative for SUB. +static int mergeSPUpdates(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned StackPtr, + bool doMergeWithPrevious) { + + if ((doMergeWithPrevious && MBBI == MBB.begin()) || + (!doMergeWithPrevious && MBBI == MBB.end())) + return 0; + + int Offset = 0; + + MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI; + MachineBasicBlock::iterator NI = doMergeWithPrevious ? 
0 : next(MBBI); + unsigned Opc = PI->getOpcode(); + if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || + Opc == X86::ADD32ri || Opc == X86::ADD32ri8) && + PI->getOperand(0).getReg() == StackPtr){ + Offset += PI->getOperand(2).getImm(); + MBB.erase(PI); + if (!doMergeWithPrevious) MBBI = NI; + } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || + Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && + PI->getOperand(0).getReg() == StackPtr) { + Offset -= PI->getOperand(2).getImm(); + MBB.erase(PI); + if (!doMergeWithPrevious) MBBI = NI; + } + + return Offset; +} + void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -1543,10 +1602,23 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { // Prepare for frame info. unsigned FrameLabelId = 0; - // Get the number of bytes to allocate from the FrameInfo + // Get the number of bytes to allocate from the FrameInfo. uint64_t StackSize = MFI->getStackSize(); + // Add RETADDR move area to callee saved frame size. + int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); + if (TailCallReturnAddrDelta < 0) + X86FI->setCalleeSavedFrameSize( + X86FI->getCalleeSavedFrameSize() +(-TailCallReturnAddrDelta)); uint64_t NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); + // Insert stack pointer adjustment for later moving of return addr. Only + // applies to tail call optimized functions where the callee argument stack + // size is bigger than the callers. + if (TailCallReturnAddrDelta < 0) { + BuildMI(MBB, MBBI, TII.get(Is64Bit? X86::SUB64ri32 : X86::SUB32ri), + StackPtr).addReg(StackPtr).addImm(-TailCallReturnAddrDelta); + } + if (hasFP(MF)) { // Get the offset of the stack slot for the EBP register... which is // guaranteed to be the last slot by processFunctionBeforeFrameFinalized. @@ -1615,6 +1687,10 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { MBB.insert(MBBI, MI); } } else { + // If there is an SUB32ri of ESP immediately before this instruction, + // merge the two. This can be the case when tail call elimination is + // enabled and the callee has more arguments then the caller. + NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true); // If there is an ADD32ri or SUB32ri of ESP immediately after this // instruction, merge the two instructions. mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes); @@ -1711,6 +1787,10 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, switch (RetOpcode) { case X86::RET: case X86::RETI: + case X86::TCRETURNdi: + case X86::TCRETURNri: + case X86::TCRETURNri64: + case X86::TCRETURNdi64: case X86::EH_RETURN: case X86::TAILJMPd: case X86::TAILJMPr: @@ -1773,7 +1853,46 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, MachineOperand &DestAddr = MBBI->getOperand(0); assert(DestAddr.isRegister() && "Offset should be in register!"); BuildMI(MBB, MBBI, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),StackPtr). - addReg(DestAddr.getReg()); + addReg(DestAddr.getReg()); + // Tail call return: adjust the stack pointer and jump to callee + } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi || + RetOpcode== X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64) { + MBBI = prior(MBB.end()); + MachineOperand &JumpTarget = MBBI->getOperand(0); + MachineOperand &StackAdjust = MBBI->getOperand(1); + assert( StackAdjust.isImmediate() && "Expecting immediate value."); + + // Adjust stack pointer. 
+ int StackAdj = StackAdjust.getImm(); + int MaxTCDelta = X86FI->getTCReturnAddrDelta(); + int Offset = 0; + assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive"); + // Incoporate the retaddr area. + Offset = StackAdj-MaxTCDelta; + assert(Offset >= 0 && "Offset should never be negative"); + if (Offset) { + // Check for possible merge with preceeding ADD instruction. + Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true); + emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII); + } + // Jump to label or value in register. + if (RetOpcode == X86::TCRETURNdi|| RetOpcode == X86::TCRETURNdi64) + BuildMI(MBB, MBBI, TII.get(X86::TAILJMPd)). + addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); + else if (RetOpcode== X86::TCRETURNri64) { + BuildMI(MBB, MBBI, TII.get(X86::TAILJMPr64), JumpTarget.getReg()); + } else + BuildMI(MBB, MBBI, TII.get(X86::TAILJMPr), JumpTarget.getReg()); + // Delete the pseudo instruction TCRETURN. + MBB.erase(MBBI); + } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) && + (X86FI->getTCReturnAddrDelta() < 0)) { + // Add the return addr area delta back since we are not tail calling. + int delta = -1*X86FI->getTCReturnAddrDelta(); + MBBI = prior(MBB.end()); + // Check for possible merge with preceeding ADD instruction. + delta += mergeSPUpdates(MBB, MBBI, StackPtr, true); + emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII); } } diff --git a/test/CodeGen/X86/tailcall1.ll b/test/CodeGen/X86/tailcall1.ll new file mode 100644 index 00000000000..74687f584ab --- /dev/null +++ b/test/CodeGen/X86/tailcall1.ll @@ -0,0 +1,11 @@ +; RUN: llvm-as < %s | llc -tailcallopt | grep TAILCALL +define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { +entry: + ret i32 %a3 +} + +define fastcc i32 @tailcaller(i32 %in1, i32 %in2) { +entry: + %tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; [#uses=1] + ret i32 %tmp11 +} diff --git a/test/CodeGen/X86/tailcallpic1.ll b/test/CodeGen/X86/tailcallpic1.ll new file mode 100644 index 00000000000..54074eb0ba2 --- /dev/null +++ b/test/CodeGen/X86/tailcallpic1.ll @@ -0,0 +1,12 @@ +; RUN: llvm-as < %s | llc -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | grep TAILCALL + +define protected fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { +entry: + ret i32 %a3 +} + +define fastcc i32 @tailcaller(i32 %in1, i32 %in2) { +entry: + %tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; [#uses=1] + ret i32 %tmp11 +} diff --git a/test/CodeGen/X86/tailcallpic2.ll b/test/CodeGen/X86/tailcallpic2.ll new file mode 100644 index 00000000000..60818e4f62c --- /dev/null +++ b/test/CodeGen/X86/tailcallpic2.ll @@ -0,0 +1,12 @@ +; RUN: llvm-as < %s | llc -tailcallopt -mtriple=i686-pc-linux-gnu -relocation-model=pic | grep -v TAILCALL + +define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { +entry: + ret i32 %a3 +} + +define fastcc i32 @tailcaller(i32 %in1, i32 %in2) { +entry: + %tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; [#uses=1] + ret i32 %tmp11 +} -- 2.34.1
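
For reference, the new tests above show the minimal pattern needed to trigger the
optimization: caller and callee must both use fastcc, the call must be in tail
position, and llc must be invoked with -tailcallopt (for PIC builds the callee
must additionally be defined in the same module with protected or hidden
visibility). A minimal module and RUN line, mirroring test/CodeGen/X86/tailcall1.ll:

    ; RUN: llvm-as < %s | llc -tailcallopt | grep TAILCALL
    define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
    entry:
      ret i32 %a3
    }

    define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
    entry:
      %tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 )
      ret i32 %tmp11
    }

When the conditions are not met (for example a PIC build calling an externally
visible callee, as in tailcallpic2.ll), the call is lowered as an ordinary CALL
and no TAILCALL marker appears in the generated assembly.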