Pass callsite return type to TargetLowering::LowerCall and use that to check sibcall...

[oota-llvm.git] / lib / Target / X86 / X86ISelLowering.cpp
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index eb7f4815915292bfbe8e4330f28920f766a9f3af..228244f06bfd9f5d5bace6ead7d67168da3d15d9 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1778,7 +1778,7 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
  }
  
  SDValue
-X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, const Type *RetTy,
                               CallingConv::ID CallConv, bool isVarArg,
                               bool &isTailCall,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
@@ -1791,8 +1791,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
  
    if (isTailCall)
      // Check if it's really possible to do a tail call.
-    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
-                                                   Outs, Ins, DAG);
+    isTailCall = IsEligibleForTailCallOptimization(Callee, RetTy, CallConv,
+                                                   isVarArg, Outs, Ins, DAG);
  
    assert(!(isVarArg && CallConv == CallingConv::Fast) &&
           "Var args not supported with calling convention fastcc");
@@ -1807,6 +1807,10 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
    unsigned NumBytes = CCInfo.getNextStackOffset();
    if (FuncIsMadeTailCallSafe(CallConv))
      NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
+  else if (isTailCall && !PerformTailCallOpt)
+    // This is a sibcall. The memory operands are available in caller's
+    // own caller's stack.
+    NumBytes = 0;
  
    int FPDiff = 0;
    if (isTailCall) {
@@ -1976,9 +1980,11 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
      int FI = 0;
      // Do not flag preceeding copytoreg stuff together with the following stuff.
      InFlag = SDValue();
-    for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
-      CCValAssign &VA = ArgLocs[i];
-      if (!VA.isRegLoc()) {
+    if (PerformTailCallOpt) {
+      for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+        CCValAssign &VA = ArgLocs[i];
+        if (VA.isRegLoc())
+          continue;
          assert(VA.isMemLoc());
          SDValue Arg = Outs[i].Val;
          ISD::ArgFlagsTy Flags = Outs[i].Flags;
@@ -2241,32 +2247,89 @@ unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
  /// optimization should implement this function.
  bool
  X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+                                                     const Type *RetTy,
                                                       CallingConv::ID CalleeCC,
                                                       bool isVarArg,
                                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                                       SelectionDAG& DAG) const {
-  // If -tailcallopt is specified, make fastcc functions tail-callable.
-  const Function *F = DAG.getMachineFunction().getFunction();
-  if (PerformTailCallOpt &&
-      CalleeCC == CallingConv::Fast && F->getCallingConv() == CalleeCC)
-    return true;
-
    if (CalleeCC != CallingConv::Fast &&
        CalleeCC != CallingConv::C)
      return false;
  
-  // Look for obvious safe cases to perform tail call optimization.
-  // For now, only consider callees which take no arguments and no return
-  // values.
-  if (!Outs.empty())
+  // If -tailcallopt is specified, make fastcc functions tail-callable.
+  const Function *CallerF = DAG.getMachineFunction().getFunction();
+  if (PerformTailCallOpt) {
+    if (CalleeCC == CallingConv::Fast &&
+        CallerF->getCallingConv() == CalleeCC)
+      return true;
      return false;
+  }
  
-  if (Ins.empty())
-    // If the caller does not return a value, then this is obviously safe.
-    return F->getReturnType()->isVoidTy();
  
-  return false;
+  // Look for obvious safe cases to perform tail call optimization that does not
+  // require ABI changes. This is what gcc calls sibcall.
+
+  // Do not tail call optimize vararg calls for now.
+  if (isVarArg)
+    return false;
+
+  // If the callee takes no arguments then go on to check the results of the
+  // call.
+  if (!Outs.empty()) {
+    // Check if stack adjustment is needed. For now, do not do this if any
+    // argument is passed on the stack.
+    SmallVector<CCValAssign, 16> ArgLocs;
+    CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(),
+                   ArgLocs, *DAG.getContext());
+    CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
+    if (CCInfo.getNextStackOffset()) {
+      MachineFunction &MF = DAG.getMachineFunction();
+      if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn())
+        return false;
+      if (Subtarget->isTargetWin64())
+        // Win64 ABI has additional complications.
+        return false;
+
+      // Check if the arguments are already laid out in the right way as
+      // the caller's fixed stack objects.
+      MachineFrameInfo *MFI = MF.getFrameInfo();
+      for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+        CCValAssign &VA = ArgLocs[i];
+        EVT RegVT = VA.getLocVT();
+        SDValue Arg = Outs[i].Val;
+        ISD::ArgFlagsTy Flags = Outs[i].Flags;
+        if (Flags.isByVal())
+          return false; // TODO
+        if (VA.getLocInfo() == CCValAssign::Indirect)
+          return false;
+        if (!VA.isRegLoc()) {
+          LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg);
+          if (!Ld)
+            return false;
+          SDValue Ptr = Ld->getBasePtr();
+          FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
+          if (!FINode)
+            return false;
+          int FI = FINode->getIndex();
+          if (!MFI->isFixedObjectIndex(FI))
+            return false;
+          if (VA.getLocMemOffset() != MFI->getObjectOffset(FI))
+            return false;
+        }
+      }
+    }
+  }
+
+  // If the caller does not return a value, then this is obviously safe.
+  // This is one case where it's safe to perform this optimization even
+  // if the return types do not match.
+  const Type *CallerRetTy = CallerF->getReturnType();
+  if (CallerRetTy->isVoidTy())
+    return true;
+
+  // If the return types match, then it's safe.
+  return CallerRetTy == RetTy;
  }
  
  FastISel *
@@ -6044,7 +6107,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) {
            N2C && N2C->isNullValue() &&
            RHSC && RHSC->isNullValue()) {
          SDValue CmpOp0 = Cmp.getOperand(0);
-        Cmp = DAG.getNode(X86ISD::CMP, dl, Op.getValueType(),
+        Cmp = DAG.getNode(X86ISD::CMP, dl, CmpOp0.getValueType(),
                            CmpOp0, DAG.getConstant(1, CmpOp0.getValueType()));
          return DAG.getNode(X86ISD::SETCC_CARRY, dl, Op.getValueType(),
                             DAG.getConstant(X86::COND_B, MVT::i8), Cmp);