//
// The LLVM Compiler Infrastructure
//
-// This file was developed by Chris Lattner and is distributed under
-// the University of Illinois Open Source License. See LICENSE.TXT for details.
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/SmallSet.h"
X86ScalarSSEf32 = Subtarget->hasSSE1();
X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
+ bool Fast = false;
RegInfo = TM.getRegisterInfo();
if (Subtarget->is64Bit())
addRegisterClass(MVT::i64, X86::GR64RegisterClass);
- setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);
+ setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ // We don't accept any truncstore of integer registers.
+ setTruncStoreAction(MVT::i64, MVT::i32, Expand);
+ setTruncStoreAction(MVT::i64, MVT::i16, Expand);
+ setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
+ setTruncStoreAction(MVT::i32, MVT::i16, Expand);
+ setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
+ setTruncStoreAction(MVT::i16, MVT::i8, Expand);
// Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
// operation.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
setOperationAction(ISD::FREM , MVT::f64 , Expand);
- setOperationAction(ISD::FLT_ROUNDS , MVT::i32 , Custom);
+ setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
setOperationAction(ISD::CTTZ , MVT::i8 , Custom);
setOperationAction(ISD::MEMSET , MVT::Other, Custom);
setOperationAction(ISD::MEMCPY , MVT::Other, Custom);
- // Use the default ISD::LOCATION expansion.
+ // Use the default ISD::LOCATION, ISD::DECLARE expansion.
setOperationAction(ISD::LOCATION, MVT::Other, Expand);
// FIXME - use subtarget debug flags
if (!Subtarget->isTargetDarwin() &&
setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
setOperationAction(ISD::VAARG , MVT::Other, Expand);
addLegalFPImmediate(APFloat(+0.0)); // xorpd
addLegalFPImmediate(APFloat(+0.0f)); // xorps
- // Conversions to long double (in X87) go through memory.
- setConvertAction(MVT::f32, MVT::f80, Expand);
- setConvertAction(MVT::f64, MVT::f80, Expand);
-
- // Conversions from long double (in X87) go through memory.
- setConvertAction(MVT::f80, MVT::f32, Expand);
- setConvertAction(MVT::f80, MVT::f64, Expand);
+ // Floating truncations from f80 and extensions to f80 go through memory.
+ // If optimizing, we lie about this though and handle it in
+ // InstructionSelectPreprocess so that dagcombine2 can hack on these.
+ if (Fast) {
+ setConvertAction(MVT::f32, MVT::f80, Expand);
+ setConvertAction(MVT::f64, MVT::f80, Expand);
+ setConvertAction(MVT::f80, MVT::f32, Expand);
+ setConvertAction(MVT::f80, MVT::f64, Expand);
+ }
} else if (X86ScalarSSEf32) {
// Use SSE for f32, x87 for f64.
// Set up the FP register classes.
addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
- // SSE->x87 conversions go through memory.
- setConvertAction(MVT::f32, MVT::f64, Expand);
- setConvertAction(MVT::f32, MVT::f80, Expand);
-
- // x87->SSE truncations need to go through memory.
- setConvertAction(MVT::f80, MVT::f32, Expand);
- setConvertAction(MVT::f64, MVT::f32, Expand);
- // And x87->x87 truncations also.
- setConvertAction(MVT::f80, MVT::f64, Expand);
+ // SSE <-> X87 conversions go through memory. If optimizing, we lie about
+ // this though and handle it in InstructionSelectPreprocess so that
+ // dagcombine2 can hack on these.
+ if (Fast) {
+ setConvertAction(MVT::f32, MVT::f64, Expand);
+ setConvertAction(MVT::f32, MVT::f80, Expand);
+ setConvertAction(MVT::f80, MVT::f32, Expand);
+ setConvertAction(MVT::f64, MVT::f32, Expand);
+ // And x87->x87 truncations also.
+ setConvertAction(MVT::f80, MVT::f64, Expand);
+ }
if (!UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f64 , Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
- // Floating truncations need to go through memory.
- setConvertAction(MVT::f80, MVT::f32, Expand);
- setConvertAction(MVT::f64, MVT::f32, Expand);
- setConvertAction(MVT::f80, MVT::f64, Expand);
+ // Floating truncations go through memory. If optimizing, we lie about
+ // this though and handle it in InstructionSelectPreprocess so that
+ // dagcombine2 can hack on these.
+ if (Fast) {
+ setConvertAction(MVT::f80, MVT::f32, Expand);
+ setConvertAction(MVT::f64, MVT::f32, Expand);
+ setConvertAction(MVT::f80, MVT::f64, Expand);
+ }
if (!UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f64 , Expand);
addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
setOperationAction(ISD::UNDEF, MVT::f80, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
- setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
+ {
+ setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
+ APFloat TmpFlt(+0.0);
+ TmpFlt.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven);
+ addLegalFPImmediate(TmpFlt); // FLD0
+ TmpFlt.changeSign();
+ addLegalFPImmediate(TmpFlt); // FLD0/FCHS
+ APFloat TmpFlt2(+1.0);
+ TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven);
+ addLegalFPImmediate(TmpFlt2); // FLD1
+ TmpFlt2.changeSign();
+ addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
+ }
+
if (!UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f80 , Expand);
setOperationAction(ISD::FCOS , MVT::f80 , Expand);
AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
}
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
// Custom lower v2i64 and v2f64 selects.
setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
allowUnalignedMemoryAccesses = true; // x86 supports it!
}
+/// getMaxByValAlign - Helper for getByValTypeAlignment. Updates MaxAlign
+/// (in/out) to 16 if Ty is a 128-bit vector or nests one in arrays/structs.
+static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) {
+ if (MaxAlign == 16) // Already at the largest value this helper assigns.
+ return;
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+ if (VTy->getBitWidth() == 128)
+ MaxAlign = 16;
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ unsigned EltAlign = 0; // Separate accumulator for the element type.
+ getMaxByValAlign(ATy->getElementType(), EltAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ unsigned EltAlign = 0; // Separate accumulator for each member.
+ getMaxByValAlign(STy->getElementType(i), EltAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ if (MaxAlign == 16) // Remaining members cannot raise it further.
+ break;
+ }
+ }
+ return;
+}
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area. 64-bit subtargets use the
+/// ABI type alignment. Otherwise the result is 4 bytes, or 16 bytes when SSE1
+/// is available and getMaxByValAlign finds a 128-bit vector in the aggregate.
+unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
+ if (Subtarget->is64Bit())
+ return getTargetData()->getABITypeAlignment(Ty);
+ unsigned Align = 4; // Default for the non-64-bit case.
+ if (Subtarget->hasSSE1()) // Without SSE1 the type is not inspected at all.
+ getMaxByValAlign(Ty, Align);
+ return Align;
+}
/// getPICJumpTableRelocaBase - Returns relocation base for the given PIC
/// jumptable.
/// GetPossiblePreceedingTailCall - Get preceeding X86ISD::TAILCALL node if it
/// exists skip possible ISD:TokenFactor.
static SDOperand GetPossiblePreceedingTailCall(SDOperand Chain) {
- if (Chain.getOpcode()==X86ISD::TAILCALL) {
+ if (Chain.getOpcode() == X86ISD::TAILCALL) {
return Chain;
- } else if (Chain.getOpcode()==ISD::TokenFactor) {
+ } else if (Chain.getOpcode() == ISD::TokenFactor) {
if (Chain.getNumOperands() &&
- Chain.getOperand(0).getOpcode()==X86ISD::TAILCALL)
+ Chain.getOperand(0).getOpcode() == X86ISD::TAILCALL)
return Chain.getOperand(0);
}
return Chain;
}
-
+
/// LowerRET - Lower an ISD::RET node.
SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");
// If this is the first return lowered for this function, add the regs to the
// liveout set for the function.
- if (DAG.getMachineFunction().liveout_empty()) {
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
for (unsigned i = 0; i != RVLocs.size(); ++i)
if (RVLocs[i].isRegLoc())
- DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
}
SDOperand Chain = Op.getOperand(0);
SDOperand TailCall = Chain;
SDOperand TargetAddress = TailCall.getOperand(1);
SDOperand StackAdjustment = TailCall.getOperand(2);
- assert ( ((TargetAddress.getOpcode() == ISD::Register &&
+ assert(((TargetAddress.getOpcode() == ISD::Register &&
(cast<RegisterSDNode>(TargetAddress)->getReg() == X86::ECX ||
cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R9)) ||
TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
TargetAddress.getOpcode() == ISD::TargetGlobalAddress) &&
"Expecting an global address, external symbol, or register");
- assert( StackAdjustment.getOpcode() == ISD::Constant &&
- "Expecting a const value");
+ assert(StackAdjustment.getOpcode() == ISD::Constant &&
+ "Expecting a const value");
SmallVector<SDOperand,8> Operands;
Operands.push_back(Chain.getOperand(0));
// a register.
SDOperand Value = Op.getOperand(1);
- // If this is an FP return with ScalarSSE, we need to move the value from
- // an XMM register onto the fp-stack.
- if ((X86ScalarSSEf32 && RVLocs[0].getValVT()==MVT::f32) ||
- (X86ScalarSSEf64 && RVLocs[0].getValVT()==MVT::f64)) {
- SDOperand MemLoc;
-
- // If this is a load into a scalarsse value, don't store the loaded value
- // back to the stack, only to reload it: just replace the scalar-sse load.
- if (ISD::isNON_EXTLoad(Value.Val) &&
- (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
- Chain = Value.getOperand(0);
- MemLoc = Value.getOperand(1);
- } else {
- // Spill the value to memory and reload it into top of stack.
- unsigned Size = MVT::getSizeInBits(RVLocs[0].getValVT())/8;
- MachineFunction &MF = DAG.getMachineFunction();
- int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
- MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
- Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0);
- }
- SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other);
- SDOperand Ops[] = {Chain, MemLoc, DAG.getValueType(RVLocs[0].getValVT())};
- Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
- Chain = Value.getValue(1);
- }
+ // an XMM register onto the fp-stack. Do this with an FP_EXTEND to f80.
+ // This will get legalized into a load/store if it can't get optimized away.
+ if (isScalarFPTypeInSSEReg(RVLocs[0].getValVT()))
+ Value = DAG.getNode(ISD::FP_EXTEND, MVT::f80, Value);
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
SDOperand Ops[] = { Chain, Value };
CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);
-
SmallVector<SDOperand, 8> ResultVals;
// Copy all of the result registers out of their specified physreg.
// Copies from the FP stack are special, as ST0 isn't a valid register
// before the fp stackifier runs.
- // Copy ST0 into an RFP register with FP_GET_RESULT.
- SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other, MVT::Flag);
+ // Copy ST0 into an RFP register with FP_GET_RESULT. If this will end up
+ // in an SSE register, copy it out as F80 and do a truncate, otherwise use
+ // the specified value type.
+ MVT::ValueType GetResultTy = RVLocs[0].getValVT();
+ if (isScalarFPTypeInSSEReg(GetResultTy))
+ GetResultTy = MVT::f80;
+ SDVTList Tys = DAG.getVTList(GetResultTy, MVT::Other, MVT::Flag);
+
SDOperand GROps[] = { Chain, InFlag };
SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
Chain = RetVal.getValue(1);
InFlag = RetVal.getValue(2);
+
+ // If we want the result in an SSE register, use an FP_ROUND (a truncation
+ // from f80) to get it there.
+ if (GetResultTy != RVLocs[0].getValVT())
+ RetVal = DAG.getNode(ISD::FP_ROUND, RVLocs[0].getValVT(), RetVal,
+ // This truncation won't change the value.
+ DAG.getIntPtrConstant(1));
- // If we are using ScalarSSE, store ST(0) to the stack and reload it into
- // an XMM register.
- if ((X86ScalarSSEf32 && RVLocs[0].getValVT() == MVT::f32) ||
- (X86ScalarSSEf64 && RVLocs[0].getValVT() == MVT::f64)) {
- // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
- // shouldn't be necessary except that RFP cannot be live across
- // multiple blocks. When stackifier is fixed, they can be uncoupled.
- MachineFunction &MF = DAG.getMachineFunction();
- int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
- SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
- SDOperand Ops[] = {
- Chain, RetVal, StackSlot, DAG.getValueType(RVLocs[0].getValVT()), InFlag
- };
- Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5);
- RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0);
- Chain = RetVal.getValue(1);
- }
ResultVals.push_back(RetVal);
}
&ResultVals[0], ResultVals.size()).Val;
}
+/// LowerCallResultToTwo64BitRegs - Lower the result values of an x86-64
+/// ISD::CALL where the results are known to be in two 64-bit registers,
+/// e.g. XMM0 and XMM1. This simply stores the two values back to the
+/// fixed stack slot allocated for StructRet (Reg1 at +0, Reg2 at +8).
+SDNode *X86TargetLowering::
+LowerCallResultToTwo64BitRegs(SDOperand Chain, SDOperand InFlag,
+ SDNode *TheCall, unsigned Reg1, unsigned Reg2,
+ MVT::ValueType VT, SelectionDAG &DAG) {
+ SDOperand RetVal1 = DAG.getCopyFromReg(Chain, Reg1, VT, InFlag);
+ Chain = RetVal1.getValue(1); // Thread chain and flag into the second copy.
+ InFlag = RetVal1.getValue(2);
+ SDOperand RetVal2 = DAG.getCopyFromReg(Chain, Reg2, VT, InFlag);
+ Chain = RetVal2.getValue(1);
+ InFlag = RetVal2.getValue(2);
+ SDOperand FIN = TheCall->getOperand(5); // Store address taken from the call.
+ Chain = DAG.getStore(Chain, RetVal1, FIN, NULL, 0);
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(8));
+ Chain = DAG.getStore(Chain, RetVal2, FIN, NULL, 0); // Second value at +8.
+ return Chain.Val; // Node behind the chain of the last store.
+}
+
+/// LowerCallResultToTwoX87Regs - Lower the result values of an x86-64 ISD::CALL
+/// where the results are known to be in ST0 and ST1, storing both to memory.
+SDNode *X86TargetLowering::
+LowerCallResultToTwoX87Regs(SDOperand Chain, SDOperand InFlag,
+ SDNode *TheCall, SelectionDAG &DAG) {
+ SmallVector<SDOperand, 8> ResultVals; // NOTE(review): never used below.
+ const MVT::ValueType VTs[] = { MVT::f80, MVT::f80, MVT::Other, MVT::Flag };
+ SDVTList Tys = DAG.getVTList(VTs, 4);
+ SDOperand Ops[] = { Chain, InFlag };
+ SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT2, Tys, Ops, 2);
+ Chain = RetVal.getValue(2); // Value 2 is the MVT::Other entry of VTs.
+ SDOperand FIN = TheCall->getOperand(5); // Store address taken from the call.
+ Chain = DAG.getStore(Chain, RetVal.getValue(1), FIN, NULL, 0);
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(16));
+ Chain = DAG.getStore(Chain, RetVal, FIN, NULL, 0); // RetVal goes to +16.
+ return Chain.Val; // Node behind the chain of the last store.
+}
//===----------------------------------------------------------------------===//
// C & StdCall & Fast Calling Convention implementation
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
const TargetRegisterClass *RC) {
assert(RC->contains(PReg) && "Not the correct regclass!");
- unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
- MF.addLiveIn(PReg, VReg);
+ unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
+ MF.getRegInfo().addLiveIn(PReg, VReg);
return VReg;
}
-// align stack arguments according to platform alignment needed for tail calls
-unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG& DAG);
+// Determines whether a CALL node uses struct return semantics (operand 6).
+static bool CallIsStructReturn(SDOperand Op) {
+ unsigned NumOps = (Op.getNumOperands() - 5) / 2; // Operand pairs after 5.
+ if (!NumOps)
+ return false; // No such pairs, so there is nothing to inspect.
+
+ ConstantSDNode *Flags = cast<ConstantSDNode>(Op.getOperand(6));
+ return Flags->getValue() & ISD::ParamFlags::StructReturn;
+}
+
+// Determines whether a FORMAL_ARGUMENTS node uses struct return semantics.
+static bool ArgsAreStructReturn(SDOperand Op) {
+ unsigned NumArgs = Op.Val->getNumValues() - 1; // All values but one.
+ if (!NumArgs)
+ return false; // No arguments, so there are no flags to inspect.
+
+ ConstantSDNode *Flags = cast<ConstantSDNode>(Op.getOperand(3));
+ return Flags->getValue() & ISD::ParamFlags::StructReturn;
+}
+
+// Determines whether a CALL or FORMAL_ARGUMENTS node requires the callee to pop
+// its own arguments. Callee pop is necessary to support tail calls.
+bool X86TargetLowering::IsCalleePop(SDOperand Op) {
+ bool IsVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
+ if (IsVarArg)
+ return false; // Vararg nodes never use callee pop.
+
+ switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) {
+ default:
+ return false; // Every other calling convention: no callee pop.
+ case CallingConv::X86_StdCall:
+ return !Subtarget->is64Bit(); // Only on non-64-bit subtargets.
+ case CallingConv::X86_FastCall:
+ return !Subtarget->is64Bit(); // Only on non-64-bit subtargets.
+ case CallingConv::Fast:
+ return PerformTailCallOpt; // Only when PerformTailCallOpt is set.
+ }
+}
+
+// Selects the correct CCAssignFn for a CALL or FORMAL_ARGUMENTS node.
+CCAssignFn *X86TargetLowering::CCAssignFnForNode(SDOperand Op) const {
+ unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
+
+ if (Subtarget->is64Bit())
+ if (CC == CallingConv::Fast && PerformTailCallOpt)
+ return CC_X86_64_TailCall;
+ else // Pairs with the inner if, so every 64-bit case returns here.
+ return CC_X86_64_C;
+
+ if (CC == CallingConv::X86_FastCall) // Non-64-bit subtargets from here on.
+ return CC_X86_32_FastCall;
+ else if (CC == CallingConv::Fast && PerformTailCallOpt)
+ return CC_X86_32_TailCall;
+ else
+ return CC_X86_32_C;
+}
+
+// Selects the appropriate decoration to apply to a MachineFunction containing a
+// given FORMAL_ARGUMENTS node, based on the calling convention in operand 1.
+NameDecorationStyle
+X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDOperand Op) {
+ unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
+ if (CC == CallingConv::X86_FastCall)
+ return FastCall;
+ else if (CC == CallingConv::X86_StdCall)
+ return StdCall;
+ return None; // Any other calling convention.
+}
+
+
+// IsPossiblyOverwrittenArgumentOfTailCall - Check if the operand could possibly
+// be overwritten when lowering the outgoing arguments of a tail call. The
+// current implementation is very conservative: it assumes every argument that
+// comes from FORMAL_ARGUMENTS, or from a CopyFromReg of a virtual register,
+// would be overwritten by direct lowering.
+// Possible improvement:
+// Check the MERGE_VALUES corresponding to FORMAL_ARGUMENTS for CopyFromReg
+// nodes indicating inreg-passed arguments, which also need not be lowered to a
+// safe stack slot.
+static bool IsPossiblyOverwrittenArgumentOfTailCall(SDOperand Op) {
+ RegisterSDNode * OpReg = NULL; // Assigned only on the CopyFromReg path.
+ if (Op.getOpcode() == ISD::FORMAL_ARGUMENTS ||
+ (Op.getOpcode()== ISD::CopyFromReg &&
+ (OpReg = cast<RegisterSDNode>(Op.getOperand(1))) && // Assignment intended.
+ OpReg->getReg() >= MRegisterInfo::FirstVirtualRegister))
+ return true;
+ return false;
+}
+
+// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
+// by "Src" to address "Dst" with size and alignment information decoded from
+// the ByValSize and ByValAlign fields of "Flags". The copy will be passed as a
+// byval function parameter. Returns the result of DAG.getMemcpy.
+static SDOperand
+CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand Chain,
+ unsigned Flags, SelectionDAG &DAG) {
+ unsigned Align = 1 << // The ByValAlign field is used as a shift count.
+ ((Flags & ISD::ParamFlags::ByValAlign) >> ISD::ParamFlags::ByValAlignOffs);
+ unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
+ ISD::ParamFlags::ByValSizeOffs;
+ SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
+ SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
+ SDOperand AlwaysInline = DAG.getConstant(1, MVT::i32); // Constant 1.
+ return DAG.getMemcpy(Chain, Dst, Src, SizeNode, AlignNode, AlwaysInline);
+}
SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
const CCValAssign &VA,
MachineFrameInfo *MFI,
SDOperand Root, unsigned i) {
// Create the nodes corresponding to a load from this parameter slot.
+ unsigned Flags = cast<ConstantSDNode>(Op.getOperand(3 + i))->getValue();
+ bool isByVal = Flags & ISD::ParamFlags::ByVal;
+
+ // FIXME: For now, all byval parameter objects are marked mutable. This
+ // can be changed with more analysis.
int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
- VA.getLocMemOffset());
+ VA.getLocMemOffset(), !isByVal);
SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
-
- unsigned Flags = cast<ConstantSDNode>(Op.getOperand(3 + i))->getValue();
-
- if (Flags & ISD::ParamFlags::ByVal)
+ if (isByVal)
return FIN;
- else
- return DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0);
+ return DAG.getLoad(VA.getValVT(), Root, FIN,
+ PseudoSourceValue::getFixedStack(), FI);
}
-SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG,
- bool isStdCall) {
- unsigned NumArgs = Op.Val->getNumValues() - 1;
+SDOperand
+X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+
+ const Function* Fn = MF.getFunction();
+ if (Fn->hasExternalLinkage() &&
+ Subtarget->isTargetCygMing() &&
+ Fn->getName() == "main")
+ FuncInfo->setForceFramePointer(true);
+
+ // Decorate the function name.
+ FuncInfo->setDecorationStyle(NameDecorationForFORMAL_ARGUMENTS(Op));
+
MachineFrameInfo *MFI = MF.getFrameInfo();
SDOperand Root = Op.getOperand(0);
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
unsigned CC = MF.getFunction()->getCallingConv();
+ bool Is64Bit = Subtarget->is64Bit();
+
+ assert(!(isVarArg && CC == CallingConv::Fast) &&
+ "Var args not supported with calling convention fastcc");
+
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg,
- getTargetMachine(), ArgLocs);
- // Check for possible tail call calling convention.
- if (CC == CallingConv::Fast && PerformTailCallOpt)
- CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_TailCall);
- else
- CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C);
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+ CCInfo.AnalyzeFormalArguments(Op.Val, CCAssignFnForNode(Op));
SmallVector<SDOperand, 8> ArgValues;
unsigned LastVal = ~0U;
TargetRegisterClass *RC;
if (RegVT == MVT::i32)
RC = X86::GR32RegisterClass;
+ else if (Is64Bit && RegVT == MVT::i64)
+ RC = X86::GR64RegisterClass;
+ else if (RegVT == MVT::f32)
+ RC = X86::FR32RegisterClass;
+ else if (RegVT == MVT::f64)
+ RC = X86::FR64RegisterClass;
else {
assert(MVT::isVector(RegVT));
- RC = X86::VR128RegisterClass;
+ if (Is64Bit && MVT::getSizeInBits(RegVT) == 64) {
+ RC = X86::GR64RegisterClass; // MMX values are passed in GPRs.
+ RegVT = MVT::i64;
+ } else
+ RC = X86::VR128RegisterClass;
}
-
+
unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);
if (VA.getLocInfo() != CCValAssign::Full)
ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
+ // Handle MMX values passed in GPRs.
+ if (Is64Bit && RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass &&
+ MVT::getSizeInBits(RegVT) == 64)
+ ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);
+
ArgValues.push_back(ArgValue);
} else {
assert(VA.isMemLoc());
ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
}
}
-
+
unsigned StackSize = CCInfo.getNextStackOffset();
// align stack specially for tail calls
- if (CC==CallingConv::Fast)
- StackSize = GetAlignedArgumentStackSize(StackSize,DAG);
-
- ArgValues.push_back(Root);
+ if (CC == CallingConv::Fast)
+ StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
- if (isVarArg)
- VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
+ if (isVarArg) {
+ if (Is64Bit || CC != CallingConv::X86_FastCall) {
+ VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
+ }
+ if (Is64Bit) {
+ static const unsigned GPR64ArgRegs[] = {
+ X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
+ };
+ static const unsigned XMMArgRegs[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+
+ unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
+ unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
+
+ // For X86-64, if there are vararg parameters that are passed via
+ // registers, then we must store them to their spots on the stack so they
+ // may be loaded by dereferencing the result of va_next.
+ VarArgsGPOffset = NumIntRegs * 8;
+ VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
+ RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);
+
+ // Store the integer parameter registers.
+ SmallVector<SDOperand, 8> MemOps;
+ SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
+ SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
+ DAG.getIntPtrConstant(VarArgsGPOffset));
+ for (; NumIntRegs != 6; ++NumIntRegs) {
+ unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
+ X86::GR64RegisterClass);
+ SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
+ SDOperand Store =
+ DAG.getStore(Val.getValue(1), Val, FIN,
+ PseudoSourceValue::getFixedStack(),
+ RegSaveFrameIndex);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
+ DAG.getIntPtrConstant(8));
+ }
+
+ // Now store the XMM (fp + vector) parameter registers.
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
+ DAG.getIntPtrConstant(VarArgsFPOffset));
+ for (; NumXMMRegs != 8; ++NumXMMRegs) {
+ unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
+ X86::VR128RegisterClass);
+ SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
+ SDOperand Store =
+ DAG.getStore(Val.getValue(1), Val, FIN,
+ PseudoSourceValue::getFixedStack(),
+ RegSaveFrameIndex);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
+ DAG.getIntPtrConstant(16));
+ }
+ if (!MemOps.empty())
+ Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &MemOps[0], MemOps.size());
+ }
+ }
+
+ // Make sure the instruction takes 8n+4 bytes to make sure the start of the
+ // arguments and the arguments after the retaddr has been pushed are
+ // aligned.
+ if (!Is64Bit && CC == CallingConv::X86_FastCall &&
+ !Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows() &&
+ (StackSize & 7) == 0)
+ StackSize += 4;
- // Tail call calling convention (CallingConv::Fast) does not support varargs.
- assert( !(isVarArg && CC == CallingConv::Fast) &&
- "CallingConv::Fast does not support varargs.");
+ ArgValues.push_back(Root);
- if (isStdCall && !isVarArg &&
- (CC==CallingConv::Fast && PerformTailCallOpt || CC!=CallingConv::Fast)) {
- BytesToPopOnReturn = StackSize; // Callee pops everything..
+ // Some CCs need callee pop.
+ if (IsCalleePop(Op)) {
+ BytesToPopOnReturn = StackSize; // Callee pops everything.
BytesCallerReserves = 0;
} else {
BytesToPopOnReturn = 0; // Callee pops nothing.
-
// If this is an sret function, the return should pop the hidden pointer.
- if (NumArgs &&
- (cast<ConstantSDNode>(Op.getOperand(3))->getValue() &
- ISD::ParamFlags::StructReturn))
+ if (!Is64Bit && ArgsAreStructReturn(Op))
BytesToPopOnReturn = 4;
-
BytesCallerReserves = StackSize;
}
-
- RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
- X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ if (!Is64Bit) {
+ RegSaveFrameIndex = 0xAAAAAAA; // RegSaveFrameIndex is X86-64 only.
+ if (CC == CallingConv::X86_FastCall)
+ VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
+ }
+
FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
// Return the new list of results.
&ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}
-SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG,
- unsigned CC) {
- SDOperand Chain = Op.getOperand(0);
- bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
- SDOperand Callee = Op.getOperand(4);
- unsigned NumOps = (Op.getNumOperands() - 5) / 2;
-
- // Analyze operands of the call, assigning locations to each operand.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
- if(CC==CallingConv::Fast && PerformTailCallOpt)
- CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_TailCall);
- else
- CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C);
-
- // Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = CCInfo.getNextStackOffset();
- if (CC==CallingConv::Fast)
- NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
-
- Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
-
- SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
- SmallVector<SDOperand, 8> MemOpChains;
-
- SDOperand StackPtr;
-
- // Walk the register/memloc assignments, inserting copies/loads.
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
- SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
-
- // Promote the value if needed.
- switch (VA.getLocInfo()) {
- default: assert(0 && "Unknown loc info!");
- case CCValAssign::Full: break;
- case CCValAssign::SExt:
- Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
- break;
- case CCValAssign::ZExt:
- Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
- break;
- case CCValAssign::AExt:
- Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
+/// LowerMemOpCallTo - Emit the node that places one outgoing call argument
+/// into its assigned memory location. The destination address is StackPtr
+/// plus the offset recorded in VA. An argument whose flags carry
+/// ISD::ParamFlags::ByVal is copied via CreateCopyOfByValArgument; any other
+/// argument is written with a single store chained on Chain. The node that
+/// was created is returned.
+SDOperand
+X86TargetLowering::LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG,
+ const SDOperand &StackPtr,
+ const CCValAssign &VA,
+ SDOperand Chain,
+ SDOperand Arg) {
+ // Destination address = StackPtr + offset assigned to this argument.
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDOperand PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
+ // The flags for value number N are read from operand 6+2*N of the call node
+ // and are expected to be a constant.
+ SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
+ unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
+ if (Flags & ISD::ParamFlags::ByVal) {
+ // ByVal argument: delegate to the by-value copy helper.
+ return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
+ }
+ // Plain store; the memory reference is described by the stack pseudo source
+ // value together with the argument's offset.
+ return DAG.getStore(Chain, Arg, PtrOff,
+ PseudoSourceValue::getStack(), LocMemOffset);
+}
+
+/// ClassifyX86_64SRetCallReturn - Classify how to implement an x86-64
+/// struct return call to the specified function. X86-64 ABI specifies
+/// some SRet calls are actually returned in registers. Since current
+/// LLVM cannot represent multi-value calls, they are represented as
+/// calls where the results are passed in a hidden struct provided by
+/// the caller. This function examines the type of the struct to
+/// determine the correct way to implement the call.
+X86::X86_64SRet
+X86TargetLowering::ClassifyX86_64SRetCallReturn(const Function *Fn) {
+ // FIXME: Disabled for now.
+ return X86::InMemory;
+
+ const PointerType *PTy = cast<PointerType>(Fn->arg_begin()->getType());
+ const Type *RTy = PTy->getElementType();
+ unsigned Size = getTargetData()->getABITypeSize(RTy);
+ if (Size != 16 && Size != 32)
+ return X86::InMemory;
+
+ if (Size == 32) {
+ const StructType *STy = dyn_cast<StructType>(RTy);
+ if (!STy) return X86::InMemory;
+ if (STy->getNumElements() == 2 &&
+ STy->getElementType(0) == Type::X86_FP80Ty &&
+ STy->getElementType(1) == Type::X86_FP80Ty)
+ return X86::InX87;
+ }
+
+ bool AllFP = true;
+ for (Type::subtype_iterator I = RTy->subtype_begin(), E = RTy->subtype_end();
+ I != E; ++I) {
+ const Type *STy = I->get();
+ if (!STy->isFPOrFPVector()) {
+ AllFP = false;
break;
}
-
- if (VA.isRegLoc()) {
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- } else {
- assert(VA.isMemLoc());
- if (StackPtr.Val == 0)
- StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
-
- MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
- Arg));
- }
- }
-
- // If the first argument is an sret pointer, remember it.
- bool isSRet = NumOps &&
- (cast<ConstantSDNode>(Op.getOperand(6))->getValue() &
- ISD::ParamFlags::StructReturn);
-
- if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
-
- // Build a sequence of copy-to-reg nodes chained together with token chain
- // and flag operands which copy the outgoing args into registers.
- SDOperand InFlag;
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
- InFlag);
- InFlag = Chain.getValue(1);
- }
-
- // ELF / PIC requires GOT in the EBX register before function calls via PLT
- // GOT pointer.
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
- Subtarget->isPICStyleGOT()) {
- Chain = DAG.getCopyToReg(Chain, X86::EBX,
- DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
- InFlag);
- InFlag = Chain.getValue(1);
- }
-
- // If the callee is a GlobalAddress node (quite common, every direct call is)
- // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- // We should use extra load for direct calls to dllimported functions in
- // non-JIT mode.
- if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
- getTargetMachine(), true))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
- } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
- Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
-
- // Returns a chain & a flag for retval copy to use.
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
- SmallVector<SDOperand, 8> Ops;
- Ops.push_back(Chain);
- Ops.push_back(Callee);
-
- // Add argument registers to the end of the list so that they are known live
- // into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
-
- // Add an implicit use GOT pointer in EBX.
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
- Subtarget->isPICStyleGOT())
- Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
-
- if (InFlag.Val)
- Ops.push_back(InFlag);
-
- Chain = DAG.getNode(X86ISD::CALL, NodeTys, &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
-
- // Create the CALLSEQ_END node.
- unsigned NumBytesForCalleeToPush = 0;
-
- if (CC == CallingConv::X86_StdCall ||
- (CC == CallingConv::Fast && PerformTailCallOpt)) {
- if (isVarArg)
- NumBytesForCalleeToPush = isSRet ? 4 : 0;
- else
- NumBytesForCalleeToPush = NumBytes;
- assert(!(isVarArg && CC==CallingConv::Fast) &&
- "CallingConv::Fast does not support varargs.");
- } else {
- // If this is is a call to a struct-return function, the callee
- // pops the hidden struct pointer, so we have to push it back.
- // This is common for Darwin/X86, Linux & Mingw32 targets.
- NumBytesForCalleeToPush = isSRet ? 4 : 0;
}
- Chain = DAG.getCALLSEQ_END(Chain,
- DAG.getConstant(NumBytes, getPointerTy()),
- DAG.getConstant(NumBytesForCalleeToPush,
- getPointerTy()),
- InFlag);
- InFlag = Chain.getValue(1);
-
- // Handle result values, copying them out of physregs into vregs that we
- // return.
- return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
-}
-
-
-//===----------------------------------------------------------------------===//
-// FastCall Calling Convention implementation
-//===----------------------------------------------------------------------===//
-//
-// The X86 'fastcall' calling convention passes up to two integer arguments in
-// registers (an appropriate portion of ECX/EDX), passes arguments in C order,
-// and requires that the callee pop its arguments off the stack (allowing proper
-// tail calls), and has the same return value conventions as C calling convs.
-//
-// This calling convention always arranges for the callee pop value to be 8n+4
-// bytes, which is needed for tail recursion elimination and stack alignment
-// reasons.
-SDOperand
-X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- SDOperand Root = Op.getOperand(0);
- bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
-
- // Assign locations to all of the incoming arguments.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
- getTargetMachine(), ArgLocs);
- CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_FastCall);
-
- SmallVector<SDOperand, 8> ArgValues;
- unsigned LastVal = ~0U;
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
- // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
- // places.
- assert(VA.getValNo() != LastVal &&
- "Don't support value assigned to multiple locs yet");
- LastVal = VA.getValNo();
-
- if (VA.isRegLoc()) {
- MVT::ValueType RegVT = VA.getLocVT();
- TargetRegisterClass *RC;
- if (RegVT == MVT::i32)
- RC = X86::GR32RegisterClass;
- else {
- assert(MVT::isVector(RegVT));
- RC = X86::VR128RegisterClass;
- }
-
- unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
- SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);
-
- // If this is an 8 or 16-bit value, it is really passed promoted to 32
- // bits. Insert an assert[sz]ext to capture this, then truncate to the
- // right size.
- if (VA.getLocInfo() == CCValAssign::SExt)
- ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
- DAG.getValueType(VA.getValVT()));
- else if (VA.getLocInfo() == CCValAssign::ZExt)
- ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
- DAG.getValueType(VA.getValVT()));
-
- if (VA.getLocInfo() != CCValAssign::Full)
- ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
-
- ArgValues.push_back(ArgValue);
- } else {
- assert(VA.isMemLoc());
- ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
- }
- }
-
- ArgValues.push_back(Root);
-
- unsigned StackSize = CCInfo.getNextStackOffset();
-
- if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
- // Make sure the instruction takes 8n+4 bytes to make sure the start of the
- // arguments and the arguments after the retaddr has been pushed are
- // aligned.
- if ((StackSize & 7) == 0)
- StackSize += 4;
- }
-
- VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
- RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
- BytesToPopOnReturn = StackSize; // Callee pops all stack arguments.
- BytesCallerReserves = 0;
-
- X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
- FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
-
- // Return the new list of results.
- return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
- &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
-}
-
-SDOperand
-X86TargetLowering::LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG,
- const SDOperand &StackPtr,
- const CCValAssign &VA,
- SDOperand Chain,
- SDOperand Arg) {
- SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
- PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
- SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
- unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
- if (Flags & ISD::ParamFlags::ByVal) {
- unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
- ISD::ParamFlags::ByValAlignOffs);
-
- unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
- ISD::ParamFlags::ByValSizeOffs;
-
- SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
- SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
- SDOperand AlwaysInline = DAG.getConstant(1, MVT::i32);
-
- return DAG.getMemcpy(Chain, PtrOff, Arg, SizeNode, AlignNode,
- AlwaysInline);
- } else {
- return DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
- }
-}
-
-SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
- unsigned CC) {
- SDOperand Chain = Op.getOperand(0);
- bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
- bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
- SDOperand Callee = Op.getOperand(4);
-
- // Analyze operands of the call, assigning locations to each operand.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
- CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_FastCall);
-
- // Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = CCInfo.getNextStackOffset();
-
- if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
- // Make sure the instruction takes 8n+4 bytes to make sure the start of the
- // arguments and the arguments after the retaddr has been pushed are
- // aligned.
- if ((NumBytes & 7) == 0)
- NumBytes += 4;
- }
-
- Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
-
- SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
- SmallVector<SDOperand, 8> MemOpChains;
-
- SDOperand StackPtr;
-
- // Walk the register/memloc assignments, inserting copies/loads.
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
- SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
-
- // Promote the value if needed.
- switch (VA.getLocInfo()) {
- default: assert(0 && "Unknown loc info!");
- case CCValAssign::Full: break;
- case CCValAssign::SExt:
- Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
- break;
- case CCValAssign::ZExt:
- Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
- break;
- case CCValAssign::AExt:
- Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
- break;
- }
-
- if (VA.isRegLoc()) {
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- } else {
- assert(VA.isMemLoc());
- if (StackPtr.Val == 0)
- StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
-
- MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
- Arg));
- }
- }
-
- if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
-
- // Build a sequence of copy-to-reg nodes chained together with token chain
- // and flag operands which copy the outgoing args into registers.
- SDOperand InFlag;
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
- InFlag);
- InFlag = Chain.getValue(1);
- }
-
- // If the callee is a GlobalAddress node (quite common, every direct call is)
- // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- // We should use extra load for direct calls to dllimported functions in
- // non-JIT mode.
- if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
- getTargetMachine(), true))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
- } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
- Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
-
- // ELF / PIC requires GOT in the EBX register before function calls via PLT
- // GOT pointer.
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
- Subtarget->isPICStyleGOT()) {
- Chain = DAG.getCopyToReg(Chain, X86::EBX,
- DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
- InFlag);
- InFlag = Chain.getValue(1);
- }
-
- // Returns a chain & a flag for retval copy to use.
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
- SmallVector<SDOperand, 8> Ops;
- Ops.push_back(Chain);
- Ops.push_back(Callee);
-
- // Add argument registers to the end of the list so that they are known live
- // into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
-
- // Add an implicit use GOT pointer in EBX.
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
- Subtarget->isPICStyleGOT())
- Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
-
- if (InFlag.Val)
- Ops.push_back(InFlag);
-
- assert(isTailCall==false && "no tail call here");
- Chain = DAG.getNode(X86ISD::CALL,
- NodeTys, &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
-
- // Returns a flag for retval copy to use.
- NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
- Ops.clear();
- Ops.push_back(Chain);
- Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
- Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
- Ops.push_back(InFlag);
- Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
-
- // Handle result values, copying them out of physregs into vregs that we
- // return.
- return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
-}
-
-//===----------------------------------------------------------------------===//
-// Fast Calling Convention (tail call) implementation
-//===----------------------------------------------------------------------===//
-
-// Like std call, callee cleans arguments, convention except that ECX is
-// reserved for storing the tail called function address. Only 2 registers are
-// free for argument passing (inreg). Tail call optimization is performed
-// provided:
-// * tailcallopt is enabled
-// * caller/callee are fastcc
-// * elf/pic is disabled OR
-// * elf/pic enabled + callee is in module + callee has
-// visibility protected or hidden
-// To keep the stack aligned according to platform abi the function
-// GetAlignedArgumentStackSize ensures that argument delta is always multiples
-// of stack alignment. (Dynamic linkers need this - darwin's dyld for example)
-// If a tail called function callee has more arguments than the caller the
-// caller needs to make sure that there is room to move the RETADDR to. This is
-// achieved by reserving an area the size of the argument delta right after the
-// original REtADDR, but before the saved framepointer or the spilled registers
-// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
-// stack layout:
-// arg1
-// arg2
-// RETADDR
-// [ new RETADDR
-// move area ]
-// (possible EBP)
-// ESI
-// EDI
-// local1 ..
-
-/// GetAlignedArgumentStackSize - Make the stack size align e.g 16n + 12 aligned
-/// for a 16 byte align requirement.
-unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
- SelectionDAG& DAG) {
- if (PerformTailCallOpt) {
- MachineFunction &MF = DAG.getMachineFunction();
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameInfo &TFI = *TM.getFrameInfo();
- unsigned StackAlignment = TFI.getStackAlignment();
- uint64_t AlignMask = StackAlignment - 1;
- int64_t Offset = StackSize;
- unsigned SlotSize = Subtarget->is64Bit() ? 8 : 4;
- if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
- // Number smaller than 12 so just add the difference.
- Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
- } else {
- // Mask out lower bits, add stackalignment once plus the 12 bytes.
- Offset = ((~AlignMask) & Offset) + StackAlignment +
- (StackAlignment-SlotSize);
- }
- StackSize = Offset;
- }
- return StackSize;
-}
-
-/// IsEligibleForTailCallElimination - Check to see whether the next instruction
-/// following the call is a return. A function is eligible if caller/callee
-/// calling conventions match, currently only fastcc supports tail calls, and
-/// the function CALL is immediatly followed by a RET.
-bool X86TargetLowering::IsEligibleForTailCallOptimization(SDOperand Call,
- SDOperand Ret,
- SelectionDAG& DAG) const {
- if (!PerformTailCallOpt)
- return false;
-
- // Check whether CALL node immediatly preceeds the RET node and whether the
- // return uses the result of the node or is a void return.
- unsigned NumOps = Ret.getNumOperands();
- if ((NumOps == 1 &&
- (Ret.getOperand(0) == SDOperand(Call.Val,1) ||
- Ret.getOperand(0) == SDOperand(Call.Val,0))) ||
- (NumOps > 1 &&
- Ret.getOperand(0) == SDOperand(Call.Val,Call.Val->getNumValues()-1) &&
- Ret.getOperand(1) == SDOperand(Call.Val,0))) {
- MachineFunction &MF = DAG.getMachineFunction();
- unsigned CallerCC = MF.getFunction()->getCallingConv();
- unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue();
- if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
- SDOperand Callee = Call.getOperand(4);
- // On elf/pic %ebx needs to be livein.
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_ ||
- !Subtarget->isPICStyleGOT())
- return true;
-
- // Can only do local tail calls with PIC.
- GlobalValue * GV = 0;
- GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
- if(G != 0 &&
- (GV = G->getGlobal()) &&
- (GV->hasHiddenVisibility() || GV->hasProtectedVisibility()))
- return true;
- }
- }
-
- return false;
-}
-
-SDOperand X86TargetLowering::LowerX86_TailCallTo(SDOperand Op,
- SelectionDAG &DAG,
- unsigned CC) {
- SDOperand Chain = Op.getOperand(0);
- bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
- bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
- SDOperand Callee = Op.getOperand(4);
- bool is64Bit = Subtarget->is64Bit();
-
- assert(isTailCall && PerformTailCallOpt && "Should only emit tail calls.");
-
- // Analyze operands of the call, assigning locations to each operand.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
- if (is64Bit)
- CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_TailCall);
- else
- CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_TailCall);
-
-
- // Lower arguments at fp - stackoffset + fpdiff.
- MachineFunction &MF = DAG.getMachineFunction();
-
- unsigned NumBytesToBePushed =
- GetAlignedArgumentStackSize(CCInfo.getNextStackOffset(), DAG);
-
- unsigned NumBytesCallerPushed =
- MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
- int FPDiff = NumBytesCallerPushed - NumBytesToBePushed;
-
- // Set the delta of movement of the returnaddr stackslot.
- // But only set if delta is greater than previous delta.
- if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
- MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
-
- Chain = DAG.
- getCALLSEQ_START(Chain, DAG.getConstant(NumBytesToBePushed, getPointerTy()));
-
- // Adjust the Return address stack slot.
- SDOperand RetAddrFrIdx, NewRetAddrFrIdx;
- if (FPDiff) {
- MVT::ValueType VT = is64Bit ? MVT::i64 : MVT::i32;
- RetAddrFrIdx = getReturnAddressFrameIndex(DAG);
- // Load the "old" Return address.
- RetAddrFrIdx =
- DAG.getLoad(VT, Chain,RetAddrFrIdx, NULL, 0);
- // Calculate the new stack slot for the return address.
- int SlotSize = is64Bit ? 8 : 4;
- int NewReturnAddrFI =
- MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
- NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
- Chain = SDOperand(RetAddrFrIdx.Val, 1);
- }
-
- SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
- SmallVector<SDOperand, 8> MemOpChains;
- SmallVector<SDOperand, 8> MemOpChains2;
- SDOperand FramePtr, StackPtr;
- SDOperand PtrOff;
- SDOperand FIN;
- int FI = 0;
-
- // Walk the register/memloc assignments, inserting copies/loads. Lower
- // arguments first to the stack slot where they would normally - in case of a
- // normal function call - be.
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
- SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
-
- // Promote the value if needed.
- switch (VA.getLocInfo()) {
- default: assert(0 && "Unknown loc info!");
- case CCValAssign::Full: break;
- case CCValAssign::SExt:
- Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
- break;
- case CCValAssign::ZExt:
- Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
- break;
- case CCValAssign::AExt:
- Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
- break;
- }
-
- if (VA.isRegLoc()) {
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- } else {
- assert(VA.isMemLoc());
- if (StackPtr.Val == 0)
- StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
-
- MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
- Arg));
- }
- }
-
- if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
-
- // Build a sequence of copy-to-reg nodes chained together with token chain
- // and flag operands which copy the outgoing args into registers.
- SDOperand InFlag;
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
- InFlag);
- InFlag = Chain.getValue(1);
- }
- InFlag = SDOperand();
-
- // Copy from stack slots to stack slot of a tail called function. This needs
- // to be done because if we would lower the arguments directly to their real
- // stack slot we might end up overwriting each other.
- // TODO: To make this more efficient (sometimes saving a store/load) we could
- // analyse the arguments and emit this store/load/store sequence only for
- // arguments which would be overwritten otherwise.
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
- if (!VA.isRegLoc()) {
- SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
- unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
-
- // Get source stack slot.
- SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
- PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
- // Create frame index.
- int32_t Offset = VA.getLocMemOffset()+FPDiff;
- uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8;
- FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
- FIN = DAG.getFrameIndex(FI, MVT::i32);
- if (Flags & ISD::ParamFlags::ByVal) {
- // Copy relative to framepointer.
- unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
- ISD::ParamFlags::ByValAlignOffs);
-
- unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
- ISD::ParamFlags::ByValSizeOffs;
-
- SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
- SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
- SDOperand AlwaysInline = DAG.getConstant(1, MVT::i1);
-
- MemOpChains2.push_back(DAG.getMemcpy(Chain, FIN, PtrOff, SizeNode,
- AlignNode,AlwaysInline));
- } else {
- SDOperand LoadedArg = DAG.getLoad(VA.getValVT(), Chain, PtrOff, NULL,0);
- // Store relative to framepointer.
- MemOpChains2.push_back(DAG.getStore(Chain, LoadedArg, FIN, NULL, 0));
- }
- }
- }
-
- if (!MemOpChains2.empty())
- Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
- &MemOpChains2[0], MemOpChains.size());
-
- // Store the return address to the appropriate stack slot.
- if (FPDiff)
- Chain = DAG.getStore(Chain,RetAddrFrIdx, NewRetAddrFrIdx, NULL, 0);
-
- // ELF / PIC requires GOT in the EBX register before function calls via PLT
- // GOT pointer.
- // Does not work with tail call since ebx is not restored correctly by
- // tailcaller. TODO: at least for x86 - verify for x86-64
-
- // If the callee is a GlobalAddress node (quite common, every direct call is)
- // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- // We should use extra load for direct calls to dllimported functions in
- // non-JIT mode.
- if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
- getTargetMachine(), true))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
- } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
- Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
- else {
- assert(Callee.getOpcode() == ISD::LOAD &&
- "Function destination must be loaded into virtual register");
- unsigned Opc = is64Bit ? X86::R9 : X86::ECX;
-
- Chain = DAG.getCopyToReg(Chain,
- DAG.getRegister(Opc, getPointerTy()) ,
- Callee,InFlag);
- Callee = DAG.getRegister(Opc, getPointerTy());
- // Add register as live out.
- DAG.getMachineFunction().addLiveOut(Opc);
- }
-
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
- SmallVector<SDOperand, 8> Ops;
-
- Ops.push_back(Chain);
- Ops.push_back(DAG.getConstant(NumBytesToBePushed, getPointerTy()));
- Ops.push_back(DAG.getConstant(0, getPointerTy()));
- if (InFlag.Val)
- Ops.push_back(InFlag);
- Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
-
- // Returns a chain & a flag for retval copy to use.
- NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
- Ops.clear();
- Ops.push_back(Chain);
- Ops.push_back(Callee);
- Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
- // Add argument registers to the end of the list so that they are known live
- // into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
- if (InFlag.Val)
- Ops.push_back(InFlag);
- assert(InFlag.Val &&
- "Flag must be set. Depend on flag being set in LowerRET");
- Chain = DAG.getNode(X86ISD::TAILCALL,
- Op.Val->getVTList(), &Ops[0], Ops.size());
-
- return SDOperand(Chain.Val, Op.ResNo);
-}
-
-//===----------------------------------------------------------------------===//
-// X86-64 C Calling Convention implementation
-//===----------------------------------------------------------------------===//
-
-SDOperand
-X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- SDOperand Root = Op.getOperand(0);
- bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
- unsigned CC= MF.getFunction()->getCallingConv();
-
- static const unsigned GPR64ArgRegs[] = {
- X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
- };
- static const unsigned XMMArgRegs[] = {
- X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
- X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
- };
-
-
- // Assign locations to all of the incoming arguments.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg,
- getTargetMachine(), ArgLocs);
- if (CC == CallingConv::Fast && PerformTailCallOpt)
- CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_TailCall);
- else
- CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C);
-
- SmallVector<SDOperand, 8> ArgValues;
- unsigned LastVal = ~0U;
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
- // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
- // places.
- assert(VA.getValNo() != LastVal &&
- "Don't support value assigned to multiple locs yet");
- LastVal = VA.getValNo();
-
- if (VA.isRegLoc()) {
- MVT::ValueType RegVT = VA.getLocVT();
- TargetRegisterClass *RC;
- if (RegVT == MVT::i32)
- RC = X86::GR32RegisterClass;
- else if (RegVT == MVT::i64)
- RC = X86::GR64RegisterClass;
- else if (RegVT == MVT::f32)
- RC = X86::FR32RegisterClass;
- else if (RegVT == MVT::f64)
- RC = X86::FR64RegisterClass;
- else {
- assert(MVT::isVector(RegVT));
- if (MVT::getSizeInBits(RegVT) == 64) {
- RC = X86::GR64RegisterClass; // MMX values are passed in GPRs.
- RegVT = MVT::i64;
- } else
- RC = X86::VR128RegisterClass;
- }
-
- unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
- SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);
-
- // If this is an 8 or 16-bit value, it is really passed promoted to 32
- // bits. Insert an assert[sz]ext to capture this, then truncate to the
- // right size.
- if (VA.getLocInfo() == CCValAssign::SExt)
- ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
- DAG.getValueType(VA.getValVT()));
- else if (VA.getLocInfo() == CCValAssign::ZExt)
- ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
- DAG.getValueType(VA.getValVT()));
-
- if (VA.getLocInfo() != CCValAssign::Full)
- ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
-
- // Handle MMX values passed in GPRs.
- if (RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass &&
- MVT::getSizeInBits(RegVT) == 64)
- ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);
-
- ArgValues.push_back(ArgValue);
- } else {
- assert(VA.isMemLoc());
- ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
- }
- }
-
- unsigned StackSize = CCInfo.getNextStackOffset();
- if (CC==CallingConv::Fast)
- StackSize =GetAlignedArgumentStackSize(StackSize, DAG);
-
- // If the function takes variable number of arguments, make a frame index for
- // the start of the first vararg value... for expansion of llvm.va_start.
- if (isVarArg) {
- assert(CC!=CallingConv::Fast
- && "Var arg not supported with calling convention fastcc");
- unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
- unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
-
- // For X86-64, if there are vararg parameters that are passed via
- // registers, then we must store them to their spots on the stack so they
- // may be loaded by deferencing the result of va_next.
- VarArgsGPOffset = NumIntRegs * 8;
- VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
- VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
- RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);
-
- // Store the integer parameter registers.
- SmallVector<SDOperand, 8> MemOps;
- SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
- SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
- DAG.getConstant(VarArgsGPOffset, getPointerTy()));
- for (; NumIntRegs != 6; ++NumIntRegs) {
- unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
- X86::GR64RegisterClass);
- SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
- SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
- MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
- DAG.getConstant(8, getPointerTy()));
- }
-
- // Now store the XMM (fp + vector) parameter registers.
- FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
- DAG.getConstant(VarArgsFPOffset, getPointerTy()));
- for (; NumXMMRegs != 8; ++NumXMMRegs) {
- unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
- X86::VR128RegisterClass);
- SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
- SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
- MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
- DAG.getConstant(16, getPointerTy()));
- }
- if (!MemOps.empty())
- Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
- &MemOps[0], MemOps.size());
- }
-
- ArgValues.push_back(Root);
- // Tail call convention (fastcc) needs callee pop.
- if (CC == CallingConv::Fast && PerformTailCallOpt) {
- BytesToPopOnReturn = StackSize; // Callee pops everything.
- BytesCallerReserves = 0;
- } else {
- BytesToPopOnReturn = 0; // Callee pops nothing.
- BytesCallerReserves = StackSize;
- }
- X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
- FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
-
- // Return the new list of results.
- return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
- &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
+ if (AllFP)
+ return X86::InSSE;
+ return X86::InGPR64;
}
-SDOperand
-X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG,
- unsigned CC) {
+/// X86_64AnalyzeSRetCallOperands - Run the calling convention assignment
+/// function Fn over the operands of TheCall, recording the assignments in
+/// CCInfo. Argument #0 is deliberately skipped (presumably the hidden sret
+/// pointer - confirm against the caller). Prints a diagnostic and aborts if Fn
+/// reports that it cannot handle an operand's type.
+void X86TargetLowering::X86_64AnalyzeSRetCallOperands(SDNode *TheCall,
+                                                       CCAssignFn *Fn,
+                                                       CCState &CCInfo) {
+  // Call arguments are stored as (value, flags) operand pairs starting at
+  // operand 5 of the call node.
+  const unsigned FirstArgOp = 5;
+  unsigned ArgCount = (TheCall->getNumOperands() - FirstArgOp) / 2;
+  for (unsigned ArgNo = 1; ArgNo != ArgCount; ++ArgNo) {
+    unsigned ValueIdx = FirstArgOp + 2 * ArgNo;
+    MVT::ValueType VT = TheCall->getOperand(ValueIdx).getValueType();
+    SDOperand FlagsNode = TheCall->getOperand(ValueIdx + 1);
+    unsigned Flags = cast<ConstantSDNode>(FlagsNode)->getValue();
+
+    // Fn returns true when it failed to assign a location to the operand.
+    if (!Fn(ArgNo, VT, VT, CCValAssign::Full, Flags, CCInfo))
+      continue;
+
+    cerr << "Call operand #" << ArgNo << " has unhandled type "
+         << MVT::getValueTypeString(VT) << "\n";
+    abort();
+  }
+}
+
+SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
SDOperand Chain = Op.getOperand(0);
+ unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
+ bool IsTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0
+ && CC == CallingConv::Fast && PerformTailCallOpt;
SDOperand Callee = Op.getOperand(4);
-
+ bool Is64Bit = Subtarget->is64Bit();
+ bool IsStructRet = CallIsStructReturn(Op);
+
+ assert(!(isVarArg && CC == CallingConv::Fast) &&
+ "Var args not supported with calling convention fastcc");
+
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
- if (CC==CallingConv::Fast && PerformTailCallOpt)
- CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_TailCall);
- else
- CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C);
-
+ CCAssignFn *CCFn = CCAssignFnForNode(Op);
+
+ X86::X86_64SRet SRetMethod = X86::InMemory;
+ if (Is64Bit && IsStructRet)
+ // FIXME: We can't figure out type of the sret structure for indirect
+ // calls. We need to copy more information from CallSite to the ISD::CALL
+ // node.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ SRetMethod =
+ ClassifyX86_64SRetCallReturn(dyn_cast<Function>(G->getGlobal()));
+
+ // UGLY HACK! For x86-64, some 128-bit aggregates are returned in a pair of
+ // registers. Unfortunately, llvm does not support i128 yet so we pretend it's
+ // a sret call.
+ if (SRetMethod != X86::InMemory)
+ X86_64AnalyzeSRetCallOperands(Op.Val, CCFn, CCInfo);
+ else
+ CCInfo.AnalyzeCallOperands(Op.Val, CCFn);
+
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
if (CC == CallingConv::Fast)
- NumBytes = GetAlignedArgumentStackSize(NumBytes,DAG);
+ NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
+
+ // Make sure the instruction takes 8n+4 bytes to make sure the start of the
+ // arguments and the arguments after the retaddr has been pushed are aligned.
+ if (!Is64Bit && CC == CallingConv::X86_FastCall &&
+ !Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows() &&
+ (NumBytes & 7) == 0)
+ NumBytes += 4;
+
+ int FPDiff = 0;
+ if (IsTailCall) {
+ // Lower arguments at fp - stackoffset + fpdiff.
+ unsigned NumBytesCallerPushed =
+ MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
+ FPDiff = NumBytesCallerPushed - NumBytes;
+
+ // Set the delta of movement of the returnaddr stackslot.
+ // But only set if delta is greater than previous delta.
+ if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
+ MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
+ }
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes));
- Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
+ SDOperand RetAddrFrIdx, NewRetAddrFrIdx;
+ if (IsTailCall) {
+ // Adjust the Return address stack slot.
+ if (FPDiff) {
+ MVT::ValueType VT = Is64Bit ? MVT::i64 : MVT::i32;
+ RetAddrFrIdx = getReturnAddressFrameIndex(DAG);
+ // Load the "old" Return address.
+ RetAddrFrIdx =
+ DAG.getLoad(VT, Chain,RetAddrFrIdx, NULL, 0);
+ // Calculate the new stack slot for the return address.
+ int SlotSize = Is64Bit ? 8 : 4;
+ int NewReturnAddrFI =
+ MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
+ NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
+ Chain = SDOperand(RetAddrFrIdx.Val, 1);
+ }
+ }
SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
SmallVector<SDOperand, 8> MemOpChains;
SDOperand StackPtr;
-
- // Walk the register/memloc assignments, inserting copies/loads.
+
+ // Walk the register/memloc assignments, inserting copies/loads. For tail
+ // calls, lower arguments which could otherwise be possibly overwritten to the
+ // stack slot where they would go on normal function calls.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else {
- assert(VA.isMemLoc());
- if (StackPtr.Val == 0)
- StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
-
- MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
- Arg));
+ if (!IsTailCall || IsPossiblyOverwrittenArgumentOfTailCall(Arg)) {
+ assert(VA.isMemLoc());
+ if (StackPtr.Val == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
+
+ MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
+ Arg));
+ }
}
}
InFlag = Chain.getValue(1);
}
- if (isVarArg) {
- assert ( CallingConv::Fast != CC &&
- "Var args not supported with calling convention fastcc");
+ if (IsTailCall)
+ InFlag = SDOperand(); // ??? Isn't this nuking the preceding loop's output?
+ // ELF / PIC requires GOT in the EBX register before function calls via PLT
+ // GOT pointer.
+ // Does not work with tail call since ebx is not restored correctly by
+ // tailcaller. TODO: at least for x86 - verify for x86-64
+ if (!IsTailCall && !Is64Bit &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT()) {
+ Chain = DAG.getCopyToReg(Chain, X86::EBX,
+ DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ if (Is64Bit && isVarArg) {
// From AMD64 ABI document:
// For calls that may call functions that use varargs or stdargs
// (prototype-less calls or calls to functions containing ellipsis (...) in
InFlag = Chain.getValue(1);
}
+ // For tail calls lower the arguments to the 'real' stack slot.
+ if (IsTailCall) {
+ SmallVector<SDOperand, 8> MemOpChains2;
+ SDOperand FIN;
+ int FI = 0;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (!VA.isRegLoc()) {
+ assert(VA.isMemLoc());
+ SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
+ SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
+ unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
+ // Create frame index.
+ int32_t Offset = VA.getLocMemOffset()+FPDiff;
+ uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8;
+ FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
+ FIN = DAG.getFrameIndex(FI, MVT::i32);
+ SDOperand Source = Arg;
+ if (IsPossiblyOverwrittenArgumentOfTailCall(Arg)) {
+ // Copy from stack slots to stack slot of a tail called function. This
+ // needs to be done because if we would lower the arguments directly
+ // to their real stack slot we might end up overwriting each other.
+ // Get source stack slot.
+ Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
+ if (StackPtr.Val == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
+ Source = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, Source);
+ if ((Flags & ISD::ParamFlags::ByVal)==0)
+ Source = DAG.getLoad(VA.getValVT(), Chain, Source, NULL, 0);
+ }
+
+ if (Flags & ISD::ParamFlags::ByVal) {
+ // Copy relative to framepointer.
+ MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, Chain,
+ Flags, DAG));
+ } else {
+ // Store relative to framepointer.
+ MemOpChains2.push_back(
+ DAG.getStore(Chain, Source, FIN,
+ PseudoSourceValue::getFixedStack(), FI));
+ }
+ }
+ }
+
+ if (!MemOpChains2.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &MemOpChains2[0], MemOpChains2.size());
+
+ // Store the return address to the appropriate stack slot.
+ if (FPDiff)
+ Chain = DAG.getStore(Chain,RetAddrFrIdx, NewRetAddrFrIdx, NULL, 0);
+ }
+
// If the callee is a GlobalAddress node (quite common, every direct call is)
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
// We should use extra load for direct calls to dllimported functions in
// non-JIT mode.
- if (getTargetMachine().getCodeModel() != CodeModel::Large
+ if ((IsTailCall || !Is64Bit ||
+ getTargetMachine().getCodeModel() != CodeModel::Large)
&& !Subtarget->GVRequiresExtraLoad(G->getGlobal(),
getTargetMachine(), true))
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
- } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
- if (getTargetMachine().getCodeModel() != CodeModel::Large)
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ if (IsTailCall || !Is64Bit ||
+ getTargetMachine().getCodeModel() != CodeModel::Large)
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+ } else if (IsTailCall) {
+ assert(Callee.getOpcode() == ISD::LOAD &&
+ "Function destination must be loaded into virtual register");
+ unsigned Opc = Is64Bit ? X86::R9 : X86::ECX;
+ Chain = DAG.getCopyToReg(Chain,
+ DAG.getRegister(Opc, getPointerTy()) ,
+ Callee,InFlag);
+ Callee = DAG.getRegister(Opc, getPointerTy());
+ // Add register as live out.
+ DAG.getMachineFunction().getRegInfo().addLiveOut(Opc);
+ }
+
// Returns a chain & a flag for retval copy to use.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
SmallVector<SDOperand, 8> Ops;
+
+ if (IsTailCall) {
+ Ops.push_back(Chain);
+ Ops.push_back(DAG.getIntPtrConstant(NumBytes));
+ Ops.push_back(DAG.getIntPtrConstant(0));
+ if (InFlag.Val)
+ Ops.push_back(InFlag);
+ Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Returns a chain & a flag for retval copy to use.
+ NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ Ops.clear();
+ }
+
Ops.push_back(Chain);
Ops.push_back(Callee);
+ if (IsTailCall)
+ Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
+
+ // Add an implicit use GOT pointer in EBX.
+ if (!IsTailCall && !Is64Bit &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT())
+ Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
+
// Add argument registers to the end of the list so that they are known live
// into the call.
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
-
+
if (InFlag.Val)
Ops.push_back(InFlag);
- Chain = DAG.getNode(X86ISD::CALL,
- NodeTys, &Ops[0], Ops.size());
+ if (IsTailCall) {
+ assert(InFlag.Val &&
+ "Flag must be set. Depend on flag being set in LowerRET");
+ Chain = DAG.getNode(X86ISD::TAILCALL,
+ Op.Val->getVTList(), &Ops[0], Ops.size());
+
+ return SDOperand(Chain.Val, Op.ResNo);
+ }
+
+ Chain = DAG.getNode(X86ISD::CALL, NodeTys, &Ops[0], Ops.size());
InFlag = Chain.getValue(1);
- int NumBytesForCalleeToPush = 0;
- if (CC==CallingConv::Fast && PerformTailCallOpt) {
- NumBytesForCalleeToPush = NumBytes; // Callee pops everything
- } else {
+
+ // Create the CALLSEQ_END node.
+ unsigned NumBytesForCalleeToPush;
+ if (IsCalleePop(Op))
+ NumBytesForCalleeToPush = NumBytes; // Callee pops everything
+ else if (!Is64Bit && IsStructRet)
+ // If this is a call to a struct-return function, the callee
+ // pops the hidden struct pointer, so we have to push it back.
+ // This is common for Darwin/X86, Linux & Mingw32 targets.
+ NumBytesForCalleeToPush = 4;
+ else
NumBytesForCalleeToPush = 0; // Callee pops nothing.
- }
+
// Returns a flag for retval copy to use.
- NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
- Ops.clear();
- Ops.push_back(Chain);
- Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
- Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
- Ops.push_back(InFlag);
- Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getIntPtrConstant(NumBytes),
+ DAG.getIntPtrConstant(NumBytesForCalleeToPush),
+ InFlag);
InFlag = Chain.getValue(1);
-
+
// Handle result values, copying them out of physregs into vregs that we
// return.
- return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
+ switch (SRetMethod) {
+ default:
+ return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
+ case X86::InGPR64:
+ return SDOperand(LowerCallResultToTwo64BitRegs(Chain, InFlag, Op.Val,
+ X86::RAX, X86::RDX,
+ MVT::i64, DAG), Op.ResNo);
+ case X86::InSSE:
+ return SDOperand(LowerCallResultToTwo64BitRegs(Chain, InFlag, Op.Val,
+ X86::XMM0, X86::XMM1,
+ MVT::f64, DAG), Op.ResNo);
+ case X86::InX87:
+ return SDOperand(LowerCallResultToTwoX87Regs(Chain, InFlag, Op.Val, DAG),
+ Op.ResNo);
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Fast Calling Convention (tail call) implementation
+//===----------------------------------------------------------------------===//
+
+// Like the stdcall convention, the callee cleans up the arguments, except that
+// ECX is reserved for storing the tail called function address. Only 2
+// registers are free for argument passing (inreg). Tail call optimization is
+// performed provided:
+// * tailcallopt is enabled
+// * caller/callee are fastcc
+// * elf/pic is disabled OR
+// * elf/pic enabled + callee is in module + callee has
+// visibility protected or hidden
+// To keep the stack aligned according to platform abi the function
+// GetAlignedArgumentStackSize ensures that argument delta is always multiples
+// of stack alignment. (Dynamic linkers need this - darwin's dyld for example)
+// If a tail called function (the callee) has more arguments than the caller,
+// the caller needs to make sure that there is room to move the RETADDR to. This
+// is achieved by reserving an area the size of the argument delta right after
+// the original RETADDR, but before the saved framepointer or the spilled
+// registers, e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
+// stack layout:
+// arg1
+// arg2
+// RETADDR
+// [ new RETADDR
+// move area ]
+// (possible EBP)
+// ESI
+// EDI
+// local1 ..
+
+/// GetAlignedArgumentStackSize - Round the argument stack size up so that it
+/// is congruent to (StackAlignment - SlotSize) modulo the stack alignment,
+/// e.g. 16n + 12 for a 16 byte alignment requirement with 4 byte slots. The
+/// size is returned unchanged when tail call optimization is disabled.
+unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
+                                                        SelectionDAG& DAG) {
+  if (!PerformTailCallOpt)
+    return StackSize;
+
+  const TargetFrameInfo &FrameInfo =
+    *DAG.getMachineFunction().getTarget().getFrameInfo();
+  unsigned StackAlign = FrameInfo.getStackAlignment();
+  uint64_t LowBitsMask = StackAlign - 1;
+  unsigned SlotSize = Subtarget->is64Bit() ? 8 : 4;
+  // Remainder (modulo the stack alignment) every adjusted size must have.
+  unsigned WantedResidue = StackAlign - SlotSize;
+
+  int64_t Size = StackSize;
+  uint64_t Residue = Size & LowBitsMask;
+  if (Residue <= WantedResidue) {
+    // Already at or below the wanted residue: just add the difference.
+    Size += WantedResidue - Residue;
+  } else {
+    // Past the wanted residue: clear the low bits, advance by one full
+    // alignment unit, then add the wanted residue.
+    Size = ((~LowBitsMask) & Size) + StackAlign + WantedResidue;
+  }
+  return Size;
}
+/// IsEligibleForTailCallOptimization - Check whether the call node Call can be
+/// lowered as a tail call with respect to the return node Ret. This requires
+/// that tail call optimization is enabled, that the CALL is immediately
+/// followed by a RET which returns the call's result (or nothing), and that
+/// caller and callee both use the fastcc calling convention. With GOT-style
+/// PIC only direct calls to hidden or protected globals are eligible.
+bool X86TargetLowering::IsEligibleForTailCallOptimization(SDOperand Call,
+                                                          SDOperand Ret,
+                                                    SelectionDAG& DAG) const {
+  if (!PerformTailCallOpt)
+    return false;
+
+  // Check whether the CALL node immediately precedes the RET node and whether
+  // the return uses the result of the call or is a void return.
+  bool RetFollowsCall = false;
+  unsigned NumRetOps = Ret.getNumOperands();
+  if (NumRetOps == 1) {
+    SDOperand RetChain = Ret.getOperand(0);
+    RetFollowsCall = RetChain == SDOperand(Call.Val, 1) ||
+                     RetChain == SDOperand(Call.Val, 0);
+  } else if (NumRetOps > 1) {
+    unsigned LastCallValue = Call.Val->getNumValues() - 1;
+    RetFollowsCall =
+      Ret.getOperand(0) == SDOperand(Call.Val, LastCallValue) &&
+      Ret.getOperand(1) == SDOperand(Call.Val, 0);
+  }
+  if (!RetFollowsCall)
+    return false;
+
+  // Caller and callee calling conventions must match and be fastcc.
+  MachineFunction &MF = DAG.getMachineFunction();
+  unsigned CallerCC = MF.getFunction()->getCallingConv();
+  unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue();
+  if (CalleeCC != CallingConv::Fast || CallerCC != CalleeCC)
+    return false;
+
+  // On elf/pic %ebx needs to be livein; every other configuration is fine.
+  bool UsesGOTStylePIC =
+    getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+    Subtarget->isPICStyleGOT();
+  if (!UsesGOTStylePIC)
+    return true;
+
+  // Can only do local tail calls with PIC.
+  SDOperand Callee = Call.getOperand(4);
+  GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+  if (!G)
+    return false;
+  return G->getGlobal()->hasHiddenVisibility()
+      || G->getGlobal()->hasProtectedVisibility();
+}
//===----------------------------------------------------------------------===//
// Other Lowering Hooks
if (ThisElt.Val)
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
- DAG.getConstant(i/2, TLI.getPointerTy()));
+ DAG.getIntPtrConstant(i/2));
}
}
First = false;
}
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
- DAG.getConstant(i, TLI.getPointerTy()));
+ DAG.getIntPtrConstant(i));
}
}
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
- DAG.getConstant(0, getPointerTy()));
+ DAG.getIntPtrConstant(0));
} else if (MVT::getSizeInBits(VT) == 64) {
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Idx == 0)
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
- DAG.getConstant(0, getPointerTy()));
+ DAG.getIntPtrConstant(0));
}
return SDOperand();
if (N1.getValueType() != MVT::i32)
N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
if (N2.getValueType() != MVT::i32)
- N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(),getPointerTy());
+ N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getValue());
return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
}
-
- N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
- unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
- MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
- MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
- SmallVector<SDOperand, 4> MaskVec;
- for (unsigned i = 0; i < 4; ++i)
- MaskVec.push_back(DAG.getConstant((i == Idx) ? i+4 : i, MaskEVT));
- return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
- DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
- &MaskVec[0], MaskVec.size()));
+ return SDOperand();
}
SDOperand
X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
+ // If it's a debug information descriptor, don't mess with it.
+ if (DAG.isVerifiedDebugInfoDesc(Op))
+ return Result;
Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
// the GV offset field. Platform check is inside GVRequiresExtraLoad() call
// The same applies for external symbols during PIC codegen
if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false))
- Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0);
+ Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result,
+ PseudoSourceValue::getGOT(), 0);
return Result;
}
SDOperand Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA);
if (GA->getGlobal()->isDeclaration()) // initial exec TLS model
- Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, NULL, 0);
+ Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset,
+ PseudoSourceValue::getGOT(), 0);
// The address of the thread local variable is the add of the thread
// pointer with the offset of the variable.
int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0),
- StackSlot, NULL, 0);
+ StackSlot,
+ PseudoSourceValue::getFixedStack(),
+ SSFI);
// These are really Legal; caller falls through into that case.
- if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f32 && X86ScalarSSEf32)
- return Result;
- if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f64 && X86ScalarSSEf64)
+ if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType()))
return Result;
- if (SrcVT==MVT::i64 && Op.getValueType() != MVT::f80 &&
+ if (SrcVT == MVT::i64 && Op.getValueType() != MVT::f80 &&
Subtarget->is64Bit())
return Result;
// Build the FILD
SDVTList Tys;
- bool useSSE = (X86ScalarSSEf32 && Op.getValueType() == MVT::f32) ||
- (X86ScalarSSEf64 && Op.getValueType() == MVT::f64);
+ bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType());
if (useSSE)
Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
else
Ops.push_back(DAG.getValueType(Op.getValueType()));
Ops.push_back(InFlag);
Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
- Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0);
+ Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot,
+ PseudoSourceValue::getFixedStack(), SSFI);
}
return Result;
// These are really Legal.
if (Op.getValueType() == MVT::i32 &&
- X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32)
- return std::make_pair(SDOperand(), SDOperand());
- if (Op.getValueType() == MVT::i32 &&
- X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64)
+ isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
return std::make_pair(SDOperand(), SDOperand());
if (Subtarget->is64Bit() &&
Op.getValueType() == MVT::i64 &&
SDOperand Chain = DAG.getEntryNode();
SDOperand Value = Op.getOperand(0);
- if ((X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32) ||
- (X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64)) {
+ if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) {
assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
- Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0);
+ Chain = DAG.getStore(Chain, Value, StackSlot,
+ PseudoSourceValue::getFixedStack(), SSFI);
SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
SDOperand Ops[] = {
Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
}
Constant *C = ConstantVector::get(CV);
SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
- SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
+ SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
false, 16);
return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
}
}
Constant *C = ConstantVector::get(CV);
SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
- SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
+ SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
false, 16);
if (MVT::isVector(VT)) {
return DAG.getNode(ISD::BIT_CONVERT, VT,
}
// And if it is bigger, shrink it first.
if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) {
- Op1 = DAG.getNode(ISD::FP_ROUND, VT, Op1);
+ Op1 = DAG.getNode(ISD::FP_ROUND, VT, Op1, DAG.getIntPtrConstant(1));
SrcVT = VT;
SrcTy = MVT::getTypeForValueType(SrcVT);
}
}
Constant *C = ConstantVector::get(CV);
SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
- SDOperand Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx, NULL, 0,
+ SDOperand Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
false, 16);
SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1);
DAG.getConstant(32, MVT::i32));
SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit);
SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit,
- DAG.getConstant(0, getPointerTy()));
+ DAG.getIntPtrConstant(0));
}
// Clear first operand sign bit.
}
C = ConstantVector::get(CV);
CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
- SDOperand Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
+ SDOperand Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
false, 16);
SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2);
SDOperand Cmp = Cond.getOperand(1);
unsigned Opc = Cmp.getOpcode();
MVT::ValueType VT = Op.getValueType();
+
bool IllegalFPCMov = false;
- if (VT == MVT::f32 && !X86ScalarSSEf32)
- IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
- else if (VT == MVT::f64 && !X86ScalarSSEf64)
- IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
- else if (VT == MVT::f80)
+ if (MVT::isFloatingPoint(VT) && !MVT::isVector(VT) &&
+ !isScalarFPTypeInSSEReg(VT)) // FPStack?
IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
+
if ((Opc == X86ISD::CMP ||
Opc == X86ISD::COMI ||
Opc == X86ISD::UCOMI) && !IllegalFPCMov) {
Chain, Op.getOperand(2), CC, Cond);
}
-SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
- unsigned CallingConv = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
- bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
-
- if (Subtarget->is64Bit())
- if(CallingConv==CallingConv::Fast && isTailCall && PerformTailCallOpt)
- return LowerX86_TailCallTo(Op, DAG, CallingConv);
- else
- return LowerX86_64CCCCallTo(Op, DAG, CallingConv);
- else
- switch (CallingConv) {
- default:
- assert(0 && "Unsupported calling convention");
- case CallingConv::Fast:
- if (isTailCall && PerformTailCallOpt)
- return LowerX86_TailCallTo(Op, DAG, CallingConv);
- else
- return LowerCCCCallTo(Op,DAG, CallingConv);
- case CallingConv::C:
- case CallingConv::X86_StdCall:
- return LowerCCCCallTo(Op, DAG, CallingConv);
- case CallingConv::X86_FastCall:
- return LowerFastCCCallTo(Op, DAG, CallingConv);
- }
-}
-
// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
// Calls to _alloca is needed to probe the stack when allocating more than 4k
SDOperand Flag;
MVT::ValueType IntPtr = getPointerTy();
- MVT::ValueType SPTy = (Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
+ MVT::ValueType SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag);
Flag = Chain.getValue(1);
return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2);
}
-SDOperand
-X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
- MachineFunction &MF = DAG.getMachineFunction();
- const Function* Fn = MF.getFunction();
- if (Fn->hasExternalLinkage() &&
- Subtarget->isTargetCygMing() &&
- Fn->getName() == "main")
- MF.getInfo<X86MachineFunctionInfo>()->setForceFramePointer(true);
-
- unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
- if (Subtarget->is64Bit())
- return LowerX86_64CCCArguments(Op, DAG);
- else
- switch(CC) {
- default:
- assert(0 && "Unsupported calling convention");
- case CallingConv::Fast:
- return LowerCCCArguments(Op,DAG, true);
- // Falls through
- case CallingConv::C:
- return LowerCCCArguments(Op, DAG);
- case CallingConv::X86_StdCall:
- MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(StdCall);
- return LowerCCCArguments(Op, DAG, true);
- case CallingConv::X86_FastCall:
- MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(FastCall);
- return LowerFastCCArguments(Op, DAG);
- }
-}
-
SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
SDOperand InFlag(0, 0);
SDOperand Chain = Op.getOperand(0);
if (AVT > MVT::i8) {
if (I) {
unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
- Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
+ Count = DAG.getIntPtrConstant(I->getValue() / UBytes);
BytesLeft = I->getValue() % UBytes;
} else {
assert(AVT >= MVT::i32 &&
}
unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
- SDOperand Count = DAG.getConstant(Size / UBytes, getPointerTy());
+ SDOperand Count = DAG.getIntPtrConstant(Size / UBytes);
BytesLeft = Size % UBytes;
SDOperand InFlag(0, 0);
}
SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
- SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2));
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
if (!Subtarget->is64Bit()) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
- return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV->getValue(),
- SV->getOffset());
+ return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV, 0);
}
// __va_list_tag:
// Store gp_offset
SDOperand Store = DAG.getStore(Op.getOperand(0),
DAG.getConstant(VarArgsGPOffset, MVT::i32),
- FIN, SV->getValue(), SV->getOffset());
+ FIN, SV, 0);
MemOps.push_back(Store);
// Store fp_offset
- FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
- DAG.getConstant(4, getPointerTy()));
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(4));
Store = DAG.getStore(Op.getOperand(0),
DAG.getConstant(VarArgsFPOffset, MVT::i32),
- FIN, SV->getValue(), SV->getOffset());
+ FIN, SV, 0);
MemOps.push_back(Store);
// Store ptr to overflow_arg_area
- FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
- DAG.getConstant(4, getPointerTy()));
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(4));
SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
- Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(),
- SV->getOffset());
+ Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV, 0);
MemOps.push_back(Store);
// Store ptr to reg_save_area.
- FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
- DAG.getConstant(8, getPointerTy()));
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(8));
SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
- Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(),
- SV->getOffset());
+ Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV, 0);
MemOps.push_back(Store);
return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size());
}
SDOperand Chain = Op.getOperand(0);
SDOperand DstPtr = Op.getOperand(1);
SDOperand SrcPtr = Op.getOperand(2);
- SrcValueSDNode *DstSV = cast<SrcValueSDNode>(Op.getOperand(3));
- SrcValueSDNode *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4));
+ const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+ const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
- SrcPtr = DAG.getLoad(getPointerTy(), Chain, SrcPtr,
- SrcSV->getValue(), SrcSV->getOffset());
+ SrcPtr = DAG.getLoad(getPointerTy(), Chain, SrcPtr, SrcSV, 0);
Chain = SrcPtr.getValue(1);
for (unsigned i = 0; i < 3; ++i) {
- SDOperand Val = DAG.getLoad(MVT::i64, Chain, SrcPtr,
- SrcSV->getValue(), SrcSV->getOffset());
+ SDOperand Val = DAG.getLoad(MVT::i64, Chain, SrcPtr, SrcSV, 0);
Chain = Val.getValue(1);
- Chain = DAG.getStore(Chain, Val, DstPtr,
- DstSV->getValue(), DstSV->getOffset());
+ Chain = DAG.getStore(Chain, Val, DstPtr, DstSV, 0);
if (i == 2)
break;
SrcPtr = DAG.getNode(ISD::ADD, getPointerTy(), SrcPtr,
- DAG.getConstant(8, getPointerTy()));
+ DAG.getIntPtrConstant(8));
DstPtr = DAG.getNode(ISD::ADD, getPointerTy(), DstPtr,
- DAG.getConstant(8, getPointerTy()));
+ DAG.getIntPtrConstant(8));
}
return Chain;
}
SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI,
- DAG.getConstant(4, getPointerTy()));
+ DAG.getIntPtrConstant(4));
}
SDOperand X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDOperand Op,
if (Subtarget->is64Bit())
return SDOperand();
- return DAG.getConstant(8, getPointerTy());
+ return DAG.getIntPtrConstant(8);
}
SDOperand X86TargetLowering::LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG)
getPointerTy());
SDOperand StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame,
- DAG.getConstant(-4UL, getPointerTy()));
+ DAG.getIntPtrConstant(-4UL));
StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset);
Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0);
Chain = DAG.getCopyToReg(Chain, X86::ECX, StoreAddr);
- MF.addLiveOut(X86::ECX);
+ MF.getRegInfo().addLiveOut(X86::ECX);
return DAG.getNode(X86ISD::EH_RETURN, MVT::Other,
Chain, DAG.getRegister(X86::ECX, getPointerTy()));
SDOperand FPtr = Op.getOperand(2); // nested function
SDOperand Nest = Op.getOperand(3); // 'nest' parameter value
- SrcValueSDNode *TrmpSV = cast<SrcValueSDNode>(Op.getOperand(4));
+ const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+
+ const X86InstrInfo *TII =
+ ((X86TargetMachine&)getTargetMachine()).getInstrInfo();
if (Subtarget->is64Bit()) {
- return SDOperand(); // not yet supported
+ SDOperand OutChains[6];
+
+ // Large code-model.
+
+ const unsigned char JMP64r = TII->getBaseOpcodeFor(X86::JMP64r);
+ const unsigned char MOV64ri = TII->getBaseOpcodeFor(X86::MOV64ri);
+
+ const unsigned char N86R10 =
+ ((const X86RegisterInfo*)RegInfo)->getX86RegNum(X86::R10);
+ const unsigned char N86R11 =
+ ((const X86RegisterInfo*)RegInfo)->getX86RegNum(X86::R11);
+
+ const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix
+
+ // Load the pointer to the nested function into R11.
+ unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11
+ SDOperand Addr = Trmp;
+ OutChains[0] = DAG.getStore(Root, DAG.getConstant(OpCode, MVT::i16), Addr,
+ TrmpAddr, 0);
+
+ Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(2, MVT::i64));
+ OutChains[1] = DAG.getStore(Root, FPtr, Addr, TrmpAddr, 2, false, 2);
+
+ // Load the 'nest' parameter value into R10.
+ // R10 is specified in X86CallingConv.td
+ OpCode = ((MOV64ri | N86R10) << 8) | REX_WB; // movabsq r10
+ Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(10, MVT::i64));
+ OutChains[2] = DAG.getStore(Root, DAG.getConstant(OpCode, MVT::i16), Addr,
+ TrmpAddr, 10);
+
+ Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(12, MVT::i64));
+ OutChains[3] = DAG.getStore(Root, Nest, Addr, TrmpAddr, 12, false, 2);
+
+ // Jump to the nested function.
+ OpCode = (JMP64r << 8) | REX_WB; // jmpq *...
+ Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(20, MVT::i64));
+ OutChains[4] = DAG.getStore(Root, DAG.getConstant(OpCode, MVT::i16), Addr,
+ TrmpAddr, 20);
+
+ unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11
+ Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(22, MVT::i64));
+ OutChains[5] = DAG.getStore(Root, DAG.getConstant(ModRM, MVT::i8), Addr,
+ TrmpAddr, 22);
+
+ SDOperand Ops[] =
+ { Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 6) };
+ return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), Ops, 2);
} else {
- Function *Func = (Function *)
+ const Function *Func =
cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
unsigned CC = Func->getCallingConv();
unsigned NestReg;
break;
}
- const X86InstrInfo *TII =
- ((X86TargetMachine&)getTargetMachine()).getInstrInfo();
-
SDOperand OutChains[4];
SDOperand Addr, Disp;
Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(10, MVT::i32));
Disp = DAG.getNode(ISD::SUB, MVT::i32, FPtr, Addr);
- unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri);
- unsigned char N86Reg = ((X86RegisterInfo*)RegInfo)->getX86RegNum(NestReg);
+ const unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri);
+ const unsigned char N86Reg =
+ ((const X86RegisterInfo*)RegInfo)->getX86RegNum(NestReg);
OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
- Trmp, TrmpSV->getValue(), TrmpSV->getOffset());
+ Trmp, TrmpAddr, 0);
Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32));
- OutChains[1] = DAG.getStore(Root, Nest, Addr, TrmpSV->getValue(),
- TrmpSV->getOffset() + 1, false, 1);
+ OutChains[1] = DAG.getStore(Root, Nest, Addr, TrmpAddr, 1, false, 1);
- unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP);
+ const unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP);
Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32));
OutChains[2] = DAG.getStore(Root, DAG.getConstant(JMP, MVT::i8), Addr,
- TrmpSV->getValue() + 5, TrmpSV->getOffset());
+ TrmpAddr, 5, false, 1);
Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32));
- OutChains[3] = DAG.getStore(Root, Disp, Addr, TrmpSV->getValue(),
- TrmpSV->getOffset() + 6, false, 1);
+ OutChains[3] = DAG.getStore(Root, Disp, Addr, TrmpAddr, 6, false, 1);
SDOperand Ops[] =
{ Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 4) };
}
}
-SDOperand X86TargetLowering::LowerFLT_ROUNDS(SDOperand Op, SelectionDAG &DAG) {
+SDOperand X86TargetLowering::LowerFLT_ROUNDS_(SDOperand Op, SelectionDAG &DAG) {
/*
The rounding mode is in bits 11:10 of FPSR, and has the following
settings:
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
- case ISD::FLT_ROUNDS: return LowerFLT_ROUNDS(Op, DAG);
+ case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
case ISD::CTLZ: return LowerCTLZ(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op, DAG);
case X86ISD::FLD: return "X86ISD::FLD";
case X86ISD::FST: return "X86ISD::FST";
case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT";
+ case X86ISD::FP_GET_RESULT2: return "X86ISD::FP_GET_RESULT2";
case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT";
case X86ISD::CALL: return "X86ISD::CALL";
case X86ISD::TAILCALL: return "X86ISD::TAILCALL";
//===----------------------------------------------------------------------===//
MachineBasicBlock *
-X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
- MachineBasicBlock *BB) {
+X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
switch (MI->getOpcode()) {
default: assert(false && "Unexpected instr type to insert");
// Load the old value of the high byte of the control word...
unsigned OldCW =
- F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass);
+ F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass);
addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx);
// Set the high part to be round to zero...
AM.Base.Reg = Op.getReg();
} else {
AM.BaseType = X86AddressMode::FrameIndexBase;
- AM.Base.FrameIndex = Op.getFrameIndex();
+ AM.Base.FrameIndex = Op.getIndex();
}
Op = MI->getOperand(1);
if (Op.isImmediate())
static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI,
const X86Subtarget *Subtarget) {
GlobalValue *GV;
- int64_t Offset;
+ int64_t Offset = 0;
if (isGAPlusOffset(Base, GV, Offset))
return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
- else {
- assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
- int BFI = cast<FrameIndexSDNode>(Base)->getIndex();
- if (BFI < 0)
- // Fixed objects do not specify alignment, however the offsets are known.
- return ((Subtarget->getStackAlignment() % 16) == 0 &&
- (MFI->getObjectOffset(BFI) % 16) == 0);
- else
- return MFI->getObjectAlignment(BFI) >= 16;
- }
+ // DAG combine handles the stack object case.
return false;
}
return SDOperand();
}
+/// PerformFORCombine - Do target-specific dag combines on X86ISD::FOR and
+/// X86ISD::FXOR nodes: fold away a constant +0.0 operand, else return empty.
+static SDOperand PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
+ assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
+ // F[X]OR(0.0, x) -> x   (only a constant matching isPosZero() is folded)
+ // F[X]OR(x, 0.0) -> x   (operand 0 is checked first, then operand 1)
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
+ if (C->getValueAPF().isPosZero())
+ return N->getOperand(1); // operand 0 is +0.0: result is operand 1
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
+ if (C->getValueAPF().isPosZero())
+ return N->getOperand(0); // operand 1 is +0.0: result is operand 0
+ return SDOperand(); // neither operand is a constant +0.0: nothing to fold
+}
+
+/// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes.
+static SDOperand PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
+ // FAND(0.0, x) -> 0.0   (the matching +0.0 constant operand is returned)
+ // FAND(x, 0.0) -> 0.0   NOTE(review): no opcode assert, unlike the FOR combine
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
+ if (C->getValueAPF().isPosZero())
+ return N->getOperand(0); // operand 0 is the +0.0 constant: reuse it
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
+ if (C->getValueAPF().isPosZero())
+ return N->getOperand(1); // operand 1 is the +0.0 constant: reuse it
+ return SDOperand(); // neither operand is a constant +0.0: nothing to fold
+}
+
SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
default: break;
- case ISD::VECTOR_SHUFFLE:
- return PerformShuffleCombine(N, DAG, Subtarget);
- case ISD::SELECT:
- return PerformSELECTCombine(N, DAG, Subtarget);
+ case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, Subtarget);
+ case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
+ case X86ISD::FXOR:
+ case X86ISD::FOR: return PerformFORCombine(N, DAG);
+ case X86ISD::FAND: return PerformFANDCombine(N, DAG);
}
return SDOperand();
return TargetLowering::getConstraintType(Constraint);
}
+/// lowerXConstraint - try to replace an X constraint, which matches anything,
+/// with another that has more specific requirements based on the type of the
+/// corresponding operand. The replacement constraint is written into 's'.
+void X86TargetLowering::lowerXConstraint(MVT::ValueType ConstraintVT,
+ std::string& s) const {
+ if (MVT::isFloatingPoint(ConstraintVT)) {
+ if (Subtarget->hasSSE2())
+ s = "Y"; // floating-point operand, subtarget reports SSE2
+ else if (Subtarget->hasSSE1())
+ s = "x"; // floating-point operand, SSE1 but not SSE2
+ else
+ s = "f"; // floating-point operand, no SSE reported
+ } else // non-floating-point operands use the generic TargetLowering choice
+ return TargetLowering::lowerXConstraint(ConstraintVT, s);
+}
+
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void X86TargetLowering::LowerAsmOperandForConstraint(SDOperand Op,