#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/ParameterAttributes.h"
using namespace llvm;
X86TargetLowering::X86TargetLowering(TargetMachine &TM)
// X86 is weird, it always uses i8 for shift amounts and setcc results.
setShiftAmountType(MVT::i8);
- setSetCCResultType(MVT::i8);
setSetCCResultContents(ZeroOrOneSetCCResult);
setSchedulingPreference(SchedulingForRegPressure);
setShiftAmountFlavor(Mask); // shl X, 32 == shl X, 0
setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);
}
- // Scalar integer multiply, multiply-high, divide, and remainder are
- // lowered to use operations that produce two results, to match the
- // available instructions. This exposes the two-result form to trivial
- // CSE, which is able to combine x/y and x%y into a single instruction,
- // for example. The single-result multiply instructions are introduced
- // in X86ISelDAGToDAG.cpp, after CSE, for uses where the the high part
- // is not needed.
- setOperationAction(ISD::MUL , MVT::i8 , Expand);
+ // Scalar integer divide and remainder are lowered to use operations that
+ // produce two results, to match the available instructions. This exposes
+ // the two-result form to trivial CSE, which is able to combine x/y and x%y
+ // into a single instruction.
+ //
+ // Scalar integer multiply-high is also lowered to use two-result
+ // operations, to match the available instructions. However, plain multiply
+ // (low) operations are left as Legal, as there are single-result
+ // instructions for this in x86. Using the two-result multiply instructions
+ // when both high and low results are needed must be arranged by dagcombine.
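+  // Illustrative sketch (not part of this patch): a single x86 "idivl %ecx"
+  // computes both results at once, EAX = quotient and EDX = remainder, so
+  // after CSE the pair
+  //   %q = sdiv i32 %x, %y
+  //   %r = srem i32 %x, %y
+  // can be covered by one divide instruction.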
setOperationAction(ISD::MULHS , MVT::i8 , Expand);
setOperationAction(ISD::MULHU , MVT::i8 , Expand);
setOperationAction(ISD::SDIV , MVT::i8 , Expand);
setOperationAction(ISD::UDIV , MVT::i8 , Expand);
setOperationAction(ISD::SREM , MVT::i8 , Expand);
setOperationAction(ISD::UREM , MVT::i8 , Expand);
- setOperationAction(ISD::MUL , MVT::i16 , Expand);
setOperationAction(ISD::MULHS , MVT::i16 , Expand);
setOperationAction(ISD::MULHU , MVT::i16 , Expand);
setOperationAction(ISD::SDIV , MVT::i16 , Expand);
setOperationAction(ISD::UDIV , MVT::i16 , Expand);
setOperationAction(ISD::SREM , MVT::i16 , Expand);
setOperationAction(ISD::UREM , MVT::i16 , Expand);
- setOperationAction(ISD::MUL , MVT::i32 , Expand);
setOperationAction(ISD::MULHS , MVT::i32 , Expand);
setOperationAction(ISD::MULHU , MVT::i32 , Expand);
setOperationAction(ISD::SDIV , MVT::i32 , Expand);
setOperationAction(ISD::UDIV , MVT::i32 , Expand);
setOperationAction(ISD::SREM , MVT::i32 , Expand);
setOperationAction(ISD::UREM , MVT::i32 , Expand);
- setOperationAction(ISD::MUL , MVT::i64 , Expand);
setOperationAction(ISD::MULHS , MVT::i64 , Expand);
setOperationAction(ISD::MULHU , MVT::i64 , Expand);
setOperationAction(ISD::SDIV , MVT::i64 , Expand);
setOperationAction(ISD::BRCOND , MVT::Other, Custom);
setOperationAction(ISD::BR_CC , MVT::Other, Expand);
setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
- setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
if (Subtarget->is64Bit())
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
+ setOperationAction(ISD::FREM , MVT::f32 , Expand);
setOperationAction(ISD::FREM , MVT::f64 , Expand);
+ setOperationAction(ISD::FREM , MVT::f80 , Expand);
setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
- // X86 wants to expand memset / memcpy itself.
- setOperationAction(ISD::MEMSET , MVT::Other, Custom);
- setOperationAction(ISD::MEMCPY , MVT::Other, Custom);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::SHL_PARTS , MVT::i64 , Custom);
+ setOperationAction(ISD::SRA_PARTS , MVT::i64 , Custom);
+ setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
+ }
+
+ if (Subtarget->hasSSE1())
+ setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
+
+ if (!Subtarget->hasSSE2())
+ setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand);
+
+ setOperationAction(ISD::ATOMIC_LCS , MVT::i8, Custom);
+ setOperationAction(ISD::ATOMIC_LCS , MVT::i16, Custom);
+ setOperationAction(ISD::ATOMIC_LCS , MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_LCS , MVT::i64, Custom);
// Use the default ISD::LOCATION, ISD::DECLARE expansion.
setOperationAction(ISD::LOCATION, MVT::Other, Expand);
// We don't support sin/cos/fmod
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
- setOperationAction(ISD::FREM , MVT::f64, Expand);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
- setOperationAction(ISD::FREM , MVT::f32, Expand);
// Expand FP immediates into loads from the stack, except for the special
// cases we handle.
- setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
- setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
addLegalFPImmediate(APFloat(+0.0)); // xorpd
addLegalFPImmediate(APFloat(+0.0f)); // xorps
// We don't support sin/cos/fmod
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
- setOperationAction(ISD::FREM , MVT::f32, Expand);
- // Expand FP immediates into loads from the stack, except for the special
- // cases we handle.
- setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
- setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
+ // Special cases we handle for FP constants.
addLegalFPImmediate(APFloat(+0.0f)); // xorps
addLegalFPImmediate(APFloat(+0.0)); // FLD0
addLegalFPImmediate(APFloat(+1.0)); // FLD1
setOperationAction(ISD::FSIN , MVT::f64 , Expand);
setOperationAction(ISD::FCOS , MVT::f64 , Expand);
}
-
- setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
- setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
addLegalFPImmediate(APFloat(+0.0)); // FLD0
addLegalFPImmediate(APFloat(+1.0)); // FLD1
addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
setOperationAction(ISD::UNDEF, MVT::f80, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
{
- setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
APFloat TmpFlt(+0.0);
TmpFlt.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven);
addLegalFPImmediate(TmpFlt); // FLD0
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
}
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
- // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
- if (Subtarget->is64Bit())
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
+ }
// Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
}
+
+ if (Subtarget->hasSSE41()) {
+ // FIXME: Do we need to handle scalar-to-vector here?
+ setOperationAction(ISD::MUL, MVT::v4i32, Legal);
+
+    // i8 and i16 vectors are custom, because the source register and source
+    // memory operand types are not the same width. f32 vectors are
+ // custom since the immediate controlling the insert encodes additional
+ // information.
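+    // For example (illustrative): pinsrw reads an i16 from memory but takes a
+    // full 32-bit GPR as its register source, and insertps carries the
+    // destination lane plus a zero mask in its immediate byte.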
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
+
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
+ }
+ }
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::SELECT);
+ setTargetDAGCombine(ISD::STORE);
computeRegisterProperties();
maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
allowUnalignedMemoryAccesses = true; // x86 supports it!
+ setPrefLoopAlignment(16);
+}
+
+
+MVT::ValueType
+X86TargetLowering::getSetCCResultType(const SDOperand &) const {
+ return MVT::i8;
}
+
/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) {
if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
if (VTy->getBitWidth() == 128)
MaxAlign = 16;
- else if (VTy->getBitWidth() == 64)
- if (MaxAlign < 8)
- MaxAlign = 8;
} else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
unsigned EltAlign = 0;
getMaxByValAlign(ATy->getElementType(), EltAlign);
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. For X86, aggregates
-/// that contains are placed at 16-byte boundaries while the rest are at
-/// 4-byte boundaries.
+/// that contain SSE vectors are placed at 16-byte boundaries while the rest
+/// are at 4-byte boundaries.
unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
if (Subtarget->is64Bit())
return getTargetData()->getABITypeAlignment(Ty);
unsigned Align = 4;
- getMaxByValAlign(Ty, Align);
+ if (Subtarget->hasSSE1())
+ getMaxByValAlign(Ty, Align);
return Align;
}
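+// Illustrative example for getByValTypeAlignment (assumed types): on a 32-bit
+// target with SSE1, a by-value struct containing a <4 x float> field is
+// placed at a 16-byte boundary, while a struct of plain i32 fields keeps the
+// default 4-byte alignment.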
// Regular return.
SDOperand Flag;
+ SmallVector<SDOperand, 6> RetOps;
+ RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
+ // Operand #1 = Bytes To Pop
+ RetOps.push_back(DAG.getConstant(getBytesToPopOnReturn(), MVT::i16));
+
// Copy the result values into the output registers.
- if (RVLocs.size() != 1 || !RVLocs[0].isRegLoc() ||
- RVLocs[0].getLocReg() != X86::ST0) {
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
- CCValAssign &VA = RVLocs[i];
- assert(VA.isRegLoc() && "Can only return in registers!");
- Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1),
- Flag);
- Flag = Chain.getValue(1);
- }
- } else {
- // We need to handle a destination of ST0 specially, because it isn't really
- // a register.
- SDOperand Value = Op.getOperand(1);
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ SDOperand ValToCopy = Op.getOperand(i*2+1);
- // an XMM register onto the fp-stack. Do this with an FP_EXTEND to f80.
- // This will get legalized into a load/store if it can't get optimized away.
- if (isScalarFPTypeInSSEReg(RVLocs[0].getValVT()))
- Value = DAG.getNode(ISD::FP_EXTEND, MVT::f80, Value);
+ // Returns in ST0/ST1 are handled specially: these are pushed as operands to
+ // the RET instruction and handled by the FP Stackifier.
+ if (RVLocs[i].getLocReg() == X86::ST0 ||
+ RVLocs[i].getLocReg() == X86::ST1) {
+ // If this is a copy from an xmm register to ST(0), use an FPExtend to
+ // change the value to the FP stack register class.
+ if (isScalarFPTypeInSSEReg(RVLocs[i].getValVT()))
+ ValToCopy = DAG.getNode(ISD::FP_EXTEND, MVT::f80, ValToCopy);
+ RetOps.push_back(ValToCopy);
+ // Don't emit a copytoreg.
+ continue;
+ }
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
- SDOperand Ops[] = { Chain, Value };
- Chain = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops, 2);
+ Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), ValToCopy, Flag);
Flag = Chain.getValue(1);
}
- SDOperand BytesToPop = DAG.getConstant(getBytesToPopOnReturn(), MVT::i16);
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
if (Flag.Val)
- return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop, Flag);
- else
- return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop);
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, &RetOps[0], RetOps.size());
}
SmallVector<SDOperand, 8> ResultVals;
// Copy all of the result registers out of their specified physreg.
- if (RVLocs.size() != 1 || RVLocs[0].getLocReg() != X86::ST0) {
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
- Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
- RVLocs[i].getValVT(), InFlag).getValue(1);
- InFlag = Chain.getValue(2);
- ResultVals.push_back(Chain.getValue(0));
- }
- } else {
- // Copies from the FP stack are special, as ST0 isn't a valid register
- // before the fp stackifier runs.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ MVT::ValueType CopyVT = RVLocs[i].getValVT();
- // Copy ST0 into an RFP register with FP_GET_RESULT. If this will end up
- // in an SSE register, copy it out as F80 and do a truncate, otherwise use
- // the specified value type.
- MVT::ValueType GetResultTy = RVLocs[0].getValVT();
- if (isScalarFPTypeInSSEReg(GetResultTy))
- GetResultTy = MVT::f80;
- SDVTList Tys = DAG.getVTList(GetResultTy, MVT::Other, MVT::Flag);
+ // If this is a call to a function that returns an fp value on the floating
+ // point stack, but where we prefer to use the value in xmm registers, copy
+ // it out as F80 and use a truncate to move it from fp stack reg to xmm reg.
+ if (RVLocs[i].getLocReg() == X86::ST0 &&
+ isScalarFPTypeInSSEReg(RVLocs[i].getValVT())) {
+ CopyVT = MVT::f80;
+ }
- SDOperand GROps[] = { Chain, InFlag };
- SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
- Chain = RetVal.getValue(1);
- InFlag = RetVal.getValue(2);
-
- // If we want the result in an SSE register, use an FP_TRUNCATE to get it
- // there.
- if (GetResultTy != RVLocs[0].getValVT())
- RetVal = DAG.getNode(ISD::FP_ROUND, RVLocs[0].getValVT(), RetVal,
- // This truncation won't change the value.
- DAG.getIntPtrConstant(1));
+ Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
+ CopyVT, InFlag).getValue(1);
+ SDOperand Val = Chain.getValue(0);
+ InFlag = Chain.getValue(2);
+
+ if (CopyVT != RVLocs[i].getValVT()) {
+      // Round the F80 to the right size, which also moves it to the
+      // appropriate xmm register.
+ Val = DAG.getNode(ISD::FP_ROUND, RVLocs[i].getValVT(), Val,
+ // This truncation won't change the value.
+ DAG.getIntPtrConstant(1));
+ }
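+    // Illustrative (assumed lowering): a double returned in ST0 but wanted in
+    // an SSE register is copied out as f80 and rounded back to f64; the
+    // legalizer may turn that round into an fstpl/movsd store-reload pair.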
- ResultVals.push_back(RetVal);
+ ResultVals.push_back(Val);
}
// Merge everything together with a MERGE_VALUES node.
&ResultVals[0], ResultVals.size()).Val;
}
-/// LowerCallResultToTwo64BitRegs - Lower the result values of an x86-64
-/// ISD::CALL where the results are known to be in two 64-bit registers,
-/// e.g. XMM0 and XMM1. This simplify store the two values back to the
-/// fixed stack slot allocated for StructRet.
-SDNode *X86TargetLowering::
-LowerCallResultToTwo64BitRegs(SDOperand Chain, SDOperand InFlag,
- SDNode *TheCall, unsigned Reg1, unsigned Reg2,
- MVT::ValueType VT, SelectionDAG &DAG) {
- SDOperand RetVal1 = DAG.getCopyFromReg(Chain, Reg1, VT, InFlag);
- Chain = RetVal1.getValue(1);
- InFlag = RetVal1.getValue(2);
- SDOperand RetVal2 = DAG.getCopyFromReg(Chain, Reg2, VT, InFlag);
- Chain = RetVal2.getValue(1);
- InFlag = RetVal2.getValue(2);
- SDOperand FIN = TheCall->getOperand(5);
- Chain = DAG.getStore(Chain, RetVal1, FIN, NULL, 0);
- FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(8));
- Chain = DAG.getStore(Chain, RetVal2, FIN, NULL, 0);
- return Chain.Val;
-}
-
-/// LowerCallResultToTwoX87Regs - Lower the result values of an x86-64 ISD::CALL
-/// where the results are known to be in ST0 and ST1.
-SDNode *X86TargetLowering::
-LowerCallResultToTwoX87Regs(SDOperand Chain, SDOperand InFlag,
- SDNode *TheCall, SelectionDAG &DAG) {
- SmallVector<SDOperand, 8> ResultVals;
- const MVT::ValueType VTs[] = { MVT::f80, MVT::f80, MVT::Other, MVT::Flag };
- SDVTList Tys = DAG.getVTList(VTs, 4);
- SDOperand Ops[] = { Chain, InFlag };
- SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT2, Tys, Ops, 2);
- Chain = RetVal.getValue(2);
- SDOperand FIN = TheCall->getOperand(5);
- Chain = DAG.getStore(Chain, RetVal.getValue(1), FIN, NULL, 0);
- FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(16));
- Chain = DAG.getStore(Chain, RetVal, FIN, NULL, 0);
- return Chain.Val;
-}
//===----------------------------------------------------------------------===//
// C & StdCall & Fast Calling Convention implementation
return VReg;
}
-// Determines whether a CALL node uses struct return semantics.
+/// CallIsStructReturn - Determines whether a CALL node uses struct return
+/// semantics.
static bool CallIsStructReturn(SDOperand Op) {
unsigned NumOps = (Op.getNumOperands() - 5) / 2;
if (!NumOps)
return false;
-
- ConstantSDNode *Flags = cast<ConstantSDNode>(Op.getOperand(6));
- return Flags->getValue() & ISD::ParamFlags::StructReturn;
+
+ return cast<ARG_FLAGSSDNode>(Op.getOperand(6))->getArgFlags().isSRet();
}
-// Determines whether a FORMAL_ARGUMENTS node uses struct return semantics.
+/// ArgsAreStructReturn - Determines whether a FORMAL_ARGUMENTS node uses struct
+/// return semantics.
static bool ArgsAreStructReturn(SDOperand Op) {
unsigned NumArgs = Op.Val->getNumValues() - 1;
if (!NumArgs)
return false;
-
- ConstantSDNode *Flags = cast<ConstantSDNode>(Op.getOperand(3));
- return Flags->getValue() & ISD::ParamFlags::StructReturn;
+
+ return cast<ARG_FLAGSSDNode>(Op.getOperand(3))->getArgFlags().isSRet();
}
-// Determines whether a CALL or FORMAL_ARGUMENTS node requires the callee to pop
-// its own arguments. Callee pop is necessary to support tail calls.
+/// IsCalleePop - Determines whether a CALL or FORMAL_ARGUMENTS node requires
+/// the callee to pop its own arguments. Callee pop is necessary to support tail
+/// calls.
bool X86TargetLowering::IsCalleePop(SDOperand Op) {
bool IsVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
if (IsVarArg)
}
}
-// Selects the correct CCAssignFn for a CALL or FORMAL_ARGUMENTS node.
+/// CCAssignFnForNode - Selects the correct CCAssignFn for a CALL or
+/// FORMAL_ARGUMENTS node.
CCAssignFn *X86TargetLowering::CCAssignFnForNode(SDOperand Op) const {
unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
- if (Subtarget->is64Bit())
- if (CC == CallingConv::Fast && PerformTailCallOpt)
- return CC_X86_64_TailCall;
- else
- return CC_X86_64_C;
-
+ if (Subtarget->is64Bit()) {
+ if (Subtarget->isTargetWin64())
+ return CC_X86_Win64_C;
+ else {
+ if (CC == CallingConv::Fast && PerformTailCallOpt)
+ return CC_X86_64_TailCall;
+ else
+ return CC_X86_64_C;
+ }
+ }
+
if (CC == CallingConv::X86_FastCall)
return CC_X86_32_FastCall;
else if (CC == CallingConv::Fast && PerformTailCallOpt)
return CC_X86_32_C;
}
-// Selects the appropriate decoration to apply to a MachineFunction containing a
-// given FORMAL_ARGUMENTS node.
+/// NameDecorationForFORMAL_ARGUMENTS - Selects the appropriate decoration to
+/// apply to a MachineFunction containing a given FORMAL_ARGUMENTS node.
NameDecorationStyle
X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDOperand Op) {
unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
return None;
}
-
-// IsPossiblyOverwrittenArgumentOfTailCall - Check if the operand could possibly
-// be overwritten when lowering the outgoing arguments in a tail call. Currently
-// the implementation of this call is very conservative and assumes all
-// arguments sourcing from FORMAL_ARGUMENTS or a CopyFromReg with virtual
-// registers would be overwritten by direct lowering.
-// Possible improvement:
-// Check FORMAL_ARGUMENTS corresponding MERGE_VALUES for CopyFromReg nodes
-// indicating inreg passed arguments which also need not be lowered to a safe
-// stack slot.
-static bool IsPossiblyOverwrittenArgumentOfTailCall(SDOperand Op) {
+/// IsPossiblyOverwrittenArgumentOfTailCall - Check if the operand could
+/// possibly be overwritten when lowering the outgoing arguments in a tail
+/// call. Currently the implementation of this call is very conservative and
+/// assumes all arguments sourcing from FORMAL_ARGUMENTS or a CopyFromReg with
+/// virtual registers would be overwritten by direct lowering.
+static bool IsPossiblyOverwrittenArgumentOfTailCall(SDOperand Op,
+ MachineFrameInfo * MFI) {
RegisterSDNode * OpReg = NULL;
+ FrameIndexSDNode * FrameIdxNode = NULL;
+ int FrameIdx = 0;
if (Op.getOpcode() == ISD::FORMAL_ARGUMENTS ||
(Op.getOpcode()== ISD::CopyFromReg &&
- (OpReg = cast<RegisterSDNode>(Op.getOperand(1))) &&
- OpReg->getReg() >= MRegisterInfo::FirstVirtualRegister))
+ (OpReg = dyn_cast<RegisterSDNode>(Op.getOperand(1))) &&
+ (OpReg->getReg() >= TargetRegisterInfo::FirstVirtualRegister)) ||
+ (Op.getOpcode() == ISD::LOAD &&
+ (FrameIdxNode = dyn_cast<FrameIndexSDNode>(Op.getOperand(1))) &&
+ (MFI->isFixedObjectIndex((FrameIdx = FrameIdxNode->getIndex()))) &&
+ (MFI->getObjectOffset(FrameIdx) >= 0)))
return true;
return false;
}
-// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
-// by "Src" to address "Dst" with size and alignment information specified by
-// the specific parameter attribute. The copy will be passed as a byval function
-// parameter.
+/// CallRequiresGOTPtrInReg - Check whether the call requires the GOT pointer
+/// in a register before calling.
+bool X86TargetLowering::CallRequiresGOTPtrInReg(bool Is64Bit, bool IsTailCall) {
+ return !IsTailCall && !Is64Bit &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT();
+}
+
+
+/// CallRequiresFnAddressInReg - Check whether the call requires the function
+/// address to be loaded in a register.
+bool
+X86TargetLowering::CallRequiresFnAddressInReg(bool Is64Bit, bool IsTailCall) {
+ return !Is64Bit && IsTailCall &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT();
+}
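+// Note that the two predicates above are mutually exclusive (one requires
+// !IsTailCall, the other IsTailCall), so at most one of the GOT-in-EBX and
+// callee-address-in-register paths below is taken for a given call.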
+
+/// CopyTailCallClobberedArgumentsToVRegs - Create virtual registers for all
+/// arguments to force loading and guarantee that arguments sourcing from
+/// incoming parameters do not overwrite each other.
+static SDOperand
+CopyTailCallClobberedArgumentsToVRegs(SDOperand Chain,
+ SmallVector<std::pair<unsigned, SDOperand>, 8> &TailCallClobberedVRegs,
+ SelectionDAG &DAG,
+ MachineFunction &MF,
+ const TargetLowering * TL) {
+
+ SDOperand InFlag;
+ for (unsigned i = 0, e = TailCallClobberedVRegs.size(); i != e; i++) {
+ SDOperand Arg = TailCallClobberedVRegs[i].second;
+ unsigned Idx = TailCallClobberedVRegs[i].first;
+ unsigned VReg =
+ MF.getRegInfo().
+ createVirtualRegister(TL->getRegClassFor(Arg.getValueType()));
+ Chain = DAG.getCopyToReg(Chain, VReg, Arg, InFlag);
+ InFlag = Chain.getValue(1);
+ Arg = DAG.getCopyFromReg(Chain, VReg, Arg.getValueType(), InFlag);
+ TailCallClobberedVRegs[i] = std::make_pair(Idx, Arg);
+ Chain = Arg.getValue(1);
+ InFlag = Arg.getValue(2);
+ }
+ return Chain;
+}
+
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
+/// by "Src" to address "Dst" with size and alignment information specified by
+/// the specific parameter attribute. The copy will be passed as a byval
+/// function parameter.
static SDOperand
CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand Chain,
- unsigned Flags, SelectionDAG &DAG) {
- unsigned Align = 1 <<
- ((Flags & ISD::ParamFlags::ByValAlign) >> ISD::ParamFlags::ByValAlignOffs);
- unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
- ISD::ParamFlags::ByValSizeOffs;
- SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
- SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
- SDOperand AlwaysInline = DAG.getConstant(1, MVT::i32);
- return DAG.getMemcpy(Chain, Dst, Src, SizeNode, AlignNode, AlwaysInline);
+ ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
+ SDOperand SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+ return DAG.getMemcpy(Chain, Dst, Src, SizeNode, Flags.getByValAlign(),
+ /*AlwaysInline=*/true,
+ NULL, 0, NULL, 0);
}
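+// Illustrative: for an argument marked byval, the helper above expands to an
+// inline memcpy (AlwaysInline = true) of Flags.getByValSize() bytes at
+// Flags.getByValAlign() alignment from the caller's copy into the outgoing
+// argument slot.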
SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
const CCValAssign &VA,
MachineFrameInfo *MFI,
+ unsigned CC,
SDOperand Root, unsigned i) {
// Create the nodes corresponding to a load from this parameter slot.
- unsigned Flags = cast<ConstantSDNode>(Op.getOperand(3 + i))->getValue();
- bool isByVal = Flags & ISD::ParamFlags::ByVal;
-
- // FIXME: For now, all byval parameter objects are marked mutable. This
- // can be changed with more analysis.
+ ISD::ArgFlagsTy Flags =
+ cast<ARG_FLAGSSDNode>(Op.getOperand(3 + i))->getArgFlags();
+ bool AlwaysUseMutable = (CC==CallingConv::Fast) && PerformTailCallOpt;
+ bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
+
+ // FIXME: For now, all byval parameter objects are marked mutable. This can be
+ // changed with more analysis.
+  // In case of tail call optimization, mark all arguments mutable, since they
+  // could be overwritten by the lowering of arguments in case of a tail call.
int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
- VA.getLocMemOffset(), !isByVal);
+ VA.getLocMemOffset(), isImmutable);
SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
- if (isByVal)
+ if (Flags.isByVal())
return FIN;
- return DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0);
+ return DAG.getLoad(VA.getValVT(), Root, FIN,
+ PseudoSourceValue::getFixedStack(), FI);
}
SDOperand
RC = X86::GR32RegisterClass;
else if (Is64Bit && RegVT == MVT::i64)
RC = X86::GR64RegisterClass;
- else if (Is64Bit && RegVT == MVT::f32)
+ else if (RegVT == MVT::f32)
RC = X86::FR32RegisterClass;
- else if (Is64Bit && RegVT == MVT::f64)
+ else if (RegVT == MVT::f64)
RC = X86::FR64RegisterClass;
else {
assert(MVT::isVector(RegVT));
ArgValues.push_back(ArgValue);
} else {
assert(VA.isMemLoc());
- ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
+ ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, CC, Root, i));
}
}
unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
X86::GR64RegisterClass);
SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
- SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
+ SDOperand Store =
+ DAG.getStore(Val.getValue(1), Val, FIN,
+ PseudoSourceValue::getFixedStack(),
+ RegSaveFrameIndex);
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
DAG.getIntPtrConstant(8));
unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
X86::VR128RegisterClass);
SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
- SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
+ SDOperand Store =
+ DAG.getStore(Val.getValue(1), Val, FIN,
+ PseudoSourceValue::getFixedStack(),
+ RegSaveFrameIndex);
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
DAG.getIntPtrConstant(16));
const CCValAssign &VA,
SDOperand Chain,
SDOperand Arg) {
- SDOperand PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset());
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDOperand PtrOff = DAG.getIntPtrConstant(LocMemOffset);
PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
- SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
- unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
- if (Flags & ISD::ParamFlags::ByVal) {
+ ISD::ArgFlagsTy Flags =
+ cast<ARG_FLAGSSDNode>(Op.getOperand(6+2*VA.getValNo()))->getArgFlags();
+ if (Flags.isByVal()) {
return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
}
- return DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
-}
-
-/// ClassifyX86_64SRetCallReturn - Classify how to implement a x86-64
-/// struct return call to the specified function. X86-64 ABI specifies
-/// some SRet calls are actually returned in registers. Since current
-/// LLVM cannot represent multi-value calls, they are represent as
-/// calls where the results are passed in a hidden struct provided by
-/// the caller. This function examines the type of the struct to
-/// determine the correct way to implement the call.
-X86::X86_64SRet
-X86TargetLowering::ClassifyX86_64SRetCallReturn(const Function *Fn) {
- // FIXME: Disabled for now.
- return X86::InMemory;
-
- const PointerType *PTy = cast<PointerType>(Fn->arg_begin()->getType());
- const Type *RTy = PTy->getElementType();
- unsigned Size = getTargetData()->getABITypeSize(RTy);
- if (Size != 16 && Size != 32)
- return X86::InMemory;
-
- if (Size == 32) {
- const StructType *STy = dyn_cast<StructType>(RTy);
- if (!STy) return X86::InMemory;
- if (STy->getNumElements() == 2 &&
- STy->getElementType(0) == Type::X86_FP80Ty &&
- STy->getElementType(1) == Type::X86_FP80Ty)
- return X86::InX87;
- }
-
- bool AllFP = true;
- for (Type::subtype_iterator I = RTy->subtype_begin(), E = RTy->subtype_end();
- I != E; ++I) {
- const Type *STy = I->get();
- if (!STy->isFPOrFPVector()) {
- AllFP = false;
- break;
- }
- }
+ return DAG.getStore(Chain, Arg, PtrOff,
+ PseudoSourceValue::getStack(), LocMemOffset);
+}
- if (AllFP)
- return X86::InSSE;
- return X86::InGPR64;
+/// EmitTailCallLoadRetAddr - Emit a load of the return address if tail call
+/// optimization is performed and it is required.
+SDOperand
+X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
+ SDOperand &OutRetAddr,
+ SDOperand Chain,
+ bool IsTailCall,
+ bool Is64Bit,
+ int FPDiff) {
+ if (!IsTailCall || FPDiff==0) return Chain;
+
+ // Adjust the Return address stack slot.
+ MVT::ValueType VT = getPointerTy();
+ OutRetAddr = getReturnAddressFrameIndex(DAG);
+ // Load the "old" Return address.
+  OutRetAddr = DAG.getLoad(VT, Chain, OutRetAddr, NULL, 0);
+ return SDOperand(OutRetAddr.Val, 1);
}
-void X86TargetLowering::X86_64AnalyzeSRetCallOperands(SDNode *TheCall,
- CCAssignFn *Fn,
- CCState &CCInfo) {
- unsigned NumOps = (TheCall->getNumOperands() - 5) / 2;
- for (unsigned i = 1; i != NumOps; ++i) {
- MVT::ValueType ArgVT = TheCall->getOperand(5+2*i).getValueType();
- SDOperand FlagOp = TheCall->getOperand(5+2*i+1);
- unsigned ArgFlags =cast<ConstantSDNode>(FlagOp)->getValue();
- if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo)) {
- cerr << "Call operand #" << i << " has unhandled type "
- << MVT::getValueTypeString(ArgVT) << "\n";
- abort();
- }
- }
+/// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call
+/// optimization is performed and it is required (FPDiff!=0).
+static SDOperand
+EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
+ SDOperand Chain, SDOperand RetAddrFrIdx,
+ bool Is64Bit, int FPDiff) {
+ // Store the return address to the appropriate stack slot.
+ if (!FPDiff) return Chain;
+ // Calculate the new stack slot for the return address.
+ int SlotSize = Is64Bit ? 8 : 4;
+ int NewReturnAddrFI =
+ MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
+ MVT::ValueType VT = Is64Bit ? MVT::i64 : MVT::i32;
+ SDOperand NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
+ Chain = DAG.getStore(Chain, RetAddrFrIdx, NewRetAddrFrIdx,
+ PseudoSourceValue::getFixedStack(), NewReturnAddrFI);
+ return Chain;
+}
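+// Illustrative example with assumed values: on x86-64 (SlotSize = 8) with
+// FPDiff = -8, the new fixed object for the return address is created at
+// offset FPDiff - SlotSize = -16.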
+
+/// CopyTailCallByValClobberedRegToVirtReg - Copy arguments with register target
+/// which might be overwritten by later byval tail call lowering to a virtual
+/// register.
+bool
+X86TargetLowering::CopyTailCallByValClobberedRegToVirtReg(bool containsByValArg,
+ SmallVector< std::pair<unsigned, unsigned>, 8> &TailCallByValClobberedVRegs,
+ SmallVector<MVT::ValueType, 8> &TailCallByValClobberedVRegTypes,
+ std::pair<unsigned, SDOperand> &RegToPass,
+ SDOperand &OutChain,
+ SDOperand &OutFlag,
+ MachineFunction &MF,
+ SelectionDAG & DAG) {
+ if (!containsByValArg) return false;
+
+ std::pair<unsigned, unsigned> ArgRegVReg;
+ MVT::ValueType VT = RegToPass.second.getValueType();
+
+ ArgRegVReg.first = RegToPass.first;
+ ArgRegVReg.second = MF.getRegInfo().createVirtualRegister(getRegClassFor(VT));
+
+ // Copy Argument to virtual register.
+ OutChain = DAG.getCopyToReg(OutChain, ArgRegVReg.second,
+ RegToPass.second, OutFlag);
+ OutFlag = OutChain.getValue(1);
+ // Remember virtual register and type.
+ TailCallByValClobberedVRegs.push_back(ArgRegVReg);
+ TailCallByValClobberedVRegTypes.push_back(VT);
+ return true;
+}
+
+
+/// RestoreTailCallByValClobberedRegs - Restore registers which were saved to
+/// virtual registers to prevent tail call byval lowering from overwriting
+/// parameter registers.
+static SDOperand
+RestoreTailCallByValClobberedRegs(SelectionDAG & DAG, SDOperand Chain,
+ SmallVector< std::pair<unsigned, unsigned>, 8> &TailCallByValClobberedVRegs,
+ SmallVector<MVT::ValueType, 8> &TailCallByValClobberedVRegTypes) {
+ if (TailCallByValClobberedVRegs.size()==0) return Chain;
+
+ SmallVector<SDOperand, 8> RegOpChains;
+ for (unsigned i = 0, e=TailCallByValClobberedVRegs.size(); i != e; i++) {
+ SDOperand InFlag;
+ unsigned DestReg = TailCallByValClobberedVRegs[i].first;
+ unsigned VirtReg = TailCallByValClobberedVRegs[i].second;
+ MVT::ValueType VT = TailCallByValClobberedVRegTypes[i];
+ SDOperand Tmp = DAG.getCopyFromReg(Chain, VirtReg, VT, InFlag);
+ Chain = DAG.getCopyToReg(Chain, DestReg, Tmp, InFlag);
+ RegOpChains.push_back(Chain);
+ }
+ if (!RegOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &RegOpChains[0], RegOpChains.size());
+ return Chain;
}
SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo * MFI = MF.getFrameInfo();
SDOperand Chain = Op.getOperand(0);
unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
- CCAssignFn *CCFn = CCAssignFnForNode(Op);
-
- X86::X86_64SRet SRetMethod = X86::InMemory;
- if (Is64Bit && IsStructRet)
- // FIXME: We can't figure out type of the sret structure for indirect
- // calls. We need to copy more information from CallSite to the ISD::CALL
- // node.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- SRetMethod =
- ClassifyX86_64SRetCallReturn(dyn_cast<Function>(G->getGlobal()));
-
- // UGLY HACK! For x86-64, some 128-bit aggregates are returns in a pair of
- // registers. Unfortunately, llvm does not support i128 yet so we pretend it's
- // a sret call.
- if (SRetMethod != X86::InMemory)
- X86_64AnalyzeSRetCallOperands(Op.Val, CCFn, CCInfo);
- else
- CCInfo.AnalyzeCallOperands(Op.Val, CCFn);
+ CCInfo.AnalyzeCallOperands(Op.Val, CCAssignFnForNode(Op));
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes));
- SDOperand RetAddrFrIdx, NewRetAddrFrIdx;
- if (IsTailCall) {
- // Adjust the Return address stack slot.
- if (FPDiff) {
- MVT::ValueType VT = Is64Bit ? MVT::i64 : MVT::i32;
- RetAddrFrIdx = getReturnAddressFrameIndex(DAG);
- // Load the "old" Return address.
- RetAddrFrIdx =
- DAG.getLoad(VT, Chain,RetAddrFrIdx, NULL, 0);
- // Calculate the new stack slot for the return address.
- int SlotSize = Is64Bit ? 8 : 4;
- int NewReturnAddrFI =
- MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
- NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
- Chain = SDOperand(RetAddrFrIdx.Val, 1);
- }
- }
+ SDOperand RetAddrFrIdx;
+  // Load the return address for tail calls.
+ Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, IsTailCall, Is64Bit,
+ FPDiff);
SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
+ SmallVector<std::pair<unsigned, SDOperand>, 8> TailCallClobberedVRegs;
+
SmallVector<SDOperand, 8> MemOpChains;
SDOperand StackPtr;
+ bool containsTailCallByValArg = false;
+ SmallVector<std::pair<unsigned, unsigned>, 8> TailCallByValClobberedVRegs;
+ SmallVector<MVT::ValueType, 8> TailCallByValClobberedVRegTypes;
+
// Walk the register/memloc assignments, inserting copies/loads. For tail
- // calls, lower arguments which could otherwise be possibly overwritten to the
- // stack slot where they would go on normal function calls.
+ // calls, remember all arguments for later special lowering.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
-
+ bool isByVal = cast<ARG_FLAGSSDNode>(Op.getOperand(6+2*VA.getValNo()))->
+ getArgFlags().isByVal();
+
// Promote the value if needed.
switch (VA.getLocInfo()) {
default: assert(0 && "Unknown loc info!");
if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else {
- if (!IsTailCall || IsPossiblyOverwrittenArgumentOfTailCall(Arg)) {
+ if (!IsTailCall || (IsTailCall && isByVal)) {
assert(VA.isMemLoc());
if (StackPtr.Val == 0)
StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
Arg));
+        // Remember the fact that this call contains byval arguments.
+ containsTailCallByValArg |= IsTailCall && isByVal;
+ } else if (IsPossiblyOverwrittenArgumentOfTailCall(Arg, MFI)) {
+ TailCallClobberedVRegs.push_back(std::make_pair(i,Arg));
}
}
}
// and flag operands which copy the outgoing args into registers.
SDOperand InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    // Tail call byval lowering might overwrite argument registers, so
+    // arguments passed in registers need to be copied to a virtual register
+    // for later processing.
+ if (CopyTailCallByValClobberedRegToVirtReg(containsTailCallByValArg,
+ TailCallByValClobberedVRegs,
+ TailCallByValClobberedVRegTypes,
+ RegsToPass[i], Chain, InFlag, MF,
+ DAG))
+ continue;
+
Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
InFlag);
InFlag = Chain.getValue(1);
}
- if (IsTailCall)
- InFlag = SDOperand(); // ??? Isn't this nuking the preceding loop's output?
-
// ELF / PIC requires GOT in the EBX register before function calls via PLT
- // GOT pointer.
- // Does not work with tail call since ebx is not restored correctly by
- // tailcaller. TODO: at least for x86 - verify for x86-64
- if (!IsTailCall && !Is64Bit &&
- getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
- Subtarget->isPICStyleGOT()) {
+ // GOT pointer.
+ if (CallRequiresGOTPtrInReg(Is64Bit, IsTailCall)) {
Chain = DAG.getCopyToReg(Chain, X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
InFlag);
InFlag = Chain.getValue(1);
}
+  // If we are tail calling and generating PIC/GOT style code, load the
+  // address of the callee into ecx. The value in ecx is used as target of the
+  // tail jump. This is done to circumvent the ebx/callee-saved problem for
+  // tail calls on PIC/GOT architectures. Normally we would just put the
+  // address of GOT into ebx and then call target@PLT. But for tail calls ebx
+  // would be restored (since ebx is callee saved) before jumping to the
+  // target@PLT.
+ if (CallRequiresFnAddressInReg(Is64Bit, IsTailCall)) {
+ // Note: The actual moving to ecx is done further down.
+ GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+ if (G && !G->getGlobal()->hasHiddenVisibility() &&
+ !G->getGlobal()->hasProtectedVisibility())
+ Callee = LowerGlobalAddress(Callee, DAG);
+ else if (isa<ExternalSymbolSDNode>(Callee))
+ Callee = LowerExternalSymbol(Callee,DAG);
+ }
if (Is64Bit && isVarArg) {
// From AMD64 ABI document:
InFlag = Chain.getValue(1);
}
+
// For tail calls lower the arguments to the 'real' stack slot.
if (IsTailCall) {
SmallVector<SDOperand, 8> MemOpChains2;
SDOperand FIN;
int FI = 0;
+    // Do not flag preceding copytoreg stuff together with the following stuff.
+ InFlag = SDOperand();
+
+ Chain = CopyTailCallClobberedArgumentsToVRegs(Chain, TailCallClobberedVRegs,
+ DAG, MF, this);
+
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (!VA.isRegLoc()) {
assert(VA.isMemLoc());
SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
- unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
+ ISD::ArgFlagsTy Flags =
+ cast<ARG_FLAGSSDNode>(FlagsOp)->getArgFlags();
// Create frame index.
int32_t Offset = VA.getLocMemOffset()+FPDiff;
uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8;
FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
- FIN = DAG.getFrameIndex(FI, MVT::i32);
- SDOperand Source = Arg;
- if (IsPossiblyOverwrittenArgumentOfTailCall(Arg)) {
- // Copy from stack slots to stack slot of a tail called function. This
- // needs to be done because if we would lower the arguments directly
- // to their real stack slot we might end up overwriting each other.
- // Get source stack slot.
- Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
+ FIN = DAG.getFrameIndex(FI, getPointerTy());
+
+ // Find virtual register for this argument.
+ bool Found=false;
+ for (unsigned idx=0, e= TailCallClobberedVRegs.size(); idx < e; idx++)
+ if (TailCallClobberedVRegs[idx].first==i) {
+ Arg = TailCallClobberedVRegs[idx].second;
+ Found=true;
+ break;
+ }
+ assert(IsPossiblyOverwrittenArgumentOfTailCall(Arg, MFI)==false ||
+ (Found==true && "No corresponding Argument was found"));
+
+ if (Flags.isByVal()) {
+ // Copy relative to framepointer.
+ SDOperand Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
if (StackPtr.Val == 0)
StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
Source = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, Source);
- if ((Flags & ISD::ParamFlags::ByVal)==0)
- Source = DAG.getLoad(VA.getValVT(), Chain, Source, NULL, 0);
- }
- if (Flags & ISD::ParamFlags::ByVal) {
- // Copy relative to framepointer.
MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, Chain,
Flags, DAG));
} else {
// Store relative to framepointer.
- MemOpChains2.push_back(DAG.getStore(Chain, Source, FIN, NULL, 0));
+ MemOpChains2.push_back(
+ DAG.getStore(Chain, Arg, FIN,
+ PseudoSourceValue::getFixedStack(), FI));
}
}
}
Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
&MemOpChains2[0], MemOpChains2.size());
+ // Restore byval lowering clobbered registers.
+ Chain = RestoreTailCallByValClobberedRegs(DAG, Chain,
+ TailCallByValClobberedVRegs,
+ TailCallByValClobberedVRegTypes);
+
// Store the return address to the appropriate stack slot.
- if (FPDiff)
- Chain = DAG.getStore(Chain,RetAddrFrIdx, NewRetAddrFrIdx, NULL, 0);
+ Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit,
+ FPDiff);
}
// If the callee is a GlobalAddress node (quite common, every direct call is)
getTargetMachine().getCodeModel() != CodeModel::Large)
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
} else if (IsTailCall) {
- assert(Callee.getOpcode() == ISD::LOAD &&
- "Function destination must be loaded into virtual register");
unsigned Opc = Is64Bit ? X86::R9 : X86::ECX;
Chain = DAG.getCopyToReg(Chain,
- DAG.getRegister(Opc, getPointerTy()) ,
+ DAG.getRegister(Opc, getPointerTy()),
Callee,InFlag);
Callee = DAG.getRegister(Opc, getPointerTy());
// Add register as live out.
if (IsTailCall)
Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
- // Add an implicit use GOT pointer in EBX.
- if (!IsTailCall && !Is64Bit &&
- getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
- Subtarget->isPICStyleGOT())
- Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
-
// Add argument registers to the end of the list so that they are known live
// into the call.
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
+  // Add an implicit use of the GOT pointer in EBX.
+ if (!IsTailCall && !Is64Bit &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT())
+ Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
+
+  // Add an implicit use of AL for x86-64 vararg functions.
+ if (Is64Bit && isVarArg)
+ Ops.push_back(DAG.getRegister(X86::AL, MVT::i8));
+
if (InFlag.Val)
Ops.push_back(InFlag);
// Handle result values, copying them out of physregs into vregs that we
// return.
- switch (SRetMethod) {
- default:
- return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
- case X86::InGPR64:
- return SDOperand(LowerCallResultToTwo64BitRegs(Chain, InFlag, Op.Val,
- X86::RAX, X86::RDX,
- MVT::i64, DAG), Op.ResNo);
- case X86::InSSE:
- return SDOperand(LowerCallResultToTwo64BitRegs(Chain, InFlag, Op.Val,
- X86::XMM0, X86::XMM1,
- MVT::f64, DAG), Op.ResNo);
- case X86::InX87:
- return SDOperand(LowerCallResultToTwoX87Regs(Chain, InFlag, Op.Val, DAG),
- Op.ResNo);
- }
+ return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}
// provided:
// * tailcallopt is enabled
// * caller/callee are fastcc
-// * elf/pic is disabled OR
-// * elf/pic enabled + callee is in module + callee has
-// visibility protected or hidden
+// On the x86-64 architecture with GOT-style position independent code, only
+// local (within module) calls are supported at the moment.
// To keep the stack aligned according to platform abi the function
// GetAlignedArgumentStackSize ensures that argument delta is always multiples
// of stack alignment. (Dynamic linkers need this - darwin's dyld for example)
unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue();
if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
SDOperand Callee = Call.getOperand(4);
- // On elf/pic %ebx needs to be livein.
+    // On 32-bit x86 with PIC/GOT, tail calls are supported.
if (getTargetMachine().getRelocationModel() != Reloc::PIC_ ||
- !Subtarget->isPICStyleGOT())
+        !Subtarget->isPICStyleGOT() || !Subtarget->is64Bit())
return true;
- // Can only do local tail calls with PIC.
+ // Can only do local tail calls (in same module, hidden or protected) on
+ // x86_64 PIC/GOT at the moment.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
return G->getGlobal()->hasHiddenVisibility()
|| G->getGlobal()->hasProtectedVisibility();
return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
}
-/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
-///
-static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
+/// getSwapEltZeroMask - Returns a vector_shuffle mask for a shuffle that swaps
+/// element #0 of a vector with the specified index, leaving the rest of the
+/// elements in place.
+static SDOperand getSwapEltZeroMask(unsigned NumElems, unsigned DestElt,
+ SelectionDAG &DAG) {
+ MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
+ MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
+ SmallVector<SDOperand, 8> MaskVec;
+ // Element #0 of the result gets the elt we are replacing.
+ MaskVec.push_back(DAG.getConstant(DestElt, BaseVT));
+ for (unsigned i = 1; i != NumElems; ++i)
+ MaskVec.push_back(DAG.getConstant(i == DestElt ? 0 : i, BaseVT));
+ return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
+}
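+// Illustrative: getSwapEltZeroMask(4, 2, DAG) builds the mask <2, 1, 0, 3>,
+// i.e. element #0 receives old element #2, element #2 receives old element
+// #0, and elements #1 and #3 stay in place.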
+
+/// PromoteSplat - Promote a splat of v4f32, v8i16 or v16i8 to v4i32.
+static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG, bool HasSSE2) {
+ MVT::ValueType PVT = HasSSE2 ? MVT::v4i32 : MVT::v4f32;
+ MVT::ValueType VT = Op.getValueType();
+ if (PVT == VT)
+ return Op;
SDOperand V1 = Op.getOperand(0);
SDOperand Mask = Op.getOperand(2);
- MVT::ValueType VT = Op.getValueType();
unsigned NumElems = Mask.getNumOperands();
- Mask = getUnpacklMask(NumElems, DAG);
- while (NumElems != 4) {
- V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
- NumElems >>= 1;
+ // Special handling of v4f32 -> v4i32.
+ if (VT != MVT::v4f32) {
+ Mask = getUnpacklMask(NumElems, DAG);
+ while (NumElems > 4) {
+ V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
+ NumElems >>= 1;
+ }
+ Mask = getZeroVector(MVT::v4i32, DAG);
}
- V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
- Mask = getZeroVector(MVT::v4i32, DAG);
- SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
- DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
+ V1 = DAG.getNode(ISD::BIT_CONVERT, PVT, V1);
+ SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, PVT, V1,
+ DAG.getNode(ISD::UNDEF, PVT), Mask);
return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
}
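+// Illustrative sketch of the element-0 case in PromoteSplat: a v8i16 splat is
+// unpacked with itself (pairing each element with a copy of itself) until 4
+// elements remain, bitcast to v4i32, and shuffled against the all-zeros mask,
+// which is the canonical v4i32 splat of lane 0.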
/// vector of zero or undef vector. This produces a shuffle where the low
/// element of V2 is swizzled into the zero/undef vector, landing at element
/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
-static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
- unsigned NumElems, unsigned Idx,
+static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, unsigned Idx,
bool isZero, SelectionDAG &DAG) {
+ MVT::ValueType VT = V2.getValueType();
SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
+ unsigned NumElems = MVT::getVectorNumElements(V2.getValueType());
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
SmallVector<SDOperand, 16> MaskVec;
unsigned NumZero = 0;
unsigned NumNonZero = 0;
unsigned NonZeros = 0;
- bool HasNonImms = false;
+ bool IsAllConstants = true;
SmallSet<SDOperand, 8> Values;
for (unsigned i = 0; i < NumElems; ++i) {
SDOperand Elt = Op.getOperand(i);
Values.insert(Elt);
if (Elt.getOpcode() != ISD::Constant &&
Elt.getOpcode() != ISD::ConstantFP)
- HasNonImms = true;
+ IsAllConstants = false;
if (isZeroNode(Elt))
NumZero++;
else {
return DAG.getNode(ISD::UNDEF, VT);
}
- // Splat is obviously ok. Let legalizer expand it to a shuffle.
- if (Values.size() == 1)
- return SDOperand();
-
- // Special case for single non-zero element.
+  // Special case for a single non-zero, non-undef element.
if (NumNonZero == 1 && NumElems <= 4) {
unsigned Idx = CountTrailingZeros_32(NonZeros);
SDOperand Item = Op.getOperand(Idx);
- Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
- if (Idx == 0)
+
+ // If this is an insertion of an i64 value on x86-32, and if the top bits of
+ // the value are obviously zero, truncate the value to i32 and do the
+ // insertion that way. Only do this if the value is non-constant or if the
+ // value is a constant being inserted into element 0. It is cheaper to do
+ // a constant pool load than it is to do a movd + shuffle.
+ if (EVT == MVT::i64 && !Subtarget->is64Bit() &&
+ (!IsAllConstants || Idx == 0)) {
+ if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
+ // Handle MMX and SSE both.
+ MVT::ValueType VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32;
+        unsigned VecElts = VT == MVT::v2i64 ? 4 : 2;
+
+ // Truncate the value (which may itself be a constant) to i32, and
+ // convert it to a vector with movd (S2V+shuffle to zero extend).
+ Item = DAG.getNode(ISD::TRUNCATE, MVT::i32, Item);
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VecVT, Item);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, true, DAG);
+
+ // Now we have our 32-bit value zero extended in the low element of
+ // a vector. If Idx != 0, swizzle it into place.
+ if (Idx != 0) {
+ SDOperand Ops[] = {
+ Item, DAG.getNode(ISD::UNDEF, Item.getValueType()),
+ getSwapEltZeroMask(VecElts, Idx, DAG)
+ };
+ Item = DAG.getNode(ISD::VECTOR_SHUFFLE, VecVT, Ops, 3);
+ }
+ return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Item);
+ }
+ }
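+    // Illustrative: on x86-32, inserting "%v = zext i32 %x to i64" into a
+    // v2i64 takes the path above, since MaskedValueIsZero proves the top 32
+    // bits are zero: truncate back to i32, movd into a v4i32, zero the other
+    // lanes via the shuffle, and bitcast the result to v2i64.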
+
+ // If we have a constant or non-constant insertion into the low element of
+ // a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
+ // the rest of the elements. This will be matched as movd/movq/movss/movsd
+ // depending on what the source datatype is. Because we can only get here
+ // when NumElems <= 4, this only needs to handle i32/f32/i64/f64.
+ if (Idx == 0 &&
+ // Don't do this for i64 values on x86-32.
+ (EVT != MVT::i64 || Subtarget->is64Bit())) {
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
- return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
- NumZero > 0, DAG);
- else if (!HasNonImms) // Otherwise, it's better to do a constpool load.
+ return getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, DAG);
+ }
+
+ if (IsAllConstants) // Otherwise, it's better to do a constpool load.
return SDOperand();
+ // Otherwise, if this is a vector with i32 or f32 elements, and the element
+ // is a non-constant being inserted into an element other than the low one,
+ // we can't use a constant pool load. Instead, use SCALAR_TO_VECTOR (aka
+ // movd/movss) to move this into the low element, then shuffle it into
+ // place.
if (EVTBits == 32) {
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
+
// Turn it into a shuffle of zero and zero-extended scalar to vector.
- Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
- DAG);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, DAG);
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
SmallVector<SDOperand, 8> MaskVec;
}
}
+ // Splat is obviously ok. Let legalizer expand it to a shuffle.
+ if (Values.size() == 1)
+ return SDOperand();
+
// A vector full of immediates; various special cases are already
// handled, so this is best done with a single constant-pool load.
- if (!HasNonImms)
+ if (IsAllConstants)
return SDOperand();
// Let legalizer expand 2-wide build_vectors.
default: assert(false && "Unexpected!");
}
- if (NewWidth == 2)
+ if (NewWidth == 2) {
if (MVT::isInteger(VT))
NewVT = MVT::v2i64;
else
NewVT = MVT::v2f64;
+ }
unsigned Scale = NumElems / NewWidth;
SmallVector<SDOperand, 8> MaskVec;
for (unsigned i = 0; i < NumElems; i += Scale) {
SDOperand PermMask = Op.getOperand(2);
MVT::ValueType VT = Op.getValueType();
unsigned NumElems = PermMask.getNumOperands();
+ bool isMMX = MVT::getSizeInBits(VT) == 64;
bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
bool V1IsSplat = false;
return V2;
if (isSplatMask(PermMask.Val)) {
- if (NumElems <= 4) return Op;
- // Promote it to a v4i32 splat.
- return PromoteSplat(Op, DAG);
+ if (isMMX || NumElems < 4) return Op;
+ // Promote it to a v4{if}32 splat.
+ return PromoteSplat(Op, DAG, Subtarget->hasSSE2());
}
// If the shuffle can be profitably rewritten as a narrower shuffle, then
return Op;
}
- // If VT is integer, try PSHUF* first, then SHUFP*.
- if (MVT::isInteger(VT)) {
- // MMX doesn't have PSHUFD; it does have PSHUFW. While it's theoretically
- // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented.
- if (((MVT::getSizeInBits(VT) != 64 || NumElems == 4) &&
- X86::isPSHUFDMask(PermMask.Val)) ||
- X86::isPSHUFHWMask(PermMask.Val) ||
- X86::isPSHUFLWMask(PermMask.Val)) {
- if (V2.getOpcode() != ISD::UNDEF)
- return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
- DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
+ // Try PSHUF* first, then SHUFP*.
+ // MMX doesn't have PSHUFD but it does have PSHUFW. While it's theoretically
+ // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented.
+ if (isMMX && NumElems == 4 && X86::isPSHUFDMask(PermMask.Val)) {
+ if (V2.getOpcode() != ISD::UNDEF)
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
+ DAG.getNode(ISD::UNDEF, VT), PermMask);
+ return Op;
+ }
+
+ if (!isMMX) {
+ if (Subtarget->hasSSE2() &&
+ (X86::isPSHUFDMask(PermMask.Val) ||
+ X86::isPSHUFHWMask(PermMask.Val) ||
+ X86::isPSHUFLWMask(PermMask.Val))) {
+ MVT::ValueType RVT = VT;
+ if (VT == MVT::v4f32) {
+ RVT = MVT::v4i32;
+ Op = DAG.getNode(ISD::VECTOR_SHUFFLE, RVT,
+ DAG.getNode(ISD::BIT_CONVERT, RVT, V1),
+ DAG.getNode(ISD::UNDEF, RVT), PermMask);
+ } else if (V2.getOpcode() != ISD::UNDEF)
+ Op = DAG.getNode(ISD::VECTOR_SHUFFLE, RVT, V1,
+ DAG.getNode(ISD::UNDEF, RVT), PermMask);
+ if (RVT != VT)
+ Op = DAG.getNode(ISD::BIT_CONVERT, VT, Op);
return Op;
}
- if (X86::isSHUFPMask(PermMask.Val) &&
- MVT::getSizeInBits(VT) != 64) // Don't do this for MMX.
- return Op;
- } else {
- // Floating point cases in the other order.
- if (X86::isSHUFPMask(PermMask.Val))
+ // Binary or unary shufps.
+ if (X86::isSHUFPMask(PermMask.Val) ||
+ (V2.getOpcode() == ISD::UNDEF && X86::isPSHUFDMask(PermMask.Val)))
return Op;
- if (X86::isPSHUFDMask(PermMask.Val) ||
- X86::isPSHUFHWMask(PermMask.Val) ||
- X86::isPSHUFLWMask(PermMask.Val)) {
- if (V2.getOpcode() != ISD::UNDEF)
- return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
- DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
- return Op;
- }
}
// Handle v8i16 specifically since SSE can do byte extraction and insertion.
}
// Handle all 4 wide cases with a number of shuffles.
- if (NumElems == 4 && MVT::getSizeInBits(VT) != 64) {
+ if (NumElems == 4 && !isMMX) {
// Don't do this for MMX.
MVT::ValueType MaskVT = PermMask.getValueType();
MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
return SDOperand();
}
+SDOperand
+X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDOperand Op,
+ SelectionDAG &DAG) {
+ MVT::ValueType VT = Op.getValueType();
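+  // PEXTRB/PEXTRW zero-extend the element into a GR32; the AssertZext
+  // records that fact so the truncate back to the element type folds away.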
+ if (MVT::getSizeInBits(VT) == 8) {
+ SDOperand Extract = DAG.getNode(X86ISD::PEXTRB, MVT::i32,
+ Op.getOperand(0), Op.getOperand(1));
+ SDOperand Assert = DAG.getNode(ISD::AssertZext, MVT::i32, Extract,
+ DAG.getValueType(VT));
+ return DAG.getNode(ISD::TRUNCATE, VT, Assert);
+ } else if (MVT::getSizeInBits(VT) == 16) {
+ SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, MVT::i32,
+ Op.getOperand(0), Op.getOperand(1));
+ SDOperand Assert = DAG.getNode(ISD::AssertZext, MVT::i32, Extract,
+ DAG.getValueType(VT));
+ return DAG.getNode(ISD::TRUNCATE, VT, Assert);
+ } else if (VT == MVT::f32) {
+ // EXTRACTPS outputs to a GPR32 register which will require a movd to copy
+ // the result back to FR32 register. It's only worth matching if the
+ // result has a single use which is a store.
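+    // Sketch of the pattern being set up (operands illustrative):
+    //   (store (f32 (extract_vector_elt v4f32:$src, 1)), addr)
+    // which isel can then match as a single extractps to memory.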
+ if (!Op.hasOneUse())
+ return SDOperand();
+ SDNode *User = Op.Val->use_begin()->getUser();
+ if (User->getOpcode() != ISD::STORE)
+ return SDOperand();
+ SDOperand Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
+ DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Op.getOperand(0)),
+ Op.getOperand(1));
+ return DAG.getNode(ISD::BIT_CONVERT, MVT::f32, Extract);
+ }
+ return SDOperand();
+}
+
+
SDOperand
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
if (!isa<ConstantSDNode>(Op.getOperand(1)))
return SDOperand();
+ if (Subtarget->hasSSE41()) {
+ SDOperand Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
+ if (Res.Val)
+ return Res;
+ }
+
MVT::ValueType VT = Op.getValueType();
// TODO: handle v16i8.
if (MVT::getSizeInBits(VT) == 16) {
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
DAG.getIntPtrConstant(0));
} else if (MVT::getSizeInBits(VT) == 64) {
+ // FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b
+ // FIXME: seems like this should be unnecessary if mov{h,l}pd were taught
+ // to match extract_elt for f64.
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Idx == 0)
return Op;
return SDOperand();
}
+SDOperand
+X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDOperand Op, SelectionDAG &DAG){
+ MVT::ValueType VT = Op.getValueType();
+ MVT::ValueType EVT = MVT::getVectorElementType(VT);
+
+ SDOperand N0 = Op.getOperand(0);
+ SDOperand N1 = Op.getOperand(1);
+ SDOperand N2 = Op.getOperand(2);
+
+ if ((MVT::getSizeInBits(EVT) == 8) || (MVT::getSizeInBits(EVT) == 16)) {
+ unsigned Opc = (MVT::getSizeInBits(EVT) == 8) ? X86ISD::PINSRB
+ : X86ISD::PINSRW;
+    // Transform it so it matches pinsr{b,w}, which expects a GR32 as its
+    // second argument.
+ if (N1.getValueType() != MVT::i32)
+ N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
+ if (N2.getValueType() != MVT::i32)
+ N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getValue());
+ return DAG.getNode(Opc, VT, N0, N1, N2);
+ } else if (EVT == MVT::f32) {
+ // Bits [7:6] of the constant are the source select. This will always be
+ // zero here. The DAG Combiner may combine an extract_elt index into these
+ // bits. For example (insert (extract, 3), 2) could be matched by putting
+ // the '3' into bits [7:6] of X86ISD::INSERTPS.
+ // Bits [5:4] of the constant are the destination select. This is the
+ // value of the incoming immediate.
+ // Bits [3:0] of the constant are the zero mask. The DAG Combiner may
+ // combine either bitwise AND or insert of float 0.0 to set these bits.
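+    // Worked example (illustrative): inserting into destination element 2
+    // with no zeroing puts 2 in bits [5:4], so the immediate is 2 << 4 = 0x20,
+    // matching the shift below.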
+ N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getValue() << 4);
+ return DAG.getNode(X86ISD::INSERTPS, VT, N0, N1, N2);
+ }
+ return SDOperand();
+}
+
SDOperand
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
MVT::ValueType VT = Op.getValueType();
MVT::ValueType EVT = MVT::getVectorElementType(VT);
+
+ if (Subtarget->hasSSE41())
+ return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
+
if (EVT == MVT::i8)
return SDOperand();
SDOperand
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
- return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
+ MVT::ValueType VT = MVT::v2i32;
+ switch (Op.getValueType()) {
+ default: break;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ VT = MVT::v4i32;
+ break;
+ }
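+  // movd can only move a 32-bit GPR, so the scalar was any-extended to i32
+  // above; insert it as the low element of a 32-bit-element vector, then
+  // bit-convert the result back to the requested vector type.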
+ return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, AnyExt));
}
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// the GV offset field. Platform check is inside GVRequiresExtraLoad() call
// The same applies for external symbols during PIC codegen
if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false))
- Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0);
+ Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result,
+ PseudoSourceValue::getGOT(), 0);
return Result;
}
SDOperand Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA);
if (GA->getGlobal()->isDeclaration()) // initial exec TLS model
- Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, NULL, 0);
+ Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset,
+ PseudoSourceValue::getGOT(), 0);
// The address of the thread local variable is the add of the thread
// pointer with the offset of the variable.
-/// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and
-/// take a 2 x i32 value to shift plus a shift amount.
+/// LowerShift - Lower SRA_PARTS and friends, which return two half-width
+/// values and take a pair of half-width values to shift plus a shift amount.
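+/// The expansion mirrors the hardware: SHLD/SHRD produce the combined result
+/// for amounts below VTBits, and a CMOV on the (amount & VTBits) test picks
+/// the corrected halves when the amount wraps past VTBits.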
SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
- assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
- "Not an i64 shift!");
+ assert(Op.getNumOperands() == 3 && "Not a double-shift!");
+ MVT::ValueType VT = Op.getValueType();
+ unsigned VTBits = MVT::getSizeInBits(VT);
bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
SDOperand ShOpLo = Op.getOperand(0);
SDOperand ShOpHi = Op.getOperand(1);
SDOperand ShAmt = Op.getOperand(2);
SDOperand Tmp1 = isSRA ?
- DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
- DAG.getConstant(0, MVT::i32);
+ DAG.getNode(ISD::SRA, VT, ShOpHi, DAG.getConstant(VTBits - 1, MVT::i8)) :
+ DAG.getConstant(0, VT);
SDOperand Tmp2, Tmp3;
if (Op.getOpcode() == ISD::SHL_PARTS) {
- Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
- Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
+ Tmp2 = DAG.getNode(X86ISD::SHLD, VT, ShOpHi, ShOpLo, ShAmt);
+ Tmp3 = DAG.getNode(ISD::SHL, VT, ShOpLo, ShAmt);
} else {
- Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
- Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
+ Tmp2 = DAG.getNode(X86ISD::SHRD, VT, ShOpLo, ShOpHi, ShAmt);
+ Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, VT, ShOpHi, ShAmt);
}
const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt,
- DAG.getConstant(32, MVT::i8));
- SDOperand Cond = DAG.getNode(X86ISD::CMP, MVT::i32,
+ DAG.getConstant(VTBits, MVT::i8));
+ SDOperand Cond = DAG.getNode(X86ISD::CMP, VT,
AndNode, DAG.getConstant(0, MVT::i8));
SDOperand Hi, Lo;
SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8);
- VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag);
+ VTs = DAG.getNodeValueTypes(VT, MVT::Flag);
SmallVector<SDOperand, 4> Ops;
if (Op.getOpcode() == ISD::SHL_PARTS) {
Ops.push_back(Tmp2);
Ops.push_back(Tmp3);
Ops.push_back(CC);
Ops.push_back(Cond);
- Hi = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size());
+ Hi = DAG.getNode(X86ISD::CMOV, VT, &Ops[0], Ops.size());
Ops.clear();
Ops.push_back(Tmp3);
Ops.push_back(Tmp1);
Ops.push_back(CC);
Ops.push_back(Cond);
- Lo = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size());
+ Lo = DAG.getNode(X86ISD::CMOV, VT, &Ops[0], Ops.size());
} else {
Ops.push_back(Tmp2);
Ops.push_back(Tmp3);
Ops.push_back(CC);
Ops.push_back(Cond);
- Lo = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size());
+ Lo = DAG.getNode(X86ISD::CMOV, VT, &Ops[0], Ops.size());
Ops.clear();
Ops.push_back(Tmp3);
Ops.push_back(Tmp1);
Ops.push_back(CC);
Ops.push_back(Cond);
- Hi = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size());
+ Hi = DAG.getNode(X86ISD::CMOV, VT, &Ops[0], Ops.size());
}
- VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32);
+ VTs = DAG.getNodeValueTypes(VT, VT);
Ops.clear();
Ops.push_back(Lo);
Ops.push_back(Hi);
}
SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
- assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
- Op.getOperand(0).getValueType() >= MVT::i16 &&
- "Unknown SINT_TO_FP to lower!");
-
- SDOperand Result;
MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
+ assert(SrcVT <= MVT::i64 && SrcVT >= MVT::i16 &&
+ "Unknown SINT_TO_FP to lower!");
+
+ // These are really Legal; caller falls through into that case.
+ if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType()))
+ return SDOperand();
+ if (SrcVT == MVT::i64 && Op.getValueType() != MVT::f80 &&
+ Subtarget->is64Bit())
+ return SDOperand();
+
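+  // Otherwise lower via the x87: spill the integer to a fresh stack slot and
+  // convert it with FILD, which only reads its source operand from memory.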
unsigned Size = MVT::getSizeInBits(SrcVT)/8;
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0),
- StackSlot, NULL, 0);
-
- // These are really Legal; caller falls through into that case.
- if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType()))
- return Result;
- if (SrcVT == MVT::i64 && Op.getValueType() != MVT::f80 &&
- Subtarget->is64Bit())
- return Result;
+ StackSlot,
+ PseudoSourceValue::getFixedStack(),
+ SSFI);
// Build the FILD
SDVTList Tys;
Ops.push_back(Chain);
Ops.push_back(StackSlot);
Ops.push_back(DAG.getValueType(SrcVT));
- Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG :X86ISD::FILD,
- Tys, &Ops[0], Ops.size());
+ SDOperand Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG : X86ISD::FILD,
+ Tys, &Ops[0], Ops.size());
if (useSSE) {
Chain = Result.getValue(1);
Ops.push_back(DAG.getValueType(Op.getValueType()));
Ops.push_back(InFlag);
Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
- Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0);
+ Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot,
+ PseudoSourceValue::getFixedStack(), SSFI);
}
return Result;
SDOperand Value = Op.getOperand(0);
if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) {
assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
- Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0);
+ Chain = DAG.getStore(Chain, Value, StackSlot,
+ PseudoSourceValue::getFixedStack(), SSFI);
SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
SDOperand Ops[] = {
Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
}
Constant *C = ConstantVector::get(CV);
SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
- SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
+ SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
false, 16);
return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
}
}
Constant *C = ConstantVector::get(CV);
SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
- SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
+ SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
false, 16);
if (MVT::isVector(VT)) {
return DAG.getNode(ISD::BIT_CONVERT, VT,
}
Constant *C = ConstantVector::get(CV);
SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
- SDOperand Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx, NULL, 0,
+ SDOperand Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
false, 16);
SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1);
}
C = ConstantVector::get(CV);
CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
- SDOperand Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
+ SDOperand Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
false, 16);
SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2);
return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2);
}
-SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
- SDOperand InFlag(0, 0);
- SDOperand Chain = Op.getOperand(0);
- unsigned Align =
- (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
- if (Align == 0) Align = 1;
-
- ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
- // If not DWORD aligned or size is more than the threshold, call memset.
- // The libc version is likely to be faster for these cases. It can use the
- // address value and run time information about the CPU.
- if ((Align & 3) != 0 ||
- (I && I->getValue() > Subtarget->getMaxInlineSizeThreshold())) {
- MVT::ValueType IntPtr = getPointerTy();
- const Type *IntPtrTy = getTargetData()->getIntPtrType();
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- Entry.Node = Op.getOperand(1);
- Entry.Ty = IntPtrTy;
- Args.push_back(Entry);
- // Extend the unsigned i8 argument to be an int value for the call.
- Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
- Entry.Ty = IntPtrTy;
- Args.push_back(Entry);
- Entry.Node = Op.getOperand(3);
- Args.push_back(Entry);
- std::pair<SDOperand,SDOperand> CallResult =
- LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
- DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
- return CallResult.second;
+SDOperand
+X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG,
+ SDOperand Chain,
+ SDOperand Dst, SDOperand Src,
+ SDOperand Size, unsigned Align,
+ const Value *DstSV, uint64_t DstOff) {
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+
+  // If not DWORD aligned or size is more than the threshold, call the library.
+  // The libc version is likely to be faster for these cases. It can use the
+  // address value and run time information about the CPU.
+  if ((Align & 3) != 0 ||
+      !ConstantSize ||
+      ConstantSize->getValue() > getSubtarget()->getMaxInlineSizeThreshold()) {
+ SDOperand InFlag(0, 0);
+
+ // Check to see if there is a specialized entry-point for memory zeroing.
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
+ if (const char *bzeroEntry =
+ V && V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
+ MVT::ValueType IntPtr = getPointerTy();
+ const Type *IntPtrTy = getTargetData()->getIntPtrType();
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Dst;
+ Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+ Entry.Node = Size;
+ Args.push_back(Entry);
+ std::pair<SDOperand,SDOperand> CallResult =
+ LowerCallTo(Chain, Type::VoidTy, false, false, false, CallingConv::C,
+ false, DAG.getExternalSymbol(bzeroEntry, IntPtr),
+ Args, DAG);
+ return CallResult.second;
+ }
+
+ // Otherwise have the target-independent code call memset.
+ return SDOperand();
}
+ uint64_t SizeVal = ConstantSize->getValue();
+ SDOperand InFlag(0, 0);
MVT::ValueType AVT;
SDOperand Count;
- ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
unsigned BytesLeft = 0;
bool TwoRepStos = false;
if (ValC) {
ValReg = X86::EAX;
Val = (Val << 8) | Val;
Val = (Val << 16) | Val;
- if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) { // QWORD aligned
+ if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned
AVT = MVT::i64;
ValReg = X86::RAX;
Val = (Val << 32) | Val;
default: // Byte aligned
AVT = MVT::i8;
ValReg = X86::AL;
- Count = Op.getOperand(3);
+ Count = Size;
break;
}
if (AVT > MVT::i8) {
- if (I) {
- unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
- Count = DAG.getIntPtrConstant(I->getValue() / UBytes);
- BytesLeft = I->getValue() % UBytes;
- } else {
- assert(AVT >= MVT::i32 &&
- "Do not use rep;stos if not at least DWORD aligned");
- Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
- Op.getOperand(3), DAG.getConstant(2, MVT::i8));
- TwoRepStos = true;
- }
+ unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
+ Count = DAG.getIntPtrConstant(SizeVal / UBytes);
+ BytesLeft = SizeVal % UBytes;
}
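+    // E.g. (illustrative) a DWORD-aligned 23-byte memset: AVT = i32, so
+    // Count = 5 rep;stos iterations and BytesLeft = 3, finished off by the
+    // recursive getMemset call further down.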
Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
InFlag = Chain.getValue(1);
} else {
AVT = MVT::i8;
- Count = Op.getOperand(3);
- Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
+ Count = Size;
+ Chain = DAG.getCopyToReg(Chain, X86::AL, Src, InFlag);
InFlag = Chain.getValue(1);
}
Count, InFlag);
InFlag = Chain.getValue(1);
Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
- Op.getOperand(1), InFlag);
+ Dst, InFlag);
InFlag = Chain.getValue(1);
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
if (TwoRepStos) {
InFlag = Chain.getValue(1);
- Count = Op.getOperand(3);
+ Count = Size;
MVT::ValueType CVT = Count.getValueType();
SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
Ops.push_back(InFlag);
Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
} else if (BytesLeft) {
- // Issue stores for the last 1 - 7 bytes.
- SDOperand Value;
- unsigned Val = ValC->getValue() & 255;
- unsigned Offset = I->getValue() - BytesLeft;
- SDOperand DstAddr = Op.getOperand(1);
- MVT::ValueType AddrVT = DstAddr.getValueType();
- if (BytesLeft >= 4) {
- Val = (Val << 8) | Val;
- Val = (Val << 16) | Val;
- Value = DAG.getConstant(Val, MVT::i32);
- Chain = DAG.getStore(Chain, Value,
- DAG.getNode(ISD::ADD, AddrVT, DstAddr,
- DAG.getConstant(Offset, AddrVT)),
- NULL, 0);
- BytesLeft -= 4;
- Offset += 4;
- }
- if (BytesLeft >= 2) {
- Value = DAG.getConstant((Val << 8) | Val, MVT::i16);
- Chain = DAG.getStore(Chain, Value,
- DAG.getNode(ISD::ADD, AddrVT, DstAddr,
- DAG.getConstant(Offset, AddrVT)),
- NULL, 0);
- BytesLeft -= 2;
- Offset += 2;
- }
- if (BytesLeft == 1) {
- Value = DAG.getConstant(Val, MVT::i8);
- Chain = DAG.getStore(Chain, Value,
- DAG.getNode(ISD::ADD, AddrVT, DstAddr,
- DAG.getConstant(Offset, AddrVT)),
- NULL, 0);
- }
+ // Handle the last 1 - 7 bytes.
+ unsigned Offset = SizeVal - BytesLeft;
+ MVT::ValueType AddrVT = Dst.getValueType();
+ MVT::ValueType SizeVT = Size.getValueType();
+
+ Chain = DAG.getMemset(Chain,
+ DAG.getNode(ISD::ADD, AddrVT, Dst,
+ DAG.getConstant(Offset, AddrVT)),
+ Src,
+ DAG.getConstant(BytesLeft, SizeVT),
+ Align, DstSV, Offset);
}
+  // TODO: Use a TokenFactor, as in memcpy, instead of a single chain.
return Chain;
}
-SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain,
- SDOperand Dest,
- SDOperand Source,
- unsigned Size,
- unsigned Align,
- SelectionDAG &DAG) {
+SDOperand
+X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG,
+ SDOperand Chain,
+ SDOperand Dst, SDOperand Src,
+ SDOperand Size, unsigned Align,
+ bool AlwaysInline,
+ const Value *DstSV, uint64_t DstOff,
+ const Value *SrcSV, uint64_t SrcOff){
+
+  // This requires the copy size to be a constant, preferably
+  // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return SDOperand();
+ uint64_t SizeVal = ConstantSize->getValue();
+ if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
+ return SDOperand();
+
+ SmallVector<SDOperand, 4> Results;
+
MVT::ValueType AVT;
unsigned BytesLeft = 0;
- switch (Align & 3) {
- case 2: // WORD aligned
- AVT = MVT::i16;
- break;
- case 0: // DWORD aligned
- AVT = MVT::i32;
- if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned
- AVT = MVT::i64;
- break;
- default: // Byte aligned
- AVT = MVT::i8;
- break;
- }
+ if (Align >= 8 && Subtarget->is64Bit())
+ AVT = MVT::i64;
+ else if (Align >= 4)
+ AVT = MVT::i32;
+ else if (Align >= 2)
+ AVT = MVT::i16;
+ else
+ AVT = MVT::i8;
unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
- SDOperand Count = DAG.getIntPtrConstant(Size / UBytes);
- BytesLeft = Size % UBytes;
+ unsigned CountVal = SizeVal / UBytes;
+ SDOperand Count = DAG.getIntPtrConstant(CountVal);
+ BytesLeft = SizeVal % UBytes;
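+  // E.g. (illustrative) a 30-byte copy with 8-byte alignment on x86-64:
+  // AVT = i64, so CountVal = 3 rep;movs iterations and BytesLeft = 6,
+  // finished off by the recursive getMemcpy call below.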
SDOperand InFlag(0, 0);
Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
Count, InFlag);
InFlag = Chain.getValue(1);
Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
- Dest, InFlag);
+ Dst, InFlag);
InFlag = Chain.getValue(1);
Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI,
- Source, InFlag);
+ Src, InFlag);
InFlag = Chain.getValue(1);
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
Ops.push_back(Chain);
Ops.push_back(DAG.getValueType(AVT));
Ops.push_back(InFlag);
- Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
+ Results.push_back(DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()));
if (BytesLeft) {
- // Issue loads and stores for the last 1 - 7 bytes.
- unsigned Offset = Size - BytesLeft;
- SDOperand DstAddr = Dest;
- MVT::ValueType DstVT = DstAddr.getValueType();
- SDOperand SrcAddr = Source;
- MVT::ValueType SrcVT = SrcAddr.getValueType();
- SDOperand Value;
- if (BytesLeft >= 4) {
- Value = DAG.getLoad(MVT::i32, Chain,
- DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
- DAG.getConstant(Offset, SrcVT)),
- NULL, 0);
- Chain = Value.getValue(1);
- Chain = DAG.getStore(Chain, Value,
- DAG.getNode(ISD::ADD, DstVT, DstAddr,
- DAG.getConstant(Offset, DstVT)),
- NULL, 0);
- BytesLeft -= 4;
- Offset += 4;
- }
- if (BytesLeft >= 2) {
- Value = DAG.getLoad(MVT::i16, Chain,
- DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
- DAG.getConstant(Offset, SrcVT)),
- NULL, 0);
- Chain = Value.getValue(1);
- Chain = DAG.getStore(Chain, Value,
- DAG.getNode(ISD::ADD, DstVT, DstAddr,
- DAG.getConstant(Offset, DstVT)),
- NULL, 0);
- BytesLeft -= 2;
- Offset += 2;
- }
+ // Handle the last 1 - 7 bytes.
+ unsigned Offset = SizeVal - BytesLeft;
+ MVT::ValueType DstVT = Dst.getValueType();
+ MVT::ValueType SrcVT = Src.getValueType();
+ MVT::ValueType SizeVT = Size.getValueType();
- if (BytesLeft == 1) {
- Value = DAG.getLoad(MVT::i8, Chain,
- DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
- DAG.getConstant(Offset, SrcVT)),
- NULL, 0);
- Chain = Value.getValue(1);
- Chain = DAG.getStore(Chain, Value,
- DAG.getNode(ISD::ADD, DstVT, DstAddr,
- DAG.getConstant(Offset, DstVT)),
- NULL, 0);
- }
+ Results.push_back(DAG.getMemcpy(Chain,
+ DAG.getNode(ISD::ADD, DstVT, Dst,
+ DAG.getConstant(Offset,
+ DstVT)),
+ DAG.getNode(ISD::ADD, SrcVT, Src,
+ DAG.getConstant(Offset,
+ SrcVT)),
+ DAG.getConstant(BytesLeft, SizeVT),
+ Align, AlwaysInline,
+ DstSV, Offset, SrcSV, Offset));
}
- return Chain;
+ return DAG.getNode(ISD::TokenFactor, MVT::Other, &Results[0], Results.size());
}
/// Expand the result of: i64,outchain = READCYCLECOUNTER inchain
}
SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
- SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2));
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
if (!Subtarget->is64Bit()) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
- return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV->getValue(),
- SV->getOffset());
+    return DAG.getStore(Op.getOperand(0), FR, Op.getOperand(1), SV, 0);
}
// __va_list_tag:
// Store gp_offset
SDOperand Store = DAG.getStore(Op.getOperand(0),
DAG.getConstant(VarArgsGPOffset, MVT::i32),
- FIN, SV->getValue(), SV->getOffset());
+ FIN, SV, 0);
MemOps.push_back(Store);
// Store fp_offset
FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(4));
Store = DAG.getStore(Op.getOperand(0),
DAG.getConstant(VarArgsFPOffset, MVT::i32),
- FIN, SV->getValue(), SV->getOffset());
+ FIN, SV, 0);
MemOps.push_back(Store);
// Store ptr to overflow_arg_area
FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(4));
SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
- Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(),
- SV->getOffset());
+ Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV, 0);
MemOps.push_back(Store);
// Store ptr to reg_save_area.
FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(8));
SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
- Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(),
- SV->getOffset());
+ Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV, 0);
MemOps.push_back(Store);
return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size());
}
SDOperand Chain = Op.getOperand(0);
SDOperand DstPtr = Op.getOperand(1);
SDOperand SrcPtr = Op.getOperand(2);
- SrcValueSDNode *DstSV = cast<SrcValueSDNode>(Op.getOperand(3));
- SrcValueSDNode *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4));
+ const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+ const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
- SrcPtr = DAG.getLoad(getPointerTy(), Chain, SrcPtr,
- SrcSV->getValue(), SrcSV->getOffset());
+ SrcPtr = DAG.getLoad(getPointerTy(), Chain, SrcPtr, SrcSV, 0);
Chain = SrcPtr.getValue(1);
for (unsigned i = 0; i < 3; ++i) {
- SDOperand Val = DAG.getLoad(MVT::i64, Chain, SrcPtr,
- SrcSV->getValue(), SrcSV->getOffset());
+ SDOperand Val = DAG.getLoad(MVT::i64, Chain, SrcPtr, SrcSV, 0);
Chain = Val.getValue(1);
- Chain = DAG.getStore(Chain, Val, DstPtr,
- DstSV->getValue(), DstSV->getOffset());
+ Chain = DAG.getStore(Chain, Val, DstPtr, DstSV, 0);
if (i == 2)
break;
SrcPtr = DAG.getNode(ISD::ADD, getPointerTy(), SrcPtr,
SDOperand FPtr = Op.getOperand(2); // nested function
SDOperand Nest = Op.getOperand(3); // 'nest' parameter value
- SrcValueSDNode *TrmpSV = cast<SrcValueSDNode>(Op.getOperand(4));
+ const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
const X86InstrInfo *TII =
((X86TargetMachine&)getTargetMachine()).getInstrInfo();
const unsigned char MOV64ri = TII->getBaseOpcodeFor(X86::MOV64ri);
const unsigned char N86R10 =
- ((X86RegisterInfo*)RegInfo)->getX86RegNum(X86::R10);
+ ((const X86RegisterInfo*)RegInfo)->getX86RegNum(X86::R10);
const unsigned char N86R11 =
- ((X86RegisterInfo*)RegInfo)->getX86RegNum(X86::R11);
+ ((const X86RegisterInfo*)RegInfo)->getX86RegNum(X86::R11);
const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix
unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11
SDOperand Addr = Trmp;
OutChains[0] = DAG.getStore(Root, DAG.getConstant(OpCode, MVT::i16), Addr,
- TrmpSV->getValue(), TrmpSV->getOffset());
+ TrmpAddr, 0);
Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(2, MVT::i64));
- OutChains[1] = DAG.getStore(Root, FPtr, Addr, TrmpSV->getValue(),
- TrmpSV->getOffset() + 2, false, 2);
+ OutChains[1] = DAG.getStore(Root, FPtr, Addr, TrmpAddr, 2, false, 2);
// Load the 'nest' parameter value into R10.
// R10 is specified in X86CallingConv.td
OpCode = ((MOV64ri | N86R10) << 8) | REX_WB; // movabsq r10
Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(10, MVT::i64));
OutChains[2] = DAG.getStore(Root, DAG.getConstant(OpCode, MVT::i16), Addr,
- TrmpSV->getValue(), TrmpSV->getOffset() + 10);
+ TrmpAddr, 10);
Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(12, MVT::i64));
- OutChains[3] = DAG.getStore(Root, Nest, Addr, TrmpSV->getValue(),
- TrmpSV->getOffset() + 12, false, 2);
+ OutChains[3] = DAG.getStore(Root, Nest, Addr, TrmpAddr, 12, false, 2);
// Jump to the nested function.
OpCode = (JMP64r << 8) | REX_WB; // jmpq *...
Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(20, MVT::i64));
OutChains[4] = DAG.getStore(Root, DAG.getConstant(OpCode, MVT::i16), Addr,
- TrmpSV->getValue(), TrmpSV->getOffset() + 20);
+ TrmpAddr, 20);
unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11
Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(22, MVT::i64));
OutChains[5] = DAG.getStore(Root, DAG.getConstant(ModRM, MVT::i8), Addr,
- TrmpSV->getValue(), TrmpSV->getOffset() + 22);
+ TrmpAddr, 22);
SDOperand Ops[] =
{ Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 6) };
// Check that ECX wasn't needed by an 'inreg' parameter.
const FunctionType *FTy = Func->getFunctionType();
- const ParamAttrsList *Attrs = Func->getParamAttrs();
+ const PAListPtr &Attrs = Func->getParamAttrs();
- if (Attrs && !Func->isVarArg()) {
+ if (!Attrs.isEmpty() && !Func->isVarArg()) {
unsigned InRegCount = 0;
unsigned Idx = 1;
for (FunctionType::param_iterator I = FTy->param_begin(),
E = FTy->param_end(); I != E; ++I, ++Idx)
- if (Attrs->paramHasAttr(Idx, ParamAttr::InReg))
+ if (Attrs.paramHasAttr(Idx, ParamAttr::InReg))
// FIXME: should only count parameters that are lowered to integers.
InRegCount += (getTargetData()->getTypeSizeInBits(*I) + 31) / 32;
const unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri);
const unsigned char N86Reg =
- ((X86RegisterInfo*)RegInfo)->getX86RegNum(NestReg);
+ ((const X86RegisterInfo*)RegInfo)->getX86RegNum(NestReg);
OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
- Trmp, TrmpSV->getValue(), TrmpSV->getOffset());
+ Trmp, TrmpAddr, 0);
Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32));
- OutChains[1] = DAG.getStore(Root, Nest, Addr, TrmpSV->getValue(),
- TrmpSV->getOffset() + 1, false, 1);
+ OutChains[1] = DAG.getStore(Root, Nest, Addr, TrmpAddr, 1, false, 1);
const unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP);
Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32));
OutChains[2] = DAG.getStore(Root, DAG.getConstant(JMP, MVT::i8), Addr,
- TrmpSV->getValue() + 5, TrmpSV->getOffset());
+ TrmpAddr, 5, false, 1);
Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32));
- OutChains[3] = DAG.getStore(Root, Disp, Addr, TrmpSV->getValue(),
- TrmpSV->getOffset() + 6, false, 1);
+ OutChains[3] = DAG.getStore(Root, Disp, Addr, TrmpAddr, 6, false, 1);
SDOperand Ops[] =
{ Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 4) };
return Op;
}
+SDOperand X86TargetLowering::LowerLCS(SDOperand Op, SelectionDAG &DAG) {
+ MVT::ValueType T = cast<AtomicSDNode>(Op.Val)->getVT();
+ unsigned Reg = 0;
+ unsigned size = 0;
+  switch (T) {
+  default: assert(0 && "Unexpected value type for ATOMIC_LCS!");
+  case MVT::i8:  Reg = X86::AL;  size = 1; break;
+  case MVT::i16: Reg = X86::AX;  size = 2; break;
+  case MVT::i32: Reg = X86::EAX; size = 4; break;
+  case MVT::i64:
+    if (Subtarget->is64Bit()) {
+      Reg = X86::RAX; size = 8;
+    } else // Should go away when the LowerType stuff lands.
+      return SDOperand(ExpandATOMIC_LCS(Op.Val, DAG), 0);
+    break;
+  }
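+  // lock cmpxchg expects the comparand in the accumulator (AL/AX/EAX/RAX)
+  // and leaves the original memory value there, hence the explicit copies
+  // into and out of Reg around the LCMPXCHG_DAG node.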
+ SDOperand cpIn = DAG.getCopyToReg(Op.getOperand(0), Reg,
+ Op.getOperand(3), SDOperand());
+ SDOperand Ops[] = { cpIn.getValue(0),
+ Op.getOperand(1),
+ Op.getOperand(2),
+ DAG.getTargetConstant(size, MVT::i8),
+ cpIn.getValue(1) };
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDOperand Result = DAG.getNode(X86ISD::LCMPXCHG_DAG, Tys, Ops, 5);
+ SDOperand cpOut =
+ DAG.getCopyFromReg(Result.getValue(0), Reg, T, Result.getValue(1));
+ return cpOut;
+}
+
+SDNode* X86TargetLowering::ExpandATOMIC_LCS(SDNode* Op, SelectionDAG &DAG) {
+ MVT::ValueType T = cast<AtomicSDNode>(Op)->getVT();
+  assert(T == MVT::i64 && "Only know how to expand i64 CAS");
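+  // cmpxchg8b compares EDX:EAX with the memory operand and, if equal, stores
+  // ECX:EBX; otherwise it loads the memory value into EDX:EAX. So both i64
+  // values are split into 32-bit halves pinned to those register pairs.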
+ SDOperand cpInL, cpInH;
+ cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op->getOperand(3),
+ DAG.getConstant(0, MVT::i32));
+ cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op->getOperand(3),
+ DAG.getConstant(1, MVT::i32));
+ cpInL = DAG.getCopyToReg(Op->getOperand(0), X86::EAX,
+ cpInL, SDOperand());
+ cpInH = DAG.getCopyToReg(cpInL.getValue(0), X86::EDX,
+ cpInH, cpInL.getValue(1));
+ SDOperand swapInL, swapInH;
+ swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op->getOperand(2),
+ DAG.getConstant(0, MVT::i32));
+ swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op->getOperand(2),
+ DAG.getConstant(1, MVT::i32));
+ swapInL = DAG.getCopyToReg(cpInH.getValue(0), X86::EBX,
+ swapInL, cpInH.getValue(1));
+ swapInH = DAG.getCopyToReg(swapInL.getValue(0), X86::ECX,
+ swapInH, swapInL.getValue(1));
+ SDOperand Ops[] = { swapInH.getValue(0),
+ Op->getOperand(1),
+ swapInH.getValue(1)};
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDOperand Result = DAG.getNode(X86ISD::LCMPXCHG8_DAG, Tys, Ops, 3);
+ SDOperand cpOutL = DAG.getCopyFromReg(Result.getValue(0), X86::EAX, MVT::i32,
+ Result.getValue(1));
+ SDOperand cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), X86::EDX, MVT::i32,
+ cpOutL.getValue(2));
+ SDOperand OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)};
+ SDOperand ResultVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OpsF, 2);
+ Tys = DAG.getVTList(MVT::i64, MVT::Other);
+ return DAG.getNode(ISD::MERGE_VALUES, Tys, ResultVal, cpOutH.getValue(1)).Val;
+}
+
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
switch (Op.getOpcode()) {
default: assert(0 && "Should not custom lower this!");
+  case ISD::ATOMIC_LCS:         return LowerLCS(Op, DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::CALL: return LowerCALL(Op, DAG);
case ISD::RET: return LowerRET(Op, DAG);
case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
- case ISD::MEMSET: return LowerMEMSET(Op, DAG);
- case ISD::MEMCPY: return LowerMEMCPY(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::VACOPY: return LowerVACOPY(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
default: assert(0 && "Should not custom lower this!");
case ISD::FP_TO_SINT: return ExpandFP_TO_SINT(N, DAG);
case ISD::READCYCLECOUNTER: return ExpandREADCYCLECOUNTER(N, DAG);
+ case ISD::ATOMIC_LCS: return ExpandATOMIC_LCS(N, DAG);
}
}
case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
case X86ISD::FLD: return "X86ISD::FLD";
case X86ISD::FST: return "X86ISD::FST";
- case X86ISD::FP_GET_RESULT: return "X86ISD::FP_GET_RESULT";
- case X86ISD::FP_GET_RESULT2: return "X86ISD::FP_GET_RESULT2";
- case X86ISD::FP_SET_RESULT: return "X86ISD::FP_SET_RESULT";
case X86ISD::CALL: return "X86ISD::CALL";
case X86ISD::TAILCALL: return "X86ISD::TAILCALL";
case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG";
case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS";
case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
case X86ISD::Wrapper: return "X86ISD::Wrapper";
- case X86ISD::S2VEC: return "X86ISD::S2VEC";
+ case X86ISD::PEXTRB: return "X86ISD::PEXTRB";
case X86ISD::PEXTRW: return "X86ISD::PEXTRW";
+ case X86ISD::INSERTPS: return "X86ISD::INSERTPS";
+ case X86ISD::PINSRB: return "X86ISD::PINSRB";
case X86ISD::PINSRW: return "X86ISD::PINSRW";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMIN: return "X86ISD::FMIN";
case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
+  case X86ISD::LCMPXCHG_DAG:  return "X86ISD::LCMPXCHG_DAG";
+  case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG";
}
}
X86::isUNPCKH_v_undef_Mask(Mask.Val));
}
-bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
- MVT::ValueType EVT,
- SelectionDAG &DAG) const {
+bool
+X86TargetLowering::isVectorClearMaskLegal(const std::vector<SDOperand> &BVOps,
+ MVT::ValueType EVT,
+ SelectionDAG &DAG) const {
unsigned NumElts = BVOps.size();
// Only do shuffles on 128-bit vector types for now.
if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
//===----------------------------------------------------------------------===//
void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
- uint64_t Mask,
- uint64_t &KnownZero,
- uint64_t &KnownOne,
+ const APInt &Mask,
+ APInt &KnownZero,
+ APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const {
unsigned Opc = Op.getOpcode();
"Should use MaskedValueIsZero if you don't know whether Op"
" is a target node!");
- KnownZero = KnownOne = 0; // Don't know anything.
+ KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); // Don't know anything.
switch (Opc) {
default: break;
case X86ISD::SETCC:
- KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
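+    // SETCC materializes only 0 or 1, so all bits above bit 0 are known zero.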
+ KnownZero |= APInt::getHighBitsSet(Mask.getBitWidth(),
+ Mask.getBitWidth() - 1);
break;
}
}
static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI,
const X86Subtarget *Subtarget) {
GlobalValue *GV;
- int64_t Offset;
+ int64_t Offset = 0;
if (isGAPlusOffset(Base, GV, Offset))
return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
// DAG combine handles the stack object case.
return SDOperand();
}
+/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
+static SDOperand PerformSTORECombine(StoreSDNode *St, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ // Turn load->store of MMX types into GPR load/stores. This avoids clobbering
+ // the FP state in cases where an emms may be missing.
+ // A preferable solution to the general problem is to figure out the right
+ // places to insert EMMS. This qualifies as a quick hack.
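+  // E.g. (illustrative) "store v8i8 (load p), q" becomes one i64 GPR
+  // load/store pair on x86-64, or two i32 pairs on 32-bit targets, without
+  // touching the MMX register file.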
+ if (MVT::isVector(St->getValue().getValueType()) &&
+ MVT::getSizeInBits(St->getValue().getValueType()) == 64 &&
+ isa<LoadSDNode>(St->getValue()) &&
+ !cast<LoadSDNode>(St->getValue())->isVolatile() &&
+ St->getChain().hasOneUse() && !St->isVolatile()) {
+ SDNode* LdVal = St->getValue().Val;
+ LoadSDNode *Ld = 0;
+ int TokenFactorIndex = -1;
+ SmallVector<SDOperand, 8> Ops;
+ SDNode* ChainVal = St->getChain().Val;
+ // Must be a store of a load. We currently handle two cases: the load
+ // is a direct child, and it's under an intervening TokenFactor. It is
+ // possible to dig deeper under nested TokenFactors.
+ if (ChainVal == LdVal)
+ Ld = cast<LoadSDNode>(St->getChain());
+ else if (St->getValue().hasOneUse() &&
+ ChainVal->getOpcode() == ISD::TokenFactor) {
+ for (unsigned i=0, e = ChainVal->getNumOperands(); i != e; ++i) {
+ if (ChainVal->getOperand(i).Val == LdVal) {
+ TokenFactorIndex = i;
+ Ld = cast<LoadSDNode>(St->getValue());
+ } else
+ Ops.push_back(ChainVal->getOperand(i));
+ }
+ }
+ if (Ld) {
+ // If we are a 64-bit capable x86, lower to a single movq load/store pair.
+ if (Subtarget->is64Bit()) {
+ SDOperand NewLd = DAG.getLoad(MVT::i64, Ld->getChain(),
+ Ld->getBasePtr(), Ld->getSrcValue(),
+ Ld->getSrcValueOffset(), Ld->isVolatile(),
+ Ld->getAlignment());
+ SDOperand NewChain = NewLd.getValue(1);
+ if (TokenFactorIndex != -1) {
+ Ops.push_back(NewChain);
+ NewChain = DAG.getNode(ISD::TokenFactor, MVT::Other, &Ops[0],
+ Ops.size());
+ }
+ return DAG.getStore(NewChain, NewLd, St->getBasePtr(),
+ St->getSrcValue(), St->getSrcValueOffset(),
+ St->isVolatile(), St->getAlignment());
+ }
+
+ // Otherwise, lower to two 32-bit copies.
+ SDOperand LoAddr = Ld->getBasePtr();
+      SDOperand HiAddr = DAG.getNode(ISD::ADD, MVT::i32, LoAddr,
+                                     DAG.getConstant(4, MVT::i32));
+
+ SDOperand LoLd = DAG.getLoad(MVT::i32, Ld->getChain(), LoAddr,
+ Ld->getSrcValue(), Ld->getSrcValueOffset(),
+ Ld->isVolatile(), Ld->getAlignment());
+ SDOperand HiLd = DAG.getLoad(MVT::i32, Ld->getChain(), HiAddr,
+ Ld->getSrcValue(), Ld->getSrcValueOffset()+4,
+ Ld->isVolatile(),
+ MinAlign(Ld->getAlignment(), 4));
+
+ SDOperand NewChain = LoLd.getValue(1);
+ if (TokenFactorIndex != -1) {
+ Ops.push_back(LoLd);
+ Ops.push_back(HiLd);
+ NewChain = DAG.getNode(ISD::TokenFactor, MVT::Other, &Ops[0],
+ Ops.size());
+ }
+
+ LoAddr = St->getBasePtr();
+      HiAddr = DAG.getNode(ISD::ADD, MVT::i32, LoAddr,
+                           DAG.getConstant(4, MVT::i32));
+
+ SDOperand LoSt = DAG.getStore(NewChain, LoLd, LoAddr,
+ St->getSrcValue(), St->getSrcValueOffset(),
+ St->isVolatile(), St->getAlignment());
+ SDOperand HiSt = DAG.getStore(NewChain, HiLd, HiAddr,
+ St->getSrcValue(), St->getSrcValueOffset()+4,
+ St->isVolatile(),
+ MinAlign(St->getAlignment(), 4));
+ return DAG.getNode(ISD::TokenFactor, MVT::Other, LoSt, HiSt);
+ }
+ }
+ return SDOperand();
+}
+
/// PerformFORCombine - Do target-specific dag combines on X86ISD::FOR and
/// X86ISD::FXOR nodes.
static SDOperand PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
default: break;
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, Subtarget);
case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
+ case ISD::STORE:
+ return PerformSTORECombine(cast<StoreSDNode>(N), DAG, Subtarget);
case X86ISD::FXOR:
case X86ISD::FOR: return PerformFORCombine(N, DAG);
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'A':
+ case 'f':
case 'r':
case 'R':
case 'l':
case 'q':
case 'Q':
case 'x':
+ case 'y':
case 'Y':
return C_RegisterClass;
default:
else if (VT == MVT::i8)
return std::make_pair(0U, X86::GR8RegisterClass);
break;
+ case 'f': // FP Stack registers.
+ // If SSE is enabled for this VT, use f80 to ensure the isel moves the
+ // value to the correct fpstack register class.
+ if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT))
+ return std::make_pair(0U, X86::RFP32RegisterClass);
+ if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT))
+ return std::make_pair(0U, X86::RFP64RegisterClass);
+ return std::make_pair(0U, X86::RFP80RegisterClass);
case 'y': // MMX_REGS if MMX allowed.
if (!Subtarget->hasMMX()) break;
return std::make_pair(0U, X86::VR64RegisterClass);