//
// The LLVM Compiler Infrastructure
//
-// This file was developed by Chris Lattner and is distributed under
-// the University of Illinois Open Source License. See LICENSE.TXT for details.
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ParameterAttributes.h"
using namespace llvm;
setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);
+ // We don't accept any truncstore of integer registers.
+ setTruncStoreAction(MVT::i64, MVT::i32, Expand);
+ setTruncStoreAction(MVT::i64, MVT::i16, Expand);
+ setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
+ setTruncStoreAction(MVT::i32, MVT::i16, Expand);
+ setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
+ setTruncStoreAction(MVT::i16, MVT::i8, Expand);
+
// Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
// operation.
setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
setOperationAction(ISD::FREM , MVT::f64 , Expand);
-
+ setOperationAction(ISD::FLT_ROUNDS , MVT::i32 , Custom);
+
setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
- setOperationAction(ISD::CTTZ , MVT::i8 , Expand);
- setOperationAction(ISD::CTLZ , MVT::i8 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i8 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
- setOperationAction(ISD::CTTZ , MVT::i16 , Expand);
- setOperationAction(ISD::CTLZ , MVT::i16 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
- setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
- setOperationAction(ISD::CTLZ , MVT::i32 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
- setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
- setOperationAction(ISD::CTLZ , MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
+ setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
}
setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
setOperationAction(ISD::VAARG , MVT::Other, Expand);
setOperationAction(ISD::SDIVREM, (MVT::ValueType)VT, Expand);
setOperationAction(ISD::UDIVREM, (MVT::ValueType)VT, Expand);
setOperationAction(ISD::FPOW, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::CTPOP, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::CTTZ, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::CTLZ, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::SHL, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::SRA, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::SRL, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::ROTL, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::ROTR, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::BSWAP, (MVT::ValueType)VT, Expand);
}
if (Subtarget->hasMMX()) {
setOperationAction(ISD::SUB, MVT::v8i8, Legal);
setOperationAction(ISD::SUB, MVT::v4i16, Legal);
setOperationAction(ISD::SUB, MVT::v2i32, Legal);
+ setOperationAction(ISD::SUB, MVT::v1i64, Legal);
setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
setOperationAction(ISD::MUL, MVT::v4i16, Legal);
// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
+ // Do not attempt to custom lower non-power-of-2 vectors
+ if (!isPowerOf2_32(MVT::getVectorNumElements(VT)))
+ continue;
setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
// Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
}
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
// Custom lower v2i64 and v2f64 selects.
setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
}
+/// getPICJumpTableRelocBase - Returns relocation base for the given PIC
+/// jumptable.
+SDOperand X86TargetLowering::getPICJumpTableRelocBase(SDOperand Table,
+ SelectionDAG &DAG) const {
+ if (usesGlobalOffsetTable()) // GOT in use: the base is a GOT node.
+ return DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, getPointerTy());
+ if (!Subtarget->isPICStyleRIPRel()) // Not RIP-relative: use the global base reg.
+ return DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy());
+ return Table; // RIP-relative PIC: the table itself is the base.
+}
+
//===----------------------------------------------------------------------===//
// Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//
/// GetPossiblePreceedingTailCall - Get preceeding X86ISD::TAILCALL node if it
/// exists skip possible ISD:TokenFactor.
static SDOperand GetPossiblePreceedingTailCall(SDOperand Chain) {
- if (Chain.getOpcode()==X86ISD::TAILCALL) {
+ if (Chain.getOpcode() == X86ISD::TAILCALL) { // Chain is the tail call itself.
return Chain;
- } else if (Chain.getOpcode()==ISD::TokenFactor) {
+ } else if (Chain.getOpcode() == ISD::TokenFactor) { // Look through a TokenFactor.
if (Chain.getNumOperands() &&
- Chain.getOperand(0).getOpcode()==X86ISD::TAILCALL)
+ Chain.getOperand(0).getOpcode() == X86ISD::TAILCALL) // Only operand 0 is checked.
return Chain.getOperand(0);
}
return Chain;
}
-
+
/// LowerRET - Lower an ISD::RET node.
SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");
// If this is the first return lowered for this function, add the regs to the
// liveout set for the function.
- if (DAG.getMachineFunction().liveout_empty()) {
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
for (unsigned i = 0; i != RVLocs.size(); ++i)
if (RVLocs[i].isRegLoc())
- DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
}
SDOperand Chain = Op.getOperand(0);
SDOperand TailCall = Chain;
SDOperand TargetAddress = TailCall.getOperand(1);
SDOperand StackAdjustment = TailCall.getOperand(2);
- assert ( ((TargetAddress.getOpcode() == ISD::Register &&
+ assert(((TargetAddress.getOpcode() == ISD::Register &&
(cast<RegisterSDNode>(TargetAddress)->getReg() == X86::ECX ||
cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R9)) ||
TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
TargetAddress.getOpcode() == ISD::TargetGlobalAddress) &&
"Expecting an global address, external symbol, or register");
- assert( StackAdjustment.getOpcode() == ISD::Constant &&
- "Expecting a const value");
+ assert(StackAdjustment.getOpcode() == ISD::Constant &&
+ "Expecting a const value");
SmallVector<SDOperand,8> Operands;
Operands.push_back(Chain.getOperand(0));
Operands.push_back(StackAdjustment);
// Copy registers used by the call. Last operand is a flag so it is not
// copied.
- for(unsigned i=3; i < TailCall.getNumOperands()-1;i++) {
+ for (unsigned i=3; i < TailCall.getNumOperands()-1; i++) {
Operands.push_back(Chain.getOperand(i));
}
- return DAG.getNode(X86ISD::TC_RETURN, MVT::Other, &Operands[0], Operands.size());
+ return DAG.getNode(X86ISD::TC_RETURN, MVT::Other, &Operands[0],
+ Operands.size());
}
// Regular return.
// If this is an FP return with ScalarSSE, we need to move the value from
// an XMM register onto the fp-stack.
- if ((X86ScalarSSEf32 && RVLocs[0].getValVT()==MVT::f32) ||
- (X86ScalarSSEf64 && RVLocs[0].getValVT()==MVT::f64)) {
+ if (isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
SDOperand MemLoc;
// If this is a load into a scalarsse value, don't store the loaded value
// back to the stack, only to reload it: just replace the scalar-sse load.
if (ISD::isNON_EXTLoad(Value.Val) &&
- (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
+ Chain.reachesChainWithoutSideEffects(Value.getOperand(0))) {
Chain = Value.getOperand(0);
MemLoc = Value.getOperand(1);
} else {
CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);
-
SmallVector<SDOperand, 8> ResultVals;
// Copy all of the result registers out of their specified physreg.
// If we are using ScalarSSE, store ST(0) to the stack and reload it into
// an XMM register.
- if ((X86ScalarSSEf32 && RVLocs[0].getValVT() == MVT::f32) ||
- (X86ScalarSSEf64 && RVLocs[0].getValVT() == MVT::f64)) {
+ if (isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
+ SDOperand StoreLoc;
+ const Value *SrcVal = 0;
+ int SrcValOffset = 0;
+ MVT::ValueType RetStoreVT = RVLocs[0].getValVT();
+
+ // Determine where to store the value. If the call result is directly
+ // used by a store, see if we can store directly into the location. In
+ // this case, we'll end up producing a fst + movss[load] + movss[store] to
+ // the same location, and the two movss's will be nuked as dead. This
+ // optimizes common things like "*D = atof(..)" to not need an
+ // intermediate stack slot.
+ if (SDOperand(TheCall, 0).hasOneUse() &&
+ SDOperand(TheCall, 1).hasOneUse()) {
+ // In addition to direct uses, we also support a FP_ROUND that uses the
+ // value, if it is directly stored somewhere.
+ SDNode *User = *TheCall->use_begin();
+ if (User->getOpcode() == ISD::FP_ROUND && User->hasOneUse())
+ User = *User->use_begin();
+
+ // Ok, we have one use of the value and one use of the chain. See if
+ // they are the same node: a store.
+ if (StoreSDNode *N = dyn_cast<StoreSDNode>(User)) {
+ // Verify that the value being stored is either the call or a
+ // truncation of the call.
+ SDNode *StoreVal = N->getValue().Val;
+ if (StoreVal == TheCall)
+ ; // ok.
+ else if (StoreVal->getOpcode() == ISD::FP_ROUND &&
+ StoreVal->hasOneUse() &&
+ StoreVal->getOperand(0).Val == TheCall)
+ ; // ok.
+ else
+ N = 0; // not ok.
+
+ if (N && N->getChain().Val == TheCall &&
+ !N->isVolatile() && !N->isTruncatingStore() &&
+ N->getAddressingMode() == ISD::UNINDEXED) {
+ StoreLoc = N->getBasePtr();
+ SrcVal = N->getSrcValue();
+ SrcValOffset = N->getSrcValueOffset();
+ RetStoreVT = N->getValue().getValueType();
+ }
+ }
+ }
+
+ // If we weren't able to optimize the result, just create a temporary
+ // stack slot.
+ if (StoreLoc.Val == 0) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
+ StoreLoc = DAG.getFrameIndex(SSFI, getPointerTy());
+ }
+
// FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
// shouldn't be necessary except that RFP cannot be live across
- // multiple blocks. When stackifier is fixed, they can be uncoupled.
- MachineFunction &MF = DAG.getMachineFunction();
- int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
- SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ // multiple blocks (which could happen if a select gets lowered into
+ // multiple blocks and scheduled in between them). When stackifier is
+ // fixed, they can be uncoupled.
SDOperand Ops[] = {
- Chain, RetVal, StackSlot, DAG.getValueType(RVLocs[0].getValVT()), InFlag
+ Chain, RetVal, StoreLoc, DAG.getValueType(RetStoreVT), InFlag
};
Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5);
- RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0);
+ RetVal = DAG.getLoad(RetStoreVT, Chain,
+ StoreLoc, SrcVal, SrcValOffset);
Chain = RetVal.getValue(1);
+
+ // If we optimized a truncate, then extend the result back to its desired
+ // type.
+ if (RVLocs[0].getValVT() != RetStoreVT)
+ RetVal = DAG.getNode(ISD::FP_EXTEND, RVLocs[0].getValVT(), RetVal);
}
ResultVals.push_back(RetVal);
}
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
const TargetRegisterClass *RC) {
assert(RC->contains(PReg) && "Not the correct regclass!");
- unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
- MF.addLiveIn(PReg, VReg);
+ unsigned VReg = MF.getRegInfo().createVirtualRegister(RC); // New vreg of class RC.
+ MF.getRegInfo().addLiveIn(PReg, VReg); // Register PReg as live-in, paired with VReg.
return VReg;
}
-// align stack arguments according to platform alignment needed for tail calls
-unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG& DAG);
-
-SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
- const CCValAssign &VA,
- MachineFrameInfo *MFI,
- SDOperand Root, unsigned i) {
- // Create the nodes corresponding to a load from this parameter slot.
- int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
- VA.getLocMemOffset());
- SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
-
- unsigned Flags = cast<ConstantSDNode>(Op.getOperand(3 + i))->getValue();
-
- if (Flags & ISD::ParamFlags::ByVal)
- return FIN;
- else
- return DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0);
+// Determines whether a CALL node uses struct return semantics.
+static bool CallIsStructReturn(SDOperand Op) {
+ unsigned NumOps = (Op.getNumOperands() - 5) / 2; // Presumably (arg, flags) pairs after 5 leading operands; confirm.
+ if (!NumOps) // NumOps == 0: there is no flags operand to inspect.
+ return false;
+
+ ConstantSDNode *Flags = cast<ConstantSDNode>(Op.getOperand(6)); // Presumably the first argument's flags.
+ return Flags->getValue() & ISD::ParamFlags::StructReturn; // True iff the StructReturn bit is set.
}
-SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG,
- bool isStdCall) {
+// Determines whether a FORMAL_ARGUMENTS node uses struct return semantics.
+static bool ArgsAreStructReturn(SDOperand Op) {
unsigned NumArgs = Op.Val->getNumValues() - 1;
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- SDOperand Root = Op.getOperand(0);
- bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
- unsigned CC = MF.getFunction()->getCallingConv();
- // Assign locations to all of the incoming arguments.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg,
- getTargetMachine(), ArgLocs);
- // Check for possible tail call calling convention.
- if (CC == CallingConv::Fast && PerformTailCallOpt)
- CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_TailCall);
- else
- CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C);
-
- SmallVector<SDOperand, 8> ArgValues;
- unsigned LastVal = ~0U;
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
- // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
- // places.
- assert(VA.getValNo() != LastVal &&
- "Don't support value assigned to multiple locs yet");
- LastVal = VA.getValNo();
-
- if (VA.isRegLoc()) {
- MVT::ValueType RegVT = VA.getLocVT();
- TargetRegisterClass *RC;
- if (RegVT == MVT::i32)
- RC = X86::GR32RegisterClass;
- else {
- assert(MVT::isVector(RegVT));
- RC = X86::VR128RegisterClass;
- }
-
- unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
- SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);
-
- // If this is an 8 or 16-bit value, it is really passed promoted to 32
- // bits. Insert an assert[sz]ext to capture this, then truncate to the
- // right size.
- if (VA.getLocInfo() == CCValAssign::SExt)
- ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
- DAG.getValueType(VA.getValVT()));
- else if (VA.getLocInfo() == CCValAssign::ZExt)
- ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
- DAG.getValueType(VA.getValVT()));
-
- if (VA.getLocInfo() != CCValAssign::Full)
- ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
-
- ArgValues.push_back(ArgValue);
- } else {
- assert(VA.isMemLoc());
- ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
- }
- }
+ if (!NumArgs) // The node yields a single value, so there are no flags to inspect.
+ return false;
- unsigned StackSize = CCInfo.getNextStackOffset();
- // align stack specially for tail calls
- if (CC==CallingConv::Fast)
- StackSize = GetAlignedArgumentStackSize(StackSize,DAG);
-
- ArgValues.push_back(Root);
-
- // If the function takes variable number of arguments, make a frame index for
- // the start of the first vararg value... for expansion of llvm.va_start.
- if (isVarArg)
- VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
-
- // Tail call calling convention (CallingConv::Fast) does not support varargs.
- assert( !(isVarArg && CC == CallingConv::Fast) &&
- "CallingConv::Fast does not support varargs.");
-
- if (isStdCall && !isVarArg &&
- (CC==CallingConv::Fast && PerformTailCallOpt || CC!=CallingConv::Fast)) {
- BytesToPopOnReturn = StackSize; // Callee pops everything..
- BytesCallerReserves = 0;
- } else {
- BytesToPopOnReturn = 0; // Callee pops nothing.
-
- // If this is an sret function, the return should pop the hidden pointer.
- if (NumArgs &&
- (cast<ConstantSDNode>(Op.getOperand(3))->getValue() &
- ISD::ParamFlags::StructReturn))
- BytesToPopOnReturn = 4;
-
- BytesCallerReserves = StackSize;
- }
-
- RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
-
- X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
- FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
-
- // Return the new list of results.
- return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
- &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
+ ConstantSDNode *Flags = cast<ConstantSDNode>(Op.getOperand(3)); // Presumably the first argument's flags.
+ return Flags->getValue() & ISD::ParamFlags::StructReturn; // True iff the StructReturn bit is set.
}
-SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG,
- unsigned CC) {
- SDOperand Chain = Op.getOperand(0);
- bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
- SDOperand Callee = Op.getOperand(4);
- unsigned NumOps = (Op.getNumOperands() - 5) / 2;
-
- // Analyze operands of the call, assigning locations to each operand.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
- if(CC==CallingConv::Fast && PerformTailCallOpt)
- CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_TailCall);
- else
- CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C);
-
- // Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = CCInfo.getNextStackOffset();
- if (CC==CallingConv::Fast)
- NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
-
- Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
-
- SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
- SmallVector<SDOperand, 8> MemOpChains;
-
- SDOperand StackPtr;
-
- // Walk the register/memloc assignments, inserting copies/loads.
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
- SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
-
- // Promote the value if needed.
- switch (VA.getLocInfo()) {
- default: assert(0 && "Unknown loc info!");
- case CCValAssign::Full: break;
- case CCValAssign::SExt:
- Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
- break;
- case CCValAssign::ZExt:
- Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
- break;
- case CCValAssign::AExt:
- Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
- break;
- }
-
- if (VA.isRegLoc()) {
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- } else {
- assert(VA.isMemLoc());
- if (StackPtr.Val == 0)
- StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
-
- MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
- Arg));
- }
- }
-
- // If the first argument is an sret pointer, remember it.
- bool isSRet = NumOps &&
- (cast<ConstantSDNode>(Op.getOperand(6))->getValue() &
- ISD::ParamFlags::StructReturn);
-
- if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
-
- // Build a sequence of copy-to-reg nodes chained together with token chain
- // and flag operands which copy the outgoing args into registers.
- SDOperand InFlag;
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
- InFlag);
- InFlag = Chain.getValue(1);
- }
+// Determines whether a CALL or FORMAL_ARGUMENTS node requires the callee to pop
+// its own arguments. Callee pop is necessary to support tail calls.
+bool X86TargetLowering::IsCalleePop(SDOperand Op) {
+ bool IsVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
+ if (IsVarArg) // Vararg nodes never use callee pop.
+ return false;
- // ELF / PIC requires GOT in the EBX register before function calls via PLT
- // GOT pointer.
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
- Subtarget->isPICStyleGOT()) {
- Chain = DAG.getCopyToReg(Chain, X86::EBX,
- DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
- InFlag);
- InFlag = Chain.getValue(1);
+ switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) { // Operand 1: calling convention.
+ default:
+ return false;
+ case CallingConv::X86_StdCall:
+ return !Subtarget->is64Bit(); // Callee pops only when not targeting 64-bit.
+ case CallingConv::X86_FastCall:
+ return !Subtarget->is64Bit(); // Same rule as stdcall.
+ case CallingConv::Fast:
+ return PerformTailCallOpt; // fastcc pops only when tail-call opt is enabled.
}
-
- // If the callee is a GlobalAddress node (quite common, every direct call is)
- // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- // We should use extra load for direct calls to dllimported functions in
- // non-JIT mode.
- if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
- getTargetMachine(), true))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
- } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
- Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
-
- // Returns a chain & a flag for retval copy to use.
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
- SmallVector<SDOperand, 8> Ops;
- Ops.push_back(Chain);
- Ops.push_back(Callee);
-
- // Add argument registers to the end of the list so that they are known live
- // into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
+}
- // Add an implicit use GOT pointer in EBX.
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
- Subtarget->isPICStyleGOT())
- Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
+// Selects the correct CCAssignFn for a CALL or FORMAL_ARGUMENTS node.
+CCAssignFn *X86TargetLowering::CCAssignFnForNode(SDOperand Op) const {
+ unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); // Calling convention from operand 1.
- if (InFlag.Val)
- Ops.push_back(InFlag);
-
- Chain = DAG.getNode(X86ISD::CALL, NodeTys, &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
-
- // Create the CALLSEQ_END node.
- unsigned NumBytesForCalleeToPush = 0;
-
- if (CC == CallingConv::X86_StdCall ||
- (CC == CallingConv::Fast && PerformTailCallOpt)) {
- if (isVarArg)
- NumBytesForCalleeToPush = isSRet ? 4 : 0;
+ if (Subtarget->is64Bit()) // 64-bit: only fastcc with tail-call opt differs from C.
+ if (CC == CallingConv::Fast && PerformTailCallOpt)
+ return CC_X86_64_TailCall;
else
- NumBytesForCalleeToPush = NumBytes;
- assert(!(isVarArg && CC==CallingConv::Fast) &&
- "CallingConv::Fast does not support varargs.");
- } else {
- // If this is is a call to a struct-return function, the callee
- // pops the hidden struct pointer, so we have to push it back.
- // This is common for Darwin/X86, Linux & Mingw32 targets.
- NumBytesForCalleeToPush = isSRet ? 4 : 0;
- }
+ return CC_X86_64_C;
- NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
- Ops.clear();
- Ops.push_back(Chain);
- Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
- Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
- Ops.push_back(InFlag);
- Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
-
- // Handle result values, copying them out of physregs into vregs that we
- // return.
- return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
+ if (CC == CallingConv::X86_FastCall) // Reached only when not 64-bit.
+ return CC_X86_32_FastCall;
+ else if (CC == CallingConv::Fast && PerformTailCallOpt)
+ return CC_X86_32_TailCall;
+ else
+ return CC_X86_32_C; // Every remaining convention uses the 32-bit C rules.
}
+// Selects the appropriate decoration to apply to a MachineFunction containing a
+// given FORMAL_ARGUMENTS node.
+NameDecorationStyle
+X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDOperand Op) {
+ unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); // Calling convention from operand 1.
+ if (CC == CallingConv::X86_FastCall)
+ return FastCall;
+ else if (CC == CallingConv::X86_StdCall)
+ return StdCall;
+ return None; // Every other convention gets no decoration.
+}
-//===----------------------------------------------------------------------===//
-// FastCall Calling Convention implementation
-//===----------------------------------------------------------------------===//
-//
-// The X86 'fastcall' calling convention passes up to two integer arguments in
-// registers (an appropriate portion of ECX/EDX), passes arguments in C order,
-// and requires that the callee pop its arguments off the stack (allowing proper
-// tail calls), and has the same return value conventions as C calling convs.
-//
-// This calling convention always arranges for the callee pop value to be 8n+4
-// bytes, which is needed for tail recursion elimination and stack alignment
-// reasons.
-SDOperand
-X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- SDOperand Root = Op.getOperand(0);
- bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
- // Assign locations to all of the incoming arguments.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg,
- getTargetMachine(), ArgLocs);
- CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_FastCall);
-
- SmallVector<SDOperand, 8> ArgValues;
- unsigned LastVal = ~0U;
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
- // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
- // places.
- assert(VA.getValNo() != LastVal &&
- "Don't support value assigned to multiple locs yet");
- LastVal = VA.getValNo();
-
- if (VA.isRegLoc()) {
- MVT::ValueType RegVT = VA.getLocVT();
- TargetRegisterClass *RC;
- if (RegVT == MVT::i32)
- RC = X86::GR32RegisterClass;
- else {
- assert(MVT::isVector(RegVT));
- RC = X86::VR128RegisterClass;
- }
-
- unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
- SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);
-
- // If this is an 8 or 16-bit value, it is really passed promoted to 32
- // bits. Insert an assert[sz]ext to capture this, then truncate to the
- // right size.
- if (VA.getLocInfo() == CCValAssign::SExt)
- ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
- DAG.getValueType(VA.getValVT()));
- else if (VA.getLocInfo() == CCValAssign::ZExt)
- ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
- DAG.getValueType(VA.getValVT()));
-
- if (VA.getLocInfo() != CCValAssign::Full)
- ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
-
- ArgValues.push_back(ArgValue);
- } else {
- assert(VA.isMemLoc());
- ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
- }
- }
-
- ArgValues.push_back(Root);
-
- unsigned StackSize = CCInfo.getNextStackOffset();
-
- if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
- // Make sure the instruction takes 8n+4 bytes to make sure the start of the
- // arguments and the arguments after the retaddr has been pushed are
- // aligned.
- if ((StackSize & 7) == 0)
- StackSize += 4;
- }
-
- VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
- RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
- BytesToPopOnReturn = StackSize; // Callee pops all stack arguments.
- BytesCallerReserves = 0;
-
- X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
- FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
-
- // Return the new list of results.
- return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
- &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
-}
-
-SDOperand
-X86TargetLowering::LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG,
- const SDOperand &StackPtr,
- const CCValAssign &VA,
- SDOperand Chain,
- SDOperand Arg) {
- SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
- PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
- SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
- unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
- if (Flags & ISD::ParamFlags::ByVal) {
- unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
- ISD::ParamFlags::ByValAlignOffs);
-
- unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
- ISD::ParamFlags::ByValSizeOffs;
-
- SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
- SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
-
- return DAG.getNode(ISD::MEMCPY, MVT::Other, Chain, PtrOff, Arg, SizeNode,
- AlignNode);
- } else {
- return DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
- }
-}
-
-SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
- unsigned CC) {
- SDOperand Chain = Op.getOperand(0);
- bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
- bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
- SDOperand Callee = Op.getOperand(4);
-
- // Analyze operands of the call, assigning locations to each operand.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
- CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_FastCall);
-
- // Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = CCInfo.getNextStackOffset();
-
- if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
- // Make sure the instruction takes 8n+4 bytes to make sure the start of the
- // arguments and the arguments after the retaddr has been pushed are
- // aligned.
- if ((NumBytes & 7) == 0)
- NumBytes += 4;
- }
-
- Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
-
- SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
- SmallVector<SDOperand, 8> MemOpChains;
-
- SDOperand StackPtr;
-
- // Walk the register/memloc assignments, inserting copies/loads.
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
- SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
-
- // Promote the value if needed.
- switch (VA.getLocInfo()) {
- default: assert(0 && "Unknown loc info!");
- case CCValAssign::Full: break;
- case CCValAssign::SExt:
- Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
- break;
- case CCValAssign::ZExt:
- Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
- break;
- case CCValAssign::AExt:
- Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
- break;
- }
-
- if (VA.isRegLoc()) {
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- } else {
- assert(VA.isMemLoc());
- if (StackPtr.Val == 0)
- StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
-
- MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
- Arg));
- }
- }
-
- if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
-
- // Build a sequence of copy-to-reg nodes chained together with token chain
- // and flag operands which copy the outgoing args into registers.
- SDOperand InFlag;
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
- InFlag);
- InFlag = Chain.getValue(1);
- }
-
- // If the callee is a GlobalAddress node (quite common, every direct call is)
- // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- // We should use extra load for direct calls to dllimported functions in
- // non-JIT mode.
- if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
- getTargetMachine(), true))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
- } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
- Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
-
- // ELF / PIC requires GOT in the EBX register before function calls via PLT
- // GOT pointer.
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
- Subtarget->isPICStyleGOT()) {
- Chain = DAG.getCopyToReg(Chain, X86::EBX,
- DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
- InFlag);
- InFlag = Chain.getValue(1);
- }
-
- // Returns a chain & a flag for retval copy to use.
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
- SmallVector<SDOperand, 8> Ops;
- Ops.push_back(Chain);
- Ops.push_back(Callee);
-
- // Add argument registers to the end of the list so that they are known live
- // into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
-
- // Add an implicit use GOT pointer in EBX.
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
- Subtarget->isPICStyleGOT())
- Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
-
- if (InFlag.Val)
- Ops.push_back(InFlag);
-
- assert(isTailCall==false && "no tail call here");
- Chain = DAG.getNode(X86ISD::CALL,
- NodeTys, &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
-
- // Returns a flag for retval copy to use.
- NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
- Ops.clear();
- Ops.push_back(Chain);
- Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
- Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
- Ops.push_back(InFlag);
- Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
-
- // Handle result values, copying them out of physregs into vregs that we
- // return.
- return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
-}
-
-//===----------------------------------------------------------------------===//
-// Fast Calling Convention (tail call) implementation
-//===----------------------------------------------------------------------===//
-
-// Like std call, callee cleans arguments, convention except that ECX is
-// reserved for storing the tail called function address. Only 2 registers are
-// free for argument passing (inreg). Tail call optimization is performed
-// provided:
-// * tailcallopt is enabled
-// * caller/callee are fastcc
-// * elf/pic is disabled OR
-// * elf/pic enabled + callee is in module + callee has
-// visibility protected or hidden
-// To ensure the stack is aligned according to platform abi pass
-// tail-call-align-stack. This makes sure that argument delta is always
-// multiples of stack alignment. (Dynamic linkers need this - darwin's dyld for
-// example)
-// If a tail called function callee has more arguments than the caller the
-// caller needs to make sure that there is room to move the RETADDR to. This is
-// achived by reserving an area the size of the argument delta right after the
-// original REtADDR, but before the saved framepointer or the spilled registers
-// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
-// stack layout:
-// arg1
-// arg2
-// RETADDR
-// [ new RETADDR
-// move area ]
-// (possible EBP)
-// ESI
-// EDI
-// local1 ..
-
-/// GetAlignedArgumentStackSize - Make the stack size align e.g 16n + 12 aligned
-/// for a 16 byte align requirement.
-unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
- SelectionDAG& DAG) {
- if (PerformTailCallOpt) {
- MachineFunction &MF = DAG.getMachineFunction();
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameInfo &TFI = *TM.getFrameInfo();
- unsigned StackAlignment = TFI.getStackAlignment();
- uint64_t AlignMask = StackAlignment - 1;
- int64_t Offset = StackSize;
- unsigned SlotSize = Subtarget->is64Bit() ? 8 : 4;
- if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
- // Number smaller than 12 so just add the difference.
- Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
- } else {
- // Mask out lower bits, add stackalignment once plus the 12 bytes.
- Offset = ((~AlignMask) & Offset) + StackAlignment +
- (StackAlignment-SlotSize);
- }
- StackSize = Offset;
- }
- return StackSize;
-}
-
-/// IsEligibleForTailCallElimination - Check to see whether the next instruction
-// following the call is a return. A function is eligible if caller/callee
-// calling conventions match, currently only fastcc supports tail calls, and the
-// function CALL is immediatly followed by a RET.
-bool X86TargetLowering::IsEligibleForTailCallOptimization(SDOperand Call,
- SDOperand Ret,
- SelectionDAG& DAG) const {
- bool IsEligible = false;
-
- // Check whether CALL node immediatly preceeds the RET node and whether the
- // return uses the result of the node or is a void return.
- if ((Ret.getNumOperands() == 1 &&
- (Ret.getOperand(0)== SDOperand(Call.Val,1) ||
- Ret.getOperand(0)== SDOperand(Call.Val,0))) ||
- (Ret.getOperand(0)== SDOperand(Call.Val,Call.Val->getNumValues()-1) &&
- Ret.getOperand(1)== SDOperand(Call.Val,0))) {
- MachineFunction &MF = DAG.getMachineFunction();
- unsigned CallerCC = MF.getFunction()->getCallingConv();
- unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue();
- if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
- SDOperand Callee = Call.getOperand(4);
- // On elf/pic %ebx needs to be livein.
- if(getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
- Subtarget->isPICStyleGOT()) {
- // Can only do local tail calls with PIC.
- GlobalValue * GV = 0;
- GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
- if(G != 0 &&
- (GV = G->getGlobal()) &&
- (GV->hasHiddenVisibility() || GV->hasProtectedVisibility()))
- IsEligible=true;
- } else {
- IsEligible=true;
- }
- }
- }
- return IsEligible;
-}
-
-SDOperand X86TargetLowering::LowerX86_TailCallTo(SDOperand Op,
- SelectionDAG &DAG,
- unsigned CC) {
- SDOperand Chain = Op.getOperand(0);
- bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
- bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
- SDOperand Callee = Op.getOperand(4);
- bool is64Bit = Subtarget->is64Bit();
-
- assert(isTailCall && PerformTailCallOpt && "Should only emit tail calls.");
-
- // Analyze operands of the call, assigning locations to each operand.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
- if (is64Bit)
- CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_TailCall);
- else
- CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_TailCall);
-
-
- // Lower arguments at fp - stackoffset + fpdiff.
- MachineFunction &MF = DAG.getMachineFunction();
-
- unsigned NumBytesToBePushed =
- GetAlignedArgumentStackSize(CCInfo.getNextStackOffset(), DAG);
-
- unsigned NumBytesCallerPushed =
- MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
- int FPDiff = NumBytesCallerPushed - NumBytesToBePushed;
-
- // Set the delta of movement of the returnaddr stackslot.
- // But only set if delta is greater than previous delta.
- if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
- MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
-
- // Adjust the ret address stack slot.
- if (FPDiff) {
- MVT::ValueType VT = is64Bit ? MVT::i64 : MVT::i32;
- SDOperand RetAddrFrIdx = getReturnAddressFrameIndex(DAG);
- RetAddrFrIdx =
- DAG.getLoad(VT, DAG.getEntryNode(),RetAddrFrIdx, NULL, 0);
- // Emit a store of the saved ret value to the new location.
- int SlotSize = is64Bit ? 8 : 4;
- int NewReturnAddrFI =
- MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
- SDOperand NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
- Chain = DAG.getStore(Chain,RetAddrFrIdx, NewRetAddrFrIdx, NULL, 0);
- }
-
- Chain = DAG.
- getCALLSEQ_START(Chain, DAG.getConstant(NumBytesToBePushed, getPointerTy()));
-
- SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
- SmallVector<SDOperand, 8> MemOpChains;
- SmallVector<SDOperand, 8> MemOpChains2;
- SDOperand FramePtr, StackPtr;
- SDOperand PtrOff;
- SDOperand FIN;
- int FI = 0;
-
- // Walk the register/memloc assignments, inserting copies/loads. Lower
- // arguments first to the stack slot where they would normally - in case of a
- // normal function call - be.
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
- SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
-
- // Promote the value if needed.
- switch (VA.getLocInfo()) {
- default: assert(0 && "Unknown loc info!");
- case CCValAssign::Full: break;
- case CCValAssign::SExt:
- Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
- break;
- case CCValAssign::ZExt:
- Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
- break;
- case CCValAssign::AExt:
- Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
- break;
- }
-
- if (VA.isRegLoc()) {
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- } else {
- assert(VA.isMemLoc());
- if (StackPtr.Val == 0)
- StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
-
- MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
- Arg));
- }
- }
-
- if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
-
- // Build a sequence of copy-to-reg nodes chained together with token chain
- // and flag operands which copy the outgoing args into registers.
- SDOperand InFlag;
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
- InFlag);
- InFlag = Chain.getValue(1);
- }
- InFlag = SDOperand();
- // Copy from stack slots to stack slot of a tail called function. This needs
- // to be done because if we would lower the arguments directly to their real
- // stack slot we might end up overwriting each other.
- // TODO: To make this more efficient (sometimes saving a store/load) we could
- // analyse the arguments and emit this store/load/store sequence only for
- // arguments which would be overwritten otherwise.
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
- if (!VA.isRegLoc()) {
- SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
- unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
-
- // Get source stack slot.
- SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
- PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
- // Create frame index.
- int32_t Offset = VA.getLocMemOffset()+FPDiff;
- uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8;
- FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
- FIN = DAG.getFrameIndex(FI, MVT::i32);
- if (Flags & ISD::ParamFlags::ByVal) {
- // Copy relative to framepointer.
- unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
- ISD::ParamFlags::ByValAlignOffs);
-
- unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
- ISD::ParamFlags::ByValSizeOffs;
-
- SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
- SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
- // Copy relative to framepointer.
- MemOpChains2.push_back(DAG.getNode(ISD::MEMCPY, MVT::Other, Chain, FIN,
- PtrOff, SizeNode, AlignNode));
- } else {
- SDOperand LoadedArg = DAG.getLoad(VA.getValVT(), Chain, PtrOff, NULL,0);
- // Store relative to framepointer.
- MemOpChains2.push_back(DAG.getStore(Chain, LoadedArg, FIN, NULL, 0));
- }
- }
- }
-
- if (!MemOpChains2.empty())
- Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
- &MemOpChains2[0], MemOpChains.size());
-
- // ELF / PIC requires GOT in the EBX register before function calls via PLT
- // GOT pointer.
- // Does not work with tail call since ebx is not restored correctly by
- // tailcaller. TODO: at least for x86 - verify for x86-64
-
- // If the callee is a GlobalAddress node (quite common, every direct call is)
- // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- // We should use extra load for direct calls to dllimported functions in
- // non-JIT mode.
- if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
- getTargetMachine(), true))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
- } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
- Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
- else {
- assert(Callee.getOpcode() == ISD::LOAD &&
- "Function destination must be loaded into virtual register");
- unsigned Opc = is64Bit ? X86::R9 : X86::ECX;
-
- Chain = DAG.getCopyToReg(Chain,
- DAG.getRegister(Opc, getPointerTy()) ,
- Callee,InFlag);
- Callee = DAG.getRegister(Opc, getPointerTy());
- // Add register as live out.
- DAG.getMachineFunction().addLiveOut(Opc);
- }
-
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
- SmallVector<SDOperand, 8> Ops;
-
- Ops.push_back(Chain);
- Ops.push_back(DAG.getConstant(NumBytesToBePushed, getPointerTy()));
- Ops.push_back(DAG.getConstant(0, getPointerTy()));
- if (InFlag.Val)
- Ops.push_back(InFlag);
- Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
-
- // Returns a chain & a flag for retval copy to use.
- NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
- Ops.clear();
- Ops.push_back(Chain);
- Ops.push_back(Callee);
- Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
- // Add argument registers to the end of the list so that they are known live
- // into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
- if (InFlag.Val)
- Ops.push_back(InFlag);
- assert(InFlag.Val &&
- "Flag must be set. Depend on flag being set in LowerRET");
- Chain = DAG.getNode(X86ISD::TAILCALL,
- Op.Val->getVTList(), &Ops[0], Ops.size());
-
- return SDOperand(Chain.Val, Op.ResNo);
+/// IsPossiblyOverwrittenArgumentOfTailCall - Conservatively decide whether the
+/// value Op might be clobbered while the outgoing arguments of a tail call are
+/// being lowered. Every value that comes from FORMAL_ARGUMENTS, or from a
+/// CopyFromReg whose register number is at or above
+/// MRegisterInfo::FirstVirtualRegister, is assumed to be overwritten by direct
+/// lowering.
+///
+/// Possible improvement: inspect the MERGE_VALUES node corresponding to
+/// FORMAL_ARGUMENTS for CopyFromReg nodes. Those indicate inreg passed
+/// arguments, which also need not be lowered to a safe stack slot.
+static bool IsPossiblyOverwrittenArgumentOfTailCall(SDOperand Op) {
+  // Values produced directly by the FORMAL_ARGUMENTS node are always unsafe.
+  if (Op.getOpcode() == ISD::FORMAL_ARGUMENTS)
+    return true;
+
+  // Any other opcode except CopyFromReg is considered safe.
+  if (Op.getOpcode() != ISD::CopyFromReg)
+    return false;
+
+  // Operand 1 of CopyFromReg is the source register; only virtual registers
+  // are treated as possibly overwritten.
+  RegisterSDNode *SrcReg = cast<RegisterSDNode>(Op.getOperand(1));
+  return SrcReg && SrcReg->getReg() >= MRegisterInfo::FirstVirtualRegister;
 }
-//===----------------------------------------------------------------------===//
-// X86-64 C Calling Convention implementation
-//===----------------------------------------------------------------------===//
+/// CreateCopyOfByValArgument - Emit a memcpy node, chained on "Chain", that
+/// copies the aggregate at address "Src" to address "Dst". The size and
+/// alignment operands of the copy are decoded from the ByValSize and
+/// ByValAlign fields of the parameter attribute word "Flags". The copy will be
+/// passed as a byval function parameter.
+static SDOperand
+CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand Chain,
+                          unsigned Flags, SelectionDAG &DAG) {
+  // The alignment field stores a shift amount: the alignment is 1 << field.
+  const unsigned AlignShift =
+    (Flags & ISD::ParamFlags::ByValAlign) >> ISD::ParamFlags::ByValAlignOffs;
+  const unsigned CopyAlign = 1 << AlignShift;
+  const unsigned CopySize =
+    (Flags & ISD::ParamFlags::ByValSize) >> ISD::ParamFlags::ByValSizeOffs;
+
+  // Create the constant operands: alignment first, then size, then the
+  // always-inline flag (set to 1).
+  SDOperand AlignOp = DAG.getConstant(CopyAlign, MVT::i32);
+  SDOperand SizeOp = DAG.getConstant(CopySize, MVT::i32);
+  SDOperand InlineOp = DAG.getConstant(1, MVT::i32);
+
+  return DAG.getMemcpy(Chain, Dst, Src, SizeOp, AlignOp, InlineOp);
+}
+
+/// LowerMemArgument - Lower incoming argument number "i", which was assigned
+/// the stack location described by VA. A fixed frame object is created at the
+/// argument's stack offset. For a byval argument the frame index of that
+/// object is returned; otherwise a load of the value from the slot, chained
+/// on Root, is returned.
+SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
+                                              const CCValAssign &VA,
+                                              MachineFrameInfo *MFI,
+                                              SDOperand Root, unsigned i) {
+  // The flags of argument i are stored in operand 3 + i of the node.
+  const unsigned ArgFlags =
+    cast<ConstantSDNode>(Op.getOperand(3 + i))->getValue();
+  const bool PassedByVal = (ArgFlags & ISD::ParamFlags::ByVal) != 0;
+
+  // FIXME: For now, all byval parameter objects are marked mutable. This
+  // can be changed with more analysis.
+  const unsigned SlotSize = MVT::getSizeInBits(VA.getValVT()) / 8;
+  const int SlotFI =
+    MFI->CreateFixedObject(SlotSize, VA.getLocMemOffset(), !PassedByVal);
+  SDOperand SlotAddr = DAG.getFrameIndex(SlotFI, getPointerTy());
+
+  // Ordinary arguments are loaded from their slot; byval arguments are
+  // represented by the slot address itself.
+  if (!PassedByVal)
+    return DAG.getLoad(VA.getValVT(), Root, SlotAddr, NULL, 0);
+  return SlotAddr;
+}
SDOperand
-X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
+X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+
+ const Function* Fn = MF.getFunction();
+ if (Fn->hasExternalLinkage() &&
+ Subtarget->isTargetCygMing() &&
+ Fn->getName() == "main")
+ FuncInfo->setForceFramePointer(true);
+
+ // Decorate the function name.
+ FuncInfo->setDecorationStyle(NameDecorationForFORMAL_ARGUMENTS(Op));
+
MachineFrameInfo *MFI = MF.getFrameInfo();
SDOperand Root = Op.getOperand(0);
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
- unsigned CC= MF.getFunction()->getCallingConv();
+ unsigned CC = MF.getFunction()->getCallingConv();
+ bool Is64Bit = Subtarget->is64Bit();
- static const unsigned GPR64ArgRegs[] = {
- X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
- };
- static const unsigned XMMArgRegs[] = {
- X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
- X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
- };
+ assert(!(isVarArg && CC == CallingConv::Fast) &&
+ "Var args not supported with calling convention fastcc");
-
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, isVarArg,
- getTargetMachine(), ArgLocs);
- if (CC == CallingConv::Fast && PerformTailCallOpt)
- CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_TailCall);
- else
- CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C);
+ CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+ CCInfo.AnalyzeFormalArguments(Op.Val, CCAssignFnForNode(Op));
SmallVector<SDOperand, 8> ArgValues;
unsigned LastVal = ~0U;
TargetRegisterClass *RC;
if (RegVT == MVT::i32)
RC = X86::GR32RegisterClass;
- else if (RegVT == MVT::i64)
+ else if (Is64Bit && RegVT == MVT::i64)
RC = X86::GR64RegisterClass;
- else if (RegVT == MVT::f32)
+ else if (Is64Bit && RegVT == MVT::f32)
RC = X86::FR32RegisterClass;
- else if (RegVT == MVT::f64)
+ else if (Is64Bit && RegVT == MVT::f64)
RC = X86::FR64RegisterClass;
else {
assert(MVT::isVector(RegVT));
- if (MVT::getSizeInBits(RegVT) == 64) {
+ if (Is64Bit && MVT::getSizeInBits(RegVT) == 64) {
RC = X86::GR64RegisterClass; // MMX values are passed in GPRs.
RegVT = MVT::i64;
} else
ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
// Handle MMX values passed in GPRs.
- if (RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass &&
+ if (Is64Bit && RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass &&
MVT::getSizeInBits(RegVT) == 64)
ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);
ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
}
}
-
+
unsigned StackSize = CCInfo.getNextStackOffset();
- if (CC==CallingConv::Fast)
- StackSize =GetAlignedArgumentStackSize(StackSize, DAG);
-
+ // align stack specially for tail calls
+ if (CC == CallingConv::Fast)
+ StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
+
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
- assert(CC!=CallingConv::Fast
- && "Var arg not supported with calling convention fastcc");
- unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
- unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
-
- // For X86-64, if there are vararg parameters that are passed via
- // registers, then we must store them to their spots on the stack so they
- // may be loaded by deferencing the result of va_next.
- VarArgsGPOffset = NumIntRegs * 8;
- VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
- VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
- RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);
-
- // Store the integer parameter registers.
- SmallVector<SDOperand, 8> MemOps;
- SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
- SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
- DAG.getConstant(VarArgsGPOffset, getPointerTy()));
- for (; NumIntRegs != 6; ++NumIntRegs) {
- unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
- X86::GR64RegisterClass);
- SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
- SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
- MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
- DAG.getConstant(8, getPointerTy()));
+ if (Is64Bit || CC != CallingConv::X86_FastCall) {
+ VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
}
-
- // Now store the XMM (fp + vector) parameter registers.
- FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
- DAG.getConstant(VarArgsFPOffset, getPointerTy()));
- for (; NumXMMRegs != 8; ++NumXMMRegs) {
- unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
- X86::VR128RegisterClass);
- SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
- SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
- MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
- DAG.getConstant(16, getPointerTy()));
+ if (Is64Bit) {
+ static const unsigned GPR64ArgRegs[] = {
+ X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
+ };
+ static const unsigned XMMArgRegs[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+
+ unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
+ unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
+
+ // For X86-64, if there are vararg parameters that are passed via
+ // registers, then we must store them to their spots on the stack so they
+ // may be loaded by dereferencing the result of va_next.
+ VarArgsGPOffset = NumIntRegs * 8;
+ VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
+ RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);
+
+ // Store the integer parameter registers.
+ SmallVector<SDOperand, 8> MemOps;
+ SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
+ SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
+ DAG.getIntPtrConstant(VarArgsGPOffset));
+ for (; NumIntRegs != 6; ++NumIntRegs) {
+ unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
+ X86::GR64RegisterClass);
+ SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
+ SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
+ DAG.getIntPtrConstant(8));
+ }
+
+ // Now store the XMM (fp + vector) parameter registers.
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
+ DAG.getIntPtrConstant(VarArgsFPOffset));
+ for (; NumXMMRegs != 8; ++NumXMMRegs) {
+ unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
+ X86::VR128RegisterClass);
+ SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
+ SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
+ DAG.getIntPtrConstant(16));
+ }
+ if (!MemOps.empty())
+ Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &MemOps[0], MemOps.size());
}
- if (!MemOps.empty())
- Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
- &MemOps[0], MemOps.size());
}
+
+ // Make sure the instruction takes 8n+4 bytes to make sure the start of the
+ // arguments and the arguments after the retaddr has been pushed are
+ // aligned.
+ if (!Is64Bit && CC == CallingConv::X86_FastCall &&
+ !Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows() &&
+ (StackSize & 7) == 0)
+ StackSize += 4;
ArgValues.push_back(Root);
- // Tail call convention (fastcc) needs callee pop.
- if (CC == CallingConv::Fast && PerformTailCallOpt){
- BytesToPopOnReturn = StackSize; // Callee pops everything.
+
+ // Some CCs need callee pop.
+ if (IsCalleePop(Op)) {
+ BytesToPopOnReturn = StackSize; // Callee pops everything.
BytesCallerReserves = 0;
} else {
- BytesToPopOnReturn = 0; // Callee pops nothing.
+ BytesToPopOnReturn = 0; // Callee pops nothing.
+ // If this is an sret function, the return should pop the hidden pointer.
+ if (!Is64Bit && ArgsAreStructReturn(Op))
+ BytesToPopOnReturn = 4;
BytesCallerReserves = StackSize;
}
- X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+
+ if (!Is64Bit) {
+ RegSaveFrameIndex = 0xAAAAAAA; // RegSaveFrameIndex is X86-64 only.
+ if (CC == CallingConv::X86_FastCall)
+ VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
+ }
+
FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
// Return the new list of results.
}
SDOperand
-X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG,
- unsigned CC) {
+X86TargetLowering::LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG,
+ const SDOperand &StackPtr,
+ const CCValAssign &VA,
+ SDOperand Chain,
+ SDOperand Arg) {
+ SDOperand PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset());
+ PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
+ SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
+ unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
+ if (Flags & ISD::ParamFlags::ByVal) {
+ return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
+ }
+ return DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
+}
+
+SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
SDOperand Chain = Op.getOperand(0);
+ unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
+ bool IsTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0
+ && CC == CallingConv::Fast && PerformTailCallOpt;
SDOperand Callee = Op.getOperand(4);
-
+ bool Is64Bit = Subtarget->is64Bit();
+
+ assert(!(isVarArg && CC == CallingConv::Fast) &&
+ "Var args not supported with calling convention fastcc");
+
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
- if (CC==CallingConv::Fast)
- CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_TailCall);
- else
- CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C);
-
+ CCInfo.AnalyzeCallOperands(Op.Val, CCAssignFnForNode(Op));
+
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
if (CC == CallingConv::Fast)
- NumBytes = GetAlignedArgumentStackSize(NumBytes,DAG);
+ NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
- Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
+ // Make sure the instruction takes 8n+4 bytes to make sure the start of the
+ // arguments and the arguments after the retaddr has been pushed are aligned.
+ if (!Is64Bit && CC == CallingConv::X86_FastCall &&
+ !Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows() &&
+ (NumBytes & 7) == 0)
+ NumBytes += 4;
+
+ int FPDiff = 0;
+ if (IsTailCall) {
+ // Lower arguments at fp - stackoffset + fpdiff.
+ unsigned NumBytesCallerPushed =
+ MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
+ FPDiff = NumBytesCallerPushed - NumBytes;
+
+ // Set the delta of movement of the returnaddr stackslot.
+ // But only set if delta is greater than previous delta.
+ if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
+ MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
+ }
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes));
+
+ SDOperand RetAddrFrIdx, NewRetAddrFrIdx;
+ if (IsTailCall) {
+ // Adjust the Return address stack slot.
+ if (FPDiff) {
+ MVT::ValueType VT = Is64Bit ? MVT::i64 : MVT::i32;
+ RetAddrFrIdx = getReturnAddressFrameIndex(DAG);
+ // Load the "old" Return address.
+ RetAddrFrIdx =
+ DAG.getLoad(VT, Chain,RetAddrFrIdx, NULL, 0);
+ // Calculate the new stack slot for the return address.
+ int SlotSize = Is64Bit ? 8 : 4;
+ int NewReturnAddrFI =
+ MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
+ NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
+ Chain = SDOperand(RetAddrFrIdx.Val, 1);
+ }
+ }
SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
SmallVector<SDOperand, 8> MemOpChains;
SDOperand StackPtr;
-
- // Walk the register/memloc assignments, inserting copies/loads.
+
+ // Walk the register/memloc assignments, inserting copies/loads. For tail
+ // calls, lower arguments which could otherwise be possibly overwritten to the
+ // stack slot where they would go on normal function calls.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else {
- assert(VA.isMemLoc());
- if (StackPtr.Val == 0)
- StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
-
- MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
- Arg));
+ if (!IsTailCall || IsPossiblyOverwrittenArgumentOfTailCall(Arg)) {
+ assert(VA.isMemLoc());
+ if (StackPtr.Val == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
+
+ MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
+ Arg));
+ }
}
}
InFlag = Chain.getValue(1);
}
- if (isVarArg) {
- assert ( CallingConv::Fast != CC &&
- "Var args not supported with calling convention fastcc");
+ if (IsTailCall)
+ InFlag = SDOperand(); // ??? Isn't this nuking the preceding loop's output?
+
+ // ELF / PIC requires GOT in the EBX register before function calls via PLT
+ // GOT pointer.
+ // Does not work with tail call since ebx is not restored correctly by
+ // tailcaller. TODO: at least for x86 - verify for x86-64
+ if (!IsTailCall && !Is64Bit &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT()) {
+ Chain = DAG.getCopyToReg(Chain, X86::EBX,
+ DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ }
+ if (Is64Bit && isVarArg) {
// From AMD64 ABI document:
// For calls that may call functions that use varargs or stdargs
// (prototype-less calls or calls to functions containing ellipsis (...) in
InFlag = Chain.getValue(1);
}
+ // For tail calls lower the arguments to the 'real' stack slot.
+ if (IsTailCall) {
+ SmallVector<SDOperand, 8> MemOpChains2;
+ SDOperand FIN;
+ int FI = 0;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (!VA.isRegLoc()) {
+ assert(VA.isMemLoc());
+ SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
+ SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
+ unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
+ // Create frame index.
+ int32_t Offset = VA.getLocMemOffset()+FPDiff;
+ uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8;
+ FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
+ FIN = DAG.getFrameIndex(FI, MVT::i32);
+ SDOperand Source = Arg;
+ if (IsPossiblyOverwrittenArgumentOfTailCall(Arg)) {
+ // Copy from stack slots to stack slot of a tail called function. This
+ // needs to be done because if we would lower the arguments directly
+ // to their real stack slot we might end up overwriting each other.
+ // Get source stack slot.
+ Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
+ if (StackPtr.Val == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
+ Source = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, Source);
+ if ((Flags & ISD::ParamFlags::ByVal)==0)
+ Source = DAG.getLoad(VA.getValVT(), Chain, Source, NULL, 0);
+ }
+
+ if (Flags & ISD::ParamFlags::ByVal) {
+ // Copy relative to framepointer.
+ MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, Chain,
+ Flags, DAG));
+ } else {
+ // Store relative to framepointer.
+ MemOpChains2.push_back(DAG.getStore(Chain, Source, FIN, NULL, 0));
+ }
+ }
+ }
+
+ if (!MemOpChains2.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &MemOpChains2[0], MemOpChains2.size());
+
+ // Store the return address to the appropriate stack slot.
+ if (FPDiff)
+ Chain = DAG.getStore(Chain,RetAddrFrIdx, NewRetAddrFrIdx, NULL, 0);
+ }
+
// If the callee is a GlobalAddress node (quite common, every direct call is)
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
// We should use extra load for direct calls to dllimported functions in
// non-JIT mode.
- if (getTargetMachine().getCodeModel() != CodeModel::Large
+ if ((IsTailCall || !Is64Bit ||
+ getTargetMachine().getCodeModel() != CodeModel::Large)
&& !Subtarget->GVRequiresExtraLoad(G->getGlobal(),
getTargetMachine(), true))
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
- } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
- if (getTargetMachine().getCodeModel() != CodeModel::Large)
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ if (IsTailCall || !Is64Bit ||
+ getTargetMachine().getCodeModel() != CodeModel::Large)
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+ } else if (IsTailCall) {
+ assert(Callee.getOpcode() == ISD::LOAD &&
+ "Function destination must be loaded into virtual register");
+ unsigned Opc = Is64Bit ? X86::R9 : X86::ECX;
+ Chain = DAG.getCopyToReg(Chain,
+ DAG.getRegister(Opc, getPointerTy()) ,
+ Callee,InFlag);
+ Callee = DAG.getRegister(Opc, getPointerTy());
+ // Add register as live out.
+ DAG.getMachineFunction().getRegInfo().addLiveOut(Opc);
+ }
+
// Returns a chain & a flag for retval copy to use.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
SmallVector<SDOperand, 8> Ops;
+
+ if (IsTailCall) {
+ Ops.push_back(Chain);
+ Ops.push_back(DAG.getIntPtrConstant(NumBytes));
+ Ops.push_back(DAG.getIntPtrConstant(0));
+ if (InFlag.Val)
+ Ops.push_back(InFlag);
+ Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Returns a chain & a flag for retval copy to use.
+ NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ Ops.clear();
+ }
+
Ops.push_back(Chain);
Ops.push_back(Callee);
+ if (IsTailCall)
+ Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
+
+ // Add an implicit use GOT pointer in EBX.
+ if (!IsTailCall && !Is64Bit &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ Subtarget->isPICStyleGOT())
+ Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
+
// Add argument registers to the end of the list so that they are known live
// into the call.
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
-
+
if (InFlag.Val)
Ops.push_back(InFlag);
- Chain = DAG.getNode(X86ISD::CALL,
- NodeTys, &Ops[0], Ops.size());
+ if (IsTailCall) {
+ assert(InFlag.Val &&
+ "Flag must be set. Depend on flag being set in LowerRET");
+ Chain = DAG.getNode(X86ISD::TAILCALL,
+ Op.Val->getVTList(), &Ops[0], Ops.size());
+
+ return SDOperand(Chain.Val, Op.ResNo);
+ }
+
+ Chain = DAG.getNode(X86ISD::CALL, NodeTys, &Ops[0], Ops.size());
InFlag = Chain.getValue(1);
- int NumBytesForCalleeToPush = 0;
- if (CC==CallingConv::Fast) {
- NumBytesForCalleeToPush = NumBytes; // Callee pops everything
-
- } else {
+
+ // Create the CALLSEQ_END node.
+ unsigned NumBytesForCalleeToPush;
+ if (IsCalleePop(Op))
+ NumBytesForCalleeToPush = NumBytes; // Callee pops everything
+ else if (!Is64Bit && CallIsStructReturn(Op))
+ // If this is a call to a struct-return function, the callee
+ // pops the hidden struct pointer, so we have to push it back.
+ // This is common for Darwin/X86, Linux & Mingw32 targets.
+ NumBytesForCalleeToPush = 4;
+ else
NumBytesForCalleeToPush = 0; // Callee pops nothing.
- }
+
// Returns a flag for retval copy to use.
- NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
- Ops.clear();
- Ops.push_back(Chain);
- Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
- Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
- Ops.push_back(InFlag);
- Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getIntPtrConstant(NumBytes),
+ DAG.getIntPtrConstant(NumBytesForCalleeToPush),
+ InFlag);
InFlag = Chain.getValue(1);
-
+
// Handle result values, copying them out of physregs into vregs that we
// return.
return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}
+//===----------------------------------------------------------------------===//
+// Fast Calling Convention (tail call) implementation
+//===----------------------------------------------------------------------===//
+
+// Like the stdcall convention (callee cleans arguments), except that ECX is
+// reserved for storing the tail called function address. Only 2 registers are
+// free for argument passing (inreg). Tail call optimization is performed
+// provided:
+// * tailcallopt is enabled
+// * caller/callee are fastcc
+// * elf/pic is disabled OR
+// * elf/pic enabled + callee is in module + callee has
+// visibility protected or hidden
+// To keep the stack aligned according to platform abi the function
+// GetAlignedArgumentStackSize ensures that argument delta is always multiples
+// of stack alignment. (Dynamic linkers need this - darwin's dyld for example)
+// If a tail called function callee has more arguments than the caller the
+// caller needs to make sure that there is room to move the RETADDR to. This is
+// achieved by reserving an area the size of the argument delta right after the
+// original RETADDR, but before the saved framepointer or the spilled registers
+// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
+// stack layout:
+// arg1
+// arg2
+// RETADDR
+// [ new RETADDR
+// move area ]
+// (possible EBP)
+// ESI
+// EDI
+// local1 ..
+
+/// GetAlignedArgumentStackSize - With PerformTailCallOpt, round StackSize up
+/// to n*StackAlignment + (StackAlignment - SlotSize), e.g. 16n + 12.
+unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
+ SelectionDAG& DAG) {
+ if (PerformTailCallOpt) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetMachine &TM = MF.getTarget();
+ const TargetFrameInfo &TFI = *TM.getFrameInfo();
+ unsigned StackAlignment = TFI.getStackAlignment();
+ uint64_t AlignMask = StackAlignment - 1; // NOTE: assumes power-of-two alignment.
+ int64_t Offset = StackSize;
+ unsigned SlotSize = Subtarget->is64Bit() ? 8 : 4;
+ if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
+ // Low bits do not exceed StackAlignment-SlotSize; just add the difference.
+ Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
+ } else {
+ // Mask out lower bits, add StackAlignment once plus StackAlignment-SlotSize.
+ Offset = ((~AlignMask) & Offset) + StackAlignment +
+ (StackAlignment-SlotSize);
+ }
+ StackSize = Offset;
+ }
+ return StackSize; // Returned unchanged when PerformTailCallOpt is off.
+}
+
+/// IsEligibleForTailCallOptimization - Check to see whether the next instruction
+/// following the call is a return. A function is eligible if caller/callee
+/// calling conventions match, currently only fastcc supports tail calls, and
+/// the function CALL is immediately followed by a RET.
+bool X86TargetLowering::IsEligibleForTailCallOptimization(SDOperand Call,
+ SDOperand Ret,
+ SelectionDAG& DAG) const {
+ if (!PerformTailCallOpt)
+ return false;
+
+ // Check whether CALL node immediately precedes the RET node and whether the
+ // return uses the result of the node or is a void return.
+ unsigned NumOps = Ret.getNumOperands();
+ if ((NumOps == 1 &&
+ (Ret.getOperand(0) == SDOperand(Call.Val,1) ||
+ Ret.getOperand(0) == SDOperand(Call.Val,0))) ||
+ (NumOps > 1 &&
+ Ret.getOperand(0) == SDOperand(Call.Val,Call.Val->getNumValues()-1) &&
+ Ret.getOperand(1) == SDOperand(Call.Val,0))) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned CallerCC = MF.getFunction()->getCallingConv();
+ unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue();
+ if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
+ SDOperand Callee = Call.getOperand(4);
+ // On elf/pic %ebx needs to be livein, so only the non-PIC / non-GOT case
+ // is unconditionally eligible.
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ ||
+ !Subtarget->isPICStyleGOT())
+ return true;
+
+ // Can only do local tail calls with PIC: hidden or protected globals.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ return G->getGlobal()->hasHiddenVisibility()
+ || G->getGlobal()->hasProtectedVisibility();
+ }
+ }
+
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
static bool isMOVLMask(const SDOperand *Elts, unsigned NumElts) {
- if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
+ if (NumElts != 2 && NumElts != 4)
return false;
if (!isUndefOrEqual(Elts[0], NumElts))
if (Arg.getOpcode() == ISD::UNDEF) continue;
assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
- if (Val > 4)
+ if (Val >= 4)
return false;
}
return true;
}
-/// CommuteVectorShuffle - Swap vector_shuffle operandsas well as
+/// CommuteVectorShuffle - Swap vector_shuffle operands as well as
/// values in ther permute mask.
static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1,
SDOperand &V2, SDOperand &Mask,
}
std::swap(V1, V2);
- Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
+ Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], NumElems);
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}
+/// CommuteVectorShuffleMask - Return a copy of a shuffle permute mask adjusted
+/// as if the two vector operands had swapped position; undefs stay undef.
+static
+SDOperand CommuteVectorShuffleMask(SDOperand Mask, SelectionDAG &DAG) {
+ MVT::ValueType MaskVT = Mask.getValueType();
+ MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT);
+ unsigned NumElems = Mask.getNumOperands();
+ SmallVector<SDOperand, 8> MaskVec;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDOperand Arg = Mask.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) {
+ MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
+ continue;
+ }
+ assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
+ unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
+ if (Val < NumElems) // Index below NumElems: shift it up by NumElems.
+ MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
+ else // Index at or above NumElems: shift it down by NumElems.
+ MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], NumElems);
+}
+
+
/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
/// match movhlps. The lower half elements should come from upper half of
/// V1 (and in order), and the upper half elements should come from the upper
unsigned NumElems = Mask.getNumOperands();
for (unsigned i = 0; i != NumElems; ++i) {
SDOperand Arg = Mask.getOperand(i);
- if (Arg.getOpcode() != ISD::UNDEF) {
- unsigned Idx = cast<ConstantSDNode>(Arg)->getValue();
- if (Idx < NumElems) {
- unsigned Opc = V1.Val->getOpcode();
- if (Opc == ISD::UNDEF)
- continue;
- if (Opc != ISD::BUILD_VECTOR ||
- !isZeroNode(V1.Val->getOperand(Idx)))
- return false;
- } else if (Idx >= NumElems) {
- unsigned Opc = V2.Val->getOpcode();
- if (Opc == ISD::UNDEF)
- continue;
- if (Opc != ISD::BUILD_VECTOR ||
- !isZeroNode(V2.Val->getOperand(Idx - NumElems)))
- return false;
- }
+ if (Arg.getOpcode() == ISD::UNDEF)
+ continue;
+
+ unsigned Idx = cast<ConstantSDNode>(Arg)->getValue();
+ if (Idx < NumElems) {
+ unsigned Opc = V1.Val->getOpcode();
+ if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.Val))
+ continue;
+ if (Opc != ISD::BUILD_VECTOR ||
+ !isZeroNode(V1.Val->getOperand(Idx)))
+ return false;
+ } else if (Idx >= NumElems) {
+ unsigned Opc = V2.Val->getOpcode();
+ if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.Val))
+ continue;
+ if (Opc != ISD::BUILD_VECTOR ||
+ !isZeroNode(V2.Val->getOperand(Idx - NumElems)))
+ return false;
}
}
return true;
}
-/// getZeroVector - Returns a vector of specified type with all zero elements.
+/// getZeroVector - Returns a vector of specified type with all zero elements.
+/// The result is a BIT_CONVERT to VT of an all-zero v2i32 or v4i32 build_vector.
+static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
+ assert(MVT::isVector(VT) && "Expected a vector type");
+
+ // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted to their dest
+ // type. This ensures they get CSE'd.
+ SDOperand Cst = DAG.getTargetConstant(0, MVT::i32);
+ SDOperand Vec;
+ if (MVT::getSizeInBits(VT) == 64) // MMX
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst);
+ else // SSE
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst);
+ return DAG.getNode(ISD::BIT_CONVERT, VT, Vec);
+}
+
+
+/// getOnesVector - Returns a vector of specified type with all bits set.
///
-static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
+static SDOperand getOnesVector(MVT::ValueType VT, SelectionDAG &DAG) {
assert(MVT::isVector(VT) && "Expected a vector type");
- unsigned NumElems = MVT::getVectorNumElements(VT);
- MVT::ValueType EVT = MVT::getVectorElementType(VT);
- bool isFP = MVT::isFloatingPoint(EVT);
- SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
- SmallVector<SDOperand, 8> ZeroVec(NumElems, Zero);
- return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size());
+
+ // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest
+ // type. This ensures they get CSE'd.
+ SDOperand Cst = DAG.getTargetConstant(~0U, MVT::i32);
+ SDOperand Vec;
+ if (MVT::getSizeInBits(VT) == 64) // MMX
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst);
+ else // SSE
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst);
+ return DAG.getNode(ISD::BIT_CONVERT, VT, Vec);
}
+
/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
/// that point to V2 points to its first element.
static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
}
V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
- MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
- Mask = getZeroVector(MaskVT, DAG);
+ Mask = getZeroVector(MVT::v4i32, DAG);
SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
}
/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
-/// vector of zero or undef vector.
+/// vector of zero or undef vector. This produces a shuffle where the low
+/// element of V2 is swizzled into the zero/undef vector, landing at element
+/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
unsigned NumElems, unsigned Idx,
bool isZero, SelectionDAG &DAG) {
SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
- SDOperand Zero = DAG.getConstant(0, EVT);
- SmallVector<SDOperand, 8> MaskVec(NumElems, Zero);
- MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
+ SmallVector<SDOperand, 16> MaskVec;
+ for (unsigned i = 0; i != NumElems; ++i)
+ if (i == Idx) // If this is the insertion idx, put the low elt of V2 here.
+ MaskVec.push_back(DAG.getConstant(NumElems, EVT));
+ else
+ MaskVec.push_back(DAG.getConstant(i, EVT));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size());
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
if (ThisElt.Val)
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
- DAG.getConstant(i/2, TLI.getPointerTy()));
+ DAG.getIntPtrConstant(i/2));
}
}
First = false;
}
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
- DAG.getConstant(i, TLI.getPointerTy()));
+ DAG.getIntPtrConstant(i));
}
}
SDOperand
X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
- // All zero's are handled with pxor.
- if (ISD::isBuildVectorAllZeros(Op.Val))
- return Op;
+ // All zero's are handled with pxor, all one's are handled with pcmpeqd.
+ if (ISD::isBuildVectorAllZeros(Op.Val) || ISD::isBuildVectorAllOnes(Op.Val)) {
+ // Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to
+ // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
+ // eliminated on x86-32 hosts.
+ if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32)
+ return Op;
- // All one's are handled with pcmpeqd.
- if (ISD::isBuildVectorAllOnes(Op.Val))
- return Op;
+ if (ISD::isBuildVectorAllOnes(Op.Val))
+ return getOnesVector(Op.getValueType(), DAG);
+ return getZeroVector(Op.getValueType(), DAG);
+ }
MVT::ValueType VT = Op.getValueType();
MVT::ValueType EVT = MVT::getVectorElementType(VT);
unsigned NumZero = 0;
unsigned NumNonZero = 0;
unsigned NonZeros = 0;
- unsigned NumNonZeroImms = 0;
- std::set<SDOperand> Values;
+ bool HasNonImms = false;
+ SmallSet<SDOperand, 8> Values;
for (unsigned i = 0; i < NumElems; ++i) {
SDOperand Elt = Op.getOperand(i);
- if (Elt.getOpcode() != ISD::UNDEF) {
- Values.insert(Elt);
- if (isZeroNode(Elt))
- NumZero++;
- else {
- NonZeros |= (1 << i);
- NumNonZero++;
- if (Elt.getOpcode() == ISD::Constant ||
- Elt.getOpcode() == ISD::ConstantFP)
- NumNonZeroImms++;
- }
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ Values.insert(Elt);
+ if (Elt.getOpcode() != ISD::Constant &&
+ Elt.getOpcode() != ISD::ConstantFP)
+ HasNonImms = true;
+ if (isZeroNode(Elt))
+ NumZero++;
+ else {
+ NonZeros |= (1 << i);
+ NumNonZero++;
}
}
if (NumNonZero == 0) {
- if (NumZero == 0)
- // All undef vector. Return an UNDEF.
- return DAG.getNode(ISD::UNDEF, VT);
- else
- // A mix of zero and undef. Return a zero vector.
- return getZeroVector(VT, DAG);
+ // All undef vector. Return an UNDEF. All zero vectors were handled above.
+ return DAG.getNode(ISD::UNDEF, VT);
}
// Splat is obviously ok. Let legalizer expand it to a shuffle.
return SDOperand();
// Special case for single non-zero element.
- if (NumNonZero == 1) {
+ if (NumNonZero == 1 && NumElems <= 4) {
unsigned Idx = CountTrailingZeros_32(NonZeros);
SDOperand Item = Op.getOperand(Idx);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
NumZero > 0, DAG);
+ else if (!HasNonImms) // Otherwise, it's better to do a constpool load.
+ return SDOperand();
if (EVTBits == 32) {
// Turn it into a shuffle of zero and zero-extended scalar to vector.
// A vector full of immediates; various special cases are already
// handled, so this is best done with a single constant-pool load.
- if (NumNonZero == NumNonZeroImms)
+ if (!HasNonImms)
return SDOperand();
// Let legalizer expand 2-wide build_vectors.
return SDOperand();
}
+/// LowerVECTOR_SHUFFLEv8i16 - Lower a v8i16 vector_shuffle of V1 and V2 with
+/// permute mask PermMask (entries 0-7 select from V1, 8-15 from V2, or undef).
+/// Each source is viewed as two "quads" of four i16 elements (quad index =
+/// element index / 4). If at least two elements of the low or the high half of
+/// the result come from the same quad, the quads are first arranged with a
+/// v2i64 shuffle, each such half is sorted with a PSHUFLW / PSHUFHW style
+/// shuffle, and the remaining elements are patched with extract / insert
+/// pairs. Otherwise the result is built from one source vector (optionally
+/// pre-shuffled) plus extract / insert pairs. Returns the v8i16 result.
+static
+SDOperand LowerVECTOR_SHUFFLEv8i16(SDOperand V1, SDOperand V2,
+                                   SDOperand PermMask, SelectionDAG &DAG,
+                                   TargetLowering &TLI) {
+  SDOperand NewV;
+  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(8);
+  MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
+  MVT::ValueType PtrVT = TLI.getPointerTy();
+  SmallVector<SDOperand, 8> MaskElts(PermMask.Val->op_begin(),
+                                     PermMask.Val->op_end());
+
+  // First record which half of which vector the low elements come from.
+  SmallVector<unsigned, 4> LowQuad(4);
+  for (unsigned i = 0; i < 4; ++i) {
+    SDOperand Elt = MaskElts[i];
+    if (Elt.getOpcode() == ISD::UNDEF)
+      continue;
+    unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+    int QuadIdx = EltIdx / 4;
+    ++LowQuad[QuadIdx];
+  }
+  // A quad only qualifies if it supplies more than one element (MaxQuad
+  // starts at 1 and the comparison is strict).
+  int BestLowQuad = -1;
+  unsigned MaxQuad = 1;
+  for (unsigned i = 0; i < 4; ++i) {
+    if (LowQuad[i] > MaxQuad) {
+      BestLowQuad = i;
+      MaxQuad = LowQuad[i];
+    }
+  }
+
+  // Record which half of which vector the high elements come from.
+  SmallVector<unsigned, 4> HighQuad(4);
+  for (unsigned i = 4; i < 8; ++i) {
+    SDOperand Elt = MaskElts[i];
+    if (Elt.getOpcode() == ISD::UNDEF)
+      continue;
+    unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+    int QuadIdx = EltIdx / 4;
+    ++HighQuad[QuadIdx];
+  }
+  int BestHighQuad = -1;
+  MaxQuad = 1;
+  for (unsigned i = 0; i < 4; ++i) {
+    if (HighQuad[i] > MaxQuad) {
+      BestHighQuad = i;
+      MaxQuad = HighQuad[i];
+    }
+  }
+
+  // If it's possible to sort parts of either half with PSHUF{H|L}W, then do it.
+  if (BestLowQuad != -1 || BestHighQuad != -1) {
+    // First sort the 4 chunks in order using shufpd. A half without a best
+    // quad keeps its default: quad 0 (V1 low) for the low half and quad 1
+    // (V1 high) for the high half.
+    SmallVector<SDOperand, 8> MaskVec;
+    if (BestLowQuad != -1)
+      MaskVec.push_back(DAG.getConstant(BestLowQuad, MVT::i32));
+    else
+      MaskVec.push_back(DAG.getConstant(0, MVT::i32));
+    if (BestHighQuad != -1)
+      MaskVec.push_back(DAG.getConstant(BestHighQuad, MVT::i32));
+    else
+      MaskVec.push_back(DAG.getConstant(1, MVT::i32));
+    SDOperand Mask= DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, &MaskVec[0],2);
+    NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2i64,
+                       DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V1),
+                       DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V2), Mask);
+    NewV = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, NewV);
+
+    // Now sort high and low parts separately.
+    BitVector InOrder(8);
+    if (BestLowQuad != -1) {
+      // Sort lower half in order using PSHUFLW.
+      MaskVec.clear();
+      bool AnyOutOrder = false;
+      for (unsigned i = 0; i != 4; ++i) {
+        SDOperand Elt = MaskElts[i];
+        if (Elt.getOpcode() == ISD::UNDEF) {
+          MaskVec.push_back(Elt);
+          InOrder.set(i);
+        } else {
+          unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+          if (EltIdx != i)
+            AnyOutOrder = true;
+          MaskVec.push_back(DAG.getConstant(EltIdx % 4, MaskEVT));
+          // If this element is in the right place after this shuffle, then
+          // remember it.
+          if ((int)(EltIdx / 4) == BestLowQuad)
+            InOrder.set(i);
+        }
+      }
+      if (AnyOutOrder) {
+        for (unsigned i = 4; i != 8; ++i)
+          MaskVec.push_back(DAG.getConstant(i, MaskEVT));
+        SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
+        NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask);
+      }
+    }
+
+    if (BestHighQuad != -1) {
+      // Sort high half in order using PSHUFHW if possible.
+      MaskVec.clear();
+      for (unsigned i = 0; i != 4; ++i)
+        MaskVec.push_back(DAG.getConstant(i, MaskEVT));
+      bool AnyOutOrder = false;
+      for (unsigned i = 4; i != 8; ++i) {
+        SDOperand Elt = MaskElts[i];
+        if (Elt.getOpcode() == ISD::UNDEF) {
+          MaskVec.push_back(Elt);
+          InOrder.set(i);
+        } else {
+          unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+          if (EltIdx != i)
+            AnyOutOrder = true;
+          MaskVec.push_back(DAG.getConstant((EltIdx % 4) + 4, MaskEVT));
+          // If this element is in the right place after this shuffle, then
+          // remember it.
+          if ((int)(EltIdx / 4) == BestHighQuad)
+            InOrder.set(i);
+        }
+      }
+      if (AnyOutOrder) {
+        SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
+        NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask);
+      }
+    }
+
+    // The other elements are put in the right place using pextrw and pinsrw.
+    for (unsigned i = 0; i != 8; ++i) {
+      if (InOrder[i])
+        continue;
+      SDOperand Elt = MaskElts[i];
+      // Undef elements need no fixup. They can reach this point when their
+      // half was not sorted above, because InOrder is only set while sorting.
+      if (Elt.getOpcode() == ISD::UNDEF)
+        continue;
+      unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+      // NewV[i] is still V1[i] only if this half was not sorted, i.e. it
+      // holds its default quad. In a sorted half, an element that is not
+      // InOrder comes from a different quad and must always be inserted.
+      bool HalfSorted = (i < 4) ? (BestLowQuad != -1) : (BestHighQuad != -1);
+      if (!HalfSorted && EltIdx == i)
+        continue;
+      SDOperand ExtOp = (EltIdx < 8)
+        ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
+                      DAG.getConstant(EltIdx, PtrVT))
+        : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
+                      DAG.getConstant(EltIdx - 8, PtrVT));
+      NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
+                         DAG.getConstant(i, PtrVT));
+    }
+    return NewV;
+  }
+
+  // PSHUF{H|L}W are not used. Lower into extracts and inserts but try to use
+  // as few as possible.
+  // First, let's find out how many elements are already in the right order.
+  // V1Elts is the mask as given; V2Elts is the same mask with the operands
+  // commuted. Both must receive exactly one entry per mask element because
+  // they are indexed by result position below.
+  unsigned V1InOrder = 0;
+  unsigned V1FromV1 = 0;
+  unsigned V2InOrder = 0;
+  unsigned V2FromV2 = 0;
+  SmallVector<SDOperand, 8> V1Elts;
+  SmallVector<SDOperand, 8> V2Elts;
+  for (unsigned i = 0; i < 8; ++i) {
+    SDOperand Elt = MaskElts[i];
+    if (Elt.getOpcode() == ISD::UNDEF) {
+      V1Elts.push_back(Elt);
+      V2Elts.push_back(Elt);
+      ++V1InOrder;
+      ++V2InOrder;
+      continue;
+    }
+    unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+    if (EltIdx == i) {
+      V1Elts.push_back(Elt);
+      V2Elts.push_back(DAG.getConstant(i+8, MaskEVT));
+      ++V1InOrder;
+    } else if (EltIdx == i+8) {
+      V1Elts.push_back(Elt);
+      V2Elts.push_back(DAG.getConstant(i, MaskEVT));
+      ++V2InOrder;
+    } else if (EltIdx < 8) {
+      V1Elts.push_back(Elt);
+      V2Elts.push_back(DAG.getConstant(EltIdx+8, MaskEVT));
+      ++V1FromV1;
+    } else {
+      V1Elts.push_back(Elt);
+      V2Elts.push_back(DAG.getConstant(EltIdx-8, MaskEVT));
+      ++V2FromV2;
+    }
+  }
+
+  // Use whichever operand already has more elements in place as the base.
+  if (V2InOrder > V1InOrder) {
+    PermMask = CommuteVectorShuffleMask(PermMask, DAG);
+    std::swap(V1, V2);
+    std::swap(V1Elts, V2Elts);
+    std::swap(V1FromV1, V2FromV2);
+  }
+
+  if ((V1FromV1 + V1InOrder) != 8) {
+    // Some elements are from V2.
+    if (V1FromV1) {
+      // If there are elements that are from V1 but out of place,
+      // then first sort them in place
+      SmallVector<SDOperand, 8> MaskVec;
+      for (unsigned i = 0; i < 8; ++i) {
+        SDOperand Elt = V1Elts[i];
+        if (Elt.getOpcode() == ISD::UNDEF) {
+          MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
+          continue;
+        }
+        unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+        if (EltIdx >= 8)
+          MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
+        else
+          MaskVec.push_back(DAG.getConstant(EltIdx, MaskEVT));
+      }
+      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
+      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V1, Mask);
+    }
+
+    // Then insert every element that comes from V2.
+    NewV = V1;
+    for (unsigned i = 0; i < 8; ++i) {
+      SDOperand Elt = V1Elts[i];
+      if (Elt.getOpcode() == ISD::UNDEF)
+        continue;
+      unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+      if (EltIdx < 8)
+        continue;
+      SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
+                                    DAG.getConstant(EltIdx - 8, PtrVT));
+      NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
+                         DAG.getConstant(i, PtrVT));
+    }
+    return NewV;
+  } else {
+    // All elements are from V1.
+    NewV = V1;
+    for (unsigned i = 0; i < 8; ++i) {
+      SDOperand Elt = V1Elts[i];
+      if (Elt.getOpcode() == ISD::UNDEF)
+        continue;
+      unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+      SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
+                                    DAG.getConstant(EltIdx, PtrVT));
+      NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
+                         DAG.getConstant(i, PtrVT));
+    }
+    return NewV;
+  }
+}
+
+
+/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
+/// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be
+/// done when every pair / quad of shuffle mask elements point to elements in
+/// the right sequence. e.g.
+/// vector_shuffle <>, <>, < 2, 3, | 10, 11, | 0, 1, | 14, 15>
+static
+SDOperand RewriteAsNarrowerShuffle(SDOperand V1, SDOperand V2,
+ MVT::ValueType VT,
+ SDOperand PermMask, SelectionDAG &DAG,
+ TargetLowering &TLI) {
+ unsigned NumElems = PermMask.getNumOperands();
+ unsigned NewWidth = (NumElems == 4) ? 2 : 4;
+ MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
+ MVT::ValueType NewVT = MaskVT;
+ switch (VT) {
+ case MVT::v4f32: NewVT = MVT::v2f64; break;
+ case MVT::v4i32: NewVT = MVT::v2i64; break;
+ case MVT::v8i16: NewVT = MVT::v4i32; break;
+ case MVT::v16i8: NewVT = MVT::v4i32; break;
+ default: assert(false && "Unexpected!");
+ }
+
+ if (NewWidth == 2)
+ if (MVT::isInteger(VT))
+ NewVT = MVT::v2i64;
+ else
+ NewVT = MVT::v2f64;
+ unsigned Scale = NumElems / NewWidth; // Narrow elements per wide element.
+ SmallVector<SDOperand, 8> MaskVec;
+ for (unsigned i = 0; i < NumElems; i += Scale) {
+ unsigned StartIdx = ~0U; // ~0U until a non-undef element of the group is seen.
+ for (unsigned j = 0; j < Scale; ++j) {
+ SDOperand Elt = PermMask.getOperand(i+j);
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
+ if (StartIdx == ~0U)
+ StartIdx = EltIdx - (EltIdx % Scale);
+ if (EltIdx != StartIdx + j)
+ return SDOperand(); // Group is not a Scale-aligned consecutive run.
+ }
+ if (StartIdx == ~0U) // Every element of the group was undef.
+ MaskVec.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
+ else
+ MaskVec.push_back(DAG.getConstant(StartIdx / Scale, MVT::i32));
+ }
+
+ V1 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V1);
+ V2 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V2);
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, NewVT, V1, V2, // Result type is NewVT.
+ DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
+ &MaskVec[0], MaskVec.size()));
+}
+
+
SDOperand
X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
SDOperand V1 = Op.getOperand(0);
return PromoteSplat(Op, DAG);
}
+ // If the shuffle can be profitably rewritten as a narrower shuffle, then
+ // do it!
+ if (VT == MVT::v8i16 || VT == MVT::v16i8) {
+ SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
+ if (NewOp.Val)
+ return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
+ } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
+ // FIXME: Figure out a cleaner way to do this.
+ // Try to make use of movq to zero out the top part.
+ if (ISD::isBuildVectorAllZeros(V2.Val)) {
+ SDOperand NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
+ if (NewOp.Val) {
+ SDOperand NewV1 = NewOp.getOperand(0);
+ SDOperand NewV2 = NewOp.getOperand(1);
+ SDOperand NewMask = NewOp.getOperand(2);
+ if (isCommutedMOVL(NewMask.Val, true, false)) {
+ NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG);
+ NewOp = DAG.getNode(ISD::VECTOR_SHUFFLE, NewOp.getValueType(),
+ NewV1, NewV2, getMOVLMask(2, DAG));
+ return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
+ }
+ }
+ } else if (ISD::isBuildVectorAllZeros(V1.Val)) {
+ SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
+ if (NewOp.Val && X86::isMOVLMask(NewOp.getOperand(2).Val))
+ return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
+ }
+ }
+
if (X86::isMOVLMask(PermMask.Val))
return (V1IsUndef) ? V2 : Op;
return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
bool Commuted = false;
+ // FIXME: This should also accept a bitcast of a splat? Be careful, not
+ // 1,1,1,1 -> v8i16 though.
V1IsSplat = isSplatVector(V1.Val);
V2IsSplat = isSplatVector(V2.Val);
+
+ // Canonicalize the splat or undef, if present, to be on the RHS.
if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
std::swap(V1IsSplat, V2IsSplat);
Commuted = true;
}
+ // FIXME: Figure out a cleaner way to do this.
if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
if (V2IsUndef) return V1;
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (X86::isSHUFPMask(PermMask.Val) &&
MVT::getSizeInBits(VT) != 64) // Don't do this for MMX.
return Op;
-
- // Handle v8i16 shuffle high / low shuffle node pair.
- if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
- MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
- MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
- SmallVector<SDOperand, 8> MaskVec;
- for (unsigned i = 0; i != 4; ++i)
- MaskVec.push_back(PermMask.getOperand(i));
- for (unsigned i = 4; i != 8; ++i)
- MaskVec.push_back(DAG.getConstant(i, BaseVT));
- SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
- &MaskVec[0], MaskVec.size());
- V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
- MaskVec.clear();
- for (unsigned i = 0; i != 4; ++i)
- MaskVec.push_back(DAG.getConstant(i, BaseVT));
- for (unsigned i = 4; i != 8; ++i)
- MaskVec.push_back(PermMask.getOperand(i));
- Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size());
- return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
- }
} else {
// Floating point cases in the other order.
if (X86::isSHUFPMask(PermMask.Val))
}
}
- if (NumElems == 4 &&
- // Don't do this for MMX.
- MVT::getSizeInBits(VT) != 64) {
+ // Handle v8i16 specifically since SSE can do byte extraction and insertion.
+ if (VT == MVT::v8i16) {
+ SDOperand NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this);
+ if (NewOp.Val)
+ return NewOp;
+ }
+
+ // Handle all 4 wide cases with a number of shuffles.
+ if (NumElems == 4 && MVT::getSizeInBits(VT) != 64) {
+ // Don't do this for MMX.
MVT::ValueType MaskVT = PermMask.getValueType();
MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
SmallVector<std::pair<int, int>, 8> Locs;
Locs.reserve(NumElems);
- SmallVector<SDOperand, 8> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
- SmallVector<SDOperand, 8> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
+ SmallVector<SDOperand, 8> Mask1(NumElems,
+ DAG.getNode(ISD::UNDEF, MaskEVT));
+ SmallVector<SDOperand, 8> Mask2(NumElems,
+ DAG.getNode(ISD::UNDEF, MaskEVT));
unsigned NumHi = 0;
unsigned NumLo = 0;
// If no more than two elements come from either vector. This can be
MVT::ValueType VT = Op.getValueType();
// TODO: handle v16i8.
if (MVT::getSizeInBits(VT) == 16) {
+ SDOperand Vec = Op.getOperand(0);
+ unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
+ if (Idx == 0)
+ return DAG.getNode(ISD::TRUNCATE, MVT::i16,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
+ DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Vec),
+ Op.getOperand(1)));
// Transform it so it match pextrw which produces a 32-bit result.
MVT::ValueType EVT = (MVT::ValueType)(VT+1);
SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, VT, Assert);
} else if (MVT::getSizeInBits(VT) == 32) {
- SDOperand Vec = Op.getOperand(0);
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Idx == 0)
return Op;
push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&IdxVec[0], IdxVec.size());
+ SDOperand Vec = Op.getOperand(0);
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
- DAG.getConstant(0, getPointerTy()));
+ DAG.getIntPtrConstant(0));
} else if (MVT::getSizeInBits(VT) == 64) {
- SDOperand Vec = Op.getOperand(0);
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Idx == 0)
return Op;
push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&IdxVec[0], IdxVec.size());
+ SDOperand Vec = Op.getOperand(0);
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
- DAG.getConstant(0, getPointerTy()));
+ DAG.getIntPtrConstant(0));
}
return SDOperand();
SDOperand
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
- // Transform it so it match pinsrw which expects a 16-bit value in a GR32
- // as its second argument.
MVT::ValueType VT = Op.getValueType();
- MVT::ValueType BaseVT = MVT::getVectorElementType(VT);
+ MVT::ValueType EVT = MVT::getVectorElementType(VT); // Element type of Op's vector type.
+ if (EVT == MVT::i8)
+ return SDOperand(); // i8 elements are not custom lowered here: return an empty SDOperand.
+
SDOperand N0 = Op.getOperand(0);
SDOperand N1 = Op.getOperand(1);
SDOperand N2 = Op.getOperand(2);
- if (MVT::getSizeInBits(BaseVT) == 16) {
+
+ if (MVT::getSizeInBits(EVT) == 16) {
+ // Transform it so it matches pinsrw, which expects a 16-bit value in a GR32
+ // as its second argument.
if (N1.getValueType() != MVT::i32)
N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
if (N2.getValueType() != MVT::i32)
- N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(),getPointerTy());
+ N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getValue()); // Re-emit the constant index via getIntPtrConstant.
return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
- } else if (MVT::getSizeInBits(BaseVT) == 32) {
- unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
- if (Idx == 0) {
- // Use a movss.
- N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
- MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
- MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
- SmallVector<SDOperand, 8> MaskVec;
- MaskVec.push_back(DAG.getConstant(4, BaseVT));
- for (unsigned i = 1; i <= 3; ++i)
- MaskVec.push_back(DAG.getConstant(i, BaseVT));
- return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
- DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
- &MaskVec[0], MaskVec.size()));
- } else {
- // Use two pinsrw instructions to insert a 32 bit value.
- Idx <<= 1;
- if (MVT::isFloatingPoint(N1.getValueType())) {
- N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
- N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
- N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
- DAG.getConstant(0, getPointerTy()));
- }
- N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
- N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
- DAG.getConstant(Idx, getPointerTy()));
- N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
- N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
- DAG.getConstant(Idx+1, getPointerTy()));
- return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
- }
}
-
return SDOperand();
}
return Result;
}
+/// LowerShift - Lower SHL_PARTS, SRA_PARTS and friends, which return two i32
+/// values (merged as Lo, Hi) and take a 2 x i32 value to shift plus a shift amount.
SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
- assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
- "Not an i64 shift!");
- bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
- SDOperand ShOpLo = Op.getOperand(0);
- SDOperand ShOpHi = Op.getOperand(1);
- SDOperand ShAmt = Op.getOperand(2);
- SDOperand Tmp1 = isSRA ?
- DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
- DAG.getConstant(0, MVT::i32);
-
- SDOperand Tmp2, Tmp3;
- if (Op.getOpcode() == ISD::SHL_PARTS) {
- Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
- Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
- } else {
- Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
- Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
- }
+ assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
+ "Not an i64 shift!");
+ bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; // Arithmetic right shift?
+ SDOperand ShOpLo = Op.getOperand(0); // Low i32 half of the value to shift.
+ SDOperand ShOpHi = Op.getOperand(1); // High i32 half of the value to shift.
+ SDOperand ShAmt = Op.getOperand(2); // Shift amount.
+ SDOperand Tmp1 = isSRA ? // Fill value: ShOpHi >> 31 (arithmetic) for SRA, else the constant 0.
+ DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
+ DAG.getConstant(0, MVT::i32);
+
+ SDOperand Tmp2, Tmp3; // Tmp2: double-shift (SHLD/SHRD) result; Tmp3: plain shift of one half.
+ if (Op.getOpcode() == ISD::SHL_PARTS) {
+ Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
+ Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
+ } else {
+ Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
+ Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
+ }
- const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
- SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt,
- DAG.getConstant(32, MVT::i8));
- SDOperand Cond = DAG.getNode(X86ISD::CMP, MVT::i32,
- AndNode, DAG.getConstant(0, MVT::i8));
-
- SDOperand Hi, Lo;
- SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8);
- unsigned Opc = X86ISD::CMOV;
- VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag);
- SmallVector<SDOperand, 4> Ops;
- if (Op.getOpcode() == ISD::SHL_PARTS) {
- Ops.push_back(Tmp2);
- Ops.push_back(Tmp3);
- Ops.push_back(CC);
- Ops.push_back(Cond);
- Hi = DAG.getNode(Opc, MVT::i32, &Ops[0], Ops.size());
-
- Ops.clear();
- Ops.push_back(Tmp3);
- Ops.push_back(Tmp1);
- Ops.push_back(CC);
- Ops.push_back(Cond);
- Lo = DAG.getNode(Opc, MVT::i32, &Ops[0], Ops.size());
- } else {
- Ops.push_back(Tmp2);
- Ops.push_back(Tmp3);
- Ops.push_back(CC);
- Ops.push_back(Cond);
- Lo = DAG.getNode(Opc, MVT::i32, &Ops[0], Ops.size());
-
- Ops.clear();
- Ops.push_back(Tmp3);
- Ops.push_back(Tmp1);
- Ops.push_back(CC);
- Ops.push_back(Cond);
- Hi = DAG.getNode(Opc, MVT::i32, &Ops[0], Ops.size());
- }
+ const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); // NOTE(review): reassigned below before any visible use.
+ SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt, // Isolate the "32" bit of the shift amount.
+ DAG.getConstant(32, MVT::i8));
+ SDOperand Cond = DAG.getNode(X86ISD::CMP, MVT::i32, // Compare (ShAmt & 32) against 0.
+ AndNode, DAG.getConstant(0, MVT::i8));
+
+ SDOperand Hi, Lo;
+ SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8); // Condition code: (ShAmt & 32) != 0.
+ VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag); // NOTE(review): also overwritten before use; the CMOVs below pass MVT::i32 directly.
+ SmallVector<SDOperand, 4> Ops; // CMOV operand list: two candidate values, CC, Cond.
+ if (Op.getOpcode() == ISD::SHL_PARTS) {
+ Ops.push_back(Tmp2);
+ Ops.push_back(Tmp3);
+ Ops.push_back(CC);
+ Ops.push_back(Cond);
+ Hi = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); // Presumably yields Tmp3 when CC holds, else Tmp2 -- confirm X86ISD::CMOV operand order.
- VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32);
Ops.clear();
- Ops.push_back(Lo);
- Ops.push_back(Hi);
- return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size());
+ Ops.push_back(Tmp3);
+ Ops.push_back(Tmp1);
+ Ops.push_back(CC);
+ Ops.push_back(Cond);
+ Lo = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size());
+ } else {
+ Ops.push_back(Tmp2);
+ Ops.push_back(Tmp3);
+ Ops.push_back(CC);
+ Ops.push_back(Cond);
+ Lo = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); // Right shifts: same selection as above, with the Lo/Hi roles swapped.
+
+ Ops.clear();
+ Ops.push_back(Tmp3);
+ Ops.push_back(Tmp1);
+ Ops.push_back(CC);
+ Ops.push_back(Cond);
+ Hi = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size());
+ }
+
+ VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32); // Result types of the merged node: two i32 values.
+ Ops.clear();
+ Ops.push_back(Lo);
+ Ops.push_back(Hi);
+ return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size()); // Return Lo and Hi as one two-result node.
}
SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
StackSlot, NULL, 0);
// These are really Legal; caller falls through into that case.
- if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f32 && X86ScalarSSEf32)
+ if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType()))
return Result;
- if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f64 && X86ScalarSSEf64)
- return Result;
- if (SrcVT==MVT::i64 && Op.getValueType() != MVT::f80 &&
+ if (SrcVT == MVT::i64 && Op.getValueType() != MVT::f80 &&
Subtarget->is64Bit())
return Result;
// Build the FILD
SDVTList Tys;
- bool useSSE = (X86ScalarSSEf32 && Op.getValueType() == MVT::f32) ||
- (X86ScalarSSEf64 && Op.getValueType() == MVT::f64);
+ bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType());
if (useSSE)
Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
else
return Result;
}
-SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
+std::pair<SDOperand,SDOperand> X86TargetLowering::
+FP_TO_SINTHelper(SDOperand Op, SelectionDAG &DAG) {
assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
"Unknown FP_TO_SINT to lower!");
- // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
- // stack slot.
- SDOperand Result;
- MachineFunction &MF = DAG.getMachineFunction();
- unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
- int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
- SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
// These are really Legal.
if (Op.getValueType() == MVT::i32 &&
- X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32)
- return Result;
- if (Op.getValueType() == MVT::i32 &&
- X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64)
- return Result;
+ isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
+ return std::make_pair(SDOperand(), SDOperand());
if (Subtarget->is64Bit() &&
Op.getValueType() == MVT::i64 &&
Op.getOperand(0).getValueType() != MVT::f80)
- return Result;
+ return std::make_pair(SDOperand(), SDOperand());
+ // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
+ // stack slot.
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
+ int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
+ SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
unsigned Opc;
switch (Op.getValueType()) {
- default: assert(0 && "Invalid FP_TO_SINT to lower!");
- case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
- case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
- case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
+ default: assert(0 && "Invalid FP_TO_SINT to lower!");
+ case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
+ case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
+ case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
}
SDOperand Chain = DAG.getEntryNode();
SDOperand Value = Op.getOperand(0);
- if ((X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32) ||
- (X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64)) {
+ if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) {
assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0);
SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
SDOperand Ops[] = { Chain, Value, StackSlot };
SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3);
+ return std::make_pair(FIST, StackSlot);
+}
+
+SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
+ std::pair<SDOperand,SDOperand> Vals = FP_TO_SINTHelper(Op, DAG); // Helper returns (FIST store node, stack slot).
+ SDOperand FIST = Vals.first, StackSlot = Vals.second;
+ if (FIST.Val == 0) return SDOperand(); // Helper produced no node: return an empty SDOperand.
+
// Load the result.
return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0);
}
+SDNode *X86TargetLowering::ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG) { // Expands result 0 of N through FP_TO_SINTHelper and an i64 load.
+ std::pair<SDOperand,SDOperand> Vals = FP_TO_SINTHelper(SDOperand(N, 0), DAG); // Helper returns (FIST store node, stack slot).
+ SDOperand FIST = Vals.first, StackSlot = Vals.second;
+ if (FIST.Val == 0) return 0; // Helper produced no node: return null.
+
+ // Return an i64 load from the stack slot, chained on the FIST node.
+ SDOperand Res = DAG.getLoad(MVT::i64, FIST, StackSlot, NULL, 0);
+
+ // Use a MERGE_VALUES node to drop the chain result value of the load.
+ return DAG.getNode(ISD::MERGE_VALUES, MVT::i64, Res).Val;
+}
+
SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
MVT::ValueType VT = Op.getValueType();
MVT::ValueType EltVT = VT;
SrcVT = VT;
SrcTy = MVT::getTypeForValueType(SrcVT);
}
+ // And if it is bigger, shrink it first.
+ if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) {
+ Op1 = DAG.getNode(ISD::FP_ROUND, VT, Op1, DAG.getIntPtrConstant(1));
+ SrcVT = VT;
+ SrcTy = MVT::getTypeForValueType(SrcVT);
+ }
+
+ // At this point the operands and the result should have the same
+ // type, and that won't be f80 since that is not custom lowered.
// First get the sign bit of second operand.
std::vector<Constant*> CV;
DAG.getConstant(32, MVT::i32));
SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit);
SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit,
- DAG.getConstant(0, getPointerTy()));
+ DAG.getIntPtrConstant(0));
}
// Clear first operand sign bit.
SDOperand Cmp = Cond.getOperand(1);
unsigned Opc = Cmp.getOpcode();
MVT::ValueType VT = Op.getValueType();
+
bool IllegalFPCMov = false;
- if (VT == MVT::f32 && !X86ScalarSSEf32)
- IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
- else if (VT == MVT::f64 && !X86ScalarSSEf64)
+ if (MVT::isFloatingPoint(VT) && !MVT::isVector(VT) &&
+ !isScalarFPTypeInSSEReg(VT)) // FPStack?
IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
+
if ((Opc == X86ISD::CMP ||
Opc == X86ISD::COMI ||
Opc == X86ISD::UCOMI) && !IllegalFPCMov) {
Chain, Op.getOperand(2), CC, Cond);
}
-SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
- unsigned CallingConv = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
- bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
-
- if (Subtarget->is64Bit())
- if(CallingConv==CallingConv::Fast && isTailCall && PerformTailCallOpt)
- return LowerX86_TailCallTo(Op, DAG, CallingConv);
- else
- return LowerX86_64CCCCallTo(Op, DAG, CallingConv);
- else
- switch (CallingConv) {
- default:
- assert(0 && "Unsupported calling convention");
- case CallingConv::Fast:
- if (isTailCall && PerformTailCallOpt)
- return LowerX86_TailCallTo(Op, DAG, CallingConv);
- else
- return LowerCCCCallTo(Op,DAG, CallingConv);
- case CallingConv::C:
- case CallingConv::X86_StdCall:
- return LowerCCCCallTo(Op, DAG, CallingConv);
- case CallingConv::X86_FastCall:
- return LowerFastCCCallTo(Op, DAG, CallingConv);
- }
-}
-
// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
// Calls to _alloca is needed to probe the stack when allocating more than 4k
SDOperand Flag;
MVT::ValueType IntPtr = getPointerTy();
- MVT::ValueType SPTy = (Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
+ MVT::ValueType SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag);
Flag = Chain.getValue(1);
return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2);
}
-SDOperand
-X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
- MachineFunction &MF = DAG.getMachineFunction();
- const Function* Fn = MF.getFunction();
- if (Fn->hasExternalLinkage() &&
- Subtarget->isTargetCygMing() &&
- Fn->getName() == "main")
- MF.getInfo<X86MachineFunctionInfo>()->setForceFramePointer(true);
-
- unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
- if (Subtarget->is64Bit())
- return LowerX86_64CCCArguments(Op, DAG);
- else
- switch(CC) {
- default:
- assert(0 && "Unsupported calling convention");
- case CallingConv::Fast:
- return LowerCCCArguments(Op,DAG, true);
- // Falls through
- case CallingConv::C:
- return LowerCCCArguments(Op, DAG);
- case CallingConv::X86_StdCall:
- MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(StdCall);
- return LowerCCCArguments(Op, DAG, true);
- case CallingConv::X86_FastCall:
- MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(FastCall);
- return LowerFastCCArguments(Op, DAG);
- }
-}
-
SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
SDOperand InFlag(0, 0);
SDOperand Chain = Op.getOperand(0);
// The libc version is likely to be faster for these cases. It can use the
// address value and run time information about the CPU.
if ((Align & 3) != 0 ||
- (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) {
+ (I && I->getValue() > Subtarget->getMaxInlineSizeThreshold())) {
MVT::ValueType IntPtr = getPointerTy();
const Type *IntPtrTy = getTargetData()->getIntPtrType();
TargetLowering::ArgListTy Args;
if (AVT > MVT::i8) {
if (I) {
unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
- Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
+ Count = DAG.getIntPtrConstant(I->getValue() / UBytes);
BytesLeft = I->getValue() % UBytes;
} else {
assert(AVT >= MVT::i32 &&
return Chain;
}
-SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
- SDOperand ChainOp = Op.getOperand(0);
- SDOperand DestOp = Op.getOperand(1);
- SDOperand SourceOp = Op.getOperand(2);
- SDOperand CountOp = Op.getOperand(3);
- SDOperand AlignOp = Op.getOperand(4);
- unsigned Align = (unsigned)cast<ConstantSDNode>(AlignOp)->getValue();
- if (Align == 0) Align = 1;
-
- // The libc version is likely to be faster for the following cases. It can
- // use the address value and run time information about the CPU.
- // With glibc 2.6.1 on a core 2, coping an array of 100M longs was 30% faster
-
- // If not DWORD aligned, call memcpy.
- if ((Align & 3) != 0)
- return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
-
- // If size is unknown, call memcpy.
- ConstantSDNode *I = dyn_cast<ConstantSDNode>(CountOp);
- if (!I)
- return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
-
- // If size is more than the threshold, call memcpy.
- unsigned Size = I->getValue();
- if (Size > Subtarget->getMinRepStrSizeThreshold())
- return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
-
- return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align, DAG);
-}
-
-SDOperand X86TargetLowering::LowerMEMCPYCall(SDOperand Chain,
- SDOperand Dest,
- SDOperand Source,
- SDOperand Count,
- SelectionDAG &DAG) {
- MVT::ValueType IntPtr = getPointerTy();
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- Entry.Ty = getTargetData()->getIntPtrType();
- Entry.Node = Dest; Args.push_back(Entry);
- Entry.Node = Source; Args.push_back(Entry);
- Entry.Node = Count; Args.push_back(Entry);
- std::pair<SDOperand,SDOperand> CallResult =
- LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
- DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
- return CallResult.second;
-}
-
SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain,
SDOperand Dest,
SDOperand Source,
}
unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
- SDOperand Count = DAG.getConstant(Size / UBytes, getPointerTy());
+ SDOperand Count = DAG.getIntPtrConstant(Size / UBytes);
BytesLeft = Size % UBytes;
SDOperand InFlag(0, 0);
return Chain;
}
-SDOperand
-X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) {
+/// Expand the result of: i64,outchain = READCYCLECOUNTER inchain. Emits an RDTSC_DAG node and returns a MERGE_VALUES of (i64 value, chain).
+SDNode *X86TargetLowering::ExpandREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG){
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
- SDOperand TheOp = Op.getOperand(0);
- SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheOp, 1);
+ SDOperand TheChain = N->getOperand(0); // Incoming chain (operand 0 of N).
+ SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheChain, 1); // Produces a chain and a flag (see Tys).
if (Subtarget->is64Bit()) {
- SDOperand Copy1 =
- DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1));
- SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::RDX,
- MVT::i64, Copy1.getValue(2));
- SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, Copy2,
+ SDOperand rax = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1)); // Read RAX, flagged to the RDTSC node.
+ SDOperand rdx = DAG.getCopyFromReg(rax.getValue(1), X86::RDX, // Read RDX, chained and flagged after the RAX copy.
+ MVT::i64, rax.getValue(2));
+ SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, rdx, // rdx << 32; OR'ed with rax below.
DAG.getConstant(32, MVT::i8));
SDOperand Ops[] = {
- DAG.getNode(ISD::OR, MVT::i64, Copy1, Tmp), Copy2.getValue(1)
+ DAG.getNode(ISD::OR, MVT::i64, rax, Tmp), rdx.getValue(1) // { combined i64 value, output chain }
};
Tys = DAG.getVTList(MVT::i64, MVT::Other);
- return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2);
+ return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2).Val; // Return the underlying SDNode.
}
- SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1));
- SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::EDX,
- MVT::i32, Copy1.getValue(2));
- SDOperand Ops[] = { Copy1, Copy2, Copy2.getValue(1) };
- Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
- return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 3);
+ SDOperand eax = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)); // Non-64-bit path: read EAX, flagged to the RDTSC node.
+ SDOperand edx = DAG.getCopyFromReg(eax.getValue(1), X86::EDX, // Read EDX, chained and flagged after the EAX copy.
+ MVT::i32, eax.getValue(2));
+ // Use a BUILD_PAIR to merge the two 32-bit values (eax first, edx second) into a 64-bit one.
+ SDOperand Ops[] = { eax, edx };
+ Ops[0] = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Ops, 2);
+
+ // Use a MERGE_VALUES to return the value and chain; Ops[1] is reused to hold the chain.
+ Ops[1] = edx.getValue(1);
+ Tys = DAG.getVTList(MVT::i64, MVT::Other);
+ return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2).Val;
}
SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
MemOps.push_back(Store);
// Store fp_offset
- FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
- DAG.getConstant(4, getPointerTy()));
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(4));
Store = DAG.getStore(Op.getOperand(0),
DAG.getConstant(VarArgsFPOffset, MVT::i32),
FIN, SV->getValue(), SV->getOffset());
MemOps.push_back(Store);
// Store ptr to overflow_arg_area
- FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
- DAG.getConstant(4, getPointerTy()));
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(4));
SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(),
SV->getOffset());
MemOps.push_back(Store);
// Store ptr to reg_save_area.
- FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
- DAG.getConstant(8, getPointerTy()));
+ FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(8));
SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(),
SV->getOffset());
if (i == 2)
break;
SrcPtr = DAG.getNode(ISD::ADD, getPointerTy(), SrcPtr,
- DAG.getConstant(8, getPointerTy()));
+ DAG.getIntPtrConstant(8));
DstPtr = DAG.getNode(ISD::ADD, getPointerTy(), DstPtr,
- DAG.getConstant(8, getPointerTy()));
+ DAG.getIntPtrConstant(8));
}
return Chain;
}
SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI,
- DAG.getConstant(4, getPointerTy()));
+ DAG.getIntPtrConstant(4));
}
SDOperand X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDOperand Op,
if (Subtarget->is64Bit())
return SDOperand();
- return DAG.getConstant(8, getPointerTy());
+ return DAG.getIntPtrConstant(8);
}
SDOperand X86TargetLowering::LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG)
getPointerTy());
SDOperand StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame,
- DAG.getConstant(-4UL, getPointerTy()));
+ DAG.getIntPtrConstant(-4UL));
StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset);
Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0);
Chain = DAG.getCopyToReg(Chain, X86::ECX, StoreAddr);
- MF.addLiveOut(X86::ECX);
+ MF.getRegInfo().addLiveOut(X86::ECX);
return DAG.getNode(X86ISD::EH_RETURN, MVT::Other,
Chain, DAG.getRegister(X86::ECX, getPointerTy()));
SrcValueSDNode *TrmpSV = cast<SrcValueSDNode>(Op.getOperand(4));
+ const X86InstrInfo *TII =
+ ((X86TargetMachine&)getTargetMachine()).getInstrInfo();
+
if (Subtarget->is64Bit()) {
- return SDOperand(); // not yet supported
+ SDOperand OutChains[6];
+
+ // Large code-model.
+
+ const unsigned char JMP64r = TII->getBaseOpcodeFor(X86::JMP64r);
+ const unsigned char MOV64ri = TII->getBaseOpcodeFor(X86::MOV64ri);
+
+ const unsigned char N86R10 =
+ ((X86RegisterInfo*)RegInfo)->getX86RegNum(X86::R10);
+ const unsigned char N86R11 =
+ ((X86RegisterInfo*)RegInfo)->getX86RegNum(X86::R11);
+
+ const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix
+
+ // Load the pointer to the nested function into R11.
+ unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11
+ SDOperand Addr = Trmp;
+ OutChains[0] = DAG.getStore(Root, DAG.getConstant(OpCode, MVT::i16), Addr,
+ TrmpSV->getValue(), TrmpSV->getOffset());
+
+ Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(2, MVT::i64));
+ OutChains[1] = DAG.getStore(Root, FPtr, Addr, TrmpSV->getValue(),
+ TrmpSV->getOffset() + 2, false, 2);
+
+ // Load the 'nest' parameter value into R10.
+ // R10 is specified in X86CallingConv.td
+ OpCode = ((MOV64ri | N86R10) << 8) | REX_WB; // movabsq r10
+ Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(10, MVT::i64));
+ OutChains[2] = DAG.getStore(Root, DAG.getConstant(OpCode, MVT::i16), Addr,
+ TrmpSV->getValue(), TrmpSV->getOffset() + 10);
+
+ Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(12, MVT::i64));
+ OutChains[3] = DAG.getStore(Root, Nest, Addr, TrmpSV->getValue(),
+ TrmpSV->getOffset() + 12, false, 2);
+
+ // Jump to the nested function.
+ OpCode = (JMP64r << 8) | REX_WB; // jmpq *...
+ Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(20, MVT::i64));
+ OutChains[4] = DAG.getStore(Root, DAG.getConstant(OpCode, MVT::i16), Addr,
+ TrmpSV->getValue(), TrmpSV->getOffset() + 20);
+
+ unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11
+ Addr = DAG.getNode(ISD::ADD, MVT::i64, Trmp, DAG.getConstant(22, MVT::i64));
+ OutChains[5] = DAG.getStore(Root, DAG.getConstant(ModRM, MVT::i8), Addr,
+ TrmpSV->getValue(), TrmpSV->getOffset() + 22);
+
+ SDOperand Ops[] =
+ { Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 6) };
+ return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), Ops, 2);
} else {
Function *Func = (Function *)
cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
default:
assert(0 && "Unsupported calling convention");
case CallingConv::C:
- case CallingConv::Fast:
case CallingConv::X86_StdCall: {
// Pass 'nest' parameter in ECX.
// Must be kept in sync with X86CallingConv.td
// Check that ECX wasn't needed by an 'inreg' parameter.
const FunctionType *FTy = Func->getFunctionType();
- const ParamAttrsList *Attrs = FTy->getParamAttrs();
+ const ParamAttrsList *Attrs = Func->getParamAttrs();
if (Attrs && !Func->isVarArg()) {
unsigned InRegCount = 0;
break;
}
- const X86InstrInfo *TII =
- ((X86TargetMachine&)getTargetMachine()).getInstrInfo();
-
SDOperand OutChains[4];
SDOperand Addr, Disp;
Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(10, MVT::i32));
Disp = DAG.getNode(ISD::SUB, MVT::i32, FPtr, Addr);
- unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri);
- unsigned char N86Reg = ((X86RegisterInfo&)RegInfo).getX86RegNum(NestReg);
+ const unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri);
+ const unsigned char N86Reg =
+ ((X86RegisterInfo*)RegInfo)->getX86RegNum(NestReg);
OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
Trmp, TrmpSV->getValue(), TrmpSV->getOffset());
OutChains[1] = DAG.getStore(Root, Nest, Addr, TrmpSV->getValue(),
TrmpSV->getOffset() + 1, false, 1);
- unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP);
+ const unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP);
Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32));
OutChains[2] = DAG.getStore(Root, DAG.getConstant(JMP, MVT::i8), Addr,
TrmpSV->getValue() + 5, TrmpSV->getOffset());
}
}
+SDOperand X86TargetLowering::LowerFLT_ROUNDS(SDOperand Op, SelectionDAG &DAG) {
+  // Lower FLT_ROUNDS by spilling the FP control word (CW) to a stack slot,
+  // reloading it, and renumbering its rounding-control field.
+  //
+  // Bits 11:10 of the control word select the rounding mode:
+  //   00  Round to nearest
+  //   01  Round to -inf
+  //   10  Round to +inf
+  //   11  Round to 0
+  //
+  // FLT_ROUNDS uses a different numbering:
+  //   -1  Undefined
+  //    0  Round to 0
+  //    1  Round to nearest
+  //    2  Round to +inf
+  //    3  Round to -inf
+  //
+  // The conversion between the two is:
+  //   (((((CW & 0x800) >> 11) | ((CW & 0x400) >> 9)) + 1) & 3)
+  // i.e. move bit 11 to bit 0 and bit 10 to bit 1, then add one modulo four.
+  MVT::ValueType ResultVT = Op.getValueType();
+  MachineFunction &Fn = DAG.getMachineFunction();
+
+  // Create a 2-byte stack object, aligned to the target's stack alignment, to
+  // receive the control word.
+  unsigned SlotAlign = Fn.getTarget().getFrameInfo()->getStackAlignment();
+  int SlotIdx = Fn.getFrameInfo()->CreateStackObject(2, SlotAlign);
+  SDOperand Slot = DAG.getFrameIndex(SlotIdx, getPointerTy());
+
+  // Store the control word into the slot, then load it back as an i16.  The
+  // load is chained on the store.
+  SDOperand StoreCW =
+    DAG.getNode(X86ISD::FNSTCW16m, MVT::Other, DAG.getEntryNode(), Slot);
+  SDOperand CW = DAG.getLoad(MVT::i16, StoreCW, Slot, NULL, 0);
+
+  // Bit 11 of the control word becomes bit 0.
+  SDOperand MaskedBit11 =
+    DAG.getNode(ISD::AND, MVT::i16, CW, DAG.getConstant(0x800, MVT::i16));
+  SDOperand LowBit =
+    DAG.getNode(ISD::SRL, MVT::i16, MaskedBit11, DAG.getConstant(11, MVT::i8));
+
+  // Bit 10 of the control word becomes bit 1.
+  SDOperand MaskedBit10 =
+    DAG.getNode(ISD::AND, MVT::i16, CW, DAG.getConstant(0x400, MVT::i16));
+  SDOperand HighBit =
+    DAG.getNode(ISD::SRL, MVT::i16, MaskedBit10, DAG.getConstant(9, MVT::i8));
+
+  // (swapped field + 1) & 3.
+  SDOperand Swapped = DAG.getNode(ISD::OR, MVT::i16, LowBit, HighBit);
+  SDOperand Bumped =
+    DAG.getNode(ISD::ADD, MVT::i16, Swapped, DAG.getConstant(1, MVT::i16));
+  SDOperand Mode =
+    DAG.getNode(ISD::AND, MVT::i16, Bumped, DAG.getConstant(3, MVT::i16));
+
+  // Resize the i16 value to the type the node is expected to produce.
+  if (MVT::getSizeInBits(ResultVT) < 16)
+    return DAG.getNode(ISD::TRUNCATE, ResultVT, Mode);
+  return DAG.getNode(ISD::ZERO_EXTEND, ResultVT, Mode);
+}
+
+SDOperand X86TargetLowering::LowerCTLZ(SDOperand Op, SelectionDAG &DAG) {
+  // Lower CTLZ as a bsr (scan bits in reverse) whose result is xor'ed with
+  // NumBits-1.  A cmov supplies the value to use when the source is zero.
+  MVT::ValueType ResVT = Op.getValueType();
+  unsigned Width = MVT::getSizeInBits(ResVT);
+  bool IsByte = ResVT == MVT::i8;
+
+  // There is not an i8 bsr, so an i8 source is scanned as a zero extended i32.
+  MVT::ValueType ScanVT = ResVT;
+  SDOperand Src = Op.getOperand(0);
+  if (IsByte) {
+    ScanVT = MVT::i32;
+    Src = DAG.getNode(ISD::ZERO_EXTEND, ScanVT, Src);
+  }
+
+  // The bsr node has two results: the scan value and an i32 for EFLAGS.
+  SDVTList ScanVTs = DAG.getVTList(ScanVT, MVT::i32);
+  SDOperand Scan = DAG.getNode(X86ISD::BSR, ScanVTs, Src);
+
+  // If src is zero (i.e. bsr sets ZF), select 2*Width-1 instead, so that the
+  // xor with Width-1 below yields Width.
+  SDOperand SelectOps[] = {
+    Scan,
+    DAG.getConstant(Width + Width - 1, ScanVT),
+    DAG.getConstant(X86::COND_E, MVT::i8),
+    Scan.getValue(1)
+  };
+  SDOperand Selected = DAG.getNode(X86ISD::CMOV, ScanVT, SelectOps, 4);
+
+  // Finally xor with Width-1.
+  SDOperand Count =
+    DAG.getNode(ISD::XOR, ScanVT, Selected, DAG.getConstant(Width - 1, ScanVT));
+
+  // Narrow back down if the scan was widened.
+  if (!IsByte)
+    return Count;
+  return DAG.getNode(ISD::TRUNCATE, MVT::i8, Count);
+}
+
+SDOperand X86TargetLowering::LowerCTTZ(SDOperand Op, SelectionDAG &DAG) {
+  // Lower CTTZ as a bsf (scan bits forward), with a cmov supplying NumBits
+  // when the source is zero.
+  MVT::ValueType ResVT = Op.getValueType();
+  unsigned Width = MVT::getSizeInBits(ResVT);
+  bool IsByte = ResVT == MVT::i8;
+
+  // An i8 source is scanned as a zero extended i32.
+  MVT::ValueType ScanVT = ResVT;
+  SDOperand Src = Op.getOperand(0);
+  if (IsByte) {
+    ScanVT = MVT::i32;
+    Src = DAG.getNode(ISD::ZERO_EXTEND, ScanVT, Src);
+  }
+
+  // The bsf node has two results: the scan value and an i32 for EFLAGS.
+  SDVTList ScanVTs = DAG.getVTList(ScanVT, MVT::i32);
+  SDOperand Scan = DAG.getNode(X86ISD::BSF, ScanVTs, Src);
+
+  // If src is zero (i.e. bsf sets ZF), the result is Width.
+  SDOperand SelectOps[] = {
+    Scan,
+    DAG.getConstant(Width, ScanVT),
+    DAG.getConstant(X86::COND_E, MVT::i8),
+    Scan.getValue(1)
+  };
+  SDOperand Count = DAG.getNode(X86ISD::CMOV, ScanVT, SelectOps, 4);
+
+  // Narrow back down if the scan was widened.
+  if (!IsByte)
+    return Count;
+  return DAG.getNode(ISD::TRUNCATE, MVT::i8, Count);
+}
+
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
case ISD::MEMSET: return LowerMEMSET(Op, DAG);
case ISD::MEMCPY: return LowerMEMCPY(Op, DAG);
- case ISD::READCYCLECOUNTER: return LowerREADCYCLCECOUNTER(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::VACOPY: return LowerVACOPY(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
+ case ISD::FLT_ROUNDS: return LowerFLT_ROUNDS(Op, DAG);
+ case ISD::CTLZ: return LowerCTLZ(Op, DAG);
+ case ISD::CTTZ: return LowerCTTZ(Op, DAG);
+
+ // FIXME: REMOVE THIS WHEN LegalizeDAGTypes lands.
+ case ISD::READCYCLECOUNTER:
+ return SDOperand(ExpandREADCYCLECOUNTER(Op.Val, DAG), 0);
+ }
+}
+
+/// ExpandOperationResult - Custom hooks for expanding the result of node N.
+SDNode *X86TargetLowering::ExpandOperationResult(SDNode *N, SelectionDAG &DAG) {
+ switch (N->getOpcode()) {
+ default: assert(0 && "Should not custom lower this!");
+ case ISD::FP_TO_SINT: return ExpandFP_TO_SINT(N, DAG);
+ case ISD::READCYCLECOUNTER: return ExpandREADCYCLECOUNTER(N, DAG);
 }
- return SDOperand();
 }
const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default: return NULL;
+ case X86ISD::BSF: return "X86ISD::BSF";
+ case X86ISD::BSR: return "X86ISD::BSR";
case X86ISD::SHLD: return "X86ISD::SHLD";
case X86ISD::SHRD: return "X86ISD::SHRD";
case X86ISD::FAND: return "X86ISD::FAND";
case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER";
case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
+ case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
}
}
}
+/// isTruncateFree - Return true if truncating a value of integer type Ty1 to
+/// the narrower integer type Ty2 is reported as free.  A 64-bit source only
+/// qualifies when the subtarget is 64-bit.
+bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
+  // Only integer-to-integer truncations are considered.
+  bool BothInteger = Ty1->isInteger() && Ty2->isInteger();
+  if (!BothInteger)
+    return false;
+
+  unsigned SrcBits = Ty1->getPrimitiveSizeInBits();
+  unsigned DstBits = Ty2->getPrimitiveSizeInBits();
+
+  // A real truncation has a strictly wider source.
+  if (SrcBits > DstBits)
+    return Subtarget->is64Bit() || SrcBits < 64;
+  return false;
+}
+
+/// isTruncateFree - Value type flavour of the query: return true if
+/// truncating integer type VT1 to the narrower integer type VT2 is reported
+/// as free.  A 64-bit source only qualifies when the subtarget is 64-bit.
+bool X86TargetLowering::isTruncateFree(MVT::ValueType VT1,
+                                       MVT::ValueType VT2) const {
+  // Only integer-to-integer truncations are considered.
+  bool BothInteger = MVT::isInteger(VT1) && MVT::isInteger(VT2);
+  if (!BothInteger)
+    return false;
+
+  unsigned SrcBits = MVT::getSizeInBits(VT1);
+  unsigned DstBits = MVT::getSizeInBits(VT2);
+
+  // A real truncation has a strictly wider source.
+  if (SrcBits > DstBits)
+    return Subtarget->is64Bit() || SrcBits < 64;
+  return false;
+}
+
/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
// Load the old value of the high byte of the control word...
unsigned OldCW =
- F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass);
+ F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass);
addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx);
// Set the high part to be round to zero...
AM.Base.Reg = Op.getReg();
} else {
AM.BaseType = X86AddressMode::FrameIndexBase;
- AM.Base.FrameIndex = Op.getFrameIndex();
+ AM.Base.FrameIndex = Op.getIndex();
}
Op = MI->getOperand(1);
if (Op.isImmediate())
return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
else if (VT == MVT::i8)
return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
- break;
+ else if (VT == MVT::i64)
+ return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX, 0);
+ break;
}
}