setOperationAction(ISD::SUB, MVT::v8i8, Legal);
setOperationAction(ISD::SUB, MVT::v4i16, Legal);
setOperationAction(ISD::SUB, MVT::v2i32, Legal);
+ setOperationAction(ISD::SUB, MVT::v1i64, Legal);
setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
setOperationAction(ISD::MUL, MVT::v4i16, Legal);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
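+ // On x86-32 i64 is not a legal scalar type, so there is no GPR that could
+ // hold an extracted v2i64 element; only custom lower this in 64-bit mode.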
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
// Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
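+ // Copy byval arguments with a target-independent memcpy node. The
+ // AlwaysInline operand forces inline expansion; the copy must not turn
+ // into a library call in the middle of lowering this call's arguments.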
+ SDOperand AlwaysInline = DAG.getConstant(1, MVT::i1);
- return DAG.getNode(ISD::MEMCPY, MVT::Other, Chain, PtrOff, Arg, SizeNode,
- AlignNode);
+ return DAG.getMemcpy(Chain, PtrOff, Arg, SizeNode, AlignNode,
+ AlwaysInline);
} else {
return DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
}
}
/// IsEligibleForTailCallOptimization - Check to see whether the next instruction
-// following the call is a return. A function is eligible if caller/callee
-// calling conventions match, currently only fastcc supports tail calls, and the
-// function CALL is immediatly followed by a RET.
+/// following the call is a return. A function is eligible if the caller and
+/// callee calling conventions match (currently only fastcc supports tail
+/// calls) and the function CALL is immediately followed by a RET.
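+/// For example, this pair is eligible when both functions are fastcc:
+///   %res = tail call fastcc i32 @callee(i32 %x)
+///   ret i32 %res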
bool X86TargetLowering::IsEligibleForTailCallOptimization(SDOperand Call,
SDOperand Ret,
SelectionDAG& DAG) const {
- bool IsEligible = false;
+ if (!PerformTailCallOpt)
+ return false;
// Check whether CALL node immediately precedes the RET node and whether the
// return uses the result of the node or is a void return.
- if ((Ret.getNumOperands() == 1 &&
- (Ret.getOperand(0)== SDOperand(Call.Val,1) ||
- Ret.getOperand(0)== SDOperand(Call.Val,0))) ||
- (Ret.getOperand(0)== SDOperand(Call.Val,Call.Val->getNumValues()-1) &&
- Ret.getOperand(1)== SDOperand(Call.Val,0))) {
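+// A void return reads only the CALL's chain; a return of the call's result
+// must read both the result value and the chain.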
+ unsigned NumOps = Ret.getNumOperands();
+ if ((NumOps == 1 &&
+ (Ret.getOperand(0) == SDOperand(Call.Val,1) ||
+ Ret.getOperand(0) == SDOperand(Call.Val,0))) ||
+ (NumOps > 1 &&
+ Ret.getOperand(0) == SDOperand(Call.Val,Call.Val->getNumValues()-1) &&
+ Ret.getOperand(1) == SDOperand(Call.Val,0))) {
MachineFunction &MF = DAG.getMachineFunction();
unsigned CallerCC = MF.getFunction()->getCallingConv();
unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue();
if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
SDOperand Callee = Call.getOperand(4);
// On ELF/PIC, %ebx needs to be live-in.
- if(getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
- Subtarget->isPICStyleGOT()) {
- // Can only do local tail calls with PIC.
- GlobalValue * GV = 0;
- GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
- if(G != 0 &&
- (GV = G->getGlobal()) &&
- (GV->hasHiddenVisibility() || GV->hasProtectedVisibility()))
- IsEligible=true;
- } else {
- IsEligible=true;
- }
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ ||
+ !Subtarget->isPICStyleGOT())
+ return true;
+
+ // Can only do local tail calls with PIC.
+ GlobalValue *GV = 0;
+ GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+ if (G != 0 &&
+ (GV = G->getGlobal()) &&
+ (GV->hasHiddenVisibility() || GV->hasProtectedVisibility()))
+ return true;
}
}
- return IsEligible;
+
+ return false;
}
SDOperand X86TargetLowering::LowerX86_TailCallTo(SDOperand Op,
return Result;
}
+/// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and
+/// take a 2 x i32 value to shift plus a shift amount.
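+/// E.g. for SHL_PARTS the expansion is roughly:
+///   hi = shld(hi, lo, amt);  lo = shl(lo, amt);
+///   if (amt & 32) { hi = lo; lo = 0; }  // the CPU masks amt to 5 bits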
SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
- assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
- "Not an i64 shift!");
- bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
- SDOperand ShOpLo = Op.getOperand(0);
- SDOperand ShOpHi = Op.getOperand(1);
- SDOperand ShAmt = Op.getOperand(2);
- SDOperand Tmp1 = isSRA ?
- DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
- DAG.getConstant(0, MVT::i32);
-
- SDOperand Tmp2, Tmp3;
- if (Op.getOpcode() == ISD::SHL_PARTS) {
- Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
- Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
- } else {
- Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
- Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
- }
+ assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
+ "Not an i64 shift!");
+ bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
+ SDOperand ShOpLo = Op.getOperand(0);
+ SDOperand ShOpHi = Op.getOperand(1);
+ SDOperand ShAmt = Op.getOperand(2);
+ SDOperand Tmp1 = isSRA ?
+ DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
+ DAG.getConstant(0, MVT::i32);
+
+ SDOperand Tmp2, Tmp3;
+ if (Op.getOpcode() == ISD::SHL_PARTS) {
+ Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
+ Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
+ } else {
+ Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
+ Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
+ }
- const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
- SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt,
- DAG.getConstant(32, MVT::i8));
- SDOperand Cond = DAG.getNode(X86ISD::CMP, MVT::i32,
- AndNode, DAG.getConstant(0, MVT::i8));
-
- SDOperand Hi, Lo;
- SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8);
- unsigned Opc = X86ISD::CMOV;
- VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag);
- SmallVector<SDOperand, 4> Ops;
- if (Op.getOpcode() == ISD::SHL_PARTS) {
- Ops.push_back(Tmp2);
- Ops.push_back(Tmp3);
- Ops.push_back(CC);
- Ops.push_back(Cond);
- Hi = DAG.getNode(Opc, MVT::i32, &Ops[0], Ops.size());
-
- Ops.clear();
- Ops.push_back(Tmp3);
- Ops.push_back(Tmp1);
- Ops.push_back(CC);
- Ops.push_back(Cond);
- Lo = DAG.getNode(Opc, MVT::i32, &Ops[0], Ops.size());
- } else {
- Ops.push_back(Tmp2);
- Ops.push_back(Tmp3);
- Ops.push_back(CC);
- Ops.push_back(Cond);
- Lo = DAG.getNode(Opc, MVT::i32, &Ops[0], Ops.size());
-
- Ops.clear();
- Ops.push_back(Tmp3);
- Ops.push_back(Tmp1);
- Ops.push_back(CC);
- Ops.push_back(Cond);
- Hi = DAG.getNode(Opc, MVT::i32, &Ops[0], Ops.size());
- }
+ const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
+ SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt,
+ DAG.getConstant(32, MVT::i8));
+ SDOperand Cond = DAG.getNode(X86ISD::CMP, MVT::i32,
+ AndNode, DAG.getConstant(0, MVT::i8));
+
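+ // Bit 5 of the shift amount tells shifts of 32..63 apart from 0..31; the
+ // CMOVs below select the "large shift" results when it is set.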
+ SDOperand Hi, Lo;
+ SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag);
+ SmallVector<SDOperand, 4> Ops;
+ if (Op.getOpcode() == ISD::SHL_PARTS) {
+ Ops.push_back(Tmp2);
+ Ops.push_back(Tmp3);
+ Ops.push_back(CC);
+ Ops.push_back(Cond);
+ Hi = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size());
- VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32);
Ops.clear();
- Ops.push_back(Lo);
- Ops.push_back(Hi);
- return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size());
+ Ops.push_back(Tmp3);
+ Ops.push_back(Tmp1);
+ Ops.push_back(CC);
+ Ops.push_back(Cond);
+ Lo = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size());
+ } else {
+ Ops.push_back(Tmp2);
+ Ops.push_back(Tmp3);
+ Ops.push_back(CC);
+ Ops.push_back(Cond);
+ Lo = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size());
+
+ Ops.clear();
+ Ops.push_back(Tmp3);
+ Ops.push_back(Tmp1);
+ Ops.push_back(CC);
+ Ops.push_back(Cond);
+ Hi = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size());
+ }
+
+ VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32);
+ Ops.clear();
+ Ops.push_back(Lo);
+ Ops.push_back(Hi);
+ return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size());
}
SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
SDOperand Ops[] = { Chain, Value, StackSlot };
SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3);
- // Load the result.
- return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0);
+ // Load the result. If this is an i64 load on an x86-32 host, expand the
+ // load.
+ if (Op.getValueType() != MVT::i64 || Subtarget->is64Bit())
+ return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0);
+
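+ // i64 is not legal on x86-32, so split the 8-byte stack slot into two i32
+ // loads and reassemble the halves with BUILD_PAIR.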
+ SDOperand Lo = DAG.getLoad(MVT::i32, FIST, StackSlot, NULL, 0);
+ StackSlot = DAG.getNode(ISD::ADD, StackSlot.getValueType(), StackSlot,
+ DAG.getConstant(4, StackSlot.getValueType()));
+ SDOperand Hi = DAG.getLoad(MVT::i32, FIST, StackSlot, NULL, 0);
+
+ // On little-endian x86 the low word sits at the lower address.
+ return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
}
SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
SrcVT = VT;
SrcTy = MVT::getTypeForValueType(SrcVT);
}
+ // And if it is bigger, shrink it first.
+ if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) {
+ Op1 = DAG.getNode(ISD::FP_ROUND, VT, Op1);
+ SrcVT = VT;
+ SrcTy = MVT::getTypeForValueType(SrcVT);
+ }
+
+ // At this point the operands and the result should have the same
+ // type, and that won't be f80 since that is not custom lowered.
// First get the sign bit of second operand.
std::vector<Constant*> CV;
IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
else if (VT == MVT::f64 && !X86ScalarSSEf64)
IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
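+ // f80 always lives in an x87 register, and FCMOV only exists for a subset
+ // of condition codes.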
+ else if (VT == MVT::f80)
+ IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
if ((Opc == X86ISD::CMP ||
Opc == X86ISD::COMI ||
Opc == X86ISD::UCOMI) && !IllegalFPCMov) {
// The libc version is likely to be faster for these cases. It can use the
// address value and run time information about the CPU.
if ((Align & 3) != 0 ||
- (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) {
+ (I && I->getValue() > Subtarget->getMaxInlineSizeThreshold())) {
MVT::ValueType IntPtr = getPointerTy();
const Type *IntPtrTy = getTargetData()->getIntPtrType();
TargetLowering::ArgListTy Args;
return Chain;
}
-SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
- SDOperand ChainOp = Op.getOperand(0);
- SDOperand DestOp = Op.getOperand(1);
- SDOperand SourceOp = Op.getOperand(2);
- SDOperand CountOp = Op.getOperand(3);
- SDOperand AlignOp = Op.getOperand(4);
- unsigned Align = (unsigned)cast<ConstantSDNode>(AlignOp)->getValue();
- if (Align == 0) Align = 1;
-
- // The libc version is likely to be faster for the following cases. It can
- // use the address value and run time information about the CPU.
- // With glibc 2.6.1 on a core 2, coping an array of 100M longs was 30% faster
-
- // If not DWORD aligned, call memcpy.
- if ((Align & 3) != 0)
- return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
-
- // If size is unknown, call memcpy.
- ConstantSDNode *I = dyn_cast<ConstantSDNode>(CountOp);
- if (!I)
- return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
-
- // If size is more than the threshold, call memcpy.
- unsigned Size = I->getValue();
- if (Size > Subtarget->getMinRepStrSizeThreshold())
- return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
-
- return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align, DAG);
-}
-
-SDOperand X86TargetLowering::LowerMEMCPYCall(SDOperand Chain,
- SDOperand Dest,
- SDOperand Source,
- SDOperand Count,
- SelectionDAG &DAG) {
- MVT::ValueType IntPtr = getPointerTy();
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- Entry.Ty = getTargetData()->getIntPtrType();
- Entry.Node = Dest; Args.push_back(Entry);
- Entry.Node = Source; Args.push_back(Entry);
- Entry.Node = Count; Args.push_back(Entry);
- std::pair<SDOperand,SDOperand> CallResult =
- LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
- DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
- return CallResult.second;
-}
-
SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain,
SDOperand Dest,
SDOperand Source,
}
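+/// isTruncateFree - Return true if truncating a value of type Ty1 to type
+/// Ty2 costs nothing, e.g. truncating i32 in EAX to i16 is just a use of the
+/// AX sub-register. Truncating from i64 is only free in 64-bit mode, where
+/// the 64-bit GPRs and their sub-registers exist.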
+bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
+ if (!Ty1->isInteger() || !Ty2->isInteger())
+ return false;
+ unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
+ unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
+ if (NumBits1 <= NumBits2)
+ return false;
+ return Subtarget->is64Bit() || NumBits1 < 64;
+}
+
+bool X86TargetLowering::isTruncateFree(MVT::ValueType VT1,
+ MVT::ValueType VT2) const {
+ if (!MVT::isInteger(VT1) || !MVT::isInteger(VT2))
+ return false;
+ unsigned NumBits1 = MVT::getSizeInBits(VT1);
+ unsigned NumBits2 = MVT::getSizeInBits(VT2);
+ if (NumBits1 <= NumBits2)
+ return false;
+ return Subtarget->is64Bit() || NumBits1 < 64;
+}
+
/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
else if (VT == MVT::i8)
return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
- break;
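+ // An i64 operand implies 64-bit mode, where the corresponding 64-bit
+ // GPRs can be returned as well.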
+ else if (VT == MVT::i64)
+ return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX, 0);
+ break;
}
}