diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index f4e1cb11c93..0482dad4aae 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -31,8 +31,9 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SSARegMap.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ParameterAttributes.h"
@@ -44,6 +45,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
   X86ScalarSSEf64 = Subtarget->hasSSE2();
   X86ScalarSSEf32 = Subtarget->hasSSE1();
   X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
+  RegInfo = TM.getRegisterInfo();
@@ -155,26 +157,41 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
     setOperationAction(ISD::BIT_CONVERT      , MVT::i32  , Expand);
   }

-  // Divide and remainder are lowered to use div or idiv in legalize in
-  // order to expose the intermediate computations to trivial CSE. This is
-  // most noticeable when both x/y and x%y are being computed; they can be
-  // done with a single div or idiv.
-  setOperationAction(ISD::SDIV  , MVT::i8  , Custom);
-  setOperationAction(ISD::UDIV  , MVT::i8  , Custom);
-  setOperationAction(ISD::SREM  , MVT::i8  , Custom);
-  setOperationAction(ISD::UREM  , MVT::i8  , Custom);
-  setOperationAction(ISD::SDIV  , MVT::i16 , Custom);
-  setOperationAction(ISD::UDIV  , MVT::i16 , Custom);
-  setOperationAction(ISD::SREM  , MVT::i16 , Custom);
-  setOperationAction(ISD::UREM  , MVT::i16 , Custom);
-  setOperationAction(ISD::SDIV  , MVT::i32 , Custom);
-  setOperationAction(ISD::UDIV  , MVT::i32 , Custom);
-  setOperationAction(ISD::SREM  , MVT::i32 , Custom);
-  setOperationAction(ISD::UREM  , MVT::i32 , Custom);
-  setOperationAction(ISD::SDIV  , MVT::i64 , Custom);
-  setOperationAction(ISD::UDIV  , MVT::i64 , Custom);
-  setOperationAction(ISD::SREM  , MVT::i64 , Custom);
-  setOperationAction(ISD::UREM  , MVT::i64 , Custom);
+  // Scalar integer multiply, multiply-high, divide, and remainder are
+  // lowered to use operations that produce two results, to match the
+  // available instructions. This exposes the two-result form to trivial
+  // CSE, which is able to combine x/y and x%y into a single instruction,
+  // for example. The single-result multiply instructions are introduced
+  // in X86ISelDAGToDAG.cpp, after CSE, for uses where the high part is
+  // not needed.
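+  // Illustrative example (not part of this patch): for IR such as
+  //     %q = sdiv i32 %x, %y
+  //     %r = srem i32 %x, %y
+  // both nodes expand to the two-result form, CSE merges them into a single
+  // ISD::SDIVREM, and one idiv then produces the quotient in EAX and the
+  // remainder in EDX.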
+ setOperationAction(ISD::MUL , MVT::i8 , Expand); + setOperationAction(ISD::MULHS , MVT::i8 , Expand); + setOperationAction(ISD::MULHU , MVT::i8 , Expand); + setOperationAction(ISD::SDIV , MVT::i8 , Expand); + setOperationAction(ISD::UDIV , MVT::i8 , Expand); + setOperationAction(ISD::SREM , MVT::i8 , Expand); + setOperationAction(ISD::UREM , MVT::i8 , Expand); + setOperationAction(ISD::MUL , MVT::i16 , Expand); + setOperationAction(ISD::MULHS , MVT::i16 , Expand); + setOperationAction(ISD::MULHU , MVT::i16 , Expand); + setOperationAction(ISD::SDIV , MVT::i16 , Expand); + setOperationAction(ISD::UDIV , MVT::i16 , Expand); + setOperationAction(ISD::SREM , MVT::i16 , Expand); + setOperationAction(ISD::UREM , MVT::i16 , Expand); + setOperationAction(ISD::MUL , MVT::i32 , Expand); + setOperationAction(ISD::MULHS , MVT::i32 , Expand); + setOperationAction(ISD::MULHU , MVT::i32 , Expand); + setOperationAction(ISD::SDIV , MVT::i32 , Expand); + setOperationAction(ISD::UDIV , MVT::i32 , Expand); + setOperationAction(ISD::SREM , MVT::i32 , Expand); + setOperationAction(ISD::UREM , MVT::i32 , Expand); + setOperationAction(ISD::MUL , MVT::i64 , Expand); + setOperationAction(ISD::MULHS , MVT::i64 , Expand); + setOperationAction(ISD::MULHU , MVT::i64 , Expand); + setOperationAction(ISD::SDIV , MVT::i64 , Expand); + setOperationAction(ISD::UDIV , MVT::i64 , Expand); + setOperationAction(ISD::SREM , MVT::i64 , Expand); + setOperationAction(ISD::UREM , MVT::i64 , Expand); setOperationAction(ISD::BR_JT , MVT::Other, Expand); setOperationAction(ISD::BRCOND , MVT::Other, Custom); @@ -188,7 +205,8 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand); setOperationAction(ISD::FREM , MVT::f64 , Expand); - + setOperationAction(ISD::FLT_ROUNDS , MVT::i32 , Custom); + setOperationAction(ISD::CTPOP , MVT::i8 , Expand); setOperationAction(ISD::CTTZ , MVT::i8 , Expand); setOperationAction(ISD::CTLZ , MVT::i8 , Expand); @@ -418,6 +436,15 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::UNDEF, MVT::f80, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand); setOperationAction(ISD::ConstantFP, MVT::f80, Expand); + if (!UnsafeFPMath) { + setOperationAction(ISD::FSIN , MVT::f80 , Expand); + setOperationAction(ISD::FCOS , MVT::f80 , Expand); + } + + // Always use a library call for pow. + setOperationAction(ISD::FPOW , MVT::f32 , Expand); + setOperationAction(ISD::FPOW , MVT::f64 , Expand); + setOperationAction(ISD::FPOW , MVT::f80 , Expand); // First set operation action for all vector types to expand. Then we // will selectively turn on ones that can be effectively codegen'd. 
@@ -446,6 +473,14 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
     setOperationAction(ISD::FPOWI, (MVT::ValueType)VT, Expand);
     setOperationAction(ISD::FSQRT, (MVT::ValueType)VT, Expand);
     setOperationAction(ISD::FCOPYSIGN, (MVT::ValueType)VT, Expand);
+    setOperationAction(ISD::SMUL_LOHI, (MVT::ValueType)VT, Expand);
+    setOperationAction(ISD::UMUL_LOHI, (MVT::ValueType)VT, Expand);
+    setOperationAction(ISD::SDIVREM, (MVT::ValueType)VT, Expand);
+    setOperationAction(ISD::UDIVREM, (MVT::ValueType)VT, Expand);
+    setOperationAction(ISD::FPOW, (MVT::ValueType)VT, Expand);
+    setOperationAction(ISD::CTPOP, (MVT::ValueType)VT, Expand);
+    setOperationAction(ISD::CTTZ, (MVT::ValueType)VT, Expand);
+    setOperationAction(ISD::CTLZ, (MVT::ValueType)VT, Expand);
   }

   if (Subtarget->hasMMX()) {
@@ -464,6 +499,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
     setOperationAction(ISD::SUB, MVT::v8i8, Legal);
     setOperationAction(ISD::SUB, MVT::v4i16, Legal);
     setOperationAction(ISD::SUB, MVT::v2i32, Legal);
+    setOperationAction(ISD::SUB, MVT::v1i64, Legal);
     setOperationAction(ISD::MULHS, MVT::v4i16, Legal);
     setOperationAction(ISD::MUL, MVT::v4i16, Legal);
@@ -573,7 +609,8 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
-    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
+    if (Subtarget->is64Bit())
+      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

     // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
     for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
@@ -614,11 +651,35 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
 }

+/// getPICJumpTableRelocBase - Returns the relocation base for the given PIC
+/// jump table.
+SDOperand X86TargetLowering::getPICJumpTableRelocBase(SDOperand Table,
+                                                      SelectionDAG &DAG) const {
+  if (usesGlobalOffsetTable())
+    return DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, getPointerTy());
+  if (!Subtarget->isPICStyleRIPRel())
+    return DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy());
+  return Table;
+}
+
 //===----------------------------------------------------------------------===//
 //               Return Value Calling Convention Implementation
 //===----------------------------------------------------------------------===//

 #include "X86GenCallingConv.inc"

+/// GetPossiblePreceedingTailCall - Get the preceding X86ISD::TAILCALL node if
+/// it exists, skipping a possible ISD::TokenFactor.
+static SDOperand GetPossiblePreceedingTailCall(SDOperand Chain) {
+  if (Chain.getOpcode()==X86ISD::TAILCALL) {
+    return Chain;
+  } else if (Chain.getOpcode()==ISD::TokenFactor) {
+    if (Chain.getNumOperands() &&
+        Chain.getOperand(0).getOpcode()==X86ISD::TAILCALL)
+      return Chain.getOperand(0);
+  }
+  return Chain;
+}

 /// LowerRET - Lower an ISD::RET node.
 SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
@@ -629,8 +690,7 @@ SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
   bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
   CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
   CCInfo.AnalyzeReturn(Op.Val, RetCC_X86);
-
-
+
   // If this is the first return lowered for this function, add the regs to the
   // liveout set for the function.
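   // (Sketch of the intent: registers recorded via addLiveOut below are the
   // ones the CopyToReg nodes later write the return values into, keeping
   // them live out of the function.)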
   if (DAG.getMachineFunction().liveout_empty()) {
@@ -638,10 +698,39 @@ SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
       if (RVLocs[i].isRegLoc())
         DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
   }
-
   SDOperand Chain = Op.getOperand(0);
-  SDOperand Flag;
+
+  // Handle tail call return.
+  Chain = GetPossiblePreceedingTailCall(Chain);
+  if (Chain.getOpcode() == X86ISD::TAILCALL) {
+    SDOperand TailCall = Chain;
+    SDOperand TargetAddress = TailCall.getOperand(1);
+    SDOperand StackAdjustment = TailCall.getOperand(2);
+    assert(((TargetAddress.getOpcode() == ISD::Register &&
+             (cast<RegisterSDNode>(TargetAddress)->getReg() == X86::ECX ||
+              cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R9)) ||
+            TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
+            TargetAddress.getOpcode() == ISD::TargetGlobalAddress) &&
+           "Expecting a global address, external symbol, or register");
+    assert(StackAdjustment.getOpcode() == ISD::Constant &&
+           "Expecting a const value");
+
+    SmallVector<SDOperand, 8> Operands;
+    Operands.push_back(Chain.getOperand(0));
+    Operands.push_back(TargetAddress);
+    Operands.push_back(StackAdjustment);
+    // Copy registers used by the call. The last operand is a flag, so it is
+    // not copied.
+    for (unsigned i = 3; i < TailCall.getNumOperands() - 1; i++) {
+      Operands.push_back(Chain.getOperand(i));
+    }
+    return DAG.getNode(X86ISD::TC_RETURN, MVT::Other, &Operands[0],
+                       Operands.size());
+  }
+
+  // Regular return.
+  SDOperand Flag;
+
   // Copy the result values into the output registers.
   if (RVLocs.size() != 1 || !RVLocs[0].isRegLoc() ||
       RVLocs[0].getLocReg() != X86::ST0) {
@@ -662,7 +751,7 @@ SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
     if ((X86ScalarSSEf32 && RVLocs[0].getValVT()==MVT::f32) ||
         (X86ScalarSSEf64 && RVLocs[0].getValVT()==MVT::f64)) {
       SDOperand MemLoc;
-
+
       // If this is a load into a scalar-sse value, don't store the loaded value
       // back to the stack, only to reload it: just replace the scalar-sse load.
       if (ISD::isNON_EXTLoad(Value.Val) &&
@@ -762,12 +851,14 @@ LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,

 //===----------------------------------------------------------------------===//
-//                C & StdCall Calling Convention implementation
+//            C & StdCall & Fast Calling Convention implementation
 //===----------------------------------------------------------------------===//
 //  The StdCall calling convention seems to be standard for many Windows' API
 //  routines and around. It differs from the C calling convention just a
 //  little: the callee should clean up the stack, not the caller. Symbols
 //  should also be decorated in some fancy way :) It doesn't support any
 //  vector arguments.
+//  For info on the fast calling convention see the Fast Calling Convention
+//  (tail call) implementation in LowerX86_32FastCCCallTo.

 /// AddLiveIn - This helper function adds the specified physical register to the
 /// MachineFunction as a live in value.
It also creates a corresponding virtual @@ -780,6 +871,9 @@ static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg, return VReg; } +// align stack arguments according to platform alignment needed for tail calls +unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG& DAG); + SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG, const CCValAssign &VA, MachineFrameInfo *MFI, @@ -804,13 +898,17 @@ SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG, MachineFrameInfo *MFI = MF.getFrameInfo(); SDOperand Root = Op.getOperand(0); bool isVarArg = cast(Op.getOperand(2))->getValue() != 0; - + unsigned CC = MF.getFunction()->getCallingConv(); // Assign locations to all of the incoming arguments. SmallVector ArgLocs; - CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg, + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); - CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C); - + // Check for possible tail call calling convention. + if (CC == CallingConv::Fast && PerformTailCallOpt) + CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_TailCall); + else + CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C); + SmallVector ArgValues; unsigned LastVal = ~0U; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { @@ -855,6 +953,9 @@ SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG, } unsigned StackSize = CCInfo.getNextStackOffset(); + // align stack specially for tail calls + if (CC==CallingConv::Fast) + StackSize = GetAlignedArgumentStackSize(StackSize,DAG); ArgValues.push_back(Root); @@ -863,7 +964,12 @@ SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG, if (isVarArg) VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize); - if (isStdCall && !isVarArg) { + // Tail call calling convention (CallingConv::Fast) does not support varargs. + assert( !(isVarArg && CC == CallingConv::Fast) && + "CallingConv::Fast does not support varargs."); + + if (isStdCall && !isVarArg && + (CC==CallingConv::Fast && PerformTailCallOpt || CC!=CallingConv::Fast)) { BytesToPopOnReturn = StackSize; // Callee pops everything.. BytesCallerReserves = 0; } else { @@ -892,17 +998,21 @@ SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG, unsigned CC) { SDOperand Chain = Op.getOperand(0); bool isVarArg = cast(Op.getOperand(2))->getValue() != 0; - bool isTailCall = cast(Op.getOperand(3))->getValue() != 0; SDOperand Callee = Op.getOperand(4); unsigned NumOps = (Op.getNumOperands() - 5) / 2; - + // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); - CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C); + if(CC==CallingConv::Fast && PerformTailCallOpt) + CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_TailCall); + else + CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C); // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); + if (CC==CallingConv::Fast) + NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG); Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy())); @@ -1001,33 +1111,33 @@ SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG, if (InFlag.Val) Ops.push_back(InFlag); - - Chain = DAG.getNode(isTailCall ? 
X86ISD::TAILCALL : X86ISD::CALL, - NodeTys, &Ops[0], Ops.size()); + + Chain = DAG.getNode(X86ISD::CALL, NodeTys, &Ops[0], Ops.size()); InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. unsigned NumBytesForCalleeToPush = 0; - if (CC == CallingConv::X86_StdCall) { + if (CC == CallingConv::X86_StdCall || + (CC == CallingConv::Fast && PerformTailCallOpt)) { if (isVarArg) NumBytesForCalleeToPush = isSRet ? 4 : 0; else NumBytesForCalleeToPush = NumBytes; + assert(!(isVarArg && CC==CallingConv::Fast) && + "CallingConv::Fast does not support varargs."); } else { // If this is is a call to a struct-return function, the callee // pops the hidden struct pointer, so we have to push it back. // This is common for Darwin/X86, Linux & Mingw32 targets. NumBytesForCalleeToPush = isSRet ? 4 : 0; } - - NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); - Ops.clear(); - Ops.push_back(Chain); - Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); - Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy())); - Ops.push_back(InFlag); - Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size()); + + Chain = DAG.getCALLSEQ_END(Chain, + DAG.getConstant(NumBytes, getPointerTy()), + DAG.getConstant(NumBytesForCalleeToPush, + getPointerTy()), + InFlag); InFlag = Chain.getValue(1); // Handle result values, copying them out of physregs into vregs that we @@ -1110,7 +1220,8 @@ X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) { if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) { // Make sure the instruction takes 8n+4 bytes to make sure the start of the - // arguments and the arguments after the retaddr has been pushed are aligned. + // arguments and the arguments after the retaddr has been pushed are + // aligned. if ((StackSize & 7) == 0) StackSize += 4; } @@ -1147,9 +1258,10 @@ X86TargetLowering::LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG, SDOperand AlignNode = DAG.getConstant(Align, MVT::i32); SDOperand SizeNode = DAG.getConstant(Size, MVT::i32); + SDOperand AlwaysInline = DAG.getConstant(1, MVT::i32); - return DAG.getNode(ISD::MEMCPY, MVT::Other, Chain, PtrOff, Arg, SizeNode, - AlignNode); + return DAG.getMemcpy(Chain, PtrOff, Arg, SizeNode, AlignNode, + AlwaysInline); } else { return DAG.getStore(Chain, Arg, PtrOff, NULL, 0); } @@ -1172,7 +1284,8 @@ SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG, if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) { // Make sure the instruction takes 8n+4 bytes to make sure the start of the - // arguments and the arguments after the retaddr has been pushed are aligned. + // arguments and the arguments after the retaddr has been pushed are + // aligned. if ((NumBytes & 7) == 0) NumBytes += 4; } @@ -1270,8 +1383,8 @@ SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG, if (InFlag.Val) Ops.push_back(InFlag); - // FIXME: Do not generate X86ISD::TAILCALL for now. - Chain = DAG.getNode(isTailCall ? 
X86ISD::TAILCALL : X86ISD::CALL,
+  assert(!isTailCall && "No tail call here.");
+  Chain = DAG.getNode(X86ISD::CALL,
                       NodeTys, &Ops[0], Ops.size());
   InFlag = Chain.getValue(1);
@@ -1290,6 +1403,324 @@ SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
   return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
 }

+//===----------------------------------------------------------------------===//
+//                Fast Calling Convention (tail call) implementation
+//===----------------------------------------------------------------------===//
+
+//  Like the StdCall calling convention, the callee cleans up the stack
+//  arguments, except that ECX is reserved for storing the address of the
+//  tail-called function. Only two registers are free for argument passing
+//  (inreg). Tail call optimization is performed provided:
+//                * tailcallopt is enabled
+//                * caller/callee are fastcc
+//                * elf/pic is disabled OR
+//                * elf/pic enabled + callee is in module + callee has
+//                  visibility protected or hidden
+//  To keep the stack aligned according to the platform ABI, the function
+//  GetAlignedArgumentStackSize ensures that the argument delta is always a
+//  multiple of the stack alignment. (Dynamic linkers need this; Darwin's
+//  dyld, for example.)
+//  If a tail-called callee has more arguments than the caller, the caller
+//  needs to make sure that there is room to move the RETADDR to. This is
+//  achieved by reserving an area the size of the argument delta right after
+//  the original RETADDR, but before the saved framepointer or the spilled
+//  registers, e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4).
+//  Stack layout:
+//    arg1
+//    arg2
+//    RETADDR
+//    [ new RETADDR
+//      move area ]
+//    (possible EBP)
+//    ESI
+//    EDI
+//    local1 ..
+
+/// GetAlignedArgumentStackSize - Round up the stack size so that, together
+/// with the return-address slot, the stack stays aligned, e.g. 16n + 12 for
+/// a 16-byte alignment requirement with 4-byte slots.
+unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
+                                                        SelectionDAG& DAG) {
+  if (PerformTailCallOpt) {
+    MachineFunction &MF = DAG.getMachineFunction();
+    const TargetMachine &TM = MF.getTarget();
+    const TargetFrameInfo &TFI = *TM.getFrameInfo();
+    unsigned StackAlignment = TFI.getStackAlignment();
+    uint64_t AlignMask = StackAlignment - 1;
+    int64_t Offset = StackSize;
+    unsigned SlotSize = Subtarget->is64Bit() ? 8 : 4;
+    if ((Offset & AlignMask) <= (StackAlignment - SlotSize)) {
+      // The offset already fits below the next alignment boundary; just add
+      // the difference.
+      Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
+    } else {
+      // Mask out the low bits, then add one full stack alignment plus the
+      // (StackAlignment - SlotSize) bytes.
+      Offset = ((~AlignMask) & Offset) + StackAlignment +
+               (StackAlignment - SlotSize);
+    }
+    StackSize = Offset;
+  }
+  return StackSize;
+}
+
+/// IsEligibleForTailCallOptimization - Check to see whether the next
+/// instruction following the call is a return. A function is eligible if
+/// caller/callee calling conventions match, currently only fastcc supports
+/// tail calls, and the function CALL is immediately followed by a RET.
+bool X86TargetLowering::IsEligibleForTailCallOptimization(SDOperand Call,
+                                                          SDOperand Ret,
+                                                          SelectionDAG& DAG) const {
+  if (!PerformTailCallOpt)
+    return false;
+
+  // Check whether the CALL node immediately precedes the RET node and whether
+  // the return uses the result of the node or is a void return.
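+  // The two accepted shapes, sketched: a void return RET(chain) whose chain
+  // operand is one of the call's results, or RET(chain, value) where the
+  // chain is the call's last result and the returned value is the call's
+  // first result.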
+ unsigned NumOps = Ret.getNumOperands(); + if ((NumOps == 1 && + (Ret.getOperand(0) == SDOperand(Call.Val,1) || + Ret.getOperand(0) == SDOperand(Call.Val,0))) || + (NumOps > 1 && + Ret.getOperand(0) == SDOperand(Call.Val,Call.Val->getNumValues()-1) && + Ret.getOperand(1) == SDOperand(Call.Val,0))) { + MachineFunction &MF = DAG.getMachineFunction(); + unsigned CallerCC = MF.getFunction()->getCallingConv(); + unsigned CalleeCC = cast(Call.getOperand(1))->getValue(); + if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) { + SDOperand Callee = Call.getOperand(4); + // On elf/pic %ebx needs to be livein. + if (getTargetMachine().getRelocationModel() != Reloc::PIC_ || + !Subtarget->isPICStyleGOT()) + return true; + + // Can only do local tail calls with PIC. + GlobalValue * GV = 0; + GlobalAddressSDNode *G = dyn_cast(Callee); + if(G != 0 && + (GV = G->getGlobal()) && + (GV->hasHiddenVisibility() || GV->hasProtectedVisibility())) + return true; + } + } + + return false; +} + +SDOperand X86TargetLowering::LowerX86_TailCallTo(SDOperand Op, + SelectionDAG &DAG, + unsigned CC) { + SDOperand Chain = Op.getOperand(0); + bool isVarArg = cast(Op.getOperand(2))->getValue() != 0; + bool isTailCall = cast(Op.getOperand(3))->getValue() != 0; + SDOperand Callee = Op.getOperand(4); + bool is64Bit = Subtarget->is64Bit(); + + assert(isTailCall && PerformTailCallOpt && "Should only emit tail calls."); + + // Analyze operands of the call, assigning locations to each operand. + SmallVector ArgLocs; + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); + if (is64Bit) + CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_TailCall); + else + CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_TailCall); + + + // Lower arguments at fp - stackoffset + fpdiff. + MachineFunction &MF = DAG.getMachineFunction(); + + unsigned NumBytesToBePushed = + GetAlignedArgumentStackSize(CCInfo.getNextStackOffset(), DAG); + + unsigned NumBytesCallerPushed = + MF.getInfo()->getBytesToPopOnReturn(); + int FPDiff = NumBytesCallerPushed - NumBytesToBePushed; + + // Set the delta of movement of the returnaddr stackslot. + // But only set if delta is greater than previous delta. + if (FPDiff < (MF.getInfo()->getTCReturnAddrDelta())) + MF.getInfo()->setTCReturnAddrDelta(FPDiff); + + Chain = DAG. + getCALLSEQ_START(Chain, DAG.getConstant(NumBytesToBePushed, getPointerTy())); + + // Adjust the Return address stack slot. + SDOperand RetAddrFrIdx, NewRetAddrFrIdx; + if (FPDiff) { + MVT::ValueType VT = is64Bit ? MVT::i64 : MVT::i32; + RetAddrFrIdx = getReturnAddressFrameIndex(DAG); + // Load the "old" Return address. + RetAddrFrIdx = + DAG.getLoad(VT, Chain,RetAddrFrIdx, NULL, 0); + // Calculate the new stack slot for the return address. + int SlotSize = is64Bit ? 8 : 4; + int NewReturnAddrFI = + MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize); + NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT); + Chain = SDOperand(RetAddrFrIdx.Val, 1); + } + + SmallVector, 8> RegsToPass; + SmallVector MemOpChains; + SmallVector MemOpChains2; + SDOperand FramePtr, StackPtr; + SDOperand PtrOff; + SDOperand FIN; + int FI = 0; + + // Walk the register/memloc assignments, inserting copies/loads. Lower + // arguments first to the stack slot where they would normally - in case of a + // normal function call - be. + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + SDOperand Arg = Op.getOperand(5+2*VA.getValNo()); + + // Promote the value if needed. 
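+    // (Full means the value already matches its assigned location type;
+    // SExt, ZExt and AExt widen it with sign-, zero- or any-extension
+    // respectively, matching the switch below.)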
+ switch (VA.getLocInfo()) { + default: assert(0 && "Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg); + break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg); + break; + } + + if (VA.isRegLoc()) { + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + } else { + assert(VA.isMemLoc()); + if (StackPtr.Val == 0) + StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy()); + + MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain, + Arg)); + } + } + + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + + // Build a sequence of copy-to-reg nodes chained together with token chain + // and flag operands which copy the outgoing args into registers. + SDOperand InFlag; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, + InFlag); + InFlag = Chain.getValue(1); + } + InFlag = SDOperand(); + + // Copy from stack slots to stack slot of a tail called function. This needs + // to be done because if we would lower the arguments directly to their real + // stack slot we might end up overwriting each other. + // TODO: To make this more efficient (sometimes saving a store/load) we could + // analyse the arguments and emit this store/load/store sequence only for + // arguments which would be overwritten otherwise. + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + if (!VA.isRegLoc()) { + SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo()); + unsigned Flags = cast(FlagsOp)->getValue(); + + // Get source stack slot. + SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy()); + PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); + // Create frame index. + int32_t Offset = VA.getLocMemOffset()+FPDiff; + uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8; + FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset); + FIN = DAG.getFrameIndex(FI, MVT::i32); + if (Flags & ISD::ParamFlags::ByVal) { + // Copy relative to framepointer. + unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >> + ISD::ParamFlags::ByValAlignOffs); + + unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >> + ISD::ParamFlags::ByValSizeOffs; + + SDOperand AlignNode = DAG.getConstant(Align, MVT::i32); + SDOperand SizeNode = DAG.getConstant(Size, MVT::i32); + SDOperand AlwaysInline = DAG.getConstant(1, MVT::i1); + + MemOpChains2.push_back(DAG.getMemcpy(Chain, FIN, PtrOff, SizeNode, + AlignNode,AlwaysInline)); + } else { + SDOperand LoadedArg = DAG.getLoad(VA.getValVT(), Chain, PtrOff, NULL,0); + // Store relative to framepointer. + MemOpChains2.push_back(DAG.getStore(Chain, LoadedArg, FIN, NULL, 0)); + } + } + } + + if (!MemOpChains2.empty()) + Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, + &MemOpChains2[0], MemOpChains.size()); + + // Store the return address to the appropriate stack slot. + if (FPDiff) + Chain = DAG.getStore(Chain,RetAddrFrIdx, NewRetAddrFrIdx, NULL, 0); + + // ELF / PIC requires GOT in the EBX register before function calls via PLT + // GOT pointer. + // Does not work with tail call since ebx is not restored correctly by + // tailcaller. 
TODO: at least for x86 - verify for x86-64 + + // If the callee is a GlobalAddress node (quite common, every direct call is) + // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. + if (GlobalAddressSDNode *G = dyn_cast(Callee)) { + // We should use extra load for direct calls to dllimported functions in + // non-JIT mode. + if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(), + getTargetMachine(), true)) + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); + } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); + else { + assert(Callee.getOpcode() == ISD::LOAD && + "Function destination must be loaded into virtual register"); + unsigned Opc = is64Bit ? X86::R9 : X86::ECX; + + Chain = DAG.getCopyToReg(Chain, + DAG.getRegister(Opc, getPointerTy()) , + Callee,InFlag); + Callee = DAG.getRegister(Opc, getPointerTy()); + // Add register as live out. + DAG.getMachineFunction().addLiveOut(Opc); + } + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); + SmallVector Ops; + + Ops.push_back(Chain); + Ops.push_back(DAG.getConstant(NumBytesToBePushed, getPointerTy())); + Ops.push_back(DAG.getConstant(0, getPointerTy())); + if (InFlag.Val) + Ops.push_back(InFlag); + Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + + // Returns a chain & a flag for retval copy to use. + NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); + Ops.clear(); + Ops.push_back(Chain); + Ops.push_back(Callee); + Ops.push_back(DAG.getConstant(FPDiff, MVT::i32)); + // Add argument registers to the end of the list so that they are known live + // into the call. + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + if (InFlag.Val) + Ops.push_back(InFlag); + assert(InFlag.Val && + "Flag must be set. Depend on flag being set in LowerRET"); + Chain = DAG.getNode(X86ISD::TAILCALL, + Op.Val->getVTList(), &Ops[0], Ops.size()); + + return SDOperand(Chain.Val, Op.ResNo); +} //===----------------------------------------------------------------------===// // X86-64 C Calling Convention implementation @@ -1301,6 +1732,7 @@ X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) { MachineFrameInfo *MFI = MF.getFrameInfo(); SDOperand Root = Op.getOperand(0); bool isVarArg = cast(Op.getOperand(2))->getValue() != 0; + unsigned CC= MF.getFunction()->getCallingConv(); static const unsigned GPR64ArgRegs[] = { X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 @@ -1313,9 +1745,12 @@ X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) { // Assign locations to all of the incoming arguments. 
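   // (AnalyzeFormalArguments walks the arguments with the chosen calling
   // convention table and records one CCValAssign per argument, either a
   // physical register or a stack offset, into ArgLocs.)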
SmallVector ArgLocs; - CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg, + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); - CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C); + if (CC == CallingConv::Fast && PerformTailCallOpt) + CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_TailCall); + else + CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C); SmallVector ArgValues; unsigned LastVal = ~0U; @@ -1376,10 +1811,14 @@ X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) { } unsigned StackSize = CCInfo.getNextStackOffset(); + if (CC==CallingConv::Fast) + StackSize =GetAlignedArgumentStackSize(StackSize, DAG); // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. if (isVarArg) { + assert(CC!=CallingConv::Fast + && "Var arg not supported with calling convention fastcc"); unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6); unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8); @@ -1424,10 +1863,14 @@ X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) { } ArgValues.push_back(Root); - - BytesToPopOnReturn = 0; // Callee pops nothing. - BytesCallerReserves = StackSize; - + // Tail call convention (fastcc) needs callee pop. + if (CC == CallingConv::Fast && PerformTailCallOpt) { + BytesToPopOnReturn = StackSize; // Callee pops everything. + BytesCallerReserves = 0; + } else { + BytesToPopOnReturn = 0; // Callee pops nothing. + BytesCallerReserves = StackSize; + } X86MachineFunctionInfo *FuncInfo = MF.getInfo(); FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn); @@ -1441,16 +1884,21 @@ X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG, unsigned CC) { SDOperand Chain = Op.getOperand(0); bool isVarArg = cast(Op.getOperand(2))->getValue() != 0; - bool isTailCall = cast(Op.getOperand(3))->getValue() != 0; SDOperand Callee = Op.getOperand(4); // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); - CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C); + if (CC==CallingConv::Fast && PerformTailCallOpt) + CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_TailCall); + else + CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C); // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); + if (CC == CallingConv::Fast) + NumBytes = GetAlignedArgumentStackSize(NumBytes,DAG); + Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy())); SmallVector, 8> RegsToPass; @@ -1504,6 +1952,9 @@ X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG, } if (isVarArg) { + assert ( CallingConv::Fast != CC && + "Var args not supported with calling convention fastcc"); + // From AMD64 ABI document: // For calls that may call functions that use varargs or stdargs // (prototype-less calls or calls to functions containing ellipsis (...) in @@ -1552,17 +2003,21 @@ X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG, if (InFlag.Val) Ops.push_back(InFlag); - // FIXME: Do not generate X86ISD::TAILCALL for now. - Chain = DAG.getNode(isTailCall ? 
X86ISD::TAILCALL : X86ISD::CALL, + Chain = DAG.getNode(X86ISD::CALL, NodeTys, &Ops[0], Ops.size()); InFlag = Chain.getValue(1); - + int NumBytesForCalleeToPush = 0; + if (CC==CallingConv::Fast && PerformTailCallOpt) { + NumBytesForCalleeToPush = NumBytes; // Callee pops everything + } else { + NumBytesForCalleeToPush = 0; // Callee pops nothing. + } // Returns a flag for retval copy to use. NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); Ops.clear(); Ops.push_back(Chain); Ops.push_back(DAG.getConstant(NumBytes, getPointerTy())); - Ops.push_back(DAG.getConstant(0, getPointerTy())); + Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy())); Ops.push_back(InFlag); Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size()); InFlag = Chain.getValue(1); @@ -3084,10 +3539,14 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { // SHUFPS the element to the lowest double word, then movss. MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); SmallVector IdxVec; - IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT))); - IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); - IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); - IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); + IdxVec. + push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT))); + IdxVec. + push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); + IdxVec. + push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); + IdxVec. + push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &IdxVec[0], IdxVec.size()); Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), @@ -3106,7 +3565,8 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); SmallVector IdxVec; IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT))); - IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); + IdxVec. + push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &IdxVec[0], IdxVec.size()); Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), @@ -3329,105 +3789,71 @@ SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) { return Result; } +/// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and +/// take a 2 x i32 value to shift plus a shift amount. SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) { - assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 && - "Not an i64 shift!"); - bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; - SDOperand ShOpLo = Op.getOperand(0); - SDOperand ShOpHi = Op.getOperand(1); - SDOperand ShAmt = Op.getOperand(2); - SDOperand Tmp1 = isSRA ? - DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) : - DAG.getConstant(0, MVT::i32); - - SDOperand Tmp2, Tmp3; - if (Op.getOpcode() == ISD::SHL_PARTS) { - Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt); - Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt); - } else { - Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt); - Tmp3 = DAG.getNode(isSRA ? 
ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); - } - - const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); - SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt, - DAG.getConstant(32, MVT::i8)); - SDOperand COps[]={DAG.getEntryNode(), AndNode, DAG.getConstant(0, MVT::i8)}; - SDOperand Cond = NewCCModeling - ? DAG.getNode(X86ISD::CMP_NEW, MVT::i32, - AndNode, DAG.getConstant(0, MVT::i8)) - : DAG.getNode(X86ISD::CMP, VTs, 2, COps, 3).getValue(1); - - SDOperand Hi, Lo; - SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8); - unsigned Opc = NewCCModeling ? X86ISD::CMOV_NEW : X86ISD::CMOV; - VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag); - SmallVector Ops; - if (Op.getOpcode() == ISD::SHL_PARTS) { - Ops.push_back(Tmp2); - Ops.push_back(Tmp3); - Ops.push_back(CC); - Ops.push_back(Cond); - if (NewCCModeling) - Hi = DAG.getNode(Opc, MVT::i32, &Ops[0], Ops.size()); - else { - Hi = DAG.getNode(Opc, VTs, 2, &Ops[0], Ops.size()); - Cond = Hi.getValue(1); - } - - Ops.clear(); - Ops.push_back(Tmp3); - Ops.push_back(Tmp1); - Ops.push_back(CC); - Ops.push_back(Cond); - if (NewCCModeling) - Lo = DAG.getNode(Opc, MVT::i32, &Ops[0], Ops.size()); - else - Lo = DAG.getNode(Opc, VTs, 2, &Ops[0], Ops.size()); - } else { - Ops.push_back(Tmp2); - Ops.push_back(Tmp3); - Ops.push_back(CC); - Ops.push_back(Cond); - if (NewCCModeling) - Lo = DAG.getNode(Opc, MVT::i32, &Ops[0], Ops.size()); - else { - Lo = DAG.getNode(Opc, VTs, 2, &Ops[0], Ops.size()); - Cond = Lo.getValue(1); - } + assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 && + "Not an i64 shift!"); + bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; + SDOperand ShOpLo = Op.getOperand(0); + SDOperand ShOpHi = Op.getOperand(1); + SDOperand ShAmt = Op.getOperand(2); + SDOperand Tmp1 = isSRA ? + DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) : + DAG.getConstant(0, MVT::i32); + + SDOperand Tmp2, Tmp3; + if (Op.getOpcode() == ISD::SHL_PARTS) { + Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt); + Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt); + } else { + Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt); + Tmp3 = DAG.getNode(isSRA ? 
ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt); + } - Ops.clear(); - Ops.push_back(Tmp3); - Ops.push_back(Tmp1); - Ops.push_back(CC); - Ops.push_back(Cond); - if (NewCCModeling) - Hi = DAG.getNode(Opc, MVT::i32, &Ops[0], Ops.size()); - else - Hi = DAG.getNode(Opc, VTs, 2, &Ops[0], Ops.size()); - } + const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); + SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt, + DAG.getConstant(32, MVT::i8)); + SDOperand Cond = DAG.getNode(X86ISD::CMP, MVT::i32, + AndNode, DAG.getConstant(0, MVT::i8)); + + SDOperand Hi, Lo; + SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8); + VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag); + SmallVector Ops; + if (Op.getOpcode() == ISD::SHL_PARTS) { + Ops.push_back(Tmp2); + Ops.push_back(Tmp3); + Ops.push_back(CC); + Ops.push_back(Cond); + Hi = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); - VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32); Ops.clear(); - Ops.push_back(Lo); - Ops.push_back(Hi); - return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size()); -} - -SDOperand X86TargetLowering::LowerIntegerDivOrRem(SDOperand Op, SelectionDAG &DAG) { - unsigned Opcode = Op.getOpcode(); - MVT::ValueType NVT = Op.getValueType(); - bool isSigned = Opcode == ISD::SDIV || Opcode == ISD::SREM; - bool isDiv = Opcode == ISD::SDIV || Opcode == ISD::UDIV; - unsigned Opc = isSigned ? X86ISD::IDIV : X86ISD::DIV; - - SDOperand Ops[] = { Op.getOperand(0), Op.getOperand(1) }; - SDOperand DR = DAG.getNode(Opc, DAG.getVTList(NVT, NVT), Ops, 2); + Ops.push_back(Tmp3); + Ops.push_back(Tmp1); + Ops.push_back(CC); + Ops.push_back(Cond); + Lo = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); + } else { + Ops.push_back(Tmp2); + Ops.push_back(Tmp3); + Ops.push_back(CC); + Ops.push_back(Cond); + Lo = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); - if (isDiv) - return DR; + Ops.clear(); + Ops.push_back(Tmp3); + Ops.push_back(Tmp1); + Ops.push_back(CC); + Ops.push_back(Cond); + Hi = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size()); + } - return SDOperand(DR.Val, 1); + VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32); + Ops.clear(); + Ops.push_back(Lo); + Ops.push_back(Hi); + return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size()); } SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { @@ -3492,35 +3918,35 @@ SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { return Result; } -SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { +std::pair X86TargetLowering:: +FP_TO_SINTHelper(SDOperand Op, SelectionDAG &DAG) { assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 && "Unknown FP_TO_SINT to lower!"); - // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary - // stack slot. - SDOperand Result; - MachineFunction &MF = DAG.getMachineFunction(); - unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; - int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); - SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); // These are really Legal. 
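   // (With SSE these conversions are selected directly to cvttss2si /
   // cvttsd2si, so no FISTP-in-memory expansion is needed; the empty pair
   // returned below tells the callers to leave the node alone.)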
if (Op.getValueType() == MVT::i32 && X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32) - return Result; + return std::make_pair(SDOperand(), SDOperand()); if (Op.getValueType() == MVT::i32 && X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64) - return Result; + return std::make_pair(SDOperand(), SDOperand()); if (Subtarget->is64Bit() && Op.getValueType() == MVT::i64 && Op.getOperand(0).getValueType() != MVT::f80) - return Result; + return std::make_pair(SDOperand(), SDOperand()); + // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary + // stack slot. + MachineFunction &MF = DAG.getMachineFunction(); + unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8; + int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); + SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); unsigned Opc; switch (Op.getValueType()) { - default: assert(0 && "Invalid FP_TO_SINT to lower!"); - case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; - case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; - case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; + default: assert(0 && "Invalid FP_TO_SINT to lower!"); + case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; + case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; + case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; } SDOperand Chain = DAG.getEntryNode(); @@ -3543,10 +3969,33 @@ SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { SDOperand Ops[] = { Chain, Value, StackSlot }; SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3); + return std::make_pair(FIST, StackSlot); +} + +SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { + assert((Op.getValueType() != MVT::i64 || Subtarget->is64Bit()) && + "This FP_TO_SINT must be expanded!"); + + std::pair Vals = FP_TO_SINTHelper(Op, DAG); + SDOperand FIST = Vals.first, StackSlot = Vals.second; + if (FIST.Val == 0) return SDOperand(); + // Load the result. return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0); } +SDNode *X86TargetLowering::ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG) { + std::pair Vals = FP_TO_SINTHelper(SDOperand(N, 0), DAG); + SDOperand FIST = Vals.first, StackSlot = Vals.second; + if (FIST.Val == 0) return 0; + + // Return an i64 load from the stack slot. + SDOperand Res = DAG.getLoad(MVT::i64, FIST, StackSlot, NULL, 0); + + // Use a MERGE_VALUES node to drop the chain result value. + return DAG.getNode(ISD::MERGE_VALUES, MVT::i64, Res).Val; +} + SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) { MVT::ValueType VT = Op.getValueType(); MVT::ValueType EltVT = VT; @@ -3620,6 +4069,15 @@ SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) { SrcVT = VT; SrcTy = MVT::getTypeForValueType(SrcVT); } + // And if it is bigger, shrink it first. + if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) { + Op1 = DAG.getNode(ISD::FP_ROUND, VT, Op1); + SrcVT = VT; + SrcTy = MVT::getTypeForValueType(SrcVT); + } + + // At this point the operands and the result should have the same + // type, and that won't be f80 since that is not custom lowered. // First get the sign bit of second operand. 
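   // (Sketch of the approach: build a constant mask with only the sign bit of
   // the source type set, AND it with Op1 to extract the sign, clear the sign
   // bit of Op0 with the inverse mask, and OR the two results together.)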
std::vector CV; @@ -3670,86 +4128,39 @@ SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) { return DAG.getNode(X86ISD::FOR, VT, Val, SignBit); } -SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG, - SDOperand Chain) { +SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) { assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); SDOperand Cond; SDOperand Op0 = Op.getOperand(0); SDOperand Op1 = Op.getOperand(1); SDOperand CC = Op.getOperand(2); ISD::CondCode SetCCOpcode = cast(CC)->get(); - const MVT::ValueType *VTs1 = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); - const MVT::ValueType *VTs2 = DAG.getNodeValueTypes(MVT::i8, MVT::Flag); bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType()); unsigned X86CC; if (translateX86CC(cast(CC)->get(), isFP, X86CC, Op0, Op1, DAG)) { - SDOperand Ops1[] = { Chain, Op0, Op1 }; - Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, Ops1, 3).getValue(1); - SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond }; - return DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2); - } - - assert(isFP && "Illegal integer SetCC!"); - - SDOperand COps[] = { Chain, Op0, Op1 }; - Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, COps, 3).getValue(1); - - switch (SetCCOpcode) { - default: assert(false && "Illegal floating point SetCC!"); - case ISD::SETOEQ: { // !PF & ZF - SDOperand Ops1[] = { DAG.getConstant(X86::COND_NP, MVT::i8), Cond }; - SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2); - SDOperand Ops2[] = { DAG.getConstant(X86::COND_E, MVT::i8), - Tmp1.getValue(1) }; - SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2); - return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2); - } - case ISD::SETUNE: { // PF | !ZF - SDOperand Ops1[] = { DAG.getConstant(X86::COND_P, MVT::i8), Cond }; - SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2); - SDOperand Ops2[] = { DAG.getConstant(X86::COND_NE, MVT::i8), - Tmp1.getValue(1) }; - SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2); - return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2); - } - } -} - -SDOperand X86TargetLowering::LowerSETCC_New(SDOperand Op, SelectionDAG &DAG) { - assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); - SDOperand Cond; - SDOperand Op0 = Op.getOperand(0); - SDOperand Op1 = Op.getOperand(1); - SDOperand CC = Op.getOperand(2); - ISD::CondCode SetCCOpcode = cast(CC)->get(); - bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType()); - unsigned X86CC; - - if (translateX86CC(cast(CC)->get(), isFP, X86CC, - Op0, Op1, DAG)) { - Cond = DAG.getNode(X86ISD::CMP_NEW, MVT::i32, Op0, Op1); - return DAG.getNode(X86ISD::SETCC_NEW, MVT::i8, + Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1); + return DAG.getNode(X86ISD::SETCC, MVT::i8, DAG.getConstant(X86CC, MVT::i8), Cond); } assert(isFP && "Illegal integer SetCC!"); - Cond = DAG.getNode(X86ISD::CMP_NEW, MVT::i32, Op0, Op1); + Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1); switch (SetCCOpcode) { default: assert(false && "Illegal floating point SetCC!"); case ISD::SETOEQ: { // !PF & ZF - SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC_NEW, MVT::i8, + SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8, DAG.getConstant(X86::COND_NP, MVT::i8), Cond); - SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC_NEW, MVT::i8, + SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, DAG.getConstant(X86::COND_E, MVT::i8), Cond); return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2); } case ISD::SETUNE: { // PF | !ZF - SDOperand Tmp1 = 
DAG.getNode(X86ISD::SETCC_NEW, MVT::i8, + SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8, DAG.getConstant(X86::COND_P, MVT::i8), Cond); - SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC_NEW, MVT::i8, + SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8, DAG.getConstant(X86::COND_NE, MVT::i8), Cond); return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2); } @@ -3759,90 +4170,38 @@ SDOperand X86TargetLowering::LowerSETCC_New(SDOperand Op, SelectionDAG &DAG) { SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) { bool addTest = true; - SDOperand Chain = DAG.getEntryNode(); SDOperand Cond = Op.getOperand(0); SDOperand CC; - const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); if (Cond.getOpcode() == ISD::SETCC) - Cond = LowerSETCC(Cond, DAG, Chain); + Cond = LowerSETCC(Cond, DAG); + // If condition flag is set by a X86ISD::CMP, then use it as the condition + // setting operand in place of the X86ISD::SETCC. if (Cond.getOpcode() == X86ISD::SETCC) { CC = Cond.getOperand(0); - // If condition flag is set by a X86ISD::CMP, then make a copy of it - // (since flag operand cannot be shared). Use it as the condition setting - // operand in place of the X86ISD::SETCC. - // If the X86ISD::SETCC has more than one use, then perhaps it's better - // to use a test instead of duplicating the X86ISD::CMP (for register - // pressure reason)? - SDOperand Cmp = Cond.getOperand(1); - unsigned Opc = Cmp.getOpcode(); - bool IllegalFPCMov = - ! ((X86ScalarSSEf32 && Op.getValueType()==MVT::f32) || - (X86ScalarSSEf64 && Op.getValueType()==MVT::f64)) && - !hasFPCMov(cast(CC)->getSignExtended()); - if ((Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) && - !IllegalFPCMov) { - SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) }; - Cond = DAG.getNode(Opc, VTs, 2, Ops, 3); - addTest = false; - } - } - - if (addTest) { - CC = DAG.getConstant(X86::COND_NE, MVT::i8); - SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) }; - Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3); - } - - VTs = DAG.getNodeValueTypes(Op.getValueType(), MVT::Flag); - SmallVector Ops; - // X86ISD::CMOV means set the result (which is operand 1) to the RHS if - // condition is true. - Ops.push_back(Op.getOperand(2)); - Ops.push_back(Op.getOperand(1)); - Ops.push_back(CC); - Ops.push_back(Cond.getValue(1)); - return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); -} - -SDOperand X86TargetLowering::LowerSELECT_New(SDOperand Op, SelectionDAG &DAG) { - bool addTest = true; - SDOperand Cond = Op.getOperand(0); - SDOperand CC; - - if (Cond.getOpcode() == ISD::SETCC) - Cond = LowerSETCC_New(Cond, DAG); - - if (Cond.getOpcode() == X86ISD::SETCC_NEW) { - CC = Cond.getOperand(0); - - // If condition flag is set by a X86ISD::CMP, then make a copy of it - // (since flag operand cannot be shared). Use it as the condition setting - // operand in place of the X86ISD::SETCC. - // If the X86ISD::SETCC has more than one use, then perhaps it's better - // to use a test instead of duplicating the X86ISD::CMP (for register - // pressure reason)? SDOperand Cmp = Cond.getOperand(1); unsigned Opc = Cmp.getOpcode(); - bool IllegalFPCMov = - ! 
((X86ScalarSSEf32 && Op.getValueType()==MVT::f32) || - (X86ScalarSSEf64 && Op.getValueType()==MVT::f64)) && - !hasFPCMov(cast(CC)->getSignExtended()); - if ((Opc == X86ISD::CMP_NEW || - Opc == X86ISD::COMI_NEW || - Opc == X86ISD::UCOMI_NEW) && - !IllegalFPCMov) { - Cond = DAG.getNode(Opc, MVT::i32, Cmp.getOperand(0), Cmp.getOperand(1)); + MVT::ValueType VT = Op.getValueType(); + bool IllegalFPCMov = false; + if (VT == MVT::f32 && !X86ScalarSSEf32) + IllegalFPCMov = !hasFPCMov(cast(CC)->getSignExtended()); + else if (VT == MVT::f64 && !X86ScalarSSEf64) + IllegalFPCMov = !hasFPCMov(cast(CC)->getSignExtended()); + else if (VT == MVT::f80) + IllegalFPCMov = !hasFPCMov(cast(CC)->getSignExtended()); + if ((Opc == X86ISD::CMP || + Opc == X86ISD::COMI || + Opc == X86ISD::UCOMI) && !IllegalFPCMov) { + Cond = Cmp; addTest = false; } } if (addTest) { CC = DAG.getConstant(X86::COND_NE, MVT::i8); - Cond = DAG.getNode(X86ISD::CMP_NEW, MVT::i32, Cond, - DAG.getConstant(0, MVT::i8)); + Cond= DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8)); } const MVT::ValueType *VTs = DAG.getNodeValueTypes(Op.getValueType(), @@ -3854,7 +4213,7 @@ SDOperand X86TargetLowering::LowerSELECT_New(SDOperand Op, SelectionDAG &DAG) { Ops.push_back(Op.getOperand(1)); Ops.push_back(CC); Ops.push_back(Cond); - return DAG.getNode(X86ISD::CMOV_NEW, VTs, 2, &Ops[0], Ops.size()); + return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size()); } SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) { @@ -3863,87 +4222,51 @@ SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) { SDOperand Cond = Op.getOperand(1); SDOperand Dest = Op.getOperand(2); SDOperand CC; - const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag); if (Cond.getOpcode() == ISD::SETCC) - Cond = LowerSETCC(Cond, DAG, Chain); + Cond = LowerSETCC(Cond, DAG); + // If condition flag is set by a X86ISD::CMP, then use it as the condition + // setting operand in place of the X86ISD::SETCC. if (Cond.getOpcode() == X86ISD::SETCC) { CC = Cond.getOperand(0); - // If condition flag is set by a X86ISD::CMP, then make a copy of it - // (since flag operand cannot be shared). Use it as the condition setting - // operand in place of the X86ISD::SETCC. - // If the X86ISD::SETCC has more than one use, then perhaps it's better - // to use a test instead of duplicating the X86ISD::CMP (for register - // pressure reason)? 
SDOperand Cmp = Cond.getOperand(1); unsigned Opc = Cmp.getOpcode(); - if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) { - SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) }; - Cond = DAG.getNode(Opc, VTs, 2, Ops, 3); + if (Opc == X86ISD::CMP || + Opc == X86ISD::COMI || + Opc == X86ISD::UCOMI) { + Cond = Cmp; addTest = false; } } if (addTest) { CC = DAG.getConstant(X86::COND_NE, MVT::i8); - SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) }; - Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3); + Cond= DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8)); } return DAG.getNode(X86ISD::BRCOND, Op.getValueType(), - Cond, Op.getOperand(2), CC, Cond.getValue(1)); -} - -SDOperand X86TargetLowering::LowerBRCOND_New(SDOperand Op, SelectionDAG &DAG) { - bool addTest = true; - SDOperand Chain = Op.getOperand(0); - SDOperand Cond = Op.getOperand(1); - SDOperand Dest = Op.getOperand(2); - SDOperand CC; - - if (Cond.getOpcode() == ISD::SETCC) - Cond = LowerSETCC_New(Cond, DAG); - - if (Cond.getOpcode() == X86ISD::SETCC_NEW) { - CC = Cond.getOperand(0); - - // If condition flag is set by a X86ISD::CMP, then make a copy of it - // (since flag operand cannot be shared). Use it as the condition setting - // operand in place of the X86ISD::SETCC. - // If the X86ISD::SETCC has more than one use, then perhaps it's better - // to use a test instead of duplicating the X86ISD::CMP (for register - // pressure reason)? - SDOperand Cmp = Cond.getOperand(1); - unsigned Opc = Cmp.getOpcode(); - if (Opc == X86ISD::CMP_NEW || - Opc == X86ISD::COMI_NEW || - Opc == X86ISD::UCOMI_NEW) { - Cond = DAG.getNode(Opc, MVT::i32, Cmp.getOperand(0), Cmp.getOperand(1)); - addTest = false; - } - } - - if (addTest) { - CC = DAG.getConstant(X86::COND_NE, MVT::i8); - Cond= DAG.getNode(X86ISD::CMP_NEW, MVT::i32, Cond, DAG.getConstant(0, MVT::i8)); - } - return DAG.getNode(X86ISD::BRCOND_NEW, Op.getValueType(), Chain, Op.getOperand(2), CC, Cond); } SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) { - unsigned CallingConv= cast(Op.getOperand(1))->getValue(); - - if (Subtarget->is64Bit()) - return LowerX86_64CCCCallTo(Op, DAG, CallingConv); + unsigned CallingConv = cast(Op.getOperand(1))->getValue(); + bool isTailCall = cast(Op.getOperand(3))->getValue() != 0; + + if (Subtarget->is64Bit()) + if(CallingConv==CallingConv::Fast && isTailCall && PerformTailCallOpt) + return LowerX86_TailCallTo(Op, DAG, CallingConv); + else + return LowerX86_64CCCCallTo(Op, DAG, CallingConv); else switch (CallingConv) { default: assert(0 && "Unsupported calling convention"); case CallingConv::Fast: - // TODO: Implement fastcc - // Falls through + if (isTailCall && PerformTailCallOpt) + return LowerX86_TailCallTo(Op, DAG, CallingConv); + else + return LowerCCCCallTo(Op,DAG, CallingConv); case CallingConv::C: case CallingConv::X86_StdCall: return LowerCCCCallTo(Op, DAG, CallingConv); @@ -4011,8 +4334,7 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) { default: assert(0 && "Unsupported calling convention"); case CallingConv::Fast: - // TODO: implement fastcc. - + return LowerCCCArguments(Op,DAG, true); // Falls through case CallingConv::C: return LowerCCCArguments(Op, DAG); @@ -4037,7 +4359,7 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { // The libc version is likely to be faster for these cases. It can use the // address value and run time information about the CPU. 
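   // (Concretely: fall back to the libc call when the destination is not
   // 4-byte aligned or the constant size exceeds the subtarget's
   // getMaxInlineSizeThreshold(); otherwise the sequence below emits an
   // inline rep;stos.)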
@@ -4037,7 +4359,7 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
   // The libc version is likely to be faster for these cases. It can use the
   // address value and run time information about the CPU.
   if ((Align & 3) != 0 ||
-      (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) {
+      (I && I->getValue() > Subtarget->getMaxInlineSizeThreshold())) {
     MVT::ValueType IntPtr = getPointerTy();
     const Type *IntPtrTy = getTargetData()->getIntPtrType();
     TargetLowering::ArgListTy Args;
@@ -4183,36 +4505,14 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
   return Chain;
 }
 
-SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
-  SDOperand Chain = Op.getOperand(0);
-  unsigned Align =
-    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
-  if (Align == 0) Align = 1;
-
-  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
-  // If not DWORD aligned or size is more than the threshold, call memcpy.
-  // The libc version is likely to be faster for these cases. It can use the
-  // address value and run time information about the CPU.
-  // With glibc 2.6.1 on a Core 2, copying an array of 100M longs was 30% faster.
-  if ((Align & 3) != 0 ||
-      (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) {
-    MVT::ValueType IntPtr = getPointerTy();
-    TargetLowering::ArgListTy Args;
-    TargetLowering::ArgListEntry Entry;
-    Entry.Ty = getTargetData()->getIntPtrType();
-    Entry.Node = Op.getOperand(1); Args.push_back(Entry);
-    Entry.Node = Op.getOperand(2); Args.push_back(Entry);
-    Entry.Node = Op.getOperand(3); Args.push_back(Entry);
-    std::pair<SDOperand,SDOperand> CallResult =
-      LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
-                  DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
-    return CallResult.second;
-  }
-
+SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain,
+                                               SDOperand Dest,
+                                               SDOperand Source,
+                                               unsigned Size,
+                                               unsigned Align,
+                                               SelectionDAG &DAG) {
   MVT::ValueType AVT;
-  SDOperand Count;
   unsigned BytesLeft = 0;
-  bool TwoRepMovs = false;
   switch (Align & 3) {
     case 2:   // WORD aligned
       AVT = MVT::i16;
@@ -4224,33 +4524,22 @@ SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
       break;
     default:  // Byte aligned
       AVT = MVT::i8;
-      Count = Op.getOperand(3);
      break;
  }
 
-  if (AVT > MVT::i8) {
-    if (I) {
-      unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
-      Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
-      BytesLeft = I->getValue() % UBytes;
-    } else {
-      assert(AVT >= MVT::i32 &&
-             "Do not use rep;movs if not at least DWORD aligned");
-      Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
-                          Op.getOperand(3), DAG.getConstant(2, MVT::i8));
-      TwoRepMovs = true;
-    }
-  }
+  unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
+  SDOperand Count = DAG.getConstant(Size / UBytes, getPointerTy());
+  BytesLeft = Size % UBytes;
 
   SDOperand InFlag(0, 0);
   Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
                             Count, InFlag);
   InFlag = Chain.getValue(1);
   Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
-                            Op.getOperand(1), InFlag);
+                            Dest, InFlag);
   InFlag = Chain.getValue(1);
   Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI,
-                            Op.getOperand(2), InFlag);
+                            Source, InFlag);
   InFlag = Chain.getValue(1);
 
   SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
@@ -4260,27 +4549,12 @@ SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
   Ops.push_back(InFlag);
   Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
 
-  if (TwoRepMovs) {
-    InFlag = Chain.getValue(1);
-    Count = Op.getOperand(3);
-    MVT::ValueType CVT = Count.getValueType();
-    SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
-                                 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
-    Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
-                             Left, InFlag);
-    InFlag = Chain.getValue(1);
-    Tys = DAG.getVTList(MVT::Other, MVT::Flag);
-    Ops.clear();
-    Ops.push_back(Chain);
-    Ops.push_back(DAG.getValueType(MVT::i8));
-    Ops.push_back(InFlag);
-    Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
-  } else if (BytesLeft) {
+  if (BytesLeft) {
     // Issue loads and stores for the last 1 - 7 bytes.
-    unsigned Offset = I->getValue() - BytesLeft;
-    SDOperand DstAddr = Op.getOperand(1);
+    unsigned Offset = Size - BytesLeft;
+    SDOperand DstAddr = Dest;
     MVT::ValueType DstVT = DstAddr.getValueType();
-    SDOperand SrcAddr = Op.getOperand(2);
+    SDOperand SrcAddr = Source;
     MVT::ValueType SrcVT = SrcAddr.getValueType();
     SDOperand Value;
     if (BytesLeft >= 4) {
@@ -4326,31 +4600,36 @@ SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
   return Chain;
 }
 
-SDOperand
-X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) {
+/// Expand the result of: i64,outchain = READCYCLECOUNTER inchain
+SDNode *X86TargetLowering::ExpandREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG){
   SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
-  SDOperand TheOp = Op.getOperand(0);
-  SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheOp, 1);
+  SDOperand TheChain = N->getOperand(0);
+  SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheChain, 1);
   if (Subtarget->is64Bit()) {
-    SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1));
-    SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::RDX,
-                                         MVT::i64, Copy1.getValue(2));
-    SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, Copy2,
+    SDOperand rax = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1));
+    SDOperand rdx = DAG.getCopyFromReg(rax.getValue(1), X86::RDX,
+                                       MVT::i64, rax.getValue(2));
+    SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, rdx,
                                 DAG.getConstant(32, MVT::i8));
     SDOperand Ops[] = {
-      DAG.getNode(ISD::OR, MVT::i64, Copy1, Tmp), Copy2.getValue(1)
+      DAG.getNode(ISD::OR, MVT::i64, rax, Tmp), rdx.getValue(1)
     };
     Tys = DAG.getVTList(MVT::i64, MVT::Other);
-    return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2);
+    return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2).Val;
   }
 
-  SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1));
-  SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::EDX,
-                                       MVT::i32, Copy1.getValue(2));
-  SDOperand Ops[] = { Copy1, Copy2, Copy2.getValue(1) };
-  Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
-  return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 3);
+  SDOperand eax = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1));
+  SDOperand edx = DAG.getCopyFromReg(eax.getValue(1), X86::EDX,
+                                     MVT::i32, eax.getValue(2));
+  // Use a BUILD_PAIR to merge the two 32-bit values into a 64-bit one.
+  SDOperand Ops[] = { eax, edx };
+  Ops[0] = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Ops, 2);
+
+  // Use a MERGE_VALUES to return the value and chain.
+  Ops[1] = edx.getValue(1);
+  Tys = DAG.getVTList(MVT::i64, MVT::Other);
+  return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2).Val;
 }
 
 SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
@@ -4531,21 +4810,10 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
       SDOperand RHS = Op.getOperand(2);
       translateX86CC(CC, true, X86CC, LHS, RHS, DAG);
 
-      if (NewCCModeling) {
-        Opc = (Opc == X86ISD::UCOMI) ? X86ISD::UCOMI_NEW : X86ISD::COMI_NEW;
-        SDOperand Cond = DAG.getNode(Opc, MVT::i32, LHS, RHS);
-        SDOperand SetCC = DAG.getNode(X86ISD::SETCC_NEW, MVT::i8,
-                                      DAG.getConstant(X86CC, MVT::i8), Cond);
-        return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
-      } else {
-        const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
-        SDOperand Ops1[] = { DAG.getEntryNode(), LHS, RHS };
-        SDOperand Cond = DAG.getNode(Opc, VTs, 2, Ops1, 3);
-        VTs = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
-        SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
-        SDOperand SetCC = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2);
-        return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
-      }
+      SDOperand Cond = DAG.getNode(Opc, MVT::i32, LHS, RHS);
+      SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8,
+                                    DAG.getConstant(X86CC, MVT::i8), Cond);
+      return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
     }
   }
 }
@@ -4624,7 +4892,6 @@ SDOperand X86TargetLowering::LowerTRAMPOLINE(SDOperand Op,
     default:
       assert(0 && "Unsupported calling convention");
     case CallingConv::C:
-    case CallingConv::Fast:
     case CallingConv::X86_StdCall: {
       // Pass 'nest' parameter in ECX.
      // Must be kept in sync with X86CallingConv.td
@@ -4691,6 +4958,66 @@ SDOperand X86TargetLowering::LowerTRAMPOLINE(SDOperand Op,
  }
 }
 
+SDOperand X86TargetLowering::LowerFLT_ROUNDS(SDOperand Op, SelectionDAG &DAG) {
+  /*
+   The rounding mode is in bits 11:10 of the FP control word (FPCW),
+   and has the following settings:
+     00 Round to nearest
+     01 Round to -inf
+     10 Round to +inf
+     11 Round to 0
+
+  FLT_ROUNDS, on the other hand, expects the following:
+    -1 Undefined
+     0 Round to 0
+     1 Round to nearest
+     2 Round to +inf
+     3 Round to -inf
+
+  To perform the conversion, we do:
+    (((((FPCW & 0x800) >> 11) | ((FPCW & 0x400) >> 9)) + 1) & 3)
+  */
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  const TargetMachine &TM = MF.getTarget();
+  const TargetFrameInfo &TFI = *TM.getFrameInfo();
+  unsigned StackAlignment = TFI.getStackAlignment();
+  MVT::ValueType VT = Op.getValueType();
+
+  // Save the FP control word to a stack slot.
+  int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment);
+  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+
+  SDOperand Chain = DAG.getNode(X86ISD::FNSTCW16m, MVT::Other,
+                                DAG.getEntryNode(), StackSlot);
+
+  // Load the FP control word from the stack slot.
+  SDOperand CWD = DAG.getLoad(MVT::i16, Chain, StackSlot, NULL, 0);
+
+  // Transform as necessary.
+  SDOperand CWD1 =
+    DAG.getNode(ISD::SRL, MVT::i16,
+                DAG.getNode(ISD::AND, MVT::i16,
+                            CWD, DAG.getConstant(0x800, MVT::i16)),
+                DAG.getConstant(11, MVT::i8));
+  SDOperand CWD2 =
+    DAG.getNode(ISD::SRL, MVT::i16,
+                DAG.getNode(ISD::AND, MVT::i16,
+                            CWD, DAG.getConstant(0x400, MVT::i16)),
+                DAG.getConstant(9, MVT::i8));
+
+  SDOperand RetVal =
+    DAG.getNode(ISD::AND, MVT::i16,
+                DAG.getNode(ISD::ADD, MVT::i16,
+                            DAG.getNode(ISD::OR, MVT::i16, CWD1, CWD2),
+                            DAG.getConstant(1, MVT::i16)),
+                DAG.getConstant(3, MVT::i16));
+
+  return DAG.getNode((MVT::getSizeInBits(VT) < 16 ?
+                      ISD::TRUNCATE : ISD::ZERO_EXTEND), VT, RetVal);
+}
+
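The conversion formula in LowerFLT_ROUNDS above can be checked by hand: feeding each of the four rounding-control settings through ((((cw & 0x800) >> 11) | ((cw & 0x400) >> 9)) + 1) & 3 must produce the FLT_ROUNDS encoding listed in the comment. A minimal self-check in plain C++, independent of LLVM:

    #include <cassert>

    // The conversion used by LowerFLT_ROUNDS above, applied to a 16-bit
    // x87 control word whose rounding-control field is bits 11:10.
    static int fltRoundsFromCW(unsigned cw) {
      return ((((cw & 0x800) >> 11) | ((cw & 0x400) >> 9)) + 1) & 3;
    }

    int main() {
      assert(fltRoundsFromCW(0x0 << 10) == 1);  // 00 nearest -> 1
      assert(fltRoundsFromCW(0x1 << 10) == 3);  // 01 -inf    -> 3
      assert(fltRoundsFromCW(0x2 << 10) == 2);  // 10 +inf    -> 2
      assert(fltRoundsFromCW(0x3 << 10) == 0);  // 11 to zero -> 0
      return 0;
    }
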
 /// LowerOperation - Provide custom lowering hooks for some operations.
 ///
 SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
@@ -4708,31 +5035,20 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
   case ISD::SHL_PARTS:
   case ISD::SRA_PARTS:
   case ISD::SRL_PARTS:          return LowerShift(Op, DAG);
-  case ISD::SDIV:
-  case ISD::UDIV:
-  case ISD::SREM:
-  case ISD::UREM:               return LowerIntegerDivOrRem(Op, DAG);
   case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
   case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
   case ISD::FABS:               return LowerFABS(Op, DAG);
   case ISD::FNEG:               return LowerFNEG(Op, DAG);
   case ISD::FCOPYSIGN:          return LowerFCOPYSIGN(Op, DAG);
-  case ISD::SETCC:              return NewCCModeling
-                                  ? LowerSETCC_New(Op, DAG)
-                                  : LowerSETCC(Op, DAG, DAG.getEntryNode());
-  case ISD::SELECT:             return NewCCModeling
-                                  ? LowerSELECT_New(Op, DAG)
-                                  : LowerSELECT(Op, DAG);
-  case ISD::BRCOND:             return NewCCModeling
-                                  ? LowerBRCOND_New(Op, DAG)
-                                  : LowerBRCOND(Op, DAG);
+  case ISD::SETCC:              return LowerSETCC(Op, DAG);
+  case ISD::SELECT:             return LowerSELECT(Op, DAG);
+  case ISD::BRCOND:             return LowerBRCOND(Op, DAG);
   case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
   case ISD::CALL:               return LowerCALL(Op, DAG);
   case ISD::RET:                return LowerRET(Op, DAG);
   case ISD::FORMAL_ARGUMENTS:   return LowerFORMAL_ARGUMENTS(Op, DAG);
   case ISD::MEMSET:             return LowerMEMSET(Op, DAG);
   case ISD::MEMCPY:             return LowerMEMCPY(Op, DAG);
-  case ISD::READCYCLECOUNTER:   return LowerREADCYCLCECOUNTER(Op, DAG);
   case ISD::VASTART:            return LowerVASTART(Op, DAG);
   case ISD::VACOPY:             return LowerVACOPY(Op, DAG);
   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
@@ -4743,8 +5059,22 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
   case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
   case ISD::EH_RETURN:          return LowerEH_RETURN(Op, DAG);
   case ISD::TRAMPOLINE:         return LowerTRAMPOLINE(Op, DAG);
+  case ISD::FLT_ROUNDS:         return LowerFLT_ROUNDS(Op, DAG);
+
+  // FIXME: REMOVE THIS WHEN LegalizeDAGTypes lands.
+  case ISD::READCYCLECOUNTER:
+    return SDOperand(ExpandREADCYCLECOUNTER(Op.Val, DAG), 0);
+  }
+}
+
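ExpandREADCYCLECOUNTER, dispatched from the switch above, copies RDTSC's result out of EDX:EAX (RDX:RAX in 64-bit mode) and fuses the halves with a shift and an or. The same recombination written out in plain C++, with stand-in values for the two register copies:

    #include <cassert>
    #include <cstdint>

    // Merge the two 32-bit RDTSC halves the same way the 64-bit path does:
    // (rdx << 32) | rax, where each register holds a zero-extended half.
    static uint64_t mergeCycleCounter(uint32_t eax, uint32_t edx) {
      return (static_cast<uint64_t>(edx) << 32) | eax;
    }

    int main() {
      // Low half 0xDEADBEEF, high half 0x1: 0x1DEADBEEF cycles total.
      assert(mergeCycleCounter(0xDEADBEEFu, 0x1u) == 0x1DEADBEEFull);
      return 0;
    }
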
+/// ExpandOperationResult - Provide custom lowering hooks for expanding
+/// operations.
+SDNode *X86TargetLowering::ExpandOperationResult(SDNode *N, SelectionDAG &DAG) {
+  switch (N->getOpcode()) {
+  default: assert(0 && "Should not custom lower this!");
+  case ISD::FP_TO_SINT:       return ExpandFP_TO_SINT(N, DAG);
+  case ISD::READCYCLECOUNTER: return ExpandREADCYCLECOUNTER(N, DAG);
   }
-  return SDOperand();
 }
 
 const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
@@ -4769,17 +5099,11 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::TAILCALL:           return "X86ISD::TAILCALL";
   case X86ISD::RDTSC_DAG:          return "X86ISD::RDTSC_DAG";
   case X86ISD::CMP:                return "X86ISD::CMP";
-  case X86ISD::CMP_NEW:            return "X86ISD::CMP_NEW";
   case X86ISD::COMI:               return "X86ISD::COMI";
-  case X86ISD::COMI_NEW:           return "X86ISD::COMI_NEW";
   case X86ISD::UCOMI:              return "X86ISD::UCOMI";
-  case X86ISD::UCOMI_NEW:          return "X86ISD::UCOMI_NEW";
   case X86ISD::SETCC:              return "X86ISD::SETCC";
-  case X86ISD::SETCC_NEW:          return "X86ISD::SETCC_NEW";
   case X86ISD::CMOV:               return "X86ISD::CMOV";
-  case X86ISD::CMOV_NEW:           return "X86ISD::CMOV_NEW";
   case X86ISD::BRCOND:             return "X86ISD::BRCOND";
-  case X86ISD::BRCOND_NEW:         return "X86ISD::BRCOND_NEW";
   case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
   case X86ISD::REP_STOS:           return "X86ISD::REP_STOS";
   case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
@@ -4795,8 +5119,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::TLSADDR:            return "X86ISD::TLSADDR";
   case X86ISD::THREAD_POINTER:     return "X86ISD::THREAD_POINTER";
   case X86ISD::EH_RETURN:          return "X86ISD::EH_RETURN";
-  case X86ISD::DIV:                return "X86ISD::DIV";
-  case X86ISD::IDIV:               return "X86ISD::IDIV";
+  case X86ISD::TC_RETURN:          return "X86ISD::TC_RETURN";
+  case X86ISD::FNSTCW16m:          return "X86ISD::FNSTCW16m";
   }
 }
 
@@ -4849,6 +5173,27 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
 }
 
+bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
+  if (!Ty1->isInteger() || !Ty2->isInteger())
+    return false;
+  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
+  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
+  if (NumBits1 <= NumBits2)
+    return false;
+  return Subtarget->is64Bit() || NumBits1 < 64;
+}
+
+bool X86TargetLowering::isTruncateFree(MVT::ValueType VT1,
+                                       MVT::ValueType VT2) const {
+  if (!MVT::isInteger(VT1) || !MVT::isInteger(VT2))
+    return false;
+  unsigned NumBits1 = MVT::getSizeInBits(VT1);
+  unsigned NumBits2 = MVT::getSizeInBits(VT2);
+  if (NumBits1 <= NumBits2)
+    return false;
+  return Subtarget->is64Bit() || NumBits1 < 64;
+}
+
 /// isShuffleMaskLegal - Targets can use this to indicate that they only
 /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
@@ -4898,13 +5243,7 @@ X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
   case X86::CMOV_FR32:
   case X86::CMOV_FR64:
   case X86::CMOV_V4F32:
   case X86::CMOV_V2F64:
-  case X86::CMOV_V2I64:
-
-  case X86::NEW_CMOV_FR32:
-  case X86::NEW_CMOV_FR64:
-  case X86::NEW_CMOV_V4F32:
-  case X86::NEW_CMOV_V2F64:
-  case X86::NEW_CMOV_V2I64: {
+  case X86::CMOV_V2I64: {
    // To "insert" a SELECT_CC instruction, we actually have to insert the
    // diamond control-flow pattern.  The incoming instruction knows the
    // destination vreg to set, the condition code register to branch on, the
@@ -5061,7 +5400,6 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
   switch (Opc) {
   default: break;
   case X86ISD::SETCC:
-  case X86ISD::SETCC_NEW:
     KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
     break;
   }
@@ -5077,7 +5415,7 @@ static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
   i %= NumElems;
   if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
     return (i == 0)
-      ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
+     ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT));
   } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
     SDOperand Idx = PermMask.getOperand(i);
     if (Idx.getOpcode() == ISD::UNDEF)
@@ -5423,7 +5761,9 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint,
       return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
    else if (VT == MVT::i8)
      return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
-    break;
+    else if (VT == MVT::i64)
+      return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX, 0);
+    break;
  }
 }
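The CMOV pseudo-instruction cases above expand a select into explicit control flow when no cmov form applies: a compare-and-branch, two predecessor blocks producing the two values, and a join block whose PHI merges them (the diamond the comment describes). The same diamond in plain C++, as a schematic only, not the MachineBasicBlock surgery itself:

    #include <cassert>

    // Schematic of the diamond expansion: the pseudo "dst = select cond, t, f"
    // becomes a branch, two value-producing paths, and a join.
    static int selectDiamond(bool cond, int tval, int fval) {
      int dst;          // the destination vreg of the pseudo
      if (cond)         // thisMBB: test + conditional branch
        dst = tval;     // one edge of the diamond supplies the true value
      else
        dst = fval;     // the other edge supplies the false value
      return dst;       // sinkMBB: a PHI merges the two incoming values
    }

    int main() {
      assert(selectDiamond(true, 1, 2) == 1);
      assert(selectDiamond(false, 1, 2) == 2);
      return 0;
    }
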