X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86ISelLowering.cpp;h=23f9e9500c2dde606a1cc11adde7835c8aaf3e29;hb=a394117bc0a7a94d99869e2e83ff0647d3aff9a6;hp=111bfb69e2e56422be260a68c11222630713af7b;hpb=fcf1a3de7a54d410678654bdbcd58b5bc924759c;p=oota-llvm.git diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 111bfb69e2e..23f9e9500c2 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -20,6 +20,7 @@ #include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/GlobalVariable.h" #include "llvm/Function.h" #include "llvm/Intrinsics.h" #include "llvm/ADT/VectorExtras.h" @@ -32,19 +33,17 @@ #include "llvm/CodeGen/SSARegMap.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Support/CommandLine.h" #include "llvm/ADT/StringExtras.h" using namespace llvm; -// FIXME: temporary. -static cl::opt EnableFastCC("enable-x86-fastcc", cl::Hidden, - cl::desc("Enable fastcc on X86")); X86TargetLowering::X86TargetLowering(TargetMachine &TM) : TargetLowering(TM) { Subtarget = &TM.getSubtarget(); X86ScalarSSE = Subtarget->hasSSE2(); X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP; + RegInfo = TM.getRegisterInfo(); + // Set up the TargetLowering object. // X86 is weird, it always uses i8 for shift amounts and setcc results. @@ -68,16 +67,6 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setUseUnderscoreLongJmp(true); } - // Add legal addressing mode scale values. - addLegalAddressScale(8); - addLegalAddressScale(4); - addLegalAddressScale(2); - // Enter the ones which require both scale + index last. These are more - // expensive. - addLegalAddressScale(9); - addLegalAddressScale(5); - addLegalAddressScale(3); - // Set up the register classes. addRegisterClass(MVT::i8, X86::GR8RegisterClass); addRegisterClass(MVT::i16, X86::GR16RegisterClass); @@ -210,10 +199,14 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) } // X86 ret instruction may pop stack. setOperationAction(ISD::RET , MVT::Other, Custom); + if (!Subtarget->is64Bit()) + setOperationAction(ISD::EH_RETURN , MVT::Other, Custom); + // Darwin ABI issue. setOperationAction(ISD::ConstantPool , MVT::i32 , Custom); setOperationAction(ISD::JumpTable , MVT::i32 , Custom); setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom); + setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom); setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom); if (Subtarget->is64Bit()) { setOperationAction(ISD::ConstantPool , MVT::i64 , Custom); @@ -238,18 +231,36 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) !Subtarget->isTargetCygMing()) setOperationAction(ISD::LABEL, MVT::Other, Expand); + setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); + setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); + setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); + setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); + if (Subtarget->is64Bit()) { + // FIXME: Verify + setExceptionPointerRegister(X86::RAX); + setExceptionSelectorRegister(X86::RDX); + } else { + setExceptionPointerRegister(X86::EAX); + setExceptionSelectorRegister(X86::EDX); + } + // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); - - // Use the default implementation. 
setOperationAction(ISD::VAARG , MVT::Other, Expand); - setOperationAction(ISD::VACOPY , MVT::Other, Expand); setOperationAction(ISD::VAEND , MVT::Other, Expand); + if (Subtarget->is64Bit()) + setOperationAction(ISD::VACOPY , MVT::Other, Custom); + else + setOperationAction(ISD::VACOPY , MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); if (Subtarget->is64Bit()) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand); + if (Subtarget->isTargetCygMing()) + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); + else + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); if (X86ScalarSSE) { // Set up the FP register classes. @@ -283,11 +294,14 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) addLegalFPImmediate(+0.0); // xorps / xorpd } else { // Set up the FP register classes. - addRegisterClass(MVT::f64, X86::RFPRegisterClass); + addRegisterClass(MVT::f64, X86::RFP64RegisterClass); + addRegisterClass(MVT::f32, X86::RFP32RegisterClass); setOperationAction(ISD::UNDEF, MVT::f64, Expand); + setOperationAction(ISD::UNDEF, MVT::f32, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + setOperationAction(ISD::FP_ROUND, MVT::f32, Expand); if (!UnsafeFPMath) { setOperationAction(ISD::FSIN , MVT::f64 , Expand); @@ -295,6 +309,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) } setOperationAction(ISD::ConstantFP, MVT::f64, Expand); + setOperationAction(ISD::ConstantFP, MVT::f32, Expand); addLegalFPImmediate(+0.0); // FLD0 addLegalFPImmediate(+1.0); // FLD1 addLegalFPImmediate(-0.0); // FLD0/FCHS @@ -303,11 +318,12 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) // First set operation action for all vector types to expand. Then we // will selectively turn on ones that can be effectively codegen'd. 
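// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): the loop below applies a
// "default everything to Expand, then whitelist" pattern -- every vector
// operation starts out Expand, and only the ones the subtarget can really
// select are re-marked Legal/Custom/Promote afterwards.  The stand-alone
// model here uses invented names and a plain std::map; it is not the real
// TargetLowering action table.
#include <map>
#include <utility>

namespace sketch {
enum Action { Legal, Promote, Custom, Expand };
enum Opcode { ADD, FADD, LOAD, VECTOR_SHUFFLE };
enum ValueType { v4f32, v2f64, v4i32 };

class ActionTable {
  std::map<std::pair<Opcode, ValueType>, Action> Tbl;
public:
  // Unlisted (opcode, type) pairs default to Expand.
  Action get(Opcode Op, ValueType VT) const {
    auto It = Tbl.find({Op, VT});
    return It == Tbl.end() ? Expand : It->second;
  }
  void set(Opcode Op, ValueType VT, Action A) { Tbl[{Op, VT}] = A; }
};

// Mirrors the constructor below: only enable what the hardware supports.
inline void configure(ActionTable &T, bool HasSSE1) {
  if (HasSSE1) {
    T.set(FADD, v4f32, Legal);             // addps exists
    T.set(VECTOR_SHUFFLE, v4f32, Custom);  // needs target-specific lowering
  }
}
} // namespace sketch
// ---------------------------------------------------------------------------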
- for (unsigned VT = (unsigned)MVT::Vector + 1; - VT != (unsigned)MVT::LAST_VALUETYPE; VT++) { + for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand); setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand); setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand); + setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand); setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand); setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand); setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand); @@ -320,17 +336,81 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand); setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand); setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand); + setOperationAction(ISD::FABS, (MVT::ValueType)VT, Expand); + setOperationAction(ISD::FSIN, (MVT::ValueType)VT, Expand); + setOperationAction(ISD::FCOS, (MVT::ValueType)VT, Expand); + setOperationAction(ISD::FREM, (MVT::ValueType)VT, Expand); + setOperationAction(ISD::FPOWI, (MVT::ValueType)VT, Expand); + setOperationAction(ISD::FSQRT, (MVT::ValueType)VT, Expand); + setOperationAction(ISD::FCOPYSIGN, (MVT::ValueType)VT, Expand); } if (Subtarget->hasMMX()) { addRegisterClass(MVT::v8i8, X86::VR64RegisterClass); addRegisterClass(MVT::v4i16, X86::VR64RegisterClass); addRegisterClass(MVT::v2i32, X86::VR64RegisterClass); + addRegisterClass(MVT::v1i64, X86::VR64RegisterClass); // FIXME: add MMX packed arithmetics - setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Expand); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Expand); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Expand); + + setOperationAction(ISD::ADD, MVT::v8i8, Legal); + setOperationAction(ISD::ADD, MVT::v4i16, Legal); + setOperationAction(ISD::ADD, MVT::v2i32, Legal); + setOperationAction(ISD::ADD, MVT::v1i64, Legal); + + setOperationAction(ISD::SUB, MVT::v8i8, Legal); + setOperationAction(ISD::SUB, MVT::v4i16, Legal); + setOperationAction(ISD::SUB, MVT::v2i32, Legal); + + setOperationAction(ISD::MULHS, MVT::v4i16, Legal); + setOperationAction(ISD::MUL, MVT::v4i16, Legal); + + setOperationAction(ISD::AND, MVT::v8i8, Promote); + AddPromotedToType (ISD::AND, MVT::v8i8, MVT::v1i64); + setOperationAction(ISD::AND, MVT::v4i16, Promote); + AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64); + setOperationAction(ISD::AND, MVT::v2i32, Promote); + AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64); + setOperationAction(ISD::AND, MVT::v1i64, Legal); + + setOperationAction(ISD::OR, MVT::v8i8, Promote); + AddPromotedToType (ISD::OR, MVT::v8i8, MVT::v1i64); + setOperationAction(ISD::OR, MVT::v4i16, Promote); + AddPromotedToType (ISD::OR, MVT::v4i16, MVT::v1i64); + setOperationAction(ISD::OR, MVT::v2i32, Promote); + AddPromotedToType (ISD::OR, MVT::v2i32, MVT::v1i64); + setOperationAction(ISD::OR, MVT::v1i64, Legal); + + setOperationAction(ISD::XOR, MVT::v8i8, Promote); + AddPromotedToType (ISD::XOR, MVT::v8i8, MVT::v1i64); + setOperationAction(ISD::XOR, MVT::v4i16, Promote); + AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64); + setOperationAction(ISD::XOR, MVT::v2i32, Promote); + AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64); + setOperationAction(ISD::XOR, MVT::v1i64, Legal); + + setOperationAction(ISD::LOAD, MVT::v8i8, Promote); + AddPromotedToType (ISD::LOAD, MVT::v8i8, MVT::v1i64); + setOperationAction(ISD::LOAD, 
MVT::v4i16, Promote); + AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64); + setOperationAction(ISD::LOAD, MVT::v2i32, Promote); + AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64); + setOperationAction(ISD::LOAD, MVT::v1i64, Legal); + + setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom); + + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom); + + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom); } if (Subtarget->hasSSE1()) { @@ -340,6 +420,8 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::FSUB, MVT::v4f32, Legal); setOperationAction(ISD::FMUL, MVT::v4f32, Legal); setOperationAction(ISD::FDIV, MVT::v4f32, Legal); + setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); + setOperationAction(ISD::FNEG, MVT::v4f32, Custom); setOperationAction(ISD::LOAD, MVT::v4f32, Legal); setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); @@ -357,14 +439,18 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::ADD, MVT::v16i8, Legal); setOperationAction(ISD::ADD, MVT::v8i16, Legal); setOperationAction(ISD::ADD, MVT::v4i32, Legal); + setOperationAction(ISD::ADD, MVT::v2i64, Legal); setOperationAction(ISD::SUB, MVT::v16i8, Legal); setOperationAction(ISD::SUB, MVT::v8i16, Legal); setOperationAction(ISD::SUB, MVT::v4i32, Legal); + setOperationAction(ISD::SUB, MVT::v2i64, Legal); setOperationAction(ISD::MUL, MVT::v8i16, Legal); setOperationAction(ISD::FADD, MVT::v2f64, Legal); setOperationAction(ISD::FSUB, MVT::v2f64, Legal); setOperationAction(ISD::FMUL, MVT::v2f64, Legal); setOperationAction(ISD::FDIV, MVT::v2f64, Legal); + setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); + setOperationAction(ISD::FNEG, MVT::v2f64, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom); @@ -437,16 +523,10 @@ SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) { SmallVector RVLocs; unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); - CCState CCInfo(CC, getTargetMachine(), RVLocs); + bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); + CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs); + CCInfo.AnalyzeReturn(Op.Val, RetCC_X86); - // Determine which register each value should be copied into. - for (unsigned i = 0; i != Op.getNumOperands() / 2; ++i) { - MVT::ValueType VT = Op.getOperand(i*2+1).getValueType(); - if (RetCC_X86(i, VT, VT, CCValAssign::Full, - cast(Op.getOperand(i*2+2))->getValue(), - CCInfo)) - assert(0 && "Unhandled result type!"); - } // If this is the first return lowered for this function, add the regs to the // liveout set for the function. 
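// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): AnalyzeReturn/RetCC_X86 now
// decide where each return value lives instead of the hand-written switches.
// For the 32-bit C calling convention (without SSE return values) the usual
// assignments are the ones below; the enum and function are simplified
// stand-ins, not RetCC_X86 itself.
#include <string>
#include <vector>

namespace sketch {
enum class RetKind { I32, I64, F32, F64, V128 };

// Physical registers a value of the given kind is returned in on x86-32.
inline std::vector<std::string> returnRegsX86_32(RetKind K) {
  switch (K) {
  case RetKind::I32:  return {"EAX"};
  case RetKind::I64:  return {"EAX", "EDX"};  // low half in EAX, high in EDX
  case RetKind::F32:
  case RetKind::F64:  return {"ST0"};         // x87 top of stack
  case RetKind::V128: return {"XMM0"};
  }
  return {};
}
} // namespace sketch
// ---------------------------------------------------------------------------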
@@ -493,7 +573,7 @@ SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) { MemLoc = DAG.getFrameIndex(SSFI, getPointerTy()); Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0); } - SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other); + SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other); SDOperand Ops[] = {Chain, MemLoc, DAG.getValueType(RVLocs[0].getValVT())}; Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3); Chain = Value.getValue(1); @@ -521,16 +601,15 @@ SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) { SDNode *X86TargetLowering:: LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall, unsigned CallingConv, SelectionDAG &DAG) { - SmallVector ResultVals; - + + // Assign locations to each value returned by this call. SmallVector RVLocs; - CCState CCInfo(CallingConv, getTargetMachine(), RVLocs); + bool isVarArg = cast(TheCall->getOperand(2))->getValue() != 0; + CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs); + CCInfo.AnalyzeCallResult(TheCall, RetCC_X86); + - for (unsigned i = 0, e = TheCall->getNumValues() - 1; i != e; ++i) { - MVT::ValueType VT = TheCall->getValueType(i); - if (RetCC_X86(i, VT, VT, CCValAssign::Full, 0, CCInfo)) - assert(0 && "Unhandled result type!"); - } + SmallVector ResultVals; // Copy all of the result registers out of their specified physreg. if (RVLocs.size() != 1 || RVLocs[0].getLocReg() != X86::ST0) { @@ -545,7 +624,7 @@ LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall, // before the fp stackifier runs. // Copy ST0 into an RFP register with FP_GET_RESULT. - SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag); + SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other, MVT::Flag); SDOperand GROps[] = { Chain, InFlag }; SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2); Chain = RetVal.getValue(1); @@ -567,11 +646,6 @@ LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall, RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0); Chain = RetVal.getValue(1); } - - if (RVLocs[0].getValVT() == MVT::f32 && !X86ScalarSSE) - // FIXME: we would really like to remember that this FP_ROUND - // operation is okay to eliminate if we allow excess FP precision. - RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal); ResultVals.push_back(RetVal); } @@ -601,66 +675,6 @@ static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg, return VReg; } -/// HowToPassArgument - Returns how an formal argument of the specified type -/// should be passed. If it is through stack, returns the size of the stack -/// slot; if it is through integer or XMM register, returns the number of -/// integer or XMM registers are needed. -static void -HowToPassCallArgument(MVT::ValueType ObjectVT, - bool ArgInReg, - unsigned NumIntRegs, unsigned NumXMMRegs, - unsigned MaxNumIntRegs, - unsigned &ObjSize, unsigned &ObjIntRegs, - unsigned &ObjXMMRegs) { - ObjSize = 0; - ObjIntRegs = 0; - ObjXMMRegs = 0; - - if (MaxNumIntRegs>3) { - // We don't have too much registers on ia32! 
:) - MaxNumIntRegs = 3; - } - - switch (ObjectVT) { - default: assert(0 && "Unhandled argument type!"); - case MVT::i8: - if (ArgInReg && (NumIntRegs < MaxNumIntRegs)) - ObjIntRegs = 1; - else - ObjSize = 1; - break; - case MVT::i16: - if (ArgInReg && (NumIntRegs < MaxNumIntRegs)) - ObjIntRegs = 1; - else - ObjSize = 2; - break; - case MVT::i32: - if (ArgInReg && (NumIntRegs < MaxNumIntRegs)) - ObjIntRegs = 1; - else - ObjSize = 4; - break; - case MVT::f32: - ObjSize = 4; - break; - case MVT::f64: - ObjSize = 8; - break; - case MVT::v16i8: - case MVT::v8i16: - case MVT::v4i32: - case MVT::v2i64: - case MVT::v4f32: - case MVT::v2f64: - if (NumXMMRegs < 4) - ObjXMMRegs = 1; - else - ObjSize = 16; - break; - } -} - SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG, bool isStdCall) { unsigned NumArgs = Op.Val->getNumValues() - 1; @@ -669,17 +683,12 @@ SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG, SDOperand Root = Op.getOperand(0); bool isVarArg = cast(Op.getOperand(2))->getValue() != 0; + // Assign locations to all of the incoming arguments. SmallVector ArgLocs; - CCState CCInfo(MF.getFunction()->getCallingConv(), getTargetMachine(), - ArgLocs); - - for (unsigned i = 0; i != NumArgs; ++i) { - MVT::ValueType ArgVT = Op.getValue(i).getValueType(); - unsigned ArgFlags = cast(Op.getOperand(3+i))->getValue(); - if (CC_X86_32_C(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo)) - assert(0 && "Unhandled argument type!"); - } - + CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg, + getTargetMachine(), ArgLocs); + CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C); + SmallVector ArgValues; unsigned LastVal = ~0U; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { @@ -700,8 +709,8 @@ SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG, RC = X86::VR128RegisterClass; } - SDOperand ArgValue = DAG.getCopyFromReg(Root, VA.getLocReg(), RegVT); - AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC); + unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC); + SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT); // If this is an 8 or 16-bit value, it is really passed promoted to 32 // bits. Insert an assert[sz]ext to capture this, then truncate to the @@ -741,10 +750,12 @@ SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG, BytesToPopOnReturn = StackSize; // Callee pops everything.. BytesCallerReserves = 0; } else { - BytesToPopOnReturn = 0; // Callee pops hidden struct pointer. + BytesToPopOnReturn = 0; // Callee pops nothing. // If this is an sret function, the return should pop the hidden pointer. - if (NumArgs && (cast(Op.getOperand(3))->getValue() & 4)) + if (NumArgs && + (cast(Op.getOperand(3))->getValue() & + ISD::ParamFlags::StructReturn)) BytesToPopOnReturn = 4; BytesCallerReserves = StackSize; @@ -753,7 +764,8 @@ SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG, RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only. ReturnAddrIndex = 0; // No return address slot generated yet. - MF.getInfo()->setBytesToPopOnReturn(BytesToPopOnReturn); + MF.getInfo() + ->setBytesToPopOnReturn(BytesToPopOnReturn); // Return the new list of results. 
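// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): the BytesToPopOnReturn logic
// above reduces to one decision -- callee-cleanup conventions (stdcall and
// fastcall, when not variadic) pop their whole argument area with `ret imm16`,
// while the C convention pops nothing except the hidden sret pointer
// (4 bytes).  A stand-alone restatement with invented names:
namespace sketch {
inline unsigned bytesPoppedByCallee(bool CalleeCleanup, bool IsVarArg,
                                    bool HasStructRet, unsigned ArgStackSize) {
  if (CalleeCleanup && !IsVarArg)
    return ArgStackSize;        // `ret N` pops all stack arguments
  return HasStructRet ? 4 : 0;  // `ret 4` pops only the hidden sret pointer
}
} // namespace sketch
// ---------------------------------------------------------------------------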
return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), @@ -768,15 +780,10 @@ SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG, SDOperand Callee = Op.getOperand(4); unsigned NumOps = (Op.getNumOperands() - 5) / 2; + // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; - CCState CCInfo(CC, getTargetMachine(), ArgLocs); - - for (unsigned i = 0; i != NumOps; ++i) { - MVT::ValueType ArgVT = Op.getOperand(5+2*i).getValueType(); - unsigned ArgFlags =cast(Op.getOperand(5+2*i+1))->getValue(); - if (CC_X86_32_C(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo)) - assert(0 && "Unhandled argument type!"); - } + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); + CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C); // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); @@ -821,7 +828,9 @@ SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG, } // If the first argument is an sret pointer, remember it. - bool isSRet = NumOps &&(cast(Op.getOperand(6))->getValue()&4); + bool isSRet = NumOps && + (cast(Op.getOperand(6))->getValue() & + ISD::ParamFlags::StructReturn); if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, @@ -925,141 +934,84 @@ SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG, // reasons. SDOperand X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) { - unsigned NumArgs = Op.Val->getNumValues()-1; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); SDOperand Root = Op.getOperand(0); - SmallVector ArgValues; - - // Add DAG nodes to load the arguments... On entry to a function the stack - // frame looks like this: - // - // [ESP] -- return address - // [ESP + 4] -- first nonreg argument (leftmost lexically) - // [ESP + 8] -- second nonreg argument, if 1st argument is <= 4 bytes in size - // ... - unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot - - // Keep track of the number of integer regs passed so far. This can be either - // 0 (neither EAX/ECX or EDX used), 1 (EAX/ECX is used) or 2 (EAX/ECX and EDX - // are both used). - unsigned NumIntRegs = 0; - unsigned NumXMMRegs = 0; // XMM regs used for parameter passing. - - static const unsigned XMMArgRegs[] = { - X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3 - }; - - static const unsigned GPRArgRegs[][2] = { - { X86::CL, X86::DL }, - { X86::CX, X86::DX }, - { X86::ECX, X86::EDX } - }; - - static const TargetRegisterClass* GPRClasses[3] = { - X86::GR8RegisterClass, X86::GR16RegisterClass, X86::GR32RegisterClass - }; + bool isVarArg = cast(Op.getOperand(2))->getValue() != 0; - for (unsigned i = 0; i < NumArgs; ++i) { - MVT::ValueType ObjectVT = Op.getValue(i).getValueType(); - unsigned ArgIncrement = 4; - unsigned ObjSize = 0; - unsigned ObjXMMRegs = 0; - unsigned ObjIntRegs = 0; - unsigned Reg = 0; - SDOperand ArgValue; - - HowToPassCallArgument(ObjectVT, - true, // Use as much registers as possible - NumIntRegs, NumXMMRegs, 2, - ObjSize, ObjIntRegs, ObjXMMRegs); + // Assign locations to all of the incoming arguments. 
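// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): X86 fastcall passes the first
// two integer-sized arguments in ECX and EDX (CL/CX and DL/DX for i8/i16) and
// everything else on the stack, which the callee then pops.  The deleted
// table-driven code encoded exactly that; the rewritten code obtains it from
// CC_X86_32_FastCall instead.  A simplified stand-in allocator:
#include <string>
#include <vector>

namespace sketch {
struct ArgLoc {
  bool InReg;
  std::string Reg;       // valid when InReg
  unsigned StackOffset;  // valid when !InReg
};

// SizesInBytes holds the size of each integer argument, in order.
inline std::vector<ArgLoc> allocateFastcallArgs(
    const std::vector<unsigned> &SizesInBytes) {
  static const char *IntRegs[2] = {"ECX", "EDX"};
  unsigned NumIntRegs = 0, Offset = 0;
  std::vector<ArgLoc> Locs;
  for (unsigned Size : SizesInBytes) {
    if (Size <= 4 && NumIntRegs < 2) {
      Locs.push_back({true, IntRegs[NumIntRegs++], 0});
    } else {
      Locs.push_back({false, "", Offset});
      Offset += (Size + 3) & ~3u;  // stack slots are 4-byte aligned
    }
  }
  return Locs;
}
} // namespace sketch
// The lowering below additionally rounds the stack argument area up to
// 8n+4 bytes (skipped on Windows/Cygwin/MinGW), so that once the 4-byte
// return address is pushed the total stays 8-byte aligned.
// ---------------------------------------------------------------------------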
+ SmallVector ArgLocs; + CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg, + getTargetMachine(), ArgLocs); + CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_FastCall); + + SmallVector ArgValues; + unsigned LastVal = ~0U; + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + // TODO: If an arg is passed in two places (e.g. reg and stack), skip later + // places. + assert(VA.getValNo() != LastVal && + "Don't support value assigned to multiple locs yet"); + LastVal = VA.getValNo(); - if (ObjSize > 4) - ArgIncrement = ObjSize; - - if (ObjIntRegs || ObjXMMRegs) { - switch (ObjectVT) { - default: assert(0 && "Unhandled argument type!"); - case MVT::i8: - case MVT::i16: - case MVT::i32: { - unsigned RegToUse = GPRArgRegs[ObjectVT-MVT::i8][NumIntRegs]; - Reg = AddLiveIn(MF, RegToUse, GPRClasses[ObjectVT-MVT::i8]); - ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT); - break; - } - case MVT::v16i8: - case MVT::v8i16: - case MVT::v4i32: - case MVT::v2i64: - case MVT::v4f32: - case MVT::v2f64: { - Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], X86::VR128RegisterClass); - ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT); - break; - } + if (VA.isRegLoc()) { + MVT::ValueType RegVT = VA.getLocVT(); + TargetRegisterClass *RC; + if (RegVT == MVT::i32) + RC = X86::GR32RegisterClass; + else { + assert(MVT::isVector(RegVT)); + RC = X86::VR128RegisterClass; } - NumIntRegs += ObjIntRegs; - NumXMMRegs += ObjXMMRegs; - } - if (ObjSize) { - // XMM arguments have to be aligned on 16-byte boundary. - if (ObjSize == 16) - ArgOffset = ((ArgOffset + 15) / 16) * 16; - // Create the SelectionDAG nodes corresponding to a load from this - // parameter. - int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); - SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy()); - ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, NULL, 0); - ArgOffset += ArgIncrement; // Move on to the next argument. + unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC); + SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT); + + // If this is an 8 or 16-bit value, it is really passed promoted to 32 + // bits. Insert an assert[sz]ext to capture this, then truncate to the + // right size. + if (VA.getLocInfo() == CCValAssign::SExt) + ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue, + DAG.getValueType(VA.getValVT())); + else if (VA.getLocInfo() == CCValAssign::ZExt) + ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue, + DAG.getValueType(VA.getValVT())); + + if (VA.getLocInfo() != CCValAssign::Full) + ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue); + + ArgValues.push_back(ArgValue); + } else { + assert(VA.isMemLoc()); + + // Create the nodes corresponding to a load from this parameter slot. + int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8, + VA.getLocMemOffset()); + SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy()); + ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0)); } - - ArgValues.push_back(ArgValue); } - + ArgValues.push_back(Root); - // Make sure the instruction takes 8n+4 bytes to make sure the start of the - // arguments and the arguments after the retaddr has been pushed are aligned. 
- if ((ArgOffset & 7) == 0) - ArgOffset += 4; + unsigned StackSize = CCInfo.getNextStackOffset(); + + if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) { + // Make sure the instruction takes 8n+4 bytes to make sure the start of the + // arguments and the arguments after the retaddr has been pushed are aligned. + if ((StackSize & 7) == 0) + StackSize += 4; + } VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs. RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only. ReturnAddrIndex = 0; // No return address slot generated yet. - BytesToPopOnReturn = ArgOffset; // Callee pops all stack arguments. + BytesToPopOnReturn = StackSize; // Callee pops all stack arguments. BytesCallerReserves = 0; - MF.getInfo()->setBytesToPopOnReturn(BytesToPopOnReturn); - - // Finally, inform the code generator which regs we return values in. - switch (getValueType(MF.getFunction()->getReturnType())) { - default: assert(0 && "Unknown type!"); - case MVT::isVoid: break; - case MVT::i1: - case MVT::i8: - case MVT::i16: - case MVT::i32: - MF.addLiveOut(X86::EAX); - break; - case MVT::i64: - MF.addLiveOut(X86::EAX); - MF.addLiveOut(X86::EDX); - break; - case MVT::f32: - case MVT::f64: - MF.addLiveOut(X86::ST0); - break; - case MVT::v16i8: - case MVT::v8i16: - case MVT::v4i32: - case MVT::v2i64: - case MVT::v4f32: - case MVT::v2f64: - MF.addLiveOut(X86::XMM0); - break; - } + MF.getInfo() + ->setBytesToPopOnReturn(BytesToPopOnReturn); // Return the new list of results. return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), @@ -1070,122 +1022,60 @@ SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG, unsigned CC) { SDOperand Chain = Op.getOperand(0); bool isTailCall = cast(Op.getOperand(3))->getValue() != 0; + bool isVarArg = cast(Op.getOperand(2))->getValue() != 0; SDOperand Callee = Op.getOperand(4); - unsigned NumOps = (Op.getNumOperands() - 5) / 2; - // Count how many bytes are to be pushed on the stack. - unsigned NumBytes = 0; - - // Keep track of the number of integer regs passed so far. This can be either - // 0 (neither EAX/ECX or EDX used), 1 (EAX/ECX is used) or 2 (EAX/ECX and EDX - // are both used). - unsigned NumIntRegs = 0; - unsigned NumXMMRegs = 0; // XMM regs used for parameter passing. - - static const unsigned GPRArgRegs[][2] = { - { X86::CL, X86::DL }, - { X86::CX, X86::DX }, - { X86::ECX, X86::EDX } - }; - static const unsigned XMMArgRegs[] = { - X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3 - }; + // Analyze operands of the call, assigning locations to each operand. + SmallVector ArgLocs; + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); + CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_FastCall); + + // Get a count of how many bytes are to be pushed on the stack. + unsigned NumBytes = CCInfo.getNextStackOffset(); - for (unsigned i = 0; i != NumOps; ++i) { - SDOperand Arg = Op.getOperand(5+2*i); - - switch (Arg.getValueType()) { - default: assert(0 && "Unknown value type!"); - case MVT::i8: - case MVT::i16: - case MVT::i32: - if (NumIntRegs < 2) { - ++NumIntRegs; - break; - } // Fall through - case MVT::f32: + if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) { + // Make sure the instruction takes 8n+4 bytes to make sure the start of the + // arguments and the arguments after the retaddr has been pushed are aligned. 
+ if ((NumBytes & 7) == 0) NumBytes += 4; - break; - case MVT::f64: - NumBytes += 8; - break; - case MVT::v16i8: - case MVT::v8i16: - case MVT::v4i32: - case MVT::v2i64: - case MVT::v4f32: - case MVT::v2f64: - if (NumXMMRegs < 4) - NumXMMRegs++; - else { - // XMM arguments have to be aligned on 16-byte boundary. - NumBytes = ((NumBytes + 15) / 16) * 16; - NumBytes += 16; - } - break; - } } - // Make sure the instruction takes 8n+4 bytes to make sure the start of the - // arguments and the arguments after the retaddr has been pushed are aligned. - if ((NumBytes & 7) == 0) - NumBytes += 4; - Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy())); - - // Arguments go on the stack in reverse order, as specified by the ABI. - unsigned ArgOffset = 0; - NumIntRegs = 0; + SmallVector, 8> RegsToPass; SmallVector MemOpChains; - SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy()); - for (unsigned i = 0; i != NumOps; ++i) { - SDOperand Arg = Op.getOperand(5+2*i); - - switch (Arg.getValueType()) { - default: assert(0 && "Unexpected ValueType for argument!"); - case MVT::i8: - case MVT::i16: - case MVT::i32: - if (NumIntRegs < 2) { - unsigned RegToUse = - GPRArgRegs[Arg.getValueType()-MVT::i8][NumIntRegs]; - RegsToPass.push_back(std::make_pair(RegToUse, Arg)); - ++NumIntRegs; + + SDOperand StackPtr; + + // Walk the register/memloc assignments, inserting copies/loads. + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + SDOperand Arg = Op.getOperand(5+2*VA.getValNo()); + + // Promote the value if needed. + switch (VA.getLocInfo()) { + default: assert(0 && "Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg); + break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg); break; - } // Fall through - case MVT::f32: { - SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); - PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); - MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); - ArgOffset += 4; - break; } - case MVT::f64: { - SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); + + if (VA.isRegLoc()) { + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + } else { + assert(VA.isMemLoc()); + if (StackPtr.Val == 0) + StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy()); + SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy()); PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); - ArgOffset += 8; - break; - } - case MVT::v16i8: - case MVT::v8i16: - case MVT::v4i32: - case MVT::v2i64: - case MVT::v4f32: - case MVT::v2f64: - if (NumXMMRegs < 4) { - RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg)); - NumXMMRegs++; - } else { - // XMM arguments have to be aligned on 16-byte boundary. 
- ArgOffset = ((ArgOffset + 15) / 16) * 16; - SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); - PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff); - MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); - ArgOffset += 16; - } - break; } } @@ -1270,7 +1160,6 @@ SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG, SDOperand X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) { - unsigned NumArgs = Op.Val->getNumValues() - 1; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); SDOperand Root = Op.getOperand(0); @@ -1284,16 +1173,12 @@ X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) { X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 }; - SmallVector ArgLocs; - CCState CCInfo(MF.getFunction()->getCallingConv(), getTargetMachine(), - ArgLocs); - for (unsigned i = 0; i != NumArgs; ++i) { - MVT::ValueType ArgVT = Op.getValue(i).getValueType(); - unsigned ArgFlags = cast(Op.getOperand(3+i))->getValue(); - if (CC_X86_64_C(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo)) - assert(0 && "Unhandled argument type!"); - } + // Assign locations to all of the incoming arguments. + SmallVector ArgLocs; + CCState CCInfo(MF.getFunction()->getCallingConv(), isVarArg, + getTargetMachine(), ArgLocs); + CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C); SmallVector ArgValues; unsigned LastVal = ~0U; @@ -1318,11 +1203,15 @@ X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) { RC = X86::FR64RegisterClass; else { assert(MVT::isVector(RegVT)); - RC = X86::VR128RegisterClass; + if (MVT::getSizeInBits(RegVT) == 64) { + RC = X86::GR64RegisterClass; // MMX values are passed in GPRs. + RegVT = MVT::i64; + } else + RC = X86::VR128RegisterClass; } - - SDOperand ArgValue = DAG.getCopyFromReg(Root, VA.getLocReg(), RegVT); - AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC); + + unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC); + SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT); // If this is an 8 or 16-bit value, it is really passed promoted to 32 // bits. Insert an assert[sz]ext to capture this, then truncate to the @@ -1337,6 +1226,11 @@ X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) { if (VA.getLocInfo() != CCValAssign::Full) ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue); + // Handle MMX values passed in GPRs. + if (RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass && + MVT::getSizeInBits(RegVT) == 64) + ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue); + ArgValues.push_back(ArgValue); } else { assert(VA.isMemLoc()); @@ -1415,17 +1309,11 @@ X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG, bool isVarArg = cast(Op.getOperand(2))->getValue() != 0; bool isTailCall = cast(Op.getOperand(3))->getValue() != 0; SDOperand Callee = Op.getOperand(4); - unsigned NumOps = (Op.getNumOperands() - 5) / 2; - + + // Analyze operands of the call, assigning locations to each operand. 
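// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): the x86-64 SysV convention
// handled here passes the first six integer/pointer arguments in RDI, RSI,
// RDX, RCX, R8, R9 and the first eight FP/vector arguments in XMM0-XMM7;
// anything beyond that goes to the stack in 8-byte slots.  (The change above
// additionally routes 64-bit MMX values through the integer registers.)
// A simplified stand-in allocator with invented names:
#include <string>

namespace sketch {
enum class ArgClass { Integer, SSE };

inline std::string allocateX86_64Arg(ArgClass C, unsigned &IntUsed,
                                     unsigned &SSEUsed, unsigned &StackOff) {
  static const char *GPRs[6] = {"RDI", "RSI", "RDX", "RCX", "R8", "R9"};
  static const char *XMMs[8] = {"XMM0", "XMM1", "XMM2", "XMM3",
                                "XMM4", "XMM5", "XMM6", "XMM7"};
  if (C == ArgClass::Integer && IntUsed < 6)
    return GPRs[IntUsed++];
  if (C == ArgClass::SSE && SSEUsed < 8)
    return XMMs[SSEUsed++];
  std::string Slot = "[rsp + " + std::to_string(StackOff) + "]";
  StackOff += 8;                     // stack slots are 8-byte aligned
  return Slot;
}
} // namespace sketch
// ---------------------------------------------------------------------------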
SmallVector ArgLocs; - CCState CCInfo(CC, getTargetMachine(), ArgLocs); - - for (unsigned i = 0; i != NumOps; ++i) { - MVT::ValueType ArgVT = Op.getOperand(5+2*i).getValueType(); - unsigned ArgFlags =cast(Op.getOperand(5+2*i+1))->getValue(); - if (CC_X86_64_C(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo)) - assert(0 && "Unhandled argument type!"); - } + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); + CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C); // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); @@ -1507,11 +1395,13 @@ X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG, if (GlobalAddressSDNode *G = dyn_cast(Callee)) { // We should use extra load for direct calls to dllimported functions in // non-JIT mode. - if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(), - getTargetMachine(), true)) + if (getTargetMachine().getCodeModel() != CodeModel::Large + && !Subtarget->GVRequiresExtraLoad(G->getGlobal(), + getTargetMachine(), true)) Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) - Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); + if (getTargetMachine().getCodeModel() != CodeModel::Large) + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); // Returns a chain & a flag for retval copy to use. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); @@ -1769,7 +1659,7 @@ bool X86::isSHUFPMask(SDNode *N) { return ::isSHUFPMask(N->op_begin(), N->getNumOperands()); } -/// isCommutedSHUFP - Returns true if the shuffle mask is except +/// isCommutedSHUFP - Returns true if the shuffle mask is exactly /// the reverse of what x86 shuffles want. x86 shuffles requires the lower /// half elements to come from vector 1 (which would equal the dest.) and /// the upper half to come from vector 2. @@ -1930,7 +1820,7 @@ bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { assert(N->getOpcode() == ISD::BUILD_VECTOR); unsigned NumElems = N->getNumOperands(); - if (NumElems != 4 && NumElems != 8 && NumElems != 16) + if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) return false; for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { @@ -1946,6 +1836,29 @@ bool X86::isUNPCKL_v_undef_Mask(SDNode *N) { return true; } +/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form +/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef, +/// <2, 2, 3, 3> +bool X86::isUNPCKH_v_undef_Mask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + unsigned NumElems = N->getNumOperands(); + if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) + return false; + + for (unsigned i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) { + SDOperand BitI = N->getOperand(i); + SDOperand BitI1 = N->getOperand(i + 1); + + if (!isUndefOrEqual(BitI, j)) + return false; + if (!isUndefOrEqual(BitI1, j)) + return false; + } + + return true; +} + /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSS, /// MOVSD, and MOVD, i.e. setting the lowest element. @@ -2061,6 +1974,16 @@ bool X86::isMOVSLDUPMask(SDNode *N) { return HasHi; } +/// isIdentityMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a identity operation on the LHS or RHS. 
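// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): the unpack predicates above
// recognize masks of the shape generated below.  For four elements, unpcklps
// interleaves the low halves (<0,4,1,5>) and unpckhps the high halves
// (<2,6,3,7>); when the second operand is undef, the canonical single-input
// forms <0,0,1,1> and <2,2,3,3> are matched instead (isUNPCKL_v_undef_Mask /
// isUNPCKH_v_undef_Mask).  Stand-in mask generators:
#include <vector>

namespace sketch {
inline std::vector<unsigned> unpackLoMask(unsigned N, bool SameInput = false) {
  std::vector<unsigned> M;
  for (unsigned i = 0; i != N / 2; ++i) {
    M.push_back(i);                      // element i of V1
    M.push_back(SameInput ? i : i + N);  // element i of V2 (or V1 again)
  }
  return M;
}

inline std::vector<unsigned> unpackHiMask(unsigned N, bool SameInput = false) {
  std::vector<unsigned> M;
  for (unsigned i = N / 2; i != N; ++i) {
    M.push_back(i);
    M.push_back(SameInput ? i : i + N);
  }
  return M;
}
} // namespace sketch
// ---------------------------------------------------------------------------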
+static bool isIdentityMask(SDNode *N, bool RHS = false) { + unsigned NumElems = N->getNumOperands(); + for (unsigned i = 0; i < NumElems; ++i) + if (!isUndefOrEqual(N->getOperand(i), i + (RHS ? NumElems : 0))) + return false; + return true; +} + /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies /// a splat of a single element. static bool isSplatMask(SDNode *N) { @@ -2213,7 +2136,7 @@ static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1, SelectionDAG &DAG) { MVT::ValueType VT = Op.getValueType(); MVT::ValueType MaskVT = Mask.getValueType(); - MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT); + MVT::ValueType EltVT = MVT::getVectorElementType(MaskVT); unsigned NumElems = Mask.getNumOperands(); SmallVector MaskVec; @@ -2304,7 +2227,7 @@ static bool isSplatVector(SDNode *N) { /// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved /// to an undef. static bool isUndefShuffle(SDNode *N) { - if (N->getOpcode() != ISD::BUILD_VECTOR) + if (N->getOpcode() != ISD::VECTOR_SHUFFLE) return false; SDOperand V1 = N->getOperand(0); @@ -2324,6 +2247,61 @@ static bool isUndefShuffle(SDNode *N) { return true; } +/// isZeroNode - Returns true if Elt is a constant zero or a floating point +/// constant +0.0. +static inline bool isZeroNode(SDOperand Elt) { + return ((isa(Elt) && + cast(Elt)->getValue() == 0) || + (isa(Elt) && + cast(Elt)->isExactlyValue(0.0))); +} + +/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved +/// to an zero vector. +static bool isZeroShuffle(SDNode *N) { + if (N->getOpcode() != ISD::VECTOR_SHUFFLE) + return false; + + SDOperand V1 = N->getOperand(0); + SDOperand V2 = N->getOperand(1); + SDOperand Mask = N->getOperand(2); + unsigned NumElems = Mask.getNumOperands(); + for (unsigned i = 0; i != NumElems; ++i) { + SDOperand Arg = Mask.getOperand(i); + if (Arg.getOpcode() != ISD::UNDEF) { + unsigned Idx = cast(Arg)->getValue(); + if (Idx < NumElems) { + unsigned Opc = V1.Val->getOpcode(); + if (Opc == ISD::UNDEF) + continue; + if (Opc != ISD::BUILD_VECTOR || + !isZeroNode(V1.Val->getOperand(Idx))) + return false; + } else if (Idx >= NumElems) { + unsigned Opc = V2.Val->getOpcode(); + if (Opc == ISD::UNDEF) + continue; + if (Opc != ISD::BUILD_VECTOR || + !isZeroNode(V2.Val->getOperand(Idx - NumElems))) + return false; + } + } + } + return true; +} + +/// getZeroVector - Returns a vector of specified type with all zero elements. +/// +static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) { + assert(MVT::isVector(VT) && "Expected a vector type"); + unsigned NumElems = MVT::getVectorNumElements(VT); + MVT::ValueType EVT = MVT::getVectorElementType(VT); + bool isFP = MVT::isFloatingPoint(EVT); + SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT); + SmallVector ZeroVec(NumElems, Zero); + return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size()); +} + /// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements /// that point to V2 points to its first element. static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) { @@ -2354,7 +2332,7 @@ static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) { /// operation of specified width. 
static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) { MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); + MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); SmallVector MaskVec; MaskVec.push_back(DAG.getConstant(NumElems, BaseVT)); @@ -2367,7 +2345,7 @@ static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) { /// of specified width. static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) { MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); + MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); SmallVector MaskVec; for (unsigned i = 0, e = NumElems/2; i != e; ++i) { MaskVec.push_back(DAG.getConstant(i, BaseVT)); @@ -2380,7 +2358,7 @@ static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) { /// of specified width. static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) { MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); + MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); unsigned Half = NumElems/2; SmallVector MaskVec; for (unsigned i = 0; i != Half; ++i) { @@ -2390,18 +2368,6 @@ static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) { return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); } -/// getZeroVector - Returns a vector of specified type with all zero elements. -/// -static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) { - assert(MVT::isVector(VT) && "Expected a vector type"); - unsigned NumElems = getVectorNumElements(VT); - MVT::ValueType EVT = MVT::getVectorBaseType(VT); - bool isFP = MVT::isFloatingPoint(EVT); - SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT); - SmallVector ZeroVec(NumElems, Zero); - return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size()); -} - /// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32. /// static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) { @@ -2423,23 +2389,14 @@ static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) { return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle); } -/// isZeroNode - Returns true if Elt is a constant zero or a floating point -/// constant +0.0. -static inline bool isZeroNode(SDOperand Elt) { - return ((isa(Elt) && - cast(Elt)->getValue() == 0) || - (isa(Elt) && - cast(Elt)->isExactlyValue(0.0))); -} - /// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified -/// vector and zero or undef vector. +/// vector of zero or undef vector. static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT, unsigned NumElems, unsigned Idx, bool isZero, SelectionDAG &DAG) { SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT); MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT); + MVT::ValueType EVT = MVT::getVectorElementType(MaskVT); SDOperand Zero = DAG.getConstant(0, EVT); SmallVector MaskVec(NumElems, Zero); MaskVec[Idx] = DAG.getConstant(NumElems, EVT); @@ -2492,7 +2449,7 @@ static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros, return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V); } -/// LowerBuildVectorv16i8 - Custom lower build_vector of v8i16. +/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16. 
/// static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros, unsigned NumNonZero, unsigned NumZero, @@ -2531,13 +2488,14 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { return Op; MVT::ValueType VT = Op.getValueType(); - MVT::ValueType EVT = MVT::getVectorBaseType(VT); + MVT::ValueType EVT = MVT::getVectorElementType(VT); unsigned EVTBits = MVT::getSizeInBits(EVT); unsigned NumElems = Op.getNumOperands(); unsigned NumZero = 0; unsigned NumNonZero = 0; unsigned NonZeros = 0; + unsigned NumNonZeroImms = 0; std::set Values; for (unsigned i = 0; i < NumElems; ++i) { SDOperand Elt = Op.getOperand(i); @@ -2548,13 +2506,21 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { else { NonZeros |= (1 << i); NumNonZero++; + if (Elt.getOpcode() == ISD::Constant || + Elt.getOpcode() == ISD::ConstantFP) + NumNonZeroImms++; } } } - if (NumNonZero == 0) - // Must be a mix of zero and undef. Return a zero vector. - return getZeroVector(VT, DAG); + if (NumNonZero == 0) { + if (NumZero == 0) + // All undef vector. Return an UNDEF. + return DAG.getNode(ISD::UNDEF, VT); + else + // A mix of zero and undef. Return a zero vector. + return getZeroVector(VT, DAG); + } // Splat is obviously ok. Let legalizer expand it to a shuffle. if (Values.size() == 1) @@ -2575,7 +2541,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0, DAG); MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT); + MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT); SmallVector MaskVec; for (unsigned i = 0; i < NumElems; i++) MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT)); @@ -2586,18 +2552,23 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { } } - // Let legalizer expand 2-wide build_vector's. + // A vector full of immediates; various special cases are already + // handled, so this is best done with a single constant-pool load. + if (NumNonZero == NumNonZeroImms) + return SDOperand(); + + // Let legalizer expand 2-wide build_vectors. if (EVTBits == 64) return SDOperand(); // If element VT is < 32 bits, convert it to inserts into a zero vector. - if (EVTBits == 8) { + if (EVTBits == 8 && NumElems == 16) { SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG, *this); if (V.Val) return V; } - if (EVTBits == 16) { + if (EVTBits == 16 && NumElems == 8) { SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG, *this); if (V.Val) return V; @@ -2644,7 +2615,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0) return V[0]; MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT); + MVT::ValueType EVT = MVT::getVectorElementType(MaskVT); SmallVector MaskVec; bool Reverse = (NonZeros & 0x3) == 2; for (unsigned i = 0; i < 2; ++i) @@ -2700,6 +2671,14 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { if (isUndefShuffle(Op.Val)) return DAG.getNode(ISD::UNDEF, VT); + if (isZeroShuffle(Op.Val)) + return getZeroVector(VT, DAG); + + if (isIdentityMask(PermMask.Val)) + return V1; + else if (isIdentityMask(PermMask.Val, true)) + return V2; + if (isSplatMask(PermMask.Val)) { if (NumElems <= 4) return Op; // Promote it to a v4i32 splat. 
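// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): LowerBUILD_VECTOR above works
// through a short decision list -- an all-undef vector becomes UNDEF, a mix
// of zeros and undefs becomes a zero vector, splats and single non-zero
// elements get shuffle-based expansions, a remaining all-constant vector is
// left for a single constant-pool load, and everything else is built element
// by element.  A stand-alone restatement with invented names:
namespace sketch {
enum class BuildVecPlan {
  Undef,          // every element undef
  ZeroVector,     // only zeros and undefs
  SplatOrInsert,  // one repeated value, or a single non-zero element
  ConstantPool,   // all non-zero elements are compile-time constants
  Generic         // anything else: element-by-element construction
};

inline BuildVecPlan classify(unsigned NumZero, unsigned NumNonZero,
                             unsigned NumNonZeroImms, bool IsSplat) {
  if (NumNonZero == 0 && NumZero == 0) return BuildVecPlan::Undef;
  if (NumNonZero == 0)                 return BuildVecPlan::ZeroVector;
  if (IsSplat || NumNonZero == 1)      return BuildVecPlan::SplatOrInsert;
  if (NumNonZero == NumNonZeroImms)    return BuildVecPlan::ConstantPool;
  return BuildVecPlan::Generic;
}
} // namespace sketch
// ---------------------------------------------------------------------------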
@@ -2745,6 +2724,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { } if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) || + X86::isUNPCKH_v_undef_Mask(PermMask.Val) || X86::isUNPCKLMask(PermMask.Val) || X86::isUNPCKHMask(PermMask.Val)) return Op; @@ -2773,6 +2753,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { // Commute is back and try unpck* again. Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG); if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) || + X86::isUNPCKH_v_undef_Mask(PermMask.Val) || X86::isUNPCKLMask(PermMask.Val) || X86::isUNPCKHMask(PermMask.Val)) return Op; @@ -2789,13 +2770,14 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { return Op; } - if (X86::isSHUFPMask(PermMask.Val)) + if (X86::isSHUFPMask(PermMask.Val) && + MVT::getSizeInBits(VT) != 64) // Don't do this for MMX. return Op; // Handle v8i16 shuffle high / low shuffle node pair. if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) { MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); - MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); + MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); SmallVector MaskVec; for (unsigned i = 0; i != 4; ++i) MaskVec.push_back(PermMask.getOperand(i)); @@ -2826,9 +2808,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { } } - if (NumElems == 4) { + if (NumElems == 4 && + // Don't do this for MMX. + MVT::getSizeInBits(VT) != 64) { MVT::ValueType MaskVT = PermMask.getValueType(); - MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT); + MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT); SmallVector, 8> Locs; Locs.reserve(NumElems); SmallVector Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT)); @@ -2953,10 +2937,10 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { // SHUFPS the element to the lowest double word, then movss. MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); SmallVector IdxVec; - IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT))); - IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); - IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); - IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); + IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT))); + IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); + IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); + IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &IdxVec[0], IdxVec.size()); Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), @@ -2974,8 +2958,8 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { // to a f64mem, the whole operation is folded into a single MOVHPDmr. 
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); SmallVector IdxVec; - IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT))); - IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT))); + IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT))); + IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT))); SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &IdxVec[0], IdxVec.size()); Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(), @@ -2992,7 +2976,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { // Transform it so it match pinsrw which expects a 16-bit value in a GR32 // as its second argument. MVT::ValueType VT = Op.getValueType(); - MVT::ValueType BaseVT = MVT::getVectorBaseType(VT); + MVT::ValueType BaseVT = MVT::getVectorElementType(VT); SDOperand N0 = Op.getOperand(0); SDOperand N1 = Op.getOperand(1); SDOperand N2 = Op.getOperand(2); @@ -3000,7 +2984,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { if (N1.getValueType() != MVT::i32) N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1); if (N2.getValueType() != MVT::i32) - N2 = DAG.getConstant(cast(N2)->getValue(), MVT::i32); + N2 = DAG.getConstant(cast(N2)->getValue(),getPointerTy()); return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2); } else if (MVT::getSizeInBits(BaseVT) == 32) { unsigned Idx = cast(N2)->getValue(); @@ -3008,7 +2992,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) { // Use a movss. N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1); MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4); - MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); + MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT); SmallVector MaskVec; MaskVec.push_back(DAG.getConstant(4, BaseVT)); for (unsigned i = 1; i <= 3; ++i) @@ -3099,6 +3083,81 @@ X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) { return Result; } +// Lower ISD::GlobalTLSAddress using the "general dynamic" model +static SDOperand +LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, + const MVT::ValueType PtrVT) { + SDOperand InFlag; + SDOperand Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX, + DAG.getNode(X86ISD::GlobalBaseReg, + PtrVT), InFlag); + InFlag = Chain.getValue(1); + + // emit leal symbol@TLSGD(,%ebx,1), %eax + SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag); + SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), + GA->getValueType(0), + GA->getOffset()); + SDOperand Ops[] = { Chain, TGA, InFlag }; + SDOperand Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3); + InFlag = Result.getValue(2); + Chain = Result.getValue(1); + + // call ___tls_get_addr. This function receives its argument in + // the register EAX. + Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag); + InFlag = Chain.getValue(1); + + NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); + SDOperand Ops1[] = { Chain, + DAG.getTargetExternalSymbol("___tls_get_addr", + PtrVT), + DAG.getRegister(X86::EAX, PtrVT), + DAG.getRegister(X86::EBX, PtrVT), + InFlag }; + Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5); + InFlag = Chain.getValue(1); + + return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag); +} + +// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or +// "local exec" model. 
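// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): the two helpers here implement
// the ELF TLS access models named in the comments -- "general dynamic"
// materializes the address with  leal x@TLSGD(,%ebx,1),%eax  followed by a
// call to ___tls_get_addr, while "initial exec"/"local exec" add an offset
// (x@indntpoff / x@ntpoff) to the thread pointer (%gs on x86-32 Linux).  At
// the source level they all lower the same construct; a minimal runnable
// example of code that exercises this path (modern C++ used for brevity):
#include <cstdio>
#include <thread>

thread_local int tls_counter = 0;  // each thread gets its own copy

int main() {
  auto bump = [] {
    ++tls_counter;                 // compiles to a model-specific TLS access
    std::printf("address in this thread: %p\n", (void *)&tls_counter);
  };
  std::thread t(bump);
  bump();
  t.join();
  return 0;
}
// ---------------------------------------------------------------------------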
+static SDOperand
+LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+                    const MVT::ValueType PtrVT) {
+  // Get the Thread Pointer.
+  SDOperand ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT);
+  // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax"
+  // (initial exec).
+  SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
+                                             GA->getValueType(0),
+                                             GA->getOffset());
+  SDOperand Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA);
+
+  if (GA->getGlobal()->isDeclaration()) // initial exec TLS model
+    Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, NULL, 0);
+
+  // The address of the thread-local variable is the sum of the thread
+  // pointer and the variable's offset.
+  return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset);
+}
+
+SDOperand
+X86TargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) {
+  // TODO: implement the "local dynamic" model
+  // TODO: implement the "initial exec" model for PIC executables
+  assert(!Subtarget->is64Bit() && Subtarget->isTargetELF() &&
+         "TLS not implemented for non-ELF and 64-bit targets");
+  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+  // If the relocation model is PIC, use the "General Dynamic" TLS model;
+  // otherwise use the "Local Exec" TLS model.
+  if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
+    return LowerToTLSGeneralDynamicModel(GA, DAG, getPointerTy());
+  else
+    return LowerToTLSExecModel(GA, DAG, getPointerTy());
+}
+
 SDOperand
 X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
   const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
@@ -3217,7 +3276,7 @@ SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
   if (X86ScalarSSE)
     Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
   else
-    Tys = DAG.getVTList(MVT::f64, MVT::Other);
+    Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
   SmallVector<SDOperand, 8> Ops;
   Ops.push_back(Chain);
   Ops.push_back(StackSlot);
@@ -3272,7 +3331,7 @@ SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
   if (X86ScalarSSE) {
     assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
     Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0);
-    SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other);
+    SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
     SDOperand Ops[] = {
       Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
     };
@@ -3292,16 +3351,21 @@ SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
 
 SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
   MVT::ValueType VT = Op.getValueType();
-  const Type *OpNTy = MVT::getTypeForValueType(VT);
+  MVT::ValueType EltVT = VT;
+  if (MVT::isVector(VT))
+    EltVT = MVT::getVectorElementType(VT);
+  const Type *OpNTy = MVT::getTypeForValueType(EltVT);
   std::vector<Constant*> CV;
-  if (VT == MVT::f64) {
-    CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63))));
-    CV.push_back(ConstantFP::get(OpNTy, 0.0));
+  if (EltVT == MVT::f64) {
+    Constant *C = ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63)));
+    CV.push_back(C);
+    CV.push_back(C);
   } else {
-    CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31))));
-    CV.push_back(ConstantFP::get(OpNTy, 0.0));
-    CV.push_back(ConstantFP::get(OpNTy, 0.0));
-    CV.push_back(ConstantFP::get(OpNTy, 0.0));
+    Constant *C = ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31)));
+    CV.push_back(C);
+    CV.push_back(C);
+    CV.push_back(C);
+    CV.push_back(C);
   }
   Constant *CS = ConstantStruct::get(CV);
   SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
@@ -3316,26 +3380,42 @@ SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
 
 SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
   MVT::ValueType VT = Op.getValueType();
-  const Type *OpNTy = MVT::getTypeForValueType(VT);
+  MVT::ValueType EltVT = VT;
+  unsigned EltNum = 1;
+  if (MVT::isVector(VT)) {
+    EltVT = MVT::getVectorElementType(VT);
+    EltNum = MVT::getVectorNumElements(VT);
+  }
+  const Type *OpNTy = MVT::getTypeForValueType(EltVT);
   std::vector<Constant*> CV;
-  if (VT == MVT::f64) {
-    CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63)));
-    CV.push_back(ConstantFP::get(OpNTy, 0.0));
+  if (EltVT == MVT::f64) {
+    Constant *C = ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63));
+    CV.push_back(C);
+    CV.push_back(C);
   } else {
-    CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31)));
-    CV.push_back(ConstantFP::get(OpNTy, 0.0));
-    CV.push_back(ConstantFP::get(OpNTy, 0.0));
-    CV.push_back(ConstantFP::get(OpNTy, 0.0));
+    Constant *C = ConstantFP::get(OpNTy, BitsToFloat(1U << 31));
+    CV.push_back(C);
+    CV.push_back(C);
+    CV.push_back(C);
+    CV.push_back(C);
   }
   Constant *CS = ConstantStruct::get(CV);
   SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
-  SDVTList Tys = DAG.getVTList(VT, MVT::Other);
-  SmallVector<SDOperand, 3> Ops;
-  Ops.push_back(DAG.getEntryNode());
-  Ops.push_back(CPIdx);
-  Ops.push_back(DAG.getSrcValue(NULL));
-  SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
-  return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
+  if (MVT::isVector(VT)) {
+    SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0);
+    return DAG.getNode(ISD::BIT_CONVERT, VT,
+                       DAG.getNode(ISD::XOR, MVT::v2i64,
+                    DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)),
+                    DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask)));
+  } else {
+    SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+    SmallVector<SDOperand, 3> Ops;
+    Ops.push_back(DAG.getEntryNode());
+    Ops.push_back(CPIdx);
+    Ops.push_back(DAG.getSrcValue(NULL));
+    SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
+    return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
+  }
 }
 
 SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
@@ -3562,6 +3642,48 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
   }
 }
+
+// Lower dynamic stack allocation to an _alloca call for Cygwin/MinGW targets.
+// A call to _alloca is needed to probe the stack when allocating more than 4K
+// bytes in one go.  Touching the stack at 4K increments is necessary to ensure
+// that the guard pages used by the OS virtual memory manager are allocated in
+// the correct sequence.
+SDOperand
+X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
+                                           SelectionDAG &DAG) {
+  assert(Subtarget->isTargetCygMing() &&
+         "This should be used only on Cygwin/Mingw targets");
+
+  // Get the inputs.
+  SDOperand Chain = Op.getOperand(0);
+  SDOperand Size  = Op.getOperand(1);
+  // FIXME: Ensure alignment here
+
+  SDOperand Flag;
+
+  MVT::ValueType IntPtr = getPointerTy();
+  MVT::ValueType SPTy = (Subtarget->is64Bit() ?
MVT::i64 : MVT::i32); + + Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag); + Flag = Chain.getValue(1); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); + SDOperand Ops[] = { Chain, + DAG.getTargetExternalSymbol("_alloca", IntPtr), + DAG.getRegister(X86::EAX, IntPtr), + Flag }; + Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 4); + Flag = Chain.getValue(1); + + Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1); + + std::vector Tys; + Tys.push_back(SPTy); + Tys.push_back(MVT::Other); + SDOperand Ops1[2] = { Chain.getValue(0), Chain }; + return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2); +} + SDOperand X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) { MachineFunction &MF = DAG.getMachineFunction(); @@ -3569,7 +3691,7 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) { if (Fn->hasExternalLinkage() && Subtarget->isTargetCygMing() && Fn->getName() == "main") - MF.getInfo()->setForceFramePointer(true); + MF.getInfo()->setForceFramePointer(true); unsigned CC = cast(Op.getOperand(1))->getValue(); if (Subtarget->is64Bit()) @@ -3585,10 +3707,10 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) { case CallingConv::C: return LowerCCCArguments(Op, DAG); case CallingConv::X86_StdCall: - MF.getInfo()->setDecorationStyle(StdCall); + MF.getInfo()->setDecorationStyle(StdCall); return LowerCCCArguments(Op, DAG, true); case CallingConv::X86_FastCall: - MF.getInfo()->setDecorationStyle(FastCall); + MF.getInfo()->setDecorationStyle(FastCall); return LowerFastCCArguments(Op, DAG); } } @@ -3611,16 +3733,10 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { TargetLowering::ArgListEntry Entry; Entry.Node = Op.getOperand(1); Entry.Ty = IntPtrTy; - Entry.isSigned = false; - Entry.isInReg = false; - Entry.isSRet = false; Args.push_back(Entry); // Extend the unsigned i8 argument to be an int value for the call. Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); Entry.Ty = IntPtrTy; - Entry.isSigned = false; - Entry.isInReg = false; - Entry.isSRet = false; Args.push_back(Entry); Entry.Node = Op.getOperand(3); Args.push_back(Entry); @@ -3771,9 +3887,6 @@ SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) { TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Ty = getTargetData()->getIntPtrType(); - Entry.isSigned = false; - Entry.isInReg = false; - Entry.isSRet = false; Entry.Node = Op.getOperand(1); Args.push_back(Entry); Entry.Node = Op.getOperand(2); Args.push_back(Entry); Entry.Node = Op.getOperand(3); Args.push_back(Entry); @@ -3977,6 +4090,33 @@ SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) { return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size()); } +SDOperand X86TargetLowering::LowerVACOPY(SDOperand Op, SelectionDAG &DAG) { + // X86-64 va_list is a struct { i32, i32, i8*, i8* }. 
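Before the implementation that follows, a small sketch of what this copy has to preserve (illustrative only; the struct and field names follow the SysV x86-64 ABI description and are assumptions of the example, not names used by this code):

// On x86-64 a va_list is a one-element array of this 24-byte descriptor, so
// va_copy must duplicate the whole record rather than a single pointer.
struct X86_64VAList {
  unsigned GPOffset;         // i32: next general-purpose register slot consumed
  unsigned FPOffset;         // i32: next XMM register slot consumed
  void    *OverflowArgArea;  // i8*: stack area holding arguments that spilled
  void    *RegSaveArea;      // i8*: block where the register arguments were dumped
};

void copyVAList(X86_64VAList &Dst, const X86_64VAList &Src) {
  Dst = Src;  // the lowering below is meant to do this as three 8-byte load/store pairs
}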
+ SDOperand Chain = Op.getOperand(0); + SDOperand DstPtr = Op.getOperand(1); + SDOperand SrcPtr = Op.getOperand(2); + SrcValueSDNode *DstSV = cast(Op.getOperand(3)); + SrcValueSDNode *SrcSV = cast(Op.getOperand(4)); + + SrcPtr = DAG.getLoad(getPointerTy(), Chain, SrcPtr, + SrcSV->getValue(), SrcSV->getOffset()); + Chain = SrcPtr.getValue(1); + for (unsigned i = 0; i < 3; ++i) { + SDOperand Val = DAG.getLoad(MVT::i64, Chain, SrcPtr, + SrcSV->getValue(), SrcSV->getOffset()); + Chain = Val.getValue(1); + Chain = DAG.getStore(Chain, Val, DstPtr, + DstSV->getValue(), DstSV->getOffset()); + if (i == 2) + break; + SrcPtr = DAG.getNode(ISD::ADD, getPointerTy(), SrcPtr, + DAG.getConstant(8, getPointerTy())); + DstPtr = DAG.getNode(ISD::ADD, getPointerTy(), DstPtr, + DAG.getConstant(8, getPointerTy())); + } + return Chain; +} + SDOperand X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) { unsigned IntNo = cast(Op.getOperand(0))->getValue(); @@ -4109,6 +4249,39 @@ SDOperand X86TargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) { DAG.getConstant(4, getPointerTy())); } +SDOperand X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDOperand Op, + SelectionDAG &DAG) { + // Is not yet supported on x86-64 + if (Subtarget->is64Bit()) + return SDOperand(); + + return DAG.getConstant(8, getPointerTy()); +} + +SDOperand X86TargetLowering::LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG) +{ + assert(!Subtarget->is64Bit() && + "Lowering of eh_return builtin is not supported yet on x86-64"); + + MachineFunction &MF = DAG.getMachineFunction(); + SDOperand Chain = Op.getOperand(0); + SDOperand Offset = Op.getOperand(1); + SDOperand Handler = Op.getOperand(2); + + SDOperand Frame = DAG.getRegister(RegInfo->getFrameRegister(MF), + getPointerTy()); + + SDOperand StoreAddr = DAG.getNode(ISD::SUB, getPointerTy(), Frame, + DAG.getConstant(-4UL, getPointerTy())); + StoreAddr = DAG.getNode(ISD::ADD, getPointerTy(), StoreAddr, Offset); + Chain = DAG.getStore(Chain, Handler, StoreAddr, NULL, 0); + Chain = DAG.getCopyToReg(Chain, X86::ECX, StoreAddr); + MF.addLiveOut(X86::ECX); + + return DAG.getNode(X86ISD::EH_RETURN, MVT::Other, + Chain, DAG.getRegister(X86::ECX, getPointerTy())); +} + /// LowerOperation - Provide custom lowering hooks for some operations. 
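As an aside, a hedged sketch of the GCC-style builtin this hook ultimately services (hypothetical user-level code, not from this patch): __builtin_eh_return takes a stack adjustment and a handler address, which is why the lowering above rewrites the saved return address with the handler and hands the adjusted address to X86ISD::EH_RETURN in ECX.

// Hypothetical unwinder helper showing the builtin's shape (GCC extension):
extern "C" void example_unwind_resume(long StackAdjust, void *Handler) {
  // Pop StackAdjust extra bytes of the caller's frame, then resume at Handler;
  // this is the operation ISD::EH_RETURN / X86ISD::EH_RETURN implement.
  __builtin_eh_return(StackAdjust, Handler);
}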
/// SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { @@ -4121,6 +4294,7 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); case ISD::SHL_PARTS: case ISD::SRA_PARTS: @@ -4141,9 +4315,14 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { case ISD::MEMCPY: return LowerMEMCPY(Op, DAG); case ISD::READCYCLECOUNTER: return LowerREADCYCLCECOUNTER(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); + case ISD::VACOPY: return LowerVACOPY(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + case ISD::FRAME_TO_ARGS_OFFSET: + return LowerFRAME_TO_ARGS_OFFSET(Op, DAG); + case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); + case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); } return SDOperand(); } @@ -4187,26 +4366,59 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PINSRW: return "X86ISD::PINSRW"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMIN: return "X86ISD::FMIN"; + case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; + case X86ISD::FRCP: return "X86ISD::FRCP"; + case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; + case X86ISD::THREAD_POINTER: return "X86ISD::THREAD_POINTER"; + case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; } } -/// isLegalAddressImmediate - Return true if the integer value or -/// GlobalValue can be used as the offset of the target addressing mode. -bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const { - // X86 allows a sign-extended 32-bit immediate field. - return (V > -(1LL << 32) && V < (1LL << 32)-1); -} - -bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const { - // In 64-bit mode, GV is 64-bit so it won't fit in the 32-bit displacement - // field unless we are in small code model. - if (Subtarget->is64Bit() && - getTargetMachine().getCodeModel() != CodeModel::Small) +// isLegalAddressingMode - Return true if the addressing mode represented +// by AM is legal for this target, for a load/store of the specified type. +bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, + const Type *Ty) const { + // X86 supports extremely general addressing modes. + + // X86 allows a sign-extended 32-bit immediate field as a displacement. + if (AM.BaseOffs <= -(1LL << 32) || AM.BaseOffs >= (1LL << 32)-1) + return false; + + if (AM.BaseGV) { + // X86-64 only supports addr of globals in small code model. + if (Subtarget->is64Bit() && + getTargetMachine().getCodeModel() != CodeModel::Small) + return false; + + // We can only fold this if we don't need a load either. + if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false)) + return false; + } + + switch (AM.Scale) { + case 0: + case 1: + case 2: + case 4: + case 8: + // These scales always work. + break; + case 3: + case 5: + case 9: + // These scales are formed with basereg+scalereg. Only accept if there is + // no basereg yet. + if (AM.HasBaseReg) + return false; + break; + default: // Other stuff never works. 
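The scale cases above may be easier to see with concrete address shapes (standalone illustration, not part of the patch; char pointers are assumed so byte offsets equal the multipliers): scales 1, 2, 4 and 8 map directly to the SIB byte, while 3, 5 and 9 only come for free when the index register can double as the base, which is why they are rejected once AM.HasBaseReg is set.

// Standalone examples of address arithmetic this hook is asked to judge.
char scale4(char *P, long I) {
  return P[4 * I];   // P + 4*I: Scale 4 plus a base register -- always accepted
}
long scale9_no_base(long I) {
  return 9 * I;      // 9*I with no base: the lea (%reg,%reg,8) form, Scale 9 -- accepted
}
char scale9_with_base(char *P, long I) {
  return P[9 * I];   // P + 9*I: Scale 9 *and* a base register -- falls through to the
                     // default case and is rejected, keeping a separate multiply/lea
}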
return false; + } - return (!Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false)); + return true; } + /// isShuffleMaskLegal - Targets can use this to indicate that they only /// support *some* VECTOR_SHUFFLE operations, those with specific masks. /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values @@ -4216,11 +4428,14 @@ X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const { // Only do shuffles on 128-bit vector types for now. if (MVT::getSizeInBits(VT) == 64) return false; return (Mask.Val->getNumOperands() <= 4 || + isIdentityMask(Mask.Val) || + isIdentityMask(Mask.Val, true) || isSplatMask(Mask.Val) || isPSHUFHW_PSHUFLWMask(Mask.Val) || X86::isUNPCKLMask(Mask.Val) || + X86::isUNPCKHMask(Mask.Val) || X86::isUNPCKL_v_undef_Mask(Mask.Val) || - X86::isUNPCKHMask(Mask.Val)); + X86::isUNPCKH_v_undef_Mask(Mask.Val)); } bool X86TargetLowering::isVectorClearMaskLegal(std::vector &BVOps, @@ -4309,9 +4524,12 @@ X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, return BB; } - case X86::FP_TO_INT16_IN_MEM: - case X86::FP_TO_INT32_IN_MEM: - case X86::FP_TO_INT64_IN_MEM: { + case X86::FP32_TO_INT16_IN_MEM: + case X86::FP32_TO_INT32_IN_MEM: + case X86::FP32_TO_INT64_IN_MEM: + case X86::FP64_TO_INT16_IN_MEM: + case X86::FP64_TO_INT32_IN_MEM: + case X86::FP64_TO_INT64_IN_MEM: { // Change the floating point control register to use "round towards zero" // mode when truncating to an integer value. MachineFunction *F = BB->getParent(); @@ -4338,9 +4556,12 @@ X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, unsigned Opc; switch (MI->getOpcode()) { default: assert(0 && "illegal opcode!"); - case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break; - case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break; - case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break; + case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break; + case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break; + case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break; + case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break; + case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break; + case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break; } X86AddressMode AM; @@ -4384,6 +4605,7 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, uint64_t Mask, uint64_t &KnownZero, uint64_t &KnownOne, + const SelectionDAG &DAG, unsigned Depth) const { unsigned Opc = Op.getOpcode(); assert((Opc >= ISD::BUILTIN_OP_END || @@ -4412,11 +4634,11 @@ static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) { i %= NumElems; if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) { return (i == 0) - ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT)); + ? 
V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT)); } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) { SDOperand Idx = PermMask.getOperand(i); if (Idx.getOpcode() == ISD::UNDEF) - return DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT)); + return DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(VT)); return getShuffleScalarElt(V.Val,cast(Idx)->getValue(),DAG); } return SDOperand(); @@ -4463,8 +4685,8 @@ static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size, if (Loc.getOpcode() == ISD::FrameIndex) { if (BaseLoc.getOpcode() != ISD::FrameIndex) return false; - int FI = dyn_cast(Loc)->getIndex(); - int BFI = dyn_cast(BaseLoc)->getIndex(); + int FI = cast(Loc)->getIndex(); + int BFI = cast(BaseLoc)->getIndex(); int FS = MFI->getObjectSize(FI); int BFS = MFI->getObjectSize(BFI); if (FS != BFS || FS != Size) return false; @@ -4491,7 +4713,7 @@ static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI, return (GV->getAlignment() >= 16 && (Offset % 16) == 0); else { assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!"); - int BFI = dyn_cast(Base)->getIndex(); + int BFI = cast(Base)->getIndex(); if (BFI < 0) // Fixed objects do not specify alignment, however the offsets are known. return ((Subtarget->getStackAlignment() % 16) == 0 && @@ -4512,7 +4734,7 @@ static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MVT::ValueType VT = N->getValueType(0); - MVT::ValueType EVT = MVT::getVectorBaseType(VT); + MVT::ValueType EVT = MVT::getVectorElementType(VT); SDOperand PermMask = N->getOperand(2); int NumElems = (int)PermMask.getNumOperands(); SDNode *Base = NULL; @@ -4644,19 +4866,23 @@ SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N, /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. X86TargetLowering::ConstraintType -X86TargetLowering::getConstraintType(char ConstraintLetter) const { - switch (ConstraintLetter) { - case 'A': - case 'r': - case 'R': - case 'l': - case 'q': - case 'Q': - case 'x': - case 'Y': - return C_RegisterClass; - default: return TargetLowering::getConstraintType(ConstraintLetter); +X86TargetLowering::getConstraintType(const std::string &Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'A': + case 'r': + case 'R': + case 'l': + case 'q': + case 'Q': + case 'x': + case 'Y': + return C_RegisterClass; + default: + break; + } } + return TargetLowering::getConstraintType(Constraint); } /// isOperandValidForConstraint - Return the specified operand (possibly @@ -4666,67 +4892,76 @@ SDOperand X86TargetLowering:: isOperandValidForConstraint(SDOperand Op, char Constraint, SelectionDAG &DAG) { switch (Constraint) { default: break; - case 'i': + case 'I': + if (ConstantSDNode *C = dyn_cast(Op)) { + if (C->getValue() <= 31) + return DAG.getTargetConstant(C->getValue(), Op.getValueType()); + } + return SDOperand(0,0); + case 'N': + if (ConstantSDNode *C = dyn_cast(Op)) { + if (C->getValue() <= 255) + return DAG.getTargetConstant(C->getValue(), Op.getValueType()); + } + return SDOperand(0,0); + case 'i': { // Literal immediates are always ok. - if (isa(Op)) return Op; + if (ConstantSDNode *CST = dyn_cast(Op)) + return DAG.getTargetConstant(CST->getValue(), Op.getValueType()); - // If we are in non-pic codegen mode, we allow the address of a global to - // be used with 'i'. 
- if (GlobalAddressSDNode *GA = dyn_cast(Op)) { - if (getTargetMachine().getRelocationModel() == Reloc::PIC_) + // If we are in non-pic codegen mode, we allow the address of a global (with + // an optional displacement) to be used with 'i'. + GlobalAddressSDNode *GA = dyn_cast(Op); + int64_t Offset = 0; + + // Match either (GA) or (GA+C) + if (GA) { + Offset = GA->getOffset(); + } else if (Op.getOpcode() == ISD::ADD) { + ConstantSDNode *C = dyn_cast(Op.getOperand(1)); + GA = dyn_cast(Op.getOperand(0)); + if (C && GA) { + Offset = GA->getOffset()+C->getValue(); + } else { + C = dyn_cast(Op.getOperand(1)); + GA = dyn_cast(Op.getOperand(0)); + if (C && GA) + Offset = GA->getOffset()+C->getValue(); + else + C = 0, GA = 0; + } + } + + if (GA) { + // If addressing this global requires a load (e.g. in PIC mode), we can't + // match. + if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(), getTargetMachine(), + false)) return SDOperand(0, 0); - if (GA->getOpcode() != ISD::TargetGlobalAddress) - Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0), - GA->getOffset()); + Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0), + Offset); return Op; } // Otherwise, not valid for this mode. return SDOperand(0, 0); } + } return TargetLowering::isOperandValidForConstraint(Op, Constraint, DAG); } - std::vector X86TargetLowering:: getRegClassForInlineAsmConstraint(const std::string &Constraint, MVT::ValueType VT) const { if (Constraint.size() == 1) { // FIXME: not handling fp-stack yet! - // FIXME: not handling MMX registers yet ('y' constraint). switch (Constraint[0]) { // GCC X86 Constraint Letters default: break; // Unknown constraint letter case 'A': // EAX/EDX if (VT == MVT::i32 || VT == MVT::i64) return make_vector(X86::EAX, X86::EDX, 0); break; - case 'r': // GENERAL_REGS - case 'R': // LEGACY_REGS - if (VT == MVT::i64 && Subtarget->is64Bit()) - return make_vector(X86::RAX, X86::RDX, X86::RCX, X86::RBX, - X86::RSI, X86::RDI, X86::RBP, X86::RSP, - X86::R8, X86::R9, X86::R10, X86::R11, - X86::R12, X86::R13, X86::R14, X86::R15, 0); - if (VT == MVT::i32) - return make_vector(X86::EAX, X86::EDX, X86::ECX, X86::EBX, - X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0); - else if (VT == MVT::i16) - return make_vector(X86::AX, X86::DX, X86::CX, X86::BX, - X86::SI, X86::DI, X86::BP, X86::SP, 0); - else if (VT == MVT::i8) - return make_vector(X86::AL, X86::DL, X86::CL, X86::BL, 0); - break; - case 'l': // INDEX_REGS - if (VT == MVT::i32) - return make_vector(X86::EAX, X86::EDX, X86::ECX, X86::EBX, - X86::ESI, X86::EDI, X86::EBP, 0); - else if (VT == MVT::i16) - return make_vector(X86::AX, X86::DX, X86::CX, X86::BX, - X86::SI, X86::DI, X86::BP, 0); - else if (VT == MVT::i8) - return make_vector(X86::AL, X86::DL, X86::CL, X86::DL, 0); - break; case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode) case 'Q': // Q_REGS if (VT == MVT::i32) @@ -4736,18 +4971,6 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint, else if (VT == MVT::i8) return make_vector(X86::AL, X86::DL, X86::CL, X86::DL, 0); break; - case 'x': // SSE_REGS if SSE1 allowed - if (Subtarget->hasSSE1()) - return make_vector(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, - X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7, - 0); - return std::vector(); - case 'Y': // SSE_REGS if SSE2 allowed - if (Subtarget->hasSSE2()) - return make_vector(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, - X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7, - 0); - return std::vector(); } } @@ -4757,6 +4980,56 @@ getRegClassForInlineAsmConstraint(const 
std::string &Constraint,
 
 std::pair<unsigned, const TargetRegisterClass*>
 X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                 MVT::ValueType VT) const {
+  // First, see if this is a constraint that directly corresponds to an LLVM
+  // register class.
+  if (Constraint.size() == 1) {
+    // GCC Constraint Letters
+    switch (Constraint[0]) {
+    default: break;
+    case 'r':   // GENERAL_REGS
+    case 'R':   // LEGACY_REGS
+    case 'l':   // INDEX_REGS
+      if (VT == MVT::i64 && Subtarget->is64Bit())
+        return std::make_pair(0U, X86::GR64RegisterClass);
+      if (VT == MVT::i32)
+        return std::make_pair(0U, X86::GR32RegisterClass);
+      else if (VT == MVT::i16)
+        return std::make_pair(0U, X86::GR16RegisterClass);
+      else if (VT == MVT::i8)
+        return std::make_pair(0U, X86::GR8RegisterClass);
+      break;
+    case 'y':   // MMX_REGS if MMX allowed.
+      if (!Subtarget->hasMMX()) break;
+      return std::make_pair(0U, X86::VR64RegisterClass);
+      break;
+    case 'Y':   // SSE_REGS if SSE2 allowed
+      if (!Subtarget->hasSSE2()) break;
+      // FALL THROUGH.
+    case 'x':   // SSE_REGS if SSE1 allowed
+      if (!Subtarget->hasSSE1()) break;
+
+      switch (VT) {
+      default: break;
+      // Scalar SSE types.
+      case MVT::f32:
+      case MVT::i32:
+        return std::make_pair(0U, X86::FR32RegisterClass);
+      case MVT::f64:
+      case MVT::i64:
+        return std::make_pair(0U, X86::FR64RegisterClass);
+      // Vector types.
+      case MVT::v16i8:
+      case MVT::v8i16:
+      case MVT::v4i32:
+      case MVT::v2i64:
+      case MVT::v4f32:
+      case MVT::v2f64:
+        return std::make_pair(0U, X86::VR128RegisterClass);
+      }
+      break;
+    }
+  }
+
   // Use the default implementation in TargetLowering to convert the register
   // constraint into a member of a register class.
   std::pair<unsigned, const TargetRegisterClass*> Res;
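A usage-level sketch of the constraint letters this hook now resolves directly to register classes (illustrative user code, not part of the patch; the asm strings are ordinary x86/SSE instructions):

// 'x' (or 'Y' when SSE2 is required) selects an XMM register; 'r' selects a
// general-purpose register of the operand's width.
float fast_rsqrt(float X) {
  float R;
  __asm__("rsqrtss %1, %0" : "=x"(R) : "x"(X));   // FR32/VR128 via the 'x' constraint
  return R;
}

unsigned add_in_gpr(unsigned A, unsigned B) {
  __asm__("addl %1, %0" : "+r"(A) : "r"(B));      // GR32 via the 'r' constraint
  return A;
}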