X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86ISelLowering.cpp;h=f65167bd8877862f5c8834d42cf5e345be645b69;hb=48c1bc2ace6481d3272ab5c18e1f19352c563be8;hp=3401a2c4d35872b8b1fba51549135a4086cd8fee;hpb=014278e6a11fa0767853b831e5bf51b95bf541c5;p=oota-llvm.git

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 3401a2c4d35..f65167bd887 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -297,10 +297,20 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   setOperationAction(ISD::ATOMIC_CMP_SWAP_32, MVT::i32, Custom);
   setOperationAction(ISD::ATOMIC_CMP_SWAP_64, MVT::i64, Custom);
-  setOperationAction(ISD::ATOMIC_LOAD_SUB_8, MVT::i8, Expand);
-  setOperationAction(ISD::ATOMIC_LOAD_SUB_16, MVT::i16, Expand);
-  setOperationAction(ISD::ATOMIC_LOAD_SUB_32, MVT::i32, Expand);
-  setOperationAction(ISD::ATOMIC_LOAD_SUB_64, MVT::i64, Expand);
+  setOperationAction(ISD::ATOMIC_LOAD_SUB_8 , MVT::i8, Custom);
+  setOperationAction(ISD::ATOMIC_LOAD_SUB_16, MVT::i16, Custom);
+  setOperationAction(ISD::ATOMIC_LOAD_SUB_32, MVT::i32, Custom);
+  setOperationAction(ISD::ATOMIC_LOAD_SUB_64, MVT::i64, Custom);
+
+  if (!Subtarget->is64Bit()) {
+    setOperationAction(ISD::ATOMIC_LOAD_ADD_64, MVT::i64, Custom);
+    setOperationAction(ISD::ATOMIC_LOAD_SUB_64, MVT::i64, Custom);
+    setOperationAction(ISD::ATOMIC_LOAD_AND_64, MVT::i64, Custom);
+    setOperationAction(ISD::ATOMIC_LOAD_OR_64, MVT::i64, Custom);
+    setOperationAction(ISD::ATOMIC_LOAD_XOR_64, MVT::i64, Custom);
+    setOperationAction(ISD::ATOMIC_LOAD_NAND_64, MVT::i64, Custom);
+    setOperationAction(ISD::ATOMIC_SWAP_64, MVT::i64, Custom);
+  }

   // Use the default ISD::DBG_STOPPOINT, ISD::DECLARE expansion.
   setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
@@ -494,20 +504,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   setOperationAction(ISD::FPOW , MVT::f64 , Expand);
   setOperationAction(ISD::FPOW , MVT::f80 , Expand);

-  setOperationAction(ISD::FLOG, MVT::f32, Expand);
-  setOperationAction(ISD::FLOG, MVT::f64, Expand);
   setOperationAction(ISD::FLOG, MVT::f80, Expand);
-  setOperationAction(ISD::FLOG2, MVT::f32, Expand);
-  setOperationAction(ISD::FLOG2, MVT::f64, Expand);
   setOperationAction(ISD::FLOG2, MVT::f80, Expand);
-  setOperationAction(ISD::FLOG10, MVT::f32, Expand);
-  setOperationAction(ISD::FLOG10, MVT::f64, Expand);
   setOperationAction(ISD::FLOG10, MVT::f80, Expand);
-  setOperationAction(ISD::FEXP, MVT::f32, Expand);
-  setOperationAction(ISD::FEXP, MVT::f64, Expand);
   setOperationAction(ISD::FEXP, MVT::f80, Expand);
-  setOperationAction(ISD::FEXP2, MVT::f32, Expand);
-  setOperationAction(ISD::FEXP2, MVT::f64, Expand);
   setOperationAction(ISD::FEXP2, MVT::f80, Expand);

   // First set operation action for all vector types to expand.
Then we @@ -890,7 +890,7 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) { SDValue TargetAddress = TailCall.getOperand(1); SDValue StackAdjustment = TailCall.getOperand(2); assert(((TargetAddress.getOpcode() == ISD::Register && - (cast(TargetAddress)->getReg() == X86::ECX || + (cast(TargetAddress)->getReg() == X86::EAX || cast(TargetAddress)->getReg() == X86::R9)) || TargetAddress.getOpcode() == ISD::TargetExternalSymbol || TargetAddress.getOpcode() == ISD::TargetGlobalAddress) && @@ -1098,8 +1098,6 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(unsigned CC) const { if (CC == CallingConv::X86_FastCall) return CC_X86_32_FastCall; - else if (CC == CallingConv::Fast && PerformTailCallOpt) - return CC_X86_32_TailCall; else if (CC == CallingConv::Fast) return CC_X86_32_FastCC; else @@ -1605,7 +1603,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { if (CallRequiresFnAddressInReg(Is64Bit, IsTailCall)) { // Note: The actual moving to ecx is done further down. GlobalAddressSDNode *G = dyn_cast(Callee); - if (G && !G->getGlobal()->hasHiddenVisibility() && + if (G && !G->getGlobal()->hasHiddenVisibility() && !G->getGlobal()->hasProtectedVisibility()) Callee = LowerGlobalAddress(Callee, DAG); else if (isa(Callee)) @@ -1700,7 +1698,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); } else if (IsTailCall) { - unsigned Opc = Is64Bit ? X86::R9 : X86::ECX; + unsigned Opc = Is64Bit ? X86::R9 : X86::EAX; Chain = DAG.getCopyToReg(Chain, DAG.getRegister(Opc, getPointerTy()), @@ -1878,12 +1876,13 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(CallSDNode *TheCall, FastISel * X86TargetLowering::createFastISel(MachineFunction &mf, + MachineModuleInfo *mmo, DenseMap &vm, DenseMap &bm, DenseMap &am) { - return X86::createFastISel(mf, vm, bm, am); + return X86::createFastISel(mf, mmo, vm, bm, am); } @@ -2527,6 +2526,21 @@ bool X86::isSplatLoMask(SDNode *N) { return true; } +/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to MOVDDUP. +bool X86::isMOVDDUPMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + unsigned e = N->getNumOperands() / 2; + for (unsigned i = 0; i < e; ++i) + if (!isUndefOrEqual(N->getOperand(i), i)) + return false; + for (unsigned i = 0; i < e; ++i) + if (!isUndefOrEqual(N->getOperand(e+i), i)) + return false; + return true; +} + /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* /// instructions. @@ -2694,15 +2708,14 @@ static bool ShouldXformToMOVHLPS(SDNode *Mask) { /// is promoted to a vector. It also returns the LoadSDNode by reference if /// required. 
static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) { - if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) { - N = N->getOperand(0).getNode(); - if (ISD::isNON_EXTLoad(N)) { - if (LD) - *LD = cast(N); - return true; - } - } - return false; + if (N->getOpcode() != ISD::SCALAR_TO_VECTOR) + return false; + N = N->getOperand(0).getNode(); + if (!ISD::isNON_EXTLoad(N)) + return false; + if (LD) + *LD = cast(N); + return true; } /// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to @@ -2954,6 +2967,46 @@ static SDValue PromoteSplat(SDValue Op, SelectionDAG &DAG, bool HasSSE2) { return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle); } +/// isVectorLoad - Returns true if the node is a vector load, a scalar +/// load that's promoted to vector, or a load bitcasted. +static bool isVectorLoad(SDValue Op) { + assert(Op.getValueType().isVector() && "Expected a vector type"); + if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR || + Op.getOpcode() == ISD::BIT_CONVERT) { + return isa(Op.getOperand(0)); + } + return isa(Op); +} + + +/// CanonicalizeMovddup - Cannonicalize movddup shuffle to v2f64. +/// +static SDValue CanonicalizeMovddup(SDValue Op, SDValue V1, SDValue Mask, + SelectionDAG &DAG, bool HasSSE3) { + // If we have sse3 and shuffle has more than one use or input is a load, then + // use movddup. Otherwise, use movlhps. + bool UseMovddup = HasSSE3 && (!Op.hasOneUse() || isVectorLoad(V1)); + MVT PVT = UseMovddup ? MVT::v2f64 : MVT::v4f32; + MVT VT = Op.getValueType(); + if (VT == PVT) + return Op; + unsigned NumElems = PVT.getVectorNumElements(); + if (NumElems == 2) { + SDValue Cst = DAG.getTargetConstant(0, MVT::i32); + Mask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst); + } else { + assert(NumElems == 4); + SDValue Cst0 = DAG.getTargetConstant(0, MVT::i32); + SDValue Cst1 = DAG.getTargetConstant(1, MVT::i32); + Mask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst0, Cst1, Cst0, Cst1); + } + + V1 = DAG.getNode(ISD::BIT_CONVERT, PVT, V1); + SDValue Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, PVT, V1, + DAG.getNode(ISD::UNDEF, PVT), Mask); + return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle); +} + /// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified /// vector of zero or undef vector. This produces a shuffle where the low /// element of V2 is swizzled into the zero/undef vector, landing at element @@ -3899,6 +3952,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { else if (isIdentityMask(PermMask.getNode(), true)) return V2; + // Canonicalize movddup shuffles. + if (V2IsUndef && Subtarget->hasSSE2() && + X86::isMOVDDUPMask(PermMask.getNode())) + return CanonicalizeMovddup(Op, V1, PermMask, DAG, Subtarget->hasSSE3()); + if (isSplatMask(PermMask.getNode())) { if (isMMX || NumElems < 4) return Op; // Promote it to a v4{if}32 splat. @@ -4311,8 +4369,8 @@ X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) { } SDValue -X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { - GlobalValue *GV = cast(Op)->getGlobal(); +X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, + SelectionDAG &DAG) const { SDValue Result = DAG.getTargetGlobalAddress(GV, getPointerTy()); Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result); // With PIC, the address is actually $g + Offset. 
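Aside on the shuffle-mask shape used above: X86::isMOVDDUPMask and CanonicalizeMovddup, added earlier in this diff, accept a mask whose second half repeats its first half (<0,1,0,1> for a 4-element mask, <0,0> for a 2-element one) and, when SSE3 is available and the input is a load or the shuffle has more than one use, canonicalize such shuffles to a v2f64 MOVDDUP rather than MOVLHPS. Below is a minimal standalone sketch of that predicate over a plain index array; it is illustrative only, not part of the patch, and uses -1 to stand in for an undef mask element.

#include <vector>

// Illustrative sketch of the mask shape X86::isMOVDDUPMask accepts,
// written over plain integers instead of BUILD_VECTOR operands.
static bool looksLikeMovddupMask(const std::vector<int> &Mask) {
  unsigned e = Mask.size() / 2;
  for (unsigned i = 0; i != e; ++i)        // first half: <0, 1, ..., e-1>
    if (Mask[i] != -1 && Mask[i] != int(i))
      return false;
  for (unsigned i = 0; i != e; ++i)        // second half repeats the first
    if (Mask[e + i] != -1 && Mask[e + i] != int(i))
      return false;
  return true;
}

// looksLikeMovddupMask({0,1,0,1}) and looksLikeMovddupMask({0,0}) hold;
// looksLikeMovddupMask({0,1,2,3}) does not.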
@@ -4335,6 +4393,12 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { return Result; } +SDValue +X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { + const GlobalValue *GV = cast(Op)->getGlobal(); + return LowerGlobalAddress(GV, DAG); +} + // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit static SDValue LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG, @@ -4965,7 +5029,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) { bool IllegalFPCMov = false; if (VT.isFloatingPoint() && !VT.isVector() && !isScalarFPTypeInSSEReg(VT)) // FPStack? - IllegalFPCMov = !hasFPCMov(cast(CC)->getSignExtended()); + IllegalFPCMov = !hasFPCMov(cast(CC)->getSExtValue()); if ((Opc == X86ISD::CMP || Opc == X86ISD::COMI || @@ -5074,15 +5138,16 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDValue X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - const Value *DstSV, uint64_t DstSVOff) { + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + const Value *DstSV, + uint64_t DstSVOff) { ConstantSDNode *ConstantSize = dyn_cast(Size); - /// If not DWORD aligned or size is more than the threshold, call the library. - /// The libc version is likely to be faster for these cases. It can use the - /// address value and run time information about the CPU. + // If not DWORD aligned or size is more than the threshold, call the library. + // The libc version is likely to be faster for these cases. It can use the + // address value and run time information about the CPU. if ((Align & 3) != 0 || !ConstantSize || ConstantSize->getZExtValue() > @@ -5091,8 +5156,9 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, // Check to see if there is a specialized entry-point for memory zeroing. ConstantSDNode *V = dyn_cast(Src); - if (const char *bzeroEntry = - V && V->isNullValue() ? Subtarget->getBZeroEntry() : 0) { + + if (const char *bzeroEntry = V && + V->isNullValue() ? Subtarget->getBZeroEntry() : 0) { MVT IntPtr = getPointerTy(); const Type *IntPtrTy = TD->getIntPtrType(); TargetLowering::ArgListTy Args; @@ -5103,9 +5169,9 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, Entry.Node = Size; Args.push_back(Entry); std::pair CallResult = - LowerCallTo(Chain, Type::VoidTy, false, false, false, CallingConv::C, - false, DAG.getExternalSymbol(bzeroEntry, IntPtr), - Args, DAG); + LowerCallTo(Chain, Type::VoidTy, false, false, false, false, + CallingConv::C, false, + DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG); return CallResult.second; } @@ -5591,13 +5657,15 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { } SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { - // Depths > 0 not supported yet! - if (cast(Op.getOperand(0))->getZExtValue() > 0) - return SDValue(); - - SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); - return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI, - DAG.getIntPtrConstant(TD->getPointerSize())); + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setFrameAddressIsTaken(true); + MVT VT = Op.getValueType(); + unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); + unsigned FrameReg = Subtarget->is64Bit() ? 
X86::RBP : X86::EBP; + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), FrameReg, VT); + while (Depth--) + FrameAddr = DAG.getLoad(VT, DAG.getEntryNode(), FrameAddr, NULL, 0); + return FrameAddr; } SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op, @@ -5703,7 +5771,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, // Check that ECX wasn't needed by an 'inreg' parameter. const FunctionType *FTy = Func->getFunctionType(); - const PAListPtr &Attrs = Func->getParamAttrs(); + const AttrListPtr &Attrs = Func->getAttributes(); if (!Attrs.isEmpty() && !Func->isVarArg()) { unsigned InRegCount = 0; @@ -5711,7 +5779,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, for (FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end(); I != E; ++I, ++Idx) - if (Attrs.paramHasAttr(Idx, ParamAttr::InReg)) + if (Attrs.paramHasAttr(Idx, Attribute::InReg)) // FIXME: should only count parameters that are lowered to integers. InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32; @@ -5789,7 +5857,7 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); SDValue Chain = DAG.getNode(X86ISD::FNSTCW16m, MVT::Other, - DAG.getEntryNode(), StackSlot); + DAG.getEntryNode(), StackSlot); // Load FP Control Word from stack slot SDValue CWD = DAG.getLoad(MVT::i16, Chain, StackSlot, NULL, 0); @@ -5898,10 +5966,10 @@ SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) { SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), Reg, Op.getOperand(2), SDValue()); SDValue Ops[] = { cpIn.getValue(0), - Op.getOperand(1), - Op.getOperand(3), - DAG.getTargetConstant(size, MVT::i8), - cpIn.getValue(1) }; + Op.getOperand(1), + Op.getOperand(3), + DAG.getTargetConstant(size, MVT::i8), + cpIn.getValue(1) }; SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); SDValue Result = DAG.getNode(X86ISD::LCMPXCHG_DAG, Tys, Ops, 5); SDValue cpOut = @@ -5932,8 +6000,8 @@ SDNode* X86TargetLowering::ExpandATOMIC_CMP_SWAP(SDNode* Op, swapInH = DAG.getCopyToReg(swapInL.getValue(0), X86::ECX, swapInH, swapInL.getValue(1)); SDValue Ops[] = { swapInH.getValue(0), - Op->getOperand(1), - swapInH.getValue(1)}; + Op->getOperand(1), + swapInH.getValue(1) }; SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); SDValue Result = DAG.getNode(X86ISD::LCMPXCHG8_DAG, Tys, Ops, 3); SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), X86::EAX, MVT::i32, @@ -5946,18 +6014,43 @@ SDNode* X86TargetLowering::ExpandATOMIC_CMP_SWAP(SDNode* Op, return DAG.getMergeValues(Vals, 2).getNode(); } -SDNode* X86TargetLowering::ExpandATOMIC_LOAD_SUB(SDNode* Op, - SelectionDAG &DAG) { - MVT T = Op->getValueType(0); +SDValue X86TargetLowering::LowerATOMIC_BINARY_64(SDValue Op, + SelectionDAG &DAG, + unsigned NewOp) { + SDNode *Node = Op.getNode(); + MVT T = Node->getValueType(0); + assert (T == MVT::i64 && "Only know how to expand i64 atomics"); + + SDValue Chain = Node->getOperand(0); + SDValue In1 = Node->getOperand(1); + assert(Node->getOperand(2).getNode()->getOpcode()==ISD::BUILD_PAIR); + SDValue In2L = Node->getOperand(2).getNode()->getOperand(0); + SDValue In2H = Node->getOperand(2).getNode()->getOperand(1); + SDValue Ops[] = { Chain, In1, In2L, In2H }; + SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); + SDValue Result = DAG.getNode(NewOp, Tys, Ops, 4); + SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)}; + SDValue ResultVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OpsF, 2); + SDValue Vals[2] = { 
ResultVal, Result.getValue(2) }; + return SDValue(DAG.getMergeValues(Vals, 2).getNode(), 0); +} + +SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) { + SDNode *Node = Op.getNode(); + MVT T = Node->getValueType(0); SDValue negOp = DAG.getNode(ISD::SUB, T, - DAG.getConstant(0, T), Op->getOperand(2)); - return DAG.getAtomic((T==MVT::i8 ? ISD::ATOMIC_LOAD_ADD_8: - T==MVT::i16 ? ISD::ATOMIC_LOAD_ADD_16: - T==MVT::i32 ? ISD::ATOMIC_LOAD_ADD_32: - T==MVT::i64 ? ISD::ATOMIC_LOAD_ADD_64: 0), - Op->getOperand(0), Op->getOperand(1), negOp, - cast(Op)->getSrcValue(), - cast(Op)->getAlignment()).getNode(); + DAG.getConstant(0, T), Node->getOperand(2)); + return DAG.getAtomic((Op.getOpcode()==ISD::ATOMIC_LOAD_SUB_8 ? + ISD::ATOMIC_LOAD_ADD_8 : + Op.getOpcode()==ISD::ATOMIC_LOAD_SUB_16 ? + ISD::ATOMIC_LOAD_ADD_16 : + Op.getOpcode()==ISD::ATOMIC_LOAD_SUB_32 ? + ISD::ATOMIC_LOAD_ADD_32 : + ISD::ATOMIC_LOAD_ADD_64), + Node->getOperand(0), + Node->getOperand(1), negOp, + cast(Node)->getSrcValue(), + cast(Node)->getAlignment()); } /// LowerOperation - Provide custom lowering hooks for some operations. @@ -5965,10 +6058,27 @@ SDNode* X86TargetLowering::ExpandATOMIC_LOAD_SUB(SDNode* Op, SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { switch (Op.getOpcode()) { default: assert(0 && "Should not custom lower this!"); - case ISD::ATOMIC_CMP_SWAP_8: return LowerCMP_SWAP(Op,DAG); - case ISD::ATOMIC_CMP_SWAP_16: return LowerCMP_SWAP(Op,DAG); - case ISD::ATOMIC_CMP_SWAP_32: return LowerCMP_SWAP(Op,DAG); + case ISD::ATOMIC_CMP_SWAP_8: + case ISD::ATOMIC_CMP_SWAP_16: + case ISD::ATOMIC_CMP_SWAP_32: case ISD::ATOMIC_CMP_SWAP_64: return LowerCMP_SWAP(Op,DAG); + case ISD::ATOMIC_LOAD_SUB_8: + case ISD::ATOMIC_LOAD_SUB_16: + case ISD::ATOMIC_LOAD_SUB_32: return LowerLOAD_SUB(Op,DAG); + case ISD::ATOMIC_LOAD_SUB_64: return (Subtarget->is64Bit()) ? 
+ LowerLOAD_SUB(Op,DAG) : + LowerATOMIC_BINARY_64(Op,DAG, + X86ISD::ATOMSUB64_DAG); + case ISD::ATOMIC_LOAD_AND_64: return LowerATOMIC_BINARY_64(Op,DAG, + X86ISD::ATOMAND64_DAG); + case ISD::ATOMIC_LOAD_OR_64: return LowerATOMIC_BINARY_64(Op, DAG, + X86ISD::ATOMOR64_DAG); + case ISD::ATOMIC_LOAD_XOR_64: return LowerATOMIC_BINARY_64(Op,DAG, + X86ISD::ATOMXOR64_DAG); + case ISD::ATOMIC_LOAD_NAND_64: return LowerATOMIC_BINARY_64(Op,DAG, + X86ISD::ATOMNAND64_DAG); + case ISD::ATOMIC_LOAD_ADD_64: return LowerATOMIC_BINARY_64(Op,DAG, + X86ISD::ATOMADD64_DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); @@ -6023,10 +6133,6 @@ SDNode *X86TargetLowering::ReplaceNodeResults(SDNode *N, SelectionDAG &DAG) { case ISD::FP_TO_SINT: return ExpandFP_TO_SINT(N, DAG); case ISD::READCYCLECOUNTER: return ExpandREADCYCLECOUNTER(N, DAG); case ISD::ATOMIC_CMP_SWAP_64: return ExpandATOMIC_CMP_SWAP(N, DAG); - case ISD::ATOMIC_LOAD_SUB_8: return ExpandATOMIC_LOAD_SUB(N,DAG); - case ISD::ATOMIC_LOAD_SUB_16: return ExpandATOMIC_LOAD_SUB(N,DAG); - case ISD::ATOMIC_LOAD_SUB_32: return ExpandATOMIC_LOAD_SUB(N,DAG); - case ISD::ATOMIC_LOAD_SUB_64: return ExpandATOMIC_LOAD_SUB(N,DAG); } } @@ -6078,6 +6184,12 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m"; case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG"; case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG"; + case X86ISD::ATOMADD64_DAG: return "X86ISD::ATOMADD64_DAG"; + case X86ISD::ATOMSUB64_DAG: return "X86ISD::ATOMSUB64_DAG"; + case X86ISD::ATOMOR64_DAG: return "X86ISD::ATOMOR64_DAG"; + case X86ISD::ATOMXOR64_DAG: return "X86ISD::ATOMXOR64_DAG"; + case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG"; + case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG"; case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL"; case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD"; case X86ISD::VSHL: return "X86ISD::VSHL"; @@ -6303,6 +6415,146 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, return nextMBB; } +// private utility function +MachineBasicBlock * +X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, + MachineBasicBlock *MBB, + unsigned regOpcL, + unsigned regOpcH, + unsigned immOpcL, + unsigned immOpcH, + bool invSrc) { + // For the atomic bitwise operator, we generate + // thisMBB (instructions are in pairs, except cmpxchg8b) + // ld t1,t2 = [bitinstr.addr] + // newMBB: + // out1, out2 = phi (thisMBB, t1/t2) (newMBB, t3/t4) + // op t5, t6 <- out1, out2, [bitinstr.val] + // mov ECX, EBX <- t5, t6 + // mov EAX, EDX <- t1, t2 + // cmpxchg8b [bitinstr.addr] [EAX, EDX, EBX, ECX implicit] + // mov t3, t4 <- EAX, EDX + // bz newMBB + // result in out1, out2 + // fallthrough -->nextMBB + + const TargetRegisterClass *RC = X86::GR32RegisterClass; + const unsigned LoadOpc = X86::MOV32rm; + const unsigned copyOpc = X86::MOV32rr; + const unsigned NotOpc = X86::NOT32r; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const BasicBlock *LLVM_BB = MBB->getBasicBlock(); + MachineFunction::iterator MBBIter = MBB; + ++MBBIter; + + /// First build the CFG + MachineFunction *F = MBB->getParent(); + MachineBasicBlock *thisMBB = MBB; + MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB); + 
F->insert(MBBIter, newMBB); + F->insert(MBBIter, nextMBB); + + // Move all successors to thisMBB to nextMBB + nextMBB->transferSuccessors(thisMBB); + + // Update thisMBB to fall through to newMBB + thisMBB->addSuccessor(newMBB); + + // newMBB jumps to itself and fall through to nextMBB + newMBB->addSuccessor(nextMBB); + newMBB->addSuccessor(newMBB); + + // Insert instructions into newMBB based on incoming instruction + // There are 8 "real" operands plus 9 implicit def/uses, ignored here. + assert(bInstr->getNumOperands() < 18 && "unexpected number of operands"); + MachineOperand& dest1Oper = bInstr->getOperand(0); + MachineOperand& dest2Oper = bInstr->getOperand(1); + MachineOperand* argOpers[6]; + for (int i=0; i < 6; ++i) + argOpers[i] = &bInstr->getOperand(i+2); + + // x86 address has 4 operands: base, index, scale, and displacement + int lastAddrIndx = 3; // [0,3] + + unsigned t1 = F->getRegInfo().createVirtualRegister(RC); + MachineInstrBuilder MIB = BuildMI(thisMBB, TII->get(LoadOpc), t1); + for (int i=0; i <= lastAddrIndx; ++i) + (*MIB).addOperand(*argOpers[i]); + unsigned t2 = F->getRegInfo().createVirtualRegister(RC); + MIB = BuildMI(thisMBB, TII->get(LoadOpc), t2); + // add 4 to displacement. getImm verifies it's immediate. + for (int i=0; i <= lastAddrIndx-1; ++i) + (*MIB).addOperand(*argOpers[i]); + MachineOperand newOp3 = MachineOperand::CreateImm(argOpers[3]->getImm()+4); + (*MIB).addOperand(newOp3); + + // t3/4 are defined later, at the bottom of the loop + unsigned t3 = F->getRegInfo().createVirtualRegister(RC); + unsigned t4 = F->getRegInfo().createVirtualRegister(RC); + BuildMI(newMBB, TII->get(X86::PHI), dest1Oper.getReg()) + .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(newMBB); + BuildMI(newMBB, TII->get(X86::PHI), dest2Oper.getReg()) + .addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB); + + unsigned tt1 = F->getRegInfo().createVirtualRegister(RC); + unsigned tt2 = F->getRegInfo().createVirtualRegister(RC); + if (invSrc) { + MIB = BuildMI(newMBB, TII->get(NotOpc), tt1).addReg(t1); + MIB = BuildMI(newMBB, TII->get(NotOpc), tt2).addReg(t2); + } else { + tt1 = t1; + tt2 = t2; + } + + assert((argOpers[4]->isRegister() || argOpers[4]->isImmediate()) && + "invalid operand"); + unsigned t5 = F->getRegInfo().createVirtualRegister(RC); + unsigned t6 = F->getRegInfo().createVirtualRegister(RC); + if (argOpers[4]->isRegister()) + MIB = BuildMI(newMBB, TII->get(regOpcL), t5); + else + MIB = BuildMI(newMBB, TII->get(immOpcL), t5); + MIB.addReg(tt1); + (*MIB).addOperand(*argOpers[4]); + assert(argOpers[5]->isRegister() == argOpers[4]->isRegister()); + assert(argOpers[5]->isImmediate() == argOpers[4]->isImmediate()); + if (argOpers[5]->isRegister()) + MIB = BuildMI(newMBB, TII->get(regOpcH), t6); + else + MIB = BuildMI(newMBB, TII->get(immOpcH), t6); + MIB.addReg(tt2); + (*MIB).addOperand(*argOpers[5]); + + MIB = BuildMI(newMBB, TII->get(copyOpc), X86::EAX); + MIB.addReg(t1); + MIB = BuildMI(newMBB, TII->get(copyOpc), X86::EDX); + MIB.addReg(t2); + + MIB = BuildMI(newMBB, TII->get(copyOpc), X86::EBX); + MIB.addReg(t5); + MIB = BuildMI(newMBB, TII->get(copyOpc), X86::ECX); + MIB.addReg(t6); + + MIB = BuildMI(newMBB, TII->get(X86::LCMPXCHG8B)); + for (int i=0; i <= lastAddrIndx; ++i) + (*MIB).addOperand(*argOpers[i]); + + assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand"); + (*MIB).addMemOperand(*F, *bInstr->memoperands_begin()); + + MIB = BuildMI(newMBB, TII->get(copyOpc), t3); + MIB.addReg(X86::EAX); + MIB = BuildMI(newMBB, TII->get(copyOpc), t4); + 
MIB.addReg(X86::EDX); + + // insert branch + BuildMI(newMBB, TII->get(X86::JNE)).addMBB(newMBB); + + F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now. + return nextMBB; +} + // private utility function MachineBasicBlock * X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, @@ -6633,6 +6885,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, X86::NOT8r, X86::AL, X86::GR8RegisterClass, true); // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way. + // This group is for 64-bit host. case X86::ATOMAND64: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr, X86::AND64ri32, X86::MOV64rm, @@ -6665,6 +6918,40 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB64rr); case X86::ATOMUMAX64: return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA64rr); + + // This group does 64-bit operations on a 32-bit host. + case X86::ATOMAND6432: + return EmitAtomicBit6432WithCustomInserter(MI, BB, + X86::AND32rr, X86::AND32rr, + X86::AND32ri, X86::AND32ri, + false); + case X86::ATOMOR6432: + return EmitAtomicBit6432WithCustomInserter(MI, BB, + X86::OR32rr, X86::OR32rr, + X86::OR32ri, X86::OR32ri, + false); + case X86::ATOMXOR6432: + return EmitAtomicBit6432WithCustomInserter(MI, BB, + X86::XOR32rr, X86::XOR32rr, + X86::XOR32ri, X86::XOR32ri, + false); + case X86::ATOMNAND6432: + return EmitAtomicBit6432WithCustomInserter(MI, BB, + X86::AND32rr, X86::AND32rr, + X86::AND32ri, X86::AND32ri, + true); + // FIXME carry + case X86::ATOMADD6432: + return EmitAtomicBit6432WithCustomInserter(MI, BB, + X86::ADD32rr, X86::ADC32rr, + X86::ADD32ri, X86::ADC32ri, + false); + // FIXME carry + case X86::ATOMSUB6432: + return EmitAtomicBit6432WithCustomInserter(MI, BB, + X86::SUB32rr, X86::SBB32rr, + X86::SUB32ri, X86::SBB32ri, + false); } } @@ -6780,8 +7067,8 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, /// PerformBuildVectorCombine - build_vector 0,(load i64 / f64) -> movq / movsd. static SDValue PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG, - const X86Subtarget *Subtarget, - const TargetLowering &TLI) { + const X86Subtarget *Subtarget, + const TargetLowering &TLI) { unsigned NumOps = N->getNumOperands(); // Ignore single operand BUILD_VECTOR. @@ -6817,7 +7104,11 @@ static SDValue PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG, if (LD->getExtensionType() != ISD::NON_EXTLOAD) return SDValue(); - return DAG.getNode(X86ISD::VZEXT_LOAD, VT, LD->getChain(), LD->getBasePtr()); + SDVTList Tys = DAG.getVTList(VT, MVT::Other); + SDValue Ops[] = { LD->getChain(), LD->getBasePtr() }; + SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, Tys, Ops, 2); + DAG.ReplaceAllUsesOfValueWith(SDValue(Base, 1), ResNode.getValue(1)); + return ResNode; } /// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes. @@ -7078,6 +7369,7 @@ LowerXConstraint(MVT ConstraintVT) const { /// vector. If it is invalid, don't add anything to Ops. 
void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Constraint, + bool hasMemory, std::vector&Ops, SelectionDAG &DAG) const { SDValue Result(0, 0); @@ -7092,6 +7384,14 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, } } return; + case 'J': + if (ConstantSDNode *C = dyn_cast(Op)) { + if (C->getZExtValue() <= 63) { + Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType()); + break; + } + } + return; case 'N': if (ConstantSDNode *C = dyn_cast(Op)) { if (C->getZExtValue() <= 255) { @@ -7131,14 +7431,11 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, } if (GA) { - // If addressing this global requires a load (e.g. in PIC mode), we can't - // match. - if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(), getTargetMachine(), - false)) - return; - - Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0), - Offset); + if (hasMemory) + Op = LowerGlobalAddress(GA->getGlobal(), DAG); + else + Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0), + Offset); Result = Op; break; } @@ -7152,7 +7449,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, Ops.push_back(Result); return; } - return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); + return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory, + Ops, DAG); } std::vector X86TargetLowering::
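The headline change in this diff is 64-bit atomic read-modify-write support on 32-bit x86: the i64 ATOMIC_LOAD_ADD/SUB/AND/OR/XOR/NAND and ATOMIC_SWAP nodes are marked Custom when !Subtarget->is64Bit(), LowerATOMIC_BINARY_64 splits the operand into two i32 halves and emits an X86ISD::ATOM*64_DAG node, and EmitAtomicBit6432WithCustomInserter expands the matching ATOM*6432 pseudo into a compare-exchange retry loop around LCMPXCHG8B. As a usage illustration only, code like the following, built for a 32-bit x86 target, is the kind of input that should now reach this path instead of a runtime library call; the GCC-compatible __sync builtins are assumed to be lowered by the front end to the ISD::ATOMIC_*_64 nodes handled above, which is not shown in this diff.

// Illustration only; not part of the patch.
long long Counter;

long long bump(long long Delta) {
  // i64 atomic add on a 32-bit host: the ATOMADD6432 pseudo, i.e. an
  // ADD32rr/ADC32rr pair on the two halves inside a cmpxchg8b retry loop.
  return __sync_fetch_and_add(&Counter, Delta);
}

long long drop(long long Delta) {
  // The i64 sub form maps to X86ISD::ATOMSUB64_DAG on 32-bit hosts
  // (SUB32rr/SBB32rr inside the loop); on 64-bit hosts LowerLOAD_SUB
  // instead rewrites it as an atomic add of the negated operand.
  return __sync_fetch_and_sub(&Counter, Delta);
}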