setOperationAction(ISD::ATOMIC_CMP_SWAP_32, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_CMP_SWAP_64, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_SUB_8, MVT::i8, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_SUB_16, MVT::i16, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_SUB_32, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_SUB_64, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB_8, MVT::i8, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB_16, MVT::i16, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB_32, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB_64, MVT::i64, Custom);
+
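+ // On a 32-bit host the only 64-bit atomic primitive is cmpxchg8b, so the
+ // 64-bit read-modify-write ops below are custom-lowered to cmpxchg8b
+ // loops (see EmitAtomicBit6432WithCustomInserter).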
+ if (!Subtarget->is64Bit()) {
+ setOperationAction(ISD::ATOMIC_LOAD_ADD_64, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB_64, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_AND_64, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_OR_64, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR_64, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND_64, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_SWAP_64, MVT::i64, Custom);
+ }
// Use the default ISD::DBG_STOPPOINT, ISD::DECLARE expansion.
setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
setOperationAction(ISD::FPOW , MVT::f64 , Expand);
setOperationAction(ISD::FPOW , MVT::f80 , Expand);
- setOperationAction(ISD::FLOG, MVT::f32, Expand);
- setOperationAction(ISD::FLOG, MVT::f64, Expand);
setOperationAction(ISD::FLOG, MVT::f80, Expand);
- setOperationAction(ISD::FLOG2, MVT::f32, Expand);
- setOperationAction(ISD::FLOG2, MVT::f64, Expand);
setOperationAction(ISD::FLOG2, MVT::f80, Expand);
- setOperationAction(ISD::FLOG10, MVT::f32, Expand);
- setOperationAction(ISD::FLOG10, MVT::f64, Expand);
setOperationAction(ISD::FLOG10, MVT::f80, Expand);
- setOperationAction(ISD::FEXP, MVT::f32, Expand);
- setOperationAction(ISD::FEXP, MVT::f64, Expand);
setOperationAction(ISD::FEXP, MVT::f80, Expand);
- setOperationAction(ISD::FEXP2, MVT::f32, Expand);
- setOperationAction(ISD::FEXP2, MVT::f64, Expand);
setOperationAction(ISD::FEXP2, MVT::f80, Expand);
// First set operation action for all vector types to expand. Then we
SDValue TargetAddress = TailCall.getOperand(1);
SDValue StackAdjustment = TailCall.getOperand(2);
assert(((TargetAddress.getOpcode() == ISD::Register &&
- (cast<RegisterSDNode>(TargetAddress)->getReg() == X86::ECX ||
+ (cast<RegisterSDNode>(TargetAddress)->getReg() == X86::EAX ||
cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R9)) ||
TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
TargetAddress.getOpcode() == ISD::TargetGlobalAddress) &&
if (CC == CallingConv::X86_FastCall)
return CC_X86_32_FastCall;
- else if (CC == CallingConv::Fast && PerformTailCallOpt)
- return CC_X86_32_TailCall;
else if (CC == CallingConv::Fast)
return CC_X86_32_FastCC;
else
if (CallRequiresFnAddressInReg(Is64Bit, IsTailCall)) {
- // Note: The actual moving to ecx is done further down.
+ // Note: The actual moving to eax is done further down.
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
- if (G && !G->getGlobal()->hasHiddenVisibility() &&
+ if (G && !G->getGlobal()->hasHiddenVisibility() &&
!G->getGlobal()->hasProtectedVisibility())
Callee = LowerGlobalAddress(Callee, DAG);
else if (isa<ExternalSymbolSDNode>(Callee))
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
} else if (IsTailCall) {
- unsigned Opc = Is64Bit ? X86::R9 : X86::ECX;
+ unsigned Opc = Is64Bit ? X86::R9 : X86::EAX;
Chain = DAG.getCopyToReg(Chain,
DAG.getRegister(Opc, getPointerTy()),
FastISel *
X86TargetLowering::createFastISel(MachineFunction &mf,
+ MachineModuleInfo *mmo,
DenseMap<const Value *, unsigned> &vm,
DenseMap<const BasicBlock *,
MachineBasicBlock *> &bm,
DenseMap<const AllocaInst *, int> &am) {
- return X86::createFastISel(mf, vm, bm, am);
+ return X86::createFastISel(mf, mmo, vm, bm, am);
}
return true;
}
+/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVDDUP.
+bool X86::isMOVDDUPMask(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
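+ // The mask must replicate the low half of the vector into both halves,
+ // e.g. <0,0> for 2 elements or <0,1,0,1> for 4.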
+ unsigned e = N->getNumOperands() / 2;
+ for (unsigned i = 0; i < e; ++i)
+ if (!isUndefOrEqual(N->getOperand(i), i))
+ return false;
+ for (unsigned i = 0; i < e; ++i)
+ if (!isUndefOrEqual(N->getOperand(e+i), i))
+ return false;
+ return true;
+}
+
/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
/// instructions.
/// is promoted to a vector. It also returns the LoadSDNode by reference if
/// required.
static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) {
- if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
- N = N->getOperand(0).getNode();
- if (ISD::isNON_EXTLoad(N)) {
- if (LD)
- *LD = cast<LoadSDNode>(N);
- return true;
- }
- }
- return false;
+ if (N->getOpcode() != ISD::SCALAR_TO_VECTOR)
+ return false;
+ N = N->getOperand(0).getNode();
+ if (!ISD::isNON_EXTLoad(N))
+ return false;
+ if (LD)
+ *LD = cast<LoadSDNode>(N);
+ return true;
}
/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
}
+/// isVectorLoad - Returns true if the node is a vector load, a scalar
+/// load promoted to a vector, or a load bitcast to a vector.
+static bool isVectorLoad(SDValue Op) {
+ assert(Op.getValueType().isVector() && "Expected a vector type");
+ if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR ||
+ Op.getOpcode() == ISD::BIT_CONVERT) {
+ return isa<LoadSDNode>(Op.getOperand(0));
+ }
+ return isa<LoadSDNode>(Op);
+}
+
+/// CanonicalizeMovddup - Canonicalize a movddup shuffle to v2f64, or to
+/// v4f32 when movlhps is used instead.
+///
+static SDValue CanonicalizeMovddup(SDValue Op, SDValue V1, SDValue Mask,
+ SelectionDAG &DAG, bool HasSSE3) {
+ // If we have SSE3 and the shuffle has more than one use or the input is
+ // a load, use movddup. Otherwise, use movlhps.
+ bool UseMovddup = HasSSE3 && (!Op.hasOneUse() || isVectorLoad(V1));
+ MVT PVT = UseMovddup ? MVT::v2f64 : MVT::v4f32;
+ MVT VT = Op.getValueType();
+ if (VT == PVT)
+ return Op;
+ unsigned NumElems = PVT.getVectorNumElements();
+ if (NumElems == 2) {
+ SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+ Mask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst);
+ } else {
+ assert(NumElems == 4);
+ SDValue Cst0 = DAG.getTargetConstant(0, MVT::i32);
+ SDValue Cst1 = DAG.getTargetConstant(1, MVT::i32);
+ Mask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst0, Cst1, Cst0, Cst1);
+ }
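+ // Either mask replicates the low 64 bits of the source into the high
+ // half, which is what movddup (and movlhps with identical operands) do.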
+
+ V1 = DAG.getNode(ISD::BIT_CONVERT, PVT, V1);
+ SDValue Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, PVT, V1,
+ DAG.getNode(ISD::UNDEF, PVT), Mask);
+ return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
+}
+
/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
/// vector of zero or undef vector. This produces a shuffle where the low
/// element of V2 is swizzled into the zero/undef vector, landing at element
else if (isIdentityMask(PermMask.getNode(), true))
return V2;
+ // Canonicalize movddup shuffles.
+ if (V2IsUndef && Subtarget->hasSSE2() &&
+ X86::isMOVDDUPMask(PermMask.getNode()))
+ return CanonicalizeMovddup(Op, V1, PermMask, DAG, Subtarget->hasSSE3());
+
if (isSplatMask(PermMask.getNode())) {
if (isMMX || NumElems < 4) return Op;
// Promote it to a v4{if}32 splat.
}
SDValue
-X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) {
- GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV,
+ SelectionDAG &DAG) const {
SDValue Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
return Result;
}
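+// The SDValue form below just unwraps the GlobalAddressSDNode; the
+// GlobalValue-based overload lets callers such as
+// LowerAsmOperandForConstraint lower an address without an existing node.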
+SDValue
+X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) {
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ return LowerGlobalAddress(GV, DAG);
+}
+
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
static SDValue
LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
bool IllegalFPCMov = false;
if (VT.isFloatingPoint() && !VT.isVector() &&
!isScalarFPTypeInSSEReg(VT)) // FPStack?
- IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
+ IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue());
if ((Opc == X86ISD::CMP ||
Opc == X86ISD::COMI ||
SDValue
X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG,
- SDValue Chain,
- SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align,
- const Value *DstSV, uint64_t DstSVOff) {
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ const Value *DstSV,
+ uint64_t DstSVOff) {
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
- /// If not DWORD aligned or size is more than the threshold, call the library.
- /// The libc version is likely to be faster for these cases. It can use the
- /// address value and run time information about the CPU.
+ // If not DWORD aligned or size is more than the threshold, call the library.
+ // The libc version is likely to be faster for these cases. It can use the
+ // address value and run time information about the CPU.
if ((Align & 3) != 0 ||
!ConstantSize ||
ConstantSize->getZExtValue() >
// Check to see if there is a specialized entry-point for memory zeroing.
ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
- if (const char *bzeroEntry =
- V && V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
+
+ if (const char *bzeroEntry = V &&
+ V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
MVT IntPtr = getPointerTy();
const Type *IntPtrTy = TD->getIntPtrType();
TargetLowering::ArgListTy Args;
Entry.Node = Size;
Args.push_back(Entry);
std::pair<SDValue,SDValue> CallResult =
- LowerCallTo(Chain, Type::VoidTy, false, false, false, CallingConv::C,
- false, DAG.getExternalSymbol(bzeroEntry, IntPtr),
- Args, DAG);
+ LowerCallTo(Chain, Type::VoidTy, false, false, false, false,
+ CallingConv::C, false,
+ DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG);
return CallResult.second;
}
}
SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
- // Depths > 0 not supported yet!
- if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0)
- return SDValue();
-
- SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
- return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI,
- DAG.getIntPtrConstant(TD->getPointerSize()));
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+ MVT VT = Op.getValueType();
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP;
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), FrameReg, VT);
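+ // For depth > 0 walk the frame-pointer chain: with EBP/RBP-based frames,
+ // the first slot of each frame holds the caller's frame pointer.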
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, DAG.getEntryNode(), FrameAddr, NULL, 0);
+ return FrameAddr;
}
SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
// Check that ECX wasn't needed by an 'inreg' parameter.
const FunctionType *FTy = Func->getFunctionType();
- const PAListPtr &Attrs = Func->getParamAttrs();
+ const AttrListPtr &Attrs = Func->getAttributes();
if (!Attrs.isEmpty() && !Func->isVarArg()) {
unsigned InRegCount = 0;
for (FunctionType::param_iterator I = FTy->param_begin(),
E = FTy->param_end(); I != E; ++I, ++Idx)
- if (Attrs.paramHasAttr(Idx, ParamAttr::InReg))
+ if (Attrs.paramHasAttr(Idx, Attribute::InReg))
// FIXME: should only count parameters that are lowered to integers.
InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32;
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
SDValue Chain = DAG.getNode(X86ISD::FNSTCW16m, MVT::Other,
- DAG.getEntryNode(), StackSlot);
+ DAG.getEntryNode(), StackSlot);
// Load FP Control Word from stack slot
SDValue CWD = DAG.getLoad(MVT::i16, Chain, StackSlot, NULL, 0);
SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), Reg,
Op.getOperand(2), SDValue());
SDValue Ops[] = { cpIn.getValue(0),
- Op.getOperand(1),
- Op.getOperand(3),
- DAG.getTargetConstant(size, MVT::i8),
- cpIn.getValue(1) };
+ Op.getOperand(1),
+ Op.getOperand(3),
+ DAG.getTargetConstant(size, MVT::i8),
+ cpIn.getValue(1) };
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
SDValue Result = DAG.getNode(X86ISD::LCMPXCHG_DAG, Tys, Ops, 5);
SDValue cpOut =
swapInH = DAG.getCopyToReg(swapInL.getValue(0), X86::ECX,
swapInH, swapInL.getValue(1));
SDValue Ops[] = { swapInH.getValue(0),
- Op->getOperand(1),
- swapInH.getValue(1)};
+ Op->getOperand(1),
+ swapInH.getValue(1) };
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
SDValue Result = DAG.getNode(X86ISD::LCMPXCHG8_DAG, Tys, Ops, 3);
SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), X86::EAX, MVT::i32,
return DAG.getMergeValues(Vals, 2).getNode();
}
-SDNode* X86TargetLowering::ExpandATOMIC_LOAD_SUB(SDNode* Op,
- SelectionDAG &DAG) {
- MVT T = Op->getValueType(0);
+SDValue X86TargetLowering::LowerATOMIC_BINARY_64(SDValue Op,
+ SelectionDAG &DAG,
+ unsigned NewOp) {
+ SDNode *Node = Op.getNode();
+ MVT T = Node->getValueType(0);
+ assert(T == MVT::i64 && "Only know how to expand i64 atomics");
+
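+ // The 64-bit operand arrives as a BUILD_PAIR of two i32 halves. Split it
+ // and emit a pseudo node that produces the two result halves plus a
+ // chain; the custom inserter later expands it into a cmpxchg8b loop.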
+ SDValue Chain = Node->getOperand(0);
+ SDValue In1 = Node->getOperand(1);
+ SDNode *Pair = Node->getOperand(2).getNode();
+ assert(Pair->getOpcode() == ISD::BUILD_PAIR && "Expected a BUILD_PAIR");
+ SDValue In2L = Pair->getOperand(0);
+ SDValue In2H = Pair->getOperand(1);
+ SDValue Ops[] = { Chain, In1, In2L, In2H };
+ SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
+ SDValue Result = DAG.getNode(NewOp, Tys, Ops, 4);
+ SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) };
+ SDValue ResultVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OpsF, 2);
+ SDValue Vals[2] = { ResultVal, Result.getValue(2) };
+ return DAG.getMergeValues(Vals, 2);
+}
+
+SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) {
+ SDNode *Node = Op.getNode();
+ MVT T = Node->getValueType(0);
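+ // x86 has fetch-and-add (xadd) but no fetch-and-sub, so subtraction is
+ // lowered as an atomic add of the negated operand.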
SDValue negOp = DAG.getNode(ISD::SUB, T,
- DAG.getConstant(0, T), Op->getOperand(2));
- return DAG.getAtomic((T==MVT::i8 ? ISD::ATOMIC_LOAD_ADD_8:
- T==MVT::i16 ? ISD::ATOMIC_LOAD_ADD_16:
- T==MVT::i32 ? ISD::ATOMIC_LOAD_ADD_32:
- T==MVT::i64 ? ISD::ATOMIC_LOAD_ADD_64: 0),
- Op->getOperand(0), Op->getOperand(1), negOp,
- cast<AtomicSDNode>(Op)->getSrcValue(),
- cast<AtomicSDNode>(Op)->getAlignment()).getNode();
+ DAG.getConstant(0, T), Node->getOperand(2));
+ return DAG.getAtomic((Op.getOpcode()==ISD::ATOMIC_LOAD_SUB_8 ?
+ ISD::ATOMIC_LOAD_ADD_8 :
+ Op.getOpcode()==ISD::ATOMIC_LOAD_SUB_16 ?
+ ISD::ATOMIC_LOAD_ADD_16 :
+ Op.getOpcode()==ISD::ATOMIC_LOAD_SUB_32 ?
+ ISD::ATOMIC_LOAD_ADD_32 :
+ ISD::ATOMIC_LOAD_ADD_64),
+ Node->getOperand(0),
+ Node->getOperand(1), negOp,
+ cast<AtomicSDNode>(Node)->getSrcValue(),
+ cast<AtomicSDNode>(Node)->getAlignment());
}
/// LowerOperation - Provide custom lowering hooks for some operations.
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
switch (Op.getOpcode()) {
default: assert(0 && "Should not custom lower this!");
- case ISD::ATOMIC_CMP_SWAP_8: return LowerCMP_SWAP(Op,DAG);
- case ISD::ATOMIC_CMP_SWAP_16: return LowerCMP_SWAP(Op,DAG);
- case ISD::ATOMIC_CMP_SWAP_32: return LowerCMP_SWAP(Op,DAG);
+ case ISD::ATOMIC_CMP_SWAP_8:
+ case ISD::ATOMIC_CMP_SWAP_16:
+ case ISD::ATOMIC_CMP_SWAP_32:
case ISD::ATOMIC_CMP_SWAP_64: return LowerCMP_SWAP(Op,DAG);
+ case ISD::ATOMIC_LOAD_SUB_8:
+ case ISD::ATOMIC_LOAD_SUB_16:
+ case ISD::ATOMIC_LOAD_SUB_32: return LowerLOAD_SUB(Op,DAG);
+ case ISD::ATOMIC_LOAD_SUB_64: return (Subtarget->is64Bit()) ?
+ LowerLOAD_SUB(Op,DAG) :
+ LowerATOMIC_BINARY_64(Op,DAG,
+ X86ISD::ATOMSUB64_DAG);
+ case ISD::ATOMIC_LOAD_AND_64: return LowerATOMIC_BINARY_64(Op,DAG,
+ X86ISD::ATOMAND64_DAG);
+ case ISD::ATOMIC_LOAD_OR_64: return LowerATOMIC_BINARY_64(Op, DAG,
+ X86ISD::ATOMOR64_DAG);
+ case ISD::ATOMIC_LOAD_XOR_64: return LowerATOMIC_BINARY_64(Op,DAG,
+ X86ISD::ATOMXOR64_DAG);
+ case ISD::ATOMIC_LOAD_NAND_64: return LowerATOMIC_BINARY_64(Op,DAG,
+ X86ISD::ATOMNAND64_DAG);
+ case ISD::ATOMIC_LOAD_ADD_64: return LowerATOMIC_BINARY_64(Op,DAG,
+ X86ISD::ATOMADD64_DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::FP_TO_SINT: return ExpandFP_TO_SINT(N, DAG);
case ISD::READCYCLECOUNTER: return ExpandREADCYCLECOUNTER(N, DAG);
case ISD::ATOMIC_CMP_SWAP_64: return ExpandATOMIC_CMP_SWAP(N, DAG);
- case ISD::ATOMIC_LOAD_SUB_8: return ExpandATOMIC_LOAD_SUB(N,DAG);
- case ISD::ATOMIC_LOAD_SUB_16: return ExpandATOMIC_LOAD_SUB(N,DAG);
- case ISD::ATOMIC_LOAD_SUB_32: return ExpandATOMIC_LOAD_SUB(N,DAG);
- case ISD::ATOMIC_LOAD_SUB_64: return ExpandATOMIC_LOAD_SUB(N,DAG);
}
}
case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG";
case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG";
+ case X86ISD::ATOMADD64_DAG: return "X86ISD::ATOMADD64_DAG";
+ case X86ISD::ATOMSUB64_DAG: return "X86ISD::ATOMSUB64_DAG";
+ case X86ISD::ATOMOR64_DAG: return "X86ISD::ATOMOR64_DAG";
+ case X86ISD::ATOMXOR64_DAG: return "X86ISD::ATOMXOR64_DAG";
+ case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG";
+ case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG";
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
case X86ISD::VSHL: return "X86ISD::VSHL";
return nextMBB;
}
+// private utility function
+MachineBasicBlock *
+X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
+ MachineBasicBlock *MBB,
+ unsigned regOpcL,
+ unsigned regOpcH,
+ unsigned immOpcL,
+ unsigned immOpcH,
+ bool invSrc) {
+ // For the atomic bitwise operator, we generate
+ // thisMBB (instructions are in pairs, except cmpxchg8b)
+ // ld t1,t2 = [bitinstr.addr]
+ // newMBB:
+ // out1, out2 = phi (thisMBB, t1/t2) (newMBB, t3/t4)
+ // op t5, t6 <- out1, out2, [bitinstr.val]
+ // mov EBX, ECX <- t5, t6
+ // mov EAX, EDX <- t1, t2
+ // cmpxchg8b [bitinstr.addr] [EAX, EDX, EBX, ECX implicit]
+ // mov t3, t4 <- EAX, EDX
+ // jne newMBB
+ // result in out1, out2
+ // fallthrough -->nextMBB
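+ //
+ // cmpxchg8b compares EDX:EAX with the memory operand; on a match it
+ // stores ECX:EBX there and sets ZF, otherwise it loads the current value
+ // into EDX:EAX and clears ZF, so the jne retries until the swap succeeds.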
+
+ const TargetRegisterClass *RC = X86::GR32RegisterClass;
+ const unsigned LoadOpc = X86::MOV32rm;
+ const unsigned copyOpc = X86::MOV32rr;
+ const unsigned NotOpc = X86::NOT32r;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+
+ // First build the CFG
+ MachineFunction *F = MBB->getParent();
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(MBBIter, newMBB);
+ F->insert(MBBIter, nextMBB);
+
+ // Move all successors of thisMBB to nextMBB
+ nextMBB->transferSuccessors(thisMBB);
+
+ // Update thisMBB to fall through to newMBB
+ thisMBB->addSuccessor(newMBB);
+
+ // newMBB jumps to itself and falls through to nextMBB
+ newMBB->addSuccessor(nextMBB);
+ newMBB->addSuccessor(newMBB);
+
+ // Insert instructions into newMBB based on incoming instruction
+ // There are 8 "real" operands plus 9 implicit def/uses, ignored here.
+ assert(bInstr->getNumOperands() < 18 && "unexpected number of operands");
+ MachineOperand& dest1Oper = bInstr->getOperand(0);
+ MachineOperand& dest2Oper = bInstr->getOperand(1);
+ MachineOperand* argOpers[6];
+ for (int i=0; i < 6; ++i)
+ argOpers[i] = &bInstr->getOperand(i+2);
+
+ // x86 address has 4 operands: base, scale, index, and displacement
+ int lastAddrIndx = 3; // [0,3]
+
+ unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
+ MachineInstrBuilder MIB = BuildMI(thisMBB, TII->get(LoadOpc), t1);
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+ unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
+ MIB = BuildMI(thisMBB, TII->get(LoadOpc), t2);
+ // add 4 to displacement. getImm verifies it's immediate.
+ for (int i=0; i <= lastAddrIndx-1; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+ MachineOperand newOp3 = MachineOperand::CreateImm(argOpers[3]->getImm()+4);
+ (*MIB).addOperand(newOp3);
+
+ // t3/4 are defined later, at the bottom of the loop
+ unsigned t3 = F->getRegInfo().createVirtualRegister(RC);
+ unsigned t4 = F->getRegInfo().createVirtualRegister(RC);
+ BuildMI(newMBB, TII->get(X86::PHI), dest1Oper.getReg())
+ .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(newMBB);
+ BuildMI(newMBB, TII->get(X86::PHI), dest2Oper.getReg())
+ .addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB);
+
+ // If invSrc is set (NAND), complement the loaded value before applying
+ // the binary op below.
+ unsigned tt1 = t1, tt2 = t2;
+ if (invSrc) {
+ tt1 = F->getRegInfo().createVirtualRegister(RC);
+ tt2 = F->getRegInfo().createVirtualRegister(RC);
+ MIB = BuildMI(newMBB, TII->get(NotOpc), tt1).addReg(t1);
+ MIB = BuildMI(newMBB, TII->get(NotOpc), tt2).addReg(t2);
+ }
+
+ assert((argOpers[4]->isRegister() || argOpers[4]->isImmediate()) &&
+ "invalid operand");
+ unsigned t5 = F->getRegInfo().createVirtualRegister(RC);
+ unsigned t6 = F->getRegInfo().createVirtualRegister(RC);
+ if (argOpers[4]->isRegister())
+ MIB = BuildMI(newMBB, TII->get(regOpcL), t5);
+ else
+ MIB = BuildMI(newMBB, TII->get(immOpcL), t5);
+ MIB.addReg(tt1);
+ (*MIB).addOperand(*argOpers[4]);
+ assert(argOpers[5]->isRegister() == argOpers[4]->isRegister());
+ assert(argOpers[5]->isImmediate() == argOpers[4]->isImmediate());
+ if (argOpers[5]->isRegister())
+ MIB = BuildMI(newMBB, TII->get(regOpcH), t6);
+ else
+ MIB = BuildMI(newMBB, TII->get(immOpcH), t6);
+ MIB.addReg(tt2);
+ (*MIB).addOperand(*argOpers[5]);
+
+ MIB = BuildMI(newMBB, TII->get(copyOpc), X86::EAX);
+ MIB.addReg(t1);
+ MIB = BuildMI(newMBB, TII->get(copyOpc), X86::EDX);
+ MIB.addReg(t2);
+
+ MIB = BuildMI(newMBB, TII->get(copyOpc), X86::EBX);
+ MIB.addReg(t5);
+ MIB = BuildMI(newMBB, TII->get(copyOpc), X86::ECX);
+ MIB.addReg(t6);
+
+ MIB = BuildMI(newMBB, TII->get(X86::LCMPXCHG8B));
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+
+ assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperands");
+ (*MIB).addMemOperand(*F, *bInstr->memoperands_begin());
+
+ MIB = BuildMI(newMBB, TII->get(copyOpc), t3);
+ MIB.addReg(X86::EAX);
+ MIB = BuildMI(newMBB, TII->get(copyOpc), t4);
+ MIB.addReg(X86::EDX);
+
+ // insert branch
+ BuildMI(newMBB, TII->get(X86::JNE)).addMBB(newMBB);
+
+ F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now.
+ return nextMBB;
+}
+
// private utility function
MachineBasicBlock *
X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
X86::NOT8r, X86::AL,
X86::GR8RegisterClass, true);
// FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
+ // This group is for a 64-bit host.
case X86::ATOMAND64:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
X86::AND64ri32, X86::MOV64rm,
return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB64rr);
case X86::ATOMUMAX64:
return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA64rr);
+
+ // This group does 64-bit operations on a 32-bit host.
+ case X86::ATOMAND6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::AND32rr, X86::AND32rr,
+ X86::AND32ri, X86::AND32ri,
+ false);
+ case X86::ATOMOR6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::OR32rr, X86::OR32rr,
+ X86::OR32ri, X86::OR32ri,
+ false);
+ case X86::ATOMXOR6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::XOR32rr, X86::XOR32rr,
+ X86::XOR32ri, X86::XOR32ri,
+ false);
+ case X86::ATOMNAND6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::AND32rr, X86::AND32rr,
+ X86::AND32ri, X86::AND32ri,
+ true);
+ // FIXME carry
+ case X86::ATOMADD6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::ADD32rr, X86::ADC32rr,
+ X86::ADD32ri, X86::ADC32ri,
+ false);
+ // FIXME carry
+ case X86::ATOMSUB6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::SUB32rr, X86::SBB32rr,
+ X86::SUB32ri, X86::SBB32ri,
+ false);
}
}
/// PerformBuildVectorCombine - build_vector 0,(load i64 / f64) -> movq / movsd.
static SDValue PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget,
- const TargetLowering &TLI) {
+ const X86Subtarget *Subtarget,
+ const TargetLowering &TLI) {
unsigned NumOps = N->getNumOperands();
// Ignore single operand BUILD_VECTOR.
if (LD->getExtensionType() != ISD::NON_EXTLOAD)
return SDValue();
- return DAG.getNode(X86ISD::VZEXT_LOAD, VT, LD->getChain(), LD->getBasePtr());
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
+ SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, Tys, Ops, 2);
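+ // The replacement node carries a chain result; rewire users of the
+ // original load's chain to it so the memory dependence is preserved.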
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Base, 1), ResNode.getValue(1));
+ return ResNode;
}
/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
/// vector. If it is invalid, don't add anything to Ops.
void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
char Constraint,
+ bool hasMemory,
std::vector<SDValue>&Ops,
SelectionDAG &DAG) const {
SDValue Result(0, 0);
}
}
return;
+ case 'J':
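+ // 'J' matches an unsigned constant usable as a 64-bit shift count (0-63).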
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if (C->getZExtValue() <= 63) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ return;
case 'N':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 255) {
}
if (GA) {
- // If addressing this global requires a load (e.g. in PIC mode), we can't
- // match.
- if (Subtarget->GVRequiresExtraLoad(GA->getGlobal(), getTargetMachine(),
- false))
- return;
-
- Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
- Offset);
+ if (hasMemory)
+ Op = LowerGlobalAddress(GA->getGlobal(), DAG);
+ else
+ Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
+ Offset);
Result = Op;
break;
}
Ops.push_back(Result);
return;
}
- return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+ return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,
+ Ops, DAG);
}
std::vector<unsigned> X86TargetLowering::