SDValue V2);
static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
- switch (TM.getSubtarget<X86Subtarget>().TargetType) {
- default: llvm_unreachable("unknown subtarget type");
- case X86Subtarget::isDarwin:
- if (TM.getSubtarget<X86Subtarget>().is64Bit())
- return new X8664_MachoTargetObjectFile();
+
+ bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
+
+ if (TM.getSubtarget<X86Subtarget>().isTargetDarwin()) {
+ if (is64Bit) return new X8664_MachoTargetObjectFile();
return new TargetLoweringObjectFileMachO();
- case X86Subtarget::isELF:
- if (TM.getSubtarget<X86Subtarget>().is64Bit())
- return new X8664_ELFTargetObjectFile(TM);
+ } else if (TM.getSubtarget<X86Subtarget>().isTargetELF() ){
+ if (is64Bit) return new X8664_ELFTargetObjectFile(TM);
return new X8632_ELFTargetObjectFile(TM);
- case X86Subtarget::isMingw:
- case X86Subtarget::isCygwin:
- case X86Subtarget::isWindows:
+ } else if (TM.getSubtarget<X86Subtarget>().isTargetCOFF()) {
return new TargetLoweringObjectFileCOFF();
- }
+ }
+ llvm_unreachable("unknown subtarget type");
}
X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (Subtarget->hasSSE1())
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
- if (!Subtarget->hasSSE2())
- setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand);
+ // We may not have a libcall for MEMBARRIER so we should lower this.
+ setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom);
+
// On X86 and X86-64, atomic operations are lowered to locked instructions.
// Locked instructions, in turn, have implicit fence semantics (all memory
// operations are flushed before issuing the locked instruction, and they
addRegisterClass(MVT::v8i8, X86::VR64RegisterClass, false);
addRegisterClass(MVT::v4i16, X86::VR64RegisterClass, false);
addRegisterClass(MVT::v2i32, X86::VR64RegisterClass, false);
- addRegisterClass(MVT::v2f32, X86::VR64RegisterClass, false);
+
addRegisterClass(MVT::v1i64, X86::VR64RegisterClass, false);
setOperationAction(ISD::ADD, MVT::v8i8, Legal);
AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64);
setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64);
- setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
- AddPromotedToType (ISD::LOAD, MVT::v2f32, MVT::v1i64);
setOperationAction(ISD::LOAD, MVT::v1i64, Legal);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f32, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
setOperationAction(ISD::BIT_CONVERT, MVT::v8i8, Custom);
setOperationAction(ISD::BIT_CONVERT, MVT::v4i16, Custom);
setOperationAction(ISD::BIT_CONVERT, MVT::v2i32, Custom);
- setOperationAction(ISD::BIT_CONVERT, MVT::v2f32, Custom);
setOperationAction(ISD::BIT_CONVERT, MVT::v1i64, Custom);
}
}
EVT VT = SVT;
// Do not attempt to promote non-128-bit vectors
- if (!VT.is128BitVector()) {
+ if (!VT.is128BitVector())
continue;
- }
setOperationAction(ISD::AND, SVT, Promote);
AddPromotedToType (ISD::AND, SVT, MVT::v2i64);
return F->hasFnAttr(Attribute::OptimizeForSize) ? 0 : 4;
}
+bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
+ unsigned &Offset) const {
+ if (!Subtarget->isTargetLinux())
+ return false;
+
+ if (Subtarget->is64Bit()) {
+ // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs:
+ Offset = 0x28;
+ if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
+ AddressSpace = 256;
+ else
+ AddressSpace = 257;
+ } else {
+ // %gs:0x14 on i386
+ Offset = 0x14;
+ AddressSpace = 256;
+ }
+ return true;
+}
+
+
//===----------------------------------------------------------------------===//
// Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//
bool
X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<EVT> &OutTys,
- const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
- SelectionDAG &DAG) const {
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
- RVLocs, *DAG.getContext());
- return CCInfo.CheckReturn(OutTys, ArgsFlags, RetCC_X86);
+ RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC_X86);
}
SDValue
X86TargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- SDValue ValToCopy = Outs[i].Val;
+ SDValue ValToCopy = OutVals[i];
// Returns in ST0/ST1 are handled specially: these are pushed as operands to
// the RET instruction and handled by the FP Stackifier.
if (ValVT.isVector() && ValVT.getSizeInBits() == 64) {
ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy);
if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1)
- ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, ValToCopy);
+ ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
+ ValToCopy);
}
}
report_fatal_error("SSE register return with SSE disabled");
}
+ SDValue Val;
+
// If this is a call to a function that returns an fp value on the floating
- // point stack, but where we prefer to use the value in xmm registers, copy
- // it out as F80 and use a truncate to move it from fp stack reg to xmm reg.
- if ((VA.getLocReg() == X86::ST0 ||
- VA.getLocReg() == X86::ST1) &&
- isScalarFPTypeInSSEReg(VA.getValVT())) {
- CopyVT = MVT::f80;
- }
+ // point stack, we must guarantee the the value is popped from the stack, so
+ // a CopyFromReg is not good enough - the copy instruction may be eliminated
+ // if the return value is not used. We use the FpGET_ST0 instructions
+ // instead.
+ if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) {
+ // If we prefer to use the value in xmm registers, copy it out as f80 and
+ // use a truncate to move it from fp stack reg to xmm reg.
+ if (isScalarFPTypeInSSEReg(VA.getValVT())) CopyVT = MVT::f80;
+ bool isST0 = VA.getLocReg() == X86::ST0;
+ unsigned Opc = 0;
+ if (CopyVT == MVT::f32) Opc = isST0 ? X86::FpGET_ST0_32:X86::FpGET_ST1_32;
+ if (CopyVT == MVT::f64) Opc = isST0 ? X86::FpGET_ST0_64:X86::FpGET_ST1_64;
+ if (CopyVT == MVT::f80) Opc = isST0 ? X86::FpGET_ST0_80:X86::FpGET_ST1_80;
+ SDValue Ops[] = { Chain, InFlag };
+ Chain = SDValue(DAG.getMachineNode(Opc, dl, CopyVT, MVT::Other, MVT::Flag,
+ Ops, 2), 1);
+ Val = Chain.getValue(0);
- SDValue Val;
- if (Is64Bit && CopyVT.isVector() && CopyVT.getSizeInBits() == 64) {
+ // Round the f80 to the right size, which also moves it to the appropriate
+ // xmm register.
+ if (CopyVT != VA.getValVT())
+ Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
+ // This truncation won't change the value.
+ DAG.getIntPtrConstant(1));
+ } else if (Is64Bit && CopyVT.isVector() && CopyVT.getSizeInBits() == 64) {
// For x86-64, MMX values are returned in XMM0 / XMM1 except for v1i64.
if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
Val = Chain.getValue(0);
}
InFlag = Chain.getValue(2);
-
- if (CopyVT != VA.getValVT()) {
- // Round the F80 the right size, which also moves to the appropriate xmm
- // register.
- Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
- // This truncation won't change the value.
- DAG.getIntPtrConstant(1));
- }
-
InVals.push_back(Val);
}
// could be overwritten by lowering of arguments in case of a tail call.
if (Flags.isByVal()) {
int FI = MFI->CreateFixedObject(Flags.getByValSize(),
- VA.getLocMemOffset(), isImmutable, false);
+ VA.getLocMemOffset(), isImmutable);
return DAG.getFrameIndex(FI, getPointerTy());
} else {
int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
- VA.getLocMemOffset(), isImmutable, false);
+ VA.getLocMemOffset(), isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
return DAG.getLoad(ValVT, dl, Chain, FIN,
PseudoSourceValue::getFixedStack(FI), 0,
if (isVarArg) {
if (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
CallConv != CallingConv::X86_ThisCall)) {
- FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,
- true, false));
+ FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,true));
}
if (Is64Bit) {
unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;
// Calculate the new stack slot for the return address.
int SlotSize = Is64Bit ? 8 : 4;
int NewReturnAddrFI =
- MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false, false);
+ MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false);
EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
- Outs, Ins, DAG);
+ Outs, OutVals, Ins, DAG);
// Sibcalls are automatically detected tailcalls which do not require
// ABI changes.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
EVT RegVT = VA.getLocVT();
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
bool isByVal = Flags.isByVal();
}
}
- if (Is64Bit && isVarArg) {
+ if (Is64Bit && isVarArg && !Subtarget->isTargetWin64()) {
// From AMD64 ABI document:
// For calls that may call functions that use varargs or stdargs
// (prototype-less calls or calls to functions containing ellipsis (...) in
// the number of registers, but must be an ubound on the number of SSE
// registers used and is in the range 0 - 8 inclusive.
- // FIXME: Verify this on Win64
// Count the number of XMM registers allocated.
static const unsigned XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
if (VA.isRegLoc())
continue;
assert(VA.isMemLoc());
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
// Create frame index.
int32_t Offset = VA.getLocMemOffset()+FPDiff;
uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
- FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true, false);
+ FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
FIN = DAG.getFrameIndex(FI, getPointerTy());
if (Flags.isByVal()) {
FPDiff, dl);
}
- bool WasGlobalOrExternal = false;
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
// In the 64-bit large code model, we have to make all calls
// pc-relative offset may not be large enough to hold the whole
// address.
} else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- WasGlobalOrExternal = true;
// If the callee is a GlobalAddress node (quite common, every direct call
// is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
// it.
OpFlags = X86II::MO_DARWIN_STUB;
}
- Callee = DAG.getTargetGlobalAddress(GV, getPointerTy(),
+ Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
G->getOffset(), OpFlags);
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- WasGlobalOrExternal = true;
unsigned char OpFlags = 0;
// On ELF targets, in either X86-64 or X86-32 mode, direct calls to external
bool isCalleeStructRet,
bool isCallerStructRet,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const {
if (!IsTailCallConvention(CalleeCC) &&
if (RegInfo->needsStackRealignment(MF))
return false;
- // Do not sibcall optimize vararg calls unless the call site is not passing any
- // arguments.
+ // Do not sibcall optimize vararg calls unless the call site is not passing
+ // any arguments.
if (isVarArg && !Outs.empty())
return false;
((X86TargetMachine&)getTargetMachine()).getInstrInfo();
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- EVT RegVT = VA.getLocVT();
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
if (VA.getLocInfo() == CCValAssign::Indirect)
return false;
// If the tailcall address may be in a register, then make sure it's
// possible to register allocate for it. In 32-bit, the call address can
// only target EAX, EDX, or ECX since the tail call must be scheduled after
- // callee-saved registers are restored. In 64-bit, it's RAX, RCX, RDX, RSI,
- // RDI, R8, R9, R11.
- if (!isa<GlobalAddressSDNode>(Callee) &&
+ // callee-saved registers are restored. These happen to be the same
+ // registers used to pass 'inreg' arguments so watch out for those.
+ if (!Subtarget->is64Bit() &&
+ !isa<GlobalAddressSDNode>(Callee) &&
!isa<ExternalSymbolSDNode>(Callee)) {
- unsigned Limit = Subtarget->is64Bit() ? 8 : 3;
unsigned NumInRegs = 0;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- if (VA.isRegLoc()) {
- if (++NumInRegs == Limit)
+ if (!VA.isRegLoc())
+ continue;
+ unsigned Reg = VA.getLocReg();
+ switch (Reg) {
+ default: break;
+ case X86::EAX: case X86::EDX: case X86::ECX:
+ if (++NumInRegs == 3)
return false;
+ break;
}
}
}
}
FastISel *
-X86TargetLowering::createFastISel(MachineFunction &mf,
- DenseMap<const Value *, unsigned> &vm,
- DenseMap<const BasicBlock*, MachineBasicBlock*> &bm,
- DenseMap<const AllocaInst *, int> &am,
- std::vector<std::pair<MachineInstr*, unsigned> > &pn
-#ifndef NDEBUG
- , SmallSet<const Instruction *, 8> &cil
-#endif
- ) const {
- return X86::createFastISel(mf, vm, bm, am, pn
-#ifndef NDEBUG
- , cil
-#endif
- );
+X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
+ return X86::createFastISel(funcInfo);
}
// Set up a frame object for the return address.
uint64_t SlotSize = TD->getPointerSize();
ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
- false, false);
+ false);
FuncInfo->setRAIndex(ReturnAddrIndex);
}
}
/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
-/// ones, or rewriting v4i32 / v2f32 as 2 wide ones if possible. This can be
+/// ones, or rewriting v4i32 / v2i32 as 2 wide ones if possible. This can be
/// done when every pair / quad of shuffle mask elements point to elements in
/// the right sequence. e.g.
/// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15>
unsigned NumElems = VT.getVectorNumElements();
unsigned NewWidth = (NumElems == 4) ? 2 : 4;
EVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
- EVT MaskEltVT = MaskVT.getVectorElementType();
EVT NewVT = MaskVT;
switch (VT.getSimpleVT().SimpleTy) {
default: assert(false && "Unexpected!");
SDValue
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
- if (Op.getValueType() == MVT::v2f32)
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f32,
- DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i32,
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32,
- Op.getOperand(0))));
-
- if (Op.getValueType() == MVT::v1i64 && Op.getOperand(0).getValueType() == MVT::i64)
+
+ if (Op.getValueType() == MVT::v1i64 &&
+ Op.getOperand(0).getValueType() == MVT::i64)
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0));
SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
if (OpFlags == X86II::MO_NO_FLAG &&
X86::isOffsetSuitableForCodeModel(Offset, M)) {
// A direct static reference to a global.
- Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset);
+ Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset);
Offset = 0;
} else {
- Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0, OpFlags);
+ Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
}
if (Subtarget->isPICStyleRIPRel() &&
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
DebugLoc dl = GA->getDebugLoc();
- SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
+ SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
GA->getValueType(0),
GA->getOffset(),
OperandFlags);
// emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
// exec)
- SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
+ SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
+ GA->getValueType(0),
GA->getOffset(), OperandFlags);
SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
OpFlag = X86II::MO_TLVP_PIC_BASE;
else
OpFlag = X86II::MO_TLVP;
-
- SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(),
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
getPointerTy(),
GA->getOffset(), OpFlag);
-
- DebugLoc DL = Op.getDebugLoc();
SDValue Offset = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
// With PIC32, the address is actually $g + Offset.
// Load the value out, extending it from f32 to f80.
// FIXME: Avoid the extend by constructing the right constant pool?
- SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(),
+ SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, MVT::f80, dl, DAG.getEntryNode(),
FudgePtr, PseudoSourceValue::getConstantPool(),
0, MVT::f32, false, false, 4);
// Extend everything to 80 bits to force it to be done on x87.
bool NeedCF = false;
bool NeedOF = false;
switch (X86CC) {
+ default: break;
case X86::COND_A: case X86::COND_AE:
case X86::COND_B: case X86::COND_BE:
NeedCF = true;
case X86::COND_O: case X86::COND_NO:
NeedOF = true;
break;
- default: break;
}
// See if we can use the EFLAGS value from the operand instead of
// doing a separate TEST. TEST always sets OF and CF to 0, so unless
// we prove that the arithmetic won't overflow, we can't use OF or CF.
- if (Op.getResNo() == 0 && !NeedOF && !NeedCF) {
- unsigned Opcode = 0;
- unsigned NumOperands = 0;
- switch (Op.getNode()->getOpcode()) {
- case ISD::ADD:
- // Due to an isel shortcoming, be conservative if this add is
- // likely to be selected as part of a load-modify-store
- // instruction. When the root node in a match is a store, isel
- // doesn't know how to remap non-chain non-flag uses of other
- // nodes in the match, such as the ADD in this case. This leads
- // to the ADD being left around and reselected, with the result
- // being two adds in the output. Alas, even if none our users
- // are stores, that doesn't prove we're O.K. Ergo, if we have
- // any parents that aren't CopyToReg or SETCC, eschew INC/DEC.
- // A better fix seems to require climbing the DAG back to the
- // root, and it doesn't seem to be worth the effort.
- for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
- UE = Op.getNode()->use_end(); UI != UE; ++UI)
- if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC)
- goto default_case;
- if (ConstantSDNode *C =
- dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) {
- // An add of one will be selected as an INC.
- if (C->getAPIntValue() == 1) {
- Opcode = X86ISD::INC;
- NumOperands = 1;
- break;
- }
- // An add of negative one (subtract of one) will be selected as a DEC.
- if (C->getAPIntValue().isAllOnesValue()) {
- Opcode = X86ISD::DEC;
- NumOperands = 1;
- break;
- }
+ if (Op.getResNo() != 0 || NeedOF || NeedCF)
+ // Emit a CMP with 0, which is the TEST pattern.
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+ DAG.getConstant(0, Op.getValueType()));
+
+ unsigned Opcode = 0;
+ unsigned NumOperands = 0;
+ switch (Op.getNode()->getOpcode()) {
+ case ISD::ADD:
+ // Due to an isel shortcoming, be conservative if this add is likely to be
+ // selected as part of a load-modify-store instruction. When the root node
+ // in a match is a store, isel doesn't know how to remap non-chain non-flag
+ // uses of other nodes in the match, such as the ADD in this case. This
+ // leads to the ADD being left around and reselected, with the result being
+ // two adds in the output. Alas, even if none our users are stores, that
+ // doesn't prove we're O.K. Ergo, if we have any parents that aren't
+ // CopyToReg or SETCC, eschew INC/DEC. A better fix seems to require
+ // climbing the DAG back to the root, and it doesn't seem to be worth the
+ // effort.
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI)
+ if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC)
+ goto default_case;
+
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) {
+ // An add of one will be selected as an INC.
+ if (C->getAPIntValue() == 1) {
+ Opcode = X86ISD::INC;
+ NumOperands = 1;
+ break;
}
- // Otherwise use a regular EFLAGS-setting add.
- Opcode = X86ISD::ADD;
- NumOperands = 2;
- break;
- case ISD::AND: {
- // If the primary and result isn't used, don't bother using X86ISD::AND,
- // because a TEST instruction will be better.
- bool NonFlagUse = false;
- for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
- UE = Op.getNode()->use_end(); UI != UE; ++UI) {
- SDNode *User = *UI;
- unsigned UOpNo = UI.getOperandNo();
- if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) {
- // Look pass truncate.
- UOpNo = User->use_begin().getOperandNo();
- User = *User->use_begin();
- }
- if (User->getOpcode() != ISD::BRCOND &&
- User->getOpcode() != ISD::SETCC &&
- (User->getOpcode() != ISD::SELECT || UOpNo != 0)) {
- NonFlagUse = true;
- break;
- }
+
+ // An add of negative one (subtract of one) will be selected as a DEC.
+ if (C->getAPIntValue().isAllOnesValue()) {
+ Opcode = X86ISD::DEC;
+ NumOperands = 1;
+ break;
+ }
+ }
+
+ // Otherwise use a regular EFLAGS-setting add.
+ Opcode = X86ISD::ADD;
+ NumOperands = 2;
+ break;
+ case ISD::AND: {
+ // If the primary and result isn't used, don't bother using X86ISD::AND,
+ // because a TEST instruction will be better.
+ bool NonFlagUse = false;
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ unsigned UOpNo = UI.getOperandNo();
+ if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) {
+ // Look pass truncate.
+ UOpNo = User->use_begin().getOperandNo();
+ User = *User->use_begin();
}
- if (!NonFlagUse)
+
+ if (User->getOpcode() != ISD::BRCOND &&
+ User->getOpcode() != ISD::SETCC &&
+ (User->getOpcode() != ISD::SELECT || UOpNo != 0)) {
+ NonFlagUse = true;
break;
+ }
}
+
+ if (!NonFlagUse)
+ break;
+ }
// FALL THROUGH
- case ISD::SUB:
- case ISD::OR:
- case ISD::XOR:
- // Due to the ISEL shortcoming noted above, be conservative if this op is
- // likely to be selected as part of a load-modify-store instruction.
- for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ case ISD::SUB:
+ case ISD::OR:
+ case ISD::XOR:
+ // Due to the ISEL shortcoming noted above, be conservative if this op is
+ // likely to be selected as part of a load-modify-store instruction.
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
UE = Op.getNode()->use_end(); UI != UE; ++UI)
- if (UI->getOpcode() == ISD::STORE)
- goto default_case;
- // Otherwise use a regular EFLAGS-setting instruction.
- switch (Op.getNode()->getOpcode()) {
- case ISD::SUB: Opcode = X86ISD::SUB; break;
- case ISD::OR: Opcode = X86ISD::OR; break;
- case ISD::XOR: Opcode = X86ISD::XOR; break;
- case ISD::AND: Opcode = X86ISD::AND; break;
- default: llvm_unreachable("unexpected operator!");
- }
- NumOperands = 2;
- break;
- case X86ISD::ADD:
- case X86ISD::SUB:
- case X86ISD::INC:
- case X86ISD::DEC:
- case X86ISD::OR:
- case X86ISD::XOR:
- case X86ISD::AND:
- return SDValue(Op.getNode(), 1);
- default:
- default_case:
- break;
- }
- if (Opcode != 0) {
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
- SmallVector<SDValue, 4> Ops;
- for (unsigned i = 0; i != NumOperands; ++i)
- Ops.push_back(Op.getOperand(i));
- SDValue New = DAG.getNode(Opcode, dl, VTs, &Ops[0], NumOperands);
- DAG.ReplaceAllUsesWith(Op, New);
- return SDValue(New.getNode(), 1);
+ if (UI->getOpcode() == ISD::STORE)
+ goto default_case;
+
+ // Otherwise use a regular EFLAGS-setting instruction.
+ switch (Op.getNode()->getOpcode()) {
+ default: llvm_unreachable("unexpected operator!");
+ case ISD::SUB: Opcode = X86ISD::SUB; break;
+ case ISD::OR: Opcode = X86ISD::OR; break;
+ case ISD::XOR: Opcode = X86ISD::XOR; break;
+ case ISD::AND: Opcode = X86ISD::AND; break;
}
+
+ NumOperands = 2;
+ break;
+ case X86ISD::ADD:
+ case X86ISD::SUB:
+ case X86ISD::INC:
+ case X86ISD::DEC:
+ case X86ISD::OR:
+ case X86ISD::XOR:
+ case X86ISD::AND:
+ return SDValue(Op.getNode(), 1);
+ default:
+ default_case:
+ break;
}
- // Otherwise just emit a CMP with 0, which is the TEST pattern.
- return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
- DAG.getConstant(0, Op.getValueType()));
+ if (Opcode == 0)
+ // Emit a CMP with 0, which is the TEST pattern.
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+ DAG.getConstant(0, Op.getValueType()));
+
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0; i != NumOperands; ++i)
+ Ops.push_back(Op.getOperand(i));
+
+ SDValue New = DAG.getNode(Opcode, dl, VTs, &Ops[0], NumOperands);
+ DAG.ReplaceAllUsesWith(Op, New);
+ return SDValue(New.getNode(), 1);
}
/// Emit nodes that will be selected as "cmp Op0,Op1", or something
SDValue Flag;
- EVT IntPtr = getPointerTy();
EVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag);
Store = DAG.getStore(Op.getOperand(0), dl,
DAG.getConstant(FuncInfo->getVarArgsFPOffset(),
MVT::i32),
- FIN, SV, 0, false, false, 0);
+ FIN, SV, 4, false, false, 0);
MemOps.push_back(Store);
// Store ptr to overflow_arg_area
FIN, DAG.getIntPtrConstant(4));
SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
getPointerTy());
- Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0,
+ Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 8,
false, false, 0);
MemOps.push_back(Store);
FIN, DAG.getIntPtrConstant(8));
SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
getPointerTy());
- Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0,
+ Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 16,
false, false, 0);
MemOps.push_back(Store);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
// X86-64 va_list is a struct { i32, i32, i8*, i8* }.
assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!");
- SDValue Chain = Op.getOperand(0);
- SDValue SrcPtr = Op.getOperand(1);
- SDValue SrcSV = Op.getOperand(2);
report_fatal_error("VAArgInst is not yet implemented for x86-64!");
return SDValue();
InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32;
if (InRegCount > 2) {
- report_fatal_error("Nest register in use - reduce number of inreg parameters!");
+ report_fatal_error("Nest register in use - reduce number of inreg"
+ " parameters!");
}
}
break;
return Sum;
}
+SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (!Subtarget->hasSSE2())
+ return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(0, MVT::i32));
+
+ unsigned isDev = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
+ if(!isDev)
+ return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
+ else {
+ unsigned Op1 = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned Op2 = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ unsigned Op3 = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ unsigned Op4 = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+
+ // def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
+ if (!Op1 && !Op2 && !Op3 && Op4)
+ return DAG.getNode(X86ISD::SFENCE, dl, MVT::Other, Op.getOperand(0));
+
+ // def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
+ if (Op1 && !Op2 && !Op3 && !Op4)
+ return DAG.getNode(X86ISD::LFENCE, dl, MVT::Other, Op.getOperand(0));
+
+ // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)),
+ // (MFENCE)>;
+ return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
+ }
+}
+
SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
EVT T = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Should not custom lower this!");
+ case ISD::MEMBARRIER: return LowerMEMBARRIER(Op,DAG);
case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG);
case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
unsigned immOpc,
unsigned LoadOpc,
unsigned CXchgOpc,
- unsigned copyOpc,
unsigned notOpc,
unsigned EAXreg,
TargetRegisterClass *RC,
F->insert(MBBIter, newMBB);
F->insert(MBBIter, nextMBB);
- // Move all successors to thisMBB to nextMBB
- nextMBB->transferSuccessors(thisMBB);
+ // Transfer the remainder of thisMBB and its successor edges to nextMBB.
+ nextMBB->splice(nextMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(bInstr)),
+ thisMBB->end());
+ nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
// Update thisMBB to fall through to newMBB
thisMBB->addSuccessor(newMBB);
newMBB->addSuccessor(newMBB);
// Insert instructions into newMBB based on incoming instruction
- assert(bInstr->getNumOperands() < X86AddrNumOperands + 4 &&
+ assert(bInstr->getNumOperands() < X86::AddrNumOperands + 4 &&
"unexpected number of operands");
DebugLoc dl = bInstr->getDebugLoc();
MachineOperand& destOper = bInstr->getOperand(0);
- MachineOperand* argOpers[2 + X86AddrNumOperands];
+ MachineOperand* argOpers[2 + X86::AddrNumOperands];
int numArgs = bInstr->getNumOperands() - 1;
for (int i=0; i < numArgs; ++i)
argOpers[i] = &bInstr->getOperand(i+1);
// x86 address has 4 operands: base, index, scale, and displacement
- int lastAddrIndx = X86AddrNumOperands - 1; // [0,3]
+ int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
int valArgIndx = lastAddrIndx + 1;
unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
MIB.addReg(tt);
(*MIB).addOperand(*argOpers[valArgIndx]);
- MIB = BuildMI(newMBB, dl, TII->get(copyOpc), EAXreg);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), EAXreg);
MIB.addReg(t1);
MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc));
(*MIB).setMemRefs(bInstr->memoperands_begin(),
bInstr->memoperands_end());
- MIB = BuildMI(newMBB, dl, TII->get(copyOpc), destOper.getReg());
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
MIB.addReg(EAXreg);
// insert branch
BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
- F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now.
+ bInstr->eraseFromParent(); // The pseudo instruction is gone now.
return nextMBB;
}
const TargetRegisterClass *RC = X86::GR32RegisterClass;
const unsigned LoadOpc = X86::MOV32rm;
- const unsigned copyOpc = X86::MOV32rr;
const unsigned NotOpc = X86::NOT32r;
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
F->insert(MBBIter, newMBB);
F->insert(MBBIter, nextMBB);
- // Move all successors to thisMBB to nextMBB
- nextMBB->transferSuccessors(thisMBB);
+ // Transfer the remainder of thisMBB and its successor edges to nextMBB.
+ nextMBB->splice(nextMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(bInstr)),
+ thisMBB->end());
+ nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
// Update thisMBB to fall through to newMBB
thisMBB->addSuccessor(newMBB);
DebugLoc dl = bInstr->getDebugLoc();
// Insert instructions into newMBB based on incoming instruction
// There are 8 "real" operands plus 9 implicit def/uses, ignored here.
- assert(bInstr->getNumOperands() < X86AddrNumOperands + 14 &&
+ assert(bInstr->getNumOperands() < X86::AddrNumOperands + 14 &&
"unexpected number of operands");
MachineOperand& dest1Oper = bInstr->getOperand(0);
MachineOperand& dest2Oper = bInstr->getOperand(1);
- MachineOperand* argOpers[2 + X86AddrNumOperands];
- for (int i=0; i < 2 + X86AddrNumOperands; ++i) {
+ MachineOperand* argOpers[2 + X86::AddrNumOperands];
+ for (int i=0; i < 2 + X86::AddrNumOperands; ++i) {
argOpers[i] = &bInstr->getOperand(i+2);
// We use some of the operands multiple times, so conservatively just
}
// x86 address has 5 operands: base, index, scale, displacement, and segment.
- int lastAddrIndx = X86AddrNumOperands - 1; // [0,3]
+ int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
MachineInstrBuilder MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t1);
MIB.addReg(t2);
(*MIB).addOperand(*argOpers[valArgIndx + 1]);
- MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EAX);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
MIB.addReg(t1);
- MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EDX);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EDX);
MIB.addReg(t2);
- MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EBX);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EBX);
MIB.addReg(t5);
- MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::ECX);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::ECX);
MIB.addReg(t6);
MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B));
(*MIB).setMemRefs(bInstr->memoperands_begin(),
bInstr->memoperands_end());
- MIB = BuildMI(newMBB, dl, TII->get(copyOpc), t3);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t3);
MIB.addReg(X86::EAX);
- MIB = BuildMI(newMBB, dl, TII->get(copyOpc), t4);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t4);
MIB.addReg(X86::EDX);
// insert branch
BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
- F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now.
+ bInstr->eraseFromParent(); // The pseudo instruction is gone now.
return nextMBB;
}
F->insert(MBBIter, newMBB);
F->insert(MBBIter, nextMBB);
- // Move all successors of thisMBB to nextMBB
- nextMBB->transferSuccessors(thisMBB);
+ // Transfer the remainder of thisMBB and its successor edges to nextMBB.
+ nextMBB->splice(nextMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(mInstr)),
+ thisMBB->end());
+ nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
// Update thisMBB to fall through to newMBB
thisMBB->addSuccessor(newMBB);
DebugLoc dl = mInstr->getDebugLoc();
// Insert instructions into newMBB based on incoming instruction
- assert(mInstr->getNumOperands() < X86AddrNumOperands + 4 &&
+ assert(mInstr->getNumOperands() < X86::AddrNumOperands + 4 &&
"unexpected number of operands");
MachineOperand& destOper = mInstr->getOperand(0);
- MachineOperand* argOpers[2 + X86AddrNumOperands];
+ MachineOperand* argOpers[2 + X86::AddrNumOperands];
int numArgs = mInstr->getNumOperands() - 1;
for (int i=0; i < numArgs; ++i)
argOpers[i] = &mInstr->getOperand(i+1);
// x86 address has 4 operands: base, index, scale, and displacement
- int lastAddrIndx = X86AddrNumOperands - 1; // [0,3]
+ int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
int valArgIndx = lastAddrIndx + 1;
unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
if (argOpers[valArgIndx]->isReg())
- MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t2);
else
MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2);
(*MIB).addOperand(*argOpers[valArgIndx]);
- MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), X86::EAX);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
MIB.addReg(t1);
MIB = BuildMI(newMBB, dl, TII->get(X86::CMP32rr));
(*MIB).setMemRefs(mInstr->memoperands_begin(),
mInstr->memoperands_end());
- MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), destOper.getReg());
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
MIB.addReg(X86::EAX);
// insert branch
BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
- F->DeleteMachineInstr(mInstr); // The pseudo instruction is gone now.
+ mInstr->eraseFromParent(); // The pseudo instruction is gone now.
return nextMBB;
}
X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
unsigned numArgs, bool memArg) const {
- MachineFunction *F = BB->getParent();
DebugLoc dl = MI->getDebugLoc();
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
BuildMI(BB, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg())
.addReg(X86::XMM0);
- F->DeleteMachineInstr(MI);
+ MI->eraseFromParent();
return BB;
}
F->insert(MBBIter, XMMSaveMBB);
F->insert(MBBIter, EndMBB);
- // Set up the CFG.
- // Move any original successors of MBB to the end block.
- EndMBB->transferSuccessors(MBB);
+ // Transfer the remainder of MBB and its successor edges to EndMBB.
+ EndMBB->splice(EndMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ MBB->end());
+ EndMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
// The original block will now fall through to the XMM save block.
MBB->addSuccessor(XMMSaveMBB);
// The XMMSaveMBB will fall through to the end block.
.addMemOperand(MMO);
}
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return EndMBB;
}
MachineFunction *F = BB->getParent();
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- unsigned Opc =
- X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
-
- BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB);
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
- // Update machine-CFG edges by first adding all successors of the current
- // block to the new block which will contain the Phi node for the select.
- for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
- E = BB->succ_end(); I != E; ++I)
- sinkMBB->addSuccessor(*I);
-
- // Next, remove all successors of the current block, and add the true
- // and fallthrough blocks as its successors.
- while (!BB->succ_empty())
- BB->removeSuccessor(BB->succ_begin());
-
- // Add the true and fallthrough blocks as its successors.
- BB->addSuccessor(copy0MBB);
- BB->addSuccessor(sinkMBB);
-
// If the EFLAGS register isn't dead in the terminator, then claim that it's
// live into the sink and copy blocks.
const MachineFunction *MF = BB->getParent();
const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
BitVector ReservedRegs = TRI->getReservedRegs(*MF);
- const MachineInstr *Term = BB->getFirstTerminator();
- for (unsigned I = 0, E = Term->getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = Term->getOperand(I);
- if (!MO.isReg() || MO.isKill() || MO.isDead()) continue;
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
+ if (!MO.isReg() || !MO.isUse() || MO.isKill()) continue;
unsigned Reg = MO.getReg();
if (Reg != X86::EFLAGS) continue;
copy0MBB->addLiveIn(Reg);
sinkMBB->addLiveIn(Reg);
}
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // Create the conditional branch instruction.
+ unsigned Opc =
+ X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
+ BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB);
+
// copy0MBB:
// %FalseValue = ...
// # fallthrough to sinkMBB
// sinkMBB:
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
- BuildMI(sinkMBB, DL, TII->get(X86::PHI), MI->getOperand(0).getReg())
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(X86::PHI), MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
.addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return sinkMBB;
}
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
- MachineFunction *F = BB->getParent();
// The lowering is pretty easy: we're just emitting the call to _alloca. The
// non-trivial part is impdef of ESP.
// FIXME: The code should be tweaked as soon as we'll try to do codegen for
// mingw-w64.
- BuildMI(BB, DL, TII->get(X86::CALLpcrel32))
+ BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
.addExternalSymbol("_alloca")
.addReg(X86::EAX, RegState::Implicit)
.addReg(X86::ESP, RegState::Implicit)
.addReg(X86::EAX, RegState::Define | RegState::Implicit)
.addReg(X86::ESP, RegState::Define | RegState::Implicit);
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
assert(MI->getOperand(3).isGlobal() && "This should be a global");
if (Subtarget->is64Bit()) {
- MachineInstrBuilder MIB = BuildMI(BB, DL, TII->get(X86::MOV64rm), X86::RDI)
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
+ TII->get(X86::MOV64rm), X86::RDI)
.addReg(X86::RIP)
.addImm(0).addReg(0)
.addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
MI->getOperand(3).getTargetFlags())
.addReg(0);
- MIB = BuildMI(BB, DL, TII->get(X86::CALL64m));
- addDirectMem(MIB, X86::RDI).addReg(0);
+ MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
+ addDirectMem(MIB, X86::RDI);
} else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
- MachineInstrBuilder MIB = BuildMI(BB, DL, TII->get(X86::MOV32rm), X86::EAX)
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
+ TII->get(X86::MOV32rm), X86::EAX)
.addReg(0)
.addImm(0).addReg(0)
.addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
MI->getOperand(3).getTargetFlags())
.addReg(0);
- MIB = BuildMI(BB, DL, TII->get(X86::CALL32m));
- addDirectMem(MIB, X86::EAX).addReg(0);
+ MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
+ addDirectMem(MIB, X86::EAX);
} else {
- MachineInstrBuilder MIB = BuildMI(BB, DL, TII->get(X86::MOV32rm), X86::EAX)
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
+ TII->get(X86::MOV32rm), X86::EAX)
.addReg(TII->getGlobalBaseReg(F))
.addImm(0).addReg(0)
.addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
MI->getOperand(3).getTargetFlags())
.addReg(0);
- MIB = BuildMI(BB, DL, TII->get(X86::CALL32m));
- addDirectMem(MIB, X86::EAX).addReg(0);
+ MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
+ addDirectMem(MIB, X86::EAX);
}
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
// mode when truncating to an integer value.
MachineFunction *F = BB->getParent();
int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2, false);
- addFrameReference(BuildMI(BB, DL, TII->get(X86::FNSTCW16m)), CWFrameIdx);
+ addFrameReference(BuildMI(*BB, MI, DL,
+ TII->get(X86::FNSTCW16m)), CWFrameIdx);
// Load the old value of the high byte of the control word...
unsigned OldCW =
F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass);
- addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16rm), OldCW),
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16rm), OldCW),
CWFrameIdx);
// Set the high part to be round to zero...
- addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16mi)), CWFrameIdx)
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mi)), CWFrameIdx)
.addImm(0xC7F);
// Reload the modified control word now...
- addFrameReference(BuildMI(BB, DL, TII->get(X86::FLDCW16m)), CWFrameIdx);
+ addFrameReference(BuildMI(*BB, MI, DL,
+ TII->get(X86::FLDCW16m)), CWFrameIdx);
// Restore the memory image of control word to original value
- addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16mr)), CWFrameIdx)
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)), CWFrameIdx)
.addReg(OldCW);
// Get the X86 opcode to use.
} else {
AM.Disp = Op.getImm();
}
- addFullAddress(BuildMI(BB, DL, TII->get(Opc)), AM)
- .addReg(MI->getOperand(X86AddrNumOperands).getReg());
+ addFullAddress(BuildMI(*BB, MI, DL, TII->get(Opc)), AM)
+ .addReg(MI->getOperand(X86::AddrNumOperands).getReg());
// Reload the original control word now.
- addFrameReference(BuildMI(BB, DL, TII->get(X86::FLDCW16m)), CWFrameIdx);
+ addFrameReference(BuildMI(*BB, MI, DL,
+ TII->get(X86::FLDCW16m)), CWFrameIdx);
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
// String/text processing lowering.
case X86::ATOMAND32:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
X86::AND32ri, X86::MOV32rm,
- X86::LCMPXCHG32, X86::MOV32rr,
+ X86::LCMPXCHG32,
X86::NOT32r, X86::EAX,
X86::GR32RegisterClass);
case X86::ATOMOR32:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr,
X86::OR32ri, X86::MOV32rm,
- X86::LCMPXCHG32, X86::MOV32rr,
+ X86::LCMPXCHG32,
X86::NOT32r, X86::EAX,
X86::GR32RegisterClass);
case X86::ATOMXOR32:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr,
X86::XOR32ri, X86::MOV32rm,
- X86::LCMPXCHG32, X86::MOV32rr,
+ X86::LCMPXCHG32,
X86::NOT32r, X86::EAX,
X86::GR32RegisterClass);
case X86::ATOMNAND32:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
X86::AND32ri, X86::MOV32rm,
- X86::LCMPXCHG32, X86::MOV32rr,
+ X86::LCMPXCHG32,
X86::NOT32r, X86::EAX,
X86::GR32RegisterClass, true);
case X86::ATOMMIN32:
case X86::ATOMAND16:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
X86::AND16ri, X86::MOV16rm,
- X86::LCMPXCHG16, X86::MOV16rr,
+ X86::LCMPXCHG16,
X86::NOT16r, X86::AX,
X86::GR16RegisterClass);
case X86::ATOMOR16:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr,
X86::OR16ri, X86::MOV16rm,
- X86::LCMPXCHG16, X86::MOV16rr,
+ X86::LCMPXCHG16,
X86::NOT16r, X86::AX,
X86::GR16RegisterClass);
case X86::ATOMXOR16:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr,
X86::XOR16ri, X86::MOV16rm,
- X86::LCMPXCHG16, X86::MOV16rr,
+ X86::LCMPXCHG16,
X86::NOT16r, X86::AX,
X86::GR16RegisterClass);
case X86::ATOMNAND16:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
X86::AND16ri, X86::MOV16rm,
- X86::LCMPXCHG16, X86::MOV16rr,
+ X86::LCMPXCHG16,
X86::NOT16r, X86::AX,
X86::GR16RegisterClass, true);
case X86::ATOMMIN16:
case X86::ATOMAND8:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
X86::AND8ri, X86::MOV8rm,
- X86::LCMPXCHG8, X86::MOV8rr,
+ X86::LCMPXCHG8,
X86::NOT8r, X86::AL,
X86::GR8RegisterClass);
case X86::ATOMOR8:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr,
X86::OR8ri, X86::MOV8rm,
- X86::LCMPXCHG8, X86::MOV8rr,
+ X86::LCMPXCHG8,
X86::NOT8r, X86::AL,
X86::GR8RegisterClass);
case X86::ATOMXOR8:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr,
X86::XOR8ri, X86::MOV8rm,
- X86::LCMPXCHG8, X86::MOV8rr,
+ X86::LCMPXCHG8,
X86::NOT8r, X86::AL,
X86::GR8RegisterClass);
case X86::ATOMNAND8:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
X86::AND8ri, X86::MOV8rm,
- X86::LCMPXCHG8, X86::MOV8rr,
+ X86::LCMPXCHG8,
X86::NOT8r, X86::AL,
X86::GR8RegisterClass, true);
// FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
case X86::ATOMAND64:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
X86::AND64ri32, X86::MOV64rm,
- X86::LCMPXCHG64, X86::MOV64rr,
+ X86::LCMPXCHG64,
X86::NOT64r, X86::RAX,
X86::GR64RegisterClass);
case X86::ATOMOR64:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr,
X86::OR64ri32, X86::MOV64rm,
- X86::LCMPXCHG64, X86::MOV64rr,
+ X86::LCMPXCHG64,
X86::NOT64r, X86::RAX,
X86::GR64RegisterClass);
case X86::ATOMXOR64:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr,
X86::XOR64ri32, X86::MOV64rm,
- X86::LCMPXCHG64, X86::MOV64rr,
+ X86::LCMPXCHG64,
X86::NOT64r, X86::RAX,
X86::GR64RegisterClass);
case X86::ATOMNAND64:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
X86::AND64ri32, X86::MOV64rm,
- X86::LCMPXCHG64, X86::MOV64rr,
+ X86::LCMPXCHG64,
X86::NOT64r, X86::RAX,
X86::GR64RegisterClass, true);
case X86::ATOMMIN64:
// Store the value to a temporary stack slot.
SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType());
- SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr, NULL, 0,
- false, false, 0);
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr, NULL,
+ 0, false, false, 0);
// Replace each use (extract) with a load of the appropriate element.
for (SmallVectorImpl<SDNode *>::iterator UI = Uses.begin(),
uint64_t Offset = EltSize * cast<ConstantSDNode>(Idx)->getZExtValue();
SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
- SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), OffsetVal, StackPtr);
+ SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(),
+ OffsetVal, StackPtr);
// Load the scalar.
- SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch, ScalarAddr,
- NULL, 0, false, false, 0);
+ SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch,
+ ScalarAddr, NULL, 0, false, false, 0);
// Replace the exact with the load.
DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), LoadScalar);
// Converting this to a min would handle NaNs incorrectly, and swapping
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
- if (!FiniteOnlyFPMath() &&
- (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) {
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
if (!UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
// Converting this to a max would handle NaNs incorrectly, and swapping
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
- if (!FiniteOnlyFPMath() &&
- (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) {
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
if (!UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
// cause it to handle NaNs incorrectly.
if (!UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) {
- if (!FiniteOnlyFPMath() &&
- (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
std::swap(LHS, RHS);
}
case ISD::SETULT:
// Converting this to a max would handle NaNs incorrectly.
- if (!FiniteOnlyFPMath() &&
- (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
Opcode = X86ISD::FMAX;
break;
// cause it to handle NaNs incorrectly.
if (!UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) {
- if (!FiniteOnlyFPMath() &&
- (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
std::swap(LHS, RHS);
}
// so don't worry about this.
// Verify this is a simple bswap.
- if (CI->getNumOperands() != 2 ||
- CI->getType() != CI->getOperand(1)->getType() ||
+ if (CI->getNumArgOperands() != 1 ||
+ CI->getType() != CI->getArgOperand(0)->getType() ||
!CI->getType()->isIntegerTy())
return false;
Module *M = CI->getParent()->getParent()->getParent();
Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
- Value *Op = CI->getOperand(1);
+ Value *Op = CI->getArgOperand(0);
Op = CallInst::Create(Int, Op, CI->getName(), CI);
CI->replaceAllUsesWith(Op);
// In any sort of PIC mode addresses need to be computed at runtime by
// adding in a register or some sort of table lookup. These can't
// be used as immediates.
- if (Subtarget->isPICStyleGOT() || Subtarget->isPICStyleStubPIC() ||
- Subtarget->isPICStyleRIPRel())
+ if (Subtarget->isPICStyleGOT() || Subtarget->isPICStyleStubPIC())
return;
// If we are in non-pic codegen mode, we allow the address of a global (with
getTargetMachine())))
return;
- Result = DAG.getTargetGlobalAddress(GV, GA->getValueType(0), Offset);
+ Result = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
+ GA->getValueType(0), Offset);
break;
}
}