#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
+static cl::opt<bool>
+DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX"));
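+// A sketch of the intended use of this flag (it is cl::Hidden, so it only
+// shows up under -help-hidden): passing -disable-mmx to llc keeps the
+// v8i8/v4i16/v2i32 types out of the MMX register classes registered below,
+// even when the subtarget reports MMX support.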
+
// Forward declarations.
static SDValue getMOVLMask(unsigned NumElems, SelectionDAG &DAG);
// X86 is weird, it always uses i8 for shift amounts and setcc results.
setShiftAmountType(MVT::i8);
- setSetCCResultContents(ZeroOrOneSetCCResult);
+ setBooleanContents(ZeroOrOneBooleanContent);
setSchedulingPreference(SchedulingForRegPressure);
setShiftAmountFlavor(Mask); // shl X, 32 == shl X, 0
setStackPointerRegisterToSaveRestore(X86StackPtr);
if (Subtarget->is64Bit())
addRegisterClass(MVT::i64, X86::GR64RegisterClass);
- setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
// We don't accept any truncstore of integer registers.
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
setTruncStoreAction(MVT::i32, MVT::i16, Expand);
setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
- setTruncStoreAction(MVT::i16, MVT::i8, Expand);
+ setTruncStoreAction(MVT::i16, MVT::i8, Expand);
+
+ // SETOEQ and SETUNE require checking two conditions.
+ setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
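+  // Roughly speaking (a sketch, not the exact DAG the legalizer builds),
+  // expanding these turns one compare into two flag tests:
+  //   setcc oeq a, b  ->  sete  AND setnp   ; ZF set and PF clear
+  //   setcc une a, b  ->  setne OR  setp    ; ZF clear or PF set
+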
// Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
// operation.
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
} else {
- if (X86ScalarSSEf64)
+ if (X86ScalarSSEf64) {
+ // We have an impenetrably clever algorithm for ui64->double only.
+ setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
// If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
- else
+ } else
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
}
setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand);
// Expand certain atomics
- setOperationAction(ISD::ATOMIC_CMP_SWAP_8 , MVT::i8, Custom);
- setOperationAction(ISD::ATOMIC_CMP_SWAP_16, MVT::i16, Custom);
- setOperationAction(ISD::ATOMIC_CMP_SWAP_32, MVT::i32, Custom);
- setOperationAction(ISD::ATOMIC_CMP_SWAP_64, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Custom);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Custom);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_SUB_8, MVT::i8, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_SUB_16, MVT::i16, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_SUB_32, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_SUB_64, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
+
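+  // On 32-bit targets i64 is not a legal type, so these entries are handled
+  // during type legalization: ReplaceNodeResults (below) routes them through
+  // ReplaceATOMIC_BINARY_64 to X86ISD::ATOM*64_DAG nodes, which are expanded
+  // into a cmpxchg8b loop by EmitAtomicBit6432WithCustomInserter.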
+ if (!Subtarget->is64Bit()) {
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Custom);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
+ }
// Use the default ISD::DBG_STOPPOINT, ISD::DECLARE expansion.
setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
setOperationAction(ISD::UNDEF, MVT::f80, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
{
+ bool ignored;
APFloat TmpFlt(+0.0);
- TmpFlt.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven);
+ TmpFlt.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
+ &ignored);
addLegalFPImmediate(TmpFlt); // FLD0
TmpFlt.changeSign();
addLegalFPImmediate(TmpFlt); // FLD0/FCHS
APFloat TmpFlt2(+1.0);
- TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven);
+ TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
+ &ignored);
addLegalFPImmediate(TmpFlt2); // FLD1
TmpFlt2.changeSign();
addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
setOperationAction(ISD::FEXP, MVT::f80, Expand);
setOperationAction(ISD::FEXP2, MVT::f80, Expand);
- // First set operation action for all vector types to expand. Then we
- // will selectively turn on ones that can be effectively codegen'd.
+ // First set operation action for all vector types to either promote
+ // (for widening) or expand (for scalarization). Then we will selectively
+ // turn on ones that can be effectively codegen'd.
for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
setOperationAction(ISD::ADD , (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FEXP2, (MVT::SimpleValueType)VT, Expand);
}
- if (Subtarget->hasMMX()) {
+ if (!DisableMMX && Subtarget->hasMMX()) {
addRegisterClass(MVT::v8i8, X86::VR64RegisterClass);
addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i16, Custom);
+
+ setTruncStoreAction(MVT::v8i16, MVT::v8i8, Expand);
+ setOperationAction(ISD::TRUNCATE, MVT::v8i8, Expand);
+ setOperationAction(ISD::SELECT, MVT::v8i8, Promote);
+ setOperationAction(ISD::SELECT, MVT::v4i16, Promote);
+ setOperationAction(ISD::SELECT, MVT::v2i32, Promote);
+ setOperationAction(ISD::SELECT, MVT::v1i64, Custom);
}
if (Subtarget->hasSSE1()) {
setOperationAction(ISD::ADD, MVT::v8i16, Legal);
setOperationAction(ISD::ADD, MVT::v4i32, Legal);
setOperationAction(ISD::ADD, MVT::v2i64, Legal);
+ setOperationAction(ISD::MUL, MVT::v2i64, Custom);
setOperationAction(ISD::SUB, MVT::v16i8, Legal);
setOperationAction(ISD::SUB, MVT::v8i16, Legal);
setOperationAction(ISD::SUB, MVT::v4i32, Legal);
if (Subtarget->hasSSE41()) {
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
- setOperationAction(ISD::MUL, MVT::v2i64, Legal);
    // i8 and i16 vectors are custom, because the source register and source
    // memory operand types are not the same width.  f32 vectors are
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ // Add/Sub/Mul with overflow operations are custom lowered.
+ setOperationAction(ISD::SADDO, MVT::i32, Custom);
+ setOperationAction(ISD::SADDO, MVT::i64, Custom);
+ setOperationAction(ISD::UADDO, MVT::i32, Custom);
+ setOperationAction(ISD::UADDO, MVT::i64, Custom);
+ setOperationAction(ISD::SSUBO, MVT::i32, Custom);
+ setOperationAction(ISD::SSUBO, MVT::i64, Custom);
+ setOperationAction(ISD::USUBO, MVT::i32, Custom);
+ setOperationAction(ISD::USUBO, MVT::i64, Custom);
+ setOperationAction(ISD::SMULO, MVT::i32, Custom);
+ setOperationAction(ISD::SMULO, MVT::i64, Custom);
+ setOperationAction(ISD::UMULO, MVT::i32, Custom);
+ setOperationAction(ISD::UMULO, MVT::i64, Custom);
+
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::BUILD_VECTOR);
MVT
X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
bool isSrcConst, bool isSrcStr) const {
- if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16)
- return MVT::v4i32;
- if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16)
- return MVT::v4f32;
+ // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
+ // linux. This is because the stack realignment code can't handle certain
+ // cases like PR2962. This should be removed when PR2962 is fixed.
+ if (Subtarget->getStackAlignment() >= 16) {
+ if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16)
+ return MVT::v4i32;
+ if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16)
+ return MVT::v4f32;
+ }
if (Subtarget->is64Bit() && Size >= 8)
return MVT::i64;
return MVT::i32;
// Merge everything together with a MERGE_VALUES node.
ResultVals.push_back(Chain);
- return DAG.getMergeValues(TheCall->getVTList(), &ResultVals[0],
- ResultVals.size()).getNode();
+ return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(), &ResultVals[0],
+ ResultVals.size()).getNode();
}
FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
// Return the new list of results.
- return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
- ArgValues.size()).getValue(Op.getResNo());
+ return DAG.getNode(ISD::MERGE_VALUES, Op.getNode()->getVTList(),
+ &ArgValues[0], ArgValues.size()).getValue(Op.getResNo());
}
SDValue
MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
}
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes));
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
SDValue RetAddrFrIdx;
  // Load return address for tail calls.
// non-JIT mode.
if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
getTargetMachine(), true))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy(),
+ G->getOffset());
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
} else if (IsTailCall) {
if (IsTailCall) {
Ops.push_back(Chain);
- Ops.push_back(DAG.getIntPtrConstant(NumBytes));
- Ops.push_back(DAG.getIntPtrConstant(0));
+ Ops.push_back(DAG.getIntPtrConstant(NumBytes, true));
+ Ops.push_back(DAG.getIntPtrConstant(0, true));
if (InFlag.getNode())
Ops.push_back(InFlag);
Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
// Returns a flag for retval copy to use.
Chain = DAG.getCALLSEQ_END(Chain,
- DAG.getIntPtrConstant(NumBytes),
- DAG.getIntPtrConstant(NumBytesForCalleeToPush),
+ DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(NumBytesForCalleeToPush,
+ true),
InFlag);
InFlag = Chain.getValue(1);
DenseMap<const Value *, unsigned> &vm,
DenseMap<const BasicBlock *,
MachineBasicBlock *> &bm,
- DenseMap<const AllocaInst *, int> &am) {
-
- return X86::createFastISel(mf, mmo, vm, bm, am);
+ DenseMap<const AllocaInst *, int> &am
+#ifndef NDEBUG
+ , SmallSet<Instruction*, 8> &cil
+#endif
+ ) {
+ return X86::createFastISel(mf, mmo, vm, bm, am
+#ifndef NDEBUG
+ , cil
+#endif
+ );
}
}
switch (SetCCOpcode) {
- default: break;
+ default: assert(0 && "Invalid integer condition!");
case ISD::SETEQ: X86CC = X86::COND_E; break;
case ISD::SETGT: X86CC = X86::COND_G; break;
case ISD::SETGE: X86CC = X86::COND_GE; break;
case ISD::SETULE: X86CC = X86::COND_BE; break;
case ISD::SETUGE: X86CC = X86::COND_AE; break;
}
- } else {
- // First determine if it requires or is profitable to flip the operands.
- bool Flip = false;
- switch (SetCCOpcode) {
- default: break;
- case ISD::SETOLT:
- case ISD::SETOLE:
- case ISD::SETUGT:
- case ISD::SETUGE:
- Flip = true;
- break;
- }
+ return true;
+ }
+
+  // First determine if it is required or profitable to flip the operands.
- // If LHS is a foldable load, but RHS is not, flip the condition.
- if (!Flip &&
- (ISD::isNON_EXTLoad(LHS.getNode()) && LHS.hasOneUse()) &&
- !(ISD::isNON_EXTLoad(RHS.getNode()) && RHS.hasOneUse())) {
- SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
- Flip = true;
- }
- if (Flip)
- std::swap(LHS, RHS);
-
- // On a floating point condition, the flags are set as follows:
- // ZF PF CF op
- // 0 | 0 | 0 | X > Y
- // 0 | 0 | 1 | X < Y
- // 1 | 0 | 0 | X == Y
- // 1 | 1 | 1 | unordered
- switch (SetCCOpcode) {
- default: break;
- case ISD::SETUEQ:
- case ISD::SETEQ:
- X86CC = X86::COND_E;
- break;
- case ISD::SETOLT: // flipped
- case ISD::SETOGT:
- case ISD::SETGT:
- X86CC = X86::COND_A;
- break;
- case ISD::SETOLE: // flipped
- case ISD::SETOGE:
- case ISD::SETGE:
- X86CC = X86::COND_AE;
- break;
- case ISD::SETUGT: // flipped
- case ISD::SETULT:
- case ISD::SETLT:
- X86CC = X86::COND_B;
- break;
- case ISD::SETUGE: // flipped
- case ISD::SETULE:
- case ISD::SETLE:
- X86CC = X86::COND_BE;
- break;
- case ISD::SETONE:
- case ISD::SETNE:
- X86CC = X86::COND_NE;
- break;
- case ISD::SETUO:
- X86CC = X86::COND_P;
- break;
- case ISD::SETO:
- X86CC = X86::COND_NP;
- break;
- }
+ // If LHS is a foldable load, but RHS is not, flip the condition.
+ if ((ISD::isNON_EXTLoad(LHS.getNode()) && LHS.hasOneUse()) &&
+ !(ISD::isNON_EXTLoad(RHS.getNode()) && RHS.hasOneUse())) {
+ SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
+ std::swap(LHS, RHS);
}
- return X86CC != X86::COND_INVALID;
+ switch (SetCCOpcode) {
+ default: break;
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ std::swap(LHS, RHS);
+ break;
+ }
+
+ // On a floating point condition, the flags are set as follows:
+ // ZF PF CF op
+ // 0 | 0 | 0 | X > Y
+ // 0 | 0 | 1 | X < Y
+ // 1 | 0 | 0 | X == Y
+ // 1 | 1 | 1 | unordered
+ switch (SetCCOpcode) {
+ default: return false;
+ case ISD::SETUEQ:
+ case ISD::SETEQ: X86CC = X86::COND_E; return true;
+ case ISD::SETOLT: // flipped
+ case ISD::SETOGT:
+ case ISD::SETGT: X86CC = X86::COND_A; return true;
+ case ISD::SETOLE: // flipped
+ case ISD::SETOGE:
+ case ISD::SETGE: X86CC = X86::COND_AE; return true;
+ case ISD::SETUGT: // flipped
+ case ISD::SETULT:
+ case ISD::SETLT: X86CC = X86::COND_B; return true;
+ case ISD::SETUGE: // flipped
+ case ISD::SETULE:
+ case ISD::SETLE: X86CC = X86::COND_BE; return true;
+ case ISD::SETONE:
+ case ISD::SETNE: X86CC = X86::COND_NE; return true;
+ case ISD::SETUO: X86CC = X86::COND_P; return true;
+ case ISD::SETO: X86CC = X86::COND_NP; return true;
+ }
}
/// hasFPCMov - is there a floating point cmov for the specific X86 condition
return cast<ConstantSDNode>(ElementBase)->getZExtValue() < NumElems;
}
+/// getSplatMaskEltNo - Given a splat mask, return the index to the element
+/// we want to splat.
+static SDValue getSplatMaskEltNo(SDNode *N) {
+ assert(isSplatMask(N) && "Not a splat mask");
+ unsigned NumElems = N->getNumOperands();
+  for (unsigned i = 0; i != NumElems; ++i) {
+    SDValue Elt = N->getOperand(i);
+    if (isa<ConstantSDNode>(Elt))
+      return Elt;
+  }
+  assert(0 && "No splat value found!");
+ return SDValue();
+}
+
+
/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
/// a splat of a single element and it's a 2 or 4 element mask.
bool X86::isSplatMask(SDNode *N) {
return Op;
SDValue V1 = Op.getOperand(0);
SDValue Mask = Op.getOperand(2);
- unsigned NumElems = Mask.getNumOperands();
+ unsigned MaskNumElems = Mask.getNumOperands();
+ unsigned NumElems = MaskNumElems;
// Special handling of v4f32 -> v4i32.
if (VT != MVT::v4f32) {
- Mask = getUnpacklMask(NumElems, DAG);
+ // Find which element we want to splat.
+ SDNode* EltNoNode = getSplatMaskEltNo(Mask.getNode()).getNode();
+ unsigned EltNo = cast<ConstantSDNode>(EltNoNode)->getZExtValue();
+ // unpack elements to the correct location
while (NumElems > 4) {
+ if (EltNo < NumElems/2) {
+ Mask = getUnpacklMask(MaskNumElems, DAG);
+ } else {
+ Mask = getUnpackhMask(MaskNumElems, DAG);
+ EltNo -= NumElems/2;
+ }
V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
NumElems >>= 1;
}
- Mask = getZeroVector(MVT::v4i32, true, DAG);
+ SDValue Cst = DAG.getConstant(EltNo, MVT::i32);
+ Mask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst);
}
V1 = DAG.getNode(ISD::BIT_CONVERT, PVT, V1);
++V2InOrder;
} else if (EltIdx < 8) {
V1Elts.push_back(Elt);
+ V2Elts.push_back(DAG.getConstant(i+8, MaskEVT));
++V1FromV1;
} else {
+ V1Elts.push_back(Elt);
V2Elts.push_back(DAG.getConstant(EltIdx-8, MaskEVT));
++V2FromV2;
}
// Canonicalize movddup shuffles.
if (V2IsUndef && Subtarget->hasSSE2() &&
+ VT.getSizeInBits() == 128 &&
X86::isMOVDDUPMask(PermMask.getNode()))
return CanonicalizeMovddup(Op, V1, PermMask, DAG, Subtarget->hasSSE3());
} else if (VT == MVT::f32) {
// EXTRACTPS outputs to a GPR32 register which will require a movd to copy
// the result back to FR32 register. It's only worth matching if the
- // result has a single use which is a store or a bitcast to i32.
+ // result has a single use which is a store or a bitcast to i32. And in
+ // the case of a store, it's not worth it if the index is a constant 0,
+ // because a MOVSSmr can be used instead, which is smaller and faster.
if (!Op.hasOneUse())
return SDValue();
SDNode *User = *Op.getNode()->use_begin();
- if (User->getOpcode() != ISD::STORE &&
+ if ((User->getOpcode() != ISD::STORE ||
+ (isa<ConstantSDNode>(Op.getOperand(1)) &&
+ cast<ConstantSDNode>(Op.getOperand(1))->isNullValue())) &&
(User->getOpcode() != ISD::BIT_CONVERT ||
User->getValueType(0) != MVT::i32))
return SDValue();
SDValue
X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV,
+ int64_t Offset,
SelectionDAG &DAG) const {
- SDValue Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
+ bool IsPic = getTargetMachine().getRelocationModel() == Reloc::PIC_;
+ bool ExtraLoadRequired =
+ Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false);
+
+ // Create the TargetGlobalAddress node, folding in the constant
+ // offset if it is legal.
+ SDValue Result;
+ if (!IsPic && !ExtraLoadRequired && isInt32(Offset)) {
+ Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset);
+ Offset = 0;
+ } else
+ Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0);
Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
+
// With PIC, the address is actually $g + Offset.
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
- !Subtarget->isPICStyleRIPRel()) {
+ if (IsPic && !Subtarget->isPICStyleRIPRel()) {
Result = DAG.getNode(ISD::ADD, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
Result);
// the GlobalAddress must be in the base or index register of the address, not
// the GV offset field. Platform check is inside GVRequiresExtraLoad() call
// The same applies for external symbols during PIC codegen
- if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false))
+ if (ExtraLoadRequired)
Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result,
PseudoSourceValue::getGOT(), 0);
+ // If there was a non-zero offset that we didn't fold, create an explicit
+ // addition for it.
+ if (Offset != 0)
+ Result = DAG.getNode(ISD::ADD, getPointerTy(), Result,
+ DAG.getConstant(Offset, getPointerTy()));
+
return Result;
}
SDValue
X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) {
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- return LowerGlobalAddress(GV, DAG);
+ int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+ return LowerGlobalAddress(GV, Offset, DAG);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
return Result;
}
+SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ MVT SrcVT = Op.getOperand(0).getValueType();
+ assert(SrcVT.getSimpleVT() == MVT::i64 && "Unknown UINT_TO_FP to lower!");
+
+ // We only handle SSE2 f64 target here; caller can handle the rest.
+ if (Op.getValueType() != MVT::f64 || !X86ScalarSSEf64)
+ return SDValue();
+
+ // This algorithm is not obvious. Here it is in C code, more or less:
+/*
+ double uint64_to_double( uint32_t hi, uint32_t lo )
+ {
+ static const __m128i exp = { 0x4330000045300000ULL, 0 };
+ static const __m128d bias = { 0x1.0p84, 0x1.0p52 };
+
+ // copy ints to xmm registers
+ __m128i xh = _mm_cvtsi32_si128( hi );
+ __m128i xl = _mm_cvtsi32_si128( lo );
+
+ // combine into low half of a single xmm register
+ __m128i x = _mm_unpacklo_epi32( xh, xl );
+ __m128d d;
+ double sd;
+
+ // merge in appropriate exponents to give the integer bits the
+ // right magnitude
+ x = _mm_unpacklo_epi32( x, exp );
+
+ // subtract away the biases to deal with the IEEE-754 double precision
+ // implicit 1
+ d = _mm_sub_pd( (__m128d) x, bias );
+
+    // All conversions up to here are exact. The correctly rounded result is
+    // calculated in the current rounding mode by the following horizontal add.
+ d = _mm_add_sd( d, _mm_unpackhi_pd( d, d ) );
+ _mm_store_sd( &sd, d ); //since we are returning doubles in XMM, this
+ // store doesn't really need to be here (except maybe to zero the other
+ // double)
+ return sd;
+ }
+*/
+
+ // Build some magic constants.
+ std::vector<Constant*>CV0;
+ CV0.push_back(ConstantInt::get(APInt(32, 0x45300000)));
+ CV0.push_back(ConstantInt::get(APInt(32, 0x43300000)));
+ CV0.push_back(ConstantInt::get(APInt(32, 0)));
+ CV0.push_back(ConstantInt::get(APInt(32, 0)));
+ Constant *C0 = ConstantVector::get(CV0);
+ SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 4);
+
+ std::vector<Constant*>CV1;
+ CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4530000000000000ULL))));
+ CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4330000000000000ULL))));
+ Constant *C1 = ConstantVector::get(CV1);
+ SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 4);
+
+ SmallVector<SDValue, 4> MaskVec;
+ MaskVec.push_back(DAG.getConstant(0, MVT::i32));
+ MaskVec.push_back(DAG.getConstant(4, MVT::i32));
+ MaskVec.push_back(DAG.getConstant(1, MVT::i32));
+ MaskVec.push_back(DAG.getConstant(5, MVT::i32));
+ SDValue UnpcklMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, &MaskVec[0],
+ MaskVec.size());
+ SmallVector<SDValue, 4> MaskVec2;
+ MaskVec2.push_back(DAG.getConstant(1, MVT::i32));
+ MaskVec2.push_back(DAG.getConstant(0, MVT::i32));
+ SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, &MaskVec2[0],
+ MaskVec2.size());
+
+ SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4i32,
+ DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
+ Op.getOperand(0),
+ DAG.getIntPtrConstant(1)));
+ SDValue XR2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4i32,
+ DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
+ Op.getOperand(0),
+ DAG.getIntPtrConstant(0)));
+ SDValue Unpck1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32,
+ XR1, XR2, UnpcklMask);
+ SDValue CLod0 = DAG.getLoad(MVT::v4i32, DAG.getEntryNode(), CPIdx0,
+ PseudoSourceValue::getConstantPool(), 0, false, 16);
+ SDValue Unpck2 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32,
+ Unpck1, CLod0, UnpcklMask);
+ SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Unpck2);
+ SDValue CLod1 = DAG.getLoad(MVT::v2f64, CLod0.getValue(1), CPIdx1,
+ PseudoSourceValue::getConstantPool(), 0, false, 16);
+ SDValue Sub = DAG.getNode(ISD::FSUB, MVT::v2f64, XR2F, CLod1);
+ // Add the halves; easiest way is to swap them into another reg first.
+ SDValue Shuf = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2f64,
+ Sub, Sub, ShufMask);
+ SDValue Add = DAG.getNode(ISD::FADD, MVT::v2f64, Shuf, Sub);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f64, Add,
+ DAG.getIntPtrConstant(0));
+}
+
std::pair<SDValue,SDValue> X86TargetLowering::
FP_TO_SINTHelper(SDValue Op, SelectionDAG &DAG) {
assert(Op.getValueType().getSimpleVT() <= MVT::i64 &&
return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0);
}
-SDNode *X86TargetLowering::ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG) {
- std::pair<SDValue,SDValue> Vals = FP_TO_SINTHelper(SDValue(N, 0), DAG);
- SDValue FIST = Vals.first, StackSlot = Vals.second;
- if (FIST.getNode() == 0) return 0;
-
- MVT VT = N->getValueType(0);
-
- // Return a load from the stack slot.
- SDValue Res = DAG.getLoad(VT, FIST, StackSlot, NULL, 0);
-
- // Use MERGE_VALUES to drop the chain result value and get a node with one
- // result. This requires turning off getMergeValues simplification, since
- // otherwise it will give us Res back.
- return DAG.getMergeValues(&Res, 1, false).getNode();
-}
-
SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getValueType();
MVT EltVT = VT;
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
- ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
unsigned X86CC;
- if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC,
- Op0, Op1, DAG)) {
- Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1);
- return DAG.getNode(X86ISD::SETCC, MVT::i8,
- DAG.getConstant(X86CC, MVT::i8), Cond);
- }
-
- assert(isFP && "Illegal integer SetCC!");
-
+ if (!translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC,
+ Op0, Op1, DAG))
+ assert(0 && "Illegal SetCC!");
+
+
Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1);
- switch (SetCCOpcode) {
- default: assert(false && "Illegal floating point SetCC!");
- case ISD::SETOEQ: { // !PF & ZF
- SDValue Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8,
- DAG.getConstant(X86::COND_NP, MVT::i8), Cond);
- SDValue Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
- DAG.getConstant(X86::COND_E, MVT::i8), Cond);
- return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
- }
- case ISD::SETUNE: { // PF | !ZF
- SDValue Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8,
- DAG.getConstant(X86::COND_P, MVT::i8), Cond);
- SDValue Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
- DAG.getConstant(X86::COND_NE, MVT::i8), Cond);
- return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
- }
- }
+ return DAG.getNode(X86ISD::SETCC, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), Cond);
}
SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
return Result;
}
+// isX86LogicalCmp - Return true if opcode is an X86 logical comparison.
+static bool isX86LogicalCmp(unsigned Opc) {
+ return Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI;
+}
+
SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) {
bool addTest = true;
SDValue Cond = Op.getOperand(0);
bool IllegalFPCMov = false;
if (VT.isFloatingPoint() && !VT.isVector() &&
!isScalarFPTypeInSSEReg(VT)) // FPStack?
- IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
+ IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue());
- if ((Opc == X86ISD::CMP ||
- Opc == X86ISD::COMI ||
- Opc == X86ISD::UCOMI) && !IllegalFPCMov) {
+ if (isX86LogicalCmp(Opc) && !IllegalFPCMov) {
Cond = Cmp;
addTest = false;
}
return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
}
+// isAndOrOfSetCCs - Return true if the node is an ISD::AND or ISD::OR of
+// two X86ISD::SETCC nodes, each of which has no other use apart from the
+// AND / OR.
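+// For example (a sketch of one accepted shape):
+//   (or (X86ISD::SETCC cc0, cmp), (X86ISD::SETCC cc1, cmp))
+// where neither SETCC has any other user.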
+static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc) {
+ Opc = Op.getOpcode();
+ if (Opc != ISD::OR && Opc != ISD::AND)
+ return false;
+ return (Op.getOperand(0).getOpcode() == X86ISD::SETCC &&
+ Op.getOperand(0).hasOneUse() &&
+ Op.getOperand(1).getOpcode() == X86ISD::SETCC &&
+ Op.getOperand(1).hasOneUse());
+}
+
SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
bool addTest = true;
SDValue Chain = Op.getOperand(0);
if (Cond.getOpcode() == ISD::SETCC)
Cond = LowerSETCC(Cond, DAG);
+ else if (Cond.getOpcode() == X86ISD::ADD ||
+ Cond.getOpcode() == X86ISD::SUB ||
+ Cond.getOpcode() == X86ISD::SMUL ||
+ Cond.getOpcode() == X86ISD::UMUL)
+ Cond = LowerXALUO(Cond, DAG);
// If condition flag is set by a X86ISD::CMP, then use it as the condition
// setting operand in place of the X86ISD::SETCC.
SDValue Cmp = Cond.getOperand(1);
unsigned Opc = Cmp.getOpcode();
- if (Opc == X86ISD::CMP ||
- Opc == X86ISD::COMI ||
- Opc == X86ISD::UCOMI) {
+ if (isX86LogicalCmp(Opc)) {
Cond = Cmp;
addTest = false;
+ } else {
+ switch (cast<ConstantSDNode>(CC)->getZExtValue()) {
+ default: break;
+ case X86::COND_O:
+ case X86::COND_C:
+ // These can only come from an arithmetic instruction with overflow, e.g.
+ // SADDO, UADDO.
+ Cond = Cond.getNode()->getOperand(1);
+ addTest = false;
+ break;
+ }
+ }
+ } else {
+ unsigned CondOpc;
+ if (Cond.hasOneUse() && isAndOrOfSetCCs(Cond, CondOpc)) {
+ SDValue Cmp = Cond.getOperand(0).getOperand(1);
+ unsigned Opc = Cmp.getOpcode();
+ if (CondOpc == ISD::OR) {
+ // Also, recognize the pattern generated by an FCMP_UNE. We can emit
+ // two branches instead of an explicit OR instruction with a
+ // separate test.
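+        // For example, for br (setcc une a, b) on f64 the hope is to end up
+        // with roughly:
+        //   ucomisd %xmm1, %xmm0
+        //   jne     dest
+        //   jp      dest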
+ if (Cmp == Cond.getOperand(1).getOperand(1) &&
+ isX86LogicalCmp(Opc)) {
+ CC = Cond.getOperand(0).getOperand(0);
+ Chain = DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
+ Chain, Dest, CC, Cmp);
+ CC = Cond.getOperand(1).getOperand(0);
+ Cond = Cmp;
+ addTest = false;
+ }
+ } else { // ISD::AND
+ // Also, recognize the pattern generated by an FCMP_OEQ. We can emit
+ // two branches instead of an explicit AND instruction with a
+ // separate test. However, we only do this if this block doesn't
+ // have a fall-through edge, because this requires an explicit
+ // jmp when the condition is false.
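+        // For example, for br (setcc oeq a, b) on f64, with the successors
+        // reversed, the aim is roughly:
+        //   ucomisd %xmm1, %xmm0
+        //   jne     false_bb
+        //   jp      false_bb
+        //   jmp     true_bb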
+ if (Cmp == Cond.getOperand(1).getOperand(1) &&
+ isX86LogicalCmp(Opc) &&
+ Op.getNode()->hasOneUse()) {
+ X86::CondCode CCode =
+ (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ CC = DAG.getConstant(CCode, MVT::i8);
+ SDValue User = SDValue(*Op.getNode()->use_begin(), 0);
+ // Look for an unconditional branch following this conditional branch.
+ // We need this because we need to reverse the successors in order
+ // to implement FCMP_OEQ.
+ if (User.getOpcode() == ISD::BR) {
+ SDValue FalseBB = User.getOperand(1);
+ SDValue NewBR =
+ DAG.UpdateNodeOperands(User, User.getOperand(0), Dest);
+ assert(NewBR == User);
+ Dest = FalseBB;
+
+ Chain = DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
+ Chain, Dest, CC, Cmp);
+ X86::CondCode CCode =
+ (X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0);
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ CC = DAG.getConstant(CCode, MVT::i8);
+ Cond = Cmp;
+ addTest = false;
+ }
+ }
+ }
}
}
Cond= DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8));
}
return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
- Chain, Op.getOperand(2), CC, Cond);
+ Chain, Dest, CC, Cond);
}
MVT IntPtr = getPointerTy();
MVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0));
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag);
Flag = Chain.getValue(1);
Flag = Chain.getValue(1);
Chain = DAG.getCALLSEQ_END(Chain,
- DAG.getIntPtrConstant(0),
- DAG.getIntPtrConstant(0),
+ DAG.getIntPtrConstant(0, true),
+ DAG.getIntPtrConstant(0, true),
Flag);
Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1);
SDValue
X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG,
- SDValue Chain,
- SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align,
- const Value *DstSV, uint64_t DstSVOff) {
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ const Value *DstSV,
+ uint64_t DstSVOff) {
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
- /// If not DWORD aligned or size is more than the threshold, call the library.
- /// The libc version is likely to be faster for these cases. It can use the
- /// address value and run time information about the CPU.
+ // If not DWORD aligned or size is more than the threshold, call the library.
+ // The libc version is likely to be faster for these cases. It can use the
+ // address value and run time information about the CPU.
if ((Align & 3) != 0 ||
!ConstantSize ||
ConstantSize->getZExtValue() >
// Check to see if there is a specialized entry-point for memory zeroing.
ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
- if (const char *bzeroEntry =
- V && V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
+
+ if (const char *bzeroEntry = V &&
+ V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
MVT IntPtr = getPointerTy();
const Type *IntPtrTy = TD->getIntPtrType();
TargetLowering::ArgListTy Args;
return DAG.getNode(ISD::TokenFactor, MVT::Other, &Results[0], Results.size());
}
-/// Expand the result of: i64,outchain = READCYCLECOUNTER inchain
-SDNode *X86TargetLowering::ExpandREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG){
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
- SDValue TheChain = N->getOperand(0);
- SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheChain, 1);
- if (Subtarget->is64Bit()) {
- SDValue rax = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1));
- SDValue rdx = DAG.getCopyFromReg(rax.getValue(1), X86::RDX,
- MVT::i64, rax.getValue(2));
- SDValue Tmp = DAG.getNode(ISD::SHL, MVT::i64, rdx,
- DAG.getConstant(32, MVT::i8));
- SDValue Ops[] = {
- DAG.getNode(ISD::OR, MVT::i64, rax, Tmp), rdx.getValue(1)
- };
-
- return DAG.getMergeValues(Ops, 2).getNode();
- }
-
- SDValue eax = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1));
- SDValue edx = DAG.getCopyFromReg(eax.getValue(1), X86::EDX,
- MVT::i32, eax.getValue(2));
- // Use a buildpair to merge the two 32-bit values into a 64-bit one.
- SDValue Ops[] = { eax, edx };
- Ops[0] = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Ops, 2);
-
- // Use a MERGE_VALUES to return the value and chain.
- Ops[1] = edx.getValue(1);
- return DAG.getMergeValues(Ops, 2).getNode();
-}
-
SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) {
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
}
SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
- // Depths > 0 not supported yet!
- if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0)
- return SDValue();
-
- SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
- return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI,
- DAG.getIntPtrConstant(TD->getPointerSize()));
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+ MVT VT = Op.getValueType();
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP;
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), FrameReg, VT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, DAG.getEntryNode(), FrameAddr, NULL, 0);
+ return FrameAddr;
}
SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
return Op;
}
+SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply");
+
+ // ulong2 Ahi = __builtin_ia32_psrlqi128( a, 32);
+ // ulong2 Bhi = __builtin_ia32_psrlqi128( b, 32);
+ // ulong2 AloBlo = __builtin_ia32_pmuludq128( a, b );
+ // ulong2 AloBhi = __builtin_ia32_pmuludq128( a, Bhi );
+ // ulong2 AhiBlo = __builtin_ia32_pmuludq128( Ahi, b );
+ //
+ // AloBhi = __builtin_ia32_psllqi128( AloBhi, 32 );
+ // AhiBlo = __builtin_ia32_psllqi128( AhiBlo, 32 );
+ // return AloBlo + AloBhi + AhiBlo;
+
+ SDValue A = Op.getOperand(0);
+ SDValue B = Op.getOperand(1);
+
+ SDValue Ahi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
+ A, DAG.getConstant(32, MVT::i32));
+ SDValue Bhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
+ B, DAG.getConstant(32, MVT::i32));
+ SDValue AloBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
+ A, B);
+ SDValue AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
+ A, Bhi);
+ SDValue AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
+ Ahi, B);
+ AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
+ AloBhi, DAG.getConstant(32, MVT::i32));
+ AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
+ AhiBlo, DAG.getConstant(32, MVT::i32));
+ SDValue Res = DAG.getNode(ISD::ADD, VT, AloBlo, AloBhi);
+ Res = DAG.getNode(ISD::ADD, VT, Res, AhiBlo);
+ return Res;
+}
+
+
+SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) {
+  // Lower the "add/sub/mul with overflow" instruction into a regular
+  // instruction plus a "setcc" that checks the overflow flag. The "brcond"
+  // lowering looks for this combo and may remove the "setcc" if the "setcc"
+ // has only one use.
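+  //
+  // For example, an @llvm.sadd.with.overflow.i32 becomes, roughly:
+  //   %sum = X86ISD::ADD a, b       ; value result plus EFLAGS
+  //   %ovf = X86ISD::SETCC COND_O   ; reads the flags produced by the ADD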
+ SDNode *N = Op.getNode();
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ unsigned BaseOp = 0;
+ unsigned Cond = 0;
+
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Unknown ovf instruction!");
+ case ISD::SADDO:
+ BaseOp = X86ISD::ADD;
+ Cond = X86::COND_O;
+ break;
+ case ISD::UADDO:
+ BaseOp = X86ISD::ADD;
+ Cond = X86::COND_C;
+ break;
+ case ISD::SSUBO:
+ BaseOp = X86ISD::SUB;
+ Cond = X86::COND_O;
+ break;
+ case ISD::USUBO:
+ BaseOp = X86ISD::SUB;
+ Cond = X86::COND_C;
+ break;
+ case ISD::SMULO:
+ BaseOp = X86ISD::SMUL;
+ Cond = X86::COND_O;
+ break;
+ case ISD::UMULO:
+ BaseOp = X86ISD::UMUL;
+ Cond = X86::COND_C;
+ break;
+ }
+
+ // Also sets EFLAGS.
+ SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
+ SDValue Sum = DAG.getNode(BaseOp, VTs, LHS, RHS);
+
+ SDValue SetCC =
+ DAG.getNode(X86ISD::SETCC, N->getValueType(1),
+ DAG.getConstant(Cond, MVT::i32), SDValue(Sum.getNode(), 1));
+
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
+ return Sum;
+}
+
SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) {
MVT T = Op.getValueType();
unsigned Reg = 0;
case MVT::i16: Reg = X86::AX; size = 2; break;
case MVT::i32: Reg = X86::EAX; size = 4; break;
case MVT::i64:
- if (Subtarget->is64Bit()) {
- Reg = X86::RAX; size = 8;
- } else //Should go away when LowerType stuff lands
- return SDValue(ExpandATOMIC_CMP_SWAP(Op.getNode(), DAG), 0);
+ assert(Subtarget->is64Bit() && "Node not type legal!");
+ Reg = X86::RAX; size = 8;
break;
- };
+ }
SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), Reg,
Op.getOperand(2), SDValue());
SDValue Ops[] = { cpIn.getValue(0),
return cpOut;
}
-SDNode* X86TargetLowering::ExpandATOMIC_CMP_SWAP(SDNode* Op,
+SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
SelectionDAG &DAG) {
- MVT T = Op->getValueType(0);
- assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap");
- SDValue cpInL, cpInH;
- cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op->getOperand(2),
- DAG.getConstant(0, MVT::i32));
- cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op->getOperand(2),
- DAG.getConstant(1, MVT::i32));
- cpInL = DAG.getCopyToReg(Op->getOperand(0), X86::EAX,
- cpInL, SDValue());
- cpInH = DAG.getCopyToReg(cpInL.getValue(0), X86::EDX,
- cpInH, cpInL.getValue(1));
- SDValue swapInL, swapInH;
- swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op->getOperand(3),
- DAG.getConstant(0, MVT::i32));
- swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op->getOperand(3),
- DAG.getConstant(1, MVT::i32));
- swapInL = DAG.getCopyToReg(cpInH.getValue(0), X86::EBX,
- swapInL, cpInH.getValue(1));
- swapInH = DAG.getCopyToReg(swapInL.getValue(0), X86::ECX,
- swapInH, swapInL.getValue(1));
- SDValue Ops[] = { swapInH.getValue(0),
- Op->getOperand(1),
- swapInH.getValue(1) };
+ assert(Subtarget->is64Bit() && "Result not type legalized?");
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
- SDValue Result = DAG.getNode(X86ISD::LCMPXCHG8_DAG, Tys, Ops, 3);
- SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), X86::EAX, MVT::i32,
- Result.getValue(1));
- SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), X86::EDX, MVT::i32,
- cpOutL.getValue(2));
- SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)};
- SDValue ResultVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OpsF, 2);
- SDValue Vals[2] = { ResultVal, cpOutH.getValue(1) };
- return DAG.getMergeValues(Vals, 2).getNode();
+ SDValue TheChain = Op.getOperand(0);
+ SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheChain, 1);
+ SDValue rax = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1));
+ SDValue rdx = DAG.getCopyFromReg(rax.getValue(1), X86::RDX, MVT::i64,
+ rax.getValue(2));
+ SDValue Tmp = DAG.getNode(ISD::SHL, MVT::i64, rdx,
+ DAG.getConstant(32, MVT::i8));
+ SDValue Ops[] = {
+ DAG.getNode(ISD::OR, MVT::i64, rax, Tmp),
+ rdx.getValue(1)
+ };
+ return DAG.getMergeValues(Ops, 2);
}
-SDNode* X86TargetLowering::ExpandATOMIC_LOAD_SUB(SDNode* Op,
- SelectionDAG &DAG) {
- MVT T = Op->getValueType(0);
+SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) {
+ SDNode *Node = Op.getNode();
+ MVT T = Node->getValueType(0);
SDValue negOp = DAG.getNode(ISD::SUB, T,
- DAG.getConstant(0, T), Op->getOperand(2));
- return DAG.getAtomic((T==MVT::i8 ? ISD::ATOMIC_LOAD_ADD_8:
- T==MVT::i16 ? ISD::ATOMIC_LOAD_ADD_16:
- T==MVT::i32 ? ISD::ATOMIC_LOAD_ADD_32:
- T==MVT::i64 ? ISD::ATOMIC_LOAD_ADD_64: 0),
- Op->getOperand(0), Op->getOperand(1), negOp,
- cast<AtomicSDNode>(Op)->getSrcValue(),
- cast<AtomicSDNode>(Op)->getAlignment()).getNode();
+ DAG.getConstant(0, T), Node->getOperand(2));
+ return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD,
+ cast<AtomicSDNode>(Node)->getMemoryVT(),
+ Node->getOperand(0),
+ Node->getOperand(1), negOp,
+ cast<AtomicSDNode>(Node)->getSrcValue(),
+ cast<AtomicSDNode>(Node)->getAlignment());
}
/// LowerOperation - Provide custom lowering hooks for some operations.
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
switch (Op.getOpcode()) {
default: assert(0 && "Should not custom lower this!");
- case ISD::ATOMIC_CMP_SWAP_8: return LowerCMP_SWAP(Op,DAG);
- case ISD::ATOMIC_CMP_SWAP_16: return LowerCMP_SWAP(Op,DAG);
- case ISD::ATOMIC_CMP_SWAP_32: return LowerCMP_SWAP(Op,DAG);
- case ISD::ATOMIC_CMP_SWAP_64: return LowerCMP_SWAP(Op,DAG);
+ case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG);
+ case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::SRA_PARTS:
case ISD::SRL_PARTS: return LowerShift(Op, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
+ case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::FABS: return LowerFABS(Op, DAG);
case ISD::FNEG: return LowerFNEG(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
case ISD::CTLZ: return LowerCTLZ(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op, DAG);
-
- // FIXME: REMOVE THIS WHEN LegalizeDAGTypes lands.
- case ISD::READCYCLECOUNTER:
- return SDValue(ExpandREADCYCLECOUNTER(Op.getNode(), DAG), 0);
- }
+ case ISD::MUL: return LowerMUL_V2I64(Op, DAG);
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO: return LowerXALUO(Op, DAG);
+ case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG);
+ }
+}
+
+void X86TargetLowering::
+ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG, unsigned NewOp) {
+ MVT T = Node->getValueType(0);
+ assert (T == MVT::i64 && "Only know how to expand i64 atomics");
+
+ SDValue Chain = Node->getOperand(0);
+ SDValue In1 = Node->getOperand(1);
+ SDValue In2L = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
+ Node->getOperand(2), DAG.getIntPtrConstant(0));
+ SDValue In2H = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
+ Node->getOperand(2), DAG.getIntPtrConstant(1));
+ // This is a generalized SDNode, not an AtomicSDNode, so it doesn't
+ // have a MemOperand. Pass the info through as a normal operand.
+ SDValue LSI = DAG.getMemOperand(cast<MemSDNode>(Node)->getMemOperand());
+ SDValue Ops[] = { Chain, In1, In2L, In2H, LSI };
+ SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
+ SDValue Result = DAG.getNode(NewOp, Tys, Ops, 5);
+ SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)};
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OpsF, 2));
+ Results.push_back(Result.getValue(2));
}
/// ReplaceNodeResults - Replace a node with an illegal result type
/// with a new node built out of custom code.
-SDNode *X86TargetLowering::ReplaceNodeResults(SDNode *N, SelectionDAG &DAG) {
+void X86TargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) {
switch (N->getOpcode()) {
- default: assert(0 && "Should not custom lower this!");
- case ISD::FP_TO_SINT: return ExpandFP_TO_SINT(N, DAG);
- case ISD::READCYCLECOUNTER: return ExpandREADCYCLECOUNTER(N, DAG);
- case ISD::ATOMIC_CMP_SWAP_64: return ExpandATOMIC_CMP_SWAP(N, DAG);
- case ISD::ATOMIC_LOAD_SUB_8: return ExpandATOMIC_LOAD_SUB(N,DAG);
- case ISD::ATOMIC_LOAD_SUB_16: return ExpandATOMIC_LOAD_SUB(N,DAG);
- case ISD::ATOMIC_LOAD_SUB_32: return ExpandATOMIC_LOAD_SUB(N,DAG);
- case ISD::ATOMIC_LOAD_SUB_64: return ExpandATOMIC_LOAD_SUB(N,DAG);
+ default:
+ assert(false && "Do not know how to custom type legalize this operation!");
+ return;
+ case ISD::FP_TO_SINT: {
+ std::pair<SDValue,SDValue> Vals = FP_TO_SINTHelper(SDValue(N, 0), DAG);
+ SDValue FIST = Vals.first, StackSlot = Vals.second;
+ if (FIST.getNode() != 0) {
+ MVT VT = N->getValueType(0);
+ // Return a load from the stack slot.
+ Results.push_back(DAG.getLoad(VT, FIST, StackSlot, NULL, 0));
+ }
+ return;
+ }
+ case ISD::READCYCLECOUNTER: {
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDValue TheChain = N->getOperand(0);
+ SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheChain, 1);
+ SDValue eax = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1));
+ SDValue edx = DAG.getCopyFromReg(eax.getValue(1), X86::EDX, MVT::i32,
+ eax.getValue(2));
+ // Use a buildpair to merge the two 32-bit values into a 64-bit one.
+ SDValue Ops[] = { eax, edx };
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Ops, 2));
+ Results.push_back(edx.getValue(1));
+ return;
+ }
+ case ISD::ATOMIC_CMP_SWAP: {
+ MVT T = N->getValueType(0);
+ assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap");
+ SDValue cpInL, cpInH;
+ cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, N->getOperand(2),
+ DAG.getConstant(0, MVT::i32));
+ cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, N->getOperand(2),
+ DAG.getConstant(1, MVT::i32));
+ cpInL = DAG.getCopyToReg(N->getOperand(0), X86::EAX, cpInL, SDValue());
+ cpInH = DAG.getCopyToReg(cpInL.getValue(0), X86::EDX, cpInH,
+ cpInL.getValue(1));
+ SDValue swapInL, swapInH;
+ swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, N->getOperand(3),
+ DAG.getConstant(0, MVT::i32));
+ swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, N->getOperand(3),
+ DAG.getConstant(1, MVT::i32));
+ swapInL = DAG.getCopyToReg(cpInH.getValue(0), X86::EBX, swapInL,
+ cpInH.getValue(1));
+ swapInH = DAG.getCopyToReg(swapInL.getValue(0), X86::ECX, swapInH,
+ swapInL.getValue(1));
+ SDValue Ops[] = { swapInH.getValue(0),
+ N->getOperand(1),
+ swapInH.getValue(1) };
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDValue Result = DAG.getNode(X86ISD::LCMPXCHG8_DAG, Tys, Ops, 3);
+ SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), X86::EAX, MVT::i32,
+ Result.getValue(1));
+ SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), X86::EDX, MVT::i32,
+ cpOutL.getValue(2));
+ SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)};
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OpsF, 2));
+ Results.push_back(cpOutH.getValue(1));
+ return;
+ }
+ case ISD::ATOMIC_LOAD_ADD:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMADD64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_AND:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMAND64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_NAND:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMNAND64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_OR:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMOR64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_SUB:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSUB64_DAG);
+ return;
+ case ISD::ATOMIC_LOAD_XOR:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMXOR64_DAG);
+ return;
+ case ISD::ATOMIC_SWAP:
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSWAP64_DAG);
+ return;
}
}
case X86ISD::CALL: return "X86ISD::CALL";
case X86ISD::TAILCALL: return "X86ISD::TAILCALL";
case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG";
+ case X86ISD::BT: return "X86ISD::BT";
case X86ISD::CMP: return "X86ISD::CMP";
case X86ISD::COMI: return "X86ISD::COMI";
case X86ISD::UCOMI: return "X86ISD::UCOMI";
case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m";
case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG";
case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG";
+ case X86ISD::ATOMADD64_DAG: return "X86ISD::ATOMADD64_DAG";
+ case X86ISD::ATOMSUB64_DAG: return "X86ISD::ATOMSUB64_DAG";
+ case X86ISD::ATOMOR64_DAG: return "X86ISD::ATOMOR64_DAG";
+ case X86ISD::ATOMXOR64_DAG: return "X86ISD::ATOMXOR64_DAG";
+ case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG";
+ case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG";
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
case X86ISD::VSHL: return "X86ISD::VSHL";
case X86ISD::PCMPGTW: return "X86ISD::PCMPGTW";
case X86ISD::PCMPGTD: return "X86ISD::PCMPGTD";
case X86ISD::PCMPGTQ: return "X86ISD::PCMPGTQ";
+ case X86ISD::ADD: return "X86ISD::ADD";
+ case X86ISD::SUB: return "X86ISD::SUB";
+ case X86ISD::SMUL: return "X86ISD::SMUL";
+ case X86ISD::UMUL: return "X86ISD::UMUL";
}
}
// We can only fold this if we don't need an extra load.
if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false))
return false;
+ // If BaseGV requires a register, we cannot also have a BaseReg.
+ if (Subtarget->GVRequiresRegister(AM.BaseGV, getTargetMachine(), false) &&
+ AM.HasBaseReg)
+ return false;
// X86-64 only supports addr of globals in small code model.
if (Subtarget->is64Bit()) {
tt = t1;
unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
- assert((argOpers[valArgIndx]->isRegister() ||
- argOpers[valArgIndx]->isImmediate()) &&
+ assert((argOpers[valArgIndx]->isReg() ||
+ argOpers[valArgIndx]->isImm()) &&
"invalid operand");
- if (argOpers[valArgIndx]->isRegister())
+ if (argOpers[valArgIndx]->isReg())
MIB = BuildMI(newMBB, TII->get(regOpc), t2);
else
MIB = BuildMI(newMBB, TII->get(immOpc), t2);
return nextMBB;
}
+// private utility function: 64-bit atomics on a 32-bit host.
+MachineBasicBlock *
+X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
+ MachineBasicBlock *MBB,
+ unsigned regOpcL,
+ unsigned regOpcH,
+ unsigned immOpcL,
+ unsigned immOpcH,
+ bool invSrc) {
+ // For the atomic bitwise operator, we generate
+ // thisMBB (instructions are in pairs, except cmpxchg8b)
+ // ld t1,t2 = [bitinstr.addr]
+ // newMBB:
+ // out1, out2 = phi (thisMBB, t1/t2) (newMBB, t3/t4)
+ // op t5, t6 <- out1, out2, [bitinstr.val]
+ // (for SWAP, substitute: mov t5, t6 <- [bitinstr.val])
+ // mov ECX, EBX <- t5, t6
+ // mov EAX, EDX <- t1, t2
+ // cmpxchg8b [bitinstr.addr] [EAX, EDX, EBX, ECX implicit]
+ // mov t3, t4 <- EAX, EDX
+ // bz newMBB
+ // result in out1, out2
+ // fallthrough -->nextMBB
+
+ const TargetRegisterClass *RC = X86::GR32RegisterClass;
+ const unsigned LoadOpc = X86::MOV32rm;
+ const unsigned copyOpc = X86::MOV32rr;
+ const unsigned NotOpc = X86::NOT32r;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction::iterator MBBIter = MBB;
+ ++MBBIter;
+
+  // First build the CFG.
+ MachineFunction *F = MBB->getParent();
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(MBBIter, newMBB);
+ F->insert(MBBIter, nextMBB);
+
+  // Move all successors of thisMBB to nextMBB
+ nextMBB->transferSuccessors(thisMBB);
+
+ // Update thisMBB to fall through to newMBB
+ thisMBB->addSuccessor(newMBB);
+
+  // newMBB jumps to itself and falls through to nextMBB
+ newMBB->addSuccessor(nextMBB);
+ newMBB->addSuccessor(newMBB);
+
+ // Insert instructions into newMBB based on incoming instruction
+ // There are 8 "real" operands plus 9 implicit def/uses, ignored here.
+ assert(bInstr->getNumOperands() < 18 && "unexpected number of operands");
+ MachineOperand& dest1Oper = bInstr->getOperand(0);
+ MachineOperand& dest2Oper = bInstr->getOperand(1);
+ MachineOperand* argOpers[6];
+ for (int i=0; i < 6; ++i)
+ argOpers[i] = &bInstr->getOperand(i+2);
+
+ // x86 address has 4 operands: base, index, scale, and displacement
+ int lastAddrIndx = 3; // [0,3]
+
+ unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
+ MachineInstrBuilder MIB = BuildMI(thisMBB, TII->get(LoadOpc), t1);
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+ unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
+ MIB = BuildMI(thisMBB, TII->get(LoadOpc), t2);
+ // add 4 to displacement.
+ for (int i=0; i <= lastAddrIndx-1; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+ MachineOperand newOp3 = *(argOpers[3]);
+ if (newOp3.isImm())
+ newOp3.setImm(newOp3.getImm()+4);
+ else
+ newOp3.setOffset(newOp3.getOffset()+4);
+ (*MIB).addOperand(newOp3);
+
+ // t3/4 are defined later, at the bottom of the loop
+ unsigned t3 = F->getRegInfo().createVirtualRegister(RC);
+ unsigned t4 = F->getRegInfo().createVirtualRegister(RC);
+ BuildMI(newMBB, TII->get(X86::PHI), dest1Oper.getReg())
+ .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(newMBB);
+ BuildMI(newMBB, TII->get(X86::PHI), dest2Oper.getReg())
+ .addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB);
+
+ unsigned tt1 = F->getRegInfo().createVirtualRegister(RC);
+ unsigned tt2 = F->getRegInfo().createVirtualRegister(RC);
+ if (invSrc) {
+ MIB = BuildMI(newMBB, TII->get(NotOpc), tt1).addReg(t1);
+ MIB = BuildMI(newMBB, TII->get(NotOpc), tt2).addReg(t2);
+ } else {
+ tt1 = t1;
+ tt2 = t2;
+ }
+
+ assert((argOpers[4]->isReg() || argOpers[4]->isImm()) &&
+ "invalid operand");
+ unsigned t5 = F->getRegInfo().createVirtualRegister(RC);
+ unsigned t6 = F->getRegInfo().createVirtualRegister(RC);
+ if (argOpers[4]->isReg())
+ MIB = BuildMI(newMBB, TII->get(regOpcL), t5);
+ else
+ MIB = BuildMI(newMBB, TII->get(immOpcL), t5);
+ if (regOpcL != X86::MOV32rr)
+ MIB.addReg(tt1);
+ (*MIB).addOperand(*argOpers[4]);
+ assert(argOpers[5]->isReg() == argOpers[4]->isReg());
+ assert(argOpers[5]->isImm() == argOpers[4]->isImm());
+ if (argOpers[5]->isReg())
+ MIB = BuildMI(newMBB, TII->get(regOpcH), t6);
+ else
+ MIB = BuildMI(newMBB, TII->get(immOpcH), t6);
+ if (regOpcH != X86::MOV32rr)
+ MIB.addReg(tt2);
+ (*MIB).addOperand(*argOpers[5]);
+
+ MIB = BuildMI(newMBB, TII->get(copyOpc), X86::EAX);
+ MIB.addReg(t1);
+ MIB = BuildMI(newMBB, TII->get(copyOpc), X86::EDX);
+ MIB.addReg(t2);
+
+ MIB = BuildMI(newMBB, TII->get(copyOpc), X86::EBX);
+ MIB.addReg(t5);
+ MIB = BuildMI(newMBB, TII->get(copyOpc), X86::ECX);
+ MIB.addReg(t6);
+
+ MIB = BuildMI(newMBB, TII->get(X86::LCMPXCHG8B));
+ for (int i=0; i <= lastAddrIndx; ++i)
+ (*MIB).addOperand(*argOpers[i]);
+
+  assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperands");
+ (*MIB).addMemOperand(*F, *bInstr->memoperands_begin());
+
+ MIB = BuildMI(newMBB, TII->get(copyOpc), t3);
+ MIB.addReg(X86::EAX);
+ MIB = BuildMI(newMBB, TII->get(copyOpc), t4);
+ MIB.addReg(X86::EDX);
+
+ // insert branch
+ BuildMI(newMBB, TII->get(X86::JNE)).addMBB(newMBB);
+
+ F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now.
+ return nextMBB;
+}
+
// private utility function
MachineBasicBlock *
X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
(*MIB).addOperand(*argOpers[i]);
// We only support register and immediate values
- assert((argOpers[valArgIndx]->isRegister() ||
- argOpers[valArgIndx]->isImmediate()) &&
+ assert((argOpers[valArgIndx]->isReg() ||
+ argOpers[valArgIndx]->isImm()) &&
"invalid operand");
unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
- if (argOpers[valArgIndx]->isRegister())
+ if (argOpers[valArgIndx]->isReg())
MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), t2);
else
MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), t2);
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
switch (MI->getOpcode()) {
default: assert(false && "Unexpected instr type to insert");
+ case X86::CMOV_V1I64:
case X86::CMOV_FR32:
case X86::CMOV_FR64:
case X86::CMOV_V4F32:
X86AddressMode AM;
MachineOperand &Op = MI->getOperand(0);
- if (Op.isRegister()) {
+ if (Op.isReg()) {
AM.BaseType = X86AddressMode::RegBase;
AM.Base.Reg = Op.getReg();
} else {
AM.Base.FrameIndex = Op.getIndex();
}
Op = MI->getOperand(1);
- if (Op.isImmediate())
+ if (Op.isImm())
AM.Scale = Op.getImm();
Op = MI->getOperand(2);
- if (Op.isImmediate())
+ if (Op.isImm())
AM.IndexReg = Op.getImm();
Op = MI->getOperand(3);
- if (Op.isGlobalAddress()) {
+ if (Op.isGlobal()) {
AM.GV = Op.getGlobal();
} else {
AM.Disp = Op.getImm();
X86::NOT8r, X86::AL,
X86::GR8RegisterClass, true);
// FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
+  // This group is for 64-bit hosts.
case X86::ATOMAND64:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
X86::AND64ri32, X86::MOV64rm,
return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB64rr);
case X86::ATOMUMAX64:
return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA64rr);
+
+ // This group does 64-bit operations on a 32-bit host.
+ case X86::ATOMAND6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::AND32rr, X86::AND32rr,
+ X86::AND32ri, X86::AND32ri,
+ false);
+ case X86::ATOMOR6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::OR32rr, X86::OR32rr,
+ X86::OR32ri, X86::OR32ri,
+ false);
+ case X86::ATOMXOR6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::XOR32rr, X86::XOR32rr,
+ X86::XOR32ri, X86::XOR32ri,
+ false);
+ case X86::ATOMNAND6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::AND32rr, X86::AND32rr,
+ X86::AND32ri, X86::AND32ri,
+ true);
+ case X86::ATOMADD6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::ADD32rr, X86::ADC32rr,
+ X86::ADD32ri, X86::ADC32ri,
+ false);
+ case X86::ATOMSUB6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::SUB32rr, X86::SBB32rr,
+ X86::SUB32ri, X86::SBB32ri,
+ false);
+ case X86::ATOMSWAP6432:
+ return EmitAtomicBit6432WithCustomInserter(MI, BB,
+ X86::MOV32rr, X86::MOV32rr,
+ X86::MOV32ri, X86::MOV32ri,
+ false);
}
}
if (N->getOpcode() == X86ISD::Wrapper) {
if (isa<GlobalAddressSDNode>(N->getOperand(0))) {
GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
+ Offset = cast<GlobalAddressSDNode>(N->getOperand(0))->getOffset();
return true;
}
}
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
- DAGCombinerInfo &DCI) const {
+ DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
default: break;
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'A':
+ return C_Register;
case 'f':
case 'r':
case 'R':
if (GA) {
if (hasMemory)
- Op = LowerGlobalAddress(GA->getGlobal(), DAG);
+ Op = LowerGlobalAddress(GA->getGlobal(), Offset, DAG);
else
Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
Offset);
// FIXME: not handling fp-stack yet!
switch (Constraint[0]) { // GCC X86 Constraint Letters
default: break; // Unknown constraint letter
- case 'A': // EAX/EDX
- if (VT == MVT::i32 || VT == MVT::i64)
- return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
- break;
case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode)
case 'Q': // Q_REGS
if (VT == MVT::i32)
case 'r': // GENERAL_REGS
case 'R': // LEGACY_REGS
case 'l': // INDEX_REGS
- if (VT == MVT::i64 && Subtarget->is64Bit())
- return std::make_pair(0U, X86::GR64RegisterClass);
- if (VT == MVT::i32)
- return std::make_pair(0U, X86::GR32RegisterClass);
- else if (VT == MVT::i16)
- return std::make_pair(0U, X86::GR16RegisterClass);
- else if (VT == MVT::i8)
+ if (VT == MVT::i8)
return std::make_pair(0U, X86::GR8RegisterClass);
- break;
+ if (VT == MVT::i16)
+ return std::make_pair(0U, X86::GR16RegisterClass);
+ if (VT == MVT::i32 || !Subtarget->is64Bit())
+ return std::make_pair(0U, X86::GR32RegisterClass);
+ return std::make_pair(0U, X86::GR64RegisterClass);
case 'f': // FP Stack registers.
// If SSE is enabled for this VT, use f80 to ensure the isel moves the
// value to the correct fpstack register class.
case 'y': // MMX_REGS if MMX allowed.
if (!Subtarget->hasMMX()) break;
return std::make_pair(0U, X86::VR64RegisterClass);
- break;
case 'Y': // SSE_REGS if SSE2 allowed
if (!Subtarget->hasSSE2()) break;
// FALL THROUGH.
Res.first = X86::ST0;
Res.second = X86::RFP80RegisterClass;
}
-
+ // 'A' means EAX + EDX.
+ if (Constraint == "A") {
+ Res.first = X86::EAX;
+ Res.second = X86::GRADRegisterClass;
+ }
return Res;
}
return Res;
}
+
+//===----------------------------------------------------------------------===//
+// X86 Widen vector type
+//===----------------------------------------------------------------------===//
+
+/// getWidenVectorType: given a vector type, returns the type to widen
+/// to (e.g., v7i8 to v8i8). If the vector type is legal, it returns itself.
+/// If there is no vector type that we want to widen to, returns MVT::Other.
+/// When and where to widen is target-dependent, based on the cost of
+/// scalarizing vs. using the wider vector type.
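+///
+/// For example, v3i32 widens to v4i32 on a target where v4i32 is legal, while
+/// a one-element vector that is not already legal returns MVT::Other.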
+
+MVT X86TargetLowering::getWidenVectorType(MVT VT) {
+ assert(VT.isVector());
+ if (isTypeLegal(VT))
+ return VT;
+
+  // TODO: In computeRegisterProperties, we can compute the list of legal vector
+  //       types based on element type. This would speed up our search (though
+ // it may not be worth it since the size of the list is relatively
+ // small).
+ MVT EltVT = VT.getVectorElementType();
+ unsigned NElts = VT.getVectorNumElements();
+
+  // On X86, it makes sense to widen any vector wider than one element.
+ if (NElts <= 1)
+ return MVT::Other;
+
+ for (unsigned nVT = MVT::FIRST_VECTOR_VALUETYPE;
+ nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ MVT SVT = (MVT::SimpleValueType)nVT;
+
+ if (isTypeLegal(SVT) &&
+ SVT.getVectorElementType() == EltVT &&
+ SVT.getVectorNumElements() > NElts)
+ return SVT;
+ }
+ return MVT::Other;
+}