#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
-#include "X86MCTargetExpr.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+using namespace dwarf;
STATISTIC(NumTailCalls, "Number of tail calls");
}
// Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
+ // FIXME: This produces lots of inefficiencies in isel since
+ // we then need notice that most of our operands have been implicitly
+ // converted to v2i64.
for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; i++) {
MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
EVT VT = SVT;
if (!VT.is128BitVector()) {
continue;
}
+
setOperationAction(ISD::AND, SVT, Promote);
AddPromotedToType (ISD::AND, SVT, MVT::v2i64);
setOperationAction(ISD::OR, SVT, Promote);
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+ setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::BUILD_VECTOR);
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::SHL);
// FIXME: These should be based on subtarget info. Plus, the values should
// be smaller when we are in optimizing for size mode.
maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
- maxStoresPerMemcpy = 16; // For @llvm.memcpy -> sequence of stores
+ maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores
setPrefLoopAlignment(16);
benefitFromCodePlacementOpt = true;
}
/// getOptimalMemOpType - Returns the target specific optimal type for load
-/// and store operations as a result of memset, memcpy, and memmove
-/// lowering. It returns MVT::iAny if SelectionDAG should be responsible for
-/// determining it.
+/// and store operations as a result of memset, memcpy, and memmove lowering.
+/// If DstAlign is zero that means it's safe to destination alignment can
+/// satisfy any constraint. Similarly if SrcAlign is zero it means there
+/// isn't a need to check it against alignment requirement, probably because
+/// the source does not need to be loaded. It returns EVT::Other if
+/// SelectionDAG should be responsible for determining it.
EVT
-X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
- bool isSrcConst, bool isSrcStr,
+X86TargetLowering::getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool SafeToUseFP,
SelectionDAG &DAG) const {
// FIXME: This turns off use of xmm stores for memset/memcpy on targets like
// linux. This is because the stack realignment code can't handle certain
// cases like PR2962. This should be removed when PR2962 is fixed.
const Function *F = DAG.getMachineFunction().getFunction();
- bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
- if (!NoImplicitFloatOps && Subtarget->getStackAlignment() >= 16) {
- if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16)
- return MVT::v4i32;
- if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16)
- return MVT::v4f32;
+ if (!F->hasFnAttr(Attribute::NoImplicitFloat)) {
+ if (Size >= 16 &&
+ (Subtarget->isUnalignedMemAccessFast() ||
+ (DstAlign == 0 || DstAlign >= 16) &&
+ (SrcAlign == 0 || SrcAlign >= 16)) &&
+ Subtarget->getStackAlignment() >= 16) {
+ if (Subtarget->hasSSE2())
+ return MVT::v4i32;
+ if (SafeToUseFP && Subtarget->hasSSE1())
+ return MVT::v4f32;
+ } else if (SafeToUseFP &&
+ Size >= 8 &&
+ !Subtarget->is64Bit() &&
+ Subtarget->getStackAlignment() >= 8 &&
+ Subtarget->hasSSE2())
+ return MVT::f64;
}
if (Subtarget->is64Bit() && Size >= 8)
return MVT::i64;
X86TargetLowering::getPICBaseSymbol(const MachineFunction *MF,
MCContext &Ctx) const {
const MCAsmInfo &MAI = *getTargetMachine().getMCAsmInfo();
- return Ctx.GetOrCreateTemporarySymbol(Twine(MAI.getPrivateGlobalPrefix())+
- Twine(MF->getFunctionNumber())+"$pb");
+ return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+
+ Twine(MF->getFunctionNumber())+"$pb");
}
Subtarget->isPICStyleGOT());
// In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
// entries.
- return X86MCTargetExpr::Create(MBB->getSymbol(Ctx),
- X86MCTargetExpr::GOTOFF, Ctx);
+ return MCSymbolRefExpr::Create(MBB->getSymbol(),
+ MCSymbolRefExpr::VK_GOTOFF, Ctx);
}
/// getPICJumpTableRelocaBase - Returns relocation base for the given PIC
DebugLoc dl,
SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) {
-
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
if (isTailCall) {
// Check if it's really possible to do a tail call.
- isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
+ isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
+ isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
Outs, Ins, DAG);
// Sibcalls are automatically detected tailcalls which do not require
OpFlags);
}
- if (isTailCall && !WasGlobalOrExternal) {
- // Force the address into a (call preserved) caller-saved register since
- // tailcall must happen after callee-saved registers are poped.
- // FIXME: Give it a special register class that contains caller-saved
- // register instead?
- unsigned TCReg = Is64Bit ? X86::R11 : X86::EAX;
- Chain = DAG.getCopyToReg(Chain, dl,
- DAG.getRegister(TCReg, getPointerTy()),
- Callee,InFlag);
- Callee = DAG.getRegister(TCReg, getPointerTy());
- }
-
// Returns a chain & a flag for retval copy to use.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
SmallVector<SDValue, 8> Ops;
if (RVLocs[i].isRegLoc())
MF.getRegInfo().addLiveOut(RVLocs[i].getLocReg());
}
-
- assert(((Callee.getOpcode() == ISD::Register &&
- (cast<RegisterSDNode>(Callee)->getReg() == X86::EAX ||
- cast<RegisterSDNode>(Callee)->getReg() == X86::R11)) ||
- Callee.getOpcode() == ISD::TargetExternalSymbol ||
- Callee.getOpcode() == ISD::TargetGlobalAddress) &&
- "Expecting a global address, external symbol, or scratch register");
-
return DAG.getNode(X86ISD::TC_RETURN, dl,
NodeTys, &Ops[0], Ops.size());
}
X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
CallingConv::ID CalleeCC,
bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const {
return false;
// If -tailcallopt is specified, make fastcc functions tail-callable.
+ const MachineFunction &MF = DAG.getMachineFunction();
const Function *CallerF = DAG.getMachineFunction().getFunction();
if (GuaranteedTailCallOpt) {
if (IsTailCallConvention(CalleeCC) &&
// Look for obvious safe cases to perform tail call optimization that does not
// requite ABI changes. This is what gcc calls sibcall.
- // Do not tail call optimize vararg calls for now.
- if (isVarArg)
+ // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
+ // emit a special epilogue.
+ if (RegInfo->needsStackRealignment(MF))
+ return false;
+
+ // Do not sibcall optimize vararg calls unless the call site is not passing any
+ // arguments.
+ if (isVarArg && !Outs.empty())
return false;
+ // Also avoid sibcall optimization if either caller or callee uses struct
+ // return semantics.
+ if (isCalleeStructRet || isCallerStructRet)
+ return false;
+
+ // If the call result is in ST0 / ST1, it needs to be popped off the x87 stack.
+ // Therefore if it's not used by the call it is not safe to optimize this into
+ // a sibcall.
+ bool Unused = false;
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ if (!Ins[i].Used) {
+ Unused = true;
+ break;
+ }
+ }
+ if (Unused) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CalleeCC, false, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
+ return false;
+ }
+ }
+
// If the callee takes no arguments then go on to check the results of the
// call.
if (!Outs.empty()) {
bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
bool hasSymbolicDisplacement) {
// Offset should fit into 32 bit immediate field.
- if (!isInt32(Offset))
+ if (!isInt<32>(Offset))
return false;
// If we don't have a symbolic displacement - we don't have any extra
return SDValue();
}
+/// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a
+/// vector of type 'VT', see if the elements can be replaced by a single large
+/// load which has the same value as a build_vector whose operands are 'elts'.
+///
+/// Example: <load i32 *a, load i32 *a+4, undef, undef> -> zextload a
+///
+/// FIXME: we'd also like to handle the case where the last elements are zero
+/// rather than undef via VZEXT_LOAD, but we do not detect that case today.
+/// There's even a handy isZeroNode for that purpose.
+static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
+ DebugLoc &dl, SelectionDAG &DAG) {
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElems = Elts.size();
+
+ LoadSDNode *LDBase = NULL;
+ unsigned LastLoadedElt = -1U;
+
+ // For each element in the initializer, see if we've found a load or an undef.
+ // If we don't find an initial load element, or later load elements are
+ // non-consecutive, bail out.
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue Elt = Elts[i];
+
+ if (!Elt.getNode() ||
+ (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
+ return SDValue();
+ if (!LDBase) {
+ if (Elt.getNode()->getOpcode() == ISD::UNDEF)
+ return SDValue();
+ LDBase = cast<LoadSDNode>(Elt.getNode());
+ LastLoadedElt = i;
+ continue;
+ }
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+
+ LoadSDNode *LD = cast<LoadSDNode>(Elt);
+ if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i))
+ return SDValue();
+ LastLoadedElt = i;
+ }
+
+ // If we have found an entire vector of loads and undefs, then return a large
+ // load of the entire vector width starting at the base pointer. If we found
+ // consecutive loads for the low half, generate a vzext_load node.
+ if (LastLoadedElt == NumElems - 1) {
+ if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
+ return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(),
+ LDBase->getSrcValue(), LDBase->getSrcValueOffset(),
+ LDBase->isVolatile(), LDBase->isNonTemporal(), 0);
+ return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(),
+ LDBase->getSrcValue(), LDBase->getSrcValueOffset(),
+ LDBase->isVolatile(), LDBase->isNonTemporal(),
+ LDBase->getAlignment());
+ } else if (NumElems == 4 && LastLoadedElt == 1) {
+ SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
+ SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
+ SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode);
+ }
+ return SDValue();
+}
+
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]);
}
- if (Values.size() > 2) {
- // If we have SSE 4.1, Expand into a number of inserts unless the number of
- // values to be inserted is equal to the number of elements, in which case
- // use the unpack code below in the hopes of matching the consecutive elts
- // load merge pattern for shuffles.
- // FIXME: We could probably just check that here directly.
- if (Values.size() < NumElems && VT.getSizeInBits() == 128 &&
- getSubtarget()->hasSSE41()) {
+ if (Values.size() > 1 && VT.getSizeInBits() == 128) {
+ // Check for a build vector of consecutive loads.
+ for (unsigned i = 0; i < NumElems; ++i)
+ V[i] = Op.getOperand(i);
+
+ // Check for elements which are consecutive loads.
+ SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG);
+ if (LD.getNode())
+ return LD;
+
+ // For SSE 4.1, use inserts into undef.
+ if (getSubtarget()->hasSSE41()) {
V[0] = DAG.getUNDEF(VT);
for (unsigned i = 0; i < NumElems; ++i)
if (Op.getOperand(i).getOpcode() != ISD::UNDEF)
Op.getOperand(i), DAG.getIntPtrConstant(i));
return V[0];
}
- // Expand into a number of unpckl*.
+
+ // Otherwise, expand into a number of unpckl*
// e.g. for v4f32
// Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
// : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
}
return V[0];
}
-
return SDValue();
}
N2C && N2C->isNullValue() &&
RHSC && RHSC->isNullValue()) {
SDValue CmpOp0 = Cmp.getOperand(0);
- Cmp = DAG.getNode(X86ISD::CMP, dl, CmpOp0.getValueType(),
+ Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
CmpOp0, DAG.getConstant(1, CmpOp0.getValueType()));
return DAG.getNode(X86ISD::SETCC_CARRY, dl, Op.getValueType(),
DAG.getConstant(X86::COND_B, MVT::i8), Cmp);
case X86::CMOV_V4F32:
case X86::CMOV_V2F64:
case X86::CMOV_V2I64:
+ case X86::CMOV_GR16:
+ case X86::CMOV_GR32:
+ case X86::CMOV_RFP32:
+ case X86::CMOV_RFP64:
+ case X86::CMOV_RFP80:
return EmitLoweredSelect(MI, BB, EM);
case X86::FP32_TO_INT16_IN_MEM:
return TargetLowering::isGAPlusOffset(N, GA, Offset);
}
-static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
- EVT EltVT, LoadSDNode *&LDBase,
- unsigned &LastLoadedElt,
- SelectionDAG &DAG, MachineFrameInfo *MFI,
- const TargetLowering &TLI) {
- LDBase = NULL;
- LastLoadedElt = -1U;
- for (unsigned i = 0; i < NumElems; ++i) {
- if (N->getMaskElt(i) < 0) {
- if (!LDBase)
- return false;
- continue;
- }
-
- SDValue Elt = DAG.getShuffleScalarElt(N, i);
- if (!Elt.getNode() ||
- (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
- return false;
- if (!LDBase) {
- if (Elt.getNode()->getOpcode() == ISD::UNDEF)
- return false;
- LDBase = cast<LoadSDNode>(Elt.getNode());
- LastLoadedElt = i;
- continue;
- }
- if (Elt.getOpcode() == ISD::UNDEF)
- continue;
-
- LoadSDNode *LD = cast<LoadSDNode>(Elt);
- if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i))
- return false;
- LastLoadedElt = i;
- }
- return true;
-}
-
/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
/// if the load addresses are consecutive, non-overlapping, and in the right
-/// order. In the case of v2i64, it will see if it can rewrite the
-/// shuffle to be an appropriate build vector so it can take advantage of
-// performBuildVectorCombine.
+/// order.
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI) {
DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
- EVT EltVT = VT.getVectorElementType();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
- unsigned NumElems = VT.getVectorNumElements();
if (VT.getSizeInBits() != 128)
return SDValue();
- // Try to combine a vector_shuffle into a 128-bit load.
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- LoadSDNode *LD = NULL;
- unsigned LastLoadedElt;
- if (!EltsFromConsecutiveLoads(SVN, NumElems, EltVT, LD, LastLoadedElt, DAG,
- MFI, TLI))
+ SmallVector<SDValue, 16> Elts;
+ for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
+ Elts.push_back(DAG.getShuffleScalarElt(SVN, i));
+
+ return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);
+}
+
+/// PerformShuffleCombine - Detect vector gather/scatter index generation
+/// and convert it from being a bunch of shuffles and extracts to a simple
+/// store and scalar loads to extract the elements.
+static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ SDValue InputVector = N->getOperand(0);
+
+ // Only operate on vectors of 4 elements, where the alternative shuffling
+ // gets to be more expensive.
+ if (InputVector.getValueType() != MVT::v4i32)
return SDValue();
- if (LastLoadedElt == NumElems - 1) {
- if (DAG.InferPtrAlignment(LD->getBasePtr()) >= 16)
- return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(),
- LD->getSrcValue(), LD->getSrcValueOffset(),
- LD->isVolatile(), LD->isNonTemporal(), 0);
- return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(),
- LD->getSrcValue(), LD->getSrcValueOffset(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
- } else if (NumElems == 4 && LastLoadedElt == 1) {
- SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
- SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
- SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode);
+ // Check whether every use of InputVector is an EXTRACT_VECTOR_ELT with a
+ // single use which is a sign-extend or zero-extend, and all elements are
+ // used.
+ SmallVector<SDNode *, 4> Uses;
+ unsigned ExtractedElements = 0;
+ for (SDNode::use_iterator UI = InputVector.getNode()->use_begin(),
+ UE = InputVector.getNode()->use_end(); UI != UE; ++UI) {
+ if (UI.getUse().getResNo() != InputVector.getResNo())
+ return SDValue();
+
+ SDNode *Extract = *UI;
+ if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+
+ if (Extract->getValueType(0) != MVT::i32)
+ return SDValue();
+ if (!Extract->hasOneUse())
+ return SDValue();
+ if (Extract->use_begin()->getOpcode() != ISD::SIGN_EXTEND &&
+ Extract->use_begin()->getOpcode() != ISD::ZERO_EXTEND)
+ return SDValue();
+ if (!isa<ConstantSDNode>(Extract->getOperand(1)))
+ return SDValue();
+
+ // Record which element was extracted.
+ ExtractedElements |=
+ 1 << cast<ConstantSDNode>(Extract->getOperand(1))->getZExtValue();
+
+ Uses.push_back(Extract);
+ }
+
+ // If not all the elements were used, this may not be worthwhile.
+ if (ExtractedElements != 15)
+ return SDValue();
+
+ // Ok, we've now decided to do the transformation.
+ DebugLoc dl = InputVector.getDebugLoc();
+
+ // Store the value to a temporary stack slot.
+ SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType());
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr, NULL, 0,
+ false, false, 0);
+
+ // Replace each use (extract) with a load of the appropriate element.
+ for (SmallVectorImpl<SDNode *>::iterator UI = Uses.begin(),
+ UE = Uses.end(); UI != UE; ++UI) {
+ SDNode *Extract = *UI;
+
+ // Compute the element's address.
+ SDValue Idx = Extract->getOperand(1);
+ unsigned EltSize =
+ InputVector.getValueType().getVectorElementType().getSizeInBits()/8;
+ uint64_t Offset = EltSize * cast<ConstantSDNode>(Idx)->getZExtValue();
+ SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
+
+ SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), OffsetVal, StackPtr);
+
+ // Load the scalar.
+ SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch, ScalarAddr,
+ NULL, 0, false, false, 0);
+
+ // Replace the exact with the load.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), LoadScalar);
}
+
+ // The replacement was made in place; don't return anything.
return SDValue();
}
switch (N->getOpcode()) {
default: break;
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
+ case ISD::EXTRACT_VECTOR_ELT:
+ return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
case ISD::MUL: return PerformMulCombine(N, DAG, DCI);
AsmPieces[2] == "${0:w}" &&
IA->getConstraintString().compare(0, 5, "=r,0,") == 0) {
AsmPieces.clear();
- SplitString(IA->getConstraintString().substr(5), AsmPieces, ",");
+ const std::string &Constraints = IA->getConstraintString();
+ SplitString(StringRef(Constraints).substr(5), AsmPieces, ",");
std::sort(AsmPieces.begin(), AsmPieces.end());
if (AsmPieces.size() == 4 &&
AsmPieces[0] == "~{cc}" &&