#include "X86TargetMachine.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
int Base_FrameIndex;
unsigned Scale;
- SDValue IndexReg;
+ SDValue IndexReg;
int32_t Disp;
SDValue Segment;
const GlobalValue *GV;
bool hasSymbolicDisplacement() const {
return GV != 0 || CP != 0 || ES != 0 || JT != -1 || BlockAddr != 0;
}
-
+
bool hasBaseOrIndexReg() const {
return IndexReg.getNode() != 0 || Base_Reg.getNode() != 0;
}
-
+
/// isRIPRelative - Return true if this addressing mode is already RIP
/// relative.
bool isRIPRelative() const {
return RegNode->getReg() == X86::RIP;
return false;
}
-
+
void setBaseReg(SDValue Reg) {
BaseType = RegBase;
Base_Reg = Reg;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void dump() {
dbgs() << "X86ISelAddressMode " << this << '\n';
dbgs() << "Base_Reg ";
if (Base_Reg.getNode() != 0)
- Base_Reg.getNode()->dump();
+ Base_Reg.getNode()->dump();
else
dbgs() << "nul";
dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'
if (IndexReg.getNode() != 0)
IndexReg.getNode()->dump();
else
- dbgs() << "nul";
+ dbgs() << "nul";
dbgs() << " Disp " << Disp << '\n'
<< "GV ";
if (GV)
dbgs() << "nul";
dbgs() << " JT" << JT << " Align" << Align << '\n';
}
+#endif
};
}
private:
SDNode *Select(SDNode *N);
+ SDNode *SelectGather(SDNode *N, unsigned Opc);
SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
- SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);
SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT);
bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
SDValue &Index, SDValue &Disp,
SDValue &Segment,
SDValue &NodeWithChain);
-
+
bool TryFoldLoad(SDNode *P, SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment);
-
+
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
char ConstraintCode,
std::vector<SDValue> &OutOps);
-
+
void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);
- inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
+ inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
else if (AM.CP)
Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
AM.Align, AM.Disp, AM.SymbolFlags);
- else if (AM.ES)
+ else if (AM.ES) {
+ assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
- else if (AM.JT != -1)
+ } else if (AM.JT != -1) {
+ assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
- else if (AM.BlockAddr)
- Disp = CurDAG->getBlockAddress(AM.BlockAddr, MVT::i32,
- true, AM.SymbolFlags);
+ } else if (AM.BlockAddr)
+ Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
+ AM.SymbolFlags);
else
Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);
void X86DAGToDAGISel::PreprocessISelDAG() {
// OptForSize is used in pattern predicates that isel is matching.
OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize);
-
+
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E; ) {
SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
++NumLoadMoved;
continue;
}
-
+
// Lower fpround and fpextend nodes that target the FP stack to be store and
// load to the stack. This is a gross hack. We would like to simply mark
// these as being illegal, but when we do that, legalize produces these when
// FIXME: This should only happen when not compiled with -O0.
if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
continue;
-
+
EVT SrcVT = N->getOperand(0).getValueType();
EVT DstVT = N->getValueType(0);
if (N->getConstantOperandVal(1))
continue;
}
-
+
// Here we could have an FP stack truncation or an FPStack <-> SSE convert.
// FPStack has extload and truncstore. SSE can fold direct loads into other
// operations. Based on this, decide what we want to do.
MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
else
MemVT = SrcIsSSE ? SrcVT : DstVT;
-
+
SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
DebugLoc dl = N->getDebugLoc();
-
+
// FIXME: optimize the case where the src/dest is a load or store?
SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
N->getOperand(0),
// To avoid invalidating 'I', back it up to the convert node.
--I;
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
-
+
// Now that we did that, the node is dead. Increment the iterator to the
// next node to process, then delete N.
++I;
CurDAG->DeleteNode(N);
- }
+ }
}
const TargetInstrInfo *TII = TM.getInstrInfo();
if (Subtarget->isTargetCygMing()) {
unsigned CallOp =
- Subtarget->is64Bit() ? X86::WINCALL64pcrel32 : X86::CALLpcrel32;
+ Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32;
BuildMI(BB, DebugLoc(),
TII->get(CallOp)).addExternalSymbol("__main");
}
bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
SDValue Address = N->getOperand(1);
-
+
// load gs:0 -> GS segment register.
// load fs:0 -> FS segment register.
//
// For more information see http://people.redhat.com/drepper/tls.pdf
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
if (C->getSExtValue() == 0 && AM.Segment.getNode() == 0 &&
- Subtarget->isTargetELF())
+ Subtarget->isTargetLinux())
switch (N->getPointerInfo().getAddrSpace()) {
case 256:
AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
return false;
}
-
+
return true;
}
// Handle X86-64 rip-relative addresses. We check this before checking direct
// folding because RIP is preferable to non-RIP accesses.
- if (Subtarget->is64Bit() &&
+ if (Subtarget->is64Bit() && N.getOpcode() == X86ISD::WrapperRIP &&
// Under X86-64 non-small code model, GV (and friends) are 64-bits, so
// they cannot be folded into immediate fields.
// FIXME: This can be improved for kernel and other models?
- (M == CodeModel::Small || M == CodeModel::Kernel) &&
- // Base and index reg must be 0 in order to use %rip as base and lowering
- // must allow RIP.
- !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) {
+ (M == CodeModel::Small || M == CodeModel::Kernel)) {
+ // Base and index reg must be 0 in order to use %rip as base.
+ if (AM.hasBaseOrIndexReg())
+ return true;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
X86ISelAddressMode Backup = AM;
AM.GV = G->getGlobal();
} else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
AM.JT = J->getIndex();
AM.SymbolFlags = J->getTargetFlags();
- } else {
- AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
- AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
- }
+ } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
+ X86ISelAddressMode Backup = AM;
+ AM.BlockAddr = BA->getBlockAddress();
+ AM.SymbolFlags = BA->getTargetFlags();
+ if (FoldOffsetIntoAddress(BA->getOffset(), AM)) {
+ AM = Backup;
+ return true;
+ }
+ } else
+ llvm_unreachable("Unhandled symbol reference node.");
if (N.getOpcode() == X86ISD::WrapperRIP)
AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
}
// Handle the case when globals fit in our immediate field: This is true for
- // X86-32 always and X86-64 when in -static -mcmodel=small mode. In 64-bit
- // mode, this results in a non-RIP-relative computation.
+ // X86-32 always and X86-64 when in -mcmodel=small mode. In 64-bit
+ // mode, this only applies to a non-RIP-relative computation.
if (!Subtarget->is64Bit() ||
- ((M == CodeModel::Small || M == CodeModel::Kernel) &&
- TM.getRelocationModel() == Reloc::Static)) {
+ M == CodeModel::Small || M == CodeModel::Kernel) {
+ assert(N.getOpcode() != X86ISD::WrapperRIP &&
+ "RIP-relative addressing already handled");
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
AM.GV = G->getGlobal();
AM.Disp += G->getOffset();
} else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
AM.JT = J->getIndex();
AM.SymbolFlags = J->getTargetFlags();
- } else {
- AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
- AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
- }
+ } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
+ AM.BlockAddr = BA->getBlockAddress();
+ AM.Disp += BA->getOffset();
+ AM.SymbolFlags = BA->getTargetFlags();
+ } else
+ llvm_unreachable("Unhandled symbol reference node.");
return false;
}
APInt MaskedHighBits = APInt::getHighBitsSet(X.getValueSizeInBits(),
MaskLZ);
APInt KnownZero, KnownOne;
- DAG.ComputeMaskedBits(X, MaskedHighBits, KnownZero, KnownOne);
+ DAG.ComputeMaskedBits(X, KnownZero, KnownOne);
if (MaskedHighBits != KnownZero) return true;
// We've identified a pattern that can be transformed into a single shift
case ISD::SHL:
if (AM.IndexReg.getNode() != 0 || AM.Scale != 1)
break;
-
+
if (ConstantSDNode
*CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
unsigned Val = CN->getZExtValue();
AM.IndexReg = ShVal.getNode()->getOperand(0);
ConstantSDNode *AddVal =
cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
- uint64_t Disp = AddVal->getSExtValue() << Val;
+ uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
if (!FoldOffsetIntoAddress(Disp, AM))
return false;
}
!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
return false;
AM = Backup;
-
+
// Try again after commuting the operands.
if (!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)&&
!MatchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
AM = Backup;
}
break;
-
+
case ISD::AND: {
// Perform some heroic transforms on an and of a constant-count shift
// with a constant to enable use of the scaled offset field.
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
X86ISelAddressMode AM;
-
+
if (Parent &&
// This list of opcodes are all the nodes that have an "addr:$ptr" operand
// that are not a MemSDNode, and thus don't have proper addrspace info.
if (AddrSpace == 257)
AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
}
-
+
if (MatchAddress(N, AM))
return false;
// elements. This is a vector shuffle from the zero vector.
if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
// Check to see if the top elements are all zeros (or bitcast of zeros).
- N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
N.getOperand(0).getNode()->hasOneUse() &&
ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
N.getOperand(0).getOperand(0).hasOneUse() &&
// If it isn't worth using an LEA, reject it.
if (Complexity <= 2)
return false;
-
+
getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
return true;
}
SDValue &Disp, SDValue &Segment) {
assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
-
+
X86ISelAddressMode AM;
AM.GV = GA->getGlobal();
AM.Disp += GA->getOffset();
} else {
AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
}
-
+
getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
return true;
}
!IsProfitableToFold(N, P, P) ||
!IsLegalToFold(N, P, P, OptLevel))
return false;
-
+
return SelectAddr(N.getNode(),
N.getOperand(1), Base, Scale, Index, Disp, Segment);
}
SDValue In1 = Node->getOperand(1);
SDValue In2L = Node->getOperand(2);
SDValue In2H = Node->getOperand(3);
+
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
return NULL;
return ResNode;
}
-// FIXME: Figure out some way to unify this with the 'or' and other code
-// below.
-SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
- if (Node->hasAnyUseOfValue(0))
- return 0;
-
- // Optimize common patterns for __sync_add_and_fetch and
- // __sync_sub_and_fetch where the result is not used. This allows us
- // to use "lock" version of add, sub, inc, dec instructions.
- // FIXME: Do not use special instructions but instead add the "lock"
- // prefix to the target node somehow. The extra information will then be
- // transferred to machine instruction and it denotes the prefix.
- SDValue Chain = Node->getOperand(0);
- SDValue Ptr = Node->getOperand(1);
- SDValue Val = Node->getOperand(2);
- SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
- return 0;
-
- bool isInc = false, isDec = false, isSub = false, isCN = false;
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
- if (CN && CN->getSExtValue() == (int32_t)CN->getSExtValue()) {
- isCN = true;
- int64_t CNVal = CN->getSExtValue();
- if (CNVal == 1)
- isInc = true;
- else if (CNVal == -1)
- isDec = true;
- else if (CNVal >= 0)
- Val = CurDAG->getTargetConstant(CNVal, NVT);
- else {
- isSub = true;
- Val = CurDAG->getTargetConstant(-CNVal, NVT);
- }
- } else if (Val.hasOneUse() &&
- Val.getOpcode() == ISD::SUB &&
- X86::isZeroNode(Val.getOperand(0))) {
- isSub = true;
- Val = Val.getOperand(1);
- }
-
- DebugLoc dl = Node->getDebugLoc();
- unsigned Opc = 0;
- switch (NVT.getSimpleVT().SimpleTy) {
- default: return 0;
- case MVT::i8:
- if (isInc)
- Opc = X86::LOCK_INC8m;
- else if (isDec)
- Opc = X86::LOCK_DEC8m;
- else if (isSub) {
- if (isCN)
- Opc = X86::LOCK_SUB8mi;
- else
- Opc = X86::LOCK_SUB8mr;
- } else {
- if (isCN)
- Opc = X86::LOCK_ADD8mi;
- else
- Opc = X86::LOCK_ADD8mr;
- }
- break;
- case MVT::i16:
- if (isInc)
- Opc = X86::LOCK_INC16m;
- else if (isDec)
- Opc = X86::LOCK_DEC16m;
- else if (isSub) {
- if (isCN) {
- if (immSext8(Val.getNode()))
- Opc = X86::LOCK_SUB16mi8;
- else
- Opc = X86::LOCK_SUB16mi;
- } else
- Opc = X86::LOCK_SUB16mr;
- } else {
- if (isCN) {
- if (immSext8(Val.getNode()))
- Opc = X86::LOCK_ADD16mi8;
- else
- Opc = X86::LOCK_ADD16mi;
- } else
- Opc = X86::LOCK_ADD16mr;
- }
- break;
- case MVT::i32:
- if (isInc)
- Opc = X86::LOCK_INC32m;
- else if (isDec)
- Opc = X86::LOCK_DEC32m;
- else if (isSub) {
- if (isCN) {
- if (immSext8(Val.getNode()))
- Opc = X86::LOCK_SUB32mi8;
- else
- Opc = X86::LOCK_SUB32mi;
- } else
- Opc = X86::LOCK_SUB32mr;
- } else {
- if (isCN) {
- if (immSext8(Val.getNode()))
- Opc = X86::LOCK_ADD32mi8;
- else
- Opc = X86::LOCK_ADD32mi;
- } else
- Opc = X86::LOCK_ADD32mr;
- }
- break;
- case MVT::i64:
- if (isInc)
- Opc = X86::LOCK_INC64m;
- else if (isDec)
- Opc = X86::LOCK_DEC64m;
- else if (isSub) {
- Opc = X86::LOCK_SUB64mr;
- if (isCN) {
- if (immSext8(Val.getNode()))
- Opc = X86::LOCK_SUB64mi8;
- else if (i64immSExt32(Val.getNode()))
- Opc = X86::LOCK_SUB64mi32;
- }
- } else {
- Opc = X86::LOCK_ADD64mr;
- if (isCN) {
- if (immSext8(Val.getNode()))
- Opc = X86::LOCK_ADD64mi8;
- else if (i64immSExt32(Val.getNode()))
- Opc = X86::LOCK_ADD64mi32;
- }
- }
- break;
- }
-
- SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, NVT), 0);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
- if (isInc || isDec) {
- SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain };
- SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6), 0);
- cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
- SDValue RetVals[] = { Undef, Ret };
- return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
- } else {
- SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
- SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0);
- cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
- SDValue RetVals[] = { Undef, Ret };
- return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
- }
-}
-
+/// Atomic opcode table
+///
enum AtomicOpc {
+ ADD,
+ SUB,
+ INC,
+ DEC,
OR,
AND,
XOR,
AtomicSzEnd
};
-static const unsigned int AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
+static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
+ {
+ X86::LOCK_ADD8mi,
+ X86::LOCK_ADD8mr,
+ X86::LOCK_ADD16mi8,
+ X86::LOCK_ADD16mi,
+ X86::LOCK_ADD16mr,
+ X86::LOCK_ADD32mi8,
+ X86::LOCK_ADD32mi,
+ X86::LOCK_ADD32mr,
+ X86::LOCK_ADD64mi8,
+ X86::LOCK_ADD64mi32,
+ X86::LOCK_ADD64mr,
+ },
+ {
+ X86::LOCK_SUB8mi,
+ X86::LOCK_SUB8mr,
+ X86::LOCK_SUB16mi8,
+ X86::LOCK_SUB16mi,
+ X86::LOCK_SUB16mr,
+ X86::LOCK_SUB32mi8,
+ X86::LOCK_SUB32mi,
+ X86::LOCK_SUB32mr,
+ X86::LOCK_SUB64mi8,
+ X86::LOCK_SUB64mi32,
+ X86::LOCK_SUB64mr,
+ },
+ {
+ 0,
+ X86::LOCK_INC8m,
+ 0,
+ 0,
+ X86::LOCK_INC16m,
+ 0,
+ 0,
+ X86::LOCK_INC32m,
+ 0,
+ 0,
+ X86::LOCK_INC64m,
+ },
+ {
+ 0,
+ X86::LOCK_DEC8m,
+ 0,
+ 0,
+ X86::LOCK_DEC16m,
+ 0,
+ 0,
+ X86::LOCK_DEC32m,
+ 0,
+ 0,
+ X86::LOCK_DEC64m,
+ },
{
X86::LOCK_OR8mi,
X86::LOCK_OR8mr,
X86::LOCK_OR32mr,
X86::LOCK_OR64mi8,
X86::LOCK_OR64mi32,
- X86::LOCK_OR64mr
+ X86::LOCK_OR64mr,
},
{
X86::LOCK_AND8mi,
X86::LOCK_AND32mr,
X86::LOCK_AND64mi8,
X86::LOCK_AND64mi32,
- X86::LOCK_AND64mr
+ X86::LOCK_AND64mr,
},
{
X86::LOCK_XOR8mi,
X86::LOCK_XOR32mr,
X86::LOCK_XOR64mi8,
X86::LOCK_XOR64mi32,
- X86::LOCK_XOR64mr
+ X86::LOCK_XOR64mr,
}
};
+// Map the value operand of an atomic-load-op onto the operand of the locked
+// instruction that replaces it, applying simple translations on the way (for
+// example atomic-load-add of a negative constant becomes lock-sub).  Op is
+// updated in place whenever the operation is translated.  The return value is
+// one of the following 3 cases:
+//  + a target-constant, when the operand is a constant that fits in a
+//    sign-extended 32-bit immediate.
+//  + empty, when the operand is not needed any more with the new op selected
+//    (INC/DEC).
+//  + any other non-empty value, returned for the caller to use as is.
+static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG,
+                                                DebugLoc dl,
+                                                enum AtomicOpc &Op, EVT NVT,
+                                                SDValue Val) {
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val)) {
+    int64_t CNVal = CN->getSExtValue();
+    // Quit if not 32-bit imm.
+    if ((int32_t)CNVal != CNVal)
+      return Val;
+    // For atomic-load-add, we could do some optimizations.
+    if (Op == ADD) {
+      // Translate to INC/DEC if ADD by 1 or -1.
+      if ((CNVal == 1) || (CNVal == -1)) {
+        Op = (CNVal == 1) ? INC : DEC;
+        // No more constant operand after being translated into INC/DEC.
+        return SDValue();
+      }
+      // Translate to SUB if ADD by negative value.  Only do so when the
+      // negated value is itself still a 32-bit immediate: negating the most
+      // negative 32-bit value yields 2^31, which would no longer satisfy the
+      // check above, so that case is kept as an ADD of the original constant.
+      if (CNVal < 0 && (int32_t)(-CNVal) == -CNVal) {
+        Op = SUB;
+        CNVal = -CNVal;
+      }
+    }
+    return CurDAG->getTargetConstant(CNVal, NVT);
+  }
+
+  // If the value operand is single-used, try to optimize it.
+  if (Op == ADD && Val.hasOneUse()) {
+    // Translate (atomic-load-add ptr (sub 0 x)) back to (lock-sub x).
+    if (Val.getOpcode() == ISD::SUB && X86::isZeroNode(Val.getOperand(0))) {
+      Op = SUB;
+      return Val.getOperand(1);
+    }
+    // A special case for i16, which needs truncating as, in most cases, it's
+    // promoted to i32. We will translate
+    // (atomic-load-add (truncate (sub 0 x))) to (lock-sub (EXTRACT_SUBREG x))
+    if (Val.getOpcode() == ISD::TRUNCATE && NVT == MVT::i16 &&
+        Val.getOperand(0).getOpcode() == ISD::SUB &&
+        X86::isZeroNode(Val.getOperand(0).getOperand(0))) {
+      Op = SUB;
+      Val = Val.getOperand(0);
+      return CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl, NVT,
+                                            Val.getOperand(1));
+    }
+  }
+
+  return Val;
+}
+
SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
if (Node->hasAnyUseOfValue(0))
return 0;
-
+
+ DebugLoc dl = Node->getDebugLoc();
+
// Optimize common patterns for __sync_or_and_fetch and similar arith
// operations where the result is not used. This allows us to use the "lock"
// version of the arithmetic instruction.
- // FIXME: Same as for 'add' and 'sub', try to merge those down here.
SDValue Chain = Node->getOperand(0);
SDValue Ptr = Node->getOperand(1);
SDValue Val = Node->getOperand(2);
// Which index into the table.
enum AtomicOpc Op;
switch (Node->getOpcode()) {
+ default:
+ return 0;
case ISD::ATOMIC_LOAD_OR:
Op = OR;
break;
case ISD::ATOMIC_LOAD_XOR:
Op = XOR;
break;
- default:
- return 0;
- }
-
- bool isCN = false;
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
- if (CN && (int32_t)CN->getSExtValue() == CN->getSExtValue()) {
- isCN = true;
- Val = CurDAG->getTargetConstant(CN->getSExtValue(), NVT);
+ case ISD::ATOMIC_LOAD_ADD:
+ Op = ADD;
+ break;
}
+ Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val);
+ bool isUnOp = !Val.getNode();
+ bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant);
+
unsigned Opc = 0;
switch (NVT.getSimpleVT().SimpleTy) {
default: return 0;
}
break;
}
-
+
assert(Opc != 0 && "Invalid arith lock transform!");
- DebugLoc dl = Node->getDebugLoc();
+ SDValue Ret;
SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
dl, NVT), 0);
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
- SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
- SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0);
+ if (isUnOp) {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain };
+ Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops,
+ array_lengthof(Ops)), 0);
+ } else {
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
+ Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops,
+ array_lengthof(Ops)), 0);
+ }
cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
SDValue RetVals[] = { Undef, Ret };
return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
return true;
}
+/// isLoadIncOrDecStore - Check whether or not the chain ending in StoreNode
+/// is suitable for turning a {load; increment or decrement; store} sequence
+/// into a single in-memory modify.
+///
+/// Opc is the opcode of the node producing StoredVal and must be X86ISD::INC
+/// or X86ISD::DEC.  On success (true), LoadNode is set to the feeding load and
+/// InputChain to the chain the fused operation should use.  Note that LoadNode
+/// may already have been written when a later check makes this return false.
+static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
+                                SDValue StoredVal, SelectionDAG *CurDAG,
+                                LoadSDNode* &LoadNode, SDValue &InputChain) {
+
+  // is the value stored the result of a DEC or INC?
+  if (!(Opc == X86ISD::DEC || Opc == X86ISD::INC)) return false;
+
+  // is the stored value result number 0 of the node that defines it?
+  if (StoredVal.getResNo() != 0) return false;
+
+  // does result 0 of that node have exactly one use?
+  if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
+
+  // is the store non-extending and non-indexed?
+  if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
+    return false;
+
+  SDValue Load = StoredVal->getOperand(0);
+  // Is the operand of the inc/dec a non-extending and non-indexed load?
+  if (!ISD::isNormalLoad(Load.getNode())) return false;
+
+  // Return LoadNode by reference.
+  LoadNode = cast<LoadSDNode>(Load);
+  // is the size of the value one that we can handle? (i.e. 64, 32, 16, or 8)
+  EVT LdVT = LoadNode->getMemoryVT();
+  if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 &&
+      LdVT != MVT::i8)
+    return false;
+
+  // Is this the only use of the loaded value?
+  if (!Load.hasOneUse())
+    return false;
+
+  // Is the address of the store the same as the load?
+  if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
+      LoadNode->getOffset() != StoreNode->getOffset())
+    return false;
+
+  // Check if the chain is produced by the load or is a TokenFactor with
+  // the load output chain as an operand. Return InputChain by reference.
+  SDValue Chain = StoreNode->getChain();
+
+  bool ChainCheck = false;
+  if (Chain == Load.getValue(1)) {
+    ChainCheck = true;
+    InputChain = LoadNode->getChain();
+  } else if (Chain.getOpcode() == ISD::TokenFactor) {
+    SmallVector<SDValue, 4> ChainOps;
+    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
+      SDValue Op = Chain.getOperand(i);
+      if (Op == Load.getValue(1)) {
+        ChainCheck = true;
+        continue;
+      }
+
+      // Make sure using Op as part of the chain would not cause a cycle here.
+      // In theory, we could check whether the chain node is a predecessor of
+      // the load. But that can be very expensive. Instead visit the uses and
+      // make sure they all have smaller node id than the load.
+      int LoadId = LoadNode->getNodeId();
+      // Take both ends of the use list from Op's own node; the end iterator
+      // must not be obtained by dereferencing the begin iterator.
+      SDNode *OpNode = Op.getNode();
+      for (SDNode::use_iterator UI = OpNode->use_begin(),
+             UE = OpNode->use_end(); UI != UE; ++UI) {
+        // Only uses of result number 0 are considered.
+        if (UI.getUse().getResNo() != 0)
+          continue;
+        if (UI->getNodeId() > LoadId)
+          return false;
+      }
+
+      ChainOps.push_back(Op);
+    }
+
+    if (ChainCheck)
+      // Make a new TokenFactor with all the other input chains except
+      // for the load.
+      InputChain = CurDAG->getNode(ISD::TokenFactor, Chain.getDebugLoc(),
+                                   MVT::Other, &ChainOps[0], ChainOps.size());
+  }
+
+  // Suitable only if the store's chain was found to depend on the load.
+  return ChainCheck;
+}
+
+/// getFusedLdStOpcode - Pick the X86 in-memory increment or decrement
+/// instruction for the memory type LdVT.  Opc selects the direction and is
+/// expected to be X86ISD::DEC or X86ISD::INC; LdVT must be i8, i16, i32 or
+/// i64.
+static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) {
+  // Decide the direction once; anything that is not a DEC has to be an INC.
+  bool IsDec = true;
+  if (Opc != X86ISD::DEC) {
+    assert(Opc == X86ISD::INC && "unrecognized opcode");
+    IsDec = false;
+  }
+
+  // Then choose the instruction by operand size.
+  if (LdVT == MVT::i64)
+    return IsDec ? X86::DEC64m : X86::INC64m;
+  if (LdVT == MVT::i32)
+    return IsDec ? X86::DEC32m : X86::INC32m;
+  if (LdVT == MVT::i16)
+    return IsDec ? X86::DEC16m : X86::INC16m;
+  if (LdVT == MVT::i8)
+    return IsDec ? X86::DEC8m : X86::INC8m;
+
+  llvm_unreachable("unrecognized size for LdVT");
+}
+
+/// SelectGather - Customized ISel for GATHER operations.  Creates the machine
+/// node with opcode Opc from the operands of Node and redirects the users of
+/// Node's two results to it.  Returns null without selecting anything when
+/// the scale operand is not a constant.
+SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) {
+  // Inputs taken from Node: operand 0 is the chain, operands 2..6 are
+  // VSrc, Base, VIdx, VMask and Scale.
+  SDValue InChain = Node->getOperand(0);
+  SDValue SrcVec = Node->getOperand(2);
+  SDValue BasePtr = Node->getOperand(3);
+  SDValue IdxVec = Node->getOperand(4);
+  SDValue MaskVec = Node->getOperand(5);
+  ConstantSDNode *ScaleCst = dyn_cast<ConstantSDNode>(Node->getOperand(6));
+  if (!ScaleCst)
+    return 0;
+
+  // Two results of the source vector type, followed by the chain.
+  EVT VecVT = SrcVec.getValueType();
+  SDVTList ResultVTs = CurDAG->getVTList(VecVT, VecVT, MVT::Other);
+
+  // Memory Operands: Base, Scale, Index, Disp, Segment
+  SDValue ZeroDisp = CurDAG->getTargetConstant(0, MVT::i32);
+  SDValue NoSegment = CurDAG->getRegister(0, MVT::i32);
+  const SDValue Ops[] = {
+    SrcVec,
+    BasePtr, getI8Imm(ScaleCst->getSExtValue()), IdxVec, ZeroDisp, NoSegment,
+    MaskVec,
+    InChain
+  };
+  SDNode *Gather = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
+                                          ResultVTs, Ops, array_lengthof(Ops));
+
+  // Node has 2 outputs: VDst and MVT::Other.
+  // Gather has 3 outputs: VDst, VMask_wb, and MVT::Other.
+  // VDst of Node maps to VDst of Gather, and Other of Node maps to Other of
+  // Gather; the mask write-back result gets no users here.
+  ReplaceUses(SDValue(Node, 0), SDValue(Gather, 0));
+  ReplaceUses(SDValue(Node, 1), SDValue(Gather, 2));
+  return Gather;
+}
+
SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
EVT NVT = Node->getValueType(0);
unsigned Opc, MOpc;
unsigned Opcode = Node->getOpcode();
DebugLoc dl = Node->getDebugLoc();
-
+
DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n');
if (Node->isMachineOpcode()) {
switch (Opcode) {
default: break;
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default: break;
+ case Intrinsic::x86_avx2_gather_d_pd:
+ case Intrinsic::x86_avx2_gather_d_pd_256:
+ case Intrinsic::x86_avx2_gather_q_pd:
+ case Intrinsic::x86_avx2_gather_q_pd_256:
+ case Intrinsic::x86_avx2_gather_d_ps:
+ case Intrinsic::x86_avx2_gather_d_ps_256:
+ case Intrinsic::x86_avx2_gather_q_ps:
+ case Intrinsic::x86_avx2_gather_q_ps_256:
+ case Intrinsic::x86_avx2_gather_d_q:
+ case Intrinsic::x86_avx2_gather_d_q_256:
+ case Intrinsic::x86_avx2_gather_q_q:
+ case Intrinsic::x86_avx2_gather_q_q_256:
+ case Intrinsic::x86_avx2_gather_d_d:
+ case Intrinsic::x86_avx2_gather_d_d_256:
+ case Intrinsic::x86_avx2_gather_q_d:
+ case Intrinsic::x86_avx2_gather_q_d_256: {
+ unsigned Opc;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic");
+ case Intrinsic::x86_avx2_gather_d_pd: Opc = X86::VGATHERDPDrm; break;
+ case Intrinsic::x86_avx2_gather_d_pd_256: Opc = X86::VGATHERDPDYrm; break;
+ case Intrinsic::x86_avx2_gather_q_pd: Opc = X86::VGATHERQPDrm; break;
+ case Intrinsic::x86_avx2_gather_q_pd_256: Opc = X86::VGATHERQPDYrm; break;
+ case Intrinsic::x86_avx2_gather_d_ps: Opc = X86::VGATHERDPSrm; break;
+ case Intrinsic::x86_avx2_gather_d_ps_256: Opc = X86::VGATHERDPSYrm; break;
+ case Intrinsic::x86_avx2_gather_q_ps: Opc = X86::VGATHERQPSrm; break;
+ case Intrinsic::x86_avx2_gather_q_ps_256: Opc = X86::VGATHERQPSYrm; break;
+ case Intrinsic::x86_avx2_gather_d_q: Opc = X86::VPGATHERDQrm; break;
+ case Intrinsic::x86_avx2_gather_d_q_256: Opc = X86::VPGATHERDQYrm; break;
+ case Intrinsic::x86_avx2_gather_q_q: Opc = X86::VPGATHERQQrm; break;
+ case Intrinsic::x86_avx2_gather_q_q_256: Opc = X86::VPGATHERQQYrm; break;
+ case Intrinsic::x86_avx2_gather_d_d: Opc = X86::VPGATHERDDrm; break;
+ case Intrinsic::x86_avx2_gather_d_d_256: Opc = X86::VPGATHERDDYrm; break;
+ case Intrinsic::x86_avx2_gather_q_d: Opc = X86::VPGATHERQDrm; break;
+ case Intrinsic::x86_avx2_gather_q_d_256: Opc = X86::VPGATHERQDYrm; break;
+ }
+ SDNode *RetVal = SelectGather(Node, Opc);
+ if (RetVal)
+ // We already called ReplaceUses inside SelectGather.
+ return NULL;
+ break;
+ }
+ }
+ break;
+ }
case X86ISD::GlobalBaseReg:
return getGlobalBaseReg();
+
case X86ISD::ATOMOR64_DAG:
- return SelectAtomic64(Node, X86::ATOMOR6432);
case X86ISD::ATOMXOR64_DAG:
- return SelectAtomic64(Node, X86::ATOMXOR6432);
case X86ISD::ATOMADD64_DAG:
- return SelectAtomic64(Node, X86::ATOMADD6432);
case X86ISD::ATOMSUB64_DAG:
- return SelectAtomic64(Node, X86::ATOMSUB6432);
case X86ISD::ATOMNAND64_DAG:
- return SelectAtomic64(Node, X86::ATOMNAND6432);
case X86ISD::ATOMAND64_DAG:
- return SelectAtomic64(Node, X86::ATOMAND6432);
- case X86ISD::ATOMSWAP64_DAG:
- return SelectAtomic64(Node, X86::ATOMSWAP6432);
-
- case ISD::ATOMIC_LOAD_ADD: {
- SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT);
+ case X86ISD::ATOMSWAP64_DAG: {
+ unsigned Opc;
+ switch (Opcode) {
+ default: llvm_unreachable("Impossible opcode");
+ case X86ISD::ATOMOR64_DAG: Opc = X86::ATOMOR6432; break;
+ case X86ISD::ATOMXOR64_DAG: Opc = X86::ATOMXOR6432; break;
+ case X86ISD::ATOMADD64_DAG: Opc = X86::ATOMADD6432; break;
+ case X86ISD::ATOMSUB64_DAG: Opc = X86::ATOMSUB6432; break;
+ case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break;
+ case X86ISD::ATOMAND64_DAG: Opc = X86::ATOMAND6432; break;
+ case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break;
+ }
+ SDNode *RetVal = SelectAtomic64(Node, Opc);
if (RetVal)
return RetVal;
break;
}
+
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_AND:
- case ISD::ATOMIC_LOAD_OR: {
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_ADD: {
SDNode *RetVal = SelectAtomicLoadArith(Node, NVT);
if (RetVal)
return RetVal;
// Make sure that we don't change the operation by removing bits.
// This only matters for OR and XOR, AND is unaffected.
- if (Opcode != ISD::AND && ((Val >> ShlVal) << ShlVal) != Val)
+ uint64_t RemovedBitsMask = (1ULL << ShlVal) - 1;
+ if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
break;
- unsigned ShlOp, Op = 0;
+ unsigned ShlOp, Op;
EVT CstVT = NVT;
// Check the minimum bitwidth for the new constant.
ShlOp = X86::SHL32ri;
switch (Opcode) {
+ default: llvm_unreachable("Impossible opcode");
case ISD::AND: Op = X86::AND32ri8; break;
case ISD::OR: Op = X86::OR32ri8; break;
case ISD::XOR: Op = X86::XOR32ri8; break;
ShlOp = X86::SHL64ri;
switch (Opcode) {
+ default: llvm_unreachable("Impossible opcode");
case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break;
case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
case X86ISD::UMUL: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
-
+
unsigned LoReg;
switch (NVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
}
-
+
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
N0, SDValue()).getValue(1);
-
+
SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
SDValue Ops[] = {N1, InFlag};
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2);
-
+
ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2));
return NULL;
}
-
+
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI: {
SDValue N0 = Node->getOperand(0);
}
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
- N0, SDValue()).getValue(1);
+ N0, SDValue()).getValue(1);
if (foldedLoad) {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
// Copy the low half of the result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- LoReg, NVT, InFlag);
+ LoReg, NVT, InFlag);
InFlag = Result.getValue(2);
ReplaceUses(SDValue(Node, 0), Result);
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
ReplaceUses(SDValue(Node, 1), Result);
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
-
+
return NULL;
}
return NULL;
}
- case X86ISD::CMP: {
+ case X86ISD::CMP:
+ case X86ISD::SUB: {
+ // Sometimes a SUB is used to perform comparison.
+ if (Opcode == X86ISD::SUB && Node->hasAnyUseOfValue(0))
+ // This node is not a CMP.
+ break;
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
// On x86-32, only the ABCD registers have 8-bit subregisters.
if (!Subtarget->is64Bit()) {
- TargetRegisterClass *TRC = 0;
+ const TargetRegisterClass *TRC;
switch (N0.getValueType().getSimpleVT().SimpleTy) {
case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
SDValue Reg = N0.getNode()->getOperand(0);
// Put the value in an ABCD register.
- TargetRegisterClass *TRC = 0;
+ const TargetRegisterClass *TRC;
switch (N0.getValueType().getSimpleVT().SimpleTy) {
case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break;
case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
break;
}
case ISD::STORE: {
+ // Change a chain of {load; incr or dec; store} of the same value into
+ // a simple increment or decrement through memory of that value, if the
+ // uses of the modified value and its address are suitable.
// The DEC64m tablegen pattern is currently not able to match the case where
- // the EFLAGS on the original DEC are used.
- // we'll need to improve tablegen to allow flags to be transferred from a
+ // the EFLAGS on the original DEC are used. (This also applies to
+ // {INC,DEC}X{64,32,16,8}.)
+ // We'll need to improve tablegen to allow flags to be transferred from a
// node in the pattern to the result node. probably with a new keyword
// for example, we have this
// def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
// def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
// [(store (add (loadi64 addr:$dst), -1), addr:$dst),
// (transferrable EFLAGS)]>;
+
StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
- SDValue Chain = StoreNode->getOperand(0);
SDValue StoredVal = StoreNode->getOperand(1);
- SDValue Address = StoreNode->getOperand(2);
- SDValue Undef = StoreNode->getOperand(3);
-
- if (StoreNode->getMemOperand()->getSize() != 8 ||
- Undef->getOpcode() != ISD::UNDEF ||
- Chain->getOpcode() != ISD::LOAD ||
- StoredVal->getOpcode() != X86ISD::DEC ||
- StoredVal.getResNo() != 0 ||
- StoredVal->getOperand(0).getNode() != Chain.getNode())
- break;
-
- //OPC_CheckPredicate, 1, // Predicate_nontemporalstore
- if (StoreNode->isNonTemporal())
- break;
-
- LoadSDNode *LoadNode = cast<LoadSDNode>(Chain.getNode());
- if (LoadNode->getOperand(1) != Address ||
- LoadNode->getOperand(2) != Undef)
- break;
+ unsigned Opc = StoredVal->getOpcode();
- if (!ISD::isNormalLoad(LoadNode))
+ LoadSDNode *LoadNode = 0;
+ SDValue InputChain;
+ if (!isLoadIncOrDecStore(StoreNode, Opc, StoredVal, CurDAG,
+ LoadNode, InputChain))
break;
- if (!ISD::isNormalStore(StoreNode))
- break;
-
- // check load chain has only one use (from the store)
- if (!Chain.hasOneUse())
- break;
-
- // Merge the input chains if they are not intra-pattern references.
- SDValue InputChain = LoadNode->getOperand(0);
-
SDValue Base, Scale, Index, Disp, Segment;
if (!SelectAddr(LoadNode, LoadNode->getBasePtr(),
Base, Scale, Index, Disp, Segment))
MemOp[0] = StoreNode->getMemOperand();
MemOp[1] = LoadNode->getMemOperand();
const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain };
- MachineSDNode *Result = CurDAG->getMachineNode(X86::DEC64m,
+ EVT LdVT = LoadNode->getMemoryVT();
+ unsigned newOpc = getFusedLdStOpcode(LdVT, Opc);
+ MachineSDNode *Result = CurDAG->getMachineNode(newOpc,
Node->getDebugLoc(),
MVT::i32, MVT::Other, Ops,
array_lengthof(Ops));
return Result;
}
+
+ // FIXME: Custom handling because TableGen doesn't support multiple implicit
+ // defs in an instruction pattern
+ case X86ISD::PCMPESTRI: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+ SDValue N2 = Node->getOperand(2);
+ SDValue N3 = Node->getOperand(3);
+ SDValue N4 = Node->getOperand(4);
+
+ // Make sure last argument is a constant
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N4);
+ if (!Cst)
+ break;
+
+ uint64_t Imm = Cst->getZExtValue();
+
+ SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
+ X86::EAX, N1, SDValue()).getValue(1);
+ InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX,
+ N3, InFlag).getValue(1);
+
+ SDValue Ops[] = { N0, N2, getI8Imm(Imm), InFlag };
+ unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr :
+ X86::PCMPESTRIrr;
+ InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops,
+ array_lengthof(Ops)), 0);
+
+ if (!SDValue(Node, 0).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::ECX, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(SDValue(Node, 0), Result);
+ }
+ if (!SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::EFLAGS, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(SDValue(Node, 1), Result);
+ }
+
+ return NULL;
+ }
+
+ // FIXME: Custom handling because TableGen doesn't support multiple implicit
+ // defs in an instruction pattern
+ case X86ISD::PCMPISTRI: {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+ SDValue N2 = Node->getOperand(2);
+
+ // Make sure last argument is a constant
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N2);
+ if (!Cst)
+ break;
+
+ uint64_t Imm = Cst->getZExtValue();
+
+ SDValue Ops[] = { N0, N1, getI8Imm(Imm) };
+ unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr :
+ X86::PCMPISTRIrr;
+ SDValue InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops,
+ array_lengthof(Ops)), 0);
+
+ if (!SDValue(Node, 0).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::ECX, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(SDValue(Node, 0), Result);
+ }
+ if (!SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::EFLAGS, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(SDValue(Node, 1), Result);
+ }
+
+ return NULL;
+ }
}
SDNode *ResNode = SelectCode(Node);
return true;
break;
}
-
+
OutOps.push_back(Op0);
OutOps.push_back(Op1);
OutOps.push_back(Op2);
return false;
}
-/// createX86ISelDag - This pass converts a legalized DAG into a
+/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
///
/// Factory entry point: heap-allocates a new X86DAGToDAGISel, forwarding TM
/// and OptLevel unchanged to its constructor, and returns the raw pointer.
/// NOTE(review): nothing in this function releases the allocation, so
/// ownership presumably passes to whoever receives the pointer (e.g. the pass
/// manager the pass is added to) -- confirm at the call site.
///
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
- llvm::CodeGenOpt::Level OptLevel) {
+ CodeGenOpt::Level OptLevel) {
return new X86DAGToDAGISel(TM, OptLevel);
}