//
// The LLVM Compiler Infrastructure
//
-// This file was developed by the Evan Cheng and is distributed under
-// the University of Illinois Open Source License. See LICENSE.TXT for details.
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
+#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
STATISTIC(NumFPKill , "Number of FP_REG_KILL instructions added");
STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
-
//===----------------------------------------------------------------------===//
// Pattern Matcher Implementation
//===----------------------------------------------------------------------===//
int FrameIndex;
} Base;
- bool isRIPRel; // RIP relative?
+ bool isRIPRel; // RIP as base?
unsigned Scale;
SDOperand IndexReg;
unsigned Disp;
bool MatchAddress(SDOperand N, X86ISelAddressMode &AM,
bool isRoot = true, unsigned Depth = 0);
+ bool MatchAddressBase(SDOperand N, X86ISelAddressMode &AM,
+ bool isRoot, unsigned Depth);
bool SelectAddr(SDOperand Op, SDOperand N, SDOperand &Base,
SDOperand &Scale, SDOperand &Index, SDOperand &Disp);
bool SelectLEAAddr(SDOperand Op, SDOperand N, SDOperand &Base,
bool TryFoldLoad(SDOperand P, SDOperand N,
SDOperand &Base, SDOperand &Scale,
SDOperand &Index, SDOperand &Disp);
- void InstructionSelectPreprocess(SelectionDAG &DAG);
+ void PreprocessForRMW(SelectionDAG &DAG);
+ void PreprocessForFPConvert(SelectionDAG &DAG);
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
/// base register. Return the virtual register that holds this value.
SDNode *getGlobalBaseReg();
+ /// getTruncate - return an SDNode that implements a subreg based truncate
+  /// of the specified operand to the specified value type.
+ SDNode *getTruncate(SDOperand N0, MVT::ValueType VT);
+
#ifndef NDEBUG
unsigned Indent;
#endif
Store.getOperand(2), Store.getOperand(3));
}
-/// InstructionSelectPreprocess - Preprocess the DAG to allow the instruction
-/// selector to pick more load-modify-store instructions. This is a common
-/// case:
+/// PreprocessForRMW - Preprocess the DAG to make instruction selection better.
+/// This is only run if not in -fast mode (aka -O0).
+/// This allows the instruction selector to pick more read-modify-write
+/// instructions. This is a common case:
///
/// [Load chain]
/// ^
/// \ /
/// \ /
/// [Store]
-void X86DAGToDAGISel::InstructionSelectPreprocess(SelectionDAG &DAG) {
+void X86DAGToDAGISel::PreprocessForRMW(SelectionDAG &DAG) {
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
E = DAG.allnodes_end(); I != E; ++I) {
if (!ISD::isNON_TRUNCStore(I))
RModW = true;
std::swap(N10, N11);
}
- RModW = RModW && N10.Val->isOperand(Chain.Val) && N10.hasOneUse() &&
+ RModW = RModW && N10.Val->isOperandOf(Chain.Val) && N10.hasOneUse() &&
(N10.getOperand(1) == N2) &&
(N10.Val->getValueType(0) == N1.getValueType());
if (RModW)
case X86ISD::SHRD: {
SDOperand N10 = N1.getOperand(0);
if (ISD::isNON_EXTLoad(N10.Val))
- RModW = N10.Val->isOperand(Chain.Val) && N10.hasOneUse() &&
+ RModW = N10.Val->isOperandOf(Chain.Val) && N10.hasOneUse() &&
(N10.getOperand(1) == N2) &&
(N10.Val->getValueType(0) == N1.getValueType());
if (RModW)
}
}
+
+/// PreprocessForFPConvert - Walk over the dag, lowering fpround and fpextend
+/// nodes that target the FP stack into a store and a load through a stack
+/// slot.  This is a gross hack.  We would like to simply mark these as being
+/// illegal, but when we do that, legalize produces these when it expands
+/// calls, then expands these in the same legalize pass.  We would like dag
+/// combine to be able to hack on these between the call expansion and the node
+/// legalization.  As such this pass basically does "really late" legalization
+/// of these inline with the X86 isel pass.
+void X86DAGToDAGISel::PreprocessForFPConvert(SelectionDAG &DAG) {
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = DAG.allnodes_end(); I != E; ) {
+    SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.
+    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
+      continue;
+
+    // If the source and destination are SSE registers, then this is a legal
+    // conversion that should not be lowered.
+    MVT::ValueType SrcVT = N->getOperand(0).getValueType();
+    MVT::ValueType DstVT = N->getValueType(0);
+    bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
+    bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
+    if (SrcIsSSE && DstIsSSE)
+      continue;
+
+    if (!SrcIsSSE && !DstIsSSE) {
+      // If this is an FPStack extension, it is a noop.
+      if (N->getOpcode() == ISD::FP_EXTEND)
+        continue;
+      // If this is a value-preserving FPStack truncation, it is a noop.
+      // (Operand 1 of FP_ROUND is the "truncation is value-preserving" flag.)
+      if (N->getConstantOperandVal(1))
+        continue;
+    }
+
+    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
+    // FPStack has extload and truncstore.  SSE can fold direct loads into other
+    // operations.  Based on this, decide what we want to do.
+    MVT::ValueType MemVT;
+    if (N->getOpcode() == ISD::FP_ROUND)
+      MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
+    else
+      MemVT = SrcIsSSE ? SrcVT : DstVT;
+
+    SDOperand MemTmp = DAG.CreateStackTemporary(MemVT);
+
+    // Convert by storing with truncation and reloading with extension through
+    // the stack temporary.
+    // FIXME: optimize the case where the src/dest is a load or store?
+    SDOperand Store = DAG.getTruncStore(DAG.getEntryNode(), N->getOperand(0),
+                                        MemTmp, NULL, 0, MemVT);
+    SDOperand Result = DAG.getExtLoad(ISD::EXTLOAD, DstVT, Store, MemTmp,
+                                      NULL, 0, MemVT);
+
+    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
+    // extload we created.  This will cause general havok on the dag because
+    // anything below the conversion could be folded into other existing nodes.
+    // To avoid invalidating 'I', back it up to the convert node.
+    --I;
+    DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result);
+
+    // Now that we did that, the node is dead.  Increment the iterator to the
+    // next node to process, then delete N.
+    ++I;
+    DAG.DeleteNode(N);
+  }
+}
+
/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
/// when it has created a SelectionDAG for us to codegen.
void X86DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
MachineFunction::iterator FirstMBB = BB;
if (!FastISel)
- InstructionSelectPreprocess(DAG);
+ PreprocessForRMW(DAG);
+
+ // FIXME: This should only happen when not -fast.
+ PreprocessForFPConvert(DAG);
// Codegen the basic block.
#ifndef NDEBUG
// If we are emitting FP stack code, scan the basic block to determine if this
// block defines any FP values. If so, put an FP_REG_KILL instruction before
// the terminator of the block.
- if (!Subtarget->hasSSE2()) {
- // Note that FP stack instructions *are* used in SSE code when returning
- // values, but these are not live out of the basic block, so we don't need
- // an FP_REG_KILL in this case either.
+
+ // Note that FP stack instructions are used in all modes for long double,
+ // so we always need to do this check.
+ // Also note that it's possible for an FP stack register to be live across
+ // an instruction that produces multiple basic blocks (SSE CMOV) so we
+ // must check all the generated basic blocks.
+
+ // Scan all of the machine instructions in these MBBs, checking for FP
+ // stores. (RFP32 and RFP64 will not exist in SSE mode, but RFP80 might.)
+ MachineFunction::iterator MBBI = FirstMBB;
+ do {
bool ContainsFPCode = false;
-
- // Scan all of the machine instructions in these MBBs, checking for FP
- // stores.
- MachineFunction::iterator MBBI = FirstMBB;
- do {
- for (MachineBasicBlock::iterator I = MBBI->begin(), E = MBBI->end();
- !ContainsFPCode && I != E; ++I) {
- if (I->getNumOperands() != 0 && I->getOperand(0).isRegister()) {
- const TargetRegisterClass *clas;
- for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
- if (I->getOperand(op).isRegister() && I->getOperand(op).isDef() &&
- MRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) &&
- ((clas = RegMap->getRegClass(I->getOperand(0).getReg())) ==
- X86::RFP32RegisterClass ||
- clas == X86::RFP64RegisterClass)) {
- ContainsFPCode = true;
- break;
- }
+ for (MachineBasicBlock::iterator I = MBBI->begin(), E = MBBI->end();
+ !ContainsFPCode && I != E; ++I) {
+ if (I->getNumOperands() != 0 && I->getOperand(0).isRegister()) {
+ const TargetRegisterClass *clas;
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
+ if (I->getOperand(op).isRegister() && I->getOperand(op).isDef() &&
+ TargetRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) &&
+ ((clas = RegInfo->getRegClass(I->getOperand(0).getReg())) ==
+ X86::RFP32RegisterClass ||
+ clas == X86::RFP64RegisterClass ||
+ clas == X86::RFP80RegisterClass)) {
+ ContainsFPCode = true;
+ break;
}
}
}
- } while (!ContainsFPCode && &*(MBBI++) != BB);
-
+ }
// Check PHI nodes in successor blocks. These PHI's will be lowered to have
- // a copy of the input value in this block.
+ // a copy of the input value in this block. In SSE mode, we only care about
+ // 80-bit values.
if (!ContainsFPCode) {
// Final check, check LLVM BB's that are successors to the LLVM BB
// corresponding to BB for FP PHI nodes.
!ContainsFPCode && SI != E; ++SI) {
for (BasicBlock::const_iterator II = SI->begin();
(PN = dyn_cast<PHINode>(II)); ++II) {
- if (PN->getType()->isFloatingPoint()) {
+ if (PN->getType()==Type::X86_FP80Ty ||
+ (!Subtarget->hasSSE1() && PN->getType()->isFloatingPoint()) ||
+ (!Subtarget->hasSSE2() && PN->getType()==Type::DoubleTy)) {
ContainsFPCode = true;
break;
}
}
}
}
-
// Finally, if we found any FP code, emit the FP_REG_KILL instruction.
if (ContainsFPCode) {
- BuildMI(*BB, BB->getFirstTerminator(),
+ BuildMI(*MBBI, MBBI->getFirstTerminator(),
TM.getInstrInfo()->get(X86::FP_REG_KILL));
++NumFPKill;
}
- }
+ } while (&*(MBBI++) != BB);
}
/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
const TargetInstrInfo *TII = TM.getInstrInfo();
if (Subtarget->isTargetCygMing())
BuildMI(BB, TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");
-
- // Switch the FPU to 64-bit precision mode for better compatibility and speed.
- int CWFrameIdx = MFI->CreateStackObject(2, 2);
- addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx);
-
- // Set the high part to be 64-bit precision.
- addFrameReference(BuildMI(BB, TII->get(X86::MOV8mi)),
- CWFrameIdx, 1).addImm(2);
-
- // Reload the modified control word now.
- addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
}
void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
/// MatchAddress - Add the specified node to the specified addressing mode,
/// returning true if it cannot be done. This just pattern matches for the
-/// addressing mode
+/// addressing mode.
bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
bool isRoot, unsigned Depth) {
- if (Depth > 5) {
- // Default, generate it as a register.
- AM.BaseType = X86ISelAddressMode::RegBase;
- AM.Base.Reg = N;
- return false;
- }
+ // Limit recursion.
+ if (Depth > 5)
+ return MatchAddressBase(N, AM, isRoot, Depth);
// RIP relative addressing: %rip + 32-bit displacement!
if (AM.isRIPRel) {
}
int id = N.Val->getNodeId();
- bool Available = isSelected(id);
+ bool AlreadySelected = isSelected(id); // Already selected, not yet replaced.
switch (N.getOpcode()) {
default: break;
case X86ISD::Wrapper: {
bool is64Bit = Subtarget->is64Bit();
// Under X86-64 non-small code model, GV (and friends) are 64-bits.
- if (is64Bit && TM.getCodeModel() != CodeModel::Small)
+ // Also, base and index reg must be 0 in order to use rip as base.
+ if (is64Bit && (TM.getCodeModel() != CodeModel::Small ||
+ AM.Base.Reg.Val || AM.IndexReg.Val))
break;
if (AM.GV != 0 || AM.CP != 0 || AM.ES != 0 || AM.JT != -1)
break;
// If value is available in a register both base and index components have
// been picked, we can't fit the result available in the register in the
// addressing mode. Duplicate GlobalAddress or ConstantPool as displacement.
- if (!Available || (AM.Base.Reg.Val && AM.IndexReg.Val)) {
- bool isStatic = TM.getRelocationModel() == Reloc::Static;
+ if (!AlreadySelected || (AM.Base.Reg.Val && AM.IndexReg.Val)) {
SDOperand N0 = N.getOperand(0);
- // Mac OS X X86-64 lower 4G address is not available.
- bool isAbs32 = !is64Bit || (isStatic && !Subtarget->isTargetDarwin());
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
GlobalValue *GV = G->getGlobal();
- if (isAbs32 || isRoot) {
- AM.GV = GV;
- AM.Disp += G->getOffset();
- AM.isRIPRel = !isAbs32;
- return false;
- }
+ AM.GV = GV;
+ AM.Disp += G->getOffset();
+ AM.isRIPRel = TM.getRelocationModel() != Reloc::Static &&
+ Subtarget->isPICStyleRIPRel();
+ return false;
} else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
- if (isAbs32 || isRoot) {
- AM.CP = CP->getConstVal();
- AM.Align = CP->getAlignment();
- AM.Disp += CP->getOffset();
- AM.isRIPRel = !isStatic;
- return false;
- }
+ AM.CP = CP->getConstVal();
+ AM.Align = CP->getAlignment();
+ AM.Disp += CP->getOffset();
+ AM.isRIPRel = TM.getRelocationModel() != Reloc::Static &&
+ Subtarget->isPICStyleRIPRel();
+ return false;
} else if (ExternalSymbolSDNode *S =dyn_cast<ExternalSymbolSDNode>(N0)) {
- if (isAbs32 || isRoot) {
- AM.ES = S->getSymbol();
- AM.isRIPRel = !isStatic;
- return false;
- }
+ AM.ES = S->getSymbol();
+ AM.isRIPRel = TM.getRelocationModel() != Reloc::Static &&
+ Subtarget->isPICStyleRIPRel();
+ return false;
} else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
- if (isAbs32 || isRoot) {
- AM.JT = J->getIndex();
- AM.isRIPRel = !isStatic;
- return false;
- }
+ AM.JT = J->getIndex();
+ AM.isRIPRel = TM.getRelocationModel() != Reloc::Static &&
+ Subtarget->isPICStyleRIPRel();
+ return false;
}
}
break;
break;
case ISD::SHL:
- if (!Available && AM.IndexReg.Val == 0 && AM.Scale == 1)
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1))) {
- unsigned Val = CN->getValue();
- if (Val == 1 || Val == 2 || Val == 3) {
- AM.Scale = 1 << Val;
- SDOperand ShVal = N.Val->getOperand(0);
-
- // Okay, we know that we have a scale by now. However, if the scaled
- // value is an add of something and a constant, we can fold the
- // constant into the disp field here.
- if (ShVal.Val->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
- isa<ConstantSDNode>(ShVal.Val->getOperand(1))) {
- AM.IndexReg = ShVal.Val->getOperand(0);
- ConstantSDNode *AddVal =
- cast<ConstantSDNode>(ShVal.Val->getOperand(1));
- uint64_t Disp = AM.Disp + (AddVal->getValue() << Val);
- if (isInt32(Disp))
- AM.Disp = Disp;
- else
- AM.IndexReg = ShVal;
- } else {
+ if (AlreadySelected || AM.IndexReg.Val != 0 || AM.Scale != 1 || AM.isRIPRel)
+ break;
+
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1))) {
+ unsigned Val = CN->getValue();
+ if (Val == 1 || Val == 2 || Val == 3) {
+ AM.Scale = 1 << Val;
+ SDOperand ShVal = N.Val->getOperand(0);
+
+ // Okay, we know that we have a scale by now. However, if the scaled
+ // value is an add of something and a constant, we can fold the
+ // constant into the disp field here.
+ if (ShVal.Val->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
+ isa<ConstantSDNode>(ShVal.Val->getOperand(1))) {
+ AM.IndexReg = ShVal.Val->getOperand(0);
+ ConstantSDNode *AddVal =
+ cast<ConstantSDNode>(ShVal.Val->getOperand(1));
+ uint64_t Disp = AM.Disp + (AddVal->getValue() << Val);
+ if (isInt32(Disp))
+ AM.Disp = Disp;
+ else
AM.IndexReg = ShVal;
- }
- return false;
+ } else {
+ AM.IndexReg = ShVal;
}
+ return false;
}
break;
+ }
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI:
+ // A mul_lohi where we need the low part can be folded as a plain multiply.
+ if (N.ResNo != 0) break;
+ // FALL THROUGH
case ISD::MUL:
// X*[3,5,9] -> X+X*[2,4,8]
- if (!Available &&
+ if (!AlreadySelected &&
AM.BaseType == X86ISelAddressMode::RegBase &&
AM.Base.Reg.Val == 0 &&
- AM.IndexReg.Val == 0) {
+ AM.IndexReg.Val == 0 &&
+ !AM.isRIPRel) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1)))
if (CN->getValue() == 3 || CN->getValue() == 5 || CN->getValue() == 9) {
AM.Scale = unsigned(CN->getValue())-1;
break;
case ISD::ADD:
- if (!Available) {
+ if (!AlreadySelected) {
X86ISelAddressMode Backup = AM;
if (!MatchAddress(N.Val->getOperand(0), AM, false, Depth+1) &&
!MatchAddress(N.Val->getOperand(1), AM, false, Depth+1))
case ISD::OR:
// Handle "X | C" as "X + C" iff X is known to have C bits clear.
- if (!Available) {
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- X86ISelAddressMode Backup = AM;
- // Start with the LHS as an addr mode.
- if (!MatchAddress(N.getOperand(0), AM, false) &&
- // Address could not have picked a GV address for the displacement.
- AM.GV == NULL &&
- // On x86-64, the resultant disp must fit in 32-bits.
- isInt32(AM.Disp + CN->getSignExtended()) &&
- // Check to see if the LHS & C is zero.
- CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getValue())) {
- AM.Disp += CN->getValue();
- return false;
- }
- AM = Backup;
+ if (AlreadySelected) break;
+
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ X86ISelAddressMode Backup = AM;
+ // Start with the LHS as an addr mode.
+ if (!MatchAddress(N.getOperand(0), AM, false) &&
+ // Address could not have picked a GV address for the displacement.
+ AM.GV == NULL &&
+ // On x86-64, the resultant disp must fit in 32-bits.
+ isInt32(AM.Disp + CN->getSignExtended()) &&
+ // Check to see if the LHS & C is zero.
+ CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
+ AM.Disp += CN->getValue();
+ return false;
}
+ AM = Backup;
}
break;
+
+ case ISD::AND: {
+ // Handle "(x << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
+ // allows us to fold the shift into this addressing mode.
+ if (AlreadySelected) break;
+ SDOperand Shift = N.getOperand(0);
+ if (Shift.getOpcode() != ISD::SHL) break;
+
+ // Scale must not be used already.
+ if (AM.IndexReg.Val != 0 || AM.Scale != 1) break;
+
+ // Not when RIP is used as the base.
+ if (AM.isRIPRel) break;
+
+ ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
+ if (!C1 || !C2) break;
+
+ // Not likely to be profitable if either the AND or SHIFT node has more
+ // than one use (unless all uses are for address computation). Besides,
+ // isel mechanism requires their node ids to be reused.
+ if (!N.hasOneUse() || !Shift.hasOneUse())
+ break;
+
+ // Verify that the shift amount is something we can fold.
+ unsigned ShiftCst = C1->getValue();
+ if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3)
+ break;
+
+ // Get the new AND mask, this folds to a constant.
+ SDOperand NewANDMask = CurDAG->getNode(ISD::SRL, N.getValueType(),
+ SDOperand(C2, 0), SDOperand(C1, 0));
+ SDOperand NewAND = CurDAG->getNode(ISD::AND, N.getValueType(),
+ Shift.getOperand(0), NewANDMask);
+ NewANDMask.Val->setNodeId(Shift.Val->getNodeId());
+ NewAND.Val->setNodeId(N.Val->getNodeId());
+
+ AM.Scale = 1 << ShiftCst;
+ AM.IndexReg = NewAND;
+ return false;
+ }
}
+ return MatchAddressBase(N, AM, isRoot, Depth);
+}
+
+/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
+/// specified addressing mode without any further recursion.
+bool X86DAGToDAGISel::MatchAddressBase(SDOperand N, X86ISelAddressMode &AM,
+ bool isRoot, unsigned Depth) {
// Is the base register already occupied?
if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.Val) {
// If so, check to see if the scale index register is set.
- if (AM.IndexReg.Val == 0) {
+ if (AM.IndexReg.Val == 0 && !AM.isRIPRel) {
AM.IndexReg = N;
AM.Scale = 1;
return false;
return ((isa<ConstantSDNode>(Elt) &&
cast<ConstantSDNode>(Elt)->getValue() == 0) ||
(isa<ConstantFPSDNode>(Elt) &&
- cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0)));
+ cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
}
// Also handle the case where we explicitly require zeros in the top
// elements. This is a vector shuffle from the zero vector.
if (N.getOpcode() == ISD::VECTOR_SHUFFLE && N.Val->hasOneUse() &&
- N.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
+ // Check to see if the top elements are all zeros (or bitcast of zeros).
+ ISD::isBuildVectorAllZeros(N.getOperand(0).Val) &&
N.getOperand(1).getOpcode() == ISD::SCALAR_TO_VECTOR &&
N.getOperand(1).Val->hasOneUse() &&
ISD::isNON_EXTLoad(N.getOperand(1).getOperand(0).Val) &&
N.getOperand(1).getOperand(0).hasOneUse()) {
- // Check to see if the BUILD_VECTOR is building a zero vector.
- SDOperand BV = N.getOperand(0);
- for (unsigned i = 0, e = BV.getNumOperands(); i != e; ++i)
- if (!isZeroNode(BV.getOperand(i)) &&
- BV.getOperand(i).getOpcode() != ISD::UNDEF)
- return false; // Not a zero/undef vector.
// Check to see if the shuffle mask is 4/L/L/L or 2/L, where L is something
// from the LHS.
- unsigned VecWidth = BV.getNumOperands();
+ unsigned VecWidth=MVT::getVectorNumElements(N.getOperand(0).getValueType());
SDOperand ShufMask = N.getOperand(2);
assert(ShufMask.getOpcode() == ISD::BUILD_VECTOR && "Invalid shuf mask!");
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(ShufMask.getOperand(0))) {
assert(!Subtarget->is64Bit() && "X86-64 PIC uses RIP relative addressing");
if (!GlobalBaseReg) {
// Insert the set of GlobalBaseReg into the first MBB of the function
- MachineBasicBlock &FirstMBB = BB->getParent()->front();
+ MachineFunction *MF = BB->getParent();
+ MachineBasicBlock &FirstMBB = MF->front();
MachineBasicBlock::iterator MBBI = FirstMBB.begin();
- SSARegMap *RegMap = BB->getParent()->getSSARegMap();
- unsigned PC = RegMap->createVirtualRegister(X86::GR32RegisterClass);
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ unsigned PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
const TargetInstrInfo *TII = TM.getInstrInfo();
- BuildMI(FirstMBB, MBBI, TII->get(X86::MovePCtoStack));
- BuildMI(FirstMBB, MBBI, TII->get(X86::POP32r), PC);
+ // Operand of MovePCtoStack is completely ignored by asm printer. It's
+ // only used in JIT code emission as displacement to pc.
+ BuildMI(FirstMBB, MBBI, TII->get(X86::MOVPC32r), PC).addImm(0);
// If we're using vanilla 'GOT' PIC style, we should use relative addressing
// not to pc, but to _GLOBAL_ADDRESS_TABLE_ external
if (TM.getRelocationModel() == Reloc::PIC_ &&
Subtarget->isPICStyleGOT()) {
- GlobalBaseReg = RegMap->createVirtualRegister(X86::GR32RegisterClass);
- BuildMI(FirstMBB, MBBI, TII->get(X86::ADD32ri), GlobalBaseReg).
- addReg(PC).
- addExternalSymbol("_GLOBAL_OFFSET_TABLE_");
+ GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
+ BuildMI(FirstMBB, MBBI, TII->get(X86::ADD32ri), GlobalBaseReg)
+ .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_");
} else {
GlobalBaseReg = PC;
}
return FindCallStartFromCall(Node->getOperand(0).Val);
}
+/// getTruncate - Return an SDNode that implements a subreg-based truncate of
+/// N0 down to the narrower value type VT, by extracting the appropriate
+/// sub-register.  On 32-bit targets an i8 truncate first copies the source
+/// into a register class known to have an 8-bit sub-register.
+SDNode *X86DAGToDAGISel::getTruncate(SDOperand N0, MVT::ValueType VT) {
+  SDOperand SRIdx;
+  switch (VT) {
+  case MVT::i8:
+    SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
+    // Ensure that the source register has an 8-bit subreg on 32-bit targets
+    if (!Subtarget->is64Bit()) {
+      unsigned Opc;
+      // NOTE(review): this declaration shadows the parameter VT for the rest
+      // of this scope, so the EXTRACT_SUBREG below is created with the
+      // source-sized type (i16/i32) rather than i8 -- confirm intended.
+      MVT::ValueType VT;
+      switch (N0.getValueType()) {
+      default: assert(0 && "Unknown truncate!");
+      case MVT::i16:
+        Opc = X86::MOV16to16_;
+        VT = MVT::i16;
+        break;
+      case MVT::i32:
+        Opc = X86::MOV32to32_;
+        VT = MVT::i32;
+        break;
+      }
+      // The copy pseudo also produces an MVT::Flag result; glue it to the
+      // extract via N0.getValue(1) so the two stay adjacent.
+      N0 = SDOperand(CurDAG->getTargetNode(Opc, VT, MVT::Flag, N0), 0);
+      return CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
+                                   VT, N0, SRIdx, N0.getValue(1));
+    }
+    break;
+  case MVT::i16:
+    SRIdx = CurDAG->getTargetConstant(2, MVT::i32); // SubRegSet 2
+    break;
+  case MVT::i32:
+    SRIdx = CurDAG->getTargetConstant(3, MVT::i32); // SubRegSet 3
+    break;
+  default: assert(0 && "Unknown truncate!"); break;
+  }
+  return CurDAG->getTargetNode(X86::EXTRACT_SUBREG, VT, N0, SRIdx);
+}
+
+
SDNode *X86DAGToDAGISel::Select(SDOperand N) {
SDNode *Node = N.Val;
MVT::ValueType NVT = Node->getValueType(0);
case X86ISD::GlobalBaseReg:
return getGlobalBaseReg();
+ case X86ISD::FP_GET_ST0_ST1: {
+ SDOperand Chain = N.getOperand(0);
+ SDOperand InFlag = N.getOperand(1);
+ AddToISelQueue(Chain);
+ AddToISelQueue(InFlag);
+ std::vector<MVT::ValueType> Tys;
+ Tys.push_back(MVT::f80);
+ Tys.push_back(MVT::f80);
+ Tys.push_back(MVT::Other);
+ Tys.push_back(MVT::Flag);
+ SDOperand Ops[] = { Chain, InFlag };
+ SDNode *ResNode = CurDAG->getTargetNode(X86::FpGET_ST0_ST1, Tys,
+ Ops, 2);
+ Chain = SDOperand(ResNode, 2);
+ InFlag = SDOperand(ResNode, 3);
+ ReplaceUses(SDOperand(N.Val, 2), Chain);
+ ReplaceUses(SDOperand(N.Val, 3), InFlag);
+ return ResNode;
+ }
+
case ISD::ADD: {
// Turn ADD X, c to MOV32ri X+c. This cannot be done with tblgen'd
// code and is matched first so to prevent it from being turned into
// LEA32r X+c.
- // In 64-bit mode, use LEA to take advantage of RIP-relative addressing.
+ // In 64-bit small code size mode, use LEA to take advantage of
+ // RIP-relative addressing.
+ if (TM.getCodeModel() != CodeModel::Small)
+ break;
MVT::ValueType PtrVT = TLI.getPointerTy();
SDOperand N0 = N.getOperand(0);
SDOperand N1 = N.getOperand(1);
break;
}
- case ISD::MULHU:
- case ISD::MULHS: {
- if (Opcode == ISD::MULHU)
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ SDOperand N0 = Node->getOperand(0);
+ SDOperand N1 = Node->getOperand(1);
+
+ bool isSigned = Opcode == ISD::SMUL_LOHI;
+ if (!isSigned)
switch (NVT) {
default: assert(0 && "Unsupported VT!");
case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
}
- SDOperand N0 = Node->getOperand(0);
- SDOperand N1 = Node->getOperand(1);
-
- bool foldedLoad = false;
SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
- foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
- // MULHU and MULHS are commmutative
+ bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
+    // Multiplication is commutative, so try folding the other operand too.
if (!foldedLoad) {
foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3);
- if (foldedLoad) {
- N0 = Node->getOperand(1);
- N1 = Node->getOperand(0);
- }
+ if (foldedLoad)
+ std::swap(N0, N1);
}
- SDOperand Chain;
- if (foldedLoad) {
- Chain = N1.getOperand(0);
- AddToISelQueue(Chain);
- } else
- Chain = CurDAG->getEntryNode();
-
- SDOperand InFlag(0, 0);
AddToISelQueue(N0);
- Chain = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(LoReg, NVT),
- N0, InFlag);
- InFlag = Chain.getValue(1);
+ SDOperand InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), LoReg,
+ N0, SDOperand()).getValue(1);
if (foldedLoad) {
+ AddToISelQueue(N1.getOperand(0));
AddToISelQueue(Tmp0);
AddToISelQueue(Tmp1);
AddToISelQueue(Tmp2);
AddToISelQueue(Tmp3);
- SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Chain, InFlag };
+ SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N1.getOperand(0), InFlag };
SDNode *CNode =
CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
- Chain = SDOperand(CNode, 0);
InFlag = SDOperand(CNode, 1);
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), SDOperand(CNode, 0));
} else {
AddToISelQueue(N1);
InFlag =
SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
}
- SDOperand Result = CurDAG->getCopyFromReg(Chain, HiReg, NVT, InFlag);
- ReplaceUses(N.getValue(0), Result);
- if (foldedLoad)
- ReplaceUses(N1.getValue(1), Result.getValue(1));
+ // Copy the low half of the result, if it is needed.
+ if (!N.getValue(0).use_empty()) {
+ SDOperand Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ LoReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(N.getValue(0), Result);
+#ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "=> ";
+ DEBUG(Result.Val->dump(CurDAG));
+ DOUT << "\n";
+#endif
+ }
+ // Copy the high half of the result, if it is needed.
+ if (!N.getValue(1).use_empty()) {
+ SDOperand Result;
+ if (HiReg == X86::AH && Subtarget->is64Bit()) {
+ // Prevent use of AH in a REX instruction by referencing AX instead.
+ // Shift it down 8 bits.
+ Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ X86::AX, MVT::i16, InFlag);
+ InFlag = Result.getValue(2);
+ Result = SDOperand(CurDAG->getTargetNode(X86::SHR16ri, MVT::i16, Result,
+ CurDAG->getTargetConstant(8, MVT::i8)), 0);
+ // Then truncate it down to i8.
+ SDOperand SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
+ Result = SDOperand(CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
+ MVT::i8, Result, SRIdx), 0);
+ } else {
+ Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ HiReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ }
+ ReplaceUses(N.getValue(1), Result);
+#ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "=> ";
+ DEBUG(Result.Val->dump(CurDAG));
+ DOUT << "\n";
+#endif
+ }
#ifndef NDEBUG
- DOUT << std::string(Indent-2, ' ') << "=> ";
- DEBUG(Result.Val->dump(CurDAG));
- DOUT << "\n";
Indent -= 2;
#endif
+
return NULL;
}
- case ISD::SDIV:
- case ISD::UDIV:
- case ISD::SREM:
- case ISD::UREM: {
- bool isSigned = Opcode == ISD::SDIV || Opcode == ISD::SREM;
- bool isDiv = Opcode == ISD::SDIV || Opcode == ISD::UDIV;
+ case ISD::SDIVREM:
+ case ISD::UDIVREM: {
+ SDOperand N0 = Node->getOperand(0);
+ SDOperand N1 = Node->getOperand(1);
+
+ bool isSigned = Opcode == ISD::SDIVREM;
if (!isSigned)
switch (NVT) {
default: assert(0 && "Unsupported VT!");
break;
}
- SDOperand N0 = Node->getOperand(0);
- SDOperand N1 = Node->getOperand(1);
- SDOperand InFlag(0, 0);
+ SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
+ bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
+
+ SDOperand InFlag;
if (NVT == MVT::i8 && !isSigned) {
// Special case for div8, just use a move with zero extension to AX to
// clear the upper 8 bits (AH).
SDOperand(CurDAG->getTargetNode(X86::MOVZX16rr8, MVT::i16, N0), 0);
Chain = CurDAG->getEntryNode();
}
- Chain = CurDAG->getCopyToReg(Chain, X86::AX, Move, InFlag);
+ Chain = CurDAG->getCopyToReg(Chain, X86::AX, Move, SDOperand());
InFlag = Chain.getValue(1);
} else {
AddToISelQueue(N0);
InFlag =
- CurDAG->getCopyToReg(CurDAG->getEntryNode(), LoReg, N0,
- InFlag).getValue(1);
+ CurDAG->getCopyToReg(CurDAG->getEntryNode(),
+ LoReg, N0, SDOperand()).getValue(1);
if (isSigned) {
// Sign extend the low part into the high part.
InFlag =
} else {
// Zero out the high part, effectively zero extending the input.
SDOperand ClrNode = SDOperand(CurDAG->getTargetNode(ClrOpcode, NVT), 0);
- InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), HiReg, ClrNode,
- InFlag).getValue(1);
+ InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), HiReg,
+ ClrNode, InFlag).getValue(1);
}
}
- SDOperand Tmp0, Tmp1, Tmp2, Tmp3, Chain;
- bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
if (foldedLoad) {
AddToISelQueue(N1.getOperand(0));
AddToISelQueue(Tmp0);
SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N1.getOperand(0), InFlag };
SDNode *CNode =
CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
- Chain = SDOperand(CNode, 0);
InFlag = SDOperand(CNode, 1);
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), SDOperand(CNode, 0));
} else {
AddToISelQueue(N1);
- Chain = CurDAG->getEntryNode();
InFlag =
SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
}
- SDOperand Result =
- CurDAG->getCopyFromReg(Chain, isDiv ? LoReg : HiReg, NVT, InFlag);
- ReplaceUses(N.getValue(0), Result);
- if (foldedLoad)
- ReplaceUses(N1.getValue(1), Result.getValue(1));
+ // Copy the division (low) result, if it is needed.
+ if (!N.getValue(0).use_empty()) {
+ SDOperand Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ LoReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ ReplaceUses(N.getValue(0), Result);
+#ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "=> ";
+ DEBUG(Result.Val->dump(CurDAG));
+ DOUT << "\n";
+#endif
+ }
+ // Copy the remainder (high) result, if it is needed.
+ if (!N.getValue(1).use_empty()) {
+ SDOperand Result;
+ if (HiReg == X86::AH && Subtarget->is64Bit()) {
+ // Prevent use of AH in a REX instruction by referencing AX instead.
+ // Shift it down 8 bits.
+ Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ X86::AX, MVT::i16, InFlag);
+ InFlag = Result.getValue(2);
+ Result = SDOperand(CurDAG->getTargetNode(X86::SHR16ri, MVT::i16, Result,
+ CurDAG->getTargetConstant(8, MVT::i8)), 0);
+ // Then truncate it down to i8.
+ SDOperand SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
+ Result = SDOperand(CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
+ MVT::i8, Result, SRIdx), 0);
+ } else {
+ Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ HiReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
+ }
+ ReplaceUses(N.getValue(1), Result);
+#ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "=> ";
+ DEBUG(Result.Val->dump(CurDAG));
+ DOUT << "\n";
+#endif
+ }
#ifndef NDEBUG
- DOUT << std::string(Indent-2, ' ') << "=> ";
- DEBUG(Result.Val->dump(CurDAG));
- DOUT << "\n";
Indent -= 2;
#endif
return NULL;
}
- case ISD::TRUNCATE: {
- if (!Subtarget->is64Bit() && NVT == MVT::i8) {
- unsigned Opc2;
- MVT::ValueType VT;
- switch (Node->getOperand(0).getValueType()) {
- default: assert(0 && "Unknown truncate!");
+ case ISD::ANY_EXTEND: {
+ SDOperand N0 = Node->getOperand(0);
+ AddToISelQueue(N0);
+ if (NVT == MVT::i64 || NVT == MVT::i32 || NVT == MVT::i16) {
+ SDOperand SRIdx;
+ switch(N0.getValueType()) {
+ case MVT::i32:
+ SRIdx = CurDAG->getTargetConstant(3, MVT::i32); // SubRegSet 3
+ break;
case MVT::i16:
- Opc = X86::MOV16to16_;
- VT = MVT::i16;
- Opc2 = X86::TRUNC_16_to8;
+ SRIdx = CurDAG->getTargetConstant(2, MVT::i32); // SubRegSet 2
break;
- case MVT::i32:
- Opc = X86::MOV32to32_;
- VT = MVT::i32;
- Opc2 = X86::TRUNC_32_to8;
+ case MVT::i8:
+ if (Subtarget->is64Bit())
+ SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
break;
+ default: assert(0 && "Unknown any_extend!");
}
+ if (SRIdx.Val) {
+ SDNode *ResNode = CurDAG->getTargetNode(X86::INSERT_SUBREG,
+ NVT, N0, SRIdx);
- AddToISelQueue(Node->getOperand(0));
- SDOperand Tmp =
- SDOperand(CurDAG->getTargetNode(Opc, VT, Node->getOperand(0)), 0);
- SDNode *ResNode = CurDAG->getTargetNode(Opc2, NVT, Tmp);
+#ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "=> ";
+ DEBUG(ResNode->dump(CurDAG));
+ DOUT << "\n";
+ Indent -= 2;
+#endif
+ return ResNode;
+ } // Otherwise let generated ISel handle it.
+ }
+ break;
+ }
+
+ case ISD::SIGN_EXTEND_INREG: {
+ SDOperand N0 = Node->getOperand(0);
+ AddToISelQueue(N0);
+
+ MVT::ValueType SVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ SDOperand TruncOp = SDOperand(getTruncate(N0, SVT), 0);
+ unsigned Opc = 0;
+ switch (NVT) {
+ case MVT::i16:
+ if (SVT == MVT::i8) Opc = X86::MOVSX16rr8;
+ else assert(0 && "Unknown sign_extend_inreg!");
+ break;
+ case MVT::i32:
+ switch (SVT) {
+ case MVT::i8: Opc = X86::MOVSX32rr8; break;
+ case MVT::i16: Opc = X86::MOVSX32rr16; break;
+ default: assert(0 && "Unknown sign_extend_inreg!");
+ }
+ break;
+ case MVT::i64:
+ switch (SVT) {
+ case MVT::i8: Opc = X86::MOVSX64rr8; break;
+ case MVT::i16: Opc = X86::MOVSX64rr16; break;
+ case MVT::i32: Opc = X86::MOVSX64rr32; break;
+ default: assert(0 && "Unknown sign_extend_inreg!");
+ }
+ break;
+ default: assert(0 && "Unknown sign_extend_inreg!");
+ }
+
+ SDNode *ResNode = CurDAG->getTargetNode(Opc, NVT, TruncOp);
+
+#ifndef NDEBUG
+ DOUT << std::string(Indent-2, ' ') << "=> ";
+ DEBUG(TruncOp.Val->dump(CurDAG));
+ DOUT << "\n";
+ DOUT << std::string(Indent-2, ' ') << "=> ";
+ DEBUG(ResNode->dump(CurDAG));
+ DOUT << "\n";
+ Indent -= 2;
+#endif
+ return ResNode;
+ break;
+ }
+
+ case ISD::TRUNCATE: {
+ SDOperand Input = Node->getOperand(0);
+ AddToISelQueue(Node->getOperand(0));
+ SDNode *ResNode = getTruncate(Input, NVT);
#ifndef NDEBUG
DOUT << std::string(Indent-2, ' ') << "=> ";
DOUT << "\n";
Indent -= 2;
#endif
- return ResNode;
- }
-
+ return ResNode;
break;
}
}