1 //===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
3 // The LLVM Compiler Infrastructure
// This file was developed by Evan Cheng and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines a DAG pattern matching instruction selector for X86,
11 // converting from a legalized dag to a X86 dag.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "x86-isel"
17 #include "X86InstrBuilder.h"
18 #include "X86ISelLowering.h"
19 #include "X86RegisterInfo.h"
20 #include "X86Subtarget.h"
21 #include "X86TargetMachine.h"
22 #include "llvm/GlobalValue.h"
23 #include "llvm/Instructions.h"
24 #include "llvm/Intrinsics.h"
25 #include "llvm/Support/CFG.h"
26 #include "llvm/CodeGen/MachineConstantPool.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineFrameInfo.h"
29 #include "llvm/CodeGen/MachineInstrBuilder.h"
30 #include "llvm/CodeGen/SSARegMap.h"
31 #include "llvm/CodeGen/SelectionDAGISel.h"
32 #include "llvm/Target/TargetMachine.h"
33 #include "llvm/Support/Compiler.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/MathExtras.h"
36 #include "llvm/ADT/Statistic.h"
42 //===----------------------------------------------------------------------===//
43 // Pattern Matcher Implementation
44 //===----------------------------------------------------------------------===//
/// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
/// SDOperand's instead of register numbers for the leaves of the matched
/// address tree.
struct X86ISelAddressMode {
  // Base of the address; only one alternative is meaningful at a time.
  struct { // This is really a union, discriminated by BaseType!
  bool isRIPRel;     // RIP relative?
  unsigned Align;    // CP alignment.
  // Default state: plain register base, scale 1, no index, zero displacement,
  // and none of the GV/CP/ES/JT symbolic components selected (JT uses -1 as
  // its "unset" sentinel, matching the AM.JT != -1 checks elsewhere).
  : BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0),
    GV(0), CP(0), ES(0), JT(-1), Align(0) {
// Pass statistics, reported under the "x86-codegen" group.
NumFPKill("x86-codegen", "Number of FP_REG_KILL instructions added");
NumLoadMoved("x86-codegen", "Number of loads moved below TokenFactor");
//===--------------------------------------------------------------------===//
/// ISel - X86 specific code to select X86 machine instructions for
/// SelectionDAG operations.
class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel {
  /// ContainsFPCode - Every instruction we select that uses or defines a FP
  /// register should set this to true.

  /// FastISel - Enable fast(er) instruction selection.

  /// TM - Keep a reference to X86TargetMachine.
  X86TargetMachine &TM;

  /// X86Lowering - This object fully describes how to lower LLVM code to an
  /// X86-specific SelectionDAG.
  X86TargetLowering X86Lowering;

  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const X86Subtarget *Subtarget;

  /// GlobalBaseReg - keeps track of the virtual register mapped onto global
  /// base register; materialized lazily by getGlobalBaseReg().
  unsigned GlobalBaseReg;

  // NOTE(review): the base-class ctor is handed X86Lowering before that
  // member is constructed; this is only safe if SelectionDAGISel merely
  // stores the reference in its constructor — confirm.
  X86DAGToDAGISel(X86TargetMachine &tm, bool fast)
    : SelectionDAGISel(X86Lowering),
      ContainsFPCode(false), FastISel(fast), TM(tm),
      X86Lowering(*TM.getTargetLowering()),
      Subtarget(&TM.getSubtarget<X86Subtarget>()) {}

  virtual bool runOnFunction(Function &Fn) {
    // Make sure we re-emit a set of the global base reg if necessary
    return SelectionDAGISel::runOnFunction(Fn);

  virtual const char *getPassName() const {
    return "X86 DAG->DAG Instruction Selection";

  /// InstructionSelectBasicBlock - This callback is invoked by
  /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
  virtual void InstructionSelectBasicBlock(SelectionDAG &DAG);

  virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);

  virtual bool CanBeFoldedBy(SDNode *N, SDNode *U);

  // Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"

  SDNode *Select(SDOperand N);

  // Addressing-mode matchers; each returns the matched components by
  // reference (Base, Scale, Index, Disp).
  bool MatchAddress(SDOperand N, X86ISelAddressMode &AM, bool isRoot = true);
  bool SelectAddr(SDOperand N, SDOperand &Base, SDOperand &Scale,
                  SDOperand &Index, SDOperand &Disp);
  bool SelectLEAAddr(SDOperand N, SDOperand &Base, SDOperand &Scale,
                     SDOperand &Index, SDOperand &Disp);
  bool SelectScalarSSELoad(SDOperand N, SDOperand &Base, SDOperand &Scale,
                           SDOperand &Index, SDOperand &Disp,
                           SDOperand &InChain, SDOperand &OutChain);
  bool TryFoldLoad(SDOperand P, SDOperand N,
                   SDOperand &Base, SDOperand &Scale,
                   SDOperand &Index, SDOperand &Disp);
  void InstructionSelectPreprocess(SelectionDAG &DAG);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  virtual bool SelectInlineAsmMemoryOperand(const SDOperand &Op,
                                            std::vector<SDOperand> &OutOps,

  void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);

  /// getAddressOperands - Lower a matched X86ISelAddressMode into the four
  /// operands (Base, Scale, Index, Disp) expected by memory patterns.
  inline void getAddressOperands(X86ISelAddressMode &AM, SDOperand &Base,
                                 SDOperand &Scale, SDOperand &Index,
    Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
      CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
    Scale = getI8Imm(AM.Scale);
    // These are 32-bit even in 64-bit mode since RIP relative offset
    // fields are 32-bit.
      Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp);
      Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Align, AM.Disp);
      Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32);
    else if (AM.JT != -1)
      Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32);
      Disp = getI32Imm(AM.Disp);

  /// getI8Imm - Return a target constant with the specified value, of type
  /// i8.
  inline SDOperand getI8Imm(unsigned Imm) {
    return CurDAG->getTargetConstant(Imm, MVT::i8);

  /// getI16Imm - Return a target constant with the specified value, of type
  /// i16.
  inline SDOperand getI16Imm(unsigned Imm) {
    return CurDAG->getTargetConstant(Imm, MVT::i16);

  /// getI32Imm - Return a target constant with the specified value, of type
  /// i32.
  inline SDOperand getI32Imm(unsigned Imm) {
    return CurDAG->getTargetConstant(Imm, MVT::i32);

  /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
  /// base register. Return the virtual register that holds this value.
  SDNode *getGlobalBaseReg();
/// findFlagUse - Find the user of N's flag result, i.e. the node that has an
/// operand referring to N's last value (flag results are always last).
static SDNode *findFlagUse(SDNode *N) {
  unsigned FlagResNo = N->getNumValues()-1;
  for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
    for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
      SDOperand Op = User->getOperand(i);
      // Match only the flag value of N, not any of its other results.
      if (Op.Val == N && Op.ResNo == FlagResNo)
/// findNonImmUse - Walk up the operand graph from Use looking for Def being
/// reachable through a path other than an immediate (folded) operand; sets
/// `found` when such a path exists. Visited set prevents exponential rewalks;
/// the node-id check prunes nodes that cannot be predecessors of Def.
static void findNonImmUse(SDNode* Use, SDNode* Def, SDNode *Ignore, bool &found,
                          std::set<SDNode *> &Visited) {
  Use->getNodeId() > Def->getNodeId() ||
      !Visited.insert(Use).second)
  for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
    SDNode *N = Use->getOperand(i).Val;
    findNonImmUse(N, Def, Ignore, found, Visited);
/// isNonImmUse - Return true if Def is reachable from Use via some operand
/// path that does not go directly through Def (or Ignore) as an immediate
/// operand — folding Def into Use would then create a cycle.
static inline bool isNonImmUse(SDNode* Use, SDNode* Def, SDNode *Ignore=NULL) {
  std::set<SDNode *> Visited;
  for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
    SDNode *N = Use->getOperand(i).Val;
    if (N != Def && N != Ignore) {
      findNonImmUse(N, Def, Ignore, found, Visited);
  if (!found && Ignore) {
    // We must be checking for reachability between Def and a flag use. Go down
    // recursively if Use also produces a flag.
    MVT::ValueType VT = Use->getValueType(Use->getNumValues()-1);
    if (VT == MVT::Flag && !Use->use_empty()) {
      SDNode *FU = findFlagUse(Use);
        return !isNonImmUse(FU, Def, Use);
/// CanBeFoldedBy - Return true if it is legal for U to fold node N (typically
/// a load) as one of its operands without creating a cycle in the DAG.
bool X86DAGToDAGISel::CanBeFoldedBy(SDNode *N, SDNode *U) {
  // If U use can somehow reach N through another path then U can't fold N or
  // it will create a cycle. e.g. In the following diagram, U can reach N
  // through X. If N is folded into into U, then X is both a predecessor and
  // a successor of U.
  // Folding is only attempted in non-fast selection mode.
  if (!FastISel && !isNonImmUse(U, N)) {
    // If U produces a flag, then it gets (even more) interesting. Since it
    // would have been "glued" together with its flag use, we need to check if
    // the flag user can also reach N.
    // If FU (flag use) indirectly reach N (the load), and U fold N (call it
    // NU), then TF is a predecessor of FU and a successor of NU. But since
    // NU and FU are flagged together, this effectively creates a cycle.
    MVT::ValueType VT = U->getValueType(U->getNumValues()-1);
    if (VT == MVT::Flag && !U->use_empty()) {
      SDNode *FU = findFlagUse(U);
        return !isNonImmUse(FU, N, U);
/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
/// and move load below the TokenFactor. Replace store's chain operand with
/// load's chain result.
static void MoveBelowTokenFactor(SelectionDAG &DAG, SDOperand Load,
                                 SDOperand Store, SDOperand TF) {
  // Rebuild the TokenFactor's operand list, substituting the load's own
  // incoming chain for the load itself.
  std::vector<SDOperand> Ops;
  for (unsigned i = 0, e = TF.Val->getNumOperands(); i != e; ++i)
    if (Load.Val == TF.Val->getOperand(i).Val)
      Ops.push_back(Load.Val->getOperand(0));
      Ops.push_back(TF.Val->getOperand(i));
  DAG.UpdateNodeOperands(TF, &Ops[0], Ops.size());
  // Re-chain: the load now hangs off the TokenFactor, and the store is
  // chained to the load's output chain (value 1).
  DAG.UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2));
  DAG.UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1),
                         Store.getOperand(2), Store.getOperand(3));
/// InstructionSelectPreprocess - Preprocess the DAG to allow the instruction
/// selector to pick more load-modify-store instructions. This is a common
/// case:
///     [TokenFactor]        [Op]
/// The fact the store's chain operand != load's chain will prevent the
/// (store (op (load))) instruction from being selected. We can transform it to:
/// move the load below the TokenFactor (see MoveBelowTokenFactor above).
void X86DAGToDAGISel::InstructionSelectPreprocess(SelectionDAG &DAG) {
  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
         E = DAG.allnodes_end(); I != E; ++I) {
    // Only stores whose chain comes directly from a TokenFactor qualify.
    if (I->getOpcode() != ISD::STORE)
    SDOperand Chain = I->getOperand(0);
    if (Chain.Val->getOpcode() != ISD::TokenFactor)
    // N1 = stored value, N2 = store address.
    SDOperand N1 = I->getOperand(1);
    SDOperand N2 = I->getOperand(2);
    if (MVT::isFloatingPoint(N1.getValueType()) ||
        MVT::isVector(N1.getValueType()) ||
    unsigned Opcode = N1.Val->getOpcode();
    // Binary op case: the load may be either operand (commutative ops).
    SDOperand N10 = N1.getOperand(0);
    SDOperand N11 = N1.getOperand(1);
    if (ISD::isNON_EXTLoad(N10.Val))
    else if (ISD::isNON_EXTLoad(N11.Val)) {
    // The load must: be chained via the TokenFactor, have a single use,
    // load from the same address the store writes to, and match the op type.
    RModW = RModW && N10.Val->isOperand(Chain.Val) && N10.hasOneUse() &&
            (N10.getOperand(1) == N2) &&
            (N10.Val->getValueType(0) == N1.getValueType());
    // Unary op case: only the sole operand can be the load.
    SDOperand N10 = N1.getOperand(0);
    if (ISD::isNON_EXTLoad(N10.Val))
      RModW = N10.Val->isOperand(Chain.Val) && N10.hasOneUse() &&
              (N10.getOperand(1) == N2) &&
              (N10.Val->getValueType(0) == N1.getValueType());
    MoveBelowTokenFactor(DAG, Load, SDOperand(I, 0), Chain);
/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
/// when it has created a SelectionDAG for us to codegen.
void X86DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
  MachineFunction::iterator FirstMBB = BB;

  InstructionSelectPreprocess(DAG);

  // Codegen the basic block.
  DEBUG(std::cerr << "===== Instruction selection begins:\n");
  DAG.setRoot(SelectRoot(DAG.getRoot()));
  DEBUG(std::cerr << "===== Instruction selection ends:\n");

  DAG.RemoveDeadNodes();

  // Emit machine code to BB.
  ScheduleAndEmitDAG(DAG);

  // If we are emitting FP stack code, scan the basic block to determine if this
  // block defines any FP values. If so, put an FP_REG_KILL instruction before
  // the terminator of the block.
  if (!Subtarget->hasSSE2()) {
    // Note that FP stack instructions *are* used in SSE code when returning
    // values, but these are not live out of the basic block, so we don't need
    // an FP_REG_KILL in this case either.
    bool ContainsFPCode = false;

    // Scan all of the machine instructions in these MBBs, checking for FP
    // register definitions.
    MachineFunction::iterator MBBI = FirstMBB;
      for (MachineBasicBlock::iterator I = MBBI->begin(), E = MBBI->end();
           !ContainsFPCode && I != E; ++I) {
        for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
          // NOTE(review): the guard checks operand `op`, but the register
          // class is queried for getOperand(0) — likely should be
          // getOperand(op); confirm against upstream history before fixing.
          if (I->getOperand(op).isRegister() && I->getOperand(op).isDef() &&
              MRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) &&
              RegMap->getRegClass(I->getOperand(0).getReg()) ==
                X86::RFPRegisterClass) {
            ContainsFPCode = true;
    } while (!ContainsFPCode && &*(MBBI++) != BB);

    // Check PHI nodes in successor blocks. These PHI's will be lowered to have
    // a copy of the input value in this block.
    if (!ContainsFPCode) {
      // Final check, check LLVM BB's that are successors to the LLVM BB
      // corresponding to BB for FP PHI nodes.
      const BasicBlock *LLVMBB = BB->getBasicBlock();
      for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB);
           !ContainsFPCode && SI != E; ++SI) {
        for (BasicBlock::const_iterator II = SI->begin();
             (PN = dyn_cast<PHINode>(II)); ++II) {
          if (PN->getType()->isFloatingPoint()) {
            ContainsFPCode = true;

    // Finally, if we found any FP code, emit the FP_REG_KILL instruction.
    if (ContainsFPCode) {
      BuildMI(*BB, BB->getFirstTerminator(), X86::FP_REG_KILL, 0);
/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
/// the main function.
void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
                                             MachineFrameInfo *MFI) {
  // Cygwin requires __main to be called before user code runs.
  if (Subtarget->isTargetCygwin())
    BuildMI(BB, X86::CALLpcrel32, 1).addExternalSymbol("__main");

  // Switch the FPU to 64-bit precision mode for better compatibility and speed.
  // Spill the control word to a 2-byte stack slot so it can be modified.
  int CWFrameIdx = MFI->CreateStackObject(2, 2);
  addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx);

  // Set the high part to be 64-bit precision.
  addFrameReference(BuildMI(BB, X86::MOV8mi, 5),
                    CWFrameIdx, 1).addImm(2);

  // Reload the modified control word now.
  addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);
/// EmitFunctionEntryCode - Hook run at function entry; only `main` with
/// external linkage needs special prologue code (see EmitSpecialCodeForMain).
void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
  // If this is main, emit special code for main.
  MachineBasicBlock *BB = MF.begin();
  if (Fn.hasExternalLinkage() && Fn.getName() == "main")
    EmitSpecialCodeForMain(BB, MF.getFrameInfo());
/// MatchAddress - Add the specified node to the specified addressing mode,
/// returning true if it cannot be done. This just pattern matches for the
/// addressing mode.
bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
  // RIP relative addressing: %rip + 32-bit displacement!
  if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) {
    int64_t Val = cast<ConstantSDNode>(N)->getSignExtended();
    if (isInt32(AM.Disp + Val)) {

  // Nodes already selected into a register can't be re-decomposed here.
  int id = N.Val->getNodeId();
  bool Available = isSelected(id);

  switch (N.getOpcode()) {
  case ISD::Constant: {
    // Fold the constant into the displacement if it still fits in 32 bits.
    int64_t Val = cast<ConstantSDNode>(N)->getSignExtended();
    if (isInt32(AM.Disp + Val)) {

  case X86ISD::Wrapper:
    // If value is available in a register both base and index components have
    // been picked, we can't fit the result available in the register in the
    // addressing mode. Duplicate GlobalAddress or ConstantPool as displacement.

    // Can't fit GV or CP in addressing mode for X86-64 medium or large code
    // model since the displacement field is 32-bit. Ok for small code model.

    // For X86-64 PIC code, only allow GV / CP + displacement so we can use RIP
    // relative addressing mode.
    if ((!Subtarget->is64Bit() || TM.getCodeModel() == CodeModel::Small) &&
        (!Available || (AM.Base.Reg.Val && AM.IndexReg.Val))) {
      bool isRIP = Subtarget->is64Bit();
      // RIP-relative forms admit no base, index, scale, or frame index.
      if (isRIP && (AM.Base.Reg.Val || AM.Scale > 1 || AM.IndexReg.Val ||
                    AM.BaseType == X86ISelAddressMode::FrameIndexBase))
      if (ConstantPoolSDNode *CP =
            dyn_cast<ConstantPoolSDNode>(N.getOperand(0))) {
        AM.CP = CP->getConstVal();
        AM.Align = CP->getAlignment();
        AM.Disp += CP->getOffset();
      } else if (GlobalAddressSDNode *G =
                   dyn_cast<GlobalAddressSDNode>(N.getOperand(0))) {
        AM.GV = G->getGlobal();
        AM.Disp += G->getOffset();
      } else if (isRoot && isRIP) {
        // External symbols / jump tables only at the address root.
        if (ExternalSymbolSDNode *S =
              dyn_cast<ExternalSymbolSDNode>(N.getOperand(0))) {
          AM.ES = S->getSymbol();
        } else if (JumpTableSDNode *J =
                     dyn_cast<JumpTableSDNode>(N.getOperand(0))) {
          AM.JT = J->getIndex();

  case ISD::FrameIndex:
    // A frame index can serve as the base if no register base was chosen.
    if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base.Reg.Val == 0) {
      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
      AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();

    // Shift by 1/2/3 maps to the SIB scale factors 2/4/8.
    if (!Available && AM.IndexReg.Val == 0 && AM.Scale == 1)
      if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1))) {
        unsigned Val = CN->getValue();
        if (Val == 1 || Val == 2 || Val == 3) {
          SDOperand ShVal = N.Val->getOperand(0);

          // Okay, we know that we have a scale by now. However, if the scaled
          // value is an add of something and a constant, we can fold the
          // constant into the disp field here.
          if (ShVal.Val->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
              isa<ConstantSDNode>(ShVal.Val->getOperand(1))) {
            AM.IndexReg = ShVal.Val->getOperand(0);
            ConstantSDNode *AddVal =
              cast<ConstantSDNode>(ShVal.Val->getOperand(1));
            // NOTE(review): precedence bug — '+' binds tighter than '<<', so
            // this computes (AM.Disp + AddVal->getValue()) << Val; the intent
            // is almost certainly AM.Disp + (AddVal->getValue() << Val).
            uint64_t Disp = AM.Disp + AddVal->getValue() << Val;

    // X*[3,5,9] -> X+X*[2,4,8]
        AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base.Reg.Val == 0 &&
        AM.IndexReg.Val == 0)
      if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1)))
        if (CN->getValue() == 3 || CN->getValue() == 5 || CN->getValue() == 9) {
          AM.Scale = unsigned(CN->getValue())-1;

          SDOperand MulVal = N.Val->getOperand(0);

          // Okay, we know that we have a scale by now. However, if the scaled
          // value is an add of something and a constant, we can fold the
          // constant into the disp field here.
          if (MulVal.Val->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
              isa<ConstantSDNode>(MulVal.Val->getOperand(1))) {
            Reg = MulVal.Val->getOperand(0);
            ConstantSDNode *AddVal =
              cast<ConstantSDNode>(MulVal.Val->getOperand(1));
            uint64_t Disp = AM.Disp + AddVal->getValue() * CN->getValue();
            Reg = N.Val->getOperand(0);
            Reg = N.Val->getOperand(0);

          // Same register is both the base and the (scaled) index.
          AM.IndexReg = AM.Base.Reg = Reg;

    // ADD: try matching both operands, in either order, restoring on failure.
    X86ISelAddressMode Backup = AM;
    if (!MatchAddress(N.Val->getOperand(0), AM, false) &&
        !MatchAddress(N.Val->getOperand(1), AM, false))
    if (!MatchAddress(N.Val->getOperand(1), AM, false) &&
        !MatchAddress(N.Val->getOperand(0), AM, false))

    X86ISelAddressMode Backup = AM;
    // Look for (x << c1) | c2 where (c2 < c1)
    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(0));
    if (CN && !MatchAddress(N.Val->getOperand(1), AM, false)) {
      if (AM.GV == NULL && AM.Disp == 0 && CN->getValue() < AM.Scale) {
        AM.Disp = CN->getValue();
    CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1));
    if (CN && !MatchAddress(N.Val->getOperand(0), AM, false)) {
      if (AM.GV == NULL && AM.Disp == 0 && CN->getValue() < AM.Scale) {
        AM.Disp = CN->getValue();

  // Is the base register already occupied?
  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.Val) {
    // If so, check to see if the scale index register is set.
    if (AM.IndexReg.Val == 0) {

    // Otherwise, we cannot select it.

  // Default, generate it as a register.
  AM.BaseType = X86ISelAddressMode::RegBase;
/// SelectAddr - returns true if it is able pattern match an addressing mode.
/// It returns the operands which make up the maximal addressing mode it can
/// match by reference.
bool X86DAGToDAGISel::SelectAddr(SDOperand N, SDOperand &Base, SDOperand &Scale,
                                 SDOperand &Index, SDOperand &Disp) {
  X86ISelAddressMode AM;
  // MatchAddress returns true on FAILURE.
  if (MatchAddress(N, AM))

  MVT::ValueType VT = N.getValueType();
  // Unfilled base/index components are encoded as register 0 ("no register").
  if (AM.BaseType == X86ISelAddressMode::RegBase) {
    if (!AM.Base.Reg.Val)
      AM.Base.Reg = CurDAG->getRegister(0, VT);

  if (!AM.IndexReg.Val)
    AM.IndexReg = CurDAG->getRegister(0, VT);

  getAddressOperands(AM, Base, Scale, Index, Disp);
/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant whose value is exactly +0.0.
static inline bool isZeroNode(SDOperand Elt) {
  return ((isa<ConstantSDNode>(Elt) &&
           cast<ConstantSDNode>(Elt)->getValue() == 0) ||
          (isa<ConstantFPSDNode>(Elt) &&
           cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0)));
/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
/// match a load whose top elements are either undef or zeros. The load flavor
/// is derived from the type of N, which is either v4f32 or v2f64.
bool X86DAGToDAGISel::SelectScalarSSELoad(SDOperand N, SDOperand &Base,
                                          SDOperand &Scale, SDOperand &Index,
                                          SDOperand &Disp, SDOperand &InChain,
                                          SDOperand &OutChain) {
  // Case 1: (scalar_to_vector (load ...)) — top elements are undef.
  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    InChain = N.getOperand(0).getValue(1);
    if (ISD::isNON_EXTLoad(InChain.Val) && InChain.getValue(0).hasOneUse()) {
      LoadSDNode *LD = cast<LoadSDNode>(InChain);
      if (!SelectAddr(LD->getBasePtr(), Base, Scale, Index, Disp))
      OutChain = LD->getChain();

  // Also handle the case where we explicitly require zeros in the top
  // elements. This is a vector shuffle from the zero vector.
  if (N.getOpcode() == ISD::VECTOR_SHUFFLE && N.Val->hasOneUse() &&
      N.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
      N.getOperand(1).getOpcode() == ISD::SCALAR_TO_VECTOR &&
      N.getOperand(1).Val->hasOneUse() &&
      ISD::isNON_EXTLoad(N.getOperand(1).getOperand(0).Val) &&
      N.getOperand(1).getOperand(0).hasOneUse()) {
    // Check to see if the BUILD_VECTOR is building a zero vector.
    SDOperand BV = N.getOperand(0);
    for (unsigned i = 0, e = BV.getNumOperands(); i != e; ++i)
      if (!isZeroNode(BV.getOperand(i)) &&
          BV.getOperand(i).getOpcode() != ISD::UNDEF)
        return false;  // Not a zero/undef vector.
    // Check to see if the shuffle mask is 4/L/L/L or 2/L, where L is something
    // from the zero vector (element index >= VecWidth) or undef.
    unsigned VecWidth = BV.getNumOperands();
    SDOperand ShufMask = N.getOperand(2);
    assert(ShufMask.getOpcode() == ISD::BUILD_VECTOR && "Invalid shuf mask!");
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(ShufMask.getOperand(0))) {
      // Element 0 must come from the loaded vector (index == VecWidth).
      if (C->getValue() == VecWidth) {
        for (unsigned i = 1; i != VecWidth; ++i) {
          if (ShufMask.getOperand(i).getOpcode() == ISD::UNDEF) {
          ConstantSDNode *C = cast<ConstantSDNode>(ShufMask.getOperand(i));
          if (C->getValue() >= VecWidth) return false;

        // Okay, this is a zero extending load.  Fold it.
        LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(1).getOperand(0));
        if (!SelectAddr(LD->getBasePtr(), Base, Scale, Index, Disp))
        OutChain = LD->getChain();
        InChain = SDOperand(LD, 1);
/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
/// mode it matches can be cost effectively emitted as an LEA instruction.
bool X86DAGToDAGISel::SelectLEAAddr(SDOperand N, SDOperand &Base,
                                    SDOperand &Index, SDOperand &Disp) {
  X86ISelAddressMode AM;
  if (MatchAddress(N, AM))

  // Score the matched mode: each useful component raises Complexity; LEA is
  // only emitted when Complexity > 2 (see below).
  MVT::ValueType VT = N.getValueType();
  unsigned Complexity = 0;
  if (AM.BaseType == X86ISelAddressMode::RegBase)
    AM.Base.Reg = CurDAG->getRegister(0, VT);
  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)

    AM.IndexReg = CurDAG->getRegister(0, VT);

  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg
  else if (AM.Scale > 1)

  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
  // to a LEA. This is determined with some experimentation but is by no means
  // optimal (especially for code size consideration). LEA is nice because of
  // its three-address nature. Tweak the cost function again when we can run
  // convertToThreeAddress() at register allocation time.
  if (AM.GV || AM.CP || AM.ES || AM.JT != -1) {
    // For X86-64, we should always use lea to materialize RIP relative
    // addresses.
    if (Subtarget->is64Bit())

  if (AM.Disp && (AM.Base.Reg.Val || AM.IndexReg.Val))

  if (Complexity > 2) {
    getAddressOperands(AM, Base, Scale, Index, Disp);
/// TryFoldLoad - Try to fold the load N into its user P; succeeds only for a
/// plain (non-extending) load whose address can be matched and whose folding
/// would not create a DAG cycle (CanBeFoldedBy).
bool X86DAGToDAGISel::TryFoldLoad(SDOperand P, SDOperand N,
                                  SDOperand &Base, SDOperand &Scale,
                                  SDOperand &Index, SDOperand &Disp) {
  if (ISD::isNON_EXTLoad(N.Val) &&
      CanBeFoldedBy(N.Val, P.Val))
    return SelectAddr(N.getOperand(1), Base, Scale, Index, Disp);
/// isRegister0 - Return true if Op is the special "no register" register
/// operand (register number 0).
static bool isRegister0(SDOperand Op) {
  if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op))
    return (R->getReg() == 0);
/// getGlobalBaseReg - Output the instructions required to put the
/// base address to use for accessing globals into a register.
/// The register is created lazily on first use and then reused.
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
  assert(!Subtarget->is64Bit() && "X86-64 PIC uses RIP relative addressing");
  if (!GlobalBaseReg) {
    // Insert the set of GlobalBaseReg into the first MBB of the function
    MachineBasicBlock &FirstMBB = BB->getParent()->front();
    MachineBasicBlock::iterator MBBI = FirstMBB.begin();
    SSARegMap *RegMap = BB->getParent()->getSSARegMap();
    // FIXME: when we get to LP64, we will need to create the appropriate
    // type of register here.
    GlobalBaseReg = RegMap->createVirtualRegister(X86::GR32RegisterClass);
    // call-next-insn / pop idiom: materializes the current PC into the reg.
    BuildMI(FirstMBB, MBBI, X86::MovePCtoStack, 0);
    BuildMI(FirstMBB, MBBI, X86::POP32r, 1, GlobalBaseReg);
  return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).Val;
/// FindCallStartFromCall - Walk up the chain operands from a call node until
/// the matching CALLSEQ_START node is found.
static SDNode *FindCallStartFromCall(SDNode *Node) {
  if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
  assert(Node->getOperand(0).getValueType() == MVT::Other &&
         "Node doesn't have a token chain argument!");
  return FindCallStartFromCall(Node->getOperand(0).Val);
/// Select - Main instruction selection hook: lower one SelectionDAG node to
/// X86 machine nodes, handling the cases tblgen cannot (GlobalBaseReg,
/// wrapped-constant ADD, MUL/DIV with fixed registers, i8 TRUNCATE), and
/// deferring everything else to the generated SelectCode.
SDNode *X86DAGToDAGISel::Select(SDOperand N) {
  SDNode *Node = N.Val;
  MVT::ValueType NVT = Node->getValueType(0);
  unsigned Opcode = Node->getOpcode();

  DEBUG(std::cerr << std::string(Indent, ' '));
  DEBUG(std::cerr << "Selecting: ");
  DEBUG(Node->dump(CurDAG));
  DEBUG(std::cerr << "\n");

  // Target-specific opcodes below BUILTIN_OP_END were already selected.
  if (Opcode >= ISD::BUILTIN_OP_END && Opcode < X86ISD::FIRST_NUMBER) {
    DEBUG(std::cerr << std::string(Indent-2, ' '));
    DEBUG(std::cerr << "== ");
    DEBUG(Node->dump(CurDAG));
    DEBUG(std::cerr << "\n");
    return NULL;   // Already selected.

  case X86ISD::GlobalBaseReg:
    return getGlobalBaseReg();

    // Turn ADD X, c to MOV32ri X+c. This cannot be done with tblgen'd
    // code and is matched first so to prevent it from being turned into
    // a load.
    // In 64-bit mode, use LEA to take advantage of RIP-relative addressing.
    MVT::ValueType PtrVT = TLI.getPointerTy();
    SDOperand N0 = N.getOperand(0);
    SDOperand N1 = N.getOperand(1);
    if (N.Val->getValueType(0) == PtrVT &&
        N0.getOpcode() == X86ISD::Wrapper &&
        N1.getOpcode() == ISD::Constant) {
      unsigned Offset = (unsigned)cast<ConstantSDNode>(N1)->getValue();
      // TODO: handle ExternalSymbolSDNode.
      // Fold the constant offset directly into the symbolic operand.
      if (GlobalAddressSDNode *G =
            dyn_cast<GlobalAddressSDNode>(N0.getOperand(0))) {
        C = CurDAG->getTargetGlobalAddress(G->getGlobal(), PtrVT,
                                           G->getOffset() + Offset);
      } else if (ConstantPoolSDNode *CP =
                   dyn_cast<ConstantPoolSDNode>(N0.getOperand(0))) {
        C = CurDAG->getTargetConstantPool(CP->getConstVal(), PtrVT,
                                          CP->getOffset()+Offset);

        if (Subtarget->is64Bit()) {
          // LEA64r operands: base(=0), scale(=1), index(=0), disp(=C).
          SDOperand Ops[] = { CurDAG->getRegister(0, PtrVT), getI8Imm(1),
                              CurDAG->getRegister(0, PtrVT), C };
          return CurDAG->SelectNodeTo(N.Val, X86::LEA64r, MVT::i64, Ops, 4);
          return CurDAG->SelectNodeTo(N.Val, X86::MOV32ri, PtrVT, C);

    // Other cases are handled by auto-generated code.

    // MULHU/MULHS: one-operand MUL/IMUL with implicit AL/AX/EAX/RAX input
    // and the high half of the product in AH/DX/EDX/RDX.
    if (Opcode == ISD::MULHU)
      default: assert(0 && "Unsupported VT!");
      case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
      case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
      case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
      case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;

      default: assert(0 && "Unsupported VT!");
      case MVT::i8:  Opc = X86::IMUL8r;  MOpc = X86::IMUL8m;  break;
      case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
      case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
      case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;

    unsigned LoReg, HiReg;
      default: assert(0 && "Unsupported VT!");
      case MVT::i8:  LoReg = X86::AL;  HiReg = X86::AH;  break;
      case MVT::i16: LoReg = X86::AX;  HiReg = X86::DX;  break;
      case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
      case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;

    SDOperand N0 = Node->getOperand(0);
    SDOperand N1 = Node->getOperand(1);

    // Try to fold a memory operand; if N1 fails, try the other side.
    bool foldedLoad = false;
    SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
    foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
    // MULHU and MULHS are commutative
      foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3);
        N0 = Node->getOperand(1);
        N1 = Node->getOperand(0);

      Chain = N1.getOperand(0);
      AddToISelQueue(Chain);
      Chain = CurDAG->getEntryNode();

    // Move the multiplicand into the fixed low register, glued via a flag.
    SDOperand InFlag(0, 0);
    Chain  = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(LoReg, NVT),
    InFlag = Chain.getValue(1);

      AddToISelQueue(Tmp0);
      AddToISelQueue(Tmp1);
      AddToISelQueue(Tmp2);
      AddToISelQueue(Tmp3);
      SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Chain, InFlag };
        CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
      Chain  = SDOperand(CNode, 0);
      InFlag = SDOperand(CNode, 1);
        SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);

    // The high half of the result lives in HiReg; copy it out.
    SDOperand Result = CurDAG->getCopyFromReg(Chain, HiReg, NVT, InFlag);
    ReplaceUses(N.getValue(0), Result);
      ReplaceUses(N1.getValue(1), Result.getValue(1));

    DEBUG(std::cerr << std::string(Indent-2, ' '));
    DEBUG(std::cerr << "=> ");
    DEBUG(Result.Val->dump(CurDAG));
    DEBUG(std::cerr << "\n");

    // Integer divide/remainder: DIV/IDIV with implicit Hi:Lo dividend.
    bool isSigned = Opcode == ISD::SDIV || Opcode == ISD::SREM;
    bool isDiv    = Opcode == ISD::SDIV || Opcode == ISD::UDIV;
      default: assert(0 && "Unsupported VT!");
      case MVT::i8:  Opc = X86::DIV8r;  MOpc = X86::DIV8m;  break;
      case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
      case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
      case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;

      default: assert(0 && "Unsupported VT!");
      case MVT::i8:  Opc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break;
      case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
      case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
      case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;

    unsigned LoReg, HiReg;
    // ClrOpcode zeroes the high register (unsigned); SExtOpcode sign-extends
    // Lo into Hi (signed): CBW/CWD/CDQ/CQO.
    unsigned ClrOpcode, SExtOpcode;
      default: assert(0 && "Unsupported VT!");
        LoReg = X86::AL;  HiReg = X86::AH;
        ClrOpcode  = X86::MOV8r0;
        SExtOpcode = X86::CBW;
        LoReg = X86::AX;  HiReg = X86::DX;
        ClrOpcode  = X86::MOV16r0;
        SExtOpcode = X86::CWD;
        LoReg = X86::EAX; HiReg = X86::EDX;
        ClrOpcode  = X86::MOV32r0;
        SExtOpcode = X86::CDQ;
        LoReg = X86::RAX; HiReg = X86::RDX;
        ClrOpcode  = X86::MOV64r0;
        SExtOpcode = X86::CQO;

    SDOperand N0 = Node->getOperand(0);
    SDOperand N1 = Node->getOperand(1);

    // Division is not commutative: only the divisor may be a folded load.
    bool foldedLoad = false;
    SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
    foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
      Chain = N1.getOperand(0);
      AddToISelQueue(Chain);
      Chain = CurDAG->getEntryNode();

    SDOperand InFlag(0, 0);
    Chain  = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(LoReg, NVT),
    InFlag = Chain.getValue(1);

      // Sign extend the low part into the high part.
        SDOperand(CurDAG->getTargetNode(SExtOpcode, MVT::Flag, InFlag), 0);
      // Zero out the high part, effectively zero extending the input.
      SDOperand ClrNode = SDOperand(CurDAG->getTargetNode(ClrOpcode, NVT), 0);
      Chain  = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(HiReg, NVT),
      InFlag = Chain.getValue(1);

      AddToISelQueue(Tmp0);
      AddToISelQueue(Tmp1);
      AddToISelQueue(Tmp2);
      AddToISelQueue(Tmp3);
      SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Chain, InFlag };
        CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
      Chain  = SDOperand(CNode, 0);
      InFlag = SDOperand(CNode, 1);
        SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);

    // Quotient comes back in LoReg, remainder in HiReg.
    SDOperand Result = CurDAG->getCopyFromReg(Chain, isDiv ? LoReg : HiReg,
    ReplaceUses(N.getValue(0), Result);
      ReplaceUses(N1.getValue(1), Result.getValue(1));

    DEBUG(std::cerr << std::string(Indent-2, ' '));
    DEBUG(std::cerr << "=> ");
    DEBUG(Result.Val->dump(CurDAG));
    DEBUG(std::cerr << "\n");

  case ISD::TRUNCATE: {
    // On x86-32 truncation to i8 must go through the ABCD registers; use the
    // special _ register classes / TRUNC pseudo-instructions.
    if (!Subtarget->is64Bit() && NVT == MVT::i8) {
      switch (Node->getOperand(0).getValueType()) {
      default: assert(0 && "Unknown truncate!");
        Opc = X86::MOV16to16_;
        Opc2 = X86::TRUNC_16_to8;
        Opc = X86::MOV32to32_;
        Opc2 = X86::TRUNC_32_to8;

      AddToISelQueue(Node->getOperand(0));
        SDOperand(CurDAG->getTargetNode(Opc, VT, Node->getOperand(0)), 0);
      SDNode *ResNode = CurDAG->getTargetNode(Opc2, NVT, Tmp);

      DEBUG(std::cerr << std::string(Indent-2, ' '));
      DEBUG(std::cerr << "=> ");
      DEBUG(ResNode->dump(CurDAG));
      DEBUG(std::cerr << "\n");

  // Everything else: defer to the tblgen-generated matcher.
  SDNode *ResNode = SelectCode(N);

  DEBUG(std::cerr << std::string(Indent-2, ' '));
  DEBUG(std::cerr << "=> ");
  if (ResNode == NULL || ResNode == N.Val)
    DEBUG(N.Val->dump(CurDAG));
    DEBUG(ResNode->dump(CurDAG));
  DEBUG(std::cerr << "\n");
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm memory constraints; returns true on failure. On success the
/// four standard address components are appended to OutOps.
bool X86DAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDOperand &Op, char ConstraintCode,
                             std::vector<SDOperand> &OutOps, SelectionDAG &DAG){
  SDOperand Op0, Op1, Op2, Op3;
  switch (ConstraintCode) {
  case 'o':   // offsetable        ??
  case 'v':   // not offsetable    ??
  default: return true;
    if (!SelectAddr(Op, Op0, Op1, Op2, Op3))

  // Emit Base, Scale, Index, Disp and queue them for selection.
  OutOps.push_back(Op0);
  OutOps.push_back(Op1);
  OutOps.push_back(Op2);
  OutOps.push_back(Op3);
  AddToISelQueue(Op0);
  AddToISelQueue(Op1);
  AddToISelQueue(Op2);
  AddToISelQueue(Op3);
/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, bool Fast) {
  return new X86DAGToDAGISel(TM, Fast);