diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 8b1690c05f5..bf233bfcbb4 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2,8 +2,8 @@
 //
 //                     The LLVM Compiler Infrastructure
 //
-// This file was developed by the Evan Cheng and is distributed under
-// the University of Illinois Open Source License. See LICENSE.TXT for details.
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
@@ -16,6 +16,7 @@
 #include "X86.h"
 #include "X86InstrBuilder.h"
 #include "X86ISelLowering.h"
+#include "X86MachineFunctionInfo.h"
 #include "X86RegisterInfo.h"
 #include "X86Subtarget.h"
 #include "X86TargetMachine.h"
@@ -28,9 +29,10 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/SSARegMap.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
@@ -42,7 +44,6 @@ using namespace llvm;
 STATISTIC(NumFPKill   , "Number of FP_REG_KILL instructions added");
 STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
 
-
 //===----------------------------------------------------------------------===//
 //                      Pattern Matcher Implementation
 //===----------------------------------------------------------------------===//
@@ -62,7 +63,7 @@ namespace {
       int FrameIndex;
     } Base;
 
-    bool isRIPRel;     // RIP relative?
+    bool isRIPRel;     // RIP as base?
     unsigned Scale;
     SDOperand IndexReg;
     unsigned Disp;
@@ -132,7 +133,7 @@ namespace {
     virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);
 
-    virtual bool CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root);
+    virtual bool CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) const;
 
 // Include the pieces autogenerated from the target description.
 #include "X86GenDAGISel.inc"
@@ -142,6 +143,8 @@ namespace {
     bool MatchAddress(SDOperand N, X86ISelAddressMode &AM,
                       bool isRoot = true, unsigned Depth = 0);
+    bool MatchAddressBase(SDOperand N, X86ISelAddressMode &AM,
+                          bool isRoot, unsigned Depth);
     bool SelectAddr(SDOperand Op, SDOperand N, SDOperand &Base,
                     SDOperand &Scale, SDOperand &Index, SDOperand &Disp);
     bool SelectLEAAddr(SDOperand Op, SDOperand N, SDOperand &Base,
@@ -153,7 +156,8 @@ namespace {
     bool TryFoldLoad(SDOperand P, SDOperand N,
                      SDOperand &Base, SDOperand &Scale,
                      SDOperand &Index, SDOperand &Disp);
-    void InstructionSelectPreprocess(SelectionDAG &DAG);
+    void PreprocessForRMW(SelectionDAG &DAG);
+    void PreprocessForFPConvert(SelectionDAG &DAG);
 
     /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
     /// inline asm expressions.
@@ -208,6 +212,10 @@ namespace {
     /// base register. Return the virtual register that holds this value.
    SDNode *getGlobalBaseReg();
 
+    /// getTruncate - return an SDNode that implements a subreg based truncate
+    /// of the specified operand to the specified value type.
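+    /// For example, an i32 -> i8 truncate can become an EXTRACT_SUBREG of the
+    /// low 8-bit subregister (AL out of EAX) rather than a real instruction.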
+    SDNode *getTruncate(SDOperand N0, MVT::ValueType VT);
+
 #ifndef NDEBUG
     unsigned Indent;
 #endif
@@ -272,7 +280,7 @@ static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse,
 }
 
 
-bool X86DAGToDAGISel::CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) {
+bool X86DAGToDAGISel::CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) const {
   if (FastISel) return false;
 
   // If U use can somehow reach N through another path then U can't fold N or
@@ -343,9 +351,10 @@ static void MoveBelowTokenFactor(SelectionDAG &DAG, SDOperand Load,
                      Store.getOperand(2), Store.getOperand(3));
 }
 
-/// InstructionSelectPreprocess - Preprocess the DAG to allow the instruction
-/// selector to pick more load-modify-store instructions. This is a common
-/// case:
+/// PreprocessForRMW - Preprocess the DAG to make instruction selection better.
+/// This is only run if not in -fast mode (aka -O0).
+/// This allows the instruction selector to pick more read-modify-write
+/// instructions. This is a common case:
 ///
 ///     [Load chain]
 ///         ^
 ///         |
@@ -382,7 +391,7 @@ static void MoveBelowTokenFactor(SelectionDAG &DAG, SDOperand Load,
 ///       \      /
 ///        \    /
 ///       [Store]
-void X86DAGToDAGISel::InstructionSelectPreprocess(SelectionDAG &DAG) {
+void X86DAGToDAGISel::PreprocessForRMW(SelectionDAG &DAG) {
   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
          E = DAG.allnodes_end(); I != E; ++I) {
     if (!ISD::isNON_TRUNCStore(I))
@@ -417,7 +426,7 @@ void X86DAGToDAGISel::InstructionSelectPreprocess(SelectionDAG &DAG) {
         RModW = true;
         std::swap(N10, N11);
       }
-      RModW = RModW && N10.Val->isOperand(Chain.Val) && N10.hasOneUse() &&
+      RModW = RModW && N10.Val->isOperandOf(Chain.Val) && N10.hasOneUse() &&
         (N10.getOperand(1) == N2) &&
         (N10.Val->getValueType(0) == N1.getValueType());
       if (RModW)
@@ -436,7 +445,7 @@ void X86DAGToDAGISel::InstructionSelectPreprocess(SelectionDAG &DAG) {
     case X86ISD::SHRD: {
       SDOperand N10 = N1.getOperand(0);
       if (ISD::isNON_EXTLoad(N10.Val))
-        RModW = N10.Val->isOperand(Chain.Val) && N10.hasOneUse() &&
+        RModW = N10.Val->isOperandOf(Chain.Val) && N10.hasOneUse() &&
          (N10.getOperand(1) == N2) &&
          (N10.Val->getValueType(0) == N1.getValueType());
       if (RModW)
@@ -452,6 +461,71 @@ void X86DAGToDAGISel::InstructionSelectPreprocess(SelectionDAG &DAG) {
   }
 }
 
+
+/// PreprocessForFPConvert - Walk over the dag, lowering fpround and fpextend
+/// nodes that target the FP stack into a store and load through the stack.
+/// This is a gross hack.  We would like to simply mark these as being illegal,
+/// but when we do that, legalize produces these when it expands calls, then
+/// expands these in the same legalize pass.  We would like dag combine to be
+/// able to hack on these between the call expansion and the node legalization.
+/// As such, this pass basically does "really late" legalization of these
+/// inline with the X86 isel pass.
+void X86DAGToDAGISel::PreprocessForFPConvert(SelectionDAG &DAG) {
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = DAG.allnodes_end(); I != E; ) {
+    SDNode *N = I++;  // Advance the iterator first, to avoid invalidation.
+    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
+      continue;
+
+    // If the source and destination are SSE registers, then this is a legal
+    // conversion that should not be lowered.
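+    // (Only conversions that touch the FP stack reach the lowering below;
+    // there is no direct x87 <-> XMM register move, so a mixed conversion has
+    // to round-trip through memory.)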
+    MVT::ValueType SrcVT = N->getOperand(0).getValueType();
+    MVT::ValueType DstVT = N->getValueType(0);
+    bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
+    bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
+    if (SrcIsSSE && DstIsSSE)
+      continue;
+
+    if (!SrcIsSSE && !DstIsSSE) {
+      // If this is an FPStack extension, it is a noop.
+      if (N->getOpcode() == ISD::FP_EXTEND)
+        continue;
+      // If this is a value-preserving FPStack truncation, it is a noop.
+      if (N->getConstantOperandVal(1))
+        continue;
+    }
+
+    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
+    // FPStack has extload and truncstore.  SSE can fold direct loads into other
+    // operations.  Based on this, decide what we want to do.
+    MVT::ValueType MemVT;
+    if (N->getOpcode() == ISD::FP_ROUND)
+      MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
+    else
+      MemVT = SrcIsSSE ? SrcVT : DstVT;
+
+    SDOperand MemTmp = DAG.CreateStackTemporary(MemVT);
+
+    // FIXME: optimize the case where the src/dest is a load or store?
+    SDOperand Store = DAG.getTruncStore(DAG.getEntryNode(), N->getOperand(0),
+                                        MemTmp, NULL, 0, MemVT);
+    SDOperand Result = DAG.getExtLoad(ISD::EXTLOAD, DstVT, Store, MemTmp,
+                                      NULL, 0, MemVT);
+
+    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
+    // extload we created.  This will cause general havoc on the dag because
+    // anything below the conversion could be folded into other existing nodes.
+    // To avoid invalidating 'I', back it up to the convert node.
+    --I;
+    DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result);
+
+    // Now that we did that, the node is dead.  Increment the iterator to the
+    // next node to process, then delete N.
+    ++I;
+    DAG.DeleteNode(N);
+  }
+}
+
 /// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
 /// when it has created a SelectionDAG for us to codegen.
 void X86DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
@@ -459,7 +533,10 @@ void X86DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
   MachineFunction::iterator FirstMBB = BB;
 
   if (!FastISel)
-    InstructionSelectPreprocess(DAG);
+    PreprocessForRMW(DAG);
+
+  // FIXME: This should only happen when not -fast.
+  PreprocessForFPConvert(DAG);
 
   // Codegen the basic block.
 #ifndef NDEBUG
@@ -473,42 +550,57 @@ void X86DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
 
   DAG.RemoveDeadNodes();
 
-  // Emit machine code to BB.
+  // Emit machine code to BB.  This can change 'BB' to the last block being
+  // inserted into.
   ScheduleAndEmitDAG(DAG);
 
   // If we are emitting FP stack code, scan the basic block to determine if this
   // block defines any FP values.  If so, put an FP_REG_KILL instruction before
   // the terminator of the block.
-  if (!Subtarget->hasSSE2()) {
-    // Note that FP stack instructions *are* used in SSE code when returning
-    // values, but these are not live out of the basic block, so we don't need
-    // an FP_REG_KILL in this case either.
-    bool ContainsFPCode = false;
+
+  // Note that FP stack instructions are used in all modes for long double,
+  // so we always need to do this check.
+  // Also note that it's possible for an FP stack register to be live across
+  // an instruction that produces multiple basic blocks (SSE CMOV) so we
+  // must check all the generated basic blocks.
+
+  // Scan all of the machine instructions in these MBBs, checking for FP
+  // stores.  (RFP32 and RFP64 will not exist in SSE mode, but RFP80 might.)
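+  // (FP_REG_KILL marks every x87 stack register as clobbered at the end of
+  // the block, so no FP-stack value is assumed live across the block
+  // boundary.)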
+  MachineFunction::iterator MBBI = FirstMBB;
+  MachineFunction::iterator EndMBB = BB; ++EndMBB;
+  for (; MBBI != EndMBB; ++MBBI) {
+    MachineBasicBlock *MBB = MBBI;
+
+    // If this block returns, ignore it.  We don't want to insert an FP_REG_KILL
+    // before the return.
+    if (!MBB->empty()) {
+      MachineBasicBlock::iterator EndI = MBB->end();
+      --EndI;
+      if (EndI->getDesc().isReturn())
+        continue;
+    }
 
-    // Scan all of the machine instructions in these MBBs, checking for FP
-    // stores.
-    MachineFunction::iterator MBBI = FirstMBB;
-    do {
-      for (MachineBasicBlock::iterator I = MBBI->begin(), E = MBBI->end();
-           !ContainsFPCode && I != E; ++I) {
-        if (I->getNumOperands() != 0 && I->getOperand(0).isRegister()) {
-          const TargetRegisterClass *clas;
-          for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
-            if (I->getOperand(op).isRegister() && I->getOperand(op).isDef() &&
-                MRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) &&
-                ((clas = RegMap->getRegClass(I->getOperand(0).getReg())) ==
-                   X86::RFP32RegisterClass ||
-                 clas == X86::RFP64RegisterClass)) {
-              ContainsFPCode = true;
-              break;
-            }
+    bool ContainsFPCode = false;
+    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+         !ContainsFPCode && I != E; ++I) {
+      if (I->getNumOperands() != 0 && I->getOperand(0).isRegister()) {
+        const TargetRegisterClass *clas;
+        for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
+          if (I->getOperand(op).isRegister() && I->getOperand(op).isDef() &&
+              TargetRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) &&
+              ((clas = RegInfo->getRegClass(I->getOperand(0).getReg())) ==
+                 X86::RFP32RegisterClass ||
+               clas == X86::RFP64RegisterClass ||
+               clas == X86::RFP80RegisterClass)) {
+            ContainsFPCode = true;
+            break;
           }
         }
       }
-    } while (!ContainsFPCode && &*(MBBI++) != BB);
-
+    }
     // Check PHI nodes in successor blocks.  These PHI's will be lowered to have
-    // a copy of the input value in this block.
+    // a copy of the input value in this block.  In SSE mode, we only care about
+    // 80-bit values.
     if (!ContainsFPCode) {
       // Final check, check LLVM BB's that are successors to the LLVM BB
       // corresponding to BB for FP PHI nodes.
@@ -518,17 +610,18 @@ void X86DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
            !ContainsFPCode && SI != E; ++SI) {
         for (BasicBlock::const_iterator II = SI->begin();
              (PN = dyn_cast<PHINode>(II)); ++II) {
-          if (PN->getType()->isFloatingPoint()) {
+          if (PN->getType()==Type::X86_FP80Ty ||
+              (!Subtarget->hasSSE1() && PN->getType()->isFloatingPoint()) ||
+              (!Subtarget->hasSSE2() && PN->getType()==Type::DoubleTy)) {
            ContainsFPCode = true;
            break;
          }
        }
      }
    }
-
    // Finally, if we found any FP code, emit the FP_REG_KILL instruction.
    if (ContainsFPCode) {
-      BuildMI(*BB, BB->getFirstTerminator(),
+      BuildMI(*MBB, MBBI->getFirstTerminator(),
              TM.getInstrInfo()->get(X86::FP_REG_KILL));
      ++NumFPKill;
    }
@@ -542,17 +635,6 @@ void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
   const TargetInstrInfo *TII = TM.getInstrInfo();
   if (Subtarget->isTargetCygMing())
     BuildMI(BB, TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");
-
-  // Switch the FPU to 64-bit precision mode for better compatibility and speed.
-  int CWFrameIdx = MFI->CreateStackObject(2, 2);
-  addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx);
-
-  // Set the high part to be 64-bit precision.
-  addFrameReference(BuildMI(BB, TII->get(X86::MOV8mi)),
-                    CWFrameIdx, 1).addImm(2);
-
-  // Reload the modified control word now.
-  addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
 }
 
 void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
@@ -564,15 +646,12 @@ void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
 
 /// MatchAddress - Add the specified node to the specified addressing mode,
 /// returning true if it cannot be done.  This just pattern matches for the
-/// addressing mode
+/// addressing mode.
 bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
                                    bool isRoot, unsigned Depth) {
-  if (Depth > 5) {
-    // Default, generate it as a register.
-    AM.BaseType = X86ISelAddressMode::RegBase;
-    AM.Base.Reg = N;
-    return false;
-  }
+  // Limit recursion.
+  if (Depth > 5)
+    return MatchAddressBase(N, AM, isRoot, Depth);
 
   // RIP relative addressing: %rip + 32-bit displacement!
   if (AM.isRIPRel) {
@@ -587,7 +666,7 @@ bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
   }
 
   int id = N.Val->getNodeId();
-  bool Available = isSelected(id);
+  bool AlreadySelected = isSelected(id); // Already selected, not yet replaced.
 
   switch (N.getOpcode()) {
   default: break;
@@ -603,45 +682,41 @@ bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
   case X86ISD::Wrapper: {
     bool is64Bit = Subtarget->is64Bit();
     // Under X86-64 non-small code model, GV (and friends) are 64-bits.
-    if (is64Bit && TM.getCodeModel() != CodeModel::Small)
+    // Also, base and index reg must be 0 in order to use rip as base.
+    if (is64Bit && (TM.getCodeModel() != CodeModel::Small ||
+                    AM.Base.Reg.Val || AM.IndexReg.Val))
       break;
     if (AM.GV != 0 || AM.CP != 0 || AM.ES != 0 || AM.JT != -1)
      break;
     // If the value is already in a register but both the base and index
     // components have been picked, that register cannot go into the addressing
     // mode; duplicate the GlobalAddress or ConstantPool as a displacement
     // instead.
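+    // (An x86 address has the form Base + IndexReg*Scale + Disp; the Disp
+    // field can hold a symbol's address directly, using neither register
+    // slot.)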
-    if (!Available || (AM.Base.Reg.Val && AM.IndexReg.Val)) {
-      bool isStatic = TM.getRelocationModel() == Reloc::Static;
+    if (!AlreadySelected || (AM.Base.Reg.Val && AM.IndexReg.Val)) {
       SDOperand N0 = N.getOperand(0);
       if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
         GlobalValue *GV = G->getGlobal();
-        bool isAbs32 = !is64Bit || isStatic;
-        if (isAbs32 || isRoot) {
-          AM.GV = GV;
-          AM.Disp += G->getOffset();
-          AM.isRIPRel = !isAbs32;
-          return false;
-        }
+        AM.GV = GV;
+        AM.Disp += G->getOffset();
+        AM.isRIPRel = TM.getRelocationModel() != Reloc::Static &&
+          Subtarget->isPICStyleRIPRel();
+        return false;
       } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
-        if (!is64Bit || isStatic || isRoot) {
-          AM.CP = CP->getConstVal();
-          AM.Align = CP->getAlignment();
-          AM.Disp += CP->getOffset();
-          AM.isRIPRel = !isStatic;
-          return false;
-        }
+        AM.CP = CP->getConstVal();
+        AM.Align = CP->getAlignment();
+        AM.Disp += CP->getOffset();
+        AM.isRIPRel = TM.getRelocationModel() != Reloc::Static &&
+          Subtarget->isPICStyleRIPRel();
+        return false;
       } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
-        if (isStatic || isRoot) {
-          AM.ES = S->getSymbol();
-          AM.isRIPRel = !isStatic;
-          return false;
-        }
+        AM.ES = S->getSymbol();
+        AM.isRIPRel = TM.getRelocationModel() != Reloc::Static &&
+          Subtarget->isPICStyleRIPRel();
+        return false;
       } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
-        if (isStatic || isRoot) {
-          AM.JT = J->getIndex();
-          AM.isRIPRel = !isStatic;
-          return false;
-        }
+        AM.JT = J->getIndex();
+        AM.isRIPRel = TM.getRelocationModel() != Reloc::Static &&
+          Subtarget->isPICStyleRIPRel();
+        return false;
       }
     }
     break;
@@ -656,40 +731,48 @@ bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
     break;
 
   case ISD::SHL:
-    if (!Available && AM.IndexReg.Val == 0 && AM.Scale == 1)
-      if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1))) {
-        unsigned Val = CN->getValue();
-        if (Val == 1 || Val == 2 || Val == 3) {
-          AM.Scale = 1 << Val;
-          SDOperand ShVal = N.Val->getOperand(0);
-
-          // Okay, we know that we have a scale by now.  However, if the scaled
-          // value is an add of something and a constant, we can fold the
-          // constant into the disp field here.
-          if (ShVal.Val->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
-              isa<ConstantSDNode>(ShVal.Val->getOperand(1))) {
-            AM.IndexReg = ShVal.Val->getOperand(0);
-            ConstantSDNode *AddVal =
-              cast<ConstantSDNode>(ShVal.Val->getOperand(1));
-            uint64_t Disp = AM.Disp + (AddVal->getValue() << Val);
-            if (isInt32(Disp))
-              AM.Disp = Disp;
-            else
-              AM.IndexReg = ShVal;
-          } else {
+    if (AlreadySelected || AM.IndexReg.Val != 0 || AM.Scale != 1 || AM.isRIPRel)
+      break;
+
+    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1))) {
+      unsigned Val = CN->getValue();
+      if (Val == 1 || Val == 2 || Val == 3) {
+        AM.Scale = 1 << Val;
+        SDOperand ShVal = N.Val->getOperand(0);
+
+        // Okay, we know that we have a scale by now.  However, if the scaled
+        // value is an add of something and a constant, we can fold the
+        // constant into the disp field here.
+        if (ShVal.Val->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
+            isa<ConstantSDNode>(ShVal.Val->getOperand(1))) {
+          AM.IndexReg = ShVal.Val->getOperand(0);
+          ConstantSDNode *AddVal =
+            cast<ConstantSDNode>(ShVal.Val->getOperand(1));
+          uint64_t Disp = AM.Disp + (AddVal->getValue() << Val);
+          if (isInt32(Disp))
+            AM.Disp = Disp;
+          else
            AM.IndexReg = ShVal;
-          }
-          return false;
+        } else {
+          AM.IndexReg = ShVal;
         }
+        return false;
       }
     break;
+    }
+
+  case ISD::SMUL_LOHI:
+  case ISD::UMUL_LOHI:
+    // A mul_lohi where we need the low part can be folded as a plain multiply.
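+    // (Result 0 of [SU]MUL_LOHI is the low half of the product, which has
+    // exactly the same bits as an ordinary ISD::MUL, so the MUL matching
+    // below applies.)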
+    if (N.ResNo != 0) break;
+    // FALL THROUGH
   case ISD::MUL:
     // X*[3,5,9] -> X+X*[2,4,8]
-    if (!Available &&
+    if (!AlreadySelected &&
         AM.BaseType == X86ISelAddressMode::RegBase &&
         AM.Base.Reg.Val == 0 &&
-        AM.IndexReg.Val == 0) {
+        AM.IndexReg.Val == 0 &&
+        !AM.isRIPRel) {
       if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1)))
         if (CN->getValue() == 3 || CN->getValue() == 5 || CN->getValue() == 9) {
           AM.Scale = unsigned(CN->getValue())-1;
@@ -721,7 +804,7 @@ bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
     break;
 
   case ISD::ADD:
-    if (!Available) {
+    if (!AlreadySelected) {
       X86ISelAddressMode Backup = AM;
       if (!MatchAddress(N.Val->getOperand(0), AM, false, Depth+1) &&
           !MatchAddress(N.Val->getOperand(1), AM, false, Depth+1))
@@ -736,30 +819,78 @@ bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
 
   case ISD::OR:
     // Handle "X | C" as "X + C" iff X is known to have C bits clear.
-    if (!Available) {
-      if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
-        X86ISelAddressMode Backup = AM;
-        // Start with the LHS as an addr mode.
-        if (!MatchAddress(N.getOperand(0), AM, false) &&
-            // Address could not have picked a GV address for the displacement.
-            AM.GV == NULL &&
-            // On x86-64, the resultant disp must fit in 32-bits.
-            isInt32(AM.Disp + CN->getSignExtended()) &&
-            // Check to see if the LHS & C is zero.
-            CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getValue())) {
-          AM.Disp += CN->getValue();
-          return false;
-        }
-        AM = Backup;
+    if (AlreadySelected) break;
+
+    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+      X86ISelAddressMode Backup = AM;
+      // Start with the LHS as an addr mode.
+      if (!MatchAddress(N.getOperand(0), AM, false) &&
+          // Address could not have picked a GV address for the displacement.
+          AM.GV == NULL &&
+          // On x86-64, the resultant disp must fit in 32-bits.
+          isInt32(AM.Disp + CN->getSignExtended()) &&
+          // Check to see if the LHS & C is zero.
+          CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
+        AM.Disp += CN->getValue();
+        return false;
      }
+      AM = Backup;
    }
    break;
+
+  case ISD::AND: {
+    // Handle "(x << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
+    // allows us to fold the shift into this addressing mode.
+    if (AlreadySelected) break;
+    SDOperand Shift = N.getOperand(0);
+    if (Shift.getOpcode() != ISD::SHL) break;
+
+    // Scale must not be used already.
+    if (AM.IndexReg.Val != 0 || AM.Scale != 1) break;
+
+    // Not when RIP is used as the base.
+    if (AM.isRIPRel) break;
+
+    ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
+    ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
+    if (!C1 || !C2) break;
+
+    // Not likely to be profitable if either the AND or SHIFT node has more
+    // than one use (unless all uses are for address computation). Besides,
+    // the isel mechanism requires their node ids to be reused.
+    if (!N.hasOneUse() || !Shift.hasOneUse())
+      break;
+
+    // Verify that the shift amount is something we can fold.
+    unsigned ShiftCst = C1->getValue();
+    if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3)
+      break;
+
+    // Get the new AND mask, this folds to a constant.
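+    // E.g. (x << 2) & 0xFFC becomes (x & 0x3FF) << 2, and the shift is then
+    // absorbed into the addressing mode as Scale = 4.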
+    SDOperand NewANDMask = CurDAG->getNode(ISD::SRL, N.getValueType(),
+                                           SDOperand(C2, 0), SDOperand(C1, 0));
+    SDOperand NewAND = CurDAG->getNode(ISD::AND, N.getValueType(),
+                                       Shift.getOperand(0), NewANDMask);
+    NewANDMask.Val->setNodeId(Shift.Val->getNodeId());
+    NewAND.Val->setNodeId(N.Val->getNodeId());
+
+    AM.Scale = 1 << ShiftCst;
+    AM.IndexReg = NewAND;
+    return false;
+  }
   }
 
+  return MatchAddressBase(N, AM, isRoot, Depth);
+}
+
+/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
+/// specified addressing mode without any further recursion.
+bool X86DAGToDAGISel::MatchAddressBase(SDOperand N, X86ISelAddressMode &AM,
+                                       bool isRoot, unsigned Depth) {
   // Is the base register already occupied?
   if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.Val) {
     // If so, check to see if the scale index register is set.
-    if (AM.IndexReg.Val == 0) {
+    if (AM.IndexReg.Val == 0 && !AM.isRIPRel) {
      AM.IndexReg = N;
      AM.Scale = 1;
      return false;
@@ -804,7 +935,7 @@ static inline bool isZeroNode(SDOperand Elt) {
  return ((isa<ConstantSDNode>(Elt) &&
           cast<ConstantSDNode>(Elt)->getValue() == 0) ||
          (isa<ConstantFPSDNode>(Elt) &&
-           cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0)));
+           cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
 }
 
 
@@ -833,20 +964,15 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDOperand Op, SDOperand Pred,
   // Also handle the case where we explicitly require zeros in the top
   // elements.  This is a vector shuffle from the zero vector.
   if (N.getOpcode() == ISD::VECTOR_SHUFFLE && N.Val->hasOneUse() &&
-      N.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
+      // Check to see if the top elements are all zeros (or bitcast of zeros).
+      ISD::isBuildVectorAllZeros(N.getOperand(0).Val) &&
       N.getOperand(1).getOpcode() == ISD::SCALAR_TO_VECTOR &&
       N.getOperand(1).Val->hasOneUse() &&
       ISD::isNON_EXTLoad(N.getOperand(1).getOperand(0).Val) &&
       N.getOperand(1).getOperand(0).hasOneUse()) {
-    // Check to see if the BUILD_VECTOR is building a zero vector.
-    SDOperand BV = N.getOperand(0);
-    for (unsigned i = 0, e = BV.getNumOperands(); i != e; ++i)
-      if (!isZeroNode(BV.getOperand(i)) &&
-          BV.getOperand(i).getOpcode() != ISD::UNDEF)
-        return false;  // Not a zero/undef vector.
     // Check to see if the shuffle mask is 4/L/L/L or 2/L, where L is something
     // from the LHS.
-    unsigned VecWidth = BV.getNumOperands();
+    unsigned VecWidth=MVT::getVectorNumElements(N.getOperand(0).getValueType());
     SDOperand ShufMask = N.getOperand(2);
     assert(ShufMask.getOpcode() == ISD::BUILD_VECTOR && "Invalid shuf mask!");
     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(ShufMask.getOperand(0))) {
@@ -944,23 +1070,24 @@ SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
   assert(!Subtarget->is64Bit() && "X86-64 PIC uses RIP relative addressing");
   if (!GlobalBaseReg) {
     // Insert the set of GlobalBaseReg into the first MBB of the function
-    MachineBasicBlock &FirstMBB = BB->getParent()->front();
+    MachineFunction *MF = BB->getParent();
+    MachineBasicBlock &FirstMBB = MF->front();
     MachineBasicBlock::iterator MBBI = FirstMBB.begin();
-    SSARegMap *RegMap = BB->getParent()->getSSARegMap();
-    unsigned PC = RegMap->createVirtualRegister(X86::GR32RegisterClass);
+    MachineRegisterInfo &RegInfo = MF->getRegInfo();
+    unsigned PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
 
     const TargetInstrInfo *TII = TM.getInstrInfo();
-    BuildMI(FirstMBB, MBBI, TII->get(X86::MovePCtoStack));
-    BuildMI(FirstMBB, MBBI, TII->get(X86::POP32r), PC);
+    // Operand of MovePCtoStack is completely ignored by asm printer. It's
+    // only used in JIT code emission as displacement to pc.
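+    // (MOVPC32r expands to a call of the next instruction followed by a pop,
+    // leaving the current pc in the virtual register.)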
+    BuildMI(FirstMBB, MBBI, TII->get(X86::MOVPC32r), PC).addImm(0);
 
     // If we're using vanilla 'GOT' PIC style, we should use relative addressing
     // not to pc, but to _GLOBAL_ADDRESS_TABLE_ external
     if (TM.getRelocationModel() == Reloc::PIC_ &&
         Subtarget->isPICStyleGOT()) {
-      GlobalBaseReg = RegMap->createVirtualRegister(X86::GR32RegisterClass);
-      BuildMI(FirstMBB, MBBI, TII->get(X86::ADD32ri), GlobalBaseReg).
-        addReg(PC).
-        addExternalSymbol("_GLOBAL_OFFSET_TABLE_");
+      GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
+      BuildMI(FirstMBB, MBBI, TII->get(X86::ADD32ri), GlobalBaseReg)
+        .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_");
     } else {
       GlobalBaseReg = PC;
     }
@@ -976,6 +1103,43 @@ static SDNode *FindCallStartFromCall(SDNode *Node) {
   return FindCallStartFromCall(Node->getOperand(0).Val);
 }
 
+SDNode *X86DAGToDAGISel::getTruncate(SDOperand N0, MVT::ValueType VT) {
+  SDOperand SRIdx;
+  switch (VT) {
+  case MVT::i8:
+    SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
+    // Ensure that the source register has an 8-bit subreg on 32-bit targets
+    if (!Subtarget->is64Bit()) {
+      unsigned Opc;
+      MVT::ValueType VT;
+      switch (N0.getValueType()) {
+      default: assert(0 && "Unknown truncate!");
+      case MVT::i16:
+        Opc = X86::MOV16to16_;
+        VT = MVT::i16;
+        break;
+      case MVT::i32:
+        Opc = X86::MOV32to32_;
+        VT = MVT::i32;
+        break;
+      }
+      N0 = SDOperand(CurDAG->getTargetNode(Opc, VT, MVT::Flag, N0), 0);
+      return CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
+                                   VT, N0, SRIdx, N0.getValue(1));
+    }
+    break;
+  case MVT::i16:
+    SRIdx = CurDAG->getTargetConstant(2, MVT::i32); // SubRegSet 2
+    break;
+  case MVT::i32:
+    SRIdx = CurDAG->getTargetConstant(3, MVT::i32); // SubRegSet 3
+    break;
+  default: assert(0 && "Unknown truncate!"); break;
+  }
+  return CurDAG->getTargetNode(X86::EXTRACT_SUBREG, VT, N0, SRIdx);
+}
+
+
 SDNode *X86DAGToDAGISel::Select(SDOperand N) {
   SDNode *Node = N.Val;
   MVT::ValueType NVT = Node->getValueType(0);
@@ -1004,11 +1168,63 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
   case X86ISD::GlobalBaseReg:
     return getGlobalBaseReg();
 
+  // FIXME: This is a workaround for a tblgen problem: rdar://5791600
+  case X86ISD::RET_FLAG:
+    if (ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+      if (Amt->getSignExtended() != 0) break;
+
+      // Match (X86retflag 0).
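+      // (Hand-copy the chain, the optional trailing flag, and the register
+      // operands onto a plain X86::RET node, since tblgen cannot match this
+      // form; see the FIXME above.)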
+      SDOperand Chain = N.getOperand(0);
+      bool HasInFlag = N.getOperand(N.getNumOperands()-1).getValueType()
+                        == MVT::Flag;
+      SmallVector<SDOperand, 8> Ops0;
+      AddToISelQueue(Chain);
+      SDOperand InFlag(0, 0);
+      if (HasInFlag) {
+        InFlag = N.getOperand(N.getNumOperands()-1);
+        AddToISelQueue(InFlag);
+      }
+      for (unsigned i = 2, e = N.getNumOperands()-(HasInFlag?1:0); i != e;
+           ++i) {
+        AddToISelQueue(N.getOperand(i));
+        Ops0.push_back(N.getOperand(i));
+      }
+      Ops0.push_back(Chain);
+      if (HasInFlag)
+        Ops0.push_back(InFlag);
+      return CurDAG->getTargetNode(X86::RET, MVT::Other,
+                                   &Ops0[0], Ops0.size());
+    }
+    break;
+
+  case X86ISD::FP_GET_ST0_ST1: {
+    SDOperand Chain = N.getOperand(0);
+    SDOperand InFlag = N.getOperand(1);
+    AddToISelQueue(Chain);
+    AddToISelQueue(InFlag);
+    std::vector<MVT::ValueType> Tys;
+    Tys.push_back(MVT::f80);
+    Tys.push_back(MVT::f80);
+    Tys.push_back(MVT::Other);
+    Tys.push_back(MVT::Flag);
+    SDOperand Ops[] = { Chain, InFlag };
+    SDNode *ResNode = CurDAG->getTargetNode(X86::FpGET_ST0_ST1, Tys,
+                                            Ops, 2);
+    Chain  = SDOperand(ResNode, 2);
+    InFlag = SDOperand(ResNode, 3);
+    ReplaceUses(SDOperand(N.Val, 2), Chain);
+    ReplaceUses(SDOperand(N.Val, 3), InFlag);
+    return ResNode;
+  }
+
   case ISD::ADD: {
     // Turn ADD X, c to MOV32ri X+c. This cannot be done with tblgen'd
     // code and is matched first so to prevent it from being turned into
     // LEA32r X+c.
-    // In 64-bit mode, use LEA to take advantage of RIP-relative addressing.
+    // In 64-bit small code size mode, use LEA to take advantage of
+    // RIP-relative addressing.
+    if (TM.getCodeModel() != CodeModel::Small)
+      break;
     MVT::ValueType PtrVT = TLI.getPointerTy();
     SDOperand N0 = N.getOperand(0);
     SDOperand N1 = N.getOperand(1);
@@ -1043,9 +1259,13 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
       break;
   }
 
-  case ISD::MULHU:
-  case ISD::MULHS: {
-    if (Opcode == ISD::MULHU)
+  case ISD::SMUL_LOHI:
+  case ISD::UMUL_LOHI: {
+    SDOperand N0 = Node->getOperand(0);
+    SDOperand N1 = Node->getOperand(1);
+
+    bool isSigned = Opcode == ISD::SMUL_LOHI;
+    if (!isSigned)
       switch (NVT) {
       default: assert(0 && "Unsupported VT!");
       case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
@@ -1071,70 +1291,90 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
     case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
     }
 
-    SDOperand N0 = Node->getOperand(0);
-    SDOperand N1 = Node->getOperand(1);
-
-    bool foldedLoad = false;
     SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
-    foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
-    // MULHU and MULHS are commmutative
+    bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
+    // multiply is commutative
     if (!foldedLoad) {
       foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3);
-      if (foldedLoad) {
-        N0 = Node->getOperand(1);
-        N1 = Node->getOperand(0);
-      }
+      if (foldedLoad)
+        std::swap(N0, N1);
     }
 
-    SDOperand Chain;
-    if (foldedLoad) {
-      Chain = N1.getOperand(0);
-      AddToISelQueue(Chain);
-    } else
-      Chain = CurDAG->getEntryNode();
-
-    SDOperand InFlag(0, 0);
     AddToISelQueue(N0);
-    Chain  = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(LoReg, NVT),
-                                  N0, InFlag);
-    InFlag = Chain.getValue(1);
+    SDOperand InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), LoReg,
+                                            N0, SDOperand()).getValue(1);
 
     if (foldedLoad) {
+      AddToISelQueue(N1.getOperand(0));
       AddToISelQueue(Tmp0);
       AddToISelQueue(Tmp1);
       AddToISelQueue(Tmp2);
       AddToISelQueue(Tmp3);
-      SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Chain, InFlag };
+      SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N1.getOperand(0), InFlag };
       SDNode *CNode = CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops,
                                             6);
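+      // (The memory-form multiply reads its second operand through the folded
+      // load; result 0 of CNode is the chain, result 1 the flag that carries
+      // the implicit AX/DX-family defs to the register copies below.)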
- Chain = SDOperand(CNode, 0); InFlag = SDOperand(CNode, 1); + // Update the chain. + ReplaceUses(N1.getValue(1), SDOperand(CNode, 0)); } else { AddToISelQueue(N1); InFlag = SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0); } - SDOperand Result = CurDAG->getCopyFromReg(Chain, HiReg, NVT, InFlag); - ReplaceUses(N.getValue(0), Result); - if (foldedLoad) - ReplaceUses(N1.getValue(1), Result.getValue(1)); + // Copy the low half of the result, if it is needed. + if (!N.getValue(0).use_empty()) { + SDOperand Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), + LoReg, NVT, InFlag); + InFlag = Result.getValue(2); + ReplaceUses(N.getValue(0), Result); +#ifndef NDEBUG + DOUT << std::string(Indent-2, ' ') << "=> "; + DEBUG(Result.Val->dump(CurDAG)); + DOUT << "\n"; +#endif + } + // Copy the high half of the result, if it is needed. + if (!N.getValue(1).use_empty()) { + SDOperand Result; + if (HiReg == X86::AH && Subtarget->is64Bit()) { + // Prevent use of AH in a REX instruction by referencing AX instead. + // Shift it down 8 bits. + Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), + X86::AX, MVT::i16, InFlag); + InFlag = Result.getValue(2); + Result = SDOperand(CurDAG->getTargetNode(X86::SHR16ri, MVT::i16, Result, + CurDAG->getTargetConstant(8, MVT::i8)), 0); + // Then truncate it down to i8. + SDOperand SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1 + Result = SDOperand(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, + MVT::i8, Result, SRIdx), 0); + } else { + Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), + HiReg, NVT, InFlag); + InFlag = Result.getValue(2); + } + ReplaceUses(N.getValue(1), Result); +#ifndef NDEBUG + DOUT << std::string(Indent-2, ' ') << "=> "; + DEBUG(Result.Val->dump(CurDAG)); + DOUT << "\n"; +#endif + } #ifndef NDEBUG - DOUT << std::string(Indent-2, ' ') << "=> "; - DEBUG(Result.Val->dump(CurDAG)); - DOUT << "\n"; Indent -= 2; #endif + return NULL; } - case ISD::SDIV: - case ISD::UDIV: - case ISD::SREM: - case ISD::UREM: { - bool isSigned = Opcode == ISD::SDIV || Opcode == ISD::SREM; - bool isDiv = Opcode == ISD::SDIV || Opcode == ISD::UDIV; + case ISD::SDIVREM: + case ISD::UDIVREM: { + SDOperand N0 = Node->getOperand(0); + SDOperand N1 = Node->getOperand(1); + + bool isSigned = Opcode == ISD::SDIVREM; if (!isSigned) switch (NVT) { default: assert(0 && "Unsupported VT!"); @@ -1178,9 +1418,10 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) { break; } - SDOperand N0 = Node->getOperand(0); - SDOperand N1 = Node->getOperand(1); - SDOperand InFlag(0, 0); + SDOperand Tmp0, Tmp1, Tmp2, Tmp3; + bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3); + + SDOperand InFlag; if (NVT == MVT::i8 && !isSigned) { // Special case for div8, just use a move with zero extension to AX to // clear the upper 8 bits (AH). @@ -1203,13 +1444,13 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) { SDOperand(CurDAG->getTargetNode(X86::MOVZX16rr8, MVT::i16, N0), 0); Chain = CurDAG->getEntryNode(); } - Chain = CurDAG->getCopyToReg(Chain, X86::AX, Move, InFlag); + Chain = CurDAG->getCopyToReg(Chain, X86::AX, Move, SDOperand()); InFlag = Chain.getValue(1); } else { AddToISelQueue(N0); InFlag = - CurDAG->getCopyToReg(CurDAG->getEntryNode(), LoReg, N0, - InFlag).getValue(1); + CurDAG->getCopyToReg(CurDAG->getEntryNode(), + LoReg, N0, SDOperand()).getValue(1); if (isSigned) { // Sign extend the low part into the high part. 
InFlag = @@ -1217,13 +1458,11 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) { } else { // Zero out the high part, effectively zero extending the input. SDOperand ClrNode = SDOperand(CurDAG->getTargetNode(ClrOpcode, NVT), 0); - InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), HiReg, ClrNode, - InFlag).getValue(1); + InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), HiReg, + ClrNode, InFlag).getValue(1); } } - SDOperand Tmp0, Tmp1, Tmp2, Tmp3, Chain; - bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3); if (foldedLoad) { AddToISelQueue(N1.getOperand(0)); AddToISelQueue(Tmp0); @@ -1233,53 +1472,147 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) { SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N1.getOperand(0), InFlag }; SDNode *CNode = CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6); - Chain = SDOperand(CNode, 0); InFlag = SDOperand(CNode, 1); + // Update the chain. + ReplaceUses(N1.getValue(1), SDOperand(CNode, 0)); } else { AddToISelQueue(N1); - Chain = CurDAG->getEntryNode(); InFlag = SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0); } - SDOperand Result = - CurDAG->getCopyFromReg(Chain, isDiv ? LoReg : HiReg, NVT, InFlag); - ReplaceUses(N.getValue(0), Result); - if (foldedLoad) - ReplaceUses(N1.getValue(1), Result.getValue(1)); + // Copy the division (low) result, if it is needed. + if (!N.getValue(0).use_empty()) { + SDOperand Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), + LoReg, NVT, InFlag); + InFlag = Result.getValue(2); + ReplaceUses(N.getValue(0), Result); +#ifndef NDEBUG + DOUT << std::string(Indent-2, ' ') << "=> "; + DEBUG(Result.Val->dump(CurDAG)); + DOUT << "\n"; +#endif + } + // Copy the remainder (high) result, if it is needed. + if (!N.getValue(1).use_empty()) { + SDOperand Result; + if (HiReg == X86::AH && Subtarget->is64Bit()) { + // Prevent use of AH in a REX instruction by referencing AX instead. + // Shift it down 8 bits. + Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), + X86::AX, MVT::i16, InFlag); + InFlag = Result.getValue(2); + Result = SDOperand(CurDAG->getTargetNode(X86::SHR16ri, MVT::i16, Result, + CurDAG->getTargetConstant(8, MVT::i8)), 0); + // Then truncate it down to i8. 
+ SDOperand SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1 + Result = SDOperand(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, + MVT::i8, Result, SRIdx), 0); + } else { + Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), + HiReg, NVT, InFlag); + InFlag = Result.getValue(2); + } + ReplaceUses(N.getValue(1), Result); +#ifndef NDEBUG + DOUT << std::string(Indent-2, ' ') << "=> "; + DEBUG(Result.Val->dump(CurDAG)); + DOUT << "\n"; +#endif + } #ifndef NDEBUG - DOUT << std::string(Indent-2, ' ') << "=> "; - DEBUG(Result.Val->dump(CurDAG)); - DOUT << "\n"; Indent -= 2; #endif return NULL; } - case ISD::TRUNCATE: { - if (!Subtarget->is64Bit() && NVT == MVT::i8) { - unsigned Opc2; - MVT::ValueType VT; - switch (Node->getOperand(0).getValueType()) { - default: assert(0 && "Unknown truncate!"); + case ISD::ANY_EXTEND: { + SDOperand N0 = Node->getOperand(0); + AddToISelQueue(N0); + if (NVT == MVT::i64 || NVT == MVT::i32 || NVT == MVT::i16) { + SDOperand SRIdx; + switch(N0.getValueType()) { + case MVT::i32: + SRIdx = CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32); + break; case MVT::i16: - Opc = X86::MOV16to16_; - VT = MVT::i16; - Opc2 = X86::TRUNC_16_to8; + SRIdx = CurDAG->getTargetConstant(X86::SUBREG_16BIT, MVT::i32); break; - case MVT::i32: - Opc = X86::MOV32to32_; - VT = MVT::i32; - Opc2 = X86::TRUNC_32_to8; + case MVT::i8: + if (Subtarget->is64Bit()) + SRIdx = CurDAG->getTargetConstant(X86::SUBREG_8BIT, MVT::i32); break; + default: assert(0 && "Unknown any_extend!"); } + if (SRIdx.Val) { + SDOperand ImplVal = + CurDAG->getTargetConstant(X86InstrInfo::IMPL_VAL_UNDEF, MVT::i32); + SDNode *ResNode = CurDAG->getTargetNode(X86::INSERT_SUBREG, + NVT, ImplVal, N0, SRIdx); - AddToISelQueue(Node->getOperand(0)); - SDOperand Tmp = - SDOperand(CurDAG->getTargetNode(Opc, VT, Node->getOperand(0)), 0); - SDNode *ResNode = CurDAG->getTargetNode(Opc2, NVT, Tmp); +#ifndef NDEBUG + DOUT << std::string(Indent-2, ' ') << "=> "; + DEBUG(ResNode->dump(CurDAG)); + DOUT << "\n"; + Indent -= 2; +#endif + return ResNode; + } // Otherwise let generated ISel handle it. 
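+      // (On 32-bit targets only EAX/EBX/ECX/EDX have 8-bit subregisters, so
+      // the i8 case above leaves SRIdx unset there and falls through.)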
+    }
+    break;
+  }
+
+  case ISD::SIGN_EXTEND_INREG: {
+    SDOperand N0 = Node->getOperand(0);
+    AddToISelQueue(N0);
+
+    MVT::ValueType SVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+    SDOperand TruncOp = SDOperand(getTruncate(N0, SVT), 0);
+    unsigned Opc = 0;
+    switch (NVT) {
+    case MVT::i16:
+      if (SVT == MVT::i8) Opc = X86::MOVSX16rr8;
+      else assert(0 && "Unknown sign_extend_inreg!");
+      break;
+    case MVT::i32:
+      switch (SVT) {
+      case MVT::i8:  Opc = X86::MOVSX32rr8;  break;
+      case MVT::i16: Opc = X86::MOVSX32rr16; break;
+      default: assert(0 && "Unknown sign_extend_inreg!");
+      }
+      break;
+    case MVT::i64:
+      switch (SVT) {
+      case MVT::i8:  Opc = X86::MOVSX64rr8;  break;
+      case MVT::i16: Opc = X86::MOVSX64rr16; break;
+      case MVT::i32: Opc = X86::MOVSX64rr32; break;
+      default: assert(0 && "Unknown sign_extend_inreg!");
+      }
+      break;
+    default: assert(0 && "Unknown sign_extend_inreg!");
+    }
+
+    SDNode *ResNode = CurDAG->getTargetNode(Opc, NVT, TruncOp);
+
+#ifndef NDEBUG
+    DOUT << std::string(Indent-2, ' ') << "=> ";
+    DEBUG(TruncOp.Val->dump(CurDAG));
+    DOUT << "\n";
+    DOUT << std::string(Indent-2, ' ') << "=> ";
+    DEBUG(ResNode->dump(CurDAG));
+    DOUT << "\n";
+    Indent -= 2;
+#endif
+    return ResNode;
+    break;
+  }
+
+  case ISD::TRUNCATE: {
+    SDOperand Input = Node->getOperand(0);
+    AddToISelQueue(Node->getOperand(0));
+    SDNode *ResNode = getTruncate(Input, NVT);
+
 #ifndef NDEBUG
       DOUT << std::string(Indent-2, ' ') << "=> ";
@@ -1287,9 +1620,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
       DOUT << "\n";
       Indent -= 2;
 #endif
-      return ResNode;
-    }
-
+    return ResNode;
     break;
   }
   }
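The X*[3,5,9] -> X+X*[2,4,8] rewrite in MatchAddress above is easy to check outside the DAG machinery. What follows is a minimal standalone sketch of the same decomposition, assuming a hypothetical plain-integer model of X86ISelAddressMode (AddrModeModel, matchMulByConst, and every other name here is illustrative, not taken from the patch):

#include <cassert>
#include <cstdint>
#include <initializer_list>

// Hypothetical model of the addressing mode MatchAddress fills in:
// the effective address is Base + Index * Scale + Disp.
struct AddrModeModel {
  uint64_t Base = 0;
  uint64_t Index = 0;
  unsigned Scale = 1;
  int32_t Disp = 0;
  uint64_t eval() const { return Base + Index * Scale + Disp; }
};

// Mirror of the ISD::MUL case above: X*9 becomes X + X*8, which a single
// LEA (base = X, index = X, scale = 8) can compute; likewise for 3 and 5.
AddrModeModel matchMulByConst(uint64_t X, unsigned C) {
  assert((C == 3 || C == 5 || C == 9) && "only X*[3,5,9] folds this way");
  AddrModeModel AM;
  AM.Scale = C - 1;  // the AM.Scale = CN->getValue()-1 step
  AM.Index = X;      // the scaled copy of X
  AM.Base  = X;      // the extra "+X"
  return AM;
}

int main() {
  for (unsigned C : {3u, 5u, 9u})
    assert(matchMulByConst(12345, C).eval() == 12345ull * C);
  return 0;
}

The Disp field in the same model is how the SHL case above folds the (AddVal->getValue() << Val) constant into the address rather than keeping it in a register.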