X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FSelectionDAG%2FSelectionDAGISel.cpp;h=37d7eeb1a94de8f9f693128ddb7d916bd5eae3c6;hb=0b4f80ee898c1e85242482e4cb363e6bfe0a133b;hp=7cf3f5cb0dcc68ef9264c31e9e8e2ae58b694ca9;hpb=c70ddad2b7d7abffeaaace913939fb3c5c55a38b;p=oota-llvm.git diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 7cf3f5cb0dc..37d7eeb1a94 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -24,7 +24,6 @@ #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" #include "llvm/IntrinsicInst.h" -#include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/MachineDebugInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -34,6 +33,7 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SSARegMap.h" #include "llvm/Target/MRegisterInfo.h" +#include "llvm/Target/TargetAsmInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetInstrInfo.h" @@ -44,9 +44,6 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Compiler.h" -#include -#include -#include #include using namespace llvm; @@ -184,6 +181,12 @@ namespace llvm { unsigned MakeReg(MVT::ValueType VT) { return RegMap->createVirtualRegister(TLI.getRegClassFor(VT)); } + + /// isExportedInst - Return true if the specified value is an instruction + /// exported from its block. + bool isExportedInst(const Value *V) { + return ValueMap.count(V); + } unsigned CreateRegForValue(const Value *V); @@ -203,6 +206,7 @@ static bool isUsedOutsideOfDefiningBlock(Instruction *I) { BasicBlock *BB = I->getParent(); for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI) if (cast(*UI)->getParent() != BB || isa(*UI) || + // FIXME: Remove switchinst special case. isa(*UI)) return true; return false; @@ -236,21 +240,22 @@ FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli, Function::iterator BB = Fn.begin(), EB = Fn.end(); for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (AllocaInst *AI = dyn_cast(I)) - if (ConstantUInt *CUI = dyn_cast(AI->getArraySize())) { + if (ConstantInt *CUI = dyn_cast(AI->getArraySize())) { const Type *Ty = AI->getAllocatedType(); uint64_t TySize = TLI.getTargetData()->getTypeSize(Ty); unsigned Align = std::max((unsigned)TLI.getTargetData()->getTypeAlignment(Ty), AI->getAlignment()); - // If the alignment of the value is smaller than the size of the value, - // and if the size of the value is particularly small (<= 8 bytes), - // round up to the size of the value for potentially better performance. + // If the alignment of the value is smaller than the size of the + // value, and if the size of the value is particularly small + // (<= 8 bytes), round up to the size of the value for potentially + // better performance. // // FIXME: This could be made better with a preferred alignment hook in // TargetData. It serves primarily to 8-byte align doubles for X86. if (Align < TySize && TySize <= 8) Align = TySize; - TySize *= CUI->getValue(); // Get total allocated size. + TySize *= CUI->getZExtValue(); // Get total allocated size. if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. StaticAllocaMap[AI] = MF.getFrameInfo()->CreateStackObject((unsigned)TySize, Align); @@ -274,24 +279,25 @@ FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli, // Create Machine PHI nodes for LLVM PHI nodes, lowering them as // appropriate. PHINode *PN; - for (BasicBlock::iterator I = BB->begin(); - (PN = dyn_cast(I)); ++I) - if (!PN->use_empty()) { - MVT::ValueType VT = TLI.getValueType(PN->getType()); - unsigned NumElements; - if (VT != MVT::Vector) - NumElements = TLI.getNumElements(VT); - else { - MVT::ValueType VT1,VT2; - NumElements = - TLI.getPackedTypeBreakdown(cast(PN->getType()), - VT1, VT2); - } - unsigned PHIReg = ValueMap[PN]; - assert(PHIReg &&"PHI node does not have an assigned virtual register!"); - for (unsigned i = 0; i != NumElements; ++i) - BuildMI(MBB, TargetInstrInfo::PHI, PN->getNumOperands(), PHIReg+i); + for (BasicBlock::iterator I = BB->begin();(PN = dyn_cast(I)); ++I){ + if (PN->use_empty()) continue; + + MVT::ValueType VT = TLI.getValueType(PN->getType()); + unsigned NumElements; + if (VT != MVT::Vector) + NumElements = TLI.getNumElements(VT); + else { + MVT::ValueType VT1,VT2; + NumElements = + TLI.getPackedTypeBreakdown(cast(PN->getType()), + VT1, VT2); } + unsigned PHIReg = ValueMap[PN]; + assert(PHIReg && "PHI node does not have an assigned virtual register!"); + const TargetInstrInfo *TII = TLI.getTargetMachine().getInstrInfo(); + for (unsigned i = 0; i != NumElements; ++i) + BuildMI(MBB, TII->get(TargetInstrInfo::PHI), PHIReg+i); + } } } @@ -340,13 +346,10 @@ unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) { // If this value is represented with multiple target registers, make sure // to create enough consecutive registers of the right (smaller) type. - unsigned NT = VT-1; // Find the type to use. - while (TLI.getNumElements((MVT::ValueType)NT) != 1) - --NT; - - unsigned R = MakeReg((MVT::ValueType)NT); + VT = TLI.getTypeToExpandTo(VT); + unsigned R = MakeReg(VT); for (unsigned i = 1; i != NV*NumVectorRegs; ++i) - MakeReg((MVT::ValueType)NT); + MakeReg(VT); return R; } @@ -393,11 +396,13 @@ class SelectionDAGLowering { /// The comparison function for sorting Case values. struct CaseCmp { bool operator () (const Case& C1, const Case& C2) { - if (const ConstantUInt* U1 = dyn_cast(C1.first)) - return U1->getValue() < cast(C2.first)->getValue(); + if (const ConstantInt* I1 = dyn_cast(C1.first)) + if (I1->getType()->isUnsigned()) + return I1->getZExtValue() < + cast(C2.first)->getZExtValue(); - const ConstantSInt* S1 = dyn_cast(C1.first); - return S1->getValue() < cast(C2.first)->getValue(); + return cast(C1.first)->getSExtValue() < + cast(C2.first)->getSExtValue(); } }; @@ -445,9 +450,13 @@ public: return Root; } + SDOperand CopyValueToVirtualRegister(Value *V, unsigned Reg); + void visit(Instruction &I) { visit(I.getOpcode(), I); } void visit(unsigned Opcode, User &I) { + // Note: this doesn't use InstVisitor, because it has to work with + // ConstantExpr's in addition to instructions. switch (Opcode) { default: assert(0 && "Unknown instruction type encountered!"); abort(); @@ -482,6 +491,12 @@ public: std::set &OutputRegs, std::set &InputRegs); + void FindMergedConditions(Value *Cond, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, MachineBasicBlock *CurBB, + unsigned Opc); + bool isExportableFromCurrentBlock(Value *V, const BasicBlock *FromBB); + void ExportFromCurrentBlock(Value *V); + // Terminator instructions. void visitRet(ReturnInst &I); void visitBr(BranchInst &I); @@ -496,33 +511,36 @@ public: void visitInvoke(InvokeInst &I) { assert(0 && "TODO"); } void visitUnwind(UnwindInst &I) { assert(0 && "TODO"); } - void visitBinary(User &I, unsigned IntOp, unsigned FPOp, unsigned VecOp); + void visitIntBinary(User &I, unsigned IntOp, unsigned VecOp); + void visitFPBinary(User &I, unsigned FPOp, unsigned VecOp); void visitShift(User &I, unsigned Opcode); void visitAdd(User &I) { - visitBinary(I, ISD::ADD, ISD::FADD, ISD::VADD); + if (I.getType()->isFloatingPoint()) + visitFPBinary(I, ISD::FADD, ISD::VADD); + else + visitIntBinary(I, ISD::ADD, ISD::VADD); } void visitSub(User &I); - void visitMul(User &I) { - visitBinary(I, ISD::MUL, ISD::FMUL, ISD::VMUL); - } - void visitDiv(User &I) { - const Type *Ty = I.getType(); - visitBinary(I, - Ty->isSigned() ? ISD::SDIV : ISD::UDIV, ISD::FDIV, - Ty->isSigned() ? ISD::VSDIV : ISD::VUDIV); - } - void visitRem(User &I) { - const Type *Ty = I.getType(); - visitBinary(I, Ty->isSigned() ? ISD::SREM : ISD::UREM, ISD::FREM, 0); - } - void visitAnd(User &I) { visitBinary(I, ISD::AND, 0, ISD::VAND); } - void visitOr (User &I) { visitBinary(I, ISD::OR, 0, ISD::VOR); } - void visitXor(User &I) { visitBinary(I, ISD::XOR, 0, ISD::VXOR); } - void visitShl(User &I) { visitShift(I, ISD::SHL); } - void visitShr(User &I) { - visitShift(I, I.getType()->isUnsigned() ? ISD::SRL : ISD::SRA); + void visitMul(User &I) { + if (I.getType()->isFloatingPoint()) + visitFPBinary(I, ISD::FMUL, ISD::VMUL); + else + visitIntBinary(I, ISD::MUL, ISD::VMUL); } - + void visitURem(User &I) { visitIntBinary(I, ISD::UREM, 0); } + void visitSRem(User &I) { visitIntBinary(I, ISD::SREM, 0); } + void visitFRem(User &I) { visitFPBinary (I, ISD::FREM, 0); } + void visitUDiv(User &I) { visitIntBinary(I, ISD::UDIV, ISD::VUDIV); } + void visitSDiv(User &I) { visitIntBinary(I, ISD::SDIV, ISD::VSDIV); } + void visitFDiv(User &I) { visitFPBinary (I, ISD::FDIV, ISD::VSDIV); } + void visitAnd(User &I) { visitIntBinary(I, ISD::AND, ISD::VAND); } + void visitOr (User &I) { visitIntBinary(I, ISD::OR, ISD::VOR); } + void visitXor(User &I) { visitIntBinary(I, ISD::XOR, ISD::VXOR); } + void visitShl(User &I) { visitShift(I, ISD::SHL); } + void visitLShr(User &I) { visitShift(I, ISD::SRL); } + void visitAShr(User &I) { visitShift(I, ISD::SRA); } + void visitICmp(User &I); + void visitFCmp(User &I); void visitSetCC(User &I, ISD::CondCode SignedOpc, ISD::CondCode UnsignedOpc, ISD::CondCode FPOpc); void visitSetEQ(User &I) { visitSetCC(I, ISD::SETEQ, ISD::SETEQ, @@ -537,13 +555,25 @@ public: ISD::SETOLT); } void visitSetGT(User &I) { visitSetCC(I, ISD::SETGT, ISD::SETUGT, ISD::SETOGT); } + // Visit the conversion instructions + void visitTrunc(User &I); + void visitZExt(User &I); + void visitSExt(User &I); + void visitFPTrunc(User &I); + void visitFPExt(User &I); + void visitFPToUI(User &I); + void visitFPToSI(User &I); + void visitUIToFP(User &I); + void visitSIToFP(User &I); + void visitPtrToInt(User &I); + void visitIntToPtr(User &I); + void visitBitCast(User &I); void visitExtractElement(User &I); void visitInsertElement(User &I); void visitShuffleVector(User &I); void visitGetElementPtr(User &I); - void visitCast(User &I); void visitSelect(User &I); void visitMalloc(MallocInst &I); @@ -637,7 +667,7 @@ SDOperand SelectionDAGLowering::getValue(const Value *V) { return N = DAG.getNode(ISD::VBUILD_VECTOR,MVT::Vector,&Ops[0],Ops.size()); } else { // Canonicalize all constant ints to be unsigned. - return N = DAG.getConstant(cast(C)->getRawValue(),VT); + return N = DAG.getConstant(cast(C)->getZExtValue(),VT); } } @@ -656,19 +686,26 @@ SDOperand SelectionDAGLowering::getValue(const Value *V) { // If this type is not legal, make it so now. if (VT != MVT::Vector) { - MVT::ValueType DestVT = TLI.getTypeToTransformTo(VT); - - N = DAG.getCopyFromReg(DAG.getEntryNode(), InReg, DestVT); - if (DestVT < VT) { + if (TLI.getTypeAction(VT) == TargetLowering::Expand) { // Source must be expanded. This input value is actually coming from the // register pair VMI->second and VMI->second+1. - N = DAG.getNode(ISD::BUILD_PAIR, VT, N, - DAG.getCopyFromReg(DAG.getEntryNode(), InReg+1, DestVT)); - } else if (DestVT > VT) { // Promotion case - if (MVT::isFloatingPoint(VT)) - N = DAG.getNode(ISD::FP_ROUND, VT, N); - else - N = DAG.getNode(ISD::TRUNCATE, VT, N); + MVT::ValueType DestVT = TLI.getTypeToExpandTo(VT); + unsigned NumVals = TLI.getNumElements(VT); + N = DAG.getCopyFromReg(DAG.getEntryNode(), InReg, DestVT); + if (NumVals == 1) + N = DAG.getNode(ISD::BIT_CONVERT, VT, N); + else { + assert(NumVals == 2 && "1 to 4 (and more) expansion not implemented!"); + N = DAG.getNode(ISD::BUILD_PAIR, VT, N, + DAG.getCopyFromReg(DAG.getEntryNode(), InReg+1, DestVT)); + } + } else { + MVT::ValueType DestVT = TLI.getTypeToTransformTo(VT); + N = DAG.getCopyFromReg(DAG.getEntryNode(), InReg, DestVT); + if (TLI.getTypeAction(VT) == TargetLowering::Promote) // Promotion case + N = MVT::isFloatingPoint(VT) + ? DAG.getNode(ISD::FP_ROUND, VT, N) + : DAG.getNode(ISD::TRUNCATE, VT, N); } } else { // Otherwise, if this is a vector, make it available as a generic vector @@ -760,10 +797,213 @@ void SelectionDAGLowering::visitRet(ReturnInst &I) { &NewValues[0], NewValues.size())); } +/// ExportFromCurrentBlock - If this condition isn't known to be exported from +/// the current basic block, add it to ValueMap now so that we'll get a +/// CopyTo/FromReg. +void SelectionDAGLowering::ExportFromCurrentBlock(Value *V) { + // No need to export constants. + if (!isa(V) && !isa(V)) return; + + // Already exported? + if (FuncInfo.isExportedInst(V)) return; + + unsigned Reg = FuncInfo.InitializeRegForValue(V); + PendingLoads.push_back(CopyValueToVirtualRegister(V, Reg)); +} + +bool SelectionDAGLowering::isExportableFromCurrentBlock(Value *V, + const BasicBlock *FromBB) { + // The operands of the setcc have to be in this block. We don't know + // how to export them from some other block. + if (Instruction *VI = dyn_cast(V)) { + // Can export from current BB. + if (VI->getParent() == FromBB) + return true; + + // Is already exported, noop. + return FuncInfo.isExportedInst(V); + } + + // If this is an argument, we can export it if the BB is the entry block or + // if it is already exported. + if (isa(V)) { + if (FromBB == &FromBB->getParent()->getEntryBlock()) + return true; + + // Otherwise, can only export this if it is already exported. + return FuncInfo.isExportedInst(V); + } + + // Otherwise, constants can always be exported. + return true; +} + +static bool InBlock(const Value *V, const BasicBlock *BB) { + if (const Instruction *I = dyn_cast(V)) + return I->getParent() == BB; + return true; +} + +/// FindMergedConditions - If Cond is an expression like +void SelectionDAGLowering::FindMergedConditions(Value *Cond, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + MachineBasicBlock *CurBB, + unsigned Opc) { + // If this node is not part of the or/and tree, emit it as a branch. + BinaryOperator *BOp = dyn_cast(Cond); + + if (!BOp || (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() || + BOp->getParent() != CurBB->getBasicBlock() || + !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || + !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { + const BasicBlock *BB = CurBB->getBasicBlock(); + + if (IntrinsicInst *II = dyn_cast(Cond)) + if ((II->getIntrinsicID() == Intrinsic::isunordered_f32 || + II->getIntrinsicID() == Intrinsic::isunordered_f64) && + // The operands of the setcc have to be in this block. We don't know + // how to export them from some other block. If this is the first + // block of the sequence, no exporting is needed. + (CurBB == CurMBB || + (isExportableFromCurrentBlock(II->getOperand(1), BB) && + isExportableFromCurrentBlock(II->getOperand(2), BB)))) { + SelectionDAGISel::CaseBlock CB(ISD::SETUO, II->getOperand(1), + II->getOperand(2), TBB, FBB, CurBB); + SwitchCases.push_back(CB); + return; + } + + + // If the leaf of the tree is a setcond inst, merge the condition into the + // caseblock. + if (BOp && isa(BOp) && + // The operands of the setcc have to be in this block. We don't know + // how to export them from some other block. If this is the first block + // of the sequence, no exporting is needed. + (CurBB == CurMBB || + (isExportableFromCurrentBlock(BOp->getOperand(0), BB) && + isExportableFromCurrentBlock(BOp->getOperand(1), BB)))) { + ISD::CondCode SignCond, UnsCond, FPCond, Condition; + switch (BOp->getOpcode()) { + default: assert(0 && "Unknown setcc opcode!"); + case Instruction::SetEQ: + SignCond = ISD::SETEQ; + UnsCond = ISD::SETEQ; + FPCond = ISD::SETOEQ; + break; + case Instruction::SetNE: + SignCond = ISD::SETNE; + UnsCond = ISD::SETNE; + FPCond = ISD::SETUNE; + break; + case Instruction::SetLE: + SignCond = ISD::SETLE; + UnsCond = ISD::SETULE; + FPCond = ISD::SETOLE; + break; + case Instruction::SetGE: + SignCond = ISD::SETGE; + UnsCond = ISD::SETUGE; + FPCond = ISD::SETOGE; + break; + case Instruction::SetLT: + SignCond = ISD::SETLT; + UnsCond = ISD::SETULT; + FPCond = ISD::SETOLT; + break; + case Instruction::SetGT: + SignCond = ISD::SETGT; + UnsCond = ISD::SETUGT; + FPCond = ISD::SETOGT; + break; + } + + const Type *OpType = BOp->getOperand(0)->getType(); + if (const PackedType *PTy = dyn_cast(OpType)) + OpType = PTy->getElementType(); + + if (!FiniteOnlyFPMath() && OpType->isFloatingPoint()) + Condition = FPCond; + else if (OpType->isUnsigned()) + Condition = UnsCond; + else + Condition = SignCond; + + SelectionDAGISel::CaseBlock CB(Condition, BOp->getOperand(0), + BOp->getOperand(1), TBB, FBB, CurBB); + SwitchCases.push_back(CB); + return; + } + + // Create a CaseBlock record representing this branch. + SelectionDAGISel::CaseBlock CB(ISD::SETEQ, Cond, ConstantBool::getTrue(), + TBB, FBB, CurBB); + SwitchCases.push_back(CB); + return; + } + + + // Create TmpBB after CurBB. + MachineFunction::iterator BBI = CurBB; + MachineBasicBlock *TmpBB = new MachineBasicBlock(CurBB->getBasicBlock()); + CurBB->getParent()->getBasicBlockList().insert(++BBI, TmpBB); + + if (Opc == Instruction::Or) { + // Codegen X | Y as: + // jmp_if_X TBB + // jmp TmpBB + // TmpBB: + // jmp_if_Y TBB + // jmp FBB + // + + // Emit the LHS condition. + FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, Opc); + + // Emit the RHS condition into TmpBB. + FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc); + } else { + assert(Opc == Instruction::And && "Unknown merge op!"); + // Codegen X & Y as: + // jmp_if_X TmpBB + // jmp FBB + // TmpBB: + // jmp_if_Y TBB + // jmp FBB + // + // This requires creation of TmpBB after CurBB. + + // Emit the LHS condition. + FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, Opc); + + // Emit the RHS condition into TmpBB. + FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc); + } +} + +/// If the set of cases should be emitted as a series of branches, return true. +/// If we should emit this as a bunch of and/or'd together conditions, return +/// false. +static bool +ShouldEmitAsBranches(const std::vector &Cases) { + if (Cases.size() != 2) return true; + + // If this is two comparisons of the same values or'd or and'd together, they + // will get folded into a single comparison, so don't emit two blocks. + if ((Cases[0].CmpLHS == Cases[1].CmpLHS && + Cases[0].CmpRHS == Cases[1].CmpRHS) || + (Cases[0].CmpRHS == Cases[1].CmpLHS && + Cases[0].CmpLHS == Cases[1].CmpRHS)) { + return false; + } + + return true; +} + void SelectionDAGLowering::visitBr(BranchInst &I) { // Update machine-CFG edges. MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; - CurMBB->addSuccessor(Succ0MBB); // Figure out which block is immediately after the current one. MachineBasicBlock *NextBlock = 0; @@ -776,49 +1016,88 @@ void SelectionDAGLowering::visitBr(BranchInst &I) { if (Succ0MBB != NextBlock) DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, getRoot(), DAG.getBasicBlock(Succ0MBB))); - } else { - MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)]; - CurMBB->addSuccessor(Succ1MBB); - - SDOperand Cond = getValue(I.getCondition()); - if (Succ1MBB == NextBlock) { - // If the condition is false, fall through. This means we should branch - // if the condition is true to Succ #0. - DAG.setRoot(DAG.getNode(ISD::BRCOND, MVT::Other, getRoot(), - Cond, DAG.getBasicBlock(Succ0MBB))); - } else if (Succ0MBB == NextBlock) { - // If the condition is true, fall through. This means we should branch if - // the condition is false to Succ #1. Invert the condition first. - SDOperand True = DAG.getConstant(1, Cond.getValueType()); - Cond = DAG.getNode(ISD::XOR, Cond.getValueType(), Cond, True); - DAG.setRoot(DAG.getNode(ISD::BRCOND, MVT::Other, getRoot(), - Cond, DAG.getBasicBlock(Succ1MBB))); - } else { - std::vector Ops; - Ops.push_back(getRoot()); - // If the false case is the current basic block, then this is a self - // loop. We do not want to emit "Loop: ... brcond Out; br Loop", as it - // adds an extra instruction in the loop. Instead, invert the - // condition and emit "Loop: ... br!cond Loop; br Out. - if (CurMBB == Succ1MBB) { - std::swap(Succ0MBB, Succ1MBB); - SDOperand True = DAG.getConstant(1, Cond.getValueType()); - Cond = DAG.getNode(ISD::XOR, Cond.getValueType(), Cond, True); + + // Update machine-CFG edges. + CurMBB->addSuccessor(Succ0MBB); + + return; + } + + // If this condition is one of the special cases we handle, do special stuff + // now. + Value *CondVal = I.getCondition(); + MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)]; + + // If this is a series of conditions that are or'd or and'd together, emit + // this as a sequence of branches instead of setcc's with and/or operations. + // For example, instead of something like: + // cmp A, B + // C = seteq + // cmp D, E + // F = setle + // or C, F + // jnz foo + // Emit: + // cmp A, B + // je foo + // cmp D, E + // jle foo + // + if (BinaryOperator *BOp = dyn_cast(CondVal)) { + if (BOp->hasOneUse() && + (BOp->getOpcode() == Instruction::And || + BOp->getOpcode() == Instruction::Or)) { + FindMergedConditions(BOp, Succ0MBB, Succ1MBB, CurMBB, BOp->getOpcode()); + // If the compares in later blocks need to use values not currently + // exported from this block, export them now. This block should always + // be the first entry. + assert(SwitchCases[0].ThisBB == CurMBB && "Unexpected lowering!"); + + // Allow some cases to be rejected. + if (ShouldEmitAsBranches(SwitchCases)) { + for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) { + ExportFromCurrentBlock(SwitchCases[i].CmpLHS); + ExportFromCurrentBlock(SwitchCases[i].CmpRHS); + } + + // Emit the branch for this block. + visitSwitchCase(SwitchCases[0]); + SwitchCases.erase(SwitchCases.begin()); + return; } - SDOperand True = DAG.getNode(ISD::BRCOND, MVT::Other, getRoot(), Cond, - DAG.getBasicBlock(Succ0MBB)); - DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, True, - DAG.getBasicBlock(Succ1MBB))); + + // Okay, we decided not to do this, remove any inserted MBB's and clear + // SwitchCases. + for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) + CurMBB->getParent()->getBasicBlockList().erase(SwitchCases[i].ThisBB); + + SwitchCases.clear(); } } + + // Create a CaseBlock record representing this branch. + SelectionDAGISel::CaseBlock CB(ISD::SETEQ, CondVal, ConstantBool::getTrue(), + Succ0MBB, Succ1MBB, CurMBB); + // Use visitSwitchCase to actually insert the fast branch sequence for this + // cond branch. + visitSwitchCase(CB); } /// visitSwitchCase - Emits the necessary code to represent a single node in /// the binary search tree resulting from lowering a switch instruction. void SelectionDAGLowering::visitSwitchCase(SelectionDAGISel::CaseBlock &CB) { - SDOperand SwitchOp = getValue(CB.SwitchV); - SDOperand CaseOp = getValue(CB.CaseC); - SDOperand Cond = DAG.getSetCC(MVT::i1, SwitchOp, CaseOp, CB.CC); + SDOperand Cond; + SDOperand CondLHS = getValue(CB.CmpLHS); + + // Build the setcc now, fold "(X == true)" to X and "(X == false)" to !X to + // handle common cases produced by branch lowering. + if (CB.CmpRHS == ConstantBool::getTrue() && CB.CC == ISD::SETEQ) + Cond = CondLHS; + else if (CB.CmpRHS == ConstantBool::getFalse() && CB.CC == ISD::SETEQ) { + SDOperand True = DAG.getConstant(1, CondLHS.getValueType()); + Cond = DAG.getNode(ISD::XOR, CondLHS.getValueType(), CondLHS, True); + } else + Cond = DAG.getSetCC(MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. @@ -829,53 +1108,31 @@ void SelectionDAGLowering::visitSwitchCase(SelectionDAGISel::CaseBlock &CB) { // If the lhs block is the next block, invert the condition so that we can // fall through to the lhs instead of the rhs block. - if (CB.LHSBB == NextBlock) { - std::swap(CB.LHSBB, CB.RHSBB); + if (CB.TrueBB == NextBlock) { + std::swap(CB.TrueBB, CB.FalseBB); SDOperand True = DAG.getConstant(1, Cond.getValueType()); Cond = DAG.getNode(ISD::XOR, Cond.getValueType(), Cond, True); } SDOperand BrCond = DAG.getNode(ISD::BRCOND, MVT::Other, getRoot(), Cond, - DAG.getBasicBlock(CB.LHSBB)); - if (CB.RHSBB == NextBlock) + DAG.getBasicBlock(CB.TrueBB)); + if (CB.FalseBB == NextBlock) DAG.setRoot(BrCond); else DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, BrCond, - DAG.getBasicBlock(CB.RHSBB))); + DAG.getBasicBlock(CB.FalseBB))); // Update successor info - CurMBB->addSuccessor(CB.LHSBB); - CurMBB->addSuccessor(CB.RHSBB); + CurMBB->addSuccessor(CB.TrueBB); + CurMBB->addSuccessor(CB.FalseBB); } void SelectionDAGLowering::visitJumpTable(SelectionDAGISel::JumpTable &JT) { // Emit the code for the jump table MVT::ValueType PTy = TLI.getPointerTy(); - assert((PTy == MVT::i32 || PTy == MVT::i64) && - "Jump table entries are 32-bit values"); - bool isPIC = TLI.getTargetMachine().getRelocationModel() == Reloc::PIC_; - // PIC jump table entries are 32-bit values. - unsigned EntrySize = isPIC ? 4 : MVT::getSizeInBits(PTy)/8; - SDOperand Copy = DAG.getCopyFromReg(getRoot(), JT.Reg, PTy); - SDOperand IDX = DAG.getNode(ISD::MUL, PTy, Copy, - DAG.getConstant(EntrySize, PTy)); - SDOperand TAB = DAG.getJumpTable(JT.JTI,PTy); - SDOperand ADD = DAG.getNode(ISD::ADD, PTy, IDX, TAB); - SDOperand LD = DAG.getLoad(isPIC ? MVT::i32 : PTy, Copy.getValue(1), ADD, - NULL, 0); - if (isPIC) { - // For Pic, the sequence is: - // BRIND(load(Jumptable + index) + RelocBase) - // RelocBase is the JumpTable on PPC and X86, GOT on Alpha - SDOperand Reloc; - if (TLI.usesGlobalOffsetTable()) - Reloc = DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, PTy); - else - Reloc = TAB; - ADD = DAG.getNode(ISD::ADD, PTy, - ((PTy != MVT::i32) ? DAG.getNode(ISD::SIGN_EXTEND, PTy, LD) : LD), Reloc); - DAG.setRoot(DAG.getNode(ISD::BRIND, MVT::Other, LD.getValue(1), ADD)); - } else { - DAG.setRoot(DAG.getNode(ISD::BRIND, MVT::Other, LD.getValue(1), LD)); - } + SDOperand Index = DAG.getCopyFromReg(getRoot(), JT.Reg, PTy); + SDOperand Table = DAG.getJumpTable(JT.JTI, PTy); + DAG.setRoot(DAG.getNode(ISD::BR_JT, MVT::Other, Index.getValue(1), + Table, Index)); + return; } void SelectionDAGLowering::visitSwitch(SwitchInst &I) { @@ -886,18 +1143,19 @@ void SelectionDAGLowering::visitSwitch(SwitchInst &I) { if (++BBI != CurMBB->getParent()->end()) NextBlock = BBI; + MachineBasicBlock *Default = FuncInfo.MBBMap[I.getDefaultDest()]; + // If there is only the default destination, branch to it if it is not the // next basic block. Otherwise, just fall through. if (I.getNumOperands() == 2) { // Update machine-CFG edges. - MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[I.getDefaultDest()]; // If this is not a fall-through branch, emit the branch. - if (DefaultMBB != NextBlock) + if (Default != NextBlock) DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, getRoot(), - DAG.getBasicBlock(DefaultMBB))); + DAG.getBasicBlock(Default))); - CurMBB->addSuccessor(DefaultMBB); + CurMBB->addSuccessor(Default); return; } @@ -917,21 +1175,72 @@ void SelectionDAGLowering::visitSwitch(SwitchInst &I) { // inserted into CaseBlock records, representing basic blocks in the binary // search tree. Value *SV = I.getOperand(0); - MachineBasicBlock *Default = FuncInfo.MBBMap[I.getDefaultDest()]; // Get the MachineFunction which holds the current MBB. This is used during // emission of jump tables, and when inserting any additional MBBs necessary // to represent the switch. MachineFunction *CurMF = CurMBB->getParent(); const BasicBlock *LLVMBB = CurMBB->getBasicBlock(); + + // If the switch has few cases (two or less) emit a series of specific + // tests. + if (Cases.size() < 3) { + // TODO: If any two of the cases has the same destination, and if one value + // is the same as the other, but has one bit unset that the other has set, + // use bit manipulation to do two compares at once. For example: + // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)" + + // Rearrange the case blocks so that the last one falls through if possible. + if (NextBlock && Default != NextBlock && Cases.back().second != NextBlock) { + // The last case block won't fall through into 'NextBlock' if we emit the + // branches in this order. See if rearranging a case value would help. + for (unsigned i = 0, e = Cases.size()-1; i != e; ++i) { + if (Cases[i].second == NextBlock) { + std::swap(Cases[i], Cases.back()); + break; + } + } + } + + // Create a CaseBlock record representing a conditional branch to + // the Case's target mbb if the value being switched on SV is equal + // to C. + MachineBasicBlock *CurBlock = CurMBB; + for (unsigned i = 0, e = Cases.size(); i != e; ++i) { + MachineBasicBlock *FallThrough; + if (i != e-1) { + FallThrough = new MachineBasicBlock(CurMBB->getBasicBlock()); + CurMF->getBasicBlockList().insert(BBI, FallThrough); + } else { + // If the last case doesn't match, go to the default block. + FallThrough = Default; + } + + SelectionDAGISel::CaseBlock CB(ISD::SETEQ, SV, Cases[i].first, + Cases[i].second, FallThrough, CurBlock); + + // If emitting the first comparison, just call visitSwitchCase to emit the + // code into the current block. Otherwise, push the CaseBlock onto the + // vector to be later processed by SDISel, and insert the node's MBB + // before the next MBB. + if (CurBlock == CurMBB) + visitSwitchCase(CB); + else + SwitchCases.push_back(CB); + + CurBlock = FallThrough; + } + return; + } // If the switch has more than 5 blocks, and at least 31.25% dense, and the // target supports indirect branches, then emit a jump table rather than // lowering the switch to a binary tree of conditional branches. - if (TLI.isOperationLegal(ISD::BRIND, TLI.getPointerTy()) && + if ((TLI.isOperationLegal(ISD::BR_JT, MVT::Other) || + TLI.isOperationLegal(ISD::BRIND, MVT::Other)) && Cases.size() > 5) { - uint64_t First = cast(Cases.front().first)->getRawValue(); - uint64_t Last = cast(Cases.back().first)->getRawValue(); + uint64_t First =cast(Cases.front().first)->getZExtValue(); + uint64_t Last = cast(Cases.back().first)->getZExtValue(); double Density = (double)Cases.size() / (double)((Last - First) + 1ULL); if (Density >= 0.3125) { @@ -979,19 +1288,26 @@ void SelectionDAGLowering::visitSwitch(SwitchInst &I) { // the default BB. std::vector DestBBs; uint64_t TEI = First; - for (CaseItr ii = Cases.begin(), ee = Cases.end(); ii != ee; ++TEI) - if (cast(ii->first)->getRawValue() == TEI) { + if (cast(ii->first)->getZExtValue() == TEI) { DestBBs.push_back(ii->second); ++ii; } else { DestBBs.push_back(Default); } - // Update successor info + // Update successor info. Add one edge to each unique successor. + // Vector bool would be better, but vector is really slow. + std::vector SuccsHandled; + SuccsHandled.resize(CurMBB->getParent()->getNumBlockIDs()); + for (std::vector::iterator I = DestBBs.begin(), - E = DestBBs.end(); I != E; ++I) - JumpTableBB->addSuccessor(*I); + E = DestBBs.end(); I != E; ++I) { + if (!SuccsHandled[(*I)->getNumber()]) { + SuccsHandled[(*I)->getNumber()] = true; + JumpTableBB->addSuccessor(*I); + } + } // Create a jump table index for this jump table, or return an existing // one. @@ -1045,7 +1361,7 @@ void SelectionDAGLowering::visitSwitch(SwitchInst &I) { CaseRange LHSR(CR.Range.first, Pivot); CaseRange RHSR(Pivot, CR.Range.second); Constant *C = Pivot->first; - MachineBasicBlock *RHSBB = 0, *LHSBB = 0; + MachineBasicBlock *FalseBB = 0, *TrueBB = 0; // We know that we branch to the LHS if the Value being switched on is // less than the Pivot value, C. We use this to optimize our binary @@ -1055,13 +1371,13 @@ void SelectionDAGLowering::visitSwitch(SwitchInst &I) { // rather than creating a leaf node for it. if ((LHSR.second - LHSR.first) == 1 && LHSR.first->first == CR.GE && - cast(C)->getRawValue() == - (cast(CR.GE)->getRawValue() + 1ULL)) { - LHSBB = LHSR.first->second; + cast(C)->getZExtValue() == + (cast(CR.GE)->getZExtValue() + 1ULL)) { + TrueBB = LHSR.first->second; } else { - LHSBB = new MachineBasicBlock(LLVMBB); - CurMF->getBasicBlockList().insert(BBI, LHSBB); - CaseVec.push_back(CaseRec(LHSBB,C,CR.GE,LHSR)); + TrueBB = new MachineBasicBlock(LLVMBB); + CurMF->getBasicBlockList().insert(BBI, TrueBB); + CaseVec.push_back(CaseRec(TrueBB, C, CR.GE, LHSR)); } // Similar to the optimization above, if the Value being switched on is @@ -1069,20 +1385,20 @@ void SelectionDAGLowering::visitSwitch(SwitchInst &I) { // is CR.LT - 1, then we can branch directly to the target block for // the current Case Value, rather than emitting a RHS leaf node for it. if ((RHSR.second - RHSR.first) == 1 && CR.LT && - cast(RHSR.first->first)->getRawValue() == - (cast(CR.LT)->getRawValue() - 1ULL)) { - RHSBB = RHSR.first->second; + cast(RHSR.first->first)->getZExtValue() == + (cast(CR.LT)->getZExtValue() - 1ULL)) { + FalseBB = RHSR.first->second; } else { - RHSBB = new MachineBasicBlock(LLVMBB); - CurMF->getBasicBlockList().insert(BBI, RHSBB); - CaseVec.push_back(CaseRec(RHSBB,CR.LT,C,RHSR)); + FalseBB = new MachineBasicBlock(LLVMBB); + CurMF->getBasicBlockList().insert(BBI, FalseBB); + CaseVec.push_back(CaseRec(FalseBB,CR.LT,C,RHSR)); } // Create a CaseBlock record representing a conditional branch to // the LHS node if the value being switched on SV is less than C. // Otherwise, branch to LHS. ISD::CondCode CC = C->getType()->isSigned() ? ISD::SETLT : ISD::SETULT; - SelectionDAGISel::CaseBlock CB(CC, SV, C, LHSBB, RHSBB, CR.CaseBB); + SelectionDAGISel::CaseBlock CB(CC, SV, C, TrueBB, FalseBB, CR.CaseBB); if (CR.CaseBB == CurMBB) visitSwitchCase(CB); @@ -1101,25 +1417,38 @@ void SelectionDAGLowering::visitSub(User &I) { setValue(&I, DAG.getNode(ISD::FNEG, Op2.getValueType(), Op2)); return; } - } - visitBinary(I, ISD::SUB, ISD::FSUB, ISD::VSUB); + visitFPBinary(I, ISD::FSUB, ISD::VSUB); + } else + visitIntBinary(I, ISD::SUB, ISD::VSUB); } -void SelectionDAGLowering::visitBinary(User &I, unsigned IntOp, unsigned FPOp, - unsigned VecOp) { +void +SelectionDAGLowering::visitIntBinary(User &I, unsigned IntOp, unsigned VecOp) { const Type *Ty = I.getType(); SDOperand Op1 = getValue(I.getOperand(0)); SDOperand Op2 = getValue(I.getOperand(1)); - if (Ty->isIntegral()) { - setValue(&I, DAG.getNode(IntOp, Op1.getValueType(), Op1, Op2)); - } else if (Ty->isFloatingPoint()) { - setValue(&I, DAG.getNode(FPOp, Op1.getValueType(), Op1, Op2)); + if (const PackedType *PTy = dyn_cast(Ty)) { + SDOperand Num = DAG.getConstant(PTy->getNumElements(), MVT::i32); + SDOperand Typ = DAG.getValueType(TLI.getValueType(PTy->getElementType())); + setValue(&I, DAG.getNode(VecOp, MVT::Vector, Op1, Op2, Num, Typ)); } else { - const PackedType *PTy = cast(Ty); + setValue(&I, DAG.getNode(IntOp, Op1.getValueType(), Op1, Op2)); + } +} + +void +SelectionDAGLowering::visitFPBinary(User &I, unsigned FPOp, unsigned VecOp) { + const Type *Ty = I.getType(); + SDOperand Op1 = getValue(I.getOperand(0)); + SDOperand Op2 = getValue(I.getOperand(1)); + + if (const PackedType *PTy = dyn_cast(Ty)) { SDOperand Num = DAG.getConstant(PTy->getNumElements(), MVT::i32); SDOperand Typ = DAG.getValueType(TLI.getValueType(PTy->getElementType())); setValue(&I, DAG.getNode(VecOp, MVT::Vector, Op1, Op2, Num, Typ)); + } else { + setValue(&I, DAG.getNode(FPOp, Op1.getValueType(), Op1, Op2)); } } @@ -1132,6 +1461,60 @@ void SelectionDAGLowering::visitShift(User &I, unsigned Opcode) { setValue(&I, DAG.getNode(Opcode, Op1.getValueType(), Op1, Op2)); } +void SelectionDAGLowering::visitICmp(User &I) { + ICmpInst *IC = cast(&I); + SDOperand Op1 = getValue(IC->getOperand(0)); + SDOperand Op2 = getValue(IC->getOperand(1)); + ISD::CondCode Opcode; + switch (IC->getPredicate()) { + case ICmpInst::ICMP_EQ : Opcode = ISD::SETEQ; break; + case ICmpInst::ICMP_NE : Opcode = ISD::SETNE; break; + case ICmpInst::ICMP_UGT : Opcode = ISD::SETUGT; break; + case ICmpInst::ICMP_UGE : Opcode = ISD::SETUGE; break; + case ICmpInst::ICMP_ULT : Opcode = ISD::SETULT; break; + case ICmpInst::ICMP_ULE : Opcode = ISD::SETULE; break; + case ICmpInst::ICMP_SGT : Opcode = ISD::SETGT; break; + case ICmpInst::ICMP_SGE : Opcode = ISD::SETGE; break; + case ICmpInst::ICMP_SLT : Opcode = ISD::SETLT; break; + case ICmpInst::ICMP_SLE : Opcode = ISD::SETLE; break; + default: + assert(!"Invalid ICmp predicate value"); + Opcode = ISD::SETEQ; + break; + } + setValue(&I, DAG.getSetCC(MVT::i1, Op1, Op2, Opcode)); +} + +void SelectionDAGLowering::visitFCmp(User &I) { + FCmpInst *FC = cast(&I); + SDOperand Op1 = getValue(FC->getOperand(0)); + SDOperand Op2 = getValue(FC->getOperand(1)); + ISD::CondCode Opcode; + switch (FC->getPredicate()) { + case FCmpInst::FCMP_FALSE : Opcode = ISD::SETFALSE; + case FCmpInst::FCMP_OEQ : Opcode = ISD::SETOEQ; + case FCmpInst::FCMP_OGT : Opcode = ISD::SETOGT; + case FCmpInst::FCMP_OGE : Opcode = ISD::SETOGE; + case FCmpInst::FCMP_OLT : Opcode = ISD::SETOLT; + case FCmpInst::FCMP_OLE : Opcode = ISD::SETOLE; + case FCmpInst::FCMP_ONE : Opcode = ISD::SETONE; + case FCmpInst::FCMP_ORD : Opcode = ISD::SETO; + case FCmpInst::FCMP_UNO : Opcode = ISD::SETUO; + case FCmpInst::FCMP_UEQ : Opcode = ISD::SETUEQ; + case FCmpInst::FCMP_UGT : Opcode = ISD::SETUGT; + case FCmpInst::FCMP_UGE : Opcode = ISD::SETUGE; + case FCmpInst::FCMP_ULT : Opcode = ISD::SETULT; + case FCmpInst::FCMP_ULE : Opcode = ISD::SETULE; + case FCmpInst::FCMP_UNE : Opcode = ISD::SETUNE; + case FCmpInst::FCMP_TRUE : Opcode = ISD::SETTRUE; + default: + assert(!"Invalid FCmp predicate value"); + Opcode = ISD::SETFALSE; + break; + } + setValue(&I, DAG.getSetCC(MVT::i1, Op1, Op2, Opcode)); +} + void SelectionDAGLowering::visitSetCC(User &I,ISD::CondCode SignedOpcode, ISD::CondCode UnsignedOpcode, ISD::CondCode FPOpcode) { @@ -1159,63 +1542,127 @@ void SelectionDAGLowering::visitSelect(User &I) { } } -void SelectionDAGLowering::visitCast(User &I) { + +void SelectionDAGLowering::visitTrunc(User &I) { + // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). + SDOperand N = getValue(I.getOperand(0)); + MVT::ValueType DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::TRUNCATE, DestVT, N)); +} + +void SelectionDAGLowering::visitZExt(User &I) { + // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). + // ZExt also can't be a cast to bool for same reason. So, nothing much to do + SDOperand N = getValue(I.getOperand(0)); + MVT::ValueType DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, DestVT, N)); +} + +void SelectionDAGLowering::visitSExt(User &I) { + // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). + // SExt also can't be a cast to bool for same reason. So, nothing much to do + SDOperand N = getValue(I.getOperand(0)); + MVT::ValueType DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, DestVT, N)); +} + +void SelectionDAGLowering::visitFPTrunc(User &I) { + // FPTrunc is never a no-op cast, no need to check + SDOperand N = getValue(I.getOperand(0)); + MVT::ValueType DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_ROUND, DestVT, N)); +} + +void SelectionDAGLowering::visitFPExt(User &I){ + // FPTrunc is never a no-op cast, no need to check + SDOperand N = getValue(I.getOperand(0)); + MVT::ValueType DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_EXTEND, DestVT, N)); +} + +void SelectionDAGLowering::visitFPToUI(User &I) { + // FPToUI is never a no-op cast, no need to check + SDOperand N = getValue(I.getOperand(0)); + MVT::ValueType DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_TO_UINT, DestVT, N)); +} + +void SelectionDAGLowering::visitFPToSI(User &I) { + // FPToSI is never a no-op cast, no need to check + SDOperand N = getValue(I.getOperand(0)); + MVT::ValueType DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_TO_SINT, DestVT, N)); +} + +void SelectionDAGLowering::visitUIToFP(User &I) { + // UIToFP is never a no-op cast, no need to check + SDOperand N = getValue(I.getOperand(0)); + MVT::ValueType DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::UINT_TO_FP, DestVT, N)); +} + +void SelectionDAGLowering::visitSIToFP(User &I){ + // UIToFP is never a no-op cast, no need to check + SDOperand N = getValue(I.getOperand(0)); + MVT::ValueType DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::SINT_TO_FP, DestVT, N)); +} + +void SelectionDAGLowering::visitPtrToInt(User &I) { + // What to do depends on the size of the integer and the size of the pointer. + // We can either truncate, zero extend, or no-op, accordingly. SDOperand N = getValue(I.getOperand(0)); MVT::ValueType SrcVT = N.getValueType(); MVT::ValueType DestVT = TLI.getValueType(I.getType()); + SDOperand Result; + if (MVT::getSizeInBits(DestVT) < MVT::getSizeInBits(SrcVT)) + Result = DAG.getNode(ISD::TRUNCATE, DestVT, N); + else + // Note: ZERO_EXTEND can handle cases where the sizes are equal too + Result = DAG.getNode(ISD::ZERO_EXTEND, DestVT, N); + setValue(&I, Result); +} +void SelectionDAGLowering::visitIntToPtr(User &I) { + // What to do depends on the size of the integer and the size of the pointer. + // We can either truncate, zero extend, or no-op, accordingly. + SDOperand N = getValue(I.getOperand(0)); + MVT::ValueType SrcVT = N.getValueType(); + MVT::ValueType DestVT = TLI.getValueType(I.getType()); + if (MVT::getSizeInBits(DestVT) < MVT::getSizeInBits(SrcVT)) + setValue(&I, DAG.getNode(ISD::TRUNCATE, DestVT, N)); + else + // Note: ZERO_EXTEND can handle cases where the sizes are equal too + setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, DestVT, N)); +} + +void SelectionDAGLowering::visitBitCast(User &I) { + SDOperand N = getValue(I.getOperand(0)); + MVT::ValueType DestVT = TLI.getValueType(I.getType()); if (DestVT == MVT::Vector) { - // This is a cast to a vector from something else. This is always a bit - // convert. Get information about the input vector. + // This is a cast to a vector from something else. + // Get information about the output vector. const PackedType *DestTy = cast(I.getType()); MVT::ValueType EltVT = TLI.getValueType(DestTy->getElementType()); setValue(&I, DAG.getNode(ISD::VBIT_CONVERT, DestVT, N, DAG.getConstant(DestTy->getNumElements(),MVT::i32), DAG.getValueType(EltVT))); - } else if (SrcVT == DestVT) { - setValue(&I, N); // noop cast. - } else if (DestVT == MVT::i1) { - // Cast to bool is a comparison against zero, not truncation to zero. - SDOperand Zero = isInteger(SrcVT) ? DAG.getConstant(0, N.getValueType()) : - DAG.getConstantFP(0.0, N.getValueType()); - setValue(&I, DAG.getSetCC(MVT::i1, N, Zero, ISD::SETNE)); - } else if (isInteger(SrcVT)) { - if (isInteger(DestVT)) { // Int -> Int cast - if (DestVT < SrcVT) // Truncating cast? - setValue(&I, DAG.getNode(ISD::TRUNCATE, DestVT, N)); - else if (I.getOperand(0)->getType()->isSigned()) - setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, DestVT, N)); - else - setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, DestVT, N)); - } else if (isFloatingPoint(DestVT)) { // Int -> FP cast - if (I.getOperand(0)->getType()->isSigned()) - setValue(&I, DAG.getNode(ISD::SINT_TO_FP, DestVT, N)); - else - setValue(&I, DAG.getNode(ISD::UINT_TO_FP, DestVT, N)); - } else { - assert(0 && "Unknown cast!"); - } - } else if (isFloatingPoint(SrcVT)) { - if (isFloatingPoint(DestVT)) { // FP -> FP cast - if (DestVT < SrcVT) // Rounding cast? - setValue(&I, DAG.getNode(ISD::FP_ROUND, DestVT, N)); - else - setValue(&I, DAG.getNode(ISD::FP_EXTEND, DestVT, N)); - } else if (isInteger(DestVT)) { // FP -> Int cast. - if (I.getType()->isSigned()) - setValue(&I, DAG.getNode(ISD::FP_TO_SINT, DestVT, N)); - else - setValue(&I, DAG.getNode(ISD::FP_TO_UINT, DestVT, N)); - } else { - assert(0 && "Unknown cast!"); - } - } else { - assert(SrcVT == MVT::Vector && "Unknown cast!"); - assert(DestVT != MVT::Vector && "Casts to vector already handled!"); - // This is a cast from a vector to something else. This is always a bit - // convert. Get information about the input vector. + return; + } + MVT::ValueType SrcVT = N.getValueType(); + if (SrcVT == MVT::Vector) { + // This is a cast from a vctor to something else. + // Get information about the input vector. setValue(&I, DAG.getNode(ISD::VBIT_CONVERT, DestVT, N)); + return; } + + // BitCast assures us that source and destination are the same size so this + // is either a BIT_CONVERT or a no-op. + if (DestVT != N.getValueType()) + setValue(&I, DAG.getNode(ISD::BIT_CONVERT, DestVT, N)); // convert types + else + setValue(&I, N); // noop cast. } void SelectionDAGLowering::visitInsertElement(User &I) { @@ -1259,7 +1706,7 @@ void SelectionDAGLowering::visitGetElementPtr(User &I) { OI != E; ++OI) { Value *Idx = *OI; if (const StructType *StTy = dyn_cast(Ty)) { - unsigned Field = cast(Idx)->getValue(); + unsigned Field = cast(Idx)->getZExtValue(); if (Field) { // N = N + Offset uint64_t Offset = TD->getStructLayout(StTy)->MemberOffsets[Field]; @@ -1272,13 +1719,14 @@ void SelectionDAGLowering::visitGetElementPtr(User &I) { // If this is a constant subscript, handle it quickly. if (ConstantInt *CI = dyn_cast(Idx)) { - if (CI->getRawValue() == 0) continue; - + if (CI->getZExtValue() == 0) continue; uint64_t Offs; - if (ConstantSInt *CSI = dyn_cast(CI)) - Offs = (int64_t)TD->getTypeSize(Ty)*CSI->getValue(); + if (CI->getType()->isSigned()) + Offs = (int64_t) + TD->getTypeSize(Ty)*cast(CI)->getSExtValue(); else - Offs = TD->getTypeSize(Ty)*cast(CI)->getValue(); + Offs = + TD->getTypeSize(Ty)*cast(CI)->getZExtValue(); N = DAG.getNode(ISD::ADD, N.getValueType(), N, getIntPtrConstant(Offs)); continue; } @@ -1387,7 +1835,7 @@ SDOperand SelectionDAGLowering::getLoadFrom(const Type *Ty, SDOperand Ptr, L = DAG.getVecLoad(PTy->getNumElements(), PVT, Root, Ptr, DAG.getSrcValue(SV)); } else { - L = DAG.getLoad(TLI.getValueType(Ty), Root, Ptr, SV, isVolatile); + L = DAG.getLoad(TLI.getValueType(Ty), Root, Ptr, SV, 0, isVolatile); } if (isVolatile) @@ -1403,7 +1851,7 @@ void SelectionDAGLowering::visitStore(StoreInst &I) { Value *SrcV = I.getOperand(0); SDOperand Src = getValue(SrcV); SDOperand Ptr = getValue(I.getOperand(1)); - DAG.setRoot(DAG.getStore(getRoot(), Src, Ptr, I.getOperand(1), + DAG.setRoot(DAG.getStore(getRoot(), Src, Ptr, I.getOperand(1), 0, I.isVolatile())); } @@ -1530,10 +1978,10 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { case Intrinsic::returnaddress: visitFrameReturnAddress(I, false); return 0; case Intrinsic::frameaddress: visitFrameReturnAddress(I, true); return 0; case Intrinsic::setjmp: - return "_setjmp"+!TLI.usesUnderscoreSetJmpLongJmp(); + return "_setjmp"+!TLI.usesUnderscoreSetJmp(); break; case Intrinsic::longjmp: - return "_longjmp"+!TLI.usesUnderscoreSetJmpLongJmp(); + return "_longjmp"+!TLI.usesUnderscoreLongJmp(); break; case Intrinsic::memcpy_i32: case Intrinsic::memcpy_i64: @@ -1915,6 +2363,8 @@ GetRegistersForValue(const std::string &ConstrCode, MVT::ValueType RegVT; MVT::ValueType ValueVT = VT; + // If this is a constraint for a specific physical register, like {r17}, + // assign it now. if (PhysReg.first) { if (VT == MVT::Other) ValueVT = *PhysReg.second->vt_begin(); @@ -1944,10 +2394,36 @@ GetRegistersForValue(const std::string &ConstrCode, return RegsForValue(Regs, RegVT, ValueVT); } - // This is a reference to a register class. Allocate NumRegs consecutive, - // available, registers from the class. - std::vector RegClassRegs = - TLI.getRegClassForInlineAsmConstraint(ConstrCode, VT); + // Otherwise, if this was a reference to an LLVM register class, create vregs + // for this reference. + std::vector RegClassRegs; + if (PhysReg.second) { + // If this is an early clobber or tied register, our regalloc doesn't know + // how to maintain the constraint. If it isn't, go ahead and create vreg + // and let the regalloc do the right thing. + if (!isOutReg || !isInReg) { + if (VT == MVT::Other) + ValueVT = *PhysReg.second->vt_begin(); + RegVT = *PhysReg.second->vt_begin(); + + // Create the appropriate number of virtual registers. + SSARegMap *RegMap = DAG.getMachineFunction().getSSARegMap(); + for (; NumRegs; --NumRegs) + Regs.push_back(RegMap->createVirtualRegister(PhysReg.second)); + + return RegsForValue(Regs, RegVT, ValueVT); + } + + // Otherwise, we can't allocate it. Let the code below figure out how to + // maintain these constraints. + RegClassRegs.assign(PhysReg.second->begin(), PhysReg.second->end()); + + } else { + // This is a reference to a register class that doesn't directly correspond + // to an LLVM register class. Allocate NumRegs consecutive, available, + // registers from the class. + RegClassRegs = TLI.getRegClassForInlineAsmConstraint(ConstrCode, VT); + } const MRegisterInfo *MRI = DAG.getTarget().getRegisterInfo(); MachineFunction &MF = *CurMBB->getParent(); @@ -2003,11 +2479,6 @@ void SelectionDAGLowering::visitInlineAsm(CallInst &I) { SDOperand AsmStr = DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), MVT::Other); - // Note, we treat inline asms both with and without side-effects as the same. - // If an inline asm doesn't have side effects and doesn't access memory, we - // could not choose to not chain it. - bool hasSideEffects = IA->hasSideEffects(); - std::vector Constraints = IA->ParseConstraints(); std::vector ConstraintVTs; @@ -2145,7 +2616,11 @@ void SelectionDAGLowering::visitInlineAsm(CallInst &I) { GetRegistersForValue(ConstraintCode, ConstraintVTs[i], true, UsesInputRegister, OutputRegs, InputRegs); - assert(!Regs.Regs.empty() && "Couldn't allocate output reg!"); + if (Regs.Regs.empty()) { + cerr << "Couldn't allocate output reg for contraint '" + << ConstraintCode << "'!\n"; + exit(1); + } if (!Constraints[i].isIndirectOutput) { assert(RetValRegs.Regs.empty() && @@ -2211,8 +2686,13 @@ void SelectionDAGLowering::visitInlineAsm(CallInst &I) { CTy = TLI.getConstraintType(ConstraintCode[0]); if (CTy == TargetLowering::C_Other) { - if (!TLI.isOperandValidForConstraint(InOperandVal, ConstraintCode[0])) - assert(0 && "MATCH FAIL!"); + InOperandVal = TLI.isOperandValidForConstraint(InOperandVal, + ConstraintCode[0], DAG); + if (!InOperandVal.Val) { + cerr << "Invalid operand for inline asm constraint '" + << ConstraintCode << "'!\n"; + exit(1); + } // Add information to the INLINEASM node to know about this input. unsigned ResOpType = 3 /*IMM*/ | (1 << 3); @@ -2349,9 +2829,9 @@ void SelectionDAGLowering::visitFree(FreeInst &I) { // basic blocks, and the scheduler passes ownership of it to this method. MachineBasicBlock *TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, MachineBasicBlock *MBB) { - std::cerr << "If a target marks an instruction with " - "'usesCustomDAGSchedInserter', it must implement " - "TargetLowering::InsertAtEndOfBasicBlock!\n"; + cerr << "If a target marks an instruction with " + << "'usesCustomDAGSchedInserter', it must implement " + << "TargetLowering::InsertAtEndOfBasicBlock!\n"; abort(); return 0; } @@ -2384,6 +2864,32 @@ void SelectionDAGLowering::visitVACopy(CallInst &I) { DAG.getSrcValue(I.getOperand(2)))); } +/// ExpandScalarFormalArgs - Recursively expand the formal_argument node, either +/// bit_convert it or join a pair of them with a BUILD_PAIR when appropriate. +static SDOperand ExpandScalarFormalArgs(MVT::ValueType VT, SDNode *Arg, + unsigned &i, SelectionDAG &DAG, + TargetLowering &TLI) { + if (TLI.getTypeAction(VT) != TargetLowering::Expand) + return SDOperand(Arg, i++); + + MVT::ValueType EVT = TLI.getTypeToTransformTo(VT); + unsigned NumVals = MVT::getSizeInBits(VT) / MVT::getSizeInBits(EVT); + if (NumVals == 1) { + return DAG.getNode(ISD::BIT_CONVERT, VT, + ExpandScalarFormalArgs(EVT, Arg, i, DAG, TLI)); + } else if (NumVals == 2) { + SDOperand Lo = ExpandScalarFormalArgs(EVT, Arg, i, DAG, TLI); + SDOperand Hi = ExpandScalarFormalArgs(EVT, Arg, i, DAG, TLI); + if (!TLI.isLittleEndian()) + std::swap(Lo, Hi); + return DAG.getNode(ISD::BUILD_PAIR, VT, Lo, Hi); + } else { + // Value scalarized into many values. Unimp for now. + assert(0 && "Cannot expand i64 -> i16 yet!"); + } + return SDOperand(); +} + /// TargetLowering::LowerArguments - This is the default LowerArguments /// implementation, which just inserts a FORMAL_ARGUMENTS node. FIXME: When all /// targets are migrated to using FORMAL_ARGUMENTS, this hook should be @@ -2414,8 +2920,8 @@ TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) { // If this is a large integer, it needs to be broken up into small // integers. Figure out what the destination type is and how many small // integers it turns into. - MVT::ValueType NVT = getTypeToTransformTo(VT); - unsigned NumVals = MVT::getSizeInBits(VT)/MVT::getSizeInBits(NVT); + MVT::ValueType NVT = getTypeToExpandTo(VT); + unsigned NumVals = getNumElements(VT); for (unsigned i = 0; i != NumVals; ++i) RetVals.push_back(NVT); } else { @@ -2473,23 +2979,10 @@ TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) { } case Expand: if (VT != MVT::Vector) { - // If this is a large integer, it needs to be reassembled from small - // integers. Figure out what the source elt type is and how many small - // integers it is. - MVT::ValueType NVT = getTypeToTransformTo(VT); - unsigned NumVals = MVT::getSizeInBits(VT)/MVT::getSizeInBits(NVT); - if (NumVals == 2) { - SDOperand Lo = SDOperand(Result, i++); - SDOperand Hi = SDOperand(Result, i++); - - if (!isLittleEndian()) - std::swap(Lo, Hi); - - Ops.push_back(DAG.getNode(ISD::BUILD_PAIR, VT, Lo, Hi)); - } else { - // Value scalarized into many values. Unimp for now. - assert(0 && "Cannot expand i64 -> i16 yet!"); - } + // If this is a large integer or a floating point node that needs to be + // expanded, it needs to be reassembled from small integers. Figure out + // what the source elt type is and how many small integers it is. + Ops.push_back(ExpandScalarFormalArgs(VT, Result, i, DAG, *this)); } else { // Otherwise, this is a vector type. We only support legal vectors // right now. @@ -2519,6 +3012,39 @@ TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) { } +/// ExpandScalarCallArgs - Recursively expand call argument node by +/// bit_converting it or extract a pair of elements from the larger node. +static void ExpandScalarCallArgs(MVT::ValueType VT, SDOperand Arg, + bool isSigned, + SmallVector &Ops, + SelectionDAG &DAG, + TargetLowering &TLI) { + if (TLI.getTypeAction(VT) != TargetLowering::Expand) { + Ops.push_back(Arg); + Ops.push_back(DAG.getConstant(isSigned, MVT::i32)); + return; + } + + MVT::ValueType EVT = TLI.getTypeToTransformTo(VT); + unsigned NumVals = MVT::getSizeInBits(VT) / MVT::getSizeInBits(EVT); + if (NumVals == 1) { + Arg = DAG.getNode(ISD::BIT_CONVERT, EVT, Arg); + ExpandScalarCallArgs(EVT, Arg, isSigned, Ops, DAG, TLI); + } else if (NumVals == 2) { + SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, EVT, Arg, + DAG.getConstant(0, TLI.getPointerTy())); + SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, EVT, Arg, + DAG.getConstant(1, TLI.getPointerTy())); + if (!TLI.isLittleEndian()) + std::swap(Lo, Hi); + ExpandScalarCallArgs(EVT, Lo, isSigned, Ops, DAG, TLI); + ExpandScalarCallArgs(EVT, Hi, isSigned, Ops, DAG, TLI); + } else { + // Value scalarized into many values. Unimp for now. + assert(0 && "Cannot expand i64 -> i16 yet!"); + } +} + /// TargetLowering::LowerCallTo - This is the default LowerCallTo /// implementation, which just inserts an ISD::CALL node, which is later custom /// lowered by the target to something concrete. FIXME: When all targets are @@ -2562,24 +3088,7 @@ TargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy, bool isVarArg, // If this is a large integer, it needs to be broken down into small // integers. Figure out what the source elt type is and how many small // integers it is. - MVT::ValueType NVT = getTypeToTransformTo(VT); - unsigned NumVals = MVT::getSizeInBits(VT)/MVT::getSizeInBits(NVT); - if (NumVals == 2) { - SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, NVT, Op, - DAG.getConstant(0, getPointerTy())); - SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, NVT, Op, - DAG.getConstant(1, getPointerTy())); - if (!isLittleEndian()) - std::swap(Lo, Hi); - - Ops.push_back(Lo); - Ops.push_back(DAG.getConstant(isSigned, MVT::i32)); - Ops.push_back(Hi); - Ops.push_back(DAG.getConstant(isSigned, MVT::i32)); - } else { - // Value scalarized into many values. Unimp for now. - assert(0 && "Cannot expand i64 -> i16 yet!"); - } + ExpandScalarCallArgs(VT, Op, isSigned, Ops, DAG, *this); } else { // Otherwise, this is a vector type. We only support legal vectors // right now. @@ -2622,8 +3131,8 @@ TargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy, bool isVarArg, // If this is a large integer, it needs to be reassembled from small // integers. Figure out what the source elt type is and how many small // integers it is. - MVT::ValueType NVT = getTypeToTransformTo(VT); - unsigned NumVals = MVT::getSizeInBits(VT)/MVT::getSizeInBits(NVT); + MVT::ValueType NVT = getTypeToExpandTo(VT); + unsigned NumVals = getNumElements(VT); for (unsigned i = 0; i != NumVals; ++i) RetTys.push_back(NVT); } else { @@ -2690,7 +3199,10 @@ TargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy, bool isVarArg, ResVal = DAG.getNode(ISD::TRUNCATE, VT, ResVal); } else { assert(MVT::isFloatingPoint(VT)); - ResVal = DAG.getNode(ISD::FP_ROUND, VT, ResVal); + if (getTypeAction(VT) == Expand) + ResVal = DAG.getNode(ISD::BIT_CONVERT, VT, ResVal); + else + ResVal = DAG.getNode(ISD::FP_ROUND, VT, ResVal); } } } else if (RetTys.size() == 3) { @@ -2732,7 +3244,7 @@ SDOperand TargetLowering::CustomPromoteOperation(SDOperand Op, } void SelectionDAGLowering::visitFrameReturnAddress(CallInst &I, bool isFrame) { - unsigned Depth = (unsigned)cast(I.getOperand(1))->getValue(); + unsigned Depth = (unsigned)cast(I.getOperand(1))->getZExtValue(); std::pair Result = TLI.LowerFrameReturnAddress(isFrame, getRoot(), Depth, DAG); setValue(&I, Result.first); @@ -2775,13 +3287,12 @@ static SDOperand getMemsetValue(SDOperand Value, MVT::ValueType VT, static SDOperand getMemsetStringVal(MVT::ValueType VT, SelectionDAG &DAG, TargetLowering &TLI, std::string &Str, unsigned Offset) { - MVT::ValueType CurVT = VT; uint64_t Val = 0; unsigned MSB = getSizeInBits(VT) / 8; if (TLI.isLittleEndian()) Offset = Offset + MSB - 1; for (unsigned i = 0; i != MSB; ++i) { - Val = (Val << 8) | Str[Offset]; + Val = (Val << 8) | (unsigned char)Str[Offset]; Offset += TLI.isLittleEndian() ? -1 : 1; } return DAG.getConstant(Val, VT); @@ -2900,7 +3411,7 @@ void SelectionDAGLowering::visitMemIntrinsic(CallInst &I, unsigned Op) { } if (G) { GlobalVariable *GV = dyn_cast(G->getGlobal()); - if (GV) { + if (GV && GV->isConstant()) { Str = GV->getStringValue(false); if (!Str.empty()) { CopyFromStr = true; @@ -3004,7 +3515,8 @@ static bool OptimizeNoopCopyExpression(CastInst *CI) { while (isa(InsertPt)) ++InsertPt; InsertedCast = - new CastInst(CI->getOperand(0), CI->getType(), "", InsertPt); + CastInst::create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "", + InsertPt); MadeChange = true; } @@ -3026,9 +3538,10 @@ static Instruction *InsertGEPComputeCode(Instruction *&V, BasicBlock *BB, Value *PtrOffset) { if (V) return V; // Already computed. + // Figure out the insertion point BasicBlock::iterator InsertPt; if (BB == GEPI->getParent()) { - // If insert into the GEP's block, insert right after the GEP. + // If GEP is already inserted into BB, insert right after the GEP. InsertPt = GEPI; ++InsertPt; } else { @@ -3042,11 +3555,14 @@ static Instruction *InsertGEPComputeCode(Instruction *&V, BasicBlock *BB, // operand). if (CastInst *CI = dyn_cast(Ptr)) if (CI->getParent() != BB && isa(CI->getOperand(0)->getType())) - Ptr = new CastInst(CI->getOperand(0), CI->getType(), "", InsertPt); + Ptr = CastInst::create(CI->getOpcode(), CI->getOperand(0), CI->getType(), + "", InsertPt); // Add the offset, cast it to the right type. Ptr = BinaryOperator::createAdd(Ptr, PtrOffset, "", InsertPt); - return V = new CastInst(Ptr, GEPI->getType(), "", InsertPt); + // Ptr is an integer type, GEPI is pointer type ==> IntToPtr + return V = CastInst::create(Instruction::IntToPtr, Ptr, GEPI->getType(), + "", InsertPt); } /// ReplaceUsesOfGEPInst - Replace all uses of RepPtr with inserted code to @@ -3063,8 +3579,9 @@ static void ReplaceUsesOfGEPInst(Instruction *RepPtr, Value *Ptr, while (!RepPtr->use_empty()) { Instruction *User = cast(RepPtr->use_back()); - // If the user is a Pointer-Pointer cast, recurse. - if (isa(User) && isa(User->getType())) { + // If the user is a Pointer-Pointer cast, recurse. Only BitCast can be + // used for a Pointer-Pointer cast. + if (isa(User)) { ReplaceUsesOfGEPInst(User, Ptr, PtrOffset, DefBB, GEPI, InsertedExprs); // Drop the use of RepPtr. The cast is dead. Don't delete it now, else we @@ -3091,7 +3608,8 @@ static void ReplaceUsesOfGEPInst(Instruction *RepPtr, Value *Ptr, if (GEPI->getType() != RepPtr->getType()) { BasicBlock::iterator IP = NewVal; ++IP; - NewVal = new CastInst(NewVal, RepPtr->getType(), "", IP); + // NewVal must be a GEP which must be pointer type, so BitCast + NewVal = new BitCastInst(NewVal, RepPtr->getType(), "", IP); } User->replaceUsesOfWith(RepPtr, NewVal); } @@ -3126,7 +3644,7 @@ static bool OptimizeGEPExpression(GetElementPtrInst *GEPI, for (GetElementPtrInst::op_iterator OI = GEPI->op_begin()+1, E = GEPI->op_end(); OI != E; ++OI) { if (ConstantInt *CI = dyn_cast(*OI)) { - if (CI->getRawValue()) { + if (CI->getZExtValue()) { hasConstantIndex = true; break; } @@ -3137,7 +3655,8 @@ static bool OptimizeGEPExpression(GetElementPtrInst *GEPI, // If this is a "GEP X, 0, 0, 0", turn this into a cast. if (!hasConstantIndex && !hasVariableIndex) { - Value *NC = new CastInst(GEPI->getOperand(0), GEPI->getType(), + /// The GEP operand must be a pointer, so must its result -> BitCast + Value *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(), GEPI->getName(), GEPI); GEPI->replaceAllUsesWith(NC); GEPI->eraseFromParent(); @@ -3152,14 +3671,14 @@ static bool OptimizeGEPExpression(GetElementPtrInst *GEPI, // constant offset (which we now know is non-zero) and deal with it later. uint64_t ConstantOffset = 0; const Type *UIntPtrTy = TD->getIntPtrType(); - Value *Ptr = new CastInst(GEPI->getOperand(0), UIntPtrTy, "", GEPI); + Value *Ptr = new PtrToIntInst(GEPI->getOperand(0), UIntPtrTy, "", GEPI); const Type *Ty = GEPI->getOperand(0)->getType(); for (GetElementPtrInst::op_iterator OI = GEPI->op_begin()+1, E = GEPI->op_end(); OI != E; ++OI) { Value *Idx = *OI; if (const StructType *StTy = dyn_cast(Ty)) { - unsigned Field = cast(Idx)->getValue(); + unsigned Field = cast(Idx)->getZExtValue(); if (Field) ConstantOffset += TD->getStructLayout(StTy)->MemberOffsets[Field]; Ty = StTy->getElementType(Field); @@ -3168,24 +3687,23 @@ static bool OptimizeGEPExpression(GetElementPtrInst *GEPI, // Handle constant subscripts. if (ConstantInt *CI = dyn_cast(Idx)) { - if (CI->getRawValue() == 0) continue; - - if (ConstantSInt *CSI = dyn_cast(CI)) - ConstantOffset += (int64_t)TD->getTypeSize(Ty)*CSI->getValue(); + if (CI->getZExtValue() == 0) continue; + if (CI->getType()->isSigned()) + ConstantOffset += (int64_t)TD->getTypeSize(Ty)*CI->getSExtValue(); else - ConstantOffset+=TD->getTypeSize(Ty)*cast(CI)->getValue(); + ConstantOffset += TD->getTypeSize(Ty)*CI->getZExtValue(); continue; } // Ptr = Ptr + Idx * ElementSize; // Cast Idx to UIntPtrTy if needed. - Idx = new CastInst(Idx, UIntPtrTy, "", GEPI); + Idx = CastInst::createIntegerCast(Idx, UIntPtrTy, true/*SExt*/, "", GEPI); uint64_t ElementSize = TD->getTypeSize(Ty); // Mask off bits that should not be set. ElementSize &= ~0ULL >> (64-UIntPtrTy->getPrimitiveSizeInBits()); - Constant *SizeCst = ConstantUInt::get(UIntPtrTy, ElementSize); + Constant *SizeCst = ConstantInt::get(UIntPtrTy, ElementSize); // Multiply by the element size and add to the base. Idx = BinaryOperator::createMul(Idx, SizeCst, "", GEPI); @@ -3195,7 +3713,7 @@ static bool OptimizeGEPExpression(GetElementPtrInst *GEPI, // Make sure that the offset fits in uintptr_t. ConstantOffset &= ~0ULL >> (64-UIntPtrTy->getPrimitiveSizeInBits()); - Constant *PtrOffset = ConstantUInt::get(UIntPtrTy, ConstantOffset); + Constant *PtrOffset = ConstantInt::get(UIntPtrTy, ConstantOffset); // Okay, we have now emitted all of the variable index parts to the BB that // the GEP is defined in. Loop over all of the using instructions, inserting @@ -3214,68 +3732,101 @@ static bool OptimizeGEPExpression(GetElementPtrInst *GEPI, return true; } -/// SplitCritEdgesForPHIConstants - If this block has any PHI nodes with -/// constant operands, and if any of the edges feeding the PHI node are -/// critical, split them so that the assignments of a constant to a register -/// will not be executed on a path that isn't relevant. -void SelectionDAGISel::SplitCritEdgesForPHIConstants(BasicBlock *BB) { - // The most common case is that this is a PHI node with two incoming - // successors handle this case efficiently, because it is simple. - PHINode *PN = cast(BB->begin()); - if (PN->getNumIncomingValues() == 2) { - // If neither edge is critical, we never need to split. - if (PN->getIncomingBlock(0)->getTerminator()->getNumSuccessors() == 1 && - PN->getIncomingBlock(1)->getTerminator()->getNumSuccessors() == 1) - return; + +/// SplitEdgeNicely - Split the critical edge from TI to it's specified +/// successor if it will improve codegen. We only do this if the successor has +/// phi nodes (otherwise critical edges are ok). If there is already another +/// predecessor of the succ that is empty (and thus has no phi nodes), use it +/// instead of introducing a new block. +static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, Pass *P) { + BasicBlock *TIBB = TI->getParent(); + BasicBlock *Dest = TI->getSuccessor(SuccNum); + assert(isa(Dest->begin()) && + "This should only be called if Dest has a PHI!"); + + /// TIPHIValues - This array is lazily computed to determine the values of + /// PHIs in Dest that TI would provide. + std::vector TIPHIValues; + + // Check to see if Dest has any blocks that can be used as a split edge for + // this terminator. + for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) { + BasicBlock *Pred = *PI; + // To be usable, the pred has to end with an uncond branch to the dest. + BranchInst *PredBr = dyn_cast(Pred->getTerminator()); + if (!PredBr || !PredBr->isUnconditional() || + // Must be empty other than the branch. + &Pred->front() != PredBr) + continue; - BasicBlock::iterator BBI = BB->begin(); - while ((PN = dyn_cast(BBI++))) { - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (isa(PN->getIncomingValue(i))) - SplitCriticalEdge(PN->getIncomingBlock(i), BB); + // Finally, since we know that Dest has phi nodes in it, we have to make + // sure that jumping to Pred will have the same affect as going to Dest in + // terms of PHI values. + PHINode *PN; + unsigned PHINo = 0; + bool FoundMatch = true; + for (BasicBlock::iterator I = Dest->begin(); + (PN = dyn_cast(I)); ++I, ++PHINo) { + if (PHINo == TIPHIValues.size()) + TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB)); + + // If the PHI entry doesn't work, we can't use this pred. + if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) { + FoundMatch = false; + break; + } + } + + // If we found a workable predecessor, change TI to branch to Succ. + if (FoundMatch) { + Dest->removePredecessor(TIBB); + TI->setSuccessor(SuccNum, Pred); + return; } - return; } - // Otherwise, things are a bit trickier. - - // BE SMART HERE. - - BasicBlock::iterator BBI = BB->begin(); - while ((PN = dyn_cast(BBI++))) { - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (isa(PN->getIncomingValue(i))) - SplitCriticalEdge(PN->getIncomingBlock(i), BB); - } + SplitCriticalEdge(TI, SuccNum, P, true); } bool SelectionDAGISel::runOnFunction(Function &Fn) { MachineFunction &MF = MachineFunction::construct(&Fn, TLI.getTargetMachine()); RegMap = MF.getSSARegMap(); - DEBUG(std::cerr << "\n\n\n=== " << Fn.getName() << "\n"); + DOUT << "\n\n\n=== " << Fn.getName() << "\n"; - // First, split all critical edges for PHI nodes with incoming values that are - // constants, this way the load of the constant into a vreg will not be placed - // into MBBs that are used some other way. + // First, split all critical edges. // // In this pass we also look for GEP and cast instructions that are used // across basic blocks and rewrite them to improve basic-block-at-a-time // selection. // - // bool MadeChange = true; while (MadeChange) { MadeChange = false; for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { - // If this block has any PHI nodes with constant operands, and if any of the - // edges feeding the PHI node are critical, split them. - if (isa(BB->begin())) - SplitCritEdgesForPHIConstants(BB); + // Split all critical edges where the dest block has a PHI. + TerminatorInst *BBTI = BB->getTerminator(); + if (BBTI->getNumSuccessors() > 1) { + for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) + if (isa(BBTI->getSuccessor(i)->begin()) && + isCriticalEdge(BBTI, i, true)) + SplitEdgeNicely(BBTI, i, this); + } + for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { Instruction *I = BBI++; - if (GetElementPtrInst *GEPI = dyn_cast(I)) { + + if (CallInst *CI = dyn_cast(I)) { + // If we found an inline asm expession, and if the target knows how to + // lower it to normal LLVM code, do so now. + if (isa(CI->getCalledValue())) + if (const TargetAsmInfo *TAI = + TLI.getTargetMachine().getTargetAsmInfo()) { + if (TAI->ExpandInlineAsm(CI)) + BBI = BB->begin(); + } + } else if (GetElementPtrInst *GEPI = dyn_cast(I)) { MadeChange |= OptimizeGEPExpression(GEPI, TLI.getTargetData()); } else if (CastInst *CI = dyn_cast(I)) { // If the source of the cast is a constant, then this should have @@ -3324,10 +3875,9 @@ bool SelectionDAGISel::runOnFunction(Function &Fn) { return true; } - -SDOperand SelectionDAGISel:: -CopyValueToVirtualRegister(SelectionDAGLowering &SDL, Value *V, unsigned Reg) { - SDOperand Op = SDL.getValue(V); +SDOperand SelectionDAGLowering::CopyValueToVirtualRegister(Value *V, + unsigned Reg) { + SDOperand Op = getValue(V); assert((Op.getOpcode() != ISD::CopyFromReg || cast(Op.getOperand(1))->getReg() != Reg) && "Copy from a reg to the same reg!"); @@ -3336,9 +3886,8 @@ CopyValueToVirtualRegister(SelectionDAGLowering &SDL, Value *V, unsigned Reg) { // register use. MVT::ValueType SrcVT = Op.getValueType(); MVT::ValueType DestVT = TLI.getTypeToTransformTo(SrcVT); - SelectionDAG &DAG = SDL.DAG; if (SrcVT == DestVT) { - return DAG.getCopyToReg(SDL.getRoot(), Reg, Op); + return DAG.getCopyToReg(getRoot(), Reg, Op); } else if (SrcVT == MVT::Vector) { // Handle copies from generic vectors to registers. MVT::ValueType PTyElementVT, PTyLegalElementVT; @@ -3355,7 +3904,7 @@ CopyValueToVirtualRegister(SelectionDAGLowering &SDL, Value *V, unsigned Reg) { // VEXTRACT_VECTOR_ELT'ing them, converting them to PTyLegalElementVT, then // copying them into output registers. SmallVector OutChains; - SDOperand Root = SDL.getRoot(); + SDOperand Root = getRoot(); for (unsigned i = 0; i != NE; ++i) { SDOperand Elt = DAG.getNode(ISD::VEXTRACT_VECTOR_ELT, PTyElementVT, Op, DAG.getConstant(i, TLI.getPointerTy())); @@ -3382,20 +3931,26 @@ CopyValueToVirtualRegister(SelectionDAGLowering &SDL, Value *V, unsigned Reg) { } return DAG.getNode(ISD::TokenFactor, MVT::Other, &OutChains[0], OutChains.size()); - } else if (SrcVT < DestVT) { + } else if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote) { // The src value is promoted to the register. if (MVT::isFloatingPoint(SrcVT)) Op = DAG.getNode(ISD::FP_EXTEND, DestVT, Op); else Op = DAG.getNode(ISD::ANY_EXTEND, DestVT, Op); - return DAG.getCopyToReg(SDL.getRoot(), Reg, Op); + return DAG.getCopyToReg(getRoot(), Reg, Op); } else { + DestVT = TLI.getTypeToExpandTo(SrcVT); + unsigned NumVals = TLI.getNumElements(SrcVT); + if (NumVals == 1) + return DAG.getCopyToReg(getRoot(), Reg, + DAG.getNode(ISD::BIT_CONVERT, DestVT, Op)); + assert(NumVals == 2 && "1 to 4 (and more) expansion not implemented!"); // The src value is expanded into multiple registers. SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DestVT, Op, DAG.getConstant(0, TLI.getPointerTy())); SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DestVT, Op, DAG.getConstant(1, TLI.getPointerTy())); - Op = DAG.getCopyToReg(SDL.getRoot(), Reg, Lo); + Op = DAG.getCopyToReg(getRoot(), Reg, Lo); return DAG.getCopyToReg(Op, Reg+1, Hi); } } @@ -3419,7 +3974,7 @@ LowerArguments(BasicBlock *BB, SelectionDAGLowering &SDL, // whereever we got it to the vreg that other BB's will reference it as. if (FuncInfo.ValueMap.count(AI)) { SDOperand Copy = - CopyValueToVirtualRegister(SDL, AI, FuncInfo.ValueMap[AI]); + SDL.CopyValueToVirtualRegister(AI, FuncInfo.ValueMap[AI]); UnorderedChains.push_back(Copy); } } @@ -3455,7 +4010,7 @@ void SelectionDAGISel::BuildSelectionDAG(SelectionDAG &DAG, BasicBlock *LLVMBB, std::map::iterator VMI =FuncInfo.ValueMap.find(I); if (VMI != FuncInfo.ValueMap.end()) UnorderedChains.push_back( - CopyValueToVirtualRegister(SDL, I, VMI->second)); + SDL.CopyValueToVirtualRegister(I, VMI->second)); } // Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to @@ -3465,63 +4020,78 @@ void SelectionDAGISel::BuildSelectionDAG(SelectionDAG &DAG, BasicBlock *LLVMBB, // BB. As such, the start of the BB might correspond to a different MBB than // the end. // + TerminatorInst *TI = LLVMBB->getTerminator(); // Emit constants only once even if used by multiple PHI nodes. std::map ConstantsOut; + // Vector bool would be better, but vector is really slow. + std::vector SuccsHandled; + if (TI->getNumSuccessors()) + SuccsHandled.resize(BB->getParent()->getNumBlockIDs()); + // Check successor nodes PHI nodes that expect a constant to be available from // this block. - TerminatorInst *TI = LLVMBB->getTerminator(); for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { BasicBlock *SuccBB = TI->getSuccessor(succ); if (!isa(SuccBB->begin())) continue; + MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB]; + + // If this terminator has multiple identical successors (common for + // switches), only handle each succ once. + unsigned SuccMBBNo = SuccMBB->getNumber(); + if (SuccsHandled[SuccMBBNo]) continue; + SuccsHandled[SuccMBBNo] = true; - MachineBasicBlock::iterator MBBI = FuncInfo.MBBMap[SuccBB]->begin(); + MachineBasicBlock::iterator MBBI = SuccMBB->begin(); PHINode *PN; // At this point we know that there is a 1-1 correspondence between LLVM PHI // nodes and Machine PHI nodes, but the incoming operands have not been // emitted yet. for (BasicBlock::iterator I = SuccBB->begin(); - (PN = dyn_cast(I)); ++I) - if (!PN->use_empty()) { - unsigned Reg; - Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); - if (Constant *C = dyn_cast(PHIOp)) { - unsigned &RegOut = ConstantsOut[C]; - if (RegOut == 0) { - RegOut = FuncInfo.CreateRegForValue(C); - UnorderedChains.push_back( - CopyValueToVirtualRegister(SDL, C, RegOut)); - } - Reg = RegOut; - } else { - Reg = FuncInfo.ValueMap[PHIOp]; - if (Reg == 0) { - assert(isa(PHIOp) && - FuncInfo.StaticAllocaMap.count(cast(PHIOp)) && - "Didn't codegen value into a register!??"); - Reg = FuncInfo.CreateRegForValue(PHIOp); - UnorderedChains.push_back( - CopyValueToVirtualRegister(SDL, PHIOp, Reg)); - } + (PN = dyn_cast(I)); ++I) { + // Ignore dead phi's. + if (PN->use_empty()) continue; + + unsigned Reg; + Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); + + if (Constant *C = dyn_cast(PHIOp)) { + unsigned &RegOut = ConstantsOut[C]; + if (RegOut == 0) { + RegOut = FuncInfo.CreateRegForValue(C); + UnorderedChains.push_back( + SDL.CopyValueToVirtualRegister(C, RegOut)); } - - // Remember that this register needs to added to the machine PHI node as - // the input for this MBB. - MVT::ValueType VT = TLI.getValueType(PN->getType()); - unsigned NumElements; - if (VT != MVT::Vector) - NumElements = TLI.getNumElements(VT); - else { - MVT::ValueType VT1,VT2; - NumElements = - TLI.getPackedTypeBreakdown(cast(PN->getType()), - VT1, VT2); + Reg = RegOut; + } else { + Reg = FuncInfo.ValueMap[PHIOp]; + if (Reg == 0) { + assert(isa(PHIOp) && + FuncInfo.StaticAllocaMap.count(cast(PHIOp)) && + "Didn't codegen value into a register!??"); + Reg = FuncInfo.CreateRegForValue(PHIOp); + UnorderedChains.push_back( + SDL.CopyValueToVirtualRegister(PHIOp, Reg)); } - for (unsigned i = 0, e = NumElements; i != e; ++i) - PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); } + + // Remember that this register needs to added to the machine PHI node as + // the input for this MBB. + MVT::ValueType VT = TLI.getValueType(PN->getType()); + unsigned NumElements; + if (VT != MVT::Vector) + NumElements = TLI.getNumElements(VT); + else { + MVT::ValueType VT1,VT2; + NumElements = + TLI.getPackedTypeBreakdown(cast(PN->getType()), + VT1, VT2); + } + for (unsigned i = 0, e = NumElements; i != e; ++i) + PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); + } } ConstantsOut.clear(); @@ -3563,14 +4133,14 @@ void SelectionDAGISel::CodeGenAndEmitDAG(SelectionDAG &DAG) { // Run the DAG combiner in pre-legalize mode. DAG.Combine(false, AA); - DEBUG(std::cerr << "Lowered selection DAG:\n"); + DOUT << "Lowered selection DAG:\n"; DEBUG(DAG.dump()); // Second step, hack on the DAG until it only uses operations and types that // the target supports. DAG.Legalize(); - DEBUG(std::cerr << "Legalized selection DAG:\n"); + DOUT << "Legalized selection DAG:\n"; DEBUG(DAG.dump()); // Run the DAG combiner in post-legalize mode. @@ -3582,7 +4152,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG(SelectionDAG &DAG) { // code to the MachineBasicBlock. InstructionSelectBasicBlock(DAG); - DEBUG(std::cerr << "Selected machine code:\n"); + DOUT << "Selected machine code:\n"; DEBUG(BB->dump()); } @@ -3648,6 +4218,18 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, MachineFunction &MF, return; } + // If the switch block involved a branch to one of the actual successors, we + // need to update PHI nodes in that block. + for (unsigned i = 0, e = PHINodesToUpdate.size(); i != e; ++i) { + MachineInstr *PHI = PHINodesToUpdate[i].first; + assert(PHI->getOpcode() == TargetInstrInfo::PHI && + "This is not a machine PHI node that we are updating!"); + if (BB->isSuccessor(PHI->getParent())) { + PHI->addRegOperand(PHINodesToUpdate[i].second, false); + PHI->addMachineBasicBlockOperand(BB); + } + } + // If we generated any switch lowering information, build and codegen any // additional DAGs necessary. for (unsigned i = 0, e = SwitchCases.size(); i != e; ++i) { @@ -3668,7 +4250,7 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, MachineFunction &MF, // from the original BB before switch expansion. Note that PHI nodes can // occur multiple times in PHINodesToUpdate. We have to be very careful to // handle them the right number of times. - while ((BB = SwitchCases[i].LHSBB)) { // Handle LHS and RHS. + while ((BB = SwitchCases[i].TrueBB)) { // Handle LHS and RHS. for (MachineBasicBlock::iterator Phi = BB->begin(); Phi != BB->end() && Phi->getOpcode() == TargetInstrInfo::PHI; ++Phi){ // This value for this PHI node is recorded in PHINodesToUpdate, get it. @@ -3683,14 +4265,14 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, MachineFunction &MF, } // Don't process RHS if same block as LHS. - if (BB == SwitchCases[i].RHSBB) - SwitchCases[i].RHSBB = 0; + if (BB == SwitchCases[i].FalseBB) + SwitchCases[i].FalseBB = 0; // If we haven't handled the RHS, do so now. Otherwise, we're done. - SwitchCases[i].LHSBB = SwitchCases[i].RHSBB; - SwitchCases[i].RHSBB = 0; + SwitchCases[i].TrueBB = SwitchCases[i].FalseBB; + SwitchCases[i].FalseBB = 0; } - assert(SwitchCases[i].LHSBB == 0 && SwitchCases[i].RHSBB == 0); + assert(SwitchCases[i].TrueBB == 0 && SwitchCases[i].FalseBB == 0); } } @@ -3812,12 +4394,13 @@ SelectInlineAsmMemoryOperands(std::vector &Ops, SelectionDAG &DAG) { // Otherwise, this is a memory operand. Ask the target to select it. std::vector SelOps; if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps, DAG)) { - std::cerr << "Could not match memory address. Inline asm failure!\n"; + cerr << "Could not match memory address. Inline asm failure!\n"; exit(1); } // Add this to the output node. - Ops.push_back(DAG.getConstant(4/*MEM*/ | (SelOps.size() << 3), MVT::i32)); + Ops.push_back(DAG.getTargetConstant(4/*MEM*/ | (SelOps.size() << 3), + MVT::i32)); Ops.insert(Ops.end(), SelOps.begin(), SelOps.end()); i += 2; }