From: Dan Gohman
Date: Sat, 29 May 2010 17:53:24 +0000 (+0000)
Subject: Reorder some code in SelectionDAGBuilder.
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=462f6b57b6276502e1279d0e508c0b9fc24feb50;p=oota-llvm.git

Reorder some code in SelectionDAGBuilder.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@105105 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index dbad0b89dd7..9f8f0c4dd75 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -70,108 +70,6 @@ LimitFPPrecision("limit-float-precision",
                  cl::location(LimitFloatPrecision),
                  cl::init(0));
 
-namespace {
-  /// RegsForValue - This struct represents the registers (physical or virtual)
-  /// that a particular set of values is assigned, and the type information
-  /// about the value. The most common situation is to represent one value at a
-  /// time, but struct or array values are handled element-wise as multiple
-  /// values. The splitting of aggregates is performed recursively, so that we
-  /// never have aggregate-typed registers. The values at this point do not
-  /// necessarily have legal types, so each value may require one or more
-  /// registers of some legal type.
-  ///
-  struct RegsForValue {
-    /// ValueVTs - The value types of the values, which may not be legal, and
-    /// may need be promoted or synthesized from one or more registers.
-    ///
-    SmallVector<EVT, 4> ValueVTs;
-
-    /// RegVTs - The value types of the registers. This is the same size as
-    /// ValueVTs and it records, for each value, what the type of the assigned
-    /// register or registers are. (Individual values are never synthesized
-    /// from more than one type of register.)
-    ///
-    /// With virtual registers, the contents of RegVTs is redundant with TLI's
-    /// getRegisterType member function, however when with physical registers
-    /// it is necessary to have a separate record of the types.
-    ///
-    SmallVector<EVT, 4> RegVTs;
-
-    /// Regs - This list holds the registers assigned to the values.
-    /// Each legal or promoted value requires one register, and each
-    /// expanded value requires multiple registers.
-    ///
-    SmallVector<unsigned, 4> Regs;
-
-    RegsForValue() {}
-
-    RegsForValue(const SmallVector<unsigned, 4> &regs,
-                 EVT regvt, EVT valuevt)
-      : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
-
-    RegsForValue(const SmallVector<unsigned, 4> &regs,
-                 const SmallVector<EVT, 4> &regvts,
-                 const SmallVector<EVT, 4> &valuevts)
-      : ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {}
-
-    RegsForValue(LLVMContext &Context, const TargetLowering &tli,
-                 unsigned Reg, const Type *Ty) {
-      ComputeValueVTs(tli, Ty, ValueVTs);
-
-      for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
-        EVT ValueVT = ValueVTs[Value];
-        unsigned NumRegs = tli.getNumRegisters(Context, ValueVT);
-        EVT RegisterVT = tli.getRegisterType(Context, ValueVT);
-        for (unsigned i = 0; i != NumRegs; ++i)
-          Regs.push_back(Reg + i);
-        RegVTs.push_back(RegisterVT);
-        Reg += NumRegs;
-      }
-    }
-
-    /// areValueTypesLegal - Return true if types of all the values are legal.
-    bool areValueTypesLegal(const TargetLowering &TLI) {
-      for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
-        EVT RegisterVT = RegVTs[Value];
-        if (!TLI.isTypeLegal(RegisterVT))
-          return false;
-      }
-      return true;
-    }
-
-    /// append - Add the specified values to this one.
-    void append(const RegsForValue &RHS) {
-      ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
-      RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
-      Regs.append(RHS.Regs.begin(), RHS.Regs.end());
-    }
-
-
-    /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
-    /// this value and returns the result as a ValueVTs value. This uses
-    /// Chain/Flag as the input and updates them for the output Chain/Flag.
-    /// If the Flag pointer is NULL, no flag is used.
-    SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
-                            DebugLoc dl,
-                            SDValue &Chain, SDValue *Flag) const;
-
-    /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
-    /// specified value into the registers specified by this object. This uses
-    /// Chain/Flag as the input and updates them for the output Chain/Flag.
-    /// If the Flag pointer is NULL, no flag is used.
-    void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
-                       SDValue &Chain, SDValue *Flag) const;
-
-    /// AddInlineAsmOperands - Add this value to the specified inlineasm node
-    /// operand list. This adds the code marker, matching input operand index
-    /// (if applicable), and includes the number of values added into it.
-    void AddInlineAsmOperands(unsigned Kind,
-                              bool HasMatching, unsigned MatchingIdx,
-                              SelectionDAG &DAG,
-                              std::vector<SDValue> &Ops) const;
-  };
-}
-
 /// getCopyFromParts - Create a value that contains the specified legal parts
 /// combined into the value they represent. If the parts combine to a type
 /// larger then ValueVT then AssertOp can be used to specify whether the extra
@@ -523,2418 +421,2680 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl,
   }
 }
 
+namespace {
+  /// RegsForValue - This struct represents the registers (physical or virtual)
+  /// that a particular set of values is assigned, and the type information
+  /// about the value. The most common situation is to represent one value at a
+  /// time, but struct or array values are handled element-wise as multiple
+  /// values. The splitting of aggregates is performed recursively, so that we
+  /// never have aggregate-typed registers. The values at this point do not
+  /// necessarily have legal types, so each value may require one or more
+  /// registers of some legal type.
+  ///
+  struct RegsForValue {
+    /// ValueVTs - The value types of the values, which may not be legal, and
+    /// may need be promoted or synthesized from one or more registers.
+    ///
+    SmallVector<EVT, 4> ValueVTs;
-
-void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
-  AA = &aa;
-  GFI = gfi;
-  TD = DAG.getTarget().getTargetData();
-}
-
-/// clear - Clear out the current SelectionDAG and the associated
-/// state and prepare this SelectionDAGBuilder object to be used
-/// for a new block. This doesn't clear out information about
-/// additional blocks that are needed to complete switch lowering
-/// or PHI node updating; that information is cleared out as it is
-/// consumed.
-void SelectionDAGBuilder::clear() {
-  NodeMap.clear();
-  PendingLoads.clear();
-  PendingExports.clear();
-  CurDebugLoc = DebugLoc();
-  HasTailCall = false;
-}
+    /// RegVTs - The value types of the registers. This is the same size as
+    /// ValueVTs and it records, for each value, what the type of the assigned
+    /// register or registers are. (Individual values are never synthesized
+    /// from more than one type of register.)
+ /// + /// With virtual registers, the contents of RegVTs is redundant with TLI's + /// getRegisterType member function, however when with physical registers + /// it is necessary to have a separate record of the types. + /// + SmallVector RegVTs; -/// getRoot - Return the current virtual root of the Selection DAG, -/// flushing any PendingLoad items. This must be done before emitting -/// a store or any other node that may need to be ordered after any -/// prior load instructions. -/// -SDValue SelectionDAGBuilder::getRoot() { - if (PendingLoads.empty()) - return DAG.getRoot(); + /// Regs - This list holds the registers assigned to the values. + /// Each legal or promoted value requires one register, and each + /// expanded value requires multiple registers. + /// + SmallVector Regs; - if (PendingLoads.size() == 1) { - SDValue Root = PendingLoads[0]; - DAG.setRoot(Root); - PendingLoads.clear(); - return Root; - } + RegsForValue() {} - // Otherwise, we have to make a token factor node. - SDValue Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, - &PendingLoads[0], PendingLoads.size()); - PendingLoads.clear(); - DAG.setRoot(Root); - return Root; -} + RegsForValue(const SmallVector ®s, + EVT regvt, EVT valuevt) + : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} -/// getControlRoot - Similar to getRoot, but instead of flushing all the -/// PendingLoad items, flush all the PendingExports items. It is necessary -/// to do this before emitting a terminator instruction. -/// -SDValue SelectionDAGBuilder::getControlRoot() { - SDValue Root = DAG.getRoot(); + RegsForValue(const SmallVector ®s, + const SmallVector ®vts, + const SmallVector &valuevts) + : ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {} - if (PendingExports.empty()) - return Root; + RegsForValue(LLVMContext &Context, const TargetLowering &tli, + unsigned Reg, const Type *Ty) { + ComputeValueVTs(tli, Ty, ValueVTs); - // Turn all of the CopyToReg chains into one factored node. - if (Root.getOpcode() != ISD::EntryToken) { - unsigned i = 0, e = PendingExports.size(); - for (; i != e; ++i) { - assert(PendingExports[i].getNode()->getNumOperands() > 1); - if (PendingExports[i].getNode()->getOperand(0) == Root) - break; // Don't add the root if we already indirectly depend on it. + for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { + EVT ValueVT = ValueVTs[Value]; + unsigned NumRegs = tli.getNumRegisters(Context, ValueVT); + EVT RegisterVT = tli.getRegisterType(Context, ValueVT); + for (unsigned i = 0; i != NumRegs; ++i) + Regs.push_back(Reg + i); + RegVTs.push_back(RegisterVT); + Reg += NumRegs; + } } - if (i == e) - PendingExports.push_back(Root); - } - - Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, - &PendingExports[0], - PendingExports.size()); - PendingExports.clear(); - DAG.setRoot(Root); - return Root; -} + /// areValueTypesLegal - Return true if types of all the values are legal. + bool areValueTypesLegal(const TargetLowering &TLI) { + for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { + EVT RegisterVT = RegVTs[Value]; + if (!TLI.isTypeLegal(RegisterVT)) + return false; + } + return true; + } -void SelectionDAGBuilder::AssignOrderingToNode(const SDNode *Node) { - if (DAG.GetOrdering(Node) != 0) return; // Already has ordering. - DAG.AssignOrdering(Node, SDNodeOrder); + /// append - Add the specified values to this one. 
+ void append(const RegsForValue &RHS) { + ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); + RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end()); + Regs.append(RHS.Regs.begin(), RHS.Regs.end()); + } - for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) - AssignOrderingToNode(Node->getOperand(I).getNode()); -} + /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from + /// this value and returns the result as a ValueVTs value. This uses + /// Chain/Flag as the input and updates them for the output Chain/Flag. + /// If the Flag pointer is NULL, no flag is used. + SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, + DebugLoc dl, + SDValue &Chain, SDValue *Flag) const; -void SelectionDAGBuilder::visit(const Instruction &I) { - // Set up outgoing PHI node register values before emitting the terminator. - if (isa(&I)) - HandlePHINodesInSuccessorBlocks(I.getParent()); + /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the + /// specified value into the registers specified by this object. This uses + /// Chain/Flag as the input and updates them for the output Chain/Flag. + /// If the Flag pointer is NULL, no flag is used. + void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, + SDValue &Chain, SDValue *Flag) const; - CurDebugLoc = I.getDebugLoc(); - - visit(I.getOpcode(), I); - - if (!isa(&I) && !HasTailCall) - CopyToExportRegsIfNeeded(&I); - - CurDebugLoc = DebugLoc(); -} - -void SelectionDAGBuilder::visitPHI(const PHINode &) { - llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!"); -} - -void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { - // Note: this doesn't use InstVisitor, because it has to work with - // ConstantExpr's in addition to instructions. - switch (Opcode) { - default: llvm_unreachable("Unknown instruction type encountered!"); - // Build the switch statement using the Instruction.def file. -#define HANDLE_INST(NUM, OPCODE, CLASS) \ - case Instruction::OPCODE: visit##OPCODE((CLASS&)I); break; -#include "llvm/Instruction.def" - } - - // Assign the ordering to the freshly created DAG nodes. - if (NodeMap.count(&I)) { - ++SDNodeOrder; - AssignOrderingToNode(getValue(&I).getNode()); - } + /// AddInlineAsmOperands - Add this value to the specified inlineasm node + /// operand list. This adds the code marker, matching input operand index + /// (if applicable), and includes the number of values added into it. + void AddInlineAsmOperands(unsigned Kind, + bool HasMatching, unsigned MatchingIdx, + SelectionDAG &DAG, + std::vector &Ops) const; + }; } -SDValue SelectionDAGBuilder::getValue(const Value *V) { - SDValue &N = NodeMap[V]; - if (N.getNode()) return N; - - if (const Constant *C = dyn_cast(V)) { - EVT VT = TLI.getValueType(V->getType(), true); +/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from +/// this value and returns the result as a ValueVT value. This uses +/// Chain/Flag as the input and updates them for the output Chain/Flag. +/// If the Flag pointer is NULL, no flag is used. +SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, + FunctionLoweringInfo &FuncInfo, + DebugLoc dl, + SDValue &Chain, SDValue *Flag) const { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (const ConstantInt *CI = dyn_cast(C)) - return N = DAG.getConstant(*CI, VT); + // Assemble the legal parts into the final values. 
+ SmallVector Values(ValueVTs.size()); + SmallVector Parts; + for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { + // Copy the legal parts from the registers. + EVT ValueVT = ValueVTs[Value]; + unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT); + EVT RegisterVT = RegVTs[Value]; - if (const GlobalValue *GV = dyn_cast(C)) - return N = DAG.getGlobalAddress(GV, VT); + Parts.resize(NumRegs); + for (unsigned i = 0; i != NumRegs; ++i) { + SDValue P; + if (Flag == 0) { + P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT); + } else { + P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag); + *Flag = P.getValue(2); + } - if (isa(C)) - return N = DAG.getConstant(0, TLI.getPointerTy()); + Chain = P.getValue(1); - if (const ConstantFP *CFP = dyn_cast(C)) - return N = DAG.getConstantFP(*CFP, VT); + // If the source register was virtual and if we know something about it, + // add an assert node. + if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) && + RegisterVT.isInteger() && !RegisterVT.isVector()) { + unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister; + if (FuncInfo.LiveOutRegInfo.size() > SlotNo) { + const FunctionLoweringInfo::LiveOutInfo &LOI = + FuncInfo.LiveOutRegInfo[SlotNo]; - if (isa(C) && !V->getType()->isAggregateType()) - return N = DAG.getUNDEF(VT); + unsigned RegSize = RegisterVT.getSizeInBits(); + unsigned NumSignBits = LOI.NumSignBits; + unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes(); - if (const ConstantExpr *CE = dyn_cast(C)) { - visit(CE->getOpcode(), *CE); - SDValue N1 = NodeMap[V]; - assert(N1.getNode() && "visit didn't populate the NodeMap!"); - return N1; - } + // FIXME: We capture more information than the dag can represent. For + // now, just use the tightest assertzext/assertsext possible. + bool isSExt = true; + EVT FromVT(MVT::Other); + if (NumSignBits == RegSize) + isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 + else if (NumZeroBits >= RegSize-1) + isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1 + else if (NumSignBits > RegSize-8) + isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8 + else if (NumZeroBits >= RegSize-8) + isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8 + else if (NumSignBits > RegSize-16) + isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16 + else if (NumZeroBits >= RegSize-16) + isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 + else if (NumSignBits > RegSize-32) + isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32 + else if (NumZeroBits >= RegSize-32) + isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 - if (isa(C) || isa(C)) { - SmallVector Constants; - for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end(); - OI != OE; ++OI) { - SDNode *Val = getValue(*OI).getNode(); - // If the operand is an empty aggregate, there are no values. - if (!Val) continue; - // Add each leaf value from the operand to the Constants list - // to form a flattened list of all the values. - for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) - Constants.push_back(SDValue(Val, i)); + if (FromVT != MVT::Other) + P = DAG.getNode(isSExt ? 
ISD::AssertSext : ISD::AssertZext, dl, + RegisterVT, P, DAG.getValueType(FromVT)); + } } - return DAG.getMergeValues(&Constants[0], Constants.size(), - getCurDebugLoc()); + Parts[i] = P; } - if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { - assert((isa(C) || isa(C)) && - "Unknown struct or array constant!"); + Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), + NumRegs, RegisterVT, ValueVT); + Part += NumRegs; + Parts.clear(); + } - SmallVector ValueVTs; - ComputeValueVTs(TLI, C->getType(), ValueVTs); - unsigned NumElts = ValueVTs.size(); - if (NumElts == 0) - return SDValue(); // empty struct - SmallVector Constants(NumElts); - for (unsigned i = 0; i != NumElts; ++i) { - EVT EltVT = ValueVTs[i]; - if (isa(C)) - Constants[i] = DAG.getUNDEF(EltVT); - else if (EltVT.isFloatingPoint()) - Constants[i] = DAG.getConstantFP(0, EltVT); - else - Constants[i] = DAG.getConstant(0, EltVT); - } + return DAG.getNode(ISD::MERGE_VALUES, dl, + DAG.getVTList(&ValueVTs[0], ValueVTs.size()), + &Values[0], ValueVTs.size()); +} - return DAG.getMergeValues(&Constants[0], NumElts, - getCurDebugLoc()); - } +/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the +/// specified value into the registers specified by this object. This uses +/// Chain/Flag as the input and updates them for the output Chain/Flag. +/// If the Flag pointer is NULL, no flag is used. +void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, + SDValue &Chain, SDValue *Flag) const { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (const BlockAddress *BA = dyn_cast(C)) - return DAG.getBlockAddress(BA, VT); + // Get the list of the values's legal parts. + unsigned NumRegs = Regs.size(); + SmallVector Parts(NumRegs); + for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { + EVT ValueVT = ValueVTs[Value]; + unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT); + EVT RegisterVT = RegVTs[Value]; - const VectorType *VecTy = cast(V->getType()); - unsigned NumElements = VecTy->getNumElements(); + getCopyToParts(DAG, dl, + Val.getValue(Val.getResNo() + Value), + &Parts[Part], NumParts, RegisterVT); + Part += NumParts; + } - // Now that we know the number and type of the elements, get that number of - // elements into the Ops array based on what kind of constant it is. - SmallVector Ops; - if (const ConstantVector *CP = dyn_cast(C)) { - for (unsigned i = 0; i != NumElements; ++i) - Ops.push_back(getValue(CP->getOperand(i))); + // Copy the parts into the registers. + SmallVector Chains(NumRegs); + for (unsigned i = 0; i != NumRegs; ++i) { + SDValue Part; + if (Flag == 0) { + Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]); } else { - assert(isa(C) && "Unknown vector constant!"); - EVT EltVT = TLI.getValueType(VecTy->getElementType()); - - SDValue Op; - if (EltVT.isFloatingPoint()) - Op = DAG.getConstantFP(0, EltVT); - else - Op = DAG.getConstant(0, EltVT); - Ops.assign(NumElements, Op); + Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag); + *Flag = Part.getValue(1); } - // Create a BUILD_VECTOR node. - return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), - VT, &Ops[0], Ops.size()); - } - - // If this is a static alloca, generate it as the frameindex instead of - // computation. 
- if (const AllocaInst *AI = dyn_cast(V)) { - DenseMap::iterator SI = - FuncInfo.StaticAllocaMap.find(AI); - if (SI != FuncInfo.StaticAllocaMap.end()) - return DAG.getFrameIndex(SI->second, TLI.getPointerTy()); + Chains[i] = Part.getValue(0); } - unsigned InReg = FuncInfo.ValueMap[V]; - assert(InReg && "Value not in map!"); - - RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); - SDValue Chain = DAG.getEntryNode(); - return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL); + if (NumRegs == 1 || Flag) + // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is + // flagged to it. That is the CopyToReg nodes and the user are considered + // a single scheduling unit. If we create a TokenFactor and return it as + // chain, then the TokenFactor is both a predecessor (operand) of the + // user as well as a successor (the TF operands are flagged to the user). + // c1, f1 = CopyToReg + // c2, f2 = CopyToReg + // c3 = TokenFactor c1, c2 + // ... + // = op c3, ..., f2 + Chain = Chains[NumRegs-1]; + else + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs); } -/// Get the EVTs and ArgFlags collections that represent the legalized return -/// type of the given function. This does not require a DAG or a return value, -/// and is suitable for use before any DAGs for the function are constructed. -static void getReturnInfo(const Type* ReturnType, - Attributes attr, SmallVectorImpl &OutVTs, - SmallVectorImpl &OutFlags, - const TargetLowering &TLI, - SmallVectorImpl *Offsets = 0) { - SmallVector ValueVTs; - ComputeValueVTs(TLI, ReturnType, ValueVTs); - unsigned NumValues = ValueVTs.size(); - if (NumValues == 0) return; - unsigned Offset = 0; - - for (unsigned j = 0, f = NumValues; j != f; ++j) { - EVT VT = ValueVTs[j]; - ISD::NodeType ExtendKind = ISD::ANY_EXTEND; +/// AddInlineAsmOperands - Add this value to the specified inlineasm node +/// operand list. This adds the code marker and includes the number of +/// values added into it. +void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, + unsigned MatchingIdx, + SelectionDAG &DAG, + std::vector &Ops) const { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (attr & Attribute::SExt) - ExtendKind = ISD::SIGN_EXTEND; - else if (attr & Attribute::ZExt) - ExtendKind = ISD::ZERO_EXTEND; + unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); + if (HasMatching) + Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx); + SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); + Ops.push_back(Res); - // FIXME: C calling convention requires the return type to be promoted to - // at least 32-bit. But this is not necessary for non-C calling - // conventions. The frontend should mark functions whose return values - // require promoting with signext or zeroext attributes. 
- if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { - EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32); - if (VT.bitsLT(MinVT)) - VT = MinVT; + for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { + unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); + EVT RegisterVT = RegVTs[Value]; + for (unsigned i = 0; i != NumRegs; ++i) { + assert(Reg < Regs.size() && "Mismatch in # registers expected"); + Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT)); } + } +} - unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT); - EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT); - unsigned PartSize = TLI.getTargetData()->getTypeAllocSize( - PartVT.getTypeForEVT(ReturnType->getContext())); - - // 'inreg' on function refers to return value - ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); - if (attr & Attribute::InReg) - Flags.setInReg(); - - // Propagate extension type if any - if (attr & Attribute::SExt) - Flags.setSExt(); - else if (attr & Attribute::ZExt) - Flags.setZExt(); +void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) { + AA = &aa; + GFI = gfi; + TD = DAG.getTarget().getTargetData(); +} - for (unsigned i = 0; i < NumParts; ++i) { - OutVTs.push_back(PartVT); - OutFlags.push_back(Flags); - if (Offsets) - { - Offsets->push_back(Offset); - Offset += PartSize; - } - } - } +/// clear - Clear out the current SelectionDAG and the associated +/// state and prepare this SelectionDAGBuilder object to be used +/// for a new block. This doesn't clear out information about +/// additional blocks that are needed to complete switch lowering +/// or PHI node updating; that information is cleared out as it is +/// consumed. +void SelectionDAGBuilder::clear() { + NodeMap.clear(); + PendingLoads.clear(); + PendingExports.clear(); + CurDebugLoc = DebugLoc(); + HasTailCall = false; } -void SelectionDAGBuilder::visitRet(const ReturnInst &I) { - SDValue Chain = getControlRoot(); - SmallVector Outs; +/// getRoot - Return the current virtual root of the Selection DAG, +/// flushing any PendingLoad items. This must be done before emitting +/// a store or any other node that may need to be ordered after any +/// prior load instructions. +/// +SDValue SelectionDAGBuilder::getRoot() { + if (PendingLoads.empty()) + return DAG.getRoot(); - if (!FuncInfo.CanLowerReturn) { - unsigned DemoteReg = FuncInfo.DemoteRegister; - const Function *F = I.getParent()->getParent(); + if (PendingLoads.size() == 1) { + SDValue Root = PendingLoads[0]; + DAG.setRoot(Root); + PendingLoads.clear(); + return Root; + } - // Emit a store of the return value through the virtual register. - // Leave Outs empty so that LowerReturn won't try to load return - // registers the usual way. - SmallVector PtrValueVTs; - ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()), - PtrValueVTs); + // Otherwise, we have to make a token factor node. + SDValue Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, + &PendingLoads[0], PendingLoads.size()); + PendingLoads.clear(); + DAG.setRoot(Root); + return Root; +} - SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]); - SDValue RetOp = getValue(I.getOperand(0)); +/// getControlRoot - Similar to getRoot, but instead of flushing all the +/// PendingLoad items, flush all the PendingExports items. It is necessary +/// to do this before emitting a terminator instruction. 
+/// +SDValue SelectionDAGBuilder::getControlRoot() { + SDValue Root = DAG.getRoot(); - SmallVector ValueVTs; - SmallVector Offsets; - ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); - unsigned NumValues = ValueVTs.size(); + if (PendingExports.empty()) + return Root; - SmallVector Chains(NumValues); - EVT PtrVT = PtrValueVTs[0]; - for (unsigned i = 0; i != NumValues; ++i) { - SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, RetPtr, - DAG.getConstant(Offsets[i], PtrVT)); - Chains[i] = - DAG.getStore(Chain, getCurDebugLoc(), - SDValue(RetOp.getNode(), RetOp.getResNo() + i), - Add, NULL, Offsets[i], false, false, 0); + // Turn all of the CopyToReg chains into one factored node. + if (Root.getOpcode() != ISD::EntryToken) { + unsigned i = 0, e = PendingExports.size(); + for (; i != e; ++i) { + assert(PendingExports[i].getNode()->getNumOperands() > 1); + if (PendingExports[i].getNode()->getOperand(0) == Root) + break; // Don't add the root if we already indirectly depend on it. } - Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), - MVT::Other, &Chains[0], NumValues); - } else if (I.getNumOperands() != 0) { - SmallVector ValueVTs; - ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs); - unsigned NumValues = ValueVTs.size(); - if (NumValues) { - SDValue RetOp = getValue(I.getOperand(0)); - for (unsigned j = 0, f = NumValues; j != f; ++j) { - EVT VT = ValueVTs[j]; - - ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + if (i == e) + PendingExports.push_back(Root); + } - const Function *F = I.getParent()->getParent(); - if (F->paramHasAttr(0, Attribute::SExt)) - ExtendKind = ISD::SIGN_EXTEND; - else if (F->paramHasAttr(0, Attribute::ZExt)) - ExtendKind = ISD::ZERO_EXTEND; + Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, + &PendingExports[0], + PendingExports.size()); + PendingExports.clear(); + DAG.setRoot(Root); + return Root; +} - // FIXME: C calling convention requires the return type to be promoted - // to at least 32-bit. But this is not necessary for non-C calling - // conventions. The frontend should mark functions whose return values - // require promoting with signext or zeroext attributes. - if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { - EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32); - if (VT.bitsLT(MinVT)) - VT = MinVT; - } +void SelectionDAGBuilder::AssignOrderingToNode(const SDNode *Node) { + if (DAG.GetOrdering(Node) != 0) return; // Already has ordering. + DAG.AssignOrdering(Node, SDNodeOrder); - unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT); - EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); - SmallVector Parts(NumParts); - getCopyToParts(DAG, getCurDebugLoc(), - SDValue(RetOp.getNode(), RetOp.getResNo() + j), - &Parts[0], NumParts, PartVT, ExtendKind); + for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) + AssignOrderingToNode(Node->getOperand(I).getNode()); +} - // 'inreg' on function refers to return value - ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); - if (F->paramHasAttr(0, Attribute::InReg)) - Flags.setInReg(); +void SelectionDAGBuilder::visit(const Instruction &I) { + // Set up outgoing PHI node register values before emitting the terminator. 
+ if (isa(&I)) + HandlePHINodesInSuccessorBlocks(I.getParent()); - // Propagate extension type if any - if (F->paramHasAttr(0, Attribute::SExt)) - Flags.setSExt(); - else if (F->paramHasAttr(0, Attribute::ZExt)) - Flags.setZExt(); + CurDebugLoc = I.getDebugLoc(); - for (unsigned i = 0; i < NumParts; ++i) - Outs.push_back(ISD::OutputArg(Flags, Parts[i], /*isfixed=*/true)); - } - } - } + visit(I.getOpcode(), I); - bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); - CallingConv::ID CallConv = - DAG.getMachineFunction().getFunction()->getCallingConv(); - Chain = TLI.LowerReturn(Chain, CallConv, isVarArg, - Outs, getCurDebugLoc(), DAG); + if (!isa(&I) && !HasTailCall) + CopyToExportRegsIfNeeded(&I); - // Verify that the target's LowerReturn behaved as expected. - assert(Chain.getNode() && Chain.getValueType() == MVT::Other && - "LowerReturn didn't return a valid chain!"); + CurDebugLoc = DebugLoc(); +} - // Update the DAG with the new chain value resulting from return lowering. - DAG.setRoot(Chain); +void SelectionDAGBuilder::visitPHI(const PHINode &) { + llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!"); } -/// CopyToExportRegsIfNeeded - If the given value has virtual registers -/// created for it, emit nodes to copy the value into the virtual -/// registers. -void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) { - DenseMap::iterator VMI = FuncInfo.ValueMap.find(V); - if (VMI != FuncInfo.ValueMap.end()) { - assert(!V->use_empty() && "Unused value assigned virtual registers!"); - CopyValueToVirtualRegister(V, VMI->second); +void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { + // Note: this doesn't use InstVisitor, because it has to work with + // ConstantExpr's in addition to instructions. + switch (Opcode) { + default: llvm_unreachable("Unknown instruction type encountered!"); + // Build the switch statement using the Instruction.def file. +#define HANDLE_INST(NUM, OPCODE, CLASS) \ + case Instruction::OPCODE: visit##OPCODE((CLASS&)I); break; +#include "llvm/Instruction.def" + } + + // Assign the ordering to the freshly created DAG nodes. + if (NodeMap.count(&I)) { + ++SDNodeOrder; + AssignOrderingToNode(getValue(&I).getNode()); } } -/// ExportFromCurrentBlock - If this condition isn't known to be exported from -/// the current basic block, add it to ValueMap now so that we'll get a -/// CopyTo/FromReg. -void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) { - // No need to export constants. - if (!isa(V) && !isa(V)) return; - - // Already exported? - if (FuncInfo.isExportedInst(V)) return; - - unsigned Reg = FuncInfo.InitializeRegForValue(V); - CopyValueToVirtualRegister(V, Reg); -} +SDValue SelectionDAGBuilder::getValue(const Value *V) { + SDValue &N = NodeMap[V]; + if (N.getNode()) return N; -bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, - const BasicBlock *FromBB) { - // The operands of the setcc have to be in this block. We don't know - // how to export them from some other block. - if (const Instruction *VI = dyn_cast(V)) { - // Can export from current BB. - if (VI->getParent() == FromBB) - return true; + if (const Constant *C = dyn_cast(V)) { + EVT VT = TLI.getValueType(V->getType(), true); - // Is already exported, noop. - return FuncInfo.isExportedInst(V); - } + if (const ConstantInt *CI = dyn_cast(C)) + return N = DAG.getConstant(*CI, VT); - // If this is an argument, we can export it if the BB is the entry block or - // if it is already exported. 
- if (isa(V)) { - if (FromBB == &FromBB->getParent()->getEntryBlock()) - return true; + if (const GlobalValue *GV = dyn_cast(C)) + return N = DAG.getGlobalAddress(GV, VT); - // Otherwise, can only export this if it is already exported. - return FuncInfo.isExportedInst(V); - } + if (isa(C)) + return N = DAG.getConstant(0, TLI.getPointerTy()); - // Otherwise, constants can always be exported. - return true; -} + if (const ConstantFP *CFP = dyn_cast(C)) + return N = DAG.getConstantFP(*CFP, VT); -static bool InBlock(const Value *V, const BasicBlock *BB) { - if (const Instruction *I = dyn_cast(V)) - return I->getParent() == BB; - return true; -} + if (isa(C) && !V->getType()->isAggregateType()) + return N = DAG.getUNDEF(VT); -/// EmitBranchForMergedCondition - Helper method for FindMergedConditions. -/// This function emits a branch and is used at the leaves of an OR or an -/// AND operator tree. -/// -void -SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, - MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - MachineBasicBlock *CurBB, - MachineBasicBlock *SwitchBB) { - const BasicBlock *BB = CurBB->getBasicBlock(); + if (const ConstantExpr *CE = dyn_cast(C)) { + visit(CE->getOpcode(), *CE); + SDValue N1 = NodeMap[V]; + assert(N1.getNode() && "visit didn't populate the NodeMap!"); + return N1; + } - // If the leaf of the tree is a comparison, merge the condition into - // the caseblock. - if (const CmpInst *BOp = dyn_cast(Cond)) { - // The operands of the cmp have to be in this block. We don't know - // how to export them from some other block. If this is the first block - // of the sequence, no exporting is needed. - if (CurBB == SwitchBB || - (isExportableFromCurrentBlock(BOp->getOperand(0), BB) && - isExportableFromCurrentBlock(BOp->getOperand(1), BB))) { - ISD::CondCode Condition; - if (const ICmpInst *IC = dyn_cast(Cond)) { - Condition = getICmpCondCode(IC->getPredicate()); - } else if (const FCmpInst *FC = dyn_cast(Cond)) { - Condition = getFCmpCondCode(FC->getPredicate()); - } else { - Condition = ISD::SETEQ; // silence warning. - llvm_unreachable("Unknown compare instruction"); + if (isa(C) || isa(C)) { + SmallVector Constants; + for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end(); + OI != OE; ++OI) { + SDNode *Val = getValue(*OI).getNode(); + // If the operand is an empty aggregate, there are no values. + if (!Val) continue; + // Add each leaf value from the operand to the Constants list + // to form a flattened list of all the values. + for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) + Constants.push_back(SDValue(Val, i)); } - CaseBlock CB(Condition, BOp->getOperand(0), - BOp->getOperand(1), NULL, TBB, FBB, CurBB); - SwitchCases.push_back(CB); - return; + return DAG.getMergeValues(&Constants[0], Constants.size(), + getCurDebugLoc()); } - } - // Create a CaseBlock record representing this branch. - CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()), - NULL, TBB, FBB, CurBB); - SwitchCases.push_back(CB); -} + if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { + assert((isa(C) || isa(C)) && + "Unknown struct or array constant!"); -/// FindMergedConditions - If Cond is an expression like -void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, - MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - MachineBasicBlock *CurBB, - MachineBasicBlock *SwitchBB, - unsigned Opc) { - // If this node is not part of the or/and tree, emit it as a branch. 
- const Instruction *BOp = dyn_cast(Cond); - if (!BOp || !(isa(BOp) || isa(BOp)) || - (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() || - BOp->getParent() != CurBB->getBasicBlock() || - !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || - !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { - EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB); - return; - } + SmallVector ValueVTs; + ComputeValueVTs(TLI, C->getType(), ValueVTs); + unsigned NumElts = ValueVTs.size(); + if (NumElts == 0) + return SDValue(); // empty struct + SmallVector Constants(NumElts); + for (unsigned i = 0; i != NumElts; ++i) { + EVT EltVT = ValueVTs[i]; + if (isa(C)) + Constants[i] = DAG.getUNDEF(EltVT); + else if (EltVT.isFloatingPoint()) + Constants[i] = DAG.getConstantFP(0, EltVT); + else + Constants[i] = DAG.getConstant(0, EltVT); + } - // Create TmpBB after CurBB. - MachineFunction::iterator BBI = CurBB; - MachineFunction &MF = DAG.getMachineFunction(); - MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock()); - CurBB->getParent()->insert(++BBI, TmpBB); + return DAG.getMergeValues(&Constants[0], NumElts, + getCurDebugLoc()); + } - if (Opc == Instruction::Or) { - // Codegen X | Y as: - // jmp_if_X TBB - // jmp TmpBB - // TmpBB: - // jmp_if_Y TBB - // jmp FBB - // + if (const BlockAddress *BA = dyn_cast(C)) + return DAG.getBlockAddress(BA, VT); - // Emit the LHS condition. - FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc); + const VectorType *VecTy = cast(V->getType()); + unsigned NumElements = VecTy->getNumElements(); - // Emit the RHS condition into TmpBB. - FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc); - } else { - assert(Opc == Instruction::And && "Unknown merge op!"); - // Codegen X & Y as: - // jmp_if_X TmpBB - // jmp FBB - // TmpBB: - // jmp_if_Y TBB - // jmp FBB - // - // This requires creation of TmpBB after CurBB. + // Now that we know the number and type of the elements, get that number of + // elements into the Ops array based on what kind of constant it is. + SmallVector Ops; + if (const ConstantVector *CP = dyn_cast(C)) { + for (unsigned i = 0; i != NumElements; ++i) + Ops.push_back(getValue(CP->getOperand(i))); + } else { + assert(isa(C) && "Unknown vector constant!"); + EVT EltVT = TLI.getValueType(VecTy->getElementType()); - // Emit the LHS condition. - FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc); + SDValue Op; + if (EltVT.isFloatingPoint()) + Op = DAG.getConstantFP(0, EltVT); + else + Op = DAG.getConstant(0, EltVT); + Ops.assign(NumElements, Op); + } - // Emit the RHS condition into TmpBB. - FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc); + // Create a BUILD_VECTOR node. + return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + VT, &Ops[0], Ops.size()); } -} -/// If the set of cases should be emitted as a series of branches, return true. -/// If we should emit this as a bunch of and/or'd together conditions, return -/// false. -bool -SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector &Cases){ - if (Cases.size() != 2) return true; - - // If this is two comparisons of the same values or'd or and'd together, they - // will get folded into a single comparison, so don't emit two blocks. 
- if ((Cases[0].CmpLHS == Cases[1].CmpLHS && - Cases[0].CmpRHS == Cases[1].CmpRHS) || - (Cases[0].CmpRHS == Cases[1].CmpLHS && - Cases[0].CmpLHS == Cases[1].CmpRHS)) { - return false; + // If this is a static alloca, generate it as the frameindex instead of + // computation. + if (const AllocaInst *AI = dyn_cast(V)) { + DenseMap::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) + return DAG.getFrameIndex(SI->second, TLI.getPointerTy()); } - // Handle: (X != null) | (Y != null) --> (X|Y) != 0 - // Handle: (X == null) & (Y == null) --> (X|Y) == 0 - if (Cases[0].CmpRHS == Cases[1].CmpRHS && - Cases[0].CC == Cases[1].CC && - isa(Cases[0].CmpRHS) && - cast(Cases[0].CmpRHS)->isNullValue()) { - if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB) - return false; - if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB) - return false; - } - - return true; + unsigned InReg = FuncInfo.ValueMap[V]; + assert(InReg && "Value not in map!"); + + RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); + SDValue Chain = DAG.getEntryNode(); + return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL); } -void SelectionDAGBuilder::visitBr(const BranchInst &I) { - MachineBasicBlock *BrMBB = FuncInfo.MBBMap[I.getParent()]; +/// Get the EVTs and ArgFlags collections that represent the legalized return +/// type of the given function. This does not require a DAG or a return value, +/// and is suitable for use before any DAGs for the function are constructed. +static void getReturnInfo(const Type* ReturnType, + Attributes attr, SmallVectorImpl &OutVTs, + SmallVectorImpl &OutFlags, + const TargetLowering &TLI, + SmallVectorImpl *Offsets = 0) { + SmallVector ValueVTs; + ComputeValueVTs(TLI, ReturnType, ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) return; + unsigned Offset = 0; - // Update machine-CFG edges. - MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; + for (unsigned j = 0, f = NumValues; j != f; ++j) { + EVT VT = ValueVTs[j]; + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; - // Figure out which block is immediately after the current one. - MachineBasicBlock *NextBlock = 0; - MachineFunction::iterator BBI = BrMBB; - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; + if (attr & Attribute::SExt) + ExtendKind = ISD::SIGN_EXTEND; + else if (attr & Attribute::ZExt) + ExtendKind = ISD::ZERO_EXTEND; - if (I.isUnconditional()) { - // Update machine-CFG edges. - BrMBB->addSuccessor(Succ0MBB); + // FIXME: C calling convention requires the return type to be promoted to + // at least 32-bit. But this is not necessary for non-C calling + // conventions. The frontend should mark functions whose return values + // require promoting with signext or zeroext attributes. + if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { + EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32); + if (VT.bitsLT(MinVT)) + VT = MinVT; + } - // If this is not a fall-through branch, emit the branch. 
- if (Succ0MBB != NextBlock) - DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), - MVT::Other, getControlRoot(), - DAG.getBasicBlock(Succ0MBB))); + unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT); + EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT); + unsigned PartSize = TLI.getTargetData()->getTypeAllocSize( + PartVT.getTypeForEVT(ReturnType->getContext())); - return; + // 'inreg' on function refers to return value + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (attr & Attribute::InReg) + Flags.setInReg(); + + // Propagate extension type if any + if (attr & Attribute::SExt) + Flags.setSExt(); + else if (attr & Attribute::ZExt) + Flags.setZExt(); + + for (unsigned i = 0; i < NumParts; ++i) { + OutVTs.push_back(PartVT); + OutFlags.push_back(Flags); + if (Offsets) + { + Offsets->push_back(Offset); + Offset += PartSize; + } + } } +} - // If this condition is one of the special cases we handle, do special stuff - // now. - const Value *CondVal = I.getCondition(); - MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)]; +void SelectionDAGBuilder::visitRet(const ReturnInst &I) { + SDValue Chain = getControlRoot(); + SmallVector Outs; - // If this is a series of conditions that are or'd or and'd together, emit - // this as a sequence of branches instead of setcc's with and/or operations. - // For example, instead of something like: - // cmp A, B - // C = seteq - // cmp D, E - // F = setle - // or C, F - // jnz foo - // Emit: - // cmp A, B - // je foo - // cmp D, E - // jle foo - // - if (const BinaryOperator *BOp = dyn_cast(CondVal)) { - if (BOp->hasOneUse() && - (BOp->getOpcode() == Instruction::And || - BOp->getOpcode() == Instruction::Or)) { - FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, - BOp->getOpcode()); - // If the compares in later blocks need to use values not currently - // exported from this block, export them now. This block should always - // be the first entry. - assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!"); + if (!FuncInfo.CanLowerReturn) { + unsigned DemoteReg = FuncInfo.DemoteRegister; + const Function *F = I.getParent()->getParent(); - // Allow some cases to be rejected. - if (ShouldEmitAsBranches(SwitchCases)) { - for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) { - ExportFromCurrentBlock(SwitchCases[i].CmpLHS); - ExportFromCurrentBlock(SwitchCases[i].CmpRHS); - } + // Emit a store of the return value through the virtual register. + // Leave Outs empty so that LowerReturn won't try to load return + // registers the usual way. + SmallVector PtrValueVTs; + ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()), + PtrValueVTs); - // Emit the branch for this block. - visitSwitchCase(SwitchCases[0], BrMBB); - SwitchCases.erase(SwitchCases.begin()); - return; - } + SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]); + SDValue RetOp = getValue(I.getOperand(0)); - // Okay, we decided not to do this, remove any inserted MBB's and clear - // SwitchCases. 
- for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) - FuncInfo.MF->erase(SwitchCases[i].ThisBB); + SmallVector ValueVTs; + SmallVector Offsets; + ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); + unsigned NumValues = ValueVTs.size(); - SwitchCases.clear(); + SmallVector Chains(NumValues); + EVT PtrVT = PtrValueVTs[0]; + for (unsigned i = 0; i != NumValues; ++i) { + SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, RetPtr, + DAG.getConstant(Offsets[i], PtrVT)); + Chains[i] = + DAG.getStore(Chain, getCurDebugLoc(), + SDValue(RetOp.getNode(), RetOp.getResNo() + i), + Add, NULL, Offsets[i], false, false, 0); } - } - // Create a CaseBlock record representing this branch. - CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()), - NULL, Succ0MBB, Succ1MBB, BrMBB); + Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], NumValues); + } else if (I.getNumOperands() != 0) { + SmallVector ValueVTs; + ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues) { + SDValue RetOp = getValue(I.getOperand(0)); + for (unsigned j = 0, f = NumValues; j != f; ++j) { + EVT VT = ValueVTs[j]; - // Use visitSwitchCase to actually insert the fast branch sequence for this - // cond branch. - visitSwitchCase(CB, BrMBB); -} + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; -/// visitSwitchCase - Emits the necessary code to represent a single node in -/// the binary search tree resulting from lowering a switch instruction. -void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, - MachineBasicBlock *SwitchBB) { - SDValue Cond; - SDValue CondLHS = getValue(CB.CmpLHS); - DebugLoc dl = getCurDebugLoc(); + const Function *F = I.getParent()->getParent(); + if (F->paramHasAttr(0, Attribute::SExt)) + ExtendKind = ISD::SIGN_EXTEND; + else if (F->paramHasAttr(0, Attribute::ZExt)) + ExtendKind = ISD::ZERO_EXTEND; - // Build the setcc now. - if (CB.CmpMHS == NULL) { - // Fold "(X == true)" to X and "(X == false)" to !X to - // handle common cases produced by branch lowering. - if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) && - CB.CC == ISD::SETEQ) - Cond = CondLHS; - else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) && - CB.CC == ISD::SETEQ) { - SDValue True = DAG.getConstant(1, CondLHS.getValueType()); - Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True); - } else - Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC); - } else { - assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now"); + // FIXME: C calling convention requires the return type to be promoted + // to at least 32-bit. But this is not necessary for non-C calling + // conventions. The frontend should mark functions whose return values + // require promoting with signext or zeroext attributes. 
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { + EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32); + if (VT.bitsLT(MinVT)) + VT = MinVT; + } - const APInt& Low = cast(CB.CmpLHS)->getValue(); - const APInt& High = cast(CB.CmpRHS)->getValue(); + unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT); + EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); + SmallVector Parts(NumParts); + getCopyToParts(DAG, getCurDebugLoc(), + SDValue(RetOp.getNode(), RetOp.getResNo() + j), + &Parts[0], NumParts, PartVT, ExtendKind); - SDValue CmpOp = getValue(CB.CmpMHS); - EVT VT = CmpOp.getValueType(); + // 'inreg' on function refers to return value + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (F->paramHasAttr(0, Attribute::InReg)) + Flags.setInReg(); - if (cast(CB.CmpLHS)->isMinValue(true)) { - Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT), - ISD::SETLE); - } else { - SDValue SUB = DAG.getNode(ISD::SUB, dl, - VT, CmpOp, DAG.getConstant(Low, VT)); - Cond = DAG.getSetCC(dl, MVT::i1, SUB, - DAG.getConstant(High-Low, VT), ISD::SETULE); + // Propagate extension type if any + if (F->paramHasAttr(0, Attribute::SExt)) + Flags.setSExt(); + else if (F->paramHasAttr(0, Attribute::ZExt)) + Flags.setZExt(); + + for (unsigned i = 0; i < NumParts; ++i) + Outs.push_back(ISD::OutputArg(Flags, Parts[i], /*isfixed=*/true)); + } } } - // Update successor info - SwitchBB->addSuccessor(CB.TrueBB); - SwitchBB->addSuccessor(CB.FalseBB); + bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); + CallingConv::ID CallConv = + DAG.getMachineFunction().getFunction()->getCallingConv(); + Chain = TLI.LowerReturn(Chain, CallConv, isVarArg, + Outs, getCurDebugLoc(), DAG); - // Set NextBlock to be the MBB immediately after the current one, if any. - // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = 0; - MachineFunction::iterator BBI = SwitchBB; - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; + // Verify that the target's LowerReturn behaved as expected. + assert(Chain.getNode() && Chain.getValueType() == MVT::Other && + "LowerReturn didn't return a valid chain!"); - // If the lhs block is the next block, invert the condition so that we can - // fall through to the lhs instead of the rhs block. - if (CB.TrueBB == NextBlock) { - std::swap(CB.TrueBB, CB.FalseBB); - SDValue True = DAG.getConstant(1, Cond.getValueType()); - Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True); - } + // Update the DAG with the new chain value resulting from return lowering. + DAG.setRoot(Chain); +} - SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, - MVT::Other, getControlRoot(), Cond, - DAG.getBasicBlock(CB.TrueBB)); +/// CopyToExportRegsIfNeeded - If the given value has virtual registers +/// created for it, emit nodes to copy the value into the virtual +/// registers. +void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) { + DenseMap::iterator VMI = FuncInfo.ValueMap.find(V); + if (VMI != FuncInfo.ValueMap.end()) { + assert(!V->use_empty() && "Unused value assigned virtual registers!"); + CopyValueToVirtualRegister(V, VMI->second); + } +} - // If the branch was constant folded, fix up the CFG. - if (BrCond.getOpcode() == ISD::BR) { - SwitchBB->removeSuccessor(CB.FalseBB); - } else { - // Otherwise, go ahead and insert the false branch. 
- if (BrCond == getControlRoot()) - SwitchBB->removeSuccessor(CB.TrueBB); +/// ExportFromCurrentBlock - If this condition isn't known to be exported from +/// the current basic block, add it to ValueMap now so that we'll get a +/// CopyTo/FromReg. +void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) { + // No need to export constants. + if (!isa(V) && !isa(V)) return; - if (CB.FalseBB != NextBlock) - BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, - DAG.getBasicBlock(CB.FalseBB)); - } + // Already exported? + if (FuncInfo.isExportedInst(V)) return; - DAG.setRoot(BrCond); + unsigned Reg = FuncInfo.InitializeRegForValue(V); + CopyValueToVirtualRegister(V, Reg); } -/// visitJumpTable - Emit JumpTable node in the current MBB -void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { - // Emit the code for the jump table - assert(JT.Reg != -1U && "Should lower JT Header first!"); - EVT PTy = TLI.getPointerTy(); - SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), - JT.Reg, PTy); - SDValue Table = DAG.getJumpTable(JT.JTI, PTy); - SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurDebugLoc(), - MVT::Other, Index.getValue(1), - Table, Index); - DAG.setRoot(BrJumpTable); -} +bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, + const BasicBlock *FromBB) { + // The operands of the setcc have to be in this block. We don't know + // how to export them from some other block. + if (const Instruction *VI = dyn_cast(V)) { + // Can export from current BB. + if (VI->getParent() == FromBB) + return true; -/// visitJumpTableHeader - This function emits necessary code to produce index -/// in the JumpTable from switch case. -void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, - JumpTableHeader &JTH, - MachineBasicBlock *SwitchBB) { - // Subtract the lowest switch case value from the value being switched on and - // conditional branch to default mbb if the result is greater than the - // difference between smallest and largest cases. - SDValue SwitchOp = getValue(JTH.SValue); - EVT VT = SwitchOp.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp, - DAG.getConstant(JTH.First, VT)); + // Is already exported, noop. + return FuncInfo.isExportedInst(V); + } - // The SDNode we just created, which holds the value being switched on minus - // the smallest case value, needs to be copied to a virtual register so it - // can be used as an index into the jump table in a subsequent basic block. - // This value may be smaller or larger than the target's pointer type, and - // therefore require extension or truncating. - SwitchOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), TLI.getPointerTy()); + // If this is an argument, we can export it if the BB is the entry block or + // if it is already exported. + if (isa(V)) { + if (FromBB == &FromBB->getParent()->getEntryBlock()) + return true; - unsigned JumpTableReg = FuncInfo.MakeReg(TLI.getPointerTy()); - SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), - JumpTableReg, SwitchOp); - JT.Reg = JumpTableReg; + // Otherwise, can only export this if it is already exported. + return FuncInfo.isExportedInst(V); + } - // Emit the range check for the jump table, and branch to the default block - // for the switch statement if the value being switched on exceeds the largest - // case in the switch. 
- SDValue CMP = DAG.getSetCC(getCurDebugLoc(), - TLI.getSetCCResultType(Sub.getValueType()), Sub, - DAG.getConstant(JTH.Last-JTH.First,VT), - ISD::SETUGT); + // Otherwise, constants can always be exported. + return true; +} - // Set NextBlock to be the MBB immediately after the current one, if any. - // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = 0; - MachineFunction::iterator BBI = SwitchBB; +static bool InBlock(const Value *V, const BasicBlock *BB) { + if (const Instruction *I = dyn_cast(V)) + return I->getParent() == BB; + return true; +} - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; +/// EmitBranchForMergedCondition - Helper method for FindMergedConditions. +/// This function emits a branch and is used at the leaves of an OR or an +/// AND operator tree. +/// +void +SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + MachineBasicBlock *CurBB, + MachineBasicBlock *SwitchBB) { + const BasicBlock *BB = CurBB->getBasicBlock(); - SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), - MVT::Other, CopyTo, CMP, - DAG.getBasicBlock(JT.Default)); + // If the leaf of the tree is a comparison, merge the condition into + // the caseblock. + if (const CmpInst *BOp = dyn_cast(Cond)) { + // The operands of the cmp have to be in this block. We don't know + // how to export them from some other block. If this is the first block + // of the sequence, no exporting is needed. + if (CurBB == SwitchBB || + (isExportableFromCurrentBlock(BOp->getOperand(0), BB) && + isExportableFromCurrentBlock(BOp->getOperand(1), BB))) { + ISD::CondCode Condition; + if (const ICmpInst *IC = dyn_cast(Cond)) { + Condition = getICmpCondCode(IC->getPredicate()); + } else if (const FCmpInst *FC = dyn_cast(Cond)) { + Condition = getFCmpCondCode(FC->getPredicate()); + } else { + Condition = ISD::SETEQ; // silence warning. + llvm_unreachable("Unknown compare instruction"); + } - if (JT.MBB != NextBlock) - BrCond = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond, - DAG.getBasicBlock(JT.MBB)); + CaseBlock CB(Condition, BOp->getOperand(0), + BOp->getOperand(1), NULL, TBB, FBB, CurBB); + SwitchCases.push_back(CB); + return; + } + } - DAG.setRoot(BrCond); + // Create a CaseBlock record representing this branch. + CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()), + NULL, TBB, FBB, CurBB); + SwitchCases.push_back(CB); } -/// visitBitTestHeader - This function emits necessary code to produce value -/// suitable for "bit tests" -void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, - MachineBasicBlock *SwitchBB) { - // Subtract the minimum value - SDValue SwitchOp = getValue(B.SValue); - EVT VT = SwitchOp.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp, - DAG.getConstant(B.First, VT)); +/// FindMergedConditions - If Cond is an expression like +void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + MachineBasicBlock *CurBB, + MachineBasicBlock *SwitchBB, + unsigned Opc) { + // If this node is not part of the or/and tree, emit it as a branch. 
+ const Instruction *BOp = dyn_cast(Cond); + if (!BOp || !(isa(BOp) || isa(BOp)) || + (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() || + BOp->getParent() != CurBB->getBasicBlock() || + !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || + !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { + EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB); + return; + } - // Check range - SDValue RangeCmp = DAG.getSetCC(getCurDebugLoc(), - TLI.getSetCCResultType(Sub.getValueType()), - Sub, DAG.getConstant(B.Range, VT), - ISD::SETUGT); + // Create TmpBB after CurBB. + MachineFunction::iterator BBI = CurBB; + MachineFunction &MF = DAG.getMachineFunction(); + MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock()); + CurBB->getParent()->insert(++BBI, TmpBB); - SDValue ShiftOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), - TLI.getPointerTy()); + if (Opc == Instruction::Or) { + // Codegen X | Y as: + // jmp_if_X TBB + // jmp TmpBB + // TmpBB: + // jmp_if_Y TBB + // jmp FBB + // - B.Reg = FuncInfo.MakeReg(TLI.getPointerTy()); - SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), - B.Reg, ShiftOp); + // Emit the LHS condition. + FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc); - // Set NextBlock to be the MBB immediately after the current one, if any. - // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = 0; - MachineFunction::iterator BBI = SwitchBB; - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; + // Emit the RHS condition into TmpBB. + FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc); + } else { + assert(Opc == Instruction::And && "Unknown merge op!"); + // Codegen X & Y as: + // jmp_if_X TmpBB + // jmp FBB + // TmpBB: + // jmp_if_Y TBB + // jmp FBB + // + // This requires creation of TmpBB after CurBB. - MachineBasicBlock* MBB = B.Cases[0].ThisBB; + // Emit the LHS condition. + FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc); - SwitchBB->addSuccessor(B.Default); - SwitchBB->addSuccessor(MBB); + // Emit the RHS condition into TmpBB. + FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc); + } +} - SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), - MVT::Other, CopyTo, RangeCmp, - DAG.getBasicBlock(B.Default)); +/// If the set of cases should be emitted as a series of branches, return true. +/// If we should emit this as a bunch of and/or'd together conditions, return +/// false. +bool +SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector &Cases){ + if (Cases.size() != 2) return true; - if (MBB != NextBlock) - BrRange = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, CopyTo, - DAG.getBasicBlock(MBB)); + // If this is two comparisons of the same values or'd or and'd together, they + // will get folded into a single comparison, so don't emit two blocks. 
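The jmp_if_X / jmp_if_Y comment blocks above describe the whole transformation FindMergedConditions performs. As a standalone illustration (invented Cond tree type, printf in place of real branch nodes), the recursive emitter below reproduces those branch sequences: for an Or the left operand's false edge falls into the temporary block, for an And its true edge does.

#include <cstdio>
#include <memory>
#include <string>

// A toy condition tree: either a named leaf test or an And/Or of two subtrees.
struct Cond {
  enum Kind { Leaf, And, Or } K = Leaf;
  std::string Name;                // leaf name
  std::unique_ptr<Cond> LHS, RHS;  // children for And/Or
};

std::unique_ptr<Cond> leaf(std::string N) {
  auto C = std::make_unique<Cond>();
  C->Name = std::move(N);
  return C;
}

std::unique_ptr<Cond> node(Cond::Kind K, std::unique_ptr<Cond> L,
                           std::unique_ptr<Cond> R) {
  auto C = std::make_unique<Cond>();
  C->K = K;
  C->LHS = std::move(L);
  C->RHS = std::move(R);
  return C;
}

static int NextTmp = 0;

// Emit branches for C, jumping to TBB when true and FBB when false. Each
// non-leaf splits off a temporary block that holds its right operand, in the
// same way the jmp_if_X / jmp_if_Y comments above describe.
void emitBranches(const Cond &C, const std::string &TBB,
                  const std::string &FBB) {
  if (C.K == Cond::Leaf) {
    std::printf("  jmp_if_%s %s\n", C.Name.c_str(), TBB.c_str());
    std::printf("  jmp %s\n", FBB.c_str());
    return;
  }
  std::string TmpBB = "Tmp" + std::to_string(NextTmp++);
  if (C.K == Cond::Or)
    emitBranches(*C.LHS, TBB, TmpBB);   // X | Y: false edge falls into TmpBB
  else
    emitBranches(*C.LHS, TmpBB, FBB);   // X & Y: true edge falls into TmpBB
  std::printf("%s:\n", TmpBB.c_str());
  emitBranches(*C.RHS, TBB, FBB);
}

int main() {
  auto Tree = node(Cond::Or, leaf("X"), leaf("Y"));
  emitBranches(*Tree, "TBB", "FBB");  // prints the X | Y pattern shown above
  return 0;
}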
+ if ((Cases[0].CmpLHS == Cases[1].CmpLHS && + Cases[0].CmpRHS == Cases[1].CmpRHS) || + (Cases[0].CmpRHS == Cases[1].CmpLHS && + Cases[0].CmpLHS == Cases[1].CmpRHS)) { + return false; + } - DAG.setRoot(BrRange); + // Handle: (X != null) | (Y != null) --> (X|Y) != 0 + // Handle: (X == null) & (Y == null) --> (X|Y) == 0 + if (Cases[0].CmpRHS == Cases[1].CmpRHS && + Cases[0].CC == Cases[1].CC && + isa(Cases[0].CmpRHS) && + cast(Cases[0].CmpRHS)->isNullValue()) { + if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB) + return false; + if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB) + return false; + } + + return true; } -/// visitBitTestCase - this function produces one "bit test" -void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, - unsigned Reg, - BitTestCase &B, - MachineBasicBlock *SwitchBB) { - // Make desired shift - SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg, - TLI.getPointerTy()); - SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), - TLI.getPointerTy(), - DAG.getConstant(1, TLI.getPointerTy()), - ShiftOp); - - // Emit bit tests and jumps - SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(), - TLI.getPointerTy(), SwitchVal, - DAG.getConstant(B.Mask, TLI.getPointerTy())); - SDValue AndCmp = DAG.getSetCC(getCurDebugLoc(), - TLI.getSetCCResultType(AndOp.getValueType()), - AndOp, DAG.getConstant(0, TLI.getPointerTy()), - ISD::SETNE); - - SwitchBB->addSuccessor(B.TargetBB); - SwitchBB->addSuccessor(NextMBB); +void SelectionDAGBuilder::visitBr(const BranchInst &I) { + MachineBasicBlock *BrMBB = FuncInfo.MBBMap[I.getParent()]; - SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), - MVT::Other, getControlRoot(), - AndCmp, DAG.getBasicBlock(B.TargetBB)); + // Update machine-CFG edges. + MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; - // Set NextBlock to be the MBB immediately after the current one, if any. - // This is used to avoid emitting unnecessary branches to the next block. + // Figure out which block is immediately after the current one. MachineBasicBlock *NextBlock = 0; - MachineFunction::iterator BBI = SwitchBB; + MachineFunction::iterator BBI = BrMBB; if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; - if (NextMBB != NextBlock) - BrAnd = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd, - DAG.getBasicBlock(NextMBB)); - - DAG.setRoot(BrAnd); -} - -void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { - MachineBasicBlock *InvokeMBB = FuncInfo.MBBMap[I.getParent()]; + if (I.isUnconditional()) { + // Update machine-CFG edges. + BrMBB->addSuccessor(Succ0MBB); - // Retrieve successors. - MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; - MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)]; + // If this is not a fall-through branch, emit the branch. + if (Succ0MBB != NextBlock) + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), + MVT::Other, getControlRoot(), + DAG.getBasicBlock(Succ0MBB))); - const Value *Callee(I.getCalledValue()); - if (isa(Callee)) - visitInlineAsm(&I); - else - LowerCallTo(&I, getValue(Callee), false, LandingPad); + return; + } - // If the value of the invoke is used outside of its defining block, make it - // available as a virtual register. - CopyToExportRegsIfNeeded(&I); + // If this condition is one of the special cases we handle, do special stuff + // now. 
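ShouldEmitAsBranches above declines to split `(X != null) | (Y != null)` and `(X == null) & (Y == null)` into two blocks because each folds to a single test of `X | Y`. A quick exhaustive check of those two identities over 8-bit values (plain integers standing in for pointers):

#include <cassert>

int main() {
  // Exhaustively check the two folds mentioned above on 8-bit values:
  //   (X != 0) | (Y != 0)  ==  (X | Y) != 0
  //   (X == 0) & (Y == 0)  ==  (X | Y) == 0
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 256; ++y) {
      assert(((x != 0) || (y != 0)) == ((x | y) != 0));
      assert(((x == 0) && (y == 0)) == ((x | y) == 0));
    }
  return 0;
}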
+ const Value *CondVal = I.getCondition(); + MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)]; - // Update successor info - InvokeMBB->addSuccessor(Return); - InvokeMBB->addSuccessor(LandingPad); + // If this is a series of conditions that are or'd or and'd together, emit + // this as a sequence of branches instead of setcc's with and/or operations. + // For example, instead of something like: + // cmp A, B + // C = seteq + // cmp D, E + // F = setle + // or C, F + // jnz foo + // Emit: + // cmp A, B + // je foo + // cmp D, E + // jle foo + // + if (const BinaryOperator *BOp = dyn_cast(CondVal)) { + if (BOp->hasOneUse() && + (BOp->getOpcode() == Instruction::And || + BOp->getOpcode() == Instruction::Or)) { + FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, + BOp->getOpcode()); + // If the compares in later blocks need to use values not currently + // exported from this block, export them now. This block should always + // be the first entry. + assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!"); - // Drop into normal successor. - DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), - MVT::Other, getControlRoot(), - DAG.getBasicBlock(Return))); -} + // Allow some cases to be rejected. + if (ShouldEmitAsBranches(SwitchCases)) { + for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) { + ExportFromCurrentBlock(SwitchCases[i].CmpLHS); + ExportFromCurrentBlock(SwitchCases[i].CmpRHS); + } -void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) { -} + // Emit the branch for this block. + visitSwitchCase(SwitchCases[0], BrMBB); + SwitchCases.erase(SwitchCases.begin()); + return; + } -/// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for -/// small case ranges). -bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, - CaseRecVector& WorkList, - const Value* SV, - MachineBasicBlock *Default, - MachineBasicBlock *SwitchBB) { - Case& BackCase = *(CR.Range.second-1); + // Okay, we decided not to do this, remove any inserted MBB's and clear + // SwitchCases. + for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) + FuncInfo.MF->erase(SwitchCases[i].ThisBB); - // Size is the number of Cases represented by this range. - size_t Size = CR.Range.second - CR.Range.first; - if (Size > 3) - return false; + SwitchCases.clear(); + } + } - // Get the MachineFunction which holds the current MBB. This is used when - // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = FuncInfo.MF; + // Create a CaseBlock record representing this branch. + CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()), + NULL, Succ0MBB, Succ1MBB, BrMBB); - // Figure out which block is immediately after the current one. - MachineBasicBlock *NextBlock = 0; - MachineFunction::iterator BBI = CR.CaseBB; + // Use visitSwitchCase to actually insert the fast branch sequence for this + // cond branch. + visitSwitchCase(CB, BrMBB); +} - if (++BBI != FuncInfo.MF->end()) - NextBlock = BBI; +/// visitSwitchCase - Emits the necessary code to represent a single node in +/// the binary search tree resulting from lowering a switch instruction. 
+void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, + MachineBasicBlock *SwitchBB) { + SDValue Cond; + SDValue CondLHS = getValue(CB.CmpLHS); + DebugLoc dl = getCurDebugLoc(); - // TODO: If any two of the cases has the same destination, and if one value - // is the same as the other, but has one bit unset that the other has set, - // use bit manipulation to do two compares at once. For example: - // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)" + // Build the setcc now. + if (CB.CmpMHS == NULL) { + // Fold "(X == true)" to X and "(X == false)" to !X to + // handle common cases produced by branch lowering. + if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) && + CB.CC == ISD::SETEQ) + Cond = CondLHS; + else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) && + CB.CC == ISD::SETEQ) { + SDValue True = DAG.getConstant(1, CondLHS.getValueType()); + Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True); + } else + Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC); + } else { + assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now"); - // Rearrange the case blocks so that the last one falls through if possible. - if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) { - // The last case block won't fall through into 'NextBlock' if we emit the - // branches in this order. See if rearranging a case value would help. - for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) { - if (I->BB == NextBlock) { - std::swap(*I, BackCase); - break; - } - } - } + const APInt& Low = cast(CB.CmpLHS)->getValue(); + const APInt& High = cast(CB.CmpRHS)->getValue(); - // Create a CaseBlock record representing a conditional branch to - // the Case's target mbb if the value being switched on SV is equal - // to C. - MachineBasicBlock *CurBlock = CR.CaseBB; - for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { - MachineBasicBlock *FallThrough; - if (I != E-1) { - FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock()); - CurMF->insert(BBI, FallThrough); + SDValue CmpOp = getValue(CB.CmpMHS); + EVT VT = CmpOp.getValueType(); - // Put SV in a virtual register to make it available from the new blocks. - ExportFromCurrentBlock(SV); + if (cast(CB.CmpLHS)->isMinValue(true)) { + Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT), + ISD::SETLE); } else { - // If the last case doesn't match, go to the default block. - FallThrough = Default; + SDValue SUB = DAG.getNode(ISD::SUB, dl, + VT, CmpOp, DAG.getConstant(Low, VT)); + Cond = DAG.getSetCC(dl, MVT::i1, SUB, + DAG.getConstant(High-Low, VT), ISD::SETULE); } + } - const Value *RHS, *LHS, *MHS; - ISD::CondCode CC; - if (I->High == I->Low) { - // This is just small small case range :) containing exactly 1 case - CC = ISD::SETEQ; - LHS = SV; RHS = I->High; MHS = NULL; - } else { - CC = ISD::SETLE; - LHS = I->Low; MHS = SV; RHS = I->High; - } - CaseBlock CB(CC, LHS, RHS, MHS, I->BB, FallThrough, CurBlock); + // Update successor info + SwitchBB->addSuccessor(CB.TrueBB); + SwitchBB->addSuccessor(CB.FalseBB); - // If emitting the first comparison, just call visitSwitchCase to emit the - // code into the current block. Otherwise, push the CaseBlock onto the - // vector to be later processed by SDISel, and insert the node's MBB - // before the next MBB. - if (CurBlock == SwitchBB) - visitSwitchCase(CB, SwitchBB); - else - SwitchCases.push_back(CB); + // Set NextBlock to be the MBB immediately after the current one, if any. 
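The CmpMHS path above lowers a `Low <= X <= High` case range as a subtract followed by one unsigned comparison (SUB + SETULE). The standalone check below shows why the rewrite is exact; the helper names are invented, and the test range is kept small enough that the signed subtraction cannot overflow (the DAG form works on wrapping machine integers).

#include <cassert>
#include <cstdint>

// Naive form of the range test used for a case range [Low, High].
bool inRangeNaive(int32_t X, int32_t Low, int32_t High) {
  return Low <= X && X <= High;
}

// The rewrite emitted above: SUB then one unsigned comparison. Subtracting
// Low shifts the range to [0, High - Low]; any X outside it wraps around to a
// large unsigned value and fails the single compare.
bool inRangeSubSetULE(int32_t X, int32_t Low, int32_t High) {
  return (uint32_t)(X - Low) <= (uint32_t)(High - Low);
}

int main() {
  // Spot-check a signed range including negatives and both boundaries.
  const int32_t Low = -5, High = 17;
  for (int32_t X = -1000; X <= 1000; ++X)
    assert(inRangeNaive(X, Low, High) == inRangeSubSetULE(X, Low, High));
  return 0;
}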
+ // This is used to avoid emitting unnecessary branches to the next block. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = SwitchBB; + if (++BBI != FuncInfo.MF->end()) + NextBlock = BBI; - CurBlock = FallThrough; + // If the lhs block is the next block, invert the condition so that we can + // fall through to the lhs instead of the rhs block. + if (CB.TrueBB == NextBlock) { + std::swap(CB.TrueBB, CB.FalseBB); + SDValue True = DAG.getConstant(1, Cond.getValueType()); + Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True); } - return true; -} - -static inline bool areJTsAllowed(const TargetLowering &TLI) { - return !DisableJumpTables && - (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || - TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); -} + SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, + MVT::Other, getControlRoot(), Cond, + DAG.getBasicBlock(CB.TrueBB)); -static APInt ComputeRange(const APInt &First, const APInt &Last) { - APInt LastExt(Last), FirstExt(First); - uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1; - LastExt.sext(BitWidth); FirstExt.sext(BitWidth); - return (LastExt - FirstExt + 1ULL); + // If the branch was constant folded, fix up the CFG. + if (BrCond.getOpcode() == ISD::BR) { + SwitchBB->removeSuccessor(CB.FalseBB); + } else { + // Otherwise, go ahead and insert the false branch. + if (BrCond == getControlRoot()) + SwitchBB->removeSuccessor(CB.TrueBB); + + if (CB.FalseBB != NextBlock) + BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, + DAG.getBasicBlock(CB.FalseBB)); + } + + DAG.setRoot(BrCond); } -/// handleJTSwitchCase - Emit jumptable for current switch case range -bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, - CaseRecVector& WorkList, - const Value* SV, - MachineBasicBlock* Default, - MachineBasicBlock *SwitchBB) { - Case& FrontCase = *CR.Range.first; - Case& BackCase = *(CR.Range.second-1); +/// visitJumpTable - Emit JumpTable node in the current MBB +void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { + // Emit the code for the jump table + assert(JT.Reg != -1U && "Should lower JT Header first!"); + EVT PTy = TLI.getPointerTy(); + SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), + JT.Reg, PTy); + SDValue Table = DAG.getJumpTable(JT.JTI, PTy); + SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurDebugLoc(), + MVT::Other, Index.getValue(1), + Table, Index); + DAG.setRoot(BrJumpTable); +} - const APInt &First = cast(FrontCase.Low)->getValue(); - const APInt &Last = cast(BackCase.High)->getValue(); +/// visitJumpTableHeader - This function emits necessary code to produce index +/// in the JumpTable from switch case. +void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, + JumpTableHeader &JTH, + MachineBasicBlock *SwitchBB) { + // Subtract the lowest switch case value from the value being switched on and + // conditional branch to default mbb if the result is greater than the + // difference between smallest and largest cases. 
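In visitSwitchCase above, when the true destination happens to be the block laid out next, the successors are swapped and the i1 condition is inverted by XOR-ing it with 1, so the common path falls through and at most one extra unconditional branch is emitted. A small standalone model of that layout decision (block names and the print-style emitter are invented):

#include <cstdio>
#include <string>
#include <utility>

// Emit a two-way branch on Cond with an explicit notion of which block is
// laid out next. If the true block is the fall-through block, branch on the
// inverted condition instead (the XOR-with-1 trick used above for an i1) so
// that the taken edge is the uncommon one.
void emitCondBr(const std::string &Cond, std::string TrueBB,
                std::string FalseBB, const std::string &NextBlock) {
  std::string C = Cond;
  if (TrueBB == NextBlock) {
    std::swap(TrueBB, FalseBB);
    C = "(" + Cond + " xor 1)";   // for an i1 value, xor 1 is logical not
  }
  std::printf("  brcond %s, %s\n", C.c_str(), TrueBB.c_str());
  if (FalseBB != NextBlock)
    std::printf("  br %s\n", FalseBB.c_str());
  // otherwise FalseBB is the fall-through block and no branch is needed
}

int main() {
  // TrueBB is laid out next: branch on the inverted condition to FalseBB.
  emitCondBr("c", "bb.true", "bb.false", "bb.true");
  // Neither successor is next: conditional branch plus an unconditional one.
  emitCondBr("c", "bb.true", "bb.false", "bb.other");
  return 0;
}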
+ SDValue SwitchOp = getValue(JTH.SValue); + EVT VT = SwitchOp.getValueType(); + SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp, + DAG.getConstant(JTH.First, VT)); - APInt TSize(First.getBitWidth(), 0); - for (CaseItr I = CR.Range.first, E = CR.Range.second; - I!=E; ++I) - TSize += I->size(); + // The SDNode we just created, which holds the value being switched on minus + // the smallest case value, needs to be copied to a virtual register so it + // can be used as an index into the jump table in a subsequent basic block. + // This value may be smaller or larger than the target's pointer type, and + // therefore require extension or truncating. + SwitchOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), TLI.getPointerTy()); - if (!areJTsAllowed(TLI) || TSize.ult(4)) - return false; + unsigned JumpTableReg = FuncInfo.MakeReg(TLI.getPointerTy()); + SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), + JumpTableReg, SwitchOp); + JT.Reg = JumpTableReg; - APInt Range = ComputeRange(First, Last); - double Density = TSize.roundToDouble() / Range.roundToDouble(); - if (Density < 0.4) - return false; + // Emit the range check for the jump table, and branch to the default block + // for the switch statement if the value being switched on exceeds the largest + // case in the switch. + SDValue CMP = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(Sub.getValueType()), Sub, + DAG.getConstant(JTH.Last-JTH.First,VT), + ISD::SETUGT); - DEBUG(dbgs() << "Lowering jump table\n" - << "First entry: " << First << ". Last entry: " << Last << '\n' - << "Range: " << Range - << "Size: " << TSize << ". Density: " << Density << "\n\n"); + // Set NextBlock to be the MBB immediately after the current one, if any. + // This is used to avoid emitting unnecessary branches to the next block. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = SwitchBB; - // Get the MachineFunction which holds the current MBB. This is used when - // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = FuncInfo.MF; + if (++BBI != FuncInfo.MF->end()) + NextBlock = BBI; - // Figure out which block is immediately after the current one. - MachineFunction::iterator BBI = CR.CaseBB; - ++BBI; + SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), + MVT::Other, CopyTo, CMP, + DAG.getBasicBlock(JT.Default)); - const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); + if (JT.MBB != NextBlock) + BrCond = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond, + DAG.getBasicBlock(JT.MBB)); - // Create a new basic block to hold the code for loading the address - // of the jump table, and jumping to it. Update successor information; - // we will either branch to the default case for the switch, or the jump - // table. - MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB); - CurMF->insert(BBI, JumpTableBB); - CR.CaseBB->addSuccessor(Default); - CR.CaseBB->addSuccessor(JumpTableBB); + DAG.setRoot(BrCond); +} - // Build a vector of destination BBs, corresponding to each target - // of the jump table. If the value of the jump table slot corresponds to - // a case statement, push the case's BB onto the vector, otherwise, push - // the default BB. 
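visitJumpTableHeader above rebases the switched value by subtracting the smallest case, resizes it to pointer width, and guards the table with a single unsigned greater-than check against Last - First. The sketch below shows the same shape with an ordinary array of function pointers; lowerDenseSwitch and the handler names are illustrative only.

#include <cassert>
#include <cstdint>

static int handleA() { return 1; }
static int handleB() { return 2; }
static int handleC() { return 3; }
static int handleDefault() { return 0; }

// Dense switch over the cases {10, 11, 12}, lowered the same way as the jump
// table header above: subtract the first case, then one unsigned comparison
// rejects everything outside [First, Last] (values below First wrap around to
// large unsigned numbers and also fail the check).
int lowerDenseSwitch(int32_t X) {
  static int (*const Table[])() = {handleA, handleB, handleC};
  const int32_t First = 10, Last = 12;
  uint32_t Index = (uint32_t)(X - First);   // SUB, then resize to index width
  if (Index > (uint32_t)(Last - First))     // unsigned-greater -> default
    return handleDefault();
  return Table[Index]();                    // indexed dispatch through the table
}

int main() {
  assert(lowerDenseSwitch(9) == 0);
  assert(lowerDenseSwitch(10) == 1);
  assert(lowerDenseSwitch(12) == 3);
  assert(lowerDenseSwitch(100) == 0);
  return 0;
}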
- std::vector DestBBs; - APInt TEI = First; - for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) { - const APInt &Low = cast(I->Low)->getValue(); - const APInt &High = cast(I->High)->getValue(); +/// visitBitTestHeader - This function emits necessary code to produce value +/// suitable for "bit tests" +void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, + MachineBasicBlock *SwitchBB) { + // Subtract the minimum value + SDValue SwitchOp = getValue(B.SValue); + EVT VT = SwitchOp.getValueType(); + SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp, + DAG.getConstant(B.First, VT)); - if (Low.sle(TEI) && TEI.sle(High)) { - DestBBs.push_back(I->BB); - if (TEI==High) - ++I; - } else { - DestBBs.push_back(Default); - } - } + // Check range + SDValue RangeCmp = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(Sub.getValueType()), + Sub, DAG.getConstant(B.Range, VT), + ISD::SETUGT); - // Update successor info. Add one edge to each unique successor. - BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs()); - for (std::vector::iterator I = DestBBs.begin(), - E = DestBBs.end(); I != E; ++I) { - if (!SuccsHandled[(*I)->getNumber()]) { - SuccsHandled[(*I)->getNumber()] = true; - JumpTableBB->addSuccessor(*I); - } - } + SDValue ShiftOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), + TLI.getPointerTy()); - // Create a jump table index for this jump table. - unsigned JTEncoding = TLI.getJumpTableEncoding(); - unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding) - ->createJumpTableIndex(DestBBs); + B.Reg = FuncInfo.MakeReg(TLI.getPointerTy()); + SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), + B.Reg, ShiftOp); - // Set the jump table information so that we can codegen it as a second - // MachineBasicBlock - JumpTable JT(-1U, JTI, JumpTableBB, Default); - JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB)); - if (CR.CaseBB == SwitchBB) - visitJumpTableHeader(JT, JTH, SwitchBB); + // Set NextBlock to be the MBB immediately after the current one, if any. + // This is used to avoid emitting unnecessary branches to the next block. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = SwitchBB; + if (++BBI != FuncInfo.MF->end()) + NextBlock = BBI; - JTCases.push_back(JumpTableBlock(JTH, JT)); + MachineBasicBlock* MBB = B.Cases[0].ThisBB; - return true; + SwitchBB->addSuccessor(B.Default); + SwitchBB->addSuccessor(MBB); + + SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), + MVT::Other, CopyTo, RangeCmp, + DAG.getBasicBlock(B.Default)); + + if (MBB != NextBlock) + BrRange = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, CopyTo, + DAG.getBasicBlock(MBB)); + + DAG.setRoot(BrRange); } -/// handleBTSplitSwitchCase - emit comparison and split binary search tree into -/// 2 subtrees. -bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, - CaseRecVector& WorkList, - const Value* SV, - MachineBasicBlock *Default, - MachineBasicBlock *SwitchBB) { - // Get the MachineFunction which holds the current MBB. This is used when - // inserting any additional MBBs necessary to represent the switch. 
- MachineFunction *CurMF = FuncInfo.MF; +/// visitBitTestCase - this function produces one "bit test" +void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, + unsigned Reg, + BitTestCase &B, + MachineBasicBlock *SwitchBB) { + // Make desired shift + SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg, + TLI.getPointerTy()); + SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), + TLI.getPointerTy(), + DAG.getConstant(1, TLI.getPointerTy()), + ShiftOp); - // Figure out which block is immediately after the current one. - MachineFunction::iterator BBI = CR.CaseBB; - ++BBI; + // Emit bit tests and jumps + SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(), + TLI.getPointerTy(), SwitchVal, + DAG.getConstant(B.Mask, TLI.getPointerTy())); + SDValue AndCmp = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(AndOp.getValueType()), + AndOp, DAG.getConstant(0, TLI.getPointerTy()), + ISD::SETNE); - Case& FrontCase = *CR.Range.first; - Case& BackCase = *(CR.Range.second-1); - const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); + SwitchBB->addSuccessor(B.TargetBB); + SwitchBB->addSuccessor(NextMBB); - // Size is the number of Cases represented by this range. - unsigned Size = CR.Range.second - CR.Range.first; + SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), + MVT::Other, getControlRoot(), + AndCmp, DAG.getBasicBlock(B.TargetBB)); - const APInt &First = cast(FrontCase.Low)->getValue(); - const APInt &Last = cast(BackCase.High)->getValue(); - double FMetric = 0; - CaseItr Pivot = CR.Range.first + Size/2; + // Set NextBlock to be the MBB immediately after the current one, if any. + // This is used to avoid emitting unnecessary branches to the next block. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = SwitchBB; + if (++BBI != FuncInfo.MF->end()) + NextBlock = BBI; - // Select optimal pivot, maximizing sum density of LHS and RHS. This will - // (heuristically) allow us to emit JumpTable's later. - APInt TSize(First.getBitWidth(), 0); - for (CaseItr I = CR.Range.first, E = CR.Range.second; - I!=E; ++I) - TSize += I->size(); + if (NextMBB != NextBlock) + BrAnd = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd, + DAG.getBasicBlock(NextMBB)); - APInt LSize = FrontCase.size(); - APInt RSize = TSize-LSize; - DEBUG(dbgs() << "Selecting best pivot: \n" - << "First: " << First << ", Last: " << Last <<'\n' - << "LSize: " << LSize << ", RSize: " << RSize << '\n'); - for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second; - J!=E; ++I, ++J) { - const APInt &LEnd = cast(I->High)->getValue(); - const APInt &RBegin = cast(J->Low)->getValue(); - APInt Range = ComputeRange(LEnd, RBegin); - assert((Range - 2ULL).isNonNegative() && - "Invalid case distance"); - double LDensity = (double)LSize.roundToDouble() / - (LEnd - First + 1ULL).roundToDouble(); - double RDensity = (double)RSize.roundToDouble() / - (Last - RBegin + 1ULL).roundToDouble(); - double Metric = Range.logBase2()*(LDensity+RDensity); - // Should always split in some non-trivial place - DEBUG(dbgs() <<"=>Step\n" - << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n' - << "LDensity: " << LDensity - << ", RDensity: " << RDensity << '\n' - << "Metric: " << Metric << '\n'); - if (FMetric < Metric) { - Pivot = J; - FMetric = Metric; - DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n'); - } - - LSize += J->size(); - RSize -= J->size(); - } - if (areJTsAllowed(TLI)) { - // If our case is dense we *really* should handle it earlier! 
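visitBitTestCase above folds several case values that share a destination into one shift-and-mask test: shift 1 left by the rebased switch value and AND it with a precomputed mask of case bits. A standalone version of that test, assuming the rebased value is below the word width (which the earlier range check guarantees):

#include <cassert>
#include <cstdint>

// One "bit test": Reg holds the switch value minus the low bound, Mask has
// one bit set per case value that jumps to the same destination. The branch
// is taken when (1 << Reg) & Mask is nonzero. Assumes Reg < 64.
bool bitTestHits(uint64_t Reg, uint64_t Mask) {
  return ((UINT64_C(1) << Reg) & Mask) != 0;
}

int main() {
  // Cases {1, 3, 4} (already rebased) -> mask 0b11010.
  const uint64_t Mask = (1u << 1) | (1u << 3) | (1u << 4);
  assert(bitTestHits(1, Mask));
  assert(!bitTestHits(2, Mask));
  assert(bitTestHits(4, Mask));
  assert(!bitTestHits(5, Mask));
  return 0;
}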
- assert((FMetric > 0) && "Should handle dense range earlier!"); - } else { - Pivot = CR.Range.first + Size/2; - } - - CaseRange LHSR(CR.Range.first, Pivot); - CaseRange RHSR(Pivot, CR.Range.second); - Constant *C = Pivot->Low; - MachineBasicBlock *FalseBB = 0, *TrueBB = 0; - - // We know that we branch to the LHS if the Value being switched on is - // less than the Pivot value, C. We use this to optimize our binary - // tree a bit, by recognizing that if SV is greater than or equal to the - // LHS's Case Value, and that Case Value is exactly one less than the - // Pivot's Value, then we can branch directly to the LHS's Target, - // rather than creating a leaf node for it. - if ((LHSR.second - LHSR.first) == 1 && - LHSR.first->High == CR.GE && - cast(C)->getValue() == - (cast(CR.GE)->getValue() + 1LL)) { - TrueBB = LHSR.first->BB; - } else { - TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB); - CurMF->insert(BBI, TrueBB); - WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR)); + DAG.setRoot(BrAnd); +} - // Put SV in a virtual register to make it available from the new blocks. - ExportFromCurrentBlock(SV); - } +void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { + MachineBasicBlock *InvokeMBB = FuncInfo.MBBMap[I.getParent()]; - // Similar to the optimization above, if the Value being switched on is - // known to be less than the Constant CR.LT, and the current Case Value - // is CR.LT - 1, then we can branch directly to the target block for - // the current Case Value, rather than emitting a RHS leaf node for it. - if ((RHSR.second - RHSR.first) == 1 && CR.LT && - cast(RHSR.first->Low)->getValue() == - (cast(CR.LT)->getValue() - 1LL)) { - FalseBB = RHSR.first->BB; - } else { - FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB); - CurMF->insert(BBI, FalseBB); - WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR)); + // Retrieve successors. + MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; + MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)]; - // Put SV in a virtual register to make it available from the new blocks. - ExportFromCurrentBlock(SV); - } + const Value *Callee(I.getCalledValue()); + if (isa(Callee)) + visitInlineAsm(&I); + else + LowerCallTo(&I, getValue(Callee), false, LandingPad); - // Create a CaseBlock record representing a conditional branch to - // the LHS node if the value being switched on SV is less than C. - // Otherwise, branch to LHS. - CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB); + // If the value of the invoke is used outside of its defining block, make it + // available as a virtual register. + CopyToExportRegsIfNeeded(&I); - if (CR.CaseBB == SwitchBB) - visitSwitchCase(CB, SwitchBB); - else - SwitchCases.push_back(CB); + // Update successor info + InvokeMBB->addSuccessor(Return); + InvokeMBB->addSuccessor(LandingPad); - return true; + // Drop into normal successor. + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), + MVT::Other, getControlRoot(), + DAG.getBasicBlock(Return))); } -/// handleBitTestsSwitchCase - if current case range has few destination and -/// range span less, than machine word bitwidth, encode case range into series -/// of masks and emit bit tests with these masks. 
-bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, - CaseRecVector& WorkList, - const Value* SV, - MachineBasicBlock* Default, - MachineBasicBlock *SwitchBB){ - EVT PTy = TLI.getPointerTy(); - unsigned IntPtrBits = PTy.getSizeInBits(); +void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) { +} - Case& FrontCase = *CR.Range.first; +/// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for +/// small case ranges). +bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, + CaseRecVector& WorkList, + const Value* SV, + MachineBasicBlock *Default, + MachineBasicBlock *SwitchBB) { Case& BackCase = *(CR.Range.second-1); + // Size is the number of Cases represented by this range. + size_t Size = CR.Range.second - CR.Range.first; + if (Size > 3) + return false; + // Get the MachineFunction which holds the current MBB. This is used when // inserting any additional MBBs necessary to represent the switch. MachineFunction *CurMF = FuncInfo.MF; - // If target does not have legal shift left, do not emit bit tests at all. - if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy())) - return false; - - size_t numCmps = 0; - for (CaseItr I = CR.Range.first, E = CR.Range.second; - I!=E; ++I) { - // Single case counts one, case range - two. - numCmps += (I->Low == I->High ? 1 : 2); - } - - // Count unique destinations - SmallSet Dests; - for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) { - Dests.insert(I->BB); - if (Dests.size() > 3) - // Don't bother the code below, if there are too much unique destinations - return false; - } - DEBUG(dbgs() << "Total number of unique destinations: " - << Dests.size() << '\n' - << "Total number of comparisons: " << numCmps << '\n'); - - // Compute span of values. - const APInt& minValue = cast(FrontCase.Low)->getValue(); - const APInt& maxValue = cast(BackCase.High)->getValue(); - APInt cmpRange = maxValue - minValue; - - DEBUG(dbgs() << "Compare range: " << cmpRange << '\n' - << "Low bound: " << minValue << '\n' - << "High bound: " << maxValue << '\n'); - - if (cmpRange.uge(IntPtrBits) || - (!(Dests.size() == 1 && numCmps >= 3) && - !(Dests.size() == 2 && numCmps >= 5) && - !(Dests.size() >= 3 && numCmps >= 6))) - return false; - - DEBUG(dbgs() << "Emitting bit tests\n"); - APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth()); + // Figure out which block is immediately after the current one. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = CR.CaseBB; - // Optimize the case where all the case values fit in a - // word without having to subtract minValue. In this case, - // we can optimize away the subtraction. - if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) { - cmpRange = maxValue; - } else { - lowBound = minValue; - } + if (++BBI != FuncInfo.MF->end()) + NextBlock = BBI; - CaseBitsVector CasesBits; - unsigned i, count = 0; + // TODO: If any two of the cases has the same destination, and if one value + // is the same as the other, but has one bit unset that the other has set, + // use bit manipulation to do two compares at once. For example: + // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)" - for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) { - MachineBasicBlock* Dest = I->BB; - for (i = 0; i < count; ++i) - if (Dest == CasesBits[i].BB) + // Rearrange the case blocks so that the last one falls through if possible. 
+ if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) { + // The last case block won't fall through into 'NextBlock' if we emit the + // branches in this order. See if rearranging a case value would help. + for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) { + if (I->BB == NextBlock) { + std::swap(*I, BackCase); break; - - if (i == count) { - assert((count < 3) && "Too much destinations to test!"); - CasesBits.push_back(CaseBits(0, Dest, 0)); - count++; + } } + } - const APInt& lowValue = cast(I->Low)->getValue(); - const APInt& highValue = cast(I->High)->getValue(); + // Create a CaseBlock record representing a conditional branch to + // the Case's target mbb if the value being switched on SV is equal + // to C. + MachineBasicBlock *CurBlock = CR.CaseBB; + for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { + MachineBasicBlock *FallThrough; + if (I != E-1) { + FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock()); + CurMF->insert(BBI, FallThrough); - uint64_t lo = (lowValue - lowBound).getZExtValue(); - uint64_t hi = (highValue - lowBound).getZExtValue(); + // Put SV in a virtual register to make it available from the new blocks. + ExportFromCurrentBlock(SV); + } else { + // If the last case doesn't match, go to the default block. + FallThrough = Default; + } - for (uint64_t j = lo; j <= hi; j++) { - CasesBits[i].Mask |= 1ULL << j; - CasesBits[i].Bits++; + const Value *RHS, *LHS, *MHS; + ISD::CondCode CC; + if (I->High == I->Low) { + // This is just small small case range :) containing exactly 1 case + CC = ISD::SETEQ; + LHS = SV; RHS = I->High; MHS = NULL; + } else { + CC = ISD::SETLE; + LHS = I->Low; MHS = SV; RHS = I->High; } + CaseBlock CB(CC, LHS, RHS, MHS, I->BB, FallThrough, CurBlock); - } - std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp()); + // If emitting the first comparison, just call visitSwitchCase to emit the + // code into the current block. Otherwise, push the CaseBlock onto the + // vector to be later processed by SDISel, and insert the node's MBB + // before the next MBB. + if (CurBlock == SwitchBB) + visitSwitchCase(CB, SwitchBB); + else + SwitchCases.push_back(CB); - BitTestInfo BTC; + CurBlock = FallThrough; + } - // Figure out which block is immediately after the current one. - MachineFunction::iterator BBI = CR.CaseBB; - ++BBI; + return true; +} - const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); - - DEBUG(dbgs() << "Cases:\n"); - for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) { - DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask - << ", Bits: " << CasesBits[i].Bits - << ", BB: " << CasesBits[i].BB << '\n'); - - MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB); - CurMF->insert(BBI, CaseBB); - BTC.push_back(BitTestCase(CasesBits[i].Mask, - CaseBB, - CasesBits[i].BB)); - - // Put SV in a virtual register to make it available from the new blocks. 
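The TODO near the top of handleSmallSwitchRange above suggests folding two equality tests whose constants differ in one bit into a single compare, e.g. `(X == 6 || X == 4)` becoming `((X | 2) == 6)`. An exhaustive check of that particular rewrite (and only that one) over 16-bit values:

#include <cassert>
#include <cstdint>

int main() {
  // 6 = 0b110 and 4 = 0b100 differ only in bit 1, so OR-ing that bit in
  // collapses both values to 6 and maps no other value onto 6.
  for (uint32_t X = 0; X <= 0xFFFF; ++X)
    assert(((X == 6) || (X == 4)) == ((X | 2) == 6));
  return 0;
}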
- ExportFromCurrentBlock(SV); - } - - BitTestBlock BTB(lowBound, cmpRange, SV, - -1U, (CR.CaseBB == SwitchBB), - CR.CaseBB, Default, BTC); - - if (CR.CaseBB == SwitchBB) - visitBitTestHeader(BTB, SwitchBB); - - BitTestCases.push_back(BTB); +static inline bool areJTsAllowed(const TargetLowering &TLI) { + return !DisableJumpTables && + (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || + TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); +} - return true; +static APInt ComputeRange(const APInt &First, const APInt &Last) { + APInt LastExt(Last), FirstExt(First); + uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1; + LastExt.sext(BitWidth); FirstExt.sext(BitWidth); + return (LastExt - FirstExt + 1ULL); } -/// Clusterify - Transform simple list of Cases into list of CaseRange's -size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, - const SwitchInst& SI) { - size_t numCmps = 0; +/// handleJTSwitchCase - Emit jumptable for current switch case range +bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + const Value* SV, + MachineBasicBlock* Default, + MachineBasicBlock *SwitchBB) { + Case& FrontCase = *CR.Range.first; + Case& BackCase = *(CR.Range.second-1); - // Start with "simple" cases - for (size_t i = 1; i < SI.getNumSuccessors(); ++i) { - MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)]; - Cases.push_back(Case(SI.getSuccessorValue(i), - SI.getSuccessorValue(i), - SMBB)); - } - std::sort(Cases.begin(), Cases.end(), CaseCmp()); + const APInt &First = cast(FrontCase.Low)->getValue(); + const APInt &Last = cast(BackCase.High)->getValue(); - // Merge case into clusters - if (Cases.size() >= 2) - // Must recompute end() each iteration because it may be - // invalidated by erase if we hold on to it - for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) { - const APInt& nextValue = cast(J->Low)->getValue(); - const APInt& currentValue = cast(I->High)->getValue(); - MachineBasicBlock* nextBB = J->BB; - MachineBasicBlock* currentBB = I->BB; + APInt TSize(First.getBitWidth(), 0); + for (CaseItr I = CR.Range.first, E = CR.Range.second; + I!=E; ++I) + TSize += I->size(); - // If the two neighboring cases go to the same destination, merge them - // into a single case. - if ((nextValue - currentValue == 1) && (currentBB == nextBB)) { - I->High = J->High; - J = Cases.erase(J); - } else { - I = J++; - } - } + if (!areJTsAllowed(TLI) || TSize.ult(4)) + return false; - for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) { - if (I->Low != I->High) - // A range counts double, since it requires two compares. - ++numCmps; - } + APInt Range = ComputeRange(First, Last); + double Density = TSize.roundToDouble() / Range.roundToDouble(); + if (Density < 0.4) + return false; - return numCmps; -} + DEBUG(dbgs() << "Lowering jump table\n" + << "First entry: " << First << ". Last entry: " << Last << '\n' + << "Range: " << Range + << "Size: " << TSize << ". Density: " << Density << "\n\n"); -void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { - MachineBasicBlock *SwitchMBB = FuncInfo.MBBMap[SI.getParent()]; + // Get the MachineFunction which holds the current MBB. This is used when + // inserting any additional MBBs necessary to represent the switch. + MachineFunction *CurMF = FuncInfo.MF; // Figure out which block is immediately after the current one. 
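handleJTSwitchCase above only builds a table when there are at least four case values and they cover at least 40% of the span from the first to the last case. The helper below restates that gate over plain 64-bit integers so the numbers are easy to experiment with; it is an illustration of the heuristic, not the builder's APInt-based code.

#include <cassert>
#include <cstdint>

// Rough standalone restatement of the jump-table profitability gate above:
// TSize is the number of individual case values, [First, Last] their span.
bool worthAJumpTable(uint64_t TSize, int64_t First, int64_t Last) {
  if (TSize < 4)
    return false;                               // too few cases
  double Range = (double)(Last - First) + 1.0;  // as in ComputeRange(First, Last)
  double Density = (double)TSize / Range;
  return Density >= 0.4;                        // at least 40% dense
}

int main() {
  assert(worthAJumpTable(5, 0, 9));     // 5 cases over 10 slots: 50% dense
  assert(!worthAJumpTable(3, 0, 2));    // dense, but fewer than 4 cases
  assert(!worthAJumpTable(6, 0, 99));   // 6 cases over 100 slots: too sparse
  return 0;
}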
- MachineBasicBlock *NextBlock = 0; - MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()]; + MachineFunction::iterator BBI = CR.CaseBB; + ++BBI; - // If there is only the default destination, branch to it if it is not the - // next basic block. Otherwise, just fall through. - if (SI.getNumOperands() == 2) { - // Update machine-CFG edges. + const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); - // If this is not a fall-through branch, emit the branch. - SwitchMBB->addSuccessor(Default); - if (Default != NextBlock) - DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), - MVT::Other, getControlRoot(), - DAG.getBasicBlock(Default))); + // Create a new basic block to hold the code for loading the address + // of the jump table, and jumping to it. Update successor information; + // we will either branch to the default case for the switch, or the jump + // table. + MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB); + CurMF->insert(BBI, JumpTableBB); + CR.CaseBB->addSuccessor(Default); + CR.CaseBB->addSuccessor(JumpTableBB); - return; + // Build a vector of destination BBs, corresponding to each target + // of the jump table. If the value of the jump table slot corresponds to + // a case statement, push the case's BB onto the vector, otherwise, push + // the default BB. + std::vector DestBBs; + APInt TEI = First; + for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) { + const APInt &Low = cast(I->Low)->getValue(); + const APInt &High = cast(I->High)->getValue(); + + if (Low.sle(TEI) && TEI.sle(High)) { + DestBBs.push_back(I->BB); + if (TEI==High) + ++I; + } else { + DestBBs.push_back(Default); + } } - // If there are any non-default case statements, create a vector of Cases - // representing each one, and sort the vector so that we can efficiently - // create a binary search tree from them. - CaseVector Cases; - size_t numCmps = Clusterify(Cases, SI); - DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size() - << ". Total compares: " << numCmps << '\n'); - numCmps = 0; + // Update successor info. Add one edge to each unique successor. + BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs()); + for (std::vector::iterator I = DestBBs.begin(), + E = DestBBs.end(); I != E; ++I) { + if (!SuccsHandled[(*I)->getNumber()]) { + SuccsHandled[(*I)->getNumber()] = true; + JumpTableBB->addSuccessor(*I); + } + } - // Get the Value to be switched on and default basic blocks, which will be - // inserted into CaseBlock records, representing basic blocks in the binary - // search tree. - const Value *SV = SI.getOperand(0); + // Create a jump table index for this jump table. 
+ unsigned JTEncoding = TLI.getJumpTableEncoding(); + unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding) + ->createJumpTableIndex(DestBBs); - // Push the initial CaseRec onto the worklist - CaseRecVector WorkList; - WorkList.push_back(CaseRec(SwitchMBB,0,0, - CaseRange(Cases.begin(),Cases.end()))); + // Set the jump table information so that we can codegen it as a second + // MachineBasicBlock + JumpTable JT(-1U, JTI, JumpTableBB, Default); + JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB)); + if (CR.CaseBB == SwitchBB) + visitJumpTableHeader(JT, JTH, SwitchBB); - while (!WorkList.empty()) { - // Grab a record representing a case range to process off the worklist - CaseRec CR = WorkList.back(); - WorkList.pop_back(); + JTCases.push_back(JumpTableBlock(JTH, JT)); - if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) - continue; + return true; +} - // If the range has few cases (two or less) emit a series of specific - // tests. - if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB)) - continue; +/// handleBTSplitSwitchCase - emit comparison and split binary search tree into +/// 2 subtrees. +bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + const Value* SV, + MachineBasicBlock *Default, + MachineBasicBlock *SwitchBB) { + // Get the MachineFunction which holds the current MBB. This is used when + // inserting any additional MBBs necessary to represent the switch. + MachineFunction *CurMF = FuncInfo.MF; - // If the switch has more than 5 blocks, and at least 40% dense, and the - // target supports indirect branches, then emit a jump table rather than - // lowering the switch to a binary tree of conditional branches. - if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) - continue; + // Figure out which block is immediately after the current one. + MachineFunction::iterator BBI = CR.CaseBB; + ++BBI; - // Emit binary tree. We need to pick a pivot, and push left and right ranges - // onto the worklist. Leafs are handled via handleSmallSwitchRange() call. - handleBTSplitSwitchCase(CR, WorkList, SV, Default, SwitchMBB); - } -} + Case& FrontCase = *CR.Range.first; + Case& BackCase = *(CR.Range.second-1); + const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); -void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { - MachineBasicBlock *IndirectBrMBB = FuncInfo.MBBMap[I.getParent()]; + // Size is the number of Cases represented by this range. + unsigned Size = CR.Range.second - CR.Range.first; - // Update machine-CFG edges with unique successors. - SmallVector succs; - succs.reserve(I.getNumSuccessors()); - for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) - succs.push_back(I.getSuccessor(i)); - array_pod_sort(succs.begin(), succs.end()); - succs.erase(std::unique(succs.begin(), succs.end()), succs.end()); - for (unsigned i = 0, e = succs.size(); i != e; ++i) - IndirectBrMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]); + const APInt &First = cast(FrontCase.Low)->getValue(); + const APInt &Last = cast(BackCase.High)->getValue(); + double FMetric = 0; + CaseItr Pivot = CR.Range.first + Size/2; - DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(), - MVT::Other, getControlRoot(), - getValue(I.getAddress()))); -} + // Select optimal pivot, maximizing sum density of LHS and RHS. This will + // (heuristically) allow us to emit JumpTable's later. 
+ APInt TSize(First.getBitWidth(), 0); + for (CaseItr I = CR.Range.first, E = CR.Range.second; + I!=E; ++I) + TSize += I->size(); -void SelectionDAGBuilder::visitFSub(const User &I) { - // -0.0 - X --> fneg - const Type *Ty = I.getType(); - if (Ty->isVectorTy()) { - if (ConstantVector *CV = dyn_cast(I.getOperand(0))) { - const VectorType *DestTy = cast(I.getType()); - const Type *ElTy = DestTy->getElementType(); - unsigned VL = DestTy->getNumElements(); - std::vector NZ(VL, ConstantFP::getNegativeZero(ElTy)); - Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size()); - if (CV == CNZ) { - SDValue Op2 = getValue(I.getOperand(1)); - setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), - Op2.getValueType(), Op2)); - return; - } + APInt LSize = FrontCase.size(); + APInt RSize = TSize-LSize; + DEBUG(dbgs() << "Selecting best pivot: \n" + << "First: " << First << ", Last: " << Last <<'\n' + << "LSize: " << LSize << ", RSize: " << RSize << '\n'); + for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second; + J!=E; ++I, ++J) { + const APInt &LEnd = cast(I->High)->getValue(); + const APInt &RBegin = cast(J->Low)->getValue(); + APInt Range = ComputeRange(LEnd, RBegin); + assert((Range - 2ULL).isNonNegative() && + "Invalid case distance"); + double LDensity = (double)LSize.roundToDouble() / + (LEnd - First + 1ULL).roundToDouble(); + double RDensity = (double)RSize.roundToDouble() / + (Last - RBegin + 1ULL).roundToDouble(); + double Metric = Range.logBase2()*(LDensity+RDensity); + // Should always split in some non-trivial place + DEBUG(dbgs() <<"=>Step\n" + << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n' + << "LDensity: " << LDensity + << ", RDensity: " << RDensity << '\n' + << "Metric: " << Metric << '\n'); + if (FMetric < Metric) { + Pivot = J; + FMetric = Metric; + DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n'); } + + LSize += J->size(); + RSize -= J->size(); + } + if (areJTsAllowed(TLI)) { + // If our case is dense we *really* should handle it earlier! + assert((FMetric > 0) && "Should handle dense range earlier!"); + } else { + Pivot = CR.Range.first + Size/2; } - if (ConstantFP *CFP = dyn_cast(I.getOperand(0))) - if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) { - SDValue Op2 = getValue(I.getOperand(1)); - setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), - Op2.getValueType(), Op2)); - return; - } + CaseRange LHSR(CR.Range.first, Pivot); + CaseRange RHSR(Pivot, CR.Range.second); + Constant *C = Pivot->Low; + MachineBasicBlock *FalseBB = 0, *TrueBB = 0; - visitBinary(I, ISD::FSUB); -} + // We know that we branch to the LHS if the Value being switched on is + // less than the Pivot value, C. We use this to optimize our binary + // tree a bit, by recognizing that if SV is greater than or equal to the + // LHS's Case Value, and that Case Value is exactly one less than the + // Pivot's Value, then we can branch directly to the LHS's Target, + // rather than creating a leaf node for it. 
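The pivot loop above scores every split point between adjacent case clusters as log2(gap) x (left density + right density) and keeps the best one. The compact standalone version below uses plain integer ranges and std::log2 where the real code works on APInt (with an integer logBase2); the CaseCluster type and pickPivot name are invented.

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

struct CaseCluster { int64_t Low, High; };   // inclusive, sorted, disjoint

// Score every split point the way the pivot loop above does: the gap between
// the clusters on either side of the split (log2-weighted) times the sum of
// the densities of the two halves, where density = #values / span of a half.
size_t pickPivot(const std::vector<CaseCluster> &Cases) {
  int64_t First = Cases.front().Low, Last = Cases.back().High;
  double TSize = 0, LSize = (double)(Cases.front().High - Cases.front().Low + 1);
  for (const CaseCluster &C : Cases)
    TSize += (double)(C.High - C.Low + 1);
  double RSize = TSize - LSize;
  size_t Pivot = Cases.size() / 2;
  double Best = 0;
  for (size_t I = 0, J = 1; J != Cases.size(); ++I, ++J) {
    double Range = (double)(Cases[J].Low - Cases[I].High) + 1.0;
    double LDensity = LSize / (double)(Cases[I].High - First + 1);
    double RDensity = RSize / (double)(Last - Cases[J].Low + 1);
    double Metric = std::log2(Range) * (LDensity + RDensity);
    if (Metric > Best) { Best = Metric; Pivot = J; }
    LSize += (double)(Cases[J].High - Cases[J].Low + 1);
    RSize -= (double)(Cases[J].High - Cases[J].Low + 1);
  }
  return Pivot;   // split into [0, Pivot) and [Pivot, end)
}

int main() {
  // Two dense clumps separated by a large hole: the best pivot is the start
  // of the second clump (index 3).
  std::vector<CaseCluster> Cases = {
      {0, 0}, {1, 1}, {2, 2}, {1000, 1000}, {1001, 1001}, {1002, 1002}};
  std::printf("pivot index: %zu\n", pickPivot(Cases));
  return 0;
}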
+ if ((LHSR.second - LHSR.first) == 1 && + LHSR.first->High == CR.GE && + cast(C)->getValue() == + (cast(CR.GE)->getValue() + 1LL)) { + TrueBB = LHSR.first->BB; + } else { + TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB); + CurMF->insert(BBI, TrueBB); + WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR)); -void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { - SDValue Op1 = getValue(I.getOperand(0)); - SDValue Op2 = getValue(I.getOperand(1)); - setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(), - Op1.getValueType(), Op1, Op2)); -} + // Put SV in a virtual register to make it available from the new blocks. + ExportFromCurrentBlock(SV); + } -void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { - SDValue Op1 = getValue(I.getOperand(0)); - SDValue Op2 = getValue(I.getOperand(1)); - if (!I.getType()->isVectorTy() && - Op2.getValueType() != TLI.getShiftAmountTy()) { - // If the operand is smaller than the shift count type, promote it. - EVT PTy = TLI.getPointerTy(); - EVT STy = TLI.getShiftAmountTy(); - if (STy.bitsGT(Op2.getValueType())) - Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(), - TLI.getShiftAmountTy(), Op2); - // If the operand is larger than the shift count type but the shift - // count type has enough bits to represent any shift value, truncate - // it now. This is a common case and it exposes the truncate to - // optimization early. - else if (STy.getSizeInBits() >= - Log2_32_Ceil(Op2.getValueType().getSizeInBits())) - Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), - TLI.getShiftAmountTy(), Op2); - // Otherwise we'll need to temporarily settle for some other - // convenient type; type legalization will make adjustments as - // needed. - else if (PTy.bitsLT(Op2.getValueType())) - Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), - TLI.getPointerTy(), Op2); - else if (PTy.bitsGT(Op2.getValueType())) - Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(), - TLI.getPointerTy(), Op2); + // Similar to the optimization above, if the Value being switched on is + // known to be less than the Constant CR.LT, and the current Case Value + // is CR.LT - 1, then we can branch directly to the target block for + // the current Case Value, rather than emitting a RHS leaf node for it. + if ((RHSR.second - RHSR.first) == 1 && CR.LT && + cast(RHSR.first->Low)->getValue() == + (cast(CR.LT)->getValue() - 1LL)) { + FalseBB = RHSR.first->BB; + } else { + FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB); + CurMF->insert(BBI, FalseBB); + WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR)); + + // Put SV in a virtual register to make it available from the new blocks. + ExportFromCurrentBlock(SV); } - setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), - Op1.getValueType(), Op1, Op2)); -} + // Create a CaseBlock record representing a conditional branch to + // the LHS node if the value being switched on SV is less than C. + // Otherwise, branch to LHS. 
+ CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB); -void SelectionDAGBuilder::visitICmp(const User &I) { - ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; - if (const ICmpInst *IC = dyn_cast(&I)) - predicate = IC->getPredicate(); - else if (const ConstantExpr *IC = dyn_cast(&I)) - predicate = ICmpInst::Predicate(IC->getPredicate()); - SDValue Op1 = getValue(I.getOperand(0)); - SDValue Op2 = getValue(I.getOperand(1)); - ISD::CondCode Opcode = getICmpCondCode(predicate); + if (CR.CaseBB == SwitchBB) + visitSwitchCase(CB, SwitchBB); + else + SwitchCases.push_back(CB); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode)); + return true; } -void SelectionDAGBuilder::visitFCmp(const User &I) { - FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; - if (const FCmpInst *FC = dyn_cast(&I)) - predicate = FC->getPredicate(); - else if (const ConstantExpr *FC = dyn_cast(&I)) - predicate = FCmpInst::Predicate(FC->getPredicate()); - SDValue Op1 = getValue(I.getOperand(0)); - SDValue Op2 = getValue(I.getOperand(1)); - ISD::CondCode Condition = getFCmpCondCode(predicate); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition)); -} +/// handleBitTestsSwitchCase - if current case range has few destination and +/// range span less, than machine word bitwidth, encode case range into series +/// of masks and emit bit tests with these masks. +bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + const Value* SV, + MachineBasicBlock* Default, + MachineBasicBlock *SwitchBB){ + EVT PTy = TLI.getPointerTy(); + unsigned IntPtrBits = PTy.getSizeInBits(); -void SelectionDAGBuilder::visitSelect(const User &I) { - SmallVector ValueVTs; - ComputeValueVTs(TLI, I.getType(), ValueVTs); - unsigned NumValues = ValueVTs.size(); - if (NumValues == 0) return; + Case& FrontCase = *CR.Range.first; + Case& BackCase = *(CR.Range.second-1); - SmallVector Values(NumValues); - SDValue Cond = getValue(I.getOperand(0)); - SDValue TrueVal = getValue(I.getOperand(1)); - SDValue FalseVal = getValue(I.getOperand(2)); + // Get the MachineFunction which holds the current MBB. This is used when + // inserting any additional MBBs necessary to represent the switch. + MachineFunction *CurMF = FuncInfo.MF; - for (unsigned i = 0; i != NumValues; ++i) - Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(), - TrueVal.getNode()->getValueType(TrueVal.getResNo()+i), - Cond, - SDValue(TrueVal.getNode(), - TrueVal.getResNo() + i), - SDValue(FalseVal.getNode(), - FalseVal.getResNo() + i)); + // If target does not have legal shift left, do not emit bit tests at all. + if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy())) + return false; - setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), - DAG.getVTList(&ValueVTs[0], NumValues), - &Values[0], NumValues)); -} + size_t numCmps = 0; + for (CaseItr I = CR.Range.first, E = CR.Range.second; + I!=E; ++I) { + // Single case counts one, case range - two. + numCmps += (I->Low == I->High ? 1 : 2); + } -void SelectionDAGBuilder::visitTrunc(const User &I) { - // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). 
- SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N)); -} + // Count unique destinations + SmallSet Dests; + for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) { + Dests.insert(I->BB); + if (Dests.size() > 3) + // Don't bother the code below, if there are too much unique destinations + return false; + } + DEBUG(dbgs() << "Total number of unique destinations: " + << Dests.size() << '\n' + << "Total number of comparisons: " << numCmps << '\n'); -void SelectionDAGBuilder::visitZExt(const User &I) { - // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). - // ZExt also can't be a cast to bool for same reason. So, nothing much to do - SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N)); -} + // Compute span of values. + const APInt& minValue = cast(FrontCase.Low)->getValue(); + const APInt& maxValue = cast(BackCase.High)->getValue(); + APInt cmpRange = maxValue - minValue; -void SelectionDAGBuilder::visitSExt(const User &I) { - // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). - // SExt also can't be a cast to bool for same reason. So, nothing much to do - SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N)); -} + DEBUG(dbgs() << "Compare range: " << cmpRange << '\n' + << "Low bound: " << minValue << '\n' + << "High bound: " << maxValue << '\n'); -void SelectionDAGBuilder::visitFPTrunc(const User &I) { - // FPTrunc is never a no-op cast, no need to check - SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(), - DestVT, N, DAG.getIntPtrConstant(0))); -} + if (cmpRange.uge(IntPtrBits) || + (!(Dests.size() == 1 && numCmps >= 3) && + !(Dests.size() == 2 && numCmps >= 5) && + !(Dests.size() >= 3 && numCmps >= 6))) + return false; -void SelectionDAGBuilder::visitFPExt(const User &I){ - // FPTrunc is never a no-op cast, no need to check - SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N)); -} + DEBUG(dbgs() << "Emitting bit tests\n"); + APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth()); -void SelectionDAGBuilder::visitFPToUI(const User &I) { - // FPToUI is never a no-op cast, no need to check - SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N)); -} + // Optimize the case where all the case values fit in a + // word without having to subtract minValue. In this case, + // we can optimize away the subtraction. 
+ if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) { + cmpRange = maxValue; + } else { + lowBound = minValue; + } -void SelectionDAGBuilder::visitFPToSI(const User &I) { - // FPToSI is never a no-op cast, no need to check - SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N)); -} + CaseBitsVector CasesBits; + unsigned i, count = 0; -void SelectionDAGBuilder::visitUIToFP(const User &I) { - // UIToFP is never a no-op cast, no need to check - SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N)); -} + for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) { + MachineBasicBlock* Dest = I->BB; + for (i = 0; i < count; ++i) + if (Dest == CasesBits[i].BB) + break; -void SelectionDAGBuilder::visitSIToFP(const User &I){ - // SIToFP is never a no-op cast, no need to check - SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N)); -} + if (i == count) { + assert((count < 3) && "Too much destinations to test!"); + CasesBits.push_back(CaseBits(0, Dest, 0)); + count++; + } -void SelectionDAGBuilder::visitPtrToInt(const User &I) { - // What to do depends on the size of the integer and the size of the pointer. - // We can either truncate, zero extend, or no-op, accordingly. - SDValue N = getValue(I.getOperand(0)); - EVT SrcVT = N.getValueType(); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); -} + const APInt& lowValue = cast(I->Low)->getValue(); + const APInt& highValue = cast(I->High)->getValue(); -void SelectionDAGBuilder::visitIntToPtr(const User &I) { - // What to do depends on the size of the integer and the size of the pointer. - // We can either truncate, zero extend, or no-op, accordingly. - SDValue N = getValue(I.getOperand(0)); - EVT SrcVT = N.getValueType(); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); -} + uint64_t lo = (lowValue - lowBound).getZExtValue(); + uint64_t hi = (highValue - lowBound).getZExtValue(); -void SelectionDAGBuilder::visitBitCast(const User &I) { - SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); + for (uint64_t j = lo; j <= hi; j++) { + CasesBits[i].Mask |= 1ULL << j; + CasesBits[i].Bits++; + } - // BitCast assures us that source and destination are the same size so this is - // either a BIT_CONVERT or a no-op. - if (DestVT != N.getValueType()) - setValue(&I, DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), - DestVT, N)); // convert types. - else - setValue(&I, N); // noop cast. 
-} + } + std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp()); -void SelectionDAGBuilder::visitInsertElement(const User &I) { - SDValue InVec = getValue(I.getOperand(0)); - SDValue InVal = getValue(I.getOperand(1)); - SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), - TLI.getPointerTy(), - getValue(I.getOperand(2))); - setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(), - TLI.getValueType(I.getType()), - InVec, InVal, InIdx)); -} + BitTestInfo BTC; -void SelectionDAGBuilder::visitExtractElement(const User &I) { - SDValue InVec = getValue(I.getOperand(0)); - SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), - TLI.getPointerTy(), - getValue(I.getOperand(1))); - setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), - TLI.getValueType(I.getType()), InVec, InIdx)); -} + // Figure out which block is immediately after the current one. + MachineFunction::iterator BBI = CR.CaseBB; + ++BBI; -// Utility for visitShuffleVector - Returns true if the mask is mask starting -// from SIndx and increasing to the element length (undefs are allowed). -static bool SequentialMask(SmallVectorImpl &Mask, unsigned SIndx) { - unsigned MaskNumElts = Mask.size(); - for (unsigned i = 0; i != MaskNumElts; ++i) - if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx))) - return false; - return true; -} + const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); -void SelectionDAGBuilder::visitShuffleVector(const User &I) { - SmallVector Mask; - SDValue Src1 = getValue(I.getOperand(0)); - SDValue Src2 = getValue(I.getOperand(1)); + DEBUG(dbgs() << "Cases:\n"); + for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) { + DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask + << ", Bits: " << CasesBits[i].Bits + << ", BB: " << CasesBits[i].BB << '\n'); - // Convert the ConstantVector mask operand into an array of ints, with -1 - // representing undef values. - SmallVector MaskElts; - cast(I.getOperand(2))->getVectorElements(MaskElts); - unsigned MaskNumElts = MaskElts.size(); - for (unsigned i = 0; i != MaskNumElts; ++i) { - if (isa(MaskElts[i])) - Mask.push_back(-1); - else - Mask.push_back(cast(MaskElts[i])->getSExtValue()); + MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB); + CurMF->insert(BBI, CaseBB); + BTC.push_back(BitTestCase(CasesBits[i].Mask, + CaseBB, + CasesBits[i].BB)); + + // Put SV in a virtual register to make it available from the new blocks. + ExportFromCurrentBlock(SV); } - EVT VT = TLI.getValueType(I.getType()); - EVT SrcVT = Src1.getValueType(); - unsigned SrcNumElts = SrcVT.getVectorNumElements(); + BitTestBlock BTB(lowBound, cmpRange, SV, + -1U, (CR.CaseBB == SwitchBB), + CR.CaseBB, Default, BTC); - if (SrcNumElts == MaskNumElts) { - setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, - &Mask[0])); - return; - } + if (CR.CaseBB == SwitchBB) + visitBitTestHeader(BTB, SwitchBB); - // Normalize the shuffle vector since mask and vector length don't match. - if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) { - // Mask is longer than the source vectors and is a multiple of the source - // vectors. We can use concatenate vector to make the mask and vectors - // lengths match. - if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) { - // The shuffle is concatenating two vectors together. - setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), - VT, Src1, Src2)); - return; - } + BitTestCases.push_back(BTB); - // Pad both vectors with undefs to make them the same length as the mask. 
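The bit-test lowering above packs each destination's case values into a machine-word mask keyed on (value - lowBound) and then emits one AND-test per destination instead of one compare per case. A standalone sketch of that masking idea, with hypothetical names (buildBitTests, dispatch) standing in for the BitTestCase/BitTestBlock machinery; this is a model of the technique, not the patch's code, and it assumes the clustered range fits in 64 bits:

#include <cstdint>
#include <utility>
#include <vector>

// One mask per destination: bit j is set when (caseValue - LowBound) == j.
struct BitTest { uint64_t Mask; int Dest; };

static std::vector<BitTest>
buildBitTests(const std::vector<std::pair<uint64_t, int> > &Cases, uint64_t LowBound) {
  std::vector<BitTest> Tests;
  for (const auto &C : Cases) {
    uint64_t Bit = uint64_t(1) << (C.first - LowBound);  // assumes range < 64
    bool Found = false;
    for (auto &T : Tests)
      if (T.Dest == C.second) { T.Mask |= Bit; Found = true; break; }
    if (!Found)
      Tests.push_back({Bit, C.second});
  }
  return Tests;
}

// Dispatch: a single range check, then one AND per destination.
static int dispatch(const std::vector<BitTest> &Tests, uint64_t X,
                    uint64_t LowBound, uint64_t Range, int DefaultDest) {
  uint64_t Idx = X - LowBound;
  if (Idx > Range) return DefaultDest;            // outside the clustered range
  for (const auto &T : Tests)
    if ((uint64_t(1) << Idx) & T.Mask) return T.Dest;
  return DefaultDest;
}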
- unsigned NumConcat = MaskNumElts / SrcNumElts; - bool Src1U = Src1.getOpcode() == ISD::UNDEF; - bool Src2U = Src2.getOpcode() == ISD::UNDEF; - SDValue UndefVal = DAG.getUNDEF(SrcVT); + return true; +} - SmallVector MOps1(NumConcat, UndefVal); - SmallVector MOps2(NumConcat, UndefVal); - MOps1[0] = Src1; - MOps2[0] = Src2; +/// Clusterify - Transform simple list of Cases into list of CaseRange's +size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, + const SwitchInst& SI) { + size_t numCmps = 0; - Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, - getCurDebugLoc(), VT, - &MOps1[0], NumConcat); - Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, - getCurDebugLoc(), VT, - &MOps2[0], NumConcat); + // Start with "simple" cases + for (size_t i = 1; i < SI.getNumSuccessors(); ++i) { + MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)]; + Cases.push_back(Case(SI.getSuccessorValue(i), + SI.getSuccessorValue(i), + SMBB)); + } + std::sort(Cases.begin(), Cases.end(), CaseCmp()); - // Readjust mask for new input vector length. - SmallVector MappedOps; - for (unsigned i = 0; i != MaskNumElts; ++i) { - int Idx = Mask[i]; - if (Idx < (int)SrcNumElts) - MappedOps.push_back(Idx); - else - MappedOps.push_back(Idx + MaskNumElts - SrcNumElts); + // Merge case into clusters + if (Cases.size() >= 2) + // Must recompute end() each iteration because it may be + // invalidated by erase if we hold on to it + for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) { + const APInt& nextValue = cast(J->Low)->getValue(); + const APInt& currentValue = cast(I->High)->getValue(); + MachineBasicBlock* nextBB = J->BB; + MachineBasicBlock* currentBB = I->BB; + + // If the two neighboring cases go to the same destination, merge them + // into a single case. + if ((nextValue - currentValue == 1) && (currentBB == nextBB)) { + I->High = J->High; + J = Cases.erase(J); + } else { + I = J++; + } } - setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, - &MappedOps[0])); - return; + for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) { + if (I->Low != I->High) + // A range counts double, since it requires two compares. + ++numCmps; } - if (SrcNumElts > MaskNumElts) { - // Analyze the access pattern of the vector to see if we can extract - // two subvectors and do the shuffle. The analysis is done by calculating - // the range of elements the mask access on both vectors. - int MinRange[2] = { SrcNumElts+1, SrcNumElts+1}; - int MaxRange[2] = {-1, -1}; + return numCmps; +} - for (unsigned i = 0; i != MaskNumElts; ++i) { - int Idx = Mask[i]; - int Input = 0; - if (Idx < 0) - continue; +void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { + MachineBasicBlock *SwitchMBB = FuncInfo.MBBMap[SI.getParent()]; - if (Idx >= (int)SrcNumElts) { - Input = 1; - Idx -= SrcNumElts; - } - if (Idx > MaxRange[Input]) - MaxRange[Input] = Idx; - if (Idx < MinRange[Input]) - MinRange[Input] = Idx; - } + // Figure out which block is immediately after the current one. + MachineBasicBlock *NextBlock = 0; + MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()]; - // Check if the access is smaller than the vector size and can we find - // a reasonable extract index. - int RangeUse[2] = { 2, 2 }; // 0 = Unused, 1 = Extract, 2 = Can not - // Extract. 
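Clusterify above sorts the cases and merges neighbours that branch to the same block into ranges, so 1, 2, 3 all going to one destination become a single [1, 3] range (a range costs two compares instead of three). A minimal sketch of the same merge, using plain integers in place of ConstantInt and MachineBasicBlock:

#include <algorithm>
#include <vector>

struct CaseRange { long Low, High; int Dest; };

// Sort by Low, then fold adjacent same-destination cases into one range.
static void clusterify(std::vector<CaseRange> &Cases) {
  std::sort(Cases.begin(), Cases.end(),
            [](const CaseRange &A, const CaseRange &B) { return A.Low < B.Low; });
  std::vector<CaseRange> Out;
  for (const CaseRange &C : Cases) {
    if (!Out.empty() && Out.back().Dest == C.Dest && C.Low == Out.back().High + 1)
      Out.back().High = C.High;   // extend the previous range
    else
      Out.push_back(C);
  }
  Cases.swap(Out);
}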
- int StartIdx[2]; // StartIdx to extract from - for (int Input=0; Input < 2; ++Input) { - if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) { - RangeUse[Input] = 0; // Unused - StartIdx[Input] = 0; - } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) { - // Fits within range but we should see if we can find a good - // start index that is a multiple of the mask length. - if (MaxRange[Input] < (int)MaskNumElts) { - RangeUse[Input] = 1; // Extract from beginning of the vector - StartIdx[Input] = 0; - } else { - StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; - if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && - StartIdx[Input] + MaskNumElts < SrcNumElts) - RangeUse[Input] = 1; // Extract from a multiple of the mask length. - } - } - } - - if (RangeUse[0] == 0 && RangeUse[1] == 0) { - setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used. - return; - } - else if (RangeUse[0] < 2 && RangeUse[1] < 2) { - // Extract appropriate subvector and generate a vector shuffle - for (int Input=0; Input < 2; ++Input) { - SDValue &Src = Input == 0 ? Src1 : Src2; - if (RangeUse[Input] == 0) - Src = DAG.getUNDEF(VT); - else - Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT, - Src, DAG.getIntPtrConstant(StartIdx[Input])); - } + // If there is only the default destination, branch to it if it is not the + // next basic block. Otherwise, just fall through. + if (SI.getNumOperands() == 2) { + // Update machine-CFG edges. - // Calculate new mask. - SmallVector MappedOps; - for (unsigned i = 0; i != MaskNumElts; ++i) { - int Idx = Mask[i]; - if (Idx < 0) - MappedOps.push_back(Idx); - else if (Idx < (int)SrcNumElts) - MappedOps.push_back(Idx - StartIdx[0]); - else - MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts); - } + // If this is not a fall-through branch, emit the branch. + SwitchMBB->addSuccessor(Default); + if (Default != NextBlock) + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), + MVT::Other, getControlRoot(), + DAG.getBasicBlock(Default))); - setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, - &MappedOps[0])); - return; - } + return; } - // We can't use either concat vectors or extract subvectors so fall back to - // replacing the shuffle with extract and build vector. - // to insert and build vector. - EVT EltVT = VT.getVectorElementType(); - EVT PtrVT = TLI.getPointerTy(); - SmallVector Ops; - for (unsigned i = 0; i != MaskNumElts; ++i) { - if (Mask[i] < 0) { - Ops.push_back(DAG.getUNDEF(EltVT)); - } else { - int Idx = Mask[i]; - SDValue Res; - - if (Idx < (int)SrcNumElts) - Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), - EltVT, Src1, DAG.getConstant(Idx, PtrVT)); - else - Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), - EltVT, Src2, - DAG.getConstant(Idx - SrcNumElts, PtrVT)); - - Ops.push_back(Res); - } - } + // If there are any non-default case statements, create a vector of Cases + // representing each one, and sort the vector so that we can efficiently + // create a binary search tree from them. + CaseVector Cases; + size_t numCmps = Clusterify(Cases, SI); + DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size() + << ". Total compares: " << numCmps << '\n'); + numCmps = 0; - setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), - VT, &Ops[0], Ops.size())); -} + // Get the Value to be switched on and default basic blocks, which will be + // inserted into CaseBlock records, representing basic blocks in the binary + // search tree. 
+ const Value *SV = SI.getOperand(0); -void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { - const Value *Op0 = I.getOperand(0); - const Value *Op1 = I.getOperand(1); - const Type *AggTy = I.getType(); - const Type *ValTy = Op1->getType(); - bool IntoUndef = isa(Op0); - bool FromUndef = isa(Op1); + // Push the initial CaseRec onto the worklist + CaseRecVector WorkList; + WorkList.push_back(CaseRec(SwitchMBB,0,0, + CaseRange(Cases.begin(),Cases.end()))); - unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy, - I.idx_begin(), I.idx_end()); + while (!WorkList.empty()) { + // Grab a record representing a case range to process off the worklist + CaseRec CR = WorkList.back(); + WorkList.pop_back(); - SmallVector AggValueVTs; - ComputeValueVTs(TLI, AggTy, AggValueVTs); - SmallVector ValValueVTs; - ComputeValueVTs(TLI, ValTy, ValValueVTs); + if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) + continue; - unsigned NumAggValues = AggValueVTs.size(); - unsigned NumValValues = ValValueVTs.size(); - SmallVector Values(NumAggValues); + // If the range has few cases (two or less) emit a series of specific + // tests. + if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB)) + continue; - SDValue Agg = getValue(Op0); - SDValue Val = getValue(Op1); - unsigned i = 0; - // Copy the beginning value(s) from the original aggregate. - for (; i != LinearIndex; ++i) - Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : - SDValue(Agg.getNode(), Agg.getResNo() + i); - // Copy values from the inserted value(s). - for (; i != LinearIndex + NumValValues; ++i) - Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) : - SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex); - // Copy remaining value(s) from the original aggregate. - for (; i != NumAggValues; ++i) - Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : - SDValue(Agg.getNode(), Agg.getResNo() + i); + // If the switch has more than 5 blocks, and at least 40% dense, and the + // target supports indirect branches, then emit a jump table rather than + // lowering the switch to a binary tree of conditional branches. + if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) + continue; - setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), - DAG.getVTList(&AggValueVTs[0], NumAggValues), - &Values[0], NumAggValues)); + // Emit binary tree. We need to pick a pivot, and push left and right ranges + // onto the worklist. Leafs are handled via handleSmallSwitchRange() call. + handleBTSplitSwitchCase(CR, WorkList, SV, Default, SwitchMBB); + } } -void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { - const Value *Op0 = I.getOperand(0); - const Type *AggTy = Op0->getType(); - const Type *ValTy = I.getType(); - bool OutOfUndef = isa(Op0); +void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { + MachineBasicBlock *IndirectBrMBB = FuncInfo.MBBMap[I.getParent()]; - unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy, - I.idx_begin(), I.idx_end()); + // Update machine-CFG edges with unique successors. 
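visitSwitch above drives the lowering from a worklist: each case range is tried as a bit-test block, then as a small run of compares, then as a jump table, and otherwise split around a pivot into two sub-ranges that go back on the worklist. The sketch below models only the final binary-split shape as a recursive lookup over sorted, non-overlapping ranges; lowerAsTree is a hypothetical name, and the real pivot choice also weighs case density rather than always taking the midpoint:

#include <cstddef>
#include <vector>

struct CaseRange { long Low, High; int Dest; };

static int lowerAsTree(const std::vector<CaseRange> &Cases, size_t Lo, size_t Hi,
                       long X, int DefaultDest) {
  if (Lo >= Hi) return DefaultDest;
  if (Hi - Lo <= 2) {                      // small range: emit specific tests
    for (size_t i = Lo; i != Hi; ++i)
      if (X >= Cases[i].Low && X <= Cases[i].High) return Cases[i].Dest;
    return DefaultDest;
  }
  size_t Pivot = Lo + (Hi - Lo) / 2;       // split left/right around a pivot
  if (X < Cases[Pivot].Low)
    return lowerAsTree(Cases, Lo, Pivot, X, DefaultDest);
  return lowerAsTree(Cases, Pivot, Hi, X, DefaultDest);
}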
+ SmallVector succs; + succs.reserve(I.getNumSuccessors()); + for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) + succs.push_back(I.getSuccessor(i)); + array_pod_sort(succs.begin(), succs.end()); + succs.erase(std::unique(succs.begin(), succs.end()), succs.end()); + for (unsigned i = 0, e = succs.size(); i != e; ++i) + IndirectBrMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]); - SmallVector ValValueVTs; - ComputeValueVTs(TLI, ValTy, ValValueVTs); + DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(), + MVT::Other, getControlRoot(), + getValue(I.getAddress()))); +} - unsigned NumValValues = ValValueVTs.size(); - SmallVector Values(NumValValues); +void SelectionDAGBuilder::visitFSub(const User &I) { + // -0.0 - X --> fneg + const Type *Ty = I.getType(); + if (Ty->isVectorTy()) { + if (ConstantVector *CV = dyn_cast(I.getOperand(0))) { + const VectorType *DestTy = cast(I.getType()); + const Type *ElTy = DestTy->getElementType(); + unsigned VL = DestTy->getNumElements(); + std::vector NZ(VL, ConstantFP::getNegativeZero(ElTy)); + Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size()); + if (CV == CNZ) { + SDValue Op2 = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), + Op2.getValueType(), Op2)); + return; + } + } + } - SDValue Agg = getValue(Op0); - // Copy out the selected value(s). - for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i) - Values[i - LinearIndex] = - OutOfUndef ? - DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) : - SDValue(Agg.getNode(), Agg.getResNo() + i); + if (ConstantFP *CFP = dyn_cast(I.getOperand(0))) + if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) { + SDValue Op2 = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), + Op2.getValueType(), Op2)); + return; + } - setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), - DAG.getVTList(&ValValueVTs[0], NumValValues), - &Values[0], NumValValues)); + visitBinary(I, ISD::FSUB); } -void SelectionDAGBuilder::visitGetElementPtr(const User &I) { - SDValue N = getValue(I.getOperand(0)); - const Type *Ty = I.getOperand(0)->getType(); - - for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); - OI != E; ++OI) { - const Value *Idx = *OI; - if (const StructType *StTy = dyn_cast(Ty)) { - unsigned Field = cast(Idx)->getZExtValue(); - if (Field) { - // N = N + Offset - uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field); - N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N, - DAG.getIntPtrConstant(Offset)); - } - - Ty = StTy->getElementType(Field); - } else if (const UnionType *UnTy = dyn_cast(Ty)) { - unsigned Field = cast(Idx)->getZExtValue(); - - // Offset canonically 0 for unions, but type changes - Ty = UnTy->getElementType(Field); - } else { - Ty = cast(Ty)->getElementType(); +void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(), + Op1.getValueType(), Op1, Op2)); +} - // If this is a constant subscript, handle it quickly. 
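visitFSub above folds a subtraction from negative zero into FNEG. The fold is exact under IEEE-754, including the corner case X == +0.0, where an ordinary subtraction of +0.0 from +0.0 would give the wrong sign; a small host-side check of that corner case (not part of the patch, just a demonstration):

#include <cmath>
#include <cstdio>

int main() {
  float X = 0.0f;                      // +0.0 is the interesting corner case
  float Sub = -0.0f - X;               // what -0.0 - X computes
  float Neg = -X;                      // what FNEG computes
  std::printf("sub sign=%d neg sign=%d\n",
              (int)std::signbit(Sub), (int)std::signbit(Neg));
  // Both print 1: -0.0 - (+0.0) == -0.0 == fneg(+0.0), so the fold is exact.
  // By contrast, +0.0 - (+0.0) is +0.0, which is why only -0.0 - X is folded.
  return 0;
}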
- if (const ConstantInt *CI = dyn_cast(Idx)) { - if (CI->getZExtValue() == 0) continue; - uint64_t Offs = - TD->getTypeAllocSize(Ty)*cast(CI)->getSExtValue(); - SDValue OffsVal; - EVT PTy = TLI.getPointerTy(); - unsigned PtrBits = PTy.getSizeInBits(); - if (PtrBits < 64) - OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), - TLI.getPointerTy(), - DAG.getConstant(Offs, MVT::i64)); - else - OffsVal = DAG.getIntPtrConstant(Offs); - - N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N, - OffsVal); - continue; - } - - // N = N + Idx * ElementSize; - APInt ElementSize = APInt(TLI.getPointerTy().getSizeInBits(), - TD->getTypeAllocSize(Ty)); - SDValue IdxN = getValue(Idx); - - // If the index is smaller or larger than intptr_t, truncate or extend - // it. - IdxN = DAG.getSExtOrTrunc(IdxN, getCurDebugLoc(), N.getValueType()); - - // If this is a multiply by a power of two, turn it into a shl - // immediately. This is a very common case. - if (ElementSize != 1) { - if (ElementSize.isPowerOf2()) { - unsigned Amt = ElementSize.logBase2(); - IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(), - N.getValueType(), IdxN, - DAG.getConstant(Amt, TLI.getPointerTy())); - } else { - SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy()); - IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(), - N.getValueType(), IdxN, Scale); - } - } - - N = DAG.getNode(ISD::ADD, getCurDebugLoc(), - N.getValueType(), N, IdxN); - } +void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + if (!I.getType()->isVectorTy() && + Op2.getValueType() != TLI.getShiftAmountTy()) { + // If the operand is smaller than the shift count type, promote it. + EVT PTy = TLI.getPointerTy(); + EVT STy = TLI.getShiftAmountTy(); + if (STy.bitsGT(Op2.getValueType())) + Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(), + TLI.getShiftAmountTy(), Op2); + // If the operand is larger than the shift count type but the shift + // count type has enough bits to represent any shift value, truncate + // it now. This is a common case and it exposes the truncate to + // optimization early. + else if (STy.getSizeInBits() >= + Log2_32_Ceil(Op2.getValueType().getSizeInBits())) + Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), + TLI.getShiftAmountTy(), Op2); + // Otherwise we'll need to temporarily settle for some other + // convenient type; type legalization will make adjustments as + // needed. + else if (PTy.bitsLT(Op2.getValueType())) + Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), + TLI.getPointerTy(), Op2); + else if (PTy.bitsGT(Op2.getValueType())) + Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(), + TLI.getPointerTy(), Op2); } - setValue(&I, N); + setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), + Op1.getValueType(), Op1, Op2)); } -void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { - // If this is a fixed sized alloca in the entry block of the function, - // allocate it statically on the stack. - if (FuncInfo.StaticAllocaMap.count(&I)) - return; // getValue will auto-populate this. 
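visitShift above reconciles the width of the shift-amount operand with the target's shift-amount type: extend it when it is narrower, truncate it when it is wider but the shift-amount type still has enough bits to represent every valid shift count, and otherwise defer to type legalization. The width-only model below is a simplification under that reading (the patch phrases the check in terms of EVT bit sizes and Log2_32_Ceil); all names here are hypothetical:

#include <cstdint>

enum class ShiftAmtFix { None, AnyExtend, Truncate, Defer };

// Smallest number of bits needed to represent shift counts 0..ValueBits-1.
static unsigned log2Ceil(unsigned ValueBits) {
  unsigned Bits = 0;
  while ((1u << Bits) < ValueBits) ++Bits;
  return Bits;
}

// AmtBits: width of the shift-amount operand; ShAmtBits: target shift-amount
// type width; ValueBits: width whose shift counts must stay representable.
static ShiftAmtFix classify(unsigned AmtBits, unsigned ShAmtBits, unsigned ValueBits) {
  if (AmtBits == ShAmtBits) return ShiftAmtFix::None;
  if (ShAmtBits > AmtBits)  return ShiftAmtFix::AnyExtend;  // widen the amount
  if (ShAmtBits >= log2Ceil(ValueBits))
    return ShiftAmtFix::Truncate;  // narrower, but holds any valid shift count
  return ShiftAmtFix::Defer;       // settle for another type; legalizer fixes it
}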
- - const Type *Ty = I.getAllocatedType(); - uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); - unsigned Align = - std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), - I.getAlignment()); - - SDValue AllocSize = getValue(I.getArraySize()); - - EVT IntPtr = TLI.getPointerTy(); - if (AllocSize.getValueType() != IntPtr) - AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr); - - AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), IntPtr, - AllocSize, - DAG.getConstant(TySize, IntPtr)); - - // Handle alignment. If the requested alignment is less than or equal to - // the stack alignment, ignore it. If the size is greater than or equal to - // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. - unsigned StackAlign = TM.getFrameInfo()->getStackAlignment(); - if (Align <= StackAlign) - Align = 0; - - // Round the size of the allocation up to the stack alignment size - // by add SA-1 to the size. - AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(), - AllocSize.getValueType(), AllocSize, - DAG.getIntPtrConstant(StackAlign-1)); - - // Mask out the low bits for alignment purposes. - AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(), - AllocSize.getValueType(), AllocSize, - DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1))); - - SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) }; - SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); - SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(), - VTs, Ops, 3); - setValue(&I, DSA); - DAG.setRoot(DSA.getValue(1)); +void SelectionDAGBuilder::visitICmp(const User &I) { + ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; + if (const ICmpInst *IC = dyn_cast(&I)) + predicate = IC->getPredicate(); + else if (const ConstantExpr *IC = dyn_cast(&I)) + predicate = ICmpInst::Predicate(IC->getPredicate()); + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + ISD::CondCode Opcode = getICmpCondCode(predicate); - // Inform the Frame Information that we have just allocated a variable-sized - // object. - FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode)); } -void SelectionDAGBuilder::visitLoad(const LoadInst &I) { - const Value *SV = I.getOperand(0); - SDValue Ptr = getValue(SV); - - const Type *Ty = I.getType(); - - bool isVolatile = I.isVolatile(); - bool isNonTemporal = I.getMetadata("nontemporal") != 0; - unsigned Alignment = I.getAlignment(); +void SelectionDAGBuilder::visitFCmp(const User &I) { + FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; + if (const FCmpInst *FC = dyn_cast(&I)) + predicate = FC->getPredicate(); + else if (const ConstantExpr *FC = dyn_cast(&I)) + predicate = FCmpInst::Predicate(FC->getPredicate()); + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + ISD::CondCode Condition = getFCmpCondCode(predicate); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition)); +} +void SelectionDAGBuilder::visitSelect(const User &I) { SmallVector ValueVTs; - SmallVector Offsets; - ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets); + ComputeValueVTs(TLI, I.getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); - if (NumValues == 0) - return; - - SDValue Root; - bool ConstantMemory = false; - if (I.isVolatile()) - // Serialize volatile loads with other side effects. 
- Root = getRoot(); - else if (AA->pointsToConstantMemory(SV)) { - // Do not serialize (non-volatile) loads of constant memory with anything. - Root = DAG.getEntryNode(); - ConstantMemory = true; - } else { - // Do not serialize non-volatile loads against each other. - Root = DAG.getRoot(); - } + if (NumValues == 0) return; SmallVector Values(NumValues); - SmallVector Chains(NumValues); - EVT PtrVT = Ptr.getValueType(); - for (unsigned i = 0; i != NumValues; ++i) { - SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(), - PtrVT, Ptr, - DAG.getConstant(Offsets[i], PtrVT)); - SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root, - A, SV, Offsets[i], isVolatile, - isNonTemporal, Alignment); - - Values[i] = L; - Chains[i] = L.getValue(1); - } + SDValue Cond = getValue(I.getOperand(0)); + SDValue TrueVal = getValue(I.getOperand(1)); + SDValue FalseVal = getValue(I.getOperand(2)); - if (!ConstantMemory) { - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), - MVT::Other, &Chains[0], NumValues); - if (isVolatile) - DAG.setRoot(Chain); - else - PendingLoads.push_back(Chain); - } + for (unsigned i = 0; i != NumValues; ++i) + Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(), + TrueVal.getNode()->getValueType(TrueVal.getResNo()+i), + Cond, + SDValue(TrueVal.getNode(), + TrueVal.getResNo() + i), + SDValue(FalseVal.getNode(), + FalseVal.getResNo() + i)); setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), DAG.getVTList(&ValueVTs[0], NumValues), &Values[0], NumValues)); } -void SelectionDAGBuilder::visitStore(const StoreInst &I) { - const Value *SrcV = I.getOperand(0); - const Value *PtrV = I.getOperand(1); +void SelectionDAGBuilder::visitTrunc(const User &I) { + // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N)); +} - SmallVector ValueVTs; - SmallVector Offsets; - ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets); - unsigned NumValues = ValueVTs.size(); - if (NumValues == 0) - return; +void SelectionDAGBuilder::visitZExt(const User &I) { + // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). + // ZExt also can't be a cast to bool for same reason. So, nothing much to do + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N)); +} - // Get the lowered operands. Note that we do this after - // checking if NumResults is zero, because with zero results - // the operands won't have values in the map. - SDValue Src = getValue(SrcV); - SDValue Ptr = getValue(PtrV); +void SelectionDAGBuilder::visitSExt(const User &I) { + // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). + // SExt also can't be a cast to bool for same reason. 
So, nothing much to do + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N)); +} - SDValue Root = getRoot(); - SmallVector Chains(NumValues); - EVT PtrVT = Ptr.getValueType(); - bool isVolatile = I.isVolatile(); - bool isNonTemporal = I.getMetadata("nontemporal") != 0; - unsigned Alignment = I.getAlignment(); +void SelectionDAGBuilder::visitFPTrunc(const User &I) { + // FPTrunc is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(), + DestVT, N, DAG.getIntPtrConstant(0))); +} - for (unsigned i = 0; i != NumValues; ++i) { - SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr, - DAG.getConstant(Offsets[i], PtrVT)); - Chains[i] = DAG.getStore(Root, getCurDebugLoc(), - SDValue(Src.getNode(), Src.getResNo() + i), - Add, PtrV, Offsets[i], isVolatile, - isNonTemporal, Alignment); - } - - DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), - MVT::Other, &Chains[0], NumValues)); +void SelectionDAGBuilder::visitFPExt(const User &I){ + // FPTrunc is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N)); } -/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC -/// node. -void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, - unsigned Intrinsic) { - bool HasChain = !I.doesNotAccessMemory(); - bool OnlyLoad = HasChain && I.onlyReadsMemory(); - - // Build the operand list. - SmallVector Ops; - if (HasChain) { // If this intrinsic has side-effects, chainify it. - if (OnlyLoad) { - // We don't need to serialize loads against other loads. - Ops.push_back(DAG.getRoot()); - } else { - Ops.push_back(getRoot()); - } - } - - // Info is set by getTgtMemInstrinsic - TargetLowering::IntrinsicInfo Info; - bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic); - - // Add the intrinsic ID as an integer operand if it's not a target intrinsic. - if (!IsTgtIntrinsic) - Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy())); - - // Add all operands of the call to the operand list. 
- for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) { - SDValue Op = getValue(I.getOperand(i)); - assert(TLI.isTypeLegal(Op.getValueType()) && - "Intrinsic uses a non-legal type?"); - Ops.push_back(Op); - } +void SelectionDAGBuilder::visitFPToUI(const User &I) { + // FPToUI is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N)); +} - SmallVector ValueVTs; - ComputeValueVTs(TLI, I.getType(), ValueVTs); -#ifndef NDEBUG - for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) { - assert(TLI.isTypeLegal(ValueVTs[Val]) && - "Intrinsic uses a non-legal type?"); - } -#endif // NDEBUG +void SelectionDAGBuilder::visitFPToSI(const User &I) { + // FPToSI is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N)); +} - if (HasChain) - ValueVTs.push_back(MVT::Other); +void SelectionDAGBuilder::visitUIToFP(const User &I) { + // UIToFP is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N)); +} - SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size()); +void SelectionDAGBuilder::visitSIToFP(const User &I){ + // SIToFP is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N)); +} - // Create the node. - SDValue Result; - if (IsTgtIntrinsic) { - // This is target intrinsic that touches memory - Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(), - VTs, &Ops[0], Ops.size(), - Info.memVT, Info.ptrVal, Info.offset, - Info.align, Info.vol, - Info.readMem, Info.writeMem); - } else if (!HasChain) { - Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(), - VTs, &Ops[0], Ops.size()); - } else if (!I.getType()->isVoidTy()) { - Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(), - VTs, &Ops[0], Ops.size()); - } else { - Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(), - VTs, &Ops[0], Ops.size()); - } +void SelectionDAGBuilder::visitPtrToInt(const User &I) { + // What to do depends on the size of the integer and the size of the pointer. + // We can either truncate, zero extend, or no-op, accordingly. + SDValue N = getValue(I.getOperand(0)); + EVT SrcVT = N.getValueType(); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); +} - if (HasChain) { - SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1); - if (OnlyLoad) - PendingLoads.push_back(Chain); - else - DAG.setRoot(Chain); - } +void SelectionDAGBuilder::visitIntToPtr(const User &I) { + // What to do depends on the size of the integer and the size of the pointer. + // We can either truncate, zero extend, or no-op, accordingly. 
+ SDValue N = getValue(I.getOperand(0)); + EVT SrcVT = N.getValueType(); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); +} - if (!I.getType()->isVoidTy()) { - if (const VectorType *PTy = dyn_cast(I.getType())) { - EVT VT = TLI.getValueType(PTy); - Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result); - } +void SelectionDAGBuilder::visitBitCast(const User &I) { + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, Result); - } + // BitCast assures us that source and destination are the same size so this is + // either a BIT_CONVERT or a no-op. + if (DestVT != N.getValueType()) + setValue(&I, DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), + DestVT, N)); // convert types. + else + setValue(&I, N); // noop cast. } -/// GetSignificand - Get the significand and build it into a floating-point -/// number with exponent of 1: -/// -/// Op = (Op & 0x007fffff) | 0x3f800000; -/// -/// where Op is the hexidecimal representation of floating point value. -static SDValue -GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) { - SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, - DAG.getConstant(0x007fffff, MVT::i32)); - SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, - DAG.getConstant(0x3f800000, MVT::i32)); - return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2); +void SelectionDAGBuilder::visitInsertElement(const User &I) { + SDValue InVec = getValue(I.getOperand(0)); + SDValue InVal = getValue(I.getOperand(1)); + SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), + TLI.getPointerTy(), + getValue(I.getOperand(2))); + setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(), + TLI.getValueType(I.getType()), + InVec, InVal, InIdx)); } -/// GetExponent - Get the exponent: -/// -/// (float)(int)(((Op & 0x7f800000) >> 23) - 127); -/// -/// where Op is the hexidecimal representation of floating point value. -static SDValue -GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, - DebugLoc dl) { - SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, - DAG.getConstant(0x7f800000, MVT::i32)); - SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0, - DAG.getConstant(23, TLI.getPointerTy())); - SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1, - DAG.getConstant(127, MVT::i32)); - return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2); +void SelectionDAGBuilder::visitExtractElement(const User &I) { + SDValue InVec = getValue(I.getOperand(0)); + SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), + TLI.getPointerTy(), + getValue(I.getOperand(1))); + setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), + TLI.getValueType(I.getType()), InVec, InIdx)); } -/// getF32Constant - Get 32-bit floating point constant. -static SDValue -getF32Constant(SelectionDAG &DAG, unsigned Flt) { - return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32); +// Utility for visitShuffleVector - Returns true if the mask is mask starting +// from SIndx and increasing to the element length (undefs are allowed). 
+static bool SequentialMask(SmallVectorImpl &Mask, unsigned SIndx) { + unsigned MaskNumElts = Mask.size(); + for (unsigned i = 0; i != MaskNumElts; ++i) + if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx))) + return false; + return true; } -/// Inlined utility function to implement binary input atomic intrinsics for -/// visitIntrinsicCall: I is a call instruction -/// Op is the associated NodeType for I -const char * -SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I, - ISD::NodeType Op) { - SDValue Root = getRoot(); - SDValue L = - DAG.getAtomic(Op, getCurDebugLoc(), - getValue(I.getOperand(2)).getValueType().getSimpleVT(), - Root, - getValue(I.getOperand(1)), - getValue(I.getOperand(2)), - I.getOperand(1)); - setValue(&I, L); - DAG.setRoot(L.getValue(1)); - return 0; -} +void SelectionDAGBuilder::visitShuffleVector(const User &I) { + SmallVector Mask; + SDValue Src1 = getValue(I.getOperand(0)); + SDValue Src2 = getValue(I.getOperand(1)); -// implVisitAluOverflow - Lower arithmetic overflow instrinsics. -const char * -SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) { - SDValue Op1 = getValue(I.getOperand(1)); - SDValue Op2 = getValue(I.getOperand(2)); + // Convert the ConstantVector mask operand into an array of ints, with -1 + // representing undef values. + SmallVector MaskElts; + cast(I.getOperand(2))->getVectorElements(MaskElts); + unsigned MaskNumElts = MaskElts.size(); + for (unsigned i = 0; i != MaskNumElts; ++i) { + if (isa(MaskElts[i])) + Mask.push_back(-1); + else + Mask.push_back(cast(MaskElts[i])->getSExtValue()); + } - SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); - setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2)); - return 0; -} + EVT VT = TLI.getValueType(I.getType()); + EVT SrcVT = Src1.getValueType(); + unsigned SrcNumElts = SrcVT.getVectorNumElements(); -/// visitExp - Lower an exp intrinsic. Handles the special sequences for -/// limited-precision mode. -void -SelectionDAGBuilder::visitExp(const CallInst &I) { - SDValue result; - DebugLoc dl = getCurDebugLoc(); + if (SrcNumElts == MaskNumElts) { + setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, + &Mask[0])); + return; + } - if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && - LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + // Normalize the shuffle vector since mask and vector length don't match. + if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) { + // Mask is longer than the source vectors and is a multiple of the source + // vectors. We can use concatenate vector to make the mask and vectors + // lengths match. + if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) { + // The shuffle is concatenating two vectors together. + setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), + VT, Src1, Src2)); + return; + } + + // Pad both vectors with undefs to make them the same length as the mask. + unsigned NumConcat = MaskNumElts / SrcNumElts; + bool Src1U = Src1.getOpcode() == ISD::UNDEF; + bool Src2U = Src2.getOpcode() == ISD::UNDEF; + SDValue UndefVal = DAG.getUNDEF(SrcVT); + + SmallVector MOps1(NumConcat, UndefVal); + SmallVector MOps2(NumConcat, UndefVal); + MOps1[0] = Src1; + MOps2[0] = Src2; + + Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, + getCurDebugLoc(), VT, + &MOps1[0], NumConcat); + Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, + getCurDebugLoc(), VT, + &MOps2[0], NumConcat); + + // Readjust mask for new input vector length. 
+ SmallVector MappedOps; + for (unsigned i = 0; i != MaskNumElts; ++i) { + int Idx = Mask[i]; + if (Idx < (int)SrcNumElts) + MappedOps.push_back(Idx); + else + MappedOps.push_back(Idx + MaskNumElts - SrcNumElts); + } + + setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, + &MappedOps[0])); + return; + } + + if (SrcNumElts > MaskNumElts) { + // Analyze the access pattern of the vector to see if we can extract + // two subvectors and do the shuffle. The analysis is done by calculating + // the range of elements the mask access on both vectors. + int MinRange[2] = { SrcNumElts+1, SrcNumElts+1}; + int MaxRange[2] = {-1, -1}; + + for (unsigned i = 0; i != MaskNumElts; ++i) { + int Idx = Mask[i]; + int Input = 0; + if (Idx < 0) + continue; + + if (Idx >= (int)SrcNumElts) { + Input = 1; + Idx -= SrcNumElts; + } + if (Idx > MaxRange[Input]) + MaxRange[Input] = Idx; + if (Idx < MinRange[Input]) + MinRange[Input] = Idx; + } + + // Check if the access is smaller than the vector size and can we find + // a reasonable extract index. + int RangeUse[2] = { 2, 2 }; // 0 = Unused, 1 = Extract, 2 = Can not + // Extract. + int StartIdx[2]; // StartIdx to extract from + for (int Input=0; Input < 2; ++Input) { + if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) { + RangeUse[Input] = 0; // Unused + StartIdx[Input] = 0; + } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) { + // Fits within range but we should see if we can find a good + // start index that is a multiple of the mask length. + if (MaxRange[Input] < (int)MaskNumElts) { + RangeUse[Input] = 1; // Extract from beginning of the vector + StartIdx[Input] = 0; + } else { + StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; + if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && + StartIdx[Input] + MaskNumElts < SrcNumElts) + RangeUse[Input] = 1; // Extract from a multiple of the mask length. + } + } + } + + if (RangeUse[0] == 0 && RangeUse[1] == 0) { + setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used. + return; + } + else if (RangeUse[0] < 2 && RangeUse[1] < 2) { + // Extract appropriate subvector and generate a vector shuffle + for (int Input=0; Input < 2; ++Input) { + SDValue &Src = Input == 0 ? Src1 : Src2; + if (RangeUse[Input] == 0) + Src = DAG.getUNDEF(VT); + else + Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT, + Src, DAG.getIntPtrConstant(StartIdx[Input])); + } + + // Calculate new mask. + SmallVector MappedOps; + for (unsigned i = 0; i != MaskNumElts; ++i) { + int Idx = Mask[i]; + if (Idx < 0) + MappedOps.push_back(Idx); + else if (Idx < (int)SrcNumElts) + MappedOps.push_back(Idx - StartIdx[0]); + else + MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts); + } + + setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, + &MappedOps[0])); + return; + } + } + + // We can't use either concat vectors or extract subvectors so fall back to + // replacing the shuffle with extract and build vector. + // to insert and build vector. 
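The "readjust mask" step above rebases mask entries after both sources have been padded with undef lanes up to the mask length: indices that referred to the second source move further out because the first source now occupies a full mask-width slot. A standalone version of that remapping, with a concrete example; remapForPaddedSources is a hypothetical helper, not the patch's code:

#include <vector>

static std::vector<int> remapForPaddedSources(const std::vector<int> &Mask,
                                              unsigned SrcNumElts) {
  unsigned MaskNumElts = Mask.size();
  std::vector<int> Out;
  Out.reserve(MaskNumElts);
  for (int Idx : Mask) {
    if (Idx < (int)SrcNumElts)
      Out.push_back(Idx);                            // undef (-1) or source 0
    else
      Out.push_back(Idx + MaskNumElts - SrcNumElts); // source 1, shifted outward
  }
  return Out;
}
// e.g. SrcNumElts = 4, Mask = {1,5,2,6,-1,-1,-1,-1} -> {1,9,2,10,-1,-1,-1,-1}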
+ EVT EltVT = VT.getVectorElementType(); + EVT PtrVT = TLI.getPointerTy(); + SmallVector Ops; + for (unsigned i = 0; i != MaskNumElts; ++i) { + if (Mask[i] < 0) { + Ops.push_back(DAG.getUNDEF(EltVT)); + } else { + int Idx = Mask[i]; + SDValue Res; + + if (Idx < (int)SrcNumElts) + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), + EltVT, Src1, DAG.getConstant(Idx, PtrVT)); + else + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), + EltVT, Src2, + DAG.getConstant(Idx - SrcNumElts, PtrVT)); + + Ops.push_back(Res); + } + } + + setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + VT, &Ops[0], Ops.size())); +} + +void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { + const Value *Op0 = I.getOperand(0); + const Value *Op1 = I.getOperand(1); + const Type *AggTy = I.getType(); + const Type *ValTy = Op1->getType(); + bool IntoUndef = isa(Op0); + bool FromUndef = isa(Op1); + + unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy, + I.idx_begin(), I.idx_end()); + + SmallVector AggValueVTs; + ComputeValueVTs(TLI, AggTy, AggValueVTs); + SmallVector ValValueVTs; + ComputeValueVTs(TLI, ValTy, ValValueVTs); + + unsigned NumAggValues = AggValueVTs.size(); + unsigned NumValValues = ValValueVTs.size(); + SmallVector Values(NumAggValues); + + SDValue Agg = getValue(Op0); + SDValue Val = getValue(Op1); + unsigned i = 0; + // Copy the beginning value(s) from the original aggregate. + for (; i != LinearIndex; ++i) + Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : + SDValue(Agg.getNode(), Agg.getResNo() + i); + // Copy values from the inserted value(s). + for (; i != LinearIndex + NumValValues; ++i) + Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) : + SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex); + // Copy remaining value(s) from the original aggregate. + for (; i != NumAggValues; ++i) + Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : + SDValue(Agg.getNode(), Agg.getResNo() + i); + + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&AggValueVTs[0], NumAggValues), + &Values[0], NumAggValues)); +} + +void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { + const Value *Op0 = I.getOperand(0); + const Type *AggTy = Op0->getType(); + const Type *ValTy = I.getType(); + bool OutOfUndef = isa(Op0); + + unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy, + I.idx_begin(), I.idx_end()); + + SmallVector ValValueVTs; + ComputeValueVTs(TLI, ValTy, ValValueVTs); + + unsigned NumValValues = ValValueVTs.size(); + SmallVector Values(NumValValues); + + SDValue Agg = getValue(Op0); + // Copy out the selected value(s). + for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i) + Values[i - LinearIndex] = + OutOfUndef ? 
+ DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) : + SDValue(Agg.getNode(), Agg.getResNo() + i); + + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&ValValueVTs[0], NumValValues), + &Values[0], NumValValues)); +} + +void SelectionDAGBuilder::visitGetElementPtr(const User &I) { + SDValue N = getValue(I.getOperand(0)); + const Type *Ty = I.getOperand(0)->getType(); + + for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); + OI != E; ++OI) { + const Value *Idx = *OI; + if (const StructType *StTy = dyn_cast(Ty)) { + unsigned Field = cast(Idx)->getZExtValue(); + if (Field) { + // N = N + Offset + uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field); + N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N, + DAG.getIntPtrConstant(Offset)); + } + + Ty = StTy->getElementType(Field); + } else if (const UnionType *UnTy = dyn_cast(Ty)) { + unsigned Field = cast(Idx)->getZExtValue(); + + // Offset canonically 0 for unions, but type changes + Ty = UnTy->getElementType(Field); + } else { + Ty = cast(Ty)->getElementType(); + + // If this is a constant subscript, handle it quickly. + if (const ConstantInt *CI = dyn_cast(Idx)) { + if (CI->getZExtValue() == 0) continue; + uint64_t Offs = + TD->getTypeAllocSize(Ty)*cast(CI)->getSExtValue(); + SDValue OffsVal; + EVT PTy = TLI.getPointerTy(); + unsigned PtrBits = PTy.getSizeInBits(); + if (PtrBits < 64) + OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), + TLI.getPointerTy(), + DAG.getConstant(Offs, MVT::i64)); + else + OffsVal = DAG.getIntPtrConstant(Offs); + + N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N, + OffsVal); + continue; + } + + // N = N + Idx * ElementSize; + APInt ElementSize = APInt(TLI.getPointerTy().getSizeInBits(), + TD->getTypeAllocSize(Ty)); + SDValue IdxN = getValue(Idx); + + // If the index is smaller or larger than intptr_t, truncate or extend + // it. + IdxN = DAG.getSExtOrTrunc(IdxN, getCurDebugLoc(), N.getValueType()); + + // If this is a multiply by a power of two, turn it into a shl + // immediately. This is a very common case. + if (ElementSize != 1) { + if (ElementSize.isPowerOf2()) { + unsigned Amt = ElementSize.logBase2(); + IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(), + N.getValueType(), IdxN, + DAG.getConstant(Amt, TLI.getPointerTy())); + } else { + SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy()); + IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(), + N.getValueType(), IdxN, Scale); + } + } + + N = DAG.getNode(ISD::ADD, getCurDebugLoc(), + N.getValueType(), N, IdxN); + } + } + + setValue(&I, N); +} + +void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { + // If this is a fixed sized alloca in the entry block of the function, + // allocate it statically on the stack. + if (FuncInfo.StaticAllocaMap.count(&I)) + return; // getValue will auto-populate this. + + const Type *Ty = I.getAllocatedType(); + uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); + unsigned Align = + std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), + I.getAlignment()); + + SDValue AllocSize = getValue(I.getArraySize()); + + EVT IntPtr = TLI.getPointerTy(); + if (AllocSize.getValueType() != IntPtr) + AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr); + + AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), IntPtr, + AllocSize, + DAG.getConstant(TySize, IntPtr)); + + // Handle alignment. If the requested alignment is less than or equal to + // the stack alignment, ignore it. 
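visitGetElementPtr above turns each index into pointer arithmetic: a struct field adds that field's constant byte offset from the layout, and an array or pointer index adds index times the element allocation size, using a left shift when that size is a power of two. The same address math on plain integers, with a stand-in layout table instead of TargetData (all names hypothetical):

#include <cstdint>
#include <vector>

struct FieldInfo { uint64_t Offset; };   // byte offset from the struct layout

// One GEP step over a struct: add the field's byte offset.
static uint64_t stepStruct(uint64_t Addr, const std::vector<FieldInfo> &Layout,
                           unsigned Field) {
  return Addr + Layout[Field].Offset;
}

// One GEP step over an array: add Idx * ElementSize, as a shift if possible.
static uint64_t stepArray(uint64_t Addr, int64_t Idx, uint64_t ElementSize) {
  if (ElementSize != 0 && (ElementSize & (ElementSize - 1)) == 0) {
    unsigned Amt = 0;                              // log2 of the element size
    while ((uint64_t(1) << Amt) != ElementSize) ++Amt;
    return Addr + (uint64_t(Idx) << Amt);          // the SHL fast path
  }
  return Addr + uint64_t(Idx) * ElementSize;       // general MUL path
}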
If the size is greater than or equal to + // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. + unsigned StackAlign = TM.getFrameInfo()->getStackAlignment(); + if (Align <= StackAlign) + Align = 0; + + // Round the size of the allocation up to the stack alignment size + // by add SA-1 to the size. + AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(), + AllocSize.getValueType(), AllocSize, + DAG.getIntPtrConstant(StackAlign-1)); + + // Mask out the low bits for alignment purposes. + AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(), + AllocSize.getValueType(), AllocSize, + DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1))); + + SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) }; + SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); + SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(), + VTs, Ops, 3); + setValue(&I, DSA); + DAG.setRoot(DSA.getValue(1)); + + // Inform the Frame Information that we have just allocated a variable-sized + // object. + FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(); +} + +void SelectionDAGBuilder::visitLoad(const LoadInst &I) { + const Value *SV = I.getOperand(0); + SDValue Ptr = getValue(SV); + + const Type *Ty = I.getType(); + + bool isVolatile = I.isVolatile(); + bool isNonTemporal = I.getMetadata("nontemporal") != 0; + unsigned Alignment = I.getAlignment(); + + SmallVector ValueVTs; + SmallVector Offsets; + ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) + return; + + SDValue Root; + bool ConstantMemory = false; + if (I.isVolatile()) + // Serialize volatile loads with other side effects. + Root = getRoot(); + else if (AA->pointsToConstantMemory(SV)) { + // Do not serialize (non-volatile) loads of constant memory with anything. + Root = DAG.getEntryNode(); + ConstantMemory = true; + } else { + // Do not serialize non-volatile loads against each other. + Root = DAG.getRoot(); + } + + SmallVector Values(NumValues); + SmallVector Chains(NumValues); + EVT PtrVT = Ptr.getValueType(); + for (unsigned i = 0; i != NumValues; ++i) { + SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(), + PtrVT, Ptr, + DAG.getConstant(Offsets[i], PtrVT)); + SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root, + A, SV, Offsets[i], isVolatile, + isNonTemporal, Alignment); + + Values[i] = L; + Chains[i] = L.getValue(1); + } + + if (!ConstantMemory) { + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], NumValues); + if (isVolatile) + DAG.setRoot(Chain); + else + PendingLoads.push_back(Chain); + } + + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&ValueVTs[0], NumValues), + &Values[0], NumValues)); +} + +void SelectionDAGBuilder::visitStore(const StoreInst &I) { + const Value *SrcV = I.getOperand(0); + const Value *PtrV = I.getOperand(1); + + SmallVector ValueVTs; + SmallVector Offsets; + ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) + return; + + // Get the lowered operands. Note that we do this after + // checking if NumResults is zero, because with zero results + // the operands won't have values in the map. 
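The dynamic alloca path above rounds the byte count up to the stack alignment with the usual add-then-mask trick: add StackAlign-1, then clear the low bits. The same computation in isolation, assuming the alignment is a power of two:

#include <cstdint>

// Round Size up to a multiple of Align, where Align is a power of two.
static uint64_t roundUpToAlignment(uint64_t Size, uint64_t Align) {
  return (Size + Align - 1) & ~(Align - 1);
}
// e.g. roundUpToAlignment(13, 16) == 16, roundUpToAlignment(32, 16) == 32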
+ SDValue Src = getValue(SrcV); + SDValue Ptr = getValue(PtrV); + + SDValue Root = getRoot(); + SmallVector Chains(NumValues); + EVT PtrVT = Ptr.getValueType(); + bool isVolatile = I.isVolatile(); + bool isNonTemporal = I.getMetadata("nontemporal") != 0; + unsigned Alignment = I.getAlignment(); + + for (unsigned i = 0; i != NumValues; ++i) { + SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr, + DAG.getConstant(Offsets[i], PtrVT)); + Chains[i] = DAG.getStore(Root, getCurDebugLoc(), + SDValue(Src.getNode(), Src.getResNo() + i), + Add, PtrV, Offsets[i], isVolatile, + isNonTemporal, Alignment); + } + + DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], NumValues)); +} + +/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC +/// node. +void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, + unsigned Intrinsic) { + bool HasChain = !I.doesNotAccessMemory(); + bool OnlyLoad = HasChain && I.onlyReadsMemory(); + + // Build the operand list. + SmallVector Ops; + if (HasChain) { // If this intrinsic has side-effects, chainify it. + if (OnlyLoad) { + // We don't need to serialize loads against other loads. + Ops.push_back(DAG.getRoot()); + } else { + Ops.push_back(getRoot()); + } + } + + // Info is set by getTgtMemInstrinsic + TargetLowering::IntrinsicInfo Info; + bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic); + + // Add the intrinsic ID as an integer operand if it's not a target intrinsic. + if (!IsTgtIntrinsic) + Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy())); + + // Add all operands of the call to the operand list. + for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) { + SDValue Op = getValue(I.getOperand(i)); + assert(TLI.isTypeLegal(Op.getValueType()) && + "Intrinsic uses a non-legal type?"); + Ops.push_back(Op); + } + + SmallVector ValueVTs; + ComputeValueVTs(TLI, I.getType(), ValueVTs); +#ifndef NDEBUG + for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) { + assert(TLI.isTypeLegal(ValueVTs[Val]) && + "Intrinsic uses a non-legal type?"); + } +#endif // NDEBUG + + if (HasChain) + ValueVTs.push_back(MVT::Other); + + SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size()); + + // Create the node. + SDValue Result; + if (IsTgtIntrinsic) { + // This is target intrinsic that touches memory + Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(), + VTs, &Ops[0], Ops.size(), + Info.memVT, Info.ptrVal, Info.offset, + Info.align, Info.vol, + Info.readMem, Info.writeMem); + } else if (!HasChain) { + Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(), + VTs, &Ops[0], Ops.size()); + } else if (!I.getType()->isVoidTy()) { + Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(), + VTs, &Ops[0], Ops.size()); + } else { + Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(), + VTs, &Ops[0], Ops.size()); + } + + if (HasChain) { + SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1); + if (OnlyLoad) + PendingLoads.push_back(Chain); + else + DAG.setRoot(Chain); + } + + if (!I.getType()->isVoidTy()) { + if (const VectorType *PTy = dyn_cast(I.getType())) { + EVT VT = TLI.getValueType(PTy); + Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result); + } + + setValue(&I, Result); + } +} + +/// GetSignificand - Get the significand and build it into a floating-point +/// number with exponent of 1: +/// +/// Op = (Op & 0x007fffff) | 0x3f800000; +/// +/// where Op is the hexidecimal representation of floating point value. 
+static SDValue +GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) { + SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, + DAG.getConstant(0x007fffff, MVT::i32)); + SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, + DAG.getConstant(0x3f800000, MVT::i32)); + return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2); +} + +/// GetExponent - Get the exponent: +/// +/// (float)(int)(((Op & 0x7f800000) >> 23) - 127); +/// +/// where Op is the hexidecimal representation of floating point value. +static SDValue +GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, + DebugLoc dl) { + SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, + DAG.getConstant(0x7f800000, MVT::i32)); + SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0, + DAG.getConstant(23, TLI.getPointerTy())); + SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1, + DAG.getConstant(127, MVT::i32)); + return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2); +} + +/// getF32Constant - Get 32-bit floating point constant. +static SDValue +getF32Constant(SelectionDAG &DAG, unsigned Flt) { + return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32); +} + +/// Inlined utility function to implement binary input atomic intrinsics for +/// visitIntrinsicCall: I is a call instruction +/// Op is the associated NodeType for I +const char * +SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I, + ISD::NodeType Op) { + SDValue Root = getRoot(); + SDValue L = + DAG.getAtomic(Op, getCurDebugLoc(), + getValue(I.getOperand(2)).getValueType().getSimpleVT(), + Root, + getValue(I.getOperand(1)), + getValue(I.getOperand(2)), + I.getOperand(1)); + setValue(&I, L); + DAG.setRoot(L.getValue(1)); + return 0; +} + +// implVisitAluOverflow - Lower arithmetic overflow instrinsics. +const char * +SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) { + SDValue Op1 = getValue(I.getOperand(1)); + SDValue Op2 = getValue(I.getOperand(2)); + + SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); + setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2)); + return 0; +} + +/// visitExp - Lower an exp intrinsic. Handles the special sequences for +/// limited-precision mode. +void +SelectionDAGBuilder::visitExp(const CallInst &I) { + SDValue result; + DebugLoc dl = getCurDebugLoc(); + + if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op = getValue(I.getOperand(1)); // Put the exponent in the right bit position for later addition to the @@ -4320,642 +4480,432 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::atomic_load_umax: return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX); case Intrinsic::atomic_swap: - return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP); - - case Intrinsic::invariant_start: - case Intrinsic::lifetime_start: - // Discard region information. - setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); - return 0; - case Intrinsic::invariant_end: - case Intrinsic::lifetime_end: - // Discard region information. 
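implVisitBinaryAtomic and implVisitAluOverflow above each lower a call to a single node; the overflow case gives that node the VT list (operand type, MVT::i1), so one operation yields both the arithmetic result and a one-bit overflow flag. What that pair computes, for the unsigned-add flavor, can be pictured with plain C++; this is an illustration of the semantics only, and the function name is invented.

  #include <cstdint>
  #include <utility>

  // Roughly what an ISD::UADDO-style node produces: result 0 is the wrapped
  // sum, result 1 is the carry out of the top bit.
  static std::pair<uint32_t, bool> uaddOverflow(uint32_t A, uint32_t B) {
    uint32_t Sum = A + B;            // result 0: the value, modulo 2^32
    bool Overflow = Sum < A;         // result 1: true iff the true sum did not fit
    return std::make_pair(Sum, Overflow);
  }

The signed variants (SADDO, SSUBO, SMULO and friends) detect overflow differently, but the shape is the same: one node, two results, and the users of the intrinsic's struct return roughly pick up one result each.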
- return 0; - } -} - -void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, - bool isTailCall, - MachineBasicBlock *LandingPad) { - const PointerType *PT = cast(CS.getCalledValue()->getType()); - const FunctionType *FTy = cast(PT->getElementType()); - const Type *RetTy = FTy->getReturnType(); - MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - MCSymbol *BeginLabel = 0; - - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Args.reserve(CS.arg_size()); - - // Check whether the function can return without sret-demotion. - SmallVector OutVTs; - SmallVector OutsFlags; - SmallVector Offsets; - getReturnInfo(RetTy, CS.getAttributes().getRetAttributes(), - OutVTs, OutsFlags, TLI, &Offsets); - - bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), - FTy->isVarArg(), OutVTs, OutsFlags, DAG); - - SDValue DemoteStackSlot; - - if (!CanLowerReturn) { - uint64_t TySize = TLI.getTargetData()->getTypeAllocSize( - FTy->getReturnType()); - unsigned Align = TLI.getTargetData()->getPrefTypeAlignment( - FTy->getReturnType()); - MachineFunction &MF = DAG.getMachineFunction(); - int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); - const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType()); - - DemoteStackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); - Entry.Node = DemoteStackSlot; - Entry.Ty = StackSlotPtrType; - Entry.isSExt = false; - Entry.isZExt = false; - Entry.isInReg = false; - Entry.isSRet = true; - Entry.isNest = false; - Entry.isByVal = false; - Entry.Alignment = Align; - Args.push_back(Entry); - RetTy = Type::getVoidTy(FTy->getContext()); - } - - for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); - i != e; ++i) { - SDValue ArgNode = getValue(*i); - Entry.Node = ArgNode; Entry.Ty = (*i)->getType(); - - unsigned attrInd = i - CS.arg_begin() + 1; - Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); - Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt); - Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg); - Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet); - Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest); - Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal); - Entry.Alignment = CS.getParamAlignment(attrInd); - Args.push_back(Entry); - } - - if (LandingPad) { - // Insert a label before the invoke call to mark the try range. This can be - // used to detect deletion of the invoke via the MachineModuleInfo. - BeginLabel = MMI.getContext().CreateTempSymbol(); - - // For SjLj, keep track of which landing pads go with which invokes - // so as to maintain the ordering of pads in the LSDA. - unsigned CallSiteIndex = MMI.getCurrentCallSite(); - if (CallSiteIndex) { - MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); - // Now that the call site is handled, stop tracking it. - MMI.setCurrentCallSite(0); - } - - // Both PendingLoads and PendingExports must be flushed here; - // this call might not return. - (void)getRoot(); - DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getControlRoot(), BeginLabel)); - } - - // Check if target-independent constraints permit a tail call here. - // Target-dependent constraints are checked within TLI.LowerCallTo. 
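The !CanLowerReturn block above is the caller-side half of sret demotion: when the return type cannot come back in registers, the caller creates a stack object (DemoteStackSlot), passes its address as a hidden argument marked isSRet, and treats the call as returning void. In source terms the rewrite looks roughly like the sketch below; the struct and function names are invented, and whether a given type is demoted is entirely up to the target's CanLowerReturn.

  struct Big { int Field[8]; };      // assume this is too large for the
                                     // target's return registers

  Big Callee();                      // the call as written

  void CalleeDemoted(Big *Ret);      // what the lowered call effectively takes:
                                     // a hidden sret pointer as argument 0

  void Caller() {
    Big Slot;                        // the DemoteStackSlot frame object
    CalleeDemoted(&Slot);            // the callee writes its result through Slot
    // ... the caller later reloads the pieces of Slot; that reload loop is
    // re-added further down in this diff ...
  }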
- if (isTailCall && - !isInTailCallPosition(CS, CS.getAttributes().getRetAttributes(), TLI)) - isTailCall = false; - - std::pair Result = - TLI.LowerCallTo(getRoot(), RetTy, - CS.paramHasAttr(0, Attribute::SExt), - CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(), - CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(), - CS.getCallingConv(), - isTailCall, - !CS.getInstruction()->use_empty(), - Callee, Args, DAG, getCurDebugLoc()); - assert((isTailCall || Result.second.getNode()) && - "Non-null chain expected with non-tail call!"); - assert((Result.second.getNode() || !Result.first.getNode()) && - "Null value expected with tail call!"); - if (Result.first.getNode()) { - setValue(CS.getInstruction(), Result.first); - } else if (!CanLowerReturn && Result.second.getNode()) { - // The instruction result is the result of loading from the - // hidden sret parameter. - SmallVector PVTs; - const Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType()); - - ComputeValueVTs(TLI, PtrRetTy, PVTs); - assert(PVTs.size() == 1 && "Pointers should fit in one register"); - EVT PtrVT = PVTs[0]; - unsigned NumValues = OutVTs.size(); - SmallVector Values(NumValues); - SmallVector Chains(NumValues); - - for (unsigned i = 0; i < NumValues; ++i) { - SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, - DemoteStackSlot, - DAG.getConstant(Offsets[i], PtrVT)); - SDValue L = DAG.getLoad(OutVTs[i], getCurDebugLoc(), Result.second, - Add, NULL, Offsets[i], false, false, 1); - Values[i] = L; - Chains[i] = L.getValue(1); - } - - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), - MVT::Other, &Chains[0], NumValues); - PendingLoads.push_back(Chain); - - // Collect the legal value parts into potentially illegal values - // that correspond to the original function's return values. - SmallVector RetTys; - RetTy = FTy->getReturnType(); - ComputeValueVTs(TLI, RetTy, RetTys); - ISD::NodeType AssertOp = ISD::DELETED_NODE; - SmallVector ReturnValues; - unsigned CurReg = 0; - for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { - EVT VT = RetTys[I]; - EVT RegisterVT = TLI.getRegisterType(RetTy->getContext(), VT); - unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT); - - SDValue ReturnValue = - getCopyFromParts(DAG, getCurDebugLoc(), &Values[CurReg], NumRegs, - RegisterVT, VT, AssertOp); - ReturnValues.push_back(ReturnValue); - CurReg += NumRegs; - } - - setValue(CS.getInstruction(), - DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), - DAG.getVTList(&RetTys[0], RetTys.size()), - &ReturnValues[0], ReturnValues.size())); - - } - - // As a special case, a null chain means that a tail call has been emitted and - // the DAG root is already updated. - if (Result.second.getNode()) - DAG.setRoot(Result.second); - else - HasTailCall = true; - - if (LandingPad) { - // Insert a label at the end of the invoke call to mark the try range. This - // can be used to detect deletion of the invoke via the MachineModuleInfo. - MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol(); - DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getRoot(), EndLabel)); - - // Inform MachineModuleInfo of range. - MMI.addInvoke(LandingPad, BeginLabel, EndLabel); - } -} - -/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the -/// value is equal or not-equal to zero. 
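The predicate introduced by the comment just above (its body follows, and a re-added copy appears later in the diff) checks that every user of the memcmp result is an equality compare against zero. Only then is the sign of memcmp's return value unobservable, which is what licenses replacing the call with a widened load-and-compare. For illustration, with invented function names:

  #include <cstring>

  // Qualifies: the only use of the result is a compare against zero, so the
  // ordering information memcmp would compute is never looked at.
  bool sameTag(const char *A, const char *B) {
    return std::memcmp(A, B, 4) == 0;
  }

  // Does not qualify: the caller can see which input compares greater, so the
  // real libcall must be kept.
  int orderTags(const char *A, const char *B) {
    return std::memcmp(A, B, 4);
  }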
-static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) { - for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); - UI != E; ++UI) { - if (const ICmpInst *IC = dyn_cast(*UI)) - if (IC->isEquality()) - if (const Constant *C = dyn_cast(IC->getOperand(1))) - if (C->isNullValue()) - continue; - // Unknown instruction. - return false; - } - return true; -} - -static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, - const Type *LoadTy, - SelectionDAGBuilder &Builder) { - - // Check to see if this load can be trivially constant folded, e.g. if the - // input is from a string literal. - if (const Constant *LoadInput = dyn_cast(PtrVal)) { - // Cast pointer to the type we really want to load. - LoadInput = ConstantExpr::getBitCast(const_cast(LoadInput), - PointerType::getUnqual(LoadTy)); - - if (const Constant *LoadCst = - ConstantFoldLoadFromConstPtr(const_cast(LoadInput), - Builder.TD)) - return Builder.getValue(LoadCst); - } - - // Otherwise, we have to emit the load. If the pointer is to unfoldable but - // still constant memory, the input chain can be the entry node. - SDValue Root; - bool ConstantMemory = false; + return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP); - // Do not serialize (non-volatile) loads of constant memory with anything. - if (Builder.AA->pointsToConstantMemory(PtrVal)) { - Root = Builder.DAG.getEntryNode(); - ConstantMemory = true; - } else { - // Do not serialize non-volatile loads against each other. - Root = Builder.DAG.getRoot(); + case Intrinsic::invariant_start: + case Intrinsic::lifetime_start: + // Discard region information. + setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); + return 0; + case Intrinsic::invariant_end: + case Intrinsic::lifetime_end: + // Discard region information. + return 0; } - - SDValue Ptr = Builder.getValue(PtrVal); - SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root, - Ptr, PtrVal /*SrcValue*/, 0/*SVOffset*/, - false /*volatile*/, - false /*nontemporal*/, 1 /* align=1 */); - - if (!ConstantMemory) - Builder.PendingLoads.push_back(LoadVal.getValue(1)); - return LoadVal; } +void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, + bool isTailCall, + MachineBasicBlock *LandingPad) { + const PointerType *PT = cast(CS.getCalledValue()->getType()); + const FunctionType *FTy = cast(PT->getElementType()); + const Type *RetTy = FTy->getReturnType(); + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); + MCSymbol *BeginLabel = 0; -/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form. -/// If so, return true and lower it, otherwise return false and it will be -/// lowered like a normal call. -bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { - // Verify that the prototype makes sense. int memcmp(void*,void*,size_t) - if (I.getNumOperands() != 4) - return false; + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Args.reserve(CS.arg_size()); - const Value *LHS = I.getOperand(1), *RHS = I.getOperand(2); - if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() || - !I.getOperand(3)->getType()->isIntegerTy() || - !I.getType()->isIntegerTy()) - return false; + // Check whether the function can return without sret-demotion. 
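The code that follows flattens the callee's return type into its legal value types (OutVTs, with per-piece offsets) and then asks the target, through CanLowerReturn, whether that list fits the return-register convention. The toy model below only shows the shape of that flattening; the types, the 32-bit target, and the resulting counts are illustrative, not taken from the patch.

  #include <cstdio>
  #include <vector>

  // Rough picture for a return type like { i64, float } on a 32-bit target:
  // ComputeValueVTs flattens the aggregate into value types, then each value
  // type expands into one or more legal register types (i64 -> two i32 parts).
  enum ToyVT { i32, i64, f32 };

  int main() {
    std::vector<ToyVT> ValueVTs;                 // after flattening: { i64, f32 }
    ValueVTs.push_back(i64);
    ValueVTs.push_back(f32);

    std::vector<ToyVT> RegVTs;                   // what the pieces travel in
    for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) {
      if (ValueVTs[I] == i64) { RegVTs.push_back(i32); RegVTs.push_back(i32); }
      else                      RegVTs.push_back(ValueVTs[I]);
    }
    std::printf("%u values -> %u register parts\n",
                (unsigned)ValueVTs.size(), (unsigned)RegVTs.size());
    return 0;
  }

If the resulting parts still cannot satisfy the calling convention, CanLowerReturn answers false and the sret-demotion path described earlier is taken instead.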
+ SmallVector OutVTs; + SmallVector OutsFlags; + SmallVector Offsets; + getReturnInfo(RetTy, CS.getAttributes().getRetAttributes(), + OutVTs, OutsFlags, TLI, &Offsets); - const ConstantInt *Size = dyn_cast(I.getOperand(3)); + bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), + FTy->isVarArg(), OutVTs, OutsFlags, DAG); - // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 - // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 - if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) { - bool ActuallyDoIt = true; - MVT LoadVT; - const Type *LoadTy; - switch (Size->getZExtValue()) { - default: - LoadVT = MVT::Other; - LoadTy = 0; - ActuallyDoIt = false; - break; - case 2: - LoadVT = MVT::i16; - LoadTy = Type::getInt16Ty(Size->getContext()); - break; - case 4: - LoadVT = MVT::i32; - LoadTy = Type::getInt32Ty(Size->getContext()); - break; - case 8: - LoadVT = MVT::i64; - LoadTy = Type::getInt64Ty(Size->getContext()); - break; - /* - case 16: - LoadVT = MVT::v4i32; - LoadTy = Type::getInt32Ty(Size->getContext()); - LoadTy = VectorType::get(LoadTy, 4); - break; - */ - } + SDValue DemoteStackSlot; - // This turns into unaligned loads. We only do this if the target natively - // supports the MVT we'll be loading or if it is small enough (<= 4) that - // we'll only produce a small number of byte loads. + if (!CanLowerReturn) { + uint64_t TySize = TLI.getTargetData()->getTypeAllocSize( + FTy->getReturnType()); + unsigned Align = TLI.getTargetData()->getPrefTypeAlignment( + FTy->getReturnType()); + MachineFunction &MF = DAG.getMachineFunction(); + int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); + const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType()); - // Require that we can find a legal MVT, and only do this if the target - // supports unaligned loads of that type. Expanding into byte loads would - // bloat the code. - if (ActuallyDoIt && Size->getZExtValue() > 4) { - // TODO: Handle 5 byte compare as 4-byte + 1 byte. - // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. 
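The size switch above picks an integer type wide enough to cover the whole comparison, and the check that follows only keeps the transformation when that type is legal and the target allows unaligned loads of it, since the pointers carry no alignment guarantee. At the source level the rewrite amounts to the following; memcpy is used here only to keep the illustration well defined, and the function name is invented.

  #include <cstdint>
  #include <cstring>

  // memcmp(A, B, 4) != 0 turned into one widened compare. The two loads may
  // be unaligned, hence the allowsUnalignedMemoryAccesses guard that follows.
  bool differ4(const void *A, const void *B) {
    uint32_t LA, LB;
    std::memcpy(&LA, A, sizeof LA);
    std::memcpy(&LB, B, sizeof LB);
    return LA != LB;                   // the SETNE the lowering emits
  }

Sizes 2 and 8 work the same way with i16 and i64; the commented-out 16-byte case would additionally need vector loads and a legal v4i32 compare.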
- if (!TLI.isTypeLegal(LoadVT) ||!TLI.allowsUnalignedMemoryAccesses(LoadVT)) - ActuallyDoIt = false; - } + DemoteStackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); + Entry.Node = DemoteStackSlot; + Entry.Ty = StackSlotPtrType; + Entry.isSExt = false; + Entry.isZExt = false; + Entry.isInReg = false; + Entry.isSRet = true; + Entry.isNest = false; + Entry.isByVal = false; + Entry.Alignment = Align; + Args.push_back(Entry); + RetTy = Type::getVoidTy(FTy->getContext()); + } - if (ActuallyDoIt) { - SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this); - SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this); + for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); + i != e; ++i) { + SDValue ArgNode = getValue(*i); + Entry.Node = ArgNode; Entry.Ty = (*i)->getType(); - SDValue Res = DAG.getSetCC(getCurDebugLoc(), MVT::i1, LHSVal, RHSVal, - ISD::SETNE); - EVT CallVT = TLI.getValueType(I.getType(), true); - setValue(&I, DAG.getZExtOrTrunc(Res, getCurDebugLoc(), CallVT)); - return true; - } + unsigned attrInd = i - CS.arg_begin() + 1; + Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); + Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt); + Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg); + Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet); + Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest); + Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal); + Entry.Alignment = CS.getParamAlignment(attrInd); + Args.push_back(Entry); } + if (LandingPad) { + // Insert a label before the invoke call to mark the try range. This can be + // used to detect deletion of the invoke via the MachineModuleInfo. + BeginLabel = MMI.getContext().CreateTempSymbol(); - return false; -} + // For SjLj, keep track of which landing pads go with which invokes + // so as to maintain the ordering of pads in the LSDA. + unsigned CallSiteIndex = MMI.getCurrentCallSite(); + if (CallSiteIndex) { + MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); + // Now that the call site is handled, stop tracking it. + MMI.setCurrentCallSite(0); + } + // Both PendingLoads and PendingExports must be flushed here; + // this call might not return. + (void)getRoot(); + DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getControlRoot(), BeginLabel)); + } -void SelectionDAGBuilder::visitCall(const CallInst &I) { - const char *RenameFn = 0; - if (Function *F = I.getCalledFunction()) { - if (F->isDeclaration()) { - const TargetIntrinsicInfo *II = TM.getIntrinsicInfo(); - if (II) { - if (unsigned IID = II->getIntrinsicID(F)) { - RenameFn = visitIntrinsicCall(I, IID); - if (!RenameFn) - return; - } - } - if (unsigned IID = F->getIntrinsicID()) { - RenameFn = visitIntrinsicCall(I, IID); - if (!RenameFn) - return; - } - } + // Check if target-independent constraints permit a tail call here. + // Target-dependent constraints are checked within TLI.LowerCallTo. + if (isTailCall && + !isInTailCallPosition(CS, CS.getAttributes().getRetAttributes(), TLI)) + isTailCall = false; - // Check for well-known libc/libm calls. If the function is internal, it - // can't be a library call. - if (!F->hasLocalLinkage() && F->hasName()) { - StringRef Name = F->getName(); - if (Name == "copysign" || Name == "copysignf" || Name == "copysignl") { - if (I.getNumOperands() == 3 && // Basic sanity checks. 
- I.getOperand(1)->getType()->isFloatingPointTy() && - I.getType() == I.getOperand(1)->getType() && - I.getType() == I.getOperand(2)->getType()) { - SDValue LHS = getValue(I.getOperand(1)); - SDValue RHS = getValue(I.getOperand(2)); - setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(), - LHS.getValueType(), LHS, RHS)); - return; - } - } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") { - if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPointTy() && - I.getType() == I.getOperand(1)->getType()) { - SDValue Tmp = getValue(I.getOperand(1)); - setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); - return; - } - } else if (Name == "sin" || Name == "sinf" || Name == "sinl") { - if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPointTy() && - I.getType() == I.getOperand(1)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getOperand(1)); - setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); - return; - } - } else if (Name == "cos" || Name == "cosf" || Name == "cosl") { - if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPointTy() && - I.getType() == I.getOperand(1)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getOperand(1)); - setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); - return; - } - } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") { - if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPointTy() && - I.getType() == I.getOperand(1)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getOperand(1)); - setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); - return; - } - } else if (Name == "memcmp") { - if (visitMemCmpCall(I)) - return; - } + std::pair Result = + TLI.LowerCallTo(getRoot(), RetTy, + CS.paramHasAttr(0, Attribute::SExt), + CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(), + CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(), + CS.getCallingConv(), + isTailCall, + !CS.getInstruction()->use_empty(), + Callee, Args, DAG, getCurDebugLoc()); + assert((isTailCall || Result.second.getNode()) && + "Non-null chain expected with non-tail call!"); + assert((Result.second.getNode() || !Result.first.getNode()) && + "Null value expected with tail call!"); + if (Result.first.getNode()) { + setValue(CS.getInstruction(), Result.first); + } else if (!CanLowerReturn && Result.second.getNode()) { + // The instruction result is the result of loading from the + // hidden sret parameter. 
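When the call was demoted, Result.first is null and the block that follows rebuilds the return value by hand: one load per flattened piece at the offset recorded earlier, all chained to the call, then a TokenFactor and a MERGE_VALUES to stitch the pieces back into the original type. Continuing the earlier Big/Slot sketch (all names invented), the effect is roughly:

  struct Big { int Field[8]; };

  static void CalleeDemoted(Big *Ret) {        // stands in for the demoted call
    for (int I = 0; I != 8; ++I) Ret->Field[I] = I;
  }

  int Caller() {
    Big Slot;                                  // the DemoteStackSlot
    CalleeDemoted(&Slot);                      // Result.second is this call's chain

    // The loop below in the patch emits, per piece, an ISD::ADD of the slot
    // address and Offsets[i], then a load chained to the call; the loads are
    // gathered with a TokenFactor and pushed onto PendingLoads.
    int Piece0 = Slot.Field[0];
    int Piece1 = Slot.Field[1];
    return Piece0 + Piece1;                    // uses of the reassembled value
  }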
+ SmallVector PVTs; + const Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType()); + + ComputeValueVTs(TLI, PtrRetTy, PVTs); + assert(PVTs.size() == 1 && "Pointers should fit in one register"); + EVT PtrVT = PVTs[0]; + unsigned NumValues = OutVTs.size(); + SmallVector Values(NumValues); + SmallVector Chains(NumValues); + + for (unsigned i = 0; i < NumValues; ++i) { + SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, + DemoteStackSlot, + DAG.getConstant(Offsets[i], PtrVT)); + SDValue L = DAG.getLoad(OutVTs[i], getCurDebugLoc(), Result.second, + Add, NULL, Offsets[i], false, false, 1); + Values[i] = L; + Chains[i] = L.getValue(1); } - } else if (isa(I.getOperand(0))) { - visitInlineAsm(&I); - return; - } - SDValue Callee; - if (!RenameFn) - Callee = getValue(I.getOperand(0)); - else - Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy()); + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], NumValues); + PendingLoads.push_back(Chain); + + // Collect the legal value parts into potentially illegal values + // that correspond to the original function's return values. + SmallVector RetTys; + RetTy = FTy->getReturnType(); + ComputeValueVTs(TLI, RetTy, RetTys); + ISD::NodeType AssertOp = ISD::DELETED_NODE; + SmallVector ReturnValues; + unsigned CurReg = 0; + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + EVT RegisterVT = TLI.getRegisterType(RetTy->getContext(), VT); + unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT); + + SDValue ReturnValue = + getCopyFromParts(DAG, getCurDebugLoc(), &Values[CurReg], NumRegs, + RegisterVT, VT, AssertOp); + ReturnValues.push_back(ReturnValue); + CurReg += NumRegs; + } - // Check if we can potentially perform a tail call. More detailed checking is - // be done within LowerCallTo, after more information about the call is known. - LowerCallTo(&I, Callee, I.isTailCall()); -} + setValue(CS.getInstruction(), + DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&RetTys[0], RetTys.size()), + &ReturnValues[0], ReturnValues.size())); -/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from -/// this value and returns the result as a ValueVT value. This uses -/// Chain/Flag as the input and updates them for the output Chain/Flag. -/// If the Flag pointer is NULL, no flag is used. -SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, - FunctionLoweringInfo &FuncInfo, - DebugLoc dl, - SDValue &Chain, SDValue *Flag) const { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + } - // Assemble the legal parts into the final values. - SmallVector Values(ValueVTs.size()); - SmallVector Parts; - for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { - // Copy the legal parts from the registers. - EVT ValueVT = ValueVTs[Value]; - unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT); - EVT RegisterVT = RegVTs[Value]; + // As a special case, a null chain means that a tail call has been emitted and + // the DAG root is already updated. + if (Result.second.getNode()) + DAG.setRoot(Result.second); + else + HasTailCall = true; - Parts.resize(NumRegs); - for (unsigned i = 0; i != NumRegs; ++i) { - SDValue P; - if (Flag == 0) { - P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT); - } else { - P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag); - *Flag = P.getValue(2); - } + if (LandingPad) { + // Insert a label at the end of the invoke call to mark the try range. 
This + // can be used to detect deletion of the invoke via the MachineModuleInfo. + MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol(); + DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getRoot(), EndLabel)); - Chain = P.getValue(1); + // Inform MachineModuleInfo of range. + MMI.addInvoke(LandingPad, BeginLabel, EndLabel); + } +} - // If the source register was virtual and if we know something about it, - // add an assert node. - if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) && - RegisterVT.isInteger() && !RegisterVT.isVector()) { - unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister; - if (FuncInfo.LiveOutRegInfo.size() > SlotNo) { - const FunctionLoweringInfo::LiveOutInfo &LOI = - FuncInfo.LiveOutRegInfo[SlotNo]; +/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the +/// value is equal or not-equal to zero. +static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) { + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + if (const ICmpInst *IC = dyn_cast(*UI)) + if (IC->isEquality()) + if (const Constant *C = dyn_cast(IC->getOperand(1))) + if (C->isNullValue()) + continue; + // Unknown instruction. + return false; + } + return true; +} - unsigned RegSize = RegisterVT.getSizeInBits(); - unsigned NumSignBits = LOI.NumSignBits; - unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes(); +static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, + const Type *LoadTy, + SelectionDAGBuilder &Builder) { - // FIXME: We capture more information than the dag can represent. For - // now, just use the tightest assertzext/assertsext possible. - bool isSExt = true; - EVT FromVT(MVT::Other); - if (NumSignBits == RegSize) - isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 - else if (NumZeroBits >= RegSize-1) - isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1 - else if (NumSignBits > RegSize-8) - isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8 - else if (NumZeroBits >= RegSize-8) - isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8 - else if (NumSignBits > RegSize-16) - isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16 - else if (NumZeroBits >= RegSize-16) - isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 - else if (NumSignBits > RegSize-32) - isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32 - else if (NumZeroBits >= RegSize-32) - isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 + // Check to see if this load can be trivially constant folded, e.g. if the + // input is from a string literal. + if (const Constant *LoadInput = dyn_cast(PtrVal)) { + // Cast pointer to the type we really want to load. + LoadInput = ConstantExpr::getBitCast(const_cast(LoadInput), + PointerType::getUnqual(LoadTy)); - if (FromVT != MVT::Other) - P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl, - RegisterVT, P, DAG.getValueType(FromVT)); - } - } + if (const Constant *LoadCst = + ConstantFoldLoadFromConstPtr(const_cast(LoadInput), + Builder.TD)) + return Builder.getValue(LoadCst); + } - Parts[i] = P; - } + // Otherwise, we have to emit the load. If the pointer is to unfoldable but + // still constant memory, the input chain can be the entry node. + SDValue Root; + bool ConstantMemory = false; - Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), - NumRegs, RegisterVT, ValueVT); - Part += NumRegs; - Parts.clear(); + // Do not serialize (non-volatile) loads of constant memory with anything. 
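The branch that follows asks alias analysis whether the pointer refers to memory that can never be written. If it does, the load's chain starts at the DAG entry node rather than the current root, so no ordering edge ties it to any store. A small source-level picture of why that is safe, with invented names:

  static const int Table[4] = {1, 2, 3, 5};    // constant memory

  int lookup(int *Out, unsigned I) {
    *Out = 0;                                  // some unrelated store
    // The load from Table can never observe the store above, so nothing needs
    // to be serialized against it; the scheduler may order it freely.
    return Table[I & 3];
  }

The load built here is non-volatile by construction; when the memory is not provably constant, the normal root chain is used instead, as the else branch shows.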
+ if (Builder.AA->pointsToConstantMemory(PtrVal)) { + Root = Builder.DAG.getEntryNode(); + ConstantMemory = true; + } else { + // Do not serialize non-volatile loads against each other. + Root = Builder.DAG.getRoot(); } - return DAG.getNode(ISD::MERGE_VALUES, dl, - DAG.getVTList(&ValueVTs[0], ValueVTs.size()), - &Values[0], ValueVTs.size()); + SDValue Ptr = Builder.getValue(PtrVal); + SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root, + Ptr, PtrVal /*SrcValue*/, 0/*SVOffset*/, + false /*volatile*/, + false /*nontemporal*/, 1 /* align=1 */); + + if (!ConstantMemory) + Builder.PendingLoads.push_back(LoadVal.getValue(1)); + return LoadVal; } -/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the -/// specified value into the registers specified by this object. This uses -/// Chain/Flag as the input and updates them for the output Chain/Flag. -/// If the Flag pointer is NULL, no flag is used. -void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - // Get the list of the values's legal parts. - unsigned NumRegs = Regs.size(); - SmallVector Parts(NumRegs); - for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT ValueVT = ValueVTs[Value]; - unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT); - EVT RegisterVT = RegVTs[Value]; +/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form. +/// If so, return true and lower it, otherwise return false and it will be +/// lowered like a normal call. +bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { + // Verify that the prototype makes sense. int memcmp(void*,void*,size_t) + if (I.getNumOperands() != 4) + return false; - getCopyToParts(DAG, dl, - Val.getValue(Val.getResNo() + Value), - &Parts[Part], NumParts, RegisterVT); - Part += NumParts; - } + const Value *LHS = I.getOperand(1), *RHS = I.getOperand(2); + if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() || + !I.getOperand(3)->getType()->isIntegerTy() || + !I.getType()->isIntegerTy()) + return false; - // Copy the parts into the registers. - SmallVector Chains(NumRegs); - for (unsigned i = 0; i != NumRegs; ++i) { - SDValue Part; - if (Flag == 0) { - Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]); - } else { - Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag); - *Flag = Part.getValue(1); + const ConstantInt *Size = dyn_cast(I.getOperand(3)); + + // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 + // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 + if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) { + bool ActuallyDoIt = true; + MVT LoadVT; + const Type *LoadTy; + switch (Size->getZExtValue()) { + default: + LoadVT = MVT::Other; + LoadTy = 0; + ActuallyDoIt = false; + break; + case 2: + LoadVT = MVT::i16; + LoadTy = Type::getInt16Ty(Size->getContext()); + break; + case 4: + LoadVT = MVT::i32; + LoadTy = Type::getInt32Ty(Size->getContext()); + break; + case 8: + LoadVT = MVT::i64; + LoadTy = Type::getInt64Ty(Size->getContext()); + break; + /* + case 16: + LoadVT = MVT::v4i32; + LoadTy = Type::getInt32Ty(Size->getContext()); + LoadTy = VectorType::get(LoadTy, 4); + break; + */ } - Chains[i] = Part.getValue(0); - } - - if (NumRegs == 1 || Flag) - // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is - // flagged to it. 
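In the removed getCopyFromRegs a little further up, the LiveOutRegInfo bit counts are converted into the tightest AssertSext/AssertZext node that can be wrapped around the copied value. The cascade reads more easily as a plain function; the sketch below only restates that logic for a 32-bit register and drops the final i32 cases, which assert nothing narrower at that width.

  // NumSignBits and NumZeroBits stand for LOI.NumSignBits and
  // LOI.KnownZero.countLeadingOnes() in the code above.
  static const char *pickAssert(unsigned NumSignBits, unsigned NumZeroBits) {
    const unsigned RegSize = 32;
    if (NumSignBits == RegSize)      return "AssertSext from i1";
    if (NumZeroBits >= RegSize - 1)  return "AssertZext from i1";
    if (NumSignBits >  RegSize - 8)  return "AssertSext from i8";
    if (NumZeroBits >= RegSize - 8)  return "AssertZext from i8";
    if (NumSignBits >  RegSize - 16) return "AssertSext from i16";
    if (NumZeroBits >= RegSize - 16) return "AssertZext from i16";
    return "no assert narrower than the register";
  }

For example, a boolean defined by a setcc in another block typically has 31 known-zero leading bits in a 32-bit virtual register, so the copy gets AssertZext from i1 and later zero-extensions of it can fold away.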
That is the CopyToReg nodes and the user are considered - // a single scheduling unit. If we create a TokenFactor and return it as - // chain, then the TokenFactor is both a predecessor (operand) of the - // user as well as a successor (the TF operands are flagged to the user). - // c1, f1 = CopyToReg - // c2, f2 = CopyToReg - // c3 = TokenFactor c1, c2 - // ... - // = op c3, ..., f2 - Chain = Chains[NumRegs-1]; - else - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs); -} + // This turns into unaligned loads. We only do this if the target natively + // supports the MVT we'll be loading or if it is small enough (<= 4) that + // we'll only produce a small number of byte loads. -/// AddInlineAsmOperands - Add this value to the specified inlineasm node -/// operand list. This adds the code marker and includes the number of -/// values added into it. -void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, - unsigned MatchingIdx, - SelectionDAG &DAG, - std::vector &Ops) const { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + // Require that we can find a legal MVT, and only do this if the target + // supports unaligned loads of that type. Expanding into byte loads would + // bloat the code. + if (ActuallyDoIt && Size->getZExtValue() > 4) { + // TODO: Handle 5 byte compare as 4-byte + 1 byte. + // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. + if (!TLI.isTypeLegal(LoadVT) ||!TLI.allowsUnalignedMemoryAccesses(LoadVT)) + ActuallyDoIt = false; + } - unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); - if (HasMatching) - Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx); - SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); - Ops.push_back(Res); + if (ActuallyDoIt) { + SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this); + SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this); - for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { - unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); - EVT RegisterVT = RegVTs[Value]; - for (unsigned i = 0; i != NumRegs; ++i) { - assert(Reg < Regs.size() && "Mismatch in # registers expected"); - Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT)); + SDValue Res = DAG.getSetCC(getCurDebugLoc(), MVT::i1, LHSVal, RHSVal, + ISD::SETNE); + EVT CallVT = TLI.getValueType(I.getType(), true); + setValue(&I, DAG.getZExtOrTrunc(Res, getCurDebugLoc(), CallVT)); + return true; } } + + + return false; } -/// isAllocatableRegister - If the specified register is safe to allocate, -/// i.e. it isn't a stack pointer or some other special register, return the -/// register class for the register. Otherwise, return null. -static const TargetRegisterClass * -isAllocatableRegister(unsigned Reg, MachineFunction &MF, - const TargetLowering &TLI, - const TargetRegisterInfo *TRI) { - EVT FoundVT = MVT::Other; - const TargetRegisterClass *FoundRC = 0; - for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(), - E = TRI->regclass_end(); RCI != E; ++RCI) { - EVT ThisVT = MVT::Other; - const TargetRegisterClass *RC = *RCI; - // If none of the value types for this register class are valid, we - // can't use it. For example, 64-bit reg classes on 32-bit targets. - for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); - I != E; ++I) { - if (TLI.isTypeLegal(*I)) { - // If we have already found this register in a different register class, - // choose the one with the largest VT specified. 
For example, on - // PowerPC, we favor f64 register classes over f32. - if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) { - ThisVT = *I; - break; +void SelectionDAGBuilder::visitCall(const CallInst &I) { + const char *RenameFn = 0; + if (Function *F = I.getCalledFunction()) { + if (F->isDeclaration()) { + const TargetIntrinsicInfo *II = TM.getIntrinsicInfo(); + if (II) { + if (unsigned IID = II->getIntrinsicID(F)) { + RenameFn = visitIntrinsicCall(I, IID); + if (!RenameFn) + return; } } + if (unsigned IID = F->getIntrinsicID()) { + RenameFn = visitIntrinsicCall(I, IID); + if (!RenameFn) + return; + } } - if (ThisVT == MVT::Other) continue; - - // NOTE: This isn't ideal. In particular, this might allocate the - // frame pointer in functions that need it (due to them not being taken - // out of allocation, because a variable sized allocation hasn't been seen - // yet). This is a slight code pessimization, but should still work. - for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF), - E = RC->allocation_order_end(MF); I != E; ++I) - if (*I == Reg) { - // We found a matching register class. Keep looking at others in case - // we find one with larger registers that this physreg is also in. - FoundRC = RC; - FoundVT = ThisVT; - break; + // Check for well-known libc/libm calls. If the function is internal, it + // can't be a library call. + if (!F->hasLocalLinkage() && F->hasName()) { + StringRef Name = F->getName(); + if (Name == "copysign" || Name == "copysignf" || Name == "copysignl") { + if (I.getNumOperands() == 3 && // Basic sanity checks. + I.getOperand(1)->getType()->isFloatingPointTy() && + I.getType() == I.getOperand(1)->getType() && + I.getType() == I.getOperand(2)->getType()) { + SDValue LHS = getValue(I.getOperand(1)); + SDValue RHS = getValue(I.getOperand(2)); + setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(), + LHS.getValueType(), LHS, RHS)); + return; + } + } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") { + if (I.getNumOperands() == 2 && // Basic sanity checks. + I.getOperand(1)->getType()->isFloatingPointTy() && + I.getType() == I.getOperand(1)->getType()) { + SDValue Tmp = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if (Name == "sin" || Name == "sinf" || Name == "sinl") { + if (I.getNumOperands() == 2 && // Basic sanity checks. + I.getOperand(1)->getType()->isFloatingPointTy() && + I.getType() == I.getOperand(1)->getType() && + I.onlyReadsMemory()) { + SDValue Tmp = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if (Name == "cos" || Name == "cosf" || Name == "cosl") { + if (I.getNumOperands() == 2 && // Basic sanity checks. + I.getOperand(1)->getType()->isFloatingPointTy() && + I.getType() == I.getOperand(1)->getType() && + I.onlyReadsMemory()) { + SDValue Tmp = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") { + if (I.getNumOperands() == 2 && // Basic sanity checks. 
+ I.getOperand(1)->getType()->isFloatingPointTy() && + I.getType() == I.getOperand(1)->getType() && + I.onlyReadsMemory()) { + SDValue Tmp = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if (Name == "memcmp") { + if (visitMemCmpCall(I)) + return; } + } + } else if (isa(I.getOperand(0))) { + visitInlineAsm(&I); + return; } - return FoundRC; -} + SDValue Callee; + if (!RenameFn) + Callee = getValue(I.getOperand(0)); + else + Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy()); + + // Check if we can potentially perform a tail call. More detailed checking is + // be done within LowerCallTo, after more information about the call is known. + LowerCallTo(&I, Callee, I.isTailCall()); +} namespace llvm { + /// AsmOperandInfo - This contains information for each constraint that we are /// lowering. class LLVM_LIBRARY_VISIBILITY SDISelAsmOperandInfo : @@ -5044,8 +4994,56 @@ private: Regs.insert(*Aliases); } }; + } // end llvm namespace. +/// isAllocatableRegister - If the specified register is safe to allocate, +/// i.e. it isn't a stack pointer or some other special register, return the +/// register class for the register. Otherwise, return null. +static const TargetRegisterClass * +isAllocatableRegister(unsigned Reg, MachineFunction &MF, + const TargetLowering &TLI, + const TargetRegisterInfo *TRI) { + EVT FoundVT = MVT::Other; + const TargetRegisterClass *FoundRC = 0; + for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(), + E = TRI->regclass_end(); RCI != E; ++RCI) { + EVT ThisVT = MVT::Other; + + const TargetRegisterClass *RC = *RCI; + // If none of the value types for this register class are valid, we + // can't use it. For example, 64-bit reg classes on 32-bit targets. + for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); + I != E; ++I) { + if (TLI.isTypeLegal(*I)) { + // If we have already found this register in a different register class, + // choose the one with the largest VT specified. For example, on + // PowerPC, we favor f64 register classes over f32. + if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) { + ThisVT = *I; + break; + } + } + } + + if (ThisVT == MVT::Other) continue; + + // NOTE: This isn't ideal. In particular, this might allocate the + // frame pointer in functions that need it (due to them not being taken + // out of allocation, because a variable sized allocation hasn't been seen + // yet). This is a slight code pessimization, but should still work. + for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF), + E = RC->allocation_order_end(MF); I != E; ++I) + if (*I == Reg) { + // We found a matching register class. Keep looking at others in case + // we find one with larger registers that this physreg is also in. + FoundRC = RC; + FoundVT = ThisVT; + break; + } + } + return FoundRC; +} /// GetRegistersForValue - Assign registers (virtual or physical) for the /// specified operand. We prefer to assign virtual registers, to allow the