X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FSelectionDAG%2FSelectionDAGBuilder.cpp;h=b2b93c9ff57a1dba845cc85a216e0bc9a0e6cee7;hb=6ef94175d1bbab95f195770bb3c559b3ab38c4e5;hp=452f5614b7bf6a2e2e939f702acfe059af1a595f;hpb=4ecc82e592745d6995db83ea158b3c15011a84f8;p=oota-llvm.git diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 452f5614b7b..b2b93c9ff57 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -50,7 +50,6 @@ #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -84,9 +83,7 @@ LimitFPPrecision("limit-float-precision", // %buffer = alloca [4096 x i8] // %data = load [4096 x i8]* %argPtr // store [4096 x i8] %data, [4096 x i8]* %buffer -static cl::opt -MaxParallelChains("dag-chain-limit", cl::desc("Max parallel isel dag chains"), - cl::init(64), cl::Hidden); +static const unsigned MaxParallelChains = 64; static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, const SDValue *Parts, unsigned NumParts, @@ -283,12 +280,35 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, } // Vector/Vector bitcast. - return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + if (ValueVT.getSizeInBits() == PartVT.getSizeInBits()) + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + + assert(PartVT.getVectorNumElements() == ValueVT.getVectorNumElements() && + "Cannot handle this kind of promotion"); + // Promoted vector extract + bool Smaller = ValueVT.bitsLE(PartVT); + return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), + DL, ValueVT, Val); + } - assert(ValueVT.getVectorElementType() == PartVT && - ValueVT.getVectorNumElements() == 1 && + // Trivial bitcast if the types are the same size and the destination + // vector type is legal. + if (PartVT.getSizeInBits() == ValueVT.getSizeInBits() && + TLI.isTypeLegal(ValueVT)) + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + + // Handle cases such as i8 -> <1 x i1> + assert(ValueVT.getVectorNumElements() == 1 && "Only trivial scalar-to-vector conversions should get here!"); + + if (ValueVT.getVectorNumElements() == 1 && + ValueVT.getVectorElementType() != PartVT) { + bool Smaller = ValueVT.bitsLE(PartVT); + Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), + DL, ValueVT.getScalarType(), Val); + } + return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); } @@ -429,7 +449,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, // Bitconvert vector->vector case. Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } else if (PartVT.isVector() && - PartVT.getVectorElementType() == ValueVT.getVectorElementType()&& + PartVT.getVectorElementType() == ValueVT.getVectorElementType() && PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) { EVT ElementVT = PartVT.getVectorElementType(); // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in @@ -449,13 +469,25 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, //SDValue UndefElts = DAG.getUNDEF(VectorTy); //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts); - } else { + } else if (PartVT.isVector() && + PartVT.getVectorElementType().bitsGE( + ValueVT.getVectorElementType()) && + PartVT.getVectorNumElements() == ValueVT.getVectorNumElements()) { + + // Promoted vector extract + bool Smaller = PartVT.bitsLE(ValueVT); + Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), + DL, PartVT, Val); + } else{ // Vector -> scalar conversion. - assert(ValueVT.getVectorElementType() == PartVT && - ValueVT.getVectorNumElements() == 1 && + assert(ValueVT.getVectorNumElements() == 1 && "Only trivial vector-to-scalar conversions should get here!"); Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, DAG.getIntPtrConstant(0)); + + bool Smaller = ValueVT.bitsLE(PartVT); + Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), + DL, PartVT, Val); } Parts[0] = Val; @@ -546,7 +578,7 @@ namespace { : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} RegsForValue(LLVMContext &Context, const TargetLowering &tli, - unsigned Reg, const Type *Ty) { + unsigned Reg, Type *Ty) { ComputeValueVTs(tli, Ty, ValueVTs); for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { @@ -641,16 +673,17 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, // If the source register was virtual and if we know something about it, // add an assert node. if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) || - !RegisterVT.isInteger() || RegisterVT.isVector() || - !FuncInfo.LiveOutRegInfo.inBounds(Regs[Part+i])) + !RegisterVT.isInteger() || RegisterVT.isVector()) + continue; + + const FunctionLoweringInfo::LiveOutInfo *LOI = + FuncInfo.GetLiveOutRegInfo(Regs[Part+i]); + if (!LOI) continue; - - const FunctionLoweringInfo::LiveOutInfo &LOI = - FuncInfo.LiveOutRegInfo[Regs[Part+i]]; unsigned RegSize = RegisterVT.getSizeInBits(); - unsigned NumSignBits = LOI.NumSignBits; - unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes(); + unsigned NumSignBits = LOI->NumSignBits; + unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes(); // FIXME: We capture more information than the dag can represent. For // now, just use the tightest assertzext/assertsext possible. @@ -785,11 +818,20 @@ void SelectionDAGBuilder::clear() { UnusedArgNodeMap.clear(); PendingLoads.clear(); PendingExports.clear(); - DanglingDebugInfoMap.clear(); CurDebugLoc = DebugLoc(); HasTailCall = false; } +/// clearDanglingDebugInfo - Clear the dangling debug information +/// map. This function is seperated from the clear so that debug +/// information that is dangling in a basic block can be properly +/// resolved in a different basic block. This allows the +/// SelectionDAG to resolve dangling debug information attached +/// to PHI nodes. +void SelectionDAGBuilder::clearDanglingDebugInfo() { + DanglingDebugInfoMap.clear(); +} + /// getRoot - Return the current virtual root of the Selection DAG, /// flushing any PendingLoad items. This must be done before emitting /// a store or any other node that may need to be ordered after any @@ -908,7 +950,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, Val.getResNo(), Offset, dl, DbgSDNodeOrder); DAG.AddDbgValue(SDV, Val.getNode(), false); } - } else + } else DEBUG(dbgs() << "Dropping debug info for " << DI); DanglingDebugInfoMap[V] = DanglingDebugInfo(); } @@ -1027,7 +1069,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (const BlockAddress *BA = dyn_cast(C)) return DAG.getBlockAddress(BA, VT); - const VectorType *VecTy = cast(V->getType()); + VectorType *VecTy = cast(V->getType()); unsigned NumElements = VecTy->getNumElements(); // Now that we know the number and type of the elements, get that number of @@ -1129,15 +1171,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { else if (F->paramHasAttr(0, Attribute::ZExt)) ExtendKind = ISD::ZERO_EXTEND; - // FIXME: C calling convention requires the return type to be promoted - // to at least 32-bit. But this is not necessary for non-C calling - // conventions. The frontend should mark functions whose return values - // require promoting with signext or zeroext attributes. - if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { - EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32); - if (VT.bitsLT(MinVT)) - VT = MinVT; - } + if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) + VT = TLI.getTypeForExtArgOrReturn(*DAG.getContext(), VT, ExtendKind); unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT); EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); @@ -1152,9 +1187,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { Flags.setInReg(); // Propagate extension type if any - if (F->paramHasAttr(0, Attribute::SExt)) + if (ExtendKind == ISD::SIGN_EXTEND) Flags.setSExt(); - else if (F->paramHasAttr(0, Attribute::ZExt)) + else if (ExtendKind == ISD::ZERO_EXTEND) Flags.setZExt(); for (unsigned i = 0; i < NumParts; ++i) { @@ -1184,6 +1219,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { /// created for it, emit nodes to copy the value into the virtual /// registers. void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) { + // Skip empty types + if (V->getType()->isEmptyTy()) + return; + DenseMap::iterator VMI = FuncInfo.ValueMap.find(V); if (VMI != FuncInfo.ValueMap.end()) { assert(!V->use_empty() && "Unused value assigned virtual registers!"); @@ -1232,6 +1271,26 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, return true; } +/// Return branch probability calculated by BranchProbabilityInfo for IR blocks. +uint32_t SelectionDAGBuilder::getEdgeWeight(MachineBasicBlock *Src, + MachineBasicBlock *Dst) { + BranchProbabilityInfo *BPI = FuncInfo.BPI; + if (!BPI) + return 0; + const BasicBlock *SrcBB = Src->getBasicBlock(); + const BasicBlock *DstBB = Dst->getBasicBlock(); + return BPI->getEdgeWeight(SrcBB, DstBB); +} + +void SelectionDAGBuilder:: +addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst, + uint32_t Weight /* = 0 */) { + if (!Weight) + Weight = getEdgeWeight(Src, Dst); + Src->addSuccessor(Dst, Weight); +} + + static bool InBlock(const Value *V, const BasicBlock *BB) { if (const Instruction *I = dyn_cast(V)) return I->getParent() == BB; @@ -1417,7 +1476,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // jle foo // if (const BinaryOperator *BOp = dyn_cast(CondVal)) { - if (!TLI.isJumpExpensive() && + if (!TLI.isJumpExpensive() && BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And || BOp->getOpcode() == Instruction::Or)) { @@ -1501,8 +1560,8 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, } // Update successor info - SwitchBB->addSuccessor(CB.TrueBB); - SwitchBB->addSuccessor(CB.FalseBB); + addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight); + addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. @@ -1646,8 +1705,8 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock* MBB = B.Cases[0].ThisBB; - SwitchBB->addSuccessor(B.Default); - SwitchBB->addSuccessor(MBB); + addSuccessorWithWeight(SwitchBB, B.Default); + addSuccessorWithWeight(SwitchBB, MBB); SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), MVT::Other, CopyTo, RangeCmp, @@ -1670,7 +1729,8 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg, VT); SDValue Cmp; - if (CountPopulation_64(B.Mask) == 1) { + unsigned PopCount = CountPopulation_64(B.Mask); + if (PopCount == 1) { // Testing for a single bit; just compare the shift count with what it // would need to be to shift a 1 bit in that position. Cmp = DAG.getSetCC(getCurDebugLoc(), @@ -1678,6 +1738,13 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, ShiftOp, DAG.getConstant(CountTrailingZeros_64(B.Mask), VT), ISD::SETEQ); + } else if (PopCount == BB.Range) { + // There is only one zero bit in the range, test for it directly. + Cmp = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(VT), + ShiftOp, + DAG.getConstant(CountTrailingOnes_64(B.Mask), VT), + ISD::SETNE); } else { // Make desired shift SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT, @@ -1692,8 +1759,8 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, ISD::SETNE); } - SwitchBB->addSuccessor(B.TargetBB); - SwitchBB->addSuccessor(NextMBB); + addSuccessorWithWeight(SwitchBB, B.TargetBB); + addSuccessorWithWeight(SwitchBB, NextMBB); SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), MVT::Other, getControlRoot(), @@ -1743,6 +1810,49 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) { } +void SelectionDAGBuilder::visitResume(const ResumeInst &RI) { + llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!"); +} + +void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { + assert(FuncInfo.MBB->isLandingPad() && + "Call to landingpad not in landing pad!"); + + MachineBasicBlock *MBB = FuncInfo.MBB; + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); + AddLandingPadInfo(LP, MMI, MBB); + + SmallVector ValueVTs; + ComputeValueVTs(TLI, LP.getType(), ValueVTs); + + // Insert the EXCEPTIONADDR instruction. + assert(FuncInfo.MBB->isLandingPad() && + "Call to eh.exception not in landing pad!"); + SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); + SDValue Ops[2]; + Ops[0] = DAG.getRoot(); + SDValue Op1 = DAG.getNode(ISD::EXCEPTIONADDR, getCurDebugLoc(), VTs, Ops, 1); + SDValue Chain = Op1.getValue(1); + + // Insert the EHSELECTION instruction. + VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); + Ops[0] = Op1; + Ops[1] = Chain; + SDValue Op2 = DAG.getNode(ISD::EHSELECTION, getCurDebugLoc(), VTs, Ops, 2); + Chain = Op2.getValue(1); + Op2 = DAG.getSExtOrTrunc(Op2, getCurDebugLoc(), MVT::i32); + + Ops[0] = Op1; + Ops[1] = Op2; + SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getVTList(&ValueVTs[0], ValueVTs.size()), + &Ops[0], 2); + + std::pair RetPair = std::make_pair(Res, Chain); + setValue(&LP, RetPair.first); + DAG.setRoot(RetPair.second); +} + /// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for /// small case ranges). bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, @@ -1801,8 +1911,8 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, ISD::SETEQ); // Update successor info. - SwitchBB->addSuccessor(Small.BB); - SwitchBB->addSuccessor(Default); + addSuccessorWithWeight(SwitchBB, Small.BB); + addSuccessorWithWeight(SwitchBB, Default); // Insert the true branch. SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other, @@ -1858,7 +1968,11 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, CC = ISD::SETLE; LHS = I->Low; MHS = SV; RHS = I->High; } - CaseBlock CB(CC, LHS, RHS, MHS, I->BB, FallThrough, CurBlock); + + uint32_t ExtraWeight = I->ExtraWeight; + CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough, + /* me */ CurBlock, + /* trueweight */ ExtraWeight / 2, /* falseweight */ ExtraWeight / 2); // If emitting the first comparison, just call visitSwitchCase to emit the // code into the current block. Otherwise, push the CaseBlock onto the @@ -1888,10 +2002,10 @@ static APInt ComputeRange(const APInt &First, const APInt &Last) { } /// handleJTSwitchCase - Emit jumptable for current switch case range -bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, - CaseRecVector& WorkList, - const Value* SV, - MachineBasicBlock* Default, +bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, + CaseRecVector &WorkList, + const Value *SV, + MachineBasicBlock *Default, MachineBasicBlock *SwitchBB) { Case& FrontCase = *CR.Range.first; Case& BackCase = *(CR.Range.second-1); @@ -1900,8 +2014,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, const APInt &Last = cast(BackCase.High)->getValue(); APInt TSize(First.getBitWidth(), 0); - for (CaseItr I = CR.Range.first, E = CR.Range.second; - I!=E; ++I) + for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) TSize += I->size(); if (!areJTsAllowed(TLI) || TSize.ult(4)) @@ -1915,7 +2028,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, DEBUG(dbgs() << "Lowering jump table\n" << "First entry: " << First << ". Last entry: " << Last << '\n' << "Range: " << Range - << "Size: " << TSize << ". Density: " << Density << "\n\n"); + << ". Size: " << TSize << ". Density: " << Density << "\n\n"); // Get the MachineFunction which holds the current MBB. This is used when // inserting any additional MBBs necessary to represent the switch. @@ -1933,8 +2046,9 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, // table. MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB); CurMF->insert(BBI, JumpTableBB); - CR.CaseBB->addSuccessor(Default); - CR.CaseBB->addSuccessor(JumpTableBB); + + addSuccessorWithWeight(CR.CaseBB, Default); + addSuccessorWithWeight(CR.CaseBB, JumpTableBB); // Build a vector of destination BBs, corresponding to each target // of the jump table. If the value of the jump table slot corresponds to @@ -1961,7 +2075,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, E = DestBBs.end(); I != E; ++I) { if (!SuccsHandled[(*I)->getNumber()]) { SuccsHandled[(*I)->getNumber()] = true; - JumpTableBB->addSuccessor(*I); + addSuccessorWithWeight(JumpTableBB, *I); } } @@ -1978,7 +2092,6 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, visitJumpTableHeader(JT, JTH, SwitchBB); JTCases.push_back(JumpTableBlock(JTH, JT)); - return true; } @@ -2028,9 +2141,13 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, APInt Range = ComputeRange(LEnd, RBegin); assert((Range - 2ULL).isNonNegative() && "Invalid case distance"); - double LDensity = (double)LSize.roundToDouble() / + // Use volatile double here to avoid excess precision issues on some hosts, + // e.g. that use 80-bit X87 registers. + volatile double LDensity = + (double)LSize.roundToDouble() / (LEnd - First + 1ULL).roundToDouble(); - double RDensity = (double)RSize.roundToDouble() / + volatile double RDensity = + (double)RSize.roundToDouble() / (Last - RBegin + 1ULL).roundToDouble(); double Metric = Range.logBase2()*(LDensity+RDensity); // Should always split in some non-trivial place @@ -2248,12 +2365,17 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, const SwitchInst& SI) { size_t numCmps = 0; + BranchProbabilityInfo *BPI = FuncInfo.BPI; // Start with "simple" cases for (size_t i = 1; i < SI.getNumSuccessors(); ++i) { - MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)]; + BasicBlock *SuccBB = SI.getSuccessor(i); + MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; + + uint32_t ExtraWeight = BPI ? BPI->getEdgeWeight(SI.getParent(), SuccBB) : 0; + Cases.push_back(Case(SI.getSuccessorValue(i), SI.getSuccessorValue(i), - SMBB)); + SMBB, ExtraWeight)); } std::sort(Cases.begin(), Cases.end(), CaseCmp()); @@ -2273,6 +2395,16 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, if ((nextValue - currentValue == 1) && (currentBB == nextBB)) { I->High = J->High; J = Cases.erase(J); + + if (BranchProbabilityInfo *BPI = FuncInfo.BPI) { + uint32_t CurWeight = currentBB->getBasicBlock() ? + BPI->getEdgeWeight(SI.getParent(), currentBB->getBasicBlock()) : 16; + uint32_t NextWeight = nextBB->getBasicBlock() ? + BPI->getEdgeWeight(SI.getParent(), nextBB->getBasicBlock()) : 16; + + BPI->setEdgeWeight(SI.getParent(), currentBB->getBasicBlock(), + CurWeight + NextWeight); + } } else { I = J++; } @@ -2376,8 +2508,10 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { succs.push_back(I.getSuccessor(i)); array_pod_sort(succs.begin(), succs.end()); succs.erase(std::unique(succs.begin(), succs.end()), succs.end()); - for (unsigned i = 0, e = succs.size(); i != e; ++i) - IndirectBrMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]); + for (unsigned i = 0, e = succs.size(); i != e; ++i) { + MachineBasicBlock *Succ = FuncInfo.MBBMap[succs[i]]; + addSuccessorWithWeight(IndirectBrMBB, Succ); + } DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(), MVT::Other, getControlRoot(), @@ -2386,7 +2520,7 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { void SelectionDAGBuilder::visitFSub(const User &I) { // -0.0 - X --> fneg - const Type *Ty = I.getType(); + Type *Ty = I.getType(); if (isa(I.getOperand(0)) && I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) { SDValue Op2 = getValue(I.getOperand(1)); @@ -2408,19 +2542,19 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - - MVT ShiftTy = TLI.getShiftAmountTy(); - + + MVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType()); + // Coerce the shift amount to the right type if we can. if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { unsigned ShiftSize = ShiftTy.getSizeInBits(); unsigned Op2Size = Op2.getValueType().getSizeInBits(); DebugLoc DL = getCurDebugLoc(); - + // If the operand is smaller than the shift count type, promote it. if (ShiftSize > Op2Size) Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2); - + // If the operand is larger than the shift count type but the shift // count type has enough bits to represent any shift value, truncate // it now. This is a common case and it exposes the truncate to @@ -2437,6 +2571,22 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { Op1.getValueType(), Op1, Op2)); } +void SelectionDAGBuilder::visitSDiv(const User &I) { + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + + // Turn exact SDivs into multiplications. + // FIXME: This should be in DAGCombiner, but it doesn't have access to the + // exact bit. + if (isa(&I) && cast(&I)->isExact() && + !isa(Op1) && + isa(Op2) && !cast(Op2)->isNullValue()) + setValue(&I, TLI.BuildExactSDIV(Op1, Op2, getCurDebugLoc(), DAG)); + else + setValue(&I, DAG.getNode(ISD::SDIV, getCurDebugLoc(), Op1.getValueType(), + Op1, Op2)); +} + void SelectionDAGBuilder::visitICmp(const User &I) { ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; if (const ICmpInst *IC = dyn_cast(&I)) @@ -2474,10 +2624,12 @@ void SelectionDAGBuilder::visitSelect(const User &I) { SDValue Cond = getValue(I.getOperand(0)); SDValue TrueVal = getValue(I.getOperand(1)); SDValue FalseVal = getValue(I.getOperand(2)); + ISD::NodeType OpCode = Cond.getValueType().isVector() ? + ISD::VSELECT : ISD::SELECT; for (unsigned i = 0; i != NumValues; ++i) - Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(), - TrueVal.getNode()->getValueType(TrueVal.getResNo()+i), + Values[i] = DAG.getNode(OpCode, getCurDebugLoc(), + TrueVal.getNode()->getValueType(TrueVal.getResNo()+i), Cond, SDValue(TrueVal.getNode(), TrueVal.getResNo() + i), @@ -2690,7 +2842,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // Analyze the access pattern of the vector to see if we can extract // two subvectors and do the shuffle. The analysis is done by calculating // the range of elements the mask access on both vectors. - int MinRange[2] = { SrcNumElts+1, SrcNumElts+1}; + int MinRange[2] = { static_cast(SrcNumElts+1), + static_cast(SrcNumElts+1)}; int MaxRange[2] = {-1, -1}; for (unsigned i = 0; i != MaskNumElts; ++i) { @@ -2798,12 +2951,12 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { const Value *Op0 = I.getOperand(0); const Value *Op1 = I.getOperand(1); - const Type *AggTy = I.getType(); - const Type *ValTy = Op1->getType(); + Type *AggTy = I.getType(); + Type *ValTy = Op1->getType(); bool IntoUndef = isa(Op0); bool FromUndef = isa(Op1); - unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end()); + unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); SmallVector AggValueVTs; ComputeValueVTs(TLI, AggTy, AggValueVTs); @@ -2815,16 +2968,18 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { SmallVector Values(NumAggValues); SDValue Agg = getValue(Op0); - SDValue Val = getValue(Op1); unsigned i = 0; // Copy the beginning value(s) from the original aggregate. for (; i != LinearIndex; ++i) Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : SDValue(Agg.getNode(), Agg.getResNo() + i); // Copy values from the inserted value(s). - for (; i != LinearIndex + NumValValues; ++i) - Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) : - SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex); + if (NumValValues) { + SDValue Val = getValue(Op1); + for (; i != LinearIndex + NumValValues; ++i) + Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) : + SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex); + } // Copy remaining value(s) from the original aggregate. for (; i != NumAggValues; ++i) Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : @@ -2837,16 +2992,23 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { const Value *Op0 = I.getOperand(0); - const Type *AggTy = Op0->getType(); - const Type *ValTy = I.getType(); + Type *AggTy = Op0->getType(); + Type *ValTy = I.getType(); bool OutOfUndef = isa(Op0); - unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end()); + unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); SmallVector ValValueVTs; ComputeValueVTs(TLI, ValTy, ValValueVTs); unsigned NumValValues = ValValueVTs.size(); + + // Ignore a extractvalue that produces an empty object + if (!NumValValues) { + setValue(&I, DAG.getUNDEF(MVT(MVT::Other))); + return; + } + SmallVector Values(NumValValues); SDValue Agg = getValue(Op0); @@ -2864,12 +3026,12 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { void SelectionDAGBuilder::visitGetElementPtr(const User &I) { SDValue N = getValue(I.getOperand(0)); - const Type *Ty = I.getOperand(0)->getType(); + Type *Ty = I.getOperand(0)->getType(); for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); OI != E; ++OI) { const Value *Idx = *OI; - if (const StructType *StTy = dyn_cast(Ty)) { + if (StructType *StTy = dyn_cast(Ty)) { unsigned Field = cast(Idx)->getZExtValue(); if (Field) { // N = N + Offset @@ -2940,7 +3102,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { if (FuncInfo.StaticAllocaMap.count(&I)) return; // getValue will auto-populate this. - const Type *Ty = I.getAllocatedType(); + Type *Ty = I.getAllocatedType(); uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); unsigned Align = std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), @@ -2987,10 +3149,13 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { } void SelectionDAGBuilder::visitLoad(const LoadInst &I) { + if (I.isAtomic()) + return visitAtomicLoad(I); + const Value *SV = I.getOperand(0); SDValue Ptr = getValue(SV); - const Type *Ty = I.getType(); + Type *Ty = I.getType(); bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata("nontemporal") != 0; @@ -3064,6 +3229,9 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { } void SelectionDAGBuilder::visitStore(const StoreInst &I) { + if (I.isAtomic()) + return visitAtomicStore(I); + const Value *SrcV = I.getOperand(0); const Value *PtrV = I.getOperand(1); @@ -3114,6 +3282,179 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { DAG.setRoot(StoreNode); } +static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order, + SynchronizationScope Scope, + bool Before, DebugLoc dl, + SelectionDAG &DAG, + const TargetLowering &TLI) { + // Fence, if necessary + if (Before) { + if (Order == AcquireRelease || Order == SequentiallyConsistent) + Order = Release; + else if (Order == Acquire || Order == Monotonic) + return Chain; + } else { + if (Order == AcquireRelease) + Order = Acquire; + else if (Order == Release || Order == Monotonic) + return Chain; + } + SDValue Ops[3]; + Ops[0] = Chain; + Ops[1] = DAG.getConstant(Order, TLI.getPointerTy()); + Ops[2] = DAG.getConstant(Scope, TLI.getPointerTy()); + return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3); +} + +void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { + DebugLoc dl = getCurDebugLoc(); + AtomicOrdering Order = I.getOrdering(); + SynchronizationScope Scope = I.getSynchScope(); + + SDValue InChain = getRoot(); + + if (TLI.getInsertFencesForAtomic()) + InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, + DAG, TLI); + + SDValue L = + DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, + getValue(I.getCompareOperand()).getValueType().getSimpleVT(), + InChain, + getValue(I.getPointerOperand()), + getValue(I.getCompareOperand()), + getValue(I.getNewValOperand()), + MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */, + TLI.getInsertFencesForAtomic() ? Monotonic : Order, + Scope); + + SDValue OutChain = L.getValue(1); + + if (TLI.getInsertFencesForAtomic()) + OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, + DAG, TLI); + + setValue(&I, L); + DAG.setRoot(OutChain); +} + +void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { + DebugLoc dl = getCurDebugLoc(); + ISD::NodeType NT; + switch (I.getOperation()) { + default: llvm_unreachable("Unknown atomicrmw operation"); return; + case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break; + case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break; + case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break; + case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break; + case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break; + case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break; + case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break; + case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break; + case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break; + case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break; + case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break; + } + AtomicOrdering Order = I.getOrdering(); + SynchronizationScope Scope = I.getSynchScope(); + + SDValue InChain = getRoot(); + + if (TLI.getInsertFencesForAtomic()) + InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, + DAG, TLI); + + SDValue L = + DAG.getAtomic(NT, dl, + getValue(I.getValOperand()).getValueType().getSimpleVT(), + InChain, + getValue(I.getPointerOperand()), + getValue(I.getValOperand()), + I.getPointerOperand(), 0 /* Alignment */, + TLI.getInsertFencesForAtomic() ? Monotonic : Order, + Scope); + + SDValue OutChain = L.getValue(1); + + if (TLI.getInsertFencesForAtomic()) + OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, + DAG, TLI); + + setValue(&I, L); + DAG.setRoot(OutChain); +} + +void SelectionDAGBuilder::visitFence(const FenceInst &I) { + DebugLoc dl = getCurDebugLoc(); + SDValue Ops[3]; + Ops[0] = getRoot(); + Ops[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy()); + Ops[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy()); + DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3)); +} + +void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { + DebugLoc dl = getCurDebugLoc(); + AtomicOrdering Order = I.getOrdering(); + SynchronizationScope Scope = I.getSynchScope(); + + SDValue InChain = getRoot(); + + EVT VT = EVT::getEVT(I.getType()); + + if (I.getAlignment() * 8 < VT.getSizeInBits()) + report_fatal_error("Cannot generate unaligned atomic load"); + + SDValue L = + DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain, + getValue(I.getPointerOperand()), + I.getPointerOperand(), I.getAlignment(), + TLI.getInsertFencesForAtomic() ? Monotonic : Order, + Scope); + + SDValue OutChain = L.getValue(1); + + if (TLI.getInsertFencesForAtomic()) + OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, + DAG, TLI); + + setValue(&I, L); + DAG.setRoot(OutChain); +} + +void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { + DebugLoc dl = getCurDebugLoc(); + + AtomicOrdering Order = I.getOrdering(); + SynchronizationScope Scope = I.getSynchScope(); + + SDValue InChain = getRoot(); + + EVT VT = EVT::getEVT(I.getValueOperand()->getType()); + + if (I.getAlignment() * 8 < VT.getSizeInBits()) + report_fatal_error("Cannot generate unaligned atomic store"); + + if (TLI.getInsertFencesForAtomic()) + InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, + DAG, TLI); + + SDValue OutChain = + DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT, + InChain, + getValue(I.getPointerOperand()), + getValue(I.getValueOperand()), + I.getPointerOperand(), I.getAlignment(), + TLI.getInsertFencesForAtomic() ? Monotonic : Order, + Scope); + + if (TLI.getInsertFencesForAtomic()) + OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, + DAG, TLI); + + DAG.setRoot(OutChain); +} + /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC /// node. void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, @@ -3193,7 +3534,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, } if (!I.getType()->isVoidTy()) { - if (const VectorType *PTy = dyn_cast(I.getType())) { + if (VectorType *PTy = dyn_cast(I.getType())) { EVT VT = TLI.getValueType(PTy); Result = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), VT, Result); } @@ -3253,7 +3594,8 @@ SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I, Root, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), - I.getArgOperand(0)); + I.getArgOperand(0), 0 /* Alignment */, + Monotonic, CrossThread); setValue(&I, L); DAG.setRoot(L.getValue(1)); return 0; @@ -4018,6 +4360,24 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS, return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); } +// getTruncatedArgReg - Find underlying register used for an truncated +// argument. +static unsigned getTruncatedArgReg(const SDValue &N) { + if (N.getOpcode() != ISD::TRUNCATE) + return 0; + + const SDValue &Ext = N.getOperand(0); + if (Ext.getOpcode() == ISD::AssertZext || Ext.getOpcode() == ISD::AssertSext){ + const SDValue &CFR = Ext.getOperand(0); + if (CFR.getOpcode() == ISD::CopyFromReg) + return cast(CFR.getOperand(1))->getReg(); + else + if (CFR.getOpcode() == ISD::TRUNCATE) + return getTruncatedArgReg(CFR); + } + return 0; +} + /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function /// argument, create the corresponding DBG_VALUE machine instruction for it now. /// At the end of instruction selection, they will be inserted to the entry BB. @@ -4038,24 +4398,18 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, if (DV.isInlinedFnArgument(MF.getFunction())) return false; - MachineBasicBlock *MBB = FuncInfo.MBB; - if (MBB != &MF.front()) - return false; - unsigned Reg = 0; - if (Arg->hasByValAttr()) { - // Byval arguments' frame index is recorded during argument lowering. - // Use this info directly. - Reg = TRI->getFrameRegister(MF); - Offset = FuncInfo.getByValArgumentFrameIndex(Arg); - // If byval argument ofset is not recorded then ignore this. - if (!Offset) - Reg = 0; - } - - if (N.getNode() && N.getOpcode() == ISD::CopyFromReg) { - Reg = cast(N.getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + // Some arguments' frame index is recorded during argument lowering. + Offset = FuncInfo.getArgumentFrameIndex(Arg); + if (Offset) + Reg = TRI->getFrameRegister(MF); + + if (!Reg && N.getNode()) { + if (N.getOpcode() == ISD::CopyFromReg) + Reg = cast(N.getOperand(1))->getReg(); + else + Reg = getTruncatedArgReg(N); + if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { MachineRegisterInfo &RegInfo = MF.getRegInfo(); unsigned PR = RegInfo.getLiveInPhysReg(Reg); if (PR) @@ -4217,9 +4571,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDV = DAG.getDbgValue(Variable, FINode->getIndex(), 0, dl, SDNodeOrder); else { - // Can't do anything with other non-AI cases yet. This might be a - // parameter of a callee function that got inlined, for example. - DEBUG(dbgs() << "Dropping debug info for " << DI); + // Address is an argument, so try to emit its dbg value using + // virtual register info from the FuncInfo.ValueMap. + EmitFuncArgumentDbgValue(Address, Variable, 0, N); return 0; } } else if (AI) @@ -4271,7 +4625,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // debug info exists. ++SDNodeOrder; SDDbgValue *SDV; - if (isa(V) || isa(V)) { + if (isa(V) || isa(V) || isa(V)) { SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder); DAG.AddDbgValue(SDV, 0, false); } else { @@ -4400,6 +4754,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MMI.setCurrentCallSite(CI->getZExtValue()); return 0; } + case Intrinsic::eh_sjlj_functioncontext: { + // Get and store the index of the function context. + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + AllocaInst *FnCtx = cast(I.getArgOperand(0)); + int FI = FuncInfo.StaticAllocaMap[FnCtx]; + MFI->setFunctionContextIndex(FI); + return 0; + } case Intrinsic::eh_sjlj_setjmp: { setValue(&I, DAG.getNode(ISD::EH_SJLJ_SETJMP, dl, MVT::i32, getRoot(), getValue(I.getArgOperand(0)))); @@ -4545,6 +4907,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::pow: visitPow(I); return 0; + case Intrinsic::fma: + setValue(&I, DAG.getNode(ISD::FMA, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)))); + return 0; case Intrinsic::convert_to_fp16: setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl, MVT::i16, getValue(I.getArgOperand(0)))); @@ -4657,12 +5026,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Ops[4] = DAG.getSrcValue(I.getArgOperand(0)); Ops[5] = DAG.getSrcValue(F); - Res = DAG.getNode(ISD::TRAMPOLINE, dl, - DAG.getVTList(TLI.getPointerTy(), MVT::Other), - Ops, 6); + Res = DAG.getNode(ISD::INIT_TRAMPOLINE, dl, MVT::Other, Ops, 6); - setValue(&I, Res); - DAG.setRoot(Res.getValue(1)); + DAG.setRoot(Res); + return 0; + } + case Intrinsic::adjust_trampoline: { + setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, dl, + TLI.getPointerTy(), + getValue(I.getArgOperand(0)))); return 0; } case Intrinsic::gcroot: @@ -4681,9 +5053,29 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::flt_rounds: setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); return 0; - case Intrinsic::trap: - DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot())); + + case Intrinsic::expect: { + // Just replace __builtin_expect(exp, c) with EXP. + setValue(&I, getValue(I.getArgOperand(0))); return 0; + } + + case Intrinsic::trap: { + StringRef TrapFuncName = getTrapFunctionName(); + if (TrapFuncName.empty()) { + DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot())); + return 0; + } + TargetLowering::ArgListTy Args; + std::pair Result = + TLI.LowerCallTo(getRoot(), I.getType(), + false, false, false, false, 0, CallingConv::C, + /*isTailCall=*/false, /*isReturnValueUsed=*/true, + DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), + Args, DAG, getCurDebugLoc()); + DAG.setRoot(Result.second); + return 0; + } case Intrinsic::uadd_with_overflow: return implVisitAluOverflow(I, ISD::UADDO); case Intrinsic::sadd_with_overflow: @@ -4698,15 +5090,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return implVisitAluOverflow(I, ISD::SMULO); case Intrinsic::prefetch: { - SDValue Ops[4]; + SDValue Ops[5]; unsigned rw = cast(I.getArgOperand(1))->getZExtValue(); Ops[0] = getRoot(); Ops[1] = getValue(I.getArgOperand(0)); Ops[2] = getValue(I.getArgOperand(1)); Ops[3] = getValue(I.getArgOperand(2)); + Ops[4] = getValue(I.getArgOperand(3)); DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl, DAG.getVTList(MVT::Other), - &Ops[0], 4, + &Ops[0], 5, EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)), 0, /* align */ @@ -4733,7 +5126,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)), - MachinePointerInfo(I.getArgOperand(0))); + MachinePointerInfo(I.getArgOperand(0)), 0 /* Alignment */, + Monotonic, CrossThread); setValue(&I, L); DAG.setRoot(L.getValue(1)); return 0; @@ -4776,9 +5170,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool isTailCall, MachineBasicBlock *LandingPad) { - const PointerType *PT = cast(CS.getCalledValue()->getType()); - const FunctionType *FTy = cast(PT->getElementType()); - const Type *RetTy = FTy->getReturnType(); + PointerType *PT = cast(CS.getCalledValue()->getType()); + FunctionType *FTy = cast(PT->getElementType()); + Type *RetTy = FTy->getReturnType(); MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); MCSymbol *BeginLabel = 0; @@ -4793,7 +5187,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, Outs, TLI, &Offsets); bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), - FTy->isVarArg(), Outs, FTy->getContext()); + DAG.getMachineFunction(), + FTy->isVarArg(), Outs, + FTy->getContext()); SDValue DemoteStackSlot; int DemoteStackIdx = -100; @@ -4805,7 +5201,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, FTy->getReturnType()); MachineFunction &MF = DAG.getMachineFunction(); DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); - const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType()); + Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType()); DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI.getPointerTy()); Entry.Node = DemoteStackSlot; @@ -4823,8 +5219,14 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { - SDValue ArgNode = getValue(*i); - Entry.Node = ArgNode; Entry.Ty = (*i)->getType(); + const Value *V = *i; + + // Skip empty types + if (V->getType()->isEmptyTy()) + continue; + + SDValue ArgNode = getValue(V); + Entry.Node = ArgNode; Entry.Ty = V->getType(); unsigned attrInd = i - CS.arg_begin() + 1; Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); @@ -4887,7 +5289,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // The instruction result is the result of loading from the // hidden sret parameter. SmallVector PVTs; - const Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType()); + Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType()); ComputeValueVTs(TLI, PtrRetTy, PVTs); assert(PVTs.size() == 1 && "Pointers should fit in one register"); @@ -4936,15 +5338,21 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), DAG.getVTList(&RetTys[0], RetTys.size()), &ReturnValues[0], ReturnValues.size())); - } - // As a special case, a null chain means that a tail call has been emitted and - // the DAG root is already updated. - if (Result.second.getNode()) - DAG.setRoot(Result.second); - else + // Assign order to nodes here. If the call does not produce a result, it won't + // be mapped to a SDNode and visit() will not assign it an order number. + if (!Result.second.getNode()) { + // As a special case, a null chain means that a tail call has been emitted and + // the DAG root is already updated. HasTailCall = true; + ++SDNodeOrder; + AssignOrderingToNode(DAG.getRoot().getNode()); + } else { + DAG.setRoot(Result.second); + ++SDNodeOrder; + AssignOrderingToNode(Result.second.getNode()); + } if (LandingPad) { // Insert a label at the end of the invoke call to mark the try range. This @@ -4974,7 +5382,7 @@ static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) { } static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, - const Type *LoadTy, + Type *LoadTy, SelectionDAGBuilder &Builder) { // Check to see if this load can be trivially constant folded, e.g. if the @@ -5037,7 +5445,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) { bool ActuallyDoIt = true; MVT LoadVT; - const Type *LoadTy; + Type *LoadTy; switch (Size->getZExtValue()) { default: LoadVT = MVT::Other; @@ -5105,14 +5513,14 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { // See if any floating point values are being passed to this function. This is // used to emit an undefined reference to fltused on Windows. - const FunctionType *FT = + FunctionType *FT = cast(I.getCalledValue()->getType()->getContainedType(0)); MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); if (FT->isVarArg() && !MMI.callsExternalVAFunctionWithFloatingPointArguments()) { for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { - const Type* T = I.getArgOperand(i)->getType(); - for (po_iterator i = po_begin(T), e = po_end(T); + Type* T = I.getArgOperand(i)->getType(); + for (po_iterator i = po_begin(T), e = po_end(T); i != e; ++i) { if (!i->isFloatingPointTy()) continue; MMI.setCallsExternalVAFunctionWithFloatingPointArguments(true); @@ -5210,12 +5618,11 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { LowerCallTo(&I, Callee, I.isTailCall()); } -namespace llvm { +namespace { /// AsmOperandInfo - This contains information for each constraint that we are /// lowering. -class LLVM_LIBRARY_VISIBILITY SDISelAsmOperandInfo : - public TargetLowering::AsmOperandInfo { +class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo { public: /// CallOperand - If this is the result output operand or a clobber /// this is null, otherwise it is the incoming operand to the CallInst. @@ -5257,17 +5664,23 @@ public: if (isa(CallOperandVal)) return TLI.getPointerTy(); - const llvm::Type *OpTy = CallOperandVal->getType(); + llvm::Type *OpTy = CallOperandVal->getType(); + // FIXME: code duplicated from TargetLowering::ParseConstraints(). // If this is an indirect operand, the operand is a pointer to the // accessed type. if (isIndirect) { - const llvm::PointerType *PtrTy = dyn_cast(OpTy); + llvm::PointerType *PtrTy = dyn_cast(OpTy); if (!PtrTy) report_fatal_error("Indirect operand for inline asm not a pointer!"); OpTy = PtrTy->getElementType(); } + // Look for vector wrapped in a struct. e.g. { <16 x i8> }. + if (StructType *STy = dyn_cast(OpTy)) + if (STy->getNumElements() == 1) + OpTy = STy->getElementType(0); + // If OpTy is not a single value, it may be a struct/union that we // can tile with integers. if (!OpTy->isSingleValueType() && OpTy->isSized()) { @@ -5303,55 +5716,7 @@ private: typedef SmallVector SDISelAsmOperandInfoVector; -} // end llvm namespace. - -/// isAllocatableRegister - If the specified register is safe to allocate, -/// i.e. it isn't a stack pointer or some other special register, return the -/// register class for the register. Otherwise, return null. -static const TargetRegisterClass * -isAllocatableRegister(unsigned Reg, MachineFunction &MF, - const TargetLowering &TLI, - const TargetRegisterInfo *TRI) { - EVT FoundVT = MVT::Other; - const TargetRegisterClass *FoundRC = 0; - for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(), - E = TRI->regclass_end(); RCI != E; ++RCI) { - EVT ThisVT = MVT::Other; - - const TargetRegisterClass *RC = *RCI; - // If none of the value types for this register class are valid, we - // can't use it. For example, 64-bit reg classes on 32-bit targets. - for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); - I != E; ++I) { - if (TLI.isTypeLegal(*I)) { - // If we have already found this register in a different register class, - // choose the one with the largest VT specified. For example, on - // PowerPC, we favor f64 register classes over f32. - if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) { - ThisVT = *I; - break; - } - } - } - - if (ThisVT == MVT::Other) continue; - - // NOTE: This isn't ideal. In particular, this might allocate the - // frame pointer in functions that need it (due to them not being taken - // out of allocation, because a variable sized allocation hasn't been seen - // yet). This is a slight code pessimization, but should still work. - for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF), - E = RC->allocation_order_end(MF); I != E; ++I) - if (*I == Reg) { - // We found a matching register class. Keep looking at others in case - // we find one with larger registers that this physreg is also in. - FoundRC = RC; - FoundVT = ThisVT; - break; - } - } - return FoundRC; -} +} // end anonymous namespace /// GetRegistersForValue - Assign registers (virtual or physical) for the /// specified operand. We prefer to assign virtual registers, to allow the @@ -5362,11 +5727,13 @@ isAllocatableRegister(unsigned Reg, MachineFunction &MF, /// OpInfo describes the operand. /// Input and OutputRegs are the set of already allocated physical registers. /// -void SelectionDAGBuilder:: -GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, - std::set &OutputRegs, - std::set &InputRegs) { - LLVMContext &Context = FuncInfo.Fn->getContext(); +static void GetRegistersForValue(SelectionDAG &DAG, + const TargetLowering &TLI, + DebugLoc DL, + SDISelAsmOperandInfo &OpInfo, + std::set &OutputRegs, + std::set &InputRegs) { + LLVMContext &Context = *DAG.getContext(); // Compute whether this value requires an input register, an output register, // or both. @@ -5412,7 +5779,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, // vector types). EVT RegVT = *PhysReg.second->vt_begin(); if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) { - OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), + OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) { @@ -5422,7 +5789,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, // machine. RegVT = EVT::getIntegerVT(Context, OpInfo.ConstraintVT.getSizeInBits()); - OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), + OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; } @@ -5485,52 +5852,6 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, return; } - // This is a reference to a register class that doesn't directly correspond - // to an LLVM register class. Allocate NumRegs consecutive, available, - // registers from the class. - std::vector RegClassRegs - = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode, - OpInfo.ConstraintVT); - - const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); - unsigned NumAllocated = 0; - for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) { - unsigned Reg = RegClassRegs[i]; - // See if this register is available. - if ((isOutReg && OutputRegs.count(Reg)) || // Already used. - (isInReg && InputRegs.count(Reg))) { // Already used. - // Make sure we find consecutive registers. - NumAllocated = 0; - continue; - } - - // Check to see if this register is allocatable (i.e. don't give out the - // stack pointer). - const TargetRegisterClass *RC = isAllocatableRegister(Reg, MF, TLI, TRI); - if (!RC) { // Couldn't allocate this register. - // Reset NumAllocated to make sure we return consecutive registers. - NumAllocated = 0; - continue; - } - - // Okay, this register is good, we can use it. - ++NumAllocated; - - // If we allocated enough consecutive registers, succeed. - if (NumAllocated == NumRegs) { - unsigned RegStart = (i-NumAllocated)+1; - unsigned RegEnd = i+1; - // Mark all of the allocated registers used. - for (unsigned i = RegStart; i != RegEnd; ++i) - Regs.push_back(RegClassRegs[i]); - - OpInfo.AssignedRegs = RegsForValue(Regs, *RC->vt_begin(), - OpInfo.ConstraintVT); - OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI); - return; - } - } - // Otherwise, we couldn't allocate enough registers for this. } @@ -5544,7 +5865,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { std::set OutputRegs, InputRegs; - TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(CS); + TargetLowering::AsmOperandInfoVector + TargetConstraints = TLI.ParseConstraints(CS); + bool hasMemory = false; unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. @@ -5568,7 +5891,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // corresponding argument. assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); - if (const StructType *STy = dyn_cast(CS.getType())) { + if (StructType *STy = dyn_cast(CS.getType())) { OpVT = TLI.getValueType(STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); @@ -5603,7 +5926,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { hasMemory = true; else { for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) { - TargetLowering::ConstraintType CType = TLI.getConstraintType(OpInfo.Codes[j]); + TargetLowering::ConstraintType + CType = TLI.getConstraintType(OpInfo.Codes[j]); if (CType == TargetLowering::C_Memory) { hasMemory = true; break; @@ -5634,10 +5958,15 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; if (OpInfo.ConstraintVT != Input.ConstraintVT) { + std::pair MatchRC = + TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, + OpInfo.ConstraintVT); + std::pair InputRC = + TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, + Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || - (OpInfo.ConstraintVT.getSizeInBits() != - Input.ConstraintVT.getSizeInBits())) { + (MatchRC.second != InputRC.second)) { report_fatal_error("Unsupported asm: input constraint" " with a matching output constraint of" " incompatible type!"); @@ -5653,12 +5982,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // need to to provide an address for the memory input. if (OpInfo.ConstraintType == TargetLowering::C_Memory && !OpInfo.isIndirect) { - assert((OpInfo.isMultipleAlternative || (OpInfo.Type == InlineAsm::isInput)) && + assert((OpInfo.isMultipleAlternative || + (OpInfo.Type == InlineAsm::isInput)) && "Can only indirectify direct input operands!"); // Memory operands really want the address of the value. If we don't have // an indirect input, put it in the constpool if we can, otherwise spill // it to a stack slot. + // TODO: This isn't quite right. We need to handle these according to + // the addressing mode that the constraint wants. Also, this may take + // an additional register for the computation and we don't want that + // either. // If the operand is a float, integer, or vector constant, spill to a // constant pool entry to get its address. @@ -5670,7 +6004,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } else { // Otherwise, create a stack slot and emit a store to it before the // asm. - const Type *Ty = OpVal->getType(); + Type *Ty = OpVal->getType(); uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty); MachineFunction &MF = DAG.getMachineFunction(); @@ -5693,7 +6027,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If this constraint is for a specific register, allocate it before // anything else. if (OpInfo.ConstraintType == TargetLowering::C_Register) - GetRegistersForValue(OpInfo, OutputRegs, InputRegs); + GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs, + InputRegs); } // Second pass - Loop over all of the operands, assigning virtual or physregs @@ -5704,7 +6039,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // C_Register operands have already been allocated, Other/Memory don't need // to be. if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) - GetRegistersForValue(OpInfo, OutputRegs, InputRegs); + GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs, + InputRegs); } // AsmNodeOperands - The operands for the ISD::INLINEASM node. @@ -5858,7 +6194,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (OpInfo.ConstraintType == TargetLowering::C_Other) { std::vector Ops; - TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0], + TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, Ops, DAG); if (Ops.empty()) report_fatal_error("Invalid operand for inline asm constraint '" + @@ -5893,8 +6229,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { "Don't know how to handle indirect register inputs yet!"); // Copy the input into the appropriate registers. - if (OpInfo.AssignedRegs.Regs.empty() || - !OpInfo.AssignedRegs.areValueTypesLegal(TLI)) + if (OpInfo.AssignedRegs.Regs.empty()) report_fatal_error("Couldn't allocate input reg for constraint '" + Twine(OpInfo.ConstraintCode) + "'!"); @@ -5909,8 +6244,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Add the clobbered value to the operand list, so that the register // allocator is aware that the physreg got clobbered. if (!OpInfo.AssignedRegs.Regs.empty()) - OpInfo.AssignedRegs.AddInlineAsmOperands( - InlineAsm::Kind_RegDefEarlyClobber, + OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber, false, 0, DAG, AsmNodeOperands); break; @@ -6031,7 +6365,7 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { /// FIXME: When all targets are /// migrated to using LowerCall, this hook should be integrated into SDISel. std::pair -TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, +TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy, bool RetSExt, bool RetZExt, bool isVarArg, bool isInreg, unsigned NumFixedArgs, CallingConv::ID CallConv, bool isTailCall, @@ -6048,7 +6382,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { EVT VT = ValueVTs[Value]; - const Type *ArgTy = VT.getTypeForEVT(RetTy->getContext()); + Type *ArgTy = VT.getTypeForEVT(RetTy->getContext()); SDValue Op = SDValue(Args[i].Node.getNode(), Args[i].Node.getResNo() + Value); ISD::ArgFlagsTy Flags; @@ -6065,16 +6399,17 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, Flags.setSRet(); if (Args[i].isByVal) { Flags.setByVal(); - const PointerType *Ty = cast(Args[i].Ty); - const Type *ElementTy = Ty->getElementType(); - unsigned FrameAlign = getByValTypeAlignment(ElementTy); - unsigned FrameSize = getTargetData()->getTypeAllocSize(ElementTy); + PointerType *Ty = cast(Args[i].Ty); + Type *ElementTy = Ty->getElementType(); + Flags.setByValSize(getTargetData()->getTypeAllocSize(ElementTy)); // For ByVal, alignment should come from FE. BE will guess if this // info is not there but there are cases it cannot get right. + unsigned FrameAlign; if (Args[i].Alignment) FrameAlign = Args[i].Alignment; + else + FrameAlign = getByValTypeAlignment(ElementTy); Flags.setByValAlign(FrameAlign); - Flags.setByValSize(FrameSize); } if (Args[i].isNest) Flags.setNest(); @@ -6180,7 +6515,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, // For a function returning void, there is no return value. We can't create // such a node, so we just return a null return value in that case. In - // that case, nothing will actualy look at the value. + // that case, nothing will actually look at the value. if (ReturnValues.empty()) return std::make_pair(SDValue(), Chain); @@ -6219,6 +6554,25 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { #include "llvm/CodeGen/SelectionDAGISel.h" +/// isOnlyUsedInEntryBlock - If the specified argument is only used in the +/// entry block, return true. This includes arguments used by switches, since +/// the switch may expand into multiple basic blocks. +static bool isOnlyUsedInEntryBlock(const Argument *A) { + // With FastISel active, we may be splitting blocks, so force creation + // of virtual registers for all non-dead arguments. + if (EnableFastISel) + return A->use_empty(); + + const BasicBlock *Entry = A->getParent()->begin(); + for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end(); + UI != E; ++UI) { + const User *U = *UI; + if (cast(U)->getParent() != Entry || isa(U)) + return false; // Use not in entry block. + } + return true; +} + void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { // If this is the entry block, emit arguments. const Function &F = *LLVMBB->getParent(); @@ -6256,7 +6610,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { EVT VT = ValueVTs[Value]; - const Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); + Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); ISD::ArgFlagsTy Flags; unsigned OriginalAlignment = TD->getABITypeAlignment(ArgTy); @@ -6271,16 +6625,17 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { Flags.setSRet(); if (F.paramHasAttr(Idx, Attribute::ByVal)) { Flags.setByVal(); - const PointerType *Ty = cast(I->getType()); - const Type *ElementTy = Ty->getElementType(); - unsigned FrameAlign = TLI.getByValTypeAlignment(ElementTy); - unsigned FrameSize = TD->getTypeAllocSize(ElementTy); + PointerType *Ty = cast(I->getType()); + Type *ElementTy = Ty->getElementType(); + Flags.setByValSize(TD->getTypeAllocSize(ElementTy)); // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. + unsigned FrameAlign; if (F.getParamAlignment(Idx)) FrameAlign = F.getParamAlignment(Idx); + else + FrameAlign = TLI.getByValTypeAlignment(ElementTy); Flags.setByValAlign(FrameAlign); - Flags.setByValSize(FrameSize); } if (F.paramHasAttr(Idx, Attribute::Nest)) Flags.setNest(); @@ -6362,8 +6717,8 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { if (I->use_empty() && NumValues) SDB->setUnusedArgValue(I, InVals[i]); - for (unsigned Value = 0; Value != NumValues; ++Value) { - EVT VT = ValueVTs[Value]; + for (unsigned Val = 0; Val != NumValues; ++Val) { + EVT VT = ValueVTs[Val]; EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT); unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT); @@ -6382,21 +6737,42 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { i += NumParts; } - // Note down frame index for byval arguments. - if (I->hasByValAttr() && !ArgValues.empty()) - if (FrameIndexSDNode *FI = - dyn_cast(ArgValues[0].getNode())) - FuncInfo->setByValArgumentFrameIndex(I, FI->getIndex()); + // We don't need to do anything else for unused arguments. + if (ArgValues.empty()) + continue; - if (!I->use_empty()) { - SDValue Res; - if (!ArgValues.empty()) - Res = DAG.getMergeValues(&ArgValues[0], NumValues, - SDB->getCurDebugLoc()); - SDB->setValue(I, Res); + // Note down frame index. + if (FrameIndexSDNode *FI = + dyn_cast(ArgValues[0].getNode())) + FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); + + SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues, + SDB->getCurDebugLoc()); + + SDB->setValue(I, Res); + if (!EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { + if (LoadSDNode *LNode = + dyn_cast(Res.getOperand(0).getNode())) + if (FrameIndexSDNode *FI = + dyn_cast(LNode->getBasePtr().getNode())) + FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); + } - // If this argument is live outside of the entry block, insert a copy from - // whereever we got it to the vreg that other BB's will reference it as. + // If this argument is live outside of the entry block, insert a copy from + // wherever we got it to the vreg that other BB's will reference it as. + if (!EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) { + // If we can, though, try to skip creating an unnecessary vreg. + // FIXME: This isn't very clean... it would be nice to make this more + // general. It's also subtly incompatible with the hacks FastISel + // uses with vregs. + unsigned Reg = cast(Res.getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + FuncInfo->ValueMap[I] = Reg; + continue; + } + } + if (!isOnlyUsedInEntryBlock(I)) { + FuncInfo->InitializeRegForValue(I); SDB->CopyToExportRegsIfNeeded(I); } } @@ -6442,6 +6818,10 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // Ignore dead phi's. if (PN->use_empty()) continue; + // Skip empty types + if (PN->getType()->isEmptyTy()) + continue; + unsigned Reg; const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);