X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FSelectionDAG%2FStatepointLowering.cpp;h=050ec2116c5d8b2e783fe0200ff271d33c00e9ec;hb=da801219ba8d6d2a8663d4dd3c14e8e3fca35ba5;hp=8bbfa01e7594ca841de2bcb3fdcb0de903691679;hpb=bb3883dfbab9a48176497066ed40911431e0fda8;p=oota-llvm.git diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 8bbfa01e759..050ec2116c5 100644 --- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -95,6 +96,9 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, SDValue SpillSlot = Builder.DAG.CreateStackTemporary(ValueType); const unsigned FI = cast(SpillSlot)->getIndex(); + auto *MFI = Builder.DAG.getMachineFunction().getFrameInfo(); + MFI->markAsStatepointSpillSlotObjectIndex(FI); + Builder.FuncInfo.StatepointStackSlots.push_back(FI); AllocatedStackSlots.push_back(true); return SpillSlot; @@ -105,92 +109,145 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, return Builder.DAG.getFrameIndex(FI, ValueType); } // Note: We deliberately choose to advance this only on the failing path. - // Doing so on the suceeding path involes a bit of complexity that caused a - // minor bug previously. Unless performance shows this matters, please + // Doing so on the succeeding path involves a bit of complexity that caused + // a minor bug previously. Unless performance shows this matters, please // keep this code as simple as possible. NextSlotToAllocate++; } llvm_unreachable("infinite loop?"); } +/// Utility function for reservePreviousStackSlotForValue. Tries to find +/// stack slot index to which we have spilled value for previous statepoints. +/// LookUpDepth specifies maximum DFS depth this function is allowed to look. +static Optional findPreviousSpillSlot(const Value *Val, + SelectionDAGBuilder &Builder, + int LookUpDepth) { + // Can not look any further - give up now + if (LookUpDepth <= 0) + return Optional(); + + // Spill location is known for gc relocates + if (isGCRelocate(Val)) { + GCRelocateOperands RelocOps(cast(Val)); + + FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap = + Builder.FuncInfo.StatepointRelocatedValues[RelocOps.getStatepoint()]; + + auto It = SpillMap.find(RelocOps.getDerivedPtr()); + if (It == SpillMap.end()) + return Optional(); + + return It->second; + } + + // Look through bitcast instructions. + if (const BitCastInst *Cast = dyn_cast(Val)) { + return findPreviousSpillSlot(Cast->getOperand(0), Builder, LookUpDepth - 1); + } + + // Look through phi nodes + // All incoming values should have same known stack slot, otherwise result + // is unknown. + if (const PHINode *Phi = dyn_cast(Val)) { + Optional MergedResult = None; + + for (auto &IncomingValue : Phi->incoming_values()) { + Optional SpillSlot = + findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth - 1); + if (!SpillSlot.hasValue()) + return Optional(); + + if (MergedResult.hasValue() && *MergedResult != *SpillSlot) + return Optional(); + + MergedResult = SpillSlot; + } + return MergedResult; + } + + // TODO: We can do better for PHI nodes. In cases like this: + // ptr = phi(relocated_pointer, not_relocated_pointer) + // statepoint(ptr) + // We will return that stack slot for ptr is unknown. And later we might + // assign different stack slots for ptr and relocated_pointer. This limits + // llvm's ability to remove redundant stores. + // Unfortunately it's hard to accomplish in current infrastructure. + // We use this function to eliminate spill store completely, while + // in example we still need to emit store, but instead of any location + // we need to use special "preferred" location. + + // TODO: handle simple updates. If a value is modified and the original + // value is no longer live, it would be nice to put the modified value in the + // same slot. This allows folding of the memory accesses for some + // instructions types (like an increment). + // statepoint (i) + // i1 = i+1 + // statepoint (i1) + // However we need to be careful for cases like this: + // statepoint(i) + // i1 = i+1 + // statepoint(i, i1) + // Here we want to reserve spill slot for 'i', but not for 'i+1'. If we just + // put handling of simple modifications in this function like it's done + // for bitcasts we might end up reserving i's slot for 'i+1' because order in + // which we visit values is unspecified. + + // Don't know any information about this instruction + return Optional(); +} + /// Try to find existing copies of the incoming values in stack slots used for /// statepoint spilling. If we can find a spill slot for the incoming value, /// mark that slot as allocated, and reuse the same slot for this safepoint. -/// This helps to avoid series of loads and stores that only serve to resuffle +/// This helps to avoid series of loads and stores that only serve to reshuffle /// values on the stack between calls. -static void reservePreviousStackSlotForValue(SDValue Incoming, +static void reservePreviousStackSlotForValue(const Value *IncomingValue, SelectionDAGBuilder &Builder) { + SDValue Incoming = Builder.getValue(IncomingValue); + if (isa(Incoming) || isa(Incoming)) { // We won't need to spill this, so no need to check for previously // allocated stack slots return; } - SDValue Loc = Builder.StatepointLowering.getLocation(Incoming); - if (Loc.getNode()) { + SDValue OldLocation = Builder.StatepointLowering.getLocation(Incoming); + if (OldLocation.getNode()) // duplicates in input return; - } - - // Search back for the load from a stack slot pattern to find the original - // slot we allocated for this value. We could extend this to deal with - // simple modification patterns, but simple dealing with trivial load/store - // sequences helps a lot already. - if (LoadSDNode *Load = dyn_cast(Incoming)) { - if (auto *FI = dyn_cast(Load->getBasePtr())) { - const int Index = FI->getIndex(); - auto Itr = std::find(Builder.FuncInfo.StatepointStackSlots.begin(), - Builder.FuncInfo.StatepointStackSlots.end(), Index); - if (Itr == Builder.FuncInfo.StatepointStackSlots.end()) { - // not one of the lowering stack slots, can't reuse! - // TODO: Actually, we probably could reuse the stack slot if the value - // hasn't changed at all, but we'd need to look for intervening writes - return; - } else { - // This is one of our dedicated lowering slots - const int Offset = - std::distance(Builder.FuncInfo.StatepointStackSlots.begin(), Itr); - if (Builder.StatepointLowering.isStackSlotAllocated(Offset)) { - // stack slot already assigned to someone else, can't use it! - // TODO: currently we reserve space for gc arguments after doing - // normal allocation for deopt arguments. We should reserve for - // _all_ deopt and gc arguments, then start allocating. This - // will prevent some moves being inserted when vm state changes, - // but gc state doesn't between two calls. - return; - } - // Reserve this stack slot - Builder.StatepointLowering.reserveStackSlot(Offset); - } - // Cache this slot so we find it when going through the normal - // assignment loop. - SDValue Loc = - Builder.DAG.getTargetFrameIndex(Index, Incoming.getValueType()); + const int LookUpDepth = 6; + Optional Index = + findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth); + if (!Index.hasValue()) + return; - Builder.StatepointLowering.setLocation(Incoming, Loc); - } + auto Itr = std::find(Builder.FuncInfo.StatepointStackSlots.begin(), + Builder.FuncInfo.StatepointStackSlots.end(), *Index); + assert(Itr != Builder.FuncInfo.StatepointStackSlots.end() && + "value spilled to the unknown stack slot"); + + // This is one of our dedicated lowering slots + const int Offset = + std::distance(Builder.FuncInfo.StatepointStackSlots.begin(), Itr); + if (Builder.StatepointLowering.isStackSlotAllocated(Offset)) { + // stack slot already assigned to someone else, can't use it! + // TODO: currently we reserve space for gc arguments after doing + // normal allocation for deopt arguments. We should reserve for + // _all_ deopt and gc arguments, then start allocating. This + // will prevent some moves being inserted when vm state changes, + // but gc state doesn't between two calls. + return; } + // Reserve this stack slot + Builder.StatepointLowering.reserveStackSlot(Offset); - // TODO: handle case where a reloaded value flows through a phi to - // another safepoint. e.g. - // bb1: - // a' = relocated... - // bb2: % pred: bb1, bb3, bb4, etc. - // a_phi = phi(a', ...) - // statepoint ... a_phi - // NOTE: This will require reasoning about cross basic block values. This is - // decidedly non trivial and this might not be the right place to do it. We - // don't really have the information we need here... - - // TODO: handle simple updates. If a value is modified and the original - // value is no longer live, it would be nice to put the modified value in the - // same slot. This allows folding of the memory accesses for some - // instructions types (like an increment). - // statepoint (i) - // i1 = i+1 - // statepoint (i1) + // Cache this slot so we find it when going through the normal + // assignment loop. + SDValue Loc = Builder.DAG.getTargetFrameIndex(*Index, Incoming.getValueType()); + Builder.StatepointLowering.setLocation(Incoming, Loc); } /// Remove any duplicate (as SDValues) from the derived pointer pairs. This @@ -202,7 +259,7 @@ static void removeDuplicatesGCPtrs(SmallVectorImpl &Bases, SmallVectorImpl &Relocs, SelectionDAGBuilder &Builder) { - // This is horribly ineffecient, but I don't care right now + // This is horribly inefficient, but I don't care right now SmallSet Seen; SmallVector NewBases, NewPtrs, NewRelocs; @@ -230,13 +287,29 @@ static void removeDuplicatesGCPtrs(SmallVectorImpl &Bases, /// call node. Also update NodeMap so that getValue(statepoint) will /// reference lowered call result static SDNode * -lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad, +lowerCallFromStatepoint(ImmutableStatepoint ISP, const BasicBlock *EHPadBB, SelectionDAGBuilder &Builder, SmallVectorImpl &PendingExports) { ImmutableCallSite CS(ISP.getCallSite()); - SDValue ActualCallee = Builder.getValue(ISP.getActualCallee()); + SDValue ActualCallee; + + if (ISP.getNumPatchBytes() > 0) { + // If we've been asked to emit a nop sequence instead of a call instruction + // for this statepoint then don't lower the call target, but use a constant + // `null` instead. Not lowering the call target lets statepoint clients get + // away without providing a physical address for the symbolic call target at + // link time. + + const auto &TLI = Builder.DAG.getTargetLoweringInfo(); + const auto &DL = Builder.DAG.getDataLayout(); + + unsigned AS = ISP.getCalledValue()->getType()->getPointerAddressSpace(); + ActualCallee = Builder.DAG.getConstant(0, Builder.getCurSDLoc(), + TLI.getPointerTy(DL, AS)); + } else + ActualCallee = Builder.getValue(ISP.getCalledValue()); assert(CS.getCallingConv() != CallingConv::AnyReg && "anyregcc is not supported on statepoints!"); @@ -247,7 +320,7 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad, SDValue ReturnValue, CallEndVal; std::tie(ReturnValue, CallEndVal) = Builder.lowerCallOperands( ISP.getCallSite(), ImmutableStatepoint::CallArgsBeginPos, - ISP.getNumCallArgs(), ActualCallee, DefTy, LandingPad, + ISP.getNumCallArgs(), ActualCallee, DefTy, EHPadBB, false /* IsPatchPoint */); SDNode *CallEnd = CallEndVal.getNode(); @@ -264,29 +337,37 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad, // ch, glue = callseq_end ch, glue // get_return_value ch, glue // - // get_return_value can either be a CopyFromReg to grab the return value from - // %RAX, or it can be a LOAD to load a value returned by reference via a stack - // slot. + // get_return_value can either be a sequence of CopyFromReg instructions + // to grab the return value from the return register(s), or it can be a LOAD + // to load a value returned by reference via a stack slot. - if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg || - CallEnd->getOpcode() == ISD::LOAD)) - CallEnd = CallEnd->getOperand(0).getNode(); + if (HasDef) { + if (CallEnd->getOpcode() == ISD::LOAD) + CallEnd = CallEnd->getOperand(0).getNode(); + else + while (CallEnd->getOpcode() == ISD::CopyFromReg) + CallEnd = CallEnd->getOperand(0).getNode(); + } assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && "expected!"); - if (HasDef) { - if (CS.isInvoke()) { - // Result value will be used in different basic block for invokes - // so we need to export it now. But statepoint call has a different type - // than the actuall call. It means that standart exporting mechanism will - // create register of the wrong type. So instead we need to create - // register with correct type and save value into it manually. + // Export the result value if needed + const Instruction *GCResult = ISP.getGCResult(); + if (HasDef && GCResult) { + if (GCResult->getParent() != CS.getParent()) { + // Result value will be used in a different basic block so we need to + // export it now. + // Default exporting mechanism will not work here because statepoint call + // has a different type than the actual call. It means that by default + // llvm will create export register of the wrong type (always i32 in our + // case). So instead we need to create export register with correct type + // manually. // TODO: To eliminate this problem we can remove gc.result intrinsics - // completelly and make statepoint call to return a tuple. + // completely and make statepoint call to return a tuple. unsigned Reg = Builder.FuncInfo.CreateRegs(ISP.getActualReturnType()); - RegsForValue RFV(*Builder.DAG.getContext(), - Builder.DAG.getTargetLoweringInfo(), Reg, - ISP.getActualReturnType()); + RegsForValue RFV( + *Builder.DAG.getContext(), Builder.DAG.getTargetLoweringInfo(), + Builder.DAG.getDataLayout(), Reg, ISP.getActualReturnType()); SDValue Chain = Builder.DAG.getEntryNode(); RFV.getCopyToRegs(ReturnValue, Builder.DAG, Builder.getCurSDLoc(), Chain, @@ -294,8 +375,9 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad, PendingExports.push_back(Chain); Builder.FuncInfo.ValueMap[CS.getInstruction()] = Reg; } else { - // The value of the statepoint itself will be the value of call itself. - // We'll replace the actually call node shortly. gc_result will grab + // Result value will be used in a same basic block. Don't export it or + // perform any explicit register copies. + // We'll replace the actuall call node shortly. gc_result will grab // this value. Builder.setValue(CS.getInstruction(), ReturnValue); } @@ -319,8 +401,7 @@ static void getIncomingStatepointGCValues( SmallVectorImpl &Bases, SmallVectorImpl &Ptrs, SmallVectorImpl &Relocs, ImmutableStatepoint StatepointSite, SelectionDAGBuilder &Builder) { - for (GCRelocateOperands relocateOpers : - StatepointSite.getRelocates(StatepointSite)) { + for (GCRelocateOperands relocateOpers : StatepointSite.getRelocates()) { Relocs.push_back(relocateOpers.getUnderlyingCallSite().getInstruction()); Bases.push_back(relocateOpers.getBasePtr()); Ptrs.push_back(relocateOpers.getDerivedPtr()); @@ -359,7 +440,8 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, // chaining stores one after another, this may allow // a bit more optimal scheduling for them Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc, - MachinePointerInfo::getFixedStack(Index), + MachinePointerInfo::getFixedStack( + Builder.DAG.getMachineFunction(), Index), false, false, 0); Builder.StatepointLowering.setLocation(Incoming, Loc); @@ -431,21 +513,21 @@ static void lowerStatepointMetaArgs(SmallVectorImpl &Ops, // to the GCStrategy from there (yet). GCStrategy &S = Builder.GFI->getStrategy(); for (const Value *V : Bases) { - auto Opt = S.isGCManagedPointer(V); + auto Opt = S.isGCManagedPointer(V->getType()); if (Opt.hasValue()) { assert(Opt.getValue() && "non gc managed base pointer found in statepoint"); } } for (const Value *V : Ptrs) { - auto Opt = S.isGCManagedPointer(V); + auto Opt = S.isGCManagedPointer(V->getType()); if (Opt.hasValue()) { assert(Opt.getValue() && "non gc managed derived pointer found in statepoint"); } } for (const Value *V : Relocations) { - auto Opt = S.isGCManagedPointer(V); + auto Opt = S.isGCManagedPointer(V->getType()); if (Opt.hasValue()) { assert(Opt.getValue() && "non gc managed pointer relocated"); } @@ -458,15 +540,11 @@ static void lowerStatepointMetaArgs(SmallVectorImpl &Ops, // doesn't change semantics at all. It is important for performance that we // reserve slots for both deopt and gc values before lowering either. for (const Value *V : StatepointSite.vm_state_args()) { - SDValue Incoming = Builder.getValue(V); - reservePreviousStackSlotForValue(Incoming, Builder); + reservePreviousStackSlotForValue(V, Builder); } for (unsigned i = 0; i < Bases.size(); ++i) { - const Value *Base = Bases[i]; - reservePreviousStackSlotForValue(Builder.getValue(Base), Builder); - - const Value *Ptr = Ptrs[i]; - reservePreviousStackSlotForValue(Builder.getValue(Ptr), Builder); + reservePreviousStackSlotForValue(Bases[i], Builder); + reservePreviousStackSlotForValue(Ptrs[i], Builder); } // First, prefix the list with the number of unique values to be @@ -524,8 +602,7 @@ static void lowerStatepointMetaArgs(SmallVectorImpl &Ops, FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap = Builder.FuncInfo.StatepointRelocatedValues[StatepointInstr]; - for (GCRelocateOperands RelocateOpers : - StatepointSite.getRelocates(StatepointSite)) { + for (GCRelocateOperands RelocateOpers : StatepointSite.getRelocates()) { const Value *V = RelocateOpers.getDerivedPtr(); SDValue SDV = Builder.getValue(V); SDValue Loc = Builder.StatepointLowering.getLocation(SDV); @@ -534,19 +611,21 @@ static void lowerStatepointMetaArgs(SmallVectorImpl &Ops, SpillMap[V] = cast(Loc)->getIndex(); } else { // Record value as visited, but not spilled. This is case for allocas - // and constants. For this values we can avoid emiting spill load while + // and constants. For this values we can avoid emitting spill load while // visiting corresponding gc_relocate. // Actually we do not need to record them in this map at all. - // We do this only to check that we are not relocating any unvisited value. + // We do this only to check that we are not relocating any unvisited + // value. SpillMap[V] = None; // Default llvm mechanisms for exporting values which are used in // different basic blocks does not work for gc relocates. // Note that it would be incorrect to teach llvm that all relocates are - // uses of the corresponging values so that it would automatically + // uses of the corresponding values so that it would automatically // export them. Relocates of the spilled values does not use original // value. - if (StatepointSite.getCallSite().isInvoke()) + if (RelocateOpers.getUnderlyingCallSite().getParent() != + StatepointInstr->getParent()) Builder.ExportFromCurrentBlock(V); } } @@ -561,7 +640,7 @@ void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) { } void SelectionDAGBuilder::LowerStatepoint( - ImmutableStatepoint ISP, MachineBasicBlock *LandingPad /*=nullptr*/) { + ImmutableStatepoint ISP, const BasicBlock *EHPadBB /*= nullptr*/) { // The basic scheme here is that information about both the original call and // the safepoint is encoded in the CallInst. We create a temporary call and // lower it, then reverse engineer the calling sequence. @@ -573,14 +652,12 @@ void SelectionDAGBuilder::LowerStatepoint( ImmutableCallSite CS(ISP.getCallSite()); #ifndef NDEBUG - // Consistency check. Don't do this for invokes. It would be too - // expensive to preserve this information across different basic blocks - if (!CS.isInvoke()) { - for (const User *U : CS->users()) { - const CallInst *Call = cast(U); - if (isGCRelocate(Call)) - StatepointLowering.scheduleRelocCall(*Call); - } + // Consistency check. Check only relocates in the same basic block as thier + // statepoint. + for (const User *U : CS->users()) { + const CallInst *Call = cast(U); + if (isGCRelocate(Call) && Call->getParent() == CS.getParent()) + StatepointLowering.scheduleRelocCall(*Call); } #endif @@ -601,7 +678,7 @@ void SelectionDAGBuilder::LowerStatepoint( // Get call node, we will replace it later with statepoint SDNode *CallNode = - lowerCallFromStatepoint(ISP, LandingPad, *this, PendingExports); + lowerCallFromStatepoint(ISP, EHPadBB, *this, PendingExports); // Construct the actual GC_TRANSITION_START, STATEPOINT, and GC_TRANSITION_END // nodes with all the appropriate arguments and return values. @@ -743,7 +820,7 @@ void SelectionDAGBuilder::LowerStatepoint( // Replace original call DAG.ReplaceAllUsesWith(CallNode, SinkNode); // This may update Root - // Remove originall call node + // Remove original call node DAG.DeleteNode(CallNode); // DON'T set the root - under the assumption that it's already set past the @@ -762,14 +839,15 @@ void SelectionDAGBuilder::visitGCResult(const CallInst &CI) { Instruction *I = cast(CI.getArgOperand(0)); assert(isStatepoint(I) && "first argument must be a statepoint token"); - if (isa(I)) { - // For invokes we should have stored call result in a virtual register. + if (I->getParent() != CI.getParent()) { + // Statepoint is in different basic block so we should have stored call + // result in a virtual register. // We can not use default getValue() functionality to copy value from this // register because statepoint and actuall call return types can be // different, and getValue() will use CopyFromReg of the wrong type, // which is always i32 in our case. - PointerType *CalleeType = - cast(ImmutableStatepoint(I).getActualCallee()->getType()); + PointerType *CalleeType = cast( + ImmutableStatepoint(I).getCalledValue()->getType()); Type *RetTy = cast(CalleeType->getElementType())->getReturnType(); SDValue CopyFromReg = getCopyFromRegs(I, RetTy); @@ -786,9 +864,10 @@ void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) { #ifndef NDEBUG // Consistency check - // We skip this check for invoke statepoints. It would be too expensive to - // preserve validation info through different basic blocks. - if (!RelocateOpers.isTiedToInvoke()) { + // We skip this check for relocates not in the same basic block as thier + // statepoint. It would be too expensive to preserve validation info through + // different basic blocks. + if (RelocateOpers.getStatepoint()->getParent() == CI.getParent()) { StatepointLowering.relocCallVisited(CI); } #endif @@ -815,13 +894,14 @@ void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) { // Be conservative: flush all pending loads // TODO: Probably we can be less restrictive on this, - // it may allow more scheduling opprtunities + // it may allow more scheduling opportunities. SDValue Chain = getRoot(); SDValue SpillLoad = - DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot, - MachinePointerInfo::getFixedStack(*DerivedPtrLocation), - false, false, false, 0); + DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), + *DerivedPtrLocation), + false, false, false, 0); // Again, be conservative, don't emit pending loads DAG.setRoot(SpillLoad.getValue(1));