X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FMachineCombiner.cpp;h=fa43c4dfa05a8d9c57b0e0146a170f60c307602c;hb=4474471834be491ad1c6de19da0fa82bf667e70a;hp=a4bc77edb753a63472b834a300f7601e9174ce6f;hpb=a33f5160ebbedcab3a8ba0fe9c67be9138e13eea;p=oota-llvm.git diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp index a4bc77edb75..fa43c4dfa05 100644 --- a/lib/CodeGen/MachineCombiner.cpp +++ b/lib/CodeGen/MachineCombiner.cpp @@ -10,6 +10,7 @@ // The machine combiner pass uses machine trace metrics to ensure the combined // instructions does not lengthen the critical path or the resource depth. //===----------------------------------------------------------------------===// + #define DEBUG_TYPE "machine-combiner" #include "llvm/ADT/Statistic.h" @@ -67,10 +68,11 @@ private: unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot, MachineTraceMetrics::Trace BlockTrace); bool - preservesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root, - MachineTraceMetrics::Trace BlockTrace, - SmallVectorImpl &InsInstrs, - DenseMap &InstrIdxForVirtReg); + improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root, + MachineTraceMetrics::Trace BlockTrace, + SmallVectorImpl &InsInstrs, + DenseMap &InstrIdxForVirtReg, + MachineCombinerPattern Pattern); bool preservesResourceLen(MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace, SmallVectorImpl &InsInstrs, @@ -121,9 +123,9 @@ unsigned MachineCombiner::getDepth(SmallVectorImpl &InsInstrs, DenseMap &InstrIdxForVirtReg, MachineTraceMetrics::Trace BlockTrace) { - SmallVector InstrDepth; - assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n"); + assert(TSchedModel.hasInstrSchedModelOrItineraries() && + "Missing machine model\n"); // For each instruction in the new sequence compute the depth based on the // operands. Use the trace information when possible. For new operands which @@ -179,8 +181,8 @@ MachineCombiner::getDepth(SmallVectorImpl &InsInstrs, /// \returns Latency of \p NewRoot unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot, MachineTraceMetrics::Trace BlockTrace) { - - assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n"); + assert(TSchedModel.hasInstrSchedModelOrItineraries() && + "Missing machine model\n"); // Check each definition in NewRoot and compute the latency unsigned NewRootLatency = 0; @@ -201,53 +203,86 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot, NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO, UseMO->findRegisterUseOperandIdx(MO.getReg())); } else { - LatencyOp = TSchedModel.computeInstrLatency(NewRoot->getOpcode()); + LatencyOp = TSchedModel.computeInstrLatency(NewRoot); } NewRootLatency = std::max(NewRootLatency, LatencyOp); } return NewRootLatency; } -/// True when the new instruction sequence does not -/// lengthen the critical path. The DAGCombine code sequence ends in MI -/// (Machine Instruction) Root. The new code sequence ends in MI NewRoot. A -/// necessary condition for the new sequence to replace the old sequence is that -/// it cannot lengthen the critical path. This is decided by the formula -/// (NewRootDepth + NewRootLatency) <= (RootDepth + RootLatency + RootSlack)). -/// The slack is the number of cycles Root can be delayed before the critical -/// patch becomes longer. -bool MachineCombiner::preservesCriticalPathLen( +/// The combiner's goal may differ based on which pattern it is attempting +/// to optimize. +enum class CombinerObjective { + MustReduceDepth, // The data dependency chain must be improved. + Default // The critical path must not be lengthened. +}; + +static CombinerObjective getCombinerObjective(MachineCombinerPattern P) { + // TODO: If C++ ever gets a real enum class, make this part of the + // MachineCombinerPattern class. + switch (P) { + case MachineCombinerPattern::REASSOC_AX_BY: + case MachineCombinerPattern::REASSOC_AX_YB: + case MachineCombinerPattern::REASSOC_XA_BY: + case MachineCombinerPattern::REASSOC_XA_YB: + return CombinerObjective::MustReduceDepth; + default: + return CombinerObjective::Default; + } +} + +/// The DAGCombine code sequence ends in MI (Machine Instruction) Root. +/// The new code sequence ends in MI NewRoot. A necessary condition for the new +/// sequence to replace the old sequence is that it cannot lengthen the critical +/// path. The definition of "improve" may be restricted by specifying that the +/// new path improves the data dependency chain (MustReduceDepth). +bool MachineCombiner::improvesCriticalPathLen( MachineBasicBlock *MBB, MachineInstr *Root, MachineTraceMetrics::Trace BlockTrace, SmallVectorImpl &InsInstrs, - DenseMap &InstrIdxForVirtReg) { - - assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n"); - // NewRoot is the last instruction in the \p InsInstrs vector - // Get depth and latency of NewRoot + DenseMap &InstrIdxForVirtReg, + MachineCombinerPattern Pattern) { + assert(TSchedModel.hasInstrSchedModelOrItineraries() && + "Missing machine model\n"); + // NewRoot is the last instruction in the \p InsInstrs vector. unsigned NewRootIdx = InsInstrs.size() - 1; MachineInstr *NewRoot = InsInstrs[NewRootIdx]; - unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace); - unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace); - // Get depth, latency and slack of Root + // Get depth and latency of NewRoot and Root. + unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace); unsigned RootDepth = BlockTrace.getInstrCycles(Root).Depth; + + DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n"; + dbgs() << " NewRootDepth: " << NewRootDepth << "\n"; + dbgs() << " RootDepth: " << RootDepth << "\n"); + + // For a transform such as reassociation, the cost equation is + // conservatively calculated so that we must improve the depth (data + // dependency cycles) in the critical path to proceed with the transform. + // Being conservative also protects against inaccuracies in the underlying + // machine trace metrics and CPU models. + if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth) + return NewRootDepth < RootDepth; + + // A more flexible cost calculation for the critical path includes the slack + // of the original code sequence. This may allow the transform to proceed + // even if the instruction depths (data dependency cycles) become worse. + unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace); unsigned RootLatency = TSchedModel.computeInstrLatency(Root); unsigned RootSlack = BlockTrace.getInstrSlack(Root); - DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n"; - dbgs() << " NewRootDepth: " << NewRootDepth - << " NewRootLatency: " << NewRootLatency << "\n"; - dbgs() << " RootDepth: " << RootDepth << " RootLatency: " << RootLatency - << " RootSlack: " << RootSlack << "\n"; - dbgs() << " NewRootDepth + NewRootLatency " + DEBUG(dbgs() << " NewRootLatency: " << NewRootLatency << "\n"; + dbgs() << " RootLatency: " << RootLatency << "\n"; + dbgs() << " RootSlack: " << RootSlack << "\n"; + dbgs() << " NewRootDepth + NewRootLatency = " << NewRootDepth + NewRootLatency << "\n"; - dbgs() << " RootDepth + RootLatency + RootSlack " + dbgs() << " RootDepth + RootLatency + RootSlack = " << RootDepth + RootLatency + RootSlack << "\n";); - /// True when the new sequence does not lenghten the critical path. - return ((NewRootDepth + NewRootLatency) <= - (RootDepth + RootLatency + RootSlack)); + unsigned NewCycleCount = NewRootDepth + NewRootLatency; + unsigned OldCycleCount = RootDepth + RootLatency + RootSlack; + + return NewCycleCount <= OldCycleCount; } /// helper routine to convert instructions into SC @@ -261,11 +296,14 @@ void MachineCombiner::instr2instrSC( InstrsSC.push_back(SC); } } + /// True when the new instructions do not increase resource length bool MachineCombiner::preservesResourceLen( MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace, SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs) { + if (!TSchedModel.hasInstrSchedModel()) + return true; // Compute current resource length @@ -284,7 +322,7 @@ bool MachineCombiner::preservesResourceLen( ArrayRef MSCInsArr = makeArrayRef(InsInstrsSC); ArrayRef MSCDelArr = makeArrayRef(DelInstrsSC); - // Compute new resource length + // Compute new resource length. unsigned ResLenAfterCombine = BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr); @@ -300,7 +338,7 @@ bool MachineCombiner::preservesResourceLen( bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) { if (OptSize && (NewSize < OldSize)) return true; - if (!TSchedModel.hasInstrSchedModel()) + if (!TSchedModel.hasInstrSchedModelOrItineraries()) return true; return false; } @@ -322,7 +360,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { auto &MI = *BlockIter++; DEBUG(dbgs() << "INSTR "; MI.dump(); dbgs() << "\n";); - SmallVector Pattern; + SmallVector Patterns; // The motivating example is: // // MUL Other MUL_op1 MUL_op2 Other @@ -345,56 +383,58 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { // // The algorithm does not try to evaluate all patterns and pick the best. // This is only an artificial restriction though. In practice there is - // mostly one pattern and hasPattern() can order patterns based on an - // internal cost heuristic. - - if (TII->hasPattern(MI, Pattern)) { - for (auto P : Pattern) { - SmallVector InsInstrs; - SmallVector DelInstrs; - DenseMap InstrIdxForVirtReg; - if (!MinInstr) - MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); - MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB); + // mostly one pattern, and getMachineCombinerPatterns() can order patterns + // based on an internal cost heuristic. + + if (!TII->getMachineCombinerPatterns(MI, Patterns)) + continue; + + for (auto P : Patterns) { + SmallVector InsInstrs; + SmallVector DelInstrs; + DenseMap InstrIdxForVirtReg; + if (!MinInstr) + MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); + MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB); + Traces->verifyAnalysis(); + TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs, + InstrIdxForVirtReg); + unsigned NewInstCount = InsInstrs.size(); + unsigned OldInstCount = DelInstrs.size(); + // Found pattern, but did not generate alternative sequence. + // This can happen e.g. when an immediate could not be materialized + // in a single instruction. + if (!NewInstCount) + continue; + + // Substitute when we optimize for codesize and the new sequence has + // fewer instructions OR + // the new sequence neither lengthens the critical path nor increases + // resource pressure. + if (doSubstitute(NewInstCount, OldInstCount) || + (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, + InstrIdxForVirtReg, P) && + preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) { + for (auto *InstrPtr : InsInstrs) + MBB->insert((MachineBasicBlock::iterator) &MI, InstrPtr); + for (auto *InstrPtr : DelInstrs) + InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval(); + + Changed = true; + ++NumInstCombined; + + Traces->invalidate(MBB); Traces->verifyAnalysis(); - TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs, - InstrIdxForVirtReg); - // Found pattern, but did not generate alternative sequence. - // This can happen e.g. when an immediate could not be materialized - // in a single instruction. - if (!InsInstrs.size()) - continue; - // Substitute when we optimize for codesize and the new sequence has - // fewer instructions OR - // the new sequence neither lengthens the critical path nor increases - // resource pressure. - if (doSubstitute(InsInstrs.size(), DelInstrs.size()) || - (preservesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, - InstrIdxForVirtReg) && - preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) { - for (auto *InstrPtr : InsInstrs) - MBB->insert((MachineBasicBlock::iterator) & MI, - (MachineInstr *)InstrPtr); - for (auto *InstrPtr : DelInstrs) - InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval(); - - Changed = true; - ++NumInstCombined; - - Traces->invalidate(MBB); - Traces->verifyAnalysis(); - // Eagerly stop after the first pattern fired - break; - } else { - // Cleanup instructions of the alternative code sequence. There is no - // use for them. - for (auto *InstrPtr : InsInstrs) { - MachineFunction *MF = MBB->getParent(); - MF->DeleteMachineInstr((MachineInstr *)InstrPtr); - } - } - InstrIdxForVirtReg.clear(); + // Eagerly stop after the first pattern fires. + break; + } else { + // Cleanup instructions of the alternative code sequence. There is no + // use for them. + MachineFunction *MF = MBB->getParent(); + for (auto *InstrPtr : InsInstrs) + MF->DeleteMachineInstr(InstrPtr); } + InstrIdxForVirtReg.clear(); } } @@ -409,9 +449,8 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { TSchedModel.init(SchedModel, &STI, TII); MRI = &MF.getRegInfo(); Traces = &getAnalysis(); - MinInstr = 0; - - OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); + MinInstr = nullptr; + OptSize = MF.getFunction()->optForSize(); DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n'); if (!TII->useMachineCombiner()) {