X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=utils%2FTableGen%2FCodeGenRegisters.cpp;h=ca316e96a21adf462180ee329e2bb1766aeeddb7;hb=HEAD;hp=31e152f47da9ade370f697481f0a4fbe8dcea417;hpb=4496906657283b0ed86319a31247beec8c78d4fe;p=oota-llvm.git diff --git a/utils/TableGen/CodeGenRegisters.cpp b/utils/TableGen/CodeGenRegisters.cpp index 31e152f47da..ca316e96a21 100644 --- a/utils/TableGen/CodeGenRegisters.cpp +++ b/utils/TableGen/CodeGenRegisters.cpp @@ -108,7 +108,7 @@ CodeGenRegister::CodeGenRegister(Record *R, unsigned Enum) EnumValue(Enum), CostPerUse(R->getValueAsInt("CostPerUse")), CoveredBySubRegs(R->getValueAsBit("CoveredBySubRegs")), - NumNativeRegUnits(0), + HasDisjunctSubRegs(false), SubRegsComplete(false), SuperRegsComplete(false), TopoSig(~0u) @@ -146,17 +146,18 @@ void CodeGenRegister::buildObjectGraph(CodeGenRegBank &RegBank) { } const std::string &CodeGenRegister::getName() const { + assert(TheDef && "no def"); return TheDef->getName(); } namespace { // Iterate over all register units in a set of registers. class RegUnitIterator { - CodeGenRegister::Set::const_iterator RegI, RegE; - CodeGenRegister::RegUnitList::const_iterator UnitI, UnitE; + CodeGenRegister::Vec::const_iterator RegI, RegE; + CodeGenRegister::RegUnitList::iterator UnitI, UnitE; public: - RegUnitIterator(const CodeGenRegister::Set &Regs): + RegUnitIterator(const CodeGenRegister::Vec &Regs): RegI(Regs.begin()), RegE(Regs.end()), UnitI(), UnitE() { if (RegI != RegE) { @@ -191,32 +192,23 @@ protected: }; } // namespace -// Merge two RegUnitLists maintaining the order and removing duplicates. -// Overwrites MergedRU in the process. -static void mergeRegUnits(CodeGenRegister::RegUnitList &MergedRU, - const CodeGenRegister::RegUnitList &RRU) { - CodeGenRegister::RegUnitList LRU = MergedRU; - MergedRU.clear(); - std::set_union(LRU.begin(), LRU.end(), RRU.begin(), RRU.end(), - std::back_inserter(MergedRU)); -} - // Return true of this unit appears in RegUnits. static bool hasRegUnit(CodeGenRegister::RegUnitList &RegUnits, unsigned Unit) { - return std::count(RegUnits.begin(), RegUnits.end(), Unit); + return RegUnits.test(Unit); } // Inherit register units from subregisters. // Return true if the RegUnits changed. bool CodeGenRegister::inheritRegUnits(CodeGenRegBank &RegBank) { - unsigned OldNumUnits = RegUnits.size(); + bool changed = false; for (SubRegMap::const_iterator I = SubRegs.begin(), E = SubRegs.end(); I != E; ++I) { CodeGenRegister *SR = I->second; // Merge the subregister's units into this register's RegUnits. - mergeRegUnits(RegUnits, SR->RegUnits); + changed |= (RegUnits |= SR->RegUnits); } - return OldNumUnits != RegUnits.size(); + + return changed; } const CodeGenRegister::SubRegMap & @@ -226,6 +218,8 @@ CodeGenRegister::computeSubRegs(CodeGenRegBank &RegBank) { return SubRegs; SubRegsComplete = true; + HasDisjunctSubRegs = ExplicitSubRegs.size() > 1; + // First insert the explicit subregs and make sure they are fully indexed. for (unsigned i = 0, e = ExplicitSubRegs.size(); i != e; ++i) { CodeGenRegister *SR = ExplicitSubRegs[i]; @@ -246,6 +240,7 @@ CodeGenRegister::computeSubRegs(CodeGenRegBank &RegBank) { for (unsigned i = 0, e = ExplicitSubRegs.size(); i != e; ++i) { CodeGenRegister *SR = ExplicitSubRegs[i]; const SubRegMap &Map = SR->computeSubRegs(RegBank); + HasDisjunctSubRegs |= SR->HasDisjunctSubRegs; for (SubRegMap::const_iterator SI = Map.begin(), SE = Map.end(); SI != SE; ++SI) { @@ -362,14 +357,8 @@ CodeGenRegister::computeSubRegs(CodeGenRegBank &RegBank) { // sub-registers, the other registers won't contribute any more units. for (unsigned i = 0, e = ExplicitSubRegs.size(); i != e; ++i) { CodeGenRegister *SR = ExplicitSubRegs[i]; - // Explicit sub-registers are usually disjoint, so this is a good way of - // computing the union. We may pick up a few duplicates that will be - // eliminated below. - unsigned N = RegUnits.size(); - RegUnits.append(SR->RegUnits.begin(), SR->RegUnits.end()); - std::inplace_merge(RegUnits.begin(), RegUnits.begin() + N, RegUnits.end()); + RegUnits |= SR->RegUnits; } - RegUnits.erase(std::unique(RegUnits.begin(), RegUnits.end()), RegUnits.end()); // Absent any ad hoc aliasing, we create one register unit per leaf register. // These units correspond to the maximal cliques in the register overlap @@ -388,19 +377,19 @@ CodeGenRegister::computeSubRegs(CodeGenRegBank &RegBank) { // Create a RegUnit representing this alias edge, and add it to both // registers. unsigned Unit = RegBank.newRegUnit(this, AR); - RegUnits.push_back(Unit); - AR->RegUnits.push_back(Unit); + RegUnits.set(Unit); + AR->RegUnits.set(Unit); } // Finally, create units for leaf registers without ad hoc aliases. Note that // a leaf register with ad hoc aliases doesn't get its own unit - it isn't // necessary. This means the aliasing leaf registers can share a single unit. if (RegUnits.empty()) - RegUnits.push_back(RegBank.newRegUnit(this)); + RegUnits.set(RegBank.newRegUnit(this)); // We have now computed the native register units. More may be adopted later // for balancing purposes. - NumNativeRegUnits = RegUnits.size(); + NativeRegUnits = RegUnits; return SubRegs; } @@ -534,7 +523,7 @@ CodeGenRegister::addSubRegsPreOrder(SetVector &OSet, // Get the sum of this register's unit weights. unsigned CodeGenRegister::getWeight(const CodeGenRegBank &RegBank) const { unsigned Weight = 0; - for (RegUnitList::const_iterator I = RegUnits.begin(), E = RegUnits.end(); + for (RegUnitList::iterator I = RegUnits.begin(), E = RegUnits.end(); I != E; ++I) { Weight += RegBank.getRegUnit(*I).Weight; } @@ -554,7 +543,7 @@ struct TupleExpander : SetTheory::Expander { std::vector Indices = Def->getValueAsListOfDefs("SubRegIndices"); unsigned Dim = Indices.size(); ListInit *SubRegs = Def->getValueAsListInit("SubRegs"); - if (Dim != SubRegs->getSize()) + if (Dim != SubRegs->size()) PrintFatalError(Def->getLoc(), "SubRegIndices and SubRegs size mismatch"); if (Dim < 2) PrintFatalError(Def->getLoc(), @@ -657,17 +646,21 @@ struct TupleExpander : SetTheory::Expander { // CodeGenRegisterClass //===----------------------------------------------------------------------===// +static void sortAndUniqueRegisters(CodeGenRegister::Vec &M) { + std::sort(M.begin(), M.end(), deref()); + M.erase(std::unique(M.begin(), M.end(), deref()), M.end()); +} + CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R) : TheDef(R), Name(R->getName()), TopoSigs(RegBank.getNumTopoSigs()), - EnumValue(-1) { + EnumValue(-1), + LaneMask(0) { // Rename anonymous register classes. if (R->getName().size() > 9 && R->getName()[9] == '.') { static unsigned AnonCounter = 0; - R->setName("AnonRegClass_" + utostr(AnonCounter)); - // MSVC2012 ICEs if AnonCounter++ is directly passed to utostr. - ++AnonCounter; + R->setName("AnonRegClass_" + utostr(AnonCounter++)); } std::vector TypeList = R->getValueAsListOfDefs("RegTypes"); @@ -689,9 +682,10 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R) for (unsigned i = 0, e = Elements->size(); i != e; ++i) { Orders[0].push_back((*Elements)[i]); const CodeGenRegister *Reg = RegBank.getReg((*Elements)[i]); - Members.insert(Reg); + Members.push_back(Reg); TopoSigs.set(Reg->getTopoSig()); } + sortAndUniqueRegisters(Members); // Alternative allocation orders may be subsets. SetTheory::RecSet Order; @@ -717,6 +711,10 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R) CopyCost = R->getValueAsInt("CopyCost"); Allocatable = R->getValueAsBit("isAllocatable"); AltOrderSelect = R->getValueAsString("AltOrderSelect"); + int AllocationPriority = R->getValueAsInt("AllocationPriority"); + if (AllocationPriority < 0 || AllocationPriority > 63) + PrintFatalError(R->getLoc(), "AllocationPriority out of range [0,63]"); + this->AllocationPriority = AllocationPriority; } // Create an inferred register class that was missing from the .td files. @@ -732,10 +730,10 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, SpillSize(Props.SpillSize), SpillAlignment(Props.SpillAlignment), CopyCost(0), - Allocatable(true) { - for (CodeGenRegister::Set::iterator I = Members.begin(), E = Members.end(); - I != E; ++I) - TopoSigs.set((*I)->getTopoSig()); + Allocatable(true), + AllocationPriority(0) { + for (const auto R : Members) + TopoSigs.set(R->getTopoSig()); } // Compute inherited propertied for a synthesized register class. @@ -753,6 +751,7 @@ void CodeGenRegisterClass::inheritProperties(CodeGenRegBank &RegBank) { CopyCost = Super.CopyCost; Allocatable = Super.Allocatable; AltOrderSelect = Super.AltOrderSelect; + AllocationPriority = Super.AllocationPriority; // Copy all allocation orders, filter out foreign registers from the larger // super-class. @@ -764,15 +763,15 @@ void CodeGenRegisterClass::inheritProperties(CodeGenRegBank &RegBank) { } bool CodeGenRegisterClass::contains(const CodeGenRegister *Reg) const { - return Members.count(Reg); + return std::binary_search(Members.begin(), Members.end(), Reg, + deref()); } namespace llvm { raw_ostream &operator<<(raw_ostream &OS, const CodeGenRegisterClass::Key &K) { OS << "{ S=" << K.SpillSize << ", A=" << K.SpillAlignment; - for (CodeGenRegister::Set::const_iterator I = K.Members->begin(), - E = K.Members->end(); I != E; ++I) - OS << ", " << (*I)->getName(); + for (const auto R : *K.Members) + OS << ", " << R->getName(); return OS << " }"; } } @@ -798,10 +797,10 @@ operator<(const CodeGenRegisterClass::Key &B) const { static bool testSubClass(const CodeGenRegisterClass *A, const CodeGenRegisterClass *B) { return A->SpillAlignment && B->SpillAlignment % A->SpillAlignment == 0 && - A->SpillSize <= B->SpillSize && - std::includes(A->getMembers().begin(), A->getMembers().end(), - B->getMembers().begin(), B->getMembers().end(), - CodeGenRegister::Less()); + A->SpillSize <= B->SpillSize && + std::includes(A->getMembers().begin(), A->getMembers().end(), + B->getMembers().begin(), B->getMembers().end(), + deref()); } /// Sorting predicate for register classes. This provides a topological @@ -925,7 +924,7 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) { // Configure register Sets to understand register classes and tuples. Sets.addFieldExpander("RegisterClass", "MemberList"); Sets.addFieldExpander("CalleeSavedRegs", "SaveList"); - Sets.addExpander("RegisterTuples", new TupleExpander()); + Sets.addExpander("RegisterTuples", llvm::make_unique()); // Read in the user-defined (named) sub-register indices. // More indices will be synthesized later. @@ -995,7 +994,7 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) { // Allocate user-defined register classes. for (auto *RC : RCs) { - RegClasses.push_back(CodeGenRegisterClass(*this, RC)); + RegClasses.emplace_back(*this, RC); addToMaps(&RegClasses.back()); } @@ -1048,7 +1047,7 @@ void CodeGenRegBank::addToMaps(CodeGenRegisterClass *RC) { // Create a synthetic sub-class if it is missing. CodeGenRegisterClass* CodeGenRegBank::getOrCreateSubClass(const CodeGenRegisterClass *RC, - const CodeGenRegister::Set *Members, + const CodeGenRegister::Vec *Members, StringRef Name) { // Synthetic sub-class has the same size and alignment as RC. CodeGenRegisterClass::Key K(Members, RC->SpillSize, RC->SpillAlignment); @@ -1057,7 +1056,7 @@ CodeGenRegBank::getOrCreateSubClass(const CodeGenRegisterClass *RC, return FoundI->second; // Sub-class doesn't exist, create a new one. - RegClasses.push_back(CodeGenRegisterClass(*this, Name, K)); + RegClasses.emplace_back(*this, Name, K); addToMaps(&RegClasses.back()); return &RegClasses.back(); } @@ -1165,32 +1164,89 @@ void CodeGenRegBank::computeComposites() { // // Conservatively share a lane mask bit if two sub-register indices overlap in // some registers, but not in others. That shouldn't happen a lot. -void CodeGenRegBank::computeSubRegIndexLaneMasks() { +void CodeGenRegBank::computeSubRegLaneMasks() { // First assign individual bits to all the leaf indices. unsigned Bit = 0; // Determine mask of lanes that cover their registers. CoveringLanes = ~0u; for (auto &Idx : SubRegIndices) { if (Idx.getComposites().empty()) { + if (Bit > 32) { + PrintFatalError( + Twine("Ran out of lanemask bits to represent subregister ") + + Idx.getName()); + } Idx.LaneMask = 1u << Bit; - // Share bit 31 in the unlikely case there are more than 32 leafs. - // - // Sharing bits is harmless; it allows graceful degradation in targets - // with more than 32 vector lanes. They simply get a limited resolution - // view of lanes beyond the 32nd. - // - // See also the comment for getSubRegIndexLaneMask(). - if (Bit < 31) - ++Bit; - else - // Once bit 31 is shared among multiple leafs, the 'lane' it represents - // is no longer covering its registers. - CoveringLanes &= ~(1u << Bit); + ++Bit; } else { Idx.LaneMask = 0; } } + // Compute transformation sequences for composeSubRegIndexLaneMask. The idea + // here is that for each possible target subregister we look at the leafs + // in the subregister graph that compose for this target and create + // transformation sequences for the lanemasks. Each step in the sequence + // consists of a bitmask and a bitrotate operation. As the rotation amounts + // are usually the same for many subregisters we can easily combine the steps + // by combining the masks. + for (const auto &Idx : SubRegIndices) { + const auto &Composites = Idx.getComposites(); + auto &LaneTransforms = Idx.CompositionLaneMaskTransform; + // Go through all leaf subregisters and find the ones that compose with Idx. + // These make out all possible valid bits in the lane mask we want to + // transform. Looking only at the leafs ensure that only a single bit in + // the mask is set. + unsigned NextBit = 0; + for (auto &Idx2 : SubRegIndices) { + // Skip non-leaf subregisters. + if (!Idx2.getComposites().empty()) + continue; + // Replicate the behaviour from the lane mask generation loop above. + unsigned SrcBit = NextBit; + unsigned SrcMask = 1u << SrcBit; + if (NextBit < 31) + ++NextBit; + assert(Idx2.LaneMask == SrcMask); + + // Get the composed subregister if there is any. + auto C = Composites.find(&Idx2); + if (C == Composites.end()) + continue; + const CodeGenSubRegIndex *Composite = C->second; + // The Composed subreg should be a leaf subreg too + assert(Composite->getComposites().empty()); + + // Create Mask+Rotate operation and merge with existing ops if possible. + unsigned DstBit = Log2_32(Composite->LaneMask); + int Shift = DstBit - SrcBit; + uint8_t RotateLeft = Shift >= 0 ? (uint8_t)Shift : 32+Shift; + for (auto &I : LaneTransforms) { + if (I.RotateLeft == RotateLeft) { + I.Mask |= SrcMask; + SrcMask = 0; + } + } + if (SrcMask != 0) { + MaskRolPair MaskRol = { SrcMask, RotateLeft }; + LaneTransforms.push_back(MaskRol); + } + } + // Optimize if the transformation consists of one step only: Set mask to + // 0xffffffff (including some irrelevant invalid bits) so that it should + // merge with more entries later while compressing the table. + if (LaneTransforms.size() == 1) + LaneTransforms[0].Mask = ~0u; + + // Further compression optimization: For invalid compositions resulting + // in a sequence with 0 entries we can just pick any other. Choose + // Mask 0xffffffff with Rotation 0. + if (LaneTransforms.size() == 0) { + MaskRolPair P = { ~0u, 0 }; + LaneTransforms.push_back(P); + } + } + // FIXME: What if ad-hoc aliasing introduces overlaps that aren't represented // by the sub-register graph? This doesn't occur in any known targets. @@ -1202,6 +1258,23 @@ void CodeGenRegBank::computeSubRegIndexLaneMasks() { if (!Idx.AllSuperRegsCovered) CoveringLanes &= ~Mask; } + + // Compute lane mask combinations for register classes. + for (auto &RegClass : RegClasses) { + unsigned LaneMask = 0; + for (const auto &SubRegIndex : SubRegIndices) { + if (RegClass.getSubClassWithSubReg(&SubRegIndex) == nullptr) + continue; + LaneMask |= SubRegIndex.LaneMask; + } + + // For classes without any subregisters set LaneMask to ~0u instead of 0. + // This makes it easier for client code to handle classes uniformly. + if (LaneMask == 0) + LaneMask = ~0u; + + RegClass.LaneMask = LaneMask; + } } namespace { @@ -1222,7 +1295,7 @@ namespace { // for which the unit weight equals the set weight. These units should not have // their weight increased. struct UberRegSet { - CodeGenRegister::Set Regs; + CodeGenRegister::Vec Regs; unsigned Weight; CodeGenRegister::RegUnitList SingularDeterminants; @@ -1251,7 +1324,7 @@ static void computeUberSets(std::vector &UberSets, if (!RegClass.Allocatable) continue; - const CodeGenRegister::Set &Regs = RegClass.getMembers(); + const CodeGenRegister::Vec &Regs = RegClass.getMembers(); if (Regs.empty()) continue; @@ -1259,8 +1332,7 @@ static void computeUberSets(std::vector &UberSets, assert(USetID && "register number 0 is invalid"); AllocatableRegs.insert((*Regs.begin())->EnumValue); - for (CodeGenRegister::Set::const_iterator I = std::next(Regs.begin()), - E = Regs.end(); I != E; ++I) { + for (auto I = std::next(Regs.begin()), E = Regs.end(); I != E; ++I) { AllocatableRegs.insert((*I)->EnumValue); UberSetIDs.join(USetID, (*I)->EnumValue); } @@ -1290,7 +1362,8 @@ static void computeUberSets(std::vector &UberSets, USetID = 0; UberRegSet *USet = &UberSets[USetID]; - USet->Regs.insert(&Reg); + USet->Regs.push_back(&Reg); + sortAndUniqueRegisters(USet->Regs); RegSets[i++] = USet; } } @@ -1324,22 +1397,18 @@ static void computeUberWeights(std::vector &UberSets, if (I->Weight != MaxWeight) { DEBUG( dbgs() << "UberSet " << I - UberSets.begin() << " Weight " << MaxWeight; - for (CodeGenRegister::Set::iterator - UnitI = I->Regs.begin(), UnitE = I->Regs.end(); - UnitI != UnitE; ++UnitI) { - dbgs() << " " << (*UnitI)->getName(); - } + for (auto &Unit : I->Regs) + dbgs() << " " << Unit->getName(); dbgs() << "\n"); // Update the set weight. I->Weight = MaxWeight; } // Find singular determinants. - for (CodeGenRegister::Set::iterator RegI = I->Regs.begin(), - RegE = I->Regs.end(); RegI != RegE; ++RegI) { - if ((*RegI)->getRegUnits().size() == 1 - && (*RegI)->getWeight(RegBank) == I->Weight) - mergeRegUnits(I->SingularDeterminants, (*RegI)->getRegUnits()); + for (const auto R : I->Regs) { + if (R->getRegUnits().count() == 1 && R->getWeight(RegBank) == I->Weight) { + I->SingularDeterminants |= R->getRegUnits(); + } } } } @@ -1357,13 +1426,14 @@ static void computeUberWeights(std::vector &UberSets, static bool normalizeWeight(CodeGenRegister *Reg, std::vector &UberSets, std::vector &RegSets, - std::set &NormalRegs, + SparseBitVector<> &NormalRegs, CodeGenRegister::RegUnitList &NormalUnits, CodeGenRegBank &RegBank) { - bool Changed = false; - if (!NormalRegs.insert(Reg->EnumValue).second) - return Changed; + if (NormalRegs.test(Reg->EnumValue)) + return false; + NormalRegs.set(Reg->EnumValue); + bool Changed = false; const CodeGenRegister::SubRegMap &SRM = Reg->getSubRegs(); for (CodeGenRegister::SubRegMap::const_iterator SRI = SRM.begin(), SRE = SRM.end(); SRI != SRE; ++SRI) { @@ -1387,8 +1457,8 @@ static bool normalizeWeight(CodeGenRegister *Reg, // A register unit's weight can be adjusted only if it is the singular unit // for this register, has not been used to normalize a subregister's set, // and has not already been used to singularly determine this UberRegSet. - unsigned AdjustUnit = Reg->getRegUnits().front(); - if (Reg->getRegUnits().size() != 1 + unsigned AdjustUnit = *Reg->getRegUnits().begin(); + if (Reg->getRegUnits().count() != 1 || hasRegUnit(NormalUnits, AdjustUnit) || hasRegUnit(UberSet->SingularDeterminants, AdjustUnit)) { // We don't have an adjustable unit, so adopt a new one. @@ -1406,7 +1476,7 @@ static bool normalizeWeight(CodeGenRegister *Reg, } // Mark these units normalized so superregisters can't change their weights. - mergeRegUnits(NormalUnits, Reg->getRegUnits()); + NormalUnits |= Reg->getRegUnits(); return Changed; } @@ -1431,7 +1501,7 @@ void CodeGenRegBank::computeRegUnitWeights() { Changed = false; for (auto &Reg : Registers) { CodeGenRegister::RegUnitList NormalUnits; - std::set NormalRegs; + SparseBitVector<> NormalRegs; Changed |= normalizeWeight(&Reg, UberSets, RegSets, NormalRegs, NormalUnits, *this); } @@ -1497,6 +1567,12 @@ void CodeGenRegBank::pruneUnitSets() { && UnitWeight == RegUnits[SuperSet.Units.back()].Weight) { DEBUG(dbgs() << "UnitSet " << SubIdx << " subsumed by " << SuperIdx << "\n"); + // We can pick any of the set names for the merged set. Go for the + // shortest one to avoid picking the name of one of the classes that are + // artificially created by tablegen. So "FPR128_lo" instead of + // "QQQQ_with_qsub3_in_FPR128_lo". + if (RegUnitSets[SubIdx].Name.size() < RegUnitSets[SuperIdx].Name.size()) + RegUnitSets[SuperIdx].Name = RegUnitSets[SubIdx].Name; break; } } @@ -1548,9 +1624,8 @@ void CodeGenRegBank::computeRegUnitSets() { USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; - ArrayRef Units = RegUnitSets[USIdx].Units; - for (unsigned i = 0, e = Units.size(); i < e; ++i) - dbgs() << " " << RegUnits[Units[i]].Roots[0]->getName(); + for (auto &U : RegUnitSets[USIdx].Units) + dbgs() << " " << RegUnits[U].Roots[0]->getName(); dbgs() << "\n"; }); @@ -1562,9 +1637,8 @@ void CodeGenRegBank::computeRegUnitSets() { USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; - ArrayRef Units = RegUnitSets[USIdx].Units; - for (unsigned i = 0, e = Units.size(); i < e; ++i) - dbgs() << " " << RegUnits[Units[i]].Roots[0]->getName(); + for (auto &U : RegUnitSets[USIdx].Units) + dbgs() << " " << RegUnits[U].Roots[0]->getName(); dbgs() << "\n"; } dbgs() << "\nUnion sets:\n"); @@ -1609,9 +1683,8 @@ void CodeGenRegBank::computeRegUnitSets() { else { DEBUG(dbgs() << "UnitSet " << RegUnitSets.size()-1 << " " << RegUnitSets.back().Name << ":"; - ArrayRef Units = RegUnitSets.back().Units; - for (unsigned i = 0, e = Units.size(); i < e; ++i) - dbgs() << " " << RegUnits[Units[i]].Roots[0]->getName(); + for (auto &U : RegUnitSets.back().Units) + dbgs() << " " << RegUnits[U].Roots[0]->getName(); dbgs() << "\n";); } } @@ -1625,9 +1698,8 @@ void CodeGenRegBank::computeRegUnitSets() { USIdx < USEnd; ++USIdx) { dbgs() << "UnitSet " << USIdx << " " << RegUnitSets[USIdx].Name << ":"; - ArrayRef Units = RegUnitSets[USIdx].Units; - for (unsigned i = 0, e = Units.size(); i < e; ++i) - dbgs() << " " << RegUnits[Units[i]].Roots[0]->getName(); + for (auto &U : RegUnitSets[USIdx].Units) + dbgs() << " " << RegUnits[U].Roots[0]->getName(); dbgs() << "\n"; }); @@ -1648,8 +1720,8 @@ void CodeGenRegBank::computeRegUnitSets() { continue; DEBUG(dbgs() << "RC " << RC.getName() << " Units: \n"; - for (unsigned i = 0, e = RCRegUnits.size(); i < e; ++i) dbgs() - << RegUnits[RCRegUnits[i]].getRoots()[0]->getName() << " "; + for (auto &U : RCRegUnits) + dbgs() << RegUnits[U].getRoots()[0]->getName() << " "; dbgs() << "\n UnitSetIDs:"); // Find all supersets. @@ -1694,9 +1766,47 @@ void CodeGenRegBank::computeRegUnitSets() { } } +void CodeGenRegBank::computeRegUnitLaneMasks() { + for (auto &Register : Registers) { + // Create an initial lane mask for all register units. + const auto &RegUnits = Register.getRegUnits(); + CodeGenRegister::RegUnitLaneMaskList RegUnitLaneMasks(RegUnits.count(), 0); + // Iterate through SubRegisters. + typedef CodeGenRegister::SubRegMap SubRegMap; + const SubRegMap &SubRegs = Register.getSubRegs(); + for (SubRegMap::const_iterator S = SubRegs.begin(), + SE = SubRegs.end(); S != SE; ++S) { + CodeGenRegister *SubReg = S->second; + // Ignore non-leaf subregisters, their lane masks are fully covered by + // the leaf subregisters anyway. + if (SubReg->getSubRegs().size() != 0) + continue; + CodeGenSubRegIndex *SubRegIndex = S->first; + const CodeGenRegister *SubRegister = S->second; + unsigned LaneMask = SubRegIndex->LaneMask; + // Distribute LaneMask to Register Units touched. + for (unsigned SUI : SubRegister->getRegUnits()) { + bool Found = false; + unsigned u = 0; + for (unsigned RU : RegUnits) { + if (SUI == RU) { + RegUnitLaneMasks[u] |= LaneMask; + assert(!Found); + Found = true; + } + ++u; + } + (void)Found; + assert(Found); + } + } + Register.setRegUnitLaneMasks(RegUnitLaneMasks); + } +} + void CodeGenRegBank::computeDerivedInfo() { computeComposites(); - computeSubRegIndexLaneMasks(); + computeSubRegLaneMasks(); // Compute a weight for each register unit created during getSubRegs. // This may create adopted register units (with unit # >= NumNativeRegUnits). @@ -1706,6 +1816,15 @@ void CodeGenRegBank::computeDerivedInfo() { // supersets for the union of overlapping sets. computeRegUnitSets(); + computeRegUnitLaneMasks(); + + // Compute register class HasDisjunctSubRegs flag. + for (CodeGenRegisterClass &RC : RegClasses) { + RC.HasDisjunctSubRegs = false; + for (const CodeGenRegister *Reg : RC.getMembers()) + RC.HasDisjunctSubRegs |= Reg->HasDisjunctSubRegs; + } + // Get the weight of each set. for (unsigned Idx = 0, EndIdx = RegUnitSets.size(); Idx != EndIdx; ++Idx) RegUnitSets[Idx].Weight = getRegUnitSetWeight(RegUnitSets[Idx].Units); @@ -1743,13 +1862,12 @@ void CodeGenRegBank::inferCommonSubClass(CodeGenRegisterClass *RC) { continue; // Compute the set intersection of RC1 and RC2. - const CodeGenRegister::Set &Memb1 = RC1->getMembers(); - const CodeGenRegister::Set &Memb2 = RC2->getMembers(); - CodeGenRegister::Set Intersection; - std::set_intersection(Memb1.begin(), Memb1.end(), - Memb2.begin(), Memb2.end(), - std::inserter(Intersection, Intersection.begin()), - CodeGenRegister::Less()); + const CodeGenRegister::Vec &Memb1 = RC1->getMembers(); + const CodeGenRegister::Vec &Memb2 = RC2->getMembers(); + CodeGenRegister::Vec Intersection; + std::set_intersection( + Memb1.begin(), Memb1.end(), Memb2.begin(), Memb2.end(), + std::inserter(Intersection, Intersection.begin()), deref()); // Skip disjoint class pairs. if (Intersection.empty()) @@ -1775,19 +1893,21 @@ void CodeGenRegBank::inferCommonSubClass(CodeGenRegisterClass *RC) { // void CodeGenRegBank::inferSubClassWithSubReg(CodeGenRegisterClass *RC) { // Map SubRegIndex to set of registers in RC supporting that SubRegIndex. - typedef std::map SubReg2SetMap; + typedef std::map> SubReg2SetMap; // Compute the set of registers supporting each SubRegIndex. SubReg2SetMap SRSets; - for (CodeGenRegister::Set::const_iterator RI = RC->getMembers().begin(), - RE = RC->getMembers().end(); RI != RE; ++RI) { - const CodeGenRegister::SubRegMap &SRM = (*RI)->getSubRegs(); + for (const auto R : RC->getMembers()) { + const CodeGenRegister::SubRegMap &SRM = R->getSubRegs(); for (CodeGenRegister::SubRegMap::const_iterator I = SRM.begin(), E = SRM.end(); I != E; ++I) - SRSets[I->first].insert(*RI); + SRSets[I->first].push_back(R); } + for (auto I : SRSets) + sortAndUniqueRegisters(I.second); + // Find matching classes for all SRSets entries. Iterate in SubRegIndex // numerical order to visit synthetic indices last. for (const auto &SubIdx : SubRegIndices) { @@ -1832,9 +1952,7 @@ void CodeGenRegBank::inferMatchingSuperRegClass(CodeGenRegisterClass *RC, // Build list of (Super, Sub) pairs for this SubIdx. SSPairs.clear(); TopoSigs.reset(); - for (CodeGenRegister::Set::const_iterator RI = RC->getMembers().begin(), - RE = RC->getMembers().end(); RI != RE; ++RI) { - const CodeGenRegister *Super = *RI; + for (const auto Super : RC->getMembers()) { const CodeGenRegister *Sub = Super->getSubRegs().find(&SubIdx)->second; assert(Sub && "Missing sub-register"); SSPairs.push_back(std::make_pair(Super, Sub)); @@ -1853,22 +1971,26 @@ void CodeGenRegBank::inferMatchingSuperRegClass(CodeGenRegisterClass *RC, if (!TopoSigs.anyCommon(SubRC.getTopoSigs())) continue; // Compute the subset of RC that maps into SubRC. - CodeGenRegister::Set SubSet; + CodeGenRegister::Vec SubSetVec; for (unsigned i = 0, e = SSPairs.size(); i != e; ++i) if (SubRC.contains(SSPairs[i].second)) - SubSet.insert(SSPairs[i].first); - if (SubSet.empty()) + SubSetVec.push_back(SSPairs[i].first); + + if (SubSetVec.empty()) continue; + // RC injects completely into SubRC. - if (SubSet.size() == SSPairs.size()) { + sortAndUniqueRegisters(SubSetVec); + if (SubSetVec.size() == SSPairs.size()) { SubRC.addSuperRegClass(&SubIdx, RC); continue; } + // Only a subset of RC maps into SubRC. Make sure it is represented by a // class. - getOrCreateSubClass(RC, &SubSet, RC->getName() + "_with_" + - SubIdx.getName() + "_in_" + - SubRC.getName()); + getOrCreateSubClass(RC, &SubSetVec, RC->getName() + "_with_" + + SubIdx.getName() + "_in_" + + SubRC.getName()); } } }