X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FProfileData%2FInstrProfReader.cpp;h=6f201243736d09baecce19d27c1fa7478a70add8;hb=7b81b822ba8418a27225056bff9229b2d6c7dca9;hp=c563355599f103814c16e4530f5173df9186cdc6;hpb=ddfcb21b3f728fd569e106f43c1bcaed356223e4;p=oota-llvm.git diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp index c563355599f..6f201243736 100644 --- a/lib/ProfileData/InstrProfReader.cpp +++ b/lib/ProfileData/InstrProfReader.cpp @@ -13,48 +13,85 @@ //===----------------------------------------------------------------------===// #include "llvm/ProfileData/InstrProfReader.h" -#include "llvm/ProfileData/InstrProf.h" - +#include "llvm/ADT/STLExtras.h" #include using namespace llvm; -static uint64_t getRawMagic() { - return - uint64_t('l') << 56 | - uint64_t('p') << 48 | - uint64_t('r') << 40 | - uint64_t('o') << 32 | - uint64_t('f') << 24 | - uint64_t('r') << 16 | - uint64_t('a') << 8 | - uint64_t('w'); -} - -error_code InstrProfReader::create(std::string Path, - std::unique_ptr &Result) { - std::unique_ptr Buffer; - if (error_code EC = MemoryBuffer::getFileOrSTDIN(Path, Buffer)) +static ErrorOr> +setupMemoryBuffer(std::string Path) { + ErrorOr> BufferOrErr = + MemoryBuffer::getFileOrSTDIN(Path); + if (std::error_code EC = BufferOrErr.getError()) + return EC; + return std::move(BufferOrErr.get()); +} + +static std::error_code initializeReader(InstrProfReader &Reader) { + return Reader.readHeader(); +} + +ErrorOr> +InstrProfReader::create(std::string Path) { + // Set up the buffer to read. + auto BufferOrError = setupMemoryBuffer(Path); + if (std::error_code EC = BufferOrError.getError()) return EC; + return InstrProfReader::create(std::move(BufferOrError.get())); +} - // Sanity check the file. +ErrorOr> +InstrProfReader::create(std::unique_ptr Buffer) { + // Sanity check the buffer. if (Buffer->getBufferSize() > std::numeric_limits::max()) return instrprof_error::too_large; - if (Buffer->getBufferSize() < sizeof(uint64_t)) { - Result.reset(new TextInstrProfReader(Buffer)); - Result->readHeader(); - return instrprof_error::success; - } - - uint64_t Magic = *(uint64_t *)Buffer->getBufferStart(); - uint64_t SwappedMagic = sys::SwapByteOrder(Magic); - if (Magic == getRawMagic() || SwappedMagic == getRawMagic()) - Result.reset(new RawInstrProfReader(Buffer)); + std::unique_ptr Result; + // Create the reader. + if (IndexedInstrProfReader::hasFormat(*Buffer)) + Result.reset(new IndexedInstrProfReader(std::move(Buffer))); + else if (RawInstrProfReader64::hasFormat(*Buffer)) + Result.reset(new RawInstrProfReader64(std::move(Buffer))); + else if (RawInstrProfReader32::hasFormat(*Buffer)) + Result.reset(new RawInstrProfReader32(std::move(Buffer))); + else if (TextInstrProfReader::hasFormat(*Buffer)) + Result.reset(new TextInstrProfReader(std::move(Buffer))); else - Result.reset(new TextInstrProfReader(Buffer)); - Result->readHeader(); - return instrprof_error::success; + return instrprof_error::unrecognized_format; + + // Initialize the reader and return the result. + if (std::error_code EC = initializeReader(*Result)) + return EC; + + return std::move(Result); +} + +ErrorOr> +IndexedInstrProfReader::create(std::string Path) { + // Set up the buffer to read. + auto BufferOrError = setupMemoryBuffer(Path); + if (std::error_code EC = BufferOrError.getError()) + return EC; + return IndexedInstrProfReader::create(std::move(BufferOrError.get())); +} + + +ErrorOr> +IndexedInstrProfReader::create(std::unique_ptr Buffer) { + // Sanity check the buffer. + if (Buffer->getBufferSize() > std::numeric_limits::max()) + return instrprof_error::too_large; + + // Create the reader. + if (!IndexedInstrProfReader::hasFormat(*Buffer)) + return instrprof_error::bad_magic; + auto Result = llvm::make_unique(std::move(Buffer)); + + // Initialize the reader and return the result. + if (std::error_code EC = initializeReader(*Result)) + return EC; + + return std::move(Result); } void InstrProfIterator::Increment() { @@ -62,9 +99,18 @@ void InstrProfIterator::Increment() { *this = InstrProfIterator(); } -error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) { - // Skip empty lines. - while (!Line.is_at_end() && Line->empty()) +bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) { + // Verify that this really looks like plain ASCII text by checking a + // 'reasonable' number of characters (up to profile magic size). + size_t count = std::min(Buffer.getBufferSize(), sizeof(uint64_t)); + StringRef buffer = Buffer.getBufferStart(); + return count == 0 || std::all_of(buffer.begin(), buffer.begin() + count, + [](char c) { return ::isprint(c) || ::isspace(c); }); +} + +std::error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) { + // Skip empty lines and comments. + while (!Line.is_at_end() && (Line->empty() || Line->startswith("#"))) ++Line; // If we hit EOF while looking for a name, we're done. if (Line.is_at_end()) @@ -76,7 +122,7 @@ error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) { // Read the function hash. if (Line.is_at_end()) return error(instrprof_error::truncated); - if ((Line++)->getAsInteger(10, Record.Hash)) + if ((Line++)->getAsInteger(0, Record.Hash)) return error(instrprof_error::malformed); // Read the number of counters. @@ -85,49 +131,77 @@ error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) { return error(instrprof_error::truncated); if ((Line++)->getAsInteger(10, NumCounters)) return error(instrprof_error::malformed); + if (NumCounters == 0) + return error(instrprof_error::malformed); // Read each counter and fill our internal storage with the values. - Counts.clear(); - Counts.reserve(NumCounters); + Record.Counts.clear(); + Record.Counts.reserve(NumCounters); for (uint64_t I = 0; I < NumCounters; ++I) { if (Line.is_at_end()) return error(instrprof_error::truncated); uint64_t Count; if ((Line++)->getAsInteger(10, Count)) return error(instrprof_error::malformed); - Counts.push_back(Count); + Record.Counts.push_back(Count); } - // Give the record a reference to our internal counter storage. - Record.Counts = Counts; return success(); } -static uint64_t getRawVersion() { - return 1; +template +bool RawInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) { + if (DataBuffer.getBufferSize() < sizeof(uint64_t)) + return false; + uint64_t Magic = + *reinterpret_cast(DataBuffer.getBufferStart()); + return RawInstrProf::getMagic() == Magic || + sys::getSwappedBytes(RawInstrProf::getMagic()) == Magic; } -namespace { + +template +std::error_code RawInstrProfReader::readHeader() { + if (!hasFormat(*DataBuffer)) + return error(instrprof_error::bad_magic); + if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header)) + return error(instrprof_error::bad_header); + auto *Header = reinterpret_cast( + DataBuffer->getBufferStart()); + ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic(); + return readHeader(*Header); } -RawInstrProfReader::RawInstrProfReader(std::unique_ptr &DataBuffer) - : DataBuffer(DataBuffer.release()) { } -error_code RawInstrProfReader::readHeader() { - if (DataBuffer->getBufferSize() < sizeof(RawHeader)) - return error(instrprof_error::malformed); - const RawHeader *Header = (RawHeader *)DataBuffer->getBufferStart(); - if (Header->Magic == getRawMagic()) - ShouldSwapBytes = false; - else { - if (sys::SwapByteOrder(Header->Magic) != getRawMagic()) - return error(instrprof_error::malformed); +template +std::error_code +RawInstrProfReader::readNextHeader(const char *CurrentPos) { + const char *End = DataBuffer->getBufferEnd(); + // Skip zero padding between profiles. + while (CurrentPos != End && *CurrentPos == 0) + ++CurrentPos; + // If there's nothing left, we're done. + if (CurrentPos == End) + return instrprof_error::eof; + // If there isn't enough space for another header, this is probably just + // garbage at the end of the file. + if (CurrentPos + sizeof(RawInstrProf::Header) > End) + return instrprof_error::malformed; + // The writer ensures each profile is padded to start at an aligned address. + if (reinterpret_cast(CurrentPos) % alignOf()) + return instrprof_error::malformed; + // The magic should have the same byte order as in the previous header. + uint64_t Magic = *reinterpret_cast(CurrentPos); + if (Magic != swap(RawInstrProf::getMagic())) + return instrprof_error::bad_magic; - ShouldSwapBytes = true; - } + // There's another profile to read, so we need to process the header. + auto *Header = reinterpret_cast(CurrentPos); return readHeader(*Header); } -error_code RawInstrProfReader::readHeader(const RawHeader &Header) { - if (swap(Header.Version) != getRawVersion()) +template +std::error_code RawInstrProfReader::readHeader( + const RawInstrProf::Header &Header) { + if (swap(Header.Version) != RawInstrProf::Version) return error(instrprof_error::unsupported_version); CountersDelta = swap(Header.CountersDelta); @@ -136,51 +210,305 @@ error_code RawInstrProfReader::readHeader(const RawHeader &Header) { auto CountersSize = swap(Header.CountersSize); auto NamesSize = swap(Header.NamesSize); - ptrdiff_t DataOffset = sizeof(RawHeader); - ptrdiff_t CountersOffset = DataOffset + sizeof(ProfileData) * DataSize; + ptrdiff_t DataOffset = sizeof(RawInstrProf::Header); + ptrdiff_t CountersOffset = + DataOffset + sizeof(RawInstrProf::ProfileData) * DataSize; ptrdiff_t NamesOffset = CountersOffset + sizeof(uint64_t) * CountersSize; - size_t FileSize = NamesOffset + sizeof(char) * NamesSize; + size_t ProfileSize = NamesOffset + sizeof(char) * NamesSize; - if (FileSize != DataBuffer->getBufferSize()) - return error(instrprof_error::malformed); + auto *Start = reinterpret_cast(&Header); + if (Start + ProfileSize > DataBuffer->getBufferEnd()) + return error(instrprof_error::bad_header); - Data = (ProfileData *)(DataBuffer->getBufferStart() + DataOffset); + Data = reinterpret_cast *>( + Start + DataOffset); DataEnd = Data + DataSize; - CountersStart = (uint64_t *)(DataBuffer->getBufferStart() + CountersOffset); - NamesStart = DataBuffer->getBufferStart() + NamesOffset; + CountersStart = reinterpret_cast(Start + CountersOffset); + NamesStart = Start + NamesOffset; + ProfileEnd = Start + ProfileSize; return success(); } -error_code RawInstrProfReader::readNextRecord(InstrProfRecord &Record) { - if (Data == DataEnd) - return error(instrprof_error::eof); +template +std::error_code RawInstrProfReader::readName(InstrProfRecord &Record) { + Record.Name = StringRef(getName(Data->NamePtr), swap(Data->NameSize)); + if (Record.Name.data() < NamesStart || + Record.Name.data() + Record.Name.size() > DataBuffer->getBufferEnd()) + return error(instrprof_error::malformed); - // Get the raw data. - StringRef RawName(getName(Data->NamePtr), swap(Data->NameSize)); - auto RawCounts = makeArrayRef(getCounter(Data->CounterPtr), - swap(Data->NumCounters)); + return success(); +} + +template +std::error_code RawInstrProfReader::readFuncHash( + InstrProfRecord &Record) { + Record.Hash = swap(Data->FuncHash); + return success(); +} + +template +std::error_code RawInstrProfReader::readRawCounts( + InstrProfRecord &Record) { + uint32_t NumCounters = swap(Data->NumCounters); + IntPtrT CounterPtr = Data->CounterPtr; + if (NumCounters == 0) + return error(instrprof_error::malformed); + + auto RawCounts = makeArrayRef(getCounter(CounterPtr), NumCounters); + auto *NamesStartAsCounter = reinterpret_cast(NamesStart); // Check bounds. - if (RawName.data() < NamesStart || - RawName.data() + RawName.size() > DataBuffer->getBufferEnd() || - RawCounts.data() < CountersStart || - RawCounts.data() + RawCounts.size() > (uint64_t *)NamesStart) + if (RawCounts.data() < CountersStart || + RawCounts.data() + RawCounts.size() > NamesStartAsCounter) return error(instrprof_error::malformed); - // Store the data in Record, byte-swapping as necessary. - Record.Hash = swap(Data->FuncHash); - Record.Name = RawName; if (ShouldSwapBytes) { - Counts.clear(); - Counts.reserve(RawCounts.size()); + Record.Counts.clear(); + Record.Counts.reserve(RawCounts.size()); for (uint64_t Count : RawCounts) - Counts.push_back(swap(Count)); - Record.Counts = Counts; + Record.Counts.push_back(swap(Count)); } else Record.Counts = RawCounts; + return success(); +} + +template +std::error_code +RawInstrProfReader::readNextRecord(InstrProfRecord &Record) { + if (atEnd()) + if (std::error_code EC = readNextHeader(ProfileEnd)) + return EC; + + // Read name ad set it in Record. + if (std::error_code EC = readName(Record)) + return EC; + + // Read FuncHash and set it in Record. + if (std::error_code EC = readFuncHash(Record)) + return EC; + + // Read raw counts and set Record. + if (std::error_code EC = readRawCounts(Record)) + return EC; + // Iterate. - ++Data; + advanceData(); + return success(); +} + +namespace llvm { +template class RawInstrProfReader; +template class RawInstrProfReader; +} + +InstrProfLookupTrait::hash_value_type +InstrProfLookupTrait::ComputeHash(StringRef K) { + return IndexedInstrProf::ComputeHash(HashType, K); +} + +typedef InstrProfLookupTrait::data_type data_type; +typedef InstrProfLookupTrait::offset_type offset_type; + +bool InstrProfLookupTrait::ReadValueProfilingData( + const unsigned char *&D, const unsigned char *const End) { + ErrorOr> VDataPtrOrErr = + IndexedInstrProf::ValueProfData::getValueProfData( + D, End, ValueProfDataEndianness); + + if (VDataPtrOrErr.getError()) + return false; + + VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), &HashKeys); + D += VDataPtrOrErr.get()->TotalSize; + + return true; +} + +data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D, + offset_type N) { + // Check if the data is corrupt. If so, don't try to read it. + if (N % sizeof(uint64_t)) + return data_type(); + + DataBuffer.clear(); + std::vector CounterBuffer; + + using namespace support; + const unsigned char *End = D + N; + while (D < End) { + // Read hash. + if (D + sizeof(uint64_t) >= End) + return data_type(); + uint64_t Hash = endian::readNext(D); + + // Initialize number of counters for FormatVersion == 1. + uint64_t CountsSize = N / sizeof(uint64_t) - 1; + // If format version is different then read the number of counters. + if (FormatVersion != 1) { + if (D + sizeof(uint64_t) > End) + return data_type(); + CountsSize = endian::readNext(D); + } + // Read counter values. + if (D + CountsSize * sizeof(uint64_t) > End) + return data_type(); + + CounterBuffer.clear(); + CounterBuffer.reserve(CountsSize); + for (uint64_t J = 0; J < CountsSize; ++J) + CounterBuffer.push_back(endian::readNext(D)); + + DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer)); + + // Read value profiling data. + if (FormatVersion > 2 && !ReadValueProfilingData(D, End)) { + DataBuffer.clear(); + return data_type(); + } + } + return DataBuffer; +} + +std::error_code +InstrProfReaderIndex::getRecords(StringRef FuncName, + ArrayRef &Data) { + auto Iter = Index->find(FuncName); + if (Iter == Index->end()) + return instrprof_error::unknown_function; + + Data = (*Iter); + if (Data.empty()) + return instrprof_error::malformed; + + return instrprof_error::success; +} + +std::error_code InstrProfReaderIndex::getRecords( + ArrayRef &Data) { + if (atEnd()) return instrprof_error::eof; + + Data = *RecordIterator; + + if (Data.empty()) return instrprof_error::malformed; + + return instrprof_error::success; +} + +void InstrProfReaderIndex::Init(const unsigned char *Buckets, + const unsigned char *const Payload, + const unsigned char *const Base, + IndexedInstrProf::HashT HashType, + uint64_t Version) { + FormatVersion = Version; + Index.reset(IndexType::Create(Buckets, Payload, Base, + InstrProfLookupTrait(HashType, Version))); + // Form the map of hash values to const char* keys in profiling data. + std::vector> HashKeys; + for (auto Key : Index->keys()) { + const char *KeyTableRef = StringTable.insertString(Key); + HashKeys.push_back(std::make_pair(ComputeHash(HashType, Key), KeyTableRef)); + } + std::sort(HashKeys.begin(), HashKeys.end(), less_first()); + HashKeys.erase(std::unique(HashKeys.begin(), HashKeys.end()), HashKeys.end()); + // Set the hash key map for the InstrLookupTrait + Index->getInfoObj().setHashKeys(std::move(HashKeys)); + RecordIterator = Index->data_begin(); +} + +bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) { + if (DataBuffer.getBufferSize() < 8) + return false; + using namespace support; + uint64_t Magic = + endian::read(DataBuffer.getBufferStart()); + // Verify that it's magical. + return Magic == IndexedInstrProf::Magic; +} + +std::error_code IndexedInstrProfReader::readHeader() { + const unsigned char *Start = + (const unsigned char *)DataBuffer->getBufferStart(); + const unsigned char *Cur = Start; + if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24) + return error(instrprof_error::truncated); + + using namespace support; + + auto *Header = reinterpret_cast(Cur); + Cur += sizeof(IndexedInstrProf::Header); + + // Check the magic number. + uint64_t Magic = endian::byte_swap(Header->Magic); + if (Magic != IndexedInstrProf::Magic) + return error(instrprof_error::bad_magic); + + // Read the version. + uint64_t FormatVersion = endian::byte_swap(Header->Version); + if (FormatVersion > IndexedInstrProf::Version) + return error(instrprof_error::unsupported_version); + + // Read the maximal function count. + MaxFunctionCount = + endian::byte_swap(Header->MaxFunctionCount); + + // Read the hash type and start offset. + IndexedInstrProf::HashT HashType = static_cast( + endian::byte_swap(Header->HashType)); + if (HashType > IndexedInstrProf::HashT::Last) + return error(instrprof_error::unsupported_hash_type); + + uint64_t HashOffset = endian::byte_swap(Header->HashOffset); + + // The rest of the file is an on disk hash table. + Index.Init(Start + HashOffset, Cur, Start, HashType, FormatVersion); + + return success(); +} + +ErrorOr +IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName, + uint64_t FuncHash) { + ArrayRef Data; + std::error_code EC = Index.getRecords(FuncName, Data); + if (EC != instrprof_error::success) + return EC; + // Found it. Look for counters with the right hash. + for (unsigned I = 0, E = Data.size(); I < E; ++I) { + // Check for a match and fill the vector if there is one. + if (Data[I].Hash == FuncHash) { + return std::move(Data[I]); + } + } + return error(instrprof_error::hash_mismatch); +} + +std::error_code +IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, uint64_t FuncHash, + std::vector &Counts) { + ErrorOr Record = getInstrProfRecord(FuncName, FuncHash); + if (std::error_code EC = Record.getError()) + return EC; + + Counts = Record.get().Counts; + return success(); +} + +std::error_code IndexedInstrProfReader::readNextRecord( + InstrProfRecord &Record) { + static unsigned RecordIndex = 0; + + ArrayRef Data; + + std::error_code EC = Index.getRecords(Data); + if (EC != instrprof_error::success) + return error(EC); + + Record = Data[RecordIndex++]; + if (RecordIndex >= Data.size()) { + Index.advanceToNextKey(); + RecordIndex = 0; + } return success(); }