From 09717fa07bf6a4cc529885ccb33c8751519efd22 Mon Sep 17 00:00:00 2001 From: Xinliang David Li Date: Sun, 18 Oct 2015 01:02:29 +0000 Subject: [PATCH] Minor Instr PGO code restructuring 1. Key constant values (version, magic) and data structures related to raw and indexed profile format are moved into one centralized file: InstrProf.h. 2. Utility function such as MD5Hash computation is also moved to the common header to allow sharing with other components in the future. 3. A header data structure is introduced for Indexed format so that the reader and writer can always be in sync. 4. Added some comments to document different places where multiple definition of the data structure must be kept in sync (reader/writer, runtime, lowering etc). No functional change is intended. Differential Revision: http://reviews.llvm.org/D13758 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@250638 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ProfileData/InstrProf.h | 102 +++++++++++++++++++++ include/llvm/ProfileData/InstrProfReader.h | 22 +---- lib/ProfileData/InstrProfIndexed.h | 56 ----------- lib/ProfileData/InstrProfReader.cpp | 81 ++++++---------- lib/ProfileData/InstrProfWriter.cpp | 20 +++- 5 files changed, 147 insertions(+), 134 deletions(-) delete mode 100644 lib/ProfileData/InstrProfIndexed.h diff --git a/include/llvm/ProfileData/InstrProf.h b/include/llvm/ProfileData/InstrProf.h index 668e6d3663d..f425c6e13ed 100644 --- a/include/llvm/ProfileData/InstrProf.h +++ b/include/llvm/ProfileData/InstrProf.h @@ -16,8 +16,11 @@ #ifndef LLVM_PROFILEDATA_INSTRPROF_H_ #define LLVM_PROFILEDATA_INSTRPROF_H_ +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MD5.h" #include #include #include @@ -132,6 +135,105 @@ struct InstrProfRecord { } }; +namespace IndexedInstrProf { +enum class HashT : uint32_t { + MD5, + + Last = MD5 +}; + +static inline uint64_t 
MD5Hash(StringRef Str) { + MD5 Hash; + Hash.update(Str); + llvm::MD5::MD5Result Result; + Hash.final(Result); + // Return the least significant 8 bytes. Our MD5 implementation returns the + // result in little endian, so we may need to swap bytes. + using namespace llvm::support; + return endian::read(Result); +} + +static inline uint64_t ComputeHash(HashT Type, StringRef K) { + switch (Type) { + case HashT::MD5: + return IndexedInstrProf::MD5Hash(K); + } + llvm_unreachable("Unhandled hash type"); +} + +const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81" +const uint64_t Version = 3; +const HashT HashType = HashT::MD5; + +struct Header { + uint64_t Magic; + uint64_t Version; + uint64_t MaxFunctionCount; + uint64_t HashType; + uint64_t HashOffset; +}; + +} // end namespace IndexedInstrProf + +namespace RawInstrProf { + +const uint64_t Version = 1; + +// Magic number to detect file format and endianness. +// Use 255 at one end, since no UTF-8 file can use that character. Avoid 0, +// so that utilities, like strings, don't grab it as a string. 129 is also +// invalid UTF-8, and high enough to be interesting. +// Use "lprofr" in the centre to stand for "LLVM Profile Raw", or "lprofR" +// for 32-bit platforms. +// The magic and version need to be kept in sync with +// projects/compiler-rt/lib/profile/InstrProfiling.c + +template +inline uint64_t getMagic(); +template <> +inline uint64_t getMagic() { + return uint64_t(255) << 56 | uint64_t('l') << 48 | uint64_t('p') << 40 | + uint64_t('r') << 32 | uint64_t('o') << 24 | uint64_t('f') << 16 | + uint64_t('r') << 8 | uint64_t(129); +} + +template <> +inline uint64_t getMagic() { + return uint64_t(255) << 56 | uint64_t('l') << 48 | uint64_t('p') << 40 | + uint64_t('r') << 32 | uint64_t('o') << 24 | uint64_t('f') << 16 | + uint64_t('R') << 8 | uint64_t(129); +} + +// The definition should match the structure defined in +// compiler-rt/lib/profile/InstrProfiling.h. 
+// It should also match the synthesized type in +// Transforms/Instrumentation/InstrProfiling.cpp:getOrCreateRegionCounters. + +template +struct ProfileData { + const uint32_t NameSize; + const uint32_t NumCounters; + const uint64_t FuncHash; + const IntPtrT NamePtr; + const IntPtrT CounterPtr; +}; + +// The definition should match the header referenced in +// compiler-rt/lib/profile/InstrProfilingFile.c and +// InstrProfilingBuffer.c. + +struct Header { + const uint64_t Magic; + const uint64_t Version; + const uint64_t DataSize; + const uint64_t CountersSize; + const uint64_t NamesSize; + const uint64_t CountersDelta; + const uint64_t NamesDelta; +}; + +} // end namespace RawInstrProf + } // end namespace llvm namespace std { diff --git a/include/llvm/ProfileData/InstrProfReader.h b/include/llvm/ProfileData/InstrProfReader.h index c0585d6f6d2..cdead0f112d 100644 --- a/include/llvm/ProfileData/InstrProfReader.h +++ b/include/llvm/ProfileData/InstrProfReader.h @@ -132,28 +132,12 @@ class RawInstrProfReader : public InstrProfReader { private: /// The profile data file contents. 
std::unique_ptr DataBuffer; - struct ProfileData { - const uint32_t NameSize; - const uint32_t NumCounters; - const uint64_t FuncHash; - const IntPtrT NamePtr; - const IntPtrT CounterPtr; - }; - struct RawHeader { - const uint64_t Magic; - const uint64_t Version; - const uint64_t DataSize; - const uint64_t CountersSize; - const uint64_t NamesSize; - const uint64_t CountersDelta; - const uint64_t NamesDelta; - }; bool ShouldSwapBytes; uint64_t CountersDelta; uint64_t NamesDelta; - const ProfileData *Data; - const ProfileData *DataEnd; + const RawInstrProf::ProfileData *Data; + const RawInstrProf::ProfileData *DataEnd; const uint64_t *CountersStart; const char *NamesStart; const char *ProfileEnd; @@ -170,7 +154,7 @@ public: private: std::error_code readNextHeader(const char *CurrentPos); - std::error_code readHeader(const RawHeader &Header); + std::error_code readHeader(const RawInstrProf::Header &Header); template IntT swap(IntT Int) const { return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int; diff --git a/lib/ProfileData/InstrProfIndexed.h b/lib/ProfileData/InstrProfIndexed.h deleted file mode 100644 index fe0b04ed008..00000000000 --- a/lib/ProfileData/InstrProfIndexed.h +++ /dev/null @@ -1,56 +0,0 @@ -//=-- InstrProfIndexed.h - Indexed profiling format support -------*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Shared header for the instrumented profile data reader and writer. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_PROFILEDATA_INSTRPROFINDEXED_H -#define LLVM_LIB_PROFILEDATA_INSTRPROFINDEXED_H - -#include "llvm/Support/Endian.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MD5.h" - -namespace llvm { - -namespace IndexedInstrProf { -enum class HashT : uint32_t { - MD5, - - Last = MD5 -}; - -static inline uint64_t MD5Hash(StringRef Str) { - MD5 Hash; - Hash.update(Str); - llvm::MD5::MD5Result Result; - Hash.final(Result); - // Return the least significant 8 bytes. Our MD5 implementation returns the - // result in little endian, so we may need to swap bytes. - using namespace llvm::support; - return endian::read(Result); -} - -static inline uint64_t ComputeHash(HashT Type, StringRef K) { - switch (Type) { - case HashT::MD5: - return IndexedInstrProf::MD5Hash(K); - } - llvm_unreachable("Unhandled hash type"); -} - -const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81" -const uint64_t Version = 3; -const HashT HashType = HashT::MD5; -} - -} // end namespace llvm - -#endif diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp index 8ad50615a25..61d1f11076c 100644 --- a/lib/ProfileData/InstrProfReader.cpp +++ b/lib/ProfileData/InstrProfReader.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "llvm/ProfileData/InstrProfReader.h" -#include "InstrProfIndexed.h" #include "llvm/ADT/STLExtras.h" #include @@ -139,54 +138,25 @@ std::error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) { return success(); } -template -static uint64_t getRawMagic(); - -template <> -uint64_t getRawMagic() { - return - uint64_t(255) << 56 | - uint64_t('l') << 48 | - uint64_t('p') << 40 | - uint64_t('r') << 32 | - uint64_t('o') << 24 | - uint64_t('f') << 16 | - uint64_t('r') << 8 | - uint64_t(129); -} - -template <> -uint64_t getRawMagic() { - return - uint64_t(255) 
<< 56 | - uint64_t('l') << 48 | - uint64_t('p') << 40 | - uint64_t('r') << 32 | - uint64_t('o') << 24 | - uint64_t('f') << 16 | - uint64_t('R') << 8 | - uint64_t(129); -} - template bool RawInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) { if (DataBuffer.getBufferSize() < sizeof(uint64_t)) return false; uint64_t Magic = *reinterpret_cast(DataBuffer.getBufferStart()); - return getRawMagic() == Magic || - sys::getSwappedBytes(getRawMagic()) == Magic; + return RawInstrProf::getMagic() == Magic || + sys::getSwappedBytes(RawInstrProf::getMagic()) == Magic; } template std::error_code RawInstrProfReader::readHeader() { if (!hasFormat(*DataBuffer)) return error(instrprof_error::bad_magic); - if (DataBuffer->getBufferSize() < sizeof(RawHeader)) + if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header)) return error(instrprof_error::bad_header); - auto *Header = - reinterpret_cast(DataBuffer->getBufferStart()); - ShouldSwapBytes = Header->Magic != getRawMagic(); + auto *Header = reinterpret_cast( + DataBuffer->getBufferStart()); + ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic(); return readHeader(*Header); } @@ -202,29 +172,25 @@ RawInstrProfReader::readNextHeader(const char *CurrentPos) { return instrprof_error::eof; // If there isn't enough space for another header, this is probably just // garbage at the end of the file. - if (CurrentPos + sizeof(RawHeader) > End) + if (CurrentPos + sizeof(RawInstrProf::Header) > End) return instrprof_error::malformed; // The writer ensures each profile is padded to start at an aligned address. if (reinterpret_cast(CurrentPos) % alignOf()) return instrprof_error::malformed; // The magic should have the same byte order as in the previous header. uint64_t Magic = *reinterpret_cast(CurrentPos); - if (Magic != swap(getRawMagic())) + if (Magic != swap(RawInstrProf::getMagic())) return instrprof_error::bad_magic; // There's another profile to read, so we need to process the header. 
- auto *Header = reinterpret_cast(CurrentPos); + auto *Header = reinterpret_cast(CurrentPos); return readHeader(*Header); } -static uint64_t getRawVersion() { - return 1; -} - template -std::error_code -RawInstrProfReader::readHeader(const RawHeader &Header) { - if (swap(Header.Version) != getRawVersion()) +std::error_code RawInstrProfReader::readHeader( + const RawInstrProf::Header &Header) { + if (swap(Header.Version) != RawInstrProf::Version) return error(instrprof_error::unsupported_version); CountersDelta = swap(Header.CountersDelta); @@ -233,8 +199,9 @@ RawInstrProfReader::readHeader(const RawHeader &Header) { auto CountersSize = swap(Header.CountersSize); auto NamesSize = swap(Header.NamesSize); - ptrdiff_t DataOffset = sizeof(RawHeader); - ptrdiff_t CountersOffset = DataOffset + sizeof(ProfileData) * DataSize; + ptrdiff_t DataOffset = sizeof(RawInstrProf::Header); + ptrdiff_t CountersOffset = + DataOffset + sizeof(RawInstrProf::ProfileData) * DataSize; ptrdiff_t NamesOffset = CountersOffset + sizeof(uint64_t) * CountersSize; size_t ProfileSize = NamesOffset + sizeof(char) * NamesSize; @@ -242,7 +209,8 @@ RawInstrProfReader::readHeader(const RawHeader &Header) { if (Start + ProfileSize > DataBuffer->getBufferEnd()) return error(instrprof_error::bad_header); - Data = reinterpret_cast(Start + DataOffset); + Data = reinterpret_cast *>( + Start + DataOffset); DataEnd = Data + DataSize; CountersStart = reinterpret_cast(Start + CountersOffset); NamesStart = Start + NamesOffset; @@ -421,25 +389,30 @@ std::error_code IndexedInstrProfReader::readHeader() { using namespace support; + auto *Header = reinterpret_cast(Cur); + Cur += sizeof(IndexedInstrProf::Header); + // Check the magic number. - uint64_t Magic = endian::readNext(Cur); + uint64_t Magic = endian::byte_swap(Header->Magic); if (Magic != IndexedInstrProf::Magic) return error(instrprof_error::bad_magic); // Read the version. 
- FormatVersion = endian::readNext(Cur); + FormatVersion = endian::byte_swap(Header->Version); if (FormatVersion > IndexedInstrProf::Version) return error(instrprof_error::unsupported_version); // Read the maximal function count. - MaxFunctionCount = endian::readNext(Cur); + MaxFunctionCount = + endian::byte_swap(Header->MaxFunctionCount); // Read the hash type and start offset. IndexedInstrProf::HashT HashType = static_cast( - endian::readNext(Cur)); + endian::byte_swap(Header->HashType)); if (HashType > IndexedInstrProf::HashT::Last) return error(instrprof_error::unsupported_hash_type); - uint64_t HashOffset = endian::readNext(Cur); + + uint64_t HashOffset = endian::byte_swap(Header->HashOffset); // The rest of the file is an on disk hash table. Index.reset(InstrProfReaderIndex::Create( diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp index 856194d7776..e3018a92e5e 100644 --- a/lib/ProfileData/InstrProfWriter.cpp +++ b/lib/ProfileData/InstrProfWriter.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "llvm/ProfileData/InstrProfWriter.h" -#include "InstrProfIndexed.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/OnDiskHashTable.h" @@ -197,13 +196,23 @@ std::pair InstrProfWriter::writeImpl(raw_ostream &OS) { endian::Writer LE(OS); // Write the header. - LE.write(IndexedInstrProf::Magic); - LE.write(IndexedInstrProf::Version); - LE.write(MaxFunctionCount); - LE.write(static_cast(IndexedInstrProf::HashType)); + IndexedInstrProf::Header Header; + Header.Magic = IndexedInstrProf::Magic; + Header.Version = IndexedInstrProf::Version; + Header.MaxFunctionCount = MaxFunctionCount; + Header.HashType = static_cast(IndexedInstrProf::HashType); + Header.HashOffset = 0; + int N = sizeof(IndexedInstrProf::Header) / sizeof(uint64_t); + + // Only write out all the fields except 'HashOffset'. 
We need + // to remember the offset of that field to allow back patching + // later. + for (int I = 0; I < N - 1; I++) + LE.write(reinterpret_cast(&Header)[I]); // Save a space to write the hash table start location. uint64_t HashTableStartLoc = OS.tell(); + // Reserve the space for HashOffset field. LE.write(0); // Write the hash table. uint64_t HashTableStart = Generator.Emit(OS); @@ -218,6 +227,7 @@ void InstrProfWriter::write(raw_fd_ostream &OS) { // Go back and fill in the hash table start. using namespace support; OS.seek(TableStart.first); + // Now patch the HashOffset field previously reserved. endian::Writer(OS).write(TableStart.second); } -- 2.34.1