#include "llvm/IR/Module.h"
#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/FunctionInfo.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/DataStream.h"
#include "llvm/Support/ManagedStatic.h"
std::unique_ptr<MemoryBuffer> Buffer;
std::unique_ptr<BitstreamReader> StreamFile;
BitstreamCursor Stream;
+ // Next offset to start scanning for lazy parsing of function bodies.
uint64_t NextUnreadBit = 0;
+ // Last function offset found in the VST.
+ uint64_t LastFunctionBlockBit = 0;
bool SeenValueSymbolTable = false;
unsigned VSTOffset = 0;
+ // Contains an arbitrary and optional string identifying the bitcode producer
+ std::string ProducerIdentification;
std::vector<Type*> TypeList;
BitcodeReaderValueList ValueList;
bool StripDebugInfo = false;
+ std::vector<std::string> BundleTags;
+
public:
std::error_code error(BitcodeError E, const Twine &Message);
std::error_code error(BitcodeError E);
void setStripDebugInfo() override;
private:
+ /// Parse the "IDENTIFICATION_BLOCK_ID" block, populate the
+ /// ProducerIdentification data member, and do some basic enforcement on the
+ /// "epoch" encoded in the bitcode.
+ std::error_code parseBitcodeVersion();
+
std::vector<StructType *> IdentifiedStructTypes;
StructType *createIdentifiedStructType(LLVMContext &Context, StringRef Name);
StructType *createIdentifiedStructType(LLVMContext &Context);
/// a corresponding error code.
std::error_code parseAlignmentValue(uint64_t Exponent, unsigned &Alignment);
std::error_code parseAttrKind(uint64_t Code, Attribute::AttrKind *Kind);
- std::error_code parseModule(bool Resume, bool ShouldLazyLoadMetadata = false);
+ std::error_code parseModule(uint64_t ResumeBit,
+ bool ShouldLazyLoadMetadata = false);
std::error_code parseAttributeBlock();
std::error_code parseAttributeGroupBlock();
std::error_code parseTypeTable();
std::error_code parseTypeTableBody();
+ std::error_code parseOperandBundleTags();
ErrorOr<Value *> recordValue(SmallVectorImpl<uint64_t> &Record,
unsigned NameIndex, Triple &TT);
std::error_code parseValueSymbolTable(unsigned Offset = 0);
std::error_code parseConstants();
+ std::error_code rememberAndSkipFunctionBodies();
std::error_code rememberAndSkipFunctionBody();
/// Save the positions of the Metadata blocks and skip parsing the blocks.
std::error_code rememberAndSkipMetadata();
Function *F,
DenseMap<Function *, uint64_t>::iterator DeferredFunctionInfoIterator);
};
+
+/// Class to manage reading and parsing function summary index bitcode
+/// files/sections.
+class FunctionIndexBitcodeReader {
+  DiagnosticHandlerFunction DiagnosticHandler;
+
+  /// Eventually points to the function index built during parsing.
+  FunctionInfoIndex *TheIndex = nullptr;
+
+  /// Buffer being parsed. Held in an owning pointer: it is destroyed by
+  /// freeState() unless releaseBuffer() has been called first.
+  std::unique_ptr<MemoryBuffer> Buffer;
+  std::unique_ptr<BitstreamReader> StreamFile;
+  BitstreamCursor Stream;
+
+  /// \brief Used to indicate whether we are doing lazy parsing of summary data.
+  ///
+  /// If false, the summary section is fully parsed into the index during
+  /// the initial parse. Otherwise, if true, the caller is expected to
+  /// invoke \a readFunctionSummary for each summary needed, and the summary
+  /// section is thus parsed lazily.
+  bool IsLazy = false;
+
+  /// Used to indicate whether caller only wants to check for the presence
+  /// of the function summary bitcode section. All blocks are skipped,
+  /// but the SeenFuncSummary boolean is set.
+  bool CheckFuncSummaryPresenceOnly = false;
+
+  /// Indicates whether we have encountered a function summary section
+  /// yet during parsing, used when checking if file contains function
+  /// summary section.
+  bool SeenFuncSummary = false;
+
+  /// \brief Map populated during function summary section parsing, and
+  /// consumed during ValueSymbolTable parsing.
+  ///
+  /// Used to correlate summary records with VST entries. For the per-module
+  /// index this maps the ValueID to the parsed function summary, and
+  /// for the combined index this maps the summary record's bitcode
+  /// offset to the function summary (since in the combined index the
+  /// VST records do not hold value IDs but rather hold the function
+  /// summary record offset).
+  DenseMap<uint64_t, std::unique_ptr<FunctionSummary>> SummaryMap;
+
+  /// Map populated during module path string table parsing, from the
+  /// module ID to a string reference owned by the index's module
+  /// path string table, used to correlate with combined index function
+  /// summary records.
+  DenseMap<uint64_t, StringRef> ModuleIdMap;
+
+public:
+  std::error_code error(BitcodeError E, const Twine &Message);
+  std::error_code error(BitcodeError E);
+  std::error_code error(const Twine &Message);
+
+  /// Construct a reader over \p Buffer. The raw pointer is stored in an
+  /// owning std::unique_ptr, so a caller that keeps its own owner must call
+  /// releaseBuffer() (or give up its own ownership) before both are destroyed.
+  FunctionIndexBitcodeReader(MemoryBuffer *Buffer, LLVMContext &Context,
+                             DiagnosticHandlerFunction DiagnosticHandler,
+                             bool IsLazy = false,
+                             bool CheckFuncSummaryPresenceOnly = false);
+  /// Construct a reader with no buffer attached.
+  FunctionIndexBitcodeReader(LLVMContext &Context,
+                             DiagnosticHandlerFunction DiagnosticHandler,
+                             bool IsLazy = false,
+                             bool CheckFuncSummaryPresenceOnly = false);
+  ~FunctionIndexBitcodeReader() { freeState(); }
+
+  /// Destroy the buffer currently owned by the reader, if any.
+  void freeState();
+
+  /// Give up ownership of the buffer without destroying it.
+  void releaseBuffer();
+
+  /// Check if the parser has encountered a function summary section.
+  bool foundFuncSummary() const { return SeenFuncSummary; }
+
+  /// \brief Main interface to parsing a bitcode buffer.
+  /// \returns a success (default-constructed) error code when parsing
+  /// succeeds, otherwise the error that was reported.
+  std::error_code parseSummaryIndexInto(std::unique_ptr<DataStreamer> Streamer,
+                                        FunctionInfoIndex *I);
+
+  /// \brief Interface for parsing a function summary lazily.
+  std::error_code parseFunctionSummary(std::unique_ptr<DataStreamer> Streamer,
+                                       FunctionInfoIndex *I,
+                                       size_t FunctionSummaryOffset);
+
+private:
+  std::error_code parseModule();
+  std::error_code parseValueSymbolTable();
+  std::error_code parseEntireSummary();
+  std::error_code parseModuleStringTable();
+  std::error_code initStream(std::unique_ptr<DataStreamer> Streamer);
+  std::error_code initStreamFromBuffer();
+  std::error_code initLazyStream(std::unique_ptr<DataStreamer> Streamer);
+};
} // namespace
BitcodeDiagnosticInfo::BitcodeDiagnosticInfo(std::error_code EC,
}
std::error_code BitcodeReader::error(BitcodeError E, const Twine &Message) {
+ if (!ProducerIdentification.empty()) {
+ Twine MsgWithID = Message + " (Producer: '" + ProducerIdentification +
+ "' Reader: 'LLVM " + LLVM_VERSION_STRING "')";
+ return ::error(DiagnosticHandler, make_error_code(E), MsgWithID);
+ }
return ::error(DiagnosticHandler, make_error_code(E), Message);
}
std::error_code BitcodeReader::error(const Twine &Message) {
+ if (!ProducerIdentification.empty()) {
+ Twine MsgWithID = Message + " (Producer: '" + ProducerIdentification +
+ "' Reader: 'LLVM " + LLVM_VERSION_STRING "')";
+ return ::error(DiagnosticHandler,
+ make_error_code(BitcodeError::CorruptedBitcode), MsgWithID);
+ }
return ::error(DiagnosticHandler,
make_error_code(BitcodeError::CorruptedBitcode), Message);
}
}
}
+/// Read the OPERAND_BUNDLE_TAGS block and append every tag string it holds to
+/// BundleTags, in record order. A second tags block is rejected.
+std::error_code BitcodeReader::parseOperandBundleTags() {
+  if (Stream.EnterSubBlock(bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID))
+    return error("Invalid record");
+
+  // Tags already present mean this block was seen before.
+  if (!BundleTags.empty())
+    return error("Invalid multiple blocks");
+
+  SmallVector<uint64_t, 64> TagRecord;
+
+  for (;;) {
+    BitstreamEntry Next = Stream.advanceSkippingSubblocks();
+
+    switch (Next.Kind) {
+    case BitstreamEntry::Record:
+      // A tag record; decoded below.
+      break;
+    case BitstreamEntry::EndBlock:
+      return std::error_code();
+    case BitstreamEntry::SubBlock: // Sub-blocks are skipped by the cursor.
+    case BitstreamEntry::Error:
+      return error("Malformed block");
+    }
+
+    // The position of a tag in BundleTags is its implicit integer id.
+    if (Stream.readRecord(Next.ID, TagRecord) != bitc::OPERAND_BUNDLE_TAG)
+      return error("Invalid record");
+
+    // OPERAND_BUNDLE_TAG: [strchr x N]
+    BundleTags.emplace_back();
+    if (convertToString(TagRecord, 0, BundleTags.back()))
+      return error("Invalid record");
+    TagRecord.clear();
+  }
+}
+
/// Associate a value with its name from the given index in the provided record.
ErrorOr<Value *> BitcodeReader::recordValue(SmallVectorImpl<uint64_t> &Record,
unsigned NameIndex, Triple &TT) {
assert(F);
uint64_t FuncBitOffset = FuncWordOffset * 32;
DeferredFunctionInfo[F] = FuncBitOffset + FuncBitcodeOffsetDelta;
- // Set the NextUnreadBit to point to the last function block.
+ // Set the LastFunctionBlockBit to point to the last function block.
// Later when parsing is resumed after function materialization,
// we can simply skip that last function block.
- if (FuncBitOffset > NextUnreadBit)
- NextUnreadBit = FuncBitOffset;
+ if (FuncBitOffset > LastFunctionBlockBit)
+ LastFunctionBlockBit = FuncBitOffset;
break;
}
case bitc::VST_CODE_BBENTRY: {
return error("Invalid ID");
++BBI;
}
- BB = BBI;
+ BB = &*BBI;
} else {
// Otherwise insert a placeholder and remember it so it can be inserted
// when the function is parsed.
// Save the current stream state.
uint64_t CurBit = Stream.GetCurrentBitNo();
+ assert(
+ (DeferredFunctionInfo[Fn] == 0 || DeferredFunctionInfo[Fn] == CurBit) &&
+ "Mismatch between VST and scanned function offsets");
DeferredFunctionInfo[Fn] = CurBit;
// Skip over the function block for now.
return std::error_code();
}
-std::error_code BitcodeReader::parseModule(bool Resume,
+/// Support for lazy parsing of function bodies. This is required if we
+/// either have an old bitcode file without a VST forward declaration record,
+/// or if we have an anonymous function being materialized, since anonymous
+/// functions do not have a name and are therefore not in the VST.
+///
+/// Each call resumes at NextUnreadBit, records exactly one function block via
+/// rememberAndSkipFunctionBody(), and advances NextUnreadBit past it. Anything
+/// other than a function sub-block at that position is reported as an error.
+std::error_code BitcodeReader::rememberAndSkipFunctionBodies() {
+  Stream.JumpToBit(NextUnreadBit);
+
+  if (Stream.AtEndOfStream())
+    return error("Could not find function in stream");
+
+  assert(SeenFirstFunctionBody);
+  // An old bitcode file with the symbol table at the end would have
+  // finished the parse greedily.
+  assert(SeenValueSymbolTable);
+
+  BitstreamEntry Entry = Stream.advance();
+  if (Entry.Kind != BitstreamEntry::SubBlock)
+    return error("Expect SubBlock");
+  if (Entry.ID != bitc::FUNCTION_BLOCK_ID)
+    return error("Expect function block");
+
+  if (std::error_code EC = rememberAndSkipFunctionBody())
+    return EC;
+  NextUnreadBit = Stream.GetCurrentBitNo();
+  return std::error_code();
+}
+
+/// Parse the IDENTIFICATION block: store the producer string in
+/// ProducerIdentification and reject bitcode whose epoch differs from
+/// bitc::BITCODE_CURRENT_EPOCH. Any unrecognised record code is an error.
+std::error_code BitcodeReader::parseBitcodeVersion() {
+  if (Stream.EnterSubBlock(bitc::IDENTIFICATION_BLOCK_ID))
+    return error("Invalid record");
+
+  // Read all the records.
+  SmallVector<uint64_t, 64> Record;
+  while (1) {
+    BitstreamEntry Entry = Stream.advance();
+
+    switch (Entry.Kind) {
+    default:
+    case BitstreamEntry::Error:
+      return error("Malformed block");
+    case BitstreamEntry::EndBlock:
+      return std::error_code();
+    case BitstreamEntry::Record:
+      // The interesting case.
+      break;
+    }
+
+    // Read a record.
+    Record.clear();
+    unsigned BitCode = Stream.readRecord(Entry.ID, Record);
+    switch (BitCode) {
+    default: // Default behavior: reject
+      return error("Invalid value");
+    case bitc::IDENTIFICATION_CODE_STRING: { // IDENTIFICATION: [strchr x
+                                             // N]
+      convertToString(Record, 0, ProducerIdentification);
+      break;
+    }
+    case bitc::IDENTIFICATION_CODE_EPOCH: { // EPOCH: [epoch#]
+      // An EPOCH record with no operands is malformed; reject it instead of
+      // indexing past the end of Record.
+      if (Record.empty())
+        return error("Invalid record");
+      unsigned epoch = (unsigned)Record[0];
+      if (epoch != bitc::BITCODE_CURRENT_EPOCH) {
+        return error(
+            Twine("Incompatible epoch: Bitcode '") + Twine(epoch) +
+            "' vs current: '" + Twine(bitc::BITCODE_CURRENT_EPOCH) + "'");
+      }
+      break;
+    }
+    }
+  }
+}
+
+std::error_code BitcodeReader::parseModule(uint64_t ResumeBit,
bool ShouldLazyLoadMetadata) {
- if (Resume)
- Stream.JumpToBit(NextUnreadBit);
+ if (ResumeBit)
+ Stream.JumpToBit(ResumeBit);
else if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
return error("Invalid record");
BitcodeReader::parseValueSymbolTable(VSTOffset))
return EC;
SeenValueSymbolTable = true;
- return std::error_code();
+ // Fall through so that we record the NextUnreadBit below.
+ // This is necessary in case we have an anonymous function that
+ // is later materialized. Since it will not have a VST entry we
+ // need to fall back to the lazy parse to find its offset.
} else {
// If we have a VST forward declaration record, but have already
// parsed the VST (just above, when the first function body was
// encountered here), then we are resuming the parse after
- // materializing functions. The NextUnreadBit points to the start
- // of the last function block recorded in the VST (set when
- // parsing the VST function entries). Skip it.
+ // materializing functions. The ResumeBit points to the
+ // start of the last function block recorded in the
+ // DeferredFunctionInfo map. Skip it.
if (Stream.SkipBlock())
return error("Invalid record");
continue;
}
// Support older bitcode files that did not have the function
- // index in the VST, nor a VST forward declaration record.
+ // index in the VST, nor a VST forward declaration record, as
+ // well as anonymous functions that do not have VST entries.
// Build the DeferredFunctionInfo vector on the fly.
if (std::error_code EC = rememberAndSkipFunctionBody())
return EC;
+
// Suspend parsing when we reach the function bodies. Subsequent
// materialization calls will resume it when necessary. If the bitcode
// file is old, the symbol table will be at the end instead and will not
if (std::error_code EC = parseUseLists())
return EC;
break;
+ case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID:
+ if (std::error_code EC = parseOperandBundleTags())
+ return EC;
+ break;
}
continue;
case bitc::MODULE_CODE_ALIAS:
case bitc::MODULE_CODE_ALIAS_OLD: {
bool NewRecord = BitCode == bitc::MODULE_CODE_ALIAS;
- if (Record.size() < (3 + NewRecord))
+ if (Record.size() < (3 + (unsigned)NewRecord))
return error("Invalid record");
unsigned OpNum = 0;
Type *Ty = getTypeByID(Record[OpNum++]);
}
}
+/// Consume the magic number shared by all bitcode files from \p Stream and
+/// report whether it matched. Reading stops at the first field that differs.
+static bool hasValidBitcodeHeader(BitstreamCursor &Stream) {
+  // 'B' 'C' followed by the nibbles 0x0, 0xC, 0xE, 0xD.
+  return Stream.Read(8) == 'B' &&
+         Stream.Read(8) == 'C' &&
+         Stream.Read(4) == 0x0 &&
+         Stream.Read(4) == 0xC &&
+         Stream.Read(4) == 0xE &&
+         Stream.Read(4) == 0xD;
+}
+
std::error_code
BitcodeReader::parseBitcodeInto(std::unique_ptr<DataStreamer> Streamer,
Module *M, bool ShouldLazyLoadMetadata) {
return EC;
// Sniff for the signature.
- if (Stream.Read(8) != 'B' ||
- Stream.Read(8) != 'C' ||
- Stream.Read(4) != 0x0 ||
- Stream.Read(4) != 0xC ||
- Stream.Read(4) != 0xE ||
- Stream.Read(4) != 0xD)
- return error("Invalid bitcode signature");
+ if (!hasValidBitcodeHeader(Stream)) return error("Invalid bitcode signature");
// We expect a number of well-defined blocks, though we don't necessarily
// need to understand them all.
if (Entry.Kind != BitstreamEntry::SubBlock)
return error("Malformed block");
+ if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID) {
+ parseBitcodeVersion();
+ continue;
+ }
+
if (Entry.ID == bitc::MODULE_BLOCK_ID)
- return parseModule(false, ShouldLazyLoadMetadata);
+ return parseModule(0, ShouldLazyLoadMetadata);
if (Stream.SkipBlock())
return error("Invalid record");
return EC;
// Sniff for the signature.
- if (Stream.Read(8) != 'B' ||
- Stream.Read(8) != 'C' ||
- Stream.Read(4) != 0x0 ||
- Stream.Read(4) != 0xC ||
- Stream.Read(4) != 0xE ||
- Stream.Read(4) != 0xD)
- return error("Invalid bitcode signature");
+ if (!hasValidBitcodeHeader(Stream)) return error("Invalid bitcode signature");
// We expect a number of well-defined blocks, though we don't necessarily
// need to understand them all.
unsigned ModuleMDValueListSize = MDValueList.size();
// Add all the function arguments to the value table.
- for(Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
- ValueList.push_back(I);
+ for (Argument &I : F->args())
+ ValueList.push_back(&I);
unsigned NextValueNo = ValueList.size();
BasicBlock *CurBB = nullptr;
return nullptr;
};
+ std::vector<OperandBundleDef> OperandBundles;
+
// Read all the records.
SmallVector<uint64_t, 64> Record;
while (1) {
CurBB->getInstList().push_back(Temp);
}
} else {
- I = CastInst::Create((Instruction::CastOps)Opc, Op, ResTy);
+ auto CastOp = (Instruction::CastOps)Opc;
+ if (!CastInst::castIsValid(CastOp, Op, ResTy))
+ return error("Invalid cast");
+ I = CastInst::Create(CastOp, Op, ResTy);
}
InstructionList.push_back(I);
break;
}
}
- I = InvokeInst::Create(Callee, NormalBB, UnwindBB, Ops);
+ I = InvokeInst::Create(Callee, NormalBB, UnwindBB, Ops, OperandBundles);
+ OperandBundles.clear();
InstructionList.push_back(I);
cast<InvokeInst>(I)
->setCallingConv(static_cast<CallingConv::ID>(~(1U << 13) & CCInfo));
}
}
- I = CallInst::Create(FTy, Callee, Args);
+ I = CallInst::Create(FTy, Callee, Args, OperandBundles);
+ OperandBundles.clear();
InstructionList.push_back(I);
cast<CallInst>(I)->setCallingConv(
static_cast<CallingConv::ID>((~(1U << 14) & CCInfo) >> 1));
InstructionList.push_back(I);
break;
}
+
+ case bitc::FUNC_CODE_OPERAND_BUNDLE: {
+ // A call or an invoke can be optionally prefixed with some variable
+ // number of operand bundle blocks. These blocks are read into
+ // OperandBundles and consumed at the next call or invoke instruction.
+
+ if (Record.size() < 1 || Record[0] >= BundleTags.size())
+ return error("Invalid record");
+
+ OperandBundles.emplace_back();
+ OperandBundles.back().Tag = BundleTags[Record[0]];
+
+ std::vector<Value *> &Inputs = OperandBundles.back().Inputs;
+
+ unsigned OpNum = 1;
+ while (OpNum != Record.size()) {
+ Value *Op;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+ return error("Invalid record");
+ Inputs.push_back(Op);
+ }
+
+ continue;
+ }
}
// Add instruction to end of current BB. If there is no current BB, reject
delete I;
return error("Invalid instruction with no BB");
}
+ if (!OperandBundles.empty()) {
+ delete I;
+ return error("Operand bundles found with no consumer");
+ }
CurBB->getInstList().push_back(I);
// If this was a terminator instruction, move to the next block.
OutOfRecordLoop:
+ if (!OperandBundles.empty())
+ return error("Operand bundles found with no consumer");
+
// Check the function list for unresolved values.
if (Argument *A = dyn_cast<Argument>(ValueList.back())) {
if (!A->getParent()) {
DenseMap<Function *, uint64_t>::iterator DeferredFunctionInfoIterator) {
while (DeferredFunctionInfoIterator->second == 0) {
// This is the fallback handling for the old format bitcode that
- // didn't contain the function index in the VST. Assert if we end up
- // here for the new format (which is the only time the VSTOffset would
- // be non-zero).
- assert(VSTOffset == 0);
- if (Stream.AtEndOfStream())
- return error("Could not find function in stream");
- // ParseModule will parse the next body in the stream and set its
- // position in the DeferredFunctionInfo map.
- if (std::error_code EC = parseModule(true))
- return EC;
+ // didn't contain the function index in the VST, or when we have
+ // an anonymous function which would not have a VST entry.
+ // Assert that we have one of those two cases.
+ assert(VSTOffset == 0 || !F->hasName());
+ // Parse the next body in the stream and set its position in the
+ // DeferredFunctionInfo map.
+ if (std::error_code EC = rememberAndSkipFunctionBodies()) return EC;
}
return std::error_code();
}
// Iterate over the module, deserializing any functions that are still on
// disk.
- for (Module::iterator F = TheModule->begin(), E = TheModule->end();
- F != E; ++F) {
- if (std::error_code EC = materialize(F))
+ for (Function &F : *TheModule) {
+ if (std::error_code EC = materialize(&F))
return EC;
}
- // At this point, if there are any function bodies, the current bit is
- // pointing to the END_BLOCK record after them. Now make sure the rest
- // of the bits in the module have been read.
- if (NextUnreadBit)
- parseModule(true);
+ // At this point, if there are any function bodies, parse the rest of
+ // the bits in the module past the last function block we have recorded
+ // through either lazy scanning or the VST.
+ if (LastFunctionBlockBit || NextUnreadBit)
+ parseModule(LastFunctionBlockBit > NextUnreadBit ? LastFunctionBlockBit
+ : NextUnreadBit);
// Check that all block address forward references got resolved (as we
// promised above).
return std::error_code();
}
+std::error_code FunctionIndexBitcodeReader::error(BitcodeError E,
+ const Twine &Message) {
+ return ::error(DiagnosticHandler, make_error_code(E), Message);
+}
+
+std::error_code FunctionIndexBitcodeReader::error(const Twine &Message) {
+ return ::error(DiagnosticHandler,
+ make_error_code(BitcodeError::CorruptedBitcode), Message);
+}
+
+std::error_code FunctionIndexBitcodeReader::error(BitcodeError E) {
+ return ::error(DiagnosticHandler, make_error_code(E));
+}
+
+// Build a reader over Buffer. The raw pointer is placed in the owning Buffer
+// member; the diagnostic handler is resolved through getDiagHandler.
+FunctionIndexBitcodeReader::FunctionIndexBitcodeReader(
+    MemoryBuffer *Buffer, LLVMContext &Context,
+    DiagnosticHandlerFunction DiagnosticHandler, bool IsLazy,
+    bool CheckFuncSummaryPresenceOnly)
+    : DiagnosticHandler(getDiagHandler(DiagnosticHandler, Context)),
+      Buffer(Buffer),
+      IsLazy(IsLazy),
+      CheckFuncSummaryPresenceOnly(CheckFuncSummaryPresenceOnly) {}
+
+// Build a reader with no buffer attached: identical to the constructor above
+// with a null buffer, so simply forward to it.
+FunctionIndexBitcodeReader::FunctionIndexBitcodeReader(
+    LLVMContext &Context, DiagnosticHandlerFunction DiagnosticHandler,
+    bool IsLazy, bool CheckFuncSummaryPresenceOnly)
+    : FunctionIndexBitcodeReader(nullptr, Context, DiagnosticHandler, IsLazy,
+                                 CheckFuncSummaryPresenceOnly) {}
+
+// Destroy the owned buffer (if any) and leave the reader without one.
+void FunctionIndexBitcodeReader::freeState() { Buffer.reset(); }
+
+// Drop ownership of the buffer without destroying it; the returned raw
+// pointer is intentionally discarded.
+void FunctionIndexBitcodeReader::releaseBuffer() { (void)Buffer.release(); }
+
+// Specialized value symbol table parser used when reading function index
+// blocks where we don't actually create global values.
+// At the end of this routine the function index is populated with a map
+// from function name to FunctionInfo. The function info contains
+// the function block's bitcode offset as well as the offset into the
+// function summary section.
+//
+// When summaries were parsed eagerly (a summary section has been seen and
+// IsLazy is false), each entry must have a matching SummaryMap entry; a
+// missing one is reported as an invalid record.
+std::error_code FunctionIndexBitcodeReader::parseValueSymbolTable() {
+  if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
+    return error("Invalid record");
+
+  SmallVector<uint64_t, 64> Record;
+
+  // Read all the records for this value table.
+  SmallString<128> ValueName;
+  while (1) {
+    BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+
+    switch (Entry.Kind) {
+    case BitstreamEntry::SubBlock: // Handled for us already.
+    case BitstreamEntry::Error:
+      return error("Malformed block");
+    case BitstreamEntry::EndBlock:
+      return std::error_code();
+    case BitstreamEntry::Record:
+      // The interesting case.
+      break;
+    }
+
+    // Read a record.
+    Record.clear();
+    switch (Stream.readRecord(Entry.ID, Record)) {
+    default: // Default behavior: ignore (e.g. VST_CODE_BBENTRY records).
+      break;
+    case bitc::VST_CODE_FNENTRY: {
+      // VST_FNENTRY: [valueid, offset, namechar x N]
+      // convertToString fails when the record is shorter than the name's
+      // start index, so Record[0] and Record[1] are valid past this point.
+      if (convertToString(Record, 2, ValueName))
+        return error("Invalid record");
+      unsigned ValueID = Record[0];
+      uint64_t FuncOffset = Record[1];
+      std::unique_ptr<FunctionInfo> FuncInfo =
+          llvm::make_unique<FunctionInfo>(FuncOffset);
+      if (foundFuncSummary() && !IsLazy) {
+        DenseMap<uint64_t, std::unique_ptr<FunctionSummary>>::iterator SMI =
+            SummaryMap.find(ValueID);
+        // The bitcode is external input: report a missing summary instead of
+        // dereferencing end() when assertions are compiled out.
+        if (SMI == SummaryMap.end())
+          return error("Invalid record");
+        FuncInfo->setFunctionSummary(std::move(SMI->second));
+      }
+      TheIndex->addFunctionInfo(ValueName, std::move(FuncInfo));
+
+      ValueName.clear();
+      break;
+    }
+    case bitc::VST_CODE_COMBINED_FNENTRY: {
+      // VST_COMBINED_FNENTRY: [offset, namechar x N]
+      if (convertToString(Record, 1, ValueName))
+        return error("Invalid record");
+      uint64_t FuncSummaryOffset = Record[0];
+      std::unique_ptr<FunctionInfo> FuncInfo =
+          llvm::make_unique<FunctionInfo>(FuncSummaryOffset);
+      if (foundFuncSummary() && !IsLazy) {
+        DenseMap<uint64_t, std::unique_ptr<FunctionSummary>>::iterator SMI =
+            SummaryMap.find(FuncSummaryOffset);
+        // See the per-module case above: fail cleanly on a missing summary.
+        if (SMI == SummaryMap.end())
+          return error("Invalid record");
+        FuncInfo->setFunctionSummary(std::move(SMI->second));
+      }
+      TheIndex->addFunctionInfo(ValueName, std::move(FuncInfo));
+
+      ValueName.clear();
+      break;
+    }
+    }
+  }
+}
+
+// Parse just the blocks needed for function index building out of the module.
+// At the end of this routine the function Index is populated with a map
+// from function name to FunctionInfo. The function info contains
+// either the parsed function summary information (when parsing summaries
+// eagerly), or just to the function summary record's offset
+// if parsing lazily (IsLazy).
+//
+// When CheckFuncSummaryPresenceOnly is set, no block is parsed: sub-blocks
+// are skipped one by one until a function summary block is found (which sets
+// SeenFuncSummary and stops the scan) or the module block ends.
+std::error_code FunctionIndexBitcodeReader::parseModule() {
+  if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
+    return error("Invalid record");
+
+  // Read the function index for this module.
+  while (1) {
+    BitstreamEntry Entry = Stream.advance();
+
+    switch (Entry.Kind) {
+    case BitstreamEntry::Error:
+      return error("Malformed block");
+    case BitstreamEntry::EndBlock:
+      return std::error_code();
+
+    case BitstreamEntry::SubBlock:
+      if (CheckFuncSummaryPresenceOnly) {
+        const bool IsSummaryBlock =
+            Entry.ID == bitc::FUNCTION_SUMMARY_BLOCK_ID;
+        if (IsSummaryBlock)
+          SeenFuncSummary = true;
+        if (Stream.SkipBlock())
+          return error("Invalid record");
+        // No need to parse the rest once the summary has been found. Any
+        // other block must not end the scan, or a summary block that is not
+        // the first sub-block of the module would never be detected.
+        if (IsSummaryBlock)
+          return std::error_code();
+        continue;
+      }
+      switch (Entry.ID) {
+      default: // Skip unknown content.
+        if (Stream.SkipBlock())
+          return error("Invalid record");
+        break;
+      case bitc::BLOCKINFO_BLOCK_ID:
+        // Need to parse these to get abbrev ids (e.g. for VST)
+        if (Stream.ReadBlockInfoBlock())
+          return error("Malformed block");
+        break;
+      case bitc::VALUE_SYMTAB_BLOCK_ID:
+        if (std::error_code EC = parseValueSymbolTable())
+          return EC;
+        break;
+      case bitc::FUNCTION_SUMMARY_BLOCK_ID:
+        SeenFuncSummary = true;
+        if (IsLazy) {
+          // Lazy parsing of summary info, skip it.
+          if (Stream.SkipBlock())
+            return error("Invalid record");
+        } else if (std::error_code EC = parseEntireSummary())
+          return EC;
+        break;
+      case bitc::MODULE_STRTAB_BLOCK_ID:
+        if (std::error_code EC = parseModuleStringTable())
+          return EC;
+        break;
+      }
+      continue;
+
+    case BitstreamEntry::Record:
+      Stream.skipRecord(Entry.ID);
+      continue;
+    }
+  }
+}
+
+// Eagerly parse the entire function summary block (i.e. for all functions
+// in the index). This populates the FunctionSummary objects in
+// the index.
+//
+// Parsed summaries are stored in SummaryMap, keyed by value ID for per-module
+// records and by the record's bit offset for combined-index records. Records
+// with too few operands are reported as invalid.
+std::error_code FunctionIndexBitcodeReader::parseEntireSummary() {
+  if (Stream.EnterSubBlock(bitc::FUNCTION_SUMMARY_BLOCK_ID))
+    return error("Invalid record");
+
+  SmallVector<uint64_t, 64> Record;
+
+  while (1) {
+    BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+
+    switch (Entry.Kind) {
+    case BitstreamEntry::SubBlock: // Handled for us already.
+    case BitstreamEntry::Error:
+      return error("Malformed block");
+    case BitstreamEntry::EndBlock:
+      return std::error_code();
+    case BitstreamEntry::Record:
+      // The interesting case.
+      break;
+    }
+
+    // Read a record. The record format depends on whether this
+    // is a per-module index or a combined index file. In the per-module
+    // case the records contain the associated value's ID for correlation
+    // with VST entries. In the combined index the correlation is done
+    // via the bitcode offset of the summary records (which were saved
+    // in the combined index VST entries). The records also contain
+    // information used for ThinLTO renaming and importing.
+    Record.clear();
+    uint64_t CurRecordBit = Stream.GetCurrentBitNo();
+    switch (Stream.readRecord(Entry.ID, Record)) {
+    default: // Default behavior: ignore.
+      break;
+    // FS_PERMODULE_ENTRY: [valueid, islocal, instcount]
+    case bitc::FS_CODE_PERMODULE_ENTRY: {
+      if (Record.size() < 3)
+        return error("Invalid record");
+      unsigned ValueID = Record[0];
+      bool IsLocal = Record[1];
+      unsigned InstCount = Record[2];
+      std::unique_ptr<FunctionSummary> FS =
+          llvm::make_unique<FunctionSummary>(InstCount);
+      FS->setLocalFunction(IsLocal);
+      // The module path string ref set in the summary must be owned by the
+      // index's module string table. Since we don't have a module path
+      // string table section in the per-module index, we create a single
+      // module path string table entry with an empty (0) ID to take
+      // ownership.
+      FS->setModulePath(
+          TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0));
+      SummaryMap[ValueID] = std::move(FS);
+      // Must not fall through: the combined-entry case below would
+      // reinterpret this record and add a bogus SummaryMap entry.
+      break;
+    }
+    // FS_COMBINED_ENTRY: [modid, instcount]
+    case bitc::FS_CODE_COMBINED_ENTRY: {
+      if (Record.size() < 2)
+        return error("Invalid record");
+      uint64_t ModuleId = Record[0];
+      unsigned InstCount = Record[1];
+      std::unique_ptr<FunctionSummary> FS =
+          llvm::make_unique<FunctionSummary>(InstCount);
+      FS->setModulePath(ModuleIdMap[ModuleId]);
+      SummaryMap[CurRecordBit] = std::move(FS);
+      break;
+    }
+    }
+  }
+  llvm_unreachable("Exit infinite loop");
+}
+
+// Read the module string table block. Every MST_ENTRY record registers its
+// module path with the index and remembers, in ModuleIdMap, the string
+// reference the index handed back for that module ID. Other records are
+// ignored.
+std::error_code FunctionIndexBitcodeReader::parseModuleStringTable() {
+  if (Stream.EnterSubBlock(bitc::MODULE_STRTAB_BLOCK_ID))
+    return error("Invalid record");
+
+  SmallVector<uint64_t, 64> Fields;
+  SmallString<128> Path;
+
+  for (;;) {
+    BitstreamEntry Next = Stream.advanceSkippingSubblocks();
+
+    switch (Next.Kind) {
+    case BitstreamEntry::Record:
+      // Decoded below.
+      break;
+    case BitstreamEntry::EndBlock:
+      return std::error_code();
+    case BitstreamEntry::SubBlock: // Sub-blocks are skipped by the cursor.
+    case BitstreamEntry::Error:
+      return error("Malformed block");
+    }
+
+    Fields.clear();
+    if (Stream.readRecord(Next.ID, Fields) != bitc::MST_CODE_ENTRY)
+      continue; // Not a module path entry: ignore.
+
+    // MST_ENTRY: [modid, namechar x N]
+    if (convertToString(Fields, 1, Path))
+      return error("Invalid record");
+    uint64_t Id = Fields[0];
+    ModuleIdMap[Id] = TheIndex->addModulePath(Path, Id);
+    Path.clear();
+  }
+  llvm_unreachable("Exit infinite loop");
+}
+
+// Entry point for building the function info index: set up the stream, check
+// the bitcode signature, then walk the top-level blocks until the module
+// block is found and hand it to parseModule(). Index I becomes TheIndex.
+std::error_code FunctionIndexBitcodeReader::parseSummaryIndexInto(
+    std::unique_ptr<DataStreamer> Streamer, FunctionInfoIndex *I) {
+  TheIndex = I;
+
+  if (std::error_code EC = initStream(std::move(Streamer)))
+    return EC;
+
+  // Sniff for the signature.
+  if (!hasValidBitcodeHeader(Stream))
+    return error("Invalid bitcode signature");
+
+  // Only top-level sub-blocks are expected here; everything that is not the
+  // module block is skipped without being understood.
+  while (!Stream.AtEndOfStream()) {
+    BitstreamEntry TopLevel =
+        Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs);
+
+    if (TopLevel.Kind != BitstreamEntry::SubBlock)
+      return error("Malformed block");
+
+    // The module block holds everything needed for the summary index.
+    if (TopLevel.ID == bitc::MODULE_BLOCK_ID)
+      return parseModule();
+
+    if (Stream.SkipBlock())
+      return error("Invalid record");
+  }
+
+  // Ran off the end of the stream without seeing a proper Module block.
+  return error("Malformed block");
+}
+
+// Lazy-parsing entry point: position the stream at FunctionSummaryOffset and
+// read the summary record found there into index I.
+// TODO: This function is not yet complete as it won't have a consumer
+// until ThinLTO function importing is added. As written, every record read
+// at the offset is rejected with "Invalid record".
+std::error_code FunctionIndexBitcodeReader::parseFunctionSummary(
+    std::unique_ptr<DataStreamer> Streamer, FunctionInfoIndex *I,
+    size_t FunctionSummaryOffset) {
+  TheIndex = I;
+
+  if (std::error_code EC = initStream(std::move(Streamer)))
+    return EC;
+
+  // Sniff for the signature.
+  if (!hasValidBitcodeHeader(Stream))
+    return error("Invalid bitcode signature");
+
+  Stream.JumpToBit(FunctionSummaryOffset);
+
+  // Only a record is acceptable at the requested offset.
+  BitstreamEntry AtOffset = Stream.advanceSkippingSubblocks();
+  if (AtOffset.Kind != BitstreamEntry::Record)
+    return error("Malformed block");
+
+  // TODO: Read a record. This interface will be completed when ThinLTO
+  // importing is added so that it can be tested.
+  SmallVector<uint64_t, 64> Fields;
+  switch (Stream.readRecord(AtOffset.ID, Fields)) {
+  default:
+  case bitc::FS_CODE_COMBINED_ENTRY:
+    return error("Invalid record");
+  }
+
+  return std::error_code();
+}
+
+// Set up the bitstream: from the streamer when one is supplied, otherwise
+// from the reader's buffer.
+std::error_code FunctionIndexBitcodeReader::initStream(
+    std::unique_ptr<DataStreamer> Streamer) {
+  if (!Streamer)
+    return initStreamFromBuffer();
+  return initLazyStream(std::move(Streamer));
+}
+
+// Set up the bitstream over the reader's buffer. The buffer size must be a
+// multiple of four bytes; a bitcode wrapper header, if present, is skipped.
+std::error_code FunctionIndexBitcodeReader::initStreamFromBuffer() {
+  const size_t NumBytes = Buffer->getBufferSize();
+  const unsigned char *Begin =
+      reinterpret_cast<const unsigned char *>(Buffer->getBufferStart());
+  const unsigned char *End = Begin + NumBytes;
+
+  if (NumBytes & 3)
+    return error("Invalid bitcode signature");
+
+  // If we have a wrapper header, parse it and ignore the non-bc file contents.
+  // The magic number is 0x0B17C0DE stored in little endian.
+  if (isBitcodeWrapper(Begin, End) &&
+      SkipBitcodeWrapperHeader(Begin, End, true))
+    return error("Invalid bitcode wrapper header");
+
+  StreamFile.reset(new BitstreamReader(Begin, End));
+  Stream.init(&*StreamFile);
+
+  return std::error_code();
+}
+
+// Set up the bitstream over a streaming memory object fed by Streamer, then
+// inspect the first 16 bytes: they must look like bitcode, and a wrapper
+// header, if present, is stripped because BitstreamReader expects never to
+// see it.
+std::error_code FunctionIndexBitcodeReader::initLazyStream(
+    std::unique_ptr<DataStreamer> Streamer) {
+  auto Object = llvm::make_unique<StreamingMemoryObject>(std::move(Streamer));
+  // Keep a reference for the checks below; ownership moves into StreamFile.
+  StreamingMemoryObject &Bytes = *Object;
+  StreamFile = llvm::make_unique<BitstreamReader>(std::move(Object));
+  Stream.init(&*StreamFile);
+
+  unsigned char Header[16];
+  if (Bytes.readBytes(Header, 16, 0) != 16)
+    return error("Invalid bitcode signature");
+
+  if (!isBitcode(Header, Header + 16))
+    return error("Invalid bitcode signature");
+
+  if (isBitcodeWrapper(Header, Header + 4)) {
+    const unsigned char *PayloadBegin = Header;
+    const unsigned char *PayloadEnd = Header + 16;
+    SkipBitcodeWrapperHeader(PayloadBegin, PayloadEnd, false);
+    Bytes.dropLeadingBytes(PayloadBegin - Header);
+    Bytes.setKnownObjectSize(PayloadEnd - PayloadBegin);
+  }
+  return std::error_code();
+}
+
namespace {
class BitcodeErrorCategoryType : public std::error_category {
const char *name() const LLVM_NOEXCEPT override {
return "";
return Triple.get();
}
+
+// Build a function info index from the specified bitcode buffer.
+// With IsLazy == false the entire function summary is parsed into the index.
+// With IsLazy == true the function summary section is skipped and the index
+// only holds a map from function name to function summary offset, which is
+// used to read function summaries lazily later on.
+// Returns the new index on success, or the reader's error code on failure.
+ErrorOr<std::unique_ptr<FunctionInfoIndex>> llvm::getFunctionInfoIndex(
+    MemoryBufferRef Buffer, LLVMContext &Context,
+    DiagnosticHandlerFunction DiagnosticHandler, bool IsLazy) {
+  std::unique_ptr<MemoryBuffer> OwnedBuf =
+      MemoryBuffer::getMemBuffer(Buffer, false);
+  FunctionIndexBitcodeReader Reader(OwnedBuf.get(), Context, DiagnosticHandler,
+                                    IsLazy);
+
+  auto Index = llvm::make_unique<FunctionInfoIndex>();
+
+  std::error_code EC = Reader.parseSummaryIndexInto(nullptr, Index.get());
+  if (EC) {
+    // Never take ownership on error: OwnedBuf keeps the buffer.
+    Reader.releaseBuffer();
+    return EC;
+  }
+
+  OwnedBuf.release(); // The FunctionIndexBitcodeReader owns it now.
+  return std::move(Index);
+}
+
+// Report whether the given bitcode buffer contains a function summary block.
+// Any parse error is reported as "no summary" (false).
+bool llvm::hasFunctionSummary(MemoryBufferRef Buffer, LLVMContext &Context,
+                              DiagnosticHandlerFunction DiagnosticHandler) {
+  std::unique_ptr<MemoryBuffer> OwnedBuf =
+      MemoryBuffer::getMemBuffer(Buffer, false);
+  // NOTE(review): the trailing `false, true` presumably mean "not lazy" and
+  // "only check for summary existence" -- confirm against the constructor.
+  FunctionIndexBitcodeReader Reader(OwnedBuf.get(), Context, DiagnosticHandler,
+                                    false, true);
+
+  if (Reader.parseSummaryIndexInto(nullptr, nullptr)) {
+    // Never take ownership on error: OwnedBuf keeps the buffer.
+    Reader.releaseBuffer();
+    return false;
+  }
+
+  OwnedBuf.release(); // The FunctionIndexBitcodeReader owns it now.
+  return Reader.foundFuncSummary();
+}
+
+// Lazily read function summary data from the combined index during ThinLTO
+// function importing. The combined index file is first read through
+// getFunctionInfoIndex with IsLazy=true; this function is then called for
+// each function considered for importing, and parses the summary information
+// for FunctionName into Index.
+// Returns the first parse error encountered, or a default (success)
+// error_code.
+// NOTE(review): Index is a std::unique_ptr taken by value and is never moved
+// back out, so the index is destroyed when this function returns -- confirm
+// this is what callers expect.
+std::error_code llvm::readFunctionSummary(
+    MemoryBufferRef Buffer, LLVMContext &Context,
+    DiagnosticHandlerFunction DiagnosticHandler, StringRef FunctionName,
+    std::unique_ptr<FunctionInfoIndex> Index) {
+  std::unique_ptr<MemoryBuffer> OwnedBuf =
+      MemoryBuffer::getMemBuffer(Buffer, false);
+  FunctionIndexBitcodeReader Reader(OwnedBuf.get(), Context, DiagnosticHandler);
+
+  // Look FunctionName up in the FunctionMap. The result may be a list of
+  // function infos (the COMDAT case), so visit every entry and parse the
+  // summary found at the offset recorded while parsing the value symbol table.
+  for (const auto &Info : Index->getFunctionInfoList(FunctionName)) {
+    size_t SummaryOffset = Info->bitcodeIndex();
+    std::error_code EC =
+        Reader.parseFunctionSummary(nullptr, Index.get(), SummaryOffset);
+    if (EC) {
+      // Never take ownership on error: OwnedBuf keeps the buffer.
+      Reader.releaseBuffer();
+      return EC;
+    }
+  }
+
+  OwnedBuf.release(); // The FunctionIndexBitcodeReader owns it now.
+  return std::error_code();
+}