// Each section has the following format
//
// function1:total_samples:total_head_samples
-// offset1[.discriminator]: number_of_samples [fn1:num fn2:num ... ]
-// offset2[.discriminator]: number_of_samples [fn3:num fn4:num ... ]
-// ...
-// offsetN[.discriminator]: number_of_samples [fn5:num fn6:num ... ]
+// offset1[.discriminator]: number_of_samples [fn1:num fn2:num ... ]
+// offset2[.discriminator]: number_of_samples [fn3:num fn4:num ... ]
+// ...
+// offsetN[.discriminator]: number_of_samples [fn5:num fn6:num ... ]
+// offsetA[.discriminator]: fnA:num_of_total_samples
+// offsetA1[.discriminator]: number_of_samples [fn7:num fn8:num ... ]
+// ...
//
-// The file may contain blank lines between sections and within a
-// section. However, the spacing within a single line is fixed. Additional
-// spaces will result in an error while reading the file.
+// This is a nested tree in which the identation represent the nest level
+// of the inline stack. There is no blank line in the file. And the spacing
+// within a single line is fixed. Additional spaces will result in an error
+// while reading the file.
+//
+// Inline stack is a stack of source locations in which the top of the stack
+// represents the leaf function, and the bottom of the stack represents the
+// actual symbol in which the instruction belongs.
//
// Function names must be mangled in order for the profile loader to
// match them in the current translation unit. The two numbers in the
// in the prologue of the function (second number). This head sample
// count provides an indicator of how frequently the function is invoked.
//
+// There are two types of lines in the function body.
+//
+// * Sampled line represents the profile information of a source location.
+// * Callsite line represents the profile inofrmation of a callsite.
+//
// Each sampled line may contain several items. Some are optional (marked
// below):
//
// instruction that calls one of ``foo()``, ``bar()`` and ``baz()``,
// with ``baz()`` being the relatively more frequently called target.
//
+// Each callsite line may contain several items. Some are optional.
+//
+// a. Source line offset. This number represents the line number of the
+// callsite that is inlined in the profiled binary.
+//
+// b. [OPTIONAL] Discriminator. Same as the discriminator for sampled line.
+//
+// c. Number of samples. This is an integer quantity representing the
+// total number of samples collected for the inlined instance at this
+// callsite
//===----------------------------------------------------------------------===//
#include "llvm/ProfileData/SampleProfReader.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Regex.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
using namespace llvm::sampleprof;
using namespace llvm;
dumpFunctionProfile(I.getKey(), OS);
}
+/// \brief Parse \p Input as function head.
+///
+/// Parse one line of \p Input, and update function name in \p FName,
+/// function's total sample count in \p NumSamples, function's entry
+/// count in \p NumHeadSamples.
+///
+/// \returns true if parsing is successful.
+static bool ParseHead(const StringRef &Input, StringRef &FName,
+ unsigned &NumSamples, unsigned &NumHeadSamples) {
+ if (Input[0] == ' ')
+ return false;
+ size_t n2 = Input.rfind(':');
+ size_t n1 = Input.rfind(':', n2 - 1);
+ FName = Input.substr(0, n1);
+ if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples))
+ return false;
+ if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples))
+ return false;
+ return true;
+}
+
+/// \brief Parse \p Input as line sample.
+///
+/// \param Input input line.
+/// \param IsCallsite true if the line represents an inlined callsite.
+/// \param Depth the depth of the inline stack.
+/// \param NumSamples total samples of the line/inlined callsite.
+/// \param LineOffset line offset to the start of the function.
+/// \param Discriminator discriminator of the line.
+/// \param TargetCountMap map from indirect call target to count.
+///
+/// returns true if parsing is successful.
+static bool ParseLine(const StringRef &Input, bool &IsCallsite, unsigned &Depth,
+ unsigned &NumSamples, unsigned &LineOffset,
+ unsigned &Discriminator, StringRef &CalleeName,
+ DenseMap<StringRef, unsigned> &TargetCountMap) {
+ for (Depth = 0; Input[Depth] == ' '; Depth++)
+ ;
+ if (Depth == 0)
+ return false;
+
+ size_t n1 = Input.find(':');
+ StringRef Loc = Input.substr(Depth, n1 - Depth);
+ size_t n2 = Loc.find('.');
+ if (n2 == StringRef::npos) {
+ if (Loc.getAsInteger(10, LineOffset))
+ return false;
+ Discriminator = 0;
+ } else {
+ if (Loc.substr(0, n2).getAsInteger(10, LineOffset))
+ return false;
+ if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator))
+ return false;
+ }
+
+ StringRef Rest = Input.substr(n1 + 2);
+ if (Rest[0] >= '0' && Rest[0] <= '9') {
+ IsCallsite = false;
+ size_t n3 = Rest.find(' ');
+ if (n3 == StringRef::npos) {
+ if (Rest.getAsInteger(10, NumSamples))
+ return false;
+ } else {
+ if (Rest.substr(0, n3).getAsInteger(10, NumSamples))
+ return false;
+ }
+ while (n3 != StringRef::npos) {
+ n3 += Rest.substr(n3).find_first_not_of(' ');
+ Rest = Rest.substr(n3);
+ n3 = Rest.find(' ');
+ StringRef pair = Rest;
+ if (n3 != StringRef::npos) {
+ pair = Rest.substr(0, n3);
+ }
+ int n4 = pair.find(':');
+ unsigned count;
+ if (pair.substr(n4 + 1).getAsInteger(10, count))
+ return false;
+ TargetCountMap[pair.substr(0, n4)] = count;
+ }
+ } else {
+ IsCallsite = true;
+ int n3 = Rest.find_last_of(':');
+ CalleeName = Rest.substr(0, n3);
+ if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples))
+ return false;
+ }
+ return true;
+}
+
/// \brief Load samples from a text file.
///
/// See the documentation at the top of the file for an explanation of
std::error_code SampleProfileReaderText::read() {
line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#');
- // Read the profile of each function. Since each function may be
- // mentioned more than once, and we are collecting flat profiles,
- // accumulate samples as we parse them.
- Regex HeadRE("^([^0-9].*):([0-9]+):([0-9]+)$");
- Regex LineSampleRE("^([0-9]+)\\.?([0-9]+)?: ([0-9]+)(.*)$");
- Regex CallSampleRE(" +([^0-9 ][^ ]*):([0-9]+)");
- while (!LineIt.is_at_eof()) {
+ SmallVector<FunctionSamples *, 10> InlineStack;
+
+ for (; !LineIt.is_at_eof(); ++LineIt) {
+ if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#')
+ continue;
// Read the header of each function.
//
// Note that for function identifiers we are actually expecting
//
// The only requirement we place on the identifier, then, is that it
// should not begin with a number.
- SmallVector<StringRef, 4> Matches;
- if (!HeadRE.match(*LineIt, &Matches)) {
- reportError(LineIt.line_number(),
- "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
- return sampleprof_error::malformed;
- }
- assert(Matches.size() == 4);
- StringRef FName = Matches[1];
- unsigned NumSamples, NumHeadSamples;
- Matches[2].getAsInteger(10, NumSamples);
- Matches[3].getAsInteger(10, NumHeadSamples);
- Profiles[FName] = FunctionSamples();
- FunctionSamples &FProfile = Profiles[FName];
- FProfile.addTotalSamples(NumSamples);
- FProfile.addHeadSamples(NumHeadSamples);
- ++LineIt;
-
- // Now read the body. The body of the function ends when we reach
- // EOF or when we see the start of the next function.
- while (!LineIt.is_at_eof() && isdigit((*LineIt)[0])) {
- if (!LineSampleRE.match(*LineIt, &Matches)) {
+ if ((*LineIt)[0] != ' ') {
+ unsigned NumSamples, NumHeadSamples;
+ StringRef FName;
+ if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) {
+ reportError(LineIt.line_number(),
+ "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
+ return sampleprof_error::malformed;
+ }
+ Profiles[FName] = FunctionSamples();
+ FunctionSamples &FProfile = Profiles[FName];
+ FProfile.addTotalSamples(NumSamples);
+ FProfile.addHeadSamples(NumHeadSamples);
+ InlineStack.clear();
+ InlineStack.push_back(&FProfile);
+ } else {
+ unsigned NumSamples;
+ StringRef FName;
+ DenseMap<StringRef, unsigned> TargetCountMap;
+ bool IsCallsite;
+ unsigned Depth, LineOffset, Discriminator;
+ if (!ParseLine(*LineIt, IsCallsite, Depth, NumSamples, LineOffset,
+ Discriminator, FName, TargetCountMap)) {
reportError(LineIt.line_number(),
"Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
*LineIt);
return sampleprof_error::malformed;
}
- assert(Matches.size() == 5);
- unsigned LineOffset, NumSamples, Discriminator = 0;
- Matches[1].getAsInteger(10, LineOffset);
- if (Matches[2] != "")
- Matches[2].getAsInteger(10, Discriminator);
- Matches[3].getAsInteger(10, NumSamples);
-
- // If there are function calls in this line, generate a call sample
- // entry for each call.
- std::string CallsLine(Matches[4]);
- while (CallsLine != "") {
- SmallVector<StringRef, 3> CallSample;
- if (!CallSampleRE.match(CallsLine, &CallSample)) {
- reportError(LineIt.line_number(),
- "Expected 'mangled_name:NUM', found " + CallsLine);
- return sampleprof_error::malformed;
+ if (IsCallsite) {
+ while (InlineStack.size() > Depth) {
+ InlineStack.pop_back();
+ }
+ FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
+ CallsiteLocation(LineOffset, Discriminator, FName));
+ FSamples.addTotalSamples(NumSamples);
+ InlineStack.push_back(&FSamples);
+ } else {
+ while (InlineStack.size() > Depth) {
+ InlineStack.pop_back();
+ }
+ FunctionSamples &FProfile = *InlineStack.back();
+ for (const auto &name_count : TargetCountMap) {
+ FProfile.addCalledTargetSamples(LineOffset, Discriminator,
+ name_count.first, name_count.second);
}
- StringRef CalledFunction = CallSample[1];
- unsigned CalledFunctionSamples;
- CallSample[2].getAsInteger(10, CalledFunctionSamples);
- FProfile.addCalledTargetSamples(LineOffset, Discriminator,
- CalledFunction, CalledFunctionSamples);
- CallsLine = CallSampleRE.sub("", CallsLine);
+ FProfile.addBodySamples(LineOffset, Discriminator, NumSamples);
}
-
- FProfile.addBodySamples(LineOffset, Discriminator, NumSamples);
- ++LineIt;
}
}
return sampleprof_error::success;
}
-
std::error_code
SampleProfileReaderGCC::readOneFunctionProfile(const SourceStack &Stack,
bool Update) {
return sampleprof_error::not_implemented;
}
-
/// \brief Read a GCC AutoFDO profile.
///
/// This format is generated by the Linux Perf conversion tool at
if (std::error_code EC = readFunctionProfiles())
return EC;
- // FIXME(dnovillo) - Module groups and working set support are not
- // yet implemented.
+// FIXME(dnovillo) - Module groups and working set support are not
+// yet implemented.
#if 0
// Read the module group file.
if (std::error_code EC = readModuleGroup())