Don't use a red zone for code coverage if the user specified `-mno-red-zone'.
[oota-llvm.git] / lib / Transforms / Instrumentation / GCOVProfiling.cpp
index ec05872179eff2eb2ca6abca16318b2bec5c66b9..5e064cd70d6e791fd2e85943d8d1463737196362 100644 (file)
 
 #define DEBUG_TYPE "insert-gcov-profiling"
 
-#include "ProfilingUtils.h"
 #include "llvm/Transforms/Instrumentation.h"
-#include "llvm/Analysis/DebugInfo.h"
+#include "ProfilingUtils.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/UniqueVector.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/Instructions.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
-#include "llvm/Instructions.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/DebugLoc.h"
 #include "llvm/Support/InstIterator.h"
-#include "llvm/Support/IRBuilder.h"
 #include "llvm/Support/PathV2.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/UniqueVector.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
 #include <string>
 #include <utility>
 using namespace llvm;
 
 namespace {
   class GCOVProfiler : public ModulePass {
-    bool runOnModule(Module &M);
   public:
     static char ID;
-    GCOVProfiler() : ModulePass(ID) {
+    GCOVProfiler()
+        : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false),
+          UseExtraChecksum(false), NoRedZone(false) {
+      initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
+    }
+    GCOVProfiler(bool EmitNotes, bool EmitData, bool use402Format = false,
+                 bool useExtraChecksum = false, bool NoRedZone = false)
+        : ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData),
+          Use402Format(use402Format), UseExtraChecksum(useExtraChecksum) {
+      assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?");
       initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
     }
     virtual const char *getPassName() const {
       return "GCOV Profiler";
     }
-
   private:
+    bool runOnModule(Module &M);
+
     // Create the GCNO files for the Module based on DebugInfo.
-    void EmitGCNO(DebugInfoFinder &DIF);
+    void emitGCNO();
+
+    // Modify the program to track transitions along edges and call into the
+    // profiling runtime to emit .gcda files when run.
+    bool emitProfileArcs();
 
     // Get pointers to the functions in the runtime library.
     Constant *getStartFileFunc();
+    Constant *getIncrementIndirectCounterFunc();
     Constant *getEmitFunctionFunc();
     Constant *getEmitArcsFunc();
     Constant *getEndFileFunc();
 
+    // Create or retrieve an i32 state value that is used to represent the
+    // pred block number for certain non-trivial edges.
+    GlobalVariable *getEdgeStateValue();
+
+    // Produce a table of pointers to counters, by predecessor and successor
+    // block number.
+    GlobalVariable *buildEdgeLookupTable(Function *F,
+                                         GlobalVariable *Counter,
+                                         const UniqueVector<BasicBlock *> &Preds,
+                                         const UniqueVector<BasicBlock *> &Succs);
+
     // Add the function to write out all our counters to the global destructor
     // list.
-    void InsertCounterWriteout(DebugInfoFinder &,
-                               SmallVector<std::pair<GlobalVariable *,
-                                                     uint32_t>, 8> &);
+    void insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*, MDNode*> >);
+    void insertIndirectCounterIncrement();
+    void insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >);
+
+    std::string mangleName(DICompileUnit CU, const char *NewStem);
 
-    Module *Mod;
+    bool EmitNotes;
+    bool EmitData;
+    bool Use402Format;
+    bool UseExtraChecksum;
+    bool NoRedZone;
+
+    Module *M;
     LLVMContext *Ctx;
   };
 }
@@ -75,61 +109,58 @@ char GCOVProfiler::ID = 0;
 INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling",
                 "Insert instrumentation for GCOV profiling", false, false)
 
-ModulePass *llvm::createGCOVProfilerPass() { return new GCOVProfiler(); }
-
-static DISubprogram FindSubprogram(DIScope scope) {
-  while (!scope.isSubprogram()) {
-    assert(scope.isLexicalBlock() &&
-           "Debug location not lexical block or subprogram");
-    scope = DILexicalBlock(scope).getContext();
-  }
-  return DISubprogram(scope);
+ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData,
+                                         bool Use402Format,
+                                         bool UseExtraChecksum,
+                                         bool NoRedZone) {
+  return new GCOVProfiler(EmitNotes, EmitData, Use402Format, UseExtraChecksum,
+                          NoRedZone);
 }
 
 namespace {
   class GCOVRecord {
    protected:
-    static const char *lines_tag;
-    static const char *function_tag;
-    static const char *block_tag;
-    static const char *edge_tag;
+    static const char *LinesTag;
+    static const char *FunctionTag;
+    static const char *BlockTag;
+    static const char *EdgeTag;
 
     GCOVRecord() {}
 
-    void WriteBytes(const char *b, int size) {
-      os->write(b, size);
+    void writeBytes(const char *Bytes, int Size) {
+      os->write(Bytes, Size);
     }
 
-    void Write(uint32_t i) {
-      WriteBytes(reinterpret_cast<char*>(&i), 4);
+    void write(uint32_t i) {
+      writeBytes(reinterpret_cast<char*>(&i), 4);
     }
 
     // Returns the length measured in 4-byte blocks that will be used to
     // represent this string in a GCOV file
-    unsigned LengthOfGCOVString(StringRef s) {
+    unsigned lengthOfGCOVString(StringRef s) {
       // A GCOV string is a length, followed by a NUL, then between 0 and 3 NULs
-      // padding out to the next 4-byte word. The length is measured in 4-byte words
-      // including padding, not bytes of actual string.
-      return (s.size() + 5) / 4;
+      // padding out to the next 4-byte word. The length is measured in 4-byte
+      // words including padding, not bytes of actual string.
+      return (s.size() / 4) + 1;
     }
 
-    void WriteGCOVString(StringRef s) {
-      uint32_t len = LengthOfGCOVString(s);
-      Write(len);
-      WriteBytes(s.data(), s.size());
+    void writeGCOVString(StringRef s) {
+      uint32_t Len = lengthOfGCOVString(s);
+      write(Len);
+      writeBytes(s.data(), s.size());
 
       // Write 1 to 4 bytes of NUL padding.
-      assert((unsigned)(5 - ((s.size() + 1) % 4)) > 0);
-      assert((unsigned)(5 - ((s.size() + 1) % 4)) <= 4);
-      WriteBytes("\0\0\0\0", 5 - ((s.size() + 1) % 4));
+      assert((unsigned)(4 - (s.size() % 4)) > 0);
+      assert((unsigned)(4 - (s.size() % 4)) <= 4);
+      writeBytes("\0\0\0\0", 4 - (s.size() % 4));
     }
 
     raw_ostream *os;
   };
-  const char *GCOVRecord::lines_tag = "\0\0\x45\x01";
-  const char *GCOVRecord::function_tag = "\0\0\0\1";
-  const char *GCOVRecord::block_tag = "\0\0\x41\x01";
-  const char *GCOVRecord::edge_tag = "\0\0\x43\x01";
+  const char *GCOVRecord::LinesTag = "\0\0\x45\x01";
+  const char *GCOVRecord::FunctionTag = "\0\0\0\1";
+  const char *GCOVRecord::BlockTag = "\0\0\x41\x01";
+  const char *GCOVRecord::EdgeTag = "\0\0\x43\x01";
 
   class GCOVFunction;
   class GCOVBlock;
@@ -139,24 +170,30 @@ namespace {
   // to the block.
   class GCOVLines : public GCOVRecord {
    public:
-    void AddLine(uint32_t line) {
-      lines.push_back(line);
+    void addLine(uint32_t Line) {
+      Lines.push_back(Line);
     }
 
-    uint32_t Length() {
-      return LengthOfGCOVString(filename) + 2 + lines.size();
+    uint32_t length() {
+      // Here 2 = 1 for string length + 1 for '0' id#.
+      return lengthOfGCOVString(Filename) + 2 + Lines.size();
     }
 
-   private:
-    friend class GCOVBlock;
+    void writeOut() {
+      write(0);
+      writeGCOVString(Filename);
+      for (int i = 0, e = Lines.size(); i != e; ++i)
+        write(Lines[i]);
+    }
 
-    GCOVLines(std::string filename, raw_ostream *os)
-        : filename(filename) {
+    GCOVLines(StringRef F, raw_ostream *os) 
+      : Filename(F) {
       this->os = os;
     }
 
-    std::string filename;
-    SmallVector<uint32_t, 32> lines;
+   private:
+    StringRef Filename;
+    SmallVector<uint32_t, 32> Lines;
   };
 
   // Represent a basic block in GCOV. Each block has a unique number in the
@@ -164,56 +201,50 @@ namespace {
   // other blocks.
   class GCOVBlock : public GCOVRecord {
    public:
-    GCOVLines &GetFile(std::string filename) {
-      GCOVLines *&lines = lines_by_file[filename];
-      if (!lines) {
-        lines = new GCOVLines(filename, os);
+    GCOVLines &getFile(StringRef Filename) {
+      GCOVLines *&Lines = LinesByFile[Filename];
+      if (!Lines) {
+        Lines = new GCOVLines(Filename, os);
       }
-      return *lines;
+      return *Lines;
     }
 
-    void AddEdge(GCOVBlock &successor) {
-      out_edges.push_back(&successor);
+    void addEdge(GCOVBlock &Successor) {
+      OutEdges.push_back(&Successor);
     }
 
-    void WriteOut() {
-      uint32_t len = 3;
-      for (StringMap<GCOVLines *>::iterator I = lines_by_file.begin(),
-               E = lines_by_file.end(); I != E; ++I) {
-        len += I->second->Length();
+    void writeOut() {
+      uint32_t Len = 3;
+      for (StringMap<GCOVLines *>::iterator I = LinesByFile.begin(),
+               E = LinesByFile.end(); I != E; ++I) {
+        Len += I->second->length();
       }
 
-      WriteBytes(lines_tag, 4);
-      Write(len);
-      Write(number);
-      for (StringMap<GCOVLines *>::iterator I = lines_by_file.begin(),
-               E = lines_by_file.end(); I != E; ++I) {
-        Write(0);
-        WriteGCOVString(I->second->filename);
-        for (int i = 0, e = I->second->lines.size(); i != e; ++i) {
-          Write(I->second->lines[i]);
-        }
-      }
-      Write(0);
-      Write(0);
+      writeBytes(LinesTag, 4);
+      write(Len);
+      write(Number);
+      for (StringMap<GCOVLines *>::iterator I = LinesByFile.begin(),
+               E = LinesByFile.end(); I != E; ++I) 
+        I->second->writeOut();
+      write(0);
+      write(0);
     }
 
     ~GCOVBlock() {
-      DeleteContainerSeconds(lines_by_file);
+      DeleteContainerSeconds(LinesByFile);
     }
 
    private:
     friend class GCOVFunction;
 
-    GCOVBlock(uint32_t number, raw_ostream *os)
-        : number(number) {
+    GCOVBlock(uint32_t Number, raw_ostream *os)
+        : Number(Number) {
       this->os = os;
     }
 
-    uint32_t number;
-    BasicBlock *block;
-    StringMap<GCOVLines *> lines_by_file;
-    SmallVector<GCOVBlock *, 4> out_edges;
+    uint32_t Number;
+    StringMap<GCOVLines *> LinesByFile;
+    SmallVector<GCOVBlock *, 4> OutEdges;
   };
 
   // A function has a unique identifier, a checksum (we leave as zero) and a
@@ -221,288 +252,550 @@ namespace {
   // object users can construct, the blocks and lines will be rooted here.
   class GCOVFunction : public GCOVRecord {
    public:
-    GCOVFunction(DISubprogram SP, raw_ostream *os) {
+    GCOVFunction(DISubprogram SP, raw_ostream *os,
+                 bool Use402Format, bool UseExtraChecksum) {
       this->os = os;
 
       Function *F = SP.getFunction();
+      DEBUG(dbgs() << "Function: " << F->getName() << "\n");
       uint32_t i = 0;
       for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
-        blocks[BB] = new GCOVBlock(i++, os);
+        Blocks[BB] = new GCOVBlock(i++, os);
       }
-
-      WriteBytes(function_tag, 4);
-      uint32_t block_len = 1 + 1 + 1 + LengthOfGCOVString(SP.getName()) +
-          1 + LengthOfGCOVString(SP.getFilename()) + 1;
-      Write(block_len);
-      uint32_t ident = reinterpret_cast<intptr_t>((MDNode*)SP);
-      Write(ident);
-      Write(0); // checksum
-      WriteGCOVString(SP.getName());
-      WriteGCOVString(SP.getFilename());
-      Write(SP.getLineNumber());
+      ReturnBlock = new GCOVBlock(i++, os);
+
+      writeBytes(FunctionTag, 4);
+      uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(SP.getName()) +
+          1 + lengthOfGCOVString(SP.getFilename()) + 1;
+      if (UseExtraChecksum)
+        ++BlockLen;
+      write(BlockLen);
+      uint32_t Ident = reinterpret_cast<intptr_t>((MDNode*)SP);
+      write(Ident);
+      write(0);  // lineno checksum
+      if (UseExtraChecksum)
+        write(0);  // cfg checksum
+      writeGCOVString(SP.getName());
+      writeGCOVString(SP.getFilename());
+      write(SP.getLineNumber());
     }
 
     ~GCOVFunction() {
-      DeleteContainerSeconds(blocks);
+      DeleteContainerSeconds(Blocks);
+      delete ReturnBlock;
     }
 
-    GCOVBlock &GetBlock(BasicBlock *BB) {
-      return *blocks[BB];
+    GCOVBlock &getBlock(BasicBlock *BB) {
+      return *Blocks[BB];
     }
 
-    void WriteOut() {
+    GCOVBlock &getReturnBlock() {
+      return *ReturnBlock;
+    }
+
+    void writeOut() {
       // Emit count of blocks.
-      WriteBytes(block_tag, 4);
-      Write(blocks.size());
-      for (int i = 0, e = blocks.size(); i != e; ++i) {
-        Write(0);  // No flags on our blocks.
+      writeBytes(BlockTag, 4);
+      write(Blocks.size() + 1);
+      for (int i = 0, e = Blocks.size() + 1; i != e; ++i) {
+        write(0);  // No flags on our blocks.
       }
+      DEBUG(dbgs() << Blocks.size() << " blocks.\n");
 
       // Emit edges between blocks.
-      for (DenseMap<BasicBlock *, GCOVBlock *>::iterator I = blocks.begin(),
-               E = blocks.end(); I != E; ++I) {
-        GCOVBlock &block = *I->second;
-        if (block.out_edges.empty()) continue;
-
-        WriteBytes(edge_tag, 4);
-        Write(block.out_edges.size() * 2 + 1);
-        Write(block.number);
-        for (int i = 0, e = block.out_edges.size(); i != e; ++i) {
-          Write(block.out_edges[i]->number);
-          Write(0);  // no flags
+      for (DenseMap<BasicBlock *, GCOVBlock *>::iterator I = Blocks.begin(),
+               E = Blocks.end(); I != E; ++I) {
+        GCOVBlock &Block = *I->second;
+        if (Block.OutEdges.empty()) continue;
+
+        writeBytes(EdgeTag, 4);
+        write(Block.OutEdges.size() * 2 + 1);
+        write(Block.Number);
+        for (int i = 0, e = Block.OutEdges.size(); i != e; ++i) {
+          DEBUG(dbgs() << Block.Number << " -> " << Block.OutEdges[i]->Number
+                       << "\n");
+          write(Block.OutEdges[i]->Number);
+          write(0);  // no flags
         }
       }
 
       // Emit lines for each block.
-      for (DenseMap<BasicBlock *, GCOVBlock *>::iterator I = blocks.begin(),
-               E = blocks.end(); I != E; ++I) {
-        I->second->WriteOut();
+      for (DenseMap<BasicBlock *, GCOVBlock *>::iterator I = Blocks.begin(),
+               E = Blocks.end(); I != E; ++I) {
+        I->second->writeOut();
       }
     }
 
    private:
-    DenseMap<BasicBlock *, GCOVBlock *> blocks;
+    DenseMap<BasicBlock *, GCOVBlock *> Blocks;
+    GCOVBlock *ReturnBlock;
   };
 }
 
-void GCOVProfiler::EmitGCNO(DebugInfoFinder &DIF) {
-  DenseMap<const MDNode *, raw_fd_ostream *> gcno_files;
-  for (DebugInfoFinder::iterator I = DIF.compile_unit_begin(),
-           E = DIF.compile_unit_end(); I != E; ++I) {
-    // Each compile unit gets its own .gcno file. This means that whether we run
-    // this pass over the original .o's as they're produced, or run it after
-    // LTO, we'll generate the same .gcno files.
-
-    DICompileUnit CU(*I);
-    raw_fd_ostream *&Out = gcno_files[CU];
-    std::string ErrorInfo;
-    Out = new raw_fd_ostream(
-        (sys::path::stem(CU.getFilename()) + ".gcno").str().c_str(),
-        ErrorInfo, raw_fd_ostream::F_Binary);
-    Out->write("oncg*404MVLL", 12);
-  }
-
-  for (DebugInfoFinder::iterator SPI = DIF.subprogram_begin(),
-           SPE = DIF.subprogram_end(); SPI != SPE; ++SPI) {
-    DISubprogram SP(*SPI);
-    raw_fd_ostream *&os = gcno_files[SP.getCompileUnit()];
-
-    GCOVFunction function(SP, os);
-    Function *F = SP.getFunction();
-    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
-      GCOVBlock &block = function.GetBlock(BB);
-      TerminatorInst *TI = BB->getTerminator();
-      if (int successors = TI->getNumSuccessors()) {
-        for (int i = 0; i != successors; ++i) {
-          block.AddEdge(function.GetBlock(TI->getSuccessor(i)));
-        }
-      }
-
-      uint32_t line = 0;
-      for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) {
-        const DebugLoc &loc = I->getDebugLoc();
-        if (loc.isUnknown()) continue;
-        if (line == loc.getLine()) continue;
-        line = loc.getLine();
-        if (SP != FindSubprogram(DIScope(loc.getScope(*Ctx)))) continue;
-
-        GCOVLines &lines = block.GetFile(SP.getFilename());
-        lines.AddLine(loc.getLine());
+std::string GCOVProfiler::mangleName(DICompileUnit CU, const char *NewStem) {
+  if (NamedMDNode *GCov = M->getNamedMetadata("llvm.gcov")) {
+    for (int i = 0, e = GCov->getNumOperands(); i != e; ++i) {
+      MDNode *N = GCov->getOperand(i);
+      if (N->getNumOperands() != 2) continue;
+      MDString *GCovFile = dyn_cast<MDString>(N->getOperand(0));
+      MDNode *CompileUnit = dyn_cast<MDNode>(N->getOperand(1));
+      if (!GCovFile || !CompileUnit) continue;
+      if (CompileUnit == CU) {
+        SmallString<128> Filename = GCovFile->getString();
+        sys::path::replace_extension(Filename, NewStem);
+        return Filename.str();
       }
     }
-    function.WriteOut();
   }
 
-  for (DenseMap<const MDNode *, raw_fd_ostream *>::iterator
-           I = gcno_files.begin(), E = gcno_files.end(); I != E; ++I) {
-    raw_fd_ostream *&Out = I->second;
-    Out->write("\0\0\0\0\0\0\0\0", 4); // EOF
-    Out->close();
-    delete Out;
-  }
+  SmallString<128> Filename = CU.getFilename();
+  sys::path::replace_extension(Filename, NewStem);
+  return sys::path::filename(Filename.str());
 }
 
 bool GCOVProfiler::runOnModule(Module &M) {
-  Mod = &M;
+  this->M = &M;
   Ctx = &M.getContext();
 
-  DebugInfoFinder DIF;
-  DIF.processModule(*Mod);
+  if (EmitNotes) emitGCNO();
+  if (EmitData) return emitProfileArcs();
+  return false;
+}
 
-  EmitGCNO(DIF);
+void GCOVProfiler::emitGCNO() {
+  NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
+  if (!CU_Nodes) return;
 
-  SmallVector<std::pair<GlobalVariable *, uint32_t>, 8> counters_by_ident;
-  for (DebugInfoFinder::iterator SPI = DIF.subprogram_begin(),
-           SPE = DIF.subprogram_end(); SPI != SPE; ++SPI) {
-    DISubprogram SP(*SPI);
-    Function *F = SP.getFunction();
+  for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+    // Each compile unit gets its own .gcno file. This means that whether we run
+    // this pass over the original .o's as they're produced, or run it after
+    // LTO, we'll generate the same .gcno files.
 
-    // TODO: GCOV format requires a distinct unified exit block.
-    unsigned edges = 0;
-    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
-      TerminatorInst *TI = BB->getTerminator();
-      edges += TI->getNumSuccessors();
-    }
+    DICompileUnit CU(CU_Nodes->getOperand(i));
+    std::string ErrorInfo;
+    raw_fd_ostream out(mangleName(CU, "gcno").c_str(), ErrorInfo,
+                       raw_fd_ostream::F_Binary);
+    if (!Use402Format)
+      out.write("oncg*404MVLL", 12);
+    else
+      out.write("oncg*204MVLL", 12);
+
+    DIArray SPs = CU.getSubprograms();
+    for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
+      DISubprogram SP(SPs.getElement(i));
+      if (!SP.Verify()) continue;
 
-    const ArrayType *counter_type =
-        ArrayType::get(Type::getInt64Ty(*Ctx), edges);
-    GlobalVariable *counter =
-        new GlobalVariable(*Mod, counter_type, false,
-                           GlobalValue::InternalLinkage,
-                           Constant::getNullValue(counter_type),
-                           "__llvm_gcov_ctr", 0, false, 0);
-    counters_by_ident.push_back(
-        std::make_pair(counter, reinterpret_cast<intptr_t>((MDNode*)SP)));
-
-    UniqueVector<BasicBlock *> complex_edge_preds;
-    UniqueVector<BasicBlock *> complex_edge_succs;
-
-    unsigned edge_num = 0;
-    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
-      TerminatorInst *TI = BB->getTerminator();
-      if (int successors = TI->getNumSuccessors()) {
-        IRBuilder<> builder(TI);
-
-        if (successors == 1) {
-          Value *ctr = builder.CreateConstInBoundsGEP2_64(counter, 0, edge_num);
-          Value *count = builder.CreateLoad(ctr);
-          count = builder.CreateAdd(count,
-                                    ConstantInt::get(Type::getInt64Ty(*Ctx),1));
-          builder.CreateStore(count, ctr);
-        } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
-          Value *sel = builder.CreateSelect(
-              BI->getCondition(),
-              ConstantInt::get(Type::getInt64Ty(*Ctx), edge_num),
-              ConstantInt::get(Type::getInt64Ty(*Ctx), edge_num + 1));
-          SmallVector<Value *, 2> idx;
-          idx.push_back(Constant::getNullValue(Type::getInt64Ty(*Ctx)));
-          idx.push_back(sel);
-          Value *ctr = builder.CreateInBoundsGEP(counter,
-                                                 idx.begin(), idx.end());
-          Value *count = builder.CreateLoad(ctr);
-          count = builder.CreateAdd(count,
-                                    ConstantInt::get(Type::getInt64Ty(*Ctx),1));
-          builder.CreateStore(count, ctr);
-        } else {
-          complex_edge_preds.insert(BB);
+      Function *F = SP.getFunction();
+      if (!F) continue;
+      GCOVFunction Func(SP, &out, Use402Format, UseExtraChecksum);
+
+      for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+        GCOVBlock &Block = Func.getBlock(BB);
+        TerminatorInst *TI = BB->getTerminator();
+        if (int successors = TI->getNumSuccessors()) {
           for (int i = 0; i != successors; ++i) {
-            complex_edge_succs.insert(TI->getSuccessor(i));
+            Block.addEdge(Func.getBlock(TI->getSuccessor(i)));
           }
+        } else if (isa<ReturnInst>(TI)) {
+          Block.addEdge(Func.getReturnBlock());
+        }
+
+        uint32_t Line = 0;
+        for (BasicBlock::iterator I = BB->begin(), IE = BB->end();
+             I != IE; ++I) {
+          const DebugLoc &Loc = I->getDebugLoc();
+          if (Loc.isUnknown()) continue;
+          if (Line == Loc.getLine()) continue;
+          Line = Loc.getLine();
+          if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue;
+
+          GCOVLines &Lines = Block.getFile(SP.getFilename());
+          Lines.addLine(Loc.getLine());
         }
-        edge_num += successors;
       }
+      Func.writeOut();
     }
+    out.write("\0\0\0\0\0\0\0\0", 8);  // EOF
+    out.close();
+  }
+}
 
-    // TODO: support switch, invoke, indirectbr
-    if (!complex_edge_preds.empty()) {
-      // emit a [preds x [succs x i64*]].
-      for (int i = 0, e = complex_edge_preds.size(); i != e; ++i) {
-        // call runtime to state save
+bool GCOVProfiler::emitProfileArcs() {
+  NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
+  if (!CU_Nodes) return false;
+
+  bool Result = false;  
+  bool InsertIndCounterIncrCode = false;
+  for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+    DICompileUnit CU(CU_Nodes->getOperand(i));
+    DIArray SPs = CU.getSubprograms();
+    SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;
+    for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
+      DISubprogram SP(SPs.getElement(i));
+      if (!SP.Verify()) continue;
+      Function *F = SP.getFunction();
+      if (!F) continue;
+      if (!Result) Result = true;
+      unsigned Edges = 0;
+      for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+        TerminatorInst *TI = BB->getTerminator();
+        if (isa<ReturnInst>(TI))
+          ++Edges;
+        else
+          Edges += TI->getNumSuccessors();
       }
-      for (int i = 0, e = complex_edge_succs.size(); i != e; ++i) {
-        // call runtime to perform increment
+      
+      ArrayType *CounterTy =
+        ArrayType::get(Type::getInt64Ty(*Ctx), Edges);
+      GlobalVariable *Counters =
+        new GlobalVariable(*M, CounterTy, false,
+                           GlobalValue::InternalLinkage,
+                           Constant::getNullValue(CounterTy),
+                           "__llvm_gcov_ctr");
+      CountersBySP.push_back(std::make_pair(Counters, (MDNode*)SP));
+      
+      UniqueVector<BasicBlock *> ComplexEdgePreds;
+      UniqueVector<BasicBlock *> ComplexEdgeSuccs;
+      
+      unsigned Edge = 0;
+      for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+        TerminatorInst *TI = BB->getTerminator();
+        int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors();
+        if (Successors) {
+          IRBuilder<> Builder(TI);
+          
+          if (Successors == 1) {
+            Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
+                                                                Edge);
+            Value *Count = Builder.CreateLoad(Counter);
+            Count = Builder.CreateAdd(Count,
+                                      ConstantInt::get(Type::getInt64Ty(*Ctx),1));
+            Builder.CreateStore(Count, Counter);
+          } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+            Value *Sel = Builder.CreateSelect(
+              BI->getCondition(),
+              ConstantInt::get(Type::getInt64Ty(*Ctx), Edge),
+              ConstantInt::get(Type::getInt64Ty(*Ctx), Edge + 1));
+            SmallVector<Value *, 2> Idx;
+            Idx.push_back(Constant::getNullValue(Type::getInt64Ty(*Ctx)));
+            Idx.push_back(Sel);
+            Value *Counter = Builder.CreateInBoundsGEP(Counters, Idx);
+            Value *Count = Builder.CreateLoad(Counter);
+            Count = Builder.CreateAdd(Count,
+                                      ConstantInt::get(Type::getInt64Ty(*Ctx),1));
+            Builder.CreateStore(Count, Counter);
+          } else {
+            ComplexEdgePreds.insert(BB);
+            for (int i = 0; i != Successors; ++i)
+              ComplexEdgeSuccs.insert(TI->getSuccessor(i));
+          }
+          Edge += Successors;
+        }
+      }
+      
+      if (!ComplexEdgePreds.empty()) {
+        GlobalVariable *EdgeTable =
+          buildEdgeLookupTable(F, Counters,
+                               ComplexEdgePreds, ComplexEdgeSuccs);
+        GlobalVariable *EdgeState = getEdgeStateValue();
+        
+        Type *Int32Ty = Type::getInt32Ty(*Ctx);
+        for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) {
+          IRBuilder<> Builder(ComplexEdgePreds[i+1]->getTerminator());
+          Builder.CreateStore(ConstantInt::get(Int32Ty, i), EdgeState);
+        }
+        for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) {
+          // call runtime to perform increment
+          BasicBlock::iterator InsertPt =
+            ComplexEdgeSuccs[i+1]->getFirstInsertionPt();
+          IRBuilder<> Builder(InsertPt);
+          Value *CounterPtrArray =
+            Builder.CreateConstInBoundsGEP2_64(EdgeTable, 0,
+                                               i * ComplexEdgePreds.size());
+
+          // Build code to increment the counter.
+          InsertIndCounterIncrCode = true;
+          Builder.CreateCall2(getIncrementIndirectCounterFunc(),
+                              EdgeState, CounterPtrArray);
+        }
       }
     }
+
+    insertCounterWriteout(CountersBySP);
+    insertFlush(CountersBySP);
   }
 
-  InsertCounterWriteout(DIF, counters_by_ident);
+  if (InsertIndCounterIncrCode)
+    insertIndirectCounterIncrement();
+
+  return Result;
+}
+
+// All edges with successors that aren't branches are "complex", because it
+// requires complex logic to pick which counter to update.
+GlobalVariable *GCOVProfiler::buildEdgeLookupTable(
+    Function *F,
+    GlobalVariable *Counters,
+    const UniqueVector<BasicBlock *> &Preds,
+    const UniqueVector<BasicBlock *> &Succs) {
+  // TODO: support invoke, threads. We rely on the fact that nothing can modify
+  // the whole-Module pred edge# between the time we set it and the time we next
+  // read it. Threads and invoke make this untrue.
+
+  // emit [(succs * preds) x i64*], logically [succ x [pred x i64*]].
+  size_t TableSize = Succs.size() * Preds.size();
+  Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx);
+  ArrayType *EdgeTableTy = ArrayType::get(Int64PtrTy, TableSize);
+
+  OwningArrayPtr<Constant *> EdgeTable(new Constant*[TableSize]);
+  Constant *NullValue = Constant::getNullValue(Int64PtrTy);
+  for (size_t i = 0; i != TableSize; ++i)
+    EdgeTable[i] = NullValue;
+
+  unsigned Edge = 0;
+  for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+    TerminatorInst *TI = BB->getTerminator();
+    int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors();
+    if (Successors > 1 && !isa<BranchInst>(TI) && !isa<ReturnInst>(TI)) {
+      for (int i = 0; i != Successors; ++i) {
+        BasicBlock *Succ = TI->getSuccessor(i);
+        IRBuilder<> builder(Succ);
+        Value *Counter = builder.CreateConstInBoundsGEP2_64(Counters, 0,
+                                                            Edge + i);
+        EdgeTable[((Succs.idFor(Succ)-1) * Preds.size()) +
+                  (Preds.idFor(BB)-1)] = cast<Constant>(Counter);
+      }
+    }
+    Edge += Successors;
+  }
 
-  return true;
+  ArrayRef<Constant*> V(&EdgeTable[0], TableSize);
+  GlobalVariable *EdgeTableGV =
+      new GlobalVariable(
+          *M, EdgeTableTy, true, GlobalValue::InternalLinkage,
+          ConstantArray::get(EdgeTableTy, V),
+          "__llvm_gcda_edge_table");
+  EdgeTableGV->setUnnamedAddr(true);
+  return EdgeTableGV;
 }
 
 Constant *GCOVProfiler::getStartFileFunc() {
-  const Type *Args[1] = { Type::getInt8PtrTy(*Ctx) };
-  const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
-                                              Args, false);
-  return Mod->getOrInsertFunction("llvm_gcda_start_file", FTy);
+  FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
+                                              Type::getInt8PtrTy(*Ctx), false);
+  return M->getOrInsertFunction("llvm_gcda_start_file", FTy);
+}
+
+Constant *GCOVProfiler::getIncrementIndirectCounterFunc() {
+  Type *Int32Ty = Type::getInt32Ty(*Ctx);
+  Type *Int64Ty = Type::getInt64Ty(*Ctx);
+  Type *Args[] = {
+    Int32Ty->getPointerTo(),                // uint32_t *predecessor
+    Int64Ty->getPointerTo()->getPointerTo() // uint64_t **counters
+  };
+  FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
+  return M->getOrInsertFunction("__llvm_gcov_indirect_counter_increment", FTy);
 }
 
 Constant *GCOVProfiler::getEmitFunctionFunc() {
   // Looks up, or declares if absent, the "llvm_gcda_emit_function" hook.
   // The new signature is void(i32 ident, i8* function_name); the removed
   // version took only the i32 identifier.
-  const Type *Args[1] = { Type::getInt32Ty(*Ctx) };
-  const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
-                                              Args, false);
-  return Mod->getOrInsertFunction("llvm_gcda_emit_function", FTy);
+  Type *Args[2] = {
+    Type::getInt32Ty(*Ctx),    // uint32_t ident
+    Type::getInt8PtrTy(*Ctx),  // const char *function_name
+  };
+  FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
+  return M->getOrInsertFunction("llvm_gcda_emit_function", FTy);
 }
 
 Constant *GCOVProfiler::getEmitArcsFunc() {
   // Looks up, or declares if absent, the "llvm_gcda_emit_arcs" hook with type
   // void(i32 num_counters, i64* counters). insertCounterWriteout calls it with
   // the element count of a counter array and a pointer to its first element.
-  const Type *Args[] = {
+  Type *Args[] = {
     Type::getInt32Ty(*Ctx),     // uint32_t num_counters
     Type::getInt64PtrTy(*Ctx),  // uint64_t *counters
   };
-  const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
+  FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
                                               Args, false);
-  return Mod->getOrInsertFunction("llvm_gcda_emit_arcs", FTy);
+  return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy);
 }
 
 Constant *GCOVProfiler::getEndFileFunc() {
   // Looks up, or declares if absent, the "llvm_gcda_end_file" hook, a
   // void() function. insertCounterWriteout calls it once per compile unit,
   // after all functions and arcs have been emitted.
-  const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
-  return Mod->getOrInsertFunction("llvm_gcda_end_file", FTy);
+  FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+  return M->getOrInsertFunction("llvm_gcda_end_file", FTy);
 }
 
-static std::string ReplaceStem(std::string orig_filename, std::string new_stem){
-  return (sys::path::stem(orig_filename) + "." + new_stem).str();
+GlobalVariable *GCOVProfiler::getEdgeStateValue() {
   // Returns the module's "__llvm_gcov_global_state_pred" global, creating it
   // on first use. The created variable is a mutable (non-constant) i32 with
   // internal linkage, initialised to 0xffffffff and marked unnamed_addr.
   // NOTE(review): 0xffffffff is the same value insertIndirectCounterIncrement
   // compares *predecessor against before returning early; presumably this
   // global is the predecessor slot passed to that helper -- confirm at the
   // call sites.
+  GlobalVariable *GV = M->getGlobalVariable("__llvm_gcov_global_state_pred");
+  if (!GV) {
+    GV = new GlobalVariable(*M, Type::getInt32Ty(*Ctx), false,
+                            GlobalValue::InternalLinkage,
+                            ConstantInt::get(Type::getInt32Ty(*Ctx),
+                                             0xffffffff),
+                            "__llvm_gcov_global_state_pred");
+    GV->setUnnamedAddr(true);
+  }
+  return GV;
 }
 
-void GCOVProfiler::InsertCounterWriteout(
-    DebugInfoFinder &DIF,
-    SmallVector<std::pair<GlobalVariable *, uint32_t>, 8> &counters_by_ident) {
-
-  const FunctionType *WriteoutFTy =
-      FunctionType::get(Type::getVoidTy(*Ctx), false);
-  Function *WriteoutF = Function::Create(WriteoutFTy,
-                                         GlobalValue::InternalLinkage,
-                                         "__llvm_gcda_writeout", Mod);
+void GCOVProfiler::insertCounterWriteout(
+    ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) {
   // Emits two functions into the module:
   //   * "__llvm_gcov_writeout": for each compile unit listed in "llvm.dbg.cu",
   //     calls the start-file hook, then the emit-function and emit-arcs hooks
   //     for every (counter array, subprogram node) pair in CountersBySP, then
   //     the end-file hook.
   //   * "__llvm_gcov_init": registers the writeout function with atexit and
   //     is appended to the module's global constructors.
   // Both are marked noinline, and noredzone when NoRedZone is set.
   //
   // An existing "__llvm_gcov_writeout" is reused rather than recreated.
   // NOTE(review): if that function already has a body, the "entry" block
   // created below is appended to it -- confirm this cannot happen.
+  FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+  Function *WriteoutF = M->getFunction("__llvm_gcov_writeout");
+  if (!WriteoutF)
+    WriteoutF = Function::Create(WriteoutFTy, GlobalValue::InternalLinkage,
+                                 "__llvm_gcov_writeout", M);
   WriteoutF->setUnnamedAddr(true);
-  BasicBlock *BB = BasicBlock::Create(*Ctx, "", WriteoutF);
-  IRBuilder<> builder(BB);
+  WriteoutF->addFnAttr(Attributes::NoInline);
+  if (NoRedZone)
+    WriteoutF->addFnAttr(Attributes::NoRedZone);
+
+  BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF);
+  IRBuilder<> Builder(BB);
 
   Constant *StartFile = getStartFileFunc();
   Constant *EmitFunction = getEmitFunctionFunc();
   Constant *EmitArcs = getEmitArcsFunc();
   Constant *EndFile = getEndFileFunc();
 
-  for (DebugInfoFinder::iterator CUI = DIF.compile_unit_begin(),
-           CUE = DIF.compile_unit_end(); CUI != CUE; ++CUI) {
-    DICompileUnit compile_unit(*CUI);
-    std::string filename_gcda = ReplaceStem(compile_unit.getFilename(), "gcda");
-    builder.CreateCall(StartFile,
-                       builder.CreateGlobalStringPtr(filename_gcda));
-    for (SmallVector<std::pair<GlobalVariable *, uint32_t>, 8>::iterator
-             I = counters_by_ident.begin(), E = counters_by_ident.end();
-         I != E; ++I) {
-      builder.CreateCall(EmitFunction, ConstantInt::get(Type::getInt32Ty(*Ctx),
-                                                        I->second));
-      GlobalVariable *GV = I->first;
-      unsigned num_arcs =
   // Without an "llvm.dbg.cu" node the writeout body is just `ret void`.
+  NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
+  if (CU_Nodes) {
+    for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+      DICompileUnit CU(CU_Nodes->getOperand(i));
+      std::string FilenameGcda = mangleName(CU, "gcda");
+      Builder.CreateCall(StartFile,
+                         Builder.CreateGlobalStringPtr(FilenameGcda));
       // NOTE(review): the inner loop walks all of CountersBySP for every
       // compile unit, so each function's counters are emitted into every
       // .gcda file -- confirm this is intended for multi-CU modules.
+      for (ArrayRef<std::pair<GlobalVariable *, MDNode *> >::iterator
+             I = CountersBySP.begin(), E = CountersBySP.end();
+           I != E; ++I) {
+        DISubprogram SP(I->second);
         // The function identifier is derived from the address of the
         // subprogram's MDNode and then materialised as an i32 constant.
         // NOTE(review): on hosts with 64-bit pointers this presumably keeps
         // only the low 32 bits, and the value depends on where the node was
         // allocated -- confirm it matches the ident written to the notes file.
+        intptr_t ident = reinterpret_cast<intptr_t>(I->second);
+        Builder.CreateCall2(EmitFunction,
+                            ConstantInt::get(Type::getInt32Ty(*Ctx), ident),
+                            Builder.CreateGlobalStringPtr(SP.getName()));
+        
         // The arc count is the element count of the array type the counter
         // global points to; the GEP yields a pointer to its first element.
+        GlobalVariable *GV = I->first;
+        unsigned Arcs =
           cast<ArrayType>(GV->getType()->getElementType())->getNumElements();
-      builder.CreateCall2(
-          EmitArcs,
-          ConstantInt::get(Type::getInt32Ty(*Ctx), num_arcs),
-          builder.CreateConstGEP2_64(GV, 0, 0));
+        Builder.CreateCall2(EmitArcs,
+                            ConstantInt::get(Type::getInt32Ty(*Ctx), Arcs),
+                            Builder.CreateConstGEP2_64(GV, 0, 0));
+      }
+      Builder.CreateCall(EndFile);
     }
-    builder.CreateCall(EndFile);
   }
-  builder.CreateRetVoid();
+  Builder.CreateRetVoid();
+
+  // Create a small bit of code that registers the "__llvm_gcov_writeout"
+  // function to be executed at exit.
+  FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+  Function *F = Function::Create(FTy, GlobalValue::InternalLinkage,
+                                 "__llvm_gcov_init", M);
+  F->setUnnamedAddr(true);
   // NOTE(review): F was just created with InternalLinkage, so this call
   // looks redundant.
+  F->setLinkage(GlobalValue::InternalLinkage);
+  F->addFnAttr(Attributes::NoInline);
+  if (NoRedZone)
+    F->addFnAttr(Attributes::NoRedZone);
+
+  BB = BasicBlock::Create(*Ctx, "entry", F);
+  Builder.SetInsertPoint(BB);
+
   // FTy on the right-hand side is still the void() type from above, so
   // atexit is declared as i32(void()*).
+  FTy = FunctionType::get(Type::getInt32Ty(*Ctx),
+                          PointerType::get(FTy, 0), false);
+  Constant *AtExitFn = M->getOrInsertFunction("atexit", FTy);
+  Builder.CreateCall(AtExitFn, WriteoutF);
+  Builder.CreateRetVoid();
+
   // Register the init function as a global constructor (priority argument 0).
+  appendToGlobalCtors(*M, F, 0);
+}
+
+void GCOVProfiler::insertIndirectCounterIncrement() {
   // Gives "__llvm_gcov_indirect_counter_increment" a body. The generated
   // function loads *predecessor, returns if it equals 0xffffffff, otherwise
   // loads counters[pred], returns if that pointer is null, and otherwise
   // increments the 64-bit counter it points to. The function is given
   // internal linkage and marked unnamed_addr and noinline (plus noredzone
   // when NoRedZone is set).
+  Function *Fn =
+    cast<Function>(GCOVProfiler::getIncrementIndirectCounterFunc());
+  Fn->setUnnamedAddr(true);
+  Fn->setLinkage(GlobalValue::InternalLinkage);
+  Fn->addFnAttr(Attributes::NoInline);
+  if (NoRedZone)
+    Fn->addFnAttr(Attributes::NoRedZone);
+
+  Type *Int32Ty = Type::getInt32Ty(*Ctx);
+  Type *Int64Ty = Type::getInt64Ty(*Ctx);
+  Constant *NegOne = ConstantInt::get(Int32Ty, 0xffffffff);
+
+  // Create basic blocks for function.
+  BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", Fn);
+  IRBuilder<> Builder(BB);
+
   // Block order in the function: entry, two unnamed blocks, exit.
+  BasicBlock *PredNotNegOne = BasicBlock::Create(*Ctx, "", Fn);
+  BasicBlock *CounterEnd = BasicBlock::Create(*Ctx, "", Fn);
+  BasicBlock *Exit = BasicBlock::Create(*Ctx, "exit", Fn);
+
+  // uint32_t pred = *predecessor;
+  // if (pred == 0xffffffff) return;
+  Argument *Arg = Fn->arg_begin();
+  Arg->setName("predecessor");
+  Value *Pred = Builder.CreateLoad(Arg, "pred");
+  Value *Cond = Builder.CreateICmpEQ(Pred, NegOne);
   // The entry terminator is created directly at the end of BB instead of
   // through Builder; the later conditional branch uses Builder.CreateCondBr.
+  BranchInst::Create(Exit, PredNotNegOne, Cond, BB);
+
+  Builder.SetInsertPoint(PredNotNegOne);
+
+  // uint64_t *counter = counters[pred];
+  // if (!counter) return;
   // pred is zero-extended to i64 before being used as the GEP index.
+  Value *ZExtPred = Builder.CreateZExt(Pred, Int64Ty);
+  Arg = llvm::next(Fn->arg_begin());
+  Arg->setName("counters");
+  Value *GEP = Builder.CreateGEP(Arg, ZExtPred);
+  Value *Counter = Builder.CreateLoad(GEP, "counter");
+  Cond = Builder.CreateICmpEQ(Counter,
+                              Constant::getNullValue(Int64Ty->getPointerTo()));
+  Builder.CreateCondBr(Cond, Exit, CounterEnd);
+
+  // ++*counter;
+  Builder.SetInsertPoint(CounterEnd);
+  Value *Add = Builder.CreateAdd(Builder.CreateLoad(Counter),
+                                 ConstantInt::get(Int64Ty, 1));
+  Builder.CreateStore(Add, Counter);
+  Builder.CreateBr(Exit);
+
+  // Fill in the exit block.
+  Builder.SetInsertPoint(Exit);
+  Builder.CreateRetVoid();
+}
+
+void GCOVProfiler::
+insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) {
   // Defines "__gcov_flush": it calls "__llvm_gcov_writeout" and then stores
   // an all-zero value into every counter global in CountersBySP. A function
   // of that name already present in the module is reused and forced to
   // internal linkage; otherwise a void() function is created. Requires that
   // "__llvm_gcov_writeout" already exists (see the assert below).
+  FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+  Function *FlushF = M->getFunction("__gcov_flush");
+  if (!FlushF)
+    FlushF = Function::Create(FTy, GlobalValue::InternalLinkage,
+                              "__gcov_flush", M);
+  else
+    FlushF->setLinkage(GlobalValue::InternalLinkage);
+  FlushF->setUnnamedAddr(true);
+  FlushF->addFnAttr(Attributes::NoInline);
+  if (NoRedZone)
+    FlushF->addFnAttr(Attributes::NoRedZone);
+
+  BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", FlushF);
+
+  // Write out the current counters.
+  Constant *WriteoutF = M->getFunction("__llvm_gcov_writeout");
+  assert(WriteoutF && "Need to create the writeout function first!");
+
+  IRBuilder<> Builder(Entry);
+  Builder.CreateCall(WriteoutF);
+
+  // Zero out the counters.
+  for (ArrayRef<std::pair<GlobalVariable *, MDNode *> >::iterator
+         I = CountersBySP.begin(), E = CountersBySP.end();
+       I != E; ++I) {
+    GlobalVariable *GV = I->first;
     // The null value has the global's pointee type, so a single store
     // resets the whole object.
+    Constant *Null = Constant::getNullValue(GV->getType()->getElementType());
+    Builder.CreateStore(Null, GV);
+  }
 
-  InsertProfilingShutdownCall(WriteoutF, Mod);
   // A reused declaration may not return void, so the return instruction is
   // chosen from the function's actual return type.
+  Type *RetTy = FlushF->getReturnType();
+  if (RetTy == Type::getVoidTy(*Ctx))
+    Builder.CreateRetVoid();
+  else if (RetTy->isIntegerTy())
+    // Used if __gcov_flush was implicitly declared.
+    Builder.CreateRet(ConstantInt::get(RetTy, 0));
+  else
+    report_fatal_error("invalid return type for __gcov_flush");
 }