1 //===- CleanupGCCOutput.cpp - Cleanup GCC Output --------------------------===//
3 // This pass is used to cleanup the output of GCC. GCC's output is
4 // unneccessarily gross for a couple of reasons. This pass does the following
5 // things to try to clean it up:
7 // * Eliminate names for GCC types that we know can't be needed by the user.
8 // * Eliminate names for types that are unused in the entire translation unit
9 // * Fix various problems that we might have in PHI nodes and casts
11 // Note: This code produces dead declarations, it is a good idea to run DCE
14 //===----------------------------------------------------------------------===//
16 #include "llvm/Transforms/CleanupGCCOutput.h"
17 #include "llvm/Analysis/FindUsedTypes.h"
18 #include "llvm/Module.h"
19 #include "llvm/SymbolTable.h"
20 #include "llvm/DerivedTypes.h"
21 #include "llvm/iPHINode.h"
22 #include "llvm/iMemory.h"
23 #include "llvm/iTerminators.h"
24 #include "llvm/iOther.h"
25 #include "llvm/Support/CFG.h"
26 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
27 #include "Support/StatisticReporter.h"
31 static Statistic<> NumTypeSymtabEntriesKilled("cleangcc\t- Number of unused typenames removed from symtab");
32 static Statistic<> NumCastsMoved("cleangcc\t- Number of casts removed from head of basic block");
33 static Statistic<> NumRefactoredPreds("cleangcc\t- Number of predecessor blocks refactored");
38 struct CleanupGCCOutput : public FunctionPass {
39 const char *getPassName() const { return "Cleanup GCC Output"; }
41 // doPassInitialization - For this pass, it removes global symbol table
42 // entries for primitive types. These are never used for linking in GCC and
43 // they make the output uglier to look at, so we nuke them.
45 // Also, initialize instance variables.
47 bool doInitialization(Module *M);
49 // runOnFunction - This method simplifies the specified function hopefully.
51 bool runOnFunction(Function *F);
53 // doPassFinalization - Strip out type names that are unused by the program
54 bool doFinalization(Module *M);
56 // getAnalysisUsage - This function needs FindUsedTypes to do its job...
58 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
59 AU.addRequired(FindUsedTypes::ID);
64 Pass *createCleanupGCCOutputPass() {
65 return new CleanupGCCOutput();
70 // ShouldNukSymtabEntry - Return true if this module level symbol table entry
71 // should be eliminated.
73 static inline bool ShouldNukeSymtabEntry(const std::pair<std::string,Value*>&E){
74 // Nuke all names for primitive types!
75 if (cast<Type>(E.second)->isPrimitiveType()) return true;
77 // Nuke all pointers to primitive types as well...
78 if (const PointerType *PT = dyn_cast<PointerType>(E.second))
79 if (PT->getElementType()->isPrimitiveType()) return true;
81 // The only types that could contain .'s in the program are things generated
82 // by GCC itself, including "complex.float" and friends. Nuke them too.
83 if (E.first.find('.') != std::string::npos) return true;
88 // doInitialization - For this pass, it removes global symbol table
89 // entries for primitive types. These are never used for linking in GCC and
90 // they make the output uglier to look at, so we nuke them.
92 bool CleanupGCCOutput::doInitialization(Module *M) {
95 if (M->hasSymbolTable()) {
96 SymbolTable *ST = M->getSymbolTable();
98 // Check the symbol table for superfluous type entries...
100 // Grab the 'type' plane of the module symbol...
101 SymbolTable::iterator STI = ST->find(Type::TypeTy);
102 if (STI != ST->end()) {
103 // Loop over all entries in the type plane...
104 SymbolTable::VarMap &Plane = STI->second;
105 for (SymbolTable::VarMap::iterator PI = Plane.begin(); PI != Plane.end();)
106 if (ShouldNukeSymtabEntry(*PI)) { // Should we remove this entry?
107 #if MAP_IS_NOT_BRAINDEAD
108 PI = Plane.erase(PI); // STD C++ Map should support this!
110 Plane.erase(PI); // Alas, GCC 2.95.3 doesn't *SIGH*
113 ++NumTypeSymtabEntriesKilled;
125 // FixCastsAndPHIs - The LLVM GCC has a tendancy to intermix Cast instructions
126 // in with the PHI nodes. These cast instructions are potentially there for two
127 // different reasons:
129 // 1. The cast could be for an early PHI, and be accidentally inserted before
130 // another PHI node. In this case, the PHI node should be moved to the end
131 // of the PHI nodes in the basic block. We know that it is this case if
132 // the source for the cast is a PHI node in this basic block.
134 // 2. If not #1, the cast must be a source argument for one of the PHI nodes
135 // in the current basic block. If this is the case, the cast should be
136 // lifted into the basic block for the appropriate predecessor.
138 static inline bool FixCastsAndPHIs(BasicBlock *BB) {
139 bool Changed = false;
141 BasicBlock::iterator InsertPos = BB->begin();
143 // Find the end of the interesting instructions...
144 while (isa<PHINode>(*InsertPos) || isa<CastInst>(*InsertPos)) ++InsertPos;
146 // Back the InsertPos up to right after the last PHI node.
147 while (InsertPos != BB->begin() && isa<CastInst>(*(InsertPos-1))) --InsertPos;
149 // No PHI nodes, quick exit.
150 if (InsertPos == BB->begin()) return false;
152 // Loop over all casts trapped between the PHI's...
153 BasicBlock::iterator I = BB->begin();
154 while (I != InsertPos) {
155 if (CastInst *CI = dyn_cast<CastInst>(*I)) { // Fix all cast instructions
156 Value *Src = CI->getOperand(0);
158 // Move the cast instruction to the current insert position...
159 --InsertPos; // New position for cast to go...
160 std::swap(*InsertPos, *I); // Cast goes down, PHI goes up
165 if (isa<PHINode>(Src) && // Handle case #1
166 cast<PHINode>(Src)->getParent() == BB) {
167 // We're done for case #1
168 } else { // Handle case #2
169 // In case #2, we have to do a few things:
170 // 1. Remove the cast from the current basic block.
171 // 2. Identify the PHI node that the cast is for.
172 // 3. Find out which predecessor the value is for.
173 // 4. Move the cast to the end of the basic block that it SHOULD be
176 // Remove the cast instruction from the basic block. The remove only
177 // invalidates iterators in the basic block that are AFTER the removed
178 // element. Because we just moved the CastInst to the InsertPos, no
179 // iterators get invalidated.
181 BB->getInstList().remove(InsertPos);
183 // Find the PHI node. Since this cast was generated specifically for a
184 // PHI node, there can only be a single PHI node using it.
186 assert(CI->use_size() == 1 && "Exactly one PHI node should use cast!");
187 PHINode *PN = cast<PHINode>(*CI->use_begin());
189 // Find out which operand of the PHI it is...
191 for (i = 0; i < PN->getNumIncomingValues(); ++i)
192 if (PN->getIncomingValue(i) == CI)
194 assert(i != PN->getNumIncomingValues() && "PHI doesn't use cast!");
196 // Get the predecessor the value is for...
197 BasicBlock *Pred = PN->getIncomingBlock(i);
199 // Reinsert the cast right before the terminator in Pred.
200 Pred->getInstList().insert(Pred->end()-1, CI);
211 // RefactorPredecessor - When we find out that a basic block is a repeated
212 // predecessor in a PHI node, we have to refactor the function until there is at
213 // most a single instance of a basic block in any predecessor list.
215 static inline void RefactorPredecessor(BasicBlock *BB, BasicBlock *Pred) {
216 Function *M = BB->getParent();
217 assert(find(pred_begin(BB), pred_end(BB), Pred) != pred_end(BB) &&
218 "Pred is not a predecessor of BB!");
220 // Create a new basic block, adding it to the end of the function.
221 BasicBlock *NewBB = new BasicBlock("", M);
223 // Add an unconditional branch to BB to the new block.
224 NewBB->getInstList().push_back(new BranchInst(BB));
226 // Get the terminator that causes a branch to BB from Pred.
227 TerminatorInst *TI = Pred->getTerminator();
229 // Find the first use of BB in the terminator...
230 User::op_iterator OI = find(TI->op_begin(), TI->op_end(), BB);
231 assert(OI != TI->op_end() && "Pred does not branch to BB!!!");
233 // Change the use of BB to point to the new stub basic block
236 // Now we need to loop through all of the PHI nodes in BB and convert their
237 // first incoming value for Pred to reference the new basic block instead.
239 for (BasicBlock::iterator I = BB->begin();
240 PHINode *PN = dyn_cast<PHINode>(*I); ++I) {
241 int BBIdx = PN->getBasicBlockIndex(Pred);
242 assert(BBIdx != -1 && "PHI node doesn't have an entry for Pred!");
244 // The value that used to look like it came from Pred now comes from NewBB
245 PN->setIncomingBlock((unsigned)BBIdx, NewBB);
250 // runOnFunction - Loop through the function and fix problems with the PHI nodes
251 // in the current function. The problem is that PHI nodes might exist with
252 // multiple entries for the same predecessor. GCC sometimes generates code that
255 // bb7: br bool %cond1004, label %bb8, label %bb8
256 // bb8: %reg119 = phi uint [ 0, %bb7 ], [ 1, %bb7 ]
258 // which is completely illegal LLVM code. To compensate for this, we insert
259 // an extra basic block, and convert the code to look like this:
261 // bb7: br bool %cond1004, label %bbX, label %bb8
263 // bb8: %reg119 = phi uint [ 0, %bbX ], [ 1, %bb7 ]
266 bool CleanupGCCOutput::runOnFunction(Function *M) {
267 bool Changed = false;
268 // Don't use iterators because invalidation gets messy...
269 for (unsigned MI = 0; MI < M->size(); ++MI) {
270 BasicBlock *BB = M->getBasicBlocks()[MI];
272 Changed |= FixCastsAndPHIs(BB);
274 if (isa<PHINode>(BB->front())) {
275 const vector<BasicBlock*> Preds(pred_begin(BB), pred_end(BB));
277 // Handle the problem. Sort the list of predecessors so that it is easy
278 // to decide whether or not duplicate predecessors exist.
279 vector<BasicBlock*> SortedPreds(Preds);
280 sort(SortedPreds.begin(), SortedPreds.end());
282 // Loop over the predecessors, looking for adjacent BB's that are equal.
283 BasicBlock *LastOne = 0;
284 for (unsigned i = 0; i < Preds.size(); ++i) {
285 if (SortedPreds[i] == LastOne) { // Found a duplicate.
286 RefactorPredecessor(BB, SortedPreds[i]);
287 ++NumRefactoredPreds;
290 LastOne = SortedPreds[i];
297 bool CleanupGCCOutput::doFinalization(Module *M) {
298 bool Changed = false;
300 if (M->hasSymbolTable()) {
301 SymbolTable *ST = M->getSymbolTable();
302 const std::set<const Type *> &UsedTypes =
303 getAnalysis<FindUsedTypes>().getTypes();
305 // Check the symbol table for superfluous type entries that aren't used in
308 // Grab the 'type' plane of the module symbol...
309 SymbolTable::iterator STI = ST->find(Type::TypeTy);
310 if (STI != ST->end()) {
311 // Loop over all entries in the type plane...
312 SymbolTable::VarMap &Plane = STI->second;
313 for (SymbolTable::VarMap::iterator PI = Plane.begin(); PI != Plane.end();)
314 if (!UsedTypes.count(cast<Type>(PI->second))) {
315 #if MAP_IS_NOT_BRAINDEAD
316 PI = Plane.erase(PI); // STD C++ Map should support this!
318 Plane.erase(PI); // Alas, GCC 2.95.3 doesn't *SIGH*
319 PI = Plane.begin(); // N^2 algorithms are fun. :(