//
// This pass looks for equivalent functions that are mergable and folds them.
//
-// A hash is computed from the function, based on its type and number of
-// basic blocks.
+// An order relation is defined on the set of functions. It is introduced
+// through a special function comparison procedure that returns
+// 0 when the functions are equal,
+// -1 when the left function is less than the right function, and
+// 1 in the opposite case. We need a total ordering, so we need to maintain
+// four properties on the set of functions:
+// a <= a (reflexivity),
+// if a <= b and b <= a then a = b (antisymmetry),
+// if a <= b and b <= c then a <= c (transitivity),
+// for all a and b: a <= b or b <= a (totality).
//
-// Once all hashes are computed, we perform an expensive equality comparison
-// on each function pair. This takes n^2/2 comparisons per bucket, so it's
-// important that the hash function be high quality. The equality comparison
-// iterates through each instruction in each basic block.
+// The comparison iterates through each instruction in each basic block.
+// Functions are kept in a binary tree. For each new function F we perform
+// a lookup in the binary tree.
+// In practice it works the following way:
+// -- We define a Function* wrapper class (FunctionPtr) with a custom "operator<".
+// -- "FunctionPtr" instances are stored in a std::set collection, so every
+// std::set::insert operation gives its result in log(N) time.
//
// When a match is found the functions are folded. If both functions are
// overridable, we move the functionality into a new internal function and
// the object they belong to. However, as long as it's only used for a lookup
// and call, this is irrelevant, and we'd like to fold such functions.
//
-// * switch from n^2 pair-wise comparisons to an n-way comparison for each
-// bucket.
-//
// * be smarter about bitcasts.
//
// In order to fold functions, we will sometimes add either bitcast instructions
// analysis since the two functions differ where one has a bitcast and the
// other doesn't. We should learn to look through bitcasts.
//
+// * Compare complex types with pointer types inside.
+// * Compare cross-reference cases.
+// * Compare complex expressions.
+//
+// All three issues above can be described as the ability to prove that
+// fA == fB == fC == fE == fF == fG in the example below:
+//
+// void fA() {
+// fB();
+// }
+// void fB() {
+// fA();
+// }
+//
+// void fE() {
+// fF();
+// }
+// void fF() {
+// fG();
+// }
+// void fG() {
+// fE();
+// }
+//
+// The simplest cross-reference case (fA <--> fB) was implemented in previous
+// versions of MergeFunctions, though it was present in only two function pairs
+// in the test-suite (which contains >50k functions).
+// However, the ability to detect complex cross-referencing (e.g.:
+// A->B->C->D->A) could cover many more cases.
+//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO.h"
"'0' disables this check. Works only with '-debug' key."),
cl::init(0), cl::Hidden);
-/// Returns the type id for a type to be hashed. We turn pointer types into
-/// integers here because the actual compare logic below considers pointers and
-/// integers of the same size as equal.
-static Type::TypeID getTypeIDForHash(Type *Ty) {
- if (Ty->isPointerTy())
- return Type::IntegerTyID;
- return Ty->getTypeID();
-}
-
-/// Creates a hash-code for the function which is the same for any two
-/// functions that will compare equal, without looking at the instructions
-/// inside the function.
-static unsigned profileFunction(const Function *F) {
- FunctionType *FTy = F->getFunctionType();
-
- FoldingSetNodeID ID;
- ID.AddInteger(F->size());
- ID.AddInteger(F->getCallingConv());
- ID.AddBoolean(F->hasGC());
- ID.AddBoolean(FTy->isVarArg());
- ID.AddInteger(getTypeIDForHash(FTy->getReturnType()));
- for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
- ID.AddInteger(getTypeIDForHash(FTy->getParamType(i)));
- return ID.ComputeHash();
-}
-
-namespace {
-
-/// ComparableFunction - A struct that pairs together functions with a
-/// DataLayout so that we can keep them together as elements in the DenseSet.
-class ComparableFunction {
-public:
- static const ComparableFunction EmptyKey;
- static const ComparableFunction TombstoneKey;
- static DataLayout * const LookupOnly;
-
- ComparableFunction(Function *Func, const DataLayout *DL)
- : Func(Func), Hash(profileFunction(Func)), DL(DL) {}
-
- Function *getFunc() const { return Func; }
- unsigned getHash() const { return Hash; }
- const DataLayout *getDataLayout() const { return DL; }
-
- // Drops AssertingVH reference to the function. Outside of debug mode, this
- // does nothing.
- void release() {
- assert(Func &&
- "Attempted to release function twice, or release empty/tombstone!");
- Func = nullptr;
- }
-
-private:
- explicit ComparableFunction(unsigned Hash)
- : Func(nullptr), Hash(Hash), DL(nullptr) {}
-
- AssertingVH<Function> Func;
- unsigned Hash;
- const DataLayout *DL;
-};
-
-const ComparableFunction ComparableFunction::EmptyKey = ComparableFunction(0);
-const ComparableFunction ComparableFunction::TombstoneKey =
- ComparableFunction(1);
-DataLayout *const ComparableFunction::LookupOnly = (DataLayout*)(-1);
-
-}
-
-namespace llvm {
- template <>
- struct DenseMapInfo<ComparableFunction> {
- static ComparableFunction getEmptyKey() {
- return ComparableFunction::EmptyKey;
- }
- static ComparableFunction getTombstoneKey() {
- return ComparableFunction::TombstoneKey;
- }
- static unsigned getHashValue(const ComparableFunction &CF) {
- return CF.getHash();
- }
- static bool isEqual(const ComparableFunction &LHS,
- const ComparableFunction &RHS);
- };
-}
-
namespace {
/// FunctionComparator - Compares two functions to determine whether or not
/// 6.4.Load: range metadata (as integer numbers)
/// On this stage its better to see the code, since its not more than 10-15
/// strings for particular instruction, and could change sometimes.
- int cmpOperation(const Instruction *L, const Instruction *R) const;
+ int cmpOperations(const Instruction *L, const Instruction *R) const;
/// Compare two GEPs for equivalent pointer arithmetic.
/// Parts to be compared for each comparison stage,
/// 3. Pointer operand type (using cmpType method).
/// 4. Number of operands.
/// 5. Compare operands, using cmpValues method.
- int cmpGEP(const GEPOperator *GEPL, const GEPOperator *GEPR);
- int cmpGEP(const GetElementPtrInst *GEPL, const GetElementPtrInst *GEPR) {
- return cmpGEP(cast<GEPOperator>(GEPL), cast<GEPOperator>(GEPR));
+ int cmpGEPs(const GEPOperator *GEPL, const GEPOperator *GEPR);
+ int cmpGEPs(const GetElementPtrInst *GEPL, const GetElementPtrInst *GEPR) {
+ return cmpGEPs(cast<GEPOperator>(GEPL), cast<GEPOperator>(GEPR));
}
/// cmpType - compares two types,
/// be checked with the same way. If we get Res != 0 on some stage, return it.
/// Otherwise return 0.
/// 6. For all other cases put llvm_unreachable.
- int cmpType(Type *TyL, Type *TyR) const;
+ int cmpTypes(Type *TyL, Type *TyR) const;
int cmpNumbers(uint64_t L, uint64_t R) const;
DenseMap<const Value*, int> sn_mapL, sn_mapR;
};
+/// FunctionPtr - Pairs a Function with the DataLayout used when comparing it,
+/// and orders such pairs through FunctionComparator so that they can be kept
+/// in a std::set (see FnTreeType).
+class FunctionPtr {
+  AssertingVH<Function> F;
+  const DataLayout *DL;
+
+public:
+  FunctionPtr(Function *F, const DataLayout *DL) : F(F), DL(DL) {}
+
+  /// Returns the wrapped function.
+  Function *getFunc() const { return F; }
+
+  /// Resets the held function reference to null.
+  void release() { F = nullptr; }
+
+  /// Strict "less than": true only when comparing this function against the
+  /// function held by RHS yields -1.
+  bool operator<(const FunctionPtr &RHS) const {
+    return (FunctionComparator(DL, F, RHS.getFunc()).compare()) == -1;
+  }
+};
}
int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
// Check whether types are bitcastable. This part is just re-factored
// Type::canLosslesslyBitCastTo method, but instead of returning true/false,
// we also pack into result which type is "less" for us.
- int TypesRes = cmpType(TyL, TyR);
+ int TypesRes = cmpTypes(TyL, TyR);
if (TypesRes != 0) {
// Types are different, but check whether we can bitcast them.
if (!TyL->isFirstClassType()) {
/// cmpType - compares two types,
/// defines total ordering among the types set.
/// See method declaration comments for more details.
-int FunctionComparator::cmpType(Type *TyL, Type *TyR) const {
+int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
PointerType *PTyL = dyn_cast<PointerType>(TyL);
PointerType *PTyR = dyn_cast<PointerType>(TyR);
return cmpNumbers(STyL->isPacked(), STyR->isPacked());
for (unsigned i = 0, e = STyL->getNumElements(); i != e; ++i) {
- if (int Res = cmpType(STyL->getElementType(i),
- STyR->getElementType(i)))
+ if (int Res = cmpTypes(STyL->getElementType(i), STyR->getElementType(i)))
return Res;
}
return 0;
if (FTyL->isVarArg() != FTyR->isVarArg())
return cmpNumbers(FTyL->isVarArg(), FTyR->isVarArg());
- if (int Res = cmpType(FTyL->getReturnType(), FTyR->getReturnType()))
+ if (int Res = cmpTypes(FTyL->getReturnType(), FTyR->getReturnType()))
return Res;
for (unsigned i = 0, e = FTyL->getNumParams(); i != e; ++i) {
- if (int Res = cmpType(FTyL->getParamType(i), FTyR->getParamType(i)))
+ if (int Res = cmpTypes(FTyL->getParamType(i), FTyR->getParamType(i)))
return Res;
}
return 0;
ArrayType *ATyR = cast<ArrayType>(TyR);
if (ATyL->getNumElements() != ATyR->getNumElements())
return cmpNumbers(ATyL->getNumElements(), ATyR->getNumElements());
- return cmpType(ATyL->getElementType(), ATyR->getElementType());
+ return cmpTypes(ATyL->getElementType(), ATyR->getElementType());
}
}
}
// and pointer-to-B are equivalent. This should be kept in sync with
// Instruction::isSameOperationAs.
// Read method declaration comments for more details.
-int FunctionComparator::cmpOperation(const Instruction *L,
- const Instruction *R) const {
+int FunctionComparator::cmpOperations(const Instruction *L,
+ const Instruction *R) const {
// Differences from Instruction::isSameOperationAs:
// * replace type comparison with calls to isEquivalentType.
// * we test for I->hasSameSubclassOptionalData (nuw/nsw/tail) at the top
if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
return Res;
- if (int Res = cmpType(L->getType(), R->getType()))
+ if (int Res = cmpTypes(L->getType(), R->getType()))
return Res;
if (int Res = cmpNumbers(L->getRawSubclassOptionalData(),
// if all operands are the same type
for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) {
if (int Res =
- cmpType(L->getOperand(i)->getType(), R->getOperand(i)->getType()))
+ cmpTypes(L->getOperand(i)->getType(), R->getOperand(i)->getType()))
return Res;
}
if (int Res = cmpNumbers(CI->getCallingConv(),
cast<CallInst>(R)->getCallingConv()))
return Res;
- return cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes());
+ if (int Res =
+ cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes()))
+ return Res;
+ return cmpNumbers(
+ (uint64_t)CI->getMetadata(LLVMContext::MD_range),
+ (uint64_t)cast<CallInst>(R)->getMetadata(LLVMContext::MD_range));
}
if (const InvokeInst *CI = dyn_cast<InvokeInst>(L)) {
if (int Res = cmpNumbers(CI->getCallingConv(),
cast<InvokeInst>(R)->getCallingConv()))
return Res;
- return cmpAttrs(CI->getAttributes(), cast<InvokeInst>(R)->getAttributes());
+ if (int Res =
+ cmpAttrs(CI->getAttributes(), cast<InvokeInst>(R)->getAttributes()))
+ return Res;
+ return cmpNumbers(
+ (uint64_t)CI->getMetadata(LLVMContext::MD_range),
+ (uint64_t)cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range));
}
if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
ArrayRef<unsigned> LIndices = IVI->getIndices();
// Determine whether two GEP operations perform the same underlying arithmetic.
// Read method declaration comments for more details.
-int FunctionComparator::cmpGEP(const GEPOperator *GEPL,
+int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
const GEPOperator *GEPR) {
unsigned int ASL = GEPL->getPointerAddressSpace();
if (int Res =
cmpValues(GEPL->getPointerOperand(), GEPR->getPointerOperand()))
return Res;
- if (int Res = cmpGEP(GEPL, GEPR))
+ if (int Res = cmpGEPs(GEPL, GEPR))
return Res;
} else {
- if (int Res = cmpOperation(InstL, InstR))
+ if (int Res = cmpOperations(InstL, InstR))
return Res;
assert(InstL->getNumOperands() == InstR->getNumOperands());
if (int Res = cmpNumbers(OpL->getValueID(), OpR->getValueID()))
return Res;
// TODO: Already checked in cmpOperation
- if (int Res = cmpType(OpL->getType(), OpR->getType()))
+ if (int Res = cmpTypes(OpL->getType(), OpR->getType()))
return Res;
}
}
if (int Res = cmpNumbers(FnL->getCallingConv(), FnR->getCallingConv()))
return Res;
- if (int Res = cmpType(FnL->getFunctionType(), FnR->getFunctionType()))
+ if (int Res = cmpTypes(FnL->getFunctionType(), FnR->getFunctionType()))
return Res;
assert(FnL->arg_size() == FnR->arg_size() &&
bool runOnModule(Module &M) override;
private:
- typedef DenseSet<ComparableFunction> FnSetType;
+ typedef std::set<FunctionPtr> FnTreeType;
/// A work queue of functions that may have been modified and should be
/// analyzed again.
/// Returns true, if sanity check has been passed, and false if failed.
bool doSanityCheck(std::vector<WeakVH> &Worklist);
- /// Insert a ComparableFunction into the FnSet, or merge it away if it's
+ /// Insert a Function into the FnTree, or merge it away if it's
/// equal to one that's already present.
- bool insert(ComparableFunction &NewF);
+ bool insert(Function *NewFunction);
- /// Remove a Function from the FnSet and queue it up for a second sweep of
+ /// Remove a Function from the FnTree and queue it up for a second sweep of
/// analysis.
void remove(Function *F);
- /// Find the functions that use this Value and remove them from FnSet and
+ /// Find the functions that use this Value and remove them from FnTree and
/// queue the functions.
void removeUsers(Value *V);
/// The set of all distinct functions. Use the insert() and remove() methods
/// to modify it.
- FnSetType FnSet;
+ FnTreeType FnTree;
/// DataLayout for more accurate GEP comparisons. May be NULL.
const DataLayout *DL;
if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage())
Deferred.push_back(WeakVH(I));
}
- FnSet.resize(Deferred.size());
do {
std::vector<WeakVH> Worklist;
Function *F = cast<Function>(*I);
if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
!F->mayBeOverridden()) {
- ComparableFunction CF = ComparableFunction(F, DL);
- Changed |= insert(CF);
+ Changed |= insert(F);
}
}
Function *F = cast<Function>(*I);
if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
F->mayBeOverridden()) {
- ComparableFunction CF = ComparableFunction(F, DL);
- Changed |= insert(CF);
+ Changed |= insert(F);
}
}
- DEBUG(dbgs() << "size of FnSet: " << FnSet.size() << '\n');
+ DEBUG(dbgs() << "size of FnTree: " << FnTree.size() << '\n');
} while (!Deferred.empty());
- FnSet.clear();
+ FnTree.clear();
return Changed;
}
-bool DenseMapInfo<ComparableFunction>::isEqual(const ComparableFunction &LHS,
- const ComparableFunction &RHS) {
- if (LHS.getFunc() == RHS.getFunc() &&
- LHS.getHash() == RHS.getHash())
- return true;
- if (!LHS.getFunc() || !RHS.getFunc())
- return false;
-
- // One of these is a special "underlying pointer comparison only" object.
- if (LHS.getDataLayout() == ComparableFunction::LookupOnly ||
- RHS.getDataLayout() == ComparableFunction::LookupOnly)
- return false;
-
- assert(LHS.getDataLayout() == RHS.getDataLayout() &&
- "Comparing functions for different targets");
-
- return FunctionComparator(LHS.getDataLayout(), LHS.getFunc(), RHS.getFunc())
- .compare() == 0;
-}
-
// Replace direct callers of Old with New.
void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType());
++NumFunctionsMerged;
}
-// Insert a ComparableFunction into the FnSet, or merge it away if equal to one
+// Insert NewFunction into the FnTree, or merge it away if equal to one
// that was already inserted.
-bool MergeFunctions::insert(ComparableFunction &NewF) {
- std::pair<FnSetType::iterator, bool> Result = FnSet.insert(NewF);
+bool MergeFunctions::insert(Function *NewFunction) {
+ std::pair<FnTreeType::iterator, bool> Result =
+ FnTree.insert(FunctionPtr(NewFunction, DL));
+
if (Result.second) {
- DEBUG(dbgs() << "Inserting as unique: " << NewF.getFunc()->getName() << '\n');
+ DEBUG(dbgs() << "Inserting as unique: " << NewFunction->getName() << '\n');
return false;
}
- const ComparableFunction &OldF = *Result.first;
+ const FunctionPtr &OldF = *Result.first;
// Don't merge tiny functions, since it can just end up making the function
// larger.
// FIXME: Should still merge them if they are unnamed_addr and produce an
// alias.
- if (NewF.getFunc()->size() == 1) {
- if (NewF.getFunc()->front().size() <= 2) {
- DEBUG(dbgs() << NewF.getFunc()->getName()
- << " is to small to bother merging\n");
+ if (NewFunction->size() == 1) {
+ if (NewFunction->front().size() <= 2) {
+ DEBUG(dbgs() << NewFunction->getName()
+ << " is to small to bother merging\n");
return false;
}
}
// Never thunk a strong function to a weak function.
- assert(!OldF.getFunc()->mayBeOverridden() ||
- NewF.getFunc()->mayBeOverridden());
+ assert(!OldF.getFunc()->mayBeOverridden() || NewFunction->mayBeOverridden());
- DEBUG(dbgs() << " " << OldF.getFunc()->getName() << " == "
- << NewF.getFunc()->getName() << '\n');
+ DEBUG(dbgs() << " " << OldF.getFunc()->getName()
+ << " == " << NewFunction->getName() << '\n');
- Function *DeleteF = NewF.getFunc();
- NewF.release();
+ Function *DeleteF = NewFunction;
mergeTwoFunctions(OldF.getFunc(), DeleteF);
return true;
}
-// Remove a function from FnSet. If it was already in FnSet, add it to Deferred
-// so that we'll look at it in the next round.
+// Remove F from FnTree. Only an entry that wraps F itself is erased; if one
+// was erased, add F to Deferred so that we'll look at it in the next round.
void MergeFunctions::remove(Function *F) {
// We need to make sure we remove F, not a function "equal" to F per the
// function equality comparator.
- //
- // The special "lookup only" ComparableFunction bypasses the expensive
- // function comparison in favour of a pointer comparison on the underlying
- // Function*'s.
- ComparableFunction CF = ComparableFunction(F, ComparableFunction::LookupOnly);
- if (FnSet.erase(CF)) {
- DEBUG(dbgs() << "Removed " << F->getName() << " from set and deferred it.\n");
+ FnTreeType::iterator found = FnTree.find(FunctionPtr(F, DL));
+ size_t Erased = 0;
+ if (found != FnTree.end() && found->getFunc() == F) {
+ Erased = 1;
+ FnTree.erase(found);
+ }
+
+ if (Erased) {
+ DEBUG(dbgs() << "Removed " << F->getName()
+ << " from set and deferred it.\n");
Deferred.push_back(F);
}
}