Re-sort all of the includes with ./utils/sort_includes.py so that
[oota-llvm.git] / lib / Transforms / Instrumentation / DataFlowSanitizer.cpp
index 7159cc049946914328180173e7648ee72816a1ab..338584c31cdef51d35cc71ae64b0c900e6ed7de2 100644 (file)
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Type.h"
@@ -95,6 +96,22 @@ static cl::opt<bool> ClArgsABI(
     cl::desc("Use the argument ABI rather than the TLS ABI"),
     cl::Hidden);
 
+// Controls whether the pass includes or ignores the labels of pointers in load
+// instructions.
+static cl::opt<bool> ClCombinePointerLabelsOnLoad(
+    "dfsan-combine-pointer-labels-on-load",
+    cl::desc("Combine the label of the pointer with the label of the data when "
+             "loading from memory."),
+    cl::Hidden, cl::init(true));
+
+// Controls whether the pass includes or ignores the labels of pointers in
+// store instructions.
+static cl::opt<bool> ClCombinePointerLabelsOnStore(
+    "dfsan-combine-pointer-labels-on-store",
+    cl::desc("Combine the label of the pointer with the label of the data when "
+             "storing in memory."),
+    cl::Hidden, cl::init(false));
+
 static cl::opt<bool> ClDebugNonzeroLabels(
     "dfsan-debug-nonzero-labels",
     cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, "
@@ -179,11 +196,18 @@ class DataFlowSanitizer : public ModulePass {
 
   Value *getShadowAddress(Value *Addr, Instruction *Pos);
   Value *combineShadows(Value *V1, Value *V2, Instruction *Pos);
-  bool isInstrumented(Function *F);
+  bool isInstrumented(const Function *F);
+  bool isInstrumented(const GlobalAlias *GA);
   FunctionType *getArgsFunctionType(FunctionType *T);
+  FunctionType *getTrampolineFunctionType(FunctionType *T);
   FunctionType *getCustomFunctionType(FunctionType *T);
   InstrumentedABI getInstrumentedABI();
   WrapperKind getWrapperKind(Function *F);
+  void addGlobalNamePrefix(GlobalValue *GV);
+  Function *buildWrapperFunction(Function *F, StringRef NewFName,
+                                 GlobalValue::LinkageTypes NewFLink,
+                                 FunctionType *NewFT);
+  Constant *getOrBuildTrampolineFunction(FunctionType *FT, StringRef FName);
 
  public:
   DataFlowSanitizer(StringRef ABIListFile = StringRef(),
@@ -283,9 +307,10 @@ FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) {
   return FunctionType::get(RetType, ArgTypes, T->isVarArg());
 }
 
-FunctionType *DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
+FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) {
   assert(!T->isVarArg());
   llvm::SmallVector<Type *, 4> ArgTypes;
+  ArgTypes.push_back(T->getPointerTo());
   std::copy(T->param_begin(), T->param_end(), std::back_inserter(ArgTypes));
   for (unsigned i = 0, e = T->getNumParams(); i != e; ++i)
     ArgTypes.push_back(ShadowTy);
@@ -295,6 +320,28 @@ FunctionType *DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
   return FunctionType::get(T->getReturnType(), ArgTypes, false);
 }
 
+FunctionType *DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
+  assert(!T->isVarArg());
+  llvm::SmallVector<Type *, 4> ArgTypes;
+  for (FunctionType::param_iterator i = T->param_begin(), e = T->param_end();
+       i != e; ++i) {
+    FunctionType *FT;
+    if (isa<PointerType>(*i) && (FT = dyn_cast<FunctionType>(cast<PointerType>(
+                                     *i)->getElementType()))) {
+      ArgTypes.push_back(getTrampolineFunctionType(FT)->getPointerTo());
+      ArgTypes.push_back(Type::getInt8PtrTy(*Ctx));
+    } else {
+      ArgTypes.push_back(*i);
+    }
+  }
+  for (unsigned i = 0, e = T->getNumParams(); i != e; ++i)
+    ArgTypes.push_back(ShadowTy);
+  Type *RetType = T->getReturnType();
+  if (!RetType->isVoidTy())
+    ArgTypes.push_back(ShadowPtrTy);
+  return FunctionType::get(T->getReturnType(), ArgTypes, false);
+}
+
 bool DataFlowSanitizer::doInitialization(Module &M) {
   DL = getAnalysisIfAvailable<DataLayout>();
   if (!DL)
@@ -343,10 +390,14 @@ bool DataFlowSanitizer::doInitialization(Module &M) {
   return true;
 }
 
-bool DataFlowSanitizer::isInstrumented(Function *F) {
+bool DataFlowSanitizer::isInstrumented(const Function *F) {
   return !ABIList->isIn(*F, "uninstrumented");
 }
 
+bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) {
+  return !ABIList->isIn(*GA, "uninstrumented");
+}
+
 DataFlowSanitizer::InstrumentedABI DataFlowSanitizer::getInstrumentedABI() {
   return ClArgsABI ? IA_Args : IA_TLS;
 }
@@ -362,6 +413,85 @@ DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) {
   return WK_Warning;
 }
 
+void DataFlowSanitizer::addGlobalNamePrefix(GlobalValue *GV) {
+  std::string GVName = GV->getName(), Prefix = "dfs$";
+  GV->setName(Prefix + GVName);
+
+  // Try to change the name of the function in module inline asm.  We only do
+  // this for specific asm directives, currently only ".symver", to try to avoid
+  // corrupting asm which happens to contain the symbol name as a substring.
+  // Note that the substitution for .symver assumes that the versioned symbol
+  // also has an instrumented name.
+  std::string Asm = GV->getParent()->getModuleInlineAsm();
+  std::string SearchStr = ".symver " + GVName + ",";
+  size_t Pos = Asm.find(SearchStr);
+  if (Pos != std::string::npos) {
+    Asm.replace(Pos, SearchStr.size(),
+                ".symver " + Prefix + GVName + "," + Prefix);
+    GV->getParent()->setModuleInlineAsm(Asm);
+  }
+}
+
+Function *
+DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
+                                        GlobalValue::LinkageTypes NewFLink,
+                                        FunctionType *NewFT) {
+  FunctionType *FT = F->getFunctionType();
+  Function *NewF = Function::Create(NewFT, NewFLink, NewFName,
+                                    F->getParent());
+  NewF->copyAttributesFrom(F);
+  NewF->removeAttributes(
+      AttributeSet::ReturnIndex,
+      AttributeFuncs::typeIncompatible(NewFT->getReturnType(),
+                                       AttributeSet::ReturnIndex));
+
+  BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF);
+  std::vector<Value *> Args;
+  unsigned n = FT->getNumParams();
+  for (Function::arg_iterator ai = NewF->arg_begin(); n != 0; ++ai, --n)
+    Args.push_back(&*ai);
+  CallInst *CI = CallInst::Create(F, Args, "", BB);
+  if (FT->getReturnType()->isVoidTy())
+    ReturnInst::Create(*Ctx, BB);
+  else
+    ReturnInst::Create(*Ctx, CI, BB);
+
+  return NewF;
+}
+
+Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
+                                                          StringRef FName) {
+  FunctionType *FTT = getTrampolineFunctionType(FT);
+  Constant *C = Mod->getOrInsertFunction(FName, FTT);
+  Function *F = dyn_cast<Function>(C);
+  if (F && F->isDeclaration()) {
+    F->setLinkage(GlobalValue::LinkOnceODRLinkage);
+    BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
+    std::vector<Value *> Args;
+    Function::arg_iterator AI = F->arg_begin(); ++AI;
+    for (unsigned N = FT->getNumParams(); N != 0; ++AI, --N)
+      Args.push_back(&*AI);
+    CallInst *CI =
+        CallInst::Create(&F->getArgumentList().front(), Args, "", BB);
+    ReturnInst *RI;
+    if (FT->getReturnType()->isVoidTy())
+      RI = ReturnInst::Create(*Ctx, BB);
+    else
+      RI = ReturnInst::Create(*Ctx, CI, BB);
+
+    DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true);
+    Function::arg_iterator ValAI = F->arg_begin(), ShadowAI = AI; ++ValAI;
+    for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++ShadowAI, --N)
+      DFSF.ValShadowMap[ValAI] = ShadowAI;
+    DFSanVisitor(DFSF).visitCallInst(*CI);
+    if (!FT->getReturnType()->isVoidTy())
+      new StoreInst(DFSF.getShadow(RI->getReturnValue()),
+                    &F->getArgumentList().back(), RI);
+  }
+
+  return C;
+}
+
 bool DataFlowSanitizer::runOnModule(Module &M) {
   if (!DL)
     return false;
@@ -391,6 +521,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
   DFSanUnionLoadFn =
       Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy);
   if (Function *F = dyn_cast<Function>(DFSanUnionLoadFn)) {
+    F->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadOnly);
     F->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
   }
   DFSanUnimplementedFn =
@@ -415,6 +546,32 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
       FnsToInstrument.push_back(&*i);
   }
 
+  // Give function aliases prefixes when necessary, and build wrappers where the
+  // instrumentedness is inconsistent.
+  for (Module::alias_iterator i = M.alias_begin(), e = M.alias_end(); i != e;) {
+    GlobalAlias *GA = &*i;
+    ++i;
+    // Don't stop on weak.  We assume people aren't playing games with the
+    // instrumentedness of overridden weak aliases.
+    if (Function *F = dyn_cast<Function>(
+            GA->resolveAliasedGlobal(/*stopOnWeak=*/false))) {
+      bool GAInst = isInstrumented(GA), FInst = isInstrumented(F);
+      if (GAInst && FInst) {
+        addGlobalNamePrefix(GA);
+      } else if (GAInst != FInst) {
+        // Non-instrumented alias of an instrumented function, or vice versa.
+        // Replace the alias with a native-ABI wrapper of the aliasee.  The pass
+        // below will take care of instrumenting it.
+        Function *NewF =
+            buildWrapperFunction(F, "", GA->getLinkage(), F->getFunctionType());
+        GA->replaceAllUsesWith(NewF);
+        NewF->takeName(GA);
+        GA->eraseFromParent();
+        FnsToInstrument.push_back(NewF);
+      }
+    }
+  }
+
   AttrBuilder B;
   B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone);
   ReadOnlyNoneAttrs = AttributeSet::get(*Ctx, AttributeSet::FunctionIndex, B);
@@ -427,12 +584,13 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
     Function &F = **i;
     FunctionType *FT = F.getFunctionType();
 
-    if (FT->getNumParams() == 0 && !FT->isVarArg() &&
-        FT->getReturnType()->isVoidTy())
-      continue;
+    bool IsZeroArgsVoidRet = (FT->getNumParams() == 0 && !FT->isVarArg() &&
+                              FT->getReturnType()->isVoidTy());
 
     if (isInstrumented(&F)) {
-      if (getInstrumentedABI() == IA_Args) {
+      // Instrumented functions get a 'dfs$' prefix.  This allows us to more
+      // easily identify cases of mismatching ABIs.
+      if (getInstrumentedABI() == IA_Args && !IsZeroArgsVoidRet) {
         FunctionType *NewFT = getArgsFunctionType(FT);
         Function *NewF = Function::Create(NewFT, F.getLinkage(), "", &M);
         NewF->copyAttributesFrom(&F);
@@ -463,41 +621,27 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
         NewF->takeName(&F);
         F.eraseFromParent();
         *i = NewF;
+        addGlobalNamePrefix(NewF);
+      } else {
+        addGlobalNamePrefix(&F);
       }
                // Hopefully, nobody will try to indirectly call a vararg
                // function... yet.
     } else if (FT->isVarArg()) {
       UnwrappedFnMap[&F] = &F;
       *i = 0;
-    } else {
+    } else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) {
       // Build a wrapper function for F.  The wrapper simply calls F, and is
       // added to FnsToInstrument so that any instrumentation according to its
       // WrapperKind is done in the second pass below.
       FunctionType *NewFT = getInstrumentedABI() == IA_Args
                                 ? getArgsFunctionType(FT)
                                 : FT;
-      Function *NewF =
-          Function::Create(NewFT, GlobalValue::LinkOnceODRLinkage,
-                           std::string("dfsw$") + F.getName(), &M);
-      NewF->copyAttributesFrom(&F);
-      NewF->removeAttributes(
-              AttributeSet::ReturnIndex,
-              AttributeFuncs::typeIncompatible(NewFT->getReturnType(),
-                                               AttributeSet::ReturnIndex));
+      Function *NewF = buildWrapperFunction(
+          &F, std::string("dfsw$") + std::string(F.getName()),
+          GlobalValue::LinkOnceODRLinkage, NewFT);
       if (getInstrumentedABI() == IA_TLS)
-        NewF->removeAttributes(AttributeSet::FunctionIndex,
-                               ReadOnlyNoneAttrs);
-
-      BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF);
-      std::vector<Value *> Args;
-      unsigned n = FT->getNumParams();
-      for (Function::arg_iterator ai = NewF->arg_begin(); n != 0; ++ai, --n)
-        Args.push_back(&*ai);
-      CallInst *CI = CallInst::Create(&F, Args, "", BB);
-      if (FT->getReturnType()->isVoidTy())
-        ReturnInst::Create(*Ctx, BB);
-      else
-        ReturnInst::Create(*Ctx, CI, BB);
+        NewF->removeAttributes(AttributeSet::FunctionIndex, ReadOnlyNoneAttrs);
 
       Value *WrappedFnCst =
           ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT));
@@ -591,10 +735,9 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
         while (isa<PHINode>(Pos) || isa<AllocaInst>(Pos))
           Pos = Pos->getNextNode();
         IRBuilder<> IRB(Pos);
-        Instruction *NeInst = cast<Instruction>(
-            IRB.CreateICmpNE(*i, DFSF.DFS.ZeroShadow));
+        Value *Ne = IRB.CreateICmpNE(*i, DFSF.DFS.ZeroShadow);
         BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
-            NeInst, /*Unreachable=*/ false, ColdCallWeights));
+            Ne, Pos, /*Unreachable=*/false, ColdCallWeights));
         IRBuilder<> ThenIRB(BI);
         ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn);
       }
@@ -694,26 +837,20 @@ Value *DataFlowSanitizer::combineShadows(Value *V1, Value *V2,
   IRBuilder<> IRB(Pos);
   BasicBlock *Head = Pos->getParent();
   Value *Ne = IRB.CreateICmpNE(V1, V2);
-  Instruction *NeInst = dyn_cast<Instruction>(Ne);
-  if (NeInst) {
-    BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
-        NeInst, /*Unreachable=*/ false, ColdCallWeights));
-    IRBuilder<> ThenIRB(BI);
-    CallInst *Call = ThenIRB.CreateCall2(DFSanUnionFn, V1, V2);
-    Call->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
-    Call->addAttribute(1, Attribute::ZExt);
-    Call->addAttribute(2, Attribute::ZExt);
-
-    BasicBlock *Tail = BI->getSuccessor(0);
-    PHINode *Phi = PHINode::Create(ShadowTy, 2, "", Tail->begin());
-    Phi->addIncoming(Call, Call->getParent());
-    Phi->addIncoming(ZeroShadow, Head);
-    Pos = Phi;
-    return Phi;
-  } else {
-    assert(0 && "todo");
-    return 0;
-  }
+  BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
+      Ne, Pos, /*Unreachable=*/false, ColdCallWeights));
+  IRBuilder<> ThenIRB(BI);
+  CallInst *Call = ThenIRB.CreateCall2(DFSanUnionFn, V1, V2);
+  Call->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
+  Call->addAttribute(1, Attribute::ZExt);
+  Call->addAttribute(2, Attribute::ZExt);
+
+  BasicBlock *Tail = BI->getSuccessor(0);
+  PHINode *Phi = PHINode::Create(ShadowTy, 2, "", Tail->begin());
+  Phi->addIncoming(Call, Call->getParent());
+  Phi->addIncoming(V1, Head);
+  Pos = Phi;
+  return Phi;
 }
 
 // A convenience function which folds the shadows of each of the operands
@@ -851,14 +988,15 @@ void DFSanVisitor::visitLoadInst(LoadInst &LI) {
     Align = 1;
   }
   IRBuilder<> IRB(&LI);
-  Value *LoadedShadow =
-      DFSF.loadShadow(LI.getPointerOperand(), Size, Align, &LI);
-  Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand());
-  Value *CombinedShadow = DFSF.DFS.combineShadows(LoadedShadow, PtrShadow, &LI);
-  if (CombinedShadow != DFSF.DFS.ZeroShadow)
-    DFSF.NonZeroChecks.insert(CombinedShadow);
-
-  DFSF.setShadow(&LI, CombinedShadow);
+  Value *Shadow = DFSF.loadShadow(LI.getPointerOperand(), Size, Align, &LI);
+  if (ClCombinePointerLabelsOnLoad) {
+    Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand());
+    Shadow = DFSF.DFS.combineShadows(Shadow, PtrShadow, &LI);
+  }
+  if (Shadow != DFSF.DFS.ZeroShadow)
+    DFSF.NonZeroChecks.insert(Shadow);
+
+  DFSF.setShadow(&LI, Shadow);
 }
 
 void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align,
@@ -923,8 +1061,13 @@ void DFSanVisitor::visitStoreInst(StoreInst &SI) {
   } else {
     Align = 1;
   }
-  DFSF.storeShadow(SI.getPointerOperand(), Size, Align,
-                   DFSF.getShadow(SI.getValueOperand()), &SI);
+
+  Value* Shadow = DFSF.getShadow(SI.getValueOperand());
+  if (ClCombinePointerLabelsOnStore) {
+    Value *PtrShadow = DFSF.getShadow(SI.getPointerOperand());
+    Shadow = DFSF.DFS.combineShadows(Shadow, PtrShadow, &SI);
+  }
+  DFSF.storeShadow(SI.getPointerOperand(), Size, Align, Shadow, &SI);
 }
 
 void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) {
@@ -1113,8 +1256,24 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
         std::vector<Value *> Args;
 
         CallSite::arg_iterator i = CS.arg_begin();
-        for (unsigned n = FT->getNumParams(); n != 0; ++i, --n)
-          Args.push_back(*i);
+        for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) {
+          Type *T = (*i)->getType();
+          FunctionType *ParamFT;
+          if (isa<PointerType>(T) &&
+              (ParamFT = dyn_cast<FunctionType>(
+                   cast<PointerType>(T)->getElementType()))) {
+            std::string TName = "dfst";
+            TName += utostr(FT->getNumParams() - n);
+            TName += "$";
+            TName += F->getName();
+            Constant *T = DFSF.DFS.getOrBuildTrampolineFunction(ParamFT, TName);
+            Args.push_back(T);
+            Args.push_back(
+                IRB.CreateBitCast(*i, Type::getInt8PtrTy(*DFSF.DFS.Ctx)));
+          } else {
+            Args.push_back(*i);
+          }
+        }
 
         i = CS.arg_begin();
         for (unsigned n = FT->getNumParams(); n != 0; ++i, --n)