Introduce a pointertracking pass.
author Torok Edwin <edwintorok@gmail.com>
Tue, 14 Jul 2009 18:44:28 +0000 (18:44 +0000)
committer Torok Edwin <edwintorok@gmail.com>
Tue, 14 Jul 2009 18:44:28 +0000 (18:44 +0000)
For now this only computes the allocated size of the memory pointed to by a
pointer, and the offset of a pointer from the start of its allocation.
The actual checkLimits part will come later, after another round of review.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@75657 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/Analysis/PointerTracking.h [new file with mode: 0644]
include/llvm/LinkAllPasses.h
lib/Analysis/PointerTracking.cpp [new file with mode: 0644]
test/Analysis/PointerTracking/dg.exp [new file with mode: 0644]
test/Analysis/PointerTracking/sizes.ll [new file with mode: 0644]

diff --git a/include/llvm/Analysis/PointerTracking.h b/include/llvm/Analysis/PointerTracking.h
new file mode 100644 (file)
index 0000000..2256839
--- /dev/null
@@ -0,0 +1,132 @@
+//===- PointerTracking.h - Pointer Bounds Tracking --------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements tracking of pointer bounds.
+// It knows that the libc functions "calloc" and "realloc" allocate memory, thus
+// you should avoid using this pass if they mean something else for your
+// language.
+//
+// All methods assume that the pointer is not NULL; if it is, then the returned
+// allocation size is wrong, and the result from checkLimits is wrong too.
+// They also assume that pointers are valid, and that the code being analyzed
+// is not a use-after-free scenario.
+// Due to these limitations, the real size should be considered to be either 0
+// or the "size" returned by these methods.
+//
+// Another analysis pass should be used to find use-after-free/NULL dereference
+// bugs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_POINTERTRACKING_H
+#define LLVM_ANALYSIS_POINTERTRACKING_H
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/PredIteratorCache.h"
+
+namespace llvm {
+  class DominatorTree;
+  class ScalarEvolution;
+  class SCEV;
+  class Loop;
+  class LoopInfo;
+  class TargetData;
+
+  // Result from solver, assuming pointer is not NULL,
+  // and it is not a use-after-free situation.
+  // This is the result type of PointerTracking's compareSCEV, checkLimits and
+  // isLoopGuardedBy queries.
+  enum SolverResult {
+    AlwaysFalse,// always false with above constraints
+    AlwaysTrue,// always true with above constraints
+    Unknown // it can sometimes be true, sometimes false, or it is undecided
+  };
+
+  // Function pass answering queries about the memory a pointer refers to:
+  // how many elements/bytes were allocated for it, and how far a pointer is
+  // from the base of its allocation. The pass never modifies the IR.
+  class PointerTracking : public FunctionPass {
+  public:
+    typedef ICmpInst::Predicate Predicate;
+    static char ID;
+    PointerTracking();
+
+    // Looks up the module's "calloc" and "realloc" functions and remembers
+    // them when their prototypes match the expected libc ones.
+    virtual bool doInitialization(Module &M);
+
+    // If this pointer directly points to an allocation, return
+    // the number of elements of P's pointee type that were allocated.
+    // Otherwise return CouldNotCompute.
+    // P must have pointer type.
+    // Since allocations can fail by returning NULL, the real element count
+    // for every allocation is either 0 or the value returned by this function.
+    const SCEV *getAllocationElementCount(Value *P) const;
+
+    // Same as getAllocationElementCount() but returns size in bytes.
+    // We consider one byte as 8 bits.
+    const SCEV *getAllocationSizeInBytes(Value *V) const;
+
+    // Given a Pointer, determine a base pointer of known size, and an offset
+    // therefrom.
+    // When unable to determine, sets Base to NULL, and BaseSize/Offset to
+    // CouldNotCompute.
+    // BaseSize, and Offset are in bytes: Pointer == Base + Offset
+    void getPointerOffset(Value *Pointer, Value *&Base, const SCEV *& BaseSize,
+                          const SCEV *&Offset) const;
+
+    // Compares the 2 scalar evolution expressions according to predicate,
+    // and if it can prove that the result is always true or always false
+    // return AlwaysTrue/AlwaysFalse. Otherwise it returns Unknown.
+    enum SolverResult compareSCEV(const SCEV *A, Predicate Pred, const SCEV *B,
+                                  const Loop *L);
+
+    // Determines whether the condition LHS <Pred1> RHS is sufficient
+    // for the condition A <Pred2> B to hold.
+    // NOTE(review): the pairing of Pred1 with LHS/RHS and Pred2 with A/B is
+    // inferred from the parameter order -- confirm against the definition.
+    // Currently only ULT/ULE is supported.
+    // This errs on the side of returning false.
+    bool conditionSufficient(const SCEV *LHS, Predicate Pred1, const SCEV *RHS,
+                             const SCEV *A, Predicate Pred2, const SCEV *B,
+                             const Loop *L);
+
+    // Determines whether Offset is known to be always in [0, Limit) bounds.
+    // This errs on the side of returning Unknown.
+    enum SolverResult checkLimits(const SCEV *Offset, const SCEV *Limit,
+                                  BasicBlock *BB);
+
+    // Records the function and the analyses used by the queries above.
+    virtual bool runOnFunction(Function &F);
+    // Declares the analyses this pass requires and preserves.
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+    // Dumps what is known about every pointer-typed instruction of the
+    // function last given to runOnFunction.
+    void print(raw_ostream &OS, const Module* = 0) const;
+    // std::ostream flavour of the print method above.
+    virtual void print(std::ostream &OS, const Module* = 0) const;
+  private:
+    // Function last given to runOnFunction.
+    Function *FF;
+    // Optional: obtained with getAnalysisIfAvailable, so it may be NULL.
+    TargetData *TD;
+    // Analyses fetched in runOnFunction.
+    ScalarEvolution *SE;
+    LoopInfo *LI;
+    DominatorTree *DT;
+
+    // Set by doInitialization; NULL when the module has no such function or
+    // its prototype is not one of the recognized ones.
+    Function *callocFunc;
+    Function *reallocFunc;
+    // Cleared at the start of every runOnFunction.
+    PredIteratorCache predCache;
+
+    // Expected to be empty whenever runOnFunction starts (asserted there).
+    SmallPtrSet<const SCEV*, 1> analyzing;
+
+    // Asks ScalarEvolution whether loop L is guarded by "A Pred B", also
+    // trying the swapped and inverted forms of the predicate.
+    enum SolverResult isLoopGuardedBy(const Loop *L, Predicate Pred,
+                                      const SCEV *A, const SCEV *B) const;
+    static bool isMonotonic(const SCEV *S);
+    bool scevPositive(const SCEV *A, const Loop *L, bool strict=true) const;
+    bool conditionSufficient(Value *Cond, bool negated,
+                             const SCEV *A, Predicate Pred, const SCEV *B);
+    Value *getConditionToReach(BasicBlock *A,
+                               DomTreeNodeBase<BasicBlock> *B,
+                               bool &negated);
+    Value *getConditionToReach(BasicBlock *A,
+                               BasicBlock *B,
+                               bool &negated);
+    // Number of elements allocated for P; the element type is stored in Ty.
+    const SCEV *computeAllocationCount(Value *P, const Type *&Ty) const;
+    // Number of elements of type Ty allocated for P.
+    const SCEV *computeAllocationCountForType(Value *P, const Type *Ty) const;
+  };
+}
+#endif
+
index 4891f24..c0cd766 100644 (file)
@@ -20,6 +20,7 @@
 #include "llvm/Analysis/IntervalPartition.h"
 #include "llvm/Analysis/LoopVR.h"
 #include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/PointerTracking.h"
 #include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Assembly/PrintModulePass.h"
@@ -136,6 +137,7 @@ namespace {
       (void)new llvm::FindUsedTypes();
       (void)new llvm::ScalarEvolution();
       (void)new llvm::LoopVR();
+      (void)new llvm::PointerTracking();
       ((llvm::Function*)0)->viewCFGOnly();
       llvm::AliasSetTracker X(*(llvm::AliasAnalysis*)0);
       X.add((llvm::Value*)0, 0);  // for -print-alias-sets
diff --git a/lib/Analysis/PointerTracking.cpp b/lib/Analysis/PointerTracking.cpp
new file mode 100644 (file)
index 0000000..1ae2fe6
--- /dev/null
@@ -0,0 +1,261 @@
+//===- PointerTracking.cpp - Pointer Bounds Tracking ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements tracking of pointer bounds.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/PointerTracking.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/Value.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+// Pass identification; its address is handed to the FunctionPass constructor.
+char PointerTracking::ID=0;
+// Every cached pointer starts out null so that nothing can read an
+// indeterminate value before doInitialization()/runOnFunction() fill them in.
+PointerTracking::PointerTracking()
+  : FunctionPass(&ID), FF(0), TD(0), SE(0), LI(0), DT(0),
+    callocFunc(0), reallocFunc(0) {}
+
+// Per-function entry point: drops the predecessor cache and records the
+// function together with the analyses that later queries rely on.
+// The IR is never modified, hence the constant 'false' result.
+bool PointerTracking::runOnFunction(Function &F) {
+  assert(analyzing.empty());
+  predCache.clear();
+
+  // Analyses listed as required in getAnalysisUsage.
+  DT = &getAnalysis<DominatorTree>();
+  LI = &getAnalysis<LoopInfo>();
+  SE = &getAnalysis<ScalarEvolution>();
+  // TargetData is optional, so TD may end up null.
+  TD = getAnalysisIfAvailable<TargetData>();
+
+  FF = &F;
+  return false;
+}
+
+// Declares what this pass needs: DominatorTree, LoopInfo and ScalarEvolution
+// are required transitively, and all analyses are marked as preserved.
+void PointerTracking::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequiredTransitive<DominatorTree>();
+  AU.addRequiredTransitive<LoopInfo>();
+  AU.addRequiredTransitive<ScalarEvolution>();
+  AU.setPreservesAll();
+}
+
+// Caches the module's "calloc" and "realloc" functions, but only when their
+// prototypes are one of:
+//   i8* calloc(i64, i64)    i8* calloc(i32, i32)
+//   i8* realloc(i8*, i64)   i8* realloc(i8*, i32)
+// With any other prototype (or no such function) the member is left null.
+// Always returns false.
+bool PointerTracking::doInitialization(Module &M) {
+  const Type *BytePtrTy = PointerType::getUnqual(Type::Int8Ty);
+
+  callocFunc = M.getFunction("calloc");
+  if (callocFunc) {
+    std::vector<const Type*> wideParams, narrowParams;
+    wideParams.push_back(Type::Int64Ty);
+    wideParams.push_back(Type::Int64Ty);
+    narrowParams.push_back(Type::Int32Ty);
+    narrowParams.push_back(Type::Int32Ty);
+
+    const FunctionType *WideTy =
+      FunctionType::get(BytePtrTy, wideParams, false);
+    const FunctionType *NarrowTy =
+      FunctionType::get(BytePtrTy, narrowParams, false);
+    const FunctionType *ActualTy = callocFunc->getFunctionType();
+    const bool Recognized = (ActualTy == WideTy) || (ActualTy == NarrowTy);
+    if (!Recognized)
+      callocFunc = 0; // Give up
+  }
+
+  reallocFunc = M.getFunction("realloc");
+  if (reallocFunc) {
+    std::vector<const Type*> wideParams, narrowParams;
+    wideParams.push_back(BytePtrTy);
+    wideParams.push_back(Type::Int64Ty);
+    narrowParams.push_back(BytePtrTy);
+    narrowParams.push_back(Type::Int32Ty);
+
+    const FunctionType *WideTy =
+      FunctionType::get(BytePtrTy, wideParams, false);
+    const FunctionType *NarrowTy =
+      FunctionType::get(BytePtrTy, narrowParams, false);
+    const FunctionType *ActualTy = reallocFunc->getFunctionType();
+    const bool Recognized = (ActualTy == WideTy) || (ActualTy == NarrowTy);
+    if (!Recognized)
+      reallocFunc = 0; // Give up
+  }
+  return false;
+}
+
+// Calculates the number of elements allocated for pointer P,
+// the type of the element is stored in Ty.
+// Pointer casts on P are looked through. Recognized allocations are
+// AllocationInst instructions, global variables, and direct calls to the
+// calloc/realloc functions found by doInitialization.
+// For anything else CouldNotCompute is returned and Ty is left untouched.
+const SCEV *PointerTracking::computeAllocationCount(Value *P,
+                                                    const Type *&Ty) const {
+  Value *V = P->stripPointerCasts();
+  if (AllocationInst *AI = dyn_cast<AllocationInst>(V)) {
+    Value *arraySize = AI->getArraySize();
+    Ty = AI->getAllocatedType();
+    // arraySize elements of type Ty.
+    return SE->getSCEV(arraySize);
+  }
+
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
+    if (GV->hasDefinitiveInitializer()) {
+      Constant *C = GV->getInitializer();
+      if (const ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) {
+        Ty = ATy->getElementType();
+        return SE->getConstant(Type::Int32Ty, ATy->getNumElements());
+      }
+    }
+    // One object of the global's value type. GV->getType() is the type of the
+    // *address* of the global, so using it as the element type would report
+    // the size of a pointer instead of the size of the object.
+    const Type *ValueTy = GV->getType()->getElementType();
+    if (!ValueTy->isSized()) // e.g. opaque type: no size can be computed
+      return SE->getCouldNotCompute();
+    Ty = ValueTy;
+    return SE->getConstant(Type::Int32Ty, 1);
+    //TODO: implement more tracking for globals
+  }
+
+  if (CallInst *CI = dyn_cast<CallInst>(V)) {
+    CallSite CS(CI);
+    Function *F = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+    // Indirect call: F is null. Stop here, otherwise a null F would compare
+    // equal to a null callocFunc/reallocFunc (function absent from the module)
+    // and the call would wrongly be treated as an allocation.
+    if (!F)
+      return SE->getCouldNotCompute();
+    // The callee may be reached through a cast, so do not trust the call to
+    // carry the two arguments read below.
+    if (CS.arg_size() < 2)
+      return SE->getCouldNotCompute();
+    const Loop *L = LI->getLoopFor(CI->getParent());
+    if (F == callocFunc) {
+      Ty = Type::Int8Ty;
+      // calloc allocates arg0*arg1 bytes.
+      return SE->getSCEVAtScope(SE->getMulExpr(SE->getSCEV(CS.getArgument(0)),
+                                               SE->getSCEV(CS.getArgument(1))),
+                                L);
+    } else if (F == reallocFunc) {
+      Ty = Type::Int8Ty;
+      // realloc allocates arg1 bytes.
+      return SE->getSCEVAtScope(CS.getArgument(1), L);
+    }
+  }
+
+  return SE->getCouldNotCompute();
+}
+
+// Calculates the number of elements of type Ty allocated for P.
+// The count found by computeAllocationCount is rescaled from its own element
+// type to Ty. Returns CouldNotCompute when the allocation is unknown, when the
+// sizes are needed but TargetData is unavailable or a type has no size, or
+// when the allocation does not hold a whole number of Ty elements.
+const SCEV *PointerTracking::computeAllocationCountForType(Value *P,
+                                                           const Type *Ty)
+  const {
+    const Type *elementTy = 0;
+    const SCEV *Count = computeAllocationCount(P, elementTy);
+    if (isa<SCEVCouldNotCompute>(Count))
+      return Count;
+    if (elementTy == Ty)
+      return Count;
+
+    if (!TD) // need TargetData from this point forward
+      return SE->getCouldNotCompute();
+
+    // Only sized types have an allocation size (functions, opaque types, ...
+    // do not), so do not ask TargetData about anything else.
+    if (!elementTy || !elementTy->isSized() || !Ty->isSized())
+      return SE->getCouldNotCompute();
+
+    uint64_t elementSize = TD->getTypeAllocSize(elementTy);
+    uint64_t wantSize = TD->getTypeAllocSize(Ty);
+    if (elementSize == wantSize)
+      return Count;
+    // A zero-sized Ty would make the modulo and division below undefined.
+    if (wantSize == 0)
+      return SE->getCouldNotCompute();
+    if (elementSize % wantSize) //fractional counts not possible
+      return SE->getCouldNotCompute();
+    return SE->getMulExpr(Count, SE->getConstant(Count->getType(),
+                                                 elementSize/wantSize));
+}
+
+// Element count of the allocation V points to, measured in units of V's
+// pointee type. V must have pointer type (enforced by cast<>).
+const SCEV *PointerTracking::getAllocationElementCount(Value *V) const {
+  // We only deal with pointers.
+  const Type *PointeeTy = cast<PointerType>(V->getType())->getElementType();
+  return computeAllocationCountForType(V, PointeeTy);
+}
+
+// Size of the allocation V points to, expressed as a count of i8 elements,
+// i.e. in bytes.
+const SCEV *PointerTracking::getAllocationSizeInBytes(Value *V) const {
+  const Type *ByteTy = Type::Int8Ty;
+  return computeAllocationCountForType(V, ByteTy);
+}
+
+// Wrapper around ScalarEvolution::isLoopGuardedByCond that checks the swapped
+// and inverted predicate too.
+// Returns AlwaysTrue when the loop guard establishes "A Pred B" (directly or
+// as "B swapped(Pred) A"), AlwaysFalse when it establishes the inverse
+// predicate (in either operand order), and Unknown otherwise.
+enum SolverResult PointerTracking::isLoopGuardedBy(const Loop *L,
+                                                   Predicate Pred,
+                                                   const SCEV *A,
+                                                   const SCEV *B) const {
+  const Predicate Swapped = ICmpInst::getSwappedPredicate(Pred);
+  if (SE->isLoopGuardedByCond(L, Pred, A, B))
+    return AlwaysTrue;
+  if (SE->isLoopGuardedByCond(L, Swapped, B, A))
+    return AlwaysTrue;
+
+  // From here on the predicates are the negation of the requested one, so a
+  // guard that establishes them proves the requested condition false.
+  const Predicate Inverse = ICmpInst::getInversePredicate(Pred);
+  const Predicate InverseSwapped = ICmpInst::getSwappedPredicate(Inverse);
+  if (SE->isLoopGuardedByCond(L, InverseSwapped, B, A))
+    return AlwaysFalse;
+  if (SE->isLoopGuardedByCond(L, Inverse, A, B))
+    return AlwaysFalse;
+  return Unknown;
+}
+
+// Placeholder: the real limit check has not been merged yet, so every query
+// is answered with Unknown and none of the parameters are looked at.
+enum SolverResult PointerTracking::checkLimits(const SCEV *Offset,
+                                               const SCEV *Limit,
+                                               BasicBlock *BB)
+{
+  //FIXME: merge implementation
+  return Unknown;
+}
+
+// Splits Pointer into the object it is based on plus a byte offset into it.
+// On success Base is that object, Limit is its allocation size in bytes and
+// Offset is the SCEV for (Pointer - Base). If either the size or the offset
+// cannot be computed, Base is set to null and both Limit and Offset are
+// CouldNotCompute.
+void PointerTracking::getPointerOffset(Value *Pointer, Value *&Base,
+                                       const SCEV *&Limit,
+                                       const SCEV *&Offset) const
+{
+    Value *Stripped = Pointer->stripPointerCasts();
+    Base = Stripped->getUnderlyingObject();
+
+    const SCEV *Size = getAllocationSizeInBytes(Base);
+    // Only ask for the distance when the size of the base is known; otherwise
+    // the failure value of the size doubles as the failure value of the offset.
+    const SCEV *Distance = Size;
+    if (!isa<SCEVCouldNotCompute>(Size))
+      Distance = SE->getMinusSCEV(SE->getSCEV(Stripped), SE->getSCEV(Base));
+
+    if (isa<SCEVCouldNotCompute>(Distance)) {
+      Base = 0;
+      Limit = Distance;
+      Offset = Distance;
+      return;
+    }
+
+    Limit = Size;
+    Offset = Distance;
+}
+
+// Dumps the tracking results for every pointer-typed instruction of the
+// function last given to runOnFunction:
+//  - an instruction that is its own base is printed with its element count
+//    and allocated byte count;
+//  - any other instruction with a known base is printed with that base, its
+//    offset from it, and the verdict of checkLimits.
+// Instructions without a known base are skipped. M is unused.
+void PointerTracking::print(raw_ostream &OS, const Module* M) const {
+  // Calling some PT methods may cause caches to be updated, however
+  // this should be safe for the same reason it's safe for SCEV.
+  PointerTracking &PT = *const_cast<PointerTracking*>(this);
+  for (inst_iterator I=inst_begin(*FF), E=inst_end(*FF); I != E; ++I) {
+    if (!isa<PointerType>(I->getType()))
+      continue;
+    Value *Base;
+    const SCEV *Limit, *Offset;
+    getPointerOffset(&*I, Base, Limit, Offset);
+    if (!Base)
+      continue;
+
+    if (Base == &*I) {
+      const SCEV *S = getAllocationElementCount(Base);
+      OS << *Base << " ==> " << *S << " elements, ";
+      OS << *Limit << " bytes allocated\n";
+      continue;
+    }
+    // Print the instruction itself (like *Base), not its address in memory.
+    OS << *I << " -- base: " << *Base;
+    OS << " offset: " << *Offset;
+
+    enum SolverResult res = PT.checkLimits(Offset, Limit, I->getParent());
+    switch (res) {
+    case AlwaysTrue:
+      OS << " always safe\n";
+      break;
+    case AlwaysFalse:
+      OS << " always unsafe\n";
+      break;
+    case Unknown:
+      OS << " <<unknown>>\n";
+      break;
+    }
+  }
+}
+
+// std::ostream flavour of print: wraps the stream in a raw_os_ostream and
+// forwards to the raw_ostream overload.
+void PointerTracking::print(std::ostream &o, const Module* M) const {
+  raw_os_ostream OS(o);
+  print(OS, M);
+}
+
+// Registers the pass under the name "pointertracking" with the description
+// "Track pointer bounds".
+// NOTE(review): the two trailing flags are presumably "CFG only" = false and
+// "is analysis" = true -- confirm against the RegisterPass constructor.
+static RegisterPass<PointerTracking> X("pointertracking",
+                                       "Track pointer bounds", false, true);
+}
diff --git a/test/Analysis/PointerTracking/dg.exp b/test/Analysis/PointerTracking/dg.exp
new file mode 100644 (file)
index 0000000..f200589
--- /dev/null
@@ -0,0 +1,3 @@
+# Hands the sorted list of .ll, .c and .cpp files found in this test directory
+# to RunLLVMTests (presumably provided by llvm.exp, loaded below).
+load_lib llvm.exp
+
+RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/PointerTracking/sizes.ll b/test/Analysis/PointerTracking/sizes.ll
new file mode 100644 (file)
index 0000000..5da4dcc
--- /dev/null
@@ -0,0 +1,84 @@
+; RUN: llvm-as < %s | opt -pointertracking -analyze | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+@.str = internal constant [5 x i8] c"1234\00"          ; <[5 x i8]*> [#uses=1]
+@test1p = global i8* getelementptr ([5 x i8]* @.str, i32 0, i32 0), align 8            ; <i8**> [#uses=1]
+@test1a = global [5 x i8] c"1234\00", align 1          ; <[5 x i8]*> [#uses=1]
+@test2a = global [5 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5], align 4                ; <[5 x i32]*> [#uses=2]
+@test2p = global i32* getelementptr ([5 x i32]* @test2a, i32 0, i32 0), align 8                ; <i32**> [#uses=1]
+@test0p = common global i32* null, align 8             ; <i32**> [#uses=1]
+@test0i = common global i32 0, align 4         ; <i32*> [#uses=1]
+
+; Passes pointers loaded from globals, and constant addresses of globals, to
+; @bar and returns the sum of the results. No FileCheck directives are
+; attached to this function.
+define i32 @foo0() nounwind {
+entry:
+       %tmp = load i32** @test0p               ; <i32*> [#uses=1]
+       %conv = bitcast i32* %tmp to i8*                ; <i8*> [#uses=1]
+       %call = tail call i32 @bar(i8* %conv) nounwind          ; <i32> [#uses=1]
+       %tmp1 = load i8** @test1p               ; <i8*> [#uses=1]
+       %call2 = tail call i32 @bar(i8* %tmp1) nounwind         ; <i32> [#uses=1]
+       %call3 = tail call i32 @bar(i8* getelementptr ([5 x i8]* @test1a, i32 0, i32 0)) nounwind               ; <i32> [#uses=1]
+       %call5 = tail call i32 @bar(i8* bitcast ([5 x i32]* @test2a to i8*)) nounwind           ; <i32> [#uses=1]
+       %tmp7 = load i32** @test2p              ; <i32*> [#uses=1]
+       %conv8 = bitcast i32* %tmp7 to i8*              ; <i8*> [#uses=1]
+       %call9 = tail call i32 @bar(i8* %conv8) nounwind                ; <i32> [#uses=1]
+       %call11 = tail call i32 @bar(i8* bitcast (i32* @test0i to i8*)) nounwind                ; <i32> [#uses=1]
+       %add = add i32 %call2, %call            ; <i32> [#uses=1]
+       %add4 = add i32 %add, %call3            ; <i32> [#uses=1]
+       %add6 = add i32 %add4, %call5           ; <i32> [#uses=1]
+       %add10 = add i32 %add6, %call9          ; <i32> [#uses=1]
+       %add12 = add i32 %add10, %call11                ; <i32> [#uses=1]
+       ret i32 %add12
+}
+
+declare i32 @bar(i8*)
+
+; Stack allocations: two fixed-size arrays and two variable-length i8 arrays
+; (sized by %n and by %n << 2). The expected element and byte counts follow
+; each alloca.
+define i32 @foo1(i32 %n) nounwind {
+entry:
+; CHECK: 'foo1':
+       %test4a = alloca [10 x i8], align 1             ; <[10 x i8]*> [#uses=1]
+; CHECK: %test4a =
+; CHECK: ==> 1 elements, 10 bytes allocated
+       %test6a = alloca [10 x i32], align 4            ; <[10 x i32]*> [#uses=1]
+; CHECK: %test6a =
+; CHECK: ==> 1 elements, 40 bytes allocated
+       %vla = alloca i8, i32 %n, align 1               ; <i8*> [#uses=1]
+; CHECK: %vla =
+; CHECK: ==> %n elements, %n bytes allocated
+       %0 = shl i32 %n, 2              ; <i32> [#uses=1]
+       %vla7 = alloca i8, i32 %0, align 1              ; <i8*> [#uses=1]
+; CHECK: %vla7 =
+; CHECK: ==> (4 * %n) elements, (4 * %n) bytes allocated
+       %call = call i32 @bar(i8* %vla) nounwind                ; <i32> [#uses=1]
+       %arraydecay = getelementptr [10 x i8]* %test4a, i64 0, i64 0            ; <i8*> [#uses=1]
+       %call10 = call i32 @bar(i8* %arraydecay) nounwind               ; <i32> [#uses=1]
+       %call11 = call i32 @bar(i8* %vla7) nounwind             ; <i32> [#uses=1]
+       %ptrconv14 = bitcast [10 x i32]* %test6a to i8*         ; <i8*> [#uses=1]
+       %call15 = call i32 @bar(i8* %ptrconv14) nounwind                ; <i32> [#uses=1]
+       %add = add i32 %call10, %call           ; <i32> [#uses=1]
+       %add12 = add i32 %add, %call11          ; <i32> [#uses=1]
+       %add16 = add i32 %add12, %call15                ; <i32> [#uses=1]
+       ret i32 %add16
+}
+
+; Heap allocations: a malloc instruction of %n bytes, calloc(2, 4) and
+; realloc(null, 16). The expected element and byte counts follow each
+; allocation.
+define i32 @foo2(i32 %n) nounwind {
+entry:
+       %call = malloc i8, i32 %n               ; <i8*> [#uses=1]
+; CHECK: %call =
+; CHECK: ==> %n elements, %n bytes allocated
+       %call2 = tail call i8* @calloc(i64 2, i64 4) nounwind           ; <i8*> [#uses=1]
+; CHECK: %call2 =
+; CHECK: ==> 8 elements, 8 bytes allocated
+       %call4 = tail call i8* @realloc(i8* null, i64 16) nounwind              ; <i8*> [#uses=1]
+; CHECK: %call4 =
+; CHECK: ==> 16 elements, 16 bytes allocated
+       %call6 = tail call i32 @bar(i8* %call) nounwind         ; <i32> [#uses=1]
+       %call8 = tail call i32 @bar(i8* %call2) nounwind                ; <i32> [#uses=1]
+       %call10 = tail call i32 @bar(i8* %call4) nounwind               ; <i32> [#uses=1]
+       %add = add i32 %call8, %call6           ; <i32> [#uses=1]
+       %add11 = add i32 %add, %call10          ; <i32> [#uses=1]
+       ret i32 %add11
+}
+
+declare noalias i8* @calloc(i64, i64) nounwind
+
+declare noalias i8* @realloc(i8* nocapture, i64) nounwind