#include "llvm/ADT/SmallString.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InstrTypes.h"
/// isDereferenceablePointer - Test if this value is always a pointer to
/// allocated and suitably aligned memory for a simple load or store.
-static bool isDereferenceablePointer(const Value *V,
+static bool isDereferenceablePointer(const Value *V, const DataLayout *DL,
SmallPtrSet<const Value *, 32> &Visited) {
// Note that it is not safe to speculate into a malloc'd region because
// malloc may return null.
- // It's also not always safe to follow a bitcast, for example:
- // bitcast i8* (alloca i8) to i32*
- // would result in a 4-byte load from a 1-byte alloca. Some cases could
- // be handled using DataLayout to check sizes and alignments though.
// These are obviously ok.
if (isa<AllocaInst>(V)) return true;
+ // It's not always safe to follow a bitcast, for example:
+ //   bitcast i8* (alloca i8) to i32*
+ // would result in a 4-byte load from a 1-byte alloca. However, if we're
+ // casting from a pointer to a type of larger (or equal) store size to a
+ // pointer to a type of smaller size, and the ABI alignment of the source
+ // pointee type is at least as large as that of the resulting pointee type,
+ // then we can look through the bitcast.
+ //
+ // This requires DataLayout (DL may be null, in which case we stay
+ // conservative), and both pointee types must be sized: DataLayout cannot
+ // compute a store size or alignment for unsized types such as opaque
+ // structs or function types, so those are never looked through.
+ if (DL)
+   if (const BitCastInst *BC = dyn_cast<BitCastInst>(V)) {
+     Type *STy = BC->getSrcTy()->getPointerElementType(),
+          *DTy = BC->getDestTy()->getPointerElementType();
+     if (STy->isSized() && DTy->isSized() &&
+         (DL->getTypeStoreSize(STy) >=
+          DL->getTypeStoreSize(DTy)) &&
+         (DL->getABITypeAlignment(STy) >=
+          DL->getABITypeAlignment(DTy)))
+       return isDereferenceablePointer(BC->getOperand(0), DL, Visited);
+   }
+
// Global variables which can't collapse to null are ok.
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
return !GV->hasExternalWeakLinkage();
// Conservatively require that the base pointer be fully dereferenceable.
if (!Visited.insert(GEP->getOperand(0)))
return false;
- if (!isDereferenceablePointer(GEP->getOperand(0), Visited))
+ if (!isDereferenceablePointer(GEP->getOperand(0), DL, Visited))
return false;
// Check the indices.
gep_type_iterator GTI = gep_type_begin(GEP);
/// isDereferenceablePointer - Test if this value is always a pointer to
/// allocated and suitably aligned memory for a simple load or store.
-bool Value::isDereferenceablePointer() const {
+bool Value::isDereferenceablePointer(const DataLayout *DL) const {
SmallPtrSet<const Value *, 32> Visited;
- return ::isDereferenceablePointer(this, Visited);
+ return ::isDereferenceablePointer(this, DL, Visited);
}
/// DoPHITranslation - If this value is a PHI node with CurBB as its parent,
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
bool runOnSCC(CallGraphSCC &SCC) override;
static char ID; // Pass identification, replacement for typeid
explicit ArgPromotion(unsigned maxElements = 3)
- : CallGraphSCCPass(ID), maxElements(maxElements) {
+ : CallGraphSCCPass(ID), DL(nullptr), maxElements(maxElements) {
initializeArgPromotionPass(*PassRegistry::getPassRegistry());
}
/// A vector used to hold the indices of a single GEP instruction
typedef std::vector<uint64_t> IndicesVector;
+ const DataLayout *DL;
private:
CallGraphNode *PromoteArguments(CallGraphNode *CGN);
bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const;
bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
bool Changed = false, LocalChange;
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
+
do { // Iterate until we stop promoting from this SCC.
LocalChange = false;
// Attempt to promote arguments from all functions in this SCC.
/// AllCallersPassInValidPointerForArgument - Return true if we can prove that
-/// all callees pass in a valid pointer for the specified function argument.
+/// all callers pass in a valid pointer for the specified function argument.
-static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
+static bool AllCallersPassInValidPointerForArgument(Argument *Arg,
+ const DataLayout *DL) {
Function *Callee = Arg->getParent();
unsigned ArgNo = Arg->getArgNo();
CallSite CS(U);
assert(CS && "Should only have direct calls!");
- if (!CS.getArgument(ArgNo)->isDereferenceablePointer())
+ if (!CS.getArgument(ArgNo)->isDereferenceablePointer(DL))
return false;
}
return true;
GEPIndicesSet ToPromote;
// If the pointer is always valid, any load with first index 0 is valid.
- if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg))
+ if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg, DL))
SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
// First, iterate the entry block and mark loads of (geps of) arguments as
--- /dev/null
+; RUN: opt -S -basicaa -licm < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Make sure the basic alloca pointer hoisting works:
+; CHECK-LABEL: @test1
+; CHECK: load i32* %c, align 4
+; CHECK: for.body:
+
+; Function Attrs: nounwind uwtable
+define void @test1(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ %c = alloca i32
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.inc
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %cmp1 = icmp sgt i32 %0, 0
+ br i1 %cmp1, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %1 = load i32* %c, align 4
+ %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv
+ %2 = load i32* %arrayidx3, align 4
+ %mul = mul nsw i32 %2, %1
+ store i32 %mul, i32* %arrayidx, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc, %entry
+ ret void
+}
+
+; Make sure the basic alloca pointer hoisting works through a bitcast to a
+; pointer to a smaller type:
+; CHECK-LABEL: @test2
+; CHECK: load i32* %c, align 4
+; CHECK: for.body:
+
+; Function Attrs: nounwind uwtable
+define void @test2(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ %ca = alloca i64
+ %c = bitcast i64* %ca to i32*
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.inc
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %cmp1 = icmp sgt i32 %0, 0
+ br i1 %cmp1, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %1 = load i32* %c, align 4
+ %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv
+ %2 = load i32* %arrayidx3, align 4
+ %mul = mul nsw i32 %2, %1
+ store i32 %mul, i32* %arrayidx, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc, %entry
+ ret void
+}
+
+; Make sure the basic alloca pointer hoisting works through a bitcast to a
+; pointer to a smaller type (where the bitcast also needs to be hoisted):
+; CHECK-LABEL: @test3
+; CHECK: load i32* %c, align 4
+; CHECK: for.body:
+
+; Function Attrs: nounwind uwtable
+define void @test3(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ %ca = alloca i64
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.inc
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %cmp1 = icmp sgt i32 %0, 0
+ br i1 %cmp1, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %c = bitcast i64* %ca to i32*
+ %1 = load i32* %c, align 4
+ %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv
+ %2 = load i32* %arrayidx3, align 4
+ %mul = mul nsw i32 %2, %1
+ store i32 %mul, i32* %arrayidx, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc, %entry
+ ret void
+}
+
+; Make sure the basic alloca pointer hoisting does not happen through a bitcast
+; to a pointer to a larger type:
+; CHECK-LABEL: @test4
+; CHECK: for.body:
+; CHECK: load i32* %c, align 4
+
+; Function Attrs: nounwind uwtable
+define void @test4(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ %ca = alloca i16
+ %c = bitcast i16* %ca to i32*
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.inc
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %cmp1 = icmp sgt i32 %0, 0
+ br i1 %cmp1, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %1 = load i32* %c, align 4
+ %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv
+ %2 = load i32* %arrayidx3, align 4
+ %mul = mul nsw i32 %2, %1
+ store i32 %mul, i32* %arrayidx, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc, %entry
+ ret void
+}
+
+attributes #0 = { nounwind uwtable }
+