Add a cost model analysis that allows us to estimate the cost of IR-level instructions.
authorNadav Rotem <nrotem@apple.com>
Fri, 2 Nov 2012 21:48:17 +0000 (21:48 +0000)
committerNadav Rotem <nrotem@apple.com>
Fri, 2 Nov 2012 21:48:17 +0000 (21:48 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167324 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/Analysis/Passes.h
include/llvm/InitializePasses.h
include/llvm/LinkAllPasses.h
lib/Analysis/Analysis.cpp
lib/Analysis/CMakeLists.txt
lib/Analysis/CostModel.cpp [new file with mode: 0644]
test/Analysis/CostModel/X86/lit.local.cfg [new file with mode: 0644]
test/Analysis/CostModel/X86/tiny.ll [new file with mode: 0644]
test/Analysis/CostModel/X86/vectorized-loop.ll [new file with mode: 0644]
test/Analysis/CostModel/lit.local.cfg [new file with mode: 0644]
test/Analysis/CostModel/no_info.ll [new file with mode: 0644]

index ff1008b8fdf387cb6b9b8e9551e22021893521d1..27726f49bccea52abce92203297a17ce2e9359e8 100644 (file)
@@ -185,6 +185,13 @@ namespace llvm {
   //
   FunctionPass *createDependenceAnalysisPass();
 
+  //===--------------------------------------------------------------------===//
+  //
+  // createCostModelAnalysisPass - This creates an instance of the
+  // CostModelAnalysis pass.
+  //
+  FunctionPass *createCostModelAnalysisPass();
+
   //===--------------------------------------------------------------------===//
   //
   // Minor pass prototypes, allowing us to expose them through bugpoint and
index 9b8a6bdf5453c551983ddb4af4776713c06e01a6..8c164eb919848c30dc83470f47276b7248403bc7 100644 (file)
@@ -88,6 +88,7 @@ void initializeCodePlacementOptPass(PassRegistry&);
 void initializeConstantMergePass(PassRegistry&);
 void initializeConstantPropagationPass(PassRegistry&);
 void initializeMachineCopyPropagationPass(PassRegistry&);
+void initializeCostModelAnalysisPass(PassRegistry&);
 void initializeCorrelatedValuePropagationPass(PassRegistry&);
 void initializeDAEPass(PassRegistry&);
 void initializeDAHPass(PassRegistry&);
index a6d2f29168481f0f3f25d5887854251b1fa045dd..806e4b37b73db6f44e1ca5afe26498eb1ee08bd2 100644 (file)
@@ -60,6 +60,7 @@ namespace {
       (void) llvm::createCFGSimplificationPass();
       (void) llvm::createConstantMergePass();
       (void) llvm::createConstantPropagationPass();
+      (void) llvm::createCostModelAnalysisPass();
       (void) llvm::createDeadArgEliminationPass();
       (void) llvm::createDeadCodeEliminationPass();
       (void) llvm::createDeadInstEliminationPass();
index 8969ea9563c712c803b5b24f6dc561819a91c950..9dc81a6a630f016e903f0339c8e06322ee93149d 100644 (file)
@@ -26,6 +26,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
   initializeBasicAliasAnalysisPass(Registry);
   initializeBlockFrequencyInfoPass(Registry);
   initializeBranchProbabilityInfoPass(Registry);
+  initializeCostModelAnalysisPass(Registry);
   initializeCFGViewerPass(Registry);
   initializeCFGPrinterPass(Registry);
   initializeCFGOnlyViewerPass(Registry);
index 8f5a2f2893fd5c021236513d22f418a8323d780c..b3a40bee4211a5c5084005e43da8d6e25111587a 100644 (file)
@@ -10,6 +10,7 @@ add_llvm_library(LLVMAnalysis
   BranchProbabilityInfo.cpp
   CFGPrinter.cpp
   CaptureTracking.cpp
+  CostModel.cpp
   CodeMetrics.cpp
   ConstantFolding.cpp
   DbgInfoPrinter.cpp
diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp
new file mode 100644 (file)
index 0000000..142d287
--- /dev/null
@@ -0,0 +1,175 @@
+//===- CostModel.cpp ------ Cost Model Analysis ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the cost model analysis. It provides a very basic cost
+// estimation for LLVM-IR. The cost result can be thought of as cycles, but it
+// is really unit-less. The estimated cost is ment to be used for comparing
+// alternatives.
+//
+//===----------------------------------------------------------------------===//
+
+#define CM_NAME "cost-model"
+#define DEBUG_TYPE CM_NAME
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/TargetTransformInfo.h"
+#include "llvm/Value.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+  class CostModelAnalysis : public FunctionPass {
+
+  public:
+    static char ID; // Class identification, replacement for typeinfo
+    CostModelAnalysis() : FunctionPass(ID), F(0), VTTI(0) {
+      initializeCostModelAnalysisPass(
+        *PassRegistry::getPassRegistry());
+    }
+
+    /// Returns the expected cost of the instruction.
+    /// Returns -1 if the cost is unknown.
+    /// Note, this method does not cache the cost calculation and it
+    /// can be expensive in some cases.
+    unsigned getInstructionCost(Instruction *I) const;
+
+  private:
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+    virtual bool runOnFunction(Function &F);
+    virtual void print(raw_ostream &OS, const Module*) const;
+
+    /// The function that we analyze.
+    Function *F;
+    /// Vector target information.
+    const VectorTargetTransformInfo *VTTI;
+  };
+}  // End of anonymous namespace
+
+// Register this pass.
+char CostModelAnalysis::ID = 0;
+static const char cm_name[] = "Cost Model Analysis";
+INITIALIZE_PASS_BEGIN(CostModelAnalysis, CM_NAME, cm_name, false, true)
+INITIALIZE_PASS_END  (CostModelAnalysis, CM_NAME, cm_name, false, true)
+
+FunctionPass *llvm::createCostModelAnalysisPass() {
+  return new CostModelAnalysis();
+}
+
+void
+CostModelAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+}
+
+bool
+CostModelAnalysis::runOnFunction(Function &F) {
+ this->F = &F;
+
+ // Target information.
+ TargetTransformInfo *TTI;
+ TTI = getAnalysisIfAvailable<TargetTransformInfo>();
+ if (TTI)
+   VTTI = TTI->getVectorTargetTransformInfo();
+
+ return false;
+}
+
+unsigned CostModelAnalysis::getInstructionCost(Instruction *I) const {
+  if (!VTTI)
+    return -1;
+
+  switch (I->getOpcode()) {
+  case Instruction::Ret:
+  case Instruction::PHI:
+  case Instruction::Br: {
+    return VTTI->getCFInstrCost(I->getOpcode());
+  }
+  case Instruction::Add:
+  case Instruction::FAdd:
+  case Instruction::Sub:
+  case Instruction::FSub:
+  case Instruction::Mul:
+  case Instruction::FMul:
+  case Instruction::UDiv:
+  case Instruction::SDiv:
+  case Instruction::FDiv:
+  case Instruction::URem:
+  case Instruction::SRem:
+  case Instruction::FRem:
+  case Instruction::Shl:
+  case Instruction::LShr:
+  case Instruction::AShr:
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor: {
+    return VTTI->getArithmeticInstrCost(I->getOpcode(), I->getType());
+  }
+  case Instruction::Select: {
+    SelectInst *SI = cast<SelectInst>(I);
+    Type *CondTy = SI->getCondition()->getType();
+    return VTTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy);
+  }
+  case Instruction::ICmp:
+  case Instruction::FCmp: {
+    Type *ValTy = I->getOperand(0)->getType();
+    return VTTI->getCmpSelInstrCost(I->getOpcode(), ValTy);
+  }
+  case Instruction::Store: {
+    StoreInst *SI = cast<StoreInst>(I);
+    Type *ValTy = SI->getValueOperand()->getType();
+    return VTTI->getMemoryOpCost(I->getOpcode(), ValTy,
+                                 SI->getAlignment(),
+                                 SI->getPointerAddressSpace());
+  }
+  case Instruction::Load: {
+    LoadInst *LI = cast<LoadInst>(I);
+    return VTTI->getMemoryOpCost(I->getOpcode(), I->getType(),
+                                 LI->getAlignment(),
+                                 LI->getPointerAddressSpace());
+  }
+  case Instruction::ZExt:
+  case Instruction::SExt:
+  case Instruction::FPToUI:
+  case Instruction::FPToSI:
+  case Instruction::FPExt:
+  case Instruction::PtrToInt:
+  case Instruction::IntToPtr:
+  case Instruction::SIToFP:
+  case Instruction::UIToFP:
+  case Instruction::Trunc:
+  case Instruction::FPTrunc:
+  case Instruction::BitCast: {
+    Type *SrcTy = I->getOperand(0)->getType();
+    return VTTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy);
+  }
+  default:
+    // We don't have any information on this instruction.
+    return -1;
+  }
+}
+
+void CostModelAnalysis::print(raw_ostream &OS, const Module*) const {
+  if (!F)
+    return;
+
+  for (Function::iterator B = F->begin(), BE = F->end(); B != BE; ++B) {
+    for (BasicBlock::iterator it = B->begin(), e = B->end(); it != e; ++it) {
+      Instruction *Inst = it;
+      unsigned Cost = getInstructionCost(Inst);
+      if (Cost != (unsigned)-1)
+        OS << "Cost Model: Found an estimated cost of " << Cost;
+      else
+        OS << "Cost Model: Unknown cost";
+
+      OS << " for instruction: "<< *Inst << "\n";
+    }
+  }
+}
diff --git a/test/Analysis/CostModel/X86/lit.local.cfg b/test/Analysis/CostModel/X86/lit.local.cfg
new file mode 100644 (file)
index 0000000..a8ad0f1
--- /dev/null
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/Analysis/CostModel/X86/tiny.ll b/test/Analysis/CostModel/X86/tiny.ll
new file mode 100644 (file)
index 0000000..cc7b443
--- /dev/null
@@ -0,0 +1,11 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: cost of 1 {{.*}} add
+;CHECK: cost of 1 {{.*}} ret
+define i32 @no_info(i32 %arg) {
+  %e = add i32 %arg, %arg
+  ret i32 %e
+}
diff --git a/test/Analysis/CostModel/X86/vectorized-loop.ll b/test/Analysis/CostModel/X86/vectorized-loop.ll
new file mode 100644 (file)
index 0000000..fbf20de
--- /dev/null
@@ -0,0 +1,76 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define i32 @foo(i32* noalias nocapture %A, i32* noalias nocapture %B, i32 %start, i32 %end) nounwind uwtable ssp {
+entry:
+  ;CHECK: cost of 1 {{.*}} icmp
+  %cmp7 = icmp slt i32 %start, %end
+  br i1 %cmp7, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  ;CHECK: cost of 1 {{.*}} sext
+  %0 = sext i32 %start to i64
+  %1 = sub i32 %end, %start
+  %2 = zext i32 %1 to i64
+  %end.idx = add i64 %2, %0
+  ;CHECK: cost of 1 {{.*}} add
+  %n.vec = and i64 %2, 4294967288
+  %end.idx.rnd.down = add i64 %n.vec, %0
+  ;CHECK: cost of 1 {{.*}} icmp
+  %cmp.zero = icmp eq i64 %n.vec, 0
+  br i1 %cmp.zero, label %middle.block, label %vector.body
+
+vector.body:                                      ; preds = %for.body.lr.ph, %vector.body
+  %index = phi i64 [ %index.next, %vector.body ], [ %0, %for.body.lr.ph ]
+  %3 = add i64 %index, 2
+  %4 = getelementptr inbounds i32* %B, i64 %3
+  ;CHECK: cost of 0 {{.*}} bitcast
+  %5 = bitcast i32* %4 to <8 x i32>*
+  ;CHECK: cost of 1 {{.*}} load
+  %6 = load <8 x i32>* %5, align 4
+  %7 = mul nsw <8 x i32> %6, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+  %8 = getelementptr inbounds i32* %A, i64 %index
+  %9 = bitcast i32* %8 to <8 x i32>*
+  %10 = load <8 x i32>* %9, align 4
+  %11 = add nsw <8 x i32> %10, %7
+  ;CHECK: cost of 1 {{.*}} store
+  store <8 x i32> %11, <8 x i32>* %9, align 4
+  %index.next = add i64 %index, 8
+  %12 = icmp eq i64 %index.next, %end.idx.rnd.down
+  ;CHECK: cost of 1 {{.*}} br
+  br i1 %12, label %middle.block, label %vector.body
+
+middle.block:                                     ; preds = %vector.body, %for.body.lr.ph
+  %cmp.n = icmp eq i64 %end.idx, %end.idx.rnd.down
+  br i1 %cmp.n, label %for.end, label %for.body
+
+for.body:                                         ; preds = %middle.block, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %end.idx.rnd.down, %middle.block ]
+  %13 = add nsw i64 %indvars.iv, 2
+  %arrayidx = getelementptr inbounds i32* %B, i64 %13
+  ;CHECK: cost of 1 {{.*}} load
+  %14 = load i32* %arrayidx, align 4, !tbaa !0
+  ;CHECK: cost of 1 {{.*}} mul
+  %mul = mul nsw i32 %14, 5
+  %arrayidx2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+  ;CHECK: cost of 1 {{.*}} load
+  %15 = load i32* %arrayidx2, align 4, !tbaa !0
+  %add3 = add nsw i32 %15, %mul
+  store i32 %add3, i32* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  ;CHECK: cost of 0 {{.*}} trunc
+  %16 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %16, %end
+  ;CHECK: cost of 1 {{.*}} br
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %middle.block, %for.body, %entry
+  ;CHECK: cost of 1 {{.*}} ret
+  ret i32 undef
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Analysis/CostModel/lit.local.cfg b/test/Analysis/CostModel/lit.local.cfg
new file mode 100644 (file)
index 0000000..19eebc0
--- /dev/null
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/CostModel/no_info.ll b/test/Analysis/CostModel/no_info.ll
new file mode 100644 (file)
index 0000000..d20d56b
--- /dev/null
@@ -0,0 +1,15 @@
+; RUN: opt < %s -cost-model -analyze | FileCheck %s
+
+; The cost model does not have any target information so it can't make a decision.
+; Notice that OPT does not read the triple information from the module itself, only through the command line.
+
+; This info ignored:
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: Unknown cost {{.*}} add
+;CHECK: Unknown cost {{.*}} ret
+define i32 @no_info(i32 %arg) {
+  %e = add i32 %arg, %arg
+  ret i32 %e
+}