lib/Analysis/CostModel.cpp

   1 //===- CostModel.cpp ------ Cost Model Analysis ---------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the cost model analysis. It provides a very basic cost
  11 // estimation for LLVM-IR. This analysis uses the services of the codegen
  12 // to approximate the cost of any IR instruction when lowered to machine
  13 // instructions. The cost results are unit-less and the cost number represents
  14 // the throughput of the machine assuming that all loads hit the cache, all
  15 // branches are predicted, etc. The cost numbers can be added in order to
  16 // compare two or more transformation alternatives.
  17 //
  18 //===----------------------------------------------------------------------===//
  19
  20 #include "llvm/ADT/STLExtras.h"
  21 #include "llvm/Analysis/Passes.h"
  22 #include "llvm/Analysis/TargetTransformInfo.h"
  23 #include "llvm/IR/Function.h"
  24 #include "llvm/IR/Instructions.h"
  25 #include "llvm/IR/IntrinsicInst.h"
  26 #include "llvm/IR/Value.h"
  27 #include "llvm/Pass.h"
  28 #include "llvm/Support/CommandLine.h"
  29 #include "llvm/Support/Debug.h"
  30 #include "llvm/Support/raw_ostream.h"
  31 using namespace llvm;
  32
  33 #define CM_NAME "cost-model"
  34 #define DEBUG_TYPE CM_NAME
  35
  36 static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
  37                                      cl::Hidden,
  38                                      cl::desc("Recognize reduction patterns."));
  39
  40 namespace {
  41   class CostModelAnalysis : public FunctionPass {
  42
  43   public:
  44     static char ID; // Class identification, replacement for typeinfo
  45     CostModelAnalysis() : FunctionPass(ID), F(nullptr), TTI(nullptr) {
  46       initializeCostModelAnalysisPass(
  47         *PassRegistry::getPassRegistry());
  48     }
  49
  50     /// Returns the expected cost of the instruction.
  51     /// Returns -1 if the cost is unknown.
  52     /// Note, this method does not cache the cost calculation and it
  53     /// can be expensive in some cases.
  54     unsigned getInstructionCost(const Instruction *I) const;
  55
  56   private:
  57     void getAnalysisUsage(AnalysisUsage &AU) const override;
  58     bool runOnFunction(Function &F) override;
  59     void print(raw_ostream &OS, const Module*) const override;
  60
  61     /// The function that we analyze.
  62     Function *F;
  63     /// Target information.
  64     const TargetTransformInfo *TTI;
  65   };
  66 }  // End of anonymous namespace
  67
  68 // Register this pass.
  69 char CostModelAnalysis::ID = 0;
  70 static const char cm_name[] = "Cost Model Analysis";
  71 INITIALIZE_PASS_BEGIN(CostModelAnalysis, CM_NAME, cm_name, false, true)
  72 INITIALIZE_PASS_END  (CostModelAnalysis, CM_NAME, cm_name, false, true)
  73
  74 FunctionPass *llvm::createCostModelAnalysisPass() {
  75   return new CostModelAnalysis();
  76 }
  77
  78 void
  79 CostModelAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
  80   AU.setPreservesAll();
  81 }
  82
  83 bool
  84 CostModelAnalysis::runOnFunction(Function &F) {
  85  this->F = &F;
  86  auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
  87  TTI = TTIWP ? &TTIWP->getTTI(F) : nullptr;
  88
  89  return false;
  90 }
  91
  92 static bool isReverseVectorMask(SmallVectorImpl<int> &Mask) {
  93   for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i)
  94     if (Mask[i] > 0 && Mask[i] != (int)(MaskSize - 1 - i))
  95       return false;
  96   return true;
  97 }
  98
  99 static bool isAlternateVectorMask(SmallVectorImpl<int> &Mask) {
 100   bool isAlternate = true;
 101   unsigned MaskSize = Mask.size();
 102
 103   // Example: shufflevector A, B, <0,5,2,7>
 104   for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
 105     if (Mask[i] < 0)
 106       continue;
 107     isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i);
 108   }
 109
 110   if (isAlternate)
 111     return true;
 112
 113   isAlternate = true;
 114   // Example: shufflevector A, B, <4,1,6,3>
 115   for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
 116     if (Mask[i] < 0)
 117       continue;
 118     isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i);
 119   }
 120
 121   return isAlternate;
 122 }
 123
 124 static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
 125   TargetTransformInfo::OperandValueKind OpInfo =
 126     TargetTransformInfo::OK_AnyValue;
 127
 128   // Check for a splat of a constant or for a non uniform vector of constants.
 129   if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
 130     OpInfo = TargetTransformInfo::OK_NonUniformConstantValue;
 131     if (cast<Constant>(V)->getSplatValue() != nullptr)
 132       OpInfo = TargetTransformInfo::OK_UniformConstantValue;
 133   }
 134
 135   return OpInfo;
 136 }
 137
 138 static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
 139                                      unsigned Level) {
 140   // We don't need a shuffle if we just want to have element 0 in position 0 of
 141   // the vector.
 142   if (!SI && Level == 0 && IsLeft)
 143     return true;
 144   else if (!SI)
 145     return false;
 146
 147   SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1);
 148
 149   // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
 150   // we look at the left or right side.
 151   for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
 152     Mask[i] = val;
 153
 154   SmallVector<int, 16> ActualMask = SI->getShuffleMask();
 155   return Mask == ActualMask;
 156 }
 157
 158 static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp,
 159                                           unsigned Level, unsigned NumLevels) {
 160   // Match one level of pairwise operations.
 161   // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
 162   //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
 163   // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
 164   //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
 165   // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
 166   if (BinOp == nullptr)
 167     return false;
 168
 169   assert(BinOp->getType()->isVectorTy() && "Expecting a vector type");
 170
 171   unsigned Opcode = BinOp->getOpcode();
 172   Value *L = BinOp->getOperand(0);
 173   Value *R = BinOp->getOperand(1);
 174
 175   ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(L);
 176   if (!LS && Level)
 177     return false;
 178   ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(R);
 179   if (!RS && Level)
 180     return false;
 181
 182   // On level 0 we can omit one shufflevector instruction.
 183   if (!Level && !RS && !LS)
 184     return false;
 185
 186   // Shuffle inputs must match.
 187   Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr;
 188   Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr;
 189   Value *NextLevelOp = nullptr;
 190   if (NextLevelOpR && NextLevelOpL) {
 191     // If we have two shuffles their operands must match.
 192     if (NextLevelOpL != NextLevelOpR)
 193       return false;
 194
 195     NextLevelOp = NextLevelOpL;
 196   } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) {
 197     // On the first level we can omit the shufflevector <0, undef,...>. So the
 198     // input to the other shufflevector <1, undef> must match with one of the
 199     // inputs to the current binary operation.
 200     // Example:
 201     //  %NextLevelOpL = shufflevector %R, <1, undef ...>
 202     //  %BinOp        = fadd          %NextLevelOpL, %R
 203     if (NextLevelOpL && NextLevelOpL != R)
 204       return false;
 205     else if (NextLevelOpR && NextLevelOpR != L)
 206       return false;
 207
 208     NextLevelOp = NextLevelOpL ? R : L;
 209   } else
 210     return false;
 211
 212   // Check that the next levels binary operation exists and matches with the
 213   // current one.
 214   BinaryOperator *NextLevelBinOp = nullptr;
 215   if (Level + 1 != NumLevels) {
 216     if (!(NextLevelBinOp = dyn_cast<BinaryOperator>(NextLevelOp)))
 217       return false;
 218     else if (NextLevelBinOp->getOpcode() != Opcode)
 219       return false;
 220   }
 221
 222   // Shuffle mask for pairwise operation must match.
 223   if (matchPairwiseShuffleMask(LS, true, Level)) {
 224     if (!matchPairwiseShuffleMask(RS, false, Level))
 225       return false;
 226   } else if (matchPairwiseShuffleMask(RS, true, Level)) {
 227     if (!matchPairwiseShuffleMask(LS, false, Level))
 228       return false;
 229   } else
 230     return false;
 231
 232   if (++Level == NumLevels)
 233     return true;
 234
 235   // Match next level.
 236   return matchPairwiseReductionAtLevel(NextLevelBinOp, Level, NumLevels);
 237 }
 238
 239 static bool matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
 240                                    unsigned &Opcode, Type *&Ty) {
 241   if (!EnableReduxCost)
 242     return false;
 243
 244   // Need to extract the first element.
 245   ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
 246   unsigned Idx = ~0u;
 247   if (CI)
 248     Idx = CI->getZExtValue();
 249   if (Idx != 0)
 250     return false;
 251
 252   BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0));
 253   if (!RdxStart)
 254     return false;
 255
 256   Type *VecTy = ReduxRoot->getOperand(0)->getType();
 257   unsigned NumVecElems = VecTy->getVectorNumElements();
 258   if (!isPowerOf2_32(NumVecElems))
 259     return false;
 260
 261   // We look for a sequence of shuffle,shuffle,add triples like the following
 262   // that builds a pairwise reduction tree.
 263   //
 264   //  (X0, X1, X2, X3)
 265   //   (X0 + X1, X2 + X3, undef, undef)
 266   //    ((X0 + X1) + (X2 + X3), undef, undef, undef)
 267   //
 268   // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
 269   //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
 270   // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
 271   //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
 272   // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
 273   // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
 274   //       <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
 275   // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
 276   //       <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 277   // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
 278   // %r = extractelement <4 x float> %bin.rdx8, i32 0
 279   if (!matchPairwiseReductionAtLevel(RdxStart, 0,  Log2_32(NumVecElems)))
 280     return false;
 281
 282   Opcode = RdxStart->getOpcode();
 283   Ty = VecTy;
 284
 285   return true;
 286 }
 287
 288 static std::pair<Value *, ShuffleVectorInst *>
 289 getShuffleAndOtherOprd(BinaryOperator *B) {
 290
 291   Value *L = B->getOperand(0);
 292   Value *R = B->getOperand(1);
 293   ShuffleVectorInst *S = nullptr;
 294
 295   if ((S = dyn_cast<ShuffleVectorInst>(L)))
 296     return std::make_pair(R, S);
 297
 298   S = dyn_cast<ShuffleVectorInst>(R);
 299   return std::make_pair(L, S);
 300 }
 301
 302 static bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
 303                                           unsigned &Opcode, Type *&Ty) {
 304   if (!EnableReduxCost)
 305     return false;
 306
 307   // Need to extract the first element.
 308   ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
 309   unsigned Idx = ~0u;
 310   if (CI)
 311     Idx = CI->getZExtValue();
 312   if (Idx != 0)
 313     return false;
 314
 315   BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0));
 316   if (!RdxStart)
 317     return false;
 318   unsigned RdxOpcode = RdxStart->getOpcode();
 319
 320   Type *VecTy = ReduxRoot->getOperand(0)->getType();
 321   unsigned NumVecElems = VecTy->getVectorNumElements();
 322   if (!isPowerOf2_32(NumVecElems))
 323     return false;
 324
 325   // We look for a sequence of shuffles and adds like the following matching one
 326   // fadd, shuffle vector pair at a time.
 327   //
 328   // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
 329   //                           <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 330   // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
 331   // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
 332   //                          <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 333   // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
 334   // %r = extractelement <4 x float> %bin.rdx8, i32 0
 335
 336   unsigned MaskStart = 1;
 337   Value *RdxOp = RdxStart;
 338   SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
 339   unsigned NumVecElemsRemain = NumVecElems;
 340   while (NumVecElemsRemain - 1) {
 341     // Check for the right reduction operation.
 342     BinaryOperator *BinOp;
 343     if (!(BinOp = dyn_cast<BinaryOperator>(RdxOp)))
 344       return false;
 345     if (BinOp->getOpcode() != RdxOpcode)
 346       return false;
 347
 348     Value *NextRdxOp;
 349     ShuffleVectorInst *Shuffle;
 350     std::tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(BinOp);
 351
 352     // Check the current reduction operation and the shuffle use the same value.
 353     if (Shuffle == nullptr)
 354       return false;
 355     if (Shuffle->getOperand(0) != NextRdxOp)
 356       return false;
 357
 358     // Check that shuffle masks matches.
 359     for (unsigned j = 0; j != MaskStart; ++j)
 360       ShuffleMask[j] = MaskStart + j;
 361     // Fill the rest of the mask with -1 for undef.
 362     std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);
 363
 364     SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
 365     if (ShuffleMask != Mask)
 366       return false;
 367
 368     RdxOp = NextRdxOp;
 369     NumVecElemsRemain /= 2;
 370     MaskStart *= 2;
 371   }
 372
 373   Opcode = RdxOpcode;
 374   Ty = VecTy;
 375   return true;
 376 }
 377
 378 unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
 379   if (!TTI)
 380     return -1;
 381
 382   switch (I->getOpcode()) {
 383   case Instruction::GetElementPtr:
 384     return TTI->getUserCost(I);
 385
 386   case Instruction::Ret:
 387   case Instruction::PHI:
 388   case Instruction::Br: {
 389     return TTI->getCFInstrCost(I->getOpcode());
 390   }
 391   case Instruction::Add:
 392   case Instruction::FAdd:
 393   case Instruction::Sub:
 394   case Instruction::FSub:
 395   case Instruction::Mul:
 396   case Instruction::FMul:
 397   case Instruction::UDiv:
 398   case Instruction::SDiv:
 399   case Instruction::FDiv:
 400   case Instruction::URem:
 401   case Instruction::SRem:
 402   case Instruction::FRem:
 403   case Instruction::Shl:
 404   case Instruction::LShr:
 405   case Instruction::AShr:
 406   case Instruction::And:
 407   case Instruction::Or:
 408   case Instruction::Xor: {
 409     TargetTransformInfo::OperandValueKind Op1VK =
 410       getOperandInfo(I->getOperand(0));
 411     TargetTransformInfo::OperandValueKind Op2VK =
 412       getOperandInfo(I->getOperand(1));
 413     return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK,
 414                                        Op2VK);
 415   }
 416   case Instruction::Select: {
 417     const SelectInst *SI = cast<SelectInst>(I);
 418     Type *CondTy = SI->getCondition()->getType();
 419     return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy);
 420   }
 421   case Instruction::ICmp:
 422   case Instruction::FCmp: {
 423     Type *ValTy = I->getOperand(0)->getType();
 424     return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy);
 425   }
 426   case Instruction::Store: {
 427     const StoreInst *SI = cast<StoreInst>(I);
 428     Type *ValTy = SI->getValueOperand()->getType();
 429     return TTI->getMemoryOpCost(I->getOpcode(), ValTy,
 430                                  SI->getAlignment(),
 431                                  SI->getPointerAddressSpace());
 432   }
 433   case Instruction::Load: {
 434     const LoadInst *LI = cast<LoadInst>(I);
 435     return TTI->getMemoryOpCost(I->getOpcode(), I->getType(),
 436                                  LI->getAlignment(),
 437                                  LI->getPointerAddressSpace());
 438   }
 439   case Instruction::ZExt:
 440   case Instruction::SExt:
 441   case Instruction::FPToUI:
 442   case Instruction::FPToSI:
 443   case Instruction::FPExt:
 444   case Instruction::PtrToInt:
 445   case Instruction::IntToPtr:
 446   case Instruction::SIToFP:
 447   case Instruction::UIToFP:
 448   case Instruction::Trunc:
 449   case Instruction::FPTrunc:
 450   case Instruction::BitCast:
 451   case Instruction::AddrSpaceCast: {
 452     Type *SrcTy = I->getOperand(0)->getType();
 453     return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy);
 454   }
 455   case Instruction::ExtractElement: {
 456     const ExtractElementInst * EEI = cast<ExtractElementInst>(I);
 457     ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
 458     unsigned Idx = -1;
 459     if (CI)
 460       Idx = CI->getZExtValue();
 461
 462     // Try to match a reduction sequence (series of shufflevector and vector
 463     // adds followed by a extractelement).
 464     unsigned ReduxOpCode;
 465     Type *ReduxType;
 466
 467     if (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType))
 468       return TTI->getReductionCost(ReduxOpCode, ReduxType, false);
 469     else if (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType))
 470       return TTI->getReductionCost(ReduxOpCode, ReduxType, true);
 471
 472     return TTI->getVectorInstrCost(I->getOpcode(),
 473                                    EEI->getOperand(0)->getType(), Idx);
 474   }
 475   case Instruction::InsertElement: {
 476     const InsertElementInst * IE = cast<InsertElementInst>(I);
 477     ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
 478     unsigned Idx = -1;
 479     if (CI)
 480       Idx = CI->getZExtValue();
 481     return TTI->getVectorInstrCost(I->getOpcode(),
 482                                    IE->getType(), Idx);
 483   }
 484   case Instruction::ShuffleVector: {
 485     const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
 486     Type *VecTypOp0 = Shuffle->getOperand(0)->getType();
 487     unsigned NumVecElems = VecTypOp0->getVectorNumElements();
 488     SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
 489
 490     if (NumVecElems == Mask.size()) {
 491       if (isReverseVectorMask(Mask))
 492         return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0,
 493                                    0, nullptr);
 494       if (isAlternateVectorMask(Mask))
 495         return TTI->getShuffleCost(TargetTransformInfo::SK_Alternate,
 496                                    VecTypOp0, 0, nullptr);
 497     }
 498
 499     return -1;
 500   }
 501   case Instruction::Call:
 502     if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
 503       SmallVector<Value *, 4> Args;
 504       for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J)
 505         Args.push_back(II->getArgOperand(J));
 506
 507       return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
 508                                         Args);
 509     }
 510     return -1;
 511   default:
 512     // We don't have any information on this instruction.
 513     return -1;
 514   }
 515 }
 516
 517 void CostModelAnalysis::print(raw_ostream &OS, const Module*) const {
 518   if (!F)
 519     return;
 520
 521   for (Function::iterator B = F->begin(), BE = F->end(); B != BE; ++B) {
 522     for (BasicBlock::iterator it = B->begin(), e = B->end(); it != e; ++it) {
 523       Instruction *Inst = &*it;
 524       unsigned Cost = getInstructionCost(Inst);
 525       if (Cost != (unsigned)-1)
 526         OS << "Cost Model: Found an estimated cost of " << Cost;
 527       else
 528         OS << "Cost Model: Unknown cost";
 529
 530       OS << " for instruction: "<< *Inst << "\n";
 531     }
 532   }
 533 }