lib/Analysis/CostModel.cpp

   1 //===- CostModel.cpp ------ Cost Model Analysis ---------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the cost model analysis. It provides a very basic cost
  11 // estimation for LLVM-IR. This analysis uses the services of the codegen
  12 // to approximate the cost of any IR instruction when lowered to machine
  13 // instructions. The cost results are unit-less and the cost number represents
  14 // the throughput of the machine assuming that all loads hit the cache, all
  15 // branches are predicted, etc. The cost numbers can be added in order to
  16 // compare two or more transformation alternatives.
  17 //
  18 //===----------------------------------------------------------------------===//
  19
  20 #include "llvm/ADT/STLExtras.h"
  21 #include "llvm/Analysis/Passes.h"
  22 #include "llvm/Analysis/TargetTransformInfo.h"
  23 #include "llvm/IR/Function.h"
  24 #include "llvm/IR/Instructions.h"
  25 #include "llvm/IR/IntrinsicInst.h"
  26 #include "llvm/IR/Value.h"
  27 #include "llvm/Pass.h"
  28 #include "llvm/Support/CommandLine.h"
  29 #include "llvm/Support/Debug.h"
  30 #include "llvm/Support/raw_ostream.h"
  31 using namespace llvm;
  32
  33 #define CM_NAME "cost-model"
  34 #define DEBUG_TYPE CM_NAME
  35
  36 static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
  37                                      cl::Hidden,
  38                                      cl::desc("Recognize reduction patterns."));
  39
  40 namespace {
  41   class CostModelAnalysis : public FunctionPass {
  42
  43   public:
  44     static char ID; // Class identification, replacement for typeinfo
  45     CostModelAnalysis() : FunctionPass(ID), F(nullptr), TTI(nullptr) {
  46       initializeCostModelAnalysisPass(
  47         *PassRegistry::getPassRegistry());
  48     }
  49
  50     /// Returns the expected cost of the instruction.
  51     /// Returns -1 if the cost is unknown.
  52     /// Note, this method does not cache the cost calculation and it
  53     /// can be expensive in some cases.
  54     unsigned getInstructionCost(const Instruction *I) const;
  55
  56   private:
  57     void getAnalysisUsage(AnalysisUsage &AU) const override;
  58     bool runOnFunction(Function &F) override;
  59     void print(raw_ostream &OS, const Module*) const override;
  60
  61     /// The function that we analyze.
  62     Function *F;
  63     /// Target information.
  64     const TargetTransformInfo *TTI;
  65   };
  66 }  // End of anonymous namespace
  67
  68 // Register this pass.
  69 char CostModelAnalysis::ID = 0;
  70 static const char cm_name[] = "Cost Model Analysis";
  71 INITIALIZE_PASS_BEGIN(CostModelAnalysis, CM_NAME, cm_name, false, true)
  72 INITIALIZE_PASS_END  (CostModelAnalysis, CM_NAME, cm_name, false, true)
  73
  74 FunctionPass *llvm::createCostModelAnalysisPass() {
  75   return new CostModelAnalysis();
  76 }
  77
  78 void
  79 CostModelAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
  80   AU.setPreservesAll();
  81 }
  82
  83 bool
  84 CostModelAnalysis::runOnFunction(Function &F) {
  85  this->F = &F;
  86  auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
  87  TTI = TTIWP ? &TTIWP->getTTI(F) : nullptr;
  88
  89  return false;
  90 }
  91
  92 static bool isReverseVectorMask(SmallVectorImpl<int> &Mask) {
  93   for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i)
  94     if (Mask[i] > 0 && Mask[i] != (int)(MaskSize - 1 - i))
  95       return false;
  96   return true;
  97 }
  98
  99 static bool isAlternateVectorMask(SmallVectorImpl<int> &Mask) {
 100   bool isAlternate = true;
 101   unsigned MaskSize = Mask.size();
 102
 103   // Example: shufflevector A, B, <0,5,2,7>
 104   for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
 105     if (Mask[i] < 0)
 106       continue;
 107     isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i);
 108   }
 109
 110   if (isAlternate)
 111     return true;
 112
 113   isAlternate = true;
 114   // Example: shufflevector A, B, <4,1,6,3>
 115   for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
 116     if (Mask[i] < 0)
 117       continue;
 118     isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i);
 119   }
 120
 121   return isAlternate;
 122 }
 123
 124 static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
 125   TargetTransformInfo::OperandValueKind OpInfo =
 126     TargetTransformInfo::OK_AnyValue;
 127
 128   // Check for a splat of a constant or for a non uniform vector of constants.
 129   if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
 130     OpInfo = TargetTransformInfo::OK_NonUniformConstantValue;
 131     if (cast<Constant>(V)->getSplatValue() != nullptr)
 132       OpInfo = TargetTransformInfo::OK_UniformConstantValue;
 133   }
 134
 135   return OpInfo;
 136 }
 137
 138 static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
 139                                      unsigned Level) {
 140   // We don't need a shuffle if we just want to have element 0 in position 0 of
 141   // the vector.
 142   if (!SI && Level == 0 && IsLeft)
 143     return true;
 144   else if (!SI)
 145     return false;
 146
 147   SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1);
 148
 149   // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
 150   // we look at the left or right side.
 151   for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
 152     Mask[i] = val;
 153
 154   SmallVector<int, 16> ActualMask = SI->getShuffleMask();
 155   if (Mask != ActualMask)
 156     return false;
 157
 158   return true;
 159 }
 160
 161 static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp,
 162                                           unsigned Level, unsigned NumLevels) {
 163   // Match one level of pairwise operations.
 164   // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
 165   //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
 166   // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
 167   //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
 168   // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
 169   if (BinOp == nullptr)
 170     return false;
 171
 172   assert(BinOp->getType()->isVectorTy() && "Expecting a vector type");
 173
 174   unsigned Opcode = BinOp->getOpcode();
 175   Value *L = BinOp->getOperand(0);
 176   Value *R = BinOp->getOperand(1);
 177
 178   ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(L);
 179   if (!LS && Level)
 180     return false;
 181   ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(R);
 182   if (!RS && Level)
 183     return false;
 184
 185   // On level 0 we can omit one shufflevector instruction.
 186   if (!Level && !RS && !LS)
 187     return false;
 188
 189   // Shuffle inputs must match.
 190   Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr;
 191   Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr;
 192   Value *NextLevelOp = nullptr;
 193   if (NextLevelOpR && NextLevelOpL) {
 194     // If we have two shuffles their operands must match.
 195     if (NextLevelOpL != NextLevelOpR)
 196       return false;
 197
 198     NextLevelOp = NextLevelOpL;
 199   } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) {
 200     // On the first level we can omit the shufflevector <0, undef,...>. So the
 201     // input to the other shufflevector <1, undef> must match with one of the
 202     // inputs to the current binary operation.
 203     // Example:
 204     //  %NextLevelOpL = shufflevector %R, <1, undef ...>
 205     //  %BinOp        = fadd          %NextLevelOpL, %R
 206     if (NextLevelOpL && NextLevelOpL != R)
 207       return false;
 208     else if (NextLevelOpR && NextLevelOpR != L)
 209       return false;
 210
 211     NextLevelOp = NextLevelOpL ? R : L;
 212   } else
 213     return false;
 214
 215   // Check that the next levels binary operation exists and matches with the
 216   // current one.
 217   BinaryOperator *NextLevelBinOp = nullptr;
 218   if (Level + 1 != NumLevels) {
 219     if (!(NextLevelBinOp = dyn_cast<BinaryOperator>(NextLevelOp)))
 220       return false;
 221     else if (NextLevelBinOp->getOpcode() != Opcode)
 222       return false;
 223   }
 224
 225   // Shuffle mask for pairwise operation must match.
 226   if (matchPairwiseShuffleMask(LS, true, Level)) {
 227     if (!matchPairwiseShuffleMask(RS, false, Level))
 228       return false;
 229   } else if (matchPairwiseShuffleMask(RS, true, Level)) {
 230     if (!matchPairwiseShuffleMask(LS, false, Level))
 231       return false;
 232   } else
 233     return false;
 234
 235   if (++Level == NumLevels)
 236     return true;
 237
 238   // Match next level.
 239   return matchPairwiseReductionAtLevel(NextLevelBinOp, Level, NumLevels);
 240 }
 241
 242 static bool matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
 243                                    unsigned &Opcode, Type *&Ty) {
 244   if (!EnableReduxCost)
 245     return false;
 246
 247   // Need to extract the first element.
 248   ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
 249   unsigned Idx = ~0u;
 250   if (CI)
 251     Idx = CI->getZExtValue();
 252   if (Idx != 0)
 253     return false;
 254
 255   BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0));
 256   if (!RdxStart)
 257     return false;
 258
 259   Type *VecTy = ReduxRoot->getOperand(0)->getType();
 260   unsigned NumVecElems = VecTy->getVectorNumElements();
 261   if (!isPowerOf2_32(NumVecElems))
 262     return false;
 263
 264   // We look for a sequence of shuffle,shuffle,add triples like the following
 265   // that builds a pairwise reduction tree.
 266   //
 267   //  (X0, X1, X2, X3)
 268   //   (X0 + X1, X2 + X3, undef, undef)
 269   //    ((X0 + X1) + (X2 + X3), undef, undef, undef)
 270   //
 271   // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
 272   //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
 273   // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
 274   //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
 275   // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
 276   // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
 277   //       <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
 278   // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
 279   //       <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 280   // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
 281   // %r = extractelement <4 x float> %bin.rdx8, i32 0
 282   if (!matchPairwiseReductionAtLevel(RdxStart, 0,  Log2_32(NumVecElems)))
 283     return false;
 284
 285   Opcode = RdxStart->getOpcode();
 286   Ty = VecTy;
 287
 288   return true;
 289 }
 290
 291 static std::pair<Value *, ShuffleVectorInst *>
 292 getShuffleAndOtherOprd(BinaryOperator *B) {
 293
 294   Value *L = B->getOperand(0);
 295   Value *R = B->getOperand(1);
 296   ShuffleVectorInst *S = nullptr;
 297
 298   if ((S = dyn_cast<ShuffleVectorInst>(L)))
 299     return std::make_pair(R, S);
 300
 301   S = dyn_cast<ShuffleVectorInst>(R);
 302   return std::make_pair(L, S);
 303 }
 304
 305 static bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
 306                                           unsigned &Opcode, Type *&Ty) {
 307   if (!EnableReduxCost)
 308     return false;
 309
 310   // Need to extract the first element.
 311   ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
 312   unsigned Idx = ~0u;
 313   if (CI)
 314     Idx = CI->getZExtValue();
 315   if (Idx != 0)
 316     return false;
 317
 318   BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0));
 319   if (!RdxStart)
 320     return false;
 321   unsigned RdxOpcode = RdxStart->getOpcode();
 322
 323   Type *VecTy = ReduxRoot->getOperand(0)->getType();
 324   unsigned NumVecElems = VecTy->getVectorNumElements();
 325   if (!isPowerOf2_32(NumVecElems))
 326     return false;
 327
 328   // We look for a sequence of shuffles and adds like the following matching one
 329   // fadd, shuffle vector pair at a time.
 330   //
 331   // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
 332   //                           <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 333   // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
 334   // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
 335   //                          <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 336   // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
 337   // %r = extractelement <4 x float> %bin.rdx8, i32 0
 338
 339   unsigned MaskStart = 1;
 340   Value *RdxOp = RdxStart;
 341   SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
 342   unsigned NumVecElemsRemain = NumVecElems;
 343   while (NumVecElemsRemain - 1) {
 344     // Check for the right reduction operation.
 345     BinaryOperator *BinOp;
 346     if (!(BinOp = dyn_cast<BinaryOperator>(RdxOp)))
 347       return false;
 348     if (BinOp->getOpcode() != RdxOpcode)
 349       return false;
 350
 351     Value *NextRdxOp;
 352     ShuffleVectorInst *Shuffle;
 353     std::tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(BinOp);
 354
 355     // Check the current reduction operation and the shuffle use the same value.
 356     if (Shuffle == nullptr)
 357       return false;
 358     if (Shuffle->getOperand(0) != NextRdxOp)
 359       return false;
 360
 361     // Check that shuffle masks matches.
 362     for (unsigned j = 0; j != MaskStart; ++j)
 363       ShuffleMask[j] = MaskStart + j;
 364     // Fill the rest of the mask with -1 for undef.
 365     std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);
 366
 367     SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
 368     if (ShuffleMask != Mask)
 369       return false;
 370
 371     RdxOp = NextRdxOp;
 372     NumVecElemsRemain /= 2;
 373     MaskStart *= 2;
 374   }
 375
 376   Opcode = RdxOpcode;
 377   Ty = VecTy;
 378   return true;
 379 }
 380
 381 unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
 382   if (!TTI)
 383     return -1;
 384
 385   switch (I->getOpcode()) {
 386   case Instruction::GetElementPtr:
 387     return TTI->getUserCost(I);
 388
 389   case Instruction::Ret:
 390   case Instruction::PHI:
 391   case Instruction::Br: {
 392     return TTI->getCFInstrCost(I->getOpcode());
 393   }
 394   case Instruction::Add:
 395   case Instruction::FAdd:
 396   case Instruction::Sub:
 397   case Instruction::FSub:
 398   case Instruction::Mul:
 399   case Instruction::FMul:
 400   case Instruction::UDiv:
 401   case Instruction::SDiv:
 402   case Instruction::FDiv:
 403   case Instruction::URem:
 404   case Instruction::SRem:
 405   case Instruction::FRem:
 406   case Instruction::Shl:
 407   case Instruction::LShr:
 408   case Instruction::AShr:
 409   case Instruction::And:
 410   case Instruction::Or:
 411   case Instruction::Xor: {
 412     TargetTransformInfo::OperandValueKind Op1VK =
 413       getOperandInfo(I->getOperand(0));
 414     TargetTransformInfo::OperandValueKind Op2VK =
 415       getOperandInfo(I->getOperand(1));
 416     return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK,
 417                                        Op2VK);
 418   }
 419   case Instruction::Select: {
 420     const SelectInst *SI = cast<SelectInst>(I);
 421     Type *CondTy = SI->getCondition()->getType();
 422     return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy);
 423   }
 424   case Instruction::ICmp:
 425   case Instruction::FCmp: {
 426     Type *ValTy = I->getOperand(0)->getType();
 427     return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy);
 428   }
 429   case Instruction::Store: {
 430     const StoreInst *SI = cast<StoreInst>(I);
 431     Type *ValTy = SI->getValueOperand()->getType();
 432     return TTI->getMemoryOpCost(I->getOpcode(), ValTy,
 433                                  SI->getAlignment(),
 434                                  SI->getPointerAddressSpace());
 435   }
 436   case Instruction::Load: {
 437     const LoadInst *LI = cast<LoadInst>(I);
 438     return TTI->getMemoryOpCost(I->getOpcode(), I->getType(),
 439                                  LI->getAlignment(),
 440                                  LI->getPointerAddressSpace());
 441   }
 442   case Instruction::ZExt:
 443   case Instruction::SExt:
 444   case Instruction::FPToUI:
 445   case Instruction::FPToSI:
 446   case Instruction::FPExt:
 447   case Instruction::PtrToInt:
 448   case Instruction::IntToPtr:
 449   case Instruction::SIToFP:
 450   case Instruction::UIToFP:
 451   case Instruction::Trunc:
 452   case Instruction::FPTrunc:
 453   case Instruction::BitCast:
 454   case Instruction::AddrSpaceCast: {
 455     Type *SrcTy = I->getOperand(0)->getType();
 456     return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy);
 457   }
 458   case Instruction::ExtractElement: {
 459     const ExtractElementInst * EEI = cast<ExtractElementInst>(I);
 460     ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
 461     unsigned Idx = -1;
 462     if (CI)
 463       Idx = CI->getZExtValue();
 464
 465     // Try to match a reduction sequence (series of shufflevector and vector
 466     // adds followed by a extractelement).
 467     unsigned ReduxOpCode;
 468     Type *ReduxType;
 469
 470     if (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType))
 471       return TTI->getReductionCost(ReduxOpCode, ReduxType, false);
 472     else if (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType))
 473       return TTI->getReductionCost(ReduxOpCode, ReduxType, true);
 474
 475     return TTI->getVectorInstrCost(I->getOpcode(),
 476                                    EEI->getOperand(0)->getType(), Idx);
 477   }
 478   case Instruction::InsertElement: {
 479     const InsertElementInst * IE = cast<InsertElementInst>(I);
 480     ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
 481     unsigned Idx = -1;
 482     if (CI)
 483       Idx = CI->getZExtValue();
 484     return TTI->getVectorInstrCost(I->getOpcode(),
 485                                    IE->getType(), Idx);
 486   }
 487   case Instruction::ShuffleVector: {
 488     const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
 489     Type *VecTypOp0 = Shuffle->getOperand(0)->getType();
 490     unsigned NumVecElems = VecTypOp0->getVectorNumElements();
 491     SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
 492
 493     if (NumVecElems == Mask.size()) {
 494       if (isReverseVectorMask(Mask))
 495         return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0,
 496                                    0, nullptr);
 497       if (isAlternateVectorMask(Mask))
 498         return TTI->getShuffleCost(TargetTransformInfo::SK_Alternate,
 499                                    VecTypOp0, 0, nullptr);
 500     }
 501
 502     return -1;
 503   }
 504   case Instruction::Call:
 505     if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
 506       SmallVector<Type*, 4> Tys;
 507       for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J)
 508         Tys.push_back(II->getArgOperand(J)->getType());
 509
 510       return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
 511                                         Tys);
 512     }
 513     return -1;
 514   default:
 515     // We don't have any information on this instruction.
 516     return -1;
 517   }
 518 }
 519
 520 void CostModelAnalysis::print(raw_ostream &OS, const Module*) const {
 521   if (!F)
 522     return;
 523
 524   for (Function::iterator B = F->begin(), BE = F->end(); B != BE; ++B) {
 525     for (BasicBlock::iterator it = B->begin(), e = B->end(); it != e; ++it) {
 526       Instruction *Inst = it;
 527       unsigned Cost = getInstructionCost(Inst);
 528       if (Cost != (unsigned)-1)
 529         OS << "Cost Model: Found an estimated cost of " << Cost;
 530       else
 531         OS << "Cost Model: Unknown cost";
 532
 533       OS << " for instruction: "<< *Inst << "\n";
 534     }
 535   }
 536 }