lib/Target/PowerPC/PPCTargetTransformInfo.cpp

   1 //===-- PPCTargetTransformInfo.cpp - PPC specific TTI pass ----------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 /// \file
  10 /// This file implements a TargetTransformInfo analysis pass specific to the
  11 /// PPC target machine. It uses the target's detailed information to provide
  12 /// more precise answers to certain TTI queries, while letting the target
  13 /// independent and default TTI implementations handle the rest.
  14 ///
  15 //===----------------------------------------------------------------------===//
  16
  17 #include "PPC.h"
  18 #include "PPCTargetMachine.h"
  19 #include "llvm/Analysis/TargetTransformInfo.h"
  20 #include "llvm/Support/CommandLine.h"
  21 #include "llvm/Support/Debug.h"
  22 #include "llvm/Target/CostTable.h"
  23 #include "llvm/Target/TargetLowering.h"
  24 using namespace llvm;
  25
  26 #define DEBUG_TYPE "ppctti"
  27
  28 static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
  29 cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);
  30
  31 // Declare the pass initialization routine locally as target-specific passes
  32 // don't have a target-wide initialization entry point, and so we rely on the
  33 // pass constructor initialization.
  34 namespace llvm {
  35 void initializePPCTTIPass(PassRegistry &);
  36 }
  37
  38 namespace {
  39
  40 class PPCTTI final : public ImmutablePass, public TargetTransformInfo {
  41   const PPCSubtarget *ST;
  42   const PPCTargetLowering *TLI;
  43
  44 public:
  45   PPCTTI() : ImmutablePass(ID), ST(nullptr), TLI(nullptr) {
  46     llvm_unreachable("This pass cannot be directly constructed");
  47   }
  48
  49   PPCTTI(const PPCTargetMachine *TM)
  50       : ImmutablePass(ID), ST(TM->getSubtargetImpl()),
  51         TLI(TM->getSubtargetImpl()->getTargetLowering()) {
  52     initializePPCTTIPass(*PassRegistry::getPassRegistry());
  53   }
  54
  55   virtual void initializePass() override {
  56     pushTTIStack(this);
  57   }
  58
  59   virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
  60     TargetTransformInfo::getAnalysisUsage(AU);
  61   }
  62
  63   /// Pass identification.
  64   static char ID;
  65
  66   /// Provide necessary pointer adjustments for the two base classes.
  67   virtual void *getAdjustedAnalysisPointer(const void *ID) override {
  68     if (ID == &TargetTransformInfo::ID)
  69       return (TargetTransformInfo*)this;
  70     return this;
  71   }
  72
  73   /// \name Scalar TTI Implementations
  74   /// @{
  75   unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
  76
  77   unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
  78                          Type *Ty) const override;
  79   unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
  80                          Type *Ty) const override;
  81
  82   virtual PopcntSupportKind
  83   getPopcntSupport(unsigned TyWidth) const override;
  84   virtual void getUnrollingPreferences(
  85     Loop *L, UnrollingPreferences &UP) const override;
  86
  87   /// @}
  88
  89   /// \name Vector TTI Implementations
  90   /// @{
  91
  92   virtual unsigned getNumberOfRegisters(bool Vector) const override;
  93   virtual unsigned getRegisterBitWidth(bool Vector) const override;
  94   virtual unsigned getMaximumUnrollFactor() const override;
  95   virtual unsigned
  96   getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
  97                          OperandValueKind, OperandValueProperties,
  98                          OperandValueProperties) const override;
  99   virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
 100                                   int Index, Type *SubTp) const override;
 101   virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
 102                                     Type *Src) const override;
 103   virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
 104                                       Type *CondTy) const override;
 105   virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
 106                                       unsigned Index) const override;
 107   virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
 108                                    unsigned Alignment,
 109                                    unsigned AddressSpace) const override;
 110
 111   /// @}
 112 };
 113
 114 } // end anonymous namespace
 115
 116 INITIALIZE_AG_PASS(PPCTTI, TargetTransformInfo, "ppctti",
 117                    "PPC Target Transform Info", true, true, false)
 118 char PPCTTI::ID = 0;
 119
 120 ImmutablePass *
 121 llvm::createPPCTargetTransformInfoPass(const PPCTargetMachine *TM) {
 122   return new PPCTTI(TM);
 123 }
 124
 125
 126 //===----------------------------------------------------------------------===//
 127 //
 128 // PPC cost model.
 129 //
 130 //===----------------------------------------------------------------------===//
 131
 132 PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const {
 133   assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
 134   if (ST->hasPOPCNTD() && TyWidth <= 64)
 135     return PSK_FastHardware;
 136   return PSK_Software;
 137 }
 138
 139 unsigned PPCTTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
 140   if (DisablePPCConstHoist)
 141     return TargetTransformInfo::getIntImmCost(Imm, Ty);
 142
 143   assert(Ty->isIntegerTy());
 144
 145   unsigned BitSize = Ty->getPrimitiveSizeInBits();
 146   if (BitSize == 0)
 147     return ~0U;
 148
 149   if (Imm == 0)
 150     return TCC_Free;
 151
 152   if (Imm.getBitWidth() <= 64) {
 153     if (isInt<16>(Imm.getSExtValue()))
 154       return TCC_Basic;
 155
 156     if (isInt<32>(Imm.getSExtValue())) {
 157       // A constant that can be materialized using lis.
 158       if ((Imm.getZExtValue() & 0xFFFF) == 0)
 159         return TCC_Basic;
 160
 161       return 2 * TCC_Basic;
 162     }
 163   }
 164
 165   return 4 * TCC_Basic;
 166 }
 167
 168 unsigned PPCTTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
 169                                const APInt &Imm, Type *Ty) const {
 170   if (DisablePPCConstHoist)
 171     return TargetTransformInfo::getIntImmCost(IID, Idx, Imm, Ty);
 172
 173   assert(Ty->isIntegerTy());
 174
 175   unsigned BitSize = Ty->getPrimitiveSizeInBits();
 176   if (BitSize == 0)
 177     return ~0U;
 178
 179   switch (IID) {
 180   default: return TCC_Free;
 181   case Intrinsic::sadd_with_overflow:
 182   case Intrinsic::uadd_with_overflow:
 183   case Intrinsic::ssub_with_overflow:
 184   case Intrinsic::usub_with_overflow:
 185     if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue()))
 186       return TCC_Free;
 187     break;
 188   }
 189   return PPCTTI::getIntImmCost(Imm, Ty);
 190 }
 191
 192 unsigned PPCTTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
 193                                Type *Ty) const {
 194   if (DisablePPCConstHoist)
 195     return TargetTransformInfo::getIntImmCost(Opcode, Idx, Imm, Ty);
 196
 197   assert(Ty->isIntegerTy());
 198
 199   unsigned BitSize = Ty->getPrimitiveSizeInBits();
 200   if (BitSize == 0)
 201     return ~0U;
 202
 203   unsigned ImmIdx = ~0U;
 204   bool ShiftedFree = false, RunFree = false, UnsignedFree = false,
 205        ZeroFree = false;
 206   switch (Opcode) {
 207   default: return TCC_Free;
 208   case Instruction::GetElementPtr:
 209     // Always hoist the base address of a GetElementPtr. This prevents the
 210     // creation of new constants for every base constant that gets constant
 211     // folded with the offset.
 212     if (Idx == 0)
 213       return 2 * TCC_Basic;
 214     return TCC_Free;
 215   case Instruction::And:
 216     RunFree = true; // (for the rotate-and-mask instructions)
 217     // Fallthrough...
 218   case Instruction::Add:
 219   case Instruction::Or:
 220   case Instruction::Xor:
 221     ShiftedFree = true;
 222     // Fallthrough...
 223   case Instruction::Sub:
 224   case Instruction::Mul:
 225   case Instruction::Shl:
 226   case Instruction::LShr:
 227   case Instruction::AShr:
 228     ImmIdx = 1;
 229     break;
 230   case Instruction::ICmp:
 231     UnsignedFree = true;
 232     ImmIdx = 1;
 233     // Fallthrough... (zero comparisons can use record-form instructions)
 234   case Instruction::Select:
 235     ZeroFree = true;
 236     break;
 237   case Instruction::PHI:
 238   case Instruction::Call:
 239   case Instruction::Ret:
 240   case Instruction::Load:
 241   case Instruction::Store:
 242     break;
 243   }
 244
 245   if (ZeroFree && Imm == 0)
 246     return TCC_Free;
 247
 248   if (Idx == ImmIdx && Imm.getBitWidth() <= 64) {
 249     if (isInt<16>(Imm.getSExtValue()))
 250       return TCC_Free;
 251
 252     if (RunFree) {
 253       if (Imm.getBitWidth() <= 32 &&
 254           (isShiftedMask_32(Imm.getZExtValue()) ||
 255            isShiftedMask_32(~Imm.getZExtValue())))
 256         return TCC_Free;
 257
 258
 259       if (ST->isPPC64() &&
 260           (isShiftedMask_64(Imm.getZExtValue()) ||
 261            isShiftedMask_64(~Imm.getZExtValue())))
 262         return TCC_Free;
 263     }
 264
 265     if (UnsignedFree && isUInt<16>(Imm.getZExtValue()))
 266       return TCC_Free;
 267
 268     if (ShiftedFree && (Imm.getZExtValue() & 0xFFFF) == 0)
 269       return TCC_Free;
 270   }
 271
 272   return PPCTTI::getIntImmCost(Imm, Ty);
 273 }
 274
 275 void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const {
 276   if (ST->getDarwinDirective() == PPC::DIR_A2) {
 277     // The A2 is in-order with a deep pipeline, and concatenation unrolling
 278     // helps expose latency-hiding opportunities to the instruction scheduler.
 279     UP.Partial = UP.Runtime = true;
 280   }
 281 }
 282
 283 unsigned PPCTTI::getNumberOfRegisters(bool Vector) const {
 284   if (Vector && !ST->hasAltivec())
 285     return 0;
 286   return ST->hasVSX() ? 64 : 32;
 287 }
 288
 289 unsigned PPCTTI::getRegisterBitWidth(bool Vector) const {
 290   if (Vector) {
 291     if (ST->hasAltivec()) return 128;
 292     return 0;
 293   }
 294
 295   if (ST->isPPC64())
 296     return 64;
 297   return 32;
 298
 299 }
 300
 301 unsigned PPCTTI::getMaximumUnrollFactor() const {
 302   unsigned Directive = ST->getDarwinDirective();
 303   // The 440 has no SIMD support, but floating-point instructions
 304   // have a 5-cycle latency, so unroll by 5x for latency hiding.
 305   if (Directive == PPC::DIR_440)
 306     return 5;
 307
 308   // The A2 has no SIMD support, but floating-point instructions
 309   // have a 6-cycle latency, so unroll by 6x for latency hiding.
 310   if (Directive == PPC::DIR_A2)
 311     return 6;
 312
 313   // FIXME: For lack of any better information, do no harm...
 314   if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500)
 315     return 1;
 316
 317   // For most things, modern systems have two execution units (and
 318   // out-of-order execution).
 319   return 2;
 320 }
 321
 322 unsigned PPCTTI::getArithmeticInstrCost(
 323     unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
 324     OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo,
 325     OperandValueProperties Opd2PropInfo) const {
 326   assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
 327
 328   // Fallback to the default implementation.
 329   return TargetTransformInfo::getArithmeticInstrCost(
 330       Opcode, Ty, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo);
 331 }
 332
 333 unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
 334                                 Type *SubTp) const {
 335   return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
 336 }
 337
 338 unsigned PPCTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
 339   assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
 340
 341   return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
 342 }
 343
 344 unsigned PPCTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
 345                                     Type *CondTy) const {
 346   return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
 347 }
 348
 349 unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
 350                                     unsigned Index) const {
 351   assert(Val->isVectorTy() && "This must be a vector type");
 352
 353   int ISD = TLI->InstructionOpcodeToISD(Opcode);
 354   assert(ISD && "Invalid opcode");
 355
 356   if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
 357     // Double-precision scalars are already located in index #0.
 358     if (Index == 0)
 359       return 0;
 360
 361     return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
 362   }
 363
 364   // Estimated cost of a load-hit-store delay.  This was obtained
 365   // experimentally as a minimum needed to prevent unprofitable
 366   // vectorization for the paq8p benchmark.  It may need to be
 367   // raised further if other unprofitable cases remain.
 368   unsigned LHSPenalty = 2;
 369   if (ISD == ISD::INSERT_VECTOR_ELT)
 370     LHSPenalty += 7;
 371
 372   // Vector element insert/extract with Altivec is very expensive,
 373   // because they require store and reload with the attendant
 374   // processor stall for load-hit-store.  Until VSX is available,
 375   // these need to be estimated as very costly.
 376   if (ISD == ISD::EXTRACT_VECTOR_ELT ||
 377       ISD == ISD::INSERT_VECTOR_ELT)
 378     return LHSPenalty +
 379       TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
 380
 381   return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
 382 }
 383
 384 unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
 385                                  unsigned AddressSpace) const {
 386   // Legalize the type.
 387   std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
 388   assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
 389          "Invalid Opcode");
 390
 391   unsigned Cost =
 392     TargetTransformInfo::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
 393
 394   // VSX loads/stores support unaligned access.
 395   if (ST->hasVSX()) {
 396     if (LT.second == MVT::v2f64 || LT.second == MVT::v2i64)
 397       return Cost;
 398   }
 399
 400   bool UnalignedAltivec =
 401     Src->isVectorTy() &&
 402     Src->getPrimitiveSizeInBits() >= LT.second.getSizeInBits() &&
 403     LT.second.getSizeInBits() == 128 &&
 404     Opcode == Instruction::Load;
 405
 406   // PPC in general does not support unaligned loads and stores. They'll need
 407   // to be decomposed based on the alignment factor.
 408   unsigned SrcBytes = LT.second.getStoreSize();
 409   if (SrcBytes && Alignment && Alignment < SrcBytes && !UnalignedAltivec) {
 410     Cost += LT.first*(SrcBytes/Alignment-1);
 411
 412     // For a vector type, there is also scalarization overhead (only for
 413     // stores, loads are expanded using the vector-load + permutation sequence,
 414     // which is much less expensive).
 415     if (Src->isVectorTy() && Opcode == Instruction::Store)
 416       for (int i = 0, e = Src->getVectorNumElements(); i < e; ++i)
 417         Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);
 418   }
 419
 420   return Cost;
 421 }
 422