lib/Transforms/Scalar/AlignmentFromAssumptions.cpp

   1 //===----------------------- AlignmentFromAssumptions.cpp -----------------===//
   2 //                  Set Load/Store Alignments From Assumptions
   3 //
   4 //                     The LLVM Compiler Infrastructure
   5 //
   6 // This file is distributed under the University of Illinois Open Source
   7 // License. See LICENSE.TXT for details.
   8 //
   9 //===----------------------------------------------------------------------===//
  10 //
  11 // This file implements a ScalarEvolution-based transformation to set
  12 // the alignments of load, stores and memory intrinsics based on the truth
  13 // expressions of assume intrinsics. The primary motivation is to handle
  14 // complex alignment assumptions that apply to vector loads and stores that
  15 // appear after vectorization and unrolling.
  16 //
  17 //===----------------------------------------------------------------------===//
  18
  19 #define AA_NAME "alignment-from-assumptions"
  20 #define DEBUG_TYPE AA_NAME
  21 #include "llvm/Transforms/Scalar.h"
  22 #include "llvm/ADT/SmallPtrSet.h"
  23 #include "llvm/ADT/Statistic.h"
  24 #include "llvm/Analysis/AssumptionTracker.h"
  25 #include "llvm/Analysis/LoopInfo.h"
  26 #include "llvm/Analysis/ValueTracking.h"
  27 #include "llvm/Analysis/ScalarEvolution.h"
  28 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  29 #include "llvm/IR/Constant.h"
  30 #include "llvm/IR/Dominators.h"
  31 #include "llvm/IR/Instruction.h"
  32 #include "llvm/IR/IntrinsicInst.h"
  33 #include "llvm/IR/Intrinsics.h"
  34 #include "llvm/IR/DataLayout.h"
  35 #include "llvm/Support/Debug.h"
  36 #include "llvm/Support/raw_ostream.h"
  37 using namespace llvm;
  38
  39 STATISTIC(NumLoadAlignChanged,
  40   "Number of loads changed by alignment assumptions");
  41 STATISTIC(NumStoreAlignChanged,
  42   "Number of stores changed by alignment assumptions");
  43 STATISTIC(NumMemIntAlignChanged,
  44   "Number of memory intrinsics changed by alignment assumptions");
  45
  46 namespace {
  47 struct AlignmentFromAssumptions : public FunctionPass {
  48   static char ID; // Pass identification, replacement for typeid
  49   AlignmentFromAssumptions() : FunctionPass(ID) {
  50     initializeAlignmentFromAssumptionsPass(*PassRegistry::getPassRegistry());
  51   }
  52
  53   bool runOnFunction(Function &F);
  54
  55   virtual void getAnalysisUsage(AnalysisUsage &AU) const {
  56     AU.addRequired<AssumptionTracker>();
  57     AU.addRequired<ScalarEvolution>();
  58     AU.addRequired<DominatorTreeWrapperPass>();
  59
  60     AU.setPreservesCFG();
  61     AU.addPreserved<LoopInfo>();
  62     AU.addPreserved<DominatorTreeWrapperPass>();
  63     AU.addPreserved<ScalarEvolution>();
  64   }
  65
  66   // For memory transfers, we need a common alignment for both the source and
  67   // destination. If we have a new alignment for only one operand of a transfer
  68   // instruction, save it in these maps.  If we reach the other operand through
  69   // another assumption later, then we may change the alignment at that point.
  70   DenseMap<MemTransferInst *, unsigned> NewDestAlignments, NewSrcAlignments;
  71
  72   AssumptionTracker *AT;
  73   ScalarEvolution *SE;
  74   DominatorTree *DT;
  75   const DataLayout *DL;
  76
  77   bool extractAlignmentInfo(CallInst *I, Value *&AAPtr, const SCEV *&AlignSCEV,
  78                             const SCEV *&OffSCEV);
  79   bool processAssumption(CallInst *I);
  80 };
  81 }
  82
  83 char AlignmentFromAssumptions::ID = 0;
  84 static const char aip_name[] = "Alignment from assumptions";
  85 INITIALIZE_PASS_BEGIN(AlignmentFromAssumptions, AA_NAME,
  86                       aip_name, false, false)
  87 INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
  88 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  89 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
  90 INITIALIZE_PASS_END(AlignmentFromAssumptions, AA_NAME,
  91                     aip_name, false, false)
  92
  93 FunctionPass *llvm::createAlignmentFromAssumptionsPass() {
  94   return new AlignmentFromAssumptions();
  95 }
  96
  97 // Given an expression for the (constant) alignment, AlignSCEV, and an
  98 // expression for the displacement between a pointer and the aligned address,
  99 // DiffSCEV, compute the alignment of the displaced pointer if it can be
 100 // reduced to a constant.
 101 static unsigned getNewAlignmentDiff(const SCEV *DiffSCEV,
 102                                     const SCEV *AlignSCEV,
 103                                     ScalarEvolution *SE) {
 104   // DiffUnits = Diff % int64_t(Alignment)
 105   const SCEV *DiffAlignDiv = SE->getUDivExpr(DiffSCEV, AlignSCEV);
 106   const SCEV *DiffAlign = SE->getMulExpr(DiffAlignDiv, AlignSCEV);
 107   const SCEV *DiffUnitsSCEV = SE->getMinusSCEV(DiffAlign, DiffSCEV);
 108
 109   DEBUG(dbgs() << "\talignment relative to " << *AlignSCEV << " is " <<
 110                   *DiffUnitsSCEV << " (diff: " << *DiffSCEV << ")\n");
 111
 112   if (const SCEVConstant *ConstDUSCEV =
 113       dyn_cast<SCEVConstant>(DiffUnitsSCEV)) {
 114     int64_t DiffUnits = ConstDUSCEV->getValue()->getSExtValue();
 115
 116     // If the displacement is an exact multiple of the alignment, then the
 117     // displaced pointer has the same alignment as the aligned pointer, so
 118     // return the alignment value.
 119     if (!DiffUnits)
 120       return (unsigned)
 121         cast<SCEVConstant>(AlignSCEV)->getValue()->getSExtValue();
 122
 123     // If the displacement is not an exact multiple, but the remainder is a
 124     // constant, then return this remainder (but only if it is a power of 2).
 125     uint64_t DiffUnitsAbs = abs64(DiffUnits);
 126     if (isPowerOf2_64(DiffUnitsAbs))
 127       return (unsigned) DiffUnitsAbs;
 128   }
 129
 130   return 0;
 131 }
 132
 133 // There is an address given by an offset OffSCEV from AASCEV which has an
 134 // alignment AlignSCEV. Use that information, if possible, to compute a new
 135 // alignment for Ptr.
 136 static unsigned getNewAlignment(const SCEV *AASCEV, const SCEV *AlignSCEV,
 137                                 const SCEV *OffSCEV, Value *Ptr,
 138                                 ScalarEvolution *SE) {
 139   const SCEV *PtrSCEV = SE->getSCEV(Ptr);
 140   const SCEV *DiffSCEV = SE->getMinusSCEV(PtrSCEV, AASCEV);
 141
 142   // What we really want to know is the overall offset to the aligned
 143   // address. This address is displaced by the provided offset.
 144   DiffSCEV = SE->getMinusSCEV(DiffSCEV, OffSCEV);
 145
 146   DEBUG(dbgs() << "AFI: alignment of " << *Ptr << " relative to " <<
 147                   *AlignSCEV << " and offset " << *OffSCEV <<
 148                   " using diff " << *DiffSCEV << "\n");
 149
 150   unsigned NewAlignment = getNewAlignmentDiff(DiffSCEV, AlignSCEV, SE);
 151   DEBUG(dbgs() << "\tnew alignment: " << NewAlignment << "\n");
 152
 153   if (NewAlignment) {
 154     return NewAlignment;
 155   } else if (const SCEVAddRecExpr *DiffARSCEV =
 156              dyn_cast<SCEVAddRecExpr>(DiffSCEV)) {
 157     // The relative offset to the alignment assumption did not yield a constant,
 158     // but we should try harder: if we assume that a is 32-byte aligned, then in
 159     // for (i = 0; i < 1024; i += 4) r += a[i]; not all of the loads from a are
 160     // 32-byte aligned, but instead alternate between 32 and 16-byte alignment.
 161     // As a result, the new alignment will not be a constant, but can still
 162     // be improved over the default (of 4) to 16.
 163
 164     const SCEV *DiffStartSCEV = DiffARSCEV->getStart();
 165     const SCEV *DiffIncSCEV = DiffARSCEV->getStepRecurrence(*SE);
 166
 167     DEBUG(dbgs() << "\ttrying start/inc alignment using start " <<
 168                     *DiffStartSCEV << " and inc " << *DiffIncSCEV << "\n");
 169
 170     // Now compute the new alignment using the displacement to the value in the
 171     // first iteration, and also the alignment using the per-iteration delta.
 172     // If these are the same, then use that answer. Otherwise, use the smaller
 173     // one, but only if it divides the larger one.
 174     NewAlignment = getNewAlignmentDiff(DiffStartSCEV, AlignSCEV, SE);
 175     unsigned NewIncAlignment = getNewAlignmentDiff(DiffIncSCEV, AlignSCEV, SE);
 176
 177     DEBUG(dbgs() << "\tnew start alignment: " << NewAlignment << "\n");
 178     DEBUG(dbgs() << "\tnew inc alignment: " << NewIncAlignment << "\n");
 179
 180     if (NewAlignment > NewIncAlignment) {
 181       if (NewAlignment % NewIncAlignment == 0) {
 182         DEBUG(dbgs() << "\tnew start/inc alignment: " <<
 183                         NewIncAlignment << "\n");
 184         return NewIncAlignment;
 185       }
 186     } else if (NewIncAlignment > NewAlignment) {
 187       if (NewIncAlignment % NewAlignment == 0) {
 188         DEBUG(dbgs() << "\tnew start/inc alignment: " <<
 189                         NewAlignment << "\n");
 190         return NewAlignment;
 191       }
 192     } else if (NewIncAlignment == NewAlignment && NewIncAlignment) {
 193       DEBUG(dbgs() << "\tnew start/inc alignment: " <<
 194                       NewAlignment << "\n");
 195       return NewAlignment;
 196     }
 197   }
 198
 199   return 0;
 200 }
 201
 202 bool AlignmentFromAssumptions::extractAlignmentInfo(CallInst *I,
 203                                  Value *&AAPtr, const SCEV *&AlignSCEV,
 204                                  const SCEV *&OffSCEV) {
 205   // An alignment assume must be a statement about the least-significant
 206   // bits of the pointer being zero, possibly with some offset.
 207   ICmpInst *ICI = dyn_cast<ICmpInst>(I->getArgOperand(0));
 208   if (!ICI)
 209     return false;
 210
 211   // This must be an expression of the form: x & m == 0.
 212   if (ICI->getPredicate() != ICmpInst::ICMP_EQ)
 213     return false;
 214
 215   // Swap things around so that the RHS is 0.
 216   Value *CmpLHS = ICI->getOperand(0);
 217   Value *CmpRHS = ICI->getOperand(1);
 218   const SCEV *CmpLHSSCEV = SE->getSCEV(CmpLHS);
 219   const SCEV *CmpRHSSCEV = SE->getSCEV(CmpRHS);
 220   if (CmpLHSSCEV->isZero())
 221     std::swap(CmpLHS, CmpRHS);
 222   else if (!CmpRHSSCEV->isZero())
 223     return false;
 224
 225   BinaryOperator *CmpBO = dyn_cast<BinaryOperator>(CmpLHS);
 226   if (!CmpBO || CmpBO->getOpcode() != Instruction::And)
 227     return false;
 228
 229   // Swap things around so that the right operand of the and is a constant
 230   // (the mask); we cannot deal with variable masks.
 231   Value *AndLHS = CmpBO->getOperand(0);
 232   Value *AndRHS = CmpBO->getOperand(1);
 233   const SCEV *AndLHSSCEV = SE->getSCEV(AndLHS);
 234   const SCEV *AndRHSSCEV = SE->getSCEV(AndRHS);
 235   if (isa<SCEVConstant>(AndLHSSCEV)) {
 236     std::swap(AndLHS, AndRHS);
 237     std::swap(AndLHSSCEV, AndRHSSCEV);
 238   }
 239
 240   const SCEVConstant *MaskSCEV = dyn_cast<SCEVConstant>(AndRHSSCEV);
 241   if (!MaskSCEV)
 242     return false;
 243
 244   // The mask must have some trailing ones (otherwise the condition is
 245   // trivial and tells us nothing about the alignment of the left operand).
 246   unsigned TrailingOnes =
 247     MaskSCEV->getValue()->getValue().countTrailingOnes();
 248   if (!TrailingOnes)
 249     return false;
 250
 251   // Cap the alignment at the maximum with which LLVM can deal (and make sure
 252   // we don't overflow the shift).
 253   uint64_t Alignment;
 254   TrailingOnes = std::min(TrailingOnes,
 255     unsigned(sizeof(unsigned) * CHAR_BIT - 1));
 256   Alignment = std::min(1u << TrailingOnes, +Value::MaximumAlignment);
 257
 258   Type *Int64Ty = Type::getInt64Ty(I->getParent()->getParent()->getContext());
 259   AlignSCEV = SE->getConstant(Int64Ty, Alignment);
 260
 261   // The LHS might be a ptrtoint instruction, or it might be the pointer
 262   // with an offset.
 263   AAPtr = nullptr;
 264   OffSCEV = nullptr;
 265   if (PtrToIntInst *PToI = dyn_cast<PtrToIntInst>(AndLHS)) {
 266     AAPtr = PToI->getPointerOperand();
 267     OffSCEV = SE->getConstant(Int64Ty, 0);
 268   } else if (const SCEVAddExpr* AndLHSAddSCEV =
 269              dyn_cast<SCEVAddExpr>(AndLHSSCEV)) {
 270     // Try to find the ptrtoint; subtract it and the rest is the offset.
 271     for (SCEVAddExpr::op_iterator J = AndLHSAddSCEV->op_begin(),
 272          JE = AndLHSAddSCEV->op_end(); J != JE; ++J)
 273       if (const SCEVUnknown *OpUnk = dyn_cast<SCEVUnknown>(*J))
 274         if (PtrToIntInst *PToI = dyn_cast<PtrToIntInst>(OpUnk->getValue())) {
 275           AAPtr = PToI->getPointerOperand();
 276           OffSCEV = SE->getMinusSCEV(AndLHSAddSCEV, *J);
 277           break;
 278         }
 279   }
 280
 281   if (!AAPtr)
 282     return false;
 283
 284   // Sign extend the offset to 64 bits (so that it is like all of the other
 285   // expressions).
 286   unsigned OffSCEVBits = OffSCEV->getType()->getPrimitiveSizeInBits();
 287   if (OffSCEVBits < 64)
 288     OffSCEV = SE->getSignExtendExpr(OffSCEV, Int64Ty);
 289   else if (OffSCEVBits > 64)
 290     return false;
 291
 292   AAPtr = AAPtr->stripPointerCasts();
 293   return true;
 294 }
 295
 296 bool AlignmentFromAssumptions::processAssumption(CallInst *ACall) {
 297   Value *AAPtr;
 298   const SCEV *AlignSCEV, *OffSCEV;
 299   if (!extractAlignmentInfo(ACall, AAPtr, AlignSCEV, OffSCEV))
 300     return false;
 301
 302   const SCEV *AASCEV = SE->getSCEV(AAPtr);
 303
 304   // Apply the assumption to all other users of the specified pointer.
 305   SmallPtrSet<Instruction *, 32> Visited;
 306   SmallVector<Instruction*, 16> WorkList;
 307   for (User *J : AAPtr->users()) {
 308     if (J == ACall)
 309       continue;
 310
 311     if (Instruction *K = dyn_cast<Instruction>(J))
 312       if (isValidAssumeForContext(ACall, K, DL, DT))
 313         WorkList.push_back(K);
 314   }
 315
 316   while (!WorkList.empty()) {
 317     Instruction *J = WorkList.pop_back_val();
 318
 319     if (LoadInst *LI = dyn_cast<LoadInst>(J)) {
 320       unsigned NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
 321         LI->getPointerOperand(), SE);
 322
 323       if (NewAlignment > LI->getAlignment()) {
 324         LI->setAlignment(NewAlignment);
 325         ++NumLoadAlignChanged;
 326       }
 327     } else if (StoreInst *SI = dyn_cast<StoreInst>(J)) {
 328       unsigned NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
 329         SI->getPointerOperand(), SE);
 330
 331       if (NewAlignment > SI->getAlignment()) {
 332         SI->setAlignment(NewAlignment);
 333         ++NumStoreAlignChanged;
 334       }
 335     } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(J)) {
 336       unsigned NewDestAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
 337         MI->getDest(), SE);
 338
 339       // For memory transfers, we need a common alignment for both the
 340       // source and destination. If we have a new alignment for this
 341       // instruction, but only for one operand, save it. If we reach the
 342       // other operand through another assumption later, then we may
 343       // change the alignment at that point.
 344       if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
 345         unsigned NewSrcAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
 346           MTI->getSource(), SE);
 347
 348         DenseMap<MemTransferInst *, unsigned>::iterator DI =
 349           NewDestAlignments.find(MTI);
 350         unsigned AltDestAlignment = (DI == NewDestAlignments.end()) ?
 351                                     0 : DI->second;
 352
 353         DenseMap<MemTransferInst *, unsigned>::iterator SI =
 354           NewSrcAlignments.find(MTI);
 355         unsigned AltSrcAlignment = (SI == NewSrcAlignments.end()) ?
 356                                    0 : SI->second;
 357
 358         DEBUG(dbgs() << "\tmem trans: " << NewDestAlignment << " " <<
 359                         AltDestAlignment << " " << NewSrcAlignment <<
 360                         " " << AltSrcAlignment << "\n");
 361
 362         // Of these four alignments, pick the largest possible...
 363         unsigned NewAlignment = 0;
 364         if (NewDestAlignment <= std::max(NewSrcAlignment, AltSrcAlignment))
 365           NewAlignment = std::max(NewAlignment, NewDestAlignment);
 366         if (AltDestAlignment <= std::max(NewSrcAlignment, AltSrcAlignment))
 367           NewAlignment = std::max(NewAlignment, AltDestAlignment);
 368         if (NewSrcAlignment <= std::max(NewDestAlignment, AltDestAlignment))
 369           NewAlignment = std::max(NewAlignment, NewSrcAlignment);
 370         if (AltSrcAlignment <= std::max(NewDestAlignment, AltDestAlignment))
 371           NewAlignment = std::max(NewAlignment, AltSrcAlignment);
 372
 373         if (NewAlignment > MI->getAlignment()) {
 374           MI->setAlignment(ConstantInt::get(Type::getInt32Ty(
 375             MI->getParent()->getContext()), NewAlignment));
 376           ++NumMemIntAlignChanged;
 377         }
 378
 379         NewDestAlignments.insert(std::make_pair(MTI, NewDestAlignment));
 380         NewSrcAlignments.insert(std::make_pair(MTI, NewSrcAlignment));
 381       } else if (NewDestAlignment > MI->getAlignment()) {
 382         assert((!isa<MemIntrinsic>(MI) || isa<MemSetInst>(MI)) &&
 383                "Unknown memory intrinsic");
 384
 385         MI->setAlignment(ConstantInt::get(Type::getInt32Ty(
 386           MI->getParent()->getContext()), NewDestAlignment));
 387         ++NumMemIntAlignChanged;
 388       }
 389     }
 390
 391     // Now that we've updated that use of the pointer, look for other uses of
 392     // the pointer to update.
 393     Visited.insert(J);
 394     for (User *UJ : J->users()) {
 395       Instruction *K = cast<Instruction>(UJ);
 396       if (!Visited.count(K) && isValidAssumeForContext(ACall, K, DL, DT))
 397         WorkList.push_back(K);
 398     }
 399   }
 400
 401   return true;
 402 }
 403
 404 bool AlignmentFromAssumptions::runOnFunction(Function &F) {
 405   bool Changed = false;
 406   AT = &getAnalysis<AssumptionTracker>();
 407   SE = &getAnalysis<ScalarEvolution>();
 408   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
 409   DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
 410   DL = DLP ? &DLP->getDataLayout() : nullptr;
 411
 412   NewDestAlignments.clear();
 413   NewSrcAlignments.clear();
 414
 415   for (auto &I : AT->assumptions(&F))
 416     Changed |= processAssumption(I);
 417
 418   return Changed;
 419 }
 420