lib/Transforms/Scalar/EarlyCSE.cpp

   1 //===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This pass performs a simple dominator tree walk that eliminates trivially
  11 // redundant instructions.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "llvm/Transforms/Scalar/EarlyCSE.h"
  16 #include "llvm/ADT/Hashing.h"
  17 #include "llvm/ADT/ScopedHashTable.h"
  18 #include "llvm/ADT/Statistic.h"
  19 #include "llvm/Analysis/GlobalsModRef.h"
  20 #include "llvm/Analysis/AssumptionCache.h"
  21 #include "llvm/Analysis/InstructionSimplify.h"
  22 #include "llvm/Analysis/TargetLibraryInfo.h"
  23 #include "llvm/Analysis/TargetTransformInfo.h"
  24 #include "llvm/IR/DataLayout.h"
  25 #include "llvm/IR/Dominators.h"
  26 #include "llvm/IR/Instructions.h"
  27 #include "llvm/IR/IntrinsicInst.h"
  28 #include "llvm/IR/PatternMatch.h"
  29 #include "llvm/Pass.h"
  30 #include "llvm/Support/Debug.h"
  31 #include "llvm/Support/RecyclingAllocator.h"
  32 #include "llvm/Support/raw_ostream.h"
  33 #include "llvm/Transforms/Scalar.h"
  34 #include "llvm/Transforms/Utils/Local.h"
  35 #include <deque>
  36 using namespace llvm;
  37 using namespace llvm::PatternMatch;
  38
  39 #define DEBUG_TYPE "early-cse"
  40
  41 STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
  42 STATISTIC(NumCSE,      "Number of instructions CSE'd");
  43 STATISTIC(NumCSELoad,  "Number of load instructions CSE'd");
  44 STATISTIC(NumCSECall,  "Number of call instructions CSE'd");
  45 STATISTIC(NumDSE,      "Number of trivial dead stores removed");
  46
  47 //===----------------------------------------------------------------------===//
  48 // SimpleValue
  49 //===----------------------------------------------------------------------===//
  50
  51 namespace {
  52 /// \brief Struct representing the available values in the scoped hash table.
  53 struct SimpleValue {
  54   Instruction *Inst;
  55
  56   SimpleValue(Instruction *I) : Inst(I) {
  57     assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
  58   }
  59
  60   bool isSentinel() const {
  61     return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
  62            Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
  63   }
  64
  65   static bool canHandle(Instruction *Inst) {
  66     // This can only handle non-void readnone functions.
  67     if (CallInst *CI = dyn_cast<CallInst>(Inst))
  68       return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
  69     return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) ||
  70            isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) ||
  71            isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
  72            isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
  73            isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
  74   }
  75 };
  76 }
  77
  78 namespace llvm {
  79 template <> struct DenseMapInfo<SimpleValue> {
  80   static inline SimpleValue getEmptyKey() {
  81     return DenseMapInfo<Instruction *>::getEmptyKey();
  82   }
  83   static inline SimpleValue getTombstoneKey() {
  84     return DenseMapInfo<Instruction *>::getTombstoneKey();
  85   }
  86   static unsigned getHashValue(SimpleValue Val);
  87   static bool isEqual(SimpleValue LHS, SimpleValue RHS);
  88 };
  89 }
  90
  91 unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
  92   Instruction *Inst = Val.Inst;
  93   // Hash in all of the operands as pointers.
  94   if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
  95     Value *LHS = BinOp->getOperand(0);
  96     Value *RHS = BinOp->getOperand(1);
  97     if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
  98       std::swap(LHS, RHS);
  99
 100     if (isa<OverflowingBinaryOperator>(BinOp)) {
 101       // Hash the overflow behavior
 102       unsigned Overflow =
 103           BinOp->hasNoSignedWrap() * OverflowingBinaryOperator::NoSignedWrap |
 104           BinOp->hasNoUnsignedWrap() *
 105               OverflowingBinaryOperator::NoUnsignedWrap;
 106       return hash_combine(BinOp->getOpcode(), Overflow, LHS, RHS);
 107     }
 108
 109     return hash_combine(BinOp->getOpcode(), LHS, RHS);
 110   }
 111
 112   if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
 113     Value *LHS = CI->getOperand(0);
 114     Value *RHS = CI->getOperand(1);
 115     CmpInst::Predicate Pred = CI->getPredicate();
 116     if (Inst->getOperand(0) > Inst->getOperand(1)) {
 117       std::swap(LHS, RHS);
 118       Pred = CI->getSwappedPredicate();
 119     }
 120     return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
 121   }
 122
 123   if (CastInst *CI = dyn_cast<CastInst>(Inst))
 124     return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));
 125
 126   if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
 127     return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
 128                         hash_combine_range(EVI->idx_begin(), EVI->idx_end()));
 129
 130   if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
 131     return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
 132                         IVI->getOperand(1),
 133                         hash_combine_range(IVI->idx_begin(), IVI->idx_end()));
 134
 135   assert((isa<CallInst>(Inst) || isa<BinaryOperator>(Inst) ||
 136           isa<GetElementPtrInst>(Inst) || isa<SelectInst>(Inst) ||
 137           isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
 138           isa<ShuffleVectorInst>(Inst)) &&
 139          "Invalid/unknown instruction");
 140
 141   // Mix in the opcode.
 142   return hash_combine(
 143       Inst->getOpcode(),
 144       hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
 145 }
 146
 147 bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
 148   Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
 149
 150   if (LHS.isSentinel() || RHS.isSentinel())
 151     return LHSI == RHSI;
 152
 153   if (LHSI->getOpcode() != RHSI->getOpcode())
 154     return false;
 155   if (LHSI->isIdenticalTo(RHSI))
 156     return true;
 157
 158   // If we're not strictly identical, we still might be a commutable instruction
 159   if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
 160     if (!LHSBinOp->isCommutative())
 161       return false;
 162
 163     assert(isa<BinaryOperator>(RHSI) &&
 164            "same opcode, but different instruction type?");
 165     BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);
 166
 167     // Check overflow attributes
 168     if (isa<OverflowingBinaryOperator>(LHSBinOp)) {
 169       assert(isa<OverflowingBinaryOperator>(RHSBinOp) &&
 170              "same opcode, but different operator type?");
 171       if (LHSBinOp->hasNoUnsignedWrap() != RHSBinOp->hasNoUnsignedWrap() ||
 172           LHSBinOp->hasNoSignedWrap() != RHSBinOp->hasNoSignedWrap())
 173         return false;
 174     }
 175
 176     // Commuted equality
 177     return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
 178            LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
 179   }
 180   if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
 181     assert(isa<CmpInst>(RHSI) &&
 182            "same opcode, but different instruction type?");
 183     CmpInst *RHSCmp = cast<CmpInst>(RHSI);
 184     // Commuted equality
 185     return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
 186            LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
 187            LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
 188   }
 189
 190   return false;
 191 }
 192
 193 //===----------------------------------------------------------------------===//
 194 // CallValue
 195 //===----------------------------------------------------------------------===//
 196
 197 namespace {
 198 /// \brief Struct representing the available call values in the scoped hash
 199 /// table.
 200 struct CallValue {
 201   Instruction *Inst;
 202
 203   CallValue(Instruction *I) : Inst(I) {
 204     assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
 205   }
 206
 207   bool isSentinel() const {
 208     return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
 209            Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
 210   }
 211
 212   static bool canHandle(Instruction *Inst) {
 213     // Don't value number anything that returns void.
 214     if (Inst->getType()->isVoidTy())
 215       return false;
 216
 217     CallInst *CI = dyn_cast<CallInst>(Inst);
 218     if (!CI || !CI->onlyReadsMemory())
 219       return false;
 220     return true;
 221   }
 222 };
 223 }
 224
 225 namespace llvm {
 226 template <> struct DenseMapInfo<CallValue> {
 227   static inline CallValue getEmptyKey() {
 228     return DenseMapInfo<Instruction *>::getEmptyKey();
 229   }
 230   static inline CallValue getTombstoneKey() {
 231     return DenseMapInfo<Instruction *>::getTombstoneKey();
 232   }
 233   static unsigned getHashValue(CallValue Val);
 234   static bool isEqual(CallValue LHS, CallValue RHS);
 235 };
 236 }
 237
 238 unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
 239   Instruction *Inst = Val.Inst;
 240   // Hash all of the operands as pointers and mix in the opcode.
 241   return hash_combine(
 242       Inst->getOpcode(),
 243       hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
 244 }
 245
 246 bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
 247   Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
 248   if (LHS.isSentinel() || RHS.isSentinel())
 249     return LHSI == RHSI;
 250   return LHSI->isIdenticalTo(RHSI);
 251 }
 252
 253 //===----------------------------------------------------------------------===//
 254 // EarlyCSE implementation
 255 //===----------------------------------------------------------------------===//
 256
 257 namespace {
 258 /// \brief A simple and fast domtree-based CSE pass.
 259 ///
 260 /// This pass does a simple depth-first walk over the dominator tree,
 261 /// eliminating trivially redundant instructions and using instsimplify to
 262 /// canonicalize things as it goes. It is intended to be fast and catch obvious
 263 /// cases so that instcombine and other passes are more effective. It is
 264 /// expected that a later pass of GVN will catch the interesting/hard cases.
 265 class EarlyCSE {
 266 public:
 267   const TargetLibraryInfo &TLI;
 268   const TargetTransformInfo &TTI;
 269   DominatorTree &DT;
 270   AssumptionCache &AC;
 271   typedef RecyclingAllocator<
 272       BumpPtrAllocator, ScopedHashTableVal<SimpleValue, Value *>> AllocatorTy;
 273   typedef ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
 274                           AllocatorTy> ScopedHTType;
 275
 276   /// \brief A scoped hash table of the current values of all of our simple
 277   /// scalar expressions.
 278   ///
 279   /// As we walk down the domtree, we look to see if instructions are in this:
 280   /// if so, we replace them with what we find, otherwise we insert them so
 281   /// that dominated values can succeed in their lookup.
 282   ScopedHTType AvailableValues;
 283
 284   /// A scoped hash table of the current values of previously encounted memory
 285   /// locations.
 286   ///
 287   /// This allows us to get efficient access to dominating loads or stores when
 288   /// we have a fully redundant load.  In addition to the most recent load, we
 289   /// keep track of a generation count of the read, which is compared against
 290   /// the current generation count.  The current generation count is incremented
 291   /// after every possibly writing memory operation, which ensures that we only
 292   /// CSE loads with other loads that have no intervening store.  Ordering
 293   /// events (such as fences or atomic instructions) increment the generation
 294   /// count as well; essentially, we model these as writes to all possible
 295   /// locations.  Note that atomic and/or volatile loads and stores can be
 296   /// present the table; it is the responsibility of the consumer to inspect
 297   /// the atomicity/volatility if needed.
 298   struct LoadValue {
 299     Value *Data;
 300     unsigned Generation;
 301     int MatchingId;
 302     bool IsAtomic;
 303     LoadValue()
 304       : Data(nullptr), Generation(0), MatchingId(-1), IsAtomic(false) {}
 305     LoadValue(Value *Data, unsigned Generation, unsigned MatchingId,
 306               bool IsAtomic)
 307       : Data(Data), Generation(Generation), MatchingId(MatchingId),
 308         IsAtomic(IsAtomic) {}
 309   };
 310   typedef RecyclingAllocator<BumpPtrAllocator,
 311                              ScopedHashTableVal<Value *, LoadValue>>
 312       LoadMapAllocator;
 313   typedef ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
 314                           LoadMapAllocator> LoadHTType;
 315   LoadHTType AvailableLoads;
 316
 317   /// \brief A scoped hash table of the current values of read-only call
 318   /// values.
 319   ///
 320   /// It uses the same generation count as loads.
 321   typedef ScopedHashTable<CallValue, std::pair<Value *, unsigned>> CallHTType;
 322   CallHTType AvailableCalls;
 323
 324   /// \brief This is the current generation of the memory value.
 325   unsigned CurrentGeneration;
 326
 327   /// \brief Set up the EarlyCSE runner for a particular function.
 328   EarlyCSE(const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI,
 329            DominatorTree &DT, AssumptionCache &AC)
 330       : TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) {}
 331
 332   bool run();
 333
 334 private:
 335   // Almost a POD, but needs to call the constructors for the scoped hash
 336   // tables so that a new scope gets pushed on. These are RAII so that the
 337   // scope gets popped when the NodeScope is destroyed.
 338   class NodeScope {
 339   public:
 340     NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
 341               CallHTType &AvailableCalls)
 342         : Scope(AvailableValues), LoadScope(AvailableLoads),
 343           CallScope(AvailableCalls) {}
 344
 345   private:
 346     NodeScope(const NodeScope &) = delete;
 347     void operator=(const NodeScope &) = delete;
 348
 349     ScopedHTType::ScopeTy Scope;
 350     LoadHTType::ScopeTy LoadScope;
 351     CallHTType::ScopeTy CallScope;
 352   };
 353
 354   // Contains all the needed information to create a stack for doing a depth
 355   // first tranversal of the tree. This includes scopes for values, loads, and
 356   // calls as well as the generation. There is a child iterator so that the
 357   // children do not need to be store spearately.
 358   class StackNode {
 359   public:
 360     StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
 361               CallHTType &AvailableCalls, unsigned cg, DomTreeNode *n,
 362               DomTreeNode::iterator child, DomTreeNode::iterator end)
 363         : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
 364           EndIter(end), Scopes(AvailableValues, AvailableLoads, AvailableCalls),
 365           Processed(false) {}
 366
 367     // Accessors.
 368     unsigned currentGeneration() { return CurrentGeneration; }
 369     unsigned childGeneration() { return ChildGeneration; }
 370     void childGeneration(unsigned generation) { ChildGeneration = generation; }
 371     DomTreeNode *node() { return Node; }
 372     DomTreeNode::iterator childIter() { return ChildIter; }
 373     DomTreeNode *nextChild() {
 374       DomTreeNode *child = *ChildIter;
 375       ++ChildIter;
 376       return child;
 377     }
 378     DomTreeNode::iterator end() { return EndIter; }
 379     bool isProcessed() { return Processed; }
 380     void process() { Processed = true; }
 381
 382   private:
 383     StackNode(const StackNode &) = delete;
 384     void operator=(const StackNode &) = delete;
 385
 386     // Members.
 387     unsigned CurrentGeneration;
 388     unsigned ChildGeneration;
 389     DomTreeNode *Node;
 390     DomTreeNode::iterator ChildIter;
 391     DomTreeNode::iterator EndIter;
 392     NodeScope Scopes;
 393     bool Processed;
 394   };
 395
 396   /// \brief Wrapper class to handle memory instructions, including loads,
 397   /// stores and intrinsic loads and stores defined by the target.
 398   class ParseMemoryInst {
 399   public:
 400     ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
 401       : IsTargetMemInst(false), Inst(Inst) {
 402       if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
 403         if (TTI.getTgtMemIntrinsic(II, Info) && Info.NumMemRefs == 1)
 404           IsTargetMemInst = true;
 405     }
 406     bool isLoad() const {
 407       if (IsTargetMemInst) return Info.ReadMem;
 408       return isa<LoadInst>(Inst);
 409     }
 410     bool isStore() const {
 411       if (IsTargetMemInst) return Info.WriteMem;
 412       return isa<StoreInst>(Inst);
 413     }
 414     bool isAtomic() const {
 415       if (IsTargetMemInst) {
 416         assert(Info.IsSimple && "need to refine IsSimple in TTI");
 417         return false;
 418       }
 419       return Inst->isAtomic();
 420     }
 421     bool isUnordered() const {
 422       if (IsTargetMemInst) {
 423         assert(Info.IsSimple && "need to refine IsSimple in TTI");
 424         return true;
 425       }
 426       if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
 427         return LI->isUnordered();
 428       } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
 429         return SI->isUnordered();
 430       }
 431       // Conservative answer
 432       return !Inst->isAtomic();
 433     }
 434
 435     bool isVolatile() const {
 436       if (IsTargetMemInst) {
 437         assert(Info.IsSimple && "need to refine IsSimple in TTI");
 438         return false;
 439       }
 440       if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
 441         return LI->isVolatile();
 442       } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
 443         return SI->isVolatile();
 444       }
 445       // Conservative answer
 446       return true;
 447     }
 448
 449
 450     bool isMatchingMemLoc(const ParseMemoryInst &Inst) const {
 451       return (getPointerOperand() == Inst.getPointerOperand() &&
 452               getMatchingId() == Inst.getMatchingId());
 453     }
 454     bool isValid() const { return getPointerOperand() != nullptr; }
 455
 456     // For regular (non-intrinsic) loads/stores, this is set to -1. For
 457     // intrinsic loads/stores, the id is retrieved from the corresponding
 458     // field in the MemIntrinsicInfo structure.  That field contains
 459     // non-negative values only.
 460     int getMatchingId() const {
 461       if (IsTargetMemInst) return Info.MatchingId;
 462       return -1;
 463     }
 464     Value *getPointerOperand() const {
 465       if (IsTargetMemInst) return Info.PtrVal;
 466       if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
 467         return LI->getPointerOperand();
 468       } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
 469         return SI->getPointerOperand();
 470       }
 471       return nullptr;
 472     }
 473     bool mayReadFromMemory() const {
 474       if (IsTargetMemInst) return Info.ReadMem;
 475       return Inst->mayReadFromMemory();
 476     }
 477     bool mayWriteToMemory() const {
 478       if (IsTargetMemInst) return Info.WriteMem;
 479       return Inst->mayWriteToMemory();
 480     }
 481
 482   private:
 483     bool IsTargetMemInst;
 484     MemIntrinsicInfo Info;
 485     Instruction *Inst;
 486   };
 487
 488   bool processNode(DomTreeNode *Node);
 489
 490   Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
 491     if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
 492       return LI;
 493     else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
 494       return SI->getValueOperand();
 495     assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
 496     return TTI.getOrCreateResultFromMemIntrinsic(cast<IntrinsicInst>(Inst),
 497                                                  ExpectedType);
 498   }
 499 };
 500 }
 501
 502 bool EarlyCSE::processNode(DomTreeNode *Node) {
 503   BasicBlock *BB = Node->getBlock();
 504
 505   // If this block has a single predecessor, then the predecessor is the parent
 506   // of the domtree node and all of the live out memory values are still current
 507   // in this block.  If this block has multiple predecessors, then they could
 508   // have invalidated the live-out memory values of our parent value.  For now,
 509   // just be conservative and invalidate memory if this block has multiple
 510   // predecessors.
 511   if (!BB->getSinglePredecessor())
 512     ++CurrentGeneration;
 513
 514   // If this node has a single predecessor which ends in a conditional branch,
 515   // we can infer the value of the branch condition given that we took this
 516   // path.  We need the single predeccesor to ensure there's not another path
 517   // which reaches this block where the condition might hold a different
 518   // value.  Since we're adding this to the scoped hash table (like any other
 519   // def), it will have been popped if we encounter a future merge block.
 520   if (BasicBlock *Pred = BB->getSinglePredecessor())
 521     if (auto *BI = dyn_cast<BranchInst>(Pred->getTerminator()))
 522       if (BI->isConditional())
 523         if (auto *CondInst = dyn_cast<Instruction>(BI->getCondition()))
 524           if (SimpleValue::canHandle(CondInst)) {
 525             assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
 526             auto *ConditionalConstant = (BI->getSuccessor(0) == BB) ?
 527               ConstantInt::getTrue(BB->getContext()) :
 528               ConstantInt::getFalse(BB->getContext());
 529             AvailableValues.insert(CondInst, ConditionalConstant);
 530             DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
 531                   << CondInst->getName() << "' as " << *ConditionalConstant
 532                   << " in " << BB->getName() << "\n");
 533             // Replace all dominated uses with the known value
 534             replaceDominatedUsesWith(CondInst, ConditionalConstant, DT,
 535                                      BasicBlockEdge(Pred, BB));
 536           }
 537
 538   /// LastStore - Keep track of the last non-volatile store that we saw... for
 539   /// as long as there in no instruction that reads memory.  If we see a store
 540   /// to the same location, we delete the dead store.  This zaps trivial dead
 541   /// stores which can occur in bitfield code among other things.
 542   Instruction *LastStore = nullptr;
 543
 544   bool Changed = false;
 545   const DataLayout &DL = BB->getModule()->getDataLayout();
 546
 547   // See if any instructions in the block can be eliminated.  If so, do it.  If
 548   // not, add them to AvailableValues.
 549   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
 550     Instruction *Inst = &*I++;
 551
 552     // Dead instructions should just be removed.
 553     if (isInstructionTriviallyDead(Inst, &TLI)) {
 554       DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
 555       Inst->eraseFromParent();
 556       Changed = true;
 557       ++NumSimplify;
 558       continue;
 559     }
 560
 561     // Skip assume intrinsics, they don't really have side effects (although
 562     // they're marked as such to ensure preservation of control dependencies),
 563     // and this pass will not disturb any of the assumption's control
 564     // dependencies.
 565     if (match(Inst, m_Intrinsic<Intrinsic::assume>())) {
 566       DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n');
 567       continue;
 568     }
 569
 570     // If the instruction can be simplified (e.g. X+0 = X) then replace it with
 571     // its simpler value.
 572     if (Value *V = SimplifyInstruction(Inst, DL, &TLI, &DT, &AC)) {
 573       DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << "  to: " << *V << '\n');
 574       Inst->replaceAllUsesWith(V);
 575       Inst->eraseFromParent();
 576       Changed = true;
 577       ++NumSimplify;
 578       continue;
 579     }
 580
 581     // If this is a simple instruction that we can value number, process it.
 582     if (SimpleValue::canHandle(Inst)) {
 583       // See if the instruction has an available value.  If so, use it.
 584       if (Value *V = AvailableValues.lookup(Inst)) {
 585         DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << "  to: " << *V << '\n');
 586         Inst->replaceAllUsesWith(V);
 587         Inst->eraseFromParent();
 588         Changed = true;
 589         ++NumCSE;
 590         continue;
 591       }
 592
 593       // Otherwise, just remember that this value is available.
 594       AvailableValues.insert(Inst, Inst);
 595       continue;
 596     }
 597
 598     ParseMemoryInst MemInst(Inst, TTI);
 599     // If this is a non-volatile load, process it.
 600     if (MemInst.isValid() && MemInst.isLoad()) {
 601       // (conservatively) we can't peak past the ordering implied by this
 602       // operation, but we can add this load to our set of available values
 603       if (MemInst.isVolatile() || !MemInst.isUnordered()) {
 604         LastStore = nullptr;
 605         ++CurrentGeneration;
 606       }
 607
 608       // If we have an available version of this load, and if it is the right
 609       // generation, replace this instruction.
 610       LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
 611       if (InVal.Data != nullptr && InVal.Generation == CurrentGeneration &&
 612           InVal.MatchingId == MemInst.getMatchingId() &&
 613           // We don't yet handle removing loads with ordering of any kind.
 614           !MemInst.isVolatile() && MemInst.isUnordered() &&
 615           // We can't replace an atomic load with one which isn't also atomic.
 616           InVal.IsAtomic >= MemInst.isAtomic()) {
 617         Value *Op = getOrCreateResult(InVal.Data, Inst->getType());
 618         if (Op != nullptr) {
 619           DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
 620                        << "  to: " << *InVal.Data << '\n');
 621           if (!Inst->use_empty())
 622             Inst->replaceAllUsesWith(Op);
 623           Inst->eraseFromParent();
 624           Changed = true;
 625           ++NumCSELoad;
 626           continue;
 627         }
 628       }
 629
 630       // Otherwise, remember that we have this instruction.
 631       AvailableLoads.insert(
 632           MemInst.getPointerOperand(),
 633           LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
 634                     MemInst.isAtomic()));
 635       LastStore = nullptr;
 636       continue;
 637     }
 638
 639     // If this instruction may read from memory, forget LastStore.
 640     // Load/store intrinsics will indicate both a read and a write to
 641     // memory.  The target may override this (e.g. so that a store intrinsic
 642     // does not read  from memory, and thus will be treated the same as a
 643     // regular store for commoning purposes).
 644     if (Inst->mayReadFromMemory() &&
 645         !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
 646       LastStore = nullptr;
 647
 648     // If this is a read-only call, process it.
 649     if (CallValue::canHandle(Inst)) {
 650       // If we have an available version of this call, and if it is the right
 651       // generation, replace this instruction.
 652       std::pair<Value *, unsigned> InVal = AvailableCalls.lookup(Inst);
 653       if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
 654         DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst
 655                      << "  to: " << *InVal.first << '\n');
 656         if (!Inst->use_empty())
 657           Inst->replaceAllUsesWith(InVal.first);
 658         Inst->eraseFromParent();
 659         Changed = true;
 660         ++NumCSECall;
 661         continue;
 662       }
 663
 664       // Otherwise, remember that we have this instruction.
 665       AvailableCalls.insert(
 666           Inst, std::pair<Value *, unsigned>(Inst, CurrentGeneration));
 667       continue;
 668     }
 669
 670     // A release fence requires that all stores complete before it, but does
 671     // not prevent the reordering of following loads 'before' the fence.  As a
 672     // result, we don't need to consider it as writing to memory and don't need
 673     // to advance the generation.  We do need to prevent DSE across the fence,
 674     // but that's handled above.
 675     if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
 676       if (FI->getOrdering() == Release) {
 677         assert(Inst->mayReadFromMemory() && "relied on to prevent DSE above");
 678         continue;
 679       }
 680
 681     // write back DSE - If we write back the same value we just loaded from
 682     // the same location and haven't passed any intervening writes or ordering
 683     // operations, we can remove the write.  The primary benefit is in allowing
 684     // the available load table to remain valid and value forward past where
 685     // the store originally was.
 686     if (MemInst.isValid() && MemInst.isStore()) {
 687       LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
 688       if (InVal.Data &&
 689           InVal.Data == getOrCreateResult(Inst, InVal.Data->getType()) &&
 690           InVal.Generation == CurrentGeneration &&
 691           InVal.MatchingId == MemInst.getMatchingId() &&
 692           // We don't yet handle removing stores with ordering of any kind.
 693           !MemInst.isVolatile() && MemInst.isUnordered()) {
 694         assert((!LastStore ||
 695                 ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
 696                 MemInst.getPointerOperand()) &&
 697                "can't have an intervening store!");
 698         DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << *Inst << '\n');
 699         Inst->eraseFromParent();
 700         Changed = true;
 701         ++NumDSE;
 702         // We can avoid incrementing the generation count since we were able
 703         // to eliminate this store.
 704         continue;
 705       }
 706     }
 707
 708     // Okay, this isn't something we can CSE at all.  Check to see if it is
 709     // something that could modify memory.  If so, our available memory values
 710     // cannot be used so bump the generation count.
 711     if (Inst->mayWriteToMemory()) {
 712       ++CurrentGeneration;
 713
 714       if (MemInst.isValid() && MemInst.isStore()) {
 715         // We do a trivial form of DSE if there are two stores to the same
 716         // location with no intervening loads.  Delete the earlier store.
 717         // At the moment, we don't remove ordered stores, but do remove
 718         // unordered atomic stores.  There's no special requirement (for
 719         // unordered atomics) about removing atomic stores only in favor of
 720         // other atomic stores since we we're going to execute the non-atomic
 721         // one anyway and the atomic one might never have become visible.
 722         if (LastStore) {
 723           ParseMemoryInst LastStoreMemInst(LastStore, TTI);
 724           assert(LastStoreMemInst.isUnordered() &&
 725                  !LastStoreMemInst.isVolatile() &&
 726                  "Violated invariant");
 727           if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
 728             DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
 729                          << "  due to: " << *Inst << '\n');
 730             LastStore->eraseFromParent();
 731             Changed = true;
 732             ++NumDSE;
 733             LastStore = nullptr;
 734           }
 735           // fallthrough - we can exploit information about this store
 736         }
 737
 738         // Okay, we just invalidated anything we knew about loaded values.  Try
 739         // to salvage *something* by remembering that the stored value is a live
 740         // version of the pointer.  It is safe to forward from volatile stores
 741         // to non-volatile loads, so we don't have to check for volatility of
 742         // the store.
 743         AvailableLoads.insert(
 744             MemInst.getPointerOperand(),
 745             LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
 746                       MemInst.isAtomic()));
 747
 748         // Remember that this was the last unordered store we saw for DSE. We
 749         // don't yet handle DSE on ordered or volatile stores since we don't
 750         // have a good way to model the ordering requirement for following
 751         // passes  once the store is removed.  We could insert a fence, but
 752         // since fences are slightly stronger than stores in their ordering,
 753         // it's not clear this is a profitable transform. Another option would
 754         // be to merge the ordering with that of the post dominating store.
 755         if (MemInst.isUnordered() && !MemInst.isVolatile())
 756           LastStore = Inst;
 757         else
 758           LastStore = nullptr;
 759       }
 760     }
 761   }
 762
 763   return Changed;
 764 }
 765
 766 bool EarlyCSE::run() {
 767   // Note, deque is being used here because there is significant performance
 768   // gains over vector when the container becomes very large due to the
 769   // specific access patterns. For more information see the mailing list
 770   // discussion on this:
 771   // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
 772   std::deque<StackNode *> nodesToProcess;
 773
 774   bool Changed = false;
 775
 776   // Process the root node.
 777   nodesToProcess.push_back(new StackNode(
 778       AvailableValues, AvailableLoads, AvailableCalls, CurrentGeneration,
 779       DT.getRootNode(), DT.getRootNode()->begin(), DT.getRootNode()->end()));
 780
 781   // Save the current generation.
 782   unsigned LiveOutGeneration = CurrentGeneration;
 783
 784   // Process the stack.
 785   while (!nodesToProcess.empty()) {
 786     // Grab the first item off the stack. Set the current generation, remove
 787     // the node from the stack, and process it.
 788     StackNode *NodeToProcess = nodesToProcess.back();
 789
 790     // Initialize class members.
 791     CurrentGeneration = NodeToProcess->currentGeneration();
 792
 793     // Check if the node needs to be processed.
 794     if (!NodeToProcess->isProcessed()) {
 795       // Process the node.
 796       Changed |= processNode(NodeToProcess->node());
 797       NodeToProcess->childGeneration(CurrentGeneration);
 798       NodeToProcess->process();
 799     } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
 800       // Push the next child onto the stack.
 801       DomTreeNode *child = NodeToProcess->nextChild();
 802       nodesToProcess.push_back(
 803           new StackNode(AvailableValues, AvailableLoads, AvailableCalls,
 804                         NodeToProcess->childGeneration(), child, child->begin(),
 805                         child->end()));
 806     } else {
 807       // It has been processed, and there are no more children to process,
 808       // so delete it and pop it off the stack.
 809       delete NodeToProcess;
 810       nodesToProcess.pop_back();
 811     }
 812   } // while (!nodes...)
 813
 814   // Reset the current generation.
 815   CurrentGeneration = LiveOutGeneration;
 816
 817   return Changed;
 818 }
 819
 820 PreservedAnalyses EarlyCSEPass::run(Function &F,
 821                                     AnalysisManager<Function> *AM) {
 822   auto &TLI = AM->getResult<TargetLibraryAnalysis>(F);
 823   auto &TTI = AM->getResult<TargetIRAnalysis>(F);
 824   auto &DT = AM->getResult<DominatorTreeAnalysis>(F);
 825   auto &AC = AM->getResult<AssumptionAnalysis>(F);
 826
 827   EarlyCSE CSE(TLI, TTI, DT, AC);
 828
 829   if (!CSE.run())
 830     return PreservedAnalyses::all();
 831
 832   // CSE preserves the dominator tree because it doesn't mutate the CFG.
 833   // FIXME: Bundle this with other CFG-preservation.
 834   PreservedAnalyses PA;
 835   PA.preserve<DominatorTreeAnalysis>();
 836   return PA;
 837 }
 838
 839 namespace {
 840 /// \brief A simple and fast domtree-based CSE pass.
 841 ///
 842 /// This pass does a simple depth-first walk over the dominator tree,
 843 /// eliminating trivially redundant instructions and using instsimplify to
 844 /// canonicalize things as it goes. It is intended to be fast and catch obvious
 845 /// cases so that instcombine and other passes are more effective. It is
 846 /// expected that a later pass of GVN will catch the interesting/hard cases.
 847 class EarlyCSELegacyPass : public FunctionPass {
 848 public:
 849   static char ID;
 850
 851   EarlyCSELegacyPass() : FunctionPass(ID) {
 852     initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
 853   }
 854
 855   bool runOnFunction(Function &F) override {
 856     if (skipOptnoneFunction(F))
 857       return false;
 858
 859     auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
 860     auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
 861     auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
 862     auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
 863
 864     EarlyCSE CSE(TLI, TTI, DT, AC);
 865
 866     return CSE.run();
 867   }
 868
 869   void getAnalysisUsage(AnalysisUsage &AU) const override {
 870     AU.addRequired<AssumptionCacheTracker>();
 871     AU.addRequired<DominatorTreeWrapperPass>();
 872     AU.addRequired<TargetLibraryInfoWrapperPass>();
 873     AU.addRequired<TargetTransformInfoWrapperPass>();
 874     AU.addPreserved<GlobalsAAWrapperPass>();
 875     AU.setPreservesCFG();
 876   }
 877 };
 878 }
 879
 880 char EarlyCSELegacyPass::ID = 0;
 881
 882 FunctionPass *llvm::createEarlyCSEPass() { return new EarlyCSELegacyPass(); }
 883
 884 INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
 885                       false)
 886 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 887 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
 888 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 889 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 890 INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)