Fixes non-dominating relaxed loads issue
author: Peizhao Ou <peizhaoo@uci.edu>
Tue, 5 Dec 2017 00:19:06 +0000 (16:19 -0800)
committer: Peizhao Ou <peizhaoo@uci.edu>
Tue, 5 Dec 2017 00:19:06 +0000 (16:19 -0800)
lib/CodeGen/CodeGenPrepare.cpp
lib/IR/Verifier.cpp

index d7037a9..df49c8d 100644 (file)
@@ -154,10 +154,15 @@ class TypePromotionTransaction;
     /// DataLayout for the Function being processed.
     const DataLayout *DL;
 
+    // XXX-comment:We need DominatorTree to figure out which instruction to
+    // taint.
+    DominatorTree *DT;
+
   public:
     static char ID; // Pass identification, replacement for typeid
     explicit CodeGenPrepare(const TargetMachine *TM = nullptr)
-        : FunctionPass(ID), TM(TM), TLI(nullptr), TTI(nullptr), DL(nullptr) {
+        : FunctionPass(ID), TM(TM), TLI(nullptr), TTI(nullptr), DL(nullptr),
+        DT(nullptr) {
         initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
       }
     bool runOnFunction(Function &F) override;
@@ -168,6 +173,7 @@ class TypePromotionTransaction;
       AU.addPreserved<DominatorTreeWrapperPass>();
       AU.addRequired<TargetLibraryInfoWrapperPass>();
       AU.addRequired<TargetTransformInfoWrapperPass>();
+      AU.addRequired<DominatorTreeWrapperPass>();
     }
 
   private:
@@ -202,7 +208,10 @@ class TypePromotionTransaction;
 }
 
 char CodeGenPrepare::ID = 0;
-INITIALIZE_TM_PASS(CodeGenPrepare, "codegenprepare",
+INITIALIZE_TM_PASS_BEGIN(CodeGenPrepare, "codegenprepare",
+                   "Optimize for code generation", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_TM_PASS_END(CodeGenPrepare, "codegenprepare",
                    "Optimize for code generation", false, false)
 
 FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) {
@@ -249,26 +258,29 @@ void recursivelyFindDependence(SetType* DepSet, Value* Val,
     } else if (I->isBinaryOp()) {
       BinaryOperator* I = dyn_cast<BinaryOperator>(Val);
       Value *Op0 = I->getOperand(0), *Op1 = I->getOperand(1);
-      recursivelyFindDependence(DepSet, Op0, Depth - 1);
-      recursivelyFindDependence(DepSet, Op1, Depth - 1);
+      recursivelyFindDependence(DepSet, Op0, InsertOnlyLeafNodes, Depth - 1);
+      recursivelyFindDependence(DepSet, Op1, InsertOnlyLeafNodes, Depth - 1);
     } else if (I->isCast()) {
       Value* Op0 = I->getOperand(0);
-      recursivelyFindDependence(DepSet, Op0, Depth - 1);
+      recursivelyFindDependence(DepSet, Op0, InsertOnlyLeafNodes, Depth - 1);
     } else if (I->getOpcode() == Instruction::Select) {
       Value* Op0 = I->getOperand(0);
       Value* Op1 = I->getOperand(1);
       Value* Op2 = I->getOperand(2);
-      recursivelyFindDependence(DepSet, Op0, Depth - 1);
-      recursivelyFindDependence(DepSet, Op1, Depth - 1);
-      recursivelyFindDependence(DepSet, Op2, Depth - 1);
+      recursivelyFindDependence(DepSet, Op0, InsertOnlyLeafNodes, Depth - 1);
+      recursivelyFindDependence(DepSet, Op1, InsertOnlyLeafNodes, Depth - 1);
+      recursivelyFindDependence(DepSet, Op2, InsertOnlyLeafNodes, Depth - 1);
     } else if (I->getOpcode() == Instruction::GetElementPtr) {
       for (unsigned i = 0; i < I->getNumOperands(); i++) {
-        recursivelyFindDependence(DepSet, I->getOperand(i), Depth - 1);
+        recursivelyFindDependence(DepSet, I->getOperand(i), InsertOnlyLeafNodes,
+                                  Depth - 1);
       }
     } else if (I->getOpcode() == Instruction::Store) {
       auto* SI = dyn_cast<StoreInst>(Val);
-      recursivelyFindDependence(DepSet, SI->getPointerOperand(), Depth - 1);
-      recursivelyFindDependence(DepSet, SI->getValueOperand(), Depth - 1);
+      recursivelyFindDependence(DepSet, SI->getPointerOperand(),
+                                InsertOnlyLeafNodes, Depth - 1);
+      recursivelyFindDependence(DepSet, SI->getValueOperand(),
+                                InsertOnlyLeafNodes, Depth - 1);
     } else {
       Value* Op0 = nullptr;
       Value* Op1 = nullptr;
@@ -277,8 +289,20 @@ void recursivelyFindDependence(SetType* DepSet, Value* Val,
         case Instruction::FCmp: {
           Op0 = I->getOperand(0);
           Op1 = I->getOperand(1);
-          recursivelyFindDependence(DepSet, Op0, Depth - 1);
-          recursivelyFindDependence(DepSet, Op1, Depth - 1);
+          recursivelyFindDependence(DepSet, Op0, InsertOnlyLeafNodes,
+                                    Depth - 1);
+          recursivelyFindDependence(DepSet, Op1, InsertOnlyLeafNodes,
+                                    Depth - 1);
+          break;
+        }
+        case Instruction::PHI: {
+          for (unsigned i = 0; i < I->getNumOperands(); i++) {
+            auto* op = I->getOperand(i);
+            if (DepSet->count(op) == 0) {
+              recursivelyFindDependence(DepSet, op, InsertOnlyLeafNodes,
+                                        Depth - 1);
+            }
+          }
           break;
         }
         default: {
@@ -515,9 +539,7 @@ Value* getRootDependence(Value* DepVal) {
 // %4 = or i32 %3, %2
 // %5 = inttoptr i32 %4 to i32*
 // store i32 1, i32* %5, align 4
-bool taintStoreAddress(StoreInst* SI, Value* DepVal,
-                       const char* calling_func = __builtin_FUNCTION()) {
-  DEBUG(dbgs() << "Called from " << calling_func << '\n');
+bool taintStoreAddress(StoreInst* SI, Value* DepVal) {
   // Set the insertion point right after the 'DepVal'.
   Instruction* Inst = nullptr;
   IRBuilder<true, NoFolder> Builder(SI);
@@ -535,7 +557,13 @@ bool taintStoreAddress(StoreInst* SI, Value* DepVal,
   // Figure out if there's a root variable 'DepVal' depends on. For example, we
   // can extract "getelementptr inbounds %struct, %struct* %0, i64 0, i32 123"
   // to be "%struct* %0" since all other operands are constant.
-  DepVal = getRootDependence(DepVal);
+  auto* RootVal = getRootDependence(DepVal);
+  auto* RootInst = dyn_cast<Instruction>(RootVal);
+  auto* DepValInst = dyn_cast<Instruction>(DepVal);
+  if (RootInst && DepValInst &&
+      RootInst->getParent() == DepValInst->getParent()) {
+    DepVal = RootVal;
+  }
 
   // Is this already a dependence-tainted store?
   Value* OldDep = getDependence(Address);
@@ -648,8 +676,11 @@ bool ConditionalBranchDependsOnValue(BranchInst* BI, Value* DepVal) {
 // XXX-update: For a relaxed load 'LI', find the first immediate atomic store or
 // the first conditional branch. Returns nullptr if there's no such immediately
 // following store/branch instructions, which we can only enforce the load with
-// 'acquire'.
-Instruction* findFirstStoreCondBranchInst(LoadInst* LI) {
+// 'acquire'. 'ChainedBB' contains all the blocks chained together with
+// unconditional branches from 'BB' to the block with the first store/cond
+// branch.
+template <typename Vector>
+Instruction* findFirstStoreCondBranchInst(LoadInst* LI, Vector* ChainedBB) {
   // In some situations, relaxed loads can be left as is:
   // 1. The relaxed load is used to calculate the address of the immediate
   // following store;
@@ -663,34 +694,47 @@ Instruction* findFirstStoreCondBranchInst(LoadInst* LI) {
   // However, in this function, we don't deal with them directly. Instead, we
   // just find the immediate following store/condition branch and return it.
 
+  assert(ChainedBB != nullptr && "Chained BB should not be nullptr");
   auto* BB = LI->getParent();
+  ChainedBB->push_back(BB);
   auto BE = BB->end();
   auto BBI = BasicBlock::iterator(LI);
   BBI++;
-  for (; BBI != BE; BBI++) {
-    auto* Inst = dyn_cast<Instruction>(&*BBI);
-    if (Inst == nullptr) {
-      continue;
-    }
-    if (Inst->getOpcode() == Instruction::Store) {
-      return Inst;
-    } else if (Inst->getOpcode() == Instruction::Br) {
-      auto* BrInst = dyn_cast<BranchInst>(Inst);
-      if (BrInst->isConditional()) {
+  while (true) {
+    for (; BBI != BE; BBI++) {
+      auto* Inst = dyn_cast<Instruction>(&*BBI);
+      if (Inst == nullptr) {
+        continue;
+      }
+      if (Inst->getOpcode() == Instruction::Store) {
         return Inst;
-      } else {
-        return nullptr;
+      } else if (Inst->getOpcode() == Instruction::Br) {
+        auto* BrInst = dyn_cast<BranchInst>(Inst);
+        if (BrInst->isConditional()) {
+          return Inst;
+        } else {
+          // Reinitialize iterators with the destination of the unconditional
+          // branch.
+          BB = BrInst->getSuccessor(0);
+          ChainedBB->push_back(BB);
+          BBI = BB->begin();
+          BE = BB->end();
+          break;
+        }
       }
     }
+    if (BBI == BE) {
+      return nullptr;
+    }
   }
-  return nullptr;
 }
 
 // XXX-comment: Returns whether the code has been changed.
 bool taintMonotonicLoads(const SmallVector<LoadInst*, 1>& MonotonicLoadInsts) {
   bool Changed = false;
   for (auto* LI : MonotonicLoadInsts) {
-    auto* FirstInst = findFirstStoreCondBranchInst(LI);
+    SmallVector<BasicBlock*, 2> ChainedBB;
+    auto* FirstInst = findFirstStoreCondBranchInst(LI, &ChainedBB);
     if (FirstInst == nullptr) {
       // We don't seem to be able to taint a following store/conditional branch
       // instruction. Simply make it acquire.
@@ -741,38 +785,124 @@ void AddFakeConditionalBranch(Instruction* SplitInst, Value* Condition) {
 }
 
 // Returns true if the code is changed, and false otherwise.
-void TaintRelaxedLoads(LoadInst* LI) {
+void TaintRelaxedLoads(Instruction* UsageInst) {
   // For better performance, we can add a "AND X 0" instruction before the
   // condition.
-  auto* FirstInst = findFirstStoreCondBranchInst(LI);
-  Instruction* InsertPoint = nullptr;
-  if (FirstInst == nullptr) {
-    InsertPoint = LI->getParent()->getTerminator();
-    InsertPoint = LI->getNextNode();
+  auto* BB = UsageInst->getParent();
+  auto* InsertPoint = UsageInst->getNextNode();
+  IRBuilder<true, NoFolder> Builder(InsertPoint);
+  // First thing is to cast 'UsageInst' to an integer type if necessary.
+  Value* AndTarget = nullptr;
+  if (IntegerType::classof(UsageInst->getType())) {
+    AndTarget = UsageInst;
   } else {
-    InsertPoint = LI->getNextNode();
+    Type* TargetIntegerType = IntegerType::get(
+        UsageInst->getContext(),
+        BB->getModule()->getDataLayout().getPointerSizeInBits());
+    AndTarget = createCast(Builder, UsageInst, TargetIntegerType);
   }
-  IRBuilder<true, NoFolder> Builder(InsertPoint);
+
   auto* AndZero = dyn_cast<Instruction>(
-      Builder.CreateAnd(LI, Constant::getNullValue(LI->getType())));
+      Builder.CreateAnd(AndTarget, Constant::getNullValue(AndTarget->getType())));
   auto* FakeCondition = dyn_cast<Instruction>(Builder.CreateICmp(
-      CmpInst::ICMP_NE, AndZero, Constant::getNullValue(LI->getType())));
+      CmpInst::ICMP_NE, AndZero, Constant::getNullValue(AndTarget->getType())));
   AddFakeConditionalBranch(FakeCondition->getNextNode(), FakeCondition);
 }
 
+// XXX-comment: Finds the appropriate Value derived from an atomic load.
+// 'ChainedBB' contains all the blocks chained together with unconditional
+// branches from LI's parent BB to the block with the first store/cond branch.
+// If we don't find any, it means 'LI' is not used at all (which should not
+// happen in practice). We can simply set 'LI' to be acquire just to be safe.
+template <typename Vector>
+Instruction* findMostRecentDependenceUsage(LoadInst* LI, Instruction* LaterInst,
+                                           Vector* ChainedBB,
+                                           DominatorTree* DT) {
+  typedef SmallSet<Instruction*, 8> UsageSet;
+  typedef DenseMap<BasicBlock*, std::unique_ptr<UsageSet>> UsageMap;
+  assert(ChainedBB->size() >= 1 && "ChainedBB must have >=1 size");
+  // Mapping from basic block in 'ChainedBB' to the set of dependence usage of
+  // 'LI' in each block.
+  UsageMap usage_map;
+  auto* LoadBB = LI->getParent();
+  usage_map[LoadBB] = make_unique<UsageSet>();
+  usage_map[LoadBB]->insert(LI);
+
+  for (auto* BB : *ChainedBB) {
+    if (usage_map[BB] == nullptr) {
+      usage_map[BB] = make_unique<UsageSet>();
+    }
+    auto& usage_set = usage_map[BB];
+    if (usage_set->size() == 0) {
+      // The value has not been used.
+      return nullptr;
+    }
+    // Calculate the usage in the current BB first.
+    std::list<Value*> bb_usage_list;
+    std::copy(usage_set->begin(), usage_set->end(),
+              std::back_inserter(bb_usage_list));
+    for (auto list_iter = bb_usage_list.begin();
+         list_iter != bb_usage_list.end(); list_iter++) {
+      auto* val = *list_iter;
+      for (auto* U : val->users()) {
+        Instruction* Inst = nullptr;
+        if (!(Inst = dyn_cast<Instruction>(U))) {
+          continue;
+        }
+        assert(Inst && "Usage value must be an instruction");
+        auto iter =
+            std::find(ChainedBB->begin(), ChainedBB->end(), Inst->getParent());
+        if (iter == ChainedBB->end()) {
+          // Only care about usage within ChainedBB.
+          continue;
+        }
+        auto* UsageBB = *iter;
+        if (UsageBB == BB) {
+          // Current BB.
+          if (!usage_set->count(Inst)) {
+            bb_usage_list.push_back(Inst);
+            usage_set->insert(Inst);
+          }
+        } else {
+          // A following BB.
+          if (usage_map[UsageBB] == nullptr) {
+            usage_map[UsageBB] = make_unique<UsageSet>();
+          }
+          usage_map[UsageBB]->insert(Inst);
+        }
+      }
+    }
+  }
+
+  // Pick one usage that is in LaterInst's block and that dominates 'LaterInst'.
+  auto* LaterBB = LaterInst->getParent();
+  auto& usage_set = usage_map[LaterBB];
+  Instruction* usage_inst = nullptr;
+  for (auto* inst : *usage_set) {
+    if (DT->dominates(inst, LaterInst)) {
+      usage_inst = inst;
+      break;
+    }
+  }
+
+  assert(usage_inst && "Must find a usage instruction that dominates the "
+                       "later instruction");
+  return usage_inst;
+}
+
 // XXX-comment: Returns whether the code has been changed.
 bool AddFakeConditionalBranchAfterMonotonicLoads(
-    const SmallVector<LoadInst*, 1>& MonotonicLoadInsts) {
+    const SmallVector<LoadInst*, 1>& MonotonicLoadInsts, DominatorTree* DT) {
   bool Changed = false;
   for (auto* LI : MonotonicLoadInsts) {
-    auto* FirstInst = findFirstStoreCondBranchInst(LI);
+    SmallVector<BasicBlock*, 2> ChainedBB;
+    auto* FirstInst = findFirstStoreCondBranchInst(LI, &ChainedBB);
     if (FirstInst != nullptr) {
       if (FirstInst->getOpcode() == Instruction::Store) {
         if (StoreAddressDependOnValue(dyn_cast<StoreInst>(FirstInst), LI)) {
           continue;
         }
-      } else if (FirstInst->getOpcode() == Instruction::Br &&
-                 isa<BranchInst>(FirstInst)) {
+      } else if (FirstInst->getOpcode() == Instruction::Br) {
         if (ConditionalBranchDependsOnValue(dyn_cast<BranchInst>(FirstInst),
                                             LI)) {
           continue;
@@ -788,11 +918,40 @@ bool AddFakeConditionalBranchAfterMonotonicLoads(
     StoreInst* SI = nullptr;;
     if (FirstInst && (SI = dyn_cast<StoreInst>(FirstInst))) {
       // For immediately coming stores, taint the address of the store.
-      taintStoreAddress(SI, LI);
+      if (SI->getParent() == LI->getParent() || DT->dominates(LI, SI)) {
+        Changed |= taintStoreAddress(SI, LI);
+      } else {
+        auto* Inst =
+            findMostRecentDependenceUsage(LI, FirstInst, &ChainedBB, DT);
+        if (!Inst) {
+          LI->setOrdering(Acquire);
+          Changed = true;
+        } else {
+          Changed |= taintStoreAddress(SI, Inst);
+        }
+      }
     } else {
-      // For immediately coming branch, directly add a fake branch.
-      TaintRelaxedLoads(LI);
-      Changed = true;
+      // No following store/conditional branch found; taint the load itself.
+      if (!FirstInst) {
+        TaintRelaxedLoads(LI);
+        Changed = true;
+      } else {
+        // For immediately coming branch, directly add a fake branch.
+        if (FirstInst->getParent() == LI->getParent() ||
+            DT->dominates(LI, FirstInst)) {
+          TaintRelaxedLoads(LI);
+          Changed = true;
+        } else {
+          auto* Inst =
+              findMostRecentDependenceUsage(LI, FirstInst, &ChainedBB, DT);
+          if (Inst) {
+            TaintRelaxedLoads(Inst);
+          } else {
+            LI->setOrdering(Acquire);
+          }
+          Changed = true;
+        }
+      }
     }
   }
   return Changed;
@@ -1067,6 +1226,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
     TLI = TM->getSubtargetImpl(F)->getTargetLowering();
   TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
   TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   OptSize = F.optForSize();
 
   /// This optimization identifies DIV instructions that can be
@@ -1187,7 +1347,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
     }
   }
   EverMadeChange |=
-      AddFakeConditionalBranchAfterMonotonicLoads(MonotonicLoadInsts);
+      AddFakeConditionalBranchAfterMonotonicLoads(MonotonicLoadInsts, DT);
 
   return EverMadeChange;
 }
@@ -1282,7 +1442,7 @@ bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
       const Instruction *UI = cast<Instruction>(U);
       if (UI->getParent() != DestBB || !isa<PHINode>(UI))
         return false;
-      // If User is inside DestBB block and it is a PHINode then check
+      // If User is inside DestBB block and it is a PHINode then check
       // incoming value. If incoming value is not from BB then this is
       // a complex condition (e.g. preheaders) we want to avoid here.
       if (UI->getParent() == DestBB) {
index ba6bbb1..c8f99c5 100644 (file)
@@ -3355,7 +3355,7 @@ void Verifier::verifyDominatesUse(Instruction &I, unsigned i) {
 
   const Use &U = I.getOperandUse(i);
   if (!(InstsInThisBlock.count(Op) || DT.dominates(Op, U))) {
-    i = *((unsigned*) nullptr);
+    dbgs() << "Problematic function: \n" << *I.getParent()->getParent() << "\n";
   }
   Assert(InstsInThisBlock.count(Op) || DT.dominates(Op, U),
          "Instruction does not dominate all uses!", Op, &I);