Hacks: add a "dmb ld" fence before every relaxed store and relaxed RMW
authorPeizhao Ou <peizhaoo@uci.edu>
Tue, 3 Apr 2018 06:43:48 +0000 (23:43 -0700)
committerPeizhao Ou <peizhaoo@uci.edu>
Tue, 3 Apr 2018 06:43:48 +0000 (23:43 -0700)
lib/CodeGen/AtomicExpandPass.cpp
lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp

index 2a0e6af67e4f7977a39c865ac33d50d853845d6b..9647fb9e5d5319c5cce404195d895a8a9dcf4d5a 100644 (file)
@@ -119,41 +119,6 @@ bool AtomicExpand::runOnFunction(Function &F) {
   SmallVector<LoadInst*, 1> MonotonicLoadInsts;
 
   bool MadeChange = false;
-  // XXX-comment: Converts relaxed stores to release stores.
-  for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
-    if (I->isAtomic()) {
-      switch (I->getOpcode()) {
-        case Instruction::Store: {
-          auto* SI = dyn_cast<StoreInst>(&*I);
-          if (SI->getOrdering() == Monotonic) {
-            SI->setOrdering(Release);
-            MadeChange = true;
-          }
-          break;
-        }
-        case Instruction::AtomicCmpXchg: {
-          auto* CmpXInst = dyn_cast<AtomicCmpXchgInst>(&*I);
-          if (CmpXInst->getSuccessOrdering() == Monotonic) {
-            CmpXInst->setSuccessOrdering(Release);
-            MadeChange = true;
-          }
-          break;
-        }
-        case Instruction::AtomicRMW: {
-          auto* RMWInst = dyn_cast<AtomicRMWInst>(&*I);
-          if (RMWInst->getOrdering() == Monotonic) {
-            RMWInst->setOrdering(Release);
-            MadeChange = true;
-          }
-          break;
-        }
-        default: {
-          break;
-        }
-      }
-    }
-  }
-
   // Changing control-flow while iterating through it is a bad idea, so gather a
   // list of all atomic instructions before we start.
   for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
index 43664df3b861ab370621a9fd5d6d7616acf85f02..988bc24ee6e3951c4dddd3012f8806836819c49d 100644 (file)
@@ -1526,6 +1526,76 @@ bool AArch64LoadStoreOpt::tryToMergeLdStInst(
 
 bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
                                         bool enableNarrowLdOpt) {
+  // XXX-update: Insert a 'dmb ld' fence, as a machine instruction, before each
+  // relaxed store.
+  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); MBBI != E;
+       ++MBBI) {
+    MachineInstr* MI = MBBI;
+    // If this is not an atomic/volatile op, ignore it.
+    if (!MI->hasOrderedMemoryRef()) {
+      continue;
+    }
+    switch (MI->getOpcode()) {
+      // Scaled instructions.
+      case AArch64::STRWui:
+      case AArch64::STRXui:
+      case AArch64::STRBui:
+      case AArch64::STRBBui:
+      case AArch64::STRHui:
+      case AArch64::STRHHui:
+      case AArch64::STRSui:
+      case AArch64::STRDui:
+      case AArch64::STRQui:
+
+      case AArch64::STRBBroW:
+      case AArch64::STRBroW:
+      case AArch64::STRDroW:
+      case AArch64::STRHHroW:
+      case AArch64::STRHroW:
+      case AArch64::STRQroW:
+      case AArch64::STRSroW:
+      case AArch64::STRWroW:
+      case AArch64::STRXroW:
+
+      case AArch64::STRBBroX:
+      case AArch64::STRBroX:
+      case AArch64::STRDroX:
+      case AArch64::STRHHroX:
+      case AArch64::STRHroX:
+      case AArch64::STRQroX:
+      case AArch64::STRSroX:
+      case AArch64::STRWroX:
+      case AArch64::STRXroX:
+
+      case AArch64::STRQpre:
+
+      // Unscaled instructions.
+      case AArch64::STURSi:
+      case AArch64::STURDi:
+      case AArch64::STURQi:
+      case AArch64::STURWi:
+      case AArch64::STURXi:
+
+      // STXR*
+      case AArch64::STXRX:
+      case AArch64::STXRW:
+      case AArch64::STXRB:
+      case AArch64::STXRH:
+      {
+        // XXX-FIXME: This is a hacky number for "DMB LD". We should use the
+        // enum type rather than a constant.
+        unsigned ImmType = 13;
+        MachineInstr* DMBInst = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+                                        TII->get(AArch64::DMB))
+                                    .addImm(ImmType);
+        (void) DMBInst;
+        DEBUG(dbgs() << "Added barrier instruction\n\t" << *DMBInst
+                     << "\n\tfor " << *MI << "\n");
+      }
+      default: { break; }
+    }
+  }
+
   bool Modified = false;
   // Three transformations to do here:
   // 1) Find loads that directly read from stores and promote them by