[BDCE] Add a bit-tracking DCE pass

author Hal Finkel <hfinkel@anl.gov>

Tue, 17 Feb 2015 01:36:59 +0000 (01:36 +0000)

committer Hal Finkel <hfinkel@anl.gov>

Tue, 17 Feb 2015 01:36:59 +0000 (01:36 +0000)
author Hal Finkel <hfinkel@anl.gov>
Tue, 17 Feb 2015 01:36:59 +0000 (01:36 +0000)
committer Hal Finkel <hfinkel@anl.gov>
Tue, 17 Feb 2015 01:36:59 +0000 (01:36 +0000)
diff --git a/include/llvm-c/Transforms/Scalar.h b/include/llvm-c/Transforms/Scalar.h

index 7ad1ad1d05640070d09c105d4e79bd1b76617215..48c19a6e31177b83a3765a411aea5d190efa3464 100644 (file)
--- a/include/llvm-c/Transforms/Scalar.h
+++ b/include/llvm-c/Transforms/Scalar.h
@@ -35,6 +35,9 @@ extern "C" {
  /** See llvm::createAggressiveDCEPass function. */
  void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM);
  
+/** See llvm::createBitTrackingDCEPass function. */
+void LLVMAddBitTrackingDCEPass(LLVMPassManagerRef PM);
+
  /** See llvm::createAlignmentFromAssumptionsPass function. */
  void LLVMAddAlignmentFromAssumptionsPass(LLVMPassManagerRef PM);
  
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h

index 85162a27f9a74914c73843be27913e7a4e970fdd..363937314861761bd15eba494c069f8e8cecb2af 100644 (file)
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -65,6 +65,7 @@ void initializeTarget(PassRegistry&);
  void initializeAAEvalPass(PassRegistry&);
  void initializeAddDiscriminatorsPass(PassRegistry&);
  void initializeADCEPass(PassRegistry&);
+void initializeBDCEPass(PassRegistry&);
  void initializeAliasAnalysisAnalysisGroup(PassRegistry&);
  void initializeAliasAnalysisCounterPass(PassRegistry&);
  void initializeAliasDebuggerPass(PassRegistry&);
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h

index 4b1858b5365ff9bb9ed48f21e8e9b0eb3b2b9ac2..18343e29fa925cc135fd0215b2464f58bc0ec1c8 100644 (file)
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -49,6 +49,7 @@ namespace {
  
        (void) llvm::createAAEvalPass();
        (void) llvm::createAggressiveDCEPass();
+      (void) llvm::createBitTrackingDCEPass();
        (void) llvm::createAliasAnalysisCounterPass();
        (void) llvm::createAliasDebugger();
        (void) llvm::createArgumentPromotionPass();
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h

index 287f530aa7374eeee35eb6d75e18a793654298a7..3bf5f9496da6f8d001fdaa14c170ad57b8757fad 100644 (file)
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -80,6 +80,13 @@ FunctionPass *createDeadStoreEliminationPass();
  //
  FunctionPass *createAggressiveDCEPass();
  
+//===----------------------------------------------------------------------===//
+//
+// BitTrackingDCE - This pass uses a bit-tracking DCE algorithm in order to
+// remove computations of dead bits.
+//
+FunctionPass *createBitTrackingDCEPass();
+
  //===----------------------------------------------------------------------===//
  //
  // SROA - Replace aggregates or pieces of aggregates with scalar SSA values.
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp

index 00b8c7cbbd7c87e43ee3ad52fbdd35a94a340a02..9e34a4ea07159646f683737fd28e22ce25e7c087 100644 (file)
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -252,6 +252,11 @@ void PassManagerBuilder::populateModulePassManager(
    MPM.add(createMemCpyOptPass());             // Remove memcpy / form memset
    MPM.add(createSCCPPass());                  // Constant prop with SCCP
  
+  // Delete dead bit computations (instcombine runs after to fold away the dead
+  // computations, and then ADCE will run later to exploit any new DCE
+  // opportunities that creates).
+  MPM.add(createBitTrackingDCEPass());        // Delete dead bit computations
+
    // Run instcombine after redundancy elimination to exploit opportunities
    // opened up by them.
    MPM.add(createInstructionCombiningPass());
diff --git a/lib/Transforms/Scalar/BDCE.cpp b/lib/Transforms/Scalar/BDCE.cpp

new file mode 100644 (file)

index 0000000..afe0cd1
--- /dev/null
+++ b/lib/Transforms/Scalar/BDCE.cpp
@@ -0,0 +1,408 @@
+//===---- BDCE.cpp - Bit-tracking dead code elimination -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Bit-Tracking Dead Code Elimination pass. Some
+// instructions (shifts, some ands, ors, etc.) kill some of their input bits.
+// We track these dead bits and remove instructions that compute only these
+// dead bits.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "bdce"
+
+STATISTIC(NumRemoved, "Number of instructions removed (unused)");
+STATISTIC(NumSimplified, "Number of instructions trivialized (dead bits)");
+
+namespace {
+struct BDCE : public FunctionPass {
+  static char ID; // Pass identification, replacement for typeid
+  BDCE() : FunctionPass(ID) {
+    initializeBDCEPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function& F) override;
+
+  void getAnalysisUsage(AnalysisUsage& AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<AssumptionCacheTracker>();
+    AU.addRequired<DominatorTreeWrapperPass>();
+  }
+
+  void determineLiveOperandBits(const Instruction *UserI,
+                                const Instruction *I, unsigned OperandNo,
+                                const APInt &AOut, APInt &AB,
+                                APInt &KnownZero, APInt &KnownOne,
+                                APInt &KnownZero2, APInt &KnownOne2);
+
+  AssumptionCache *AC;
+  const DataLayout *DL;
+  DominatorTree *DT;
+};
+}
+
+char BDCE::ID = 0;
+INITIALIZE_PASS_BEGIN(BDCE, "bdce", "Bit-Tracking Dead Code Elimination",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(BDCE, "bdce", "Bit-Tracking Dead Code Elimination",
+                    false, false)
+
+static bool isAlwaysLive(Instruction *I) {
+  return isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) ||
+         isa<LandingPadInst>(I) || I->mayHaveSideEffects();
+}
+
+void BDCE::determineLiveOperandBits(const Instruction *UserI,
+                                    const Instruction *I, unsigned OperandNo,
+                                    const APInt &AOut, APInt &AB,
+                                    APInt &KnownZero, APInt &KnownOne,
+                                    APInt &KnownZero2, APInt &KnownOne2) {
+  unsigned BitWidth = AB.getBitWidth();
+
+  // We're called once per operand, but for some instructions, we need to
+  // compute known bits of both operands in order to determine the live bits of
+  // either (when both operands are instructions themselves). We don't,
+  // however, want to do this twice, so we cache the result in APInts that live
+  // in the caller. For the two-relevant-operands case, both operand values are
+  // provided here.
+  auto ComputeKnownBits = [&](unsigned BitWidth, const Value *V1,
+                              const Value *V2) {
+    KnownZero = APInt(BitWidth, 0);
+    KnownOne =  APInt(BitWidth, 0);
+    computeKnownBits(const_cast<Value*>(V1), KnownZero, KnownOne, DL, 0, AC,
+                     UserI, DT);
+
+    if (V2) {
+      KnownZero2 = APInt(BitWidth, 0);
+      KnownOne2 =  APInt(BitWidth, 0);
+      computeKnownBits(const_cast<Value*>(V2), KnownZero2, KnownOne2, DL, 0, AC,
+                       UserI, DT);
+    }
+  };
+
+  switch (UserI->getOpcode()) {
+  default: break;
+  case Instruction::Call:
+  case Instruction::Invoke:
+    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI))
+      switch (II->getIntrinsicID()) {
+      default: break;
+      case Intrinsic::bswap:
+        // The alive bits of the input are the swapped alive bits of
+        // the output.
+        AB = AOut.byteSwap();
+        break;
+      case Intrinsic::ctlz:
+        if (OperandNo == 0) {
+          // We need some output bits, so we need all bits of the
+          // input to the left of, and including, the leftmost bit
+          // known to be one.
+          ComputeKnownBits(BitWidth, I, nullptr);
+          AB = APInt::getHighBitsSet(BitWidth,
+                 std::min(BitWidth, KnownOne.countLeadingZeros()+1));
+        }
+        break;
+      case Intrinsic::cttz:
+        if (OperandNo == 0) {
+          // We need some output bits, so we need all bits of the
+          // input to the right of, and including, the rightmost bit
+          // known to be one.
+          ComputeKnownBits(BitWidth, I, nullptr);
+          AB = APInt::getLowBitsSet(BitWidth,
+                 std::min(BitWidth, KnownOne.countTrailingZeros()+1));
+        }
+        break;
+      }
+    break;
+  case Instruction::Add:
+  case Instruction::Sub:
+    // Find the highest live output bit. We don't need any more input
+    // bits than that (adds, and thus subtracts, ripple only to the
+    // left).
+    AB = APInt::getLowBitsSet(BitWidth, AOut.getActiveBits());
+    break;
+  case Instruction::Shl:
+    if (OperandNo == 0)
+      if (ConstantInt *CI =
+            dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+        uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+        AB = AOut.lshr(ShiftAmt);
+
+        // If the shift is nuw/nsw, then the high bits are not dead
+        // (because we've promised that they *must* be zero).
+        const ShlOperator *S = cast<ShlOperator>(UserI);
+        if (S->hasNoSignedWrap())
+          AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
+        else if (S->hasNoUnsignedWrap())
+          AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
+      }
+    break;
+  case Instruction::LShr:
+    if (OperandNo == 0)
+      if (ConstantInt *CI =
+            dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+        uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+        AB = AOut.shl(ShiftAmt);
+
+        // If the shift is exact, then the low bits are not dead
+        // (they must be zero).
+        if (cast<LShrOperator>(UserI)->isExact())
+          AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+      }
+    break;
+  case Instruction::AShr:
+    if (OperandNo == 0)
+      if (ConstantInt *CI =
+            dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+        uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+        AB = AOut.shl(ShiftAmt);
+        // Because the high input bit is replicated into the
+        // high-order bits of the result, if we need any of those
+        // bits, then we must keep the highest input bit.
+        if ((AOut & APInt::getHighBitsSet(BitWidth, ShiftAmt))
+            .getBoolValue())
+          AB.setBit(BitWidth-1);
+
+        // If the shift is exact, then the low bits are not dead
+        // (they must be zero).
+        if (cast<AShrOperator>(UserI)->isExact())
+          AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+      }
+    break;
+  case Instruction::And:
+    AB = AOut;
+
+    // For bits that are known zero, the corresponding bits in the
+    // other operand are dead (unless they're both zero, in which
+    // case they can't both be dead, so just mark the LHS bits as
+    // dead).
+    if (OperandNo == 0) {
+      ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
+      AB &= ~KnownZero2;
+    } else {
+      if (!isa<Instruction>(UserI->getOperand(0)))
+        ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
+      AB &= ~(KnownZero & ~KnownZero2);
+    }
+    break;
+  case Instruction::Or:
+    AB = AOut;
+
+    // For bits that are known one, the corresponding bits in the
+    // other operand are dead (unless they're both one, in which
+    // case they can't both be dead, so just mark the LHS bits as
+    // dead).
+    if (OperandNo == 0) {
+      ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
+      AB &= ~KnownOne2;
+    } else {
+      if (!isa<Instruction>(UserI->getOperand(0)))
+        ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
+      AB &= ~(KnownOne & ~KnownOne2);
+    }
+    break;
+  case Instruction::Xor:
+  case Instruction::PHI:
+    AB = AOut;
+    break;
+  case Instruction::Trunc:
+    AB = AOut.zext(BitWidth);
+    break;
+  case Instruction::ZExt:
+    AB = AOut.trunc(BitWidth);
+    break;
+  case Instruction::SExt:
+    AB = AOut.trunc(BitWidth);
+    // Because the high input bit is replicated into the
+    // high-order bits of the result, if we need any of those
+    // bits, then we must keep the highest input bit.
+    if ((AOut & APInt::getHighBitsSet(AOut.getBitWidth(),
+                                      AOut.getBitWidth() - BitWidth))
+        .getBoolValue())
+      AB.setBit(BitWidth-1);
+    break;
+  case Instruction::Select:
+    if (OperandNo != 0)
+      AB = AOut;
+    break;
+  }
+}
+
+bool BDCE::runOnFunction(Function& F) {
+  if (skipOptnoneFunction(F))
+    return false;
+
+  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+  DL = F.getParent()->getDataLayout();
+  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+  DenseMap<Instruction *, APInt> AliveBits;
+  SmallVector<Instruction*, 128> Worklist;
+
+  // The set of visited instructions (non-integer-typed only).
+  SmallPtrSet<Instruction*, 128> Visited;
+
+  // Collect the set of "root" instructions that are known live.
+  for (Instruction &I : inst_range(F)) {
+    if (!isAlwaysLive(&I))
+      continue;
+
+    // For integer-valued instructions, set up an initial empty set of alive
+    // bits and add the instruction to the work list. For other instructions
+    // add their operands to the work list (for integer values operands, mark
+    // all bits as live).
+    if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
+      AliveBits[&I] = APInt(IT->getBitWidth(), 0);
+      Worklist.push_back(&I);
+      continue;
+    }
+
+    // Non-integer-typed instructions...
+    for (Use &OI : I.operands()) {
+      if (Instruction *J = dyn_cast<Instruction>(OI)) {
+        if (IntegerType *IT = dyn_cast<IntegerType>(J->getType()))
+          AliveBits[J] = APInt::getAllOnesValue(IT->getBitWidth());
+        Worklist.push_back(J);
+      }
+    }
+    // To save memory, we don't add I to the Visited set here. Instead, we
+    // check isAlwaysLive on every instruction when searching for dead
+    // instructions later (we need to check isAlwaysLive for the
+    // integer-typed instructions anyway).
+  }
+
+  // Propagate liveness backwards to operands.
+  while (!Worklist.empty()) {
+    Instruction *UserI = Worklist.pop_back_val();
+
+    DEBUG(dbgs() << "BDCE: Visiting: " << *UserI);
+    APInt AOut;
+    if (UserI->getType()->isIntegerTy()) {
+      AOut = AliveBits[UserI];
+      DEBUG(dbgs() << " Alive Out: " << AOut);
+    }
+    DEBUG(dbgs() << "\n");
+
+    if (!UserI->getType()->isIntegerTy())
+      Visited.insert(UserI);
+
+    APInt KnownZero, KnownOne, KnownZero2, KnownOne2;
+    // Compute the set of alive bits for each operand. These are anded into the
+    // existing set, if any, and if that changes the set of alive bits, the
+    // operand is added to the work-list.
+    for (Use &OI : UserI->operands()) {
+      if (Instruction *I = dyn_cast<Instruction>(OI)) {
+        if (IntegerType *IT = dyn_cast<IntegerType>(I->getType())) {
+          unsigned BitWidth = IT->getBitWidth();
+          APInt AB = APInt::getAllOnesValue(BitWidth);
+          if (UserI->getType()->isIntegerTy() && !AOut &&
+              !isAlwaysLive(UserI)) {
+            AB = APInt(BitWidth, 0);
+          } else {
+            // If all bits of the output are dead, then all bits of the input 
+            // Bits of each operand that are used to compute alive bits of the
+            // output are alive, all others are dead.
+            determineLiveOperandBits(UserI, I, OI.getOperandNo(), AOut, AB,
+                                     KnownZero, KnownOne,
+                                     KnownZero2, KnownOne2);
+          }
+
+          // If we've added to the set of alive bits (or the operand has not
+          // been previously visited), then re-queue the operand to be visited
+          // again.
+          APInt ABPrev(BitWidth, 0);
+          auto ABI = AliveBits.find(I);
+          if (ABI != AliveBits.end())
+            ABPrev = ABI->second;
+
+          APInt ABNew = AB | ABPrev;
+          if (ABNew != ABPrev || ABI == AliveBits.end()) {
+            AliveBits[I] = std::move(ABNew);
+            Worklist.push_back(I);
+          }
+        } else if (!Visited.count(I)) {
+          Worklist.push_back(I);
+        }
+      }
+    }
+  }
+
+  bool Changed = false;
+  // The inverse of the live set is the dead set.  These are those instructions
+  // which have no side effects and do not influence the control flow or return
+  // value of the function, and may therefore be deleted safely.
+  // NOTE: We reuse the Worklist vector here for memory efficiency.
+  for (Instruction &I : inst_range(F)) {
+    // For live instructions that have all dead bits, first make them dead by
+    // replacing all uses with something else. Then, if they don't need to
+    // remain live (because they have side effects, etc.) we can remove them.
+    if (I.getType()->isIntegerTy()) {
+      auto ABI = AliveBits.find(&I);
+      if (ABI != AliveBits.end()) {
+        if (ABI->second.getBoolValue())
+          continue;
+
+        DEBUG(dbgs() << "BDCE: Trivializing: " << I << " (all bits dead)\n");
+        // FIXME: In theory we could substitute undef here instead of zero.
+        // This should be reconsidered once we settle on the semantics of
+        // undef, poison, etc.
+        Value *Zero = ConstantInt::get(I.getType(), 0);
+        ++NumSimplified;
+        I.replaceAllUsesWith(Zero);
+        Changed = true;
+      }
+    } else if (Visited.count(&I)) {
+      continue;
+    }
+
+    if (isAlwaysLive(&I))
+      continue;
+
+    DEBUG(dbgs() << "BDCE: Removing: " << I << " (unused)\n");
+    Worklist.push_back(&I);
+    I.dropAllReferences();
+    Changed = true;
+  }
+
+  for (Instruction *&I : Worklist) {
+    ++NumRemoved;
+    I->eraseFromParent();
+  }
+
+  return Changed;
+}
+
+FunctionPass *llvm::createBitTrackingDCEPass() {
+  return new BDCE();
+}
+
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt

index 1f381e6d2ecb83940c7a0b06b233f82a3f6d2036..4d2b18e7afbe3cea7030e37cc31848abc696c5bd 100644 (file)
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -1,6 +1,7 @@
  add_llvm_library(LLVMScalarOpts
    ADCE.cpp
    AlignmentFromAssumptions.cpp
+  BDCE.cpp
    ConstantHoisting.cpp
    ConstantProp.cpp
    CorrelatedValuePropagation.cpp
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp

index 653b846f6b611245c34531a6bda5459465e7d056..f0728dccd24f141412c7fbdce476eb59b769918d 100644 (file)
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -28,6 +28,7 @@ using namespace llvm;
  /// ScalarOpts library.
  void llvm::initializeScalarOpts(PassRegistry &Registry) {
    initializeADCEPass(Registry);
+  initializeBDCEPass(Registry);
    initializeAlignmentFromAssumptionsPass(Registry);
    initializeSampleProfileLoaderPass(Registry);
    initializeConstantHoistingPass(Registry);
@@ -83,6 +84,10 @@ void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM) {
    unwrap(PM)->add(createAggressiveDCEPass());
  }
  
+void LLVMAddBitTrackingDCEPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createBitTrackingDCEPass());
+}
+
  void LLVMAddAlignmentFromAssumptionsPass(LLVMPassManagerRef PM) {
    unwrap(PM)->add(createAlignmentFromAssumptionsPass());
  }
diff --git a/test/Transforms/BDCE/basic.ll b/test/Transforms/BDCE/basic.ll

new file mode 100644 (file)

index 0000000..6e748c6
--- /dev/null
+++ b/test/Transforms/BDCE/basic.ll
@@ -0,0 +1,348 @@
+; RUN: opt -S -bdce -instsimplify < %s | FileCheck %s
+; RUN: opt -S -instsimplify < %s | FileCheck %s -check-prefix=CHECK-IO
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readnone
+define signext i32 @bar(i32 signext %x) #0 {
+entry:
+  %call = tail call signext i32 @foo(i32 signext 5) #0
+  %and = and i32 %call, 4
+  %or = or i32 %and, %x
+  %call1 = tail call signext i32 @foo(i32 signext 3) #0
+  %and2 = and i32 %call1, 8
+  %or3 = or i32 %or, %and2
+  %call4 = tail call signext i32 @foo(i32 signext 2) #0
+  %and5 = and i32 %call4, 16
+  %or6 = or i32 %or3, %and5
+  %call7 = tail call signext i32 @foo(i32 signext 1) #0
+  %and8 = and i32 %call7, 32
+  %or9 = or i32 %or6, %and8
+  %call10 = tail call signext i32 @foo(i32 signext 0) #0
+  %and11 = and i32 %call10, 64
+  %or12 = or i32 %or9, %and11
+  %call13 = tail call signext i32 @foo(i32 signext 4) #0
+  %and14 = and i32 %call13, 128
+  %or15 = or i32 %or12, %and14
+  %shr = ashr i32 %or15, 4
+  ret i32 %shr
+
+; CHECK-LABEL: @bar
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 5)
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 3)
+; CHECK: tail call signext i32 @foo(i32 signext 2)
+; CHECK: tail call signext i32 @foo(i32 signext 1)
+; CHECK: tail call signext i32 @foo(i32 signext 0)
+; CHECK: tail call signext i32 @foo(i32 signext 4)
+; CHECK: ret i32
+
+; Check that instsimplify is not doing this all on its own.
+; CHECK-IO-LABEL: @bar
+; CHECK-IO: tail call signext i32 @foo(i32 signext 5)
+; CHECK-IO: tail call signext i32 @foo(i32 signext 3)
+; CHECK-IO: tail call signext i32 @foo(i32 signext 2)
+; CHECK-IO: tail call signext i32 @foo(i32 signext 1)
+; CHECK-IO: tail call signext i32 @foo(i32 signext 0)
+; CHECK-IO: tail call signext i32 @foo(i32 signext 4)
+; CHECK-IO: ret i32
+}
+
+; Function Attrs: nounwind readnone
+declare signext i32 @foo(i32 signext) #0
+
+; Function Attrs: nounwind readnone
+define signext i32 @far(i32 signext %x) #1 {
+entry:
+  %call = tail call signext i32 @goo(i32 signext 5) #1
+  %and = and i32 %call, 4
+  %or = or i32 %and, %x
+  %call1 = tail call signext i32 @goo(i32 signext 3) #1
+  %and2 = and i32 %call1, 8
+  %or3 = or i32 %or, %and2
+  %call4 = tail call signext i32 @goo(i32 signext 2) #1
+  %and5 = and i32 %call4, 16
+  %or6 = or i32 %or3, %and5
+  %call7 = tail call signext i32 @goo(i32 signext 1) #1
+  %and8 = and i32 %call7, 32
+  %or9 = or i32 %or6, %and8
+  %call10 = tail call signext i32 @goo(i32 signext 0) #1
+  %and11 = and i32 %call10, 64
+  %or12 = or i32 %or9, %and11
+  %call13 = tail call signext i32 @goo(i32 signext 4) #1
+  %and14 = and i32 %call13, 128
+  %or15 = or i32 %or12, %and14
+  %shr = ashr i32 %or15, 4
+  ret i32 %shr
+
+; CHECK-LABEL: @far
+; Calls to foo(5) and foo(3) are still there, but their results are not used.
+; CHECK: tail call signext i32 @goo(i32 signext 5)
+; CHECK-NEXT: tail call signext i32 @goo(i32 signext 3)
+; CHECK-NEXT: tail call signext i32 @goo(i32 signext 2)
+; CHECK: tail call signext i32 @goo(i32 signext 1)
+; CHECK: tail call signext i32 @goo(i32 signext 0)
+; CHECK: tail call signext i32 @goo(i32 signext 4)
+; CHECK: ret i32
+
+; Check that instsimplify is not doing this all on its own.
+; CHECK-IO-LABEL: @far
+; CHECK-IO: tail call signext i32 @goo(i32 signext 5)
+; CHECK-IO: tail call signext i32 @goo(i32 signext 3)
+; CHECK-IO: tail call signext i32 @goo(i32 signext 2)
+; CHECK-IO: tail call signext i32 @goo(i32 signext 1)
+; CHECK-IO: tail call signext i32 @goo(i32 signext 0)
+; CHECK-IO: tail call signext i32 @goo(i32 signext 4)
+; CHECK-IO: ret i32
+}
+
+declare signext i32 @goo(i32 signext) #1
+
+; Function Attrs: nounwind readnone
+define signext i32 @tar1(i32 signext %x) #0 {
+entry:
+  %call = tail call signext i32 @foo(i32 signext 5) #0
+  %and = and i32 %call, 33554432
+  %or = or i32 %and, %x
+  %call1 = tail call signext i32 @foo(i32 signext 3) #0
+  %and2 = and i32 %call1, 67108864
+  %or3 = or i32 %or, %and2
+  %call4 = tail call signext i32 @foo(i32 signext 2) #0
+  %and5 = and i32 %call4, 16
+  %or6 = or i32 %or3, %and5
+  %call7 = tail call signext i32 @foo(i32 signext 1) #0
+  %and8 = and i32 %call7, 32
+  %or9 = or i32 %or6, %and8
+  %call10 = tail call signext i32 @foo(i32 signext 0) #0
+  %and11 = and i32 %call10, 64
+  %or12 = or i32 %or9, %and11
+  %call13 = tail call signext i32 @foo(i32 signext 4) #0
+  %and14 = and i32 %call13, 128
+  %or15 = or i32 %or12, %and14
+  %bs = tail call i32 @llvm.bswap.i32(i32 %or15) #0
+  %shr = ashr i32 %bs, 4
+  ret i32 %shr
+
+; CHECK-LABEL: @tar1
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 5)
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 3)
+; CHECK: tail call signext i32 @foo(i32 signext 2)
+; CHECK: tail call signext i32 @foo(i32 signext 1)
+; CHECK: tail call signext i32 @foo(i32 signext 0)
+; CHECK: tail call signext i32 @foo(i32 signext 4)
+; CHECK: ret i32
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.bswap.i32(i32) #0
+
+; Function Attrs: nounwind readnone
+define signext i32 @tar2(i32 signext %x) #0 {
+entry:
+  %call = tail call signext i32 @foo(i32 signext 5) #0
+  %and = and i32 %call, 33554432
+  %or = or i32 %and, %x
+  %call1 = tail call signext i32 @foo(i32 signext 3) #0
+  %and2 = and i32 %call1, 67108864
+  %or3 = or i32 %or, %and2
+  %call4 = tail call signext i32 @foo(i32 signext 2) #0
+  %and5 = and i32 %call4, 16
+  %or6 = or i32 %or3, %and5
+  %call7 = tail call signext i32 @foo(i32 signext 1) #0
+  %and8 = and i32 %call7, 32
+  %or9 = or i32 %or6, %and8
+  %call10 = tail call signext i32 @foo(i32 signext 0) #0
+  %and11 = and i32 %call10, 64
+  %or12 = or i32 %or9, %and11
+  %call13 = tail call signext i32 @foo(i32 signext 4) #0
+  %and14 = and i32 %call13, 128
+  %or15 = or i32 %or12, %and14
+  %shl = shl i32 %or15, 10
+  ret i32 %shl
+
+; CHECK-LABEL: @tar2
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 5)
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 3)
+; CHECK: tail call signext i32 @foo(i32 signext 2)
+; CHECK: tail call signext i32 @foo(i32 signext 1)
+; CHECK: tail call signext i32 @foo(i32 signext 0)
+; CHECK: tail call signext i32 @foo(i32 signext 4)
+; CHECK: ret i32
+}
+
+; Function Attrs: nounwind readnone
+define signext i32 @tar3(i32 signext %x) #0 {
+entry:
+  %call = tail call signext i32 @foo(i32 signext 5) #0
+  %and = and i32 %call, 33554432
+  %or = or i32 %and, %x
+  %call1 = tail call signext i32 @foo(i32 signext 3) #0
+  %and2 = and i32 %call1, 67108864
+  %or3 = or i32 %or, %and2
+  %call4 = tail call signext i32 @foo(i32 signext 2) #0
+  %and5 = and i32 %call4, 16
+  %or6 = or i32 %or3, %and5
+  %call7 = tail call signext i32 @foo(i32 signext 1) #0
+  %and8 = and i32 %call7, 32
+  %or9 = or i32 %or6, %and8
+  %call10 = tail call signext i32 @foo(i32 signext 0) #0
+  %and11 = and i32 %call10, 64
+  %or12 = or i32 %or9, %and11
+  %call13 = tail call signext i32 @foo(i32 signext 4) #0
+  %and14 = and i32 %call13, 128
+  %or15 = or i32 %or12, %and14
+  %add = add i32 %or15, 5
+  %shl = shl i32 %add, 10
+  ret i32 %shl
+
+; CHECK-LABEL: @tar3
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 5)
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 3)
+; CHECK: tail call signext i32 @foo(i32 signext 2)
+; CHECK: tail call signext i32 @foo(i32 signext 1)
+; CHECK: tail call signext i32 @foo(i32 signext 0)
+; CHECK: tail call signext i32 @foo(i32 signext 4)
+; CHECK: ret i32
+}
+
+; Function Attrs: nounwind readnone
+define signext i32 @tar4(i32 signext %x) #0 {
+entry:
+  %call = tail call signext i32 @foo(i32 signext 5) #0
+  %and = and i32 %call, 33554432
+  %or = or i32 %and, %x
+  %call1 = tail call signext i32 @foo(i32 signext 3) #0
+  %and2 = and i32 %call1, 67108864
+  %or3 = or i32 %or, %and2
+  %call4 = tail call signext i32 @foo(i32 signext 2) #0
+  %and5 = and i32 %call4, 16
+  %or6 = or i32 %or3, %and5
+  %call7 = tail call signext i32 @foo(i32 signext 1) #0
+  %and8 = and i32 %call7, 32
+  %or9 = or i32 %or6, %and8
+  %call10 = tail call signext i32 @foo(i32 signext 0) #0
+  %and11 = and i32 %call10, 64
+  %or12 = or i32 %or9, %and11
+  %call13 = tail call signext i32 @foo(i32 signext 4) #0
+  %and14 = and i32 %call13, 128
+  %or15 = or i32 %or12, %and14
+  %sub = sub i32 %or15, 5
+  %shl = shl i32 %sub, 10
+  ret i32 %shl
+
+; CHECK-LABEL: @tar4
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 5)
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 3)
+; CHECK: tail call signext i32 @foo(i32 signext 2)
+; CHECK: tail call signext i32 @foo(i32 signext 1)
+; CHECK: tail call signext i32 @foo(i32 signext 0)
+; CHECK: tail call signext i32 @foo(i32 signext 4)
+; CHECK: ret i32
+}
+
+; Function Attrs: nounwind readnone
+define signext i32 @tar5(i32 signext %x) #0 {
+entry:
+  %call = tail call signext i32 @foo(i32 signext 5) #0
+  %and = and i32 %call, 33554432
+  %or = or i32 %and, %x
+  %call1 = tail call signext i32 @foo(i32 signext 3) #0
+  %and2 = and i32 %call1, 67108864
+  %or3 = or i32 %or, %and2
+  %call4 = tail call signext i32 @foo(i32 signext 2) #0
+  %and5 = and i32 %call4, 16
+  %or6 = or i32 %or3, %and5
+  %call7 = tail call signext i32 @foo(i32 signext 1) #0
+  %and8 = and i32 %call7, 32
+  %or9 = or i32 %or6, %and8
+  %call10 = tail call signext i32 @foo(i32 signext 0) #0
+  %and11 = and i32 %call10, 64
+  %or12 = or i32 %or9, %and11
+  %call13 = tail call signext i32 @foo(i32 signext 4) #0
+  %and14 = and i32 %call13, 128
+  %or15 = or i32 %or12, %and14
+  %xor = xor i32 %or15, 5
+  %shl = shl i32 %xor, 10
+  ret i32 %shl
+
+; CHECK-LABEL: @tar5
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 5)
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 3)
+; CHECK: tail call signext i32 @foo(i32 signext 2)
+; CHECK: tail call signext i32 @foo(i32 signext 1)
+; CHECK: tail call signext i32 @foo(i32 signext 0)
+; CHECK: tail call signext i32 @foo(i32 signext 4)
+; CHECK: ret i32
+}
+
+; Function Attrs: nounwind readnone
+define signext i32 @tar7(i32 signext %x, i1 %b) #0 {
+entry:
+  %call = tail call signext i32 @foo(i32 signext 5) #0
+  %and = and i32 %call, 33554432
+  %or = or i32 %and, %x
+  %call1 = tail call signext i32 @foo(i32 signext 3) #0
+  %and2 = and i32 %call1, 67108864
+  %or3 = or i32 %or, %and2
+  %call4 = tail call signext i32 @foo(i32 signext 2) #0
+  %and5 = and i32 %call4, 16
+  %or6 = or i32 %or3, %and5
+  %call7 = tail call signext i32 @foo(i32 signext 1) #0
+  %and8 = and i32 %call7, 32
+  %or9 = or i32 %or6, %and8
+  %call10 = tail call signext i32 @foo(i32 signext 0) #0
+  %and11 = and i32 %call10, 64
+  %or12 = or i32 %or9, %and11
+  %call13 = tail call signext i32 @foo(i32 signext 4) #0
+  %and14 = and i32 %call13, 128
+  %or15 = or i32 %or12, %and14
+  %v = select i1 %b, i32 %or15, i32 5
+  %shl = shl i32 %v, 10
+  ret i32 %shl
+
+; CHECK-LABEL: @tar7
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 5)
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 3)
+; CHECK: tail call signext i32 @foo(i32 signext 2)
+; CHECK: tail call signext i32 @foo(i32 signext 1)
+; CHECK: tail call signext i32 @foo(i32 signext 0)
+; CHECK: tail call signext i32 @foo(i32 signext 4)
+; CHECK: ret i32
+}
+
+; Function Attrs: nounwind readnone
+define signext i16 @tar8(i32 signext %x) #0 {
+entry:
+  %call = tail call signext i32 @foo(i32 signext 5) #0
+  %and = and i32 %call, 33554432
+  %or = or i32 %and, %x
+  %call1 = tail call signext i32 @foo(i32 signext 3) #0
+  %and2 = and i32 %call1, 67108864
+  %or3 = or i32 %or, %and2
+  %call4 = tail call signext i32 @foo(i32 signext 2) #0
+  %and5 = and i32 %call4, 16
+  %or6 = or i32 %or3, %and5
+  %call7 = tail call signext i32 @foo(i32 signext 1) #0
+  %and8 = and i32 %call7, 32
+  %or9 = or i32 %or6, %and8
+  %call10 = tail call signext i32 @foo(i32 signext 0) #0
+  %and11 = and i32 %call10, 64
+  %or12 = or i32 %or9, %and11
+  %call13 = tail call signext i32 @foo(i32 signext 4) #0
+  %and14 = and i32 %call13, 128
+  %or15 = or i32 %or12, %and14
+  %tr = trunc i32 %or15 to i16
+  ret i16 %tr
+
+; CHECK-LABEL: @tar8
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 5)
+; CHECK-NOT: tail call signext i32 @foo(i32 signext 3)
+; CHECK: tail call signext i32 @foo(i32 signext 2)
+; CHECK: tail call signext i32 @foo(i32 signext 1)
+; CHECK: tail call signext i32 @foo(i32 signext 0)
+; CHECK: tail call signext i32 @foo(i32 signext 4)
+; CHECK: ret i16
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+
diff --git a/test/Transforms/BDCE/dce-pure.ll b/test/Transforms/BDCE/dce-pure.ll

new file mode 100644 (file)

index 0000000..6a432fc
--- /dev/null
+++ b/test/Transforms/BDCE/dce-pure.ll
@@ -0,0 +1,33 @@
+; RUN: opt -bdce -S < %s | FileCheck %s
+
+declare i32 @strlen(i8*) readonly nounwind
+
+define void @test1() {
+  call i32 @strlen( i8* null )
+  ret void
+
+; CHECK-LABEL: @test1
+; CHECK-NOT: call
+; CHECK: ret void
+}
+
+define i32 @test2() {
+  ; invoke of pure function should not be deleted!
+  invoke i32 @strlen( i8* null ) readnone
+                  to label %Cont unwind label %Other
+
+Cont:           ; preds = %0
+  ret i32 0
+
+Other:          ; preds = %0
+   %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+            cleanup
+  ret i32 1
+
+; CHECK-LABEL: @test2
+; CHECK: invoke
+; CHECK: ret i32 1
+}
+
+declare i32 @__gxx_personality_v0(...)
+
author	Hal Finkel <hfinkel@anl.gov>
	Tue, 17 Feb 2015 01:36:59 +0000 (01:36 +0000)
committer	Hal Finkel <hfinkel@anl.gov>
	Tue, 17 Feb 2015 01:36:59 +0000 (01:36 +0000)
include/llvm-c/Transforms/Scalar.h		patch \| blob \| history
include/llvm/InitializePasses.h		patch \| blob \| history
include/llvm/LinkAllPasses.h		patch \| blob \| history
include/llvm/Transforms/Scalar.h		patch \| blob \| history
lib/Transforms/IPO/PassManagerBuilder.cpp		patch \| blob \| history
lib/Transforms/Scalar/BDCE.cpp	[new file with mode: 0644]	patch \| blob
lib/Transforms/Scalar/CMakeLists.txt		patch \| blob \| history
lib/Transforms/Scalar/Scalar.cpp		patch \| blob \| history
test/Transforms/BDCE/basic.ll	[new file with mode: 0644]	patch \| blob
test/Transforms/BDCE/dce-pure.ll	[new file with mode: 0644]	patch \| blob