From 71ccd4f786cc6396cc035ee0add95eebbdcbc48e Mon Sep 17 00:00:00 2001
From: Hal Finkel <hfinkel@anl.gov>
Date: Fri, 23 Oct 2015 20:37:08 +0000
Subject: [PATCH] Handle non-constant shifts in computeKnownBits, and use
 computeKnownBits for constant folding in InstCombine/Simplify

First, the motivation: LLVM currently does not realize that:

  ((2072 >> (L == 0)) >> 7) & 1 == 0

where L is some arbitrary value. Whether you right-shift 2072 by 7 or by 8,
the lowest-order bit is always zero (2072 >> 7 == 16 and 2072 >> 8 == 8, both
even).

There are obviously several ways to go about fixing this, but the generic
solution pursued in this patch is to teach computeKnownBits something about
shifts by a non-constant amount. Previously, we would give up completely on
these. Instead, in cases where we know something about the low-order bits of
the shift-amount operand, we can combine (and together) the associated
restrictions for all shift amounts consistent with that knowledge. As a
further generalization, I refactored all of the logic for all three kinds of
shifts to have this capability. This works well in the above case, for
example, because the dynamic shift amount can only be 0 or 1, and thus we can
say a lot about the known bits of the result. (A standalone sketch of this
combining scheme is appended after the patch.)

This brings us to the second part of this change: Even when we know all of the
bits of a value via computeKnownBits, nothing used to constant-fold the
result. This introduces the necessary code into InstCombine and InstSimplify.
I've added it into both because:

  1. InstCombine won't automatically pick up the associated logic in
     InstSimplify (InstCombine uses InstSimplify, but not via the API that
     passes in the original instruction).

  2. Putting the logic in InstCombine allows the resulting simplifications to
     become part of the iterative worklist.

  3. Putting the logic in InstSimplify allows the resulting simplifications to
     be used everywhere else that calls SimplifyInstruction (inlining,
     unrolling, and many others).

This also requires a small change to our definition of an ephemeral value so
that we don't break the test case from r246696 (where the icmp feeding the
@llvm.assume is also feeding a br). Under the old definition, the icmp would
not be considered ephemeral (because it is used by the br), but this causes
the assume to remove itself (in addition to simplifying the branch structure),
and it seems more useful to prevent that from happening.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@251146 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Analysis/InstructionSimplify.cpp          |  11 ++
 lib/Analysis/ValueTracking.cpp                | 145 ++++++++++++++----
 .../InstCombine/InstructionCombining.cpp      |  21 +++
 test/Transforms/InstCombine/all-bits-shift.ll |  46 ++++++
 test/Transforms/InstCombine/div.ll            |   4 +-
 .../InstCombine/load-combine-metadata.ll      |   6 +-
 test/Transforms/InstSimplify/shift-128-kb.ll  |  22 +++
 7 files changed, 215 insertions(+), 40 deletions(-)
 create mode 100644 test/Transforms/InstCombine/all-bits-shift.ll
 create mode 100644 test/Transforms/InstSimplify/shift-128-kb.ll

diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index be5ce2960ea..cf6aa9a1163 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -4145,6 +4145,17 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
     break;
   }
 
+  // In general, it is possible for computeKnownBits to determine all bits in a
+  // value even when the operands are not all constants.
+  if (!Result && I->getType()->isIntegerTy()) {
+    unsigned BitWidth = I->getType()->getScalarSizeInBits();
+    APInt KnownZero(BitWidth, 0);
+    APInt KnownOne(BitWidth, 0);
+    computeKnownBits(I, KnownZero, KnownOne, DL, /*Depth*/0, AC, I, DT);
+    if ((KnownZero | KnownOne).isAllOnesValue())
+      Result = ConstantInt::get(I->getContext(), KnownOne);
+  }
+
   /// If called on unreachable code, the above logic may report that the
   /// instruction simplified to itself. Make life easier for users by
   /// detecting that case here, returning a safe value instead.
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index ad6be85f87a..ea2321e832e 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -393,6 +393,12 @@ static bool isEphemeralValueOf(Instruction *I, const Value *E) {
   SmallPtrSet<const Value *, 32> Visited;
   SmallPtrSet<const Value *, 16> EphValues;
 
+  // The instruction defining an assumption's condition itself is always
+  // considered ephemeral to that assumption (even if it has other
+  // non-ephemeral users). See r246696's test case for an example.
+  if (std::find(I->op_begin(), I->op_end(), E) != I->op_end())
+    return true;
+
   while (!WorkSet.empty()) {
     const Value *V = WorkSet.pop_back_val();
     if (!Visited.insert(V).second)
@@ -967,6 +973,71 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
   }
 }
 
+// Compute known bits from a shift operator, including those with a
+// non-constant shift amount. KnownZero and KnownOne are the outputs of this
+// function. KnownZero2 and KnownOne2 are pre-allocated temporaries with the
+// same bit width as KnownZero and KnownOne. KZF and KOF are operator-specific
+// functors that, given the known-zero or known-one bits respectively, and a
+// shift amount, compute the implied known-zero or known-one bits of the shift
+// operator's result respectively for that shift amount. The results from
+// calling KZF and KOF are conservatively combined for all permitted shift
+// amounts.
+template <typename KZFunctor, typename KOFunctor>
+static void computeKnownBitsFromShiftOperator(Operator *I,
+              APInt &KnownZero, APInt &KnownOne,
+              APInt &KnownZero2, APInt &KnownOne2,
+              const DataLayout &DL, unsigned Depth, const Query &Q,
+              KZFunctor KZF, KOFunctor KOF) {
+  unsigned BitWidth = KnownZero.getBitWidth();
+
+  if (auto *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+    unsigned ShiftAmt = SA->getLimitedValue(BitWidth-1);
+
+    computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
+    KnownZero = KZF(KnownZero, ShiftAmt);
+    KnownOne  = KOF(KnownOne, ShiftAmt);
+    return;
+  }
+
+  computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q);
+
+  // Note: We cannot use KnownZero.getLimitedValue() here, because if
+  // BitWidth > 64 and any upper bits are known, we'll end up returning the
+  // limit value (which implies all bits are known).
+  uint64_t ShiftAmtKZ = KnownZero.zextOrTrunc(64).getZExtValue();
+  uint64_t ShiftAmtKO = KnownOne.zextOrTrunc(64).getZExtValue();
+
+  // It would be more clearly correct to use the two temporaries for this
+  // calculation. Reusing the APInts here to prevent unnecessary allocations.
+  KnownZero.clearAllBits(), KnownOne.clearAllBits();
+
+  // Early exit if we can't constrain any well-defined shift amount.
+  if (!(ShiftAmtKZ & (BitWidth-1)) && !(ShiftAmtKO & (BitWidth-1)))
+    return;
+
+  computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q);
+
+  KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+  for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) {
+    // Combine the shifted known input bits only for those shift amounts
+    // compatible with its known constraints.
+    if ((ShiftAmt & ~ShiftAmtKZ) != ShiftAmt)
+      continue;
+    if ((ShiftAmt | ShiftAmtKO) != ShiftAmt)
+      continue;
+
+    KnownZero &= KZF(KnownZero2, ShiftAmt);
+    KnownOne  &= KOF(KnownOne2, ShiftAmt);
+  }
+
+  // If there are no compatible shift amounts, then we've proven that the shift
+  // amount must be >= the BitWidth, and the result is undefined. We could
+  // return anything we'd like, but we need to make sure the sets of known bits
+  // stay disjoint (it should be better for some other code to actually
+  // propagate the undef than to pick a value here using known bits).
+  if ((KnownZero & KnownOne) != 0)
+    KnownZero.clearAllBits(), KnownOne.clearAllBits();
+}
+
 static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
                                          APInt &KnownOne, const DataLayout &DL,
                                          unsigned Depth, const Query &Q) {
@@ -1104,48 +1175,54 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
       KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
     break;
   }
-  case Instruction::Shl:
+  case Instruction::Shl: {
     // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
-    if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
-      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
-      computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
-      KnownZero <<= ShiftAmt;
-      KnownOne  <<= ShiftAmt;
-      KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0
-    }
+    auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) {
+      return (KnownZero << ShiftAmt) |
+             APInt::getLowBitsSet(BitWidth, ShiftAmt); // Low bits known 0.
+    };
+
+    auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) {
+      return KnownOne << ShiftAmt;
+    };
+
+    computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne,
+                                      KnownZero2, KnownOne2, DL, Depth, Q,
+                                      KZF, KOF);
     break;
-  case Instruction::LShr:
+  }
+  case Instruction::LShr: {
     // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
-    if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
-      // Compute the new bits that are at the top now.
-      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
-
-      // Unsigned shift right.
-      computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
-      KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
-      KnownOne  = APIntOps::lshr(KnownOne, ShiftAmt);
-      // high bits known zero.
-      KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
-    }
+    auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) {
+      return APIntOps::lshr(KnownZero, ShiftAmt) |
+             // High bits known zero.
+             APInt::getHighBitsSet(BitWidth, ShiftAmt);
+    };
+
+    auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) {
+      return APIntOps::lshr(KnownOne, ShiftAmt);
+    };
+
+    computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne,
+                                      KnownZero2, KnownOne2, DL, Depth, Q,
+                                      KZF, KOF);
    break;
-  case Instruction::AShr:
+  }
+  case Instruction::AShr: {
    // (ashr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
-    if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
-      // Compute the new bits that are at the top now.
-      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
+    auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) {
+      return APIntOps::ashr(KnownZero, ShiftAmt);
+    };
 
-      // Signed shift right.
-      computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
-      KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
-      KnownOne  = APIntOps::lshr(KnownOne, ShiftAmt);
+    auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) {
+      return APIntOps::ashr(KnownOne, ShiftAmt);
+    };
 
-      APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
-      if (KnownZero[BitWidth-ShiftAmt-1]) // New bits are known zero.
-        KnownZero |= HighBits;
-      else if (KnownOne[BitWidth-ShiftAmt-1]) // New bits are known one.
-        KnownOne |= HighBits;
-    }
+    computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne,
+                                      KnownZero2, KnownOne2, DL, Depth, Q,
+                                      KZF, KOF);
     break;
+  }
   case Instruction::Sub: {
     bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
     computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 9b19732acfc..69a2661f031 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2734,6 +2734,27 @@ bool InstCombiner::run() {
       }
     }
 
+    // In general, it is possible for computeKnownBits to determine all bits in
+    // a value even when the operands are not all constants.
+    if (!I->use_empty() && I->getType()->isIntegerTy()) {
+      unsigned BitWidth = I->getType()->getScalarSizeInBits();
+      APInt KnownZero(BitWidth, 0);
+      APInt KnownOne(BitWidth, 0);
+      computeKnownBits(I, KnownZero, KnownOne, /*Depth*/0, I);
+      if ((KnownZero | KnownOne).isAllOnesValue()) {
+        Constant *C = ConstantInt::get(I->getContext(), KnownOne);
+        DEBUG(dbgs() << "IC: ConstFold (all bits known) to: " << *C <<
+                        " from: " << *I << '\n');
+
+        // Add operands to the worklist.
+        ReplaceInstUsesWith(*I, C);
+        ++NumConstProp;
+        EraseInstFromFunction(*I);
+        MadeIRChange = true;
+        continue;
+      }
+    }
+
     // See if we can trivially sink this instruction to a successor basic block.
     if (I->hasOneUse()) {
       BasicBlock *BB = I->getParent();
diff --git a/test/Transforms/InstCombine/all-bits-shift.ll b/test/Transforms/InstCombine/all-bits-shift.ll
new file mode 100644
index 00000000000..b9eb19cf2ad
--- /dev/null
+++ b/test/Transforms/InstCombine/all-bits-shift.ll
@@ -0,0 +1,46 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+; RUN: opt -S -instsimplify < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@d = global i32 15, align 4
+@b = global i32* @d, align 8
+@a = common global i32 0, align 4
+
+; Function Attrs: nounwind
+define signext i32 @main() #1 {
+entry:
+  %0 = load i32*, i32** @b, align 8
+  %1 = load i32, i32* @a, align 4
+  %lnot = icmp eq i32 %1, 0
+  %lnot.ext = zext i1 %lnot to i32
+  %shr.i = lshr i32 2072, %lnot.ext
+  %call.lobit = lshr i32 %shr.i, 7
+  %2 = and i32 %call.lobit, 1
+  %3 = load i32, i32* %0, align 4
+  %or = or i32 %2, %3
+  store i32 %or, i32* %0, align 4
+  %4 = load i32, i32* @a, align 4
+  %lnot.1 = icmp eq i32 %4, 0
+  %lnot.ext.1 = zext i1 %lnot.1 to i32
+  %shr.i.1 = lshr i32 2072, %lnot.ext.1
+  %call.lobit.1 = lshr i32 %shr.i.1, 7
+  %5 = and i32 %call.lobit.1, 1
+  %or.1 = or i32 %5, %or
+  store i32 %or.1, i32* %0, align 4
+  ret i32 %or.1
+
+; Check that both InstCombine and InstSimplify can use computeKnownBits to
+; realize that:
+;   ((2072 >> (L == 0)) >> 7) & 1
+; is always zero.
+
+; CHECK-LABEL: @main
+; CHECK: %[[V1:[0-9]+]] = load i32*, i32** @b, align 8
+; CHECK: %[[V2:[0-9]+]] = load i32, i32* %[[V1]], align 4
+; CHECK: ret i32 %[[V2]]
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+
diff --git a/test/Transforms/InstCombine/div.ll b/test/Transforms/InstCombine/div.ll
index 3194c015fd7..27a316113e5 100644
--- a/test/Transforms/InstCombine/div.ll
+++ b/test/Transforms/InstCombine/div.ll
@@ -270,9 +270,7 @@ define <2 x i32> @test31(<2 x i32> %x) {
   %div = udiv <2 x i32> %shr, <i32 2147483647, i32 2147483647>
   ret <2 x i32> %div
 ; CHECK-LABEL: @test31(
-; CHECK-NEXT: %[[shr:.*]] = lshr <2 x i32> %x, <i32 31, i32 31>
-; CHECK-NEXT: udiv <2 x i32> %[[shr]], <i32 2147483647, i32 2147483647>
-; CHECK-NEXT: ret <2 x i32>
+; CHECK-NEXT: ret <2 x i32> zeroinitializer
 }
 
 define i32 @test32(i32 %a, i32 %b) {
diff --git a/test/Transforms/InstCombine/load-combine-metadata.ll b/test/Transforms/InstCombine/load-combine-metadata.ll
index 9b9c1fe607b..24b26fa4213 100644
--- a/test/Transforms/InstCombine/load-combine-metadata.ll
+++ b/test/Transforms/InstCombine/load-combine-metadata.ll
@@ -17,9 +17,9 @@ define void @test_load_load_combine_metadata(i32*, i32*, i32*) {
   ret void
 }
 
-; CHECK: ![[RANGE]] = !{i32 0, i32 1, i32 8, i32 9}
-!0 = !{ i32 0, i32 1 }
-!1 = !{ i32 8, i32 9 }
+; CHECK: ![[RANGE]] = !{i32 0, i32 5, i32 7, i32 9}
+!0 = !{ i32 0, i32 5 }
+!1 = !{ i32 7, i32 9 }
 !2 = !{!2}
 !3 = !{!3, !2}
 !4 = !{!4, !2}
diff --git a/test/Transforms/InstSimplify/shift-128-kb.ll b/test/Transforms/InstSimplify/shift-128-kb.ll
new file mode 100644
index 00000000000..3f69ecccaf5
--- /dev/null
+++ b/test/Transforms/InstSimplify/shift-128-kb.ll
@@ -0,0 +1,22 @@
+; RUN: opt -S -instsimplify < %s | FileCheck %s
+
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define zeroext i1 @_Z10isNegativemj(i64 %Val, i32 zeroext %IntegerBitWidth) {
+entry:
+  %conv = zext i32 %IntegerBitWidth to i64
+  %sub = sub i64 128, %conv
+  %conv1 = trunc i64 %sub to i32
+  %conv2 = zext i64 %Val to i128
+  %sh_prom = zext i32 %conv1 to i128
+  %shl = shl i128 %conv2, %sh_prom
+  %shr = ashr i128 %shl, %sh_prom
+  %cmp = icmp slt i128 %shr, 0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @_Z10isNegativemj
+; CHECK-NOT: ret i1 false
+; CHECK: ret i1 %cmp
+
-- 
2.34.1
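
Appendix (editor's sketch, not part of the patch): the standalone C++ program
below models the two ideas above, namely intersecting the known bits implied
by every shift amount compatible with what is known about the shift-amount
operand, and then folding to a constant once every bit of the result is known.
The names used here (KnownBits, knownBitsLShr, knownBitsAnd, makeConstant) are
hypothetical and are not LLVM's APInt-based API; the bit width is fixed at 32
for brevity.

  #include <cstdint>
  #include <cstdio>

  // Known-bit masks for a 32-bit value: a bit may be known zero, known one,
  // or (in neither mask) unknown. The two masks must stay disjoint.
  struct KnownBits {
    uint32_t Zero; // bits known to be 0
    uint32_t One;  // bits known to be 1
  };

  // A fully-known constant: every bit is either known one or known zero.
  static KnownBits makeConstant(uint32_t C) { return {~C, C}; }

  // Model of the patch's scheme for `lshr Val, Amt` with a non-constant Amt:
  // visit every in-range shift amount, skip those that contradict the known
  // bits of Amt, and AND together the known bits each remaining amount
  // implies.
  static KnownBits knownBitsLShr(KnownBits Val, KnownBits Amt) {
    const unsigned BitWidth = 32;
    KnownBits Out = {~0u, ~0u}; // start fully known, then intersect
    for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) {
      if ((ShiftAmt & ~Amt.Zero) != ShiftAmt)
        continue; // would set a bit of Amt that is known zero
      if ((ShiftAmt | Amt.One) != ShiftAmt)
        continue; // would clear a bit of Amt that is known one
      // lshr by ShiftAmt: known bits shift right; vacated high bits become 0.
      uint32_t HighZeros = ShiftAmt ? ~0u << (BitWidth - ShiftAmt) : 0;
      Out.Zero &= (Val.Zero >> ShiftAmt) | HighZeros;
      Out.One &= Val.One >> ShiftAmt;
    }
    // No compatible in-range amount leaves the masks in conflict: the shift
    // would be undefined, so claim nothing.
    if (Out.Zero & Out.One)
      Out = KnownBits{};
    return Out;
  }

  // Known bits of `and`: zero if zero in either operand, one if one in both.
  static KnownBits knownBitsAnd(KnownBits A, KnownBits B) {
    return {A.Zero | B.Zero, A.One & B.One};
  }

  int main() {
    // The motivating expression: ((2072 >> (L == 0)) >> 7) & 1.
    // (L == 0) zero-extended to i32 leaves only bit 0 unknown.
    KnownBits Cond = {~1u, 0};
    KnownBits Inner = knownBitsLShr(makeConstant(2072), Cond); // amount 0 or 1
    KnownBits Outer = knownBitsLShr(Inner, makeConstant(7));   // amount 7
    KnownBits Masked = knownBitsAnd(Outer, makeConstant(1));
    // Every bit of Masked is known, so the whole expression is a constant.
    if ((Masked.Zero | Masked.One) == ~0u)
      std::printf("folds to %u\n", Masked.One); // prints "folds to 0"
    return 0;
  }

Compiled as C++11 and run, this prints "folds to 0": the expression folds to
the constant 0 even though L is unknown, which is exactly the situation the
new (KnownZero | KnownOne).isAllOnesValue() checks in InstSimplify and
InstCombine catch.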
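A second sketch, under the same caveats (toy types, not LLVM's actual
isEphemeralValueOf signature), for the ephemeral-value tweak: the new early
exit treats the instruction defining an assumption's condition as ephemeral
to that assumption even when it has other users, such as the br in r246696's
test case.

  #include <algorithm>
  #include <vector>

  // Toy stand-ins for llvm::Value/Instruction, just enough to show the check.
  struct Value {};
  struct Instruction : Value {
    std::vector<Value *> Operands;
  };

  // Model of the early exit added to isEphemeralValueOf: if E directly
  // defines one of the assume's operands (i.e., its condition), it is
  // ephemeral to that assumption regardless of its other users. The real
  // function would then continue with its worklist-based walk.
  static bool isEphemeralValueOf(const Instruction *Assume, const Value *E) {
    return std::find(Assume->Operands.begin(), Assume->Operands.end(), E) !=
           Assume->Operands.end();
  }

  int main() {
    Instruction Cond;   // the icmp feeding both the @llvm.assume and a br
    Instruction Assume; // the @llvm.assume call
    Assume.Operands.push_back(&Cond);
    // Under the new definition this holds despite Cond's use by the br.
    return isEphemeralValueOf(&Assume, &Cond) ? 0 : 1;
  }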