From c52d6d530c80207b6cc33633b901d3e332ab34b6 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 14 Oct 2015 22:42:12 +0000 Subject: [PATCH] Tighten known bits for ctpop based on zero input bits This is a cleaned up patch from the one written by John Regehr based on the findings of the Souper superoptimizer. The basic idea here is that input bits that are known zero reduce the maximum count that the intrinsic could return. We know that the number of bits required to represent a particular count is at most log2(N)+1. Differential Revision: http://reviews.llvm.org/D13253 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@250338 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ValueTracking.cpp | 14 +++++++-- test/Transforms/InstCombine/ctpop.ll | 45 ++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 2 deletions(-) create mode 100644 test/Transforms/InstCombine/ctpop.ll diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 5caffee8fe7..d2a1f8737cd 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -1375,8 +1375,18 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, break; } case Intrinsic::ctpop: { - unsigned LowBits = Log2_32(BitWidth)+1; - KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, + Depth + 1, Q); + // We can bound the space the count needs. Also, bits known to be zero + // can't contribute to the population. + unsigned BitsPossiblySet = BitWidth - KnownZero2.countPopulation(); + unsigned LeadingZeros = + APInt(BitWidth, BitsPossiblySet).countLeadingZeros(); + assert(LeadingZeros >= 0 && LeadingZeros <= BitWidth); + KnownZero |= APInt::getHighBitsSet(BitWidth, LeadingZeros); + KnownOne &= ~KnownZero; + // TODO: we could bound KnownOne using the lower bound on the number + // of bits which might be set provided by popcnt KnownOne2. break; } case Intrinsic::fabs: { diff --git a/test/Transforms/InstCombine/ctpop.ll b/test/Transforms/InstCombine/ctpop.ll new file mode 100644 index 00000000000..38612c92aaa --- /dev/null +++ b/test/Transforms/InstCombine/ctpop.ll @@ -0,0 +1,45 @@ +; RUN: opt < %s -S -instcombine | FileCheck %s + +declare i32 @llvm.ctpop.i32(i32) +declare i8 @llvm.ctpop.i8(i8) +declare void @llvm.assume(i1) + +define i1 @test1(i32 %arg) { +; CHECK: @test1 +; CHECK: ret i1 false + %and = and i32 %arg, 15 + %cnt = call i32 @llvm.ctpop.i32(i32 %and) + %res = icmp eq i32 %cnt, 9 + ret i1 %res +} + +define i1 @test2(i32 %arg) { +; CHECK: @test2 +; CHECK: ret i1 false + %and = and i32 %arg, 1 + %cnt = call i32 @llvm.ctpop.i32(i32 %and) + %res = icmp eq i32 %cnt, 2 + ret i1 %res +} + +define i1 @test3(i32 %arg) { +; CHECK: @test3 +; CHECK: ret i1 false + ;; Use an assume to make all the bits known without triggering constant + ;; folding. This is trying to hit a corner case where we have to avoid + ;; taking the log of 0. + %assume = icmp eq i32 %arg, 0 + call void @llvm.assume(i1 %assume) + %cnt = call i32 @llvm.ctpop.i32(i32 %arg) + %res = icmp eq i32 %cnt, 2 + ret i1 %res +} + +; Negative test for when we know nothing +define i1 @test4(i8 %arg) { +; CHECK: @test4 +; CHECK: ret i1 %res + %cnt = call i8 @llvm.ctpop.i8(i8 %arg) + %res = icmp eq i8 %cnt, 2 + ret i1 %res +} -- 2.34.1