From ee1b45f2a2dbe7e1ecfdb0cf6c4e0c22eb3f18ae Mon Sep 17 00:00:00 2001 From: Yi Jiang Date: Wed, 20 Aug 2014 22:55:40 +0000 Subject: [PATCH] New InstCombine pattern: (icmp ult/ule (A + C1), C3) | (icmp ult/ule (A + C2), C3) to (icmp ult/ule ((A & ~(C1 ^ C2)) + max(C1, C2)), C3) under certain condition git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216135 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineAndOrXor.cpp | 55 +++++++++++++++++++ test/Transforms/InstCombine/or.ll | 26 +++++++++ 2 files changed, 81 insertions(+) diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index dbc2175196a..743e20b294f 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1612,6 +1612,61 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { } } + // Fold (icmp ult/ule (A + C1), C3) | (icmp ult/ule (A + C2), C3) + // --> (icmp ult/ule ((A & ~(C1 ^ C2)) + max(C1, C2)), C3) + // The original condition actually refers to the following two ranges: + // [MAX_UINT-C1+1, MAX_UINT-C1+1+C3] and [MAX_UINT-C2+1, MAX_UINT-C2+1+C3] + // We can fold these two ranges if: + // 1) C1 and C2 are both unsigned greater than C3. + // 2) The two ranges are separated. + // 3) C1 ^ C2 is a one-bit mask. + // 4) LowRange1 ^ LowRange2 and HighRange1 ^ HighRange2 are the same one-bit mask. + // This implies all values in the two ranges differ by exactly one bit. 
+ + if ((LHSCC == ICmpInst::ICMP_ULT || LHSCC == ICmpInst::ICMP_ULE) && + LHSCC == RHSCC && LHSCst && RHSCst && LHS->hasOneUse() && + RHS->hasOneUse() && LHSCst->getType() == RHSCst->getType() && + LHSCst->getValue() == (RHSCst->getValue())) { + + Value *LAdd = LHS->getOperand(0); + Value *RAdd = RHS->getOperand(0); + + Value *LAddOpnd, *RAddOpnd; + ConstantInt *LAddCst, *RAddCst; + if (match(LAdd, m_Add(m_Value(LAddOpnd), m_ConstantInt(LAddCst))) && + match(RAdd, m_Add(m_Value(RAddOpnd), m_ConstantInt(RAddCst))) && + LAddCst->getValue().ugt(LHSCst->getValue()) && + RAddCst->getValue().ugt(LHSCst->getValue())) { + + APInt DiffCst = LAddCst->getValue() ^ RAddCst->getValue(); + if (LAddOpnd == RAddOpnd && DiffCst.isPowerOf2()) { + ConstantInt *MaxAddCst = nullptr; + if (LAddCst->getValue().ult(RAddCst->getValue())) + MaxAddCst = RAddCst; + else + MaxAddCst = LAddCst; + + APInt RRangeLow = -RAddCst->getValue(); + APInt RRangeHigh = RRangeLow + LHSCst->getValue(); + APInt LRangeLow = -LAddCst->getValue(); + APInt LRangeHigh = LRangeLow + LHSCst->getValue(); + APInt LowRangeDiff = RRangeLow ^ LRangeLow; + APInt HighRangeDiff = RRangeHigh ^ LRangeHigh; + APInt RangeDiff = LRangeLow.sgt(RRangeLow) ? 
LRangeLow - RRangeLow + : RRangeLow - LRangeLow; + + if (LowRangeDiff.isPowerOf2() && LowRangeDiff == HighRangeDiff && + RangeDiff.ugt(LHSCst->getValue())) { + Value *MaskCst = ConstantInt::get(LAddCst->getType(), ~DiffCst); + + Value *NewAnd = Builder->CreateAnd(LAddOpnd, MaskCst); + Value *NewAdd = Builder->CreateAdd(NewAnd, MaxAddCst); + return (Builder->CreateICmp(LHS->getPredicate(), NewAdd, LHSCst)); + } + } + } + } + // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B) if (PredicatesFoldable(LHSCC, RHSCC)) { if (LHS->getOperand(0) == RHS->getOperand(1) && diff --git a/test/Transforms/InstCombine/or.ll b/test/Transforms/InstCombine/or.ll index 5d6ca9603f7..23dad212e47 100644 --- a/test/Transforms/InstCombine/or.ll +++ b/test/Transforms/InstCombine/or.ll @@ -480,3 +480,29 @@ define i32 @test45(i32 %x, i32 %y, i32 %z) { %or1 = or i32 %and, %y ret i32 %or1 } + +define i1 @test46(i8 signext %c) { + %c.off = add i8 %c, -97 + %cmp1 = icmp ult i8 %c.off, 26 + %c.off17 = add i8 %c, -65 + %cmp2 = icmp ult i8 %c.off17, 26 + %or = or i1 %cmp1, %cmp2 + ret i1 %or +; CHECK-LABEL: @test46( +; CHECK-NEXT: and i8 %c, -33 +; CHECK-NEXT: add i8 %1, -65 +; CHECK-NEXT: icmp ult i8 %2, 26 +} + +define i1 @test47(i8 signext %c) { + %c.off = add i8 %c, -65 + %cmp1 = icmp ule i8 %c.off, 26 + %c.off17 = add i8 %c, -97 + %cmp2 = icmp ule i8 %c.off17, 26 + %or = or i1 %cmp1, %cmp2 + ret i1 %or +; CHECK-LABEL: @test47( +; CHECK-NEXT: and i8 %c, -33 +; CHECK-NEXT: add i8 %1, -65 +; CHECK-NEXT: icmp ult i8 %2, 27 +} -- 2.34.1