From acb31a245ceef3510f37163e31c0097b0d63cb8f Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 9 Jan 2014 10:56:42 +0000 Subject: [PATCH] Handle masked rotate amounts At the moment we expect rotates to have the form: (or (shl X, Y), (shr X, Z)) where Y == bitsize(X) - Z or Z == bitsize(X) - Y. This form means that the (or ...) is undefined for Y == 0 or Z == 0. This undefinedness can be avoided by using Y == (C * bitsize(X) - Z) & (bitsize(X) - 1) or Z == (C * bitsize(X) - Y) & (bitsize(X) - 1) for any integer C (including 0, the most natural choice). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@198861 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 88 +++++++++++++++++++----- test/CodeGen/SystemZ/shift-04.ll | 72 +++++++++++++++++++ 2 files changed, 144 insertions(+), 16 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index fc51f184ea4..76f1bc85709 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3306,15 +3306,56 @@ static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { return false; } -// Return true if we can prove that Neg == OpSize - Pos. This means that +// Return true if we can prove that, whenever Neg and Pos are both in the +// range [0, OpSize), Neg == (Pos == 0 ? 0 : OpSize - Pos). This means that // for two opposing shifts shift1 and shift2 and a value X with OpBits bits: // // (or (shift1 X, Neg), (shift2 X, Pos)) // // reduces to a rotate in direction shift2 by Pos and a rotate in direction -// shift1 by Neg. Note that the (or ...) then invokes undefined behavior -// if Pos == 0 (and consequently Neg == OpSize). +// shift1 by Neg. The range [0, OpSize) means that we only need to consider +// shift amounts with defined behavior. static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) { + // If OpSize is a power of 2 then: + // + // (a) (Pos == 0 ? 0 : OpSize - Pos) == (OpSize - Pos) & (OpSize - 1) + // (b) Neg == Neg & (OpSize - 1) whenever Neg is in [0, OpSize). + // + // So if OpSize is a power of 2 and Neg is (and Neg', OpSize-1), we check + // for the stronger condition: + // + // Neg & (OpSize - 1) == (OpSize - Pos) & (OpSize - 1) [A] + // + // for all Neg and Pos. Since Neg & (OpSize - 1) == Neg' & (OpSize - 1) + // we can just replace Neg with Neg' for the rest of the function. + // + // In other cases we check for the even stronger condition: + // + // Neg == OpSize - Pos [B] + // + // for all Neg and Pos. Note that the (or ...) then invokes undefined + // behavior if Pos == 0 (and consequently Neg == OpSize). + // + // We could actually use [A] whenever OpSize is a power of 2, but the + // only extra cases that it would match are those uninteresting ones + // where Neg and Pos are never in range at the same time. E.g. for + // OpSize == 32, using [A] would allow a Neg of the form (sub 64, Pos) + // as well as (sub 32, Pos), but: + // + // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos)) + // + // always invokes undefined behavior for 32-bit X. + // + // Below, Mask == OpSize - 1 when using [A] and is all-ones otherwise. + unsigned LoBits = 0; + if (Neg.getOpcode() == ISD::AND && + isPowerOf2_64(OpSize) && + Neg.getOperand(1).getOpcode() == ISD::Constant && + cast(Neg.getOperand(1))->getAPIntValue() == OpSize - 1) { + Neg = Neg.getOperand(0); + LoBits = Log2_64(OpSize); + } + // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1. if (Neg.getOpcode() != ISD::SUB) return 0; @@ -3323,24 +3364,39 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) { return 0; SDValue NegOp1 = Neg.getOperand(1); - // The condition we need to prove is now NegC - NegOp1 == OpSize - Pos. - // Check whether the terms match directly. + // The condition we need is now: + // + // (NegC - NegOp1) & Mask == (OpSize - Pos) & Mask + // + // If NegOp1 == Pos then we need: + // + // OpSize & Mask == NegC & Mask + // + // (because "x & Mask" is a truncation and distributes through subtraction). + APInt Width; if (Pos == NegOp1) - return NegC->getAPIntValue() == OpSize; - + Width = NegC->getAPIntValue(); // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC. // Then the condition we want to prove becomes: - // NegC - NegOp1 == OpSize - (NegOp1 + PosC) - // NegC == OpSize - PosC // - // Because NegC and PosC are APInts, this is easier to test as: - // OpSize == NegC + PosC - if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) { - ConstantSDNode *PosC = dyn_cast(Pos.getOperand(1)); - return PosC && OpSize == NegC->getAPIntValue() + PosC->getAPIntValue(); - } + // (NegC - NegOp1) & Mask == (OpSize - (NegOp1 + PosC)) & Mask + // + // which, again because "x & Mask" is a truncation, becomes: + // + // NegC & Mask == (OpSize - PosC) & Mask + // OpSize & Mask == (NegC + PosC) & Mask + else if (Pos.getOpcode() == ISD::ADD && + Pos.getOperand(0) == NegOp1 && + Pos.getOperand(1).getOpcode() == ISD::Constant) + Width = (cast(Pos.getOperand(1))->getAPIntValue() + + NegC->getAPIntValue()); + else + return false; - return false; + // Now we just need to check that OpSize & Mask == Width & Mask. + if (LoBits) + return Width.getLoBits(LoBits) == 0; + return Width == OpSize; } // A subroutine of MatchRotate used once we have found an OR of two opposite diff --git a/test/CodeGen/SystemZ/shift-04.ll b/test/CodeGen/SystemZ/shift-04.ll index 0cd32391ed5..de2d74f27fa 100644 --- a/test/CodeGen/SystemZ/shift-04.ll +++ b/test/CodeGen/SystemZ/shift-04.ll @@ -216,3 +216,75 @@ define i32 @f16(i32 %a, i64 %amt) { %or = or i32 %parta, %partb ret i32 %or } + +; Check cases where (-x & 31) is used instead of 32 - x. +define i32 @f17(i32 %x, i32 %y) { +; CHECK-LABEL: f17: +; CHECK: rll %r2, %r2, 0(%r3) +; CHECK: br %r14 +entry: + %shl = shl i32 %x, %y + %sub = sub i32 0, %y + %and = and i32 %sub, 31 + %shr = lshr i32 %x, %and + %or = or i32 %shr, %shl + ret i32 %or +} + +; ...and again with ((32 - x) & 31). +define i32 @f18(i32 %x, i32 %y) { +; CHECK-LABEL: f18: +; CHECK: rll %r2, %r2, 0(%r3) +; CHECK: br %r14 +entry: + %shl = shl i32 %x, %y + %sub = sub i32 32, %y + %and = and i32 %sub, 31 + %shr = lshr i32 %x, %and + %or = or i32 %shr, %shl + ret i32 %or +} + +; This is not a rotation. +define i32 @f19(i32 %x, i32 %y) { +; CHECK-LABEL: f19: +; CHECK-NOT: rll +; CHECK: br %r14 +entry: + %shl = shl i32 %x, %y + %sub = sub i32 16, %y + %and = and i32 %sub, 31 + %shr = lshr i32 %x, %and + %or = or i32 %shr, %shl + ret i32 %or +} + +; Repeat f17 with an addition on the shift count. +define i32 @f20(i32 %x, i32 %y) { +; CHECK-LABEL: f20: +; CHECK: rll %r2, %r2, 199(%r3) +; CHECK: br %r14 +entry: + %add = add i32 %y, 199 + %shl = shl i32 %x, %add + %sub = sub i32 0, %add + %and = and i32 %sub, 31 + %shr = lshr i32 %x, %and + %or = or i32 %shr, %shl + ret i32 %or +} + +; ...and again with the InstCombine version. +define i32 @f21(i32 %x, i32 %y) { +; CHECK-LABEL: f21: +; CHECK: rll %r2, %r2, 199(%r3) +; CHECK: br %r14 +entry: + %add = add i32 %y, 199 + %shl = shl i32 %x, %add + %sub = sub i32 -199, %y + %and = and i32 %sub, 31 + %shr = lshr i32 %x, %and + %or = or i32 %shr, %shl + ret i32 %or +} -- 2.34.1