From 6d7a4a2886266317f7aae6e3c806719de539e517 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Sat, 5 Sep 2015 00:02:59 +0000 Subject: [PATCH] [PowerPC] Fix and(or(x, c1), c2) -> rlwimi generation PPCISelDAGToDAG has a transformation that generates a rlwimi instruction from an input pattern that looks like this: and(or(x, c1), c2) but the associated logic does not work if there are bits that are 1 in c1 but 0 in c2 (these are normally canonicalized away, but that can't happen if the 'or' has other users). Make sure we abort the transformation if such bits are discovered. Fixes PR24704. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@246900 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 18 ++++++++++++--- test/CodeGen/PowerPC/rlwimi-and-or-bits.ll | 27 ++++++++++++++++++++++ 2 files changed, 42 insertions(+), 3 deletions(-) create mode 100644 test/CodeGen/PowerPC/rlwimi-and-or-bits.ll diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 0b43b046a44..30d792e0bbc 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -2570,13 +2570,25 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { return nullptr; } // ISD::OR doesn't get all the bitfield insertion fun. - // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) is a bitfield insert + // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a + // bitfield insert. if (isInt32Immediate(N->getOperand(1), Imm) && N->getOperand(0).getOpcode() == ISD::OR && isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) { + // The idea here is to check whether this is equivalent to: + // (c1 & m) | (x & ~m) + // where m is a run-of-ones mask. The logic here is that, for each bit in + // c1 and c2: + // - if both are 1, then the output will be 1. + // - if both are 0, then the output will be 0. + // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will + // come from x. 
+ // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will + // be 0. + // If that last condition is never the case, then we can form m from the + // bits that are the same between c1 and c2. unsigned MB, ME; - Imm = ~(Imm^Imm2); - if (isRunOfOnes(Imm, MB, ME)) { + if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) { SDValue Ops[] = { N->getOperand(0).getOperand(0), N->getOperand(0).getOperand(1), getI32Imm(0, dl), getI32Imm(MB, dl), diff --git a/test/CodeGen/PowerPC/rlwimi-and-or-bits.ll b/test/CodeGen/PowerPC/rlwimi-and-or-bits.ll new file mode 100644 index 00000000000..a74bc727396 --- /dev/null +++ b/test/CodeGen/PowerPC/rlwimi-and-or-bits.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@m = external global i32, align 4 + +; Function Attrs: nounwind +define signext i32 @main() #0 { +entry: + +; CHECK-LABEL: @main +; CHECK-NOT: rlwimi +; CHECK: andi + + %0 = load i32, i32* @m, align 4 + %or = or i32 %0, 250 + store i32 %or, i32* @m, align 4 + %and = and i32 %or, 249 + %sub.i = sub i32 %and, 0 + %sext = shl i32 %sub.i, 24 + %conv = ashr exact i32 %sext, 24 + ret i32 %conv +} + +attributes #0 = { nounwind "target-cpu"="pwr7" } +attributes #1 = { nounwind } + -- 2.34.1