From: Hal Finkel
Date: Mon, 5 Jan 2015 21:10:24 +0000 (+0000)
Subject: [PowerPC] Fold i1 extensions with other ops
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=ccc83e4a087fc6d7d4bb879e2901f3a73b77abb1;p=oota-llvm.git

[PowerPC] Fold i1 extensions with other ops

Consider this function from our README.txt file:

  int foo(int a, int b) { return (a < b) << 4; }

We now explicitly track CR bits by default, so the comment in the README.txt
about not really having a SETCC is no longer accurate, but we did generate this
somewhat silly code:

        cmpw 0, 3, 4
        li 3, 0
        li 12, 1
        isel 3, 12, 3, 0
        sldi 3, 3, 4
        blr

which generates the zext as a select between 0 and 1, and then shifts the
result by a constant amount. Here we preprocess the DAG in order to fold the
results of operations on an extension of an i1 value into the SELECT_I[48]
pseudo instruction when the resulting constant can be materialized using one
instruction (just like the 0 and 1). This was not implemented as a DAGCombine
because the resulting code would have been anti-canonical and depends on
replacing chained user nodes, which does not fit well into the lowering
paradigm. Now we generate:

        cmpw 0, 3, 4
        li 3, 0
        li 12, 16
        isel 3, 12, 3, 0
        blr

which is less silly.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225203 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index ab3a01d6b39..6aa786f1173 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -217,6 +217,7 @@ private:
   void PeepholeCROps();
 
   SDValue combineToCMPB(SDNode *N);
+  void foldBoolExts(SDValue &Res, SDNode *&N);
 
   bool AllUsersSelectZero(SDNode *N);
   void SwapAllSelectUsers(SDNode *N);
@@ -3173,6 +3174,73 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
   return Res;
 }
 
+// When CR bit registers are enabled, an extension of an i1 variable to an i32
+// or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
+// involves constant materialization of a 0 or a 1 or both. If the result of
+// the extension is then operated upon by some operator that can be constant
+// folded with a constant 0 or 1, and that constant can be materialized using
+// only one instruction (like a zero or one), then we should fold in those
+// operations with the select.
+void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
+  if (!PPCSubTarget->useCRBits())
+    return;
+
+  if (N->getOpcode() != ISD::ZERO_EXTEND &&
+      N->getOpcode() != ISD::SIGN_EXTEND &&
+      N->getOpcode() != ISD::ANY_EXTEND)
+    return;
+
+  if (N->getOperand(0).getValueType() != MVT::i1)
+    return;
+
+  if (!N->hasOneUse())
+    return;
+
+  SDLoc dl(N);
+  EVT VT = N->getValueType(0);
+  SDValue Cond = N->getOperand(0);
+  SDValue ConstTrue =
+    CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, VT);
+  SDValue ConstFalse = CurDAG->getConstant(0, VT);
+
+  do {
+    SDNode *User = *N->use_begin();
+    if (User->getNumOperands() != 2)
+      break;
+
+    auto TryFold = [this, N, User](SDValue Val) {
+      SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
+      SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
+      SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
+
+      return CurDAG->FoldConstantArithmetic(User->getOpcode(),
+                                            User->getValueType(0),
+                                            O0.getNode(), O1.getNode());
+    };
+
+    SDValue TrueRes = TryFold(ConstTrue);
+    if (!TrueRes)
+      break;
+    SDValue FalseRes = TryFold(ConstFalse);
+    if (!FalseRes)
+      break;
+
+    // For us to materialize these using one instruction, we must be able to
+    // represent them as signed 16-bit integers.
+    uint64_t True = cast<ConstantSDNode>(TrueRes)->getZExtValue(),
+             False = cast<ConstantSDNode>(FalseRes)->getZExtValue();
+    if (!isInt<16>(True) || !isInt<16>(False))
+      break;
+
+    // We can replace User with a new SELECT node, and try again to see if we
+    // can fold the select with its user.
+    Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
+    N = User;
+    ConstTrue = TrueRes;
+    ConstFalse = FalseRes;
+  } while (N->hasOneUse());
+}
+
 void PPCDAGToDAGISel::PreprocessISelDAG() {
   SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
   ++Position;
@@ -3191,6 +3259,9 @@ void PPCDAGToDAGISel::PreprocessISelDAG() {
       break;
     }
 
+    if (!Res)
+      foldBoolExts(Res, N);
+
     if (Res) {
       DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
       DEBUG(N->dump(CurDAG));
diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
index 45d31130b83..9c424eb483a 100644
--- a/lib/Target/PowerPC/README.txt
+++ b/lib/Target/PowerPC/README.txt
@@ -252,23 +252,6 @@ anything though, because the compares still wouldn't be shared.
 
 ===-------------------------------------------------------------------------===
 
-We should custom expand setcc instead of pretending that we have it. That
-would allow us to expose the access of the crbit after the mfcr, allowing
-that access to be trivially folded into other ops.
A simple example:
-
-int foo(int a, int b) { return (a < b) << 4; }
-
-compiles into:
-
-_foo:
-        cmpw cr7, r3, r4
-        mfcr r2, 1
-        rlwinm r2, r2, 29, 31, 31
-        slwi r3, r2, 4
-        blr
-
-===-------------------------------------------------------------------------===
-
 Fold add and sub with constant into non-extern, non-weak addresses so this:
 
 static int a;
diff --git a/test/CodeGen/PowerPC/i1-ext-fold.ll b/test/CodeGen/PowerPC/i1-ext-fold.ll
new file mode 100644
index 00000000000..19bd8ff6555
--- /dev/null
+++ b/test/CodeGen/PowerPC/i1-ext-fold.ll
@@ -0,0 +1,54 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readnone
+define signext i32 @foo(i32 signext %a, i32 signext %b) #0 {
+entry:
+  %cmp = icmp slt i32 %a, %b
+  %conv = zext i1 %cmp to i32
+  %shl = shl nuw nsw i32 %conv, 4
+  ret i32 %shl
+
+; CHECK-LABEL: @foo
+; CHECK-DAG: cmpw
+; CHECK-DAG: li [[REG1:[0-9]+]], 0
+; CHECK-DAG: li [[REG2:[0-9]+]], 16
+; CHECK: isel 3, [[REG2]], [[REG1]],
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define signext i32 @foo2(i32 signext %a, i32 signext %b) #0 {
+entry:
+  %cmp = icmp slt i32 %a, %b
+  %conv = zext i1 %cmp to i32
+  %shl = shl nuw nsw i32 %conv, 4
+  %add1 = or i32 %shl, 5
+  ret i32 %add1
+
+; CHECK-LABEL: @foo2
+; CHECK-DAG: cmpw
+; CHECK-DAG: li [[REG1:[0-9]+]], 5
+; CHECK-DAG: li [[REG2:[0-9]+]], 21
+; CHECK: isel 3, [[REG2]], [[REG1]],
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define signext i32 @foo3(i32 signext %a, i32 signext %b) #0 {
+entry:
+  %cmp = icmp sle i32 %a, %b
+  %conv = zext i1 %cmp to i32
+  %shl = shl nuw nsw i32 %conv, 4
+  ret i32 %shl
+
+; CHECK-LABEL: @foo3
+; CHECK-DAG: cmpw
+; CHECK-DAG: li [[REG1:[0-9]+]], 16
+; CHECK: isel 3, 0, [[REG1]],
+; CHECK: blr
+}
+
+attributes #0 = { nounwind readnone }
+