void PeepholeCROps();
SDValue combineToCMPB(SDNode *N);
+ void foldBoolExts(SDValue &Res, SDNode *&N);
bool AllUsersSelectZero(SDNode *N);
void SwapAllSelectUsers(SDNode *N);
return Res;
}
+// Fold operations on an extended i1 value into the select that implements the
+// extension.
+//
+// When CR bit registers are enabled, an extension of an i1 variable to an i32
+// or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
+// involves constant materialization of a 0 or a 1 or both (-1 instead of 1 for
+// a sign extension). If the result of the extension is then operated upon by
+// some two-operand operator that can be constant folded with each of those
+// constants, and both folded constants can be materialized using only one
+// instruction, then we fold that operation into the select. The process is
+// repeated on the folded user for as long as it, too, has exactly one use.
+//
+// Res and N are in/out parameters: if nothing could be folded, both are left
+// untouched. Otherwise Res is set to the new select node and N is advanced to
+// the last user that was folded, i.e. the node that Res computes the value of.
+void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
+  if (!PPCSubTarget->useCRBits())
+    return;
+
+  // Only zero/sign/any extensions whose source is an i1 are candidates.
+  const unsigned ExtOpc = N->getOpcode();
+  if (ExtOpc != ISD::ZERO_EXTEND && ExtOpc != ISD::SIGN_EXTEND &&
+      ExtOpc != ISD::ANY_EXTEND)
+    return;
+
+  if (N->getOperand(0).getValueType() != MVT::i1)
+    return;
+
+  // The extension is folded into its user, so that user must be the only one.
+  if (!N->hasOneUse())
+    return;
+
+  SDLoc dl(N);
+  EVT VT = N->getValueType(0);
+  SDValue Cond = N->getOperand(0);
+  // The values the extension yields for a true and for a false condition.
+  SDValue ConstTrue =
+      CurDAG->getConstant(ExtOpc == ISD::SIGN_EXTEND ? -1 : 1, VT);
+  SDValue ConstFalse = CurDAG->getConstant(0, VT);
+
+  do {
+    SDNode *User = *N->use_begin();
+    if (User->getNumOperands() != 2)
+      break;
+
+    // Try to constant fold User with Val substituted for each operand that is
+    // produced by N. Yields a null SDValue if the fold is not possible.
+    auto TryFold = [this, N, User](SDValue Val) {
+      SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
+      SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
+      SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
+
+      return CurDAG->FoldConstantArithmetic(User->getOpcode(),
+                                            User->getValueType(0),
+                                            O0.getNode(), O1.getNode());
+    };
+
+    SDValue TrueRes = TryFold(ConstTrue);
+    if (!TrueRes)
+      break;
+    SDValue FalseRes = TryFold(ConstFalse);
+    if (!FalseRes)
+      break;
+
+    // For us to materialize these using one instruction, we must be able to
+    // represent them as signed 16-bit integers. The sign-extended value is
+    // what has to be tested: a negative constant narrower than 64 bits (for
+    // example an i32 -1) zero-extends to a large positive number and would be
+    // rejected even though it fits in a signed 16-bit immediate.
+    int64_t TrueImm = cast<ConstantSDNode>(TrueRes)->getSExtValue();
+    int64_t FalseImm = cast<ConstantSDNode>(FalseRes)->getSExtValue();
+    if (!isInt<16>(TrueImm) || !isInt<16>(FalseImm))
+      break;
+
+    // We can replace User with a new SELECT node, and try again to see if we
+    // can fold the select with its user.
+    Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
+    N = User;
+    ConstTrue = TrueRes;
+    ConstFalse = FalseRes;
+  } while (N->hasOneUse());
+}
+
+
void PPCDAGToDAGISel::PreprocessISelDAG() {
SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
++Position;
break;
}
+ if (!Res)
+ foldBoolExts(Res, N);
+
if (Res) {
DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
DEBUG(N->dump(CurDAG));
===-------------------------------------------------------------------------===
-We should custom expand setcc instead of pretending that we have it. That
-would allow us to expose the access of the crbit after the mfcr, allowing
-that access to be trivially folded into other ops. A simple example:
-
-int foo(int a, int b) { return (a < b) << 4; }
-
-compiles into:
-
-_foo:
- cmpw cr7, r3, r4
- mfcr r2, 1
- rlwinm r2, r2, 29, 31, 31
- slwi r3, r2, 4
- blr
-
-===-------------------------------------------------------------------------===
-
Fold add and sub with constant into non-extern, non-weak addresses so this:
static int a;
--- /dev/null
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readnone
+define signext i32 @foo(i32 signext %a, i32 signext %b) #0 {
+entry:
+ %cmp = icmp slt i32 %a, %b
+ %conv = zext i1 %cmp to i32 ; i1 widened to i32: 1 if %a < %b, else 0
+ %shl = shl nuw nsw i32 %conv, 4 ; 1 << 4 = 16, 0 << 4 = 0: the two constants the expected isel picks between
+ ret i32 %shl
+
+; CHECK-LABEL: @foo
+; CHECK-DAG: cmpw
+; CHECK-DAG: li [[REG1:[0-9]+]], 0
+; CHECK-DAG: li [[REG2:[0-9]+]], 16
+; CHECK: isel 3, [[REG2]], [[REG1]],
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define signext i32 @foo2(i32 signext %a, i32 signext %b) #0 {
+entry:
+ %cmp = icmp slt i32 %a, %b
+ %conv = zext i1 %cmp to i32 ; i1 widened to i32: 1 if %a < %b, else 0
+ %shl = shl nuw nsw i32 %conv, 4 ; first operation on the extended value: 16 or 0
+ %add1 = or i32 %shl, 5 ; second, chained operation: 16 | 5 = 21, 0 | 5 = 5
+ ret i32 %add1
+
+; CHECK-LABEL: @foo2
+; CHECK-DAG: cmpw
+; CHECK-DAG: li [[REG1:[0-9]+]], 5
+; CHECK-DAG: li [[REG2:[0-9]+]], 21
+; CHECK: isel 3, [[REG2]], [[REG1]],
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define signext i32 @foo3(i32 signext %a, i32 signext %b) #0 {
+entry:
+ %cmp = icmp sle i32 %a, %b ; same shape as @foo but with sle instead of slt
+ %conv = zext i1 %cmp to i32 ; i1 widened to i32: 1 if %a <= %b, else 0
+ %shl = shl nuw nsw i32 %conv, 4 ; 16 or 0; the expected isel below lists a literal 0 first and the register holding 16 second
+ ret i32 %shl
+
+; CHECK-LABEL: @foo3
+; CHECK-DAG: cmpw
+; CHECK-DAG: li [[REG1:[0-9]+]], 16
+; CHECK: isel 3, 0, [[REG1]],
+; CHECK: blr
+}
+
+attributes #0 = { nounwind readnone }
+