From d8228924556d3c465da5b858c620b29fd1cf298e Mon Sep 17 00:00:00 2001
From: Benjamin Kramer
Date: Mon, 17 Jan 2011 12:04:57 +0000
Subject: [PATCH] Add a DAGCombine to turn (ctpop x) u< 2 into (x & x-1) == 0.

This shaves off 4 popcounts from the hacked 186.crafty source.

This is enabled even when a native popcount instruction is available. The
combined code is one operation longer but it should be faster nevertheless.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@123621 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/TargetLowering.cpp | 27 ++++++++++++++++++
 test/CodeGen/X86/ctpop-combine.ll           | 31 +++++++++++++++++++++
 2 files changed, 58 insertions(+)
 create mode 100644 test/CodeGen/X86/ctpop-combine.ll

diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index f9e0992ea14..1621d61ba9b 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1870,6 +1870,33 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
       }
     }
 
+    SDValue CTPOP = N0;
+    // Look through truncs that don't change the value of a ctpop.
+    if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE)
+      CTPOP = N0.getOperand(0);
+
+    // ctpop of a W-bit value lies in [0, W], which takes Log2_32(W) + 1
+    // bits. Only look through a trunc that keeps that many; a narrower one
+    // (e.g. i64 -> i6) maps W to 0, making the fold wrong for all-ones x.
+    if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
+        (N0 == CTPOP || N0.getValueType().getSizeInBits() >
+                        Log2_32(CTPOP.getValueType().getSizeInBits()))) {
+      EVT CTVT = CTPOP.getValueType();
+      SDValue CTOp = CTPOP.getOperand(0);
+
+      // (ctpop x) u< 2 -> (x & x-1) == 0
+      // (ctpop x) u> 1 -> (x & x-1) != 0
+      if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
+        SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp,
+                                  DAG.getConstant(1, CTVT));
+        SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub);
+        ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
+        return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, CTVT), CC);
+      }
+
+      // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal.
+    }
+
     // If the LHS is '(and load, const)', the RHS is 0,
     // the test is for equality or unsigned, and all 1 bits of the const are
     // in the same partial word, see if we can shorten the load.
diff --git a/test/CodeGen/X86/ctpop-combine.ll b/test/CodeGen/X86/ctpop-combine.ll
new file mode 100644
index 00000000000..b87637f9424
--- /dev/null
+++ b/test/CodeGen/X86/ctpop-combine.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+
+declare i64 @llvm.ctpop.i64(i64) nounwind readnone
+
+define i32 @test1(i64 %x) nounwind readnone {
+  %count = tail call i64 @llvm.ctpop.i64(i64 %x)
+  %cast = trunc i64 %count to i32
+  %cmp = icmp ugt i32 %cast, 1
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+; CHECK: test1:
+; CHECK: leaq -1(%rdi)
+; CHECK-NEXT: testq
+; CHECK-NEXT: setne
+; CHECK: ret
+}
+
+
+define i32 @test2(i64 %x) nounwind readnone {
+  %count = tail call i64 @llvm.ctpop.i64(i64 %x)
+  %cast = trunc i64 %count to i32
+  %cmp = icmp ult i32 %cast, 2
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+; CHECK: test2:
+; CHECK: leaq -1(%rdi)
+; CHECK-NEXT: testq
+; CHECK-NEXT: sete
+; CHECK: ret
+}
+
-- 
2.34.1