From cc78fcae6aa37bc854138dc874f1d262079c5e7f Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Wed, 7 Oct 2015 17:28:58 +0000 Subject: [PATCH] [ARM] Promote helper function to SelectionDAG. I'll be using the function in a similar combine for AArch64. The helper was also improved to handle undef values. Part of http://reviews.llvm.org/D13442 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@249572 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/SelectionDAGNodes.h | 8 ++++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 19 ++++++++++ lib/Target/ARM/ARMISelLowering.cpp | 46 ++++++----------------- test/CodeGen/ARM/vdiv_combine.ll | 9 +++++ 4 files changed, 48 insertions(+), 34 deletions(-) diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 523e10c8584..74879094525 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -1714,6 +1714,14 @@ public: ConstantFPSDNode * getConstantFPSplatNode(BitVector *UndefElements = nullptr) const; + /// \brief If this is a constant FP splat and the splatted constant FP is an + /// exact power of 2, return the log base 2 integer value. Otherwise, + /// return -1. + /// + /// The BitWidth specifies the necessary bit precision.
+ int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, + uint32_t BitWidth) const; + bool isConstant() const; static inline bool classof(const SDNode *N) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 7e02d3ab1fc..3dd22e2c9d6 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "SDNodeDbgValue.h" +#include "llvm/ADT/APSInt.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -7190,6 +7191,24 @@ BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const { return dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements)); } +int32_t +BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, + uint32_t BitWidth) const { + if (ConstantFPSDNode *CN = + dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements))) { + bool IsExact; + APSInt IntVal(BitWidth); + APFloat APF = CN->getValueAPF(); + if (APF.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) != + APFloat::opOK || + !IsExact) + return -1; + + return IntVal.exactLogBase2(); + } + return -1; +} + bool BuildVectorSDNode::isConstant() const { for (const SDValue &Op : op_values()) { unsigned Opc = Op.getOpcode(); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 64e494be514..94f573ce8cd 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -9808,32 +9808,6 @@ static SDValue PerformSTORECombine(SDNode *N, return SDValue(); } -// isConstVecPow2 - Return true if each vector element is a power of 2, all -// elements are the same constant, C, and Log2(C) ranges from 1 to 32.
-static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C) -{ - integerPart cN; - integerPart c0 = 0; - for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements(); - I != E; I++) { - ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I)); - if (!C) - return false; - - bool isExact; - APFloat APF = C->getValueAPF(); - if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact) - != APFloat::opOK || !isExact) - return false; - - c0 = (I == 0) ? cN : c0; - if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32) - return false; - } - C = c0; - return true; -} - /// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD) /// can replace combinations of VMUL and VCVT (floating-point to integer) /// when the VMUL has a constant operand that is a power of 2. @@ -9869,18 +9843,20 @@ static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } - uint64_t C; - bool isSigned = N->getOpcode() == ISD::FP_TO_SINT; - if (!isConstVecPow2(ConstVec, isSigned, C)) + BitVector UndefElements; + BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec); + int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33); + if (C == -1 || C == 0 || C > 32) return SDValue(); SDLoc dl(N); + bool isSigned = N->getOpcode() == ISD::FP_TO_SINT; unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs : Intrinsic::arm_neon_vcvtfp2fxu; SDValue FixConv = DAG.getNode( ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ?
MVT::v2i32 : MVT::v4i32, DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0), - DAG.getConstant(Log2_64(C), dl, MVT::i32)); + DAG.getConstant(C, dl, MVT::i32)); if (IntBits < FloatBits) FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv); @@ -9925,12 +9901,14 @@ static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } - uint64_t C; - bool isSigned = OpOpcode == ISD::SINT_TO_FP; - if (!isConstVecPow2(ConstVec, isSigned, C)) + BitVector UndefElements; + BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec); + int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33); + if (C == -1 || C == 0 || C > 32) return SDValue(); SDLoc dl(N); + bool isSigned = OpOpcode == ISD::SINT_TO_FP; SDValue ConvInput = Op.getOperand(0); if (IntBits < FloatBits) ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, @@ -9942,7 +9920,7 @@ static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(), DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), - ConvInput, DAG.getConstant(Log2_64(C), dl, MVT::i32)); + ConvInput, DAG.getConstant(C, dl, MVT::i32)); } /// Getvshiftimm - Check if this is a valid build_vector for the immediate diff --git a/test/CodeGen/ARM/vdiv_combine.ll b/test/CodeGen/ARM/vdiv_combine.ll index dbbf92efd22..8511dbcb687 100644 --- a/test/CodeGen/ARM/vdiv_combine.ll +++ b/test/CodeGen/ARM/vdiv_combine.ll @@ -144,3 +144,12 @@ entry: %div.i = fdiv <8 x float> %vcvt.i, ret <8 x float> %div.i } + +; Can combine splat with an undef. +; CHECK-LABEL: test8 +; CHECK: vcvt.f32.s32 q{{[0-9]+}}, q{{[0-9]+}}, #1 +define <4 x float> @test8(<4 x i32> %in) { + %vcvt.i = sitofp <4 x i32> %in to <4 x float> + %div.i = fdiv <4 x float> %vcvt.i, <float 2.0, float 2.0, float 2.0, float undef> + ret <4 x float> %div.i +} -- 2.34.1