From: Tim Northover Date: Fri, 28 Jun 2013 15:29:25 +0000 (+0000) Subject: ARM: ensure fixed-point conversions have sane types X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=bcd8e7ad4d1dd486675e774778b3409464380f62;p=oota-llvm.git ARM: ensure fixed-point conversions have sane types We were generating intrinsics for NEON fixed-point conversions that didn't exist (e.g. float -> i16). There are two cases to consider: + iN is smaller than float. In this case we can do the conversion but need an extend or truncate as well. + iN is larger than float. In this case using the NEON conversion would be incorrect so we don't perform any combining. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185158 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index c8d73d76d75..ff8571ba033 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -9141,12 +9141,27 @@ static SDValue PerformVCVTCombine(SDNode *N, !isConstVecPow2(ConstVec, isSigned, C)) return SDValue(); + MVT FloatTy = Op.getSimpleValueType().getVectorElementType(); + MVT IntTy = N->getSimpleValueType(0).getVectorElementType(); + if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) { + // These instructions only exist converting from f32 to i32. We can handle + // smaller integers by generating an extra truncate, but larger ones would + // be lossy. + return SDValue(); + } + unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs : Intrinsic::arm_neon_vcvtfp2fxu; - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), - N->getValueType(0), - DAG.getConstant(IntrinsicOpcode, MVT::i32), N0, - DAG.getConstant(Log2_64(C), MVT::i32)); + unsigned NumLanes = Op.getValueType().getVectorNumElements(); + SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), + NumLanes == 2 ? 
MVT::v2i32 : MVT::v4i32, + DAG.getConstant(IntrinsicOpcode, MVT::i32), N0, + DAG.getConstant(Log2_64(C), MVT::i32)); + + if (IntTy.getSizeInBits() < FloatTy.getSizeInBits()) + FixConv = DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), FixConv); + + return FixConv; } /// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD) @@ -9177,12 +9192,28 @@ static SDValue PerformVDIVCombine(SDNode *N, !isConstVecPow2(ConstVec, isSigned, C)) return SDValue(); + MVT FloatTy = N->getSimpleValueType(0).getVectorElementType(); + MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType(); + if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) { + // These instructions only exist converting from i32 to f32. We can handle + // smaller integers by generating an extra extend, but larger ones would + // be lossy. + return SDValue(); + } + + SDValue ConvInput = Op.getOperand(0); + unsigned NumLanes = Op.getValueType().getVectorNumElements(); + if (IntTy.getSizeInBits() < FloatTy.getSizeInBits()) + ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, + SDLoc(N), NumLanes == 2 ? MVT::v2i32 : MVT::v4i32, + ConvInput); + unsigned IntrinsicOpcode = isSigned ? 
Intrinsic::arm_neon_vcvtfxs2fp : Intrinsic::arm_neon_vcvtfxu2fp; return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), Op.getValueType(), DAG.getConstant(IntrinsicOpcode, MVT::i32), - Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32)); + ConvInput, DAG.getConstant(Log2_64(C), MVT::i32)); } /// Getvshiftimm - Check if this is a valid build_vector for the immediate diff --git a/test/CodeGen/ARM/vcvt.ll b/test/CodeGen/ARM/vcvt.ll index c078f493094..9b315b1a486 100644 --- a/test/CodeGen/ARM/vcvt.ll +++ b/test/CodeGen/ARM/vcvt.ll @@ -156,3 +156,44 @@ define <4 x i16> @vcvt_f32tof16(<4 x float>* %A) nounwind { declare <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16>) nounwind readnone declare <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float>) nounwind readnone + + +define <4 x i16> @fix_float_to_i16(<4 x float> %in) { +; CHECK: fix_float_to_i16: +; CHECK: vcvt.u32.f32 [[TMP:q[0-9]+]], {{q[0-9]+}}, #1 +; CHECK: vmovn.i32 {{d[0-9]+}}, [[TMP]] + + %scale = fmul <4 x float> %in, <float 2.0, float 2.0, float 2.0, float 2.0> + %conv = fptoui <4 x float> %scale to <4 x i16> + ret <4 x i16> %conv +} + +define <2 x i64> @fix_float_to_i64(<2 x float> %in) { +; CHECK: fix_float_to_i64: +; CHECK: bl +; CHECK: bl + + %scale = fmul <2 x float> %in, <float 2.0, float 2.0> + %conv = fptoui <2 x float> %scale to <2 x i64> + ret <2 x i64> %conv +} + +define <4 x i16> @fix_double_to_i16(<4 x double> %in) { +; CHECK: fix_double_to_i16: +; CHECK: vcvt.s32.f64 +; CHECK: vcvt.s32.f64 + + %scale = fmul <4 x double> %in, <double 2.0, double 2.0, double 2.0, double 2.0> + %conv = fptoui <4 x double> %scale to <4 x i16> + ret <4 x i16> %conv +} + +define <2 x i64> @fix_double_to_i64(<2 x double> %in) { +; CHECK: fix_double_to_i64: +; CHECK: bl +; CHECK: bl + %scale = fmul <2 x double> %in, <double 2.0, double 2.0> + %conv = fptoui <2 x double> %scale to <2 x i64> + ret <2 x i64> %conv +} + diff --git a/test/CodeGen/ARM/vdiv_combine.ll b/test/CodeGen/ARM/vdiv_combine.ll index e6f1338b853..3b43be46b45 100644 --- a/test/CodeGen/ARM/vdiv_combine.ll +++ b/test/CodeGen/ARM/vdiv_combine.ll @@ -95,3 +95,44 @@ entry: } declare void 
@foo_float32x4_t(<4 x float>) + +define <4 x float> @fix_unsigned_i16_to_float(<4 x i16> %in) { +; CHECK: fix_unsigned_i16_to_float: +; CHECK: vmovl.u16 [[TMP:q[0-9]+]], {{d[0-9]+}} +; CHECK: vcvt.f32.u32 {{q[0-9]+}}, [[TMP]], #1 + + %conv = uitofp <4 x i16> %in to <4 x float> + %shift = fdiv <4 x float> %conv, <float 2.0, float 2.0, float 2.0, float 2.0> + ret <4 x float> %shift +} + +define <4 x float> @fix_signed_i16_to_float(<4 x i16> %in) { +; CHECK: fix_signed_i16_to_float: +; CHECK: vmovl.s16 [[TMP:q[0-9]+]], {{d[0-9]+}} +; CHECK: vcvt.f32.s32 {{q[0-9]+}}, [[TMP]], #1 + + %conv = sitofp <4 x i16> %in to <4 x float> + %shift = fdiv <4 x float> %conv, <float 2.0, float 2.0, float 2.0, float 2.0> + ret <4 x float> %shift +} + +define <2 x float> @fix_i64_to_float(<2 x i64> %in) { +; CHECK: fix_i64_to_float: +; CHECK: bl +; CHECK: bl + + %conv = uitofp <2 x i64> %in to <2 x float> + %shift = fdiv <2 x float> %conv, <float 2.0, float 2.0> + ret <2 x float> %shift +} + +define <2 x double> @fix_i64_to_double(<2 x i64> %in) { +; CHECK: fix_i64_to_double: +; CHECK: bl +; CHECK: bl + + %conv = uitofp <2 x i64> %in to <2 x double> + %shift = fdiv <2 x double> %conv, <double 2.0, double 2.0> + ret <2 x double> %shift +} +