From: Hal Finkel Date: Wed, 7 Aug 2013 22:49:12 +0000 (+0000) Subject: Add ISD::FROUND for libm round() X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=41418d17cced656f91038b2482bc9d173b4974b0;p=oota-llvm.git Add ISD::FROUND for libm round() All libm floating-point rounding functions, except for round(), had their own ISD nodes. Recent PowerPC cores have an instruction for round(), and so here I'm adding ISD::FROUND so that round() can be custom lowered as well. For the most part, this is straightforward. I've added an intrinsic and a matching ISD node just like those for nearbyint() and friends. The SelectionDAG pattern I've named frnd (because ISD::FP_ROUND has already claimed fround). This will be used by the PowerPC backend in a follow-up commit. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187926 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/docs/LangRef.rst b/docs/LangRef.rst index b69e2a35d19..5b8ecd85c62 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -7526,6 +7526,42 @@ Semantics: This function returns the same values as the libm ``nearbyint`` functions would, and handles error conditions in the same way. +'``llvm.round.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.round`` on any +floating point or vector of floating point type. Not all targets support +all types however. + +:: + + declare float @llvm.round.f32(float %Val) + declare double @llvm.round.f64(double %Val) + declare x86_fp80 @llvm.round.f80(x86_fp80 %Val) + declare fp128 @llvm.round.f128(fp128 %Val) + declare ppc_fp128 @llvm.round.ppcf128(ppc_fp128 %Val) + +Overview: +""""""""" + +The '``llvm.round.*``' intrinsics returns the operand rounded to the +nearest integer. + +Arguments: +"""""""""" + +The argument and return value are floating point numbers of the same +type. + +Semantics: +"""""""""" + +This function returns the same values as the libm ``round`` +functions would, and handles error conditions in the same way. + Bit Manipulation Intrinsics --------------------------- diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index 9466b90c5e6..45bb7e311b4 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -440,11 +440,11 @@ namespace ISD { /// FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW, /// FLOG, FLOG2, FLOG10, FEXP, FEXP2, - /// FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR - Perform various unary + /// FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR - Perform various unary /// floating point operations. These are inspired by libm. FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW, FLOG, FLOG2, FLOG10, FEXP, FEXP2, - FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR, + FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR, /// FSINCOS - Compute both fsin and fcos as a single operation. FSINCOS, diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h index 41289a42c43..e578b509cf9 100644 --- a/include/llvm/CodeGen/RuntimeLibcalls.h +++ b/include/llvm/CodeGen/RuntimeLibcalls.h @@ -188,6 +188,11 @@ namespace RTLIB { NEARBYINT_F80, NEARBYINT_F128, NEARBYINT_PPCF128, + ROUND_F32, + ROUND_F64, + ROUND_F80, + ROUND_F128, + ROUND_PPCF128, FLOOR_F32, FLOOR_F64, FLOOR_F80, diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td index 1a849c4c30c..ffa121d60c7 100644 --- a/include/llvm/IR/Intrinsics.td +++ b/include/llvm/IR/Intrinsics.td @@ -298,6 +298,7 @@ let Properties = [IntrReadMem] in { def int_trunc : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_rint : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_nearbyint : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_round : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; } let Properties = [IntrNoMem] in { diff --git a/include/llvm/Target/TargetLibraryInfo.h b/include/llvm/Target/TargetLibraryInfo.h index 8c1f223bc09..d0245ba4d6a 100644 --- a/include/llvm/Target/TargetLibraryInfo.h +++ b/include/llvm/Target/TargetLibraryInfo.h @@ -695,6 +695,7 @@ public: case LibFunc::nearbyint: case LibFunc::nearbyintf: case LibFunc::nearbyintl: case LibFunc::ceil: case LibFunc::ceilf: case LibFunc::ceill: case LibFunc::rint: case LibFunc::rintf: case LibFunc::rintl: + case LibFunc::round: case LibFunc::roundf: case LibFunc::roundl: case LibFunc::trunc: case LibFunc::truncf: case LibFunc::truncl: case LibFunc::log2: case LibFunc::log2f: case LibFunc::log2l: case LibFunc::exp2: case LibFunc::exp2f: case LibFunc::exp2l: diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td index befab43cf6c..72963da8cba 100644 --- a/include/llvm/Target/TargetSelectionDAG.td +++ b/include/llvm/Target/TargetSelectionDAG.td @@ -383,6 +383,7 @@ def ftrunc : SDNode<"ISD::FTRUNC" , SDTFPUnaryOp>; def fceil : SDNode<"ISD::FCEIL" , SDTFPUnaryOp>; def ffloor : SDNode<"ISD::FFLOOR" , SDTFPUnaryOp>; def fnearbyint : SDNode<"ISD::FNEARBYINT" , SDTFPUnaryOp>; +def frnd : SDNode<"ISD::FROUND" , SDTFPUnaryOp>; def fround : SDNode<"ISD::FP_ROUND" , SDTFPRoundOp>; def fextend : SDNode<"ISD::FP_EXTEND" , SDTFPExtendOp>; diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index b275dfe0a12..4b29824082f 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -2946,6 +2946,7 @@ static bool IsIdempotent(Intrinsic::ID ID) { case Intrinsic::trunc: case Intrinsic::rint: case Intrinsic::nearbyint: + case Intrinsic::round: return true; } } diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index b48b81767ee..ef532357c5c 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -449,6 +449,7 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, case Intrinsic::nearbyint: ISD = ISD::FNEARBYINT; break; case Intrinsic::rint: ISD = ISD::FRINT; break; + case Intrinsic::round: ISD = ISD::FROUND; break; case Intrinsic::pow: ISD = ISD::FPOW; break; case Intrinsic::fma: ISD = ISD::FMA; break; case Intrinsic::fmuladd: ISD = ISD::FMA; break; // FIXME: mul + add? diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index bd844e599f1..1c062f14227 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3231,6 +3231,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { RTLIB::NEARBYINT_F128, RTLIB::NEARBYINT_PPCF128)); break; + case ISD::FROUND: + Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128)); + break; case ISD::FPOWI: Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, RTLIB::POWI_F80, RTLIB::POWI_F128, diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index cea0b02969b..f5d234dd11d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -88,6 +88,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break; case ISD::FREM: R = SoftenFloatRes_FREM(N); break; case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break; + case ISD::FROUND: R = SoftenFloatRes_FROUND(N); break; case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break; case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; @@ -444,6 +445,18 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { NVT, &Op, 1, false, SDLoc(N)); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128), + NVT, &Op, 1, false, SDLoc(N)); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); @@ -817,6 +830,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break; case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break; case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break; + case ISD::FROUND: ExpandFloatRes_FROUND(N, Lo, Hi); break; case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break; case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break; case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break; @@ -1072,6 +1086,18 @@ void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N, GetPairElements(Call, Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FROUND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 63e9af3f595..a7262704575 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -410,6 +410,7 @@ private: SDValue SoftenFloatRes_FPOWI(SDNode *N); SDValue SoftenFloatRes_FREM(SDNode *N); SDValue SoftenFloatRes_FRINT(SDNode *N); + SDValue SoftenFloatRes_FROUND(SDNode *N); SDValue SoftenFloatRes_FSIN(SDNode *N); SDValue SoftenFloatRes_FSQRT(SDNode *N); SDValue SoftenFloatRes_FSUB(SDNode *N); @@ -470,6 +471,7 @@ private: void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FROUND (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index bbe11b80da8..237a33a0cff 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -241,6 +241,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FTRUNC: case ISD::FRINT: case ISD::FNEARBYINT: + case ISD::FROUND: case ISD::FFLOOR: case ISD::FP_ROUND: case ISD::FP_EXTEND: diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 54380ec152e..0637412cc1a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -83,6 +83,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::FRINT: + case ISD::FROUND: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: @@ -540,6 +541,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::FRINT: + case ISD::FROUND: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: @@ -1507,6 +1509,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FNEARBYINT: case ISD::FNEG: case ISD::FRINT: + case ISD::FROUND: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index b9f4381b798..e23c5bea224 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4902,7 +4902,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::ceil: case Intrinsic::trunc: case Intrinsic::rint: - case Intrinsic::nearbyint: { + case Intrinsic::nearbyint: + case Intrinsic::round: { unsigned Opcode; switch (Intrinsic) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. @@ -4915,6 +4916,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; case Intrinsic::rint: Opcode = ISD::FRINT; break; case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; + case Intrinsic::round: Opcode = ISD::FROUND; break; } setValue(&I, DAG.getNode(Opcode, sdl, @@ -5644,6 +5646,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (visitUnaryFloatCall(I, ISD::FRINT)) return; break; + case LibFunc::round: + case LibFunc::roundf: + case LibFunc::roundl: + if (visitUnaryFloatCall(I, ISD::FROUND)) + return; + break; case LibFunc::trunc: case LibFunc::truncf: case LibFunc::truncl: diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index d8ee221cb46..47bab32787b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -142,6 +142,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FCEIL: return "fceil"; case ISD::FRINT: return "frint"; case ISD::FNEARBYINT: return "fnearbyint"; + case ISD::FROUND: return "fround"; case ISD::FEXP: return "fexp"; case ISD::FEXP2: return "fexp2"; case ISD::FLOG: return "flog"; diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 8d8f81b0032..c3afa776c00 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -191,6 +191,11 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) { Names[RTLIB::NEARBYINT_F80] = "nearbyintl"; Names[RTLIB::NEARBYINT_F128] = "nearbyintl"; Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl"; + Names[RTLIB::ROUND_F32] = "roundf"; + Names[RTLIB::ROUND_F64] = "round"; + Names[RTLIB::ROUND_F80] = "roundl"; + Names[RTLIB::ROUND_F128] = "roundl"; + Names[RTLIB::ROUND_PPCF128] = "roundl"; Names[RTLIB::FLOOR_F32] = "floorf"; Names[RTLIB::FLOOR_F64] = "floor"; Names[RTLIB::FLOOR_F80] = "floorl"; @@ -706,6 +711,7 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand); setOperationAction(ISD::FCEIL, MVT::f16, Expand); setOperationAction(ISD::FRINT, MVT::f16, Expand); + setOperationAction(ISD::FROUND, MVT::f16, Expand); setOperationAction(ISD::FTRUNC, MVT::f16, Expand); setOperationAction(ISD::FLOG , MVT::f32, Expand); setOperationAction(ISD::FLOG2, MVT::f32, Expand); @@ -716,6 +722,7 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand); setOperationAction(ISD::FCEIL, MVT::f32, Expand); setOperationAction(ISD::FRINT, MVT::f32, Expand); + setOperationAction(ISD::FROUND, MVT::f32, Expand); setOperationAction(ISD::FTRUNC, MVT::f32, Expand); setOperationAction(ISD::FLOG , MVT::f64, Expand); setOperationAction(ISD::FLOG2, MVT::f64, Expand); @@ -726,6 +733,7 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); setOperationAction(ISD::FCEIL, MVT::f64, Expand); setOperationAction(ISD::FRINT, MVT::f64, Expand); + setOperationAction(ISD::FROUND, MVT::f64, Expand); setOperationAction(ISD::FTRUNC, MVT::f64, Expand); setOperationAction(ISD::FLOG , MVT::f128, Expand); setOperationAction(ISD::FLOG2, MVT::f128, Expand); @@ -736,6 +744,7 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::FNEARBYINT, MVT::f128, Expand); setOperationAction(ISD::FCEIL, MVT::f128, Expand); setOperationAction(ISD::FRINT, MVT::f128, Expand); + setOperationAction(ISD::FROUND, MVT::f128, Expand); setOperationAction(ISD::FTRUNC, MVT::f128, Expand); // Default ISD::TRAP to expand (which turns it into abort). diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 4e30c537645..05dad8a094d 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -259,6 +259,7 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; case Intrinsic::rint: Opcode = ISD::FRINT; break; case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; + case Intrinsic::round: Opcode = ISD::FROUND; break; } } @@ -309,6 +310,10 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { case LibFunc::rintf: case LibFunc::rintl: Opcode = ISD::FRINT; break; + case LibFunc::round: + case LibFunc::roundf: + case LibFunc::roundl: + Opcode = ISD::FROUND; break; case LibFunc::trunc: case LibFunc::truncf: case LibFunc::truncl: diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index a62fedc43d1..e452acdafac 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1772,6 +1772,7 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) { case Intrinsic::trunc: case Intrinsic::rint: case Intrinsic::nearbyint: + case Intrinsic::round: case Intrinsic::pow: case Intrinsic::fma: case Intrinsic::fmuladd: @@ -1850,6 +1851,10 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) { case LibFunc::nearbyintf: case LibFunc::nearbyintl: return Intrinsic::nearbyint; + case LibFunc::round: + case LibFunc::roundf: + case LibFunc::roundl: + return Intrinsic::round; case LibFunc::pow: case LibFunc::powf: case LibFunc::powl: diff --git a/test/Transforms/LoopVectorize/intrinsic.ll b/test/Transforms/LoopVectorize/intrinsic.ll index 95b53b70ea8..566dcc75cd8 100644 --- a/test/Transforms/LoopVectorize/intrinsic.ll +++ b/test/Transforms/LoopVectorize/intrinsic.ll @@ -728,6 +728,58 @@ for.end: ; preds = %for.body, %entry declare double @llvm.nearbyint.f64(double) nounwind readnone +;CHECK-LABEL: @round_f32( +;CHECK: llvm.round.v4f32 +;CHECK: ret void +define void @round_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable { +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %call = tail call float @llvm.round.f32(float %0) nounwind readnone + %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv + store float %call, float* %arrayidx2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare float @llvm.round.f32(float) nounwind readnone + +;CHECK-LABEL: @round_f64( +;CHECK: llvm.round.v4f64 +;CHECK: ret void +define void @round_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable { +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv + %0 = load double* %arrayidx, align 8 + %call = tail call double @llvm.round.f64(double %0) nounwind readnone + %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv + store double %call, double* %arrayidx2, align 8 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare double @llvm.round.f64(double) nounwind readnone + ;CHECK-LABEL: @fma_f32( ;CHECK: llvm.fma.v4f32 ;CHECK: ret void