::
- declare i16 @llvm.convert.to.fp16(float %a)
+ declare i16 @llvm.convert.to.fp16.f32(float %a)
+ declare i16 @llvm.convert.to.fp16.f64(double %a)
Overview:
"""""""""
-The '``llvm.convert.to.fp16``' intrinsic function performs a conversion
-from single precision floating point format to half precision floating
-point format.
+The '``llvm.convert.to.fp16``' intrinsic function performs a conversion from a
+conventional floating point type to half precision floating point format.
Arguments:
""""""""""
Semantics:
""""""""""
-The '``llvm.convert.to.fp16``' intrinsic function performs a conversion
-from single precision floating point format to half precision floating
-point format. The return value is an ``i16`` which contains the
-converted number.
+The '``llvm.convert.to.fp16``' intrinsic function performs a conversion from a
+conventional floating point format to half precision floating point format. The
+return value is an ``i16`` which contains the converted number.
Examples:
"""""""""
.. code-block:: llvm
- %res = call i16 @llvm.convert.to.fp16(float %a)
+ %res = call i16 @llvm.convert.to.fp16.f32(float %a)
store i16 %res, i16* @x, align 2
.. _int_convert_from_fp16:
::
- declare float @llvm.convert.from.fp16(i16 %a)
+ declare float @llvm.convert.from.fp16.f32(i16 %a)
+ declare double @llvm.convert.from.fp16.f64(i16 %a)
Overview:
"""""""""
/// 5) ISD::CvtCode indicating the type of conversion to do
CONVERT_RNDSAT,
- /// FP16_TO_FP32, FP32_TO_FP16 - These operators are used to perform
- /// promotions and truncation for half-precision (16 bit) floating
- /// numbers. We need special nodes since FP16 is a storage-only type with
- /// special semantics of operations.
- FP16_TO_FP32, FP32_TO_FP16,
+ /// FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions
+ /// and truncation for half-precision (16 bit) floating numbers. These nodes
+ /// form a semi-softened interface for dealing with f16 (as an i16), which
+ /// is often a storage-only type but has native conversions.
+ FP16_TO_FP, FP_TO_FP16,
/// FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW,
/// FLOG, FLOG2, FLOG10, FEXP, FEXP2,
// Intrisics to support half precision floating point format
let Properties = [IntrNoMem] in {
-def int_convert_to_fp16 : Intrinsic<[llvm_i16_ty], [llvm_float_ty]>,
- GCCBuiltin<"__gnu_f2h_ieee">;
-def int_convert_from_fp16 : Intrinsic<[llvm_float_ty], [llvm_i16_ty]>,
- GCCBuiltin<"__gnu_h2f_ieee">;
+def int_convert_to_fp16 : Intrinsic<[llvm_i16_ty], [llvm_anyfloat_ty]>;
+def int_convert_from_fp16 : Intrinsic<[llvm_anyfloat_ty], [llvm_i16_ty]>;
}
// These convert intrinsics are to support various conversions between
def uint_to_fp : SDNode<"ISD::UINT_TO_FP" , SDTIntToFPOp>;
def fp_to_sint : SDNode<"ISD::FP_TO_SINT" , SDTFPToIntOp>;
def fp_to_uint : SDNode<"ISD::FP_TO_UINT" , SDTFPToIntOp>;
-def f16_to_f32 : SDNode<"ISD::FP16_TO_FP32", SDTIntToFPOp>;
-def f32_to_f16 : SDNode<"ISD::FP32_TO_FP16", SDTFPToIntOp>;
+def f16_to_fp : SDNode<"ISD::FP16_TO_FP" , SDTIntToFPOp>;
+def fp_to_f16 : SDNode<"ISD::FP_TO_FP16" , SDTFPToIntOp>;
def setcc : SDNode<"ISD::SETCC" , SDTSetCC>;
def select : SDNode<"ISD::SELECT" , SDTSelect>;
if (Action != TargetLowering::Promote)
Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
break;
+ case ISD::FP_TO_FP16:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::EXTRACT_VECTOR_ELT:
RTLIB::FMA_F80, RTLIB::FMA_F128,
RTLIB::FMA_PPCF128));
break;
- case ISD::FP16_TO_FP32:
- Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
+ case ISD::FP16_TO_FP: {
+ if (Node->getValueType(0) == MVT::f32) {
+ Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
+ break;
+ }
+
+ // We can extend to types bigger than f32 in two steps without changing the
+ // result. Since "f16 -> f32" is much more commonly available, give CodeGen
+ // the option of emitting that before resorting to a libcall.
+ SDValue Res =
+ DAG.getNode(ISD::FP16_TO_FP, dl, MVT::f32, Node->getOperand(0));
+ Results.push_back(
+ DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res));
break;
- case ISD::FP32_TO_FP16:
+ }
+ case ISD::FP_TO_FP16:
+ // Can't use two-step truncation here because the rounding may be
+ // significant.
+ assert(Node->getOperand(0).getValueType() == MVT::f32 &&
+ "Don't know libcall for FPROUND_F64_F16");
Results.push_back(ExpandLibCall(RTLIB::FPROUND_F32_F16, Node, false));
break;
case ISD::ConstantFP: {
case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break;
case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break;
case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break;
- case ISD::FP16_TO_FP32:R = SoftenFloatRes_FP16_TO_FP32(N); break;
+ case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break;
case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break;
case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break;
case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special
// nodes?
-SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) {
+ EVT MidVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32);
SDValue Op = N->getOperand(0);
- return TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false,
- SDLoc(N)).first;
+ SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, &Op, 1,
+ false, SDLoc(N)).first;
+ if (N->getValueType(0) == MVT::f32)
+ return Res32;
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, N->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
+ return TLI.makeLibCall(DAG, LC, NVT, &Res32, 1, false, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break;
case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_UINT(N); break;
- case ISD::FP32_TO_FP16:Res = SoftenFloatOp_FP32_TO_FP16(N); break;
+ case ISD::FP_TO_FP16: Res = SoftenFloatOp_FP_TO_FP16(N); break;
case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break;
case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break;
case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break;
return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first;
}
-SDValue DAGTypeLegalizer::SoftenFloatOp_FP32_TO_FP16(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_FP16(SDNode *N) {
+ assert(N->getOperand(0).getValueType() == MVT::f32 &&
+ "Cannot soften in one step");
EVT RVT = N->getValueType(0);
RTLIB::Libcall LC = RTLIB::FPROUND_F32_F16;
SDValue Op = GetSoftenedFloat(N->getOperand(0));
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break;
- case ISD::FP32_TO_FP16:Res = PromoteIntRes_FP32_TO_FP16(N); break;
+ case ISD::FP_TO_FP16: Res = PromoteIntRes_FP_TO_FP16(N); break;
case ISD::AND:
case ISD::OR:
DAG.getValueType(N->getValueType(0).getScalarType()));
}
-SDValue DAGTypeLegalizer::PromoteIntRes_FP32_TO_FP16(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
OpNo); break;
case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
- case ISD::FP16_TO_FP32:
+ case ISD::FP16_TO_FP:
case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
SDValue PromoteIntRes_CTTZ(SDNode *N);
SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
- SDValue PromoteIntRes_FP32_TO_FP16(SDNode *N);
+ SDValue PromoteIntRes_FP_TO_FP16(SDNode *N);
SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
SDValue PromoteIntRes_LOAD(LoadSDNode *N);
SDValue PromoteIntRes_Overflow(SDNode *N);
SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
SDValue SoftenFloatRes_FNEG(SDNode *N);
SDValue SoftenFloatRes_FP_EXTEND(SDNode *N);
- SDValue SoftenFloatRes_FP16_TO_FP32(SDNode *N);
+ SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N);
SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
SDValue SoftenFloatRes_FPOW(SDNode *N);
SDValue SoftenFloatRes_FPOWI(SDNode *N);
SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N);
SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N);
- SDValue SoftenFloatOp_FP32_TO_FP16(SDNode *N);
+ SDValue SoftenFloatOp_FP_TO_FP16(SDNode *N);
SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
SDValue SoftenFloatOp_SETCC(SDNode *N);
SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
return nullptr;
}
case Intrinsic::convert_to_fp16:
- setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, sdl,
+ setValue(&I, DAG.getNode(ISD::FP_TO_FP16, sdl,
MVT::i16, getValue(I.getArgOperand(0))));
return nullptr;
case Intrinsic::convert_from_fp16:
- setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, sdl,
- MVT::f32, getValue(I.getArgOperand(0))));
+ setValue(&I,
+ DAG.getNode(ISD::FP16_TO_FP, sdl, TLI->getValueType(I.getType()),
+ getValue(I.getArgOperand(0))));
return nullptr;
case Intrinsic::pcmarker: {
SDValue Tmp = getValue(I.getArgOperand(0));
case ISD::FP_TO_UINT: return "fp_to_uint";
case ISD::BITCAST: return "bitcast";
case ISD::ADDRSPACECAST: return "addrspacecast";
- case ISD::FP16_TO_FP32: return "fp16_to_fp32";
- case ISD::FP32_TO_FP16: return "fp32_to_fp16";
+ case ISD::FP16_TO_FP: return "fp16_to_fp";
+ case ISD::FP_TO_FP16: return "fp_to_fp16";
case ISD::CONVERT_RNDSAT: {
switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
defm FCVT : FPConversion<"fcvt">;
-def : Pat<(f32_to_f16 FPR32:$Rn),
+def : Pat<(fp_to_f16 FPR32:$Rn),
(i32 (COPY_TO_REGCLASS
(f32 (SUBREG_TO_REG (i32 0), (FCVTHSr FPR32:$Rn), hsub)),
GPR32))>;
-def : Pat<(f32 (f16_to_f32 i32:$Rn)),
+def : Pat<(f32 (f16_to_fp i32:$Rn)),
(FCVTSHr (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS i32:$Rn, FPR32)),
hsub))>;
// When converting from f16 coming directly from a load, make sure we
// load into the FPR16 registers rather than going through the GPRs.
// f16->f32
-def : Pat<(f32 (f16_to_f32 (i32
+def : Pat<(f32 (f16_to_fp (i32
(zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend16:$extend))))),
(FCVTSHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>;
-def : Pat<(f32 (f16_to_f32 (i32
+def : Pat<(f32 (f16_to_fp (i32
(zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend16:$extend))))),
(FCVTSHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>;
-def : Pat <(f32 (f16_to_f32 (i32
+def : Pat <(f32 (f16_to_fp (i32
(zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
(FCVTSHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>;
-def : Pat <(f32 (f16_to_f32 (i32
+def : Pat <(f32 (f16_to_fp (i32
(zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
(FCVTSHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;
// f16->f64
-def : Pat<(f64 (fextend (f32 (f16_to_f32 (i32
+def : Pat<(f64 (fextend (f32 (f16_to_fp (i32
(zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend16:$extend))))))),
(FCVTDHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>;
-def : Pat<(f64 (fextend (f32 (f16_to_f32 (i32
+def : Pat<(f64 (fextend (f32 (f16_to_fp (i32
(zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend16:$extend))))))),
(FCVTDHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>;
-def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32
+def : Pat <(f64 (fextend (f32 (f16_to_fp (i32
(zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))))),
(FCVTDHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>;
-def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32
+def : Pat <(f64 (fextend (f32 (f16_to_fp (i32
(zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))))),
(FCVTDHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;
// registers rather than going through the GPRs.
let AddedComplexity = 10 in {
// f32->f16
-def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
+def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))),
(ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend16:$extend)),
(STRHroW (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend16:$extend)>;
-def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
+def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))),
(ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend16:$extend)),
(STRHroX (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend16:$extend)>;
-def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
+def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))),
(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
(STRHui (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>;
-def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
+def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 FPR32:$Rt))),
(am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
(STURHi (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, simm9:$offset)>;
// f64->f16
-def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
+def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))),
(ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend16:$extend)),
(STRHroW (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend16:$extend)>;
-def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
+def : Pat< (truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))),
(ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend16:$extend)),
(STRHroX (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend16:$extend)>;
-def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
+def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))),
(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
(STRHui (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>;
-def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
+def : Pat <(truncstorei16 (assertzext (i32 (fp_to_f16 (f32 (fround FPR64:$Rt))))),
(am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
(STURHi (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, simm9:$offset)>;
}
}
// Special handling for half-precision FP.
if (!Subtarget->hasFP16()) {
- setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
- setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
}
}
/* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>;
-def : Pat<(f32_to_f16 SPR:$a),
+def : Pat<(fp_to_f16 SPR:$a),
(i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
-def : Pat<(f16_to_f32 GPR:$a),
+def : Pat<(f16_to_fp GPR:$a),
(VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
"}}")))),
Float32Regs, Int16Regs, int_nvvm_h2f>;
-def : Pat<(f32 (f16_to_f32 Int16Regs:$a)),
+def : Pat<(f32 (f16_to_fp Int16Regs:$a)),
(CVT_f32_f16 Int16Regs:$a, CvtNONE)>;
-def : Pat<(i16 (f32_to_f16 Float32Regs:$a)),
+def : Pat<(i16 (fp_to_f16 Float32Regs:$a)),
(CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(i16 (f32_to_f16 Float32Regs:$a)),
+def : Pat<(i16 (fp_to_f16 Float32Regs:$a)),
(CVT_f16_f32 Float32Regs:$a, CvtRN)>;
//
>;
defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>;
defm V_CVT_F16_F32 : VOP1_32 <0x0000000a, "V_CVT_F16_F32",
- [(set i32:$dst, (f32_to_f16 f32:$src0))]
+ [(set i32:$dst, (fp_to_f16 f32:$src0))]
>;
defm V_CVT_F32_F16 : VOP1_32 <0x0000000b, "V_CVT_F32_F16",
- [(set f32:$dst, (f16_to_f32 i32:$src0))]
+ [(set f32:$dst, (f16_to_fp i32:$src0))]
>;
//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "V_CVT_RPI_I32_F32", []>;
//defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "V_CVT_FLR_I32_F32", []>;
// If we don't have F16C support, then lower half float conversions
// into library calls.
if (TM.Options.UseSoftFloat || !Subtarget->hasF16C()) {
- setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
- setOperationAction(ISD::FP32_TO_FP16, MVT::i16, Expand);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
}
if (Subtarget->hasPOPCNT()) {
// Patterns for matching conversions from float to half-float and vice versa.
let Predicates = [HasF16C] in {
- def : Pat<(f32_to_f16 FR32:$src),
+ def : Pat<(fp_to_f16 FR32:$src),
(i16 (EXTRACT_SUBREG (VMOVPDI2DIrr (VCVTPS2PHrr
(COPY_TO_REGCLASS FR32:$src, VR128), 0)), sub_16bit))>;
- def : Pat<(f16_to_f32 GR16:$src),
+ def : Pat<(f16_to_fp GR16:$src),
(f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
(COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128)), FR32)) >;
- def : Pat<(f16_to_f32 (i16 (f32_to_f16 FR32:$src))),
+ def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32:$src))),
(f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
(VCVTPS2PHrr (COPY_TO_REGCLASS FR32:$src, VR128), 0)), FR32)) >;
}
; CHECK-LABEL: to_half:
; CHECK: fcvt h[[HALFVAL:[0-9]+]], s0
; CHECK: fmov {{w[0-9]+}}, {{s[0-9]+}}
- %res = call i16 @llvm.convert.to.fp16(float %in)
+ %res = call i16 @llvm.convert.to.fp16.f32(float %in)
ret i16 %res
}
; CHECK-LABEL: from_half:
; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
; CHECK: fcvt s0, {{h[0-9]+}}
- %res = call float @llvm.convert.from.fp16(i16 %in)
+ %res = call float @llvm.convert.from.fp16.f32(i16 %in)
ret float %res
}
-declare float @llvm.convert.from.fp16(i16) #1
-declare i16 @llvm.convert.to.fp16(float) #1
+declare float @llvm.convert.from.fp16.f32(i16) #1
+declare i16 @llvm.convert.to.fp16.f32(float) #1
; CHECK-NEXT: ret
%tmp = load i16* %a, align 2
- %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
+ %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
ret float %tmp1
}
; CHECK-NEXT: ret
%tmp = load i16* %a, align 2
- %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
+ %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
%conv = fpext float %tmp1 to double
ret double %conv
}
%idxprom = sext i32 %i to i64
%arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
%tmp = load i16* %arrayidx, align 2
- %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
+ %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
ret float %tmp1
}
%idxprom = sext i32 %i to i64
%arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
%tmp = load i16* %arrayidx, align 2
- %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
+ %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
%conv = fpext float %tmp1 to double
ret double %conv
}
%arrayidx = getelementptr inbounds i16* %a, i64 %i
%tmp = load i16* %arrayidx, align 2
- %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
+ %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
ret float %tmp1
}
%arrayidx = getelementptr inbounds i16* %a, i64 %i
%tmp = load i16* %arrayidx, align 2
- %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
+ %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
%conv = fpext float %tmp1 to double
ret double %conv
}
%arrayidx = getelementptr inbounds i16* %a, i64 10
%tmp = load i16* %arrayidx, align 2
- %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
+ %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
ret float %tmp1
}
%arrayidx = getelementptr inbounds i16* %a, i64 10
%tmp = load i16* %arrayidx, align 2
- %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
+ %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
%conv = fpext float %tmp1 to double
ret double %conv
}
%arrayidx = getelementptr inbounds i16* %a, i64 -10
%tmp = load i16* %arrayidx, align 2
- %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
+ %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
ret float %tmp1
}
%arrayidx = getelementptr inbounds i16* %a, i64 -10
%tmp = load i16* %arrayidx, align 2
- %tmp1 = tail call float @llvm.convert.from.fp16(i16 %tmp)
+ %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
%conv = fpext float %tmp1 to double
ret double %conv
}
; CHECK-NEXT: str h0, [x0]
; CHECK-NEXT: ret
- %tmp = tail call i16 @llvm.convert.to.fp16(float %val)
+ %tmp = tail call i16 @llvm.convert.to.fp16.f32(float %val)
store i16 %tmp, i16* %a, align 2
ret void
}
; CHECK-NEXT: ret
%conv = fptrunc double %val to float
- %tmp = tail call i16 @llvm.convert.to.fp16(float %conv)
+ %tmp = tail call i16 @llvm.convert.to.fp16.f32(float %conv)
store i16 %tmp, i16* %a, align 2
ret void
}
; CHECK-NEXT: str h0, [x0, w1, sxtw #1]
; CHECK-NEXT: ret
- %tmp = tail call i16 @llvm.convert.to.fp16(float %val)
+ %tmp = tail call i16 @llvm.convert.to.fp16.f32(float %val)
%idxprom = sext i32 %i to i64
%arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
store i16 %tmp, i16* %arrayidx, align 2
; CHECK-NEXT: ret
%conv = fptrunc double %val to float
- %tmp = tail call i16 @llvm.convert.to.fp16(float %conv)
+ %tmp = tail call i16 @llvm.convert.to.fp16.f32(float %conv)
%idxprom = sext i32 %i to i64
%arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
store i16 %tmp, i16* %arrayidx, align 2
; CHECK-NEXT: str h0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
- %tmp = tail call i16 @llvm.convert.to.fp16(float %val)
+ %tmp = tail call i16 @llvm.convert.to.fp16.f32(float %val)
%arrayidx = getelementptr inbounds i16* %a, i64 %i
store i16 %tmp, i16* %arrayidx, align 2
ret void
; CHECK-NEXT: ret
%conv = fptrunc double %val to float
- %tmp = tail call i16 @llvm.convert.to.fp16(float %conv)
+ %tmp = tail call i16 @llvm.convert.to.fp16.f32(float %conv)
%arrayidx = getelementptr inbounds i16* %a, i64 %i
store i16 %tmp, i16* %arrayidx, align 2
ret void
; CHECK-NEXT: str h0, [x0, #20]
; CHECK-NEXT: ret
- %tmp = tail call i16 @llvm.convert.to.fp16(float %val)
+ %tmp = tail call i16 @llvm.convert.to.fp16.f32(float %val)
%arrayidx = getelementptr inbounds i16* %a, i64 10
store i16 %tmp, i16* %arrayidx, align 2
ret void
; CHECK-NEXT: ret
%conv = fptrunc double %val to float
- %tmp = tail call i16 @llvm.convert.to.fp16(float %conv)
+ %tmp = tail call i16 @llvm.convert.to.fp16.f32(float %conv)
%arrayidx = getelementptr inbounds i16* %a, i64 10
store i16 %tmp, i16* %arrayidx, align 2
ret void
; CHECK-NEXT: stur h0, [x0, #-20]
; CHECK-NEXT: ret
- %tmp = tail call i16 @llvm.convert.to.fp16(float %val)
+ %tmp = tail call i16 @llvm.convert.to.fp16.f32(float %val)
%arrayidx = getelementptr inbounds i16* %a, i64 -10
store i16 %tmp, i16* %arrayidx, align 2
ret void
; CHECK-NEXT: ret
%conv = fptrunc double %val to float
- %tmp = tail call i16 @llvm.convert.to.fp16(float %conv)
+ %tmp = tail call i16 @llvm.convert.to.fp16.f32(float %conv)
%arrayidx = getelementptr inbounds i16* %a, i64 -10
store i16 %tmp, i16* %arrayidx, align 2
ret void
}
-declare i16 @llvm.convert.to.fp16(float) nounwind readnone
-declare float @llvm.convert.from.fp16(i16) nounwind readnone
+declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
+declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
entry:
%0 = load i16* @x, align 2
%1 = load i16* @y, align 2
- %2 = tail call float @llvm.convert.from.fp16(i16 %0)
+ %2 = tail call float @llvm.convert.from.fp16.f32(i16 %0)
; CHECK: __gnu_h2f_ieee
; CHECK-FP16: vcvtb.f32.f16
- %3 = tail call float @llvm.convert.from.fp16(i16 %1)
+ %3 = tail call float @llvm.convert.from.fp16.f32(i16 %1)
; CHECK: __gnu_h2f_ieee
; CHECK-FP16: vcvtb.f32.f16
%4 = fadd float %2, %3
- %5 = tail call i16 @llvm.convert.to.fp16(float %4)
+ %5 = tail call i16 @llvm.convert.to.fp16.f32(float %4)
; CHECK: __gnu_f2h_ieee
; CHECK-FP16: vcvtb.f16.f32
store i16 %5, i16* @x, align 2
ret void
}
-declare float @llvm.convert.from.fp16(i16) nounwind readnone
+declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
-declare i16 @llvm.convert.to.fp16(float) nounwind readnone
+declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-declare i16 @llvm.convert.to.fp16(float) nounwind readnone
+declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
; SI-LABEL: @test_convert_fp16_to_fp32:
; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]]
; SI: BUFFER_STORE_SHORT [[RESULT]]
define void @test_convert_fp16_to_fp32(i16 addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
%val = load float addrspace(1)* %in, align 4
- %cvt = call i16 @llvm.convert.to.fp16(float %val) nounwind readnone
+ %cvt = call i16 @llvm.convert.to.fp16.f32(float %val) nounwind readnone
store i16 %cvt, i16 addrspace(1)* %out, align 2
ret void
}
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-declare float @llvm.convert.from.fp16(i16) nounwind readnone
+declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
; SI-LABEL: @test_convert_fp16_to_fp32:
; SI: BUFFER_LOAD_USHORT [[VAL:v[0-9]+]]
; SI: BUFFER_STORE_DWORD [[RESULT]]
define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
%val = load i16 addrspace(1)* %in, align 2
- %cvt = call float @llvm.convert.from.fp16(i16 %val) nounwind readnone
+ %cvt = call float @llvm.convert.from.fp16.f32(i16 %val) nounwind readnone
store float %cvt, float addrspace(1)* %out, align 4
ret void
}
define void @test1(float %src, i16* %dest) {
- %1 = tail call i16 @llvm.convert.to.fp16(float %src)
+ %1 = tail call i16 @llvm.convert.to.fp16.f32(float %src)
store i16 %1, i16* %dest, align 2
ret void
}
define float @test2(i16* nocapture %src) {
%1 = load i16* %src, align 2
- %2 = tail call float @llvm.convert.from.fp16(i16 %1)
+ %2 = tail call float @llvm.convert.from.fp16.f32(i16 %1)
ret float %2
}
; CHECK-LABEL: test2:
define float @test3(float %src) nounwind uwtable readnone {
- %1 = tail call i16 @llvm.convert.to.fp16(float %src)
- %2 = tail call float @llvm.convert.from.fp16(i16 %1)
+ %1 = tail call i16 @llvm.convert.to.fp16.f32(float %src)
+ %2 = tail call float @llvm.convert.from.fp16.f32(i16 %1)
ret float %2
}
; F16C-NEXT: vcvtph2ps
; F16C: ret
-declare float @llvm.convert.from.fp16(i16) nounwind readnone
-declare i16 @llvm.convert.to.fp16(float) nounwind readnone
+declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
+declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone