X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMInstrVFP.td;h=e0a9314991644c4b1cf315e152bb60a6602bb757;hb=341a7e245e7d503457ef6c294a021e8223268a4e;hp=c79ffddf6b5868d2c199eb89cf52486ea3be86df;hpb=c82157378e452035e6244194f3778e4a558435f3;p=oota-llvm.git diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index c79ffddf6b5..e0a93149916 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -194,19 +194,40 @@ multiclass vfp_ldst_mult; let mayStore = 1, hasExtraSrcRegAllocReq = 1 in -defm VSTM : vfp_ldst_mult<"vstm", 0, IIC_fpLoad_m, IIC_fpLoad_mu>; +defm VSTM : vfp_ldst_mult<"vstm", 0, IIC_fpStore_m, IIC_fpStore_mu>; -} // neverHasSideEffects +} // hasSideEffects def : MnemonicAlias<"vldm", "vldmia">; def : MnemonicAlias<"vstm", "vstmia">; +// FLDM/FSTM - Load / Store multiple single / double precision registers for +// pre-ARMv6 cores. +// These instructions are deprecated! +def : VFP2MnemonicAlias<"fldmias", "vldmia">; +def : VFP2MnemonicAlias<"fldmdbs", "vldmdb">; +def : VFP2MnemonicAlias<"fldmeas", "vldmdb">; +def : VFP2MnemonicAlias<"fldmfds", "vldmia">; +def : VFP2MnemonicAlias<"fldmiad", "vldmia">; +def : VFP2MnemonicAlias<"fldmdbd", "vldmdb">; +def : VFP2MnemonicAlias<"fldmead", "vldmdb">; +def : VFP2MnemonicAlias<"fldmfdd", "vldmia">; + +def : VFP2MnemonicAlias<"fstmias", "vstmia">; +def : VFP2MnemonicAlias<"fstmdbs", "vstmdb">; +def : VFP2MnemonicAlias<"fstmeas", "vstmia">; +def : VFP2MnemonicAlias<"fstmfds", "vstmdb">; +def : VFP2MnemonicAlias<"fstmiad", "vstmia">; +def : VFP2MnemonicAlias<"fstmdbd", "vstmdb">; +def : VFP2MnemonicAlias<"fstmead", "vstmia">; +def : VFP2MnemonicAlias<"fstmfdd", "vstmdb">; + def : InstAlias<"vpush${p} $r", (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r)>, Requires<[HasVFP2]>; def : InstAlias<"vpush${p} $r", (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>, @@ -247,7 +268,7 @@ multiclass vfp_ldstx_mult { AXXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops), IndexModeUpd, !strconcat(asm, "dbx${p}\t$Rn!, $regs"), "$Rn = $wb", []> { let Inst{24-23} = 0b10; // Decrement Before - let Inst{21} = 1; + let Inst{21} = 1; // Writeback let Inst{20} = L_bit; } } @@ -255,6 +276,12 @@ multiclass vfp_ldstx_mult { defm FLDM : vfp_ldstx_mult<"fldm", 1>; defm FSTM : vfp_ldstx_mult<"fstm", 0>; +def : VFP2MnemonicAlias<"fldmeax", "fldmdbx">; +def : VFP2MnemonicAlias<"fldmfdx", "fldmiax">; + +def : VFP2MnemonicAlias<"fstmeax", "fstmiax">; +def : VFP2MnemonicAlias<"fstmfdx", "fstmdbx">; + //===----------------------------------------------------------------------===// // FP Binary Operations. // @@ -333,9 +360,52 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0, let D = VFPNeonA8Domain; } +multiclass vsel_inst opc, int CC> { + let DecoderNamespace = "VFPV8", PostEncoderMethod = "", + Uses = [CPSR], AddedComplexity = 4 in { + def S : ASbInp<0b11100, opc, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + NoItinerary, !strconcat("vsel", op, ".f32\t$Sd, $Sn, $Sm"), + [(set SPR:$Sd, (ARMcmov SPR:$Sm, SPR:$Sn, CC))]>, + Requires<[HasFPARMv8]>; + + def D : ADbInp<0b11100, opc, 0, + (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), + NoItinerary, !strconcat("vsel", op, ".f64\t$Dd, $Dn, $Dm"), + [(set DPR:$Dd, (ARMcmov (f64 DPR:$Dm), (f64 DPR:$Dn), CC))]>, + Requires<[HasFPARMv8, HasDPVFP]>; + } +} + +// The CC constants here match ARMCC::CondCodes. +defm VSELGT : vsel_inst<"gt", 0b11, 12>; +defm VSELGE : vsel_inst<"ge", 0b10, 10>; +defm VSELEQ : vsel_inst<"eq", 0b00, 0>; +defm VSELVS : vsel_inst<"vs", 0b01, 6>; + +multiclass vmaxmin_inst { + let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in { + def S : ASbInp<0b11101, 0b00, opc, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + NoItinerary, !strconcat(op, ".f32\t$Sd, $Sn, $Sm"), + [(set SPR:$Sd, (SD SPR:$Sn, SPR:$Sm))]>, + Requires<[HasFPARMv8]>; + + def D : ADbInp<0b11101, 0b00, opc, + (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), + NoItinerary, !strconcat(op, ".f64\t$Dd, $Dn, $Dm"), + [(set DPR:$Dd, (f64 (SD (f64 DPR:$Dn), (f64 DPR:$Dm))))]>, + Requires<[HasFPARMv8, HasDPVFP]>; + } +} + +defm VMAXNM : vmaxmin_inst<"vmaxnm", 0, ARMvmaxnm>; +defm VMINNM : vmaxmin_inst<"vminnm", 1, ARMvminnm>; + // Match reassociated forms only if not sign dependent rounding. def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)), - (VNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>; + (VNMULD DPR:$a, DPR:$b)>, + Requires<[NoHonorSignDependentRounding,HasDPVFP]>; def : Pat<(fmul (fneg SPR:$a), SPR:$b), (VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>; @@ -445,6 +515,8 @@ def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, let Inst{5} = Sm{0}; let Inst{15-12} = Dd{3-0}; let Inst{22} = Dd{4}; + + let Predicates = [HasVFP2, HasDPVFP]; } // Special case encoding: bits 11-8 is 0b1011. @@ -466,9 +538,11 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm, let Inst{11-8} = 0b1011; let Inst{7-6} = 0b11; let Inst{4} = 0; + + let Predicates = [HasVFP2, HasDPVFP]; } -// Between half-precision and single-precision. For disassembly only. +// Between half, single and double-precision. For disassembly only. // FIXME: Verify encoding after integrated assembler is working. def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), @@ -479,12 +553,6 @@ def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; -def : Pat<(f32_to_f16 SPR:$a), - (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; - -def : Pat<(f16_to_f32 GPR:$a), - (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; - def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; @@ -493,6 +561,128 @@ def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; +def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0, + (outs DPR:$Dd), (ins SPR:$Sm), + NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm", + []>, Requires<[HasFPARMv8, HasDPVFP]> { + // Instruction operands. + bits<5> Sm; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; +} + +def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0, + (outs SPR:$Sd), (ins DPR:$Dm), + NoItinerary, "vcvtb", ".f16.f64\t$Sd, $Dm", + []>, Requires<[HasFPARMv8, HasDPVFP]> { + // Instruction operands. + bits<5> Sd; + bits<5> Dm; + + // Encode instruction operands. + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; +} + +def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0, + (outs DPR:$Dd), (ins SPR:$Sm), + NoItinerary, "vcvtt", ".f64.f16\t$Dd, $Sm", + []>, Requires<[HasFPARMv8, HasDPVFP]> { + // Instruction operands. + bits<5> Sm; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; +} + +def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0, + (outs SPR:$Sd), (ins DPR:$Dm), + NoItinerary, "vcvtt", ".f16.f64\t$Sd, $Dm", + []>, Requires<[HasFPARMv8, HasDPVFP]> { + // Instruction operands. + bits<5> Sd; + bits<5> Dm; + + // Encode instruction operands. + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; +} + +def : Pat<(fp_to_f16 SPR:$a), + (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; + +def : Pat<(fp_to_f16 (f64 DPR:$a)), + (i32 (COPY_TO_REGCLASS (VCVTBDH DPR:$a), GPR))>; + +def : Pat<(f16_to_fp GPR:$a), + (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; + +def : Pat<(f64 (f16_to_fp GPR:$a)), + (VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>; + +multiclass vcvt_inst rm, + SDPatternOperator node = null_frag> { + let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in { + def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vcvt", opc, ".s32.f32\t$Sd, $Sm"), + [(set SPR:$Sd, (arm_ftosi (node SPR:$Sm)))]>, + Requires<[HasFPARMv8]> { + let Inst{17-16} = rm; + } + + def US : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vcvt", opc, ".u32.f32\t$Sd, $Sm"), + [(set SPR:$Sd, (arm_ftoui (node SPR:$Sm)))]>, + Requires<[HasFPARMv8]> { + let Inst{17-16} = rm; + } + + def SD : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, + (outs SPR:$Sd), (ins DPR:$Dm), + NoItinerary, !strconcat("vcvt", opc, ".s32.f64\t$Sd, $Dm"), + [(set SPR:$Sd, (arm_ftosi (f64 (node (f64 DPR:$Dm)))))]>, + Requires<[HasFPARMv8, HasDPVFP]> { + bits<5> Dm; + + let Inst{17-16} = rm; + + // Encode instruction operands + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{8} = 1; + } + + def UD : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0, + (outs SPR:$Sd), (ins DPR:$Dm), + NoItinerary, !strconcat("vcvt", opc, ".u32.f64\t$Sd, $Dm"), + [(set SPR:$Sd, (arm_ftoui (f64 (node (f64 DPR:$Dm)))))]>, + Requires<[HasFPARMv8, HasDPVFP]> { + bits<5> Dm; + + let Inst{17-16} = rm; + + // Encode instruction operands + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{8} = 1; + } + } +} + +defm VCVTA : vcvt_inst<"a", 0b00, frnd>; +defm VCVTN : vcvt_inst<"n", 0b01>; +defm VCVTP : vcvt_inst<"p", 0b10, fceil>; +defm VCVTM : vcvt_inst<"m", 0b11, ffloor>; + def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0, (outs DPR:$Dd), (ins DPR:$Dm), IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm", @@ -507,6 +697,68 @@ def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0, let D = VFPNeonA8Domain; } +multiclass vrint_inst_zrx { + def S : ASuI<0b11101, 0b11, 0b0110, 0b11, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm", + [(set (f32 SPR:$Sd), (node (f32 SPR:$Sm)))]>, + Requires<[HasFPARMv8]> { + let Inst{7} = op2; + let Inst{16} = op; + } + def D : ADuI<0b11101, 0b11, 0b0110, 0b11, 0, + (outs DPR:$Dd), (ins DPR:$Dm), + NoItinerary, !strconcat("vrint", opc), ".f64\t$Dd, $Dm", + [(set (f64 DPR:$Dd), (node (f64 DPR:$Dm)))]>, + Requires<[HasFPARMv8, HasDPVFP]> { + let Inst{7} = op2; + let Inst{16} = op; + } + + def : InstAlias(NAME#"S") SPR:$Sd, SPR:$Sm, pred:$p)>, + Requires<[HasFPARMv8]>; + def : InstAlias(NAME#"D") DPR:$Dd, DPR:$Dm, pred:$p)>, + Requires<[HasFPARMv8,HasDPVFP]>; +} + +defm VRINTZ : vrint_inst_zrx<"z", 0, 1, ftrunc>; +defm VRINTR : vrint_inst_zrx<"r", 0, 0, fnearbyint>; +defm VRINTX : vrint_inst_zrx<"x", 1, 0, frint>; + +multiclass vrint_inst_anpm rm, + SDPatternOperator node = null_frag> { + let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in { + def S : ASuInp<0b11101, 0b11, 0b1000, 0b01, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vrint", opc, ".f32\t$Sd, $Sm"), + [(set (f32 SPR:$Sd), (node (f32 SPR:$Sm)))]>, + Requires<[HasFPARMv8]> { + let Inst{17-16} = rm; + } + def D : ADuInp<0b11101, 0b11, 0b1000, 0b01, 0, + (outs DPR:$Dd), (ins DPR:$Dm), + NoItinerary, !strconcat("vrint", opc, ".f64\t$Dd, $Dm"), + [(set (f64 DPR:$Dd), (node (f64 DPR:$Dm)))]>, + Requires<[HasFPARMv8, HasDPVFP]> { + let Inst{17-16} = rm; + } + } + + def : InstAlias(NAME#"S") SPR:$Sd, SPR:$Sm)>, + Requires<[HasFPARMv8]>; + def : InstAlias(NAME#"D") DPR:$Dd, DPR:$Dm)>, + Requires<[HasFPARMv8,HasDPVFP]>; +} + +defm VRINTA : vrint_inst_anpm<"a", 0b00, frnd>; +defm VRINTN : vrint_inst_anpm<"n", 0b01>; +defm VRINTP : vrint_inst_anpm<"p", 0b10, fceil>; +defm VRINTM : vrint_inst_anpm<"m", 0b11, ffloor>; + def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs DPR:$Dd), (ins DPR:$Dm), IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm", @@ -517,7 +769,7 @@ def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0, IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm", [(set SPR:$Sd, (fsqrt SPR:$Sm))]>; -let neverHasSideEffects = 1 in { +let hasSideEffects = 0 in { def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs DPR:$Dd), (ins DPR:$Dm), IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>; @@ -525,7 +777,7 @@ def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0, def VMOVS : ASuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>; -} // neverHasSideEffects +} // hasSideEffects //===----------------------------------------------------------------------===// // FP <-> GPR Copies. Int <-> FP Conversions. @@ -575,7 +827,7 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010, let D = VFPNeonDomain; } -let neverHasSideEffects = 1 in { +let hasSideEffects = 0 in { def VMOVRRD : AVConv3I<0b11000101, 0b1011, (outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm), IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm", @@ -596,6 +848,11 @@ def VMOVRRD : AVConv3I<0b11000101, 0b1011, // Some single precision VFP instructions may be executed on both NEON and VFP // pipelines. let D = VFPNeonDomain; + + // This instruction is equivalent to + // $Rt = EXTRACT_SUBREG $Dm, ssub_0 + // $Rt2 = EXTRACT_SUBREG $Dm, ssub_1 + let isExtractSubreg = 1; } def VMOVRRS : AVConv3I<0b11000101, 0b1010, @@ -619,7 +876,7 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010, let D = VFPNeonDomain; let DecoderMethod = "DecodeVMOVRRS"; } -} // neverHasSideEffects +} // hasSideEffects // FMDHR: GPR -> SPR // FMDLR: GPR -> SPR @@ -644,9 +901,13 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011, // Some single precision VFP instructions may be executed on both NEON and VFP // pipelines. let D = VFPNeonDomain; + + // This instruction is equivalent to + // $Dm = REG_SEQUENCE $Rt, ssub_0, $Rt2, ssub_1 + let isRegSequence = 1; } -let neverHasSideEffects = 1 in +let hasSideEffects = 0 in def VMOVSRR : AVConv5I<0b11000100, 0b1010, (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2), IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2", @@ -696,6 +957,8 @@ class AVConv1IDs_Encode opcod1, bits<2> opcod2, bits<4> opcod3, let Inst{5} = Sm{0}; let Inst{15-12} = Dd{3-0}; let Inst{22} = Dd{4}; + + let Predicates = [HasVFP2, HasDPVFP]; } class AVConv1InSs_Encode opcod1, bits<2> opcod2, bits<4> opcod3, @@ -767,6 +1030,8 @@ class AVConv1IsD_Encode opcod1, bits<2> opcod2, bits<4> opcod3, let Inst{5} = Dm{4}; let Inst{15-12} = Sd{4-1}; let Inst{22} = Sd{0}; + + let Predicates = [HasVFP2, HasDPVFP]; } class AVConv1InsS_Encode opcod1, bits<2> opcod2, bits<4> opcod3, @@ -888,6 +1153,8 @@ class AVConv1XInsD_Encode op1, bits<2> op2, bits<4> op3, bits<4> op4, // if dp_operation then UInt(D:Vd) else UInt(Vd:D); let Inst{22} = dst{4}; let Inst{15-12} = dst{3-0}; + + let Predicates = [HasVFP2, HasDPVFP]; } def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0, @@ -1000,7 +1267,7 @@ def VMLAD : ADbI<0b11100, 0b00, 0, 0, [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; def VMLAS : ASbIn<0b11100, 0b00, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1016,7 +1283,7 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0, def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,DontUseFusedMAC]>; @@ -1027,7 +1294,7 @@ def VMLSD : ADbI<0b11100, 0b00, 1, 0, [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; def VMLSS : ASbIn<0b11100, 0b00, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1043,7 +1310,7 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0, def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; @@ -1054,7 +1321,7 @@ def VNMLAD : ADbI<0b11100, 0b01, 1, 0, [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; def VNMLAS : ASbI<0b11100, 0b01, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1070,7 +1337,7 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0, def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; @@ -1081,7 +1348,7 @@ def VNMLSD : ADbI<0b11100, 0b01, 0, 0, [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; def VNMLSS : ASbI<0b11100, 0b01, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1096,7 +1363,7 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0, def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; @@ -1110,7 +1377,7 @@ def VFMAD : ADbI<0b11101, 0b10, 0, 0, [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; def VFMAS : ASbIn<0b11101, 0b10, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1125,7 +1392,7 @@ def VFMAS : ASbIn<0b11101, 0b10, 0, 0, def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VFMAS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; @@ -1134,7 +1401,7 @@ def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), // (fma x, y, z) -> (vfms z, x, y) def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, DPR:$Ddin)), (VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, - Requires<[HasVFP4]>; + Requires<[HasVFP4,HasDPVFP]>; def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, SPR:$Sdin)), (VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; @@ -1145,7 +1412,7 @@ def VFMSD : ADbI<0b11101, 0b10, 1, 0, [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; def VFMSS : ASbIn<0b11101, 0b10, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1160,7 +1427,7 @@ def VFMSS : ASbIn<0b11101, 0b10, 1, 0, def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VFMSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; @@ -1169,14 +1436,14 @@ def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), // (fma (fneg x), y, z) -> (vfms z, x, y) def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin)), (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, - Requires<[HasVFP4]>; + Requires<[HasVFP4,HasDPVFP]>; def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin)), (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; // (fma x, (fneg y), z) -> (vfms z, x, y) def : Pat<(f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin)), (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, - Requires<[HasVFP4]>; + Requires<[HasVFP4,HasDPVFP]>; def : Pat<(f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin)), (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; @@ -1187,7 +1454,7 @@ def VFNMAD : ADbI<0b11101, 0b01, 1, 0, [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; def VFNMAS : ASbI<0b11101, 0b01, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1202,7 +1469,7 @@ def VFNMAS : ASbI<0b11101, 0b01, 1, 0, def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), (VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; @@ -1211,14 +1478,14 @@ def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), // (fneg (fma x, y, z)) -> (vfnma z, x, y) def : Pat<(fneg (fma (f64 DPR:$Dn), (f64 DPR:$Dm), (f64 DPR:$Ddin))), (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, - Requires<[HasVFP4]>; + Requires<[HasVFP4,HasDPVFP]>; def : Pat<(fneg (fma (f32 SPR:$Sn), (f32 SPR:$Sm), (f32 SPR:$Sdin))), (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; // (fma (fneg x), y, (fneg z)) -> (vfnma z, x, y) def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, (fneg DPR:$Ddin))), (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, - Requires<[HasVFP4]>; + Requires<[HasVFP4,HasDPVFP]>; def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, (fneg SPR:$Sdin))), (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; @@ -1229,7 +1496,7 @@ def VFNMSD : ADbI<0b11101, 0b01, 0, 0, [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; def VFNMSS : ASbI<0b11101, 0b01, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1243,7 +1510,7 @@ def VFNMSS : ASbI<0b11101, 0b01, 0, 0, def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), (VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; @@ -1253,21 +1520,21 @@ def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), // (fma x, y, (fneg z)) -> (vfnms z, x, y)) def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, (fneg DPR:$Ddin))), (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, - Requires<[HasVFP4]>; + Requires<[HasVFP4,HasDPVFP]>; def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, (fneg SPR:$Sdin))), (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; // (fneg (fma (fneg x), y, z)) -> (vfnms z, x, y) def : Pat<(fneg (f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin))), (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, - Requires<[HasVFP4]>; + Requires<[HasVFP4,HasDPVFP]>; def : Pat<(fneg (f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin))), (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; // (fneg (fma x, (fneg y), z) -> (vfnms z, x, y) def : Pat<(fneg (f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin))), (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, - Requires<[HasVFP4]>; + Requires<[HasVFP4,HasDPVFP]>; def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))), (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; @@ -1276,17 +1543,19 @@ def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))), // FP Conditional moves. // -let neverHasSideEffects = 1 in { -def VMOVDcc : ARMPseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, pred:$p), - 4, IIC_fpUNA64, - [/*(set DPR:$Dd, (ARMcmov DPR:$Dn, DPR:$Dm, imm:$cc))*/]>, - RegConstraint<"$Dn = $Dd">; +let hasSideEffects = 0 in { +def VMOVDcc : PseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, cmovpred:$p), + IIC_fpUNA64, + [(set (f64 DPR:$Dd), + (ARMcmov DPR:$Dn, DPR:$Dm, cmovpred:$p))]>, + RegConstraint<"$Dn = $Dd">, Requires<[HasVFP2,HasDPVFP]>; -def VMOVScc : ARMPseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, pred:$p), - 4, IIC_fpUNA32, - [/*(set SPR:$Sd, (ARMcmov SPR:$Sn, SPR:$Sm, imm:$cc))*/]>, - RegConstraint<"$Sn = $Sd">; -} // neverHasSideEffects +def VMOVScc : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, cmovpred:$p), + IIC_fpUNA32, + [(set (f32 SPR:$Sd), + (ARMcmov SPR:$Sn, SPR:$Sm, cmovpred:$p))]>, + RegConstraint<"$Sn = $Sd">, Requires<[HasVFP2]>; +} // hasSideEffects //===----------------------------------------------------------------------===// // Move from VFP System Register to ARM core register. @@ -1331,6 +1600,12 @@ let Uses = [FPSCR] in { "vmrs", "\t$Rt, mvfr0", []>; def VMRS_MVFR1 : MovFromVFP<0b0110 /* mvfr1 */, (outs GPR:$Rt), (ins), "vmrs", "\t$Rt, mvfr1", []>; + def VMRS_MVFR2 : MovFromVFP<0b0101 /* mvfr2 */, (outs GPR:$Rt), (ins), + "vmrs", "\t$Rt, mvfr2", []>, Requires<[HasFPARMv8]>; + def VMRS_FPINST : MovFromVFP<0b1001 /* fpinst */, (outs GPR:$Rt), (ins), + "vmrs", "\t$Rt, fpinst", []>; + def VMRS_FPINST2 : MovFromVFP<0b1010 /* fpinst2 */, (outs GPR:$Rt), (ins), + "vmrs", "\t$Rt, fpinst2", []>; } //===----------------------------------------------------------------------===// @@ -1364,6 +1639,11 @@ let Defs = [FPSCR] in { // System level GPR -> FPSID def VMSR_FPSID : MovToVFP<0b0000 /* fpsid */, (outs), (ins GPR:$src), "vmsr", "\tfpsid, $src", []>; + + def VMSR_FPINST : MovToVFP<0b1001 /* fpinst */, (outs), (ins GPR:$src), + "vmsr", "\tfpinst, $src", []>; + def VMSR_FPINST2 : MovToVFP<0b1010 /* fpinst2 */, (outs), (ins GPR:$src), + "vmsr", "\tfpinst2, $src", []>; } //===----------------------------------------------------------------------===// @@ -1375,7 +1655,8 @@ let isReMaterializable = 1 in { def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm), VFPMiscFrm, IIC_fpUNA64, "vmov", ".f64\t$Dd, $imm", - [(set DPR:$Dd, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> { + [(set DPR:$Dd, vfp_f64imm:$imm)]>, + Requires<[HasVFP3,HasDPVFP]> { bits<5> Dd; bits<8> imm; @@ -1412,7 +1693,7 @@ def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm), //===----------------------------------------------------------------------===// // Assembler aliases. // -// A few mnemnoic aliases for pre-unifixed syntax. We don't guarantee to +// A few mnemonic aliases for pre-unifixed syntax. We don't guarantee to // support them all, but supporting at least some of the basics is // good to be friendly. def : VFP2MnemonicAlias<"flds", "vldr">; @@ -1457,23 +1738,23 @@ def : VFP2MnemonicAlias<"fmrx", "vmrs">; def : VFP2MnemonicAlias<"fmxr", "vmsr">; // Be friendly and accept the old form of zero-compare -def : VFP2InstAlias<"fcmpzd${p} $val", (VCMPZD DPR:$val, pred:$p)>; +def : VFP2DPInstAlias<"fcmpzd${p} $val", (VCMPZD DPR:$val, pred:$p)>; def : VFP2InstAlias<"fcmpzs${p} $val", (VCMPZS SPR:$val, pred:$p)>; def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>; def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm", (VADDS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>; -def : VFP2InstAlias<"faddd${p} $Dd, $Dn, $Dm", - (VADDD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>; +def : VFP2DPInstAlias<"faddd${p} $Dd, $Dn, $Dm", + (VADDD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>; def : VFP2InstAlias<"fsubs${p} $Sd, $Sn, $Sm", (VSUBS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>; -def : VFP2InstAlias<"fsubd${p} $Dd, $Dn, $Dm", - (VSUBD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>; +def : VFP2DPInstAlias<"fsubd${p} $Dd, $Dn, $Dm", + (VSUBD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>; // No need for the size suffix on VSQRT. It's implied by the register classes. def : VFP2InstAlias<"vsqrt${p} $Sd, $Sm", (VSQRTS SPR:$Sd, SPR:$Sm, pred:$p)>; -def : VFP2InstAlias<"vsqrt${p} $Dd, $Dm", (VSQRTD DPR:$Dd, DPR:$Dm, pred:$p)>; +def : VFP2DPInstAlias<"vsqrt${p} $Dd, $Dm", (VSQRTD DPR:$Dd, DPR:$Dm, pred:$p)>; // VLDR/VSTR accept an optional type suffix. def : VFP2InstAlias<"vldr${p}.32 $Sd, $addr", @@ -1508,3 +1789,14 @@ def : VFP2InstAlias<"vmov${p}.f64 $Dn, $Rt, $Rt2", // VMOVD does. def : VFP2InstAlias<"vmov${p} $Sd, $Sm", (VMOVS SPR:$Sd, SPR:$Sm, pred:$p)>; + +// FCONSTD/FCONSTS alias for vmov.f64/vmov.f32 +// These aliases provide added functionality over vmov.f instructions by +// allowing users to write assembly containing encoded floating point constants +// (e.g. #0x70 vs #1.0). Without these alises there is no way for the +// assembler to accept encoded fp constants (but the equivalent fp-literal is +// accepted directly by vmovf). +def : VFP3InstAlias<"fconstd${p} $Dd, $val", + (FCONSTD DPR:$Dd, vfp_f64imm:$val, pred:$p)>; +def : VFP3InstAlias<"fconsts${p} $Sd, $val", + (FCONSTS SPR:$Sd, vfp_f32imm:$val, pred:$p)>;