X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMInstrVFP.td;h=63e7940bb14ee7cee5ed91182c0ab9ccb7271ac8;hp=e3ab30419fac26f1ab861bf2815c19f1b5648865;hb=f1f1b483b6fd0324c325ef50cdc60d28bf38138c;hpb=652199961a8acf3314b15bb3d5133e8a9a56b615 diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index e3ab30419fa..63e7940bb14 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -11,22 +11,15 @@ // //===----------------------------------------------------------------------===// -def SDT_FTOI : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>; -def SDT_ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>; def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>; def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; -def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>; -def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>; -def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>; -def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>; def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>; def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>; def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>; def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>; - //===----------------------------------------------------------------------===// // Operand Definitions. // @@ -43,7 +36,7 @@ def vfp_f32imm : Operand, }], SDNodeXFormgetValueAPF(); uint32_t enc = ARM_AM::getFP32Imm(InVal); - return CurDAG->getTargetConstant(enc, MVT::i32); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); }]>> { let PrintMethod = "printFPImmOperand"; let ParserMatchClass = FPImmOperand; @@ -55,12 +48,21 @@ def vfp_f64imm : Operand, }], SDNodeXFormgetValueAPF(); uint32_t enc = ARM_AM::getFP64Imm(InVal); - return CurDAG->getTargetConstant(enc, MVT::i32); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); }]>> { let PrintMethod = "printFPImmOperand"; let ParserMatchClass = FPImmOperand; } +def alignedload32 : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast(N)->getAlignment() >= 4; +}]>; + +def alignedstore32 : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast(N)->getAlignment() >= 4; +}]>; + // The VCVT to/from fixed-point instructions encode the 'fbits' operand // (the number of fixed bits) differently than it appears in the assembly // source. It's encoded as "Size - fbits" where Size is the size of the @@ -86,11 +88,11 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in { def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr), IIC_fpLoad64, "vldr", "\t$Dd, $addr", - [(set DPR:$Dd, (f64 (load addrmode5:$addr)))]>; + [(set DPR:$Dd, (f64 (alignedload32 addrmode5:$addr)))]>; def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr), IIC_fpLoad32, "vldr", "\t$Sd, $addr", - [(set SPR:$Sd, (load addrmode5:$addr))]> { + [(set SPR:$Sd, (alignedload32 addrmode5:$addr))]> { // Some single precision VFP instructions may be executed on both NEON and VFP // pipelines. let D = VFPNeonDomain; @@ -100,11 +102,11 @@ def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr), def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr), IIC_fpStore64, "vstr", "\t$Dd, $addr", - [(store (f64 DPR:$Dd), addrmode5:$addr)]>; + [(alignedstore32 (f64 DPR:$Dd), addrmode5:$addr)]>; def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr), IIC_fpStore32, "vstr", "\t$Sd, $addr", - [(store SPR:$Sd, addrmode5:$addr)]> { + [(alignedstore32 SPR:$Sd, addrmode5:$addr)]> { // Some single precision VFP instructions may be executed on both NEON and VFP // pipelines. let D = VFPNeonDomain; @@ -185,19 +187,40 @@ multiclass vfp_ldst_mult; let mayStore = 1, hasExtraSrcRegAllocReq = 1 in -defm VSTM : vfp_ldst_mult<"vstm", 0, IIC_fpLoad_m, IIC_fpLoad_mu>; +defm VSTM : vfp_ldst_mult<"vstm", 0, IIC_fpStore_m, IIC_fpStore_mu>; -} // neverHasSideEffects +} // hasSideEffects def : MnemonicAlias<"vldm", "vldmia">; def : MnemonicAlias<"vstm", "vstmia">; +// FLDM/FSTM - Load / Store multiple single / double precision registers for +// pre-ARMv6 cores. +// These instructions are deprecated! +def : VFP2MnemonicAlias<"fldmias", "vldmia">; +def : VFP2MnemonicAlias<"fldmdbs", "vldmdb">; +def : VFP2MnemonicAlias<"fldmeas", "vldmdb">; +def : VFP2MnemonicAlias<"fldmfds", "vldmia">; +def : VFP2MnemonicAlias<"fldmiad", "vldmia">; +def : VFP2MnemonicAlias<"fldmdbd", "vldmdb">; +def : VFP2MnemonicAlias<"fldmead", "vldmdb">; +def : VFP2MnemonicAlias<"fldmfdd", "vldmia">; + +def : VFP2MnemonicAlias<"fstmias", "vstmia">; +def : VFP2MnemonicAlias<"fstmdbs", "vstmdb">; +def : VFP2MnemonicAlias<"fstmeas", "vstmia">; +def : VFP2MnemonicAlias<"fstmfds", "vstmdb">; +def : VFP2MnemonicAlias<"fstmiad", "vstmia">; +def : VFP2MnemonicAlias<"fstmdbd", "vstmdb">; +def : VFP2MnemonicAlias<"fstmead", "vstmia">; +def : VFP2MnemonicAlias<"fstmfdd", "vstmdb">; + def : InstAlias<"vpush${p} $r", (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r)>, Requires<[HasVFP2]>; def : InstAlias<"vpush${p} $r", (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>, @@ -215,7 +238,42 @@ defm : VFPDTAnyInstAlias<"vpop${p}", "$r", defm : VFPDTAnyInstAlias<"vpop${p}", "$r", (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>; -// FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores +// FLDMX, FSTMX - Load and store multiple unknown precision registers for +// pre-armv6 cores. +// These instruction are deprecated so we don't want them to get selected. +multiclass vfp_ldstx_mult { + // Unknown precision + def XIA : + AXXI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops), + IndexModeNone, !strconcat(asm, "iax${p}\t$Rn, $regs"), "", []> { + let Inst{24-23} = 0b01; // Increment After + let Inst{21} = 0; // No writeback + let Inst{20} = L_bit; + } + def XIA_UPD : + AXXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops), + IndexModeUpd, !strconcat(asm, "iax${p}\t$Rn!, $regs"), "$Rn = $wb", []> { + let Inst{24-23} = 0b01; // Increment After + let Inst{21} = 1; // Writeback + let Inst{20} = L_bit; + } + def XDB_UPD : + AXXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops), + IndexModeUpd, !strconcat(asm, "dbx${p}\t$Rn!, $regs"), "$Rn = $wb", []> { + let Inst{24-23} = 0b10; // Decrement Before + let Inst{21} = 1; // Writeback + let Inst{20} = L_bit; + } +} + +defm FLDM : vfp_ldstx_mult<"fldm", 1>; +defm FSTM : vfp_ldstx_mult<"fstm", 0>; + +def : VFP2MnemonicAlias<"fldmeax", "fldmdbx">; +def : VFP2MnemonicAlias<"fldmfdx", "fldmiax">; + +def : VFP2MnemonicAlias<"fstmeax", "fstmiax">; +def : VFP2MnemonicAlias<"fstmfdx", "fstmdbx">; //===----------------------------------------------------------------------===// // FP Binary Operations. @@ -295,9 +353,52 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0, let D = VFPNeonA8Domain; } +multiclass vsel_inst opc, int CC> { + let DecoderNamespace = "VFPV8", PostEncoderMethod = "", + Uses = [CPSR], AddedComplexity = 4 in { + def S : ASbInp<0b11100, opc, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + NoItinerary, !strconcat("vsel", op, ".f32\t$Sd, $Sn, $Sm"), + [(set SPR:$Sd, (ARMcmov SPR:$Sm, SPR:$Sn, CC))]>, + Requires<[HasFPARMv8]>; + + def D : ADbInp<0b11100, opc, 0, + (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), + NoItinerary, !strconcat("vsel", op, ".f64\t$Dd, $Dn, $Dm"), + [(set DPR:$Dd, (ARMcmov (f64 DPR:$Dm), (f64 DPR:$Dn), CC))]>, + Requires<[HasFPARMv8, HasDPVFP]>; + } +} + +// The CC constants here match ARMCC::CondCodes. +defm VSELGT : vsel_inst<"gt", 0b11, 12>; +defm VSELGE : vsel_inst<"ge", 0b10, 10>; +defm VSELEQ : vsel_inst<"eq", 0b00, 0>; +defm VSELVS : vsel_inst<"vs", 0b01, 6>; + +multiclass vmaxmin_inst { + let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in { + def S : ASbInp<0b11101, 0b00, opc, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + NoItinerary, !strconcat(op, ".f32\t$Sd, $Sn, $Sm"), + [(set SPR:$Sd, (SD SPR:$Sn, SPR:$Sm))]>, + Requires<[HasFPARMv8]>; + + def D : ADbInp<0b11101, 0b00, opc, + (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), + NoItinerary, !strconcat(op, ".f64\t$Dd, $Dn, $Dm"), + [(set DPR:$Dd, (f64 (SD (f64 DPR:$Dn), (f64 DPR:$Dm))))]>, + Requires<[HasFPARMv8, HasDPVFP]>; + } +} + +defm VMAXNM : vmaxmin_inst<"vmaxnm", 0, fmaxnum>; +defm VMINNM : vmaxmin_inst<"vminnm", 1, fminnum>; + // Match reassociated forms only if not sign dependent rounding. def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)), - (VNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>; + (VNMULD DPR:$a, DPR:$b)>, + Requires<[NoHonorSignDependentRounding,HasDPVFP]>; def : Pat<(fmul (fneg SPR:$a), SPR:$b), (VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>; @@ -407,6 +508,8 @@ def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, let Inst{5} = Sm{0}; let Inst{15-12} = Dd{3-0}; let Inst{22} = Dd{4}; + + let Predicates = [HasVFP2, HasDPVFP]; } // Special case encoding: bits 11-8 is 0b1011. @@ -428,32 +531,175 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm, let Inst{11-8} = 0b1011; let Inst{7-6} = 0b11; let Inst{4} = 0; + + let Predicates = [HasVFP2, HasDPVFP]; } -// Between half-precision and single-precision. For disassembly only. +// Between half, single and double-precision. For disassembly only. // FIXME: Verify encoding after integrated assembler is working. def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]>, + Requires<[HasFP16]>; def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm", - [/* For disassembly only; pattern left blank */]>; - -def : ARMPat<(f32_to_f16 SPR:$a), - (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; - -def : ARMPat<(f16_to_f32 GPR:$a), - (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; + [/* For disassembly only; pattern left blank */]>, + Requires<[HasFP16]>; def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]>, + Requires<[HasFP16]>; def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]>, + Requires<[HasFP16]>; + +def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0, + (outs DPR:$Dd), (ins SPR:$Sm), + NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm", + []>, Requires<[HasFPARMv8, HasDPVFP]> { + // Instruction operands. + bits<5> Sm; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; +} + +def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0, + (outs SPR:$Sd), (ins DPR:$Dm), + NoItinerary, "vcvtb", ".f16.f64\t$Sd, $Dm", + []>, Requires<[HasFPARMv8, HasDPVFP]> { + // Instruction operands. + bits<5> Sd; + bits<5> Dm; + + // Encode instruction operands. + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; +} + +def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0, + (outs DPR:$Dd), (ins SPR:$Sm), + NoItinerary, "vcvtt", ".f64.f16\t$Dd, $Sm", + []>, Requires<[HasFPARMv8, HasDPVFP]> { + // Instruction operands. + bits<5> Sm; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; +} + +def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0, + (outs SPR:$Sd), (ins DPR:$Dm), + NoItinerary, "vcvtt", ".f16.f64\t$Sd, $Dm", + []>, Requires<[HasFPARMv8, HasDPVFP]> { + // Instruction operands. + bits<5> Sd; + bits<5> Dm; + + // Encode instruction operands. + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; +} + +def : Pat<(fp_to_f16 SPR:$a), + (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; + +def : Pat<(fp_to_f16 (f64 DPR:$a)), + (i32 (COPY_TO_REGCLASS (VCVTBDH DPR:$a), GPR))>; + +def : Pat<(f16_to_fp GPR:$a), + (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; + +def : Pat<(f64 (f16_to_fp GPR:$a)), + (VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>; + +multiclass vcvt_inst rm, + SDPatternOperator node = null_frag> { + let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in { + def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vcvt", opc, ".s32.f32\t$Sd, $Sm"), + []>, + Requires<[HasFPARMv8]> { + let Inst{17-16} = rm; + } + + def US : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vcvt", opc, ".u32.f32\t$Sd, $Sm"), + []>, + Requires<[HasFPARMv8]> { + let Inst{17-16} = rm; + } + + def SD : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, + (outs SPR:$Sd), (ins DPR:$Dm), + NoItinerary, !strconcat("vcvt", opc, ".s32.f64\t$Sd, $Dm"), + []>, + Requires<[HasFPARMv8, HasDPVFP]> { + bits<5> Dm; + + let Inst{17-16} = rm; + + // Encode instruction operands + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{8} = 1; + } + + def UD : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0, + (outs SPR:$Sd), (ins DPR:$Dm), + NoItinerary, !strconcat("vcvt", opc, ".u32.f64\t$Sd, $Dm"), + []>, + Requires<[HasFPARMv8, HasDPVFP]> { + bits<5> Dm; + + let Inst{17-16} = rm; + + // Encode instruction operands + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{8} = 1; + } + } + + let Predicates = [HasFPARMv8] in { + def : Pat<(i32 (fp_to_sint (node SPR:$a))), + (COPY_TO_REGCLASS + (!cast(NAME#"SS") SPR:$a), + GPR)>; + def : Pat<(i32 (fp_to_uint (node SPR:$a))), + (COPY_TO_REGCLASS + (!cast(NAME#"US") SPR:$a), + GPR)>; + } + let Predicates = [HasFPARMv8, HasDPVFP] in { + def : Pat<(i32 (fp_to_sint (node (f64 DPR:$a)))), + (COPY_TO_REGCLASS + (!cast(NAME#"SD") DPR:$a), + GPR)>; + def : Pat<(i32 (fp_to_uint (node (f64 DPR:$a)))), + (COPY_TO_REGCLASS + (!cast(NAME#"UD") DPR:$a), + GPR)>; + } +} + +defm VCVTA : vcvt_inst<"a", 0b00, frnd>; +defm VCVTN : vcvt_inst<"n", 0b01>; +defm VCVTP : vcvt_inst<"p", 0b10, fceil>; +defm VCVTM : vcvt_inst<"m", 0b11, ffloor>; def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0, (outs DPR:$Dd), (ins DPR:$Dm), @@ -469,6 +715,68 @@ def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0, let D = VFPNeonA8Domain; } +multiclass vrint_inst_zrx { + def S : ASuI<0b11101, 0b11, 0b0110, 0b11, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm", + [(set (f32 SPR:$Sd), (node (f32 SPR:$Sm)))]>, + Requires<[HasFPARMv8]> { + let Inst{7} = op2; + let Inst{16} = op; + } + def D : ADuI<0b11101, 0b11, 0b0110, 0b11, 0, + (outs DPR:$Dd), (ins DPR:$Dm), + NoItinerary, !strconcat("vrint", opc), ".f64\t$Dd, $Dm", + [(set (f64 DPR:$Dd), (node (f64 DPR:$Dm)))]>, + Requires<[HasFPARMv8, HasDPVFP]> { + let Inst{7} = op2; + let Inst{16} = op; + } + + def : InstAlias(NAME#"S") SPR:$Sd, SPR:$Sm, pred:$p)>, + Requires<[HasFPARMv8]>; + def : InstAlias(NAME#"D") DPR:$Dd, DPR:$Dm, pred:$p)>, + Requires<[HasFPARMv8,HasDPVFP]>; +} + +defm VRINTZ : vrint_inst_zrx<"z", 0, 1, ftrunc>; +defm VRINTR : vrint_inst_zrx<"r", 0, 0, fnearbyint>; +defm VRINTX : vrint_inst_zrx<"x", 1, 0, frint>; + +multiclass vrint_inst_anpm rm, + SDPatternOperator node = null_frag> { + let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in { + def S : ASuInp<0b11101, 0b11, 0b1000, 0b01, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vrint", opc, ".f32\t$Sd, $Sm"), + [(set (f32 SPR:$Sd), (node (f32 SPR:$Sm)))]>, + Requires<[HasFPARMv8]> { + let Inst{17-16} = rm; + } + def D : ADuInp<0b11101, 0b11, 0b1000, 0b01, 0, + (outs DPR:$Dd), (ins DPR:$Dm), + NoItinerary, !strconcat("vrint", opc, ".f64\t$Dd, $Dm"), + [(set (f64 DPR:$Dd), (node (f64 DPR:$Dm)))]>, + Requires<[HasFPARMv8, HasDPVFP]> { + let Inst{17-16} = rm; + } + } + + def : InstAlias(NAME#"S") SPR:$Sd, SPR:$Sm)>, + Requires<[HasFPARMv8]>; + def : InstAlias(NAME#"D") DPR:$Dd, DPR:$Dm)>, + Requires<[HasFPARMv8,HasDPVFP]>; +} + +defm VRINTA : vrint_inst_anpm<"a", 0b00, frnd>; +defm VRINTN : vrint_inst_anpm<"n", 0b01>; +defm VRINTP : vrint_inst_anpm<"p", 0b10, fceil>; +defm VRINTM : vrint_inst_anpm<"m", 0b11, ffloor>; + def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs DPR:$Dd), (ins DPR:$Dm), IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm", @@ -479,7 +787,7 @@ def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0, IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm", [(set SPR:$Sd, (fsqrt SPR:$Sm))]>; -let neverHasSideEffects = 1 in { +let hasSideEffects = 0 in { def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs DPR:$Dd), (ins DPR:$Dm), IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>; @@ -487,7 +795,7 @@ def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0, def VMOVS : ASuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>; -} // neverHasSideEffects +} // hasSideEffects //===----------------------------------------------------------------------===// // FP <-> GPR Copies. Int <-> FP Conversions. @@ -514,10 +822,12 @@ def VMOVRS : AVConv2I<0b11100001, 0b1010, let D = VFPNeonDomain; } +// Bitcast i32 -> f32. NEON prefers to use VMOVDRR. def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$Sn), (ins GPR:$Rt), IIC_fpMOVIS, "vmov", "\t$Sn, $Rt", - [(set SPR:$Sn, (bitconvert GPR:$Rt))]> { + [(set SPR:$Sn, (bitconvert GPR:$Rt))]>, + Requires<[HasVFP2, UseVMOVSR]> { // Instruction operands. bits<5> Sn; bits<4> Rt; @@ -535,7 +845,7 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010, let D = VFPNeonDomain; } -let neverHasSideEffects = 1 in { +let hasSideEffects = 0 in { def VMOVRRD : AVConv3I<0b11000101, 0b1011, (outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm), IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm", @@ -556,6 +866,11 @@ def VMOVRRD : AVConv3I<0b11000101, 0b1011, // Some single precision VFP instructions may be executed on both NEON and VFP // pipelines. let D = VFPNeonDomain; + + // This instruction is equivalent to + // $Rt = EXTRACT_SUBREG $Dm, ssub_0 + // $Rt2 = EXTRACT_SUBREG $Dm, ssub_1 + let isExtractSubreg = 1; } def VMOVRRS : AVConv3I<0b11000101, 0b1010, @@ -579,7 +894,7 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010, let D = VFPNeonDomain; let DecoderMethod = "DecodeVMOVRRS"; } -} // neverHasSideEffects +} // hasSideEffects // FMDHR: GPR -> SPR // FMDLR: GPR -> SPR @@ -604,9 +919,29 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011, // Some single precision VFP instructions may be executed on both NEON and VFP // pipelines. let D = VFPNeonDomain; -} -let neverHasSideEffects = 1 in + // This instruction is equivalent to + // $Dm = REG_SEQUENCE $Rt, ssub_0, $Rt2, ssub_1 + let isRegSequence = 1; +} + +// Hoist an fabs or a fneg of a value coming from integer registers +// and do the fabs/fneg on the integer value. This is never a lose +// and could enable the conversion to float to be removed completely. +def : Pat<(fabs (arm_fmdrr GPR:$Rl, GPR:$Rh)), + (VMOVDRR GPR:$Rl, (BFC GPR:$Rh, (i32 0x7FFFFFFF)))>, + Requires<[IsARM, HasV6T2]>; +def : Pat<(fabs (arm_fmdrr GPR:$Rl, GPR:$Rh)), + (VMOVDRR GPR:$Rl, (t2BFC GPR:$Rh, (i32 0x7FFFFFFF)))>, + Requires<[IsThumb2, HasV6T2]>; +def : Pat<(fneg (arm_fmdrr GPR:$Rl, GPR:$Rh)), + (VMOVDRR GPR:$Rl, (EORri GPR:$Rh, (i32 0x80000000)))>, + Requires<[IsARM]>; +def : Pat<(fneg (arm_fmdrr GPR:$Rl, GPR:$Rh)), + (VMOVDRR GPR:$Rl, (t2EORri GPR:$Rh, (i32 0x80000000)))>, + Requires<[IsThumb2]>; + +let hasSideEffects = 0 in def VMOVSRR : AVConv5I<0b11000100, 0b1010, (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2), IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2", @@ -656,6 +991,8 @@ class AVConv1IDs_Encode opcod1, bits<2> opcod2, bits<4> opcod3, let Inst{5} = Sm{0}; let Inst{15-12} = Dd{3-0}; let Inst{22} = Dd{4}; + + let Predicates = [HasVFP2, HasDPVFP]; } class AVConv1InSs_Encode opcod1, bits<2> opcod2, bits<4> opcod3, @@ -677,14 +1014,22 @@ class AVConv1InSs_Encode opcod1, bits<2> opcod2, bits<4> opcod3, def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011, (outs DPR:$Dd), (ins SPR:$Sm), IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm", - [(set DPR:$Dd, (f64 (arm_sitof SPR:$Sm)))]> { + []> { let Inst{7} = 1; // s32 } +let Predicates=[HasVFP2, HasDPVFP] in { + def : VFPPat<(f64 (sint_to_fp GPR:$a)), + (VSITOD (COPY_TO_REGCLASS GPR:$a, SPR))>; + + def : VFPPat<(f64 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))), + (VSITOD (VLDRS addrmode5:$a))>; +} + def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, (outs SPR:$Sd),(ins SPR:$Sm), IIC_fpCVTIS, "vcvt", ".f32.s32\t$Sd, $Sm", - [(set SPR:$Sd, (arm_sitof SPR:$Sm))]> { + []> { let Inst{7} = 1; // s32 // Some single precision VFP instructions may be executed on both NEON and @@ -692,17 +1037,31 @@ def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, let D = VFPNeonA8Domain; } +def : VFPNoNEONPat<(f32 (sint_to_fp GPR:$a)), + (VSITOS (COPY_TO_REGCLASS GPR:$a, SPR))>; + +def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))), + (VSITOS (VLDRS addrmode5:$a))>; + def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011, (outs DPR:$Dd), (ins SPR:$Sm), IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm", - [(set DPR:$Dd, (f64 (arm_uitof SPR:$Sm)))]> { + []> { let Inst{7} = 0; // u32 } +let Predicates=[HasVFP2, HasDPVFP] in { + def : VFPPat<(f64 (uint_to_fp GPR:$a)), + (VUITOD (COPY_TO_REGCLASS GPR:$a, SPR))>; + + def : VFPPat<(f64 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))), + (VUITOD (VLDRS addrmode5:$a))>; +} + def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTIS, "vcvt", ".f32.u32\t$Sd, $Sm", - [(set SPR:$Sd, (arm_uitof SPR:$Sm))]> { + []> { let Inst{7} = 0; // u32 // Some single precision VFP instructions may be executed on both NEON and @@ -710,6 +1069,12 @@ def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, let D = VFPNeonA8Domain; } +def : VFPNoNEONPat<(f32 (uint_to_fp GPR:$a)), + (VUITOS (COPY_TO_REGCLASS GPR:$a, SPR))>; + +def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))), + (VUITOS (VLDRS addrmode5:$a))>; + // FP -> Int: class AVConv1IsD_Encode opcod1, bits<2> opcod2, bits<4> opcod3, @@ -727,6 +1092,8 @@ class AVConv1IsD_Encode opcod1, bits<2> opcod2, bits<4> opcod3, let Inst{5} = Dm{4}; let Inst{15-12} = Sd{4-1}; let Inst{22} = Sd{0}; + + let Predicates = [HasVFP2, HasDPVFP]; } class AVConv1InsS_Encode opcod1, bits<2> opcod2, bits<4> opcod3, @@ -750,14 +1117,22 @@ class AVConv1InsS_Encode opcod1, bits<2> opcod2, bits<4> opcod3, def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011, (outs SPR:$Sd), (ins DPR:$Dm), IIC_fpCVTDI, "vcvt", ".s32.f64\t$Sd, $Dm", - [(set SPR:$Sd, (arm_ftosi (f64 DPR:$Dm)))]> { + []> { let Inst{7} = 1; // Z bit } +let Predicates=[HasVFP2, HasDPVFP] in { + def : VFPPat<(i32 (fp_to_sint (f64 DPR:$a))), + (COPY_TO_REGCLASS (VTOSIZD DPR:$a), GPR)>; + + def : VFPPat<(alignedstore32 (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr), + (VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>; +} + def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTSI, "vcvt", ".s32.f32\t$Sd, $Sm", - [(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> { + []> { let Inst{7} = 1; // Z bit // Some single precision VFP instructions may be executed on both NEON and @@ -765,17 +1140,32 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010, let D = VFPNeonA8Domain; } +def : VFPNoNEONPat<(i32 (fp_to_sint SPR:$a)), + (COPY_TO_REGCLASS (VTOSIZS SPR:$a), GPR)>; + +def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_sint (f32 SPR:$a))), + addrmode5:$ptr), + (VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>; + def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011, (outs SPR:$Sd), (ins DPR:$Dm), IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm", - [(set SPR:$Sd, (arm_ftoui (f64 DPR:$Dm)))]> { + []> { let Inst{7} = 1; // Z bit } +let Predicates=[HasVFP2, HasDPVFP] in { + def : VFPPat<(i32 (fp_to_uint (f64 DPR:$a))), + (COPY_TO_REGCLASS (VTOUIZD DPR:$a), GPR)>; + + def : VFPPat<(alignedstore32 (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr), + (VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>; +} + def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpCVTSI, "vcvt", ".u32.f32\t$Sd, $Sm", - [(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> { + []> { let Inst{7} = 1; // Z bit // Some single precision VFP instructions may be executed on both NEON and @@ -783,6 +1173,13 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, let D = VFPNeonA8Domain; } +def : VFPNoNEONPat<(i32 (fp_to_uint SPR:$a)), + (COPY_TO_REGCLASS (VTOUIZS SPR:$a), GPR)>; + +def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_uint (f32 SPR:$a))), + addrmode5:$ptr), + (VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>; + // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR. let Uses = [FPSCR] in { // FIXME: Verify encoding after integrated assembler is working. @@ -830,7 +1227,8 @@ let Constraints = "$a = $dst" in { class AVConv1XInsS_Encode op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list pattern> - : AVConv1XI { + : AVConv1XI, + Sched<[WriteCvtFP]> { bits<5> dst; // if dp_operation then UInt(D:Vd) else UInt(Vd:D); let Inst{22} = dst{0}; @@ -841,11 +1239,14 @@ class AVConv1XInsS_Encode op1, bits<2> op2, bits<4> op3, bits<4> op4, class AVConv1XInsD_Encode op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list pattern> - : AVConv1XI { + : AVConv1XI, + Sched<[WriteCvtFP]> { bits<5> dst; // if dp_operation then UInt(D:Vd) else UInt(Vd:D); let Inst{22} = dst{4}; let Inst{15-12} = dst{3-0}; + + let Predicates = [HasVFP2, HasDPVFP]; } def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0, @@ -958,7 +1359,7 @@ def VMLAD : ADbI<0b11100, 0b00, 0, 0, [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; def VMLAS : ASbIn<0b11100, 0b00, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -974,7 +1375,7 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0, def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,DontUseFusedMAC]>; @@ -985,7 +1386,7 @@ def VMLSD : ADbI<0b11100, 0b00, 1, 0, [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; def VMLSS : ASbIn<0b11100, 0b00, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1001,7 +1402,7 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0, def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; @@ -1012,7 +1413,7 @@ def VNMLAD : ADbI<0b11100, 0b01, 1, 0, [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; def VNMLAS : ASbI<0b11100, 0b01, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1028,7 +1429,7 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0, def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; @@ -1039,7 +1440,7 @@ def VNMLSD : ADbI<0b11100, 0b01, 0, 0, [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; def VNMLSS : ASbI<0b11100, 0b01, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1054,7 +1455,7 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0, def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; @@ -1068,7 +1469,7 @@ def VFMAD : ADbI<0b11101, 0b10, 0, 0, [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; def VFMAS : ASbIn<0b11101, 0b10, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1083,7 +1484,7 @@ def VFMAS : ASbIn<0b11101, 0b10, 0, 0, def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VFMAS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; @@ -1092,7 +1493,7 @@ def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), // (fma x, y, z) -> (vfms z, x, y) def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, DPR:$Ddin)), (VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, - Requires<[HasVFP4]>; + Requires<[HasVFP4,HasDPVFP]>; def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, SPR:$Sdin)), (VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; @@ -1103,7 +1504,7 @@ def VFMSD : ADbI<0b11101, 0b10, 1, 0, [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; def VFMSS : ASbIn<0b11101, 0b10, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1118,7 +1519,7 @@ def VFMSS : ASbIn<0b11101, 0b10, 1, 0, def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VFMSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; @@ -1127,14 +1528,14 @@ def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), // (fma (fneg x), y, z) -> (vfms z, x, y) def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin)), (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, - Requires<[HasVFP4]>; + Requires<[HasVFP4,HasDPVFP]>; def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin)), (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; // (fma x, (fneg y), z) -> (vfms z, x, y) def : Pat<(f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin)), (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, - Requires<[HasVFP4]>; + Requires<[HasVFP4,HasDPVFP]>; def : Pat<(f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin)), (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; @@ -1145,7 +1546,7 @@ def VFNMAD : ADbI<0b11101, 0b01, 1, 0, [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; def VFNMAS : ASbI<0b11101, 0b01, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1160,7 +1561,7 @@ def VFNMAS : ASbI<0b11101, 0b01, 1, 0, def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), (VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; @@ -1169,14 +1570,14 @@ def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), // (fneg (fma x, y, z)) -> (vfnma z, x, y) def : Pat<(fneg (fma (f64 DPR:$Dn), (f64 DPR:$Dm), (f64 DPR:$Ddin))), (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, - Requires<[HasVFP4]>; + Requires<[HasVFP4,HasDPVFP]>; def : Pat<(fneg (fma (f32 SPR:$Sn), (f32 SPR:$Sm), (f32 SPR:$Sdin))), (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; // (fma (fneg x), y, (fneg z)) -> (vfnma z, x, y) def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, (fneg DPR:$Ddin))), (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, - Requires<[HasVFP4]>; + Requires<[HasVFP4,HasDPVFP]>; def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, (fneg SPR:$Sdin))), (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; @@ -1187,7 +1588,7 @@ def VFNMSD : ADbI<0b11101, 0b01, 0, 0, [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; def VFNMSS : ASbI<0b11101, 0b01, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1201,7 +1602,7 @@ def VFNMSS : ASbI<0b11101, 0b01, 0, 0, def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), (VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; @@ -1211,21 +1612,21 @@ def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), // (fma x, y, (fneg z)) -> (vfnms z, x, y)) def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, (fneg DPR:$Ddin))), (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, - Requires<[HasVFP4]>; + Requires<[HasVFP4,HasDPVFP]>; def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, (fneg SPR:$Sdin))), (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; // (fneg (fma (fneg x), y, z)) -> (vfnms z, x, y) def : Pat<(fneg (f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin))), (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, - Requires<[HasVFP4]>; + Requires<[HasVFP4,HasDPVFP]>; def : Pat<(fneg (f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin))), (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; // (fneg (fma x, (fneg y), z) -> (vfnms z, x, y) def : Pat<(fneg (f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin))), (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, - Requires<[HasVFP4]>; + Requires<[HasVFP4,HasDPVFP]>; def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))), (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; @@ -1234,17 +1635,19 @@ def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))), // FP Conditional moves. // -let neverHasSideEffects = 1 in { -def VMOVDcc : ARMPseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, pred:$p), - 4, IIC_fpUNA64, - [/*(set DPR:$Dd, (ARMcmov DPR:$Dn, DPR:$Dm, imm:$cc))*/]>, - RegConstraint<"$Dn = $Dd">; +let hasSideEffects = 0 in { +def VMOVDcc : PseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, cmovpred:$p), + IIC_fpUNA64, + [(set (f64 DPR:$Dd), + (ARMcmov DPR:$Dn, DPR:$Dm, cmovpred:$p))]>, + RegConstraint<"$Dn = $Dd">, Requires<[HasVFP2,HasDPVFP]>; -def VMOVScc : ARMPseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, pred:$p), - 4, IIC_fpUNA32, - [/*(set SPR:$Sd, (ARMcmov SPR:$Sn, SPR:$Sm, imm:$cc))*/]>, - RegConstraint<"$Sn = $Sd">; -} // neverHasSideEffects +def VMOVScc : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, cmovpred:$p), + IIC_fpUNA32, + [(set (f32 SPR:$Sd), + (ARMcmov SPR:$Sn, SPR:$Sm, cmovpred:$p))]>, + RegConstraint<"$Sn = $Sd">, Requires<[HasVFP2]>; +} // hasSideEffects //===----------------------------------------------------------------------===// // Move from VFP System Register to ARM core register. @@ -1289,6 +1692,12 @@ let Uses = [FPSCR] in { "vmrs", "\t$Rt, mvfr0", []>; def VMRS_MVFR1 : MovFromVFP<0b0110 /* mvfr1 */, (outs GPR:$Rt), (ins), "vmrs", "\t$Rt, mvfr1", []>; + def VMRS_MVFR2 : MovFromVFP<0b0101 /* mvfr2 */, (outs GPR:$Rt), (ins), + "vmrs", "\t$Rt, mvfr2", []>, Requires<[HasFPARMv8]>; + def VMRS_FPINST : MovFromVFP<0b1001 /* fpinst */, (outs GPR:$Rt), (ins), + "vmrs", "\t$Rt, fpinst", []>; + def VMRS_FPINST2 : MovFromVFP<0b1010 /* fpinst2 */, (outs GPR:$Rt), (ins), + "vmrs", "\t$Rt, fpinst2", []>; } //===----------------------------------------------------------------------===// @@ -1322,6 +1731,11 @@ let Defs = [FPSCR] in { // System level GPR -> FPSID def VMSR_FPSID : MovToVFP<0b0000 /* fpsid */, (outs), (ins GPR:$src), "vmsr", "\tfpsid, $src", []>; + + def VMSR_FPINST : MovToVFP<0b1001 /* fpinst */, (outs), (ins GPR:$src), + "vmsr", "\tfpinst, $src", []>; + def VMSR_FPINST2 : MovToVFP<0b1010 /* fpinst2 */, (outs), (ins GPR:$src), + "vmsr", "\tfpinst2, $src", []>; } //===----------------------------------------------------------------------===// @@ -1333,7 +1747,8 @@ let isReMaterializable = 1 in { def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm), VFPMiscFrm, IIC_fpUNA64, "vmov", ".f64\t$Dd, $imm", - [(set DPR:$Dd, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> { + [(set DPR:$Dd, vfp_f64imm:$imm)]>, + Requires<[HasVFP3,HasDPVFP]> { bits<5> Dd; bits<8> imm; @@ -1370,7 +1785,7 @@ def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm), //===----------------------------------------------------------------------===// // Assembler aliases. // -// A few mnemnoic aliases for pre-unifixed syntax. We don't guarantee to +// A few mnemonic aliases for pre-unifixed syntax. We don't guarantee to // support them all, but supporting at least some of the basics is // good to be friendly. def : VFP2MnemonicAlias<"flds", "vldr">; @@ -1415,23 +1830,23 @@ def : VFP2MnemonicAlias<"fmrx", "vmrs">; def : VFP2MnemonicAlias<"fmxr", "vmsr">; // Be friendly and accept the old form of zero-compare -def : VFP2InstAlias<"fcmpzd${p} $val", (VCMPZD DPR:$val, pred:$p)>; +def : VFP2DPInstAlias<"fcmpzd${p} $val", (VCMPZD DPR:$val, pred:$p)>; def : VFP2InstAlias<"fcmpzs${p} $val", (VCMPZS SPR:$val, pred:$p)>; def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>; def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm", (VADDS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>; -def : VFP2InstAlias<"faddd${p} $Dd, $Dn, $Dm", - (VADDD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>; +def : VFP2DPInstAlias<"faddd${p} $Dd, $Dn, $Dm", + (VADDD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>; def : VFP2InstAlias<"fsubs${p} $Sd, $Sn, $Sm", (VSUBS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>; -def : VFP2InstAlias<"fsubd${p} $Dd, $Dn, $Dm", - (VSUBD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>; +def : VFP2DPInstAlias<"fsubd${p} $Dd, $Dn, $Dm", + (VSUBD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>; // No need for the size suffix on VSQRT. It's implied by the register classes. def : VFP2InstAlias<"vsqrt${p} $Sd, $Sm", (VSQRTS SPR:$Sd, SPR:$Sm, pred:$p)>; -def : VFP2InstAlias<"vsqrt${p} $Dd, $Dm", (VSQRTD DPR:$Dd, DPR:$Dm, pred:$p)>; +def : VFP2DPInstAlias<"vsqrt${p} $Dd, $Dm", (VSQRTD DPR:$Dd, DPR:$Dm, pred:$p)>; // VLDR/VSTR accept an optional type suffix. def : VFP2InstAlias<"vldr${p}.32 $Sd, $addr", @@ -1466,3 +1881,14 @@ def : VFP2InstAlias<"vmov${p}.f64 $Dn, $Rt, $Rt2", // VMOVD does. def : VFP2InstAlias<"vmov${p} $Sd, $Sm", (VMOVS SPR:$Sd, SPR:$Sm, pred:$p)>; + +// FCONSTD/FCONSTS alias for vmov.f64/vmov.f32 +// These aliases provide added functionality over vmov.f instructions by +// allowing users to write assembly containing encoded floating point constants +// (e.g. #0x70 vs #1.0). Without these alises there is no way for the +// assembler to accept encoded fp constants (but the equivalent fp-literal is +// accepted directly by vmovf). +def : VFP3InstAlias<"fconstd${p} $Dd, $val", + (FCONSTD DPR:$Dd, vfp_f64imm:$val, pred:$p)>; +def : VFP3InstAlias<"fconsts${p} $Sd, $val", + (FCONSTS SPR:$Sd, vfp_f32imm:$val, pred:$p)>;