X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FAArch64%2FAArch64InstrFormats.td;h=5eef82153e39660e6a8744ea9e9b874b53265484;hp=106f2158909635cfa4aca6d86cdaebff21965cd4;hb=ce8e2a0d91724b07d3a262c6005e5705a3b7837e;hpb=a3b8caf8a739fe19ab27259dc95a9771c11df16f diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index 106f2158909..5eef82153e3 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -248,6 +248,12 @@ def simm7s16 : Operand { let PrintMethod = "printImmScale<16>"; } +def am_indexed7s8 : ComplexPattern; +def am_indexed7s16 : ComplexPattern; +def am_indexed7s32 : ComplexPattern; +def am_indexed7s64 : ComplexPattern; +def am_indexed7s128 : ComplexPattern; + class AsmImmRange : AsmOperandClass { let Name = "Imm" # Low # "_" # High; let DiagnosticType = "InvalidImm" # Low # "_" # High; @@ -346,9 +352,11 @@ class fixedpoint_i64 let ParserMatchClass = Imm1_64Operand; } +def fixedpoint_f16_i32 : fixedpoint_i32; def fixedpoint_f32_i32 : fixedpoint_i32; def fixedpoint_f64_i32 : fixedpoint_i32; +def fixedpoint_f16_i64 : fixedpoint_i64; def fixedpoint_f32_i64 : fixedpoint_i64; def fixedpoint_f64_i64 : fixedpoint_i64; @@ -402,6 +410,7 @@ def vecshiftR64Narrow : Operand, ImmLeaf; def Imm0_7Operand : AsmImmRange<0, 7>; def Imm0_15Operand : AsmImmRange<0, 15>; def Imm0_31Operand : AsmImmRange<0, 31>; @@ -441,11 +450,11 @@ def vecshiftL64 : Operand, ImmLeafgetZExtValue(), 32); - return CurDAG->getTargetConstant(enc, MVT::i32); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); }]>; def logical_imm64_XFORM : SDNodeXFormgetZExtValue(), 64); - return CurDAG->getTargetConstant(enc, MVT::i32); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); }]>; let DiagnosticType = "LogicalSecondSource" in { @@ -525,6 +534,20 @@ def imm0_31 : Operand, ImmLeaf, ImmLeaf { + let ParserMatchClass = Imm0_31Operand; +} + +// imm0_1 predicate - True if the immediate is in the range [0,1] +def imm0_1 : Operand, ImmLeaf { + let ParserMatchClass = Imm0_1Operand; +} + // imm0_15 predicate - True if the immediate is in the range [0,15] def imm0_15 : Operand, ImmLeaf, ImmLeaf, ImmLeaf; +}]> { + let ParserMatchClass = Imm0_15Operand; +} // An arithmetic shifter operand: // {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr @@ -614,10 +639,15 @@ def move_vec_shift : Operand { let ParserMatchClass = MoveVecShifterOperand; } -def AddSubImmOperand : AsmOperandClass { - let Name = "AddSubImm"; - let ParserMethod = "tryParseAddSubImm"; - let DiagnosticType = "AddSubSecondSource"; +let DiagnosticType = "AddSubSecondSource" in { + def AddSubImmOperand : AsmOperandClass { + let Name = "AddSubImm"; + let ParserMethod = "tryParseAddSubImm"; + } + def AddSubImmNegOperand : AsmOperandClass { + let Name = "AddSubImmNeg"; + let ParserMethod = "tryParseAddSubImm"; + } } // An ADD/SUB immediate shifter operand: // second operand: @@ -631,8 +661,17 @@ class addsub_shifted_imm let MIOperandInfo = (ops i32imm, i32imm); } +class addsub_shifted_imm_neg + : Operand { + let EncoderMethod = "getAddSubImmOpValue"; + let ParserMatchClass = AddSubImmNegOperand; + let MIOperandInfo = (ops i32imm, i32imm); +} + def addsub_shifted_imm32 : addsub_shifted_imm; def addsub_shifted_imm64 : addsub_shifted_imm; +def addsub_shifted_imm32_neg : addsub_shifted_imm_neg; +def addsub_shifted_imm64_neg : addsub_shifted_imm_neg; class neg_addsub_shifted_imm : Operand, ComplexPattern { @@ -676,13 +715,24 @@ class arith_extended_reg32to64 : Operand, } // Floating-point immediate. +def fpimm16 : Operand, + PatLeaf<(f16 fpimm), [{ + return AArch64_AM::getFP16Imm(N->getValueAPF()) != -1; + }], SDNodeXFormgetValueAPF(); + uint32_t enc = AArch64_AM::getFP16Imm(InVal); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); + }]>> { + let ParserMatchClass = FPImmOperand; + let PrintMethod = "printFPImmOperand"; +} def fpimm32 : Operand, PatLeaf<(f32 fpimm), [{ return AArch64_AM::getFP32Imm(N->getValueAPF()) != -1; }], SDNodeXFormgetValueAPF(); uint32_t enc = AArch64_AM::getFP32Imm(InVal); - return CurDAG->getTargetConstant(enc, MVT::i32); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); }]>> { let ParserMatchClass = FPImmOperand; let PrintMethod = "printFPImmOperand"; @@ -693,7 +743,7 @@ def fpimm64 : Operand, }], SDNodeXFormgetValueAPF(); uint32_t enc = AArch64_AM::getFP64Imm(InVal); - return CurDAG->getTargetConstant(enc, MVT::i32); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); }]>> { let ParserMatchClass = FPImmOperand; let PrintMethod = "printFPImmOperand"; @@ -768,7 +818,7 @@ def simdimmtype10 : Operand, uint32_t enc = AArch64_AM::encodeAdvSIMDModImmType10(N->getValueAPF() .bitcastToAPInt() .getZExtValue()); - return CurDAG->getTargetConstant(enc, MVT::i32); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); }]>> { let ParserMatchClass = SIMDImmType10Operand; let PrintMethod = "printSIMDType10Operand"; @@ -808,7 +858,7 @@ class RtSystemI // model patterns with sufficiently fine granularity let mayStore = 1, mayLoad = 1, hasSideEffects = 1 in class HintI - : SimpleSystemI<0, (ins imm0_127:$imm), mnemonic#" $imm", "", + : SimpleSystemI<0, (ins imm0_127:$imm), mnemonic#"\t$imm", "", [(int_aarch64_hint imm0_127:$imm)]>, Sched<[WriteHint]> { bits <7> imm; @@ -843,7 +893,7 @@ def MRSSystemRegisterOperand : AsmOperandClass { let ParserMethod = "tryParseSysReg"; let DiagnosticType = "MRS"; } -// concatenation of 1, op0, op1, CRn, CRm, op2. 16-bit immediate. +// concatenation of op0, op1, CRn, CRm, op2. 16-bit immediate. def mrs_sysreg_op : Operand { let ParserMatchClass = MRSSystemRegisterOperand; let DecoderMethod = "DecodeMRSSystemRegister"; @@ -861,11 +911,29 @@ def msr_sysreg_op : Operand { let PrintMethod = "printMSRSystemRegister"; } +def PSBHintOperand : AsmOperandClass { + let Name = "PSBHint"; + let ParserMethod = "tryParsePSBHint"; +} +def psbhint_op : Operand { + let ParserMatchClass = PSBHintOperand; + let PrintMethod = "printPSBHintOp"; + let MCOperandPredicate = [{ + // Check, if operand is valid, to fix exhaustive aliasing in disassembly. + // "psb" is an alias to "hint" only for certain values of CRm:Op2 fields. + if (!MCOp.isImm()) + return false; + bool ValidNamed; + (void)AArch64PSBHint::PSBHintMapper().toString(MCOp.getImm(), + STI.getFeatureBits(), ValidNamed); + return ValidNamed; + }]; +} + class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins mrs_sysreg_op:$systemreg), "mrs", "\t$Rt, $systemreg"> { - bits<15> systemreg; - let Inst{20} = 1; - let Inst{19-5} = systemreg; + bits<16> systemreg; + let Inst{20-5} = systemreg; } // FIXME: Some of these def NZCV, others don't. Best way to model that? @@ -873,24 +941,23 @@ class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins mrs_sysreg_op:$systemreg), // would do it, but feels like overkill at this point. class MSRI : RtSystemI<0, (outs), (ins msr_sysreg_op:$systemreg, GPR64:$Rt), "msr", "\t$systemreg, $Rt"> { - bits<15> systemreg; - let Inst{20} = 1; - let Inst{19-5} = systemreg; + bits<16> systemreg; + let Inst{20-5} = systemreg; } -def SystemPStateFieldOperand : AsmOperandClass { - let Name = "SystemPStateField"; +def SystemPStateFieldWithImm0_15Operand : AsmOperandClass { + let Name = "SystemPStateFieldWithImm0_15"; let ParserMethod = "tryParseSysReg"; } -def pstatefield_op : Operand { - let ParserMatchClass = SystemPStateFieldOperand; +def pstatefield4_op : Operand { + let ParserMatchClass = SystemPStateFieldWithImm0_15Operand; let PrintMethod = "printSystemPStateField"; } let Defs = [NZCV] in -class MSRpstateI - : SimpleSystemI<0, (ins pstatefield_op:$pstate_field, imm0_15:$imm), - "msr", "\t$pstate_field, $imm">, +class MSRpstateImm0_15 + : SimpleSystemI<0, (ins pstatefield4_op:$pstatefield, imm0_15:$imm), + "msr", "\t$pstatefield, $imm">, Sched<[WriteSys]> { bits<6> pstatefield; bits<4> imm; @@ -901,6 +968,37 @@ class MSRpstateI let Inst{7-5} = pstatefield{2-0}; let DecoderMethod = "DecodeSystemPStateInstruction"; + // MSRpstateI aliases with MSRI. When the MSRpstateI decoder method returns + // Fail the decoder should attempt to decode the instruction as MSRI. + let hasCompleteDecoder = 0; +} + +def SystemPStateFieldWithImm0_1Operand : AsmOperandClass { + let Name = "SystemPStateFieldWithImm0_1"; + let ParserMethod = "tryParseSysReg"; +} +def pstatefield1_op : Operand { + let ParserMatchClass = SystemPStateFieldWithImm0_1Operand; + let PrintMethod = "printSystemPStateField"; +} + +let Defs = [NZCV] in +class MSRpstateImm0_1 + : SimpleSystemI<0, (ins pstatefield1_op:$pstatefield, imm0_1:$imm), + "msr", "\t$pstatefield, $imm">, + Sched<[WriteSys]> { + bits<6> pstatefield; + bit imm; + let Inst{20-19} = 0b00; + let Inst{18-16} = pstatefield{5-3}; + let Inst{15-9} = 0b0100000; + let Inst{8} = imm; + let Inst{7-5} = pstatefield{2-0}; + + let DecoderMethod = "DecodeSystemPStateInstruction"; + // MSRpstateI aliases with MSRI. When the MSRpstateI decoder method returns + // Fail the decoder should attempt to decode the instruction as MSRI. + let hasCompleteDecoder = 0; } // SYS and SYSL generic system instructions. @@ -1329,7 +1427,7 @@ multiclass Shift shift_type, string asm, SDNode OpNode> { } class ShiftAlias - : InstAlias; class BaseMulAccum opc, RegisterClass multype, @@ -1351,14 +1449,15 @@ class BaseMulAccum opc, RegisterClass multype, } multiclass MulAccum { + // MADD/MSUB generation is decided by MachineCombiner.cpp def Wrrr : BaseMulAccum, + [/*(set GPR32:$Rd, (AccNode GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm)))*/]>, Sched<[WriteIM32, ReadIM, ReadIM, ReadIMA]> { let Inst{31} = 0; } def Xrrr : BaseMulAccum, + [/*(set GPR64:$Rd, (AccNode GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm)))*/]>, Sched<[WriteIM64, ReadIM, ReadIM, ReadIMA]> { let Inst{31} = 1; } @@ -1394,13 +1493,13 @@ class MulHi opc, string asm, SDNode OpNode> } class MulAccumWAlias - : InstAlias; class MulAccumXAlias - : InstAlias; class WideMulAccumAlias - : InstAlias; class BaseCRC32 sz, bit C, RegisterClass StreamReg, @@ -1630,18 +1729,24 @@ class BaseAddSubEReg64 - : InstAlias; -multiclass AddSub { let hasSideEffects = 0, isReMaterializable = 1, isAsCheapAsAMove = 1 in { // Add/Subtract immediate + // Increase the weight of the immediate variant to try to match it before + // the extended register variant. + // We used to match the register variant before the immediate when the + // register argument could be implicitly zero-extended. + let AddedComplexity = 6 in def Wri : BaseAddSubImm { let Inst{31} = 0; } + let AddedComplexity = 6 in def Xri : BaseAddSubImm { let Inst{31} = 1; @@ -1681,6 +1786,14 @@ multiclass AddSub sub Rd, Rn, imm + def : InstAlias(NAME # "Wri") GPR32sp:$Rd, GPR32sp:$Rn, + addsub_shifted_imm32_neg:$imm), 0>; + def : InstAlias(NAME # "Xri") GPR64sp:$Rd, GPR64sp:$Rn, + addsub_shifted_imm64_neg:$imm), 0>; + // Register/register aliases with no shift when SP is not used. def : AddSubRegAlias(NAME#"Wrs"), GPR32, GPR32, GPR32, 0>; @@ -1701,7 +1814,8 @@ multiclass AddSub; // UXTX #0 } -multiclass AddSubS { +multiclass AddSubS { let isCompare = 1, Defs = [NZCV] in { // Add/Subtract immediate def Wri : BaseAddSubImm { } } // Defs = [NZCV] + // Support negative immediates, e.g. adds Rd, Rn, -imm -> subs Rd, Rn, imm + def : InstAlias(NAME # "Wri") GPR32:$Rd, GPR32sp:$Rn, + addsub_shifted_imm32_neg:$imm), 0>; + def : InstAlias(NAME # "Xri") GPR64:$Rd, GPR64sp:$Rn, + addsub_shifted_imm64_neg:$imm), 0>; + // Compare aliases - def : InstAlias(NAME#"Wri") + def : InstAlias(NAME#"Wri") WZR, GPR32sp:$src, addsub_shifted_imm32:$imm), 5>; - def : InstAlias(NAME#"Xri") + def : InstAlias(NAME#"Xri") XZR, GPR64sp:$src, addsub_shifted_imm64:$imm), 5>; - def : InstAlias(NAME#"Wrx") + def : InstAlias(NAME#"Wrx") WZR, GPR32sp:$src1, GPR32:$src2, arith_extend:$sh), 4>; - def : InstAlias(NAME#"Xrx") + def : InstAlias(NAME#"Xrx") XZR, GPR64sp:$src1, GPR32:$src2, arith_extend:$sh), 4>; - def : InstAlias(NAME#"Xrx64") + def : InstAlias(NAME#"Xrx64") XZR, GPR64sp:$src1, GPR64:$src2, arith_extendlsl64:$sh), 4>; - def : InstAlias(NAME#"Wrs") + def : InstAlias(NAME#"Wrs") WZR, GPR32:$src1, GPR32:$src2, arith_shift32:$sh), 4>; - def : InstAlias(NAME#"Xrs") + def : InstAlias(NAME#"Xrs") XZR, GPR64:$src1, GPR64:$src2, arith_shift64:$sh), 4>; + // Support negative immediates, e.g. cmp Rn, -imm -> cmn Rn, imm + def : InstAlias(NAME#"Wri") + WZR, GPR32sp:$src, addsub_shifted_imm32_neg:$imm), 0>; + def : InstAlias(NAME#"Xri") + XZR, GPR64sp:$src, addsub_shifted_imm64_neg:$imm), 0>; + // Compare shorthands - def : InstAlias(NAME#"Wrs") + def : InstAlias(NAME#"Wrs") WZR, GPR32:$src1, GPR32:$src2, 0), 5>; - def : InstAlias(NAME#"Xrs") + def : InstAlias(NAME#"Xrs") XZR, GPR64:$src1, GPR64:$src2, 0), 5>; - def : InstAlias(NAME#"Wrx") + def : InstAlias(NAME#"Wrx") WZR, GPR32sponly:$src1, GPR32:$src2, 16), 5>; - def : InstAlias(NAME#"Xrx64") + def : InstAlias(NAME#"Xrx64") XZR, GPR64sponly:$src1, GPR64:$src2, 24), 5>; // Register/register aliases with no shift when SP is not used. @@ -1956,7 +2084,7 @@ class BaseLogicalSReg opc, bit N, RegisterClass regtype, // Aliases for register+register logical instructions. class LogicalRegAlias - : InstAlias; multiclass LogicalImm opc, string mnemonic, SDNode OpNode, @@ -1975,10 +2103,10 @@ multiclass LogicalImm opc, string mnemonic, SDNode OpNode, let Inst{31} = 1; } - def : InstAlias(NAME # "Wri") GPR32sp:$Rd, GPR32:$Rn, logical_imm32_not:$imm), 0>; - def : InstAlias(NAME # "Xri") GPR64sp:$Rd, GPR64:$Rn, logical_imm64_not:$imm), 0>; } @@ -1997,10 +2125,10 @@ multiclass LogicalImmS opc, string mnemonic, SDNode OpNode, } } // end Defs = [NZCV] - def : InstAlias(NAME # "Wri") GPR32:$Rd, GPR32:$Rn, logical_imm32_not:$imm), 0>; - def : InstAlias(NAME # "Xri") GPR64:$Rd, GPR64:$Rn, logical_imm64_not:$imm), 0>; } @@ -2063,9 +2191,12 @@ multiclass LogicalRegS opc, bit N, string mnemonic, //--- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseCondSetFlagsImm - : I<(outs), (ins regtype:$Rn, imm0_31:$imm, imm0_15:$nzcv, ccode:$cond), - asm, "\t$Rn, $imm, $nzcv, $cond", "", []>, +class BaseCondComparisonImm + : I<(outs), (ins regtype:$Rn, immtype:$imm, imm32_0_15:$nzcv, ccode:$cond), + mnemonic, "\t$Rn, $imm, $nzcv, $cond", "", + [(set NZCV, (OpNode regtype:$Rn, immtype:$imm, (i32 imm:$nzcv), + (i32 imm:$cond), NZCV))]>, Sched<[WriteI, ReadI]> { let Uses = [NZCV]; let Defs = [NZCV]; @@ -2085,19 +2216,13 @@ class BaseCondSetFlagsImm let Inst{3-0} = nzcv; } -multiclass CondSetFlagsImm { - def Wi : BaseCondSetFlagsImm { - let Inst{31} = 0; - } - def Xi : BaseCondSetFlagsImm { - let Inst{31} = 1; - } -} - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseCondSetFlagsReg - : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond), - asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>, +class BaseCondComparisonReg + : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond), + mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "", + [(set NZCV, (OpNode regtype:$Rn, regtype:$Rm, (i32 imm:$nzcv), + (i32 imm:$cond), NZCV))]>, Sched<[WriteI, ReadI, ReadI]> { let Uses = [NZCV]; let Defs = [NZCV]; @@ -2117,11 +2242,19 @@ class BaseCondSetFlagsReg let Inst{3-0} = nzcv; } -multiclass CondSetFlagsReg { - def Wr : BaseCondSetFlagsReg { +multiclass CondComparison { + // immediate operand variants + def Wi : BaseCondComparisonImm { let Inst{31} = 0; } - def Xr : BaseCondSetFlagsReg { + def Xi : BaseCondComparisonImm { + let Inst{31} = 1; + } + // register operand variants + def Wr : BaseCondComparisonReg { + let Inst{31} = 0; + } + def Xr : BaseCondComparisonReg { let Inst{31} = 1; } } @@ -2187,7 +2320,8 @@ class BaseCondSelectOp op2, RegisterClass regtype, string asm, def inv_cond_XFORM : SDNodeXForm(N->getZExtValue()); - return CurDAG->getTargetConstant(AArch64CC::getInvertedCondCode(CC), MVT::i32); + return CurDAG->getTargetConstant(AArch64CC::getInvertedCondCode(CC), SDLoc(N), + MVT::i32); }]>; multiclass CondSelectOp op2, string asm, PatFrag frag> { @@ -2285,7 +2419,7 @@ multiclass LoadUI sz, bit V, bits<2> opc, RegisterClass regtype, asm, pattern>, Sched<[WriteLD]>; - def : InstAlias(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>; } @@ -2297,7 +2431,7 @@ multiclass StoreUI sz, bit V, bits<2> opc, RegisterClass regtype, asm, pattern>, Sched<[WriteST]>; - def : InstAlias(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>; } @@ -2465,7 +2599,7 @@ class LoadStore8RO sz, bit V, bits<2> opc, RegisterClass regtype, } class ROInstAlias - : InstAlias; multiclass Load8RO sz, bit V, bits<2> opc, RegisterClass regtype, @@ -2891,7 +3025,7 @@ multiclass LoadUnscaled sz, bit V, bits<2> opc, RegisterClass regtype, (ins GPR64sp:$Rn, simm9:$offset), asm, pattern>, Sched<[WriteLD]>; - def : InstAlias(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; } @@ -2903,7 +3037,7 @@ multiclass StoreUnscaled sz, bit V, bits<2> opc, RegisterClass regtype, asm, pattern>, Sched<[WriteST]>; - def : InstAlias(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; } @@ -2915,7 +3049,7 @@ multiclass PrefetchUnscaled sz, bit V, bits<2> opc, string asm, asm, pat>, Sched<[WriteLD]>; - def : InstAlias(NAME # "i") prfop:$Rt, GPR64sp:$Rn, 0)>; } @@ -2950,7 +3084,7 @@ multiclass LoadUnprivileged sz, bit V, bits<2> opc, (ins GPR64sp:$Rn, simm9:$offset), asm>, Sched<[WriteLD]>; - def : InstAlias(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; } @@ -2962,7 +3096,7 @@ multiclass StoreUnprivileged sz, bit V, bits<2> opc, asm>, Sched<[WriteST]>; - def : InstAlias(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; } @@ -2997,7 +3131,7 @@ class LoadPreIdx sz, bit V, bits<2> opc, RegisterClass regtype, : BaseLoadStorePreIdx, + "$Rn = $wback,@earlyclobber $wback", []>, Sched<[WriteLD, WriteAdr]>; let mayStore = 1, mayLoad = 0 in @@ -3006,7 +3140,7 @@ class StorePreIdx sz, bit V, bits<2> opc, RegisterClass regtype, : BaseLoadStorePreIdx, Sched<[WriteAdr, WriteST]>; @@ -3016,7 +3150,6 @@ class StorePreIdx sz, bit V, bits<2> opc, RegisterClass regtype, // Load/store post-indexed //--- -// (pre-index) load/stores. class BaseLoadStorePostIdx sz, bit V, bits<2> opc, dag oops, dag iops, string asm, string cstr, list pat> : I { @@ -3044,7 +3177,7 @@ class LoadPostIdx sz, bit V, bits<2> opc, RegisterClass regtype, : BaseLoadStorePostIdx, + asm, "$Rn = $wback,@earlyclobber $wback", []>, Sched<[WriteLD, WriteI]>; let mayStore = 1, mayLoad = 0 in @@ -3053,7 +3186,7 @@ class StorePostIdx sz, bit V, bits<2> opc, RegisterClass regtype, : BaseLoadStorePostIdx, Sched<[WriteAdr, WriteST, ReadAdrBase]>; @@ -3094,7 +3227,7 @@ multiclass LoadPairOffset opc, bit V, RegisterClass regtype, (ins GPR64sp:$Rn, indextype:$offset), asm>, Sched<[WriteLD, WriteLDHi]>; - def : InstAlias(NAME # "i") regtype:$Rt, regtype:$Rt2, GPR64sp:$Rn, 0)>; } @@ -3109,7 +3242,7 @@ multiclass StorePairOffset opc, bit V, RegisterClass regtype, asm>, Sched<[WriteSTP]>; - def : InstAlias(NAME # "i") regtype:$Rt, regtype:$Rt2, GPR64sp:$Rn, 0)>; } @@ -3117,7 +3250,7 @@ multiclass StorePairOffset opc, bit V, RegisterClass regtype, // (pre-indexed) class BaseLoadStorePairPreIdx opc, bit V, bit L, dag oops, dag iops, string asm> - : I { + : I { bits<5> Rt; bits<5> Rt2; bits<5> Rn; @@ -3158,7 +3291,7 @@ class StorePairPreIdx opc, bit V, RegisterClass regtype, class BaseLoadStorePairPostIdx opc, bit V, bit L, dag oops, dag iops, string asm> - : I { + : I { bits<5> Rt; bits<5> Rt2; bits<5> Rn; @@ -3188,8 +3321,8 @@ class LoadPairPostIdx opc, bit V, RegisterClass regtype, let mayStore = 1, mayLoad = 0 in class StorePairPostIdx opc, bit V, RegisterClass regtype, Operand idxtype, string asm> - : BaseLoadStorePairPostIdx, Sched<[WriteAdr, WriteSTP]>; @@ -3284,6 +3417,10 @@ class LoadStoreExclusiveSimple sz, bit o2, bit L, bit o1, bit o0, : BaseLoadStoreExclusive { bits<5> Rt; bits<5> Rn; + let Inst{20-16} = 0b11111; + let Unpredictable{20-16} = 0b11111; + let Inst{14-10} = 0b11111; + let Unpredictable{14-10} = 0b11111; let Inst{9-5} = Rn; let Inst{4-0} = Rt; @@ -3431,6 +3568,20 @@ class BaseFPToInteger type, bits<2> rmode, bits<3> opcode, multiclass FPToIntegerUnscaled rmode, bits<3> opcode, string asm, SDPatternOperator OpN> { + // Unscaled half-precision to 32-bit + def UWHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, GPR32, asm, + [(set GPR32:$Rd, (OpN FPR16:$Rn))]> { + let Inst{31} = 0; // 32-bit GPR flag + let Predicates = [HasFullFP16]; + } + + // Unscaled half-precision to 64-bit + def UXHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, GPR64, asm, + [(set GPR64:$Rd, (OpN FPR16:$Rn))]> { + let Inst{31} = 1; // 64-bit GPR flag + let Predicates = [HasFullFP16]; + } + // Unscaled single-precision to 32-bit def UWSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, GPR32, asm, [(set GPR32:$Rd, (OpN FPR32:$Rn))]> { @@ -3458,6 +3609,25 @@ multiclass FPToIntegerUnscaled rmode, bits<3> opcode, string asm, multiclass FPToIntegerScaled rmode, bits<3> opcode, string asm, SDPatternOperator OpN> { + // Scaled half-precision to 32-bit + def SWHri : BaseFPToInteger<0b11, rmode, opcode, FPR16, GPR32, + fixedpoint_f16_i32, asm, + [(set GPR32:$Rd, (OpN (fmul FPR16:$Rn, + fixedpoint_f16_i32:$scale)))]> { + let Inst{31} = 0; // 32-bit GPR flag + let scale{5} = 1; + let Predicates = [HasFullFP16]; + } + + // Scaled half-precision to 64-bit + def SXHri : BaseFPToInteger<0b11, rmode, opcode, FPR16, GPR64, + fixedpoint_f16_i64, asm, + [(set GPR64:$Rd, (OpN (fmul FPR16:$Rn, + fixedpoint_f16_i64:$scale)))]> { + let Inst{31} = 1; // 64-bit GPR flag + let Predicates = [HasFullFP16]; + } + // Scaled single-precision to 32-bit def SWSri : BaseFPToInteger<0b00, rmode, opcode, FPR32, GPR32, fixedpoint_f32_i32, asm, @@ -3507,7 +3677,7 @@ class BaseIntegerToFP Rd; bits<5> Rn; bits<6> scale; - let Inst{30-23} = 0b00111100; + let Inst{30-24} = 0b0011110; let Inst{21-17} = 0b00001; let Inst{16} = isUnsigned; let Inst{15-10} = scale; @@ -3524,7 +3694,7 @@ class BaseIntegerToFPUnscaled Rd; bits<5> Rn; bits<6> scale; - let Inst{30-23} = 0b00111100; + let Inst{30-24} = 0b0011110; let Inst{21-17} = 0b10001; let Inst{16} = isUnsigned; let Inst{15-10} = 0b000000; @@ -3534,33 +3704,55 @@ class BaseIntegerToFPUnscaled { // Unscaled + def UWHri: BaseIntegerToFPUnscaled { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; + } + def UWSri: BaseIntegerToFPUnscaled { let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag } def UWDri: BaseIntegerToFPUnscaled { let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag + } + + def UXHri: BaseIntegerToFPUnscaled { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; } def UXSri: BaseIntegerToFPUnscaled { let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag } def UXDri: BaseIntegerToFPUnscaled { let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag } // Scaled + def SWHri: BaseIntegerToFP { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let scale{5} = 1; + let Predicates = [HasFullFP16]; + } + def SWSri: BaseIntegerToFP { let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag let scale{5} = 1; } @@ -3569,16 +3761,25 @@ multiclass IntegerToFP { (fdiv (node GPR32:$Rn), fixedpoint_f64_i32:$scale))]> { let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag let scale{5} = 1; } + def SXHri: BaseIntegerToFP { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; + } + def SXSri: BaseIntegerToFP { let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag } def SXDri: BaseIntegerToFP { (fdiv (node GPR64:$Rn), fixedpoint_f64_i64:$scale))]> { let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag } } @@ -3608,7 +3809,7 @@ class BaseUnscaledConversion rmode, bits<3> opcode, Sched<[WriteFCopy]> { bits<5> Rd; bits<5> Rn; - let Inst{30-23} = 0b00111100; + let Inst{30-24} = 0b0011110; let Inst{21} = 1; let Inst{20-19} = rmode; let Inst{18-16} = opcode; @@ -3658,26 +3859,49 @@ class BaseUnscaledConversionFromHigh rmode, bits<3> opcode, } - multiclass UnscaledConversion { + def WHr : BaseUnscaledConversion<0b00, 0b111, GPR32, FPR16, asm> { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; + } + + def XHr : BaseUnscaledConversion<0b00, 0b111, GPR64, FPR16, asm> { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; + } + def WSr : BaseUnscaledConversion<0b00, 0b111, GPR32, FPR32, asm> { let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag } def XDr : BaseUnscaledConversion<0b00, 0b111, GPR64, FPR64, asm> { let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag + } + + def HWr : BaseUnscaledConversion<0b00, 0b110, FPR16, GPR32, asm> { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; + } + + def HXr : BaseUnscaledConversion<0b00, 0b110, FPR16, GPR64, asm> { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; } def SWr : BaseUnscaledConversion<0b00, 0b110, FPR32, GPR32, asm> { let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag } def DXr : BaseUnscaledConversion<0b00, 0b110, FPR64, GPR64, asm> { let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag } def XDHighr : BaseUnscaledConversionToHigh<0b01, 0b111, GPR64, V128, @@ -3750,7 +3974,7 @@ class BaseSingleOperandFPData opcode, RegisterClass regtype, Sched<[WriteF]> { bits<5> Rd; bits<5> Rn; - let Inst{31-23} = 0b000111100; + let Inst{31-24} = 0b00011110; let Inst{21-19} = 0b100; let Inst{18-15} = opcode; let Inst{14-10} = 0b10000; @@ -3760,12 +3984,17 @@ class BaseSingleOperandFPData opcode, RegisterClass regtype, multiclass SingleOperandFPData opcode, string asm, SDPatternOperator node = null_frag> { + def Hr : BaseSingleOperandFPData { + let Inst{23-22} = 0b11; // 16-bit size flag + let Predicates = [HasFullFP16]; + } + def Sr : BaseSingleOperandFPData { - let Inst{22} = 0; // 32-bit size flag + let Inst{23-22} = 0b00; // 32-bit size flag } def Dr : BaseSingleOperandFPData { - let Inst{22} = 1; // 64-bit size flag + let Inst{23-22} = 0b01; // 64-bit size flag } } @@ -3782,7 +4011,7 @@ class BaseTwoOperandFPData opcode, RegisterClass regtype, bits<5> Rd; bits<5> Rn; bits<5> Rm; - let Inst{31-23} = 0b000111100; + let Inst{31-24} = 0b00011110; let Inst{21} = 1; let Inst{20-16} = Rm; let Inst{15-12} = opcode; @@ -3793,28 +4022,41 @@ class BaseTwoOperandFPData opcode, RegisterClass regtype, multiclass TwoOperandFPData opcode, string asm, SDPatternOperator node = null_frag> { + def Hrr : BaseTwoOperandFPData { + let Inst{23-22} = 0b11; // 16-bit size flag + let Predicates = [HasFullFP16]; + } + def Srr : BaseTwoOperandFPData { - let Inst{22} = 0; // 32-bit size flag + let Inst{23-22} = 0b00; // 32-bit size flag } def Drr : BaseTwoOperandFPData { - let Inst{22} = 1; // 64-bit size flag + let Inst{23-22} = 0b01; // 64-bit size flag } } multiclass TwoOperandFPDataNeg opcode, string asm, SDNode node> { + def Hrr : BaseTwoOperandFPData { + let Inst{23-22} = 0b11; // 16-bit size flag + let Predicates = [HasFullFP16]; + } + def Srr : BaseTwoOperandFPData { - let Inst{22} = 0; // 32-bit size flag + let Inst{23-22} = 0b00; // 32-bit size flag } def Drr : BaseTwoOperandFPData { - let Inst{22} = 1; // 64-bit size flag + let Inst{23-22} = 0b01; // 64-bit size flag } } @@ -3832,7 +4074,7 @@ class BaseThreeOperandFPData Rn; bits<5> Rm; bits<5> Ra; - let Inst{31-23} = 0b000111110; + let Inst{31-24} = 0b00011111; let Inst{21} = isNegated; let Inst{20-16} = Rm; let Inst{15} = isSub; @@ -3843,16 +4085,23 @@ class BaseThreeOperandFPData { + def Hrrr : BaseThreeOperandFPData { + let Inst{23-22} = 0b11; // 16-bit size flag + let Predicates = [HasFullFP16]; + } + def Srrr : BaseThreeOperandFPData { - let Inst{22} = 0; // 32-bit size flag + let Inst{23-22} = 0b00; // 32-bit size flag } def Drrr : BaseThreeOperandFPData { - let Inst{22} = 1; // 64-bit size flag + let Inst{23-22} = 0b01; // 64-bit size flag } } @@ -3867,7 +4116,7 @@ class BaseOneOperandFPComparison, Sched<[WriteFCmp]> { bits<5> Rn; - let Inst{31-23} = 0b000111100; + let Inst{31-24} = 0b00011110; let Inst{21} = 1; let Inst{15-10} = 0b001000; @@ -3886,7 +4135,7 @@ class BaseTwoOperandFPComparison { bits<5> Rm; bits<5> Rn; - let Inst{31-23} = 0b000111100; + let Inst{31-24} = 0b00011110; let Inst{21} = 1; let Inst{20-16} = Rm; let Inst{15-10} = 0b001000; @@ -3898,24 +4147,36 @@ class BaseTwoOperandFPComparison { let Defs = [NZCV] in { + def Hrr : BaseTwoOperandFPComparison { + let Inst{23-22} = 0b11; + let Predicates = [HasFullFP16]; + } + + def Hri : BaseOneOperandFPComparison { + let Inst{23-22} = 0b11; + let Predicates = [HasFullFP16]; + } + def Srr : BaseTwoOperandFPComparison { - let Inst{22} = 0; + let Inst{23-22} = 0b00; } def Sri : BaseOneOperandFPComparison { - let Inst{22} = 0; + let Inst{23-22} = 0b00; } def Drr : BaseTwoOperandFPComparison { - let Inst{22} = 1; + let Inst{23-22} = 0b01; } def Dri : BaseOneOperandFPComparison { - let Inst{22} = 1; + let Inst{23-22} = 0b01; } } // Defs = [NZCV] } @@ -3925,17 +4186,20 @@ multiclass FPComparison - : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond), - asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>, +class BaseFPCondComparison pat> + : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond), + mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "", pat>, Sched<[WriteFCmp]> { + let Uses = [NZCV]; + let Defs = [NZCV]; + bits<5> Rn; bits<5> Rm; bits<4> nzcv; bits<4> cond; - let Inst{31-23} = 0b000111100; + let Inst{31-24} = 0b00011110; let Inst{21} = 1; let Inst{20-16} = Rm; let Inst{15-12} = cond; @@ -3945,16 +4209,24 @@ class BaseFPCondComparison { - let Defs = [NZCV], Uses = [NZCV] in { - def Srr : BaseFPCondComparison { - let Inst{22} = 0; +multiclass FPCondComparison { + def Hrr : BaseFPCondComparison { + let Inst{23-22} = 0b11; + let Predicates = [HasFullFP16]; } - def Drr : BaseFPCondComparison { - let Inst{22} = 1; + def Srr : BaseFPCondComparison { + let Inst{23-22} = 0b00; + } + + def Drr : BaseFPCondComparison { + let Inst{23-22} = 0b01; } - } // Defs = [NZCV], Uses = [NZCV] } //--- @@ -3973,7 +4245,7 @@ class BaseFPCondSelect bits<5> Rm; bits<4> cond; - let Inst{31-23} = 0b000111100; + let Inst{31-24} = 0b00011110; let Inst{21} = 1; let Inst{20-16} = Rm; let Inst{15-12} = cond; @@ -3984,12 +4256,17 @@ class BaseFPCondSelect multiclass FPCondSelect { let Uses = [NZCV] in { + def Hrrr : BaseFPCondSelect { + let Inst{23-22} = 0b11; + let Predicates = [HasFullFP16]; + } + def Srrr : BaseFPCondSelect { - let Inst{22} = 0; + let Inst{23-22} = 0b00; } def Drrr : BaseFPCondSelect { - let Inst{22} = 1; + let Inst{23-22} = 0b01; } } // Uses = [NZCV] } @@ -4004,7 +4281,7 @@ class BaseFPMoveImmediate Sched<[WriteFImm]> { bits<5> Rd; bits<8> imm; - let Inst{31-23} = 0b000111100; + let Inst{31-24} = 0b00011110; let Inst{21} = 1; let Inst{20-13} = imm; let Inst{12-5} = 0b10000000; @@ -4012,12 +4289,17 @@ class BaseFPMoveImmediate } multiclass FPMoveImmediate { + def Hi : BaseFPMoveImmediate { + let Inst{23-22} = 0b11; + let Predicates = [HasFullFP16]; + } + def Si : BaseFPMoveImmediate { - let Inst{22} = 0; + let Inst{23-22} = 0b00; } def Di : BaseFPMoveImmediate { - let Inst{22} = 1; + let Inst{23-22} = 0b01; } } } // end of 'let Predicates = [HasFPARMv8]' @@ -4385,7 +4667,7 @@ class BaseSIMDVectorLShiftLongBySize size, } multiclass SIMDVectorLShiftLongBySizeBHS { - let neverHasSideEffects = 1 in { + let hasSideEffects = 0 in { def v8i8 : BaseSIMDVectorLShiftLongBySize<0, 0b00, V64, "shll", ".8h", ".8b", "8">; def v16i8 : BaseSIMDVectorLShiftLongBySize<1, 0b00, V128, @@ -4723,17 +5005,17 @@ multiclass SIMDFPCmpTwoVector opc, asm, ".2d", "0.0", v2i64, v2f64, OpNode>; - def : InstAlias(NAME # v2i32rz) V64:$Vd, V64:$Vn), 0>; - def : InstAlias(NAME # v4i32rz) V128:$Vd, V128:$Vn), 0>; - def : InstAlias(NAME # v2i64rz) V128:$Vd, V128:$Vn), 0>; - def : InstAlias(NAME # v2i32rz) V64:$Vd, V64:$Vn), 0>; - def : InstAlias(NAME # v4i32rz) V128:$Vd, V128:$Vn), 0>; - def : InstAlias(NAME # v2i64rz) V128:$Vd, V128:$Vn), 0>; } @@ -5262,6 +5544,10 @@ multiclass SIMDZipVectoropc, string asm, def v2i64 : BaseSIMDZipVector<0b111, opc, V128, asm, ".2d", OpNode, v2i64>; + def : Pat<(v4f16 (OpNode V64:$Rn, V64:$Rm)), + (!cast(NAME#"v4i16") V64:$Rn, V64:$Rm)>; + def : Pat<(v8f16 (OpNode V128:$Rn, V128:$Rm)), + (!cast(NAME#"v8i16") V128:$Rn, V128:$Rm)>; def : Pat<(v2f32 (OpNode V64:$Rn, V64:$Rm)), (!cast(NAME#"v2i32") V64:$Rn, V64:$Rm)>; def : Pat<(v4f32 (OpNode V128:$Rn, V128:$Rm)), @@ -5296,6 +5582,27 @@ class BaseSIMDThreeScalar size, bits<5> opcode, let Inst{4-0} = Rd; } +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseSIMDThreeScalarTied size, bit R, bits<5> opcode, + dag oops, dag iops, string asm, + list pattern> + : I, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-24} = 0b11110; + let Inst{23-22} = size; + let Inst{21} = R; + let Inst{20-16} = Rm; + let Inst{15-11} = opcode; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + multiclass SIMDThreeScalarD opc, string asm, SDPatternOperator OpNode> { def v1i64 : BaseSIMDThreeScalar opc, string asm, def v1i16 : BaseSIMDThreeScalar; } +multiclass SIMDThreeScalarHSTied opc, string asm, + SDPatternOperator OpNode = null_frag> { + def v1i32: BaseSIMDThreeScalarTied; + def v1i16: BaseSIMDThreeScalarTied; +} + multiclass SIMDThreeScalarSD opc, string asm, SDPatternOperator OpNode = null_frag> { let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { @@ -5481,14 +5798,14 @@ multiclass SIMDCmpTwoScalarD opc, string asm, (!cast(NAME # v1i64rz) FPR64:$Rn)>; } -multiclass SIMDCmpTwoScalarSD opc, string asm, +multiclass SIMDFPCmpTwoScalar opc, string asm, SDPatternOperator OpNode> { def v1i64rz : BaseSIMDCmpTwoScalar; def v1i32rz : BaseSIMDCmpTwoScalar; - def : InstAlias(NAME # v1i64rz) FPR64:$Rd, FPR64:$Rn), 0>; - def : InstAlias(NAME # v1i32rz) FPR32:$Rd, FPR32:$Rn), 0>; def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn))), @@ -5504,7 +5821,7 @@ multiclass SIMDTwoScalarD opc, string asm, (!cast(NAME # "v1i64") FPR64:$Rn)>; } -multiclass SIMDTwoScalarSD opc, string asm> { +multiclass SIMDFPTwoScalar opc, string asm> { def v1i64 : BaseSIMDTwoScalar; def v1i32 : BaseSIMDTwoScalar; } @@ -5587,7 +5904,7 @@ multiclass SIMDPairwiseScalarD opc, string asm> { asm, ".2d">; } -multiclass SIMDPairwiseScalarSD opc, string asm> { +multiclass SIMDFPPairwiseScalar opc, string asm> { def v2i32p : BaseSIMDPairwiseScalar; def v2i64p : BaseSIMDPairwiseScalar : InstAlias<"mov" # "{\t$dst" # size # "$idx, $src" # size # "$idx2" # - # "|" # size #" $dst$idx, $src$idx2}", + # "|" # size #"\t$dst$idx, $src$idx2}", (inst V128:$dst, idxtype:$idx, V128:$src, idxtype:$idx2)>; @@ -5883,7 +6200,7 @@ multiclass SIMDIns { let Inst{20-18} = idx; let Inst{17-16} = 0b10; let Inst{14-12} = idx2; - let Inst{11} = 0; + let Inst{11} = {?}; } def vi32lane : SIMDInsFromElement<".s", v4i32, i32, VectorIndexS> { bits<2> idx; @@ -5891,7 +6208,7 @@ multiclass SIMDIns { let Inst{20-19} = idx; let Inst{18-16} = 0b100; let Inst{14-13} = idx2; - let Inst{12-11} = 0; + let Inst{12-11} = {?,?}; } def vi64lane : SIMDInsFromElement<".d", v2i64, i64, VectorIndexD> { bits<1> idx; @@ -5899,7 +6216,7 @@ multiclass SIMDIns { let Inst{20} = idx; let Inst{19-16} = 0b1000; let Inst{14} = idx2; - let Inst{13-11} = 0; + let Inst{13-11} = {?,?,?}; } // For all forms of the INS instruction, the "mov" mnemonic is the @@ -6357,8 +6674,8 @@ class BaseSIMDIndexedTied size, bits<4> opc, let Inst{4-0} = Rd; } -multiclass SIMDFPIndexedSD opc, string asm, - SDPatternOperator OpNode> { +multiclass SIMDFPIndexed opc, string asm, + SDPatternOperator OpNode> { def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, V64, V64, V128, VectorIndexS, @@ -6420,7 +6737,7 @@ multiclass SIMDFPIndexedSD opc, string asm, } } -multiclass SIMDFPIndexedSDTiedPatterns { +multiclass SIMDFPIndexedTiedPatterns { // 2 variants for the .2s version: DUPLANE from 128-bit and DUP scalar. def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), (AArch64duplane32 (v4f32 V128:$Rm), @@ -6472,7 +6789,7 @@ multiclass SIMDFPIndexedSDTiedPatterns { V128:$Rm, VectorIndexD:$idx)>; } -multiclass SIMDFPIndexedSDTied opc, string asm> { +multiclass SIMDFPIndexedTied opc, string asm> { def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, V64, V64, V128, VectorIndexS, asm, ".2s", ".2s", ".2s", ".s", []> { @@ -8514,6 +8831,174 @@ multiclass SIMDLdSt4SingleAliases { } } // end of 'let Predicates = [HasNEON]' +//---------------------------------------------------------------------------- +// AdvSIMD v8.1 Rounding Double Multiply Add/Subtract +//---------------------------------------------------------------------------- + +let Predicates = [HasNEON, HasV8_1a] in { + +class BaseSIMDThreeSameVectorTiedR0 size, bits<5> opcode, + RegisterOperand regtype, string asm, + string kind, list pattern> + : BaseSIMDThreeSameVectorTied { + let Inst{21}=0; +} +multiclass SIMDThreeSameVectorSQRDMLxHTiedHS opc, string asm, + SDPatternOperator Accum> { + def v4i16 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b01, opc, V64, asm, ".4h", + [(set (v4i16 V64:$dst), + (Accum (v4i16 V64:$Rd), + (v4i16 (int_aarch64_neon_sqrdmulh (v4i16 V64:$Rn), + (v4i16 V64:$Rm)))))]>; + def v8i16 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b01, opc, V128, asm, ".8h", + [(set (v8i16 V128:$dst), + (Accum (v8i16 V128:$Rd), + (v8i16 (int_aarch64_neon_sqrdmulh (v8i16 V128:$Rn), + (v8i16 V128:$Rm)))))]>; + def v2i32 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b10, opc, V64, asm, ".2s", + [(set (v2i32 V64:$dst), + (Accum (v2i32 V64:$Rd), + (v2i32 (int_aarch64_neon_sqrdmulh (v2i32 V64:$Rn), + (v2i32 V64:$Rm)))))]>; + def v4i32 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b10, opc, V128, asm, ".4s", + [(set (v4i32 V128:$dst), + (Accum (v4i32 V128:$Rd), + (v4i32 (int_aarch64_neon_sqrdmulh (v4i32 V128:$Rn), + (v4i32 V128:$Rm)))))]>; +} + +multiclass SIMDIndexedSQRDMLxHSDTied opc, string asm, + SDPatternOperator Accum> { + def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, + V64, V64, V128_lo, VectorIndexH, + asm, ".4h", ".4h", ".4h", ".h", + [(set (v4i16 V64:$dst), + (Accum (v4i16 V64:$Rd), + (v4i16 (int_aarch64_neon_sqrdmulh + (v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx))))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc, + V128, V128, V128_lo, VectorIndexH, + asm, ".8h", ".8h", ".8h", ".h", + [(set (v8i16 V128:$dst), + (Accum (v8i16 V128:$Rd), + (v8i16 (int_aarch64_neon_sqrdmulh + (v8i16 V128:$Rn), + (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx))))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, + V64, V64, V128, VectorIndexS, + asm, ".2s", ".2s", ".2s", ".s", + [(set (v2i32 V64:$dst), + (Accum (v2i32 V64:$Rd), + (v2i32 (int_aarch64_neon_sqrdmulh + (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + // FIXME: it would be nice to use the scalar (v1i32) instruction here, but + // an intermediate EXTRACT_SUBREG would be untyped. + // FIXME: direct EXTRACT_SUBREG from v2i32 to i32 is illegal, that's why we + // got it lowered here as (i32 vector_extract (v4i32 insert_subvector(..))) + def : Pat<(i32 (Accum (i32 FPR32Op:$Rd), + (i32 (vector_extract + (v4i32 (insert_subvector + (undef), + (v2i32 (int_aarch64_neon_sqrdmulh + (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 + (v4i32 V128:$Rm), + VectorIndexS:$idx)))), + (i32 0))), + (i64 0))))), + (EXTRACT_SUBREG + (v2i32 (!cast(NAME # v2i32_indexed) + (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), + FPR32Op:$Rd, + ssub)), + V64:$Rn, + V128:$Rm, + VectorIndexS:$idx)), + ssub)>; + + def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, + V128, V128, V128, VectorIndexS, + asm, ".4s", ".4s", ".4s", ".s", + [(set (v4i32 V128:$dst), + (Accum (v4i32 V128:$Rd), + (v4i32 (int_aarch64_neon_sqrdmulh + (v4i32 V128:$Rn), + (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + // FIXME: it would be nice to use the scalar (v1i32) instruction here, but + // an intermediate EXTRACT_SUBREG would be untyped. + def : Pat<(i32 (Accum (i32 FPR32Op:$Rd), + (i32 (vector_extract + (v4i32 (int_aarch64_neon_sqrdmulh + (v4i32 V128:$Rn), + (v4i32 (AArch64duplane32 + (v4i32 V128:$Rm), + VectorIndexS:$idx)))), + (i64 0))))), + (EXTRACT_SUBREG + (v4i32 (!cast(NAME # v4i32_indexed) + (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), + FPR32Op:$Rd, + ssub)), + V128:$Rn, + V128:$Rm, + VectorIndexS:$idx)), + ssub)>; + + def i16_indexed : BaseSIMDIndexedTied<1, U, 1, 0b01, opc, + FPR16Op, FPR16Op, V128_lo, + VectorIndexH, asm, ".h", "", "", ".h", + []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc, + FPR32Op, FPR32Op, V128, VectorIndexS, + asm, ".s", "", "", ".s", + [(set (i32 FPR32Op:$dst), + (Accum (i32 FPR32Op:$Rd), + (i32 (int_aarch64_neon_sqrdmulh + (i32 FPR32Op:$Rn), + (i32 (vector_extract (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } +} +} // let Predicates = [HasNeon, HasV8_1a] + //---------------------------------------------------------------------------- // Crypto extensions //---------------------------------------------------------------------------- @@ -8610,6 +9095,178 @@ class SHAInstSS opc, string asm, Intrinsic OpNode> [(set (i32 FPR32:$Rd), (OpNode (i32 FPR32:$Rn)))]>; } // end of 'let Predicates = [HasCrypto]' +//---------------------------------------------------------------------------- +// v8.1 atomic instructions extension: +// * CAS +// * CASP +// * SWP +// * LDOPregister, and aliases STOPregister + +// Instruction encodings: +// +// 31 30|29 24|23|22|21|20 16|15|14 10|9 5|4 0 +// CAS SZ |001000|1 |A |1 |Rs |R |11111 |Rn |Rt +// CASP 0|SZ|001000|0 |A |1 |Rs |R |11111 |Rn |Rt +// SWP SZ |111000|A |R |1 |Rs |1 |OPC|00|Rn |Rt +// LD SZ |111000|A |R |1 |Rs |0 |OPC|00|Rn |Rt +// ST SZ |111000|A |R |1 |Rs |0 |OPC|00|Rn |11111 + +// Instruction syntax: +// +// CAS{}[] , , [] +// CAS{} , , [] +// CASP{} , , , , [] +// CASP{} , , , , [] +// SWP{}[] , , [] +// SWP{} , , [] +// LD{}[] , , [] +// LD{} , , [] +// ST{}[] , [] +// ST{} , [] + +let Predicates = [HasV8_1a], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in +class BaseCASEncoding pattern> + : I { + bits<2> Sz; + bit NP; + bit Acq; + bit Rel; + bits<5> Rs; + bits<5> Rn; + bits<5> Rt; + let Inst{31-30} = Sz; + let Inst{29-24} = 0b001000; + let Inst{23} = NP; + let Inst{22} = Acq; + let Inst{21} = 0b1; + let Inst{20-16} = Rs; + let Inst{15} = Rel; + let Inst{14-10} = 0b11111; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +class BaseCAS + : BaseCASEncoding<(outs RC:$out),(ins RC:$Rs, RC:$Rt, GPR64sp:$Rn), + "cas" # order # size, "\t$Rs, $Rt, [$Rn]", + "$out = $Rs",[]> { + let NP = 1; +} + +multiclass CompareAndSwap Acq, bits<1> Rel, string order> { + let Sz = 0b00, Acq = Acq, Rel = Rel in def b : BaseCAS; + let Sz = 0b01, Acq = Acq, Rel = Rel in def h : BaseCAS; + let Sz = 0b10, Acq = Acq, Rel = Rel in def s : BaseCAS; + let Sz = 0b11, Acq = Acq, Rel = Rel in def d : BaseCAS; +} + +class BaseCASP + : BaseCASEncoding<(outs RC:$out),(ins RC:$Rs, RC:$Rt, GPR64sp:$Rn), + "casp" # order # size, "\t$Rs, $Rt, [$Rn]", + "$out = $Rs",[]> { + let NP = 0; +} + +multiclass CompareAndSwapPair Acq, bits<1> Rel, string order> { + let Sz = 0b00, Acq = Acq, Rel = Rel in + def s : BaseCASP; + let Sz = 0b01, Acq = Acq, Rel = Rel in + def d : BaseCASP; +} + +let Predicates = [HasV8_1a] in +class BaseSWP + : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "swp" # order # size, + "\t$Rs, $Rt, [$Rn]","",[]> { + bits<2> Sz; + bit Acq; + bit Rel; + bits<5> Rs; + bits<3> opc = 0b000; + bits<5> Rn; + bits<5> Rt; + let Inst{31-30} = Sz; + let Inst{29-24} = 0b111000; + let Inst{23} = Acq; + let Inst{22} = Rel; + let Inst{21} = 0b1; + let Inst{20-16} = Rs; + let Inst{15} = 0b1; + let Inst{14-12} = opc; + let Inst{11-10} = 0b00; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +multiclass Swap Acq, bits<1> Rel, string order> { + let Sz = 0b00, Acq = Acq, Rel = Rel in def b : BaseSWP; + let Sz = 0b01, Acq = Acq, Rel = Rel in def h : BaseSWP; + let Sz = 0b10, Acq = Acq, Rel = Rel in def s : BaseSWP; + let Sz = 0b11, Acq = Acq, Rel = Rel in def d : BaseSWP; +} + +let Predicates = [HasV8_1a], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in +class BaseLDOPregister + : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "ld" # op # order # size, + "\t$Rs, $Rt, [$Rn]","",[]> { + bits<2> Sz; + bit Acq; + bit Rel; + bits<5> Rs; + bits<3> opc; + bits<5> Rn; + bits<5> Rt; + let Inst{31-30} = Sz; + let Inst{29-24} = 0b111000; + let Inst{23} = Acq; + let Inst{22} = Rel; + let Inst{21} = 0b1; + let Inst{20-16} = Rs; + let Inst{15} = 0b0; + let Inst{14-12} = opc; + let Inst{11-10} = 0b00; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +multiclass LDOPregister opc, string op, bits<1> Acq, bits<1> Rel, + string order> { + let Sz = 0b00, Acq = Acq, Rel = Rel, opc = opc in + def b : BaseLDOPregister; + let Sz = 0b01, Acq = Acq, Rel = Rel, opc = opc in + def h : BaseLDOPregister; + let Sz = 0b10, Acq = Acq, Rel = Rel, opc = opc in + def s : BaseLDOPregister; + let Sz = 0b11, Acq = Acq, Rel = Rel, opc = opc in + def d : BaseLDOPregister; +} + +let Predicates = [HasV8_1a] in +class BaseSTOPregister : + InstAlias; + +multiclass STOPregister { + def : BaseSTOPregister(instr # "Lb")>; + def : BaseSTOPregister(instr # "Lh")>; + def : BaseSTOPregister(instr # "Ls")>; + def : BaseSTOPregister(instr # "Ld")>; + def : BaseSTOPregister(instr # "b")>; + def : BaseSTOPregister(instr # "h")>; + def : BaseSTOPregister(instr # "s")>; + def : BaseSTOPregister(instr # "d")>; +} + +//---------------------------------------------------------------------------- // Allow the size specifier tokens to be upper case, not just lower. def : TokenAlias<".8B", ".8b">; def : TokenAlias<".4H", ".4h">;