[SDTCisVec<0>]>>;
def Neon_vduplane : SDNode<"AArch64ISD::NEON_VDUPLANE", SDTypeProfile<1, 2,
[SDTCisVec<0>, SDTCisVec<1>, SDTCisVT<2, i64>]>>;
+// SelectionDAG node for AArch64ISD::NEON_VEXTRACT. The type profile declares
+// one result and three operands: the result is a vector, operands 1 and 2 have
+// the same type as the result, and operand 3 is an i64 (presumably the extract
+// position -- confirm against the lowering code).
+def Neon_vextract : SDNode<"AArch64ISD::NEON_VEXTRACT", SDTypeProfile<1, 3,
+ [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>, SDTCisVT<3, i64>]>>;
//===----------------------------------------------------------------------===//
// Multiclasses
multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size, bits<5> opcode,
string asmop, SDPatternOperator opnode8B,
SDPatternOperator opnode16B,
- bit Commutable = 0>
-{
+ bit Commutable = 0> {
let isCommutable = Commutable in {
def _8B : NeonI_3VSame<0b0, u, size, opcode,
(outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode,
string asmop, SDPatternOperator opnode,
- bit Commutable = 0>
-{
+ bit Commutable = 0> {
let isCommutable = Commutable in {
def _4H : NeonI_3VSame<0b0, u, 0b01, opcode,
(outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
string asmop, SDPatternOperator opnode,
bit Commutable = 0>
- : NeonI_3VSame_HS_sizes<u, opcode, asmop, opnode, Commutable>
-{
+ : NeonI_3VSame_HS_sizes<u, opcode, asmop, opnode, Commutable> {
let isCommutable = Commutable in {
def _8B : NeonI_3VSame<0b0, u, 0b00, opcode,
(outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
multiclass NeonI_3VSame_BHSD_sizes<bit u, bits<5> opcode,
string asmop, SDPatternOperator opnode,
bit Commutable = 0>
- : NeonI_3VSame_BHS_sizes<u, opcode, asmop, opnode, Commutable>
-{
+ : NeonI_3VSame_BHS_sizes<u, opcode, asmop, opnode, Commutable> {
let isCommutable = Commutable in {
def _2D : NeonI_3VSame<0b1, u, 0b11, opcode,
(outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
SDPatternOperator opnode4S,
SDPatternOperator opnode2D,
ValueType ResTy2S, ValueType ResTy4S,
- ValueType ResTy2D, bit Commutable = 0>
-{
+ ValueType ResTy2D, bit Commutable = 0> {
let isCommutable = Commutable in {
def _2S : NeonI_3VSame<0b0, u, {size, 0b0}, opcode,
(outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
+// i32 immediate operand parsed through neon_uimm8_asmoperand. The ImmLeaf
+// predicate ignores Imm and always returns true, so no range check is done at
+// pattern-match time. PrintMethod names the C++ instruction-printer hook.
def neon_uimm8 : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
let ParserMatchClass = neon_uimm8_asmoperand;
- let PrintMethod = "printNeonUImm8Operand";
+ let PrintMethod = "printUImmHexOperand";
}
def neon_uimm64_mask_asmoperand : AsmOperandClass
(outs VPR64:$Rd),
(ins neon_uimm8:$Imm,
neon_mov_imm_LSL_operand:$Simm),
- !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
+ !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
[(set (v2i32 VPR64:$Rd),
(v2i32 (opnode (timm:$Imm),
(neon_mov_imm_LSL_operand:$Simm))))],
(outs VPR128:$Rd),
(ins neon_uimm8:$Imm,
neon_mov_imm_LSL_operand:$Simm),
- !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
+ !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
[(set (v4i32 VPR128:$Rd),
(v4i32 (opnode (timm:$Imm),
(neon_mov_imm_LSL_operand:$Simm))))],
(outs VPR64:$Rd),
(ins neon_uimm8:$Imm,
neon_mov_imm_LSLH_operand:$Simm),
- !strconcat(asmop, " $Rd.4h, $Imm$Simm"),
+ !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
[(set (v4i16 VPR64:$Rd),
(v4i16 (opnode (timm:$Imm),
(neon_mov_imm_LSLH_operand:$Simm))))],
(outs VPR128:$Rd),
(ins neon_uimm8:$Imm,
neon_mov_imm_LSLH_operand:$Simm),
- !strconcat(asmop, " $Rd.8h, $Imm$Simm"),
+ !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
[(set (v8i16 VPR128:$Rd),
(v8i16 (opnode (timm:$Imm),
(neon_mov_imm_LSLH_operand:$Simm))))],
(outs VPR64:$Rd),
(ins VPR64:$src, neon_uimm8:$Imm,
neon_mov_imm_LSL_operand:$Simm),
- !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
+ !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
[(set (v2i32 VPR64:$Rd),
(v2i32 (opnode (v2i32 VPR64:$src),
(v2i32 (bitconvert (v2i32 (neonopnode timm:$Imm,
(outs VPR128:$Rd),
(ins VPR128:$src, neon_uimm8:$Imm,
neon_mov_imm_LSL_operand:$Simm),
- !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
+ !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
[(set (v4i32 VPR128:$Rd),
(v4i32 (opnode (v4i32 VPR128:$src),
(v4i32 (bitconvert (v4i32 (neonopnode timm:$Imm,
(outs VPR64:$Rd),
(ins VPR64:$src, neon_uimm8:$Imm,
neon_mov_imm_LSLH_operand:$Simm),
- !strconcat(asmop, " $Rd.4h, $Imm$Simm"),
+ !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
[(set (v4i16 VPR64:$Rd),
(v4i16 (opnode (v4i16 VPR64:$src),
(v4i16 (bitconvert (v4i16 (neonopnode timm:$Imm,
(outs VPR128:$Rd),
(ins VPR128:$src, neon_uimm8:$Imm,
neon_mov_imm_LSLH_operand:$Simm),
- !strconcat(asmop, " $Rd.8h, $Imm$Simm"),
+ !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
[(set (v8i16 VPR128:$Rd),
(v8i16 (opnode (v8i16 VPR128:$src),
(v8i16 (bitconvert (v8i16 (neonopnode timm:$Imm,
(outs VPR64:$Rd),
(ins neon_uimm8:$Imm,
neon_mov_imm_MSL_operand:$Simm),
- !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
+ !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
[(set (v2i32 VPR64:$Rd),
(v2i32 (opnode (timm:$Imm),
(neon_mov_imm_MSL_operand:$Simm))))],
(outs VPR128:$Rd),
(ins neon_uimm8:$Imm,
neon_mov_imm_MSL_operand:$Simm),
- !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
+ !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
[(set (v4i32 VPR128:$Rd),
(v4i32 (opnode (timm:$Imm),
(neon_mov_imm_MSL_operand:$Simm))))],
+// Assembly alias that omits the shift operand: the alias string is built from
+// the mnemonic `asmop`, "$Rd," followed by `asmlane`, and "$Imm"; it maps to
+// `inst` with the third operand fixed to 0.
+// NOTE(review): the trailing 0b0 is NeonInstAlias's second argument; its
+// meaning is defined by NeonInstAlias, which is not visible here.
class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane,
Instruction inst, RegisterOperand VPRC>
- : NeonInstAlias<!strconcat(asmop, " $Rd," # asmlane # ", $Imm"),
+ : NeonInstAlias<!strconcat(asmop, "\t$Rd," # asmlane # ", $Imm"),
(inst VPRC:$Rd, neon_uimm8:$Imm, 0), 0b0>;
// Aliases for Vector Move Immediate Shifted
let ParserMatchClass = uimm6_asmoperand;
}
-// Shift Right Immediate - A shift right immediate is encoded differently from
-// other shift immediates. The immh:immb field is encoded like so:
+// Shift Right/Left Immediate - The immh:immb field of these shifts is encoded
+// as follows:
//
// Offset Encoding
// 8 immh:immb<6:3> = '0001xxx', <imm> is encoded in immh:immb<2:0>
// 16 immh:immb<6:4> = '001xxxx', <imm> is encoded in immh:immb<3:0>
// 32 immh:immb<6:5> = '01xxxxx', <imm> is encoded in immh:immb<4:0>
// 64 immh:immb<6> = '1xxxxxx', <imm> is encoded in immh:immb<5:0>
+//
+// The shift right immediate amount, in the range 1 to element bits, is computed
+// as 2 * Offset - UInt(immh:immb). The shift left immediate amount, in the
+// range 0 to element bits - 1, is computed as UInt(immh:immb) - Offset.
+
class shr_imm_asmoperands<string OFFSET> : AsmOperandClass {
let Name = "ShrImm" # OFFSET;
let RenderMethod = "addImmOperands";
def shr_imm32 : shr_imm<"32">;
def shr_imm64 : shr_imm<"64">;
+// Assembly-parser operand class for a shift-left immediate. OFFSET is the size
+// tag ("8", "16", "32" or "64" in the instantiations that follow); it is
+// appended to "ShlImm" to form both the class name and the diagnostic type.
+class shl_imm_asmoperands<string OFFSET> : AsmOperandClass {
+  let RenderMethod = "addImmOperands";
+  let Name = !strconcat("ShlImm", OFFSET);
+  let DiagnosticType = !strconcat("ShlImm", OFFSET);
+}
+
+// i32 operand for a shift-left immediate. OFFSET selects, by name, the matching
+// parser class ("shl_imm<OFFSET>_asmoperand") and the C++ encoder/decoder
+// hooks ("getShiftLeftImm<OFFSET>" / "DecodeShiftLeftImm<OFFSET>").
+class shl_imm<string OFFSET> : Operand<i32> {
+  let ParserMatchClass =
+      !cast<AsmOperandClass>(
+          !strconcat("shl_imm", !strconcat(OFFSET, "_asmoperand")));
+  let DecoderMethod = !strconcat("DecodeShiftLeftImm", OFFSET);
+  let EncoderMethod = !strconcat("getShiftLeftImm", OFFSET);
+}
+
+// Parser operand classes for each size tag. The record names must keep the
+// form "shl_imm" # OFFSET # "_asmoperand", because shl_imm looks them up by
+// that name with !cast.
+def shl_imm8_asmoperand : shl_imm_asmoperands<"8">;
+def shl_imm16_asmoperand : shl_imm_asmoperands<"16">;
+def shl_imm32_asmoperand : shl_imm_asmoperands<"32">;
+def shl_imm64_asmoperand : shl_imm_asmoperands<"64">;
+
+// Shift-left immediate operands, one per size tag.
+def shl_imm8 : shl_imm<"8">;
+def shl_imm16 : shl_imm<"16">;
+def shl_imm32 : shl_imm<"32">;
+def shl_imm64 : shl_imm<"64">;
+
class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode>
: NeonI_2VShiftImm<q, u, opcode,
// Variant 3
multiclass NeonI_2VAcross_3<bit u, bits<5> opcode, bits<2> size,
- string asmop, SDPatternOperator opnode>
-{
+ string asmop, SDPatternOperator opnode> {
def _1s4s: NeonI_2VAcross<0b1, u, size, opcode,
(outs FPR32:$Rd), (ins VPR128:$Rn),
asmop # "\t$Rd, $Rn.4s",
defm FMINV : NeonI_2VAcross_3<0b1, 0b01111, 0b10, "fminv",
int_aarch64_neon_vminv>;
+// The following is for instruction class (Perm)
+
+// Base class for the permute-class instructions. The destination and both
+// sources use the same register operand class and the same arrangement suffix
+// in the assembly string. No selection pattern is attached here (the pattern
+// list is empty).
+class NeonI_Permute<bit q, bits<2> size, bits<3> opcode,
+                    string asmop, RegisterOperand VecOp, string Arr>
+  : NeonI_Perm<q, size, opcode,
+               (outs VecOp:$Rd),
+               (ins VecOp:$Rn, VecOp:$Rm),
+               !strconcat(asmop,
+                          "\t$Rd." # Arr # ", $Rn." # Arr # ", $Rm." # Arr),
+               [], NoItinerary>;
+
+// Expands one permute opcode into its seven arrangement variants. The q bit is
+// 0 for the VPR64 forms (8b, 4h, 2s) and 1 for the VPR128 forms (16b, 8h, 4s,
+// 2d); size is 0b00/0b01/0b10/0b11 for b/h/s/d elements.
+multiclass NeonI_Perm_pat<bits<3> opcode, string asmop> {
+ def _8b : NeonI_Permute<0b0, 0b00, opcode, asmop, VPR64, "8b">;
+ def _16b : NeonI_Permute<0b1, 0b00, opcode, asmop, VPR128, "16b">;
+ def _4h : NeonI_Permute<0b0, 0b01, opcode, asmop, VPR64, "4h">;
+ def _8h : NeonI_Permute<0b1, 0b01, opcode, asmop, VPR128, "8h">;
+ def _2s : NeonI_Permute<0b0, 0b10, opcode, asmop, VPR64, "2s">;
+ def _4s : NeonI_Permute<0b1, 0b10, opcode, asmop, VPR128, "4s">;
+ def _2d : NeonI_Permute<0b1, 0b11, opcode, asmop, VPR128, "2d">;
+}
+
+// The six permute instructions; the 3-bit opcode selects the operation. Each
+// defm produces the records <name>_8b, _16b, _4h, _8h, _2s, _4s and _2d.
+defm UZP1vvv : NeonI_Perm_pat<0b001, "uzp1">;
+defm TRN1vvv : NeonI_Perm_pat<0b010, "trn1">;
+defm ZIP1vvv : NeonI_Perm_pat<0b011, "zip1">;
+defm UZP2vvv : NeonI_Perm_pat<0b101, "uzp2">;
+defm TRN2vvv : NeonI_Perm_pat<0b110, "trn2">;
+defm ZIP2vvv : NeonI_Perm_pat<0b111, "zip2">;
+
+// Extract and Insert
+// Matches "insert element $Ext of $Rm into lane $Ins of $Rn", with the
+// extracted scalar typed as i32.
+def NI_ei_i32 : PatFrag<(ops node:$Rn, node:$Rm, node:$Ext, node:$Ins),
+ (vector_insert node:$Rn,
+ (i32 (vector_extract node:$Rm, node:$Ext)),
+ node:$Ins)>;
+
+// Same extract-then-insert fragment, with the extracted scalar typed as f32.
+def NI_ei_f32 : PatFrag<(ops node:$Rn, node:$Rm, node:$Ext, node:$Ins),
+ (vector_insert node:$Rn,
+ (f32 (vector_extract node:$Rm, node:$Ext)),
+ node:$Ins)>;
+
+// uzp1
+// Selects UZP1vvv_16b for a v16i8 built by 15 chained extract/insert steps on
+// top of $Rn (which supplies lane 0). Each trailing "ext, ins" pair is the
+// source element and destination lane: lanes 1-7 take elements 2, 4, ..., 14
+// of $Rn and lanes 8-15 take elements 0, 2, ..., 14 of $Rm.
+def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 VPR128:$Rn),
+ (v16i8 VPR128:$Rn), 2, 1)),
+ (v16i8 VPR128:$Rn), 4, 2)),
+ (v16i8 VPR128:$Rn), 6, 3)),
+ (v16i8 VPR128:$Rn), 8, 4)),
+ (v16i8 VPR128:$Rn), 10, 5)),
+ (v16i8 VPR128:$Rn), 12, 6)),
+ (v16i8 VPR128:$Rn), 14, 7)),
+ (v16i8 VPR128:$Rm), 0, 8)),
+ (v16i8 VPR128:$Rm), 2, 9)),
+ (v16i8 VPR128:$Rm), 4, 10)),
+ (v16i8 VPR128:$Rm), 6, 11)),
+ (v16i8 VPR128:$Rm), 8, 12)),
+ (v16i8 VPR128:$Rm), 10, 13)),
+ (v16i8 VPR128:$Rm), 12, 14)),
+ (v16i8 VPR128:$Rm), 14, 15)),
+ (UZP1vvv_16b VPR128:$Rn, VPR128:$Rm)>;
+
+// uzp1 selection for 8-element vectors: $Rn supplies lane 0, lanes 1-3 take
+// elements 2, 4, 6 of $Rn and lanes 4-7 take elements 0, 2, 4, 6 of $Rm.
+class NI_Uzp1_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
+ : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
+ (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
+ (Ty VPR:$Rn),
+ (Ty VPR:$Rn), 2, 1)),
+ (Ty VPR:$Rn), 4, 2)),
+ (Ty VPR:$Rn), 6, 3)),
+ (Ty VPR:$Rm), 0, 4)),
+ (Ty VPR:$Rm), 2, 5)),
+ (Ty VPR:$Rm), 4, 6)),
+ (Ty VPR:$Rm), 6, 7)),
+ (INST VPR:$Rn, VPR:$Rm)>;
+
+// Instantiated for v8i8 (64-bit registers) and v8i16 (128-bit registers).
+def : NI_Uzp1_v8<v8i8, VPR64, UZP1vvv_8b>;
+def : NI_Uzp1_v8<v8i16, VPR128, UZP1vvv_8h>;
+
+// uzp1 selection for 4-element vectors: $Rn supplies lane 0, lane 1 takes
+// element 2 of $Rn, and lanes 2-3 take elements 0 and 2 of $Rm. `ei` is the
+// extract/insert fragment matching the element type (integer or f32).
+class NI_Uzp1_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
+ PatFrag ei>
+ : Pat<(Ty (ei (Ty (ei (Ty (ei
+ (Ty VPR:$Rn),
+ (Ty VPR:$Rn), 2, 1)),
+ (Ty VPR:$Rm), 0, 2)),
+ (Ty VPR:$Rm), 2, 3)),
+ (INST VPR:$Rn, VPR:$Rm)>;
+
+// v4i32 and v4f32 both select the 4s instruction.
+def : NI_Uzp1_v4<v4i16, VPR64, UZP1vvv_4h, NI_ei_i32>;
+def : NI_Uzp1_v4<v4i32, VPR128, UZP1vvv_4s, NI_ei_i32>;
+def : NI_Uzp1_v4<v4f32, VPR128, UZP1vvv_4s, NI_ei_f32>;
+
+// uzp2
+// Selects UZP2vvv_16b for a v16i8 built by 15 chained extract/insert steps on
+// top of $Rm (which supplies lane 15). Lanes 0-7 take elements 1, 3, ..., 15
+// of $Rn and lanes 8-14 take elements 1, 3, ..., 13 of $Rm.
+def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 VPR128:$Rm),
+ (v16i8 VPR128:$Rn), 1, 0)),
+ (v16i8 VPR128:$Rn), 3, 1)),
+ (v16i8 VPR128:$Rn), 5, 2)),
+ (v16i8 VPR128:$Rn), 7, 3)),
+ (v16i8 VPR128:$Rn), 9, 4)),
+ (v16i8 VPR128:$Rn), 11, 5)),
+ (v16i8 VPR128:$Rn), 13, 6)),
+ (v16i8 VPR128:$Rn), 15, 7)),
+ (v16i8 VPR128:$Rm), 1, 8)),
+ (v16i8 VPR128:$Rm), 3, 9)),
+ (v16i8 VPR128:$Rm), 5, 10)),
+ (v16i8 VPR128:$Rm), 7, 11)),
+ (v16i8 VPR128:$Rm), 9, 12)),
+ (v16i8 VPR128:$Rm), 11, 13)),
+ (v16i8 VPR128:$Rm), 13, 14)),
+ (UZP2vvv_16b VPR128:$Rn, VPR128:$Rm)>;
+
+// uzp2 selection for 8-element vectors: $Rm supplies lane 7, lanes 0-3 take
+// elements 1, 3, 5, 7 of $Rn and lanes 4-6 take elements 1, 3, 5 of $Rm.
+class NI_Uzp2_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
+ : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
+ (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
+ (Ty VPR:$Rm),
+ (Ty VPR:$Rn), 1, 0)),
+ (Ty VPR:$Rn), 3, 1)),
+ (Ty VPR:$Rn), 5, 2)),
+ (Ty VPR:$Rn), 7, 3)),
+ (Ty VPR:$Rm), 1, 4)),
+ (Ty VPR:$Rm), 3, 5)),
+ (Ty VPR:$Rm), 5, 6)),
+ (INST VPR:$Rn, VPR:$Rm)>;
+
+// Instantiated for v8i8 (64-bit registers) and v8i16 (128-bit registers).
+def : NI_Uzp2_v8<v8i8, VPR64, UZP2vvv_8b>;
+def : NI_Uzp2_v8<v8i16, VPR128, UZP2vvv_8h>;
+
+// uzp2 selection for 4-element vectors: $Rm supplies lane 3, lanes 0-1 take
+// elements 1 and 3 of $Rn, and lane 2 takes element 1 of $Rm. `ei` is the
+// extract/insert fragment matching the element type (integer or f32).
+class NI_Uzp2_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
+ PatFrag ei>
+ : Pat<(Ty (ei (Ty (ei (Ty (ei
+ (Ty VPR:$Rm),
+ (Ty VPR:$Rn), 1, 0)),
+ (Ty VPR:$Rn), 3, 1)),
+ (Ty VPR:$Rm), 1, 2)),
+ (INST VPR:$Rn, VPR:$Rm)>;
+
+// v4i32 and v4f32 both select the 4s instruction.
+def : NI_Uzp2_v4<v4i16, VPR64, UZP2vvv_4h, NI_ei_i32>;
+def : NI_Uzp2_v4<v4i32, VPR128, UZP2vvv_4s, NI_ei_i32>;
+def : NI_Uzp2_v4<v4f32, VPR128, UZP2vvv_4s, NI_ei_f32>;
+
+// zip1
+// Selects ZIP1vvv_16b for a v16i8 built by 15 chained extract/insert steps on
+// top of $Rn (which supplies lane 0). Elements 0-7 of $Rn and $Rm are
+// interleaved: lane 2*i comes from $Rn[i] and lane 2*i+1 from $Rm[i].
+def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 VPR128:$Rn),
+ (v16i8 VPR128:$Rm), 0, 1)),
+ (v16i8 VPR128:$Rn), 1, 2)),
+ (v16i8 VPR128:$Rm), 1, 3)),
+ (v16i8 VPR128:$Rn), 2, 4)),
+ (v16i8 VPR128:$Rm), 2, 5)),
+ (v16i8 VPR128:$Rn), 3, 6)),
+ (v16i8 VPR128:$Rm), 3, 7)),
+ (v16i8 VPR128:$Rn), 4, 8)),
+ (v16i8 VPR128:$Rm), 4, 9)),
+ (v16i8 VPR128:$Rn), 5, 10)),
+ (v16i8 VPR128:$Rm), 5, 11)),
+ (v16i8 VPR128:$Rn), 6, 12)),
+ (v16i8 VPR128:$Rm), 6, 13)),
+ (v16i8 VPR128:$Rn), 7, 14)),
+ (v16i8 VPR128:$Rm), 7, 15)),
+ (ZIP1vvv_16b VPR128:$Rn, VPR128:$Rm)>;
+
+// zip1 selection for 8-element vectors: $Rn supplies lane 0; elements 0-3 of
+// $Rn and $Rm are interleaved (lane 2*i from $Rn[i], lane 2*i+1 from $Rm[i]).
+class NI_Zip1_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
+ : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
+ (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
+ (Ty VPR:$Rn),
+ (Ty VPR:$Rm), 0, 1)),
+ (Ty VPR:$Rn), 1, 2)),
+ (Ty VPR:$Rm), 1, 3)),
+ (Ty VPR:$Rn), 2, 4)),
+ (Ty VPR:$Rm), 2, 5)),
+ (Ty VPR:$Rn), 3, 6)),
+ (Ty VPR:$Rm), 3, 7)),
+ (INST VPR:$Rn, VPR:$Rm)>;
+
+// Instantiated for v8i8 (64-bit registers) and v8i16 (128-bit registers).
+def : NI_Zip1_v8<v8i8, VPR64, ZIP1vvv_8b>;
+def : NI_Zip1_v8<v8i16, VPR128, ZIP1vvv_8h>;
+
+// zip1 selection for 4-element vectors: $Rn supplies lane 0, lane 1 takes
+// $Rm[0], lane 2 takes $Rn[1] and lane 3 takes $Rm[1]. `ei` is the
+// extract/insert fragment matching the element type (integer or f32).
+class NI_Zip1_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
+ PatFrag ei>
+ : Pat<(Ty (ei (Ty (ei (Ty (ei
+ (Ty VPR:$Rn),
+ (Ty VPR:$Rm), 0, 1)),
+ (Ty VPR:$Rn), 1, 2)),
+ (Ty VPR:$Rm), 1, 3)),
+ (INST VPR:$Rn, VPR:$Rm)>;
+
+// v4i32 and v4f32 both select the 4s instruction.
+def : NI_Zip1_v4<v4i16, VPR64, ZIP1vvv_4h, NI_ei_i32>;
+def : NI_Zip1_v4<v4i32, VPR128, ZIP1vvv_4s, NI_ei_i32>;
+def : NI_Zip1_v4<v4f32, VPR128, ZIP1vvv_4s, NI_ei_f32>;
+
+// zip2
+// Selects ZIP2vvv_16b for a v16i8 built by 15 chained extract/insert steps on
+// top of $Rm (which supplies lane 15). Elements 8-15 of $Rn and $Rm are
+// interleaved: lane 2*i comes from $Rn[8+i] and lane 2*i+1 from $Rm[8+i].
+def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 VPR128:$Rm),
+ (v16i8 VPR128:$Rn), 8, 0)),
+ (v16i8 VPR128:$Rm), 8, 1)),
+ (v16i8 VPR128:$Rn), 9, 2)),
+ (v16i8 VPR128:$Rm), 9, 3)),
+ (v16i8 VPR128:$Rn), 10, 4)),
+ (v16i8 VPR128:$Rm), 10, 5)),
+ (v16i8 VPR128:$Rn), 11, 6)),
+ (v16i8 VPR128:$Rm), 11, 7)),
+ (v16i8 VPR128:$Rn), 12, 8)),
+ (v16i8 VPR128:$Rm), 12, 9)),
+ (v16i8 VPR128:$Rn), 13, 10)),
+ (v16i8 VPR128:$Rm), 13, 11)),
+ (v16i8 VPR128:$Rn), 14, 12)),
+ (v16i8 VPR128:$Rm), 14, 13)),
+ (v16i8 VPR128:$Rn), 15, 14)),
+ (ZIP2vvv_16b VPR128:$Rn, VPR128:$Rm)>;
+
+// zip2 selection for 8-element vectors: $Rm supplies lane 7; elements 4-7 of
+// $Rn and $Rm are interleaved (lane 2*i from $Rn[4+i], lane 2*i+1 from
+// $Rm[4+i]).
+class NI_Zip2_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
+ : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
+ (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
+ (Ty VPR:$Rm),
+ (Ty VPR:$Rn), 4, 0)),
+ (Ty VPR:$Rm), 4, 1)),
+ (Ty VPR:$Rn), 5, 2)),
+ (Ty VPR:$Rm), 5, 3)),
+ (Ty VPR:$Rn), 6, 4)),
+ (Ty VPR:$Rm), 6, 5)),
+ (Ty VPR:$Rn), 7, 6)),
+ (INST VPR:$Rn, VPR:$Rm)>;
+
+// Instantiated for v8i8 (64-bit registers) and v8i16 (128-bit registers).
+def : NI_Zip2_v8<v8i8, VPR64, ZIP2vvv_8b>;
+def : NI_Zip2_v8<v8i16, VPR128, ZIP2vvv_8h>;
+
+// zip2 selection for 4-element vectors: $Rm supplies lane 3, lane 0 takes
+// $Rn[2], lane 1 takes $Rm[2] and lane 2 takes $Rn[3]. `ei` is the
+// extract/insert fragment matching the element type (integer or f32).
+class NI_Zip2_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
+ PatFrag ei>
+ : Pat<(Ty (ei (Ty (ei (Ty (ei
+ (Ty VPR:$Rm),
+ (Ty VPR:$Rn), 2, 0)),
+ (Ty VPR:$Rm), 2, 1)),
+ (Ty VPR:$Rn), 3, 2)),
+ (INST VPR:$Rn, VPR:$Rm)>;
+
+// v4i32 and v4f32 both select the 4s instruction.
+def : NI_Zip2_v4<v4i16, VPR64, ZIP2vvv_4h, NI_ei_i32>;
+def : NI_Zip2_v4<v4i32, VPR128, ZIP2vvv_4s, NI_ei_i32>;
+def : NI_Zip2_v4<v4f32, VPR128, ZIP2vvv_4s, NI_ei_f32>;
+
+// trn1
+// Selects TRN1vvv_16b for a v16i8 built by 8 chained extract/insert steps on
+// top of $Rn, which keeps its even lanes. Each odd lane 2*i+1 takes the even
+// element $Rm[2*i].
+def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 VPR128:$Rn),
+ (v16i8 VPR128:$Rm), 0, 1)),
+ (v16i8 VPR128:$Rm), 2, 3)),
+ (v16i8 VPR128:$Rm), 4, 5)),
+ (v16i8 VPR128:$Rm), 6, 7)),
+ (v16i8 VPR128:$Rm), 8, 9)),
+ (v16i8 VPR128:$Rm), 10, 11)),
+ (v16i8 VPR128:$Rm), 12, 13)),
+ (v16i8 VPR128:$Rm), 14, 15)),
+ (TRN1vvv_16b VPR128:$Rn, VPR128:$Rm)>;
+
+// trn1 selection for 8-element vectors: $Rn keeps its even lanes; odd lanes
+// 1, 3, 5, 7 take elements 0, 2, 4, 6 of $Rm.
+class NI_Trn1_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
+ : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
+ (Ty VPR:$Rn),
+ (Ty VPR:$Rm), 0, 1)),
+ (Ty VPR:$Rm), 2, 3)),
+ (Ty VPR:$Rm), 4, 5)),
+ (Ty VPR:$Rm), 6, 7)),
+ (INST VPR:$Rn, VPR:$Rm)>;
+
+// Instantiated for v8i8 (64-bit registers) and v8i16 (128-bit registers).
+def : NI_Trn1_v8<v8i8, VPR64, TRN1vvv_8b>;
+def : NI_Trn1_v8<v8i16, VPR128, TRN1vvv_8h>;
+
+// trn1 selection for 4-element vectors: $Rn keeps lanes 0 and 2; lanes 1 and 3
+// take elements 0 and 2 of $Rm. `ei` is the extract/insert fragment matching
+// the element type (integer or f32).
+class NI_Trn1_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
+ PatFrag ei>
+ : Pat<(Ty (ei (Ty (ei
+ (Ty VPR:$Rn),
+ (Ty VPR:$Rm), 0, 1)),
+ (Ty VPR:$Rm), 2, 3)),
+ (INST VPR:$Rn, VPR:$Rm)>;
+
+// v4i32 and v4f32 both select the 4s instruction.
+def : NI_Trn1_v4<v4i16, VPR64, TRN1vvv_4h, NI_ei_i32>;
+def : NI_Trn1_v4<v4i32, VPR128, TRN1vvv_4s, NI_ei_i32>;
+def : NI_Trn1_v4<v4f32, VPR128, TRN1vvv_4s, NI_ei_f32>;
+
+// trn2
+// Selects TRN2vvv_16b for a v16i8 built by 8 chained extract/insert steps on
+// top of $Rm, which keeps its odd lanes. Each even lane 2*i takes the odd
+// element $Rn[2*i+1].
+def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
+ (v16i8 VPR128:$Rm),
+ (v16i8 VPR128:$Rn), 1, 0)),
+ (v16i8 VPR128:$Rn), 3, 2)),
+ (v16i8 VPR128:$Rn), 5, 4)),
+ (v16i8 VPR128:$Rn), 7, 6)),
+ (v16i8 VPR128:$Rn), 9, 8)),
+ (v16i8 VPR128:$Rn), 11, 10)),
+ (v16i8 VPR128:$Rn), 13, 12)),
+ (v16i8 VPR128:$Rn), 15, 14)),
+ (TRN2vvv_16b VPR128:$Rn, VPR128:$Rm)>;
+
+// trn2 selection for 8-element vectors: $Rm keeps its odd lanes; even lanes
+// 0, 2, 4, 6 take elements 1, 3, 5, 7 of $Rn.
+class NI_Trn2_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
+ : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
+ (Ty VPR:$Rm),
+ (Ty VPR:$Rn), 1, 0)),
+ (Ty VPR:$Rn), 3, 2)),
+ (Ty VPR:$Rn), 5, 4)),
+ (Ty VPR:$Rn), 7, 6)),
+ (INST VPR:$Rn, VPR:$Rm)>;
+
+// Instantiated for v8i8 (64-bit registers) and v8i16 (128-bit registers).
+def : NI_Trn2_v8<v8i8, VPR64, TRN2vvv_8b>;
+def : NI_Trn2_v8<v8i16, VPR128, TRN2vvv_8h>;
+
+// trn2 selection for 4-element vectors: $Rm keeps lanes 1 and 3; lanes 0 and 2
+// take elements 1 and 3 of $Rn. `ei` is the extract/insert fragment matching
+// the element type (integer or f32).
+class NI_Trn2_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
+ PatFrag ei>
+ : Pat<(Ty (ei (Ty (ei
+ (Ty VPR:$Rm),
+ (Ty VPR:$Rn), 1, 0)),
+ (Ty VPR:$Rn), 3, 2)),
+ (INST VPR:$Rn, VPR:$Rm)>;
+
+// v4i32 and v4f32 both select the 4s instruction.
+def : NI_Trn2_v4<v4i16, VPR64, TRN2vvv_4h, NI_ei_i32>;
+def : NI_Trn2_v4<v4i32, VPR128, TRN2vvv_4s, NI_ei_i32>;
+def : NI_Trn2_v4<v4f32, VPR128, TRN2vvv_4s, NI_ei_f32>;
+
+// End of implementation for instruction class (Perm)
+
// The followings are for instruction class (3V Diff)
// normal long/long2 pattern
multiclass NeonI_3VDL_s<bit u, bits<4> opcode,
string asmop, SDPatternOperator opnode,
- bit Commutable = 0>
-{
+ bit Commutable = 0> {
let isCommutable = Commutable in {
def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
opnode, sext, VPR64, v8i16, v8i8>;
}
}
-multiclass NeonI_3VDL2_s<bit u, bits<4> opcode,
- string asmop, SDPatternOperator opnode,
- bit Commutable = 0>
-{
+multiclass NeonI_3VDL2_s<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, bit Commutable = 0> {
let isCommutable = Commutable in {
def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
}
}
-multiclass NeonI_3VDL_u<bit u, bits<4> opcode,
- string asmop, SDPatternOperator opnode,
- bit Commutable = 0>
-{
+multiclass NeonI_3VDL_u<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, bit Commutable = 0> {
let isCommutable = Commutable in {
def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
opnode, zext, VPR64, v8i16, v8i8>;
}
}
-multiclass NeonI_3VDL2_u<bit u, bits<4> opcode,
- string asmop, SDPatternOperator opnode,
- bit Commutable = 0>
-{
+multiclass NeonI_3VDL2_u<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, bit Commutable = 0> {
let isCommutable = Commutable in {
def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
(ResTy (ext (OpTy OpVPR:$Rm))))))],
NoItinerary>;
-multiclass NeonI_3VDW_s<bit u, bits<4> opcode,
- string asmop, SDPatternOperator opnode>
-{
+multiclass NeonI_3VDW_s<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode> {
def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
opnode, sext, VPR64, v8i16, v8i8>;
def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
defm SADDWvvv : NeonI_3VDW_s<0b0, 0b0001, "saddw", add>;
defm SSUBWvvv : NeonI_3VDW_s<0b0, 0b0011, "ssubw", sub>;
-multiclass NeonI_3VDW2_s<bit u, bits<4> opcode,
- string asmop, SDPatternOperator opnode>
-{
+multiclass NeonI_3VDW2_s<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode> {
def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
defm SADDW2vvv : NeonI_3VDW2_s<0b0, 0b0001, "saddw2", add>;
defm SSUBW2vvv : NeonI_3VDW2_s<0b0, 0b0011, "ssubw2", sub>;
-multiclass NeonI_3VDW_u<bit u, bits<4> opcode,
- string asmop, SDPatternOperator opnode>
-{
+multiclass NeonI_3VDW_u<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode> {
def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
opnode, zext, VPR64, v8i16, v8i8>;
def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
defm UADDWvvv : NeonI_3VDW_u<0b1, 0b0001, "uaddw", add>;
defm USUBWvvv : NeonI_3VDW_u<0b1, 0b0011, "usubw", sub>;
-multiclass NeonI_3VDW2_u<bit u, bits<4> opcode,
- string asmop, SDPatternOperator opnode>
-{
+multiclass NeonI_3VDW2_u<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode> {
def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
defm USUBW2vvv : NeonI_3VDW2_u<0b1, 0b0011, "usubw2", sub>;
// Get the high half part of the vector element.
-multiclass NeonI_get_high
-{
+multiclass NeonI_get_high {
def _8h : PatFrag<(ops node:$Rn),
(v8i8 (trunc (v8i16 (srl (v8i16 node:$Rn),
(v8i16 (Neon_vdup (i32 8)))))))>;
(OpTy VPR128:$Rm))))))],
NoItinerary>;
-multiclass NeonI_3VDN_addhn_2Op<bit u, bits<4> opcode,
- string asmop, SDPatternOperator opnode,
- bit Commutable = 0>
-{
+multiclass NeonI_3VDN_addhn_2Op<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, bit Commutable = 0> {
let isCommutable = Commutable in {
def _8b8h : NeonI_3VDN_addhn_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
opnode, NI_get_hi_8h, v8i8, v8i16>;
NoItinerary>;
// normal narrow pattern
-multiclass NeonI_3VDN_2Op<bit u, bits<4> opcode,
- string asmop, SDPatternOperator opnode,
- bit Commutable = 0>
-{
+multiclass NeonI_3VDN_2Op<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, bit Commutable = 0> {
let isCommutable = Commutable in {
def _8b8h : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
opnode, VPR64, VPR128, v8i8, v8i16>;
let neverHasSideEffects = 1;
}
-multiclass NeonI_3VDN_3Op_v1<bit u, bits<4> opcode,
- string asmop> {
+multiclass NeonI_3VDN_3Op_v1<bit u, bits<4> opcode, string asmop> {
def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">;
def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">;
def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">;
(OpTy OpVPR:$Rm))))))],
NoItinerary>;
-multiclass NeonI_3VDL_zext<bit u, bits<4> opcode,
- string asmop, SDPatternOperator opnode,
- bit Commutable = 0>
-{
+multiclass NeonI_3VDL_zext<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, bit Commutable = 0> {
let isCommutable = Commutable in {
def _8h8b : NeonI_3VDL_Ext<0b0, u, 0b00, opcode, asmop, "8h", "8b",
opnode, VPR64, v8i16, v8i8, v8i8>;
defm SABDLvvv : NeonI_3VDL_zext<0b0, 0b0111, "sabdl", int_arm_neon_vabds, 1>;
defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>;
+// Wraps a two-operand operator `op` so that it is applied to the results of
+// Neon_High16B / Neon_High8H / Neon_High4S on both operands. It defines three
+// PatFrags with suffixes _16B, _8H and _4S, whose operands are typed v8i8,
+// v4i16 and v2i32 respectively.
-multiclass NeonI_Op_High<SDPatternOperator op>
-{
+multiclass NeonI_Op_High<SDPatternOperator op> {
def _16B : PatFrag<(ops node:$Rn, node:$Rm),
- (op (v8i8 (Neon_High16B node:$Rn)), (v8i8 (Neon_High16B node:$Rm)))>;
+ (op (v8i8 (Neon_High16B node:$Rn)),
+ (v8i8 (Neon_High16B node:$Rm)))>;
def _8H : PatFrag<(ops node:$Rn, node:$Rm),
- (op (v4i16 (Neon_High8H node:$Rn)), (v4i16 (Neon_High8H node:$Rm)))>;
+ (op (v4i16 (Neon_High8H node:$Rn)),
+ (v4i16 (Neon_High8H node:$Rm)))>;
def _4S : PatFrag<(ops node:$Rn, node:$Rm),
- (op (v2i32 (Neon_High4S node:$Rn)), (v2i32 (Neon_High4S node:$Rm)))>;
-
+ (op (v2i32 (Neon_High4S node:$Rn)),
+ (v2i32 (Neon_High4S node:$Rm)))>;
}
+// High-half PatFrag families; each defm yields <name>_16B, <name>_8H and
+// <name>_4S, which other multiclasses look up by name via !cast<PatFrag>.
defm NI_sabdl_hi : NeonI_Op_High<int_arm_neon_vabds>;
defm NI_qdmull_hi : NeonI_Op_High<int_arm_neon_vqdmull>;
defm NI_pmull_hi : NeonI_Op_High<int_arm_neon_vmullp>;
-multiclass NeonI_3VDL_Abd_u<bit u, bits<4> opcode,
- string asmop, string opnode,
- bit Commutable = 0>
-{
+multiclass NeonI_3VDL_Abd_u<bit u, bits<4> opcode, string asmop, string opnode,
+ bit Commutable = 0> {
let isCommutable = Commutable in {
def _8h8b : NeonI_3VDL_Ext<0b1, u, 0b00, opcode, asmop, "8h", "16b",
!cast<PatFrag>(opnode # "_16B"),
let Constraints = "$src = $Rd";
}
-multiclass NeonI_3VDL_Aba_v1<bit u, bits<4> opcode,
- string asmop, SDPatternOperator opnode,
- SDPatternOperator subop>
-{
+multiclass NeonI_3VDL_Aba_v1<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, SDPatternOperator subop>{
def _8h8b : NeonI_3VDL_Aba<0b0, u, 0b00, opcode, asmop, "8h", "8b",
opnode, subop, VPR64, v8i16, v8i8, v8i8>;
def _4s4h : NeonI_3VDL_Aba<0b0, u, 0b01, opcode, asmop, "4s", "4h",
defm UABALvvv : NeonI_3VDL_Aba_v1<0b1, 0b0101, "uabal",
add, int_arm_neon_vabdu>;
-multiclass NeonI_3VDL2_Aba_v1<bit u, bits<4> opcode,
- string asmop, SDPatternOperator opnode,
- string subop>
-{
+multiclass NeonI_3VDL2_Aba_v1<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, string subop> {
def _8h8b : NeonI_3VDL_Aba<0b1, u, 0b00, opcode, asmop, "8h", "16b",
opnode, !cast<PatFrag>(subop # "_16B"),
VPR128, v8i16, v16i8, v8i8>;
"NI_uabdl_hi">;
// Long pattern with 2 operands
-multiclass NeonI_3VDL_2Op<bit u, bits<4> opcode,
- string asmop, SDPatternOperator opnode,
- bit Commutable = 0>
-{
+multiclass NeonI_3VDL_2Op<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, bit Commutable = 0> {
let isCommutable = Commutable in {
def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
opnode, VPR128, VPR64, v8i16, v8i8>;
(ResTy (opnode (OpTy VPR128:$Rn), (OpTy VPR128:$Rm))))],
NoItinerary>;
-
-multiclass NeonI_3VDL2_2Op_mull_v1<bit u, bits<4> opcode,
- string asmop,
- string opnode,
- bit Commutable = 0>
-{
+multiclass NeonI_3VDL2_2Op_mull_v1<bit u, bits<4> opcode, string asmop,
+ string opnode, bit Commutable = 0> {
let isCommutable = Commutable in {
def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
!cast<PatFrag>(opnode # "_16B"),
let Constraints = "$src = $Rd";
}
-multiclass NeonI_3VDL_3Op_v1<bit u, bits<4> opcode,
- string asmop, SDPatternOperator opnode>
-{
+multiclass NeonI_3VDL_3Op_v1<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode> {
def _8h8b : NeonI_3VDL_3Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
opnode, v8i16, v8i8>;
def _4s4h : NeonI_3VDL_3Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
let Constraints = "$src = $Rd";
}
-multiclass NeonI_3VDL2_3Op_mlas_v1<bit u, bits<4> opcode,
- string asmop,
- SDPatternOperator subop,
- string opnode>
-{
+multiclass NeonI_3VDL2_3Op_mlas_v1<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator subop, string opnode> {
def _8h16b : NeonI_3VDL2_3Op_mlas<0b1, u, 0b00, opcode, asmop, "8h", "16b",
subop, !cast<PatFrag>(opnode # "_16B"),
VPR128, v8i16, v16i8>;
defm UMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1010, "umlsl2",
sub, "NI_umull_hi">;
-multiclass NeonI_3VDL_qdmlal_3Op_v2<bit u, bits<4> opcode,
- string asmop, SDPatternOperator opnode>
-{
+multiclass NeonI_3VDL_qdmlal_3Op_v2<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode> {
def _4s4h : NeonI_3VDL2_3Op_mlas<0b0, u, 0b01, opcode, asmop, "4s", "4h",
opnode, int_arm_neon_vqdmull,
VPR64, v4i32, v4i16>;
defm SQDMLSLvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1011, "sqdmlsl",
int_arm_neon_vqsubs>;
-multiclass NeonI_3VDL_v2<bit u, bits<4> opcode,
- string asmop, SDPatternOperator opnode,
- bit Commutable = 0>
-{
+multiclass NeonI_3VDL_v2<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, bit Commutable = 0> {
let isCommutable = Commutable in {
def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
opnode, VPR128, VPR64, v4i32, v4i16>;
defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull",
int_arm_neon_vqdmull, 1>;
-multiclass NeonI_3VDL2_2Op_mull_v2<bit u, bits<4> opcode,
- string asmop,
- string opnode,
- bit Commutable = 0>
-{
+multiclass NeonI_3VDL2_2Op_mull_v2<bit u, bits<4> opcode, string asmop,
+ string opnode, bit Commutable = 0> {
let isCommutable = Commutable in {
def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
!cast<PatFrag>(opnode # "_8H"),
defm SQDMULL2vvv : NeonI_3VDL2_2Op_mull_v2<0b0, 0b1101, "sqdmull2",
"NI_qdmull_hi", 1>;
-multiclass NeonI_3VDL2_3Op_qdmlal_v2<bit u, bits<4> opcode,
- string asmop,
- SDPatternOperator opnode>
-{
+multiclass NeonI_3VDL2_3Op_qdmlal_v2<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode> {
def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
opnode, NI_qdmull_hi_8H,
VPR128, v4i32, v8i16>;
defm SQDMLSL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1011, "sqdmlsl2",
int_arm_neon_vqsubs>;
-multiclass NeonI_3VDL_v3<bit u, bits<4> opcode,
- string asmop, SDPatternOperator opnode,
- bit Commutable = 0>
-{
+multiclass NeonI_3VDL_v3<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, bit Commutable = 0> {
let isCommutable = Commutable in {
def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
opnode, VPR128, VPR64, v8i16, v8i8>;
defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp, 1>;
-multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode,
- string asmop,
- string opnode,
- bit Commutable = 0>
-{
+multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode, string asmop,
+ string opnode, bit Commutable = 0> {
let isCommutable = Commutable in {
def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
!cast<PatFrag>(opnode # "_16B"),
}
}
+// pmull2: the operator is passed by name ("NI_pmull_hi") so the multiclass can
+// resolve the suffixed PatFrag with !cast; the final argument 1 sets
+// Commutable.
-defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2",
- "NI_pmull_hi", 1>;
+defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2", "NI_pmull_hi",
+ 1>;
// End of implementation for instruction class (3V Diff)
// End of vector load/store multiple N-element structure(class SIMD lselem)
+// The following are post-index vector load/store multiple N-element
+// structures (class SIMD lselem-post)
+// Exact-value immediate operands. Each pair below defines an AsmOperandClass
+// whose predicate is isExactImm<N>, plus an i32 Operand/ImmLeaf that matches
+// only when Imm == N. They are passed as the fixed post-index amount ($amt)
+// to the LD*WB/ST*WB definitions further down.
+def exact8_asmoperand : AsmOperandClass {
+ let Name = "Exact8";
+ let PredicateMethod = "isExactImm<8>";
+ let RenderMethod = "addImmOperands";
+}
+def uimm_exact8 : Operand<i32>, ImmLeaf<i32, [{return Imm == 8;}]> {
+ let ParserMatchClass = exact8_asmoperand;
+}
+
+def exact16_asmoperand : AsmOperandClass {
+ let Name = "Exact16";
+ let PredicateMethod = "isExactImm<16>";
+ let RenderMethod = "addImmOperands";
+}
+def uimm_exact16 : Operand<i32>, ImmLeaf<i32, [{return Imm == 16;}]> {
+ let ParserMatchClass = exact16_asmoperand;
+}
+
+def exact24_asmoperand : AsmOperandClass {
+ let Name = "Exact24";
+ let PredicateMethod = "isExactImm<24>";
+ let RenderMethod = "addImmOperands";
+}
+def uimm_exact24 : Operand<i32>, ImmLeaf<i32, [{return Imm == 24;}]> {
+ let ParserMatchClass = exact24_asmoperand;
+}
+
+def exact32_asmoperand : AsmOperandClass {
+ let Name = "Exact32";
+ let PredicateMethod = "isExactImm<32>";
+ let RenderMethod = "addImmOperands";
+}
+def uimm_exact32 : Operand<i32>, ImmLeaf<i32, [{return Imm == 32;}]> {
+ let ParserMatchClass = exact32_asmoperand;
+}
+
+def exact48_asmoperand : AsmOperandClass {
+ let Name = "Exact48";
+ let PredicateMethod = "isExactImm<48>";
+ let RenderMethod = "addImmOperands";
+}
+def uimm_exact48 : Operand<i32>, ImmLeaf<i32, [{return Imm == 48;}]> {
+ let ParserMatchClass = exact48_asmoperand;
+}
+
+def exact64_asmoperand : AsmOperandClass {
+ let Name = "Exact64";
+ let PredicateMethod = "isExactImm<64>";
+ let RenderMethod = "addImmOperands";
+}
+def uimm_exact64 : Operand<i32>, ImmLeaf<i32, [{return Imm == 64;}]> {
+ let ParserMatchClass = exact64_asmoperand;
+}
+
+// Post-index (write-back) vector-list load. Emits two records per
+// instantiation:
+//   _fixed    - post-increment by the immediate $amt (operand type ImmTy);
+//               the Rm field is hard-wired to 0b11111.
+//   _register - post-increment by the register $Rm (GPR64noxzr).
+// Both tie the $wb output to $Rn, are marked mayLoad, use the
+// DecodeVLDSTPostInstruction decoder and carry an empty pattern list.
+// NOTE(review): the second NeonI_LdStMult_Post argument is 1 here and 0 in
+// NeonI_STWB_VList - presumably the load/store bit; confirm in the format
+// class.
+multiclass NeonI_LDWB_VList<bit q, bits<4> opcode, bits<2> size,
+ RegisterOperand VecList, Operand ImmTy,
+ string asmop> {
+ let Constraints = "$Rn = $wb", mayLoad = 1, neverHasSideEffects = 1,
+ DecoderMethod = "DecodeVLDSTPostInstruction" in {
+ def _fixed : NeonI_LdStMult_Post<q, 1, opcode, size,
+ (outs VecList:$Rt, GPR64xsp:$wb),
+ (ins GPR64xsp:$Rn, ImmTy:$amt),
+ asmop # "\t$Rt, [$Rn], $amt",
+ [],
+ NoItinerary> {
+ let Rm = 0b11111;
+ }
+
+ def _register : NeonI_LdStMult_Post<q, 1, opcode, size,
+ (outs VecList:$Rt, GPR64xsp:$wb),
+ (ins GPR64xsp:$Rn, GPR64noxzr:$Rm),
+ asmop # "\t$Rt, [$Rn], $Rm",
+ [],
+ NoItinerary>;
+ }
+}
+
+// Instantiates NeonI_LDWB_VList once per vector arrangement. The q=0
+// arrangements (8B, 4H, 2S) take ImmTy as the fixed increment; the q=1
+// arrangements (16B, 8H, 4S, 2D) take ImmTy2. The register-list operand is
+// looked up by name as List # "<arrangement>_operand". There is no 1D entry
+// here; the 1D forms are instantiated directly from NeonI_LDWB_VList.
+multiclass LDWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
+ Operand ImmTy2, string asmop> {
+ defm _8B : NeonI_LDWB_VList<0, opcode, 0b00,
+ !cast<RegisterOperand>(List # "8B_operand"),
+ ImmTy, asmop>;
+
+ defm _4H : NeonI_LDWB_VList<0, opcode, 0b01,
+ !cast<RegisterOperand>(List # "4H_operand"),
+ ImmTy, asmop>;
+
+ defm _2S : NeonI_LDWB_VList<0, opcode, 0b10,
+ !cast<RegisterOperand>(List # "2S_operand"),
+ ImmTy, asmop>;
+
+ defm _16B : NeonI_LDWB_VList<1, opcode, 0b00,
+ !cast<RegisterOperand>(List # "16B_operand"),
+ ImmTy2, asmop>;
+
+ defm _8H : NeonI_LDWB_VList<1, opcode, 0b01,
+ !cast<RegisterOperand>(List # "8H_operand"),
+ ImmTy2, asmop>;
+
+ defm _4S : NeonI_LDWB_VList<1, opcode, 0b10,
+ !cast<RegisterOperand>(List # "4S_operand"),
+ ImmTy2, asmop>;
+
+ defm _2D : NeonI_LDWB_VList<1, opcode, 0b11,
+ !cast<RegisterOperand>(List # "2D_operand"),
+ ImmTy2, asmop>;
+}
+
+// Post-index load multiple N-element structures from N registers (N = 1,2,3,4)
+defm LD1WB : LDWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "ld1">;
+defm LD1WB_1D : NeonI_LDWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
+ "ld1">;
+
+defm LD2WB : LDWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "ld2">;
+
+defm LD3WB : LDWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
+ "ld3">;
+
+defm LD4WB : LDWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "ld4">;
+
+// Post-index load multiple 1-element structures from N consecutive registers
+// (N = 2,3,4)
+defm LD1WB2V : LDWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
+ "ld1">;
+defm LD1WB2V_1D : NeonI_LDWB_VList<0, 0b1010, 0b11, VPair1D_operand,
+ uimm_exact16, "ld1">;
+
+defm LD1WB3V : LDWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
+ "ld1">;
+defm LD1WB3V_1D : NeonI_LDWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
+ uimm_exact24, "ld1">;
+
+// NOTE(review): "LD1WB_4V" carries an underscore that the sibling names
+// (LD1WB2V, LD1WB3V, LD1WB4V_1D, ST1WB4V) do not. Renaming would change the
+// generated instruction names, so confirm no other file refers to them
+// before normalising.
+defm LD1WB_4V : LDWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
+ "ld1">;
+defm LD1WB4V_1D : NeonI_LDWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
+ uimm_exact32, "ld1">;
+
+// Post-index (write-back) vector-list store. Emits two records per
+// instantiation:
+//   _fixed    - post-increment by the immediate $amt (operand type ImmTy);
+//               the Rm field is hard-wired to 0b11111.
+//   _register - post-increment by the register $Rm (GPR64noxzr).
+// The only output is the written-back base $wb, tied to $Rn; the vector list
+// $Rt is an input. Both records are marked mayStore and carry an empty
+// pattern list.
+multiclass NeonI_STWB_VList<bit q, bits<4> opcode, bits<2> size,
+ RegisterOperand VecList, Operand ImmTy,
+ string asmop> {
+ let Constraints = "$Rn = $wb", mayStore = 1, neverHasSideEffects = 1,
+ DecoderMethod = "DecodeVLDSTPostInstruction" in {
+ def _fixed : NeonI_LdStMult_Post<q, 0, opcode, size,
+ (outs GPR64xsp:$wb),
+ (ins GPR64xsp:$Rn, ImmTy:$amt, VecList:$Rt),
+ asmop # "\t$Rt, [$Rn], $amt",
+ [],
+ NoItinerary> {
+ let Rm = 0b11111;
+ }
+
+ def _register : NeonI_LdStMult_Post<q, 0, opcode, size,
+ (outs GPR64xsp:$wb),
+ (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VecList:$Rt),
+ asmop # "\t$Rt, [$Rn], $Rm",
+ [],
+ NoItinerary>;
+ }
+}
+
+// Instantiates NeonI_STWB_VList once per vector arrangement. The q=0
+// arrangements (8B, 4H, 2S) take ImmTy as the fixed increment; the q=1
+// arrangements (16B, 8H, 4S, 2D) take ImmTy2. The register-list operand is
+// looked up by name as List # "<arrangement>_operand". There is no 1D entry
+// here; the 1D forms are instantiated directly from NeonI_STWB_VList.
+multiclass STWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
+ Operand ImmTy2, string asmop> {
+ defm _8B : NeonI_STWB_VList<0, opcode, 0b00,
+ !cast<RegisterOperand>(List # "8B_operand"), ImmTy, asmop>;
+
+ defm _4H : NeonI_STWB_VList<0, opcode, 0b01,
+ !cast<RegisterOperand>(List # "4H_operand"),
+ ImmTy, asmop>;
+
+ defm _2S : NeonI_STWB_VList<0, opcode, 0b10,
+ !cast<RegisterOperand>(List # "2S_operand"),
+ ImmTy, asmop>;
+
+ defm _16B : NeonI_STWB_VList<1, opcode, 0b00,
+ !cast<RegisterOperand>(List # "16B_operand"),
+ ImmTy2, asmop>;
+
+ defm _8H : NeonI_STWB_VList<1, opcode, 0b01,
+ !cast<RegisterOperand>(List # "8H_operand"),
+ ImmTy2, asmop>;
+
+ defm _4S : NeonI_STWB_VList<1, opcode, 0b10,
+ !cast<RegisterOperand>(List # "4S_operand"),
+ ImmTy2, asmop>;
+
+ defm _2D : NeonI_STWB_VList<1, opcode, 0b11,
+ !cast<RegisterOperand>(List # "2D_operand"),
+ ImmTy2, asmop>;
+}
+
+// Post-index store multiple N-element structures from N registers (N = 1,2,3,4)
+defm ST1WB : STWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "st1">;
+defm ST1WB_1D : NeonI_STWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
+ "st1">;
+
+defm ST2WB : STWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "st2">;
+
+defm ST3WB : STWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
+ "st3">;
+
+defm ST4WB : STWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "st4">;
+
+// Post-index store multiple 1-element structures from N consecutive registers
+// (N = 2,3,4)
+defm ST1WB2V : STWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
+ "st1">;
+defm ST1WB2V_1D : NeonI_STWB_VList<0, 0b1010, 0b11, VPair1D_operand,
+ uimm_exact16, "st1">;
+
+defm ST1WB3V : STWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
+ "st1">;
+defm ST1WB3V_1D : NeonI_STWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
+ uimm_exact24, "st1">;
+
+defm ST1WB4V : STWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
+ "st1">;
+defm ST1WB4V_1D : NeonI_STWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
+ uimm_exact32, "st1">;
+
+// End of post-index vector load/store multiple N-element structure
+// (class SIMD lselem-post)
+
// Scalar Three Same
+// Generic scalar "three same" instruction: $Rd, $Rn and $Rm all use register
+// class FPRC. The assembly string is asmop followed by "\t$Rd, $Rn, $Rm" and
+// the pattern list is empty.
+class NeonI_Scalar3Same_size<bit u, bits<2> size, bits<5> opcode, string asmop,
+ RegisterClass FPRC>
+ : NeonI_Scalar3Same<u, size, opcode,
+ (outs FPRC:$Rd), (ins FPRC:$Rn, FPRC:$Rm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+ [],
+ NoItinerary>;
+
class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
- : NeonI_Scalar3Same<u, 0b11, opcode,
- (outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm),
- !strconcat(asmop, " $Rd, $Rn, $Rm"),
- [],
- NoItinerary>;
+ : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;
-multiclass NeonI_Scalar3Same_HS_sizes<bit u, bits<5> opcode,
- string asmop, bit Commutable = 0>
-{
+multiclass NeonI_Scalar3Same_HS_sizes<bit u, bits<5> opcode, string asmop,
+ bit Commutable = 0> {
let isCommutable = Commutable in {
- def hhh : NeonI_Scalar3Same<u, 0b01, opcode,
- (outs FPR16:$Rd), (ins FPR16:$Rn, FPR16:$Rm),
- !strconcat(asmop, " $Rd, $Rn, $Rm"),
- [],
- NoItinerary>;
- def sss : NeonI_Scalar3Same<u, 0b10, opcode,
- (outs FPR32:$Rd), (ins FPR32:$Rn, FPR32:$Rm),
- !strconcat(asmop, " $Rd, $Rn, $Rm"),
- [],
- NoItinerary>;
+ def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
+ def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
}
}
multiclass NeonI_Scalar3Same_SD_sizes<bit u, bit size_high, bits<5> opcode,
- string asmop, bit Commutable = 0>
-{
+ string asmop, bit Commutable = 0> {
let isCommutable = Commutable in {
- def sss : NeonI_Scalar3Same<u, {size_high, 0b0}, opcode,
- (outs FPR32:$Rd), (ins FPR32:$Rn, FPR32:$Rm),
- !strconcat(asmop, " $Rd, $Rn, $Rm"),
- [],
- NoItinerary>;
- def ddd : NeonI_Scalar3Same<u, {size_high, 0b1}, opcode,
- (outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm),
- !strconcat(asmop, " $Rd, $Rn, $Rm"),
- [],
- NoItinerary>;
+ def sss : NeonI_Scalar3Same_size<u, {size_high, 0b0}, opcode, asmop, FPR32>;
+ def ddd : NeonI_Scalar3Same_size<u, {size_high, 0b1}, opcode, asmop, FPR64>;
}
}
multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
- string asmop, bit Commutable = 0>
-{
+ string asmop, bit Commutable = 0> {
let isCommutable = Commutable in {
- def bbb : NeonI_Scalar3Same<u, 0b00, opcode,
- (outs FPR8:$Rd), (ins FPR8:$Rn, FPR8:$Rm),
- !strconcat(asmop, " $Rd, $Rn, $Rm"),
- [],
- NoItinerary>;
- def hhh : NeonI_Scalar3Same<u, 0b01, opcode,
- (outs FPR16:$Rd), (ins FPR16:$Rn, FPR16:$Rm),
- !strconcat(asmop, " $Rd, $Rn, $Rm"),
- [],
- NoItinerary>;
- def sss : NeonI_Scalar3Same<u, 0b10, opcode,
- (outs FPR32:$Rd), (ins FPR32:$Rn, FPR32:$Rm),
- !strconcat(asmop, " $Rd, $Rn, $Rm"),
- [],
- NoItinerary>;
- def ddd : NeonI_Scalar3Same<u, 0b11, opcode,
- (outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm),
- !strconcat(asmop, " $Rd, $Rn, $Rm"),
- [],
- NoItinerary>;
+ def bbb : NeonI_Scalar3Same_size<u, 0b00, opcode, asmop, FPR8>;
+ def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
+ def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
+ def ddd : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;
}
}
class Neon_Scalar3Same_cmp_D_size_patterns<SDPatternOperator opnode,
Instruction INSTD>
- : Pat<(v1i64 (opnode (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
- (INSTD VPR64:$Rn, VPR64:$Rm)>;
+ : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
+ (INSTD FPR64:$Rn, FPR64:$Rm)>;
multiclass Neon_Scalar3Same_HS_size_patterns<SDPatternOperator opnode,
Instruction INSTH,
(INSTD FPR64:$Rn, FPR64:$Rm)>;
}
+// Selection patterns for two-operand scalar compares on floating-point
+// inputs: operands are v1f32/v1f64 and the result is the same-width integer
+// type (v1i32 via INSTS, v1i64 via INSTD).
+multiclass Neon_Scalar3Same_cmp_SD_size_patterns<SDPatternOperator opnode,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn), (v1f32 FPR32:$Rm))),
+ (INSTS FPR32:$Rn, FPR32:$Rm)>;
+ def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+ (INSTD FPR64:$Rn, FPR64:$Rm)>;
+}
+
+// Scalar Three Different
+
+// Generic scalar "three different" instruction: the destination uses register
+// class FPRCD while both sources use FPRCS. The pattern list is empty.
+class NeonI_Scalar3Diff_size<bit u, bits<2> size, bits<4> opcode, string asmop,
+ RegisterClass FPRCD, RegisterClass FPRCS>
+ : NeonI_Scalar3Diff<u, size, opcode,
+ (outs FPRCD:$Rd), (ins FPRCS:$Rn, FPRCS:$Rm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+ [],
+ NoItinerary>;
+
+// Two forms whose result register is wider than the sources:
+//   shh - FPR32 result from FPR16 sources (size 0b01)
+//   dss - FPR64 result from FPR32 sources (size 0b10)
+multiclass NeonI_Scalar3Diff_HS_size<bit u, bits<4> opcode, string asmop> {
+ def shh : NeonI_Scalar3Diff_size<u, 0b01, opcode, asmop, FPR32, FPR16>;
+ def dss : NeonI_Scalar3Diff_size<u, 0b10, opcode, asmop, FPR64, FPR32>;
+}
+
+// Accumulating counterparts of the shh/dss forms. The extra $Src input is
+// tied to $Rd by the constraint below and does not appear in the assembly
+// string, so the printed operands are still "$Rd, $Rn, $Rm".
+multiclass NeonI_Scalar3Diff_ml_HS_size<bit u, bits<4> opcode, string asmop> {
+ let Constraints = "$Src = $Rd" in {
+ def shh : NeonI_Scalar3Diff<u, 0b01, opcode,
+ (outs FPR32:$Rd), (ins FPR32:$Src, FPR16:$Rn, FPR16:$Rm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+ [],
+ NoItinerary>;
+ def dss : NeonI_Scalar3Diff<u, 0b10, opcode,
+ (outs FPR64:$Rd), (ins FPR64:$Src, FPR32:$Rn, FPR32:$Rm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+ [],
+ NoItinerary>;
+ }
+}
+
+// Selection patterns for the widening two-source forms. INSTH handles v1i16
+// sources producing v1i32; INSTS handles v1i32 sources producing v1i64 (the
+// suffix names the source element size, not the result).
+multiclass Neon_Scalar3Diff_HS_size_patterns<SDPatternOperator opnode,
+ Instruction INSTH,
+ Instruction INSTS> {
+ def : Pat<(v1i32 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
+ (INSTH FPR16:$Rn, FPR16:$Rm)>;
+ def : Pat<(v1i64 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
+ (INSTS FPR32:$Rn, FPR32:$Rm)>;
+}
+
+// Selection patterns for the accumulating widening forms. The first operand
+// ($Src) has the result type; $Rn and $Rm have the narrower source type.
+// INSTH handles v1i16 sources, INSTS handles v1i32 sources.
+multiclass Neon_Scalar3Diff_ml_HS_size_patterns<SDPatternOperator opnode,
+ Instruction INSTH,
+ Instruction INSTS> {
+ def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
+ (INSTH FPR32:$Src, FPR16:$Rn, FPR16:$Rm)>;
+ def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
+ (INSTS FPR64:$Src, FPR32:$Rn, FPR32:$Rm)>;
+}
+
// Scalar Two Registers Miscellaneous
+// Generic scalar two-register instruction: destination in register class
+// FPRCD, single source in FPRCS (the two may be the same class). The assembly
+// string is asmop followed by "\t$Rd, $Rn" and the pattern list is empty.
+class NeonI_Scalar2SameMisc_size<bit u, bits<2> size, bits<5> opcode, string asmop,
+ RegisterClass FPRCD, RegisterClass FPRCS>
+ : NeonI_Scalar2SameMisc<u, size, opcode,
+ (outs FPRCD:$Rd), (ins FPRCS:$Rn),
+ !strconcat(asmop, "\t$Rd, $Rn"),
+ [],
+ NoItinerary>;
+
multiclass NeonI_Scalar2SameMisc_SD_size<bit u, bit size_high, bits<5> opcode,
string asmop> {
- def ss : NeonI_Scalar2SameMisc<u, {size_high, 0b0}, opcode,
- (outs FPR32:$Rd), (ins FPR32:$Rn),
- !strconcat(asmop, " $Rd, $Rn"),
- [], NoItinerary>;
- def dd : NeonI_Scalar2SameMisc<u, {size_high, 0b1}, opcode,
- (outs FPR64:$Rd), (ins FPR64:$Rn),
- !strconcat(asmop, " $Rd, $Rn"),
- [], NoItinerary>;
+ def ss : NeonI_Scalar2SameMisc_size<u, {size_high, 0b0}, opcode, asmop, FPR32,
+ FPR32>;
+ def dd : NeonI_Scalar2SameMisc_size<u, {size_high, 0b1}, opcode, asmop, FPR64,
+ FPR64>;
}
multiclass NeonI_Scalar2SameMisc_D_size<bit u, bits<5> opcode, string asmop> {
- def dd: NeonI_Scalar2SameMisc<u, 0b11, opcode,
- (outs FPR64:$Rd), (ins FPR64:$Rn),
- !strconcat(asmop, " $Rd, $Rn"),
- [], NoItinerary>;
+ def dd: NeonI_Scalar2SameMisc_size<u, 0b11, opcode, asmop, FPR64, FPR64>;
}
multiclass NeonI_Scalar2SameMisc_BHSD_size<bit u, bits<5> opcode, string asmop>
: NeonI_Scalar2SameMisc_D_size<u, opcode, asmop> {
- def bb : NeonI_Scalar2SameMisc<u, 0b00, opcode,
- (outs FPR8:$Rd), (ins FPR8:$Rn),
- !strconcat(asmop, " $Rd, $Rn"),
- [], NoItinerary>;
- def hh : NeonI_Scalar2SameMisc<u, 0b01, opcode,
- (outs FPR16:$Rd), (ins FPR16:$Rn),
- !strconcat(asmop, " $Rd, $Rn"),
- [], NoItinerary>;
- def ss : NeonI_Scalar2SameMisc<u, 0b10, opcode,
- (outs FPR32:$Rd), (ins FPR32:$Rn),
- !strconcat(asmop, " $Rd, $Rn"),
- [], NoItinerary>;
+ def bb : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR8>;
+ def hh : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR16>;
+ def ss : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR32>;
+}
+
+// Forms whose destination register class is one step smaller than the source:
+//   bh - FPR8  from FPR16 (size 0b00)
+//   hs - FPR16 from FPR32 (size 0b01)
+//   sd - FPR32 from FPR64 (size 0b10)
+multiclass NeonI_Scalar2SameMisc_narrow_HSD_size<bit u, bits<5> opcode,
+ string asmop> {
+ def bh : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR16>;
+ def hs : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR32>;
+ def sd : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR64>;
}
+// Scalar two-register instruction with an extra $Src input of the same
+// register class as $Rd; $Src is not printed in the assembly string.
+// This class does not set the "$Src = $Rd" constraint itself: the multiclass
+// that instantiates it wraps its defs in `let Constraints = ...`.
+class NeonI_Scalar2SameMisc_accum_size<bit u, bits<2> size, bits<5> opcode,
+ string asmop, RegisterClass FPRC>
+ : NeonI_Scalar2SameMisc<u, size, opcode,
+ (outs FPRC:$Rd), (ins FPRC:$Src, FPRC:$Rn),
+ !strconcat(asmop, "\t$Rd, $Rn"),
+ [],
+ NoItinerary>;
+
multiclass NeonI_Scalar2SameMisc_accum_BHSD_size<bit u, bits<5> opcode,
string asmop> {
let Constraints = "$Src = $Rd" in {
- def bb : NeonI_Scalar2SameMisc<u, 0b00, opcode,
- (outs FPR8:$Rd), (ins FPR8:$Src, FPR8:$Rn),
- !strconcat(asmop, " $Rd, $Rn"),
- [], NoItinerary>;
- def hh : NeonI_Scalar2SameMisc<u, 0b01, opcode,
- (outs FPR16:$Rd), (ins FPR16:$Src, FPR16:$Rn),
- !strconcat(asmop, " $Rd, $Rn"),
- [], NoItinerary>;
- def ss : NeonI_Scalar2SameMisc<u, 0b10, opcode,
- (outs FPR32:$Rd), (ins FPR32:$Src, FPR32:$Rn),
- !strconcat(asmop, " $Rd, $Rn"),
- [], NoItinerary>;
- def dd: NeonI_Scalar2SameMisc<u, 0b11, opcode,
- (outs FPR64:$Rd), (ins FPR64:$Src, FPR64:$Rn),
- !strconcat(asmop, " $Rd, $Rn"),
- [], NoItinerary>;
+ def bb : NeonI_Scalar2SameMisc_accum_size<u, 0b00, opcode, asmop, FPR8>;
+ def hh : NeonI_Scalar2SameMisc_accum_size<u, 0b01, opcode, asmop, FPR16>;
+ def ss : NeonI_Scalar2SameMisc_accum_size<u, 0b10, opcode, asmop, FPR32>;
+ def dd : NeonI_Scalar2SameMisc_accum_size<u, 0b11, opcode, asmop, FPR64>;
}
}
class NeonI_Scalar2SameMisc_cmpz_D_size<bit u, bits<5> opcode, string asmop>
: NeonI_Scalar2SameMisc<u, 0b11, opcode,
(outs FPR64:$Rd), (ins FPR64:$Rn, neon_uimm0:$Imm),
- !strconcat(asmop, " $Rd, $Rn, $Imm"),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
[],
NoItinerary>;
+// Scalar compare of a register against an immediate operand printed as
+// $FPImm. ssi uses FPR32 with size 0b10; ddi uses FPR64 with size 0b11.
+// NOTE(review): the two forms use different operand types (fpz32 vs
+// fpz64movi) - presumably both denote floating-point zero; confirm against
+// their definitions.
+multiclass NeonI_Scalar2SameMisc_cmpz_SD_size<bit u, bits<5> opcode,
+ string asmop> {
+ def ssi : NeonI_Scalar2SameMisc<u, 0b10, opcode,
+ (outs FPR32:$Rd), (ins FPR32:$Rn, fpz32:$FPImm),
+ !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
+ [],
+ NoItinerary>;
+ def ddi : NeonI_Scalar2SameMisc<u, 0b11, opcode,
+ (outs FPR64:$Rd), (ins FPR64:$Rn, fpz64movi:$FPImm),
+ !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
+ [],
+ NoItinerary>;
+}
+
class Neon_Scalar2SameMisc_cmpz_D_size_patterns<SDPatternOperator opnode,
Instruction INSTD>
- : Pat<(v1i64 (opnode (v1i64 VPR64:$Rn), (v1i64 (bitconvert (v8i8 Neon_immAllZeros))))),
- (INSTD VPR64:$Rn, 0)>;
+ : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
+ (v1i64 (bitconvert (v8i8 Neon_immAllZeros))))),
+ (INSTD FPR64:$Rn, 0)>;
+
+// Selection patterns for the compare-with-immediate forms.
+// The two patterns recognise the second operand differently: the S pattern
+// matches a scalar_to_vector of an f32 fpimm and forwards that immediate,
+// while the D pattern matches an all-zeros vector and passes a literal 0.
+// NOTE(review): the S pattern's source side uses the generic `fpimm` leaf,
+// which by itself does not restrict the constant to zero - confirm that a
+// non-zero constant cannot be selected through it.
+multiclass Neon_Scalar2SameMisc_cmpz_SD_size_patterns<SDPatternOperator opnode,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn),
+ (v1f32 (scalar_to_vector (f32 fpimm:$FPImm))))),
+ (INSTS FPR32:$Rn, fpimm:$FPImm)>;
+ def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn),
+ (v1f64 (bitconvert (v8i8 Neon_immAllZeros))))),
+ (INSTD FPR64:$Rn, 0)>;
+}
multiclass Neon_Scalar2SameMisc_D_size_patterns<SDPatternOperator opnode,
Instruction INSTD> {
(INSTS FPR32:$Rn)>;
}
+// Selection patterns for the narrowing one-source forms. Each instruction
+// parameter is named after its source element size:
+//   INSTH: v1i16 -> v1i8,  INSTS: v1i32 -> v1i16,  INSTD: v1i64 -> v1i32.
+multiclass Neon_Scalar2SameMisc_narrow_HSD_size_patterns<
+ SDPatternOperator opnode,
+ Instruction INSTH,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn))),
+ (INSTH FPR16:$Rn)>;
+ def : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn))),
+ (INSTS FPR32:$Rn)>;
+ def : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn))),
+ (INSTD FPR64:$Rn)>;
+
+}
+
multiclass Neon_Scalar2SameMisc_accum_BHSD_size_patterns<
SDPatternOperator opnode,
Instruction INSTB,
(INSTD FPR64:$Src, FPR64:$Rn)>;
}
+// Scalar Shift By Immediate
+
+// Generic scalar shift-by-immediate instruction: $Rd and $Rn share register
+// class FPRC and the immediate operand has type ImmTy. The immh:immb
+// encoding bits are not set here; each def that uses this class supplies
+// them in its own body.
+class NeonI_ScalarShiftImm_size<bit u, bits<5> opcode, string asmop,
+ RegisterClass FPRC, Operand ImmTy>
+ : NeonI_ScalarShiftImm<u, opcode,
+ (outs FPRC:$Rd), (ins FPRC:$Rn, ImmTy:$Imm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
+ [], NoItinerary>;
+
+// 64-bit scalar shift-right-by-immediate form (ddi). Uses the shr_imm64
+// operand; Inst{22} is fixed to 1 and the 6-bit Imm fills Inst{21-16}.
+multiclass NeonI_ScalarShiftRightImm_D_size<bit u, bits<5> opcode,
+ string asmop> {
+ def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
+ bits<6> Imm;
+ let Inst{22} = 0b1; // immh:immb = 1xxxxxx
+ let Inst{21-16} = Imm;
+ }
+}
+
+// Adds the 8-, 16- and 32-bit shift-right forms to the inherited ddi form.
+// The position of the leading 1 in immh:immb selects the element size; the
+// remaining low bits hold the encoded immediate.
+multiclass NeonI_ScalarShiftRightImm_BHSD_size<bit u, bits<5> opcode,
+ string asmop>
+ : NeonI_ScalarShiftRightImm_D_size<u, opcode, asmop> {
+ def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shr_imm8> {
+ bits<3> Imm;
+ let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
+ let Inst{18-16} = Imm;
+ }
+ def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shr_imm16> {
+ bits<4> Imm;
+ let Inst{22-20} = 0b001; // immh:immb = 001xxxx
+ let Inst{19-16} = Imm;
+ }
+ def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
+ bits<5> Imm;
+ let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
+ let Inst{20-16} = Imm;
+ }
+}
+
+// 64-bit scalar shift-left-by-immediate form (ddi). Same field layout as the
+// shift-right D form, but the immediate operand type is shl_imm64.
+multiclass NeonI_ScalarShiftLeftImm_D_size<bit u, bits<5> opcode,
+ string asmop> {
+ def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shl_imm64> {
+ bits<6> Imm;
+ let Inst{22} = 0b1; // immh:immb = 1xxxxxx
+ let Inst{21-16} = Imm;
+ }
+}
+
+// Adds the 8-, 16- and 32-bit shift-left forms to the inherited ddi form,
+// using the shl_imm8/16/32 operand types. The immh:immb field layout matches
+// the shift-right multiclass of the same shape.
+multiclass NeonI_ScalarShiftLeftImm_BHSD_size<bit u, bits<5> opcode,
+ string asmop>
+ : NeonI_ScalarShiftLeftImm_D_size<u, opcode, asmop> {
+ def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shl_imm8> {
+ bits<3> Imm;
+ let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
+ let Inst{18-16} = Imm;
+ }
+ def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shl_imm16> {
+ bits<4> Imm;
+ let Inst{22-20} = 0b001; // immh:immb = 001xxxx
+ let Inst{19-16} = Imm;
+ }
+ def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shl_imm32> {
+ bits<5> Imm;
+ let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
+ let Inst{20-16} = Imm;
+ }
+}
+
+// 64-bit shift-right-by-immediate with an extra $Src input tied to $Rd.
+// $Src is not printed in the assembly string. The immediate operand type is
+// shr_imm64.
+class NeonI_ScalarShiftRightImm_accum_D_size<bit u, bits<5> opcode, string asmop>
+ : NeonI_ScalarShiftImm<u, opcode,
+ (outs FPR64:$Rd), (ins FPR64:$Src, FPR64:$Rn, shr_imm64:$Imm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
+ [], NoItinerary> {
+ bits<6> Imm;
+ let Inst{22} = 0b1; // immh:immb = 1xxxxxx
+ let Inst{21-16} = Imm;
+ let Constraints = "$Src = $Rd";
+}
+
+// 64-bit shift-left-by-immediate with an extra $Src input tied to $Rd.
+// Identical to the shift-right accumulate class except that the immediate
+// operand type is shl_imm64.
+class NeonI_ScalarShiftLeftImm_accum_D_size<bit u, bits<5> opcode, string asmop>
+ : NeonI_ScalarShiftImm<u, opcode,
+ (outs FPR64:$Rd), (ins FPR64:$Src, FPR64:$Rn, shl_imm64:$Imm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
+ [], NoItinerary> {
+ bits<6> Imm;
+ let Inst{22} = 0b1; // immh:immb = 1xxxxxx
+ let Inst{21-16} = Imm;
+ let Constraints = "$Src = $Rd";
+}
+
+// Scalar shift-by-immediate with distinct destination (FPRCD) and source
+// (FPRCS) register classes. The immh:immb bits are supplied by each def that
+// uses this class.
+class NeonI_ScalarShiftImm_narrow_size<bit u, bits<5> opcode, string asmop,
+ RegisterClass FPRCD, RegisterClass FPRCS,
+ Operand ImmTy>
+ : NeonI_ScalarShiftImm<u, opcode,
+ (outs FPRCD:$Rd), (ins FPRCS:$Rn, ImmTy:$Imm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
+ [], NoItinerary>;
+
+// Narrowing shift-by-immediate forms. The immediate operand type and the
+// immh:immb pattern follow the destination width, not the source width:
+//   bhi - FPR8  from FPR16, shr_imm8
+//   hsi - FPR16 from FPR32, shr_imm16
+//   sdi - FPR32 from FPR64, shr_imm32
+multiclass NeonI_ScalarShiftImm_narrow_HSD_size<bit u, bits<5> opcode,
+ string asmop> {
+ def bhi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR8, FPR16,
+ shr_imm8> {
+ bits<3> Imm;
+ let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
+ let Inst{18-16} = Imm;
+ }
+ def hsi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR16, FPR32,
+ shr_imm16> {
+ bits<4> Imm;
+ let Inst{22-20} = 0b001; // immh:immb = 001xxxx
+ let Inst{19-16} = Imm;
+ }
+ def sdi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR32, FPR64,
+ shr_imm32> {
+ bits<5> Imm;
+ let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
+ let Inst{20-16} = Imm;
+ }
+}
+
+// 32- and 64-bit forms used by the fixed-point convert instructions
+// (scvtf/ucvtf/fcvtzs/fcvtzu below). The immediate is encoded with the
+// shift-right operand types shr_imm32 / shr_imm64.
+multiclass NeonI_ScalarShiftImm_cvt_SD_size<bit u, bits<5> opcode, string asmop> {
+ def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
+ bits<5> Imm;
+ let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
+ let Inst{20-16} = Imm;
+ }
+ def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
+ bits<6> Imm;
+ let Inst{22} = 0b1; // immh:immb = 1xxxxxx
+ let Inst{21-16} = Imm;
+ }
+}
+
+// Selection pattern for a v1i64 operation whose second operand is a plain
+// i32 immediate; maps it to INSTD with the same immediate.
+multiclass Neon_ScalarShiftImm_D_size_patterns<SDPatternOperator opnode,
+ Instruction INSTD> {
+ def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 imm:$Imm))),
+ (INSTD FPR64:$Rn, imm:$Imm)>;
+}
+
+// Selection pattern for a v1i64 operation whose shift amount arrives as a
+// v1i64 Neon_vdup of an i32 immediate rather than as a bare immediate. It is
+// instantiated below with the generic sra/srl/shl nodes.
+class Neon_ScalarShiftImm_arm_D_size_patterns<SDPatternOperator opnode,
+ Instruction INSTD>
+ : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 (Neon_vdup (i32 imm:$Imm))))),
+ (INSTD FPR64:$Rn, imm:$Imm)>;
+
+// Immediate-shift selection patterns for all four element sizes: inherits
+// the v1i64 pattern and adds v1i8, v1i16 and v1i32 ones. Source and result
+// types are the same in every pattern.
+multiclass Neon_ScalarShiftImm_BHSD_size_patterns<SDPatternOperator opnode,
+ Instruction INSTB,
+ Instruction INSTH,
+ Instruction INSTS,
+ Instruction INSTD>
+ : Neon_ScalarShiftImm_D_size_patterns<opnode, INSTD> {
+ def bbi : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (i32 imm:$Imm))),
+ (INSTB FPR8:$Rn, imm:$Imm)>;
+ def hhi : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (i32 imm:$Imm))),
+ (INSTH FPR16:$Rn, imm:$Imm)>;
+ def ssi : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (i32 imm:$Imm))),
+ (INSTS FPR32:$Rn, imm:$Imm)>;
+}
+
+// Selection pattern for the three-operand v1i64 immediate-shift forms:
+// ($Src, $Rn, imm) is passed through to INSTD in the same order.
+class Neon_ScalarShiftImm_accum_D_size_patterns<SDPatternOperator opnode,
+ Instruction INSTD>
+ : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn), (i32 imm:$Imm))),
+ (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>;
+
+// Selection patterns for the narrowing immediate-shift forms. Instruction
+// parameters are named after the source element size:
+//   INSTH: v1i16 -> v1i8,  INSTS: v1i32 -> v1i16,  INSTD: v1i64 -> v1i32.
+multiclass Neon_ScalarShiftImm_narrow_HSD_size_patterns<
+ SDPatternOperator opnode,
+ Instruction INSTH,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def bhi : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn), (i32 imm:$Imm))),
+ (INSTH FPR16:$Rn, imm:$Imm)>;
+ def hsi : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn), (i32 imm:$Imm))),
+ (INSTS FPR32:$Rn, imm:$Imm)>;
+ def sdi : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn), (i32 imm:$Imm))),
+ (INSTD FPR64:$Rn, imm:$Imm)>;
+}
+
+// Selection patterns for integer-to-floating-point conversions that take an
+// immediate: v1i32 -> v1f32 via Sopnode/INSTS and v1i64 -> v1f64 via
+// Dopnode/INSTD. The two sizes take separate operators. Despite the "scvtf"
+// in the name, this multiclass is also instantiated for UCVTF_N below.
+multiclass Neon_ScalarShiftImm_scvtf_SD_size_patterns<SDPatternOperator Sopnode,
+ SDPatternOperator Dopnode,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def ssi : Pat<(v1f32 (Sopnode (v1i32 FPR32:$Rn), (i32 imm:$Imm))),
+ (INSTS FPR32:$Rn, imm:$Imm)>;
+ def ddi : Pat<(v1f64 (Dopnode (v1i64 FPR64:$Rn), (i32 imm:$Imm))),
+ (INSTD FPR64:$Rn, imm:$Imm)>;
+}
+
+// Selection patterns for floating-point-to-integer conversions that take an
+// immediate: v1f32 -> v1i32 via Sopnode/INSTS and v1f64 -> v1i64 via
+// Dopnode/INSTD. Despite the "fcvts" in the name, this multiclass is also
+// instantiated for FCVTZU_N below.
+multiclass Neon_ScalarShiftImm_fcvts_SD_size_patterns<SDPatternOperator Sopnode,
+ SDPatternOperator Dopnode,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def ssi : Pat<(v1i32 (Sopnode (v1f32 FPR32:$Rn), (i32 imm:$Imm))),
+ (INSTS FPR32:$Rn, imm:$Imm)>;
+ def ddi : Pat<(v1i64 (Dopnode (v1f64 FPR64:$Rn), (i32 imm:$Imm))),
+ (INSTD FPR64:$Rn, imm:$Imm)>;
+}
+
+// Scalar Signed Shift Right (Immediate)
+defm SSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00000, "sshr">;
+defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vshrds_n, SSHRddi>;
+// Pattern to match llvm.arm.* intrinsic.
+def : Neon_ScalarShiftImm_arm_D_size_patterns<sra, SSHRddi>;
+
+// Scalar Unsigned Shift Right (Immediate)
+defm USHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00000, "ushr">;
+defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vshrdu_n, USHRddi>;
+// Pattern to match llvm.arm.* intrinsic.
+def : Neon_ScalarShiftImm_arm_D_size_patterns<srl, USHRddi>;
+
+// Scalar Signed Rounding Shift Right (Immediate)
+defm SRSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00100, "srshr">;
+defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vsrshr, SRSHRddi>;
+
+// Scalar Unsigned Rounding Shift Right (Immediate)
+defm URSHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00100, "urshr">;
+defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vurshr, URSHRddi>;
+
+// Scalar Signed Shift Right and Accumulate (Immediate)
+def SSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00010, "ssra">;
+def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vsrads_n, SSRA>;
+
+// Scalar Unsigned Shift Right and Accumulate (Immediate)
+def USRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00010, "usra">;
+def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vsradu_n, USRA>;
+
+// Scalar Signed Rounding Shift Right and Accumulate (Immediate)
+def SRSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00110, "srsra">;
+def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vrsrads_n, SRSRA>;
+
+// Scalar Unsigned Rounding Shift Right and Accumulate (Immediate)
+def URSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00110, "ursra">;
+def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vrsradu_n, URSRA>;
+
+// Scalar Shift Left (Immediate)
+defm SHL : NeonI_ScalarShiftLeftImm_D_size<0b0, 0b01010, "shl">;
+defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vshld_n, SHLddi>;
+// Pattern to match llvm.arm.* intrinsic.
+def : Neon_ScalarShiftImm_arm_D_size_patterns<shl, SHLddi>;
+
+// Signed Saturating Shift Left (Immediate)
+defm SQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b0, 0b01110, "sqshl">;
+defm : Neon_ScalarShiftImm_BHSD_size_patterns<int_aarch64_neon_vqshls_n,
+ SQSHLbbi, SQSHLhhi,
+ SQSHLssi, SQSHLddi>;
+// Pattern to match llvm.arm.* intrinsic.
+defm : Neon_ScalarShiftImm_D_size_patterns<Neon_sqrshlImm, SQSHLddi>;
+
+// Unsigned Saturating Shift Left (Immediate)
+defm UQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01110, "uqshl">;
+defm : Neon_ScalarShiftImm_BHSD_size_patterns<int_aarch64_neon_vqshlu_n,
+ UQSHLbbi, UQSHLhhi,
+ UQSHLssi, UQSHLddi>;
+// Pattern to match llvm.arm.* intrinsic.
+defm : Neon_ScalarShiftImm_D_size_patterns<Neon_uqrshlImm, UQSHLddi>;
+
+// Signed Saturating Shift Left Unsigned (Immediate)
+defm SQSHLU : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01100, "sqshlu">;
+defm : Neon_ScalarShiftImm_BHSD_size_patterns<int_aarch64_neon_vsqshlu,
+ SQSHLUbbi, SQSHLUhhi,
+ SQSHLUssi, SQSHLUddi>;
+
+// Shift Right And Insert (Immediate)
+def SRI : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b01000, "sri">;
+def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vsri, SRI>;
+
+// Shift Left And Insert (Immediate)
+def SLI : NeonI_ScalarShiftLeftImm_accum_D_size<0b1, 0b01010, "sli">;
+def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vsli, SLI>;
+
+// Signed Saturating Shift Right Narrow (Immediate)
+defm SQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10010, "sqshrn">;
+defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrn,
+ SQSHRNbhi, SQSHRNhsi,
+ SQSHRNsdi>;
+
+// Unsigned Saturating Shift Right Narrow (Immediate)
+defm UQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10010, "uqshrn">;
+defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqshrn,
+ UQSHRNbhi, UQSHRNhsi,
+ UQSHRNsdi>;
+
+// Signed Saturating Rounded Shift Right Narrow (Immediate)
+defm SQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10011, "sqrshrn">;
+defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrn,
+ SQRSHRNbhi, SQRSHRNhsi,
+ SQRSHRNsdi>;
+
+// Unsigned Saturating Rounded Shift Right Narrow (Immediate)
+defm UQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10011, "uqrshrn">;
+defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqrshrn,
+ UQRSHRNbhi, UQRSHRNhsi,
+ UQRSHRNsdi>;
+
+// Signed Saturating Shift Right Unsigned Narrow (Immediate)
+defm SQSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10000, "sqshrun">;
+defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrun,
+ SQSHRUNbhi, SQSHRUNhsi,
+ SQSHRUNsdi>;
+
+// Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
+defm SQRSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10001, "sqrshrun">;
+defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrun,
+ SQRSHRUNbhi, SQRSHRUNhsi,
+ SQRSHRUNsdi>;
+
+// Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
+defm SCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11100, "scvtf">;
+defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtf32_n_s32,
+ int_aarch64_neon_vcvtf64_n_s64,
+ SCVTF_Nssi, SCVTF_Nddi>;
+
+// Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
+defm UCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11100, "ucvtf">;
+defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtf32_n_u32,
+ int_aarch64_neon_vcvtf64_n_u64,
+ UCVTF_Nssi, UCVTF_Nddi>;
+
+// Scalar Floating-point Convert To Signed Fixed-point (Immediate)
+defm FCVTZS_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11111, "fcvtzs">;
+defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvts_n_s32_f32,
+ int_aarch64_neon_vcvtd_n_s64_f64,
+ FCVTZS_Nssi, FCVTZS_Nddi>;
+
+// Scalar Floating-point Convert To Unsigned Fixed-point (Immediate)
+defm FCVTZU_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11111, "fcvtzu">;
+defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvts_n_u32_f32,
+ int_aarch64_neon_vcvtd_n_u64_f64,
+ FCVTZU_Nssi, FCVTZU_Nddi>;
+
// Scalar Integer Add
let isCommutable = 1 in {
def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
+// Signed Saturating Doubling Multiply-Add Long
+defm SQDMLAL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1001, "sqdmlal">;
+defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlal,
+ SQDMLALshh, SQDMLALdss>;
+
+// Signed Saturating Doubling Multiply-Subtract Long
+defm SQDMLSL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1011, "sqdmlsl">;
+defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlsl,
+ SQDMLSLshh, SQDMLSLdss>;
+
+// Signed Saturating Doubling Multiply Long
+defm SQDMULL : NeonI_Scalar3Diff_HS_size<0b0, 0b1101, "sqdmull">;
+defm : Neon_Scalar3Diff_HS_size_patterns<int_aarch64_neon_vqdmull,
+ SQDMULLshh, SQDMULLdss>;
+
// Scalar Signed Integer Convert To Floating-point
defm SCVTF : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11101, "scvtf">;
defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtf32_s32,
def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcltz,
CMLTddi>;
+// Scalar Floating-point Compare
+
+// Scalar Floating-point Compare Mask Equal
+defm FCMEQ: NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11100, "fcmeq">;
+defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vceq,
+ FCMEQsss, FCMEQddd>;
+
+// Scalar Floating-point Compare Mask Equal To Zero
+defm FCMEQZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01101, "fcmeq">;
+defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vceq,
+ FCMEQZssi, FCMEQZddi>;
+
+// Scalar Floating-point Compare Mask Greater Than Or Equal
+defm FCMGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11100, "fcmge">;
+defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcge,
+ FCMGEsss, FCMGEddd>;
+
+// Scalar Floating-point Compare Mask Greater Than Or Equal To Zero
+defm FCMGEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01100, "fcmge">;
+defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcge,
+ FCMGEZssi, FCMGEZddi>;
+
+// Scalar Floating-point Compare Mask Greater Than
+defm FCMGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11100, "fcmgt">;
+defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcgt,
+ FCMGTsss, FCMGTddd>;
+
+// Scalar Floating-point Compare Mask Greater Than Zero
+defm FCMGTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01100, "fcmgt">;
+defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcgt,
+ FCMGTZssi, FCMGTZddi>;
+
+// Scalar Floating-point Compare Mask Less Than Or Equal To Zero
+defm FCMLEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01101, "fcmle">;
+defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vclez,
+ FCMLEZssi, FCMLEZddi>;
+
+// Scalar Floating-point Compare Mask Less Than Zero
+defm FCMLTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01110, "fcmlt">;
+defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcltz,
+ FCMLTZssi, FCMLTZddi>;
+
+// Scalar Floating-point Absolute Compare Mask Greater Than Or Equal
+defm FACGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11101, "facge">;
+defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcage,
+ FACGEsss, FACGEddd>;
+
+// Scalar Floating-point Absolute Compare Mask Greater Than
+defm FACGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11101, "facgt">;
+defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcagt,
+ FACGTsss, FACGTddd>;
+
// Scalar Absolute Value
defm ABS : NeonI_Scalar2SameMisc_D_size<0b0, 0b01011, "abs">;
defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vabs, ABSdd>;
USQADDbb, USQADDhh,
USQADDss, USQADDdd>;
+// Scalar Signed Saturating Extract Unsigned Narrow
+defm SQXTUN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10010, "sqxtun">;
+defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnsu,
+ SQXTUNbh, SQXTUNhs,
+ SQXTUNsd>;
+
+// Scalar Signed Saturating Extract Narrow
+defm SQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b0, 0b10100, "sqxtn">;
+defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovns,
+ SQXTNbh, SQXTNhs,
+ SQXTNsd>;
+
+// Scalar Unsigned Saturating Extract Narrow
+defm UQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10100, "uqxtn">;
+defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnu,
+ UQXTNbh, UQXTNhs,
+ UQXTNsd>;
+
// Scalar Reduce Pairwise
multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,
let isCommutable = Commutable in {
def _D_2D : NeonI_ScalarPair<u, {size, 0b1}, opcode,
(outs FPR64:$Rd), (ins VPR128:$Rn),
- !strconcat(asmop, " $Rd, $Rn.2d"),
+ !strconcat(asmop, "\t$Rd, $Rn.2d"),
[],
NoItinerary>;
}
let isCommutable = Commutable in {
def _S_2S : NeonI_ScalarPair<u, {size, 0b0}, opcode,
(outs FPR32:$Rd), (ins VPR64:$Rn),
- !strconcat(asmop, " $Rd, $Rn.2s"),
+ !strconcat(asmop, "\t$Rd, $Rn.2s"),
[],
NoItinerary>;
}
defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm,
int_aarch64_neon_vpfminnmq, FMINNMPvv_S_2S, FMINNMPvv_D_2D>;
+// i64 immediate operand whose ImmLeaf predicate accepts only the value 0.
+// Parsed through neon_uimm0_asmoperand and printed with printUImmBareOperand.
+def neon_uimm0_bare : Operand<i64>,
+ ImmLeaf<i64, [{return Imm == 0;}]> {
+ let ParserMatchClass = neon_uimm0_asmoperand;
+ let PrintMethod = "printUImmBareOperand";
+}
+
+// i64 immediate operand printed with printUImmBareOperand. The ImmLeaf
+// predicate returns true for every value, so it does no range checking itself;
+// any limit is presumably enforced by the neon_uimm1_asmoperand parser class
+// -- TODO confirm.
+def neon_uimm1_bare : Operand<i64>,
+ ImmLeaf<i64, [{(void)Imm; return true;}]> {
+ let ParserMatchClass = neon_uimm1_asmoperand;
+ let PrintMethod = "printUImmBareOperand";
+}
+
+// i64 immediate operand printed with printUImmBareOperand. The ImmLeaf
+// predicate returns true for every value, so it does no range checking itself;
+// any limit is presumably enforced by the neon_uimm2_asmoperand parser class
+// -- TODO confirm.
+def neon_uimm2_bare : Operand<i64>,
+ ImmLeaf<i64, [{(void)Imm; return true;}]> {
+ let ParserMatchClass = neon_uimm2_asmoperand;
+ let PrintMethod = "printUImmBareOperand";
+}
+
+// i64 immediate operand printed with printUImmBareOperand. The ImmLeaf
+// predicate returns true for every value. Unlike the 0/1/2 variants, the
+// parser class here is uimm3_asmoperand (no "neon_" prefix).
+def neon_uimm3_bare : Operand<i64>,
+ ImmLeaf<i64, [{(void)Imm; return true;}]> {
+ let ParserMatchClass = uimm3_asmoperand;
+ let PrintMethod = "printUImmBareOperand";
+}
+
+// i64 immediate operand printed with printUImmBareOperand. The ImmLeaf
+// predicate returns true for every value. Unlike the 0/1/2 variants, the
+// parser class here is uimm4_asmoperand (no "neon_" prefix).
+def neon_uimm4_bare : Operand<i64>,
+ ImmLeaf<i64, [{(void)Imm; return true;}]> {
+ let ParserMatchClass = uimm4_asmoperand;
+ let PrintMethod = "printUImmBareOperand";
+}
+
+
+// Scalar by element Arithmetic
+
+// Base class for scalar-by-element instructions without a tied accumulator.
+// Operands are a scalar $Rn, a vector register $MRm and a lane index $Imm;
+// the assembly form is "<asmop>\t$Rd, $Rn, $MRm<rmlane>[$Imm]".
+// The class only declares the Imm and MRm fields; each def deriving from it
+// assigns them to instruction bits itself.
+class NeonI_ScalarXIndexedElemArith<string asmop, bits<4> opcode,
+ string rmlane, bit u, bit szhi, bit szlo,
+ RegisterClass ResFPR, RegisterClass OpFPR,
+ RegisterOperand OpVPR, Operand OpImm>
+ : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode,
+ (outs ResFPR:$Rd),
+ (ins OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
+ asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
+ [], // no selection pattern is attached by this class
+ NoItinerary> {
+ bits<3> Imm; // lane index; up to three bits are used by the defs
+ bits<5> MRm; // register number of the indexed vector operand
+}
+
+// Variant of the scalar-by-element base class for instructions that also read
+// their destination: an extra $src input of the result register class is
+// added and tied to $Rd through Constraints. $src does not appear in the
+// assembly string. As in the unconstrained class, Imm and MRm are only
+// declared here and are placed into instruction bits by each def.
+class NeonI_ScalarXIndexedElemArith_Constraint_Impl<string asmop, bits<4> opcode,
+ string rmlane,
+ bit u, bit szhi, bit szlo,
+ RegisterClass ResFPR,
+ RegisterClass OpFPR,
+ RegisterOperand OpVPR,
+ Operand OpImm>
+ : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode,
+ (outs ResFPR:$Rd),
+ (ins ResFPR:$src, OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
+ asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
+ [], // no selection pattern is attached by this class
+ NoItinerary> {
+ let Constraints = "$src = $Rd"; // accumulator shares the result register
+ bits<3> Imm; // lane index; up to three bits are used by the defs
+ bits<5> MRm; // register number of the indexed vector operand
+}
+
+// Scalar Floating Point multiply (scalar, by element)
+// The ".s" def takes a 2-bit lane index split across h:l (Inst{11}:Inst{21});
+// the ".d" def takes a 1-bit index in h and hard-wires l to 0. Both encode
+// all five MRm bits in Inst{20-16}.
+def FMULssv_4S : NeonI_ScalarXIndexedElemArith<"fmul",
+ 0b1001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def FMULddv_2D : NeonI_ScalarXIndexedElemArith<"fmul",
+ 0b1001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
+ let Inst{11} = Imm{0}; // h
+ let Inst{21} = 0b0; // l
+ let Inst{20-16} = MRm;
+}
+
+// Scalar Floating Point multiply extended (scalar, by element)
+// Same opcode (0b1001) as the fmul defs but with the u bit set to 1.
+// The ".s" def takes a 2-bit lane index split across h:l (Inst{11}:Inst{21});
+// the ".d" def takes a 1-bit index in h and hard-wires l to 0.
+def FMULXssv_4S : NeonI_ScalarXIndexedElemArith<"fmulx",
+ 0b1001, ".s", 0b1, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def FMULXddv_2D : NeonI_ScalarXIndexedElemArith<"fmulx",
+ 0b1001, ".d", 0b1, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
+ let Inst{11} = Imm{0}; // h
+ let Inst{21} = 0b0; // l
+ let Inst{20-16} = MRm;
+}
+
+// Scalar Floating Point fused multiply-add (scalar, by element)
+// Built on the _Constraint_Impl class, so the accumulator input $src is tied
+// to $Rd. The ".s" def takes a 2-bit lane index in h:l (Inst{11}:Inst{21});
+// the ".d" def takes a 1-bit index in h and hard-wires l to 0.
+def FMLAssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
+ 0b0001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def FMLAddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
+ 0b0001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
+ let Inst{11} = Imm{0}; // h
+ let Inst{21} = 0b0; // l
+ let Inst{20-16} = MRm;
+}
+
+// Scalar Floating Point fused multiply-subtract (scalar, by element)
+// Built on the _Constraint_Impl class, so the accumulator input $src is tied
+// to $Rd. The ".s" def takes a 2-bit lane index in h:l (Inst{11}:Inst{21});
+// the ".d" def takes a 1-bit index in h and hard-wires l to 0.
+def FMLSssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
+ 0b0101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def FMLSddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
+ 0b0101, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
+ let Inst{11} = Imm{0}; // h
+ let Inst{21} = 0b0; // l
+ let Inst{20-16} = MRm;
+}
+
+// Scalar Signed saturating doubling multiply-add long (scalar, by element)
+// Built on the _Constraint_Impl class, so the accumulator $src is tied to $Rd.
+// ".h" defs: the lane index occupies h:l:m (Inst{11}, Inst{21}, Inst{20}); the
+// 4H def has a 2-bit index (h fixed to 0), the 8H def a 3-bit index. Because m
+// takes Inst{20}, only MRm{3-0} is encoded, in Inst{19-16}, and the vector
+// operand uses the VPR64Lo / VPR128Lo classes.
+// ".s" defs: the index occupies h:l (the 2S def fixes h to 0) and all five
+// MRm bits are encoded in Inst{20-16}.
+def SQDMLALshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
+ 0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMLALshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
+ 0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
+ let Inst{11} = Imm{2}; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMLALdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
+ 0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def SQDMLALdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
+ 0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+
+// Scalar Signed saturating doubling
+// multiply-subtract long (scalar, by element)
+// Built on the _Constraint_Impl class, so the accumulator $src is tied to $Rd.
+// ".h" defs: the lane index occupies h:l:m (Inst{11}, Inst{21}, Inst{20}); the
+// 4H def has a 2-bit index (h fixed to 0), the 8H def a 3-bit index, and only
+// MRm{3-0} is encoded (Inst{19-16}).
+// ".s" defs: the index occupies h:l (the 2S def fixes h to 0) and all five
+// MRm bits are encoded in Inst{20-16}.
+def SQDMLSLshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
+ 0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMLSLshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
+ 0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
+ let Inst{11} = Imm{2}; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMLSLdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
+ 0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def SQDMLSLdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
+ 0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+
+// Scalar Signed saturating doubling multiply long (scalar, by element)
+// No tied accumulator: these use the unconstrained base class.
+// ".h" defs: the lane index occupies h:l:m (Inst{11}, Inst{21}, Inst{20}); the
+// 4H def has a 2-bit index (h fixed to 0), the 8H def a 3-bit index, and only
+// MRm{3-0} is encoded (Inst{19-16}).
+// ".s" defs: the index occupies h:l (the 2S def fixes h to 0) and all five
+// MRm bits are encoded in Inst{20-16}.
+def SQDMULLshv_4H : NeonI_ScalarXIndexedElemArith<"sqdmull",
+ 0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMULLshv_8H : NeonI_ScalarXIndexedElemArith<"sqdmull",
+ 0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
+ let Inst{11} = Imm{2}; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMULLdsv_2S : NeonI_ScalarXIndexedElemArith<"sqdmull",
+ 0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def SQDMULLdsv_4S : NeonI_ScalarXIndexedElemArith<"sqdmull",
+ 0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+
+// Scalar Signed saturating doubling multiply returning
+// high half (scalar, by element)
+// Result and scalar operand use the same register class (FPR16 or FPR32).
+// ".h" defs: the lane index occupies h:l:m (Inst{11}, Inst{21}, Inst{20}); the
+// 4H def has a 2-bit index (h fixed to 0), the 8H def a 3-bit index, and only
+// MRm{3-0} is encoded (Inst{19-16}).
+// ".s" defs: the index occupies h:l (the 2S def fixes h to 0) and all five
+// MRm bits are encoded in Inst{20-16}.
+def SQDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
+ 0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
+ 0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
+ let Inst{11} = Imm{2}; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
+ 0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def SQDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
+ 0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+
+// Scalar Signed saturating rounding doubling multiply
+// returning high half (scalar, by element)
+// Differs from the sqdmulh defs only in the mnemonic and opcode (0b1101).
+// ".h" defs: the lane index occupies h:l:m (Inst{11}, Inst{21}, Inst{20}); the
+// 4H def has a 2-bit index (h fixed to 0), the 8H def a 3-bit index, and only
+// MRm{3-0} is encoded (Inst{19-16}).
+// ".s" defs: the index occupies h:l (the 2S def fixes h to 0) and all five
+// MRm bits are encoded in Inst{20-16}.
+def SQRDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
+ 0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQRDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
+ 0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
+ let Inst{11} = Imm{2}; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQRDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
+ 0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def SQRDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
+ 0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+
+
+// Scalar Copy - DUP element to scalar
+// Base class for the scalar DUP defs: reads lane $Imm of vector $Rn into the
+// scalar register $Rd. Assembly form: "<asmop>\t$Rd, $Rn.<asmlane>[$Imm]".
+// Only the Imm field is declared here; each def places it into Inst{20-16}.
+class NeonI_Scalar_DUP<string asmop, string asmlane,
+ RegisterClass ResRC, RegisterOperand VPRC,
+ Operand OpImm>
+ : NeonI_ScalarCopy<(outs ResRC:$Rd), (ins VPRC:$Rn, OpImm:$Imm),
+ asmop # "\t$Rd, $Rn." # asmlane # "[$Imm]",
+ [], // no selection pattern is attached by this class
+ NoItinerary> {
+ bits<4> Imm; // lane index; the widest user (the "b" def) needs four bits
+}
+
+// Inst{20-16} holds the lane index in its upper bits followed by a marker
+// whose lowest set bit selects the element size:
+// b -> iiii1, h -> iii10, s -> ii100, d -> i1000
+def DUPbv_B : NeonI_Scalar_DUP<"dup", "b", FPR8, VPR128, neon_uimm4_bare> {
+ let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
+}
+def DUPhv_H : NeonI_Scalar_DUP<"dup", "h", FPR16, VPR128, neon_uimm3_bare> {
+ let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
+}
+def DUPsv_S : NeonI_Scalar_DUP<"dup", "s", FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
+}
+def DUPdv_D : NeonI_Scalar_DUP<"dup", "d", FPR64, VPR128, neon_uimm1_bare> {
+ // NOTE(review): Imm is declared bits<4> in the class but is used here as a
+ // single entry of a 5-entry bit list (the other defs write Imm{n}
+ // explicitly); presumably only Imm{0} is intended -- confirm.
+ let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
+}
//===----------------------------------------------------------------------===//
def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
-def neon_uimm0_bare : Operand<i64>,
- ImmLeaf<i64, [{return Imm == 0;}]> {
- let ParserMatchClass = neon_uimm0_asmoperand;
- let PrintMethod = "printNeonUImm8OperandBare";
-}
-
-def neon_uimm1_bare : Operand<i64>,
- ImmLeaf<i64, [{(void)Imm; return true;}]> {
- let ParserMatchClass = neon_uimm1_asmoperand;
- let PrintMethod = "printNeonUImm8OperandBare";
-}
-
-def neon_uimm2_bare : Operand<i64>,
- ImmLeaf<i64, [{(void)Imm; return true;}]> {
- let ParserMatchClass = neon_uimm2_asmoperand;
- let PrintMethod = "printNeonUImm8OperandBare";
-}
-
-def neon_uimm3_bare : Operand<i64>,
- ImmLeaf<i64, [{(void)Imm; return true;}]> {
+def neon_uimm3 : Operand<i64>,
+ ImmLeaf<i64, [{(void)Imm; return true;}]> {
let ParserMatchClass = uimm3_asmoperand;
- let PrintMethod = "printNeonUImm8OperandBare";
+ let PrintMethod = "printUImmHexOperand";
}
-def neon_uimm4_bare : Operand<i64>,
- ImmLeaf<i64, [{(void)Imm; return true;}]> {
+def neon_uimm4 : Operand<i64>,
+ ImmLeaf<i64, [{(void)Imm; return true;}]> {
let ParserMatchClass = uimm4_asmoperand;
- let PrintMethod = "printNeonUImm8OperandBare";
+ let PrintMethod = "printUImmHexOperand";
}
class NeonI_INS_main<string asmop, string Res, ValueType ResTy,
let Constraints = "$src = $Rd";
}
+// Bitwise Extract
+// Instruction class for EXT: two vector sources of register class OpVPR, a
+// destination of the same class and an immediate $Index. All three vector
+// operands are printed with the same arrangement suffix OpS. Only the Index
+// field is declared here; each def places it into Inst{14-11}.
+class NeonI_Extract<bit q, bits<2> op2, string asmop,
+ string OpS, RegisterOperand OpVPR, Operand OpImm>
+ : NeonI_BitExtract<q, op2, (outs OpVPR:$Rd),
+ (ins OpVPR:$Rn, OpVPR:$Rm, OpImm:$Index),
+ asmop # "\t$Rd." # OpS # ", $Rn." # OpS #
+ ", $Rm." # OpS # ", $Index",
+ [], // selection is done by the separate NI_Extract patterns
+ NoItinerary>{
+ bits<4> Index;
+}
+
+// 64-bit form (q = 0): a 3-bit index goes into Inst{13-11} and the top bit of
+// the 4-bit field (Inst{14}) is fixed to 0.
+def EXTvvvi_8b : NeonI_Extract<0b0, 0b00, "ext", "8b",
+ VPR64, neon_uimm3> {
+ let Inst{14-11} = {0b0, Index{2}, Index{1}, Index{0}};
+}
+
+// 128-bit form (q = 1): all four Index bits are encoded in Inst{14-11}.
+def EXTvvvi_16b: NeonI_Extract<0b1, 0b00, "ext", "16b",
+ VPR128, neon_uimm4> {
+ let Inst{14-11} = Index;
+}
+
+// Selection pattern mapping a Neon_vextract node whose result and both vector
+// operands have type OpTy onto INST. The i64 immediate is passed to the
+// instruction unchanged.
+// NOTE(review): because $Imm is forwarded without scaling, the node's index
+// is presumably already expressed in the unit EXT expects -- confirm against
+// the code that creates NEON_VEXTRACT.
+class NI_Extract<ValueType OpTy, RegisterOperand OpVPR, Instruction INST,
+ Operand OpImm>
+ : Pat<(OpTy (Neon_vextract (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm),
+ (i64 OpImm:$Imm))),
+ (INST OpVPR:$Rn, OpVPR:$Rm, OpImm:$Imm)>;
+
+// Every 64-bit vector type selects EXTvvvi_8b (VPR64, neon_uimm3) and every
+// 128-bit vector type selects EXTvvvi_16b (VPR128, neon_uimm4), regardless of
+// the element type.
+def : NI_Extract<v8i8, VPR64, EXTvvvi_8b, neon_uimm3>;
+def : NI_Extract<v4i16, VPR64, EXTvvvi_8b, neon_uimm3>;
+def : NI_Extract<v2i32, VPR64, EXTvvvi_8b, neon_uimm3>;
+def : NI_Extract<v1i64, VPR64, EXTvvvi_8b, neon_uimm3>;
+def : NI_Extract<v2f32, VPR64, EXTvvvi_8b, neon_uimm3>;
+def : NI_Extract<v1f64, VPR64, EXTvvvi_8b, neon_uimm3>;
+def : NI_Extract<v16i8, VPR128, EXTvvvi_16b, neon_uimm4>;
+def : NI_Extract<v8i16, VPR128, EXTvvvi_16b, neon_uimm4>;
+def : NI_Extract<v4i32, VPR128, EXTvvvi_16b, neon_uimm4>;
+def : NI_Extract<v2i64, VPR128, EXTvvvi_16b, neon_uimm4>;
+def : NI_Extract<v4f32, VPR128, EXTvvvi_16b, neon_uimm4>;
+def : NI_Extract<v2f64, VPR128, EXTvvvi_16b, neon_uimm4>;
+
+// Table lookup
+// Instruction class for TBL: $Rn is a vector-list operand (VecList), $Rm and
+// $Rd are plain vector registers of class OpVPR printed with arrangement OpS.
+// The list operand is printed without an arrangement suffix from this string.
+// No selection pattern is attached.
+class NI_TBL<bit q, bits<2> op2, bits<2> len, bit op,
+ string asmop, string OpS, RegisterOperand OpVPR,
+ RegisterOperand VecList>
+ : NeonI_TBL<q, op2, len, op,
+ (outs OpVPR:$Rd), (ins VecList:$Rn, OpVPR:$Rm),
+ asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
+ [],
+ NoItinerary>;
+
+// The vectors in look up table are always 16b
+// Emits the _8b (q = 0, VPR64) and _16b (q = 1, VPR128) TBL defs for one
+// table length. The list operand is looked up by name: List is concatenated
+// with "16B_operand" and cast to a RegisterOperand, so both defs use the same
+// 16B list operand.
+multiclass NI_TBL_pat<bits<2> len, bit op, string asmop, string List> {
+ def _8b : NI_TBL<0, 0b00, len, op, asmop, "8b", VPR64,
+ !cast<RegisterOperand>(List # "16B_operand")>;
+
+ def _16b : NI_TBL<1, 0b00, len, op, asmop, "16b", VPR128,
+ !cast<RegisterOperand>(List # "16B_operand")>;
+}
+
+// len 0b00..0b11 pairs with the VOne/VPair/VTriple/VQuad list operands;
+// op = 0 for all tbl defs.
+defm TBL1 : NI_TBL_pat<0b00, 0b0, "tbl", "VOne">;
+defm TBL2 : NI_TBL_pat<0b01, 0b0, "tbl", "VPair">;
+defm TBL3 : NI_TBL_pat<0b10, 0b0, "tbl", "VTriple">;
+defm TBL4 : NI_TBL_pat<0b11, 0b0, "tbl", "VQuad">;
+
+// Table lookup extension
+// Instruction class for TBX. Same operands and assembly string as NI_TBL,
+// plus an extra $src input that is tied to $Rd through Constraints, so the
+// instruction also reads its destination register. No selection pattern is
+// attached.
+class NI_TBX<bit q, bits<2> op2, bits<2> len, bit op,
+ string asmop, string OpS, RegisterOperand OpVPR,
+ RegisterOperand VecList>
+ : NeonI_TBL<q, op2, len, op,
+ (outs OpVPR:$Rd), (ins OpVPR:$src, VecList:$Rn, OpVPR:$Rm),
+ asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
+ [],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+}
+
+// The vectors in look up table are always 16b
+// Emits the _8b (q = 0, VPR64) and _16b (q = 1, VPR128) TBX defs for one
+// table length. The list operand is looked up by name: List is concatenated
+// with "16B_operand" and cast to a RegisterOperand, so both defs use the same
+// 16B list operand.
+multiclass NI_TBX_pat<bits<2> len, bit op, string asmop, string List> {
+ def _8b : NI_TBX<0, 0b00, len, op, asmop, "8b", VPR64,
+ !cast<RegisterOperand>(List # "16B_operand")>;
+
+ def _16b : NI_TBX<1, 0b00, len, op, asmop, "16b", VPR128,
+ !cast<RegisterOperand>(List # "16B_operand")>;
+}
+
+// len 0b00..0b11 pairs with the VOne/VPair/VTriple/VQuad list operands;
+// op = 1 for all tbx defs.
+defm TBX1 : NI_TBX_pat<0b00, 0b1, "tbx", "VOne">;
+defm TBX2 : NI_TBX_pat<0b01, 0b1, "tbx", "VPair">;
+defm TBX3 : NI_TBX_pat<0b10, 0b1, "tbx", "VTriple">;
+defm TBX4 : NI_TBX_pat<0b11, 0b1, "tbx", "VQuad">;
+
// The followings are for instruction class (3V Elem)
// Variant 1
let Constraints = "$src = $Rd";
}
-multiclass NI_2VE_v1<bit u, bits<4> opcode, string asmop>
-{
+multiclass NI_2VE_v1<bit u, bits<4> opcode, string asmop> {
// vector register class for element is always 128-bit to cover the max index
def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
neon_uimm2_bare, VPR64, VPR64, VPR128> {
bits<5> Re;
}
-multiclass NI_2VE_v1_2op<bit u, bits<4> opcode, string asmop>
-{
+multiclass NI_2VE_v1_2op<bit u, bits<4> opcode, string asmop> {
// vector register class for element is always 128-bit to cover the max index
def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
neon_uimm2_bare, VPR64, VPR64, VPR128> {
(INST OpVPR:$Rn,
(SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
-multiclass NI_2VE_mul_v1_pat<string subop, SDPatternOperator op>
-{
+multiclass NI_2VE_mul_v1_pat<string subop, SDPatternOperator op> {
def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
op, VPR64, VPR128, v2i32, v2i32, v4i32,
BinOpFrag<(Neon_vduplane
// Variant 2
-multiclass NI_2VE_v2_2op<bit u, bits<4> opcode, string asmop>
-{
+multiclass NI_2VE_v2_2op<bit u, bits<4> opcode, string asmop> {
// vector register class for element is always 128-bit to cover the max index
def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
neon_uimm2_bare, VPR64, VPR64, VPR128> {
(INST OpVPR:$Rn,
(SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), 0)>;
-multiclass NI_2VE_mul_v2_pat<string subop, SDPatternOperator op>
-{
+multiclass NI_2VE_mul_v2_pat<string subop, SDPatternOperator op> {
def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
op, VPR64, VPR128, v2f32, v2f32, v4f32,
BinOpFrag<(Neon_vduplane
// The followings are patterns using fma
// -ffp-contract=fast generates fma
-multiclass NI_2VE_v2<bit u, bits<4> opcode, string asmop>
-{
+multiclass NI_2VE_v2<bit u, bits<4> opcode, string asmop> {
// vector register class for element is always 128-bit to cover the max index
def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
neon_uimm2_bare, VPR64, VPR64, VPR128> {
(SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), 0)>;
-multiclass NI_2VE_fma_v2_pat<string subop, SDPatternOperator op>
-{
+multiclass NI_2VE_fma_v2_pat<string subop, SDPatternOperator op> {
def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
BinOpFrag<(Neon_vduplane
// E.g. SMLAL : 4S/4H/H (v0-v15), 2D/2S/S
// SMLAL2: 4S/8H/H (v0-v15), 2D/4S/S
-multiclass NI_2VE_v3<bit u, bits<4> opcode, string asmop>
-{
+multiclass NI_2VE_v3<bit u, bits<4> opcode, string asmop> {
// vector register class for element is always 128-bit to cover the max index
def _2d2s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
neon_uimm2_bare, VPR128, VPR64, VPR128> {
defm SQDMLALvve : NI_2VE_v3<0b0, 0b0011, "sqdmlal">;
defm SQDMLSLvve : NI_2VE_v3<0b0, 0b0111, "sqdmlsl">;
-multiclass NI_2VE_v3_2op<bit u, bits<4> opcode, string asmop>
-{
+multiclass NI_2VE_v3_2op<bit u, bits<4> opcode, string asmop> {
// vector register class for element is always 128-bit to cover the max index
def _2d2s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
neon_uimm2_bare, VPR128, VPR64, VPR128> {
(INST VPR128:$src, VPR128:$Rn,
(SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
-multiclass NI_2VEL_v3_pat<string subop, SDPatternOperator op>
-{
+multiclass NI_2VEL_v3_pat<string subop, SDPatternOperator op> {
def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
op, VPR128, VPR64, VPR128Lo, v4i32, v4i16, v8i16,
BinOpFrag<(Neon_vduplane
(INST VPR128:$Rn,
(SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
-multiclass NI_2VEL_mul_v3_pat<string subop, SDPatternOperator op>
-{
+multiclass NI_2VEL_mul_v3_pat<string subop, SDPatternOperator op> {
def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
op, VPR64, VPR128Lo, v4i32, v4i16, v8i16,
BinOpFrag<(Neon_vduplane
defm UMULL_lane_v3 : NI_2VEL_mul_v3_pat<"UMULLve", int_arm_neon_vmullu>;
defm SQDMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SQDMULLve", int_arm_neon_vqdmull>;
-multiclass NI_qdma<SDPatternOperator op>
-{
+multiclass NI_qdma<SDPatternOperator op> {
def _4s : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
(op node:$Ra,
(v4i32 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
defm Neon_qdmlal : NI_qdma<int_arm_neon_vqadds>;
defm Neon_qdmlsl : NI_qdma<int_arm_neon_vqsubs>;
-multiclass NI_2VEL_v3_qdma_pat<string subop, string op>
-{
+multiclass NI_2VEL_v3_qdma_pat<string subop, string op> {
def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
!cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR128Lo,
v4i32, v4i16, v8i16,
def INSdx_pattern : Neon_INS_main_pattern<v1i64, v2i64, GPR64, i64,
neon_uimm0_bare, INSdx>;
-class NeonI_INS_element<string asmop, string Res, ValueType ResTy,
- Operand ResImm, ValueType MidTy>
+class NeonI_INS_element<string asmop, string Res, Operand ResImm>
: NeonI_insert<0b1, 0b1,
(outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn,
ResImm:$Immd, ResImm:$Immn),
asmop # "\t$Rd." # Res # "[$Immd], $Rn." # Res # "[$Immn]",
- [(set (ResTy VPR128:$Rd),
- (ResTy (vector_insert
- (ResTy VPR128:$src),
- (MidTy (vector_extract
- (ResTy VPR128:$Rn),
- (ResImm:$Immn))),
- (ResImm:$Immd))))],
+ [],
NoItinerary> {
let Constraints = "$src = $Rd";
bits<4> Immd;
}
//Insert element (vector, from element)
-def INSELb : NeonI_INS_element<"ins", "b", v16i8, neon_uimm4_bare, i32> {
+def INSELb : NeonI_INS_element<"ins", "b", neon_uimm4_bare> {
let Inst{20-16} = {Immd{3}, Immd{2}, Immd{1}, Immd{0}, 0b1};
let Inst{14-11} = {Immn{3}, Immn{2}, Immn{1}, Immn{0}};
}
-def INSELh : NeonI_INS_element<"ins", "h", v8i16, neon_uimm3_bare, i32> {
+def INSELh : NeonI_INS_element<"ins", "h", neon_uimm3_bare> {
let Inst{20-16} = {Immd{2}, Immd{1}, Immd{0}, 0b1, 0b0};
let Inst{14-12} = {Immn{2}, Immn{1}, Immn{0}};
// bit 11 is unspecified.
}
-def INSELs : NeonI_INS_element<"ins", "s", v4i32, neon_uimm2_bare, i32> {
+def INSELs : NeonI_INS_element<"ins", "s", neon_uimm2_bare> {
let Inst{20-16} = {Immd{1}, Immd{0}, 0b1, 0b0, 0b0};
let Inst{14-13} = {Immn{1}, Immn{0}};
// bits 11-12 are unspecified.
}
-def INSELd : NeonI_INS_element<"ins", "d", v2i64, neon_uimm1_bare, i64> {
+def INSELd : NeonI_INS_element<"ins", "d", neon_uimm1_bare> {
let Inst{20-16} = {Immd, 0b1, 0b0, 0b0, 0b0};
let Inst{14} = Immn{0};
// bits 11-13 are unspecified.
}
+// Selection patterns for "insert an element extracted from another vector",
+// all mapped onto the element-to-element INS instruction, which works on
+// 128-bit registers.
+// ResTy - 128-bit vector type NaTy - 64-bit vector type
+// MidTy - type of the extracted scalar
+// StImm - lane-index operand for ResTy NaImm - lane-index operand for NaTy
+// 64-bit operands are widened with SUBREG_TO_REG(..., sub_64) before the INS,
+// and a 64-bit result is taken back out with EXTRACT_SUBREG(..., sub_64).
+multiclass Neon_INS_elt_pattern<ValueType ResTy, ValueType NaTy,
+ ValueType MidTy, Operand StImm, Operand NaImm,
+ Instruction INS> {
+// 128-bit destination, 128-bit source: INS is used directly.
+def : Pat<(ResTy (vector_insert
+ (ResTy VPR128:$src),
+ (MidTy (vector_extract
+ (ResTy VPR128:$Rn),
+ (StImm:$Immn))),
+ (StImm:$Immd))),
+ (INS (ResTy VPR128:$src), (ResTy VPR128:$Rn),
+ StImm:$Immd, StImm:$Immn)>;
+
+// 128-bit destination, 64-bit source: only $Rn is widened.
+def : Pat <(ResTy (vector_insert
+ (ResTy VPR128:$src),
+ (MidTy (vector_extract
+ (NaTy VPR64:$Rn),
+ (NaImm:$Immn))),
+ (StImm:$Immd))),
+ (INS (ResTy VPR128:$src),
+ (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)),
+ StImm:$Immd, NaImm:$Immn)>;
+
+// 64-bit destination, 128-bit source: $src is widened and the low half of
+// the INS result is extracted.
+def : Pat <(NaTy (vector_insert
+ (NaTy VPR64:$src),
+ (MidTy (vector_extract
+ (ResTy VPR128:$Rn),
+ (StImm:$Immn))),
+ (NaImm:$Immd))),
+ (NaTy (EXTRACT_SUBREG
+ (ResTy (INS
+ (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
+ (ResTy VPR128:$Rn),
+ NaImm:$Immd, StImm:$Immn)),
+ sub_64))>;
+
+// 64-bit destination, 64-bit source: both operands are widened and the low
+// half of the INS result is extracted.
+def : Pat <(NaTy (vector_insert
+ (NaTy VPR64:$src),
+ (MidTy (vector_extract
+ (NaTy VPR64:$Rn),
+ (NaImm:$Immn))),
+ (NaImm:$Immd))),
+ (NaTy (EXTRACT_SUBREG
+ (ResTy (INS
+ (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
+ (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)),
+ NaImm:$Immd, NaImm:$Immn)),
+ sub_64))>;
+}
+
+// Arguments: 128-bit type, 64-bit type, extracted scalar type, lane-index
+// operand for the 128-bit type, lane-index operand for the 64-bit type, INS
+// instruction. Floating-point and integer vectors with the same element
+// width share an instruction (v4f32/v4i32 -> INSELs, v2f64/v2i64 -> INSELd).
+// For the b and h element sizes the extracted scalar type is i32.
+defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, neon_uimm2_bare,
+ neon_uimm1_bare, INSELs>;
+defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, neon_uimm1_bare,
+ neon_uimm0_bare, INSELd>;
+defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
+ neon_uimm3_bare, INSELb>;
+defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
+ neon_uimm2_bare, INSELh>;
+defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
+ neon_uimm1_bare, INSELs>;
+defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, neon_uimm1_bare,
+ neon_uimm0_bare, INSELd>;
+
multiclass Neon_INS_elt_float_pattern<ValueType ResTy, ValueType NaTy,
ValueType MidTy,
RegisterClass OpFPR, Operand ResImm,
SubRegIndex SubIndex, Instruction INS> {
-def : Pat<(ResTy (vector_insert
- (ResTy VPR128:$src),
- (MidTy (vector_extract
- (ResTy VPR128:$Rn),
- (ResImm:$Immn))),
- (ResImm:$Immd))),
- (INS (ResTy VPR128:$src), (ResTy VPR128:$Rn),
- ResImm:$Immd, ResImm:$Immn)>;
-
def : Pat <(ResTy (vector_insert
(ResTy VPR128:$src),
(MidTy OpFPR:$Rn),
defm : Neon_INS_elt_float_pattern<v2f64, v1f64, f64, FPR64, neon_uimm1_bare,
sub_64, INSELd>;
-multiclass Neon_INS_elt_pattern <ValueType NaTy, Operand NaImm,
- ValueType MidTy, ValueType StTy,
- Operand StImm, Instruction INS> {
-def : Pat<(NaTy (vector_insert
- (NaTy VPR64:$src),
- (MidTy (vector_extract
- (StTy VPR128:$Rn),
- (StImm:$Immn))),
- (NaImm:$Immd))),
- (NaTy (EXTRACT_SUBREG
- (StTy (INS
- (StTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
- (StTy VPR128:$Rn),
- NaImm:$Immd,
- StImm:$Immn)),
- sub_64))>;
-
-def : Pat<(StTy (vector_insert
- (StTy VPR128:$src),
- (MidTy (vector_extract
- (NaTy VPR64:$Rn),
- (NaImm:$Immn))),
- (StImm:$Immd))),
- (StTy (INS
- (StTy VPR128:$src),
- (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
- StImm:$Immd,
- NaImm:$Immn))>;
-
-def : Pat<(NaTy (vector_insert
- (NaTy VPR64:$src),
- (MidTy (vector_extract
- (NaTy VPR64:$Rn),
- (NaImm:$Immn))),
- (NaImm:$Immd))),
- (NaTy (EXTRACT_SUBREG
- (StTy (INS
- (StTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
- (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
- NaImm:$Immd,
- NaImm:$Immn)),
- sub_64))>;
-}
-
-defm : Neon_INS_elt_pattern<v8i8, neon_uimm3_bare, i32,
- v16i8, neon_uimm4_bare, INSELb>;
-defm : Neon_INS_elt_pattern<v4i16, neon_uimm2_bare, i32,
- v8i16, neon_uimm3_bare, INSELh>;
-defm : Neon_INS_elt_pattern<v2i32, neon_uimm1_bare, i32,
- v4i32, neon_uimm2_bare, INSELs>;
-defm : Neon_INS_elt_pattern<v1i64, neon_uimm0_bare, i64,
- v2i64, neon_uimm1_bare, INSELd>;
-
-
class NeonI_SMOV<string asmop, string Res, bit Q,
ValueType OpTy, ValueType eleTy,
Operand OpImm, RegisterClass ResGPR, ValueType ResTy>
def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))),
(FMOVdd $src)>;
+// scalar_to_vector f64 -> v2f64: the scalar is inserted as the sub_64
+// subregister of an IMPLICIT_DEF v2f64, so the rest of the vector is left
+// undefined and no instruction beyond the subregister insert is requested.
+def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$src))),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)),
+ (f64 FPR64:$src), sub_64)>;
+
class NeonI_DUP_Elt<bit Q, string asmop, string rdlane, string rnlane,
- RegisterOperand ResVPR, ValueType ResTy,
- ValueType OpTy, Operand OpImm>
+ RegisterOperand ResVPR, Operand OpImm>
: NeonI_copy<Q, 0b0, 0b0000, (outs ResVPR:$Rd),
(ins VPR128:$Rn, OpImm:$Imm),
asmop # "\t$Rd" # rdlane # ", $Rn" # rnlane # "[$Imm]",
bits<4> Imm;
}
-def DUPELT16b : NeonI_DUP_Elt<0b1, "dup", ".16b", ".b", VPR128, v16i8, v16i8,
+def DUPELT16b : NeonI_DUP_Elt<0b1, "dup", ".16b", ".b", VPR128,
neon_uimm4_bare> {
let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
}
-def DUPELT8h : NeonI_DUP_Elt<0b1, "dup", ".8h", ".h", VPR128, v8i16, v8i16,
+def DUPELT8h : NeonI_DUP_Elt<0b1, "dup", ".8h", ".h", VPR128,
neon_uimm3_bare> {
let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
}
-def DUPELT4s : NeonI_DUP_Elt<0b1, "dup", ".4s", ".s", VPR128, v4i32, v4i32,
+def DUPELT4s : NeonI_DUP_Elt<0b1, "dup", ".4s", ".s", VPR128,
neon_uimm2_bare> {
let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
}
-def DUPELT2d : NeonI_DUP_Elt<0b1, "dup", ".2d", ".d", VPR128, v2i64, v2i64,
+def DUPELT2d : NeonI_DUP_Elt<0b1, "dup", ".2d", ".d", VPR128,
neon_uimm1_bare> {
let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
}
-def DUPELT8b : NeonI_DUP_Elt<0b0, "dup", ".8b", ".b", VPR64, v8i8, v16i8,
+def DUPELT8b : NeonI_DUP_Elt<0b0, "dup", ".8b", ".b", VPR64,
neon_uimm4_bare> {
let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
}
-def DUPELT4h : NeonI_DUP_Elt<0b0, "dup", ".4h", ".h", VPR64, v4i16, v8i16,
+def DUPELT4h : NeonI_DUP_Elt<0b0, "dup", ".4h", ".h", VPR64,
neon_uimm3_bare> {
let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
}
-def DUPELT2s : NeonI_DUP_Elt<0b0, "dup", ".2s", ".s", VPR64, v2i32, v4i32,
+def DUPELT2s : NeonI_DUP_Elt<0b0, "dup", ".2s", ".s", VPR64,
neon_uimm2_bare> {
let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
}
+// extract_subvector at index 0 of a 128-bit floating-point vector is matched
+// to an EXTRACT_SUBREG of the sub_64 subregister of the VPR128 source.
def : Pat<(v2f32 (extract_subvector (v4f32 VPR128:$Rn), (i64 0))),
(v2f32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
def : Pat<(v1f64 (extract_subvector (v2f64 VPR128:$Rn), (i64 0))),
- (v1f64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
\ No newline at end of file
+ (v1f64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
+
+// Crypto classes: AES and SHA-1/SHA-256 instruction definitions.
+// Two-register AES form. $src is tied to $Rd (see Constraints), so the
+// destination register also supplies the first operand of opnode; only $Rd
+// and $Rn appear in the assembly string, both with the .16b arrangement.
+// size and opcode are forwarded unchanged to NeonI_Crypto_AES.
+class NeonI_Cryptoaes_2v<bits<2> size, bits<5> opcode,
+ string asmop, SDPatternOperator opnode>
+ : NeonI_Crypto_AES<size, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "\t$Rd.16b, $Rn.16b",
+ [(set (v16i8 VPR128:$Rd),
+ (v16i8 (opnode (v16i8 VPR128:$src),
+ (v16i8 VPR128:$Rn))))],
+ NoItinerary>{
+ // The extra input $src must be allocated to the same register as $Rd.
+ let Constraints = "$src = $Rd";
+}
+
+// Selected from int_arm_neon_aese / int_arm_neon_aesd respectively.
+def AESE : NeonI_Cryptoaes_2v<0b00, 0b00100, "aese", int_arm_neon_aese>;
+def AESD : NeonI_Cryptoaes_2v<0b00, 0b00101, "aesd", int_arm_neon_aesd>;
+
+// Single-input AES form: opnode is applied to the v16i8 value in $Rn and the
+// v16i8 result is written to $Rd. No operand is tied to the destination.
+// size and opcode are forwarded unchanged to NeonI_Crypto_AES.
+class NeonI_Cryptoaes<bits<2> size, bits<5> opcode,
+ string asmop, SDPatternOperator opnode>
+ : NeonI_Crypto_AES<size, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.16b, $Rn.16b",
+ [(set (v16i8 VPR128:$Rd),
+ (v16i8 (opnode (v16i8 VPR128:$Rn))))],
+ NoItinerary>;
+
+// Selected from int_arm_neon_aesmc / int_arm_neon_aesimc respectively.
+def AESMC : NeonI_Cryptoaes<0b00, 0b00110, "aesmc", int_arm_neon_aesmc>;
+def AESIMC : NeonI_Cryptoaes<0b00, 0b00111, "aesimc", int_arm_neon_aesimc>;
+
+// Two-register SHA form on v4i32 values. $src is tied to $Rd (see
+// Constraints) and is the first operand of opnode; $Rn is the second. The
+// assembly string prints $Rd and $Rn with the .4s arrangement.
+// size and opcode are forwarded unchanged to NeonI_Crypto_SHA.
+class NeonI_Cryptosha_vv<bits<2> size, bits<5> opcode,
+ string asmop, SDPatternOperator opnode>
+ : NeonI_Crypto_SHA<size, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "\t$Rd.4s, $Rn.4s",
+ [(set (v4i32 VPR128:$Rd),
+ (v4i32 (opnode (v4i32 VPR128:$src),
+ (v4i32 VPR128:$Rn))))],
+ NoItinerary> {
+ // The extra input $src must be allocated to the same register as $Rd.
+ let Constraints = "$src = $Rd";
+}
+
+// Selected from int_arm_neon_sha1su1 / int_arm_neon_sha256su0 respectively.
+def SHA1SU1 : NeonI_Cryptosha_vv<0b00, 0b00001, "sha1su1",
+ int_arm_neon_sha1su1>;
+def SHA256SU0 : NeonI_Cryptosha_vv<0b00, 0b00010, "sha256su0",
+ int_arm_neon_sha256su0>;
+
+// Scalar SHA form: both operands live in FPR32 and are typed v1i32 in the
+// selection pattern. The assembly string prints the two registers without an
+// arrangement suffix. size and opcode are forwarded to NeonI_Crypto_SHA.
+class NeonI_Cryptosha_ss<bits<2> size, bits<5> opcode,
+ string asmop, SDPatternOperator opnode>
+ : NeonI_Crypto_SHA<size, opcode,
+ (outs FPR32:$Rd), (ins FPR32:$Rn),
+ asmop # "\t$Rd, $Rn",
+ [(set (v1i32 FPR32:$Rd),
+ (v1i32 (opnode (v1i32 FPR32:$Rn))))],
+ NoItinerary>;
+
+// Selected from int_arm_neon_sha1h.
+def SHA1H : NeonI_Cryptosha_ss<0b00, 0b00000, "sha1h", int_arm_neon_sha1h>;
+
+// Three-register SHA form with every operand a v4i32 in VPR128. $src is tied
+// to $Rd (see Constraints) and is the first operand of opnode, followed by
+// $Rn and $Rm. The assembly string prints $Rd, $Rn and $Rm with .4s.
+// Note that opcode is 3 bits wide here; size and opcode are forwarded
+// unchanged to NeonI_Crypto_3VSHA.
+class NeonI_Cryptosha3_vvv<bits<2> size, bits<3> opcode, string asmop,
+ SDPatternOperator opnode>
+ : NeonI_Crypto_3VSHA<size, opcode,
+ (outs VPR128:$Rd),
+ (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
+ asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
+ [(set (v4i32 VPR128:$Rd),
+ (v4i32 (opnode (v4i32 VPR128:$src),
+ (v4i32 VPR128:$Rn),
+ (v4i32 VPR128:$Rm))))],
+ NoItinerary> {
+ // The extra input $src must be allocated to the same register as $Rd.
+ let Constraints = "$src = $Rd";
+}
+
+// Selected from int_arm_neon_sha1su0 / int_arm_neon_sha256su1 respectively.
+def SHA1SU0 : NeonI_Cryptosha3_vvv<0b00, 0b011, "sha1su0",
+ int_arm_neon_sha1su0>;
+def SHA256SU1 : NeonI_Cryptosha3_vvv<0b00, 0b110, "sha256su1",
+ int_arm_neon_sha256su1>;
+
+// Three-register SHA form in which $Rd, $src and $Rn use the FPR128 register
+// class and are printed without an arrangement suffix, while $Rm uses VPR128
+// and is printed with .4s. All four values are typed v4i32 in the selection
+// pattern. $src is tied to $Rd (see Constraints).
+// size and opcode are forwarded unchanged to NeonI_Crypto_3VSHA.
+class NeonI_Cryptosha3_qqv<bits<2> size, bits<3> opcode, string asmop,
+ SDPatternOperator opnode>
+ : NeonI_Crypto_3VSHA<size, opcode,
+ (outs FPR128:$Rd),
+ (ins FPR128:$src, FPR128:$Rn, VPR128:$Rm),
+ asmop # "\t$Rd, $Rn, $Rm.4s",
+ [(set (v4i32 FPR128:$Rd),
+ (v4i32 (opnode (v4i32 FPR128:$src),
+ (v4i32 FPR128:$Rn),
+ (v4i32 VPR128:$Rm))))],
+ NoItinerary> {
+ // The extra input $src must be allocated to the same register as $Rd.
+ let Constraints = "$src = $Rd";
+}
+
+// Selected from int_arm_neon_sha256h / int_arm_neon_sha256h2 respectively.
+def SHA256H : NeonI_Cryptosha3_qqv<0b00, 0b100, "sha256h",
+ int_arm_neon_sha256h>;
+def SHA256H2 : NeonI_Cryptosha3_qqv<0b00, 0b101, "sha256h2",
+ int_arm_neon_sha256h2>;
+
+// Three-register SHA form mixing register classes: $Rd and $src are FPR128
+// (typed v4i32), $Rn is FPR32 (typed v1i32) and $Rm is VPR128 (typed v4i32).
+// Only $Rm is printed with an arrangement suffix (.4s). $src is tied to $Rd
+// (see Constraints). size and opcode are forwarded to NeonI_Crypto_3VSHA.
+class NeonI_Cryptosha3_qsv<bits<2> size, bits<3> opcode, string asmop,
+ SDPatternOperator opnode>
+ : NeonI_Crypto_3VSHA<size, opcode,
+ (outs FPR128:$Rd),
+ (ins FPR128:$src, FPR32:$Rn, VPR128:$Rm),
+ asmop # "\t$Rd, $Rn, $Rm.4s",
+ [(set (v4i32 FPR128:$Rd),
+ (v4i32 (opnode (v4i32 FPR128:$src),
+ (v1i32 FPR32:$Rn),
+ (v4i32 VPR128:$Rm))))],
+ NoItinerary> {
+ // The extra input $src must be allocated to the same register as $Rd.
+ let Constraints = "$src = $Rd";
+}
+
+// These three are selected from int_aarch64_neon_* intrinsics, whereas the
+// other crypto defs in this section use int_arm_neon_* intrinsics.
+def SHA1C : NeonI_Cryptosha3_qsv<0b00, 0b000, "sha1c", int_aarch64_neon_sha1c>;
+def SHA1P : NeonI_Cryptosha3_qsv<0b00, 0b001, "sha1p", int_aarch64_neon_sha1p>;
+def SHA1M : NeonI_Cryptosha3_qsv<0b00, 0b010, "sha1m", int_aarch64_neon_sha1m>;
+