1 //===-- AArch64InstrNEON.td - NEON support for AArch64 -----*- tablegen -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the AArch64 NEON instruction set.
12 //===----------------------------------------------------------------------===//
14 //===----------------------------------------------------------------------===//
15 // NEON-specific DAG Nodes.
16 //===----------------------------------------------------------------------===//
18 // (outs Result), (ins Imm, OpCmode)
// Profile for the modified-immediate vector moves: result is a vector,
// operand 1 is an i32 immediate payload; operand 2 (cmode) has no constraint.
19 def SDT_Neon_movi : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
// MOVI / MVNI share the profile above; MVNI is the bitwise-inverted form.
21 def Neon_movi : SDNode<"AArch64ISD::NEON_MOVIMM", SDT_Neon_movi>;
23 def Neon_mvni : SDNode<"AArch64ISD::NEON_MVNIMM", SDT_Neon_movi>;
25 // (outs Result), (ins Imm)
// FMOV (vector, immediate): single i32 operand encodes the FP immediate.
26 def Neon_fmovi : SDNode<"AArch64ISD::NEON_FMOVIMM", SDTypeProfile<1, 1,
27 [SDTCisVec<0>, SDTCisVT<1, i32>]>>;
29 // (outs Result), (ins LHS, RHS, CondCode)
// Vector compare producing an all-ones/all-zeros mask per lane.
30 def Neon_cmp : SDNode<"AArch64ISD::NEON_CMP", SDTypeProfile<1, 3,
31 [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>;
33 // (outs Result), (ins LHS, 0/0.0 constant, CondCode)
// Compare-against-zero variant: operand 2 is the literal 0 / 0.0.
34 def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3,
35 [SDTCisVec<0>, SDTCisVec<1>]>>;
37 // (outs Result), (ins LHS, RHS)
// CMTST: bitwise-test compare (lane is all-ones iff LHS & RHS != 0).
38 def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2,
39 [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>;
// NOTE(review): the SDTARMVSH constraint list below ends with a comma — the
// continuation line appears to be missing from this copy; confirm upstream.
41 def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
// Saturating-shift nodes (signed / unsigned), sharing the profile above.
43 def Neon_sqrshlImm : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>;
44 def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>;
// NOTE(review): SDTPERMUTE also looks cut short (trailing comma, no ']>;').
46 def SDTPERMUTE : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
// Two-register permutes: unzip, zip and transpose, parts 1 and 2.
48 def Neon_uzp1 : SDNode<"AArch64ISD::NEON_UZP1", SDTPERMUTE>;
49 def Neon_uzp2 : SDNode<"AArch64ISD::NEON_UZP2", SDTPERMUTE>;
50 def Neon_zip1 : SDNode<"AArch64ISD::NEON_ZIP1", SDTPERMUTE>;
51 def Neon_zip2 : SDNode<"AArch64ISD::NEON_ZIP2", SDTPERMUTE>;
52 def Neon_trn1 : SDNode<"AArch64ISD::NEON_TRN1", SDTPERMUTE>;
53 def Neon_trn2 : SDNode<"AArch64ISD::NEON_TRN2", SDTPERMUTE>;
// One-register shuffles: element reversal within 64/32/16-bit containers.
55 def SDTVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
56 def Neon_rev64 : SDNode<"AArch64ISD::NEON_REV64", SDTVSHUF>;
57 def Neon_rev32 : SDNode<"AArch64ISD::NEON_REV32", SDTVSHUF>;
58 def Neon_rev16 : SDNode<"AArch64ISD::NEON_REV16", SDTVSHUF>;
// NOTE(review): Neon_vdup's profile continuation line is missing here.
59 def Neon_vdup : SDNode<"AArch64ISD::NEON_VDUP", SDTypeProfile<1, 1,
// DUP from lane: operand 2 is the i64 lane index.
61 def Neon_vduplane : SDNode<"AArch64ISD::NEON_VDUPLANE", SDTypeProfile<1, 2,
62 [SDTCisVec<0>, SDTCisVec<1>, SDTCisVT<2, i64>]>>;
// EXT: concatenated-pair extract; operand 3 is the i64 byte offset.
63 def Neon_vextract : SDNode<"AArch64ISD::NEON_VEXTRACT", SDTypeProfile<1, 3,
64 [SDTCisVec<0>, SDTCisSameAs<0, 1>,
65 SDTCisSameAs<0, 2>, SDTCisVT<3, i64>]>>;
// Target-independent Assert{S,Z}ext nodes, declared so patterns here can
// match them directly.
67 def SDT_assertext : SDTypeProfile<1, 1,
68 [SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 0>]>;
69 def assertsext : SDNode<"ISD::AssertSext", SDT_assertext>;
70 def assertzext : SDNode<"ISD::AssertZext", SDT_assertext>;
72 //===----------------------------------------------------------------------===//
73 // Addressing-mode instantiations
74 //===----------------------------------------------------------------------===//
// Instantiate the generic ls_neutral_pats load/store patterns for 64-bit
// (D-register) accesses, substituting the dword-scaled uimm12 offset and a
// minimum alignment of 8 into the caller-provided address/offset dags.
// NOTE(review): the closing of this multiclass (trailing arguments and '}')
// is missing from this copy; confirm against upstream before editing.
76 multiclass ls_64_pats<dag address, dag Base, dag Offset, ValueType Ty> {
77 defm : ls_neutral_pats<LSFP64_LDR, LSFP64_STR, Base,
78 !foreach(decls.pattern, Offset,
79 !subst(OFFSET, dword_uimm12, decls.pattern)),
80 !foreach(decls.pattern, address,
81 !subst(OFFSET, dword_uimm12,
82 !subst(ALIGN, min_align8, decls.pattern))),
// Same as ls_64_pats but for 128-bit (Q-register) accesses: qword-scaled
// uimm12 offsets and minimum alignment 16.
// NOTE(review): closing lines of this multiclass also appear truncated.
86 multiclass ls_128_pats<dag address, dag Base, dag Offset, ValueType Ty> {
87 defm : ls_neutral_pats<LSFP128_LDR, LSFP128_STR, Base,
88 !foreach(decls.pattern, Offset,
89 !subst(OFFSET, qword_uimm12, decls.pattern)),
90 !foreach(decls.pattern, address,
91 !subst(OFFSET, qword_uimm12,
92 !subst(ALIGN, min_align16, decls.pattern))),
// Instantiate load/store patterns for every 64-bit and 128-bit NEON vector
// type over the given address form.
96 multiclass uimm12_neon_pats<dag address, dag Base, dag Offset> {
97 defm : ls_64_pats<address, Base, Offset, v8i8>;
98 defm : ls_64_pats<address, Base, Offset, v4i16>;
99 defm : ls_64_pats<address, Base, Offset, v2i32>;
100 defm : ls_64_pats<address, Base, Offset, v1i64>;
101 defm : ls_64_pats<address, Base, Offset, v2f32>;
102 defm : ls_64_pats<address, Base, Offset, v1f64>;
104 defm : ls_128_pats<address, Base, Offset, v16i8>;
105 defm : ls_128_pats<address, Base, Offset, v8i16>;
106 defm : ls_128_pats<address, Base, Offset, v4i32>;
107 defm : ls_128_pats<address, Base, Offset, v2i64>;
108 defm : ls_128_pats<address, Base, Offset, v4f32>;
109 defm : ls_128_pats<address, Base, Offset, v2f64>;
// Constant-pool addressing: ADRP page + lo12 offset (A64WrapperSmall).
112 defm : uimm12_neon_pats<(A64WrapperSmall
113 tconstpool:$Hi, tconstpool:$Lo12, ALIGN),
114 (ADRPxi tconstpool:$Hi), (i64 tconstpool:$Lo12)>;
116 //===----------------------------------------------------------------------===//
118 //===----------------------------------------------------------------------===//
// "Three registers, same type" instruction multiclasses. Each _sizes
// multiclass stamps out one instruction per vector arrangement; the leading
// bit of NeonI_3VSame is the Q bit (0 = 64-bit VPR64, 1 = 128-bit VPR128).
// NOTE(review): the closing braces / trailing arguments of these multiclass
// bodies are missing in this copy; confirm against upstream before editing.
//
// Byte-only arrangements (.8b / .16b); separate pattern operators allow the
// 64- and 128-bit halves to select different intrinsics.
120 multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size, bits<5> opcode,
121 string asmop, SDPatternOperator opnode8B,
122 SDPatternOperator opnode16B,
123 bit Commutable = 0> {
124 let isCommutable = Commutable in {
125 def _8B : NeonI_3VSame<0b0, u, size, opcode,
126 (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
127 asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
128 [(set (v8i8 VPR64:$Rd),
129 (v8i8 (opnode8B (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
132 def _16B : NeonI_3VSame<0b1, u, size, opcode,
133 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
134 asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
135 [(set (v16i8 VPR128:$Rd),
136 (v16i8 (opnode16B (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
// Half- and single-word arrangements (.4h/.8h with size=0b01,
// .2s/.4s with size=0b10).
142 multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode,
143 string asmop, SDPatternOperator opnode,
144 bit Commutable = 0> {
145 let isCommutable = Commutable in {
146 def _4H : NeonI_3VSame<0b0, u, 0b01, opcode,
147 (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
148 asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h",
149 [(set (v4i16 VPR64:$Rd),
150 (v4i16 (opnode (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))))],
153 def _8H : NeonI_3VSame<0b1, u, 0b01, opcode,
154 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
155 asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h",
156 [(set (v8i16 VPR128:$Rd),
157 (v8i16 (opnode (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))))],
160 def _2S : NeonI_3VSame<0b0, u, 0b10, opcode,
161 (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
162 asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
163 [(set (v2i32 VPR64:$Rd),
164 (v2i32 (opnode (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))))],
167 def _4S : NeonI_3VSame<0b1, u, 0b10, opcode,
168 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
169 asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
170 [(set (v4i32 VPR128:$Rd),
171 (v4i32 (opnode (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))],
// B + H + S: extends the HS multiclass with the byte arrangements
// (size = 0b00).
175 multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
176 string asmop, SDPatternOperator opnode,
178 : NeonI_3VSame_HS_sizes<u, opcode, asmop, opnode, Commutable> {
179 let isCommutable = Commutable in {
180 def _8B : NeonI_3VSame<0b0, u, 0b00, opcode,
181 (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
182 asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
183 [(set (v8i8 VPR64:$Rd),
184 (v8i8 (opnode (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
187 def _16B : NeonI_3VSame<0b1, u, 0b00, opcode,
188 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
189 asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
190 [(set (v16i8 VPR128:$Rd),
191 (v16i8 (opnode (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
// B + H + S + D: adds the .2d arrangement (size = 0b11; 128-bit only —
// there is no .1d integer arrangement here).
196 multiclass NeonI_3VSame_BHSD_sizes<bit u, bits<5> opcode,
197 string asmop, SDPatternOperator opnode,
199 : NeonI_3VSame_BHS_sizes<u, opcode, asmop, opnode, Commutable> {
200 let isCommutable = Commutable in {
201 def _2D : NeonI_3VSame<0b1, u, 0b11, opcode,
202 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
203 asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
204 [(set (v2i64 VPR128:$Rd),
205 (v2i64 (opnode (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))))],
210 // Multiclass NeonI_3VSame_SD_sizes: Operand types are floating point types,
211 // but Result types can be integer or floating point types.
// The size bit is combined with a fixed low bit: {size,0} for .2s/.4s and
// {size,1} for .2d. ResTy* parameters let FP compares produce integer masks.
212 multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode,
213 string asmop, SDPatternOperator opnode2S,
214 SDPatternOperator opnode4S,
215 SDPatternOperator opnode2D,
216 ValueType ResTy2S, ValueType ResTy4S,
217 ValueType ResTy2D, bit Commutable = 0> {
218 let isCommutable = Commutable in {
219 def _2S : NeonI_3VSame<0b0, u, {size, 0b0}, opcode,
220 (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
221 asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
222 [(set (ResTy2S VPR64:$Rd),
223 (ResTy2S (opnode2S (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))],
226 def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode,
227 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
228 asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
229 [(set (ResTy4S VPR128:$Rd),
230 (ResTy4S (opnode4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))],
233 def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode,
234 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
235 asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
236 [(set (ResTy2D VPR128:$Rd),
237 (ResTy2D (opnode2D (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))],
242 //===----------------------------------------------------------------------===//
243 // Instruction Definitions
244 //===----------------------------------------------------------------------===//
246 // Vector Arithmetic Instructions
248 // Vector Add (Integer and Floating-Point)
// Integer add across all B/H/S/D arrangements; FP add across S/D. The final
// argument marks the operations commutable for the DAG combiner.
250 defm ADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>;
251 defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd", fadd, fadd, fadd,
252 v2f32, v4f32, v2f64, 1>;
254 // Vector Sub (Integer and Floating-Point)
// Subtraction is not commutable (final argument 0).
256 defm SUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10000, "sub", sub, 0>;
257 defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub", fsub, fsub, fsub,
258 v2f32, v4f32, v2f64, 0>;
260 // Vector Multiply (Integer and Floating-Point)
// Integer MUL has no .2d arrangement, hence the BHS multiclass.
262 defm MULvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>;
263 defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul, fmul, fmul,
264 v2f32, v4f32, v2f64, 1>;
266 // Vector Multiply (Polynomial)
// PMUL exists only for byte lanes; maps to the ARM polynomial-multiply
// intrinsic for both halves.
268 defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul",
269 int_arm_neon_vmulp, int_arm_neon_vmulp, 1>;
271 // Vector Multiply-accumulate and Multiply-subtract (Integer)
273 // class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and
274 // two operands constraints.
// Three-same-register instruction with an accumulator: $Rd is tied to the
// input $src via the "$src = $Rd" constraint, so the destination register is
// read-modify-write (MLA/MLS, FMLA/FMLS, BSL/BIT/BIF, UABA/SABA below).
// NOTE(review): the end of this class body (itinerary argument and closing
// brace) is missing from this copy; confirm against upstream.
275 class NeonI_3VSame_Constraint_impl<string asmop, string asmlane,
276 RegisterOperand VPRC, ValueType OpTy, bit q, bit u, bits<2> size,
277 bits<5> opcode, SDPatternOperator opnode>
278 : NeonI_3VSame<q, u, size, opcode,
279 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, VPRC:$Rm),
280 asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane # ", $Rm" # asmlane,
281 [(set (OpTy VPRC:$Rd),
282 (OpTy (opnode (OpTy VPRC:$src), (OpTy VPRC:$Rn), (OpTy VPRC:$Rm))))],
284 let Constraints = "$src = $Rd";
// Accumulate / subtract-from-accumulator fragments: Ra +/- (Rn * Rm).
287 def Neon_mla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
288 (add node:$Ra, (mul node:$Rn, node:$Rm))>;
290 def Neon_mls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
291 (sub node:$Ra, (mul node:$Rn, node:$Rm))>;
// MLA: per-arrangement defs (u = 0); MLS differs only in u = 1.
294 def MLAvvv_8B: NeonI_3VSame_Constraint_impl<"mla", ".8b", VPR64, v8i8,
295 0b0, 0b0, 0b00, 0b10010, Neon_mla>;
296 def MLAvvv_16B: NeonI_3VSame_Constraint_impl<"mla", ".16b", VPR128, v16i8,
297 0b1, 0b0, 0b00, 0b10010, Neon_mla>;
298 def MLAvvv_4H: NeonI_3VSame_Constraint_impl<"mla", ".4h", VPR64, v4i16,
299 0b0, 0b0, 0b01, 0b10010, Neon_mla>;
300 def MLAvvv_8H: NeonI_3VSame_Constraint_impl<"mla", ".8h", VPR128, v8i16,
301 0b1, 0b0, 0b01, 0b10010, Neon_mla>;
302 def MLAvvv_2S: NeonI_3VSame_Constraint_impl<"mla", ".2s", VPR64, v2i32,
303 0b0, 0b0, 0b10, 0b10010, Neon_mla>;
304 def MLAvvv_4S: NeonI_3VSame_Constraint_impl<"mla", ".4s", VPR128, v4i32,
305 0b1, 0b0, 0b10, 0b10010, Neon_mla>;
307 def MLSvvv_8B: NeonI_3VSame_Constraint_impl<"mls", ".8b", VPR64, v8i8,
308 0b0, 0b1, 0b00, 0b10010, Neon_mls>;
309 def MLSvvv_16B: NeonI_3VSame_Constraint_impl<"mls", ".16b", VPR128, v16i8,
310 0b1, 0b1, 0b00, 0b10010, Neon_mls>;
311 def MLSvvv_4H: NeonI_3VSame_Constraint_impl<"mls", ".4h", VPR64, v4i16,
312 0b0, 0b1, 0b01, 0b10010, Neon_mls>;
313 def MLSvvv_8H: NeonI_3VSame_Constraint_impl<"mls", ".8h", VPR128, v8i16,
314 0b1, 0b1, 0b01, 0b10010, Neon_mls>;
315 def MLSvvv_2S: NeonI_3VSame_Constraint_impl<"mls", ".2s", VPR64, v2i32,
316 0b0, 0b1, 0b10, 0b10010, Neon_mls>;
317 def MLSvvv_4S: NeonI_3VSame_Constraint_impl<"mls", ".4s", VPR128, v4i32,
318 0b1, 0b1, 0b10, 0b10010, Neon_mls>;
// FP fused fragments: fmul_su is matched so FMLA/FMLS are only formed when
// contraction is permitted (see the UseFusedMAC predicate below).
322 def Neon_fmla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
323 (fadd node:$Ra, (fmul_su node:$Rn, node:$Rm))>;
325 def Neon_fmls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
326 (fsub node:$Ra, (fmul_su node:$Rn, node:$Rm))>;
// FMLA/FMLS are only selected from separate fadd/fmul when fused
// multiply-accumulate is enabled.
// NOTE(review): the closing '}' of this predicated block is not visible in
// this copy; the fma patterns below belong to the same let-scope upstream.
328 let Predicates = [HasNEON, UseFusedMAC] in {
329 def FMLAvvv_2S: NeonI_3VSame_Constraint_impl<"fmla", ".2s", VPR64, v2f32,
330 0b0, 0b0, 0b00, 0b11001, Neon_fmla>;
331 def FMLAvvv_4S: NeonI_3VSame_Constraint_impl<"fmla", ".4s", VPR128, v4f32,
332 0b1, 0b0, 0b00, 0b11001, Neon_fmla>;
333 def FMLAvvv_2D: NeonI_3VSame_Constraint_impl<"fmla", ".2d", VPR128, v2f64,
334 0b1, 0b0, 0b01, 0b11001, Neon_fmla>;
336 def FMLSvvv_2S: NeonI_3VSame_Constraint_impl<"fmls", ".2s", VPR64, v2f32,
337 0b0, 0b0, 0b10, 0b11001, Neon_fmls>;
338 def FMLSvvv_4S: NeonI_3VSame_Constraint_impl<"fmls", ".4s", VPR128, v4f32,
339 0b1, 0b0, 0b10, 0b11001, Neon_fmls>;
340 def FMLSvvv_2D: NeonI_3VSame_Constraint_impl<"fmls", ".2d", VPR128, v2f64,
341 0b1, 0b0, 0b11, 0b11001, Neon_fmls>;
344 // We're also allowed to match the fma instruction regardless of compile
// options: an explicit ISD::FMA node is always a fused operation. Note the
// operand rotation — fma(Rn, Rm, Ra) becomes FMLA(Ra, Rn, Rm) since the
// accumulator is the tied first operand.
346 def : Pat<(v2f32 (fma VPR64:$Rn, VPR64:$Rm, VPR64:$Ra)),
347 (FMLAvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
348 def : Pat<(v4f32 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
349 (FMLAvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
350 def : Pat<(v2f64 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
351 (FMLAvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
// fma with a negated multiplicand selects the subtract form.
353 def : Pat<(v2f32 (fma (fneg VPR64:$Rn), VPR64:$Rm, VPR64:$Ra)),
354 (FMLSvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
355 def : Pat<(v4f32 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
356 (FMLSvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
357 def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
358 (FMLSvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
360 // Vector Divide (Floating-Point)
362 defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv, fdiv, fdiv,
363 v2f32, v4f32, v2f64, 0>;
365 // Vector Bitwise Operations
367 // Vector Bitwise AND
// Plain bitwise ops; only byte arrangements exist since the operation is
// lane-size agnostic. The 2-bit "size" field distinguishes AND/EOR/ORR/ORN/BIC.
369 defm ANDvvv : NeonI_3VSame_B_sizes<0b0, 0b00, 0b00011, "and", and, and, 1>;
371 // Vector Bitwise Exclusive OR
373 defm EORvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b00011, "eor", xor, xor, 1>;
377 defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>;
379 // ORR disassembled as MOV if Vn==Vm
381 // Vector Move - register
382 // Alias for ORR if Vn=Vm.
383 // FIXME: This is actually the preferred syntax but TableGen can't deal with
384 // custom printing of aliases.
385 def : NeonInstAlias<"mov $Rd.8b, $Rn.8b",
386 (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn), 0>;
387 def : NeonInstAlias<"mov $Rd.16b, $Rn.16b",
388 (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn), 0>;
390 // The MOVI instruction takes two immediate operands. The first is the
391 // immediate encoding, while the second is the cmode. A cmode of 14, or
392 // 0b1110, produces a MOVI operation, rather than a MVNI, ORR, or BIC.
// MOVI #0 / #255 with cmode 14 give the all-zeros / all-ones vectors.
393 def Neon_AllZero : PatFrag<(ops), (Neon_movi (i32 0), (i32 14))>;
394 def Neon_AllOne : PatFrag<(ops), (Neon_movi (i32 255), (i32 14))>;
// Bitwise NOT expressed as XOR with all-ones, per register width.
396 def Neon_not8B : PatFrag<(ops node:$in),
397 (xor node:$in, (bitconvert (v8i8 Neon_AllOne)))>;
398 def Neon_not16B : PatFrag<(ops node:$in),
399 (xor node:$in, (bitconvert (v16i8 Neon_AllOne)))>;
// ORN = Rn | ~Rm; BIC = Rn & ~Rm.
401 def Neon_orn8B : PatFrag<(ops node:$Rn, node:$Rm),
402 (or node:$Rn, (Neon_not8B node:$Rm))>;
404 def Neon_orn16B : PatFrag<(ops node:$Rn, node:$Rm),
405 (or node:$Rn, (Neon_not16B node:$Rm))>;
407 def Neon_bic8B : PatFrag<(ops node:$Rn, node:$Rm),
408 (and node:$Rn, (Neon_not8B node:$Rm))>;
410 def Neon_bic16B : PatFrag<(ops node:$Rn, node:$Rm),
411 (and node:$Rn, (Neon_not16B node:$Rm))>;
414 // Vector Bitwise OR NOT - register
416 defm ORNvvv : NeonI_3VSame_B_sizes<0b0, 0b11, 0b00011, "orn",
417 Neon_orn8B, Neon_orn16B, 0>;
419 // Vector Bitwise Bit Clear (AND NOT) - register
421 defm BICvvv : NeonI_3VSame_B_sizes<0b0, 0b01, 0b00011, "bic",
422 Neon_bic8B, Neon_bic16B, 0>;
// The bitwise instructions are only defined on v8i8/v16i8; these extra
// patterns select the same byte instructions for the other integer vector
// types of each width.
// NOTE(review): the parameter line declaring INST8B (original line 426) is
// missing from this copy, as is the multiclass's closing brace.
424 multiclass Neon_bitwise2V_patterns<SDPatternOperator opnode8B,
425 SDPatternOperator opnode16B,
427 Instruction INST16B> {
428 def : Pat<(v2i32 (opnode8B VPR64:$Rn, VPR64:$Rm)),
429 (INST8B VPR64:$Rn, VPR64:$Rm)>;
430 def : Pat<(v4i16 (opnode8B VPR64:$Rn, VPR64:$Rm)),
431 (INST8B VPR64:$Rn, VPR64:$Rm)>;
432 def : Pat<(v1i64 (opnode8B VPR64:$Rn, VPR64:$Rm)),
433 (INST8B VPR64:$Rn, VPR64:$Rm)>;
434 def : Pat<(v4i32 (opnode16B VPR128:$Rn, VPR128:$Rm)),
435 (INST16B VPR128:$Rn, VPR128:$Rm)>;
436 def : Pat<(v8i16 (opnode16B VPR128:$Rn, VPR128:$Rm)),
437 (INST16B VPR128:$Rn, VPR128:$Rm)>;
438 def : Pat<(v2i64 (opnode16B VPR128:$Rn, VPR128:$Rm)),
439 (INST16B VPR128:$Rn, VPR128:$Rm)>;
442 // Additional patterns for bitwise instructions AND, EOR, ORR, BIC, ORN
443 defm : Neon_bitwise2V_patterns<and, and, ANDvvv_8B, ANDvvv_16B>;
444 defm : Neon_bitwise2V_patterns<or, or, ORRvvv_8B, ORRvvv_16B>;
445 defm : Neon_bitwise2V_patterns<xor, xor, EORvvv_8B, EORvvv_16B>;
446 defm : Neon_bitwise2V_patterns<Neon_bic8B, Neon_bic16B, BICvvv_8B, BICvvv_16B>;
447 defm : Neon_bitwise2V_patterns<Neon_orn8B, Neon_orn16B, ORNvvv_8B, ORNvvv_16B>;
449 // Vector Bitwise Select
// BSL reads and writes $Rd (tied via the constraint class): each result bit
// comes from $Rn where the corresponding $Rd bit is set, else from $Rm.
450 def BSLvvv_8B : NeonI_3VSame_Constraint_impl<"bsl", ".8b", VPR64, v8i8,
451 0b0, 0b1, 0b01, 0b00011, vselect>;
453 def BSLvvv_16B : NeonI_3VSame_Constraint_impl<"bsl", ".16b", VPR128, v16i8,
454 0b1, 0b1, 0b01, 0b00011, vselect>;
// Retarget the byte-only BSL instructions at every vector type: by explicit
// type, by the expanded or/and/not form, and via the llvm.arm.neon.vbsl
// intrinsic for both integer and FP element types.
// NOTE(review): the INST8B parameter line (original 457) and the closing
// brace of this multiclass are missing from this copy.
456 multiclass Neon_bitwise3V_patterns<SDPatternOperator opnode,
458 Instruction INST16B> {
459 // Disassociate type from instruction definition
460 def : Pat<(v8i8 (opnode (v8i8 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
461 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
462 def : Pat<(v2i32 (opnode (v2i32 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
463 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
464 def : Pat<(v4i16 (opnode (v4i16 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
465 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
466 def : Pat<(v1i64 (opnode (v1i64 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
467 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
468 def : Pat<(v16i8 (opnode (v16i8 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
469 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
470 def : Pat<(v4i32 (opnode (v4i32 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
471 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
472 def : Pat<(v8i16 (opnode (v8i16 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
473 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
474 def : Pat<(v2i64 (opnode (v2i64 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
475 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
477 // Allow to match BSL instruction pattern with non-constant operand
// (Rn & Rd) | (Rm & ~Rd) is exactly BSL with Rd as the selector mask.
478 def : Pat<(v8i8 (or (and VPR64:$Rn, VPR64:$Rd),
479 (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
480 (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
481 def : Pat<(v4i16 (or (and VPR64:$Rn, VPR64:$Rd),
482 (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
483 (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
484 def : Pat<(v2i32 (or (and VPR64:$Rn, VPR64:$Rd),
485 (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
486 (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
487 def : Pat<(v1i64 (or (and VPR64:$Rn, VPR64:$Rd),
488 (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
489 (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
490 def : Pat<(v16i8 (or (and VPR128:$Rn, VPR128:$Rd),
491 (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
492 (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
493 def : Pat<(v8i16 (or (and VPR128:$Rn, VPR128:$Rd),
494 (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
495 (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
496 def : Pat<(v4i32 (or (and VPR128:$Rn, VPR128:$Rd),
497 (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
498 (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
499 def : Pat<(v2i64 (or (and VPR128:$Rn, VPR128:$Rd),
500 (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
501 (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
503 // Allow to match llvm.arm.* intrinsics.
504 def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 VPR64:$src),
505 (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
506 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
507 def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 VPR64:$src),
508 (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
509 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
510 def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 VPR64:$src),
511 (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
512 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
513 def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 VPR64:$src),
514 (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
515 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
516 def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 VPR64:$src),
517 (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
518 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
519 def : Pat<(v1f64 (int_arm_neon_vbsl (v1f64 VPR64:$src),
520 (v1f64 VPR64:$Rn), (v1f64 VPR64:$Rm))),
521 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
522 def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 VPR128:$src),
523 (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
524 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
525 def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 VPR128:$src),
526 (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
527 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
528 def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 VPR128:$src),
529 (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
530 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
531 def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 VPR128:$src),
532 (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
533 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
534 def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 VPR128:$src),
535 (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
536 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
537 def : Pat<(v2f64 (int_arm_neon_vbsl (v2f64 VPR128:$src),
538 (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
539 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
542 // Additional patterns for bitwise instruction BSL
543 defm: Neon_bitwise3V_patterns<vselect, BSLvvv_8B, BSLvvv_16B>;
// Always-false fragment: BIT/BIF keep the BSL-style operands for the
// assembler but must never be selected from a vselect DAG node.
545 def Neon_NoBSLop : PatFrag<(ops node:$src, node:$Rn, node:$Rm),
546 (vselect node:$src, node:$Rn, node:$Rm),
547 [{ (void)N; return false; }]>;
549 // Vector Bitwise Insert if True
551 def BITvvv_8B : NeonI_3VSame_Constraint_impl<"bit", ".8b", VPR64, v8i8,
552 0b0, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
553 def BITvvv_16B : NeonI_3VSame_Constraint_impl<"bit", ".16b", VPR128, v16i8,
554 0b1, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
556 // Vector Bitwise Insert if False
558 def BIFvvv_8B : NeonI_3VSame_Constraint_impl<"bif", ".8b", VPR64, v8i8,
559 0b0, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
560 def BIFvvv_16B : NeonI_3VSame_Constraint_impl<"bif", ".16b", VPR128, v16i8,
561 0b1, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
563 // Vector Absolute Difference and Accumulate (Signed, Unsigned)
// UABA/SABA fragments: Ra + |Rn - Rm| via the ARM vabd intrinsics; the
// accumulator makes these use the tied-operand constraint class.
565 def Neon_uaba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
566 (add node:$Ra, (int_arm_neon_vabdu node:$Rn, node:$Rm))>;
567 def Neon_saba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
568 (add node:$Ra, (int_arm_neon_vabds node:$Rn, node:$Rm))>;
570 // Vector Absolute Difference and Accumulate (Unsigned)
571 def UABAvvv_8B : NeonI_3VSame_Constraint_impl<"uaba", ".8b", VPR64, v8i8,
572 0b0, 0b1, 0b00, 0b01111, Neon_uaba>;
573 def UABAvvv_16B : NeonI_3VSame_Constraint_impl<"uaba", ".16b", VPR128, v16i8,
574 0b1, 0b1, 0b00, 0b01111, Neon_uaba>;
575 def UABAvvv_4H : NeonI_3VSame_Constraint_impl<"uaba", ".4h", VPR64, v4i16,
576 0b0, 0b1, 0b01, 0b01111, Neon_uaba>;
577 def UABAvvv_8H : NeonI_3VSame_Constraint_impl<"uaba", ".8h", VPR128, v8i16,
578 0b1, 0b1, 0b01, 0b01111, Neon_uaba>;
579 def UABAvvv_2S : NeonI_3VSame_Constraint_impl<"uaba", ".2s", VPR64, v2i32,
580 0b0, 0b1, 0b10, 0b01111, Neon_uaba>;
581 def UABAvvv_4S : NeonI_3VSame_Constraint_impl<"uaba", ".4s", VPR128, v4i32,
582 0b1, 0b1, 0b10, 0b01111, Neon_uaba>;
584 // Vector Absolute Difference and Accumulate (Signed)
// Signed forms differ from the unsigned ones only in u = 0.
585 def SABAvvv_8B : NeonI_3VSame_Constraint_impl<"saba", ".8b", VPR64, v8i8,
586 0b0, 0b0, 0b00, 0b01111, Neon_saba>;
587 def SABAvvv_16B : NeonI_3VSame_Constraint_impl<"saba", ".16b", VPR128, v16i8,
588 0b1, 0b0, 0b00, 0b01111, Neon_saba>;
589 def SABAvvv_4H : NeonI_3VSame_Constraint_impl<"saba", ".4h", VPR64, v4i16,
590 0b0, 0b0, 0b01, 0b01111, Neon_saba>;
591 def SABAvvv_8H : NeonI_3VSame_Constraint_impl<"saba", ".8h", VPR128, v8i16,
592 0b1, 0b0, 0b01, 0b01111, Neon_saba>;
593 def SABAvvv_2S : NeonI_3VSame_Constraint_impl<"saba", ".2s", VPR64, v2i32,
594 0b0, 0b0, 0b10, 0b01111, Neon_saba>;
595 def SABAvvv_4S : NeonI_3VSame_Constraint_impl<"saba", ".4s", VPR128, v4i32,
596 0b1, 0b0, 0b10, 0b01111, Neon_saba>;
599 // Vector Absolute Difference (Signed, Unsigned)
600 defm UABDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01110, "uabd", int_arm_neon_vabdu, 0>;
601 defm SABDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01110, "sabd", int_arm_neon_vabds, 0>;
603 // Vector Absolute Difference (Floating Point)
// The vabds intrinsic is overloaded: used here with FP vector types.
604 defm FABDvvv: NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11010, "fabd",
605 int_arm_neon_vabds, int_arm_neon_vabds,
606 int_arm_neon_vabds, v2f32, v4f32, v2f64, 0>;
608 // Vector Reciprocal Step (Floating Point)
// NOTE(review): the third opnode argument line (original 611) is missing
// from this copy of the FRECPS instantiation.
609 defm FRECPSvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11111, "frecps",
610 int_arm_neon_vrecps, int_arm_neon_vrecps,
612 v2f32, v4f32, v2f64, 0>;
614 // Vector Reciprocal Square Root Step (Floating Point)
615 defm FRSQRTSvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11111, "frsqrts",
616 int_arm_neon_vrsqrts,
617 int_arm_neon_vrsqrts,
618 int_arm_neon_vrsqrts,
619 v2f32, v4f32, v2f64, 0>;
621 // Vector Comparisons
623 def Neon_cmeq : PatFrag<(ops node:$lhs, node:$rhs),
624 (Neon_cmp node:$lhs, node:$rhs, SETEQ)>;
625 def Neon_cmphs : PatFrag<(ops node:$lhs, node:$rhs),
626 (Neon_cmp node:$lhs, node:$rhs, SETUGE)>;
627 def Neon_cmge : PatFrag<(ops node:$lhs, node:$rhs),
628 (Neon_cmp node:$lhs, node:$rhs, SETGE)>;
629 def Neon_cmhi : PatFrag<(ops node:$lhs, node:$rhs),
630 (Neon_cmp node:$lhs, node:$rhs, SETUGT)>;
631 def Neon_cmgt : PatFrag<(ops node:$lhs, node:$rhs),
632 (Neon_cmp node:$lhs, node:$rhs, SETGT)>;
634 // NeonI_compare_aliases class: swaps register operands to implement
635 // comparison aliases, e.g., CMLE is alias for CMGE with operands reversed.
636 class NeonI_compare_aliases<string asmop, string asmlane,
637 Instruction inst, RegisterOperand VPRC>
638 : NeonInstAlias<asmop # "\t$Rd" # asmlane #", $Rn" # asmlane #
640 (inst VPRC:$Rd, VPRC:$Rm, VPRC:$Rn), 0b0>;
642 // Vector Comparisons (Integer)
644 // Vector Compare Mask Equal (Integer)
645 let isCommutable =1 in {
646 defm CMEQvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10001, "cmeq", Neon_cmeq, 0>;
649 // Vector Compare Mask Higher or Same (Unsigned Integer)
650 defm CMHSvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00111, "cmhs", Neon_cmphs, 0>;
652 // Vector Compare Mask Greater Than or Equal (Integer)
653 defm CMGEvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00111, "cmge", Neon_cmge, 0>;
655 // Vector Compare Mask Higher (Unsigned Integer)
656 defm CMHIvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00110, "cmhi", Neon_cmhi, 0>;
658 // Vector Compare Mask Greater Than (Integer)
659 defm CMGTvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00110, "cmgt", Neon_cmgt, 0>;
661 // Vector Compare Mask Bitwise Test (Integer)
662 defm CMTSTvvv: NeonI_3VSame_BHSD_sizes<0b0, 0b10001, "cmtst", Neon_tst, 0>;
664 // Vector Compare Mask Less or Same (Unsigned Integer)
665 // CMLS is alias for CMHS with operands reversed.
666 def CMLSvvv_8B : NeonI_compare_aliases<"cmls", ".8b", CMHSvvv_8B, VPR64>;
667 def CMLSvvv_16B : NeonI_compare_aliases<"cmls", ".16b", CMHSvvv_16B, VPR128>;
668 def CMLSvvv_4H : NeonI_compare_aliases<"cmls", ".4h", CMHSvvv_4H, VPR64>;
669 def CMLSvvv_8H : NeonI_compare_aliases<"cmls", ".8h", CMHSvvv_8H, VPR128>;
670 def CMLSvvv_2S : NeonI_compare_aliases<"cmls", ".2s", CMHSvvv_2S, VPR64>;
671 def CMLSvvv_4S : NeonI_compare_aliases<"cmls", ".4s", CMHSvvv_4S, VPR128>;
672 def CMLSvvv_2D : NeonI_compare_aliases<"cmls", ".2d", CMHSvvv_2D, VPR128>;
674 // Vector Compare Mask Less Than or Equal (Integer)
675 // CMLE is alias for CMGE with operands reversed.
676 def CMLEvvv_8B : NeonI_compare_aliases<"cmle", ".8b", CMGEvvv_8B, VPR64>;
677 def CMLEvvv_16B : NeonI_compare_aliases<"cmle", ".16b", CMGEvvv_16B, VPR128>;
678 def CMLEvvv_4H : NeonI_compare_aliases<"cmle", ".4h", CMGEvvv_4H, VPR64>;
679 def CMLEvvv_8H : NeonI_compare_aliases<"cmle", ".8h", CMGEvvv_8H, VPR128>;
680 def CMLEvvv_2S : NeonI_compare_aliases<"cmle", ".2s", CMGEvvv_2S, VPR64>;
681 def CMLEvvv_4S : NeonI_compare_aliases<"cmle", ".4s", CMGEvvv_4S, VPR128>;
682 def CMLEvvv_2D : NeonI_compare_aliases<"cmle", ".2d", CMGEvvv_2D, VPR128>;
684 // Vector Compare Mask Lower (Unsigned Integer)
685 // CMLO is alias for CMHI with operands reversed.
686 def CMLOvvv_8B : NeonI_compare_aliases<"cmlo", ".8b", CMHIvvv_8B, VPR64>;
687 def CMLOvvv_16B : NeonI_compare_aliases<"cmlo", ".16b", CMHIvvv_16B, VPR128>;
688 def CMLOvvv_4H : NeonI_compare_aliases<"cmlo", ".4h", CMHIvvv_4H, VPR64>;
689 def CMLOvvv_8H : NeonI_compare_aliases<"cmlo", ".8h", CMHIvvv_8H, VPR128>;
690 def CMLOvvv_2S : NeonI_compare_aliases<"cmlo", ".2s", CMHIvvv_2S, VPR64>;
691 def CMLOvvv_4S : NeonI_compare_aliases<"cmlo", ".4s", CMHIvvv_4S, VPR128>;
692 def CMLOvvv_2D : NeonI_compare_aliases<"cmlo", ".2d", CMHIvvv_2D, VPR128>;
694 // Vector Compare Mask Less Than (Integer)
695 // CMLT is alias for CMGT with operands reversed.
696 def CMLTvvv_8B : NeonI_compare_aliases<"cmlt", ".8b", CMGTvvv_8B, VPR64>;
697 def CMLTvvv_16B : NeonI_compare_aliases<"cmlt", ".16b", CMGTvvv_16B, VPR128>;
698 def CMLTvvv_4H : NeonI_compare_aliases<"cmlt", ".4h", CMGTvvv_4H, VPR64>;
699 def CMLTvvv_8H : NeonI_compare_aliases<"cmlt", ".8h", CMGTvvv_8H, VPR128>;
700 def CMLTvvv_2S : NeonI_compare_aliases<"cmlt", ".2s", CMGTvvv_2S, VPR64>;
701 def CMLTvvv_4S : NeonI_compare_aliases<"cmlt", ".4s", CMGTvvv_4S, VPR128>;
702 def CMLTvvv_2D : NeonI_compare_aliases<"cmlt", ".2d", CMGTvvv_2D, VPR128>;
// Assembler operand that only accepts the literal #0, used by the
// compare-against-zero instructions so the immediate is parsed and printed.
// NOTE(review): the opening '{' line of this AsmOperandClass and the closing
// braces of both operand defs are missing from this copy.
705 def neon_uimm0_asmoperand : AsmOperandClass
708 let PredicateMethod = "isUImm<0>";
709 let RenderMethod = "addImmOperands";
712 def neon_uimm0 : Operand<i32>, ImmLeaf<i32, [{return Imm == 0;}]> {
713 let ParserMatchClass = neon_uimm0_asmoperand;
714 let PrintMethod = "printNeonUImm0Operand";
// Compare-against-zero across every arrangement, selected from the
// Neon_cmpz node with the given condition code. Structured like the 3VSame
// multiclasses but using the 2VMisc format (register + #0 immediate).
// NOTE(review): itinerary arguments and the final '}' are missing here.
718 multiclass NeonI_cmpz_sizes<bit u, bits<5> opcode, string asmop, CondCode CC>
720 def _8B : NeonI_2VMisc<0b0, u, 0b00, opcode,
721 (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
722 asmop # "\t$Rd.8b, $Rn.8b, $Imm",
723 [(set (v8i8 VPR64:$Rd),
724 (v8i8 (Neon_cmpz (v8i8 VPR64:$Rn), (i32 imm:$Imm), CC)))],
727 def _16B : NeonI_2VMisc<0b1, u, 0b00, opcode,
728 (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
729 asmop # "\t$Rd.16b, $Rn.16b, $Imm",
730 [(set (v16i8 VPR128:$Rd),
731 (v16i8 (Neon_cmpz (v16i8 VPR128:$Rn), (i32 imm:$Imm), CC)))],
734 def _4H : NeonI_2VMisc<0b0, u, 0b01, opcode,
735 (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
736 asmop # "\t$Rd.4h, $Rn.4h, $Imm",
737 [(set (v4i16 VPR64:$Rd),
738 (v4i16 (Neon_cmpz (v4i16 VPR64:$Rn), (i32 imm:$Imm), CC)))],
741 def _8H : NeonI_2VMisc<0b1, u, 0b01, opcode,
742 (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
743 asmop # "\t$Rd.8h, $Rn.8h, $Imm",
744 [(set (v8i16 VPR128:$Rd),
745 (v8i16 (Neon_cmpz (v8i16 VPR128:$Rn), (i32 imm:$Imm), CC)))],
748 def _2S : NeonI_2VMisc<0b0, u, 0b10, opcode,
749 (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
750 asmop # "\t$Rd.2s, $Rn.2s, $Imm",
751 [(set (v2i32 VPR64:$Rd),
752 (v2i32 (Neon_cmpz (v2i32 VPR64:$Rn), (i32 imm:$Imm), CC)))],
755 def _4S : NeonI_2VMisc<0b1, u, 0b10, opcode,
756 (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
757 asmop # "\t$Rd.4s, $Rn.4s, $Imm",
758 [(set (v4i32 VPR128:$Rd),
759 (v4i32 (Neon_cmpz (v4i32 VPR128:$Rn), (i32 imm:$Imm), CC)))],
762 def _2D : NeonI_2VMisc<0b1, u, 0b11, opcode,
763 (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
764 asmop # "\t$Rd.2d, $Rn.2d, $Imm",
765 [(set (v2i64 VPR128:$Rd),
766 (v2i64 (Neon_cmpz (v2i64 VPR128:$Rn), (i32 imm:$Imm), CC)))],
770 // Vector Compare Mask Equal to Zero (Integer)
771 defm CMEQvvi : NeonI_cmpz_sizes<0b0, 0b01001, "cmeq", SETEQ>;
773 // Vector Compare Mask Greater Than or Equal to Zero (Signed Integer)
774 defm CMGEvvi : NeonI_cmpz_sizes<0b1, 0b01000, "cmge", SETGE>;
776 // Vector Compare Mask Greater Than Zero (Signed Integer)
777 defm CMGTvvi : NeonI_cmpz_sizes<0b0, 0b01000, "cmgt", SETGT>;
779 // Vector Compare Mask Less Than or Equal To Zero (Signed Integer)
780 defm CMLEvvi : NeonI_cmpz_sizes<0b1, 0b01001, "cmle", SETLE>;
782 // Vector Compare Mask Less Than Zero (Signed Integer)
783 defm CMLTvvi : NeonI_cmpz_sizes<0b0, 0b01010, "cmlt", SETLT>;
785 // Vector Comparisons (Floating Point)
// Note: the result types passed to NeonI_3VSame_SD_sizes are integer mask
// vectors (v2i32/v4i32/v2i64), since FP compares produce all-ones/all-zeros
// lane masks rather than FP values.

787 // Vector Compare Mask Equal (Floating Point)
788 let isCommutable =1 in {
789 defm FCMEQvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11100, "fcmeq", Neon_cmeq,
790 Neon_cmeq, Neon_cmeq,
791 v2i32, v4i32, v2i64, 0>;
794 // Vector Compare Mask Greater Than Or Equal (Floating Point)
795 defm FCMGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11100, "fcmge", Neon_cmge,
796 Neon_cmge, Neon_cmge,
797 v2i32, v4i32, v2i64, 0>;
799 // Vector Compare Mask Greater Than (Floating Point)
800 defm FCMGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11100, "fcmgt", Neon_cmgt,
801 Neon_cmgt, Neon_cmgt,
802 v2i32, v4i32, v2i64, 0>;
804 // Vector Compare Mask Less Than Or Equal (Floating Point)
805 // FCMLE is alias for FCMGE with operands reversed.
806 def FCMLEvvv_2S : NeonI_compare_aliases<"fcmle", ".2s", FCMGEvvv_2S, VPR64>;
807 def FCMLEvvv_4S : NeonI_compare_aliases<"fcmle", ".4s", FCMGEvvv_4S, VPR128>;
808 def FCMLEvvv_2D : NeonI_compare_aliases<"fcmle", ".2d", FCMGEvvv_2D, VPR128>;
810 // Vector Compare Mask Less Than (Floating Point)
811 // FCMLT is alias for FCMGT with operands reversed.
812 def FCMLTvvv_2S : NeonI_compare_aliases<"fcmlt", ".2s", FCMGTvvv_2S, VPR64>;
813 def FCMLTvvv_4S : NeonI_compare_aliases<"fcmlt", ".4s", FCMGTvvv_4S, VPR128>;
814 def FCMLTvvv_2D : NeonI_compare_aliases<"fcmlt", ".2d", FCMGTvvv_2D, VPR128>;
// Generates the FP "compare against zero" instructions (2s/4s/2d).
// The fpz32 operand presumably accepts only the literal #0.0 immediate —
// confirm against its definition elsewhere in the backend.
817 multiclass NeonI_fpcmpz_sizes<bit u, bit size, bits<5> opcode,
818 string asmop, CondCode CC>
820 def _2S : NeonI_2VMisc<0b0, u, {size, 0b0}, opcode,
821 (outs VPR64:$Rd), (ins VPR64:$Rn, fpz32:$FPImm),
822 asmop # "\t$Rd.2s, $Rn.2s, $FPImm",
823 [(set (v2i32 VPR64:$Rd),
824 (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpz32:$FPImm), CC)))],
827 def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode,
828 (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm),
829 asmop # "\t$Rd.4s, $Rn.4s, $FPImm",
830 [(set (v4i32 VPR128:$Rd),
831 (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpz32:$FPImm), CC)))],
834 def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode,
835 (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm),
836 asmop # "\t$Rd.2d, $Rn.2d, $FPImm",
837 [(set (v2i64 VPR128:$Rd),
838 (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn), (f32 fpz32:$FPImm), CC)))],

842 // Vector Compare Mask Equal to Zero (Floating Point)
843 defm FCMEQvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01101, "fcmeq", SETEQ>;
845 // Vector Compare Mask Greater Than or Equal to Zero (Floating Point)
846 defm FCMGEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01100, "fcmge", SETGE>;
848 // Vector Compare Mask Greater Than Zero (Floating Point)
849 defm FCMGTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01100, "fcmgt", SETGT>;
851 // Vector Compare Mask Less Than or Equal To Zero (Floating Point)
852 defm FCMLEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01101, "fcmle", SETLE>;
854 // Vector Compare Mask Less Than Zero (Floating Point)
855 defm FCMLTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01110, "fcmlt", SETLT>;
857 // Vector Absolute Comparisons (Floating Point)
// These compare the absolute values of both operands (|Rn| cmp |Rm|) and
// produce integer lane masks; lowering goes through the ARM/AArch64 vacge
// and vacgt intrinsics.

859 // Vector Absolute Compare Mask Greater Than Or Equal (Floating Point)
860 defm FACGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11101, "facge",
861 int_arm_neon_vacged, int_arm_neon_vacgeq,
862 int_aarch64_neon_vacgeq,
863 v2i32, v4i32, v2i64, 0>;
865 // Vector Absolute Compare Mask Greater Than (Floating Point)
866 defm FACGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11101, "facgt",
867 int_arm_neon_vacgtd, int_arm_neon_vacgtq,
868 int_aarch64_neon_vacgtq,
869 v2i32, v4i32, v2i64, 0>;
871 // Vector Absolute Compare Mask Less Than Or Equal (Floating Point)
872 // FACLE is alias for FACGE with operands reversed.
873 def FACLEvvv_2S : NeonI_compare_aliases<"facle", ".2s", FACGEvvv_2S, VPR64>;
874 def FACLEvvv_4S : NeonI_compare_aliases<"facle", ".4s", FACGEvvv_4S, VPR128>;
875 def FACLEvvv_2D : NeonI_compare_aliases<"facle", ".2d", FACGEvvv_2D, VPR128>;
877 // Vector Absolute Compare Mask Less Than (Floating Point)
878 // FACLT is alias for FACGT with operands reversed.
879 def FACLTvvv_2S : NeonI_compare_aliases<"faclt", ".2s", FACGTvvv_2S, VPR64>;
880 def FACLTvvv_4S : NeonI_compare_aliases<"faclt", ".4s", FACGTvvv_4S, VPR128>;
881 def FACLTvvv_2D : NeonI_compare_aliases<"faclt", ".2d", FACGTvvv_2D, VPR128>;
// Integer three-register-same-type arithmetic.  The trailing 1/0 argument
// to the size multiclasses is the isCommutable flag.
883 // Vector halving add (Integer Signed, Unsigned)
884 defm SHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00000, "shadd",
885 int_arm_neon_vhadds, 1>;
886 defm UHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00000, "uhadd",
887 int_arm_neon_vhaddu, 1>;
889 // Vector halving sub (Integer Signed, Unsigned)
890 defm SHSUBvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00100, "shsub",
891 int_arm_neon_vhsubs, 0>;
892 defm UHSUBvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00100, "uhsub",
893 int_arm_neon_vhsubu, 0>;
895 // Vector rounding halving add (Integer Signed, Unsigned)
896 defm SRHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00010, "srhadd",
897 int_arm_neon_vrhadds, 1>;
898 defm URHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00010, "urhadd",
899 int_arm_neon_vrhaddu, 1>;
901 // Vector Saturating add (Integer Signed, Unsigned)
902 defm SQADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00001, "sqadd",
903 int_arm_neon_vqadds, 1>;
904 defm UQADDvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00001, "uqadd",
905 int_arm_neon_vqaddu, 1>;
907 // Vector Saturating sub (Integer Signed, Unsigned)
908 defm SQSUBvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00101, "sqsub",
909 int_arm_neon_vqsubs, 1>;
910 defm UQSUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00101, "uqsub",
911 int_arm_neon_vqsubu, 1>;
913 // Vector Shift Left (Signed and Unsigned Integer)
914 defm SSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01000, "sshl",
915 int_arm_neon_vshifts, 1>;
916 defm USHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01000, "ushl",
917 int_arm_neon_vshiftu, 1>;
919 // Vector Saturating Shift Left (Signed and Unsigned Integer)
920 defm SQSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01001, "sqshl",
921 int_arm_neon_vqshifts, 1>;
922 defm UQSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01001, "uqshl",
923 int_arm_neon_vqshiftu, 1>;
925 // Vector Rounding Shift Left (Signed and Unsigned Integer)
926 defm SRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01010, "srshl",
927 int_arm_neon_vrshifts, 1>;
928 defm URSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01010, "urshl",
929 int_arm_neon_vrshiftu, 1>;
931 // Vector Saturating Rounding Shift Left (Signed and Unsigned Integer)
932 defm SQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01011, "sqrshl",
933 int_arm_neon_vqrshifts, 1>;
934 defm UQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01011, "uqrshl",
935 int_arm_neon_vqrshiftu, 1>;
937 // Vector Maximum (Signed and Unsigned Integer)
938 defm SMAXvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01100, "smax", int_arm_neon_vmaxs, 1>;
939 defm UMAXvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01100, "umax", int_arm_neon_vmaxu, 1>;
941 // Vector Minimum (Signed and Unsigned Integer)
942 defm SMINvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01101, "smin", int_arm_neon_vmins, 1>;
943 defm UMINvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01101, "umin", int_arm_neon_vminu, 1>;
945 // Vector Maximum (Floating Point)
946 defm FMAXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11110, "fmax",
947 int_arm_neon_vmaxs, int_arm_neon_vmaxs,
948 int_arm_neon_vmaxs, v2f32, v4f32, v2f64, 1>;
950 // Vector Minimum (Floating Point)
951 defm FMINvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11110, "fmin",
952 int_arm_neon_vmins, int_arm_neon_vmins,
953 int_arm_neon_vmins, v2f32, v4f32, v2f64, 1>;
955 // Vector maxNum (Floating Point) - prefer a number over a quiet NaN
956 defm FMAXNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11000, "fmaxnm",
957 int_aarch64_neon_vmaxnm,
958 int_aarch64_neon_vmaxnm,
959 int_aarch64_neon_vmaxnm,
960 v2f32, v4f32, v2f64, 1>;
962 // Vector minNum (Floating Point) - prefer a number over a quiet NaN
963 defm FMINNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11000, "fminnm",
964 int_aarch64_neon_vminnm,
965 int_aarch64_neon_vminnm,
966 int_aarch64_neon_vminnm,
967 v2f32, v4f32, v2f64, 1>;
969 // Vector Maximum Pairwise (Signed and Unsigned Integer)
970 defm SMAXPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10100, "smaxp", int_arm_neon_vpmaxs, 1>;
971 defm UMAXPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10100, "umaxp", int_arm_neon_vpmaxu, 1>;
973 // Vector Minimum Pairwise (Signed and Unsigned Integer)
974 defm SMINPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10101, "sminp", int_arm_neon_vpmins, 1>;
975 defm UMINPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10101, "uminp", int_arm_neon_vpminu, 1>;
977 // Vector Maximum Pairwise (Floating Point)
978 defm FMAXPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11110, "fmaxp",
979 int_arm_neon_vpmaxs, int_arm_neon_vpmaxs,
980 int_arm_neon_vpmaxs, v2f32, v4f32, v2f64, 1>;
982 // Vector Minimum Pairwise (Floating Point)
983 defm FMINPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11110, "fminp",
984 int_arm_neon_vpmins, int_arm_neon_vpmins,
985 int_arm_neon_vpmins, v2f32, v4f32, v2f64, 1>;
987 // Vector maxNum Pairwise (Floating Point) - prefer a number over a quiet NaN
988 defm FMAXNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11000, "fmaxnmp",
989 int_aarch64_neon_vpmaxnm,
990 int_aarch64_neon_vpmaxnm,
991 int_aarch64_neon_vpmaxnm,
992 v2f32, v4f32, v2f64, 1>;
994 // Vector minNum Pairwise (Floating Point) - prefer a number over a quiet NaN
995 defm FMINNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11000, "fminnmp",
996 int_aarch64_neon_vpminnm,
997 int_aarch64_neon_vpminnm,
998 int_aarch64_neon_vpminnm,
999 v2f32, v4f32, v2f64, 1>;
1001 // Vector Addition Pairwise (Integer)
1002 defm ADDP : NeonI_3VSame_BHSD_sizes<0b0, 0b10111, "addp", int_arm_neon_vpadd, 1>;
1004 // Vector Addition Pairwise (Floating Point)
1005 defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp",
1009 v2f32, v4f32, v2f64, 1>;
1011 // Vector Saturating Doubling Multiply High
1012 defm SQDMULHvvv : NeonI_3VSame_HS_sizes<0b0, 0b10110, "sqdmulh",
1013 int_arm_neon_vqdmulh, 1>;
1015 // Vector Saturating Rounding Doubling Multiply High
1016 defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh",
1017 int_arm_neon_vqrdmulh, 1>;
1019 // Vector Multiply Extended (Floating Point)
1020 defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx",
1021 int_aarch64_neon_vmulx,
1022 int_aarch64_neon_vmulx,
1023 int_aarch64_neon_vmulx,
1024 v2f32, v4f32, v2f64, 1>;
1026 // Patterns to match llvm.aarch64.* intrinsic for
1027 // ADDP, SMINP, UMINP, SMAXP, UMAXP having i32 as output
// A scalar (v1i32) across-vector reduction of a v2i32 input is implemented
// by a pairwise op with both source operands set to the same register.
1028 class Neon_VectorPair_v2i32_pattern<SDPatternOperator opnode, Instruction INST>
1029 : Pat<(v1i32 (opnode (v2i32 VPR64:$Rn))),
1031 (v2i32 (INST (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rn))),
1034 def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_sminv, SMINPvvv_2S>;
1035 def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_uminv, UMINPvvv_2S>;
1036 def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_smaxv, SMAXPvvv_2S>;
1037 def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_umaxv, UMAXPvvv_2S>;
1038 def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_vaddv, ADDP_2S>;
1040 // Vector Immediate Instructions
// Asm-operand classes for the optional LSL/MSL shift on modified-immediate
// instructions; the parser/printer/predicate hooks are synthesized per
// PREFIX (LSL, MSL, LSLH).
1042 multiclass neon_mov_imm_shift_asmoperands<string PREFIX>
1044 def _asmoperand : AsmOperandClass
1046 let Name = "NeonMovImmShift" # PREFIX;
1047 let RenderMethod = "addNeonMovImmShift" # PREFIX # "Operands";
1048 let PredicateMethod = "isNeonMovImmShift" # PREFIX;
1052 // Definition of vector immediates shift operands
1054 // The selectable use-cases extract the shift operation
1055 // information from the OpCmode fields encoded in the immediate.
// XForm: decode the OpCmode encoding into the actual shift amount; bails
// out (returns SDValue()) when the encoding carries no shift.
1056 def neon_mod_shift_imm_XFORM : SDNodeXForm<imm, [{
1057 uint64_t OpCmode = N->getZExtValue();
1059 unsigned ShiftOnesIn;
1061 A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
1062 if (!HasShift) return SDValue();
1063 return CurDAG->getTargetConstant(ShiftImm, MVT::i32);
1066 // Vector immediates shift operands which accept LSL and MSL
1067 // shift operators with shift value in the range of 0, 8, 16, 24 (LSL),
1068 // or 0, 8 (LSLH) or 8, 16 (MSL).
1069 defm neon_mov_imm_LSL : neon_mov_imm_shift_asmoperands<"LSL">;
1070 defm neon_mov_imm_MSL : neon_mov_imm_shift_asmoperands<"MSL">;
1071 // LSLH restricts shift amount to 0, 8 out of 0, 8, 16, 24
1072 defm neon_mov_imm_LSLH : neon_mov_imm_shift_asmoperands<"LSLH">;
// ISel-side operands: the predicate distinguishes LSL (shift-zeros) from
// MSL (shift-ones) via the ShiftOnesIn flag decoded from the immediate.
1074 multiclass neon_mov_imm_shift_operands<string PREFIX,
1075 string HALF, string ISHALF, code pred>
1077 def _operand : Operand<i32>, ImmLeaf<i32, pred, neon_mod_shift_imm_XFORM>
1080 "printNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
1082 "DecodeNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
1083 let ParserMatchClass =
1084 !cast<AsmOperandClass>("neon_mov_imm_" # PREFIX # HALF # "_asmoperand");
1088 defm neon_mov_imm_LSL : neon_mov_imm_shift_operands<"LSL", "", "false", [{
1090 unsigned ShiftOnesIn;
1092 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1093 return (HasShift && !ShiftOnesIn);
1096 defm neon_mov_imm_MSL : neon_mov_imm_shift_operands<"MSL", "", "false", [{
1098 unsigned ShiftOnesIn;
1100 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1101 return (HasShift && ShiftOnesIn);
// LSLH uses the same ImmLeaf predicate as LSL; the 0/8-only restriction is
// enforced by the LSLH asmoperand's parser predicate, not here.
1104 defm neon_mov_imm_LSLH : neon_mov_imm_shift_operands<"LSL", "H", "true", [{
1106 unsigned ShiftOnesIn;
1108 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1109 return (HasShift && !ShiftOnesIn);
// Generic unsigned-immediate asm-operand classes (1, 2 and 8 bits wide).
1112 def neon_uimm1_asmoperand : AsmOperandClass
1115 let PredicateMethod = "isUImm<1>";
1116 let RenderMethod = "addImmOperands";
1119 def neon_uimm2_asmoperand : AsmOperandClass
1122 let PredicateMethod = "isUImm<2>";
1123 let RenderMethod = "addImmOperands";
1126 def neon_uimm8_asmoperand : AsmOperandClass
1129 let PredicateMethod = "isUImm<8>";
1130 let RenderMethod = "addImmOperands";
// 8-bit immediate operand; the ImmLeaf accepts any value (range checking is
// done by the asmoperand class / instruction encoding).
1133 def neon_uimm8 : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
1134 let ParserMatchClass = neon_uimm8_asmoperand;
1135 let PrintMethod = "printUImmHexOperand";
1138 def neon_uimm64_mask_asmoperand : AsmOperandClass
1140 let Name = "NeonUImm64Mask";
1141 let PredicateMethod = "isNeonUImm64Mask";
1142 let RenderMethod = "addNeonUImm64MaskOperands";
1145 // MCOperand for 64-bit bytemask with each byte having only the
1146 // value 0x00 and 0xff is encoded as an unsigned 8-bit value
1147 def neon_uimm64_mask : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
1148 let ParserMatchClass = neon_uimm64_mask_asmoperand;
1149 let PrintMethod = "printNeonUImm64MaskOperand";
// MOVI/MVNI with an optionally LSL-shifted 8-bit immediate (shifting in
// zeros).  The cmode field encodes the element size and shift amount.
1152 multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
1153 SDPatternOperator opnode>
1155 // shift zeros, per word
1156 def _2S : NeonI_1VModImm<0b0, op,
1158 (ins neon_uimm8:$Imm,
1159 neon_mov_imm_LSL_operand:$Simm),
1160 !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
1161 [(set (v2i32 VPR64:$Rd),
1162 (v2i32 (opnode (timm:$Imm),
1163 (neon_mov_imm_LSL_operand:$Simm))))],
1166 let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
1169 def _4S : NeonI_1VModImm<0b1, op,
1171 (ins neon_uimm8:$Imm,
1172 neon_mov_imm_LSL_operand:$Simm),
1173 !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
1174 [(set (v4i32 VPR128:$Rd),
1175 (v4i32 (opnode (timm:$Imm),
1176 (neon_mov_imm_LSL_operand:$Simm))))],
1179 let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
1182 // shift zeros, per halfword
1183 def _4H : NeonI_1VModImm<0b0, op,
1185 (ins neon_uimm8:$Imm,
1186 neon_mov_imm_LSLH_operand:$Simm),
1187 !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
1188 [(set (v4i16 VPR64:$Rd),
1189 (v4i16 (opnode (timm:$Imm),
1190 (neon_mov_imm_LSLH_operand:$Simm))))],
1193 let cmode = {0b1, 0b0, Simm, 0b0};
1196 def _8H : NeonI_1VModImm<0b1, op,
1198 (ins neon_uimm8:$Imm,
1199 neon_mov_imm_LSLH_operand:$Simm),
1200 !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
1201 [(set (v8i16 VPR128:$Rd),
1202 (v8i16 (opnode (timm:$Imm),
1203 (neon_mov_imm_LSLH_operand:$Simm))))],
1206 let cmode = {0b1, 0b0, Simm, 0b0};
// BIC/ORR with shifted immediate: read-modify-write forms, so $src is tied
// to $Rd via the Constraints below.  opnode is the logical op (and/or) and
// neonopnode (Neon_movi/Neon_mvni) materializes the immediate operand.
1210 multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
1211 SDPatternOperator opnode,
1212 SDPatternOperator neonopnode>
1214 let Constraints = "$src = $Rd" in {
1215 // shift zeros, per word
1216 def _2S : NeonI_1VModImm<0b0, op,
1218 (ins VPR64:$src, neon_uimm8:$Imm,
1219 neon_mov_imm_LSL_operand:$Simm),
1220 !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
1221 [(set (v2i32 VPR64:$Rd),
1222 (v2i32 (opnode (v2i32 VPR64:$src),
1223 (v2i32 (bitconvert (v2i32 (neonopnode timm:$Imm,
1224 neon_mov_imm_LSL_operand:$Simm)))))))],
1227 let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
1230 def _4S : NeonI_1VModImm<0b1, op,
1232 (ins VPR128:$src, neon_uimm8:$Imm,
1233 neon_mov_imm_LSL_operand:$Simm),
1234 !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
1235 [(set (v4i32 VPR128:$Rd),
1236 (v4i32 (opnode (v4i32 VPR128:$src),
1237 (v4i32 (bitconvert (v4i32 (neonopnode timm:$Imm,
1238 neon_mov_imm_LSL_operand:$Simm)))))))],
1241 let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
1244 // shift zeros, per halfword
// NOTE(review): the halfword defs take the LSLH operand in their ins list
// but reference neon_mov_imm_LSL_operand inside the pattern.  Both share
// the same ImmLeaf predicate, so matching behaves identically — confirm
// the asymmetry is intentional.
1245 def _4H : NeonI_1VModImm<0b0, op,
1247 (ins VPR64:$src, neon_uimm8:$Imm,
1248 neon_mov_imm_LSLH_operand:$Simm),
1249 !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
1250 [(set (v4i16 VPR64:$Rd),
1251 (v4i16 (opnode (v4i16 VPR64:$src),
1252 (v4i16 (bitconvert (v4i16 (neonopnode timm:$Imm,
1253 neon_mov_imm_LSL_operand:$Simm)))))))],
1256 let cmode = {0b1, 0b0, Simm, 0b1};
1259 def _8H : NeonI_1VModImm<0b1, op,
1261 (ins VPR128:$src, neon_uimm8:$Imm,
1262 neon_mov_imm_LSLH_operand:$Simm),
1263 !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
1264 [(set (v8i16 VPR128:$Rd),
1265 (v8i16 (opnode (v8i16 VPR128:$src),
1266 (v8i16 (bitconvert (v8i16 (neonopnode timm:$Imm,
1267 neon_mov_imm_LSL_operand:$Simm)))))))],
1270 let cmode = {0b1, 0b0, Simm, 0b1};
// MOVI/MVNI with an MSL (shift-ones) shifted immediate; word lanes only,
// since MSL is defined just for the 32-bit element forms.
1275 multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op,
1276 SDPatternOperator opnode>
1278 // shift ones, per word
1279 def _2S : NeonI_1VModImm<0b0, op,
1281 (ins neon_uimm8:$Imm,
1282 neon_mov_imm_MSL_operand:$Simm),
1283 !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
1284 [(set (v2i32 VPR64:$Rd),
1285 (v2i32 (opnode (timm:$Imm),
1286 (neon_mov_imm_MSL_operand:$Simm))))],
1289 let cmode = {0b1, 0b1, 0b0, Simm};
1292 def _4S : NeonI_1VModImm<0b1, op,
1294 (ins neon_uimm8:$Imm,
1295 neon_mov_imm_MSL_operand:$Simm),
1296 !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
1297 [(set (v4i32 VPR128:$Rd),
1298 (v4i32 (opnode (timm:$Imm),
1299 (neon_mov_imm_MSL_operand:$Simm))))],
1302 let cmode = {0b1, 0b1, 0b0, Simm};
// Instantiations of the shifted modified-immediate multiclasses.  All are
// rematerializable since they depend only on their immediate operands.
1306 // Vector Move Immediate Shifted
1307 let isReMaterializable = 1 in {
1308 defm MOVIvi_lsl : NeonI_mov_imm_lsl_sizes<"movi", 0b0, Neon_movi>;
1311 // Vector Move Inverted Immediate Shifted
1312 let isReMaterializable = 1 in {
1313 defm MVNIvi_lsl : NeonI_mov_imm_lsl_sizes<"mvni", 0b1, Neon_mvni>;
1316 // Vector Bitwise Bit Clear (AND NOT) - immediate
1317 let isReMaterializable = 1 in {
1318 defm BICvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"bic", 0b1,
1322 // Vector Bitwise OR - immediate
1324 let isReMaterializable = 1 in {
1325 defm ORRvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"orr", 0b0,
1329 // Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
1330 // LowerBUILD_VECTOR favors lowering MOVI over MVNI.
1331 // BIC immediate instructions selection requires additional patterns to
1332 // transform Neon_movi operands into BIC immediate operands
// XForm used by the patterns below: flips the encoded halfword shift
// (0 <-> 8) so a MOVI of 0xff at one shift becomes a BIC of 0x00 at the
// other shift.
1334 def neon_mov_imm_LSLH_transform_XFORM : SDNodeXForm<imm, [{
1335 uint64_t OpCmode = N->getZExtValue();
1337 unsigned ShiftOnesIn;
1338 (void)A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
1339 // LSLH restricts shift amount to 0, 8 which are encoded as 0 and 1
1340 // Transform encoded shift amount 0 to 1 and 1 to 0.
1341 return CurDAG->getTargetConstant(!ShiftImm, MVT::i32);
1344 def neon_mov_imm_LSLH_transform_operand
1347 unsigned ShiftOnesIn;
1349 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1350 return (HasShift && !ShiftOnesIn); }],
1351 neon_mov_imm_LSLH_transform_XFORM>;
1353 // Transform (and A, (4h Neon_movi 0xff)) -> BIC 4h (A, 0x00, LSL 8)
1354 // Transform (and A, (4h Neon_movi 0xff LSL #8)) -> BIC 4h (A, 0x00)
1355 def : Pat<(v4i16 (and VPR64:$src,
1356 (v4i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
1357 (BICvi_lsl_4H VPR64:$src, 0,
1358 neon_mov_imm_LSLH_transform_operand:$Simm)>;
1360 // Transform (and A, (8h Neon_movi 8h 0xff)) -> BIC 8h (A, 0x00, LSL 8)
1361 // Transform (and A, (8h Neon_movi 0xff LSL #8)) -> BIC 8h (A, 0x00)
1362 def : Pat<(v8i16 (and VPR128:$src,
1363 (v8i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
1364 (BICvi_lsl_8H VPR128:$src, 0,
1365 neon_mov_imm_LSLH_transform_operand:$Simm)>;
// Patterns selecting the halfword BIC/ORR immediate instructions for the
// other vector types: the (opnode src, bitconvert(neonopnode imm)) shape is
// matched at v8i8/v1i64 (64-bit) and v16i8/v4i32/v2i64 (128-bit) and mapped
// onto the 4H/8H instruction forms via bitcast-compatible lane layouts.
1368 multiclass Neon_bitwiseVi_patterns<SDPatternOperator opnode,
1369 SDPatternOperator neonopnode,
1371 Instruction INST8H> {
1372 def : Pat<(v8i8 (opnode VPR64:$src,
1373 (bitconvert(v4i16 (neonopnode timm:$Imm,
1374 neon_mov_imm_LSLH_operand:$Simm))))),
1375 (INST4H VPR64:$src, neon_uimm8:$Imm,
1376 neon_mov_imm_LSLH_operand:$Simm)>;
1377 def : Pat<(v1i64 (opnode VPR64:$src,
1378 (bitconvert(v4i16 (neonopnode timm:$Imm,
1379 neon_mov_imm_LSLH_operand:$Simm))))),
1380 (INST4H VPR64:$src, neon_uimm8:$Imm,
1381 neon_mov_imm_LSLH_operand:$Simm)>;
1383 def : Pat<(v16i8 (opnode VPR128:$src,
1384 (bitconvert(v8i16 (neonopnode timm:$Imm,
1385 neon_mov_imm_LSLH_operand:$Simm))))),
1386 (INST8H VPR128:$src, neon_uimm8:$Imm,
1387 neon_mov_imm_LSLH_operand:$Simm)>;
1388 def : Pat<(v4i32 (opnode VPR128:$src,
1389 (bitconvert(v8i16 (neonopnode timm:$Imm,
1390 neon_mov_imm_LSLH_operand:$Simm))))),
1391 (INST8H VPR128:$src, neon_uimm8:$Imm,
1392 neon_mov_imm_LSLH_operand:$Simm)>;
1393 def : Pat<(v2i64 (opnode VPR128:$src,
1394 (bitconvert(v8i16 (neonopnode timm:$Imm,
1395 neon_mov_imm_LSLH_operand:$Simm))))),
1396 (INST8H VPR128:$src, neon_uimm8:$Imm,
1397 neon_mov_imm_LSLH_operand:$Simm)>;
1400 // Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
// BIC computes Rd = Rn & ~imm, and Neon_mvni materializes ~imm, so the DAG
// node to match is 'and': (and A, mvni(imm)) == A & ~imm == BIC A, imm.
// Matching 'or' here would select BIC for an OR computation and miscompile;
// this also matches the explicit 'and'-based BIC patterns above.
1401 defm : Neon_bitwiseVi_patterns<and, Neon_mvni, BICvi_lsl_4H, BICvi_lsl_8H>;
1403 // Additional patterns for Vector Bitwise OR - immediate
1404 defm : Neon_bitwiseVi_patterns<or, Neon_movi, ORRvi_lsl_4H, ORRvi_lsl_8H>;
1407 // Vector Move Immediate Masked
1408 let isReMaterializable = 1 in {
1409 defm MOVIvi_msl : NeonI_mov_imm_msl_sizes<"movi", 0b0, Neon_movi>;
1412 // Vector Move Inverted Immediate Masked
1413 let isReMaterializable = 1 in {
1414 defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>;
// Assembly aliases allowing the shift specifier to be omitted when it is
// zero (the alias hard-codes a 0 shift operand).
1417 class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane,
1418 Instruction inst, RegisterOperand VPRC>
1419 : NeonInstAlias<!strconcat(asmop, "\t$Rd," # asmlane # ", $Imm"),
1420 (inst VPRC:$Rd, neon_uimm8:$Imm, 0), 0b0>;
1422 // Aliases for Vector Move Immediate Shifted
1423 def : NeonI_mov_imm_lsl_aliases<"movi", ".2s", MOVIvi_lsl_2S, VPR64>;
1424 def : NeonI_mov_imm_lsl_aliases<"movi", ".4s", MOVIvi_lsl_4S, VPR128>;
1425 def : NeonI_mov_imm_lsl_aliases<"movi", ".4h", MOVIvi_lsl_4H, VPR64>;
1426 def : NeonI_mov_imm_lsl_aliases<"movi", ".8h", MOVIvi_lsl_8H, VPR128>;
1428 // Aliases for Vector Move Inverted Immediate Shifted
1429 def : NeonI_mov_imm_lsl_aliases<"mvni", ".2s", MVNIvi_lsl_2S, VPR64>;
1430 def : NeonI_mov_imm_lsl_aliases<"mvni", ".4s", MVNIvi_lsl_4S, VPR128>;
1431 def : NeonI_mov_imm_lsl_aliases<"mvni", ".4h", MVNIvi_lsl_4H, VPR64>;
1432 def : NeonI_mov_imm_lsl_aliases<"mvni", ".8h", MVNIvi_lsl_8H, VPR128>;
1434 // Aliases for Vector Bitwise Bit Clear (AND NOT) - immediate
1435 def : NeonI_mov_imm_lsl_aliases<"bic", ".2s", BICvi_lsl_2S, VPR64>;
1436 def : NeonI_mov_imm_lsl_aliases<"bic", ".4s", BICvi_lsl_4S, VPR128>;
1437 def : NeonI_mov_imm_lsl_aliases<"bic", ".4h", BICvi_lsl_4H, VPR64>;
1438 def : NeonI_mov_imm_lsl_aliases<"bic", ".8h", BICvi_lsl_8H, VPR128>;
1440 // Aliases for Vector Bitwise OR - immediate
1441 def : NeonI_mov_imm_lsl_aliases<"orr", ".2s", ORRvi_lsl_2S, VPR64>;
1442 def : NeonI_mov_imm_lsl_aliases<"orr", ".4s", ORRvi_lsl_4S, VPR128>;
1443 def : NeonI_mov_imm_lsl_aliases<"orr", ".4h", ORRvi_lsl_4H, VPR64>;
1444 def : NeonI_mov_imm_lsl_aliases<"orr", ".8h", ORRvi_lsl_8H, VPR128>;
1446 // Vector Move Immediate - per byte
// Byte-sized MOVI takes no shift; the second Neon_movi operand (the OpCmode
// immediate) is matched as an arbitrary constant.
1447 let isReMaterializable = 1 in {
1448 def MOVIvi_8B : NeonI_1VModImm<0b0, 0b0,
1449 (outs VPR64:$Rd), (ins neon_uimm8:$Imm),
1450 "movi\t$Rd.8b, $Imm",
1451 [(set (v8i8 VPR64:$Rd),
1452 (v8i8 (Neon_movi (timm:$Imm), (i32 imm))))],
1457 def MOVIvi_16B : NeonI_1VModImm<0b1, 0b0,
1458 (outs VPR128:$Rd), (ins neon_uimm8:$Imm),
1459 "movi\t$Rd.16b, $Imm",
1460 [(set (v16i8 VPR128:$Rd),
1461 (v16i8 (Neon_movi (timm:$Imm), (i32 imm))))],
1467 // Vector Move Immediate - bytemask, per double word
// The immediate is a 64-bit bytemask (each byte 0x00 or 0xff) encoded as an
// 8-bit value; see neon_uimm64_mask above.
1468 let isReMaterializable = 1 in {
1469 def MOVIvi_2D : NeonI_1VModImm<0b1, 0b1,
1470 (outs VPR128:$Rd), (ins neon_uimm64_mask:$Imm),
// Fixed stray space after the tab in the asm string ("\t $Rd" -> "\t$Rd"),
// which printed a doubled separator and was inconsistent with every other
// instruction in this file.
1471 "movi\t$Rd.2d, $Imm",
1472 [(set (v2i64 VPR128:$Rd),
1473 (v2i64 (Neon_movi (timm:$Imm), (i32 imm))))],
1479 // Vector Move Immediate - bytemask, one doubleword
// Scalar (FPR64 / v1i64) form of the bytemask MOVI.
1481 let isReMaterializable = 1 in {
1482 def MOVIdi : NeonI_1VModImm<0b0, 0b1,
1483 (outs FPR64:$Rd), (ins neon_uimm64_mask:$Imm),
1485 [(set (v1i64 FPR64:$Rd),
1486 (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))],
1492 // Vector Floating Point Move Immediate
// Splats an FP modified-immediate into every lane via the Neon_fmovi node.
1494 class NeonI_FMOV_impl<string asmlane, RegisterOperand VPRC, ValueType OpTy,
1495 Operand immOpType, bit q, bit op>
1496 : NeonI_1VModImm<q, op,
1497 (outs VPRC:$Rd), (ins immOpType:$Imm),
1498 "fmov\t$Rd" # asmlane # ", $Imm",
1499 [(set (OpTy VPRC:$Rd),
1500 (OpTy (Neon_fmovi (timm:$Imm))))],
1505 let isReMaterializable = 1 in {
1506 def FMOVvi_2S : NeonI_FMOV_impl<".2s", VPR64, v2f32, fmov32_operand, 0b0, 0b0>;
1507 def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>;
1508 def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>;
1511 // Vector Shift (Immediate)
1512 // Immediate in [0, 63]
1513 def imm0_63 : Operand<i32> {
1514 let ParserMatchClass = uimm6_asmoperand;
1517 // Shift Right/Left Immediate - The immh:immb field of these shifts are encoded
1521 // 8 immh:immb<6:3> = '0001xxx', <imm> is encoded in immh:immb<2:0>
1522 // 16 immh:immb<6:4> = '001xxxx', <imm> is encoded in immh:immb<3:0>
1523 // 32 immh:immb<6:5> = '01xxxxx', <imm> is encoded in immh:immb<4:0>
1524 // 64 immh:immb<6> = '1xxxxxx', <imm> is encoded in immh:immb<5:0>
1526 // The shift right immediate amount, in the range 1 to element bits, is computed
1527 // as Offset - UInt(immh:immb). The shift left immediate amount, in the range 0
1528 // to element bits - 1, is computed as UInt(immh:immb) - Offset.
// Right-shift immediate operand machinery: per-width asmoperand class with
// a dedicated diagnostic, plus encoder/decoder hooks handling the
// Offset - UInt(immh:immb) transformation described above.
1530 class shr_imm_asmoperands<string OFFSET> : AsmOperandClass {
1531 let Name = "ShrImm" # OFFSET;
1532 let RenderMethod = "addImmOperands";
1533 let DiagnosticType = "ShrImm" # OFFSET;
1536 class shr_imm<string OFFSET> : Operand<i32> {
1537 let EncoderMethod = "getShiftRightImm" # OFFSET;
1538 let DecoderMethod = "DecodeShiftRightImm" # OFFSET;
1539 let ParserMatchClass =
1540 !cast<AsmOperandClass>("shr_imm" # OFFSET # "_asmoperand");
1543 def shr_imm8_asmoperand : shr_imm_asmoperands<"8">;
1544 def shr_imm16_asmoperand : shr_imm_asmoperands<"16">;
1545 def shr_imm32_asmoperand : shr_imm_asmoperands<"32">;
1546 def shr_imm64_asmoperand : shr_imm_asmoperands<"64">;
// Right shifts are in the range [1, element bits].
1548 def shr_imm8 : shr_imm<"8">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 8;}]>;
1549 def shr_imm16 : shr_imm<"16">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 16;}]>;
1550 def shr_imm32 : shr_imm<"32">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 32;}]>;
1551 def shr_imm64 : shr_imm<"64">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 64;}]>;
// Left-shift immediate operand machinery, analogous to shr_imm above.
1553 class shl_imm_asmoperands<string OFFSET> : AsmOperandClass {
1554 let Name = "ShlImm" # OFFSET;
1555 let RenderMethod = "addImmOperands";
1556 let DiagnosticType = "ShlImm" # OFFSET;
1559 class shl_imm<string OFFSET> : Operand<i32> {
1560 let EncoderMethod = "getShiftLeftImm" # OFFSET;
1561 let DecoderMethod = "DecodeShiftLeftImm" # OFFSET;
1562 let ParserMatchClass =
1563 !cast<AsmOperandClass>("shl_imm" # OFFSET # "_asmoperand");
1566 def shl_imm8_asmoperand : shl_imm_asmoperands<"8">;
1567 def shl_imm16_asmoperand : shl_imm_asmoperands<"16">;
1568 def shl_imm32_asmoperand : shl_imm_asmoperands<"32">;
1569 def shl_imm64_asmoperand : shl_imm_asmoperands<"64">;
// Left shifts are in the range [0, element bits - 1].
1571 def shl_imm8 : shl_imm<"8">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 8;}]>;
1572 def shl_imm16 : shl_imm<"16">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 16;}]>;
1573 def shl_imm32 : shl_imm<"32">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 32;}]>;
1574 def shl_imm64 : shl_imm<"64">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 64;}]>;
// Base class for the two-register shift-by-immediate instructions; the
// pattern matches a shift by a splatted (Neon_vdup) immediate.
1576 class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
1577 RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode>
1578 : NeonI_2VShiftImm<q, u, opcode,
1579 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
1580 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1581 [(set (Ty VPRC:$Rd),
1582 (Ty (OpNode (Ty VPRC:$Rn),
1583 (Ty (Neon_vdup (i32 ImmTy:$Imm))))))],
// Shift-left forms for every lane arrangement; each def pins the immh bits
// that select the element size in the immh:immb encoding.
1586 multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> {
1587 // 64-bit vector types.
1588 def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8, shl> {
1589 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1592 def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16, shl> {
1593 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1596 def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32, shl> {
1597 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1600 // 128-bit vector types.
1601 def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8, shl> {
1602 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1605 def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16, shl> {
1606 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1609 def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32, shl> {
1610 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1613 def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64, shl> {
1614 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
// Shift-right forms; OpNode selects arithmetic (sra) or logical (srl).
1618 multiclass NeonI_N2VShR<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
1619 def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1621 let Inst{22-19} = 0b0001;
1624 def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1626 let Inst{22-20} = 0b001;
1629 def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1631 let Inst{22-21} = 0b01;
1634 def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1636 let Inst{22-19} = 0b0001;
1639 def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1641 let Inst{22-20} = 0b001;
1644 def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1646 let Inst{22-21} = 0b01;
1649 def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1656 defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">;
1659 defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>;
1660 defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>;
// PatFrags selecting the upper half of a 128-bit vector via
// extract_subvector at the midpoint index (8/4/2/1 elements in).
1662 def Neon_High16B : PatFrag<(ops node:$in),
1663 (extract_subvector (v16i8 node:$in), (iPTR 8))>;
1664 def Neon_High8H : PatFrag<(ops node:$in),
1665 (extract_subvector (v8i16 node:$in), (iPTR 4))>;
1666 def Neon_High4S : PatFrag<(ops node:$in),
1667 (extract_subvector (v4i32 node:$in), (iPTR 2))>;
1668 def Neon_High2D : PatFrag<(ops node:$in),
1669 (extract_subvector (v2i64 node:$in), (iPTR 1))>;
1670 def Neon_High4float : PatFrag<(ops node:$in),
1671 (extract_subvector (v4f32 node:$in), (iPTR 2))>;
1672 def Neon_High2double : PatFrag<(ops node:$in),
1673 (extract_subvector (v2f64 node:$in), (iPTR 1))>;
// PatFrags selecting the lower half of a 128-bit vector.
// NOTE(review): the trailing index lines (presumably `(iPTR 0)))>;`) of
// each def below are missing from this extraction — confirm upstream.
1675 def Neon_Low16B : PatFrag<(ops node:$in),
1676 (v8i8 (extract_subvector (v16i8 node:$in),
1678 def Neon_Low8H : PatFrag<(ops node:$in),
1679 (v4i16 (extract_subvector (v8i16 node:$in),
1681 def Neon_Low4S : PatFrag<(ops node:$in),
1682 (v2i32 (extract_subvector (v4i32 node:$in),
1684 def Neon_Low2D : PatFrag<(ops node:$in),
1685 (v1i64 (extract_subvector (v2i64 node:$in),
1687 def Neon_Low4float : PatFrag<(ops node:$in),
1688 (v2f32 (extract_subvector (v4f32 node:$in),
1690 def Neon_Low2double : PatFrag<(ops node:$in),
1691 (v1f64 (extract_subvector (v2f64 node:$in),
// Shift-left-long: widen each source element with ExtOp (sext/zext), then
// shift by the immediate splatted via Neon_vdup.  Reads a 64-bit source,
// produces a 128-bit result.
1694 class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1695 string SrcT, ValueType DestTy, ValueType SrcTy,
1696 Operand ImmTy, SDPatternOperator ExtOp>
1697 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1698 (ins VPR64:$Rn, ImmTy:$Imm),
1699 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1700 [(set (DestTy VPR128:$Rd),
1702 (DestTy (ExtOp (SrcTy VPR64:$Rn))),
1703 (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
// Second-half variant ("<asmop>2"): same widening shift, but applied to
// the top half of a 128-bit source selected by the getTop PatFrag.
1706 class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1707 string SrcT, ValueType DestTy, ValueType SrcTy,
1708 int StartIndex, Operand ImmTy,
1709 SDPatternOperator ExtOp, PatFrag getTop>
1710 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1711 (ins VPR128:$Rn, ImmTy:$Imm),
1712 asmop # "2\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1713 [(set (DestTy VPR128:$Rd),
1716 (SrcTy (getTop VPR128:$Rn)))),
1717 (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
// SSHLL/USHLL family: shift-left-long over every arrangement, plus
// zero-shift Pats so a bare extend (sext/zext) also selects these
// instructions with an immediate of 0.
1720 multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop,
1722 // 64-bit vector types.
1723 def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8,
1725 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1728 def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16,
1730 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1733 def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32,
1735 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1738 // 128-bit vector types
1739 def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b", v8i16, v8i8,
1740 8, shl_imm8, ExtOp, Neon_High16B> {
1741 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1744 def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h", v4i32, v4i16,
1745 4, shl_imm16, ExtOp, Neon_High8H> {
1746 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1749 def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s", v2i64, v2i32,
1750 2, shl_imm32, ExtOp, Neon_High4S> {
1751 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1754 // Use other patterns to match when the immediate is 0.
1755 def : Pat<(v8i16 (ExtOp (v8i8 VPR64:$Rn))),
1756 (!cast<Instruction>(prefix # "_8B") VPR64:$Rn, 0)>;
1758 def : Pat<(v4i32 (ExtOp (v4i16 VPR64:$Rn))),
1759 (!cast<Instruction>(prefix # "_4H") VPR64:$Rn, 0)>;
1761 def : Pat<(v2i64 (ExtOp (v2i32 VPR64:$Rn))),
1762 (!cast<Instruction>(prefix # "_2S") VPR64:$Rn, 0)>;
1764 def : Pat<(v8i16 (ExtOp (v8i8 (Neon_High16B VPR128:$Rn)))),
1765 (!cast<Instruction>(prefix # "_16B") VPR128:$Rn, 0)>;
1767 def : Pat<(v4i32 (ExtOp (v4i16 (Neon_High8H VPR128:$Rn)))),
1768 (!cast<Instruction>(prefix # "_8H") VPR128:$Rn, 0)>;
1770 def : Pat<(v2i64 (ExtOp (v2i32 (Neon_High4S VPR128:$Rn)))),
1771 (!cast<Instruction>(prefix # "_4S") VPR128:$Rn, 0)>;
// Signed widening uses sext, unsigned uses zext; prefix string must match
// the defm name so the !cast lookups above resolve.
1775 defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>;
1776 defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>;
1778 // Rounding/Saturating shift
// Base class for rounding/saturating shifts by immediate: OpNode (usually
// an intrinsic) receives the vector and the raw i32 immediate, unlike the
// plain-shift classes which splat the immediate with Neon_vdup.
1779 class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T,
1780 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1781 SDPatternOperator OpNode>
1782 : NeonI_2VShiftImm<q, u, opcode,
1783 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
1784 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1785 [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn),
1786 (i32 ImmTy:$Imm))))],
1789 // shift right (vector by immediate)
// Rounding/saturating shift right for every arrangement; immh leading
// bits pinned per element size as in the other shift multiclasses.
1790 multiclass NeonI_N2VShR_RQ<bit u, bits<5> opcode, string asmop,
1791 SDPatternOperator OpNode> {
1792 def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1794 let Inst{22-19} = 0b0001;
1797 def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1799 let Inst{22-20} = 0b001;
1802 def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1804 let Inst{22-21} = 0b01;
1807 def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1809 let Inst{22-19} = 0b0001;
1812 def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1814 let Inst{22-20} = 0b001;
1817 def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1819 let Inst{22-21} = 0b01;
1822 def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
// Saturating shift left by immediate (uses shl_imm* operands) plus the
// instantiations below.
1828 multiclass NeonI_N2VShL_Q<bit u, bits<5> opcode, string asmop,
1829 SDPatternOperator OpNode> {
1830 // 64-bit vector types.
1831 def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8,
1833 let Inst{22-19} = 0b0001;
1836 def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16,
1838 let Inst{22-20} = 0b001;
1841 def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32,
1843 let Inst{22-21} = 0b01;
1846 // 128-bit vector types.
1847 def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8,
1849 let Inst{22-19} = 0b0001;
1852 def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16,
1854 let Inst{22-20} = 0b001;
1857 def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32,
1859 let Inst{22-21} = 0b01;
1862 def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64,
1868 // Rounding shift right
1869 defm SRSHRvvi : NeonI_N2VShR_RQ<0b0, 0b00100, "srshr",
1870 int_aarch64_neon_vsrshr>;
1871 defm URSHRvvi : NeonI_N2VShR_RQ<0b1, 0b00100, "urshr",
1872 int_aarch64_neon_vurshr>;
1874 // Saturating shift left unsigned
1875 defm SQSHLUvvi : NeonI_N2VShL_Q<0b1, 0b01100, "sqshlu", int_aarch64_neon_vsqshlu>;
1877 // Saturating shift left
1878 defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>;
1879 defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>;
// Shift-right-and-accumulate base class: result = $src + (Rn >> Imm).
// $src is tied to $Rd (read-modify-write destination).
1881 class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T,
1882 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1884 : NeonI_2VShiftImm<q, u, opcode,
1885 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1886 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1887 [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
1888 (Ty (OpNode (Ty VPRC:$Rn),
1889 (Ty (Neon_vdup (i32 ImmTy:$Imm))))))))],
1891 let Constraints = "$src = $Rd";
1894 // Shift Right accumulate
1895 multiclass NeonI_N2VShRAdd<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
1896 def _8B : N2VShiftAdd<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1898 let Inst{22-19} = 0b0001;
1901 def _4H : N2VShiftAdd<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1903 let Inst{22-20} = 0b001;
1906 def _2S : N2VShiftAdd<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1908 let Inst{22-21} = 0b01;
1911 def _16B : N2VShiftAdd<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1913 let Inst{22-19} = 0b0001;
1916 def _8H : N2VShiftAdd<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1918 let Inst{22-20} = 0b001;
1921 def _4S : N2VShiftAdd<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1923 let Inst{22-21} = 0b01;
1926 def _2D : N2VShiftAdd<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1932 // Shift right and accumulate
1933 defm SSRAvvi : NeonI_N2VShRAdd<0, 0b00010, "ssra", sra>;
1934 defm USRAvvi : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>;
1936 // Rounding shift accumulate
// Same accumulate shape as N2VShiftAdd, but OpNode (a rounding-shift
// intrinsic) takes the raw i32 immediate instead of a splatted vector.
1937 class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T,
1938 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1939 SDPatternOperator OpNode>
1940 : NeonI_2VShiftImm<q, u, opcode,
1941 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1942 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1943 [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
1944 (Ty (OpNode (Ty VPRC:$Rn), (i32 ImmTy:$Imm))))))],
1946 let Constraints = "$src = $Rd";
1949 multiclass NeonI_N2VShRAdd_R<bit u, bits<5> opcode, string asmop,
1950 SDPatternOperator OpNode> {
1951 def _8B : N2VShiftAdd_R<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1953 let Inst{22-19} = 0b0001;
1956 def _4H : N2VShiftAdd_R<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1958 let Inst{22-20} = 0b001;
1961 def _2S : N2VShiftAdd_R<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1963 let Inst{22-21} = 0b01;
1966 def _16B : N2VShiftAdd_R<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1968 let Inst{22-19} = 0b0001;
1971 def _8H : N2VShiftAdd_R<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1973 let Inst{22-20} = 0b001;
1976 def _4S : N2VShiftAdd_R<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1978 let Inst{22-21} = 0b01;
1981 def _2D : N2VShiftAdd_R<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1987 // Rounding shift right and accumulate
1988 defm SRSRAvvi : NeonI_N2VShRAdd_R<0, 0b00110, "srsra", int_aarch64_neon_vsrshr>;
1989 defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>;
1991 // Shift insert by immediate
// SLI/SRI base class: OpNode (vsli/vsri intrinsic) combines the existing
// destination value $src with the shifted $Rn; $src is tied to $Rd.
1992 class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T,
1993 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1994 SDPatternOperator OpNode>
1995 : NeonI_2VShiftImm<q, u, opcode,
1996 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1997 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1998 [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$src), (Ty VPRC:$Rn),
1999 (i32 ImmTy:$Imm))))],
2001 let Constraints = "$src = $Rd";
2004 // shift left insert (vector by immediate)
// All arrangements map to the int_aarch64_neon_vsli intrinsic with
// shl_imm* shift-amount operands.
2005 multiclass NeonI_N2VShLIns<bit u, bits<5> opcode, string asmop> {
2006 def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8,
2007 int_aarch64_neon_vsli> {
2008 let Inst{22-19} = 0b0001;
2011 def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16,
2012 int_aarch64_neon_vsli> {
2013 let Inst{22-20} = 0b001;
2016 def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32,
2017 int_aarch64_neon_vsli> {
2018 let Inst{22-21} = 0b01;
2021 // 128-bit vector types
2022 def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8,
2023 int_aarch64_neon_vsli> {
2024 let Inst{22-19} = 0b0001;
2027 def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16,
2028 int_aarch64_neon_vsli> {
2029 let Inst{22-20} = 0b001;
2032 def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32,
2033 int_aarch64_neon_vsli> {
2034 let Inst{22-21} = 0b01;
2037 def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64,
2038 int_aarch64_neon_vsli> {
2043 // shift right insert (vector by immediate)
// Mirror of NeonI_N2VShLIns using the vsri intrinsic and shr_imm*
// operands; instantiated as SLI/SRI below.
2044 multiclass NeonI_N2VShRIns<bit u, bits<5> opcode, string asmop> {
2045 // 64-bit vector types.
2046 def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
2047 int_aarch64_neon_vsri> {
2048 let Inst{22-19} = 0b0001;
2051 def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
2052 int_aarch64_neon_vsri> {
2053 let Inst{22-20} = 0b001;
2056 def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
2057 int_aarch64_neon_vsri> {
2058 let Inst{22-21} = 0b01;
2061 // 128-bit vector types
2062 def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
2063 int_aarch64_neon_vsri> {
2064 let Inst{22-19} = 0b0001;
2067 def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
2068 int_aarch64_neon_vsri> {
2069 let Inst{22-20} = 0b001;
2072 def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
2073 int_aarch64_neon_vsri> {
2074 let Inst{22-21} = 0b01;
2077 def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
2078 int_aarch64_neon_vsri> {
2083 // Shift left and insert
2084 defm SLIvvi : NeonI_N2VShLIns<0b1, 0b01010, "sli">;
2086 // Shift right and insert
2087 defm SRIvvi : NeonI_N2VShRIns<0b1, 0b01000, "sri">;
// Narrowing shift right: 128-bit source, 64-bit result (no pattern —
// selection is done via the Pat defs further below).
2089 class N2VShR_Narrow<bit q, bit u, bits<5> opcode, string asmop, string DestT,
2090 string SrcT, Operand ImmTy>
2091 : NeonI_2VShiftImm<q, u, opcode,
2092 (outs VPR64:$Rd), (ins VPR128:$Rn, ImmTy:$Imm),
2093 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
// High variant: writes the narrowed result into the top half of $Rd while
// preserving the bottom half, hence the tied $src operand.
2096 class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT,
2097 string SrcT, Operand ImmTy>
2098 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
2099 (ins VPR128:$src, VPR128:$Rn, ImmTy:$Imm),
2100 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
2102 let Constraints = "$src = $Rd";
2105 // shift right narrow (vector by immediate)
2106 multiclass NeonI_N2VShR_Narrow<bit u, bits<5> opcode, string asmop> {
2107 def _8B : N2VShR_Narrow<0b0, u, opcode, asmop, "8b", "8h", shr_imm8> {
2108 let Inst{22-19} = 0b0001;
2111 def _4H : N2VShR_Narrow<0b0, u, opcode, asmop, "4h", "4s", shr_imm16> {
2112 let Inst{22-20} = 0b001;
2115 def _2S : N2VShR_Narrow<0b0, u, opcode, asmop, "2s", "2d", shr_imm32> {
2116 let Inst{22-21} = 0b01;
2119 // Shift Narrow High
2120 def _16B : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "16b", "8h",
2122 let Inst{22-19} = 0b0001;
2125 def _8H : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "8h", "4s",
2127 let Inst{22-20} = 0b001;
2130 def _4S : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "4s", "2d",
2132 let Inst{22-21} = 0b01;
2136 // Shift right narrow
2137 defm SHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10000, "shrn">;
2139 // Shift right narrow (prefix Q is saturating, prefix R is rounding)
2140 defm QSHRUNvvi :NeonI_N2VShR_Narrow<0b1, 0b10000, "sqshrun">;
2141 defm RSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10001, "rshrn">;
2142 defm QRSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10001, "sqrshrun">;
2143 defm SQSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10010, "sqshrn">;
2144 defm UQSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10010, "uqshrn">;
2145 defm SQRSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10011, "sqrshrn">;
2146 defm UQRSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10011, "uqrshrn">;
// concat_vectors helpers: combine two 64-bit halves into one 128-bit
// vector (integer and floating-point element variants).
2148 def Neon_combine_2D : PatFrag<(ops node:$Rm, node:$Rn),
2149 (v2i64 (concat_vectors (v1i64 node:$Rm),
2150 (v1i64 node:$Rn)))>;
2151 def Neon_combine_8H : PatFrag<(ops node:$Rm, node:$Rn),
2152 (v8i16 (concat_vectors (v4i16 node:$Rm),
2153 (v4i16 node:$Rn)))>;
2154 def Neon_combine_4S : PatFrag<(ops node:$Rm, node:$Rn),
2155 (v4i32 (concat_vectors (v2i32 node:$Rm),
2156 (v2i32 node:$Rn)))>;
2157 def Neon_combine_4f : PatFrag<(ops node:$Rm, node:$Rn),
2158 (v4f32 (concat_vectors (v2f32 node:$Rm),
2159 (v2f32 node:$Rn)))>;
2160 def Neon_combine_2d : PatFrag<(ops node:$Rm, node:$Rn),
2161 (v2f64 (concat_vectors (v1f64 node:$Rm),
2162 (v1f64 node:$Rn)))>;
// Logical/arithmetic shift right by a scalar immediate splatted across
// the vector — these are the forms the narrow-shift Pats below match.
2164 def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
2165 (v8i16 (srl (v8i16 node:$lhs),
2166 (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
2167 def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
2168 (v4i32 (srl (v4i32 node:$lhs),
2169 (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
2170 def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
2171 (v2i64 (srl (v2i64 node:$lhs),
2172 (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
2173 def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
2174 (v8i16 (sra (v8i16 node:$lhs),
2175 (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
2176 def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
2177 (v4i32 (sra (v4i32 node:$lhs),
2178 (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
2179 def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
2180 (v2i64 (sra (v2i64 node:$lhs),
2181 (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
2183 // Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors)
// Instantiated once with shr = "lshr" and once with "ashr"; the !cast
// looks up the matching Neon_{l,a}shrImm* PatFrag defined above.  The
// concat forms select the "2" (high-half) SHRN variants, inserting the
// existing low half via SUBREG_TO_REG.
2184 multiclass Neon_shiftNarrow_patterns<string shr> {
2185 def : Pat<(v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H") VPR128:$Rn,
2186 (i32 shr_imm8:$Imm)))),
2187 (SHRNvvi_8B VPR128:$Rn, imm:$Imm)>;
2188 def : Pat<(v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S") VPR128:$Rn,
2189 (i32 shr_imm16:$Imm)))),
2190 (SHRNvvi_4H VPR128:$Rn, imm:$Imm)>;
2191 def : Pat<(v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D") VPR128:$Rn,
2192 (i32 shr_imm32:$Imm)))),
2193 (SHRNvvi_2S VPR128:$Rn, imm:$Imm)>;
2195 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2196 (v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H")
2197 VPR128:$Rn, (i32 shr_imm8:$Imm))))))),
2198 (SHRNvvi_16B (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
2199 VPR128:$Rn, imm:$Imm)>;
2200 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2201 (v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S")
2202 VPR128:$Rn, (i32 shr_imm16:$Imm))))))),
2203 (SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2204 VPR128:$Rn, imm:$Imm)>;
2205 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2206 (v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D")
2207 VPR128:$Rn, (i32 shr_imm32:$Imm))))))),
2208 (SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2209 VPR128:$Rn, imm:$Imm)>;
// Saturating/rounding narrow shifts are only reachable via intrinsics
// (op); prefix names the instruction family to instantiate.  The combine
// forms again select the high-half ("_16B"/"_8H"/"_4S") variants.
2212 multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> {
2213 def : Pat<(v8i8 (op (v8i16 VPR128:$Rn), shr_imm8:$Imm)),
2214 (!cast<Instruction>(prefix # "_8B") VPR128:$Rn, imm:$Imm)>;
2215 def : Pat<(v4i16 (op (v4i32 VPR128:$Rn), shr_imm16:$Imm)),
2216 (!cast<Instruction>(prefix # "_4H") VPR128:$Rn, imm:$Imm)>;
2217 def : Pat<(v2i32 (op (v2i64 VPR128:$Rn), shr_imm32:$Imm)),
2218 (!cast<Instruction>(prefix # "_2S") VPR128:$Rn, imm:$Imm)>;
2220 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2221 (v1i64 (bitconvert (v8i8
2222 (op (v8i16 VPR128:$Rn), shr_imm8:$Imm))))),
2223 (!cast<Instruction>(prefix # "_16B")
2224 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2225 VPR128:$Rn, imm:$Imm)>;
2226 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2227 (v1i64 (bitconvert (v4i16
2228 (op (v4i32 VPR128:$Rn), shr_imm16:$Imm))))),
2229 (!cast<Instruction>(prefix # "_8H")
2230 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2231 VPR128:$Rn, imm:$Imm)>;
2232 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2233 (v1i64 (bitconvert (v2i32
2234 (op (v2i64 VPR128:$Rn), shr_imm32:$Imm))))),
2235 (!cast<Instruction>(prefix # "_4S")
2236 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2237 VPR128:$Rn, imm:$Imm)>;
2240 defm : Neon_shiftNarrow_patterns<"lshr">;
2241 defm : Neon_shiftNarrow_patterns<"ashr">;
2243 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrun, "QSHRUNvvi">;
2244 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vrshrn, "RSHRNvvi">;
2245 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrun, "QRSHRUNvvi">;
2246 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrn, "SQSHRNvvi">;
2247 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqshrn, "UQSHRNvvi">;
2248 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrn, "SQRSHRNvvi">;
2249 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqrshrn, "UQRSHRNvvi">;
2251 // Conversions between fixed-point and floating-point
// Base class: IntOp converts $Rn using the i32 immediate as the number of
// fractional bits.
2252 class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T,
2253 RegisterOperand VPRC, ValueType DestTy, ValueType SrcTy,
2254 Operand ImmTy, SDPatternOperator IntOp>
2255 : NeonI_2VShiftImm<q, u, opcode,
2256 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
2257 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
2258 [(set (DestTy VPRC:$Rd), (DestTy (IntOp (SrcTy VPRC:$Rn),
2259 (i32 ImmTy:$Imm))))],
// Fixed-point integer -> floating-point (SCVTF/UCVTF with #fbits).
2262 multiclass NeonI_N2VCvt_Fx2fp<bit u, bits<5> opcode, string asmop,
2263 SDPatternOperator IntOp> {
2264 def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2f32, v2i32,
2266 let Inst{22-21} = 0b01;
2269 def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4f32, v4i32,
2271 let Inst{22-21} = 0b01;
2274 def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2f64, v2i64,
// Floating-point -> fixed-point integer (FCVTZS/FCVTZU with #fbits).
2280 multiclass NeonI_N2VCvt_Fp2fx<bit u, bits<5> opcode, string asmop,
2281 SDPatternOperator IntOp> {
2282 def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2i32, v2f32,
2284 let Inst{22-21} = 0b01;
2287 def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4i32, v4f32,
2289 let Inst{22-21} = 0b01;
2292 def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2i64, v2f64,
2298 // Convert fixed-point to floating-point
2299 defm VCVTxs2f : NeonI_N2VCvt_Fx2fp<0, 0b11100, "scvtf",
2300 int_arm_neon_vcvtfxs2fp>;
2301 defm VCVTxu2f : NeonI_N2VCvt_Fx2fp<1, 0b11100, "ucvtf",
2302 int_arm_neon_vcvtfxu2fp>;
2304 // Convert floating-point to fixed-point
2305 defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs",
2306 int_arm_neon_vcvtfp2fxs>;
2307 defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu",
2308 int_arm_neon_vcvtfp2fxu>;
// PatFrags that extend (sext or zext, per the `ext` parameter) the high
// half of a 128-bit vector; instantiated as NI_sext_high_* and
// NI_zext_high_* and used by the long/wide (3VDiff) multiclasses below.
2310 multiclass Neon_sshll2_0<SDNode ext>
2312 def _v8i8 : PatFrag<(ops node:$Rn),
2313 (v8i16 (ext (v8i8 (Neon_High16B node:$Rn))))>;
2314 def _v4i16 : PatFrag<(ops node:$Rn),
2315 (v4i32 (ext (v4i16 (Neon_High8H node:$Rn))))>;
2316 def _v2i32 : PatFrag<(ops node:$Rn),
2317 (v2i64 (ext (v2i32 (Neon_High4S node:$Rn))))>;
2320 defm NI_sext_high : Neon_sshll2_0<sext>;
2321 defm NI_zext_high : Neon_sshll2_0<zext>;
2324 //===----------------------------------------------------------------------===//
2325 // Multiclasses for NeonI_Across
2326 //===----------------------------------------------------------------------===//
// Across-lanes reductions that widen the element (SADDLV/UADDLV): the
// scalar result register is one size larger than the source element.
2330 multiclass NeonI_2VAcross_1<bit u, bits<5> opcode,
2331 string asmop, SDPatternOperator opnode>
2333 def _1h8b: NeonI_2VAcross<0b0, u, 0b00, opcode,
2334 (outs FPR16:$Rd), (ins VPR64:$Rn),
2335 asmop # "\t$Rd, $Rn.8b",
2336 [(set (v1i16 FPR16:$Rd),
2337 (v1i16 (opnode (v8i8 VPR64:$Rn))))],
2340 def _1h16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
2341 (outs FPR16:$Rd), (ins VPR128:$Rn),
2342 asmop # "\t$Rd, $Rn.16b",
2343 [(set (v1i16 FPR16:$Rd),
2344 (v1i16 (opnode (v16i8 VPR128:$Rn))))],
2347 def _1s4h: NeonI_2VAcross<0b0, u, 0b01, opcode,
2348 (outs FPR32:$Rd), (ins VPR64:$Rn),
2349 asmop # "\t$Rd, $Rn.4h",
2350 [(set (v1i32 FPR32:$Rd),
2351 (v1i32 (opnode (v4i16 VPR64:$Rn))))],
2354 def _1s8h: NeonI_2VAcross<0b1, u, 0b01, opcode,
2355 (outs FPR32:$Rd), (ins VPR128:$Rn),
2356 asmop # "\t$Rd, $Rn.8h",
2357 [(set (v1i32 FPR32:$Rd),
2358 (v1i32 (opnode (v8i16 VPR128:$Rn))))],
2361 // _1d2s doesn't exist!
2363 def _1d4s: NeonI_2VAcross<0b1, u, 0b10, opcode,
2364 (outs FPR64:$Rd), (ins VPR128:$Rn),
2365 asmop # "\t$Rd, $Rn.4s",
2366 [(set (v1i64 FPR64:$Rd),
2367 (v1i64 (opnode (v4i32 VPR128:$Rn))))],
2371 defm SADDLV : NeonI_2VAcross_1<0b0, 0b00011, "saddlv", int_aarch64_neon_saddlv>;
2372 defm UADDLV : NeonI_2VAcross_1<0b1, 0b00011, "uaddlv", int_aarch64_neon_uaddlv>;
// Across-lanes reductions that keep the element size (MAXV/MINV/ADDV):
// scalar result register matches the source element width.
2376 multiclass NeonI_2VAcross_2<bit u, bits<5> opcode,
2377 string asmop, SDPatternOperator opnode>
2379 def _1b8b: NeonI_2VAcross<0b0, u, 0b00, opcode,
2380 (outs FPR8:$Rd), (ins VPR64:$Rn),
2381 asmop # "\t$Rd, $Rn.8b",
2382 [(set (v1i8 FPR8:$Rd),
2383 (v1i8 (opnode (v8i8 VPR64:$Rn))))],
2386 def _1b16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
2387 (outs FPR8:$Rd), (ins VPR128:$Rn),
2388 asmop # "\t$Rd, $Rn.16b",
2389 [(set (v1i8 FPR8:$Rd),
2390 (v1i8 (opnode (v16i8 VPR128:$Rn))))],
2393 def _1h4h: NeonI_2VAcross<0b0, u, 0b01, opcode,
2394 (outs FPR16:$Rd), (ins VPR64:$Rn),
2395 asmop # "\t$Rd, $Rn.4h",
2396 [(set (v1i16 FPR16:$Rd),
2397 (v1i16 (opnode (v4i16 VPR64:$Rn))))],
2400 def _1h8h: NeonI_2VAcross<0b1, u, 0b01, opcode,
2401 (outs FPR16:$Rd), (ins VPR128:$Rn),
2402 asmop # "\t$Rd, $Rn.8h",
2403 [(set (v1i16 FPR16:$Rd),
2404 (v1i16 (opnode (v8i16 VPR128:$Rn))))],
2407 // _1s2s doesn't exist!
2409 def _1s4s: NeonI_2VAcross<0b1, u, 0b10, opcode,
2410 (outs FPR32:$Rd), (ins VPR128:$Rn),
2411 asmop # "\t$Rd, $Rn.4s",
2412 [(set (v1i32 FPR32:$Rd),
2413 (v1i32 (opnode (v4i32 VPR128:$Rn))))],
2417 defm SMAXV : NeonI_2VAcross_2<0b0, 0b01010, "smaxv", int_aarch64_neon_smaxv>;
2418 defm UMAXV : NeonI_2VAcross_2<0b1, 0b01010, "umaxv", int_aarch64_neon_umaxv>;
2420 defm SMINV : NeonI_2VAcross_2<0b0, 0b11010, "sminv", int_aarch64_neon_sminv>;
2421 defm UMINV : NeonI_2VAcross_2<0b1, 0b11010, "uminv", int_aarch64_neon_uminv>;
2423 defm ADDV : NeonI_2VAcross_2<0b0, 0b11011, "addv", int_aarch64_neon_vaddv>;
// Floating-point across-lanes reductions: only the 4s arrangement exists;
// `size` distinguishes max (0b00) from min (0b10) encodings.
2427 multiclass NeonI_2VAcross_3<bit u, bits<5> opcode, bits<2> size,
2428 string asmop, SDPatternOperator opnode> {
2429 def _1s4s: NeonI_2VAcross<0b1, u, size, opcode,
2430 (outs FPR32:$Rd), (ins VPR128:$Rn),
2431 asmop # "\t$Rd, $Rn.4s",
2432 [(set (f32 FPR32:$Rd),
2433 (f32 (opnode (v4f32 VPR128:$Rn))))],
2437 defm FMAXNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b00, "fmaxnmv",
2438 int_aarch64_neon_vmaxnmv>;
2439 defm FMINNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b10, "fminnmv",
2440 int_aarch64_neon_vminnmv>;
2442 defm FMAXV : NeonI_2VAcross_3<0b1, 0b01111, 0b00, "fmaxv",
2443 int_aarch64_neon_vmaxv>;
2444 defm FMINV : NeonI_2VAcross_3<0b1, 0b01111, 0b10, "fminv",
2445 int_aarch64_neon_vminv>;
2447 // The following definitions are for the instruction class (Perm).
// Two-register permute (UZP/ZIP/TRN): both sources and the destination
// share one arrangement; opnode is the matching AArch64ISD permute node.
2449 class NeonI_Permute<bit q, bits<2> size, bits<3> opcode,
2450 string asmop, RegisterOperand OpVPR, string OpS,
2451 SDPatternOperator opnode, ValueType Ty>
2452 : NeonI_Perm<q, size, opcode,
2453 (outs OpVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2454 asmop # "\t$Rd." # OpS # ", $Rn." # OpS # ", $Rm." # OpS,
2455 [(set (Ty OpVPR:$Rd),
2456 (Ty (opnode (Ty OpVPR:$Rn), (Ty OpVPR:$Rm))))],
// One permute record per integer arrangement (no 1d: a 64-bit permute of
// one element is meaningless).
2459 multiclass NeonI_Perm_pat<bits<3> opcode, string asmop,
2460 SDPatternOperator opnode> {
2461 def _8b : NeonI_Permute<0b0, 0b00, opcode, asmop,
2462 VPR64, "8b", opnode, v8i8>;
2463 def _16b : NeonI_Permute<0b1, 0b00, opcode, asmop,
2464 VPR128, "16b",opnode, v16i8>;
2465 def _4h : NeonI_Permute<0b0, 0b01, opcode, asmop,
2466 VPR64, "4h", opnode, v4i16>;
2467 def _8h : NeonI_Permute<0b1, 0b01, opcode, asmop,
2468 VPR128, "8h", opnode, v8i16>;
2469 def _2s : NeonI_Permute<0b0, 0b10, opcode, asmop,
2470 VPR64, "2s", opnode, v2i32>;
2471 def _4s : NeonI_Permute<0b1, 0b10, opcode, asmop,
2472 VPR128, "4s", opnode, v4i32>;
2473 def _2d : NeonI_Permute<0b1, 0b11, opcode, asmop,
2474 VPR128, "2d", opnode, v2i64>;
2477 defm UZP1vvv : NeonI_Perm_pat<0b001, "uzp1", Neon_uzp1>;
2478 defm TRN1vvv : NeonI_Perm_pat<0b010, "trn1", Neon_trn1>;
2479 defm ZIP1vvv : NeonI_Perm_pat<0b011, "zip1", Neon_zip1>;
2480 defm UZP2vvv : NeonI_Perm_pat<0b101, "uzp2", Neon_uzp2>;
2481 defm TRN2vvv : NeonI_Perm_pat<0b110, "trn2", Neon_trn2>;
2482 defm ZIP2vvv : NeonI_Perm_pat<0b111, "zip2", Neon_zip2>;
// Reuse the integer permute instructions for floating-point vectors by
// adding Pats that map f32/f64 permute nodes onto the _2s/_4s/_2d defs.
2484 multiclass NeonI_Perm_float_pat<string INS, SDPatternOperator opnode> {
2485 def : Pat<(v2f32 (opnode (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
2486 (!cast<Instruction>(INS # "_2s") VPR64:$Rn, VPR64:$Rm)>;
2488 def : Pat<(v4f32 (opnode (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
2489 (!cast<Instruction>(INS # "_4s") VPR128:$Rn, VPR128:$Rm)>;
2491 def : Pat<(v2f64 (opnode (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
2492 (!cast<Instruction>(INS # "_2d") VPR128:$Rn, VPR128:$Rm)>;
2495 defm : NeonI_Perm_float_pat<"UZP1vvv", Neon_uzp1>;
2496 defm : NeonI_Perm_float_pat<"UZP2vvv", Neon_uzp2>;
2497 defm : NeonI_Perm_float_pat<"ZIP1vvv", Neon_zip1>;
2498 defm : NeonI_Perm_float_pat<"ZIP2vvv", Neon_zip2>;
2499 defm : NeonI_Perm_float_pat<"TRN1vvv", Neon_trn1>;
2500 defm : NeonI_Perm_float_pat<"TRN2vvv", Neon_trn2>;
2502 // The following definitions are for the instruction class (3V Diff).
2504 // normal long/long2 pattern
// Three-register different-size "long" class: both operands widened by
// `ext` (sext/zext or the NI_*_high PatFrags) before opnode combines them.
2505 class NeonI_3VDL<bit q, bit u, bits<2> size, bits<4> opcode,
2506 string asmop, string ResS, string OpS,
2507 SDPatternOperator opnode, SDPatternOperator ext,
2508 RegisterOperand OpVPR,
2509 ValueType ResTy, ValueType OpTy>
2510 : NeonI_3VDiff<q, u, size, opcode,
2511 (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2512 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2513 [(set (ResTy VPR128:$Rd),
2514 (ResTy (opnode (ResTy (ext (OpTy OpVPR:$Rn))),
2515 (ResTy (ext (OpTy OpVPR:$Rm))))))],
// Signed long variants (low halves, sext).
2518 multiclass NeonI_3VDL_s<bit u, bits<4> opcode,
2519 string asmop, SDPatternOperator opnode,
2520 bit Commutable = 0> {
2521 let isCommutable = Commutable in {
2522 def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2523 opnode, sext, VPR64, v8i16, v8i8>;
2524 def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2525 opnode, sext, VPR64, v4i32, v4i16>;
2526 def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2527 opnode, sext, VPR64, v2i64, v2i32>;
// "long2" (high-half, sext) and the unsigned (zext) counterparts, plus
// the SADDL/UADDL/SSUBL/USUBL instantiations.  add is commutable; sub is
// not.
2531 multiclass NeonI_3VDL2_s<bit u, bits<4> opcode, string asmop,
2532 SDPatternOperator opnode, bit Commutable = 0> {
2533 let isCommutable = Commutable in {
2534 def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2535 opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
2536 def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2537 opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
2538 def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2539 opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
2543 multiclass NeonI_3VDL_u<bit u, bits<4> opcode, string asmop,
2544 SDPatternOperator opnode, bit Commutable = 0> {
2545 let isCommutable = Commutable in {
2546 def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2547 opnode, zext, VPR64, v8i16, v8i8>;
2548 def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2549 opnode, zext, VPR64, v4i32, v4i16>;
2550 def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2551 opnode, zext, VPR64, v2i64, v2i32>;
2555 multiclass NeonI_3VDL2_u<bit u, bits<4> opcode, string asmop,
2556 SDPatternOperator opnode, bit Commutable = 0> {
2557 let isCommutable = Commutable in {
2558 def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2559 opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
2560 def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2561 opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
2562 def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2563 opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
2567 defm SADDLvvv :  NeonI_3VDL_s<0b0, 0b0000, "saddl", add, 1>;
2568 defm UADDLvvv :  NeonI_3VDL_u<0b1, 0b0000, "uaddl", add, 1>;
2570 defm SADDL2vvv :  NeonI_3VDL2_s<0b0, 0b0000, "saddl2", add, 1>;
2571 defm UADDL2vvv :  NeonI_3VDL2_u<0b1, 0b0000, "uaddl2", add, 1>;
2573 defm SSUBLvvv :  NeonI_3VDL_s<0b0, 0b0010, "ssubl", sub, 0>;
2574 defm USUBLvvv :  NeonI_3VDL_u<0b1, 0b0010, "usubl", sub, 0>;
2576 defm SSUBL2vvv :  NeonI_3VDL2_s<0b0, 0b0010, "ssubl2", sub, 0>;
2577 defm USUBL2vvv :  NeonI_3VDL2_u<0b1, 0b0010, "usubl2", sub, 0>;
2579 // normal wide/wide2 pattern
// "Wide" class: first operand is already full-width; only the second
// operand $Rm is widened by `ext` before opnode combines them.
2580 class NeonI_3VDW<bit q, bit u, bits<2> size, bits<4> opcode,
2581 string asmop, string ResS, string OpS,
2582 SDPatternOperator opnode, SDPatternOperator ext,
2583 RegisterOperand OpVPR,
2584 ValueType ResTy, ValueType OpTy>
2585 : NeonI_3VDiff<q, u, size, opcode,
2586 (outs VPR128:$Rd), (ins VPR128:$Rn, OpVPR:$Rm),
2587 asmop # "\t$Rd." # ResS # ", $Rn." # ResS # ", $Rm." # OpS,
2588 [(set (ResTy VPR128:$Rd),
2589 (ResTy (opnode (ResTy VPR128:$Rn),
2590 (ResTy (ext (OpTy OpVPR:$Rm))))))],
// Signed wide variants (low halves, sext): SADDW/SSUBW.
2593 multiclass NeonI_3VDW_s<bit u, bits<4> opcode, string asmop,
2594 SDPatternOperator opnode> {
2595 def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2596 opnode, sext, VPR64, v8i16, v8i8>;
2597 def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2598 opnode, sext, VPR64, v4i32, v4i16>;
2599 def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2600 opnode, sext, VPR64, v2i64, v2i32>;
2603 defm SADDWvvv :  NeonI_3VDW_s<0b0, 0b0001, "saddw", add>;
2604 defm SSUBWvvv :  NeonI_3VDW_s<0b0, 0b0011, "ssubw", sub>;
// Wide2 (high half, sext) and the unsigned (zext) wide/wide2 variants,
// with their SADDW2/UADDW/USUBW/... instantiations.
2606 multiclass NeonI_3VDW2_s<bit u, bits<4> opcode, string asmop,
2607 SDPatternOperator opnode> {
2608 def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2609 opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
2610 def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2611 opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
2612 def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2613 opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
2616 defm SADDW2vvv :  NeonI_3VDW2_s<0b0, 0b0001, "saddw2", add>;
2617 defm SSUBW2vvv :  NeonI_3VDW2_s<0b0, 0b0011, "ssubw2", sub>;
2619 multiclass NeonI_3VDW_u<bit u, bits<4> opcode, string asmop,
2620 SDPatternOperator opnode> {
2621 def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2622 opnode, zext, VPR64, v8i16, v8i8>;
2623 def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2624 opnode, zext, VPR64, v4i32, v4i16>;
2625 def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2626 opnode, zext, VPR64, v2i64, v2i32>;
2629 defm UADDWvvv :  NeonI_3VDW_u<0b1, 0b0001, "uaddw", add>;
2630 defm USUBWvvv :  NeonI_3VDW_u<0b1, 0b0011, "usubw", sub>;
2632 multiclass NeonI_3VDW2_u<bit u, bits<4> opcode, string asmop,
2633 SDPatternOperator opnode> {
2634 def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2635 opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
2636 def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2637 opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
2638 def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2639 opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
2642 defm UADDW2vvv :  NeonI_3VDW2_u<0b1, 0b0001, "uaddw2", add>;
2643 defm USUBW2vvv :  NeonI_3VDW2_u<0b1, 0b0011, "usubw2", sub>;
2645 // Get the high half part of the vector element.
2646 multiclass NeonI_get_high {
2647 def _8h : PatFrag<(ops node:$Rn),
2648 (v8i8 (trunc (v8i16 (srl (v8i16 node:$Rn),
2649 (v8i16 (Neon_vdup (i32 8)))))))>;
2650 def _4s : PatFrag<(ops node:$Rn),
2651 (v4i16 (trunc (v4i32 (srl (v4i32 node:$Rn),
2652 (v4i32 (Neon_vdup (i32 16)))))))>;
2653 def _2d : PatFrag<(ops node:$Rn),
2654 (v2i32 (trunc (v2i64 (srl (v2i64 node:$Rn),
2655 (v2i64 (Neon_vdup (i32 32)))))))>;
2658 defm NI_get_hi : NeonI_get_high;
2660 // pattern for addhn/subhn with 2 operands
2661 class NeonI_3VDN_addhn_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
2662 string asmop, string ResS, string OpS,
2663 SDPatternOperator opnode, SDPatternOperator get_hi,
2664 ValueType ResTy, ValueType OpTy>
2665 : NeonI_3VDiff<q, u, size, opcode,
2666 (outs VPR64:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
2667 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2668 [(set (ResTy VPR64:$Rd),
2670 (OpTy (opnode (OpTy VPR128:$Rn),
2671 (OpTy VPR128:$Rm))))))],
2674 multiclass NeonI_3VDN_addhn_2Op<bit u, bits<4> opcode, string asmop,
2675 SDPatternOperator opnode, bit Commutable = 0> {
2676 let isCommutable = Commutable in {
2677 def _8b8h : NeonI_3VDN_addhn_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
2678 opnode, NI_get_hi_8h, v8i8, v8i16>;
2679 def _4h4s : NeonI_3VDN_addhn_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
2680 opnode, NI_get_hi_4s, v4i16, v4i32>;
2681 def _2s2d : NeonI_3VDN_addhn_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
2682 opnode, NI_get_hi_2d, v2i32, v2i64>;
2686 defm ADDHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0100, "addhn", add, 1>;
2687 defm SUBHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>;
2689 // pattern for operation with 2 operands
2690 class NeonI_3VD_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
2691 string asmop, string ResS, string OpS,
2692 SDPatternOperator opnode,
2693 RegisterOperand ResVPR, RegisterOperand OpVPR,
2694 ValueType ResTy, ValueType OpTy>
2695 : NeonI_3VDiff<q, u, size, opcode,
2696 (outs ResVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2697 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2698 [(set (ResTy ResVPR:$Rd),
2699 (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))],
2702 // normal narrow pattern
2703 multiclass NeonI_3VDN_2Op<bit u, bits<4> opcode, string asmop,
2704 SDPatternOperator opnode, bit Commutable = 0> {
2705 let isCommutable = Commutable in {
2706 def _8b8h : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
2707 opnode, VPR64, VPR128, v8i8, v8i16>;
2708 def _4h4s : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
2709 opnode, VPR64, VPR128, v4i16, v4i32>;
2710 def _2s2d : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
2711 opnode, VPR64, VPR128, v2i32, v2i64>;
2715 defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>;
2716 defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>;
2718 // pattern for acle intrinsic with 3 operands
2719 class NeonI_3VDN_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
2720 string asmop, string ResS, string OpS>
2721 : NeonI_3VDiff<q, u, size, opcode,
2722 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
2723 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2725 let Constraints = "$src = $Rd";
2726 let neverHasSideEffects = 1;
2729 multiclass NeonI_3VDN_3Op_v1<bit u, bits<4> opcode, string asmop> {
2730 def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">;
2731 def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">;
2732 def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">;
2735 defm ADDHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0100, "addhn2">;
2736 defm SUBHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0110, "subhn2">;
2738 defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2">;
2739 defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2">;
2741 // Patterns have to be separate because there's a SUBREG_TO_REG in the output
2743 class NarrowHighHalfPat<Instruction INST, ValueType DstTy, ValueType SrcTy,
2744 SDPatternOperator coreop>
2745 : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2746 (v1i64 (bitconvert (DstTy (coreop (SrcTy VPR128:$Rn),
2747 (SrcTy VPR128:$Rm)))))),
2748 (INST (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2749 VPR128:$Rn, VPR128:$Rm)>;
2752 def : NarrowHighHalfPat<ADDHN2vvv_16b8h, v8i8, v8i16,
2753 BinOpFrag<(NI_get_hi_8h (add node:$LHS, node:$RHS))>>;
2754 def : NarrowHighHalfPat<ADDHN2vvv_8h4s, v4i16, v4i32,
2755 BinOpFrag<(NI_get_hi_4s (add node:$LHS, node:$RHS))>>;
2756 def : NarrowHighHalfPat<ADDHN2vvv_4s2d, v2i32, v2i64,
2757 BinOpFrag<(NI_get_hi_2d (add node:$LHS, node:$RHS))>>;
2760 def : NarrowHighHalfPat<SUBHN2vvv_16b8h, v8i8, v8i16,
2761 BinOpFrag<(NI_get_hi_8h (sub node:$LHS, node:$RHS))>>;
2762 def : NarrowHighHalfPat<SUBHN2vvv_8h4s, v4i16, v4i32,
2763 BinOpFrag<(NI_get_hi_4s (sub node:$LHS, node:$RHS))>>;
2764 def : NarrowHighHalfPat<SUBHN2vvv_4s2d, v2i32, v2i64,
2765 BinOpFrag<(NI_get_hi_2d (sub node:$LHS, node:$RHS))>>;
2768 def : NarrowHighHalfPat<RADDHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vraddhn>;
2769 def : NarrowHighHalfPat<RADDHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vraddhn>;
2770 def : NarrowHighHalfPat<RADDHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vraddhn>;
2773 def : NarrowHighHalfPat<RSUBHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vrsubhn>;
2774 def : NarrowHighHalfPat<RSUBHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vrsubhn>;
2775 def : NarrowHighHalfPat<RSUBHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vrsubhn>;
2777 // pattern that need to extend result
2778 class NeonI_3VDL_Ext<bit q, bit u, bits<2> size, bits<4> opcode,
2779 string asmop, string ResS, string OpS,
2780 SDPatternOperator opnode,
2781 RegisterOperand OpVPR,
2782 ValueType ResTy, ValueType OpTy, ValueType OpSTy>
2783 : NeonI_3VDiff<q, u, size, opcode,
2784 (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2785 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2786 [(set (ResTy VPR128:$Rd),
2787 (ResTy (zext (OpSTy (opnode (OpTy OpVPR:$Rn),
2788 (OpTy OpVPR:$Rm))))))],
2791 multiclass NeonI_3VDL_zext<bit u, bits<4> opcode, string asmop,
2792 SDPatternOperator opnode, bit Commutable = 0> {
2793 let isCommutable = Commutable in {
2794 def _8h8b : NeonI_3VDL_Ext<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2795 opnode, VPR64, v8i16, v8i8, v8i8>;
2796 def _4s4h : NeonI_3VDL_Ext<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2797 opnode, VPR64, v4i32, v4i16, v4i16>;
2798 def _2d2s : NeonI_3VDL_Ext<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2799 opnode, VPR64, v2i64, v2i32, v2i32>;
2803 defm SABDLvvv : NeonI_3VDL_zext<0b0, 0b0111, "sabdl", int_arm_neon_vabds, 1>;
2804 defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>;
2806 multiclass NeonI_Op_High<SDPatternOperator op> {
2807 def _16B : PatFrag<(ops node:$Rn, node:$Rm),
2808 (op (v8i8 (Neon_High16B node:$Rn)),
2809 (v8i8 (Neon_High16B node:$Rm)))>;
2810 def _8H : PatFrag<(ops node:$Rn, node:$Rm),
2811 (op (v4i16 (Neon_High8H node:$Rn)),
2812 (v4i16 (Neon_High8H node:$Rm)))>;
2813 def _4S : PatFrag<(ops node:$Rn, node:$Rm),
2814 (op (v2i32 (Neon_High4S node:$Rn)),
2815 (v2i32 (Neon_High4S node:$Rm)))>;
2818 defm NI_sabdl_hi : NeonI_Op_High<int_arm_neon_vabds>;
2819 defm NI_uabdl_hi : NeonI_Op_High<int_arm_neon_vabdu>;
2820 defm NI_smull_hi : NeonI_Op_High<int_arm_neon_vmulls>;
2821 defm NI_umull_hi : NeonI_Op_High<int_arm_neon_vmullu>;
2822 defm NI_qdmull_hi : NeonI_Op_High<int_arm_neon_vqdmull>;
2823 defm NI_pmull_hi : NeonI_Op_High<int_arm_neon_vmullp>;
2825 multiclass NeonI_3VDL_Abd_u<bit u, bits<4> opcode, string asmop, string opnode,
2826 bit Commutable = 0> {
2827 let isCommutable = Commutable in {
2828 def _8h8b : NeonI_3VDL_Ext<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2829 !cast<PatFrag>(opnode # "_16B"),
2830 VPR128, v8i16, v16i8, v8i8>;
2831 def _4s4h : NeonI_3VDL_Ext<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2832 !cast<PatFrag>(opnode # "_8H"),
2833 VPR128, v4i32, v8i16, v4i16>;
2834 def _2d2s : NeonI_3VDL_Ext<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2835 !cast<PatFrag>(opnode # "_4S"),
2836 VPR128, v2i64, v4i32, v2i32>;
2840 defm SABDL2vvv : NeonI_3VDL_Abd_u<0b0, 0b0111, "sabdl2", "NI_sabdl_hi", 1>;
2841 defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 0b0111, "uabdl2", "NI_uabdl_hi", 1>;
2843 // For pattern that need two operators being chained.
2844 class NeonI_3VDL_Aba<bit q, bit u, bits<2> size, bits<4> opcode,
2845 string asmop, string ResS, string OpS,
2846 SDPatternOperator opnode, SDPatternOperator subop,
2847 RegisterOperand OpVPR,
2848 ValueType ResTy, ValueType OpTy, ValueType OpSTy>
2849 : NeonI_3VDiff<q, u, size, opcode,
2850 (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
2851 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2852 [(set (ResTy VPR128:$Rd),
2854 (ResTy VPR128:$src),
2855 (ResTy (zext (OpSTy (subop (OpTy OpVPR:$Rn),
2856 (OpTy OpVPR:$Rm))))))))],
2858 let Constraints = "$src = $Rd";
2861 multiclass NeonI_3VDL_Aba_v1<bit u, bits<4> opcode, string asmop,
2862 SDPatternOperator opnode, SDPatternOperator subop>{
2863 def _8h8b : NeonI_3VDL_Aba<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2864 opnode, subop, VPR64, v8i16, v8i8, v8i8>;
2865 def _4s4h : NeonI_3VDL_Aba<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2866 opnode, subop, VPR64, v4i32, v4i16, v4i16>;
2867 def _2d2s : NeonI_3VDL_Aba<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2868 opnode, subop, VPR64, v2i64, v2i32, v2i32>;
2871 defm SABALvvv : NeonI_3VDL_Aba_v1<0b0, 0b0101, "sabal",
2872 add, int_arm_neon_vabds>;
2873 defm UABALvvv : NeonI_3VDL_Aba_v1<0b1, 0b0101, "uabal",
2874 add, int_arm_neon_vabdu>;
2876 multiclass NeonI_3VDL2_Aba_v1<bit u, bits<4> opcode, string asmop,
2877 SDPatternOperator opnode, string subop> {
2878 def _8h8b : NeonI_3VDL_Aba<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2879 opnode, !cast<PatFrag>(subop # "_16B"),
2880 VPR128, v8i16, v16i8, v8i8>;
2881 def _4s4h : NeonI_3VDL_Aba<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2882 opnode, !cast<PatFrag>(subop # "_8H"),
2883 VPR128, v4i32, v8i16, v4i16>;
2884 def _2d2s : NeonI_3VDL_Aba<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2885 opnode, !cast<PatFrag>(subop # "_4S"),
2886 VPR128, v2i64, v4i32, v2i32>;
2889 defm SABAL2vvv : NeonI_3VDL2_Aba_v1<0b0, 0b0101, "sabal2", add,
2891 defm UABAL2vvv : NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add,
2894 // Long pattern with 2 operands
2895 multiclass NeonI_3VDL_2Op<bit u, bits<4> opcode, string asmop,
2896 SDPatternOperator opnode, bit Commutable = 0> {
2897 let isCommutable = Commutable in {
2898 def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2899 opnode, VPR128, VPR64, v8i16, v8i8>;
2900 def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2901 opnode, VPR128, VPR64, v4i32, v4i16>;
2902 def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2903 opnode, VPR128, VPR64, v2i64, v2i32>;
2907 defm SMULLvvv : NeonI_3VDL_2Op<0b0, 0b1100, "smull", int_arm_neon_vmulls, 1>;
2908 defm UMULLvvv : NeonI_3VDL_2Op<0b1, 0b1100, "umull", int_arm_neon_vmullu, 1>;
2910 class NeonI_3VDL2_2Op_mull<bit q, bit u, bits<2> size, bits<4> opcode,
2911 string asmop, string ResS, string OpS,
2912 SDPatternOperator opnode,
2913 ValueType ResTy, ValueType OpTy>
2914 : NeonI_3VDiff<q, u, size, opcode,
2915 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
2916 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2917 [(set (ResTy VPR128:$Rd),
2918 (ResTy (opnode (OpTy VPR128:$Rn), (OpTy VPR128:$Rm))))],
2921 multiclass NeonI_3VDL2_2Op_mull_v1<bit u, bits<4> opcode, string asmop,
2922 string opnode, bit Commutable = 0> {
2923 let isCommutable = Commutable in {
2924 def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2925 !cast<PatFrag>(opnode # "_16B"),
2927 def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2928 !cast<PatFrag>(opnode # "_8H"),
2930 def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2931 !cast<PatFrag>(opnode # "_4S"),
2936 defm SMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b0, 0b1100, "smull2",
2938 defm UMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b1, 0b1100, "umull2",
2941 // Long pattern with 3 operands
2942 class NeonI_3VDL_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
2943 string asmop, string ResS, string OpS,
2944 SDPatternOperator opnode,
2945 ValueType ResTy, ValueType OpTy>
2946 : NeonI_3VDiff<q, u, size, opcode,
2947 (outs VPR128:$Rd), (ins VPR128:$src, VPR64:$Rn, VPR64:$Rm),
2948 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2949 [(set (ResTy VPR128:$Rd),
2951 (ResTy VPR128:$src),
2952 (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))))],
2954 let Constraints = "$src = $Rd";
2957 multiclass NeonI_3VDL_3Op_v1<bit u, bits<4> opcode, string asmop,
2958 SDPatternOperator opnode> {
2959 def _8h8b : NeonI_3VDL_3Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2960 opnode, v8i16, v8i8>;
2961 def _4s4h : NeonI_3VDL_3Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2962 opnode, v4i32, v4i16>;
2963 def _2d2s : NeonI_3VDL_3Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2964 opnode, v2i64, v2i32>;
2967 def Neon_smlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
2969 (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
2971 def Neon_umlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
2973 (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
2975 def Neon_smlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
2977 (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
2979 def Neon_umlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
2981 (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
2983 defm SMLALvvv : NeonI_3VDL_3Op_v1<0b0, 0b1000, "smlal", Neon_smlal>;
2984 defm UMLALvvv : NeonI_3VDL_3Op_v1<0b1, 0b1000, "umlal", Neon_umlal>;
2986 defm SMLSLvvv : NeonI_3VDL_3Op_v1<0b0, 0b1010, "smlsl", Neon_smlsl>;
2987 defm UMLSLvvv : NeonI_3VDL_3Op_v1<0b1, 0b1010, "umlsl", Neon_umlsl>;
2989 class NeonI_3VDL2_3Op_mlas<bit q, bit u, bits<2> size, bits<4> opcode,
2990 string asmop, string ResS, string OpS,
2991 SDPatternOperator subop, SDPatternOperator opnode,
2992 RegisterOperand OpVPR,
2993 ValueType ResTy, ValueType OpTy>
2994 : NeonI_3VDiff<q, u, size, opcode,
2995 (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
2996 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2997 [(set (ResTy VPR128:$Rd),
2999 (ResTy VPR128:$src),
3000 (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))],
3002 let Constraints = "$src = $Rd";
3005 multiclass NeonI_3VDL2_3Op_mlas_v1<bit u, bits<4> opcode, string asmop,
3006 SDPatternOperator subop, string opnode> {
3007 def _8h16b : NeonI_3VDL2_3Op_mlas<0b1, u, 0b00, opcode, asmop, "8h", "16b",
3008 subop, !cast<PatFrag>(opnode # "_16B"),
3009 VPR128, v8i16, v16i8>;
3010 def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
3011 subop, !cast<PatFrag>(opnode # "_8H"),
3012 VPR128, v4i32, v8i16>;
3013 def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
3014 subop, !cast<PatFrag>(opnode # "_4S"),
3015 VPR128, v2i64, v4i32>;
3018 defm SMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1000, "smlal2",
3019 add, "NI_smull_hi">;
3020 defm UMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1000, "umlal2",
3021 add, "NI_umull_hi">;
3023 defm SMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1010, "smlsl2",
3024 sub, "NI_smull_hi">;
3025 defm UMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1010, "umlsl2",
3026 sub, "NI_umull_hi">;
3028 multiclass NeonI_3VDL_qdmlal_3Op_v2<bit u, bits<4> opcode, string asmop,
3029 SDPatternOperator opnode> {
3030 def _4s4h : NeonI_3VDL2_3Op_mlas<0b0, u, 0b01, opcode, asmop, "4s", "4h",
3031 opnode, int_arm_neon_vqdmull,
3032 VPR64, v4i32, v4i16>;
3033 def _2d2s : NeonI_3VDL2_3Op_mlas<0b0, u, 0b10, opcode, asmop, "2d", "2s",
3034 opnode, int_arm_neon_vqdmull,
3035 VPR64, v2i64, v2i32>;
3038 defm SQDMLALvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1001, "sqdmlal",
3039 int_arm_neon_vqadds>;
3040 defm SQDMLSLvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1011, "sqdmlsl",
3041 int_arm_neon_vqsubs>;
3043 multiclass NeonI_3VDL_v2<bit u, bits<4> opcode, string asmop,
3044 SDPatternOperator opnode, bit Commutable = 0> {
3045 let isCommutable = Commutable in {
3046 def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
3047 opnode, VPR128, VPR64, v4i32, v4i16>;
3048 def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
3049 opnode, VPR128, VPR64, v2i64, v2i32>;
3053 defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull",
3054 int_arm_neon_vqdmull, 1>;
3056 multiclass NeonI_3VDL2_2Op_mull_v2<bit u, bits<4> opcode, string asmop,
3057 string opnode, bit Commutable = 0> {
3058 let isCommutable = Commutable in {
3059 def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
3060 !cast<PatFrag>(opnode # "_8H"),
3062 def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
3063 !cast<PatFrag>(opnode # "_4S"),
3068 defm SQDMULL2vvv : NeonI_3VDL2_2Op_mull_v2<0b0, 0b1101, "sqdmull2",
3071 multiclass NeonI_3VDL2_3Op_qdmlal_v2<bit u, bits<4> opcode, string asmop,
3072 SDPatternOperator opnode> {
3073 def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
3074 opnode, NI_qdmull_hi_8H,
3075 VPR128, v4i32, v8i16>;
3076 def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
3077 opnode, NI_qdmull_hi_4S,
3078 VPR128, v2i64, v4i32>;
3081 defm SQDMLAL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1001, "sqdmlal2",
3082 int_arm_neon_vqadds>;
3083 defm SQDMLSL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1011, "sqdmlsl2",
3084 int_arm_neon_vqsubs>;
3086 multiclass NeonI_3VDL_v3<bit u, bits<4> opcode, string asmop,
3087 SDPatternOperator opnode_8h8b,
3088 SDPatternOperator opnode_1q1d, bit Commutable = 0> {
3089 let isCommutable = Commutable in {
3090 def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
3091 opnode_8h8b, VPR128, VPR64, v8i16, v8i8>;
3093 def _1q1d : NeonI_3VD_2Op<0b0, u, 0b11, opcode, asmop, "1q", "1d",
3094 opnode_1q1d, VPR128, VPR64, v16i8, v1i64>;
3098 defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp,
3099 int_aarch64_neon_vmull_p64, 1>;
3101 multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode, string asmop,
3102 string opnode, bit Commutable = 0> {
3103 let isCommutable = Commutable in {
3104 def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
3105 !cast<PatFrag>(opnode # "_16B"),
3109 NeonI_3VDiff<0b1, u, 0b11, opcode,
3110 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
3111 asmop # "\t$Rd.1q, $Rn.2d, $Rm.2d",
3112 [(set (v16i8 VPR128:$Rd),
3113 (v16i8 (int_aarch64_neon_vmull_p64
3114 (v1i64 (scalar_to_vector
3115 (i64 (vector_extract (v2i64 VPR128:$Rn), 1)))),
3116 (v1i64 (scalar_to_vector
3117 (i64 (vector_extract (v2i64 VPR128:$Rm), 1)))))))],
3122 defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2", "NI_pmull_hi",
3125 // End of implementation for instruction class (3V Diff)
3127 // The following are the vector load/store multiple N-element structure
3128 // (class SIMD lselem).
3130 // ld1: load multiple 1-element structure to 1/2/3/4 registers.
3131 // ld2/ld3/ld4: load multiple N-element structure to N registers (N = 2, 3, 4).
3132 // The structure consists of a sequence of sets of N values.
3133 // The first element of the structure is placed in the first lane
3134 // of the first vector, the second element in the first lane
3135 // of the second vector, and so on.
3136 // E.g. LD1_3V_2S will load 32-bit elements {A, B, C, D, E, F} sequentially into
3137 // the three 64-bit vectors list {BA, DC, FE}.
3138 // E.g. LD3_2S will load 32-bit elements {A, B, C, D, E, F} into the three
3139 // 64-bit vectors list {DA, EB, FC}.
3140 // Store instructions store multiple structure to N registers like load.
3143 class NeonI_LDVList<bit q, bits<4> opcode, bits<2> size,
3144 RegisterOperand VecList, string asmop>
3145 : NeonI_LdStMult<q, 1, opcode, size,
3146 (outs VecList:$Rt), (ins GPR64xsp:$Rn),
3147 asmop # "\t$Rt, [$Rn]",
3151 let neverHasSideEffects = 1;
3154 multiclass LDVList_BHSD<bits<4> opcode, string List, string asmop> {
3155 def _8B : NeonI_LDVList<0, opcode, 0b00,
3156 !cast<RegisterOperand>(List # "8B_operand"), asmop>;
3158 def _4H : NeonI_LDVList<0, opcode, 0b01,
3159 !cast<RegisterOperand>(List # "4H_operand"), asmop>;
3161 def _2S : NeonI_LDVList<0, opcode, 0b10,
3162 !cast<RegisterOperand>(List # "2S_operand"), asmop>;
3164 def _16B : NeonI_LDVList<1, opcode, 0b00,
3165 !cast<RegisterOperand>(List # "16B_operand"), asmop>;
3167 def _8H : NeonI_LDVList<1, opcode, 0b01,
3168 !cast<RegisterOperand>(List # "8H_operand"), asmop>;
3170 def _4S : NeonI_LDVList<1, opcode, 0b10,
3171 !cast<RegisterOperand>(List # "4S_operand"), asmop>;
3173 def _2D : NeonI_LDVList<1, opcode, 0b11,
3174 !cast<RegisterOperand>(List # "2D_operand"), asmop>;
3177 // Load multiple N-element structure to N consecutive registers (N = 1,2,3,4)
3178 defm LD1 : LDVList_BHSD<0b0111, "VOne", "ld1">;
3179 def LD1_1D : NeonI_LDVList<0, 0b0111, 0b11, VOne1D_operand, "ld1">;
3181 defm LD2 : LDVList_BHSD<0b1000, "VPair", "ld2">;
3183 defm LD3 : LDVList_BHSD<0b0100, "VTriple", "ld3">;
3185 defm LD4 : LDVList_BHSD<0b0000, "VQuad", "ld4">;
3187 // Load multiple 1-element structure to N consecutive registers (N = 2,3,4)
3188 defm LD1x2 : LDVList_BHSD<0b1010, "VPair", "ld1">;
3189 def LD1x2_1D : NeonI_LDVList<0, 0b1010, 0b11, VPair1D_operand, "ld1">;
3191 defm LD1x3 : LDVList_BHSD<0b0110, "VTriple", "ld1">;
3192 def LD1x3_1D : NeonI_LDVList<0, 0b0110, 0b11, VTriple1D_operand, "ld1">;
3194 defm LD1x4 : LDVList_BHSD<0b0010, "VQuad", "ld1">;
3195 def LD1x4_1D : NeonI_LDVList<0, 0b0010, 0b11, VQuad1D_operand, "ld1">;
3197 class NeonI_STVList<bit q, bits<4> opcode, bits<2> size,
3198 RegisterOperand VecList, string asmop>
3199 : NeonI_LdStMult<q, 0, opcode, size,
3200 (outs), (ins GPR64xsp:$Rn, VecList:$Rt),
3201 asmop # "\t$Rt, [$Rn]",
3205 let neverHasSideEffects = 1;
3208 multiclass STVList_BHSD<bits<4> opcode, string List, string asmop> {
3209 def _8B : NeonI_STVList<0, opcode, 0b00,
3210 !cast<RegisterOperand>(List # "8B_operand"), asmop>;
3212 def _4H : NeonI_STVList<0, opcode, 0b01,
3213 !cast<RegisterOperand>(List # "4H_operand"), asmop>;
3215 def _2S : NeonI_STVList<0, opcode, 0b10,
3216 !cast<RegisterOperand>(List # "2S_operand"), asmop>;
3218 def _16B : NeonI_STVList<1, opcode, 0b00,
3219 !cast<RegisterOperand>(List # "16B_operand"), asmop>;
3221 def _8H : NeonI_STVList<1, opcode, 0b01,
3222 !cast<RegisterOperand>(List # "8H_operand"), asmop>;
3224 def _4S : NeonI_STVList<1, opcode, 0b10,
3225 !cast<RegisterOperand>(List # "4S_operand"), asmop>;
3227 def _2D : NeonI_STVList<1, opcode, 0b11,
3228 !cast<RegisterOperand>(List # "2D_operand"), asmop>;
3231 // Store multiple N-element structures from N registers (N = 1,2,3,4)
3232 defm ST1 : STVList_BHSD<0b0111, "VOne", "st1">;
3233 def ST1_1D : NeonI_STVList<0, 0b0111, 0b11, VOne1D_operand, "st1">;
3235 defm ST2 : STVList_BHSD<0b1000, "VPair", "st2">;
3237 defm ST3 : STVList_BHSD<0b0100, "VTriple", "st3">;
3239 defm ST4 : STVList_BHSD<0b0000, "VQuad", "st4">;
3241 // Store multiple 1-element structures from N consecutive registers (N = 2,3,4)
3242 defm ST1x2 : STVList_BHSD<0b1010, "VPair", "st1">;
3243 def ST1x2_1D : NeonI_STVList<0, 0b1010, 0b11, VPair1D_operand, "st1">;
3245 defm ST1x3 : STVList_BHSD<0b0110, "VTriple", "st1">;
3246 def ST1x3_1D : NeonI_STVList<0, 0b0110, 0b11, VTriple1D_operand, "st1">;
3248 defm ST1x4 : STVList_BHSD<0b0010, "VQuad", "st1">;
3249 def ST1x4_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">;
3251 def : Pat<(v2f64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>;
3252 def : Pat<(v2i64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>;
3254 def : Pat<(v4f32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>;
3255 def : Pat<(v4i32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>;
3257 def : Pat<(v8i16 (load GPR64xsp:$addr)), (LD1_8H GPR64xsp:$addr)>;
3258 def : Pat<(v16i8 (load GPR64xsp:$addr)), (LD1_16B GPR64xsp:$addr)>;
3260 def : Pat<(v1f64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>;
3261 def : Pat<(v1i64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>;
3263 def : Pat<(v2f32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>;
3264 def : Pat<(v2i32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>;
3266 def : Pat<(v4i16 (load GPR64xsp:$addr)), (LD1_4H GPR64xsp:$addr)>;
3267 def : Pat<(v8i8 (load GPR64xsp:$addr)), (LD1_8B GPR64xsp:$addr)>;
3269 def : Pat<(store (v2i64 VPR128:$value), GPR64xsp:$addr),
3270 (ST1_2D GPR64xsp:$addr, VPR128:$value)>;
3271 def : Pat<(store (v2f64 VPR128:$value), GPR64xsp:$addr),
3272 (ST1_2D GPR64xsp:$addr, VPR128:$value)>;
3274 def : Pat<(store (v4i32 VPR128:$value), GPR64xsp:$addr),
3275 (ST1_4S GPR64xsp:$addr, VPR128:$value)>;
3276 def : Pat<(store (v4f32 VPR128:$value), GPR64xsp:$addr),
3277 (ST1_4S GPR64xsp:$addr, VPR128:$value)>;
3279 def : Pat<(store (v8i16 VPR128:$value), GPR64xsp:$addr),
3280 (ST1_8H GPR64xsp:$addr, VPR128:$value)>;
3281 def : Pat<(store (v16i8 VPR128:$value), GPR64xsp:$addr),
3282 (ST1_16B GPR64xsp:$addr, VPR128:$value)>;
3284 def : Pat<(store (v1i64 VPR64:$value), GPR64xsp:$addr),
3285 (ST1_1D GPR64xsp:$addr, VPR64:$value)>;
3286 def : Pat<(store (v1f64 VPR64:$value), GPR64xsp:$addr),
3287 (ST1_1D GPR64xsp:$addr, VPR64:$value)>;
3289 def : Pat<(store (v2i32 VPR64:$value), GPR64xsp:$addr),
3290 (ST1_2S GPR64xsp:$addr, VPR64:$value)>;
3291 def : Pat<(store (v2f32 VPR64:$value), GPR64xsp:$addr),
3292 (ST1_2S GPR64xsp:$addr, VPR64:$value)>;
3294 def : Pat<(store (v4i16 VPR64:$value), GPR64xsp:$addr),
3295 (ST1_4H GPR64xsp:$addr, VPR64:$value)>;
3296 def : Pat<(store (v8i8 VPR64:$value), GPR64xsp:$addr),
3297 (ST1_8B GPR64xsp:$addr, VPR64:$value)>;
3299 // Match load/store of v1i8/v1i16/v1i32 type to FPR8/FPR16/FPR32 load/store.
3300 // FIXME: for now we have v1i8, v1i16, v1i32 legal types, if they are illegal,
3301 // these patterns are not needed any more.
3302 def : Pat<(v1i8 (load GPR64xsp:$addr)), (LSFP8_LDR $addr, 0)>;
3303 def : Pat<(v1i16 (load GPR64xsp:$addr)), (LSFP16_LDR $addr, 0)>;
3304 def : Pat<(v1i32 (load GPR64xsp:$addr)), (LSFP32_LDR $addr, 0)>;
3306 def : Pat<(store (v1i8 FPR8:$value), GPR64xsp:$addr),
3307 (LSFP8_STR $value, $addr, 0)>;
3308 def : Pat<(store (v1i16 FPR16:$value), GPR64xsp:$addr),
3309 (LSFP16_STR $value, $addr, 0)>;
3310 def : Pat<(store (v1i32 FPR32:$value), GPR64xsp:$addr),
3311 (LSFP32_STR $value, $addr, 0)>;
3314 // End of vector load/store multiple N-element structure(class SIMD lselem)
3316 // The following are the post-index vector load/store multiple N-element
3317 // structure(class SIMD lselem-post)
3318 def exact1_asmoperand : AsmOperandClass {
3319 let Name = "Exact1";
3320 let PredicateMethod = "isExactImm<1>";
3321 let RenderMethod = "addImmOperands";
3323 def uimm_exact1 : Operand<i32>, ImmLeaf<i32, [{return Imm == 1;}]> {
3324 let ParserMatchClass = exact1_asmoperand;
3327 def exact2_asmoperand : AsmOperandClass {
3328 let Name = "Exact2";
3329 let PredicateMethod = "isExactImm<2>";
3330 let RenderMethod = "addImmOperands";
3332 def uimm_exact2 : Operand<i32>, ImmLeaf<i32, [{return Imm == 2;}]> {
3333 let ParserMatchClass = exact2_asmoperand;
3336 def exact3_asmoperand : AsmOperandClass {
3337 let Name = "Exact3";
3338 let PredicateMethod = "isExactImm<3>";
3339 let RenderMethod = "addImmOperands";
3341 def uimm_exact3 : Operand<i32>, ImmLeaf<i32, [{return Imm == 3;}]> {
3342 let ParserMatchClass = exact3_asmoperand;
3345 def exact4_asmoperand : AsmOperandClass {
3346 let Name = "Exact4";
3347 let PredicateMethod = "isExactImm<4>";
3348 let RenderMethod = "addImmOperands";
3350 def uimm_exact4 : Operand<i32>, ImmLeaf<i32, [{return Imm == 4;}]> {
3351 let ParserMatchClass = exact4_asmoperand;
3354 def exact6_asmoperand : AsmOperandClass {
3355 let Name = "Exact6";
3356 let PredicateMethod = "isExactImm<6>";
3357 let RenderMethod = "addImmOperands";
3359 def uimm_exact6 : Operand<i32>, ImmLeaf<i32, [{return Imm == 6;}]> {
3360 let ParserMatchClass = exact6_asmoperand;
3363 def exact8_asmoperand : AsmOperandClass {
3364 let Name = "Exact8";
3365 let PredicateMethod = "isExactImm<8>";
3366 let RenderMethod = "addImmOperands";
3368 def uimm_exact8 : Operand<i32>, ImmLeaf<i32, [{return Imm == 8;}]> {
3369 let ParserMatchClass = exact8_asmoperand;
3372 def exact12_asmoperand : AsmOperandClass {
3373 let Name = "Exact12";
3374 let PredicateMethod = "isExactImm<12>";
3375 let RenderMethod = "addImmOperands";
3377 def uimm_exact12 : Operand<i32>, ImmLeaf<i32, [{return Imm == 12;}]> {
3378 let ParserMatchClass = exact12_asmoperand;
3381 def exact16_asmoperand : AsmOperandClass {
3382 let Name = "Exact16";
3383 let PredicateMethod = "isExactImm<16>";
3384 let RenderMethod = "addImmOperands";
3386 def uimm_exact16 : Operand<i32>, ImmLeaf<i32, [{return Imm == 16;}]> {
3387 let ParserMatchClass = exact16_asmoperand;
3390 def exact24_asmoperand : AsmOperandClass {
3391 let Name = "Exact24";
3392 let PredicateMethod = "isExactImm<24>";
3393 let RenderMethod = "addImmOperands";
3395 def uimm_exact24 : Operand<i32>, ImmLeaf<i32, [{return Imm == 24;}]> {
3396 let ParserMatchClass = exact24_asmoperand;
3399 def exact32_asmoperand : AsmOperandClass {
3400 let Name = "Exact32";
3401 let PredicateMethod = "isExactImm<32>";
3402 let RenderMethod = "addImmOperands";
3404 def uimm_exact32 : Operand<i32>, ImmLeaf<i32, [{return Imm == 32;}]> {
3405 let ParserMatchClass = exact32_asmoperand;
3408 def exact48_asmoperand : AsmOperandClass {
3409 let Name = "Exact48";
3410 let PredicateMethod = "isExactImm<48>";
3411 let RenderMethod = "addImmOperands";
3413 def uimm_exact48 : Operand<i32>, ImmLeaf<i32, [{return Imm == 48;}]> {
3414 let ParserMatchClass = exact48_asmoperand;
3417 def exact64_asmoperand : AsmOperandClass {
3418 let Name = "Exact64";
3419 let PredicateMethod = "isExactImm<64>";
3420 let RenderMethod = "addImmOperands";
3422 def uimm_exact64 : Operand<i32>, ImmLeaf<i32, [{return Imm == 64;}]> {
3423 let ParserMatchClass = exact64_asmoperand;
3426 multiclass NeonI_LDWB_VList<bit q, bits<4> opcode, bits<2> size,
3427 RegisterOperand VecList, Operand ImmTy,
3429 let Constraints = "$Rn = $wb", mayLoad = 1, neverHasSideEffects = 1,
3430 DecoderMethod = "DecodeVLDSTPostInstruction" in {
3431 def _fixed : NeonI_LdStMult_Post<q, 1, opcode, size,
3432 (outs VecList:$Rt, GPR64xsp:$wb),
3433 (ins GPR64xsp:$Rn, ImmTy:$amt),
3434 asmop # "\t$Rt, [$Rn], $amt",
3440 def _register : NeonI_LdStMult_Post<q, 1, opcode, size,
3441 (outs VecList:$Rt, GPR64xsp:$wb),
3442 (ins GPR64xsp:$Rn, GPR64noxzr:$Rm),
3443 asmop # "\t$Rt, [$Rn], $Rm",
3449 multiclass LDWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
3450 Operand ImmTy2, string asmop> {
3451 defm _8B : NeonI_LDWB_VList<0, opcode, 0b00,
3452 !cast<RegisterOperand>(List # "8B_operand"),
3455 defm _4H : NeonI_LDWB_VList<0, opcode, 0b01,
3456 !cast<RegisterOperand>(List # "4H_operand"),
3459 defm _2S : NeonI_LDWB_VList<0, opcode, 0b10,
3460 !cast<RegisterOperand>(List # "2S_operand"),
3463 defm _16B : NeonI_LDWB_VList<1, opcode, 0b00,
3464 !cast<RegisterOperand>(List # "16B_operand"),
3467 defm _8H : NeonI_LDWB_VList<1, opcode, 0b01,
3468 !cast<RegisterOperand>(List # "8H_operand"),
3471 defm _4S : NeonI_LDWB_VList<1, opcode, 0b10,
3472 !cast<RegisterOperand>(List # "4S_operand"),
3475 defm _2D : NeonI_LDWB_VList<1, opcode, 0b11,
3476 !cast<RegisterOperand>(List # "2D_operand"),
3480 // Post-index load multiple N-element structures from N registers (N = 1,2,3,4)
3481 defm LD1WB : LDWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "ld1">;
3482 defm LD1WB_1D : NeonI_LDWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
3485 defm LD2WB : LDWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "ld2">;
3487 defm LD3WB : LDWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
3490 defm LD4WB : LDWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "ld4">;
3492 // Post-index load multiple 1-element structures from N consecutive registers
3494 defm LD1x2WB : LDWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
3496 defm LD1x2WB_1D : NeonI_LDWB_VList<0, 0b1010, 0b11, VPair1D_operand,
3497 uimm_exact16, "ld1">;
3499 defm LD1x3WB : LDWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
3501 defm LD1x3WB_1D : NeonI_LDWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
3502 uimm_exact24, "ld1">;
3504 defm LD1x4WB : LDWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
3506 defm LD1x4WB_1D : NeonI_LDWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
3507 uimm_exact32, "ld1">;
3509 multiclass NeonI_STWB_VList<bit q, bits<4> opcode, bits<2> size,
3510 RegisterOperand VecList, Operand ImmTy,
3512 let Constraints = "$Rn = $wb", mayStore = 1, neverHasSideEffects = 1,
3513 DecoderMethod = "DecodeVLDSTPostInstruction" in {
3514 def _fixed : NeonI_LdStMult_Post<q, 0, opcode, size,
3515 (outs GPR64xsp:$wb),
3516 (ins GPR64xsp:$Rn, ImmTy:$amt, VecList:$Rt),
3517 asmop # "\t$Rt, [$Rn], $amt",
3523 def _register : NeonI_LdStMult_Post<q, 0, opcode, size,
3524 (outs GPR64xsp:$wb),
3525 (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VecList:$Rt),
3526 asmop # "\t$Rt, [$Rn], $Rm",
3532 multiclass STWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
3533 Operand ImmTy2, string asmop> {
3534 defm _8B : NeonI_STWB_VList<0, opcode, 0b00,
3535 !cast<RegisterOperand>(List # "8B_operand"), ImmTy, asmop>;
3537 defm _4H : NeonI_STWB_VList<0, opcode, 0b01,
3538 !cast<RegisterOperand>(List # "4H_operand"),
3541 defm _2S : NeonI_STWB_VList<0, opcode, 0b10,
3542 !cast<RegisterOperand>(List # "2S_operand"),
3545 defm _16B : NeonI_STWB_VList<1, opcode, 0b00,
3546 !cast<RegisterOperand>(List # "16B_operand"),
3549 defm _8H : NeonI_STWB_VList<1, opcode, 0b01,
3550 !cast<RegisterOperand>(List # "8H_operand"),
3553 defm _4S : NeonI_STWB_VList<1, opcode, 0b10,
3554 !cast<RegisterOperand>(List # "4S_operand"),
3557 defm _2D : NeonI_STWB_VList<1, opcode, 0b11,
3558 !cast<RegisterOperand>(List # "2D_operand"),
3562 // Post-index store multiple N-element structures from N registers (N = 1,2,3,4)
3563 defm ST1WB : STWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "st1">;
3564 defm ST1WB_1D : NeonI_STWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
3567 defm ST2WB : STWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "st2">;
3569 defm ST3WB : STWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
3572 defm ST4WB : STWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "st4">;
3574 // Post-index store multiple 1-element structures from N consecutive registers
3576 defm ST1x2WB : STWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
3578 defm ST1x2WB_1D : NeonI_STWB_VList<0, 0b1010, 0b11, VPair1D_operand,
3579 uimm_exact16, "st1">;
3581 defm ST1x3WB : STWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
3583 defm ST1x3WB_1D : NeonI_STWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
3584 uimm_exact24, "st1">;
3586 defm ST1x4WB : STWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
3588 defm ST1x4WB_1D : NeonI_STWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
3589 uimm_exact32, "st1">;
3591 // End of post-index vector load/store multiple N-element structure
3592 // (class SIMD lselem-post)
3594 // The following are vector load/store single N-element structure
3595 // (class SIMD lsone).
// Bare unsigned lane-index immediate operands for the SIMD
// single-structure (lsone) instructions below.  Each operand accepts
// only the lane range valid for one element size and is printed without
// a '#' prefix (printUImmBareOperand).
3596 def neon_uimm0_bare : Operand<i64>,
3597 ImmLeaf<i64, [{return Imm == 0;}]> {
// Only lane 0 is legal (1-element .d vectors such as v1i64/v1f64).
3598 let ParserMatchClass = neon_uimm0_asmoperand;
3599 let PrintMethod = "printUImmBareOperand";
3602 def neon_uimm1_bare : Operand<i64>,
3603 ImmLeaf<i64, [{return Imm < 2;}]> {
// Lane index 0..1 (.d lanes of a 128-bit vector).
3604 let ParserMatchClass = neon_uimm1_asmoperand;
3605 let PrintMethod = "printUImmBareOperand";
3608 def neon_uimm2_bare : Operand<i64>,
3609 ImmLeaf<i64, [{return Imm < 4;}]> {
// Lane index 0..3 (.s lanes).
3610 let ParserMatchClass = neon_uimm2_asmoperand;
3611 let PrintMethod = "printUImmBareOperand";
3614 def neon_uimm3_bare : Operand<i64>,
3615 ImmLeaf<i64, [{return Imm < 8;}]> {
// Lane index 0..7 (.h lanes).
3616 let ParserMatchClass = uimm3_asmoperand;
3617 let PrintMethod = "printUImmBareOperand";
3620 def neon_uimm4_bare : Operand<i64>,
3621 ImmLeaf<i64, [{return Imm < 16;}]> {
// Lane index 0..15 (.b lanes).
3622 let ParserMatchClass = uimm4_asmoperand;
3623 let PrintMethod = "printUImmBareOperand";
3626 class NeonI_LDN_Dup<bit q, bit r, bits<3> opcode, bits<2> size,
3627 RegisterOperand VecList, string asmop>
3628 : NeonI_LdOne_Dup<q, r, opcode, size,
3629 (outs VecList:$Rt), (ins GPR64xsp:$Rn),
3630 asmop # "\t$Rt, [$Rn]",
3634 let neverHasSideEffects = 1;
3637 multiclass LDN_Dup_BHSD<bit r, bits<3> opcode, string List, string asmop> {
3638 def _8B : NeonI_LDN_Dup<0, r, opcode, 0b00,
3639 !cast<RegisterOperand>(List # "8B_operand"), asmop>;
3641 def _4H : NeonI_LDN_Dup<0, r, opcode, 0b01,
3642 !cast<RegisterOperand>(List # "4H_operand"), asmop>;
3644 def _2S : NeonI_LDN_Dup<0, r, opcode, 0b10,
3645 !cast<RegisterOperand>(List # "2S_operand"), asmop>;
3647 def _1D : NeonI_LDN_Dup<0, r, opcode, 0b11,
3648 !cast<RegisterOperand>(List # "1D_operand"), asmop>;
3650 def _16B : NeonI_LDN_Dup<1, r, opcode, 0b00,
3651 !cast<RegisterOperand>(List # "16B_operand"), asmop>;
3653 def _8H : NeonI_LDN_Dup<1, r, opcode, 0b01,
3654 !cast<RegisterOperand>(List # "8H_operand"), asmop>;
3656 def _4S : NeonI_LDN_Dup<1, r, opcode, 0b10,
3657 !cast<RegisterOperand>(List # "4S_operand"), asmop>;
3659 def _2D : NeonI_LDN_Dup<1, r, opcode, 0b11,
3660 !cast<RegisterOperand>(List # "2D_operand"), asmop>;
3663 // Load single 1-element structure to all lanes of 1 register
3664 defm LD1R : LDN_Dup_BHSD<0b0, 0b110, "VOne", "ld1r">;
3666 // Load single N-element structure to all lanes of N consecutive
3667 // registers (N = 2,3,4)
3668 defm LD2R : LDN_Dup_BHSD<0b1, 0b110, "VPair", "ld2r">;
3669 defm LD3R : LDN_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r">;
3670 defm LD4R : LDN_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r">;
// LD1R selection: a scalar load feeding a vector duplicate (Neon_vdup)
// is matched to a single load-and-replicate LD1R instruction.
3673 class LD1R_pattern <ValueType VTy, ValueType DTy, PatFrag LoadOp,
3675 : Pat<(VTy (Neon_vdup (DTy (LoadOp GPR64xsp:$Rn)))),
3676 (VTy (INST GPR64xsp:$Rn))>;
3678 // Match all LD1R instructions
// i8/i16 elements arrive as i32 via extending loads; 32-bit and 64-bit
// elements use plain loads.
3679 def : LD1R_pattern<v8i8, i32, extloadi8, LD1R_8B>;
3681 def : LD1R_pattern<v16i8, i32, extloadi8, LD1R_16B>;
3683 def : LD1R_pattern<v4i16, i32, extloadi16, LD1R_4H>;
3685 def : LD1R_pattern<v8i16, i32, extloadi16, LD1R_8H>;
3687 def : LD1R_pattern<v2i32, i32, load, LD1R_2S>;
3688 def : LD1R_pattern<v2f32, f32, load, LD1R_2S>;
3690 def : LD1R_pattern<v4i32, i32, load, LD1R_4S>;
3691 def : LD1R_pattern<v4f32, f32, load, LD1R_4S>;
3693 def : LD1R_pattern<v2i64, i64, load, LD1R_2D>;
3694 def : LD1R_pattern<v2f64, f64, load, LD1R_2D>;
// For 1-element vectors there is no duplicate node; a scalar_to_vector
// of a loaded value selects to LD1R_1D directly.
3696 class LD1R_pattern_v1 <ValueType VTy, ValueType DTy, PatFrag LoadOp,
3698 : Pat<(VTy (scalar_to_vector (DTy (LoadOp GPR64xsp:$Rn)))),
3699 (VTy (INST GPR64xsp:$Rn))>;
3701 def : LD1R_pattern_v1<v1i64, i64, load, LD1R_1D>;
3702 def : LD1R_pattern_v1<v1f64, f64, load, LD1R_1D>;
// Instantiate bare-layout vector-list operands for each element-size
// suffix (B, H, S, D) under a common register-list prefix.
3704 multiclass VectorList_Bare_BHSD<string PREFIX, int Count,
3705 RegisterClass RegList> {
3706 defm B : VectorList_operands<PREFIX, "B", Count, RegList>;
3707 defm H : VectorList_operands<PREFIX, "H", Count, RegList>;
3708 defm S : VectorList_operands<PREFIX, "S", Count, RegList>;
3709 defm D : VectorList_operands<PREFIX, "D", Count, RegList>;
3712 // Special vector list operand of 128-bit vectors with bare layout.
3713 // i.e. only show ".b", ".h", ".s", ".d"
// Lists of 1..4 consecutive 128-bit registers, used by the lane-wise
// LDN/STN instructions below.
3714 defm VOne : VectorList_Bare_BHSD<"VOne", 1, FPR128>;
3715 defm VPair : VectorList_Bare_BHSD<"VPair", 2, QPair>;
3716 defm VTriple : VectorList_Bare_BHSD<"VTriple", 3, QTriple>;
3717 defm VQuad : VectorList_Bare_BHSD<"VQuad", 4, QQuad>;
3719 class NeonI_LDN_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
3720 Operand ImmOp, string asmop>
3721 : NeonI_LdStOne_Lane<1, r, op2_1, op0,
3723 (ins GPR64xsp:$Rn, VList:$src, ImmOp:$lane),
3724 asmop # "\t$Rt[$lane], [$Rn]",
3728 let neverHasSideEffects = 1;
3729 let hasExtraDefRegAllocReq = 1;
3730 let Constraints = "$src = $Rt";
3733 multiclass LDN_Lane_BHSD<bit r, bit op0, string List, string asmop> {
3734 def _B : NeonI_LDN_Lane<r, 0b00, op0,
3735 !cast<RegisterOperand>(List # "B_operand"),
3736 neon_uimm4_bare, asmop> {
3737 let Inst{12-10} = lane{2-0};
3738 let Inst{30} = lane{3};
3741 def _H : NeonI_LDN_Lane<r, 0b01, op0,
3742 !cast<RegisterOperand>(List # "H_operand"),
3743 neon_uimm3_bare, asmop> {
3744 let Inst{12-10} = {lane{1}, lane{0}, 0b0};
3745 let Inst{30} = lane{2};
3748 def _S : NeonI_LDN_Lane<r, 0b10, op0,
3749 !cast<RegisterOperand>(List # "S_operand"),
3750 neon_uimm2_bare, asmop> {
3751 let Inst{12-10} = {lane{0}, 0b0, 0b0};
3752 let Inst{30} = lane{1};
3755 def _D : NeonI_LDN_Lane<r, 0b10, op0,
3756 !cast<RegisterOperand>(List # "D_operand"),
3757 neon_uimm1_bare, asmop> {
3758 let Inst{12-10} = 0b001;
3759 let Inst{30} = lane{0};
3763 // Load single 1-element structure to one lane of 1 register.
3764 defm LD1LN : LDN_Lane_BHSD<0b0, 0b0, "VOne", "ld1">;
3766 // Load single N-element structure to one lane of N consecutive registers
3768 defm LD2LN : LDN_Lane_BHSD<0b1, 0b0, "VPair", "ld2">;
3769 defm LD3LN : LDN_Lane_BHSD<0b0, 0b1, "VTriple", "ld3">;
3770 defm LD4LN : LDN_Lane_BHSD<0b1, 0b1, "VQuad", "ld4">;
3772 multiclass LD1LN_patterns<ValueType VTy, ValueType VTy2, ValueType DTy,
3773 Operand ImmOp, Operand ImmOp2, PatFrag LoadOp,
3775 def : Pat<(VTy (vector_insert (VTy VPR64:$src),
3776 (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp:$lane))),
3777 (VTy (EXTRACT_SUBREG
3779 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
3783 def : Pat<(VTy2 (vector_insert (VTy2 VPR128:$src),
3784 (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp2:$lane))),
3785 (VTy2 (INST GPR64xsp:$Rn, VPR128:$src, ImmOp2:$lane))>;
3788 // Match all LD1LN instructions
3789 defm : LD1LN_patterns<v8i8, v16i8, i32, neon_uimm3_bare, neon_uimm4_bare,
3790 extloadi8, LD1LN_B>;
3792 defm : LD1LN_patterns<v4i16, v8i16, i32, neon_uimm2_bare, neon_uimm3_bare,
3793 extloadi16, LD1LN_H>;
3795 defm : LD1LN_patterns<v2i32, v4i32, i32, neon_uimm1_bare, neon_uimm2_bare,
3797 defm : LD1LN_patterns<v2f32, v4f32, f32, neon_uimm1_bare, neon_uimm2_bare,
3800 defm : LD1LN_patterns<v1i64, v2i64, i64, neon_uimm0_bare, neon_uimm1_bare,
3802 defm : LD1LN_patterns<v1f64, v2f64, f64, neon_uimm0_bare, neon_uimm1_bare,
3805 class NeonI_STN_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
3806 Operand ImmOp, string asmop>
3807 : NeonI_LdStOne_Lane<0, r, op2_1, op0,
3808 (outs), (ins GPR64xsp:$Rn, VList:$Rt, ImmOp:$lane),
3809 asmop # "\t$Rt[$lane], [$Rn]",
3813 let neverHasSideEffects = 1;
3814 let hasExtraDefRegAllocReq = 1;
3817 multiclass STN_Lane_BHSD<bit r, bit op0, string List, string asmop> {
3818 def _B : NeonI_STN_Lane<r, 0b00, op0,
3819 !cast<RegisterOperand>(List # "B_operand"),
3820 neon_uimm4_bare, asmop> {
3821 let Inst{12-10} = lane{2-0};
3822 let Inst{30} = lane{3};
3825 def _H : NeonI_STN_Lane<r, 0b01, op0,
3826 !cast<RegisterOperand>(List # "H_operand"),
3827 neon_uimm3_bare, asmop> {
3828 let Inst{12-10} = {lane{1}, lane{0}, 0b0};
3829 let Inst{30} = lane{2};
3832 def _S : NeonI_STN_Lane<r, 0b10, op0,
3833 !cast<RegisterOperand>(List # "S_operand"),
3834 neon_uimm2_bare, asmop> {
3835 let Inst{12-10} = {lane{0}, 0b0, 0b0};
3836 let Inst{30} = lane{1};
3839 def _D : NeonI_STN_Lane<r, 0b10, op0,
3840 !cast<RegisterOperand>(List # "D_operand"),
3841 neon_uimm1_bare, asmop>{
3842 let Inst{12-10} = 0b001;
3843 let Inst{30} = lane{0};
3847 // Store single 1-element structure from one lane of 1 register.
3848 defm ST1LN : STN_Lane_BHSD<0b0, 0b0, "VOne", "st1">;
3850 // Store single N-element structure from one lane of N consecutive registers
3852 defm ST2LN : STN_Lane_BHSD<0b1, 0b0, "VPair", "st2">;
3853 defm ST3LN : STN_Lane_BHSD<0b0, 0b1, "VTriple", "st3">;
3854 defm ST4LN : STN_Lane_BHSD<0b1, 0b1, "VQuad", "st4">;
3856 multiclass ST1LN_patterns<ValueType VTy, ValueType VTy2, ValueType DTy,
3857 Operand ImmOp, Operand ImmOp2, PatFrag StoreOp,
3859 def : Pat<(StoreOp (DTy (vector_extract (VTy VPR64:$Rt), ImmOp:$lane)),
3862 (SUBREG_TO_REG (i64 0), VPR64:$Rt, sub_64),
3865 def : Pat<(StoreOp (DTy (vector_extract (VTy2 VPR128:$Rt), ImmOp2:$lane)),
3867 (INST GPR64xsp:$Rn, VPR128:$Rt, ImmOp2:$lane)>;
3870 // Match all ST1LN instructions
3871 defm : ST1LN_patterns<v8i8, v16i8, i32, neon_uimm3_bare, neon_uimm4_bare,
3872 truncstorei8, ST1LN_B>;
3874 defm : ST1LN_patterns<v4i16, v8i16, i32, neon_uimm2_bare, neon_uimm3_bare,
3875 truncstorei16, ST1LN_H>;
3877 defm : ST1LN_patterns<v2i32, v4i32, i32, neon_uimm1_bare, neon_uimm2_bare,
3879 defm : ST1LN_patterns<v2f32, v4f32, f32, neon_uimm1_bare, neon_uimm2_bare,
3882 defm : ST1LN_patterns<v1i64, v2i64, i64, neon_uimm0_bare, neon_uimm1_bare,
3884 defm : ST1LN_patterns<v1f64, v2f64, f64, neon_uimm0_bare, neon_uimm1_bare,
3887 // End of vector load/store single N-element structure (class SIMD lsone).
3890 // The following are post-index load/store single N-element instructions
3891 // (class SIMD lsone-post)
3893 multiclass NeonI_LDN_WB_Dup<bit q, bit r, bits<3> opcode, bits<2> size,
3894 RegisterOperand VecList, Operand ImmTy,
3896 let mayLoad = 1, neverHasSideEffects = 1, Constraints = "$wb = $Rn",
3897 DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
3898 def _fixed : NeonI_LdOne_Dup_Post<q, r, opcode, size,
3899 (outs VecList:$Rt, GPR64xsp:$wb),
3900 (ins GPR64xsp:$Rn, ImmTy:$amt),
3901 asmop # "\t$Rt, [$Rn], $amt",
3907 def _register : NeonI_LdOne_Dup_Post<q, r, opcode, size,
3908 (outs VecList:$Rt, GPR64xsp:$wb),
3909 (ins GPR64xsp:$Rn, GPR64noxzr:$Rm),
3910 asmop # "\t$Rt, [$Rn], $Rm",
3916 multiclass LDWB_Dup_BHSD<bit r, bits<3> opcode, string List, string asmop,
3917 Operand uimm_b, Operand uimm_h,
3918 Operand uimm_s, Operand uimm_d> {
3919 defm _8B : NeonI_LDN_WB_Dup<0, r, opcode, 0b00,
3920 !cast<RegisterOperand>(List # "8B_operand"),
3923 defm _4H : NeonI_LDN_WB_Dup<0, r, opcode, 0b01,
3924 !cast<RegisterOperand>(List # "4H_operand"),
3927 defm _2S : NeonI_LDN_WB_Dup<0, r, opcode, 0b10,
3928 !cast<RegisterOperand>(List # "2S_operand"),
3931 defm _1D : NeonI_LDN_WB_Dup<0, r, opcode, 0b11,
3932 !cast<RegisterOperand>(List # "1D_operand"),
3935 defm _16B : NeonI_LDN_WB_Dup<1, r, opcode, 0b00,
3936 !cast<RegisterOperand>(List # "16B_operand"),
3939 defm _8H : NeonI_LDN_WB_Dup<1, r, opcode, 0b01,
3940 !cast<RegisterOperand>(List # "8H_operand"),
3943 defm _4S : NeonI_LDN_WB_Dup<1, r, opcode, 0b10,
3944 !cast<RegisterOperand>(List # "4S_operand"),
3947 defm _2D : NeonI_LDN_WB_Dup<1, r, opcode, 0b11,
3948 !cast<RegisterOperand>(List # "2D_operand"),
3952 // Post-index load single 1-element structure to all lanes of 1 register
3953 defm LD1R_WB : LDWB_Dup_BHSD<0b0, 0b110, "VOne", "ld1r", uimm_exact1,
3954 uimm_exact2, uimm_exact4, uimm_exact8>;
3956 // Post-index load single N-element structure to all lanes of N consecutive
3957 // registers (N = 2,3,4)
3958 defm LD2R_WB : LDWB_Dup_BHSD<0b1, 0b110, "VPair", "ld2r", uimm_exact2,
3959 uimm_exact4, uimm_exact8, uimm_exact16>;
3960 defm LD3R_WB : LDWB_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r", uimm_exact3,
3961 uimm_exact6, uimm_exact12, uimm_exact24>;
3962 defm LD4R_WB : LDWB_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r", uimm_exact4,
3963 uimm_exact8, uimm_exact16, uimm_exact32>;
3965 let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1,
3966 Constraints = "$Rn = $wb, $Rt = $src",
3967 DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
3968 class LDN_WBFx_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
3969 Operand ImmTy, Operand ImmOp, string asmop>
3970 : NeonI_LdStOne_Lane_Post<1, r, op2_1, op0,
3971 (outs VList:$Rt, GPR64xsp:$wb),
3972 (ins GPR64xsp:$Rn, ImmTy:$amt,
3973 VList:$src, ImmOp:$lane),
3974 asmop # "\t$Rt[$lane], [$Rn], $amt",
3980 class LDN_WBReg_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
3981 Operand ImmTy, Operand ImmOp, string asmop>
3982 : NeonI_LdStOne_Lane_Post<1, r, op2_1, op0,
3983 (outs VList:$Rt, GPR64xsp:$wb),
3984 (ins GPR64xsp:$Rn, GPR64noxzr:$Rm,
3985 VList:$src, ImmOp:$lane),
3986 asmop # "\t$Rt[$lane], [$Rn], $Rm",
3991 multiclass LD_Lane_WB_BHSD<bit r, bit op0, string List, string asmop,
3992 Operand uimm_b, Operand uimm_h,
3993 Operand uimm_s, Operand uimm_d> {
3994 def _B_fixed : LDN_WBFx_Lane<r, 0b00, op0,
3995 !cast<RegisterOperand>(List # "B_operand"),
3996 uimm_b, neon_uimm4_bare, asmop> {
3997 let Inst{12-10} = lane{2-0};
3998 let Inst{30} = lane{3};
4001 def _B_register : LDN_WBReg_Lane<r, 0b00, op0,
4002 !cast<RegisterOperand>(List # "B_operand"),
4003 uimm_b, neon_uimm4_bare, asmop> {
4004 let Inst{12-10} = lane{2-0};
4005 let Inst{30} = lane{3};
4008 def _H_fixed : LDN_WBFx_Lane<r, 0b01, op0,
4009 !cast<RegisterOperand>(List # "H_operand"),
4010 uimm_h, neon_uimm3_bare, asmop> {
4011 let Inst{12-10} = {lane{1}, lane{0}, 0b0};
4012 let Inst{30} = lane{2};
4015 def _H_register : LDN_WBReg_Lane<r, 0b01, op0,
4016 !cast<RegisterOperand>(List # "H_operand"),
4017 uimm_h, neon_uimm3_bare, asmop> {
4018 let Inst{12-10} = {lane{1}, lane{0}, 0b0};
4019 let Inst{30} = lane{2};
4022 def _S_fixed : LDN_WBFx_Lane<r, 0b10, op0,
4023 !cast<RegisterOperand>(List # "S_operand"),
4024 uimm_s, neon_uimm2_bare, asmop> {
4025 let Inst{12-10} = {lane{0}, 0b0, 0b0};
4026 let Inst{30} = lane{1};
4029 def _S_register : LDN_WBReg_Lane<r, 0b10, op0,
4030 !cast<RegisterOperand>(List # "S_operand"),
4031 uimm_s, neon_uimm2_bare, asmop> {
4032 let Inst{12-10} = {lane{0}, 0b0, 0b0};
4033 let Inst{30} = lane{1};
4036 def _D_fixed : LDN_WBFx_Lane<r, 0b10, op0,
4037 !cast<RegisterOperand>(List # "D_operand"),
4038 uimm_d, neon_uimm1_bare, asmop> {
4039 let Inst{12-10} = 0b001;
4040 let Inst{30} = lane{0};
4043 def _D_register : LDN_WBReg_Lane<r, 0b10, op0,
4044 !cast<RegisterOperand>(List # "D_operand"),
4045 uimm_d, neon_uimm1_bare, asmop> {
4046 let Inst{12-10} = 0b001;
4047 let Inst{30} = lane{0};
4051 // Post-index load single 1-element structure to one lane of 1 register.
4052 defm LD1LN_WB : LD_Lane_WB_BHSD<0b0, 0b0, "VOne", "ld1", uimm_exact1,
4053 uimm_exact2, uimm_exact4, uimm_exact8>;
4055 // Post-index load single N-element structure to one lane of N consecutive
4058 defm LD2LN_WB : LD_Lane_WB_BHSD<0b1, 0b0, "VPair", "ld2", uimm_exact2,
4059 uimm_exact4, uimm_exact8, uimm_exact16>;
4060 defm LD3LN_WB : LD_Lane_WB_BHSD<0b0, 0b1, "VTriple", "ld3", uimm_exact3,
4061 uimm_exact6, uimm_exact12, uimm_exact24>;
4062 defm LD4LN_WB : LD_Lane_WB_BHSD<0b1, 0b1, "VQuad", "ld4", uimm_exact4,
4063 uimm_exact8, uimm_exact16, uimm_exact32>;
4065 let mayStore = 1, neverHasSideEffects = 1,
4066 hasExtraDefRegAllocReq = 1, Constraints = "$Rn = $wb",
4067 DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
4068 class STN_WBFx_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
4069 Operand ImmTy, Operand ImmOp, string asmop>
4070 : NeonI_LdStOne_Lane_Post<0, r, op2_1, op0,
4071 (outs GPR64xsp:$wb),
4072 (ins GPR64xsp:$Rn, ImmTy:$amt,
4073 VList:$Rt, ImmOp:$lane),
4074 asmop # "\t$Rt[$lane], [$Rn], $amt",
4080 class STN_WBReg_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
4081 Operand ImmTy, Operand ImmOp, string asmop>
4082 : NeonI_LdStOne_Lane_Post<0, r, op2_1, op0,
4083 (outs GPR64xsp:$wb),
4084 (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VList:$Rt,
4086 asmop # "\t$Rt[$lane], [$Rn], $Rm",
4091 multiclass ST_Lane_WB_BHSD<bit r, bit op0, string List, string asmop,
4092 Operand uimm_b, Operand uimm_h,
4093 Operand uimm_s, Operand uimm_d> {
4094 def _B_fixed : STN_WBFx_Lane<r, 0b00, op0,
4095 !cast<RegisterOperand>(List # "B_operand"),
4096 uimm_b, neon_uimm4_bare, asmop> {
4097 let Inst{12-10} = lane{2-0};
4098 let Inst{30} = lane{3};
4101 def _B_register : STN_WBReg_Lane<r, 0b00, op0,
4102 !cast<RegisterOperand>(List # "B_operand"),
4103 uimm_b, neon_uimm4_bare, asmop> {
4104 let Inst{12-10} = lane{2-0};
4105 let Inst{30} = lane{3};
4108 def _H_fixed : STN_WBFx_Lane<r, 0b01, op0,
4109 !cast<RegisterOperand>(List # "H_operand"),
4110 uimm_h, neon_uimm3_bare, asmop> {
4111 let Inst{12-10} = {lane{1}, lane{0}, 0b0};
4112 let Inst{30} = lane{2};
4115 def _H_register : STN_WBReg_Lane<r, 0b01, op0,
4116 !cast<RegisterOperand>(List # "H_operand"),
4117 uimm_h, neon_uimm3_bare, asmop> {
4118 let Inst{12-10} = {lane{1}, lane{0}, 0b0};
4119 let Inst{30} = lane{2};
4122 def _S_fixed : STN_WBFx_Lane<r, 0b10, op0,
4123 !cast<RegisterOperand>(List # "S_operand"),
4124 uimm_s, neon_uimm2_bare, asmop> {
4125 let Inst{12-10} = {lane{0}, 0b0, 0b0};
4126 let Inst{30} = lane{1};
4129 def _S_register : STN_WBReg_Lane<r, 0b10, op0,
4130 !cast<RegisterOperand>(List # "S_operand"),
4131 uimm_s, neon_uimm2_bare, asmop> {
4132 let Inst{12-10} = {lane{0}, 0b0, 0b0};
4133 let Inst{30} = lane{1};
4136 def _D_fixed : STN_WBFx_Lane<r, 0b10, op0,
4137 !cast<RegisterOperand>(List # "D_operand"),
4138 uimm_d, neon_uimm1_bare, asmop> {
4139 let Inst{12-10} = 0b001;
4140 let Inst{30} = lane{0};
4143 def _D_register : STN_WBReg_Lane<r, 0b10, op0,
4144 !cast<RegisterOperand>(List # "D_operand"),
4145 uimm_d, neon_uimm1_bare, asmop> {
4146 let Inst{12-10} = 0b001;
4147 let Inst{30} = lane{0};
4151 // Post-index store single 1-element structure from one lane of 1 register.
4152 defm ST1LN_WB : ST_Lane_WB_BHSD<0b0, 0b0, "VOne", "st1", uimm_exact1,
4153 uimm_exact2, uimm_exact4, uimm_exact8>;
4155 // Post-index store single N-element structure from one lane of N consecutive
4156 // registers (N = 2,3,4)
4157 defm ST2LN_WB : ST_Lane_WB_BHSD<0b1, 0b0, "VPair", "st2", uimm_exact2,
4158 uimm_exact4, uimm_exact8, uimm_exact16>;
4159 defm ST3LN_WB : ST_Lane_WB_BHSD<0b0, 0b1, "VTriple", "st3", uimm_exact3,
4160 uimm_exact6, uimm_exact12, uimm_exact24>;
4161 defm ST4LN_WB : ST_Lane_WB_BHSD<0b1, 0b1, "VQuad", "st4", uimm_exact4,
4162 uimm_exact8, uimm_exact16, uimm_exact32>;
4164 // End of post-index load/store single N-element instructions
4165 // (class SIMD lsone-post)
4167 // Neon Scalar instructions implementation
4168 // Scalar Three Same
// Scalar Three Same: two scalar FPR sources and one FPR destination,
// all of the same register size; 'size' selects the element width.
4170 class NeonI_Scalar3Same_size<bit u, bits<2> size, bits<5> opcode, string asmop,
4172 : NeonI_Scalar3Same<u, size, opcode,
4173 (outs FPRC:$Rd), (ins FPRC:$Rn, FPRC:$Rm),
4174 !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
// 64-bit-only variant (size == 0b11, FPR64 operands).
4178 class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
4179 : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;
// The def-name suffixes (bbb/hhh/sss/ddd) spell the register sizes of
// $Rd, $Rn and $Rm respectively.
4181 multiclass NeonI_Scalar3Same_HS_sizes<bit u, bits<5> opcode, string asmop,
4182 bit Commutable = 0> {
4183 let isCommutable = Commutable in {
4184 def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
4185 def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
4189 multiclass NeonI_Scalar3Same_SD_sizes<bit u, bit size_high, bits<5> opcode,
4190 string asmop, bit Commutable = 0> {
4191 let isCommutable = Commutable in {
// size_high supplies the top bit of 'size'; the low bit selects
// FPR32 vs FPR64 operands.
4192 def sss : NeonI_Scalar3Same_size<u, {size_high, 0b0}, opcode, asmop, FPR32>;
4193 def ddd : NeonI_Scalar3Same_size<u, {size_high, 0b1}, opcode, asmop, FPR64>;
4197 multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
4198 string asmop, bit Commutable = 0> {
4199 let isCommutable = Commutable in {
4200 def bbb : NeonI_Scalar3Same_size<u, 0b00, opcode, asmop, FPR8>;
4201 def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
4202 def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
4203 def ddd : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;
// Selection patterns mapping scalar (v1ix/v1fx) SDNodes onto the Scalar
// Three Same instructions defined above.
4207 multiclass Neon_Scalar3Same_D_size_patterns<SDPatternOperator opnode,
4208 Instruction INSTD> {
4209 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
4210 (INSTD FPR64:$Rn, FPR64:$Rm)>;
// B/H/S variants; the D pattern is inherited from the multiclass above.
4213 multiclass Neon_Scalar3Same_BHSD_size_patterns<SDPatternOperator opnode,
4218 : Neon_Scalar3Same_D_size_patterns<opnode, INSTD> {
4219 def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
4220 (INSTB FPR8:$Rn, FPR8:$Rm)>;
4221 def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
4222 (INSTH FPR16:$Rn, FPR16:$Rm)>;
4223 def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
4224 (INSTS FPR32:$Rn, FPR32:$Rm)>;
4227 multiclass Neon_Scalar3Same_HS_size_patterns<SDPatternOperator opnode,
4229 Instruction INSTS> {
4230 def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
4231 (INSTH FPR16:$Rn, FPR16:$Rm)>;
4232 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
4233 (INSTS FPR32:$Rn, FPR32:$Rm)>;
// Floating-point S/D variants; result and operand types are passed
// separately so they may differ (e.g. an integer result type).
4236 multiclass Neon_Scalar3Same_SD_size_patterns<SDPatternOperator opnode,
4237 ValueType SResTy, ValueType STy,
4238 Instruction INSTS, ValueType DResTy,
4239 ValueType DTy, Instruction INSTD> {
4240 def : Pat<(SResTy (opnode (STy FPR32:$Rn), (STy FPR32:$Rm))),
4241 (INSTS FPR32:$Rn, FPR32:$Rm)>;
4242 def : Pat<(DResTy (opnode (DTy FPR64:$Rn), (DTy FPR64:$Rm))),
4243 (INSTD FPR64:$Rn, FPR64:$Rm)>;
// v1f64 compare against a register operand: Neon_cmp with a fixed
// condition code yields a v1i64 mask.
4246 class Neon_Scalar3Same_cmp_V1_D_size_patterns<CondCode CC,
4248 : Pat<(v1i64 (Neon_cmp (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm), CC)),
4249 (INSTD FPR64:$Rn, FPR64:$Rm)>;
4251 // Scalar Three Different
4253 class NeonI_Scalar3Diff_size<bit u, bits<2> size, bits<4> opcode, string asmop,
4254 RegisterClass FPRCD, RegisterClass FPRCS>
4255 : NeonI_Scalar3Diff<u, size, opcode,
4256 (outs FPRCD:$Rd), (ins FPRCS:$Rn, FPRCS:$Rm),
4257 !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
4261 multiclass NeonI_Scalar3Diff_HS_size<bit u, bits<4> opcode, string asmop> {
4262 def shh : NeonI_Scalar3Diff_size<u, 0b01, opcode, asmop, FPR32, FPR16>;
4263 def dss : NeonI_Scalar3Diff_size<u, 0b10, opcode, asmop, FPR64, FPR32>;
4266 multiclass NeonI_Scalar3Diff_ml_HS_size<bit u, bits<4> opcode, string asmop> {
4267 let Constraints = "$Src = $Rd" in {
4268 def shh : NeonI_Scalar3Diff<u, 0b01, opcode,
4269 (outs FPR32:$Rd), (ins FPR32:$Src, FPR16:$Rn, FPR16:$Rm),
4270 !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
4273 def dss : NeonI_Scalar3Diff<u, 0b10, opcode,
4274 (outs FPR64:$Rd), (ins FPR64:$Src, FPR32:$Rn, FPR32:$Rm),
4275 !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
4281 multiclass Neon_Scalar3Diff_HS_size_patterns<SDPatternOperator opnode,
4283 Instruction INSTS> {
4284 def : Pat<(v1i32 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
4285 (INSTH FPR16:$Rn, FPR16:$Rm)>;
4286 def : Pat<(v1i64 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
4287 (INSTS FPR32:$Rn, FPR32:$Rm)>;
4290 multiclass Neon_Scalar3Diff_ml_HS_size_patterns<SDPatternOperator opnode,
4292 Instruction INSTS> {
4293 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
4294 (INSTH FPR32:$Src, FPR16:$Rn, FPR16:$Rm)>;
4295 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
4296 (INSTS FPR64:$Src, FPR32:$Rn, FPR32:$Rm)>;
4299 // Scalar Two Registers Miscellaneous
4301 class NeonI_Scalar2SameMisc_size<bit u, bits<2> size, bits<5> opcode, string asmop,
4302 RegisterClass FPRCD, RegisterClass FPRCS>
4303 : NeonI_Scalar2SameMisc<u, size, opcode,
4304 (outs FPRCD:$Rd), (ins FPRCS:$Rn),
4305 !strconcat(asmop, "\t$Rd, $Rn"),
4309 multiclass NeonI_Scalar2SameMisc_SD_size<bit u, bit size_high, bits<5> opcode,
4311 def ss : NeonI_Scalar2SameMisc_size<u, {size_high, 0b0}, opcode, asmop, FPR32,
4313 def dd : NeonI_Scalar2SameMisc_size<u, {size_high, 0b1}, opcode, asmop, FPR64,
4317 multiclass NeonI_Scalar2SameMisc_D_size<bit u, bits<5> opcode, string asmop> {
4318 def dd : NeonI_Scalar2SameMisc_size<u, 0b11, opcode, asmop, FPR64, FPR64>;
4321 multiclass NeonI_Scalar2SameMisc_BHSD_size<bit u, bits<5> opcode, string asmop>
4322 : NeonI_Scalar2SameMisc_D_size<u, opcode, asmop> {
4323 def bb : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR8>;
4324 def hh : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR16>;
4325 def ss : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR32>;
4328 class NeonI_Scalar2SameMisc_fcvtxn_D_size<bit u, bits<5> opcode, string asmop>
4329 : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR32, FPR64>;
4331 multiclass NeonI_Scalar2SameMisc_narrow_HSD_size<bit u, bits<5> opcode,
4333 def bh : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR16>;
4334 def hs : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR32>;
4335 def sd : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR64>;
4338 class NeonI_Scalar2SameMisc_accum_size<bit u, bits<2> size, bits<5> opcode,
4339 string asmop, RegisterClass FPRC>
4340 : NeonI_Scalar2SameMisc<u, size, opcode,
4341 (outs FPRC:$Rd), (ins FPRC:$Src, FPRC:$Rn),
4342 !strconcat(asmop, "\t$Rd, $Rn"),
4346 multiclass NeonI_Scalar2SameMisc_accum_BHSD_size<bit u, bits<5> opcode,
4349 let Constraints = "$Src = $Rd" in {
4350 def bb : NeonI_Scalar2SameMisc_accum_size<u, 0b00, opcode, asmop, FPR8>;
4351 def hh : NeonI_Scalar2SameMisc_accum_size<u, 0b01, opcode, asmop, FPR16>;
4352 def ss : NeonI_Scalar2SameMisc_accum_size<u, 0b10, opcode, asmop, FPR32>;
4353 def dd : NeonI_Scalar2SameMisc_accum_size<u, 0b11, opcode, asmop, FPR64>;
4357 class Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<SDPatternOperator opnode,
4359 : Pat<(f32 (opnode (f64 FPR64:$Rn))),
4362 multiclass Neon_Scalar2SameMisc_fcvt_SD_size_patterns<SDPatternOperator opnode,
4364 Instruction INSTD> {
4365 def : Pat<(v1i32 (opnode (f32 FPR32:$Rn))),
4367 def : Pat<(v1i64 (opnode (f64 FPR64:$Rn))),
4371 class Neon_Scalar2SameMisc_vcvt_D_size_patterns<SDPatternOperator opnode,
4373 : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))),
4376 multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns<SDPatternOperator opnode,
4378 Instruction INSTD> {
4379 def : Pat<(f32 (opnode (v1i32 FPR32:$Rn))),
4381 def : Pat<(f64 (opnode (v1i64 FPR64:$Rn))),
4385 multiclass Neon_Scalar2SameMisc_SD_size_patterns<SDPatternOperator opnode,
4387 Instruction INSTD> {
4388 def : Pat<(f32 (opnode (f32 FPR32:$Rn))),
4390 def : Pat<(f64 (opnode (f64 FPR64:$Rn))),
4394 class Neon_Scalar2SameMisc_V1_D_size_patterns<SDPatternOperator opnode,
4396 : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))),
4399 class NeonI_Scalar2SameMisc_cmpz_D_size<bit u, bits<5> opcode, string asmop>
4400 : NeonI_Scalar2SameMisc<u, 0b11, opcode,
4401 (outs FPR64:$Rd), (ins FPR64:$Rn, neon_uimm0:$Imm),
4402 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
4406 multiclass NeonI_Scalar2SameMisc_cmpz_SD_size<bit u, bits<5> opcode,
4408 def ssi : NeonI_Scalar2SameMisc<u, 0b10, opcode,
4409 (outs FPR32:$Rd), (ins FPR32:$Rn, fpz32:$FPImm),
4410 !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
4413 def ddi : NeonI_Scalar2SameMisc<u, 0b11, opcode,
4414 (outs FPR64:$Rd), (ins FPR64:$Rn, fpz32:$FPImm),
4415 !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
4420 class Neon_Scalar2SameMisc_cmpz_D_size_patterns<SDPatternOperator opnode,
4422 : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
4423 (v1i64 (bitconvert (v8i8 Neon_AllZero))))),
4424 (INSTD FPR64:$Rn, 0)>;
4426 class Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<CondCode CC,
4428 : Pat<(v1i64 (Neon_cmpz (v1i64 FPR64:$Rn),
4429 (i32 neon_uimm0:$Imm), CC)),
4430 (INSTD FPR64:$Rn, neon_uimm0:$Imm)>;
4432 multiclass Neon_Scalar2SameMisc_cmpz_SD_size_patterns<SDPatternOperator opnode,
4435 Instruction INSTD> {
4436 def : Pat<(v1i32 (opnode (f32 FPR32:$Rn), (f32 fpz32:$FPImm))),
4437 (INSTS FPR32:$Rn, fpz32:$FPImm)>;
4438 def : Pat<(v1i64 (opnode (f64 FPR64:$Rn), (f32 fpz32:$FPImm))),
4439 (INSTD FPR64:$Rn, fpz32:$FPImm)>;
4440 def : Pat<(v1i64 (Neon_cmpz (v1f64 FPR64:$Rn), (f32 fpz32:$FPImm), CC)),
4441 (INSTD FPR64:$Rn, fpz32:$FPImm)>;
4444 multiclass Neon_Scalar2SameMisc_D_size_patterns<SDPatternOperator opnode,
4445 Instruction INSTD> {
4446 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn))),
4450 multiclass Neon_Scalar2SameMisc_BHSD_size_patterns<SDPatternOperator opnode,
4455 : Neon_Scalar2SameMisc_D_size_patterns<opnode, INSTD> {
4456 def : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn))),
4458 def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn))),
4460 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn))),
4464 multiclass Neon_Scalar2SameMisc_narrow_HSD_size_patterns<
4465 SDPatternOperator opnode,
4468 Instruction INSTD> {
4469 def : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn))),
4471 def : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn))),
4473 def : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn))),
4478 multiclass Neon_Scalar2SameMisc_accum_BHSD_size_patterns<
4479 SDPatternOperator opnode,
4483 Instruction INSTD> {
4484 def : Pat<(v1i8 (opnode (v1i8 FPR8:$Src), (v1i8 FPR8:$Rn))),
4485 (INSTB FPR8:$Src, FPR8:$Rn)>;
4486 def : Pat<(v1i16 (opnode (v1i16 FPR16:$Src), (v1i16 FPR16:$Rn))),
4487 (INSTH FPR16:$Src, FPR16:$Rn)>;
4488 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i32 FPR32:$Rn))),
4489 (INSTS FPR32:$Src, FPR32:$Rn)>;
4490 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn))),
4491 (INSTD FPR64:$Src, FPR64:$Rn)>;
4494 // Scalar Shift By Immediate
4496 class NeonI_ScalarShiftImm_size<bit u, bits<5> opcode, string asmop,
4497 RegisterClass FPRC, Operand ImmTy>
4498 : NeonI_ScalarShiftImm<u, opcode,
4499 (outs FPRC:$Rd), (ins FPRC:$Rn, ImmTy:$Imm),
4500 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
4503 multiclass NeonI_ScalarShiftRightImm_D_size<bit u, bits<5> opcode,
4505 def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
4507 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
4508 let Inst{21-16} = Imm;
4512 multiclass NeonI_ScalarShiftRightImm_BHSD_size<bit u, bits<5> opcode,
4514 : NeonI_ScalarShiftRightImm_D_size<u, opcode, asmop> {
4515 def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shr_imm8> {
4517 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
4518 let Inst{18-16} = Imm;
4520 def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shr_imm16> {
4522 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
4523 let Inst{19-16} = Imm;
4525 def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
4527 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
4528 let Inst{20-16} = Imm;
4532 multiclass NeonI_ScalarShiftLeftImm_D_size<bit u, bits<5> opcode,
4534 def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shl_imm64> {
4536 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
4537 let Inst{21-16} = Imm;
4541 multiclass NeonI_ScalarShiftLeftImm_BHSD_size<bit u, bits<5> opcode,
4543 : NeonI_ScalarShiftLeftImm_D_size<u, opcode, asmop> {
4544 def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shl_imm8> {
4546 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
4547 let Inst{18-16} = Imm;
4549 def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shl_imm16> {
4551 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
4552 let Inst{19-16} = Imm;
4554 def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shl_imm32> {
4556 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
4557 let Inst{20-16} = Imm;
4561 class NeonI_ScalarShiftRightImm_accum_D_size<bit u, bits<5> opcode, string asmop>
4562 : NeonI_ScalarShiftImm<u, opcode,
4564 (ins FPR64:$Src, FPR64:$Rn, shr_imm64:$Imm),
4565 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
4568 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
4569 let Inst{21-16} = Imm;
4570 let Constraints = "$Src = $Rd";
4573 class NeonI_ScalarShiftLeftImm_accum_D_size<bit u, bits<5> opcode, string asmop>
4574 : NeonI_ScalarShiftImm<u, opcode,
4576 (ins FPR64:$Src, FPR64:$Rn, shl_imm64:$Imm),
4577 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
4580 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
4581 let Inst{21-16} = Imm;
4582 let Constraints = "$Src = $Rd";
4585 class NeonI_ScalarShiftImm_narrow_size<bit u, bits<5> opcode, string asmop,
4586 RegisterClass FPRCD, RegisterClass FPRCS,
4588 : NeonI_ScalarShiftImm<u, opcode,
4589 (outs FPRCD:$Rd), (ins FPRCS:$Rn, ImmTy:$Imm),
4590 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
4593 multiclass NeonI_ScalarShiftImm_narrow_HSD_size<bit u, bits<5> opcode,
4595 def bhi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR8, FPR16,
4598 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
4599 let Inst{18-16} = Imm;
4601 def hsi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR16, FPR32,
4604 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
4605 let Inst{19-16} = Imm;
4607 def sdi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR32, FPR64,
4610 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
4611 let Inst{20-16} = Imm;
4615 multiclass NeonI_ScalarShiftImm_cvt_SD_size<bit u, bits<5> opcode, string asmop> {
4616 def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
4618 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
4619 let Inst{20-16} = Imm;
4621 def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
4623 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
4624 let Inst{21-16} = Imm;
4628 multiclass Neon_ScalarShiftRImm_D_size_patterns<SDPatternOperator opnode,
4629 Instruction INSTD> {
4630 def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
4631 (INSTD FPR64:$Rn, imm:$Imm)>;
4634 multiclass Neon_ScalarShiftLImm_D_size_patterns<SDPatternOperator opnode,
4635 Instruction INSTD> {
4636 def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 shl_imm64:$Imm))),
4637 (INSTD FPR64:$Rn, imm:$Imm)>;
4640 class Neon_ScalarShiftLImm_V1_D_size_patterns<SDPatternOperator opnode,
4642 : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
4643 (v1i64 (Neon_vdup (i32 shl_imm64:$Imm))))),
4644 (INSTD FPR64:$Rn, imm:$Imm)>;
4646 class Neon_ScalarShiftRImm_V1_D_size_patterns<SDPatternOperator opnode,
4648 : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
4649 (v1i64 (Neon_vdup (i32 shr_imm64:$Imm))))),
4650 (INSTD FPR64:$Rn, imm:$Imm)>;
4652 multiclass Neon_ScalarShiftLImm_BHSD_size_patterns<SDPatternOperator opnode,
4657 : Neon_ScalarShiftLImm_D_size_patterns<opnode, INSTD> {
4658 def bbi : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (i32 shl_imm8:$Imm))),
4659 (INSTB FPR8:$Rn, imm:$Imm)>;
4660 def hhi : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (i32 shl_imm16:$Imm))),
4661 (INSTH FPR16:$Rn, imm:$Imm)>;
4662 def ssi : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (i32 shl_imm32:$Imm))),
4663 (INSTS FPR32:$Rn, imm:$Imm)>;
4666 class Neon_ScalarShiftLImm_accum_D_size_patterns<SDPatternOperator opnode,
4668 : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn),
4669 (i32 shl_imm64:$Imm))),
4670 (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>;
4672 class Neon_ScalarShiftRImm_accum_D_size_patterns<SDPatternOperator opnode,
4674 : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn),
4675 (i32 shr_imm64:$Imm))),
4676 (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>;
4678 multiclass Neon_ScalarShiftImm_narrow_HSD_size_patterns<
4679 SDPatternOperator opnode,
4682 Instruction INSTD> {
4683 def bhi : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn), (i32 shr_imm16:$Imm))),
4684 (INSTH FPR16:$Rn, imm:$Imm)>;
4685 def hsi : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
4686 (INSTS FPR32:$Rn, imm:$Imm)>;
4687 def sdi : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
4688 (INSTD FPR64:$Rn, imm:$Imm)>;
4691 multiclass Neon_ScalarShiftImm_scvtf_SD_size_patterns<SDPatternOperator opnode,
4693 Instruction INSTD> {
4694 def ssi : Pat<(f32 (opnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
4695 (INSTS FPR32:$Rn, imm:$Imm)>;
4696 def ddi : Pat<(f64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
4697 (INSTD FPR64:$Rn, imm:$Imm)>;
4700 multiclass Neon_ScalarShiftImm_fcvts_SD_size_patterns<SDPatternOperator opnode,
4702 Instruction INSTD> {
4703 def ssi : Pat<(v1i32 (opnode (f32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
4704 (INSTS FPR32:$Rn, imm:$Imm)>;
4705 def ddi : Pat<(v1i64 (opnode (f64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
4706 (INSTD FPR64:$Rn, imm:$Imm)>;
// Scalar shift-by-immediate instructions (D-register forms) and the
// selection patterns mapping both llvm.aarch64.* and generic/llvm.arm.*
// DAG nodes onto them.
4709 // Scalar Signed Shift Right (Immediate)
4710 defm SSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00000, "sshr">;
4711 defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vshrds_n, SSHRddi>;
4712 // Pattern to match llvm.arm.* intrinsic.
4713 def : Neon_ScalarShiftRImm_V1_D_size_patterns<sra, SSHRddi>;
4715 // Scalar Unsigned Shift Right (Immediate)
4716 defm USHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00000, "ushr">;
4717 defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vshrdu_n, USHRddi>;
4718 // Pattern to match llvm.arm.* intrinsic.
4719 def : Neon_ScalarShiftRImm_V1_D_size_patterns<srl, USHRddi>;
4721 // Scalar Signed Rounding Shift Right (Immediate)
4722 defm SRSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00100, "srshr">;
4723 defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vsrshr, SRSHRddi>;
4725 // Scalar Unsigned Rounding Shift Right (Immediate)
4726 defm URSHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00100, "urshr">;
4727 defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vurshr, URSHRddi>;
// The *SRA family accumulates into the destination, hence the _accum_
// class (which ties $Src to $Rd).
4729 // Scalar Signed Shift Right and Accumulate (Immediate)
4730 def SSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00010, "ssra">;
4731 def : Neon_ScalarShiftRImm_accum_D_size_patterns
4732 <int_aarch64_neon_vsrads_n, SSRA>;
4734 // Scalar Unsigned Shift Right and Accumulate (Immediate)
4735 def USRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00010, "usra">;
4736 def : Neon_ScalarShiftRImm_accum_D_size_patterns
4737 <int_aarch64_neon_vsradu_n, USRA>;
4739 // Scalar Signed Rounding Shift Right and Accumulate (Immediate)
4740 def SRSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00110, "srsra">;
4741 def : Neon_ScalarShiftRImm_accum_D_size_patterns
4742 <int_aarch64_neon_vrsrads_n, SRSRA>;
4744 // Scalar Unsigned Rounding Shift Right and Accumulate (Immediate)
4745 def URSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00110, "ursra">;
4746 def : Neon_ScalarShiftRImm_accum_D_size_patterns
4747 <int_aarch64_neon_vrsradu_n, URSRA>;
4749 // Scalar Shift Left (Immediate)
4750 defm SHL : NeonI_ScalarShiftLeftImm_D_size<0b0, 0b01010, "shl">;
4751 defm : Neon_ScalarShiftLImm_D_size_patterns<int_aarch64_neon_vshld_n, SHLddi>;
4752 // Pattern to match llvm.arm.* intrinsic.
4753 def : Neon_ScalarShiftLImm_V1_D_size_patterns<shl, SHLddi>;
4755 // Signed Saturating Shift Left (Immediate)
4756 defm SQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b0, 0b01110, "sqshl">;
4757 defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vqshls_n,
4759 SQSHLssi, SQSHLddi>;
4760 // Pattern to match llvm.arm.* intrinsic.
4761 defm : Neon_ScalarShiftLImm_D_size_patterns<Neon_sqrshlImm, SQSHLddi>;
4763 // Unsigned Saturating Shift Left (Immediate)
4764 defm UQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01110, "uqshl">;
4765 defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vqshlu_n,
4767 UQSHLssi, UQSHLddi>;
4768 // Pattern to match llvm.arm.* intrinsic.
4769 defm : Neon_ScalarShiftLImm_D_size_patterns<Neon_uqrshlImm, UQSHLddi>;
4771 // Signed Saturating Shift Left Unsigned (Immediate)
4772 defm SQSHLU : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01100, "sqshlu">;
4773 defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vsqshlu,
4774 SQSHLUbbi, SQSHLUhhi,
4775 SQSHLUssi, SQSHLUddi>;
// SRI/SLI insert shifted bits into the existing destination value, so
// they reuse the accumulate classes (which constrain $Src = $Rd).
4777 // Shift Right And Insert (Immediate)
4778 def SRI : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b01000, "sri">;
4779 def : Neon_ScalarShiftRImm_accum_D_size_patterns
4780 <int_aarch64_neon_vsri, SRI>;
4782 // Shift Left And Insert (Immediate)
4783 def SLI : NeonI_ScalarShiftLeftImm_accum_D_size<0b1, 0b01010, "sli">;
4784 def : Neon_ScalarShiftLImm_accum_D_size_patterns
4785 <int_aarch64_neon_vsli, SLI>;
4787 // Signed Saturating Shift Right Narrow (Immediate)
4788 defm SQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10010, "sqshrn">;
4789 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrn,
4790 SQSHRNbhi, SQSHRNhsi,
4793 // Unsigned Saturating Shift Right Narrow (Immediate)
4794 defm UQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10010, "uqshrn">;
4795 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqshrn,
4796 UQSHRNbhi, UQSHRNhsi,
4799 // Signed Saturating Rounded Shift Right Narrow (Immediate)
4800 defm SQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10011, "sqrshrn">;
4801 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrn,
4802 SQRSHRNbhi, SQRSHRNhsi,
4805 // Unsigned Saturating Rounded Shift Right Narrow (Immediate)
4806 defm UQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10011, "uqrshrn">;
4807 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqrshrn,
4808 UQRSHRNbhi, UQRSHRNhsi,
4811 // Signed Saturating Shift Right Unsigned Narrow (Immediate)
4812 defm SQSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10000, "sqshrun">;
4813 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrun,
4814 SQSHRUNbhi, SQSHRUNhsi,
4817 // Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
4818 defm SQRSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10001, "sqrshrun">;
4819 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrun,
4820 SQRSHRUNbhi, SQRSHRUNhsi,
4823 // Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
4824 defm SCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11100, "scvtf">;
4825 defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtfxs2fp_n,
4826 SCVTF_Nssi, SCVTF_Nddi>;
4828 // Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
4829 defm UCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11100, "ucvtf">;
4830 defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtfxu2fp_n,
4831 UCVTF_Nssi, UCVTF_Nddi>;
4833 // Scalar Floating-point Convert To Signed Fixed-point (Immediate)
4834 defm FCVTZS_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11111, "fcvtzs">;
4835 defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvtfp2fxs_n,
4836 FCVTZS_Nssi, FCVTZS_Nddi>;
4838 // Scalar Floating-point Convert To Unsigned Fixed-point (Immediate)
4839 defm FCVTZU_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11111, "fcvtzu">;
4840 defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvtfp2fxu_n,
4841 FCVTZU_Nssi, FCVTZU_Nddi>;
4843 // Patterns For Convert Instructions Between v1f64 and v1i64
4844 class Neon_ScalarShiftImm_cvtf_v1f64_pattern<SDPatternOperator opnode,
4846 : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
4847 (INST FPR64:$Rn, imm:$Imm)>;
4849 class Neon_ScalarShiftImm_fcvt_v1f64_pattern<SDPatternOperator opnode,
4851 : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
4852 (INST FPR64:$Rn, imm:$Imm)>;
4854 def : Neon_ScalarShiftImm_cvtf_v1f64_pattern<int_arm_neon_vcvtfxs2fp,
4857 def : Neon_ScalarShiftImm_cvtf_v1f64_pattern<int_arm_neon_vcvtfxu2fp,
4860 def : Neon_ScalarShiftImm_fcvt_v1f64_pattern<int_arm_neon_vcvtfp2fxs,
4863 def : Neon_ScalarShiftImm_fcvt_v1f64_pattern<int_arm_neon_vcvtfp2fxu,
4866 // Scalar Integer Add
4867 let isCommutable = 1 in {
4868 def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
4871 // Scalar Integer Sub
4872 def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">;
4874 // Pattern for Scalar Integer Add and Sub with D register only
4875 defm : Neon_Scalar3Same_D_size_patterns<add, ADDddd>;
4876 defm : Neon_Scalar3Same_D_size_patterns<sub, SUBddd>;
4878 // Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub
// Signed and unsigned variants of the intrinsic select the same
// instruction, since two's-complement add/sub is sign-agnostic.
4879 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vaddds, ADDddd>;
4880 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vadddu, ADDddd>;
4881 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubds, SUBddd>;
4882 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubdu, SUBddd>;
// NOTE(review): the trailing 1/0 argument presumably marks commutativity
// (add variants pass 1, sub variants 0) -- confirm against the
// NeonI_Scalar3Same_BHSD_sizes definition.
4884 // Scalar Integer Saturating Add (Signed, Unsigned)
4885 defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;
4886 defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;
4888 // Scalar Integer Saturating Sub (Signed, Unsigned)
4889 defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
4890 defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;
4893 // Patterns to match llvm.arm.* intrinsic for
4894 // Scalar Integer Saturating Add, Sub (Signed, Unsigned)
4895 defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqadds, SQADDbbb,
4896 SQADDhhh, SQADDsss, SQADDddd>;
4897 defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqaddu, UQADDbbb,
4898 UQADDhhh, UQADDsss, UQADDddd>;
4899 defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqsubs, SQSUBbbb,
4900 SQSUBhhh, SQSUBsss, SQSUBddd>;
4901 defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqsubu, UQSUBbbb,
4902 UQSUBhhh, UQSUBsss, UQSUBddd>;
4904 // Scalar Integer Saturating Doubling Multiply Half High
4905 defm SQDMULH : NeonI_Scalar3Same_HS_sizes<0b0, 0b10110, "sqdmulh", 1>;
4907 // Scalar Integer Saturating Rounding Doubling Multiply Half High
4908 defm SQRDMULH : NeonI_Scalar3Same_HS_sizes<0b1, 0b10110, "sqrdmulh", 1>;
4910 // Patterns to match llvm.arm.* intrinsic for
4911 // Scalar Integer Saturating Doubling Multiply Half High and
4912 // Scalar Integer Saturating Rounding Doubling Multiply Half High
4913 defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqdmulh, SQDMULHhhh,
4915 defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqrdmulh, SQRDMULHhhh,
4918 // Scalar Floating-point Multiply Extended
4919 defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>;
4921 // Scalar Floating-point Reciprocal Step
4922 defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>;
4923 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vrecps, f32, f32,
4924 FRECPSsss, f64, f64, FRECPSddd>;
4925 def : Pat<(v1f64 (int_arm_neon_vrecps (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
4926 (FRECPSddd FPR64:$Rn, FPR64:$Rm)>;
4928 // Scalar Floating-point Reciprocal Square Root Step
4929 defm FRSQRTS : NeonI_Scalar3Same_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", 0>;
4930 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vrsqrts, f32, f32,
4931 FRSQRTSsss, f64, f64, FRSQRTSddd>;
4932 def : Pat<(v1f64 (int_arm_neon_vrsqrts (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
4933 (FRSQRTSddd FPR64:$Rn, FPR64:$Rm)>;
4934 def : Pat<(v1f64 (fsqrt (v1f64 FPR64:$Rn))), (FSQRTdd FPR64:$Rn)>;
4936 // Patterns to match llvm.aarch64.* intrinsic for
4937 // Scalar Floating-point Multiply Extended,
4938 multiclass Neon_Scalar3Same_MULX_SD_size_patterns<SDPatternOperator opnode,
4940 Instruction INSTD> {
4941 def : Pat<(f32 (opnode (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
4942 (INSTS FPR32:$Rn, FPR32:$Rm)>;
4943 def : Pat<(f64 (opnode (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
4944 (INSTD FPR64:$Rn, FPR64:$Rm)>;
4947 defm : Neon_Scalar3Same_MULX_SD_size_patterns<int_aarch64_neon_vmulx,
4948 FMULXsss, FMULXddd>;
4949 def : Pat<(v1f64 (int_aarch64_neon_vmulx (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
4950 (FMULXddd FPR64:$Rn, FPR64:$Rm)>;
4952 // Scalar Integer Shift Left (Signed, Unsigned)
4953 def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
4954 def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;
4956 // Patterns to match llvm.arm.* intrinsic for
4957 // Scalar Integer Shift Left (Signed, Unsigned)
4958 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
4959 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
4961 // Patterns to match llvm.aarch64.* intrinsic for
4962 // Scalar Integer Shift Left (Signed, Unsigned)
4963 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshlds, SSHLddd>;
4964 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshldu, USHLddd>;
4966 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
4967 defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
4968 defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;
4970 // Patterns to match llvm.aarch64.* intrinsic for
4971 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
4972 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshls, SQSHLbbb,
4973 SQSHLhhh, SQSHLsss, SQSHLddd>;
4974 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshlu, UQSHLbbb,
4975 UQSHLhhh, UQSHLsss, UQSHLddd>;
4977 // Patterns to match llvm.arm.* intrinsic for
4978 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
4979 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
4980 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
4982 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
4983 def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">;
4984 def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">;
4986 // Patterns to match llvm.aarch64.* intrinsic for
4987 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
4988 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshlds, SRSHLddd>;
4989 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshldu, URSHLddd>;
4991 // Patterns to match llvm.arm.* intrinsic for
4992 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
4993 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
4994 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
4996 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
4997 defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>;
4998 defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>;
5000 // Patterns to match llvm.aarch64.* intrinsic for
5001 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
5002 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshls, SQRSHLbbb,
5003 SQRSHLhhh, SQRSHLsss, SQRSHLddd>;
5004 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb,
5005 UQRSHLhhh, UQRSHLsss, UQRSHLddd>;
5007 // Patterns to match llvm.arm.* intrinsic for
5008 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
5009 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
5010 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
5012 // Signed Saturating Doubling Multiply-Add Long
5013 defm SQDMLAL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1001, "sqdmlal">;
5014 defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlal,
5015 SQDMLALshh, SQDMLALdss>;
5017 // Signed Saturating Doubling Multiply-Subtract Long
5018 defm SQDMLSL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1011, "sqdmlsl">;
5019 defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlsl,
5020 SQDMLSLshh, SQDMLSLdss>;
5022 // Signed Saturating Doubling Multiply Long
5023 defm SQDMULL : NeonI_Scalar3Diff_HS_size<0b0, 0b1101, "sqdmull">;
5024 defm : Neon_Scalar3Diff_HS_size_patterns<int_arm_neon_vqdmull,
5025 SQDMULLshh, SQDMULLdss>;
5027 // Scalar Signed Integer Convert To Floating-point
5028 defm SCVTF : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11101, "scvtf">;
5029 defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtint2fps,
5032 // Scalar Unsigned Integer Convert To Floating-point
5033 defm UCVTF : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11101, "ucvtf">;
5034 defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtint2fpu,
5037 // Scalar Floating-point Converts
5038 def FCVTXN : NeonI_Scalar2SameMisc_fcvtxn_D_size<0b1, 0b10110, "fcvtxn">;
5039 def : Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<int_aarch64_neon_fcvtxn,
5042 defm FCVTNS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11010, "fcvtns">;
5043 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtns,
5044 FCVTNSss, FCVTNSdd>;
5045 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtns, FCVTNSdd>;
5047 defm FCVTNU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11010, "fcvtnu">;
5048 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtnu,
5049 FCVTNUss, FCVTNUdd>;
5050 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtnu, FCVTNUdd>;
5052 defm FCVTMS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11011, "fcvtms">;
5053 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtms,
5054 FCVTMSss, FCVTMSdd>;
5055 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtms, FCVTMSdd>;
5057 defm FCVTMU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11011, "fcvtmu">;
5058 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtmu,
5059 FCVTMUss, FCVTMUdd>;
5060 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtmu, FCVTMUdd>;
5062 defm FCVTAS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11100, "fcvtas">;
5063 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtas,
5064 FCVTASss, FCVTASdd>;
5065 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtas, FCVTASdd>;
5067 defm FCVTAU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11100, "fcvtau">;
5068 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtau,
5069 FCVTAUss, FCVTAUdd>;
5070 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtau, FCVTAUdd>;
5072 defm FCVTPS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11010, "fcvtps">;
5073 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtps,
5074 FCVTPSss, FCVTPSdd>;
5075 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtps, FCVTPSdd>;
5077 defm FCVTPU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11010, "fcvtpu">;
5078 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtpu,
5079 FCVTPUss, FCVTPUdd>;
5080 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtpu, FCVTPUdd>;
5082 defm FCVTZS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11011, "fcvtzs">;
5083 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzs,
5084 FCVTZSss, FCVTZSdd>;
5085 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_aarch64_neon_vcvtzs,
5088 defm FCVTZU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11011, "fcvtzu">;
5089 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzu,
5090 FCVTZUss, FCVTZUdd>;
5091 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_aarch64_neon_vcvtzu,
5094 // Patterns For Convert Instructions Between v1f64 and v1i64
5095 class Neon_Scalar2SameMisc_cvtf_v1f64_pattern<SDPatternOperator opnode,
5097 : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn))), (INST FPR64:$Rn)>;
5099 class Neon_Scalar2SameMisc_fcvt_v1f64_pattern<SDPatternOperator opnode,
5101 : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
5103 def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern<sint_to_fp, SCVTFdd>;
5104 def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern<uint_to_fp, UCVTFdd>;
5106 def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern<fp_to_sint, FCVTZSdd>;
5107 def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern<fp_to_uint, FCVTZUdd>;
5109 // Scalar Floating-point Reciprocal Estimate
5110 defm FRECPE : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11101, "frecpe">;
5111 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpe,
5112 FRECPEss, FRECPEdd>;
5113 def : Neon_Scalar2SameMisc_V1_D_size_patterns<int_arm_neon_vrecpe,
5116 // Scalar Floating-point Reciprocal Exponent
5117 defm FRECPX : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11111, "frecpx">;
5118 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpx,
5119 FRECPXss, FRECPXdd>;
5121 // Scalar Floating-point Reciprocal Square Root Estimate
5122 defm FRSQRTE: NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11101, "frsqrte">;
5123 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrsqrte,
5124 FRSQRTEss, FRSQRTEdd>;
5125 def : Neon_Scalar2SameMisc_V1_D_size_patterns<int_arm_neon_vrsqrte,
5128 // Scalar Floating-point Round
// Helper mapping a v1f64 rounding DAG node (opnode) onto its scalar
// FRINT* instruction (INST).
5129 class Neon_ScalarFloatRound_pattern<SDPatternOperator opnode, Instruction INST>
5130 : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
// One pattern per rounding mode: FRINTP (+inf), FRINTM (-inf),
// FRINTZ (toward zero), FRINTX (exact, current mode), FRINTI (current
// mode), FRINTA (ties away from zero), FRINTN (ties to even).
5132 def : Neon_ScalarFloatRound_pattern<fceil, FRINTPdd>;
5133 def : Neon_ScalarFloatRound_pattern<ffloor, FRINTMdd>;
5134 def : Neon_ScalarFloatRound_pattern<ftrunc, FRINTZdd>;
5135 def : Neon_ScalarFloatRound_pattern<frint, FRINTXdd>;
5136 def : Neon_ScalarFloatRound_pattern<fnearbyint, FRINTIdd>;
5137 def : Neon_ScalarFloatRound_pattern<frnd, FRINTAdd>;
5138 def : Neon_ScalarFloatRound_pattern<int_aarch64_neon_frintn, FRINTNdd>;
5140 // Scalar Integer Compare
5142 // Scalar Compare Bitwise Equal
5143 def CMEQddd: NeonI_Scalar3Same_D_size<0b1, 0b10001, "cmeq">;
5144 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vceq, CMEQddd>;
5146 class Neon_Scalar3Same_cmp_D_size_v1_patterns<SDPatternOperator opnode,
5149 : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm), CC)),
5150 (INSTD FPR64:$Rn, FPR64:$Rm)>;
5152 def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMEQddd, SETEQ>;
5154 // Scalar Compare Signed Greater Than Or Equal
5155 def CMGEddd: NeonI_Scalar3Same_D_size<0b0, 0b00111, "cmge">;
5156 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vcge, CMGEddd>;
5157 def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMGEddd, SETGE>;
5159 // Scalar Compare Unsigned Higher Or Same
5160 def CMHSddd: NeonI_Scalar3Same_D_size<0b1, 0b00111, "cmhs">;
5161 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vchs, CMHSddd>;
5162 def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMHSddd, SETUGE>;
5164 // Scalar Compare Unsigned Higher
5165 def CMHIddd: NeonI_Scalar3Same_D_size<0b1, 0b00110, "cmhi">;
5166 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vchi, CMHIddd>;
5167 def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMHIddd, SETUGT>;
5169 // Scalar Compare Signed Greater Than
5170 def CMGTddd: NeonI_Scalar3Same_D_size<0b0, 0b00110, "cmgt">;
5171 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vcgt, CMGTddd>;
5172 def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMGTddd, SETGT>;
5174 // Scalar Compare Bitwise Test Bits
5175 def CMTSTddd: NeonI_Scalar3Same_D_size<0b0, 0b10001, "cmtst">;
5176 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vtstd, CMTSTddd>;
5177 defm : Neon_Scalar3Same_D_size_patterns<Neon_tst, CMTSTddd>;
5179 // Scalar Compare Bitwise Equal To Zero
5180 def CMEQddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01001, "cmeq">;
5181 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vceq,
5183 def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETEQ, CMEQddi>;
5185 // Scalar Compare Signed Greater Than Or Equal To Zero
5186 def CMGEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01000, "cmge">;
5187 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcge,
5189 def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETGE, CMGEddi>;
5191 // Scalar Compare Signed Greater Than Zero
5192 def CMGTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01000, "cmgt">;
5193 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcgt,
5195 def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETGT, CMGTddi>;
5197 // Scalar Compare Signed Less Than Or Equal To Zero
5198 def CMLEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01001, "cmle">;
5199 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vclez,
5201 def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETLE, CMLEddi>;
5203 // Scalar Compare Less Than Zero
5204 def CMLTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01010, "cmlt">;
5205 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcltz,
5207 def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETLT, CMLTddi>;
// Scalar Floating-point Compare
// Each comparison produces an all-ones/all-zeros integer mask in the scalar
// FP register; S/D size variants come from NeonI_Scalar3Same_SD_sizes.

// Scalar Floating-point Compare Mask Equal
defm FCMEQ: NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11100, "fcmeq">;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fceq, v1i32, f32,
                                         FCMEQsss, v1i64, f64, FCMEQddd>;
def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETEQ, FCMEQddd>;

// Scalar Floating-point Compare Mask Equal To Zero
defm FCMEQZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01101, "fcmeq">;
defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fceq, SETEQ,
                                                  FCMEQZssi, FCMEQZddi>;

// Scalar Floating-point Compare Mask Greater Than Or Equal
defm FCMGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11100, "fcmge">;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcge, v1i32, f32,
                                         FCMGEsss, v1i64, f64, FCMGEddd>;
def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETGE, FCMGEddd>;

// Scalar Floating-point Compare Mask Greater Than Or Equal To Zero
defm FCMGEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01100, "fcmge">;
defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcge, SETGE,
                                                  FCMGEZssi, FCMGEZddi>;

// Scalar Floating-point Compare Mask Greater Than
defm FCMGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11100, "fcmgt">;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcgt, v1i32, f32,
                                         FCMGTsss, v1i64, f64, FCMGTddd>;
def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETGT, FCMGTddd>;

// Scalar Floating-point Compare Mask Greater Than Zero
defm FCMGTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01100, "fcmgt">;
defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcgt, SETGT,
                                                  FCMGTZssi, FCMGTZddi>;

// Scalar Floating-point Compare Mask Less Than Or Equal To Zero
defm FCMLEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01101, "fcmle">;
defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fclez, SETLE,
                                                  FCMLEZssi, FCMLEZddi>;

// Scalar Floating-point Compare Mask Less Than Zero
defm FCMLTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01110, "fcmlt">;
defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcltz, SETLT,
                                                  FCMLTZssi, FCMLTZddi>;

// Scalar Floating-point Absolute Compare Mask Greater Than Or Equal
defm FACGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11101, "facge">;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcage, v1i32, f32,
                                         FACGEsss, v1i64, f64, FACGEddd>;
def : Pat<(v1i64 (int_aarch64_neon_vcage (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FACGEddd FPR64:$Rn, FPR64:$Rm)>;

// Scalar Floating-point Absolute Compare Mask Greater Than
defm FACGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11101, "facgt">;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcagt, v1i32, f32,
                                         FACGTsss, v1i64, f64, FACGTddd>;
def : Pat<(v1i64 (int_aarch64_neon_vcagt (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FACGTddd FPR64:$Rn, FPR64:$Rm)>;

// Scalar Floating-point Absolute Difference
defm FABD: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11010, "fabd">;
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vabd, f32, f32,
                                         FABDsss, f64, f64, FABDddd>;
// Scalar Absolute Value
defm ABS : NeonI_Scalar2SameMisc_D_size<0b0, 0b01011, "abs">;
defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vabs, ABSdd>;

// Scalar Signed Saturating Absolute Value
defm SQABS : NeonI_Scalar2SameMisc_BHSD_size<0b0, 0b00111, "sqabs">;
defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqabs,
                                               SQABSbb, SQABShh, SQABSss, SQABSdd>;

// Scalar Negate
defm NEG : NeonI_Scalar2SameMisc_D_size<0b1, 0b01011, "neg">;
defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vneg, NEGdd>;

// Scalar Signed Saturating Negate
defm SQNEG : NeonI_Scalar2SameMisc_BHSD_size<0b1, 0b00111, "sqneg">;
defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqneg,
                                               SQNEGbb, SQNEGhh, SQNEGss, SQNEGdd>;

// Scalar Signed Saturating Accumulated of Unsigned Value
defm SUQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b0, 0b00011, "suqadd">;
defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vuqadd,
                                                     SUQADDss, SUQADDdd>;

// Scalar Unsigned Saturating Accumulated of Signed Value
defm USQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b1, 0b00011, "usqadd">;
defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vsqadd,
                                                     USQADDss, USQADDdd>;

// v1i64 accumulate forms: $Src is the accumulator and is tied to the result.
def : Pat<(v1i64 (int_aarch64_neon_suqadd (v1i64 FPR64:$Src),
                                          (v1i64 FPR64:$Rn))),
          (SUQADDdd FPR64:$Src, FPR64:$Rn)>;

def : Pat<(v1i64 (int_aarch64_neon_usqadd (v1i64 FPR64:$Src),
                                          (v1i64 FPR64:$Rn))),
          (USQADDdd FPR64:$Src, FPR64:$Rn)>;

// Map the ARM-style v1i64 intrinsics onto the scalar D-register instructions.
def : Pat<(v1i64 (int_arm_neon_vabs (v1i64 FPR64:$Rn))),

def : Pat<(v1i64 (int_arm_neon_vqabs (v1i64 FPR64:$Rn))),
          (SQABSdd FPR64:$Rn)>;

def : Pat<(v1i64 (int_arm_neon_vqneg (v1i64 FPR64:$Rn))),
          (SQNEGdd FPR64:$Rn)>;

// (0 - x) on v1i64 selects to the scalar NEG instruction.
def : Pat<(v1i64 (sub (v1i64 (bitconvert (v8i8 Neon_AllZero))),
                      (v1i64 FPR64:$Rn))),

// Scalar Signed Saturating Extract Unsigned Narrow
defm SQXTUN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10010, "sqxtun">;
defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnsu,

// Scalar Signed Saturating Extract Narrow
defm SQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b0, 0b10100, "sqxtn">;
defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovns,

// Scalar Unsigned Saturating Extract Narrow
defm UQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10100, "uqxtn">;
defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnu,
// Scalar Reduce Pairwise
// These instructions reduce a 2-element vector to a single scalar
// (e.g. addp d0, v1.2d adds the two doubleword lanes of v1).

multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,
                                    string asmop, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _D_2D : NeonI_ScalarPair<u, {size, 0b1}, opcode,
                                 (outs FPR64:$Rd), (ins VPR128:$Rn),
                                 !strconcat(asmop, "\t$Rd, $Rn.2d"),

// Adds an _S_2S (FPR32 result from a 64-bit vector) variant on top of _D_2D.
multiclass NeonI_ScalarPair_SD_sizes<bit u, bit size, bits<5> opcode,
                                     string asmop, bit Commutable = 0>
  : NeonI_ScalarPair_D_sizes<u, size, opcode, asmop, Commutable> {
  let isCommutable = Commutable in {
    def _S_2S : NeonI_ScalarPair<u, {size, 0b0}, opcode,
                                 (outs FPR32:$Rd), (ins VPR64:$Rn),
                                 !strconcat(asmop, "\t$Rd, $Rn.2s"),

// Scalar Reduce Addition Pairwise (Integer) with
// Pattern to match llvm.arm.* intrinsic
defm ADDPvv : NeonI_ScalarPair_D_sizes<0b0, 0b1, 0b11011, "addp", 0>;

// Pattern to match llvm.aarch64.* intrinsic for
// Scalar Reduce Addition Pairwise (Integer)
def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))),
          (ADDPvv_D_2D VPR128:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_vaddv (v2i64 VPR128:$Rn))),
          (ADDPvv_D_2D VPR128:$Rn)>;

// Scalar Reduce Addition Pairwise (Floating Point)
defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>;

// Scalar Reduce Maximum Pairwise (Floating Point)
defm FMAXPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01111, "fmaxp", 0>;

// Scalar Reduce Minimum Pairwise (Floating Point)
defm FMINPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01111, "fminp", 0>;

// Scalar Reduce maxNum Pairwise (Floating Point)
defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>;

// Scalar Reduce minNum Pairwise (Floating Point)
defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>;

// Selects the S/D pairwise-reduce instruction for a given intrinsic.
multiclass Neon_ScalarPair_SD_size_patterns<SDPatternOperator opnode,
                                            Instruction INSTD> {
  def : Pat<(f32 (opnode (v2f32 VPR64:$Rn))),
  def : Pat<(f64 (opnode (v2f64 VPR128:$Rn))),
            (INSTD VPR128:$Rn)>;

// Patterns to match llvm.aarch64.* intrinsic for
// Scalar Reduce Add, Max, Min, MaxiNum, MinNum Pairwise (Floating Point)
defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfadd,
                                        FADDPvv_S_2S, FADDPvv_D_2D>;

defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmax,
                                        FMAXPvv_S_2S, FMAXPvv_D_2D>;

defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmin,
                                        FMINPvv_S_2S, FMINPvv_D_2D>;

defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm,
                                        FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>;

defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm,
                                        FMINNMPvv_S_2S, FMINNMPvv_D_2D>;

// 4-element f32 reduce-add: first FADDP the quad vector against itself,
// then pairwise-add the low two lanes.
def : Pat<(f32 (int_aarch64_neon_vpfadd (v4f32 VPR128:$Rn))),
          (FADDPvv_S_2S (v2f32
            (v4f32 (FADDP_4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rn))),
// Scalar by element Arithmetic

// Base class for scalar-by-element ops (e.g. fmul s0, s1, v2.s[3]).
// rmlane is the printed lane suffix; {u, szhi, szlo} select the encoding row.
class NeonI_ScalarXIndexedElemArith<string asmop, bits<4> opcode,
                                    string rmlane, bit u, bit szhi, bit szlo,
                                    RegisterClass ResFPR, RegisterClass OpFPR,
                                    RegisterOperand OpVPR, Operand OpImm>
  : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode,
                             (ins OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
                             asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",

// As above, but for accumulating ops: $src carries the accumulator in and is
// tied to the destination via the Constraints below.
class NeonI_ScalarXIndexedElemArith_Constraint_Impl<string asmop, bits<4> opcode,
                                                    bit u, bit szhi, bit szlo,
                                                    RegisterClass ResFPR,
                                                    RegisterClass OpFPR,
                                                    RegisterOperand OpVPR,
  : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode,
                             (ins ResFPR:$src, OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
                             asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
  let Constraints = "$src = $Rd";

// Scalar Floating Point multiply (scalar, by element)
// The Inst{...} lets below scatter the lane index into the H/L/M encoding
// bits; the exact split depends on the element size.
def FMULssv_4S : NeonI_ScalarXIndexedElemArith<"fmul",
  0b1001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
def FMULddv_2D : NeonI_ScalarXIndexedElemArith<"fmul",
  0b1001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
  let Inst{11} = Imm{0}; // h
  let Inst{21} = 0b0; // l
  let Inst{20-16} = MRm;

// Scalar Floating Point multiply extended (scalar, by element)
def FMULXssv_4S : NeonI_ScalarXIndexedElemArith<"fmulx",
  0b1001, ".s", 0b1, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
def FMULXddv_2D : NeonI_ScalarXIndexedElemArith<"fmulx",
  0b1001, ".d", 0b1, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
  let Inst{11} = Imm{0}; // h
  let Inst{21} = 0b0; // l
  let Inst{20-16} = MRm;
// Selection patterns for commutative scalar-by-element multiplies.
// Covers both 128-bit (OpTy) and 64-bit (OpNTy, widened via SUBREG_TO_REG
// into ExTy) vector operands, in both operand orders.
multiclass Neon_ScalarXIndexedElem_MUL_MULX_Patterns<
  SDPatternOperator opnode,
  ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm,
  ValueType OpNTy, ValueType ExTy, Operand OpNImm> {

  def : Pat<(ResTy (opnode (ResTy FPRC:$Rn),
              (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)))),
            (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def : Pat<(ResTy (opnode (ResTy FPRC:$Rn),
              (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)))),
            (ResTy (INST (ResTy FPRC:$Rn),
              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),

  // Same patterns with the extracted lane as the first operand.
  def : Pat<(ResTy (opnode
              (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
            (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def : Pat<(ResTy (opnode
              (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
            (ResTy (INST (ResTy FPRC:$Rn),
              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),

// Patterns for Scalar Floating Point multiply (scalar, by element)
defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<fmul, FMULssv_4S,
  f32, FPR32, v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<fmul, FMULddv_2D,
  f64, FPR64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;

// Patterns for Scalar Floating Point multiply extended (scalar, by element)
defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<int_aarch64_neon_vmulx,
  FMULXssv_4S, f32, FPR32, v4f32, neon_uimm2_bare,
  v2f32, v4f32, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<int_aarch64_neon_vmulx,
  FMULXddv_2D, f64, FPR64, v2f64, neon_uimm1_bare,
  v1f64, v2f64, neon_uimm0_bare>;
// Scalar Floating Point fused multiply-add (scalar, by element)
// Accumulating forms use the _Constraint_Impl base so the accumulator
// register is tied to the destination.
def FMLAssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
  0b0001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
def FMLAddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
  0b0001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
  let Inst{11} = Imm{0}; // h
  let Inst{21} = 0b0; // l
  let Inst{20-16} = MRm;

// Scalar Floating Point fused multiply-subtract (scalar, by element)
def FMLSssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
  0b0101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
def FMLSddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
  0b0101, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
  let Inst{11} = Imm{0}; // h
  let Inst{21} = 0b0; // l
  let Inst{20-16} = MRm;
// We are allowed to match the fma instruction regardless of compile options.
// Maps (fma ...) DAG nodes, with the lane-extract in either multiplicand
// position and with/without fneg, onto the tied-accumulator FMLA/FMLS
// by-element instructions.
multiclass Neon_ScalarXIndexedElem_FMA_Patterns<
  Instruction FMLAI, Instruction FMLSI,
  ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm,
  ValueType OpNTy, ValueType ExTy, Operand OpNImm> {

  def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
              (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
            (ResTy (FMLAI (ResTy FPRC:$Ra),
              (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
              (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
            (ResTy (FMLAI (ResTy FPRC:$Ra),
              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),

  // swapped fmla operands
  def : Pat<(ResTy (fma
              (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
            (ResTy (FMLAI (ResTy FPRC:$Ra),
              (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def : Pat<(ResTy (fma
              (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
            (ResTy (FMLAI (ResTy FPRC:$Ra),
              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),

  // fma with a negated multiplicand selects to fmls.
  def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
              (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))),
            (ResTy (FMLSI (ResTy FPRC:$Ra),
              (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
              (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))),
            (ResTy (FMLSI (ResTy FPRC:$Ra),
              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),

  // swapped fmls operands
  def : Pat<(ResTy (fma
              (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))),
            (ResTy (FMLSI (ResTy FPRC:$Ra),
              (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def : Pat<(ResTy (fma
              (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))),
            (ResTy (FMLSI (ResTy FPRC:$Ra),
              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
// Scalar Floating Point fused multiply-add and
// multiply-subtract (scalar, by element)
defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAssv_4S, FMLSssv_4S,
  f32, FPR32, v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
// NOTE: the f64 instantiation was previously duplicated verbatim, which
// emitted redundant duplicate selection patterns; instantiate it only once.
defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAddv_2D, FMLSddv_2D,
  f64, FPR64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;
// Scalar Signed saturating doubling multiply long (scalar, by element)
// The result element is twice the width of the inputs (h->s, s->d).
// For .h variants the lane index is split across H/L/M bits and MRm is
// restricted to the low register file (VPR64Lo/VPR128Lo).
def SQDMULLshv_4H : NeonI_ScalarXIndexedElemArith<"sqdmull",
  0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
def SQDMULLshv_8H : NeonI_ScalarXIndexedElemArith<"sqdmull",
  0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
def SQDMULLdsv_2S : NeonI_ScalarXIndexedElemArith<"sqdmull",
  0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
def SQDMULLdsv_4S : NeonI_ScalarXIndexedElemArith<"sqdmull",
  0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;

// Matches a scalar op whose second operand is a lane extracted from a vector
// and re-wrapped via scalar_to_vector; handles both operand orders.
multiclass Neon_ScalarXIndexedElem_MUL_Patterns<
  SDPatternOperator opnode,
  ValueType ResTy, RegisterClass FPRC,
  ValueType OpVTy, ValueType OpTy,
  ValueType VecOpTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> {

  def : Pat<(ResTy (opnode (OpVTy FPRC:$Rn),
              (OpVTy (scalar_to_vector
                (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))))),
            (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;

  def : Pat<(ResTy (opnode
              (OpVTy (scalar_to_vector
                (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))),
            (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;

// Patterns for Scalar Signed saturating doubling
// multiply long (scalar, by element)
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
  SQDMULLshv_4H, v1i32, FPR16, v1i16, i16, v4i16,
  i32, VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
  SQDMULLshv_8H, v1i32, FPR16, v1i16, i16, v8i16,
  i32, VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
  SQDMULLdsv_2S, v1i64, FPR32, v1i32, i32, v2i32,
  i32, VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
  SQDMULLdsv_4S, v1i64, FPR32, v1i32, i32, v4i32,
  i32, VPR128Lo, neon_uimm2_bare>;
// Scalar Signed saturating doubling multiply-add long (scalar, by element)
// Accumulating widening forms; the accumulator is tied to the destination
// via the _Constraint_Impl base class.
def SQDMLALshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
  0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
def SQDMLALshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
  0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
def SQDMLALdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
  0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
def SQDMLALdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
  0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;

// Scalar Signed saturating doubling
// multiply-subtract long (scalar, by element)
def SQDMLSLshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
  0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
def SQDMLSLshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
  0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
def SQDMLSLdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
  0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
def SQDMLSLdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
  0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
// Matches (saturating add/sub of accumulator with a widening multiply whose
// second multiplicand is an extracted lane) onto the fused SQDMLAL/SQDMLSL
// by-element instructions; handles both multiplicand orders.
multiclass Neon_ScalarXIndexedElem_MLAL_Patterns<
  SDPatternOperator opnode,
  SDPatternOperator coreopnode,
  ValueType ResTy, RegisterClass ResFPRC, RegisterClass FPRC,
  ValueType OpVTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> {

  def : Pat<(ResTy (opnode
              (ResTy ResFPRC:$Ra),
              (ResTy (coreopnode (OpTy FPRC:$Rn),
                (OpTy (scalar_to_vector
                  (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))))))),
            (ResTy (INST (ResTy ResFPRC:$Ra),
              (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;

  // Swapped multiplicand order.
  def : Pat<(ResTy (opnode
              (ResTy ResFPRC:$Ra),
                (OpTy (scalar_to_vector
                  (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))),
                (OpTy FPRC:$Rn))))),
            (ResTy (INST (ResTy ResFPRC:$Ra),
              (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;

// Patterns for Scalar Signed saturating
// doubling multiply-add long (scalar, by element)
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
  int_arm_neon_vqdmull, SQDMLALshv_4H, v1i32, FPR32, FPR16, v1i16, v4i16,
  i32, VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
  int_arm_neon_vqdmull, SQDMLALshv_8H, v1i32, FPR32, FPR16, v1i16, v8i16,
  i32, VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
  int_arm_neon_vqdmull, SQDMLALdsv_2S, v1i64, FPR64, FPR32, v1i32, v2i32,
  i32, VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
  int_arm_neon_vqdmull, SQDMLALdsv_4S, v1i64, FPR64, FPR32, v1i32, v4i32,
  i32, VPR128Lo, neon_uimm2_bare>;

// Patterns for Scalar Signed saturating
// doubling multiply-sub long (scalar, by element)
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
  int_arm_neon_vqdmull, SQDMLSLshv_4H, v1i32, FPR32, FPR16, v1i16, v4i16,
  i32, VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
  int_arm_neon_vqdmull, SQDMLSLshv_8H, v1i32, FPR32, FPR16, v1i16, v8i16,
  i32, VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
  int_arm_neon_vqdmull, SQDMLSLdsv_2S, v1i64, FPR64, FPR32, v1i32, v2i32,
  i32, VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
  int_arm_neon_vqdmull, SQDMLSLdsv_4S, v1i64, FPR64, FPR32, v1i32, v4i32,
  i32, VPR128Lo, neon_uimm2_bare>;
// Scalar Signed saturating doubling multiply returning
// high half (scalar, by element)
def SQDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
  0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
def SQDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
  0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
def SQDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
  0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
def SQDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
  0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;

// Patterns for Scalar Signed saturating doubling multiply returning
// high half (scalar, by element)
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHhhv_4H, v1i16, FPR16, v1i16, i16, v4i16,
  i32, VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHhhv_8H, v1i16, FPR16, v1i16, i16, v8i16,
  i32, VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHssv_2S, v1i32, FPR32, v1i32, i32, v2i32,
  i32, VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHssv_4S, v1i32, FPR32, v1i32, i32, v4i32,
  i32, VPR128Lo, neon_uimm2_bare>;

// Scalar Signed saturating rounding doubling multiply
// returning high half (scalar, by element)
def SQRDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
  0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
def SQRDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
  0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
def SQRDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
  0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
def SQRDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
  0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1}; // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;

// Patterns for the rounding variant (same shapes as SQDMULH above).
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
  SQRDMULHhhv_4H, v1i16, FPR16, v1i16, i16, v4i16, i32,
  VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
  SQRDMULHhhv_8H, v1i16, FPR16, v1i16, i16, v8i16, i32,
  VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
  SQRDMULHssv_2S, v1i32, FPR32, v1i32, i32, v2i32, i32,
  VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
  SQRDMULHssv_4S, v1i32, FPR32, v1i32, i32, v4i32, i32,
  VPR128Lo, neon_uimm2_bare>;
// Scalar general arithmetic operation
// Helper pattern classes mapping v1f64 DAG ops onto the plain
// double-precision scalar FP instructions (FADDddd etc.).
class Neon_Scalar_GeneralMath2D_pattern<SDPatternOperator opnode,
  : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;

class Neon_Scalar_GeneralMath3D_pattern<SDPatternOperator opnode,
  : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
        (INST FPR64:$Rn, FPR64:$Rm)>;

class Neon_Scalar_GeneralMath4D_pattern<SDPatternOperator opnode,
  : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm),
                       (v1f64 FPR64:$Ra))),
        (INST FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

def : Neon_Scalar_GeneralMath3D_pattern<fadd, FADDddd>;
def : Neon_Scalar_GeneralMath3D_pattern<fmul, FMULddd>;
def : Neon_Scalar_GeneralMath3D_pattern<fsub, FSUBddd>;
def : Neon_Scalar_GeneralMath3D_pattern<fdiv, FDIVddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vabds, FABDddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmaxs, FMAXddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmins, FMINddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vmaxnm, FMAXNMddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vminnm, FMINNMddd>;

def : Neon_Scalar_GeneralMath2D_pattern<fabs, FABSdd>;
def : Neon_Scalar_GeneralMath2D_pattern<fneg, FNEGdd>;

def : Neon_Scalar_GeneralMath4D_pattern<fma, FMADDdddd>;
def : Neon_Scalar_GeneralMath4D_pattern<fmsub, FMSUBdddd>;
// Scalar Copy - DUP element to scalar
// Copies one vector lane into a scalar FP register; Inst{20-16} encodes the
// lane index left-shifted past a size marker bit (the trailing 1).
class NeonI_Scalar_DUP<string asmop, string asmlane,
                       RegisterClass ResRC, RegisterOperand VPRC,
  : NeonI_ScalarCopy<(outs ResRC:$Rd), (ins VPRC:$Rn, OpImm:$Imm),
                     asmop # "\t$Rd, $Rn." # asmlane # "[$Imm]",

def DUPbv_B : NeonI_Scalar_DUP<"dup", "b", FPR8, VPR128, neon_uimm4_bare> {
  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
def DUPhv_H : NeonI_Scalar_DUP<"dup", "h", FPR16, VPR128, neon_uimm3_bare> {
  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
def DUPsv_S : NeonI_Scalar_DUP<"dup", "s", FPR32, VPR128, neon_uimm2_bare> {
  let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
def DUPdv_D : NeonI_Scalar_DUP<"dup", "d", FPR64, VPR128, neon_uimm1_bare> {
  let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};

// Extracting lane 0 is a plain subregister read; other lanes use DUP.
def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 0)),
          (f32 (EXTRACT_SUBREG (v4f32 VPR128:$Rn), sub_32))>;
def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 1)),
          (f32 (DUPsv_S (v4f32 VPR128:$Rn), 1))>;
def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 2)),
          (f32 (DUPsv_S (v4f32 VPR128:$Rn), 2))>;
def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 3)),
          (f32 (DUPsv_S (v4f32 VPR128:$Rn), 3))>;

def : Pat<(f64 (vector_extract (v2f64 VPR128:$Rn), 0)),
          (f64 (EXTRACT_SUBREG (v2f64 VPR128:$Rn), sub_64))>;
def : Pat<(f64 (vector_extract (v2f64 VPR128:$Rn), 1)),
          (f64 (DUPdv_D (v2f64 VPR128:$Rn), 1))>;

// 64-bit vector sources are first widened to 128-bit with SUBREG_TO_REG.
def : Pat<(f32 (vector_extract (v2f32 VPR64:$Rn), 0)),
          (f32 (EXTRACT_SUBREG (v2f32 VPR64:$Rn), sub_32))>;
def : Pat<(f32 (vector_extract (v2f32 VPR64:$Rn), 1)),
          (f32 (DUPsv_S (v4f32 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),

def : Pat<(f64 (vector_extract (v1f64 VPR64:$Rn), 0)),
          (f64 (EXTRACT_SUBREG (v1f64 VPR64:$Rn), sub_64))>;
5980 multiclass NeonI_Scalar_DUP_Ext_Vec_pattern<Instruction DUPI,
5981 ValueType ResTy, ValueType OpTy,Operand OpLImm,
5982 ValueType NOpTy, ValueType ExTy, Operand OpNImm> {
5984 def : Pat<(ResTy (extract_subvector (OpTy VPR128:$Rn), OpLImm:$Imm)),
5985 (ResTy (DUPI VPR128:$Rn, OpLImm:$Imm))>;
5987 def : Pat<(ResTy (extract_subvector (NOpTy VPR64:$Rn), OpNImm:$Imm)),
5989 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
5993 // Patterns for extract subvectors of v1ix data using scalar DUP instructions.
5994 defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPbv_B, v1i8, v16i8, neon_uimm4_bare,
5995 v8i8, v16i8, neon_uimm3_bare>;
5996 defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPhv_H, v1i16, v8i16, neon_uimm3_bare,
5997 v4i16, v8i16, neon_uimm2_bare>;
5998 defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPsv_S, v1i32, v4i32, neon_uimm2_bare,
5999 v2i32, v4i32, neon_uimm1_bare>;
6001 multiclass NeonI_Scalar_DUP_Copy_pattern1<Instruction DUPI, ValueType ResTy,
6002 ValueType OpTy, ValueType ElemTy,
6003 Operand OpImm, ValueType OpNTy,
6004 ValueType ExTy, Operand OpNImm> {
6006 def : Pat<(ResTy (vector_insert (ResTy undef),
6007 (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)),
6008 (neon_uimm0_bare:$Imm))),
6009 (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;
6011 def : Pat<(ResTy (vector_insert (ResTy undef),
6012 (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)),
6015 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6019 multiclass NeonI_Scalar_DUP_Copy_pattern2<Instruction DUPI, ValueType ResTy,
6020 ValueType OpTy, ValueType ElemTy,
6021 Operand OpImm, ValueType OpNTy,
6022 ValueType ExTy, Operand OpNImm> {
6024 def : Pat<(ResTy (scalar_to_vector
6025 (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)))),
6026 (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;
6028 def : Pat<(ResTy (scalar_to_vector
6029 (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)))),
6031 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// Patterns for vector copy to v1ix and v1fx vectors using scalar DUP
// instructions. pattern1 covers the vector_insert form, pattern2 the
// scalar_to_vector form; both are instantiated for each element size.
defm : NeonI_Scalar_DUP_Copy_pattern1<DUPdv_D,
                                      v1i64, v2i64, i64, neon_uimm1_bare,
                                      v1i64, v2i64, neon_uimm0_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern1<DUPsv_S,
                                      v1i32, v4i32, i32, neon_uimm2_bare,
                                      v2i32, v4i32, neon_uimm1_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern1<DUPhv_H,
                                      v1i16, v8i16, i32, neon_uimm3_bare,
                                      v4i16, v8i16, neon_uimm2_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern1<DUPbv_B,
                                      v1i8, v16i8, i32, neon_uimm4_bare,
                                      v8i8, v16i8, neon_uimm3_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern2<DUPdv_D,
                                      v1i64, v2i64, i64, neon_uimm1_bare,
                                      v1i64, v2i64, neon_uimm0_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern2<DUPsv_S,
                                      v1i32, v4i32, i32, neon_uimm2_bare,
                                      v2i32, v4i32, neon_uimm1_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern2<DUPhv_H,
                                      v1i16, v8i16, i32, neon_uimm3_bare,
                                      v4i16, v8i16, neon_uimm2_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern2<DUPbv_B,
                                      v1i8, v16i8, i32, neon_uimm4_bare,
                                      v8i8, v16i8, neon_uimm3_bare>;
6062 multiclass NeonI_Scalar_DUP_alias<string asmop, string asmlane,
6063 Instruction DUPI, Operand OpImm,
6064 RegisterClass ResRC> {
6065 def : NeonInstAlias<!strconcat(asmop, "$Rd, $Rn" # asmlane # "[$Imm]"),
6066 (DUPI ResRC:$Rd, VPR128:$Rn, OpImm:$Imm), 0b0>;
// Aliases for Scalar copy - DUP element (scalar): accept "mov" spelling for
// the scalar DUP-by-lane instructions (one alias per element size).
// FIXME: This is actually the preferred syntax but TableGen can't deal with
// custom printing of aliases.
defm : NeonI_Scalar_DUP_alias<"mov", ".b", DUPbv_B, neon_uimm4_bare, FPR8>;
defm : NeonI_Scalar_DUP_alias<"mov", ".h", DUPhv_H, neon_uimm3_bare, FPR16>;
defm : NeonI_Scalar_DUP_alias<"mov", ".s", DUPsv_S, neon_uimm2_bare, FPR32>;
defm : NeonI_Scalar_DUP_alias<"mov", ".d", DUPdv_D, neon_uimm1_bare, FPR64>;
6077 multiclass NeonI_SDUP<PatFrag GetLow, PatFrag GetHigh, ValueType ResTy,
6079 def : Pat<(ResTy (GetLow VPR128:$Rn)),
6080 (ResTy (EXTRACT_SUBREG (OpTy VPR128:$Rn), sub_64))>;
6081 def : Pat<(ResTy (GetHigh VPR128:$Rn)),
6082 (ResTy (DUPdv_D (OpTy VPR128:$Rn), 1))>;
// Select the low half of a 128-bit vector as a subregister copy and the
// high half via DUPdv_D lane 1, for each 64-bit result type.
defm : NeonI_SDUP<Neon_Low16B, Neon_High16B, v8i8, v16i8>;
defm : NeonI_SDUP<Neon_Low8H, Neon_High8H, v4i16, v8i16>;
defm : NeonI_SDUP<Neon_Low4S, Neon_High4S, v2i32, v4i32>;
defm : NeonI_SDUP<Neon_Low2D, Neon_High2D, v1i64, v2i64>;
defm : NeonI_SDUP<Neon_Low4float, Neon_High4float, v2f32, v4f32>;
defm : NeonI_SDUP<Neon_Low2double, Neon_High2double, v1f64, v2f64>;
6092 //===----------------------------------------------------------------------===//
6093 // Non-Instruction Patterns
6094 //===----------------------------------------------------------------------===//
// 64-bit vector bitcasts: all 64-bit vector types share the same D register,
// so a bitconvert is a pure retyping of the operand — no code is emitted.

def : Pat<(v1i64 (bitconvert (v8i8 VPR64:$src))), (v1i64 VPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8 VPR64:$src))), (v2f32 VPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8 VPR64:$src))), (v2i32 VPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8 VPR64:$src))), (v4i16 VPR64:$src)>;

def : Pat<(v1i64 (bitconvert (v4i16 VPR64:$src))), (v1i64 VPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 VPR64:$src))), (v2i32 VPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 VPR64:$src))), (v2f32 VPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v4i16 VPR64:$src))), (v8i8 VPR64:$src)>;

def : Pat<(v1i64 (bitconvert (v2i32 VPR64:$src))), (v1i64 VPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v2i32 VPR64:$src))), (v2f32 VPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 VPR64:$src))), (v4i16 VPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v2i32 VPR64:$src))), (v8i8 VPR64:$src)>;

def : Pat<(v1i64 (bitconvert (v2f32 VPR64:$src))), (v1i64 VPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v2f32 VPR64:$src))), (v2i32 VPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 VPR64:$src))), (v4i16 VPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v2f32 VPR64:$src))), (v8i8 VPR64:$src)>;

def : Pat<(v2f32 (bitconvert (v1i64 VPR64:$src))), (v2f32 VPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
// ..and 128-bit vector bitcasts: likewise pure retypings of the shared
// Q register — no code is emitted.

def : Pat<(v2f64 (bitconvert (v16i8 VPR128:$src))), (v2f64 VPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 VPR128:$src))), (v2i64 VPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 VPR128:$src))), (v4f32 VPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 VPR128:$src))), (v4i32 VPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 VPR128:$src))), (v8i16 VPR128:$src)>;

def : Pat<(v2f64 (bitconvert (v8i16 VPR128:$src))), (v2f64 VPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 VPR128:$src))), (v2i64 VPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 VPR128:$src))), (v4i32 VPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 VPR128:$src))), (v4f32 VPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 VPR128:$src))), (v16i8 VPR128:$src)>;

def : Pat<(v2f64 (bitconvert (v4i32 VPR128:$src))), (v2f64 VPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4i32 VPR128:$src))), (v2i64 VPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 VPR128:$src))), (v4f32 VPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 VPR128:$src))), (v8i16 VPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 VPR128:$src))), (v16i8 VPR128:$src)>;

def : Pat<(v2f64 (bitconvert (v4f32 VPR128:$src))), (v2f64 VPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 VPR128:$src))), (v2i64 VPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 VPR128:$src))), (v4i32 VPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 VPR128:$src))), (v8i16 VPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 VPR128:$src))), (v16i8 VPR128:$src)>;

def : Pat<(v2f64 (bitconvert (v2i64 VPR128:$src))), (v2f64 VPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 VPR128:$src))), (v4f32 VPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 VPR128:$src))), (v4i32 VPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 VPR128:$src))), (v8i16 VPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 VPR128:$src))), (v16i8 VPR128:$src)>;

def : Pat<(v2i64 (bitconvert (v2f64 VPR128:$src))), (v2i64 VPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 VPR128:$src))), (v4f32 VPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 VPR128:$src))), (v4i32 VPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 VPR128:$src))), (v8i16 VPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 VPR128:$src))), (v16i8 VPR128:$src)>;
// ...and scalar bitcasts...
// Single-element vector <-> scalar FP of the same width share a register:
// free retyping. Vector -> GPR needs a cross-register-file FMOV.
def : Pat<(f16 (bitconvert (v1i16 FPR16:$src))), (f16 FPR16:$src)>;
def : Pat<(f32 (bitconvert (v1i32 FPR32:$src))), (f32 FPR32:$src)>;
def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;

def : Pat<(i64 (bitconvert (v1i64 FPR64:$src))), (FMOVxd $src)>;
def : Pat<(i64 (bitconvert (v1f64 FPR64:$src))), (FMOVxd $src)>;
def : Pat<(i64 (bitconvert (v2i32 FPR64:$src))), (FMOVxd $src)>;
def : Pat<(i64 (bitconvert (v2f32 FPR64:$src))), (FMOVxd $src)>;
def : Pat<(i64 (bitconvert (v4i16 FPR64:$src))), (FMOVxd $src)>;
def : Pat<(i64 (bitconvert (v8i8 FPR64:$src))), (FMOVxd $src)>;

def : Pat<(i32 (bitconvert (v1i32 FPR32:$src))), (FMOVws $src)>;
// FIXME: these three v1i64 -> {v8i8,v4i16,v2i32} patterns are exact
// duplicates of the ones defined in the 64-bit bitcast section above.
def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;

// 64-bit vector -> f64: same D register, free retyping.
def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))), (f64 VPR64:$src)>;
def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))), (f64 VPR64:$src)>;
def : Pat<(f64 (bitconvert (v2i32 VPR64:$src))), (f64 VPR64:$src)>;
def : Pat<(f64 (bitconvert (v2f32 VPR64:$src))), (f64 VPR64:$src)>;
def : Pat<(f64 (bitconvert (v1i64 VPR64:$src))), (f64 VPR64:$src)>;

// 128-bit vector -> f128: same Q register, free retyping.
def : Pat<(f128 (bitconvert (v16i8 VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(f128 (bitconvert (v8i16 VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(f128 (bitconvert (v4i32 VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), (f128 VPR128:$src)>;
// Scalar FP -> single-element vector: free retyping of the same register.
def : Pat<(v1i16 (bitconvert (f16 FPR16:$src))), (v1i16 FPR16:$src)>;
def : Pat<(v1i32 (bitconvert (f32 FPR32:$src))), (v1i32 FPR32:$src)>;
def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;

// GPR -> 64-bit vector: needs a cross-register-file FMOV.
def : Pat<(v1i64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
def : Pat<(v1f64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
def : Pat<(v2i32 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
def : Pat<(v2f32 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
def : Pat<(v4i16 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
def : Pat<(v8i8 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;

def : Pat<(v1i32 (bitconvert (i32 GPR32:$src))), (FMOVsw $src)>;

// f64 -> 64-bit vector: same D register, free retyping.
// FIXME: the v1i64 <- f64 pattern below duplicates the one defined above.
def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;

// f128 -> 128-bit vector: same Q register, free retyping.
def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
6220 // Scalar Three Same
6222 def neon_uimm3 : Operand<i64>,
6223 ImmLeaf<i64, [{return Imm < 8;}]> {
6224 let ParserMatchClass = uimm3_asmoperand;
6225 let PrintMethod = "printUImmHexOperand";
6228 def neon_uimm4 : Operand<i64>,
6229 ImmLeaf<i64, [{return Imm < 16;}]> {
6230 let ParserMatchClass = uimm4_asmoperand;
6231 let PrintMethod = "printUImmHexOperand";
6235 class NeonI_Extract<bit q, bits<2> op2, string asmop,
6236 string OpS, RegisterOperand OpVPR, Operand OpImm>
6237 : NeonI_BitExtract<q, op2, (outs OpVPR:$Rd),
6238 (ins OpVPR:$Rn, OpVPR:$Rm, OpImm:$Index),
6239 asmop # "\t$Rd." # OpS # ", $Rn." # OpS #
6240 ", $Rm." # OpS # ", $Index",
6246 def EXTvvvi_8b : NeonI_Extract<0b0, 0b00, "ext", "8b",
6247 VPR64, neon_uimm3> {
6248 let Inst{14-11} = {0b0, Index{2}, Index{1}, Index{0}};
6251 def EXTvvvi_16b: NeonI_Extract<0b1, 0b00, "ext", "16b",
6252 VPR128, neon_uimm4> {
6253 let Inst{14-11} = Index;
6256 class NI_Extract<ValueType OpTy, RegisterOperand OpVPR, Instruction INST,
6258 : Pat<(OpTy (Neon_vextract (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm),
6260 (INST OpVPR:$Rn, OpVPR:$Rm, OpImm:$Imm)>;
// EXT operates on bytes, so every 64-bit vector type selects to the 8b form
// and every 128-bit type to the 16b form, regardless of element type.
def : NI_Extract<v8i8, VPR64, EXTvvvi_8b, neon_uimm3>;
def : NI_Extract<v4i16, VPR64, EXTvvvi_8b, neon_uimm3>;
def : NI_Extract<v2i32, VPR64, EXTvvvi_8b, neon_uimm3>;
def : NI_Extract<v1i64, VPR64, EXTvvvi_8b, neon_uimm3>;
def : NI_Extract<v2f32, VPR64, EXTvvvi_8b, neon_uimm3>;
def : NI_Extract<v1f64, VPR64, EXTvvvi_8b, neon_uimm3>;
def : NI_Extract<v16i8, VPR128, EXTvvvi_16b, neon_uimm4>;
def : NI_Extract<v8i16, VPR128, EXTvvvi_16b, neon_uimm4>;
def : NI_Extract<v4i32, VPR128, EXTvvvi_16b, neon_uimm4>;
def : NI_Extract<v2i64, VPR128, EXTvvvi_16b, neon_uimm4>;
def : NI_Extract<v4f32, VPR128, EXTvvvi_16b, neon_uimm4>;
def : NI_Extract<v2f64, VPR128, EXTvvvi_16b, neon_uimm4>;
6276 class NI_TBL<bit q, bits<2> op2, bits<2> len, bit op,
6277 string asmop, string OpS, RegisterOperand OpVPR,
6278 RegisterOperand VecList>
6279 : NeonI_TBL<q, op2, len, op,
6280 (outs OpVPR:$Rd), (ins VecList:$Rn, OpVPR:$Rm),
6281 asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
// The vectors in the lookup table are always 16b
6286 multiclass NI_TBL_pat<bits<2> len, bit op, string asmop, string List> {
6287 def _8b : NI_TBL<0, 0b00, len, op, asmop, "8b", VPR64,
6288 !cast<RegisterOperand>(List # "16B_operand")>;
6290 def _16b : NI_TBL<1, 0b00, len, op, asmop, "16b", VPR128,
6291 !cast<RegisterOperand>(List # "16B_operand")>;
// TBL with a table of one, two, three, or four vector registers
// (len field 0b00..0b11 selects the table size).
defm TBL1 : NI_TBL_pat<0b00, 0b0, "tbl", "VOne">;
defm TBL2 : NI_TBL_pat<0b01, 0b0, "tbl", "VPair">;
defm TBL3 : NI_TBL_pat<0b10, 0b0, "tbl", "VTriple">;
defm TBL4 : NI_TBL_pat<0b11, 0b0, "tbl", "VQuad">;
// Table lookup extension
6300 class NI_TBX<bit q, bits<2> op2, bits<2> len, bit op,
6301 string asmop, string OpS, RegisterOperand OpVPR,
6302 RegisterOperand VecList>
6303 : NeonI_TBL<q, op2, len, op,
6304 (outs OpVPR:$Rd), (ins OpVPR:$src, VecList:$Rn, OpVPR:$Rm),
6305 asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
6308 let Constraints = "$src = $Rd";
// The vectors in the lookup table are always 16b
6312 multiclass NI_TBX_pat<bits<2> len, bit op, string asmop, string List> {
6313 def _8b : NI_TBX<0, 0b00, len, op, asmop, "8b", VPR64,
6314 !cast<RegisterOperand>(List # "16B_operand")>;
6316 def _16b : NI_TBX<1, 0b00, len, op, asmop, "16b", VPR128,
6317 !cast<RegisterOperand>(List # "16B_operand")>;
// TBX with a table of one, two, three, or four vector registers
// (same len encoding as TBL; op bit 1 selects the extension form).
defm TBX1 : NI_TBX_pat<0b00, 0b1, "tbx", "VOne">;
defm TBX2 : NI_TBX_pat<0b01, 0b1, "tbx", "VPair">;
defm TBX3 : NI_TBX_pat<0b10, 0b1, "tbx", "VTriple">;
defm TBX4 : NI_TBX_pat<0b11, 0b1, "tbx", "VQuad">;
6325 class NeonI_INS_main<string asmop, string Res, ValueType ResTy,
6326 RegisterClass OpGPR, ValueType OpTy, Operand OpImm>
6327 : NeonI_copy<0b1, 0b0, 0b0011,
6328 (outs VPR128:$Rd), (ins VPR128:$src, OpGPR:$Rn, OpImm:$Imm),
6329 asmop # "\t$Rd." # Res # "[$Imm], $Rn",
6330 [(set (ResTy VPR128:$Rd),
6331 (ResTy (vector_insert
6332 (ResTy VPR128:$src),
6337 let Constraints = "$src = $Rd";
6340 //Insert element (vector, from main)
6341 def INSbw : NeonI_INS_main<"ins", "b", v16i8, GPR32, i32,
6343 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6345 def INShw : NeonI_INS_main<"ins", "h", v8i16, GPR32, i32,
6347 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6349 def INSsw : NeonI_INS_main<"ins", "s", v4i32, GPR32, i32,
6351 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
6353 def INSdx : NeonI_INS_main<"ins", "d", v2i64, GPR64, i64,
6355 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
// "mov Vd.<T>[i], Rn" assembler aliases for the INS (from main) forms.
def : NeonInstAlias<"mov $Rd.b[$Imm], $Rn",
                    (INSbw VPR128:$Rd, GPR32:$Rn, neon_uimm4_bare:$Imm), 0>;
def : NeonInstAlias<"mov $Rd.h[$Imm], $Rn",
                    (INShw VPR128:$Rd, GPR32:$Rn, neon_uimm3_bare:$Imm), 0>;
def : NeonInstAlias<"mov $Rd.s[$Imm], $Rn",
                    (INSsw VPR128:$Rd, GPR32:$Rn, neon_uimm2_bare:$Imm), 0>;
def : NeonInstAlias<"mov $Rd.d[$Imm], $Rn",
                    (INSdx VPR128:$Rd, GPR64:$Rn, neon_uimm1_bare:$Imm), 0>;
6367 class Neon_INS_main_pattern <ValueType ResTy,ValueType ExtResTy,
6368 RegisterClass OpGPR, ValueType OpTy,
6369 Operand OpImm, Instruction INS>
6370 : Pat<(ResTy (vector_insert
6374 (ResTy (EXTRACT_SUBREG
6375 (ExtResTy (INS (ExtResTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
6376 OpGPR:$Rn, OpImm:$Imm)), sub_64))>;
// Insert into a 64-bit vector: widen to the 128-bit register, use the
// corresponding INS instruction, then take the low half back.
def INSbw_pattern : Neon_INS_main_pattern<v8i8, v16i8, GPR32, i32,
                                          neon_uimm3_bare, INSbw>;
def INShw_pattern : Neon_INS_main_pattern<v4i16, v8i16, GPR32, i32,
                                          neon_uimm2_bare, INShw>;
def INSsw_pattern : Neon_INS_main_pattern<v2i32, v4i32, GPR32, i32,
                                          neon_uimm1_bare, INSsw>;
def INSdx_pattern : Neon_INS_main_pattern<v1i64, v2i64, GPR64, i64,
                                          neon_uimm0_bare, INSdx>;
6387 class NeonI_INS_element<string asmop, string Res, Operand ResImm>
6388 : NeonI_insert<0b1, 0b1,
6389 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn,
6390 ResImm:$Immd, ResImm:$Immn),
6391 asmop # "\t$Rd." # Res # "[$Immd], $Rn." # Res # "[$Immn]",
6394 let Constraints = "$src = $Rd";
6399 //Insert element (vector, from element)
6400 def INSELb : NeonI_INS_element<"ins", "b", neon_uimm4_bare> {
6401 let Inst{20-16} = {Immd{3}, Immd{2}, Immd{1}, Immd{0}, 0b1};
6402 let Inst{14-11} = {Immn{3}, Immn{2}, Immn{1}, Immn{0}};
6404 def INSELh : NeonI_INS_element<"ins", "h", neon_uimm3_bare> {
6405 let Inst{20-16} = {Immd{2}, Immd{1}, Immd{0}, 0b1, 0b0};
6406 let Inst{14-11} = {Immn{2}, Immn{1}, Immn{0}, 0b0};
6407 // bit 11 is unspecified, but should be set to zero.
6409 def INSELs : NeonI_INS_element<"ins", "s", neon_uimm2_bare> {
6410 let Inst{20-16} = {Immd{1}, Immd{0}, 0b1, 0b0, 0b0};
6411 let Inst{14-11} = {Immn{1}, Immn{0}, 0b0, 0b0};
6412 // bits 11-12 are unspecified, but should be set to zero.
6414 def INSELd : NeonI_INS_element<"ins", "d", neon_uimm1_bare> {
6415 let Inst{20-16} = {Immd, 0b1, 0b0, 0b0, 0b0};
6416 let Inst{14-11} = {Immn{0}, 0b0, 0b0, 0b0};
6417 // bits 11-13 are unspecified, but should be set to zero.
// "mov Vd.<T>[i], Vn.<T>[j]" assembler aliases for the element INS forms.
def : NeonInstAlias<"mov $Rd.b[$Immd], $Rn.b[$Immn]",
                    (INSELb VPR128:$Rd, VPR128:$Rn,
                     neon_uimm4_bare:$Immd, neon_uimm4_bare:$Immn), 0>;
def : NeonInstAlias<"mov $Rd.h[$Immd], $Rn.h[$Immn]",
                    (INSELh VPR128:$Rd, VPR128:$Rn,
                     neon_uimm3_bare:$Immd, neon_uimm3_bare:$Immn), 0>;
def : NeonInstAlias<"mov $Rd.s[$Immd], $Rn.s[$Immn]",
                    (INSELs VPR128:$Rd, VPR128:$Rn,
                     neon_uimm2_bare:$Immd, neon_uimm2_bare:$Immn), 0>;
def : NeonInstAlias<"mov $Rd.d[$Immd], $Rn.d[$Immn]",
                    (INSELd VPR128:$Rd, VPR128:$Rn,
                     neon_uimm1_bare:$Immd, neon_uimm1_bare:$Immn), 0>;
6433 multiclass Neon_INS_elt_pattern<ValueType ResTy, ValueType NaTy,
6434 ValueType MidTy, Operand StImm, Operand NaImm,
6436 def : Pat<(ResTy (vector_insert
6437 (ResTy VPR128:$src),
6438 (MidTy (vector_extract
6442 (INS (ResTy VPR128:$src), (ResTy VPR128:$Rn),
6443 StImm:$Immd, StImm:$Immn)>;
6445 def : Pat <(ResTy (vector_insert
6446 (ResTy VPR128:$src),
6447 (MidTy (vector_extract
6451 (INS (ResTy VPR128:$src),
6452 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)),
6453 StImm:$Immd, NaImm:$Immn)>;
6455 def : Pat <(NaTy (vector_insert
6457 (MidTy (vector_extract
6461 (NaTy (EXTRACT_SUBREG
6463 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
6465 NaImm:$Immd, StImm:$Immn)),
6468 def : Pat <(NaTy (vector_insert
6470 (MidTy (vector_extract
6474 (NaTy (EXTRACT_SUBREG
6476 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
6477 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)),
6478 NaImm:$Immd, NaImm:$Immn)),
// Element-to-element insert patterns: 128-bit type, its 64-bit narrow type,
// the element type, and the lane-index operands for each width.
defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, neon_uimm2_bare,
                            neon_uimm1_bare, INSELs>;
defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, neon_uimm1_bare,
                            neon_uimm0_bare, INSELd>;
defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
                            neon_uimm3_bare, INSELb>;
defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
                            neon_uimm2_bare, INSELh>;
defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
                            neon_uimm1_bare, INSELs>;
defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, neon_uimm1_bare,
                            neon_uimm0_bare, INSELd>;
6495 multiclass Neon_INS_elt_float_pattern<ValueType ResTy, ValueType NaTy,
6497 RegisterClass OpFPR, Operand ResImm,
6498 SubRegIndex SubIndex, Instruction INS> {
6499 def : Pat <(ResTy (vector_insert
6500 (ResTy VPR128:$src),
6503 (INS (ResTy VPR128:$src),
6504 (ResTy (SUBREG_TO_REG (i64 0), OpFPR:$Rn, SubIndex)),
6508 def : Pat <(NaTy (vector_insert
6512 (NaTy (EXTRACT_SUBREG
6514 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
6515 (ResTy (SUBREG_TO_REG (i64 0), (MidTy OpFPR:$Rn), SubIndex)),
6521 defm : Neon_INS_elt_float_pattern<v4f32, v2f32, f32, FPR32, neon_uimm2_bare,
6523 defm : Neon_INS_elt_float_pattern<v2f64, v1f64, f64, FPR64, neon_uimm1_bare,
6526 class NeonI_SMOV<string asmop, string Res, bit Q,
6527 ValueType OpTy, ValueType eleTy,
6528 Operand OpImm, RegisterClass ResGPR, ValueType ResTy>
6529 : NeonI_copy<Q, 0b0, 0b0101,
6530 (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
6531 asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
6532 [(set (ResTy ResGPR:$Rd),
6534 (ResTy (vector_extract
6535 (OpTy VPR128:$Rn), (OpImm:$Imm))),
6541 //Signed integer move (main, from element)
6542 def SMOVwb : NeonI_SMOV<"smov", "b", 0b0, v16i8, i8, neon_uimm4_bare,
6544 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6546 def SMOVwh : NeonI_SMOV<"smov", "h", 0b0, v8i16, i16, neon_uimm3_bare,
6548 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6550 def SMOVxb : NeonI_SMOV<"smov", "b", 0b1, v16i8, i8, neon_uimm4_bare,
6552 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6554 def SMOVxh : NeonI_SMOV<"smov", "h", 0b1, v8i16, i16, neon_uimm3_bare,
6556 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6558 def SMOVxs : NeonI_SMOV<"smov", "s", 0b1, v4i32, i32, neon_uimm2_bare,
6560 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
6563 multiclass Neon_SMOVx_pattern <ValueType StTy, ValueType NaTy,
6564 ValueType eleTy, Operand StImm, Operand NaImm,
6565 Instruction SMOVI> {
6566 def : Pat<(i64 (sext_inreg
6568 (i32 (vector_extract
6569 (StTy VPR128:$Rn), (StImm:$Imm))))),
6571 (SMOVI VPR128:$Rn, StImm:$Imm)>;
6573 def : Pat<(i64 (sext
6574 (i32 (vector_extract
6575 (StTy VPR128:$Rn), (StImm:$Imm))))),
6576 (SMOVI VPR128:$Rn, StImm:$Imm)>;
6578 def : Pat<(i64 (sext_inreg
6579 (i64 (vector_extract
6580 (NaTy VPR64:$Rn), (NaImm:$Imm))),
6582 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6585 def : Pat<(i64 (sext_inreg
6587 (i32 (vector_extract
6588 (NaTy VPR64:$Rn), (NaImm:$Imm))))),
6590 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6593 def : Pat<(i64 (sext
6594 (i32 (vector_extract
6595 (NaTy VPR64:$Rn), (NaImm:$Imm))))),
6596 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// Sign-extending extract to a 64-bit GPR (SMOV Xd) for byte, halfword
// and word elements, from both 128-bit and 64-bit source vectors.
defm : Neon_SMOVx_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
                          neon_uimm3_bare, SMOVxb>;
defm : Neon_SMOVx_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
                          neon_uimm2_bare, SMOVxh>;
defm : Neon_SMOVx_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
                          neon_uimm1_bare, SMOVxs>;
6607 class Neon_SMOVw_pattern <ValueType StTy, ValueType NaTy,
6608 ValueType eleTy, Operand StImm, Operand NaImm,
6610 : Pat<(i32 (sext_inreg
6611 (i32 (vector_extract
6612 (NaTy VPR64:$Rn), (NaImm:$Imm))),
6614 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// Sign-extending extract to a 32-bit GPR (SMOV Wd) from a 64-bit source
// vector, widened to the 128-bit register for the instruction.
def : Neon_SMOVw_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
                         neon_uimm3_bare, SMOVwb>;
def : Neon_SMOVw_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
                         neon_uimm2_bare, SMOVwh>;
6622 class NeonI_UMOV<string asmop, string Res, bit Q,
6623 ValueType OpTy, Operand OpImm,
6624 RegisterClass ResGPR, ValueType ResTy>
6625 : NeonI_copy<Q, 0b0, 0b0111,
6626 (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
6627 asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
6628 [(set (ResTy ResGPR:$Rd),
6629 (ResTy (vector_extract
6630 (OpTy VPR128:$Rn), (OpImm:$Imm))))],
6635 //Unsigned integer move (main, from element)
6636 def UMOVwb : NeonI_UMOV<"umov", "b", 0b0, v16i8, neon_uimm4_bare,
6638 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6640 def UMOVwh : NeonI_UMOV<"umov", "h", 0b0, v8i16, neon_uimm3_bare,
6642 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6644 def UMOVws : NeonI_UMOV<"umov", "s", 0b0, v4i32, neon_uimm2_bare,
6646 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
6648 def UMOVxd : NeonI_UMOV<"umov", "d", 0b1, v2i64, neon_uimm1_bare,
6650 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
// "mov Rd, Vn.<T>[i]" assembler aliases for the word/doubleword UMOV forms.
def : NeonInstAlias<"mov $Rd, $Rn.s[$Imm]",
                    (UMOVws GPR32:$Rd, VPR128:$Rn, neon_uimm2_bare:$Imm), 0>;
def : NeonInstAlias<"mov $Rd, $Rn.d[$Imm]",
                    (UMOVxd GPR64:$Rd, VPR128:$Rn, neon_uimm1_bare:$Imm), 0>;
6658 class Neon_UMOV_pattern <ValueType StTy, ValueType NaTy, ValueType ResTy,
6659 Operand StImm, Operand NaImm,
6661 : Pat<(ResTy (vector_extract
6662 (NaTy VPR64:$Rn), NaImm:$Imm)),
6663 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// Plain (zero-extending) extracts from 64-bit vectors: widen the source to
// the 128-bit register and use the matching UMOV form.
def : Neon_UMOV_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
                        neon_uimm3_bare, UMOVwb>;
def : Neon_UMOV_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
                        neon_uimm2_bare, UMOVwh>;
def : Neon_UMOV_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
                        neon_uimm1_bare, UMOVws>;
6674 (i32 (vector_extract
6675 (v16i8 VPR128:$Rn), (neon_uimm4_bare:$Imm))),
6677 (UMOVwb VPR128:$Rn, neon_uimm4_bare:$Imm)>;
6680 (i32 (vector_extract
6681 (v8i16 VPR128:$Rn), (neon_uimm3_bare:$Imm))),
6683 (UMOVwh VPR128:$Rn, neon_uimm3_bare:$Imm)>;
6685 def : Pat<(i64 (zext
6686 (i32 (vector_extract
6687 (v2i64 VPR128:$Rn), (neon_uimm1_bare:$Imm))))),
6688 (UMOVxd VPR128:$Rn, neon_uimm1_bare:$Imm)>;
6691 (i32 (vector_extract
6692 (v8i8 VPR64:$Rn), (neon_uimm3_bare:$Imm))),
6694 (UMOVwb (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
6695 neon_uimm3_bare:$Imm)>;
6698 (i32 (vector_extract
6699 (v4i16 VPR64:$Rn), (neon_uimm2_bare:$Imm))),
6701 (UMOVwh (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
6702 neon_uimm2_bare:$Imm)>;
6704 def : Pat<(i64 (zext
6705 (i32 (vector_extract
6706 (v1i64 VPR64:$Rn), (neon_uimm0_bare:$Imm))))),
6707 (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
6708 neon_uimm0_bare:$Imm)>;
6710 // Additional copy patterns for scalar types
6711 def : Pat<(i32 (vector_extract (v1i8 FPR8:$Rn), (i64 0))),
6713 (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8)), (i64 0))>;
6715 def : Pat<(i32 (vector_extract (v1i16 FPR16:$Rn), (i64 0))),
6717 (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16)), (i64 0))>;
// Extracting element 0 of a single-element vector into a GPR is just an
// FMOV from the FP/SIMD register file.
def : Pat<(i32 (vector_extract (v1i32 FPR32:$Rn), (i64 0))),
          (FMOVws FPR32:$Rn)>;

def : Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))),
          (FMOVxd FPR64:$Rn)>;
6725 def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))),
6728 def : Pat<(v1i8 (scalar_to_vector GPR32:$Rn)),
6729 (v1i8 (EXTRACT_SUBREG (v16i8
6730 (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
6733 def : Pat<(v1i16 (scalar_to_vector GPR32:$Rn)),
6734 (v1i16 (EXTRACT_SUBREG (v8i16
6735 (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
6738 def : Pat<(v1i32 (scalar_to_vector GPR32:$src)),
6741 def : Pat<(v1i64 (scalar_to_vector GPR64:$src)),
6744 def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
6745 (v8i8 (EXTRACT_SUBREG (v16i8
6746 (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
6749 def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
6750 (v4i16 (EXTRACT_SUBREG (v8i16
6751 (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
6754 def : Pat<(v2i32 (scalar_to_vector GPR32:$Rn)),
6755 (v2i32 (EXTRACT_SUBREG (v16i8
6756 (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))),
// scalar_to_vector into a 128-bit type: insert the GPR into lane 0 of an
// undefined vector (the remaining lanes are don't-care).
def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
          (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))>;

def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
          (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))>;

def : Pat<(v4i32 (scalar_to_vector GPR32:$Rn)),
          (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))>;

def : Pat<(v2i64 (scalar_to_vector GPR64:$Rn)),
          (INSdx (v2i64 (IMPLICIT_DEF)), $Rn, (i64 0))>;
6771 def : Pat<(v2i32 (scalar_to_vector GPR32:$Rn)),
6772 (v2i32 (EXTRACT_SUBREG (v16i8
6773 (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))),
6776 def : Pat<(v2i32 (scalar_to_vector GPR32:$Rn)),
6777 (v2i32 (EXTRACT_SUBREG (v16i8
6778 (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))),
6781 def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))),
// v2f64 from a scalar f64: place the D register in the low half of an
// undefined Q register (high lane is don't-care).
def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$src))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)),
                         (f64 FPR64:$src), sub_64)>;
6788 class NeonI_DUP_Elt<bit Q, string asmop, string rdlane, string rnlane,
6789 RegisterOperand ResVPR, Operand OpImm>
6790 : NeonI_copy<Q, 0b0, 0b0000, (outs ResVPR:$Rd),
6791 (ins VPR128:$Rn, OpImm:$Imm),
6792 asmop # "\t$Rd" # rdlane # ", $Rn" # rnlane # "[$Imm]",
6798 def DUPELT16b : NeonI_DUP_Elt<0b1, "dup", ".16b", ".b", VPR128,
6800 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6803 def DUPELT8h : NeonI_DUP_Elt<0b1, "dup", ".8h", ".h", VPR128,
6805 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6808 def DUPELT4s : NeonI_DUP_Elt<0b1, "dup", ".4s", ".s", VPR128,
6810 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
6813 def DUPELT2d : NeonI_DUP_Elt<0b1, "dup", ".2d", ".d", VPR128,
6815 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
6818 def DUPELT8b : NeonI_DUP_Elt<0b0, "dup", ".8b", ".b", VPR64,
6820 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6823 def DUPELT4h : NeonI_DUP_Elt<0b0, "dup", ".4h", ".h", VPR64,
6825 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6828 def DUPELT2s : NeonI_DUP_Elt<0b0, "dup", ".2s", ".s", VPR64,
6830 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
6833 multiclass NeonI_DUP_Elt_pattern<Instruction DUPELT, ValueType ResTy,
6834 ValueType OpTy,ValueType NaTy,
6835 ValueType ExTy, Operand OpLImm,
6837 def : Pat<(ResTy (Neon_vduplane (OpTy VPR128:$Rn), OpLImm:$Imm)),
6838 (ResTy (DUPELT (OpTy VPR128:$Rn), OpLImm:$Imm))>;
6840 def : Pat<(ResTy (Neon_vduplane
6841 (NaTy VPR64:$Rn), OpNImm:$Imm)),
6843 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), OpNImm:$Imm))>;
// DUP-by-lane patterns: each DUPELT instruction is instantiated for the
// 128-bit source (OpLImm lane index) and the 64-bit source widened to the
// 128-bit register (narrower OpNImm lane index).
defm : NeonI_DUP_Elt_pattern<DUPELT16b, v16i8, v16i8, v8i8, v16i8,
                             neon_uimm4_bare, neon_uimm3_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT8b, v8i8, v16i8, v8i8, v16i8,
                             neon_uimm4_bare, neon_uimm3_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT8h, v8i16, v8i16, v4i16, v8i16,
                             neon_uimm3_bare, neon_uimm2_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT4h, v4i16, v8i16, v4i16, v8i16,
                             neon_uimm3_bare, neon_uimm2_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4i32, v4i32, v2i32, v4i32,
                             neon_uimm2_bare, neon_uimm1_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2i32, v4i32, v2i32, v4i32,
                             neon_uimm2_bare, neon_uimm1_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2i64, v2i64, v1i64, v2i64,
                             neon_uimm1_bare, neon_uimm0_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4f32, v4f32, v2f32, v4f32,
                             neon_uimm2_bare, neon_uimm1_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2f32, v4f32, v2f32, v4f32,
                             neon_uimm2_bare, neon_uimm1_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2f64, v2f64, v1f64, v2f64,
                             neon_uimm1_bare, neon_uimm0_bare>;
6866 def : Pat<(v2f32 (Neon_vdup (f32 FPR32:$Rn))),
6868 (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
6870 def : Pat<(v4f32 (Neon_vdup (f32 FPR32:$Rn))),
6872 (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
6874 def : Pat<(v2f64 (Neon_vdup (f64 FPR64:$Rn))),
6876 (SUBREG_TO_REG (i64 0), FPR64:$Rn, sub_64),
// NeonI_DUP: DUP (general) — broadcast a GPR into every lane of a vector.
// Q selects 128-bit (VPR128) vs 64-bit (VPR64) destination; rdlane is the
// arrangement suffix appended to $Rd (e.g. ".16b"); the pattern matches a
// Neon_vdup of the scalar register.
6879 class NeonI_DUP<bit Q, string asmop, string rdlane,
6880 RegisterOperand ResVPR, ValueType ResTy,
6881 RegisterClass OpGPR, ValueType OpTy>
6882 : NeonI_copy<Q, 0b0, 0b0001, (outs ResVPR:$Rd), (ins OpGPR:$Rn),
6883 asmop # "\t$Rd" # rdlane # ", $Rn",
6884 [(set (ResTy ResVPR:$Rd),
6885 (ResTy (Neon_vdup (OpTy OpGPR:$Rn))))],
// Concrete DUP (general) instructions; Inst{20-16} is the imm5 field whose
// lowest set bit encodes the element size (1=B, 2=H, 4=S, 8=D).
6888 def DUP16b : NeonI_DUP<0b1, "dup", ".16b", VPR128, v16i8, GPR32, i32> {
6889 let Inst{20-16} = 0b00001;
6890 // bits 17-20 are unspecified, but should be set to zero.
6893 def DUP8h : NeonI_DUP<0b1, "dup", ".8h", VPR128, v8i16, GPR32, i32> {
6894 let Inst{20-16} = 0b00010;
6895 // bits 18-20 are unspecified, but should be set to zero.
6898 def DUP4s : NeonI_DUP<0b1, "dup", ".4s", VPR128, v4i32, GPR32, i32> {
6899 let Inst{20-16} = 0b00100;
6900 // bits 19-20 are unspecified, but should be set to zero.
6903 def DUP2d : NeonI_DUP<0b1, "dup", ".2d", VPR128, v2i64, GPR64, i64> {
6904 let Inst{20-16} = 0b01000;
6905 // bit 20 is unspecified, but should be set to zero.
6908 def DUP8b : NeonI_DUP<0b0, "dup", ".8b", VPR64, v8i8, GPR32, i32> {
6909 let Inst{20-16} = 0b00001;
6910 // bits 17-20 are unspecified, but should be set to zero.
6913 def DUP4h : NeonI_DUP<0b0, "dup", ".4h", VPR64, v4i16, GPR32, i32> {
6914 let Inst{20-16} = 0b00010;
6915 // bits 18-20 are unspecified, but should be set to zero.
6918 def DUP2s : NeonI_DUP<0b0, "dup", ".2s", VPR64, v2i32, GPR32, i32> {
6919 let Inst{20-16} = 0b00100;
6920 // bits 19-20 are unspecified, but should be set to zero.
6923 // patterns for CONCAT_VECTORS
// Three cases: (low, undef) is just a subregister insert into a wider reg;
// (Rn, Rm) and (Rn, Rn) first widen both halves with SUBREG_TO_REG.
// NOTE(review): the combining instruction of the two-operand Pats is elided
// in this excerpt — confirm against the full file.
6924 multiclass Concat_Vector_Pattern<ValueType ResTy, ValueType OpTy> {
6925 def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), undef)),
6926 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)>;
6927 def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))),
6929 (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6930 (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rm, sub_64)),
6933 def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rn))),
6935 (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// Instantiate for every 64-bit element type paired with its 128-bit result.
6939 defm : Concat_Vector_Pattern<v16i8, v8i8>;
6940 defm : Concat_Vector_Pattern<v8i16, v4i16>;
6941 defm : Concat_Vector_Pattern<v4i32, v2i32>;
6942 defm : Concat_Vector_Pattern<v2i64, v1i64>;
6943 defm : Concat_Vector_Pattern<v4f32, v2f32>;
6944 defm : Concat_Vector_Pattern<v2f64, v1f64>;
6946 //patterns for EXTRACT_SUBVECTOR
// Extracting the low half (index 0) of a 128-bit vector is free: it is just
// the sub_64 subregister of the source, so EXTRACT_SUBREG suffices.
6947 def : Pat<(v8i8 (extract_subvector (v16i8 VPR128:$Rn), (i64 0))),
6948 (v8i8 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
6949 def : Pat<(v4i16 (extract_subvector (v8i16 VPR128:$Rn), (i64 0))),
6950 (v4i16 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
6951 def : Pat<(v2i32 (extract_subvector (v4i32 VPR128:$Rn), (i64 0))),
6952 (v2i32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
6953 def : Pat<(v1i64 (extract_subvector (v2i64 VPR128:$Rn), (i64 0))),
6954 (v1i64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
6955 def : Pat<(v2f32 (extract_subvector (v4f32 VPR128:$Rn), (i64 0))),
6956 (v2f32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
6957 def : Pat<(v1f64 (extract_subvector (v2f64 VPR128:$Rn), (i64 0))),
6958 (v1f64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
6960 // The followings are for instruction class (3V Elem)
// NI_2VE: accumulating three-operand by-element instruction
// (e.g. mla Vd.4s, Vn.4s, Vm.S[idx]). $src is tied to $Rd because the
// destination is read-modify-write.
6964 class NI_2VE<bit q, bit u, bits<2> size, bits<4> opcode,
6965 string asmop, string ResS, string OpS, string EleOpS,
6966 Operand OpImm, RegisterOperand ResVPR,
6967 RegisterOperand OpVPR, RegisterOperand EleOpVPR>
6968 : NeonI_2VElem<q, u, size, opcode,
6969 (outs ResVPR:$Rd), (ins ResVPR:$src, OpVPR:$Rn,
6970 EleOpVPR:$Re, OpImm:$Index),
6971 asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
6972 ", $Re." # EleOpS # "[$Index]",
6978 let Constraints = "$src = $Rd";
// Variant 1 shapes: 2S/4S with a 2-bit index, 4H/8H with a 3-bit index.
// The Index bits are scattered into Inst{11}, Inst{21} (and Inst{20} for H);
// for H elements only Re{3-0} is encoded, restricting Vm to v0-v15.
6981 multiclass NI_2VE_v1<bit u, bits<4> opcode, string asmop> {
6982 // vector register class for element is always 128-bit to cover the max index
6983 def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
6984 neon_uimm2_bare, VPR64, VPR64, VPR128> {
6985 let Inst{11} = {Index{1}};
6986 let Inst{21} = {Index{0}};
6987 let Inst{20-16} = Re;
6990 def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
6991 neon_uimm2_bare, VPR128, VPR128, VPR128> {
6992 let Inst{11} = {Index{1}};
6993 let Inst{21} = {Index{0}};
6994 let Inst{20-16} = Re;
6997 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
6998 def _4h8h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
6999 neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
7000 let Inst{11} = {Index{2}};
7001 let Inst{21} = {Index{1}};
7002 let Inst{20} = {Index{0}};
7003 let Inst{19-16} = Re{3-0};
7006 def _8h8h : NI_2VE<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
7007 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
7008 let Inst{11} = {Index{2}};
7009 let Inst{21} = {Index{1}};
7010 let Inst{20} = {Index{0}};
7011 let Inst{19-16} = Re{3-0};
// By-element multiply-accumulate / multiply-subtract.
7015 defm MLAvve : NI_2VE_v1<0b1, 0b0000, "mla">;
7016 defm MLSvve : NI_2VE_v1<0b1, 0b0100, "mls">;
7018 // Pattern for lane in 128-bit vector
// Matches op(acc, Rn, vduplane(Re, idx)) directly onto the by-element
// instruction when the element vector is already 128-bit.
7019 class NI_2VE_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
7020 RegisterOperand ResVPR, RegisterOperand OpVPR,
7021 RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
7023 : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
7024 (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7025 (INST ResVPR:$src, OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
7027 // Pattern for lane in 64-bit vector
// Same as NI_2VE_laneq but the element source is 64-bit, so it is widened
// with SUBREG_TO_REG to satisfy the instruction's 128-bit register operand.
7028 class NI_2VE_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
7029 RegisterOperand ResVPR, RegisterOperand OpVPR,
7030 RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
7032 : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
7033 (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7034 (INST ResVPR:$src, OpVPR:$Rn,
7035 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
// Selection patterns for all variant-1 shapes of a given accumulating op.
7037 multiclass NI_2VE_v1_pat<string subop, SDPatternOperator op>
7039 def : NI_2VE_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
7040 op, VPR64, VPR64, VPR128, v2i32, v2i32, v4i32>;
7042 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
7043 op, VPR128, VPR128, VPR128, v4i32, v4i32, v4i32>;
7045 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
7046 op, VPR64, VPR64, VPR128Lo, v4i16, v4i16, v8i16>;
7048 def : NI_2VE_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
7049 op, VPR128, VPR128, VPR128Lo, v8i16, v8i16, v8i16>;
7051 // Index can only be half of the max value for lane in 64-bit vector
7053 def : NI_2VE_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
7054 op, VPR64, VPR64, VPR64, v2i32, v2i32, v2i32>;
7056 def : NI_2VE_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
7057 op, VPR64, VPR64, VPR64Lo, v4i16, v4i16, v4i16>;
7060 defm MLA_lane_v1 : NI_2VE_v1_pat<"MLAvve", Neon_mla>;
7061 defm MLS_lane_v1 : NI_2VE_v1_pat<"MLSvve", Neon_mls>;
// NI_2VE_2op: non-accumulating two-operand by-element instruction
// (e.g. mul Vd.4s, Vn.4s, Vm.S[idx]); unlike NI_2VE there is no tied $src.
7063 class NI_2VE_2op<bit q, bit u, bits<2> size, bits<4> opcode,
7064 string asmop, string ResS, string OpS, string EleOpS,
7065 Operand OpImm, RegisterOperand ResVPR,
7066 RegisterOperand OpVPR, RegisterOperand EleOpVPR>
7067 : NeonI_2VElem<q, u, size, opcode,
7068 (outs ResVPR:$Rd), (ins OpVPR:$Rn,
7069 EleOpVPR:$Re, OpImm:$Index),
7070 asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
7071 ", $Re." # EleOpS # "[$Index]",
// Same shape/index-encoding scheme as NI_2VE_v1, for the two-operand form.
7078 multiclass NI_2VE_v1_2op<bit u, bits<4> opcode, string asmop> {
7079 // vector register class for element is always 128-bit to cover the max index
7080 def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
7081 neon_uimm2_bare, VPR64, VPR64, VPR128> {
7082 let Inst{11} = {Index{1}};
7083 let Inst{21} = {Index{0}};
7084 let Inst{20-16} = Re;
7087 def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
7088 neon_uimm2_bare, VPR128, VPR128, VPR128> {
7089 let Inst{11} = {Index{1}};
7090 let Inst{21} = {Index{0}};
7091 let Inst{20-16} = Re;
7094 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
7095 def _4h8h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
7096 neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
7097 let Inst{11} = {Index{2}};
7098 let Inst{21} = {Index{1}};
7099 let Inst{20} = {Index{0}};
7100 let Inst{19-16} = Re{3-0};
7103 def _8h8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
7104 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
7105 let Inst{11} = {Index{2}};
7106 let Inst{21} = {Index{1}};
7107 let Inst{20} = {Index{0}};
7108 let Inst{19-16} = Re{3-0};
// By-element multiply and saturating (rounding) doubling multiply-high.
7112 defm MULve : NI_2VE_v1_2op<0b0, 0b1000, "mul">;
7113 defm SQDMULHve : NI_2VE_v1_2op<0b0, 0b1100, "sqdmulh">;
7114 defm SQRDMULHve : NI_2VE_v1_2op<0b0, 0b1101, "sqrdmulh">;
7116 // Pattern for lane in 128-bit vector
// Two-operand analogue of NI_2VE_laneq: op(Rn, vduplane(Re, idx)).
7117 class NI_2VE_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
7118 RegisterOperand OpVPR, RegisterOperand EleOpVPR,
7119 ValueType ResTy, ValueType OpTy, ValueType EleOpTy>
7120 : Pat<(ResTy (op (OpTy OpVPR:$Rn),
7121 (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7122 (INST OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
7124 // Pattern for lane in 64-bit vector
// 64-bit element source widened with SUBREG_TO_REG before use.
7125 class NI_2VE_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
7126 RegisterOperand OpVPR, RegisterOperand EleOpVPR,
7127 ValueType ResTy, ValueType OpTy, ValueType EleOpTy>
7128 : Pat<(ResTy (op (OpTy OpVPR:$Rn),
7129 (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7131 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
// Selection patterns for all variant-1 shapes of a two-operand multiply op.
7133 multiclass NI_2VE_mul_v1_pat<string subop, SDPatternOperator op> {
7134 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
7135 op, VPR64, VPR128, v2i32, v2i32, v4i32>;
7137 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
7138 op, VPR128, VPR128, v4i32, v4i32, v4i32>;
7140 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
7141 op, VPR64, VPR128Lo, v4i16, v4i16, v8i16>;
7143 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
7144 op, VPR128, VPR128Lo, v8i16, v8i16, v8i16>;
7146 // Index can only be half of the max value for lane in 64-bit vector
7148 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
7149 op, VPR64, VPR64, v2i32, v2i32, v2i32>;
7151 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
7152 op, VPR64, VPR64Lo, v4i16, v4i16, v4i16>;
7155 defm MUL_lane_v1 : NI_2VE_mul_v1_pat<"MULve", mul>;
7156 defm SQDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQDMULHve", int_arm_neon_vqdmulh>;
7157 defm SQRDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQRDMULHve", int_arm_neon_vqrdmulh>;
// Variant 2: floating-point shapes (2S/4S/2D) of the two-operand
// by-element instruction; there is no 1D form (see comment at _2d2d).
7161 multiclass NI_2VE_v2_2op<bit u, bits<4> opcode, string asmop> {
7162 // vector register class for element is always 128-bit to cover the max index
7163 def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
7164 neon_uimm2_bare, VPR64, VPR64, VPR128> {
7165 let Inst{11} = {Index{1}};
7166 let Inst{21} = {Index{0}};
7167 let Inst{20-16} = Re;
7170 def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
7171 neon_uimm2_bare, VPR128, VPR128, VPR128> {
7172 let Inst{11} = {Index{1}};
7173 let Inst{21} = {Index{0}};
7174 let Inst{20-16} = Re;
7177 // _1d2d doesn't exist!
7179 def _2d2d : NI_2VE_2op<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
7180 neon_uimm1_bare, VPR128, VPR128, VPR128> {
7181 let Inst{11} = {Index{0}};
7183 let Inst{20-16} = Re;
7187 defm FMULve : NI_2VE_v2_2op<0b0, 0b1001, "fmul">;
7188 defm FMULXve : NI_2VE_v2_2op<0b1, 0b1001, "fmulx">;
// 2D lane-in-64-bit case: the v1f64 element appears through a combine
// (coreop) of the same register, selected to lane 0 after widening.
7190 class NI_2VE_mul_lane_2d<Instruction INST, Operand OpImm, SDPatternOperator op,
7191 RegisterOperand OpVPR, RegisterOperand EleOpVPR,
7192 ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
7193 SDPatternOperator coreop>
7194 : Pat<(ResTy (op (OpTy OpVPR:$Rn),
7195 (OpTy (coreop (EleOpTy EleOpVPR:$Re), (EleOpTy EleOpVPR:$Re))))),
7197 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), 0)>;
// FP selection patterns for the variant-2 shapes.
7199 multiclass NI_2VE_mul_v2_pat<string subop, SDPatternOperator op> {
7200 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
7201 op, VPR64, VPR128, v2f32, v2f32, v4f32>;
7203 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
7204 op, VPR128, VPR128, v4f32, v4f32, v4f32>;
7206 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
7207 op, VPR128, VPR128, v2f64, v2f64, v2f64>;
7209 // Index can only be half of the max value for lane in 64-bit vector
7211 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
7212 op, VPR64, VPR64, v2f32, v2f32, v2f32>;
7214 def : NI_2VE_mul_lane_2d<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
7215 op, VPR128, VPR64, v2f64, v2f64, v1f64,
7216 BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
7219 defm FMUL_lane_v2 : NI_2VE_mul_v2_pat<"FMULve", fmul>;
7220 defm FMULX_lane_v2 : NI_2VE_mul_v2_pat<"FMULXve", int_aarch64_neon_vmulx>;
// fmul with a dup'ed FP scalar: widen the scalar register and use lane 0
// of the by-element multiply instead of materializing the splat.
7222 def : Pat<(v2f32 (fmul (v2f32 (Neon_vdup (f32 FPR32:$Re))),
7223 (v2f32 VPR64:$Rn))),
7224 (FMULve_2s4s VPR64:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
7226 def : Pat<(v4f32 (fmul (v4f32 (Neon_vdup (f32 FPR32:$Re))),
7227 (v4f32 VPR128:$Rn))),
7228 (FMULve_4s4s VPR128:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
7230 def : Pat<(v2f64 (fmul (v2f64 (Neon_vdup (f64 FPR64:$Re))),
7231 (v2f64 VPR128:$Rn))),
7232 (FMULve_2d2d VPR128:$Rn, (SUBREG_TO_REG (i64 0), $Re, sub_64), 0)>;
7234 // The followings are patterns using fma
7235 // -ffp-contract=fast generates fma
// Variant 2 of the accumulating by-element class, for FMLA/FMLS
// (2S/4S/2D FP shapes, no 1D form).
7237 multiclass NI_2VE_v2<bit u, bits<4> opcode, string asmop> {
7238 // vector register class for element is always 128-bit to cover the max index
7239 def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
7240 neon_uimm2_bare, VPR64, VPR64, VPR128> {
7241 let Inst{11} = {Index{1}};
7242 let Inst{21} = {Index{0}};
7243 let Inst{20-16} = Re;
7246 def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
7247 neon_uimm2_bare, VPR128, VPR128, VPR128> {
7248 let Inst{11} = {Index{1}};
7249 let Inst{21} = {Index{0}};
7250 let Inst{20-16} = Re;
7253 // _1d2d doesn't exist!
7255 def _2d2d : NI_2VE<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
7256 neon_uimm1_bare, VPR128, VPR128, VPR128> {
7257 let Inst{11} = {Index{0}};
7259 let Inst{20-16} = Re;
7263 defm FMLAvve : NI_2VE_v2<0b0, 0b0001, "fmla">;
7264 defm FMLSvve : NI_2VE_v2<0b0, 0b0101, "fmls">;
7266 // Pattern for lane in 128-bit vector
// "swap" classes: fma's multiplicand order is (dup, Rn, src) here, i.e. the
// duplicated element is the FIRST operand of op; coreop builds the splat.
7267 class NI_2VEswap_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
7268 RegisterOperand ResVPR, RegisterOperand OpVPR,
7269 ValueType ResTy, ValueType OpTy,
7270 SDPatternOperator coreop>
7271 : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
7272 (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
7273 (INST ResVPR:$src, ResVPR:$Rn, OpVPR:$Re, OpImm:$Index)>;
7275 // Pattern for lane 0
// fma with a dup'ed f32 scalar: widen the scalar into a vector register and
// address it as lane 0.
7276 class NI_2VEfma_lane0<Instruction INST, SDPatternOperator op,
7277 RegisterOperand ResVPR, ValueType ResTy>
7278 : Pat<(ResTy (op (ResTy ResVPR:$Rn),
7279 (ResTy (Neon_vdup (f32 FPR32:$Re))),
7280 (ResTy ResVPR:$src))),
7281 (INST ResVPR:$src, ResVPR:$Rn,
7282 (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
7284 // Pattern for lane in 64-bit vector
7285 class NI_2VEswap_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
7286 RegisterOperand ResVPR, RegisterOperand OpVPR,
7287 ValueType ResTy, ValueType OpTy,
7288 SDPatternOperator coreop>
7289 : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
7290 (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
7291 (INST ResVPR:$src, ResVPR:$Rn,
7292 (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), OpImm:$Index)>;
7294 // Pattern for lane in 64-bit vector
// 2D2D case: the splat of a v1f64 shows up as a combine of the register
// with itself; always selects lane 0.
7295 class NI_2VEswap_lane_2d2d<Instruction INST, Operand OpImm,
7296 SDPatternOperator op,
7297 RegisterOperand ResVPR, RegisterOperand OpVPR,
7298 ValueType ResTy, ValueType OpTy,
7299 SDPatternOperator coreop>
7300 : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (OpTy OpVPR:$Re))),
7301 (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
7302 (INST ResVPR:$src, ResVPR:$Rn,
7303 (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), 0)>;
// FMLA selection patterns over all FP shapes and lane positions.
7306 multiclass NI_2VE_fma_v2_pat<string subop, SDPatternOperator op> {
7307 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
7308 neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
7309 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
7311 def : NI_2VEfma_lane0<!cast<Instruction>(subop # "_2s4s"),
7314 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
7315 neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
7316 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
7318 def : NI_2VEfma_lane0<!cast<Instruction>(subop # "_4s4s"),
7321 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
7322 neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
7323 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
7325 // Index can only be half of the max value for lane in 64-bit vector
7327 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
7328 neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
7329 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
7331 def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
7332 neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
7333 BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
7336 defm FMLA_lane_v2_s : NI_2VE_fma_v2_pat<"FMLAvve", fma>;
7338 // Pattern for lane 0
// FMLS lane-0 form: fma with the first multiplicand negated and the other
// being a dup'ed f32 scalar, selected onto the by-element FMLS at lane 0.
7339 class NI_2VEfms_lane0<Instruction INST, SDPatternOperator op,
7340 RegisterOperand ResVPR, ValueType ResTy>
7341 : Pat<(ResTy (op (ResTy (fneg ResVPR:$Rn)),
7342 (ResTy (Neon_vdup (f32 FPR32:$Re))),
7343 (ResTy ResVPR:$src))),
7344 (INST ResVPR:$src, ResVPR:$Rn,
7345 (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
// FMLS patterns: each shape is matched twice because the fneg can appear
// either outside the vduplane (fneg(dup(x))) or inside it (dup(fneg(x))).
7347 multiclass NI_2VE_fms_v2_pat<string subop, SDPatternOperator op>
7349 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
7350 neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
7351 BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>;
7353 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
7354 neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
7355 BinOpFrag<(Neon_vduplane
7356 (fneg node:$LHS), node:$RHS)>>;
7358 def : NI_2VEfms_lane0<!cast<Instruction>(subop # "_2s4s"),
7361 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
7362 neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
7363 BinOpFrag<(fneg (Neon_vduplane
7364 node:$LHS, node:$RHS))>>;
7366 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
7367 neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
7368 BinOpFrag<(Neon_vduplane
7369 (fneg node:$LHS), node:$RHS)>>;
7371 def : NI_2VEfms_lane0<!cast<Instruction>(subop # "_4s4s"),
7374 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
7375 neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
7376 BinOpFrag<(fneg (Neon_vduplane
7377 node:$LHS, node:$RHS))>>;
7379 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
7380 neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
7381 BinOpFrag<(Neon_vduplane
7382 (fneg node:$LHS), node:$RHS)>>;
7384 // Index can only be half of the max value for lane in 64-bit vector
7386 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
7387 neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
7388 BinOpFrag<(fneg (Neon_vduplane
7389 node:$LHS, node:$RHS))>>;
7391 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
7392 neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
7393 BinOpFrag<(Neon_vduplane
7394 (fneg node:$LHS), node:$RHS)>>;
7396 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
7397 neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
7398 BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>;
7400 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
7401 neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
7402 BinOpFrag<(Neon_vduplane (fneg node:$LHS), node:$RHS)>>;
7404 def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
7405 neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
7406 BinOpFrag<(fneg (Neon_combine_2d
7407 node:$LHS, node:$RHS))>>;
7409 def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
7410 neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
7411 BinOpFrag<(Neon_combine_2d
7412 (fneg node:$LHS), (fneg node:$RHS))>>;
7415 defm FMLS_lane_v2_s : NI_2VE_fms_v2_pat<"FMLSvve", fma>;
7417 // Variant 3: Long type
7418 // E.g. SMLAL : 4S/4H/H (v0-v15), 2D/2S/S
7419 // SMLAL2: 4S/8H/H (v0-v15), 2D/4S/S
// Accumulating long by-element ops: result elements are double the width of
// the sources. The "2" suffix forms (_2d4s, _4s8h) operate on the high half
// of a 128-bit source.
7421 multiclass NI_2VE_v3<bit u, bits<4> opcode, string asmop> {
7422 // vector register class for element is always 128-bit to cover the max index
7423 def _2d2s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
7424 neon_uimm2_bare, VPR128, VPR64, VPR128> {
7425 let Inst{11} = {Index{1}};
7426 let Inst{21} = {Index{0}};
7427 let Inst{20-16} = Re;
7430 def _2d4s : NI_2VE<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
7431 neon_uimm2_bare, VPR128, VPR128, VPR128> {
7432 let Inst{11} = {Index{1}};
7433 let Inst{21} = {Index{0}};
7434 let Inst{20-16} = Re;
7437 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
7438 def _4s8h : NI_2VE<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
7439 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
7440 let Inst{11} = {Index{2}};
7441 let Inst{21} = {Index{1}};
7442 let Inst{20} = {Index{0}};
7443 let Inst{19-16} = Re{3-0};
7446 def _4s4h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
7447 neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
7448 let Inst{11} = {Index{2}};
7449 let Inst{21} = {Index{1}};
7450 let Inst{20} = {Index{0}};
7451 let Inst{19-16} = Re{3-0};
// Long accumulating multiplies, including saturating doubling forms.
7455 defm SMLALvve : NI_2VE_v3<0b0, 0b0010, "smlal">;
7456 defm UMLALvve : NI_2VE_v3<0b1, 0b0010, "umlal">;
7457 defm SMLSLvve : NI_2VE_v3<0b0, 0b0110, "smlsl">;
7458 defm UMLSLvve : NI_2VE_v3<0b1, 0b0110, "umlsl">;
7459 defm SQDMLALvve : NI_2VE_v3<0b0, 0b0011, "sqdmlal">;
7460 defm SQDMLSLvve : NI_2VE_v3<0b0, 0b0111, "sqdmlsl">;
// Two-operand (non-accumulating) long forms; same shapes as NI_2VE_v3.
7462 multiclass NI_2VE_v3_2op<bit u, bits<4> opcode, string asmop> {
7463 // vector register class for element is always 128-bit to cover the max index
7464 def _2d2s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
7465 neon_uimm2_bare, VPR128, VPR64, VPR128> {
7466 let Inst{11} = {Index{1}};
7467 let Inst{21} = {Index{0}};
7468 let Inst{20-16} = Re;
7471 def _2d4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
7472 neon_uimm2_bare, VPR128, VPR128, VPR128> {
7473 let Inst{11} = {Index{1}};
7474 let Inst{21} = {Index{0}};
7475 let Inst{20-16} = Re;
7478 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
7479 def _4s8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
7480 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
7481 let Inst{11} = {Index{2}};
7482 let Inst{21} = {Index{1}};
7483 let Inst{20} = {Index{0}};
7484 let Inst{19-16} = Re{3-0};
7487 def _4s4h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
7488 neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
7489 let Inst{11} = {Index{2}};
7490 let Inst{21} = {Index{1}};
7491 let Inst{20} = {Index{0}};
7492 let Inst{19-16} = Re{3-0};
7496 defm SMULLve : NI_2VE_v3_2op<0b0, 0b1010, "smull">;
7497 defm UMULLve : NI_2VE_v3_2op<0b1, 0b1010, "umull">;
7498 defm SQDMULLve : NI_2VE_v3_2op<0b0, 0b1011, "sqdmull">;
// v1f64 scalar_to_vector is a no-op register reinterpretation.
// NOTE(review): the result side of this Pat is elided in this excerpt.
7500 def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))),
7503 // Pattern for lane in 128-bit vector
// "2VEL2" classes handle the high-half ("2"-suffixed) long ops: hiop
// (e.g. Neon_High8H) extracts the top half of the 128-bit $Rn.
7504 class NI_2VEL2_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
7505 RegisterOperand EleOpVPR, ValueType ResTy,
7506 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
7507 SDPatternOperator hiop>
7508 : Pat<(ResTy (op (ResTy VPR128:$src),
7509 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
7510 (HalfOpTy (Neon_vduplane
7511 (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7512 (INST VPR128:$src, VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
7514 // Pattern for lane in 64-bit vector
7515 class NI_2VEL2_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
7516 RegisterOperand EleOpVPR, ValueType ResTy,
7517 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
7518 SDPatternOperator hiop>
7519 : Pat<(ResTy (op (ResTy VPR128:$src),
7520 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
7521 (HalfOpTy (Neon_vduplane
7522 (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7523 (INST VPR128:$src, VPR128:$Rn,
7524 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
// High-half op against a dup'ed GPR scalar: materialize the splat with the
// matching DUP instruction and address lane 0.
7526 class NI_2VEL2_lane0<Instruction INST, SDPatternOperator op,
7527 ValueType ResTy, ValueType OpTy, ValueType HalfOpTy,
7528 SDPatternOperator hiop, Instruction DupInst>
7529 : Pat<(ResTy (op (ResTy VPR128:$src),
7530 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
7531 (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))),
7532 (INST VPR128:$src, VPR128:$Rn, (DupInst $Re), 0)>;
// Selection patterns for all long-type accumulating shapes of one op.
7534 multiclass NI_2VEL_v3_pat<string subop, SDPatternOperator op> {
7535 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
7536 op, VPR128, VPR64, VPR128Lo, v4i32, v4i16, v8i16>;
7538 def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
7539 op, VPR128, VPR64, VPR128, v2i64, v2i32, v4i32>;
7541 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
7542 op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>;
7544 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
7545 op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>;
7547 def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_4s8h"),
7548 op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;
7550 def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_2d4s"),
7551 op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;
7553 // Index can only be half of the max value for lane in 64-bit vector
7555 def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
7556 op, VPR128, VPR64, VPR64Lo, v4i32, v4i16, v4i16>;
7558 def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
7559 op, VPR128, VPR64, VPR64, v2i64, v2i32, v2i32>;
7561 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
7562 op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>;
7564 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
7565 op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>;
7568 defm SMLAL_lane_v3 : NI_2VEL_v3_pat<"SMLALvve", Neon_smlal>;
7569 defm UMLAL_lane_v3 : NI_2VEL_v3_pat<"UMLALvve", Neon_umlal>;
7570 defm SMLSL_lane_v3 : NI_2VEL_v3_pat<"SMLSLvve", Neon_smlsl>;
7571 defm UMLSL_lane_v3 : NI_2VEL_v3_pat<"UMLSLvve", Neon_umlsl>;
7573 // Pattern for lane in 128-bit vector
// Non-accumulating high-half long multiply patterns; mirror the
// NI_2VEL2_* accumulating classes above but without the tied $src operand.
// NOTE(review): the leading "Pat<(ResTy (op ..." line of each class is
// elided in this excerpt.
7574 class NI_2VEL2_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
7575 RegisterOperand EleOpVPR, ValueType ResTy,
7576 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
7577 SDPatternOperator hiop>
7579 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
7580 (HalfOpTy (Neon_vduplane
7581 (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7582 (INST VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
7584 // Pattern for lane in 64-bit vector
7585 class NI_2VEL2_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
7586 RegisterOperand EleOpVPR, ValueType ResTy,
7587 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
7588 SDPatternOperator hiop>
7590 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
7591 (HalfOpTy (Neon_vduplane
7592 (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7594 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
7596 // Pattern for fixed lane 0
7597 class NI_2VEL2_mul_lane0<Instruction INST, SDPatternOperator op,
7598 ValueType ResTy, ValueType OpTy, ValueType HalfOpTy,
7599 SDPatternOperator hiop, Instruction DupInst>
7601 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
7602 (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))),
7603 (INST VPR128:$Rn, (DupInst $Re), 0)>;
// Selection patterns for all long-type two-operand multiply shapes.
7605 multiclass NI_2VEL_mul_v3_pat<string subop, SDPatternOperator op> {
7606 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
7607 op, VPR64, VPR128Lo, v4i32, v4i16, v8i16>;
7609 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
7610 op, VPR64, VPR128, v2i64, v2i32, v4i32>;
7612 def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
7613 op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>;
7615 def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
7616 op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>;
7618 def : NI_2VEL2_mul_lane0<!cast<Instruction>(subop # "_4s8h"),
7619 op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;
7621 def : NI_2VEL2_mul_lane0<!cast<Instruction>(subop # "_2d4s"),
7622 op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;
7624 // Index can only be half of the max value for lane in 64-bit vector
7626 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
7627 op, VPR64, VPR64Lo, v4i32, v4i16, v4i16>;
7629 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
7630 op, VPR64, VPR64, v2i64, v2i32, v2i32>;
7632 def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
7633 op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>;
7635 def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
7636 op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>;
7639 defm SMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SMULLve", int_arm_neon_vmulls>;
7640 defm UMULL_lane_v3 : NI_2VEL_mul_v3_pat<"UMULLve", int_arm_neon_vmullu>;
7641 defm SQDMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SQDMULLve", int_arm_neon_vqdmull>;
// PatFrags composing sqdmull with a saturating add/sub accumulate step,
// giving the DAG shape of sqdmlal (vqadds) / sqdmlsl (vqsubs).
7643 multiclass NI_qdma<SDPatternOperator op> {
7644 def _4s : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
7646 (v4i32 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
7648 def _2d : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
7650 (v2i64 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
7653 defm Neon_qdmlal : NI_qdma<int_arm_neon_vqadds>;
7654 defm Neon_qdmlsl : NI_qdma<int_arm_neon_vqsubs>;
// Selection patterns for SQDMLAL/SQDMLSL by-element: reuse the long-type
// pattern classes with the qdma PatFrags (op name resolved via !cast).
7656 multiclass NI_2VEL_v3_qdma_pat<string subop, string op> {
7657 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
7658 !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR128Lo,
7659 v4i32, v4i16, v8i16>;
7661 def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
7662 !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR128,
7663 v2i64, v2i32, v4i32>;
7665 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
7666 !cast<PatFrag>(op # "_4s"), VPR128Lo,
7667 v4i32, v8i16, v8i16, v4i16, Neon_High8H>;
7669 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
7670 !cast<PatFrag>(op # "_2d"), VPR128,
7671 v2i64, v4i32, v4i32, v2i32, Neon_High4S>;
7673 def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_4s8h"),
7674 !cast<PatFrag>(op # "_4s"),
7675 v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;
7677 def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_2d4s"),
7678 !cast<PatFrag>(op # "_2d"),
7679 v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;
7681 // Index can only be half of the max value for lane in 64-bit vector
7683 def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
7684 !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR64Lo,
7685 v4i32, v4i16, v4i16>;
7687 def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
7688 !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR64,
7689 v2i64, v2i32, v2i32>;
7691 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
7692 !cast<PatFrag>(op # "_4s"), VPR64Lo,
7693 v4i32, v8i16, v4i16, v4i16, Neon_High8H>;
7695 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
7696 !cast<PatFrag>(op # "_2d"), VPR64,
7697 v2i64, v4i32, v2i32, v2i32, Neon_High4S>;
7700 defm SQDMLAL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLALvve", "Neon_qdmlal">;
7701 defm SQDMLSL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLSLvve", "Neon_qdmlsl">;
7703 // End of implementation for instruction class (3V Elem)
// NeonI_REV: element-reversal instructions (REV64/REV32/REV16). One source,
// one destination, same register class; the pattern maps the corresponding
// Neon_Rev node.
7705 class NeonI_REV<string asmop, string Res, bits<2> size, bit Q, bit U,
7706 bits<5> opcode, RegisterOperand ResVPR, ValueType ResTy,
7707 SDPatternOperator Neon_Rev>
7708 : NeonI_2VMisc<Q, U, size, opcode,
7709 (outs ResVPR:$Rd), (ins ResVPR:$Rn),
7710 asmop # "\t$Rd." # Res # ", $Rn." # Res,
7711 [(set (ResTy ResVPR:$Rd),
7712 (ResTy (Neon_Rev (ResTy ResVPR:$Rn))))],
// REV64/REV32/REV16 across the legal element/arrangement combinations.
// NOTE(review): the SDPatternOperator argument of each def is elided in
// this excerpt.
7715 def REV64_16b : NeonI_REV<"rev64", "16b", 0b00, 0b1, 0b0, 0b00000, VPR128,
7717 def REV64_8h : NeonI_REV<"rev64", "8h", 0b01, 0b1, 0b0, 0b00000, VPR128,
7719 def REV64_4s : NeonI_REV<"rev64", "4s", 0b10, 0b1, 0b0, 0b00000, VPR128,
7721 def REV64_8b : NeonI_REV<"rev64", "8b", 0b00, 0b0, 0b0, 0b00000, VPR64,
7723 def REV64_4h : NeonI_REV<"rev64", "4h", 0b01, 0b0, 0b0, 0b00000, VPR64,
7725 def REV64_2s : NeonI_REV<"rev64", "2s", 0b10, 0b0, 0b0, 0b00000, VPR64,
// FP vectors reuse the integer REV64 instructions of the same layout.
7728 def : Pat<(v4f32 (Neon_rev64 (v4f32 VPR128:$Rn))), (REV64_4s VPR128:$Rn)>;
7729 def : Pat<(v2f32 (Neon_rev64 (v2f32 VPR64:$Rn))), (REV64_2s VPR64:$Rn)>;
7731 def REV32_16b : NeonI_REV<"rev32", "16b", 0b00, 0b1, 0b1, 0b00000, VPR128,
7733 def REV32_8h : NeonI_REV<"rev32", "8h", 0b01, 0b1, 0b1, 0b00000, VPR128,
7735 def REV32_8b : NeonI_REV<"rev32", "8b", 0b00, 0b0, 0b1, 0b00000, VPR64,
7737 def REV32_4h : NeonI_REV<"rev32", "4h", 0b01, 0b0, 0b1, 0b00000, VPR64,
7740 def REV16_16b : NeonI_REV<"rev16", "16b", 0b00, 0b1, 0b0, 0b00001, VPR128,
7742 def REV16_8b : NeonI_REV<"rev16", "8b", 0b00, 0b0, 0b0, 0b00001, VPR64,
// Pairwise long add (SADDLP/UADDLP): adds adjacent element pairs, producing
// half as many elements of double the width (16B->8H ... 2S->1D).
7745 multiclass NeonI_PairwiseAdd<string asmop, bit U, bits<5> opcode,
7746 SDPatternOperator Neon_Padd> {
7747 def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
7748 (outs VPR128:$Rd), (ins VPR128:$Rn),
7749 asmop # "\t$Rd.8h, $Rn.16b",
7750 [(set (v8i16 VPR128:$Rd),
7751 (v8i16 (Neon_Padd (v16i8 VPR128:$Rn))))],
7754 def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
7755 (outs VPR64:$Rd), (ins VPR64:$Rn),
7756 asmop # "\t$Rd.4h, $Rn.8b",
7757 [(set (v4i16 VPR64:$Rd),
7758 (v4i16 (Neon_Padd (v8i8 VPR64:$Rn))))],
7761 def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
7762 (outs VPR128:$Rd), (ins VPR128:$Rn),
7763 asmop # "\t$Rd.4s, $Rn.8h",
7764 [(set (v4i32 VPR128:$Rd),
7765 (v4i32 (Neon_Padd (v8i16 VPR128:$Rn))))],
7768 def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
7769 (outs VPR64:$Rd), (ins VPR64:$Rn),
7770 asmop # "\t$Rd.2s, $Rn.4h",
7771 [(set (v2i32 VPR64:$Rd),
7772 (v2i32 (Neon_Padd (v4i16 VPR64:$Rn))))],
7775 def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
7776 (outs VPR128:$Rd), (ins VPR128:$Rn),
7777 asmop # "\t$Rd.2d, $Rn.4s",
7778 [(set (v2i64 VPR128:$Rd),
7779 (v2i64 (Neon_Padd (v4i32 VPR128:$Rn))))],
7782 def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
7783 (outs VPR64:$Rd), (ins VPR64:$Rn),
7784 asmop # "\t$Rd.1d, $Rn.2s",
7785 [(set (v1i64 VPR64:$Rd),
7786 (v1i64 (Neon_Padd (v2i32 VPR64:$Rn))))],
7790 defm SADDLP : NeonI_PairwiseAdd<"saddlp", 0b0, 0b00010,
7791 int_arm_neon_vpaddls>;
7792 defm UADDLP : NeonI_PairwiseAdd<"uaddlp", 0b1, 0b00010,
7793 int_arm_neon_vpaddlu>;
// Across-vector long add of a 2S vector is the same computation as a
// pairwise long add, so map the aarch64 addlv intrinsics onto *ADDLP 2s1d.
// NOTE(review): the result side of these two Pats is elided in this excerpt.
7795 def : Pat<(v1i64 (int_aarch64_neon_saddlv (v2i32 VPR64:$Rn))),
7797 def : Pat<(v1i64 (int_aarch64_neon_uaddlv (v2i32 VPR64:$Rn))),
// Pairwise add-long and accumulate: same widening pairwise add as above, but
// the result is added into the destination, so $src is tied to $Rd.
7800 multiclass NeonI_PairwiseAddAcc<string asmop, bit U, bits<5> opcode,
7801 SDPatternOperator Neon_Padd> {
7802 let Constraints = "$src = $Rd" in {
7803 def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
7804 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7805 asmop # "\t$Rd.8h, $Rn.16b",
7806 [(set (v8i16 VPR128:$Rd),
7808 (v8i16 VPR128:$src), (v16i8 VPR128:$Rn))))],
7811 def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
7812 (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
7813 asmop # "\t$Rd.4h, $Rn.8b",
7814 [(set (v4i16 VPR64:$Rd),
7816 (v4i16 VPR64:$src), (v8i8 VPR64:$Rn))))],
7819 def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
7820 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7821 asmop # "\t$Rd.4s, $Rn.8h",
7822 [(set (v4i32 VPR128:$Rd),
7824 (v4i32 VPR128:$src), (v8i16 VPR128:$Rn))))],
7827 def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
7828 (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
7829 asmop # "\t$Rd.2s, $Rn.4h",
7830 [(set (v2i32 VPR64:$Rd),
7832 (v2i32 VPR64:$src), (v4i16 VPR64:$Rn))))],
7835 def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
7836 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7837 asmop # "\t$Rd.2d, $Rn.4s",
7838 [(set (v2i64 VPR128:$Rd),
7840 (v2i64 VPR128:$src), (v4i32 VPR128:$Rn))))],
7843 def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
7844 (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
7845 asmop # "\t$Rd.1d, $Rn.2s",
7846 [(set (v1i64 VPR64:$Rd),
7848 (v1i64 VPR64:$src), (v2i32 VPR64:$Rn))))],
// Signed (sadalp) / unsigned (uadalp) pairwise add-accumulate-long.
7853 defm SADALP : NeonI_PairwiseAddAcc<"sadalp", 0b0, 0b00110,
7854 int_arm_neon_vpadals>;
7855 defm UADALP : NeonI_PairwiseAddAcc<"uadalp", 0b1, 0b00110,
7856 int_arm_neon_vpadalu>;
// One-source 2-register misc instructions over all B/H/S/D arrangements.
// These defs carry only the assembly string; selection patterns are attached
// separately by NeonI_2VMisc_BHSD_1Arg_Pattern below.
7858 multiclass NeonI_2VMisc_BHSDsize_1Arg<string asmop, bit U, bits<5> opcode> {
7859 def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
7860 (outs VPR128:$Rd), (ins VPR128:$Rn),
7861 asmop # "\t$Rd.16b, $Rn.16b",
7864 def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
7865 (outs VPR128:$Rd), (ins VPR128:$Rn),
7866 asmop # "\t$Rd.8h, $Rn.8h",
7869 def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
7870 (outs VPR128:$Rd), (ins VPR128:$Rn),
7871 asmop # "\t$Rd.4s, $Rn.4s",
7874 def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
7875 (outs VPR128:$Rd), (ins VPR128:$Rn),
7876 asmop # "\t$Rd.2d, $Rn.2d",
7879 def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
7880 (outs VPR64:$Rd), (ins VPR64:$Rn),
7881 asmop # "\t$Rd.8b, $Rn.8b",
7884 def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
7885 (outs VPR64:$Rd), (ins VPR64:$Rn),
7886 asmop # "\t$Rd.4h, $Rn.4h",
7889 def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
7890 (outs VPR64:$Rd), (ins VPR64:$Rn),
7891 asmop # "\t$Rd.2s, $Rn.2s",
// Saturating abs/negate share opcode 0b00111 (U selects), plain abs/neg
// share 0b01011.
7895 defm SQABS : NeonI_2VMisc_BHSDsize_1Arg<"sqabs", 0b0, 0b00111>;
7896 defm SQNEG : NeonI_2VMisc_BHSDsize_1Arg<"sqneg", 0b1, 0b00111>;
7897 defm ABS : NeonI_2VMisc_BHSDsize_1Arg<"abs", 0b0, 0b01011>;
7898 defm NEG : NeonI_2VMisc_BHSDsize_1Arg<"neg", 0b1, 0b01011>;
// Attach a one-source operator Neon_Op to each arrangement of an instruction
// family named Prefix (defined by NeonI_2VMisc_BHSDsize_1Arg above); the
// instruction is looked up by name as Prefix # suffix.
7900 multiclass NeonI_2VMisc_BHSD_1Arg_Pattern<string Prefix,
7901 SDPatternOperator Neon_Op> {
7902 def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$Rn))),
7903 (v16i8 (!cast<Instruction>(Prefix # 16b) (v16i8 VPR128:$Rn)))>;
7905 def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$Rn))),
7906 (v8i16 (!cast<Instruction>(Prefix # 8h) (v8i16 VPR128:$Rn)))>;
7908 def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$Rn))),
7909 (v4i32 (!cast<Instruction>(Prefix # 4s) (v4i32 VPR128:$Rn)))>;
7911 def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$Rn))),
7912 (v2i64 (!cast<Instruction>(Prefix # 2d) (v2i64 VPR128:$Rn)))>;
7914 def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$Rn))),
7915 (v8i8 (!cast<Instruction>(Prefix # 8b) (v8i8 VPR64:$Rn)))>;
7917 def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$Rn))),
7918 (v4i16 (!cast<Instruction>(Prefix # 4h) (v4i16 VPR64:$Rn)))>;
7920 def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$Rn))),
7921 (v2i32 (!cast<Instruction>(Prefix # 2s) (v2i32 VPR64:$Rn)))>;
// NOTE: NEG has no intrinsic; it is matched from (sub 0, x) below instead.
7924 defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQABS", int_arm_neon_vqabs>;
7925 defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQNEG", int_arm_neon_vqneg>;
7926 defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"ABS", int_arm_neon_vabs>;
// Match integer negation written as (sub 0, x) to the vector NEG
// instructions. Non-byte element types obtain their all-zero operand by
// bitconverting the byte-vector Neon_AllZero constant.
7928 def : Pat<(v16i8 (sub
7929 (v16i8 Neon_AllZero),
7930 (v16i8 VPR128:$Rn))),
7931 (v16i8 (NEG16b (v16i8 VPR128:$Rn)))>;
7932 def : Pat<(v8i8 (sub
7933 (v8i8 Neon_AllZero),
7935 (v8i8 (NEG8b (v8i8 VPR64:$Rn)))>;
7936 def : Pat<(v8i16 (sub
7937 (v8i16 (bitconvert (v16i8 Neon_AllZero))),
7938 (v8i16 VPR128:$Rn))),
7939 (v8i16 (NEG8h (v8i16 VPR128:$Rn)))>;
7940 def : Pat<(v4i16 (sub
7941 (v4i16 (bitconvert (v8i8 Neon_AllZero))),
7942 (v4i16 VPR64:$Rn))),
7943 (v4i16 (NEG4h (v4i16 VPR64:$Rn)))>;
7944 def : Pat<(v4i32 (sub
7945 (v4i32 (bitconvert (v16i8 Neon_AllZero))),
7946 (v4i32 VPR128:$Rn))),
7947 (v4i32 (NEG4s (v4i32 VPR128:$Rn)))>;
7948 def : Pat<(v2i32 (sub
7949 (v2i32 (bitconvert (v8i8 Neon_AllZero))),
7950 (v2i32 VPR64:$Rn))),
7951 (v2i32 (NEG2s (v2i32 VPR64:$Rn)))>;
7952 def : Pat<(v2i64 (sub
7953 (v2i64 (bitconvert (v16i8 Neon_AllZero))),
7954 (v2i64 VPR128:$Rn))),
7955 (v2i64 (NEG2d (v2i64 VPR128:$Rn)))>;
// Two-operand (accumulating) 2-register misc instructions: destination is
// both read and written, so $src is tied to $Rd. Asm-only here; patterns are
// attached via NeonI_2VMisc_BHSD_2Args_Pattern below.
7957 multiclass NeonI_2VMisc_BHSDsize_2Args<string asmop, bit U, bits<5> opcode> {
7958 let Constraints = "$src = $Rd" in {
7959 def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
7960 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7961 asmop # "\t$Rd.16b, $Rn.16b",
7964 def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
7965 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7966 asmop # "\t$Rd.8h, $Rn.8h",
7969 def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
7970 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7971 asmop # "\t$Rd.4s, $Rn.4s",
7974 def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
7975 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7976 asmop # "\t$Rd.2d, $Rn.2d",
7979 def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
7980 (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
7981 asmop # "\t$Rd.8b, $Rn.8b",
7984 def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
7985 (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
7986 asmop # "\t$Rd.4h, $Rn.4h",
7989 def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
7990 (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
7991 asmop # "\t$Rd.2s, $Rn.2s",
// Signed+unsigned saturating accumulate: suqadd/usqadd, opcode 0b00011.
7996 defm SUQADD : NeonI_2VMisc_BHSDsize_2Args<"suqadd", 0b0, 0b00011>;
7997 defm USQADD : NeonI_2VMisc_BHSDsize_2Args<"usqadd", 0b1, 0b00011>;
// Attach a two-source operator (accumulator, operand) to each arrangement of
// the instruction family named Prefix; mirrors the 1Arg pattern multiclass.
7999 multiclass NeonI_2VMisc_BHSD_2Args_Pattern<string Prefix,
8000 SDPatternOperator Neon_Op> {
8001 def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$src), (v16i8 VPR128:$Rn))),
8002 (v16i8 (!cast<Instruction>(Prefix # 16b)
8003 (v16i8 VPR128:$src), (v16i8 VPR128:$Rn)))>;
8005 def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$src), (v8i16 VPR128:$Rn))),
8006 (v8i16 (!cast<Instruction>(Prefix # 8h)
8007 (v8i16 VPR128:$src), (v8i16 VPR128:$Rn)))>;
8009 def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$src), (v4i32 VPR128:$Rn))),
8010 (v4i32 (!cast<Instruction>(Prefix # 4s)
8011 (v4i32 VPR128:$src), (v4i32 VPR128:$Rn)))>;
8013 def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$src), (v2i64 VPR128:$Rn))),
8014 (v2i64 (!cast<Instruction>(Prefix # 2d)
8015 (v2i64 VPR128:$src), (v2i64 VPR128:$Rn)))>;
8017 def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$src), (v8i8 VPR64:$Rn))),
8018 (v8i8 (!cast<Instruction>(Prefix # 8b)
8019 (v8i8 VPR64:$src), (v8i8 VPR64:$Rn)))>;
8021 def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$src), (v4i16 VPR64:$Rn))),
8022 (v4i16 (!cast<Instruction>(Prefix # 4h)
8023 (v4i16 VPR64:$src), (v4i16 VPR64:$Rn)))>;
8025 def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$src), (v2i32 VPR64:$Rn))),
8026 (v2i32 (!cast<Instruction>(Prefix # 2s)
8027 (v2i32 VPR64:$src), (v2i32 VPR64:$Rn)))>;
8030 defm : NeonI_2VMisc_BHSD_2Args_Pattern<"SUQADD", int_aarch64_neon_suqadd>;
8031 defm : NeonI_2VMisc_BHSD_2Args_Pattern<"USQADD", int_aarch64_neon_usqadd>;
// One-source 2-register misc instructions restricted to B/H/S element sizes
// (no D form), with the selection pattern built into each def. Fixed opcode
// 0b00100; U distinguishes the two users below.
8033 multiclass NeonI_2VMisc_BHSsizes<string asmop, bit U,
8034 SDPatternOperator Neon_Op> {
8035 def 16b : NeonI_2VMisc<0b1, U, 0b00, 0b00100,
8036 (outs VPR128:$Rd), (ins VPR128:$Rn),
8037 asmop # "\t$Rd.16b, $Rn.16b",
8038 [(set (v16i8 VPR128:$Rd),
8039 (v16i8 (Neon_Op (v16i8 VPR128:$Rn))))],
8042 def 8h : NeonI_2VMisc<0b1, U, 0b01, 0b00100,
8043 (outs VPR128:$Rd), (ins VPR128:$Rn),
8044 asmop # "\t$Rd.8h, $Rn.8h",
8045 [(set (v8i16 VPR128:$Rd),
8046 (v8i16 (Neon_Op (v8i16 VPR128:$Rn))))],
8049 def 4s : NeonI_2VMisc<0b1, U, 0b10, 0b00100,
8050 (outs VPR128:$Rd), (ins VPR128:$Rn),
8051 asmop # "\t$Rd.4s, $Rn.4s",
8052 [(set (v4i32 VPR128:$Rd),
8053 (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
8056 def 8b : NeonI_2VMisc<0b0, U, 0b00, 0b00100,
8057 (outs VPR64:$Rd), (ins VPR64:$Rn),
8058 asmop # "\t$Rd.8b, $Rn.8b",
8059 [(set (v8i8 VPR64:$Rd),
8060 (v8i8 (Neon_Op (v8i8 VPR64:$Rn))))],
8063 def 4h : NeonI_2VMisc<0b0, U, 0b01, 0b00100,
8064 (outs VPR64:$Rd), (ins VPR64:$Rn),
8065 asmop # "\t$Rd.4h, $Rn.4h",
8066 [(set (v4i16 VPR64:$Rd),
8067 (v4i16 (Neon_Op (v4i16 VPR64:$Rn))))],
8070 def 2s : NeonI_2VMisc<0b0, U, 0b10, 0b00100,
8071 (outs VPR64:$Rd), (ins VPR64:$Rn),
8072 asmop # "\t$Rd.2s, $Rn.2s",
8073 [(set (v2i32 VPR64:$Rd),
8074 (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
// CLS = count leading sign bits (intrinsic); CLZ = count leading zeros,
// matched from the generic ctlz node.
8078 defm CLS : NeonI_2VMisc_BHSsizes<"cls", 0b0, int_arm_neon_vcls>;
8079 defm CLZ : NeonI_2VMisc_BHSsizes<"clz", 0b1, ctlz>;
// Byte-only one-source 2-register misc instructions (16b and 8b forms only):
// used for cnt (popcount), not (bitwise invert) and rbit (bit reverse).
8081 multiclass NeonI_2VMisc_Bsize<string asmop, bit U, bits<2> size,
8083 def 16b : NeonI_2VMisc<0b1, U, size, Opcode,
8084 (outs VPR128:$Rd), (ins VPR128:$Rn),
8085 asmop # "\t$Rd.16b, $Rn.16b",
8088 def 8b : NeonI_2VMisc<0b0, U, size, Opcode,
8089 (outs VPR64:$Rd), (ins VPR64:$Rn),
8090 asmop # "\t$Rd.8b, $Rn.8b",
8094 defm CNT : NeonI_2VMisc_Bsize<"cnt", 0b0, 0b00, 0b00101>;
8095 defm NOT : NeonI_2VMisc_Bsize<"not", 0b1, 0b00, 0b00101>;
8096 defm RBIT : NeonI_2VMisc_Bsize<"rbit", 0b1, 0b01, 0b00101>;
// "mvn" is accepted as an assembler alias for NOT (not emitted on print).
8098 def : NeonInstAlias<"mvn $Rd.16b, $Rn.16b",
8099 (NOT16b VPR128:$Rd, VPR128:$Rn), 0>;
8100 def : NeonInstAlias<"mvn $Rd.8b, $Rn.8b",
8101 (NOT8b VPR64:$Rd, VPR64:$Rn), 0>;
// Population count selects to CNT.
8103 def : Pat<(v16i8 (ctpop (v16i8 VPR128:$Rn))),
8104 (v16i8 (CNT16b (v16i8 VPR128:$Rn)))>;
8105 def : Pat<(v8i8 (ctpop (v8i8 VPR64:$Rn))),
8106 (v8i8 (CNT8b (v8i8 VPR64:$Rn)))>;
// Bitwise NOT written as (xor x, all-ones) selects to NOT; wider element
// types bitconvert the byte-vector all-ones constant and reuse the byte form.
8108 def : Pat<(v16i8 (xor
8110 (v16i8 Neon_AllOne))),
8111 (v16i8 (NOT16b (v16i8 VPR128:$Rn)))>;
8112 def : Pat<(v8i8 (xor
8114 (v8i8 Neon_AllOne))),
8115 (v8i8 (NOT8b (v8i8 VPR64:$Rn)))>;
8116 def : Pat<(v8i16 (xor
8118 (v8i16 (bitconvert (v16i8 Neon_AllOne))))),
8119 (NOT16b VPR128:$Rn)>;
8120 def : Pat<(v4i16 (xor
8122 (v4i16 (bitconvert (v8i8 Neon_AllOne))))),
8124 def : Pat<(v4i32 (xor
8126 (v4i32 (bitconvert (v16i8 Neon_AllOne))))),
8127 (NOT16b VPR128:$Rn)>;
8128 def : Pat<(v2i32 (xor
8130 (v2i32 (bitconvert (v8i8 Neon_AllOne))))),
8132 def : Pat<(v2i64 (xor
8134 (v2i64 (bitconvert (v16i8 Neon_AllOne))))),
8135 (NOT16b VPR128:$Rn)>;
// Bit reversal intrinsic selects to RBIT.
8137 def : Pat<(v16i8 (int_aarch64_neon_rbit (v16i8 VPR128:$Rn))),
8138 (v16i8 (RBIT16b (v16i8 VPR128:$Rn)))>;
8139 def : Pat<(v8i8 (int_aarch64_neon_rbit (v8i8 VPR64:$Rn))),
8140 (v8i8 (RBIT8b (v8i8 VPR64:$Rn)))>;
// One-source floating-point 2-register misc instructions over the FP
// arrangements 4s/2d/2s, with built-in selection patterns.
8142 multiclass NeonI_2VMisc_SDsizes<string asmop, bit U, bits<5> opcode,
8143 SDPatternOperator Neon_Op> {
8144 def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
8145 (outs VPR128:$Rd), (ins VPR128:$Rn),
8146 asmop # "\t$Rd.4s, $Rn.4s",
8147 [(set (v4f32 VPR128:$Rd),
8148 (v4f32 (Neon_Op (v4f32 VPR128:$Rn))))],
8151 def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
8152 (outs VPR128:$Rd), (ins VPR128:$Rn),
8153 asmop # "\t$Rd.2d, $Rn.2d",
8154 [(set (v2f64 VPR128:$Rd),
8155 (v2f64 (Neon_Op (v2f64 VPR128:$Rn))))],
8158 def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
8159 (outs VPR64:$Rd), (ins VPR64:$Rn),
8160 asmop # "\t$Rd.2s, $Rn.2s",
8161 [(set (v2f32 VPR64:$Rd),
8162 (v2f32 (Neon_Op (v2f32 VPR64:$Rn))))],
// Vector FP absolute value and negation, from the generic fabs/fneg nodes.
8166 defm FABS : NeonI_2VMisc_SDsizes<"fabs", 0b0, 0b01111, fabs>;
8167 defm FNEG : NeonI_2VMisc_SDsizes<"fneg", 0b1, 0b01111, fneg>;
// Integer narrowing instructions (e.g. xtn): 128-bit source to 64-bit result.
// The "2" variants write the high half of a 128-bit destination, so they read
// and rewrite $Rd (tied $src) to preserve the low half.
8169 multiclass NeonI_2VMisc_HSD_Narrow<string asmop, bit U, bits<5> opcode> {
8170 def 8h8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
8171 (outs VPR64:$Rd), (ins VPR128:$Rn),
8172 asmop # "\t$Rd.8b, $Rn.8h",
8175 def 4s4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
8176 (outs VPR64:$Rd), (ins VPR128:$Rn),
8177 asmop # "\t$Rd.4h, $Rn.4s",
8180 def 2d2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
8181 (outs VPR64:$Rd), (ins VPR128:$Rn),
8182 asmop # "\t$Rd.2s, $Rn.2d",
8185 let Constraints = "$Rd = $src" in {
8186 def 8h16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
8187 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8188 asmop # "2\t$Rd.16b, $Rn.8h",
8191 def 4s8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
8192 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8193 asmop # "2\t$Rd.8h, $Rn.4s",
8196 def 2d4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
8197 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8198 asmop # "2\t$Rd.4s, $Rn.2d",
// Plain truncate (xtn) plus the three saturating narrows.
8203 defm XTN : NeonI_2VMisc_HSD_Narrow<"xtn", 0b0, 0b10010>;
8204 defm SQXTUN : NeonI_2VMisc_HSD_Narrow<"sqxtun", 0b1, 0b10010>;
8205 defm SQXTN : NeonI_2VMisc_HSD_Narrow<"sqxtn", 0b0, 0b10100>;
8206 defm UQXTN : NeonI_2VMisc_HSD_Narrow<"uqxtn", 0b1, 0b10100>;
// Selection patterns for the narrowing family above. The concat_vectors
// patterns map "narrow into the high half" onto the "2" instructions,
// promoting the 64-bit low-half value into a 128-bit register first.
8208 multiclass NeonI_2VMisc_Narrow_Patterns<string Prefix,
8209 SDPatternOperator Neon_Op> {
8210 def : Pat<(v8i8 (Neon_Op (v8i16 VPR128:$Rn))),
8211 (v8i8 (!cast<Instruction>(Prefix # 8h8b) (v8i16 VPR128:$Rn)))>;
8213 def : Pat<(v4i16 (Neon_Op (v4i32 VPR128:$Rn))),
8214 (v4i16 (!cast<Instruction>(Prefix # 4s4h) (v4i32 VPR128:$Rn)))>;
8216 def : Pat<(v2i32 (Neon_Op (v2i64 VPR128:$Rn))),
8217 (v2i32 (!cast<Instruction>(Prefix # 2d2s) (v2i64 VPR128:$Rn)))>;
8219 def : Pat<(v16i8 (concat_vectors
8221 (v8i8 (Neon_Op (v8i16 VPR128:$Rn))))),
8222 (!cast<Instruction>(Prefix # 8h16b)
8223 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
8226 def : Pat<(v8i16 (concat_vectors
8228 (v4i16 (Neon_Op (v4i32 VPR128:$Rn))))),
8229 (!cast<Instruction>(Prefix # 4s8h)
8230 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
8233 def : Pat<(v4i32 (concat_vectors
8235 (v2i32 (Neon_Op (v2i64 VPR128:$Rn))))),
8236 (!cast<Instruction>(Prefix # 2d4s)
8237 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
// XTN matches generic trunc; the others their saturating-narrow intrinsics.
8241 defm : NeonI_2VMisc_Narrow_Patterns<"XTN", trunc>;
8242 defm : NeonI_2VMisc_Narrow_Patterns<"SQXTUN", int_arm_neon_vqmovnsu>;
8243 defm : NeonI_2VMisc_Narrow_Patterns<"SQXTN", int_arm_neon_vqmovns>;
8244 defm : NeonI_2VMisc_Narrow_Patterns<"UQXTN", int_arm_neon_vqmovnu>;
// SHLL/SHLL2: shift-left-long by exactly the source element width. The
// immediate operand is constrained to that width (uimm_exactN) and the
// encoding needs a custom decoder.
8246 multiclass NeonI_2VMisc_SHIFT<string asmop, bit U, bits<5> opcode> {
8247 let DecoderMethod = "DecodeSHLLInstruction" in {
8248 def 8b8h : NeonI_2VMisc<0b0, U, 0b00, opcode,
8250 (ins VPR64:$Rn, uimm_exact8:$Imm),
8251 asmop # "\t$Rd.8h, $Rn.8b, $Imm",
8254 def 4h4s : NeonI_2VMisc<0b0, U, 0b01, opcode,
8256 (ins VPR64:$Rn, uimm_exact16:$Imm),
8257 asmop # "\t$Rd.4s, $Rn.4h, $Imm",
8260 def 2s2d : NeonI_2VMisc<0b0, U, 0b10, opcode,
8262 (ins VPR64:$Rn, uimm_exact32:$Imm),
8263 asmop # "\t$Rd.2d, $Rn.2s, $Imm",
8266 def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
8268 (ins VPR128:$Rn, uimm_exact8:$Imm),
8269 asmop # "2\t$Rd.8h, $Rn.16b, $Imm",
8272 def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
8274 (ins VPR128:$Rn, uimm_exact16:$Imm),
8275 asmop # "2\t$Rd.4s, $Rn.8h, $Imm",
8278 def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
8280 (ins VPR128:$Rn, uimm_exact32:$Imm),
8281 asmop # "2\t$Rd.2d, $Rn.4s, $Imm",
8286 defm SHLL : NeonI_2VMisc_SHIFT<"shll", 0b1, 0b10011>;
// Match (extend then shift-left by element width) onto SHLL. The low-half
// class operates on a 64-bit source; the High variant extracts the upper half
// of a 128-bit register (via GetHigh) and selects the "2" instruction.
8288 class NeonI_SHLL_Patterns<ValueType OpTy, ValueType DesTy,
8289 SDPatternOperator ExtOp, Operand Neon_Imm,
8292 (DesTy (ExtOp (OpTy VPR64:$Rn))),
8294 (i32 Neon_Imm:$Imm))))),
8295 (!cast<Instruction>("SHLL" # suffix) VPR64:$Rn, Neon_Imm:$Imm)>;
8297 class NeonI_SHLL_High_Patterns<ValueType OpTy, ValueType DesTy,
8298 SDPatternOperator ExtOp, Operand Neon_Imm,
8299 string suffix, PatFrag GetHigh>
8302 (OpTy (GetHigh VPR128:$Rn)))),
8304 (i32 Neon_Imm:$Imm))))),
8305 (!cast<Instruction>("SHLL" # suffix) VPR128:$Rn, Neon_Imm:$Imm)>;
// Both zext and sext forms map to the same instruction: the shift discards
// the extended bits, so sign does not matter.
8307 def : NeonI_SHLL_Patterns<v8i8, v8i16, zext, uimm_exact8, "8b8h">;
8308 def : NeonI_SHLL_Patterns<v8i8, v8i16, sext, uimm_exact8, "8b8h">;
8309 def : NeonI_SHLL_Patterns<v4i16, v4i32, zext, uimm_exact16, "4h4s">;
8310 def : NeonI_SHLL_Patterns<v4i16, v4i32, sext, uimm_exact16, "4h4s">;
8311 def : NeonI_SHLL_Patterns<v2i32, v2i64, zext, uimm_exact32, "2s2d">;
8312 def : NeonI_SHLL_Patterns<v2i32, v2i64, sext, uimm_exact32, "2s2d">;
8313 def : NeonI_SHLL_High_Patterns<v8i8, v8i16, zext, uimm_exact8, "16b8h",
8315 def : NeonI_SHLL_High_Patterns<v8i8, v8i16, sext, uimm_exact8, "16b8h",
8317 def : NeonI_SHLL_High_Patterns<v4i16, v4i32, zext, uimm_exact16, "8h4s",
8319 def : NeonI_SHLL_High_Patterns<v4i16, v4i32, sext, uimm_exact16, "8h4s",
8321 def : NeonI_SHLL_High_Patterns<v2i32, v2i64, zext, uimm_exact32, "4s2d",
8323 def : NeonI_SHLL_High_Patterns<v2i32, v2i64, sext, uimm_exact32, "4s2d",
// FP narrowing (fcvtn): f32->f16 (stored as i16 lanes) and f64->f32, with
// tied-$src "2" variants that fill the high half of the destination.
8326 multiclass NeonI_2VMisc_SD_Narrow<string asmop, bit U, bits<5> opcode> {
8327 def 4s4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
8328 (outs VPR64:$Rd), (ins VPR128:$Rn),
8329 asmop # "\t$Rd.4h, $Rn.4s",
8332 def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
8333 (outs VPR64:$Rd), (ins VPR128:$Rn),
8334 asmop # "\t$Rd.2s, $Rn.2d",
8337 let Constraints = "$src = $Rd" in {
8338 def 4s8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
8339 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8340 asmop # "2\t$Rd.8h, $Rn.4s",
8343 def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
8344 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8345 asmop # "2\t$Rd.4s, $Rn.2d",
8350 defm FCVTN : NeonI_2VMisc_SD_Narrow<"fcvtn", 0b0, 0b10110>;
// Patterns: f32->f16 goes through the ARM vcvtfp2hf intrinsic (i16 result
// lanes); f64->f32 is a plain fround. concat_vectors forms pick the "2"
// instruction with the low half promoted via SUBREG_TO_REG.
8352 multiclass NeonI_2VMisc_Narrow_Pattern<string prefix,
8353 SDPatternOperator f32_to_f16_Op,
8354 SDPatternOperator f64_to_f32_Op> {
8356 def : Pat<(v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))),
8357 (!cast<Instruction>(prefix # "4s4h") (v4f32 VPR128:$Rn))>;
8359 def : Pat<(v8i16 (concat_vectors
8361 (v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))))),
8362 (!cast<Instruction>(prefix # "4s8h")
8363 (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
8364 (v4f32 VPR128:$Rn))>;
8366 def : Pat<(v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))),
8367 (!cast<Instruction>(prefix # "2d2s") (v2f64 VPR128:$Rn))>;
8369 def : Pat<(v4f32 (concat_vectors
8371 (v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))))),
8372 (!cast<Instruction>(prefix # "2d4s")
8373 (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
8374 (v2f64 VPR128:$Rn))>;
8377 defm : NeonI_2VMisc_Narrow_Pattern<"FCVTN", int_arm_neon_vcvtfp2hf, fround>;
// FCVTXN: f64->f32 narrowing with round-to-odd; only the 2d source form
// exists, plus the tied-$src "2" high-half variant.
8379 multiclass NeonI_2VMisc_D_Narrow<string asmop, string prefix, bit U,
8381 def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
8382 (outs VPR64:$Rd), (ins VPR128:$Rn),
8383 asmop # "\t$Rd.2s, $Rn.2d",
8386 def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
8387 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8388 asmop # "2\t$Rd.4s, $Rn.2d",
8390 let Constraints = "$src = $Rd";
8393 def : Pat<(v2f32 (int_aarch64_neon_vcvtxn (v2f64 VPR128:$Rn))),
8394 (!cast<Instruction>(prefix # "2d2s") VPR128:$Rn)>;
8396 def : Pat<(v4f32 (concat_vectors
8398 (v2f32 (int_aarch64_neon_vcvtxn (v2f64 VPR128:$Rn))))),
8399 (!cast<Instruction>(prefix # "2d4s")
8400 (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
8404 defm FCVTXN : NeonI_2VMisc_D_Narrow<"fcvtxn","FCVTXN", 0b1, 0b10110>;
// Extracts the upper two f32 lanes of a v4f32 (subvector starting at lane 2).
8406 def Neon_High4Float : PatFrag<(ops node:$in),
8407 (extract_subvector (v4f32 node:$in), (iPTR 2))>;
// FP lengthening (fcvtl): f16->f32 and f32->f64; the "2" forms convert the
// high half of a 128-bit source.
8409 multiclass NeonI_2VMisc_HS_Extend<string asmop, bit U, bits<5> opcode> {
8410 def 4h4s : NeonI_2VMisc<0b0, U, 0b00, opcode,
8411 (outs VPR128:$Rd), (ins VPR64:$Rn),
8412 asmop # "\t$Rd.4s, $Rn.4h",
8415 def 2s2d : NeonI_2VMisc<0b0, U, 0b01, opcode,
8416 (outs VPR128:$Rd), (ins VPR64:$Rn),
8417 asmop # "\t$Rd.2d, $Rn.2s",
8420 def 8h4s : NeonI_2VMisc<0b1, U, 0b00, opcode,
8421 (outs VPR128:$Rd), (ins VPR128:$Rn),
8422 asmop # "2\t$Rd.4s, $Rn.8h",
8425 def 4s2d : NeonI_2VMisc<0b1, U, 0b01, opcode,
8426 (outs VPR128:$Rd), (ins VPR128:$Rn),
8427 asmop # "2\t$Rd.2d, $Rn.4s",
8431 defm FCVTL : NeonI_2VMisc_HS_Extend<"fcvtl", 0b0, 0b10111>;
// f16->f32 via the ARM vcvthf2fp intrinsic; f32->f64 via generic fextend.
// High-half sources select the "2" instructions.
8433 multiclass NeonI_2VMisc_Extend_Pattern<string prefix> {
8434 def : Pat<(v4f32 (int_arm_neon_vcvthf2fp (v4i16 VPR64:$Rn))),
8435 (!cast<Instruction>(prefix # "4h4s") VPR64:$Rn)>;
8437 def : Pat<(v4f32 (int_arm_neon_vcvthf2fp
8439 (v8i16 VPR128:$Rn))))),
8440 (!cast<Instruction>(prefix # "8h4s") VPR128:$Rn)>;
8442 def : Pat<(v2f64 (fextend (v2f32 VPR64:$Rn))),
8443 (!cast<Instruction>(prefix # "2s2d") VPR64:$Rn)>;
8445 def : Pat<(v2f64 (fextend
8446 (v2f32 (Neon_High4Float
8447 (v4f32 VPR128:$Rn))))),
8448 (!cast<Instruction>(prefix # "4s2d") VPR128:$Rn)>;
8451 defm : NeonI_2VMisc_Extend_Pattern<"FCVTL">;
// Generic same-width conversion skeleton over 4s/2d/2s arrangements; the
// result/operand ValueTypes are passed in so the same defs serve fp->int,
// int->fp and fp->fp families. Size feeds bit 1 of the encoded size field.
8453 multiclass NeonI_2VMisc_SD_Conv<string asmop, bit Size, bit U, bits<5> opcode,
8454 ValueType ResTy4s, ValueType OpTy4s,
8455 ValueType ResTy2d, ValueType OpTy2d,
8456 ValueType ResTy2s, ValueType OpTy2s,
8457 SDPatternOperator Neon_Op> {
8459 def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode,
8460 (outs VPR128:$Rd), (ins VPR128:$Rn),
8461 asmop # "\t$Rd.4s, $Rn.4s",
8462 [(set (ResTy4s VPR128:$Rd),
8463 (ResTy4s (Neon_Op (OpTy4s VPR128:$Rn))))],
8466 def 2d : NeonI_2VMisc<0b1, U, {Size, 0b1}, opcode,
8467 (outs VPR128:$Rd), (ins VPR128:$Rn),
8468 asmop # "\t$Rd.2d, $Rn.2d",
8469 [(set (ResTy2d VPR128:$Rd),
8470 (ResTy2d (Neon_Op (OpTy2d VPR128:$Rn))))],
8473 def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
8474 (outs VPR64:$Rd), (ins VPR64:$Rn),
8475 asmop # "\t$Rd.2s, $Rn.2s",
8476 [(set (ResTy2s VPR64:$Rd),
8477 (ResTy2s (Neon_Op (OpTy2s VPR64:$Rn))))],
// fp->int instantiation of the conversion skeleton: integer results from FP
// operands of the same lane width.
8481 multiclass NeonI_2VMisc_fp_to_int<string asmop, bit Size, bit U,
8482 bits<5> opcode, SDPatternOperator Neon_Op> {
8483 defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4i32, v4f32, v2i64,
8484 v2f64, v2i32, v2f32, Neon_Op>;
// Rounding-mode variants: N=to-nearest-even, P=toward +inf, M=toward -inf,
// Z=toward zero (the generic fp_to_sint/fp_to_uint), A=to-nearest-away.
8487 defm FCVTNS : NeonI_2VMisc_fp_to_int<"fcvtns", 0b0, 0b0, 0b11010,
8488 int_arm_neon_vcvtns>;
8489 defm FCVTNU : NeonI_2VMisc_fp_to_int<"fcvtnu", 0b0, 0b1, 0b11010,
8490 int_arm_neon_vcvtnu>;
8491 defm FCVTPS : NeonI_2VMisc_fp_to_int<"fcvtps", 0b1, 0b0, 0b11010,
8492 int_arm_neon_vcvtps>;
8493 defm FCVTPU : NeonI_2VMisc_fp_to_int<"fcvtpu", 0b1, 0b1, 0b11010,
8494 int_arm_neon_vcvtpu>;
8495 defm FCVTMS : NeonI_2VMisc_fp_to_int<"fcvtms", 0b0, 0b0, 0b11011,
8496 int_arm_neon_vcvtms>;
8497 defm FCVTMU : NeonI_2VMisc_fp_to_int<"fcvtmu", 0b0, 0b1, 0b11011,
8498 int_arm_neon_vcvtmu>;
8499 defm FCVTZS : NeonI_2VMisc_fp_to_int<"fcvtzs", 0b1, 0b0, 0b11011, fp_to_sint>;
8500 defm FCVTZU : NeonI_2VMisc_fp_to_int<"fcvtzu", 0b1, 0b1, 0b11011, fp_to_uint>;
8501 defm FCVTAS : NeonI_2VMisc_fp_to_int<"fcvtas", 0b0, 0b0, 0b11100,
8502 int_arm_neon_vcvtas>;
8503 defm FCVTAU : NeonI_2VMisc_fp_to_int<"fcvtau", 0b0, 0b1, 0b11100,
8504 int_arm_neon_vcvtau>;
// int->fp instantiation of the conversion skeleton (scvtf/ucvtf).
8506 multiclass NeonI_2VMisc_int_to_fp<string asmop, bit Size, bit U,
8507 bits<5> opcode, SDPatternOperator Neon_Op> {
8508 defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4f32, v4i32, v2f64,
8509 v2i64, v2f32, v2i32, Neon_Op>;
8512 defm SCVTF : NeonI_2VMisc_int_to_fp<"scvtf", 0b0, 0b0, 0b11101, sint_to_fp>;
8513 defm UCVTF : NeonI_2VMisc_int_to_fp<"ucvtf", 0b0, 0b1, 0b11101, uint_to_fp>;
// fp->fp instantiation: rounding-to-integral (frint*), reciprocal and
// reciprocal-sqrt estimates, and square root.
8515 multiclass NeonI_2VMisc_fp_to_fp<string asmop, bit Size, bit U,
8516 bits<5> opcode, SDPatternOperator Neon_Op> {
8517 defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4f32, v4f32, v2f64,
8518 v2f64, v2f32, v2f32, Neon_Op>;
8521 defm FRINTN : NeonI_2VMisc_fp_to_fp<"frintn", 0b0, 0b0, 0b11000,
8522 int_aarch64_neon_frintn>;
8523 defm FRINTA : NeonI_2VMisc_fp_to_fp<"frinta", 0b0, 0b1, 0b11000, frnd>;
8524 defm FRINTP : NeonI_2VMisc_fp_to_fp<"frintp", 0b1, 0b0, 0b11000, fceil>;
8525 defm FRINTM : NeonI_2VMisc_fp_to_fp<"frintm", 0b0, 0b0, 0b11001, ffloor>;
8526 defm FRINTX : NeonI_2VMisc_fp_to_fp<"frintx", 0b0, 0b1, 0b11001, frint>;
8527 defm FRINTZ : NeonI_2VMisc_fp_to_fp<"frintz", 0b1, 0b0, 0b11001, ftrunc>;
8528 defm FRINTI : NeonI_2VMisc_fp_to_fp<"frinti", 0b1, 0b1, 0b11001, fnearbyint>;
8529 defm FRECPE : NeonI_2VMisc_fp_to_fp<"frecpe", 0b1, 0b0, 0b11101,
8530 int_arm_neon_vrecpe>;
8531 defm FRSQRTE : NeonI_2VMisc_fp_to_fp<"frsqrte", 0b1, 0b1, 0b11101,
8532 int_arm_neon_vrsqrte>;
8533 defm FSQRT : NeonI_2VMisc_fp_to_fp<"fsqrt", 0b1, 0b1, 0b11111, fsqrt>;
// Word-only (4s/2s) one-source conversions: unsigned reciprocal and
// reciprocal-sqrt estimates on integer vectors (urecpe/ursqrte).
8535 multiclass NeonI_2VMisc_S_Conv<string asmop, bit Size, bit U,
8536 bits<5> opcode, SDPatternOperator Neon_Op> {
8537 def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode,
8538 (outs VPR128:$Rd), (ins VPR128:$Rn),
8539 asmop # "\t$Rd.4s, $Rn.4s",
8540 [(set (v4i32 VPR128:$Rd),
8541 (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
8544 def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
8545 (outs VPR64:$Rd), (ins VPR64:$Rn),
8546 asmop # "\t$Rd.2s, $Rn.2s",
8547 [(set (v2i32 VPR64:$Rd),
8548 (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
8552 defm URECPE : NeonI_2VMisc_S_Conv<"urecpe", 0b1, 0b0, 0b11100,
8553 int_arm_neon_vrecpe>;
8554 defm URSQRTE : NeonI_2VMisc_S_Conv<"ursqrte", 0b1, 0b1, 0b11100,
8555 int_arm_neon_vrsqrte>;
// AES round instructions that accumulate into the destination (aese/aesd):
// $src tied to $Rd; gated on the Crypto subtarget feature.
8558 class NeonI_Cryptoaes_2v<bits<2> size, bits<5> opcode,
8559 string asmop, SDPatternOperator opnode>
8560 : NeonI_Crypto_AES<size, opcode,
8561 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8562 asmop # "\t$Rd.16b, $Rn.16b",
8563 [(set (v16i8 VPR128:$Rd),
8564 (v16i8 (opnode (v16i8 VPR128:$src),
8565 (v16i8 VPR128:$Rn))))],
8567 let Constraints = "$src = $Rd";
8568 let Predicates = [HasNEON, HasCrypto];
8571 def AESE : NeonI_Cryptoaes_2v<0b00, 0b00100, "aese", int_arm_neon_aese>;
8572 def AESD : NeonI_Cryptoaes_2v<0b00, 0b00101, "aesd", int_arm_neon_aesd>;
// AES mix-columns instructions (aesmc/aesimc): plain one-source form, no
// tied operand.
8574 class NeonI_Cryptoaes<bits<2> size, bits<5> opcode,
8575 string asmop, SDPatternOperator opnode>
8576 : NeonI_Crypto_AES<size, opcode,
8577 (outs VPR128:$Rd), (ins VPR128:$Rn),
8578 asmop # "\t$Rd.16b, $Rn.16b",
8579 [(set (v16i8 VPR128:$Rd),
8580 (v16i8 (opnode (v16i8 VPR128:$Rn))))],
8583 def AESMC : NeonI_Cryptoaes<0b00, 0b00110, "aesmc", int_arm_neon_aesmc>;
8584 def AESIMC : NeonI_Cryptoaes<0b00, 0b00111, "aesimc", int_arm_neon_aesimc>;
// Two-operand SHA schedule-update instructions (vector accumulate form):
// $src tied to $Rd, 4s arrangement; Crypto feature required.
8586 class NeonI_Cryptosha_vv<bits<2> size, bits<5> opcode,
8587 string asmop, SDPatternOperator opnode>
8588 : NeonI_Crypto_SHA<size, opcode,
8589 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
8590 asmop # "\t$Rd.4s, $Rn.4s",
8591 [(set (v4i32 VPR128:$Rd),
8592 (v4i32 (opnode (v4i32 VPR128:$src),
8593 (v4i32 VPR128:$Rn))))],
8595 let Constraints = "$src = $Rd";
8596 let Predicates = [HasNEON, HasCrypto];
8599 def SHA1SU1 : NeonI_Cryptosha_vv<0b00, 0b00001, "sha1su1",
8600 int_arm_neon_sha1su1>;
8601 def SHA256SU0 : NeonI_Cryptosha_vv<0b00, 0b00010, "sha256su0",
8602 int_arm_neon_sha256su0>;
// Scalar single-source SHA instruction (sha1h): 32-bit FP register operands.
8604 class NeonI_Cryptosha_ss<bits<2> size, bits<5> opcode,
8605 string asmop, SDPatternOperator opnode>
8606 : NeonI_Crypto_SHA<size, opcode,
8607 (outs FPR32:$Rd), (ins FPR32:$Rn),
8608 asmop # "\t$Rd, $Rn",
8609 [(set (v1i32 FPR32:$Rd),
8610 (v1i32 (opnode (v1i32 FPR32:$Rn))))],
8612 let Predicates = [HasNEON, HasCrypto];
8615 def SHA1H : NeonI_Cryptosha_ss<0b00, 0b00000, "sha1h", int_arm_neon_sha1h>;
// Three-operand SHA instructions, all with $src tied to $Rd and gated on the
// Crypto feature. Three operand shapes: vvv (all vector), qqv (128-bit FP
// dest/first source), qsv (128-bit FP dest, 32-bit scalar second source).
8617 class NeonI_Cryptosha3_vvv<bits<2> size, bits<3> opcode, string asmop,
8618 SDPatternOperator opnode>
8619 : NeonI_Crypto_3VSHA<size, opcode,
8621 (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
8622 asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
8623 [(set (v4i32 VPR128:$Rd),
8624 (v4i32 (opnode (v4i32 VPR128:$src),
8626 (v4i32 VPR128:$Rm))))],
8628 let Constraints = "$src = $Rd";
8629 let Predicates = [HasNEON, HasCrypto];
8632 def SHA1SU0 : NeonI_Cryptosha3_vvv<0b00, 0b011, "sha1su0",
8633 int_arm_neon_sha1su0>;
8634 def SHA256SU1 : NeonI_Cryptosha3_vvv<0b00, 0b110, "sha256su1",
8635 int_arm_neon_sha256su1>;
8637 class NeonI_Cryptosha3_qqv<bits<2> size, bits<3> opcode, string asmop,
8638 SDPatternOperator opnode>
8639 : NeonI_Crypto_3VSHA<size, opcode,
8641 (ins FPR128:$src, FPR128:$Rn, VPR128:$Rm),
8642 asmop # "\t$Rd, $Rn, $Rm.4s",
8643 [(set (v4i32 FPR128:$Rd),
8644 (v4i32 (opnode (v4i32 FPR128:$src),
8646 (v4i32 VPR128:$Rm))))],
8648 let Constraints = "$src = $Rd";
8649 let Predicates = [HasNEON, HasCrypto];
8652 def SHA256H : NeonI_Cryptosha3_qqv<0b00, 0b100, "sha256h",
8653 int_arm_neon_sha256h>;
8654 def SHA256H2 : NeonI_Cryptosha3_qqv<0b00, 0b101, "sha256h2",
8655 int_arm_neon_sha256h2>;
8657 class NeonI_Cryptosha3_qsv<bits<2> size, bits<3> opcode, string asmop,
8658 SDPatternOperator opnode>
8659 : NeonI_Crypto_3VSHA<size, opcode,
8661 (ins FPR128:$src, FPR32:$Rn, VPR128:$Rm),
8662 asmop # "\t$Rd, $Rn, $Rm.4s",
8663 [(set (v4i32 FPR128:$Rd),
8664 (v4i32 (opnode (v4i32 FPR128:$src),
8666 (v4i32 VPR128:$Rm))))],
8668 let Constraints = "$src = $Rd";
8669 let Predicates = [HasNEON, HasCrypto];
8672 def SHA1C : NeonI_Cryptosha3_qsv<0b00, 0b000, "sha1c", int_aarch64_neon_sha1c>;
8673 def SHA1P : NeonI_Cryptosha3_qsv<0b00, 0b001, "sha1p", int_aarch64_neon_sha1p>;
8674 def SHA1M : NeonI_Cryptosha3_qsv<0b00, 0b010, "sha1m", int_aarch64_neon_sha1m>;
8676 // Additional patterns to match shl to USHL.
// USHL with a positive per-lane shift amount is exactly a logical left
// shift, so generic shl-by-vector selects directly to it.
8677 def : Pat<(v8i8 (shl (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
8678 (USHLvvv_8B $Rn, $Rm)>;
8679 def : Pat<(v4i16 (shl (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
8680 (USHLvvv_4H $Rn, $Rm)>;
8681 def : Pat<(v2i32 (shl (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
8682 (USHLvvv_2S $Rn, $Rm)>;
8683 def : Pat<(v1i64 (shl (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
8684 (USHLddd $Rn, $Rm)>;
8685 def : Pat<(v16i8 (shl (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
8686 (USHLvvv_16B $Rn, $Rm)>;
8687 def : Pat<(v8i16 (shl (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
8688 (USHLvvv_8H $Rn, $Rm)>;
8689 def : Pat<(v4i32 (shl (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
8690 (USHLvvv_4S $Rn, $Rm)>;
8691 def : Pat<(v2i64 (shl (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
8692 (USHLvvv_2D $Rn, $Rm)>;
8694 // Additional patterns to match sra, srl.
8695 // For a vector right shift by vector, the shift amounts of SSHL/USHL are
8696 // negative. Negate the vector of shift amount first.
// srl -> USHL(x, -amt): logical right shift.
8697 def : Pat<(v8i8 (srl (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
8698 (USHLvvv_8B $Rn, (NEG8b $Rm))>;
8699 def : Pat<(v4i16 (srl (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
8700 (USHLvvv_4H $Rn, (NEG4h $Rm))>;
8701 def : Pat<(v2i32 (srl (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
8702 (USHLvvv_2S $Rn, (NEG2s $Rm))>;
8703 def : Pat<(v1i64 (srl (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
8704 (USHLddd $Rn, (NEGdd $Rm))>;
8705 def : Pat<(v16i8 (srl (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
8706 (USHLvvv_16B $Rn, (NEG16b $Rm))>;
8707 def : Pat<(v8i16 (srl (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
8708 (USHLvvv_8H $Rn, (NEG8h $Rm))>;
8709 def : Pat<(v4i32 (srl (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
8710 (USHLvvv_4S $Rn, (NEG4s $Rm))>;
8711 def : Pat<(v2i64 (srl (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
8712 (USHLvvv_2D $Rn, (NEG2d $Rm))>;
// sra -> SSHL(x, -amt): arithmetic right shift.
8714 def : Pat<(v8i8 (sra (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
8715 (SSHLvvv_8B $Rn, (NEG8b $Rm))>;
8716 def : Pat<(v4i16 (sra (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
8717 (SSHLvvv_4H $Rn, (NEG4h $Rm))>;
8718 def : Pat<(v2i32 (sra (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
8719 (SSHLvvv_2S $Rn, (NEG2s $Rm))>;
8720 def : Pat<(v1i64 (sra (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
8721 (SSHLddd $Rn, (NEGdd $Rm))>;
8722 def : Pat<(v16i8 (sra (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
8723 (SSHLvvv_16B $Rn, (NEG16b $Rm))>;
8724 def : Pat<(v8i16 (sra (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
8725 (SSHLvvv_8H $Rn, (NEG8h $Rm))>;
8726 def : Pat<(v4i32 (sra (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
8727 (SSHLvvv_4S $Rn, (NEG4s $Rm))>;
8728 def : Pat<(v2i64 (sra (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
8729 (SSHLvvv_2D $Rn, (NEG2d $Rm))>;
8732 // Patterns for handling half-precision values
8735 // Convert f16 value coming in as i16 value to f32
// The half value arrives as an i16 stored in the low bits of a GPR: move it
// to an FPR with FMOVsw, take the 16-bit sub-register (sub_16), then convert
// half -> single with FCVTsh. This first pattern matches an explicit
// 'and ..., 65535' mask that guarantees the upper bits are clear.
8736 def : Pat<(f32 (f16_to_f32 (i32 (and (i32 GPR32:$Rn), 65535)))),
8737 (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>;
// Same lowering when the zero-extension is instead known via an assertzext
// node rather than an explicit mask.
8738 def : Pat<(f32 (f16_to_f32 (i32 (assertzext GPR32:$Rn)))),
8739 (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>;
8741 def : Pat<(f32 (f16_to_f32 (i32 (assertzext (i32 (
8742 f32_to_f16 (f32 FPR32:$Rn))))))),
8745 // Patterns for vector extract of half-precision FP value in i16 storage type
// Pull lane $Imm out as an f16 with DUPhv_H, then widen to f32 with FCVTsh.
// The 'and ..., 65535' on the extracted i32 matches the zero-extension the
// i16 extract implies. The v4i16 source must first be placed in a Q register
// via SUBREG_TO_REG because DUPhv_H takes a v8i16 operand; the 2-bit lane
// index (0-3) stays valid in the wider register.
8746 def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract
8747 (v4i16 VPR64:$Rn), neon_uimm2_bare:$Imm)), 65535)))),
8748 (FCVTsh (f16 (DUPhv_H
8749 (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
8750 neon_uimm2_bare:$Imm)))>;
// 128-bit source: lane index is 3 bits (0-7) and no widening is needed.
8752 def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract
8753 (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)), 65535)))),
8754 (FCVTsh (f16 (DUPhv_H (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)))>;
8756 // Patterns for vector insert of half-precision FP value 0 in i16 storage type
8757 def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
8758 (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))),
8759 (neon_uimm3_bare:$Imm))),
8760 (v8i16 (INSELh (v8i16 VPR128:$Rn),
8761 (v8i16 (SUBREG_TO_REG (i64 0),
8762 (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)),
8764 neon_uimm3_bare:$Imm, 0))>;
8766 def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
8767 (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))),
8768 (neon_uimm2_bare:$Imm))),
8769 (v4i16 (EXTRACT_SUBREG
8771 (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
8772 (v8i16 (SUBREG_TO_REG (i64 0),
8773 (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)),
8775 neon_uimm2_bare:$Imm, 0)),
8778 // Patterns for vector insert of half-precision FP value in i16 storage type
8779 def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
8780 (i32 (assertsext (i32 (fp_to_sint
8781 (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))),
8782 (neon_uimm3_bare:$Imm))),
8783 (v8i16 (INSELh (v8i16 VPR128:$Rn),
8784 (v8i16 (SUBREG_TO_REG (i64 0),
8785 (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)),
8787 neon_uimm3_bare:$Imm, 0))>;
8789 def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
8790 (i32 (assertsext (i32 (fp_to_sint
8791 (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))),
8792 (neon_uimm2_bare:$Imm))),
8793 (v4i16 (EXTRACT_SUBREG
8795 (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
8796 (v8i16 (SUBREG_TO_REG (i64 0),
8797 (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)),
8799 neon_uimm2_bare:$Imm, 0)),
// Copying one i16 lane to another (vector_extract feeding vector_insert)
// selects to a single element-insert instruction, INSELh: lane $Imm2 of $src
// is written into lane $Imm1 of $Rn.
8802 def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
8803 (i32 (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)),
8804 (neon_uimm3_bare:$Imm1))),
8805 (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src),
8806 neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>;
8808 // Patterns for vector copy of half-precision FP value in i16 storage type
8809 def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
8810 (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32
8811 (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)),
8813 (neon_uimm3_bare:$Imm1))),
8814 (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src),
8815 neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>;
8817 def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
8818 (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32
8819 (vector_extract (v4i16 VPR64:$src), neon_uimm3_bare:$Imm2)),
8821 (neon_uimm3_bare:$Imm1))),
8822 (v4i16 (EXTRACT_SUBREG
8824 (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
8825 (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
8826 neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2)),