//===-- AArch64InstrNEON.td - NEON support for AArch64 -----*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the AArch64 NEON instruction set.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//
// (outs Result), (ins SelectMask, TrueVal, FalseVal): all four values share
// the same vector type (bitwise select).
def Neon_bsl : SDNode<"AArch64ISD::NEON_BSL", SDTypeProfile<1, 3,
                      [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
                       SDTCisSameAs<0, 3>]>>;
// (outs Result), (ins Imm, OpCmode)
def SDT_Neon_movi : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVT<1, i32>]>;

def Neon_movi : SDNode<"AArch64ISD::NEON_MOVIMM", SDT_Neon_movi>;

def Neon_mvni : SDNode<"AArch64ISD::NEON_MVNIMM", SDT_Neon_movi>;

// (outs Result), (ins Imm)
def Neon_fmovi : SDNode<"AArch64ISD::NEON_FMOVIMM", SDTypeProfile<1, 1,
                        [SDTCisVec<0>, SDTCisVT<1, i32>]>>;
// (outs Result), (ins LHS, RHS, CondCode)
def Neon_cmp : SDNode<"AArch64ISD::NEON_CMP", SDTypeProfile<1, 3,
                      [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>;

// (outs Result), (ins LHS, 0/0.0 constant, CondCode)
def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3,
                       [SDTCisVec<0>, SDTCisVec<1>]>>;

// (outs Result), (ins LHS, RHS)
def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2,
                      [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>;
// Saturating vector shift: result and operand 1 share a type, the shift
// amount is an i32 scalar.
// NOTE(review): the constraint list was truncated in this copy; the
// SDTCisVT<2, i32> tail is reconstructed — confirm against upstream.
def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                     SDTCisVT<2, i32>]>;
def Neon_sqrshlImm : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>;
def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>;
// One-operand vector shuffles (element reversal within 64/32/16-bit chunks).
def SDTVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def Neon_rev64 : SDNode<"AArch64ISD::NEON_REV64", SDTVSHUF>;
def Neon_rev32 : SDNode<"AArch64ISD::NEON_REV32", SDTVSHUF>;
def Neon_rev16 : SDNode<"AArch64ISD::NEON_REV16", SDTVSHUF>;
// NOTE(review): Neon_vdup's profile tail was truncated in this copy; the
// [SDTCisVec<0>] constraint is reconstructed — confirm against upstream.
def Neon_vdup : SDNode<"AArch64ISD::NEON_VDUP", SDTypeProfile<1, 1,
                       [SDTCisVec<0>]>>;
def Neon_vduplane : SDNode<"AArch64ISD::NEON_VDUPLANE", SDTypeProfile<1, 2,
                           [SDTCisVec<0>, SDTCisVec<1>, SDTCisVT<2, i64>]>>;
def Neon_vextract : SDNode<"AArch64ISD::NEON_VEXTRACT", SDTypeProfile<1, 3,
                           [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                            SDTCisSameAs<0, 2>, SDTCisVT<3, i64>]>>;
//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//
// Multiclass NeonI_3VSame_B_sizes: 8B/16B (byte-element) variants of a
// three-same-operand vector instruction.
multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size, bits<5> opcode,
                                string asmop, SDPatternOperator opnode8B,
                                SDPatternOperator opnode16B,
                                bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8B : NeonI_3VSame<0b0, u, size, opcode,
              (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
              asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
              [(set (v8i8 VPR64:$Rd),
                 (v8i8 (opnode8B (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
              NoItinerary>;

    def _16B : NeonI_3VSame<0b1, u, size, opcode,
               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
               asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
               [(set (v16i8 VPR128:$Rd),
                  (v16i8 (opnode16B (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
               NoItinerary>;
  }
}
// Multiclass NeonI_3VSame_HS_sizes: 4H/8H/2S/4S (half and single element)
// variants of a three-same-operand vector instruction.
multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode,
                                 string asmop, SDPatternOperator opnode,
                                 bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _4H : NeonI_3VSame<0b0, u, 0b01, opcode,
              (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
              asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h",
              [(set (v4i16 VPR64:$Rd),
                 (v4i16 (opnode (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))))],
              NoItinerary>;

    def _8H : NeonI_3VSame<0b1, u, 0b01, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
              asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h",
              [(set (v8i16 VPR128:$Rd),
                 (v8i16 (opnode (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))))],
              NoItinerary>;

    def _2S : NeonI_3VSame<0b0, u, 0b10, opcode,
              (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
              asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
              [(set (v2i32 VPR64:$Rd),
                 (v2i32 (opnode (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))))],
              NoItinerary>;

    def _4S : NeonI_3VSame<0b1, u, 0b10, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
              asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
              [(set (v4i32 VPR128:$Rd),
                 (v4i32 (opnode (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))],
              NoItinerary>;
  }
}
// Multiclass NeonI_3VSame_BHS_sizes: adds the 8B/16B byte variants on top of
// the H/S variants inherited from NeonI_3VSame_HS_sizes.
multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
                                  string asmop, SDPatternOperator opnode,
                                  bit Commutable = 0>
  : NeonI_3VSame_HS_sizes<u, opcode, asmop, opnode, Commutable> {
  let isCommutable = Commutable in {
    def _8B : NeonI_3VSame<0b0, u, 0b00, opcode,
              (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
              asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
              [(set (v8i8 VPR64:$Rd),
                 (v8i8 (opnode (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
              NoItinerary>;

    def _16B : NeonI_3VSame<0b1, u, 0b00, opcode,
               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
               asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
               [(set (v16i8 VPR128:$Rd),
                  (v16i8 (opnode (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
               NoItinerary>;
  }
}
// Multiclass NeonI_3VSame_BHSD_sizes: adds the 2D variant on top of the
// B/H/S variants inherited from NeonI_3VSame_BHS_sizes.
multiclass NeonI_3VSame_BHSD_sizes<bit u, bits<5> opcode,
                                   string asmop, SDPatternOperator opnode,
                                   bit Commutable = 0>
  : NeonI_3VSame_BHS_sizes<u, opcode, asmop, opnode, Commutable> {
  let isCommutable = Commutable in {
    def _2D : NeonI_3VSame<0b1, u, 0b11, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
              asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
              [(set (v2i64 VPR128:$Rd),
                 (v2i64 (opnode (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))))],
              NoItinerary>;
  }
}
// Multiclass NeonI_3VSame_SD_sizes: Operand types are floating point types,
// but Result types can be integer or floating point types.
multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode,
                                 string asmop, SDPatternOperator opnode2S,
                                 SDPatternOperator opnode4S,
                                 SDPatternOperator opnode2D,
                                 ValueType ResTy2S, ValueType ResTy4S,
                                 ValueType ResTy2D, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _2S : NeonI_3VSame<0b0, u, {size, 0b0}, opcode,
              (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
              asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
              [(set (ResTy2S VPR64:$Rd),
                 (ResTy2S (opnode2S (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))],
              NoItinerary>;

    def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
              asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
              [(set (ResTy4S VPR128:$Rd),
                 (ResTy4S (opnode4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))],
              NoItinerary>;

    def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
              asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
              [(set (ResTy2D VPR128:$Rd),
                 (ResTy2D (opnode2D (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))],
              NoItinerary>;
  }
}
//===----------------------------------------------------------------------===//
// Instruction Definitions
//===----------------------------------------------------------------------===//

// Vector Arithmetic Instructions

// Vector Add (Integer and Floating-Point)

defm ADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>;
defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd", fadd, fadd, fadd,
                                     v2f32, v4f32, v2f64, 1>;

// Vector Sub (Integer and Floating-Point)

defm SUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10000, "sub", sub, 0>;
defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub", fsub, fsub, fsub,
                                     v2f32, v4f32, v2f64, 0>;

// Vector Multiply (Integer and Floating-Point)

defm MULvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>;
defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul, fmul, fmul,
                                     v2f32, v4f32, v2f64, 1>;

// Vector Multiply (Polynomial)

defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul",
                                    int_arm_neon_vmulp, int_arm_neon_vmulp, 1>;
// Vector Multiply-accumulate and Multiply-subtract (Integer)

// class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and
// two operands constraints.
class NeonI_3VSame_Constraint_impl<string asmop, string asmlane,
                                   RegisterOperand VPRC, ValueType OpTy,
                                   bit q, bit u, bits<2> size,
                                   bits<5> opcode, SDPatternOperator opnode>
  : NeonI_3VSame<q, u, size, opcode,
                 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, VPRC:$Rm),
                 asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane # ", $Rm" # asmlane,
                 [(set (OpTy VPRC:$Rd),
                    (OpTy (opnode (OpTy VPRC:$src), (OpTy VPRC:$Rn), (OpTy VPRC:$Rm))))],
                 NoItinerary> {
  // The accumulator input must be allocated to the same register as the
  // destination (read-modify-write instruction).
  let Constraints = "$src = $Rd";
}
// Integer multiply-accumulate / multiply-subtract pattern fragments.
def Neon_mla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                       (add node:$Ra, (mul node:$Rn, node:$Rm))>;

def Neon_mls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                       (sub node:$Ra, (mul node:$Rn, node:$Rm))>;
def MLAvvv_8B:  NeonI_3VSame_Constraint_impl<"mla", ".8b",  VPR64,  v8i8,
                                             0b0, 0b0, 0b00, 0b10010, Neon_mla>;
def MLAvvv_16B: NeonI_3VSame_Constraint_impl<"mla", ".16b", VPR128, v16i8,
                                             0b1, 0b0, 0b00, 0b10010, Neon_mla>;
def MLAvvv_4H:  NeonI_3VSame_Constraint_impl<"mla", ".4h",  VPR64,  v4i16,
                                             0b0, 0b0, 0b01, 0b10010, Neon_mla>;
def MLAvvv_8H:  NeonI_3VSame_Constraint_impl<"mla", ".8h",  VPR128, v8i16,
                                             0b1, 0b0, 0b01, 0b10010, Neon_mla>;
def MLAvvv_2S:  NeonI_3VSame_Constraint_impl<"mla", ".2s",  VPR64,  v2i32,
                                             0b0, 0b0, 0b10, 0b10010, Neon_mla>;
def MLAvvv_4S:  NeonI_3VSame_Constraint_impl<"mla", ".4s",  VPR128, v4i32,
                                             0b1, 0b0, 0b10, 0b10010, Neon_mla>;

def MLSvvv_8B:  NeonI_3VSame_Constraint_impl<"mls", ".8b",  VPR64,  v8i8,
                                             0b0, 0b1, 0b00, 0b10010, Neon_mls>;
def MLSvvv_16B: NeonI_3VSame_Constraint_impl<"mls", ".16b", VPR128, v16i8,
                                             0b1, 0b1, 0b00, 0b10010, Neon_mls>;
def MLSvvv_4H:  NeonI_3VSame_Constraint_impl<"mls", ".4h",  VPR64,  v4i16,
                                             0b0, 0b1, 0b01, 0b10010, Neon_mls>;
def MLSvvv_8H:  NeonI_3VSame_Constraint_impl<"mls", ".8h",  VPR128, v8i16,
                                             0b1, 0b1, 0b01, 0b10010, Neon_mls>;
def MLSvvv_2S:  NeonI_3VSame_Constraint_impl<"mls", ".2s",  VPR64,  v2i32,
                                             0b0, 0b1, 0b10, 0b10010, Neon_mls>;
def MLSvvv_4S:  NeonI_3VSame_Constraint_impl<"mls", ".4s",  VPR128, v4i32,
                                             0b1, 0b1, 0b10, 0b10010, Neon_mls>;
// Vector Multiply-accumulate and Multiply-subtract (Floating Point)

def Neon_fmla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                        (fadd node:$Ra, (fmul node:$Rn, node:$Rm))>;

def Neon_fmls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                        (fsub node:$Ra, (fmul node:$Rn, node:$Rm))>;
// Only select the fused forms when the target allows contraction of
// fadd/fmul (UseFusedMAC).
let Predicates = [HasNEON, UseFusedMAC] in {
def FMLAvvv_2S: NeonI_3VSame_Constraint_impl<"fmla", ".2s",  VPR64,  v2f32,
                                             0b0, 0b0, 0b00, 0b11001, Neon_fmla>;
def FMLAvvv_4S: NeonI_3VSame_Constraint_impl<"fmla", ".4s",  VPR128, v4f32,
                                             0b1, 0b0, 0b00, 0b11001, Neon_fmla>;
def FMLAvvv_2D: NeonI_3VSame_Constraint_impl<"fmla", ".2d",  VPR128, v2f64,
                                             0b1, 0b0, 0b01, 0b11001, Neon_fmla>;

def FMLSvvv_2S: NeonI_3VSame_Constraint_impl<"fmls", ".2s",  VPR64,  v2f32,
                                             0b0, 0b0, 0b10, 0b11001, Neon_fmls>;
def FMLSvvv_4S: NeonI_3VSame_Constraint_impl<"fmls", ".4s",  VPR128, v4f32,
                                             0b1, 0b0, 0b10, 0b11001, Neon_fmls>;
def FMLSvvv_2D: NeonI_3VSame_Constraint_impl<"fmls", ".2d",  VPR128, v2f64,
                                             0b1, 0b0, 0b11, 0b11001, Neon_fmls>;
}
// We're also allowed to match the fma instruction regardless of compile
// options.
// Explicit fma nodes always map to the fused instructions; note the
// accumulator ($Ra) becomes the tied first operand of FMLA/FMLS.
def : Pat<(v2f32 (fma VPR64:$Rn, VPR64:$Rm, VPR64:$Ra)),
          (FMLAvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
def : Pat<(v4f32 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
          (FMLAvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
def : Pat<(v2f64 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
          (FMLAvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;

def : Pat<(v2f32 (fma (fneg VPR64:$Rn), VPR64:$Rm, VPR64:$Ra)),
          (FMLSvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
def : Pat<(v4f32 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
          (FMLSvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
          (FMLSvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
// Vector Divide (Floating-Point)

defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv, fdiv, fdiv,
                                     v2f32, v4f32, v2f64, 0>;

// Vector Bitwise Operations

// Vector Bitwise AND

defm ANDvvv : NeonI_3VSame_B_sizes<0b0, 0b00, 0b00011, "and", and, and, 1>;

// Vector Bitwise Exclusive OR

defm EORvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b00011, "eor", xor, xor, 1>;

// Vector Bitwise OR

defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>;
// ORR disassembled as MOV if Vn==Vm

// Vector Move - register
// Alias for ORR if Vn=Vm.
// FIXME: This is actually the preferred syntax but TableGen can't deal with
// custom printing of aliases.
def : NeonInstAlias<"mov $Rd.8b, $Rn.8b",
                    (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn), 0>;
def : NeonInstAlias<"mov $Rd.16b, $Rn.16b",
                    (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn), 0>;
// The MOVI instruction takes two immediate operands. The first is the
// immediate encoding, while the second is the cmode. A cmode of 14, or
// 0b1110, produces a MOVI operation, rather than a MVNI, ORR, or BIC.
def Neon_AllZero : PatFrag<(ops), (Neon_movi (i32 0), (i32 14))>;
def Neon_AllOne : PatFrag<(ops), (Neon_movi (i32 255), (i32 14))>;

// NOT is xor with an all-ones vector.
def Neon_not8B : PatFrag<(ops node:$in),
                         (xor node:$in, (bitconvert (v8i8 Neon_AllOne)))>;
def Neon_not16B : PatFrag<(ops node:$in),
                          (xor node:$in, (bitconvert (v16i8 Neon_AllOne)))>;

def Neon_orn8B : PatFrag<(ops node:$Rn, node:$Rm),
                         (or node:$Rn, (Neon_not8B node:$Rm))>;

def Neon_orn16B : PatFrag<(ops node:$Rn, node:$Rm),
                          (or node:$Rn, (Neon_not16B node:$Rm))>;

def Neon_bic8B : PatFrag<(ops node:$Rn, node:$Rm),
                         (and node:$Rn, (Neon_not8B node:$Rm))>;

def Neon_bic16B : PatFrag<(ops node:$Rn, node:$Rm),
                          (and node:$Rn, (Neon_not16B node:$Rm))>;
// Vector Bitwise OR NOT - register

defm ORNvvv : NeonI_3VSame_B_sizes<0b0, 0b11, 0b00011, "orn",
                                   Neon_orn8B, Neon_orn16B, 0>;

// Vector Bitwise Bit Clear (AND NOT) - register

defm BICvvv : NeonI_3VSame_B_sizes<0b0, 0b01, 0b00011, "bic",
                                   Neon_bic8B, Neon_bic16B, 0>;
// Select the byte-sized bitwise instructions for the other vector types as
// well (bitwise ops are element-size agnostic).
// NOTE(review): the "Instruction INST8B," parameter line was dropped in this
// copy; it is required because INST8B is referenced in the body.
multiclass Neon_bitwise2V_patterns<SDPatternOperator opnode8B,
                                   SDPatternOperator opnode16B,
                                   Instruction INST8B,
                                   Instruction INST16B> {
  def : Pat<(v2i32 (opnode8B VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (opnode8B VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (opnode8B VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i32 (opnode16B VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (opnode16B VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (opnode16B VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$Rn, VPR128:$Rm)>;
}
// Additional patterns for bitwise instructions AND, EOR, ORR, BIC, ORN
defm : Neon_bitwise2V_patterns<and, and, ANDvvv_8B, ANDvvv_16B>;
defm : Neon_bitwise2V_patterns<or, or, ORRvvv_8B, ORRvvv_16B>;
defm : Neon_bitwise2V_patterns<xor, xor, EORvvv_8B, EORvvv_16B>;
defm : Neon_bitwise2V_patterns<Neon_bic8B, Neon_bic16B, BICvvv_8B, BICvvv_16B>;
defm : Neon_bitwise2V_patterns<Neon_orn8B, Neon_orn16B, ORNvvv_8B, ORNvvv_16B>;

// Vector Bitwise Select
def BSLvvv_8B : NeonI_3VSame_Constraint_impl<"bsl", ".8b", VPR64, v8i8,
                                             0b0, 0b1, 0b01, 0b00011, Neon_bsl>;

def BSLvvv_16B : NeonI_3VSame_Constraint_impl<"bsl", ".16b", VPR128, v16i8,
                                              0b1, 0b1, 0b01, 0b00011, Neon_bsl>;
// Patterns selecting the 8B/16B bitwise-select instruction for every vector
// type, for the Neon_bsl node, for the expanded or/and/not form, and for the
// llvm.arm.neon.vbsl intrinsics.
// NOTE(review): the "Instruction INST8B," parameter line was dropped in this
// copy; it is required because INST8B is referenced in the body.
multiclass Neon_bitwise3V_patterns<SDPatternOperator opnode,
                                   Instruction INST8B,
                                   Instruction INST16B> {
  // Disassociate type from instruction definition
  def : Pat<(v2i32 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i32 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;

  // Allow to match BSL instruction pattern with non-constant operand
  def : Pat<(v8i8 (or (and VPR64:$Rn, VPR64:$Rd),
                      (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
            (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (or (and VPR64:$Rn, VPR64:$Rd),
                       (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
            (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v2i32 (or (and VPR64:$Rn, VPR64:$Rd),
                       (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
            (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (or (and VPR64:$Rn, VPR64:$Rd),
                       (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
            (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v16i8 (or (and VPR128:$Rn, VPR128:$Rd),
                       (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
            (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (or (and VPR128:$Rn, VPR128:$Rd),
                       (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
            (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v4i32 (or (and VPR128:$Rn, VPR128:$Rd),
                       (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
            (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (or (and VPR128:$Rn, VPR128:$Rd),
                       (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
            (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;

  // Allow to match llvm.arm.* intrinsics.
  def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 VPR64:$src),
                  (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 VPR64:$src),
                   (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 VPR64:$src),
                   (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 VPR64:$src),
                   (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 VPR64:$src),
                   (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 VPR128:$src),
                   (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 VPR128:$src),
                   (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 VPR128:$src),
                   (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 VPR128:$src),
                   (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 VPR128:$src),
                   (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2f64 (int_arm_neon_vbsl (v2f64 VPR128:$src),
                   (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
}
// Additional patterns for bitwise instruction BSL
defm: Neon_bitwise3V_patterns<Neon_bsl, BSLvvv_8B, BSLvvv_16B>;

// A fragment that never matches: gives BIT/BIF an instruction definition
// without hooking them up to any selection pattern.
def Neon_NoBSLop : PatFrag<(ops node:$src, node:$Rn, node:$Rm),
                           (Neon_bsl node:$src, node:$Rn, node:$Rm),
                           [{ (void)N; return false; }]>;

// Vector Bitwise Insert if True

def BITvvv_8B : NeonI_3VSame_Constraint_impl<"bit", ".8b", VPR64, v8i8,
                                             0b0, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
def BITvvv_16B : NeonI_3VSame_Constraint_impl<"bit", ".16b", VPR128, v16i8,
                                              0b1, 0b1, 0b10, 0b00011, Neon_NoBSLop>;

// Vector Bitwise Insert if False

def BIFvvv_8B : NeonI_3VSame_Constraint_impl<"bif", ".8b", VPR64, v8i8,
                                             0b0, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
def BIFvvv_16B : NeonI_3VSame_Constraint_impl<"bif", ".16b", VPR128, v16i8,
                                              0b1, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
// Vector Absolute Difference and Accumulate (Signed, Unsigned)

def Neon_uaba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                        (add node:$Ra, (int_arm_neon_vabdu node:$Rn, node:$Rm))>;
def Neon_saba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                        (add node:$Ra, (int_arm_neon_vabds node:$Rn, node:$Rm))>;

// Vector Absolute Difference and Accumulate (Unsigned)
def UABAvvv_8B :  NeonI_3VSame_Constraint_impl<"uaba", ".8b",  VPR64,  v8i8,
                                               0b0, 0b1, 0b00, 0b01111, Neon_uaba>;
def UABAvvv_16B : NeonI_3VSame_Constraint_impl<"uaba", ".16b", VPR128, v16i8,
                                               0b1, 0b1, 0b00, 0b01111, Neon_uaba>;
def UABAvvv_4H :  NeonI_3VSame_Constraint_impl<"uaba", ".4h",  VPR64,  v4i16,
                                               0b0, 0b1, 0b01, 0b01111, Neon_uaba>;
def UABAvvv_8H :  NeonI_3VSame_Constraint_impl<"uaba", ".8h",  VPR128, v8i16,
                                               0b1, 0b1, 0b01, 0b01111, Neon_uaba>;
def UABAvvv_2S :  NeonI_3VSame_Constraint_impl<"uaba", ".2s",  VPR64,  v2i32,
                                               0b0, 0b1, 0b10, 0b01111, Neon_uaba>;
def UABAvvv_4S :  NeonI_3VSame_Constraint_impl<"uaba", ".4s",  VPR128, v4i32,
                                               0b1, 0b1, 0b10, 0b01111, Neon_uaba>;

// Vector Absolute Difference and Accumulate (Signed)
def SABAvvv_8B :  NeonI_3VSame_Constraint_impl<"saba", ".8b",  VPR64,  v8i8,
                                               0b0, 0b0, 0b00, 0b01111, Neon_saba>;
def SABAvvv_16B : NeonI_3VSame_Constraint_impl<"saba", ".16b", VPR128, v16i8,
                                               0b1, 0b0, 0b00, 0b01111, Neon_saba>;
def SABAvvv_4H :  NeonI_3VSame_Constraint_impl<"saba", ".4h",  VPR64,  v4i16,
                                               0b0, 0b0, 0b01, 0b01111, Neon_saba>;
def SABAvvv_8H :  NeonI_3VSame_Constraint_impl<"saba", ".8h",  VPR128, v8i16,
                                               0b1, 0b0, 0b01, 0b01111, Neon_saba>;
def SABAvvv_2S :  NeonI_3VSame_Constraint_impl<"saba", ".2s",  VPR64,  v2i32,
                                               0b0, 0b0, 0b10, 0b01111, Neon_saba>;
def SABAvvv_4S :  NeonI_3VSame_Constraint_impl<"saba", ".4s",  VPR128, v4i32,
                                               0b1, 0b0, 0b10, 0b01111, Neon_saba>;
// Vector Absolute Difference (Signed, Unsigned)
defm UABDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01110, "uabd", int_arm_neon_vabdu, 0>;
defm SABDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01110, "sabd", int_arm_neon_vabds, 0>;

// Vector Absolute Difference (Floating Point)
defm FABDvvv: NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11010, "fabd",
                                    int_arm_neon_vabds, int_arm_neon_vabds,
                                    int_arm_neon_vabds, v2f32, v4f32, v2f64, 0>;
// Vector Reciprocal Step (Floating Point)
// NOTE(review): NeonI_3VSame_SD_sizes takes three SDPatternOperator args
// (2S/4S/2D); the dropped third int_arm_neon_vrecps is restored here to
// match the FRSQRTS definition below.
defm FRECPSvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11111, "frecps",
                                       int_arm_neon_vrecps, int_arm_neon_vrecps,
                                       int_arm_neon_vrecps,
                                       v2f32, v4f32, v2f64, 0>;
// Vector Reciprocal Square Root Step (Floating Point)
defm FRSQRTSvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11111, "frsqrts",
                                        int_arm_neon_vrsqrts,
                                        int_arm_neon_vrsqrts,
                                        int_arm_neon_vrsqrts,
                                        v2f32, v4f32, v2f64, 0>;
// Vector Comparisons

// Per-condition pattern fragments over the generic Neon_cmp node.
def Neon_cmeq : PatFrag<(ops node:$lhs, node:$rhs),
                        (Neon_cmp node:$lhs, node:$rhs, SETEQ)>;
def Neon_cmphs : PatFrag<(ops node:$lhs, node:$rhs),
                         (Neon_cmp node:$lhs, node:$rhs, SETUGE)>;
def Neon_cmge : PatFrag<(ops node:$lhs, node:$rhs),
                        (Neon_cmp node:$lhs, node:$rhs, SETGE)>;
def Neon_cmhi : PatFrag<(ops node:$lhs, node:$rhs),
                        (Neon_cmp node:$lhs, node:$rhs, SETUGT)>;
def Neon_cmgt : PatFrag<(ops node:$lhs, node:$rhs),
                        (Neon_cmp node:$lhs, node:$rhs, SETGT)>;
// NeonI_compare_aliases class: swaps register operands to implement
// comparison aliases, e.g., CMLE is alias for CMGE with operands reversed.
// NOTE(review): the ", $Rm" # asmlane fragment of the assembly string was
// dropped in this copy and is reconstructed here.
class NeonI_compare_aliases<string asmop, string asmlane,
                            Instruction inst, RegisterOperand VPRC>
  : NeonInstAlias<asmop # "\t$Rd" # asmlane #", $Rn" # asmlane #
                  ", $Rm" # asmlane,
                  (inst VPRC:$Rd, VPRC:$Rm, VPRC:$Rn), 0b0>;
// Vector Comparisons (Integer)

// Vector Compare Mask Equal (Integer)
let isCommutable =1 in {
defm CMEQvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10001, "cmeq", Neon_cmeq, 0>;
}
// Vector Compare Mask Higher or Same (Unsigned Integer)
defm CMHSvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00111, "cmhs", Neon_cmphs, 0>;

// Vector Compare Mask Greater Than or Equal (Integer)
defm CMGEvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00111, "cmge", Neon_cmge, 0>;

// Vector Compare Mask Higher (Unsigned Integer)
defm CMHIvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00110, "cmhi", Neon_cmhi, 0>;

// Vector Compare Mask Greater Than (Integer)
defm CMGTvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00110, "cmgt", Neon_cmgt, 0>;

// Vector Compare Mask Bitwise Test (Integer)
defm CMTSTvvv: NeonI_3VSame_BHSD_sizes<0b0, 0b10001, "cmtst", Neon_tst, 0>;
// Vector Compare Mask Less or Same (Unsigned Integer)
// CMLS is alias for CMHS with operands reversed.
def CMLSvvv_8B  : NeonI_compare_aliases<"cmls", ".8b",  CMHSvvv_8B,  VPR64>;
def CMLSvvv_16B : NeonI_compare_aliases<"cmls", ".16b", CMHSvvv_16B, VPR128>;
def CMLSvvv_4H  : NeonI_compare_aliases<"cmls", ".4h",  CMHSvvv_4H,  VPR64>;
def CMLSvvv_8H  : NeonI_compare_aliases<"cmls", ".8h",  CMHSvvv_8H,  VPR128>;
def CMLSvvv_2S  : NeonI_compare_aliases<"cmls", ".2s",  CMHSvvv_2S,  VPR64>;
def CMLSvvv_4S  : NeonI_compare_aliases<"cmls", ".4s",  CMHSvvv_4S,  VPR128>;
def CMLSvvv_2D  : NeonI_compare_aliases<"cmls", ".2d",  CMHSvvv_2D,  VPR128>;

// Vector Compare Mask Less Than or Equal (Integer)
// CMLE is alias for CMGE with operands reversed.
def CMLEvvv_8B  : NeonI_compare_aliases<"cmle", ".8b",  CMGEvvv_8B,  VPR64>;
def CMLEvvv_16B : NeonI_compare_aliases<"cmle", ".16b", CMGEvvv_16B, VPR128>;
def CMLEvvv_4H  : NeonI_compare_aliases<"cmle", ".4h",  CMGEvvv_4H,  VPR64>;
def CMLEvvv_8H  : NeonI_compare_aliases<"cmle", ".8h",  CMGEvvv_8H,  VPR128>;
def CMLEvvv_2S  : NeonI_compare_aliases<"cmle", ".2s",  CMGEvvv_2S,  VPR64>;
def CMLEvvv_4S  : NeonI_compare_aliases<"cmle", ".4s",  CMGEvvv_4S,  VPR128>;
def CMLEvvv_2D  : NeonI_compare_aliases<"cmle", ".2d",  CMGEvvv_2D,  VPR128>;

// Vector Compare Mask Lower (Unsigned Integer)
// CMLO is alias for CMHI with operands reversed.
def CMLOvvv_8B  : NeonI_compare_aliases<"cmlo", ".8b",  CMHIvvv_8B,  VPR64>;
def CMLOvvv_16B : NeonI_compare_aliases<"cmlo", ".16b", CMHIvvv_16B, VPR128>;
def CMLOvvv_4H  : NeonI_compare_aliases<"cmlo", ".4h",  CMHIvvv_4H,  VPR64>;
def CMLOvvv_8H  : NeonI_compare_aliases<"cmlo", ".8h",  CMHIvvv_8H,  VPR128>;
def CMLOvvv_2S  : NeonI_compare_aliases<"cmlo", ".2s",  CMHIvvv_2S,  VPR64>;
def CMLOvvv_4S  : NeonI_compare_aliases<"cmlo", ".4s",  CMHIvvv_4S,  VPR128>;
def CMLOvvv_2D  : NeonI_compare_aliases<"cmlo", ".2d",  CMHIvvv_2D,  VPR128>;

// Vector Compare Mask Less Than (Integer)
// CMLT is alias for CMGT with operands reversed.
def CMLTvvv_8B  : NeonI_compare_aliases<"cmlt", ".8b",  CMGTvvv_8B,  VPR64>;
def CMLTvvv_16B : NeonI_compare_aliases<"cmlt", ".16b", CMGTvvv_16B, VPR128>;
def CMLTvvv_4H  : NeonI_compare_aliases<"cmlt", ".4h",  CMGTvvv_4H,  VPR64>;
def CMLTvvv_8H  : NeonI_compare_aliases<"cmlt", ".8h",  CMGTvvv_8H,  VPR128>;
def CMLTvvv_2S  : NeonI_compare_aliases<"cmlt", ".2s",  CMGTvvv_2S,  VPR64>;
def CMLTvvv_4S  : NeonI_compare_aliases<"cmlt", ".4s",  CMGTvvv_4S,  VPR128>;
def CMLTvvv_2D  : NeonI_compare_aliases<"cmlt", ".2d",  CMGTvvv_2D,  VPR128>;
// Asm operand matching a literal #0 immediate (used by the compare-with-zero
// instructions).
// NOTE(review): the opening brace, "let Name" line, and closing braces were
// dropped in this copy and are reconstructed — confirm the Name string
// against upstream.
def neon_uimm0_asmoperand : AsmOperandClass
{
  let Name = "UImm0";
  let PredicateMethod = "isUImm<0>";
  let RenderMethod = "addImmOperands";
}

def neon_uimm0 : Operand<i32>, ImmLeaf<i32, [{return Imm == 0;}]> {
  let ParserMatchClass = neon_uimm0_asmoperand;
  let PrintMethod = "printNeonUImm0Operand";
}
// Multiclass NeonI_cmpz_sizes: integer compare-against-zero for every vector
// arrangement; the immediate operand is always the literal #0.
multiclass NeonI_cmpz_sizes<bit u, bits<5> opcode, string asmop, CondCode CC>
{
  def _8B : NeonI_2VMisc<0b0, u, 0b00, opcode,
            (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.8b, $Rn.8b, $Imm",
            [(set (v8i8 VPR64:$Rd),
               (v8i8 (Neon_cmpz (v8i8 VPR64:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _16B : NeonI_2VMisc<0b1, u, 0b00, opcode,
             (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
             asmop # "\t$Rd.16b, $Rn.16b, $Imm",
             [(set (v16i8 VPR128:$Rd),
                (v16i8 (Neon_cmpz (v16i8 VPR128:$Rn), (i32 imm:$Imm), CC)))],
             NoItinerary>;

  def _4H : NeonI_2VMisc<0b0, u, 0b01, opcode,
            (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.4h, $Rn.4h, $Imm",
            [(set (v4i16 VPR64:$Rd),
               (v4i16 (Neon_cmpz (v4i16 VPR64:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _8H : NeonI_2VMisc<0b1, u, 0b01, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.8h, $Rn.8h, $Imm",
            [(set (v8i16 VPR128:$Rd),
               (v8i16 (Neon_cmpz (v8i16 VPR128:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _2S : NeonI_2VMisc<0b0, u, 0b10, opcode,
            (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.2s, $Rn.2s, $Imm",
            [(set (v2i32 VPR64:$Rd),
               (v2i32 (Neon_cmpz (v2i32 VPR64:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _4S : NeonI_2VMisc<0b1, u, 0b10, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.4s, $Rn.4s, $Imm",
            [(set (v4i32 VPR128:$Rd),
               (v4i32 (Neon_cmpz (v4i32 VPR128:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _2D : NeonI_2VMisc<0b1, u, 0b11, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.2d, $Rn.2d, $Imm",
            [(set (v2i64 VPR128:$Rd),
               (v2i64 (Neon_cmpz (v2i64 VPR128:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;
}
// Vector Compare Mask Equal to Zero (Integer)
defm CMEQvvi : NeonI_cmpz_sizes<0b0, 0b01001, "cmeq", SETEQ>;

// Vector Compare Mask Greater Than or Equal to Zero (Signed Integer)
defm CMGEvvi : NeonI_cmpz_sizes<0b1, 0b01000, "cmge", SETGE>;

// Vector Compare Mask Greater Than Zero (Signed Integer)
defm CMGTvvi : NeonI_cmpz_sizes<0b0, 0b01000, "cmgt", SETGT>;

// Vector Compare Mask Less Than or Equal To Zero (Signed Integer)
defm CMLEvvi : NeonI_cmpz_sizes<0b1, 0b01001, "cmle", SETLE>;

// Vector Compare Mask Less Than Zero (Signed Integer)
defm CMLTvvi : NeonI_cmpz_sizes<0b0, 0b01010, "cmlt", SETLT>;
// Vector Comparisons (Floating Point)

// Vector Compare Mask Equal (Floating Point)
let isCommutable =1 in {
defm FCMEQvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11100, "fcmeq", Neon_cmeq,
                                      Neon_cmeq, Neon_cmeq,
                                      v2i32, v4i32, v2i64, 0>;
}
// Vector Compare Mask Greater Than Or Equal (Floating Point)
defm FCMGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11100, "fcmge", Neon_cmge,
                                      Neon_cmge, Neon_cmge,
                                      v2i32, v4i32, v2i64, 0>;

// Vector Compare Mask Greater Than (Floating Point)
defm FCMGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11100, "fcmgt", Neon_cmgt,
                                      Neon_cmgt, Neon_cmgt,
                                      v2i32, v4i32, v2i64, 0>;

// Vector Compare Mask Less Than Or Equal (Floating Point)
// FCMLE is alias for FCMGE with operands reversed.
def FCMLEvvv_2S : NeonI_compare_aliases<"fcmle", ".2s", FCMGEvvv_2S, VPR64>;
def FCMLEvvv_4S : NeonI_compare_aliases<"fcmle", ".4s", FCMGEvvv_4S, VPR128>;
def FCMLEvvv_2D : NeonI_compare_aliases<"fcmle", ".2d", FCMGEvvv_2D, VPR128>;

// Vector Compare Mask Less Than (Floating Point)
// FCMLT is alias for FCMGT with operands reversed.
def FCMLTvvv_2S : NeonI_compare_aliases<"fcmlt", ".2s", FCMGTvvv_2S, VPR64>;
def FCMLTvvv_4S : NeonI_compare_aliases<"fcmlt", ".4s", FCMGTvvv_4S, VPR128>;
def FCMLTvvv_2D : NeonI_compare_aliases<"fcmlt", ".2d", FCMGTvvv_2D, VPR128>;
// Multiclass NeonI_fpcmpz_sizes: floating-point compare against #0.0 for the
// 2S/4S/2D arrangements; the zero is parsed/printed via the fpz32 operand.
multiclass NeonI_fpcmpz_sizes<bit u, bit size, bits<5> opcode,
                              string asmop, CondCode CC>
{
  def _2S : NeonI_2VMisc<0b0, u, {size, 0b0}, opcode,
            (outs VPR64:$Rd), (ins VPR64:$Rn, fpz32:$FPImm),
            asmop # "\t$Rd.2s, $Rn.2s, $FPImm",
            [(set (v2i32 VPR64:$Rd),
               (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpimm:$FPImm), CC)))],
            NoItinerary>;

  def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm),
            asmop # "\t$Rd.4s, $Rn.4s, $FPImm",
            [(set (v4i32 VPR128:$Rd),
               (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpimm:$FPImm), CC)))],
            NoItinerary>;

  def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm),
            asmop # "\t$Rd.2d, $Rn.2d, $FPImm",
            [(set (v2i64 VPR128:$Rd),
               (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn), (f32 fpimm:$FPImm), CC)))],
            NoItinerary>;
}
780 // Vector Compare Mask Equal to Zero (Floating Point)
781 defm FCMEQvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01101, "fcmeq", SETEQ>;
783 // Vector Compare Mask Greater Than or Equal to Zero (Floating Point)
784 defm FCMGEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01100, "fcmge", SETGE>;
786 // Vector Compare Mask Greater Than Zero (Floating Point)
787 defm FCMGTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01100, "fcmgt", SETGT>;
789 // Vector Compare Mask Less Than or Equal To Zero (Floating Point)
790 defm FCMLEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01101, "fcmle", SETLE>;
792 // Vector Compare Mask Less Than Zero (Floating Point)
793 defm FCMLTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01110, "fcmlt", SETLT>;
795 // Vector Absolute Comparisons (Floating Point)
797 // Vector Absolute Compare Mask Greater Than Or Equal (Floating Point)
798 defm FACGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11101, "facge",
799 int_arm_neon_vacged, int_arm_neon_vacgeq,
800 int_aarch64_neon_vacgeq,
801 v2i32, v4i32, v2i64, 0>;
803 // Vector Absolute Compare Mask Greater Than (Floating Point)
804 defm FACGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11101, "facgt",
805 int_arm_neon_vacgtd, int_arm_neon_vacgtq,
806 int_aarch64_neon_vacgtq,
807 v2i32, v4i32, v2i64, 0>;
809 // Vector Absolute Compare Mask Less Than Or Equal (Floating Point)
810 // FACLE is alias for FACGE with operands reversed.
811 def FACLEvvv_2S : NeonI_compare_aliases<"facle", ".2s", FACGEvvv_2S, VPR64>;
812 def FACLEvvv_4S : NeonI_compare_aliases<"facle", ".4s", FACGEvvv_4S, VPR128>;
813 def FACLEvvv_2D : NeonI_compare_aliases<"facle", ".2d", FACGEvvv_2D, VPR128>;
815 // Vector Absolute Compare Mask Less Than (Floating Point)
816 // FACLT is alias for FACGT with operands reversed.
817 def FACLTvvv_2S : NeonI_compare_aliases<"faclt", ".2s", FACGTvvv_2S, VPR64>;
818 def FACLTvvv_4S : NeonI_compare_aliases<"faclt", ".4s", FACGTvvv_4S, VPR128>;
819 def FACLTvvv_2D : NeonI_compare_aliases<"faclt", ".2d", FACGTvvv_2D, VPR128>;
821 // Vector halving add (Integer Signed, Unsigned)
// First template bit is the U (unsigned) field: 0b0 selects the signed form,
// 0b1 the unsigned form. The final argument appears to be a commutability
// flag (1 for the add forms, 0 for the sub forms below) - confirm against
// the NeonI_3VSame_BHS_sizes declaration, which is outside this chunk.
822 defm SHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00000, "shadd",
823 int_arm_neon_vhadds, 1>;
824 defm UHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00000, "uhadd",
825 int_arm_neon_vhaddu, 1>;
827 // Vector halving sub (Integer Signed, Unsigned)
// Subtraction is not commutative, hence the trailing 0.
828 defm SHSUBvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00100, "shsub",
829 int_arm_neon_vhsubs, 0>;
830 defm UHSUBvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00100, "uhsub",
831 int_arm_neon_vhsubu, 0>;
833 // Vector rounding halving add (Integer Signed, Unsigned)
834 defm SRHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00010, "srhadd",
835 int_arm_neon_vrhadds, 1>;
836 defm URHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00010, "urhadd",
837 int_arm_neon_vrhaddu, 1>;
839 // Vector Saturating add (Integer Signed, Unsigned)
840 defm SQADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00001, "sqadd",
841 int_arm_neon_vqadds, 1>;
842 defm UQADDvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00001, "uqadd",
843 int_arm_neon_vqaddu, 1>;
845 // Vector Saturating sub (Integer Signed, Unsigned)
846 defm SQSUBvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00101, "sqsub",
847 int_arm_neon_vqsubs, 1>;
848 defm UQSUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00101, "uqsub",
849 int_arm_neon_vqsubu, 1>;
851 // Vector Shift Left (Signed and Unsigned Integer)
852 defm SSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01000, "sshl",
853 int_arm_neon_vshifts, 1>;
854 defm USHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01000, "ushl",
855 int_arm_neon_vshiftu, 1>;
857 // Vector Saturating Shift Left (Signed and Unsigned Integer)
858 defm SQSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01001, "sqshl",
859 int_arm_neon_vqshifts, 1>;
860 defm UQSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01001, "uqshl",
861 int_arm_neon_vqshiftu, 1>;
863 // Vector Rounding Shift Left (Signed and Unsigned Integer)
864 defm SRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01010, "srshl",
865 int_arm_neon_vrshifts, 1>;
866 defm URSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01010, "urshl",
867 int_arm_neon_vrshiftu, 1>;
869 // Vector Saturating Rounding Shift Left (Signed and Unsigned Integer)
870 defm SQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01011, "sqrshl",
871 int_arm_neon_vqrshifts, 1>;
872 defm UQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01011, "uqrshl",
873 int_arm_neon_vqrshiftu, 1>;
875 // Vector Maximum (Signed and Unsigned Integer)
876 defm SMAXvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01100, "smax", int_arm_neon_vmaxs, 1>;
877 defm UMAXvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01100, "umax", int_arm_neon_vmaxu, 1>;
879 // Vector Minimum (Signed and Unsigned Integer)
880 defm SMINvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01101, "smin", int_arm_neon_vmins, 1>;
881 defm UMINvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01101, "umin", int_arm_neon_vminu, 1>;
883 // Vector Maximum (Floating Point)
884 defm FMAXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11110, "fmax",
885 int_arm_neon_vmaxs, int_arm_neon_vmaxs,
886 int_arm_neon_vmaxs, v2f32, v4f32, v2f64, 1>;
888 // Vector Minimum (Floating Point)
889 defm FMINvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11110, "fmin",
890 int_arm_neon_vmins, int_arm_neon_vmins,
891 int_arm_neon_vmins, v2f32, v4f32, v2f64, 1>;
893 // Vector maxNum (Floating Point) - prefer a number over a quiet NaN
894 defm FMAXNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11000, "fmaxnm",
895 int_aarch64_neon_vmaxnm,
896 int_aarch64_neon_vmaxnm,
897 int_aarch64_neon_vmaxnm,
898 v2f32, v4f32, v2f64, 1>;
900 // Vector minNum (Floating Point) - prefer a number over a quiet NaN
901 defm FMINNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11000, "fminnm",
902 int_aarch64_neon_vminnm,
903 int_aarch64_neon_vminnm,
904 int_aarch64_neon_vminnm,
905 v2f32, v4f32, v2f64, 1>;
907 // Vector Maximum Pairwise (Signed and Unsigned Integer)
908 defm SMAXPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10100, "smaxp", int_arm_neon_vpmaxs, 1>;
909 defm UMAXPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10100, "umaxp", int_arm_neon_vpmaxu, 1>;
911 // Vector Minimum Pairwise (Signed and Unsigned Integer)
912 defm SMINPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10101, "sminp", int_arm_neon_vpmins, 1>;
913 defm UMINPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10101, "uminp", int_arm_neon_vpminu, 1>;
915 // Vector Maximum Pairwise (Floating Point)
916 defm FMAXPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11110, "fmaxp",
917 int_arm_neon_vpmaxs, int_arm_neon_vpmaxs,
918 int_arm_neon_vpmaxs, v2f32, v4f32, v2f64, 1>;
920 // Vector Minimum Pairwise (Floating Point)
921 defm FMINPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11110, "fminp",
922 int_arm_neon_vpmins, int_arm_neon_vpmins,
923 int_arm_neon_vpmins, v2f32, v4f32, v2f64, 1>;
925 // Vector maxNum Pairwise (Floating Point) - prefer a number over a quiet NaN
926 defm FMAXNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11000, "fmaxnmp",
927 int_aarch64_neon_vpmaxnm,
928 int_aarch64_neon_vpmaxnm,
929 int_aarch64_neon_vpmaxnm,
930 v2f32, v4f32, v2f64, 1>;
932 // Vector minNum Pairwise (Floating Point) - prefer a number over a quiet NaN
933 defm FMINNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11000, "fminnmp",
934 int_aarch64_neon_vpminnm,
935 int_aarch64_neon_vpminnm,
936 int_aarch64_neon_vpminnm,
937 v2f32, v4f32, v2f64, 1>;
939 // Vector Addition Pairwise (Integer)
940 defm ADDP : NeonI_3VSame_BHSD_sizes<0b0, 0b10111, "addp", int_arm_neon_vpadd, 1>;
942 // Vector Addition Pairwise (Floating Point)
943 defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp",
947 v2f32, v4f32, v2f64, 1>;
949 // Vector Saturating Doubling Multiply High
950 defm SQDMULHvvv : NeonI_3VSame_HS_sizes<0b0, 0b10110, "sqdmulh",
951 int_arm_neon_vqdmulh, 1>;
953 // Vector Saturating Rounding Doubling Multiply High
954 defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh",
955 int_arm_neon_vqrdmulh, 1>;
957 // Vector Multiply Extended (Floating Point)
958 defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx",
959 int_aarch64_neon_vmulx,
960 int_aarch64_neon_vmulx,
961 int_aarch64_neon_vmulx,
962 v2f32, v4f32, v2f64, 1>;
964 // Vector Immediate Instructions
966 multiclass neon_mov_imm_shift_asmoperands<string PREFIX>
968 def _asmoperand : AsmOperandClass
970 let Name = "NeonMovImmShift" # PREFIX;
971 let RenderMethod = "addNeonMovImmShift" # PREFIX # "Operands";
972 let PredicateMethod = "isNeonMovImmShift" # PREFIX;
976 // Definition of vector immediates shift operands
978 // The selectable use-cases extract the shift operation
979 // information from the OpCmode fields encoded in the immediate.
980 def neon_mod_shift_imm_XFORM : SDNodeXForm<imm, [{
981 uint64_t OpCmode = N->getZExtValue();
983 unsigned ShiftOnesIn;
985 A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
986 if (!HasShift) return SDValue();
987 return CurDAG->getTargetConstant(ShiftImm, MVT::i32);
990 // Vector immediates shift operands which accept LSL and MSL
991 // shift operators with shift value in the range of 0, 8, 16, 24 (LSL),
992 // or 0, 8 (LSLH) or 8, 16 (MSL).
993 defm neon_mov_imm_LSL : neon_mov_imm_shift_asmoperands<"LSL">;
994 defm neon_mov_imm_MSL : neon_mov_imm_shift_asmoperands<"MSL">;
995 // LSLH restricts shift amount to 0, 8 out of 0, 8, 16, 24
996 defm neon_mov_imm_LSLH : neon_mov_imm_shift_asmoperands<"LSLH">;
998 multiclass neon_mov_imm_shift_operands<string PREFIX,
999 string HALF, string ISHALF, code pred>
1001 def _operand : Operand<i32>, ImmLeaf<i32, pred, neon_mod_shift_imm_XFORM>
1004 "printNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
1006 "DecodeNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
1007 let ParserMatchClass =
1008 !cast<AsmOperandClass>("neon_mov_imm_" # PREFIX # HALF # "_asmoperand");
1012 defm neon_mov_imm_LSL : neon_mov_imm_shift_operands<"LSL", "", "false", [{
1014 unsigned ShiftOnesIn;
1016 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1017 return (HasShift && !ShiftOnesIn);
1020 defm neon_mov_imm_MSL : neon_mov_imm_shift_operands<"MSL", "", "false", [{
1022 unsigned ShiftOnesIn;
1024 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1025 return (HasShift && ShiftOnesIn);
1028 defm neon_mov_imm_LSLH : neon_mov_imm_shift_operands<"LSL", "H", "true", [{
1030 unsigned ShiftOnesIn;
1032 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1033 return (HasShift && !ShiftOnesIn);
1036 def neon_uimm1_asmoperand : AsmOperandClass
1039 let PredicateMethod = "isUImm<1>";
1040 let RenderMethod = "addImmOperands";
1043 def neon_uimm2_asmoperand : AsmOperandClass
1046 let PredicateMethod = "isUImm<2>";
1047 let RenderMethod = "addImmOperands";
1050 def neon_uimm8_asmoperand : AsmOperandClass
1053 let PredicateMethod = "isUImm<8>";
1054 let RenderMethod = "addImmOperands";
1057 def neon_uimm8 : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
1058 let ParserMatchClass = neon_uimm8_asmoperand;
1059 let PrintMethod = "printUImmHexOperand";
1062 def neon_uimm64_mask_asmoperand : AsmOperandClass
1064 let Name = "NeonUImm64Mask";
1065 let PredicateMethod = "isNeonUImm64Mask";
1066 let RenderMethod = "addNeonUImm64MaskOperands";
1069 // MCOperand for 64-bit bytemask with each byte having only the
1070 // value 0x00 and 0xff is encoded as an unsigned 8-bit value
1071 def neon_uimm64_mask : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
1072 let ParserMatchClass = neon_uimm64_mask_asmoperand;
1073 let PrintMethod = "printNeonUImm64MaskOperand";
1076 multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
1077 SDPatternOperator opnode>
1079 // shift zeros, per word
1080 def _2S : NeonI_1VModImm<0b0, op,
1082 (ins neon_uimm8:$Imm,
1083 neon_mov_imm_LSL_operand:$Simm),
1084 !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
1085 [(set (v2i32 VPR64:$Rd),
1086 (v2i32 (opnode (timm:$Imm),
1087 (neon_mov_imm_LSL_operand:$Simm))))],
1090 let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
1093 def _4S : NeonI_1VModImm<0b1, op,
1095 (ins neon_uimm8:$Imm,
1096 neon_mov_imm_LSL_operand:$Simm),
1097 !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
1098 [(set (v4i32 VPR128:$Rd),
1099 (v4i32 (opnode (timm:$Imm),
1100 (neon_mov_imm_LSL_operand:$Simm))))],
1103 let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
1106 // shift zeros, per halfword
1107 def _4H : NeonI_1VModImm<0b0, op,
1109 (ins neon_uimm8:$Imm,
1110 neon_mov_imm_LSLH_operand:$Simm),
1111 !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
1112 [(set (v4i16 VPR64:$Rd),
1113 (v4i16 (opnode (timm:$Imm),
1114 (neon_mov_imm_LSLH_operand:$Simm))))],
1117 let cmode = {0b1, 0b0, Simm, 0b0};
1120 def _8H : NeonI_1VModImm<0b1, op,
1122 (ins neon_uimm8:$Imm,
1123 neon_mov_imm_LSLH_operand:$Simm),
1124 !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
1125 [(set (v8i16 VPR128:$Rd),
1126 (v8i16 (opnode (timm:$Imm),
1127 (neon_mov_imm_LSLH_operand:$Simm))))],
1130 let cmode = {0b1, 0b0, Simm, 0b0};
1134 multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
1135 SDPatternOperator opnode,
1136 SDPatternOperator neonopnode>
1138 let Constraints = "$src = $Rd" in {
1139 // shift zeros, per word
1140 def _2S : NeonI_1VModImm<0b0, op,
1142 (ins VPR64:$src, neon_uimm8:$Imm,
1143 neon_mov_imm_LSL_operand:$Simm),
1144 !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
1145 [(set (v2i32 VPR64:$Rd),
1146 (v2i32 (opnode (v2i32 VPR64:$src),
1147 (v2i32 (bitconvert (v2i32 (neonopnode timm:$Imm,
1148 neon_mov_imm_LSL_operand:$Simm)))))))],
1151 let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
1154 def _4S : NeonI_1VModImm<0b1, op,
1156 (ins VPR128:$src, neon_uimm8:$Imm,
1157 neon_mov_imm_LSL_operand:$Simm),
1158 !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
1159 [(set (v4i32 VPR128:$Rd),
1160 (v4i32 (opnode (v4i32 VPR128:$src),
1161 (v4i32 (bitconvert (v4i32 (neonopnode timm:$Imm,
1162 neon_mov_imm_LSL_operand:$Simm)))))))],
1165 let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
1168 // shift zeros, per halfword
1169 def _4H : NeonI_1VModImm<0b0, op,
1171 (ins VPR64:$src, neon_uimm8:$Imm,
1172 neon_mov_imm_LSLH_operand:$Simm),
1173 !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
1174 [(set (v4i16 VPR64:$Rd),
1175 (v4i16 (opnode (v4i16 VPR64:$src),
1176 (v4i16 (bitconvert (v4i16 (neonopnode timm:$Imm,
1177 neon_mov_imm_LSL_operand:$Simm)))))))],
1180 let cmode = {0b1, 0b0, Simm, 0b1};
1183 def _8H : NeonI_1VModImm<0b1, op,
1185 (ins VPR128:$src, neon_uimm8:$Imm,
1186 neon_mov_imm_LSLH_operand:$Simm),
1187 !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
1188 [(set (v8i16 VPR128:$Rd),
1189 (v8i16 (opnode (v8i16 VPR128:$src),
1190 (v8i16 (bitconvert (v8i16 (neonopnode timm:$Imm,
1191 neon_mov_imm_LSL_operand:$Simm)))))))],
1194 let cmode = {0b1, 0b0, Simm, 0b1};
1199 multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op,
1200 SDPatternOperator opnode>
1202 // shift ones, per word
1203 def _2S : NeonI_1VModImm<0b0, op,
1205 (ins neon_uimm8:$Imm,
1206 neon_mov_imm_MSL_operand:$Simm),
1207 !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
1208 [(set (v2i32 VPR64:$Rd),
1209 (v2i32 (opnode (timm:$Imm),
1210 (neon_mov_imm_MSL_operand:$Simm))))],
1213 let cmode = {0b1, 0b1, 0b0, Simm};
1216 def _4S : NeonI_1VModImm<0b1, op,
1218 (ins neon_uimm8:$Imm,
1219 neon_mov_imm_MSL_operand:$Simm),
1220 !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
1221 [(set (v4i32 VPR128:$Rd),
1222 (v4i32 (opnode (timm:$Imm),
1223 (neon_mov_imm_MSL_operand:$Simm))))],
1226 let cmode = {0b1, 0b1, 0b0, Simm};
1230 // Vector Move Immediate Shifted
1231 let isReMaterializable = 1 in {
1232 defm MOVIvi_lsl : NeonI_mov_imm_lsl_sizes<"movi", 0b0, Neon_movi>;
1235 // Vector Move Inverted Immediate Shifted
1236 let isReMaterializable = 1 in {
1237 defm MVNIvi_lsl : NeonI_mov_imm_lsl_sizes<"mvni", 0b1, Neon_mvni>;
1240 // Vector Bitwise Bit Clear (AND NOT) - immediate
1241 let isReMaterializable = 1 in {
1242 defm BICvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"bic", 0b1,
1246 // Vector Bitwise OR - immediate
1248 let isReMaterializable = 1 in {
1249 defm ORRvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"orr", 0b0,
1253 // Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
1254 // LowerBUILD_VECTOR favors lowering MOVI over MVNI.
1255 // BIC immediate instructions selection requires additional patterns to
1256 // transform Neon_movi operands into BIC immediate operands
1258 def neon_mov_imm_LSLH_transform_XFORM : SDNodeXForm<imm, [{
1259 uint64_t OpCmode = N->getZExtValue();
1261 unsigned ShiftOnesIn;
1262 (void)A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
1263 // LSLH restricts shift amount to 0, 8 which are encoded as 0 and 1
1264 // Transform encoded shift amount 0 to 1 and 1 to 0.
1265 return CurDAG->getTargetConstant(!ShiftImm, MVT::i32);
1268 def neon_mov_imm_LSLH_transform_operand
1271 unsigned ShiftOnesIn;
1273 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1274 return (HasShift && !ShiftOnesIn); }],
1275 neon_mov_imm_LSLH_transform_XFORM>;
1277 // Transform (and A, (4h Neon_movi 0xff)) -> BIC 4h (A, 0x00, LSL 8)
1278 // Transform (and A, (4h Neon_movi 0xff LSL #8)) -> BIC 4h (A, 0x00)
1279 def : Pat<(v4i16 (and VPR64:$src,
1280 (v4i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
1281 (BICvi_lsl_4H VPR64:$src, 0,
1282 neon_mov_imm_LSLH_transform_operand:$Simm)>;
1284 // Transform (and A, (8h Neon_movi 8h 0xff)) -> BIC 8h (A, 0x00, LSL 8)
1285 // Transform (and A, (8h Neon_movi 0xff LSL #8)) -> BIC 8h (A, 0x00)
1286 def : Pat<(v8i16 (and VPR128:$src,
1287 (v8i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
1288 (BICvi_lsl_8H VPR128:$src, 0,
1289 neon_mov_imm_LSLH_transform_operand:$Simm)>;
1292 multiclass Neon_bitwiseVi_patterns<SDPatternOperator opnode,
1293 SDPatternOperator neonopnode,
1295 Instruction INST8H> {
1296 def : Pat<(v8i8 (opnode VPR64:$src,
1297 (bitconvert(v4i16 (neonopnode timm:$Imm,
1298 neon_mov_imm_LSLH_operand:$Simm))))),
1299 (INST4H VPR64:$src, neon_uimm8:$Imm,
1300 neon_mov_imm_LSLH_operand:$Simm)>;
1301 def : Pat<(v1i64 (opnode VPR64:$src,
1302 (bitconvert(v4i16 (neonopnode timm:$Imm,
1303 neon_mov_imm_LSLH_operand:$Simm))))),
1304 (INST4H VPR64:$src, neon_uimm8:$Imm,
1305 neon_mov_imm_LSLH_operand:$Simm)>;
1307 def : Pat<(v16i8 (opnode VPR128:$src,
1308 (bitconvert(v8i16 (neonopnode timm:$Imm,
1309 neon_mov_imm_LSLH_operand:$Simm))))),
1310 (INST8H VPR128:$src, neon_uimm8:$Imm,
1311 neon_mov_imm_LSLH_operand:$Simm)>;
1312 def : Pat<(v4i32 (opnode VPR128:$src,
1313 (bitconvert(v8i16 (neonopnode timm:$Imm,
1314 neon_mov_imm_LSLH_operand:$Simm))))),
1315 (INST8H VPR128:$src, neon_uimm8:$Imm,
1316 neon_mov_imm_LSLH_operand:$Simm)>;
1317 def : Pat<(v2i64 (opnode VPR128:$src,
1318 (bitconvert(v8i16 (neonopnode timm:$Imm,
1319 neon_mov_imm_LSLH_operand:$Simm))))),
1320 (INST8H VPR128:$src, neon_uimm8:$Imm,
1321 neon_mov_imm_LSLH_operand:$Simm)>;
1324 // Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
// BIC computes Rd = src & ~imm, and MVNI materialises ~imm, so the DAG shape
// to select from is (and src, (Neon_mvni imm, shift)). Matching 'or' here
// would pair BIC with src | ~imm (ORN semantics) - the 'or' pairing belongs
// to the ORR/Neon_movi instantiation below.
1325 defm : Neon_bitwiseVi_patterns<and, Neon_mvni, BICvi_lsl_4H, BICvi_lsl_8H>;
1327 // Additional patterns for Vector Bitwise OR - immediate
1328 defm : Neon_bitwiseVi_patterns<or, Neon_movi, ORRvi_lsl_4H, ORRvi_lsl_8H>;
1331 // Vector Move Immediate Masked
1332 let isReMaterializable = 1 in {
1333 defm MOVIvi_msl : NeonI_mov_imm_msl_sizes<"movi", 0b0, Neon_movi>;
1336 // Vector Move Inverted Immediate Masked
1337 let isReMaterializable = 1 in {
1338 defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>;
1341 class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane,
1342 Instruction inst, RegisterOperand VPRC>
1343 : NeonInstAlias<!strconcat(asmop, "\t$Rd," # asmlane # ", $Imm"),
1344 (inst VPRC:$Rd, neon_uimm8:$Imm, 0), 0b0>;
1346 // Aliases for Vector Move Immediate Shifted
1347 def : NeonI_mov_imm_lsl_aliases<"movi", ".2s", MOVIvi_lsl_2S, VPR64>;
1348 def : NeonI_mov_imm_lsl_aliases<"movi", ".4s", MOVIvi_lsl_4S, VPR128>;
1349 def : NeonI_mov_imm_lsl_aliases<"movi", ".4h", MOVIvi_lsl_4H, VPR64>;
1350 def : NeonI_mov_imm_lsl_aliases<"movi", ".8h", MOVIvi_lsl_8H, VPR128>;
1352 // Aliases for Vector Move Inverted Immediate Shifted
1353 def : NeonI_mov_imm_lsl_aliases<"mvni", ".2s", MVNIvi_lsl_2S, VPR64>;
1354 def : NeonI_mov_imm_lsl_aliases<"mvni", ".4s", MVNIvi_lsl_4S, VPR128>;
1355 def : NeonI_mov_imm_lsl_aliases<"mvni", ".4h", MVNIvi_lsl_4H, VPR64>;
1356 def : NeonI_mov_imm_lsl_aliases<"mvni", ".8h", MVNIvi_lsl_8H, VPR128>;
1358 // Aliases for Vector Bitwise Bit Clear (AND NOT) - immediate
1359 def : NeonI_mov_imm_lsl_aliases<"bic", ".2s", BICvi_lsl_2S, VPR64>;
1360 def : NeonI_mov_imm_lsl_aliases<"bic", ".4s", BICvi_lsl_4S, VPR128>;
1361 def : NeonI_mov_imm_lsl_aliases<"bic", ".4h", BICvi_lsl_4H, VPR64>;
1362 def : NeonI_mov_imm_lsl_aliases<"bic", ".8h", BICvi_lsl_8H, VPR128>;
1364 // Aliases for Vector Bitwise OR - immediate
1365 def : NeonI_mov_imm_lsl_aliases<"orr", ".2s", ORRvi_lsl_2S, VPR64>;
1366 def : NeonI_mov_imm_lsl_aliases<"orr", ".4s", ORRvi_lsl_4S, VPR128>;
1367 def : NeonI_mov_imm_lsl_aliases<"orr", ".4h", ORRvi_lsl_4H, VPR64>;
1368 def : NeonI_mov_imm_lsl_aliases<"orr", ".8h", ORRvi_lsl_8H, VPR128>;
1370 // Vector Move Immediate - per byte
1371 let isReMaterializable = 1 in {
1372 def MOVIvi_8B : NeonI_1VModImm<0b0, 0b0,
1373 (outs VPR64:$Rd), (ins neon_uimm8:$Imm),
1374 "movi\t$Rd.8b, $Imm",
1375 [(set (v8i8 VPR64:$Rd),
1376 (v8i8 (Neon_movi (timm:$Imm), (i32 imm))))],
1381 def MOVIvi_16B : NeonI_1VModImm<0b1, 0b0,
1382 (outs VPR128:$Rd), (ins neon_uimm8:$Imm),
1383 "movi\t$Rd.16b, $Imm",
1384 [(set (v16i8 VPR128:$Rd),
1385 (v16i8 (Neon_movi (timm:$Imm), (i32 imm))))],
1391 // Vector Move Immediate - bytemask, per double word
1392 let isReMaterializable = 1 in {
1393 def MOVIvi_2D : NeonI_1VModImm<0b1, 0b1,
1394 (outs VPR128:$Rd), (ins neon_uimm64_mask:$Imm),
1395 "movi\t $Rd.2d, $Imm",
1396 [(set (v2i64 VPR128:$Rd),
1397 (v2i64 (Neon_movi (timm:$Imm), (i32 imm))))],
1403 // Vector Move Immediate - bytemask, one doubleword
1405 let isReMaterializable = 1 in {
1406 def MOVIdi : NeonI_1VModImm<0b0, 0b1,
1407 (outs FPR64:$Rd), (ins neon_uimm64_mask:$Imm),
1409 [(set (f64 FPR64:$Rd),
1411 (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))))],
1417 // Vector Floating Point Move Immediate
1419 class NeonI_FMOV_impl<string asmlane, RegisterOperand VPRC, ValueType OpTy,
1420 Operand immOpType, bit q, bit op>
1421 : NeonI_1VModImm<q, op,
1422 (outs VPRC:$Rd), (ins immOpType:$Imm),
1423 "fmov\t$Rd" # asmlane # ", $Imm",
1424 [(set (OpTy VPRC:$Rd),
1425 (OpTy (Neon_fmovi (timm:$Imm))))],
1430 let isReMaterializable = 1 in {
1431 def FMOVvi_2S : NeonI_FMOV_impl<".2s", VPR64, v2f32, fmov32_operand, 0b0, 0b0>;
1432 def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>;
1433 def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>;
1436 // Vector Shift (Immediate)
1437 // Immediate in [0, 63]
1438 def imm0_63 : Operand<i32> {
1439 let ParserMatchClass = uimm6_asmoperand;
1442 // Shift Right/Left Immediate - The immh:immb field of these shifts is encoded
1446 // 8 immh:immb<6:3> = '0001xxx', <imm> is encoded in immh:immb<2:0>
1447 // 16 immh:immb<6:4> = '001xxxx', <imm> is encoded in immh:immb<3:0>
1448 // 32 immh:immb<6:5> = '01xxxxx', <imm> is encoded in immh:immb<4:0>
1449 // 64 immh:immb<6> = '1xxxxxx', <imm> is encoded in immh:immb<5:0>
1451 // The shift right immediate amount, in the range 1 to element bits, is computed
1452 // as Offset - UInt(immh:immb). The shift left immediate amount, in the range 0
1453 // to element bits - 1, is computed as UInt(immh:immb) - Offset.
1455 class shr_imm_asmoperands<string OFFSET> : AsmOperandClass {
1456 let Name = "ShrImm" # OFFSET;
1457 let RenderMethod = "addImmOperands";
1458 let DiagnosticType = "ShrImm" # OFFSET;
1461 class shr_imm<string OFFSET> : Operand<i32> {
1462 let EncoderMethod = "getShiftRightImm" # OFFSET;
1463 let DecoderMethod = "DecodeShiftRightImm" # OFFSET;
1464 let ParserMatchClass =
1465 !cast<AsmOperandClass>("shr_imm" # OFFSET # "_asmoperand");
1468 def shr_imm8_asmoperand : shr_imm_asmoperands<"8">;
1469 def shr_imm16_asmoperand : shr_imm_asmoperands<"16">;
1470 def shr_imm32_asmoperand : shr_imm_asmoperands<"32">;
1471 def shr_imm64_asmoperand : shr_imm_asmoperands<"64">;
1473 def shr_imm8 : shr_imm<"8">;
1474 def shr_imm16 : shr_imm<"16">;
1475 def shr_imm32 : shr_imm<"32">;
1476 def shr_imm64 : shr_imm<"64">;
1478 class shl_imm_asmoperands<string OFFSET> : AsmOperandClass {
1479 let Name = "ShlImm" # OFFSET;
1480 let RenderMethod = "addImmOperands";
1481 let DiagnosticType = "ShlImm" # OFFSET;
1484 class shl_imm<string OFFSET> : Operand<i32> {
1485 let EncoderMethod = "getShiftLeftImm" # OFFSET;
1486 let DecoderMethod = "DecodeShiftLeftImm" # OFFSET;
1487 let ParserMatchClass =
1488 !cast<AsmOperandClass>("shl_imm" # OFFSET # "_asmoperand");
1491 def shl_imm8_asmoperand : shl_imm_asmoperands<"8">;
1492 def shl_imm16_asmoperand : shl_imm_asmoperands<"16">;
1493 def shl_imm32_asmoperand : shl_imm_asmoperands<"32">;
1494 def shl_imm64_asmoperand : shl_imm_asmoperands<"64">;
1496 def shl_imm8 : shl_imm<"8">;
1497 def shl_imm16 : shl_imm<"16">;
1498 def shl_imm32 : shl_imm<"32">;
1499 def shl_imm64 : shl_imm<"64">;
1501 class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
1502 RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode>
1503 : NeonI_2VShiftImm<q, u, opcode,
1504 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
1505 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1506 [(set (Ty VPRC:$Rd),
1507 (Ty (OpNode (Ty VPRC:$Rn),
1508 (Ty (Neon_vdup (i32 imm:$Imm))))))],
1511 multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> {
1512 // 64-bit vector types.
1513 def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3, shl> {
1514 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1517 def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4, shl> {
1518 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1521 def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5, shl> {
1522 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1525 // 128-bit vector types.
1526 def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3, shl> {
1527 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1530 def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4, shl> {
1531 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1534 def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5, shl> {
1535 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1538 def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63, shl> {
1539 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
1543 multiclass NeonI_N2VShR<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
1544 def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1546 let Inst{22-19} = 0b0001;
1549 def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1551 let Inst{22-20} = 0b001;
1554 def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1556 let Inst{22-21} = 0b01;
1559 def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1561 let Inst{22-19} = 0b0001;
1564 def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1566 let Inst{22-20} = 0b001;
1569 def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1571 let Inst{22-21} = 0b01;
1574 def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1581 defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">;
1584 defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>;
1585 defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>;
// Pattern fragments selecting the high (upper) half of a 128-bit vector:
// extract_subvector with a start index of NumElts/2 yields the top half of
// the lanes. Used when matching the "<op>2" second-half instruction forms.
1587 def Neon_High16B : PatFrag<(ops node:$in),
1588 (extract_subvector (v16i8 node:$in), (iPTR 8))>;
1589 def Neon_High8H : PatFrag<(ops node:$in),
1590 (extract_subvector (v8i16 node:$in), (iPTR 4))>;
1591 def Neon_High4S : PatFrag<(ops node:$in),
1592 (extract_subvector (v4i32 node:$in), (iPTR 2))>;
1593 def Neon_High2D : PatFrag<(ops node:$in),
1594 (extract_subvector (v2i64 node:$in), (iPTR 1))>;
// Lower-case f/d suffixes: floating-point element variants of the above.
1595 def Neon_High4f : PatFrag<(ops node:$in),
1596 (extract_subvector (v4f32 node:$in), (iPTR 2))>;
1597 def Neon_High2d : PatFrag<(ops node:$in),
1598 (extract_subvector (v2f64 node:$in), (iPTR 1))>;
1600 def Neon_low16B : PatFrag<(ops node:$in),
1601 (v8i8 (extract_subvector (v16i8 node:$in),
1603 def Neon_low8H : PatFrag<(ops node:$in),
1604 (v4i16 (extract_subvector (v8i16 node:$in),
1606 def Neon_low4S : PatFrag<(ops node:$in),
1607 (v2i32 (extract_subvector (v4i32 node:$in),
1609 def Neon_low2D : PatFrag<(ops node:$in),
1610 (v1i64 (extract_subvector (v2i64 node:$in),
1612 def Neon_low4f : PatFrag<(ops node:$in),
1613 (v2f32 (extract_subvector (v4f32 node:$in),
1615 def Neon_low2d : PatFrag<(ops node:$in),
1616 (v1f64 (extract_subvector (v2f64 node:$in),
// Left-shift-amount operands for the widening shifts below: the immediate
// must be strictly less than the source element width (8, 16 or 32 bits),
// as enforced by each ImmLeaf predicate.
1619 def neon_uimm3_shift : Operand<i32>,
1620 ImmLeaf<i32, [{return Imm < 8;}]> {
1621 let ParserMatchClass = uimm3_asmoperand;
1624 def neon_uimm4_shift : Operand<i32>,
1625 ImmLeaf<i32, [{return Imm < 16;}]> {
1626 let ParserMatchClass = uimm4_asmoperand;
1629 def neon_uimm5_shift : Operand<i32>,
1630 ImmLeaf<i32, [{return Imm < 32;}]> {
1631 let ParserMatchClass = uimm5_asmoperand;
1634 class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1635 string SrcT, ValueType DestTy, ValueType SrcTy,
1636 Operand ImmTy, SDPatternOperator ExtOp>
1637 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1638 (ins VPR64:$Rn, ImmTy:$Imm),
1639 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1640 [(set (DestTy VPR128:$Rd),
1642 (DestTy (ExtOp (SrcTy VPR64:$Rn))),
1643 (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
1646 class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1647 string SrcT, ValueType DestTy, ValueType SrcTy,
1648 int StartIndex, Operand ImmTy,
1649 SDPatternOperator ExtOp, PatFrag getTop>
1650 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1651 (ins VPR128:$Rn, ImmTy:$Imm),
1652 asmop # "2\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1653 [(set (DestTy VPR128:$Rd),
1656 (SrcTy (getTop VPR128:$Rn)))),
1657 (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
1660 multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop,
1662 // 64-bit vector types.
1663 def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8,
1664 neon_uimm3_shift, ExtOp> {
1665 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1668 def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16,
1669 neon_uimm4_shift, ExtOp> {
1670 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1673 def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32,
1674 neon_uimm5_shift, ExtOp> {
1675 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1678 // 128-bit vector types
1679 def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b", v8i16, v8i8,
1680 8, neon_uimm3_shift, ExtOp, Neon_High16B> {
1681 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1684 def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h", v4i32, v4i16,
1685 4, neon_uimm4_shift, ExtOp, Neon_High8H> {
1686 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1689 def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s", v2i64, v2i32,
1690 2, neon_uimm5_shift, ExtOp, Neon_High4S> {
1691 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1694 // Use other patterns to match when the immediate is 0.
1695 def : Pat<(v8i16 (ExtOp (v8i8 VPR64:$Rn))),
1696 (!cast<Instruction>(prefix # "_8B") VPR64:$Rn, 0)>;
1698 def : Pat<(v4i32 (ExtOp (v4i16 VPR64:$Rn))),
1699 (!cast<Instruction>(prefix # "_4H") VPR64:$Rn, 0)>;
1701 def : Pat<(v2i64 (ExtOp (v2i32 VPR64:$Rn))),
1702 (!cast<Instruction>(prefix # "_2S") VPR64:$Rn, 0)>;
1704 def : Pat<(v8i16 (ExtOp (v8i8 (Neon_High16B VPR128:$Rn)))),
1705 (!cast<Instruction>(prefix # "_16B") VPR128:$Rn, 0)>;
1707 def : Pat<(v4i32 (ExtOp (v4i16 (Neon_High8H VPR128:$Rn)))),
1708 (!cast<Instruction>(prefix # "_8H") VPR128:$Rn, 0)>;
1710 def : Pat<(v2i64 (ExtOp (v2i32 (Neon_High4S VPR128:$Rn)))),
1711 (!cast<Instruction>(prefix # "_4S") VPR128:$Rn, 0)>;
// Shift left long: widen each element while shifting. The signed form
// matches sext of the source, the unsigned form matches zext.
1715 defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>;
1716 defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>;
1718 // Rounding/Saturating shift
1719 class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T,
1720 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1721 SDPatternOperator OpNode>
1722 : NeonI_2VShiftImm<q, u, opcode,
1723 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
1724 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1725 [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn),
1729 // shift right (vector by immediate)
1730 multiclass NeonI_N2VShR_RQ<bit u, bits<5> opcode, string asmop,
1731 SDPatternOperator OpNode> {
1732 def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1734 let Inst{22-19} = 0b0001;
1737 def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1739 let Inst{22-20} = 0b001;
1742 def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1744 let Inst{22-21} = 0b01;
1747 def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1749 let Inst{22-19} = 0b0001;
1752 def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1754 let Inst{22-20} = 0b001;
1757 def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1759 let Inst{22-21} = 0b01;
1762 def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1768 multiclass NeonI_N2VShL_Q<bit u, bits<5> opcode, string asmop,
1769 SDPatternOperator OpNode> {
1770 // 64-bit vector types.
1771 def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3,
1773 let Inst{22-19} = 0b0001;
1776 def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4,
1778 let Inst{22-20} = 0b001;
1781 def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5,
1783 let Inst{22-21} = 0b01;
1786 // 128-bit vector types.
1787 def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3,
1789 let Inst{22-19} = 0b0001;
1792 def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4,
1794 let Inst{22-20} = 0b001;
1797 def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5,
1799 let Inst{22-21} = 0b01;
1802 def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63,
1808 // Rounding shift right
// SRSHR/URSHR: signed/unsigned rounding shift right by immediate,
// selected from the corresponding AArch64 NEON intrinsics.
1809 defm SRSHRvvi : NeonI_N2VShR_RQ<0b0, 0b00100, "srshr",
1810 int_aarch64_neon_vsrshr>;
1811 defm URSHRvvi : NeonI_N2VShR_RQ<0b1, 0b00100, "urshr",
1812 int_aarch64_neon_vurshr>;
1814 // Saturating shift left unsigned
1815 defm SQSHLUvvi : NeonI_N2VShL_Q<0b1, 0b01100, "sqshlu", int_aarch64_neon_vsqshlu>;
1817 // Saturating shift left
// The immediate forms of SQSHL/UQSHL match the custom Neon_sqrshlImm /
// Neon_uqrshlImm DAG nodes (AArch64ISD::NEON_QSHLs/QSHLu) rather than a
// generic intrinsic.
1818 defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>;
1819 defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>;
1821 class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T,
1822 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1824 : NeonI_2VShiftImm<q, u, opcode,
1825 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1826 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1827 [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
1828 (Ty (OpNode (Ty VPRC:$Rn),
1829 (Ty (Neon_vdup (i32 imm:$Imm))))))))],
1831 let Constraints = "$src = $Rd";
1834 // Shift Right accumulate
1835 multiclass NeonI_N2VShRAdd<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
1836 def _8B : N2VShiftAdd<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1838 let Inst{22-19} = 0b0001;
1841 def _4H : N2VShiftAdd<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1843 let Inst{22-20} = 0b001;
1846 def _2S : N2VShiftAdd<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1848 let Inst{22-21} = 0b01;
1851 def _16B : N2VShiftAdd<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1853 let Inst{22-19} = 0b0001;
1856 def _8H : N2VShiftAdd<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1858 let Inst{22-20} = 0b001;
1861 def _4S : N2VShiftAdd<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1863 let Inst{22-21} = 0b01;
1866 def _2D : N2VShiftAdd<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1872 // Shift right and accumulate
// SSRA uses an arithmetic shift (sra), USRA a logical shift (srl); the
// shifted result is added into the destination register.
1873 defm SSRAvvi : NeonI_N2VShRAdd<0, 0b00010, "ssra", sra>;
1874 defm USRAvvi : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>;
1876 // Rounding shift accumulate
1877 class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T,
1878 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1879 SDPatternOperator OpNode>
1880 : NeonI_2VShiftImm<q, u, opcode,
1881 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1882 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1883 [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
1884 (Ty (OpNode (Ty VPRC:$Rn), (i32 imm:$Imm))))))],
1886 let Constraints = "$src = $Rd";
1889 multiclass NeonI_N2VShRAdd_R<bit u, bits<5> opcode, string asmop,
1890 SDPatternOperator OpNode> {
1891 def _8B : N2VShiftAdd_R<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1893 let Inst{22-19} = 0b0001;
1896 def _4H : N2VShiftAdd_R<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1898 let Inst{22-20} = 0b001;
1901 def _2S : N2VShiftAdd_R<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1903 let Inst{22-21} = 0b01;
1906 def _16B : N2VShiftAdd_R<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1908 let Inst{22-19} = 0b0001;
1911 def _8H : N2VShiftAdd_R<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1913 let Inst{22-20} = 0b001;
1916 def _4S : N2VShiftAdd_R<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1918 let Inst{22-21} = 0b01;
1921 def _2D : N2VShiftAdd_R<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1927 // Rounding shift right and accumulate
// Like SSRA/USRA but with rounding, so these match the vsrshr/vurshr
// intrinsics instead of plain sra/srl.
1928 defm SRSRAvvi : NeonI_N2VShRAdd_R<0, 0b00110, "srsra", int_aarch64_neon_vsrshr>;
1929 defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>;
1931 // Shift insert by immediate
1932 class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T,
1933 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1934 SDPatternOperator OpNode>
1935 : NeonI_2VShiftImm<q, u, opcode,
1936 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1937 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1938 [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$src), (Ty VPRC:$Rn),
1941 let Constraints = "$src = $Rd";
1944 // shift left insert (vector by immediate)
1945 multiclass NeonI_N2VShLIns<bit u, bits<5> opcode, string asmop> {
1946 def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3,
1947 int_aarch64_neon_vsli> {
1948 let Inst{22-19} = 0b0001;
1951 def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4,
1952 int_aarch64_neon_vsli> {
1953 let Inst{22-20} = 0b001;
1956 def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5,
1957 int_aarch64_neon_vsli> {
1958 let Inst{22-21} = 0b01;
1961 // 128-bit vector types
1962 def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3,
1963 int_aarch64_neon_vsli> {
1964 let Inst{22-19} = 0b0001;
1967 def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4,
1968 int_aarch64_neon_vsli> {
1969 let Inst{22-20} = 0b001;
1972 def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5,
1973 int_aarch64_neon_vsli> {
1974 let Inst{22-21} = 0b01;
1977 def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63,
1978 int_aarch64_neon_vsli> {
1983 // shift right insert (vector by immediate)
1984 multiclass NeonI_N2VShRIns<bit u, bits<5> opcode, string asmop> {
1985 // 64-bit vector types.
1986 def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1987 int_aarch64_neon_vsri> {
1988 let Inst{22-19} = 0b0001;
1991 def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1992 int_aarch64_neon_vsri> {
1993 let Inst{22-20} = 0b001;
1996 def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1997 int_aarch64_neon_vsri> {
1998 let Inst{22-21} = 0b01;
2001 // 128-bit vector types
2002 def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
2003 int_aarch64_neon_vsri> {
2004 let Inst{22-19} = 0b0001;
2007 def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
2008 int_aarch64_neon_vsri> {
2009 let Inst{22-20} = 0b001;
2012 def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
2013 int_aarch64_neon_vsri> {
2014 let Inst{22-21} = 0b01;
2017 def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
2018 int_aarch64_neon_vsri> {
2023 // Shift left and insert
2024 defm SLIvvi : NeonI_N2VShLIns<0b1, 0b01010, "sli">;
2026 // Shift right and insert
2027 defm SRIvvi : NeonI_N2VShRIns<0b1, 0b01000, "sri">;
2029 class N2VShR_Narrow<bit q, bit u, bits<5> opcode, string asmop, string DestT,
2030 string SrcT, Operand ImmTy>
2031 : NeonI_2VShiftImm<q, u, opcode,
2032 (outs VPR64:$Rd), (ins VPR128:$Rn, ImmTy:$Imm),
2033 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
2036 class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT,
2037 string SrcT, Operand ImmTy>
2038 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
2039 (ins VPR128:$src, VPR128:$Rn, ImmTy:$Imm),
2040 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
2042 let Constraints = "$src = $Rd";
2045 // shift right narrow (vector by immediate)
2046 multiclass NeonI_N2VShR_Narrow<bit u, bits<5> opcode, string asmop> {
2047 def _8B : N2VShR_Narrow<0b0, u, opcode, asmop, "8b", "8h", shr_imm8> {
2048 let Inst{22-19} = 0b0001;
2051 def _4H : N2VShR_Narrow<0b0, u, opcode, asmop, "4h", "4s", shr_imm16> {
2052 let Inst{22-20} = 0b001;
2055 def _2S : N2VShR_Narrow<0b0, u, opcode, asmop, "2s", "2d", shr_imm32> {
2056 let Inst{22-21} = 0b01;
2059 // Shift Narrow High
2060 def _16B : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "16b", "8h",
2062 let Inst{22-19} = 0b0001;
2065 def _8H : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "8h", "4s",
2067 let Inst{22-20} = 0b001;
2070 def _4S : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "4s", "2d",
2072 let Inst{22-21} = 0b01;
2076 // Shift right narrow
2077 defm SHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10000, "shrn">;
2079 // Shift right narrow (prefix Q is saturating, prefix R is rounding)
// Note: QSHRUNvvi/QRSHRUNvvi carry the asm mnemonics "sqshrun"/"sqrshrun"
// (signed saturating shift right unsigned narrow).
2080 defm QSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10000, "sqshrun">;
2081 defm RSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10001, "rshrn">;
2082 defm QRSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10001, "sqrshrun">;
2083 defm SQSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10010, "sqshrn">;
2084 defm UQSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10010, "uqshrn">;
2085 defm SQRSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10011, "sqrshrn">;
2086 defm UQRSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10011, "uqrshrn">;
// "Combine" PatFrags: build a 128-bit vector by concatenating two 64-bit
// halves via concat_vectors. $Rm supplies the low half and $Rn the high
// half; one fragment per element type (i64/i16/i32/f32/f64 lanes).
2088 def Neon_combine_2D : PatFrag<(ops node:$Rm, node:$Rn),
2089 (v2i64 (concat_vectors (v1i64 node:$Rm),
2090 (v1i64 node:$Rn)))>;
2091 def Neon_combine_8H : PatFrag<(ops node:$Rm, node:$Rn),
2092 (v8i16 (concat_vectors (v4i16 node:$Rm),
2093 (v4i16 node:$Rn)))>;
2094 def Neon_combine_4S : PatFrag<(ops node:$Rm, node:$Rn),
2095 (v4i32 (concat_vectors (v2i32 node:$Rm),
2096 (v2i32 node:$Rn)))>;
2097 def Neon_combine_4f : PatFrag<(ops node:$Rm, node:$Rn),
2098 (v4f32 (concat_vectors (v2f32 node:$Rm),
2099 (v2f32 node:$Rn)))>;
2100 def Neon_combine_2d : PatFrag<(ops node:$Rm, node:$Rn),
2101 (v2f64 (concat_vectors (v1f64 node:$Rm),
2102 (v1f64 node:$Rn)))>;
// Vector shift-right-by-immediate PatFrags: match a logical (srl) or
// arithmetic (sra) shift whose per-lane shift amount is a single i32
// immediate splatted into every lane by Neon_vdup.
2104 def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
2105 (v8i16 (srl (v8i16 node:$lhs),
2106 (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
2107 def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
2108 (v4i32 (srl (v4i32 node:$lhs),
2109 (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
2110 def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
2111 (v2i64 (srl (v2i64 node:$lhs),
2112 (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
2113 def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
2114 (v8i16 (sra (v8i16 node:$lhs),
2115 (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
2116 def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
2117 (v4i32 (sra (v4i32 node:$lhs),
2118 (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
2119 def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
2120 (v2i64 (sra (v2i64 node:$lhs),
2121 (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
2123 // Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors)
2124 multiclass Neon_shiftNarrow_patterns<string shr> {
2125 def : Pat<(v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H") VPR128:$Rn,
2127 (SHRNvvi_8B VPR128:$Rn, imm:$Imm)>;
2128 def : Pat<(v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S") VPR128:$Rn,
2130 (SHRNvvi_4H VPR128:$Rn, imm:$Imm)>;
2131 def : Pat<(v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D") VPR128:$Rn,
2133 (SHRNvvi_2S VPR128:$Rn, imm:$Imm)>;
2135 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2136 (v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H")
2137 VPR128:$Rn, (i32 imm:$Imm))))))),
2138 (SHRNvvi_16B (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
2139 VPR128:$Rn, imm:$Imm)>;
2140 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2141 (v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S")
2142 VPR128:$Rn, (i32 imm:$Imm))))))),
2143 (SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2144 VPR128:$Rn, imm:$Imm)>;
2145 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2146 (v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D")
2147 VPR128:$Rn, (i32 imm:$Imm))))))),
2148 (SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2149 VPR128:$Rn, imm:$Imm)>;
2152 multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> {
2153 def : Pat<(v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm)),
2154 (!cast<Instruction>(prefix # "_8B") VPR128:$Rn, imm:$Imm)>;
2155 def : Pat<(v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm)),
2156 (!cast<Instruction>(prefix # "_4H") VPR128:$Rn, imm:$Imm)>;
2157 def : Pat<(v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm)),
2158 (!cast<Instruction>(prefix # "_2S") VPR128:$Rn, imm:$Imm)>;
2160 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2161 (v1i64 (bitconvert (v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm))))),
2162 (!cast<Instruction>(prefix # "_16B")
2163 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2164 VPR128:$Rn, imm:$Imm)>;
2165 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2166 (v1i64 (bitconvert (v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm))))),
2167 (!cast<Instruction>(prefix # "_8H")
2168 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2169 VPR128:$Rn, imm:$Imm)>;
2170 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2171 (v1i64 (bitconvert (v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm))))),
2172 (!cast<Instruction>(prefix # "_4S")
2173 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2174 VPR128:$Rn, imm:$Imm)>;
// Instantiate the narrowing-shift selection patterns: plain SHRN is
// matched from srl/sra + trunc, while the saturating/rounding variants
// are matched from their AArch64 NEON intrinsics.
2177 defm : Neon_shiftNarrow_patterns<"lshr">;
2178 defm : Neon_shiftNarrow_patterns<"ashr">;
2180 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrun, "QSHRUNvvi">;
2181 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vrshrn, "RSHRNvvi">;
2182 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrun, "QRSHRUNvvi">;
2183 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrn, "SQSHRNvvi">;
2184 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqshrn, "UQSHRNvvi">;
2185 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrn, "SQRSHRNvvi">;
2186 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqrshrn, "UQRSHRNvvi">;
2188 // Conversions between fixed-point and floating-point
2189 class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T,
2190 RegisterOperand VPRC, ValueType DestTy, ValueType SrcTy,
2191 Operand ImmTy, SDPatternOperator IntOp>
2192 : NeonI_2VShiftImm<q, u, opcode,
2193 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
2194 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
2195 [(set (DestTy VPRC:$Rd), (DestTy (IntOp (SrcTy VPRC:$Rn),
2199 multiclass NeonI_N2VCvt_Fx2fp<bit u, bits<5> opcode, string asmop,
2200 SDPatternOperator IntOp> {
2201 def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2f32, v2i32,
2203 let Inst{22-21} = 0b01;
2206 def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4f32, v4i32,
2208 let Inst{22-21} = 0b01;
2211 def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2f64, v2i64,
2217 multiclass NeonI_N2VCvt_Fp2fx<bit u, bits<5> opcode, string asmop,
2218 SDPatternOperator IntOp> {
2219 def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2i32, v2f32,
2221 let Inst{22-21} = 0b01;
2224 def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4i32, v4f32,
2226 let Inst{22-21} = 0b01;
2229 def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2i64, v2f64,
2235 // Convert fixed-point to floating-point
// These reuse the AArch32 NEON conversion intrinsics (int_arm_neon_*),
// which encode the fractional-bit count as the immediate operand.
2236 defm VCVTxs2f : NeonI_N2VCvt_Fx2fp<0, 0b11100, "scvtf",
2237 int_arm_neon_vcvtfxs2fp>;
2238 defm VCVTxu2f : NeonI_N2VCvt_Fx2fp<1, 0b11100, "ucvtf",
2239 int_arm_neon_vcvtfxu2fp>;
2241 // Convert floating-point to fixed-point
2242 defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs",
2243 int_arm_neon_vcvtfp2fxs>;
2244 defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu",
2245 int_arm_neon_vcvtfp2fxu>;
2247 multiclass Neon_sshll2_0<SDNode ext>
2249 def _v8i8 : PatFrag<(ops node:$Rn),
2250 (v8i16 (ext (v8i8 (Neon_High16B node:$Rn))))>;
2251 def _v4i16 : PatFrag<(ops node:$Rn),
2252 (v4i32 (ext (v4i16 (Neon_High8H node:$Rn))))>;
2253 def _v2i32 : PatFrag<(ops node:$Rn),
2254 (v2i64 (ext (v2i32 (Neon_High4S node:$Rn))))>;
2257 defm NI_sext_high : Neon_sshll2_0<sext>;
2258 defm NI_zext_high : Neon_sshll2_0<zext>;
2261 //===----------------------------------------------------------------------===//
2262 // Multiclasses for NeonI_Across
2263 //===----------------------------------------------------------------------===//
2267 multiclass NeonI_2VAcross_1<bit u, bits<5> opcode,
2268 string asmop, SDPatternOperator opnode>
2270 def _1h8b: NeonI_2VAcross<0b0, u, 0b00, opcode,
2271 (outs FPR16:$Rd), (ins VPR64:$Rn),
2272 asmop # "\t$Rd, $Rn.8b",
2273 [(set (v1i16 FPR16:$Rd),
2274 (v1i16 (opnode (v8i8 VPR64:$Rn))))],
2277 def _1h16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
2278 (outs FPR16:$Rd), (ins VPR128:$Rn),
2279 asmop # "\t$Rd, $Rn.16b",
2280 [(set (v1i16 FPR16:$Rd),
2281 (v1i16 (opnode (v16i8 VPR128:$Rn))))],
2284 def _1s4h: NeonI_2VAcross<0b0, u, 0b01, opcode,
2285 (outs FPR32:$Rd), (ins VPR64:$Rn),
2286 asmop # "\t$Rd, $Rn.4h",
2287 [(set (v1i32 FPR32:$Rd),
2288 (v1i32 (opnode (v4i16 VPR64:$Rn))))],
2291 def _1s8h: NeonI_2VAcross<0b1, u, 0b01, opcode,
2292 (outs FPR32:$Rd), (ins VPR128:$Rn),
2293 asmop # "\t$Rd, $Rn.8h",
2294 [(set (v1i32 FPR32:$Rd),
2295 (v1i32 (opnode (v8i16 VPR128:$Rn))))],
2298 // _1d2s doesn't exist!
2300 def _1d4s: NeonI_2VAcross<0b1, u, 0b10, opcode,
2301 (outs FPR64:$Rd), (ins VPR128:$Rn),
2302 asmop # "\t$Rd, $Rn.4s",
2303 [(set (v1i64 FPR64:$Rd),
2304 (v1i64 (opnode (v4i32 VPR128:$Rn))))],
// Signed/unsigned add-long across vector: the across-vector multiclass
// above produces a widened scalar result (e.g. v1i16 from v8i8).
2308 defm SADDLV : NeonI_2VAcross_1<0b0, 0b00011, "saddlv", int_aarch64_neon_saddlv>;
2309 defm UADDLV : NeonI_2VAcross_1<0b1, 0b00011, "uaddlv", int_aarch64_neon_uaddlv>;
2313 multiclass NeonI_2VAcross_2<bit u, bits<5> opcode,
2314 string asmop, SDPatternOperator opnode>
2316 def _1b8b: NeonI_2VAcross<0b0, u, 0b00, opcode,
2317 (outs FPR8:$Rd), (ins VPR64:$Rn),
2318 asmop # "\t$Rd, $Rn.8b",
2319 [(set (v1i8 FPR8:$Rd),
2320 (v1i8 (opnode (v8i8 VPR64:$Rn))))],
2323 def _1b16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
2324 (outs FPR8:$Rd), (ins VPR128:$Rn),
2325 asmop # "\t$Rd, $Rn.16b",
2326 [(set (v1i8 FPR8:$Rd),
2327 (v1i8 (opnode (v16i8 VPR128:$Rn))))],
2330 def _1h4h: NeonI_2VAcross<0b0, u, 0b01, opcode,
2331 (outs FPR16:$Rd), (ins VPR64:$Rn),
2332 asmop # "\t$Rd, $Rn.4h",
2333 [(set (v1i16 FPR16:$Rd),
2334 (v1i16 (opnode (v4i16 VPR64:$Rn))))],
2337 def _1h8h: NeonI_2VAcross<0b1, u, 0b01, opcode,
2338 (outs FPR16:$Rd), (ins VPR128:$Rn),
2339 asmop # "\t$Rd, $Rn.8h",
2340 [(set (v1i16 FPR16:$Rd),
2341 (v1i16 (opnode (v8i16 VPR128:$Rn))))],
2344 // _1s2s doesn't exist!
2346 def _1s4s: NeonI_2VAcross<0b1, u, 0b10, opcode,
2347 (outs FPR32:$Rd), (ins VPR128:$Rn),
2348 asmop # "\t$Rd, $Rn.4s",
2349 [(set (v1i32 FPR32:$Rd),
2350 (v1i32 (opnode (v4i32 VPR128:$Rn))))],
// Integer max/min/add reductions across all lanes; the u bit selects the
// unsigned variant and the result keeps the element width (NeonI_2VAcross_2).
2354 defm SMAXV : NeonI_2VAcross_2<0b0, 0b01010, "smaxv", int_aarch64_neon_smaxv>;
2355 defm UMAXV : NeonI_2VAcross_2<0b1, 0b01010, "umaxv", int_aarch64_neon_umaxv>;
2357 defm SMINV : NeonI_2VAcross_2<0b0, 0b11010, "sminv", int_aarch64_neon_sminv>;
2358 defm UMINV : NeonI_2VAcross_2<0b1, 0b11010, "uminv", int_aarch64_neon_uminv>;
2360 defm ADDV : NeonI_2VAcross_2<0b0, 0b11011, "addv", int_aarch64_neon_vaddv>;
2364 multiclass NeonI_2VAcross_3<bit u, bits<5> opcode, bits<2> size,
2365 string asmop, SDPatternOperator opnode> {
2366 def _1s4s: NeonI_2VAcross<0b1, u, size, opcode,
2367 (outs FPR32:$Rd), (ins VPR128:$Rn),
2368 asmop # "\t$Rd, $Rn.4s",
2369 [(set (v1f32 FPR32:$Rd),
2370 (v1f32 (opnode (v4f32 VPR128:$Rn))))],
// Floating-point reductions across a v4f32 vector (only the 4s form
// exists — see NeonI_2VAcross_3). The size field's high bit selects
// min (0b10) versus max (0b00) behavior for each opcode.
2374 defm FMAXNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b00, "fmaxnmv",
2375 int_aarch64_neon_vmaxnmv>;
2376 defm FMINNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b10, "fminnmv",
2377 int_aarch64_neon_vminnmv>;
2379 defm FMAXV : NeonI_2VAcross_3<0b1, 0b01111, 0b00, "fmaxv",
2380 int_aarch64_neon_vmaxv>;
2381 defm FMINV : NeonI_2VAcross_3<0b1, 0b01111, 0b10, "fminv",
2382 int_aarch64_neon_vminv>;
2384 // The following definitions are for the instruction class (Perm)
2386 class NeonI_Permute<bit q, bits<2> size, bits<3> opcode,
2387 string asmop, RegisterOperand OpVPR, string OpS>
2388 : NeonI_Perm<q, size, opcode,
2389 (outs OpVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2390 asmop # "\t$Rd." # OpS # ", $Rn." # OpS # ", $Rm." # OpS,
2393 multiclass NeonI_Perm_pat<bits<3> opcode, string asmop> {
2394 def _8b : NeonI_Permute<0b0, 0b00, opcode, asmop, VPR64, "8b">;
2395 def _16b : NeonI_Permute<0b1, 0b00, opcode, asmop, VPR128, "16b">;
2396 def _4h : NeonI_Permute<0b0, 0b01, opcode, asmop, VPR64, "4h">;
2397 def _8h : NeonI_Permute<0b1, 0b01, opcode, asmop, VPR128, "8h">;
2398 def _2s : NeonI_Permute<0b0, 0b10, opcode, asmop, VPR64, "2s">;
2399 def _4s : NeonI_Permute<0b1, 0b10, opcode, asmop, VPR128, "4s">;
2400 def _2d : NeonI_Permute<0b1, 0b11, opcode, asmop, VPR128, "2d">;
// Permute instructions (unzip, transpose, zip). Opcode bit 2 distinguishes
// the "1" (0b0xx) and "2" (0b1xx) forms; the selection patterns for these
// are the NI_ei_i32-based Pat definitions later in this file.
2403 defm UZP1vvv : NeonI_Perm_pat<0b001, "uzp1">;
2404 defm TRN1vvv : NeonI_Perm_pat<0b010, "trn1">;
2405 defm ZIP1vvv : NeonI_Perm_pat<0b011, "zip1">;
2406 defm UZP2vvv : NeonI_Perm_pat<0b101, "uzp2">;
2407 defm TRN2vvv : NeonI_Perm_pat<0b110, "trn2">;
2408 defm ZIP2vvv : NeonI_Perm_pat<0b111, "zip2">;
2410 // Extract and Insert
2411 def NI_ei_i32 : PatFrag<(ops node:$Rn, node:$Rm, node:$Ext, node:$Ins),
2412 (vector_insert node:$Rn,
2413 (i32 (vector_extract node:$Rm, node:$Ext)),
2416 def NI_ei_f32 : PatFrag<(ops node:$Rn, node:$Rm, node:$Ext, node:$Ins),
2417 (vector_insert node:$Rn,
2418 (f32 (vector_extract node:$Rm, node:$Ext)),
2422 def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2423 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2424 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2425 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2426 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2428 (v16i8 VPR128:$Rn), 2, 1)),
2429 (v16i8 VPR128:$Rn), 4, 2)),
2430 (v16i8 VPR128:$Rn), 6, 3)),
2431 (v16i8 VPR128:$Rn), 8, 4)),
2432 (v16i8 VPR128:$Rn), 10, 5)),
2433 (v16i8 VPR128:$Rn), 12, 6)),
2434 (v16i8 VPR128:$Rn), 14, 7)),
2435 (v16i8 VPR128:$Rm), 0, 8)),
2436 (v16i8 VPR128:$Rm), 2, 9)),
2437 (v16i8 VPR128:$Rm), 4, 10)),
2438 (v16i8 VPR128:$Rm), 6, 11)),
2439 (v16i8 VPR128:$Rm), 8, 12)),
2440 (v16i8 VPR128:$Rm), 10, 13)),
2441 (v16i8 VPR128:$Rm), 12, 14)),
2442 (v16i8 VPR128:$Rm), 14, 15)),
2443 (UZP1vvv_16b VPR128:$Rn, VPR128:$Rm)>;
2445 class NI_Uzp1_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
2446 : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
2447 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
2449 (Ty VPR:$Rn), 2, 1)),
2450 (Ty VPR:$Rn), 4, 2)),
2451 (Ty VPR:$Rn), 6, 3)),
2452 (Ty VPR:$Rm), 0, 4)),
2453 (Ty VPR:$Rm), 2, 5)),
2454 (Ty VPR:$Rm), 4, 6)),
2455 (Ty VPR:$Rm), 6, 7)),
2456 (INST VPR:$Rn, VPR:$Rm)>;
2458 def : NI_Uzp1_v8<v8i8, VPR64, UZP1vvv_8b>;
2459 def : NI_Uzp1_v8<v8i16, VPR128, UZP1vvv_8h>;
2461 class NI_Uzp1_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
2463 : Pat<(Ty (ei (Ty (ei (Ty (ei
2465 (Ty VPR:$Rn), 2, 1)),
2466 (Ty VPR:$Rm), 0, 2)),
2467 (Ty VPR:$Rm), 2, 3)),
2468 (INST VPR:$Rn, VPR:$Rm)>;
2470 def : NI_Uzp1_v4<v4i16, VPR64, UZP1vvv_4h, NI_ei_i32>;
2471 def : NI_Uzp1_v4<v4i32, VPR128, UZP1vvv_4s, NI_ei_i32>;
2472 def : NI_Uzp1_v4<v4f32, VPR128, UZP1vvv_4s, NI_ei_f32>;
2475 def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2476 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2477 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2478 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2479 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2481 (v16i8 VPR128:$Rn), 1, 0)),
2482 (v16i8 VPR128:$Rn), 3, 1)),
2483 (v16i8 VPR128:$Rn), 5, 2)),
2484 (v16i8 VPR128:$Rn), 7, 3)),
2485 (v16i8 VPR128:$Rn), 9, 4)),
2486 (v16i8 VPR128:$Rn), 11, 5)),
2487 (v16i8 VPR128:$Rn), 13, 6)),
2488 (v16i8 VPR128:$Rn), 15, 7)),
2489 (v16i8 VPR128:$Rm), 1, 8)),
2490 (v16i8 VPR128:$Rm), 3, 9)),
2491 (v16i8 VPR128:$Rm), 5, 10)),
2492 (v16i8 VPR128:$Rm), 7, 11)),
2493 (v16i8 VPR128:$Rm), 9, 12)),
2494 (v16i8 VPR128:$Rm), 11, 13)),
2495 (v16i8 VPR128:$Rm), 13, 14)),
2496 (UZP2vvv_16b VPR128:$Rn, VPR128:$Rm)>;
2498 class NI_Uzp2_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
2499 : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
2500 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
2502 (Ty VPR:$Rn), 1, 0)),
2503 (Ty VPR:$Rn), 3, 1)),
2504 (Ty VPR:$Rn), 5, 2)),
2505 (Ty VPR:$Rn), 7, 3)),
2506 (Ty VPR:$Rm), 1, 4)),
2507 (Ty VPR:$Rm), 3, 5)),
2508 (Ty VPR:$Rm), 5, 6)),
2509 (INST VPR:$Rn, VPR:$Rm)>;
2511 def : NI_Uzp2_v8<v8i8, VPR64, UZP2vvv_8b>;
2512 def : NI_Uzp2_v8<v8i16, VPR128, UZP2vvv_8h>;
2514 class NI_Uzp2_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
2516 : Pat<(Ty (ei (Ty (ei (Ty (ei
2518 (Ty VPR:$Rn), 1, 0)),
2519 (Ty VPR:$Rn), 3, 1)),
2520 (Ty VPR:$Rm), 1, 2)),
2521 (INST VPR:$Rn, VPR:$Rm)>;
2523 def : NI_Uzp2_v4<v4i16, VPR64, UZP2vvv_4h, NI_ei_i32>;
2524 def : NI_Uzp2_v4<v4i32, VPR128, UZP2vvv_4s, NI_ei_i32>;
2525 def : NI_Uzp2_v4<v4f32, VPR128, UZP2vvv_4s, NI_ei_f32>;
2528 def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2529 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2530 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2531 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2532 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2534 (v16i8 VPR128:$Rm), 0, 1)),
2535 (v16i8 VPR128:$Rn), 1, 2)),
2536 (v16i8 VPR128:$Rm), 1, 3)),
2537 (v16i8 VPR128:$Rn), 2, 4)),
2538 (v16i8 VPR128:$Rm), 2, 5)),
2539 (v16i8 VPR128:$Rn), 3, 6)),
2540 (v16i8 VPR128:$Rm), 3, 7)),
2541 (v16i8 VPR128:$Rn), 4, 8)),
2542 (v16i8 VPR128:$Rm), 4, 9)),
2543 (v16i8 VPR128:$Rn), 5, 10)),
2544 (v16i8 VPR128:$Rm), 5, 11)),
2545 (v16i8 VPR128:$Rn), 6, 12)),
2546 (v16i8 VPR128:$Rm), 6, 13)),
2547 (v16i8 VPR128:$Rn), 7, 14)),
2548 (v16i8 VPR128:$Rm), 7, 15)),
2549 (ZIP1vvv_16b VPR128:$Rn, VPR128:$Rm)>;
2551 class NI_Zip1_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
2552 : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
2553 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
2555 (Ty VPR:$Rm), 0, 1)),
2556 (Ty VPR:$Rn), 1, 2)),
2557 (Ty VPR:$Rm), 1, 3)),
2558 (Ty VPR:$Rn), 2, 4)),
2559 (Ty VPR:$Rm), 2, 5)),
2560 (Ty VPR:$Rn), 3, 6)),
2561 (Ty VPR:$Rm), 3, 7)),
2562 (INST VPR:$Rn, VPR:$Rm)>;
2564 def : NI_Zip1_v8<v8i8, VPR64, ZIP1vvv_8b>;
2565 def : NI_Zip1_v8<v8i16, VPR128, ZIP1vvv_8h>;
2567 class NI_Zip1_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
2569 : Pat<(Ty (ei (Ty (ei (Ty (ei
2571 (Ty VPR:$Rm), 0, 1)),
2572 (Ty VPR:$Rn), 1, 2)),
2573 (Ty VPR:$Rm), 1, 3)),
2574 (INST VPR:$Rn, VPR:$Rm)>;
2576 def : NI_Zip1_v4<v4i16, VPR64, ZIP1vvv_4h, NI_ei_i32>;
2577 def : NI_Zip1_v4<v4i32, VPR128, ZIP1vvv_4s, NI_ei_i32>;
2578 def : NI_Zip1_v4<v4f32, VPR128, ZIP1vvv_4s, NI_ei_f32>;
2581 def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2582 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2583 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2584 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2585 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2587 (v16i8 VPR128:$Rn), 8, 0)),
2588 (v16i8 VPR128:$Rm), 8, 1)),
2589 (v16i8 VPR128:$Rn), 9, 2)),
2590 (v16i8 VPR128:$Rm), 9, 3)),
2591 (v16i8 VPR128:$Rn), 10, 4)),
2592 (v16i8 VPR128:$Rm), 10, 5)),
2593 (v16i8 VPR128:$Rn), 11, 6)),
2594 (v16i8 VPR128:$Rm), 11, 7)),
2595 (v16i8 VPR128:$Rn), 12, 8)),
2596 (v16i8 VPR128:$Rm), 12, 9)),
2597 (v16i8 VPR128:$Rn), 13, 10)),
2598 (v16i8 VPR128:$Rm), 13, 11)),
2599 (v16i8 VPR128:$Rn), 14, 12)),
2600 (v16i8 VPR128:$Rm), 14, 13)),
2601 (v16i8 VPR128:$Rn), 15, 14)),
2602 (ZIP2vvv_16b VPR128:$Rn, VPR128:$Rm)>;
2604 class NI_Zip2_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
2605 : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
2606 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
2608 (Ty VPR:$Rn), 4, 0)),
2609 (Ty VPR:$Rm), 4, 1)),
2610 (Ty VPR:$Rn), 5, 2)),
2611 (Ty VPR:$Rm), 5, 3)),
2612 (Ty VPR:$Rn), 6, 4)),
2613 (Ty VPR:$Rm), 6, 5)),
2614 (Ty VPR:$Rn), 7, 6)),
2615 (INST VPR:$Rn, VPR:$Rm)>;
2617 def : NI_Zip2_v8<v8i8, VPR64, ZIP2vvv_8b>;
2618 def : NI_Zip2_v8<v8i16, VPR128, ZIP2vvv_8h>;
2620 class NI_Zip2_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
2622 : Pat<(Ty (ei (Ty (ei (Ty (ei
2624 (Ty VPR:$Rn), 2, 0)),
2625 (Ty VPR:$Rm), 2, 1)),
2626 (Ty VPR:$Rn), 3, 2)),
2627 (INST VPR:$Rn, VPR:$Rm)>;
2629 def : NI_Zip2_v4<v4i16, VPR64, ZIP2vvv_4h, NI_ei_i32>;
2630 def : NI_Zip2_v4<v4i32, VPR128, ZIP2vvv_4s, NI_ei_i32>;
2631 def : NI_Zip2_v4<v4f32, VPR128, ZIP2vvv_4s, NI_ei_f32>;
2634 def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2635 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2636 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2638 (v16i8 VPR128:$Rm), 0, 1)),
2639 (v16i8 VPR128:$Rm), 2, 3)),
2640 (v16i8 VPR128:$Rm), 4, 5)),
2641 (v16i8 VPR128:$Rm), 6, 7)),
2642 (v16i8 VPR128:$Rm), 8, 9)),
2643 (v16i8 VPR128:$Rm), 10, 11)),
2644 (v16i8 VPR128:$Rm), 12, 13)),
2645 (v16i8 VPR128:$Rm), 14, 15)),
2646 (TRN1vvv_16b VPR128:$Rn, VPR128:$Rm)>;
// TRN1/TRN2 (vector transpose) pattern matching.
// Each Pat below matches a chain of per-element insert operations
// (NI_ei_i32 / NI_ei_f32 pattern fragments defined earlier in this file)
// and folds the whole chain into a single TRN1/TRN2 instruction.
// TRN1 patterns pull from $Rm at the even/odd index pairs (0,1), (2,3), ...
2648 class NI_Trn1_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
2649 : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
2651 (Ty VPR:$Rm), 0, 1)),
2652 (Ty VPR:$Rm), 2, 3)),
2653 (Ty VPR:$Rm), 4, 5)),
2654 (Ty VPR:$Rm), 6, 7)),
2655 (INST VPR:$Rn, VPR:$Rm)>;
// 8-element integer vectors (8b in a 64-bit register, 8h in a 128-bit one).
2657 def : NI_Trn1_v8<v8i8, VPR64, TRN1vvv_8b>;
2658 def : NI_Trn1_v8<v8i16, VPR128, TRN1vvv_8h>;
// 4-element version; the element-insert fragment 'ei' is a class parameter
// so the same pattern covers integer (NI_ei_i32) and FP (NI_ei_f32) lanes.
2660 class NI_Trn1_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
2662 : Pat<(Ty (ei (Ty (ei
2664 (Ty VPR:$Rm), 0, 1)),
2665 (Ty VPR:$Rm), 2, 3)),
2666 (INST VPR:$Rn, VPR:$Rm)>;
2668 def : NI_Trn1_v4<v4i16, VPR64, TRN1vvv_4h, NI_ei_i32>;
2669 def : NI_Trn1_v4<v4i32, VPR128, TRN1vvv_4s, NI_ei_i32>;
2670 def : NI_Trn1_v4<v4f32, VPR128, TRN1vvv_4s, NI_ei_f32>;
// TRN2 patterns: same shape as TRN1 but the index pairs are swapped
// ((1,0), (3,2), ...) and pull from $Rn.
2673 def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2674 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2675 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
2677 (v16i8 VPR128:$Rn), 1, 0)),
2678 (v16i8 VPR128:$Rn), 3, 2)),
2679 (v16i8 VPR128:$Rn), 5, 4)),
2680 (v16i8 VPR128:$Rn), 7, 6)),
2681 (v16i8 VPR128:$Rn), 9, 8)),
2682 (v16i8 VPR128:$Rn), 11, 10)),
2683 (v16i8 VPR128:$Rn), 13, 12)),
2684 (v16i8 VPR128:$Rn), 15, 14)),
2685 (TRN2vvv_16b VPR128:$Rn, VPR128:$Rm)>;
2687 class NI_Trn2_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
2688 : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
2690 (Ty VPR:$Rn), 1, 0)),
2691 (Ty VPR:$Rn), 3, 2)),
2692 (Ty VPR:$Rn), 5, 4)),
2693 (Ty VPR:$Rn), 7, 6)),
2694 (INST VPR:$Rn, VPR:$Rm)>;
2696 def : NI_Trn2_v8<v8i8, VPR64, TRN2vvv_8b>;
2697 def : NI_Trn2_v8<v8i16, VPR128, TRN2vvv_8h>;
2699 class NI_Trn2_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
2701 : Pat<(Ty (ei (Ty (ei
2703 (Ty VPR:$Rn), 1, 0)),
2704 (Ty VPR:$Rn), 3, 2)),
2705 (INST VPR:$Rn, VPR:$Rm)>;
2707 def : NI_Trn2_v4<v4i16, VPR64, TRN2vvv_4h, NI_ei_i32>;
2708 def : NI_Trn2_v4<v4i32, VPR128, TRN2vvv_4s, NI_ei_i32>;
2709 def : NI_Trn2_v4<v4f32, VPR128, TRN2vvv_4s, NI_ei_f32>;
2711 // End of implementation for instruction class (Perm)
2713 // The following definitions are for the instruction class (3V Diff).
2715 // normal long/long2 pattern
// NeonI_3VDL: base class for the "long" form of the three-operand,
// different-element-size (3V Diff) instructions. Both source operands are
// widened by 'ext' (sext/zext, or a *_high fragment for the "2" variants)
// to the result element size before 'opnode' is applied, producing a
// full 128-bit result from narrower sources.
2716 class NeonI_3VDL<bit q, bit u, bits<2> size, bits<4> opcode,
2717 string asmop, string ResS, string OpS,
2718 SDPatternOperator opnode, SDPatternOperator ext,
2719 RegisterOperand OpVPR,
2720 ValueType ResTy, ValueType OpTy>
2721 : NeonI_3VDiff<q, u, size, opcode,
2722 (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2723 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2724 [(set (ResTy VPR128:$Rd),
2725 (ResTy (opnode (ResTy (ext (OpTy OpVPR:$Rn))),
2726 (ResTy (ext (OpTy OpVPR:$Rm))))))],
// Signed long: operands are 64-bit vectors, sign-extended before opnode.
2729 multiclass NeonI_3VDL_s<bit u, bits<4> opcode,
2730 string asmop, SDPatternOperator opnode,
2731 bit Commutable = 0> {
2732 let isCommutable = Commutable in {
2733 def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2734 opnode, sext, VPR64, v8i16, v8i8>;
2735 def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2736 opnode, sext, VPR64, v4i32, v4i16>;
2737 def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2738 opnode, sext, VPR64, v2i64, v2i32>;
// Signed long "2" variant: operates on the high half of 128-bit sources,
// using the NI_sext_high_* pattern fragments.
2742 multiclass NeonI_3VDL2_s<bit u, bits<4> opcode, string asmop,
2743 SDPatternOperator opnode, bit Commutable = 0> {
2744 let isCommutable = Commutable in {
2745 def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2746 opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
2747 def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2748 opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
2749 def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2750 opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
// Unsigned long: same as the signed version but zero-extends.
2754 multiclass NeonI_3VDL_u<bit u, bits<4> opcode, string asmop,
2755 SDPatternOperator opnode, bit Commutable = 0> {
2756 let isCommutable = Commutable in {
2757 def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2758 opnode, zext, VPR64, v8i16, v8i8>;
2759 def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2760 opnode, zext, VPR64, v4i32, v4i16>;
2761 def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2762 opnode, zext, VPR64, v2i64, v2i32>;
// Unsigned long "2" variant (high halves, zero-extended).
2766 multiclass NeonI_3VDL2_u<bit u, bits<4> opcode, string asmop,
2767 SDPatternOperator opnode, bit Commutable = 0> {
2768 let isCommutable = Commutable in {
2769 def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2770 opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
2771 def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2772 opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
2773 def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2774 opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
// Long add/subtract instructions; the add forms are marked commutable.
2778 defm SADDLvvv :  NeonI_3VDL_s<0b0, 0b0000, "saddl", add, 1>;
2779 defm UADDLvvv :  NeonI_3VDL_u<0b1, 0b0000, "uaddl", add, 1>;
2781 defm SADDL2vvv :  NeonI_3VDL2_s<0b0, 0b0000, "saddl2", add, 1>;
2782 defm UADDL2vvv :  NeonI_3VDL2_u<0b1, 0b0000, "uaddl2", add, 1>;
2784 defm SSUBLvvv :  NeonI_3VDL_s<0b0, 0b0010, "ssubl", sub, 0>;
2785 defm USUBLvvv :  NeonI_3VDL_u<0b1, 0b0010, "usubl", sub, 0>;
2787 defm SSUBL2vvv : NeonI_3VDL2_s<0b0, 0b0010, "ssubl2", sub, 0>;
2788 defm USUBL2vvv : NeonI_3VDL2_u<0b1, 0b0010, "usubl2", sub, 0>;
2790 // normal wide/wide2 pattern
// NeonI_3VDW: "wide" form — the first operand is already at the result
// element size (a 128-bit vector); only the second operand is widened
// by 'ext' before 'opnode' is applied.
2791 class NeonI_3VDW<bit q, bit u, bits<2> size, bits<4> opcode,
2792 string asmop, string ResS, string OpS,
2793 SDPatternOperator opnode, SDPatternOperator ext,
2794 RegisterOperand OpVPR,
2795 ValueType ResTy, ValueType OpTy>
2796 : NeonI_3VDiff<q, u, size, opcode,
2797 (outs VPR128:$Rd), (ins VPR128:$Rn, OpVPR:$Rm),
2798 asmop # "\t$Rd." # ResS # ", $Rn." # ResS # ", $Rm." # OpS,
2799 [(set (ResTy VPR128:$Rd),
2800 (ResTy (opnode (ResTy VPR128:$Rn),
2801 (ResTy (ext (OpTy OpVPR:$Rm))))))],
// Signed wide (second operand sign-extended).
2804 multiclass NeonI_3VDW_s<bit u, bits<4> opcode, string asmop,
2805 SDPatternOperator opnode> {
2806 def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2807 opnode, sext, VPR64, v8i16, v8i8>;
2808 def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2809 opnode, sext, VPR64, v4i32, v4i16>;
2810 def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2811 opnode, sext, VPR64, v2i64, v2i32>;
2814 defm SADDWvvv :  NeonI_3VDW_s<0b0, 0b0001, "saddw", add>;
2815 defm SSUBWvvv :  NeonI_3VDW_s<0b0, 0b0011, "ssubw", sub>;
// Signed wide "2" variant: second operand is the high half of a 128-bit
// vector (NI_sext_high_*).
2817 multiclass NeonI_3VDW2_s<bit u, bits<4> opcode, string asmop,
2818 SDPatternOperator opnode> {
2819 def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2820 opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
2821 def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2822 opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
2823 def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2824 opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
2827 defm SADDW2vvv : NeonI_3VDW2_s<0b0, 0b0001, "saddw2", add>;
2828 defm SSUBW2vvv : NeonI_3VDW2_s<0b0, 0b0011, "ssubw2", sub>;
// Unsigned wide (second operand zero-extended).
2830 multiclass NeonI_3VDW_u<bit u, bits<4> opcode, string asmop,
2831 SDPatternOperator opnode> {
2832 def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2833 opnode, zext, VPR64, v8i16, v8i8>;
2834 def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2835 opnode, zext, VPR64, v4i32, v4i16>;
2836 def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2837 opnode, zext, VPR64, v2i64, v2i32>;
2840 defm UADDWvvv : NeonI_3VDW_u<0b1, 0b0001, "uaddw", add>;
2841 defm USUBWvvv : NeonI_3VDW_u<0b1, 0b0011, "usubw", sub>;
// Unsigned wide "2" variant (high halves, zero-extended).
2843 multiclass NeonI_3VDW2_u<bit u, bits<4> opcode, string asmop,
2844 SDPatternOperator opnode> {
2845 def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2846 opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
2847 def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2848 opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
2849 def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2850 opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
2853 defm UADDW2vvv : NeonI_3VDW2_u<0b1, 0b0001, "uaddw2", add>;
2854 defm USUBW2vvv : NeonI_3VDW2_u<0b1, 0b0011, "usubw2", sub>;
2856 // Get the high half part of the vector element.
// Pattern fragments that extract the high half of each element by
// shifting right by half the element width and truncating, e.g. for 8h:
// trunc(v8i16 >> 8) -> v8i8. Used to match the ADDHN/SUBHN semantics.
2857 multiclass NeonI_get_high {
2858 def _8h : PatFrag<(ops node:$Rn),
2859 (v8i8 (trunc (v8i16 (srl (v8i16 node:$Rn),
2860 (v8i16 (Neon_vdup (i32 8)))))))>;
2861 def _4s : PatFrag<(ops node:$Rn),
2862 (v4i16 (trunc (v4i32 (srl (v4i32 node:$Rn),
2863 (v4i32 (Neon_vdup (i32 16)))))))>;
2864 def _2d : PatFrag<(ops node:$Rn),
2865 (v2i32 (trunc (v2i64 (srl (v2i64 node:$Rn),
2866 (v2i64 (Neon_vdup (i32 32)))))))>;
2869 defm NI_get_hi : NeonI_get_high;
2871 // pattern for addhn/subhn with 2 operands
// Narrowing high-half class: opnode (add/sub) is applied at full width
// on two 128-bit operands and the high half of each lane is returned
// in a 64-bit result register.
2872 class NeonI_3VDN_addhn_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
2873 string asmop, string ResS, string OpS,
2874 SDPatternOperator opnode, SDPatternOperator get_hi,
2875 ValueType ResTy, ValueType OpTy>
2876 : NeonI_3VDiff<q, u, size, opcode,
2877 (outs VPR64:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
2878 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2879 [(set (ResTy VPR64:$Rd),
2881 (OpTy (opnode (OpTy VPR128:$Rn),
2882 (OpTy VPR128:$Rm))))))],
2885 multiclass NeonI_3VDN_addhn_2Op<bit u, bits<4> opcode, string asmop,
2886 SDPatternOperator opnode, bit Commutable = 0> {
2887 let isCommutable = Commutable in {
2888 def _8b8h : NeonI_3VDN_addhn_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
2889 opnode, NI_get_hi_8h, v8i8, v8i16>;
2890 def _4h4s : NeonI_3VDN_addhn_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
2891 opnode, NI_get_hi_4s, v4i16, v4i32>;
2892 def _2s2d : NeonI_3VDN_addhn_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
2893 opnode, NI_get_hi_2d, v2i32, v2i64>;
2897 defm ADDHNvvv :  NeonI_3VDN_addhn_2Op<0b0, 0b0100, "addhn", add, 1>;
2898 defm SUBHNvvv :  NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>;
2900 // pattern for operation with 2 operands
// Generic two-operand 3V-Diff class: result and operand register classes
// and types are fully parameterized; used by both narrowing and long forms.
2901 class NeonI_3VD_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
2902 string asmop, string ResS, string OpS,
2903 SDPatternOperator opnode,
2904 RegisterOperand ResVPR, RegisterOperand OpVPR,
2905 ValueType ResTy, ValueType OpTy>
2906 : NeonI_3VDiff<q, u, size, opcode,
2907 (outs ResVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2908 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2909 [(set (ResTy ResVPR:$Rd),
2910 (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))],
2913 // normal narrow pattern
// Narrowing: 128-bit operands, 64-bit result (e.g. raddhn/rsubhn below).
2914 multiclass NeonI_3VDN_2Op<bit u, bits<4> opcode, string asmop,
2915 SDPatternOperator opnode, bit Commutable = 0> {
2916 let isCommutable = Commutable in {
2917 def _8b8h : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
2918 opnode, VPR64, VPR128, v8i8, v8i16>;
2919 def _4h4s : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
2920 opnode, VPR64, VPR128, v4i16, v4i32>;
2921 def _2s2d : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
2922 opnode, VPR64, VPR128, v2i32, v2i64>;
// Rounding halving-narrow add/sub, selected via the ARM NEON intrinsics.
2926 defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>;
2927 defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>;
2929 // pattern for acle intrinsic with 3 operands
// Three-operand narrowing-high class ("2" variants): the destination is
// tied to $src, so only the high half of $Rd is written by the narrow op.
// No pattern is attached here; selection is done by NarrowHighHalfPat below.
2930 class NeonI_3VDN_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
2931 string asmop, string ResS, string OpS>
2932 : NeonI_3VDiff<q, u, size, opcode,
2933 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
2934 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2936 let Constraints = "$src = $Rd";
2937 let neverHasSideEffects = 1;
2940 multiclass NeonI_3VDN_3Op_v1<bit u, bits<4> opcode, string asmop> {
2941 def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">;
2942 def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">;
2943 def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">;
2946 defm ADDHN2vvv :  NeonI_3VDN_3Op_v1<0b0, 0b0100, "addhn2">;
2947 defm SUBHN2vvv :  NeonI_3VDN_3Op_v1<0b0, 0b0110, "subhn2">;
2949 defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2">;
2950 defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2">;
2952 // Patterns have to be separate because there's a SUBREG_TO_REG in the output
// NarrowHighHalfPat: matches "combine the low half $src with the narrowed
// result of coreop($Rn, $Rm)" and selects the tied three-operand *HN2
// instruction, inserting $src into the full 128-bit register first.
2954 class NarrowHighHalfPat<Instruction INST, ValueType DstTy, ValueType SrcTy,
2955 SDPatternOperator coreop>
2956 : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2957 (v1i64 (bitconvert (DstTy (coreop (SrcTy VPR128:$Rn),
2958 (SrcTy VPR128:$Rm)))))),
2959 (INST (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2960 VPR128:$Rn, VPR128:$Rm)>;
// addhn2: high half of (Rn + Rm), via the NI_get_hi_* fragments.
2963 def : NarrowHighHalfPat<ADDHN2vvv_16b8h, v8i8, v8i16,
2964 BinOpFrag<(NI_get_hi_8h (add node:$LHS, node:$RHS))>>;
2965 def : NarrowHighHalfPat<ADDHN2vvv_8h4s, v4i16, v4i32,
2966 BinOpFrag<(NI_get_hi_4s (add node:$LHS, node:$RHS))>>;
2967 def : NarrowHighHalfPat<ADDHN2vvv_4s2d, v2i32, v2i64,
2968 BinOpFrag<(NI_get_hi_2d (add node:$LHS, node:$RHS))>>;
// subhn2: high half of (Rn - Rm).
2971 def : NarrowHighHalfPat<SUBHN2vvv_16b8h, v8i8, v8i16,
2972 BinOpFrag<(NI_get_hi_8h (sub node:$LHS, node:$RHS))>>;
2973 def : NarrowHighHalfPat<SUBHN2vvv_8h4s, v4i16, v4i32,
2974 BinOpFrag<(NI_get_hi_4s (sub node:$LHS, node:$RHS))>>;
2975 def : NarrowHighHalfPat<SUBHN2vvv_4s2d, v2i32, v2i64,
2976 BinOpFrag<(NI_get_hi_2d (sub node:$LHS, node:$RHS))>>;
// raddhn2/rsubhn2: rounding variants, matched via the ARM NEON intrinsics.
2979 def : NarrowHighHalfPat<RADDHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vraddhn>;
2980 def : NarrowHighHalfPat<RADDHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vraddhn>;
2981 def : NarrowHighHalfPat<RADDHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vraddhn>;
2984 def : NarrowHighHalfPat<RSUBHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vrsubhn>;
2985 def : NarrowHighHalfPat<RSUBHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vrsubhn>;
2986 def : NarrowHighHalfPat<RSUBHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vrsubhn>;
2988 // Patterns that need to extend the result
// NeonI_3VDL_Ext: long form where 'opnode' produces a narrow result
// (OpSTy) that is then zero-extended to the 128-bit result type.
// Used for the absolute-difference long instructions (sabdl/uabdl):
// |a-b| is non-negative, so zext is correct for both signednesses.
2989 class NeonI_3VDL_Ext<bit q, bit u, bits<2> size, bits<4> opcode,
2990 string asmop, string ResS, string OpS,
2991 SDPatternOperator opnode,
2992 RegisterOperand OpVPR,
2993 ValueType ResTy, ValueType OpTy, ValueType OpSTy>
2994 : NeonI_3VDiff<q, u, size, opcode,
2995 (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2996 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2997 [(set (ResTy VPR128:$Rd),
2998 (ResTy (zext (OpSTy (opnode (OpTy OpVPR:$Rn),
2999 (OpTy OpVPR:$Rm))))))],
3002 multiclass NeonI_3VDL_zext<bit u, bits<4> opcode, string asmop,
3003 SDPatternOperator opnode, bit Commutable = 0> {
3004 let isCommutable = Commutable in {
3005 def _8h8b : NeonI_3VDL_Ext<0b0, u, 0b00, opcode, asmop, "8h", "8b",
3006 opnode, VPR64, v8i16, v8i8, v8i8>;
3007 def _4s4h : NeonI_3VDL_Ext<0b0, u, 0b01, opcode, asmop, "4s", "4h",
3008 opnode, VPR64, v4i32, v4i16, v4i16>;
3009 def _2d2s : NeonI_3VDL_Ext<0b0, u, 0b10, opcode, asmop, "2d", "2s",
3010 opnode, VPR64, v2i64, v2i32, v2i32>;
3014 defm SABDLvvv : NeonI_3VDL_zext<0b0, 0b0111, "sabdl", int_arm_neon_vabds, 1>;
3015 defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>;
// NeonI_Op_High: wraps 'op' so it operates on the high halves of two
// 128-bit vectors (via the Neon_High* fragments). The resulting PatFrags
// back all the "*2" (second-half) instruction variants below.
3017 multiclass NeonI_Op_High<SDPatternOperator op> {
3018 def _16B : PatFrag<(ops node:$Rn, node:$Rm),
3019 (op (v8i8 (Neon_High16B node:$Rn)),
3020 (v8i8 (Neon_High16B node:$Rm)))>;
3021 def _8H : PatFrag<(ops node:$Rn, node:$Rm),
3022 (op (v4i16 (Neon_High8H node:$Rn)),
3023 (v4i16 (Neon_High8H node:$Rm)))>;
3024 def _4S : PatFrag<(ops node:$Rn, node:$Rm),
3025 (op (v2i32 (Neon_High4S node:$Rn)),
3026 (v2i32 (Neon_High4S node:$Rm)))>;
3029 defm NI_sabdl_hi : NeonI_Op_High<int_arm_neon_vabds>;
3030 defm NI_uabdl_hi : NeonI_Op_High<int_arm_neon_vabdu>;
3031 defm NI_smull_hi : NeonI_Op_High<int_arm_neon_vmulls>;
3032 defm NI_umull_hi : NeonI_Op_High<int_arm_neon_vmullu>;
3033 defm NI_qdmull_hi : NeonI_Op_High<int_arm_neon_vqdmull>;
3034 defm NI_pmull_hi : NeonI_Op_High<int_arm_neon_vmullp>;
// sabdl2/uabdl2: 'opnode' is passed by name and resolved with !cast so the
// size-specific _16B/_8H/_4S high-half fragment can be selected per def.
3036 multiclass NeonI_3VDL_Abd_u<bit u, bits<4> opcode, string asmop, string opnode,
3037 bit Commutable = 0> {
3038 let isCommutable = Commutable in {
3039 def _8h8b : NeonI_3VDL_Ext<0b1, u, 0b00, opcode, asmop, "8h", "16b",
3040 !cast<PatFrag>(opnode # "_16B"),
3041 VPR128, v8i16, v16i8, v8i8>;
3042 def _4s4h : NeonI_3VDL_Ext<0b1, u, 0b01, opcode, asmop, "4s", "8h",
3043 !cast<PatFrag>(opnode # "_8H"),
3044 VPR128, v4i32, v8i16, v4i16>;
3045 def _2d2s : NeonI_3VDL_Ext<0b1, u, 0b10, opcode, asmop, "2d", "4s",
3046 !cast<PatFrag>(opnode # "_4S"),
3047 VPR128, v2i64, v4i32, v2i32>;
3051 defm SABDL2vvv : NeonI_3VDL_Abd_u<0b0, 0b0111, "sabdl2", "NI_sabdl_hi", 1>;
3052 defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 0b0111, "uabdl2", "NI_uabdl_hi", 1>;
3054 // For patterns that need two operators chained together.
// NeonI_3VDL_Aba: absolute-difference-and-accumulate long form.
// Computes subop($Rn, $Rm) (the absolute difference), zero-extends the
// narrow result, and combines it with the accumulator $src via 'opnode'.
// $src is tied to $Rd (read-modify-write destination).
3055 class NeonI_3VDL_Aba<bit q, bit u, bits<2> size, bits<4> opcode,
3056 string asmop, string ResS, string OpS,
3057 SDPatternOperator opnode, SDPatternOperator subop,
3058 RegisterOperand OpVPR,
3059 ValueType ResTy, ValueType OpTy, ValueType OpSTy>
3060 : NeonI_3VDiff<q, u, size, opcode,
3061 (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
3062 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
3063 [(set (ResTy VPR128:$Rd),
3065 (ResTy VPR128:$src),
3066 (ResTy (zext (OpSTy (subop (OpTy OpVPR:$Rn),
3067 (OpTy OpVPR:$Rm))))))))],
3069 let Constraints = "$src = $Rd";
3072 multiclass NeonI_3VDL_Aba_v1<bit u, bits<4> opcode, string asmop,
3073 SDPatternOperator opnode, SDPatternOperator subop>{
3074 def _8h8b : NeonI_3VDL_Aba<0b0, u, 0b00, opcode, asmop, "8h", "8b",
3075 opnode, subop, VPR64, v8i16, v8i8, v8i8>;
3076 def _4s4h : NeonI_3VDL_Aba<0b0, u, 0b01, opcode, asmop, "4s", "4h",
3077 opnode, subop, VPR64, v4i32, v4i16, v4i16>;
3078 def _2d2s : NeonI_3VDL_Aba<0b0, u, 0b10, opcode, asmop, "2d", "2s",
3079 opnode, subop, VPR64, v2i64, v2i32, v2i32>;
// sabal/uabal: accumulate (add) the signed/unsigned absolute difference.
3082 defm SABALvvv :  NeonI_3VDL_Aba_v1<0b0, 0b0101, "sabal",
3083 add, int_arm_neon_vabds>;
3084 defm UABALvvv :  NeonI_3VDL_Aba_v1<0b1, 0b0101, "uabal",
3085 add, int_arm_neon_vabdu>;
// "2" variants: subop is resolved by name to the high-half PatFrags.
3087 multiclass NeonI_3VDL2_Aba_v1<bit u, bits<4> opcode, string asmop,
3088 SDPatternOperator opnode, string subop> {
3089 def _8h8b : NeonI_3VDL_Aba<0b1, u, 0b00, opcode, asmop, "8h", "16b",
3090 opnode, !cast<PatFrag>(subop # "_16B"),
3091 VPR128, v8i16, v16i8, v8i8>;
3092 def _4s4h : NeonI_3VDL_Aba<0b1, u, 0b01, opcode, asmop, "4s", "8h",
3093 opnode, !cast<PatFrag>(subop # "_8H"),
3094 VPR128, v4i32, v8i16, v4i16>;
3095 def _2d2s : NeonI_3VDL_Aba<0b1, u, 0b10, opcode, asmop, "2d", "4s",
3096 opnode, !cast<PatFrag>(subop # "_4S"),
3097 VPR128, v2i64, v4i32, v2i32>;
3100 defm SABAL2vvv :  NeonI_3VDL2_Aba_v1<0b0, 0b0101, "sabal2", add,
3102 defm UABAL2vvv :  NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add,
3105 // Long pattern with 2 operands
// Long multiplies: 64-bit operands, 128-bit result, matched through the
// ARM NEON vmull intrinsics.
3106 multiclass NeonI_3VDL_2Op<bit u, bits<4> opcode, string asmop,
3107 SDPatternOperator opnode, bit Commutable = 0> {
3108 let isCommutable = Commutable in {
3109 def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
3110 opnode, VPR128, VPR64, v8i16, v8i8>;
3111 def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
3112 opnode, VPR128, VPR64, v4i32, v4i16>;
3113 def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
3114 opnode, VPR128, VPR64, v2i64, v2i32>;
3118 defm SMULLvvv :  NeonI_3VDL_2Op<0b0, 0b1100, "smull", int_arm_neon_vmulls, 1>;
3119 defm UMULLvvv :  NeonI_3VDL_2Op<0b1, 0b1100, "umull", int_arm_neon_vmullu, 1>;
// Second-half long multiply: both operands are full 128-bit vectors and
// 'opnode' (a NI_*_hi fragment) extracts their high halves.
3121 class NeonI_3VDL2_2Op_mull<bit q, bit u, bits<2> size, bits<4> opcode,
3122 string asmop, string ResS, string OpS,
3123 SDPatternOperator opnode,
3124 ValueType ResTy, ValueType OpTy>
3125 : NeonI_3VDiff<q, u, size, opcode,
3126 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
3127 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
3128 [(set (ResTy VPR128:$Rd),
3129 (ResTy (opnode (OpTy VPR128:$Rn), (OpTy VPR128:$Rm))))],
3132 multiclass NeonI_3VDL2_2Op_mull_v1<bit u, bits<4> opcode, string asmop,
3133 string opnode, bit Commutable = 0> {
3134 let isCommutable = Commutable in {
3135 def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
3136 !cast<PatFrag>(opnode # "_16B"),
3138 def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
3139 !cast<PatFrag>(opnode # "_8H"),
3141 def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
3142 !cast<PatFrag>(opnode # "_4S"),
3147 defm SMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b0, 0b1100, "smull2",
3149 defm UMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b1, 0b1100, "umull2",
3152 // Long pattern with 3 operands
// Multiply-accumulate long: the accumulator $src is tied to $Rd;
// 'opnode' is one of the Neon_*mlal/*mlsl fragments defined below.
3153 class NeonI_3VDL_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
3154 string asmop, string ResS, string OpS,
3155 SDPatternOperator opnode,
3156 ValueType ResTy, ValueType OpTy>
3157 : NeonI_3VDiff<q, u, size, opcode,
3158 (outs VPR128:$Rd), (ins VPR128:$src, VPR64:$Rn, VPR64:$Rm),
3159 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
3160 [(set (ResTy VPR128:$Rd),
3162 (ResTy VPR128:$src),
3163 (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))))],
3165 let Constraints = "$src = $Rd";
3168 multiclass NeonI_3VDL_3Op_v1<bit u, bits<4> opcode, string asmop,
3169 SDPatternOperator opnode> {
3170 def _8h8b : NeonI_3VDL_3Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
3171 opnode, v8i16, v8i8>;
3172 def _4s4h : NeonI_3VDL_3Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
3173 opnode, v4i32, v4i16>;
3174 def _2d2s : NeonI_3VDL_3Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
3175 opnode, v2i64, v2i32>;
// Fragments combining the accumulator with a long multiply
// (smlal/umlal add the product; smlsl/umlsl subtract it).
3178 def Neon_smlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
3180 (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
3182 def Neon_umlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
3184 (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
3186 def Neon_smlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
3188 (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
3190 def Neon_umlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
3192 (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
3194 defm SMLALvvv :  NeonI_3VDL_3Op_v1<0b0, 0b1000, "smlal", Neon_smlal>;
3195 defm UMLALvvv :  NeonI_3VDL_3Op_v1<0b1, 0b1000, "umlal", Neon_umlal>;
3197 defm SMLSLvvv :  NeonI_3VDL_3Op_v1<0b0, 0b1010, "smlsl", Neon_smlsl>;
3198 defm UMLSLvvv :  NeonI_3VDL_3Op_v1<0b1, 0b1010, "umlsl", Neon_umlsl>;
// NeonI_3VDL2_3Op_mlas: accumulating form with separate accumulate
// ('subop': add or sub / saturating add or sub) and multiply ('opnode')
// operators. The accumulator $src is tied to $Rd.
3200 class NeonI_3VDL2_3Op_mlas<bit q, bit u, bits<2> size, bits<4> opcode,
3201 string asmop, string ResS, string OpS,
3202 SDPatternOperator subop, SDPatternOperator opnode,
3203 RegisterOperand OpVPR,
3204 ValueType ResTy, ValueType OpTy>
3205 : NeonI_3VDiff<q, u, size, opcode,
3206 (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
3207 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
3208 [(set (ResTy VPR128:$Rd),
3210 (ResTy VPR128:$src),
3211 (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))],
3213 let Constraints = "$src = $Rd";
// Second-half multiply-accumulate: multiply fragment resolved by name.
3216 multiclass NeonI_3VDL2_3Op_mlas_v1<bit u, bits<4> opcode, string asmop,
3217 SDPatternOperator subop, string opnode> {
3218 def _8h16b : NeonI_3VDL2_3Op_mlas<0b1, u, 0b00, opcode, asmop, "8h", "16b",
3219 subop, !cast<PatFrag>(opnode # "_16B"),
3220 VPR128, v8i16, v16i8>;
3221 def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
3222 subop, !cast<PatFrag>(opnode # "_8H"),
3223 VPR128, v4i32, v8i16>;
3224 def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
3225 subop, !cast<PatFrag>(opnode # "_4S"),
3226 VPR128, v2i64, v4i32>;
3229 defm SMLAL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1000, "smlal2",
3230 add, "NI_smull_hi">;
3231 defm UMLAL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1000, "umlal2",
3232 add, "NI_umull_hi">;
3234 defm SMLSL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1010, "smlsl2",
3235 sub, "NI_smull_hi">;
3236 defm UMLSL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1010, "umlsl2",
3237 sub, "NI_umull_hi">;
// Saturating doubling multiply-accumulate (first-half, 64-bit operands):
// qdmull followed by a saturating add/sub into the accumulator.
3239 multiclass NeonI_3VDL_qdmlal_3Op_v2<bit u, bits<4> opcode, string asmop,
3240 SDPatternOperator opnode> {
3241 def _4s4h : NeonI_3VDL2_3Op_mlas<0b0, u, 0b01, opcode, asmop, "4s", "4h",
3242 opnode, int_arm_neon_vqdmull,
3243 VPR64, v4i32, v4i16>;
3244 def _2d2s : NeonI_3VDL2_3Op_mlas<0b0, u, 0b10, opcode, asmop, "2d", "2s",
3245 opnode, int_arm_neon_vqdmull,
3246 VPR64, v2i64, v2i32>;
3249 defm SQDMLALvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1001, "sqdmlal",
3250 int_arm_neon_vqadds>;
3251 defm SQDMLSLvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1011, "sqdmlsl",
3252 int_arm_neon_vqsubs>;
// sqdmull: only 4h and 2s element sizes exist (no byte form).
3254 multiclass NeonI_3VDL_v2<bit u, bits<4> opcode, string asmop,
3255 SDPatternOperator opnode, bit Commutable = 0> {
3256 let isCommutable = Commutable in {
3257 def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
3258 opnode, VPR128, VPR64, v4i32, v4i16>;
3259 def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
3260 opnode, VPR128, VPR64, v2i64, v2i32>;
3264 defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull",
3265 int_arm_neon_vqdmull, 1>;
3267 multiclass NeonI_3VDL2_2Op_mull_v2<bit u, bits<4> opcode, string asmop,
3268 string opnode, bit Commutable = 0> {
3269 let isCommutable = Commutable in {
3270 def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
3271 !cast<PatFrag>(opnode # "_8H"),
3273 def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
3274 !cast<PatFrag>(opnode # "_4S"),
3279 defm SQDMULL2vvv : NeonI_3VDL2_2Op_mull_v2<0b0, 0b1101, "sqdmull2",
// Second-half saturating doubling multiply-accumulate (128-bit operands).
3282 multiclass NeonI_3VDL2_3Op_qdmlal_v2<bit u, bits<4> opcode, string asmop,
3283 SDPatternOperator opnode> {
3284 def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
3285 opnode, NI_qdmull_hi_8H,
3286 VPR128, v4i32, v8i16>;
3287 def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
3288 opnode, NI_qdmull_hi_4S,
3289 VPR128, v2i64, v4i32>;
3292 defm SQDMLAL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1001, "sqdmlal2",
3293 int_arm_neon_vqadds>;
3294 defm SQDMLSL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1011, "sqdmlsl2",
3295 int_arm_neon_vqsubs>;
// Polynomial multiply long. Unlike the other long multiplies, pmull has
// an 8b form plus a 1d->1q form; the 1q variants are declared without a
// selection pattern here.
3297 multiclass NeonI_3VDL_v3<bit u, bits<4> opcode, string asmop,
3298 SDPatternOperator opnode, bit Commutable = 0> {
3299 let isCommutable = Commutable in {
3300 def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
3301 opnode, VPR128, VPR64, v8i16, v8i8>;
3303 def _1q1d : NeonI_3VDiff<0b0, u, 0b11, opcode,
3304 (outs VPR128:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
3305 asmop # "\t$Rd.1q, $Rn.1d, $Rm.1d",
3310 defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp, 1>;
// pmull2: second-half polynomial multiply (16b and 2d->1q forms).
3312 multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode, string asmop,
3313 string opnode, bit Commutable = 0> {
3314 let isCommutable = Commutable in {
3315 def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
3316 !cast<PatFrag>(opnode # "_16B"),
3319 def _1q2d : NeonI_3VDiff<0b1, u, 0b11, opcode,
3320 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
3321 asmop # "\t$Rd.1q, $Rn.2d, $Rm.2d",
3326 defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2", "NI_pmull_hi",
3329 // End of implementation for instruction class (3V Diff)
3331 // The following are vector load/store multiple N-element structure
3332 // (class SIMD lselem) instructions.
3334 // ld1: load multiple 1-element structure to 1/2/3/4 registers.
3335 // ld2/ld3/ld4: load multiple N-element structure to N registers (N = 2, 3, 4).
3336 // The structure consists of a sequence of sets of N values.
3337 // The first element of the structure is placed in the first lane
3338 // of the first vector, the second element in the first lane
3339 // of the second vector, and so on.
3340 // E.g. LD1_3V_2S will load 32-bit elements {A, B, C, D, E, F} sequentially into
3341 // the three 64-bit vectors list {BA, DC, FE}.
3342 // E.g. LD3_2S will load 32-bit elements {A, B, C, D, E, F} into the three
3343 // 64-bit vectors list {DA, EB, FC}.
3344 // Store instructions store multiple structure to N registers like load.
// NeonI_LDVList: base class for ld1/ld2/ld3/ld4 loading a register list
// from the address in $Rn (no writeback). The list operand class encodes
// how many registers the instruction reads into.
3347 class NeonI_LDVList<bit q, bits<4> opcode, bits<2> size,
3348 RegisterOperand VecList, string asmop>
3349 : NeonI_LdStMult<q, 1, opcode, size,
3350 (outs VecList:$Rt), (ins GPR64xsp:$Rn),
3351 asmop # "\t$Rt, [$Rn]",
3355 let neverHasSideEffects = 1;
// Instantiates one load per arrangement: 8B/4H/2S (64-bit, q=0) and
// 16B/8H/4S/2D (128-bit, q=1). The 1D form is defined separately below.
3358 multiclass LDVList_BHSD<bits<4> opcode, string List, string asmop> {
3359 def _8B : NeonI_LDVList<0, opcode, 0b00,
3360 !cast<RegisterOperand>(List # "8B_operand"), asmop>;
3362 def _4H : NeonI_LDVList<0, opcode, 0b01,
3363 !cast<RegisterOperand>(List # "4H_operand"), asmop>;
3365 def _2S : NeonI_LDVList<0, opcode, 0b10,
3366 !cast<RegisterOperand>(List # "2S_operand"), asmop>;
3368 def _16B : NeonI_LDVList<1, opcode, 0b00,
3369 !cast<RegisterOperand>(List # "16B_operand"), asmop>;
3371 def _8H : NeonI_LDVList<1, opcode, 0b01,
3372 !cast<RegisterOperand>(List # "8H_operand"), asmop>;
3374 def _4S : NeonI_LDVList<1, opcode, 0b10,
3375 !cast<RegisterOperand>(List # "4S_operand"), asmop>;
3377 def _2D : NeonI_LDVList<1, opcode, 0b11,
3378 !cast<RegisterOperand>(List # "2D_operand"), asmop>;
3381 // Load multiple N-element structure to N consecutive registers (N = 1,2,3,4)
// The 1D arrangement only exists for 64-bit ld1/ld1xN, so it is defined
// individually instead of through LDVList_BHSD.
3382 defm LD1 : LDVList_BHSD<0b0111, "VOne", "ld1">;
3383 def LD1_1D : NeonI_LDVList<0, 0b0111, 0b11, VOne1D_operand, "ld1">;
3385 defm LD2 : LDVList_BHSD<0b1000, "VPair", "ld2">;
3387 defm LD3 : LDVList_BHSD<0b0100, "VTriple", "ld3">;
3389 defm LD4 : LDVList_BHSD<0b0000, "VQuad", "ld4">;
3391 // Load multiple 1-element structure to N consecutive registers (N = 2,3,4)
3392 defm LD1x2 : LDVList_BHSD<0b1010, "VPair", "ld1">;
3393 def LD1x2_1D : NeonI_LDVList<0, 0b1010, 0b11, VPair1D_operand, "ld1">;
3395 defm LD1x3 : LDVList_BHSD<0b0110, "VTriple", "ld1">;
3396 def LD1x3_1D : NeonI_LDVList<0, 0b0110, 0b11, VTriple1D_operand, "ld1">;
3398 defm LD1x4 : LDVList_BHSD<0b0010, "VQuad", "ld1">;
3399 def LD1x4_1D : NeonI_LDVList<0, 0b0010, 0b11, VQuad1D_operand, "ld1">;
// NeonI_STVList: store counterpart of NeonI_LDVList — the register list
// $Rt is an input and there are no outputs.
3401 class NeonI_STVList<bit q, bits<4> opcode, bits<2> size,
3402 RegisterOperand VecList, string asmop>
3403 : NeonI_LdStMult<q, 0, opcode, size,
3404 (outs), (ins GPR64xsp:$Rn, VecList:$Rt),
3405 asmop # "\t$Rt, [$Rn]",
3409 let neverHasSideEffects = 1;
// Same arrangement expansion as LDVList_BHSD, for stores.
3412 multiclass STVList_BHSD<bits<4> opcode, string List, string asmop> {
3413 def _8B : NeonI_STVList<0, opcode, 0b00,
3414 !cast<RegisterOperand>(List # "8B_operand"), asmop>;
3416 def _4H : NeonI_STVList<0, opcode, 0b01,
3417 !cast<RegisterOperand>(List # "4H_operand"), asmop>;
3419 def _2S : NeonI_STVList<0, opcode, 0b10,
3420 !cast<RegisterOperand>(List # "2S_operand"), asmop>;
3422 def _16B : NeonI_STVList<1, opcode, 0b00,
3423 !cast<RegisterOperand>(List # "16B_operand"), asmop>;
3425 def _8H : NeonI_STVList<1, opcode, 0b01,
3426 !cast<RegisterOperand>(List # "8H_operand"), asmop>;
3428 def _4S : NeonI_STVList<1, opcode, 0b10,
3429 !cast<RegisterOperand>(List # "4S_operand"), asmop>;
3431 def _2D : NeonI_STVList<1, opcode, 0b11,
3432 !cast<RegisterOperand>(List # "2D_operand"), asmop>;
3435 // Store multiple N-element structures from N registers (N = 1,2,3,4)
// Mirrors the LD1..LD4 / LD1xN definitions above, with the same opcodes.
3436 defm ST1 : STVList_BHSD<0b0111, "VOne", "st1">;
3437 def ST1_1D : NeonI_STVList<0, 0b0111, 0b11, VOne1D_operand, "st1">;
3439 defm ST2 : STVList_BHSD<0b1000, "VPair", "st2">;
3441 defm ST3 : STVList_BHSD<0b0100, "VTriple", "st3">;
3443 defm ST4 : STVList_BHSD<0b0000, "VQuad", "st4">;
3445 // Store multiple 1-element structures from N consecutive registers (N = 2,3,4)
3446 defm ST1x2 : STVList_BHSD<0b1010, "VPair", "st1">;
3447 def ST1x2_1D : NeonI_STVList<0, 0b1010, 0b11, VPair1D_operand, "st1">;
3449 defm ST1x3 : STVList_BHSD<0b0110, "VTriple", "st1">;
3450 def ST1x3_1D : NeonI_STVList<0, 0b0110, 0b11, VTriple1D_operand, "st1">;
3452 defm ST1x4 : STVList_BHSD<0b0010, "VQuad", "st1">;
3453 def ST1x4_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">;
3455 // End of vector load/store multiple N-element structure(class SIMD lselem)
3457 // The following are post-index vector load/store multiple N-element
3458 // structure (class SIMD lselem-post) instructions.
// Immediate operands for the post-index addressing forms. Each one only
// accepts the single value matching the total number of bytes transferred
// (the immediate form of post-index writeback advances by exactly the
// access size); the asm parser checks this via isExactImm<N>.
3459 def exact8_asmoperand : AsmOperandClass {
3460 let Name = "Exact8";
3461 let PredicateMethod = "isExactImm<8>";
3462 let RenderMethod = "addImmOperands";
3464 def uimm_exact8 : Operand<i32>, ImmLeaf<i32, [{return Imm == 8;}]> {
3465 let ParserMatchClass = exact8_asmoperand;
3468 def exact16_asmoperand : AsmOperandClass {
3469 let Name = "Exact16";
3470 let PredicateMethod = "isExactImm<16>";
3471 let RenderMethod = "addImmOperands";
3473 def uimm_exact16 : Operand<i32>, ImmLeaf<i32, [{return Imm == 16;}]> {
3474 let ParserMatchClass = exact16_asmoperand;
3477 def exact24_asmoperand : AsmOperandClass {
3478 let Name = "Exact24";
3479 let PredicateMethod = "isExactImm<24>";
3480 let RenderMethod = "addImmOperands";
3482 def uimm_exact24 : Operand<i32>, ImmLeaf<i32, [{return Imm == 24;}]> {
3483 let ParserMatchClass = exact24_asmoperand;
3486 def exact32_asmoperand : AsmOperandClass {
3487 let Name = "Exact32";
3488 let PredicateMethod = "isExactImm<32>";
3489 let RenderMethod = "addImmOperands";
3491 def uimm_exact32 : Operand<i32>, ImmLeaf<i32, [{return Imm == 32;}]> {
3492 let ParserMatchClass = exact32_asmoperand;
3495 def exact48_asmoperand : AsmOperandClass {
3496 let Name = "Exact48";
3497 let PredicateMethod = "isExactImm<48>";
3498 let RenderMethod = "addImmOperands";
3500 def uimm_exact48 : Operand<i32>, ImmLeaf<i32, [{return Imm == 48;}]> {
3501 let ParserMatchClass = exact48_asmoperand;
3504 def exact64_asmoperand : AsmOperandClass {
3505 let Name = "Exact64";
3506 let PredicateMethod = "isExactImm<64>";
3507 let RenderMethod = "addImmOperands";
3509 def uimm_exact64 : Operand<i32>, ImmLeaf<i32, [{return Imm == 64;}]> {
3510 let ParserMatchClass = exact64_asmoperand;
// Post-index (writeback) load of a vector register list. Each expansion
// produces two forms: _fixed (post-increment by an exact immediate equal
// to the transfer size) and _register (post-increment by a GPR, which
// must not be XZR). $Rn is tied to the updated base $wb.
3513 multiclass NeonI_LDWB_VList<bit q, bits<4> opcode, bits<2> size,
3514 RegisterOperand VecList, Operand ImmTy,
3516 let Constraints = "$Rn = $wb", mayLoad = 1, neverHasSideEffects = 1,
3517 DecoderMethod = "DecodeVLDSTPostInstruction" in {
3518 def _fixed : NeonI_LdStMult_Post<q, 1, opcode, size,
3519 (outs VecList:$Rt, GPR64xsp:$wb),
3520 (ins GPR64xsp:$Rn, ImmTy:$amt),
3521 asmop # "\t$Rt, [$Rn], $amt",
3527 def _register : NeonI_LdStMult_Post<q, 1, opcode, size,
3528 (outs VecList:$Rt, GPR64xsp:$wb),
3529 (ins GPR64xsp:$Rn, GPR64noxzr:$Rm),
3530 asmop # "\t$Rt, [$Rn], $Rm",
// Arrangement expansion for the writeback loads. ImmTy is the post-index
// amount for the 64-bit forms and ImmTy2 for the 128-bit forms (twice
// the data is transferred, so the increment doubles).
3536 multiclass LDWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
3537 Operand ImmTy2, string asmop> {
3538 defm _8B : NeonI_LDWB_VList<0, opcode, 0b00,
3539 !cast<RegisterOperand>(List # "8B_operand"),
3542 defm _4H : NeonI_LDWB_VList<0, opcode, 0b01,
3543 !cast<RegisterOperand>(List # "4H_operand"),
3546 defm _2S : NeonI_LDWB_VList<0, opcode, 0b10,
3547 !cast<RegisterOperand>(List # "2S_operand"),
3550 defm _16B : NeonI_LDWB_VList<1, opcode, 0b00,
3551 !cast<RegisterOperand>(List # "16B_operand"),
3554 defm _8H : NeonI_LDWB_VList<1, opcode, 0b01,
3555 !cast<RegisterOperand>(List # "8H_operand"),
3558 defm _4S : NeonI_LDWB_VList<1, opcode, 0b10,
3559 !cast<RegisterOperand>(List # "4S_operand"),
3562 defm _2D : NeonI_LDWB_VList<1, opcode, 0b11,
3563 !cast<RegisterOperand>(List # "2D_operand"),
3567 // Post-index load multiple N-element structures from N registers (N = 1,2,3,4)
3568 defm LD1WB : LDWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "ld1">;
3569 defm LD1WB_1D : NeonI_LDWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
3572 defm LD2WB : LDWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "ld2">;
3574 defm LD3WB : LDWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
3577 defm LD4WB : LDWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "ld4">;
3579 // Post-index load multiple 1-element structures from N consecutive registers
3581 defm LD1x2WB : LDWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
3583 defm LD1x2WB_1D : NeonI_LDWB_VList<0, 0b1010, 0b11, VPair1D_operand,
3584 uimm_exact16, "ld1">;
3586 defm LD1x3WB : LDWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
3588 defm LD1x3WB_1D : NeonI_LDWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
3589 uimm_exact24, "ld1">;
3591 defm LD1x4WB : LDWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
3593 defm LD1x4WB_1D : NeonI_LDWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
3594 uimm_exact32, "ld1">;
3596 multiclass NeonI_STWB_VList<bit q, bits<4> opcode, bits<2> size,
3597 RegisterOperand VecList, Operand ImmTy,
3599 let Constraints = "$Rn = $wb", mayStore = 1, neverHasSideEffects = 1,
3600 DecoderMethod = "DecodeVLDSTPostInstruction" in {
3601 def _fixed : NeonI_LdStMult_Post<q, 0, opcode, size,
3602 (outs GPR64xsp:$wb),
3603 (ins GPR64xsp:$Rn, ImmTy:$amt, VecList:$Rt),
3604 asmop # "\t$Rt, [$Rn], $amt",
3610 def _register : NeonI_LdStMult_Post<q, 0, opcode, size,
3611 (outs GPR64xsp:$wb),
3612 (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VecList:$Rt),
3613 asmop # "\t$Rt, [$Rn], $Rm",
3619 multiclass STWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
3620 Operand ImmTy2, string asmop> {
3621 defm _8B : NeonI_STWB_VList<0, opcode, 0b00,
3622 !cast<RegisterOperand>(List # "8B_operand"), ImmTy, asmop>;
3624 defm _4H : NeonI_STWB_VList<0, opcode, 0b01,
3625 !cast<RegisterOperand>(List # "4H_operand"),
3628 defm _2S : NeonI_STWB_VList<0, opcode, 0b10,
3629 !cast<RegisterOperand>(List # "2S_operand"),
3632 defm _16B : NeonI_STWB_VList<1, opcode, 0b00,
3633 !cast<RegisterOperand>(List # "16B_operand"),
3636 defm _8H : NeonI_STWB_VList<1, opcode, 0b01,
3637 !cast<RegisterOperand>(List # "8H_operand"),
3640 defm _4S : NeonI_STWB_VList<1, opcode, 0b10,
3641 !cast<RegisterOperand>(List # "4S_operand"),
3644 defm _2D : NeonI_STWB_VList<1, opcode, 0b11,
3645 !cast<RegisterOperand>(List # "2D_operand"),
3649 // Post-index store multiple N-element structures from N registers (N = 1,2,3,4)
3650 defm ST1WB : STWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "st1">;
3651 defm ST1WB_1D : NeonI_STWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
3654 defm ST2WB : STWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "st2">;
3656 defm ST3WB : STWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
3659 defm ST4WB : STWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "st4">;
3661 // Post-index store multiple 1-element structures from N consecutive registers
3663 defm ST1x2WB : STWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
3665 defm ST1x2WB_1D : NeonI_STWB_VList<0, 0b1010, 0b11, VPair1D_operand,
3666 uimm_exact16, "st1">;
3668 defm ST1x3WB : STWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
3670 defm ST1x3WB_1D : NeonI_STWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
3671 uimm_exact24, "st1">;
3673 defm ST1x4WB : STWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
3675 defm ST1x4WB_1D : NeonI_STWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
3676 uimm_exact32, "st1">;
3678 // End of post-index vector load/store multiple N-element structure
3679 // (class SIMD lselem-post)
3682 // Neon Scalar instructions implementation
3683 // Scalar Three Same
3685 class NeonI_Scalar3Same_size<bit u, bits<2> size, bits<5> opcode, string asmop,
3687 : NeonI_Scalar3Same<u, size, opcode,
3688 (outs FPRC:$Rd), (ins FPRC:$Rn, FPRC:$Rm),
3689 !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
// Scalar three-same-operand instruction restricted to the doubleword form:
// size field is fixed to 0b11 and source/destination operands are all FPR64.
3693 class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
3694 : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;
3696 multiclass NeonI_Scalar3Same_HS_sizes<bit u, bits<5> opcode, string asmop,
3697 bit Commutable = 0> {
3698 let isCommutable = Commutable in {
3699 def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
3700 def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
3704 multiclass NeonI_Scalar3Same_SD_sizes<bit u, bit size_high, bits<5> opcode,
3705 string asmop, bit Commutable = 0> {
3706 let isCommutable = Commutable in {
3707 def sss : NeonI_Scalar3Same_size<u, {size_high, 0b0}, opcode, asmop, FPR32>;
3708 def ddd : NeonI_Scalar3Same_size<u, {size_high, 0b1}, opcode, asmop, FPR64>;
3712 multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
3713 string asmop, bit Commutable = 0> {
3714 let isCommutable = Commutable in {
3715 def bbb : NeonI_Scalar3Same_size<u, 0b00, opcode, asmop, FPR8>;
3716 def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
3717 def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
3718 def ddd : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;
3722 multiclass Neon_Scalar3Same_D_size_patterns<SDPatternOperator opnode,
3723 Instruction INSTD> {
3724 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
3725 (INSTD FPR64:$Rn, FPR64:$Rm)>;
3728 multiclass Neon_Scalar3Same_BHSD_size_patterns<SDPatternOperator opnode,
3733 : Neon_Scalar3Same_D_size_patterns<opnode, INSTD> {
3734 def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
3735 (INSTB FPR8:$Rn, FPR8:$Rm)>;
3737 def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
3738 (INSTH FPR16:$Rn, FPR16:$Rm)>;
3740 def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
3741 (INSTS FPR32:$Rn, FPR32:$Rm)>;
3744 class Neon_Scalar3Same_cmp_D_size_patterns<SDPatternOperator opnode,
3746 : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
3747 (INSTD FPR64:$Rn, FPR64:$Rm)>;
3749 multiclass Neon_Scalar3Same_HS_size_patterns<SDPatternOperator opnode,
3751 Instruction INSTS> {
3752 def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
3753 (INSTH FPR16:$Rn, FPR16:$Rm)>;
3754 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
3755 (INSTS FPR32:$Rn, FPR32:$Rm)>;
3758 multiclass Neon_Scalar3Same_SD_size_patterns<SDPatternOperator opnode,
3760 Instruction INSTD> {
3761 def : Pat<(v1f32 (opnode (v1f32 FPR32:$Rn), (v1f32 FPR32:$Rm))),
3762 (INSTS FPR32:$Rn, FPR32:$Rm)>;
3763 def : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
3764 (INSTD FPR64:$Rn, FPR64:$Rm)>;
3767 multiclass Neon_Scalar3Same_cmp_SD_size_patterns<SDPatternOperator opnode,
3769 Instruction INSTD> {
3770 def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn), (v1f32 FPR32:$Rm))),
3771 (INSTS FPR32:$Rn, FPR32:$Rm)>;
3772 def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
3773 (INSTD FPR64:$Rn, FPR64:$Rm)>;
3776 // Scalar Three Different
3778 class NeonI_Scalar3Diff_size<bit u, bits<2> size, bits<4> opcode, string asmop,
3779 RegisterClass FPRCD, RegisterClass FPRCS>
3780 : NeonI_Scalar3Diff<u, size, opcode,
3781 (outs FPRCD:$Rd), (ins FPRCS:$Rn, FPRCS:$Rm),
3782 !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
3786 multiclass NeonI_Scalar3Diff_HS_size<bit u, bits<4> opcode, string asmop> {
3787 def shh : NeonI_Scalar3Diff_size<u, 0b01, opcode, asmop, FPR32, FPR16>;
3788 def dss : NeonI_Scalar3Diff_size<u, 0b10, opcode, asmop, FPR64, FPR32>;
3791 multiclass NeonI_Scalar3Diff_ml_HS_size<bit u, bits<4> opcode, string asmop> {
3792 let Constraints = "$Src = $Rd" in {
3793 def shh : NeonI_Scalar3Diff<u, 0b01, opcode,
3794 (outs FPR32:$Rd), (ins FPR32:$Src, FPR16:$Rn, FPR16:$Rm),
3795 !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
3798 def dss : NeonI_Scalar3Diff<u, 0b10, opcode,
3799 (outs FPR64:$Rd), (ins FPR64:$Src, FPR32:$Rn, FPR32:$Rm),
3800 !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
3806 multiclass Neon_Scalar3Diff_HS_size_patterns<SDPatternOperator opnode,
3808 Instruction INSTS> {
3809 def : Pat<(v1i32 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
3810 (INSTH FPR16:$Rn, FPR16:$Rm)>;
3811 def : Pat<(v1i64 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
3812 (INSTS FPR32:$Rn, FPR32:$Rm)>;
3815 multiclass Neon_Scalar3Diff_ml_HS_size_patterns<SDPatternOperator opnode,
3817 Instruction INSTS> {
3818 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
3819 (INSTH FPR32:$Src, FPR16:$Rn, FPR16:$Rm)>;
3820 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
3821 (INSTS FPR64:$Src, FPR32:$Rn, FPR32:$Rm)>;
3824 // Scalar Two Registers Miscellaneous
3826 class NeonI_Scalar2SameMisc_size<bit u, bits<2> size, bits<5> opcode, string asmop,
3827 RegisterClass FPRCD, RegisterClass FPRCS>
3828 : NeonI_Scalar2SameMisc<u, size, opcode,
3829 (outs FPRCD:$Rd), (ins FPRCS:$Rn),
3830 !strconcat(asmop, "\t$Rd, $Rn"),
3834 multiclass NeonI_Scalar2SameMisc_SD_size<bit u, bit size_high, bits<5> opcode,
3836 def ss : NeonI_Scalar2SameMisc_size<u, {size_high, 0b0}, opcode, asmop, FPR32,
3838 def dd : NeonI_Scalar2SameMisc_size<u, {size_high, 0b1}, opcode, asmop, FPR64,
3842 multiclass NeonI_Scalar2SameMisc_D_size<bit u, bits<5> opcode, string asmop> {
3843 def dd: NeonI_Scalar2SameMisc_size<u, 0b11, opcode, asmop, FPR64, FPR64>;
3846 multiclass NeonI_Scalar2SameMisc_BHSD_size<bit u, bits<5> opcode, string asmop>
3847 : NeonI_Scalar2SameMisc_D_size<u, opcode, asmop> {
3848 def bb : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR8>;
3849 def hh : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR16>;
3850 def ss : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR32>;
3853 multiclass NeonI_Scalar2SameMisc_narrow_HSD_size<bit u, bits<5> opcode,
3855 def bh : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR16>;
3856 def hs : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR32>;
3857 def sd : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR64>;
3860 class NeonI_Scalar2SameMisc_accum_size<bit u, bits<2> size, bits<5> opcode,
3861 string asmop, RegisterClass FPRC>
3862 : NeonI_Scalar2SameMisc<u, size, opcode,
3863 (outs FPRC:$Rd), (ins FPRC:$Src, FPRC:$Rn),
3864 !strconcat(asmop, "\t$Rd, $Rn"),
3868 multiclass NeonI_Scalar2SameMisc_accum_BHSD_size<bit u, bits<5> opcode,
3871 let Constraints = "$Src = $Rd" in {
3872 def bb : NeonI_Scalar2SameMisc_accum_size<u, 0b00, opcode, asmop, FPR8>;
3873 def hh : NeonI_Scalar2SameMisc_accum_size<u, 0b01, opcode, asmop, FPR16>;
3874 def ss : NeonI_Scalar2SameMisc_accum_size<u, 0b10, opcode, asmop, FPR32>;
3875 def dd : NeonI_Scalar2SameMisc_accum_size<u, 0b11, opcode, asmop, FPR64>;
3879 multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns<SDPatternOperator Sopnode,
3880 SDPatternOperator Dopnode,
3882 Instruction INSTD> {
3883 def : Pat<(f32 (Sopnode (v1i32 FPR32:$Rn))),
3885 def : Pat<(f64 (Dopnode (v1i64 FPR64:$Rn))),
3889 multiclass Neon_Scalar2SameMisc_SD_size_patterns<SDPatternOperator opnode,
3891 Instruction INSTD> {
3892 def : Pat<(v1f32 (opnode (v1f32 FPR32:$Rn))),
3894 def : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))),
3898 class NeonI_Scalar2SameMisc_cmpz_D_size<bit u, bits<5> opcode, string asmop>
3899 : NeonI_Scalar2SameMisc<u, 0b11, opcode,
3900 (outs FPR64:$Rd), (ins FPR64:$Rn, neon_uimm0:$Imm),
3901 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
3905 multiclass NeonI_Scalar2SameMisc_cmpz_SD_size<bit u, bits<5> opcode,
3907 def ssi : NeonI_Scalar2SameMisc<u, 0b10, opcode,
3908 (outs FPR32:$Rd), (ins FPR32:$Rn, fpz32:$FPImm),
3909 !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
3912 def ddi : NeonI_Scalar2SameMisc<u, 0b11, opcode,
3913 (outs FPR64:$Rd), (ins FPR64:$Rn, fpz64movi:$FPImm),
3914 !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
3919 class Neon_Scalar2SameMisc_cmpz_D_size_patterns<SDPatternOperator opnode,
3921 : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
3922 (v1i64 (bitconvert (v8i8 Neon_AllZero))))),
3923 (INSTD FPR64:$Rn, 0)>;
3925 multiclass Neon_Scalar2SameMisc_cmpz_SD_size_patterns<SDPatternOperator opnode,
3927 Instruction INSTD> {
3928 def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn),
3929 (v1f32 (scalar_to_vector (f32 fpimm:$FPImm))))),
3930 (INSTS FPR32:$Rn, fpimm:$FPImm)>;
3931 def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn),
3932 (v1f64 (bitconvert (v8i8 Neon_AllZero))))),
3933 (INSTD FPR64:$Rn, 0)>;
3936 multiclass Neon_Scalar2SameMisc_D_size_patterns<SDPatternOperator opnode,
3937 Instruction INSTD> {
3938 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn))),
3942 multiclass Neon_Scalar2SameMisc_BHSD_size_patterns<SDPatternOperator opnode,
3947 : Neon_Scalar2SameMisc_D_size_patterns<opnode, INSTD> {
3948 def : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn))),
3950 def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn))),
3952 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn))),
3956 multiclass Neon_Scalar2SameMisc_narrow_HSD_size_patterns<
3957 SDPatternOperator opnode,
3960 Instruction INSTD> {
3961 def : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn))),
3963 def : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn))),
3965 def : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn))),
3970 multiclass Neon_Scalar2SameMisc_accum_BHSD_size_patterns<
3971 SDPatternOperator opnode,
3975 Instruction INSTD> {
3976 def : Pat<(v1i8 (opnode (v1i8 FPR8:$Src), (v1i8 FPR8:$Rn))),
3977 (INSTB FPR8:$Src, FPR8:$Rn)>;
3978 def : Pat<(v1i16 (opnode (v1i16 FPR16:$Src), (v1i16 FPR16:$Rn))),
3979 (INSTH FPR16:$Src, FPR16:$Rn)>;
3980 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i32 FPR32:$Rn))),
3981 (INSTS FPR32:$Src, FPR32:$Rn)>;
3982 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn))),
3983 (INSTD FPR64:$Src, FPR64:$Rn)>;
3986 // Scalar Shift By Immediate
3988 class NeonI_ScalarShiftImm_size<bit u, bits<5> opcode, string asmop,
3989 RegisterClass FPRC, Operand ImmTy>
3990 : NeonI_ScalarShiftImm<u, opcode,
3991 (outs FPRC:$Rd), (ins FPRC:$Rn, ImmTy:$Imm),
3992 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
3995 multiclass NeonI_ScalarShiftRightImm_D_size<bit u, bits<5> opcode,
3997 def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
3999 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
4000 let Inst{21-16} = Imm;
4004 multiclass NeonI_ScalarShiftRightImm_BHSD_size<bit u, bits<5> opcode,
4006 : NeonI_ScalarShiftRightImm_D_size<u, opcode, asmop> {
4007 def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shr_imm8> {
4009 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
4010 let Inst{18-16} = Imm;
4012 def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shr_imm16> {
4014 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
4015 let Inst{19-16} = Imm;
4017 def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
4019 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
4020 let Inst{20-16} = Imm;
4024 multiclass NeonI_ScalarShiftLeftImm_D_size<bit u, bits<5> opcode,
4026 def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shl_imm64> {
4028 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
4029 let Inst{21-16} = Imm;
4033 multiclass NeonI_ScalarShiftLeftImm_BHSD_size<bit u, bits<5> opcode,
4035 : NeonI_ScalarShiftLeftImm_D_size<u, opcode, asmop> {
4036 def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shl_imm8> {
4038 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
4039 let Inst{18-16} = Imm;
4041 def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shl_imm16> {
4043 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
4044 let Inst{19-16} = Imm;
4046 def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shl_imm32> {
4048 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
4049 let Inst{20-16} = Imm;
4053 class NeonI_ScalarShiftRightImm_accum_D_size<bit u, bits<5> opcode, string asmop>
4054 : NeonI_ScalarShiftImm<u, opcode,
4055 (outs FPR64:$Rd), (ins FPR64:$Src, FPR64:$Rn, shr_imm64:$Imm),
4056 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
4059 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
4060 let Inst{21-16} = Imm;
4061 let Constraints = "$Src = $Rd";
4064 class NeonI_ScalarShiftLeftImm_accum_D_size<bit u, bits<5> opcode, string asmop>
4065 : NeonI_ScalarShiftImm<u, opcode,
4066 (outs FPR64:$Rd), (ins FPR64:$Src, FPR64:$Rn, shl_imm64:$Imm),
4067 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
4070 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
4071 let Inst{21-16} = Imm;
4072 let Constraints = "$Src = $Rd";
4075 class NeonI_ScalarShiftImm_narrow_size<bit u, bits<5> opcode, string asmop,
4076 RegisterClass FPRCD, RegisterClass FPRCS,
4078 : NeonI_ScalarShiftImm<u, opcode,
4079 (outs FPRCD:$Rd), (ins FPRCS:$Rn, ImmTy:$Imm),
4080 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
4083 multiclass NeonI_ScalarShiftImm_narrow_HSD_size<bit u, bits<5> opcode,
4085 def bhi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR8, FPR16,
4088 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
4089 let Inst{18-16} = Imm;
4091 def hsi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR16, FPR32,
4094 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
4095 let Inst{19-16} = Imm;
4097 def sdi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR32, FPR64,
4100 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
4101 let Inst{20-16} = Imm;
4105 multiclass NeonI_ScalarShiftImm_cvt_SD_size<bit u, bits<5> opcode, string asmop> {
4106 def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
4108 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
4109 let Inst{20-16} = Imm;
4111 def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
4113 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
4114 let Inst{21-16} = Imm;
4118 multiclass Neon_ScalarShiftImm_D_size_patterns<SDPatternOperator opnode,
4119 Instruction INSTD> {
4120 def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 imm:$Imm))),
4121 (INSTD FPR64:$Rn, imm:$Imm)>;
4124 class Neon_ScalarShiftImm_arm_D_size_patterns<SDPatternOperator opnode,
4126 : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 (Neon_vdup (i32 imm:$Imm))))),
4127 (INSTD FPR64:$Rn, imm:$Imm)>;
4129 multiclass Neon_ScalarShiftImm_BHSD_size_patterns<SDPatternOperator opnode,
4134 : Neon_ScalarShiftImm_D_size_patterns<opnode, INSTD> {
4135 def bbi : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (i32 imm:$Imm))),
4136 (INSTB FPR8:$Rn, imm:$Imm)>;
4137 def hhi : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (i32 imm:$Imm))),
4138 (INSTH FPR16:$Rn, imm:$Imm)>;
4139 def ssi : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (i32 imm:$Imm))),
4140 (INSTS FPR32:$Rn, imm:$Imm)>;
4143 class Neon_ScalarShiftImm_accum_D_size_patterns<SDPatternOperator opnode,
4145 : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn), (i32 imm:$Imm))),
4146 (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>;
4148 multiclass Neon_ScalarShiftImm_narrow_HSD_size_patterns<
4149 SDPatternOperator opnode,
4152 Instruction INSTD> {
4153 def bhi : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn), (i32 imm:$Imm))),
4154 (INSTH FPR16:$Rn, imm:$Imm)>;
4155 def hsi : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn), (i32 imm:$Imm))),
4156 (INSTS FPR32:$Rn, imm:$Imm)>;
4157 def sdi : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn), (i32 imm:$Imm))),
4158 (INSTD FPR64:$Rn, imm:$Imm)>;
4161 multiclass Neon_ScalarShiftImm_scvtf_SD_size_patterns<SDPatternOperator Sopnode,
4162 SDPatternOperator Dopnode,
4164 Instruction INSTD> {
4165 def ssi : Pat<(f32 (Sopnode (v1i32 FPR32:$Rn), (i32 imm:$Imm))),
4166 (INSTS FPR32:$Rn, imm:$Imm)>;
4167 def ddi : Pat<(f64 (Dopnode (v1i64 FPR64:$Rn), (i32 imm:$Imm))),
4168 (INSTD FPR64:$Rn, imm:$Imm)>;
4171 multiclass Neon_ScalarShiftImm_fcvts_SD_size_patterns<SDPatternOperator Sopnode,
4172 SDPatternOperator Dopnode,
4174 Instruction INSTD> {
4175 def ssi : Pat<(v1i32 (Sopnode (v1f32 FPR32:$Rn), (i32 imm:$Imm))),
4176 (INSTS FPR32:$Rn, imm:$Imm)>;
4177 def ddi : Pat<(v1i64 (Dopnode (v1f64 FPR64:$Rn), (i32 imm:$Imm))),
4178 (INSTD FPR64:$Rn, imm:$Imm)>;
4181 // Scalar Signed Shift Right (Immediate)
4182 defm SSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00000, "sshr">;
4183 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vshrds_n, SSHRddi>;
4184 // Pattern to match llvm.arm.* intrinsic.
4185 def : Neon_ScalarShiftImm_arm_D_size_patterns<sra, SSHRddi>;
4187 // Scalar Unsigned Shift Right (Immediate)
4188 defm USHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00000, "ushr">;
4189 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vshrdu_n, USHRddi>;
4190 // Pattern to match llvm.arm.* intrinsic.
4191 def : Neon_ScalarShiftImm_arm_D_size_patterns<srl, USHRddi>;
4193 // Scalar Signed Rounding Shift Right (Immediate)
4194 defm SRSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00100, "srshr">;
4195 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vsrshr, SRSHRddi>;
4197 // Scalar Unsigned Rounding Shift Right (Immediate)
4198 defm URSHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00100, "urshr">;
4199 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vurshr, URSHRddi>;
4201 // Scalar Signed Shift Right and Accumulate (Immediate)
4202 def SSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00010, "ssra">;
4203 def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vsrads_n, SSRA>;
4205 // Scalar Unsigned Shift Right and Accumulate (Immediate)
4206 def USRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00010, "usra">;
4207 def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vsradu_n, USRA>;
4209 // Scalar Signed Rounding Shift Right and Accumulate (Immediate)
4210 def SRSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00110, "srsra">;
4211 def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vrsrads_n, SRSRA>;
4213 // Scalar Unsigned Rounding Shift Right and Accumulate (Immediate)
4214 def URSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00110, "ursra">;
4215 def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vrsradu_n, URSRA>;
4217 // Scalar Shift Left (Immediate)
4218 defm SHL : NeonI_ScalarShiftLeftImm_D_size<0b0, 0b01010, "shl">;
4219 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vshld_n, SHLddi>;
4220 // Pattern to match llvm.arm.* intrinsic.
4221 def : Neon_ScalarShiftImm_arm_D_size_patterns<shl, SHLddi>;
4223 // Signed Saturating Shift Left (Immediate)
4224 defm SQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b0, 0b01110, "sqshl">;
4225 defm : Neon_ScalarShiftImm_BHSD_size_patterns<int_aarch64_neon_vqshls_n,
4227 SQSHLssi, SQSHLddi>;
4228 // Pattern to match llvm.arm.* intrinsic.
4229 defm : Neon_ScalarShiftImm_D_size_patterns<Neon_sqrshlImm, SQSHLddi>;
4231 // Unsigned Saturating Shift Left (Immediate)
4232 defm UQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01110, "uqshl">;
4233 defm : Neon_ScalarShiftImm_BHSD_size_patterns<int_aarch64_neon_vqshlu_n,
4235 UQSHLssi, UQSHLddi>;
4236 // Pattern to match llvm.arm.* intrinsic.
4237 defm : Neon_ScalarShiftImm_D_size_patterns<Neon_uqrshlImm, UQSHLddi>;
4239 // Signed Saturating Shift Left Unsigned (Immediate)
4240 defm SQSHLU : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01100, "sqshlu">;
4241 defm : Neon_ScalarShiftImm_BHSD_size_patterns<int_aarch64_neon_vsqshlu,
4242 SQSHLUbbi, SQSHLUhhi,
4243 SQSHLUssi, SQSHLUddi>;
4245 // Shift Right And Insert (Immediate)
4246 def SRI : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b01000, "sri">;
4247 def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vsri, SRI>;
4249 // Shift Left And Insert (Immediate)
4250 def SLI : NeonI_ScalarShiftLeftImm_accum_D_size<0b1, 0b01010, "sli">;
4251 def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vsli, SLI>;
4253 // Signed Saturating Shift Right Narrow (Immediate)
4254 defm SQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10010, "sqshrn">;
4255 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrn,
4256 SQSHRNbhi, SQSHRNhsi,
4259 // Unsigned Saturating Shift Right Narrow (Immediate)
4260 defm UQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10010, "uqshrn">;
4261 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqshrn,
4262 UQSHRNbhi, UQSHRNhsi,
4265 // Signed Saturating Rounded Shift Right Narrow (Immediate)
4266 defm SQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10011, "sqrshrn">;
4267 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrn,
4268 SQRSHRNbhi, SQRSHRNhsi,
4271 // Unsigned Saturating Rounded Shift Right Narrow (Immediate)
4272 defm UQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10011, "uqrshrn">;
4273 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqrshrn,
4274 UQRSHRNbhi, UQRSHRNhsi,
4277 // Signed Saturating Shift Right Unsigned Narrow (Immediate)
4278 defm SQSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10000, "sqshrun">;
4279 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrun,
4280 SQSHRUNbhi, SQSHRUNhsi,
4283 // Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
4284 defm SQRSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10001, "sqrshrun">;
4285 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrun,
4286 SQRSHRUNbhi, SQRSHRUNhsi,
4289 // Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
4290 defm SCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11100, "scvtf">;
4291 defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtf32_n_s32,
4292 int_aarch64_neon_vcvtf64_n_s64,
4293 SCVTF_Nssi, SCVTF_Nddi>;
4295 // Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
4296 defm UCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11100, "ucvtf">;
4297 defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtf32_n_u32,
4298 int_aarch64_neon_vcvtf64_n_u64,
4299 UCVTF_Nssi, UCVTF_Nddi>;
4301 // Scalar Floating-point Convert To Signed Fixed-point (Immediate)
4302 defm FCVTZS_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11111, "fcvtzs">;
4303 defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvts_n_s32_f32,
4304 int_aarch64_neon_vcvtd_n_s64_f64,
4305 FCVTZS_Nssi, FCVTZS_Nddi>;
4307 // Scalar Floating-point Convert To Unsigned Fixed-point (Immediate)
4308 defm FCVTZU_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11111, "fcvtzu">;
4309 defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvts_n_u32_f32,
4310 int_aarch64_neon_vcvtd_n_u64_f64,
4311 FCVTZU_Nssi, FCVTZU_Nddi>;
4313 // Scalar Integer Add
4314 let isCommutable = 1 in {
4315 def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
4318 // Scalar Integer Sub
4319 def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">;
4321 // Pattern for Scalar Integer Add and Sub with D register only
4322 defm : Neon_Scalar3Same_D_size_patterns<add, ADDddd>;
4323 defm : Neon_Scalar3Same_D_size_patterns<sub, SUBddd>;
4325 // Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub
4326 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vaddds, ADDddd>;
4327 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vadddu, ADDddd>;
4328 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubds, SUBddd>;
4329 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubdu, SUBddd>;
4331 // Scalar Integer Saturating Add (Signed, Unsigned)
4332 defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;
4333 defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;
4335 // Scalar Integer Saturating Sub (Signed, Unsigned)
4336 defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
4337 defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;
4339 // Patterns to match llvm.arm.* intrinsic for
4340 // Scalar Integer Saturating Add, Sub (Signed, Unsigned)
4341 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqadds, SQADDddd>;
4342 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqaddu, UQADDddd>;
4343 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqsubs, SQSUBddd>;
4344 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqsubu, UQSUBddd>;
4346 // Patterns to match llvm.aarch64.* intrinsic for
4347 // Scalar Integer Saturating Add, Sub (Signed, Unsigned)
4348 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqadds, SQADDbbb,
4349 SQADDhhh, SQADDsss, SQADDddd>;
4350 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqaddu, UQADDbbb,
4351 UQADDhhh, UQADDsss, UQADDddd>;
4352 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqsubs, SQSUBbbb,
4353 SQSUBhhh, SQSUBsss, SQSUBddd>;
4354 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqsubu, UQSUBbbb,
4355 UQSUBhhh, UQSUBsss, UQSUBddd>;
4357 // Scalar Integer Saturating Doubling Multiply Half High
4358 defm SQDMULH : NeonI_Scalar3Same_HS_sizes<0b0, 0b10110, "sqdmulh", 1>;
4360 // Scalar Integer Saturating Rounding Doubling Multiply Half High
4361 defm SQRDMULH : NeonI_Scalar3Same_HS_sizes<0b1, 0b10110, "sqrdmulh", 1>;
4363 // Patterns to match llvm.arm.* intrinsic for
4364 // Scalar Integer Saturating Doubling Multiply Half High and
4365 // Scalar Integer Saturating Rounding Doubling Multiply Half High
4366 defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqdmulh, SQDMULHhhh,
4368 defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqrdmulh, SQRDMULHhhh,
4371 // Scalar Floating-point Multiply Extended
4372 defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>;
4374 // Scalar Floating-point Reciprocal Step
4375 defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>;
4377 // Scalar Floating-point Reciprocal Square Root Step
4378 defm FRSQRTS : NeonI_Scalar3Same_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", 0>;
4380 // Patterns to match llvm.arm.* intrinsic for
4381 // Scalar Floating-point Reciprocal Step and
4382 // Scalar Floating-point Reciprocal Square Root Step
4383 defm : Neon_Scalar3Same_SD_size_patterns<int_arm_neon_vrecps, FRECPSsss,
4385 defm : Neon_Scalar3Same_SD_size_patterns<int_arm_neon_vrsqrts, FRSQRTSsss,
4388 // Patterns to match llvm.aarch64.* intrinsic for
4389 // Scalar Floating-point Multiply Extended,
4390 multiclass Neon_Scalar3Same_MULX_SD_size_patterns<SDPatternOperator opnode,
4392 Instruction INSTD> {
4393 def : Pat<(f32 (opnode (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
4394 (INSTS FPR32:$Rn, FPR32:$Rm)>;
4395 def : Pat<(f64 (opnode (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
4396 (INSTD FPR64:$Rn, FPR64:$Rm)>;
4399 defm : Neon_Scalar3Same_MULX_SD_size_patterns<int_aarch64_neon_vmulx,
4402 // Scalar Integer Shift Left (Signed, Unsigned)
4403 def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
4404 def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;
4406 // Patterns to match llvm.arm.* intrinsic for
4407 // Scalar Integer Shift Left (Signed, Unsigned)
4408 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
4409 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
4411 // Patterns to match llvm.aarch64.* intrinsic for
4412 // Scalar Integer Shift Left (Signed, Unsigned)
4413 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshlds, SSHLddd>;
4414 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshldu, USHLddd>;
4416 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
4417 defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
4418 defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;
4420 // Patterns to match llvm.aarch64.* intrinsic for
4421 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
4422 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshls, SQSHLbbb,
4423 SQSHLhhh, SQSHLsss, SQSHLddd>;
4424 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshlu, UQSHLbbb,
4425 UQSHLhhh, UQSHLsss, UQSHLddd>;
4427 // Patterns to match llvm.arm.* intrinsic for
4428 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
4429 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
4430 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
4432 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
4433 def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">;
4434 def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">;
4436 // Patterns to match llvm.aarch64.* intrinsic for
4437 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
4438 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshlds, SRSHLddd>;
4439 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshldu, URSHLddd>;
4441 // Patterns to match llvm.arm.* intrinsic for
4442 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
4443 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
4444 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
4446 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
4447 defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>;
4448 defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>;
4450 // Patterns to match llvm.aarch64.* intrinsic for
4451 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
4452 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshls, SQRSHLbbb,
4453 SQRSHLhhh, SQRSHLsss, SQRSHLddd>;
4454 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb,
4455 UQRSHLhhh, UQRSHLsss, UQRSHLddd>;
4457 // Patterns to match llvm.arm.* intrinsic for
4458 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
4459 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
4460 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
4462 // Signed Saturating Doubling Multiply-Add Long
4463 defm SQDMLAL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1001, "sqdmlal">;
4464 defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlal,
4465 SQDMLALshh, SQDMLALdss>;
4467 // Signed Saturating Doubling Multiply-Subtract Long
4468 defm SQDMLSL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1011, "sqdmlsl">;
4469 defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlsl,
4470 SQDMLSLshh, SQDMLSLdss>;
4472 // Signed Saturating Doubling Multiply Long
4473 defm SQDMULL : NeonI_Scalar3Diff_HS_size<0b0, 0b1101, "sqdmull">;
4474 defm : Neon_Scalar3Diff_HS_size_patterns<int_aarch64_neon_vqdmull,
4475 SQDMULLshh, SQDMULLdss>;
4477 // Scalar Signed Integer Convert To Floating-point
4478 defm SCVTF : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11101, "scvtf">;
4479 defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtf32_s32,
4480 int_aarch64_neon_vcvtf64_s64,
4483 // Scalar Unsigned Integer Convert To Floating-point
4484 defm UCVTF : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11101, "ucvtf">;
4485 defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtf32_u32,
4486 int_aarch64_neon_vcvtf64_u64,
4489 // Scalar Floating-point Reciprocal Estimate
4490 defm FRECPE : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11101, "frecpe">;
4491 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_arm_neon_vrecpe,
4492 FRECPEss, FRECPEdd>;
4494 // Scalar Floating-point Reciprocal Exponent
4495 defm FRECPX : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11111, "frecpx">;
4496 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpx,
4497 FRECPXss, FRECPXdd>;
4499 // Scalar Floating-point Reciprocal Square Root Estimate
4500 defm FRSQRTE: NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11101, "frsqrte">;
4501 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_arm_neon_vrsqrte,
4502 FRSQRTEss, FRSQRTEdd>;
4504 // Scalar Integer Compare
4506 // Scalar Compare Bitwise Equal
4507 def CMEQddd: NeonI_Scalar3Same_D_size<0b1, 0b10001, "cmeq">;
4508 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vceq, CMEQddd>;
4510 class Neon_Scalar3Same_cmp_D_size_v1_patterns<SDPatternOperator opnode,
4513 : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm), CC)),
4514 (INSTD FPR64:$Rn, FPR64:$Rm)>;
4516 def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMEQddd, SETEQ>;
4518 // Scalar Compare Signed Greater Than Or Equal
4519 def CMGEddd: NeonI_Scalar3Same_D_size<0b0, 0b00111, "cmge">;
4520 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vcge, CMGEddd>;
4522 // Scalar Compare Unsigned Higher Or Same
4523 def CMHSddd: NeonI_Scalar3Same_D_size<0b1, 0b00111, "cmhs">;
4524 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vchs, CMHSddd>;
4526 // Scalar Compare Unsigned Higher
4527 def CMHIddd: NeonI_Scalar3Same_D_size<0b1, 0b00110, "cmhi">;
4528 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vchi, CMHIddd>;
4530 // Scalar Compare Signed Greater Than
4531 def CMGTddd: NeonI_Scalar3Same_D_size<0b0, 0b00110, "cmgt">;
4532 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vcgt, CMGTddd>;
4534 // Scalar Compare Bitwise Test Bits
4535 def CMTSTddd: NeonI_Scalar3Same_D_size<0b0, 0b10001, "cmtst">;
4536 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vtstd, CMTSTddd>;
4537 def : Neon_Scalar3Same_cmp_D_size_patterns<Neon_tst, CMTSTddd>;
4539 // Scalar Compare Bitwise Equal To Zero
4540 def CMEQddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01001, "cmeq">;
4541 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vceq,
4544 // Scalar Compare Signed Greater Than Or Equal To Zero
4545 def CMGEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01000, "cmge">;
4546 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcge,
4549 // Scalar Compare Signed Greater Than Zero
4550 def CMGTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01000, "cmgt">;
4551 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcgt,
4554 // Scalar Compare Signed Less Than Or Equal To Zero
4555 def CMLEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01001, "cmle">;
4556 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vclez,
4559 // Scalar Compare Signed Less Than Zero
4560 def CMLTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01010, "cmlt">;
4561 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcltz,
4564 // Scalar Floating-point Compare
4566 // Scalar Floating-point Compare Mask Equal
4567 defm FCMEQ: NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11100, "fcmeq">;
4568 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vceq,
4569 FCMEQsss, FCMEQddd>;
4571 // Scalar Floating-point Compare Mask Equal To Zero
4572 defm FCMEQZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01101, "fcmeq">;
4573 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vceq,
4574 FCMEQZssi, FCMEQZddi>;
4576 // Scalar Floating-point Compare Mask Greater Than Or Equal
4577 defm FCMGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11100, "fcmge">;
4578 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcge,
4579 FCMGEsss, FCMGEddd>;
4581 // Scalar Floating-point Compare Mask Greater Than Or Equal To Zero
4582 defm FCMGEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01100, "fcmge">;
4583 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcge,
4584 FCMGEZssi, FCMGEZddi>;
4586 // Scalar Floating-point Compare Mask Greater Than
4587 defm FCMGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11100, "fcmgt">;
4588 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcgt,
4589 FCMGTsss, FCMGTddd>;
4591 // Scalar Floating-point Compare Mask Greater Than Zero
4592 defm FCMGTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01100, "fcmgt">;
4593 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcgt,
4594 FCMGTZssi, FCMGTZddi>;
4596 // Scalar Floating-point Compare Mask Less Than Or Equal To Zero
4597 defm FCMLEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01101, "fcmle">;
4598 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vclez,
4599 FCMLEZssi, FCMLEZddi>;
4601 // Scalar Floating-point Compare Mask Less Than Zero
4602 defm FCMLTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01110, "fcmlt">;
4603 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcltz,
4604 FCMLTZssi, FCMLTZddi>;
4606 // Scalar Floating-point Absolute Compare Mask Greater Than Or Equal
4607 defm FACGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11101, "facge">;
4608 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcage,
4609 FACGEsss, FACGEddd>;
4611 // Scalar Floating-point Absolute Compare Mask Greater Than
4612 defm FACGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11101, "facgt">;
4613 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcagt,
4614 FACGTsss, FACGTddd>;
4616 // Scalar Absolute Value
4617 defm ABS : NeonI_Scalar2SameMisc_D_size<0b0, 0b01011, "abs">;
4618 defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vabs, ABSdd>;
4620 // Scalar Signed Saturating Absolute Value
4621 defm SQABS : NeonI_Scalar2SameMisc_BHSD_size<0b0, 0b00111, "sqabs">;
4622 defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqabs,
4623 SQABSbb, SQABShh, SQABSss, SQABSdd>;
4626 defm NEG : NeonI_Scalar2SameMisc_D_size<0b1, 0b01011, "neg">;
4627 defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vneg, NEGdd>;
4629 // Scalar Signed Saturating Negate
4630 defm SQNEG : NeonI_Scalar2SameMisc_BHSD_size<0b1, 0b00111, "sqneg">;
4631 defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqneg,
4632 SQNEGbb, SQNEGhh, SQNEGss, SQNEGdd>;
4634 // Scalar Signed Saturating Accumulated of Unsigned Value
4635 defm SUQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b0, 0b00011, "suqadd">;
4636 defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vuqadd,
4638 SUQADDss, SUQADDdd>;
4640 // Scalar Unsigned Saturating Accumulated of Signed Value
4641 defm USQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b1, 0b00011, "usqadd">;
4642 defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vsqadd,
4644 USQADDss, USQADDdd>;
4646 // Scalar Signed Saturating Extract Unsigned Narrow
4647 defm SQXTUN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10010, "sqxtun">;
4648 defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnsu,
4652 // Scalar Signed Saturating Extract Narrow
4653 defm SQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b0, 0b10100, "sqxtn">;
4654 defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovns,
4658 // Scalar Unsigned Saturating Extract Narrow
4659 defm UQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10100, "uqxtn">;
4660 defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnu,
4664 // Scalar Reduce Pairwise
// Scalar reduce-pairwise instruction skeleton: one D-sized form taking a
// 128-bit vector (.2d) and producing an FPR64 scalar result.
4666 multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,
4667 string asmop, bit Commutable = 0> {
4668 let isCommutable = Commutable in {
4669 def _D_2D : NeonI_ScalarPair<u, {size, 0b1}, opcode,
4670 (outs FPR64:$Rd), (ins VPR128:$Rn),
4671 !strconcat(asmop, "\t$Rd, $Rn.2d"),
// Extends the D-size multiclass with an S-sized form: 64-bit vector (.2s)
// input, FPR32 scalar result.  Used by the FP pairwise reductions below.
4677 multiclass NeonI_ScalarPair_SD_sizes<bit u, bit size, bits<5> opcode,
4678 string asmop, bit Commutable = 0>
4679 : NeonI_ScalarPair_D_sizes<u, size, opcode, asmop, Commutable> {
4680 let isCommutable = Commutable in {
4681 def _S_2S : NeonI_ScalarPair<u, {size, 0b0}, opcode,
4682 (outs FPR32:$Rd), (ins VPR64:$Rn),
4683 !strconcat(asmop, "\t$Rd, $Rn.2s"),
4689 // Scalar Reduce Addition Pairwise (Integer) with
4690 // Pattern to match llvm.arm.* intrinsic
4691 defm ADDPvv : NeonI_ScalarPair_D_sizes<0b0, 0b1, 0b11011, "addp", 0>;
4693 // Pattern to match llvm.aarch64.* intrinsic for
4694 // Scalar Reduce Addition Pairwise (Integer)
4695 def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))),
4696 (ADDPvv_D_2D VPR128:$Rn)>;
4698 // Scalar Reduce Addition Pairwise (Floating Point)
4699 defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>;
4701 // Scalar Reduce Maximum Pairwise (Floating Point)
4702 defm FMAXPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01111, "fmaxp", 0>;
4704 // Scalar Reduce Minimum Pairwise (Floating Point)
4705 defm FMINPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01111, "fminp", 0>;
4707 // Scalar Reduce maxNum Pairwise (Floating Point)
4708 defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>;
4710 // Scalar Reduce minNum Pairwise (Floating Point)
4711 defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>;
// Maps separate S- and D-sized pairwise-reduction intrinsics onto the
// corresponding _S_2S / _D_2D instructions defined above.  Results are
// single-element vectors (v1f32 / v1f64).
4713 multiclass Neon_ScalarPair_SD_size_patterns<SDPatternOperator opnodeS,
4714 SDPatternOperator opnodeD,
4716 Instruction INSTD> {
4717 def : Pat<(v1f32 (opnodeS (v2f32 VPR64:$Rn))),
4719 def : Pat<(v1f64 (opnodeD (v2f64 VPR128:$Rn))),
4720 (INSTD VPR128:$Rn)>;
4723 // Patterns to match llvm.aarch64.* intrinsic for
4724 // Scalar Reduce Add, Max, Min, MaxiNum, MinNum Pairwise (Floating Point)
4725 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfadd,
4726 int_aarch64_neon_vpfaddq, FADDPvv_S_2S, FADDPvv_D_2D>;
4728 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmax,
4729 int_aarch64_neon_vpmaxq, FMAXPvv_S_2S, FMAXPvv_D_2D>;
4731 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmin,
4732 int_aarch64_neon_vpminq, FMINPvv_S_2S, FMINPvv_D_2D>;
4734 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm,
4735 int_aarch64_neon_vpfmaxnmq, FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>;
4737 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm,
4738 int_aarch64_neon_vpfminnmq, FMINNMPvv_S_2S, FMINNMPvv_D_2D>;
// Bare unsigned immediate operands used as lane indices by the
// scalar-by-element and DUP instructions below.  neon_uimmN_bare accepts
// the range [0, 2^N) and prints without a '#' prefix
// (printUImmBareOperand).
4740 def neon_uimm0_bare : Operand<i64>,
4741 ImmLeaf<i64, [{return Imm == 0;}]> {
4742 let ParserMatchClass = neon_uimm0_asmoperand;
4743 let PrintMethod = "printUImmBareOperand";
4746 def neon_uimm1_bare : Operand<i64>,
4747 ImmLeaf<i64, [{return Imm < 2;}]> {
4748 let ParserMatchClass = neon_uimm1_asmoperand;
4749 let PrintMethod = "printUImmBareOperand";
4752 def neon_uimm2_bare : Operand<i64>,
4753 ImmLeaf<i64, [{return Imm < 4;}]> {
4754 let ParserMatchClass = neon_uimm2_asmoperand;
4755 let PrintMethod = "printUImmBareOperand";
// NOTE(review): uimm3/uimm4 below reuse the generic uimm3_asmoperand /
// uimm4_asmoperand parser classes, while uimm0..2 use neon_-prefixed
// ones — confirm this asymmetry is intentional.
4758 def neon_uimm3_bare : Operand<i64>,
4759 ImmLeaf<i64, [{return Imm < 8;}]> {
4760 let ParserMatchClass = uimm3_asmoperand;
4761 let PrintMethod = "printUImmBareOperand";
4764 def neon_uimm4_bare : Operand<i64>,
4765 ImmLeaf<i64, [{return Imm < 16;}]> {
4766 let ParserMatchClass = uimm4_asmoperand;
4767 let PrintMethod = "printUImmBareOperand";
4771 // Scalar by element Arithmetic
// Scalar-by-element arithmetic skeleton: $Rd = op($Rn, $MRm[lane $Imm]).
// Subclasses override Inst{11}/Inst{21}/Inst{20} (the h/l/m lane-select
// bits) and Inst{20-16} (MRm) per element size.
4773 class NeonI_ScalarXIndexedElemArith<string asmop, bits<4> opcode,
4774 string rmlane, bit u, bit szhi, bit szlo,
4775 RegisterClass ResFPR, RegisterClass OpFPR,
4776 RegisterOperand OpVPR, Operand OpImm>
4777 : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode,
4779 (ins OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
4780 asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
// Accumulating variant of the skeleton above: ties the accumulator input
// $src to the destination register via the "$src = $Rd" constraint
// (used by fmla/fmls/sqdmlal/sqdmlsl by-element forms).
4787 class NeonI_ScalarXIndexedElemArith_Constraint_Impl<string asmop, bits<4> opcode,
4789 bit u, bit szhi, bit szlo,
4790 RegisterClass ResFPR,
4791 RegisterClass OpFPR,
4792 RegisterOperand OpVPR,
4794 : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode,
4796 (ins ResFPR:$src, OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
4797 asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
4800 let Constraints = "$src = $Rd";
4805 // Scalar Floating Point multiply (scalar, by element)
4806 def FMULssv_4S : NeonI_ScalarXIndexedElemArith<"fmul",
4807 0b1001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
4808 let Inst{11} = Imm{1}; // h
4809 let Inst{21} = Imm{0}; // l
4810 let Inst{20-16} = MRm;
4812 def FMULddv_2D : NeonI_ScalarXIndexedElemArith<"fmul",
4813 0b1001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
4814 let Inst{11} = Imm{0}; // h
4815 let Inst{21} = 0b0; // l
4816 let Inst{20-16} = MRm;
4819 // Scalar Floating Point multiply extended (scalar, by element)
4820 def FMULXssv_4S : NeonI_ScalarXIndexedElemArith<"fmulx",
4821 0b1001, ".s", 0b1, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
4822 let Inst{11} = Imm{1}; // h
4823 let Inst{21} = Imm{0}; // l
4824 let Inst{20-16} = MRm;
4826 def FMULXddv_2D : NeonI_ScalarXIndexedElemArith<"fmulx",
4827 0b1001, ".d", 0b1, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
4828 let Inst{11} = Imm{0}; // h
4829 let Inst{21} = 0b0; // l
4830 let Inst{20-16} = MRm;
4833 multiclass Neon_ScalarXIndexedElem_MUL_MULX_Patterns<
4834 SDPatternOperator opnode,
4836 ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm,
4837 ValueType OpNTy, ValueType ExTy, Operand OpNImm> {
4839 def : Pat<(ResTy (opnode (ResTy FPRC:$Rn),
4840 (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)))),
4841 (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
4843 def : Pat<(ResTy (opnode (ResTy FPRC:$Rn),
4844 (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)))),
4845 (ResTy (INST (ResTy FPRC:$Rn),
4846 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
4850 def : Pat<(ResTy (opnode
4851 (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
4853 (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
4855 def : Pat<(ResTy (opnode
4856 (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
4858 (ResTy (INST (ResTy FPRC:$Rn),
4859 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
4863 // Patterns for Scalar Floating Point multiply (scalar, by element)
4864 defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<fmul, FMULssv_4S,
4865 f32, FPR32, v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
4866 defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<fmul, FMULddv_2D,
4867 f64, FPR64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;
4869 // Patterns for Scalar Floating Point multiply extended (scalar, by element)
4870 defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<int_aarch64_neon_vmulx,
4871 FMULXssv_4S, f32, FPR32, v4f32, neon_uimm2_bare,
4872 v2f32, v4f32, neon_uimm1_bare>;
4873 defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<int_aarch64_neon_vmulx,
4874 FMULXddv_2D, f64, FPR64, v2f64, neon_uimm1_bare,
4875 v1f64, v2f64, neon_uimm0_bare>;
4878 // Scalar Floating Point fused multiply-add (scalar, by element)
4879 def FMLAssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
4880 0b0001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
4881 let Inst{11} = Imm{1}; // h
4882 let Inst{21} = Imm{0}; // l
4883 let Inst{20-16} = MRm;
4885 def FMLAddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
4886 0b0001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
4887 let Inst{11} = Imm{0}; // h
4888 let Inst{21} = 0b0; // l
4889 let Inst{20-16} = MRm;
4892 // Scalar Floating Point fused multiply-subtract (scalar, by element)
4893 def FMLSssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
4894 0b0101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
4895 let Inst{11} = Imm{1}; // h
4896 let Inst{21} = Imm{0}; // l
4897 let Inst{20-16} = MRm;
4899 def FMLSddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
4900 0b0101, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
4901 let Inst{11} = Imm{0}; // h
4902 let Inst{21} = 0b0; // l
4903 let Inst{20-16} = MRm;
4905 // We are allowed to match the fma instruction regardless of compile options.
4906 multiclass Neon_ScalarXIndexedElem_FMA_Patterns<
4907 Instruction FMLAI, Instruction FMLSI,
4908 ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm,
4909 ValueType OpNTy, ValueType ExTy, Operand OpNImm> {
4911 def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
4912 (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
4914 (ResTy (FMLAI (ResTy FPRC:$Ra),
4915 (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
4917 def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
4918 (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
4920 (ResTy (FMLAI (ResTy FPRC:$Ra),
4922 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
4925 // swapped fmla operands
4926 def : Pat<(ResTy (fma
4927 (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
4930 (ResTy (FMLAI (ResTy FPRC:$Ra),
4931 (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
4933 def : Pat<(ResTy (fma
4934 (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
4937 (ResTy (FMLAI (ResTy FPRC:$Ra),
4939 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
4943 def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
4944 (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))),
4946 (ResTy (FMLSI (ResTy FPRC:$Ra),
4947 (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
4949 def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
4950 (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))),
4952 (ResTy (FMLSI (ResTy FPRC:$Ra),
4954 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
4957 // swapped fmls operands
4958 def : Pat<(ResTy (fma
4959 (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))),
4962 (ResTy (FMLSI (ResTy FPRC:$Ra),
4963 (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
4965 def : Pat<(ResTy (fma
4966 (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))),
4969 (ResTy (FMLSI (ResTy FPRC:$Ra),
4971 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
4975 // Scalar Floating Point fused multiply-add and multiply-subtract (scalar, by element)
4976 defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAssv_4S, FMLSssv_4S,
4977 f32, FPR32, v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
// Scalar FP fused multiply-add / multiply-subtract by element, f64 variant.
// A second, byte-identical defm of these FMLAddv_2D/FMLSddv_2D patterns
// followed here; it has been removed, since instantiating the multiclass
// twice with identical arguments only re-emits the same selection patterns.
4978 defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAddv_2D, FMLSddv_2D,
4979 f64, FPR64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;
4983 // Scalar Signed saturating doubling multiply-add long (scalar, by element)
4984 def SQDMLALshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
4985 0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
4986 let Inst{11} = 0b0; // h
4987 let Inst{21} = Imm{1}; // l
4988 let Inst{20} = Imm{0}; // m
4989 let Inst{19-16} = MRm{3-0};
4991 def SQDMLALshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
4992 0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
4993 let Inst{11} = Imm{2}; // h
4994 let Inst{21} = Imm{1}; // l
4995 let Inst{20} = Imm{0}; // m
4996 let Inst{19-16} = MRm{3-0};
4998 def SQDMLALdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
4999 0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
5000 let Inst{11} = 0b0; // h
5001 let Inst{21} = Imm{0}; // l
5002 let Inst{20-16} = MRm;
5004 def SQDMLALdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
5005 0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
5006 let Inst{11} = Imm{1}; // h
5007 let Inst{21} = Imm{0}; // l
5008 let Inst{20-16} = MRm;
5011 // Scalar Signed saturating doubling
5012 // multiply-subtract long (scalar, by element)
5013 def SQDMLSLshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
5014 0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
5015 let Inst{11} = 0b0; // h
5016 let Inst{21} = Imm{1}; // l
5017 let Inst{20} = Imm{0}; // m
5018 let Inst{19-16} = MRm{3-0};
5020 def SQDMLSLshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
5021 0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
5022 let Inst{11} = Imm{2}; // h
5023 let Inst{21} = Imm{1}; // l
5024 let Inst{20} = Imm{0}; // m
5025 let Inst{19-16} = MRm{3-0};
5027 def SQDMLSLdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
5028 0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
5029 let Inst{11} = 0b0; // h
5030 let Inst{21} = Imm{0}; // l
5031 let Inst{20-16} = MRm;
5033 def SQDMLSLdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
5034 0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
5035 let Inst{11} = Imm{1}; // h
5036 let Inst{21} = Imm{0}; // l
5037 let Inst{20-16} = MRm;
5040 // Scalar Signed saturating doubling multiply long (scalar, by element)
5041 def SQDMULLshv_4H : NeonI_ScalarXIndexedElemArith<"sqdmull",
5042 0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
5043 let Inst{11} = 0b0; // h
5044 let Inst{21} = Imm{1}; // l
5045 let Inst{20} = Imm{0}; // m
5046 let Inst{19-16} = MRm{3-0};
5048 def SQDMULLshv_8H : NeonI_ScalarXIndexedElemArith<"sqdmull",
5049 0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
5050 let Inst{11} = Imm{2}; // h
5051 let Inst{21} = Imm{1}; // l
5052 let Inst{20} = Imm{0}; // m
5053 let Inst{19-16} = MRm{3-0};
5055 def SQDMULLdsv_2S : NeonI_ScalarXIndexedElemArith<"sqdmull",
5056 0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
5057 let Inst{11} = 0b0; // h
5058 let Inst{21} = Imm{0}; // l
5059 let Inst{20-16} = MRm;
5061 def SQDMULLdsv_4S : NeonI_ScalarXIndexedElemArith<"sqdmull",
5062 0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
5063 let Inst{11} = Imm{1}; // h
5064 let Inst{21} = Imm{0}; // l
5065 let Inst{20-16} = MRm;
5068 // Scalar Signed saturating doubling multiply returning
5069 // high half (scalar, by element)
5070 def SQDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
5071 0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
5072 let Inst{11} = 0b0; // h
5073 let Inst{21} = Imm{1}; // l
5074 let Inst{20} = Imm{0}; // m
5075 let Inst{19-16} = MRm{3-0};
5077 def SQDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
5078 0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
5079 let Inst{11} = Imm{2}; // h
5080 let Inst{21} = Imm{1}; // l
5081 let Inst{20} = Imm{0}; // m
5082 let Inst{19-16} = MRm{3-0};
5084 def SQDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
5085 0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
5086 let Inst{11} = 0b0; // h
5087 let Inst{21} = Imm{0}; // l
5088 let Inst{20-16} = MRm;
5090 def SQDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
5091 0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
5092 let Inst{11} = Imm{1}; // h
5093 let Inst{21} = Imm{0}; // l
5094 let Inst{20-16} = MRm;
5097 // Scalar Signed saturating rounding doubling multiply
5098 // returning high half (scalar, by element)
5099 def SQRDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
5100 0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
5101 let Inst{11} = 0b0; // h
5102 let Inst{21} = Imm{1}; // l
5103 let Inst{20} = Imm{0}; // m
5104 let Inst{19-16} = MRm{3-0};
5106 def SQRDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
5107 0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
5108 let Inst{11} = Imm{2}; // h
5109 let Inst{21} = Imm{1}; // l
5110 let Inst{20} = Imm{0}; // m
5111 let Inst{19-16} = MRm{3-0};
5113 def SQRDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
5114 0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
5115 let Inst{11} = 0b0; // h
5116 let Inst{21} = Imm{0}; // l
5117 let Inst{20-16} = MRm;
5119 def SQRDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
5120 0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
5121 let Inst{11} = Imm{1}; // h
5122 let Inst{21} = Imm{0}; // l
5123 let Inst{20-16} = MRm;
5127 // Scalar Copy - DUP element to scalar
// Skeleton for DUP (element to scalar): $Rd = $Rn.<lane>[$Imm].
5128 class NeonI_Scalar_DUP<string asmop, string asmlane,
5129 RegisterClass ResRC, RegisterOperand VPRC,
5131 : NeonI_ScalarCopy<(outs ResRC:$Rd), (ins VPRC:$Rn, OpImm:$Imm),
5132 asmop # "\t$Rd, $Rn." # asmlane # "[$Imm]",
// B/H/S/D-sized DUPs.  The imm5 field (Inst{20-16}) encodes the lane
// index shifted past a size-identifying low bit pattern: xxxx1 for b,
// xxx10 for h, xx100 for s, x1000 for d.
5138 def DUPbv_B : NeonI_Scalar_DUP<"dup", "b", FPR8, VPR128, neon_uimm4_bare> {
5139 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
5141 def DUPhv_H : NeonI_Scalar_DUP<"dup", "h", FPR16, VPR128, neon_uimm3_bare> {
5142 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
5144 def DUPsv_S : NeonI_Scalar_DUP<"dup", "s", FPR32, VPR128, neon_uimm2_bare> {
5145 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
5147 def DUPdv_D : NeonI_Scalar_DUP<"dup", "d", FPR64, VPR128, neon_uimm1_bare> {
5148 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
// Selects vector_extract of an FP element as a scalar DUP.  The second
// pattern widens a 64-bit source to 128 bits with SUBREG_TO_REG first,
// since the DUP instructions only read VPR128.
5151 multiclass NeonI_Scalar_DUP_Elt_pattern<Instruction DUPI, ValueType ResTy,
5152 ValueType OpTy, Operand OpImm,
5153 ValueType OpNTy, ValueType ExTy, Operand OpNImm> {
5154 def : Pat<(ResTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)),
5155 (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;
5157 def : Pat<(ResTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)),
5159 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// Extracts the low (lane 0) or high (lane 1) 64-bit half of a 128-bit
// vector via DUPdv_D.
5163 multiclass NeonI_SDUP<PatFrag GetLow, PatFrag GetHigh,
5164 ValueType ResTy, ValueType OpTy> {
5165 def : Pat<(ResTy (GetLow VPR128:$Rn)),
5166 (ResTy (DUPdv_D (OpTy VPR128:$Rn), 0))>;
5167 def : Pat<(ResTy (GetHigh VPR128:$Rn)),
5168 (ResTy (DUPdv_D (OpTy VPR128:$Rn), 1))>;
// Low/high-half extraction for every 64-bit element arrangement.
5171 defm : NeonI_SDUP<Neon_low16B, Neon_High16B, v8i8, v16i8>;
5172 defm : NeonI_SDUP<Neon_low8H, Neon_High8H, v4i16, v8i16>;
5173 defm : NeonI_SDUP<Neon_low4S, Neon_High4S, v2i32, v4i32>;
5174 defm : NeonI_SDUP<Neon_low2D, Neon_High2D, v1i64, v2i64>;
5175 defm : NeonI_SDUP<Neon_low4f, Neon_High4f, v2f32, v4f32>;
5176 defm : NeonI_SDUP<Neon_low2d, Neon_High2d, v1f64, v2f64>;
5178 // Patterns for vector extract of FP data using scalar DUP instructions
5179 defm : NeonI_Scalar_DUP_Elt_pattern<DUPsv_S, f32,
5180 v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
5181 defm : NeonI_Scalar_DUP_Elt_pattern<DUPdv_D, f64,
5182 v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;
// Emits a "mov" assembly alias for a scalar DUP instruction; the final 0b0
// marks the alias as parse-only (not used for printing).
5184 multiclass NeonI_Scalar_DUP_alias<string asmop, string asmlane,
5185 Instruction DUPI, Operand OpImm,
5186 RegisterClass ResRC> {
5187 def : NeonInstAlias<!strconcat(asmop, "$Rd, $Rn." # asmlane # "[$Imm]"),
5188 (DUPI ResRC:$Rd, VPR128:$Rn, OpImm:$Imm), 0b0>;
5191 // Aliases for Scalar copy - DUP element (scalar)
5192 // FIXME: This is actually the preferred syntax but TableGen can't deal with
5193 // custom printing of aliases.
5194 defm : NeonI_Scalar_DUP_alias<"mov", ".b", DUPbv_B, neon_uimm4_bare, FPR8>;
5195 defm : NeonI_Scalar_DUP_alias<"mov", ".h", DUPhv_H, neon_uimm3_bare, FPR16>;
5196 defm : NeonI_Scalar_DUP_alias<"mov", ".s", DUPsv_S, neon_uimm2_bare, FPR32>;
5197 defm : NeonI_Scalar_DUP_alias<"mov", ".d", DUPdv_D, neon_uimm1_bare, FPR64>;
5200 //===----------------------------------------------------------------------===//
5201 // Non-Instruction Patterns
5202 //===----------------------------------------------------------------------===//
5204 // 64-bit vector bitcasts...
// All 64-bit vector types live in the same register class (VPR64), so a
// bitconvert between them is a pure re-typing of the register — no
// instruction is emitted.
5206 def : Pat<(v1i64 (bitconvert (v8i8 VPR64:$src))), (v1i64 VPR64:$src)>;
5207 def : Pat<(v2f32 (bitconvert (v8i8 VPR64:$src))), (v2f32 VPR64:$src)>;
5208 def : Pat<(v2i32 (bitconvert (v8i8 VPR64:$src))), (v2i32 VPR64:$src)>;
5209 def : Pat<(v4i16 (bitconvert (v8i8 VPR64:$src))), (v4i16 VPR64:$src)>;
5211 def : Pat<(v1i64 (bitconvert (v4i16 VPR64:$src))), (v1i64 VPR64:$src)>;
5212 def : Pat<(v2i32 (bitconvert (v4i16 VPR64:$src))), (v2i32 VPR64:$src)>;
5213 def : Pat<(v2f32 (bitconvert (v4i16 VPR64:$src))), (v2f32 VPR64:$src)>;
5214 def : Pat<(v8i8 (bitconvert (v4i16 VPR64:$src))), (v8i8 VPR64:$src)>;
5216 def : Pat<(v1i64 (bitconvert (v2i32 VPR64:$src))), (v1i64 VPR64:$src)>;
5217 def : Pat<(v2f32 (bitconvert (v2i32 VPR64:$src))), (v2f32 VPR64:$src)>;
5218 def : Pat<(v4i16 (bitconvert (v2i32 VPR64:$src))), (v4i16 VPR64:$src)>;
5219 def : Pat<(v8i8 (bitconvert (v2i32 VPR64:$src))), (v8i8 VPR64:$src)>;
5221 def : Pat<(v1i64 (bitconvert (v2f32 VPR64:$src))), (v1i64 VPR64:$src)>;
5222 def : Pat<(v2i32 (bitconvert (v2f32 VPR64:$src))), (v2i32 VPR64:$src)>;
5223 def : Pat<(v4i16 (bitconvert (v2f32 VPR64:$src))), (v4i16 VPR64:$src)>;
5224 def : Pat<(v8i8 (bitconvert (v2f32 VPR64:$src))), (v8i8 VPR64:$src)>;
5226 def : Pat<(v2f32 (bitconvert (v1i64 VPR64:$src))), (v2f32 VPR64:$src)>;
5227 def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
5228 def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
5229 def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
5231 // ..and 128-bit vector bitcasts...
5233 def : Pat<(v2f64 (bitconvert (v16i8 VPR128:$src))), (v2f64 VPR128:$src)>;
5234 def : Pat<(v2i64 (bitconvert (v16i8 VPR128:$src))), (v2i64 VPR128:$src)>;
5235 def : Pat<(v4f32 (bitconvert (v16i8 VPR128:$src))), (v4f32 VPR128:$src)>;
5236 def : Pat<(v4i32 (bitconvert (v16i8 VPR128:$src))), (v4i32 VPR128:$src)>;
5237 def : Pat<(v8i16 (bitconvert (v16i8 VPR128:$src))), (v8i16 VPR128:$src)>;
5239 def : Pat<(v2f64 (bitconvert (v8i16 VPR128:$src))), (v2f64 VPR128:$src)>;
5240 def : Pat<(v2i64 (bitconvert (v8i16 VPR128:$src))), (v2i64 VPR128:$src)>;
5241 def : Pat<(v4i32 (bitconvert (v8i16 VPR128:$src))), (v4i32 VPR128:$src)>;
5242 def : Pat<(v4f32 (bitconvert (v8i16 VPR128:$src))), (v4f32 VPR128:$src)>;
5243 def : Pat<(v16i8 (bitconvert (v8i16 VPR128:$src))), (v16i8 VPR128:$src)>;
5245 def : Pat<(v2f64 (bitconvert (v4i32 VPR128:$src))), (v2f64 VPR128:$src)>;
5246 def : Pat<(v2i64 (bitconvert (v4i32 VPR128:$src))), (v2i64 VPR128:$src)>;
5247 def : Pat<(v4f32 (bitconvert (v4i32 VPR128:$src))), (v4f32 VPR128:$src)>;
5248 def : Pat<(v8i16 (bitconvert (v4i32 VPR128:$src))), (v8i16 VPR128:$src)>;
5249 def : Pat<(v16i8 (bitconvert (v4i32 VPR128:$src))), (v16i8 VPR128:$src)>;
5251 def : Pat<(v2f64 (bitconvert (v4f32 VPR128:$src))), (v2f64 VPR128:$src)>;
5252 def : Pat<(v2i64 (bitconvert (v4f32 VPR128:$src))), (v2i64 VPR128:$src)>;
5253 def : Pat<(v4i32 (bitconvert (v4f32 VPR128:$src))), (v4i32 VPR128:$src)>;
5254 def : Pat<(v8i16 (bitconvert (v4f32 VPR128:$src))), (v8i16 VPR128:$src)>;
5255 def : Pat<(v16i8 (bitconvert (v4f32 VPR128:$src))), (v16i8 VPR128:$src)>;
5257 def : Pat<(v2f64 (bitconvert (v2i64 VPR128:$src))), (v2f64 VPR128:$src)>;
5258 def : Pat<(v4f32 (bitconvert (v2i64 VPR128:$src))), (v4f32 VPR128:$src)>;
5259 def : Pat<(v4i32 (bitconvert (v2i64 VPR128:$src))), (v4i32 VPR128:$src)>;
5260 def : Pat<(v8i16 (bitconvert (v2i64 VPR128:$src))), (v8i16 VPR128:$src)>;
5261 def : Pat<(v16i8 (bitconvert (v2i64 VPR128:$src))), (v16i8 VPR128:$src)>;
5263 def : Pat<(v2i64 (bitconvert (v2f64 VPR128:$src))), (v2i64 VPR128:$src)>;
5264 def : Pat<(v4f32 (bitconvert (v2f64 VPR128:$src))), (v4f32 VPR128:$src)>;
5265 def : Pat<(v4i32 (bitconvert (v2f64 VPR128:$src))), (v4i32 VPR128:$src)>;
5266 def : Pat<(v8i16 (bitconvert (v2f64 VPR128:$src))), (v8i16 VPR128:$src)>;
5267 def : Pat<(v16i8 (bitconvert (v2f64 VPR128:$src))), (v16i8 VPR128:$src)>;
5270 // ...and scalar bitcasts...
// Scalar <-> one-element-vector bitcasts within the same FPR are also
// register reinterpretations and emit no code.
5271 def : Pat<(f16 (bitconvert (v1i16 FPR16:$src))), (f16 FPR16:$src)>;
5272 def : Pat<(f32 (bitconvert (v1i32 FPR32:$src))), (f32 FPR32:$src)>;
5273 def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
5274 def : Pat<(f32 (bitconvert (v1f32 FPR32:$src))), (f32 FPR32:$src)>;
5275 def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;
// Crossing the register-file boundary (FPR -> GPR) requires a real FMOV.
5277 def : Pat<(i64 (bitconvert (v1i64 FPR64:$src))), (FMOVxd $src)>;
5278 def : Pat<(i32 (bitconvert (v1i32 FPR32:$src))), (FMOVws $src)>;
5280 def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
5281 def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
5282 def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
5284 def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))), (f64 VPR64:$src)>;
5285 def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))), (f64 VPR64:$src)>;
5286 def : Pat<(f64 (bitconvert (v2i32 VPR64:$src))), (f64 VPR64:$src)>;
5287 def : Pat<(f64 (bitconvert (v2f32 VPR64:$src))), (f64 VPR64:$src)>;
5288 def : Pat<(f64 (bitconvert (v1i64 VPR64:$src))), (f64 VPR64:$src)>;
5290 def : Pat<(f128 (bitconvert (v16i8 VPR128:$src))), (f128 VPR128:$src)>;
5291 def : Pat<(f128 (bitconvert (v8i16 VPR128:$src))), (f128 VPR128:$src)>;
5292 def : Pat<(f128 (bitconvert (v4i32 VPR128:$src))), (f128 VPR128:$src)>;
5293 def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))), (f128 VPR128:$src)>;
5294 def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), (f128 VPR128:$src)>;
5295 def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), (f128 VPR128:$src)>;
5297 def : Pat<(v1i16 (bitconvert (f16 FPR16:$src))), (v1i16 FPR16:$src)>;
5298 def : Pat<(v1i32 (bitconvert (f32 FPR32:$src))), (v1i32 FPR32:$src)>;
5299 def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
5300 def : Pat<(v1f32 (bitconvert (f32 FPR32:$src))), (v1f32 FPR32:$src)>;
5301 def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
// ...and the opposite direction (GPR -> FPR) likewise needs an FMOV.
5303 def : Pat<(v1i64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
5304 def : Pat<(v1i32 (bitconvert (i32 GPR32:$src))), (FMOVsw $src)>;
5306 def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
5307 def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
5308 def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
5309 def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
5310 def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
5312 def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>;
5313 def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>;
5314 def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>;
5315 def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
5316 def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
5317 def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
// Unsigned immediate operands used as the EXT byte index, printed in hex.
// The ImmLeaf predicates bound the value: [0,7] for uimm3, [0,15] for uimm4.
5319 def neon_uimm3 : Operand<i64>,
5320 ImmLeaf<i64, [{return Imm < 8;}]> {
5321 let ParserMatchClass = uimm3_asmoperand;
5322 let PrintMethod = "printUImmHexOperand";
5325 def neon_uimm4 : Operand<i64>,
5326 ImmLeaf<i64, [{return Imm < 16;}]> {
5327 let ParserMatchClass = uimm4_asmoperand;
5328 let PrintMethod = "printUImmHexOperand";
// Vector extract (EXT): concatenates the low bytes of $Rn with the high
// bytes of $Rm, starting at byte position $Index.
5332 class NeonI_Extract<bit q, bits<2> op2, string asmop,
5333 string OpS, RegisterOperand OpVPR, Operand OpImm>
5334 : NeonI_BitExtract<q, op2, (outs OpVPR:$Rd),
5335 (ins OpVPR:$Rn, OpVPR:$Rm, OpImm:$Index),
5336 asmop # "\t$Rd." # OpS # ", $Rn." # OpS #
5337 ", $Rm." # OpS # ", $Index",
// 64-bit form: only 3 index bits are meaningful, so Inst{14} is forced to 0.
5343 def EXTvvvi_8b : NeonI_Extract<0b0, 0b00, "ext", "8b",
5344 VPR64, neon_uimm3> {
5345 let Inst{14-11} = {0b0, Index{2}, Index{1}, Index{0}};
// 128-bit form: all 4 index bits are encoded directly in Inst{14-11}.
5348 def EXTvvvi_16b: NeonI_Extract<0b1, 0b00, "ext", "16b",
5349 VPR128, neon_uimm4> {
5350 let Inst{14-11} = Index;
// Select the Neon_vextract node onto the EXT instructions for every
// legal 64-bit and 128-bit element type.
5353 class NI_Extract<ValueType OpTy, RegisterOperand OpVPR, Instruction INST,
5355 : Pat<(OpTy (Neon_vextract (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm),
5357 (INST OpVPR:$Rn, OpVPR:$Rm, OpImm:$Imm)>;
5359 def : NI_Extract<v8i8, VPR64, EXTvvvi_8b, neon_uimm3>;
5360 def : NI_Extract<v4i16, VPR64, EXTvvvi_8b, neon_uimm3>;
5361 def : NI_Extract<v2i32, VPR64, EXTvvvi_8b, neon_uimm3>;
5362 def : NI_Extract<v1i64, VPR64, EXTvvvi_8b, neon_uimm3>;
5363 def : NI_Extract<v2f32, VPR64, EXTvvvi_8b, neon_uimm3>;
5364 def : NI_Extract<v1f64, VPR64, EXTvvvi_8b, neon_uimm3>;
5365 def : NI_Extract<v16i8, VPR128, EXTvvvi_16b, neon_uimm4>;
5366 def : NI_Extract<v8i16, VPR128, EXTvvvi_16b, neon_uimm4>;
5367 def : NI_Extract<v4i32, VPR128, EXTvvvi_16b, neon_uimm4>;
5368 def : NI_Extract<v2i64, VPR128, EXTvvvi_16b, neon_uimm4>;
5369 def : NI_Extract<v4f32, VPR128, EXTvvvi_16b, neon_uimm4>;
5370 def : NI_Extract<v2f64, VPR128, EXTvvvi_16b, neon_uimm4>;
// Table lookup (TBL): indexes $Rm selects bytes out of the register list
// $Rn; len encodes how many table registers the list names (1-4).
5373 class NI_TBL<bit q, bits<2> op2, bits<2> len, bit op,
5374 string asmop, string OpS, RegisterOperand OpVPR,
5375 RegisterOperand VecList>
5376 : NeonI_TBL<q, op2, len, op,
5377 (outs OpVPR:$Rd), (ins VecList:$Rn, OpVPR:$Rm),
5378 asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
5382 // The vectors in the lookup table are always 16b
5383 multiclass NI_TBL_pat<bits<2> len, bit op, string asmop, string List> {
5384 def _8b : NI_TBL<0, 0b00, len, op, asmop, "8b", VPR64,
5385 !cast<RegisterOperand>(List # "16B_operand")>;
5387 def _16b : NI_TBL<1, 0b00, len, op, asmop, "16b", VPR128,
5388 !cast<RegisterOperand>(List # "16B_operand")>;
// One/two/three/four-register table variants.
5391 defm TBL1 : NI_TBL_pat<0b00, 0b0, "tbl", "VOne">;
5392 defm TBL2 : NI_TBL_pat<0b01, 0b0, "tbl", "VPair">;
5393 defm TBL3 : NI_TBL_pat<0b10, 0b0, "tbl", "VTriple">;
5394 defm TBL4 : NI_TBL_pat<0b11, 0b0, "tbl", "VQuad">;
5396 // Table lookup extension (TBX)
// Like TBL, but the destination is also an input ($src tied to $Rd), so
// lanes whose index is out of range keep their previous contents.
5397 class NI_TBX<bit q, bits<2> op2, bits<2> len, bit op,
5398 string asmop, string OpS, RegisterOperand OpVPR,
5399 RegisterOperand VecList>
5400 : NeonI_TBL<q, op2, len, op,
5401 (outs OpVPR:$Rd), (ins OpVPR:$src, VecList:$Rn, OpVPR:$Rm),
5402 asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
5405 let Constraints = "$src = $Rd";
5408 // The vectors in the lookup table are always 16b
5409 multiclass NI_TBX_pat<bits<2> len, bit op, string asmop, string List> {
5410 def _8b : NI_TBX<0, 0b00, len, op, asmop, "8b", VPR64,
5411 !cast<RegisterOperand>(List # "16B_operand")>;
5413 def _16b : NI_TBX<1, 0b00, len, op, asmop, "16b", VPR128,
5414 !cast<RegisterOperand>(List # "16B_operand")>;
// One/two/three/four-register table variants.
5417 defm TBX1 : NI_TBX_pat<0b00, 0b1, "tbx", "VOne">;
5418 defm TBX2 : NI_TBX_pat<0b01, 0b1, "tbx", "VPair">;
5419 defm TBX3 : NI_TBX_pat<0b10, 0b1, "tbx", "VTriple">;
5420 defm TBX4 : NI_TBX_pat<0b11, 0b1, "tbx", "VQuad">;
5422 // The following definitions are for the instruction class (3V Elem)
// Accumulating by-element operation: $Rd = op($Rd, $Rn, $Re[$Index]).
// The destination is tied to $src so the accumulator is read and written.
5426 class NI_2VE<bit q, bit u, bits<2> size, bits<4> opcode,
5427 string asmop, string ResS, string OpS, string EleOpS,
5428 Operand OpImm, RegisterOperand ResVPR,
5429 RegisterOperand OpVPR, RegisterOperand EleOpVPR>
5430 : NeonI_2VElem<q, u, size, opcode,
5431 (outs ResVPR:$Rd), (ins ResVPR:$src, OpVPR:$Rn,
5432 EleOpVPR:$Re, OpImm:$Index),
5433 asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
5434 ", $Re." # EleOpS # "[$Index]",
5440 let Constraints = "$src = $Rd";
5443 multiclass NI_2VE_v1<bit u, bits<4> opcode, string asmop> {
5444 // vector register class for element is always 128-bit to cover the max index
// S-sized element: 2-bit index split across Inst{11} (H) and Inst{21} (L).
5445 def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
5446 neon_uimm2_bare, VPR64, VPR64, VPR128> {
5447 let Inst{11} = {Index{1}};
5448 let Inst{21} = {Index{0}};
5449 let Inst{20-16} = Re;
5452 def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
5453 neon_uimm2_bare, VPR128, VPR128, VPR128> {
5454 let Inst{11} = {Index{1}};
5455 let Inst{21} = {Index{0}};
5456 let Inst{20-16} = Re;
5459 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
// H-sized element: 3-bit index in Inst{11,21,20}; only 4 register bits left.
5460 def _4h8h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
5461 neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
5462 let Inst{11} = {Index{2}};
5463 let Inst{21} = {Index{1}};
5464 let Inst{20} = {Index{0}};
5465 let Inst{19-16} = Re{3-0};
5468 def _8h8h : NI_2VE<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
5469 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
5470 let Inst{11} = {Index{2}};
5471 let Inst{21} = {Index{1}};
5472 let Inst{20} = {Index{0}};
5473 let Inst{19-16} = Re{3-0};
// By-element multiply-accumulate / multiply-subtract.
5477 defm MLAvve : NI_2VE_v1<0b1, 0b0000, "mla">;
5478 defm MLSvve : NI_2VE_v1<0b1, 0b0100, "mls">;
5480 // Pattern for lane in 128-bit vector
// The element register is already 128-bit, so it is fed straight through.
5481 class NI_2VE_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
5482 RegisterOperand ResVPR, RegisterOperand OpVPR,
5483 RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
5484 ValueType EleOpTy, SDPatternOperator coreop>
5485 : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
5486 (OpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
5487 (INST ResVPR:$src, OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
5489 // Pattern for lane in 64-bit vector
// A 64-bit element register is widened with SUBREG_TO_REG so it fits the
// instruction's 128-bit element operand; the lane index is unchanged.
5490 class NI_2VE_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
5491 RegisterOperand ResVPR, RegisterOperand OpVPR,
5492 RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
5493 ValueType EleOpTy, SDPatternOperator coreop>
5494 : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
5495 (OpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
5496 (INST ResVPR:$src, OpVPR:$Rn,
5497 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
5499 multiclass NI_2VE_v1_pat<string subop, SDPatternOperator op>
5501 def : NI_2VE_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
5502 op, VPR64, VPR64, VPR128, v2i32, v2i32, v4i32,
5503 BinOpFrag<(Neon_vduplane
5504 (Neon_low4S node:$LHS), node:$RHS)>>;
5506 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
5507 op, VPR128, VPR128, VPR128, v4i32, v4i32, v4i32,
5508 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5510 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
5511 op, VPR64, VPR64, VPR128Lo, v4i16, v4i16, v8i16,
5512 BinOpFrag<(Neon_vduplane
5513 (Neon_low8H node:$LHS), node:$RHS)>>;
5515 def : NI_2VE_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
5516 op, VPR128, VPR128, VPR128Lo, v8i16, v8i16, v8i16,
5517 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5519 // Index can only be half of the max value for lane in 64-bit vector
5521 def : NI_2VE_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
5522 op, VPR64, VPR64, VPR64, v2i32, v2i32, v2i32,
5523 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5525 def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4s"), neon_uimm1_bare,
5526 op, VPR128, VPR128, VPR64, v4i32, v4i32, v2i32,
5527 BinOpFrag<(Neon_vduplane
5528 (Neon_combine_4S node:$LHS, undef),
5531 def : NI_2VE_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
5532 op, VPR64, VPR64, VPR64Lo, v4i16, v4i16, v4i16,
5533 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5535 def : NI_2VE_lane<!cast<Instruction>(subop # "_8h8h"), neon_uimm2_bare,
5536 op, VPR128, VPR128, VPR64Lo, v8i16, v8i16, v4i16,
5537 BinOpFrag<(Neon_vduplane
5538 (Neon_combine_8H node:$LHS, undef),
// Lane-indexed selection patterns for the accumulating MLA/MLS forms.
5542 defm MLA_lane_v1 : NI_2VE_v1_pat<"MLAvve", Neon_mla>;
5543 defm MLS_lane_v1 : NI_2VE_v1_pat<"MLSvve", Neon_mls>;
// Non-accumulating by-element operation: $Rd = op($Rn, $Re[$Index]).
// Same encoding layout as NI_2VE but with no tied accumulator input.
5545 class NI_2VE_2op<bit q, bit u, bits<2> size, bits<4> opcode,
5546 string asmop, string ResS, string OpS, string EleOpS,
5547 Operand OpImm, RegisterOperand ResVPR,
5548 RegisterOperand OpVPR, RegisterOperand EleOpVPR>
5549 : NeonI_2VElem<q, u, size, opcode,
5550 (outs ResVPR:$Rd), (ins OpVPR:$Rn,
5551 EleOpVPR:$Re, OpImm:$Index),
5552 asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
5553 ", $Re." # EleOpS # "[$Index]",
5560 multiclass NI_2VE_v1_2op<bit u, bits<4> opcode, string asmop> {
5561 // vector register class for element is always 128-bit to cover the max index
5562 def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
5563 neon_uimm2_bare, VPR64, VPR64, VPR128> {
5564 let Inst{11} = {Index{1}};
5565 let Inst{21} = {Index{0}};
5566 let Inst{20-16} = Re;
5569 def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
5570 neon_uimm2_bare, VPR128, VPR128, VPR128> {
5571 let Inst{11} = {Index{1}};
5572 let Inst{21} = {Index{0}};
5573 let Inst{20-16} = Re;
5576 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
5577 def _4h8h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
5578 neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
5579 let Inst{11} = {Index{2}};
5580 let Inst{21} = {Index{1}};
5581 let Inst{20} = {Index{0}};
5582 let Inst{19-16} = Re{3-0};
5585 def _8h8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
5586 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
5587 let Inst{11} = {Index{2}};
5588 let Inst{21} = {Index{1}};
5589 let Inst{20} = {Index{0}};
5590 let Inst{19-16} = Re{3-0};
// By-element integer multiply and saturating (rounding) doubling multiply.
5594 defm MULve : NI_2VE_v1_2op<0b0, 0b1000, "mul">;
5595 defm SQDMULHve : NI_2VE_v1_2op<0b0, 0b1100, "sqdmulh">;
5596 defm SQRDMULHve : NI_2VE_v1_2op<0b0, 0b1101, "sqrdmulh">;
5598 // Pattern for lane in 128-bit vector
// Two-operand analogue of NI_2VE_laneq: no accumulator input.
5599 class NI_2VE_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
5600 RegisterOperand OpVPR, RegisterOperand EleOpVPR,
5601 ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
5602 SDPatternOperator coreop>
5603 : Pat<(ResTy (op (OpTy OpVPR:$Rn),
5604 (OpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
5605 (INST OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
5607 // Pattern for lane in 64-bit vector
// 64-bit element register is widened with SUBREG_TO_REG before use.
5608 class NI_2VE_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
5609 RegisterOperand OpVPR, RegisterOperand EleOpVPR,
5610 ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
5611 SDPatternOperator coreop>
5612 : Pat<(ResTy (op (OpTy OpVPR:$Rn),
5613 (OpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
5615 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
5617 multiclass NI_2VE_mul_v1_pat<string subop, SDPatternOperator op> {
5618 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
5619 op, VPR64, VPR128, v2i32, v2i32, v4i32,
5620 BinOpFrag<(Neon_vduplane
5621 (Neon_low4S node:$LHS), node:$RHS)>>;
5623 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
5624 op, VPR128, VPR128, v4i32, v4i32, v4i32,
5625 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5627 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
5628 op, VPR64, VPR128Lo, v4i16, v4i16, v8i16,
5629 BinOpFrag<(Neon_vduplane
5630 (Neon_low8H node:$LHS), node:$RHS)>>;
5632 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
5633 op, VPR128, VPR128Lo, v8i16, v8i16, v8i16,
5634 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5636 // Index can only be half of the max value for lane in 64-bit vector
5638 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
5639 op, VPR64, VPR64, v2i32, v2i32, v2i32,
5640 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5642 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4s"), neon_uimm1_bare,
5643 op, VPR128, VPR64, v4i32, v4i32, v2i32,
5644 BinOpFrag<(Neon_vduplane
5645 (Neon_combine_4S node:$LHS, undef),
5648 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
5649 op, VPR64, VPR64Lo, v4i16, v4i16, v4i16,
5650 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5652 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_8h8h"), neon_uimm2_bare,
5653 op, VPR128, VPR64Lo, v8i16, v8i16, v4i16,
5654 BinOpFrag<(Neon_vduplane
5655 (Neon_combine_8H node:$LHS, undef),
// Lane-indexed selection patterns for MUL/SQDMULH/SQRDMULH.
5659 defm MUL_lane_v1 : NI_2VE_mul_v1_pat<"MULve", mul>;
5660 defm SQDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQDMULHve", int_arm_neon_vqdmulh>;
5661 defm SQRDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQRDMULHve", int_arm_neon_vqrdmulh>;
// Variant 2: floating-point S/D element sizes (no H, no _1d2d form).
5665 multiclass NI_2VE_v2_2op<bit u, bits<4> opcode, string asmop> {
5666 // vector register class for element is always 128-bit to cover the max index
5667 def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
5668 neon_uimm2_bare, VPR64, VPR64, VPR128> {
5669 let Inst{11} = {Index{1}};
5670 let Inst{21} = {Index{0}};
5671 let Inst{20-16} = Re;
5674 def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
5675 neon_uimm2_bare, VPR128, VPR128, VPR128> {
5676 let Inst{11} = {Index{1}};
5677 let Inst{21} = {Index{0}};
5678 let Inst{20-16} = Re;
5681 // _1d2d doesn't exist!
// D-sized element: single index bit lives in Inst{11}.
5683 def _2d2d : NI_2VE_2op<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
5684 neon_uimm1_bare, VPR128, VPR128, VPR128> {
5685 let Inst{11} = {Index{0}};
5687 let Inst{20-16} = Re;
5691 defm FMULve : NI_2VE_v2_2op<0b0, 0b1001, "fmul">;
5692 defm FMULXve : NI_2VE_v2_2op<0b1, 0b1001, "fmulx">;
// 2d2d with a v1f64 element: the "dup" shows up as Neon_combine_2d of the
// same register with itself, and the instruction always uses lane 0.
5694 class NI_2VE_mul_lane_2d<Instruction INST, Operand OpImm, SDPatternOperator op,
5695 RegisterOperand OpVPR, RegisterOperand EleOpVPR,
5696 ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
5697 SDPatternOperator coreop>
5698 : Pat<(ResTy (op (OpTy OpVPR:$Rn),
5699 (OpTy (coreop (EleOpTy EleOpVPR:$Re), (EleOpTy EleOpVPR:$Re))))),
5701 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), 0)>;
5703 multiclass NI_2VE_mul_v2_pat<string subop, SDPatternOperator op> {
5704 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
5705 op, VPR64, VPR128, v2f32, v2f32, v4f32,
5706 BinOpFrag<(Neon_vduplane
5707 (Neon_low4f node:$LHS), node:$RHS)>>;
5709 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
5710 op, VPR128, VPR128, v4f32, v4f32, v4f32,
5711 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5713 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
5714 op, VPR128, VPR128, v2f64, v2f64, v2f64,
5715 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5717 // Index can only be half of the max value for lane in 64-bit vector
5719 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
5720 op, VPR64, VPR64, v2f32, v2f32, v2f32,
5721 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5723 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4s"), neon_uimm1_bare,
5724 op, VPR128, VPR64, v4f32, v4f32, v2f32,
5725 BinOpFrag<(Neon_vduplane
5726 (Neon_combine_4f node:$LHS, undef),
5729 def : NI_2VE_mul_lane_2d<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
5730 op, VPR128, VPR64, v2f64, v2f64, v1f64,
5731 BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
5734 defm FMUL_lane_v2 : NI_2VE_mul_v2_pat<"FMULve", fmul>;
5735 defm FMULX_lane_v2 : NI_2VE_mul_v2_pat<"FMULXve", int_aarch64_neon_vmulx>;
5737 // The following are patterns using fma
5738 // -ffp-contract=fast generates fma
// Accumulating floating-point by-element forms (FMLA/FMLS): S and D sizes.
5740 multiclass NI_2VE_v2<bit u, bits<4> opcode, string asmop> {
5741 // vector register class for element is always 128-bit to cover the max index
5742 def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
5743 neon_uimm2_bare, VPR64, VPR64, VPR128> {
5744 let Inst{11} = {Index{1}};
5745 let Inst{21} = {Index{0}};
5746 let Inst{20-16} = Re;
5749 def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
5750 neon_uimm2_bare, VPR128, VPR128, VPR128> {
5751 let Inst{11} = {Index{1}};
5752 let Inst{21} = {Index{0}};
5753 let Inst{20-16} = Re;
5756 // _1d2d doesn't exist!
5758 def _2d2d : NI_2VE<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
5759 neon_uimm1_bare, VPR128, VPR128, VPR128> {
5760 let Inst{11} = {Index{0}};
5762 let Inst{20-16} = Re;
5766 defm FMLAvve : NI_2VE_v2<0b0, 0b0001, "fmla">;
5767 defm FMLSvve : NI_2VE_v2<0b0, 0b0101, "fmls">;
5769 // Pattern for lane in 128-bit vector
// "swap" classes match fma with the duplicated lane as the FIRST
// multiplicand (the DAG form fma(dup, Rn, src)), then reorder the operands
// for the instruction, which takes the accumulator first.
5770 class NI_2VEswap_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
5771 RegisterOperand ResVPR, RegisterOperand OpVPR,
5772 ValueType ResTy, ValueType OpTy,
5773 SDPatternOperator coreop>
5774 : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
5775 (ResTy ResVPR:$src), (ResTy ResVPR:$Rn))),
5776 (INST ResVPR:$src, ResVPR:$Rn, OpVPR:$Re, OpImm:$Index)>;
5778 // Pattern for lane in 64-bit vector
5779 class NI_2VEswap_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
5780 RegisterOperand ResVPR, RegisterOperand OpVPR,
5781 ValueType ResTy, ValueType OpTy,
5782 SDPatternOperator coreop>
5783 : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
5784 (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
5785 (INST ResVPR:$src, ResVPR:$Rn,
5786 (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), OpImm:$Index)>;
5788 // Pattern for lane in 64-bit vector
// 2d2d case: the dup appears as coreop($Re, $Re) and lane 0 is implied.
5789 class NI_2VEswap_lane_2d2d<Instruction INST, Operand OpImm,
5790 SDPatternOperator op,
5791 RegisterOperand ResVPR, RegisterOperand OpVPR,
5792 ValueType ResTy, ValueType OpTy,
5793 SDPatternOperator coreop>
5794 : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (OpTy OpVPR:$Re))),
5795 (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
5796 (INST ResVPR:$src, ResVPR:$Rn,
5797 (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), 0)>;
// Select fma with a duplicated lane operand onto the FMLA by-element forms.
5800 multiclass NI_2VE_fma_v2_pat<string subop, SDPatternOperator op> {
5801 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
5802 neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
5803 BinOpFrag<(Neon_vduplane
5804 (Neon_low4f node:$LHS), node:$RHS)>>;
5806 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
5807 neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
5808 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5810 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
5811 neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
5812 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5814 // Index can only be half of the max value for lane in 64-bit vector
5816 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
5817 neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
5818 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5820 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
5821 neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
5822 BinOpFrag<(Neon_vduplane
5823 (Neon_combine_4f node:$LHS, undef),
5826 def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
5827 neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
5828 BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
5831 defm FMLA_lane_v2_s : NI_2VE_fma_v2_pat<"FMLAvve", fma>;
// Select fused multiply-subtract onto FMLS.  FMLS is fma with one negated
// multiplicand, and the fneg can legally sit either outside the vduplane
// or on the vector being duplicated — so each variant needs two patterns.
5833 multiclass NI_2VE_fms_v2_pat<string subop, SDPatternOperator op>
5835 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
5836 neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
5837 BinOpFrag<(fneg (Neon_vduplane
5838 (Neon_low4f node:$LHS), node:$RHS))>>;
5840 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
5841 neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
5842 BinOpFrag<(Neon_vduplane
5843 (Neon_low4f (fneg node:$LHS)),
5846 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
5847 neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
5848 BinOpFrag<(fneg (Neon_vduplane
5849 node:$LHS, node:$RHS))>>;
5851 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
5852 neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
5853 BinOpFrag<(Neon_vduplane
5854 (fneg node:$LHS), node:$RHS)>>;
5856 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
5857 neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
5858 BinOpFrag<(fneg (Neon_vduplane
5859 node:$LHS, node:$RHS))>>;
5861 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
5862 neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
5863 BinOpFrag<(Neon_vduplane
5864 (fneg node:$LHS), node:$RHS)>>;
5866 // Index can only be half of the max value for lane in 64-bit vector
5868 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
5869 neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
5870 BinOpFrag<(fneg (Neon_vduplane
5871 node:$LHS, node:$RHS))>>;
5873 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
5874 neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
5875 BinOpFrag<(Neon_vduplane
5876 (fneg node:$LHS), node:$RHS)>>;
5878 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
5879 neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
5880 BinOpFrag<(fneg (Neon_vduplane
5881 (Neon_combine_4f node:$LHS, undef),
5884 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
5885 neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
5886 BinOpFrag<(Neon_vduplane
5887 (Neon_combine_4f (fneg node:$LHS), undef),
5890 def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
5891 neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
5892 BinOpFrag<(fneg (Neon_combine_2d
5893 node:$LHS, node:$RHS))>>;
5895 def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
5896 neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
5897 BinOpFrag<(Neon_combine_2d
5898 (fneg node:$LHS), (fneg node:$RHS))>>;
5901 defm FMLS_lane_v2_s : NI_2VE_fms_v2_pat<"FMLSvve", fma>;
5903 // Variant 3: Long type
5904 // E.g. SMLAL : 4S/4H/H (v0-v15), 2D/2S/S
5905 // SMLAL2: 4S/8H/H (v0-v15), 2D/4S/S
// Long accumulating by-element forms: result elements are twice the width
// of the sources.  The q=1 variants get a "2" asm suffix and read the high
// half of the source register.
5907 multiclass NI_2VE_v3<bit u, bits<4> opcode, string asmop> {
5908 // vector register class for element is always 128-bit to cover the max index
5909 def _2d2s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
5910 neon_uimm2_bare, VPR128, VPR64, VPR128> {
5911 let Inst{11} = {Index{1}};
5912 let Inst{21} = {Index{0}};
5913 let Inst{20-16} = Re;
5916 def _2d4s : NI_2VE<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
5917 neon_uimm2_bare, VPR128, VPR128, VPR128> {
5918 let Inst{11} = {Index{1}};
5919 let Inst{21} = {Index{0}};
5920 let Inst{20-16} = Re;
5923 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
5924 def _4s8h : NI_2VE<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
5925 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
5926 let Inst{11} = {Index{2}};
5927 let Inst{21} = {Index{1}};
5928 let Inst{20} = {Index{0}};
5929 let Inst{19-16} = Re{3-0};
5932 def _4s4h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
5933 neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
5934 let Inst{11} = {Index{2}};
5935 let Inst{21} = {Index{1}};
5936 let Inst{20} = {Index{0}};
5937 let Inst{19-16} = Re{3-0};
// Long by-element multiply-accumulate/subtract, plus saturating doubling.
5941 defm SMLALvve : NI_2VE_v3<0b0, 0b0010, "smlal">;
5942 defm UMLALvve : NI_2VE_v3<0b1, 0b0010, "umlal">;
5943 defm SMLSLvve : NI_2VE_v3<0b0, 0b0110, "smlsl">;
5944 defm UMLSLvve : NI_2VE_v3<0b1, 0b0110, "umlsl">;
5945 defm SQDMLALvve : NI_2VE_v3<0b0, 0b0011, "sqdmlal">;
5946 defm SQDMLSLvve : NI_2VE_v3<0b0, 0b0111, "sqdmlsl">;
// Long non-accumulating by-element forms (same layout as NI_2VE_v3 but
// built on the two-operand NI_2VE_2op class).
5948 multiclass NI_2VE_v3_2op<bit u, bits<4> opcode, string asmop> {
5949 // vector register class for element is always 128-bit to cover the max index
5950 def _2d2s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
5951 neon_uimm2_bare, VPR128, VPR64, VPR128> {
5952 let Inst{11} = {Index{1}};
5953 let Inst{21} = {Index{0}};
5954 let Inst{20-16} = Re;
5957 def _2d4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
5958 neon_uimm2_bare, VPR128, VPR128, VPR128> {
5959 let Inst{11} = {Index{1}};
5960 let Inst{21} = {Index{0}};
5961 let Inst{20-16} = Re;
5964 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
5965 def _4s8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
5966 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
5967 let Inst{11} = {Index{2}};
5968 let Inst{21} = {Index{1}};
5969 let Inst{20} = {Index{0}};
5970 let Inst{19-16} = Re{3-0};
5973 def _4s4h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
5974 neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
5975 let Inst{11} = {Index{2}};
5976 let Inst{21} = {Index{1}};
5977 let Inst{20} = {Index{0}};
5978 let Inst{19-16} = Re{3-0};
// Long by-element multiplies.
5982 defm SMULLve : NI_2VE_v3_2op<0b0, 0b1010, "smull">;
5983 defm UMULLve : NI_2VE_v3_2op<0b1, 0b1010, "umull">;
5984 defm SQDMULLve : NI_2VE_v3_2op<0b0, 0b1011, "sqdmull">;
5986 // Pattern for lane in 128-bit vector
5987 class NI_2VEL2_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
5988 RegisterOperand EleOpVPR, ValueType ResTy,
5989 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
5990 SDPatternOperator hiop, SDPatternOperator coreop>
5991 : Pat<(ResTy (op (ResTy VPR128:$src),
5992 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
5993 (HalfOpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
5994 (INST VPR128:$src, VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
5996 // Pattern for lane in 64-bit vector
5997 class NI_2VEL2_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
5998 RegisterOperand EleOpVPR, ValueType ResTy,
5999 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
6000 SDPatternOperator hiop, SDPatternOperator coreop>
6001 : Pat<(ResTy (op (ResTy VPR128:$src),
6002 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
6003 (HalfOpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
6004 (INST VPR128:$src, VPR128:$Rn,
6005 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
6007 multiclass NI_2VEL_v3_pat<string subop, SDPatternOperator op> {
6008 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
6009 op, VPR128, VPR64, VPR128Lo, v4i32, v4i16, v8i16,
6010 BinOpFrag<(Neon_vduplane
6011 (Neon_low8H node:$LHS), node:$RHS)>>;
6013 def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
6014 op, VPR128, VPR64, VPR128, v2i64, v2i32, v4i32,
6015 BinOpFrag<(Neon_vduplane
6016 (Neon_low4S node:$LHS), node:$RHS)>>;
6018 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
6019 op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H,
6020 BinOpFrag<(Neon_vduplane
6021 (Neon_low8H node:$LHS), node:$RHS)>>;
6023 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
6024 op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S,
6025 BinOpFrag<(Neon_vduplane
6026 (Neon_low4S node:$LHS), node:$RHS)>>;
6028 // Index can only be half of the max value for lane in 64-bit vector
6030 def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
6031 op, VPR128, VPR64, VPR64Lo, v4i32, v4i16, v4i16,
6032 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
6034 def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
6035 op, VPR128, VPR64, VPR64, v2i64, v2i32, v2i32,
6036 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
6038 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
6039 op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H,
6040 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
6042 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
6043 op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S,
6044 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
6047 defm SMLAL_lane_v3 : NI_2VEL_v3_pat<"SMLALvve", Neon_smlal>;
6048 defm UMLAL_lane_v3 : NI_2VEL_v3_pat<"UMLALvve", Neon_umlal>;
6049 defm SMLSL_lane_v3 : NI_2VEL_v3_pat<"SMLSLvve", Neon_smlsl>;
6050 defm UMLSL_lane_v3 : NI_2VEL_v3_pat<"UMLSLvve", Neon_umlsl>;
// Non-accumulating ("mul") counterparts of the NI_2VEL2 lane patterns: the
// matched node has no $src accumulator input, so INST takes only $Rn/$Re.
// NOTE(review): line 6057 is missing from this listing — it appears to be
// the `: Pat<(ResTy (op ...` head of this class; confirm against upstream.
6052 // Pattern for lane in 128-bit vector
6053 class NI_2VEL2_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
6054 RegisterOperand EleOpVPR, ValueType ResTy,
6055 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
6056 SDPatternOperator hiop, SDPatternOperator coreop>
6058 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
6059 (HalfOpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
6060 (INST VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
// 64-bit element operand variant: $Re is widened with SUBREG_TO_REG.
// NOTE(review): lines 6067 and 6070 are missing from this listing.
6062 // Pattern for lane in 64-bit vector
6063 class NI_2VEL2_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
6064 RegisterOperand EleOpVPR, ValueType ResTy,
6065 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
6066 SDPatternOperator hiop, SDPatternOperator coreop>
6068 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
6069 (HalfOpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
6071 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
// Instantiates all lane patterns for one long multiply (no accumulator).
6073 multiclass NI_2VEL_mul_v3_pat<string subop, SDPatternOperator op> {
6074 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
6075 op, VPR64, VPR128Lo, v4i32, v4i16, v8i16,
6076 BinOpFrag<(Neon_vduplane
6077 (Neon_low8H node:$LHS), node:$RHS)>>;
6079 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
6080 op, VPR64, VPR128, v2i64, v2i32, v4i32,
6081 BinOpFrag<(Neon_vduplane
6082 (Neon_low4S node:$LHS), node:$RHS)>>;
// NOTE(review): line 6086 missing — presumably `Neon_High8H,` (the hiop
// argument), matching the _2d4s def below; confirm.
6084 def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
6085 op, VPR128Lo, v4i32, v8i16, v8i16, v4i16,
6087 BinOpFrag<(Neon_vduplane
6088 (Neon_low8H node:$LHS), node:$RHS)>>;
6090 def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
6091 op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S,
6092 BinOpFrag<(Neon_vduplane
6093 (Neon_low4S node:$LHS), node:$RHS)>>;
6095 // Index can only be half of the max value for lane in 64-bit vector
6097 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
6098 op, VPR64, VPR64Lo, v4i32, v4i16, v4i16,
6099 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
6101 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
6102 op, VPR64, VPR64, v2i64, v2i32, v2i32,
6103 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
6105 def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
6106 op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H,
6107 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
6109 def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
6110 op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S,
6111 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
// Lane-indexed long multiplies, selected from the ARM NEON mull intrinsics.
6114 defm SMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SMULLve", int_arm_neon_vmulls>;
6115 defm UMULL_lane_v3 : NI_2VEL_mul_v3_pat<"UMULLve", int_arm_neon_vmullu>;
6116 defm SQDMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SQDMULLve", int_arm_neon_vqdmull>;
// NI_qdma: builds PatFrags that express "op(Ra, vqdmull(Rn, Rm))", i.e. a
// saturating doubling multiply-long combined with a saturating add/sub.
// NOTE(review): lines 6120/6122/6124 are missing from this listing — they
// presumably contain the `(op node:$Ra, ...` heads of the two fragments.
6118 multiclass NI_qdma<SDPatternOperator op> {
6119 def _4s : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
6121 (v4i32 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
6123 def _2d : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
6125 (v2i64 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
// qdmlal = vqadds(acc, vqdmull), qdmlsl = vqsubs(acc, vqdmull).
6128 defm Neon_qdmlal : NI_qdma<int_arm_neon_vqadds>;
6129 defm Neon_qdmlsl : NI_qdma<int_arm_neon_vqsubs>;
// Lane patterns for SQDMLAL/SQDMLSL; `op` is the PatFrag name stem defined
// by NI_qdma above ("Neon_qdmlal"/"Neon_qdmlsl"), resolved via !cast.
6131 multiclass NI_2VEL_v3_qdma_pat<string subop, string op> {
6132 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
6133 !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR128Lo,
6134 v4i32, v4i16, v8i16,
6135 BinOpFrag<(Neon_vduplane
6136 (Neon_low8H node:$LHS), node:$RHS)>>;
6138 def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
6139 !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR128,
6140 v2i64, v2i32, v4i32,
6141 BinOpFrag<(Neon_vduplane
6142 (Neon_low4S node:$LHS), node:$RHS)>>;
6144 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
6145 !cast<PatFrag>(op # "_4s"), VPR128Lo,
6146 v4i32, v8i16, v8i16, v4i16, Neon_High8H,
6147 BinOpFrag<(Neon_vduplane
6148 (Neon_low8H node:$LHS), node:$RHS)>>;
6150 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
6151 !cast<PatFrag>(op # "_2d"), VPR128,
6152 v2i64, v4i32, v4i32, v2i32, Neon_High4S,
6153 BinOpFrag<(Neon_vduplane
6154 (Neon_low4S node:$LHS), node:$RHS)>>;
6156 // Index can only be half of the max value for lane in 64-bit vector
6158 def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
6159 !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR64Lo,
6160 v4i32, v4i16, v4i16,
6161 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
6163 def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
6164 !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR64,
6165 v2i64, v2i32, v2i32,
6166 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
6168 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
6169 !cast<PatFrag>(op # "_4s"), VPR64Lo,
6170 v4i32, v8i16, v4i16, v4i16, Neon_High8H,
6171 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
6173 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
6174 !cast<PatFrag>(op # "_2d"), VPR64,
6175 v2i64, v4i32, v2i32, v2i32, Neon_High4S,
6176 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
// Lane-indexed saturating doubling multiply-accumulate/subtract patterns.
6179 defm SQDMLAL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLALvve", "Neon_qdmlal">;
6180 defm SQDMLSL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLSLvve", "Neon_qdmlsl">;
6182 // End of implementation for instruction class (3V Elem)
// INS (general register): insert a GPR value into one lane of a 128-bit
// vector. The destination is tied to $src so the untouched lanes survive.
// NOTE(review): lines 6192-6195 and 6197-6198 are missing from this listing
// (the tail of the vector_insert pattern and the closing brace); confirm
// against upstream.
6184 class NeonI_INS_main<string asmop, string Res, ValueType ResTy,
6185 RegisterClass OpGPR, ValueType OpTy, Operand OpImm>
6186 : NeonI_copy<0b1, 0b0, 0b0011,
6187 (outs VPR128:$Rd), (ins VPR128:$src, OpGPR:$Rn, OpImm:$Imm),
6188 asmop # "\t$Rd." # Res # "[$Imm], $Rn",
6189 [(set (ResTy VPR128:$Rd),
6190 (ResTy (vector_insert
6191 (ResTy VPR128:$src),
6196 let Constraints = "$src = $Rd";
// Per-size INS defs. Inst{20-16} is the imm5 field: the lane index packed
// above a size marker bit (b: xxxx1, h: xxx10, s: xx100, d: x1000).
6199 //Insert element (vector, from main)
6200 def INSbw : NeonI_INS_main<"ins", "b", v16i8, GPR32, i32,
6202 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6204 def INShw : NeonI_INS_main<"ins", "h", v8i16, GPR32, i32,
6206 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6208 def INSsw : NeonI_INS_main<"ins", "s", v4i32, GPR32, i32,
6210 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
6212 def INSdx : NeonI_INS_main<"ins", "d", v2i64, GPR64, i64,
6214 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
// Assembler aliases: "mov Vd.T[i], Rn" is the preferred spelling of INS.
// The trailing 0 marks these as non-canonical for printing.
6217 def : NeonInstAlias<"mov $Rd.b[$Imm], $Rn",
6218 (INSbw VPR128:$Rd, GPR32:$Rn, neon_uimm4_bare:$Imm), 0>;
6219 def : NeonInstAlias<"mov $Rd.h[$Imm], $Rn",
6220 (INShw VPR128:$Rd, GPR32:$Rn, neon_uimm3_bare:$Imm), 0>;
6221 def : NeonInstAlias<"mov $Rd.s[$Imm], $Rn",
6222 (INSsw VPR128:$Rd, GPR32:$Rn, neon_uimm2_bare:$Imm), 0>;
6223 def : NeonInstAlias<"mov $Rd.d[$Imm], $Rn",
6224 (INSdx VPR128:$Rd, GPR64:$Rn, neon_uimm1_bare:$Imm), 0>;
// vector_insert on a 64-bit vector: widen $src to 128 bits, run the 128-bit
// INS, then take the low 64-bit subregister of the result.
6226 class Neon_INS_main_pattern <ValueType ResTy,ValueType ExtResTy,
6227 RegisterClass OpGPR, ValueType OpTy,
6228 Operand OpImm, Instruction INS>
6229 : Pat<(ResTy (vector_insert
6233 (ResTy (EXTRACT_SUBREG
6234 (ExtResTy (INS (ExtResTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
6235 OpGPR:$Rn, OpImm:$Imm)), sub_64))>;
6237 def INSbw_pattern : Neon_INS_main_pattern<v8i8, v16i8, GPR32, i32,
6238 neon_uimm3_bare, INSbw>;
6239 def INShw_pattern : Neon_INS_main_pattern<v4i16, v8i16, GPR32, i32,
6240 neon_uimm2_bare, INShw>;
6241 def INSsw_pattern : Neon_INS_main_pattern<v2i32, v4i32, GPR32, i32,
6242 neon_uimm1_bare, INSsw>;
6243 def INSdx_pattern : Neon_INS_main_pattern<v1i64, v2i64, GPR64, i64,
6244 neon_uimm0_bare, INSdx>;
// INS (element): copy lane $Immn of $Rn into lane $Immd of $Rd, leaving the
// other lanes of $Rd (tied to $src) unchanged.
6246 class NeonI_INS_element<string asmop, string Res, Operand ResImm>
6247 : NeonI_insert<0b1, 0b1,
6248 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn,
6249 ResImm:$Immd, ResImm:$Immn),
6250 asmop # "\t$Rd." # Res # "[$Immd], $Rn." # Res # "[$Immn]",
6253 let Constraints = "$src = $Rd";
// Per-size INS(element) defs. Inst{20-16} encodes the destination lane
// (imm5, with the size-marker bit); Inst{14-11} encodes the source lane.
6258 //Insert element (vector, from element)
6259 def INSELb : NeonI_INS_element<"ins", "b", neon_uimm4_bare> {
6260 let Inst{20-16} = {Immd{3}, Immd{2}, Immd{1}, Immd{0}, 0b1};
6261 let Inst{14-11} = {Immn{3}, Immn{2}, Immn{1}, Immn{0}};
6263 def INSELh : NeonI_INS_element<"ins", "h", neon_uimm3_bare> {
6264 let Inst{20-16} = {Immd{2}, Immd{1}, Immd{0}, 0b1, 0b0};
6265 let Inst{14-11} = {Immn{2}, Immn{1}, Immn{0}, 0b0};
6266 // bit 11 is unspecified, but should be set to zero.
6268 def INSELs : NeonI_INS_element<"ins", "s", neon_uimm2_bare> {
6269 let Inst{20-16} = {Immd{1}, Immd{0}, 0b1, 0b0, 0b0};
6270 let Inst{14-11} = {Immn{1}, Immn{0}, 0b0, 0b0};
6271 // bits 11-12 are unspecified, but should be set to zero.
6273 def INSELd : NeonI_INS_element<"ins", "d", neon_uimm1_bare> {
6274 let Inst{20-16} = {Immd, 0b1, 0b0, 0b0, 0b0};
6275 let Inst{14-11} = {Immn{0}, 0b0, 0b0, 0b0};
6276 // bits 11-13 are unspecified, but should be set to zero.
// "mov Vd.T[i], Vn.T[j]" assembler aliases for the element-to-element INS.
6279 def : NeonInstAlias<"mov $Rd.b[$Immd], $Rn.b[$Immn]",
6280 (INSELb VPR128:$Rd, VPR128:$Rn,
6281 neon_uimm4_bare:$Immd, neon_uimm4_bare:$Immn), 0>;
6282 def : NeonInstAlias<"mov $Rd.h[$Immd], $Rn.h[$Immn]",
6283 (INSELh VPR128:$Rd, VPR128:$Rn,
6284 neon_uimm3_bare:$Immd, neon_uimm3_bare:$Immn), 0>;
6285 def : NeonInstAlias<"mov $Rd.s[$Immd], $Rn.s[$Immn]",
6286 (INSELs VPR128:$Rd, VPR128:$Rn,
6287 neon_uimm2_bare:$Immd, neon_uimm2_bare:$Immn), 0>;
6288 def : NeonInstAlias<"mov $Rd.d[$Immd], $Rn.d[$Immn]",
6289 (INSELd VPR128:$Rd, VPR128:$Rn,
6290 neon_uimm1_bare:$Immd, neon_uimm1_bare:$Immn), 0>;
// Selection patterns mapping insert-of-extracted-element onto INSEL*. The
// four defs cover the 128/64-bit combinations of destination and source
// vectors; 64-bit operands are widened with SUBREG_TO_REG and 64-bit results
// recovered with EXTRACT_SUBREG.
// NOTE(review): several interior lines are missing from this listing (e.g.
// 6298-6300, 6307-6309, 6315, 6317-6319) — the extract operands and operator
// heads are not fully shown; confirm against upstream.
6292 multiclass Neon_INS_elt_pattern<ValueType ResTy, ValueType NaTy,
6293 ValueType MidTy, Operand StImm, Operand NaImm,
6295 def : Pat<(ResTy (vector_insert
6296 (ResTy VPR128:$src),
6297 (MidTy (vector_extract
6301 (INS (ResTy VPR128:$src), (ResTy VPR128:$Rn),
6302 StImm:$Immd, StImm:$Immn)>;
6304 def : Pat <(ResTy (vector_insert
6305 (ResTy VPR128:$src),
6306 (MidTy (vector_extract
6310 (INS (ResTy VPR128:$src),
6311 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)),
6312 StImm:$Immd, NaImm:$Immn)>;
6314 def : Pat <(NaTy (vector_insert
6316 (MidTy (vector_extract
6320 (NaTy (EXTRACT_SUBREG
6322 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
6324 NaImm:$Immd, StImm:$Immn)),
6327 def : Pat <(NaTy (vector_insert
6329 (MidTy (vector_extract
6333 (NaTy (EXTRACT_SUBREG
6335 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
6336 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)),
6337 NaImm:$Immd, NaImm:$Immn)),
// Instantiations for each element type; NaTy/NaImm are the 64-bit ("narrow")
// vector type and its correspondingly halved lane-index operand.
6341 defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, neon_uimm2_bare,
6342 neon_uimm1_bare, INSELs>;
6343 defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, neon_uimm1_bare,
6344 neon_uimm0_bare, INSELd>;
6345 defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
6346 neon_uimm3_bare, INSELb>;
6347 defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
6348 neon_uimm2_bare, INSELh>;
6349 defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
6350 neon_uimm1_bare, INSELs>;
6351 defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, neon_uimm1_bare,
6352 neon_uimm0_bare, INSELd>;
// Inserting a floating-point scalar register: view the FPR as lane 0 of a
// 128-bit vector via SUBREG_TO_REG, then use the element-to-element INS.
6354 multiclass Neon_INS_elt_float_pattern<ValueType ResTy, ValueType NaTy,
6356 RegisterClass OpFPR, Operand ResImm,
6357 SubRegIndex SubIndex, Instruction INS> {
6358 def : Pat <(ResTy (vector_insert
6359 (ResTy VPR128:$src),
6362 (INS (ResTy VPR128:$src),
6363 (ResTy (SUBREG_TO_REG (i64 0), OpFPR:$Rn, SubIndex)),
6367 def : Pat <(NaTy (vector_insert
6371 (NaTy (EXTRACT_SUBREG
6373 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
6374 (ResTy (SUBREG_TO_REG (i64 0), (MidTy OpFPR:$Rn), SubIndex)),
// NOTE(review): lines 6381/6383 (presumably `sub_32, INSELs>;` style
// trailing arguments) are missing from this listing.
6380 defm : Neon_INS_elt_float_pattern<v4f32, v2f32, f32, FPR32, neon_uimm2_bare,
6382 defm : Neon_INS_elt_float_pattern<v2f64, v1f64, f64, FPR64, neon_uimm1_bare,
// SMOV: sign-extending move of one vector lane to a general register.
// Q selects the 32-bit (W) or 64-bit (X) destination form.
// NOTE(review): lines 6392 and 6395-6399 are missing from this listing (the
// sext wrapper of the pattern and the class tail); confirm upstream.
6385 class NeonI_SMOV<string asmop, string Res, bit Q,
6386 ValueType OpTy, ValueType eleTy,
6387 Operand OpImm, RegisterClass ResGPR, ValueType ResTy>
6388 : NeonI_copy<Q, 0b0, 0b0101,
6389 (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
6390 asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
6391 [(set (ResTy ResGPR:$Rd),
6393 (ResTy (vector_extract
6394 (OpTy VPR128:$Rn), (OpImm:$Imm))),
// Per-size SMOV defs; Inst{20-16} is the imm5 lane/size field as for INS.
6400 //Signed integer move (main, from element)
6401 def SMOVwb : NeonI_SMOV<"smov", "b", 0b0, v16i8, i8, neon_uimm4_bare,
6403 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6405 def SMOVwh : NeonI_SMOV<"smov", "h", 0b0, v8i16, i16, neon_uimm3_bare,
6407 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6409 def SMOVxb : NeonI_SMOV<"smov", "b", 0b1, v16i8, i8, neon_uimm4_bare,
6411 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6413 def SMOVxh : NeonI_SMOV<"smov", "h", 0b1, v8i16, i16, neon_uimm3_bare,
6415 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6417 def SMOVxs : NeonI_SMOV<"smov", "s", 0b1, v4i32, i32, neon_uimm2_bare,
6419 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
// Patterns folding sext/sext_inreg of an extracted lane into the X-form
// SMOV, for both 128-bit (StTy) and 64-bit (NaTy, widened first) sources.
6422 multiclass Neon_SMOVx_pattern <ValueType StTy, ValueType NaTy,
6423 ValueType eleTy, Operand StImm, Operand NaImm,
6424 Instruction SMOVI> {
6425 def : Pat<(i64 (sext_inreg
6427 (i32 (vector_extract
6428 (StTy VPR128:$Rn), (StImm:$Imm))))),
6430 (SMOVI VPR128:$Rn, StImm:$Imm)>;
6432 def : Pat<(i64 (sext
6433 (i32 (vector_extract
6434 (StTy VPR128:$Rn), (StImm:$Imm))))),
6435 (SMOVI VPR128:$Rn, StImm:$Imm)>;
6437 def : Pat<(i64 (sext_inreg
6438 (i64 (vector_extract
6439 (NaTy VPR64:$Rn), (NaImm:$Imm))),
6441 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6444 def : Pat<(i64 (sext_inreg
6446 (i32 (vector_extract
6447 (NaTy VPR64:$Rn), (NaImm:$Imm))))),
6449 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6452 def : Pat<(i64 (sext
6453 (i32 (vector_extract
6454 (NaTy VPR64:$Rn), (NaImm:$Imm))))),
6455 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6459 defm : Neon_SMOVx_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
6460 neon_uimm3_bare, SMOVxb>;
6461 defm : Neon_SMOVx_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
6462 neon_uimm2_bare, SMOVxh>;
6463 defm : Neon_SMOVx_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
6464 neon_uimm1_bare, SMOVxs>;
// W-form SMOV pattern: sext_inreg of a lane extracted from a 64-bit vector;
// the source is widened to 128 bits before the SMOVw* instruction is used.
6466 class Neon_SMOVw_pattern <ValueType StTy, ValueType NaTy,
6467 ValueType eleTy, Operand StImm, Operand NaImm,
6469 : Pat<(i32 (sext_inreg
6470 (i32 (vector_extract
6471 (NaTy VPR64:$Rn), (NaImm:$Imm))),
6473 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6476 def : Neon_SMOVw_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
6477 neon_uimm3_bare, SMOVwb>;
6478 def : Neon_SMOVw_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
6479 neon_uimm2_bare, SMOVwh>;
// UMOV: zero-extending move of one vector lane to a general register.
6481 class NeonI_UMOV<string asmop, string Res, bit Q,
6482 ValueType OpTy, Operand OpImm,
6483 RegisterClass ResGPR, ValueType ResTy>
6484 : NeonI_copy<Q, 0b0, 0b0111,
6485 (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
6486 asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
6487 [(set (ResTy ResGPR:$Rd),
6488 (ResTy (vector_extract
6489 (OpTy VPR128:$Rn), (OpImm:$Imm))))],
// Per-size UMOV defs; Inst{20-16} is the imm5 lane/size field.
6494 //Unsigned integer move (main, from element)
6495 def UMOVwb : NeonI_UMOV<"umov", "b", 0b0, v16i8, neon_uimm4_bare,
6497 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6499 def UMOVwh : NeonI_UMOV<"umov", "h", 0b0, v8i16, neon_uimm3_bare,
6501 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6503 def UMOVws : NeonI_UMOV<"umov", "s", 0b0, v4i32, neon_uimm2_bare,
6505 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
6507 def UMOVxd : NeonI_UMOV<"umov", "d", 0b1, v2i64, neon_uimm1_bare,
6509 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
// "mov Rd, Vn.T[i]" aliases exist only for the s and d forms.
6512 def : NeonInstAlias<"mov $Rd, $Rn.s[$Imm]",
6513 (UMOVws GPR32:$Rd, VPR128:$Rn, neon_uimm2_bare:$Imm), 0>;
6514 def : NeonInstAlias<"mov $Rd, $Rn.d[$Imm]",
6515 (UMOVxd GPR64:$Rd, VPR128:$Rn, neon_uimm1_bare:$Imm), 0>;
// Extract from a 64-bit vector via the 128-bit UMOV after widening.
// (The parameter is named SMOVI for symmetry with Neon_SMOVw_pattern, but
// the instantiations below pass UMOV instructions.)
6517 class Neon_UMOV_pattern <ValueType StTy, ValueType NaTy, ValueType ResTy,
6518 Operand StImm, Operand NaImm,
6520 : Pat<(ResTy (vector_extract
6521 (NaTy VPR64:$Rn), NaImm:$Imm)),
6522 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6525 def : Neon_UMOV_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
6526 neon_uimm3_bare, UMOVwb>;
6527 def : Neon_UMOV_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
6528 neon_uimm2_bare, UMOVwh>;
6529 def : Neon_UMOV_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
6530 neon_uimm1_bare, UMOVws>;
// Fold an explicit mask/zext of an extracted lane into UMOV (it already
// zero-extends). NOTE(review): lines 6532/6535, 6538/6541, 6549/6552,
// 6556/6559 are missing from this listing — presumably the `(and ...`
// heads and mask constants of these patterns; confirm upstream.
6533 (i32 (vector_extract
6534 (v16i8 VPR128:$Rn), (neon_uimm4_bare:$Imm))),
6536 (UMOVwb VPR128:$Rn, neon_uimm4_bare:$Imm)>;
6539 (i32 (vector_extract
6540 (v8i16 VPR128:$Rn), (neon_uimm3_bare:$Imm))),
6542 (UMOVwh VPR128:$Rn, neon_uimm3_bare:$Imm)>;
6544 def : Pat<(i64 (zext
6545 (i32 (vector_extract
6546 (v2i64 VPR128:$Rn), (neon_uimm1_bare:$Imm))))),
6547 (UMOVxd VPR128:$Rn, neon_uimm1_bare:$Imm)>;
6550 (i32 (vector_extract
6551 (v8i8 VPR64:$Rn), (neon_uimm3_bare:$Imm))),
6553 (UMOVwb (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
6554 neon_uimm3_bare:$Imm)>;
6557 (i32 (vector_extract
6558 (v4i16 VPR64:$Rn), (neon_uimm2_bare:$Imm))),
6560 (UMOVwh (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
6561 neon_uimm2_bare:$Imm)>;
6563 def : Pat<(i64 (zext
6564 (i32 (vector_extract
6565 (v1i64 VPR64:$Rn), (neon_uimm0_bare:$Imm))))),
6566 (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
6567 neon_uimm0_bare:$Imm)>;
// Copies between scalar FPRs and one-element vector types: lane-0 extracts
// map to UMOV/FMOV (after widening the scalar register into a vector view),
// and scalar_to_vector maps to INS/INSERT_SUBREG into an IMPLICIT_DEF.
// NOTE(review): single interior lines (6571, 6575, 6585-6589, 6593-6594,
// 6598-6613) are missing from this listing — several result expressions are
// shown only partially; confirm against upstream.
6569 // Additional copy patterns for scalar types
6570 def : Pat<(i32 (vector_extract (v1i8 FPR8:$Rn), (i64 0))),
6572 (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8)), (i64 0))>;
6574 def : Pat<(i32 (vector_extract (v1i16 FPR16:$Rn), (i64 0))),
6576 (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16)), (i64 0))>;
6578 def : Pat<(i32 (vector_extract (v1i32 FPR32:$Rn), (i64 0))),
6579 (FMOVws FPR32:$Rn)>;
6581 def : Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))),
6582 (FMOVxd FPR64:$Rn)>;
6584 def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))),
6587 def : Pat<(f32 (vector_extract (v1f32 FPR32:$Rn), (i64 0))),
6590 def : Pat<(v1i8 (scalar_to_vector GPR32:$Rn)),
6591 (v1i8 (EXTRACT_SUBREG (v16i8
6592 (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
6595 def : Pat<(v1i16 (scalar_to_vector GPR32:$Rn)),
6596 (v1i16 (EXTRACT_SUBREG (v8i16
6597 (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
6600 def : Pat<(v1i32 (scalar_to_vector GPR32:$src)),
6603 def : Pat<(v1i64 (scalar_to_vector GPR64:$src)),
6606 def : Pat<(v1f32 (scalar_to_vector (f32 FPR32:$Rn))),
6608 def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))),
6611 def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))),
6614 def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$src))),
6615 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)),
6616 (f64 FPR64:$src), sub_64)>;
// DUP (element): broadcast lane $Imm of $Rn to every lane of $Rd. Q selects
// the 64- or 128-bit destination register.
6618 class NeonI_DUP_Elt<bit Q, string asmop, string rdlane, string rnlane,
6619 RegisterOperand ResVPR, Operand OpImm>
6620 : NeonI_copy<Q, 0b0, 0b0000, (outs ResVPR:$Rd),
6621 (ins VPR128:$Rn, OpImm:$Imm),
6622 asmop # "\t$Rd" # rdlane # ", $Rn" # rnlane # "[$Imm]",
// Per-arrangement DUP(element) defs; Inst{20-16} is the imm5 lane/size field.
6628 def DUPELT16b : NeonI_DUP_Elt<0b1, "dup", ".16b", ".b", VPR128,
6630 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6633 def DUPELT8h : NeonI_DUP_Elt<0b1, "dup", ".8h", ".h", VPR128,
6635 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6638 def DUPELT4s : NeonI_DUP_Elt<0b1, "dup", ".4s", ".s", VPR128,
6640 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
6643 def DUPELT2d : NeonI_DUP_Elt<0b1, "dup", ".2d", ".d", VPR128,
6645 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
6648 def DUPELT8b : NeonI_DUP_Elt<0b0, "dup", ".8b", ".b", VPR64,
6650 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
6653 def DUPELT4h : NeonI_DUP_Elt<0b0, "dup", ".4h", ".h", VPR64,
6655 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
6658 def DUPELT2s : NeonI_DUP_Elt<0b0, "dup", ".2s", ".s", VPR64,
6660 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
// Neon_vduplane -> DUPELT, once for a 128-bit source and once for a 64-bit
// source that is first widened with SUBREG_TO_REG.
6663 multiclass NeonI_DUP_Elt_pattern<Instruction DUPELT, ValueType ResTy,
6664 ValueType OpTy,ValueType NaTy,
6665 ValueType ExTy, Operand OpLImm,
6667 def : Pat<(ResTy (Neon_vduplane (OpTy VPR128:$Rn), OpLImm:$Imm)),
6668 (ResTy (DUPELT (OpTy VPR128:$Rn), OpLImm:$Imm))>;
6670 def : Pat<(ResTy (Neon_vduplane
6671 (NaTy VPR64:$Rn), OpNImm:$Imm)),
6673 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), OpNImm:$Imm))>;
// DUP(element) pattern instantiations for every integer and FP arrangement.
6675 defm : NeonI_DUP_Elt_pattern<DUPELT16b, v16i8, v16i8, v8i8, v16i8,
6676 neon_uimm4_bare, neon_uimm3_bare>;
6677 defm : NeonI_DUP_Elt_pattern<DUPELT8b, v8i8, v16i8, v8i8, v16i8,
6678 neon_uimm4_bare, neon_uimm3_bare>;
6679 defm : NeonI_DUP_Elt_pattern<DUPELT8h, v8i16, v8i16, v4i16, v8i16,
6680 neon_uimm3_bare, neon_uimm2_bare>;
6681 defm : NeonI_DUP_Elt_pattern<DUPELT4h, v4i16, v8i16, v4i16, v8i16,
6682 neon_uimm3_bare, neon_uimm2_bare>;
6683 defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4i32, v4i32, v2i32, v4i32,
6684 neon_uimm2_bare, neon_uimm1_bare>;
6685 defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2i32, v4i32, v2i32, v4i32,
6686 neon_uimm2_bare, neon_uimm1_bare>;
6687 defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2i64, v2i64, v1i64, v2i64,
6688 neon_uimm1_bare, neon_uimm0_bare>;
6689 defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4f32, v4f32, v2f32, v4f32,
6690 neon_uimm2_bare, neon_uimm1_bare>;
6691 defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2f32, v4f32, v2f32, v4f32,
6692 neon_uimm2_bare, neon_uimm1_bare>;
6693 defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2f64, v2f64, v1f64, v2f64,
6694 neon_uimm1_bare, neon_uimm0_bare>;
// Splat of an FP scalar register: view the FPR as lane 0 of a vector, then
// DUP that lane. NOTE(review): lines 6697/6699, 6701/6703, 6705/6707-6708
// (the DUPELT result heads and lane indices) are missing from this listing.
6696 def : Pat<(v2f32 (Neon_vdup (f32 FPR32:$Rn))),
6698 (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
6700 def : Pat<(v4f32 (Neon_vdup (f32 FPR32:$Rn))),
6702 (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
6704 def : Pat<(v2f64 (Neon_vdup (f64 FPR64:$Rn))),
6706 (SUBREG_TO_REG (i64 0), FPR64:$Rn, sub_64),
// DUP (general register): broadcast a GPR value to every lane of the
// destination vector; matched from Neon_vdup.
6709 class NeonI_DUP<bit Q, string asmop, string rdlane,
6710 RegisterOperand ResVPR, ValueType ResTy,
6711 RegisterClass OpGPR, ValueType OpTy>
6712 : NeonI_copy<Q, 0b0, 0b0001, (outs ResVPR:$Rd), (ins OpGPR:$Rn),
6713 asmop # "\t$Rd" # rdlane # ", $Rn",
6714 [(set (ResTy ResVPR:$Rd),
6715 (ResTy (Neon_vdup (OpTy OpGPR:$Rn))))],
// Per-arrangement DUP defs; Inst{20-16} holds only the imm5 size marker
// (no lane index for the general-register form).
6718 def DUP16b : NeonI_DUP<0b1, "dup", ".16b", VPR128, v16i8, GPR32, i32> {
6719 let Inst{20-16} = 0b00001;
6720 // bits 17-20 are unspecified, but should be set to zero.
6723 def DUP8h : NeonI_DUP<0b1, "dup", ".8h", VPR128, v8i16, GPR32, i32> {
6724 let Inst{20-16} = 0b00010;
6725 // bits 18-20 are unspecified, but should be set to zero.
6728 def DUP4s : NeonI_DUP<0b1, "dup", ".4s", VPR128, v4i32, GPR32, i32> {
6729 let Inst{20-16} = 0b00100;
6730 // bits 19-20 are unspecified, but should be set to zero.
6733 def DUP2d : NeonI_DUP<0b1, "dup", ".2d", VPR128, v2i64, GPR64, i64> {
6734 let Inst{20-16} = 0b01000;
6735 // bit 20 is unspecified, but should be set to zero.
6738 def DUP8b : NeonI_DUP<0b0, "dup", ".8b", VPR64, v8i8, GPR32, i32> {
6739 let Inst{20-16} = 0b00001;
6740 // bits 17-20 are unspecified, but should be set to zero.
6743 def DUP4h : NeonI_DUP<0b0, "dup", ".4h", VPR64, v4i16, GPR32, i32> {
6744 let Inst{20-16} = 0b00010;
6745 // bits 18-20 are unspecified, but should be set to zero.
6748 def DUP2s : NeonI_DUP<0b0, "dup", ".2s", VPR64, v2i32, GPR32, i32> {
6749 let Inst{20-16} = 0b00100;
6750 // bits 19-20 are unspecified, but should be set to zero.
// concat_vectors lowering: concat with undef is just a subregister widening;
// concat of two 64-bit halves is built from SUBREG_TO_REG views (the missing
// lines around 6758/6764 presumably combine them with an INS/zip-style node —
// NOTE(review): confirm upstream, this listing has gaps there).
6753 // patterns for CONCAT_VECTORS
6754 multiclass Concat_Vector_Pattern<ValueType ResTy, ValueType OpTy> {
6755 def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), undef)),
6756 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)>;
6757 def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))),
6759 (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6760 (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rm, sub_64)),
6763 def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rn))),
6765 (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
6769 defm : Concat_Vector_Pattern<v16i8, v8i8>;
6770 defm : Concat_Vector_Pattern<v8i16, v4i16>;
6771 defm : Concat_Vector_Pattern<v4i32, v2i32>;
6772 defm : Concat_Vector_Pattern<v2i64, v1i64>;
6773 defm : Concat_Vector_Pattern<v4f32, v2f32>;
6774 defm : Concat_Vector_Pattern<v2f64, v1f64>;
// Taking the low half of a 128-bit vector is a plain subregister extract.
6776 //patterns for EXTRACT_SUBVECTOR
6777 def : Pat<(v8i8 (extract_subvector (v16i8 VPR128:$Rn), (i64 0))),
6778 (v8i8 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
6779 def : Pat<(v4i16 (extract_subvector (v8i16 VPR128:$Rn), (i64 0))),
6780 (v4i16 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
6781 def : Pat<(v2i32 (extract_subvector (v4i32 VPR128:$Rn), (i64 0))),
6782 (v2i32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
6783 def : Pat<(v1i64 (extract_subvector (v2i64 VPR128:$Rn), (i64 0))),
6784 (v1i64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
6785 def : Pat<(v2f32 (extract_subvector (v4f32 VPR128:$Rn), (i64 0))),
6786 (v2f32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
6787 def : Pat<(v1f64 (extract_subvector (v2f64 VPR128:$Rn), (i64 0))),
6788 (v1f64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
// REV64/REV32/REV16: reverse elements within 64-/32-/16-bit containers.
// The size field gives the element width; opcode/U distinguish the three
// REV flavors. NOTE(review): the per-def second lines (element types and
// the matching Neon_rev* operators, e.g. line 6801) are missing from this
// listing; confirm against upstream.
6790 class NeonI_REV<string asmop, string Res, bits<2> size, bit Q, bit U,
6791 bits<5> opcode, RegisterOperand ResVPR, ValueType ResTy,
6792 SDPatternOperator Neon_Rev>
6793 : NeonI_2VMisc<Q, U, size, opcode,
6794 (outs ResVPR:$Rd), (ins ResVPR:$Rn),
6795 asmop # "\t$Rd." # Res # ", $Rn." # Res,
6796 [(set (ResTy ResVPR:$Rd),
6797 (ResTy (Neon_Rev (ResTy ResVPR:$Rn))))],
6800 def REV64_16b : NeonI_REV<"rev64", "16b", 0b00, 0b1, 0b0, 0b00000, VPR128,
6802 def REV64_8h : NeonI_REV<"rev64", "8h", 0b01, 0b1, 0b0, 0b00000, VPR128,
6804 def REV64_4s : NeonI_REV<"rev64", "4s", 0b10, 0b1, 0b0, 0b00000, VPR128,
6806 def REV64_8b : NeonI_REV<"rev64", "8b", 0b00, 0b0, 0b0, 0b00000, VPR64,
6808 def REV64_4h : NeonI_REV<"rev64", "4h", 0b01, 0b0, 0b0, 0b00000, VPR64,
6810 def REV64_2s : NeonI_REV<"rev64", "2s", 0b10, 0b0, 0b0, 0b00000, VPR64,
// FP vectors reuse the integer REV64 instructions.
6813 def : Pat<(v4f32 (Neon_rev64 (v4f32 VPR128:$Rn))), (REV64_4s VPR128:$Rn)>;
6814 def : Pat<(v2f32 (Neon_rev64 (v2f32 VPR64:$Rn))), (REV64_2s VPR64:$Rn)>;
6816 def REV32_16b : NeonI_REV<"rev32", "16b", 0b00, 0b1, 0b1, 0b00000, VPR128,
6818 def REV32_8h : NeonI_REV<"rev32", "8h", 0b01, 0b1, 0b1, 0b00000, VPR128,
6820 def REV32_8b : NeonI_REV<"rev32", "8b", 0b00, 0b0, 0b1, 0b00000, VPR64,
6822 def REV32_4h : NeonI_REV<"rev32", "4h", 0b01, 0b0, 0b1, 0b00000, VPR64,
6825 def REV16_16b : NeonI_REV<"rev16", "16b", 0b00, 0b1, 0b0, 0b00001, VPR128,
6827 def REV16_8b : NeonI_REV<"rev16", "8b", 0b00, 0b0, 0b0, 0b00001, VPR64,
// Pairwise widening add (SADDLP/UADDLP): each def pairs adjacent elements
// of the source arrangement and produces the double-width arrangement named
// in the mnemonic suffix (e.g. 16b -> 8h). U selects signed vs unsigned.
6830 multiclass NeonI_PairwiseAdd<string asmop, bit U, bits<5> opcode,
6831 SDPatternOperator Neon_Padd> {
6832 def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
6833 (outs VPR128:$Rd), (ins VPR128:$Rn),
6834 asmop # "\t$Rd.8h, $Rn.16b",
6835 [(set (v8i16 VPR128:$Rd),
6836 (v8i16 (Neon_Padd (v16i8 VPR128:$Rn))))],
6839 def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
6840 (outs VPR64:$Rd), (ins VPR64:$Rn),
6841 asmop # "\t$Rd.4h, $Rn.8b",
6842 [(set (v4i16 VPR64:$Rd),
6843 (v4i16 (Neon_Padd (v8i8 VPR64:$Rn))))],
6846 def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
6847 (outs VPR128:$Rd), (ins VPR128:$Rn),
6848 asmop # "\t$Rd.4s, $Rn.8h",
6849 [(set (v4i32 VPR128:$Rd),
6850 (v4i32 (Neon_Padd (v8i16 VPR128:$Rn))))],
6853 def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
6854 (outs VPR64:$Rd), (ins VPR64:$Rn),
6855 asmop # "\t$Rd.2s, $Rn.4h",
6856 [(set (v2i32 VPR64:$Rd),
6857 (v2i32 (Neon_Padd (v4i16 VPR64:$Rn))))],
6860 def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
6861 (outs VPR128:$Rd), (ins VPR128:$Rn),
6862 asmop # "\t$Rd.2d, $Rn.4s",
6863 [(set (v2i64 VPR128:$Rd),
6864 (v2i64 (Neon_Padd (v4i32 VPR128:$Rn))))],
6867 def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
6868 (outs VPR64:$Rd), (ins VPR64:$Rn),
6869 asmop # "\t$Rd.1d, $Rn.2s",
6870 [(set (v1i64 VPR64:$Rd),
6871 (v1i64 (Neon_Padd (v2i32 VPR64:$Rn))))],
// Selected from the ARM NEON pairwise-widening-add intrinsics.
6875 defm SADDLP : NeonI_PairwiseAdd<"saddlp", 0b0, 0b00010,
6876 int_arm_neon_vpaddls>;
6877 defm UADDLP : NeonI_PairwiseAdd<"uaddlp", 0b1, 0b00010,
6878 int_arm_neon_vpaddlu>;
// Pairwise widening add-accumulate (SADALP/UADALP): like SADDLP/UADDLP but
// adds the pairwise sums into the tied accumulator $src.
// NOTE(review): the `(Neon_Padd` operator heads of each pattern (e.g. line
// 6887) are missing from this listing; confirm against upstream.
6880 multiclass NeonI_PairwiseAddAcc<string asmop, bit U, bits<5> opcode,
6881 SDPatternOperator Neon_Padd> {
6882 let Constraints = "$src = $Rd" in {
6883 def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
6884 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
6885 asmop # "\t$Rd.8h, $Rn.16b",
6886 [(set (v8i16 VPR128:$Rd),
6888 (v8i16 VPR128:$src), (v16i8 VPR128:$Rn))))],
6891 def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
6892 (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
6893 asmop # "\t$Rd.4h, $Rn.8b",
6894 [(set (v4i16 VPR64:$Rd),
6896 (v4i16 VPR64:$src), (v8i8 VPR64:$Rn))))],
6899 def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
6900 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
6901 asmop # "\t$Rd.4s, $Rn.8h",
6902 [(set (v4i32 VPR128:$Rd),
6904 (v4i32 VPR128:$src), (v8i16 VPR128:$Rn))))],
6907 def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
6908 (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
6909 asmop # "\t$Rd.2s, $Rn.4h",
6910 [(set (v2i32 VPR64:$Rd),
6912 (v2i32 VPR64:$src), (v4i16 VPR64:$Rn))))],
6915 def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
6916 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
6917 asmop # "\t$Rd.2d, $Rn.4s",
6918 [(set (v2i64 VPR128:$Rd),
6920 (v2i64 VPR128:$src), (v4i32 VPR128:$Rn))))],
6923 def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
6924 (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
6925 asmop # "\t$Rd.1d, $Rn.2s",
6926 [(set (v1i64 VPR64:$Rd),
6928 (v1i64 VPR64:$src), (v2i32 VPR64:$Rn))))],
// Selected from the ARM NEON pairwise add-and-accumulate intrinsics.
6933 defm SADALP : NeonI_PairwiseAddAcc<"sadalp", 0b0, 0b00110,
6934 int_arm_neon_vpadals>;
6935 defm UADALP : NeonI_PairwiseAddAcc<"uadalp", 0b1, 0b00110,
6936 int_arm_neon_vpadalu>;
// One-operand 2VMisc instructions over all B/H/S/D arrangements (used for
// SQABS/SQNEG/ABS/NEG). These defs carry no selection patterns; matching is
// added separately by NeonI_2VMisc_BHSD_1Arg_Pattern.
6938 multiclass NeonI_2VMisc_BHSDsize_1Arg<string asmop, bit U, bits<5> opcode> {
6939 def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
6940 (outs VPR128:$Rd), (ins VPR128:$Rn),
6941 asmop # "\t$Rd.16b, $Rn.16b",
6944 def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
6945 (outs VPR128:$Rd), (ins VPR128:$Rn),
6946 asmop # "\t$Rd.8h, $Rn.8h",
6949 def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
6950 (outs VPR128:$Rd), (ins VPR128:$Rn),
6951 asmop # "\t$Rd.4s, $Rn.4s",
6954 def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
6955 (outs VPR128:$Rd), (ins VPR128:$Rn),
6956 asmop # "\t$Rd.2d, $Rn.2d",
6959 def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
6960 (outs VPR64:$Rd), (ins VPR64:$Rn),
6961 asmop # "\t$Rd.8b, $Rn.8b",
6964 def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
6965 (outs VPR64:$Rd), (ins VPR64:$Rn),
6966 asmop # "\t$Rd.4h, $Rn.4h",
6969 def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
6970 (outs VPR64:$Rd), (ins VPR64:$Rn),
6971 asmop # "\t$Rd.2s, $Rn.2s",
6975 defm SQABS : NeonI_2VMisc_BHSDsize_1Arg<"sqabs", 0b0, 0b00111>;
6976 defm SQNEG : NeonI_2VMisc_BHSDsize_1Arg<"sqneg", 0b1, 0b00111>;
6977 defm ABS : NeonI_2VMisc_BHSDsize_1Arg<"abs", 0b0, 0b01011>;
6978 defm NEG : NeonI_2VMisc_BHSDsize_1Arg<"neg", 0b1, 0b01011>;
// Maps a unary operator onto each arrangement of a BHSDsize_1Arg family by
// name lookup (Prefix # suffix), e.g. SQABS16b for the v16i8 form.
6980 multiclass NeonI_2VMisc_BHSD_1Arg_Pattern<string Prefix,
6981 SDPatternOperator Neon_Op> {
6982 def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$Rn))),
6983 (v16i8 (!cast<Instruction>(Prefix # 16b) (v16i8 VPR128:$Rn)))>;
6985 def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$Rn))),
6986 (v8i16 (!cast<Instruction>(Prefix # 8h) (v8i16 VPR128:$Rn)))>;
6988 def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$Rn))),
6989 (v4i32 (!cast<Instruction>(Prefix # 4s) (v4i32 VPR128:$Rn)))>;
6991 def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$Rn))),
6992 (v2i64 (!cast<Instruction>(Prefix # 2d) (v2i64 VPR128:$Rn)))>;
6994 def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$Rn))),
6995 (v8i8 (!cast<Instruction>(Prefix # 8b) (v8i8 VPR64:$Rn)))>;
6997 def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$Rn))),
6998 (v4i16 (!cast<Instruction>(Prefix # 4h) (v4i16 VPR64:$Rn)))>;
7000 def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$Rn))),
7001 (v2i32 (!cast<Instruction>(Prefix # 2s) (v2i32 VPR64:$Rn)))>;
// NEG has no intrinsic; it is matched from (sub 0, x) by the patterns below.
7004 defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQABS", int_arm_neon_vqabs>;
7005 defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQNEG", int_arm_neon_vqneg>;
7006 defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"ABS", int_arm_neon_vabs>;
// Match vector negation written as (sub all-zeros, x) onto the NEG
// instructions. Neon_AllZero is a v16i8/v8i8 value, so for the wider
// element types (i16/i32/i64) the zero vector appears under a bitconvert
// from the byte-vector form.
7008 def : Pat<(v16i8 (sub
7009 (v16i8 Neon_AllZero),
7010 (v16i8 VPR128:$Rn))),
7011 (v16i8 (NEG16b (v16i8 VPR128:$Rn)))>;
7012 def : Pat<(v8i8 (sub
7013 (v8i8 Neon_AllZero),
7015 (v8i8 (NEG8b (v8i8 VPR64:$Rn)))>;
7016 def : Pat<(v8i16 (sub
7017 (v8i16 (bitconvert (v16i8 Neon_AllZero))),
7018 (v8i16 VPR128:$Rn))),
7019 (v8i16 (NEG8h (v8i16 VPR128:$Rn)))>;
7020 def : Pat<(v4i16 (sub
7021 (v4i16 (bitconvert (v8i8 Neon_AllZero))),
7022 (v4i16 VPR64:$Rn))),
7023 (v4i16 (NEG4h (v4i16 VPR64:$Rn)))>;
7024 def : Pat<(v4i32 (sub
7025 (v4i32 (bitconvert (v16i8 Neon_AllZero))),
7026 (v4i32 VPR128:$Rn))),
7027 (v4i32 (NEG4s (v4i32 VPR128:$Rn)))>;
7028 def : Pat<(v2i32 (sub
7029 (v2i32 (bitconvert (v8i8 Neon_AllZero))),
7030 (v2i32 VPR64:$Rn))),
7031 (v2i32 (NEG2s (v2i32 VPR64:$Rn)))>;
7032 def : Pat<(v2i64 (sub
7033 (v2i64 (bitconvert (v16i8 Neon_AllZero))),
7034 (v2i64 VPR128:$Rn))),
7035 (v2i64 (NEG2d (v2i64 VPR128:$Rn)))>;
// Two-operand variant of the BHSD misc multiclass: each instruction reads
// an accumulator ($src) that is tied to the destination register via the
// "$src = $Rd" constraint, plus the regular source $Rn.
7037 multiclass NeonI_2VMisc_BHSDsize_2Args<string asmop, bit U, bits<5> opcode> {
7038 let Constraints = "$src = $Rd" in {
7039 def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
7040 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7041 asmop # "\t$Rd.16b, $Rn.16b",
7044 def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
7045 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7046 asmop # "\t$Rd.8h, $Rn.8h",
7049 def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
7050 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7051 asmop # "\t$Rd.4s, $Rn.4s",
7054 def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
7055 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7056 asmop # "\t$Rd.2d, $Rn.2d",
7059 def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
7060 (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
7061 asmop # "\t$Rd.8b, $Rn.8b",
7064 def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
7065 (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
7066 asmop # "\t$Rd.4h, $Rn.4h",
7069 def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
7070 (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
7071 asmop # "\t$Rd.2s, $Rn.2s",
// SUQADD/USQADD: signed/unsigned saturating accumulate of the opposite
// signedness; both share opcode 0b00011, distinguished by the U bit.
7076 defm SUQADD : NeonI_2VMisc_BHSDsize_2Args<"suqadd", 0b0, 0b00011>;
7077 defm USQADD : NeonI_2VMisc_BHSDsize_2Args<"usqadd", 0b1, 0b00011>;
// Selection patterns for the two-operand BHSD instructions: the intrinsic's
// first argument becomes the tied accumulator ($src), the second the source
// ($Rn). Instruction lookup is by Prefix # size-suffix, as in the 1-arg case.
7079 multiclass NeonI_2VMisc_BHSD_2Args_Pattern<string Prefix,
7080 SDPatternOperator Neon_Op> {
7081 def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$src), (v16i8 VPR128:$Rn))),
7082 (v16i8 (!cast<Instruction>(Prefix # 16b)
7083 (v16i8 VPR128:$src), (v16i8 VPR128:$Rn)))>;
7085 def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$src), (v8i16 VPR128:$Rn))),
7086 (v8i16 (!cast<Instruction>(Prefix # 8h)
7087 (v8i16 VPR128:$src), (v8i16 VPR128:$Rn)))>;
7089 def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$src), (v4i32 VPR128:$Rn))),
7090 (v4i32 (!cast<Instruction>(Prefix # 4s)
7091 (v4i32 VPR128:$src), (v4i32 VPR128:$Rn)))>;
7093 def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$src), (v2i64 VPR128:$Rn))),
7094 (v2i64 (!cast<Instruction>(Prefix # 2d)
7095 (v2i64 VPR128:$src), (v2i64 VPR128:$Rn)))>;
7097 def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$src), (v8i8 VPR64:$Rn))),
7098 (v8i8 (!cast<Instruction>(Prefix # 8b)
7099 (v8i8 VPR64:$src), (v8i8 VPR64:$Rn)))>;
7101 def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$src), (v4i16 VPR64:$Rn))),
7102 (v4i16 (!cast<Instruction>(Prefix # 4h)
7103 (v4i16 VPR64:$src), (v4i16 VPR64:$Rn)))>;
7105 def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$src), (v2i32 VPR64:$Rn))),
7106 (v2i32 (!cast<Instruction>(Prefix # 2s)
7107 (v2i32 VPR64:$src), (v2i32 VPR64:$Rn)))>;
// These use AArch64-specific intrinsics (no 32-bit ARM equivalent exists).
7110 defm : NeonI_2VMisc_BHSD_2Args_Pattern<"SUQADD", int_aarch64_neon_suqadd>;
7111 defm : NeonI_2VMisc_BHSD_2Args_Pattern<"USQADD", int_aarch64_neon_usqadd>;
// One-operand misc instructions defined for B/H/S element sizes only
// (no D form), with the fixed opcode 0b00100. Unlike the BHSD multiclasses
// above, the selection pattern (Neon_Op) is attached directly to each def.
7113 multiclass NeonI_2VMisc_BHSsizes<string asmop, bit U,
7114 SDPatternOperator Neon_Op> {
7115 def 16b : NeonI_2VMisc<0b1, U, 0b00, 0b00100,
7116 (outs VPR128:$Rd), (ins VPR128:$Rn),
7117 asmop # "\t$Rd.16b, $Rn.16b",
7118 [(set (v16i8 VPR128:$Rd),
7119 (v16i8 (Neon_Op (v16i8 VPR128:$Rn))))],
7122 def 8h : NeonI_2VMisc<0b1, U, 0b01, 0b00100,
7123 (outs VPR128:$Rd), (ins VPR128:$Rn),
7124 asmop # "\t$Rd.8h, $Rn.8h",
7125 [(set (v8i16 VPR128:$Rd),
7126 (v8i16 (Neon_Op (v8i16 VPR128:$Rn))))],
7129 def 4s : NeonI_2VMisc<0b1, U, 0b10, 0b00100,
7130 (outs VPR128:$Rd), (ins VPR128:$Rn),
7131 asmop # "\t$Rd.4s, $Rn.4s",
7132 [(set (v4i32 VPR128:$Rd),
7133 (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
7136 def 8b : NeonI_2VMisc<0b0, U, 0b00, 0b00100,
7137 (outs VPR64:$Rd), (ins VPR64:$Rn),
7138 asmop # "\t$Rd.8b, $Rn.8b",
7139 [(set (v8i8 VPR64:$Rd),
7140 (v8i8 (Neon_Op (v8i8 VPR64:$Rn))))],
7143 def 4h : NeonI_2VMisc<0b0, U, 0b01, 0b00100,
7144 (outs VPR64:$Rd), (ins VPR64:$Rn),
7145 asmop # "\t$Rd.4h, $Rn.4h",
7146 [(set (v4i16 VPR64:$Rd),
7147 (v4i16 (Neon_Op (v4i16 VPR64:$Rn))))],
7150 def 2s : NeonI_2VMisc<0b0, U, 0b10, 0b00100,
7151 (outs VPR64:$Rd), (ins VPR64:$Rn),
7152 asmop # "\t$Rd.2s, $Rn.2s",
7153 [(set (v2i32 VPR64:$Rd),
7154 (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
// CLS (count leading sign bits) uses the ARM intrinsic; CLZ maps straight
// onto the generic ctlz node.
7158 defm CLS : NeonI_2VMisc_BHSsizes<"cls", 0b0, int_arm_neon_vcls>;
7159 defm CLZ : NeonI_2VMisc_BHSsizes<"clz", 0b1, ctlz>;
// Byte-only misc instructions (16b and 8b forms): CNT, NOT and RBIT.
// NOT and RBIT share opcode 0b00101/U=1 and are separated by the size field.
7161 multiclass NeonI_2VMisc_Bsize<string asmop, bit U, bits<2> size,
7163 def 16b : NeonI_2VMisc<0b1, U, size, Opcode,
7164 (outs VPR128:$Rd), (ins VPR128:$Rn),
7165 asmop # "\t$Rd.16b, $Rn.16b",
7168 def 8b : NeonI_2VMisc<0b0, U, size, Opcode,
7169 (outs VPR64:$Rd), (ins VPR64:$Rn),
7170 asmop # "\t$Rd.8b, $Rn.8b",
7174 defm CNT : NeonI_2VMisc_Bsize<"cnt", 0b0, 0b00, 0b00101>;
7175 defm NOT : NeonI_2VMisc_Bsize<"not", 0b1, 0b00, 0b00101>;
7176 defm RBIT : NeonI_2VMisc_Bsize<"rbit", 0b1, 0b01, 0b00101>;
// "mvn" is the preferred-disassembly alias for NOT (the trailing 0 marks the
// alias as not used for printing).
7178 def : NeonInstAlias<"mvn $Rd.16b, $Rn.16b",
7179 (NOT16b VPR128:$Rd, VPR128:$Rn), 0>;
7180 def : NeonInstAlias<"mvn $Rd.8b, $Rn.8b",
7181 (NOT8b VPR64:$Rd, VPR64:$Rn), 0>;
// Population count lowers onto CNT.
7183 def : Pat<(v16i8 (ctpop (v16i8 VPR128:$Rn))),
7184 (v16i8 (CNT16b (v16i8 VPR128:$Rn)))>;
7185 def : Pat<(v8i8 (ctpop (v8i8 VPR64:$Rn))),
7186 (v8i8 (CNT8b (v8i8 VPR64:$Rn)))>;
// Bitwise complement written as (xor x, all-ones) selects NOT. The NOT
// instruction only exists in byte arrangements, so wider-element xors
// (v8i16/v4i16/v4i32/v2i32/v2i64) still select NOT16b, with the all-ones
// vector appearing through a bitconvert of the byte all-one value.
7188 def : Pat<(v16i8 (xor
7190 (v16i8 Neon_AllOne))),
7191 (v16i8 (NOT16b (v16i8 VPR128:$Rn)))>;
7192 def : Pat<(v8i8 (xor
7194 (v8i8 Neon_AllOne))),
7195 (v8i8 (NOT8b (v8i8 VPR64:$Rn)))>;
7196 def : Pat<(v8i16 (xor
7198 (v8i16 (bitconvert (v16i8 Neon_AllOne))))),
7199 (NOT16b VPR128:$Rn)>;
7200 def : Pat<(v4i16 (xor
7202 (v4i16 (bitconvert (v8i8 Neon_AllOne))))),
7204 def : Pat<(v4i32 (xor
7206 (v4i32 (bitconvert (v16i8 Neon_AllOne))))),
7207 (NOT16b VPR128:$Rn)>;
7208 def : Pat<(v2i32 (xor
7210 (v2i32 (bitconvert (v8i8 Neon_AllOne))))),
7212 def : Pat<(v2i64 (xor
7214 (v2i64 (bitconvert (v16i8 Neon_AllOne))))),
7215 (NOT16b VPR128:$Rn)>;
// Bit reversal is exposed only via the AArch64-specific rbit intrinsic.
7217 def : Pat<(v16i8 (int_aarch64_neon_rbit (v16i8 VPR128:$Rn))),
7218 (v16i8 (RBIT16b (v16i8 VPR128:$Rn)))>;
7219 def : Pat<(v8i8 (int_aarch64_neon_rbit (v8i8 VPR64:$Rn))),
7220 (v8i8 (RBIT8b (v8i8 VPR64:$Rn)))>;
// One-operand floating-point misc instructions over the S/D element sizes:
// 4s and 2d on VPR128, 2s on VPR64. Patterns are attached directly and use
// the generic FP nodes passed in as Neon_Op.
7222 multiclass NeonI_2VMisc_SDsizes<string asmop, bit U, bits<5> opcode,
7223 SDPatternOperator Neon_Op> {
7224 def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
7225 (outs VPR128:$Rd), (ins VPR128:$Rn),
7226 asmop # "\t$Rd.4s, $Rn.4s",
7227 [(set (v4f32 VPR128:$Rd),
7228 (v4f32 (Neon_Op (v4f32 VPR128:$Rn))))],
7231 def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
7232 (outs VPR128:$Rd), (ins VPR128:$Rn),
7233 asmop # "\t$Rd.2d, $Rn.2d",
7234 [(set (v2f64 VPR128:$Rd),
7235 (v2f64 (Neon_Op (v2f64 VPR128:$Rn))))],
7238 def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
7239 (outs VPR64:$Rd), (ins VPR64:$Rn),
7240 asmop # "\t$Rd.2s, $Rn.2s",
7241 [(set (v2f32 VPR64:$Rd),
7242 (v2f32 (Neon_Op (v2f32 VPR64:$Rn))))],
// FABS/FNEG share opcode 0b01111; the U bit selects FNEG.
7246 defm FABS : NeonI_2VMisc_SDsizes<"fabs", 0b0, 0b01111, fabs>;
7247 defm FNEG : NeonI_2VMisc_SDsizes<"fneg", 0b1, 0b01111, fneg>;
// Integer narrowing instructions. The plain forms (8h8b/4s4h/2d2s) write a
// 64-bit result from a 128-bit source. The "2" forms (8h16b/4s8h/2d4s)
// write the narrowed result into the high half of a 128-bit register whose
// low half is preserved, hence the tied "$Rd = $src" constraint.
7249 multiclass NeonI_2VMisc_HSD_Narrow<string asmop, bit U, bits<5> opcode> {
7250 def 8h8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
7251 (outs VPR64:$Rd), (ins VPR128:$Rn),
7252 asmop # "\t$Rd.8b, $Rn.8h",
7255 def 4s4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
7256 (outs VPR64:$Rd), (ins VPR128:$Rn),
7257 asmop # "\t$Rd.4h, $Rn.4s",
7260 def 2d2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
7261 (outs VPR64:$Rd), (ins VPR128:$Rn),
7262 asmop # "\t$Rd.2s, $Rn.2d",
7265 let Constraints = "$Rd = $src" in {
7266 def 8h16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
7267 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7268 asmop # "2\t$Rd.16b, $Rn.8h",
7271 def 4s8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
7272 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7273 asmop # "2\t$Rd.8h, $Rn.4s",
7276 def 2d4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
7277 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7278 asmop # "2\t$Rd.4s, $Rn.2d",
// XTN/SQXTUN share opcode 0b10010 and SQXTN/UQXTN share 0b10100; the U bit
// picks the second instruction of each pair.
7283 defm XTN : NeonI_2VMisc_HSD_Narrow<"xtn", 0b0, 0b10010>;
7284 defm SQXTUN : NeonI_2VMisc_HSD_Narrow<"sqxtun", 0b1, 0b10010>;
7285 defm SQXTN : NeonI_2VMisc_HSD_Narrow<"sqxtn", 0b0, 0b10100>;
7286 defm UQXTN : NeonI_2VMisc_HSD_Narrow<"uqxtn", 0b1, 0b10100>;
// Selection patterns for the narrowing instructions. The simple patterns
// match Neon_Op directly; the concat_vectors patterns recognize "narrow
// into the high half" and select the "2" instruction forms, widening the
// existing low half from VPR64 to VPR128 with SUBREG_TO_REG.
7288 multiclass NeonI_2VMisc_Narrow_Patterns<string Prefix,
7289 SDPatternOperator Neon_Op> {
7290 def : Pat<(v8i8 (Neon_Op (v8i16 VPR128:$Rn))),
7291 (v8i8 (!cast<Instruction>(Prefix # 8h8b) (v8i16 VPR128:$Rn)))>;
7293 def : Pat<(v4i16 (Neon_Op (v4i32 VPR128:$Rn))),
7294 (v4i16 (!cast<Instruction>(Prefix # 4s4h) (v4i32 VPR128:$Rn)))>;
7296 def : Pat<(v2i32 (Neon_Op (v2i64 VPR128:$Rn))),
7297 (v2i32 (!cast<Instruction>(Prefix # 2d2s) (v2i64 VPR128:$Rn)))>;
7299 def : Pat<(v16i8 (concat_vectors
7301 (v8i8 (Neon_Op (v8i16 VPR128:$Rn))))),
7302 (!cast<Instruction>(Prefix # 8h16b)
7303 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
7306 def : Pat<(v8i16 (concat_vectors
7308 (v4i16 (Neon_Op (v4i32 VPR128:$Rn))))),
7309 (!cast<Instruction>(Prefix # 4s8h)
7310 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
7313 def : Pat<(v4i32 (concat_vectors
7315 (v2i32 (Neon_Op (v2i64 VPR128:$Rn))))),
7316 (!cast<Instruction>(Prefix # 2d4s)
7317 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
// Plain truncation selects XTN; the saturating variants come from the
// shared ARM vqmovn intrinsics.
7321 defm : NeonI_2VMisc_Narrow_Patterns<"XTN", trunc>;
7322 defm : NeonI_2VMisc_Narrow_Patterns<"SQXTUN", int_arm_neon_vqmovnsu>;
7323 defm : NeonI_2VMisc_Narrow_Patterns<"SQXTN", int_arm_neon_vqmovns>;
7324 defm : NeonI_2VMisc_Narrow_Patterns<"UQXTN", int_arm_neon_vqmovnu>;
// SHLL/SHLL2 (shift left long): the immediate must equal the source element
// width, enforced by the uimm_exact8/16/32 operand classes. The "2" forms
// operate on the high half of a 128-bit source.
7326 multiclass NeonI_2VMisc_SHIFT<string asmop, bit U, bits<5> opcode> {
7327 def 8b8h : NeonI_2VMisc<0b0, U, 0b00, opcode,
7329 (ins VPR64:$Rn, uimm_exact8:$Imm),
7330 asmop # "\t$Rd.8h, $Rn.8b, $Imm",
7333 def 4h4s : NeonI_2VMisc<0b0, U, 0b01, opcode,
7335 (ins VPR64:$Rn, uimm_exact16:$Imm),
7336 asmop # "\t$Rd.4s, $Rn.4h, $Imm",
7339 def 2s2d : NeonI_2VMisc<0b0, U, 0b10, opcode,
7341 (ins VPR64:$Rn, uimm_exact32:$Imm),
7342 asmop # "\t$Rd.2d, $Rn.2s, $Imm",
7345 def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
7347 (ins VPR128:$Rn, uimm_exact8:$Imm),
7348 asmop # "2\t$Rd.8h, $Rn.16b, $Imm",
7351 def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
7353 (ins VPR128:$Rn, uimm_exact16:$Imm),
7354 asmop # "2\t$Rd.4s, $Rn.8h, $Imm",
7357 def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
7359 (ins VPR128:$Rn, uimm_exact32:$Imm),
7360 asmop # "2\t$Rd.2d, $Rn.4s, $Imm",
7364 defm SHLL : NeonI_2VMisc_SHIFT<"shll", 0b1, 0b10011>;
// Pattern helper classes: match an extend (zext or sext, both produce the
// same SHLL since the shift fills the low bits) of a 64-bit vector followed
// by a shift by the element width, and select the SHLL variant named by
// 'suffix'. The High variant matches on the upper half of a 128-bit source
// extracted by the GetHigh PatFrag.
7366 class NeonI_SHLL_Patterns<ValueType OpTy, ValueType DesTy,
7367 SDPatternOperator ExtOp, Operand Neon_Imm,
7370 (DesTy (ExtOp (OpTy VPR64:$Rn))),
7372 (i32 Neon_Imm:$Imm))))),
7373 (!cast<Instruction>("SHLL" # suffix) VPR64:$Rn, Neon_Imm:$Imm)>;
7375 class NeonI_SHLL_High_Patterns<ValueType OpTy, ValueType DesTy,
7376 SDPatternOperator ExtOp, Operand Neon_Imm,
7377 string suffix, PatFrag GetHigh>
7380 (OpTy (GetHigh VPR128:$Rn)))),
7382 (i32 Neon_Imm:$Imm))))),
7383 (!cast<Instruction>("SHLL" # suffix) VPR128:$Rn, Neon_Imm:$Imm)>;
7385 def : NeonI_SHLL_Patterns<v8i8, v8i16, zext, uimm_exact8, "8b8h">;
7386 def : NeonI_SHLL_Patterns<v8i8, v8i16, sext, uimm_exact8, "8b8h">;
7387 def : NeonI_SHLL_Patterns<v4i16, v4i32, zext, uimm_exact16, "4h4s">;
7388 def : NeonI_SHLL_Patterns<v4i16, v4i32, sext, uimm_exact16, "4h4s">;
7389 def : NeonI_SHLL_Patterns<v2i32, v2i64, zext, uimm_exact32, "2s2d">;
7390 def : NeonI_SHLL_Patterns<v2i32, v2i64, sext, uimm_exact32, "2s2d">;
7391 def : NeonI_SHLL_High_Patterns<v8i8, v8i16, zext, uimm_exact8, "16b8h",
7393 def : NeonI_SHLL_High_Patterns<v8i8, v8i16, sext, uimm_exact8, "16b8h",
7395 def : NeonI_SHLL_High_Patterns<v4i16, v4i32, zext, uimm_exact16, "8h4s",
7397 def : NeonI_SHLL_High_Patterns<v4i16, v4i32, sext, uimm_exact16, "8h4s",
7399 def : NeonI_SHLL_High_Patterns<v2i32, v2i64, zext, uimm_exact32, "4s2d",
7401 def : NeonI_SHLL_High_Patterns<v2i32, v2i64, sext, uimm_exact32, "4s2d",
// Floating-point narrowing converts (FCVTN/FCVTN2): 4s->4h and 2d->2s,
// plus the "2" forms that fill the high half of a 128-bit destination
// while preserving the low half (tied "$src = $Rd").
7404 multiclass NeonI_2VMisc_SD_Narrow<string asmop, bit U, bits<5> opcode> {
7405 def 4s4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
7406 (outs VPR64:$Rd), (ins VPR128:$Rn),
7407 asmop # "\t$Rd.4h, $Rn.4s",
7410 def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
7411 (outs VPR64:$Rd), (ins VPR128:$Rn),
7412 asmop # "\t$Rd.2s, $Rn.2d",
7415 let Constraints = "$src = $Rd" in {
7416 def 4s8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
7417 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7418 asmop # "2\t$Rd.8h, $Rn.4s",
7421 def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
7422 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7423 asmop # "2\t$Rd.4s, $Rn.2d",
7428 defm FCVTN : NeonI_2VMisc_SD_Narrow<"fcvtn", 0b0, 0b10110>;
// Patterns: f32->f16 goes through the shared ARM vcvtfp2hf intrinsic
// (result is a v4i16 of half-precision bits); f64->f32 is the generic
// fround (fp_round) node. The concat_vectors forms select the "2" variants.
7430 multiclass NeonI_2VMisc_Narrow_Pattern<string prefix,
7431 SDPatternOperator f32_to_f16_Op,
7432 SDPatternOperator f64_to_f32_Op> {
7434 def : Pat<(v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))),
7435 (!cast<Instruction>(prefix # "4s4h") (v4f32 VPR128:$Rn))>;
7437 def : Pat<(v8i16 (concat_vectors
7439 (v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))))),
7440 (!cast<Instruction>(prefix # "4s8h")
7441 (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
7442 (v4f32 VPR128:$Rn))>;
7444 def : Pat<(v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))),
7445 (!cast<Instruction>(prefix # "2d2s") (v2f64 VPR128:$Rn))>;
7447 def : Pat<(v4f32 (concat_vectors
7449 (v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))))),
7450 (!cast<Instruction>(prefix # "2d4s")
7451 (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
7452 (v2f64 VPR128:$Rn))>;
7455 defm : NeonI_2VMisc_Narrow_Pattern<"FCVTN", int_arm_neon_vcvtfp2hf, fround>;
// FCVTXN/FCVTXN2: double-to-single narrowing convert, exposed through the
// AArch64-specific fcvtxn intrinsic. Only the 2d source form exists; the
// "2" variant ties $src to $Rd to preserve the destination's low half.
7457 multiclass NeonI_2VMisc_D_Narrow<string asmop, string prefix, bit U,
7459 def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
7460 (outs VPR64:$Rd), (ins VPR128:$Rn),
7461 asmop # "\t$Rd.2s, $Rn.2d",
7464 def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
7465 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7466 asmop # "2\t$Rd.4s, $Rn.2d",
7468 let Constraints = "$src = $Rd";
7471 def : Pat<(v2f32 (int_aarch64_neon_fcvtxn (v2f64 VPR128:$Rn))),
7472 (!cast<Instruction>(prefix # "2d2s") VPR128:$Rn)>;
7474 def : Pat<(v4f32 (concat_vectors
7476 (v2f32 (int_aarch64_neon_fcvtxn (v2f64 VPR128:$Rn))))),
7477 (!cast<Instruction>(prefix # "2d4s")
7478 (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
7482 defm FCVTXN : NeonI_2VMisc_D_Narrow<"fcvtxn","FCVTXN", 0b1, 0b10110>;
// Extracts the high two f32 lanes (elements 2..3) of a v4f32.
7484 def Neon_High4Float : PatFrag<(ops node:$in),
7485 (extract_subvector (v4f32 node:$in), (iPTR 2))>;
// FCVTL/FCVTL2 (lengthening converts): 4h->4s and 2s->2d from a 64-bit
// source, and "2" forms reading the high half of a 128-bit source.
7487 multiclass NeonI_2VMisc_HS_Extend<string asmop, bit U, bits<5> opcode> {
7488 def 4h4s : NeonI_2VMisc<0b0, U, 0b00, opcode,
7489 (outs VPR128:$Rd), (ins VPR64:$Rn),
7490 asmop # "\t$Rd.4s, $Rn.4h",
7493 def 2s2d : NeonI_2VMisc<0b0, U, 0b01, opcode,
7494 (outs VPR128:$Rd), (ins VPR64:$Rn),
7495 asmop # "\t$Rd.2d, $Rn.2s",
7498 def 8h4s : NeonI_2VMisc<0b1, U, 0b00, opcode,
7499 (outs VPR128:$Rd), (ins VPR128:$Rn),
7500 asmop # "2\t$Rd.4s, $Rn.8h",
7503 def 4s2d : NeonI_2VMisc<0b1, U, 0b01, opcode,
7504 (outs VPR128:$Rd), (ins VPR128:$Rn),
7505 asmop # "2\t$Rd.2d, $Rn.4s",
7509 defm FCVTL : NeonI_2VMisc_HS_Extend<"fcvtl", 0b0, 0b10111>;
// Patterns: half->single goes through the shared ARM vcvthf2fp intrinsic,
// single->double through the generic fextend node; the high-half forms use
// Neon_High4Float (defined above) to match the upper lanes.
7511 multiclass NeonI_2VMisc_Extend_Pattern<string prefix> {
7512 def : Pat<(v4f32 (int_arm_neon_vcvthf2fp (v4i16 VPR64:$Rn))),
7513 (!cast<Instruction>(prefix # "4h4s") VPR64:$Rn)>;
7515 def : Pat<(v4f32 (int_arm_neon_vcvthf2fp
7517 (v8i16 VPR128:$Rn))))),
7518 (!cast<Instruction>(prefix # "8h4s") VPR128:$Rn)>;
7520 def : Pat<(v2f64 (fextend (v2f32 VPR64:$Rn))),
7521 (!cast<Instruction>(prefix # "2s2d") VPR64:$Rn)>;
7523 def : Pat<(v2f64 (fextend
7524 (v2f32 (Neon_High4Float
7525 (v4f32 VPR128:$Rn))))),
7526 (!cast<Instruction>(prefix # "4s2d") VPR128:$Rn)>;
7529 defm : NeonI_2VMisc_Extend_Pattern<"FCVTL">;
// Generic S/D-size conversion multiclass shared by the fp<->int and fp->fp
// families below. The result/operand types for each arrangement are passed
// in explicitly so the same skeleton serves all three directions. The size
// field is built as {Size, 0b0/0b1}: the low bit distinguishes 4s/2s from 2d.
7531 multiclass NeonI_2VMisc_SD_Conv<string asmop, bit Size, bit U, bits<5> opcode,
7532 ValueType ResTy4s, ValueType OpTy4s,
7533 ValueType ResTy2d, ValueType OpTy2d,
7534 ValueType ResTy2s, ValueType OpTy2s,
7535 SDPatternOperator Neon_Op> {
7537 def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode,
7538 (outs VPR128:$Rd), (ins VPR128:$Rn),
7539 asmop # "\t$Rd.4s, $Rn.4s",
7540 [(set (ResTy4s VPR128:$Rd),
7541 (ResTy4s (Neon_Op (OpTy4s VPR128:$Rn))))],
7544 def 2d : NeonI_2VMisc<0b1, U, {Size, 0b1}, opcode,
7545 (outs VPR128:$Rd), (ins VPR128:$Rn),
7546 asmop # "\t$Rd.2d, $Rn.2d",
7547 [(set (ResTy2d VPR128:$Rd),
7548 (ResTy2d (Neon_Op (OpTy2d VPR128:$Rn))))],
7551 def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
7552 (outs VPR64:$Rd), (ins VPR64:$Rn),
7553 asmop # "\t$Rd.2s, $Rn.2s",
7554 [(set (ResTy2s VPR64:$Rd),
7555 (ResTy2s (Neon_Op (OpTy2s VPR64:$Rn))))],
// FP -> integer conversions: instantiate the generic SD_Conv skeleton with
// integer result types and FP operand types.
7559 multiclass NeonI_2VMisc_fp_to_int<string asmop, bit Size, bit U,
7560 bits<5> opcode, SDPatternOperator Neon_Op> {
7561 defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4i32, v4f32, v2i64,
7562 v2f64, v2i32, v2f32, Neon_Op>;
// Rounding-mode-specific converts (to-nearest/plus-inf/minus-inf/away) use
// AArch64 intrinsics; FCVTZS/FCVTZU (round toward zero) are the ISA's match
// for the generic fp_to_sint/fp_to_uint nodes.
7565 defm FCVTNS : NeonI_2VMisc_fp_to_int<"fcvtns", 0b0, 0b0, 0b11010,
7566 int_aarch64_neon_fcvtns>;
7567 defm FCVTNU : NeonI_2VMisc_fp_to_int<"fcvtnu", 0b0, 0b1, 0b11010,
7568 int_aarch64_neon_fcvtnu>;
7569 defm FCVTPS : NeonI_2VMisc_fp_to_int<"fcvtps", 0b1, 0b0, 0b11010,
7570 int_aarch64_neon_fcvtps>;
7571 defm FCVTPU : NeonI_2VMisc_fp_to_int<"fcvtpu", 0b1, 0b1, 0b11010,
7572 int_aarch64_neon_fcvtpu>;
7573 defm FCVTMS : NeonI_2VMisc_fp_to_int<"fcvtms", 0b0, 0b0, 0b11011,
7574 int_aarch64_neon_fcvtms>;
7575 defm FCVTMU : NeonI_2VMisc_fp_to_int<"fcvtmu", 0b0, 0b1, 0b11011,
7576 int_aarch64_neon_fcvtmu>;
7577 defm FCVTZS : NeonI_2VMisc_fp_to_int<"fcvtzs", 0b1, 0b0, 0b11011, fp_to_sint>;
7578 defm FCVTZU : NeonI_2VMisc_fp_to_int<"fcvtzu", 0b1, 0b1, 0b11011, fp_to_uint>;
7579 defm FCVTAS : NeonI_2VMisc_fp_to_int<"fcvtas", 0b0, 0b0, 0b11100,
7580 int_aarch64_neon_fcvtas>;
7581 defm FCVTAU : NeonI_2VMisc_fp_to_int<"fcvtau", 0b0, 0b1, 0b11100,
7582 int_aarch64_neon_fcvtau>;
// Integer -> FP conversions (SCVTF/UCVTF) map onto the generic
// sint_to_fp/uint_to_fp nodes.
7584 multiclass NeonI_2VMisc_int_to_fp<string asmop, bit Size, bit U,
7585 bits<5> opcode, SDPatternOperator Neon_Op> {
7586 defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4f32, v4i32, v2f64,
7587 v2i64, v2f32, v2i32, Neon_Op>;
7590 defm SCVTF : NeonI_2VMisc_int_to_fp<"scvtf", 0b0, 0b0, 0b11101, sint_to_fp>;
7591 defm UCVTF : NeonI_2VMisc_int_to_fp<"ucvtf", 0b0, 0b1, 0b11101, uint_to_fp>;
// FP -> FP unary ops (same element type in and out): the FRINT* rounding
// family maps onto generic rounding nodes, while the reciprocal/rsqrt
// estimates reuse the ARM intrinsics and FSQRT the AArch64 one.
7593 multiclass NeonI_2VMisc_fp_to_fp<string asmop, bit Size, bit U,
7594 bits<5> opcode, SDPatternOperator Neon_Op> {
7595 defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4f32, v4f32, v2f64,
7596 v2f64, v2f32, v2f32, Neon_Op>;
7599 defm FRINTN : NeonI_2VMisc_fp_to_fp<"frintn", 0b0, 0b0, 0b11000,
7600 int_aarch64_neon_frintn>;
7601 defm FRINTA : NeonI_2VMisc_fp_to_fp<"frinta", 0b0, 0b1, 0b11000, frnd>;
7602 defm FRINTP : NeonI_2VMisc_fp_to_fp<"frintp", 0b1, 0b0, 0b11000, fceil>;
7603 defm FRINTM : NeonI_2VMisc_fp_to_fp<"frintm", 0b0, 0b0, 0b11001, ffloor>;
7604 defm FRINTX : NeonI_2VMisc_fp_to_fp<"frintx", 0b0, 0b1, 0b11001, frint>;
7605 defm FRINTZ : NeonI_2VMisc_fp_to_fp<"frintz", 0b1, 0b0, 0b11001, ftrunc>;
7606 defm FRINTI : NeonI_2VMisc_fp_to_fp<"frinti", 0b1, 0b1, 0b11001, fnearbyint>;
7607 defm FRECPE : NeonI_2VMisc_fp_to_fp<"frecpe", 0b1, 0b0, 0b11101,
7608 int_arm_neon_vrecpe>;
7609 defm FRSQRTE : NeonI_2VMisc_fp_to_fp<"frsqrte", 0b1, 0b1, 0b11101,
7610 int_arm_neon_vrsqrte>;
7611 defm FSQRT : NeonI_2VMisc_fp_to_fp<"fsqrt", 0b1, 0b1, 0b11111,
7612 int_aarch64_neon_fsqrt>;
// S-size-only unary ops (integer estimate instructions URECPE/URSQRTE):
// only the 4s and 2s arrangements exist, no 2d form.
7614 multiclass NeonI_2VMisc_S_Conv<string asmop, bit Size, bit U,
7615 bits<5> opcode, SDPatternOperator Neon_Op> {
7616 def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode,
7617 (outs VPR128:$Rd), (ins VPR128:$Rn),
7618 asmop # "\t$Rd.4s, $Rn.4s",
7619 [(set (v4i32 VPR128:$Rd),
7620 (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
7623 def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
7624 (outs VPR64:$Rd), (ins VPR64:$Rn),
7625 asmop # "\t$Rd.2s, $Rn.2s",
7626 [(set (v2i32 VPR64:$Rd),
7627 (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
// Note: the same vrecpe/vrsqrte intrinsics are used here with integer
// vector types, vs. FP vector types for FRECPE/FRSQRTE above.
7631 defm URECPE : NeonI_2VMisc_S_Conv<"urecpe", 0b1, 0b0, 0b11100,
7632 int_arm_neon_vrecpe>;
7633 defm URSQRTE : NeonI_2VMisc_S_Conv<"ursqrte", 0b1, 0b1, 0b11100,
7634 int_arm_neon_vrsqrte>;
// Crypto extension instructions. All classes below gate on
// [HasNEON, HasCrypto]; the accumulating forms tie $src to $Rd.
//
// AES round instructions with a chained state operand (AESE/AESD).
7637 class NeonI_Cryptoaes_2v<bits<2> size, bits<5> opcode,
7638 string asmop, SDPatternOperator opnode>
7639 : NeonI_Crypto_AES<size, opcode,
7640 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7641 asmop # "\t$Rd.16b, $Rn.16b",
7642 [(set (v16i8 VPR128:$Rd),
7643 (v16i8 (opnode (v16i8 VPR128:$src),
7644 (v16i8 VPR128:$Rn))))],
7646 let Constraints = "$src = $Rd";
7647 let Predicates = [HasNEON, HasCrypto];
7650 def AESE : NeonI_Cryptoaes_2v<0b00, 0b00100, "aese", int_arm_neon_aese>;
7651 def AESD : NeonI_Cryptoaes_2v<0b00, 0b00101, "aesd", int_arm_neon_aesd>;
// AES mix-columns instructions with a single source (AESMC/AESIMC).
7653 class NeonI_Cryptoaes<bits<2> size, bits<5> opcode,
7654 string asmop, SDPatternOperator opnode>
7655 : NeonI_Crypto_AES<size, opcode,
7656 (outs VPR128:$Rd), (ins VPR128:$Rn),
7657 asmop # "\t$Rd.16b, $Rn.16b",
7658 [(set (v16i8 VPR128:$Rd),
7659 (v16i8 (opnode (v16i8 VPR128:$Rn))))],
7662 def AESMC : NeonI_Cryptoaes<0b00, 0b00110, "aesmc", int_arm_neon_aesmc>;
7663 def AESIMC : NeonI_Cryptoaes<0b00, 0b00111, "aesimc", int_arm_neon_aesimc>;
// Two-vector SHA schedule-update instructions (v4i32 accumulator + source).
7665 class NeonI_Cryptosha_vv<bits<2> size, bits<5> opcode,
7666 string asmop, SDPatternOperator opnode>
7667 : NeonI_Crypto_SHA<size, opcode,
7668 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7669 asmop # "\t$Rd.4s, $Rn.4s",
7670 [(set (v4i32 VPR128:$Rd),
7671 (v4i32 (opnode (v4i32 VPR128:$src),
7672 (v4i32 VPR128:$Rn))))],
7674 let Constraints = "$src = $Rd";
7675 let Predicates = [HasNEON, HasCrypto];
7678 def SHA1SU1 : NeonI_Cryptosha_vv<0b00, 0b00001, "sha1su1",
7679 int_arm_neon_sha1su1>;
7680 def SHA256SU0 : NeonI_Cryptosha_vv<0b00, 0b00010, "sha256su0",
7681 int_arm_neon_sha256su0>;
// SHA1H: scalar 32-bit rotate, FPR32 -> FPR32 (modeled as v1i32).
7683 class NeonI_Cryptosha_ss<bits<2> size, bits<5> opcode,
7684 string asmop, SDPatternOperator opnode>
7685 : NeonI_Crypto_SHA<size, opcode,
7686 (outs FPR32:$Rd), (ins FPR32:$Rn),
7687 asmop # "\t$Rd, $Rn",
7688 [(set (v1i32 FPR32:$Rd),
7689 (v1i32 (opnode (v1i32 FPR32:$Rn))))],
7691 let Predicates = [HasNEON, HasCrypto];
7694 def SHA1H : NeonI_Cryptosha_ss<0b00, 0b00000, "sha1h", int_arm_neon_sha1h>;
// Three-vector SHA schedule updates (tied accumulator + two vector sources).
7696 class NeonI_Cryptosha3_vvv<bits<2> size, bits<3> opcode, string asmop,
7697 SDPatternOperator opnode>
7698 : NeonI_Crypto_3VSHA<size, opcode,
7700 (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
7701 asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
7702 [(set (v4i32 VPR128:$Rd),
7703 (v4i32 (opnode (v4i32 VPR128:$src),
7705 (v4i32 VPR128:$Rm))))],
7707 let Constraints = "$src = $Rd";
7708 let Predicates = [HasNEON, HasCrypto];
7711 def SHA1SU0 : NeonI_Cryptosha3_vvv<0b00, 0b011, "sha1su0",
7712 int_arm_neon_sha1su0>;
7713 def SHA256SU1 : NeonI_Cryptosha3_vvv<0b00, 0b110, "sha256su1",
7714 int_arm_neon_sha256su1>;
// SHA-256 hash updates: 128-bit scalar (FPR128) accumulator and first
// source, vector second source printed as .4s.
7716 class NeonI_Cryptosha3_qqv<bits<2> size, bits<3> opcode, string asmop,
7717 SDPatternOperator opnode>
7718 : NeonI_Crypto_3VSHA<size, opcode,
7720 (ins FPR128:$src, FPR128:$Rn, VPR128:$Rm),
7721 asmop # "\t$Rd, $Rn, $Rm.4s",
7722 [(set (v4i32 FPR128:$Rd),
7723 (v4i32 (opnode (v4i32 FPR128:$src),
7725 (v4i32 VPR128:$Rm))))],
7727 let Constraints = "$src = $Rd";
7728 let Predicates = [HasNEON, HasCrypto];
7731 def SHA256H : NeonI_Cryptosha3_qqv<0b00, 0b100, "sha256h",
7732 int_arm_neon_sha256h>;
7733 def SHA256H2 : NeonI_Cryptosha3_qqv<0b00, 0b101, "sha256h2",
7734 int_arm_neon_sha256h2>;
// SHA-1 hash updates: FPR128 accumulator, FPR32 scalar operand, vector
// second source. These use AArch64-specific intrinsics.
7736 class NeonI_Cryptosha3_qsv<bits<2> size, bits<3> opcode, string asmop,
7737 SDPatternOperator opnode>
7738 : NeonI_Crypto_3VSHA<size, opcode,
7740 (ins FPR128:$src, FPR32:$Rn, VPR128:$Rm),
7741 asmop # "\t$Rd, $Rn, $Rm.4s",
7742 [(set (v4i32 FPR128:$Rd),
7743 (v4i32 (opnode (v4i32 FPR128:$src),
7745 (v4i32 VPR128:$Rm))))],
7747 let Constraints = "$src = $Rd";
7748 let Predicates = [HasNEON, HasCrypto];
7751 def SHA1C : NeonI_Cryptosha3_qsv<0b00, 0b000, "sha1c", int_aarch64_neon_sha1c>;
7752 def SHA1P : NeonI_Cryptosha3_qsv<0b00, 0b001, "sha1p", int_aarch64_neon_sha1p>;
7753 def SHA1M : NeonI_Cryptosha3_qsv<0b00, 0b010, "sha1m", int_aarch64_neon_sha1m>;