//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//


//===----------------------------------------------------------------------===//
// NEON-specific Operands.
//===----------------------------------------------------------------------===//
// NEON modified-immediate operand, as used by VMOV/VMVN/VORR/VBIC and the
// per-size splat operands below. All print via printNEONModImmOperand.
def nModImm : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
}

def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
def nImmSplatI8 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI8AsmOperand;
}
def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
def nImmSplatI16 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI16AsmOperand;
}
def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
def nImmSplatI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI32AsmOperand;
}
def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
def nImmVMOVI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32AsmOperand;
}
def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
def nImmVMOVI32Neg : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32NegAsmOperand;
}
def nImmVMOVF32 : Operand<i32> {
  let PrintMethod = "printFPImmOperand";
  let ParserMatchClass = FPImmOperand;
}
def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
def nImmSplatI64 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI64AsmOperand;
}
// Vector lane-index operands. The ImmLeaf predicate bounds the index by the
// number of lanes of the given element size in a 64-bit D register.
def VectorIndex8Operand  : AsmOperandClass { let Name = "VectorIndex8"; }
def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 8;
}]> {
  let ParserMatchClass = VectorIndex8Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 4;
}]> {
  let ParserMatchClass = VectorIndex16Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 2;
}]> {
  let ParserMatchClass = VectorIndex32Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass {
  let Name = "VecListOneD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
  let ParserMatchClass = VecListOneDAsmOperand;
}
// Register list of two sequential D registers.
def VecListTwoDAsmOperand : AsmOperandClass {
  let Name = "VecListTwoD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListTwoD : RegisterOperand<DPR, "printVectorListTwo"> {
  let ParserMatchClass = VecListTwoDAsmOperand;
}
// FIXME: Replace all VecListTwoD with VecListDPair
def VecListDPairAsmOperand : AsmOperandClass {
  let Name = "VecListDPair";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPair : RegisterOperand<DPair, "printVectorListDPair"> {
  let ParserMatchClass = VecListDPairAsmOperand;
}
// Register list of three sequential D registers.
def VecListThreeDAsmOperand : AsmOperandClass {
  let Name = "VecListThreeD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
  let ParserMatchClass = VecListThreeDAsmOperand;
}
// Register list of four sequential D registers.
def VecListFourDAsmOperand : AsmOperandClass {
  let Name = "VecListFourD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
  let ParserMatchClass = VecListFourDAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListTwoQAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListTwoQ : RegisterOperand<DPR, "printVectorListTwoSpaced"> {
  let ParserMatchClass = VecListTwoQAsmOperand;
}
// Register list of three D registers spaced by 2 (three Q registers).
def VecListThreeQAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
  let ParserMatchClass = VecListThreeQAsmOperand;
}
// Register list of four D registers spaced by 2 (four Q registers).
def VecListFourQAsmOperand : AsmOperandClass {
  let Name = "VecListFourQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
  let ParserMatchClass = VecListFourQAsmOperand;
}

// Register list of one D register, with "all lanes" subscripting.
def VecListOneDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListOneDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
  let ParserMatchClass = VecListOneDAllLanesAsmOperand;
}
// Register list of two D registers, with "all lanes" subscripting.
def VecListTwoDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListTwoDAllLanes : RegisterOperand<DPR, "printVectorListTwoAllLanes"> {
  let ParserMatchClass = VecListTwoDAllLanesAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListTwoQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListTwoQAllLanes : RegisterOperand<DPR,
                                          "printVectorListTwoSpacedAllLanes"> {
  let ParserMatchClass = VecListTwoQAllLanesAsmOperand;
}
// Register list of three D registers, with "all lanes" subscripting.
def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeDAllLanes : RegisterOperand<DPR,
                                            "printVectorListThreeAllLanes"> {
  let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
}
// Register list of three D registers spaced by 2 (three sequential Q regs).
def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQAllLanes : RegisterOperand<DPR,
                                          "printVectorListThreeSpacedAllLanes"> {
  let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
}
// Register list of four D registers, with "all lanes" subscripting.
def VecListFourDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
  let ParserMatchClass = VecListFourDAllLanesAsmOperand;
}
// Register list of four D registers spaced by 2 (four sequential Q regs).
def VecListFourQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQAllLanes : RegisterOperand<DPR,
                                          "printVectorListFourSpacedAllLanes"> {
  let ParserMatchClass = VecListFourQAllLanesAsmOperand;
}
// Register list of one D register, with byte lane subscripting.
def VecListOneDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListOneDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of two D registers with byte lane subscripting.
def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of two Q registers with half-word lane subscripting.
def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of three D registers with byte lane subscripting.
def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of three Q registers with half-word lane subscripting.
def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of four D registers with byte lane subscripting.
def VecListFourDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of four Q registers with half-word lane subscripting.
def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

def SDTARMVCMP    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
def SDTARMVCMPZ   : SDTypeProfile<1, 1, []>;

def NEONvceq      : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
def NEONvceqz     : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
def NEONvcge      : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
def NEONvcgez     : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
def NEONvclez     : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
def NEONvcgeu     : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt      : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
def NEONvcgtz     : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
def NEONvcltz     : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
def NEONvcgtu     : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVCMP>;

// Types for vector shift by immediates. The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types. The "SHINS" version is for shift and insert operations.
def SDTARMVSH     : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHX    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHINS  : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

def NEONvshl      : SDNode<"ARMISD::VSHL", SDTARMVSH>;
def NEONvshrs     : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
def NEONvshru     : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
def NEONvshlls    : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>;
def NEONvshllu    : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>;
def NEONvshlli    : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>;
def NEONvshrn     : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;

def NEONvrshrs    : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
def NEONvrshru    : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
def NEONvrshrn    : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;

def NEONvqshls    : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
def NEONvqshlu    : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
def NEONvqshlsu   : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
def NEONvqshrns   : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
def NEONvqshrnu   : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
def NEONvqshrnsu  : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;

def NEONvqrshrns  : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
def NEONvqrshrnu  : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;

def NEONvsli      : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
def NEONvsri      : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;

def SDTARMVGETLN  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;

def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def NEONvmovImm   : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def NEONvmvnImm   : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;

def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
def NEONvorrImm   : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
def NEONvbicImm   : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;

def NEONvbsl      : SDNode<"ARMISD::VBSL",
                           SDTypeProfile<1, 3, [SDTCisVec<0>,
                                                SDTCisSameAs<0, 1>,
                                                SDTCisSameAs<0, 2>,
                                                SDTCisSameAs<0, 3>]>>;

def NEONvdup      : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;

// VDUPLANE can produce a quad-register result from a double-register source,
// so the result is not constrained to match the source.
def NEONvduplane  : SDNode<"ARMISD::VDUPLANE",
                           SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                                SDTCisVT<2, i32>]>>;

def SDTARMVEXT    : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext      : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

def SDTARMVSHUF   : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def NEONvrev64    : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
def NEONvrev32    : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def NEONvrev16    : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;

def SDTARMVSHUF2  : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>,
                                         SDTCisSameAs<0, 3>]>;
def NEONzip       : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp       : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn       : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

def SDTARMVMULL   : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisSameAs<1, 2>]>;
def NEONvmulls    : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu    : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;

def SDTARMFMAX    : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>]>;
def NEONfmax      : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
def NEONfmin      : SDNode<"ARMISD::FMIN", SDTARMFMAX>;

// Match a VMOVIMM whose decoded modified-immediate is the all-zeros (32-bit
// elements, value 0) vector constant.
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 32 && EltVal == 0);
}]>;

// Match a VMOVIMM whose decoded modified-immediate is the all-ones (8-bit
// elements, value 0xff) vector constant.
def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 8 && EltVal == 0xff);
}]>;
//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
def VLDMQIA
  : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn),
                   IIC_fpLoad_m, "",
                   [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>;

// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA
  : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn),
                   IIC_fpStore_m, "",
                   [(store (v2f64 QPR:$src), GPR:$Rn)]>;
// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VLDQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;

class VLDQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;

class VLDQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
                "$src = $dst">;
class VLDQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb, $src = $dst">;
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {

// VLD1 : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
          (ins addrmode6:$Rn), IIC_VLD1,
          "vld1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDInstruction";
}
class VLD1Q<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
          (ins addrmode6:$Rn), IIC_VLD1x2,
          "vld1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDInstruction";
}

def  VLD1d8   : VLD1D<{0,0,0,?}, "8">;
def  VLD1d16  : VLD1D<{0,1,0,?}, "16">;
def  VLD1d32  : VLD1D<{1,0,0,?}, "32">;
def  VLD1d64  : VLD1D<{1,1,0,?}, "64">;

def  VLD1q8   : VLD1Q<{0,0,?,?}, "8">;
def  VLD1q16  : VLD1Q<{0,1,?,?}, "16">;
def  VLD1q32  : VLD1Q<{1,0,?,?}, "32">;
def  VLD1q64  : VLD1Q<{1,1,?,?}, "64">;
// ...with address register writeback:
multiclass VLD1DWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), IIC_VLD1u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}
multiclass VLD1QWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}

defm VLD1d8wb  : VLD1DWB<{0,0,0,?}, "8">;
defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16">;
defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32">;
defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64">;
defm VLD1q8wb  : VLD1QWB<{0,0,?,?}, "8">;
defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">;
defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">;
defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">;
// ...with 3 registers
class VLD1D3<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
          (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt,
          "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDInstruction";
}
multiclass VLD1D3WB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}

def VLD1d8T  : VLD1D3<{0,0,0,?}, "8">;
def VLD1d16T : VLD1D3<{0,1,0,?}, "16">;
def VLD1d32T : VLD1D3<{1,0,0,?}, "32">;
def VLD1d64T : VLD1D3<{1,1,0,?}, "64">;

defm VLD1d8Twb  : VLD1D3WB<{0,0,0,?}, "8">;
defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16">;
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">;
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">;

def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
// ...with 4 registers
class VLD1D4<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
          (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt,
          "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDInstruction";
}
multiclass VLD1D4WB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}

def VLD1d8Q   : VLD1D4<{0,0,?,?}, "8">;
def VLD1d16Q  : VLD1D4<{0,1,?,?}, "16">;
def VLD1d32Q  : VLD1D4<{1,0,?,?}, "32">;
def VLD1d64Q  : VLD1D4<{1,1,?,?}, "64">;

defm VLD1d8Qwb  : VLD1D4WB<{0,0,?,?}, "8">;
defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16">;
defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32">;
defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">;

def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
// VLD2 : Vector Load (multiple 2-element structures)
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
          (ins addrmode6:$Rn), itin,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDInstruction";
}

def  VLD2d8   : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>;
def  VLD2d16  : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2>;
def  VLD2d32  : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2>;

def  VLD2q8   : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>;
def  VLD2q16  : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>;
def  VLD2q32  : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>;

def  VLD2q8Pseudo  : VLDQQPseudo<IIC_VLD2x2>;
def  VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def  VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;

// ...with address register writeback:
multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
                  RegisterOperand VdTy, InstrItinClass itin> {
  def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), itin,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbFixed";
  }
  def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), itin,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDInstruction";
    let AsmMatchConverter = "cvtVLDwbRegister";
  }
}

defm VLD2d8wb  : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u>;
defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u>;
defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u>;

defm VLD2q8wb  : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>;
defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>;
defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>;

def VLD2q8PseudoWB_fixed     : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q8PseudoWB_register  : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;

// ...with double-spaced registers
def  VLD2b8    : VLD2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2>;
def  VLD2b16   : VLD2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2>;
def  VLD2b32   : VLD2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2>;
defm VLD2b8wb  : VLD2WB<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2u>;
defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2u>;
defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2u>;
822 // VLD3 : Vector Load (multiple 3-element structures)
823 class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
824 : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
825 (ins addrmode6:$Rn), IIC_VLD3,
826 "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
829 let DecoderMethod = "DecodeVLDInstruction";
832 def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">;
833 def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">;
834 def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">;
836 def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>;
837 def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>;
838 def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>;
840 // ...with address register writeback:
841 class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
842 : NLdSt<0, 0b10, op11_8, op7_4,
843 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
844 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
845 "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
846 "$Rn.addr = $wb", []> {
848 let DecoderMethod = "DecodeVLDInstruction";
851 def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">;
852 def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
853 def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;
855 def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
856 def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
857 def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
859 // ...with double-spaced registers:
860 def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">;
861 def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">;
862 def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">;
863 def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">;
864 def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
865 def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;
867 def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
868 def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
869 def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
871 // ...alternate versions to be allocated odd register numbers:
872 def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
873 def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
874 def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>;
876 def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
877 def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
878 def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
880 // VLD4 : Vector Load (multiple 4-element structures)
881 class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
882 : NLdSt<0, 0b10, op11_8, op7_4,
883 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
884 (ins addrmode6:$Rn), IIC_VLD4,
885 "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
887 let Inst{5-4} = Rn{5-4};
888 let DecoderMethod = "DecodeVLDInstruction";
891 def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">;
892 def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">;
893 def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">;
895 def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>;
896 def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>;
897 def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>;
899 // ...with address register writeback:
900 class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
901 : NLdSt<0, 0b10, op11_8, op7_4,
902 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
903 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
904 "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
905 "$Rn.addr = $wb", []> {
906 let Inst{5-4} = Rn{5-4};
907 let DecoderMethod = "DecodeVLDInstruction";
910 def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">;
911 def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
912 def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;
914 def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
915 def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
916 def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>;
918 // ...with double-spaced registers:
919 def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">;
920 def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">;
921 def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">;
922 def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">;
923 def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
924 def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;
926 def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
927 def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
928 def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
930 // ...alternate versions to be allocated odd register numbers:
931 def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
932 def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
933 def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>;
935 def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
936 def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
937 def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>;
939 } // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
941 // Classes for VLD*LN pseudo-instructions with multi-register operands.
942 // These are expanded to real instructions after register allocation.
943 class VLDQLNPseudo<InstrItinClass itin>
944 : PseudoNLdSt<(outs QPR:$dst),
945 (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
946 itin, "$src = $dst">;
947 class VLDQLNWBPseudo<InstrItinClass itin>
948 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
949 (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
950 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
951 class VLDQQLNPseudo<InstrItinClass itin>
952 : PseudoNLdSt<(outs QQPR:$dst),
953 (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
954 itin, "$src = $dst">;
955 class VLDQQLNWBPseudo<InstrItinClass itin>
956 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
957 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
958 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
959 class VLDQQQQLNPseudo<InstrItinClass itin>
960 : PseudoNLdSt<(outs QQQQPR:$dst),
961 (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
962 itin, "$src = $dst">;
963 class VLDQQQQLNWBPseudo<InstrItinClass itin>
964 : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
965 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
966 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
968 // VLD1LN : Vector Load (single element to one lane)
969 class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
971 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
972 (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
973 IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
975 [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
976 (i32 (LoadOp addrmode6:$Rn)),
979 let DecoderMethod = "DecodeVLD1LN";
981 class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
983 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
984 (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
985 IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
987 [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
988 (i32 (LoadOp addrmode6oneL32:$Rn)),
991 let DecoderMethod = "DecodeVLD1LN";
993 class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> {
994 let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
995 (i32 (LoadOp addrmode6:$addr)),
999 def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
1000 let Inst{7-5} = lane{2-0};
1002 def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
1003 let Inst{7-6} = lane{1-0};
1004 let Inst{5-4} = Rn{5-4};
1006 def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
1007 let Inst{7} = lane{0};
1008 let Inst{5-4} = Rn{5-4};
1011 def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>;
1012 def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
1013 def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;
1015 def : Pat<(vector_insert (v2f32 DPR:$src),
1016 (f32 (load addrmode6:$addr)), imm:$lane),
1017 (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
1018 def : Pat<(vector_insert (v4f32 QPR:$src),
1019 (f32 (load addrmode6:$addr)), imm:$lane),
1020 (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
1022 let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
1024 // ...with address register writeback:
1025 class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1026 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
1027 (ins addrmode6:$Rn, am6offset:$Rm,
1028 DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
1029 "\\{$Vd[$lane]\\}, $Rn$Rm",
1030 "$src = $Vd, $Rn.addr = $wb", []> {
1031 let DecoderMethod = "DecodeVLD1LN";
1034 def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
1035 let Inst{7-5} = lane{2-0};
1037 def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
1038 let Inst{7-6} = lane{1-0};
1039 let Inst{4} = Rn{4};
1041 def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
1042 let Inst{7} = lane{0};
1043 let Inst{5} = Rn{4};
1044 let Inst{4} = Rn{4};
1047 def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
1048 def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
1049 def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>;
1051 // VLD2LN : Vector Load (single 2-element structure to one lane)
1052 class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
1053 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
1054 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
1055 IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
1056 "$src1 = $Vd, $src2 = $dst2", []> {
1058 let Inst{4} = Rn{4};
1059 let DecoderMethod = "DecodeVLD2LN";
1062 def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> {
1063 let Inst{7-5} = lane{2-0};
1065 def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
1066 let Inst{7-6} = lane{1-0};
1068 def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
1069 let Inst{7} = lane{0};
1072 def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
1073 def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
1074 def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
1076 // ...with double-spaced registers:
1077 def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
1078 let Inst{7-6} = lane{1-0};
1080 def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
1081 let Inst{7} = lane{0};
1084 def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
1085 def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
1087 // ...with address register writeback:
1088 class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1089 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
1090 (ins addrmode6:$Rn, am6offset:$Rm,
1091 DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
1092 "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
1093 "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
1094 let Inst{4} = Rn{4};
1095 let DecoderMethod = "DecodeVLD2LN";
1098 def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
1099 let Inst{7-5} = lane{2-0};
1101 def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
1102 let Inst{7-6} = lane{1-0};
1104 def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
1105 let Inst{7} = lane{0};
1108 def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
1109 def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
1110 def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
1112 def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
1113 let Inst{7-6} = lane{1-0};
1115 def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
1116 let Inst{7} = lane{0};
1119 def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
1120 def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
1122 // VLD3LN : Vector Load (single 3-element structure to one lane)
1123 class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
1124 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
1125 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
1126 nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
1127 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
1128 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
1130 let DecoderMethod = "DecodeVLD3LN";
1133 def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> {
1134 let Inst{7-5} = lane{2-0};
1136 def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
1137 let Inst{7-6} = lane{1-0};
1139 def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
1140 let Inst{7} = lane{0};
1143 def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
1144 def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
1145 def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
1147 // ...with double-spaced registers:
1148 def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
1149 let Inst{7-6} = lane{1-0};
1151 def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
1152 let Inst{7} = lane{0};
1155 def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
1156 def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
1158 // ...with address register writeback:
1159 class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1160 : NLdStLn<1, 0b10, op11_8, op7_4,
1161 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
1162 (ins addrmode6:$Rn, am6offset:$Rm,
1163 DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
1164 IIC_VLD3lnu, "vld3", Dt,
1165 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
1166 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
1168 let DecoderMethod = "DecodeVLD3LN";
1171 def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
1172 let Inst{7-5} = lane{2-0};
1174 def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
1175 let Inst{7-6} = lane{1-0};
1177 def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
1178 let Inst{7} = lane{0};
1181 def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
1182 def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
1183 def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
1185 def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
1186 let Inst{7-6} = lane{1-0};
1188 def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
1189 let Inst{7} = lane{0};
1192 def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
1193 def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
1195 // VLD4LN : Vector Load (single 4-element structure to one lane)
1196 class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
1197 : NLdStLn<1, 0b10, op11_8, op7_4,
1198 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
1199 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
1200 nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
1201 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
1202 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
1204 let Inst{4} = Rn{4};
1205 let DecoderMethod = "DecodeVLD4LN";
1208 def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> {
1209 let Inst{7-5} = lane{2-0};
1211 def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
1212 let Inst{7-6} = lane{1-0};
1214 def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
1215 let Inst{7} = lane{0};
1216 let Inst{5} = Rn{5};
1219 def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
1220 def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
1221 def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
1223 // ...with double-spaced registers:
1224 def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
1225 let Inst{7-6} = lane{1-0};
1227 def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
1228 let Inst{7} = lane{0};
1229 let Inst{5} = Rn{5};
1232 def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
1233 def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
1235 // ...with address register writeback:
// Writeback form of VLD4LN: produces the updated base address in GPR:$wb
// in addition to the four tied destination registers.
1236 class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1237 : NLdStLn<1, 0b10, op11_8, op7_4,
1238 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
1239 (ins addrmode6:$Rn, am6offset:$Rm,
1240 DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
1241 IIC_VLD4lnu, "vld4", Dt,
1242 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
1243 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
1245 let Inst{4} = Rn{4};
1246 let DecoderMethod = "DecodeVLD4LN";
1249 def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
1250 let Inst{7-5} = lane{2-0};
1252 def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
1253 let Inst{7-6} = lane{1-0};
1255 def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
1256 let Inst{7} = lane{0};
1257 let Inst{5} = Rn{5};
1260 def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
1261 def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
1262 def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
1264 def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
1265 let Inst{7-6} = lane{1-0};
1267 def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
1268 let Inst{7} = lane{0};
1269 let Inst{5} = Rn{5};
1272 def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
1273 def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
1275 } // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
1277 // VLD1DUP : Vector Load (single element to all lanes)
1278 class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
1279 : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
1280 (ins addrmode6dup:$Rn),
1281 IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
1282 [(set VecListOneDAllLanes:$Vd,
1283 (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
1285 let Inst{4} = Rn{4};
1286 let DecoderMethod = "DecodeVLD1DupInstruction";
1288 class VLD1QDUPPseudo<ValueType Ty, PatFrag LoadOp> : VLDQPseudo<IIC_VLD1dup> {
1289 let Pattern = [(set QPR:$dst,
1290 (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$addr)))))];
1293 def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>;
1294 def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
1295 def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;
1297 def VLD1DUPq8Pseudo : VLD1QDUPPseudo<v16i8, extloadi8>;
1298 def VLD1DUPq16Pseudo : VLD1QDUPPseudo<v8i16, extloadi16>;
1299 def VLD1DUPq32Pseudo : VLD1QDUPPseudo<v4i32, load>;
1301 def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
1302 (VLD1DUPd32 addrmode6:$addr)>;
1303 def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
1304 (VLD1DUPq32Pseudo addrmode6:$addr)>;
1306 let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
1308 class VLD1QDUP<bits<4> op7_4, string Dt>
1309 : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListTwoDAllLanes:$Vd),
1310 (ins addrmode6dup:$Rn), IIC_VLD1dup,
1311 "vld1", Dt, "$Vd, $Rn", "", []> {
1313 let Inst{4} = Rn{4};
1314 let DecoderMethod = "DecodeVLD1DupInstruction";
1317 def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8">;
1318 def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">;
1319 def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">;
1321 // ...with address register writeback:
1322 multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
1323 def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
1324 (outs VecListOneDAllLanes:$Vd, GPR:$wb),
1325 (ins addrmode6dup:$Rn), IIC_VLD1dupu,
1326 "vld1", Dt, "$Vd, $Rn!",
1327 "$Rn.addr = $wb", []> {
1328 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1329 let Inst{4} = Rn{4};
1330 let DecoderMethod = "DecodeVLD1DupInstruction";
1331 let AsmMatchConverter = "cvtVLDwbFixed";
1333 def _register : NLdSt<1, 0b10, 0b1100, op7_4,
1334 (outs VecListOneDAllLanes:$Vd, GPR:$wb),
1335 (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
1336 "vld1", Dt, "$Vd, $Rn, $Rm",
1337 "$Rn.addr = $wb", []> {
1338 let Inst{4} = Rn{4};
1339 let DecoderMethod = "DecodeVLD1DupInstruction";
1340 let AsmMatchConverter = "cvtVLDwbRegister";
1343 multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
1344 def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
1345 (outs VecListTwoDAllLanes:$Vd, GPR:$wb),
1346 (ins addrmode6dup:$Rn), IIC_VLD1dupu,
1347 "vld1", Dt, "$Vd, $Rn!",
1348 "$Rn.addr = $wb", []> {
1349 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1350 let Inst{4} = Rn{4};
1351 let DecoderMethod = "DecodeVLD1DupInstruction";
1352 let AsmMatchConverter = "cvtVLDwbFixed";
1354 def _register : NLdSt<1, 0b10, 0b1100, op7_4,
1355 (outs VecListTwoDAllLanes:$Vd, GPR:$wb),
1356 (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
1357 "vld1", Dt, "$Vd, $Rn, $Rm",
1358 "$Rn.addr = $wb", []> {
1359 let Inst{4} = Rn{4};
1360 let DecoderMethod = "DecodeVLD1DupInstruction";
1361 let AsmMatchConverter = "cvtVLDwbRegister";
1365 defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8">;
1366 defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16">;
1367 defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32">;
1369 defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8">;
1370 defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">;
1371 defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">;
1373 def VLD1DUPq8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>;
1374 def VLD1DUPq16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>;
1375 def VLD1DUPq32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>;
1376 def VLD1DUPq8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>;
1377 def VLD1DUPq16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>;
1378 def VLD1DUPq32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>;
1380 // VLD2DUP : Vector Load (single 2-element structure to all lanes)
1381 class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy>
1382 : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
1383 (ins addrmode6dup:$Rn), IIC_VLD2dup,
1384 "vld2", Dt, "$Vd, $Rn", "", []> {
1386 let Inst{4} = Rn{4};
1387 let DecoderMethod = "DecodeVLD2DupInstruction";
1390 def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListTwoDAllLanes>;
1391 def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListTwoDAllLanes>;
1392 def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListTwoDAllLanes>;
1394 def VLD2DUPd8Pseudo : VLDQPseudo<IIC_VLD2dup>;
1395 def VLD2DUPd16Pseudo : VLDQPseudo<IIC_VLD2dup>;
1396 def VLD2DUPd32Pseudo : VLDQPseudo<IIC_VLD2dup>;
1398 // ...with double-spaced registers (not used for codegen):
1399 def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListTwoQAllLanes>;
1400 def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListTwoQAllLanes>;
1401 def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListTwoQAllLanes>;
1403 // ...with address register writeback:
1404 multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
1405 def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
1406 (outs VdTy:$Vd, GPR:$wb),
1407 (ins addrmode6dup:$Rn), IIC_VLD2dupu,
1408 "vld2", Dt, "$Vd, $Rn!",
1409 "$Rn.addr = $wb", []> {
1410 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1411 let Inst{4} = Rn{4};
1412 let DecoderMethod = "DecodeVLD2DupInstruction";
1413 let AsmMatchConverter = "cvtVLDwbFixed";
1415 def _register : NLdSt<1, 0b10, 0b1101, op7_4,
1416 (outs VdTy:$Vd, GPR:$wb),
1417 (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD2dupu,
1418 "vld2", Dt, "$Vd, $Rn, $Rm",
1419 "$Rn.addr = $wb", []> {
1420 let Inst{4} = Rn{4};
1421 let DecoderMethod = "DecodeVLD2DupInstruction";
1422 let AsmMatchConverter = "cvtVLDwbRegister";
1426 defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListTwoDAllLanes>;
1427 defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListTwoDAllLanes>;
1428 defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListTwoDAllLanes>;
1430 defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListTwoQAllLanes>;
1431 defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListTwoQAllLanes>;
1432 defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListTwoQAllLanes>;
1434 def VLD2DUPd8PseudoWB_fixed : VLDQWBfixedPseudo <IIC_VLD2dupu>;
1435 def VLD2DUPd8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2dupu>;
1436 def VLD2DUPd16PseudoWB_fixed : VLDQWBfixedPseudo <IIC_VLD2dupu>;
1437 def VLD2DUPd16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2dupu>;
1438 def VLD2DUPd32PseudoWB_fixed : VLDQWBfixedPseudo <IIC_VLD2dupu>;
1439 def VLD2DUPd32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2dupu>;
1441 // VLD3DUP : Vector Load (single 3-element structure to all lanes)
1442 class VLD3DUP<bits<4> op7_4, string Dt>
1443 : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
1444 (ins addrmode6dup:$Rn), IIC_VLD3dup,
1445 "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
1448 let DecoderMethod = "DecodeVLD3DupInstruction";
1451 def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">;
1452 def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
1453 def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;
1455 def VLD3DUPd8Pseudo : VLDQQPseudo<IIC_VLD3dup>;
1456 def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
1457 def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;
1459 // ...with double-spaced registers (not used for codegen):
1460 def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">;
1461 def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
1462 def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;
1464 // ...with address register writeback:
1465 class VLD3DUPWB<bits<4> op7_4, string Dt>
1466 : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
1467 (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu,
1468 "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
1469 "$Rn.addr = $wb", []> {
1471 let DecoderMethod = "DecodeVLD3DupInstruction";
1474 def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8">;
1475 def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">;
1476 def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">;
1478 def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8">;
1479 def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
1480 def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32">;
1482 def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
1483 def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
1484 def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
1486 // VLD4DUP : Vector Load (single 4-element structure to all lanes)
1487 class VLD4DUP<bits<4> op7_4, string Dt>
1488 : NLdSt<1, 0b10, 0b1111, op7_4,
1489 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
1490 (ins addrmode6dup:$Rn), IIC_VLD4dup,
1491 "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
1493 let Inst{4} = Rn{4};
1494 let DecoderMethod = "DecodeVLD4DupInstruction";
1497 def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">;
1498 def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
1499 def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
1501 def VLD4DUPd8Pseudo : VLDQQPseudo<IIC_VLD4dup>;
1502 def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
1503 def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;
1505 // ...with double-spaced registers (not used for codegen):
1506 def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">;
1507 def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
1508 def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
1510 // ...with address register writeback:
1511 class VLD4DUPWB<bits<4> op7_4, string Dt>
1512 : NLdSt<1, 0b10, 0b1111, op7_4,
1513 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
1514 (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
1515 "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
1516 "$Rn.addr = $wb", []> {
1517 let Inst{4} = Rn{4};
1518 let DecoderMethod = "DecodeVLD4DupInstruction";
1521 def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">;
1522 def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
1523 def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
1525 def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">;
1526 def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
1527 def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
1529 def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
1530 def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
1531 def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
1533 } // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
1535 let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
1537 // Classes for VST* pseudo-instructions with multi-register operands.
1538 // These are expanded to real instructions after register allocation.
1539 class VSTQPseudo<InstrItinClass itin>
1540 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
1541 class VSTQWBPseudo<InstrItinClass itin>
1542 : PseudoNLdSt<(outs GPR:$wb),
1543 (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
1544 "$addr.addr = $wb">;
1545 class VSTQWBfixedPseudo<InstrItinClass itin>
1546 : PseudoNLdSt<(outs GPR:$wb),
1547 (ins addrmode6:$addr, QPR:$src), itin,
1548 "$addr.addr = $wb">;
1549 class VSTQWBregisterPseudo<InstrItinClass itin>
1550 : PseudoNLdSt<(outs GPR:$wb),
1551 (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin,
1552 "$addr.addr = $wb">;
1553 class VSTQQPseudo<InstrItinClass itin>
1554 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
1555 class VSTQQWBPseudo<InstrItinClass itin>
1556 : PseudoNLdSt<(outs GPR:$wb),
1557 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
1558 "$addr.addr = $wb">;
1559 class VSTQQWBfixedPseudo<InstrItinClass itin>
1560 : PseudoNLdSt<(outs GPR:$wb),
1561 (ins addrmode6:$addr, QQPR:$src), itin,
1562 "$addr.addr = $wb">;
1563 class VSTQQWBregisterPseudo<InstrItinClass itin>
1564 : PseudoNLdSt<(outs GPR:$wb),
1565 (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin,
1566 "$addr.addr = $wb">;
1568 class VSTQQQQPseudo<InstrItinClass itin>
1569 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
1570 class VSTQQQQWBPseudo<InstrItinClass itin>
1571 : PseudoNLdSt<(outs GPR:$wb),
1572 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
1573 "$addr.addr = $wb">;
1575 // VST1 : Vector Store (multiple single elements)
1576 class VST1D<bits<4> op7_4, string Dt>
1577 : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, VecListOneD:$Vd),
1578 IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> {
1580 let Inst{4} = Rn{4};
1581 let DecoderMethod = "DecodeVSTInstruction";
1583 class VST1Q<bits<4> op7_4, string Dt>
1584 : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd),
1585 IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> {
1587 let Inst{5-4} = Rn{5-4};
1588 let DecoderMethod = "DecodeVSTInstruction";
1591 def VST1d8 : VST1D<{0,0,0,?}, "8">;
1592 def VST1d16 : VST1D<{0,1,0,?}, "16">;
1593 def VST1d32 : VST1D<{1,0,0,?}, "32">;
1594 def VST1d64 : VST1D<{1,1,0,?}, "64">;
1596 def VST1q8 : VST1Q<{0,0,?,?}, "8">;
1597 def VST1q16 : VST1Q<{0,1,?,?}, "16">;
1598 def VST1q32 : VST1Q<{1,0,?,?}, "32">;
1599 def VST1q64 : VST1Q<{1,1,?,?}, "64">;
1601 // ...with address register writeback:
1602 multiclass VST1DWB<bits<4> op7_4, string Dt> {
1603 def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
1604 (ins addrmode6:$Rn, VecListOneD:$Vd), IIC_VLD1u,
1605 "vst1", Dt, "$Vd, $Rn!",
1606 "$Rn.addr = $wb", []> {
1607 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1608 let Inst{4} = Rn{4};
1609 let DecoderMethod = "DecodeVSTInstruction";
1610 let AsmMatchConverter = "cvtVSTwbFixed";
1612 def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
1613 (ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd),
1615 "vst1", Dt, "$Vd, $Rn, $Rm",
1616 "$Rn.addr = $wb", []> {
1617 let Inst{4} = Rn{4};
1618 let DecoderMethod = "DecodeVSTInstruction";
1619 let AsmMatchConverter = "cvtVSTwbRegister";
1622 multiclass VST1QWB<bits<4> op7_4, string Dt> {
1623 def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
1624 (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
1625 "vst1", Dt, "$Vd, $Rn!",
1626 "$Rn.addr = $wb", []> {
1627 let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1628 let Inst{5-4} = Rn{5-4};
1629 let DecoderMethod = "DecodeVSTInstruction";
1630 let AsmMatchConverter = "cvtVSTwbFixed";
1632 def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
1633 (ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd),
1635 "vst1", Dt, "$Vd, $Rn, $Rm",
1636 "$Rn.addr = $wb", []> {
1637 let Inst{5-4} = Rn{5-4};
1638 let DecoderMethod = "DecodeVSTInstruction";
1639 let AsmMatchConverter = "cvtVSTwbRegister";
1643 defm VST1d8wb : VST1DWB<{0,0,0,?}, "8">;
1644 defm VST1d16wb : VST1DWB<{0,1,0,?}, "16">;
1645 defm VST1d32wb : VST1DWB<{1,0,0,?}, "32">;
1646 defm VST1d64wb : VST1DWB<{1,1,0,?}, "64">;
1648 defm VST1q8wb : VST1QWB<{0,0,?,?}, "8">;
1649 defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">;
1650 defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">;
1651 defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">;
// ...with 3 registers
// VST1 of a three-D-register list, no writeback. Rm = 0b1111 selects the
// no-writeback encoding (contrast with 0b1101 in the writeback forms).
class VST1D3<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
          (ins addrmode6:$Rn, VecListThreeD:$Vd),
          IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVSTInstruction";
}
// ...and the writeback variants (fixed / register post-increment), mirroring
// VST1DWB above. NOTE(review): IIC_VLD1x3u is a load itinerary - confirm.
multiclass VST1D3WB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbFixed";
  }
  def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
                        IIC_VLD1x3u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbRegister";
  }
}

def VST1d8T  : VST1D3<{0,0,0,?}, "8">;
def VST1d16T : VST1D3<{0,1,0,?}, "16">;
def VST1d32T : VST1D3<{1,0,0,?}, "32">;
def VST1d64T : VST1D3<{1,1,0,?}, "64">;

defm VST1d8Twb  : VST1D3WB<{0,0,0,?}, "8">;
defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16">;
defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">;
defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">;

// Pseudo-instruction variants (expanded to real instructions later).
def VST1d64TPseudo            : VSTQQPseudo<IIC_VST1x3>;
def VST1d64TPseudoWB_fixed    : VSTQQWBPseudo<IIC_VST1x3u>;
def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;
// ...with 4 registers
// VST1 of a four-D-register list, no writeback (Rm = 0b1111).
class VST1D4<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
          (ins addrmode6:$Rn, VecListFourD:$Vd),
          IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
          []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVSTInstruction";
}
// Writeback variants; NOTE(review): IIC_VLD1x4u is a load itinerary - confirm.
multiclass VST1D4WB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbFixed";
  }
  def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VLD1x4u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbRegister";
  }
}

def VST1d8Q  : VST1D4<{0,0,?,?}, "8">;
def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
def VST1d64Q : VST1D4<{1,1,?,?}, "64">;

defm VST1d8Qwb  : VST1D4WB<{0,0,?,?}, "8">;
defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16">;
defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">;
defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">;

// Pseudo-instruction variants (expanded to real instructions later).
def VST1d64QPseudo            : VSTQQPseudo<IIC_VST1x4>;
def VST1d64QPseudoWB_fixed    : VSTQQWBPseudo<IIC_VST1x4u>;
def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;
// VST2 : Vector Store (multiple 2-element structures)
// VdTy selects the register-list operand (adjacent D pair, four D regs, or
// double-spaced pair), so the same class covers the d/q/b instantiations.
class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, VdTy:$Vd),
          itin, "vst2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111; // no-writeback encoding
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVSTInstruction";
}

def VST2d8  : VST2<0b1000, {0,0,?,?}, "8",  VecListDPair, IIC_VST2>;
def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2>;
def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2>;

def VST2q8  : VST2<0b0011, {0,0,?,?}, "8",  VecListFourD, IIC_VST2x2>;
def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>;
def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>;

// Pseudo-instruction variants (expanded to real instructions later).
def VST2q8Pseudo  : VSTQQPseudo<IIC_VST2x2>;
def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
// ...with address register writeback:
// Same _fixed/_register split as VST1DWB. NOTE(review): IIC_VLD1u is a load
// itinerary on these store definitions - likely copy/paste; confirm.
multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
                   RegisterOperand VdTy> {
  def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VdTy:$Vd), IIC_VLD1u,
                     "vst2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbFixed";
  }
  def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbRegister";
  }
}
multiclass VST2QWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1u,
                     "vst2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbFixed";
  }
  def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VLD1u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVSTInstruction";
    let AsmMatchConverter = "cvtVSTwbRegister";
  }
}

defm VST2d8wb  : VST2DWB<0b1000, {0,0,?,?}, "8",  VecListDPair>;
defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair>;
defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair>;

defm VST2q8wb  : VST2QWB<{0,0,?,?}, "8">;
defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">;
defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">;

// Pseudo-instruction variants (expanded to real instructions later).
def VST2q8PseudoWB_fixed     : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q16PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q32PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q8PseudoWB_register  : VSTQQWBregisterPseudo<IIC_VST2x2u>;
def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
// ...with double-spaced registers (VecListTwoQ operand, op11_8 = 0b1001):
def  VST2b8    : VST2<0b1001, {0,0,?,?}, "8",  VecListTwoQ, IIC_VST2>;
def  VST2b16   : VST2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VST2>;
def  VST2b32   : VST2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VST2>;
defm VST2b8wb  : VST2DWB<0b1001, {0,0,?,?}, "8",  VecListTwoQ>;
defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>;
defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>;
// VST3 : Vector Store (multiple 3-element structures)
// The three source registers are spelled out individually ($Vd, $src2,
// $src3) rather than via a VecList operand.
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
  let Rm = 0b1111; // no-writeback encoding
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVSTInstruction";
}

def VST3d8  : VST3D<0b0100, {0,0,0,?}, "8">;
def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo  : VSTQQPseudo<IIC_VST3>;
def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;

// ...with address register writeback:
// Writeback amount comes from the am6offset:$Rm operand, so no fixed Rm here.
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVSTInstruction";
}

def VST3d8_UPD  : VST3DWB<0b0100, {0,0,0,?}, "8">;
def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>;
def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;

// ...with double-spaced registers:
def VST3q8      : VST3D<0b0101, {0,0,0,?}, "8">;
def VST3q16     : VST3D<0b0101, {0,1,0,?}, "16">;
def VST3q32     : VST3D<0b0101, {1,0,0,?}, "32">;
def VST3q8_UPD  : VST3DWB<0b0101, {0,0,0,?}, "8">;
def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;

def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;

// ...alternate versions to be allocated odd register numbers:
def VST3q8oddPseudo  : VSTQQQQPseudo<IIC_VST3>;
def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>;
def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>;

def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
// VST4 : Vector Store (multiple 4-element structures)
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
          "", []> {
  let Rm = 0b1111; // no-writeback encoding
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVSTInstruction";
}

def VST4d8  : VST4D<0b0000, {0,0,?,?}, "8">;
def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo  : VSTQQPseudo<IIC_VST4>;
def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>;
def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>;

// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
          "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVSTInstruction";
}

def VST4d8_UPD  : VST4DWB<0b0000, {0,0,?,?}, "8">;
def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST4u>;
def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;

// ...with double-spaced registers:
def VST4q8      : VST4D<0b0001, {0,0,?,?}, "8">;
def VST4q16     : VST4D<0b0001, {0,1,?,?}, "16">;
def VST4q32     : VST4D<0b0001, {1,0,?,?}, "32">;
def VST4q8_UPD  : VST4DWB<0b0001, {0,0,?,?}, "8">;
def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;

def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;

// ...alternate versions to be allocated odd register numbers:
def VST4q8oddPseudo  : VSTQQQQPseudo<IIC_VST4>;
def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>;
def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>;

def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;

} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
// Classes for VST*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Q/QQ/QQQQ variants differ only in the width of the $src register operand;
// the WB variants add an am6offset and return the updated address in $wb.
class VSTQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
// VST1LN : Vector Store (single element from one lane)
// The selection pattern stores one extracted lane of $Vd through $Rn.
class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag StoreOp, SDNode ExtractOp>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
           (ins addrmode6:$Rn, DPR:$Vd, nohash_imm:$lane),
           IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
           [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn)]> {
  let Rm = 0b1111; // no-writeback encoding
  let DecoderMethod = "DecodeVST1LN";
}
// Same, but with the one-lane 32-bit address mode operand.
class VST1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag StoreOp, SDNode ExtractOp>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
           (ins addrmode6oneL32:$Rn, DPR:$Vd, nohash_imm:$lane),
           IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
           [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6oneL32:$Rn)]>{
  let Rm = 0b1111; // no-writeback encoding
  let DecoderMethod = "DecodeVST1LN";
}
// Pseudo for the Q-register forms; carries the matching pattern.
class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNPseudo<IIC_VST1ln> {
  let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                          addrmode6:$addr)];
}

def VST1LNd8  : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
                       NEONvgetlaneu> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
                       NEONvgetlaneu> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{5};
}

def VST1LNd32 : VST1LN32<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt> {
  let Inst{7} = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;

// Float lane stores reuse the i32 lane-store instructions.
def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
// ...with address register writeback:
// Pattern uses the post-indexed store fragments; $wb receives the updated
// base address.
class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag StoreOp, SDNode ExtractOp>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
           (ins addrmode6:$Rn, am6offset:$Rm,
            DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
           "\\{$Vd[$lane]\\}, $Rn$Rm",
           "$Rn.addr = $wb",
           [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
                                   addrmode6:$Rn, am6offset:$Rm))]> {
  let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNWBPseudo<IIC_VST1lnu> {
  let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                                        addrmode6:$addr, am6offset:$offset))];
}

def VST1LNd8_UPD  : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
                             NEONvgetlaneu> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
                             NEONvgetlaneu> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{5};
}
def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
                             extractelt> {
  let Inst{7} = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo_UPD  : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {

// VST2LN : Vector Store (single 2-element structure from one lane)
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
           (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
           IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
           "", []> {
  let Rm = 0b1111; // no-writeback encoding
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

// Lane field placement depends on the element size (see Inst{7-5}/{7-6}/{7}).
def VST2LNd8  : VST2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNd8Pseudo  : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;

// ...with double-spaced registers:
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{4};
}
def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{4} = Rn{4};
}

def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;

// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
           (ins addrmode6:$Rn, am6offset:$Rm,
            DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
           "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
           "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8_UPD  : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;

def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
// VST3LN : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
           (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
            nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
           "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
  let Rm = 0b1111; // no-writeback encoding
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8  : VST3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNd8Pseudo  : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;

// ...with double-spaced registers:
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;

// ...with address register writeback:
class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
           (ins addrmode6:$Rn, am6offset:$Rm,
            DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
           IIC_VST3lnu, "vst3", Dt,
           "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
           "$Rn.addr = $wb", []> {
  let DecoderMethod = "DecodeVST3LN";
}

def VST3LNd8_UPD  : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;

def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
// VST4LN : Vector Store (single 4-element structure from one lane)
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
           (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
            nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
           "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
           "", []> {
  let Rm = 0b1111; // no-writeback encoding
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST4LN";
}

def VST4LNd8  : VST4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNd8Pseudo  : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;

// ...with double-spaced registers:
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;

// ...with address register writeback:
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
           (ins addrmode6:$Rn, am6offset:$Rm,
            DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
           IIC_VST4lnu, "vst4", Dt,
           "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
           "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST4LN";
}

def VST4LNd8_UPD  : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;

def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;

} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
//===----------------------------------------------------------------------===//
// NEON pattern fragments
//===----------------------------------------------------------------------===//

// Extract D sub-registers of Q registers.
// The divisor is the number of elements of that size per D register
// (8 x i8, 4 x i16, 2 x i32, 1 x f64), mapping an element index to the
// dsub_N index that contains it.
def DSubReg_i8_reg  : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32);
}]>;
def DSubReg_i16_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32);
}]>;
def DSubReg_i32_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32);
}]>;
def DSubReg_f64_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32);
}]>;

// Extract S sub-registers of Q/D registers.
def SSubReg_f32_reg : SDNodeXForm<imm, [{
  assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32);
}]>;

// Translate lane numbers from Q registers to D subregs: mask keeps the
// within-D-register part of the lane index.
def SubReg_i8_lane  : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
}]>;
def SubReg_i16_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
}]>;
def SubReg_i32_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
}]>;
//===----------------------------------------------------------------------===//
// Instruction Classes
//===----------------------------------------------------------------------===//

// Basic 2-register operations: double- and quad-register.
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;

// Basic 2-register intrinsics, both double- and quad-register.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Narrow 2-register operations (Q source, D result).
class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyD, ValueType TyQ, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;

// Narrow 2-register intrinsics.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;

// Long 2-register operations (D source, Q result; currently only VMOVL).
class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;

// Long 2-register intrinsics.
class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;

// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// Both operands are read-modify-write ($src1 = $Vd, $src2 = $Vm).
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
        (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
        OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
        (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;
// Basic 3-register operations: double- and quad-register.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  let isCommutable = Commutable;
}
// Same as N3VD but no data type.
class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy,
            SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
         (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
  let isCommutable = Commutable;
}

// By-lane ("scalar") forms: $Vm is a single lane, duplicated across the
// vector via NEONvduplane. Not commutable since only $Vm takes a lane.
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$Vn),
                        (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$Vn),
                        (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  let isCommutable = 0;
}

class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  let isCommutable = Commutable;
}
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
         (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
  let isCommutable = Commutable;
}
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$Vn),
                           (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$Vn),
                           (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                imm:$lane)))))]> {
  let isCommutable = 0;
}
2490 // Basic 3-register intrinsics, both double- and quad-register.
// N3VDInt: D-register 3-register intrinsic; pattern maps the instruction to a
// target intrinsic (IntOp) rather than a generic SDNode.
2491 class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2492 Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2493 ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
2494 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2495 (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
2496 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2497 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
2498 let isCommutable = Commutable;
// N3VDIntSL / N3VDIntSL16: intrinsic variants with a scalar (lane) $Vm operand,
// 32-bit and 16-bit lane indices respectively. Never commutable.
2500 class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2501 string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
2502 : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
2503 (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2504 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2506 (Ty (IntOp (Ty DPR:$Vn),
2507 (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
2509 let isCommutable = 0;
2511 class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2512 string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
2513 : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
2514 (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2515 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2517 (Ty (IntOp (Ty DPR:$Vn),
2518 (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
2519 let isCommutable = 0;
// N3VDIntSh: like N3VDInt but the operand list and asm string take $Vm before
// $Vn (reversed order — presumably for shift-style intrinsics, per the "Sh"
// suffix; confirm at instantiation sites). Never commutable.
2521 class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2522 Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2523 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
2524 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2525 (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
2526 OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
2527 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
2528 let isCommutable = 0;
// N3VQInt: quad-register counterpart of N3VDInt (Q = intrinsic(Q, Q)).
2531 class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2532 Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2533 ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
2534 : N3V<op24, op23, op21_20, op11_8, 1, op4,
2535 (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
2536 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2537 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
2538 let isCommutable = Commutable;
// N3VQIntSL / N3VQIntSL16: Q-register intrinsic with a D-register scalar (lane)
// operand, 32-bit and 16-bit lanes respectively. Never commutable.
2540 class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2541 string OpcodeStr, string Dt,
2542 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
2543 : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
2544 (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2545 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2546 [(set (ResTy QPR:$Vd),
2547 (ResTy (IntOp (ResTy QPR:$Vn),
2548 (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
2550 let isCommutable = 0;
2552 class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2553 string OpcodeStr, string Dt,
2554 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
2555 : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
2556 (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2557 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2558 [(set (ResTy QPR:$Vd),
2559 (ResTy (IntOp (ResTy QPR:$Vn),
2560 (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
2562 let isCommutable = 0;
// N3VQIntSh: Q-register intrinsic with $Vm/$Vn reversed in the operand list and
// asm string (Q-register counterpart of N3VDIntSh). Never commutable.
2564 class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2565 Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2566 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
2567 : N3V<op24, op23, op21_20, op11_8, 1, op4,
2568 (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
2569 OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
2570 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
2571 let isCommutable = 0;
2574 // Multiply-Add/Sub operations: double- and quad-register.
// N3VDMulOp: D-register fused multiply-then-accumulate pattern:
// Vd = OpNode(src1, MulOp(Vn, Vm)). The "$src1 = $Vd" constraint ties the
// accumulator input to the destination register.
2575 class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2576 InstrItinClass itin, string OpcodeStr, string Dt,
2577 ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
2578 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2579 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2580 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2581 [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
2582 (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;
// N3VDMulOpSL / N3VDMulOpSL16: multiply-accumulate where the multiplier $Vm is
// a scalar selected by a 32-/16-bit lane index.
2584 class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2585 string OpcodeStr, string Dt,
2586 ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
2587 : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
2589 (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2591 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2593 (Ty (ShOp (Ty DPR:$src1),
2595 (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
2597 class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2598 string OpcodeStr, string Dt,
2599 ValueType Ty, SDNode MulOp, SDNode ShOp>
2600 : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
2602 (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2604 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2606 (Ty (ShOp (Ty DPR:$src1),
2608 (Ty (NEONvduplane (Ty DPR_8:$Vm),
// N3VQMulOp: quad-register counterpart of N3VDMulOp.
2611 class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2612 InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
2613 SDPatternOperator MulOp, SDPatternOperator OpNode>
2614 : N3V<op24, op23, op21_20, op11_8, 1, op4,
2615 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2616 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2617 [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
2618 (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
// N3VQMulOpSL / N3VQMulOpSL16: Q-register multiply-accumulate with a D-register
// scalar multiplier selected by a 32-/16-bit lane index.
2619 class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2620 string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
2621 SDPatternOperator MulOp, SDPatternOperator ShOp>
2622 : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
2624 (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2626 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2627 [(set (ResTy QPR:$Vd),
2628 (ResTy (ShOp (ResTy QPR:$src1),
2629 (ResTy (MulOp QPR:$Vn,
2630 (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
2632 class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2633 string OpcodeStr, string Dt,
2634 ValueType ResTy, ValueType OpTy,
2635 SDNode MulOp, SDNode ShOp>
2636 : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
2638 (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2640 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2641 [(set (ResTy QPR:$Vd),
2642 (ResTy (ShOp (ResTy QPR:$src1),
2643 (ResTy (MulOp QPR:$Vn,
2644 (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
2647 // Neon Intrinsic-Op instructions (VABA): double- and quad-register.
// Pattern shape: Vd = OpNode(src1, IntOp(Vn, Vm)) — an intrinsic result folded
// into an accumulating SDNode, with the accumulator tied to the destination.
2648 class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2649 InstrItinClass itin, string OpcodeStr, string Dt,
2650 ValueType Ty, Intrinsic IntOp, SDNode OpNode>
2651 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2652 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2653 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2654 [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
2655 (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
2656 class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2657 InstrItinClass itin, string OpcodeStr, string Dt,
2658 ValueType Ty, Intrinsic IntOp, SDNode OpNode>
2659 : N3V<op24, op23, op21_20, op11_8, 1, op4,
2660 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2661 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2662 [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
2663 (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;
2665 // Neon 3-argument intrinsics, both double- and quad-register.
2666 // The destination register is also used as the first source operand register.
// Here the intrinsic itself takes three arguments (src1, Vn, Vm) instead of
// wrapping a 2-argument intrinsic in an accumulating node.
2667 class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2668 InstrItinClass itin, string OpcodeStr, string Dt,
2669 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
2670 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2671 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2672 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2673 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
2674 (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
2675 class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2676 InstrItinClass itin, string OpcodeStr, string Dt,
2677 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
2678 : N3V<op24, op23, op21_20, op11_8, 1, op4,
2679 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2680 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2681 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
2682 (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
2684 // Long Multiply-Add/Sub operations.
// "Long": D-register operands produce a Q-register result (TyD inputs, TyQ
// accumulator/result), with the Q accumulator tied to the destination.
2685 class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2686 InstrItinClass itin, string OpcodeStr, string Dt,
2687 ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
2688 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2689 (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2690 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2691 [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
2692 (TyQ (MulOp (TyD DPR:$Vn),
2693 (TyD DPR:$Vm)))))]>;
// Scalar (lane) variants of the long multiply-accumulate: 32-bit lane, then
// 16-bit lane.
2694 class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
2695 InstrItinClass itin, string OpcodeStr, string Dt,
2696 ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
2697 : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
2698 (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2700 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2702 (OpNode (TyQ QPR:$src1),
2703 (TyQ (MulOp (TyD DPR:$Vn),
2704 (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
2706 class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
2707 InstrItinClass itin, string OpcodeStr, string Dt,
2708 ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
2709 : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
2710 (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2712 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2714 (OpNode (TyQ QPR:$src1),
2715 (TyQ (MulOp (TyD DPR:$Vn),
2716 (TyD (NEONvduplane (TyD DPR_8:$Vm),
2719 // Long Intrinsic-Op vector operations with explicit extend (VABAL).
// Pattern: Vd = OpNode(src1, ExtOp(IntOp(Vn, Vm))) — the D-sized intrinsic
// result is widened by ExtOp before accumulation into the Q destination.
2720 class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2721 InstrItinClass itin, string OpcodeStr, string Dt,
2722 ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
2724 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2725 (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2726 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2727 [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
2728 (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
2729 (TyD DPR:$Vm)))))))]>;
2731 // Neon Long 3-argument intrinsic. The destination register is
2732 // a quad-register and is also used as the first source operand register.
2733 class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2734 InstrItinClass itin, string OpcodeStr, string Dt,
2735 ValueType TyQ, ValueType TyD, Intrinsic IntOp>
2736 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2737 (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2738 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2740 (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
// Scalar (lane) variants of the long 3-argument intrinsic: 32-bit lane, then
// 16-bit lane.
2741 class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2742 string OpcodeStr, string Dt,
2743 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
2744 : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
2746 (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2748 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2749 [(set (ResTy QPR:$Vd),
2750 (ResTy (IntOp (ResTy QPR:$src1),
2752 (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
2754 class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
2755 InstrItinClass itin, string OpcodeStr, string Dt,
2756 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
2757 : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
2759 (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2761 OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2762 [(set (ResTy QPR:$Vd),
2763 (ResTy (IntOp (ResTy QPR:$src1),
2765 (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
2768 // Narrowing 3-register intrinsics.
// "Narrowing": Q-register operands produce a D-register result (TyQ inputs,
// TyD result). Fixed IIC_VBINi4D itinerary.
2769 class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2770 string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
2771 Intrinsic IntOp, bit Commutable>
2772 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2773 (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
2774 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2775 [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
2776 let isCommutable = Commutable;
2779 // Long 3-register operations.
// N3VL: generic SDNode form — Q result from two D operands.
2780 class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2781 InstrItinClass itin, string OpcodeStr, string Dt,
2782 ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
2783 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2784 (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2785 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2786 [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
2787 let isCommutable = Commutable;
// Scalar (lane) variants of the long operation: 32-bit lane, then 16-bit lane.
2789 class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
2790 InstrItinClass itin, string OpcodeStr, string Dt,
2791 ValueType TyQ, ValueType TyD, SDNode OpNode>
2792 : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
2793 (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2794 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2796 (TyQ (OpNode (TyD DPR:$Vn),
2797 (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
2798 class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
2799 InstrItinClass itin, string OpcodeStr, string Dt,
2800 ValueType TyQ, ValueType TyD, SDNode OpNode>
2801 : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
2802 (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2803 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2805 (TyQ (OpNode (TyD DPR:$Vn),
2806 (TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;
2808 // Long 3-register operations with explicitly extended operands.
// Pattern: Vd = OpNode(ExtOp(Vn), ExtOp(Vm)) — both D operands are widened to
// Q size before the operation.
2809 class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2810 InstrItinClass itin, string OpcodeStr, string Dt,
2811 ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
2813 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2814 (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2815 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2816 [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
2817 (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
2818 let isCommutable = Commutable;
2821 // Long 3-register intrinsics with explicit extend (VABDL).
// Pattern: Vd = ExtOp(IntOp(Vn, Vm)) — the D-sized intrinsic result is widened
// to the Q destination.
2822 class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2823 InstrItinClass itin, string OpcodeStr, string Dt,
2824 ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
2826 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2827 (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2828 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2829 [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
2830 (TyD DPR:$Vm))))))]> {
2831 let isCommutable = Commutable;
2834 // Long 3-register intrinsics.
// N3VLInt: Q result from an intrinsic over two D operands.
2835 class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2836 InstrItinClass itin, string OpcodeStr, string Dt,
2837 ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable>
2838 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2839 (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2840 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2841 [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
2842 let isCommutable = Commutable;
// Scalar (lane) variants of the long intrinsic: 32-bit lane, then 16-bit lane.
2844 class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2845 string OpcodeStr, string Dt,
2846 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
2847 : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
2848 (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2849 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2850 [(set (ResTy QPR:$Vd),
2851 (ResTy (IntOp (OpTy DPR:$Vn),
2852 (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
2854 class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
2855 InstrItinClass itin, string OpcodeStr, string Dt,
2856 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
2857 : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
2858 (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2859 NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2860 [(set (ResTy QPR:$Vd),
2861 (ResTy (IntOp (OpTy DPR:$Vn),
2862 (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
2865 // Wide 3-register operations.
// "Wide": first operand and result are Q-sized, second operand is a D register
// widened by ExtOp before the operation. Fixed IIC_VSUBiD itinerary.
2866 class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2867 string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
2868 SDNode OpNode, SDNode ExtOp, bit Commutable>
2869 : N3V<op24, op23, op21_20, op11_8, 0, op4,
2870 (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
2871 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2872 [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
2873 (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
2874 let isCommutable = Commutable;
2877 // Pairwise long 2-register intrinsics, both double- and quad-register.
2878 class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2879 bits<2> op17_16, bits<5> op11_7, bit op4,
2880 string OpcodeStr, string Dt,
2881 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
2882 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
2883 (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
2884 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
2885 class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2886 bits<2> op17_16, bits<5> op11_7, bit op4,
2887 string OpcodeStr, string Dt,
2888 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
2889 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
2890 (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
2891 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
2893 // Pairwise long 2-register accumulate intrinsics,
2894 // both double- and quad-register.
2895 // The destination register is also used as the first source operand register.
// Accumulating forms: Vd = IntOp(src1, Vm) with "$src1 = $Vd" tying the
// accumulator to the destination; itineraries IIC_VPALiD / IIC_VPALiQ.
2896 class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2897 bits<2> op17_16, bits<5> op11_7, bit op4,
2898 string OpcodeStr, string Dt,
2899 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
2900 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
2901 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
2902 OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
2903 [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
2904 class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2905 bits<2> op17_16, bits<5> op11_7, bit op4,
2906 string OpcodeStr, string Dt,
2907 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
2908 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
2909 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
2910 OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
2911 [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;
2913 // Shift by immediate,
2914 // both double- and quad-register.
// $SIMM is the shift-amount immediate operand; its operand class (ImmTy) is
// chosen by the instantiation site.
2915 class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
2916 Format f, InstrItinClass itin, Operand ImmTy,
2917 string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
2918 : N2VImm<op24, op23, op11_8, op7, 0, op4,
2919 (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
2920 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
2921 [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
2922 class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
2923 Format f, InstrItinClass itin, Operand ImmTy,
2924 string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
2925 : N2VImm<op24, op23, op11_8, op7, 1, op4,
2926 (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
2927 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
2928 [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
2930 // Long shift by immediate.
// D operand, Q result; fixed left-shift format/itinerary (N2RegVShLFrm,
// IIC_VSHLiD).
2931 class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
2932 string OpcodeStr, string Dt,
2933 ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
2934 : N2VImm<op24, op23, op11_8, op7, op6, op4,
2935 (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
2936 IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
2937 [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm),
2938 (i32 imm:$SIMM))))]>;
2940 // Narrow shift by immediate.
// Q operand, D result; right-shift format (N2RegVShRFrm).
2941 class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
2942 InstrItinClass itin, string OpcodeStr, string Dt,
2943 ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
2944 : N2VImm<op24, op23, op11_8, op7, op6, op4,
2945 (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
2946 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
2947 [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
2948 (i32 imm:$SIMM))))]>;
2950 // Shift right by immediate and accumulate,
2951 // both double- and quad-register.
// Pattern: Vd = src1 + ShOp(Vm, imm), accumulator tied to destination.
2952 class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
2953 Operand ImmTy, string OpcodeStr, string Dt,
2954 ValueType Ty, SDNode ShOp>
2955 : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
2956 (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
2957 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
2958 [(set DPR:$Vd, (Ty (add DPR:$src1,
2959 (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
2960 class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
2961 Operand ImmTy, string OpcodeStr, string Dt,
2962 ValueType Ty, SDNode ShOp>
2963 : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
2964 (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
2965 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
2966 [(set QPR:$Vd, (Ty (add QPR:$src1,
2967 (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
2969 // Shift by immediate and insert,
2970 // both double- and quad-register.
// ShOp here takes three operands (dest-as-source, Vm, imm) — the insert
// semantics keep bits of $src1 that the shifted value does not cover.
2971 class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
2972 Operand ImmTy, Format f, string OpcodeStr, string Dt,
2973 ValueType Ty,SDNode ShOp>
2974 : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
2975 (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
2976 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
2977 [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
2978 class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
2979 Operand ImmTy, Format f, string OpcodeStr, string Dt,
2980 ValueType Ty,SDNode ShOp>
2981 : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
2982 (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
2983 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
2984 [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
2986 // Convert, with fractional bits immediate,
2987 // both double- and quad-register.
// $SIMM (neon_vcvt_imm32) is the number of fractional bits for the fixed-point
// conversion. NOTE(review): the parameter list appears truncated in this copy —
// the line declaring the Intrinsic IntOp parameter is missing; verify against
// the original ARMInstrNEON.td.
2988 class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
2989 string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
2991 : N2VImm<op24, op23, op11_8, op7, 0, op4,
2992 (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
2993 IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
2994 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
2995 class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
2996 string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
2998 : N2VImm<op24, op23, op11_8, op7, 1, op4,
2999 (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
3000 IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3001 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;
3003 //===----------------------------------------------------------------------===//
3005 //===----------------------------------------------------------------------===//
3007 // Abbreviations used in multiclass suffixes:
3008 // Q = quarter int (8 bit) elements
3009 // H = half int (16 bit) elements
3010 // S = single int (32 bit) elements
3011 // D = double int (64 bit) elements
3013 // Neon 2-register vector operations and intrinsics.
3015 // Neon 2-register comparisons.
3016 // source operand element sizes of 8, 16 and 32 bits:
// Instantiates D and Q variants for i8/i16/i32 plus an f32 form whose result
// is still an integer vector (comparison mask); the f32 defs set Inst{10} = 1
// to select the floating-point encoding.
3017 multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3018 bits<5> op11_7, bit op4, string opc, string Dt,
3019 string asm, SDNode OpNode> {
3020 // 64-bit vector types.
3021 def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
3022 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3023 opc, !strconcat(Dt, "8"), asm, "",
3024 [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;
3025 def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
3026 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3027 opc, !strconcat(Dt, "16"), asm, "",
3028 [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;
3029 def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
3030 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3031 opc, !strconcat(Dt, "32"), asm, "",
3032 [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;
3033 def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
3034 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3035 opc, "f32", asm, "",
3036 [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
3037 let Inst{10} = 1; // overwrite F = 1
3040 // 128-bit vector types.
3041 def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
3042 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3043 opc, !strconcat(Dt, "8"), asm, "",
3044 [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;
3045 def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
3046 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3047 opc, !strconcat(Dt, "16"), asm, "",
3048 [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;
3049 def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
3050 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3051 opc, !strconcat(Dt, "32"), asm, "",
3052 [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;
3053 def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
3054 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3055 opc, "f32", asm, "",
3056 [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
3057 let Inst{10} = 1; // overwrite F = 1
3062 // Neon 2-register vector intrinsics,
3063 // element sizes of 8, 16 and 32 bits:
// Instantiates N2VDInt (64-bit) and N2VQInt (128-bit) defs for each element
// size, appending the element width to the data-type suffix Dt.
3064 multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3065 bits<5> op11_7, bit op4,
3066 InstrItinClass itinD, InstrItinClass itinQ,
3067 string OpcodeStr, string Dt, Intrinsic IntOp> {
3068 // 64-bit vector types.
3069 def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3070 itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
3071 def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3072 itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
3073 def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3074 itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;
3076 // 128-bit vector types.
3077 def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3078 itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
3079 def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3080 itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
3081 def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3082 itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
3086 // Neon Narrowing 2-register vector operations,
3087 // source operand element sizes of 16, 32 and 64 bits:
// Each def narrows the element width (the Dt suffix names the SOURCE width:
// 16, 32, 64), producing the half-width destination vector type.
3088 multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3089 bits<5> op11_7, bit op6, bit op4,
3090 InstrItinClass itin, string OpcodeStr, string Dt,
3092 def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
3093 itin, OpcodeStr, !strconcat(Dt, "16"),
3094 v8i8, v8i16, OpNode>;
3095 def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
3096 itin, OpcodeStr, !strconcat(Dt, "32"),
3097 v4i16, v4i32, OpNode>;
3098 def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
3099 itin, OpcodeStr, !strconcat(Dt, "64"),
3100 v2i32, v2i64, OpNode>;
3103 // Neon Narrowing 2-register vector intrinsics,
3104 // source operand element sizes of 16, 32 and 64 bits:
// Intrinsic counterpart of N2VN_HSD, using N2VNInt defs.
3105 multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3106 bits<5> op11_7, bit op6, bit op4,
3107 InstrItinClass itin, string OpcodeStr, string Dt,
3109 def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
3110 itin, OpcodeStr, !strconcat(Dt, "16"),
3111 v8i8, v8i16, IntOp>;
3112 def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
3113 itin, OpcodeStr, !strconcat(Dt, "32"),
3114 v4i16, v4i32, IntOp>;
3115 def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
3116 itin, OpcodeStr, !strconcat(Dt, "64"),
3117 v2i32, v2i64, IntOp>;
3121 // Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
3122 // source operand element sizes of 16, 32 and 64 bits:
// Each def widens the element width (Dt suffix names the SOURCE width: 8, 16,
// 32), producing the double-width destination vector type.
3123 multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
3124 string OpcodeStr, string Dt, SDNode OpNode> {
3125 def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
3126 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
3127 def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
3128 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
3129 def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
3130 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
3134 // Neon 3-register vector operations.
3136 // First with only element sizes of 8, 16 and 32 bits:
// Instantiates D and Q defs of N3VD/N3VQ for each element size; 8/16-bit
// element sizes share the "16" itineraries, 32-bit uses the "32" itineraries.
3137 multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3138 InstrItinClass itinD16, InstrItinClass itinD32,
3139 InstrItinClass itinQ16, InstrItinClass itinQ32,
3140 string OpcodeStr, string Dt,
3141 SDNode OpNode, bit Commutable = 0> {
3142 // 64-bit vector types.
3143 def v8i8 : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
3144 OpcodeStr, !strconcat(Dt, "8"),
3145 v8i8, v8i8, OpNode, Commutable>;
3146 def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
3147 OpcodeStr, !strconcat(Dt, "16"),
3148 v4i16, v4i16, OpNode, Commutable>;
3149 def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
3150 OpcodeStr, !strconcat(Dt, "32"),
3151 v2i32, v2i32, OpNode, Commutable>;
3153 // 128-bit vector types.
3154 def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
3155 OpcodeStr, !strconcat(Dt, "8"),
3156 v16i8, v16i8, OpNode, Commutable>;
3157 def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
3158 OpcodeStr, !strconcat(Dt, "16"),
3159 v8i16, v8i16, OpNode, Commutable>;
3160 def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
3161 OpcodeStr, !strconcat(Dt, "32"),
3162 v4i32, v4i32, OpNode, Commutable>;
// N3VSL_HS: scalar-by-lane (16- and 32-bit element) D and Q defs for the
// given shift/multiply-style node.
3165 multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
3166 def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
3167 def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
3168 def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
3169 def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
3170 v4i32, v2i32, ShOp>;
3173 // ....then also with element size 64 bits:
3174 multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
3175 InstrItinClass itinD, InstrItinClass itinQ,
3176 string OpcodeStr, string Dt,
3177 SDNode OpNode, bit Commutable = 0>
3178 : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
3179 OpcodeStr, Dt, OpNode, Commutable> {
3180 def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
3181 OpcodeStr, !strconcat(Dt, "64"),
3182 v1i64, v1i64, OpNode, Commutable>;
3183 def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
3184 OpcodeStr, !strconcat(Dt, "64"),
3185 v2i64, v2i64, OpNode, Commutable>;
// Neon 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     Intrinsic IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i32, v4i32, IntOp, Commutable>;
}

// Same, but using the N3VDIntSh/N3VQIntSh instruction classes (no Commutable).
multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v4i16, v4i16, IntOp>;
  def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v8i16, v8i16, IntOp>;
  def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v4i32, v4i32, IntOp>;
}

multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                          OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                          OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      Intrinsic IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, Dt, IntOp, Commutable> {
  def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
                     OpcodeStr, !strconcat(Dt, "8"),
                     v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v16i8, v16i8, IntOp, Commutable>;
}

multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt,
                        Intrinsic IntOp>
  : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                OpcodeStr, Dt, IntOp> {
  def v8i8 : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
                       OpcodeStr, !strconcat(Dt, "8"),
                       v8i8, v8i8, IntOp>;
  def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v16i8, v16i8, IntOp>;
}

// ....then also with element size of 64 bits:
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i64, v2i64, IntOp, Commutable>;
}

multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt,
                         Intrinsic IntOp>
  : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                 OpcodeStr, Dt, IntOp> {
  def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v1i64, v1i64, IntOp>;
  def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v2i64, v2i64, IntOp>;
}
// Neon Narrowing 3-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0> {
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp, Commutable>;
}
// Neon Long 3-register vector operations.

multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itin16, InstrItinClass itin32,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0> {
  def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, Commutable>;
  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, Commutable>;
  def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, Commutable>;
}

// Scalar (by-lane) long forms, element sizes 16 and 32 bits:
multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
                     InstrItinClass itin, string OpcodeStr, string Dt,
                     SDNode OpNode> {
  def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
                       !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
                     !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}

// Long operations with explicit extend of the narrow source operands:
multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, OpNode, ExtOp, Commutable>;
}
// Neon Long 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin16, InstrItinClass itin32,
                      string OpcodeStr, string Dt,
                      Intrinsic IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}

multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr, string Dt,
                        Intrinsic IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                          OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       Intrinsic IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
               IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
}

// ....with explicit extend (VABDL).
multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                          InstrItinClass itin, string OpcodeStr, string Dt,
                          Intrinsic IntOp, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "8"),
                         v8i16, v8i8, IntOp, ExtOp, Commutable>;
  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "16"),
                         v4i32, v4i16, IntOp, ExtOp, Commutable>;
  def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "32"),
                         v2i64, v2i32, IntOp, ExtOp, Commutable>;
}
// Neon Wide 3-register vector intrinsics,
// source operand element sizes of 8, 16 and 32 bits:
multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, ExtOp, Commutable>;
}
// Neon Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
}

// Scalar (by-lane) multiply-op forms, element sizes 16 and 32 bits:
multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
                            OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
                          OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                            OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
                            mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
                          OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
                          mul, ShOp>;
}
// Neon Intrinsic-Op vector operations,
// element sizes of 8, 16 and 32 bits:
multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD, InstrItinClass itinQ,
                        string OpcodeStr, string Dt, Intrinsic IntOp,
                        SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
  def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
  def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
  def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
  def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
}
// Neon 3-argument intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD, InstrItinClass itinQ,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}
// Neon Long Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin16, InstrItinClass itin32,
                         string OpcodeStr, string Dt, SDNode MulOp,
                         SDNode OpNode> {
  def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
  def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                        !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}

// Scalar (by-lane) long multiply-op forms, element sizes 16 and 32 bits:
multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
                          string Dt, SDNode MulOp, SDNode OpNode> {
  def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
                            !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
                          !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}
// Neon Long 3-argument intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt, Intrinsic IntOp> {
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
                       OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, string Dt, Intrinsic IntOp> {
  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
                           OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itin16, InstrItinClass itin32,
                        string OpcodeStr, string Dt, Intrinsic IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}

// ....with explicit extend (VABAL).
multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                            InstrItinClass itin, string OpcodeStr, string Dt,
                            Intrinsic IntOp, SDNode ExtOp, SDNode OpNode> {
  def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
                           IntOp, ExtOp, OpNode>;
  def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
                           IntOp, ExtOp, OpNode>;
  def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
                           IntOp, ExtOp, OpNode>;
}
// Neon Pairwise long 2-register intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}

// Neon Pairwise long 2-register accumulate intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, string Dt, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}
// Neon 2-register vector shift by immediate,
// with f of either N2RegVShLFrm or N2RegVShRFrm
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDNode OpNode> {
  // 64-bit vector types.
  def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                    OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
}

multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDNode OpNode> {
  // 64-bit vector types.
  def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                    OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
}
// Neon Shift-Accumulate vector operations,
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                       OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
}
// Neon Shift-Insert vector operations,
// with f of either N2RegVShLFrm or N2RegVShRFrm
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                       N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>;

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>;
}

multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                       N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>;

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>;
}
// Neon Shift Long operations,
// element sizes of 8, 16, 32 bits:
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
              OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
              OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
              OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}
// Neon Shift Narrow operations,
// element sizes of 16, 32, 64 bits:
multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
                      SDNode OpNode> {
  def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                    OpcodeStr, !strconcat(Dt, "16"),
                    v8i8, v8i16, shr_imm8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "32"),
                     v4i16, v4i32, shr_imm16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "64"),
                     v2i32, v2i64, shr_imm32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}
//===----------------------------------------------------------------------===//
// Instruction Definitions.
//===----------------------------------------------------------------------===//

// Vector Add Operations.

// VADD : Vector Add (integer and floating-point)
defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
                     add, 1>;
def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
                  v2f32, v2f32, fadd, 1>;
def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
                  v4f32, v4f32, fadd, 1>;
// VADDL : Vector Add Long (Q = D + D)
defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                          "vaddl", "s", add, sext, 1>;
defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                          "vaddl", "u", add, zext, 1>;
// VADDW : Vector Add Wide (Q = Q + D)
defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
// VHADD : Vector Halving Add
defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
                         IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                         "vhadd", "s", int_arm_neon_vhadds, 1>;
defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
                         IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                         "vhadd", "u", int_arm_neon_vhaddu, 1>;
// VRHADD : Vector Rounding Halving Add
defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vrhadd", "s", int_arm_neon_vrhadds, 1>;
defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
// VQADD : Vector Saturating Add
defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vqadd", "s", int_arm_neon_vqadds, 1>;
defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
                          IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                          "vqadd", "u", int_arm_neon_vqaddu, 1>;
// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i",
                          int_arm_neon_vaddhn, 1>;
// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
                           int_arm_neon_vraddhn, 1>;
3918 // Vector Multiply Operations.
3920 // VMUL : Vector Multiply (integer, polynomial and floating-point)
// Integer forms map to the generic 'mul' node; polynomial (p8) forms map to
// int_arm_neon_vmulp; f32 forms map to the generic 'fmul' node.
3921 defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
3922 IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
3923 def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
3924 "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
3925 def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
3926 "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
3927 def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
3928 v2f32, v2f32, fmul, 1>;
3929 def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
3930 v4f32, v4f32, fmul, 1>;
// Scalar (by-lane) multiply forms.
3931 defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>;
3932 def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
3933 def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
// Match a Q-register multiply by a duplicated Q-register lane by extracting
// the D subregister holding the lane and using the by-scalar instruction.
3936 def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
3937 (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
3938 (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
3939 (v4i16 (EXTRACT_SUBREG QPR:$src2,
3940 (DSubReg_i16_reg imm:$lane))),
3941 (SubReg_i16_lane imm:$lane)))>;
3942 def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
3943 (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
3944 (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
3945 (v2i32 (EXTRACT_SUBREG QPR:$src2,
3946 (DSubReg_i32_reg imm:$lane))),
3947 (SubReg_i32_lane imm:$lane)))>;
3948 def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
3949 (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
3950 (v4f32 (VMULslfq (v4f32 QPR:$src1),
3951 (v2f32 (EXTRACT_SUBREG QPR:$src2,
3952 (DSubReg_i32_reg imm:$lane))),
3953 (SubReg_i32_lane imm:$lane)))>;
3955 // VQDMULH : Vector Saturating Doubling Multiply Returning High Half
3956 defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
3957 IIC_VMULi16Q, IIC_VMULi32Q,
3958 "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
// By-lane (scalar) variant of VQDMULH.
3959 defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
3960 IIC_VMULi16Q, IIC_VMULi32Q,
3961 "vqdmulh", "s", int_arm_neon_vqdmulh>;
3962 def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
3963 (v8i16 (NEONvduplane (v8i16 QPR:$src2),
3965 (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
3966 (v4i16 (EXTRACT_SUBREG QPR:$src2,
3967 (DSubReg_i16_reg imm:$lane))),
3968 (SubReg_i16_lane imm:$lane)))>;
3969 def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
3970 (v4i32 (NEONvduplane (v4i32 QPR:$src2),
3972 (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
3973 (v2i32 (EXTRACT_SUBREG QPR:$src2,
3974 (DSubReg_i32_reg imm:$lane))),
3975 (SubReg_i32_lane imm:$lane)))>;
3977 // VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
3978 defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
3979 IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
3980 "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
// By-lane (scalar) variant of VQRDMULH.
3981 defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
3982 IIC_VMULi16Q, IIC_VMULi32Q,
3983 "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
3984 def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
3985 (v8i16 (NEONvduplane (v8i16 QPR:$src2),
3987 (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
3988 (v4i16 (EXTRACT_SUBREG QPR:$src2,
3989 (DSubReg_i16_reg imm:$lane))),
3990 (SubReg_i16_lane imm:$lane)))>;
3991 def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
3992 (v4i32 (NEONvduplane (v4i32 QPR:$src2),
3994 (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
3995 (v2i32 (EXTRACT_SUBREG QPR:$src2,
3996 (DSubReg_i32_reg imm:$lane))),
3997 (SubReg_i32_lane imm:$lane)))>;
3999 // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
4000 defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
4001 "vmull", "s", NEONvmulls, 1>;
4002 defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
4003 "vmull", "u", NEONvmullu, 1>;
4004 def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
4005 v8i16, v8i8, int_arm_neon_vmullp, 1>;
// By-lane (scalar) long multiply forms.
4006 defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
4007 defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;
4009 // VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
4010 defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
4011 "vqdmull", "s", int_arm_neon_vqdmull, 1>;
4012 defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
4013 "vqdmull", "s", int_arm_neon_vqdmull>;
4015 // Vector Multiply-Accumulate and Multiply-Subtract Operations.
4017 // VMLA : Vector Multiply Accumulate (integer and floating-point)
4018 defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
4019 IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
// The f32 multiply-accumulate forms are only selected when FP VMLA is
// desirable (UseFPVMLx) and the fused NEON2 forms are unavailable (NoNEON2).
4020 def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
4021 v2f32, fmul_su, fadd_mlx>,
4022 Requires<[HasNEON, UseFPVMLx, NoNEON2]>;
4023 def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
4024 v4f32, fmul_su, fadd_mlx>,
4025 Requires<[HasNEON, UseFPVMLx, NoNEON2]>;
4026 defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
4027 IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
4028 def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
4029 v2f32, fmul_su, fadd_mlx>,
4030 Requires<[HasNEON, UseFPVMLx]>;
4031 def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
4032 v4f32, v2f32, fmul_su, fadd_mlx>,
4033 Requires<[HasNEON, UseFPVMLx]>;
// Fold (Q + Q * dup(lane)) into the by-scalar VMLA on the extracted D subreg.
4035 def : Pat<(v8i16 (add (v8i16 QPR:$src1),
4036 (mul (v8i16 QPR:$src2),
4037 (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
4038 (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
4039 (v4i16 (EXTRACT_SUBREG QPR:$src3,
4040 (DSubReg_i16_reg imm:$lane))),
4041 (SubReg_i16_lane imm:$lane)))>;
4043 def : Pat<(v4i32 (add (v4i32 QPR:$src1),
4044 (mul (v4i32 QPR:$src2),
4045 (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
4046 (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
4047 (v2i32 (EXTRACT_SUBREG QPR:$src3,
4048 (DSubReg_i32_reg imm:$lane))),
4049 (SubReg_i32_lane imm:$lane)))>;
4051 def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
4052 (fmul_su (v4f32 QPR:$src2),
4053 (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
4054 (v4f32 (VMLAslfq (v4f32 QPR:$src1),
4056 (v2f32 (EXTRACT_SUBREG QPR:$src3,
4057 (DSubReg_i32_reg imm:$lane))),
4058 (SubReg_i32_lane imm:$lane)))>,
4059 Requires<[HasNEON, UseFPVMLx]>;
4061 // VMLAL : Vector Multiply Accumulate Long (Q += D * D)
4062 defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
4063 "vmlal", "s", NEONvmulls, add>;
4064 defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
4065 "vmlal", "u", NEONvmullu, add>;
// By-lane (scalar) long multiply-accumulate forms.
4067 defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
4068 defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;
4070 // VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
4071 defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
4072 "vqdmlal", "s", int_arm_neon_vqdmlal>;
4073 defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>;
4075 // VMLS : Vector Multiply Subtract (integer and floating-point)
4076 defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
4077 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
// f32 multiply-subtract, gated the same way as the VMLA f32 forms:
// only when FP VMLx is preferred and the fused NEON2 forms are absent.
4078 def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
4079 v2f32, fmul_su, fsub_mlx>,
4080 Requires<[HasNEON, UseFPVMLx, NoNEON2]>;
4081 def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
4082 v4f32, fmul_su, fsub_mlx>,
4083 Requires<[HasNEON, UseFPVMLx, NoNEON2]>;
4084 defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
4085 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
4086 def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
4087 v2f32, fmul_su, fsub_mlx>,
4088 Requires<[HasNEON, UseFPVMLx]>;
4089 def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
4090 v4f32, v2f32, fmul_su, fsub_mlx>,
4091 Requires<[HasNEON, UseFPVMLx]>;
// Fold (Q - Q * dup(lane)) into the by-scalar VMLS on the extracted D subreg.
4093 def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
4094 (mul (v8i16 QPR:$src2),
4095 (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
4096 (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
4097 (v4i16 (EXTRACT_SUBREG QPR:$src3,
4098 (DSubReg_i16_reg imm:$lane))),
4099 (SubReg_i16_lane imm:$lane)))>;
4101 def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
4102 (mul (v4i32 QPR:$src2),
4103 (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
4104 (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
4105 (v2i32 (EXTRACT_SUBREG QPR:$src3,
4106 (DSubReg_i32_reg imm:$lane))),
4107 (SubReg_i32_lane imm:$lane)))>;
4109 def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
4110 (fmul_su (v4f32 QPR:$src2),
4111 (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
4112 (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
4113 (v2f32 (EXTRACT_SUBREG QPR:$src3,
4114 (DSubReg_i32_reg imm:$lane))),
4115 (SubReg_i32_lane imm:$lane)))>,
4116 Requires<[HasNEON, UseFPVMLx]>;
4118 // VMLSL : Vector Multiply Subtract Long (Q -= D * D)
4119 defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
4120 "vmlsl", "s", NEONvmulls, sub>;
4121 defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
4122 "vmlsl", "u", NEONvmullu, sub>;
4124 defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
4125 defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;
4127 // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
4128 defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
4129 "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
// By-lane (scalar) variant of VQDMLSL.  The op11_8 field is spelled as a
// full 4-bit literal (0b0111) for consistency with VQDMLALsl (0b0011) and
// the other N3V*SL_HS users; TableGen zero-extends 0b111 to the same value,
// so the instruction encoding is unchanged.
4130 defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
4133 // Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
// VFMA/VFMS are only available with the NEON2 (fused MAC) feature and when
// FP contraction is permitted (FPContractions predicate).
4134 def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
4135 v2f32, fmul_su, fadd_mlx>,
4136 Requires<[HasNEON2,FPContractions]>;
4138 def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
4139 v4f32, fmul_su, fadd_mlx>,
4140 Requires<[HasNEON2,FPContractions]>;
4142 // Fused Vector Multiply Subtract (floating-point)
4143 def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
4144 v2f32, fmul_su, fsub_mlx>,
4145 Requires<[HasNEON2,FPContractions]>;
4146 def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
4147 v4f32, fmul_su, fsub_mlx>,
4148 Requires<[HasNEON2,FPContractions]>;
4150 // Vector Subtract Operations.
4152 // VSUB : Vector Subtract (integer and floating-point)
4153 defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
4154 "vsub", "i", sub, 0>;
4155 def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
4156 v2f32, v2f32, fsub, 0>;
4157 def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
4158 v4f32, v4f32, fsub, 0>;
4159 // VSUBL : Vector Subtract Long (Q = D - D)
4160 defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
4161 "vsubl", "s", sub, sext, 0>;
4162 defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
4163 "vsubl", "u", sub, zext, 0>;
4164 // VSUBW : Vector Subtract Wide (Q = Q - D)
4165 defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
4166 defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
4167 // VHSUB : Vector Halving Subtract
4168 defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
4169 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4170 "vhsub", "s", int_arm_neon_vhsubs, 0>;
4171 defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
4172 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4173 "vhsub", "u", int_arm_neon_vhsubu, 0>;
4174 // VQSUB : Vector Saturating Subtract
4175 defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
4176 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4177 "vqsub", "s", int_arm_neon_vqsubs, 0>;
4178 defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
4179 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4180 "vqsub", "u", int_arm_neon_vqsubu, 0>;
4181 // VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
4182 defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
4183 int_arm_neon_vsubhn, 0>;
4184 // VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
4185 defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
4186 int_arm_neon_vrsubhn, 0>;
4188 // Vector Comparisons.
4190 // VCEQ : Vector Compare Equal
// Integer equality; result elements are all-ones (true) / all-zeros (false).
4191 defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
4192 IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
4193 def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
4195 def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
// Compare-equal-to-zero form ("vceq $Vd, $Vm, #0").
4198 defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
4199 "$Vd, $Vm, #0", NEONvceqz>;
4201 // VCGE : Vector Compare Greater Than or Equal
4202 defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
4203 IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
4204 defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
4205 IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
4206 def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
4208 def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
// Compare-against-zero forms: >= 0 and <= 0 (signed only).
4211 defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
4212 "$Vd, $Vm, #0", NEONvcgez>;
4213 defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
4214 "$Vd, $Vm, #0", NEONvclez>;
4216 // VCGT : Vector Compare Greater Than
4217 defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
4218 IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
4219 defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
4220 IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
4221 def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
4223 def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
// Compare-against-zero forms: > 0 and < 0 (signed only).
4226 defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
4227 "$Vd, $Vm, #0", NEONvcgtz>;
4228 defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
4229 "$Vd, $Vm, #0", NEONvcltz>;
4231 // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
// f32 inputs produce i32 mask results (v2f32 -> v2i32, v4f32 -> v4i32).
4232 def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
4233 "f32", v2i32, v2f32, int_arm_neon_vacged, 0>;
4234 def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
4235 "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>;
4236 // VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
4237 def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
4238 "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>;
4239 def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
4240 "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>;
4241 // VTST : Vector Test Bits
4242 defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
4243 IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
4245 // Vector Bitwise Operations.
// Bitwise-NOT pattern fragments: xor with an all-ones vector, for D and Q
// register widths respectively.  Used below by VBIC/VORN/VMVN patterns.
4247 def vnotd : PatFrag<(ops node:$in),
4248 (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
4249 def vnotq : PatFrag<(ops node:$in),
4250 (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;
4253 // VAND : Vector Bitwise AND
4254 def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
4255 v2i32, v2i32, and, 1>;
4256 def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
4257 v4i32, v4i32, and, 1>;
4259 // VEOR : Vector Bitwise Exclusive OR
4260 def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
4261 v2i32, v2i32, xor, 1>;
4262 def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
4263 v4i32, v4i32, xor, 1>;
4265 // VORR : Vector Bitwise OR
4266 def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
4267 v2i32, v2i32, or, 1>;
4268 def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
4269 v4i32, v4i32, or, 1>;
4271 def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
4272 (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
4274 "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
4276 (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
4277 let Inst{9} = SIMM{9};
4280 def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
4281 (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
4283 "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
4285 (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
4286 let Inst{10-9} = SIMM{10-9};
4289 def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
4290 (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
4292 "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
4294 (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
4295 let Inst{9} = SIMM{9};
4298 def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
4299 (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
4301 "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
4303 (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
4304 let Inst{10-9} = SIMM{10-9};
4308 // VBIC : Vector Bitwise Bit Clear (AND NOT)
// Dd = Dn & ~Dm (and the Q-register equivalent); ~ is the vnotd/vnotq
// all-ones-xor fragment defined above.
4309 def  VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
4310 (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
4311 "vbic", "$Vd, $Vn, $Vm", "",
4312 [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
4313 (vnotd DPR:$Vm))))]>;
4314 def  VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
4315 (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
4316 "vbic", "$Vd, $Vn, $Vm", "",
4317 [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
4318 (vnotq QPR:$Vm))))]>;
4320 def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
4321 (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
4323 "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
4325 (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
4326 let Inst{9} = SIMM{9};
4329 def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
4330 (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
4332 "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
4334 (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
4335 let Inst{10-9} = SIMM{10-9};
4338 def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
4339 (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
4341 "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
4343 (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
4344 let Inst{9} = SIMM{9};
4347 def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
4348 (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
4350 "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
4352 (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
4353 let Inst{10-9} = SIMM{10-9};
4356 // VORN : Vector Bitwise OR NOT
// Dd = Dn | ~Dm (and the Q-register equivalent).
4357 def  VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
4358 (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
4359 "vorn", "$Vd, $Vn, $Vm", "",
4360 [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
4361 (vnotd DPR:$Vm))))]>;
4362 def  VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
4363 (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
4364 "vorn", "$Vd, $Vn, $Vm", "",
4365 [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
4366 (vnotq QPR:$Vm))))]>;
4368 // VMVN : Vector Bitwise NOT (Immediate)
4370 let isReMaterializable = 1 in {
4372 def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
4373 (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
4374 "vmvn", "i16", "$Vd, $SIMM", "",
4375 [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
4376 let Inst{9} = SIMM{9};
4379 def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
4380 (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
4381 "vmvn", "i16", "$Vd, $SIMM", "",
4382 [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
4383 let Inst{9} = SIMM{9};
4386 def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
4387 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
4388 "vmvn", "i32", "$Vd, $SIMM", "",
4389 [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
4390 let Inst{11-8} = SIMM{11-8};
4393 def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
4394 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
4395 "vmvn", "i32", "$Vd, $SIMM", "",
4396 [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
4397 let Inst{11-8} = SIMM{11-8};
4401 // VMVN : Vector Bitwise NOT
4402 def  VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
4403 (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
4404 "vmvn", "$Vd, $Vm", "",
4405 [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
4406 def  VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
4407 (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
4408 "vmvn", "$Vd, $Vm", "",
4409 [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
// Explicit selection patterns for the vnot fragments.
4410 def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
4411 def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
4413 // VBSL : Vector Bitwise Select
4414 def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
4415 (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
4416 N3RegFrm, IIC_VCNTiD,
4417 "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
4419 (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
4421 def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
4422 (and DPR:$Vm, (vnotd DPR:$Vd)))),
4423 (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
4425 def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
4426 (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
4427 N3RegFrm, IIC_VCNTiQ,
4428 "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
4430 (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
4432 def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
4433 (and QPR:$Vm, (vnotq QPR:$Vd)))),
4434 (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
4436 // VBIF : Vector Bitwise Insert if False
4437 // like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
4438 // FIXME: This instruction's encoding MAY NOT BE correct.
4439 def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
4440 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
4441 N3RegFrm, IIC_VBINiD,
4442 "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
4444 def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
4445 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
4446 N3RegFrm, IIC_VBINiQ,
4447 "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
4450 // VBIT : Vector Bitwise Insert if True
4451 // like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
4452 // FIXME: This instruction's encoding MAY NOT BE correct.
4453 def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
4454 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
4455 N3RegFrm, IIC_VBINiD,
4456 "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
4458 def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
4459 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
4460 N3RegFrm, IIC_VBINiQ,
4461 "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
4464 // VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking
4465 // for equivalent operations with different register constraints; it just
4468 // Vector Absolute Differences.
4470 // VABD : Vector Absolute Difference
4471 defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
4472 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4473 "vabd", "s", int_arm_neon_vabds, 1>;
4474 defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
4475 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4476 "vabd", "u", int_arm_neon_vabdu, 1>;
4477 def  VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
4478 "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
4479 def  VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
4480 "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
4482 // VABDL : Vector Absolute Difference Long (Q = | D - D |)
// The D-sized absolute-difference result is zero-extended to the Q result
// for both the signed and unsigned variants (the |a-b| value is already
// non-negative, so zext is correct in both cases).
4483 defm VABDLs   : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
4484 "vabdl", "s", int_arm_neon_vabds, zext, 1>;
4485 defm VABDLu   : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
4486 "vabdl", "u", int_arm_neon_vabdu, zext, 1>;
4488 // VABA : Vector Absolute Difference and Accumulate
4489 defm VABAs    : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
4490 "vaba", "s", int_arm_neon_vabds, add>;
4491 defm VABAu    : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
4492 "vaba", "u", int_arm_neon_vabdu, add>;
4494 // VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
4495 defm VABALs   : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
4496 "vabal", "s", int_arm_neon_vabds, zext, add>;
4497 defm VABALu   : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
4498 "vabal", "u", int_arm_neon_vabdu, zext, add>;
4500 // Vector Maximum and Minimum.
4502 // VMAX : Vector Maximum
4503 defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
4504 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4505 "vmax", "s", int_arm_neon_vmaxs, 1>;
4506 defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
4507 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4508 "vmax", "u", int_arm_neon_vmaxu, 1>;
4509 def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
4511 v2f32, v2f32, int_arm_neon_vmaxs, 1>;
4512 def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
4514 v4f32, v4f32, int_arm_neon_vmaxs, 1>;
4516 // VMIN : Vector Minimum
4517 defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
4518 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4519 "vmin", "s", int_arm_neon_vmins, 1>;
4520 defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
4521 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
4522 "vmin", "u", int_arm_neon_vminu, 1>;
4523 def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
4525 v2f32, v2f32, int_arm_neon_vmins, 1>;
4526 def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
4528 v4f32, v4f32, int_arm_neon_vmins, 1>;
4530 // Vector Pairwise Operations.
4532 // VPADD : Vector Pairwise Add
4533 def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
4535 v8i8, v8i8, int_arm_neon_vpadd, 0>;
4536 def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
4538 v4i16, v4i16, int_arm_neon_vpadd, 0>;
4539 def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
4541 v2i32, v2i32, int_arm_neon_vpadd, 0>;
4542 def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
4543 IIC_VPBIND, "vpadd", "f32",
4544 v2f32, v2f32, int_arm_neon_vpadd, 0>;
4546 // VPADDL : Vector Pairwise Add Long
4547 defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
4548 int_arm_neon_vpaddls>;
4549 defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
4550 int_arm_neon_vpaddlu>;
4552 // VPADAL : Vector Pairwise Add and Accumulate Long
4553 defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
4554 int_arm_neon_vpadals>;
4555 defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
4556 int_arm_neon_vpadalu>;
4558 // VPMAX : Vector Pairwise Maximum
// Pairwise ops are D-register only; one def per element type/signedness.
4559 def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
4560 "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
4561 def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
4562 "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
4563 def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
4564 "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
4565 def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
4566 "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
4567 def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
4568 "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
4569 def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
4570 "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
4571 def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
4572 "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
4574 // VPMIN : Vector Pairwise Minimum
4575 def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
4576 "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
4577 def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
4578 "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
4579 def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
4580 "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
4581 def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
4582 "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
4583 def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
4584 "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
4585 def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
4586 "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
4587 def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
4588 "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
4590 // Vector Reciprocal and Reciprocal Square Root Estimate and Step.
4592 // VRECPE : Vector Reciprocal Estimate
// u32 and f32 variants share the int_arm_neon_vrecpe intrinsic; the
// element type is distinguished by the vector value types.
4593 def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
4594 IIC_VUNAD, "vrecpe", "u32",
4595 v2i32, v2i32, int_arm_neon_vrecpe>;
4596 def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
4597 IIC_VUNAQ, "vrecpe", "u32",
4598 v4i32, v4i32, int_arm_neon_vrecpe>;
4599 def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
4600 IIC_VUNAD, "vrecpe", "f32",
4601 v2f32, v2f32, int_arm_neon_vrecpe>;
4602 def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
4603 IIC_VUNAQ, "vrecpe", "f32",
4604 v4f32, v4f32, int_arm_neon_vrecpe>;
4606 // VRECPS : Vector Reciprocal Step
4607 def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
4608 IIC_VRECSD, "vrecps", "f32",
4609 v2f32, v2f32, int_arm_neon_vrecps, 1>;
4610 def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
4611 IIC_VRECSQ, "vrecps", "f32",
4612 v4f32, v4f32, int_arm_neon_vrecps, 1>;
4614 // VRSQRTE : Vector Reciprocal Square Root Estimate
4615 def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
4616 IIC_VUNAD, "vrsqrte", "u32",
4617 v2i32, v2i32, int_arm_neon_vrsqrte>;
4618 def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
4619 IIC_VUNAQ, "vrsqrte", "u32",
4620 v4i32, v4i32, int_arm_neon_vrsqrte>;
4621 def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
4622 IIC_VUNAD, "vrsqrte", "f32",
4623 v2f32, v2f32, int_arm_neon_vrsqrte>;
4624 def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
4625 IIC_VUNAQ, "vrsqrte", "f32",
4626 v4f32, v4f32, int_arm_neon_vrsqrte>;
4628 // VRSQRTS : Vector Reciprocal Square Root Step
4629 def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
4630 IIC_VRECSD, "vrsqrts", "f32",
4631 v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
4632 def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
4633 IIC_VRECSQ, "vrsqrts", "f32",
4634 v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
4638 // VSHL : Vector Shift
// Register-shift forms: shift amount comes from a second vector operand.
4639 defm VSHLs    : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
4640 IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
4641 "vshl", "s", int_arm_neon_vshifts>;
4642 defm VSHLu    : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
4643 IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
4644 "vshl", "u", int_arm_neon_vshiftu>;
4646 // VSHL : Vector Shift Left (Immediate)
4647 defm VSHLi    : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;
4649 // VSHR : Vector Shift Right (Immediate)
4650 defm VSHRs    : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s",NEONvshrs>;
4651 defm VSHRu    : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u",NEONvshru>;
4653 // VSHLL : Vector Shift Left Long
4654 defm VSHLLs   : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
4655 defm VSHLLu   : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;
4657 // VSHLL : Vector Shift Left Long (with maximum shift count)
4658 class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
4659 bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
4660 ValueType OpTy, Operand ImmTy, SDNode OpNode>
4661 : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
4662 ResTy, OpTy, ImmTy, OpNode> {
4663 let Inst{21-16} = op21_16;
4664 let DecoderMethod = "DecodeVSHLMaxInstruction";
// Maximum-shift VSHLL encodings (shift amount equals the element width),
// one per source element size; decoded by DecodeVSHLMaxInstruction.
4666 def  VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
4667 v8i16, v8i8, imm8, NEONvshlli>;
4668 def  VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
4669 v4i32, v4i16, imm16, NEONvshlli>;
4670 def  VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
4671 v2i64, v2i32, imm32, NEONvshlli>;
4673 // VSHRN : Vector Shift Right and Narrow
4674 defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
4677 // VRSHL : Vector Rounding Shift
4678 defm VRSHLs   : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
4679 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
4680 "vrshl", "s", int_arm_neon_vrshifts>;
4681 defm VRSHLu   : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
4682 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
4683 "vrshl", "u", int_arm_neon_vrshiftu>;
4684 // VRSHR : Vector Rounding Shift Right
4685 defm VRSHRs   : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s",NEONvrshrs>;
4686 defm VRSHRu   : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u",NEONvrshru>;
4688 // VRSHRN : Vector Rounding Shift Right and Narrow
4689 defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
4692 // VQSHL : Vector Saturating Shift
4693 defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
4694 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
4695 "vqshl", "s", int_arm_neon_vqshifts>;
4696 defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
4697 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
4698 "vqshl", "u", int_arm_neon_vqshiftu>;
4699 // VQSHL : Vector Saturating Shift Left (Immediate)
4700 defm VQSHLsi : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>;
4701 defm VQSHLui : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>;
4703 // VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
4704 defm VQSHLsu : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>;
4706 // VQSHRN : Vector Saturating Shift Right and Narrow
4707 defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
4709 defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
4712 // VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
4713 defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
4716 // VQRSHL : Vector Saturating Rounding Shift
4717 defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
4718 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
4719 "vqrshl", "s", int_arm_neon_vqrshifts>;
4720 defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
4721 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
4722 "vqrshl", "u", int_arm_neon_vqrshiftu>;
4724 // VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
4725 defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
4727 defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
4730 // VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
4731 defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
4734 // VSRA : Vector Shift Right and Accumulate
4735 defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
4736 defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
4737 // VRSRA : Vector Rounding Shift Right and Accumulate
4738 defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
4739 defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;
4741 // VSLI : Vector Shift Left and Insert
4742 defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;
4744 // VSRI : Vector Shift Right and Insert
4745 defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;
4747 // Vector Absolute and Saturating Absolute.
4749 // VABS : Vector Absolute Value
// NOTE(review): the VABS defm ends without its intrinsic/pattern argument
// (original line after 4751 appears lost) — confirm against upstream.
4750 defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
4751 IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
4753 def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
4754 IIC_VUNAD, "vabs", "f32",
4755 v2f32, v2f32, int_arm_neon_vabs>;
4756 def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
4757 IIC_VUNAQ, "vabs", "f32",
4758 v4f32, v4f32, int_arm_neon_vabs>;
4760 // VQABS : Vector Saturating Absolute Value
4761 defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
4762 IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
4763 int_arm_neon_vqabs>;
// Integer negation is matched as (0 - x): these PatFrags build the
// all-zeros vector via bitconvert of the NEON immediate-zero node.
4767 def vnegd : PatFrag<(ops node:$in),
4768 (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
4769 def vnegq : PatFrag<(ops node:$in),
4770 (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;
4772 class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
4773 : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
4774 IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
4775 [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
4776 class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
4777 : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
4778 IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
4779 [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;
4781 // VNEG : Vector Negate (integer)
4782 def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>;
4783 def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
4784 def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
4785 def VNEGs8q : VNEGQ<0b00, "vneg", "s8", v16i8>;
4786 def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
4787 def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
4789 // VNEG : Vector Negate (floating-point)
4790 def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
4791 (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
4792 "vneg", "f32", "$Vd, $Vm", "",
4793 [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
4794 def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
4795 (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
4796 "vneg", "f32", "$Vd, $Vm", "",
4797 [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
// Explicit patterns map the remaining integer vector types onto the
// VNEG defs above (the class patterns cover only the instantiated Ty).
4799 def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
4800 def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
4801 def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
4802 def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
4803 def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
4804 def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
4806 // VQNEG : Vector Saturating Negate
4807 defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
4808 IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
4809 int_arm_neon_vqneg>;
4811 // Vector Bit Counting Operations.
4813 // VCLS : Vector Count Leading Sign Bits
// NOTE(review): the VCLS and VCLZ defms below end without their final
// intrinsic argument (original continuation lines appear lost) — confirm.
4814 defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
4815 IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
4817 // VCLZ : Vector Count Leading Zeros
4818 defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
4819 IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
4821 // VCNT : Vector Count One Bits
4822 def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
4823 IIC_VCNTiD, "vcnt", "8",
4824 v8i8, v8i8, int_arm_neon_vcnt>;
4825 def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
4826 IIC_VCNTiQ, "vcnt", "8",
4827 v16i8, v16i8, int_arm_neon_vcnt>;
// VSWP : Vector Swap. Modeled with tied operands ($Vm = $Vd, $Vm1 = $Vd1)
// since both registers are read and written.
4830 def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
4831 (outs DPR:$Vd, DPR:$Vd1), (ins DPR:$Vm, DPR:$Vm1),
4832 NoItinerary, "vswp", "$Vd, $Vd1", "$Vm = $Vd, $Vm1 = $Vd1",
4834 def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
4835 (outs QPR:$Vd, QPR:$Vd1), (ins QPR:$Vm, QPR:$Vm1),
4836 NoItinerary, "vswp", "$Vd, $Vd1", "$Vm = $Vd, $Vm1 = $Vd1",
4839 // Vector Move Operations.
4841 // VMOV : Vector Move (Register)
// Register-to-register vmov is just an alias for VORR with both source
// operands equal; no dedicated instruction is defined.
4842 def : InstAlias<"vmov${p} $Vd, $Vm",
4843 (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
4844 def : InstAlias<"vmov${p} $Vd, $Vm",
4845 (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
4847 // VMOV : Vector Move (Immediate)
// All immediate-form VMOVs are rematerializable: the value is fully
// encoded in the instruction, so it can be recomputed instead of spilled.
4849 let isReMaterializable = 1 in {
4850 def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
4851 (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
4852 "vmov", "i8", "$Vd, $SIMM", "",
4853 [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
4854 def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
4855 (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
4856 "vmov", "i8", "$Vd, $SIMM", "",
4857 [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;
// The i16/i32 forms leave parts of the cmode field (Inst{11-8}) open and
// fill them from the encoded modified-immediate operand bits.
4859 def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
4860 (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
4861 "vmov", "i16", "$Vd, $SIMM", "",
4862 [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
4863 let Inst{9} = SIMM{9};
4866 def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
4867 (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
4868 "vmov", "i16", "$Vd, $SIMM", "",
4869 [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
4870 let Inst{9} = SIMM{9};
4873 def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
4874 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
4875 "vmov", "i32", "$Vd, $SIMM", "",
4876 [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
4877 let Inst{11-8} = SIMM{11-8};
4880 def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
4881 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
4882 "vmov", "i32", "$Vd, $SIMM", "",
4883 [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
4884 let Inst{11-8} = SIMM{11-8};
4887 def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
4888 (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
4889 "vmov", "i64", "$Vd, $SIMM", "",
4890 [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
4891 def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
4892 (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
4893 "vmov", "i64", "$Vd, $SIMM", "",
4894 [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
4896 def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
4897 (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
4898 "vmov", "f32", "$Vd, $SIMM", "",
4899 [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>;
4900 def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
4901 (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
4902 "vmov", "f32", "$Vd, $SIMM", "",
4903 [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
4904 } // isReMaterializable
4906 // VMOV : Vector Get Lane (move scalar to ARM core register)
// The lane index is split across non-contiguous encoding bits; each def
// wires the relevant lane bits into Inst{21} / Inst{6-5} explicitly.
// NOTE(review): each pattern's lane operand line appears truncated
// (original lines after 4911, 4919, 4927, 4935, 4943 lost) — confirm.
4908 def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
4909 (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
4910 IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
4911 [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
4913 let Inst{21} = lane{2};
4914 let Inst{6-5} = lane{1-0};
4916 def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
4917 (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
4918 IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
4919 [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
4921 let Inst{21} = lane{1};
4922 let Inst{6} = lane{0};
4924 def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
4925 (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
4926 IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
4927 [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
4929 let Inst{21} = lane{2};
4930 let Inst{6-5} = lane{1-0};
4932 def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
4933 (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
4934 IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
4935 [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
4937 let Inst{21} = lane{1};
4938 let Inst{6} = lane{0};
4940 def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
4941 (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
4942 IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
4943 [(set GPR:$R, (extractelt (v2i32 DPR:$V),
4945 let Inst{21} = lane{0};
4947 // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
// Q-register lane extraction: pick the containing D sub-register, then
// extract from that with the lane index remapped into the D half.
4948 def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
4949 (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
4950 (DSubReg_i8_reg imm:$lane))),
4951 (SubReg_i8_lane imm:$lane))>;
4952 def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
4953 (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
4954 (DSubReg_i16_reg imm:$lane))),
4955 (SubReg_i16_lane imm:$lane))>;
4956 def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
4957 (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
4958 (DSubReg_i8_reg imm:$lane))),
4959 (SubReg_i8_lane imm:$lane))>;
4960 def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
4961 (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
4962 (DSubReg_i16_reg imm:$lane))),
4963 (SubReg_i16_lane imm:$lane))>;
4964 def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
4965 (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
4966 (DSubReg_i32_reg imm:$lane))),
4967 (SubReg_i32_lane imm:$lane))>;
// f32 lanes are reached through the overlapping S registers, so a
// COPY_TO_REGCLASS into the VFP-accessible class is required first.
4968 def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
4969 (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
4970 (SSubReg_f32_reg imm:$src2))>;
4971 def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
4972 (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
4973 (SSubReg_f32_reg imm:$src2))>;
4974 //def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
4975 // (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
4976 def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
4977 (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
4980 // VMOV : Vector Set Lane (move ARM core register to scalar)
// The destination vector is tied to the source ($src1 = $V): only one
// lane changes, the rest of the register is preserved.
4982 let Constraints = "$src1 = $V" in {
4983 def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
4984 (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
4985 IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
4986 [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
4987 GPR:$R, imm:$lane))]> {
4988 let Inst{21} = lane{2};
4989 let Inst{6-5} = lane{1-0};
4991 def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
4992 (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
4993 IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
4994 [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
4995 GPR:$R, imm:$lane))]> {
4996 let Inst{21} = lane{1};
4997 let Inst{6} = lane{0};
4999 def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
5000 (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
5001 IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
5002 [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
5003 GPR:$R, imm:$lane))]> {
5004 let Inst{21} = lane{0};
// Q-register lane insertion: do the set-lane on the containing D half,
// then re-insert that half back into the Q register.
5007 def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
5008 (v16i8 (INSERT_SUBREG QPR:$src1,
5009 (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
5010 (DSubReg_i8_reg imm:$lane))),
5011 GPR:$src2, (SubReg_i8_lane imm:$lane))),
5012 (DSubReg_i8_reg imm:$lane)))>;
5013 def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
5014 (v8i16 (INSERT_SUBREG QPR:$src1,
5015 (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
5016 (DSubReg_i16_reg imm:$lane))),
5017 GPR:$src2, (SubReg_i16_lane imm:$lane))),
5018 (DSubReg_i16_reg imm:$lane)))>;
5019 def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
5020 (v4i32 (INSERT_SUBREG QPR:$src1,
5021 (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
5022 (DSubReg_i32_reg imm:$lane))),
5023 GPR:$src2, (SubReg_i32_lane imm:$lane))),
5024 (DSubReg_i32_reg imm:$lane)))>;
// f32 lane insertion goes through the overlapping S registers.
5026 def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
5027 (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
5028 SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
5029 def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
5030 (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
5031 SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
5033 //def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
5034 // (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
5035 def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
5036 (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
// scalar_to_vector: place the scalar into element 0 of an undef vector.
5038 def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
5039 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
5040 def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
5041 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
5042 def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
5043 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
5045 def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
5046 (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
5047 def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
5048 (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
5049 def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
5050 (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
// NOTE(review): the three Q-register scalar_to_vector patterns below end
// without their final sub-register index line (originals after 5054,
// 5058, 5062 appear lost) — confirm against upstream.
5052 def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
5053 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
5054 (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
5056 def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
5057 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
5058 (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
5060 def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
5061 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
5062 (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
5065 // VDUP : Vector Duplicate (from ARM core register to all elements)
5067 class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
5068 : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
5069 IIC_VMOVIS, "vdup", Dt, "$V, $R",
5070 [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
5071 class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
5072 : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
5073 IIC_VMOVIS, "vdup", Dt, "$V, $R",
5074 [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
5076 def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>;
5077 def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
5078 def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>;
5079 def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>;
5080 def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
5081 def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;
// A float splat of a GPR-held bit pattern reuses the 32-bit integer dup.
5083 def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>;
5084 def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;
5086 // VDUP : Vector Duplicate Lane (from scalar to all elements)
5088 class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
5089 ValueType Ty, Operand IdxTy>
5090 : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
5091 IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
5092 [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>;
5094 class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
5095 ValueType ResTy, ValueType OpTy, Operand IdxTy>
5096 : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
5097 IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
5098 [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm),
5099 VectorIndex32:$lane)))]>;
5101 // Inst{19-16} is partially specified depending on the element size.
5103 def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
5105 let Inst{19-17} = lane{2-0};
5107 def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
5109 let Inst{19-18} = lane{1-0};
5111 def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
5113 let Inst{19} = lane{0};
5115 def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
5117 let Inst{19-17} = lane{2-0};
5119 def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
5121 let Inst{19-18} = lane{1-0};
5123 def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
5125 let Inst{19} = lane{0};
5128 def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
5129 (VDUPLN32d DPR:$Vm, imm:$lane)>;
5131 def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
5132 (VDUPLN32q DPR:$Vm, imm:$lane)>;
// Duplicating a lane of a Q register: extract the containing D half and
// dup from there with the lane remapped.
5134 def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
5135 (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
5136 (DSubReg_i8_reg imm:$lane))),
5137 (SubReg_i8_lane imm:$lane)))>;
5138 def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
5139 (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
5140 (DSubReg_i16_reg imm:$lane))),
5141 (SubReg_i16_lane imm:$lane)))>;
5142 def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
5143 (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
5144 (DSubReg_i32_reg imm:$lane))),
5145 (SubReg_i32_lane imm:$lane)))>;
5146 def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
5147 (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
5148 (DSubReg_i32_reg imm:$lane))),
5149 (SubReg_i32_lane imm:$lane)))>;
// Pseudos for duplicating an f32 held in an S register; expanded later.
5151 def VDUPfdf : PseudoNeonI<(outs DPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
5152 [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
5153 def VDUPfqf : PseudoNeonI<(outs QPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
5154 [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
5156 // VMOVN : Vector Narrowing Move
5157 defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
5158 "vmovn", "i", trunc>;
5159 // VQMOVN : Vector Saturating Narrowing Move
5160 defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
5161 "vqmovn", "s", int_arm_neon_vqmovns>;
5162 defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
5163 "vqmovn", "u", int_arm_neon_vqmovnu>;
5164 defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
5165 "vqmovun", "s", int_arm_neon_vqmovnsu>;
5166 // VMOVL : Vector Lengthening Move
5167 defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
5168 defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
// anyext can use either form; the unsigned (zero-extending) one is chosen.
5169 def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
5170 def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
5171 def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;
5173 // Vector Conversions.
5175 // VCVT : Vector Convert Between Floating-Point and Integers
5176 def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
5177 v2i32, v2f32, fp_to_sint>;
5178 def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
5179 v2i32, v2f32, fp_to_uint>;
5180 def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
5181 v2f32, v2i32, sint_to_fp>;
5182 def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
5183 v2f32, v2i32, uint_to_fp>;
5185 def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
5186 v4i32, v4f32, fp_to_sint>;
5187 def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
5188 v4i32, v4f32, fp_to_uint>;
5189 def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
5190 v4f32, v4i32, sint_to_fp>;
5191 def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
5192 v4f32, v4i32, uint_to_fp>;
5194 // VCVT : Vector Convert Between Floating-Point and Fixed-Point.
5195 let DecoderMethod = "DecodeVCVTD" in {
5196 def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
5197 v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
5198 def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
5199 v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
5200 def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
5201 v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
5202 def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
5203 v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
5206 let DecoderMethod = "DecodeVCVTQ" in {
5207 def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
5208 v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
5209 def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
5210 v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
5211 def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
5212 v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
5213 def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
5214 v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
5217 // VCVT : Vector Convert Between Half-Precision and Single-Precision.
// Half-precision conversions require the FP16 subtarget feature.
5218 def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
5219 IIC_VUNAQ, "vcvt", "f16.f32",
5220 v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
5221 Requires<[HasNEON, HasFP16]>;
5222 def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
5223 IIC_VUNAQ, "vcvt", "f32.f16",
5224 v4f32, v4i16, int_arm_neon_vcvthf2fp>,
5225 Requires<[HasNEON, HasFP16]>;
5229 // VREV64 : Vector Reverse elements within 64-bit doublewords
5231 class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
5232 : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
5233 (ins DPR:$Vm), IIC_VMOVD,
5234 OpcodeStr, Dt, "$Vd, $Vm", "",
5235 [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
5236 class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
5237 : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
5238 (ins QPR:$Vm), IIC_VMOVQ,
5239 OpcodeStr, Dt, "$Vd, $Vm", "",
5240 [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;
5242 def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>;
5243 def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
5244 def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
5245 def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
5247 def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>;
5248 def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
5249 def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
5250 def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;
5252 // VREV32 : Vector Reverse elements within 32-bit words
5254 class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
5255 : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
5256 (ins DPR:$Vm), IIC_VMOVD,
5257 OpcodeStr, Dt, "$Vd, $Vm", "",
5258 [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
5259 class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
5260 : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
5261 (ins QPR:$Vm), IIC_VMOVQ,
5262 OpcodeStr, Dt, "$Vd, $Vm", "",
5263 [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;
// Only 8- and 16-bit element sizes are meaningful within a 32-bit word.
5265 def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>;
5266 def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
5268 def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>;
5269 def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
5271 // VREV16 : Vector Reverse elements within 16-bit halfwords
5273 class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
5274 : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
5275 (ins DPR:$Vm), IIC_VMOVD,
5276 OpcodeStr, Dt, "$Vd, $Vm", "",
5277 [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
5278 class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
5279 : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
5280 (ins QPR:$Vm), IIC_VMOVQ,
5281 OpcodeStr, Dt, "$Vd, $Vm", "",
5282 [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;
5284 def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
5285 def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;
5287 // Other Vector Shuffles.
5289 // Aligned extractions: really just dropping registers
// Extracting a D-sized subvector at an aligned start index is free: it
// maps to EXTRACT_SUBREG of the appropriate half of the Q register.
5291 class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
5292 : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
5293 (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;
5295 def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;
5297 def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;
5299 def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;
5301 def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;
5303 def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
5306 // VEXT : Vector Extract
// Each element-size def narrows how many bits of Inst{11-8} carry the
// index; unused low bits are forced to zero for the wider element sizes.
5308 class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
5309 : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
5310 (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
5311 IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
5312 [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
5313 (Ty DPR:$Vm), imm:$index)))]> {
5315 let Inst{11-8} = index{3-0};
5318 class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
5319 : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
5320 (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
5321 IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
5322 [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
5323 (Ty QPR:$Vm), imm:$index)))]> {
5325 let Inst{11-8} = index{3-0};
5328 def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
5329 let Inst{11-8} = index{3-0};
5331 def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
5332 let Inst{11-9} = index{2-0};
5335 def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
5336 let Inst{11-10} = index{1-0};
5337 let Inst{9-8} = 0b00;
// NOTE(review): the two f32 NEONvext patterns below are missing their
// middle operand lines (originals 5340-5341 and 5360-5361) — confirm.
5339 def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
5342 (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
5344 def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
5345 let Inst{11-8} = index{3-0};
5347 def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
5348 let Inst{11-9} = index{2-0};
5351 def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
5352 let Inst{11-10} = index{1-0};
5353 let Inst{9-8} = 0b00;
5355 def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
5356 let Inst{11} = index{0};
5357 let Inst{10-8} = 0b000;
5359 def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
5362 (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
5364 // VTRN : Vector Transpose
5366 def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
5367 def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
5368 def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;
5370 def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
5371 def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
5372 def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
5374 // VUZP : Vector Unzip (Deinterleave)
5376 def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
5377 def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
5378 def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp", "32">;
5380 def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
5381 def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
5382 def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
5384 // VZIP : Vector Zip (Interleave)
5386 def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
5387 def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
5388 def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip", "32">;
5390 def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
5391 def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
5392 def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
5394 // Vector Table Lookup and Table Extension.
5396 // VTBL : Vector Table Lookup
// NOTE(review): throughout this section the `def NAME` lines appear to
// have been lost (each record starts directly at its `: N3V<...` or
// `: PseudoNeonI<...` base-class line, e.g. originals 5398, 5404, 5408,
// 5412, 5419, 5421, 5425, 5432, 5436, 5442, 5449, 5452 are absent).
// Reconstruct from upstream ARMInstrNEON.td before building.
5397 let DecoderMethod = "DecodeTBLInstruction" in {
5399 : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
5400 (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
5401 "vtbl", "8", "$Vd, $Vn, $Vm", "",
5402 [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
5403 let hasExtraSrcRegAllocReq = 1 in {
5405 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
5406 (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
5407 "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
5409 : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
5410 (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
5411 "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
5413 : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
5414 (ins VecListFourD:$Vn, DPR:$Vm),
5416 "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
5417 } // hasExtraSrcRegAllocReq = 1
5420 : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
5422 : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;
5424 // VTBX : Vector Table Extension
// VTBX differs from VTBL in that out-of-range indices leave the tied
// $orig destination lane unchanged, hence the "$orig = $Vd" constraint.
5426 : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
5427 (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
5428 "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
5429 [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
5430 DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
5431 let hasExtraSrcRegAllocReq = 1 in {
5433 : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
5434 (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
5435 "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
5437 : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
5438 (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
5439 NVTBLFrm, IIC_VTBX3,
5440 "vtbx", "8", "$Vd, $Vn, $Vm",
5443 : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
5444 (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
5445 "vtbx", "8", "$Vd, $Vn, $Vm",
5447 } // hasExtraSrcRegAllocReq = 1
5450 : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
5451 IIC_VTBX3, "$orig = $dst", []>;
5453 : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
5454 IIC_VTBX4, "$orig = $dst", []>;
5455 } // DecoderMethod = "DecodeTBLInstruction"
5457 //===----------------------------------------------------------------------===//
5458 // NEON instructions for single-precision FP math
5459 //===----------------------------------------------------------------------===//
5461 class N2VSPat<SDNode OpNode, NeonI Inst>
5462 : NEONFPPat<(f32 (OpNode SPR:$a)),
5464 (v2f32 (COPY_TO_REGCLASS (Inst
5466 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
5467 SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;
5469 class N3VSPat<SDNode OpNode, NeonI Inst>
5470 : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
5472 (v2f32 (COPY_TO_REGCLASS (Inst
5474 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
5477 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
5478 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
5480 class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
5481 : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
5483 (v2f32 (COPY_TO_REGCLASS (Inst
5485 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
5488 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
5491 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
5492 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
// Scalar f32 arithmetic mapped onto the D-register NEON instructions
// (selection is gated by the NEONFPPat predicates in the classes above).
def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
// Multiply-accumulate / multiply-subtract: VMLA/VMLS are used when NEON2 is
// not available (and VMLx use is enabled); VFMA/VFMS are used with NEON2
// when FP contractions are permitted.
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEON2]>;
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEON2]>;
def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
      Requires<[HasNEON2, UseNEONForFP,FPContractions]>;
def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
      Requires<[HasNEON2, UseNEONForFP,FPContractions]>;
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
def : N3VSPat<NEONfmax, VMAXfd>;
def : N3VSPat<NEONfmin, VMINfd>;
// Scalar f32 <-> i32 conversions via the NEON convert instructions.
def : N2VSPat<arm_ftosi, VCVTf2sd>;
def : N2VSPat<arm_ftoui, VCVTf2ud>;
def : N2VSPat<arm_sitof, VCVTs2fd>;
def : N2VSPat<arm_uitof, VCVTu2fd>;
5514 //===----------------------------------------------------------------------===//
5515 // Non-Instruction Patterns
5516 //===----------------------------------------------------------------------===//
// 64-bit vector bitconverts: every D-register vector type occupies the same
// physical register, so each bitconvert is a pure reinterpretation and maps
// to the source register unchanged.
def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>;
def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;

// 128-bit vector bitconverts: likewise no-ops on a Q register.
def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
5581 // Vector lengthening move with load, matching extending loads.
5583 // extload, zextload and sextload for a standard lengthening load. Example:
5584 // Lengthen_Single<"8", "i16", "i8"> = Pat<(v8i16 (extloadvi8 addrmode5:$addr))
5585 // (VMOVLuv8i16 (VLDRD addrmode5:$addr))>;
5586 multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
5587 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
5588 (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
5589 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
5590 (VLDRD addrmode5:$addr))>;
5591 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
5592 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
5593 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
5594 (VLDRD addrmode5:$addr))>;
5595 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
5596 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
5597 (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
5598 (VLDRD addrmode5:$addr))>;
5601 // extload, zextload and sextload for a lengthening load which only uses
5602 // half the lanes available. Example:
5603 // Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> =
5604 // Pat<(v4i16 (extloadvi8 addrmode5:$addr))
5605 // (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
5606 // (VLDRS addrmode5:$addr),
5609 multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
5610 string InsnLanes, string InsnTy> {
5611 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
5612 (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
5613 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
5614 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
5616 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
5617 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
5618 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
5619 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
5621 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
5622 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
5623 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
5624 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
5628 // extload, zextload and sextload for a lengthening load followed by another
5629 // lengthening load, to quadruple the initial length.
5630 // Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0> =
5631 // Pat<(v4i32 (extloadvi8 addrmode5:$addr))
5632 // (EXTRACT_SUBREG (VMOVLuv4i32
5633 // (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
5634 // (VLDRS addrmode5:$addr),
5638 multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
5639 string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
5640 string Insn2Ty, SubRegIndex RegType> {
5641 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
5642 (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
5643 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
5644 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
5645 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
5648 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
5649 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
5650 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
5651 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
5652 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
5655 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
5656 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
5657 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
5658 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
5659 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
// One-step lengthening loads: a full D register of source lanes widened once.
defm : Lengthen_Single<"8", "i16", "i8">; // v8i8 -> v8i16
defm : Lengthen_Single<"4", "i32", "i16">; // v4i16 -> v4i32
defm : Lengthen_Single<"2", "i64", "i32">; // v2i32 -> v2i64

// Half-width variants: only an S register's worth of lanes is loaded, then
// the low half of the widened result is extracted.
defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
defm : Lengthen_HalfSingle<"2", "i16", "i8", "8", "i16">; // v2i8 -> v2i16
defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32

// Double lengthening - v4i8 -> v4i16 -> v4i32
defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0>;
// v2i8 -> v2i16 -> v2i32
defm : Lengthen_Double<"2", "i32", "i8", "8", "i16", "4", "i32", dsub_0>;
// v2i16 -> v2i32 -> v2i64
defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64", qsub_0>;
5679 // Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
// Any-extend: load 32 bits into an S register, then widen three times
// (VMOVLuv8i16 -> VMOVLuv4i32 -> VMOVLuv2i64), taking the low D half
// (dsub_0) after each intermediate step.
def : Pat<(v2i64 (extloadvi8 addrmode5:$addr)),
      (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
        (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
        dsub_0)), dsub_0))>;
// Zero-extend: same structure, using the unsigned (VMOVLu*) widening moves.
def : Pat<(v2i64 (zextloadvi8 addrmode5:$addr)),
      (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
        (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
        dsub_0)), dsub_0))>;
// Sign-extend: same structure, using the signed (VMOVLs*) widening moves.
def : Pat<(v2i64 (sextloadvi8 addrmode5:$addr)),
      (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
        (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
        dsub_0)), dsub_0))>;
5693 //===----------------------------------------------------------------------===//
5694 // Assembler aliases
// Legacy VFP mnemonics for moving a GPR into the high (fmdhr, lane 1) or
// low (fmdlr, lane 0) half of a D register, mapped onto VSETLNi32.
def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;

// VADD two-operand aliases.
// Q-register forms.
def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm",
                    (VADDv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm",
                    (VADDv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm",
                    (VADDv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm",
                    (VADDv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
// D-register forms.
def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm",
                    (VADDv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm",
                    (VADDv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm",
                    (VADDv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm",
                    (VADDv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
// Floating-point forms.
def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm",
                    (VADDfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm",
                    (VADDfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

// VSUB two-operand aliases.
// Q-register forms.
def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm",
                    (VSUBv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm",
                    (VSUBv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm",
                    (VSUBv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm",
                    (VSUBv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
// D-register forms.
def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm",
                    (VSUBv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm",
                    (VSUBv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm",
                    (VSUBv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm",
                    (VSUBv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
// Floating-point forms.
def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm",
                    (VSUBfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm",
                    (VSUBfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;

// VADDW two-operand aliases (Q = Q + widened D).
def : NEONInstAlias<"vaddw${p}.s8 $Vdn, $Vm",
                    (VADDWsv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vaddw${p}.s16 $Vdn, $Vm",
                    (VADDWsv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vaddw${p}.s32 $Vdn, $Vm",
                    (VADDWsv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vaddw${p}.u8 $Vdn, $Vm",
                    (VADDWuv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vaddw${p}.u16 $Vdn, $Vm",
                    (VADDWuv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vaddw${p}.u32 $Vdn, $Vm",
                    (VADDWuv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
// Three-operand forms, D and Q registers.
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                          (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                          (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                          (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                          (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                          (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                          (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                          (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                          (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
// ... two-operand aliases (no type suffix).
def : NEONInstAlias<"vand${p} $Vdn, $Vm",
                    (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vand${p} $Vdn, $Vm",
                    (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vbic${p} $Vdn, $Vm",
                    (VBICd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vbic${p} $Vdn, $Vm",
                    (VBICq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"veor${p} $Vdn, $Vm",
                    (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"veor${p} $Vdn, $Vm",
                    (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vorr${p} $Vdn, $Vm",
                    (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vorr${p} $Vdn, $Vm",
                    (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
// Two-operand aliases with an (ignored) data-type suffix.
// NOTE(review): vbic has no two-operand NEONDTAnyInstAlias here, unlike
// vand/veor/vorr — confirm whether that omission is intentional.
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                          (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                          (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                          (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                          (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                          (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                          (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
// VMUL two-operand aliases.
// Q-register forms (polynomial, integer).
def : NEONInstAlias<"vmul${p}.p8 $Qdn, $Qm",
                    (VMULpq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vmul${p}.i8 $Qdn, $Qm",
                    (VMULv16i8 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Qm",
                    (VMULv8i16 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Qm",
                    (VMULv4i32 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
// D-register forms (polynomial, integer).
def : NEONInstAlias<"vmul${p}.p8 $Ddn, $Dm",
                    (VMULpd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vmul${p}.i8 $Ddn, $Dm",
                    (VMULv8i8 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm",
                    (VMULv4i16 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm",
                    (VMULv2i32 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
// Floating-point forms.
def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Qm",
                    (VMULfq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm",
                    (VMULfd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
// By-scalar (lane) forms. Note the restricted source register classes:
// DPR_8 for 16-bit lanes, DPR_VFP2 for 32-bit lanes.
def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm$lane",
                    (VMULslv4i16 DPR:$Ddn, DPR:$Ddn, DPR_8:$Dm,
                     VectorIndex16:$lane, pred:$p)>;
def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Dm$lane",
                    (VMULslv8i16 QPR:$Qdn, QPR:$Qdn, DPR_8:$Dm,
                     VectorIndex16:$lane, pred:$p)>;
def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm$lane",
                    (VMULslv2i32 DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm,
                     VectorIndex32:$lane, pred:$p)>;
def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Dm$lane",
                    (VMULslv4i32 QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm,
                     VectorIndex32:$lane, pred:$p)>;
def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm$lane",
                    (VMULslfd DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm,
                     VectorIndex32:$lane, pred:$p)>;
def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Dm$lane",
                    (VMULslfq QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm,
                     VectorIndex32:$lane, pred:$p)>;
// VQADD (register) two-operand aliases.
// D-register forms (signed, then unsigned).
def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm",
                    (VQADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm",
                    (VQADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm",
                    (VQADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm",
                    (VQADDsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm",
                    (VQADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm",
                    (VQADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm",
                    (VQADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm",
                    (VQADDuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
// Q-register forms (signed, then unsigned).
def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm",
                    (VQADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm",
                    (VQADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm",
                    (VQADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm",
                    (VQADDsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm",
                    (VQADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm",
                    (VQADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm",
                    (VQADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm",
                    (VQADDuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
// VSHL (immediate) two-operand aliases.
// D-register forms; the immediate range depends on the element size.
def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm",
                    (VSHLiv8i8 DPR:$Vdn, DPR:$Vdn, imm0_7:$imm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm",
                    (VSHLiv4i16 DPR:$Vdn, DPR:$Vdn, imm0_15:$imm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm",
                    (VSHLiv2i32 DPR:$Vdn, DPR:$Vdn, imm0_31:$imm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm",
                    (VSHLiv1i64 DPR:$Vdn, DPR:$Vdn, imm0_63:$imm, pred:$p)>;
// Q-register forms.
def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm",
                    (VSHLiv16i8 QPR:$Vdn, QPR:$Vdn, imm0_7:$imm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm",
                    (VSHLiv8i16 QPR:$Vdn, QPR:$Vdn, imm0_15:$imm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm",
                    (VSHLiv4i32 QPR:$Vdn, QPR:$Vdn, imm0_31:$imm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm",
                    (VSHLiv2i64 QPR:$Vdn, QPR:$Vdn, imm0_63:$imm, pred:$p)>;

// VSHL (register) two-operand aliases.
// D-register forms (signed, then unsigned).
def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm",
                    (VSHLsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm",
                    (VSHLsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm",
                    (VSHLsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm",
                    (VSHLsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm",
                    (VSHLuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm",
                    (VSHLuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm",
                    (VSHLuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm",
                    (VSHLuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
// Q-register forms (signed, then unsigned).
def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm",
                    (VSHLsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm",
                    (VSHLsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm",
                    (VSHLsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm",
                    (VSHLsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm",
                    (VSHLuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm",
                    (VSHLuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm",
                    (VSHLuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm",
                    (VSHLuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
// VSHR (immediate) two-operand aliases.
// Signed shifts, D-register forms; the shr_immN range depends on element size.
def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm",
                    (VSHRsv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm",
                    (VSHRsv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm",
                    (VSHRsv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm",
                    (VSHRsv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>;
// Signed shifts, Q-register forms.
def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm",
                    (VSHRsv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm",
                    (VSHRsv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm",
                    (VSHRsv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm",
                    (VSHRsv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>;
// Unsigned shifts, D-register forms.
def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm",
                    (VSHRuv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm",
                    (VSHRuv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm",
                    (VSHRuv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm",
                    (VSHRuv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>;
// Unsigned shifts, Q-register forms.
def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm",
                    (VSHRuv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm",
                    (VSHRuv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm",
                    (VSHRuv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>;
def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm",
                    (VSHRuv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>;
// VLD1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// No writeback.
def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
                   (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
                    (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
                    (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Post-increment writeback with fixed stride ("$addr!").
def VLD1LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD1LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
                  (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD1LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
                  (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Post-increment writeback with register offset ("$addr, $Rm").
def VLD1LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;

// VST1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// No writeback.
def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
                   (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
                    (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
                    (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Post-increment writeback with fixed stride.
def VST1LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST1LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
                  (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST1LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
                  (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Post-increment writeback with register offset.
def VST1LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
// VLD2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// No writeback; "d" forms use adjacent D registers, "q" forms use
// even/odd-spaced pairs (no .8 "q" form exists).
def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
                   (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
                    (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
                    (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
                    (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
                    (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Post-increment writeback with fixed stride ("$addr!").
def VLD2LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
                  (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
                  (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
                  (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Post-increment writeback with register offset ("$addr, $Rm").
def VLD2LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoQWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;

// VST2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// No writeback.
def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
                   (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
                    (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
                    (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
                    (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
                    (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Post-increment writeback with fixed stride.
def VST2LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
                  (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
                  (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
                  (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Post-increment writeback with register offset.
def VST2LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoQWordIndexed:$list, addrmode6:$addr,
                       rGPR:$Rm, pred:$p)>;
6146 // VLD3 all-lanes pseudo-instructions. These need special handling for
6147 // the lane index that an InstAlias can't handle, so we use these instead.
6148 def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
6149 (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
6150 def VLD3DUPdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
6151 (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
6152 def VLD3DUPdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
6153 (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
6154 def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
6155 (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
6156 def VLD3DUPqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
6157 (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
6158 def VLD3DUPqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
6159 (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
// Fixed-increment writeback forms ("$addr!").
6161 def VLD3DUPdWB_fixed_Asm_8 :
6162 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
6163 (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
6164 def VLD3DUPdWB_fixed_Asm_16 :
6165 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
6166 (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
6167 def VLD3DUPdWB_fixed_Asm_32 :
6168 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
6169 (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
6170 def VLD3DUPqWB_fixed_Asm_8 :
6171 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
6172 (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
6173 def VLD3DUPqWB_fixed_Asm_16 :
6174 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
6175 (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
6176 def VLD3DUPqWB_fixed_Asm_32 :
6177 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
6178 (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
6179 def VLD3DUPdWB_register_Asm_8 :
6180 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
6181 (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
6182 rGPR:$Rm, pred:$p)>;
6183 def VLD3DUPdWB_register_Asm_16 :
6184 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
6185 (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
6186 rGPR:$Rm, pred:$p)>;
6187 def VLD3DUPdWB_register_Asm_32 :
6188 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
6189 (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
6190 rGPR:$Rm, pred:$p)>;
6191 def VLD3DUPqWB_register_Asm_8 :
6192 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
6193 (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
6194 rGPR:$Rm, pred:$p)>;
6195 def VLD3DUPqWB_register_Asm_16 :
6196 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
6197 (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
6198 rGPR:$Rm, pred:$p)>;
6199 def VLD3DUPqWB_register_Asm_32 :
6200 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
6201 (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
6202 rGPR:$Rm, pred:$p)>;
6205 // VLD3 single-lane pseudo-instructions. These need special handling for
6206 // the lane index that an InstAlias can't handle, so we use these instead.
6207 def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
6208 (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
6209 def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
6210 (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6211 def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
6212 (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Q list variants; only 16- and 32-bit element sizes are defined here.
6213 def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
6214 (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6215 def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
6216 (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Fixed-increment writeback forms ("$addr!").
6218 def VLD3LNdWB_fixed_Asm_8 :
6219 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
6220 (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
6221 def VLD3LNdWB_fixed_Asm_16 :
6222 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
6223 (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6224 def VLD3LNdWB_fixed_Asm_32 :
6225 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
6226 (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6227 def VLD3LNqWB_fixed_Asm_16 :
6228 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
6229 (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6230 def VLD3LNqWB_fixed_Asm_32 :
6231 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
6232 (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
6233 def VLD3LNdWB_register_Asm_8 :
6234 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
6235 (ins VecListThreeDByteIndexed:$list, addrmode6:$addr,
6236 rGPR:$Rm, pred:$p)>;
6237 def VLD3LNdWB_register_Asm_16 :
6238 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
6239 (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr,
6240 rGPR:$Rm, pred:$p)>;
6241 def VLD3LNdWB_register_Asm_32 :
6242 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
6243 (ins VecListThreeDWordIndexed:$list, addrmode6:$addr,
6244 rGPR:$Rm, pred:$p)>;
6245 def VLD3LNqWB_register_Asm_16 :
6246 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
6247 (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr,
6248 rGPR:$Rm, pred:$p)>;
6249 def VLD3LNqWB_register_Asm_32 :
6250 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
6251 (ins VecListThreeQWordIndexed:$list, addrmode6:$addr,
6252 rGPR:$Rm, pred:$p)>;
6254 // VLD3 multiple structure pseudo-instructions. These need special handling for
6255 // the vector operands that the normal instructions don't yet model.
6256 // FIXME: Remove these when the register classes and instructions are updated.
6257 def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
6258 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
6259 def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
6260 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
6261 def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
6262 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
6263 def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
6264 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
6265 def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
6266 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
6267 def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
6268 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
// Fixed-increment writeback forms ("$addr!").
6270 def VLD3dWB_fixed_Asm_8 :
6271 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
6272 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
6273 def VLD3dWB_fixed_Asm_16 :
6274 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
6275 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
6276 def VLD3dWB_fixed_Asm_32 :
6277 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
6278 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
6279 def VLD3qWB_fixed_Asm_8 :
6280 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
6281 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
6282 def VLD3qWB_fixed_Asm_16 :
6283 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
6284 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
6285 def VLD3qWB_fixed_Asm_32 :
6286 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
6287 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
6288 def VLD3dWB_register_Asm_8 :
6289 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
6290 (ins VecListThreeD:$list, addrmode6:$addr,
6291 rGPR:$Rm, pred:$p)>;
6292 def VLD3dWB_register_Asm_16 :
6293 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
6294 (ins VecListThreeD:$list, addrmode6:$addr,
6295 rGPR:$Rm, pred:$p)>;
6296 def VLD3dWB_register_Asm_32 :
6297 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
6298 (ins VecListThreeD:$list, addrmode6:$addr,
6299 rGPR:$Rm, pred:$p)>;
6300 def VLD3qWB_register_Asm_8 :
6301 NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
6302 (ins VecListThreeQ:$list, addrmode6:$addr,
6303 rGPR:$Rm, pred:$p)>;
6304 def VLD3qWB_register_Asm_16 :
6305 NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
6306 (ins VecListThreeQ:$list, addrmode6:$addr,
6307 rGPR:$Rm, pred:$p)>;
6308 def VLD3qWB_register_Asm_32 :
6309 NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
6310 (ins VecListThreeQ:$list, addrmode6:$addr,
6311 rGPR:$Rm, pred:$p)>;
6313 // VST3 single-lane pseudo-instructions. These need special handling for
6314 // the lane index that an InstAlias can't handle, so we use these instead.
6315 def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
6316 (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
6317 def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
6318 (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6319 def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
6320 (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Q list variants; only 16- and 32-bit element sizes are defined here.
6321 def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
6322 (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6323 def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
6324 (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Fixed-increment writeback forms ("$addr!").
6326 def VST3LNdWB_fixed_Asm_8 :
6327 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
6328 (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
6329 def VST3LNdWB_fixed_Asm_16 :
6330 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
6331 (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6332 def VST3LNdWB_fixed_Asm_32 :
6333 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
6334 (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6335 def VST3LNqWB_fixed_Asm_16 :
6336 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
6337 (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6338 def VST3LNqWB_fixed_Asm_32 :
6339 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
6340 (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
6341 def VST3LNdWB_register_Asm_8 :
6342 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
6343 (ins VecListThreeDByteIndexed:$list, addrmode6:$addr,
6344 rGPR:$Rm, pred:$p)>;
6345 def VST3LNdWB_register_Asm_16 :
6346 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
6347 (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr,
6348 rGPR:$Rm, pred:$p)>;
6349 def VST3LNdWB_register_Asm_32 :
6350 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
6351 (ins VecListThreeDWordIndexed:$list, addrmode6:$addr,
6352 rGPR:$Rm, pred:$p)>;
6353 def VST3LNqWB_register_Asm_16 :
6354 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
6355 (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr,
6356 rGPR:$Rm, pred:$p)>;
6357 def VST3LNqWB_register_Asm_32 :
6358 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
6359 (ins VecListThreeQWordIndexed:$list, addrmode6:$addr,
6360 rGPR:$Rm, pred:$p)>;
6363 // VST3 multiple structure pseudo-instructions. These need special handling for
6364 // the vector operands that the normal instructions don't yet model.
6365 // FIXME: Remove these when the register classes and instructions are updated.
6366 def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
6367 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
6368 def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
6369 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
6370 def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
6371 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
6372 def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
6373 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
6374 def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
6375 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
6376 def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
6377 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
// Fixed-increment writeback forms ("$addr!").
6379 def VST3dWB_fixed_Asm_8 :
6380 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
6381 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
6382 def VST3dWB_fixed_Asm_16 :
6383 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
6384 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
6385 def VST3dWB_fixed_Asm_32 :
6386 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
6387 (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
6388 def VST3qWB_fixed_Asm_8 :
6389 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
6390 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
6391 def VST3qWB_fixed_Asm_16 :
6392 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
6393 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
6394 def VST3qWB_fixed_Asm_32 :
6395 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
6396 (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
6397 def VST3dWB_register_Asm_8 :
6398 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
6399 (ins VecListThreeD:$list, addrmode6:$addr,
6400 rGPR:$Rm, pred:$p)>;
6401 def VST3dWB_register_Asm_16 :
6402 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
6403 (ins VecListThreeD:$list, addrmode6:$addr,
6404 rGPR:$Rm, pred:$p)>;
6405 def VST3dWB_register_Asm_32 :
6406 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
6407 (ins VecListThreeD:$list, addrmode6:$addr,
6408 rGPR:$Rm, pred:$p)>;
6409 def VST3qWB_register_Asm_8 :
6410 NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
6411 (ins VecListThreeQ:$list, addrmode6:$addr,
6412 rGPR:$Rm, pred:$p)>;
6413 def VST3qWB_register_Asm_16 :
6414 NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
6415 (ins VecListThreeQ:$list, addrmode6:$addr,
6416 rGPR:$Rm, pred:$p)>;
6417 def VST3qWB_register_Asm_32 :
6418 NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
6419 (ins VecListThreeQ:$list, addrmode6:$addr,
6420 rGPR:$Rm, pred:$p)>;
6422 // VLD4 all-lanes pseudo-instructions. These need special handling for
6423 // the lane index that an InstAlias can't handle, so we use these instead.
6424 def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
6425 (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
6426 def VLD4DUPdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
6427 (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
6428 def VLD4DUPdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
6429 (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
6430 def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
6431 (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
6432 def VLD4DUPqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
6433 (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
6434 def VLD4DUPqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
6435 (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
// Fixed-increment writeback forms ("$addr!").
6437 def VLD4DUPdWB_fixed_Asm_8 :
6438 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
6439 (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
6440 def VLD4DUPdWB_fixed_Asm_16 :
6441 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
6442 (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
6443 def VLD4DUPdWB_fixed_Asm_32 :
6444 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
6445 (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
6446 def VLD4DUPqWB_fixed_Asm_8 :
6447 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
6448 (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
6449 def VLD4DUPqWB_fixed_Asm_16 :
6450 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
6451 (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
6452 def VLD4DUPqWB_fixed_Asm_32 :
6453 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
6454 (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
6455 def VLD4DUPdWB_register_Asm_8 :
6456 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
6457 (ins VecListFourDAllLanes:$list, addrmode6:$addr,
6458 rGPR:$Rm, pred:$p)>;
6459 def VLD4DUPdWB_register_Asm_16 :
6460 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
6461 (ins VecListFourDAllLanes:$list, addrmode6:$addr,
6462 rGPR:$Rm, pred:$p)>;
6463 def VLD4DUPdWB_register_Asm_32 :
6464 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
6465 (ins VecListFourDAllLanes:$list, addrmode6:$addr,
6466 rGPR:$Rm, pred:$p)>;
6467 def VLD4DUPqWB_register_Asm_8 :
6468 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
6469 (ins VecListFourQAllLanes:$list, addrmode6:$addr,
6470 rGPR:$Rm, pred:$p)>;
6471 def VLD4DUPqWB_register_Asm_16 :
6472 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
6473 (ins VecListFourQAllLanes:$list, addrmode6:$addr,
6474 rGPR:$Rm, pred:$p)>;
6475 def VLD4DUPqWB_register_Asm_32 :
6476 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
6477 (ins VecListFourQAllLanes:$list, addrmode6:$addr,
6478 rGPR:$Rm, pred:$p)>;
6481 // VLD4 single-lane pseudo-instructions. These need special handling for
6482 // the lane index that an InstAlias can't handle, so we use these instead.
6483 def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
6484 (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
6485 def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
6486 (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6487 def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
6488 (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Q list variants; only 16- and 32-bit element sizes are defined here.
6489 def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
6490 (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6491 def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
6492 (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Fixed-increment writeback forms ("$addr!").
6494 def VLD4LNdWB_fixed_Asm_8 :
6495 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
6496 (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
6497 def VLD4LNdWB_fixed_Asm_16 :
6498 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
6499 (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6500 def VLD4LNdWB_fixed_Asm_32 :
6501 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
6502 (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6503 def VLD4LNqWB_fixed_Asm_16 :
6504 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
6505 (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6506 def VLD4LNqWB_fixed_Asm_32 :
6507 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
6508 (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
6509 def VLD4LNdWB_register_Asm_8 :
6510 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
6511 (ins VecListFourDByteIndexed:$list, addrmode6:$addr,
6512 rGPR:$Rm, pred:$p)>;
6513 def VLD4LNdWB_register_Asm_16 :
6514 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
6515 (ins VecListFourDHWordIndexed:$list, addrmode6:$addr,
6516 rGPR:$Rm, pred:$p)>;
6517 def VLD4LNdWB_register_Asm_32 :
6518 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
6519 (ins VecListFourDWordIndexed:$list, addrmode6:$addr,
6520 rGPR:$Rm, pred:$p)>;
6521 def VLD4LNqWB_register_Asm_16 :
6522 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
6523 (ins VecListFourQHWordIndexed:$list, addrmode6:$addr,
6524 rGPR:$Rm, pred:$p)>;
6525 def VLD4LNqWB_register_Asm_32 :
6526 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
6527 (ins VecListFourQWordIndexed:$list, addrmode6:$addr,
6528 rGPR:$Rm, pred:$p)>;
6532 // VLD4 multiple structure pseudo-instructions. These need special handling for
6533 // the vector operands that the normal instructions don't yet model.
6534 // FIXME: Remove these when the register classes and instructions are updated.
6535 def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
6536 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
6537 def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
6538 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
6539 def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
6540 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
6541 def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
6542 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
6543 def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
6544 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
6545 def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
6546 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
// Fixed-increment writeback forms ("$addr!").
6548 def VLD4dWB_fixed_Asm_8 :
6549 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
6550 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
6551 def VLD4dWB_fixed_Asm_16 :
6552 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
6553 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
6554 def VLD4dWB_fixed_Asm_32 :
6555 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
6556 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
6557 def VLD4qWB_fixed_Asm_8 :
6558 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
6559 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
6560 def VLD4qWB_fixed_Asm_16 :
6561 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
6562 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
6563 def VLD4qWB_fixed_Asm_32 :
6564 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
6565 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
6566 def VLD4dWB_register_Asm_8 :
6567 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
6568 (ins VecListFourD:$list, addrmode6:$addr,
6569 rGPR:$Rm, pred:$p)>;
6570 def VLD4dWB_register_Asm_16 :
6571 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
6572 (ins VecListFourD:$list, addrmode6:$addr,
6573 rGPR:$Rm, pred:$p)>;
6574 def VLD4dWB_register_Asm_32 :
6575 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
6576 (ins VecListFourD:$list, addrmode6:$addr,
6577 rGPR:$Rm, pred:$p)>;
6578 def VLD4qWB_register_Asm_8 :
6579 NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
6580 (ins VecListFourQ:$list, addrmode6:$addr,
6581 rGPR:$Rm, pred:$p)>;
6582 def VLD4qWB_register_Asm_16 :
6583 NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
6584 (ins VecListFourQ:$list, addrmode6:$addr,
6585 rGPR:$Rm, pred:$p)>;
6586 def VLD4qWB_register_Asm_32 :
6587 NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
6588 (ins VecListFourQ:$list, addrmode6:$addr,
6589 rGPR:$Rm, pred:$p)>;
6591 // VST4 single-lane pseudo-instructions. These need special handling for
6592 // the lane index that an InstAlias can't handle, so we use these instead.
6593 def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
6594 (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
6595 def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
6596 (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6597 def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
6598 (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Q list variants; only 16- and 32-bit element sizes are defined here.
6599 def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
6600 (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6601 def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
6602 (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Fixed-increment writeback forms ("$addr!").
6604 def VST4LNdWB_fixed_Asm_8 :
6605 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
6606 (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
6607 def VST4LNdWB_fixed_Asm_16 :
6608 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
6609 (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6610 def VST4LNdWB_fixed_Asm_32 :
6611 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
6612 (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6613 def VST4LNqWB_fixed_Asm_16 :
6614 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
6615 (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
6616 def VST4LNqWB_fixed_Asm_32 :
6617 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
6618 (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
6619 def VST4LNdWB_register_Asm_8 :
6620 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
6621 (ins VecListFourDByteIndexed:$list, addrmode6:$addr,
6622 rGPR:$Rm, pred:$p)>;
6623 def VST4LNdWB_register_Asm_16 :
6624 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
6625 (ins VecListFourDHWordIndexed:$list, addrmode6:$addr,
6626 rGPR:$Rm, pred:$p)>;
6627 def VST4LNdWB_register_Asm_32 :
6628 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
6629 (ins VecListFourDWordIndexed:$list, addrmode6:$addr,
6630 rGPR:$Rm, pred:$p)>;
6631 def VST4LNqWB_register_Asm_16 :
6632 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
6633 (ins VecListFourQHWordIndexed:$list, addrmode6:$addr,
6634 rGPR:$Rm, pred:$p)>;
6635 def VST4LNqWB_register_Asm_32 :
6636 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
6637 (ins VecListFourQWordIndexed:$list, addrmode6:$addr,
6638 rGPR:$Rm, pred:$p)>;
6641 // VST4 multiple structure pseudo-instructions. These need special handling for
6642 // the vector operands that the normal instructions don't yet model.
6643 // FIXME: Remove these when the register classes and instructions are updated.
6644 def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
6645 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
6646 def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
6647 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
6648 def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
6649 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
6650 def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
6651 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
6652 def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
6653 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
6654 def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
6655 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
// Fixed-increment writeback forms ("$addr!").
6657 def VST4dWB_fixed_Asm_8 :
6658 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
6659 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
6660 def VST4dWB_fixed_Asm_16 :
6661 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
6662 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
6663 def VST4dWB_fixed_Asm_32 :
6664 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
6665 (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
6666 def VST4qWB_fixed_Asm_8 :
6667 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
6668 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
6669 def VST4qWB_fixed_Asm_16 :
6670 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
6671 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
6672 def VST4qWB_fixed_Asm_32 :
6673 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
6674 (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
// Register-increment writeback forms ("$addr, $Rm").
6675 def VST4dWB_register_Asm_8 :
6676 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
6677 (ins VecListFourD:$list, addrmode6:$addr,
6678 rGPR:$Rm, pred:$p)>;
6679 def VST4dWB_register_Asm_16 :
6680 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
6681 (ins VecListFourD:$list, addrmode6:$addr,
6682 rGPR:$Rm, pred:$p)>;
6683 def VST4dWB_register_Asm_32 :
6684 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
6685 (ins VecListFourD:$list, addrmode6:$addr,
6686 rGPR:$Rm, pred:$p)>;
6687 def VST4qWB_register_Asm_8 :
6688 NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
6689 (ins VecListFourQ:$list, addrmode6:$addr,
6690 rGPR:$Rm, pred:$p)>;
6691 def VST4qWB_register_Asm_16 :
6692 NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
6693 (ins VecListFourQ:$list, addrmode6:$addr,
6694 rGPR:$Rm, pred:$p)>;
6695 def VST4qWB_register_Asm_32 :
6696 NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
6697 (ins VecListFourQ:$list, addrmode6:$addr,
6698 rGPR:$Rm, pred:$p)>;
6700 // VMOV takes an optional datatype suffix
// Register-to-register vmov maps onto VORR with both source operands
// the same register ($Vm, $Vm), which copies $Vm into $Vd.
6701 defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
6702 (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
6703 defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
6704 (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
6706 // VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
// (Note: the written order "$Dd, $Dn, $Dm" maps to VCGE with $Dm and $Dn
// swapped, since a <= b is equivalent to b >= a.)
6707 // D-register versions.
6708 def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
6709 (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
6710 def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
6711 (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
6712 def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
6713 (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
6714 def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
6715 (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
6716 def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
6717 (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
6718 def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
6719 (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
6720 def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
6721 (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
6722 // Q-register versions.
6723 def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
6724 (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
6725 def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
6726 (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
6727 def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
6728 (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
6729 def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
6730 (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
6731 def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
6732 (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
6733 def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
6734 (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
6735 def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
6736 (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
6738 // VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
// Each alias swaps $Dn/$Dm (resp. $Qn/$Qm) when mapping onto the VCGT
// instruction, so "vclt a, b, c" becomes "vcgt a, c, b".
6739 // D-register versions.
6740 def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
6741 (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
6742 def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
6743 (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
6744 def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
6745 (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
6746 def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
6747 (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
6748 def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
6749 (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
6750 def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
6751 (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
6752 def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
6753 (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
6754 // Q-register versions.
6755 def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
6756 (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
6757 def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
6758 (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
6759 def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
6760 (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
6761 def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
6762 (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
6763 def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
6764 (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
6765 def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
6766 (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
6767 def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
6768 (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
6770 // Two-operand variants for VEXT
// "vext Vdn, Vm, imm" expands to the three-operand form with $Vdn duplicated
// as both destination and first source. The immediate operand class shrinks
// as the element size grows (index counts elements, not bytes).
6771 def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm",
6772 (VEXTd8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_7:$imm, pred:$p)>;
6773 def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm",
6774 (VEXTd16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_3:$imm, pred:$p)>;
6775 def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm",
6776 (VEXTd32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_1:$imm, pred:$p)>;
// Q-register versions (twice as many elements, so twice the immediate range).
6778 def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm",
6779 (VEXTq8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_15:$imm, pred:$p)>;
6780 def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm",
6781 (VEXTq16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_7:$imm, pred:$p)>;
6782 def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm",
6783 (VEXTq32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_3:$imm, pred:$p)>;
6784 def : NEONInstAlias<"vext${p}.64 $Vdn, $Vm, $imm",
6785 (VEXTq64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_1:$imm, pred:$p)>;
6787 // Two-operand variants for VQDMULH
// "$Vdn, $Vm" expands to the three-operand instruction with $Vdn duplicated
// as destination and first source. D-register then Q-register versions.
6788 def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm",
6789 (VQDMULHv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
6790 def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm",
6791 (VQDMULHv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
6793 def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm",
6794 (VQDMULHv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
6795 def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm",
6796 (VQDMULHv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
6798 // Two-operand variants for VMAX.
// $Vdn is duplicated as destination and first source. D-register versions
// first, then Q-register versions, for each signed/unsigned/f32 type.
6799 def : NEONInstAlias<"vmax${p}.s8 $Vdn, $Vm",
6800 (VMAXsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
6801 def : NEONInstAlias<"vmax${p}.s16 $Vdn, $Vm",
6802 (VMAXsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
6803 def : NEONInstAlias<"vmax${p}.s32 $Vdn, $Vm",
6804 (VMAXsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
6805 def : NEONInstAlias<"vmax${p}.u8 $Vdn, $Vm",
6806 (VMAXuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
6807 def : NEONInstAlias<"vmax${p}.u16 $Vdn, $Vm",
6808 (VMAXuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
6809 def : NEONInstAlias<"vmax${p}.u32 $Vdn, $Vm",
6810 (VMAXuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
6811 def : NEONInstAlias<"vmax${p}.f32 $Vdn, $Vm",
6812 (VMAXfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
6814 def : NEONInstAlias<"vmax${p}.s8 $Vdn, $Vm",
6815 (VMAXsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
6816 def : NEONInstAlias<"vmax${p}.s16 $Vdn, $Vm",
6817 (VMAXsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
6818 def : NEONInstAlias<"vmax${p}.s32 $Vdn, $Vm",
6819 (VMAXsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
6820 def : NEONInstAlias<"vmax${p}.u8 $Vdn, $Vm",
6821 (VMAXuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
6822 def : NEONInstAlias<"vmax${p}.u16 $Vdn, $Vm",
6823 (VMAXuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
6824 def : NEONInstAlias<"vmax${p}.u32 $Vdn, $Vm",
6825 (VMAXuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
6826 def : NEONInstAlias<"vmax${p}.f32 $Vdn, $Vm",
6827 (VMAXfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
6829 // Two-operand variants for VMIN.
// Mirrors the VMAX aliases above: $Vdn duplicated as destination and first
// source; D-register versions first, then Q-register versions.
6830 def : NEONInstAlias<"vmin${p}.s8 $Vdn, $Vm",
6831 (VMINsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
6832 def : NEONInstAlias<"vmin${p}.s16 $Vdn, $Vm",
6833 (VMINsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
6834 def : NEONInstAlias<"vmin${p}.s32 $Vdn, $Vm",
6835 (VMINsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
6836 def : NEONInstAlias<"vmin${p}.u8 $Vdn, $Vm",
6837 (VMINuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
6838 def : NEONInstAlias<"vmin${p}.u16 $Vdn, $Vm",
6839 (VMINuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
6840 def : NEONInstAlias<"vmin${p}.u32 $Vdn, $Vm",
6841 (VMINuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
6842 def : NEONInstAlias<"vmin${p}.f32 $Vdn, $Vm",
6843 (VMINfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
6845 def : NEONInstAlias<"vmin${p}.s8 $Vdn, $Vm",
6846 (VMINsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
6847 def : NEONInstAlias<"vmin${p}.s16 $Vdn, $Vm",
6848 (VMINsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
6849 def : NEONInstAlias<"vmin${p}.s32 $Vdn, $Vm",
6850 (VMINsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
6851 def : NEONInstAlias<"vmin${p}.u8 $Vdn, $Vm",
6852 (VMINuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
6853 def : NEONInstAlias<"vmin${p}.u16 $Vdn, $Vm",
6854 (VMINuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
6855 def : NEONInstAlias<"vmin${p}.u32 $Vdn, $Vm",
6856 (VMINuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
6857 def : NEONInstAlias<"vmin${p}.f32 $Vdn, $Vm",
6858 (VMINfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
6860 // Two-operand variants for VPADD.
// D-register only; $Vdn is duplicated as destination and first source.
6861 def : NEONInstAlias<"vpadd${p}.i8 $Vdn, $Vm",
6862 (VPADDi8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
6863 def : NEONInstAlias<"vpadd${p}.i16 $Vdn, $Vm",
6864 (VPADDi16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
6865 def : NEONInstAlias<"vpadd${p}.i32 $Vdn, $Vm",
6866 (VPADDi32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
6867 def : NEONInstAlias<"vpadd${p}.f32 $Vdn, $Vm",
6868 (VPADDf DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
6870 // Two-operand variants for VSRA.
// $Vdm is duplicated as destination and shifted source; the shr_imm* operand
// class matches the element size. Signed (.s*) variants first, D then Q.
6872 def : NEONInstAlias<"vsra${p}.s8 $Vdm, $imm",
6873 (VSRAsv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>;
6874 def : NEONInstAlias<"vsra${p}.s16 $Vdm, $imm",
6875 (VSRAsv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>;
6876 def : NEONInstAlias<"vsra${p}.s32 $Vdm, $imm",
6877 (VSRAsv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>;
6878 def : NEONInstAlias<"vsra${p}.s64 $Vdm, $imm",
6879 (VSRAsv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>;
6881 def : NEONInstAlias<"vsra${p}.s8 $Vdm, $imm",
6882 (VSRAsv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>;
6883 def : NEONInstAlias<"vsra${p}.s16 $Vdm, $imm",
6884 (VSRAsv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>;
6885 def : NEONInstAlias<"vsra${p}.s32 $Vdm, $imm",
6886 (VSRAsv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>;
6887 def : NEONInstAlias<"vsra${p}.s64 $Vdm, $imm",
6888 (VSRAsv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>;
// Unsigned (.u*) variants, D then Q.
6891 def : NEONInstAlias<"vsra${p}.u8 $Vdm, $imm",
6892 (VSRAuv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>;
6893 def : NEONInstAlias<"vsra${p}.u16 $Vdm, $imm",
6894 (VSRAuv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>;
6895 def : NEONInstAlias<"vsra${p}.u32 $Vdm, $imm",
6896 (VSRAuv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>;
6897 def : NEONInstAlias<"vsra${p}.u64 $Vdm, $imm",
6898 (VSRAuv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>;
6900 def : NEONInstAlias<"vsra${p}.u8 $Vdm, $imm",
6901 (VSRAuv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>;
6902 def : NEONInstAlias<"vsra${p}.u16 $Vdm, $imm",
6903 (VSRAuv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>;
6904 def : NEONInstAlias<"vsra${p}.u32 $Vdm, $imm",
6905 (VSRAuv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>;
6906 def : NEONInstAlias<"vsra${p}.u64 $Vdm, $imm",
6907 (VSRAuv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>;
6909 // Two-operand variants for VSRI.
// $Vdm is duplicated as destination and shifted source; D then Q versions.
6910 def : NEONInstAlias<"vsri${p}.8 $Vdm, $imm",
6911 (VSRIv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>;
6912 def : NEONInstAlias<"vsri${p}.16 $Vdm, $imm",
6913 (VSRIv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>;
6914 def : NEONInstAlias<"vsri${p}.32 $Vdm, $imm",
6915 (VSRIv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>;
6916 def : NEONInstAlias<"vsri${p}.64 $Vdm, $imm",
6917 (VSRIv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>;
6919 def : NEONInstAlias<"vsri${p}.8 $Vdm, $imm",
6920 (VSRIv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>;
6921 def : NEONInstAlias<"vsri${p}.16 $Vdm, $imm",
6922 (VSRIv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>;
6923 def : NEONInstAlias<"vsri${p}.32 $Vdm, $imm",
6924 (VSRIv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>;
6925 def : NEONInstAlias<"vsri${p}.64 $Vdm, $imm",
6926 (VSRIv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>;
6928 // Two-operand variants for VSLI.
// $Vdm is duplicated as destination and shifted source; D then Q versions.
// NOTE(review): VSLI is a shift-LEFT insert, whose architectural immediate
// range is 0..(elem_size-1), yet these aliases reuse the shr_imm* operand
// classes (right-shift range 1..elem_size). Verify this matches the operand
// class used by the VSLIv* instruction definitions before changing either.
6929 def : NEONInstAlias<"vsli${p}.8 $Vdm, $imm",
6930 (VSLIv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>;
6931 def : NEONInstAlias<"vsli${p}.16 $Vdm, $imm",
6932 (VSLIv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>;
6933 def : NEONInstAlias<"vsli${p}.32 $Vdm, $imm",
6934 (VSLIv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>;
6935 def : NEONInstAlias<"vsli${p}.64 $Vdm, $imm",
6936 (VSLIv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>;
6938 def : NEONInstAlias<"vsli${p}.8 $Vdm, $imm",
6939 (VSLIv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>;
6940 def : NEONInstAlias<"vsli${p}.16 $Vdm, $imm",
6941 (VSLIv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>;
6942 def : NEONInstAlias<"vsli${p}.32 $Vdm, $imm",
6943 (VSLIv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>;
6944 def : NEONInstAlias<"vsli${p}.64 $Vdm, $imm",
6945 (VSLIv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>;
6947 // VSWP allows, but does not require, a type suffix.
// NEONDTAnyInstAlias accepts any datatype suffix for these bitwise/permute
// operations; the suffix does not affect the encoding chosen.
6948 defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
6949 (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
6950 defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
6951 (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;
6953 // VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
6954 defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
6955 (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
6956 defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
6957 (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
6958 defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
6959 (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
6960 defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
6961 (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
6962 defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
6963 (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
6964 defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
6965 (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
6967 // "vmov Rd, #-imm" can be handled via "vmvn".
// A vmov of a negated modified-immediate maps to VMVN of the complemented
// value, and vice versa; nImmVMOVI32Neg is the negated-immediate operand.
6968 def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
6969 (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
6970 def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
6971 (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
6972 def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
6973 (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
6974 def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
6975 (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
6977 // 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
6978 // these should restrict to just the Q register variants, but the register
6979 // classes are enough to match correctly regardless, so we keep it simple
6980 // and just use MnemonicAlias.
6981 def : NEONMnemonicAlias<"vbicq", "vbic">;
6982 def : NEONMnemonicAlias<"vandq", "vand">;
6983 def : NEONMnemonicAlias<"veorq", "veor">;
6984 def : NEONMnemonicAlias<"vorrq", "vorr">;
6986 def : NEONMnemonicAlias<"vmovq", "vmov">;
6987 def : NEONMnemonicAlias<"vmvnq", "vmvn">;
6988 // Explicit versions for floating point so that the FPImm variants get
6989 // handled early. The parser gets confused otherwise.
6990 def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
6991 def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;
6993 def : NEONMnemonicAlias<"vaddq", "vadd">;
6994 def : NEONMnemonicAlias<"vsubq", "vsub">;
6996 def : NEONMnemonicAlias<"vminq", "vmin">;
6997 def : NEONMnemonicAlias<"vmaxq", "vmax">;
6999 def : NEONMnemonicAlias<"vmulq", "vmul">;
7001 def : NEONMnemonicAlias<"vabsq", "vabs">;
7003 def : NEONMnemonicAlias<"vshlq", "vshl">;
7004 def : NEONMnemonicAlias<"vshrq", "vshr">;
7006 def : NEONMnemonicAlias<"vcvtq", "vcvt">;
7008 def : NEONMnemonicAlias<"vcleq", "vcle">;
7009 def : NEONMnemonicAlias<"vceqq", "vceq">;
7011 def : NEONMnemonicAlias<"vzipq", "vzip">;
7012 def : NEONMnemonicAlias<"vswpq", "vswp">;
// Like the f32/f64 vmov cases above, these carry the datatype suffix in the
// alias so the suffixed form is recognized directly.
7014 def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
7015 def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;
7018 // Alias for loading floating point immediates that aren't representable
7019 // using the vmov.f32 encoding but the bitpattern is representable using
7020 // the .i32 encoding.
// The Q-register form is listed first; both accept an nImmVMOVI32 modified
// immediate and emit the integer VMOVv* instruction.
7021 def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
7022 (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
7023 def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
7024 (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;