1 //===- ARMInstrNEON.td - NEON support for ARM -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the ARM NEON instruction set.
12 //===----------------------------------------------------------------------===//
14 //===----------------------------------------------------------------------===//
15 // NEON-specific DAG Nodes.
16 //===----------------------------------------------------------------------===//
// Type profile shared by the NEON compare nodes below: one result and two
// operands. The result (index 0) is constrained to an integer type and the
// two operands (indices 1 and 2) are constrained to the same type as each
// other.
18 def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
// Compare nodes. Each binds a TableGen name to an ARMISD opcode and is typed
// by SDTARMVCMP; no additional node properties are listed.
20 def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
21 def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
22 def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
23 def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
24 def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
25 def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>;
27 // Types for vector shift by immediates. The "SHX" version is for long and
28 // narrow operations where the source and destination vectors have different
29 // types. The "SHINS" version is for shift and insert operations.
30 def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
32 def SDTARMVSHX : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
// Shift-and-insert profile: one result and three operands. The result is an
// integer type, operands 1 and 2 have the same type as the result, and
// operand 3 is an i32.
34 def SDTARMVSHINS : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
35 SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
// Shift-by-immediate nodes. Each def maps a TableGen name to an ARMISD opcode
// and selects one of three type profiles:
//   SDTARMVSH    - result and source share a type (VSHL, VSHR*, VRSHR*,
//                  VQSHL*).
//   SDTARMVSHX   - long/narrow forms whose source and result types differ
//                  (VSHLL*, VSHRN, VRSHRN, VQSHRN*, VQRSHRN*).
//   SDTARMVSHINS - shift-and-insert forms (VSLI, VSRI).
37 def NEONvshl : SDNode<"ARMISD::VSHL", SDTARMVSH>;
38 def NEONvshrs : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
39 def NEONvshru : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
40 def NEONvshlls : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>;
41 def NEONvshllu : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>;
42 def NEONvshlli : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>;
43 def NEONvshrn : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;
45 def NEONvrshrs : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
46 def NEONvrshru : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
47 def NEONvrshrn : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;
49 def NEONvqshls : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
50 def NEONvqshlu : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
51 def NEONvqshlsu : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
52 def NEONvqshrns : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
53 def NEONvqshrnu : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
54 def NEONvqshrnsu : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;
56 def NEONvqrshrns : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
57 def NEONvqrshrnu : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
58 def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;
60 def NEONvsli : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
61 def NEONvsri : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;
63 def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
// Lane-extract nodes typed by SDTARMVGETLN, bound to ARMISD::VGETLANEu and
// ARMISD::VGETLANEs respectively.
// NOTE(review): the u/s suffixes presumably select zero- vs. sign-extension
// of the extracted lane into the i32 result - confirm against the lowering.
65 def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
66 def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
// Vector move-immediate profile: one vector result built from a single i32
// operand.
68 def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
// VMOVIMM and VMVNIMM share the profile above.
69 def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
70 def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
// VDUP uses an inline profile: one vector result and one operand whose type
// is left unconstrained.
72 def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
74 // VDUPLANE can produce a quad-register result from a double-register source,
75 // so the result is not constrained to match the source.
76 def NEONvduplane : SDNode<"ARMISD::VDUPLANE",
77 SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
// VEXT profile: one vector result and three operands. Operands 1 and 2 have
// the same type as the result; operand 3 is an i32.
80 def SDTARMVEXT : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
81 SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
82 def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>;
// Single-input shuffle profile: one vector result and one operand of the same
// type as the result.
84 def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
// VREV64 / VREV32 / VREV16 nodes, all typed by SDTARMVSHUF.
85 def NEONvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
86 def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
87 def NEONvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;
89 def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
// VZIP / VUZP / VTRN nodes. All three use the SDTARMVSHUF2 profile, which
// declares two results and two operands.
92 def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
93 def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
94 def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
96 def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
// FMAX / FMIN nodes typed by SDTARMFMAX (f32 result, two operands).
98 def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
99 def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
// Pattern leaf matching a NEONvmovImm node with an i32 target-immediate
// operand. The predicate decodes that operand with
// ARM_AM::decodeNEONModImm and accepts only when the decoded element width
// is 32 bits and the decoded element value is 0.
101 def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
102 ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
103 unsigned EltBits = 0;
104 uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
105 return (EltBits == 32 && EltVal == 0);
// Pattern leaf matching a NEONvmovImm node with an i32 target-immediate
// operand. The predicate decodes that operand with
// ARM_AM::decodeNEONModImm and accepts only when the decoded element width
// is 8 bits and the decoded element value is 0xff.
108 def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
109 ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
110 unsigned EltBits = 0;
111 uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
112 return (EltBits == 8 && EltVal == 0xff);
115 //===----------------------------------------------------------------------===//
116 // NEON operand definitions
117 //===----------------------------------------------------------------------===//
// i32 operand whose assembly text is produced by the printer method
// printNEONModImmOperand.
119 def nModImm : Operand<i32> {
120 let PrintMethod = "printNEONModImmOperand";
123 //===----------------------------------------------------------------------===//
124 // NEON load / store instructions
125 //===----------------------------------------------------------------------===//
127 // Use vldmia to load a Q register as a D register pair.
128 // This is equivalent to VLDMD except that it has a Q register operand
129 // instead of a pair of D registers.
131 : AXDI5<(outs QPR:$dst), (ins addrmode4:$addr, pred:$p),
132 IndexModeNone, IIC_fpLoadm,
133 "vldm${addr:submode}${p}\t$addr, ${dst:dregpair}", "",
134 [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]>;
136 let mayLoad = 1, neverHasSideEffects = 1 in {
137 // Use vld1 to load a Q register as a D register pair.
138 // This alternative to VLDMQ allows an alignment to be specified.
139 // This is equivalent to VLD1q64 except that it has a Q register operand.
141 : NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst), (ins addrmode6:$addr),
142 IIC_VLD1, "vld1", "64", "${dst:dregpair}, $addr", "", []>;
143 } // mayLoad = 1, neverHasSideEffects = 1
145 // Use vstmia to store a Q register as a D register pair.
146 // This is equivalent to VSTMD except that it has a Q register operand
147 // instead of a pair of D registers.
149 : AXDI5<(outs), (ins QPR:$src, addrmode4:$addr, pred:$p),
150 IndexModeNone, IIC_fpStorem,
151 "vstm${addr:submode}${p}\t$addr, ${src:dregpair}", "",
152 [(store (v2f64 QPR:$src), addrmode4:$addr)]>;
154 let mayStore = 1, neverHasSideEffects = 1 in {
155 // Use vst1 to store a Q register as a D register pair.
156 // This alternative to VSTMQ allows an alignment to be specified.
157 // This is equivalent to VST1q64 except that it has a Q register operand.
159 : NLdSt<0,0b00,0b1010,0b1100, (outs), (ins addrmode6:$addr, QPR:$src),
160 IIC_VST, "vst1", "64", "${src:dregpair}, $addr", "", []>;
161 } // mayStore = 1, neverHasSideEffects = 1
163 let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
165 // VLD1 : Vector Load (multiple single elements)
// VLD1D: one DPR result ($dst) loaded from an addrmode6 address ($addr).
// op7_4 is forwarded as the fourth NLdSt argument and Dt is the string passed
// right after the "vld1" mnemonic. The constraint string is empty and no
// selection pattern is attached ([]).
166 class VLD1D<bits<4> op7_4, string Dt>
167 : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst),
168 (ins addrmode6:$addr), IIC_VLD1,
169 "vld1", Dt, "\\{$dst\\}, $addr", "", []>;
// VLD1Q: same shape as VLD1D but with two DPR results ($dst1, $dst2) and
// 0b1010 rather than 0b0111 as the third NLdSt argument.
170 class VLD1Q<bits<4> op7_4, string Dt>
171 : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$dst1, DPR:$dst2),
172 (ins addrmode6:$addr), IIC_VLD1,
173 "vld1", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>;
// One def per Dt string; op7_4 is 0b0000 / 0b0100 / 0b1000 / 0b1100 for
// "8" / "16" / "32" / "64".
175 def VLD1d8 : VLD1D<0b0000, "8">;
176 def VLD1d16 : VLD1D<0b0100, "16">;
177 def VLD1d32 : VLD1D<0b1000, "32">;
178 def VLD1d64 : VLD1D<0b1100, "64">;
180 def VLD1q8 : VLD1Q<0b0000, "8">;
181 def VLD1q16 : VLD1Q<0b0100, "16">;
182 def VLD1q32 : VLD1Q<0b1000, "32">;
183 def VLD1q64 : VLD1Q<0b1100, "64">;
185 // ...with address register writeback:
// VLD1DWB: VLD1D plus a GPR result $wb and an am6offset input $offset, which
// is printed directly after $addr. The constraint "$addr.addr = $wb" ties $wb
// to the addr sub-operand of $addr.
186 class VLD1DWB<bits<4> op7_4, string Dt>
187 : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst, GPR:$wb),
188 (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
189 "vld1", Dt, "\\{$dst\\}, $addr$offset",
190 "$addr.addr = $wb", []>;
// VLD1QWB: writeback form for the two-register load. Unlike VLD1Q, which
// lists two DPR results, this class has a single QPR result printed through
// the dregpair operand modifier.
191 class VLD1QWB<bits<4> op7_4, string Dt>
192 : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst, GPR:$wb),
193 (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
194 "vld1", Dt, "${dst:dregpair}, $addr$offset",
195 "$addr.addr = $wb", []>;
// _UPD defs use the same op7_4 / Dt pairs as the non-writeback defs.
197 def VLD1d8_UPD : VLD1DWB<0b0000, "8">;
198 def VLD1d16_UPD : VLD1DWB<0b0100, "16">;
199 def VLD1d32_UPD : VLD1DWB<0b1000, "32">;
200 def VLD1d64_UPD : VLD1DWB<0b1100, "64">;
202 def VLD1q8_UPD : VLD1QWB<0b0000, "8">;
203 def VLD1q16_UPD : VLD1QWB<0b0100, "16">;
204 def VLD1q32_UPD : VLD1QWB<0b1000, "32">;
205 def VLD1q64_UPD : VLD1QWB<0b1100, "64">;
207 // ...with 3 registers (some of these are only for the disassembler):
// VLD1D3: vld1 with three DPR results ($dst1..$dst3); the third NLdSt
// argument is 0b0110. No constraint and no selection pattern.
208 class VLD1D3<bits<4> op7_4, string Dt>
209 : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
210 (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt,
211 "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>;
// VLD1D3WB: writeback form of VLD1D3. Adds the GPR result $wb and the
// am6offset input $offset, with $wb tied to $addr.addr.
212 class VLD1D3WB<bits<4> op7_4, string Dt>
213 : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
214 (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt,
215 "\\{$dst1, $dst2, $dst3\\}, $addr$offset", "$addr.addr = $wb", []>;
// "T" suffix marks the three-register defs.
217 def VLD1d8T : VLD1D3<0b0000, "8">;
218 def VLD1d16T : VLD1D3<0b0100, "16">;
219 def VLD1d32T : VLD1D3<0b1000, "32">;
220 def VLD1d64T : VLD1D3<0b1100, "64">;
222 def VLD1d8T_UPD : VLD1D3WB<0b0000, "8">;
223 def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">;
224 def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">;
225 def VLD1d64T_UPD : VLD1D3WB<0b1100, "64">;
227 // ...with 4 registers (some of these are only for the disassembler):
// VLD1D4: vld1 with four DPR results ($dst1..$dst4); the third NLdSt
// argument is 0b0010. No constraint and no selection pattern.
228 class VLD1D4<bits<4> op7_4, string Dt>
229 : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
230 (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt,
231 "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;
// VLD1D4WB: writeback form of VLD1D4. Adds the GPR result $wb and the
// am6offset input $offset, with $wb tied to $addr.addr.
232 class VLD1D4WB<bits<4> op7_4, string Dt>
233 : NLdSt<0,0b10,0b0010,op7_4,
234 (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
235 (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt,
236 "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", "$addr.addr = $wb",
// "Q" suffix marks the four-register defs.
239 def VLD1d8Q : VLD1D4<0b0000, "8">;
240 def VLD1d16Q : VLD1D4<0b0100, "16">;
241 def VLD1d32Q : VLD1D4<0b1000, "32">;
242 def VLD1d64Q : VLD1D4<0b1100, "64">;
244 def VLD1d8Q_UPD : VLD1D4WB<0b0000, "8">;
245 def VLD1d16Q_UPD : VLD1D4WB<0b0100, "16">;
246 def VLD1d32Q_UPD : VLD1D4WB<0b1000, "32">;
247 def VLD1d64Q_UPD : VLD1D4WB<0b1100, "64">;
249 // VLD2 : Vector Load (multiple 2-element structures)
// VLD2D: two DPR results. Both op11_8 and op7_4 are class parameters and are
// forwarded as the third and fourth NLdSt arguments, so one class serves
// several encodings. No constraint and no selection pattern.
250 class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
251 : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2),
252 (ins addrmode6:$addr), IIC_VLD2,
253 "vld2", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>;
// VLD2Q: four DPR results with the third NLdSt argument fixed at 0b0011.
254 class VLD2Q<bits<4> op7_4, string Dt>
255 : NLdSt<0, 0b10, 0b0011, op7_4,
256 (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
257 (ins addrmode6:$addr), IIC_VLD2,
258 "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;
// The d-register defs pass op11_8 = 0b1000. Only "8", "16" and "32" are
// defined for vld2.
260 def VLD2d8 : VLD2D<0b1000, 0b0000, "8">;
261 def VLD2d16 : VLD2D<0b1000, 0b0100, "16">;
262 def VLD2d32 : VLD2D<0b1000, 0b1000, "32">;
264 def VLD2q8 : VLD2Q<0b0000, "8">;
265 def VLD2q16 : VLD2Q<0b0100, "16">;
266 def VLD2q32 : VLD2Q<0b1000, "32">;
268 // ...with address register writeback:
// VLD2DWB: VLD2D plus a GPR result $wb and an am6offset input $offset; $wb is
// tied to $addr.addr through the constraint string.
269 class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
270 : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
271 (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2,
272 "vld2", Dt, "\\{$dst1, $dst2\\}, $addr$offset",
273 "$addr.addr = $wb", []>;
// VLD2QWB: VLD2Q plus the same writeback operands and constraint.
274 class VLD2QWB<bits<4> op7_4, string Dt>
275 : NLdSt<0, 0b10, 0b0011, op7_4,
276 (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
277 (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2,
278 "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset",
279 "$addr.addr = $wb", []>;
281 def VLD2d8_UPD : VLD2DWB<0b1000, 0b0000, "8">;
282 def VLD2d16_UPD : VLD2DWB<0b1000, 0b0100, "16">;
283 def VLD2d32_UPD : VLD2DWB<0b1000, 0b1000, "32">;
285 def VLD2q8_UPD : VLD2QWB<0b0000, "8">;
286 def VLD2q16_UPD : VLD2QWB<0b0100, "16">;
287 def VLD2q32_UPD : VLD2QWB<0b1000, "32">;
289 // ...with double-spaced registers (for disassembly only):
// The "b" defs reuse VLD2D / VLD2DWB and differ from the "d" defs only in
// op11_8 (0b1001 instead of 0b1000).
290 def VLD2b8 : VLD2D<0b1001, 0b0000, "8">;
291 def VLD2b16 : VLD2D<0b1001, 0b0100, "16">;
292 def VLD2b32 : VLD2D<0b1001, 0b1000, "32">;
293 def VLD2b8_UPD : VLD2DWB<0b1001, 0b0000, "8">;
294 def VLD2b16_UPD : VLD2DWB<0b1001, 0b0100, "16">;
295 def VLD2b32_UPD : VLD2DWB<0b1001, 0b1000, "32">;
297 // VLD3 : Vector Load (multiple 3-element structures)
// VLD3D: three DPR results; op11_8 and op7_4 are forwarded as the third and
// fourth NLdSt arguments. No constraint and no selection pattern.
298 class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
299 : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
300 (ins addrmode6:$addr), IIC_VLD3,
301 "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>;
// The d-register defs pass op11_8 = 0b0100.
303 def VLD3d8 : VLD3D<0b0100, 0b0000, "8">;
304 def VLD3d16 : VLD3D<0b0100, 0b0100, "16">;
305 def VLD3d32 : VLD3D<0b0100, 0b1000, "32">;
307 // ...with address register writeback:
// VLD3DWB: VLD3D plus a GPR result $wb and an am6offset input $offset; $wb is
// tied to $addr.addr.
308 class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
309 : NLdSt<0, 0b10, op11_8, op7_4,
310 (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
311 (ins addrmode6:$addr, am6offset:$offset), IIC_VLD3,
312 "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr$offset",
313 "$addr.addr = $wb", []>;
315 def VLD3d8_UPD : VLD3DWB<0b0100, 0b0000, "8">;
316 def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">;
317 def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">;
319 // ...with double-spaced registers (non-updating versions for disassembly only):
// The "q" defs reuse the same classes with op11_8 = 0b0101.
320 def VLD3q8 : VLD3D<0b0101, 0b0000, "8">;
321 def VLD3q16 : VLD3D<0b0101, 0b0100, "16">;
322 def VLD3q32 : VLD3D<0b0101, 0b1000, "32">;
323 def VLD3q8_UPD : VLD3DWB<0b0101, 0b0000, "8">;
324 def VLD3q16_UPD : VLD3DWB<0b0101, 0b0100, "16">;
325 def VLD3q32_UPD : VLD3DWB<0b0101, 0b1000, "32">;
327 // ...alternate versions to be allocated odd register numbers:
// These take exactly the same class arguments as the q*_UPD defs above; only
// the record names differ.
328 def VLD3q8odd_UPD : VLD3DWB<0b0101, 0b0000, "8">;
329 def VLD3q16odd_UPD : VLD3DWB<0b0101, 0b0100, "16">;
330 def VLD3q32odd_UPD : VLD3DWB<0b0101, 0b1000, "32">;
332 // VLD4 : Vector Load (multiple 4-element structures)
// VLD4D: four DPR results; op11_8 and op7_4 are forwarded as the third and
// fourth NLdSt arguments. No constraint and no selection pattern.
333 class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
334 : NLdSt<0, 0b10, op11_8, op7_4,
335 (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
336 (ins addrmode6:$addr), IIC_VLD4,
337 "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;
// The d-register defs pass op11_8 = 0b0000.
339 def VLD4d8 : VLD4D<0b0000, 0b0000, "8">;
340 def VLD4d16 : VLD4D<0b0000, 0b0100, "16">;
341 def VLD4d32 : VLD4D<0b0000, 0b1000, "32">;
343 // ...with address register writeback:
// VLD4DWB: VLD4D plus a GPR result $wb and an am6offset input $offset; $wb is
// tied to $addr.addr.
344 class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
345 : NLdSt<0, 0b10, op11_8, op7_4,
346 (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
347 (ins addrmode6:$addr, am6offset:$offset), IIC_VLD4,
348 "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset",
349 "$addr.addr = $wb", []>;
351 def VLD4d8_UPD : VLD4DWB<0b0000, 0b0000, "8">;
352 def VLD4d16_UPD : VLD4DWB<0b0000, 0b0100, "16">;
353 def VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">;
355 // ...with double-spaced registers (non-updating versions for disassembly only):
// The "q" defs reuse the same classes with op11_8 = 0b0001.
356 def VLD4q8 : VLD4D<0b0001, 0b0000, "8">;
357 def VLD4q16 : VLD4D<0b0001, 0b0100, "16">;
358 def VLD4q32 : VLD4D<0b0001, 0b1000, "32">;
359 def VLD4q8_UPD : VLD4DWB<0b0001, 0b0000, "8">;
360 def VLD4q16_UPD : VLD4DWB<0b0001, 0b0100, "16">;
361 def VLD4q32_UPD : VLD4DWB<0b0001, 0b1000, "32">;
363 // ...alternate versions to be allocated odd register numbers:
// These take exactly the same class arguments as the q*_UPD defs above; only
// the record names differ.
364 def VLD4q8odd_UPD : VLD4DWB<0b0001, 0b0000, "8">;
365 def VLD4q16odd_UPD : VLD4DWB<0b0001, 0b0100, "16">;
366 def VLD4q32odd_UPD : VLD4DWB<0b0001, 0b1000, "32">;
368 // VLD1LN : Vector Load (single element to one lane)
369 // FIXME: Not yet implemented.
371 // VLD2LN : Vector Load (single 2-element structure to one lane)
// VLD2LN: the first NLdSt argument is 1 here (it is 0 in the
// multiple-structure classes). Besides the address, the inputs are the two
// source registers and a lane index (nohash_imm:$lane) that is printed in
// square brackets after each register. The constraint string ties each $srcN
// to the matching $dstN.
// NOTE(review): the tie presumably lets lanes that are not loaded carry over
// from the source registers - confirm.
372 class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
373 : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2),
374 (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
375 IIC_VLD2, "vld2", Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr",
376 "$src1 = $dst1, $src2 = $dst2", []>;
// op7_4 is given as a bit list; "?" leaves that bit uninitialized in the
// record, and only the explicitly written 0/1 bits are fixed by these defs.
378 def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8">;
379 def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16">;
380 def VLD2LNd32 : VLD2LN<0b1001, {?,0,?,?}, "32">;
382 // ...with double-spaced registers:
// Same op11_8 as the d forms; the bit that is 0 there is 1 here.
383 def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16">;
384 def VLD2LNq32 : VLD2LN<0b1001, {?,1,?,?}, "32">;
386 // ...alternate versions to be allocated odd register numbers:
// Identical class arguments to the q defs above; only the names differ.
387 def VLD2LNq16odd : VLD2LN<0b0101, {?,?,1,?}, "16">;
388 def VLD2LNq32odd : VLD2LN<0b1001, {?,1,?,?}, "32">;
390 // ...with address register writeback:
// VLD2LNWB: VLD2LN plus a GPR result $wb and an am6offset input $offset. The
// constraint string keeps the $srcN = $dstN ties and also ties $wb to
// $addr.addr.
391 class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
392 : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
393 (ins addrmode6:$addr, am6offset:$offset,
394 DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2, "vld2", Dt,
395 "\\{$dst1[$lane], $dst2[$lane]\\}, $addr$offset",
396 "$src1 = $dst1, $src2 = $dst2, $addr.addr = $wb", []>;
398 def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8">;
399 def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16">;
400 def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,?,?}, "32">;
402 def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16">;
403 def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,?,?}, "32">;
405 // VLD3LN : Vector Load (single 3-element structure to one lane)
// VLD3LN: three DPR results, each tied to the matching $srcN input by the
// constraint string. The lane index (nohash_imm:$lane) is printed in square
// brackets after each register. The first NLdSt argument is 1.
406 class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
407 : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
408 (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
409 nohash_imm:$lane), IIC_VLD3, "vld3", Dt,
410 "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr",
411 "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>;
// op7_4 is given as a bit list; "?" leaves a bit uninitialized. Every vld3
// lane def fixes the last listed bit to 0.
413 def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8">;
414 def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16">;
415 def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32">;
417 // ...with double-spaced registers:
// Same op11_8 as the d forms; one 0 bit becomes 1.
418 def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16">;
419 def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32">;
421 // ...alternate versions to be allocated odd register numbers:
// Identical class arguments to the q defs above; only the names differ.
422 def VLD3LNq16odd : VLD3LN<0b0110, {?,?,1,0}, "16">;
423 def VLD3LNq32odd : VLD3LN<0b1010, {?,1,0,0}, "32">;
425 // ...with address register writeback:
// VLD3LNWB: VLD3LN plus a GPR result $wb and an am6offset input $offset; the
// constraint string additionally ties $wb to $addr.addr.
426 class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
427 : NLdSt<1, 0b10, op11_8, op7_4,
428 (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
429 (ins addrmode6:$addr, am6offset:$offset,
430 DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
431 IIC_VLD3, "vld3", Dt,
432 "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr$offset",
433 "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $addr.addr = $wb",
436 def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8">;
437 def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16">;
438 def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32">;
440 def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16">;
441 def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32">;
443 // VLD4LN : Vector Load (single 4-element structure to one lane)
// VLD4LN: four DPR results, each tied to the matching $srcN input by the
// constraint string. The lane index (nohash_imm:$lane) is printed in square
// brackets after each register. The first NLdSt argument is 1.
444 class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
445 : NLdSt<1, 0b10, op11_8, op7_4,
446 (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
447 (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
448 nohash_imm:$lane), IIC_VLD4, "vld4", Dt,
449 "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr",
450 "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;
// op7_4 is given as a bit list; "?" leaves a bit uninitialized in the record.
452 def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8">;
453 def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16">;
454 def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32">;
456 // ...with double-spaced registers:
// Same op11_8 as the d forms; the bit that is 0 there is 1 here.
457 def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16">;
458 def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32">;
460 // ...alternate versions to be allocated odd register numbers:
// Identical class arguments to the q defs above; only the names differ.
461 def VLD4LNq16odd : VLD4LN<0b0111, {?,?,1,?}, "16">;
462 def VLD4LNq32odd : VLD4LN<0b1011, {?,1,?,?}, "32">;
464 // ...with address register writeback:
// VLD4LNWB: VLD4LN plus a GPR result $wb and an am6offset input $offset; the
// constraint string additionally ties $wb to $addr.addr.
465 class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
466 : NLdSt<1, 0b10, op11_8, op7_4,
467 (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
468 (ins addrmode6:$addr, am6offset:$offset,
469 DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
470 IIC_VLD4, "vld4", Dt,
471 "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr$offset",
472 "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $addr.addr = $wb",
475 def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8">;
476 def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16">;
477 def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32">;
479 def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16">;
480 def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">;
482 // VLD1DUP : Vector Load (single element to all lanes)
483 // VLD2DUP : Vector Load (single 2-element structure to all lanes)
484 // VLD3DUP : Vector Load (single 3-element structure to all lanes)
485 // VLD4DUP : Vector Load (single 4-element structure to all lanes)
486 // FIXME: Not yet implemented.
487 } // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
489 let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
491 // Classes for VST* pseudo-instructions with multi-register operands.
492 // These are expanded to real instructions after register allocation.
494 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), IIC_VST, "">;
496 : PseudoNLdSt<(outs GPR:$wb),
497 (ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST,
500 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), IIC_VST, "">;
502 : PseudoNLdSt<(outs GPR:$wb),
503 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), IIC_VST,
505 class VSTQQQQWBPseudo
506 : PseudoNLdSt<(outs GPR:$wb),
507 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), IIC_VST,
510 // VST1 : Vector Store (multiple single elements)
// VST1D: store of one DPR ($src) to an addrmode6 address. There are no
// outputs, and the second NLdSt argument is 0b00 (the load classes use
// 0b10). op7_4 and Dt are forwarded as in the load classes. No constraint and
// no selection pattern.
511 class VST1D<bits<4> op7_4, string Dt>
512 : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST,
513 "vst1", Dt, "\\{$src\\}, $addr", "", []>;
// VST1Q: two-register form with DPR inputs $src1 and $src2; the third NLdSt
// argument is 0b1010.
514 class VST1Q<bits<4> op7_4, string Dt>
515 : NLdSt<0,0b00,0b1010,op7_4, (outs),
516 (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
517 "vst1", Dt, "\\{$src1, $src2\\}, $addr", "", []>;
519 def VST1d8 : VST1D<0b0000, "8">;
520 def VST1d16 : VST1D<0b0100, "16">;
521 def VST1d32 : VST1D<0b1000, "32">;
522 def VST1d64 : VST1D<0b1100, "64">;
524 def VST1q8 : VST1Q<0b0000, "8">;
525 def VST1q16 : VST1Q<0b0100, "16">;
526 def VST1q32 : VST1Q<0b1000, "32">;
527 def VST1q64 : VST1Q<0b1100, "64">;
// Pseudo counterparts of the q defs. They take no arguments of their own and
// inherit everything from VSTQPseudo.
529 def VST1q8Pseudo : VSTQPseudo;
530 def VST1q16Pseudo : VSTQPseudo;
531 def VST1q32Pseudo : VSTQPseudo;
532 def VST1q64Pseudo : VSTQPseudo;
534 // ...with address register writeback:
// VST1DWB: VST1D plus a GPR result $wb and an am6offset input $offset
// (printed directly after $addr); $wb is tied to $addr.addr.
535 class VST1DWB<bits<4> op7_4, string Dt>
536 : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
537 (ins addrmode6:$addr, am6offset:$offset, DPR:$src), IIC_VST,
538 "vst1", Dt, "\\{$src\\}, $addr$offset", "$addr.addr = $wb", []>;
// VST1QWB: writeback form of the two-register store. Unlike VST1Q, which
// takes two DPR inputs, this class takes a single QPR input printed through
// the dregpair operand modifier.
539 class VST1QWB<bits<4> op7_4, string Dt>
540 : NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb),
541 (ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST,
542 "vst1", Dt, "${src:dregpair}, $addr$offset", "$addr.addr = $wb", []>;
544 def VST1d8_UPD : VST1DWB<0b0000, "8">;
545 def VST1d16_UPD : VST1DWB<0b0100, "16">;
546 def VST1d32_UPD : VST1DWB<0b1000, "32">;
547 def VST1d64_UPD : VST1DWB<0b1100, "64">;
549 def VST1q8_UPD : VST1QWB<0b0000, "8">;
550 def VST1q16_UPD : VST1QWB<0b0100, "16">;
551 def VST1q32_UPD : VST1QWB<0b1000, "32">;
552 def VST1q64_UPD : VST1QWB<0b1100, "64">;
// Pseudo counterparts of the q*_UPD defs, inheriting from VSTQWBPseudo.
554 def VST1q8Pseudo_UPD : VSTQWBPseudo;
555 def VST1q16Pseudo_UPD : VSTQWBPseudo;
556 def VST1q32Pseudo_UPD : VSTQWBPseudo;
557 def VST1q64Pseudo_UPD : VSTQWBPseudo;
559 // ...with 3 registers (some of these are only for the disassembler):
// VST1D3: vst1 of three DPR inputs ($src1..$src3); the third NLdSt argument
// is 0b0110. No outputs, no constraint, no selection pattern.
560 class VST1D3<bits<4> op7_4, string Dt>
561 : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
562 (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3),
563 IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>;
// VST1D3WB: writeback form of VST1D3. Adds the GPR result $wb and the
// am6offset input $offset, with $wb tied to $addr.addr.
564 class VST1D3WB<bits<4> op7_4, string Dt>
565 : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb),
566 (ins addrmode6:$addr, am6offset:$offset,
567 DPR:$src1, DPR:$src2, DPR:$src3),
568 IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset",
569 "$addr.addr = $wb", []>;
571 def VST1d8T : VST1D3<0b0000, "8">;
572 def VST1d16T : VST1D3<0b0100, "16">;
573 def VST1d32T : VST1D3<0b1000, "32">;
574 def VST1d64T : VST1D3<0b1100, "64">;
576 def VST1d8T_UPD : VST1D3WB<0b0000, "8">;
577 def VST1d16T_UPD : VST1D3WB<0b0100, "16">;
578 def VST1d32T_UPD : VST1D3WB<0b1000, "32">;
579 def VST1d64T_UPD : VST1D3WB<0b1100, "64">;
// Only the 64-bit three-register store has pseudo counterparts; they inherit
// from the QQ pseudo classes.
581 def VST1d64TPseudo : VSTQQPseudo;
582 def VST1d64TPseudo_UPD : VSTQQWBPseudo;
584 // ...with 4 registers (some of these are only for the disassembler):
// VST1D4: vst1 of four DPR inputs ($src1..$src4); the third NLdSt argument is
// 0b0010. No outputs and an empty constraint string.
585 class VST1D4<bits<4> op7_4, string Dt>
586 : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
587 (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
588 IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "",
// VST1D4WB: writeback form of VST1D4. Adds the GPR result $wb and the
// am6offset input $offset, with $wb tied to $addr.addr.
590 class VST1D4WB<bits<4> op7_4, string Dt>
591 : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb),
592 (ins addrmode6:$addr, am6offset:$offset,
593 DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
594 IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
595 "$addr.addr = $wb", []>;
597 def VST1d8Q : VST1D4<0b0000, "8">;
598 def VST1d16Q : VST1D4<0b0100, "16">;
599 def VST1d32Q : VST1D4<0b1000, "32">;
600 def VST1d64Q : VST1D4<0b1100, "64">;
602 def VST1d8Q_UPD : VST1D4WB<0b0000, "8">;
603 def VST1d16Q_UPD : VST1D4WB<0b0100, "16">;
604 def VST1d32Q_UPD : VST1D4WB<0b1000, "32">;
605 def VST1d64Q_UPD : VST1D4WB<0b1100, "64">;
// Only the 64-bit four-register store has pseudo counterparts; they inherit
// from the QQ pseudo classes.
607 def VST1d64QPseudo : VSTQQPseudo;
608 def VST1d64QPseudo_UPD : VSTQQWBPseudo;
610 // VST2 : Vector Store (multiple 2-element structures)
// VST2D: store of two DPR inputs; op11_8 and op7_4 are forwarded as the third
// and fourth NLdSt arguments. No outputs, no constraint, no selection
// pattern.
611 class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
612 : NLdSt<0, 0b00, op11_8, op7_4, (outs),
613 (ins addrmode6:$addr, DPR:$src1, DPR:$src2),
614 IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr", "", []>;
// VST2Q: four DPR inputs with the third NLdSt argument fixed at 0b0011.
615 class VST2Q<bits<4> op7_4, string Dt>
616 : NLdSt<0, 0b00, 0b0011, op7_4, (outs),
617 (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
618 IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
// The d-register defs pass op11_8 = 0b1000.
621 def VST2d8 : VST2D<0b1000, 0b0000, "8">;
622 def VST2d16 : VST2D<0b1000, 0b0100, "16">;
623 def VST2d32 : VST2D<0b1000, 0b1000, "32">;
625 def VST2q8 : VST2Q<0b0000, "8">;
626 def VST2q16 : VST2Q<0b0100, "16">;
627 def VST2q32 : VST2Q<0b1000, "32">;
// Pseudo counterparts: the d forms inherit from VSTQPseudo and the q forms
// from VSTQQPseudo.
629 def VST2d8Pseudo : VSTQPseudo;
630 def VST2d16Pseudo : VSTQPseudo;
631 def VST2d32Pseudo : VSTQPseudo;
633 def VST2q8Pseudo : VSTQQPseudo;
634 def VST2q16Pseudo : VSTQQPseudo;
635 def VST2q32Pseudo : VSTQQPseudo;
637 // ...with address register writeback:
// VST2DWB: VST2D plus a GPR result $wb and an am6offset input $offset; $wb is
// tied to $addr.addr.
638 class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
639 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
640 (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2),
641 IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr$offset",
642 "$addr.addr = $wb", []>;
// VST2QWB: VST2Q plus the same writeback operands and constraint.
643 class VST2QWB<bits<4> op7_4, string Dt>
644 : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
645 (ins addrmode6:$addr, am6offset:$offset,
646 DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
647 IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
648 "$addr.addr = $wb", []>;
650 def VST2d8_UPD : VST2DWB<0b1000, 0b0000, "8">;
651 def VST2d16_UPD : VST2DWB<0b1000, 0b0100, "16">;
652 def VST2d32_UPD : VST2DWB<0b1000, 0b1000, "32">;
654 def VST2q8_UPD : VST2QWB<0b0000, "8">;
655 def VST2q16_UPD : VST2QWB<0b0100, "16">;
656 def VST2q32_UPD : VST2QWB<0b1000, "32">;
// Pseudo counterparts: the d forms inherit from VSTQWBPseudo and the q forms
// from VSTQQWBPseudo.
658 def VST2d8Pseudo_UPD : VSTQWBPseudo;
659 def VST2d16Pseudo_UPD : VSTQWBPseudo;
660 def VST2d32Pseudo_UPD : VSTQWBPseudo;
662 def VST2q8Pseudo_UPD : VSTQQWBPseudo;
663 def VST2q16Pseudo_UPD : VSTQQWBPseudo;
664 def VST2q32Pseudo_UPD : VSTQQWBPseudo;
666 // ...with double-spaced registers (for disassembly only):
// The "b" defs reuse VST2D / VST2DWB and differ from the "d" defs only in
// op11_8 (0b1001 instead of 0b1000).
667 def VST2b8 : VST2D<0b1001, 0b0000, "8">;
668 def VST2b16 : VST2D<0b1001, 0b0100, "16">;
669 def VST2b32 : VST2D<0b1001, 0b1000, "32">;
670 def VST2b8_UPD : VST2DWB<0b1001, 0b0000, "8">;
671 def VST2b16_UPD : VST2DWB<0b1001, 0b0100, "16">;
672 def VST2b32_UPD : VST2DWB<0b1001, 0b1000, "32">;
674 // VST3 : Vector Store (multiple 3-element structures)
// VST3D: store of three DPR inputs; op11_8 and op7_4 are forwarded as the
// third and fourth NLdSt arguments. No outputs, no constraint, no selection
// pattern.
675 class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
676 : NLdSt<0, 0b00, op11_8, op7_4, (outs),
677 (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
678 "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>;
// The d-register defs pass op11_8 = 0b0100.
680 def VST3d8 : VST3D<0b0100, 0b0000, "8">;
681 def VST3d16 : VST3D<0b0100, 0b0100, "16">;
682 def VST3d32 : VST3D<0b0100, 0b1000, "32">;
// Pseudo counterparts of the d forms, inheriting from VSTQQPseudo.
684 def VST3d8Pseudo : VSTQQPseudo;
685 def VST3d16Pseudo : VSTQQPseudo;
686 def VST3d32Pseudo : VSTQQPseudo;
688 // ...with address register writeback:
// VST3DWB: VST3D plus a GPR result $wb and an am6offset input $offset; $wb is
// tied to $addr.addr.
689 class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
690 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
691 (ins addrmode6:$addr, am6offset:$offset,
692 DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
693 "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset",
694 "$addr.addr = $wb", []>;
696 def VST3d8_UPD : VST3DWB<0b0100, 0b0000, "8">;
697 def VST3d16_UPD : VST3DWB<0b0100, 0b0100, "16">;
698 def VST3d32_UPD : VST3DWB<0b0100, 0b1000, "32">;
700 def VST3d8Pseudo_UPD : VSTQQWBPseudo;
701 def VST3d16Pseudo_UPD : VSTQQWBPseudo;
702 def VST3d32Pseudo_UPD : VSTQQWBPseudo;
704 // ...with double-spaced registers (non-updating versions for disassembly only):
// The "q" defs reuse the same classes with op11_8 = 0b0101.
705 def VST3q8 : VST3D<0b0101, 0b0000, "8">;
706 def VST3q16 : VST3D<0b0101, 0b0100, "16">;
707 def VST3q32 : VST3D<0b0101, 0b1000, "32">;
708 def VST3q8_UPD : VST3DWB<0b0101, 0b0000, "8">;
709 def VST3q16_UPD : VST3DWB<0b0101, 0b0100, "16">;
710 def VST3q32_UPD : VST3DWB<0b0101, 0b1000, "32">;
// Pseudo counterparts exist only for the updating q forms and inherit from
// VSTQQQQWBPseudo.
712 def VST3q8Pseudo_UPD : VSTQQQQWBPseudo;
713 def VST3q16Pseudo_UPD : VSTQQQQWBPseudo;
714 def VST3q32Pseudo_UPD : VSTQQQQWBPseudo;
716 // ...alternate versions to be allocated odd register numbers:
717 def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo;
718 def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo;
719 def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo;
721 // VST4 : Vector Store (multiple 4-element structures)
// VST4D: store of four DPR inputs; op11_8 and op7_4 are forwarded as the
// third and fourth NLdSt arguments. No outputs.
722 class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
723 : NLdSt<0, 0b00, op11_8, op7_4, (outs),
724 (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
725 IIC_VST, "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
// The d-register defs pass op11_8 = 0b0000.
728 def VST4d8 : VST4D<0b0000, 0b0000, "8">;
729 def VST4d16 : VST4D<0b0000, 0b0100, "16">;
730 def VST4d32 : VST4D<0b0000, 0b1000, "32">;
// Pseudo counterparts of the d forms, inheriting from VSTQQPseudo.
732 def VST4d8Pseudo : VSTQQPseudo;
733 def VST4d16Pseudo : VSTQQPseudo;
734 def VST4d32Pseudo : VSTQQPseudo;
736 // ...with address register writeback:
// VST4DWB: VST4D plus a GPR result $wb and an am6offset input $offset; $wb is
// tied to $addr.addr.
737 class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
738 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
739 (ins addrmode6:$addr, am6offset:$offset,
740 DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST,
741 "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
742 "$addr.addr = $wb", []>;
744 def VST4d8_UPD : VST4DWB<0b0000, 0b0000, "8">;
745 def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">;
746 def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">;
748 def VST4d8Pseudo_UPD : VSTQQWBPseudo;
749 def VST4d16Pseudo_UPD : VSTQQWBPseudo;
750 def VST4d32Pseudo_UPD : VSTQQWBPseudo;
752 // ...with double-spaced registers (non-updating versions for disassembly only):
// The "q" defs reuse the same classes with op11_8 = 0b0001.
753 def VST4q8 : VST4D<0b0001, 0b0000, "8">;
754 def VST4q16 : VST4D<0b0001, 0b0100, "16">;
755 def VST4q32 : VST4D<0b0001, 0b1000, "32">;
756 def VST4q8_UPD : VST4DWB<0b0001, 0b0000, "8">;
757 def VST4q16_UPD : VST4DWB<0b0001, 0b0100, "16">;
758 def VST4q32_UPD : VST4DWB<0b0001, 0b1000, "32">;
// Pseudo counterparts exist only for the updating q forms and inherit from
// VSTQQQQWBPseudo.
760 def VST4q8Pseudo_UPD : VSTQQQQWBPseudo;
761 def VST4q16Pseudo_UPD : VSTQQQQWBPseudo;
762 def VST4q32Pseudo_UPD : VSTQQQQWBPseudo;
764 // ...alternate versions to be allocated odd register numbers:
765 def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo;
766 def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo;
767 def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo;
769 // VST1LN : Vector Store (single element from one lane)
770 // FIXME: Not yet implemented.
772 // VST2LN : Vector Store (single 2-element structure from one lane)
// VST2LN: form without writeback. Inputs are the address, two D registers
// and an immediate lane index that is printed as "[$lane]" after each
// register. The first NLdSt argument is 1 here (it is 0 in VST4D).
// NOTE(review): the closing arguments of this class are not visible in this
// listing -- confirm against the complete file.
773 class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
774 : NLdSt<1, 0b00, op11_8, op7_4, (outs),
775 (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
776 IIC_VST, "vst2", Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr",
// op7_4 bits written as '?' are left unset by these definitions; only the
// explicit 0/1 bits are fixed.
779 def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8">;
780 def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16">;
781 def VST2LNd32 : VST2LN<0b1001, {?,0,?,?}, "32">;
783 // ...with double-spaced registers:
// Same op11_8 as the "d" forms; the fixed op7_4 bit is 1 instead of 0.
784 def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16">;
785 def VST2LNq32 : VST2LN<0b1001, {?,1,?,?}, "32">;
787 // ...alternate versions to be allocated odd register numbers:
// These take exactly the same arguments as VST2LNq16/VST2LNq32.
788 def VST2LNq16odd : VST2LN<0b0101, {?,?,1,?}, "16">;
789 def VST2LNq32odd : VST2LN<0b1001, {?,1,?,?}, "32">;
791 // ...with address register writeback:
// VST2LNWB adds a GPR output ($wb) and an am6offset input, and ties the
// address register to $wb through the "$addr.addr = $wb" constraint.
792 class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
793 : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
794 (ins addrmode6:$addr, am6offset:$offset,
795 DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST, "vst2", Dt,
796 "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset",
797 "$addr.addr = $wb", []>;
799 def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8">;
800 def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16">;
801 def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,?,?}, "32">;
803 def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16">;
804 def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,?,?}, "32">;
806 // VST3LN : Vector Store (single 3-element structure from one lane)
// VST3LN: form without writeback. Inputs are the address, three D registers
// and an immediate lane index; it has no outputs, no constraint string and an
// empty pattern list.
807 class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
808 : NLdSt<1, 0b00, op11_8, op7_4, (outs),
809 (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
810 nohash_imm:$lane), IIC_VST, "vst3", Dt,
811 "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr", "", []>;
// Unlike the VST2LN definitions, the lowest op7_4 bit is fixed to 0 in every
// variant below.
813 def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8">;
814 def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16">;
815 def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32">;
817 // ...with double-spaced registers:
818 def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16">;
819 def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32">;
821 // ...alternate versions to be allocated odd register numbers:
// These take exactly the same arguments as VST3LNq16/VST3LNq32.
822 def VST3LNq16odd : VST3LN<0b0110, {?,?,1,0}, "16">;
823 def VST3LNq32odd : VST3LN<0b1010, {?,1,0,0}, "32">;
825 // ...with address register writeback:
// VST3LNWB adds a GPR output ($wb) and an am6offset input, and ties the
// address register to $wb through the "$addr.addr = $wb" constraint.
// NOTE(review): the itinerary/mnemonic/Dt arguments between the operand list
// and the asm string are not visible in this listing -- confirm against the
// complete file.
826 class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
827 : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
828 (ins addrmode6:$addr, am6offset:$offset,
829 DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
831 "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr$offset",
832 "$addr.addr = $wb", []>;
834 def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8">;
835 def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16">;
836 def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32">;
838 def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16">;
839 def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32">;
841 // VST4LN : Vector Store (single 4-element structure from one lane)
// VST4LN: form without writeback. Inputs are the address, four D registers
// and an immediate lane index that is printed as "[$lane]" after each
// register.
// NOTE(review): the closing arguments of this class are not visible in this
// listing -- confirm against the complete file.
842 class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
843 : NLdSt<1, 0b00, op11_8, op7_4, (outs),
844 (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
845 nohash_imm:$lane), IIC_VST, "vst4", Dt,
846 "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr",
// op7_4 bits written as '?' are left unset by these definitions; only the
// explicit 0/1 bits are fixed.
849 def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8">;
850 def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16">;
851 def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32">;
853 // ...with double-spaced registers:
854 def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16">;
855 def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32">;
857 // ...alternate versions to be allocated odd register numbers:
// These take exactly the same arguments as VST4LNq16/VST4LNq32.
858 def VST4LNq16odd : VST4LN<0b0111, {?,?,1,?}, "16">;
859 def VST4LNq32odd : VST4LN<0b1011, {?,1,?,?}, "32">;
861 // ...with address register writeback:
// VST4LNWB adds a GPR output ($wb) and an am6offset input, and ties the
// address register to $wb through the "$addr.addr = $wb" constraint.
// NOTE(review): the itinerary/mnemonic/Dt arguments between the operand list
// and the asm string are not visible in this listing -- confirm against the
// complete file.
862 class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
863 : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
864 (ins addrmode6:$addr, am6offset:$offset,
865 DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
867 "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr$offset",
868 "$addr.addr = $wb", []>;
870 def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8">;
871 def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16">;
872 def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32">;
874 def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16">;
875 def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">;
877 } // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
880 //===----------------------------------------------------------------------===//
881 // NEON pattern fragments
882 //===----------------------------------------------------------------------===//
// NOTE(review): the closing line of each SDNodeXForm below is not visible in
// this listing -- confirm against the complete file.
884 // Extract D sub-registers of Q registers.
// Each DSubReg_* transform turns an immediate N into the i32 target constant
// ARM::dsub_0 + N/k, where k is 8, 4, 2 or 1 (no division) for the i8, i16,
// i32 and f64 variants respectively. Presumably k is the number of elements
// of that type held by one D register -- TODO confirm.
// The assert checks that the dsub_N indices are numbered consecutively,
// which the addition relies on.
885 def DSubReg_i8_reg : SDNodeXForm<imm, [{
886 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
887 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32);
889 def DSubReg_i16_reg : SDNodeXForm<imm, [{
890 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
891 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32);
893 def DSubReg_i32_reg : SDNodeXForm<imm, [{
894 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
895 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32);
897 def DSubReg_f64_reg : SDNodeXForm<imm, [{
898 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
899 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32);
902 // Extract S sub-registers of Q/D registers.
// Returns ARM::ssub_0 + N as an i32 target constant; the assert checks that
// ssub_0..ssub_3 are numbered consecutively.
903 def SSubReg_f32_reg : SDNodeXForm<imm, [{
904 assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
905 return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32);
908 // Translate lane numbers from Q registers to D subregs.
// Each SubReg_*_lane transform keeps only the low bits of the lane number
// (mask 7, 3 or 1 for i8, i16 and i32) and returns them as an i32 target
// constant.
909 def SubReg_i8_lane : SDNodeXForm<imm, [{
910 return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
912 def SubReg_i16_lane : SDNodeXForm<imm, [{
913 return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
915 def SubReg_i32_lane : SDNodeXForm<imm, [{
916 return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
919 //===----------------------------------------------------------------------===//
920 // Instruction Classes
921 //===----------------------------------------------------------------------===//
923 // Basic 2-register operations: single-, double- and quad-register.
// N2VS: operands use the DPR_VFP2 register class and the pattern list is
// empty, so ResTy, OpTy and OpNode are accepted but not used in this body.
// The itinerary is fixed to IIC_VUNAD.
924 class N2VS<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
925 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
926 string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
927 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
928 (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src),
929 IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "", []>;
// N2VD: D-register form. Selects (ResTy (OpNode (OpTy $src))); the N2V
// argument after op11_7 is 0 and the itinerary is fixed to IIC_VUNAD.
930 class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
931 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
932 string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
933 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
934 (ins DPR:$src), IIC_VUNAD, OpcodeStr, Dt,"$dst, $src", "",
935 [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src))))]>;
// N2VQ: Q-register form. Same pattern shape as N2VD, but on QPR operands,
// with 1 in the N2V argument after op11_7 and itinerary IIC_VUNAQ.
936 class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
937 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
938 string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
939 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
940 (ins QPR:$src), IIC_VUNAQ, OpcodeStr, Dt,"$dst, $src", "",
941 [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>;
943 // Basic 2-register intrinsics, both double- and quad-register.
// These mirror N2VD/N2VQ but match an Intrinsic (IntOp) instead of an SDNode
// and take the itinerary as a parameter. The D form passes 0 and the Q form
// passes 1 in the N2V argument after op11_7.
944 class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
945 bits<2> op17_16, bits<5> op11_7, bit op4,
946 InstrItinClass itin, string OpcodeStr, string Dt,
947 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
948 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
949 (ins DPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
950 [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
951 class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
952 bits<2> op17_16, bits<5> op11_7, bit op4,
953 InstrItinClass itin, string OpcodeStr, string Dt,
954 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
955 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
956 (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
957 [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
959 // Narrow 2-register intrinsics.
// The source is a Q register (type TyQ) and the result a D register (type
// TyD). op6 is a parameter here rather than a fixed 0/1.
960 class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
961 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
962 InstrItinClass itin, string OpcodeStr, string Dt,
963 ValueType TyD, ValueType TyQ, Intrinsic IntOp>
964 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$dst),
965 (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
966 [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>;
968 // Long 2-register operations (currently only used for VMOVL).
// The source is a D register (type TyD) and the result a Q register (type
// TyQ). Note the type parameters are declared in the order TyQ, TyD, the
// reverse of N2VNInt.
969 class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
970 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
971 InstrItinClass itin, string OpcodeStr, string Dt,
972 ValueType TyQ, ValueType TyD, SDNode OpNode>
973 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$dst),
974 (ins DPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
975 [(set QPR:$dst, (TyQ (OpNode (TyD DPR:$src))))]>;
977 // 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// Both classes produce two results, each tied to the corresponding source
// ("$src1 = $dst1, $src2 = $dst2"), and have an empty pattern list. Only the
// destinations appear in the asm string. The D form fixes the itinerary to
// IIC_VPERMD, while the Q form takes it as a parameter.
978 class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
979 : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$dst1, DPR:$dst2),
980 (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
981 OpcodeStr, Dt, "$dst1, $dst2",
982 "$src1 = $dst1, $src2 = $dst2", []>;
983 class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
984 InstrItinClass itin, string OpcodeStr, string Dt>
985 : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$dst1, QPR:$dst2),
986 (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$dst1, $dst2",
987 "$src1 = $dst1, $src2 = $dst2", []>;
989 // Basic 3-register operations: single-, double- and quad-register.
// NOTE(review): the closing brace of each class body below is not visible in
// this listing -- confirm against the complete file.
// N3VS: operands use DPR_VFP2 and the pattern list is empty, so ResTy, OpTy
// and OpNode are accepted but not used in this body. The itinerary is fixed
// to IIC_VBIND; isCommutable is set from the Commutable parameter.
990 class N3VS<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
991 string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
992 SDNode OpNode, bit Commutable>
993 : N3V<op24, op23, op21_20, op11_8, 0, op4,
994 (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm,
995 IIC_VBIND, OpcodeStr, Dt, "$dst, $src1, $src2", "", []> {
996 let isCommutable = Commutable;
// N3VD: D-register binary operation selecting
// (ResTy (OpNode (OpTy $src1), (OpTy $src2))).
999 class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
1000 InstrItinClass itin, string OpcodeStr, string Dt,
1001 ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
1002 : N3V<op24, op23, op21_20, op11_8, 0, op4,
1003 (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
1004 OpcodeStr, Dt, "$dst, $src1, $src2", "",
1005 [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
1006 let isCommutable = Commutable;
1008 // Same as N3VD but no data type.
// Derives from N3VX instead of N3V and passes no Dt string.
1009 class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
1010 InstrItinClass itin, string OpcodeStr,
1011 ValueType ResTy, ValueType OpTy,
1012 SDNode OpNode, bit Commutable>
1013 : N3VX<op24, op23, op21_20, op11_8, 0, op4,
1014 (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
1015 OpcodeStr, "$dst, $src1, $src2", "",
1016 [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]>{
1017 let isCommutable = Commutable;
// N3VDSL: D-register operation whose second operand is a single lane of
// $src2, written "$src2[$lane]". The pattern applies ShOp to $src1 and a
// NEONvduplane of ($src2, $lane). $src2 is restricted to DPR_VFP2, the
// format is NVMulSLFrm, and the instruction is never commutable.
// NOTE(review): the closing brace of each class body below is not visible in
// this listing -- confirm against the complete file.
1020 class N3VDSL<bits<2> op21_20, bits<4> op11_8,
1021 InstrItinClass itin, string OpcodeStr, string Dt,
1022 ValueType Ty, SDNode ShOp>
1023 : N3V<0, 1, op21_20, op11_8, 1, 0,
1024 (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
1025 NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
1026 [(set (Ty DPR:$dst),
1027 (Ty (ShOp (Ty DPR:$src1),
1028 (Ty (NEONvduplane (Ty DPR_VFP2:$src2),imm:$lane)))))]> {
1029 let isCommutable = 0;
// N3VDSL16: same shape as N3VDSL, but $src2 uses the DPR_8 register class and
// the itinerary is fixed to IIC_VMULi16D instead of being a parameter.
1031 class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
1032 string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
1033 : N3V<0, 1, op21_20, op11_8, 1, 0,
1034 (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
1035 NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$dst, $src1, $src2[$lane]","",
1036 [(set (Ty DPR:$dst),
1037 (Ty (ShOp (Ty DPR:$src1),
1038 (Ty (NEONvduplane (Ty DPR_8:$src2), imm:$lane)))))]> {
1039 let isCommutable = 0;
// N3VQ: Q-register counterpart of N3VD. Same pattern shape on QPR operands;
// the N3V argument after op11_8 is 1 instead of 0.
// NOTE(review): the closing brace of each class body below is not visible in
// this listing -- confirm against the complete file.
1042 class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
1043 InstrItinClass itin, string OpcodeStr, string Dt,
1044 ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
1045 : N3V<op24, op23, op21_20, op11_8, 1, op4,
1046 (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, itin,
1047 OpcodeStr, Dt, "$dst, $src1, $src2", "",
1048 [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
1049 let isCommutable = Commutable;
// N3VQX: like N3VQ, but derives from N3VX and passes no Dt string.
1051 class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
1052 InstrItinClass itin, string OpcodeStr,
1053 ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
1054 : N3VX<op24, op23, op21_20, op11_8, 1, op4,
1055 (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, itin,
1056 OpcodeStr, "$dst, $src1, $src2", "",
1057 [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]>{
1058 let isCommutable = Commutable;
// N3VQSL: Q-register operation whose second operand is one lane of a D
// register. $src1 and the result are QPR values of type ResTy; the lane
// source $src2 is a DPR_VFP2 value of type OpTy. The first N3V argument is 1
// here (it is 0 in N3VDSL).
// NOTE(review): the end of the pattern and the closing brace of each class
// below are not visible in this listing -- confirm against the complete file.
1060 class N3VQSL<bits<2> op21_20, bits<4> op11_8,
1061 InstrItinClass itin, string OpcodeStr, string Dt,
1062 ValueType ResTy, ValueType OpTy, SDNode ShOp>
1063 : N3V<1, 1, op21_20, op11_8, 1, 0,
1064 (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
1065 NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
1066 [(set (ResTy QPR:$dst),
1067 (ResTy (ShOp (ResTy QPR:$src1),
1068 (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
1070 let isCommutable = 0;
// N3VQSL16: same shape as N3VQSL, but $src2 uses the DPR_8 register class and
// the itinerary is fixed to IIC_VMULi16Q.
1072 class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
1073 ValueType ResTy, ValueType OpTy, SDNode ShOp>
1074 : N3V<1, 1, op21_20, op11_8, 1, 0,
1075 (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
1076 NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$dst, $src1, $src2[$lane]","",
1077 [(set (ResTy QPR:$dst),
1078 (ResTy (ShOp (ResTy QPR:$src1),
1079 (ResTy (NEONvduplane (OpTy DPR_8:$src2),
1081 let isCommutable = 0;
1084 // Basic 3-register intrinsics, both double- and quad-register.
// N3VDInt: D-register form matching (ResTy (IntOp (OpTy $src1), (OpTy $src2))).
// Unlike N3VD, the instruction Format is a parameter (f) rather than the
// fixed N3RegFrm.
// NOTE(review): the closing brace of the class body is not visible in this
// listing -- confirm against the complete file.
1085 class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
1086 Format f, InstrItinClass itin, string OpcodeStr, string Dt,
1087 ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
1088 : N3V<op24, op23, op21_20, op11_8, 0, op4,
1089 (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), f, itin,
1090 OpcodeStr, Dt, "$dst, $src1, $src2", "",
1091 [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
1092 let isCommutable = Commutable;
// N3VDIntSL: intrinsic counterpart of N3VDSL. IntOp is applied to $src1 and
// a NEONvduplane of the DPR_VFP2 operand $src2; never commutable.
// NOTE(review): the end of this pattern and the closing braces below are not
// visible in this listing -- confirm against the complete file.
1094 class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
1095 string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
1096 : N3V<0, 1, op21_20, op11_8, 1, 0,
1097 (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
1098 NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
1099 [(set (Ty DPR:$dst),
1100 (Ty (IntOp (Ty DPR:$src1),
1101 (Ty (NEONvduplane (Ty DPR_VFP2:$src2),
1103 let isCommutable = 0;
// N3VDIntSL16: same as N3VDIntSL except that $src2 uses the DPR_8 register
// class. The itinerary remains a parameter here.
1105 class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
1106 string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
1107 : N3V<0, 1, op21_20, op11_8, 1, 0,
1108 (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
1109 NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
1110 [(set (Ty DPR:$dst),
1111 (Ty (IntOp (Ty DPR:$src1),
1112 (Ty (NEONvduplane (Ty DPR_8:$src2), imm:$lane)))))]> {
1113 let isCommutable = 0;
// N3VQInt: Q-register counterpart of N3VDInt. Same pattern shape on QPR
// operands; the N3V argument after op11_8 is 1 instead of 0.
// NOTE(review): the closing brace of the class body is not visible in this
// listing -- confirm against the complete file.
1116 class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
1117 Format f, InstrItinClass itin, string OpcodeStr, string Dt,
1118 ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
1119 : N3V<op24, op23, op21_20, op11_8, 1, op4,
1120 (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), f, itin,
1121 OpcodeStr, Dt, "$dst, $src1, $src2", "",
1122 [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
1123 let isCommutable = Commutable;
// N3VQIntSL: intrinsic counterpart of N3VQSL. $src1 and the result are QPR
// values of type ResTy; the lane source $src2 is a DPR_VFP2 value of type
// OpTy. Never commutable.
// NOTE(review): the end of each pattern and the closing braces below are not
// visible in this listing -- confirm against the complete file.
1125 class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
1126 string OpcodeStr, string Dt,
1127 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
1128 : N3V<1, 1, op21_20, op11_8, 1, 0,
1129 (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
1130 NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
1131 [(set (ResTy QPR:$dst),
1132 (ResTy (IntOp (ResTy QPR:$src1),
1133 (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
1135 let isCommutable = 0;
// N3VQIntSL16: same as N3VQIntSL except that $src2 uses the DPR_8 register
// class.
1137 class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
1138 string OpcodeStr, string Dt,
1139 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
1140 : N3V<1, 1, op21_20, op11_8, 1, 0,
1141 (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
1142 NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
1143 [(set (ResTy QPR:$dst),
1144 (ResTy (IntOp (ResTy QPR:$src1),
1145 (ResTy (NEONvduplane (OpTy DPR_8:$src2),
1147 let isCommutable = 0;
1150 // Multiply-Add/Sub operations: single-, double- and quad-register.
// In all of these classes $src1 is tied to $dst ("$src1 = $dst") and is not
// printed; the asm string shows only "$dst, $src2, $src3".
// N3VSMulOp: operands use DPR_VFP2 and the pattern list is empty, so Ty,
// MulOp and OpNode are accepted but not used in this body.
1151 class N3VSMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
1152 InstrItinClass itin, string OpcodeStr, string Dt,
1153 ValueType Ty, SDNode MulOp, SDNode OpNode>
1154 : N3V<op24, op23, op21_20, op11_8, 0, op4,
1155 (outs DPR_VFP2:$dst),
1156 (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), N3RegFrm, itin,
1157 OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", []>;
// N3VDMulOp: selects (OpNode $src1, (MulOp $src2, $src3)) on D registers.
1159 class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
1160 InstrItinClass itin, string OpcodeStr, string Dt,
1161 ValueType Ty, SDNode MulOp, SDNode OpNode>
1162 : N3V<op24, op23, op21_20, op11_8, 0, op4,
1163 (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
1164 OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
1165 [(set DPR:$dst, (Ty (OpNode DPR:$src1,
1166 (Ty (MulOp DPR:$src2, DPR:$src3)))))]>;
// N3VDMulOpSL: D-register multiply-accumulate with a lane operand. The
// pattern combines $src1, via ShOp, with MulOp of $src2 and a NEONvduplane
// of the DPR_VFP2 operand $src3. $src1 is tied to $dst.
// NOTE(review): the output list, the format/itinerary arguments and the end
// of each pattern below are not visible in this listing -- confirm against
// the complete file.
1167 class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
1168 string OpcodeStr, string Dt,
1169 ValueType Ty, SDNode MulOp, SDNode ShOp>
1170 : N3V<0, 1, op21_20, op11_8, 1, 0,
1172 (ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
1174 OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
1175 [(set (Ty DPR:$dst),
1176 (Ty (ShOp (Ty DPR:$src1),
1177 (Ty (MulOp DPR:$src2,
1178 (Ty (NEONvduplane (Ty DPR_VFP2:$src3),
// N3VDMulOpSL16: same as N3VDMulOpSL except that $src3 uses the DPR_8
// register class.
1180 class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
1181 string OpcodeStr, string Dt,
1182 ValueType Ty, SDNode MulOp, SDNode ShOp>
1183 : N3V<0, 1, op21_20, op11_8, 1, 0,
1185 (ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
1187 OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
1188 [(set (Ty DPR:$dst),
1189 (Ty (ShOp (Ty DPR:$src1),
1190 (Ty (MulOp DPR:$src2,
1191 (Ty (NEONvduplane (Ty DPR_8:$src3),
// N3VQMulOp: Q-register counterpart of N3VDMulOp. Selects
// (OpNode $src1, (MulOp $src2, $src3)) with $src1 tied to $dst; the N3V
// argument after op11_8 is 1.
1194 class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
1195 InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
1196 SDNode MulOp, SDNode OpNode>
1197 : N3V<op24, op23, op21_20, op11_8, 1, op4,
1198 (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, itin,
1199 OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
1200 [(set QPR:$dst, (Ty (OpNode QPR:$src1,
1201 (Ty (MulOp QPR:$src2, QPR:$src3)))))]>;
// N3VQMulOpSL: Q-register multiply-accumulate with a lane operand. $src1,
// $src2 and the result are QPR values; the lane source $src3 is a DPR_VFP2
// value of type OpTy that is duplicated to ResTy by NEONvduplane. $src1 is
// tied to $dst.
// NOTE(review): the output list, the format/itinerary arguments and the end
// of each pattern below are not visible in this listing -- confirm against
// the complete file.
1202 class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
1203 string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
1204 SDNode MulOp, SDNode ShOp>
1205 : N3V<1, 1, op21_20, op11_8, 1, 0,
1207 (ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
1209 OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
1210 [(set (ResTy QPR:$dst),
1211 (ResTy (ShOp (ResTy QPR:$src1),
1212 (ResTy (MulOp QPR:$src2,
1213 (ResTy (NEONvduplane (OpTy DPR_VFP2:$src3),
// N3VQMulOpSL16: same as N3VQMulOpSL except that $src3 uses the DPR_8
// register class.
1215 class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
1216 string OpcodeStr, string Dt,
1217 ValueType ResTy, ValueType OpTy,
1218 SDNode MulOp, SDNode ShOp>
1219 : N3V<1, 1, op21_20, op11_8, 1, 0,
1221 (ins QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane),
1223 OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
1224 [(set (ResTy QPR:$dst),
1225 (ResTy (ShOp (ResTy QPR:$src1),
1226 (ResTy (MulOp QPR:$src2,
1227 (ResTy (NEONvduplane (OpTy DPR_8:$src3),
1230 // Neon 3-argument intrinsics, both double- and quad-register.
1231 // The destination register is also used as the first source operand register.
// IntOp receives all three sources, each typed OpTy; the result is typed
// ResTy. $src1 is tied to $dst and is not printed in the asm string. The D
// form passes 0 and the Q form passes 1 in the N3V argument after op11_8.
1232 class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
1233 InstrItinClass itin, string OpcodeStr, string Dt,
1234 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
1235 : N3V<op24, op23, op21_20, op11_8, 0, op4,
1236 (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
1237 OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
1238 [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1),
1239 (OpTy DPR:$src2), (OpTy DPR:$src3))))]>;
1240 class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
1241 InstrItinClass itin, string OpcodeStr, string Dt,
1242 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
1243 : N3V<op24, op23, op21_20, op11_8, 1, op4,
1244 (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, itin,
1245 OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
1246 [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1),
1247 (OpTy QPR:$src2), (OpTy QPR:$src3))))]>;
1249 // Neon Long 3-argument intrinsic. The destination register is
1250 // a quad-register and is also used as the first source operand register.
// N3VLInt3: $src1 and the result are QPR values of type TyQ; $src2 and $src3
// are DPR values of type TyD.
// NOTE(review): the opening of this pattern is not visible in this listing;
// the same applies to the output list, format/itinerary arguments and
// pattern tails of the two SL classes below -- confirm against the complete
// file.
1251 class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
1252 InstrItinClass itin, string OpcodeStr, string Dt,
1253 ValueType TyQ, ValueType TyD, Intrinsic IntOp>
1254 : N3V<op24, op23, op21_20, op11_8, 0, op4,
1255 (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
1256 OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
1258 (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2), (TyD DPR:$src3))))]>;
// N3VLInt3SL: lane variant. $src3 is a DPR_VFP2 register indexed by $lane
// and passed through NEONvduplane; op24 is a parameter while the second N3V
// argument is fixed to 1.
1259 class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
1260 string OpcodeStr, string Dt,
1261 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
1262 : N3V<op24, 1, op21_20, op11_8, 1, 0,
1264 (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
1266 OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
1267 [(set (ResTy QPR:$dst),
1268 (ResTy (IntOp (ResTy QPR:$src1),
1270 (OpTy (NEONvduplane (OpTy DPR_VFP2:$src3),
// N3VLInt3SL16: same as N3VLInt3SL except that $src3 uses the DPR_8 register
// class.
1272 class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
1273 InstrItinClass itin, string OpcodeStr, string Dt,
1274 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
1275 : N3V<op24, 1, op21_20, op11_8, 1, 0,
1277 (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
1279 OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
1280 [(set (ResTy QPR:$dst),
1281 (ResTy (IntOp (ResTy QPR:$src1),
1283 (OpTy (NEONvduplane (OpTy DPR_8:$src3),
1286 // Narrowing 3-register intrinsics.
// Both sources are Q registers (type TyQ) and the result is a D register
// (type TyD). The itinerary is fixed to IIC_VBINi4D rather than being a
// parameter.
// NOTE(review): the closing brace of the class body is not visible in this
// listing -- confirm against the complete file.
1287 class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
1288 string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
1289 Intrinsic IntOp, bit Commutable>
1290 : N3V<op24, op23, op21_20, op11_8, 0, op4,
1291 (outs DPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINi4D,
1292 OpcodeStr, Dt, "$dst, $src1, $src2", "",
1293 [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src1), (TyQ QPR:$src2))))]> {
1294 let isCommutable = Commutable;
1297 // Long 3-register operations.
// Both D-register sources (type TyD) are first widened to TyQ with ExtOp,
// then combined with OpNode; the result is a Q register.
// NOTE(review): the body uses Commutable, but the end of the parameter list
// (where it would be declared) and the closing brace are not visible in this
// listing -- confirm against the complete file.
1298 class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
1299 InstrItinClass itin, string OpcodeStr, string Dt,
1300 ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
1302 : N3V<op24, op23, op21_20, op11_8, 0, op4,
1303 (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
1304 OpcodeStr, Dt, "$dst, $src1, $src2", "",
1305 [(set QPR:$dst, (OpNode (TyQ (ExtOp (TyD DPR:$src1))),
1306 (TyQ (ExtOp (TyD DPR:$src2)))))]> {
1307 let isCommutable = Commutable;
1310 // Long 3-register intrinsics.
// N3VLInt: IntOp takes two D-register sources of type TyD and yields a
// Q-register result of type TyQ; no explicit extension node appears in the
// pattern (contrast with N3VL).
// NOTE(review): the closing brace of this class and the pattern tails of the
// two SL classes below are not visible in this listing -- confirm against
// the complete file.
1311 class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
1312 InstrItinClass itin, string OpcodeStr, string Dt,
1313 ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable>
1314 : N3V<op24, op23, op21_20, op11_8, 0, op4,
1315 (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
1316 OpcodeStr, Dt, "$dst, $src1, $src2", "",
1317 [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src1), (TyD DPR:$src2))))]> {
1318 let isCommutable = Commutable;
// N3VLIntSL: lane variant. $src2 is a DPR_VFP2 register indexed by $lane and
// passed through NEONvduplane; op24 is a parameter while the second N3V
// argument is fixed to 1.
1320 class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
1321 string OpcodeStr, string Dt,
1322 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
1323 : N3V<op24, 1, op21_20, op11_8, 1, 0,
1324 (outs QPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
1325 NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
1326 [(set (ResTy QPR:$dst),
1327 (ResTy (IntOp (OpTy DPR:$src1),
1328 (OpTy (NEONvduplane (OpTy DPR_VFP2:$src2),
// N3VLIntSL16: same as N3VLIntSL except that $src2 uses the DPR_8 register
// class.
1330 class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
1331 InstrItinClass itin, string OpcodeStr, string Dt,
1332 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
1333 : N3V<op24, 1, op21_20, op11_8, 1, 0,
1334 (outs QPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
1335 NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
1336 [(set (ResTy QPR:$dst),
1337 (ResTy (IntOp (OpTy DPR:$src1),
1338 (OpTy (NEONvduplane (OpTy DPR_8:$src2),
1341 // Wide 3-register operations.
// $src1 is already a Q register (type TyQ); only the D-register $src2 (type
// TyD) is widened with ExtOp before OpNode is applied. The itinerary is
// fixed to IIC_VSUBiD rather than being a parameter.
// NOTE(review): the closing brace of the class body is not visible in this
// listing -- confirm against the complete file.
1342 class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
1343 string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
1344 SDNode OpNode, SDNode ExtOp, bit Commutable>
1345 : N3V<op24, op23, op21_20, op11_8, 0, op4,
1346 (outs QPR:$dst), (ins QPR:$src1, DPR:$src2), N3RegFrm, IIC_VSUBiD,
1347 OpcodeStr, Dt, "$dst, $src1, $src2", "",
1348 [(set QPR:$dst, (OpNode (TyQ QPR:$src1),
1349 (TyQ (ExtOp (TyD DPR:$src2)))))]> {
1350 let isCommutable = Commutable;
1353 // Pairwise long 2-register intrinsics, both double- and quad-register.
// Single-source intrinsic forms: result type ResTy, operand type OpTy, in
// the same register class (DPR or QPR). The itinerary is fixed rather than
// being a parameter.
1354 class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
1355 bits<2> op17_16, bits<5> op11_7, bit op4,
1356 string OpcodeStr, string Dt,
1357 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
1358 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
1359 (ins DPR:$src), IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
1360 [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
// NOTE(review): the Q form below also uses the "D"-suffixed itinerary
// IIC_VSHLiD -- confirm this is intended.
1361 class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
1362 bits<2> op17_16, bits<5> op11_7, bit op4,
1363 string OpcodeStr, string Dt,
1364 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
1365 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
1366 (ins QPR:$src), IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
1367 [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
1369 // Pairwise long 2-register accumulate intrinsics,
1370 // both double- and quad-register.
1371 // The destination register is also used as the first source operand register.
// IntOp takes the accumulator $src1 (typed ResTy, tied to $dst) and the
// operand $src2 (typed OpTy); only "$dst, $src2" is printed. The D form uses
// itinerary IIC_VPALiD and the Q form IIC_VPALiQ.
1372 class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
1373 bits<2> op17_16, bits<5> op11_7, bit op4,
1374 string OpcodeStr, string Dt,
1375 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
1376 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
1377 (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), IIC_VPALiD,
1378 OpcodeStr, Dt, "$dst, $src2", "$src1 = $dst",
1379 [(set DPR:$dst, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$src2))))]>;
1380 class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
1381 bits<2> op17_16, bits<5> op11_7, bit op4,
1382 string OpcodeStr, string Dt,
1383 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
1384 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
1385 (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), IIC_VPALiQ,
1386 OpcodeStr, Dt, "$dst, $src2", "$src1 = $dst",
1387 [(set QPR:$dst, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$src2))))]>;
1389 // Shift by immediate,
1390 // both double- and quad-register.
// Selects (Ty (OpNode (Ty $src), (i32 imm:$SIMM))): source and result share
// the type Ty, and the shift amount is the i32 immediate operand $SIMM.
// Format and itinerary are parameters. The D form passes 0 and the Q form
// passes 1 in the N2VImm argument after op7.
1391 class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
1392 Format f, InstrItinClass itin, string OpcodeStr, string Dt,
1393 ValueType Ty, SDNode OpNode>
1394 : N2VImm<op24, op23, op11_8, op7, 0, op4,
1395 (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), f, itin,
1396 OpcodeStr, Dt, "$dst, $src, $SIMM", "",
1397 [(set DPR:$dst, (Ty (OpNode (Ty DPR:$src), (i32 imm:$SIMM))))]>;
1398 class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
1399 Format f, InstrItinClass itin, string OpcodeStr, string Dt,
1400 ValueType Ty, SDNode OpNode>
1401 : N2VImm<op24, op23, op11_8, op7, 1, op4,
1402 (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), f, itin,
1403 OpcodeStr, Dt, "$dst, $src, $SIMM", "",
1404 [(set QPR:$dst, (Ty (OpNode (Ty QPR:$src), (i32 imm:$SIMM))))]>;
1406 // Long shift by immediate.
// The source is a D register (type OpTy) and the result a Q register (type
// ResTy). op6 is a parameter; the format is fixed to N2RegVShLFrm and the
// itinerary to IIC_VSHLiD.
1407 class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
1408 string OpcodeStr, string Dt,
1409 ValueType ResTy, ValueType OpTy, SDNode OpNode>
1410 : N2VImm<op24, op23, op11_8, op7, op6, op4,
1411 (outs QPR:$dst), (ins DPR:$src, i32imm:$SIMM), N2RegVShLFrm,
1412 IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src, $SIMM", "",
1413 [(set QPR:$dst, (ResTy (OpNode (OpTy DPR:$src),
1414 (i32 imm:$SIMM))))]>;
1416 // Narrow shift by immediate.
// The source is a Q register (type OpTy) and the result a D register (type
// ResTy). op6 and the itinerary are parameters; the format is fixed to
// N2RegVShRFrm.
1417 class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
1418 InstrItinClass itin, string OpcodeStr, string Dt,
1419 ValueType ResTy, ValueType OpTy, SDNode OpNode>
1420 : N2VImm<op24, op23, op11_8, op7, op6, op4,
1421 (outs DPR:$dst), (ins QPR:$src, i32imm:$SIMM), N2RegVShRFrm, itin,
1422 OpcodeStr, Dt, "$dst, $src, $SIMM", "",
1423 [(set DPR:$dst, (ResTy (OpNode (OpTy QPR:$src),
1424 (i32 imm:$SIMM))))]>;
1426 // Shift right by immediate and accumulate,
1427 // both double- and quad-register.
// Selects (add $src1, (ShOp $src2, imm)). The accumulator $src1 is tied to
// $dst and is not printed. The format is fixed to N2RegVShRFrm.
1428 class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
1429 string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
1430 : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$dst),
1431 (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD,
1432 OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
1433 [(set DPR:$dst, (Ty (add DPR:$src1,
1434 (Ty (ShOp DPR:$src2, (i32 imm:$SIMM))))))]>;
// NOTE(review): the Q form below also uses the "D"-suffixed itinerary
// IIC_VPALiD -- confirm this is intended.
1435 class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
1436 string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
1437 : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$dst),
1438 (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD,
1439 OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
1440 [(set QPR:$dst, (Ty (add QPR:$src1,
1441 (Ty (ShOp QPR:$src2, (i32 imm:$SIMM))))))]>;
1443 // Shift by immediate and insert,
1444 // both double- and quad-register.
// ShOp is a three-operand node taking ($src1, $src2, imm). $src1 is tied to
// $dst and is not printed. The format is a parameter; the itinerary is fixed
// to IIC_VSHLiD for the D form and IIC_VSHLiQ for the Q form.
1445 class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
1446 Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp>
1447 : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$dst),
1448 (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), f, IIC_VSHLiD,
1449 OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
1450 [(set DPR:$dst, (Ty (ShOp DPR:$src1, DPR:$src2, (i32 imm:$SIMM))))]>;
1451 class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
1452 Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp>
1453 : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$dst),
1454 (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), f, IIC_VSHLiQ,
1455 OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
1456 [(set QPR:$dst, (Ty (ShOp QPR:$src1, QPR:$src2, (i32 imm:$SIMM))))]>;
1458 // Convert, with fractional bits immediate,
1459 // both double- and quad-register.
// Selects (ResTy (IntOp (OpTy $src), (i32 imm:$SIMM))), with $SIMM as the
// immediate operand. The format is fixed to NVCVTFrm; the itinerary is
// IIC_VUNAD for the D form and IIC_VUNAQ for the Q form.
// NOTE(review): both bodies use IntOp, but the end of each parameter list
// (where it would be declared) is not visible in this listing -- confirm
// against the complete file.
1460 class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
1461 string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
1463 : N2VImm<op24, op23, op11_8, op7, 0, op4,
1464 (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), NVCVTFrm,
1465 IIC_VUNAD, OpcodeStr, Dt, "$dst, $src, $SIMM", "",
1466 [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src), (i32 imm:$SIMM))))]>;
1467 class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
1468 string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
1470 : N2VImm<op24, op23, op11_8, op7, 1, op4,
1471 (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), NVCVTFrm,
1472 IIC_VUNAQ, OpcodeStr, Dt, "$dst, $src, $SIMM", "",
1473 [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src), (i32 imm:$SIMM))))]>;
1475 //===----------------------------------------------------------------------===//
1477 //===----------------------------------------------------------------------===//
1479 // Abbreviations used in multiclass suffixes:
1480 // Q = quarter int (8 bit) elements
1481 // H = half int (16 bit) elements
1482 // S = single int (32 bit) elements
1483 // D = double int (64 bit) elements
1485 // Neon 2-register vector operations -- for disassembly only.
1487 // First with only element sizes of 8, 16 and 32 bits:
1488 multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
1489 bits<5> op11_7, bit op4, string opc, string Dt,
1491 // 64-bit vector types.
1492 def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
1493 (outs DPR:$dst), (ins DPR:$src), NoItinerary,
1494 opc, !strconcat(Dt, "8"), asm, "", []>;
1495 def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
1496 (outs DPR:$dst), (ins DPR:$src), NoItinerary,
1497 opc, !strconcat(Dt, "16"), asm, "", []>;
1498 def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
1499 (outs DPR:$dst), (ins DPR:$src), NoItinerary,
1500 opc, !strconcat(Dt, "32"), asm, "", []>;
1501 def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
1502 (outs DPR:$dst), (ins DPR:$src), NoItinerary,
1503 opc, "f32", asm, "", []> {
1504 let Inst{10} = 1; // overwrite F = 1
1507 // 128-bit vector types.
1508 def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
1509 (outs QPR:$dst), (ins QPR:$src), NoItinerary,
1510 opc, !strconcat(Dt, "8"), asm, "", []>;
1511 def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
1512 (outs QPR:$dst), (ins QPR:$src), NoItinerary,
1513 opc, !strconcat(Dt, "16"), asm, "", []>;
1514 def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
1515 (outs QPR:$dst), (ins QPR:$src), NoItinerary,
1516 opc, !strconcat(Dt, "32"), asm, "", []>;
1517 def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
1518 (outs QPR:$dst), (ins QPR:$src), NoItinerary,
1519 opc, "f32", asm, "", []> {
1520 let Inst{10} = 1; // overwrite F = 1
1524 // Neon 3-register vector operations.
1526 // First with only element sizes of 8, 16 and 32 bits:
// N3V_QHS - Expands one 3-register SDNode operation into six records, one
// per integer vector type with 8-, 16- or 32-bit elements:
//   record  class  size arg  Dt suffix  itinerary
//   v8i8    N3VD   0b00      "8"        itinD16
//   v4i16   N3VD   0b01      "16"       itinD16
//   v2i32   N3VD   0b10      "32"       itinD32
//   v16i8   N3VQ   0b00      "8"        itinQ16
//   v8i16   N3VQ   0b01      "16"       itinQ16
//   v4i32   N3VQ   0b10      "32"       itinQ32
// There is no separate 8-bit itinerary parameter; the 8-bit records reuse
// the 16-bit one. OpNode and Commutable (default 0) are forwarded unchanged
// to every record.
1527 multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
1528 InstrItinClass itinD16, InstrItinClass itinD32,
1529 InstrItinClass itinQ16, InstrItinClass itinQ32,
1530 string OpcodeStr, string Dt,
1531 SDNode OpNode, bit Commutable = 0> {
1532 // 64-bit vector types.
1533 def v8i8 : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
1534 OpcodeStr, !strconcat(Dt, "8"),
1535 v8i8, v8i8, OpNode, Commutable>;
1536 def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
1537 OpcodeStr, !strconcat(Dt, "16"),
1538 v4i16, v4i16, OpNode, Commutable>;
1539 def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
1540 OpcodeStr, !strconcat(Dt, "32"),
1541 v2i32, v2i32, OpNode, Commutable>;
1543 // 128-bit vector types.
1544 def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
1545 OpcodeStr, !strconcat(Dt, "8"),
1546 v16i8, v16i8, OpNode, Commutable>;
1547 def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
1548 OpcodeStr, !strconcat(Dt, "16"),
1549 v8i16, v8i16, OpNode, Commutable>;
1550 def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
1551 OpcodeStr, !strconcat(Dt, "32"),
1552 v4i32, v4i32, OpNode, Commutable>;
1555 multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> {
1556 def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
1558 def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, !strconcat(Dt,"32"),
1560 def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
1561 v8i16, v4i16, ShOp>;
1562 def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, !strconcat(Dt,"32"),
1563 v4i32, v2i32, ShOp>;
1566 // ....then also with element size 64 bits:
// N3V_QHSD - Everything N3V_QHS produces, plus the two 64-bit-element
// records v1i64 (N3VD) and v2i64 (N3VQ), both with size arg 0b11 and Dt
// suffix "64".
// Only one itinerary per register width is taken here: itinD fills both D
// itinerary slots of N3V_QHS and itinQ fills both Q slots, and the same two
// itineraries are used for the added 64-bit records.
1567 multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
1568 InstrItinClass itinD, InstrItinClass itinQ,
1569 string OpcodeStr, string Dt,
1570 SDNode OpNode, bit Commutable = 0>
1571 : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
1572 OpcodeStr, Dt, OpNode, Commutable> {
1573 def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
1574 OpcodeStr, !strconcat(Dt, "64"),
1575 v1i64, v1i64, OpNode, Commutable>;
1576 def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
1577 OpcodeStr, !strconcat(Dt, "64"),
1578 v2i64, v2i64, OpNode, Commutable>;
1582 // Neon Narrowing 2-register vector intrinsics,
1583 // source operand element sizes of 16, 32 and 64 bits:
1584 multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
1585 bits<5> op11_7, bit op6, bit op4,
1586 InstrItinClass itin, string OpcodeStr, string Dt,
1588 def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
1589 itin, OpcodeStr, !strconcat(Dt, "16"),
1590 v8i8, v8i16, IntOp>;
1591 def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
1592 itin, OpcodeStr, !strconcat(Dt, "32"),
1593 v4i16, v4i32, IntOp>;
1594 def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
1595 itin, OpcodeStr, !strconcat(Dt, "64"),
1596 v2i32, v2i64, IntOp>;
1600 // Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
1601 // source operand element sizes of 8, 16 and 32 bits:
// N2VL_QHS - Produces three N2VL records, each pairing a 128-bit type with a
// 64-bit type of the same lane count and half the element width:
//   record  Dt suffix  ValueType args
//   v8i16   "8"        v8i16, v8i8
//   v4i32   "16"       v4i32, v4i16
//   v2i64   "32"       v2i64, v2i32
// The Dt suffix and the second ValueType name the narrower element size.
// All three records use the IIC_VQUNAiD itinerary and the same OpNode.
// NOTE(review): the three literal 2-bit arguments after op24_23 differ per
// record (0b00/0b10/0b00, 0b01/0b00/0b00, 0b10/0b00/0b00); N2VL is not
// visible here, so confirm which encoding fields they populate.
1602 multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
1603 string OpcodeStr, string Dt, SDNode OpNode> {
1604 def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
1605 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
1606 def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
1607 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
1608 def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
1609 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
1613 // Neon 3-register vector intrinsics.
1615 // First with only element sizes of 16 and 32 bits:
// N3VInt_HS - Expands one 3-register intrinsic into four records covering
// 16- and 32-bit elements:
//   record  class    size arg  Dt suffix  itinerary
//   v4i16   N3VDInt  0b01      "16"       itinD16
//   v2i32   N3VDInt  0b10      "32"       itinD32
//   v8i16   N3VQInt  0b01      "16"       itinQ16
//   v4i32   N3VQInt  0b10      "32"       itinQ32
// The format f, IntOp and Commutable (default 0) are forwarded unchanged to
// every record.
1616 multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
1617 InstrItinClass itinD16, InstrItinClass itinD32,
1618 InstrItinClass itinQ16, InstrItinClass itinQ32,
1619 string OpcodeStr, string Dt,
1620 Intrinsic IntOp, bit Commutable = 0> {
1621 // 64-bit vector types.
1622 def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
1623 OpcodeStr, !strconcat(Dt, "16"),
1624 v4i16, v4i16, IntOp, Commutable>;
1625 def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
1626 OpcodeStr, !strconcat(Dt, "32"),
1627 v2i32, v2i32, IntOp, Commutable>;
1629 // 128-bit vector types.
1630 def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
1631 OpcodeStr, !strconcat(Dt, "16"),
1632 v8i16, v8i16, IntOp, Commutable>;
1633 def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
1634 OpcodeStr, !strconcat(Dt, "32"),
1635 v4i32, v4i32, IntOp, Commutable>;
// N3VIntSL_HS - "SL" counterpart of N3VInt_HS: four records for 16- and
// 32-bit elements built from the *SL16 classes (16-bit) and the *SL classes
// (32-bit), with no op24/op23/op4/format/Commutable parameters.
//   record  class        size arg  Dt suffix  itinerary  ValueType args
//   v4i16   N3VDIntSL16  0b01      "16"       itinD16    v4i16
//   v2i32   N3VDIntSL    0b10      "32"       itinD32    v2i32
//   v8i16   N3VQIntSL16  0b01      "16"       itinQ16    v8i16, v4i16
//   v4i32   N3VQIntSL    0b10      "32"       itinQ32    v4i32, v2i32
// The Q-register classes take a second, 64-bit ValueType -- presumably the
// type of the D register holding the scalar lane; confirm against
// N3VQIntSL / N3VQIntSL16.
1638 multiclass N3VIntSL_HS<bits<4> op11_8,
1639 InstrItinClass itinD16, InstrItinClass itinD32,
1640 InstrItinClass itinQ16, InstrItinClass itinQ32,
1641 string OpcodeStr, string Dt, Intrinsic IntOp> {
1642 def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
1643 OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
1644 def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
1645 OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
1646 def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
1647 OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
1648 def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
1649 OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
1652 // ....then also with element size of 8 bits:
// N3VInt_QHS - Everything N3VInt_HS produces (all parameters forwarded
// unchanged), plus the 8-bit-element records v8i8 (N3VDInt) and v16i8
// (N3VQInt), both with size arg 0b00 and Dt suffix "8".
// There is no dedicated 8-bit itinerary parameter: the 8-bit records reuse
// itinD16 and itinQ16.
1653 multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
1654 InstrItinClass itinD16, InstrItinClass itinD32,
1655 InstrItinClass itinQ16, InstrItinClass itinQ32,
1656 string OpcodeStr, string Dt,
1657 Intrinsic IntOp, bit Commutable = 0>
1658 : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
1659 OpcodeStr, Dt, IntOp, Commutable> {
1660 def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
1661 OpcodeStr, !strconcat(Dt, "8"),
1662 v8i8, v8i8, IntOp, Commutable>;
1663 def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
1664 OpcodeStr, !strconcat(Dt, "8"),
1665 v16i8, v16i8, IntOp, Commutable>;
1668 // ....then also with element size of 64 bits:
// N3VInt_QHSD - Everything N3VInt_QHS produces (all parameters forwarded
// unchanged), plus the 64-bit-element records v1i64 (N3VDInt) and v2i64
// (N3VQInt), both with size arg 0b11 and Dt suffix "64".
// There is no dedicated 64-bit itinerary parameter: the 64-bit records reuse
// itinD32 and itinQ32.
1669 multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
1670 InstrItinClass itinD16, InstrItinClass itinD32,
1671 InstrItinClass itinQ16, InstrItinClass itinQ32,
1672 string OpcodeStr, string Dt,
1673 Intrinsic IntOp, bit Commutable = 0>
1674 : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
1675 OpcodeStr, Dt, IntOp, Commutable> {
1676 def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
1677 OpcodeStr, !strconcat(Dt, "64"),
1678 v1i64, v1i64, IntOp, Commutable>;
1679 def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
1680 OpcodeStr, !strconcat(Dt, "64"),
1681 v2i64, v2i64, IntOp, Commutable>;
1684 // Neon Narrowing 3-register vector intrinsics,
1685 // source operand element sizes of 16, 32 and 64 bits:
// N3VNInt_HSD - Produces three N3VNInt records, each pairing a 64-bit type
// with a 128-bit type of the same lane count and twice the element width:
//   record  size arg  Dt suffix  ValueType args
//   v8i8    0b00      "16"       v8i8,  v8i16
//   v4i16   0b01      "32"       v4i16, v4i32
//   v2i32   0b10      "64"       v2i32, v2i64
// The record name matches the first (narrow) ValueType, while the Dt suffix
// names the element size of the second (wide) one. No itinerary is passed.
// IntOp and Commutable (default 0) are forwarded unchanged.
1686 multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
1687 string OpcodeStr, string Dt,
1688 Intrinsic IntOp, bit Commutable = 0> {
1689 def v8i8 : N3VNInt<op24, op23, 0b00, op11_8, op4,
1690 OpcodeStr, !strconcat(Dt, "16"),
1691 v8i8, v8i16, IntOp, Commutable>;
1692 def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
1693 OpcodeStr, !strconcat(Dt, "32"),
1694 v4i16, v4i32, IntOp, Commutable>;
1695 def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
1696 OpcodeStr, !strconcat(Dt, "64"),
1697 v2i32, v2i64, IntOp, Commutable>;
1701 // Neon Long 3-register vector operations.
// N3VL_QHS - Produces three N3VL records, each pairing a 128-bit type with a
// 64-bit type of the same lane count and half the element width:
//   record  size arg  Dt suffix  itinerary  ValueType args
//   v4i32   0b01      "16"       itin16     v4i32, v4i16
//   v2i64   0b10      "32"       itin32     v2i64, v2i32
//   v8i16   0b00      "8"        itin16     v8i16, v8i8
// The 8-bit record reuses itin16. OpNode, ExtOp and Commutable (default 0)
// are forwarded unchanged to every record.
1703 multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
1704 InstrItinClass itin16, InstrItinClass itin32,
1705 string OpcodeStr, string Dt,
1706 SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
1707 def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
1708 OpcodeStr, !strconcat(Dt, "16"),
1709 v4i32, v4i16, OpNode, ExtOp, Commutable>;
1710 def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
1711 OpcodeStr, !strconcat(Dt, "32"),
1712 v2i64, v2i32, OpNode, ExtOp, Commutable>;
1713 def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
1714 OpcodeStr, !strconcat(Dt, "8"),
1715 v8i16, v8i8, OpNode, ExtOp, Commutable>;
1718 // Neon Long 3-register vector intrinsics.
1720 // First with only element sizes of 16 and 32 bits:
// N3VLInt_HS - Produces two N3VLInt records for one intrinsic:
//   record  size arg  Dt suffix  itinerary  ValueType args
//   v4i32   0b01      "16"       itin16     v4i32, v4i16
//   v2i64   0b10      "32"       itin32     v2i64, v2i32
// The record name matches the first (128-bit) ValueType, while the Dt suffix
// names the element size of the second (64-bit) one. IntOp and Commutable
// (default 0) are forwarded unchanged.
1721 multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
1722 InstrItinClass itin16, InstrItinClass itin32,
1723 string OpcodeStr, string Dt,
1724 Intrinsic IntOp, bit Commutable = 0> {
1725 def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
1726 OpcodeStr, !strconcat(Dt, "16"),
1727 v4i32, v4i16, IntOp, Commutable>;
1728 def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
1729 OpcodeStr, !strconcat(Dt, "32"),
1730 v2i64, v2i32, IntOp, Commutable>;
1733 multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
1734 InstrItinClass itin, string OpcodeStr, string Dt,
1736 def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
1737 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
1738 def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
1739 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
1742 // ....then also with element size of 8 bits:
// N3VLInt_QHS - Everything N3VLInt_HS produces (all parameters forwarded
// unchanged), plus the record v8i16: N3VLInt with size arg 0b00, Dt suffix
// "8" and ValueType args v8i16, v8i8.
// There is no dedicated 8-bit itinerary parameter; the added record reuses
// itin16.
1743 multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
1744 InstrItinClass itin16, InstrItinClass itin32,
1745 string OpcodeStr, string Dt,
1746 Intrinsic IntOp, bit Commutable = 0>
1747 : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
1748 IntOp, Commutable> {
1749 def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
1750 OpcodeStr, !strconcat(Dt, "8"),
1751 v8i16, v8i8, IntOp, Commutable>;
1755 // Neon Wide 3-register vector operations,
1756 // source operand element sizes of 8, 16 and 32 bits:
// N3VW_QHS - Produces three N3VW records from a pair of SDNodes (OpNode and
// ExtOp):
//   record  size arg  Dt suffix  ValueType args
//   v8i16   0b00      "8"        v8i16, v8i8
//   v4i32   0b01      "16"       v4i32, v4i16
//   v2i64   0b10      "32"       v2i64, v2i32
// The Dt suffix names the element size of the second (64-bit) ValueType.
// Unlike N3VL_QHS, no itinerary argument is taken or forwarded. OpNode,
// ExtOp and Commutable (default 0) are forwarded unchanged.
1757 multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
1758 string OpcodeStr, string Dt,
1759 SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
1760 def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
1761 OpcodeStr, !strconcat(Dt, "8"),
1762 v8i16, v8i8, OpNode, ExtOp, Commutable>;
1763 def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
1764 OpcodeStr, !strconcat(Dt, "16"),
1765 v4i32, v4i16, OpNode, ExtOp, Commutable>;
1766 def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
1767 OpcodeStr, !strconcat(Dt, "32"),
1768 v2i64, v2i32, OpNode, ExtOp, Commutable>;
1772 // Neon Multiply-Op vector operations,
1773 // element sizes of 8, 16 and 32 bits:
// N3VMulOp_QHS - Expands a multiply-then-OpNode operation into six records.
// Every record passes the integer `mul` node followed by the caller's
// OpNode (e.g. `add` for VMLA) to N3VDMulOp / N3VQMulOp.
//   record  class      size arg  Dt suffix  itinerary
//   v8i8    N3VDMulOp  0b00      "8"        itinD16
//   v4i16   N3VDMulOp  0b01      "16"       itinD16
//   v2i32   N3VDMulOp  0b10      "32"       itinD32
//   v16i8   N3VQMulOp  0b00      "8"        itinQ16
//   v8i16   N3VQMulOp  0b01      "16"       itinQ16
//   v4i32   N3VQMulOp  0b10      "32"       itinQ32
// The 8-bit records reuse the 16-bit itineraries.
1774 multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
1775 InstrItinClass itinD16, InstrItinClass itinD32,
1776 InstrItinClass itinQ16, InstrItinClass itinQ32,
1777 string OpcodeStr, string Dt, SDNode OpNode> {
1778 // 64-bit vector types.
1779 def v8i8 : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
1780 OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
1781 def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
1782 OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
1783 def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
1784 OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;
1786 // 128-bit vector types.
1787 def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
1788 OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
1789 def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
1790 OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
1791 def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
1792 OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
1795 multiclass N3VMulOpSL_HS<bits<4> op11_8,
1796 InstrItinClass itinD16, InstrItinClass itinD32,
1797 InstrItinClass itinQ16, InstrItinClass itinQ32,
1798 string OpcodeStr, string Dt, SDNode ShOp> {
1799 def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
1800 OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
1801 def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
1802 OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
1803 def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
1804 OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
1806 def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
1807 OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
1811 // Neon 3-argument intrinsics,
1812 // element sizes of 8, 16 and 32 bits:
// N3VInt3_QHS - Expands one intrinsic into six records built from N3VDInt3
// (64-bit types) and N3VQInt3 (128-bit types):
//   record  size arg  Dt suffix  itinerary
//   v8i8    0b00      "8"        itinD
//   v4i16   0b01      "16"       itinD
//   v2i32   0b10      "32"       itinD
//   v16i8   0b00      "8"        itinQ
//   v8i16   0b01      "16"       itinQ
//   v4i32   0b10      "32"       itinQ
// Both ValueType arguments of each record equal the record's own type. A
// single itinerary per register width is shared by all element sizes.
1813 multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
1814 InstrItinClass itinD, InstrItinClass itinQ,
1815 string OpcodeStr, string Dt, Intrinsic IntOp> {
1816 // 64-bit vector types.
1817 def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD,
1818 OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
1819 def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD,
1820 OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
1821 def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD,
1822 OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
1824 // 128-bit vector types.
1825 def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ,
1826 OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
1827 def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ,
1828 OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
1829 def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ,
1830 OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
1834 // Neon Long 3-argument intrinsics.
1836 // First with only element sizes of 16 and 32 bits:
// N3VLInt3_HS - Produces two N3VLInt3 records for one intrinsic:
//   record  size arg  Dt suffix  itinerary  ValueType args
//   v4i32   0b01      "16"       itin16     v4i32, v4i16
//   v2i64   0b10      "32"       itin32     v2i64, v2i32
// The record name matches the first (128-bit) ValueType, while the Dt suffix
// names the element size of the second (64-bit) one. There is no Commutable
// parameter.
1837 multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
1838 InstrItinClass itin16, InstrItinClass itin32,
1839 string OpcodeStr, string Dt, Intrinsic IntOp> {
1840 def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
1841 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
1842 def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
1843 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
// N3VLInt3SL_HS - "SL" counterpart of N3VLInt3_HS, producing two records:
//   record  class         size arg  Dt suffix  itinerary     ValueType args
//   v4i16   N3VLInt3SL16  0b01      "16"       IIC_VMACi16D  v4i32, v4i16
//   v2i32   N3VLInt3SL    0b10      "32"       IIC_VMACi32D  v2i64, v2i32
// Two differences from N3VLInt3_HS are worth knowing when referring to the
// generated records:
//   * records are named after the second (64-bit) ValueType, not the first;
//   * the itineraries are hard-coded here rather than taken as parameters.
1846 multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
1847 string OpcodeStr, string Dt, Intrinsic IntOp> {
1848 def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
1849 OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
1850 def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
1851 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
1854 // ....then also with element size of 8 bits:
// N3VLInt3_QHS - Everything N3VLInt3_HS produces (all parameters forwarded
// unchanged), plus the record v8i16: N3VLInt3 with size arg 0b00, Dt suffix
// "8" and ValueType args v8i16, v8i8.
// There is no dedicated 8-bit itinerary parameter; the added record reuses
// itin16.
1855 multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
1856 InstrItinClass itin16, InstrItinClass itin32,
1857 string OpcodeStr, string Dt, Intrinsic IntOp>
1858 : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
1859 def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
1860 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
1864 // Neon 2-register vector intrinsics,
1865 // element sizes of 8, 16 and 32 bits:
// N2VInt_QHS - Expands one 2-register intrinsic into six records built from
// N2VDInt (64-bit types) and N2VQInt (128-bit types):
//   record  size arg  Dt suffix  itinerary
//   v8i8    0b00      "8"        itinD
//   v4i16   0b01      "16"       itinD
//   v2i32   0b10      "32"       itinD
//   v16i8   0b00      "8"        itinQ
//   v8i16   0b01      "16"       itinQ
//   v4i32   0b10      "32"       itinQ
// The size arg is the literal placed between op21_20 and op17_16. Both
// ValueType arguments of each record equal the record's own type.
1866 multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
1867 bits<5> op11_7, bit op4,
1868 InstrItinClass itinD, InstrItinClass itinQ,
1869 string OpcodeStr, string Dt, Intrinsic IntOp> {
1870 // 64-bit vector types.
1871 def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
1872 itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
1873 def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
1874 itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
1875 def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
1876 itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;
1878 // 128-bit vector types.
1879 def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
1880 itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
1881 def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
1882 itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
1883 def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
1884 itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
1888 // Neon Pairwise long 2-register intrinsics,
1889 // element sizes of 8, 16 and 32 bits:
// N2VPLInt_QHS - Expands one pairwise-long intrinsic into six records built
// from N2VDPLInt (64-bit types) and N2VQPLInt (128-bit types):
//   record  size arg  Dt suffix  ValueType args
//   v8i8    0b00      "8"        v4i16, v8i8
//   v4i16   0b01      "16"       v2i32, v4i16
//   v2i32   0b10      "32"       v1i64, v2i32
//   v16i8   0b00      "8"        v8i16, v16i8
//   v8i16   0b01      "16"       v4i32, v8i16
//   v4i32   0b10      "32"       v2i64, v4i32
// Record names and Dt suffixes follow the second ValueType; the first one
// has half as many lanes at twice the element width. No itinerary is passed.
// NOTE(review): presumably the first ValueType is the result and the second
// the operand -- confirm against N2VDPLInt / N2VQPLInt.
1890 multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
1891 bits<5> op11_7, bit op4,
1892 string OpcodeStr, string Dt, Intrinsic IntOp> {
1893 // 64-bit vector types.
1894 def v8i8 : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
1895 OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
1896 def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
1897 OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
1898 def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
1899 OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
1901 // 128-bit vector types.
1902 def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
1903 OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
1904 def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
1905 OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
1906 def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
1907 OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
1911 // Neon Pairwise long 2-register accumulate intrinsics,
1912 // element sizes of 8, 16 and 32 bits:
// N2VPLInt2_QHS - Same layout as N2VPLInt_QHS, but built from the
// accumulating classes N2VDPLInt2 (64-bit types) and N2VQPLInt2 (128-bit
// types):
//   record  size arg  Dt suffix  ValueType args
//   v8i8    0b00      "8"        v4i16, v8i8
//   v4i16   0b01      "16"       v2i32, v4i16
//   v2i32   0b10      "32"       v1i64, v2i32
//   v16i8   0b00      "8"        v8i16, v16i8
//   v8i16   0b01      "16"       v4i32, v8i16
//   v4i32   0b10      "32"       v2i64, v4i32
// Record names and Dt suffixes follow the second ValueType; the first one
// has half as many lanes at twice the element width. No itinerary is passed.
// NOTE(review): presumably the first ValueType is the result/accumulator and
// the second the operand -- confirm against N2VDPLInt2 / N2VQPLInt2.
1913 multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
1914 bits<5> op11_7, bit op4,
1915 string OpcodeStr, string Dt, Intrinsic IntOp> {
1916 // 64-bit vector types.
1917 def v8i8 : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
1918 OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
1919 def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
1920 OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
1921 def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
1922 OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
1924 // 128-bit vector types.
1925 def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
1926 OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
1927 def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
1928 OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
1929 def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
1930 OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
1934 // Neon 2-register vector shift by immediate,
1935 // with f of either N2RegVShLFrm or N2RegVShRFrm
1936 // element sizes of 8, 16, 32 and 64 bits:
1937 multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
1938 InstrItinClass itin, string OpcodeStr, string Dt,
1939 SDNode OpNode, Format f> {
1940 // 64-bit vector types.
1941 def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin,
1942 OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
1943 let Inst{21-19} = 0b001; // imm6 = 001xxx
1945 def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin,
1946 OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
1947 let Inst{21-20} = 0b01; // imm6 = 01xxxx
1949 def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin,
1950 OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
1951 let Inst{21} = 0b1; // imm6 = 1xxxxx
1953 def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, f, itin,
1954 OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
1957 // 128-bit vector types.
1958 def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin,
1959 OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
1960 let Inst{21-19} = 0b001; // imm6 = 001xxx
1962 def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin,
1963 OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
1964 let Inst{21-20} = 0b01; // imm6 = 01xxxx
1966 def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin,
1967 OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
1968 let Inst{21} = 0b1; // imm6 = 1xxxxx
1970 def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, f, itin,
1971 OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
1975 // Neon Shift-Accumulate vector operations,
1976 // element sizes of 8, 16, 32 and 64 bits:
1977 multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
1978 string OpcodeStr, string Dt, SDNode ShOp> {
1979 // 64-bit vector types.
1980 def v8i8 : N2VDShAdd<op24, op23, op11_8, 0, op4,
1981 OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
1982 let Inst{21-19} = 0b001; // imm6 = 001xxx
1984 def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4,
1985 OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
1986 let Inst{21-20} = 0b01; // imm6 = 01xxxx
1988 def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4,
1989 OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
1990 let Inst{21} = 0b1; // imm6 = 1xxxxx
1992 def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4,
1993 OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
1996 // 128-bit vector types.
1997 def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4,
1998 OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
1999 let Inst{21-19} = 0b001; // imm6 = 001xxx
2001 def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4,
2002 OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
2003 let Inst{21-20} = 0b01; // imm6 = 01xxxx
2005 def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4,
2006 OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
2007 let Inst{21} = 0b1; // imm6 = 1xxxxx
2009 def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4,
2010 OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
2015 // Neon Shift-Insert vector operations,
2016 // with f of either N2RegVShLFrm or N2RegVShRFrm
2017 // element sizes of 8, 16, 32 and 64 bits:
2018 multiclass N2VShIns_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
2019 string OpcodeStr, SDNode ShOp,
2021 // 64-bit vector types.
2022 def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4,
2023 f, OpcodeStr, "8", v8i8, ShOp> {
2024 let Inst{21-19} = 0b001; // imm6 = 001xxx
2026 def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4,
2027 f, OpcodeStr, "16", v4i16, ShOp> {
2028 let Inst{21-20} = 0b01; // imm6 = 01xxxx
2030 def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4,
2031 f, OpcodeStr, "32", v2i32, ShOp> {
2032 let Inst{21} = 0b1; // imm6 = 1xxxxx
2034 def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4,
2035 f, OpcodeStr, "64", v1i64, ShOp>;
2038 // 128-bit vector types.
2039 def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4,
2040 f, OpcodeStr, "8", v16i8, ShOp> {
2041 let Inst{21-19} = 0b001; // imm6 = 001xxx
2043 def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4,
2044 f, OpcodeStr, "16", v8i16, ShOp> {
2045 let Inst{21-20} = 0b01; // imm6 = 01xxxx
2047 def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4,
2048 f, OpcodeStr, "32", v4i32, ShOp> {
2049 let Inst{21} = 0b1; // imm6 = 1xxxxx
2051 def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4,
2052 f, OpcodeStr, "64", v2i64, ShOp>;
2056 // Neon Shift Long operations,
2057 // element sizes of 8, 16, 32 bits:
2058 multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
2059 bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
2060 def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
2061 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode> {
2062 let Inst{21-19} = 0b001; // imm6 = 001xxx
2064 def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
2065 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode> {
2066 let Inst{21-20} = 0b01; // imm6 = 01xxxx
2068 def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
2069 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode> {
2070 let Inst{21} = 0b1; // imm6 = 1xxxxx
2074 // Neon Shift Narrow operations,
2075 // element sizes of 16, 32, 64 bits:
2076 multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
2077 bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
2079 def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
2080 OpcodeStr, !strconcat(Dt, "16"), v8i8, v8i16, OpNode> {
2081 let Inst{21-19} = 0b001; // imm6 = 001xxx
2083 def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
2084 OpcodeStr, !strconcat(Dt, "32"), v4i16, v4i32, OpNode> {
2085 let Inst{21-20} = 0b01; // imm6 = 01xxxx
2087 def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
2088 OpcodeStr, !strconcat(Dt, "64"), v2i32, v2i64, OpNode> {
2089 let Inst{21} = 0b1; // imm6 = 1xxxxx
2093 //===----------------------------------------------------------------------===//
2094 // Instruction Definitions.
2095 //===----------------------------------------------------------------------===//
2097 // Vector Add Operations.
2099 // VADD : Vector Add (integer and floating-point)
2100 defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
// Floating-point VADD, D- and Q-register forms, selected from fadd. The
// trailing 1 sits in the argument position where N3V_QHS forwards its
// Commutable flag to N3VD / N3VQ.
2102 def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
2103 v2f32, v2f32, fadd, 1>;
2104 def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
2105 v4f32, v4f32, fadd, 1>;
2106 // VADDL : Vector Add Long (Q = D + D)
// Signed and unsigned variants differ in op24 (0 vs 1), the Dt prefix
// ("s" vs "u") and the extension node (sext vs zext); Commutable is 1.
// NOTE(review): both itinerary slots are IIC_VSHLiD, which by its name is a
// shift itinerary -- confirm it is the intended scheduling class here.
2107 defm VADDLs : N3VL_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
2108 "vaddl", "s", add, sext, 1>;
2109 defm VADDLu : N3VL_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
2110 "vaddl", "u", add, zext, 1>;
2111 // VADDW : Vector Add Wide (Q = Q + D)
// Commutable is passed as 0 for both variants.
2112 defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
2113 defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
2114 // VHADD : Vector Halving Add
// From here to VQADD the signed/unsigned pairs differ only in op24, the Dt
// prefix and the intrinsic; all are marked Commutable (trailing 1) and use
// the N3RegFrm format.
2115 defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
2116 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
2117 "vhadd", "s", int_arm_neon_vhadds, 1>;
2118 defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
2119 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
2120 "vhadd", "u", int_arm_neon_vhaddu, 1>;
2121 // VRHADD : Vector Rounding Halving Add
2122 defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
2123 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
2124 "vrhadd", "s", int_arm_neon_vrhadds, 1>;
2125 defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
2126 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
2127 "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
2128 // VQADD : Vector Saturating Add
// Uses the _QHSD multiclass, so 64-bit element records are generated too.
2129 defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
2130 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
2131 "vqadd", "s", int_arm_neon_vqadds, 1>;
2132 defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
2133 IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
2134 "vqadd", "u", int_arm_neon_vqaddu, 1>;
2135 // VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
// VADDHN and VRADDHN differ in op24 (0 vs 1) and the intrinsic; both use the
// "i" Dt prefix and are marked Commutable.
2136 defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i",
2137 int_arm_neon_vaddhn, 1>;
2138 // VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
2139 defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
2140 int_arm_neon_vraddhn, 1>;
2142 // Vector Multiply Operations.
2144 // VMUL : Vector Multiply (integer, polynomial and floating-point)
// Integer VMUL for 8/16/32-bit elements, selected from `mul` and marked
// Commutable (trailing 1).
2145 defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
2146 IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
// Polynomial VMUL ("p8"): D and Q forms over 8-bit lanes, selected from the
// int_arm_neon_vmulp intrinsic and reusing the 16-bit multiply itineraries.
2147 def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
2148 "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
2149 def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
2150 "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
// Floating-point VMUL ("f32"): D and Q forms selected from fmul.
2151 def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul", "f32",
2152 v2f32, v2f32, fmul, 1>;
2153 def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul", "f32",
2154 v4f32, v4f32, fmul, 1>;
// "sl" variants: integer 16/32-bit forms via N3VSL_HS, then the f32 D form.
// Note the op11_8 values differ: 0b1000 for integer, 0b1001 for f32.
2155 defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>;
2156 def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
2157 def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
// Selection patterns for a Q-register multiply whose second operand is a
// lane of a Q register duplicated across the vector (NEONvduplane). Each
// pattern rewrites the multiply to the matching "sl" instruction, which is
// given a D register: EXTRACT_SUBREG picks a D sub-register of $src2 using
// DSubReg_*_reg applied to the lane, and SubReg_*_lane applied to the same
// lane supplies the lane operand.
// v8i16 case: 16-bit helpers, VMULslv8i16.
2160 def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
2161 (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
2162 (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
2163 (v4i16 (EXTRACT_SUBREG QPR:$src2,
2164 (DSubReg_i16_reg imm:$lane))),
2165 (SubReg_i16_lane imm:$lane)))>;
// v4i32 case: 32-bit helpers, VMULslv4i32.
2166 def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
2167 (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
2168 (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
2169 (v2i32 (EXTRACT_SUBREG QPR:$src2,
2170 (DSubReg_i32_reg imm:$lane))),
2171 (SubReg_i32_lane imm:$lane)))>;
// v4f32 case: matches fmul and emits VMULslfq; it reuses the i32 helpers
// (DSubReg_i32_reg / SubReg_i32_lane) for the 32-bit float lanes.
2172 def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
2173 (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
2174 (v4f32 (VMULslfq (v4f32 QPR:$src1),
2175 (v2f32 (EXTRACT_SUBREG QPR:$src2,
2176 (DSubReg_i32_reg imm:$lane))),
2177 (SubReg_i32_lane imm:$lane)))>;
2179 // VQDMULH : Vector Saturating Doubling Multiply Returning High Half
// 3-register form: 16/32-bit elements only (N3VInt_HS), Dt prefix "s",
// selected from int_arm_neon_vqdmulh and marked Commutable (trailing 1).
2180 defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
2181 IIC_VMULi16Q, IIC_VMULi32Q,
2182 "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
// "sl" form: same intrinsic and itineraries through N3VIntSL_HS, with
// op11_8 = 0b1100.
2183 defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
2184 IIC_VMULi16Q, IIC_VMULi32Q,
2185 "vqdmulh", "s", int_arm_neon_vqdmulh>;
2186 def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
2187 (v8i16 (NEONvduplane (v8i16 QPR:$src2),
2189 (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
2190 (v4i16 (EXTRACT_SUBREG QPR:$src2,
2191 (DSubReg_i16_reg imm:$lane))),
2192 (SubReg_i16_lane imm:$lane)))>;
2193 def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
2194 (v4i32 (NEONvduplane (v4i32 QPR:$src2),
2196 (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
2197 (v2i32 (EXTRACT_SUBREG QPR:$src2,
2198 (DSubReg_i32_reg imm:$lane))),
2199 (SubReg_i32_lane imm:$lane)))>;
2201 // VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
// 3-register form: 16/32-bit elements only (N3VInt_HS), Dt prefix "s",
// selected from int_arm_neon_vqrdmulh and marked Commutable (trailing 1).
// Its first argument (op24) is 1 where VQDMULH passes 0.
2202 defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
2203 IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
2204 "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
// "sl" form: same intrinsic and itineraries through N3VIntSL_HS, with
// op11_8 = 0b1101.
2205 defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
2206 IIC_VMULi16Q, IIC_VMULi32Q,
2207 "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
2208 def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
2209 (v8i16 (NEONvduplane (v8i16 QPR:$src2),
2211 (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
2212 (v4i16 (EXTRACT_SUBREG QPR:$src2,
2213 (DSubReg_i16_reg imm:$lane))),
2214 (SubReg_i16_lane imm:$lane)))>;
2215 def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
2216 (v4i32 (NEONvduplane (v4i32 QPR:$src2),
2218 (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
2219 (v2i32 (EXTRACT_SUBREG QPR:$src2,
2220 (DSubReg_i32_reg imm:$lane))),
2221 (SubReg_i32_lane imm:$lane)))>;
2223 // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
// VMULLs / VMULLu are the signed / unsigned forms (int_arm_neon_vmulls and
// int_arm_neon_vmullu); they differ only in the first template bit and in the
// "s" / "u" data-type prefix.
// VMULLp is the polynomial form: "p8" only, with v8i16 and v8i8 value types,
// selecting int_arm_neon_vmullp.
// VMULLsls / VMULLslu instantiate N3VLIntSL_HS for the same two integer
// intrinsics.
2224 defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
2225 "vmull", "s", int_arm_neon_vmulls, 1>;
2226 defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
2227 "vmull", "u", int_arm_neon_vmullu, 1>;
2228 def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
2229 v8i16, v8i8, int_arm_neon_vmullp, 1>;
2230 defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s",
2231 int_arm_neon_vmulls>;
2232 defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u",
2233 int_arm_neon_vmullu>;
2235 // VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
// Register form (N3VLInt_HS) and "sl" form (N3VLIntSL_HS) of "vqdmull"; both
// use the "s" data-type prefix and select int_arm_neon_vqdmull.
2236 defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
2237 "vqdmull", "s", int_arm_neon_vqdmull, 1>;
2238 defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
2239 "vqdmull", "s", int_arm_neon_vqdmull>;
2241 // Vector Multiply-Accumulate and Multiply-Subtract Operations.
2243 // VMLA : Vector Multiply Accumulate (integer and floating-point)
2244 defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
2245 IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
2246 def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
2248 def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
2250 defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
2251 IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
2252 def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
2254 def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
2255 v4f32, v2f32, fmul, fadd>;
// Fold add($src1, mul($src2, duplane($src3, lane))) on v8i16 into
// VMLAslv8i16: the D subregister of $src3 picked by DSubReg_i16_reg is
// extracted and the lane index is rewritten by SubReg_i16_lane.
2257 def : Pat<(v8i16 (add (v8i16 QPR:$src1),
2258 (mul (v8i16 QPR:$src2),
2259 (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
2260 (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
2261 (v4i16 (EXTRACT_SUBREG QPR:$src3,
2262 (DSubReg_i16_reg imm:$lane))),
2263 (SubReg_i16_lane imm:$lane)))>;
// v4i32 counterpart: targets VMLAslv4i32 with the i32 subregister/lane
// transforms.
2265 def : Pat<(v4i32 (add (v4i32 QPR:$src1),
2266 (mul (v4i32 QPR:$src2),
2267 (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
2268 (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
2269 (v2i32 (EXTRACT_SUBREG QPR:$src3,
2270 (DSubReg_i32_reg imm:$lane))),
2271 (SubReg_i32_lane imm:$lane)))>;
2273 def : Pat<(v4f32 (fadd (v4f32 QPR:$src1),
2274 (fmul (v4f32 QPR:$src2),
2275 (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
2276 (v4f32 (VMLAslfq (v4f32 QPR:$src1),
2278 (v2f32 (EXTRACT_SUBREG QPR:$src3,
2279 (DSubReg_i32_reg imm:$lane))),
2280 (SubReg_i32_lane imm:$lane)))>;
2282 // VMLAL : Vector Multiply Accumulate Long (Q += D * D)
// Signed / unsigned register forms select int_arm_neon_vmlals /
// int_arm_neon_vmlalu; the "sl" forms instantiate N3VLInt3SL_HS for the same
// intrinsics with opcode field 0b0010.
2283 defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
2284 "vmlal", "s", int_arm_neon_vmlals>;
2285 defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
2286 "vmlal", "u", int_arm_neon_vmlalu>;
2288 defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal", "s", int_arm_neon_vmlals>;
2289 defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal", "u", int_arm_neon_vmlalu>;
2291 // VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
// Register form (N3VLInt3_HS) and "sl" form (N3VLInt3SL_HS, opcode field
// 0b0011) of "vqdmlal"; both select int_arm_neon_vqdmlal with the "s" prefix.
2292 defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
2293 "vqdmlal", "s", int_arm_neon_vqdmlal>;
2294 defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>;
2296 // VMLS : Vector Multiply Subtract (integer and floating-point)
2297 defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
2298 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
2299 def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
2301 def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
2303 defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
2304 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
2305 def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
2307 def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
2308 v4f32, v2f32, fmul, fsub>;
// Fold sub($src1, mul($src2, duplane($src3, lane))) on v8i16 into
// VMLSslv8i16: the D subregister of $src3 picked by DSubReg_i16_reg is
// extracted and the lane index is rewritten by SubReg_i16_lane.
2310 def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
2311 (mul (v8i16 QPR:$src2),
2312 (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
2313 (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
2314 (v4i16 (EXTRACT_SUBREG QPR:$src3,
2315 (DSubReg_i16_reg imm:$lane))),
2316 (SubReg_i16_lane imm:$lane)))>;
// v4i32 counterpart: targets VMLSslv4i32 with the i32 subregister/lane
// transforms.
2318 def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
2319 (mul (v4i32 QPR:$src2),
2320 (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
2321 (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
2322 (v2i32 (EXTRACT_SUBREG QPR:$src3,
2323 (DSubReg_i32_reg imm:$lane))),
2324 (SubReg_i32_lane imm:$lane)))>;
// v4f32 counterpart: matches fsub/fmul and targets VMLSslfq with a v2f32
// subregister, reusing the i32 subregister/lane transforms.
2326 def : Pat<(v4f32 (fsub (v4f32 QPR:$src1),
2327 (fmul (v4f32 QPR:$src2),
2328 (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
2329 (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
2330 (v2f32 (EXTRACT_SUBREG QPR:$src3,
2331 (DSubReg_i32_reg imm:$lane))),
2332 (SubReg_i32_lane imm:$lane)))>;
2334 // VMLSL : Vector Multiply Subtract Long (Q -= D * D)
// Signed / unsigned register forms select int_arm_neon_vmlsls /
// int_arm_neon_vmlslu; the "sl" forms instantiate N3VLInt3SL_HS for the same
// intrinsics with opcode field 0b0110.
2335 defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
2336 "vmlsl", "s", int_arm_neon_vmlsls>;
2337 defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
2338 "vmlsl", "u", int_arm_neon_vmlslu>;
2340 defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl", "s", int_arm_neon_vmlsls>;
2341 defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl", "u", int_arm_neon_vmlslu>;
2343 // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
// Register form (N3VLInt3_HS) and "sl" form (N3VLInt3SL_HS) of "vqdmlsl"; both
// select int_arm_neon_vqdmlsl with the "s" data-type prefix.
2344 defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
2345 "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
// The opcode field is spelled as a full 4-digit literal (0b0111, same value as
// 0b111) so it matches the other N3VLInt3SL_HS instantiations (0b0010, 0b0011,
// 0b0110) and stays valid if binary literals are treated as width-sized.
2346 defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
2348 // Vector Subtract Operations.
2350 // VSUB : Vector Subtract (integer and floating-point)
// The integer forms select the generic `sub` node; the f32 D and Q forms
// select `fsub` on v2f32 / v4f32.
2351 defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
2352 "vsub", "i", sub, 0>;
2353 def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
2354 v2f32, v2f32, fsub, 0>;
2355 def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
2356 v4f32, v4f32, fsub, 0>;
2357 // VSUBL : Vector Subtract Long (Q = D - D)
// Signed / unsigned forms pass `sub` together with sext / zext respectively.
2358 defm VSUBLs : N3VL_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
2359 "vsubl", "s", sub, sext, 0>;
2360 defm VSUBLu : N3VL_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
2361 "vsubl", "u", sub, zext, 0>;
2362 // VSUBW : Vector Subtract Wide (Q = Q - D)
// Same sub + sext / zext pairing as VSUBL, with opcode field 0b0011.
2363 defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
2364 defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
2365 // VHSUB : Vector Halving Subtract
// Signed / unsigned forms select int_arm_neon_vhsubs / int_arm_neon_vhsubu.
2366 defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
2367 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
2368 "vhsub", "s", int_arm_neon_vhsubs, 0>;
2369 defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
2370 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
2371 "vhsub", "u", int_arm_neon_vhsubu, 0>;
2372 // VQSUB : Vector Saturating Subtract
// Signed / unsigned "vqsub" forms select int_arm_neon_vqsubs /
// int_arm_neon_vqsubu. They share VHSUB's opcode field (0b0010) but pass 1
// instead of 0 as the following bit, and use the _QHSD multiclass.
2373 defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
2374 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
2375 "vqsub", "s", int_arm_neon_vqsubs, 0>;
2376 defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
2377 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
2378 "vqsub", "u", int_arm_neon_vqsubu, 0>;
2379 // VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
// Selects int_arm_neon_vsubhn; "i" data-type prefix.
2380 defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
2381 int_arm_neon_vsubhn, 0>;
2382 // VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
// Same template arguments as VSUBHN except the first bit (1 vs 0); selects
// int_arm_neon_vrsubhn.
2383 defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
2384 int_arm_neon_vrsubhn, 0>;
2386 // Vector Comparisons.
2388 // VCEQ : Vector Compare Equal
2389 defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
2390 IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
2391 def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
2393 def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
2395 // For disassembly only.
2396 defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
2399 // VCGE : Vector Compare Greater Than or Equal
2400 defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
2401 IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
2402 defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
2403 IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
2404 def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
2406 def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
2408 // For disassembly only.
2409 defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
2411 // For disassembly only.
2412 defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
2415 // VCGT : Vector Compare Greater Than
2416 defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
2417 IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
2418 defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
2419 IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
2420 def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
2422 def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
2424 // For disassembly only.
2425 defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
2427 // For disassembly only.
2428 defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
2431 // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
// f32-only D and Q forms with v2i32 / v4i32 paired with v2f32 / v4f32; they
// select int_arm_neon_vacged and int_arm_neon_vacgeq respectively.
2432 def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
2433 "f32", v2i32, v2f32, int_arm_neon_vacged, 0>;
2434 def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
2435 "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>;
2436 // VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
// Same shape as VACGE with 0b10 in the third template argument; selects
// int_arm_neon_vacgtd / int_arm_neon_vacgtq.
2437 def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
2438 "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>;
2439 def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
2440 "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>;
2441 // VTST : Vector Test Bits
// Selects the NEONvtst node; the data-type prefix string is empty.
2442 defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
2443 IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
2445 // Vector Bitwise Operations.
// Pattern fragments for bitwise NOT: xor of the operand with an all-ones
// vector (NEONimmAllOnesV), bitconverted from v8i8 for vnotd and from v16i8
// for vnotq.
2447 def vnotd : PatFrag<(ops node:$in),
2448 (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
2449 def vnotq : PatFrag<(ops node:$in),
2450 (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;
2453 // VAND : Vector Bitwise AND
// D and Q forms select the generic `and` node on v2i32 / v4i32.
2454 def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
2455 v2i32, v2i32, and, 1>;
2456 def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
2457 v4i32, v4i32, and, 1>;
2459 // VEOR : Vector Bitwise Exclusive OR
// Same template arguments as VAND except the first bit (1 vs 0); selects
// `xor`.
2460 def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
2461 v2i32, v2i32, xor, 1>;
2462 def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
2463 v4i32, v4i32, xor, 1>;
2465 // VORR : Vector Bitwise OR
// Same template arguments as VAND except the third argument (0b10 vs 0b00);
// selects `or`.
2466 def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
2467 v2i32, v2i32, or, 1>;
2468 def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
2469 v4i32, v4i32, or, 1>;
2471 // VBIC : Vector Bitwise Bit Clear (AND NOT)
// Pattern: $dst = and($src1, not($src2)), using the vnotd / vnotq fragments
// for the D / Q forms. Written directly against N3VX with explicit operand
// lists and asm string.
2472 def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
2473 (ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD,
2474 "vbic", "$dst, $src1, $src2", "",
2475 [(set DPR:$dst, (v2i32 (and DPR:$src1,
2476 (vnotd DPR:$src2))))]>;
2477 def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
2478 (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINiQ,
2479 "vbic", "$dst, $src1, $src2", "",
2480 [(set QPR:$dst, (v4i32 (and QPR:$src1,
2481 (vnotq QPR:$src2))))]>;
2483 // VORN : Vector Bitwise OR NOT
// Pattern: $dst = or($src1, not($src2)). Identical in shape to VBIC with the
// third template argument 0b11 instead of 0b01.
2484 def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst),
2485 (ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD,
2486 "vorn", "$dst, $src1, $src2", "",
2487 [(set DPR:$dst, (v2i32 (or DPR:$src1,
2488 (vnotd DPR:$src2))))]>;
2489 def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst),
2490 (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINiQ,
2491 "vorn", "$dst, $src1, $src2", "",
2492 [(set QPR:$dst, (v4i32 (or QPR:$src1,
2493 (vnotq QPR:$src2))))]>;
2495 // VMVN : Vector Bitwise NOT (Immediate)
2497 let isReMaterializable = 1 in {
2498 def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$dst),
2499 (ins nModImm:$SIMM), IIC_VMOVImm,
2500 "vmvn", "i16", "$dst, $SIMM", "",
2501 [(set DPR:$dst, (v4i16 (NEONvmvnImm timm:$SIMM)))]>;
2502 def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$dst),
2503 (ins nModImm:$SIMM), IIC_VMOVImm,
2504 "vmvn", "i16", "$dst, $SIMM", "",
2505 [(set QPR:$dst, (v8i16 (NEONvmvnImm timm:$SIMM)))]>;
2507 def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$dst),
2508 (ins nModImm:$SIMM), IIC_VMOVImm,
2509 "vmvn", "i32", "$dst, $SIMM", "",
2510 [(set DPR:$dst, (v2i32 (NEONvmvnImm timm:$SIMM)))]>;
2511 def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$dst),
2512 (ins nModImm:$SIMM), IIC_VMOVImm,
2513 "vmvn", "i32", "$dst, $SIMM", "",
2514 [(set QPR:$dst, (v4i32 (NEONvmvnImm timm:$SIMM)))]>;
2517 // VMVN : Vector Bitwise NOT
// Register-to-register NOT: $dst = vnotd / vnotq of $src for the D / Q forms.
// NOTE(review): VMVNq uses the same IIC_VSUBiD itinerary as VMVNd -- confirm
// that a D-register itinerary is intended for the Q form.
2518 def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
2519 (outs DPR:$dst), (ins DPR:$src), IIC_VSUBiD,
2520 "vmvn", "$dst, $src", "",
2521 [(set DPR:$dst, (v2i32 (vnotd DPR:$src)))]>;
2522 def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
2523 (outs QPR:$dst), (ins QPR:$src), IIC_VSUBiD,
2524 "vmvn", "$dst, $src", "",
2525 [(set QPR:$dst, (v4i32 (vnotq QPR:$src)))]>;
// Standalone patterns mapping typed vnotd / vnotq results to VMVNd / VMVNq.
2526 def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
2527 def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
2529 // VBSL : Vector Bitwise Select
2530 def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
2531 (ins DPR:$src1, DPR:$src2, DPR:$src3),
2532 N3RegFrm, IIC_VCNTiD,
2533 "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
2535 (v2i32 (or (and DPR:$src2, DPR:$src1),
2536 (and DPR:$src3, (vnotd DPR:$src1)))))]>;
2537 def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
2538 (ins QPR:$src1, QPR:$src2, QPR:$src3),
2539 N3RegFrm, IIC_VCNTiQ,
2540 "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
2542 (v4i32 (or (and QPR:$src2, QPR:$src1),
2543 (and QPR:$src3, (vnotq QPR:$src1)))))]>;
2545 // VBIF : Vector Bitwise Insert if False
2546 // like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
// NOTE(review): the defs below actually use the asm operands
// "$dst, $src2, $src3" and the constraint "$src1 = $dst", which differs from
// the operand order and tie quoted in the comment above -- confirm which is
// intended. The pattern lists are empty (disassembly only).
2547 def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
2548 (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
2549 N3RegFrm, IIC_VBINiD,
2550 "vbif", "$dst, $src2, $src3", "$src1 = $dst",
2551 [/* For disassembly only; pattern left blank */]>;
2552 def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
2553 (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
2554 N3RegFrm, IIC_VBINiQ,
2555 "vbif", "$dst, $src2, $src3", "$src1 = $dst",
2556 [/* For disassembly only; pattern left blank */]>;
2558 // VBIT : Vector Bitwise Insert if True
2559 // like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
// NOTE(review): as with VBIF, the defs below use "$dst, $src2, $src3" and
// "$src1 = $dst" rather than the strings quoted in the comment above --
// confirm which is intended. The pattern lists are empty (disassembly only).
2560 def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
2561 (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
2562 N3RegFrm, IIC_VBINiD,
2563 "vbit", "$dst, $src2, $src3", "$src1 = $dst",
2564 [/* For disassembly only; pattern left blank */]>;
2565 def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
2566 (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
2567 N3RegFrm, IIC_VBINiQ,
2568 "vbit", "$dst, $src2, $src3", "$src1 = $dst",
2569 [/* For disassembly only; pattern left blank */]>;
2571 // VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking
2572 // for equivalent operations with different register constraints; it just
2575 // Vector Absolute Differences.
2577 // VABD : Vector Absolute Difference
// Integer signed / unsigned forms select int_arm_neon_vabds /
// int_arm_neon_vabdu. The f32 D and Q forms reuse int_arm_neon_vabds with
// v2f32 / v4f32 value types.
2578 defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
2579 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
2580 "vabd", "s", int_arm_neon_vabds, 0>;
2581 defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
2582 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
2583 "vabd", "u", int_arm_neon_vabdu, 0>;
2584 def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
2585 "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 0>;
2586 def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
2587 "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 0>;
2589 // VABDL : Vector Absolute Difference Long (Q = | D - D |)
// Signed / unsigned forms select int_arm_neon_vabdls / int_arm_neon_vabdlu.
2590 defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q,
2591 "vabdl", "s", int_arm_neon_vabdls, 0>;
2592 defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q,
2593 "vabdl", "u", int_arm_neon_vabdlu, 0>;
2595 // VABA : Vector Absolute Difference and Accumulate
// Signed / unsigned forms select int_arm_neon_vabas / int_arm_neon_vabau.
2596 defm VABAs : N3VInt3_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
2597 "vaba", "s", int_arm_neon_vabas>;
2598 defm VABAu : N3VInt3_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
2599 "vaba", "u", int_arm_neon_vabau>;
2601 // VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
// Signed / unsigned forms select int_arm_neon_vabals / int_arm_neon_vabalu.
2602 defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, IIC_VABAD, IIC_VABAD,
2603 "vabal", "s", int_arm_neon_vabals>;
2604 defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, IIC_VABAD, IIC_VABAD,
2605 "vabal", "u", int_arm_neon_vabalu>;
2607 // Vector Maximum and Minimum.
2609 // VMAX : Vector Maximum
// Integer signed / unsigned forms select int_arm_neon_vmaxs /
// int_arm_neon_vmaxu.
2610 defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
2611 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
2612 "vmax", "s", int_arm_neon_vmaxs, 1>;
2613 defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
2614 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
2615 "vmax", "u", int_arm_neon_vmaxu, 1>;
2616 def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
2618 v2f32, v2f32, int_arm_neon_vmaxs, 1>;
2619 def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
2621 v4f32, v4f32, int_arm_neon_vmaxs, 1>;
2623 // VMIN : Vector Minimum
// Integer signed / unsigned forms select int_arm_neon_vmins /
// int_arm_neon_vminu. Same template arguments as VMAX except the bit after
// the opcode field (1 vs 0).
2624 defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
2625 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
2626 "vmin", "s", int_arm_neon_vmins, 1>;
2627 defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
2628 IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
2629 "vmin", "u", int_arm_neon_vminu, 1>;
2630 def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
2632 v2f32, v2f32, int_arm_neon_vmins, 1>;
2633 def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
2635 v4f32, v4f32, int_arm_neon_vmins, 1>;
2637 // Vector Pairwise Operations.
2639 // VPADD : Vector Pairwise Add
2640 def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
2642 v8i8, v8i8, int_arm_neon_vpadd, 0>;
2643 def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
2645 v4i16, v4i16, int_arm_neon_vpadd, 0>;
2646 def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
2648 v2i32, v2i32, int_arm_neon_vpadd, 0>;
2649 def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
2650 IIC_VBIND, "vpadd", "f32",
2651 v2f32, v2f32, int_arm_neon_vpadd, 0>;
2653 // VPADDL : Vector Pairwise Add Long
// Signed / unsigned forms select int_arm_neon_vpaddls / int_arm_neon_vpaddlu;
// they differ in the low bit of the 5-bit opcode field (0b00100 vs 0b00101).
2654 defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
2655 int_arm_neon_vpaddls>;
2656 defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
2657 int_arm_neon_vpaddlu>;
2659 // VPADAL : Vector Pairwise Add and Accumulate Long
// Signed / unsigned forms select int_arm_neon_vpadals / int_arm_neon_vpadalu
// via the N2VPLInt2_QHS multiclass.
2660 defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
2661 int_arm_neon_vpadals>;
2662 defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
2663 int_arm_neon_vpadalu>;
2665 // VPMAX : Vector Pairwise Maximum
// D-register forms only. One def per element width (8/16/32) for signed
// (int_arm_neon_vpmaxs) and unsigned (int_arm_neon_vpmaxu); the third template
// argument tracks the width (0b00 / 0b01 / 0b10). VPMAXf is the f32 form and
// reuses int_arm_neon_vpmaxs with v2f32.
2666 def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
2667 "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
2668 def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
2669 "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
2670 def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
2671 "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
2672 def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
2673 "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
2674 def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
2675 "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
2676 def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
2677 "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
2678 def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
2679 "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
2681 // VPMIN : Vector Pairwise Minimum
// Mirrors VPMAX: the integer forms pass 1 instead of 0 after the opcode
// field and select int_arm_neon_vpmins / int_arm_neon_vpminu; VPMINf differs
// from VPMAXf in the third template argument (0b10 vs 0b00).
2682 def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
2683 "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
2684 def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
2685 "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
2686 def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
2687 "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
2688 def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
2689 "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
2690 def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
2691 "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
2692 def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
2693 "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
2694 def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VSUBi4D, "vpmin",
2695 "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
2697 // Vector Reciprocal and Reciprocal Square Root Estimate and Step.
2699 // VRECPE : Vector Reciprocal Estimate
// Four defs sharing int_arm_neon_vrecpe: "u32" D/Q forms on v2i32 / v4i32
// (opcode field 0b01000) and "f32" D/Q forms on v2f32 / v4f32 (0b01010).
2700 def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
2701 IIC_VUNAD, "vrecpe", "u32",
2702 v2i32, v2i32, int_arm_neon_vrecpe>;
2703 def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
2704 IIC_VUNAQ, "vrecpe", "u32",
2705 v4i32, v4i32, int_arm_neon_vrecpe>;
2706 def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
2707 IIC_VUNAD, "vrecpe", "f32",
2708 v2f32, v2f32, int_arm_neon_vrecpe>;
2709 def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
2710 IIC_VUNAQ, "vrecpe", "f32",
2711 v4f32, v4f32, int_arm_neon_vrecpe>;
2713 // VRECPS : Vector Reciprocal Step
// f32-only three-register D/Q forms selecting int_arm_neon_vrecps.
2714 def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
2715 IIC_VRECSD, "vrecps", "f32",
2716 v2f32, v2f32, int_arm_neon_vrecps, 1>;
2717 def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
2718 IIC_VRECSQ, "vrecps", "f32",
2719 v4f32, v4f32, int_arm_neon_vrecps, 1>;
2721 // VRSQRTE : Vector Reciprocal Square Root Estimate
// Same layout as VRECPE with int_arm_neon_vrsqrte: opcode field 0b01001 for
// the "u32" forms and 0b01011 for the "f32" forms.
2722 def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
2723 IIC_VUNAD, "vrsqrte", "u32",
2724 v2i32, v2i32, int_arm_neon_vrsqrte>;
2725 def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
2726 IIC_VUNAQ, "vrsqrte", "u32",
2727 v4i32, v4i32, int_arm_neon_vrsqrte>;
2728 def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
2729 IIC_VUNAD, "vrsqrte", "f32",
2730 v2f32, v2f32, int_arm_neon_vrsqrte>;
2731 def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
2732 IIC_VUNAQ, "vrsqrte", "f32",
2733 v4f32, v4f32, int_arm_neon_vrsqrte>;
2735 // VRSQRTS : Vector Reciprocal Square Root Step
// Same layout as VRECPS with the third template argument 0b10; selects
// int_arm_neon_vrsqrts.
2736 def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
2737 IIC_VRECSD, "vrsqrts", "f32",
2738 v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
2739 def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
2740 IIC_VRECSQ, "vrsqrts", "f32",
2741 v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
2745 // VSHL : Vector Shift
// Register-shift forms (N3RegVShFrm); signed / unsigned select
// int_arm_neon_vshifts / int_arm_neon_vshiftu.
2746 defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, N3RegVShFrm,
2747 IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
2748 "vshl", "s", int_arm_neon_vshifts, 0>;
2749 defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, N3RegVShFrm,
2750 IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
2751 "vshl", "u", int_arm_neon_vshiftu, 0>;
2752 // VSHL : Vector Shift Left (Immediate)
2753 defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl,
2755 // VSHR : Vector Shift Right (Immediate)
2756 defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", NEONvshrs,
2758 defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", NEONvshru,
2761 // VSHLL : Vector Shift Left Long
2762 defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
2763 defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;
2765 // VSHLL : Vector Shift Left Long (with maximum shift count)
2766 class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
2767 bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
2768 ValueType OpTy, SDNode OpNode>
2769 : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
2770 ResTy, OpTy, OpNode> {
2771 let Inst{21-16} = op21_16;
2773 def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
2774 v8i16, v8i8, NEONvshlli>;
2775 def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
2776 v4i32, v4i16, NEONvshlli>;
2777 def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
2778 v2i64, v2i32, NEONvshlli>;
2780 // VSHRN : Vector Shift Right and Narrow
2781 defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
2784 // VRSHL : Vector Rounding Shift
// Register-shift forms (N3RegVShFrm); signed / unsigned select
// int_arm_neon_vrshifts / int_arm_neon_vrshiftu.
2785 defm VRSHLs : N3VInt_QHSD<0, 0, 0b0101, 0, N3RegVShFrm,
2786 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
2787 "vrshl", "s", int_arm_neon_vrshifts, 0>;
2788 defm VRSHLu : N3VInt_QHSD<1, 0, 0b0101, 0, N3RegVShFrm,
2789 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
2790 "vrshl", "u", int_arm_neon_vrshiftu, 0>;
2791 // VRSHR : Vector Rounding Shift Right
2792 defm VRSHRs : N2VSh_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs,
2794 defm VRSHRu : N2VSh_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru,
2797 // VRSHRN : Vector Rounding Shift Right and Narrow
2798 defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
2801 // VQSHL : Vector Saturating Shift
// Register-shift forms (N3RegVShFrm); signed / unsigned select
// int_arm_neon_vqshifts / int_arm_neon_vqshiftu. Shares opcode field 0b0100
// with VSHL but passes 1 as the following bit.
2802 defm VQSHLs : N3VInt_QHSD<0, 0, 0b0100, 1, N3RegVShFrm,
2803 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
2804 "vqshl", "s", int_arm_neon_vqshifts, 0>;
2805 defm VQSHLu : N3VInt_QHSD<1, 0, 0b0100, 1, N3RegVShFrm,
2806 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
2807 "vqshl", "u", int_arm_neon_vqshiftu, 0>;
2808 // VQSHL : Vector Saturating Shift Left (Immediate)
2809 defm VQSHLsi : N2VSh_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls,
2811 defm VQSHLui : N2VSh_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu,
2813 // VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
2814 defm VQSHLsu : N2VSh_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu,
2817 // VQSHRN : Vector Saturating Shift Right and Narrow
2818 defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
2820 defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
2823 // VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
2824 defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
2827 // VQRSHL : Vector Saturating Rounding Shift
// Register-shift forms (N3RegVShFrm); signed / unsigned select
// int_arm_neon_vqrshifts / int_arm_neon_vqrshiftu. Shares opcode field 0b0101
// with VRSHL but passes 1 as the following bit.
2828 defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, N3RegVShFrm,
2829 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
2830 "vqrshl", "s", int_arm_neon_vqrshifts, 0>;
2831 defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, N3RegVShFrm,
2832 IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
2833 "vqrshl", "u", int_arm_neon_vqrshiftu, 0>;
2835 // VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
2836 defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
2838 defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
2841 // VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
2842 defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
2845 // VSRA : Vector Shift Right and Accumulate
// Signed / unsigned forms are built from the NEONvshrs / NEONvshru shift
// nodes via N2VShAdd_QHSD.
2846 defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
2847 defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
2848 // VRSRA : Vector Rounding Shift Right and Accumulate
// Same multiclass with the rounding shift nodes NEONvrshrs / NEONvrshru.
2849 defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
2850 defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;
2852 // VSLI : Vector Shift Left and Insert
// Uses the NEONvsli node with the N2RegVShLFrm format.
2853 defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli", NEONvsli, N2RegVShLFrm>;
2854 // VSRI : Vector Shift Right and Insert
// Uses the NEONvsri node with the N2RegVShRFrm format.
2855 defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri", NEONvsri, N2RegVShRFrm>;
2857 // Vector Absolute and Saturating Absolute.
2859 // VABS : Vector Absolute Value
2860 defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
2861 IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s",
// Floating-point "vabs": f32 D and Q forms on v2f32 / v4f32, both selecting
// int_arm_neon_vabs.
2863 def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
2864 IIC_VUNAD, "vabs", "f32",
2865 v2f32, v2f32, int_arm_neon_vabs>;
2866 def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
2867 IIC_VUNAQ, "vabs", "f32",
2868 v4f32, v4f32, int_arm_neon_vabs>;
2870 // VQABS : Vector Saturating Absolute Value
// Integer forms with the "s" data-type prefix, selecting int_arm_neon_vqabs.
2871 defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
2872 IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
2873 int_arm_neon_vqabs>;
// Pattern fragments for integer negation: sub of an all-zeros vector
// (NEONimmAllZerosV, bitconverted from v2i32 for vnegd and v4i32 for vnegq)
// and the operand.
2877 def vnegd : PatFrag<(ops node:$in),
2878 (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
2879 def vnegq : PatFrag<(ops node:$in),
2880 (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;
// Instruction classes for integer VNEG, parameterized on the size field,
// mnemonic, data-type string and vector type. VNEGD works on DPR and matches
// vnegd; VNEGQ works on QPR and matches vnegq.
// NOTE(review): VNEGQ uses the same IIC_VSHLiD itinerary as VNEGD -- confirm
// that a D-register itinerary is intended for the Q form.
2882 class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
2883 : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$dst), (ins DPR:$src),
2884 IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
2885 [(set DPR:$dst, (Ty (vnegd DPR:$src)))]>;
2886 class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
2887 : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src),
2888 IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
2889 [(set QPR:$dst, (Ty (vnegq QPR:$src)))]>;
2891 // VNEG : Vector Negate (integer)
// One def per element width and register size; the size argument is
// 0b00 / 0b01 / 0b10 for s8 / s16 / s32.
2892 def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>;
2893 def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
2894 def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
2895 def VNEGs8q : VNEGQ<0b00, "vneg", "s8", v16i8>;
2896 def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
2897 def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
2899 // VNEG : Vector Negate (floating-point)
// f32 D and Q forms matching the generic `fneg` node on v2f32 / v4f32.
2900 def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
2901 (outs DPR:$dst), (ins DPR:$src), IIC_VUNAD,
2902 "vneg", "f32", "$dst, $src", "",
2903 [(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>;
2904 def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
2905 (outs QPR:$dst), (ins QPR:$src), IIC_VUNAQ,
2906 "vneg", "f32", "$dst, $src", "",
2907 [(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>;
// Standalone patterns mapping each typed vnegd / vnegq result to the integer
// VNEG instruction of matching element width.
2909 def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
2910 def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
2911 def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
2912 def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
2913 def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
2914 def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
2916 // VQNEG : Vector Saturating Negate
// Integer forms with the "s" data-type prefix, selecting int_arm_neon_vqneg.
2917 defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
2918 IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
2919 int_arm_neon_vqneg>;
2921 // Vector Bit Counting Operations.
2923 // VCLS : Vector Count Leading Sign Bits
2924 defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
2925 IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
2927 // VCLZ : Vector Count Leading Zeros
2928 defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
2929 IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
2931 // VCNT : Vector Count One Bits
// Byte-element only ("8"): D form on v8i8 and Q form on v16i8, both selecting
// int_arm_neon_vcnt.
2932 def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
2933 IIC_VCNTiD, "vcnt", "8",
2934 v8i8, v8i8, int_arm_neon_vcnt>;
2935 def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
2936 IIC_VCNTiQ, "vcnt", "8",
2937 v16i8, v16i8, int_arm_neon_vcnt>;
2939 // Vector Swap -- for disassembly only.
// D and Q forms of "vswp" with NoItinerary and an empty pattern list, so
// instruction selection never produces them.
2940 def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
2941 (outs DPR:$dst), (ins DPR:$src), NoItinerary,
2942 "vswp", "$dst, $src", "", []>;
2943 def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
2944 (outs QPR:$dst), (ins QPR:$src), NoItinerary,
2945 "vswp", "$dst, $src", "", []>;
2947 // Vector Move Operations.
2949 // VMOV : Vector Move (Register)
// Every def in this block is marked neverHasSideEffects and has an empty
// pattern list, so none of them is selected through a pattern in this file.
2951 let neverHasSideEffects = 1 in {
// NOTE(review): the Q-register form uses the same IIC_VMOVD itinerary as the
// D-register form -- confirm this is intentional.
2952 def VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src),
2953 N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>;
2954 def VMOVQ : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src),
2955 N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>;
2957 // Pseudo vector move instructions for QQ and QQQQ registers. These should
2958 // be expanded after register allocation is completed.
// Their asm strings start with ${:comment}, so the text is printed as an
// assembler comment rather than as a real instruction.
2959 def VMOVQQ : PseudoInst<(outs QQPR:$dst), (ins QQPR:$src),
2960 NoItinerary, "${:comment} vmov\t$dst, $src", []>;
2962 def VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src),
2963 NoItinerary, "${:comment} vmov\t$dst, $src", []>;
2964 } // neverHasSideEffects
2966 // VMOV : Vector Move (Immediate)
// Every def takes a single nModImm operand and matches NEONvmovImm applied to
// a target immediate (timm), producing the vector type named in the pattern.
// The whole group is flagged isReMaterializable.
// A '?' inside a brace-list template argument leaves that bit unspecified in
// the record (presumably filled in from the encoded immediate -- confirm in
// N1ModImm).
2968 let isReMaterializable = 1 in {
2969 def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst),
2970 (ins nModImm:$SIMM), IIC_VMOVImm,
2971 "vmov", "i8", "$dst, $SIMM", "",
2972 [(set DPR:$dst, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
2973 def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst),
2974 (ins nModImm:$SIMM), IIC_VMOVImm,
2975 "vmov", "i8", "$dst, $SIMM", "",
2976 [(set QPR:$dst, (v16i8 (NEONvmovImm timm:$SIMM)))]>;
2978 def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$dst),
2979 (ins nModImm:$SIMM), IIC_VMOVImm,
2980 "vmov", "i16", "$dst, $SIMM", "",
2981 [(set DPR:$dst, (v4i16 (NEONvmovImm timm:$SIMM)))]>;
2982 def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$dst),
2983 (ins nModImm:$SIMM), IIC_VMOVImm,
2984 "vmov", "i16", "$dst, $SIMM", "",
2985 [(set QPR:$dst, (v8i16 (NEONvmovImm timm:$SIMM)))]>;
2987 def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$dst),
2988 (ins nModImm:$SIMM), IIC_VMOVImm,
2989 "vmov", "i32", "$dst, $SIMM", "",
2990 [(set DPR:$dst, (v2i32 (NEONvmovImm timm:$SIMM)))]>;
2991 def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$dst),
2992 (ins nModImm:$SIMM), IIC_VMOVImm,
2993 "vmov", "i32", "$dst, $SIMM", "",
2994 [(set QPR:$dst, (v4i32 (NEONvmovImm timm:$SIMM)))]>;
2996 def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst),
2997 (ins nModImm:$SIMM), IIC_VMOVImm,
2998 "vmov", "i64", "$dst, $SIMM", "",
2999 [(set DPR:$dst, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
3000 def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst),
3001 (ins nModImm:$SIMM), IIC_VMOVImm,
3002 "vmov", "i64", "$dst, $SIMM", "",
3003 [(set QPR:$dst, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
3004 } // isReMaterializable
3006 // VMOV : Vector Get Lane (move scalar to ARM core register)
3008 def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
3009 (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
3010 IIC_VMOVSI, "vmov", "s8", "$dst, $src[$lane]",
3011 [(set GPR:$dst, (NEONvgetlanes (v8i8 DPR:$src),
3013 def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
3014 (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
3015 IIC_VMOVSI, "vmov", "s16", "$dst, $src[$lane]",
3016 [(set GPR:$dst, (NEONvgetlanes (v4i16 DPR:$src),
3018 def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
3019 (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
3020 IIC_VMOVSI, "vmov", "u8", "$dst, $src[$lane]",
3021 [(set GPR:$dst, (NEONvgetlaneu (v8i8 DPR:$src),
3023 def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
3024 (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
3025 IIC_VMOVSI, "vmov", "u16", "$dst, $src[$lane]",
3026 [(set GPR:$dst, (NEONvgetlaneu (v4i16 DPR:$src),
3028 def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
3029 (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
3030 IIC_VMOVSI, "vmov", "32", "$dst, $src[$lane]",
3031 [(set GPR:$dst, (extractelt (v2i32 DPR:$src),
3033 // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
// Lane reads from a Q register are rewritten as lane reads from one of its D
// sub-registers: the vector is narrowed with EXTRACT_SUBREG and the D-register
// VGETLN instruction is applied to the result. DSubReg_*_reg and SubReg_*_lane
// are transforms of the lane number declared outside this excerpt (presumably
// "which D half" and "lane index within that half" -- confirm there).
3034 def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
3035 (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
3036 (DSubReg_i8_reg imm:$lane))),
3037 (SubReg_i8_lane imm:$lane))>;
3038 def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
3039 (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
3040 (DSubReg_i16_reg imm:$lane))),
3041 (SubReg_i16_lane imm:$lane))>;
3042 def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
3043 (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
3044 (DSubReg_i8_reg imm:$lane))),
3045 (SubReg_i8_lane imm:$lane))>;
3046 def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
3047 (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
3048 (DSubReg_i16_reg imm:$lane))),
3049 (SubReg_i16_lane imm:$lane))>;
3050 def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
3051 (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
3052 (DSubReg_i32_reg imm:$lane))),
3053 (SubReg_i32_lane imm:$lane))>;
// Floating-point lane reads are expressed purely as sub-register extraction.
// An f32 lane is taken as an S sub-register after the vector is copied to the
// DPR_VFP2 / QPR_VFP2 register class; an f64 lane is a D sub-register of the
// Q register. The v2i64 variant is present only as commented-out text.
3054 def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
3055 (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
3056 (SSubReg_f32_reg imm:$src2))>;
3057 def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
3058 (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
3059 (SSubReg_f32_reg imm:$src2))>;
3060 //def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
3061 // (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
3062 def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
3063 (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
3066 // VMOV : Vector Set Lane (move ARM core register to scalar)
3068 let Constraints = "$src1 = $dst" in {
// Each def writes GPR:$src2 into lane $lane of the D register $src1; the
// enclosing Constraints string ties $src1 to $dst. The 8- and 16-bit forms
// match vector_insert, while the 32-bit form matches insertelt.
3069 def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$dst),
3070 (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
3071 IIC_VMOVISL, "vmov", "8", "$dst[$lane], $src2",
3072 [(set DPR:$dst, (vector_insert (v8i8 DPR:$src1),
3073 GPR:$src2, imm:$lane))]>;
3074 def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$dst),
3075 (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
3076 IIC_VMOVISL, "vmov", "16", "$dst[$lane], $src2",
3077 [(set DPR:$dst, (vector_insert (v4i16 DPR:$src1),
3078 GPR:$src2, imm:$lane))]>;
3079 def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$dst),
3080 (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
3081 IIC_VMOVISL, "vmov", "32", "$dst[$lane], $src2",
3082 [(set DPR:$dst, (insertelt (v2i32 DPR:$src1),
3083 GPR:$src2, imm:$lane))]>;
// Lane writes into a Q register are done in three steps: extract the D
// sub-register chosen by DSubReg_*_reg, apply the D-register VSETLN to it with
// the lane number transformed by SubReg_*_lane, then INSERT_SUBREG the result
// back into the original Q register at the same sub-register index.
3085 def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
3086 (v16i8 (INSERT_SUBREG QPR:$src1,
3087 (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
3088 (DSubReg_i8_reg imm:$lane))),
3089 GPR:$src2, (SubReg_i8_lane imm:$lane))),
3090 (DSubReg_i8_reg imm:$lane)))>;
3091 def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
3092 (v8i16 (INSERT_SUBREG QPR:$src1,
3093 (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
3094 (DSubReg_i16_reg imm:$lane))),
3095 GPR:$src2, (SubReg_i16_lane imm:$lane))),
3096 (DSubReg_i16_reg imm:$lane)))>;
3097 def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
3098 (v4i32 (INSERT_SUBREG QPR:$src1,
3099 (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
3100 (DSubReg_i32_reg imm:$lane))),
3101 GPR:$src2, (SubReg_i32_lane imm:$lane))),
3102 (DSubReg_i32_reg imm:$lane)))>;
// Floating-point lane writes are expressed purely as sub-register insertion.
// An f32 element in an SPR is inserted as an S sub-register after the vector
// is copied to the DPR_VFP2 / QPR_VFP2 register class; an f64 element in a DPR
// is inserted as a D sub-register of the Q register. The v2i64 variant is
// present only as commented-out text.
3104 def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
3105 (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
3106 SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
3107 def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
3108 (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
3109 SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
3111 //def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
3112 // (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
3113 def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
3114 (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
// scalar_to_vector: only element 0 is defined, so every pattern starts from
// an IMPLICIT_DEF vector. Floating-point scalars are placed with INSERT_SUBREG
// at ssub_0 / dsub_0; integer scalars in a GPR are written to lane 0 of a D
// register with the matching VSETLN instruction.
3116 def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
3117 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
3118 def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
3119 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
3120 def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
3121 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
3123 def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
3124 (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
3125 def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
3126 (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
3127 def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
3128 (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
3130 def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
3131 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
3132 (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
3134 def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
3135 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
3136 (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
3138 def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
3139 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
3140 (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
3143 // VDUP : Vector Duplicate (from ARM core register to all elements)
// VDUPD and VDUPQ are identical except for the destination register class
// (DPR vs. QPR). Both match NEONvdup of an i32 held in a GPR and produce the
// vector type Ty; opcod1/opcod3 are forwarded unchanged to NVDup and Dt is the
// data-type suffix string.
3145 class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
3146 : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$dst), (ins GPR:$src),
3147 IIC_VMOVIS, "vdup", Dt, "$dst, $src",
3148 [(set DPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;
3149 class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
3150 : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$dst), (ins GPR:$src),
3151 IIC_VMOVIS, "vdup", Dt, "$dst, $src",
3152 [(set QPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;
// Integer VDUP instances for 8-, 16- and 32-bit elements in D and Q forms.
// Arguments are (opcod1, opcod3, data-type suffix, result vector type).
3154 def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>;
3155 def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>;
3156 def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>;
3157 def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>;
3158 def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
3159 def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;
// Floating-point VDUP from a core register. The NVDup template arguments are
// the same as those of VDUP32d / VDUP32q; only the pattern differs, matching
// NEONvdup of an f32 obtained by bitconverting the GPR value.
3161 def VDUPfd : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$dst), (ins GPR:$src),
3162 IIC_VMOVIS, "vdup", "32", "$dst, $src",
3163 [(set DPR:$dst, (v2f32 (NEONvdup
3164 (f32 (bitconvert GPR:$src)))))]>;
3165 def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src),
3166 IIC_VMOVIS, "vdup", "32", "$dst, $src",
3167 [(set QPR:$dst, (v4f32 (NEONvdup
3168 (f32 (bitconvert GPR:$src)))))]>;
3170 // VDUP : Vector Duplicate Lane (from scalar to all elements)
3172 class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
3174 : NVDupLane<op19_16, 0, (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane),
3175 IIC_VMOVD, OpcodeStr, Dt, "$dst, $src[$lane]",
3176 [(set DPR:$dst, (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>;
3178 class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
3179 ValueType ResTy, ValueType OpTy>
3180 : NVDupLane<op19_16, 1, (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane),
3181 IIC_VMOVD, OpcodeStr, Dt, "$dst, $src[$lane]",
3182 [(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src),
3185 // Inst{19-16} is partially specified depending on the element size.
// The '?' entries are the bits left unspecified by each record. The f32
// variants use the same bit pattern as the 32-bit integer variants and differ
// only in vector type. Q forms take a D-register source: the last two
// VDUPLNQ arguments are (result type, operand type).
3187 def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8>;
3188 def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16>;
3189 def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32>;
3190 def VDUPLNfd : VDUPLND<{?,1,0,0}, "vdup", "32", v2f32>;
3191 def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8>;
3192 def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16>;
3193 def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32>;
3194 def VDUPLNfq : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4f32, v2f32>;
// NEONvduplane with a Q-register source: the VDUPLN*q instructions read a D
// register, so the source is first narrowed with EXTRACT_SUBREG to the D
// sub-register chosen by DSubReg_*_reg, and the lane number is transformed by
// SubReg_*_lane. The v4f32 pattern reuses the i32 transforms.
3196 def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
3197 (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
3198 (DSubReg_i8_reg imm:$lane))),
3199 (SubReg_i8_lane imm:$lane)))>;
3200 def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
3201 (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
3202 (DSubReg_i16_reg imm:$lane))),
3203 (SubReg_i16_lane imm:$lane)))>;
3204 def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
3205 (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
3206 (DSubReg_i32_reg imm:$lane))),
3207 (SubReg_i32_lane imm:$lane)))>;
3208 def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
3209 (v4f32 (VDUPLNfq (v2f32 (EXTRACT_SUBREG QPR:$src,
3210 (DSubReg_i32_reg imm:$lane))),
3211 (SubReg_i32_lane imm:$lane)))>;
// VDUP of an f32 that already lives in an S register (SPR). The source is
// printed with the ":lane" operand modifier (${src:lane}); how that modifier
// renders the register is defined by the asm printer, outside this excerpt.
3213 def VDUPfdf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 0, 0,
3214 (outs DPR:$dst), (ins SPR:$src),
3215 IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "",
3216 [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
3218 def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0,
3219 (outs QPR:$dst), (ins SPR:$src),
3220 IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "",
3221 [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
3223 // VMOVN : Vector Narrowing Move
// The narrowing moves are instantiated through N2VNInt_HSD, each bound to its
// own intrinsic.
3224 defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD,
3225 "vmovn", "i", int_arm_neon_vmovn>;
3226 // VQMOVN : Vector Saturating Narrowing Move
// Three variants: signed ("s"), unsigned ("u"), and VQMOVNsu, which is
// printed with the distinct mnemonic "vqmovun".
3227 defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
3228 "vqmovn", "s", int_arm_neon_vqmovns>;
3229 defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
3230 "vqmovn", "u", int_arm_neon_vqmovnu>;
3231 defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
3232 "vqmovun", "s", int_arm_neon_vqmovnsu>;
3233 // VMOVL : Vector Lengthening Move
// The lengthening moves match the generic sext / zext nodes rather than an
// intrinsic.
3234 defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
3235 defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
3237 // Vector Conversions.
3239 // VCVT : Vector Convert Between Floating-Point and Integers
// Each conversion has a D form (v2i32 <-> v2f32) and a Q form (v4i32 <->
// v4f32) with identical template bits. The trailing arguments are
// (result type, operand type, generic conversion node), and the data-type
// string reads "<to>.<from>".
3240 def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
3241 v2i32, v2f32, fp_to_sint>;
3242 def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
3243 v2i32, v2f32, fp_to_uint>;
3244 def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
3245 v2f32, v2i32, sint_to_fp>;
3246 def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
3247 v2f32, v2i32, uint_to_fp>;
3249 def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
3250 v4i32, v4f32, fp_to_sint>;
3251 def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
3252 v4i32, v4f32, fp_to_uint>;
3253 def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
3254 v4f32, v4i32, sint_to_fp>;
3255 def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
3256 v4f32, v4i32, uint_to_fp>;
3258 // VCVT : Vector Convert Between Floating-Point and Fixed-Point.
// Unlike the integer conversions, these are bound to the
// int_arm_neon_vcvt{fp2fxs,fp2fxu,fxs2fp,fxu2fp} intrinsics. The first
// template argument is 0 for the signed and 1 for the unsigned variants; the
// third is 0b1111 for fp-to-fixed and 0b1110 for fixed-to-fp. Any additional
// operands are supplied by N2VCvtD / N2VCvtQ, declared outside this excerpt.
3259 def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
3260 v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
3261 def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
3262 v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
3263 def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
3264 v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
3265 def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
3266 v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
3268 def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
3269 v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
3270 def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
3271 v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
3272 def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
3273 v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
3274 def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
3275 v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
3279 // VREV64 : Vector Reverse elements within 64-bit doublewords
// VREV64D / VREV64Q match NEONvrev64 on a vector of type Ty and differ in
// register class and the sixth N2V argument (0 vs. 1). op19_18 is forwarded
// to N2V and varies with element width in the defs below; the f32 variants
// share op19_18 = 0b10 with the 32-bit integer variants.
3281 class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
3282 : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$dst),
3283 (ins DPR:$src), IIC_VMOVD,
3284 OpcodeStr, Dt, "$dst, $src", "",
3285 [(set DPR:$dst, (Ty (NEONvrev64 (Ty DPR:$src))))]>;
3286 class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
3287 : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$dst),
3288 (ins QPR:$src), IIC_VMOVD,
3289 OpcodeStr, Dt, "$dst, $src", "",
3290 [(set QPR:$dst, (Ty (NEONvrev64 (Ty QPR:$src))))]>;
3292 def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>;
3293 def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
3294 def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
3295 def VREV64df : VREV64D<0b10, "vrev64", "32", v2f32>;
3297 def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>;
3298 def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
3299 def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
3300 def VREV64qf : VREV64Q<0b10, "vrev64", "32", v4f32>;
3302 // VREV32 : Vector Reverse elements within 32-bit words
// Same structure as the VREV64 classes, matching NEONvrev32 and using
// 0b00001 as the fifth N2V argument. Only 8- and 16-bit element variants are
// instantiated.
3304 class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
3305 : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$dst),
3306 (ins DPR:$src), IIC_VMOVD,
3307 OpcodeStr, Dt, "$dst, $src", "",
3308 [(set DPR:$dst, (Ty (NEONvrev32 (Ty DPR:$src))))]>;
3309 class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
3310 : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$dst),
3311 (ins QPR:$src), IIC_VMOVD,
3312 OpcodeStr, Dt, "$dst, $src", "",
3313 [(set QPR:$dst, (Ty (NEONvrev32 (Ty QPR:$src))))]>;
3315 def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>;
3316 def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
3318 def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>;
3319 def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
3321 // VREV16 : Vector Reverse elements within 16-bit halfwords
// Same structure as the VREV64 classes, matching NEONvrev16 and using
// 0b00010 as the fifth N2V argument. Only the 8-bit element variants are
// instantiated.
3323 class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
3324 : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$dst),
3325 (ins DPR:$src), IIC_VMOVD,
3326 OpcodeStr, Dt, "$dst, $src", "",
3327 [(set DPR:$dst, (Ty (NEONvrev16 (Ty DPR:$src))))]>;
3328 class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
3329 : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$dst),
3330 (ins QPR:$src), IIC_VMOVD,
3331 OpcodeStr, Dt, "$dst, $src", "",
3332 [(set QPR:$dst, (Ty (NEONvrev16 (Ty QPR:$src))))]>;
3334 def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
3335 def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;
3337 // Other Vector Shuffles.
3339 // VEXT : Vector Extract
// VEXTd / VEXTq match NEONvext on two vectors of type Ty plus an immediate
// index, which is carried as an i32imm operand and printed last. The fourth
// N3V argument is left entirely unspecified ({?,?,?,?}) in these records. The
// D and Q classes differ in register class, itinerary (IIC_VEXTD vs.
// IIC_VEXTQ) and the fifth N3V argument (0 vs. 1).
3341 class VEXTd<string OpcodeStr, string Dt, ValueType Ty>
3342 : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$dst),
3343 (ins DPR:$lhs, DPR:$rhs, i32imm:$index), NVExtFrm,
3344 IIC_VEXTD, OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "",
3345 [(set DPR:$dst, (Ty (NEONvext (Ty DPR:$lhs),
3346 (Ty DPR:$rhs), imm:$index)))]>;
3348 class VEXTq<string OpcodeStr, string Dt, ValueType Ty>
3349 : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$dst),
3350 (ins QPR:$lhs, QPR:$rhs, i32imm:$index), NVExtFrm,
3351 IIC_VEXTQ, OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "",
3352 [(set QPR:$dst, (Ty (NEONvext (Ty QPR:$lhs),
3353 (Ty QPR:$rhs), imm:$index)))]>;
3355 def VEXTd8 : VEXTd<"vext", "8", v8i8>;
3356 def VEXTd16 : VEXTd<"vext", "16", v4i16>;
3357 def VEXTd32 : VEXTd<"vext", "32", v2i32>;
3358 def VEXTdf : VEXTd<"vext", "32", v2f32>;
3360 def VEXTq8 : VEXTq<"vext", "8", v16i8>;
3361 def VEXTq16 : VEXTq<"vext", "16", v8i16>;
3362 def VEXTq32 : VEXTq<"vext", "32", v4i32>;
3363 def VEXTqf : VEXTq<"vext", "32", v4f32>;
3365 // VTRN : Vector Transpose
// The three shuffles below share one layout: the first argument selects the
// element width (0b00/0b01/0b10 for "8"/"16"/"32") and the second is fixed
// per operation (0b00001 VTRN, 0b00010 VUZP, 0b00011 VZIP). D forms pass no
// itinerary to N2VDShuffle here; Q forms pass IIC_VPERMQ for VTRN and
// IIC_VPERMQ3 for VUZP and VZIP.
3367 def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
3368 def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
3369 def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;
3371 def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
3372 def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
3373 def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
3375 // VUZP : Vector Unzip (Deinterleave)
3377 def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
3378 def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
3379 def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp", "32">;
3381 def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
3382 def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
3383 def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
3385 // VZIP : Vector Zip (Interleave)
3387 def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
3388 def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
3389 def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip", "32">;
3391 def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
3392 def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
3393 def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
3395 // Vector Table Lookup and Table Extension.
3397 // VTBL : Vector Table Lookup
3399 : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$dst),
3400 (ins DPR:$tbl1, DPR:$src), NVTBLFrm, IIC_VTB1,
3401 "vtbl", "8", "$dst, \\{$tbl1\\}, $src", "",
3402 [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl1 DPR:$tbl1, DPR:$src)))]>;
3403 let hasExtraSrcRegAllocReq = 1 in {
3405 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$dst),
3406 (ins DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTB2,
3407 "vtbl", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "", []>;
3409 : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$dst),
3410 (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), NVTBLFrm, IIC_VTB3,
3411 "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "", []>;
3413 : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$dst),
3414 (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src),
3416 "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", "", []>;
3417 } // hasExtraSrcRegAllocReq = 1
3419 // VTBX : Vector Table Extension
3421 : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$dst),
3422 (ins DPR:$orig, DPR:$tbl1, DPR:$src), NVTBLFrm, IIC_VTBX1,
3423 "vtbx", "8", "$dst, \\{$tbl1\\}, $src", "$orig = $dst",
3424 [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx1
3425 DPR:$orig, DPR:$tbl1, DPR:$src)))]>;
3426 let hasExtraSrcRegAllocReq = 1 in {
3428 : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$dst),
3429 (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTBX2,
3430 "vtbx", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "$orig = $dst", []>;
3432 : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$dst),
3433 (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src),
3434 NVTBLFrm, IIC_VTBX3,
3435 "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src",
3436 "$orig = $dst", []>;
3438 : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1,
3439 DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), NVTBLFrm, IIC_VTBX4,
3440 "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src",
3441 "$orig = $dst", []>;
3442 } // hasExtraSrcRegAllocReq = 1
3444 //===----------------------------------------------------------------------===//
3445 // NEON instructions for single-precision FP math
3446 //===----------------------------------------------------------------------===//
3448 class N2VSPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
3449 : NEONFPPat<(ResTy (OpNode SPR:$a)),
3450 (EXTRACT_SUBREG (OpTy (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)),
3454 class N3VSPat<SDNode OpNode, NeonI Inst>
3455 : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
3456 (EXTRACT_SUBREG (v2f32
3457 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
3459 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
3463 class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
3464 : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
3465 (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
3467 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
3469 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
3473 // These need separate instructions because they must use the DPR_VFP2
3474 // register class, which has SPR sub-registers.
3476 // Vector Add Operations used for single-precision FP
// Each *_sfp instruction below is paired with an N3VSPat so the scalar f32
// operator (fadd / fsub / fmul) is selected onto the v2f32 instruction.
// "let neverHasSideEffects = 1 in" has no braces, so it applies only to the
// def that immediately follows it. The last N3VS argument is 1 for VADD and
// VMUL and 0 for VSUB (presumably the commutable flag -- confirm in N3VS).
3477 let neverHasSideEffects = 1 in
3478 def VADDfd_sfp : N3VS<0,0,0b00,0b1101,0, "vadd", "f32", v2f32, v2f32, fadd, 1>;
3479 def : N3VSPat<fadd, VADDfd_sfp>;
3481 // Vector Sub Operations used for single-precision FP
3482 let neverHasSideEffects = 1 in
3483 def VSUBfd_sfp : N3VS<0,0,0b10,0b1101,0, "vsub", "f32", v2f32, v2f32, fsub, 0>;
3484 def : N3VSPat<fsub, VSUBfd_sfp>;
3486 // Vector Multiply Operations used for single-precision FP
3487 let neverHasSideEffects = 1 in
3488 def VMULfd_sfp : N3VS<1,0,0b00,0b1101,1, "vmul", "f32", v2f32, v2f32, fmul, 1>;
3489 def : N3VSPat<fmul, VMULfd_sfp>;
3491 // Vector Multiply-Accumulate/Subtract used for single-precision FP
3492 // vml[as].f32 can cause 4-8 cycle stalls in following ASIMD instructions
3493 // (e.g., when vmla and vadd instructions alternate), so we avoid them for now.
3495 //let neverHasSideEffects = 1 in
3496 //def VMLAfd_sfp : N3VSMulOp<0,0,0b00,0b1101,1, IIC_VMACD, "vmla", "f32",
3497 // v2f32, fmul, fadd>;
3498 //def : N3VSMulOpPat<fmul, fadd, VMLAfd_sfp>;
3500 //let neverHasSideEffects = 1 in
3501 //def VMLSfd_sfp : N3VSMulOp<0,0,0b10,0b1101,1, IIC_VMACD, "vmls", "f32",
3502 // v2f32, fmul, fsub>;
3503 //def : N3VSMulOpPat<fmul, fsub, VMLSfd_sfp>;
3505 // Vector Absolute used for single-precision FP
// VABSfd_sfp and VNEGfd_sfp operate on DPR_VFP2 registers and carry no
// pattern of their own ([]); selection comes from the N2VSPat that follows
// each def, mapping scalar fabs / fneg on f32 onto the v2f32 instruction.
// The two defs differ only in mnemonic and the fifth N2V argument
// (0b01110 vs. 0b01111).
3506 let neverHasSideEffects = 1 in
3507 def VABSfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 0,
3508 (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
3509 "vabs", "f32", "$dst, $src", "", []>;
3510 def : N2VSPat<fabs, f32, v2f32, VABSfd_sfp>;
3512 // Vector Negate used for single-precision FP
3513 let neverHasSideEffects = 1 in
3514 def VNEGfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
3515 (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
3516 "vneg", "f32", "$dst, $src", "", []>;
3517 def : N2VSPat<fneg, f32, v2f32, VNEGfd_sfp>;
3519 // Vector Maximum used for single-precision FP
// Operates on DPR_VFP2 registers with an empty pattern list; the N3VSPat
// below selects it for the scalar NEONfmax node.
3520 let neverHasSideEffects = 1 in
3521 def VMAXfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst),
3522 (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm, IIC_VBIND,
3523 "vmax", "f32", "$dst, $src1, $src2", "", []>;
3524 def : N3VSPat<NEONfmax, VMAXfd_sfp>;
3526 // Vector Minimum used for single-precision FP
// Operates on DPR_VFP2 registers with an empty pattern list; the N3VSPat
// below selects it for the scalar NEONfmin node.
// The third N3V argument must be 0b10: VMIN.F32 differs from VMAX.F32 only in
// the "op" bit (the upper bit of this field, per the ARM ARM encoding of
// VMAX/VMIN floating-point). With 0b00 this record would have exactly the
// same encoding as VMAXfd_sfp and "vmin" would be emitted as a VMAX. The same
// 0b00 / 0b10 split distinguishes VADDfd_sfp from VSUBfd_sfp.
3527 let neverHasSideEffects = 1 in
3528 def VMINfd_sfp : N3V<0, 0, 0b10, 0b1111, 0, 0, (outs DPR_VFP2:$dst),
3529 (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm, IIC_VBIND,
3530 "vmin", "f32", "$dst, $src1, $src2", "", []>;
3531 def : N3VSPat<NEONfmin, VMINfd_sfp>;
3533 // Vector Convert between single-precision FP and integer
// Each *_sfp def reuses the template bits of the corresponding D-register
// VCVT def and names a generic conversion node (fp_to_sint, ...). The scalar
// selection is added separately by the N2VSPat after each def, keyed on the
// ARM-specific nodes arm_ftosi / arm_ftoui / arm_sitof / arm_uitof. The third
// N2VSPat argument is v2f32 for the float-to-int pairs and v2i32 for the
// int-to-float pairs.
3534 let neverHasSideEffects = 1 in
3535 def VCVTf2sd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
3536 v2i32, v2f32, fp_to_sint>;
3537 def : N2VSPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>;
3539 let neverHasSideEffects = 1 in
3540 def VCVTf2ud_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
3541 v2i32, v2f32, fp_to_uint>;
3542 def : N2VSPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>;
3544 let neverHasSideEffects = 1 in
3545 def VCVTs2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
3546 v2f32, v2i32, sint_to_fp>;
3547 def : N2VSPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>;
3549 let neverHasSideEffects = 1 in
3550 def VCVTu2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
3551 v2f32, v2i32, uint_to_fp>;
3552 def : N2VSPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>;
3554 //===----------------------------------------------------------------------===//
3555 // Non-Instruction Patterns
3556 //===----------------------------------------------------------------------===//
// bitconvert between any two distinct 64-bit types held in a D register
// (v1i64, v2i32, v4i16, v8i8, f64, v2f32). Each pattern's result is the same
// DPR:$src simply retyped, so no instruction is emitted for the conversion.
// Patterns are grouped by destination type, five sources per destination.
3559 def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
3560 def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
3561 def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
3562 def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>;
3563 def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
3564 def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
3565 def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
3566 def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
3567 def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
3568 def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
3569 def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
3570 def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
3571 def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
3572 def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
3573 def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
3574 def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
3575 def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
3576 def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
3577 def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
3578 def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
3579 def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>;
3580 def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
3581 def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
3582 def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
3583 def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
3584 def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
3585 def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
3586 def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
3587 def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
3588 def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
// bitconvert between any two distinct 128-bit types held in a Q register
// (v2i64, v4i32, v8i16, v16i8, v4f32, v2f64). Each pattern's result is the
// same QPR:$src simply retyped, so no instruction is emitted for the
// conversion. Patterns are grouped by destination type, five sources each.
3590 def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
3591 def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
3592 def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
3593 def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
3594 def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
3595 def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
3596 def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
3597 def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
3598 def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
3599 def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
3600 def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
3601 def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
3602 def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
3603 def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
3604 def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
3605 def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
3606 def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
3607 def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
3608 def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
3609 def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
3610 def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
3611 def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
3612 def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
3613 def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
3614 def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
3615 def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
3616 def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
3617 def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
3618 def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
3619 def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;