1 //===-- AArch64InstrNEON.td - NEON support for AArch64 -----*- tablegen -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the AArch64 NEON instruction set.
12 //===----------------------------------------------------------------------===//
14 //===----------------------------------------------------------------------===//
15 // NEON-specific DAG Nodes.
16 //===----------------------------------------------------------------------===//
// Bitwise select: result lanes taken from operand 2 or 3 under the mask in
// operand 1; all four types must match.
def Neon_bsl : SDNode<"AArch64ISD::NEON_BSL", SDTypeProfile<1, 3,
                 [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
                 SDTCisSameAs<0, 3>]>>;

// (outs Result), (ins Imm, OpCmode)
def SDT_Neon_movi : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVT<1, i32>]>;

def Neon_movi : SDNode<"AArch64ISD::NEON_MOVIMM", SDT_Neon_movi>;

def Neon_mvni : SDNode<"AArch64ISD::NEON_MVNIMM", SDT_Neon_movi>;

// (outs Result), (ins Imm)
def Neon_fmovi : SDNode<"AArch64ISD::NEON_FMOVIMM", SDTypeProfile<1, 1,
                        [SDTCisVec<0>, SDTCisVT<1, i32>]>>;

// (outs Result), (ins LHS, RHS, CondCode)
def Neon_cmp : SDNode<"AArch64ISD::NEON_CMP", SDTypeProfile<1, 3,
                      [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>;

// (outs Result), (ins LHS, 0/0.0 constant, CondCode)
def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3,
                       [SDTCisVec<0>, SDTCisVec<1>]>>;

// (outs Result), (ins LHS, RHS)
def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2,
                      [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>;

// Saturating shift-by-immediate profile: result and first operand share a
// vector type, shift amount is an i32 (reconstructed tail — TODO confirm
// against upstream AArch64InstrNEON.td).
def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                     SDTCisVT<2, i32>]>;
def Neon_sqrshlImm : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>;
def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>;

// Duplicate a scalar into all lanes / duplicate one lane (index in op 2).
def Neon_vdup : SDNode<"AArch64ISD::NEON_VDUP", SDTypeProfile<1, 1,
                       [SDTCisVec<0>]>>;
def Neon_vduplane : SDNode<"AArch64ISD::NEON_VDUPLANE", SDTypeProfile<1, 2,
                           [SDTCisVec<0>, SDTCisVec<1>, SDTCisVT<2, i64>]>>;
54 //===----------------------------------------------------------------------===//
56 //===----------------------------------------------------------------------===//
// Three-same byte-only forms (8B/16B). Separate operators for the 64-bit and
// 128-bit variants so intrinsics with distinct overloads can be used.
multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size, bits<5> opcode,
                                string asmop, SDPatternOperator opnode8B,
                                SDPatternOperator opnode16B,
                                bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8B :  NeonI_3VSame<0b0, u, size, opcode,
               (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
               asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
               [(set (v8i8 VPR64:$Rd),
                  (v8i8 (opnode8B (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
               NoItinerary>;

    def _16B : NeonI_3VSame<0b1, u, size, opcode,
               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
               asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
               [(set (v16i8 VPR128:$Rd),
                  (v16i8 (opnode16B (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
               NoItinerary>;
  }
}
// Three-same halfword/word forms (4H/8H/2S/4S) sharing one pattern operator.
multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode,
                                 string asmop, SDPatternOperator opnode,
                                 bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _4H : NeonI_3VSame<0b0, u, 0b01, opcode,
              (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
              asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h",
              [(set (v4i16 VPR64:$Rd),
                 (v4i16 (opnode (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))))],
              NoItinerary>;

    def _8H : NeonI_3VSame<0b1, u, 0b01, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
              asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h",
              [(set (v8i16 VPR128:$Rd),
                 (v8i16 (opnode (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))))],
              NoItinerary>;

    def _2S : NeonI_3VSame<0b0, u, 0b10, opcode,
              (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
              asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
              [(set (v2i32 VPR64:$Rd),
                 (v2i32 (opnode (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))))],
              NoItinerary>;

    def _4S : NeonI_3VSame<0b1, u, 0b10, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
              asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
              [(set (v4i32 VPR128:$Rd),
                 (v4i32 (opnode (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))],
              NoItinerary>;
  }
}
// Three-same byte/halfword/word forms: the H/S variants come from
// NeonI_3VSame_HS_sizes; the byte variants are added here.
multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
                                  string asmop, SDPatternOperator opnode,
                                  bit Commutable = 0>
  : NeonI_3VSame_HS_sizes<u, opcode, asmop, opnode, Commutable> {
  let isCommutable = Commutable in {
    def _8B : NeonI_3VSame<0b0, u, 0b00, opcode,
              (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
              asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
              [(set (v8i8 VPR64:$Rd),
                 (v8i8 (opnode (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
              NoItinerary>;

    def _16B : NeonI_3VSame<0b1, u, 0b00, opcode,
               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
               asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
               [(set (v16i8 VPR128:$Rd),
                  (v16i8 (opnode (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
               NoItinerary>;
  }
}
// Three-same byte/halfword/word/doubleword forms: B/H/S variants come from
// NeonI_3VSame_BHS_sizes; the 2D variant is added here.
multiclass NeonI_3VSame_BHSD_sizes<bit u, bits<5> opcode,
                                   string asmop, SDPatternOperator opnode,
                                   bit Commutable = 0>
  : NeonI_3VSame_BHS_sizes<u, opcode, asmop, opnode, Commutable> {
  let isCommutable = Commutable in {
    def _2D : NeonI_3VSame<0b1, u, 0b11, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
              asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
              [(set (v2i64 VPR128:$Rd),
                 (v2i64 (opnode (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))))],
              NoItinerary>;
  }
}
// Multiclass NeonI_3VSame_SD_sizes: Operand types are floating point types,
// but Result types can be integer or floating point types (so the float
// comparisons can reuse this with i32/i64 result vectors).
multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode,
                                 string asmop, SDPatternOperator opnode2S,
                                 SDPatternOperator opnode4S,
                                 SDPatternOperator opnode2D,
                                 ValueType ResTy2S, ValueType ResTy4S,
                                 ValueType ResTy2D, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _2S : NeonI_3VSame<0b0, u, {size, 0b0}, opcode,
              (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
              asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
              [(set (ResTy2S VPR64:$Rd),
                 (ResTy2S (opnode2S (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))],
              NoItinerary>;

    def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
              asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
              [(set (ResTy4S VPR128:$Rd),
                 (ResTy4S (opnode4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))],
              NoItinerary>;

    def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
              asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
              [(set (ResTy2D VPR128:$Rd),
                 (ResTy2D (opnode2D (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))],
              NoItinerary>;
  }
}
180 //===----------------------------------------------------------------------===//
181 // Instruction Definitions
182 //===----------------------------------------------------------------------===//
// Vector Arithmetic Instructions

// Vector Add (Integer and Floating-Point)
defm ADDvvv  : NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>;
defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd", fadd, fadd, fadd,
                                     v2f32, v4f32, v2f64, 1>;

// Vector Sub (Integer and Floating-Point)
defm SUBvvv  : NeonI_3VSame_BHSD_sizes<0b1, 0b10000, "sub", sub, 0>;
defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub", fsub, fsub, fsub,
                                     v2f32, v4f32, v2f64, 0>;

// Vector Multiply (Integer and Floating-Point)
defm MULvvv  : NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>;
defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul, fmul, fmul,
                                     v2f32, v4f32, v2f64, 1>;

// Vector Multiply (Polynomial)
defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul",
                                    int_arm_neon_vmulp, int_arm_neon_vmulp, 1>;
209 // Vector Multiply-accumulate and Multiply-subtract (Integer)
// class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and
// two operands constraints ($src is tied to $Rd for accumulating forms).
class NeonI_3VSame_Constraint_impl<string asmop, string asmlane,
                                   RegisterOperand VPRC, ValueType OpTy, bit q, bit u, bits<2> size,
                                   bits<5> opcode, SDPatternOperator opnode>
  : NeonI_3VSame<q, u, size, opcode,
                 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, VPRC:$Rm),
                 asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane # ", $Rm" # asmlane,
                 [(set (OpTy VPRC:$Rd),
                    (OpTy (opnode (OpTy VPRC:$src), (OpTy VPRC:$Rn), (OpTy VPRC:$Rm))))],
                 NoItinerary> {
  let Constraints = "$src = $Rd";
}
// Integer multiply-accumulate / multiply-subtract expressed on generic DAG
// nodes so plain add/mul/sub IR matches MLA/MLS.
def Neon_mla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                       (add node:$Ra, (mul node:$Rn, node:$Rm))>;

def Neon_mls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                       (sub node:$Ra, (mul node:$Rn, node:$Rm))>;

def MLAvvv_8B:  NeonI_3VSame_Constraint_impl<"mla", ".8b",  VPR64,  v8i8,
                                             0b0, 0b0, 0b00, 0b10010, Neon_mla>;
def MLAvvv_16B: NeonI_3VSame_Constraint_impl<"mla", ".16b", VPR128, v16i8,
                                             0b1, 0b0, 0b00, 0b10010, Neon_mla>;
def MLAvvv_4H:  NeonI_3VSame_Constraint_impl<"mla", ".4h",  VPR64,  v4i16,
                                             0b0, 0b0, 0b01, 0b10010, Neon_mla>;
def MLAvvv_8H:  NeonI_3VSame_Constraint_impl<"mla", ".8h",  VPR128, v8i16,
                                             0b1, 0b0, 0b01, 0b10010, Neon_mla>;
def MLAvvv_2S:  NeonI_3VSame_Constraint_impl<"mla", ".2s",  VPR64,  v2i32,
                                             0b0, 0b0, 0b10, 0b10010, Neon_mla>;
def MLAvvv_4S:  NeonI_3VSame_Constraint_impl<"mla", ".4s",  VPR128, v4i32,
                                             0b1, 0b0, 0b10, 0b10010, Neon_mla>;

def MLSvvv_8B:  NeonI_3VSame_Constraint_impl<"mls", ".8b",  VPR64,  v8i8,
                                             0b0, 0b1, 0b00, 0b10010, Neon_mls>;
def MLSvvv_16B: NeonI_3VSame_Constraint_impl<"mls", ".16b", VPR128, v16i8,
                                             0b1, 0b1, 0b00, 0b10010, Neon_mls>;
def MLSvvv_4H:  NeonI_3VSame_Constraint_impl<"mls", ".4h",  VPR64,  v4i16,
                                             0b0, 0b1, 0b01, 0b10010, Neon_mls>;
def MLSvvv_8H:  NeonI_3VSame_Constraint_impl<"mls", ".8h",  VPR128, v8i16,
                                             0b1, 0b1, 0b01, 0b10010, Neon_mls>;
def MLSvvv_2S:  NeonI_3VSame_Constraint_impl<"mls", ".2s",  VPR64,  v2i32,
                                             0b0, 0b1, 0b10, 0b10010, Neon_mls>;
def MLSvvv_4S:  NeonI_3VSame_Constraint_impl<"mls", ".4s",  VPR128, v4i32,
                                             0b1, 0b1, 0b10, 0b10010, Neon_mls>;
// Vector Multiply-accumulate and Multiply-subtract (Floating Point)

def Neon_fmla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                        (fadd node:$Ra, (fmul node:$Rn, node:$Rm))>;

def Neon_fmls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                        (fsub node:$Ra, (fmul node:$Rn, node:$Rm))>;

// Contracting fadd+fmul into fused FMLA/FMLS is only legal when fused MAC is
// requested, hence the predicate.
let Predicates = [HasNEON, UseFusedMAC] in {
def FMLAvvv_2S: NeonI_3VSame_Constraint_impl<"fmla", ".2s",  VPR64,  v2f32,
                                             0b0, 0b0, 0b00, 0b11001, Neon_fmla>;
def FMLAvvv_4S: NeonI_3VSame_Constraint_impl<"fmla", ".4s",  VPR128, v4f32,
                                             0b1, 0b0, 0b00, 0b11001, Neon_fmla>;
def FMLAvvv_2D: NeonI_3VSame_Constraint_impl<"fmla", ".2d",  VPR128, v2f64,
                                             0b1, 0b0, 0b01, 0b11001, Neon_fmla>;

def FMLSvvv_2S: NeonI_3VSame_Constraint_impl<"fmls", ".2s",  VPR64,  v2f32,
                                             0b0, 0b0, 0b10, 0b11001, Neon_fmls>;
def FMLSvvv_4S: NeonI_3VSame_Constraint_impl<"fmls", ".4s",  VPR128, v4f32,
                                             0b1, 0b0, 0b10, 0b11001, Neon_fmls>;
def FMLSvvv_2D: NeonI_3VSame_Constraint_impl<"fmls", ".2d",  VPR128, v2f64,
                                             0b1, 0b0, 0b11, 0b11001, Neon_fmls>;
}
// We're also allowed to match the fma instruction regardless of compile
// options.
def : Pat<(v2f32 (fma VPR64:$Rn, VPR64:$Rm, VPR64:$Ra)),
          (FMLAvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
def : Pat<(v4f32 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
          (FMLAvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
def : Pat<(v2f64 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
          (FMLAvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;

// fma with a negated multiplicand is FMLS.
def : Pat<(v2f32 (fma (fneg VPR64:$Rn), VPR64:$Rm, VPR64:$Ra)),
          (FMLSvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
def : Pat<(v4f32 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
          (FMLSvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
          (FMLSvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
// Vector Divide (Floating-Point)
defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv, fdiv, fdiv,
                                     v2f32, v4f32, v2f64, 0>;

// Vector Bitwise Operations

// Vector Bitwise AND
defm ANDvvv : NeonI_3VSame_B_sizes<0b0, 0b00, 0b00011, "and", and, and, 1>;

// Vector Bitwise Exclusive OR
defm EORvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b00011, "eor", xor, xor, 1>;

// Vector Bitwise OR
defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>;

// ORR disassembled as MOV if Vn==Vm

// Vector Move - register
// Alias for ORR if Vn=Vm.
// FIXME: This is actually the preferred syntax but TableGen can't deal with
// custom printing of aliases.
def : NeonInstAlias<"mov $Rd.8b, $Rn.8b",
                    (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn), 0>;
def : NeonInstAlias<"mov $Rd.16b, $Rn.16b",
                    (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn), 0>;
// Matches a NEON_MOVIMM that materializes all-ones bytes (0xff per 8-bit
// element), i.e. a vector of all set bits.
def Neon_immAllOnes: PatLeaf<(Neon_movi (i32 timm), (i32 imm)), [{
  ConstantSDNode *ImmConstVal = cast<ConstantSDNode>(N->getOperand(0));
  ConstantSDNode *OpCmodeConstVal = cast<ConstantSDNode>(N->getOperand(1));
  unsigned EltBits;
  uint64_t EltVal = A64Imms::decodeNeonModImm(ImmConstVal->getZExtValue(),
    OpCmodeConstVal->getZExtValue(), EltBits);
  return (EltBits == 8 && EltVal == 0xff);
}]>;
// Matches a NEON_MOVIMM that materializes all-zero bytes (0x0 per 8-bit
// element), i.e. a vector of all clear bits.
def Neon_immAllZeros: PatLeaf<(Neon_movi (i32 timm), (i32 imm)), [{
  ConstantSDNode *ImmConstVal = cast<ConstantSDNode>(N->getOperand(0));
  ConstantSDNode *OpCmodeConstVal = cast<ConstantSDNode>(N->getOperand(1));
  unsigned EltBits;
  uint64_t EltVal = A64Imms::decodeNeonModImm(ImmConstVal->getZExtValue(),
    OpCmodeConstVal->getZExtValue(), EltBits);
  return (EltBits == 8 && EltVal == 0x0);
}]>;
// NOT expressed as XOR with an all-ones immediate vector; ORN/BIC are then
// OR/AND with the second operand inverted.
def Neon_not8B  : PatFrag<(ops node:$in),
                          (xor node:$in, (bitconvert (v8i8 Neon_immAllOnes)))>;
def Neon_not16B : PatFrag<(ops node:$in),
                          (xor node:$in, (bitconvert (v16i8 Neon_immAllOnes)))>;

def Neon_orn8B : PatFrag<(ops node:$Rn, node:$Rm),
                         (or node:$Rn, (Neon_not8B node:$Rm))>;

def Neon_orn16B : PatFrag<(ops node:$Rn, node:$Rm),
                          (or node:$Rn, (Neon_not16B node:$Rm))>;

def Neon_bic8B : PatFrag<(ops node:$Rn, node:$Rm),
                         (and node:$Rn, (Neon_not8B node:$Rm))>;

def Neon_bic16B : PatFrag<(ops node:$Rn, node:$Rm),
                          (and node:$Rn, (Neon_not16B node:$Rm))>;

// Vector Bitwise OR NOT - register
defm ORNvvv : NeonI_3VSame_B_sizes<0b0, 0b11, 0b00011, "orn",
                                   Neon_orn8B, Neon_orn16B, 0>;

// Vector Bitwise Bit Clear (AND NOT) - register
defm BICvvv : NeonI_3VSame_B_sizes<0b0, 0b01, 0b00011, "bic",
                                   Neon_bic8B, Neon_bic16B, 0>;
// Re-select the byte-level bitwise instructions for the other vector types
// (the operation is type-agnostic at the bit level).
multiclass Neon_bitwise2V_patterns<SDPatternOperator opnode8B,
                                   SDPatternOperator opnode16B,
                                   Instruction INST8B,
                                   Instruction INST16B> {
  def : Pat<(v2i32 (opnode8B VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (opnode8B VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (opnode8B VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i32 (opnode16B VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (opnode16B VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (opnode16B VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$Rn, VPR128:$Rm)>;
}
// Additional patterns for bitwise instructions AND, EOR, ORR, BIC, ORN
defm : Neon_bitwise2V_patterns<and, and, ANDvvv_8B, ANDvvv_16B>;
defm : Neon_bitwise2V_patterns<or,  or,  ORRvvv_8B, ORRvvv_16B>;
defm : Neon_bitwise2V_patterns<xor, xor, EORvvv_8B, EORvvv_16B>;
defm : Neon_bitwise2V_patterns<Neon_bic8B, Neon_bic16B, BICvvv_8B, BICvvv_16B>;
defm : Neon_bitwise2V_patterns<Neon_orn8B, Neon_orn16B, ORNvvv_8B, ORNvvv_16B>;

// Vector Bitwise Select
def BSLvvv_8B  : NeonI_3VSame_Constraint_impl<"bsl", ".8b",  VPR64, v8i8,
                                              0b0, 0b1, 0b01, 0b00011, Neon_bsl>;

def BSLvvv_16B : NeonI_3VSame_Constraint_impl<"bsl", ".16b", VPR128, v16i8,
                                              0b1, 0b1, 0b01, 0b00011, Neon_bsl>;
// Re-select the byte-level three-operand bitwise-select instructions for the
// other vector types, for the open-coded (or (and ...) (and ... not)) form,
// and for the llvm.arm.neon.vbsl intrinsic.
multiclass Neon_bitwise3V_patterns<SDPatternOperator opnode,
                                   Instruction INST8B,
                                   Instruction INST16B> {
  // Disassociate type from instruction definition
  def : Pat<(v2i32 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i32 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;

  // Allow to match BSL instruction pattern with non-constant operand
  def : Pat<(v8i8 (or (and VPR64:$Rn, VPR64:$Rd),
                    (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
          (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (or (and VPR64:$Rn, VPR64:$Rd),
                     (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
          (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v2i32 (or (and VPR64:$Rn, VPR64:$Rd),
                     (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
          (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (or (and VPR64:$Rn, VPR64:$Rd),
                     (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
          (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v16i8 (or (and VPR128:$Rn, VPR128:$Rd),
                     (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
          (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (or (and VPR128:$Rn, VPR128:$Rd),
                     (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
          (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v4i32 (or (and VPR128:$Rn, VPR128:$Rd),
                     (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
          (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (or (and VPR128:$Rn, VPR128:$Rd),
                     (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
          (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;

  // Allow to match llvm.arm.* intrinsics.
  def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 VPR64:$src),
                    (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 VPR64:$src),
                    (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 VPR64:$src),
                    (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 VPR64:$src),
                    (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 VPR64:$src),
                    (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 VPR128:$src),
                    (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 VPR128:$src),
                    (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 VPR128:$src),
                    (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 VPR128:$src),
                    (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 VPR128:$src),
                    (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2f64 (int_arm_neon_vbsl (v2f64 VPR128:$src),
                    (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
}
// Additional patterns for bitwise instruction BSL
defm: Neon_bitwise3V_patterns<Neon_bsl, BSLvvv_8B, BSLvvv_16B>;

// A Neon_bsl that never matches during selection: BIT/BIF are assembler-only
// here (no DAG pattern), but the Constraint_impl class requires an operator.
def Neon_NoBSLop : PatFrag<(ops node:$src, node:$Rn, node:$Rm),
                           (Neon_bsl node:$src, node:$Rn, node:$Rm),
                           [{ (void)N; return false; }]>;

// Vector Bitwise Insert if True
def BITvvv_8B  : NeonI_3VSame_Constraint_impl<"bit", ".8b", VPR64, v8i8,
                                              0b0, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
def BITvvv_16B : NeonI_3VSame_Constraint_impl<"bit", ".16b", VPR128, v16i8,
                                              0b1, 0b1, 0b10, 0b00011, Neon_NoBSLop>;

// Vector Bitwise Insert if False
def BIFvvv_8B  : NeonI_3VSame_Constraint_impl<"bif", ".8b", VPR64, v8i8,
                                              0b0, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
def BIFvvv_16B : NeonI_3VSame_Constraint_impl<"bif", ".16b", VPR128, v16i8,
                                              0b1, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
// Vector Absolute Difference and Accumulate (Signed, Unsigned)

def Neon_uaba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                        (add node:$Ra, (int_arm_neon_vabdu node:$Rn, node:$Rm))>;
def Neon_saba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                        (add node:$Ra, (int_arm_neon_vabds node:$Rn, node:$Rm))>;

// Vector Absolute Difference and Accumulate (Unsigned)
def UABAvvv_8B  : NeonI_3VSame_Constraint_impl<"uaba", ".8b",  VPR64,  v8i8,
                                               0b0, 0b1, 0b00, 0b01111, Neon_uaba>;
def UABAvvv_16B : NeonI_3VSame_Constraint_impl<"uaba", ".16b", VPR128, v16i8,
                                               0b1, 0b1, 0b00, 0b01111, Neon_uaba>;
def UABAvvv_4H  : NeonI_3VSame_Constraint_impl<"uaba", ".4h",  VPR64,  v4i16,
                                               0b0, 0b1, 0b01, 0b01111, Neon_uaba>;
def UABAvvv_8H  : NeonI_3VSame_Constraint_impl<"uaba", ".8h",  VPR128, v8i16,
                                               0b1, 0b1, 0b01, 0b01111, Neon_uaba>;
def UABAvvv_2S  : NeonI_3VSame_Constraint_impl<"uaba", ".2s",  VPR64,  v2i32,
                                               0b0, 0b1, 0b10, 0b01111, Neon_uaba>;
def UABAvvv_4S  : NeonI_3VSame_Constraint_impl<"uaba", ".4s",  VPR128, v4i32,
                                               0b1, 0b1, 0b10, 0b01111, Neon_uaba>;

// Vector Absolute Difference and Accumulate (Signed)
def SABAvvv_8B  : NeonI_3VSame_Constraint_impl<"saba", ".8b",  VPR64,  v8i8,
                                               0b0, 0b0, 0b00, 0b01111, Neon_saba>;
def SABAvvv_16B : NeonI_3VSame_Constraint_impl<"saba", ".16b", VPR128, v16i8,
                                               0b1, 0b0, 0b00, 0b01111, Neon_saba>;
def SABAvvv_4H  : NeonI_3VSame_Constraint_impl<"saba", ".4h",  VPR64,  v4i16,
                                               0b0, 0b0, 0b01, 0b01111, Neon_saba>;
def SABAvvv_8H  : NeonI_3VSame_Constraint_impl<"saba", ".8h",  VPR128, v8i16,
                                               0b1, 0b0, 0b01, 0b01111, Neon_saba>;
def SABAvvv_2S  : NeonI_3VSame_Constraint_impl<"saba", ".2s",  VPR64,  v2i32,
                                               0b0, 0b0, 0b10, 0b01111, Neon_saba>;
def SABAvvv_4S  : NeonI_3VSame_Constraint_impl<"saba", ".4s",  VPR128, v4i32,
                                               0b1, 0b0, 0b10, 0b01111, Neon_saba>;
// Vector Absolute Difference (Signed, Unsigned)
defm UABDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01110, "uabd", int_arm_neon_vabdu, 0>;
defm SABDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01110, "sabd", int_arm_neon_vabds, 0>;

// Vector Absolute Difference (Floating Point)
defm FABDvvv: NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11010, "fabd",
                                    int_arm_neon_vabds, int_arm_neon_vabds,
                                    int_arm_neon_vabds, v2f32, v4f32, v2f64, 0>;

// Vector Reciprocal Step (Floating Point)
// (third operator line reconstructed: all three sizes use the same intrinsic)
defm FRECPSvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11111, "frecps",
                                       int_arm_neon_vrecps, int_arm_neon_vrecps,
                                       int_arm_neon_vrecps,
                                       v2f32, v4f32, v2f64, 0>;

// Vector Reciprocal Square Root Step (Floating Point)
defm FRSQRTSvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11111, "frsqrts",
                                        int_arm_neon_vrsqrts,
                                        int_arm_neon_vrsqrts,
                                        int_arm_neon_vrsqrts,
                                        v2f32, v4f32, v2f64, 0>;
// Vector Comparisons

// Bind each comparison condition to the generic NEON_CMP node.
def Neon_cmeq : PatFrag<(ops node:$lhs, node:$rhs),
                        (Neon_cmp node:$lhs, node:$rhs, SETEQ)>;
def Neon_cmphs : PatFrag<(ops node:$lhs, node:$rhs),
                         (Neon_cmp node:$lhs, node:$rhs, SETUGE)>;
def Neon_cmge : PatFrag<(ops node:$lhs, node:$rhs),
                        (Neon_cmp node:$lhs, node:$rhs, SETGE)>;
def Neon_cmhi : PatFrag<(ops node:$lhs, node:$rhs),
                        (Neon_cmp node:$lhs, node:$rhs, SETUGT)>;
def Neon_cmgt : PatFrag<(ops node:$lhs, node:$rhs),
                        (Neon_cmp node:$lhs, node:$rhs, SETGT)>;
// NeonI_compare_aliases class: swaps register operands to implement
// comparison aliases, e.g., CMLE is alias for CMGE with operands reversed.
class NeonI_compare_aliases<string asmop, string asmlane,
                            Instruction inst, RegisterOperand VPRC>
  : NeonInstAlias<asmop # "\t$Rd" # asmlane #", $Rn" # asmlane #
                    ", $Rm" # asmlane,
                  (inst VPRC:$Rd, VPRC:$Rm, VPRC:$Rn), 0b0>;
// Vector Comparisons (Integer)

// Vector Compare Mask Equal (Integer)
let isCommutable = 1 in {
defm CMEQvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10001, "cmeq", Neon_cmeq, 0>;
}

// Vector Compare Mask Higher or Same (Unsigned Integer)
defm CMHSvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00111, "cmhs", Neon_cmphs, 0>;

// Vector Compare Mask Greater Than or Equal (Integer)
defm CMGEvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00111, "cmge", Neon_cmge, 0>;

// Vector Compare Mask Higher (Unsigned Integer)
defm CMHIvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00110, "cmhi", Neon_cmhi, 0>;

// Vector Compare Mask Greater Than (Integer)
defm CMGTvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00110, "cmgt", Neon_cmgt, 0>;

// Vector Compare Mask Bitwise Test (Integer)
defm CMTSTvvv: NeonI_3VSame_BHSD_sizes<0b0, 0b10001, "cmtst", Neon_tst, 0>;
// Vector Compare Mask Less or Same (Unsigned Integer)
// CMLS is alias for CMHS with operands reversed.
def CMLSvvv_8B  : NeonI_compare_aliases<"cmls", ".8b",  CMHSvvv_8B,  VPR64>;
def CMLSvvv_16B : NeonI_compare_aliases<"cmls", ".16b", CMHSvvv_16B, VPR128>;
def CMLSvvv_4H  : NeonI_compare_aliases<"cmls", ".4h",  CMHSvvv_4H,  VPR64>;
def CMLSvvv_8H  : NeonI_compare_aliases<"cmls", ".8h",  CMHSvvv_8H,  VPR128>;
def CMLSvvv_2S  : NeonI_compare_aliases<"cmls", ".2s",  CMHSvvv_2S,  VPR64>;
def CMLSvvv_4S  : NeonI_compare_aliases<"cmls", ".4s",  CMHSvvv_4S,  VPR128>;
def CMLSvvv_2D  : NeonI_compare_aliases<"cmls", ".2d",  CMHSvvv_2D,  VPR128>;

// Vector Compare Mask Less Than or Equal (Integer)
// CMLE is alias for CMGE with operands reversed.
def CMLEvvv_8B  : NeonI_compare_aliases<"cmle", ".8b",  CMGEvvv_8B,  VPR64>;
def CMLEvvv_16B : NeonI_compare_aliases<"cmle", ".16b", CMGEvvv_16B, VPR128>;
def CMLEvvv_4H  : NeonI_compare_aliases<"cmle", ".4h",  CMGEvvv_4H,  VPR64>;
def CMLEvvv_8H  : NeonI_compare_aliases<"cmle", ".8h",  CMGEvvv_8H,  VPR128>;
def CMLEvvv_2S  : NeonI_compare_aliases<"cmle", ".2s",  CMGEvvv_2S,  VPR64>;
def CMLEvvv_4S  : NeonI_compare_aliases<"cmle", ".4s",  CMGEvvv_4S,  VPR128>;
def CMLEvvv_2D  : NeonI_compare_aliases<"cmle", ".2d",  CMGEvvv_2D,  VPR128>;

// Vector Compare Mask Lower (Unsigned Integer)
// CMLO is alias for CMHI with operands reversed.
def CMLOvvv_8B  : NeonI_compare_aliases<"cmlo", ".8b",  CMHIvvv_8B,  VPR64>;
def CMLOvvv_16B : NeonI_compare_aliases<"cmlo", ".16b", CMHIvvv_16B, VPR128>;
def CMLOvvv_4H  : NeonI_compare_aliases<"cmlo", ".4h",  CMHIvvv_4H,  VPR64>;
def CMLOvvv_8H  : NeonI_compare_aliases<"cmlo", ".8h",  CMHIvvv_8H,  VPR128>;
def CMLOvvv_2S  : NeonI_compare_aliases<"cmlo", ".2s",  CMHIvvv_2S,  VPR64>;
def CMLOvvv_4S  : NeonI_compare_aliases<"cmlo", ".4s",  CMHIvvv_4S,  VPR128>;
def CMLOvvv_2D  : NeonI_compare_aliases<"cmlo", ".2d",  CMHIvvv_2D,  VPR128>;

// Vector Compare Mask Less Than (Integer)
// CMLT is alias for CMGT with operands reversed.
def CMLTvvv_8B  : NeonI_compare_aliases<"cmlt", ".8b",  CMGTvvv_8B,  VPR64>;
def CMLTvvv_16B : NeonI_compare_aliases<"cmlt", ".16b", CMGTvvv_16B, VPR128>;
def CMLTvvv_4H  : NeonI_compare_aliases<"cmlt", ".4h",  CMGTvvv_4H,  VPR64>;
def CMLTvvv_8H  : NeonI_compare_aliases<"cmlt", ".8h",  CMGTvvv_8H,  VPR128>;
def CMLTvvv_2S  : NeonI_compare_aliases<"cmlt", ".2s",  CMGTvvv_2S,  VPR64>;
def CMLTvvv_4S  : NeonI_compare_aliases<"cmlt", ".4s",  CMGTvvv_4S,  VPR128>;
def CMLTvvv_2D  : NeonI_compare_aliases<"cmlt", ".2d",  CMGTvvv_2D,  VPR128>;
// Operand that only accepts the literal immediate #0, used by the
// compare-against-zero instructions.
// (braces and Name reconstructed — TODO confirm Name against upstream)
def neon_uimm0_asmoperand : AsmOperandClass {
  let Name = "UImm0";
  let PredicateMethod = "isUImm<0>";
  let RenderMethod = "addImmOperands";
}

def neon_uimm0 : Operand<i32>, ImmLeaf<i32, [{return Imm == 0;}]> {
  let ParserMatchClass = neon_uimm0_asmoperand;
  let PrintMethod = "printNeonUImm0Operand";
}
// Integer compare against zero for every element arrangement; the immediate
// operand only accepts #0 (neon_uimm0).
multiclass NeonI_cmpz_sizes<bit u, bits<5> opcode, string asmop, CondCode CC> {
  def _8B : NeonI_2VMisc<0b0, u, 0b00, opcode,
            (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.8b, $Rn.8b, $Imm",
            [(set (v8i8 VPR64:$Rd),
               (v8i8 (Neon_cmpz (v8i8 VPR64:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _16B : NeonI_2VMisc<0b1, u, 0b00, opcode,
             (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
             asmop # "\t$Rd.16b, $Rn.16b, $Imm",
             [(set (v16i8 VPR128:$Rd),
                (v16i8 (Neon_cmpz (v16i8 VPR128:$Rn), (i32 imm:$Imm), CC)))],
             NoItinerary>;

  def _4H : NeonI_2VMisc<0b0, u, 0b01, opcode,
            (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.4h, $Rn.4h, $Imm",
            [(set (v4i16 VPR64:$Rd),
               (v4i16 (Neon_cmpz (v4i16 VPR64:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _8H : NeonI_2VMisc<0b1, u, 0b01, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.8h, $Rn.8h, $Imm",
            [(set (v8i16 VPR128:$Rd),
               (v8i16 (Neon_cmpz (v8i16 VPR128:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _2S : NeonI_2VMisc<0b0, u, 0b10, opcode,
            (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.2s, $Rn.2s, $Imm",
            [(set (v2i32 VPR64:$Rd),
               (v2i32 (Neon_cmpz (v2i32 VPR64:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _4S : NeonI_2VMisc<0b1, u, 0b10, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.4s, $Rn.4s, $Imm",
            [(set (v4i32 VPR128:$Rd),
               (v4i32 (Neon_cmpz (v4i32 VPR128:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _2D : NeonI_2VMisc<0b1, u, 0b11, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.2d, $Rn.2d, $Imm",
            [(set (v2i64 VPR128:$Rd),
               (v2i64 (Neon_cmpz (v2i64 VPR128:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;
}
// Vector Compare Mask Equal to Zero (Integer)
defm CMEQvvi : NeonI_cmpz_sizes<0b0, 0b01001, "cmeq", SETEQ>;

// Vector Compare Mask Greater Than or Equal to Zero (Signed Integer)
defm CMGEvvi : NeonI_cmpz_sizes<0b1, 0b01000, "cmge", SETGE>;

// Vector Compare Mask Greater Than Zero (Signed Integer)
defm CMGTvvi : NeonI_cmpz_sizes<0b0, 0b01000, "cmgt", SETGT>;

// Vector Compare Mask Less Than or Equal To Zero (Signed Integer)
defm CMLEvvi : NeonI_cmpz_sizes<0b1, 0b01001, "cmle", SETLE>;

// Vector Compare Mask Less Than Zero (Signed Integer)
defm CMLTvvi : NeonI_cmpz_sizes<0b0, 0b01010, "cmlt", SETLT>;
// Vector Comparisons (Floating Point)

// Vector Compare Mask Equal (Floating Point)
let isCommutable = 1 in {
defm FCMEQvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11100, "fcmeq", Neon_cmeq,
                                      Neon_cmeq, Neon_cmeq,
                                      v2i32, v4i32, v2i64, 0>;
}
// Vector Compare Mask Greater Than Or Equal (Floating Point)
defm FCMGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11100, "fcmge", Neon_cmge,
                                      Neon_cmge, Neon_cmge,
                                      v2i32, v4i32, v2i64, 0>;

// Vector Compare Mask Greater Than (Floating Point)
defm FCMGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11100, "fcmgt", Neon_cmgt,
                                      Neon_cmgt, Neon_cmgt,
                                      v2i32, v4i32, v2i64, 0>;

// Vector Compare Mask Less Than Or Equal (Floating Point)
// FCMLE is alias for FCMGE with operands reversed.
def FCMLEvvv_2S : NeonI_compare_aliases<"fcmle", ".2s", FCMGEvvv_2S, VPR64>;
def FCMLEvvv_4S : NeonI_compare_aliases<"fcmle", ".4s", FCMGEvvv_4S, VPR128>;
def FCMLEvvv_2D : NeonI_compare_aliases<"fcmle", ".2d", FCMGEvvv_2D, VPR128>;

// Vector Compare Mask Less Than (Floating Point)
// FCMLT is alias for FCMGT with operands reversed.
def FCMLTvvv_2S : NeonI_compare_aliases<"fcmlt", ".2s", FCMGTvvv_2S, VPR64>;
def FCMLTvvv_4S : NeonI_compare_aliases<"fcmlt", ".4s", FCMGTvvv_4S, VPR128>;
def FCMLTvvv_2D : NeonI_compare_aliases<"fcmlt", ".2d", FCMGTvvv_2D, VPR128>;
// Floating-point compare against zero (#0.0) for the 2S/4S/2D arrangements;
// results are integer mask vectors.
multiclass NeonI_fpcmpz_sizes<bit u, bit size, bits<5> opcode,
                              string asmop, CondCode CC> {
  def _2S : NeonI_2VMisc<0b0, u, {size, 0b0}, opcode,
            (outs VPR64:$Rd), (ins VPR64:$Rn, fpz32:$FPImm),
            asmop # "\t$Rd.2s, $Rn.2s, $FPImm",
            [(set (v2i32 VPR64:$Rd),
               (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpimm:$FPImm), CC)))],
            NoItinerary>;

  def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm),
            asmop # "\t$Rd.4s, $Rn.4s, $FPImm",
            [(set (v4i32 VPR128:$Rd),
               (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpimm:$FPImm), CC)))],
            NoItinerary>;

  def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm),
            asmop # "\t$Rd.2d, $Rn.2d, $FPImm",
            [(set (v2i64 VPR128:$Rd),
               (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn), (f32 fpimm:$FPImm), CC)))],
            NoItinerary>;
}
// Floating-point compare-against-zero forms (second operand is #0.0).
// Each defm expands to _2S/_4S/_2D via NeonI_fpcmpz_sizes; the
// <u, size, opcode> arguments select the distinct encodings below.
786 // Vector Compare Mask Equal to Zero (Floating Point)
787 defm FCMEQvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01101, "fcmeq", SETEQ>;
789 // Vector Compare Mask Greater Than or Equal to Zero (Floating Point)
790 defm FCMGEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01100, "fcmge", SETGE>;
792 // Vector Compare Mask Greater Than Zero (Floating Point)
793 defm FCMGTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01100, "fcmgt", SETGT>;
795 // Vector Compare Mask Less Than or Equal To Zero (Floating Point)
796 defm FCMLEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01101, "fcmle", SETLE>;
798 // Vector Compare Mask Less Than Zero (Floating Point)
799 defm FCMLTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01110, "fcmlt", SETLT>;
801 // Vector Absolute Comparisons (Floating Point)
803 // Vector Absolute Compare Mask Greater Than Or Equal (Floating Point)
// The three intrinsic arguments map to the 2S / 4S / 2D instantiations;
// results are integer masks (v2i32/v4i32/v2i64), hence the integer types.
804 defm FACGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11101, "facge",
805 int_arm_neon_vacged, int_arm_neon_vacgeq,
806 int_aarch64_neon_vacgeq,
807 v2i32, v4i32, v2i64, 0>;
809 // Vector Absolute Compare Mask Greater Than (Floating Point)
810 defm FACGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11101, "facgt",
811 int_arm_neon_vacgtd, int_arm_neon_vacgtq,
812 int_aarch64_neon_vacgtq,
813 v2i32, v4i32, v2i64, 0>;
815 // Vector Absolute Compare Mask Less Than Or Equal (Floating Point)
816 // FACLE is alias for FACGE with operands reversed.
817 def FACLEvvv_2S : NeonI_compare_aliases<"facle", ".2s", FACGEvvv_2S, VPR64>;
818 def FACLEvvv_4S : NeonI_compare_aliases<"facle", ".4s", FACGEvvv_4S, VPR128>;
819 def FACLEvvv_2D : NeonI_compare_aliases<"facle", ".2d", FACGEvvv_2D, VPR128>;
821 // Vector Absolute Compare Mask Less Than (Floating Point)
822 // FACLT is alias for FACGT with operands reversed.
823 def FACLTvvv_2S : NeonI_compare_aliases<"faclt", ".2s", FACGTvvv_2S, VPR64>;
824 def FACLTvvv_4S : NeonI_compare_aliases<"faclt", ".4s", FACGTvvv_4S, VPR128>;
825 def FACLTvvv_2D : NeonI_compare_aliases<"faclt", ".2d", FACGTvvv_2D, VPR128>;
// Integer halving / rounding-halving / saturating add and subtract.
// u=0b0 selects the signed encoding, u=0b1 the unsigned one; the BHS
// multiclass covers 8/16/32-bit lanes, BHSD additionally 64-bit.
// NOTE(review): the meaning of the trailing 0/1 flag is defined by the
// multiclass (not visible here); SQSUB/UQSUB pass 1 while SHSUB/UHSUB
// pass 0 — if the flag marks commutativity, the saturating subtracts
// should pass 0 as well. Confirm against NeonI_3VSame_BHSD_sizes.
827 // Vector halving add (Integer Signed, Unsigned)
828 defm SHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00000, "shadd",
829 int_arm_neon_vhadds, 1>;
830 defm UHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00000, "uhadd",
831 int_arm_neon_vhaddu, 1>;
833 // Vector halving sub (Integer Signed, Unsigned)
834 defm SHSUBvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00100, "shsub",
835 int_arm_neon_vhsubs, 0>;
836 defm UHSUBvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00100, "uhsub",
837 int_arm_neon_vhsubu, 0>;
839 // Vector rounding halving add (Integer Signed, Unsigned)
840 defm SRHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00010, "srhadd",
841 int_arm_neon_vrhadds, 1>;
842 defm URHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00010, "urhadd",
843 int_arm_neon_vrhaddu, 1>;
845 // Vector Saturating add (Integer Signed, Unsigned)
846 defm SQADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00001, "sqadd",
847 int_arm_neon_vqadds, 1>;
848 defm UQADDvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00001, "uqadd",
849 int_arm_neon_vqaddu, 1>;
851 // Vector Saturating sub (Integer Signed, Unsigned)
852 defm SQSUBvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00101, "sqsub",
853 int_arm_neon_vqsubs, 1>;
854 defm UQSUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00101, "uqsub",
855 int_arm_neon_vqsubu, 1>;
// Register-controlled shifts: each lane of Rn is shifted by the signed
// shift count in the corresponding lane of Rm (negative = shift right).
857 // Vector Shift Left (Signed and Unsigned Integer)
858 defm SSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01000, "sshl",
859 int_arm_neon_vshifts, 1>;
860 defm USHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01000, "ushl",
861 int_arm_neon_vshiftu, 1>;
863 // Vector Saturating Shift Left (Signed and Unsigned Integer)
864 defm SQSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01001, "sqshl",
865 int_arm_neon_vqshifts, 1>;
866 defm UQSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01001, "uqshl",
867 int_arm_neon_vqshiftu, 1>;
869 // Vector Rounding Shift Left (Signed and Unsigned Integer)
870 defm SRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01010, "srshl",
871 int_arm_neon_vrshifts, 1>;
872 defm URSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01010, "urshl",
873 int_arm_neon_vrshiftu, 1>;
875 // Vector Saturating Rounding Shift Left (Signed and Unsigned Integer)
876 defm SQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01011, "sqrshl",
877 int_arm_neon_vqrshifts, 1>;
878 defm UQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01011, "uqrshl",
879 int_arm_neon_vqrshiftu, 1>;
881 // Vector Maximum (Signed and Unsigned Integer)
882 defm SMAXvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01100, "smax", int_arm_neon_vmaxs, 1>;
883 defm UMAXvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01100, "umax", int_arm_neon_vmaxu, 1>;
885 // Vector Minimum (Signed and Unsigned Integer)
886 defm SMINvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01101, "smin", int_arm_neon_vmins, 1>;
887 defm UMINvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01101, "umin", int_arm_neon_vminu, 1>;
889 // Vector Maximum (Floating Point)
// The overloaded int_arm_neon_vmaxs/vmins intrinsics are reused for all
// three FP vector types (2S/4S/2D instantiations).
890 defm FMAXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11110, "fmax",
891 int_arm_neon_vmaxs, int_arm_neon_vmaxs,
892 int_arm_neon_vmaxs, v2f32, v4f32, v2f64, 1>;
894 // Vector Minimum (Floating Point)
895 defm FMINvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11110, "fmin",
896 int_arm_neon_vmins, int_arm_neon_vmins,
897 int_arm_neon_vmins, v2f32, v4f32, v2f64, 1>;
899 // Vector maxNum (Floating Point) - prefer a number over a quiet NaN
900 defm FMAXNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11000, "fmaxnm",
901 int_aarch64_neon_vmaxnm,
902 int_aarch64_neon_vmaxnm,
903 int_aarch64_neon_vmaxnm,
904 v2f32, v4f32, v2f64, 1>;
906 // Vector minNum (Floating Point) - prefer a number over a quiet NaN
907 defm FMINNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11000, "fminnm",
908 int_aarch64_neon_vminnm,
909 int_aarch64_neon_vminnm,
910 int_aarch64_neon_vminnm,
911 v2f32, v4f32, v2f64, 1>;
// Pairwise reductions: adjacent element pairs of the concatenation
// Rn:Rm are reduced into Rd.
// NOTE(review): pairwise ops are not commutative (swapping Rn/Rm reorders
// the result lanes); confirm the trailing '1' flag in the multiclass does
// not mark isCommutable.
913 // Vector Maximum Pairwise (Signed and Unsigned Integer)
914 defm SMAXPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10100, "smaxp", int_arm_neon_vpmaxs, 1>;
915 defm UMAXPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10100, "umaxp", int_arm_neon_vpmaxu, 1>;
917 // Vector Minimum Pairwise (Signed and Unsigned Integer)
918 defm SMINPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10101, "sminp", int_arm_neon_vpmins, 1>;
919 defm UMINPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10101, "uminp", int_arm_neon_vpminu, 1>;
921 // Vector Maximum Pairwise (Floating Point)
922 defm FMAXPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11110, "fmaxp",
923 int_arm_neon_vpmaxs, int_arm_neon_vpmaxs,
924 int_arm_neon_vpmaxs, v2f32, v4f32, v2f64, 1>;
926 // Vector Minimum Pairwise (Floating Point)
927 defm FMINPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11110, "fminp",
928 int_arm_neon_vpmins, int_arm_neon_vpmins,
929 int_arm_neon_vpmins, v2f32, v4f32, v2f64, 1>;
931 // Vector maxNum Pairwise (Floating Point) - prefer a number over a quiet NaN
932 defm FMAXNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11000, "fmaxnmp",
933 int_aarch64_neon_vpmaxnm,
934 int_aarch64_neon_vpmaxnm,
935 int_aarch64_neon_vpmaxnm,
936 v2f32, v4f32, v2f64, 1>;
938 // Vector minNum Pairwise (Floating Point) - prefer a number over a quiet NaN
939 defm FMINNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11000, "fminnmp",
940 int_aarch64_neon_vpminnm,
941 int_aarch64_neon_vpminnm,
942 int_aarch64_neon_vpminnm,
943 v2f32, v4f32, v2f64, 1>;
945 // Vector Addition Pairwise (Integer)
// Note: record name lacks the 'vvv' suffix used by its siblings; kept as-is
// since other files may reference ADDP_* instruction names.
946 defm ADDP : NeonI_3VSame_BHSD_sizes<0b0, 0b10111, "addp", int_arm_neon_vpadd, 1>;
948 // Vector Addition Pairwise (Floating Point)
949 defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp",
953 v2f32, v4f32, v2f64, 1>;
955 // Vector Saturating Doubling Multiply High
956 defm SQDMULHvvv : NeonI_3VSame_HS_sizes<0b0, 0b10110, "sqdmulh",
957 int_arm_neon_vqdmulh, 1>;
959 // Vector Saturating Rounding Doubling Multiply High
960 defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh",
961 int_arm_neon_vqrdmulh, 1>;
963 // Vector Multiply Extended (Floating Point)
// Same int_aarch64_neon_vmulx intrinsic instantiated for 2S/4S/2D.
964 defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx",
965 int_aarch64_neon_vmulx,
966 int_aarch64_neon_vmulx,
967 int_aarch64_neon_vmulx,
968 v2f32, v4f32, v2f64, 1>;
970 // Vector Immediate Instructions
972 multiclass neon_mov_imm_shift_asmoperands<string PREFIX>
974 def _asmoperand : AsmOperandClass
976 let Name = "NeonMovImmShift" # PREFIX;
977 let RenderMethod = "addNeonMovImmShift" # PREFIX # "Operands";
978 let PredicateMethod = "isNeonMovImmShift" # PREFIX;
982 // Definition of vector immediates shift operands
984 // The selectable use-cases extract the shift operation
985 // information from the OpCmode fields encoded in the immediate.
986 def neon_mod_shift_imm_XFORM : SDNodeXForm<imm, [{
987 uint64_t OpCmode = N->getZExtValue();
989 unsigned ShiftOnesIn;
991 A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
992 if (!HasShift) return SDValue();
993 return CurDAG->getTargetConstant(ShiftImm, MVT::i32);
996 // Vector immediates shift operands which accept LSL and MSL
997 // shift operators with shift value in the range of 0, 8, 16, 24 (LSL),
998 // or 0, 8 (LSLH) or 8, 16 (MSL).
999 defm neon_mov_imm_LSL : neon_mov_imm_shift_asmoperands<"LSL">;
1000 defm neon_mov_imm_MSL : neon_mov_imm_shift_asmoperands<"MSL">;
1001 // LSLH restricts shift amount to 0, 8 out of 0, 8, 16, 24
1002 defm neon_mov_imm_LSLH : neon_mov_imm_shift_asmoperands<"LSLH">;
1004 multiclass neon_mov_imm_shift_operands<string PREFIX,
1005 string HALF, string ISHALF, code pred>
1007 def _operand : Operand<i32>, ImmLeaf<i32, pred, neon_mod_shift_imm_XFORM>
1010 "printNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
1012 "DecodeNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
1013 let ParserMatchClass =
1014 !cast<AsmOperandClass>("neon_mov_imm_" # PREFIX # HALF # "_asmoperand");
1018 defm neon_mov_imm_LSL : neon_mov_imm_shift_operands<"LSL", "", "false", [{
1020 unsigned ShiftOnesIn;
1022 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1023 return (HasShift && !ShiftOnesIn);
1026 defm neon_mov_imm_MSL : neon_mov_imm_shift_operands<"MSL", "", "false", [{
1028 unsigned ShiftOnesIn;
1030 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1031 return (HasShift && ShiftOnesIn);
1034 defm neon_mov_imm_LSLH : neon_mov_imm_shift_operands<"LSL", "H", "true", [{
1036 unsigned ShiftOnesIn;
1038 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1039 return (HasShift && !ShiftOnesIn);
1042 def neon_uimm1_asmoperand : AsmOperandClass
1045 let PredicateMethod = "isUImm<1>";
1046 let RenderMethod = "addImmOperands";
1049 def neon_uimm2_asmoperand : AsmOperandClass
1052 let PredicateMethod = "isUImm<2>";
1053 let RenderMethod = "addImmOperands";
1056 def neon_uimm8_asmoperand : AsmOperandClass
1059 let PredicateMethod = "isUImm<8>";
1060 let RenderMethod = "addImmOperands";
1063 def neon_uimm8 : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
1064 let ParserMatchClass = neon_uimm8_asmoperand;
1065 let PrintMethod = "printNeonUImm8Operand";
1068 def neon_uimm64_mask_asmoperand : AsmOperandClass
1070 let Name = "NeonUImm64Mask";
1071 let PredicateMethod = "isNeonUImm64Mask";
1072 let RenderMethod = "addNeonUImm64MaskOperands";
1075 // MCOperand for a 64-bit bytemask in which each byte is either
1076 // 0x00 or 0xff; it is encoded as an unsigned 8-bit value
1077 def neon_uimm64_mask : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
1078 let ParserMatchClass = neon_uimm64_mask_asmoperand;
1079 let PrintMethod = "printNeonUImm64MaskOperand";
1082 multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
1083 SDPatternOperator opnode>
1085 // shift zeros, per word
1086 def _2S : NeonI_1VModImm<0b0, op,
1088 (ins neon_uimm8:$Imm,
1089 neon_mov_imm_LSL_operand:$Simm),
1090 !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
1091 [(set (v2i32 VPR64:$Rd),
1092 (v2i32 (opnode (timm:$Imm),
1093 (neon_mov_imm_LSL_operand:$Simm))))],
1096 let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
1099 def _4S : NeonI_1VModImm<0b1, op,
1101 (ins neon_uimm8:$Imm,
1102 neon_mov_imm_LSL_operand:$Simm),
1103 !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
1104 [(set (v4i32 VPR128:$Rd),
1105 (v4i32 (opnode (timm:$Imm),
1106 (neon_mov_imm_LSL_operand:$Simm))))],
1109 let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
1112 // shift zeros, per halfword
1113 def _4H : NeonI_1VModImm<0b0, op,
1115 (ins neon_uimm8:$Imm,
1116 neon_mov_imm_LSLH_operand:$Simm),
1117 !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
1118 [(set (v4i16 VPR64:$Rd),
1119 (v4i16 (opnode (timm:$Imm),
1120 (neon_mov_imm_LSLH_operand:$Simm))))],
1123 let cmode = {0b1, 0b0, Simm, 0b0};
1126 def _8H : NeonI_1VModImm<0b1, op,
1128 (ins neon_uimm8:$Imm,
1129 neon_mov_imm_LSLH_operand:$Simm),
1130 !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
1131 [(set (v8i16 VPR128:$Rd),
1132 (v8i16 (opnode (timm:$Imm),
1133 (neon_mov_imm_LSLH_operand:$Simm))))],
1136 let cmode = {0b1, 0b0, Simm, 0b0};
1140 multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
1141 SDPatternOperator opnode,
1142 SDPatternOperator neonopnode>
1144 let Constraints = "$src = $Rd" in {
1145 // shift zeros, per word
1146 def _2S : NeonI_1VModImm<0b0, op,
1148 (ins VPR64:$src, neon_uimm8:$Imm,
1149 neon_mov_imm_LSL_operand:$Simm),
1150 !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
1151 [(set (v2i32 VPR64:$Rd),
1152 (v2i32 (opnode (v2i32 VPR64:$src),
1153 (v2i32 (bitconvert (v2i32 (neonopnode timm:$Imm,
1154 neon_mov_imm_LSL_operand:$Simm)))))))],
1157 let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
1160 def _4S : NeonI_1VModImm<0b1, op,
1162 (ins VPR128:$src, neon_uimm8:$Imm,
1163 neon_mov_imm_LSL_operand:$Simm),
1164 !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
1165 [(set (v4i32 VPR128:$Rd),
1166 (v4i32 (opnode (v4i32 VPR128:$src),
1167 (v4i32 (bitconvert (v4i32 (neonopnode timm:$Imm,
1168 neon_mov_imm_LSL_operand:$Simm)))))))],
1171 let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
1174 // shift zeros, per halfword
1175 def _4H : NeonI_1VModImm<0b0, op,
1177 (ins VPR64:$src, neon_uimm8:$Imm,
1178 neon_mov_imm_LSLH_operand:$Simm),
1179 !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
1180 [(set (v4i16 VPR64:$Rd),
1181 (v4i16 (opnode (v4i16 VPR64:$src),
1182 (v4i16 (bitconvert (v4i16 (neonopnode timm:$Imm,
1183 neon_mov_imm_LSL_operand:$Simm)))))))],
1186 let cmode = {0b1, 0b0, Simm, 0b1};
1189 def _8H : NeonI_1VModImm<0b1, op,
1191 (ins VPR128:$src, neon_uimm8:$Imm,
1192 neon_mov_imm_LSLH_operand:$Simm),
1193 !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
1194 [(set (v8i16 VPR128:$Rd),
1195 (v8i16 (opnode (v8i16 VPR128:$src),
1196 (v8i16 (bitconvert (v8i16 (neonopnode timm:$Imm,
1197 neon_mov_imm_LSL_operand:$Simm)))))))],
1200 let cmode = {0b1, 0b0, Simm, 0b1};
1205 multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op,
1206 SDPatternOperator opnode>
1208 // shift ones, per word
1209 def _2S : NeonI_1VModImm<0b0, op,
1211 (ins neon_uimm8:$Imm,
1212 neon_mov_imm_MSL_operand:$Simm),
1213 !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
1214 [(set (v2i32 VPR64:$Rd),
1215 (v2i32 (opnode (timm:$Imm),
1216 (neon_mov_imm_MSL_operand:$Simm))))],
1219 let cmode = {0b1, 0b1, 0b0, Simm};
1222 def _4S : NeonI_1VModImm<0b1, op,
1224 (ins neon_uimm8:$Imm,
1225 neon_mov_imm_MSL_operand:$Simm),
1226 !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
1227 [(set (v4i32 VPR128:$Rd),
1228 (v4i32 (opnode (timm:$Imm),
1229 (neon_mov_imm_MSL_operand:$Simm))))],
1232 let cmode = {0b1, 0b1, 0b0, Simm};
1236 // Vector Move Immediate Shifted
1237 let isReMaterializable = 1 in {
1238 defm MOVIvi_lsl : NeonI_mov_imm_lsl_sizes<"movi", 0b0, Neon_movi>;
1241 // Vector Move Inverted Immediate Shifted
1242 let isReMaterializable = 1 in {
1243 defm MVNIvi_lsl : NeonI_mov_imm_lsl_sizes<"mvni", 0b1, Neon_mvni>;
1246 // Vector Bitwise Bit Clear (AND NOT) - immediate
1247 let isReMaterializable = 1 in {
1248 defm BICvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"bic", 0b1,
1252 // Vector Bitwise OR - immediate
1254 let isReMaterializable = 1 in {
1255 defm ORRvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"orr", 0b0,
1259 // Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
1260 // LowerBUILD_VECTOR favors lowering MOVI over MVNI.
1261 // BIC immediate instructions selection requires additional patterns to
1262 // transform Neon_movi operands into BIC immediate operands
1264 def neon_mov_imm_LSLH_transform_XFORM : SDNodeXForm<imm, [{
1265 uint64_t OpCmode = N->getZExtValue();
1267 unsigned ShiftOnesIn;
1268 (void)A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
1269 // LSLH restricts shift amount to 0, 8 which are encoded as 0 and 1
1270 // Transform encoded shift amount 0 to 1 and 1 to 0.
1271 return CurDAG->getTargetConstant(!ShiftImm, MVT::i32);
1274 def neon_mov_imm_LSLH_transform_operand
1277 unsigned ShiftOnesIn;
1279 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1280 return (HasShift && !ShiftOnesIn); }],
1281 neon_mov_imm_LSLH_transform_XFORM>;
// Masking the low byte of each halfword with a MOVI of 0xff is rewritten
// as a BIC of 0x00 shifted the *other* way: the XFORM operand flips the
// encoded LSLH shift amount (0 <-> 8).
1283 // Transform (and A, (4h Neon_movi 0xff)) -> BIC 4h (A, 0x00, LSL 8)
1284 // Transform (and A, (4h Neon_movi 0xff LSL #8)) -> BIC 4h (A, 0x00)
1285 def : Pat<(v4i16 (and VPR64:$src,
1286 (v4i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
1287 (BICvi_lsl_4H VPR64:$src, 0,
1288 neon_mov_imm_LSLH_transform_operand:$Simm)>;
1290 // Transform (and A, (8h Neon_movi 0xff)) -> BIC 8h (A, 0x00, LSL 8)
1291 // Transform (and A, (8h Neon_movi 0xff LSL #8)) -> BIC 8h (A, 0x00)
1292 def : Pat<(v8i16 (and VPR128:$src,
1293 (v8i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
1294 (BICvi_lsl_8H VPR128:$src, 0,
1295 neon_mov_imm_LSLH_transform_operand:$Simm)>;
1298 multiclass Neon_bitwiseVi_patterns<SDPatternOperator opnode,
1299 SDPatternOperator neonopnode,
1301 Instruction INST8H> {
1302 def : Pat<(v8i8 (opnode VPR64:$src,
1303 (bitconvert(v4i16 (neonopnode timm:$Imm,
1304 neon_mov_imm_LSLH_operand:$Simm))))),
1305 (INST4H VPR64:$src, neon_uimm8:$Imm,
1306 neon_mov_imm_LSLH_operand:$Simm)>;
1307 def : Pat<(v1i64 (opnode VPR64:$src,
1308 (bitconvert(v4i16 (neonopnode timm:$Imm,
1309 neon_mov_imm_LSLH_operand:$Simm))))),
1310 (INST4H VPR64:$src, neon_uimm8:$Imm,
1311 neon_mov_imm_LSLH_operand:$Simm)>;
1313 def : Pat<(v16i8 (opnode VPR128:$src,
1314 (bitconvert(v8i16 (neonopnode timm:$Imm,
1315 neon_mov_imm_LSLH_operand:$Simm))))),
1316 (INST8H VPR128:$src, neon_uimm8:$Imm,
1317 neon_mov_imm_LSLH_operand:$Simm)>;
1318 def : Pat<(v4i32 (opnode VPR128:$src,
1319 (bitconvert(v8i16 (neonopnode timm:$Imm,
1320 neon_mov_imm_LSLH_operand:$Simm))))),
1321 (INST8H VPR128:$src, neon_uimm8:$Imm,
1322 neon_mov_imm_LSLH_operand:$Simm)>;
1323 def : Pat<(v2i64 (opnode VPR128:$src,
1324 (bitconvert(v8i16 (neonopnode timm:$Imm,
1325 neon_mov_imm_LSLH_operand:$Simm))))),
1326 (INST8H VPR128:$src, neon_uimm8:$Imm,
1327 neon_mov_imm_LSLH_operand:$Simm)>;
1330 // Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
// BIC computes Rd = Rd & ~(imm << shift), and the NEON_MVNIMM node
// materializes ~(imm << shift), so the DAG produced by LowerBUILD_VECTOR
// for a maskable constant is (and A, (Neon_mvni imm, shift)).  The root
// must therefore be 'and': matching 'or' would select BIC for a bitwise
// OR and miscompile.
1331 defm : Neon_bitwiseVi_patterns<and, Neon_mvni, BICvi_lsl_4H, BICvi_lsl_8H>;
1333 // Additional patterns for Vector Bitwise OR - immediate
// ORR Rd, #imm, shift computes Rd | (imm << shift); NEON_MOVIMM is the
// materialized (imm << shift) value, so the root is 'or'.
1334 defm : Neon_bitwiseVi_patterns<or, Neon_movi, ORRvi_lsl_4H, ORRvi_lsl_8H>;
1337 // Vector Move Immediate Masked
1338 let isReMaterializable = 1 in {
1339 defm MOVIvi_msl : NeonI_mov_imm_msl_sizes<"movi", 0b0, Neon_movi>;
1342 // Vector Move Inverted Immediate Masked
1343 let isReMaterializable = 1 in {
1344 defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>;
1347 class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane,
1348 Instruction inst, RegisterOperand VPRC>
1349 : NeonInstAlias<!strconcat(asmop, "\t$Rd," # asmlane # ", $Imm"),
1350 (inst VPRC:$Rd, neon_uimm8:$Imm, 0), 0b0>;
// Each alias accepts the shift-less assembly form (e.g. "movi v0.2s, #1")
// and maps it onto the corresponding LSL #0 encoding (the alias passes a
// literal 0 for the shift operand).
1352 // Aliases for Vector Move Immediate Shifted
1353 def : NeonI_mov_imm_lsl_aliases<"movi", ".2s", MOVIvi_lsl_2S, VPR64>;
1354 def : NeonI_mov_imm_lsl_aliases<"movi", ".4s", MOVIvi_lsl_4S, VPR128>;
1355 def : NeonI_mov_imm_lsl_aliases<"movi", ".4h", MOVIvi_lsl_4H, VPR64>;
1356 def : NeonI_mov_imm_lsl_aliases<"movi", ".8h", MOVIvi_lsl_8H, VPR128>;
1358 // Aliases for Vector Move Inverted Immediate Shifted
1359 def : NeonI_mov_imm_lsl_aliases<"mvni", ".2s", MVNIvi_lsl_2S, VPR64>;
1360 def : NeonI_mov_imm_lsl_aliases<"mvni", ".4s", MVNIvi_lsl_4S, VPR128>;
1361 def : NeonI_mov_imm_lsl_aliases<"mvni", ".4h", MVNIvi_lsl_4H, VPR64>;
1362 def : NeonI_mov_imm_lsl_aliases<"mvni", ".8h", MVNIvi_lsl_8H, VPR128>;
1364 // Aliases for Vector Bitwise Bit Clear (AND NOT) - immediate
1365 def : NeonI_mov_imm_lsl_aliases<"bic", ".2s", BICvi_lsl_2S, VPR64>;
1366 def : NeonI_mov_imm_lsl_aliases<"bic", ".4s", BICvi_lsl_4S, VPR128>;
1367 def : NeonI_mov_imm_lsl_aliases<"bic", ".4h", BICvi_lsl_4H, VPR64>;
1368 def : NeonI_mov_imm_lsl_aliases<"bic", ".8h", BICvi_lsl_8H, VPR128>;
1370 // Aliases for Vector Bitwise OR - immediate
1371 def : NeonI_mov_imm_lsl_aliases<"orr", ".2s", ORRvi_lsl_2S, VPR64>;
1372 def : NeonI_mov_imm_lsl_aliases<"orr", ".4s", ORRvi_lsl_4S, VPR128>;
1373 def : NeonI_mov_imm_lsl_aliases<"orr", ".4h", ORRvi_lsl_4H, VPR64>;
1374 def : NeonI_mov_imm_lsl_aliases<"orr", ".8h", ORRvi_lsl_8H, VPR128>;
1376 // Vector Move Immediate - per byte
1377 let isReMaterializable = 1 in {
1378 def MOVIvi_8B : NeonI_1VModImm<0b0, 0b0,
1379 (outs VPR64:$Rd), (ins neon_uimm8:$Imm),
1380 "movi\t$Rd.8b, $Imm",
1381 [(set (v8i8 VPR64:$Rd),
1382 (v8i8 (Neon_movi (timm:$Imm), (i32 imm))))],
1387 def MOVIvi_16B : NeonI_1VModImm<0b1, 0b0,
1388 (outs VPR128:$Rd), (ins neon_uimm8:$Imm),
1389 "movi\t$Rd.16b, $Imm",
1390 [(set (v16i8 VPR128:$Rd),
1391 (v16i8 (Neon_movi (timm:$Imm), (i32 imm))))],
1397 // Vector Move Immediate - bytemask, per double word
1398 let isReMaterializable = 1 in {
1399 def MOVIvi_2D : NeonI_1VModImm<0b1, 0b1,
1400 (outs VPR128:$Rd), (ins neon_uimm64_mask:$Imm),
1401 "movi\t $Rd.2d, $Imm",
1402 [(set (v2i64 VPR128:$Rd),
1403 (v2i64 (Neon_movi (timm:$Imm), (i32 imm))))],
1409 // Vector Move Immediate - bytemask, one doubleword
1411 let isReMaterializable = 1 in {
1412 def MOVIdi : NeonI_1VModImm<0b0, 0b1,
1413 (outs FPR64:$Rd), (ins neon_uimm64_mask:$Imm),
1415 [(set (f64 FPR64:$Rd),
1417 (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))))],
1423 // Vector Floating Point Move Immediate
1425 class NeonI_FMOV_impl<string asmlane, RegisterOperand VPRC, ValueType OpTy,
1426 Operand immOpType, bit q, bit op>
1427 : NeonI_1VModImm<q, op,
1428 (outs VPRC:$Rd), (ins immOpType:$Imm),
1429 "fmov\t$Rd" # asmlane # ", $Imm",
1430 [(set (OpTy VPRC:$Rd),
1431 (OpTy (Neon_fmovi (timm:$Imm))))],
1436 let isReMaterializable = 1 in {
1437 def FMOVvi_2S : NeonI_FMOV_impl<".2s", VPR64, v2f32, fmov32_operand, 0b0, 0b0>;
1438 def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>;
1439 def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>;
1442 // Vector Shift (Immediate)
1443 // Immediate in [0, 63]
1444 def imm0_63 : Operand<i32> {
1445 let ParserMatchClass = uimm6_asmoperand;
1448 // Shift Right/Left Immediate - The immh:immb field of these shifts is encoded
1452 // 8 immh:immb<6:3> = '0001xxx', <imm> is encoded in immh:immb<2:0>
1453 // 16 immh:immb<6:4> = '001xxxx', <imm> is encoded in immh:immb<3:0>
1454 // 32 immh:immb<6:5> = '01xxxxx', <imm> is encoded in immh:immb<4:0>
1455 // 64 immh:immb<6> = '1xxxxxx', <imm> is encoded in immh:immb<5:0>
1457 // The shift right immediate amount, in the range 1 to element bits, is computed
1458 // as Offset - UInt(immh:immb). The shift left immediate amount, in the range 0
1459 // to element bits - 1, is computed as UInt(immh:immb) - Offset.
1461 class shr_imm_asmoperands<string OFFSET> : AsmOperandClass {
1462 let Name = "ShrImm" # OFFSET;
1463 let RenderMethod = "addImmOperands";
1464 let DiagnosticType = "ShrImm" # OFFSET;
1467 class shr_imm<string OFFSET> : Operand<i32> {
1468 let EncoderMethod = "getShiftRightImm" # OFFSET;
1469 let DecoderMethod = "DecodeShiftRightImm" # OFFSET;
1470 let ParserMatchClass =
1471 !cast<AsmOperandClass>("shr_imm" # OFFSET # "_asmoperand");
// Shift-right immediate operands, one per element width; the asmoperand
// classes carry the matcher/diagnostic hooks, the operands the
// encoder/decoder methods (see shr_imm_asmoperands / shr_imm above).
1474 def shr_imm8_asmoperand : shr_imm_asmoperands<"8">;
1475 def shr_imm16_asmoperand : shr_imm_asmoperands<"16">;
1476 def shr_imm32_asmoperand : shr_imm_asmoperands<"32">;
1477 def shr_imm64_asmoperand : shr_imm_asmoperands<"64">;
1479 def shr_imm8 : shr_imm<"8">;
1480 def shr_imm16 : shr_imm<"16">;
1481 def shr_imm32 : shr_imm<"32">;
1482 def shr_imm64 : shr_imm<"64">;
1484 class shl_imm_asmoperands<string OFFSET> : AsmOperandClass {
1485 let Name = "ShlImm" # OFFSET;
1486 let RenderMethod = "addImmOperands";
1487 let DiagnosticType = "ShlImm" # OFFSET;
1490 class shl_imm<string OFFSET> : Operand<i32> {
1491 let EncoderMethod = "getShiftLeftImm" # OFFSET;
1492 let DecoderMethod = "DecodeShiftLeftImm" # OFFSET;
1493 let ParserMatchClass =
1494 !cast<AsmOperandClass>("shl_imm" # OFFSET # "_asmoperand");
// Shift-left immediate operands, one per element width; parallel to the
// shr_imm* family but using the shift-left encoder/decoder methods.
1497 def shl_imm8_asmoperand : shl_imm_asmoperands<"8">;
1498 def shl_imm16_asmoperand : shl_imm_asmoperands<"16">;
1499 def shl_imm32_asmoperand : shl_imm_asmoperands<"32">;
1500 def shl_imm64_asmoperand : shl_imm_asmoperands<"64">;
1502 def shl_imm8 : shl_imm<"8">;
1503 def shl_imm16 : shl_imm<"16">;
1504 def shl_imm32 : shl_imm<"32">;
1505 def shl_imm64 : shl_imm<"64">;
1507 class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
1508 RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode>
1509 : NeonI_2VShiftImm<q, u, opcode,
1510 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
1511 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1512 [(set (Ty VPRC:$Rd),
1513 (Ty (OpNode (Ty VPRC:$Rn),
1514 (Ty (Neon_vdup (i32 imm:$Imm))))))],
1517 multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> {
1518 // 64-bit vector types.
1519 def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3, shl> {
1520 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1523 def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4, shl> {
1524 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1527 def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5, shl> {
1528 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1531 // 128-bit vector types.
1532 def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3, shl> {
1533 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1536 def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4, shl> {
1537 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1540 def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5, shl> {
1541 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1544 def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63, shl> {
1545 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
1549 multiclass NeonI_N2VShR<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
1550 def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1552 let Inst{22-19} = 0b0001;
1555 def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1557 let Inst{22-20} = 0b001;
1560 def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1562 let Inst{22-21} = 0b01;
1565 def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1567 let Inst{22-19} = 0b0001;
1570 def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1572 let Inst{22-20} = 0b001;
1575 def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1577 let Inst{22-21} = 0b01;
1580 def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
// Immediate-shift instructions: SHL matches 'shl', SSHR/USHR match the
// generic sra/srl nodes with a splatted (Neon_vdup) shift amount.
1587 defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">;
1590 defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>;
1591 defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>;
// PatFrags selecting the high 64-bit half of a 128-bit vector; the
// extract index is half the lane count (8 of 16 bytes, 4 of 8 halfwords,
// 2 of 4 words).
1593 def Neon_High16B : PatFrag<(ops node:$in),
1594 (extract_subvector (v16i8 node:$in), (iPTR 8))>;
1595 def Neon_High8H : PatFrag<(ops node:$in),
1596 (extract_subvector (v8i16 node:$in), (iPTR 4))>;
1597 def Neon_High4S : PatFrag<(ops node:$in),
1598 (extract_subvector (v4i32 node:$in), (iPTR 2))>;
1600 def Neon_low8H : PatFrag<(ops node:$in),
1601 (v4i16 (extract_subvector (v8i16 node:$in),
1603 def Neon_low4S : PatFrag<(ops node:$in),
1604 (v2i32 (extract_subvector (v4i32 node:$in),
1606 def Neon_low4f : PatFrag<(ops node:$in),
1607 (v2f32 (extract_subvector (v4f32 node:$in),
1610 class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1611 string SrcT, ValueType DestTy, ValueType SrcTy,
1612 Operand ImmTy, SDPatternOperator ExtOp>
1613 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1614 (ins VPR64:$Rn, ImmTy:$Imm),
1615 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1616 [(set (DestTy VPR128:$Rd),
1618 (DestTy (ExtOp (SrcTy VPR64:$Rn))),
1619 (DestTy (Neon_vdup (i32 imm:$Imm))))))],
1622 class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1623 string SrcT, ValueType DestTy, ValueType SrcTy,
1624 int StartIndex, Operand ImmTy,
1625 SDPatternOperator ExtOp, PatFrag getTop>
1626 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1627 (ins VPR128:$Rn, ImmTy:$Imm),
1628 asmop # "2\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1629 [(set (DestTy VPR128:$Rd),
1632 (SrcTy (getTop VPR128:$Rn)))),
1633 (DestTy (Neon_vdup (i32 imm:$Imm))))))],
1636 multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop,
1638 // 64-bit vector types.
1639 def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8,
1641 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1644 def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16,
1646 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1649 def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32,
1651 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1654 // 128-bit vector types
1655 def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b",
1656 v8i16, v8i8, 8, uimm3, ExtOp, Neon_High16B> {
1657 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1660 def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h",
1661 v4i32, v4i16, 4, uimm4, ExtOp, Neon_High8H> {
1662 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1665 def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s",
1666 v2i64, v2i32, 2, uimm5, ExtOp, Neon_High4S> {
1667 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1670 // Use other patterns to match when the immediate is 0.
1671 def : Pat<(v8i16 (ExtOp (v8i8 VPR64:$Rn))),
1672 (!cast<Instruction>(prefix # "_8B") VPR64:$Rn, 0)>;
1674 def : Pat<(v4i32 (ExtOp (v4i16 VPR64:$Rn))),
1675 (!cast<Instruction>(prefix # "_4H") VPR64:$Rn, 0)>;
1677 def : Pat<(v2i64 (ExtOp (v2i32 VPR64:$Rn))),
1678 (!cast<Instruction>(prefix # "_2S") VPR64:$Rn, 0)>;
1680 def : Pat<(v8i16 (ExtOp (v8i8 (Neon_High16B VPR128:$Rn)))),
1681 (!cast<Instruction>(prefix # "_16B") VPR128:$Rn, 0)>;
1683 def : Pat<(v4i32 (ExtOp (v4i16 (Neon_High8H VPR128:$Rn)))),
1684 (!cast<Instruction>(prefix # "_8H") VPR128:$Rn, 0)>;
1686 def : Pat<(v2i64 (ExtOp (v2i32 (Neon_High4S VPR128:$Rn)))),
1687 (!cast<Instruction>(prefix # "_4S") VPR128:$Rn, 0)>;
// Shift-left-long: widen each source lane (sext for sshll, zext for
// ushll) then shift; the multiclass also emits the "2" (high-half) forms
// and the shift-by-zero extension patterns.
1691 defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>;
1692 defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>;
1694 // Rounding/Saturating shift
1695 class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T,
1696 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1697 SDPatternOperator OpNode>
1698 : NeonI_2VShiftImm<q, u, opcode,
1699 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
1700 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1701 [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn),
1705 // shift right (vector by immediate)
1706 multiclass NeonI_N2VShR_RQ<bit u, bits<5> opcode, string asmop,
1707 SDPatternOperator OpNode> {
1708 def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1710 let Inst{22-19} = 0b0001;
1713 def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1715 let Inst{22-20} = 0b001;
1718 def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1720 let Inst{22-21} = 0b01;
1723 def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1725 let Inst{22-19} = 0b0001;
1728 def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1730 let Inst{22-20} = 0b001;
1733 def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1735 let Inst{22-21} = 0b01;
1738 def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1744 multiclass NeonI_N2VShL_Q<bit u, bits<5> opcode, string asmop,
1745 SDPatternOperator OpNode> {
1746 // 64-bit vector types.
1747 def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3,
1749 let Inst{22-19} = 0b0001;
1752 def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4,
1754 let Inst{22-20} = 0b001;
1757 def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5,
1759 let Inst{22-21} = 0b01;
1762 // 128-bit vector types.
1763 def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3,
1765 let Inst{22-19} = 0b0001;
1768 def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4,
1770 let Inst{22-20} = 0b001;
1773 def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5,
1775 let Inst{22-21} = 0b01;
1778 def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63,
1784 // Rounding shift right
// The U bit (first operand) selects unsigned; the 5-bit opcode selects the
// operation.  The rounding shifts are matched via target intrinsics.
1785 defm SRSHRvvi : NeonI_N2VShR_RQ<0b0, 0b00100, "srshr",
1786 int_aarch64_neon_vsrshr>;
1787 defm URSHRvvi : NeonI_N2VShR_RQ<0b1, 0b00100, "urshr",
1788 int_aarch64_neon_vurshr>;
1790 // Saturating shift left unsigned
1791 defm SQSHLUvvi : NeonI_N2VShL_Q<0b1, 0b01100, "sqshlu", int_aarch64_neon_vsqshlu>;
1793 // Saturating shift left
// SQSHL/UQSHL are matched through the custom NEON_QSHLs/NEON_QSHLu DAG
// nodes (Neon_sqrshlImm / Neon_uqrshlImm) rather than intrinsics.
1794 defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>;
1795 defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>;
1797 class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T,
1798 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1800 : NeonI_2VShiftImm<q, u, opcode,
1801 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1802 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1803 [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
1804 (Ty (OpNode (Ty VPRC:$Rn),
1805 (Ty (Neon_vdup (i32 imm:$Imm))))))))],
1807 let Constraints = "$src = $Rd";
1810 // Shift Right accumulate
1811 multiclass NeonI_N2VShRAdd<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
1812 def _8B : N2VShiftAdd<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1814 let Inst{22-19} = 0b0001;
1817 def _4H : N2VShiftAdd<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1819 let Inst{22-20} = 0b001;
1822 def _2S : N2VShiftAdd<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1824 let Inst{22-21} = 0b01;
1827 def _16B : N2VShiftAdd<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1829 let Inst{22-19} = 0b0001;
1832 def _8H : N2VShiftAdd<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1834 let Inst{22-20} = 0b001;
1837 def _4S : N2VShiftAdd<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1839 let Inst{22-21} = 0b01;
1842 def _2D : N2VShiftAdd<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1848 // Shift right and accumulate
// SSRA/USRA: Rd += (Rn >> imm); signed (sra) vs. unsigned (srl) shift.
// Use explicit bit literals (0b0/0b1) for the U bit, matching the
// convention used by every other defm in this file.
1849 defm SSRAvvi : NeonI_N2VShRAdd<0b0, 0b00010, "ssra", sra>;
1850 defm USRAvvi : NeonI_N2VShRAdd<0b1, 0b00010, "usra", srl>;
1852 // Rounding shift accumulate
1853 class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T,
1854 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1855 SDPatternOperator OpNode>
1856 : NeonI_2VShiftImm<q, u, opcode,
1857 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1858 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1859 [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
1860 (Ty (OpNode (Ty VPRC:$Rn), (i32 imm:$Imm))))))],
1862 let Constraints = "$src = $Rd";
1865 multiclass NeonI_N2VShRAdd_R<bit u, bits<5> opcode, string asmop,
1866 SDPatternOperator OpNode> {
1867 def _8B : N2VShiftAdd_R<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1869 let Inst{22-19} = 0b0001;
1872 def _4H : N2VShiftAdd_R<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1874 let Inst{22-20} = 0b001;
1877 def _2S : N2VShiftAdd_R<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1879 let Inst{22-21} = 0b01;
1882 def _16B : N2VShiftAdd_R<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1884 let Inst{22-19} = 0b0001;
1887 def _8H : N2VShiftAdd_R<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1889 let Inst{22-20} = 0b001;
1892 def _4S : N2VShiftAdd_R<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1894 let Inst{22-21} = 0b01;
1897 def _2D : N2VShiftAdd_R<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1903 // Rounding shift right and accumulate
// SRSRA/URSRA: Rd += round(Rn >> imm), via the rounding-shift intrinsics.
// Use explicit bit literals (0b0/0b1) for the U bit, matching the
// convention used by every other defm in this file.
1904 defm SRSRAvvi : NeonI_N2VShRAdd_R<0b0, 0b00110, "srsra", int_aarch64_neon_vsrshr>;
1905 defm URSRAvvi : NeonI_N2VShRAdd_R<0b1, 0b00110, "ursra", int_aarch64_neon_vurshr>;
1907 // Shift insert by immediate
1908 class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T,
1909 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1910 SDPatternOperator OpNode>
1911 : NeonI_2VShiftImm<q, u, opcode,
1912 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1913 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1914 [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$src), (Ty VPRC:$Rn),
1917 let Constraints = "$src = $Rd";
1920 // shift left insert (vector by immediate)
1921 multiclass NeonI_N2VShLIns<bit u, bits<5> opcode, string asmop> {
1922 def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3,
1923 int_aarch64_neon_vsli> {
1924 let Inst{22-19} = 0b0001;
1927 def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4,
1928 int_aarch64_neon_vsli> {
1929 let Inst{22-20} = 0b001;
1932 def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5,
1933 int_aarch64_neon_vsli> {
1934 let Inst{22-21} = 0b01;
1937 // 128-bit vector types
1938 def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3,
1939 int_aarch64_neon_vsli> {
1940 let Inst{22-19} = 0b0001;
1943 def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4,
1944 int_aarch64_neon_vsli> {
1945 let Inst{22-20} = 0b001;
1948 def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5,
1949 int_aarch64_neon_vsli> {
1950 let Inst{22-21} = 0b01;
1953 def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63,
1954 int_aarch64_neon_vsli> {
1959 // shift right insert (vector by immediate)
1960 multiclass NeonI_N2VShRIns<bit u, bits<5> opcode, string asmop> {
1961 // 64-bit vector types.
1962 def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1963 int_aarch64_neon_vsri> {
1964 let Inst{22-19} = 0b0001;
1967 def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1968 int_aarch64_neon_vsri> {
1969 let Inst{22-20} = 0b001;
1972 def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1973 int_aarch64_neon_vsri> {
1974 let Inst{22-21} = 0b01;
1977 // 128-bit vector types
1978 def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1979 int_aarch64_neon_vsri> {
1980 let Inst{22-19} = 0b0001;
1983 def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1984 int_aarch64_neon_vsri> {
1985 let Inst{22-20} = 0b001;
1988 def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1989 int_aarch64_neon_vsri> {
1990 let Inst{22-21} = 0b01;
1993 def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1994 int_aarch64_neon_vsri> {
1999 // Shift left and insert
2000 defm SLIvvi : NeonI_N2VShLIns<0b1, 0b01010, "sli">;
2002 // Shift right and insert
2003 defm SRIvvi : NeonI_N2VShRIns<0b1, 0b01000, "sri">;
2005 class N2VShR_Narrow<bit q, bit u, bits<5> opcode, string asmop, string DestT,
2006 string SrcT, Operand ImmTy>
2007 : NeonI_2VShiftImm<q, u, opcode,
2008 (outs VPR64:$Rd), (ins VPR128:$Rn, ImmTy:$Imm),
2009 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
2012 class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT,
2013 string SrcT, Operand ImmTy>
2014 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
2015 (ins VPR128:$src, VPR128:$Rn, ImmTy:$Imm),
2016 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
2018 let Constraints = "$src = $Rd";
2021 // Shift right narrow (vector, by immediate)
2022 multiclass NeonI_N2VShR_Narrow<bit u, bits<5> opcode, string asmop> {
2023 def _8B : N2VShR_Narrow<0b0, u, opcode, asmop, "8b", "8h", shr_imm8> {
2024 let Inst{22-19} = 0b0001;
2027 def _4H : N2VShR_Narrow<0b0, u, opcode, asmop, "4h", "4s", shr_imm16> {
2028 let Inst{22-20} = 0b001;
2031 def _2S : N2VShR_Narrow<0b0, u, opcode, asmop, "2s", "2d", shr_imm32> {
2032 let Inst{22-21} = 0b01;
2035 // Shift Narrow High
2036 def _16B : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "16b", "8h",
2038 let Inst{22-19} = 0b0001;
2041 def _8H : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "8h", "4s",
2043 let Inst{22-20} = 0b001;
2046 def _4S : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "4s", "2d",
2048 let Inst{22-21} = 0b01;
2052 // Shift right narrow
2053 defm SHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10000, "shrn">;
2055 // Shift right narrow (prefix Q is saturating, prefix R is rounding)
// NOTE(review): QSHRUNvvi/QRSHRUNvvi lack the leading "S" of their asm
// mnemonics (sqshrun/sqrshrun), unlike SQSHRNvvi below.  These defm names
// are referenced by the Neon_shiftNarrow_QR_patterns instantiations
// further down, so renaming requires updating both sites together.
2056 defm QSHRUNvvi :NeonI_N2VShR_Narrow<0b1, 0b10000, "sqshrun">;
2057 defm RSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10001, "rshrn">;
2058 defm QRSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10001, "sqrshrun">;
2059 defm SQSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10010, "sqshrn">;
2060 defm UQSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10010, "uqshrn">;
2061 defm SQRSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10011, "sqrshrn">;
2062 defm UQRSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10011, "uqrshrn">;
// PatFrags that glue two 64-bit vector halves into one 128-bit vector via
// concat_vectors ($Rm becomes the low half, $Rn the high half).  Used below
// to match the "2" (write-to-high-half) forms of narrowing instructions.
2064 def Neon_combine_2D : PatFrag<(ops node:$Rm, node:$Rn),
2065 (v2i64 (concat_vectors (v1i64 node:$Rm),
2066 (v1i64 node:$Rn)))>;
2067 def Neon_combine_8H : PatFrag<(ops node:$Rm, node:$Rn),
2068 (v8i16 (concat_vectors (v4i16 node:$Rm),
2069 (v4i16 node:$Rn)))>;
2070 def Neon_combine_4S : PatFrag<(ops node:$Rm, node:$Rn),
2071 (v4i32 (concat_vectors (v2i32 node:$Rm),
2072 (v2i32 node:$Rn)))>;
2073 def Neon_combine_4f : PatFrag<(ops node:$Rm, node:$Rn),
2074 (v4f32 (concat_vectors (v2f32 node:$Rm),
2075 (v2f32 node:$Rn)))>;
2076 def Neon_combine_2d : PatFrag<(ops node:$Rm, node:$Rn),
2077 (v2f64 (concat_vectors (v1f64 node:$Rm),
2078 (v1f64 node:$Rn)))>;
// Vector shift-right-by-immediate PatFrags: the scalar shift amount is
// first splatted across all lanes with Neon_vdup, then applied with srl
// (logical, "lshr") or sra (arithmetic, "ashr").
2080 def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
2081 (v8i16 (srl (v8i16 node:$lhs),
2082 (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
2083 def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
2084 (v4i32 (srl (v4i32 node:$lhs),
2085 (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
2086 def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
2087 (v2i64 (srl (v2i64 node:$lhs),
2088 (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
2089 def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
2090 (v8i16 (sra (v8i16 node:$lhs),
2091 (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
2092 def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
2093 (v4i32 (sra (v4i32 node:$lhs),
2094 (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
2095 def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
2096 (v2i64 (sra (v2i64 node:$lhs),
2097 (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
2099 // Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors)
2100 multiclass Neon_shiftNarrow_patterns<string shr> {
2101 def : Pat<(v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H") VPR128:$Rn,
2103 (SHRNvvi_8B VPR128:$Rn, imm:$Imm)>;
2104 def : Pat<(v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S") VPR128:$Rn,
2106 (SHRNvvi_4H VPR128:$Rn, imm:$Imm)>;
2107 def : Pat<(v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D") VPR128:$Rn,
2109 (SHRNvvi_2S VPR128:$Rn, imm:$Imm)>;
2111 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2112 (v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H")
2113 VPR128:$Rn, (i32 imm:$Imm))))))),
2114 (SHRNvvi_16B (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
2115 VPR128:$Rn, imm:$Imm)>;
2116 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2117 (v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S")
2118 VPR128:$Rn, (i32 imm:$Imm))))))),
2119 (SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2120 VPR128:$Rn, imm:$Imm)>;
2121 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2122 (v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D")
2123 VPR128:$Rn, (i32 imm:$Imm))))))),
2124 (SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2125 VPR128:$Rn, imm:$Imm)>;
2128 multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> {
2129 def : Pat<(v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm)),
2130 (!cast<Instruction>(prefix # "_8B") VPR128:$Rn, imm:$Imm)>;
2131 def : Pat<(v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm)),
2132 (!cast<Instruction>(prefix # "_4H") VPR128:$Rn, imm:$Imm)>;
2133 def : Pat<(v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm)),
2134 (!cast<Instruction>(prefix # "_2S") VPR128:$Rn, imm:$Imm)>;
2136 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2137 (v1i64 (bitconvert (v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm))))),
2138 (!cast<Instruction>(prefix # "_16B")
2139 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2140 VPR128:$Rn, imm:$Imm)>;
2141 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2142 (v1i64 (bitconvert (v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm))))),
2143 (!cast<Instruction>(prefix # "_8H")
2144 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2145 VPR128:$Rn, imm:$Imm)>;
2146 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2147 (v1i64 (bitconvert (v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm))))),
2148 (!cast<Instruction>(prefix # "_4S")
2149 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2150 VPR128:$Rn, imm:$Imm)>;
// SHRN is matched from plain IR (srl/sra + trunc), once per shift kind.
2153 defm : Neon_shiftNarrow_patterns<"lshr">;
2154 defm : Neon_shiftNarrow_patterns<"ashr">;
// The saturating/rounding narrows are matched from their intrinsics; the
// string is the defm prefix of the instruction definitions above.
2156 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrun, "QSHRUNvvi">;
2157 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vrshrn, "RSHRNvvi">;
2158 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrun, "QRSHRUNvvi">;
2159 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrn, "SQSHRNvvi">;
2160 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqshrn, "UQSHRNvvi">;
2161 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrn, "SQRSHRNvvi">;
2162 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqrshrn, "UQRSHRNvvi">;
2164 // Convert between fixed-point and floating-point
2165 class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T,
2166 RegisterOperand VPRC, ValueType DestTy, ValueType SrcTy,
2167 Operand ImmTy, SDPatternOperator IntOp>
2168 : NeonI_2VShiftImm<q, u, opcode,
2169 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
2170 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
2171 [(set (DestTy VPRC:$Rd), (DestTy (IntOp (SrcTy VPRC:$Rn),
2175 multiclass NeonI_N2VCvt_Fx2fp<bit u, bits<5> opcode, string asmop,
2176 SDPatternOperator IntOp> {
2177 def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2f32, v2i32,
2179 let Inst{22-21} = 0b01;
2182 def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4f32, v4i32,
2184 let Inst{22-21} = 0b01;
2187 def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2f64, v2i64,
2193 multiclass NeonI_N2VCvt_Fp2fx<bit u, bits<5> opcode, string asmop,
2194 SDPatternOperator IntOp> {
2195 def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2i32, v2f32,
2197 let Inst{22-21} = 0b01;
2200 def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4i32, v4f32,
2202 let Inst{22-21} = 0b01;
2205 def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2i64, v2f64,
2211 // Convert fixed-point to floating-point
// These reuse the 32-bit ARM NEON conversion intrinsics (int_arm_neon_*).
// Use explicit bit literals (0b0/0b1) for the U bit, matching the
// convention used by every other defm in this file.
2212 defm VCVTxs2f : NeonI_N2VCvt_Fx2fp<0b0, 0b11100, "scvtf",
2213 int_arm_neon_vcvtfxs2fp>;
2214 defm VCVTxu2f : NeonI_N2VCvt_Fx2fp<0b1, 0b11100, "ucvtf",
2215 int_arm_neon_vcvtfxu2fp>;
2217 // Convert floating-point to fixed-point
2218 defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0b0, 0b11111, "fcvtzs",
2219 int_arm_neon_vcvtfp2fxs>;
2220 defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<0b1, 0b11111, "fcvtzu",
2221 int_arm_neon_vcvtfp2fxu>;
2223 multiclass Neon_sshll2_0<SDNode ext>
2225 def _v8i8 : PatFrag<(ops node:$Rn),
2226 (v8i16 (ext (v8i8 (Neon_High16B node:$Rn))))>;
2227 def _v4i16 : PatFrag<(ops node:$Rn),
2228 (v4i32 (ext (v4i16 (Neon_High8H node:$Rn))))>;
2229 def _v2i32 : PatFrag<(ops node:$Rn),
2230 (v2i64 (ext (v2i32 (Neon_High4S node:$Rn))))>;
// PatFrags (suffixed _v8i8/_v4i16/_v2i32 by Neon_sshll2_0) that extract the
// high half of a 128-bit vector and sign/zero-extend it; used by the
// "long2"/"wide2" multiclasses below.
2233 defm NI_sext_high : Neon_sshll2_0<sext>;
2234 defm NI_zext_high : Neon_sshll2_0<zext>;
2237 //===----------------------------------------------------------------------===//
2238 // Multiclasses for NeonI_Across
2239 //===----------------------------------------------------------------------===//
2243 multiclass NeonI_2VAcross_1<bit u, bits<5> opcode,
2244 string asmop, SDPatternOperator opnode>
2246 def _1h8b: NeonI_2VAcross<0b0, u, 0b00, opcode,
2247 (outs FPR16:$Rd), (ins VPR64:$Rn),
2248 asmop # "\t$Rd, $Rn.8b",
2249 [(set (v1i16 FPR16:$Rd),
2250 (v1i16 (opnode (v8i8 VPR64:$Rn))))],
2253 def _1h16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
2254 (outs FPR16:$Rd), (ins VPR128:$Rn),
2255 asmop # "\t$Rd, $Rn.16b",
2256 [(set (v1i16 FPR16:$Rd),
2257 (v1i16 (opnode (v16i8 VPR128:$Rn))))],
2260 def _1s4h: NeonI_2VAcross<0b0, u, 0b01, opcode,
2261 (outs FPR32:$Rd), (ins VPR64:$Rn),
2262 asmop # "\t$Rd, $Rn.4h",
2263 [(set (v1i32 FPR32:$Rd),
2264 (v1i32 (opnode (v4i16 VPR64:$Rn))))],
2267 def _1s8h: NeonI_2VAcross<0b1, u, 0b01, opcode,
2268 (outs FPR32:$Rd), (ins VPR128:$Rn),
2269 asmop # "\t$Rd, $Rn.8h",
2270 [(set (v1i32 FPR32:$Rd),
2271 (v1i32 (opnode (v8i16 VPR128:$Rn))))],
2274 // _1d2s doesn't exist!
2276 def _1d4s: NeonI_2VAcross<0b1, u, 0b10, opcode,
2277 (outs FPR64:$Rd), (ins VPR128:$Rn),
2278 asmop # "\t$Rd, $Rn.4s",
2279 [(set (v1i64 FPR64:$Rd),
2280 (v1i64 (opnode (v4i32 VPR128:$Rn))))],
2284 defm SADDLV : NeonI_2VAcross_1<0b0, 0b00011, "saddlv", int_aarch64_neon_saddlv>;
2285 defm UADDLV : NeonI_2VAcross_1<0b1, 0b00011, "uaddlv", int_aarch64_neon_uaddlv>;
2289 multiclass NeonI_2VAcross_2<bit u, bits<5> opcode,
2290 string asmop, SDPatternOperator opnode>
2292 def _1b8b: NeonI_2VAcross<0b0, u, 0b00, opcode,
2293 (outs FPR8:$Rd), (ins VPR64:$Rn),
2294 asmop # "\t$Rd, $Rn.8b",
2295 [(set (v1i8 FPR8:$Rd),
2296 (v1i8 (opnode (v8i8 VPR64:$Rn))))],
2299 def _1b16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
2300 (outs FPR8:$Rd), (ins VPR128:$Rn),
2301 asmop # "\t$Rd, $Rn.16b",
2302 [(set (v1i8 FPR8:$Rd),
2303 (v1i8 (opnode (v16i8 VPR128:$Rn))))],
2306 def _1h4h: NeonI_2VAcross<0b0, u, 0b01, opcode,
2307 (outs FPR16:$Rd), (ins VPR64:$Rn),
2308 asmop # "\t$Rd, $Rn.4h",
2309 [(set (v1i16 FPR16:$Rd),
2310 (v1i16 (opnode (v4i16 VPR64:$Rn))))],
2313 def _1h8h: NeonI_2VAcross<0b1, u, 0b01, opcode,
2314 (outs FPR16:$Rd), (ins VPR128:$Rn),
2315 asmop # "\t$Rd, $Rn.8h",
2316 [(set (v1i16 FPR16:$Rd),
2317 (v1i16 (opnode (v8i16 VPR128:$Rn))))],
2320 // _1s2s doesn't exist!
2322 def _1s4s: NeonI_2VAcross<0b1, u, 0b10, opcode,
2323 (outs FPR32:$Rd), (ins VPR128:$Rn),
2324 asmop # "\t$Rd, $Rn.4s",
2325 [(set (v1i32 FPR32:$Rd),
2326 (v1i32 (opnode (v4i32 VPR128:$Rn))))],
2330 defm SMAXV : NeonI_2VAcross_2<0b0, 0b01010, "smaxv", int_aarch64_neon_smaxv>;
2331 defm UMAXV : NeonI_2VAcross_2<0b1, 0b01010, "umaxv", int_aarch64_neon_umaxv>;
2333 defm SMINV : NeonI_2VAcross_2<0b0, 0b11010, "sminv", int_aarch64_neon_sminv>;
2334 defm UMINV : NeonI_2VAcross_2<0b1, 0b11010, "uminv", int_aarch64_neon_uminv>;
2336 defm ADDV : NeonI_2VAcross_2<0b0, 0b11011, "addv", int_aarch64_neon_vaddv>;
2340 multiclass NeonI_2VAcross_3<bit u, bits<5> opcode, bits<2> size,
2341 string asmop, SDPatternOperator opnode> {
2342 def _1s4s: NeonI_2VAcross<0b1, u, size, opcode,
2343 (outs FPR32:$Rd), (ins VPR128:$Rn),
2344 asmop # "\t$Rd, $Rn.4s",
2345 [(set (v1f32 FPR32:$Rd),
2346 (v1f32 (opnode (v4f32 VPR128:$Rn))))],
2350 defm FMAXNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b00, "fmaxnmv",
2351 int_aarch64_neon_vmaxnmv>;
2352 defm FMINNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b10, "fminnmv",
2353 int_aarch64_neon_vminnmv>;
2355 defm FMAXV : NeonI_2VAcross_3<0b1, 0b01111, 0b00, "fmaxv",
2356 int_aarch64_neon_vmaxv>;
2357 defm FMINV : NeonI_2VAcross_3<0b1, 0b01111, 0b10, "fminv",
2358 int_aarch64_neon_vminv>;
2360 // The followings are for instruction class (3V Diff)
2362 // normal long/long2 pattern
2363 class NeonI_3VDL<bit q, bit u, bits<2> size, bits<4> opcode,
2364 string asmop, string ResS, string OpS,
2365 SDPatternOperator opnode, SDPatternOperator ext,
2366 RegisterOperand OpVPR,
2367 ValueType ResTy, ValueType OpTy>
2368 : NeonI_3VDiff<q, u, size, opcode,
2369 (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2370 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2371 [(set (ResTy VPR128:$Rd),
2372 (ResTy (opnode (ResTy (ext (OpTy OpVPR:$Rn))),
2373 (ResTy (ext (OpTy OpVPR:$Rm))))))],
2376 multiclass NeonI_3VDL_s<bit u, bits<4> opcode,
2377 string asmop, SDPatternOperator opnode,
2378 bit Commutable = 0> {
2379 let isCommutable = Commutable in {
2380 def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2381 opnode, sext, VPR64, v8i16, v8i8>;
2382 def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2383 opnode, sext, VPR64, v4i32, v4i16>;
2384 def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2385 opnode, sext, VPR64, v2i64, v2i32>;
2389 multiclass NeonI_3VDL2_s<bit u, bits<4> opcode, string asmop,
2390 SDPatternOperator opnode, bit Commutable = 0> {
2391 let isCommutable = Commutable in {
2392 def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2393 opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
2394 def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2395 opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
2396 def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2397 opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
2401 multiclass NeonI_3VDL_u<bit u, bits<4> opcode, string asmop,
2402 SDPatternOperator opnode, bit Commutable = 0> {
2403 let isCommutable = Commutable in {
2404 def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2405 opnode, zext, VPR64, v8i16, v8i8>;
2406 def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2407 opnode, zext, VPR64, v4i32, v4i16>;
2408 def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2409 opnode, zext, VPR64, v2i64, v2i32>;
2413 multiclass NeonI_3VDL2_u<bit u, bits<4> opcode, string asmop,
2414 SDPatternOperator opnode, bit Commutable = 0> {
2415 let isCommutable = Commutable in {
2416 def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2417 opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
2418 def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2419 opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
2420 def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2421 opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
// Long add/sub: saddl/uaddl widen both 64-bit operands before add (the
// trailing 1 marks them commutable); the "2" forms operate on the high
// halves of 128-bit operands.  Subtractions are not commutable.
2425 defm SADDLvvv :  NeonI_3VDL_s<0b0, 0b0000, "saddl", add, 1>;
2426 defm UADDLvvv :  NeonI_3VDL_u<0b1, 0b0000, "uaddl", add, 1>;
2428 defm SADDL2vvv :  NeonI_3VDL2_s<0b0, 0b0000, "saddl2", add, 1>;
2429 defm UADDL2vvv :  NeonI_3VDL2_u<0b1, 0b0000, "uaddl2", add, 1>;
2431 defm SSUBLvvv :  NeonI_3VDL_s<0b0, 0b0010, "ssubl", sub, 0>;
2432 defm USUBLvvv :  NeonI_3VDL_u<0b1, 0b0010, "usubl", sub, 0>;
2434 defm SSUBL2vvv :  NeonI_3VDL2_s<0b0, 0b0010, "ssubl2", sub, 0>;
2435 defm USUBL2vvv :  NeonI_3VDL2_u<0b1, 0b0010, "usubl2", sub, 0>;
2437 // normal wide/wide2 pattern
2438 class NeonI_3VDW<bit q, bit u, bits<2> size, bits<4> opcode,
2439 string asmop, string ResS, string OpS,
2440 SDPatternOperator opnode, SDPatternOperator ext,
2441 RegisterOperand OpVPR,
2442 ValueType ResTy, ValueType OpTy>
2443 : NeonI_3VDiff<q, u, size, opcode,
2444 (outs VPR128:$Rd), (ins VPR128:$Rn, OpVPR:$Rm),
2445 asmop # "\t$Rd." # ResS # ", $Rn." # ResS # ", $Rm." # OpS,
2446 [(set (ResTy VPR128:$Rd),
2447 (ResTy (opnode (ResTy VPR128:$Rn),
2448 (ResTy (ext (OpTy OpVPR:$Rm))))))],
2451 multiclass NeonI_3VDW_s<bit u, bits<4> opcode, string asmop,
2452 SDPatternOperator opnode> {
2453 def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2454 opnode, sext, VPR64, v8i16, v8i8>;
2455 def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2456 opnode, sext, VPR64, v4i32, v4i16>;
2457 def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2458 opnode, sext, VPR64, v2i64, v2i32>;
2461 defm SADDWvvv : NeonI_3VDW_s<0b0, 0b0001, "saddw", add>;
2462 defm SSUBWvvv : NeonI_3VDW_s<0b0, 0b0011, "ssubw", sub>;
2464 multiclass NeonI_3VDW2_s<bit u, bits<4> opcode, string asmop,
2465 SDPatternOperator opnode> {
2466 def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2467 opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
2468 def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2469 opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
2470 def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2471 opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
2474 defm SADDW2vvv : NeonI_3VDW2_s<0b0, 0b0001, "saddw2", add>;
2475 defm SSUBW2vvv : NeonI_3VDW2_s<0b0, 0b0011, "ssubw2", sub>;
2477 multiclass NeonI_3VDW_u<bit u, bits<4> opcode, string asmop,
2478 SDPatternOperator opnode> {
2479 def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2480 opnode, zext, VPR64, v8i16, v8i8>;
2481 def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2482 opnode, zext, VPR64, v4i32, v4i16>;
2483 def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2484 opnode, zext, VPR64, v2i64, v2i32>;
2487 defm UADDWvvv : NeonI_3VDW_u<0b1, 0b0001, "uaddw", add>;
2488 defm USUBWvvv : NeonI_3VDW_u<0b1, 0b0011, "usubw", sub>;
2490 multiclass NeonI_3VDW2_u<bit u, bits<4> opcode, string asmop,
2491 SDPatternOperator opnode> {
2492 def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2493 opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
2494 def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2495 opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
2496 def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2497 opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
2500 defm UADDW2vvv : NeonI_3VDW2_u<0b1, 0b0001, "uaddw2", add>;
2501 defm USUBW2vvv : NeonI_3VDW2_u<0b1, 0b0011, "usubw2", sub>;
2503 // Get the high half part of the vector element.
2504 multiclass NeonI_get_high {
2505 def _8h : PatFrag<(ops node:$Rn),
2506 (v8i8 (trunc (v8i16 (srl (v8i16 node:$Rn),
2507 (v8i16 (Neon_vdup (i32 8)))))))>;
2508 def _4s : PatFrag<(ops node:$Rn),
2509 (v4i16 (trunc (v4i32 (srl (v4i32 node:$Rn),
2510 (v4i32 (Neon_vdup (i32 16)))))))>;
2511 def _2d : PatFrag<(ops node:$Rn),
2512 (v2i32 (trunc (v2i64 (srl (v2i64 node:$Rn),
2513 (v2i64 (Neon_vdup (i32 32)))))))>;
2516 defm NI_get_hi : NeonI_get_high;
2518 // pattern for addhn/subhn with 2 operands
2519 class NeonI_3VDN_addhn_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
2520 string asmop, string ResS, string OpS,
2521 SDPatternOperator opnode, SDPatternOperator get_hi,
2522 ValueType ResTy, ValueType OpTy>
2523 : NeonI_3VDiff<q, u, size, opcode,
2524 (outs VPR64:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
2525 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2526 [(set (ResTy VPR64:$Rd),
2528 (OpTy (opnode (OpTy VPR128:$Rn),
2529 (OpTy VPR128:$Rm))))))],
2532 multiclass NeonI_3VDN_addhn_2Op<bit u, bits<4> opcode, string asmop,
2533 SDPatternOperator opnode, bit Commutable = 0> {
2534 let isCommutable = Commutable in {
2535 def _8b8h : NeonI_3VDN_addhn_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
2536 opnode, NI_get_hi_8h, v8i8, v8i16>;
2537 def _4h4s : NeonI_3VDN_addhn_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
2538 opnode, NI_get_hi_4s, v4i16, v4i32>;
2539 def _2s2d : NeonI_3VDN_addhn_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
2540 opnode, NI_get_hi_2d, v2i32, v2i64>;
2544 defm ADDHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0100, "addhn", add, 1>;
2545 defm SUBHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>;
2547 // pattern for operation with 2 operands
2548 class NeonI_3VD_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
2549 string asmop, string ResS, string OpS,
2550 SDPatternOperator opnode,
2551 RegisterOperand ResVPR, RegisterOperand OpVPR,
2552 ValueType ResTy, ValueType OpTy>
2553 : NeonI_3VDiff<q, u, size, opcode,
2554 (outs ResVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2555 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2556 [(set (ResTy ResVPR:$Rd),
2557 (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))],
2560 // normal narrow pattern
2561 multiclass NeonI_3VDN_2Op<bit u, bits<4> opcode, string asmop,
2562 SDPatternOperator opnode, bit Commutable = 0> {
2563 let isCommutable = Commutable in {
2564 def _8b8h : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
2565 opnode, VPR64, VPR128, v8i8, v8i16>;
2566 def _4h4s : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
2567 opnode, VPR64, VPR128, v4i16, v4i32>;
2568 def _2s2d : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
2569 opnode, VPR64, VPR128, v2i32, v2i64>;
2573 defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>;
2574 defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>;
2576 // pattern for acle intrinsic with 3 operands
2577 class NeonI_3VDN_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
2578 string asmop, string ResS, string OpS>
2579 : NeonI_3VDiff<q, u, size, opcode,
2580 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
2581 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2583 let Constraints = "$src = $Rd";
2584 let neverHasSideEffects = 1;
2587 multiclass NeonI_3VDN_3Op_v1<bit u, bits<4> opcode, string asmop> {
2588 def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">;
2589 def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">;
2590 def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">;
2593 defm ADDHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0100, "addhn2">;
2594 defm SUBHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0110, "subhn2">;
2596 defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2">;
2597 defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2">;
2599 // Patterns have to be separate because there's a SUBREG_TO_REG in the output
// Matches a narrowing op whose result is combined into the high half of a
// 128-bit vector: the existing low half ($src) is first placed into a
// 128-bit register with SUBREG_TO_REG, then INST writes the narrowed
// result of coreop($Rn, $Rm) into the high half.
2601 class NarrowHighHalfPat<Instruction INST, ValueType DstTy, ValueType SrcTy,
2602 SDPatternOperator coreop>
2603 : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2604 (v1i64 (bitconvert (DstTy (coreop (SrcTy VPR128:$Rn),
2605 (SrcTy VPR128:$Rm)))))),
2606 (INST (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2607 VPR128:$Rn, VPR128:$Rm)>;
// addhn2: high-half narrow of (Rn + Rm), extracted with the NI_get_hi frags.
2610 def : NarrowHighHalfPat<ADDHN2vvv_16b8h, v8i8, v8i16,
2611 BinOpFrag<(NI_get_hi_8h (add node:$LHS, node:$RHS))>>;
2612 def : NarrowHighHalfPat<ADDHN2vvv_8h4s, v4i16, v4i32,
2613 BinOpFrag<(NI_get_hi_4s (add node:$LHS, node:$RHS))>>;
2614 def : NarrowHighHalfPat<ADDHN2vvv_4s2d, v2i32, v2i64,
2615 BinOpFrag<(NI_get_hi_2d (add node:$LHS, node:$RHS))>>;
// subhn2: high-half narrow of (Rn - Rm).
2618 def : NarrowHighHalfPat<SUBHN2vvv_16b8h, v8i8, v8i16,
2619 BinOpFrag<(NI_get_hi_8h (sub node:$LHS, node:$RHS))>>;
2620 def : NarrowHighHalfPat<SUBHN2vvv_8h4s, v4i16, v4i32,
2621 BinOpFrag<(NI_get_hi_4s (sub node:$LHS, node:$RHS))>>;
2622 def : NarrowHighHalfPat<SUBHN2vvv_4s2d, v2i32, v2i64,
2623 BinOpFrag<(NI_get_hi_2d (sub node:$LHS, node:$RHS))>>;
// raddhn2 / rsubhn2: rounding variants, matched from the ARM intrinsics.
2626 def : NarrowHighHalfPat<RADDHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vraddhn>;
2627 def : NarrowHighHalfPat<RADDHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vraddhn>;
2628 def : NarrowHighHalfPat<RADDHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vraddhn>;
2631 def : NarrowHighHalfPat<RSUBHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vrsubhn>;
2632 def : NarrowHighHalfPat<RSUBHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vrsubhn>;
2633 def : NarrowHighHalfPat<RSUBHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vrsubhn>;
2635 // Patterns that need to extend the result
2636 class NeonI_3VDL_Ext<bit q, bit u, bits<2> size, bits<4> opcode,
2637 string asmop, string ResS, string OpS,
2638 SDPatternOperator opnode,
2639 RegisterOperand OpVPR,
2640 ValueType ResTy, ValueType OpTy, ValueType OpSTy>
2641 : NeonI_3VDiff<q, u, size, opcode,
2642 (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2643 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2644 [(set (ResTy VPR128:$Rd),
2645 (ResTy (zext (OpSTy (opnode (OpTy OpVPR:$Rn),
2646 (OpTy OpVPR:$Rm))))))],
2649 multiclass NeonI_3VDL_zext<bit u, bits<4> opcode, string asmop,
2650 SDPatternOperator opnode, bit Commutable = 0> {
2651 let isCommutable = Commutable in {
2652 def _8h8b : NeonI_3VDL_Ext<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2653 opnode, VPR64, v8i16, v8i8, v8i8>;
2654 def _4s4h : NeonI_3VDL_Ext<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2655 opnode, VPR64, v4i32, v4i16, v4i16>;
2656 def _2d2s : NeonI_3VDL_Ext<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2657 opnode, VPR64, v2i64, v2i32, v2i32>;
// sabdl / uabdl: signed/unsigned absolute-difference long (abd result is
// zero-extended to the wide type by the NeonI_3VDL_zext pattern). Commutable.
2661 defm SABDLvvv : NeonI_3VDL_zext<0b0, 0b0111, "sabdl", int_arm_neon_vabds, 1>;
2662 defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>;
2664 multiclass NeonI_Op_High<SDPatternOperator op> {
2665 def _16B : PatFrag<(ops node:$Rn, node:$Rm),
2666 (op (v8i8 (Neon_High16B node:$Rn)), (v8i8 (Neon_High16B node:$Rm)))>;
2667 def _8H : PatFrag<(ops node:$Rn, node:$Rm),
2668 (op (v4i16 (Neon_High8H node:$Rn)), (v4i16 (Neon_High8H node:$Rm)))>;
2669 def _4S : PatFrag<(ops node:$Rn, node:$Rm),
2670 (op (v2i32 (Neon_High4S node:$Rn)), (v2i32 (Neon_High4S node:$Rm)))>;
// Pattern-fragment families that apply the base operation to the HIGH halves
// of both 128-bit inputs (via Neon_High16B/8H/4S inside NeonI_Op_High); used
// by the "2" (second-half) instruction forms below.
2673 defm NI_sabdl_hi : NeonI_Op_High<int_arm_neon_vabds>;
2674 defm NI_uabdl_hi : NeonI_Op_High<int_arm_neon_vabdu>;
2675 defm NI_smull_hi : NeonI_Op_High<int_arm_neon_vmulls>;
2676 defm NI_umull_hi : NeonI_Op_High<int_arm_neon_vmullu>;
2677 defm NI_qdmull_hi : NeonI_Op_High<int_arm_neon_vqdmull>;
2678 defm NI_pmull_hi : NeonI_Op_High<int_arm_neon_vmullp>;
2680 multiclass NeonI_3VDL_Abd_u<bit u, bits<4> opcode, string asmop, string opnode,
2681 bit Commutable = 0> {
2682 let isCommutable = Commutable in {
2683 def _8h8b : NeonI_3VDL_Ext<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2684 !cast<PatFrag>(opnode # "_16B"),
2685 VPR128, v8i16, v16i8, v8i8>;
2686 def _4s4h : NeonI_3VDL_Ext<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2687 !cast<PatFrag>(opnode # "_8H"),
2688 VPR128, v4i32, v8i16, v4i16>;
2689 def _2d2s : NeonI_3VDL_Ext<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2690 !cast<PatFrag>(opnode # "_4S"),
2691 VPR128, v2i64, v4i32, v2i32>;
// sabdl2 / uabdl2: absolute-difference long on the high halves of the inputs
// (PatFrag names are resolved per element size inside NeonI_3VDL_Abd_u).
2695 defm SABDL2vvv : NeonI_3VDL_Abd_u<0b0, 0b0111, "sabdl2", "NI_sabdl_hi", 1>;
2696 defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 0b0111, "uabdl2", "NI_uabdl_hi", 1>;
2698 // For patterns that need two operators to be chained.
2699 class NeonI_3VDL_Aba<bit q, bit u, bits<2> size, bits<4> opcode,
2700 string asmop, string ResS, string OpS,
2701 SDPatternOperator opnode, SDPatternOperator subop,
2702 RegisterOperand OpVPR,
2703 ValueType ResTy, ValueType OpTy, ValueType OpSTy>
2704 : NeonI_3VDiff<q, u, size, opcode,
2705 (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
2706 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2707 [(set (ResTy VPR128:$Rd),
2709 (ResTy VPR128:$src),
2710 (ResTy (zext (OpSTy (subop (OpTy OpVPR:$Rn),
2711 (OpTy OpVPR:$Rm))))))))],
2713 let Constraints = "$src = $Rd";
2716 multiclass NeonI_3VDL_Aba_v1<bit u, bits<4> opcode, string asmop,
2717 SDPatternOperator opnode, SDPatternOperator subop>{
2718 def _8h8b : NeonI_3VDL_Aba<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2719 opnode, subop, VPR64, v8i16, v8i8, v8i8>;
2720 def _4s4h : NeonI_3VDL_Aba<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2721 opnode, subop, VPR64, v4i32, v4i16, v4i16>;
2722 def _2d2s : NeonI_3VDL_Aba<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2723 opnode, subop, VPR64, v2i64, v2i32, v2i32>;
// sabal / uabal: accumulate (add) the zero-extended absolute difference into
// the destination.
2726 defm SABALvvv : NeonI_3VDL_Aba_v1<0b0, 0b0101, "sabal",
2727 add, int_arm_neon_vabds>;
2728 defm UABALvvv : NeonI_3VDL_Aba_v1<0b1, 0b0101, "uabal",
2729 add, int_arm_neon_vabdu>;
2731 multiclass NeonI_3VDL2_Aba_v1<bit u, bits<4> opcode, string asmop,
2732 SDPatternOperator opnode, string subop> {
2733 def _8h8b : NeonI_3VDL_Aba<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2734 opnode, !cast<PatFrag>(subop # "_16B"),
2735 VPR128, v8i16, v16i8, v8i8>;
2736 def _4s4h : NeonI_3VDL_Aba<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2737 opnode, !cast<PatFrag>(subop # "_8H"),
2738 VPR128, v4i32, v8i16, v4i16>;
2739 def _2d2s : NeonI_3VDL_Aba<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2740 opnode, !cast<PatFrag>(subop # "_4S"),
2741 VPR128, v2i64, v4i32, v2i32>;
2744 defm SABAL2vvv : NeonI_3VDL2_Aba_v1<0b0, 0b0101, "sabal2", add,
2746 defm UABAL2vvv : NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add,
2749 // Long pattern with 2 operands
2750 multiclass NeonI_3VDL_2Op<bit u, bits<4> opcode, string asmop,
2751 SDPatternOperator opnode, bit Commutable = 0> {
2752 let isCommutable = Commutable in {
2753 def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2754 opnode, VPR128, VPR64, v8i16, v8i8>;
2755 def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2756 opnode, VPR128, VPR64, v4i32, v4i16>;
2757 def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2758 opnode, VPR128, VPR64, v2i64, v2i32>;
// smull / umull: signed/unsigned multiply long. Commutable.
2762 defm SMULLvvv : NeonI_3VDL_2Op<0b0, 0b1100, "smull", int_arm_neon_vmulls, 1>;
2763 defm UMULLvvv : NeonI_3VDL_2Op<0b1, 0b1100, "umull", int_arm_neon_vmullu, 1>;
2765 class NeonI_3VDL2_2Op_mull<bit q, bit u, bits<2> size, bits<4> opcode,
2766 string asmop, string ResS, string OpS,
2767 SDPatternOperator opnode,
2768 ValueType ResTy, ValueType OpTy>
2769 : NeonI_3VDiff<q, u, size, opcode,
2770 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
2771 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2772 [(set (ResTy VPR128:$Rd),
2773 (ResTy (opnode (OpTy VPR128:$Rn), (OpTy VPR128:$Rm))))],
2776 multiclass NeonI_3VDL2_2Op_mull_v1<bit u, bits<4> opcode, string asmop,
2777 string opnode, bit Commutable = 0> {
2778 let isCommutable = Commutable in {
2779 def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2780 !cast<PatFrag>(opnode # "_16B"),
2782 def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2783 !cast<PatFrag>(opnode # "_8H"),
2785 def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2786 !cast<PatFrag>(opnode # "_4S"),
2791 defm SMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b0, 0b1100, "smull2",
2793 defm UMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b1, 0b1100, "umull2",
2796 // Long pattern with 3 operands
2797 class NeonI_3VDL_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
2798 string asmop, string ResS, string OpS,
2799 SDPatternOperator opnode,
2800 ValueType ResTy, ValueType OpTy>
2801 : NeonI_3VDiff<q, u, size, opcode,
2802 (outs VPR128:$Rd), (ins VPR128:$src, VPR64:$Rn, VPR64:$Rm),
2803 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2804 [(set (ResTy VPR128:$Rd),
2806 (ResTy VPR128:$src),
2807 (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))))],
2809 let Constraints = "$src = $Rd";
2812 multiclass NeonI_3VDL_3Op_v1<bit u, bits<4> opcode, string asmop,
2813 SDPatternOperator opnode> {
2814 def _8h8b : NeonI_3VDL_3Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2815 opnode, v8i16, v8i8>;
2816 def _4s4h : NeonI_3VDL_3Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2817 opnode, v4i32, v4i16>;
2818 def _2d2s : NeonI_3VDL_3Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2819 opnode, v2i64, v2i32>;
2822 def Neon_smlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
2824 (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
2826 def Neon_umlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
2828 (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
2830 def Neon_smlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
2832 (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
2834 def Neon_umlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
2836 (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
// smlal/umlal and smlsl/umlsl: multiply long then accumulate into (or
// subtract from) the wide destination, via the Neon_*ml* PatFrags above.
2838 defm SMLALvvv : NeonI_3VDL_3Op_v1<0b0, 0b1000, "smlal", Neon_smlal>;
2839 defm UMLALvvv : NeonI_3VDL_3Op_v1<0b1, 0b1000, "umlal", Neon_umlal>;
2841 defm SMLSLvvv : NeonI_3VDL_3Op_v1<0b0, 0b1010, "smlsl", Neon_smlsl>;
2842 defm UMLSLvvv : NeonI_3VDL_3Op_v1<0b1, 0b1010, "umlsl", Neon_umlsl>;
2844 class NeonI_3VDL2_3Op_mlas<bit q, bit u, bits<2> size, bits<4> opcode,
2845 string asmop, string ResS, string OpS,
2846 SDPatternOperator subop, SDPatternOperator opnode,
2847 RegisterOperand OpVPR,
2848 ValueType ResTy, ValueType OpTy>
2849 : NeonI_3VDiff<q, u, size, opcode,
2850 (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
2851 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2852 [(set (ResTy VPR128:$Rd),
2854 (ResTy VPR128:$src),
2855 (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))],
2857 let Constraints = "$src = $Rd";
2860 multiclass NeonI_3VDL2_3Op_mlas_v1<bit u, bits<4> opcode, string asmop,
2861 SDPatternOperator subop, string opnode> {
2862 def _8h16b : NeonI_3VDL2_3Op_mlas<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2863 subop, !cast<PatFrag>(opnode # "_16B"),
2864 VPR128, v8i16, v16i8>;
2865 def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2866 subop, !cast<PatFrag>(opnode # "_8H"),
2867 VPR128, v4i32, v8i16>;
2868 def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2869 subop, !cast<PatFrag>(opnode # "_4S"),
2870 VPR128, v2i64, v4i32>;
// Second-half multiply-accumulate/subtract: multiply the high halves
// (NI_smull_hi / NI_umull_hi) and combine into $Rd with add or sub.
2873 defm SMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1000, "smlal2",
2874 add, "NI_smull_hi">;
2875 defm UMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1000, "umlal2",
2876 add, "NI_umull_hi">;
2878 defm SMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1010, "smlsl2",
2879 sub, "NI_smull_hi">;
2880 defm UMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1010, "umlsl2",
2881 sub, "NI_umull_hi">;
2883 multiclass NeonI_3VDL_qdmlal_3Op_v2<bit u, bits<4> opcode, string asmop,
2884 SDPatternOperator opnode> {
2885 def _4s4h : NeonI_3VDL2_3Op_mlas<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2886 opnode, int_arm_neon_vqdmull,
2887 VPR64, v4i32, v4i16>;
2888 def _2d2s : NeonI_3VDL2_3Op_mlas<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2889 opnode, int_arm_neon_vqdmull,
2890 VPR64, v2i64, v2i32>;
// sqdmlal / sqdmlsl: saturating doubling multiply long, then saturating
// add/subtract into the destination.
2893 defm SQDMLALvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1001, "sqdmlal",
2894 int_arm_neon_vqadds>;
2895 defm SQDMLSLvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1011, "sqdmlsl",
2896 int_arm_neon_vqsubs>;
2898 multiclass NeonI_3VDL_v2<bit u, bits<4> opcode, string asmop,
2899 SDPatternOperator opnode, bit Commutable = 0> {
2900 let isCommutable = Commutable in {
2901 def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2902 opnode, VPR128, VPR64, v4i32, v4i16>;
2903 def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2904 opnode, VPR128, VPR64, v2i64, v2i32>;
// sqdmull: saturating doubling multiply long. Commutable.
2908 defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull",
2909 int_arm_neon_vqdmull, 1>;
2911 multiclass NeonI_3VDL2_2Op_mull_v2<bit u, bits<4> opcode, string asmop,
2912 string opnode, bit Commutable = 0> {
2913 let isCommutable = Commutable in {
2914 def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2915 !cast<PatFrag>(opnode # "_8H"),
2917 def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2918 !cast<PatFrag>(opnode # "_4S"),
2923 defm SQDMULL2vvv : NeonI_3VDL2_2Op_mull_v2<0b0, 0b1101, "sqdmull2",
2926 multiclass NeonI_3VDL2_3Op_qdmlal_v2<bit u, bits<4> opcode, string asmop,
2927 SDPatternOperator opnode> {
2928 def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2929 opnode, NI_qdmull_hi_8H,
2930 VPR128, v4i32, v8i16>;
2931 def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2932 opnode, NI_qdmull_hi_4S,
2933 VPR128, v2i64, v4i32>;
// sqdmlal2 / sqdmlsl2: second-half saturating doubling multiply long, with
// saturating accumulate/subtract.
2936 defm SQDMLAL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1001, "sqdmlal2",
2937 int_arm_neon_vqadds>;
2938 defm SQDMLSL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1011, "sqdmlsl2",
2939 int_arm_neon_vqsubs>;
2941 multiclass NeonI_3VDL_v3<bit u, bits<4> opcode, string asmop,
2942 SDPatternOperator opnode, bit Commutable = 0> {
2943 let isCommutable = Commutable in {
2944 def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2945 opnode, VPR128, VPR64, v8i16, v8i8>;
// pmull: polynomial multiply long (8-bit elements only in this multiclass).
2949 defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp, 1>;
2951 multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode, string asmop,
2952 string opnode, bit Commutable = 0> {
2953 let isCommutable = Commutable in {
2954 def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2955 !cast<PatFrag>(opnode # "_16B"),
2960 defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2", "NI_pmull_hi",
2963 // End of implementation for instruction class (3V Diff)
2965 // The following are vector load/store multiple N-element structure
2966 // (class SIMD lselem).
2968 // ld1: load multiple 1-element structure to 1/2/3/4 registers.
2969 // ld2/ld3/ld4: load multiple N-element structure to N registers (N = 2, 3, 4).
2970 // The structure consists of a sequence of sets of N values.
2971 // The first element of the structure is placed in the first lane
2972 // of the first vector, the second element in the first lane
2973 // of the second vector, and so on.
2974 // E.g. LD1_3V_2S will load 32-bit elements {A, B, C, D, E, F} sequentially into
2975 // the three 64-bit vectors list {BA, DC, FE}.
2976 // E.g. LD3_2S will load 32-bit elements {A, B, C, D, E, F} into the three
2977 // 64-bit vectors list {DA, EB, FC}.
2978 // Store instructions store multiple structures to N registers like load.
2981 class NeonI_LDVList<bit q, bits<4> opcode, bits<2> size,
2982 RegisterOperand VecList, string asmop>
2983 : NeonI_LdStMult<q, 1, opcode, size,
2984 (outs VecList:$Rt), (ins GPR64xsp:$Rn),
2985 asmop # "\t$Rt, [$Rn]",
2989 let neverHasSideEffects = 1;
2992 multiclass LDVList_BHSD<bits<4> opcode, string List, string asmop> {
2993 def _8B : NeonI_LDVList<0, opcode, 0b00,
2994 !cast<RegisterOperand>(List # "8B_operand"), asmop>;
2996 def _4H : NeonI_LDVList<0, opcode, 0b01,
2997 !cast<RegisterOperand>(List # "4H_operand"), asmop>;
2999 def _2S : NeonI_LDVList<0, opcode, 0b10,
3000 !cast<RegisterOperand>(List # "2S_operand"), asmop>;
3002 def _16B : NeonI_LDVList<1, opcode, 0b00,
3003 !cast<RegisterOperand>(List # "16B_operand"), asmop>;
3005 def _8H : NeonI_LDVList<1, opcode, 0b01,
3006 !cast<RegisterOperand>(List # "8H_operand"), asmop>;
3008 def _4S : NeonI_LDVList<1, opcode, 0b10,
3009 !cast<RegisterOperand>(List # "4S_operand"), asmop>;
3011 def _2D : NeonI_LDVList<1, opcode, 0b11,
3012 !cast<RegisterOperand>(List # "2D_operand"), asmop>;
3015 // Load multiple N-element structure to N consecutive registers (N = 1,2,3,4)
3016 defm LD1 : LDVList_BHSD<0b0111, "VOne", "ld1">;
// The .1d list forms (q = 0, size = 0b11) are defined individually because
// LDVList_BHSD does not emit a 64-bit 1D variant.
3017 def LD1_1D : NeonI_LDVList<0, 0b0111, 0b11, VOne1D_operand, "ld1">;
3019 defm LD2 : LDVList_BHSD<0b1000, "VPair", "ld2">;
3021 defm LD3 : LDVList_BHSD<0b0100, "VTriple", "ld3">;
3023 defm LD4 : LDVList_BHSD<0b0000, "VQuad", "ld4">;
3025 // Load multiple 1-element structure to N consecutive registers (N = 2,3,4)
3026 defm LD1_2V : LDVList_BHSD<0b1010, "VPair", "ld1">;
3027 def LD1_2V_1D : NeonI_LDVList<0, 0b1010, 0b11, VPair1D_operand, "ld1">;
3029 defm LD1_3V : LDVList_BHSD<0b0110, "VTriple", "ld1">;
3030 def LD1_3V_1D : NeonI_LDVList<0, 0b0110, 0b11, VTriple1D_operand, "ld1">;
3032 defm LD1_4V : LDVList_BHSD<0b0010, "VQuad", "ld1">;
3033 def LD1_4V_1D : NeonI_LDVList<0, 0b0010, 0b11, VQuad1D_operand, "ld1">;
3035 class NeonI_STVList<bit q, bits<4> opcode, bits<2> size,
3036 RegisterOperand VecList, string asmop>
3037 : NeonI_LdStMult<q, 0, opcode, size,
3038 (outs), (ins GPR64xsp:$Rn, VecList:$Rt),
3039 asmop # "\t$Rt, [$Rn]",
3043 let neverHasSideEffects = 1;
3046 multiclass STVList_BHSD<bits<4> opcode, string List, string asmop> {
3047 def _8B : NeonI_STVList<0, opcode, 0b00,
3048 !cast<RegisterOperand>(List # "8B_operand"), asmop>;
3050 def _4H : NeonI_STVList<0, opcode, 0b01,
3051 !cast<RegisterOperand>(List # "4H_operand"), asmop>;
3053 def _2S : NeonI_STVList<0, opcode, 0b10,
3054 !cast<RegisterOperand>(List # "2S_operand"), asmop>;
3056 def _16B : NeonI_STVList<1, opcode, 0b00,
3057 !cast<RegisterOperand>(List # "16B_operand"), asmop>;
3059 def _8H : NeonI_STVList<1, opcode, 0b01,
3060 !cast<RegisterOperand>(List # "8H_operand"), asmop>;
3062 def _4S : NeonI_STVList<1, opcode, 0b10,
3063 !cast<RegisterOperand>(List # "4S_operand"), asmop>;
3065 def _2D : NeonI_STVList<1, opcode, 0b11,
3066 !cast<RegisterOperand>(List # "2D_operand"), asmop>;
3069 // Store multiple N-element structures from N registers (N = 1,2,3,4)
3070 defm ST1 : STVList_BHSD<0b0111, "VOne", "st1">;
// As for the loads, the .1d list forms (q = 0, size = 0b11) are defined
// individually because STVList_BHSD does not emit a 64-bit 1D variant.
3071 def ST1_1D : NeonI_STVList<0, 0b0111, 0b11, VOne1D_operand, "st1">;
3073 defm ST2 : STVList_BHSD<0b1000, "VPair", "st2">;
3075 defm ST3 : STVList_BHSD<0b0100, "VTriple", "st3">;
3077 defm ST4 : STVList_BHSD<0b0000, "VQuad", "st4">;
3079 // Store multiple 1-element structures from N consecutive registers (N = 2,3,4)
3080 defm ST1_2V : STVList_BHSD<0b1010, "VPair", "st1">;
3081 def ST1_2V_1D : NeonI_STVList<0, 0b1010, 0b11, VPair1D_operand, "st1">;
3083 defm ST1_3V : STVList_BHSD<0b0110, "VTriple", "st1">;
3084 def ST1_3V_1D : NeonI_STVList<0, 0b0110, 0b11, VTriple1D_operand, "st1">;
3086 defm ST1_4V : STVList_BHSD<0b0010, "VQuad", "st1">;
3087 def ST1_4V_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">;
3089 // End of vector load/store multiple N-element structure(class SIMD lselem)
3091 // Scalar Three Same
3093 class NeonI_Scalar3Same_size<bit u, bits<2> size, bits<5> opcode, string asmop,
3095 : NeonI_Scalar3Same<u, size, opcode,
3096 (outs FPRC:$Rd), (ins FPRC:$Rn, FPRC:$Rm),
3097 !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
// D-sized (FPR64, size = 0b11) specialization of the scalar three-same class.
3101 class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
3102 : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;
3104 multiclass NeonI_Scalar3Same_HS_sizes<bit u, bits<5> opcode, string asmop,
3105 bit Commutable = 0> {
3106 let isCommutable = Commutable in {
3107 def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
3108 def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
3112 multiclass NeonI_Scalar3Same_SD_sizes<bit u, bit size_high, bits<5> opcode,
3113 string asmop, bit Commutable = 0> {
3114 let isCommutable = Commutable in {
3115 def sss : NeonI_Scalar3Same_size<u, {size_high, 0b0}, opcode, asmop, FPR32>;
3116 def ddd : NeonI_Scalar3Same_size<u, {size_high, 0b1}, opcode, asmop, FPR64>;
3120 multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
3121 string asmop, bit Commutable = 0> {
3122 let isCommutable = Commutable in {
3123 def bbb : NeonI_Scalar3Same_size<u, 0b00, opcode, asmop, FPR8>;
3124 def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
3125 def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
3126 def ddd : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;
3130 multiclass Neon_Scalar3Same_D_size_patterns<SDPatternOperator opnode,
3131 Instruction INSTD> {
3132 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
3133 (INSTD FPR64:$Rn, FPR64:$Rm)>;
3136 multiclass Neon_Scalar3Same_BHSD_size_patterns<SDPatternOperator opnode,
3141 : Neon_Scalar3Same_D_size_patterns<opnode, INSTD> {
3142 def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
3143 (INSTB FPR8:$Rn, FPR8:$Rm)>;
3145 def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
3146 (INSTH FPR16:$Rn, FPR16:$Rm)>;
3148 def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
3149 (INSTS FPR32:$Rn, FPR32:$Rm)>;
3152 class Neon_Scalar3Same_cmp_D_size_patterns<SDPatternOperator opnode,
3154 : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
3155 (INSTD FPR64:$Rn, FPR64:$Rm)>;
3157 multiclass Neon_Scalar3Same_HS_size_patterns<SDPatternOperator opnode,
3159 Instruction INSTS> {
3160 def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
3161 (INSTH FPR16:$Rn, FPR16:$Rm)>;
3162 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
3163 (INSTS FPR32:$Rn, FPR32:$Rm)>;
3166 multiclass Neon_Scalar3Same_SD_size_patterns<SDPatternOperator opnode,
3168 Instruction INSTD> {
3169 def : Pat<(v1f32 (opnode (v1f32 FPR32:$Rn), (v1f32 FPR32:$Rm))),
3170 (INSTS FPR32:$Rn, FPR32:$Rm)>;
3171 def : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
3172 (INSTD FPR64:$Rn, FPR64:$Rm)>;
3175 multiclass Neon_Scalar3Same_cmp_SD_size_patterns<SDPatternOperator opnode,
3177 Instruction INSTD> {
3178 def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn), (v1f32 FPR32:$Rm))),
3179 (INSTS FPR32:$Rn, FPR32:$Rm)>;
3180 def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
3181 (INSTD FPR64:$Rn, FPR64:$Rm)>;
3184 // Scalar Three Different
3186 class NeonI_Scalar3Diff_size<bit u, bits<2> size, bits<4> opcode, string asmop,
3187 RegisterClass FPRCD, RegisterClass FPRCS>
3188 : NeonI_Scalar3Diff<u, size, opcode,
3189 (outs FPRCD:$Rd), (ins FPRCS:$Rn, FPRCS:$Rm),
3190 !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
3194 multiclass NeonI_Scalar3Diff_HS_size<bit u, bits<4> opcode, string asmop> {
3195 def shh : NeonI_Scalar3Diff_size<u, 0b01, opcode, asmop, FPR32, FPR16>;
3196 def dss : NeonI_Scalar3Diff_size<u, 0b10, opcode, asmop, FPR64, FPR32>;
3199 multiclass NeonI_Scalar3Diff_ml_HS_size<bit u, bits<4> opcode, string asmop> {
3200 let Constraints = "$Src = $Rd" in {
3201 def shh : NeonI_Scalar3Diff<u, 0b01, opcode,
3202 (outs FPR32:$Rd), (ins FPR32:$Src, FPR16:$Rn, FPR16:$Rm),
3203 !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
3206 def dss : NeonI_Scalar3Diff<u, 0b10, opcode,
3207 (outs FPR64:$Rd), (ins FPR64:$Src, FPR32:$Rn, FPR32:$Rm),
3208 !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
3214 multiclass Neon_Scalar3Diff_HS_size_patterns<SDPatternOperator opnode,
3216 Instruction INSTS> {
3217 def : Pat<(v1i32 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
3218 (INSTH FPR16:$Rn, FPR16:$Rm)>;
3219 def : Pat<(v1i64 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
3220 (INSTS FPR32:$Rn, FPR32:$Rm)>;
3223 multiclass Neon_Scalar3Diff_ml_HS_size_patterns<SDPatternOperator opnode,
3225 Instruction INSTS> {
3226 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
3227 (INSTH FPR32:$Src, FPR16:$Rn, FPR16:$Rm)>;
3228 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
3229 (INSTS FPR64:$Src, FPR32:$Rn, FPR32:$Rm)>;
3232 // Scalar Two Registers Miscellaneous
3234 class NeonI_Scalar2SameMisc_size<bit u, bits<2> size, bits<5> opcode, string asmop,
3235 RegisterClass FPRCD, RegisterClass FPRCS>
3236 : NeonI_Scalar2SameMisc<u, size, opcode,
3237 (outs FPRCD:$Rd), (ins FPRCS:$Rn),
3238 !strconcat(asmop, "\t$Rd, $Rn"),
3242 multiclass NeonI_Scalar2SameMisc_SD_size<bit u, bit size_high, bits<5> opcode,
3244 def ss : NeonI_Scalar2SameMisc_size<u, {size_high, 0b0}, opcode, asmop, FPR32,
3246 def dd : NeonI_Scalar2SameMisc_size<u, {size_high, 0b1}, opcode, asmop, FPR64,
3250 multiclass NeonI_Scalar2SameMisc_D_size<bit u, bits<5> opcode, string asmop> {
3251 def dd: NeonI_Scalar2SameMisc_size<u, 0b11, opcode, asmop, FPR64, FPR64>;
3254 multiclass NeonI_Scalar2SameMisc_BHSD_size<bit u, bits<5> opcode, string asmop>
3255 : NeonI_Scalar2SameMisc_D_size<u, opcode, asmop> {
3256 def bb : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR8>;
3257 def hh : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR16>;
3258 def ss : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR32>;
3261 multiclass NeonI_Scalar2SameMisc_narrow_HSD_size<bit u, bits<5> opcode,
3263 def bh : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR16>;
3264 def hs : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR32>;
3265 def sd : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR64>;
3268 class NeonI_Scalar2SameMisc_accum_size<bit u, bits<2> size, bits<5> opcode,
3269 string asmop, RegisterClass FPRC>
3270 : NeonI_Scalar2SameMisc<u, size, opcode,
3271 (outs FPRC:$Rd), (ins FPRC:$Src, FPRC:$Rn),
3272 !strconcat(asmop, "\t$Rd, $Rn"),
3276 multiclass NeonI_Scalar2SameMisc_accum_BHSD_size<bit u, bits<5> opcode,
3279 let Constraints = "$Src = $Rd" in {
3280 def bb : NeonI_Scalar2SameMisc_accum_size<u, 0b00, opcode, asmop, FPR8>;
3281 def hh : NeonI_Scalar2SameMisc_accum_size<u, 0b01, opcode, asmop, FPR16>;
3282 def ss : NeonI_Scalar2SameMisc_accum_size<u, 0b10, opcode, asmop, FPR32>;
3283 def dd : NeonI_Scalar2SameMisc_accum_size<u, 0b11, opcode, asmop, FPR64>;
3287 multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns<SDPatternOperator Sopnode,
3288 SDPatternOperator Dopnode,
3290 Instruction INSTD> {
3291 def : Pat<(v1f32 (Sopnode (v1i32 FPR32:$Rn))),
3293 def : Pat<(v1f64 (Dopnode (v1i64 FPR64:$Rn))),
3297 multiclass Neon_Scalar2SameMisc_SD_size_patterns<SDPatternOperator opnode,
3299 Instruction INSTD> {
3300 def : Pat<(v1f32 (opnode (v1f32 FPR32:$Rn))),
3302 def : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))),
3306 class NeonI_Scalar2SameMisc_cmpz_D_size<bit u, bits<5> opcode, string asmop>
3307 : NeonI_Scalar2SameMisc<u, 0b11, opcode,
3308 (outs FPR64:$Rd), (ins FPR64:$Rn, neon_uimm0:$Imm),
3309 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
3313 multiclass NeonI_Scalar2SameMisc_cmpz_SD_size<bit u, bits<5> opcode,
3315 def ssi : NeonI_Scalar2SameMisc<u, 0b10, opcode,
3316 (outs FPR32:$Rd), (ins FPR32:$Rn, fpz32:$FPImm),
3317 !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
3320 def ddi : NeonI_Scalar2SameMisc<u, 0b11, opcode,
3321 (outs FPR64:$Rd), (ins FPR64:$Rn, fpz64movi:$FPImm),
3322 !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
3327 class Neon_Scalar2SameMisc_cmpz_D_size_patterns<SDPatternOperator opnode,
3329 : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
3330 (v1i64 (bitconvert (v8i8 Neon_immAllZeros))))),
3331 (INSTD FPR64:$Rn, 0)>;
3333 multiclass Neon_Scalar2SameMisc_cmpz_SD_size_patterns<SDPatternOperator opnode,
3335 Instruction INSTD> {
3336 def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn),
3337 (v1f32 (scalar_to_vector (f32 fpimm:$FPImm))))),
3338 (INSTS FPR32:$Rn, fpimm:$FPImm)>;
3339 def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn),
3340 (v1f64 (bitconvert (v8i8 Neon_immAllZeros))))),
3341 (INSTD FPR64:$Rn, 0)>;
3344 multiclass Neon_Scalar2SameMisc_D_size_patterns<SDPatternOperator opnode,
3345 Instruction INSTD> {
3346 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn))),
3350 multiclass Neon_Scalar2SameMisc_BHSD_size_patterns<SDPatternOperator opnode,
3355 : Neon_Scalar2SameMisc_D_size_patterns<opnode, INSTD> {
3356 def : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn))),
3358 def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn))),
3360 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn))),
3364 multiclass Neon_Scalar2SameMisc_narrow_HSD_size_patterns<
3365 SDPatternOperator opnode,
3368 Instruction INSTD> {
3369 def : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn))),
3371 def : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn))),
3373 def : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn))),
3378 multiclass Neon_Scalar2SameMisc_accum_BHSD_size_patterns<
3379 SDPatternOperator opnode,
3383 Instruction INSTD> {
3384 def : Pat<(v1i8 (opnode (v1i8 FPR8:$Src), (v1i8 FPR8:$Rn))),
3385 (INSTB FPR8:$Src, FPR8:$Rn)>;
3386 def : Pat<(v1i16 (opnode (v1i16 FPR16:$Src), (v1i16 FPR16:$Rn))),
3387 (INSTH FPR16:$Src, FPR16:$Rn)>;
3388 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i32 FPR32:$Rn))),
3389 (INSTS FPR32:$Src, FPR32:$Rn)>;
3390 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn))),
3391 (INSTD FPR64:$Src, FPR64:$Rn)>;
3394 // Scalar Shift By Immediate
3396 class NeonI_ScalarShiftImm_size<bit u, bits<5> opcode, string asmop,
3397 RegisterClass FPRC, Operand ImmTy>
3398 : NeonI_ScalarShiftImm<u, opcode,
3399 (outs FPRC:$Rd), (ins FPRC:$Rn, ImmTy:$Imm),
3400 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
3403 multiclass NeonI_ScalarShiftRightImm_D_size<bit u, bits<5> opcode,
3405 def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
3407 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
3408 let Inst{21-16} = Imm;
3412 multiclass NeonI_ScalarShiftRightImm_BHSD_size<bit u, bits<5> opcode,
3414 : NeonI_ScalarShiftRightImm_D_size<u, opcode, asmop> {
3415 def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shr_imm8> {
3417 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
3418 let Inst{18-16} = Imm;
3420 def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shr_imm16> {
3422 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
3423 let Inst{19-16} = Imm;
3425 def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
3427 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
3428 let Inst{20-16} = Imm;
3432 multiclass NeonI_ScalarShiftLeftImm_D_size<bit u, bits<5> opcode,
3434 def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shl_imm64> {
3436 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
3437 let Inst{21-16} = Imm;
3441 multiclass NeonI_ScalarShiftLeftImm_BHSD_size<bit u, bits<5> opcode,
3443 : NeonI_ScalarShiftLeftImm_D_size<u, opcode, asmop> {
3444 def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shl_imm8> {
3446 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
3447 let Inst{18-16} = Imm;
3449 def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shl_imm16> {
3451 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
3452 let Inst{19-16} = Imm;
3454 def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shl_imm32> {
3456 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
3457 let Inst{20-16} = Imm;
3461 class NeonI_ScalarShiftImm_accum_D_size<bit u, bits<5> opcode, string asmop>
3462 : NeonI_ScalarShiftImm<u, opcode,
3463 (outs FPR64:$Rd), (ins FPR64:$Src, FPR64:$Rn, shr_imm64:$Imm),
3464 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
3467 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
3468 let Inst{21-16} = Imm;
3469 let Constraints = "$Src = $Rd";
3472 class NeonI_ScalarShiftImm_narrow_size<bit u, bits<5> opcode, string asmop,
3473 RegisterClass FPRCD, RegisterClass FPRCS,
3475 : NeonI_ScalarShiftImm<u, opcode,
3476 (outs FPRCD:$Rd), (ins FPRCS:$Rn, ImmTy:$Imm),
3477 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
3480 multiclass NeonI_ScalarShiftImm_narrow_HSD_size<bit u, bits<5> opcode,
3482 def bhi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR8, FPR16,
3485 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
3486 let Inst{18-16} = Imm;
3488 def hsi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR16, FPR32,
3491 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
3492 let Inst{19-16} = Imm;
3494 def sdi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR32, FPR64,
3497 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
3498 let Inst{20-16} = Imm;
3502 multiclass NeonI_ScalarShiftImm_scvtf_SD_size<bit u, bits<5> opcode, string asmop> {
3503 def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
3505 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
3506 let Inst{20-16} = Imm;
3508 def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
3510 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
3511 let Inst{21-16} = Imm;
3515 multiclass Neon_ScalarShiftImm_D_size_patterns<SDPatternOperator opnode,
3516 Instruction INSTD> {
3517 def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 imm:$Imm))),
3518 (INSTD FPR64:$Rn, imm:$Imm)>;
3521 multiclass Neon_ScalarShiftImm_BHSD_size_patterns<SDPatternOperator opnode,
3526 : Neon_ScalarShiftImm_D_size_patterns<opnode, INSTD> {
3527 def bbi : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (i32 imm:$Imm))),
3528 (INSTB FPR8:$Rn, imm:$Imm)>;
3529 def hhi : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (i32 imm:$Imm))),
3530 (INSTH FPR16:$Rn, imm:$Imm)>;
3531 def ssi : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (i32 imm:$Imm))),
3532 (INSTS FPR32:$Rn, imm:$Imm)>;
3535 class Neon_ScalarShiftImm_accum_D_size_patterns<SDPatternOperator opnode,
3537 : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn), (i32 imm:$Imm))),
3538 (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>;
3540 multiclass Neon_ScalarShiftImm_narrow_HSD_size_patterns<
3541 SDPatternOperator opnode,
3544 Instruction INSTD> {
3545 def bhi : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn), (i32 imm:$Imm))),
3546 (INSTH FPR16:$Rn, imm:$Imm)>;
3547 def hsi : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn), (i32 imm:$Imm))),
3548 (INSTS FPR32:$Rn, imm:$Imm)>;
3549 def sdi : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn), (i32 imm:$Imm))),
3550 (INSTD FPR64:$Rn, imm:$Imm)>;
3553 multiclass Neon_ScalarShiftImm_scvtf_SD_size_patterns<SDPatternOperator Sopnode,
3554 SDPatternOperator Dopnode,
3556 Instruction INSTD> {
3557 def ssi : Pat<(v1f32 (Sopnode (v1i32 FPR32:$Rn), (i32 imm:$Imm))),
3558 (INSTS FPR32:$Rn, imm:$Imm)>;
3559 def ddi : Pat<(v1f64 (Dopnode (v1i64 FPR64:$Rn), (i32 imm:$Imm))),
3560 (INSTD FPR64:$Rn, imm:$Imm)>;
// Scalar shift-by-immediate (D register only) and the llvm.aarch64.*
// intrinsic patterns that select them.
3563 // Scalar Signed Shift Right (Immediate)
3564 defm SSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00000, "sshr">;
3565 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vshrds_n, SSHRddi>;
3567 // Scalar Unsigned Shift Right (Immediate)
3568 defm USHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00000, "ushr">;
3569 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vshrdu_n, USHRddi>;
3571 // Scalar Signed Rounding Shift Right (Immediate)
3572 defm SRSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00100, "srshr">;
3573 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vrshrds_n, SRSHRddi>;
3575 // Scalar Unsigned Rounding Shift Right (Immediate)
3576 defm URSHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00100, "urshr">;
3577 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vrshrdu_n, URSHRddi>;
3579 // Scalar Signed Shift Right and Accumulate (Immediate)
3580 def SSRA : NeonI_ScalarShiftImm_accum_D_size<0b0, 0b00010, "ssra">;
3581 def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vsrads_n, SSRA>;
3583 // Scalar Unsigned Shift Right and Accumulate (Immediate)
3584 def USRA : NeonI_ScalarShiftImm_accum_D_size<0b1, 0b00010, "usra">;
3585 def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vsradu_n, USRA>;
3587 // Scalar Signed Rounding Shift Right and Accumulate (Immediate)
3588 def SRSRA : NeonI_ScalarShiftImm_accum_D_size<0b0, 0b00110, "srsra">;
3589 def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vrsrads_n, SRSRA>;
3591 // Scalar Unsigned Rounding Shift Right and Accumulate (Immediate)
3592 def URSRA : NeonI_ScalarShiftImm_accum_D_size<0b1, 0b00110, "ursra">;
3593 def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vrsradu_n, URSRA>;
3595 // Scalar Shift Left (Immediate)
3596 defm SHL : NeonI_ScalarShiftLeftImm_D_size<0b0, 0b01010, "shl">;
3597 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vshld_n, SHLddi>;
3599 // Signed Saturating Shift Left (Immediate)
3600 defm SQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b0, 0b01110, "sqshl">;
3601 defm : Neon_ScalarShiftImm_BHSD_size_patterns<int_aarch64_neon_vqshls_n,
3603 SQSHLssi, SQSHLddi>;
3605 // Unsigned Saturating Shift Left (Immediate)
3606 defm UQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01110, "uqshl">;
3607 defm : Neon_ScalarShiftImm_BHSD_size_patterns<int_aarch64_neon_vqshlu_n,
3609 UQSHLssi, UQSHLddi>;
3611 // Signed Saturating Shift Left Unsigned (Immediate)
3612 defm SQSHLU : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01100, "sqshlu">;
3613 defm : Neon_ScalarShiftImm_BHSD_size_patterns<int_aarch64_neon_vqshlus_n,
3614 SQSHLUbbi, SQSHLUhhi,
3615 SQSHLUssi, SQSHLUddi>;
// Insert shifts: only D-size (64-bit scalar) variants are instantiated here.
3617 // Shift Right And Insert (Immediate)
3618 defm SRI : NeonI_ScalarShiftRightImm_D_size<0b1, 0b01000, "sri">;
3619 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vsrid_n, SRIddi>;
3621 // Shift Left And Insert (Immediate)
3622 defm SLI : NeonI_ScalarShiftLeftImm_D_size<0b1, 0b01010, "sli">;
3623 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vslid_n, SLIddi>;
3625 // Signed Saturating Shift Right Narrow (Immediate)
3626 defm SQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10010, "sqshrn">;
3627 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrn,
3628 SQSHRNbhi, SQSHRNhsi,
3631 // Unsigned Saturating Shift Right Narrow (Immediate)
3632 defm UQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10010, "uqshrn">;
3633 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqshrn,
3634 UQSHRNbhi, UQSHRNhsi,
3637 // Signed Saturating Rounded Shift Right Narrow (Immediate)
3638 defm SQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10011, "sqrshrn">;
3639 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrn,
3640 SQRSHRNbhi, SQRSHRNhsi,
3643 // Unsigned Saturating Rounded Shift Right Narrow (Immediate)
3644 defm UQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10011, "uqrshrn">;
3645 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqrshrn,
3646 UQRSHRNbhi, UQRSHRNhsi,
3649 // Signed Saturating Shift Right Unsigned Narrow (Immediate)
3650 defm SQSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10000, "sqshrun">;
3651 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrun,
3652 SQSHRUNbhi, SQSHRUNhsi,
3655 // Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
3656 defm SQRSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10001, "sqrshrun">;
3657 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrun,
3658 SQRSHRUNbhi, SQRSHRUNhsi,
// Fixed-point-to-FP conversions with an explicit fractional-bits immediate
// (the "_N" intrinsic forms), S and D sizes only.
3661 // Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
3662 defm SCVTF_N : NeonI_ScalarShiftImm_scvtf_SD_size<0b0, 0b11100, "scvtf">;
3663 defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtf32_n_s32,
3664 int_aarch64_neon_vcvtf64_n_s64,
3665 SCVTF_Nssi, SCVTF_Nddi>;
3667 // Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
3668 defm UCVTF_N : NeonI_ScalarShiftImm_scvtf_SD_size<0b1, 0b11100, "ucvtf">;
3669 defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtf32_n_u32,
3670 int_aarch64_neon_vcvtf64_n_u64,
3671 UCVTF_Nssi, UCVTF_Nddi>;
3673 // Scalar Integer Add
3674 let isCommutable = 1 in {
3675 def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
3678 // Scalar Integer Sub
3679 def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">;
3681 // Pattern for Scalar Integer Add and Sub with D register only
3682 defm : Neon_Scalar3Same_D_size_patterns<add, ADDddd>;
3683 defm : Neon_Scalar3Same_D_size_patterns<sub, SUBddd>;
3685 // Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub
3686 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vaddds, ADDddd>;
3687 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vadddu, ADDddd>;
3688 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubds, SUBddd>;
3689 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubdu, SUBddd>;
// Saturating add/sub in all scalar sizes (B/H/S/D). The trailing 1/0
// presumably marks commutativity (cf. the Commutable parameter of the
// NeonI_ScalarPair_* multiclasses below) — TODO confirm against the
// multiclass definition.
3691 // Scalar Integer Saturating Add (Signed, Unsigned)
3692 defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;
3693 defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;
3695 // Scalar Integer Saturating Sub (Signed, Unsigned)
3696 defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
3697 defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;
3699 // Patterns to match llvm.arm.* intrinsic for
3700 // Scalar Integer Saturating Add, Sub (Signed, Unsigned)
3701 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqadds, SQADDddd>;
3702 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqaddu, UQADDddd>;
3703 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqsubs, SQSUBddd>;
3704 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqsubu, UQSUBddd>;
3706 // Patterns to match llvm.aarch64.* intrinsic for
3707 // Scalar Integer Saturating Add, Sub (Signed, Unsigned)
3708 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqadds, SQADDbbb,
3709 SQADDhhh, SQADDsss, SQADDddd>;
3710 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqaddu, UQADDbbb,
3711 UQADDhhh, UQADDsss, UQADDddd>;
3712 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqsubs, SQSUBbbb,
3713 SQSUBhhh, SQSUBsss, SQSUBddd>;
3714 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqsubu, UQSUBbbb,
3715 UQSUBhhh, UQSUBsss, UQSUBddd>;
3717 // Scalar Integer Saturating Doubling Multiply Half High
3718 defm SQDMULH : NeonI_Scalar3Same_HS_sizes<0b0, 0b10110, "sqdmulh", 1>;
3720 // Scalar Integer Saturating Rounding Doubling Multiply Half High
3721 defm SQRDMULH : NeonI_Scalar3Same_HS_sizes<0b1, 0b10110, "sqrdmulh", 1>;
3723 // Patterns to match llvm.arm.* intrinsic for
3724 // Scalar Integer Saturating Doubling Multiply Half High and
3725 // Scalar Integer Saturating Rounding Doubling Multiply Half High
3726 defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqdmulh, SQDMULHhhh,
3728 defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqrdmulh, SQRDMULHhhh,
// Floating-point three-same-operand scalar ops, S and D sizes.
3731 // Scalar Floating-point Multiply Extended
3732 defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>;
3734 // Scalar Floating-point Reciprocal Step
3735 defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>;
3737 // Scalar Floating-point Reciprocal Square Root Step
3738 defm FRSQRTS : NeonI_Scalar3Same_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", 0>;
3740 // Patterns to match llvm.arm.* intrinsic for
3741 // Scalar Floating-point Reciprocal Step and
3742 // Scalar Floating-point Reciprocal Square Root Step
3743 defm : Neon_Scalar3Same_SD_size_patterns<int_arm_neon_vrecps, FRECPSsss,
3745 defm : Neon_Scalar3Same_SD_size_patterns<int_arm_neon_vrsqrts, FRSQRTSsss,
3748 // Patterns to match llvm.aarch64.* intrinsic for
3749 // Scalar Floating-point Multiply Extended,
3750 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vmulx, FMULXsss,
// Register-controlled shifts. Both the legacy llvm.arm.* and the
// llvm.aarch64.* intrinsic spellings are matched to the same instructions.
3753 // Scalar Integer Shift Left (Signed, Unsigned)
3754 def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
3755 def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;
3757 // Patterns to match llvm.arm.* intrinsic for
3758 // Scalar Integer Shift Left (Signed, Unsigned)
3759 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
3760 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
3762 // Patterns to match llvm.aarch64.* intrinsic for
3763 // Scalar Integer Shift Left (Signed, Unsigned)
3764 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshlds, SSHLddd>;
3765 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshldu, USHLddd>;
3767 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
3768 defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
3769 defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;
3771 // Patterns to match llvm.aarch64.* intrinsic for
3772 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
3773 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshls, SQSHLbbb,
3774 SQSHLhhh, SQSHLsss, SQSHLddd>;
3775 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshlu, UQSHLbbb,
3776 UQSHLhhh, UQSHLsss, UQSHLddd>;
3778 // Patterns to match llvm.arm.* intrinsic for
3779 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
3780 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
3781 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
3783 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
3784 def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">;
3785 def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">;
3787 // Patterns to match llvm.aarch64.* intrinsic for
3788 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
3789 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshlds, SRSHLddd>;
3790 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshldu, URSHLddd>;
3792 // Patterns to match llvm.arm.* intrinsic for
3793 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
3794 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
3795 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
3797 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
3798 defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>;
3799 defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>;
3801 // Patterns to match llvm.aarch64.* intrinsic for
3802 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
3803 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshls, SQRSHLbbb,
3804 SQRSHLhhh, SQRSHLsss, SQRSHLddd>;
3805 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb,
3806 UQRSHLhhh, UQRSHLsss, UQRSHLddd>;
3808 // Patterns to match llvm.arm.* intrinsic for
3809 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
3810 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
3811 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
// Saturating doubling multiply (long) family; H and S source sizes,
// widening to the next element size (shh / dss instruction suffixes).
3813 // Signed Saturating Doubling Multiply-Add Long
3814 defm SQDMLAL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1001, "sqdmlal">;
3815 defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlal,
3816 SQDMLALshh, SQDMLALdss>;
3818 // Signed Saturating Doubling Multiply-Subtract Long
3819 defm SQDMLSL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1011, "sqdmlsl">;
3820 defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlsl,
3821 SQDMLSLshh, SQDMLSLdss>;
3823 // Signed Saturating Doubling Multiply Long
3824 defm SQDMULL : NeonI_Scalar3Diff_HS_size<0b0, 0b1101, "sqdmull">;
3825 defm : Neon_Scalar3Diff_HS_size_patterns<int_aarch64_neon_vqdmull,
3826 SQDMULLshh, SQDMULLdss>;
3828 // Scalar Signed Integer Convert To Floating-point
3829 defm SCVTF : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11101, "scvtf">;
3830 defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtf32_s32,
3831 int_aarch64_neon_vcvtf64_s64,
3834 // Scalar Unsigned Integer Convert To Floating-point
3835 defm UCVTF : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11101, "ucvtf">;
3836 defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtf32_u32,
3837 int_aarch64_neon_vcvtf64_u64,
// Floating-point estimate/exponent ops (two-operand misc class), S and D.
3840 // Scalar Floating-point Reciprocal Estimate
3841 defm FRECPE : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11101, "frecpe">;
3842 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_arm_neon_vrecpe,
3843 FRECPEss, FRECPEdd>;
3845 // Scalar Floating-point Reciprocal Exponent
3846 defm FRECPX : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11111, "frecpx">;
3847 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpx,
3848 FRECPXss, FRECPXdd>;
3850 // Scalar Floating-point Reciprocal Square Root Estimate
3851 defm FRSQRTE: NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11101, "frsqrte">;
3852 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_arm_neon_vrsqrte,
3853 FRSQRTEss, FRSQRTEdd>;
3855 // Scalar Integer Compare
3857 // Scalar Compare Bitwise Equal
3858 def CMEQddd: NeonI_Scalar3Same_D_size<0b1, 0b10001, "cmeq">;
3859 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vceq, CMEQddd>;
3861 // Scalar Compare Signed Greater Than Or Equal
3862 def CMGEddd: NeonI_Scalar3Same_D_size<0b0, 0b00111, "cmge">;
3863 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vcge, CMGEddd>;
3865 // Scalar Compare Unsigned Higher Or Same
3866 def CMHSddd: NeonI_Scalar3Same_D_size<0b1, 0b00111, "cmhs">;
3867 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vchs, CMHSddd>;
3869 // Scalar Compare Unsigned Higher
3870 def CMHIddd: NeonI_Scalar3Same_D_size<0b1, 0b00110, "cmhi">;
3871 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vchi, CMHIddd>;
3873 // Scalar Compare Signed Greater Than
3874 def CMGTddd: NeonI_Scalar3Same_D_size<0b0, 0b00110, "cmgt">;
3875 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vcgt, CMGTddd>;
3877 // Scalar Compare Bitwise Test Bits
3878 def CMTSTddd: NeonI_Scalar3Same_D_size<0b0, 0b10001, "cmtst">;
3879 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vtstd, CMTSTddd>;
3881 // Scalar Compare Bitwise Equal To Zero
3882 def CMEQddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01001, "cmeq">;
3883 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vceq,
3886 // Scalar Compare Signed Greater Than Or Equal To Zero
3887 def CMGEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01000, "cmge">;
3888 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcge,
3891 // Scalar Compare Signed Greater Than Zero
3892 def CMGTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01000, "cmgt">;
3893 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcgt,
3896 // Scalar Compare Signed Less Than Or Equal To Zero
3897 def CMLEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01001, "cmle">;
3898 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vclez,
3901 // Scalar Compare Less Than Zero
3902 def CMLTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01010, "cmlt">;
3903 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcltz,
3906 // Scalar Floating-point Compare
3908 // Scalar Floating-point Compare Mask Equal
3909 defm FCMEQ: NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11100, "fcmeq">;
3910 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vceq,
3911 FCMEQsss, FCMEQddd>;
3913 // Scalar Floating-point Compare Mask Equal To Zero
3914 defm FCMEQZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01101, "fcmeq">;
3915 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vceq,
3916 FCMEQZssi, FCMEQZddi>;
3918 // Scalar Floating-point Compare Mask Greater Than Or Equal
3919 defm FCMGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11100, "fcmge">;
3920 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcge,
3921 FCMGEsss, FCMGEddd>;
3923 // Scalar Floating-point Compare Mask Greater Than Or Equal To Zero
3924 defm FCMGEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01100, "fcmge">;
3925 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcge,
3926 FCMGEZssi, FCMGEZddi>;
3928 // Scalar Floating-point Compare Mask Greater Than
3929 defm FCMGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11100, "fcmgt">;
3930 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcgt,
3931 FCMGTsss, FCMGTddd>;
3933 // Scalar Floating-point Compare Mask Greater Than Zero
3934 defm FCMGTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01100, "fcmgt">;
3935 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcgt,
3936 FCMGTZssi, FCMGTZddi>;
3938 // Scalar Floating-point Compare Mask Less Than Or Equal To Zero
3939 defm FCMLEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01101, "fcmle">;
3940 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vclez,
3941 FCMLEZssi, FCMLEZddi>;
3943 // Scalar Floating-point Compare Mask Less Than Zero
3944 defm FCMLTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01110, "fcmlt">;
3945 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcltz,
3946 FCMLTZssi, FCMLTZddi>;
3948 // Scalar Floating-point Absolute Compare Mask Greater Than Or Equal
3949 defm FACGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11101, "facge">;
3950 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcage,
3951 FACGEsss, FACGEddd>;
3953 // Scalar Floating-point Absolute Compare Mask Greater Than
3954 defm FACGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11101, "facgt">;
3955 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcagt,
3956 FACGTsss, FACGTddd>;
3958 // Scalar Absolute Value
3959 defm ABS : NeonI_Scalar2SameMisc_D_size<0b0, 0b01011, "abs">;
3960 defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vabs, ABSdd>;
3962 // Scalar Signed Saturating Absolute Value
3963 defm SQABS : NeonI_Scalar2SameMisc_BHSD_size<0b0, 0b00111, "sqabs">;
3964 defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqabs,
3965 SQABSbb, SQABShh, SQABSss, SQABSdd>;
// Scalar Negate
3968 defm NEG : NeonI_Scalar2SameMisc_D_size<0b1, 0b01011, "neg">;
3969 defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vneg, NEGdd>;
3971 // Scalar Signed Saturating Negate
3972 defm SQNEG : NeonI_Scalar2SameMisc_BHSD_size<0b1, 0b00111, "sqneg">;
3973 defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqneg,
3974 SQNEGbb, SQNEGhh, SQNEGss, SQNEGdd>;
3976 // Scalar Signed Saturating Accumulated of Unsigned Value
3977 defm SUQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b0, 0b00011, "suqadd">;
3978 defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vuqadd,
3980 SUQADDss, SUQADDdd>;
3982 // Scalar Unsigned Saturating Accumulated of Signed Value
3983 defm USQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b1, 0b00011, "usqadd">;
3984 defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vsqadd,
3986 USQADDss, USQADDdd>;
3988 // Scalar Signed Saturating Extract Unsigned Narrow
3989 defm SQXTUN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10010, "sqxtun">;
3990 defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnsu,
3994 // Scalar Signed Saturating Extract Narrow
3995 defm SQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b0, 0b10100, "sqxtn">;
3996 defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovns,
4000 // Scalar Unsigned Saturating Extract Narrow
4001 defm UQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10100, "uqxtn">;
4002 defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnu,
4006 // Scalar Reduce Pairwise
4008 multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,
4009 string asmop, bit Commutable = 0> {
4010 let isCommutable = Commutable in {
4011 def _D_2D : NeonI_ScalarPair<u, {size, 0b1}, opcode,
4012 (outs FPR64:$Rd), (ins VPR128:$Rn),
4013 !strconcat(asmop, "\t$Rd, $Rn.2d"),
4019 multiclass NeonI_ScalarPair_SD_sizes<bit u, bit size, bits<5> opcode,
4020 string asmop, bit Commutable = 0>
4021 : NeonI_ScalarPair_D_sizes<u, size, opcode, asmop, Commutable> {
4022 let isCommutable = Commutable in {
4023 def _S_2S : NeonI_ScalarPair<u, {size, 0b0}, opcode,
4024 (outs FPR32:$Rd), (ins VPR64:$Rn),
4025 !strconcat(asmop, "\t$Rd, $Rn.2s"),
// NOTE(review): the comment below mentions llvm.arm.*, but the only pattern
// attached to ADDPvv here matches int_aarch64_neon_vpadd — the comment looks
// stale; verify against the intrinsic definitions.
4031 // Scalar Reduce Addition Pairwise (Integer) with
4032 // Pattern to match llvm.arm.* intrinsic
4033 defm ADDPvv : NeonI_ScalarPair_D_sizes<0b0, 0b1, 0b11011, "addp", 0>;
4035 // Pattern to match llvm.aarch64.* intrinsic for
4036 // Scalar Reduce Addition Pairwise (Integer)
4037 def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))),
4038 (ADDPvv_D_2D VPR128:$Rn)>;
4040 // Scalar Reduce Addition Pairwise (Floating Point)
4041 defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>;
4043 // Scalar Reduce Maximum Pairwise (Floating Point)
4044 defm FMAXPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01111, "fmaxp", 0>;
4046 // Scalar Reduce Minimum Pairwise (Floating Point)
4047 defm FMINPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01111, "fminp", 0>;
4049 // Scalar Reduce maxNum Pairwise (Floating Point)
4050 defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>;
4052 // Scalar Reduce minNum Pairwise (Floating Point)
4053 defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>;
4055 multiclass Neon_ScalarPair_SD_size_patterns<SDPatternOperator opnodeS,
4056 SDPatternOperator opnodeD,
4058 Instruction INSTD> {
4059 def : Pat<(v1f32 (opnodeS (v2f32 VPR64:$Rn))),
4061 def : Pat<(v1f64 (opnodeD (v2f64 VPR128:$Rn))),
4062 (INSTD VPR128:$Rn)>;
4065 // Patterns to match llvm.aarch64.* intrinsic for
4066 // Scalar Reduce Add, Max, Min, MaxNum, MinNum Pairwise (Floating Point)
4067 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfadd,
4068 int_aarch64_neon_vpfaddq, FADDPvv_S_2S, FADDPvv_D_2D>;
4070 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmax,
4071 int_aarch64_neon_vpmaxq, FMAXPvv_S_2S, FMAXPvv_D_2D>;
4073 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmin,
4074 int_aarch64_neon_vpminq, FMINPvv_S_2S, FMINPvv_D_2D>;
4076 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm,
4077 int_aarch64_neon_vpfmaxnmq, FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>;
4079 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm,
4080 int_aarch64_neon_vpfminnmq, FMINNMPvv_S_2S, FMINNMPvv_D_2D>;
4084 //===----------------------------------------------------------------------===//
4085 // Non-Instruction Patterns
4086 //===----------------------------------------------------------------------===//
4088 // 64-bit vector bitcasts...
// These casts are no-ops: source and destination are the same VPR64
// register, so selection just reinterprets the register's contents.
4090 def : Pat<(v1i64 (bitconvert (v8i8 VPR64:$src))), (v1i64 VPR64:$src)>;
4091 def : Pat<(v2f32 (bitconvert (v8i8 VPR64:$src))), (v2f32 VPR64:$src)>;
4092 def : Pat<(v2i32 (bitconvert (v8i8 VPR64:$src))), (v2i32 VPR64:$src)>;
4093 def : Pat<(v4i16 (bitconvert (v8i8 VPR64:$src))), (v4i16 VPR64:$src)>;
4095 def : Pat<(v1i64 (bitconvert (v4i16 VPR64:$src))), (v1i64 VPR64:$src)>;
4096 def : Pat<(v2i32 (bitconvert (v4i16 VPR64:$src))), (v2i32 VPR64:$src)>;
4097 def : Pat<(v2f32 (bitconvert (v4i16 VPR64:$src))), (v2f32 VPR64:$src)>;
4098 def : Pat<(v8i8 (bitconvert (v4i16 VPR64:$src))), (v8i8 VPR64:$src)>;
4100 def : Pat<(v1i64 (bitconvert (v2i32 VPR64:$src))), (v1i64 VPR64:$src)>;
4101 def : Pat<(v2f32 (bitconvert (v2i32 VPR64:$src))), (v2f32 VPR64:$src)>;
4102 def : Pat<(v4i16 (bitconvert (v2i32 VPR64:$src))), (v4i16 VPR64:$src)>;
4103 def : Pat<(v8i8 (bitconvert (v2i32 VPR64:$src))), (v8i8 VPR64:$src)>;
4105 def : Pat<(v1i64 (bitconvert (v2f32 VPR64:$src))), (v1i64 VPR64:$src)>;
4106 def : Pat<(v2i32 (bitconvert (v2f32 VPR64:$src))), (v2i32 VPR64:$src)>;
4107 def : Pat<(v4i16 (bitconvert (v2f32 VPR64:$src))), (v4i16 VPR64:$src)>;
4108 def : Pat<(v8i8 (bitconvert (v2f32 VPR64:$src))), (v8i8 VPR64:$src)>;
4110 def : Pat<(v2f32 (bitconvert (v1i64 VPR64:$src))), (v2f32 VPR64:$src)>;
4111 def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
4112 def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
4113 def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
4115 // ..and 128-bit vector bitcasts...
// As above, these are register-reinterpret no-ops, on VPR128.
4117 def : Pat<(v2f64 (bitconvert (v16i8 VPR128:$src))), (v2f64 VPR128:$src)>;
4118 def : Pat<(v2i64 (bitconvert (v16i8 VPR128:$src))), (v2i64 VPR128:$src)>;
4119 def : Pat<(v4f32 (bitconvert (v16i8 VPR128:$src))), (v4f32 VPR128:$src)>;
4120 def : Pat<(v4i32 (bitconvert (v16i8 VPR128:$src))), (v4i32 VPR128:$src)>;
4121 def : Pat<(v8i16 (bitconvert (v16i8 VPR128:$src))), (v8i16 VPR128:$src)>;
4123 def : Pat<(v2f64 (bitconvert (v8i16 VPR128:$src))), (v2f64 VPR128:$src)>;
4124 def : Pat<(v2i64 (bitconvert (v8i16 VPR128:$src))), (v2i64 VPR128:$src)>;
4125 def : Pat<(v4i32 (bitconvert (v8i16 VPR128:$src))), (v4i32 VPR128:$src)>;
4126 def : Pat<(v4f32 (bitconvert (v8i16 VPR128:$src))), (v4f32 VPR128:$src)>;
4127 def : Pat<(v16i8 (bitconvert (v8i16 VPR128:$src))), (v16i8 VPR128:$src)>;
4129 def : Pat<(v2f64 (bitconvert (v4i32 VPR128:$src))), (v2f64 VPR128:$src)>;
4130 def : Pat<(v2i64 (bitconvert (v4i32 VPR128:$src))), (v2i64 VPR128:$src)>;
4131 def : Pat<(v4f32 (bitconvert (v4i32 VPR128:$src))), (v4f32 VPR128:$src)>;
4132 def : Pat<(v8i16 (bitconvert (v4i32 VPR128:$src))), (v8i16 VPR128:$src)>;
4133 def : Pat<(v16i8 (bitconvert (v4i32 VPR128:$src))), (v16i8 VPR128:$src)>;
4135 def : Pat<(v2f64 (bitconvert (v4f32 VPR128:$src))), (v2f64 VPR128:$src)>;
4136 def : Pat<(v2i64 (bitconvert (v4f32 VPR128:$src))), (v2i64 VPR128:$src)>;
4137 def : Pat<(v4i32 (bitconvert (v4f32 VPR128:$src))), (v4i32 VPR128:$src)>;
4138 def : Pat<(v8i16 (bitconvert (v4f32 VPR128:$src))), (v8i16 VPR128:$src)>;
4139 def : Pat<(v16i8 (bitconvert (v4f32 VPR128:$src))), (v16i8 VPR128:$src)>;
4141 def : Pat<(v2f64 (bitconvert (v2i64 VPR128:$src))), (v2f64 VPR128:$src)>;
4142 def : Pat<(v4f32 (bitconvert (v2i64 VPR128:$src))), (v4f32 VPR128:$src)>;
4143 def : Pat<(v4i32 (bitconvert (v2i64 VPR128:$src))), (v4i32 VPR128:$src)>;
4144 def : Pat<(v8i16 (bitconvert (v2i64 VPR128:$src))), (v8i16 VPR128:$src)>;
4145 def : Pat<(v16i8 (bitconvert (v2i64 VPR128:$src))), (v16i8 VPR128:$src)>;
4147 def : Pat<(v2i64 (bitconvert (v2f64 VPR128:$src))), (v2i64 VPR128:$src)>;
4148 def : Pat<(v4f32 (bitconvert (v2f64 VPR128:$src))), (v4f32 VPR128:$src)>;
4149 def : Pat<(v4i32 (bitconvert (v2f64 VPR128:$src))), (v4i32 VPR128:$src)>;
4150 def : Pat<(v8i16 (bitconvert (v2f64 VPR128:$src))), (v8i16 VPR128:$src)>;
4151 def : Pat<(v16i8 (bitconvert (v2f64 VPR128:$src))), (v16i8 VPR128:$src)>;
4154 // ...and scalar bitcasts...
// Casts that stay within the FP/SIMD register file are no-ops; casts that
// cross to/from the general-purpose file (GPR64/GPR32) go through an FMOV.
4155 def : Pat<(f16 (bitconvert (v1i16 FPR16:$src))), (f16 FPR16:$src)>;
4156 def : Pat<(f32 (bitconvert (v1i32 FPR32:$src))), (f32 FPR32:$src)>;
4157 def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
4158 def : Pat<(f32 (bitconvert (v1f32 FPR32:$src))), (f32 FPR32:$src)>;
4159 def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;
4161 def : Pat<(i64 (bitconvert (v1i64 FPR64:$src))), (FMOVxd $src)>;
4162 def : Pat<(i32 (bitconvert (v1i32 FPR32:$src))), (FMOVws $src)>;
4164 def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
4165 def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
4166 def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
4168 def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))), (f64 VPR64:$src)>;
4169 def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))), (f64 VPR64:$src)>;
4170 def : Pat<(f64 (bitconvert (v2i32 VPR64:$src))), (f64 VPR64:$src)>;
4171 def : Pat<(f64 (bitconvert (v2f32 VPR64:$src))), (f64 VPR64:$src)>;
4172 def : Pat<(f64 (bitconvert (v1i64 VPR64:$src))), (f64 VPR64:$src)>;
4174 def : Pat<(f128 (bitconvert (v16i8 VPR128:$src))), (f128 VPR128:$src)>;
4175 def : Pat<(f128 (bitconvert (v8i16 VPR128:$src))), (f128 VPR128:$src)>;
4176 def : Pat<(f128 (bitconvert (v4i32 VPR128:$src))), (f128 VPR128:$src)>;
4177 def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))), (f128 VPR128:$src)>;
4178 def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), (f128 VPR128:$src)>;
4179 def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), (f128 VPR128:$src)>;
4181 def : Pat<(v1i16 (bitconvert (f16 FPR16:$src))), (v1i16 FPR16:$src)>;
4182 def : Pat<(v1i32 (bitconvert (f32 FPR32:$src))), (v1i32 FPR32:$src)>;
4183 def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
4184 def : Pat<(v1f32 (bitconvert (f32 FPR32:$src))), (v1f32 FPR32:$src)>;
4185 def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
4187 def : Pat<(v1i64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
4188 def : Pat<(v1i32 (bitconvert (i32 GPR32:$src))), (FMOVsw $src)>;
4190 def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
4191 def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
4192 def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
4193 def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
4194 def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
4196 def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>;
4197 def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>;
4198 def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>;
4199 def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
4200 def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
4201 def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
4203 def neon_uimm0_bare : Operand<i64>,
4204 ImmLeaf<i64, [{return Imm == 0;}]> {
4205 let ParserMatchClass = neon_uimm0_asmoperand;
4206 let PrintMethod = "printNeonUImm8OperandBare";
4209 def neon_uimm1_bare : Operand<i64>,
4210 ImmLeaf<i64, [{(void)Imm; return true;}]> {
4211 let ParserMatchClass = neon_uimm1_asmoperand;
4212 let PrintMethod = "printNeonUImm8OperandBare";
4215 def neon_uimm2_bare : Operand<i64>,
4216 ImmLeaf<i64, [{(void)Imm; return true;}]> {
4217 let ParserMatchClass = neon_uimm2_asmoperand;
4218 let PrintMethod = "printNeonUImm8OperandBare";
4221 def neon_uimm3_bare : Operand<i64>,
4222 ImmLeaf<i64, [{(void)Imm; return true;}]> {
4223 let ParserMatchClass = uimm3_asmoperand;
4224 let PrintMethod = "printNeonUImm8OperandBare";
4227 def neon_uimm4_bare : Operand<i64>,
4228 ImmLeaf<i64, [{(void)Imm; return true;}]> {
4229 let ParserMatchClass = uimm4_asmoperand;
4230 let PrintMethod = "printNeonUImm8OperandBare";
// Class for INS (insert a general-purpose register value into a vector
// element): "ins $Rd.<Res>[$Imm], $Rn". Selected from a vector_insert node.
// $src is tied to $Rd (Constraints below) because the untouched lanes of the
// destination must be preserved — this is a read-modify-write of Rd.
4233 class NeonI_INS_main<string asmop, string Res, ValueType ResTy,
4234                      RegisterClass OpGPR, ValueType OpTy, Operand OpImm>
4235   : NeonI_copy<0b1, 0b0, 0b0011,
4236                (outs VPR128:$Rd), (ins VPR128:$src, OpGPR:$Rn, OpImm:$Imm),
4237                asmop # "\t$Rd." # Res # "[$Imm], $Rn",
4238                [(set (ResTy VPR128:$Rd),
4239                  (ResTy (vector_insert
4240                    (ResTy VPR128:$src),
4245   let Constraints = "$src = $Rd";
4248 // The following are for instruction class (3V Elem)
// Base class for three-operand vector-by-element instructions that accumulate
// into Rd (e.g. MLA/MLS): "asmop $Rd.<ResS>, $Rn.<OpS>, $Re.<EleOpS>[$Index]".
// $src is tied to $Rd since the destination is also an input accumulator.
4252 class NI_2VE<bit q, bit u, bits<2> size, bits<4> opcode,
4253              string asmop, string ResS, string OpS, string EleOpS,
4254              Operand OpImm, RegisterOperand ResVPR,
4255              RegisterOperand OpVPR, RegisterOperand EleOpVPR>
4256   : NeonI_2VElem<q, u, size, opcode,
4257                  (outs ResVPR:$Rd), (ins ResVPR:$src, OpVPR:$Rn,
4258                  EleOpVPR:$Re, OpImm:$Index),
4259                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
4260                  ", $Re." # EleOpS # "[$Index]",
4266   let Constraints = "$src = $Rd";
// Variant 1: integer 2S/4S and 4H/8H forms. The lane index bits are scattered
// into Inst{11}, Inst{21} (and Inst{20} for H elements); for H-element forms
// only Re{3-0} is encoded, which is why the element register class is
// VPR128Lo (v0-v15).
4269 multiclass NI_2VE_v1<bit u, bits<4> opcode, string asmop> {
4270   // vector register class for element is always 128-bit to cover the max index
4271   def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
4272                      neon_uimm2_bare, VPR64, VPR64, VPR128> {
4273     let Inst{11} = {Index{1}};
4274     let Inst{21} = {Index{0}};
4275     let Inst{20-16} = Re;
4278   def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
4279                      neon_uimm2_bare, VPR128, VPR128, VPR128> {
4280     let Inst{11} = {Index{1}};
4281     let Inst{21} = {Index{0}};
4282     let Inst{20-16} = Re;
4285   // Index operations on 16-bit(H) elements are restricted to using v0-v15.
4286   def _4h8h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
4287                      neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
4288     let Inst{11} = {Index{2}};
4289     let Inst{21} = {Index{1}};
4290     let Inst{20} = {Index{0}};
4291     let Inst{19-16} = Re{3-0};
4294   def _8h8h : NI_2VE<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
4295                      neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
4296     let Inst{11} = {Index{2}};
4297     let Inst{21} = {Index{1}};
4298     let Inst{20} = {Index{0}};
4299     let Inst{19-16} = Re{3-0};
// Multiply-accumulate / multiply-subtract by element.
4303 defm MLAvve : NI_2VE_v1<0b1, 0b0000, "mla">;
4304 defm MLSvve : NI_2VE_v1<0b1, 0b0100, "mls">;
4306 // Pattern for lane in 128-bit vector
// Matches op(src, Rn, dup(Re[Index])) where the element register Re is
// already 128-bit, so it can be used directly by the instruction.
4307 class NI_2VE_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
4308                    RegisterOperand ResVPR, RegisterOperand OpVPR,
4309                    RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
4310                    ValueType EleOpTy, SDPatternOperator coreop>
4311   : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
4312           (OpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
4313         (INST ResVPR:$src, OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
4315 // Pattern for lane in 64-bit vector
// Same as above, but the element comes from a 64-bit register, which is
// widened to 128-bit with SUBREG_TO_REG before feeding the instruction.
4316 class NI_2VE_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
4317                   RegisterOperand ResVPR, RegisterOperand OpVPR,
4318                   RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
4319                   ValueType EleOpTy, SDPatternOperator coreop>
4320   : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
4321           (OpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
4322         (INST ResVPR:$src, OpVPR:$Rn,
4323           (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
// Selection patterns for the accumulating by-element forms (MLA/MLS). The
// 64-bit-result variants take the element from the low half of a 128-bit
// vector (Neon_low4S/Neon_low8H).
4325 multiclass NI_2VE_v1_pat<string subop, SDPatternOperator op>
4327   def : NI_2VE_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
4328                      op, VPR64, VPR64, VPR128, v2i32, v2i32, v4i32,
4329                      BinOpFrag<(Neon_vduplane
4330                                  (Neon_low4S node:$LHS), node:$RHS)>>;
4332   def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
4333                      op, VPR128, VPR128, VPR128, v4i32, v4i32, v4i32,
4334                      BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4336   def : NI_2VE_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
4337                      op, VPR64, VPR64, VPR128Lo, v4i16, v4i16, v8i16,
4338                      BinOpFrag<(Neon_vduplane
4339                                  (Neon_low8H node:$LHS), node:$RHS)>>;
4341   def : NI_2VE_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
4342                      op, VPR128, VPR128, VPR128Lo, v8i16, v8i16, v8i16,
4343                      BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4345   // Index can only be half of the max value for lane in 64-bit vector
4347   def : NI_2VE_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
4348                     op, VPR64, VPR64, VPR64, v2i32, v2i32, v2i32,
4349                     BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4351   def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4s"), neon_uimm1_bare,
4352                     op, VPR128, VPR128, VPR64, v4i32, v4i32, v2i32,
4353                     BinOpFrag<(Neon_vduplane
4354                                 (Neon_combine_4S node:$LHS, undef),
4357   def : NI_2VE_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
4358                     op, VPR64, VPR64, VPR64Lo, v4i16, v4i16, v4i16,
4359                     BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4361   def : NI_2VE_lane<!cast<Instruction>(subop # "_8h8h"), neon_uimm2_bare,
4362                     op, VPR128, VPR128, VPR64Lo, v8i16, v8i16, v4i16,
4363                     BinOpFrag<(Neon_vduplane
4364                                 (Neon_combine_8H node:$LHS, undef),
4368 defm MLA_lane_v1 : NI_2VE_v1_pat<"MLAvve", Neon_mla>;
4369 defm MLS_lane_v1 : NI_2VE_v1_pat<"MLSvve", Neon_mls>;
// Two-operand counterpart of NI_2VE: no accumulator input ($src), so Rd is
// write-only and no register constraint is needed (e.g. MUL/SQDMULH).
4371 class NI_2VE_2op<bit q, bit u, bits<2> size, bits<4> opcode,
4372                  string asmop, string ResS, string OpS, string EleOpS,
4373                  Operand OpImm, RegisterOperand ResVPR,
4374                  RegisterOperand OpVPR, RegisterOperand EleOpVPR>
4375   : NeonI_2VElem<q, u, size, opcode,
4376                  (outs ResVPR:$Rd), (ins OpVPR:$Rn,
4377                  EleOpVPR:$Re, OpImm:$Index),
4378                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
4379                  ", $Re." # EleOpS # "[$Index]",
// Same form/encoding layout as NI_2VE_v1 (see above), but for the
// two-operand instructions.
4386 multiclass NI_2VE_v1_2op<bit u, bits<4> opcode, string asmop> {
4387   // vector register class for element is always 128-bit to cover the max index
4388   def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
4389                          neon_uimm2_bare, VPR64, VPR64, VPR128> {
4390     let Inst{11} = {Index{1}};
4391     let Inst{21} = {Index{0}};
4392     let Inst{20-16} = Re;
4395   def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
4396                          neon_uimm2_bare, VPR128, VPR128, VPR128> {
4397     let Inst{11} = {Index{1}};
4398     let Inst{21} = {Index{0}};
4399     let Inst{20-16} = Re;
4402   // Index operations on 16-bit(H) elements are restricted to using v0-v15.
4403   def _4h8h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
4404                          neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
4405     let Inst{11} = {Index{2}};
4406     let Inst{21} = {Index{1}};
4407     let Inst{20} = {Index{0}};
4408     let Inst{19-16} = Re{3-0};
4411   def _8h8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
4412                          neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
4413     let Inst{11} = {Index{2}};
4414     let Inst{21} = {Index{1}};
4415     let Inst{20} = {Index{0}};
4416     let Inst{19-16} = Re{3-0};
// Multiply and saturating doubling multiply-high by element.
4420 defm MULve : NI_2VE_v1_2op<0b0, 0b1000, "mul">;
4421 defm SQDMULHve : NI_2VE_v1_2op<0b0, 0b1100, "sqdmulh">;
4422 defm SQRDMULHve : NI_2VE_v1_2op<0b0, 0b1101, "sqrdmulh">;
4424 // Pattern for lane in 128-bit vector
// Two-operand (no accumulator) version of NI_2VE_laneq: op(Rn, dup(Re[Idx])).
4425 class NI_2VE_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
4426                        RegisterOperand OpVPR, RegisterOperand EleOpVPR,
4427                        ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
4428                        SDPatternOperator coreop>
4429   : Pat<(ResTy (op (OpTy OpVPR:$Rn),
4430           (OpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
4431         (INST OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
4433 // Pattern for lane in 64-bit vector
// 64-bit element register is widened to 128-bit with SUBREG_TO_REG.
4434 class NI_2VE_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
4435                       RegisterOperand OpVPR, RegisterOperand EleOpVPR,
4436                       ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
4437                       SDPatternOperator coreop>
4438   : Pat<(ResTy (op (OpTy OpVPR:$Rn),
4439           (OpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
4441           (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
// Selection patterns for MUL/SQDMULH/SQRDMULH by element; mirrors
// NI_2VE_v1_pat but without the accumulator operand.
4443 multiclass NI_2VE_mul_v1_pat<string subop, SDPatternOperator op> {
4444   def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
4445                          op, VPR64, VPR128, v2i32, v2i32, v4i32,
4446                          BinOpFrag<(Neon_vduplane
4447                                      (Neon_low4S node:$LHS), node:$RHS)>>;
4449   def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
4450                          op, VPR128, VPR128, v4i32, v4i32, v4i32,
4451                          BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4453   def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
4454                          op, VPR64, VPR128Lo, v4i16, v4i16, v8i16,
4455                          BinOpFrag<(Neon_vduplane
4456                                      (Neon_low8H node:$LHS), node:$RHS)>>;
4458   def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
4459                          op, VPR128, VPR128Lo, v8i16, v8i16, v8i16,
4460                          BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4462   // Index can only be half of the max value for lane in 64-bit vector
4464   def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
4465                         op, VPR64, VPR64, v2i32, v2i32, v2i32,
4466                         BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4468   def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4s"), neon_uimm1_bare,
4469                         op, VPR128, VPR64, v4i32, v4i32, v2i32,
4470                         BinOpFrag<(Neon_vduplane
4471                                     (Neon_combine_4S node:$LHS, undef),
4474   def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
4475                         op, VPR64, VPR64Lo, v4i16, v4i16, v4i16,
4476                         BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4478   def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_8h8h"), neon_uimm2_bare,
4479                         op, VPR128, VPR64Lo, v8i16, v8i16, v4i16,
4480                         BinOpFrag<(Neon_vduplane
4481                                     (Neon_combine_8H node:$LHS, undef),
4485 defm MUL_lane_v1 : NI_2VE_mul_v1_pat<"MULve", mul>;
4486 defm SQDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQDMULHve", int_arm_neon_vqdmulh>;
4487 defm SQRDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQRDMULHve", int_arm_neon_vqrdmulh>;
// Variant 2: floating-point two-operand by-element forms (2S/4S/2D).
// There is no 1D form; the D-element index is a single bit in Inst{11}.
4491 multiclass NI_2VE_v2_2op<bit u, bits<4> opcode, string asmop> {
4492   // vector register class for element is always 128-bit to cover the max index
4493   def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
4494                          neon_uimm2_bare, VPR64, VPR64, VPR128> {
4495     let Inst{11} = {Index{1}};
4496     let Inst{21} = {Index{0}};
4497     let Inst{20-16} = Re;
4500   def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
4501                          neon_uimm2_bare, VPR128, VPR128, VPR128> {
4502     let Inst{11} = {Index{1}};
4503     let Inst{21} = {Index{0}};
4504     let Inst{20-16} = Re;
4507   // _1d2d doesn't exist!
4509   def _2d2d : NI_2VE_2op<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
4510                          neon_uimm1_bare, VPR128, VPR128, VPR128> {
4511     let Inst{11} = {Index{0}};
4513     let Inst{20-16} = Re;
4517 defm FMULve : NI_2VE_v2_2op<0b0, 0b1001, "fmul">;
4518 defm FMULXve : NI_2VE_v2_2op<0b1, 0b1001, "fmulx">;
// 2D-by-scalar case: coreop duplicates a v1f64 into both lanes
// (Neon_combine_2d with identical operands), so lane index 0 is used after
// widening Re with SUBREG_TO_REG.
4520 class NI_2VE_mul_lane_2d<Instruction INST, Operand OpImm, SDPatternOperator op,
4521                          RegisterOperand OpVPR, RegisterOperand EleOpVPR,
4522                          ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
4523                          SDPatternOperator coreop>
4524   : Pat<(ResTy (op (OpTy OpVPR:$Rn),
4525           (OpTy (coreop (EleOpTy EleOpVPR:$Re), (EleOpTy EleOpVPR:$Re))))),
4527           (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), 0)>;
// Selection patterns for FMUL/FMULX by element (float/double lanes).
4529 multiclass NI_2VE_mul_v2_pat<string subop, SDPatternOperator op> {
4530   def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
4531                          op, VPR64, VPR128, v2f32, v2f32, v4f32,
4532                          BinOpFrag<(Neon_vduplane
4533                                      (Neon_low4f node:$LHS), node:$RHS)>>;
4535   def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
4536                          op, VPR128, VPR128, v4f32, v4f32, v4f32,
4537                          BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4539   def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
4540                          op, VPR128, VPR128, v2f64, v2f64, v2f64,
4541                          BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4543   // Index can only be half of the max value for lane in 64-bit vector
4545   def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
4546                         op, VPR64, VPR64, v2f32, v2f32, v2f32,
4547                         BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4549   def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4s"), neon_uimm1_bare,
4550                         op, VPR128, VPR64, v4f32, v4f32, v2f32,
4551                         BinOpFrag<(Neon_vduplane
4552                                     (Neon_combine_4f node:$LHS, undef),
4555   def : NI_2VE_mul_lane_2d<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
4556                            op, VPR128, VPR64, v2f64, v2f64, v1f64,
4557                            BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
4560 defm FMUL_lane_v2 : NI_2VE_mul_v2_pat<"FMULve", fmul>;
4561 defm FMULX_lane_v2 : NI_2VE_mul_v2_pat<"FMULXve", int_aarch64_neon_vmulx>;
4563 // The following are patterns using fma
4564 // -ffp-contract=fast generates fma
// Accumulating floating-point by-element forms (FMLA/FMLS); same encoding
// layout as NI_2VE_v2_2op above, but built on the three-operand NI_2VE class.
4566 multiclass NI_2VE_v2<bit u, bits<4> opcode, string asmop> {
4567   // vector register class for element is always 128-bit to cover the max index
4568   def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
4569                      neon_uimm2_bare, VPR64, VPR64, VPR128> {
4570     let Inst{11} = {Index{1}};
4571     let Inst{21} = {Index{0}};
4572     let Inst{20-16} = Re;
4575   def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
4576                      neon_uimm2_bare, VPR128, VPR128, VPR128> {
4577     let Inst{11} = {Index{1}};
4578     let Inst{21} = {Index{0}};
4579     let Inst{20-16} = Re;
4582   // _1d2d doesn't exist!
4584   def _2d2d : NI_2VE<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
4585                      neon_uimm1_bare, VPR128, VPR128, VPR128> {
4586     let Inst{11} = {Index{0}};
4588     let Inst{20-16} = Re;
4592 defm FMLAvve : NI_2VE_v2<0b0, 0b0001, "fmla">;
4593 defm FMLSvve : NI_2VE_v2<0b0, 0b0101, "fmls">;
4595 // Pattern for lane in 128-bit vector
// "swap" classes: the duplicated lane appears as fma's FIRST operand in the
// DAG (fma is commutative in its multiplicands), so the operand order is
// swapped relative to NI_2VE_laneq/lane.
4596 class NI_2VEswap_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
4597                        RegisterOperand ResVPR, RegisterOperand OpVPR,
4598                        ValueType ResTy, ValueType OpTy,
4599                        SDPatternOperator coreop>
4600   : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
4601                    (ResTy ResVPR:$src), (ResTy ResVPR:$Rn))),
4602         (INST ResVPR:$src, ResVPR:$Rn, OpVPR:$Re, OpImm:$Index)>;
4604 // Pattern for lane in 64-bit vector
4605 class NI_2VEswap_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
4606                       RegisterOperand ResVPR, RegisterOperand OpVPR,
4607                       ValueType ResTy, ValueType OpTy,
4608                       SDPatternOperator coreop>
4609   : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
4610                    (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
4611         (INST ResVPR:$src, ResVPR:$Rn,
4612           (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), OpImm:$Index)>;
4614 // Pattern for lane in 64-bit vector
// 2D-by-scalar form: coreop combines the same v1f64 into both lanes, hence
// the hard-coded lane index 0.
4615 class NI_2VEswap_lane_2d2d<Instruction INST, Operand OpImm,
4616                            SDPatternOperator op,
4617                            RegisterOperand ResVPR, RegisterOperand OpVPR,
4618                            ValueType ResTy, ValueType OpTy,
4619                            SDPatternOperator coreop>
4620   : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (OpTy OpVPR:$Re))),
4621                    (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
4622         (INST ResVPR:$src, ResVPR:$Rn,
4623           (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), 0)>;
// Selection patterns mapping fma with a duplicated lane onto FMLA by element.
4626 multiclass NI_2VE_fma_v2_pat<string subop, SDPatternOperator op> {
4627   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
4628                          neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
4629                          BinOpFrag<(Neon_vduplane
4630                                      (Neon_low4f node:$LHS), node:$RHS)>>;
4632   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
4633                          neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
4634                          BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4636   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
4637                          neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
4638                          BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4640   // Index can only be half of the max value for lane in 64-bit vector
4642   def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
4643                         neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
4644                         BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4646   def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
4647                         neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
4648                         BinOpFrag<(Neon_vduplane
4649                                     (Neon_combine_4f node:$LHS, undef),
4652   def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
4653                              neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
4654                              BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
4657 defm FMLA_lane_v2_s : NI_2VE_fma_v2_pat<"FMLAvve", fma>;
// FMLS is selected from fma with a negated multiplicand. Two patterns exist
// per form because the fneg can sit either outside the lane-duplicate
// (fneg (dup ...)) or inside it (dup (fneg ...)) in the DAG.
4659 multiclass NI_2VE_fms_v2_pat<string subop, SDPatternOperator op>
4661   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
4662                          neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
4663                          BinOpFrag<(fneg (Neon_vduplane
4664                                      (Neon_low4f node:$LHS), node:$RHS))>>;
4666   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
4667                          neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
4668                          BinOpFrag<(Neon_vduplane
4669                                      (Neon_low4f (fneg node:$LHS)),
4672   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
4673                          neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
4674                          BinOpFrag<(fneg (Neon_vduplane
4675                                      node:$LHS, node:$RHS))>>;
4677   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
4678                          neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
4679                          BinOpFrag<(Neon_vduplane
4680                                      (fneg node:$LHS), node:$RHS)>>;
4682   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
4683                          neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
4684                          BinOpFrag<(fneg (Neon_vduplane
4685                                      node:$LHS, node:$RHS))>>;
4687   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
4688                          neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
4689                          BinOpFrag<(Neon_vduplane
4690                                      (fneg node:$LHS), node:$RHS)>>;
4692   // Index can only be half of the max value for lane in 64-bit vector
4694   def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
4695                         neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
4696                         BinOpFrag<(fneg (Neon_vduplane
4697                                     node:$LHS, node:$RHS))>>;
4699   def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
4700                         neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
4701                         BinOpFrag<(Neon_vduplane
4702                                     (fneg node:$LHS), node:$RHS)>>;
4704   def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
4705                         neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
4706                         BinOpFrag<(fneg (Neon_vduplane
4707                                     (Neon_combine_4f node:$LHS, undef),
4710   def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
4711                         neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
4712                         BinOpFrag<(Neon_vduplane
4713                                     (Neon_combine_4f (fneg node:$LHS), undef),
4716   def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
4717                              neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
4718                              BinOpFrag<(fneg (Neon_combine_2d
4719                                          node:$LHS, node:$RHS))>>;
4721   def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
4722                              neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
4723                              BinOpFrag<(Neon_combine_2d
4724                                          (fneg node:$LHS), (fneg node:$RHS))>>;
4727 defm FMLS_lane_v2_s : NI_2VE_fms_v2_pat<"FMLSvve", fma>;
4729 // Variant 3: Long type
4730 // E.g. SMLAL : 4S/4H/H (v0-v15), 2D/2S/S
4731 //      SMLAL2: 4S/8H/H (v0-v15), 2D/4S/S
// Long (widening) accumulating by-element forms. The q=1 variants get a "2"
// suffix (asmop # "2") and read the high half of the 128-bit source.
4733 multiclass NI_2VE_v3<bit u, bits<4> opcode, string asmop> {
4734   // vector register class for element is always 128-bit to cover the max index
4735   def _2d2s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
4736                      neon_uimm2_bare, VPR128, VPR64, VPR128> {
4737     let Inst{11} = {Index{1}};
4738     let Inst{21} = {Index{0}};
4739     let Inst{20-16} = Re;
4742   def _2d4s : NI_2VE<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
4743                      neon_uimm2_bare, VPR128, VPR128, VPR128> {
4744     let Inst{11} = {Index{1}};
4745     let Inst{21} = {Index{0}};
4746     let Inst{20-16} = Re;
4749   // Index operations on 16-bit(H) elements are restricted to using v0-v15.
4750   def _4s8h : NI_2VE<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
4751                      neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
4752     let Inst{11} = {Index{2}};
4753     let Inst{21} = {Index{1}};
4754     let Inst{20} = {Index{0}};
4755     let Inst{19-16} = Re{3-0};
4758   def _4s4h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
4759                      neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
4760     let Inst{11} = {Index{2}};
4761     let Inst{21} = {Index{1}};
4762     let Inst{20} = {Index{0}};
4763     let Inst{19-16} = Re{3-0};
// Long multiply-accumulate/subtract, signed/unsigned, plus the saturating
// doubling variants.
4767 defm SMLALvve : NI_2VE_v3<0b0, 0b0010, "smlal">;
4768 defm UMLALvve : NI_2VE_v3<0b1, 0b0010, "umlal">;
4769 defm SMLSLvve : NI_2VE_v3<0b0, 0b0110, "smlsl">;
4770 defm UMLSLvve : NI_2VE_v3<0b1, 0b0110, "umlsl">;
4771 defm SQDMLALvve : NI_2VE_v3<0b0, 0b0011, "sqdmlal">;
4772 defm SQDMLSLvve : NI_2VE_v3<0b0, 0b0111, "sqdmlsl">;
// Two-operand (non-accumulating) long by-element forms: SMULL/UMULL/SQDMULL.
4774 multiclass NI_2VE_v3_2op<bit u, bits<4> opcode, string asmop> {
4775   // vector register class for element is always 128-bit to cover the max index
4776   def _2d2s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
4777                          neon_uimm2_bare, VPR128, VPR64, VPR128> {
4778     let Inst{11} = {Index{1}};
4779     let Inst{21} = {Index{0}};
4780     let Inst{20-16} = Re;
4783   def _2d4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
4784                          neon_uimm2_bare, VPR128, VPR128, VPR128> {
4785     let Inst{11} = {Index{1}};
4786     let Inst{21} = {Index{0}};
4787     let Inst{20-16} = Re;
4790   // Index operations on 16-bit(H) elements are restricted to using v0-v15.
4791   def _4s8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
4792                          neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
4793     let Inst{11} = {Index{2}};
4794     let Inst{21} = {Index{1}};
4795     let Inst{20} = {Index{0}};
4796     let Inst{19-16} = Re{3-0};
4799   def _4s4h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
4800                          neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
4801     let Inst{11} = {Index{2}};
4802     let Inst{21} = {Index{1}};
4803     let Inst{20} = {Index{0}};
4804     let Inst{19-16} = Re{3-0};
4808 defm SMULLve : NI_2VE_v3_2op<0b0, 0b1010, "smull">;
4809 defm UMULLve : NI_2VE_v3_2op<0b1, 0b1010, "umull">;
4810 defm SQDMULLve : NI_2VE_v3_2op<0b0, 0b1011, "sqdmull">;
4812 // Pattern for lane in 128-bit vector
// "2VEL2" classes match the high-half ("2"-suffixed) long instructions: hiop
// (Neon_High8H / Neon_High4S) extracts the upper half of the 128-bit Rn.
4813 class NI_2VEL2_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
4814                      RegisterOperand EleOpVPR, ValueType ResTy,
4815                      ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
4816                      SDPatternOperator hiop, SDPatternOperator coreop>
4817   : Pat<(ResTy (op (ResTy VPR128:$src),
4818           (HalfOpTy (hiop (OpTy VPR128:$Rn))),
4819           (HalfOpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
4820         (INST VPR128:$src, VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
4822 // Pattern for lane in 64-bit vector
4823 class NI_2VEL2_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
4824                     RegisterOperand EleOpVPR, ValueType ResTy,
4825                     ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
4826                     SDPatternOperator hiop, SDPatternOperator coreop>
4827   : Pat<(ResTy (op (ResTy VPR128:$src),
4828           (HalfOpTy (hiop (OpTy VPR128:$Rn))),
4829           (HalfOpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
4830         (INST VPR128:$src, VPR128:$Rn,
4831           (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
// Selection patterns for the long accumulating by-element instructions
// (SMLAL/UMLAL/SMLSL/UMLSL): low-half forms via NI_2VE_laneq/lane, high-half
// forms via the NI_2VEL2 classes above.
4833 multiclass NI_2VEL_v3_pat<string subop, SDPatternOperator op> {
4834   def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
4835                      op, VPR128, VPR64, VPR128Lo, v4i32, v4i16, v8i16,
4836                      BinOpFrag<(Neon_vduplane
4837                                  (Neon_low8H node:$LHS), node:$RHS)>>;
4839   def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
4840                      op, VPR128, VPR64, VPR128, v2i64, v2i32, v4i32,
4841                      BinOpFrag<(Neon_vduplane
4842                                  (Neon_low4S node:$LHS), node:$RHS)>>;
4844   def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
4845                        op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H,
4846                        BinOpFrag<(Neon_vduplane
4847                                    (Neon_low8H node:$LHS), node:$RHS)>>;
4849   def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
4850                        op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S,
4851                        BinOpFrag<(Neon_vduplane
4852                                    (Neon_low4S node:$LHS), node:$RHS)>>;
4854   // Index can only be half of the max value for lane in 64-bit vector
4856   def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
4857                     op, VPR128, VPR64, VPR64Lo, v4i32, v4i16, v4i16,
4858                     BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4860   def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
4861                     op, VPR128, VPR64, VPR64, v2i64, v2i32, v2i32,
4862                     BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4864   def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
4865                       op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H,
4866                       BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4868   def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
4869                       op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S,
4870                       BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4873 defm SMLAL_lane_v3 : NI_2VEL_v3_pat<"SMLALvve", Neon_smlal>;
4874 defm UMLAL_lane_v3 : NI_2VEL_v3_pat<"UMLALvve", Neon_umlal>;
4875 defm SMLSL_lane_v3 : NI_2VEL_v3_pat<"SMLSLvve", Neon_smlsl>;
4876 defm UMLSL_lane_v3 : NI_2VEL_v3_pat<"UMLSLvve", Neon_umlsl>;
4878 // Pattern for lane in 128-bit vector
// Non-accumulating (mul) versions of the high-half long patterns: same shape
// as NI_2VEL2_laneq/lane but without the $src accumulator operand.
4879 class NI_2VEL2_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
4880                          RegisterOperand EleOpVPR, ValueType ResTy,
4881                          ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
4882                          SDPatternOperator hiop, SDPatternOperator coreop>
4884           (HalfOpTy (hiop (OpTy VPR128:$Rn))),
4885           (HalfOpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
4886         (INST VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
4888 // Pattern for lane in 64-bit vector
4889 class NI_2VEL2_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
4890                         RegisterOperand EleOpVPR, ValueType ResTy,
4891                         ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
4892                         SDPatternOperator hiop, SDPatternOperator coreop>
4894           (HalfOpTy (hiop (OpTy VPR128:$Rn))),
4895           (HalfOpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
4897           (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
// Selection patterns for SMULL/UMULL/SQDMULL by element.
4899 multiclass NI_2VEL_mul_v3_pat<string subop, SDPatternOperator op> {
4900   def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
4901                          op, VPR64, VPR128Lo, v4i32, v4i16, v8i16,
4902                          BinOpFrag<(Neon_vduplane
4903                                      (Neon_low8H node:$LHS), node:$RHS)>>;
4905   def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
4906                          op, VPR64, VPR128, v2i64, v2i32, v4i32,
4907                          BinOpFrag<(Neon_vduplane
4908                                      (Neon_low4S node:$LHS), node:$RHS)>>;
4910   def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
4911                            op, VPR128Lo, v4i32, v8i16, v8i16, v4i16,
4913                            BinOpFrag<(Neon_vduplane
4914                                        (Neon_low8H node:$LHS), node:$RHS)>>;
4916   def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
4917                            op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S,
4918                            BinOpFrag<(Neon_vduplane
4919                                        (Neon_low4S node:$LHS), node:$RHS)>>;
4921   // Index can only be half of the max value for lane in 64-bit vector
4923   def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
4924                         op, VPR64, VPR64Lo, v4i32, v4i16, v4i16,
4925                         BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4927   def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
4928                         op, VPR64, VPR64, v2i64, v2i32, v2i32,
4929                         BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4931   def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
4932                           op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H,
4933                           BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4935   def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
4936                           op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S,
4937                           BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4940 defm SMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SMULLve", int_arm_neon_vmulls>;
4941 defm UMULL_lane_v3 : NI_2VEL_mul_v3_pat<"UMULLve", int_arm_neon_vmullu>;
4942 defm SQDMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SQDMULLve", int_arm_neon_vqdmull>;
// PatFrags expressing sqdmlal/sqdmlsl as a saturating add/sub (op) of the
// accumulator with the result of vqdmull.
4944 multiclass NI_qdma<SDPatternOperator op> {
4945   def _4s : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
4947                (v4i32 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
4949   def _2d : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
4951                (v2i64 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
4954 defm Neon_qdmlal : NI_qdma<int_arm_neon_vqadds>;
4955 defm Neon_qdmlsl : NI_qdma<int_arm_neon_vqsubs>;
// Selection patterns for SQDMLAL/SQDMLSL by element, built on the Neon_qdmlal
// / Neon_qdmlsl PatFrags above (op names are resolved with !cast<PatFrag>).
4957 multiclass NI_2VEL_v3_qdma_pat<string subop, string op> {
4958   def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
4959                      !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR128Lo,
4960                      v4i32, v4i16, v8i16,
4961                      BinOpFrag<(Neon_vduplane
4962                                  (Neon_low8H node:$LHS), node:$RHS)>>;
4964   def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
4965                      !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR128,
4966                      v2i64, v2i32, v4i32,
4967                      BinOpFrag<(Neon_vduplane
4968                                  (Neon_low4S node:$LHS), node:$RHS)>>;
4970   def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
4971                        !cast<PatFrag>(op # "_4s"), VPR128Lo,
4972                        v4i32, v8i16, v8i16, v4i16, Neon_High8H,
4973                        BinOpFrag<(Neon_vduplane
4974                                    (Neon_low8H node:$LHS), node:$RHS)>>;
4976   def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
4977                        !cast<PatFrag>(op # "_2d"), VPR128,
4978                        v2i64, v4i32, v4i32, v2i32, Neon_High4S,
4979                        BinOpFrag<(Neon_vduplane
4980                                    (Neon_low4S node:$LHS), node:$RHS)>>;
4982   // Index can only be half of the max value for lane in 64-bit vector
4984   def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
4985                     !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR64Lo,
4986                     v4i32, v4i16, v4i16,
4987                     BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4989   def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
4990                     !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR64,
4991                     v2i64, v2i32, v2i32,
4992                     BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4994   def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
4995                       !cast<PatFrag>(op # "_4s"), VPR64Lo,
4996                       v4i32, v8i16, v4i16, v4i16, Neon_High8H,
4997                       BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4999   def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
5000                       !cast<PatFrag>(op # "_2d"), VPR64,
5001                       v2i64, v4i32, v2i32, v2i32, Neon_High4S,
5002                       BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5005 defm SQDMLAL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLALvve", "Neon_qdmlal">;
5006 defm SQDMLSL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLSLvve", "Neon_qdmlsl">;
5008 // End of implementation for instruction class (3V Elem)
5010 //Insert element (vector, from main)
// INS from a GPR, per element size. Inst{20-16} encodes the lane index
// shifted left past a set marker bit whose position identifies the element
// size (b: xxxx1, h: xxx10, s: xx100, d: x1000).
5011 def INSbw : NeonI_INS_main<"ins", "b", v16i8, GPR32, i32,
5013   let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
5015 def INShw : NeonI_INS_main<"ins", "h", v8i16, GPR32, i32,
5017   let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
5019 def INSsw : NeonI_INS_main<"ins", "s", v4i32, GPR32, i32,
5021   let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
5023 def INSdx : NeonI_INS_main<"ins", "d", v2i64, GPR64, i64,
5025   let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
// Use the 128-bit INS instruction for inserts into a 64-bit vector: widen
// the v64 source with SUBREG_TO_REG, insert, then take back the low half
// with EXTRACT_SUBREG.
5028 class Neon_INS_main_pattern <ValueType ResTy,ValueType ExtResTy,
5029                              RegisterClass OpGPR, ValueType OpTy,
5030                              Operand OpImm, Instruction INS>
5031   : Pat<(ResTy (vector_insert
5035         (ResTy (EXTRACT_SUBREG
5036           (ExtResTy (INS (ExtResTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
5037             OpGPR:$Rn, OpImm:$Imm)), sub_64))>;
5039 def INSbw_pattern : Neon_INS_main_pattern<v8i8, v16i8, GPR32, i32,
5040                                           neon_uimm3_bare, INSbw>;
5041 def INShw_pattern : Neon_INS_main_pattern<v4i16, v8i16, GPR32, i32,
5042                                           neon_uimm2_bare, INShw>;
5043 def INSsw_pattern : Neon_INS_main_pattern<v2i32, v4i32, GPR32, i32,
5044                                           neon_uimm1_bare, INSsw>;
5045 def INSdx_pattern : Neon_INS_main_pattern<v1i64, v2i64, GPR64, i64,
5046                                           neon_uimm0_bare, INSdx>;
// INS (element): copy lane Immn of Rn into lane Immd of Rd; selected from a
// vector_insert of a vector_extract. $src is tied to $Rd to preserve the
// other lanes.
5048 class NeonI_INS_element<string asmop, string Res, ValueType ResTy,
5049                         Operand ResImm, ValueType MidTy>
5050   : NeonI_insert<0b1, 0b1,
5051                  (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn,
5052                  ResImm:$Immd, ResImm:$Immn),
5053                  asmop # "\t$Rd." # Res # "[$Immd], $Rn." # Res # "[$Immn]",
5054                  [(set (ResTy VPR128:$Rd),
5055                    (ResTy (vector_insert
5056                      (ResTy VPR128:$src),
5057                      (MidTy (vector_extract
5062   let Constraints = "$src = $Rd";
5067 //Insert element (vector, from element)
// Destination index is encoded in Inst{20-16} (same marker-bit scheme as the
// GPR INS forms above); source index goes in Inst{14-11}, with the unused
// low bits left unspecified for the wider element sizes.
5068 def INSELb : NeonI_INS_element<"ins", "b", v16i8, neon_uimm4_bare, i32> {
5069   let Inst{20-16} = {Immd{3}, Immd{2}, Immd{1}, Immd{0}, 0b1};
5070   let Inst{14-11} = {Immn{3}, Immn{2}, Immn{1}, Immn{0}};
5072 def INSELh : NeonI_INS_element<"ins", "h", v8i16, neon_uimm3_bare, i32> {
5073   let Inst{20-16} = {Immd{2}, Immd{1}, Immd{0}, 0b1, 0b0};
5074   let Inst{14-12} = {Immn{2}, Immn{1}, Immn{0}};
5075   // bit 11 is unspecified.
5077 def INSELs : NeonI_INS_element<"ins", "s", v4i32, neon_uimm2_bare, i32> {
5078   let Inst{20-16} = {Immd{1}, Immd{0}, 0b1, 0b0, 0b0};
5079   let Inst{14-13} = {Immn{1}, Immn{0}};
5080   // bits 11-12 are unspecified.
5082 def INSELd : NeonI_INS_element<"ins", "d", v2i64, neon_uimm1_bare, i64> {
5083   let Inst{20-16} = {Immd, 0b1, 0b0, 0b0, 0b0};
5084   let Inst{14} = Immn{0};
5085   // bits 11-13 are unspecified.
// Patterns selecting INS (element) for floating-point element moves.
// Three shapes are covered:
//   1) 128-bit vector <- lane of another 128-bit vector (direct INS);
//   2) 128-bit vector <- FP scalar register (scalar widened to a vector
//      lane via SUBREG_TO_REG so INS can address it as lane 0);
//   3) 64-bit vector insert, handled by widening source and destination to
//      128 bits around the INS and extracting the low half afterwards.
//   NaTy is the 64-bit ("narrow") counterpart of ResTy; SubIndex is the
//   subregister index of the FP scalar within the vector register.
5088 multiclass Neon_INS_elt_float_pattern<ValueType ResTy, ValueType NaTy,
5090 RegisterClass OpFPR, Operand ResImm,
5091 SubRegIndex SubIndex, Instruction INS> {
5092 def : Pat<(ResTy (vector_insert
5093 (ResTy VPR128:$src),
5094 (MidTy (vector_extract
5098 (INS (ResTy VPR128:$src), (ResTy VPR128:$Rn),
5099 ResImm:$Immd, ResImm:$Immn)>;
5101 def : Pat <(ResTy (vector_insert
5102 (ResTy VPR128:$src),
5105 (INS (ResTy VPR128:$src),
5106 (ResTy (SUBREG_TO_REG (i64 0), OpFPR:$Rn, SubIndex)),
5110 def : Pat <(NaTy (vector_insert
5114 (NaTy (EXTRACT_SUBREG
5116 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
5117 (ResTy (SUBREG_TO_REG (i64 0), (MidTy OpFPR:$Rn), SubIndex)),
// Instantiate the FP insert patterns for single and double precision.
5123 defm : Neon_INS_elt_float_pattern<v4f32, v2f32, f32, FPR32, neon_uimm2_bare,
5125 defm : Neon_INS_elt_float_pattern<v2f64, v1f64, f64, FPR64, neon_uimm1_bare,
// Patterns selecting INSEL* for integer element moves in which at least one
// operand is a 64-bit vector.  Each 64-bit operand is widened to 128 bits
// with SUBREG_TO_REG before the INS; when the destination is 64-bit, the
// low half of the 128-bit result is extracted afterwards.
//   NaTy/NaImm - 64-bit vector type and its lane-index operand
//   StTy/StImm - 128-bit vector type and its lane-index operand
//   MidTy      - scalar type carried between extract and insert
5128 multiclass Neon_INS_elt_pattern <ValueType NaTy, Operand NaImm,
5129 ValueType MidTy, ValueType StTy,
5130 Operand StImm, Instruction INS> {
5131 def : Pat<(NaTy (vector_insert
5133 (MidTy (vector_extract
5137 (NaTy (EXTRACT_SUBREG
5139 (StTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
5145 def : Pat<(StTy (vector_insert
5147 (MidTy (vector_extract
5153 (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
5157 def : Pat<(NaTy (vector_insert
5159 (MidTy (vector_extract
5163 (NaTy (EXTRACT_SUBREG
5165 (StTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
5166 (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// Instantiate the mixed 64/128-bit integer insert patterns for each
// element size, pairing each 64-bit vector type with its 128-bit
// counterpart and the matching INSEL instruction.
5172 defm : Neon_INS_elt_pattern<v8i8, neon_uimm3_bare, i32,
5173 v16i8, neon_uimm4_bare, INSELb>;
5174 defm : Neon_INS_elt_pattern<v4i16, neon_uimm2_bare, i32,
5175 v8i16, neon_uimm3_bare, INSELh>;
5176 defm : Neon_INS_elt_pattern<v2i32, neon_uimm1_bare, i32,
5177 v4i32, neon_uimm2_bare, INSELs>;
5178 defm : Neon_INS_elt_pattern<v1i64, neon_uimm0_bare, i64,
5179 v2i64, neon_uimm1_bare, INSELd>;
// Instruction class for SMOV: sign-extending move of a vector lane to a
// general-purpose register.  Q selects the 64-bit (x) vs 32-bit (w)
// destination form; the pattern matches a vector_extract from a 128-bit
// source register.
//   Res    - element suffix in the assembly string ("b"/"h"/"s")
//   OpTy   - 128-bit source vector type; eleTy its element type
//   OpImm  - lane-index immediate operand
//   ResGPR/ResTy - destination register class and scalar type
5182 class NeonI_SMOV<string asmop, string Res, bit Q,
5183 ValueType OpTy, ValueType eleTy,
5184 Operand OpImm, RegisterClass ResGPR, ValueType ResTy>
5185 : NeonI_copy<Q, 0b0, 0b0101,
5186 (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
5187 asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
5188 [(set (ResTy ResGPR:$Rd),
5190 (ResTy (vector_extract
5191 (OpTy VPR128:$Rn), (OpImm:$Imm))),
// Signed integer move (main, from element): SMOV Wd/Xd, Vn.T[i].
// Bits 20-16 (imm5) encode the lane index with a trailing marker bit whose
// position indicates the element size.  w-forms exist for b/h elements,
// x-forms for b/h/s.
5198 def SMOVwb : NeonI_SMOV<"smov", "b", 0b0, v16i8, i8, neon_uimm4_bare,
5200 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
5202 def SMOVwh : NeonI_SMOV<"smov", "h", 0b0, v8i16, i16, neon_uimm3_bare,
5204 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
5206 def SMOVxb : NeonI_SMOV<"smov", "b", 0b1, v16i8, i8, neon_uimm4_bare,
5208 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
5210 def SMOVxh : NeonI_SMOV<"smov", "h", 0b1, v8i16, i16, neon_uimm3_bare,
5212 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
5214 def SMOVxs : NeonI_SMOV<"smov", "s", 0b1, v4i32, i32, neon_uimm2_bare,
5216 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
// Fold sign-extensions of lane extracts into a single SMOVx.
// Covers sext_inreg and sext of extracts from both the 128-bit (StTy)
// vector and its 64-bit (NaTy) counterpart; 64-bit sources are widened to
// 128 bits with SUBREG_TO_REG so the 128-bit SMOV encoding can be used.
5219 multiclass Neon_SMOVx_pattern <ValueType StTy, ValueType NaTy,
5220 ValueType eleTy, Operand StImm, Operand NaImm,
5221 Instruction SMOVI> {
5222 def : Pat<(i64 (sext_inreg
5224 (i32 (vector_extract
5225 (StTy VPR128:$Rn), (StImm:$Imm))))),
5227 (SMOVI VPR128:$Rn, StImm:$Imm)>;
5229 def : Pat<(i64 (sext
5230 (i32 (vector_extract
5231 (StTy VPR128:$Rn), (StImm:$Imm))))),
5232 (SMOVI VPR128:$Rn, StImm:$Imm)>;
5234 def : Pat<(i64 (sext_inreg
5235 (i64 (vector_extract
5236 (NaTy VPR64:$Rn), (NaImm:$Imm))),
5238 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
5241 def : Pat<(i64 (sext_inreg
5243 (i32 (vector_extract
5244 (NaTy VPR64:$Rn), (NaImm:$Imm))))),
5246 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
5249 def : Pat<(i64 (sext
5250 (i32 (vector_extract
5251 (NaTy VPR64:$Rn), (NaImm:$Imm))))),
5252 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// Instantiate the x-form SMOV folding patterns for b/h/s elements.
5256 defm : Neon_SMOVx_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
5257 neon_uimm3_bare, SMOVxb>;
5258 defm : Neon_SMOVx_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
5259 neon_uimm2_bare, SMOVxh>;
5260 defm : Neon_SMOVx_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
5261 neon_uimm1_bare, SMOVxs>;
// Fold a 32-bit sext_inreg of a lane extract from a 64-bit vector into a
// w-form SMOV, widening the source to 128 bits with SUBREG_TO_REG.
5263 class Neon_SMOVw_pattern <ValueType StTy, ValueType NaTy,
5264 ValueType eleTy, Operand StImm, Operand NaImm,
5266 : Pat<(i32 (sext_inreg
5267 (i32 (vector_extract
5268 (NaTy VPR64:$Rn), (NaImm:$Imm))),
5270 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// w-form SMOV only exists for b and h elements (s would be a plain move).
5273 def : Neon_SMOVw_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
5274 neon_uimm3_bare, SMOVwb>;
5275 def : Neon_SMOVw_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
5276 neon_uimm2_bare, SMOVwh>;
// Instruction class for UMOV: zero-extending move of a vector lane to a
// general-purpose register.  Same shape as NeonI_SMOV but with opcode
// 0b0111 and no separate element scalar type (the extract result type is
// the GPR type directly).
5278 class NeonI_UMOV<string asmop, string Res, bit Q,
5279 ValueType OpTy, Operand OpImm,
5280 RegisterClass ResGPR, ValueType ResTy>
5281 : NeonI_copy<Q, 0b0, 0b0111,
5282 (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
5283 asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
5284 [(set (ResTy ResGPR:$Rd),
5285 (ResTy (vector_extract
5286 (OpTy VPR128:$Rn), (OpImm:$Imm))))],
// Unsigned integer move (main, from element): UMOV Wd/Xd, Vn.T[i].
// Bits 20-16 (imm5) encode the lane index with the size-marker bit, as for
// SMOV above.  w-forms for b/h/s elements, x-form for d.
5292 def UMOVwb : NeonI_UMOV<"umov", "b", 0b0, v16i8, neon_uimm4_bare,
5294 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
5296 def UMOVwh : NeonI_UMOV<"umov", "h", 0b0, v8i16, neon_uimm3_bare,
5298 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
5300 def UMOVws : NeonI_UMOV<"umov", "s", 0b0, v4i32, neon_uimm2_bare,
5302 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
5304 def UMOVxd : NeonI_UMOV<"umov", "d", 0b1, v2i64, neon_uimm1_bare,
5306 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
// Select UMOV for a plain lane extract from a 64-bit vector by widening the
// source to 128 bits first.
// NOTE(review): the instruction parameter is named SMOVI although it is
// always instantiated with UMOV instructions below — the name appears to be
// copied from Neon_SMOVw_pattern; consider renaming for clarity.
5309 class Neon_UMOV_pattern <ValueType StTy, ValueType NaTy, ValueType ResTy,
5310 Operand StImm, Operand NaImm,
5312 : Pat<(ResTy (vector_extract
5313 (NaTy VPR64:$Rn), NaImm:$Imm)),
5314 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// Instantiate for b/h/s element extracts producing an i32 result.
5317 def : Neon_UMOV_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
5318 neon_uimm3_bare, UMOVwb>;
5319 def : Neon_UMOV_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
5320 neon_uimm2_bare, UMOVwh>;
5321 def : Neon_UMOV_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
5322 neon_uimm1_bare, UMOVws>;
// Fold zero-extensions of lane extracts directly into UMOV, which already
// zero-extends into the destination GPR.  Covers both 128-bit and 64-bit
// source vectors (the latter widened via SUBREG_TO_REG).
// NOTE(review): the UMOVxd pattern below wraps an extract from v2i64 as
// (i32 ...) — verify the intended element/result type against the i64
// UMOVxd destination; the elided surrounding lines may contain context.
5325 (i32 (vector_extract
5326 (v16i8 VPR128:$Rn), (neon_uimm4_bare:$Imm))),
5328 (UMOVwb VPR128:$Rn, neon_uimm4_bare:$Imm)>;
5331 (i32 (vector_extract
5332 (v8i16 VPR128:$Rn), (neon_uimm3_bare:$Imm))),
5334 (UMOVwh VPR128:$Rn, neon_uimm3_bare:$Imm)>;
5336 def : Pat<(i64 (zext
5337 (i32 (vector_extract
5338 (v2i64 VPR128:$Rn), (neon_uimm1_bare:$Imm))))),
5339 (UMOVxd VPR128:$Rn, neon_uimm1_bare:$Imm)>;
5342 (i32 (vector_extract
5343 (v8i8 VPR64:$Rn), (neon_uimm3_bare:$Imm))),
5345 (UMOVwb (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
5346 neon_uimm3_bare:$Imm)>;
5349 (i32 (vector_extract
5350 (v4i16 VPR64:$Rn), (neon_uimm2_bare:$Imm))),
5352 (UMOVwh (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
5353 neon_uimm2_bare:$Imm)>;
5355 def : Pat<(i64 (zext
5356 (i32 (vector_extract
5357 (v1i64 VPR64:$Rn), (neon_uimm0_bare:$Imm))))),
5358 (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
5359 neon_uimm0_bare:$Imm)>;
// Additional copy patterns for scalar types: extract lane 0 of a
// single-element vector held in an FP register.  Sub-word types (v1i8,
// v1i16) are widened to the full vector register via SUBREG_TO_REG before
// the extract; word/doubleword integers use FMOV to move FPR -> GPR;
// FP results are just the register reinterpreted.
5362 def : Pat<(i32 (vector_extract (v1i8 FPR8:$Rn), (i64 0))),
5364 (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8)), (i64 0))>;
5366 def : Pat<(i32 (vector_extract (v1i16 FPR16:$Rn), (i64 0))),
5368 (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16)), (i64 0))>;
5370 def : Pat<(i32 (vector_extract (v1i32 FPR32:$Rn), (i64 0))),
5371 (FMOVws FPR32:$Rn)>;
5373 def : Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))),
5374 (FMOVxd FPR64:$Rn)>;
5376 def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))),
5379 def : Pat<(f32 (vector_extract (v1f32 FPR32:$Rn), (i64 0))),
// scalar_to_vector patterns: build a single-element vector from a scalar.
// Sub-word GPR sources insert into lane 0 of an IMPLICIT_DEF 128-bit
// vector with INS, then take the narrow subregister; FP scalars already
// live in the right register and need only a subregister reinterpretation
// (INSERT_SUBREG into IMPLICIT_DEF for the v2f64 case).
5382 def : Pat<(v1i8 (scalar_to_vector GPR32:$Rn)),
5383 (v1i8 (EXTRACT_SUBREG (v16i8
5384 (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
5387 def : Pat<(v1i16 (scalar_to_vector GPR32:$Rn)),
5388 (v1i16 (EXTRACT_SUBREG (v8i16
5389 (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
5392 def : Pat<(v1i32 (scalar_to_vector GPR32:$src)),
5395 def : Pat<(v1i64 (scalar_to_vector GPR64:$src)),
5398 def : Pat<(v1f32 (scalar_to_vector (f32 FPR32:$Rn))),
5400 def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))),
5403 def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))),
5406 def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$src))),
5407 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)),
5408 (f64 FPR64:$src), sub_64)>;
// Instruction class for DUP (element): broadcast lane $Imm of $Rn to every
// element of $Rd.  Q selects the 128-bit vs 64-bit destination; the source
// is always read through the 128-bit register.
//   rdlane/rnlane - arrangement suffixes for the assembly string
//   ResVPR/ResTy  - destination register operand and vector type
//   OpTy          - 128-bit source vector type; OpImm its lane operand
5410 class NeonI_DUP_Elt<bit Q, string asmop, string rdlane, string rnlane,
5411 RegisterOperand ResVPR, ValueType ResTy,
5412 ValueType OpTy, Operand OpImm>
5413 : NeonI_copy<Q, 0b0, 0b0000, (outs ResVPR:$Rd),
5414 (ins VPR128:$Rn, OpImm:$Imm),
5415 asmop # "\t$Rd" # rdlane # ", $Rn" # rnlane # "[$Imm]",
// DUP (element) instructions.  Bits 20-16 (imm5) encode the lane index
// with the size-marker bit, as for INS/SMOV/UMOV above.  128-bit forms
// first (16b/8h/4s/2d), then 64-bit forms (8b/4h/2s); there is no 1d form.
5421 def DUPELT16b : NeonI_DUP_Elt<0b1, "dup", ".16b", ".b", VPR128, v16i8, v16i8,
5423 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
5426 def DUPELT8h : NeonI_DUP_Elt<0b1, "dup", ".8h", ".h", VPR128, v8i16, v8i16,
5428 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
5431 def DUPELT4s : NeonI_DUP_Elt<0b1, "dup", ".4s", ".s", VPR128, v4i32, v4i32,
5433 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
5436 def DUPELT2d : NeonI_DUP_Elt<0b1, "dup", ".2d", ".d", VPR128, v2i64, v2i64,
5438 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
5441 def DUPELT8b : NeonI_DUP_Elt<0b0, "dup", ".8b", ".b", VPR64, v8i8, v16i8,
5443 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
5446 def DUPELT4h : NeonI_DUP_Elt<0b0, "dup", ".4h", ".h", VPR64, v4i16, v8i16,
5448 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
5451 def DUPELT2s : NeonI_DUP_Elt<0b0, "dup", ".2s", ".s", VPR64, v2i32, v4i32,
5453 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
// Select DUPELT for the Neon_vduplane node.  The first pattern handles a
// 128-bit source directly; the second widens a 64-bit source to 128 bits
// (ExTy) with SUBREG_TO_REG so the same instruction can be used.
5456 multiclass NeonI_DUP_Elt_pattern<Instruction DUPELT, ValueType ResTy,
5457 ValueType OpTy,ValueType NaTy,
5458 ValueType ExTy, Operand OpLImm,
5460 def : Pat<(ResTy (Neon_vduplane (OpTy VPR128:$Rn), OpLImm:$Imm)),
5461 (ResTy (DUPELT (OpTy VPR128:$Rn), OpLImm:$Imm))>;
5463 def : Pat<(ResTy (Neon_vduplane
5464 (NaTy VPR64:$Rn), OpNImm:$Imm)),
5466 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), OpNImm:$Imm))>;
// Instantiate the vduplane patterns for every integer and FP element
// size, for both 128-bit and 64-bit result vectors.  FP vectors reuse the
// integer DUPELT instructions of the matching element width.
5468 defm : NeonI_DUP_Elt_pattern<DUPELT16b, v16i8, v16i8, v8i8, v16i8,
5469 neon_uimm4_bare, neon_uimm3_bare>;
5470 defm : NeonI_DUP_Elt_pattern<DUPELT8b, v8i8, v16i8, v8i8, v16i8,
5471 neon_uimm4_bare, neon_uimm3_bare>;
5472 defm : NeonI_DUP_Elt_pattern<DUPELT8h, v8i16, v8i16, v4i16, v8i16,
5473 neon_uimm3_bare, neon_uimm2_bare>;
5474 defm : NeonI_DUP_Elt_pattern<DUPELT4h, v4i16, v8i16, v4i16, v8i16,
5475 neon_uimm3_bare, neon_uimm2_bare>;
5476 defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4i32, v4i32, v2i32, v4i32,
5477 neon_uimm2_bare, neon_uimm1_bare>;
5478 defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2i32, v4i32, v2i32, v4i32,
5479 neon_uimm2_bare, neon_uimm1_bare>;
5480 defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2i64, v2i64, v1i64, v2i64,
5481 neon_uimm1_bare, neon_uimm0_bare>;
5482 defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4f32, v4f32, v2f32, v4f32,
5483 neon_uimm2_bare, neon_uimm1_bare>;
5484 defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2f32, v4f32, v2f32, v4f32,
5485 neon_uimm2_bare, neon_uimm1_bare>;
5486 defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2f64, v2f64, v1f64, v2f64,
5487 neon_uimm1_bare, neon_uimm0_bare>;
// Broadcast an FP scalar register (Neon_vdup) by widening it into a vector
// register with SUBREG_TO_REG and duplicating lane 0 with DUPELT.
5489 def : Pat<(v2f32 (Neon_vdup (f32 FPR32:$Rn))),
5491 (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
5493 def : Pat<(v4f32 (Neon_vdup (f32 FPR32:$Rn))),
5495 (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
5497 def : Pat<(v2f64 (Neon_vdup (f64 FPR64:$Rn))),
5499 (SUBREG_TO_REG (i64 0), FPR64:$Rn, sub_64),
// Instruction class for DUP (general): broadcast a general-purpose
// register to every element of the destination vector; matches the
// Neon_vdup node on a GPR operand.
5502 class NeonI_DUP<bit Q, string asmop, string rdlane,
5503 RegisterOperand ResVPR, ValueType ResTy,
5504 RegisterClass OpGPR, ValueType OpTy>
5505 : NeonI_copy<Q, 0b0, 0b0001, (outs ResVPR:$Rd), (ins OpGPR:$Rn),
5506 asmop # "\t$Rd" # rdlane # ", $Rn",
5507 [(set (ResTy ResVPR:$Rd),
5508 (ResTy (Neon_vdup (OpTy OpGPR:$Rn))))],
// DUP (general) instructions.  The low bits of the imm5 field (bits 16 up)
// hold the size-marker bit pattern selecting the element width; the 2d
// form requires a 64-bit GPR source.
5511 def DUP16b : NeonI_DUP<0b1, "dup", ".16b", VPR128, v16i8, GPR32, i32> {
5513 // bits 17-19 are unspecified.
5516 def DUP8h : NeonI_DUP<0b1, "dup", ".8h", VPR128, v8i16, GPR32, i32> {
5517 let Inst{17-16} = 0b10;
5518 // bits 18-19 are unspecified.
5521 def DUP4s : NeonI_DUP<0b1, "dup", ".4s", VPR128, v4i32, GPR32, i32> {
5522 let Inst{18-16} = 0b100;
5523 // bit 19 is unspecified.
5526 def DUP2d : NeonI_DUP<0b1, "dup", ".2d", VPR128, v2i64, GPR64, i64> {
5527 let Inst{19-16} = 0b1000;
5530 def DUP8b : NeonI_DUP<0b0, "dup", ".8b", VPR64, v8i8, GPR32, i32> {
5532 // bits 17-19 are unspecified.
5535 def DUP4h : NeonI_DUP<0b0, "dup", ".4h", VPR64, v4i16, GPR32, i32> {
5536 let Inst{17-16} = 0b10;
5537 // bits 18-19 are unspecified.
5540 def DUP2s : NeonI_DUP<0b0, "dup", ".2s", VPR64, v2i32, GPR32, i32> {
5541 let Inst{18-16} = 0b100;
5542 // bit 19 is unspecified.
// Patterns for CONCAT_VECTORS of two 64-bit vectors into a 128-bit vector.
// Concatenating with undef is just a widening SUBREG_TO_REG; two distinct
// operands are combined (via the elided instruction here) after widening
// each half; concatenating a vector with itself has its own pattern.
5546 multiclass Concat_Vector_Pattern<ValueType ResTy, ValueType OpTy> {
5547 def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), undef)),
5548 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)>;
5549 def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))),
5551 (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
5552 (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rm, sub_64)),
5555 def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rn))),
5557 (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// Instantiate the concat patterns for every 64-bit -> 128-bit vector pair.
5561 defm : Concat_Vector_Pattern<v16i8, v8i8>;
5562 defm : Concat_Vector_Pattern<v8i16, v4i16>;
5563 defm : Concat_Vector_Pattern<v4i32, v2i32>;
5564 defm : Concat_Vector_Pattern<v2i64, v1i64>;
5565 defm : Concat_Vector_Pattern<v4f32, v2f32>;
5566 defm : Concat_Vector_Pattern<v2f64, v1f64>;
// Patterns for EXTRACT_SUBVECTOR: taking the low half (index 0) of a
// 128-bit vector is a plain subregister extraction, for every element type.
5569 def : Pat<(v8i8 (extract_subvector (v16i8 VPR128:$Rn), (i64 0))),
5570 (v8i8 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
5571 def : Pat<(v4i16 (extract_subvector (v8i16 VPR128:$Rn), (i64 0))),
5572 (v4i16 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
5573 def : Pat<(v2i32 (extract_subvector (v4i32 VPR128:$Rn), (i64 0))),
5574 (v2i32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
5575 def : Pat<(v1i64 (extract_subvector (v2i64 VPR128:$Rn), (i64 0))),
5576 (v1i64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
5577 def : Pat<(v2f32 (extract_subvector (v4f32 VPR128:$Rn), (i64 0))),
5578 (v2f32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
5579 def : Pat<(v1f64 (extract_subvector (v2f64 VPR128:$Rn), (i64 0))),
5580 (v1f64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;