1 //===-- AArch64InstrNEON.td - NEON support for AArch64 -----*- tablegen -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the AArch64 NEON instruction set.
12 //===----------------------------------------------------------------------===//
14 //===----------------------------------------------------------------------===//
15 // NEON-specific DAG Nodes.
16 //===----------------------------------------------------------------------===//
// Bitwise select node: one vector result, three vector operands, and all
// four must have the same type (SDTCisSameAs<0, 1..3>).
17 def Neon_bsl : SDNode<"AArch64ISD::NEON_BSL", SDTypeProfile<1, 3,
18 [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
19 SDTCisSameAs<0, 3>]>>;
// Shared profile for the vector move-immediate nodes: vector result from an
// i32 immediate plus an i32 OpCmode selector.
21 // (outs Result), (ins Imm, OpCmode)
22 def SDT_Neon_movi : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
// MOVI (move immediate) and MVNI (move inverted immediate) use the same
// profile; they differ only in the target opcode they lower to.
24 def Neon_movi : SDNode<"AArch64ISD::NEON_MOVIMM", SDT_Neon_movi>;
26 def Neon_mvni : SDNode<"AArch64ISD::NEON_MVNIMM", SDT_Neon_movi>;
// FMOV immediate takes only the encoded i32 immediate (no OpCmode operand).
28 // (outs Result), (ins Imm)
29 def Neon_fmovi : SDNode<"AArch64ISD::NEON_FMOVIMM", SDTypeProfile<1, 1,
30 [SDTCisVec<0>, SDTCisVT<1, i32>]>>;
// Register-register compare: LHS and RHS must match; the result vector type
// is left unconstrained relative to the operands (FP compares produce
// integer mask vectors).
32 // (outs Result), (ins LHS, RHS, CondCode)
33 def Neon_cmp : SDNode<"AArch64ISD::NEON_CMP", SDTypeProfile<1, 3,
34 [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>;
// Compare-against-zero: operand 2 is the literal 0 / 0.0 constant, so only
// operand 1 is constrained to be a vector.
36 // (outs Result), (ins LHS, 0/0.0 constant, CondCode)
37 def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3,
38 [SDTCisVec<0>, SDTCisVec<1>]>>;
// Bitwise-test compare (CMTST): two matching vector operands.
40 // (outs Result), (ins LHS, RHS)
41 def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2,
42 [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>;
// Duplicate-immediate node: vector result from one i32 immediate operand.
44 def Neon_dupImm : SDNode<"AArch64ISD::NEON_DUPIMM", SDTypeProfile<1, 1,
45 [SDTCisVec<0>, SDTCisVT<1, i32>]>>;
47 def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
// Signed/unsigned saturating shift nodes; both reuse the SDTARMVSH vector
// shift profile defined above.
49 def Neon_sqrshlImm : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>;
50 def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>;
// Duplicate a selected lane: vector result, source vector, and an i64 lane
// index.
52 def Neon_vduplane : SDNode<"AArch64ISD::NEON_VDUPLANE", SDTypeProfile<1, 2,
53 [SDTCisVec<0>, SDTCisVec<1>, SDTCisVT<2, i64>]>>;
55 //===----------------------------------------------------------------------===//
57 //===----------------------------------------------------------------------===//
59 multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size, bits<5> opcode,
60 string asmop, SDPatternOperator opnode8B,
61 SDPatternOperator opnode16B,
64 let isCommutable = Commutable in {
65 def _8B : NeonI_3VSame<0b0, u, size, opcode,
66 (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
67 asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
68 [(set (v8i8 VPR64:$Rd),
69 (v8i8 (opnode8B (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
72 def _16B : NeonI_3VSame<0b1, u, size, opcode,
73 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
74 asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
75 [(set (v16i8 VPR128:$Rd),
76 (v16i8 (opnode16B (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
82 multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode,
83 string asmop, SDPatternOperator opnode,
86 let isCommutable = Commutable in {
87 def _4H : NeonI_3VSame<0b0, u, 0b01, opcode,
88 (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
89 asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h",
90 [(set (v4i16 VPR64:$Rd),
91 (v4i16 (opnode (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))))],
94 def _8H : NeonI_3VSame<0b1, u, 0b01, opcode,
95 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
96 asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h",
97 [(set (v8i16 VPR128:$Rd),
98 (v8i16 (opnode (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))))],
101 def _2S : NeonI_3VSame<0b0, u, 0b10, opcode,
102 (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
103 asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
104 [(set (v2i32 VPR64:$Rd),
105 (v2i32 (opnode (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))))],
108 def _4S : NeonI_3VSame<0b1, u, 0b10, opcode,
109 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
110 asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
111 [(set (v4i32 VPR128:$Rd),
112 (v4i32 (opnode (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))],
116 multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
117 string asmop, SDPatternOperator opnode,
119 : NeonI_3VSame_HS_sizes<u, opcode, asmop, opnode, Commutable>
121 let isCommutable = Commutable in {
122 def _8B : NeonI_3VSame<0b0, u, 0b00, opcode,
123 (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
124 asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
125 [(set (v8i8 VPR64:$Rd),
126 (v8i8 (opnode (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
129 def _16B : NeonI_3VSame<0b1, u, 0b00, opcode,
130 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
131 asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
132 [(set (v16i8 VPR128:$Rd),
133 (v16i8 (opnode (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
138 multiclass NeonI_3VSame_BHSD_sizes<bit u, bits<5> opcode,
139 string asmop, SDPatternOperator opnode,
141 : NeonI_3VSame_BHS_sizes<u, opcode, asmop, opnode, Commutable>
143 let isCommutable = Commutable in {
144 def _2D : NeonI_3VSame<0b1, u, 0b11, opcode,
145 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
146 asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
147 [(set (v2i64 VPR128:$Rd),
148 (v2i64 (opnode (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))))],
153 // Multiclass NeonI_3VSame_SD_sizes: Operand types are floating point types,
154 // but Result types can be integer or floating point types.
155 multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode,
156 string asmop, SDPatternOperator opnode2S,
157 SDPatternOperator opnode4S,
158 SDPatternOperator opnode2D,
159 ValueType ResTy2S, ValueType ResTy4S,
160 ValueType ResTy2D, bit Commutable = 0>
162 let isCommutable = Commutable in {
163 def _2S : NeonI_3VSame<0b0, u, {size, 0b0}, opcode,
164 (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
165 asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
166 [(set (ResTy2S VPR64:$Rd),
167 (ResTy2S (opnode2S (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))],
170 def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode,
171 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
172 asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
173 [(set (ResTy4S VPR128:$Rd),
174 (ResTy4S (opnode4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))],
177 def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode,
178 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
179 asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
180 [(set (ResTy2D VPR128:$Rd),
181 (ResTy2D (opnode2D (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))],
186 //===----------------------------------------------------------------------===//
187 // Instruction Definitions
188 //===----------------------------------------------------------------------===//
190 // Vector Arithmetic Instructions
192 // Vector Add (Integer and Floating-Point)
// Integer add covers all element sizes (B/H/S/D); the trailing 1 marks the
// operation commutable so the matcher may swap operands.
194 defm ADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>;
// FP variants instantiate the S/D-sized multiclass with the same opnode for
// 2S, 4S and 2D and explicit result types.
195 defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd", fadd, fadd, fadd,
196 v2f32, v4f32, v2f64, 1>;
198 // Vector Sub (Integer and Floating-Point)
// Subtraction is not commutable (trailing 0).
200 defm SUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10000, "sub", sub, 0>;
201 defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub", fsub, fsub, fsub,
202 v2f32, v4f32, v2f64, 0>;
204 // Vector Multiply (Integer and Floating-Point)
// Integer MUL has no D-sized form, hence the BHS multiclass.
206 defm MULvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>;
207 defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul, fmul, fmul,
208 v2f32, v4f32, v2f64, 1>;
210 // Vector Multiply (Polynomial)
// Polynomial multiply only exists for byte elements; both the 8B and 16B
// forms select the same ARM NEON intrinsic.
212 defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul",
213 int_arm_neon_vmulp, int_arm_neon_vmulp, 1>;
215 // Vector Multiply-accumulate and Multiply-subtract (Integer)
217 // class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and
218 // two operands constraints.
219 class NeonI_3VSame_Constraint_impl<string asmop, string asmlane,
220 RegisterOperand VPRC, ValueType OpTy, bit q, bit u, bits<2> size,
221 bits<5> opcode, SDPatternOperator opnode>
222 : NeonI_3VSame<q, u, size, opcode,
223 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, VPRC:$Rm),
224 asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane # ", $Rm" # asmlane,
225 [(set (OpTy VPRC:$Rd),
226 (OpTy (opnode (OpTy VPRC:$src), (OpTy VPRC:$Rn), (OpTy VPRC:$Rm))))],
228 let Constraints = "$src = $Rd";
// Integer multiply-accumulate fragment: Ra + (Rn * Rm). Operand order is
// (accumulator, multiplicand, multiplier), matching the $src operand of the
// tied-register instruction class that consumes these fragments.
231 def Neon_mla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
232 (add node:$Ra, (mul node:$Rn, node:$Rm))>;
// Integer multiply-subtract fragment: Ra - (Rn * Rm).
234 def Neon_mls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
235 (sub node:$Ra, (mul node:$Rn, node:$Rm))>;
// MLA: one def per vector arrangement. The encoding fields are, in order:
// q, u, size, opcode (all MLA forms share u=0, opcode=0b10010).
238 def MLAvvv_8B: NeonI_3VSame_Constraint_impl<"mla", ".8b", VPR64, v8i8,
239 0b0, 0b0, 0b00, 0b10010, Neon_mla>;
240 def MLAvvv_16B: NeonI_3VSame_Constraint_impl<"mla", ".16b", VPR128, v16i8,
241 0b1, 0b0, 0b00, 0b10010, Neon_mla>;
242 def MLAvvv_4H: NeonI_3VSame_Constraint_impl<"mla", ".4h", VPR64, v4i16,
243 0b0, 0b0, 0b01, 0b10010, Neon_mla>;
244 def MLAvvv_8H: NeonI_3VSame_Constraint_impl<"mla", ".8h", VPR128, v8i16,
245 0b1, 0b0, 0b01, 0b10010, Neon_mla>;
246 def MLAvvv_2S: NeonI_3VSame_Constraint_impl<"mla", ".2s", VPR64, v2i32,
247 0b0, 0b0, 0b10, 0b10010, Neon_mla>;
248 def MLAvvv_4S: NeonI_3VSame_Constraint_impl<"mla", ".4s", VPR128, v4i32,
249 0b1, 0b0, 0b10, 0b10010, Neon_mla>;
// MLS differs from MLA only in the u bit (u=1).
251 def MLSvvv_8B: NeonI_3VSame_Constraint_impl<"mls", ".8b", VPR64, v8i8,
252 0b0, 0b1, 0b00, 0b10010, Neon_mls>;
253 def MLSvvv_16B: NeonI_3VSame_Constraint_impl<"mls", ".16b", VPR128, v16i8,
254 0b1, 0b1, 0b00, 0b10010, Neon_mls>;
255 def MLSvvv_4H: NeonI_3VSame_Constraint_impl<"mls", ".4h", VPR64, v4i16,
256 0b0, 0b1, 0b01, 0b10010, Neon_mls>;
257 def MLSvvv_8H: NeonI_3VSame_Constraint_impl<"mls", ".8h", VPR128, v8i16,
258 0b1, 0b1, 0b01, 0b10010, Neon_mls>;
259 def MLSvvv_2S: NeonI_3VSame_Constraint_impl<"mls", ".2s", VPR64, v2i32,
260 0b0, 0b1, 0b10, 0b10010, Neon_mls>;
261 def MLSvvv_4S: NeonI_3VSame_Constraint_impl<"mls", ".4s", VPR128, v4i32,
262 0b1, 0b1, 0b10, 0b10010, Neon_mls>;
264 // Vector Multiply-accumulate and Multiply-subtract (Floating Point)
// Unfused FP mul-add/mul-sub fragments (separate rounding of the multiply);
// the fused `fma` node is matched separately below.
266 def Neon_fmla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
267 (fadd node:$Ra, (fmul node:$Rn, node:$Rm))>;
269 def Neon_fmls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
270 (fsub node:$Ra, (fmul node:$Rn, node:$Rm))>;
// FMLA/FMLS contract an fmul+fadd/fsub pair into one fused instruction, so
// they are only selectable when fused multiply-add is permitted.
272 let Predicates = [HasNEON, UseFusedMAC] in {
273 def FMLAvvv_2S: NeonI_3VSame_Constraint_impl<"fmla", ".2s", VPR64, v2f32,
274 0b0, 0b0, 0b00, 0b11001, Neon_fmla>;
275 def FMLAvvv_4S: NeonI_3VSame_Constraint_impl<"fmla", ".4s", VPR128, v4f32,
276 0b1, 0b0, 0b00, 0b11001, Neon_fmla>;
277 def FMLAvvv_2D: NeonI_3VSame_Constraint_impl<"fmla", ".2d", VPR128, v2f64,
278 0b1, 0b0, 0b01, 0b11001, Neon_fmla>;
// FMLS uses the upper pair of size encodings (0b10/0b11) with the same
// opcode.
280 def FMLSvvv_2S: NeonI_3VSame_Constraint_impl<"fmls", ".2s", VPR64, v2f32,
281 0b0, 0b0, 0b10, 0b11001, Neon_fmls>;
282 def FMLSvvv_4S: NeonI_3VSame_Constraint_impl<"fmls", ".4s", VPR128, v4f32,
283 0b1, 0b0, 0b10, 0b11001, Neon_fmls>;
284 def FMLSvvv_2D: NeonI_3VSame_Constraint_impl<"fmls", ".2d", VPR128, v2f64,
285 0b1, 0b0, 0b11, 0b11001, Neon_fmls>;
// An explicit ISD::FMA node is always legal to select to FMLA/FMLS
// regardless of the fusion setting. Note the operand reordering: the DAG
// node is (fma Rn, Rm, Ra) while the instruction takes the accumulator
// first.
288 // We're also allowed to match the fma instruction regardless of compile
290 def : Pat<(v2f32 (fma VPR64:$Rn, VPR64:$Rm, VPR64:$Ra)),
291 (FMLAvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
292 def : Pat<(v4f32 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
293 (FMLAvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
294 def : Pat<(v2f64 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
295 (FMLAvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
// fma with one negated multiplicand is FMLS.
297 def : Pat<(v2f32 (fma (fneg VPR64:$Rn), VPR64:$Rm, VPR64:$Ra)),
298 (FMLSvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
299 def : Pat<(v4f32 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
300 (FMLSvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
301 def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
302 (FMLSvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
304 // Vector Divide (Floating-Point)
// FP divide is not commutable.
306 defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv, fdiv, fdiv,
307 v2f32, v4f32, v2f64, 0>;
309 // Vector Bitwise Operations
311 // Vector Bitwise AND
// AND/EOR/ORR share opcode 0b00011 and are distinguished by the u and size
// fields; all operate only on byte arrangements (8B/16B).
313 defm ANDvvv : NeonI_3VSame_B_sizes<0b0, 0b00, 0b00011, "and", and, and, 1>;
315 // Vector Bitwise Exclusive OR
317 defm EORvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b00011, "eor", xor, xor, 1>;
321 defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>;
323 // ORR disassembled as MOV if Vn==Vm
325 // Vector Move - register
326 // Alias for ORR if Vn=Vm.
327 // FIXME: This is actually the preferred syntax but TableGen can't deal with
328 // custom printing of aliases.
// The trailing 0 means the alias is parse-only (not used for printing).
329 def : NeonInstAlias<"mov $Rd.8b, $Rn.8b",
330 (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn), 0>;
331 def : NeonInstAlias<"mov $Rd.16b, $Rn.16b",
332 (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn), 0>;
334 def Neon_immAllOnes: PatLeaf<(Neon_movi (i32 timm), (i32 imm)), [{
335 ConstantSDNode *ImmConstVal = cast<ConstantSDNode>(N->getOperand(0));
336 ConstantSDNode *OpCmodeConstVal = cast<ConstantSDNode>(N->getOperand(1));
338 uint64_t EltVal = A64Imms::decodeNeonModImm(ImmConstVal->getZExtValue(),
339 OpCmodeConstVal->getZExtValue(), EltBits);
340 return (EltBits == 8 && EltVal == 0xff);
// Bitwise NOT expressed as XOR with an all-ones vector immediate
// (Neon_immAllOnes), in 64-bit and 128-bit widths.
344 def Neon_not8B : PatFrag<(ops node:$in),
345 (xor node:$in, (bitconvert (v8i8 Neon_immAllOnes)))>;
346 def Neon_not16B : PatFrag<(ops node:$in),
347 (xor node:$in, (bitconvert (v16i8 Neon_immAllOnes)))>;
// ORN fragment: Rn | ~Rm.
349 def Neon_orn8B : PatFrag<(ops node:$Rn, node:$Rm),
350 (or node:$Rn, (Neon_not8B node:$Rm))>;
352 def Neon_orn16B : PatFrag<(ops node:$Rn, node:$Rm),
353 (or node:$Rn, (Neon_not16B node:$Rm))>;
// BIC fragment: Rn & ~Rm.
355 def Neon_bic8B : PatFrag<(ops node:$Rn, node:$Rm),
356 (and node:$Rn, (Neon_not8B node:$Rm))>;
358 def Neon_bic16B : PatFrag<(ops node:$Rn, node:$Rm),
359 (and node:$Rn, (Neon_not16B node:$Rm))>;
362 // Vector Bitwise OR NOT - register
// ORN/BIC are not commutable because the second operand is complemented.
364 defm ORNvvv : NeonI_3VSame_B_sizes<0b0, 0b11, 0b00011, "orn",
365 Neon_orn8B, Neon_orn16B, 0>;
367 // Vector Bitwise Bit Clear (AND NOT) - register
369 defm BICvvv : NeonI_3VSame_B_sizes<0b0, 0b01, 0b00011, "bic",
370 Neon_bic8B, Neon_bic16B, 0>;
372 multiclass Neon_bitwise2V_patterns<SDPatternOperator opnode8B,
373 SDPatternOperator opnode16B,
375 Instruction INST16B> {
376 def : Pat<(v2i32 (opnode8B VPR64:$Rn, VPR64:$Rm)),
377 (INST8B VPR64:$Rn, VPR64:$Rm)>;
378 def : Pat<(v4i16 (opnode8B VPR64:$Rn, VPR64:$Rm)),
379 (INST8B VPR64:$Rn, VPR64:$Rm)>;
380 def : Pat<(v1i64 (opnode8B VPR64:$Rn, VPR64:$Rm)),
381 (INST8B VPR64:$Rn, VPR64:$Rm)>;
382 def : Pat<(v4i32 (opnode16B VPR128:$Rn, VPR128:$Rm)),
383 (INST16B VPR128:$Rn, VPR128:$Rm)>;
384 def : Pat<(v8i16 (opnode16B VPR128:$Rn, VPR128:$Rm)),
385 (INST16B VPR128:$Rn, VPR128:$Rm)>;
386 def : Pat<(v2i64 (opnode16B VPR128:$Rn, VPR128:$Rm)),
387 (INST16B VPR128:$Rn, VPR128:$Rm)>;
// The bitwise instructions are defined only for the v8i8/v16i8 types; these
// multiclass instantiations add selection patterns for the other integer
// vector types of the same width.
390 // Additional patterns for bitwise instructions AND, EOR, ORR, BIC, ORN
391 defm : Neon_bitwise2V_patterns<and, and, ANDvvv_8B, ANDvvv_16B>;
392 defm : Neon_bitwise2V_patterns<or, or, ORRvvv_8B, ORRvvv_16B>;
393 defm : Neon_bitwise2V_patterns<xor, xor, EORvvv_8B, EORvvv_16B>;
394 defm : Neon_bitwise2V_patterns<Neon_bic8B, Neon_bic16B, BICvvv_8B, BICvvv_16B>;
395 defm : Neon_bitwise2V_patterns<Neon_orn8B, Neon_orn16B, ORNvvv_8B, ORNvvv_16B>;
// BSL has a tied destination (mask in $src), hence the Constraint_impl
// class.
397 // Vector Bitwise Select
398 def BSLvvv_8B : NeonI_3VSame_Constraint_impl<"bsl", ".8b", VPR64, v8i8,
399 0b0, 0b1, 0b01, 0b00011, Neon_bsl>;
401 def BSLvvv_16B : NeonI_3VSame_Constraint_impl<"bsl", ".16b", VPR128, v16i8,
402 0b1, 0b1, 0b01, 0b00011, Neon_bsl>;
404 multiclass Neon_bitwise3V_patterns<SDPatternOperator opnode,
406 Instruction INST16B> {
407 // Disassociate type from instruction definition
408 def : Pat<(v2i32 (opnode VPR64:$src,VPR64:$Rn, VPR64:$Rm)),
409 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
410 def : Pat<(v4i16 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
411 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
412 def : Pat<(v1i64 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
413 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
414 def : Pat<(v4i32 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
415 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
416 def : Pat<(v8i16 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
417 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
418 def : Pat<(v2i64 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
419 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
421 // Allow to match BSL instruction pattern with non-constant operand
422 def : Pat<(v8i8 (or (and VPR64:$Rn, VPR64:$Rd),
423 (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
424 (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
425 def : Pat<(v4i16 (or (and VPR64:$Rn, VPR64:$Rd),
426 (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
427 (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
428 def : Pat<(v2i32 (or (and VPR64:$Rn, VPR64:$Rd),
429 (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
430 (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
431 def : Pat<(v1i64 (or (and VPR64:$Rn, VPR64:$Rd),
432 (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
433 (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
434 def : Pat<(v16i8 (or (and VPR128:$Rn, VPR128:$Rd),
435 (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
436 (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
437 def : Pat<(v8i16 (or (and VPR128:$Rn, VPR128:$Rd),
438 (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
439 (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
440 def : Pat<(v4i32 (or (and VPR128:$Rn, VPR128:$Rd),
441 (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
442 (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
443 def : Pat<(v2i64 (or (and VPR128:$Rn, VPR128:$Rd),
444 (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
445 (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
447 // Allow to match llvm.arm.* intrinsics.
448 def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 VPR64:$src),
449 (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
450 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
451 def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 VPR64:$src),
452 (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
453 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
454 def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 VPR64:$src),
455 (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
456 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
457 def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 VPR64:$src),
458 (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
459 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
460 def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 VPR64:$src),
461 (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
462 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
463 def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 VPR128:$src),
464 (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
465 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
466 def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 VPR128:$src),
467 (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
468 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
469 def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 VPR128:$src),
470 (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
471 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
472 def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 VPR128:$src),
473 (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
474 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
475 def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 VPR128:$src),
476 (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
477 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
478 def : Pat<(v2f64 (int_arm_neon_vbsl (v2f64 VPR128:$src),
479 (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
480 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
483 // Additional patterns for bitwise instruction BSL
484 defm: Neon_bitwise3V_patterns<Neon_bsl, BSLvvv_8B, BSLvvv_16B>;
// A pattern fragment whose predicate always returns false: BIT/BIF below
// need *some* pattern operator for the shared instruction class, but must
// never be selected from the DAG (BSL handles all three-operand selects).
486 def Neon_NoBSLop : PatFrag<(ops node:$src, node:$Rn, node:$Rm),
487 (Neon_bsl node:$src, node:$Rn, node:$Rm),
488 [{ (void)N; return false; }]>;
490 // Vector Bitwise Insert if True
// Assembler/disassembler-only definitions (see Neon_NoBSLop above).
492 def BITvvv_8B : NeonI_3VSame_Constraint_impl<"bit", ".8b", VPR64, v8i8,
493 0b0, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
494 def BITvvv_16B : NeonI_3VSame_Constraint_impl<"bit", ".16b", VPR128, v16i8,
495 0b1, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
497 // Vector Bitwise Insert if False
499 def BIFvvv_8B : NeonI_3VSame_Constraint_impl<"bif", ".8b", VPR64, v8i8,
500 0b0, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
501 def BIFvvv_16B : NeonI_3VSame_Constraint_impl<"bif", ".16b", VPR128, v16i8,
502 0b1, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
504 // Vector Absolute Difference and Accumulate (Signed, Unsigned)
// ABA fragments: accumulate the absolute difference, Ra + |Rn - Rm|, using
// the ARM NEON vabd intrinsics for the absolute-difference part.
506 def Neon_uaba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
507 (add node:$Ra, (int_arm_neon_vabdu node:$Rn, node:$Rm))>;
508 def Neon_saba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
509 (add node:$Ra, (int_arm_neon_vabds node:$Rn, node:$Rm))>;
511 // Vector Absolute Difference and Accumulate (Unsigned)
// u=1 selects the unsigned form; opcode 0b01111 is shared with SABA.
512 def UABAvvv_8B : NeonI_3VSame_Constraint_impl<"uaba", ".8b", VPR64, v8i8,
513 0b0, 0b1, 0b00, 0b01111, Neon_uaba>;
514 def UABAvvv_16B : NeonI_3VSame_Constraint_impl<"uaba", ".16b", VPR128, v16i8,
515 0b1, 0b1, 0b00, 0b01111, Neon_uaba>;
516 def UABAvvv_4H : NeonI_3VSame_Constraint_impl<"uaba", ".4h", VPR64, v4i16,
517 0b0, 0b1, 0b01, 0b01111, Neon_uaba>;
518 def UABAvvv_8H : NeonI_3VSame_Constraint_impl<"uaba", ".8h", VPR128, v8i16,
519 0b1, 0b1, 0b01, 0b01111, Neon_uaba>;
520 def UABAvvv_2S : NeonI_3VSame_Constraint_impl<"uaba", ".2s", VPR64, v2i32,
521 0b0, 0b1, 0b10, 0b01111, Neon_uaba>;
522 def UABAvvv_4S : NeonI_3VSame_Constraint_impl<"uaba", ".4s", VPR128, v4i32,
523 0b1, 0b1, 0b10, 0b01111, Neon_uaba>;
525 // Vector Absolute Difference and Accumulate (Signed)
// Same encodings as UABA but with u=0.
526 def SABAvvv_8B : NeonI_3VSame_Constraint_impl<"saba", ".8b", VPR64, v8i8,
527 0b0, 0b0, 0b00, 0b01111, Neon_saba>;
528 def SABAvvv_16B : NeonI_3VSame_Constraint_impl<"saba", ".16b", VPR128, v16i8,
529 0b1, 0b0, 0b00, 0b01111, Neon_saba>;
530 def SABAvvv_4H : NeonI_3VSame_Constraint_impl<"saba", ".4h", VPR64, v4i16,
531 0b0, 0b0, 0b01, 0b01111, Neon_saba>;
532 def SABAvvv_8H : NeonI_3VSame_Constraint_impl<"saba", ".8h", VPR128, v8i16,
533 0b1, 0b0, 0b01, 0b01111, Neon_saba>;
534 def SABAvvv_2S : NeonI_3VSame_Constraint_impl<"saba", ".2s", VPR64, v2i32,
535 0b0, 0b0, 0b10, 0b01111, Neon_saba>;
536 def SABAvvv_4S : NeonI_3VSame_Constraint_impl<"saba", ".4s", VPR128, v4i32,
537 0b1, 0b0, 0b10, 0b01111, Neon_saba>;
540 // Vector Absolute Difference (Signed, Unsigned)
541 defm UABDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01110, "uabd", int_arm_neon_vabdu, 0>;
542 defm SABDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01110, "sabd", int_arm_neon_vabds, 0>;
544 // Vector Absolute Difference (Floating Point)
// int_arm_neon_vabds is overloaded on element type, so the same intrinsic
// serves all three FP arrangements here.
545 defm FABDvvv: NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11010, "fabd",
546 int_arm_neon_vabds, int_arm_neon_vabds,
547 int_arm_neon_vabds, v2f32, v4f32, v2f64, 0>;
549 // Vector Reciprocal Step (Floating Point)
550 defm FRECPSvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11111, "frecps",
551 int_arm_neon_vrecps, int_arm_neon_vrecps,
553 v2f32, v4f32, v2f64, 0>;
555 // Vector Reciprocal Square Root Step (Floating Point)
556 defm FRSQRTSvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11111, "frsqrts",
557 int_arm_neon_vrsqrts,
558 int_arm_neon_vrsqrts,
559 int_arm_neon_vrsqrts,
560 v2f32, v4f32, v2f64, 0>;
562 // Vector Comparisons
// Each fragment fixes the condition code of the generic Neon_cmp node so
// the instruction multiclasses below can match a two-operand pattern.
564 def Neon_cmeq : PatFrag<(ops node:$lhs, node:$rhs),
565 (Neon_cmp node:$lhs, node:$rhs, SETEQ)>;
566 def Neon_cmphs : PatFrag<(ops node:$lhs, node:$rhs),
567 (Neon_cmp node:$lhs, node:$rhs, SETUGE)>;
568 def Neon_cmge : PatFrag<(ops node:$lhs, node:$rhs),
569 (Neon_cmp node:$lhs, node:$rhs, SETGE)>;
570 def Neon_cmhi : PatFrag<(ops node:$lhs, node:$rhs),
571 (Neon_cmp node:$lhs, node:$rhs, SETUGT)>;
572 def Neon_cmgt : PatFrag<(ops node:$lhs, node:$rhs),
573 (Neon_cmp node:$lhs, node:$rhs, SETGT)>;
575 // NeonI_compare_aliases class: swaps register operands to implement
576 // comparison aliases, e.g., CMLE is alias for CMGE with operands reversed.
577 class NeonI_compare_aliases<string asmop, string asmlane,
578 Instruction inst, RegisterOperand VPRC>
579 : NeonInstAlias<asmop # "\t$Rd" # asmlane #", $Rn" # asmlane #
581 (inst VPRC:$Rd, VPRC:$Rm, VPRC:$Rn), 0b0>;
583 // Vector Comparisons (Integer)
585 // Vector Compare Mask Equal (Integer)
586 let isCommutable =1 in {
587 defm CMEQvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10001, "cmeq", Neon_cmeq, 0>;
590 // Vector Compare Mask Higher or Same (Unsigned Integer)
591 defm CMHSvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00111, "cmhs", Neon_cmphs, 0>;
593 // Vector Compare Mask Greater Than or Equal (Integer)
594 defm CMGEvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00111, "cmge", Neon_cmge, 0>;
596 // Vector Compare Mask Higher (Unsigned Integer)
597 defm CMHIvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00110, "cmhi", Neon_cmhi, 0>;
599 // Vector Compare Mask Greater Than (Integer)
600 defm CMGTvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00110, "cmgt", Neon_cmgt, 0>;
602 // Vector Compare Mask Bitwise Test (Integer)
603 defm CMTSTvvv: NeonI_3VSame_BHSD_sizes<0b0, 0b10001, "cmtst", Neon_tst, 0>;
// The "less" comparisons have no encodings of their own: each alias maps to
// the corresponding "greater" instruction with $Rn and $Rm swapped by
// NeonI_compare_aliases.
605 // Vector Compare Mask Less or Same (Unsigned Integer)
606 // CMLS is alias for CMHS with operands reversed.
607 def CMLSvvv_8B : NeonI_compare_aliases<"cmls", ".8b", CMHSvvv_8B, VPR64>;
608 def CMLSvvv_16B : NeonI_compare_aliases<"cmls", ".16b", CMHSvvv_16B, VPR128>;
609 def CMLSvvv_4H : NeonI_compare_aliases<"cmls", ".4h", CMHSvvv_4H, VPR64>;
610 def CMLSvvv_8H : NeonI_compare_aliases<"cmls", ".8h", CMHSvvv_8H, VPR128>;
611 def CMLSvvv_2S : NeonI_compare_aliases<"cmls", ".2s", CMHSvvv_2S, VPR64>;
612 def CMLSvvv_4S : NeonI_compare_aliases<"cmls", ".4s", CMHSvvv_4S, VPR128>;
613 def CMLSvvv_2D : NeonI_compare_aliases<"cmls", ".2d", CMHSvvv_2D, VPR128>;
615 // Vector Compare Mask Less Than or Equal (Integer)
616 // CMLE is alias for CMGE with operands reversed.
617 def CMLEvvv_8B : NeonI_compare_aliases<"cmle", ".8b", CMGEvvv_8B, VPR64>;
618 def CMLEvvv_16B : NeonI_compare_aliases<"cmle", ".16b", CMGEvvv_16B, VPR128>;
619 def CMLEvvv_4H : NeonI_compare_aliases<"cmle", ".4h", CMGEvvv_4H, VPR64>;
620 def CMLEvvv_8H : NeonI_compare_aliases<"cmle", ".8h", CMGEvvv_8H, VPR128>;
621 def CMLEvvv_2S : NeonI_compare_aliases<"cmle", ".2s", CMGEvvv_2S, VPR64>;
622 def CMLEvvv_4S : NeonI_compare_aliases<"cmle", ".4s", CMGEvvv_4S, VPR128>;
623 def CMLEvvv_2D : NeonI_compare_aliases<"cmle", ".2d", CMGEvvv_2D, VPR128>;
625 // Vector Compare Mask Lower (Unsigned Integer)
626 // CMLO is alias for CMHI with operands reversed.
627 def CMLOvvv_8B : NeonI_compare_aliases<"cmlo", ".8b", CMHIvvv_8B, VPR64>;
628 def CMLOvvv_16B : NeonI_compare_aliases<"cmlo", ".16b", CMHIvvv_16B, VPR128>;
629 def CMLOvvv_4H : NeonI_compare_aliases<"cmlo", ".4h", CMHIvvv_4H, VPR64>;
630 def CMLOvvv_8H : NeonI_compare_aliases<"cmlo", ".8h", CMHIvvv_8H, VPR128>;
631 def CMLOvvv_2S : NeonI_compare_aliases<"cmlo", ".2s", CMHIvvv_2S, VPR64>;
632 def CMLOvvv_4S : NeonI_compare_aliases<"cmlo", ".4s", CMHIvvv_4S, VPR128>;
633 def CMLOvvv_2D : NeonI_compare_aliases<"cmlo", ".2d", CMHIvvv_2D, VPR128>;
635 // Vector Compare Mask Less Than (Integer)
636 // CMLT is alias for CMGT with operands reversed.
637 def CMLTvvv_8B : NeonI_compare_aliases<"cmlt", ".8b", CMGTvvv_8B, VPR64>;
638 def CMLTvvv_16B : NeonI_compare_aliases<"cmlt", ".16b", CMGTvvv_16B, VPR128>;
639 def CMLTvvv_4H : NeonI_compare_aliases<"cmlt", ".4h", CMGTvvv_4H, VPR64>;
640 def CMLTvvv_8H : NeonI_compare_aliases<"cmlt", ".8h", CMGTvvv_8H, VPR128>;
641 def CMLTvvv_2S : NeonI_compare_aliases<"cmlt", ".2s", CMGTvvv_2S, VPR64>;
642 def CMLTvvv_4S : NeonI_compare_aliases<"cmlt", ".4s", CMGTvvv_4S, VPR128>;
643 def CMLTvvv_2D : NeonI_compare_aliases<"cmlt", ".2d", CMGTvvv_2D, VPR128>;
646 def neon_uimm0_asmoperand : AsmOperandClass
649 let PredicateMethod = "isUImm<0>";
650 let RenderMethod = "addImmOperands";
653 def neon_uimm0 : Operand<i32>, ImmLeaf<i32, [{return Imm == 0;}]> {
654 let ParserMatchClass = neon_uimm0_asmoperand;
655 let PrintMethod = "printNeonUImm0Operand";
659 multiclass NeonI_cmpz_sizes<bit u, bits<5> opcode, string asmop, CondCode CC>
661 def _8B : NeonI_2VMisc<0b0, u, 0b00, opcode,
662 (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
663 asmop # "\t$Rd.8b, $Rn.8b, $Imm",
664 [(set (v8i8 VPR64:$Rd),
665 (v8i8 (Neon_cmpz (v8i8 VPR64:$Rn), (i32 imm:$Imm), CC)))],
668 def _16B : NeonI_2VMisc<0b1, u, 0b00, opcode,
669 (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
670 asmop # "\t$Rd.16b, $Rn.16b, $Imm",
671 [(set (v16i8 VPR128:$Rd),
672 (v16i8 (Neon_cmpz (v16i8 VPR128:$Rn), (i32 imm:$Imm), CC)))],
675 def _4H : NeonI_2VMisc<0b0, u, 0b01, opcode,
676 (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
677 asmop # "\t$Rd.4h, $Rn.4h, $Imm",
678 [(set (v4i16 VPR64:$Rd),
679 (v4i16 (Neon_cmpz (v4i16 VPR64:$Rn), (i32 imm:$Imm), CC)))],
682 def _8H : NeonI_2VMisc<0b1, u, 0b01, opcode,
683 (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
684 asmop # "\t$Rd.8h, $Rn.8h, $Imm",
685 [(set (v8i16 VPR128:$Rd),
686 (v8i16 (Neon_cmpz (v8i16 VPR128:$Rn), (i32 imm:$Imm), CC)))],
689 def _2S : NeonI_2VMisc<0b0, u, 0b10, opcode,
690 (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
691 asmop # "\t$Rd.2s, $Rn.2s, $Imm",
692 [(set (v2i32 VPR64:$Rd),
693 (v2i32 (Neon_cmpz (v2i32 VPR64:$Rn), (i32 imm:$Imm), CC)))],
696 def _4S : NeonI_2VMisc<0b1, u, 0b10, opcode,
697 (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
698 asmop # "\t$Rd.4s, $Rn.4s, $Imm",
699 [(set (v4i32 VPR128:$Rd),
700 (v4i32 (Neon_cmpz (v4i32 VPR128:$Rn), (i32 imm:$Imm), CC)))],
703 def _2D : NeonI_2VMisc<0b1, u, 0b11, opcode,
704 (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
705 asmop # "\t$Rd.2d, $Rn.2d, $Imm",
706 [(set (v2i64 VPR128:$Rd),
707 (v2i64 (Neon_cmpz (v2i64 VPR128:$Rn), (i32 imm:$Imm), CC)))],
// Compare-against-zero instantiations ("vvi" = vector, vector, immediate 0).
711 // Vector Compare Mask Equal to Zero (Integer)
712 defm CMEQvvi : NeonI_cmpz_sizes<0b0, 0b01001, "cmeq", SETEQ>;
714 // Vector Compare Mask Greater Than or Equal to Zero (Signed Integer)
715 defm CMGEvvi : NeonI_cmpz_sizes<0b1, 0b01000, "cmge", SETGE>;
717 // Vector Compare Mask Greater Than Zero (Signed Integer)
718 defm CMGTvvi : NeonI_cmpz_sizes<0b0, 0b01000, "cmgt", SETGT>;
720 // Vector Compare Mask Less Than or Equal To Zero (Signed Integer)
721 defm CMLEvvi : NeonI_cmpz_sizes<0b1, 0b01001, "cmle", SETLE>;
723 // Vector Compare Mask Less Than Zero (Signed Integer)
724 defm CMLTvvi : NeonI_cmpz_sizes<0b0, 0b01010, "cmlt", SETLT>;
726 // Vector Comparisons (Floating Point)
// FP compares produce integer mask vectors, hence the v2i32/v4i32/v2i64
// result types.
728 // Vector Compare Mask Equal (Floating Point)
729 let isCommutable =1 in {
730 defm FCMEQvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11100, "fcmeq", Neon_cmeq,
731 Neon_cmeq, Neon_cmeq,
732 v2i32, v4i32, v2i64, 0>;
735 // Vector Compare Mask Greater Than Or Equal (Floating Point)
736 defm FCMGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11100, "fcmge", Neon_cmge,
737 Neon_cmge, Neon_cmge,
738 v2i32, v4i32, v2i64, 0>;
740 // Vector Compare Mask Greater Than (Floating Point)
741 defm FCMGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11100, "fcmgt", Neon_cmgt,
742 Neon_cmgt, Neon_cmgt,
743 v2i32, v4i32, v2i64, 0>;
745 // Vector Compare Mask Less Than Or Equal (Floating Point)
746 // FCMLE is alias for FCMGE with operands reversed.
747 def FCMLEvvv_2S : NeonI_compare_aliases<"fcmle", ".2s", FCMGEvvv_2S, VPR64>;
748 def FCMLEvvv_4S : NeonI_compare_aliases<"fcmle", ".4s", FCMGEvvv_4S, VPR128>;
749 def FCMLEvvv_2D : NeonI_compare_aliases<"fcmle", ".2d", FCMGEvvv_2D, VPR128>;
751 // Vector Compare Mask Less Than (Floating Point)
752 // FCMLT is alias for FCMGT with operands reversed.
753 def FCMLTvvv_2S : NeonI_compare_aliases<"fcmlt", ".2s", FCMGTvvv_2S, VPR64>;
754 def FCMLTvvv_4S : NeonI_compare_aliases<"fcmlt", ".4s", FCMGTvvv_4S, VPR128>;
755 def FCMLTvvv_2D : NeonI_compare_aliases<"fcmlt", ".2d", FCMGTvvv_2D, VPR128>;
// Floating-point compare against zero: one multiclass instantiated per
// condition below.  Each _2S/_4S/_2D def matches Neon_cmpz (see the SDNode
// at the top of the file: LHS, a 0.0 constant, and a CondCode) and produces
// an integer mask vector.
// NOTE(review): this extract is missing interior lines of the multiclass
// (opening '{', per-def closing lines, closing '}') — the gaps in the baked-in
// line numbers (759->761, 765->768, ...) show dropped lines; confirm against
// the full file before editing.
758 multiclass NeonI_fpcmpz_sizes<bit u, bit size, bits<5> opcode,
759 string asmop, CondCode CC>
761 def _2S : NeonI_2VMisc<0b0, u, {size, 0b0}, opcode,
762 (outs VPR64:$Rd), (ins VPR64:$Rn, fpz32:$FPImm),
763 asmop # "\t$Rd.2s, $Rn.2s, $FPImm",
764 [(set (v2i32 VPR64:$Rd),
765 (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpimm:$FPImm), CC)))],
768 def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode,
769 (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm),
770 asmop # "\t$Rd.4s, $Rn.4s, $FPImm",
771 [(set (v4i32 VPR128:$Rd),
772 (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpimm:$FPImm), CC)))],
775 def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode,
776 (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm),
777 asmop # "\t$Rd.2d, $Rn.2d, $FPImm",
778 [(set (v2i64 VPR128:$Rd),
779 (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn), (f32 fpimm:$FPImm), CC)))],
783 // Vector Compare Mask Equal to Zero (Floating Point)
784 defm FCMEQvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01101, "fcmeq", SETEQ>;
786 // Vector Compare Mask Greater Than or Equal to Zero (Floating Point)
787 defm FCMGEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01100, "fcmge", SETGE>;
789 // Vector Compare Mask Greater Than Zero (Floating Point)
790 defm FCMGTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01100, "fcmgt", SETGT>;
792 // Vector Compare Mask Less Than or Equal To Zero (Floating Point)
793 defm FCMLEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01101, "fcmle", SETLE>;
795 // Vector Compare Mask Less Than Zero (Floating Point)
796 defm FCMLTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01110, "fcmlt", SETLT>;
798 // Vector Absolute Comparisons (Floating Point)
// FACGE/FACGT compare |Rn| against |Rm|.  The AArch32 NEON intrinsics
// (int_arm_neon_vacge*/vacgt*) are reused for the 64/128-bit S forms; the
// AArch64-only 2D form gets its own intrinsic.
800 // Vector Absolute Compare Mask Greater Than Or Equal (Floating Point)
801 defm FACGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11101, "facge",
802 int_arm_neon_vacged, int_arm_neon_vacgeq,
803 int_aarch64_neon_vacgeq,
804 v2i32, v4i32, v2i64, 0>;
806 // Vector Absolute Compare Mask Greater Than (Floating Point)
807 defm FACGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11101, "facgt",
808 int_arm_neon_vacgtd, int_arm_neon_vacgtq,
809 int_aarch64_neon_vacgtq,
810 v2i32, v4i32, v2i64, 0>;
812 // Vector Absolute Compare Mask Less Than Or Equal (Floating Point)
813 // FACLE is alias for FACGE with operands reversed.
814 def FACLEvvv_2S : NeonI_compare_aliases<"facle", ".2s", FACGEvvv_2S, VPR64>;
815 def FACLEvvv_4S : NeonI_compare_aliases<"facle", ".4s", FACGEvvv_4S, VPR128>;
816 def FACLEvvv_2D : NeonI_compare_aliases<"facle", ".2d", FACGEvvv_2D, VPR128>;
818 // Vector Absolute Compare Mask Less Than (Floating Point)
819 // FACLT is alias for FACGT with operands reversed.
820 def FACLTvvv_2S : NeonI_compare_aliases<"faclt", ".2s", FACGTvvv_2S, VPR64>;
821 def FACLTvvv_4S : NeonI_compare_aliases<"faclt", ".4s", FACGTvvv_4S, VPR128>;
822 def FACLTvvv_2D : NeonI_compare_aliases<"faclt", ".2d", FACGTvvv_2D, VPR128>;
// Three-same-operand integer arithmetic.  The trailing 1/0 flag on each defm
// is the commutability bit passed through to the multiclass (adds and shifts
// by a register are commutable patterns; subtracts are not).
824 // Vector halving add (Integer Signed, Unsigned)
825 defm SHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00000, "shadd",
826 int_arm_neon_vhadds, 1>;
827 defm UHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00000, "uhadd",
828 int_arm_neon_vhaddu, 1>;
830 // Vector halving sub (Integer Signed, Unsigned)
831 defm SHSUBvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00100, "shsub",
832 int_arm_neon_vhsubs, 0>;
833 defm UHSUBvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00100, "uhsub",
834 int_arm_neon_vhsubu, 0>;
836 // Vector rouding halving add (Integer Signed, Unsigned)
837 defm SRHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00010, "srhadd",
838 int_arm_neon_vrhadds, 1>;
839 defm URHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00010, "urhadd",
840 int_arm_neon_vrhaddu, 1>;
842 // Vector Saturating add (Integer Signed, Unsigned)
843 defm SQADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00001, "sqadd",
844 int_arm_neon_vqadds, 1>;
845 defm UQADDvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00001, "uqadd",
846 int_arm_neon_vqaddu, 1>;
848 // Vector Saturating sub (Integer Signed, Unsigned)
849 defm SQSUBvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00101, "sqsub",
850 int_arm_neon_vqsubs, 1>;
851 defm UQSUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00101, "uqsub",
852 int_arm_neon_vqsubu, 1>;
854 // Vector Shift Left (Signed and Unsigned Integer)
855 defm SSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01000, "sshl",
856 int_arm_neon_vshifts, 1>;
857 defm USHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01000, "ushl",
858 int_arm_neon_vshiftu, 1>;
860 // Vector Saturating Shift Left (Signed and Unsigned Integer)
861 defm SQSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01001, "sqshl",
862 int_arm_neon_vqshifts, 1>;
863 defm UQSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01001, "uqshl",
864 int_arm_neon_vqshiftu, 1>;
866 // Vector Rouding Shift Left (Signed and Unsigned Integer)
867 defm SRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01010, "srshl",
868 int_arm_neon_vrshifts, 1>;
869 defm URSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01010, "urshl",
870 int_arm_neon_vrshiftu, 1>;
872 // Vector Saturating Rouding Shift Left (Signed and Unsigned Integer)
873 defm SQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01011, "sqrshl",
874 int_arm_neon_vqrshifts, 1>;
875 defm UQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01011, "uqrshl",
876 int_arm_neon_vqrshiftu, 1>;
// Min/max, pairwise, and multiply-high three-same-operand instructions.
// BHS = byte/half/word element sizes; SD = single/double FP element sizes.
878 // Vector Maximum (Signed and Unsigned Integer)
879 defm SMAXvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01100, "smax", int_arm_neon_vmaxs, 1>;
880 defm UMAXvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01100, "umax", int_arm_neon_vmaxu, 1>;
882 // Vector Minimum (Signed and Unsigned Integer)
883 defm SMINvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01101, "smin", int_arm_neon_vmins, 1>;
884 defm UMINvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01101, "umin", int_arm_neon_vminu, 1>;
886 // Vector Maximum (Floating Point)
// The AArch32 vmaxs/vmins intrinsics are polymorphic, so they serve for the
// FP element types here as well.
887 defm FMAXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11110, "fmax",
888 int_arm_neon_vmaxs, int_arm_neon_vmaxs,
889 int_arm_neon_vmaxs, v2f32, v4f32, v2f64, 1>;
891 // Vector Minimum (Floating Point)
892 defm FMINvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11110, "fmin",
893 int_arm_neon_vmins, int_arm_neon_vmins,
894 int_arm_neon_vmins, v2f32, v4f32, v2f64, 1>;
896 // Vector maxNum (Floating Point) - prefer a number over a quiet NaN)
897 defm FMAXNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11000, "fmaxnm",
898 int_aarch64_neon_vmaxnm,
899 int_aarch64_neon_vmaxnm,
900 int_aarch64_neon_vmaxnm,
901 v2f32, v4f32, v2f64, 1>;
903 // Vector minNum (Floating Point) - prefer a number over a quiet NaN)
904 defm FMINNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11000, "fminnm",
905 int_aarch64_neon_vminnm,
906 int_aarch64_neon_vminnm,
907 int_aarch64_neon_vminnm,
908 v2f32, v4f32, v2f64, 1>;
910 // Vector Maximum Pairwise (Signed and Unsigned Integer)
911 defm SMAXPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10100, "smaxp", int_arm_neon_vpmaxs, 1>;
912 defm UMAXPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10100, "umaxp", int_arm_neon_vpmaxu, 1>;
914 // Vector Minimum Pairwise (Signed and Unsigned Integer)
915 defm SMINPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10101, "sminp", int_arm_neon_vpmins, 1>;
916 defm UMINPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10101, "uminp", int_arm_neon_vpminu, 1>;
918 // Vector Maximum Pairwise (Floating Point)
919 defm FMAXPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11110, "fmaxp",
920 int_arm_neon_vpmaxs, int_arm_neon_vpmaxs,
921 int_arm_neon_vpmaxs, v2f32, v4f32, v2f64, 1>;
923 // Vector Minimum Pairwise (Floating Point)
924 defm FMINPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11110, "fminp",
925 int_arm_neon_vpmins, int_arm_neon_vpmins,
926 int_arm_neon_vpmins, v2f32, v4f32, v2f64, 1>;
928 // Vector maxNum Pairwise (Floating Point) - prefer a number over a quiet NaN)
929 defm FMAXNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11000, "fmaxnmp",
930 int_aarch64_neon_vpmaxnm,
931 int_aarch64_neon_vpmaxnm,
932 int_aarch64_neon_vpmaxnm,
933 v2f32, v4f32, v2f64, 1>;
935 // Vector minNum Pairwise (Floating Point) - prefer a number over a quiet NaN)
936 defm FMINNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11000, "fminnmp",
937 int_aarch64_neon_vpminnm,
938 int_aarch64_neon_vpminnm,
939 int_aarch64_neon_vpminnm,
940 v2f32, v4f32, v2f64, 1>;
942 // Vector Addition Pairwise (Integer)
943 defm ADDP : NeonI_3VSame_BHSD_sizes<0b0, 0b10111, "addp", int_arm_neon_vpadd, 1>;
945 // Vector Addition Pairwise (Floating Point)
// NOTE(review): the intrinsic argument lines for FADDP (original lines
// 947-949) are missing from this extract.
946 defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp",
950 v2f32, v4f32, v2f64, 1>;
952 // Vector Saturating Doubling Multiply High
953 defm SQDMULHvvv : NeonI_3VSame_HS_sizes<0b0, 0b10110, "sqdmulh",
954 int_arm_neon_vqdmulh, 1>;
956 // Vector Saturating Rouding Doubling Multiply High
957 defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh",
958 int_arm_neon_vqrdmulh, 1>;
960 // Vector Multiply Extended (Floating Point)
961 defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx",
962 int_aarch64_neon_vmulx,
963 int_aarch64_neon_vmulx,
964 int_aarch64_neon_vmulx,
965 v2f32, v4f32, v2f64, 1>;
967 // Vector Immediate Instructions
// Assembler operand classes and DAG operands for the MOVI/MVNI family of
// modified-immediate instructions.  The immediate encodes both a value and an
// "OpCmode" that selects the shift form (LSL or MSL) and amount.
// NOTE(review): several interior lines (braces, operand bodies) are missing
// from this extract — see the gaps in the baked-in line numbers.
969 multiclass neon_mov_imm_shift_asmoperands<string PREFIX>
971 def _asmoperand : AsmOperandClass
973 let Name = "NeonMovImmShift" # PREFIX;
974 let RenderMethod = "addNeonMovImmShift" # PREFIX # "Operands";
975 let PredicateMethod = "isNeonMovImmShift" # PREFIX;
979 // Definition of vector immediates shift operands
981 // The selectable use-cases extract the shift operation
982 // information from the OpCmode fields encoded in the immediate.
// SDNodeXForm: decodes the OpCmode back into a plain shift amount so a
// selected pattern can re-emit it as a target constant.
983 def neon_mod_shift_imm_XFORM : SDNodeXForm<imm, [{
984 uint64_t OpCmode = N->getZExtValue();
986 unsigned ShiftOnesIn;
988 A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
989 if (!HasShift) return SDValue();
990 return CurDAG->getTargetConstant(ShiftImm, MVT::i32);
993 // Vector immediates shift operands which accept LSL and MSL
994 // shift operators with shift value in the range of 0, 8, 16, 24 (LSL),
995 // or 0, 8 (LSLH) or 8, 16 (MSL).
996 defm neon_mov_imm_LSL : neon_mov_imm_shift_asmoperands<"LSL">;
997 defm neon_mov_imm_MSL : neon_mov_imm_shift_asmoperands<"MSL">;
998 // LSLH restricts shift amount to 0, 8 out of 0, 8, 16, 24
999 defm neon_mov_imm_LSLH : neon_mov_imm_shift_asmoperands<"LSLH">;
1001 multiclass neon_mov_imm_shift_operands<string PREFIX,
1002 string HALF, string ISHALF, code pred>
1004 def _operand : Operand<i32>, ImmLeaf<i32, pred, neon_mod_shift_imm_XFORM>
1007 "printNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
1009 "DecodeNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
1010 let ParserMatchClass =
1011 !cast<AsmOperandClass>("neon_mov_imm_" # PREFIX # HALF # "_asmoperand");
// LSL operand: matches when the OpCmode decodes to a shift with zeros
// shifted in (ShiftOnesIn false); MSL is the "ones shifted in" form.
1015 defm neon_mov_imm_LSL : neon_mov_imm_shift_operands<"LSL", "", "false", [{
1017 unsigned ShiftOnesIn;
1019 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1020 return (HasShift && !ShiftOnesIn);
1023 defm neon_mov_imm_MSL : neon_mov_imm_shift_operands<"MSL", "", "false", [{
1025 unsigned ShiftOnesIn;
1027 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1028 return (HasShift && ShiftOnesIn);
1031 defm neon_mov_imm_LSLH : neon_mov_imm_shift_operands<"LSL", "H", "true", [{
1033 unsigned ShiftOnesIn;
1035 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1036 return (HasShift && !ShiftOnesIn);
// Small unsigned-immediate assembler operand classes used by the MOVI forms.
1039 def neon_uimm1_asmoperand : AsmOperandClass
1042 let PredicateMethod = "isUImm<1>";
1043 let RenderMethod = "addImmOperands";
1046 def neon_uimm2_asmoperand : AsmOperandClass
1049 let PredicateMethod = "isUImm<2>";
1050 let RenderMethod = "addImmOperands";
1053 def neon_uimm8_asmoperand : AsmOperandClass
1056 let PredicateMethod = "isUImm<8>";
1057 let RenderMethod = "addImmOperands";
1060 def neon_uimm8 : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
1061 let ParserMatchClass = neon_uimm8_asmoperand;
1062 let PrintMethod = "printNeonUImm8Operand";
1065 def neon_uimm64_mask_asmoperand : AsmOperandClass
1067 let Name = "NeonUImm64Mask";
1068 let PredicateMethod = "isNeonUImm64Mask";
1069 let RenderMethod = "addNeonUImm64MaskOperands";
1072 // MCOperand for 64-bit bytemask with each byte having only the
1073 // value 0x00 and 0xff is encoded as an unsigned 8-bit value
1074 def neon_uimm64_mask : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
1075 let ParserMatchClass = neon_uimm64_mask_asmoperand;
1076 let PrintMethod = "printNeonUImm64MaskOperand";
// Modified-immediate move multiclasses.  Each def fixes the cmode field from
// the shift operand: bit patterns {0,S1,S0,0} select the per-word LSL forms,
// {1,0,S,0} the per-halfword forms; the trailing cmode bit distinguishes the
// plain moves (0) from the read-modify-write BIC/ORR forms (1).
// NOTE(review): interior lines (outs clauses, closing braces) are missing
// from this extract; compare against the full file before restructuring.
1079 multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
1080 SDPatternOperator opnode>
1082 // shift zeros, per word
1083 def _2S : NeonI_1VModImm<0b0, op,
1085 (ins neon_uimm8:$Imm,
1086 neon_mov_imm_LSL_operand:$Simm),
1087 !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
1088 [(set (v2i32 VPR64:$Rd),
1089 (v2i32 (opnode (timm:$Imm),
1090 (neon_mov_imm_LSL_operand:$Simm))))],
1093 let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
1096 def _4S : NeonI_1VModImm<0b1, op,
1098 (ins neon_uimm8:$Imm,
1099 neon_mov_imm_LSL_operand:$Simm),
1100 !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
1101 [(set (v4i32 VPR128:$Rd),
1102 (v4i32 (opnode (timm:$Imm),
1103 (neon_mov_imm_LSL_operand:$Simm))))],
1106 let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
1109 // shift zeros, per halfword
1110 def _4H : NeonI_1VModImm<0b0, op,
1112 (ins neon_uimm8:$Imm,
1113 neon_mov_imm_LSLH_operand:$Simm),
1114 !strconcat(asmop, " $Rd.4h, $Imm$Simm"),
1115 [(set (v4i16 VPR64:$Rd),
1116 (v4i16 (opnode (timm:$Imm),
1117 (neon_mov_imm_LSLH_operand:$Simm))))],
1120 let cmode = {0b1, 0b0, Simm, 0b0};
1123 def _8H : NeonI_1VModImm<0b1, op,
1125 (ins neon_uimm8:$Imm,
1126 neon_mov_imm_LSLH_operand:$Simm),
1127 !strconcat(asmop, " $Rd.8h, $Imm$Simm"),
1128 [(set (v8i16 VPR128:$Rd),
1129 (v8i16 (opnode (timm:$Imm),
1130 (neon_mov_imm_LSLH_operand:$Simm))))],
1133 let cmode = {0b1, 0b0, Simm, 0b0};
// Read-modify-write variant: used by BIC/ORR immediate, which combine the
// materialized immediate with $src via opnode (and/or) and so tie $src to $Rd.
1137 multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
1138 SDPatternOperator opnode,
1139 SDPatternOperator neonopnode>
1141 let Constraints = "$src = $Rd" in {
1142 // shift zeros, per word
1143 def _2S : NeonI_1VModImm<0b0, op,
1145 (ins VPR64:$src, neon_uimm8:$Imm,
1146 neon_mov_imm_LSL_operand:$Simm),
1147 !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
1148 [(set (v2i32 VPR64:$Rd),
1149 (v2i32 (opnode (v2i32 VPR64:$src),
1150 (v2i32 (bitconvert (v2i32 (neonopnode timm:$Imm,
1151 neon_mov_imm_LSL_operand:$Simm)))))))],
1154 let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
1157 def _4S : NeonI_1VModImm<0b1, op,
1159 (ins VPR128:$src, neon_uimm8:$Imm,
1160 neon_mov_imm_LSL_operand:$Simm),
1161 !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
1162 [(set (v4i32 VPR128:$Rd),
1163 (v4i32 (opnode (v4i32 VPR128:$src),
1164 (v4i32 (bitconvert (v4i32 (neonopnode timm:$Imm,
1165 neon_mov_imm_LSL_operand:$Simm)))))))],
1168 let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
1171 // shift zeros, per halfword
1172 def _4H : NeonI_1VModImm<0b0, op,
1174 (ins VPR64:$src, neon_uimm8:$Imm,
1175 neon_mov_imm_LSLH_operand:$Simm),
1176 !strconcat(asmop, " $Rd.4h, $Imm$Simm"),
1177 [(set (v4i16 VPR64:$Rd),
1178 (v4i16 (opnode (v4i16 VPR64:$src),
1179 (v4i16 (bitconvert (v4i16 (neonopnode timm:$Imm,
1180 neon_mov_imm_LSL_operand:$Simm)))))))],
1183 let cmode = {0b1, 0b0, Simm, 0b1};
1186 def _8H : NeonI_1VModImm<0b1, op,
1188 (ins VPR128:$src, neon_uimm8:$Imm,
1189 neon_mov_imm_LSLH_operand:$Simm),
1190 !strconcat(asmop, " $Rd.8h, $Imm$Simm"),
1191 [(set (v8i16 VPR128:$Rd),
1192 (v8i16 (opnode (v8i16 VPR128:$src),
1193 (v8i16 (bitconvert (v8i16 (neonopnode timm:$Imm,
1194 neon_mov_imm_LSL_operand:$Simm)))))))],
1197 let cmode = {0b1, 0b0, Simm, 0b1};
// MSL ("ones" shift) variant: only word-sized elements exist for MSL;
// cmode is {1,1,0,S}.
1202 multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op,
1203 SDPatternOperator opnode>
1205 // shift ones, per word
1206 def _2S : NeonI_1VModImm<0b0, op,
1208 (ins neon_uimm8:$Imm,
1209 neon_mov_imm_MSL_operand:$Simm),
1210 !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
1211 [(set (v2i32 VPR64:$Rd),
1212 (v2i32 (opnode (timm:$Imm),
1213 (neon_mov_imm_MSL_operand:$Simm))))],
1216 let cmode = {0b1, 0b1, 0b0, Simm};
1219 def _4S : NeonI_1VModImm<0b1, op,
1221 (ins neon_uimm8:$Imm,
1222 neon_mov_imm_MSL_operand:$Simm),
1223 !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
1224 [(set (v4i32 VPR128:$Rd),
1225 (v4i32 (opnode (timm:$Imm),
1226 (neon_mov_imm_MSL_operand:$Simm))))],
1229 let cmode = {0b1, 0b1, 0b0, Simm};
1233 // Vector Move Immediate Shifted
// Immediate moves are pure constants, so they are rematerializable.
1234 let isReMaterializable = 1 in {
1235 defm MOVIvi_lsl : NeonI_mov_imm_lsl_sizes<"movi", 0b0, Neon_movi>;
1238 // Vector Move Inverted Immediate Shifted
1239 let isReMaterializable = 1 in {
1240 defm MVNIvi_lsl : NeonI_mov_imm_lsl_sizes<"mvni", 0b1, Neon_mvni>;
1243 // Vector Bitwise Bit Clear (AND NOT) - immediate
// NOTE(review): the trailing template arguments of these two defms (original
// lines 1246-1247 and 1253-1254) are missing from this extract.
1244 let isReMaterializable = 1 in {
1245 defm BICvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"bic", 0b1,
1249 // Vector Bitwise OR - immedidate
1251 let isReMaterializable = 1 in {
1252 defm ORRvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"orr", 0b0,
1256 // Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immedidate
1257 // LowerBUILD_VECTOR favors lowering MOVI over MVNI.
1258 // BIC immediate instructions selection requires additional patterns to
1259 // transform Neon_movi operands into BIC immediate operands
1261 def neon_mov_imm_LSLH_transform_XFORM : SDNodeXForm<imm, [{
1262 uint64_t OpCmode = N->getZExtValue();
1264 unsigned ShiftOnesIn;
1265 (void)A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
1266 // LSLH restricts shift amount to 0, 8 which are encoded as 0 and 1
1267 // Transform encoded shift amount 0 to 1 and 1 to 0.
1268 return CurDAG->getTargetConstant(!ShiftImm, MVT::i32);
1271 def neon_mov_imm_LSLH_transform_operand
1274 unsigned ShiftOnesIn;
1276 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1277 return (HasShift && !ShiftOnesIn); }],
1278 neon_mov_imm_LSLH_transform_XFORM>;
1280 // Transform (and A, (4h Neon_movi 0xff)) -> BIC 4h (A, 0x00, LSL 8)
1281 // Transform (and A, (4h Neon_movi 0xff LSL #8)) -> BIC 4h (A, 0x00)
// A movi of 0xff in one half of each halfword is equivalent to clearing the
// other half, so it can be selected as BIC with a zero immediate and the
// opposite shift (computed by the XFORM above).
1282 def : Pat<(v4i16 (and VPR64:$src,
1283 (v4i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
1284 (BICvi_lsl_4H VPR64:$src, 0,
1285 neon_mov_imm_LSLH_transform_operand:$Simm)>;
1287 // Transform (and A, (8h Neon_movi 8h 0xff)) -> BIC 8h (A, 0x00, LSL 8)
1288 // Transform (and A, (8h Neon_movi 0xff LSL #8)) -> BIC 8h (A, 0x00)
1289 def : Pat<(v8i16 (and VPR128:$src,
1290 (v8i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
1291 (BICvi_lsl_8H VPR128:$src, 0,
1292 neon_mov_imm_LSLH_transform_operand:$Simm)>;
// Patterns that select the 4H/8H BIC/ORR-immediate instructions for other
// vector types by bitconverting through the halfword-materialized immediate.
1295 multiclass Neon_bitwiseVi_patterns<SDPatternOperator opnode,
1296 SDPatternOperator neonopnode,
1298 Instruction INST8H> {
1299 def : Pat<(v8i8 (opnode VPR64:$src,
1300 (bitconvert(v4i16 (neonopnode timm:$Imm,
1301 neon_mov_imm_LSLH_operand:$Simm))))),
1302 (INST4H VPR64:$src, neon_uimm8:$Imm,
1303 neon_mov_imm_LSLH_operand:$Simm)>;
1304 def : Pat<(v1i64 (opnode VPR64:$src,
1305 (bitconvert(v4i16 (neonopnode timm:$Imm,
1306 neon_mov_imm_LSLH_operand:$Simm))))),
1307 (INST4H VPR64:$src, neon_uimm8:$Imm,
1308 neon_mov_imm_LSLH_operand:$Simm)>;
1310 def : Pat<(v16i8 (opnode VPR128:$src,
1311 (bitconvert(v8i16 (neonopnode timm:$Imm,
1312 neon_mov_imm_LSLH_operand:$Simm))))),
1313 (INST8H VPR128:$src, neon_uimm8:$Imm,
1314 neon_mov_imm_LSLH_operand:$Simm)>;
1315 def : Pat<(v4i32 (opnode VPR128:$src,
1316 (bitconvert(v8i16 (neonopnode timm:$Imm,
1317 neon_mov_imm_LSLH_operand:$Simm))))),
1318 (INST8H VPR128:$src, neon_uimm8:$Imm,
1319 neon_mov_imm_LSLH_operand:$Simm)>;
1320 def : Pat<(v2i64 (opnode VPR128:$src,
1321 (bitconvert(v8i16 (neonopnode timm:$Imm,
1322 neon_mov_imm_LSLH_operand:$Simm))))),
1323 (INST8H VPR128:$src, neon_uimm8:$Imm,
1324 neon_mov_imm_LSLH_operand:$Simm)>;
1327 // Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
// BIC computes Rd = Rd & ~imm, and Neon_mvni materializes ~imm, so the DAG
// node to match is `and` (src & mvni(imm)) — exactly as the hand-written
// v4i16/v8i16 BIC patterns above do.  Matching `or` here would select BIC
// for (src | ~imm), i.e. an ORN, which would miscompile.
1328 defm : Neon_bitwiseVi_patterns<and, Neon_mvni, BICvi_lsl_4H, BICvi_lsl_8H>;
1330 // Additional patterns for Vector Bitwise OR - immedidate
// ORR-immediate: src | movi(imm) maps directly onto ORR with the same
// immediate/shift encoding.
1331 defm : Neon_bitwiseVi_patterns<or, Neon_movi, ORRvi_lsl_4H, ORRvi_lsl_8H>;
1334 // Vector Move Immediate Masked
1335 let isReMaterializable = 1 in {
1336 defm MOVIvi_msl : NeonI_mov_imm_msl_sizes<"movi", 0b0, Neon_movi>;
1339 // Vector Move Inverted Immediate Masked
1340 let isReMaterializable = 1 in {
1341 defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>;
// Assembly alias allowing the shift to be omitted when it is zero, e.g.
// "movi v0.2s, #42" for "movi v0.2s, #42, lsl #0".
1344 class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane,
1345 Instruction inst, RegisterOperand VPRC>
1346 : NeonInstAlias<!strconcat(asmop, " $Rd," # asmlane # ", $Imm"),
1347 (inst VPRC:$Rd, neon_uimm8:$Imm, 0), 0b0>;
1349 // Aliases for Vector Move Immediate Shifted
1350 def : NeonI_mov_imm_lsl_aliases<"movi", ".2s", MOVIvi_lsl_2S, VPR64>;
1351 def : NeonI_mov_imm_lsl_aliases<"movi", ".4s", MOVIvi_lsl_4S, VPR128>;
1352 def : NeonI_mov_imm_lsl_aliases<"movi", ".4h", MOVIvi_lsl_4H, VPR64>;
1353 def : NeonI_mov_imm_lsl_aliases<"movi", ".8h", MOVIvi_lsl_8H, VPR128>;
1355 // Aliases for Vector Move Inverted Immediate Shifted
1356 def : NeonI_mov_imm_lsl_aliases<"mvni", ".2s", MVNIvi_lsl_2S, VPR64>;
1357 def : NeonI_mov_imm_lsl_aliases<"mvni", ".4s", MVNIvi_lsl_4S, VPR128>;
1358 def : NeonI_mov_imm_lsl_aliases<"mvni", ".4h", MVNIvi_lsl_4H, VPR64>;
1359 def : NeonI_mov_imm_lsl_aliases<"mvni", ".8h", MVNIvi_lsl_8H, VPR128>;
1361 // Aliases for Vector Bitwise Bit Clear (AND NOT) - immediate
1362 def : NeonI_mov_imm_lsl_aliases<"bic", ".2s", BICvi_lsl_2S, VPR64>;
1363 def : NeonI_mov_imm_lsl_aliases<"bic", ".4s", BICvi_lsl_4S, VPR128>;
1364 def : NeonI_mov_imm_lsl_aliases<"bic", ".4h", BICvi_lsl_4H, VPR64>;
1365 def : NeonI_mov_imm_lsl_aliases<"bic", ".8h", BICvi_lsl_8H, VPR128>;
1367 // Aliases for Vector Bitwise OR - immedidate
1368 def : NeonI_mov_imm_lsl_aliases<"orr", ".2s", ORRvi_lsl_2S, VPR64>;
1369 def : NeonI_mov_imm_lsl_aliases<"orr", ".4s", ORRvi_lsl_4S, VPR128>;
1370 def : NeonI_mov_imm_lsl_aliases<"orr", ".4h", ORRvi_lsl_4H, VPR64>;
1371 def : NeonI_mov_imm_lsl_aliases<"orr", ".8h", ORRvi_lsl_8H, VPR128>;
1373 // Vector Move Immediate - per byte
// Byte-sized MOVI has no shift operand; the second Neon_movi operand is an
// arbitrary i32 immediate (the OpCmode), hence the wildcard (i32 imm).
// NOTE(review): per-def trailing lines (itinerary, cmode lets, closing
// braces) are missing from this extract.
1374 let isReMaterializable = 1 in {
1375 def MOVIvi_8B : NeonI_1VModImm<0b0, 0b0,
1376 (outs VPR64:$Rd), (ins neon_uimm8:$Imm),
1377 "movi\t$Rd.8b, $Imm",
1378 [(set (v8i8 VPR64:$Rd),
1379 (v8i8 (Neon_movi (timm:$Imm), (i32 imm))))],
1384 def MOVIvi_16B : NeonI_1VModImm<0b1, 0b0,
1385 (outs VPR128:$Rd), (ins neon_uimm8:$Imm),
1386 "movi\t$Rd.16b, $Imm",
1387 [(set (v16i8 VPR128:$Rd),
1388 (v16i8 (Neon_movi (timm:$Imm), (i32 imm))))],
1394 // Vector Move Immediate - bytemask, per double word
// The 8-bit immediate expands to a 64-bit mask: each immediate bit selects
// 0x00 or 0xff for one byte of the doubleword (see neon_uimm64_mask above).
1395 let isReMaterializable = 1 in {
1396 def MOVIvi_2D : NeonI_1VModImm<0b1, 0b1,
1397 (outs VPR128:$Rd), (ins neon_uimm64_mask:$Imm),
1398 "movi\t $Rd.2d, $Imm",
1399 [(set (v2i64 VPR128:$Rd),
1400 (v2i64 (Neon_movi (timm:$Imm), (i32 imm))))],
1406 // Vector Move Immediate - bytemask, one doubleword
1408 let isReMaterializable = 1 in {
1409 def MOVIdi : NeonI_1VModImm<0b0, 0b1,
1410 (outs FPR64:$Rd), (ins neon_uimm64_mask:$Imm),
1412 [(set (f64 FPR64:$Rd),
1414 (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))))],
1420 // Vector Floating Point Move Immediate
1422 class NeonI_FMOV_impl<string asmlane, RegisterOperand VPRC, ValueType OpTy,
1423 Operand immOpType, bit q, bit op>
1424 : NeonI_1VModImm<q, op,
1425 (outs VPRC:$Rd), (ins immOpType:$Imm),
1426 "fmov\t$Rd" # asmlane # ", $Imm",
1427 [(set (OpTy VPRC:$Rd),
1428 (OpTy (Neon_fmovi (timm:$Imm))))],
1433 let isReMaterializable = 1 in {
1434 def FMOVvi_2S : NeonI_FMOV_impl<".2s", VPR64, v2f32, fmov32_operand, 0b0, 0b0>;
1435 def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>;
1436 def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>;
1439 // Vector Shift (Immediate)
1440 // Immediate in [0, 63]
1441 def imm0_63 : Operand<i32> {
1442 let ParserMatchClass = uimm6_asmoperand;
1445 // Shift Right Immediate - A shift right immediate is encoded differently from
1446 // other shift immediates. The immh:immb field is encoded like so:
1449 // 8 immh:immb<6:3> = '0001xxx', <imm> is encoded in immh:immb<2:0>
1450 // 16 immh:immb<6:4> = '001xxxx', <imm> is encoded in immh:immb<3:0>
1451 // 32 immh:immb<6:5> = '01xxxxx', <imm> is encoded in immh:immb<4:0>
1452 // 64 immh:immb<6> = '1xxxxxx', <imm> is encoded in immh:immb<5:0>
// Assembler operand class for right-shift amounts; OFFSET is the element
// width in bits (valid shift range is [1, OFFSET]).
1453 class shr_imm_asmoperands<string OFFSET> : AsmOperandClass {
1454 let Name = "ShrImm" # OFFSET;
1455 let RenderMethod = "addImmOperands";
1456 let DiagnosticType = "ShrImm" # OFFSET;
1459 class shr_imm<string OFFSET> : Operand<i32> {
1460 let EncoderMethod = "getShiftRightImm" # OFFSET;
1461 let DecoderMethod = "DecodeShiftRightImm" # OFFSET;
1462 let ParserMatchClass =
1463 !cast<AsmOperandClass>("shr_imm" # OFFSET # "_asmoperand");
1466 def shr_imm8_asmoperand : shr_imm_asmoperands<"8">;
1467 def shr_imm16_asmoperand : shr_imm_asmoperands<"16">;
1468 def shr_imm32_asmoperand : shr_imm_asmoperands<"32">;
1469 def shr_imm64_asmoperand : shr_imm_asmoperands<"64">;
1471 def shr_imm8 : shr_imm<"8">;
1472 def shr_imm16 : shr_imm<"16">;
1473 def shr_imm32 : shr_imm<"32">;
1474 def shr_imm64 : shr_imm<"64">;
// Base class for vector shift-by-immediate: the shifted-in immediate is
// matched as a splat (Neon_dupImm) of the scalar amount.
1476 class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
1477 RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode>
1478 : NeonI_2VShiftImm<q, u, opcode,
1479 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
1480 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1481 [(set (Ty VPRC:$Rd),
1482 (Ty (OpNode (Ty VPRC:$Rn),
1483 (Ty (Neon_dupImm (i32 imm:$Imm))))))],
// Shift-left multiclass: fixes the immh leading-one position per element
// size so the encoded immh:immb uniquely determines the element width.
1486 multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> {
1487 // 64-bit vector types.
1488 def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3, shl> {
1489 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1492 def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4, shl> {
1493 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1496 def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5, shl> {
1497 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1500 // 128-bit vector types.
1501 def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3, shl> {
1502 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1505 def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4, shl> {
1506 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1509 def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5, shl> {
1510 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1513 def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63, shl> {
1514 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
// Shift-right multiclass: same structure but uses the shr_imm* operands
// (range [1, width]) and the caller-supplied sra/srl node.
1518 multiclass NeonI_N2VShR<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
1519 def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1521 let Inst{22-19} = 0b0001;
1524 def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1526 let Inst{22-20} = 0b001;
1529 def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1531 let Inst{22-21} = 0b01;
1534 def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1536 let Inst{22-19} = 0b0001;
1539 def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1541 let Inst{22-20} = 0b001;
1544 def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1546 let Inst{22-21} = 0b01;
1549 def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1556 defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">;
1559 defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>;
1560 defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>;
// PatFrags for the high and low halves of a 128-bit vector, used by the
// widening ("2" suffix) instruction patterns below.
1562 def Neon_High16B : PatFrag<(ops node:$in),
1563 (extract_subvector (v16i8 node:$in), (iPTR 8))>;
1564 def Neon_High8H : PatFrag<(ops node:$in),
1565 (extract_subvector (v8i16 node:$in), (iPTR 4))>;
1566 def Neon_High4S : PatFrag<(ops node:$in),
1567 (extract_subvector (v4i32 node:$in), (iPTR 2))>;
1569 def Neon_low8H : PatFrag<(ops node:$in),
1570 (v4i16 (extract_subvector (v8i16 node:$in),
1572 def Neon_low4S : PatFrag<(ops node:$in),
1573 (v2i32 (extract_subvector (v4i32 node:$in),
1575 def Neon_low4f : PatFrag<(ops node:$in),
1576 (v2f32 (extract_subvector (v4f32 node:$in),
// Widening shift-left-long: extend (sext/zext) a 64-bit source to the
// double-width element type, then shift left by the splatted immediate.
1579 class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1580 string SrcT, ValueType DestTy, ValueType SrcTy,
1581 Operand ImmTy, SDPatternOperator ExtOp>
1582 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1583 (ins VPR64:$Rn, ImmTy:$Imm),
1584 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1585 [(set (DestTy VPR128:$Rd),
1587 (DestTy (ExtOp (SrcTy VPR64:$Rn))),
1588 (DestTy (Neon_dupImm (i32 imm:$Imm))))))],
// "High" variant (asmop # "2"): operates on the high half of a 128-bit
// source, extracted via the getTop PatFrag (Neon_High*).
1591 class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1592 string SrcT, ValueType DestTy, ValueType SrcTy,
1593 int StartIndex, Operand ImmTy,
1594 SDPatternOperator ExtOp, PatFrag getTop>
1595 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1596 (ins VPR128:$Rn, ImmTy:$Imm),
1597 asmop # "2\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1598 [(set (DestTy VPR128:$Rd),
1601 (SrcTy (getTop VPR128:$Rn)))),
1602 (DestTy (Neon_dupImm (i32 imm:$Imm))))))],
1605 multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop,
1607 // 64-bit vector types.
1608 def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8,
1610 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1613 def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16,
1615 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1618 def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32,
1620 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1623 // 128-bit vector types
1624 def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b",
1625 v8i16, v8i8, 8, uimm3, ExtOp, Neon_High16B> {
1626 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1629 def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h",
1630 v4i32, v4i16, 4, uimm4, ExtOp, Neon_High8H> {
1631 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1634 def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s",
1635 v2i64, v2i32, 2, uimm5, ExtOp, Neon_High4S> {
1636 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1639 // Use other patterns to match when the immediate is 0.
// SSHLL/USHLL #0 doubles as the canonical vector sign/zero extension, so
// plain ExtOp (with no shift) is also selected to these instructions.
1640 def : Pat<(v8i16 (ExtOp (v8i8 VPR64:$Rn))),
1641 (!cast<Instruction>(prefix # "_8B") VPR64:$Rn, 0)>;
1643 def : Pat<(v4i32 (ExtOp (v4i16 VPR64:$Rn))),
1644 (!cast<Instruction>(prefix # "_4H") VPR64:$Rn, 0)>;
1646 def : Pat<(v2i64 (ExtOp (v2i32 VPR64:$Rn))),
1647 (!cast<Instruction>(prefix # "_2S") VPR64:$Rn, 0)>;
1649 def : Pat<(v8i16 (ExtOp (v8i8 (Neon_High16B VPR128:$Rn)))),
1650 (!cast<Instruction>(prefix # "_16B") VPR128:$Rn, 0)>;
1652 def : Pat<(v4i32 (ExtOp (v4i16 (Neon_High8H VPR128:$Rn)))),
1653 (!cast<Instruction>(prefix # "_8H") VPR128:$Rn, 0)>;
1655 def : Pat<(v2i64 (ExtOp (v2i32 (Neon_High4S VPR128:$Rn)))),
1656 (!cast<Instruction>(prefix # "_4S") VPR128:$Rn, 0)>;
1660 defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>;
1661 defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>;
1663 // Rounding/Saturating shift
// Unlike N2VShift, these take an SDPatternOperator (intrinsic) rather than a
// plain shl/sra node, since rounding/saturating semantics have no generic
// DAG node.
1664 class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T,
1665 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1666 SDPatternOperator OpNode>
1667 : NeonI_2VShiftImm<q, u, opcode,
1668 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
1669 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1670 [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn),
1674 // shift right (vector by immediate)
1675 multiclass NeonI_N2VShR_RQ<bit u, bits<5> opcode, string asmop,
1676 SDPatternOperator OpNode> {
1677 def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1679 let Inst{22-19} = 0b0001;
1682 def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1684 let Inst{22-20} = 0b001;
1687 def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1689 let Inst{22-21} = 0b01;
1692 def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1694 let Inst{22-19} = 0b0001;
1697 def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1699 let Inst{22-20} = 0b001;
1702 def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1704 let Inst{22-21} = 0b01;
1707 def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
// Left-shift variant of the above: uses the uimm*/imm0_63 left-shift ranges.
1713 multiclass NeonI_N2VShL_Q<bit u, bits<5> opcode, string asmop,
1714 SDPatternOperator OpNode> {
1715 // 64-bit vector types.
1716 def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3,
1718 let Inst{22-19} = 0b0001;
1721 def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4,
1723 let Inst{22-20} = 0b001;
1726 def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5,
1728 let Inst{22-21} = 0b01;
1731 // 128-bit vector types.
1732 def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3,
1734 let Inst{22-19} = 0b0001;
1737 def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4,
1739 let Inst{22-20} = 0b001;
1742 def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5,
1744 let Inst{22-21} = 0b01;
1747 def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63,
1753 // Rounding shift right
1754 defm SRSHRvvi : NeonI_N2VShR_RQ<0b0, 0b00100, "srshr",
1755 int_aarch64_neon_vsrshr>;
1756 defm URSHRvvi : NeonI_N2VShR_RQ<0b1, 0b00100, "urshr",
1757 int_aarch64_neon_vurshr>;
1759 // Saturating shift left unsigned
1760 defm SQSHLUvvi : NeonI_N2VShL_Q<0b1, 0b01100, "sqshlu", int_aarch64_neon_vsqshlu>;
1762 // Saturating shift left
// The Neon_sqrshlImm/Neon_uqrshlImm SDNodes (declared at the top of the
// file) carry the immediate-QSHL semantics for ISel.
1763 defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>;
1764 defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>;
1766 class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T,
1767 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1769 : NeonI_2VShiftImm<q, u, opcode,
1770 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1771 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1772 [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
1773 (Ty (OpNode (Ty VPRC:$Rn),
1774 (Ty (Neon_dupImm (i32 imm:$Imm))))))))],
1776 let Constraints = "$src = $Rd";
1779 // Shift Right accumulate
1780 multiclass NeonI_N2VShRAdd<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
1781 def _8B : N2VShiftAdd<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1783 let Inst{22-19} = 0b0001;
1786 def _4H : N2VShiftAdd<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1788 let Inst{22-20} = 0b001;
1791 def _2S : N2VShiftAdd<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1793 let Inst{22-21} = 0b01;
1796 def _16B : N2VShiftAdd<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1798 let Inst{22-19} = 0b0001;
1801 def _8H : N2VShiftAdd<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1803 let Inst{22-20} = 0b001;
1806 def _4S : N2VShiftAdd<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1808 let Inst{22-21} = 0b01;
1811 def _2D : N2VShiftAdd<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1817 // Shift right and accumulate
1818 defm SSRAvvi : NeonI_N2VShRAdd<0, 0b00010, "ssra", sra>;
1819 defm USRAvvi : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>;
1821 // Rounding shift accumulate
1822 class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T,
1823 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1824 SDPatternOperator OpNode>
1825 : NeonI_2VShiftImm<q, u, opcode,
1826 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1827 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1828 [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
1829 (Ty (OpNode (Ty VPRC:$Rn), (i32 imm:$Imm))))))],
1831 let Constraints = "$src = $Rd";
1834 multiclass NeonI_N2VShRAdd_R<bit u, bits<5> opcode, string asmop,
1835 SDPatternOperator OpNode> {
1836 def _8B : N2VShiftAdd_R<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1838 let Inst{22-19} = 0b0001;
1841 def _4H : N2VShiftAdd_R<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1843 let Inst{22-20} = 0b001;
1846 def _2S : N2VShiftAdd_R<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1848 let Inst{22-21} = 0b01;
1851 def _16B : N2VShiftAdd_R<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1853 let Inst{22-19} = 0b0001;
1856 def _8H : N2VShiftAdd_R<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1858 let Inst{22-20} = 0b001;
1861 def _4S : N2VShiftAdd_R<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1863 let Inst{22-21} = 0b01;
1866 def _2D : N2VShiftAdd_R<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1872 // Rounding shift right and accumulate
1873 defm SRSRAvvi : NeonI_N2VShRAdd_R<0, 0b00110, "srsra", int_aarch64_neon_vsrshr>;
1874 defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>;
1876 // Shift insert by immediate
1877 class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T,
1878 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1879 SDPatternOperator OpNode>
1880 : NeonI_2VShiftImm<q, u, opcode,
1881 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1882 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1883 [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$src), (Ty VPRC:$Rn),
1886 let Constraints = "$src = $Rd";
1889 // shift left insert (vector by immediate)
1890 multiclass NeonI_N2VShLIns<bit u, bits<5> opcode, string asmop> {
1891 def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3,
1892 int_aarch64_neon_vsli> {
1893 let Inst{22-19} = 0b0001;
1896 def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4,
1897 int_aarch64_neon_vsli> {
1898 let Inst{22-20} = 0b001;
1901 def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5,
1902 int_aarch64_neon_vsli> {
1903 let Inst{22-21} = 0b01;
1906 // 128-bit vector types
1907 def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3,
1908 int_aarch64_neon_vsli> {
1909 let Inst{22-19} = 0b0001;
1912 def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4,
1913 int_aarch64_neon_vsli> {
1914 let Inst{22-20} = 0b001;
1917 def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5,
1918 int_aarch64_neon_vsli> {
1919 let Inst{22-21} = 0b01;
1922 def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63,
1923 int_aarch64_neon_vsli> {
1928 // shift right insert (vector by immediate)
1929 multiclass NeonI_N2VShRIns<bit u, bits<5> opcode, string asmop> {
1930 // 64-bit vector types.
1931 def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1932 int_aarch64_neon_vsri> {
1933 let Inst{22-19} = 0b0001;
1936 def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1937 int_aarch64_neon_vsri> {
1938 let Inst{22-20} = 0b001;
1941 def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1942 int_aarch64_neon_vsri> {
1943 let Inst{22-21} = 0b01;
1946 // 128-bit vector types
1947 def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1948 int_aarch64_neon_vsri> {
1949 let Inst{22-19} = 0b0001;
1952 def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1953 int_aarch64_neon_vsri> {
1954 let Inst{22-20} = 0b001;
1957 def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1958 int_aarch64_neon_vsri> {
1959 let Inst{22-21} = 0b01;
1962 def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1963 int_aarch64_neon_vsri> {
1968 // Shift left and insert
1969 defm SLIvvi : NeonI_N2VShLIns<0b1, 0b01010, "sli">;
1971 // Shift right and insert
1972 defm SRIvvi : NeonI_N2VShRIns<0b1, 0b01000, "sri">;
1974 class N2VShR_Narrow<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1975 string SrcT, Operand ImmTy>
1976 : NeonI_2VShiftImm<q, u, opcode,
1977 (outs VPR64:$Rd), (ins VPR128:$Rn, ImmTy:$Imm),
1978 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1981 class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1982 string SrcT, Operand ImmTy>
1983 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1984 (ins VPR128:$src, VPR128:$Rn, ImmTy:$Imm),
1985 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1987 let Constraints = "$src = $Rd";
1990 // shift right narrow (vector by immediate)
1991 multiclass NeonI_N2VShR_Narrow<bit u, bits<5> opcode, string asmop> {
1992 def _8B : N2VShR_Narrow<0b0, u, opcode, asmop, "8b", "8h", shr_imm8> {
1993 let Inst{22-19} = 0b0001;
1996 def _4H : N2VShR_Narrow<0b0, u, opcode, asmop, "4h", "4s", shr_imm16> {
1997 let Inst{22-20} = 0b001;
2000 def _2S : N2VShR_Narrow<0b0, u, opcode, asmop, "2s", "2d", shr_imm32> {
2001 let Inst{22-21} = 0b01;
2004 // Shift Narrow High
2005 def _16B : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "16b", "8h",
2007 let Inst{22-19} = 0b0001;
2010 def _8H : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "8h", "4s",
2012 let Inst{22-20} = 0b001;
2015 def _4S : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "4s", "2d",
2017 let Inst{22-21} = 0b01;
2021 // Shift right narrow
2022 defm SHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10000, "shrn">;
2024 // Shift right narrow (prefix Q is saturating, prefix R is rounding)
2025 defm QSHRUNvvi :NeonI_N2VShR_Narrow<0b1, 0b10000, "sqshrun">;
2026 defm RSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10001, "rshrn">;
2027 defm QRSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10001, "sqrshrun">;
2028 defm SQSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10010, "sqshrn">;
2029 defm UQSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10010, "uqshrn">;
2030 defm SQRSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10011, "sqrshrn">;
2031 defm UQRSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10011, "uqrshrn">;
// PatFrags matching a 128-bit vector built by concatenating two 64-bit
// halves ($Rm = low half, $Rn = high half). Used by the patterns below to
// recognise the "write result into high half" (mnemonic suffix "2") forms.
2033 def Neon_combine_2D : PatFrag<(ops node:$Rm, node:$Rn),
2034 (v2i64 (concat_vectors (v1i64 node:$Rm),
2035 (v1i64 node:$Rn)))>;
2036 def Neon_combine_8H : PatFrag<(ops node:$Rm, node:$Rn),
2037 (v8i16 (concat_vectors (v4i16 node:$Rm),
2038 (v4i16 node:$Rn)))>;
2039 def Neon_combine_4S : PatFrag<(ops node:$Rm, node:$Rn),
2040 (v4i32 (concat_vectors (v2i32 node:$Rm),
2041 (v2i32 node:$Rn)))>;
// Floating-point element variants of the same combine pattern.
2042 def Neon_combine_4f : PatFrag<(ops node:$Rm, node:$Rn),
2043 (v4f32 (concat_vectors (v2f32 node:$Rm),
2044 (v2f32 node:$Rn)))>;
2045 def Neon_combine_2d : PatFrag<(ops node:$Rm, node:$Rn),
2046 (v2f64 (concat_vectors (v1f64 node:$Rm),
2047 (v1f64 node:$Rn)))>;
// PatFrags matching a vector logical shift right (srl) by an immediate that
// has been splatted across all lanes via Neon_dupImm. $rhs is the i32 shift
// amount.
2049 def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
2050 (v8i16 (srl (v8i16 node:$lhs),
2051 (v8i16 (Neon_dupImm (i32 node:$rhs)))))>;
2052 def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
2053 (v4i32 (srl (v4i32 node:$lhs),
2054 (v4i32 (Neon_dupImm (i32 node:$rhs)))))>;
2055 def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
2056 (v2i64 (srl (v2i64 node:$lhs),
2057 (v2i64 (Neon_dupImm (i32 node:$rhs)))))>;
// Arithmetic shift right (sra) variants of the same splatted-immediate shift.
2058 def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
2059 (v8i16 (sra (v8i16 node:$lhs),
2060 (v8i16 (Neon_dupImm (i32 node:$rhs)))))>;
2061 def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
2062 (v4i32 (sra (v4i32 node:$lhs),
2063 (v4i32 (Neon_dupImm (i32 node:$rhs)))))>;
2064 def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
2065 (v2i64 (sra (v2i64 node:$lhs),
2066 (v2i64 (Neon_dupImm (i32 node:$rhs)))))>;
2068 // Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors)
2069 multiclass Neon_shiftNarrow_patterns<string shr> {
2070 def : Pat<(v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H") VPR128:$Rn,
2072 (SHRNvvi_8B VPR128:$Rn, imm:$Imm)>;
2073 def : Pat<(v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S") VPR128:$Rn,
2075 (SHRNvvi_4H VPR128:$Rn, imm:$Imm)>;
2076 def : Pat<(v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D") VPR128:$Rn,
2078 (SHRNvvi_2S VPR128:$Rn, imm:$Imm)>;
2080 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2081 (v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H")
2082 VPR128:$Rn, imm:$Imm)))))),
2083 (SHRNvvi_16B (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2084 VPR128:$Rn, imm:$Imm)>;
2085 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2086 (v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S")
2087 VPR128:$Rn, imm:$Imm)))))),
2088 (SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2089 VPR128:$Rn, imm:$Imm)>;
2090 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2091 (v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D")
2092 VPR128:$Rn, imm:$Imm)))))),
2093 (SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2094 VPR128:$Rn, imm:$Imm)>;
2097 multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> {
2098 def : Pat<(v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm)),
2099 (!cast<Instruction>(prefix # "_8B") VPR128:$Rn, imm:$Imm)>;
2100 def : Pat<(v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm)),
2101 (!cast<Instruction>(prefix # "_4H") VPR128:$Rn, imm:$Imm)>;
2102 def : Pat<(v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm)),
2103 (!cast<Instruction>(prefix # "_2S") VPR128:$Rn, imm:$Imm)>;
2105 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2106 (v1i64 (bitconvert (v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm))))),
2107 (!cast<Instruction>(prefix # "_16B")
2108 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2109 VPR128:$Rn, imm:$Imm)>;
2110 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2111 (v1i64 (bitconvert (v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm))))),
2112 (!cast<Instruction>(prefix # "_8H")
2113 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2114 VPR128:$Rn, imm:$Imm)>;
2115 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2116 (v1i64 (bitconvert (v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm))))),
2117 (!cast<Instruction>(prefix # "_4S")
2118 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2119 VPR128:$Rn, imm:$Imm)>;
// Select plain IR narrowing shifts (srl/sra + trunc, optionally combined into
// the high half) to SHRN/SHRN2.
2122 defm : Neon_shiftNarrow_patterns<"lshr">;
2123 defm : Neon_shiftNarrow_patterns<"ashr">;
// Select the saturating/rounding shift-right-narrow intrinsics to their
// instructions (the instruction prefix string names the defm above).
2125 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrun, "QSHRUNvvi">;
2126 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vrshrn, "RSHRNvvi">;
2127 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrun, "QRSHRUNvvi">;
2128 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrn, "SQSHRNvvi">;
2129 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqshrn, "UQSHRNvvi">;
2130 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrn, "SQRSHRNvvi">;
2131 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqrshrn, "UQRSHRNvvi">;
2133 // Convert between fixed-point and floating-point
2134 class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T,
2135 RegisterOperand VPRC, ValueType DestTy, ValueType SrcTy,
2136 Operand ImmTy, SDPatternOperator IntOp>
2137 : NeonI_2VShiftImm<q, u, opcode,
2138 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
2139 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
2140 [(set (DestTy VPRC:$Rd), (DestTy (IntOp (SrcTy VPRC:$Rn),
2144 multiclass NeonI_N2VCvt_Fx2fp<bit u, bits<5> opcode, string asmop,
2145 SDPatternOperator IntOp> {
2146 def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2f32, v2i32,
2148 let Inst{22-21} = 0b01;
2151 def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4f32, v4i32,
2153 let Inst{22-21} = 0b01;
2156 def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2f64, v2i64,
2162 multiclass NeonI_N2VCvt_Fp2fx<bit u, bits<5> opcode, string asmop,
2163 SDPatternOperator IntOp> {
2164 def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2i32, v2f32,
2166 let Inst{22-21} = 0b01;
2169 def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4i32, v4f32,
2171 let Inst{22-21} = 0b01;
2174 def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2i64, v2f64,
2180 // Convert fixed-point to floating-point
2181 defm VCVTxs2f : NeonI_N2VCvt_Fx2fp<0, 0b11100, "scvtf",
2182 int_arm_neon_vcvtfxs2fp>;
2183 defm VCVTxu2f : NeonI_N2VCvt_Fx2fp<1, 0b11100, "ucvtf",
2184 int_arm_neon_vcvtfxu2fp>;
2186 // Convert floating-point to fixed-point
2187 defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs",
2188 int_arm_neon_vcvtfp2fxs>;
2189 defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu",
2190 int_arm_neon_vcvtfp2fxu>;
2192 multiclass Neon_sshll2_0<SDNode ext>
2194 def _v8i8 : PatFrag<(ops node:$Rn),
2195 (v8i16 (ext (v8i8 (Neon_High16B node:$Rn))))>;
2196 def _v4i16 : PatFrag<(ops node:$Rn),
2197 (v4i32 (ext (v4i16 (Neon_High8H node:$Rn))))>;
2198 def _v2i32 : PatFrag<(ops node:$Rn),
2199 (v2i64 (ext (v2i32 (Neon_High4S node:$Rn))))>;
2202 defm NI_sext_high : Neon_sshll2_0<sext>;
2203 defm NI_zext_high : Neon_sshll2_0<zext>;
2206 //===----------------------------------------------------------------------===//
2207 // Multiclasses for NeonI_Across
2208 //===----------------------------------------------------------------------===//
2212 multiclass NeonI_2VAcross_1<bit u, bits<5> opcode,
2213 string asmop, SDPatternOperator opnode>
2215 def _1h8b: NeonI_2VAcross<0b0, u, 0b00, opcode,
2216 (outs FPR16:$Rd), (ins VPR64:$Rn),
2217 asmop # "\t$Rd, $Rn.8b",
2218 [(set (v1i16 FPR16:$Rd),
2219 (v1i16 (opnode (v8i8 VPR64:$Rn))))],
2222 def _1h16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
2223 (outs FPR16:$Rd), (ins VPR128:$Rn),
2224 asmop # "\t$Rd, $Rn.16b",
2225 [(set (v1i16 FPR16:$Rd),
2226 (v1i16 (opnode (v16i8 VPR128:$Rn))))],
2229 def _1s4h: NeonI_2VAcross<0b0, u, 0b01, opcode,
2230 (outs FPR32:$Rd), (ins VPR64:$Rn),
2231 asmop # "\t$Rd, $Rn.4h",
2232 [(set (v1i32 FPR32:$Rd),
2233 (v1i32 (opnode (v4i16 VPR64:$Rn))))],
2236 def _1s8h: NeonI_2VAcross<0b1, u, 0b01, opcode,
2237 (outs FPR32:$Rd), (ins VPR128:$Rn),
2238 asmop # "\t$Rd, $Rn.8h",
2239 [(set (v1i32 FPR32:$Rd),
2240 (v1i32 (opnode (v8i16 VPR128:$Rn))))],
2243 // _1d2s doesn't exist!
2245 def _1d4s: NeonI_2VAcross<0b1, u, 0b10, opcode,
2246 (outs FPR64:$Rd), (ins VPR128:$Rn),
2247 asmop # "\t$Rd, $Rn.4s",
2248 [(set (v1i64 FPR64:$Rd),
2249 (v1i64 (opnode (v4i32 VPR128:$Rn))))],
2253 defm SADDLV : NeonI_2VAcross_1<0b0, 0b00011, "saddlv", int_aarch64_neon_saddlv>;
2254 defm UADDLV : NeonI_2VAcross_1<0b1, 0b00011, "uaddlv", int_aarch64_neon_uaddlv>;
2258 multiclass NeonI_2VAcross_2<bit u, bits<5> opcode,
2259 string asmop, SDPatternOperator opnode>
2261 def _1b8b: NeonI_2VAcross<0b0, u, 0b00, opcode,
2262 (outs FPR8:$Rd), (ins VPR64:$Rn),
2263 asmop # "\t$Rd, $Rn.8b",
2264 [(set (v1i8 FPR8:$Rd),
2265 (v1i8 (opnode (v8i8 VPR64:$Rn))))],
2268 def _1b16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
2269 (outs FPR8:$Rd), (ins VPR128:$Rn),
2270 asmop # "\t$Rd, $Rn.16b",
2271 [(set (v1i8 FPR8:$Rd),
2272 (v1i8 (opnode (v16i8 VPR128:$Rn))))],
2275 def _1h4h: NeonI_2VAcross<0b0, u, 0b01, opcode,
2276 (outs FPR16:$Rd), (ins VPR64:$Rn),
2277 asmop # "\t$Rd, $Rn.4h",
2278 [(set (v1i16 FPR16:$Rd),
2279 (v1i16 (opnode (v4i16 VPR64:$Rn))))],
2282 def _1h8h: NeonI_2VAcross<0b1, u, 0b01, opcode,
2283 (outs FPR16:$Rd), (ins VPR128:$Rn),
2284 asmop # "\t$Rd, $Rn.8h",
2285 [(set (v1i16 FPR16:$Rd),
2286 (v1i16 (opnode (v8i16 VPR128:$Rn))))],
2289 // _1s2s doesn't exist!
2291 def _1s4s: NeonI_2VAcross<0b1, u, 0b10, opcode,
2292 (outs FPR32:$Rd), (ins VPR128:$Rn),
2293 asmop # "\t$Rd, $Rn.4s",
2294 [(set (v1i32 FPR32:$Rd),
2295 (v1i32 (opnode (v4i32 VPR128:$Rn))))],
2299 defm SMAXV : NeonI_2VAcross_2<0b0, 0b01010, "smaxv", int_aarch64_neon_smaxv>;
2300 defm UMAXV : NeonI_2VAcross_2<0b1, 0b01010, "umaxv", int_aarch64_neon_umaxv>;
2302 defm SMINV : NeonI_2VAcross_2<0b0, 0b11010, "sminv", int_aarch64_neon_sminv>;
2303 defm UMINV : NeonI_2VAcross_2<0b1, 0b11010, "uminv", int_aarch64_neon_uminv>;
2305 defm ADDV : NeonI_2VAcross_2<0b0, 0b11011, "addv", int_aarch64_neon_vaddv>;
2309 multiclass NeonI_2VAcross_3<bit u, bits<5> opcode, bits<2> size,
2310 string asmop, SDPatternOperator opnode>
2312 def _1s4s: NeonI_2VAcross<0b1, u, size, opcode,
2313 (outs FPR32:$Rd), (ins VPR128:$Rn),
2314 asmop # "\t$Rd, $Rn.4s",
2315 [(set (v1f32 FPR32:$Rd),
2316 (v1f32 (opnode (v4f32 VPR128:$Rn))))],
2320 defm FMAXNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b00, "fmaxnmv",
2321 int_aarch64_neon_vmaxnmv>;
2322 defm FMINNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b10, "fminnmv",
2323 int_aarch64_neon_vminnmv>;
2325 defm FMAXV : NeonI_2VAcross_3<0b1, 0b01111, 0b00, "fmaxv",
2326 int_aarch64_neon_vmaxv>;
2327 defm FMINV : NeonI_2VAcross_3<0b1, 0b01111, 0b10, "fminv",
2328 int_aarch64_neon_vminv>;
2330 // The following definitions are for the instruction class (3V Diff)
2332 // normal long/long2 pattern
2333 class NeonI_3VDL<bit q, bit u, bits<2> size, bits<4> opcode,
2334 string asmop, string ResS, string OpS,
2335 SDPatternOperator opnode, SDPatternOperator ext,
2336 RegisterOperand OpVPR,
2337 ValueType ResTy, ValueType OpTy>
2338 : NeonI_3VDiff<q, u, size, opcode,
2339 (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2340 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2341 [(set (ResTy VPR128:$Rd),
2342 (ResTy (opnode (ResTy (ext (OpTy OpVPR:$Rn))),
2343 (ResTy (ext (OpTy OpVPR:$Rm))))))],
2346 multiclass NeonI_3VDL_s<bit u, bits<4> opcode,
2347 string asmop, SDPatternOperator opnode,
2350 let isCommutable = Commutable in {
2351 def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2352 opnode, sext, VPR64, v8i16, v8i8>;
2353 def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2354 opnode, sext, VPR64, v4i32, v4i16>;
2355 def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2356 opnode, sext, VPR64, v2i64, v2i32>;
2360 multiclass NeonI_3VDL2_s<bit u, bits<4> opcode,
2361 string asmop, SDPatternOperator opnode,
2364 let isCommutable = Commutable in {
2365 def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2366 opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
2367 def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2368 opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
2369 def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2370 opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
2374 multiclass NeonI_3VDL_u<bit u, bits<4> opcode,
2375 string asmop, SDPatternOperator opnode,
2378 let isCommutable = Commutable in {
2379 def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2380 opnode, zext, VPR64, v8i16, v8i8>;
2381 def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2382 opnode, zext, VPR64, v4i32, v4i16>;
2383 def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2384 opnode, zext, VPR64, v2i64, v2i32>;
2388 multiclass NeonI_3VDL2_u<bit u, bits<4> opcode,
2389 string asmop, SDPatternOperator opnode,
2392 let isCommutable = Commutable in {
2393 def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2394 opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
2395 def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2396 opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
2397 def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2398 opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
// Long add/subtract: widen both operands (sign- or zero-extend) then add/sub.
// The "2" forms operate on the high halves of 128-bit sources.
// Additions are marked commutable (final template argument = 1).
2402 defm SADDLvvv :  NeonI_3VDL_s<0b0, 0b0000, "saddl", add, 1>;
2403 defm UADDLvvv :  NeonI_3VDL_u<0b1, 0b0000, "uaddl", add, 1>;
2405 defm SADDL2vvv :  NeonI_3VDL2_s<0b0, 0b0000, "saddl2", add, 1>;
2406 defm UADDL2vvv :  NeonI_3VDL2_u<0b1, 0b0000, "uaddl2", add, 1>;
2408 defm SSUBLvvv :  NeonI_3VDL_s<0b0, 0b0010, "ssubl", sub, 0>;
2409 defm USUBLvvv :  NeonI_3VDL_u<0b1, 0b0010, "usubl", sub, 0>;
2411 defm SSUBL2vvv :  NeonI_3VDL2_s<0b0, 0b0010, "ssubl2", sub, 0>;
2412 defm USUBL2vvv :  NeonI_3VDL2_u<0b1, 0b0010, "usubl2", sub, 0>;
2414 // normal wide/wide2 pattern
2415 class NeonI_3VDW<bit q, bit u, bits<2> size, bits<4> opcode,
2416 string asmop, string ResS, string OpS,
2417 SDPatternOperator opnode, SDPatternOperator ext,
2418 RegisterOperand OpVPR,
2419 ValueType ResTy, ValueType OpTy>
2420 : NeonI_3VDiff<q, u, size, opcode,
2421 (outs VPR128:$Rd), (ins VPR128:$Rn, OpVPR:$Rm),
2422 asmop # "\t$Rd." # ResS # ", $Rn." # ResS # ", $Rm." # OpS,
2423 [(set (ResTy VPR128:$Rd),
2424 (ResTy (opnode (ResTy VPR128:$Rn),
2425 (ResTy (ext (OpTy OpVPR:$Rm))))))],
2428 multiclass NeonI_3VDW_s<bit u, bits<4> opcode,
2429 string asmop, SDPatternOperator opnode>
2431 def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2432 opnode, sext, VPR64, v8i16, v8i8>;
2433 def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2434 opnode, sext, VPR64, v4i32, v4i16>;
2435 def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2436 opnode, sext, VPR64, v2i64, v2i32>;
2439 defm SADDWvvv : NeonI_3VDW_s<0b0, 0b0001, "saddw", add>;
2440 defm SSUBWvvv : NeonI_3VDW_s<0b0, 0b0011, "ssubw", sub>;
2442 multiclass NeonI_3VDW2_s<bit u, bits<4> opcode,
2443 string asmop, SDPatternOperator opnode>
2445 def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2446 opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
2447 def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2448 opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
2449 def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2450 opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
2453 defm SADDW2vvv : NeonI_3VDW2_s<0b0, 0b0001, "saddw2", add>;
2454 defm SSUBW2vvv : NeonI_3VDW2_s<0b0, 0b0011, "ssubw2", sub>;
2456 multiclass NeonI_3VDW_u<bit u, bits<4> opcode,
2457 string asmop, SDPatternOperator opnode>
2459 def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2460 opnode, zext, VPR64, v8i16, v8i8>;
2461 def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2462 opnode, zext, VPR64, v4i32, v4i16>;
2463 def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2464 opnode, zext, VPR64, v2i64, v2i32>;
2467 defm UADDWvvv : NeonI_3VDW_u<0b1, 0b0001, "uaddw", add>;
2468 defm USUBWvvv : NeonI_3VDW_u<0b1, 0b0011, "usubw", sub>;
2470 multiclass NeonI_3VDW2_u<bit u, bits<4> opcode,
2471 string asmop, SDPatternOperator opnode>
2473 def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2474 opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
2475 def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2476 opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
2477 def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2478 opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
2481 defm UADDW2vvv : NeonI_3VDW2_u<0b1, 0b0001, "uaddw2", add>;
2482 defm USUBW2vvv : NeonI_3VDW2_u<0b1, 0b0011, "usubw2", sub>;
2484 // Get the high half of each vector element.
2485 multiclass NeonI_get_high
2487 def _8h : PatFrag<(ops node:$Rn),
2488 (v8i8 (trunc (v8i16 (srl (v8i16 node:$Rn),
2489 (v8i16 (Neon_dupImm 8))))))>;
2490 def _4s : PatFrag<(ops node:$Rn),
2491 (v4i16 (trunc (v4i32 (srl (v4i32 node:$Rn),
2492 (v4i32 (Neon_dupImm 16))))))>;
2493 def _2d : PatFrag<(ops node:$Rn),
2494 (v2i32 (trunc (v2i64 (srl (v2i64 node:$Rn),
2495 (v2i64 (Neon_dupImm 32))))))>;
2498 defm NI_get_hi : NeonI_get_high;
2500 // pattern for addhn/subhn with 2 operands
2501 class NeonI_3VDN_addhn_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
2502 string asmop, string ResS, string OpS,
2503 SDPatternOperator opnode, SDPatternOperator get_hi,
2504 ValueType ResTy, ValueType OpTy>
2505 : NeonI_3VDiff<q, u, size, opcode,
2506 (outs VPR64:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
2507 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2508 [(set (ResTy VPR64:$Rd),
2510 (OpTy (opnode (OpTy VPR128:$Rn),
2511 (OpTy VPR128:$Rm))))))],
2514 multiclass NeonI_3VDN_addhn_2Op<bit u, bits<4> opcode,
2515 string asmop, SDPatternOperator opnode,
2518 let isCommutable = Commutable in {
2519 def _8b8h : NeonI_3VDN_addhn_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
2520 opnode, NI_get_hi_8h, v8i8, v8i16>;
2521 def _4h4s : NeonI_3VDN_addhn_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
2522 opnode, NI_get_hi_4s, v4i16, v4i32>;
2523 def _2s2d : NeonI_3VDN_addhn_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
2524 opnode, NI_get_hi_2d, v2i32, v2i64>;
2528 defm ADDHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0100, "addhn", add, 1>;
2529 defm SUBHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>;
2531 // pattern for operation with 2 operands
2532 class NeonI_3VD_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
2533 string asmop, string ResS, string OpS,
2534 SDPatternOperator opnode,
2535 RegisterOperand ResVPR, RegisterOperand OpVPR,
2536 ValueType ResTy, ValueType OpTy>
2537 : NeonI_3VDiff<q, u, size, opcode,
2538 (outs ResVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2539 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2540 [(set (ResTy ResVPR:$Rd),
2541 (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))],
2544 // normal narrow pattern
2545 multiclass NeonI_3VDN_2Op<bit u, bits<4> opcode,
2546 string asmop, SDPatternOperator opnode,
2549 let isCommutable = Commutable in {
2550 def _8b8h : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
2551 opnode, VPR64, VPR128, v8i8, v8i16>;
2552 def _4h4s : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
2553 opnode, VPR64, VPR128, v4i16, v4i32>;
2554 def _2s2d : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
2555 opnode, VPR64, VPR128, v2i32, v2i64>;
2559 defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>;
2560 defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>;
2562 // patterns for ACLE intrinsics with 3 operands
2563 class NeonI_3VDN_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
2564 string asmop, string ResS, string OpS>
2565 : NeonI_3VDiff<q, u, size, opcode,
2566 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
2567 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2569 let Constraints = "$src = $Rd";
2570 let neverHasSideEffects = 1;
2573 multiclass NeonI_3VDN_3Op_v1<bit u, bits<4> opcode,
2575 def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">;
2576 def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">;
2577 def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">;
2580 defm ADDHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0100, "addhn2">;
2581 defm SUBHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0110, "subhn2">;
2583 defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2">;
2584 defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2">;
2586 // Patterns have to be separate because there's a SUBREG_TO_REG in the output
2588 class NarrowHighHalfPat<Instruction INST, ValueType DstTy, ValueType SrcTy,
2589 SDPatternOperator coreop>
2590 : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2591 (v1i64 (bitconvert (DstTy (coreop (SrcTy VPR128:$Rn),
2592 (SrcTy VPR128:$Rm)))))),
2593 (INST (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2594 VPR128:$Rn, VPR128:$Rm)>;
// ADDHN2: add, take the high half of each wide element (NI_get_hi_*), and
// write the narrowed result into the high half of $Rd.
2597 def : NarrowHighHalfPat<ADDHN2vvv_16b8h, v8i8,  v8i16,
2598 BinOpFrag<(NI_get_hi_8h (add node:$LHS, node:$RHS))>>;
2599 def : NarrowHighHalfPat<ADDHN2vvv_8h4s,  v4i16, v4i32,
2600 BinOpFrag<(NI_get_hi_4s (add node:$LHS, node:$RHS))>>;
2601 def : NarrowHighHalfPat<ADDHN2vvv_4s2d,  v2i32, v2i64,
2602 BinOpFrag<(NI_get_hi_2d (add node:$LHS, node:$RHS))>>;
// SUBHN2: same structure with subtract as the core operation.
2605 def : NarrowHighHalfPat<SUBHN2vvv_16b8h, v8i8,  v8i16,
2606 BinOpFrag<(NI_get_hi_8h (sub node:$LHS, node:$RHS))>>;
2607 def : NarrowHighHalfPat<SUBHN2vvv_8h4s,  v4i16, v4i32,
2608 BinOpFrag<(NI_get_hi_4s (sub node:$LHS, node:$RHS))>>;
2609 def : NarrowHighHalfPat<SUBHN2vvv_4s2d,  v2i32, v2i64,
2610 BinOpFrag<(NI_get_hi_2d (sub node:$LHS, node:$RHS))>>;
// Rounding variants map directly to the ARM vraddhn/vrsubhn intrinsics.
2613 def : NarrowHighHalfPat<RADDHN2vvv_16b8h, v8i8,  v8i16, int_arm_neon_vraddhn>;
2614 def : NarrowHighHalfPat<RADDHN2vvv_8h4s,  v4i16, v4i32, int_arm_neon_vraddhn>;
2615 def : NarrowHighHalfPat<RADDHN2vvv_4s2d,  v2i32, v2i64, int_arm_neon_vraddhn>;
2618 def : NarrowHighHalfPat<RSUBHN2vvv_16b8h, v8i8,  v8i16, int_arm_neon_vrsubhn>;
2619 def : NarrowHighHalfPat<RSUBHN2vvv_8h4s,  v4i16, v4i32, int_arm_neon_vrsubhn>;
2620 def : NarrowHighHalfPat<RSUBHN2vvv_4s2d,  v2i32, v2i64, int_arm_neon_vrsubhn>;
2622 // patterns that need to extend the result
2623 class NeonI_3VDL_Ext<bit q, bit u, bits<2> size, bits<4> opcode,
2624 string asmop, string ResS, string OpS,
2625 SDPatternOperator opnode,
2626 RegisterOperand OpVPR,
2627 ValueType ResTy, ValueType OpTy, ValueType OpSTy>
2628 : NeonI_3VDiff<q, u, size, opcode,
2629 (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2630 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2631 [(set (ResTy VPR128:$Rd),
2632 (ResTy (zext (OpSTy (opnode (OpTy OpVPR:$Rn),
2633 (OpTy OpVPR:$Rm))))))],
2636 multiclass NeonI_3VDL_zext<bit u, bits<4> opcode,
2637 string asmop, SDPatternOperator opnode,
2640 let isCommutable = Commutable in {
2641 def _8h8b : NeonI_3VDL_Ext<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2642 opnode, VPR64, v8i16, v8i8, v8i8>;
2643 def _4s4h : NeonI_3VDL_Ext<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2644 opnode, VPR64, v4i32, v4i16, v4i16>;
2645 def _2d2s : NeonI_3VDL_Ext<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2646 opnode, VPR64, v2i64, v2i32, v2i32>;
2650 defm SABDLvvv : NeonI_3VDL_zext<0b0, 0b0111, "sabdl", int_arm_neon_vabds, 1>;
2651 defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>;
2653 multiclass NeonI_Op_High<SDPatternOperator op>
2655 def _16B : PatFrag<(ops node:$Rn, node:$Rm),
2656 (op (v8i8 (Neon_High16B node:$Rn)), (v8i8 (Neon_High16B node:$Rm)))>;
2657 def _8H : PatFrag<(ops node:$Rn, node:$Rm),
2658 (op (v4i16 (Neon_High8H node:$Rn)), (v4i16 (Neon_High8H node:$Rm)))>;
2659 def _4S : PatFrag<(ops node:$Rn, node:$Rm),
2660 (op (v2i32 (Neon_High4S node:$Rn)), (v2i32 (Neon_High4S node:$Rm)))>;
// High-half PatFrag variants: apply the given intrinsic to the upper halves
// of two 128-bit operands (via NeonI_Op_High). Used by the *2 (second-part)
// long instructions below.
2664 defm NI_sabdl_hi : NeonI_Op_High<int_arm_neon_vabds>;
2665 defm NI_uabdl_hi : NeonI_Op_High<int_arm_neon_vabdu>;
2666 defm NI_smull_hi : NeonI_Op_High<int_arm_neon_vmulls>;
2667 defm NI_umull_hi : NeonI_Op_High<int_arm_neon_vmullu>;
2668 defm NI_qdmull_hi : NeonI_Op_High<int_arm_neon_vqdmull>;
2669 defm NI_pmull_hi : NeonI_Op_High<int_arm_neon_vmullp>;
2671 multiclass NeonI_3VDL_Abd_u<bit u, bits<4> opcode,
2672 string asmop, string opnode,
2675 let isCommutable = Commutable in {
2676 def _8h8b : NeonI_3VDL_Ext<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2677 !cast<PatFrag>(opnode # "_16B"),
2678 VPR128, v8i16, v16i8, v8i8>;
2679 def _4s4h : NeonI_3VDL_Ext<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2680 !cast<PatFrag>(opnode # "_8H"),
2681 VPR128, v4i32, v8i16, v4i16>;
2682 def _2d2s : NeonI_3VDL_Ext<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2683 !cast<PatFrag>(opnode # "_4S"),
2684 VPR128, v2i64, v4i32, v2i32>;
2688 defm SABDL2vvv : NeonI_3VDL_Abd_u<0b0, 0b0111, "sabdl2", "NI_sabdl_hi", 1>;
2689 defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 0b0111, "uabdl2", "NI_uabdl_hi", 1>;
2691 // For patterns that need two operators chained together.
2692 class NeonI_3VDL_Aba<bit q, bit u, bits<2> size, bits<4> opcode,
2693 string asmop, string ResS, string OpS,
2694 SDPatternOperator opnode, SDPatternOperator subop,
2695 RegisterOperand OpVPR,
2696 ValueType ResTy, ValueType OpTy, ValueType OpSTy>
2697 : NeonI_3VDiff<q, u, size, opcode,
2698 (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
2699 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2700 [(set (ResTy VPR128:$Rd),
2702 (ResTy VPR128:$src),
2703 (ResTy (zext (OpSTy (subop (OpTy OpVPR:$Rn),
2704 (OpTy OpVPR:$Rm))))))))],
2706 let Constraints = "$src = $Rd";
2709 multiclass NeonI_3VDL_Aba_v1<bit u, bits<4> opcode,
2710 string asmop, SDPatternOperator opnode,
2711 SDPatternOperator subop>
2713 def _8h8b : NeonI_3VDL_Aba<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2714 opnode, subop, VPR64, v8i16, v8i8, v8i8>;
2715 def _4s4h : NeonI_3VDL_Aba<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2716 opnode, subop, VPR64, v4i32, v4i16, v4i16>;
2717 def _2d2s : NeonI_3VDL_Aba<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2718 opnode, subop, VPR64, v2i64, v2i32, v2i32>;
2721 defm SABALvvv : NeonI_3VDL_Aba_v1<0b0, 0b0101, "sabal",
2722 add, int_arm_neon_vabds>;
2723 defm UABALvvv : NeonI_3VDL_Aba_v1<0b1, 0b0101, "uabal",
2724 add, int_arm_neon_vabdu>;
2726 multiclass NeonI_3VDL2_Aba_v1<bit u, bits<4> opcode,
2727 string asmop, SDPatternOperator opnode,
2730 def _8h8b : NeonI_3VDL_Aba<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2731 opnode, !cast<PatFrag>(subop # "_16B"),
2732 VPR128, v8i16, v16i8, v8i8>;
2733 def _4s4h : NeonI_3VDL_Aba<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2734 opnode, !cast<PatFrag>(subop # "_8H"),
2735 VPR128, v4i32, v8i16, v4i16>;
2736 def _2d2s : NeonI_3VDL_Aba<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2737 opnode, !cast<PatFrag>(subop # "_4S"),
2738 VPR128, v2i64, v4i32, v2i32>;
2741 defm SABAL2vvv : NeonI_3VDL2_Aba_v1<0b0, 0b0101, "sabal2", add,
2743 defm UABAL2vvv : NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add,
2746 // Long pattern with 2 operands
2747 multiclass NeonI_3VDL_2Op<bit u, bits<4> opcode,
2748 string asmop, SDPatternOperator opnode,
2751 let isCommutable = Commutable in {
2752 def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2753 opnode, VPR128, VPR64, v8i16, v8i8>;
2754 def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2755 opnode, VPR128, VPR64, v4i32, v4i16>;
2756 def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2757 opnode, VPR128, VPR64, v2i64, v2i32>;
2761 defm SMULLvvv : NeonI_3VDL_2Op<0b0, 0b1100, "smull", int_arm_neon_vmulls, 1>;
2762 defm UMULLvvv : NeonI_3VDL_2Op<0b1, 0b1100, "umull", int_arm_neon_vmullu, 1>;
2764 class NeonI_3VDL2_2Op_mull<bit q, bit u, bits<2> size, bits<4> opcode,
2765 string asmop, string ResS, string OpS,
2766 SDPatternOperator opnode,
2767 ValueType ResTy, ValueType OpTy>
2768 : NeonI_3VDiff<q, u, size, opcode,
2769 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
2770 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2771 [(set (ResTy VPR128:$Rd),
2772 (ResTy (opnode (OpTy VPR128:$Rn), (OpTy VPR128:$Rm))))],
2776 multiclass NeonI_3VDL2_2Op_mull_v1<bit u, bits<4> opcode,
2781 let isCommutable = Commutable in {
2782 def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2783 !cast<PatFrag>(opnode # "_16B"),
2785 def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2786 !cast<PatFrag>(opnode # "_8H"),
2788 def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2789 !cast<PatFrag>(opnode # "_4S"),
2794 defm SMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b0, 0b1100, "smull2",
2796 defm UMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b1, 0b1100, "umull2",
2799 // Long pattern with 3 operands
2800 class NeonI_3VDL_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
2801 string asmop, string ResS, string OpS,
2802 SDPatternOperator opnode,
2803 ValueType ResTy, ValueType OpTy>
2804 : NeonI_3VDiff<q, u, size, opcode,
2805 (outs VPR128:$Rd), (ins VPR128:$src, VPR64:$Rn, VPR64:$Rm),
2806 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2807 [(set (ResTy VPR128:$Rd),
2809 (ResTy VPR128:$src),
2810 (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))))],
2812 let Constraints = "$src = $Rd";
2815 multiclass NeonI_3VDL_3Op_v1<bit u, bits<4> opcode,
2816 string asmop, SDPatternOperator opnode>
2818 def _8h8b : NeonI_3VDL_3Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2819 opnode, v8i16, v8i8>;
2820 def _4s4h : NeonI_3VDL_3Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2821 opnode, v4i32, v4i16>;
2822 def _2d2s : NeonI_3VDL_3Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2823 opnode, v2i64, v2i32>;
2826 def Neon_smlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
2828 (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
2830 def Neon_umlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
2832 (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
2834 def Neon_smlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
2836 (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
2838 def Neon_umlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
2840 (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
2842 defm SMLALvvv : NeonI_3VDL_3Op_v1<0b0, 0b1000, "smlal", Neon_smlal>;
2843 defm UMLALvvv : NeonI_3VDL_3Op_v1<0b1, 0b1000, "umlal", Neon_umlal>;
2845 defm SMLSLvvv : NeonI_3VDL_3Op_v1<0b0, 0b1010, "smlsl", Neon_smlsl>;
2846 defm UMLSLvvv : NeonI_3VDL_3Op_v1<0b1, 0b1010, "umlsl", Neon_umlsl>;
2848 class NeonI_3VDL2_3Op_mlas<bit q, bit u, bits<2> size, bits<4> opcode,
2849 string asmop, string ResS, string OpS,
2850 SDPatternOperator subop, SDPatternOperator opnode,
2851 RegisterOperand OpVPR,
2852 ValueType ResTy, ValueType OpTy>
2853 : NeonI_3VDiff<q, u, size, opcode,
2854 (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
2855 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2856 [(set (ResTy VPR128:$Rd),
2858 (ResTy VPR128:$src),
2859 (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))],
2861 let Constraints = "$src = $Rd";
2864 multiclass NeonI_3VDL2_3Op_mlas_v1<bit u, bits<4> opcode,
2866 SDPatternOperator subop,
2869 def _8h16b : NeonI_3VDL2_3Op_mlas<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2870 subop, !cast<PatFrag>(opnode # "_16B"),
2871 VPR128, v8i16, v16i8>;
2872 def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2873 subop, !cast<PatFrag>(opnode # "_8H"),
2874 VPR128, v4i32, v8i16>;
2875 def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2876 subop, !cast<PatFrag>(opnode # "_4S"),
2877 VPR128, v2i64, v4i32>;
2880 defm SMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1000, "smlal2",
2881 add, "NI_smull_hi">;
2882 defm UMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1000, "umlal2",
2883 add, "NI_umull_hi">;
2885 defm SMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1010, "smlsl2",
2886 sub, "NI_smull_hi">;
2887 defm UMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1010, "umlsl2",
2888 sub, "NI_umull_hi">;
2890 multiclass NeonI_3VDL_qdmlal_3Op_v2<bit u, bits<4> opcode,
2891 string asmop, SDPatternOperator opnode>
2893 def _4s4h : NeonI_3VDL2_3Op_mlas<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2894 opnode, int_arm_neon_vqdmull,
2895 VPR64, v4i32, v4i16>;
2896 def _2d2s : NeonI_3VDL2_3Op_mlas<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2897 opnode, int_arm_neon_vqdmull,
2898 VPR64, v2i64, v2i32>;
2901 defm SQDMLALvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1001, "sqdmlal",
2902 int_arm_neon_vqadds>;
2903 defm SQDMLSLvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1011, "sqdmlsl",
2904 int_arm_neon_vqsubs>;
2906 multiclass NeonI_3VDL_v2<bit u, bits<4> opcode,
2907 string asmop, SDPatternOperator opnode,
2910 let isCommutable = Commutable in {
2911 def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2912 opnode, VPR128, VPR64, v4i32, v4i16>;
2913 def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2914 opnode, VPR128, VPR64, v2i64, v2i32>;
2918 defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull",
2919 int_arm_neon_vqdmull, 1>;
2921 multiclass NeonI_3VDL2_2Op_mull_v2<bit u, bits<4> opcode,
2926 let isCommutable = Commutable in {
2927 def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2928 !cast<PatFrag>(opnode # "_8H"),
2930 def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2931 !cast<PatFrag>(opnode # "_4S"),
2936 defm SQDMULL2vvv : NeonI_3VDL2_2Op_mull_v2<0b0, 0b1101, "sqdmull2",
2939 multiclass NeonI_3VDL2_3Op_qdmlal_v2<bit u, bits<4> opcode,
2941 SDPatternOperator opnode>
2943 def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2944 opnode, NI_qdmull_hi_8H,
2945 VPR128, v4i32, v8i16>;
2946 def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2947 opnode, NI_qdmull_hi_4S,
2948 VPR128, v2i64, v4i32>;
2951 defm SQDMLAL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1001, "sqdmlal2",
2952 int_arm_neon_vqadds>;
2953 defm SQDMLSL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1011, "sqdmlsl2",
2954 int_arm_neon_vqsubs>;
2956 multiclass NeonI_3VDL_v3<bit u, bits<4> opcode,
2957 string asmop, SDPatternOperator opnode,
2960 let isCommutable = Commutable in {
2961 def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2962 opnode, VPR128, VPR64, v8i16, v8i8>;
2966 defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp, 1>;
2968 multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode,
2973 let isCommutable = Commutable in {
2974 def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2975 !cast<PatFrag>(opnode # "_16B"),
2980 defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2",
2983 // End of implementation for instruction class (3V Diff)
2985 // The following are vector load/store multiple N-element structure
2986 // (class SIMD lselem).
2988 // ld1: load multiple 1-element structure to 1/2/3/4 registers.
2989 // ld2/ld3/ld4: load multiple N-element structure to N registers (N = 2, 3, 4).
2990 // The structure consists of a sequence of sets of N values.
2991 // The first element of the structure is placed in the first lane
2992 // of the first vector, the second element in the first lane
2993 // of the second vector, and so on.
2994 // E.g. LD1_3V_2S will load 32-bit elements {A, B, C, D, E, F} sequentially into
2995 // the three 64-bit vectors list {BA, DC, FE}.
2996 // E.g. LD3_2S will load 32-bit elements {A, B, C, D, E, F} into the three
2997 // 64-bit vectors list {DA, EB, FC}.
2998 // Store instructions store multiple structures from N registers, analogously
// to the load instructions above.
3001 class NeonI_LDVList<bit q, bits<4> opcode, bits<2> size,
3002 RegisterOperand VecList, string asmop>
3003 : NeonI_LdStMult<q, 1, opcode, size,
3004 (outs VecList:$Rt), (ins GPR64xsp:$Rn),
3005 asmop # "\t$Rt, [$Rn]",
3009 let neverHasSideEffects = 1;
3012 multiclass LDVList_BHSD<bits<4> opcode, string List, string asmop> {
3013 def _8B : NeonI_LDVList<0, opcode, 0b00,
3014 !cast<RegisterOperand>(List # "8B_operand"), asmop>;
3016 def _4H : NeonI_LDVList<0, opcode, 0b01,
3017 !cast<RegisterOperand>(List # "4H_operand"), asmop>;
3019 def _2S : NeonI_LDVList<0, opcode, 0b10,
3020 !cast<RegisterOperand>(List # "2S_operand"), asmop>;
3022 def _16B : NeonI_LDVList<1, opcode, 0b00,
3023 !cast<RegisterOperand>(List # "16B_operand"), asmop>;
3025 def _8H : NeonI_LDVList<1, opcode, 0b01,
3026 !cast<RegisterOperand>(List # "8H_operand"), asmop>;
3028 def _4S : NeonI_LDVList<1, opcode, 0b10,
3029 !cast<RegisterOperand>(List # "4S_operand"), asmop>;
3031 def _2D : NeonI_LDVList<1, opcode, 0b11,
3032 !cast<RegisterOperand>(List # "2D_operand"), asmop>;
3035 // Load multiple N-element structure to N consecutive registers (N = 1,2,3,4)
3036 defm LD1 : LDVList_BHSD<0b0111, "VOne", "ld1">;
3037 def LD1_1D : NeonI_LDVList<0, 0b0111, 0b11, VOne1D_operand, "ld1">;
3039 defm LD2 : LDVList_BHSD<0b1000, "VPair", "ld2">;
3041 defm LD3 : LDVList_BHSD<0b0100, "VTriple", "ld3">;
3043 defm LD4 : LDVList_BHSD<0b0000, "VQuad", "ld4">;
3045 // Load multiple 1-element structure to N consecutive registers (N = 2,3,4)
3046 defm LD1_2V : LDVList_BHSD<0b1010, "VPair", "ld1">;
3047 def LD1_2V_1D : NeonI_LDVList<0, 0b1010, 0b11, VPair1D_operand, "ld1">;
3049 defm LD1_3V : LDVList_BHSD<0b0110, "VTriple", "ld1">;
3050 def LD1_3V_1D : NeonI_LDVList<0, 0b0110, 0b11, VTriple1D_operand, "ld1">;
3052 defm LD1_4V : LDVList_BHSD<0b0010, "VQuad", "ld1">;
3053 def LD1_4V_1D : NeonI_LDVList<0, 0b0010, 0b11, VQuad1D_operand, "ld1">;
3055 class NeonI_STVList<bit q, bits<4> opcode, bits<2> size,
3056 RegisterOperand VecList, string asmop>
3057 : NeonI_LdStMult<q, 0, opcode, size,
3058 (outs), (ins GPR64xsp:$Rn, VecList:$Rt),
3059 asmop # "\t$Rt, [$Rn]",
3063 let neverHasSideEffects = 1;
3066 multiclass STVList_BHSD<bits<4> opcode, string List, string asmop> {
3067 def _8B : NeonI_STVList<0, opcode, 0b00,
3068 !cast<RegisterOperand>(List # "8B_operand"), asmop>;
3070 def _4H : NeonI_STVList<0, opcode, 0b01,
3071 !cast<RegisterOperand>(List # "4H_operand"), asmop>;
3073 def _2S : NeonI_STVList<0, opcode, 0b10,
3074 !cast<RegisterOperand>(List # "2S_operand"), asmop>;
3076 def _16B : NeonI_STVList<1, opcode, 0b00,
3077 !cast<RegisterOperand>(List # "16B_operand"), asmop>;
3079 def _8H : NeonI_STVList<1, opcode, 0b01,
3080 !cast<RegisterOperand>(List # "8H_operand"), asmop>;
3082 def _4S : NeonI_STVList<1, opcode, 0b10,
3083 !cast<RegisterOperand>(List # "4S_operand"), asmop>;
3085 def _2D : NeonI_STVList<1, opcode, 0b11,
3086 !cast<RegisterOperand>(List # "2D_operand"), asmop>;
3089 // Store multiple N-element structures from N registers (N = 1,2,3,4)
3090 defm ST1 : STVList_BHSD<0b0111, "VOne", "st1">;
3091 def ST1_1D : NeonI_STVList<0, 0b0111, 0b11, VOne1D_operand, "st1">;
3093 defm ST2 : STVList_BHSD<0b1000, "VPair", "st2">;
3095 defm ST3 : STVList_BHSD<0b0100, "VTriple", "st3">;
3097 defm ST4 : STVList_BHSD<0b0000, "VQuad", "st4">;
3099 // Store multiple 1-element structures from N consecutive registers (N = 2,3,4)
3100 defm ST1_2V : STVList_BHSD<0b1010, "VPair", "st1">;
3101 def ST1_2V_1D : NeonI_STVList<0, 0b1010, 0b11, VPair1D_operand, "st1">;
3103 defm ST1_3V : STVList_BHSD<0b0110, "VTriple", "st1">;
3104 def ST1_3V_1D : NeonI_STVList<0, 0b0110, 0b11, VTriple1D_operand, "st1">;
3106 defm ST1_4V : STVList_BHSD<0b0010, "VQuad", "st1">;
3107 def ST1_4V_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">;
3109 // End of vector load/store multiple N-element structure(class SIMD lselem)
3111 // Scalar Arithmetic
3113 class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
3114 : NeonI_Scalar3Same<u, 0b11, opcode,
3115 (outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm),
3116 !strconcat(asmop, " $Rd, $Rn, $Rm"),
3120 multiclass NeonI_Scalar3Same_HS_sizes<bit u, bits<5> opcode,
3121 string asmop, bit Commutable = 0>
3123 let isCommutable = Commutable in {
3124 def hhh : NeonI_Scalar3Same<u, 0b01, opcode,
3125 (outs FPR16:$Rd), (ins FPR16:$Rn, FPR16:$Rm),
3126 !strconcat(asmop, " $Rd, $Rn, $Rm"),
3129 def sss : NeonI_Scalar3Same<u, 0b10, opcode,
3130 (outs FPR32:$Rd), (ins FPR32:$Rn, FPR32:$Rm),
3131 !strconcat(asmop, " $Rd, $Rn, $Rm"),
3137 multiclass NeonI_Scalar3Same_SD_sizes<bit u, bit size_high, bits<5> opcode,
3138 string asmop, bit Commutable = 0>
3140 let isCommutable = Commutable in {
3141 def sss : NeonI_Scalar3Same<u, {size_high, 0b0}, opcode,
3142 (outs FPR32:$Rd), (ins FPR32:$Rn, FPR32:$Rm),
3143 !strconcat(asmop, " $Rd, $Rn, $Rm"),
3146 def ddd : NeonI_Scalar3Same<u, {size_high, 0b1}, opcode,
3147 (outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm),
3148 !strconcat(asmop, " $Rd, $Rn, $Rm"),
3154 multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
3155 string asmop, bit Commutable = 0>
3157 let isCommutable = Commutable in {
3158 def bbb : NeonI_Scalar3Same<u, 0b00, opcode,
3159 (outs FPR8:$Rd), (ins FPR8:$Rn, FPR8:$Rm),
3160 !strconcat(asmop, " $Rd, $Rn, $Rm"),
3163 def hhh : NeonI_Scalar3Same<u, 0b01, opcode,
3164 (outs FPR16:$Rd), (ins FPR16:$Rn, FPR16:$Rm),
3165 !strconcat(asmop, " $Rd, $Rn, $Rm"),
3168 def sss : NeonI_Scalar3Same<u, 0b10, opcode,
3169 (outs FPR32:$Rd), (ins FPR32:$Rn, FPR32:$Rm),
3170 !strconcat(asmop, " $Rd, $Rn, $Rm"),
3173 def ddd : NeonI_Scalar3Same<u, 0b11, opcode,
3174 (outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm),
3175 !strconcat(asmop, " $Rd, $Rn, $Rm"),
3181 multiclass Neon_Scalar3Same_D_size_patterns<SDPatternOperator opnode,
3182 Instruction INSTD> {
3183 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
3184 (INSTD FPR64:$Rn, FPR64:$Rm)>;
3187 multiclass Neon_Scalar3Same_BHSD_size_patterns<SDPatternOperator opnode,
3192 : Neon_Scalar3Same_D_size_patterns<opnode, INSTD> {
3193 def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
3194 (INSTB FPR8:$Rn, FPR8:$Rm)>;
3196 def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
3197 (INSTH FPR16:$Rn, FPR16:$Rm)>;
3199 def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
3200 (INSTS FPR32:$Rn, FPR32:$Rm)>;
3203 multiclass Neon_Scalar3Same_HS_size_patterns<SDPatternOperator opnode,
3205 Instruction INSTS> {
3206 def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
3207 (INSTH FPR16:$Rn, FPR16:$Rm)>;
3208 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
3209 (INSTS FPR32:$Rn, FPR32:$Rm)>;
3212 multiclass Neon_Scalar3Same_SD_size_patterns<SDPatternOperator opnode,
3214 Instruction INSTD> {
3215 def : Pat<(v1f32 (opnode (v1f32 FPR32:$Rn), (v1f32 FPR32:$Rm))),
3216 (INSTS FPR32:$Rn, FPR32:$Rm)>;
3217 def : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
3218 (INSTD FPR64:$Rn, FPR64:$Rm)>;
3221 // Scalar Two Registers Miscellaneous
3223 multiclass NeonI_Scalar2SameMisc_SD_size<bit u, bit size_high, bits<5> opcode,
3225 def ss : NeonI_Scalar2SameMisc<u, {size_high, 0b0}, opcode,
3226 (outs FPR32:$Rd), (ins FPR32:$Rn),
3227 !strconcat(asmop, " $Rd, $Rn"),
3229 def dd : NeonI_Scalar2SameMisc<u, {size_high, 0b1}, opcode,
3230 (outs FPR64:$Rd), (ins FPR64:$Rn),
3231 !strconcat(asmop, " $Rd, $Rn"),
3235 multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns<SDPatternOperator Sopnode,
3236 SDPatternOperator Dopnode,
3238 Instruction INSTD> {
3239 def : Pat<(v1f32 (Sopnode (v1i32 FPR32:$Rn))),
3241 def : Pat<(v1f64 (Dopnode (v1i64 FPR64:$Rn))),
3245 multiclass Neon_Scalar2SameMisc_SD_size_patterns<SDPatternOperator opnode,
3247 Instruction INSTD> {
3248 def : Pat<(v1f32 (opnode (v1f32 FPR32:$Rn))),
3250 def : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))),
3254 // Scalar Integer Add
3255 let isCommutable = 1 in {
3256 def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
3259 // Scalar Integer Sub
3260 def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">;
3262 // Pattern for Scalar Integer Add and Sub with D register only
3263 defm : Neon_Scalar3Same_D_size_patterns<add, ADDddd>;
3264 defm : Neon_Scalar3Same_D_size_patterns<sub, SUBddd>;
3266 // Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub
3267 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vaddds, ADDddd>;
3268 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vadddu, ADDddd>;
3269 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubds, SUBddd>;
3270 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubdu, SUBddd>;
3272 // Scalar Integer Saturating Add (Signed, Unsigned)
3273 defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;
3274 defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;
3276 // Scalar Integer Saturating Sub (Signed, Unsigned)
3277 defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
3278 defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;
3280 // Patterns to match llvm.arm.* intrinsic for
3281 // Scalar Integer Saturating Add, Sub (Signed, Unsigned)
3282 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqadds, SQADDddd>;
3283 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqaddu, UQADDddd>;
3284 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqsubs, SQSUBddd>;
3285 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqsubu, UQSUBddd>;
3287 // Patterns to match llvm.aarch64.* intrinsic for
3288 // Scalar Integer Saturating Add, Sub (Signed, Unsigned)
3289 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqadds, SQADDbbb,
3290 SQADDhhh, SQADDsss, SQADDddd>;
3291 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqaddu, UQADDbbb,
3292 UQADDhhh, UQADDsss, UQADDddd>;
3293 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqsubs, SQSUBbbb,
3294 SQSUBhhh, SQSUBsss, SQSUBddd>;
3295 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqsubu, UQSUBbbb,
3296 UQSUBhhh, UQSUBsss, UQSUBddd>;
3298 // Scalar Integer Saturating Doubling Multiply Half High
3299 defm SQDMULH : NeonI_Scalar3Same_HS_sizes<0b0, 0b10110, "sqdmulh", 1>;
3301 // Scalar Integer Saturating Rounding Doubling Multiply Half High
3302 defm SQRDMULH : NeonI_Scalar3Same_HS_sizes<0b1, 0b10110, "sqrdmulh", 1>;
3304 // Patterns to match llvm.arm.* intrinsic for
3305 // Scalar Integer Saturating Doubling Multiply Half High and
3306 // Scalar Integer Saturating Rounding Doubling Multiply Half High
3307 defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqdmulh, SQDMULHhhh,
3309 defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqrdmulh, SQRDMULHhhh,
3312 // Scalar Floating-point Multiply Extended
3313 defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>;
3315 // Scalar Floating-point Reciprocal Step
3316 defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>;
3318 // Scalar Floating-point Reciprocal Square Root Step
3319 defm FRSQRTS : NeonI_Scalar3Same_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", 0>;
3321 // Patterns to match llvm.arm.* intrinsic for
3322 // Scalar Floating-point Reciprocal Step and
3323 // Scalar Floating-point Reciprocal Square Root Step
3324 defm : Neon_Scalar3Same_SD_size_patterns<int_arm_neon_vrecps, FRECPSsss,
3326 defm : Neon_Scalar3Same_SD_size_patterns<int_arm_neon_vrsqrts, FRSQRTSsss,
3329 // Patterns to match llvm.aarch64.* intrinsic for
3330 // Scalar Floating-point Multiply Extended,
3331 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vmulx, FMULXsss,
3334 // Scalar Integer Shift Left (Signed, Unsigned)
3335 def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
3336 def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;
3338 // Patterns to match llvm.arm.* intrinsic for
3339 // Scalar Integer Shift Left (Signed, Unsigned)
3340 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
3341 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
3343 // Patterns to match llvm.aarch64.* intrinsic for
3344 // Scalar Integer Shift Left (Signed, Unsigned)
3345 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshlds, SSHLddd>;
3346 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshldu, USHLddd>;
3348 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
3349 defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
3350 defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;
3352 // Patterns to match llvm.aarch64.* intrinsic for
3353 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
3354 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshls, SQSHLbbb,
3355 SQSHLhhh, SQSHLsss, SQSHLddd>;
3356 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshlu, UQSHLbbb,
3357 UQSHLhhh, UQSHLsss, UQSHLddd>;
3359 // Patterns to match llvm.arm.* intrinsic for
3360 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
3361 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
3362 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
3364 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
3365 def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">;
3366 def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">;
3368 // Patterns to match llvm.aarch64.* intrinsic for
3369 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
3370 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshlds, SRSHLddd>;
3371 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshldu, URSHLddd>;
3373 // Patterns to match llvm.arm.* intrinsic for
3374 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
3375 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
3376 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
3378 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
3379 defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>;
3380 defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>;
3382 // Patterns to match llvm.aarch64.* intrinsic for
3383 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
3384 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshls, SQRSHLbbb,
3385 SQRSHLhhh, SQRSHLsss, SQRSHLddd>;
3386 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb,
3387 UQRSHLhhh, UQRSHLsss, UQRSHLddd>;
3389 // Patterns to match llvm.arm.* intrinsic for
3390 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
3391 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
3392 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
3394 // Scalar Signed Integer Convert To Floating-point
3395 defm SCVTF : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11101, "scvtf">;
3396 defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtf32_s32,
3397 int_aarch64_neon_vcvtf64_s64,
3400 // Scalar Unsigned Integer Convert To Floating-point
3401 defm UCVTF : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11101, "ucvtf">;
3402 defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtf32_u32,
3403 int_aarch64_neon_vcvtf64_u64,
3406 // Scalar Floating-point Reciprocal Estimate
3407 defm FRECPE : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11101, "frecpe">;
3408 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_arm_neon_vrecpe,
3409 FRECPEss, FRECPEdd>;
3411 // Scalar Floating-point Reciprocal Exponent
3412 defm FRECPX : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11111, "frecpx">;
3413 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpx,
3414 FRECPXss, FRECPXdd>;
3416 // Scalar Floating-point Reciprocal Square Root Estimate
3417 defm FRSQRTE: NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11101, "frsqrte">;
3418 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_arm_neon_vrsqrte,
3419 FRSQRTEss, FRSQRTEdd>;
3421 // Scalar Reduce Pairwise
3423 multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,
3424 string asmop, bit Commutable = 0> {
3425 let isCommutable = Commutable in {
3426 def _D_2D : NeonI_ScalarPair<u, {size, 0b1}, opcode,
3427 (outs FPR64:$Rd), (ins VPR128:$Rn),
3428 !strconcat(asmop, " $Rd, $Rn.2d"),
3434 multiclass NeonI_ScalarPair_SD_sizes<bit u, bit size, bits<5> opcode,
3435 string asmop, bit Commutable = 0>
3436 : NeonI_ScalarPair_D_sizes<u, size, opcode, asmop, Commutable> {
3437 let isCommutable = Commutable in {
3438 def _S_2S : NeonI_ScalarPair<u, {size, 0b0}, opcode,
3439 (outs FPR32:$Rd), (ins VPR64:$Rn),
3440 !strconcat(asmop, " $Rd, $Rn.2s"),
3446 // Scalar Reduce Addition Pairwise (Integer) with
3447 // Pattern to match llvm.arm.* intrinsic
3448 defm ADDPvv : NeonI_ScalarPair_D_sizes<0b0, 0b1, 0b11011, "addp", 0>;
3450 // Pattern to match llvm.aarch64.* intrinsic for
3451 // Scalar Reduce Addition Pairwise (Integer)
3452 def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))),
3453 (ADDPvv_D_2D VPR128:$Rn)>;
3455 // Scalar Reduce Addition Pairwise (Floating Point)
3456 defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>;
3458 // Scalar Reduce Maximum Pairwise (Floating Point)
3459 defm FMAXPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01111, "fmaxp", 0>;
3461 // Scalar Reduce Minimum Pairwise (Floating Point)
3462 defm FMINPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01111, "fminp", 0>;
3464 // Scalar Reduce maxNum Pairwise (Floating Point)
3465 defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>;
3467 // Scalar Reduce minNum Pairwise (Floating Point)
3468 defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>;
3470 multiclass Neon_ScalarPair_SD_size_patterns<SDPatternOperator opnodeS,
3471 SDPatternOperator opnodeD,
3473 Instruction INSTD> {
3474 def : Pat<(v1f32 (opnodeS (v2f32 VPR64:$Rn))),
3476 def : Pat<(v1f64 (opnodeD (v2f64 VPR128:$Rn))),
3477 (INSTD VPR128:$Rn)>;
3480 // Patterns to match llvm.aarch64.* intrinsic for
3481 // Scalar Reduce Add, Max, Min, MaxNum, MinNum Pairwise (Floating Point)
3482 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfadd,
3483 int_aarch64_neon_vpfaddq, FADDPvv_S_2S, FADDPvv_D_2D>;
3485 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmax,
3486 int_aarch64_neon_vpmaxq, FMAXPvv_S_2S, FMAXPvv_D_2D>;
3488 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmin,
3489 int_aarch64_neon_vpminq, FMINPvv_S_2S, FMINPvv_D_2D>;
3491 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm,
3492 int_aarch64_neon_vpfmaxnmq, FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>;
3494 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm,
3495 int_aarch64_neon_vpfminnmq, FMINNMPvv_S_2S, FMINNMPvv_D_2D>;
3499 //===----------------------------------------------------------------------===//
3500 // Non-Instruction Patterns
3501 //===----------------------------------------------------------------------===//
3503 // 64-bit vector bitcasts...
3505 def : Pat<(v1i64 (bitconvert (v8i8 VPR64:$src))), (v1i64 VPR64:$src)>;
3506 def : Pat<(v2f32 (bitconvert (v8i8 VPR64:$src))), (v2f32 VPR64:$src)>;
3507 def : Pat<(v2i32 (bitconvert (v8i8 VPR64:$src))), (v2i32 VPR64:$src)>;
3508 def : Pat<(v4i16 (bitconvert (v8i8 VPR64:$src))), (v4i16 VPR64:$src)>;
3510 def : Pat<(v1i64 (bitconvert (v4i16 VPR64:$src))), (v1i64 VPR64:$src)>;
3511 def : Pat<(v2i32 (bitconvert (v4i16 VPR64:$src))), (v2i32 VPR64:$src)>;
3512 def : Pat<(v2f32 (bitconvert (v4i16 VPR64:$src))), (v2f32 VPR64:$src)>;
3513 def : Pat<(v8i8 (bitconvert (v4i16 VPR64:$src))), (v8i8 VPR64:$src)>;
3515 def : Pat<(v1i64 (bitconvert (v2i32 VPR64:$src))), (v1i64 VPR64:$src)>;
3516 def : Pat<(v2f32 (bitconvert (v2i32 VPR64:$src))), (v2f32 VPR64:$src)>;
3517 def : Pat<(v4i16 (bitconvert (v2i32 VPR64:$src))), (v4i16 VPR64:$src)>;
3518 def : Pat<(v8i8 (bitconvert (v2i32 VPR64:$src))), (v8i8 VPR64:$src)>;
3520 def : Pat<(v1i64 (bitconvert (v2f32 VPR64:$src))), (v1i64 VPR64:$src)>;
3521 def : Pat<(v2i32 (bitconvert (v2f32 VPR64:$src))), (v2i32 VPR64:$src)>;
3522 def : Pat<(v4i16 (bitconvert (v2f32 VPR64:$src))), (v4i16 VPR64:$src)>;
3523 def : Pat<(v8i8 (bitconvert (v2f32 VPR64:$src))), (v8i8 VPR64:$src)>;
3525 def : Pat<(v2f32 (bitconvert (v1i64 VPR64:$src))), (v2f32 VPR64:$src)>;
3526 def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
3527 def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
3528 def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
// ..and 128-bit vector bitcasts...
// As above, but for the 128-bit vector types, all of which share the VPR128
// register class, so every cast maps to the unchanged source register.
3532 def : Pat<(v2f64 (bitconvert (v16i8 VPR128:$src))), (v2f64 VPR128:$src)>;
3533 def : Pat<(v2i64 (bitconvert (v16i8 VPR128:$src))), (v2i64 VPR128:$src)>;
3534 def : Pat<(v4f32 (bitconvert (v16i8 VPR128:$src))), (v4f32 VPR128:$src)>;
3535 def : Pat<(v4i32 (bitconvert (v16i8 VPR128:$src))), (v4i32 VPR128:$src)>;
3536 def : Pat<(v8i16 (bitconvert (v16i8 VPR128:$src))), (v8i16 VPR128:$src)>;
3538 def : Pat<(v2f64 (bitconvert (v8i16 VPR128:$src))), (v2f64 VPR128:$src)>;
3539 def : Pat<(v2i64 (bitconvert (v8i16 VPR128:$src))), (v2i64 VPR128:$src)>;
3540 def : Pat<(v4i32 (bitconvert (v8i16 VPR128:$src))), (v4i32 VPR128:$src)>;
3541 def : Pat<(v4f32 (bitconvert (v8i16 VPR128:$src))), (v4f32 VPR128:$src)>;
3542 def : Pat<(v16i8 (bitconvert (v8i16 VPR128:$src))), (v16i8 VPR128:$src)>;
3544 def : Pat<(v2f64 (bitconvert (v4i32 VPR128:$src))), (v2f64 VPR128:$src)>;
3545 def : Pat<(v2i64 (bitconvert (v4i32 VPR128:$src))), (v2i64 VPR128:$src)>;
3546 def : Pat<(v4f32 (bitconvert (v4i32 VPR128:$src))), (v4f32 VPR128:$src)>;
3547 def : Pat<(v8i16 (bitconvert (v4i32 VPR128:$src))), (v8i16 VPR128:$src)>;
3548 def : Pat<(v16i8 (bitconvert (v4i32 VPR128:$src))), (v16i8 VPR128:$src)>;
3550 def : Pat<(v2f64 (bitconvert (v4f32 VPR128:$src))), (v2f64 VPR128:$src)>;
3551 def : Pat<(v2i64 (bitconvert (v4f32 VPR128:$src))), (v2i64 VPR128:$src)>;
3552 def : Pat<(v4i32 (bitconvert (v4f32 VPR128:$src))), (v4i32 VPR128:$src)>;
3553 def : Pat<(v8i16 (bitconvert (v4f32 VPR128:$src))), (v8i16 VPR128:$src)>;
3554 def : Pat<(v16i8 (bitconvert (v4f32 VPR128:$src))), (v16i8 VPR128:$src)>;
3556 def : Pat<(v2f64 (bitconvert (v2i64 VPR128:$src))), (v2f64 VPR128:$src)>;
3557 def : Pat<(v4f32 (bitconvert (v2i64 VPR128:$src))), (v4f32 VPR128:$src)>;
3558 def : Pat<(v4i32 (bitconvert (v2i64 VPR128:$src))), (v4i32 VPR128:$src)>;
3559 def : Pat<(v8i16 (bitconvert (v2i64 VPR128:$src))), (v8i16 VPR128:$src)>;
3560 def : Pat<(v16i8 (bitconvert (v2i64 VPR128:$src))), (v16i8 VPR128:$src)>;
3562 def : Pat<(v2i64 (bitconvert (v2f64 VPR128:$src))), (v2i64 VPR128:$src)>;
3563 def : Pat<(v4f32 (bitconvert (v2f64 VPR128:$src))), (v4f32 VPR128:$src)>;
3564 def : Pat<(v4i32 (bitconvert (v2f64 VPR128:$src))), (v4i32 VPR128:$src)>;
3565 def : Pat<(v8i16 (bitconvert (v2f64 VPR128:$src))), (v8i16 VPR128:$src)>;
3566 def : Pat<(v16i8 (bitconvert (v2f64 VPR128:$src))), (v16i8 VPR128:$src)>;
// ...and scalar bitcasts...
// Casts between a scalar FP type and the 1-element vector of the same width
// stay in the same FPR register, so they are free.
3570 def : Pat<(f16 (bitconvert (v1i16 FPR16:$src))), (f16 FPR16:$src)>;
3571 def : Pat<(f32 (bitconvert (v1i32 FPR32:$src))), (f32 FPR32:$src)>;
3572 def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
3573 def : Pat<(f32 (bitconvert (v1f32 FPR32:$src))), (f32 FPR32:$src)>;
3574 def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;
// Casts between a 1-element vector (FPR) and a GPR integer need a real
// register-file move, selected as FMOV.
3576 def : Pat<(i64 (bitconvert (v1i64 FPR64:$src))), (FMOVxd $src)>;
3577 def : Pat<(i32 (bitconvert (v1i32 FPR32:$src))), (FMOVws $src)>;
3579 def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
3580 def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
3581 def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
// 64-bit vector to f64 scalar: same D register, no instruction needed.
3583 def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))), (f64 VPR64:$src)>;
3584 def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))), (f64 VPR64:$src)>;
3585 def : Pat<(f64 (bitconvert (v2i32 VPR64:$src))), (f64 VPR64:$src)>;
3586 def : Pat<(f64 (bitconvert (v2f32 VPR64:$src))), (f64 VPR64:$src)>;
3587 def : Pat<(f64 (bitconvert (v1i64 VPR64:$src))), (f64 VPR64:$src)>;
// 128-bit vector to f128 scalar: same Q register, no instruction needed.
3589 def : Pat<(f128 (bitconvert (v16i8 VPR128:$src))), (f128 VPR128:$src)>;
3590 def : Pat<(f128 (bitconvert (v8i16 VPR128:$src))), (f128 VPR128:$src)>;
3591 def : Pat<(f128 (bitconvert (v4i32 VPR128:$src))), (f128 VPR128:$src)>;
3592 def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))), (f128 VPR128:$src)>;
3593 def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), (f128 VPR128:$src)>;
3594 def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), (f128 VPR128:$src)>;
// Reverse direction: scalar FP to 1-element vector, also free.
3596 def : Pat<(v1i16 (bitconvert (f16 FPR16:$src))), (v1i16 FPR16:$src)>;
3597 def : Pat<(v1i32 (bitconvert (f32 FPR32:$src))), (v1i32 FPR32:$src)>;
3598 def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
3599 def : Pat<(v1f32 (bitconvert (f32 FPR32:$src))), (v1f32 FPR32:$src)>;
3600 def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
// GPR integer to 1-element vector: FMOV in the GPR->FPR direction.
3602 def : Pat<(v1i64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
3603 def : Pat<(v1i32 (bitconvert (i32 GPR32:$src))), (FMOVsw $src)>;
3605 def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
3606 def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
3607 def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
3608 def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
3609 def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
3611 def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>;
3612 def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>;
3613 def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>;
3614 def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
3615 def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
3616 def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
// "Bare" unsigned-immediate lane-index operands (printed without a '#'
// prefix, via printNeonUImm8OperandBare). neon_uimm0_bare only matches the
// constant 0; the wider variants accept any immediate at ImmLeaf level and
// rely on the parser-match class for range checking.
// NOTE(review): the closing '}' of each def is not visible in this excerpt —
// lines appear to have been dropped during extraction.
3618 def neon_uimm0_bare : Operand<i64>,
3619 ImmLeaf<i64, [{return Imm == 0;}]> {
3620 let ParserMatchClass = neon_uimm0_asmoperand;
3621 let PrintMethod = "printNeonUImm8OperandBare";
3624 def neon_uimm1_bare : Operand<i64>,
3625 ImmLeaf<i64, [{(void)Imm; return true;}]> {
3626 let ParserMatchClass = neon_uimm1_asmoperand;
3627 let PrintMethod = "printNeonUImm8OperandBare";
3630 def neon_uimm2_bare : Operand<i64>,
3631 ImmLeaf<i64, [{(void)Imm; return true;}]> {
3632 let ParserMatchClass = neon_uimm2_asmoperand;
3633 let PrintMethod = "printNeonUImm8OperandBare";
// NOTE(review): uimm3/uimm4 use the generic uimm3/uimm4 asm-operand classes
// rather than neon_-specific ones — confirm this asymmetry is intentional.
3636 def neon_uimm3_bare : Operand<i64>,
3637 ImmLeaf<i64, [{(void)Imm; return true;}]> {
3638 let ParserMatchClass = uimm3_asmoperand;
3639 let PrintMethod = "printNeonUImm8OperandBare";
3642 def neon_uimm4_bare : Operand<i64>,
3643 ImmLeaf<i64, [{(void)Imm; return true;}]> {
3644 let ParserMatchClass = uimm4_asmoperand;
3645 let PrintMethod = "printNeonUImm8OperandBare";
// INS (general-register to vector element): inserts a GPR value into lane
// $Imm of a 128-bit vector, selected from a vector_insert DAG node. $src is
// tied to $Rd so the untouched lanes are preserved.
// NOTE(review): the vector_insert pattern body is truncated in this excerpt
// (the operands after (ResTy VPR128:$src) are missing).
3648 class NeonI_INS_main<string asmop, string Res, ValueType ResTy,
3649 RegisterClass OpGPR, ValueType OpTy, Operand OpImm>
3650 : NeonI_copy<0b1, 0b0, 0b0011,
3651 (outs VPR128:$Rd), (ins VPR128:$src, OpGPR:$Rn, OpImm:$Imm),
3652 asmop # "\t$Rd." # Res # "[$Imm], $Rn",
3653 [(set (ResTy VPR128:$Rd),
3654 (ResTy (vector_insert
3655 (ResTy VPR128:$src),
// Tie the input vector to the destination so unmodified lanes survive.
3660 let Constraints = "$src = $Rd";
3663 // The following definitions are for the instruction class (3V Elem).
// Base class for 3-operand by-element (2VElem) instructions with an
// accumulator: Rd = op(Rd/src, Rn, Re[Index]). ResS/OpS/EleOpS are the
// assembly arrangement suffixes (e.g. "4s"); OpImm is the lane-index
// operand. $src is tied to $Rd for the accumulate semantics.
3667 class NI_2VE<bit q, bit u, bits<2> size, bits<4> opcode,
3668 string asmop, string ResS, string OpS, string EleOpS,
3669 Operand OpImm, RegisterOperand ResVPR,
3670 RegisterOperand OpVPR, RegisterOperand EleOpVPR>
3671 : NeonI_2VElem<q, u, size, opcode,
3672 (outs ResVPR:$Rd), (ins ResVPR:$src, OpVPR:$Rn,
3673 EleOpVPR:$Re, OpImm:$Index),
3674 asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
3675 ", $Re." # EleOpS # "[$Index]",
// Accumulator input must be allocated to the destination register.
3681 let Constraints = "$src = $Rd";
// Variant 1: integer by-element accumulating ops over 2S/4S and 4H/8H
// arrangements. The Index bits are scattered into the encoding (H:L fields);
// S elements use a 5-bit Re, H elements a 4-bit Re (v0-v15 only).
3684 multiclass NI_2VE_v1<bit u, bits<4> opcode, string asmop>
3686 // vector register class for element is always 128-bit to cover the max index
3687 def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
3688 neon_uimm2_bare, VPR64, VPR64, VPR128> {
3689 let Inst{11} = {Index{1}};
3690 let Inst{21} = {Index{0}};
3691 let Inst{20-16} = Re;
3694 def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
3695 neon_uimm2_bare, VPR128, VPR128, VPR128> {
3696 let Inst{11} = {Index{1}};
3697 let Inst{21} = {Index{0}};
3698 let Inst{20-16} = Re;
3701 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
3702 def _4h8h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
3703 neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
3704 let Inst{11} = {Index{2}};
3705 let Inst{21} = {Index{1}};
3706 let Inst{20} = {Index{0}};
3707 let Inst{19-16} = Re{3-0};
3710 def _8h8h : NI_2VE<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
3711 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
3712 let Inst{11} = {Index{2}};
3713 let Inst{21} = {Index{1}};
3714 let Inst{20} = {Index{0}};
3715 let Inst{19-16} = Re{3-0};
// Instantiations: multiply-accumulate and multiply-subtract by element.
3719 defm MLAvve : NI_2VE_v1<0b1, 0b0000, "mla">;
3720 defm MLSvve : NI_2VE_v1<0b1, 0b0100, "mls">;
// Pattern for lane in 128-bit vector
// Selection pattern classes for the accumulating by-element forms.
// coreop is the dup-lane fragment producing the splatted element.
3723 class NI_2VE_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
3724 RegisterOperand ResVPR, RegisterOperand OpVPR,
3725 RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
3726 ValueType EleOpTy, SDPatternOperator coreop>
3727 : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
3728 (OpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
3729 (INST ResVPR:$src, OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
3731 // Pattern for lane in 64-bit vector
// Same, but the element source is a 64-bit register, so it is widened to
// the 128-bit class the instruction expects via SUBREG_TO_REG.
3732 class NI_2VE_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
3733 RegisterOperand ResVPR, RegisterOperand OpVPR,
3734 RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
3735 ValueType EleOpTy, SDPatternOperator coreop>
3736 : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
3737 (OpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
3738 (INST ResVPR:$src, OpVPR:$Rn,
3739 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
// Selection patterns for the NI_2VE_v1 instructions: first the four forms
// whose element lives in a 128-bit vector, then the 64-bit-vector forms
// (whose index range is halved, hence the narrower *_bare immediates).
3741 multiclass NI_2VE_v1_pat<string subop, SDPatternOperator op>
3743 def : NI_2VE_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
3744 op, VPR64, VPR64, VPR128, v2i32, v2i32, v4i32,
3745 BinOpFrag<(Neon_vduplane
3746 (Neon_low4S node:$LHS), node:$RHS)>>;
3748 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
3749 op, VPR128, VPR128, VPR128, v4i32, v4i32, v4i32,
3750 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
3752 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
3753 op, VPR64, VPR64, VPR128Lo, v4i16, v4i16, v8i16,
3754 BinOpFrag<(Neon_vduplane
3755 (Neon_low8H node:$LHS), node:$RHS)>>;
3757 def : NI_2VE_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
3758 op, VPR128, VPR128, VPR128Lo, v8i16, v8i16, v8i16,
3759 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
3761 // Index can only be half of the max value for lane in 64-bit vector
3763 def : NI_2VE_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
3764 op, VPR64, VPR64, VPR64, v2i32, v2i32, v2i32,
3765 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
3767 def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4s"), neon_uimm1_bare,
3768 op, VPR128, VPR128, VPR64, v4i32, v4i32, v2i32,
3769 BinOpFrag<(Neon_vduplane
3770 (Neon_combine_4S node:$LHS, undef),
3773 def : NI_2VE_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
3774 op, VPR64, VPR64, VPR64Lo, v4i16, v4i16, v4i16,
3775 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
3777 def : NI_2VE_lane<!cast<Instruction>(subop # "_8h8h"), neon_uimm2_bare,
3778 op, VPR128, VPR128, VPR64Lo, v8i16, v8i16, v4i16,
3779 BinOpFrag<(Neon_vduplane
3780 (Neon_combine_8H node:$LHS, undef),
// Instantiate the pattern set for the MLA/MLS by-element instructions.
3784 defm MLA_lane_v1 : NI_2VE_v1_pat<"MLAvve", Neon_mla>;
3785 defm MLS_lane_v1 : NI_2VE_v1_pat<"MLSvve", Neon_mls>;
// Two-operand variant of NI_2VE: Rd = op(Rn, Re[Index]) with no tied
// accumulator input (used for mul/fmul-style by-element instructions).
3787 class NI_2VE_2op<bit q, bit u, bits<2> size, bits<4> opcode,
3788 string asmop, string ResS, string OpS, string EleOpS,
3789 Operand OpImm, RegisterOperand ResVPR,
3790 RegisterOperand OpVPR, RegisterOperand EleOpVPR>
3791 : NeonI_2VElem<q, u, size, opcode,
3792 (outs ResVPR:$Rd), (ins OpVPR:$Rn,
3793 EleOpVPR:$Re, OpImm:$Index),
3794 asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
3795 ", $Re." # EleOpS # "[$Index]",
// Variant 1, two-operand: same arrangements and index encodings as
// NI_2VE_v1 but without an accumulator input.
3802 multiclass NI_2VE_v1_2op<bit u, bits<4> opcode, string asmop>
3804 // vector register class for element is always 128-bit to cover the max index
3805 def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
3806 neon_uimm2_bare, VPR64, VPR64, VPR128> {
3807 let Inst{11} = {Index{1}};
3808 let Inst{21} = {Index{0}};
3809 let Inst{20-16} = Re;
3812 def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
3813 neon_uimm2_bare, VPR128, VPR128, VPR128> {
3814 let Inst{11} = {Index{1}};
3815 let Inst{21} = {Index{0}};
3816 let Inst{20-16} = Re;
3819 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
3820 def _4h8h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
3821 neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
3822 let Inst{11} = {Index{2}};
3823 let Inst{21} = {Index{1}};
3824 let Inst{20} = {Index{0}};
3825 let Inst{19-16} = Re{3-0};
3828 def _8h8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
3829 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
3830 let Inst{11} = {Index{2}};
3831 let Inst{21} = {Index{1}};
3832 let Inst{20} = {Index{0}};
3833 let Inst{19-16} = Re{3-0};
// Instantiations: plain and saturating (rounding) doubling multiplies.
3837 defm MULve : NI_2VE_v1_2op<0b0, 0b1000, "mul">;
3838 defm SQDMULHve : NI_2VE_v1_2op<0b0, 0b1100, "sqdmulh">;
3839 defm SQRDMULHve : NI_2VE_v1_2op<0b0, 0b1101, "sqrdmulh">;
// Pattern for lane in 128-bit vector
// Two-operand analogues of NI_2VE_laneq / NI_2VE_lane (no $src operand).
3842 class NI_2VE_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
3843 RegisterOperand OpVPR, RegisterOperand EleOpVPR,
3844 ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
3845 SDPatternOperator coreop>
3846 : Pat<(ResTy (op (OpTy OpVPR:$Rn),
3847 (OpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
3848 (INST OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
3850 // Pattern for lane in 64-bit vector
// NOTE(review): the result-instruction line naming INST appears truncated
// here (only the SUBREG_TO_REG operand line is visible).
3851 class NI_2VE_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
3852 RegisterOperand OpVPR, RegisterOperand EleOpVPR,
3853 ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
3854 SDPatternOperator coreop>
3855 : Pat<(ResTy (op (OpTy OpVPR:$Rn),
3856 (OpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
3858 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
// Selection patterns for the two-operand integer by-element multiplies,
// mirroring NI_2VE_v1_pat: 128-bit element sources first, then 64-bit
// sources with the halved index range.
3860 multiclass NI_2VE_mul_v1_pat<string subop, SDPatternOperator op>
3862 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
3863 op, VPR64, VPR128, v2i32, v2i32, v4i32,
3864 BinOpFrag<(Neon_vduplane
3865 (Neon_low4S node:$LHS), node:$RHS)>>;
3867 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
3868 op, VPR128, VPR128, v4i32, v4i32, v4i32,
3869 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
3871 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
3872 op, VPR64, VPR128Lo, v4i16, v4i16, v8i16,
3873 BinOpFrag<(Neon_vduplane
3874 (Neon_low8H node:$LHS), node:$RHS)>>;
3876 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
3877 op, VPR128, VPR128Lo, v8i16, v8i16, v8i16,
3878 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
3880 // Index can only be half of the max value for lane in 64-bit vector
3882 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
3883 op, VPR64, VPR64, v2i32, v2i32, v2i32,
3884 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
3886 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4s"), neon_uimm1_bare,
3887 op, VPR128, VPR64, v4i32, v4i32, v2i32,
3888 BinOpFrag<(Neon_vduplane
3889 (Neon_combine_4S node:$LHS, undef),
3892 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
3893 op, VPR64, VPR64Lo, v4i16, v4i16, v4i16,
3894 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
3896 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_8h8h"), neon_uimm2_bare,
3897 op, VPR128, VPR64Lo, v8i16, v8i16, v4i16,
3898 BinOpFrag<(Neon_vduplane
3899 (Neon_combine_8H node:$LHS, undef),
// Instantiations: generic mul plus the ARM-intrinsic saturating multiplies.
3903 defm MUL_lane_v1 : NI_2VE_mul_v1_pat<"MULve", mul>;
3904 defm SQDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQDMULHve", int_arm_neon_vqdmulh>;
3905 defm SQRDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQRDMULHve", int_arm_neon_vqrdmulh>;
// Variant 2, two-operand: floating-point arrangements 2S/4S/2D. Note there
// is no 1D form of the 2D-element instruction.
3909 multiclass NI_2VE_v2_2op<bit u, bits<4> opcode, string asmop>
3911 // vector register class for element is always 128-bit to cover the max index
3912 def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
3913 neon_uimm2_bare, VPR64, VPR64, VPR128> {
3914 let Inst{11} = {Index{1}};
3915 let Inst{21} = {Index{0}};
3916 let Inst{20-16} = Re;
3919 def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
3920 neon_uimm2_bare, VPR128, VPR128, VPR128> {
3921 let Inst{11} = {Index{1}};
3922 let Inst{21} = {Index{0}};
3923 let Inst{20-16} = Re;
3926 // _1d2d doesn't exist!
3928 def _2d2d : NI_2VE_2op<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
3929 neon_uimm1_bare, VPR128, VPR128, VPR128> {
3930 let Inst{11} = {Index{0}};
3932 let Inst{20-16} = Re;
// Instantiations: FP multiply and extended multiply by element.
3936 defm FMULve : NI_2VE_v2_2op<0b0, 0b1001, "fmul">;
3937 defm FMULXve : NI_2VE_v2_2op<0b1, 0b1001, "fmulx">;
// 2D-by-scalar special case: the v1f64 element register is combined with
// itself (lane 0 duplicated), widened with SUBREG_TO_REG, and the
// instruction is emitted with a hard-coded index of 0.
// NOTE(review): the line naming INST in the result DAG is not visible in
// this excerpt — presumably dropped during extraction.
3939 class NI_2VE_mul_lane_2d<Instruction INST, Operand OpImm, SDPatternOperator op,
3940 RegisterOperand OpVPR, RegisterOperand EleOpVPR,
3941 ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
3942 SDPatternOperator coreop>
3943 : Pat<(ResTy (op (OpTy OpVPR:$Rn),
3944 (OpTy (coreop (EleOpTy EleOpVPR:$Re), (EleOpTy EleOpVPR:$Re))))),
3946 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), 0)>;
// Selection patterns for the FP by-element multiplies (FMUL/FMULX):
// 128-bit element sources first, then the 64-bit forms.
3948 multiclass NI_2VE_mul_v2_pat<string subop, SDPatternOperator op>
3950 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
3951 op, VPR64, VPR128, v2f32, v2f32, v4f32,
3952 BinOpFrag<(Neon_vduplane
3953 (Neon_low4f node:$LHS), node:$RHS)>>;
3955 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
3956 op, VPR128, VPR128, v4f32, v4f32, v4f32,
3957 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
3959 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
3960 op, VPR128, VPR128, v2f64, v2f64, v2f64,
3961 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
3963 // Index can only be half of the max value for lane in 64-bit vector
3965 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
3966 op, VPR64, VPR64, v2f32, v2f32, v2f32,
3967 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
3969 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4s"), neon_uimm1_bare,
3970 op, VPR128, VPR64, v4f32, v4f32, v2f32,
3971 BinOpFrag<(Neon_vduplane
3972 (Neon_combine_4f node:$LHS, undef),
3975 def : NI_2VE_mul_lane_2d<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
3976 op, VPR128, VPR64, v2f64, v2f64, v1f64,
3977 BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
// Instantiations: generic fmul and the AArch64 fmulx intrinsic.
3980 defm FMUL_lane_v2 : NI_2VE_mul_v2_pat<"FMULve", fmul>;
3981 defm FMULX_lane_v2 : NI_2VE_mul_v2_pat<"FMULXve", int_aarch64_neon_vmulx>;
3983 // The following are patterns using fma;
3984 // -ffp-contract=fast generates fma.
// Variant 2, accumulating: FP fused multiply-add/subtract by element over
// 2S/4S/2D (again no 1D form of the 2D-element instruction).
3986 multiclass NI_2VE_v2<bit u, bits<4> opcode, string asmop>
3988 // vector register class for element is always 128-bit to cover the max index
3989 def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
3990 neon_uimm2_bare, VPR64, VPR64, VPR128> {
3991 let Inst{11} = {Index{1}};
3992 let Inst{21} = {Index{0}};
3993 let Inst{20-16} = Re;
3996 def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
3997 neon_uimm2_bare, VPR128, VPR128, VPR128> {
3998 let Inst{11} = {Index{1}};
3999 let Inst{21} = {Index{0}};
4000 let Inst{20-16} = Re;
4003 // _1d2d doesn't exist!
4005 def _2d2d : NI_2VE<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
4006 neon_uimm1_bare, VPR128, VPR128, VPR128> {
4007 let Inst{11} = {Index{0}};
4009 let Inst{20-16} = Re;
// Instantiations: fused multiply-add and multiply-subtract by element.
4013 defm FMLAvve : NI_2VE_v2<0b0, 0b0001, "fmla">;
4014 defm FMLSvve : NI_2VE_v2<0b0, 0b0101, "fmls">;
// Pattern for lane in 128-bit vector
// "Swap" pattern classes for fma-based matching: the duplicated element
// appears as the FIRST operand of the op in the source DAG, while the
// instruction takes it last, hence the operand reordering in the result.
4017 class NI_2VEswap_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
4018 RegisterOperand ResVPR, RegisterOperand OpVPR,
4019 ValueType ResTy, ValueType OpTy,
4020 SDPatternOperator coreop>
4021 : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
4022 (ResTy ResVPR:$src), (ResTy ResVPR:$Rn))),
4023 (INST ResVPR:$src, ResVPR:$Rn, OpVPR:$Re, OpImm:$Index)>;
4025 // Pattern for lane in 64-bit vector
// 64-bit element source: widen $Re to 128 bits via SUBREG_TO_REG.
4026 class NI_2VEswap_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
4027 RegisterOperand ResVPR, RegisterOperand OpVPR,
4028 ValueType ResTy, ValueType OpTy,
4029 SDPatternOperator coreop>
4030 : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
4031 (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
4032 (INST ResVPR:$src, ResVPR:$Rn,
4033 (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), OpImm:$Index)>;
4035 // Pattern for lane in 64-bit vector
// 2D-by-scalar case: $Re combined with itself, index hard-coded to 0.
4036 class NI_2VEswap_lane_2d2d<Instruction INST, Operand OpImm,
4037 SDPatternOperator op,
4038 RegisterOperand ResVPR, RegisterOperand OpVPR,
4039 ValueType ResTy, ValueType OpTy,
4040 SDPatternOperator coreop>
4041 : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (OpTy OpVPR:$Re))),
4042 (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
4043 (INST ResVPR:$src, ResVPR:$Rn,
4044 (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), 0)>;
// FMLA selection patterns: fma with a duplicated lane as multiplicand,
// covering 128-bit then 64-bit element sources.
4047 multiclass NI_2VE_fma_v2_pat<string subop, SDPatternOperator op>
4049 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
4050 neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
4051 BinOpFrag<(Neon_vduplane
4052 (Neon_low4f node:$LHS), node:$RHS)>>;
4054 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
4055 neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
4056 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4058 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
4059 neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
4060 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4062 // Index can only be half of the max value for lane in 64-bit vector
4064 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
4065 neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
4066 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4068 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
4069 neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
4070 BinOpFrag<(Neon_vduplane
4071 (Neon_combine_4f node:$LHS, undef),
4074 def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
4075 neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
4076 BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
// Instantiate for FMLA using the generic fma node.
4079 defm FMLA_lane_v2_s : NI_2VE_fma_v2_pat<"FMLAvve", fma>;
// FMLS selection patterns: still matched from the fma node, but with the
// negation (fneg) folded into the multiplicand. Each shape appears twice
// because the fneg may sit either outside the dup-lane or inside it.
4081 multiclass NI_2VE_fms_v2_pat<string subop, SDPatternOperator op>
4083 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
4084 neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
4085 BinOpFrag<(fneg (Neon_vduplane
4086 (Neon_low4f node:$LHS), node:$RHS))>>;
4088 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
4089 neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
4090 BinOpFrag<(Neon_vduplane
4091 (Neon_low4f (fneg node:$LHS)),
4094 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
4095 neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
4096 BinOpFrag<(fneg (Neon_vduplane
4097 node:$LHS, node:$RHS))>>;
4099 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
4100 neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
4101 BinOpFrag<(Neon_vduplane
4102 (fneg node:$LHS), node:$RHS)>>;
4104 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
4105 neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
4106 BinOpFrag<(fneg (Neon_vduplane
4107 node:$LHS, node:$RHS))>>;
4109 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
4110 neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
4111 BinOpFrag<(Neon_vduplane
4112 (fneg node:$LHS), node:$RHS)>>;
4114 // Index can only be half of the max value for lane in 64-bit vector
4116 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
4117 neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
4118 BinOpFrag<(fneg (Neon_vduplane
4119 node:$LHS, node:$RHS))>>;
4121 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
4122 neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
4123 BinOpFrag<(Neon_vduplane
4124 (fneg node:$LHS), node:$RHS)>>;
4126 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
4127 neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
4128 BinOpFrag<(fneg (Neon_vduplane
4129 (Neon_combine_4f node:$LHS, undef),
4132 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
4133 neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
4134 BinOpFrag<(Neon_vduplane
4135 (Neon_combine_4f (fneg node:$LHS), undef),
4138 def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
4139 neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
4140 BinOpFrag<(fneg (Neon_combine_2d
4141 node:$LHS, node:$RHS))>>;
4143 def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
4144 neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
4145 BinOpFrag<(Neon_combine_2d
4146 (fneg node:$LHS), (fneg node:$RHS))>>;
// Instantiate for FMLS, also driven by the generic fma node.
4149 defm FMLS_lane_v2_s : NI_2VE_fms_v2_pat<"FMLSvve", fma>;
4151 // Variant 3: Long type
4152 // E.g. SMLAL : 4S/4H/H (v0-v15), 2D/2S/S
4153 // SMLAL2: 4S/8H/H (v0-v15), 2D/4S/S
// Variant 3, accumulating long ops (e.g. SMLAL): result elements are twice
// the width of the sources. Q=1 forms are the "2" (high-half) variants.
4155 multiclass NI_2VE_v3<bit u, bits<4> opcode, string asmop>
4157 // vector register class for element is always 128-bit to cover the max index
4158 def _2d2s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
4159 neon_uimm2_bare, VPR128, VPR64, VPR128> {
4160 let Inst{11} = {Index{1}};
4161 let Inst{21} = {Index{0}};
4162 let Inst{20-16} = Re;
4165 def _2d4s : NI_2VE<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
4166 neon_uimm2_bare, VPR128, VPR128, VPR128> {
4167 let Inst{11} = {Index{1}};
4168 let Inst{21} = {Index{0}};
4169 let Inst{20-16} = Re;
4172 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
4173 def _4s8h : NI_2VE<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
4174 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
4175 let Inst{11} = {Index{2}};
4176 let Inst{21} = {Index{1}};
4177 let Inst{20} = {Index{0}};
4178 let Inst{19-16} = Re{3-0};
4181 def _4s4h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
4182 neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
4183 let Inst{11} = {Index{2}};
4184 let Inst{21} = {Index{1}};
4185 let Inst{20} = {Index{0}};
4186 let Inst{19-16} = Re{3-0};
// Instantiations: signed/unsigned long MLA/MLS and saturating doubling forms.
4190 defm SMLALvve : NI_2VE_v3<0b0, 0b0010, "smlal">;
4191 defm UMLALvve : NI_2VE_v3<0b1, 0b0010, "umlal">;
4192 defm SMLSLvve : NI_2VE_v3<0b0, 0b0110, "smlsl">;
4193 defm UMLSLvve : NI_2VE_v3<0b1, 0b0110, "umlsl">;
4194 defm SQDMLALvve : NI_2VE_v3<0b0, 0b0011, "sqdmlal">;
4195 defm SQDMLSLvve : NI_2VE_v3<0b0, 0b0111, "sqdmlsl">;
// Variant 3, two-operand long ops (e.g. SMULL): same arrangements and
// index encodings as NI_2VE_v3, without the accumulator.
4197 multiclass NI_2VE_v3_2op<bit u, bits<4> opcode, string asmop>
4199 // vector register class for element is always 128-bit to cover the max index
4200 def _2d2s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
4201 neon_uimm2_bare, VPR128, VPR64, VPR128> {
4202 let Inst{11} = {Index{1}};
4203 let Inst{21} = {Index{0}};
4204 let Inst{20-16} = Re;
4207 def _2d4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
4208 neon_uimm2_bare, VPR128, VPR128, VPR128> {
4209 let Inst{11} = {Index{1}};
4210 let Inst{21} = {Index{0}};
4211 let Inst{20-16} = Re;
4214 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
4215 def _4s8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
4216 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
4217 let Inst{11} = {Index{2}};
4218 let Inst{21} = {Index{1}};
4219 let Inst{20} = {Index{0}};
4220 let Inst{19-16} = Re{3-0};
4223 def _4s4h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
4224 neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
4225 let Inst{11} = {Index{2}};
4226 let Inst{21} = {Index{1}};
4227 let Inst{20} = {Index{0}};
4228 let Inst{19-16} = Re{3-0};
// Instantiations: signed/unsigned long multiply and saturating doubling form.
4232 defm SMULLve : NI_2VE_v3_2op<0b0, 0b1010, "smull">;
4233 defm UMULLve : NI_2VE_v3_2op<0b1, 0b1010, "umull">;
4234 defm SQDMULLve : NI_2VE_v3_2op<0b0, 0b1011, "sqdmull">;
// Pattern for lane in 128-bit vector
// Pattern classes for the high-half ("2") long accumulating forms: hiop
// (e.g. Neon_High8H) extracts the upper half of $Rn before the widening op.
4237 class NI_2VEL2_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
4238 RegisterOperand EleOpVPR, ValueType ResTy,
4239 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
4240 SDPatternOperator hiop, SDPatternOperator coreop>
4241 : Pat<(ResTy (op (ResTy VPR128:$src),
4242 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
4243 (HalfOpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
4244 (INST VPR128:$src, VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
4246 // Pattern for lane in 64-bit vector
// 64-bit element source variant: widen $Re via SUBREG_TO_REG.
4247 class NI_2VEL2_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
4248 RegisterOperand EleOpVPR, ValueType ResTy,
4249 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
4250 SDPatternOperator hiop, SDPatternOperator coreop>
4251 : Pat<(ResTy (op (ResTy VPR128:$src),
4252 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
4253 (HalfOpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
4254 (INST VPR128:$src, VPR128:$Rn,
4255 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
// Selection patterns for the long accumulating by-element ops: low-half
// forms via NI_2VE_laneq/lane, high-half ("2") forms via NI_2VEL2_*.
4257 multiclass NI_2VEL_v3_pat<string subop, SDPatternOperator op>
4259 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
4260 op, VPR128, VPR64, VPR128Lo, v4i32, v4i16, v8i16,
4261 BinOpFrag<(Neon_vduplane
4262 (Neon_low8H node:$LHS), node:$RHS)>>;
4264 def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
4265 op, VPR128, VPR64, VPR128, v2i64, v2i32, v4i32,
4266 BinOpFrag<(Neon_vduplane
4267 (Neon_low4S node:$LHS), node:$RHS)>>;
4269 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
4270 op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H,
4271 BinOpFrag<(Neon_vduplane
4272 (Neon_low8H node:$LHS), node:$RHS)>>;
4274 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
4275 op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S,
4276 BinOpFrag<(Neon_vduplane
4277 (Neon_low4S node:$LHS), node:$RHS)>>;
4279 // Index can only be half of the max value for lane in 64-bit vector
4281 def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
4282 op, VPR128, VPR64, VPR64Lo, v4i32, v4i16, v4i16,
4283 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4285 def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
4286 op, VPR128, VPR64, VPR64, v2i64, v2i32, v2i32,
4287 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4289 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
4290 op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H,
4291 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4293 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
4294 op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S,
4295 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
// Instantiations for the signed/unsigned long MLA/MLS instructions.
4298 defm SMLAL_lane_v3 : NI_2VEL_v3_pat<"SMLALvve", Neon_smlal>;
4299 defm UMLAL_lane_v3 : NI_2VEL_v3_pat<"UMLALvve", Neon_umlal>;
4300 defm SMLSL_lane_v3 : NI_2VEL_v3_pat<"SMLSLvve", Neon_smlsl>;
4301 defm UMLSL_lane_v3 : NI_2VEL_v3_pat<"UMLSLvve", Neon_umlsl>;
// Pattern for lane in 128-bit vector
// Two-operand (no accumulator) analogues of NI_2VEL2_laneq/_lane for the
// high-half long multiplies.
// NOTE(review): each Pat's opening source line (the one naming 'op') is not
// visible in this excerpt — presumably dropped during extraction.
4304 class NI_2VEL2_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
4305 RegisterOperand EleOpVPR, ValueType ResTy,
4306 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
4307 SDPatternOperator hiop, SDPatternOperator coreop>
4309 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
4310 (HalfOpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
4311 (INST VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
4313 // Pattern for lane in 64-bit vector
4314 class NI_2VEL2_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
4315 RegisterOperand EleOpVPR, ValueType ResTy,
4316 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
4317 SDPatternOperator hiop, SDPatternOperator coreop>
4319 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
4320 (HalfOpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
4322 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
// Lane-indexed long-multiply patterns for one instruction family (subop):
// the first four defs handle a lane in a 128-bit element vector (laneq), the
// last four a lane in a 64-bit element vector (lane), for both the low-half
// (_4s4h/_2d2s) and high-half (_4s8h/_2d4s) source forms.
// NOTE(review): the multiclass braces (4325/4364) and the Neon_High8H hiop
// argument of the _4s8h laneq def (4338) are missing from this excerpt.
4324 multiclass NI_2VEL_mul_v3_pat<string subop, SDPatternOperator op>
4326 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
4327 op, VPR64, VPR128Lo, v4i32, v4i16, v8i16,
4328 BinOpFrag<(Neon_vduplane
4329 (Neon_low8H node:$LHS), node:$RHS)>>;
4331 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
4332 op, VPR64, VPR128, v2i64, v2i32, v4i32,
4333 BinOpFrag<(Neon_vduplane
4334 (Neon_low4S node:$LHS), node:$RHS)>>;
4336 def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
4337 op, VPR128Lo, v4i32, v8i16, v8i16, v4i16,
4339 BinOpFrag<(Neon_vduplane
4340 (Neon_low8H node:$LHS), node:$RHS)>>;
4342 def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
4343 op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S,
4344 BinOpFrag<(Neon_vduplane
4345 (Neon_low4S node:$LHS), node:$RHS)>>;
4347 // Index can only be half of the max value for lane in 64-bit vector
4349 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
4350 op, VPR64, VPR64Lo, v4i32, v4i16, v4i16,
4351 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4353 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
4354 op, VPR64, VPR64, v2i64, v2i32, v2i32,
4355 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4357 def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
4358 op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H,
4359 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4361 def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
4362 op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S,
4363 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
// Instantiate the lane-indexed long-multiply patterns for SMULL/UMULL and
// the saturating doubling multiply SQDMULL (ve "by element" variants).
4366 defm SMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SMULLve", int_arm_neon_vmulls>;
4367 defm UMULL_lane_v3 : NI_2VEL_mul_v3_pat<"UMULLve", int_arm_neon_vmullu>;
4368 defm SQDMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SQDMULLve", int_arm_neon_vqdmull>;
// PatFrags that fold a saturating accumulate (op = vqadds/vqsubs) of $Ra with
// the result of a saturating doubling long multiply of $Rn and $Rm, in 4s and
// 2d result widths.
// NOTE(review): the multiclass braces (4371/4379) and the fragment lines that
// apply `op` to $Ra (4373/4377) are missing from this excerpt.
4370 multiclass NI_qdma<SDPatternOperator op>
4372 def _4s : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
4374 (v4i32 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
4376 def _2d : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
4378 (v2i64 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
// qdmlal = saturating add of the doubled product; qdmlsl = saturating subtract.
4381 defm Neon_qdmlal : NI_qdma<int_arm_neon_vqadds>;
4382 defm Neon_qdmlsl : NI_qdma<int_arm_neon_vqsubs>;
// Lane-indexed patterns for the saturating doubling multiply-accumulate
// family: `op` names the NI_qdma PatFrag prefix ("Neon_qdmlal"/"Neon_qdmlsl")
// and subop the instruction prefix. Structure mirrors NI_2VEL_mul_v3_pat:
// laneq (128-bit element vector) defs first, then lane (64-bit) defs with
// halved index ranges.
// NOTE(review): the multiclass braces (4385/4431) are missing from this
// excerpt.
4384 multiclass NI_2VEL_v3_qdma_pat<string subop, string op>
4386 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
4387 !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR128Lo,
4388 v4i32, v4i16, v8i16,
4389 BinOpFrag<(Neon_vduplane
4390 (Neon_low8H node:$LHS), node:$RHS)>>;
4392 def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
4393 !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR128,
4394 v2i64, v2i32, v4i32,
4395 BinOpFrag<(Neon_vduplane
4396 (Neon_low4S node:$LHS), node:$RHS)>>;
4398 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
4399 !cast<PatFrag>(op # "_4s"), VPR128Lo,
4400 v4i32, v8i16, v8i16, v4i16, Neon_High8H,
4401 BinOpFrag<(Neon_vduplane
4402 (Neon_low8H node:$LHS), node:$RHS)>>;
4404 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
4405 !cast<PatFrag>(op # "_2d"), VPR128,
4406 v2i64, v4i32, v4i32, v2i32, Neon_High4S,
4407 BinOpFrag<(Neon_vduplane
4408 (Neon_low4S node:$LHS), node:$RHS)>>;
4410 // Index can only be half of the max value for lane in 64-bit vector
4412 def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
4413 !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR64Lo,
4414 v4i32, v4i16, v4i16,
4415 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4417 def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
4418 !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR64,
4419 v2i64, v2i32, v2i32,
4420 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4422 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
4423 !cast<PatFrag>(op # "_4s"), VPR64Lo,
4424 v4i32, v8i16, v4i16, v4i16, Neon_High8H,
4425 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4427 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
4428 !cast<PatFrag>(op # "_2d"), VPR64,
4429 v2i64, v4i32, v2i32, v2i32, Neon_High4S,
4430 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
// Instantiate lane-indexed patterns for SQDMLAL/SQDMLSL (vve variants).
4433 defm SQDMLAL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLALvve", "Neon_qdmlal">;
4434 defm SQDMLSL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLSLvve", "Neon_qdmlsl">;
4436 // End of implementation for instruction class (3V Elem)
4438 //Insert element (vector, from main)
// INS Vd.T[index], Rn: insert a general register into a vector element.
// Each size variant encodes its lane index into the imm5 field Inst{20-16},
// with the low set bit acting as the element-size marker (b=xxxx1, h=xxx10,
// s=xx100, d=x1000).
// NOTE(review): the immediate-operand argument lines (4440/4444/4448/4452)
// and the closing braces of these defs are missing from this excerpt.
4439 def INSbw : NeonI_INS_main<"ins", "b", v16i8, GPR32, i32,
4441 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
4443 def INShw : NeonI_INS_main<"ins", "h", v8i16, GPR32, i32,
4445 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
4447 def INSsw : NeonI_INS_main<"ins", "s", v4i32, GPR32, i32,
4449 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
4451 def INSdx : NeonI_INS_main<"ins", "d", v2i64, GPR64, i64,
4453 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
// vector_insert on a 64-bit vector type (ResTy) has no direct instruction, so
// widen the source via SUBREG_TO_REG to the 128-bit type (ExtResTy), perform
// the INS there, and take the low half back with EXTRACT_SUBREG/sub_64.
// NOTE(review): the Pat source-dag lines (4460-4462) are missing from this
// excerpt.
4456 class Neon_INS_main_pattern <ValueType ResTy,ValueType ExtResTy,
4457 RegisterClass OpGPR, ValueType OpTy,
4458 Operand OpImm, Instruction INS>
4459 : Pat<(ResTy (vector_insert
4463 (ResTy (EXTRACT_SUBREG
4464 (ExtResTy (INS (ExtResTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
4465 OpGPR:$Rn, OpImm:$Imm)), sub_64))>;
// 64-bit-vector insert patterns for each element size; the index operand is
// correspondingly narrower than the 128-bit instruction's (e.g. uimm3 for
// v8i8 vs. uimm4 for v16i8).
4467 def INSbw_pattern : Neon_INS_main_pattern<v8i8, v16i8, GPR32, i32,
4468 neon_uimm3_bare, INSbw>;
4469 def INShw_pattern : Neon_INS_main_pattern<v4i16, v8i16, GPR32, i32,
4470 neon_uimm2_bare, INShw>;
4471 def INSsw_pattern : Neon_INS_main_pattern<v2i32, v4i32, GPR32, i32,
4472 neon_uimm1_bare, INSsw>;
4473 def INSdx_pattern : Neon_INS_main_pattern<v1i64, v2i64, GPR64, i64,
4474 neon_uimm0_bare, INSdx>;
// INS Vd.T[Immd], Vn.T[Immn]: element-to-element insert. Matches a
// vector_insert whose inserted value is a vector_extract from another
// register; $src is tied to $Rd since only one lane is overwritten.
// NOTE(review): the tail of the vector_extract dag (4486-4489) and the
// closing brace are missing from this excerpt.
4476 class NeonI_INS_element<string asmop, string Res, ValueType ResTy,
4477 Operand ResImm, ValueType MidTy>
4478 : NeonI_insert<0b1, 0b1,
4479 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn,
4480 ResImm:$Immd, ResImm:$Immn),
4481 asmop # "\t$Rd." # Res # "[$Immd], $Rn." # Res # "[$Immn]",
4482 [(set (ResTy VPR128:$Rd),
4483 (ResTy (vector_insert
4484 (ResTy VPR128:$src),
4485 (MidTy (vector_extract
4490 let Constraints = "$src = $Rd";
4495 //Insert element (vector, from element)
// Destination lane goes in imm5 (Inst{20-16}, low set bit marks element
// size); source lane goes in imm4 (Inst{14-11}), using only as many bits as
// the element size allows -- the remaining imm4 bits are unspecified.
// NOTE(review): the closing braces of these defs are missing from this
// excerpt.
4496 def INSELb : NeonI_INS_element<"ins", "b", v16i8, neon_uimm4_bare, i32> {
4497 let Inst{20-16} = {Immd{3}, Immd{2}, Immd{1}, Immd{0}, 0b1};
4498 let Inst{14-11} = {Immn{3}, Immn{2}, Immn{1}, Immn{0}};
4500 def INSELh : NeonI_INS_element<"ins", "h", v8i16, neon_uimm3_bare, i32> {
4501 let Inst{20-16} = {Immd{2}, Immd{1}, Immd{0}, 0b1, 0b0};
4502 let Inst{14-12} = {Immn{2}, Immn{1}, Immn{0}};
4503 // bit 11 is unspecified.
4505 def INSELs : NeonI_INS_element<"ins", "s", v4i32, neon_uimm2_bare, i32> {
4506 let Inst{20-16} = {Immd{1}, Immd{0}, 0b1, 0b0, 0b0};
4507 let Inst{14-13} = {Immn{1}, Immn{0}};
4508 // bits 11-12 are unspecified.
4510 def INSELd : NeonI_INS_element<"ins", "d", v2i64, neon_uimm1_bare, i64> {
4511 let Inst{20-16} = {Immd, 0b1, 0b0, 0b0, 0b0};
4512 let Inst{14} = Immn{0};
4513 // bits 11-13 are unspecified.
// Element-to-element insert patterns for the cases the INSEL instruction does
// not match directly: operands in 64-bit (NaTy) vectors are widened to the
// 128-bit StTy via SUBREG_TO_REG, the INSEL is done there, and (where the
// result is NaTy) the low half is taken back with EXTRACT_SUBREG/sub_64.
// NOTE(review): this multiclass is heavily truncated in this excerpt -- the
// source dags of all three Pat defs and several result-dag lines (4520-4559,
// partially) are missing; confirm against the upstream file.
4516 multiclass Neon_INS_elt_pattern <ValueType NaTy, Operand NaImm,
4517 ValueType MidTy, ValueType StTy,
4518 Operand StImm, Instruction INS> {
4519 def : Pat<(NaTy (vector_insert
4521 (MidTy (vector_extract
4525 (NaTy (EXTRACT_SUBREG
4527 (StTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
4533 def : Pat<(StTy (vector_insert
4535 (MidTy (vector_extract
4541 (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
4545 def : Pat<(NaTy (vector_insert
4547 (MidTy (vector_extract
4551 (NaTy (EXTRACT_SUBREG
4553 (StTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
4554 (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// One instantiation per element size; narrow (64-bit) index operand first,
// then the wide (128-bit) type, index operand, and INSEL instruction.
4560 defm INSb_pattern : Neon_INS_elt_pattern<v8i8, neon_uimm3_bare, i32,
4561 v16i8, neon_uimm4_bare, INSELb>;
4562 defm INSh_pattern : Neon_INS_elt_pattern<v4i16, neon_uimm2_bare, i32,
4563 v8i16, neon_uimm3_bare, INSELh>;
4564 defm INSs_pattern : Neon_INS_elt_pattern<v2i32, neon_uimm1_bare, i32,
4565 v4i32, neon_uimm2_bare, INSELs>;
4566 defm INSd_pattern : Neon_INS_elt_pattern<v1i64, neon_uimm0_bare, i64,
4567 v2i64, neon_uimm1_bare, INSELd>;
// SMOV Rd, Vn.T[index]: sign-extending move of a vector element into a
// general register (W or X per the Q bit). The pattern matches a
// vector_extract from a 128-bit register at an immediate lane index.
// NOTE(review): the tail of the pattern/encoding (original lines 4576,
// 4579-4583) is missing from this excerpt.
4569 class NeonI_SMOV<string asmop, string Res, bit Q,
4570 ValueType OpTy, ValueType eleTy,
4571 Operand OpImm, RegisterClass ResGPR, ValueType ResTy>
4572 : NeonI_copy<Q, 0b0, 0b0101,
4573 (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
4574 asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
4575 [(set (ResTy ResGPR:$Rd),
4577 (ResTy (vector_extract
4578 (OpTy VPR128:$Rn), (OpImm:$Imm))),
4584 //Signed integer move (main, from element)
// W-form (32-bit result) for b/h elements, X-form (64-bit result) for b/h/s;
// lane index encoded into imm5 (Inst{20-16}) as for INS.
// NOTE(review): the result-register-class argument lines (e.g. 4586) and the
// closing braces of these defs are missing from this excerpt.
4585 def SMOVwb : NeonI_SMOV<"smov", "b", 0b0, v16i8, i8, neon_uimm4_bare,
4587 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
4589 def SMOVwh : NeonI_SMOV<"smov", "h", 0b0, v8i16, i16, neon_uimm3_bare,
4591 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
4593 def SMOVxb : NeonI_SMOV<"smov", "b", 0b1, v16i8, i8, neon_uimm4_bare,
4595 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
4597 def SMOVxh : NeonI_SMOV<"smov", "h", 0b1, v8i16, i16, neon_uimm3_bare,
4599 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
4601 def SMOVxs : NeonI_SMOV<"smov", "s", 0b1, v4i32, i32, neon_uimm2_bare,
4603 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
// Fold 64-bit sign-extensions of vector_extract into the X-form SMOV:
// covers sext_inreg and sext of extracts from both 128-bit (StTy) and 64-bit
// (NaTy) vectors; 64-bit sources are widened via SUBREG_TO_REG/sub_64 first.
// NOTE(review): several interior lines (sext_inreg width arguments, trailing
// index operands of the 64-bit-source results, and the closing brace) are
// missing from this excerpt.
4606 multiclass Neon_SMOVx_pattern <ValueType StTy, ValueType NaTy,
4607 ValueType eleTy, Operand StImm, Operand NaImm,
4608 Instruction SMOVI> {
4609 def : Pat<(i64 (sext_inreg
4611 (i32 (vector_extract
4612 (StTy VPR128:$Rn), (StImm:$Imm))))),
4614 (SMOVI VPR128:$Rn, StImm:$Imm)>;
4616 def : Pat<(i64 (sext
4617 (i32 (vector_extract
4618 (StTy VPR128:$Rn), (StImm:$Imm))))),
4619 (SMOVI VPR128:$Rn, StImm:$Imm)>;
4621 def : Pat<(i64 (sext_inreg
4622 (i64 (vector_extract
4623 (NaTy VPR64:$Rn), (NaImm:$Imm))),
4625 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
4628 def : Pat<(i64 (sext_inreg
4630 (i32 (vector_extract
4631 (NaTy VPR64:$Rn), (NaImm:$Imm))))),
4633 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
4636 def : Pat<(i64 (sext
4637 (i32 (vector_extract
4638 (NaTy VPR64:$Rn), (NaImm:$Imm))))),
4639 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// One instantiation per element size for the X-form SMOV instructions.
4643 defm SMOVxb_pattern : Neon_SMOVx_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
4644 neon_uimm3_bare, SMOVxb>;
4645 defm SMOVxh_pattern : Neon_SMOVx_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
4646 neon_uimm2_bare, SMOVxh>;
4647 defm SMOVxs_pattern : Neon_SMOVx_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
4648 neon_uimm1_bare, SMOVxs>;
// Fold a 32-bit sext_inreg of an extract from a 64-bit (NaTy) vector into the
// W-form SMOV, widening the source via SUBREG_TO_REG/sub_64.
// NOTE(review): the `Instruction SMOVI>` template-arg line (4652), the
// sext_inreg width argument (4656), and the trailing index operand (4658)
// are missing from this excerpt.
4650 class Neon_SMOVw_pattern <ValueType StTy, ValueType NaTy,
4651 ValueType eleTy, Operand StImm, Operand NaImm,
4653 : Pat<(i32 (sext_inreg
4654 (i32 (vector_extract
4655 (NaTy VPR64:$Rn), (NaImm:$Imm))),
4657 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// W-form instantiations exist only for b/h elements (an s-element extract to
// W needs no extension).
4660 def SMOVwb_pattern : Neon_SMOVw_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
4661 neon_uimm3_bare, SMOVwb>;
4662 def SMOVwh_pattern : Neon_SMOVw_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
4663 neon_uimm2_bare, SMOVwh>;
// UMOV Rd, Vn.T[index]: plain (zero-implying) move of a vector element into a
// general register; the pattern matches an un-extended vector_extract at an
// immediate lane index.
// NOTE(review): the tail of this class (original lines 4675-4678) is missing
// from this excerpt.
4666 class NeonI_UMOV<string asmop, string Res, bit Q,
4667 ValueType OpTy, Operand OpImm,
4668 RegisterClass ResGPR, ValueType ResTy>
4669 : NeonI_copy<Q, 0b0, 0b0111,
4670 (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
4671 asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
4672 [(set (ResTy ResGPR:$Rd),
4673 (ResTy (vector_extract
4674 (OpTy VPR128:$Rn), (OpImm:$Imm))))],
4679 //Unsigned integer move (main, from element)
// W-form for b/h/s elements, X-form for d; lane index encoded into imm5
// (Inst{20-16}) as for INS/SMOV.
// NOTE(review): the result-register-class argument lines (e.g. 4681) and the
// closing braces of these defs are missing from this excerpt.
4680 def UMOVwb : NeonI_UMOV<"umov", "b", 0b0, v16i8, neon_uimm4_bare,
4682 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
4684 def UMOVwh : NeonI_UMOV<"umov", "h", 0b0, v8i16, neon_uimm3_bare,
4686 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
4688 def UMOVws : NeonI_UMOV<"umov", "s", 0b0, v4i32, neon_uimm2_bare,
4690 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
4692 def UMOVxd : NeonI_UMOV<"umov", "d", 0b1, v2i64, neon_uimm1_bare,
4694 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
// Fold an extract from a 64-bit (NaTy) vector into UMOV by widening the
// source via SUBREG_TO_REG/sub_64.
// NOTE(review): the instruction template parameter is named `SMOVI` although
// every instantiation passes a UMOV instruction -- a copy-paste name from
// Neon_SMOVw_pattern; consider renaming upstream. The `Instruction SMOVI>`
// line (4699) and the trailing index operand (4703) are missing from this
// excerpt.
4697 class Neon_UMOV_pattern <ValueType StTy, ValueType NaTy, ValueType ResTy,
4698 Operand StImm, Operand NaImm,
4700 : Pat<(ResTy (vector_extract
4701 (NaTy VPR64:$Rn), NaImm:$Imm)),
4702 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// One instantiation per W-form UMOV element size.
4705 def UMOVwb_pattern : Neon_UMOV_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
4706 neon_uimm3_bare, UMOVwb>;
4707 def UMOVwh_pattern : Neon_UMOV_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
4708 neon_uimm2_bare, UMOVwh>;
4709 def UMOVws_pattern : Neon_UMOV_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
4710 neon_uimm1_bare, UMOVws>;
// Fold 64-bit zero-extensions of element extracts into UMOV: UMOV's W-form
// result is already zero-extended, and the d-element form uses UMOVxd
// directly. 64-bit vector sources are widened via SUBREG_TO_REG/sub_64.
// NOTE(review): the `def : Pat<(i64 (zext ...` / `(i64 (and ...` header lines
// of several of these anonymous patterns (e.g. 4712, 4718, 4729, 4736) are
// missing from this excerpt -- confirm the exact matched dags upstream.
4713 (i32 (vector_extract
4714 (v16i8 VPR128:$Rn), (neon_uimm4_bare:$Imm))),
4716 (UMOVwb VPR128:$Rn, neon_uimm4_bare:$Imm)>;
4719 (i32 (vector_extract
4720 (v8i16 VPR128:$Rn), (neon_uimm3_bare:$Imm))),
4722 (UMOVwh VPR128:$Rn, neon_uimm3_bare:$Imm)>;
4724 def : Pat<(i64 (zext
4725 (i32 (vector_extract
4726 (v2i64 VPR128:$Rn), (neon_uimm1_bare:$Imm))))),
4727 (UMOVxd VPR128:$Rn, neon_uimm1_bare:$Imm)>;
4730 (i32 (vector_extract
4731 (v8i8 VPR64:$Rn), (neon_uimm3_bare:$Imm))),
4733 (UMOVwb (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
4734 neon_uimm3_bare:$Imm)>;
4737 (i32 (vector_extract
4738 (v4i16 VPR64:$Rn), (neon_uimm2_bare:$Imm))),
4740 (UMOVwh (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
4741 neon_uimm2_bare:$Imm)>;
4743 def : Pat<(i64 (zext
4744 (i32 (vector_extract
4745 (v1i64 VPR64:$Rn), (neon_uimm0_bare:$Imm))))),
4746 (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
4747 neon_uimm0_bare:$Imm)>;
4749 // Additional copy patterns for scalar types
// Extract of element 0 from a 1-element vector is just a register move:
// sub-32-bit FPRs are widened via SUBREG_TO_REG before the move; i32/i64 use
// FMOVws/FMOVxd.
// NOTE(review): the result-dag header lines of the v1i8/v1i16 patterns
// (4751/4755) and the result dags of the v1f64/v1f32 patterns (4765-4769)
// are missing from this excerpt.
4750 def : Pat<(i32 (vector_extract (v1i8 FPR8:$Rn), (i64 0))),
4752 (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8)), (i64 0))>;
4754 def : Pat<(i32 (vector_extract (v1i16 FPR16:$Rn), (i64 0))),
4756 (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16)), (i64 0))>;
4758 def : Pat<(i32 (vector_extract (v1i32 FPR32:$Rn), (i64 0))),
4759 (FMOVws FPR32:$Rn)>;
4761 def : Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))),
4762 (FMOVxd FPR64:$Rn)>;
4764 def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))),
4767 def : Pat<(f32 (vector_extract (v1f32 FPR32:$Rn), (i64 0))),
// scalar_to_vector for 1-element vector types: i8/i16 go through a 128-bit
// INS into an IMPLICIT_DEF and extract the scalar subregister back out.
// NOTE(review): the trailing subregister-index lines of the v1i8/v1i16
// patterns (4773/4778) and the result dags of the remaining patterns
// (4781-4787) are missing from this excerpt.
4770 def : Pat<(v1i8 (scalar_to_vector GPR32:$Rn)),
4771 (v1i8 (EXTRACT_SUBREG (v16i8
4772 (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
4775 def : Pat<(v1i16 (scalar_to_vector GPR32:$Rn)),
4776 (v1i16 (EXTRACT_SUBREG (v8i16
4777 (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
4780 def : Pat<(v1i32 (scalar_to_vector GPR32:$src)),
4783 def : Pat<(v1i64 (scalar_to_vector GPR64:$src)),
4786 def : Pat<(v1f32 (scalar_to_vector (f32 FPR32:$Rn))),
4788 def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))),