//===-- AArch64InstrNEON.td - NEON support for AArch64 -----*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the AArch64 NEON instruction set.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//
// (outs Result), (ins MaskVec, TrueVec, FalseVec): all four vectors must
// share one type.
def Neon_bsl : SDNode<"AArch64ISD::NEON_BSL", SDTypeProfile<1, 3,
                      [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
                       SDTCisSameAs<0, 3>]>>;

// (outs Result), (ins Imm, OpCmode)
def SDT_Neon_movi : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVT<1, i32>]>;

// Move / move-inverted of a modified immediate into each vector element.
def Neon_movi : SDNode<"AArch64ISD::NEON_MOVIMM", SDT_Neon_movi>;

def Neon_mvni : SDNode<"AArch64ISD::NEON_MVNIMM", SDT_Neon_movi>;

// (outs Result), (ins Imm)
def Neon_fmovi : SDNode<"AArch64ISD::NEON_FMOVIMM", SDTypeProfile<1, 1,
                        [SDTCisVec<0>, SDTCisVT<1, i32>]>>;

// (outs Result), (ins LHS, RHS, CondCode)
def Neon_cmp : SDNode<"AArch64ISD::NEON_CMP", SDTypeProfile<1, 3,
                      [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>;

// (outs Result), (ins LHS, 0/0.0 constant, CondCode)
def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3,
                       [SDTCisVec<0>, SDTCisVec<1>]>>;

// (outs Result), (ins LHS, RHS)
def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2,
                      [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>;
// Saturating/rounding shift nodes: (outs Result), (ins Vector, ShiftImm).
// Restored the lost type-profile continuation lines — SDTARMVSH and
// Neon_vdup were left unterminated by a dropped line each.
def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                     SDTCisVT<2, i32>]>;
def Neon_sqrshlImm : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>;
def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>;

// (outs Result), (ins Scalar): duplicate the scalar into every lane.
def Neon_vdup : SDNode<"AArch64ISD::NEON_VDUP", SDTypeProfile<1, 1,
                       [SDTCisVec<0>]>>;
// (outs Result), (ins Vector, LaneIndex)
def Neon_vduplane : SDNode<"AArch64ISD::NEON_VDUPLANE", SDTypeProfile<1, 2,
                           [SDTCisVec<0>, SDTCisVec<1>, SDTCisVT<2, i64>]>>;
//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//
// NeonI_3VSame_B_sizes: .8b/.16b variants of a three-same-operand
// instruction. Restored the lost `bit Commutable` parameter, body braces
// and NoItinerary terminators.
multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size, bits<5> opcode,
                                string asmop, SDPatternOperator opnode8B,
                                SDPatternOperator opnode16B,
                                bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8B : NeonI_3VSame<0b0, u, size, opcode,
                           (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
                           asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
                           [(set (v8i8 VPR64:$Rd),
                              (v8i8 (opnode8B (v8i8 VPR64:$Rn),
                                              (v8i8 VPR64:$Rm))))],
                           NoItinerary>;

    def _16B : NeonI_3VSame<0b1, u, size, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                            asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
                            [(set (v16i8 VPR128:$Rd),
                               (v16i8 (opnode16B (v16i8 VPR128:$Rn),
                                                 (v16i8 VPR128:$Rm))))],
                            NoItinerary>;
  }
}
// NeonI_3VSame_HS_sizes: .4h/.8h/.2s/.4s variants of a three-same-operand
// instruction. Restored the lost parameter line, braces and terminators.
multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode,
                                 string asmop, SDPatternOperator opnode,
                                 bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _4H : NeonI_3VSame<0b0, u, 0b01, opcode,
                           (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
                           asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h",
                           [(set (v4i16 VPR64:$Rd),
                              (v4i16 (opnode (v4i16 VPR64:$Rn),
                                             (v4i16 VPR64:$Rm))))],
                           NoItinerary>;

    def _8H : NeonI_3VSame<0b1, u, 0b01, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                           asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h",
                           [(set (v8i16 VPR128:$Rd),
                              (v8i16 (opnode (v8i16 VPR128:$Rn),
                                             (v8i16 VPR128:$Rm))))],
                           NoItinerary>;

    def _2S : NeonI_3VSame<0b0, u, 0b10, opcode,
                           (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
                           asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
                           [(set (v2i32 VPR64:$Rd),
                              (v2i32 (opnode (v2i32 VPR64:$Rn),
                                             (v2i32 VPR64:$Rm))))],
                           NoItinerary>;

    def _4S : NeonI_3VSame<0b1, u, 0b10, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                           asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
                           [(set (v4i32 VPR128:$Rd),
                              (v4i32 (opnode (v4i32 VPR128:$Rn),
                                             (v4i32 VPR128:$Rm))))],
                           NoItinerary>;
  }
}
// NeonI_3VSame_BHS_sizes: extends the HS multiclass with the byte (.8b/.16b)
// arrangements. Restored the lost parameter line, braces and terminators.
multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
                                  string asmop, SDPatternOperator opnode,
                                  bit Commutable = 0>
  : NeonI_3VSame_HS_sizes<u, opcode, asmop, opnode, Commutable> {
  let isCommutable = Commutable in {
    def _8B : NeonI_3VSame<0b0, u, 0b00, opcode,
                           (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
                           asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
                           [(set (v8i8 VPR64:$Rd),
                              (v8i8 (opnode (v8i8 VPR64:$Rn),
                                            (v8i8 VPR64:$Rm))))],
                           NoItinerary>;

    def _16B : NeonI_3VSame<0b1, u, 0b00, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                            asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
                            [(set (v16i8 VPR128:$Rd),
                               (v16i8 (opnode (v16i8 VPR128:$Rn),
                                              (v16i8 VPR128:$Rm))))],
                            NoItinerary>;
  }
}
// NeonI_3VSame_BHSD_sizes: extends the BHS multiclass with the .2d
// arrangement. Restored the lost parameter line, braces and terminator.
multiclass NeonI_3VSame_BHSD_sizes<bit u, bits<5> opcode,
                                   string asmop, SDPatternOperator opnode,
                                   bit Commutable = 0>
  : NeonI_3VSame_BHS_sizes<u, opcode, asmop, opnode, Commutable> {
  let isCommutable = Commutable in {
    def _2D : NeonI_3VSame<0b1, u, 0b11, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                           asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
                           [(set (v2i64 VPR128:$Rd),
                              (v2i64 (opnode (v2i64 VPR128:$Rn),
                                             (v2i64 VPR128:$Rm))))],
                           NoItinerary>;
  }
}
// Multiclass NeonI_3VSame_SD_sizes: Operand types are floating point types,
// but Result types can be integer or floating point types (the FP compares
// instantiate this with v2i32/v4i32/v2i64 results).
// Restored the lost body braces and NoItinerary terminators.
multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode,
                                 string asmop, SDPatternOperator opnode2S,
                                 SDPatternOperator opnode4S,
                                 SDPatternOperator opnode2D,
                                 ValueType ResTy2S, ValueType ResTy4S,
                                 ValueType ResTy2D, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _2S : NeonI_3VSame<0b0, u, {size, 0b0}, opcode,
                           (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
                           asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
                           [(set (ResTy2S VPR64:$Rd),
                              (ResTy2S (opnode2S (v2f32 VPR64:$Rn),
                                                 (v2f32 VPR64:$Rm))))],
                           NoItinerary>;

    def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                           asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
                           [(set (ResTy4S VPR128:$Rd),
                              (ResTy4S (opnode4S (v4f32 VPR128:$Rn),
                                                 (v4f32 VPR128:$Rm))))],
                           NoItinerary>;

    def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                           asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
                           [(set (ResTy2D VPR128:$Rd),
                              (ResTy2D (opnode2D (v2f64 VPR128:$Rn),
                                                 (v2f64 VPR128:$Rm))))],
                           NoItinerary>;
  }
}
185 //===----------------------------------------------------------------------===//
186 // Instruction Definitions
187 //===----------------------------------------------------------------------===//
189 // Vector Arithmetic Instructions
191 // Vector Add (Integer and Floating-Point)
193 defm ADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>;
194 defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd", fadd, fadd, fadd,
195 v2f32, v4f32, v2f64, 1>;
197 // Vector Sub (Integer and Floating-Point)
199 defm SUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10000, "sub", sub, 0>;
200 defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub", fsub, fsub, fsub,
201 v2f32, v4f32, v2f64, 0>;
203 // Vector Multiply (Integer and Floating-Point)
205 defm MULvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>;
206 defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul, fmul, fmul,
207 v2f32, v4f32, v2f64, 1>;
209 // Vector Multiply (Polynomial)
211 defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul",
212 int_arm_neon_vmulp, int_arm_neon_vmulp, 1>;
214 // Vector Multiply-accumulate and Multiply-subtract (Integer)
// class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and
// two operands constraints. Restored the lost "NoItinerary> {" line and
// closing brace.
class NeonI_3VSame_Constraint_impl<string asmop, string asmlane,
                                   RegisterOperand VPRC, ValueType OpTy,
                                   bit q, bit u, bits<2> size,
                                   bits<5> opcode, SDPatternOperator opnode>
  : NeonI_3VSame<q, u, size, opcode,
                 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, VPRC:$Rm),
                 asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane # ", $Rm" # asmlane,
                 [(set (OpTy VPRC:$Rd),
                    (OpTy (opnode (OpTy VPRC:$src), (OpTy VPRC:$Rn),
                                  (OpTy VPRC:$Rm))))],
                 NoItinerary> {
  // Accumulator-style encoding: the destination is also a source.
  let Constraints = "$src = $Rd";
}
// mla: Ra + (Rn * Rm);  mls: Ra - (Rn * Rm).
def Neon_mla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                       (add node:$Ra, (mul node:$Rn, node:$Rm))>;

def Neon_mls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                       (sub node:$Ra, (mul node:$Rn, node:$Rm))>;

// MLA (u = 0b0) and MLS (u = 0b1) share opcode 0b10010.
def MLAvvv_8B  : NeonI_3VSame_Constraint_impl<"mla", ".8b", VPR64, v8i8,
                                              0b0, 0b0, 0b00, 0b10010, Neon_mla>;
def MLAvvv_16B : NeonI_3VSame_Constraint_impl<"mla", ".16b", VPR128, v16i8,
                                              0b1, 0b0, 0b00, 0b10010, Neon_mla>;
def MLAvvv_4H  : NeonI_3VSame_Constraint_impl<"mla", ".4h", VPR64, v4i16,
                                              0b0, 0b0, 0b01, 0b10010, Neon_mla>;
def MLAvvv_8H  : NeonI_3VSame_Constraint_impl<"mla", ".8h", VPR128, v8i16,
                                              0b1, 0b0, 0b01, 0b10010, Neon_mla>;
def MLAvvv_2S  : NeonI_3VSame_Constraint_impl<"mla", ".2s", VPR64, v2i32,
                                              0b0, 0b0, 0b10, 0b10010, Neon_mla>;
def MLAvvv_4S  : NeonI_3VSame_Constraint_impl<"mla", ".4s", VPR128, v4i32,
                                              0b1, 0b0, 0b10, 0b10010, Neon_mla>;

def MLSvvv_8B  : NeonI_3VSame_Constraint_impl<"mls", ".8b", VPR64, v8i8,
                                              0b0, 0b1, 0b00, 0b10010, Neon_mls>;
def MLSvvv_16B : NeonI_3VSame_Constraint_impl<"mls", ".16b", VPR128, v16i8,
                                              0b1, 0b1, 0b00, 0b10010, Neon_mls>;
def MLSvvv_4H  : NeonI_3VSame_Constraint_impl<"mls", ".4h", VPR64, v4i16,
                                              0b0, 0b1, 0b01, 0b10010, Neon_mls>;
def MLSvvv_8H  : NeonI_3VSame_Constraint_impl<"mls", ".8h", VPR128, v8i16,
                                              0b1, 0b1, 0b01, 0b10010, Neon_mls>;
def MLSvvv_2S  : NeonI_3VSame_Constraint_impl<"mls", ".2s", VPR64, v2i32,
                                              0b0, 0b1, 0b10, 0b10010, Neon_mls>;
def MLSvvv_4S  : NeonI_3VSame_Constraint_impl<"mls", ".4s", VPR128, v4i32,
                                              0b1, 0b1, 0b10, 0b10010, Neon_mls>;

// Vector Multiply-accumulate and Multiply-subtract (Floating Point)

// fmla: Ra + (Rn * Rm);  fmls: Ra - (Rn * Rm) — written with separate
// fmul/fadd nodes, i.e. the unfused forms.
def Neon_fmla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                        (fadd node:$Ra, (fmul node:$Rn, node:$Rm))>;

def Neon_fmls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                        (fsub node:$Ra, (fmul node:$Rn, node:$Rm))>;
// FMLA/FMLS only select for the unfused fadd/fsub-of-fmul patterns when
// fused multiply-accumulate is allowed (UseFusedMAC predicate).
// Restored the lost closing brace of the `let Predicates` region.
let Predicates = [HasNEON, UseFusedMAC] in {
def FMLAvvv_2S : NeonI_3VSame_Constraint_impl<"fmla", ".2s", VPR64, v2f32,
                                              0b0, 0b0, 0b00, 0b11001, Neon_fmla>;
def FMLAvvv_4S : NeonI_3VSame_Constraint_impl<"fmla", ".4s", VPR128, v4f32,
                                              0b1, 0b0, 0b00, 0b11001, Neon_fmla>;
def FMLAvvv_2D : NeonI_3VSame_Constraint_impl<"fmla", ".2d", VPR128, v2f64,
                                              0b1, 0b0, 0b01, 0b11001, Neon_fmla>;

def FMLSvvv_2S : NeonI_3VSame_Constraint_impl<"fmls", ".2s", VPR64, v2f32,
                                              0b0, 0b0, 0b10, 0b11001, Neon_fmls>;
def FMLSvvv_4S : NeonI_3VSame_Constraint_impl<"fmls", ".4s", VPR128, v4f32,
                                              0b1, 0b0, 0b10, 0b11001, Neon_fmls>;
def FMLSvvv_2D : NeonI_3VSame_Constraint_impl<"fmls", ".2d", VPR128, v2f64,
                                              0b1, 0b0, 0b11, 0b11001, Neon_fmls>;
}
// We're also allowed to match the fma instruction regardless of compile
// options.
def : Pat<(v2f32 (fma VPR64:$Rn, VPR64:$Rm, VPR64:$Ra)),
          (FMLAvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
def : Pat<(v4f32 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
          (FMLAvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
def : Pat<(v2f64 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
          (FMLAvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;

// fma with a negated multiplicand selects FMLS.
def : Pat<(v2f32 (fma (fneg VPR64:$Rn), VPR64:$Rm, VPR64:$Ra)),
          (FMLSvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
def : Pat<(v4f32 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
          (FMLSvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
          (FMLSvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;

// Vector Divide (Floating-Point)

defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv, fdiv,
                                     fdiv, v2f32, v4f32, v2f64, 0>;

// Vector Bitwise Operations

// Vector Bitwise AND

defm ANDvvv : NeonI_3VSame_B_sizes<0b0, 0b00, 0b00011, "and", and, and, 1>;

// Vector Bitwise Exclusive OR

defm EORvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b00011, "eor", xor, xor, 1>;

// Vector Bitwise OR

defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>;

// ORR disassembled as MOV if Vn==Vm

// Vector Move - register
// Alias for ORR if Vn=Vm.
// FIXME: This is actually the preferred syntax but TableGen can't deal with
// custom printing of aliases.
def : NeonInstAlias<"mov $Rd.8b, $Rn.8b",
                    (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn), 0>;
def : NeonInstAlias<"mov $Rd.16b, $Rn.16b",
                    (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn), 0>;
// Pattern leaves matching a NEON_MOVIMM whose decoded per-byte element value
// is all-ones (0xff) / all-zeros respectively. Restored the lost
// "unsigned EltBits;" declarations and "}]>;" terminators.
def Neon_immAllOnes: PatLeaf<(Neon_movi (i32 timm), (i32 imm)), [{
  ConstantSDNode *ImmConstVal = cast<ConstantSDNode>(N->getOperand(0));
  ConstantSDNode *OpCmodeConstVal = cast<ConstantSDNode>(N->getOperand(1));
  unsigned EltBits;
  uint64_t EltVal = A64Imms::decodeNeonModImm(ImmConstVal->getZExtValue(),
                                              OpCmodeConstVal->getZExtValue(),
                                              EltBits);
  return (EltBits == 8 && EltVal == 0xff);
}]>;

def Neon_immAllZeros: PatLeaf<(Neon_movi (i32 timm), (i32 imm)), [{
  ConstantSDNode *ImmConstVal = cast<ConstantSDNode>(N->getOperand(0));
  ConstantSDNode *OpCmodeConstVal = cast<ConstantSDNode>(N->getOperand(1));
  unsigned EltBits;
  uint64_t EltVal = A64Imms::decodeNeonModImm(ImmConstVal->getZExtValue(),
                                              OpCmodeConstVal->getZExtValue(),
                                              EltBits);
  return (EltBits == 8 && EltVal == 0x0);
}]>;
// Vector NOT is modelled as XOR with an all-ones immediate vector.
def Neon_not8B : PatFrag<(ops node:$in),
                         (xor node:$in, (bitconvert (v8i8 Neon_immAllOnes)))>;
def Neon_not16B : PatFrag<(ops node:$in),
                          (xor node:$in, (bitconvert (v16i8 Neon_immAllOnes)))>;

// orn: Rn | ~Rm
def Neon_orn8B : PatFrag<(ops node:$Rn, node:$Rm),
                         (or node:$Rn, (Neon_not8B node:$Rm))>;
def Neon_orn16B : PatFrag<(ops node:$Rn, node:$Rm),
                          (or node:$Rn, (Neon_not16B node:$Rm))>;

// bic: Rn & ~Rm
def Neon_bic8B : PatFrag<(ops node:$Rn, node:$Rm),
                         (and node:$Rn, (Neon_not8B node:$Rm))>;
def Neon_bic16B : PatFrag<(ops node:$Rn, node:$Rm),
                          (and node:$Rn, (Neon_not16B node:$Rm))>;

// Vector Bitwise OR NOT - register
defm ORNvvv : NeonI_3VSame_B_sizes<0b0, 0b11, 0b00011, "orn",
                                   Neon_orn8B, Neon_orn16B, 0>;

// Vector Bitwise Bit Clear (AND NOT) - register
defm BICvvv : NeonI_3VSame_B_sizes<0b0, 0b01, 0b00011, "bic",
                                   Neon_bic8B, Neon_bic16B, 0>;
// Re-types the two-operand bitwise instructions so every 64/128-bit vector
// element type selects the same underlying .8b/.16b instruction.
// Restored the lost "Instruction INST8B," parameter and closing brace.
multiclass Neon_bitwise2V_patterns<SDPatternOperator opnode8B,
                                   SDPatternOperator opnode16B,
                                   Instruction INST8B,
                                   Instruction INST16B> {
  def : Pat<(v2i32 (opnode8B VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (opnode8B VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (opnode8B VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i32 (opnode16B VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (opnode16B VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (opnode16B VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$Rn, VPR128:$Rm)>;
}
// Additional patterns for bitwise instructions AND, EOR, ORR, BIC, ORN
defm : Neon_bitwise2V_patterns<and, and, ANDvvv_8B, ANDvvv_16B>;
defm : Neon_bitwise2V_patterns<or, or, ORRvvv_8B, ORRvvv_16B>;
defm : Neon_bitwise2V_patterns<xor, xor, EORvvv_8B, EORvvv_16B>;
defm : Neon_bitwise2V_patterns<Neon_bic8B, Neon_bic16B, BICvvv_8B, BICvvv_16B>;
defm : Neon_bitwise2V_patterns<Neon_orn8B, Neon_orn16B, ORNvvv_8B, ORNvvv_16B>;

// Vector Bitwise Select
// BSL (size = 0b01) shares opcode 0b00011 with BIT (0b10) and BIF (0b11).
def BSLvvv_8B : NeonI_3VSame_Constraint_impl<"bsl", ".8b", VPR64, v8i8,
                                             0b0, 0b1, 0b01, 0b00011, Neon_bsl>;

def BSLvvv_16B : NeonI_3VSame_Constraint_impl<"bsl", ".16b", VPR128, v16i8,
                                              0b1, 0b1, 0b01, 0b00011, Neon_bsl>;
// Re-types the BSL-shaped (destination-constrained, three source) patterns
// over every legal 64/128-bit vector type, and also matches the manual
// or/and/not expansion and the llvm.arm.neon.vbsl intrinsic.
// Restored the lost "Instruction INST8B," parameter and closing brace.
multiclass Neon_bitwise3V_patterns<SDPatternOperator opnode,
                                   Instruction INST8B,
                                   Instruction INST16B> {
  // Disassociate type from instruction definition
  def : Pat<(v2i32 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i32 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;

  // Allow to match BSL instruction pattern with non-constant operand
  def : Pat<(v8i8 (or (and VPR64:$Rn, VPR64:$Rd),
                      (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
            (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (or (and VPR64:$Rn, VPR64:$Rd),
                       (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
            (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v2i32 (or (and VPR64:$Rn, VPR64:$Rd),
                       (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
            (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (or (and VPR64:$Rn, VPR64:$Rd),
                       (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
            (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v16i8 (or (and VPR128:$Rn, VPR128:$Rd),
                       (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
            (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (or (and VPR128:$Rn, VPR128:$Rd),
                       (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
            (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v4i32 (or (and VPR128:$Rn, VPR128:$Rd),
                       (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
            (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (or (and VPR128:$Rn, VPR128:$Rd),
                       (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
            (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;

  // Allow to match llvm.arm.* intrinsics.
  def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 VPR64:$src),
                                     (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 VPR64:$src),
                                      (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 VPR64:$src),
                                      (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 VPR64:$src),
                                      (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 VPR64:$src),
                                      (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 VPR128:$src),
                                      (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 VPR128:$src),
                                      (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 VPR128:$src),
                                      (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 VPR128:$src),
                                      (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 VPR128:$src),
                                      (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2f64 (int_arm_neon_vbsl (v2f64 VPR128:$src),
                                      (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
}
// Additional patterns for bitwise instruction BSL
defm : Neon_bitwise3V_patterns<Neon_bsl, BSLvvv_8B, BSLvvv_16B>;

// Predicate always returns false, so BIT/BIF never match during isel; the
// instructions below exist for the assembler/disassembler only.
def Neon_NoBSLop : PatFrag<(ops node:$src, node:$Rn, node:$Rm),
                           (Neon_bsl node:$src, node:$Rn, node:$Rm),
                           [{ (void)N; return false; }]>;

// Vector Bitwise Insert if True

def BITvvv_8B : NeonI_3VSame_Constraint_impl<"bit", ".8b", VPR64, v8i8,
                                             0b0, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
def BITvvv_16B : NeonI_3VSame_Constraint_impl<"bit", ".16b", VPR128, v16i8,
                                              0b1, 0b1, 0b10, 0b00011, Neon_NoBSLop>;

// Vector Bitwise Insert if False

def BIFvvv_8B : NeonI_3VSame_Constraint_impl<"bif", ".8b", VPR64, v8i8,
                                             0b0, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
def BIFvvv_16B : NeonI_3VSame_Constraint_impl<"bif", ".16b", VPR128, v16i8,
                                              0b1, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
// Vector Absolute Difference and Accumulate (Signed, Unsigned)

// uaba/saba: Ra + |Rn - Rm|, via the vabdu/vabds intrinsics.
def Neon_uaba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                        (add node:$Ra, (int_arm_neon_vabdu node:$Rn, node:$Rm))>;
def Neon_saba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                        (add node:$Ra, (int_arm_neon_vabds node:$Rn, node:$Rm))>;

// Vector Absolute Difference and Accumulate (Unsigned)
def UABAvvv_8B  : NeonI_3VSame_Constraint_impl<"uaba", ".8b", VPR64, v8i8,
                                               0b0, 0b1, 0b00, 0b01111, Neon_uaba>;
def UABAvvv_16B : NeonI_3VSame_Constraint_impl<"uaba", ".16b", VPR128, v16i8,
                                               0b1, 0b1, 0b00, 0b01111, Neon_uaba>;
def UABAvvv_4H  : NeonI_3VSame_Constraint_impl<"uaba", ".4h", VPR64, v4i16,
                                               0b0, 0b1, 0b01, 0b01111, Neon_uaba>;
def UABAvvv_8H  : NeonI_3VSame_Constraint_impl<"uaba", ".8h", VPR128, v8i16,
                                               0b1, 0b1, 0b01, 0b01111, Neon_uaba>;
def UABAvvv_2S  : NeonI_3VSame_Constraint_impl<"uaba", ".2s", VPR64, v2i32,
                                               0b0, 0b1, 0b10, 0b01111, Neon_uaba>;
def UABAvvv_4S  : NeonI_3VSame_Constraint_impl<"uaba", ".4s", VPR128, v4i32,
                                               0b1, 0b1, 0b10, 0b01111, Neon_uaba>;

// Vector Absolute Difference and Accumulate (Signed)
def SABAvvv_8B  : NeonI_3VSame_Constraint_impl<"saba", ".8b", VPR64, v8i8,
                                               0b0, 0b0, 0b00, 0b01111, Neon_saba>;
def SABAvvv_16B : NeonI_3VSame_Constraint_impl<"saba", ".16b", VPR128, v16i8,
                                               0b1, 0b0, 0b00, 0b01111, Neon_saba>;
def SABAvvv_4H  : NeonI_3VSame_Constraint_impl<"saba", ".4h", VPR64, v4i16,
                                               0b0, 0b0, 0b01, 0b01111, Neon_saba>;
def SABAvvv_8H  : NeonI_3VSame_Constraint_impl<"saba", ".8h", VPR128, v8i16,
                                               0b1, 0b0, 0b01, 0b01111, Neon_saba>;
def SABAvvv_2S  : NeonI_3VSame_Constraint_impl<"saba", ".2s", VPR64, v2i32,
                                               0b0, 0b0, 0b10, 0b01111, Neon_saba>;
def SABAvvv_4S  : NeonI_3VSame_Constraint_impl<"saba", ".4s", VPR128, v4i32,
                                               0b1, 0b0, 0b10, 0b01111, Neon_saba>;
// Vector Absolute Difference (Signed, Unsigned)
defm UABDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01110, "uabd", int_arm_neon_vabdu, 0>;
defm SABDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01110, "sabd", int_arm_neon_vabds, 0>;

// Vector Absolute Difference (Floating Point)
defm FABDvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11010, "fabd",
                                     int_arm_neon_vabds, int_arm_neon_vabds,
                                     int_arm_neon_vabds, v2f32, v4f32, v2f64, 0>;

// Vector Reciprocal Step (Floating Point)
// Restored the lost third operator argument (one per arrangement).
defm FRECPSvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11111, "frecps",
                                       int_arm_neon_vrecps, int_arm_neon_vrecps,
                                       int_arm_neon_vrecps,
                                       v2f32, v4f32, v2f64, 0>;

// Vector Reciprocal Square Root Step (Floating Point)
defm FRSQRTSvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11111, "frsqrts",
                                        int_arm_neon_vrsqrts,
                                        int_arm_neon_vrsqrts,
                                        int_arm_neon_vrsqrts,
                                        v2f32, v4f32, v2f64, 0>;
570 // Vector Comparisons
572 def Neon_cmeq : PatFrag<(ops node:$lhs, node:$rhs),
573 (Neon_cmp node:$lhs, node:$rhs, SETEQ)>;
574 def Neon_cmphs : PatFrag<(ops node:$lhs, node:$rhs),
575 (Neon_cmp node:$lhs, node:$rhs, SETUGE)>;
576 def Neon_cmge : PatFrag<(ops node:$lhs, node:$rhs),
577 (Neon_cmp node:$lhs, node:$rhs, SETGE)>;
578 def Neon_cmhi : PatFrag<(ops node:$lhs, node:$rhs),
579 (Neon_cmp node:$lhs, node:$rhs, SETUGT)>;
580 def Neon_cmgt : PatFrag<(ops node:$lhs, node:$rhs),
581 (Neon_cmp node:$lhs, node:$rhs, SETGT)>;
// NeonI_compare_aliases class: swaps register operands to implement
// comparison aliases, e.g., CMLE is alias for CMGE with operands reversed.
// Restored the lost asm-string continuation line (", $Rm" # asmlane,).
class NeonI_compare_aliases<string asmop, string asmlane,
                            Instruction inst, RegisterOperand VPRC>
  : NeonInstAlias<asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane #
                  ", $Rm" # asmlane,
                  (inst VPRC:$Rd, VPRC:$Rm, VPRC:$Rn), 0b0>;
// Vector Comparisons (Integer)

// Vector Compare Mask Equal (Integer)
// Restored the lost closing brace of the `let isCommutable` region.
let isCommutable = 1 in {
defm CMEQvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10001, "cmeq", Neon_cmeq, 0>;
}
// The u bit distinguishes the unsigned (CMHS/CMHI) from the signed
// (CMGE/CMGT) comparisons at the same opcode.

// Vector Compare Mask Higher or Same (Unsigned Integer)
defm CMHSvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00111, "cmhs", Neon_cmphs, 0>;

// Vector Compare Mask Greater Than or Equal (Integer)
defm CMGEvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00111, "cmge", Neon_cmge, 0>;

// Vector Compare Mask Higher (Unsigned Integer)
defm CMHIvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00110, "cmhi", Neon_cmhi, 0>;

// Vector Compare Mask Greater Than (Integer)
defm CMGTvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00110, "cmgt", Neon_cmgt, 0>;

// Vector Compare Mask Bitwise Test (Integer)
defm CMTSTvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b10001, "cmtst", Neon_tst, 0>;

// Vector Compare Mask Less or Same (Unsigned Integer)
// CMLS is alias for CMHS with operands reversed.
def CMLSvvv_8B  : NeonI_compare_aliases<"cmls", ".8b", CMHSvvv_8B, VPR64>;
def CMLSvvv_16B : NeonI_compare_aliases<"cmls", ".16b", CMHSvvv_16B, VPR128>;
def CMLSvvv_4H  : NeonI_compare_aliases<"cmls", ".4h", CMHSvvv_4H, VPR64>;
def CMLSvvv_8H  : NeonI_compare_aliases<"cmls", ".8h", CMHSvvv_8H, VPR128>;
def CMLSvvv_2S  : NeonI_compare_aliases<"cmls", ".2s", CMHSvvv_2S, VPR64>;
def CMLSvvv_4S  : NeonI_compare_aliases<"cmls", ".4s", CMHSvvv_4S, VPR128>;
def CMLSvvv_2D  : NeonI_compare_aliases<"cmls", ".2d", CMHSvvv_2D, VPR128>;

// Vector Compare Mask Less Than or Equal (Integer)
// CMLE is alias for CMGE with operands reversed.
def CMLEvvv_8B  : NeonI_compare_aliases<"cmle", ".8b", CMGEvvv_8B, VPR64>;
def CMLEvvv_16B : NeonI_compare_aliases<"cmle", ".16b", CMGEvvv_16B, VPR128>;
def CMLEvvv_4H  : NeonI_compare_aliases<"cmle", ".4h", CMGEvvv_4H, VPR64>;
def CMLEvvv_8H  : NeonI_compare_aliases<"cmle", ".8h", CMGEvvv_8H, VPR128>;
def CMLEvvv_2S  : NeonI_compare_aliases<"cmle", ".2s", CMGEvvv_2S, VPR64>;
def CMLEvvv_4S  : NeonI_compare_aliases<"cmle", ".4s", CMGEvvv_4S, VPR128>;
def CMLEvvv_2D  : NeonI_compare_aliases<"cmle", ".2d", CMGEvvv_2D, VPR128>;

// Vector Compare Mask Lower (Unsigned Integer)
// CMLO is alias for CMHI with operands reversed.
def CMLOvvv_8B  : NeonI_compare_aliases<"cmlo", ".8b", CMHIvvv_8B, VPR64>;
def CMLOvvv_16B : NeonI_compare_aliases<"cmlo", ".16b", CMHIvvv_16B, VPR128>;
def CMLOvvv_4H  : NeonI_compare_aliases<"cmlo", ".4h", CMHIvvv_4H, VPR64>;
def CMLOvvv_8H  : NeonI_compare_aliases<"cmlo", ".8h", CMHIvvv_8H, VPR128>;
def CMLOvvv_2S  : NeonI_compare_aliases<"cmlo", ".2s", CMHIvvv_2S, VPR64>;
def CMLOvvv_4S  : NeonI_compare_aliases<"cmlo", ".4s", CMHIvvv_4S, VPR128>;
def CMLOvvv_2D  : NeonI_compare_aliases<"cmlo", ".2d", CMHIvvv_2D, VPR128>;

// Vector Compare Mask Less Than (Integer)
// CMLT is alias for CMGT with operands reversed.
def CMLTvvv_8B  : NeonI_compare_aliases<"cmlt", ".8b", CMGTvvv_8B, VPR64>;
def CMLTvvv_16B : NeonI_compare_aliases<"cmlt", ".16b", CMGTvvv_16B, VPR128>;
def CMLTvvv_4H  : NeonI_compare_aliases<"cmlt", ".4h", CMGTvvv_4H, VPR64>;
def CMLTvvv_8H  : NeonI_compare_aliases<"cmlt", ".8h", CMGTvvv_8H, VPR128>;
def CMLTvvv_2S  : NeonI_compare_aliases<"cmlt", ".2s", CMGTvvv_2S, VPR64>;
def CMLTvvv_4S  : NeonI_compare_aliases<"cmlt", ".4s", CMGTvvv_4S, VPR128>;
def CMLTvvv_2D  : NeonI_compare_aliases<"cmlt", ".2d", CMGTvvv_2D, VPR128>;
// Zero-only immediate operand used by the compare-against-zero forms
// (the "#0" in e.g. "cmeq vd.8b, vn.8b, #0").
// Restored the lost record braces and the asm-operand Name field.
def neon_uimm0_asmoperand : AsmOperandClass {
  let Name = "UImm0";
  let PredicateMethod = "isUImm<0>";
  let RenderMethod = "addImmOperands";
}

def neon_uimm0 : Operand<i32>, ImmLeaf<i32, [{return Imm == 0;}]> {
  let ParserMatchClass = neon_uimm0_asmoperand;
  let PrintMethod = "printNeonUImm0Operand";
}
// NeonI_cmpz_sizes: compare-against-zero for each integer vector
// arrangement; the immediate operand is constrained to #0 (neon_uimm0).
// Restored the lost body braces and NoItinerary terminators.
multiclass NeonI_cmpz_sizes<bit u, bits<5> opcode, string asmop, CondCode CC> {
  def _8B : NeonI_2VMisc<0b0, u, 0b00, opcode,
                         (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
                         asmop # "\t$Rd.8b, $Rn.8b, $Imm",
                         [(set (v8i8 VPR64:$Rd),
                            (v8i8 (Neon_cmpz (v8i8 VPR64:$Rn),
                                             (i32 imm:$Imm), CC)))],
                         NoItinerary>;

  def _16B : NeonI_2VMisc<0b1, u, 0b00, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
                          asmop # "\t$Rd.16b, $Rn.16b, $Imm",
                          [(set (v16i8 VPR128:$Rd),
                             (v16i8 (Neon_cmpz (v16i8 VPR128:$Rn),
                                               (i32 imm:$Imm), CC)))],
                          NoItinerary>;

  def _4H : NeonI_2VMisc<0b0, u, 0b01, opcode,
                         (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
                         asmop # "\t$Rd.4h, $Rn.4h, $Imm",
                         [(set (v4i16 VPR64:$Rd),
                            (v4i16 (Neon_cmpz (v4i16 VPR64:$Rn),
                                              (i32 imm:$Imm), CC)))],
                         NoItinerary>;

  def _8H : NeonI_2VMisc<0b1, u, 0b01, opcode,
                         (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
                         asmop # "\t$Rd.8h, $Rn.8h, $Imm",
                         [(set (v8i16 VPR128:$Rd),
                            (v8i16 (Neon_cmpz (v8i16 VPR128:$Rn),
                                              (i32 imm:$Imm), CC)))],
                         NoItinerary>;

  def _2S : NeonI_2VMisc<0b0, u, 0b10, opcode,
                         (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
                         asmop # "\t$Rd.2s, $Rn.2s, $Imm",
                         [(set (v2i32 VPR64:$Rd),
                            (v2i32 (Neon_cmpz (v2i32 VPR64:$Rn),
                                              (i32 imm:$Imm), CC)))],
                         NoItinerary>;

  def _4S : NeonI_2VMisc<0b1, u, 0b10, opcode,
                         (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
                         asmop # "\t$Rd.4s, $Rn.4s, $Imm",
                         [(set (v4i32 VPR128:$Rd),
                            (v4i32 (Neon_cmpz (v4i32 VPR128:$Rn),
                                              (i32 imm:$Imm), CC)))],
                         NoItinerary>;

  def _2D : NeonI_2VMisc<0b1, u, 0b11, opcode,
                         (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
                         asmop # "\t$Rd.2d, $Rn.2d, $Imm",
                         [(set (v2i64 VPR128:$Rd),
                            (v2i64 (Neon_cmpz (v2i64 VPR128:$Rn),
                                              (i32 imm:$Imm), CC)))],
                         NoItinerary>;
}
// Vector Compare Mask Equal to Zero (Integer)
defm CMEQvvi : NeonI_cmpz_sizes<0b0, 0b01001, "cmeq", SETEQ>;

// Vector Compare Mask Greater Than or Equal to Zero (Signed Integer)
defm CMGEvvi : NeonI_cmpz_sizes<0b1, 0b01000, "cmge", SETGE>;

// Vector Compare Mask Greater Than Zero (Signed Integer)
defm CMGTvvi : NeonI_cmpz_sizes<0b0, 0b01000, "cmgt", SETGT>;

// Vector Compare Mask Less Than or Equal To Zero (Signed Integer)
defm CMLEvvi : NeonI_cmpz_sizes<0b1, 0b01001, "cmle", SETLE>;

// Vector Compare Mask Less Than Zero (Signed Integer)
defm CMLTvvi : NeonI_cmpz_sizes<0b0, 0b01010, "cmlt", SETLT>;
// Vector Comparisons (Floating Point)

// Vector Compare Mask Equal (Floating Point)
// Restored the lost closing brace of the `let isCommutable` region.
let isCommutable = 1 in {
defm FCMEQvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11100, "fcmeq", Neon_cmeq,
                                      Neon_cmeq, Neon_cmeq,
                                      v2i32, v4i32, v2i64, 0>;
}
// Vector Compare Mask Greater Than Or Equal (Floating Point)
// Note: result types are the integer lane masks v2i32/v4i32/v2i64.
defm FCMGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11100, "fcmge", Neon_cmge,
                                      Neon_cmge, Neon_cmge,
                                      v2i32, v4i32, v2i64, 0>;

// Vector Compare Mask Greater Than (Floating Point)
defm FCMGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11100, "fcmgt", Neon_cmgt,
                                      Neon_cmgt, Neon_cmgt,
                                      v2i32, v4i32, v2i64, 0>;

// Vector Compare Mask Less Than Or Equal (Floating Point)
// FCMLE is alias for FCMGE with operands reversed.
def FCMLEvvv_2S : NeonI_compare_aliases<"fcmle", ".2s", FCMGEvvv_2S, VPR64>;
def FCMLEvvv_4S : NeonI_compare_aliases<"fcmle", ".4s", FCMGEvvv_4S, VPR128>;
def FCMLEvvv_2D : NeonI_compare_aliases<"fcmle", ".2d", FCMGEvvv_2D, VPR128>;

// Vector Compare Mask Less Than (Floating Point)
// FCMLT is alias for FCMGT with operands reversed.
def FCMLTvvv_2S : NeonI_compare_aliases<"fcmlt", ".2s", FCMGTvvv_2S, VPR64>;
def FCMLTvvv_4S : NeonI_compare_aliases<"fcmlt", ".4s", FCMGTvvv_4S, VPR128>;
def FCMLTvvv_2D : NeonI_compare_aliases<"fcmlt", ".2d", FCMGTvvv_2D, VPR128>;
// NeonI_fpcmpz_sizes: floating-point compare against #0.0 (fpz32 operand)
// for each FP arrangement, producing integer lane masks.
// Restored the lost body braces and NoItinerary terminators.
multiclass NeonI_fpcmpz_sizes<bit u, bit size, bits<5> opcode,
                              string asmop, CondCode CC> {
  def _2S : NeonI_2VMisc<0b0, u, {size, 0b0}, opcode,
                         (outs VPR64:$Rd), (ins VPR64:$Rn, fpz32:$FPImm),
                         asmop # "\t$Rd.2s, $Rn.2s, $FPImm",
                         [(set (v2i32 VPR64:$Rd),
                            (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn),
                                              (f32 fpimm:$FPImm), CC)))],
                         NoItinerary>;

  def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode,
                         (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm),
                         asmop # "\t$Rd.4s, $Rn.4s, $FPImm",
                         [(set (v4i32 VPR128:$Rd),
                            (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn),
                                              (f32 fpimm:$FPImm), CC)))],
                         NoItinerary>;

  def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode,
                         (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm),
                         asmop # "\t$Rd.2d, $Rn.2d, $FPImm",
                         [(set (v2i64 VPR128:$Rd),
                            (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn),
                                              (f32 fpimm:$FPImm), CC)))],
                         NoItinerary>;
}
791 // Vector Compare Mask Equal to Zero (Floating Point)
792 defm FCMEQvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01101, "fcmeq", SETEQ>;
794 // Vector Compare Mask Greater Than or Equal to Zero (Floating Point)
795 defm FCMGEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01100, "fcmge", SETGE>;
797 // Vector Compare Mask Greater Than Zero (Floating Point)
798 defm FCMGTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01100, "fcmgt", SETGT>;
800 // Vector Compare Mask Less Than or Equal To Zero (Floating Point)
801 defm FCMLEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01101, "fcmle", SETLE>;
803 // Vector Compare Mask Less Than Zero (Floating Point)
804 defm FCMLTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01110, "fcmlt", SETLT>;
806 // Vector Absolute Comparisons (Floating Point)
// Compare |Rn| against |Rm|; selected from the ARM/AArch64 vacge/vacgt
// intrinsics (per-size variants passed as the three operator arguments).
808 // Vector Absolute Compare Mask Greater Than Or Equal (Floating Point)
809 defm FACGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11101, "facge",
810 int_arm_neon_vacged, int_arm_neon_vacgeq,
811 int_aarch64_neon_vacgeq,
812 v2i32, v4i32, v2i64, 0>;
814 // Vector Absolute Compare Mask Greater Than (Floating Point)
815 defm FACGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11101, "facgt",
816 int_arm_neon_vacgtd, int_arm_neon_vacgtq,
817 int_aarch64_neon_vacgtq,
818 v2i32, v4i32, v2i64, 0>;
820 // Vector Absolute Compare Mask Less Than Or Equal (Floating Point)
821 // FACLE is alias for FACGE with operands reversed.
822 def FACLEvvv_2S : NeonI_compare_aliases<"facle", ".2s", FACGEvvv_2S, VPR64>;
823 def FACLEvvv_4S : NeonI_compare_aliases<"facle", ".4s", FACGEvvv_4S, VPR128>;
824 def FACLEvvv_2D : NeonI_compare_aliases<"facle", ".2d", FACGEvvv_2D, VPR128>;
826 // Vector Absolute Compare Mask Less Than (Floating Point)
827 // FACLT is alias for FACGT with operands reversed.
828 def FACLTvvv_2S : NeonI_compare_aliases<"faclt", ".2s", FACGTvvv_2S, VPR64>;
829 def FACLTvvv_4S : NeonI_compare_aliases<"faclt", ".4s", FACGTvvv_4S, VPR128>;
830 def FACLTvvv_2D : NeonI_compare_aliases<"faclt", ".2d", FACGTvvv_2D, VPR128>;
// Integer same-type three-operand arithmetic. The trailing flag on each
// defm is isCommutable (1 for add/max-like ops, 0 for subtracts).
832 // Vector halving add (Integer Signed, Unsigned)
833 defm SHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00000, "shadd",
834 int_arm_neon_vhadds, 1>;
835 defm UHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00000, "uhadd",
836 int_arm_neon_vhaddu, 1>;
838 // Vector halving sub (Integer Signed, Unsigned)
839 defm SHSUBvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00100, "shsub",
840 int_arm_neon_vhsubs, 0>;
841 defm UHSUBvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00100, "uhsub",
842 int_arm_neon_vhsubu, 0>;
844 // Vector rounding halving add (Integer Signed, Unsigned)
845 defm SRHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00010, "srhadd",
846 int_arm_neon_vrhadds, 1>;
847 defm URHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00010, "urhadd",
848 int_arm_neon_vrhaddu, 1>;
850 // Vector Saturating add (Integer Signed, Unsigned)
851 defm SQADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00001, "sqadd",
852 int_arm_neon_vqadds, 1>;
853 defm UQADDvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00001, "uqadd",
854 int_arm_neon_vqaddu, 1>;
856 // Vector Saturating sub (Integer Signed, Unsigned)
857 defm SQSUBvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00101, "sqsub",
858 int_arm_neon_vqsubs, 1>;
859 defm UQSUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00101, "uqsub",
860 int_arm_neon_vqsubu, 1>;
862 // Vector Shift Left (Signed and Unsigned Integer)
863 defm SSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01000, "sshl",
864 int_arm_neon_vshifts, 1>;
865 defm USHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01000, "ushl",
866 int_arm_neon_vshiftu, 1>;
868 // Vector Saturating Shift Left (Signed and Unsigned Integer)
869 defm SQSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01001, "sqshl",
870 int_arm_neon_vqshifts, 1>;
871 defm UQSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01001, "uqshl",
872 int_arm_neon_vqshiftu, 1>;
874 // Vector Rounding Shift Left (Signed and Unsigned Integer)
875 defm SRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01010, "srshl",
876 int_arm_neon_vrshifts, 1>;
877 defm URSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01010, "urshl",
878 int_arm_neon_vrshiftu, 1>;
880 // Vector Saturating Rounding Shift Left (Signed and Unsigned Integer)
881 defm SQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01011, "sqrshl",
882 int_arm_neon_vqrshifts, 1>;
883 defm UQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01011, "uqrshl",
884 int_arm_neon_vqrshiftu, 1>;
886 // Vector Maximum (Signed and Unsigned Integer)
887 defm SMAXvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01100, "smax", int_arm_neon_vmaxs, 1>;
888 defm UMAXvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01100, "umax", int_arm_neon_vmaxu, 1>;
890 // Vector Minimum (Signed and Unsigned Integer)
891 defm SMINvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01101, "smin", int_arm_neon_vmins, 1>;
892 defm UMINvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01101, "umin", int_arm_neon_vminu, 1>;
894 // Vector Maximum (Floating Point)
895 defm FMAXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11110, "fmax",
896 int_arm_neon_vmaxs, int_arm_neon_vmaxs,
897 int_arm_neon_vmaxs, v2f32, v4f32, v2f64, 1>;
899 // Vector Minimum (Floating Point)
900 defm FMINvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11110, "fmin",
901 int_arm_neon_vmins, int_arm_neon_vmins,
902 int_arm_neon_vmins, v2f32, v4f32, v2f64, 1>;
904 // Vector maxNum (Floating Point) - prefer a number over a quiet NaN
905 defm FMAXNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11000, "fmaxnm",
906 int_aarch64_neon_vmaxnm,
907 int_aarch64_neon_vmaxnm,
908 int_aarch64_neon_vmaxnm,
909 v2f32, v4f32, v2f64, 1>;
911 // Vector minNum (Floating Point) - prefer a number over a quiet NaN
912 defm FMINNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11000, "fminnm",
913 int_aarch64_neon_vminnm,
914 int_aarch64_neon_vminnm,
915 int_aarch64_neon_vminnm,
916 v2f32, v4f32, v2f64, 1>;
918 // Vector Maximum Pairwise (Signed and Unsigned Integer)
919 defm SMAXPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10100, "smaxp", int_arm_neon_vpmaxs, 1>;
920 defm UMAXPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10100, "umaxp", int_arm_neon_vpmaxu, 1>;
922 // Vector Minimum Pairwise (Signed and Unsigned Integer)
923 defm SMINPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10101, "sminp", int_arm_neon_vpmins, 1>;
924 defm UMINPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10101, "uminp", int_arm_neon_vpminu, 1>;
926 // Vector Maximum Pairwise (Floating Point)
927 defm FMAXPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11110, "fmaxp",
928 int_arm_neon_vpmaxs, int_arm_neon_vpmaxs,
929 int_arm_neon_vpmaxs, v2f32, v4f32, v2f64, 1>;
931 // Vector Minimum Pairwise (Floating Point)
932 defm FMINPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11110, "fminp",
933 int_arm_neon_vpmins, int_arm_neon_vpmins,
934 int_arm_neon_vpmins, v2f32, v4f32, v2f64, 1>;
936 // Vector maxNum Pairwise (Floating Point) - prefer a number over a quiet NaN
937 defm FMAXNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11000, "fmaxnmp",
938 int_aarch64_neon_vpmaxnm,
939 int_aarch64_neon_vpmaxnm,
940 int_aarch64_neon_vpmaxnm,
941 v2f32, v4f32, v2f64, 1>;
943 // Vector minNum Pairwise (Floating Point) - prefer a number over a quiet NaN
944 defm FMINNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11000, "fminnmp",
945 int_aarch64_neon_vpminnm,
946 int_aarch64_neon_vpminnm,
947 int_aarch64_neon_vpminnm,
948 v2f32, v4f32, v2f64, 1>;
950 // Vector Addition Pairwise (Integer)
951 defm ADDP : NeonI_3VSame_BHSD_sizes<0b0, 0b10111, "addp", int_arm_neon_vpadd, 1>;
953 // Vector Addition Pairwise (Floating Point)
954 defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp",
958 v2f32, v4f32, v2f64, 1>;
960 // Vector Saturating Doubling Multiply High
961 defm SQDMULHvvv : NeonI_3VSame_HS_sizes<0b0, 0b10110, "sqdmulh",
962 int_arm_neon_vqdmulh, 1>;
964 // Vector Saturating Rounding Doubling Multiply High
965 defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh",
966 int_arm_neon_vqrdmulh, 1>;
968 // Vector Multiply Extended (Floating Point)
969 defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx",
970 int_aarch64_neon_vmulx,
971 int_aarch64_neon_vmulx,
972 int_aarch64_neon_vmulx,
973 v2f32, v4f32, v2f64, 1>;
975 // Vector Immediate Instructions
// AsmOperandClass factory for the LSL/MSL/LSLH shifter suffix of the
// modified-immediate instructions; the PREFIX selects the matching
// isNeonMovImmShift*/addNeonMovImmShift*Operands hooks in the asm parser.
977 multiclass neon_mov_imm_shift_asmoperands<string PREFIX>
979 def _asmoperand : AsmOperandClass
981 let Name = "NeonMovImmShift" # PREFIX;
982 let RenderMethod = "addNeonMovImmShift" # PREFIX # "Operands";
983 let PredicateMethod = "isNeonMovImmShift" # PREFIX;
987 // Definition of vector immediates shift operands
989 // The selectable use-cases extract the shift operation
990 // information from the OpCmode fields encoded in the immediate.
// SDNodeXForm: decode the OpCmode encoding and emit just the shift amount
// as an i32 target constant for instruction selection.
991 def neon_mod_shift_imm_XFORM : SDNodeXForm<imm, [{
992 uint64_t OpCmode = N->getZExtValue();
994 unsigned ShiftOnesIn;
996 A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
997 if (!HasShift) return SDValue();
998 return CurDAG->getTargetConstant(ShiftImm, MVT::i32);
1001 // Vector immediates shift operands which accept LSL and MSL
1002 // shift operators with shift value in the range of 0, 8, 16, 24 (LSL),
1003 // or 0, 8 (LSLH) or 8, 16 (MSL).
1004 defm neon_mov_imm_LSL : neon_mov_imm_shift_asmoperands<"LSL">;
1005 defm neon_mov_imm_MSL : neon_mov_imm_shift_asmoperands<"MSL">;
1006 // LSLH restricts shift amount to 0, 8 out of 0, 8, 16, 24
1007 defm neon_mov_imm_LSLH : neon_mov_imm_shift_asmoperands<"LSLH">;
// Operand factory pairing the printer/decoder (parameterized by shift kind
// and ISHALF) with the matching asmoperand class declared above. The `pred`
// code block filters which encoded OpCmode values the ImmLeaf accepts.
1009 multiclass neon_mov_imm_shift_operands<string PREFIX,
1010 string HALF, string ISHALF, code pred>
1012 def _operand : Operand<i32>, ImmLeaf<i32, pred, neon_mod_shift_imm_XFORM>
1015 "printNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
1017 "DecodeNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
1018 let ParserMatchClass =
1019 !cast<AsmOperandClass>("neon_mov_imm_" # PREFIX # HALF # "_asmoperand");
// LSL: a shift is present and it shifts in zeros (ShiftOnesIn clear).
1023 defm neon_mov_imm_LSL : neon_mov_imm_shift_operands<"LSL", "", "false", [{
1025 unsigned ShiftOnesIn;
1027 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1028 return (HasShift && !ShiftOnesIn);
// MSL: a shift is present and it shifts in ones (ShiftOnesIn set).
1031 defm neon_mov_imm_MSL : neon_mov_imm_shift_operands<"MSL", "", "false", [{
1033 unsigned ShiftOnesIn;
1035 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1036 return (HasShift && ShiftOnesIn);
// LSLH: same predicate as LSL but parsed/printed as the halfword-restricted
// form (HALF = "H", ISHALF = "true").
1039 defm neon_mov_imm_LSLH : neon_mov_imm_shift_operands<"LSL", "H", "true", [{
1041 unsigned ShiftOnesIn;
1043 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1044 return (HasShift && !ShiftOnesIn);
// Generic unsigned-immediate asmoperand classes used by the vector
// immediate instructions below.
1047 def neon_uimm1_asmoperand : AsmOperandClass
1050 let PredicateMethod = "isUImm<1>";
1051 let RenderMethod = "addImmOperands";
1054 def neon_uimm2_asmoperand : AsmOperandClass
1057 let PredicateMethod = "isUImm<2>";
1058 let RenderMethod = "addImmOperands";
1061 def neon_uimm8_asmoperand : AsmOperandClass
1064 let PredicateMethod = "isUImm<8>";
1065 let RenderMethod = "addImmOperands";
// 8-bit immediate operand; the ImmLeaf accepts any value because range
// checking is done by the asmoperand's isUImm<8> predicate.
1068 def neon_uimm8 : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
1069 let ParserMatchClass = neon_uimm8_asmoperand;
1070 let PrintMethod = "printNeonUImm8Operand";
1073 def neon_uimm64_mask_asmoperand : AsmOperandClass
1075 let Name = "NeonUImm64Mask";
1076 let PredicateMethod = "isNeonUImm64Mask";
1077 let RenderMethod = "addNeonUImm64MaskOperands";
1080 // MCOperand for 64-bit bytemask with each byte having only the
1081 // value 0x00 and 0xff is encoded as an unsigned 8-bit value
1082 def neon_uimm64_mask : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
1083 let ParserMatchClass = neon_uimm64_mask_asmoperand;
1084 let PrintMethod = "printNeonUImm64MaskOperand";
// MOVI/MVNI with LSL shifter: one instruction per vector arrangement.
// The cmode field encodes the shift amount (Simm bits spliced into cmode).
1087 multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
1088 SDPatternOperator opnode>
1090 // shift zeros, per word
1091 def _2S : NeonI_1VModImm<0b0, op,
1093 (ins neon_uimm8:$Imm,
1094 neon_mov_imm_LSL_operand:$Simm),
1095 !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
1096 [(set (v2i32 VPR64:$Rd),
1097 (v2i32 (opnode (timm:$Imm),
1098 (neon_mov_imm_LSL_operand:$Simm))))],
1101 let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
1104 def _4S : NeonI_1VModImm<0b1, op,
1106 (ins neon_uimm8:$Imm,
1107 neon_mov_imm_LSL_operand:$Simm),
1108 !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
1109 [(set (v4i32 VPR128:$Rd),
1110 (v4i32 (opnode (timm:$Imm),
1111 (neon_mov_imm_LSL_operand:$Simm))))],
1114 let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
1117 // shift zeros, per halfword
1118 def _4H : NeonI_1VModImm<0b0, op,
1120 (ins neon_uimm8:$Imm,
1121 neon_mov_imm_LSLH_operand:$Simm),
1122 !strconcat(asmop, " $Rd.4h, $Imm$Simm"),
1123 [(set (v4i16 VPR64:$Rd),
1124 (v4i16 (opnode (timm:$Imm),
1125 (neon_mov_imm_LSLH_operand:$Simm))))],
1128 let cmode = {0b1, 0b0, Simm, 0b0};
1131 def _8H : NeonI_1VModImm<0b1, op,
1133 (ins neon_uimm8:$Imm,
1134 neon_mov_imm_LSLH_operand:$Simm),
1135 !strconcat(asmop, " $Rd.8h, $Imm$Simm"),
1136 [(set (v8i16 VPR128:$Rd),
1137 (v8i16 (opnode (timm:$Imm),
1138 (neon_mov_imm_LSLH_operand:$Simm))))],
1141 let cmode = {0b1, 0b0, Simm, 0b0};
// BIC/ORR immediate: read-modify-write forms, hence the "$src = $Rd" tie.
// `opnode` is the combining operator (and/or); `neonopnode` materializes
// the shifted immediate (Neon_movi / Neon_mvni).
1145 multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
1146 SDPatternOperator opnode,
1147 SDPatternOperator neonopnode>
1149 let Constraints = "$src = $Rd" in {
1150 // shift zeros, per word
1151 def _2S : NeonI_1VModImm<0b0, op,
1153 (ins VPR64:$src, neon_uimm8:$Imm,
1154 neon_mov_imm_LSL_operand:$Simm),
1155 !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
1156 [(set (v2i32 VPR64:$Rd),
1157 (v2i32 (opnode (v2i32 VPR64:$src),
1158 (v2i32 (bitconvert (v2i32 (neonopnode timm:$Imm,
1159 neon_mov_imm_LSL_operand:$Simm)))))))],
1162 let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
1165 def _4S : NeonI_1VModImm<0b1, op,
1167 (ins VPR128:$src, neon_uimm8:$Imm,
1168 neon_mov_imm_LSL_operand:$Simm),
1169 !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
1170 [(set (v4i32 VPR128:$Rd),
1171 (v4i32 (opnode (v4i32 VPR128:$src),
1172 (v4i32 (bitconvert (v4i32 (neonopnode timm:$Imm,
1173 neon_mov_imm_LSL_operand:$Simm)))))))],
1176 let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
1179 // shift zeros, per halfword
1180 def _4H : NeonI_1VModImm<0b0, op,
1182 (ins VPR64:$src, neon_uimm8:$Imm,
1183 neon_mov_imm_LSLH_operand:$Simm),
1184 !strconcat(asmop, " $Rd.4h, $Imm$Simm"),
1185 [(set (v4i16 VPR64:$Rd),
1186 (v4i16 (opnode (v4i16 VPR64:$src),
// NOTE(review): the instruction ins use neon_mov_imm_LSLH_operand, but the
// select pattern below constrains $Simm with neon_mov_imm_LSL_operand —
// confirm whether this asymmetry is intentional or should be LSLH.
1187 (v4i16 (bitconvert (v4i16 (neonopnode timm:$Imm,
1188 neon_mov_imm_LSL_operand:$Simm)))))))],
1191 let cmode = {0b1, 0b0, Simm, 0b1};
1194 def _8H : NeonI_1VModImm<0b1, op,
1196 (ins VPR128:$src, neon_uimm8:$Imm,
1197 neon_mov_imm_LSLH_operand:$Simm),
1198 !strconcat(asmop, " $Rd.8h, $Imm$Simm"),
1199 [(set (v8i16 VPR128:$Rd),
1200 (v8i16 (opnode (v8i16 VPR128:$src),
// NOTE(review): same LSLH-vs-LSL operand asymmetry as the _4H pattern above.
1201 (v8i16 (bitconvert (v8i16 (neonopnode timm:$Imm,
1202 neon_mov_imm_LSL_operand:$Simm)))))))],
1205 let cmode = {0b1, 0b0, Simm, 0b1};
// MOVI/MVNI with the MSL ("shift ones in") shifter — word arrangements only.
1210 multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op,
1211 SDPatternOperator opnode>
1213 // shift ones, per word
1214 def _2S : NeonI_1VModImm<0b0, op,
1216 (ins neon_uimm8:$Imm,
1217 neon_mov_imm_MSL_operand:$Simm),
1218 !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
1219 [(set (v2i32 VPR64:$Rd),
1220 (v2i32 (opnode (timm:$Imm),
1221 (neon_mov_imm_MSL_operand:$Simm))))],
1224 let cmode = {0b1, 0b1, 0b0, Simm};
1227 def _4S : NeonI_1VModImm<0b1, op,
1229 (ins neon_uimm8:$Imm,
1230 neon_mov_imm_MSL_operand:$Simm),
1231 !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
1232 [(set (v4i32 VPR128:$Rd),
1233 (v4i32 (opnode (timm:$Imm),
1234 (neon_mov_imm_MSL_operand:$Simm))))],
1237 let cmode = {0b1, 0b1, 0b0, Simm};
1241 // Vector Move Immediate Shifted
// Pure immediate materializations: safe to rematerialize instead of spill.
1242 let isReMaterializable = 1 in {
1243 defm MOVIvi_lsl : NeonI_mov_imm_lsl_sizes<"movi", 0b0, Neon_movi>;
1246 // Vector Move Inverted Immediate Shifted
1247 let isReMaterializable = 1 in {
1248 defm MVNIvi_lsl : NeonI_mov_imm_lsl_sizes<"mvni", 0b1, Neon_mvni>;
1251 // Vector Bitwise Bit Clear (AND NOT) - immediate
1252 let isReMaterializable = 1 in {
1253 defm BICvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"bic", 0b1,
1257 // Vector Bitwise OR - immediate
1259 let isReMaterializable = 1 in {
1260 defm ORRvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"orr", 0b0,
1264 // Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
1265 // LowerBUILD_VECTOR favors lowering MOVI over MVNI.
1266 // BIC immediate instructions selection requires additional patterns to
1267 // transform Neon_movi operands into BIC immediate operands
1269 def neon_mov_imm_LSLH_transform_XFORM : SDNodeXForm<imm, [{
1270 uint64_t OpCmode = N->getZExtValue();
1272 unsigned ShiftOnesIn;
1273 (void)A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
1274 // LSLH restricts shift amount to 0, 8 which are encoded as 0 and 1
1275 // Transform encoded shift amount 0 to 1 and 1 to 0.
1276 return CurDAG->getTargetConstant(!ShiftImm, MVT::i32);
// Accepts LSL-with-zeros encodings and applies the shift-flip transform
// above when rendering the operand.
1279 def neon_mov_imm_LSLH_transform_operand
1282 unsigned ShiftOnesIn;
1284 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1285 return (HasShift && !ShiftOnesIn); }],
1286 neon_mov_imm_LSLH_transform_XFORM>;
1288 // Transform (and A, (4h Neon_movi 0xff)) -> BIC 4h (A, 0x00, LSL 8)
1289 // Transform (and A, (4h Neon_movi 0xff LSL #8)) -> BIC 4h (A, 0x00)
1290 def : Pat<(v4i16 (and VPR64:$src,
1291 (v4i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
1292 (BICvi_lsl_4H VPR64:$src, 0,
1293 neon_mov_imm_LSLH_transform_operand:$Simm)>;
1295 // Transform (and A, (8h Neon_movi 8h 0xff)) -> BIC 8h (A, 0x00, LSL 8)
1296 // Transform (and A, (8h Neon_movi 0xff LSL #8)) -> BIC 8h (A, 0x00)
1297 def : Pat<(v8i16 (and VPR128:$src,
1298 (v8i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
1299 (BICvi_lsl_8H VPR128:$src, 0,
1300 neon_mov_imm_LSLH_transform_operand:$Simm)>;
// Select BIC/ORR-immediate for bitwise ops whose constant operand was
// built as a 4H/8H modified immediate but used at another vector type:
// the bitconvert absorbs the type change so one H-arranged instruction
// covers the 8b/1d (64-bit) and 16b/4s/2d (128-bit) uses.
1303 multiclass Neon_bitwiseVi_patterns<SDPatternOperator opnode,
1304 SDPatternOperator neonopnode,
1306 Instruction INST8H> {
1307 def : Pat<(v8i8 (opnode VPR64:$src,
1308 (bitconvert(v4i16 (neonopnode timm:$Imm,
1309 neon_mov_imm_LSLH_operand:$Simm))))),
1310 (INST4H VPR64:$src, neon_uimm8:$Imm,
1311 neon_mov_imm_LSLH_operand:$Simm)>;
1312 def : Pat<(v1i64 (opnode VPR64:$src,
1313 (bitconvert(v4i16 (neonopnode timm:$Imm,
1314 neon_mov_imm_LSLH_operand:$Simm))))),
1315 (INST4H VPR64:$src, neon_uimm8:$Imm,
1316 neon_mov_imm_LSLH_operand:$Simm)>;
1318 def : Pat<(v16i8 (opnode VPR128:$src,
1319 (bitconvert(v8i16 (neonopnode timm:$Imm,
1320 neon_mov_imm_LSLH_operand:$Simm))))),
1321 (INST8H VPR128:$src, neon_uimm8:$Imm,
1322 neon_mov_imm_LSLH_operand:$Simm)>;
1323 def : Pat<(v4i32 (opnode VPR128:$src,
1324 (bitconvert(v8i16 (neonopnode timm:$Imm,
1325 neon_mov_imm_LSLH_operand:$Simm))))),
1326 (INST8H VPR128:$src, neon_uimm8:$Imm,
1327 neon_mov_imm_LSLH_operand:$Simm)>;
1328 def : Pat<(v2i64 (opnode VPR128:$src,
1329 (bitconvert(v8i16 (neonopnode timm:$Imm,
1330 neon_mov_imm_LSLH_operand:$Simm))))),
1331 (INST8H VPR128:$src, neon_uimm8:$Imm,
1332 neon_mov_imm_LSLH_operand:$Simm)>;
1335 // Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
// BIC computes src & ~imm; Neon_mvni materializes the complemented
// immediate, so the DAG shape to match is `and`, parallel to the ORR
// instantiation below which matches `or` with Neon_movi. (Was `or`, which
// would match src | ~imm — ORN semantics the BIC instruction does not have.)
1336 defm : Neon_bitwiseVi_patterns<and, Neon_mvni, BICvi_lsl_4H, BICvi_lsl_8H>;
1338 // Additional patterns for Vector Bitwise OR - immediate
1339 defm : Neon_bitwiseVi_patterns<or, Neon_movi, ORRvi_lsl_4H, ORRvi_lsl_8H>;
1342 // Vector Move Immediate Masked
1343 let isReMaterializable = 1 in {
1344 defm MOVIvi_msl : NeonI_mov_imm_msl_sizes<"movi", 0b0, Neon_movi>;
1347 // Vector Move Inverted Immediate Masked
1348 let isReMaterializable = 1 in {
1349 defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>;
// Assembly alias accepting the shift-less spelling; renders with an
// implicit shift amount of 0 (the trailing `0` operand).
1352 class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane,
1353 Instruction inst, RegisterOperand VPRC>
1354 : NeonInstAlias<!strconcat(asmop, " $Rd," # asmlane # ", $Imm"),
1355 (inst VPRC:$Rd, neon_uimm8:$Imm, 0), 0b0>;
1357 // Aliases for Vector Move Immediate Shifted
1358 def : NeonI_mov_imm_lsl_aliases<"movi", ".2s", MOVIvi_lsl_2S, VPR64>;
1359 def : NeonI_mov_imm_lsl_aliases<"movi", ".4s", MOVIvi_lsl_4S, VPR128>;
1360 def : NeonI_mov_imm_lsl_aliases<"movi", ".4h", MOVIvi_lsl_4H, VPR64>;
1361 def : NeonI_mov_imm_lsl_aliases<"movi", ".8h", MOVIvi_lsl_8H, VPR128>;
1363 // Aliases for Vector Move Inverted Immediate Shifted
1364 def : NeonI_mov_imm_lsl_aliases<"mvni", ".2s", MVNIvi_lsl_2S, VPR64>;
1365 def : NeonI_mov_imm_lsl_aliases<"mvni", ".4s", MVNIvi_lsl_4S, VPR128>;
1366 def : NeonI_mov_imm_lsl_aliases<"mvni", ".4h", MVNIvi_lsl_4H, VPR64>;
1367 def : NeonI_mov_imm_lsl_aliases<"mvni", ".8h", MVNIvi_lsl_8H, VPR128>;
1369 // Aliases for Vector Bitwise Bit Clear (AND NOT) - immediate
1370 def : NeonI_mov_imm_lsl_aliases<"bic", ".2s", BICvi_lsl_2S, VPR64>;
1371 def : NeonI_mov_imm_lsl_aliases<"bic", ".4s", BICvi_lsl_4S, VPR128>;
1372 def : NeonI_mov_imm_lsl_aliases<"bic", ".4h", BICvi_lsl_4H, VPR64>;
1373 def : NeonI_mov_imm_lsl_aliases<"bic", ".8h", BICvi_lsl_8H, VPR128>;
1375 // Aliases for Vector Bitwise OR - immediate
1376 def : NeonI_mov_imm_lsl_aliases<"orr", ".2s", ORRvi_lsl_2S, VPR64>;
1377 def : NeonI_mov_imm_lsl_aliases<"orr", ".4s", ORRvi_lsl_4S, VPR128>;
1378 def : NeonI_mov_imm_lsl_aliases<"orr", ".4h", ORRvi_lsl_4H, VPR64>;
1379 def : NeonI_mov_imm_lsl_aliases<"orr", ".8h", ORRvi_lsl_8H, VPR128>;
1381 // Vector Move Immediate - per byte
1382 let isReMaterializable = 1 in {
1383 def MOVIvi_8B : NeonI_1VModImm<0b0, 0b0,
1384 (outs VPR64:$Rd), (ins neon_uimm8:$Imm),
1385 "movi\t$Rd.8b, $Imm",
1386 [(set (v8i8 VPR64:$Rd),
1387 (v8i8 (Neon_movi (timm:$Imm), (i32 imm))))],
1392 def MOVIvi_16B : NeonI_1VModImm<0b1, 0b0,
1393 (outs VPR128:$Rd), (ins neon_uimm8:$Imm),
1394 "movi\t$Rd.16b, $Imm",
1395 [(set (v16i8 VPR128:$Rd),
1396 (v16i8 (Neon_movi (timm:$Imm), (i32 imm))))],
1402 // Vector Move Immediate - bytemask, per double word
// $Imm here is the neon_uimm64_mask encoding: one bit per byte of the
// 64-bit pattern, each selecting 0x00 or 0xff (see neon_uimm64_mask above).
1403 let isReMaterializable = 1 in {
1404 def MOVIvi_2D : NeonI_1VModImm<0b1, 0b1,
1405 (outs VPR128:$Rd), (ins neon_uimm64_mask:$Imm),
1406 "movi\t $Rd.2d, $Imm",
1407 [(set (v2i64 VPR128:$Rd),
1408 (v2i64 (Neon_movi (timm:$Imm), (i32 imm))))],
1414 // Vector Move Immediate - bytemask, one doubleword
1416 let isReMaterializable = 1 in {
1417 def MOVIdi : NeonI_1VModImm<0b0, 0b1,
1418 (outs FPR64:$Rd), (ins neon_uimm64_mask:$Imm),
1420 [(set (f64 FPR64:$Rd),
1422 (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))))],
1428 // Vector Floating Point Move Immediate
// Materializes a floating-point modified immediate into every lane of the
// destination vector via the Neon_fmovi node.
1430 class NeonI_FMOV_impl<string asmlane, RegisterOperand VPRC, ValueType OpTy,
1431 Operand immOpType, bit q, bit op>
1432 : NeonI_1VModImm<q, op,
1433 (outs VPRC:$Rd), (ins immOpType:$Imm),
1434 "fmov\t$Rd" # asmlane # ", $Imm",
1435 [(set (OpTy VPRC:$Rd),
1436 (OpTy (Neon_fmovi (timm:$Imm))))],
1441 let isReMaterializable = 1 in {
1442 def FMOVvi_2S : NeonI_FMOV_impl<".2s", VPR64, v2f32, fmov32_operand, 0b0, 0b0>;
1443 def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>;
1444 def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>;
1447 // Vector Shift (Immediate)
1448 // Immediate in [0, 63]
1449 def imm0_63 : Operand<i32> {
1450 let ParserMatchClass = uimm6_asmoperand;
1453 // Shift Right/Left Immediate - A shift immediate is encoded differently from
1454 // other shift immediates. The immh:immb field is encoded like so:
1457 // 8 immh:immb<6:3> = '0001xxx', <imm> is encoded in immh:immb<2:0>
1458 // 16 immh:immb<6:4> = '001xxxx', <imm> is encoded in immh:immb<3:0>
1459 // 32 immh:immb<6:5> = '01xxxxx', <imm> is encoded in immh:immb<4:0>
1460 // 64 immh:immb<6> = '1xxxxxx', <imm> is encoded in immh:immb<5:0>
1462 // The shift right immediate amount, in the range 1 to element bits, is computed
1463 // as Offset - UInt(immh:immb). The shift left immediate amount, in the range 0
1464 // to element bits - 1, is computed as UInt(immh:immb) - Offset.
// Shift-right immediate operands: valid range is 1..element-bits; the
// OFFSET string (8/16/32/64) selects the per-width encoder/decoder and
// diagnostic (see the immh:immb encoding comment above).
1466 class shr_imm_asmoperands<string OFFSET> : AsmOperandClass {
1467 let Name = "ShrImm" # OFFSET;
1468 let RenderMethod = "addImmOperands";
1469 let DiagnosticType = "ShrImm" # OFFSET;
1472 class shr_imm<string OFFSET> : Operand<i32> {
1473 let EncoderMethod = "getShiftRightImm" # OFFSET;
1474 let DecoderMethod = "DecodeShiftRightImm" # OFFSET;
1475 let ParserMatchClass =
1476 !cast<AsmOperandClass>("shr_imm" # OFFSET # "_asmoperand");
1479 def shr_imm8_asmoperand : shr_imm_asmoperands<"8">;
1480 def shr_imm16_asmoperand : shr_imm_asmoperands<"16">;
1481 def shr_imm32_asmoperand : shr_imm_asmoperands<"32">;
1482 def shr_imm64_asmoperand : shr_imm_asmoperands<"64">;
1484 def shr_imm8 : shr_imm<"8">;
1485 def shr_imm16 : shr_imm<"16">;
1486 def shr_imm32 : shr_imm<"32">;
1487 def shr_imm64 : shr_imm<"64">;
// Shift-left immediate operands: valid range is 0..element-bits-1,
// mirroring the shr_imm machinery above.
1489 class shl_imm_asmoperands<string OFFSET> : AsmOperandClass {
1490 let Name = "ShlImm" # OFFSET;
1491 let RenderMethod = "addImmOperands";
1492 let DiagnosticType = "ShlImm" # OFFSET;
1495 class shl_imm<string OFFSET> : Operand<i32> {
1496 let EncoderMethod = "getShiftLeftImm" # OFFSET;
1497 let DecoderMethod = "DecodeShiftLeftImm" # OFFSET;
1498 let ParserMatchClass =
1499 !cast<AsmOperandClass>("shl_imm" # OFFSET # "_asmoperand");
1502 def shl_imm8_asmoperand : shl_imm_asmoperands<"8">;
1503 def shl_imm16_asmoperand : shl_imm_asmoperands<"16">;
1504 def shl_imm32_asmoperand : shl_imm_asmoperands<"32">;
1505 def shl_imm64_asmoperand : shl_imm_asmoperands<"64">;
1507 def shl_imm8 : shl_imm<"8">;
1508 def shl_imm16 : shl_imm<"16">;
1509 def shl_imm32 : shl_imm<"32">;
1510 def shl_imm64 : shl_imm<"64">;
// Two-register shift-by-immediate: the scalar shift amount is splatted
// (Neon_vdup) so generic shl/sra/srl DAG nodes can be matched.
1512 class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
1513 RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode>
1514 : NeonI_2VShiftImm<q, u, opcode,
1515 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
1516 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1517 [(set (Ty VPRC:$Rd),
1518 (Ty (OpNode (Ty VPRC:$Rn),
1519 (Ty (Neon_vdup (i32 imm:$Imm))))))],
1522 multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> {
1523 // 64-bit vector types.
1524 def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3, shl> {
1525 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1528 def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4, shl> {
1529 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1532 def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5, shl> {
1533 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1536 // 128-bit vector types.
1537 def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3, shl> {
1538 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1541 def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4, shl> {
1542 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1545 def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5, shl> {
1546 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1549 def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63, shl> {
1550 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
// Shift-right variants use shr_imm* operands (range 1..element bits) and
// take the DAG node (sra/srl) as a parameter.
1554 multiclass NeonI_N2VShR<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
1555 def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1557 let Inst{22-19} = 0b0001;
1560 def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1562 let Inst{22-20} = 0b001;
1565 def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1567 let Inst{22-21} = 0b01;
1570 def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1572 let Inst{22-19} = 0b0001;
1575 def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1577 let Inst{22-20} = 0b001;
1580 def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1582 let Inst{22-21} = 0b01;
1585 def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1592 defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">;
1595 defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>;
1596 defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>;
// PatFrags selecting the high or low half of a 128-bit vector via
// extract_subvector (index = half the lane count for the high half,
// 0-based low half for the Neon_low* frags).
1598 def Neon_High16B : PatFrag<(ops node:$in),
1599 (extract_subvector (v16i8 node:$in), (iPTR 8))>;
1600 def Neon_High8H : PatFrag<(ops node:$in),
1601 (extract_subvector (v8i16 node:$in), (iPTR 4))>;
1602 def Neon_High4S : PatFrag<(ops node:$in),
1603 (extract_subvector (v4i32 node:$in), (iPTR 2))>;
1605 def Neon_low8H : PatFrag<(ops node:$in),
1606 (v4i16 (extract_subvector (v8i16 node:$in),
1608 def Neon_low4S : PatFrag<(ops node:$in),
1609 (v2i32 (extract_subvector (v4i32 node:$in),
1611 def Neon_low4f : PatFrag<(ops node:$in),
1612 (v2f32 (extract_subvector (v4f32 node:$in),
// Widening shift-left (SSHLL/USHLL): extend the 64-bit source (ExtOp is
// sext or zext), then shift by the splatted immediate.
1615 class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1616 string SrcT, ValueType DestTy, ValueType SrcTy,
1617 Operand ImmTy, SDPatternOperator ExtOp>
1618 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1619 (ins VPR64:$Rn, ImmTy:$Imm),
1620 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1621 [(set (DestTy VPR128:$Rd),
1623 (DestTy (ExtOp (SrcTy VPR64:$Rn))),
1624 (DestTy (Neon_vdup (i32 imm:$Imm))))))],
// "2" (second-half) form: operates on the high half of a 128-bit source,
// extracted by the getTop PatFrag (Neon_High16B/8H/4S).
1627 class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1628 string SrcT, ValueType DestTy, ValueType SrcTy,
1629 int StartIndex, Operand ImmTy,
1630 SDPatternOperator ExtOp, PatFrag getTop>
1631 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1632 (ins VPR128:$Rn, ImmTy:$Imm),
1633 asmop # "2\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1634 [(set (DestTy VPR128:$Rd),
1637 (SrcTy (getTop VPR128:$Rn)))),
1638 (DestTy (Neon_vdup (i32 imm:$Imm))))))],
// Widening shift-left for all arrangements, plus extend-only patterns:
// a bare sext/zext is an SSHLL/USHLL with immediate 0 (the `prefix`
// string is used to !cast back to the instructions defined here).
1641 multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop,
1643 // 64-bit vector types.
1644 def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8,
1646 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1649 def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16,
1651 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1654 def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32,
1656 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1659 // 128-bit vector types
1660 def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b",
1661 v8i16, v8i8, 8, uimm3, ExtOp, Neon_High16B> {
1662 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1665 def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h",
1666 v4i32, v4i16, 4, uimm4, ExtOp, Neon_High8H> {
1667 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1670 def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s",
1671 v2i64, v2i32, 2, uimm5, ExtOp, Neon_High4S> {
1672 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1675 // Use other patterns to match when the immediate is 0.
1676 def : Pat<(v8i16 (ExtOp (v8i8 VPR64:$Rn))),
1677 (!cast<Instruction>(prefix # "_8B") VPR64:$Rn, 0)>;
1679 def : Pat<(v4i32 (ExtOp (v4i16 VPR64:$Rn))),
1680 (!cast<Instruction>(prefix # "_4H") VPR64:$Rn, 0)>;
1682 def : Pat<(v2i64 (ExtOp (v2i32 VPR64:$Rn))),
1683 (!cast<Instruction>(prefix # "_2S") VPR64:$Rn, 0)>;
1685 def : Pat<(v8i16 (ExtOp (v8i8 (Neon_High16B VPR128:$Rn)))),
1686 (!cast<Instruction>(prefix # "_16B") VPR128:$Rn, 0)>;
1688 def : Pat<(v4i32 (ExtOp (v4i16 (Neon_High8H VPR128:$Rn)))),
1689 (!cast<Instruction>(prefix # "_8H") VPR128:$Rn, 0)>;
1691 def : Pat<(v2i64 (ExtOp (v2i32 (Neon_High4S VPR128:$Rn)))),
1692 (!cast<Instruction>(prefix # "_4S") VPR128:$Rn, 0)>;
1696 defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>;
1697 defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>;
1699 // Rounding/Saturating shift
// Like N2VShift but matches an intrinsic/SDPatternOperator directly
// (the shift amount handling is inside OpNode) rather than a splatted
// generic shift node.
1700 class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T,
1701 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1702 SDPatternOperator OpNode>
1703 : NeonI_2VShiftImm<q, u, opcode,
1704 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
1705 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1706 [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn),
1710 // shift right (vector by immediate)
1711 multiclass NeonI_N2VShR_RQ<bit u, bits<5> opcode, string asmop,
1712 SDPatternOperator OpNode> {
1713 def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1715 let Inst{22-19} = 0b0001;
1718 def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1720 let Inst{22-20} = 0b001;
1723 def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1725 let Inst{22-21} = 0b01;
1728 def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1730 let Inst{22-19} = 0b0001;
1733 def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1735 let Inst{22-20} = 0b001;
1738 def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1740 let Inst{22-21} = 0b01;
1743 def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1749 multiclass NeonI_N2VShL_Q<bit u, bits<5> opcode, string asmop,
1750 SDPatternOperator OpNode> {
1751 // 64-bit vector types.
1752 def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3,
1754 let Inst{22-19} = 0b0001;
1757 def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4,
1759 let Inst{22-20} = 0b001;
1762 def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5,
1764 let Inst{22-21} = 0b01;
1767 // 128-bit vector types.
1768 def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3,
1770 let Inst{22-19} = 0b0001;
1773 def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4,
1775 let Inst{22-20} = 0b001;
1778 def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5,
1780 let Inst{22-21} = 0b01;
1783 def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63,
1789 // Rounding shift right
1790 defm SRSHRvvi : NeonI_N2VShR_RQ<0b0, 0b00100, "srshr",
1791 int_aarch64_neon_vsrshr>;
1792 defm URSHRvvi : NeonI_N2VShR_RQ<0b1, 0b00100, "urshr",
1793 int_aarch64_neon_vurshr>;
1795 // Saturating shift left unsigned
1796 defm SQSHLUvvi : NeonI_N2VShL_Q<0b1, 0b01100, "sqshlu", int_aarch64_neon_vsqshlu>;
1798 // Saturating shift left
1799 defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>;
1800 defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>;
// N2VShiftAdd: shift-by-immediate with accumulation. Matches
// add($src, OpNode($Rn, dup(Imm))) and ties $src to $Rd so the
// accumulator register is read-modify-written.
// NOTE(review): some source lines are missing from this extract; the code
// lines below are reproduced unchanged.
1802 class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T,
1803 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1805 : NeonI_2VShiftImm<q, u, opcode,
1806 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1807 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1808 [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
1809 (Ty (OpNode (Ty VPRC:$Rn),
1810 (Ty (Neon_vdup (i32 imm:$Imm))))))))],
1812 let Constraints = "$src = $Rd";
1815 // Shift Right accumulate
1816 multiclass NeonI_N2VShRAdd<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
1817 def _8B : N2VShiftAdd<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1819 let Inst{22-19} = 0b0001;
1822 def _4H : N2VShiftAdd<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1824 let Inst{22-20} = 0b001;
1827 def _2S : N2VShiftAdd<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1829 let Inst{22-21} = 0b01;
1832 def _16B : N2VShiftAdd<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1834 let Inst{22-19} = 0b0001;
1837 def _8H : N2VShiftAdd<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1839 let Inst{22-20} = 0b001;
1842 def _4S : N2VShiftAdd<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1844 let Inst{22-21} = 0b01;
1847 def _2D : N2VShiftAdd<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1853 // Shift right and accumulate
// SSRA uses arithmetic shift (sra), USRA uses logical shift (srl).
1854 defm SSRAvvi : NeonI_N2VShRAdd<0, 0b00010, "ssra", sra>;
1855 defm USRAvvi : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>;
1857 // Rounding shift accumulate
// N2VShiftAdd_R: like N2VShiftAdd, but OpNode here is an intrinsic taking
// the shift amount as a plain i32 immediate rather than a splatted vector.
// NOTE(review): some source lines are missing from this extract; the code
// lines below are reproduced unchanged.
1858 class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T,
1859 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1860 SDPatternOperator OpNode>
1861 : NeonI_2VShiftImm<q, u, opcode,
1862 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1863 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1864 [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
1865 (Ty (OpNode (Ty VPRC:$Rn), (i32 imm:$Imm))))))],
1867 let Constraints = "$src = $Rd";
1870 multiclass NeonI_N2VShRAdd_R<bit u, bits<5> opcode, string asmop,
1871 SDPatternOperator OpNode> {
1872 def _8B : N2VShiftAdd_R<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1874 let Inst{22-19} = 0b0001;
1877 def _4H : N2VShiftAdd_R<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1879 let Inst{22-20} = 0b001;
1882 def _2S : N2VShiftAdd_R<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1884 let Inst{22-21} = 0b01;
1887 def _16B : N2VShiftAdd_R<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1889 let Inst{22-19} = 0b0001;
1892 def _8H : N2VShiftAdd_R<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1894 let Inst{22-20} = 0b001;
1897 def _4S : N2VShiftAdd_R<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1899 let Inst{22-21} = 0b01;
1902 def _2D : N2VShiftAdd_R<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1908 // Rounding shift right and accumulate
1909 defm SRSRAvvi : NeonI_N2VShRAdd_R<0, 0b00110, "srsra", int_aarch64_neon_vsrshr>;
1910 defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>;
1912 // Shift insert by immediate
// N2VShiftIns: SLI/SRI-style shift-and-insert. OpNode receives the
// destination ($src), the source ($Rn) and the immediate; $src is tied to
// $Rd because the un-shifted bit positions of the destination survive.
// NOTE(review): some source lines are missing from this extract; the code
// lines below are reproduced unchanged.
1913 class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T,
1914 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1915 SDPatternOperator OpNode>
1916 : NeonI_2VShiftImm<q, u, opcode,
1917 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1918 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1919 [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$src), (Ty VPRC:$Rn),
1922 let Constraints = "$src = $Rd";
1925 // shift left insert (vector by immediate)
1926 multiclass NeonI_N2VShLIns<bit u, bits<5> opcode, string asmop> {
1927 def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3,
1928 int_aarch64_neon_vsli> {
1929 let Inst{22-19} = 0b0001;
1932 def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4,
1933 int_aarch64_neon_vsli> {
1934 let Inst{22-20} = 0b001;
1937 def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5,
1938 int_aarch64_neon_vsli> {
1939 let Inst{22-21} = 0b01;
1942 // 128-bit vector types
1943 def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3,
1944 int_aarch64_neon_vsli> {
1945 let Inst{22-19} = 0b0001;
1948 def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4,
1949 int_aarch64_neon_vsli> {
1950 let Inst{22-20} = 0b001;
1953 def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5,
1954 int_aarch64_neon_vsli> {
1955 let Inst{22-21} = 0b01;
1958 def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63,
1959 int_aarch64_neon_vsli> {
1964 // shift right insert (vector by immediate)
1965 multiclass NeonI_N2VShRIns<bit u, bits<5> opcode, string asmop> {
1966 // 64-bit vector types.
1967 def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1968 int_aarch64_neon_vsri> {
1969 let Inst{22-19} = 0b0001;
1972 def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1973 int_aarch64_neon_vsri> {
1974 let Inst{22-20} = 0b001;
1977 def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1978 int_aarch64_neon_vsri> {
1979 let Inst{22-21} = 0b01;
1982 // 128-bit vector types
1983 def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1984 int_aarch64_neon_vsri> {
1985 let Inst{22-19} = 0b0001;
1988 def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1989 int_aarch64_neon_vsri> {
1990 let Inst{22-20} = 0b001;
1993 def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1994 int_aarch64_neon_vsri> {
1995 let Inst{22-21} = 0b01;
1998 def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1999 int_aarch64_neon_vsri> {
2004 // Shift left and insert
2005 defm SLIvvi : NeonI_N2VShLIns<0b1, 0b01010, "sli">;
2007 // Shift right and insert
2008 defm SRIvvi : NeonI_N2VShRIns<0b1, 0b01000, "sri">;
// N2VShR_Narrow: shift-right-narrow, 128-bit source to 64-bit result.
// N2VShR_Narrow_Hi: the "2" variant writing the high half of a 128-bit
// destination, so $src is tied to $Rd to preserve the low half.
// NOTE(review): some source lines are missing from this extract; the code
// lines below are reproduced unchanged.
2010 class N2VShR_Narrow<bit q, bit u, bits<5> opcode, string asmop, string DestT,
2011 string SrcT, Operand ImmTy>
2012 : NeonI_2VShiftImm<q, u, opcode,
2013 (outs VPR64:$Rd), (ins VPR128:$Rn, ImmTy:$Imm),
2014 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
2017 class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT,
2018 string SrcT, Operand ImmTy>
2019 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
2020 (ins VPR128:$src, VPR128:$Rn, ImmTy:$Imm),
2021 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
2023 let Constraints = "$src = $Rd";
2026 // shift right narrow (vector by immediate)
2027 multiclass NeonI_N2VShR_Narrow<bit u, bits<5> opcode, string asmop> {
2028 def _8B : N2VShR_Narrow<0b0, u, opcode, asmop, "8b", "8h", shr_imm8> {
2029 let Inst{22-19} = 0b0001;
2032 def _4H : N2VShR_Narrow<0b0, u, opcode, asmop, "4h", "4s", shr_imm16> {
2033 let Inst{22-20} = 0b001;
2036 def _2S : N2VShR_Narrow<0b0, u, opcode, asmop, "2s", "2d", shr_imm32> {
2037 let Inst{22-21} = 0b01;
2040 // Shift Narrow High
2041 def _16B : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "16b", "8h",
2043 let Inst{22-19} = 0b0001;
2046 def _8H : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "8h", "4s",
2048 let Inst{22-20} = 0b001;
2051 def _4S : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "4s", "2d",
2053 let Inst{22-21} = 0b01;
2057 // Shift right narrow
2058 defm SHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10000, "shrn">;
2060 // Shift right narrow (prefix Q is saturating, prefix R is rounding)
2061 defm QSHRUNvvi :NeonI_N2VShR_Narrow<0b1, 0b10000, "sqshrun">;
2062 defm RSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10001, "rshrn">;
2063 defm QRSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10001, "sqrshrun">;
2064 defm SQSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10010, "sqshrn">;
2065 defm UQSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10010, "uqshrn">;
2066 defm SQRSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10011, "sqrshrn">;
2067 defm UQRSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10011, "uqrshrn">;
// PatFrags that concatenate two 64-bit halves into one 128-bit vector,
// one per element type. Used below to match the "write high half" forms.
2069 def Neon_combine_2D : PatFrag<(ops node:$Rm, node:$Rn),
2070 (v2i64 (concat_vectors (v1i64 node:$Rm),
2071 (v1i64 node:$Rn)))>;
2072 def Neon_combine_8H : PatFrag<(ops node:$Rm, node:$Rn),
2073 (v8i16 (concat_vectors (v4i16 node:$Rm),
2074 (v4i16 node:$Rn)))>;
2075 def Neon_combine_4S : PatFrag<(ops node:$Rm, node:$Rn),
2076 (v4i32 (concat_vectors (v2i32 node:$Rm),
2077 (v2i32 node:$Rn)))>;
2078 def Neon_combine_4f : PatFrag<(ops node:$Rm, node:$Rn),
2079 (v4f32 (concat_vectors (v2f32 node:$Rm),
2080 (v2f32 node:$Rn)))>;
2081 def Neon_combine_2d : PatFrag<(ops node:$Rm, node:$Rn),
2082 (v2f64 (concat_vectors (v1f64 node:$Rm),
2083 (v1f64 node:$Rn)))>;
// Vector shift right (srl = logical, sra = arithmetic) by a scalar amount
// splatted across all lanes with Neon_vdup.
2085 def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
2086 (v8i16 (srl (v8i16 node:$lhs),
2087 (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
2088 def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
2089 (v4i32 (srl (v4i32 node:$lhs),
2090 (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
2091 def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
2092 (v2i64 (srl (v2i64 node:$lhs),
2093 (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
2094 def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
2095 (v8i16 (sra (v8i16 node:$lhs),
2096 (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
2097 def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
2098 (v4i32 (sra (v4i32 node:$lhs),
2099 (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
2100 def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
2101 (v2i64 (sra (v2i64 node:$lhs),
2102 (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
2104 // Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors)
// Selects SHRN from generic shift+truncate IR; the "shr" string parameter
// ("lshr" or "ashr") picks the Neon_*Imm PatFrags above via !cast.
// The concat_vectors (Neon_combine_2D) forms select the SHRN2 high-half
// variants, with SUBREG_TO_REG widening the preserved low half.
2105 multiclass Neon_shiftNarrow_patterns<string shr> {
2106 def : Pat<(v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H") VPR128:$Rn,
2108 (SHRNvvi_8B VPR128:$Rn, imm:$Imm)>;
2109 def : Pat<(v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S") VPR128:$Rn,
2111 (SHRNvvi_4H VPR128:$Rn, imm:$Imm)>;
2112 def : Pat<(v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D") VPR128:$Rn,
2114 (SHRNvvi_2S VPR128:$Rn, imm:$Imm)>;
2116 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2117 (v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H")
2118 VPR128:$Rn, (i32 imm:$Imm))))))),
2119 (SHRNvvi_16B (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
2120 VPR128:$Rn, imm:$Imm)>;
2121 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2122 (v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S")
2123 VPR128:$Rn, (i32 imm:$Imm))))))),
2124 (SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2125 VPR128:$Rn, imm:$Imm)>;
2126 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
2127 (v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D")
2128 VPR128:$Rn, (i32 imm:$Imm))))))),
2129 (SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2130 VPR128:$Rn, imm:$Imm)>;
// Same idea for the saturating/rounding narrow intrinsics: "op" is the
// intrinsic, "prefix" names the instruction family to instantiate.
2133 multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> {
2134 def : Pat<(v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm)),
2135 (!cast<Instruction>(prefix # "_8B") VPR128:$Rn, imm:$Imm)>;
2136 def : Pat<(v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm)),
2137 (!cast<Instruction>(prefix # "_4H") VPR128:$Rn, imm:$Imm)>;
2138 def : Pat<(v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm)),
2139 (!cast<Instruction>(prefix # "_2S") VPR128:$Rn, imm:$Imm)>;
2141 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2142 (v1i64 (bitconvert (v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm))))),
2143 (!cast<Instruction>(prefix # "_16B")
2144 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2145 VPR128:$Rn, imm:$Imm)>;
2146 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2147 (v1i64 (bitconvert (v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm))))),
2148 (!cast<Instruction>(prefix # "_8H")
2149 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2150 VPR128:$Rn, imm:$Imm)>;
2151 def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2152 (v1i64 (bitconvert (v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm))))),
2153 (!cast<Instruction>(prefix # "_4S")
2154 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2155 VPR128:$Rn, imm:$Imm)>;
2158 defm : Neon_shiftNarrow_patterns<"lshr">;
2159 defm : Neon_shiftNarrow_patterns<"ashr">;
2161 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrun, "QSHRUNvvi">;
2162 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vrshrn, "RSHRNvvi">;
2163 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrun, "QRSHRUNvvi">;
2164 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrn, "SQSHRNvvi">;
2165 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqshrn, "UQSHRNvvi">;
2166 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrn, "SQRSHRNvvi">;
2167 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqrshrn, "UQRSHRNvvi">;
2169 // Conversions between fixed-point and floating-point (by immediate #fbits)
// N2VCvt_Fx: shared base; DestTy/SrcTy select the direction and IntOp is
// the conversion intrinsic.
// NOTE(review): some source lines are missing from this extract; the code
// lines below are reproduced unchanged.
2170 class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T,
2171 RegisterOperand VPRC, ValueType DestTy, ValueType SrcTy,
2172 Operand ImmTy, SDPatternOperator IntOp>
2173 : NeonI_2VShiftImm<q, u, opcode,
2174 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
2175 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
2176 [(set (DestTy VPRC:$Rd), (DestTy (IntOp (SrcTy VPRC:$Rn),
2180 multiclass NeonI_N2VCvt_Fx2fp<bit u, bits<5> opcode, string asmop,
2181 SDPatternOperator IntOp> {
2182 def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2f32, v2i32,
2184 let Inst{22-21} = 0b01;
2187 def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4f32, v4i32,
2189 let Inst{22-21} = 0b01;
2192 def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2f64, v2i64,
2198 multiclass NeonI_N2VCvt_Fp2fx<bit u, bits<5> opcode, string asmop,
2199 SDPatternOperator IntOp> {
2200 def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2i32, v2f32,
2202 let Inst{22-21} = 0b01;
2205 def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4i32, v4f32,
2207 let Inst{22-21} = 0b01;
2210 def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2i64, v2f64,
2216 // Convert fixed-point to floating-point
2217 defm VCVTxs2f : NeonI_N2VCvt_Fx2fp<0, 0b11100, "scvtf",
2218 int_arm_neon_vcvtfxs2fp>;
2219 defm VCVTxu2f : NeonI_N2VCvt_Fx2fp<1, 0b11100, "ucvtf",
2220 int_arm_neon_vcvtfxu2fp>;
2222 // Convert floating-point to fixed-point
2223 defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs",
2224 int_arm_neon_vcvtfp2fxs>;
2225 defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu",
2226 int_arm_neon_vcvtfp2fxu>;
// PatFrags matching an extension (sext/zext) of the high half of a 128-bit
// vector; instantiated as NI_sext_high_* / NI_zext_high_* and used by the
// *L2/*W2 (high-half) instruction patterns below.
2228 multiclass Neon_sshll2_0<SDNode ext>
2230 def _v8i8 : PatFrag<(ops node:$Rn),
2231 (v8i16 (ext (v8i8 (Neon_High16B node:$Rn))))>;
2232 def _v4i16 : PatFrag<(ops node:$Rn),
2233 (v4i32 (ext (v4i16 (Neon_High8H node:$Rn))))>;
2234 def _v2i32 : PatFrag<(ops node:$Rn),
2235 (v2i64 (ext (v2i32 (Neon_High4S node:$Rn))))>;
2238 defm NI_sext_high : Neon_sshll2_0<sext>;
2239 defm NI_zext_high : Neon_sshll2_0<zext>;
2242 //===----------------------------------------------------------------------===//
2243 // Multiclasses for NeonI_Across
2244 //===----------------------------------------------------------------------===//
// Across-lanes reductions producing a widened (long) scalar result:
// byte sources reduce to a 16-bit scalar, halfword to 32-bit, word to
// 64-bit. There is no 1d2s form (see note below).
2248 multiclass NeonI_2VAcross_1<bit u, bits<5> opcode,
2249 string asmop, SDPatternOperator opnode>
2251 def _1h8b: NeonI_2VAcross<0b0, u, 0b00, opcode,
2252 (outs FPR16:$Rd), (ins VPR64:$Rn),
2253 asmop # "\t$Rd, $Rn.8b",
2254 [(set (v1i16 FPR16:$Rd),
2255 (v1i16 (opnode (v8i8 VPR64:$Rn))))],
2258 def _1h16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
2259 (outs FPR16:$Rd), (ins VPR128:$Rn),
2260 asmop # "\t$Rd, $Rn.16b",
2261 [(set (v1i16 FPR16:$Rd),
2262 (v1i16 (opnode (v16i8 VPR128:$Rn))))],
2265 def _1s4h: NeonI_2VAcross<0b0, u, 0b01, opcode,
2266 (outs FPR32:$Rd), (ins VPR64:$Rn),
2267 asmop # "\t$Rd, $Rn.4h",
2268 [(set (v1i32 FPR32:$Rd),
2269 (v1i32 (opnode (v4i16 VPR64:$Rn))))],
2272 def _1s8h: NeonI_2VAcross<0b1, u, 0b01, opcode,
2273 (outs FPR32:$Rd), (ins VPR128:$Rn),
2274 asmop # "\t$Rd, $Rn.8h",
2275 [(set (v1i32 FPR32:$Rd),
2276 (v1i32 (opnode (v8i16 VPR128:$Rn))))],
2279 // _1d2s doesn't exist!
2281 def _1d4s: NeonI_2VAcross<0b1, u, 0b10, opcode,
2282 (outs FPR64:$Rd), (ins VPR128:$Rn),
2283 asmop # "\t$Rd, $Rn.4s",
2284 [(set (v1i64 FPR64:$Rd),
2285 (v1i64 (opnode (v4i32 VPR128:$Rn))))],
2289 defm SADDLV : NeonI_2VAcross_1<0b0, 0b00011, "saddlv", int_aarch64_neon_saddlv>;
2290 defm UADDLV : NeonI_2VAcross_1<0b1, 0b00011, "uaddlv", int_aarch64_neon_uaddlv>;
// Across-lanes reductions whose scalar result keeps the element width
// (max/min/add across vector). No 1s2s form (see note below).
2294 multiclass NeonI_2VAcross_2<bit u, bits<5> opcode,
2295 string asmop, SDPatternOperator opnode>
2297 def _1b8b: NeonI_2VAcross<0b0, u, 0b00, opcode,
2298 (outs FPR8:$Rd), (ins VPR64:$Rn),
2299 asmop # "\t$Rd, $Rn.8b",
2300 [(set (v1i8 FPR8:$Rd),
2301 (v1i8 (opnode (v8i8 VPR64:$Rn))))],
2304 def _1b16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
2305 (outs FPR8:$Rd), (ins VPR128:$Rn),
2306 asmop # "\t$Rd, $Rn.16b",
2307 [(set (v1i8 FPR8:$Rd),
2308 (v1i8 (opnode (v16i8 VPR128:$Rn))))],
2311 def _1h4h: NeonI_2VAcross<0b0, u, 0b01, opcode,
2312 (outs FPR16:$Rd), (ins VPR64:$Rn),
2313 asmop # "\t$Rd, $Rn.4h",
2314 [(set (v1i16 FPR16:$Rd),
2315 (v1i16 (opnode (v4i16 VPR64:$Rn))))],
2318 def _1h8h: NeonI_2VAcross<0b1, u, 0b01, opcode,
2319 (outs FPR16:$Rd), (ins VPR128:$Rn),
2320 asmop # "\t$Rd, $Rn.8h",
2321 [(set (v1i16 FPR16:$Rd),
2322 (v1i16 (opnode (v8i16 VPR128:$Rn))))],
2325 // _1s2s doesn't exist!
2327 def _1s4s: NeonI_2VAcross<0b1, u, 0b10, opcode,
2328 (outs FPR32:$Rd), (ins VPR128:$Rn),
2329 asmop # "\t$Rd, $Rn.4s",
2330 [(set (v1i32 FPR32:$Rd),
2331 (v1i32 (opnode (v4i32 VPR128:$Rn))))],
2335 defm SMAXV : NeonI_2VAcross_2<0b0, 0b01010, "smaxv", int_aarch64_neon_smaxv>;
2336 defm UMAXV : NeonI_2VAcross_2<0b1, 0b01010, "umaxv", int_aarch64_neon_umaxv>;
2338 defm SMINV : NeonI_2VAcross_2<0b0, 0b11010, "sminv", int_aarch64_neon_sminv>;
2339 defm UMINV : NeonI_2VAcross_2<0b1, 0b11010, "uminv", int_aarch64_neon_uminv>;
2341 defm ADDV : NeonI_2VAcross_2<0b0, 0b11011, "addv", int_aarch64_neon_vaddv>;
// Floating-point across-lanes reductions: only the v4f32 -> f32 form
// exists, with the size field distinguishing the max (0b00) and min
// (0b10) encodings of each mnemonic pair.
2345 multiclass NeonI_2VAcross_3<bit u, bits<5> opcode, bits<2> size,
2346 string asmop, SDPatternOperator opnode>
2348 def _1s4s: NeonI_2VAcross<0b1, u, size, opcode,
2349 (outs FPR32:$Rd), (ins VPR128:$Rn),
2350 asmop # "\t$Rd, $Rn.4s",
2351 [(set (v1f32 FPR32:$Rd),
2352 (v1f32 (opnode (v4f32 VPR128:$Rn))))],
2356 defm FMAXNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b00, "fmaxnmv",
2357 int_aarch64_neon_vmaxnmv>;
2358 defm FMINNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b10, "fminnmv",
2359 int_aarch64_neon_vminnmv>;
2361 defm FMAXV : NeonI_2VAcross_3<0b1, 0b01111, 0b00, "fmaxv",
2362 int_aarch64_neon_vmaxv>;
2363 defm FMINV : NeonI_2VAcross_3<0b1, 0b01111, 0b10, "fminv",
2364 int_aarch64_neon_vminv>;
2366 // The following definitions are for the instruction class (3V Diff)
2368 // normal long/long2 pattern
// NeonI_3VDL: both sources are extended (ext = sext/zext, or the
// NI_*_high PatFrags for the "2" high-half variants) before opnode is
// applied, yielding a double-width 128-bit result.
2369 class NeonI_3VDL<bit q, bit u, bits<2> size, bits<4> opcode,
2370 string asmop, string ResS, string OpS,
2371 SDPatternOperator opnode, SDPatternOperator ext,
2372 RegisterOperand OpVPR,
2373 ValueType ResTy, ValueType OpTy>
2374 : NeonI_3VDiff<q, u, size, opcode,
2375 (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2376 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2377 [(set (ResTy VPR128:$Rd),
2378 (ResTy (opnode (ResTy (ext (OpTy OpVPR:$Rn))),
2379 (ResTy (ext (OpTy OpVPR:$Rm))))))],
// Signed low-half variants (sext of 64-bit sources).
2382 multiclass NeonI_3VDL_s<bit u, bits<4> opcode,
2383 string asmop, SDPatternOperator opnode,
2386 let isCommutable = Commutable in {
2387 def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2388 opnode, sext, VPR64, v8i16, v8i8>;
2389 def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2390 opnode, sext, VPR64, v4i32, v4i16>;
2391 def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2392 opnode, sext, VPR64, v2i64, v2i32>;
// Signed high-half ("2") variants.
2396 multiclass NeonI_3VDL2_s<bit u, bits<4> opcode,
2397 string asmop, SDPatternOperator opnode,
2400 let isCommutable = Commutable in {
2401 def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2402 opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
2403 def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2404 opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
2405 def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2406 opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
// Unsigned low-half variants (zext).
2410 multiclass NeonI_3VDL_u<bit u, bits<4> opcode,
2411 string asmop, SDPatternOperator opnode,
2414 let isCommutable = Commutable in {
2415 def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2416 opnode, zext, VPR64, v8i16, v8i8>;
2417 def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2418 opnode, zext, VPR64, v4i32, v4i16>;
2419 def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2420 opnode, zext, VPR64, v2i64, v2i32>;
// Unsigned high-half ("2") variants.
2424 multiclass NeonI_3VDL2_u<bit u, bits<4> opcode,
2425 string asmop, SDPatternOperator opnode,
2428 let isCommutable = Commutable in {
2429 def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2430 opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
2431 def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2432 opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
2433 def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2434 opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
// Long add/subtract: add is commutable (last template argument = 1),
// sub is not.
2438 defm SADDLvvv :  NeonI_3VDL_s<0b0, 0b0000, "saddl", add, 1>;
2439 defm UADDLvvv :  NeonI_3VDL_u<0b1, 0b0000, "uaddl", add, 1>;
2441 defm SADDL2vvv :  NeonI_3VDL2_s<0b0, 0b0000, "saddl2", add, 1>;
2442 defm UADDL2vvv :  NeonI_3VDL2_u<0b1, 0b0000, "uaddl2", add, 1>;
2444 defm SSUBLvvv :  NeonI_3VDL_s<0b0, 0b0010, "ssubl", sub, 0>;
2445 defm USUBLvvv :  NeonI_3VDL_u<0b1, 0b0010, "usubl", sub, 0>;
2447 defm SSUBL2vvv :  NeonI_3VDL2_s<0b0, 0b0010, "ssubl2", sub, 0>;
2448 defm USUBL2vvv :  NeonI_3VDL2_u<0b1, 0b0010, "usubl2", sub, 0>;
2450 // normal wide/wide2 pattern
// NeonI_3VDW: "wide" form — only the second source ($Rm) is extended; the
// first source is already at the 128-bit result width.
2451 class NeonI_3VDW<bit q, bit u, bits<2> size, bits<4> opcode,
2452 string asmop, string ResS, string OpS,
2453 SDPatternOperator opnode, SDPatternOperator ext,
2454 RegisterOperand OpVPR,
2455 ValueType ResTy, ValueType OpTy>
2456 : NeonI_3VDiff<q, u, size, opcode,
2457 (outs VPR128:$Rd), (ins VPR128:$Rn, OpVPR:$Rm),
2458 asmop # "\t$Rd." # ResS # ", $Rn." # ResS # ", $Rm." # OpS,
2459 [(set (ResTy VPR128:$Rd),
2460 (ResTy (opnode (ResTy VPR128:$Rn),
2461 (ResTy (ext (OpTy OpVPR:$Rm))))))],
// Signed wide, low-half sources.
2464 multiclass NeonI_3VDW_s<bit u, bits<4> opcode,
2465 string asmop, SDPatternOperator opnode>
2467 def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2468 opnode, sext, VPR64, v8i16, v8i8>;
2469 def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2470 opnode, sext, VPR64, v4i32, v4i16>;
2471 def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2472 opnode, sext, VPR64, v2i64, v2i32>;
2475 defm SADDWvvv :  NeonI_3VDW_s<0b0, 0b0001, "saddw", add>;
2476 defm SSUBWvvv :  NeonI_3VDW_s<0b0, 0b0011, "ssubw", sub>;
// Signed wide, high-half ("2") sources.
2478 multiclass NeonI_3VDW2_s<bit u, bits<4> opcode,
2479 string asmop, SDPatternOperator opnode>
2481 def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2482 opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
2483 def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2484 opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
2485 def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2486 opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
2489 defm SADDW2vvv :  NeonI_3VDW2_s<0b0, 0b0001, "saddw2", add>;
2490 defm SSUBW2vvv :  NeonI_3VDW2_s<0b0, 0b0011, "ssubw2", sub>;
// Unsigned wide, low-half sources.
2492 multiclass NeonI_3VDW_u<bit u, bits<4> opcode,
2493 string asmop, SDPatternOperator opnode>
2495 def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2496 opnode, zext, VPR64, v8i16, v8i8>;
2497 def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2498 opnode, zext, VPR64, v4i32, v4i16>;
2499 def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2500 opnode, zext, VPR64, v2i64, v2i32>;
2503 defm UADDWvvv :  NeonI_3VDW_u<0b1, 0b0001, "uaddw", add>;
2504 defm USUBWvvv :  NeonI_3VDW_u<0b1, 0b0011, "usubw", sub>;
// Unsigned wide, high-half ("2") sources.
2506 multiclass NeonI_3VDW2_u<bit u, bits<4> opcode,
2507 string asmop, SDPatternOperator opnode>
2509 def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2510 opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
2511 def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2512 opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
2513 def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2514 opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
2517 defm UADDW2vvv :  NeonI_3VDW2_u<0b1, 0b0001, "uaddw2", add>;
2518 defm USUBW2vvv :  NeonI_3VDW2_u<0b1, 0b0011, "usubw2", sub>;
2520 // Get the high half part of the vector element.
// PatFrags that extract the top half of each element: logical shift right
// by half the element width (8/16/32) followed by truncation to the
// narrower element type. Instantiated once as NI_get_hi_{8h,4s,2d}.
2521 multiclass NeonI_get_high
2523 def _8h : PatFrag<(ops node:$Rn),
2524 (v8i8 (trunc (v8i16 (srl (v8i16 node:$Rn),
2525 (v8i16 (Neon_vdup (i32 8)))))))>;
2526 def _4s : PatFrag<(ops node:$Rn),
2527 (v4i16 (trunc (v4i32 (srl (v4i32 node:$Rn),
2528 (v4i32 (Neon_vdup (i32 16)))))))>;
2529 def _2d : PatFrag<(ops node:$Rn),
2530 (v2i32 (trunc (v2i64 (srl (v2i64 node:$Rn),
2531 (v2i64 (Neon_vdup (i32 32)))))))>;
2534 defm NI_get_hi : NeonI_get_high;
2536 // pattern for addhn/subhn with 2 operands
// ADDHN/SUBHN: full-width add/sub whose result is narrowed by taking the
// high half of each element (the get_hi PatFrags above).
// NOTE(review): the class and the multiclass below both appear under the
// name NeonI_3VDN_addhn_2Op in this extract — possibly a truncation
// artifact of the damaged source; code lines reproduced unchanged.
2537 class NeonI_3VDN_addhn_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
2538 string asmop, string ResS, string OpS,
2539 SDPatternOperator opnode, SDPatternOperator get_hi,
2540 ValueType ResTy, ValueType OpTy>
2541 : NeonI_3VDiff<q, u, size, opcode,
2542 (outs VPR64:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
2543 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2544 [(set (ResTy VPR64:$Rd),
2546 (OpTy (opnode (OpTy VPR128:$Rn),
2547 (OpTy VPR128:$Rm))))))],
2550 multiclass NeonI_3VDN_addhn_2Op<bit u, bits<4> opcode,
2551 string asmop, SDPatternOperator opnode,
2554 let isCommutable = Commutable in {
2555 def _8b8h : NeonI_3VDN_addhn_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
2556 opnode, NI_get_hi_8h, v8i8, v8i16>;
2557 def _4h4s : NeonI_3VDN_addhn_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
2558 opnode, NI_get_hi_4s, v4i16, v4i32>;
2559 def _2s2d : NeonI_3VDN_addhn_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
2560 opnode, NI_get_hi_2d, v2i32, v2i64>;
2564 defm ADDHNvvv :  NeonI_3VDN_addhn_2Op<0b0, 0b0100, "addhn", add, 1>;
2565 defm SUBHNvvv :  NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>;
2567 // pattern for operation with 2 operands
// NeonI_3VD_2Op: generic two-source 3VDiff form; the opnode intrinsic is
// applied directly, with result and operand widths given independently.
2568 class NeonI_3VD_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
2569 string asmop, string ResS, string OpS,
2570 SDPatternOperator opnode,
2571 RegisterOperand ResVPR, RegisterOperand OpVPR,
2572 ValueType ResTy, ValueType OpTy>
2573 : NeonI_3VDiff<q, u, size, opcode,
2574 (outs ResVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2575 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2576 [(set (ResTy ResVPR:$Rd),
2577 (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))],
2580 // normal narrow pattern
// Narrowing forms: 128-bit sources, 64-bit result (used by the rounding
// halving-narrow intrinsics below).
2581 multiclass NeonI_3VDN_2Op<bit u, bits<4> opcode,
2582 string asmop, SDPatternOperator opnode,
2585 let isCommutable = Commutable in {
2586 def _8b8h : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
2587 opnode, VPR64, VPR128, v8i8, v8i16>;
2588 def _4h4s : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
2589 opnode, VPR64, VPR128, v4i16, v4i32>;
2590 def _2s2d : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
2591 opnode, VPR64, VPR128, v2i32, v2i64>;
2595 defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>;
2596 defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>;
2598 // pattern for acle intrinsic with 3 operands
// NeonI_3VDN_3Op: "2" (high-half) narrow forms with no ISel pattern; the
// destination's low half must be preserved, so $src is tied to $Rd and
// selection happens via the NarrowHighHalfPat definitions below instead.
2599 class NeonI_3VDN_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
2600 string asmop, string ResS, string OpS>
2601 : NeonI_3VDiff<q, u, size, opcode,
2602 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
2603 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2605 let Constraints = "$src = $Rd";
// NOTE(review): neverHasSideEffects was later renamed; in modern LLVM
// this is spelled "let hasSideEffects = 0" — confirm against the LLVM
// version this file targets before changing.
2606 let neverHasSideEffects = 1;
2609 multiclass NeonI_3VDN_3Op_v1<bit u, bits<4> opcode,
2611 def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">;
2612 def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">;
2613 def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">;
2616 defm ADDHN2vvv :  NeonI_3VDN_3Op_v1<0b0, 0b0100, "addhn2">;
2617 defm SUBHN2vvv :  NeonI_3VDN_3Op_v1<0b0, 0b0110, "subhn2">;
2619 defm RADDHN2vvv :  NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2">;
2620 defm RSUBHN2vvv :  NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2">;
2622 // Patterns have to be separate because there's a SUBREG_TO_REG in the output
// NarrowHighHalfPat: matches "concat(low-half $src, coreop($Rn, $Rm))" and
// selects the tied-operand *HN2 instruction, widening the preserved low
// half with SUBREG_TO_REG.
2624 class NarrowHighHalfPat<Instruction INST, ValueType DstTy, ValueType SrcTy,
2625 SDPatternOperator coreop>
2626 : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
2627 (v1i64 (bitconvert (DstTy (coreop (SrcTy VPR128:$Rn),
2628 (SrcTy VPR128:$Rm)))))),
2629 (INST (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2630 VPR128:$Rn, VPR128:$Rm)>;
// addhn2: narrow-high of an integer add.
2633 def : NarrowHighHalfPat<ADDHN2vvv_16b8h, v8i8, v8i16,
2634 BinOpFrag<(NI_get_hi_8h (add node:$LHS, node:$RHS))>>;
2635 def : NarrowHighHalfPat<ADDHN2vvv_8h4s, v4i16, v4i32,
2636 BinOpFrag<(NI_get_hi_4s (add node:$LHS, node:$RHS))>>;
2637 def : NarrowHighHalfPat<ADDHN2vvv_4s2d, v2i32, v2i64,
2638 BinOpFrag<(NI_get_hi_2d (add node:$LHS, node:$RHS))>>;
// subhn2: narrow-high of an integer subtract.
2641 def : NarrowHighHalfPat<SUBHN2vvv_16b8h, v8i8, v8i16,
2642 BinOpFrag<(NI_get_hi_8h (sub node:$LHS, node:$RHS))>>;
2643 def : NarrowHighHalfPat<SUBHN2vvv_8h4s, v4i16, v4i32,
2644 BinOpFrag<(NI_get_hi_4s (sub node:$LHS, node:$RHS))>>;
2645 def : NarrowHighHalfPat<SUBHN2vvv_4s2d, v2i32, v2i64,
2646 BinOpFrag<(NI_get_hi_2d (sub node:$LHS, node:$RHS))>>;
// raddhn2 / rsubhn2: rounding variants go through the ARM intrinsics.
2649 def : NarrowHighHalfPat<RADDHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vraddhn>;
2650 def : NarrowHighHalfPat<RADDHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vraddhn>;
2651 def : NarrowHighHalfPat<RADDHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vraddhn>;
2654 def : NarrowHighHalfPat<RSUBHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vrsubhn>;
2655 def : NarrowHighHalfPat<RSUBHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vrsubhn>;
2656 def : NarrowHighHalfPat<RSUBHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vrsubhn>;
2658 // Patterns that need to zero-extend the (narrow) result of opnode up to
2658b // the full-width result type.
// NeonI_3VDL_Ext: long-form 3-vector-different instruction whose selection
// pattern is (zext (opnode Rn, Rm)): opnode produces OpSTy, which is
// zero-extended to ResTy in the 128-bit destination.
2659 class NeonI_3VDL_Ext<bit q, bit u, bits<2> size, bits<4> opcode,
2660 string asmop, string ResS, string OpS,
2661 SDPatternOperator opnode,
2662 RegisterOperand OpVPR,
2663 ValueType ResTy, ValueType OpTy, ValueType OpSTy>
2664 : NeonI_3VDiff<q, u, size, opcode,
2665 (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2666 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2667 [(set (ResTy VPR128:$Rd),
2668 (ResTy (zext (OpSTy (opnode (OpTy OpVPR:$Rn),
2669 (OpTy OpVPR:$Rm))))))],
// Instantiates the three first-half (q = 0, 64-bit source) element-size
// variants: 8b->8h, 4h->4s, 2s->2d.
2672 multiclass NeonI_3VDL_zext<bit u, bits<4> opcode,
2673 string asmop, SDPatternOperator opnode,
2676 let isCommutable = Commutable in {
2677 def _8h8b : NeonI_3VDL_Ext<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2678 opnode, VPR64, v8i16, v8i8, v8i8>;
2679 def _4s4h : NeonI_3VDL_Ext<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2680 opnode, VPR64, v4i32, v4i16, v4i16>;
2681 def _2d2s : NeonI_3VDL_Ext<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2682 opnode, VPR64, v2i64, v2i32, v2i32>;
// Signed/unsigned absolute-difference long: the vabds/vabdu result is
// non-negative, so zero-extension is correct for both signedness variants.
2686 defm SABDLvvv : NeonI_3VDL_zext<0b0, 0b0111, "sabdl", int_arm_neon_vabds, 1>;
2687 defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>;
// NeonI_Op_High: builds "_16B"/"_8H"/"_4S" PatFrags that apply op to the
// high halves of two 128-bit operands (extracted by the Neon_High* frags).
// Used by the second-half ("2") instruction variants below via !cast.
2689 multiclass NeonI_Op_High<SDPatternOperator op>
2691 def _16B : PatFrag<(ops node:$Rn, node:$Rm),
2692 (op (v8i8 (Neon_High16B node:$Rn)), (v8i8 (Neon_High16B node:$Rm)))>;
2693 def _8H : PatFrag<(ops node:$Rn, node:$Rm),
2694 (op (v4i16 (Neon_High8H node:$Rn)), (v4i16 (Neon_High8H node:$Rm)))>;
2695 def _4S : PatFrag<(ops node:$Rn, node:$Rm),
2696 (op (v2i32 (Neon_High4S node:$Rn)), (v2i32 (Neon_High4S node:$Rm)))>;
// High-half wrappers for the abd/mull/qdmull/pmull intrinsics.
2700 defm NI_sabdl_hi : NeonI_Op_High<int_arm_neon_vabds>;
2701 defm NI_uabdl_hi : NeonI_Op_High<int_arm_neon_vabdu>;
2702 defm NI_smull_hi : NeonI_Op_High<int_arm_neon_vmulls>;
2703 defm NI_umull_hi : NeonI_Op_High<int_arm_neon_vmullu>;
2704 defm NI_qdmull_hi : NeonI_Op_High<int_arm_neon_vqdmull>;
2705 defm NI_pmull_hi : NeonI_Op_High<int_arm_neon_vmullp>;
// Second-half (q = 1) abs-diff-long variants; opnode is a string so the
// per-size high-half PatFrag can be looked up with !cast.
2707 multiclass NeonI_3VDL_Abd_u<bit u, bits<4> opcode,
2708 string asmop, string opnode,
2711 let isCommutable = Commutable in {
2712 def _8h8b : NeonI_3VDL_Ext<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2713 !cast<PatFrag>(opnode # "_16B"),
2714 VPR128, v8i16, v16i8, v8i8>;
2715 def _4s4h : NeonI_3VDL_Ext<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2716 !cast<PatFrag>(opnode # "_8H"),
2717 VPR128, v4i32, v8i16, v4i16>;
2718 def _2d2s : NeonI_3VDL_Ext<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2719 !cast<PatFrag>(opnode # "_4S"),
2720 VPR128, v2i64, v4i32, v2i32>;
2724 defm SABDL2vvv : NeonI_3VDL_Abd_u<0b0, 0b0111, "sabdl2", "NI_sabdl_hi", 1>;
2725 defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 0b0111, "uabdl2", "NI_uabdl_hi", 1>;
2727 // For patterns that need two operators chained together.
// NeonI_3VDL_Aba: accumulate-of-abs-diff shape — opnode (add/sub) combines
// the accumulator $src with the zero-extended result of subop(Rn, Rm).
// $src is tied to $Rd (read-modify-write accumulator).
2728 class NeonI_3VDL_Aba<bit q, bit u, bits<2> size, bits<4> opcode,
2729 string asmop, string ResS, string OpS,
2730 SDPatternOperator opnode, SDPatternOperator subop,
2731 RegisterOperand OpVPR,
2732 ValueType ResTy, ValueType OpTy, ValueType OpSTy>
2733 : NeonI_3VDiff<q, u, size, opcode,
2734 (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
2735 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2736 [(set (ResTy VPR128:$Rd),
2738 (ResTy VPR128:$src),
2739 (ResTy (zext (OpSTy (subop (OpTy OpVPR:$Rn),
2740 (OpTy OpVPR:$Rm))))))))],
2742 let Constraints = "$src = $Rd";
// First-half (q = 0) element-size variants: 8b, 4h, 2s sources.
2745 multiclass NeonI_3VDL_Aba_v1<bit u, bits<4> opcode,
2746 string asmop, SDPatternOperator opnode,
2747 SDPatternOperator subop>
2749 def _8h8b : NeonI_3VDL_Aba<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2750 opnode, subop, VPR64, v8i16, v8i8, v8i8>;
2751 def _4s4h : NeonI_3VDL_Aba<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2752 opnode, subop, VPR64, v4i32, v4i16, v4i16>;
2753 def _2d2s : NeonI_3VDL_Aba<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2754 opnode, subop, VPR64, v2i64, v2i32, v2i32>;
// sabal/uabal: accumulate (add) the signed/unsigned absolute difference.
2757 defm SABALvvv : NeonI_3VDL_Aba_v1<0b0, 0b0101, "sabal",
2758 add, int_arm_neon_vabds>;
2759 defm UABALvvv : NeonI_3VDL_Aba_v1<0b1, 0b0101, "uabal",
2760 add, int_arm_neon_vabdu>;
// Second-half (q = 1) accumulate-of-abs-diff variants; subop is a string
// name resolved with !cast to the matching NI_*_hi high-half PatFrag.
2762 multiclass NeonI_3VDL2_Aba_v1<bit u, bits<4> opcode,
2763 string asmop, SDPatternOperator opnode,
2766 def _8h8b : NeonI_3VDL_Aba<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2767 opnode, !cast<PatFrag>(subop # "_16B"),
2768 VPR128, v8i16, v16i8, v8i8>;
2769 def _4s4h : NeonI_3VDL_Aba<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2770 opnode, !cast<PatFrag>(subop # "_8H"),
2771 VPR128, v4i32, v8i16, v4i16>;
2772 def _2d2s : NeonI_3VDL_Aba<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2773 opnode, !cast<PatFrag>(subop # "_4S"),
2774 VPR128, v2i64, v4i32, v2i32>;
2777 defm SABAL2vvv : NeonI_3VDL2_Aba_v1<0b0, 0b0101, "sabal2", add,
2779 defm UABAL2vvv : NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add,
2782 // Long pattern with 2 operands
// First-half (q = 0) long instructions: 64-bit sources, 128-bit result,
// selected directly from a single opnode (e.g. the vmulls/vmullu intrinsics).
2783 multiclass NeonI_3VDL_2Op<bit u, bits<4> opcode,
2784 string asmop, SDPatternOperator opnode,
2787 let isCommutable = Commutable in {
2788 def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2789 opnode, VPR128, VPR64, v8i16, v8i8>;
2790 def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2791 opnode, VPR128, VPR64, v4i32, v4i16>;
2792 def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2793 opnode, VPR128, VPR64, v2i64, v2i32>;
2797 defm SMULLvvv : NeonI_3VDL_2Op<0b0, 0b1100, "smull", int_arm_neon_vmulls, 1>;
2798 defm UMULLvvv : NeonI_3VDL_2Op<0b1, 0b1100, "umull", int_arm_neon_vmullu, 1>;
// NeonI_3VDL2_2Op_mull: second-half multiply-long shape — both sources are
// full 128-bit registers and opnode is expected to be a high-half PatFrag.
2800 class NeonI_3VDL2_2Op_mull<bit q, bit u, bits<2> size, bits<4> opcode,
2801 string asmop, string ResS, string OpS,
2802 SDPatternOperator opnode,
2803 ValueType ResTy, ValueType OpTy>
2804 : NeonI_3VDiff<q, u, size, opcode,
2805 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
2806 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2807 [(set (ResTy VPR128:$Rd),
2808 (ResTy (opnode (OpTy VPR128:$Rn), (OpTy VPR128:$Rm))))],
// All three element sizes for the "2" (high-half) multiply-long forms;
// opnode is a string resolved with !cast per element size.
2812 multiclass NeonI_3VDL2_2Op_mull_v1<bit u, bits<4> opcode,
2817 let isCommutable = Commutable in {
2818 def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2819 !cast<PatFrag>(opnode # "_16B"),
2821 def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2822 !cast<PatFrag>(opnode # "_8H"),
2824 def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2825 !cast<PatFrag>(opnode # "_4S"),
2830 defm SMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b0, 0b1100, "smull2",
2832 defm UMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b1, 0b1100, "umull2",
2835 // Long pattern with 3 operands
// NeonI_3VDL_3Op: first-half multiply-accumulate-long shape — opnode sees
// the accumulator $src plus the two 64-bit sources; $src is tied to $Rd.
2836 class NeonI_3VDL_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
2837 string asmop, string ResS, string OpS,
2838 SDPatternOperator opnode,
2839 ValueType ResTy, ValueType OpTy>
2840 : NeonI_3VDiff<q, u, size, opcode,
2841 (outs VPR128:$Rd), (ins VPR128:$src, VPR64:$Rn, VPR64:$Rm),
2842 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2843 [(set (ResTy VPR128:$Rd),
2845 (ResTy VPR128:$src),
2846 (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))))],
2848 let Constraints = "$src = $Rd";
// The three first-half element-size variants.
2851 multiclass NeonI_3VDL_3Op_v1<bit u, bits<4> opcode,
2852 string asmop, SDPatternOperator opnode>
2854 def _8h8b : NeonI_3VDL_3Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2855 opnode, v8i16, v8i8>;
2856 def _4s4h : NeonI_3VDL_3Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2857 opnode, v4i32, v4i16>;
2858 def _2d2s : NeonI_3VDL_3Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2859 opnode, v2i64, v2i32>;
// PatFrags pairing an accumulate ($Rd) with a signed/unsigned multiply-long
// of $Rn/$Rm (smlal/umlal add the product; smlsl/umlsl subtract it).
2862 def Neon_smlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
2864 (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
2866 def Neon_umlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
2868 (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
2870 def Neon_smlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
2872 (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
2874 def Neon_umlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
2876 (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
2878 defm SMLALvvv : NeonI_3VDL_3Op_v1<0b0, 0b1000, "smlal", Neon_smlal>;
2879 defm UMLALvvv : NeonI_3VDL_3Op_v1<0b1, 0b1000, "umlal", Neon_umlal>;
2881 defm SMLSLvvv : NeonI_3VDL_3Op_v1<0b0, 0b1010, "smlsl", Neon_smlsl>;
2882 defm UMLSLvvv : NeonI_3VDL_3Op_v1<0b1, 0b1010, "umlsl", Neon_umlsl>;
// NeonI_3VDL2_3Op_mlas: multiply-accumulate/subtract-long shape — subop
// (add/sub) combines the tied accumulator $src with opnode(Rn, Rm).
2884 class NeonI_3VDL2_3Op_mlas<bit q, bit u, bits<2> size, bits<4> opcode,
2885 string asmop, string ResS, string OpS,
2886 SDPatternOperator subop, SDPatternOperator opnode,
2887 RegisterOperand OpVPR,
2888 ValueType ResTy, ValueType OpTy>
2889 : NeonI_3VDiff<q, u, size, opcode,
2890 (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
2891 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2892 [(set (ResTy VPR128:$Rd),
2894 (ResTy VPR128:$src),
2895 (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))],
2897 let Constraints = "$src = $Rd";
// Second-half (q = 1) variants; opnode is a string resolved with !cast to
// the matching NI_*_hi high-half multiply PatFrag.
2900 multiclass NeonI_3VDL2_3Op_mlas_v1<bit u, bits<4> opcode,
2902 SDPatternOperator subop,
2905 def _8h16b : NeonI_3VDL2_3Op_mlas<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2906 subop, !cast<PatFrag>(opnode # "_16B"),
2907 VPR128, v8i16, v16i8>;
2908 def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2909 subop, !cast<PatFrag>(opnode # "_8H"),
2910 VPR128, v4i32, v8i16>;
2911 def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2912 subop, !cast<PatFrag>(opnode # "_4S"),
2913 VPR128, v2i64, v4i32>;
// smlal2/umlal2 accumulate (add) the high-half product; smlsl2/umlsl2
// subtract it.
2916 defm SMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1000, "smlal2",
2917 add, "NI_smull_hi">;
2918 defm UMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1000, "umlal2",
2919 add, "NI_umull_hi">;
2921 defm SMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1010, "smlsl2",
2922 sub, "NI_smull_hi">;
2923 defm UMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1010, "umlsl2",
2924 sub, "NI_umull_hi">;
// Saturating doubling multiply accumulate/subtract long (first half,
// q = 0): the inner op is always vqdmull, the outer opnode is the
// saturating add/sub. No byte variant exists for qdmull.
2926 multiclass NeonI_3VDL_qdmlal_3Op_v2<bit u, bits<4> opcode,
2927 string asmop, SDPatternOperator opnode>
2929 def _4s4h : NeonI_3VDL2_3Op_mlas<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2930 opnode, int_arm_neon_vqdmull,
2931 VPR64, v4i32, v4i16>;
2932 def _2d2s : NeonI_3VDL2_3Op_mlas<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2933 opnode, int_arm_neon_vqdmull,
2934 VPR64, v2i64, v2i32>;
2937 defm SQDMLALvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1001, "sqdmlal",
2938 int_arm_neon_vqadds>;
2939 defm SQDMLSLvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1011, "sqdmlsl",
2940 int_arm_neon_vqsubs>;
// Two-operand long multiclass restricted to the H and S element sizes
// (used for sqdmull, which has no byte form).
2942 multiclass NeonI_3VDL_v2<bit u, bits<4> opcode,
2943 string asmop, SDPatternOperator opnode,
2946 let isCommutable = Commutable in {
2947 def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2948 opnode, VPR128, VPR64, v4i32, v4i16>;
2949 def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2950 opnode, VPR128, VPR64, v2i64, v2i32>;
2954 defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull",
2955 int_arm_neon_vqdmull, 1>;
// Second-half (q = 1) sqdmull2 multiclass: H and S element sizes only,
// high-half multiply PatFrag resolved with !cast.
2957 multiclass NeonI_3VDL2_2Op_mull_v2<bit u, bits<4> opcode,
2962 let isCommutable = Commutable in {
2963 def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2964 !cast<PatFrag>(opnode # "_8H"),
2966 def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2967 !cast<PatFrag>(opnode # "_4S"),
2972 defm SQDMULL2vvv : NeonI_3VDL2_2Op_mull_v2<0b0, 0b1101, "sqdmull2",
// Second-half saturating doubling multiply accumulate/subtract long:
// inner op is the high-half qdmull PatFrag, outer opnode saturating add/sub.
2975 multiclass NeonI_3VDL2_3Op_qdmlal_v2<bit u, bits<4> opcode,
2977 SDPatternOperator opnode>
2979 def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2980 opnode, NI_qdmull_hi_8H,
2981 VPR128, v4i32, v8i16>;
2982 def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2983 opnode, NI_qdmull_hi_4S,
2984 VPR128, v2i64, v4i32>;
2987 defm SQDMLAL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1001, "sqdmlal2",
2988 int_arm_neon_vqadds>;
2989 defm SQDMLSL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1011, "sqdmlsl2",
2990 int_arm_neon_vqsubs>;
// Polynomial multiply long only has a byte form, hence the B-only
// multiclasses for pmull/pmull2.
2992 multiclass NeonI_3VDL_v3<bit u, bits<4> opcode,
2993 string asmop, SDPatternOperator opnode,
2996 let isCommutable = Commutable in {
2997 def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2998 opnode, VPR128, VPR64, v8i16, v8i8>;
3002 defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp, 1>;
3004 multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode,
3009 let isCommutable = Commutable in {
3010 def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
3011 !cast<PatFrag>(opnode # "_16B"),
3016 defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2",
3019 // End of implementation for instruction class (3V Diff)
3021 // The following are vector load/store multiple N-element structure
3022 // (class SIMD lselem).
3024 // ld1: load multiple 1-element structure to 1/2/3/4 registers.
3025 // ld2/ld3/ld4: load multiple N-element structure to N registers (N = 2, 3, 4).
3026 // The structure consists of a sequence of sets of N values.
3027 // The first element of the structure is placed in the first lane
3028 // of the first vector, the second element in the first lane
3029 // of the second vector, and so on.
3030 // E.g. LD1_3V_2S will load 32-bit elements {A, B, C, D, E, F} sequentially into
3031 // the three 64-bit vectors list {BA, DC, FE}.
3032 // E.g. LD3_2S will load 32-bit elements {A, B, C, D, E, F} into the three
3033 // 64-bit vectors list {DA, EB, FC}.
3034 // Store instructions store multiple structure to N registers like load.
// Base class for the load-multiple forms: loads a register list $Rt from
// the base address in $Rn. No selection pattern here; ISel uses the
// intrinsic patterns elsewhere, so mark no side effects.
3037 class NeonI_LDVList<bit q, bits<4> opcode, bits<2> size,
3038 RegisterOperand VecList, string asmop>
3039 : NeonI_LdStMult<q, 1, opcode, size,
3040 (outs VecList:$Rt), (ins GPR64xsp:$Rn),
3041 asmop # "\t$Rt, [$Rn]",
3045 let neverHasSideEffects = 1;
// Instantiates every arrangement except 1D: 64-bit (q = 0) 8B/4H/2S and
// 128-bit (q = 1) 16B/8H/4S/2D; the register-list operand is looked up
// by name, e.g. "VPair" # "8B_operand".
3048 multiclass LDVList_BHSD<bits<4> opcode, string List, string asmop> {
3049 def _8B : NeonI_LDVList<0, opcode, 0b00,
3050 !cast<RegisterOperand>(List # "8B_operand"), asmop>;
3052 def _4H : NeonI_LDVList<0, opcode, 0b01,
3053 !cast<RegisterOperand>(List # "4H_operand"), asmop>;
3055 def _2S : NeonI_LDVList<0, opcode, 0b10,
3056 !cast<RegisterOperand>(List # "2S_operand"), asmop>;
3058 def _16B : NeonI_LDVList<1, opcode, 0b00,
3059 !cast<RegisterOperand>(List # "16B_operand"), asmop>;
3061 def _8H : NeonI_LDVList<1, opcode, 0b01,
3062 !cast<RegisterOperand>(List # "8H_operand"), asmop>;
3064 def _4S : NeonI_LDVList<1, opcode, 0b10,
3065 !cast<RegisterOperand>(List # "4S_operand"), asmop>;
3067 def _2D : NeonI_LDVList<1, opcode, 0b11,
3068 !cast<RegisterOperand>(List # "2D_operand"), asmop>;
3071 // Load multiple N-element structure to N consecutive registers (N = 1,2,3,4)
3072 defm LD1 : LDVList_BHSD<0b0111, "VOne", "ld1">;
3073 def LD1_1D : NeonI_LDVList<0, 0b0111, 0b11, VOne1D_operand, "ld1">;
3075 defm LD2 : LDVList_BHSD<0b1000, "VPair", "ld2">;
3077 defm LD3 : LDVList_BHSD<0b0100, "VTriple", "ld3">;
3079 defm LD4 : LDVList_BHSD<0b0000, "VQuad", "ld4">;
3081 // Load multiple 1-element structure to N consecutive registers (N = 2,3,4)
3082 defm LD1_2V : LDVList_BHSD<0b1010, "VPair", "ld1">;
3083 def LD1_2V_1D : NeonI_LDVList<0, 0b1010, 0b11, VPair1D_operand, "ld1">;
3085 defm LD1_3V : LDVList_BHSD<0b0110, "VTriple", "ld1">;
3086 def LD1_3V_1D : NeonI_LDVList<0, 0b0110, 0b11, VTriple1D_operand, "ld1">;
3088 defm LD1_4V : LDVList_BHSD<0b0010, "VQuad", "ld1">;
3089 def LD1_4V_1D : NeonI_LDVList<0, 0b0010, 0b11, VQuad1D_operand, "ld1">;
// Store counterpart of NeonI_LDVList: stores register list $Rt to the base
// address in $Rn (note l = 0 in NeonI_LdStMult and $Rt is an input).
3091 class NeonI_STVList<bit q, bits<4> opcode, bits<2> size,
3092 RegisterOperand VecList, string asmop>
3093 : NeonI_LdStMult<q, 0, opcode, size,
3094 (outs), (ins GPR64xsp:$Rn, VecList:$Rt),
3095 asmop # "\t$Rt, [$Rn]",
3099 let neverHasSideEffects = 1;
// Same arrangement coverage as LDVList_BHSD, for stores.
3102 multiclass STVList_BHSD<bits<4> opcode, string List, string asmop> {
3103 def _8B : NeonI_STVList<0, opcode, 0b00,
3104 !cast<RegisterOperand>(List # "8B_operand"), asmop>;
3106 def _4H : NeonI_STVList<0, opcode, 0b01,
3107 !cast<RegisterOperand>(List # "4H_operand"), asmop>;
3109 def _2S : NeonI_STVList<0, opcode, 0b10,
3110 !cast<RegisterOperand>(List # "2S_operand"), asmop>;
3112 def _16B : NeonI_STVList<1, opcode, 0b00,
3113 !cast<RegisterOperand>(List # "16B_operand"), asmop>;
3115 def _8H : NeonI_STVList<1, opcode, 0b01,
3116 !cast<RegisterOperand>(List # "8H_operand"), asmop>;
3118 def _4S : NeonI_STVList<1, opcode, 0b10,
3119 !cast<RegisterOperand>(List # "4S_operand"), asmop>;
3121 def _2D : NeonI_STVList<1, opcode, 0b11,
3122 !cast<RegisterOperand>(List # "2D_operand"), asmop>;
3125 // Store multiple N-element structures from N registers (N = 1,2,3,4)
3126 defm ST1 : STVList_BHSD<0b0111, "VOne", "st1">;
3127 def ST1_1D : NeonI_STVList<0, 0b0111, 0b11, VOne1D_operand, "st1">;
3129 defm ST2 : STVList_BHSD<0b1000, "VPair", "st2">;
3131 defm ST3 : STVList_BHSD<0b0100, "VTriple", "st3">;
3133 defm ST4 : STVList_BHSD<0b0000, "VQuad", "st4">;
3135 // Store multiple 1-element structures from N consecutive registers (N = 2,3,4)
3136 defm ST1_2V : STVList_BHSD<0b1010, "VPair", "st1">;
3137 def ST1_2V_1D : NeonI_STVList<0, 0b1010, 0b11, VPair1D_operand, "st1">;
3139 defm ST1_3V : STVList_BHSD<0b0110, "VTriple", "st1">;
3140 def ST1_3V_1D : NeonI_STVList<0, 0b0110, 0b11, VTriple1D_operand, "st1">;
3142 defm ST1_4V : STVList_BHSD<0b0010, "VQuad", "st1">;
3143 def ST1_4V_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">;
3145 // End of vector load/store multiple N-element structure (class SIMD lselem)
3147 // Scalar Three Same
// Scalar three-same instruction shells. Naming convention for the def
// suffixes: one letter per operand width, destination first (e.g. "ddd"
// = D-reg result, two D-reg sources). Selection patterns are attached
// separately by the *_patterns multiclasses below.
3149 class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
3150 : NeonI_Scalar3Same<u, 0b11, opcode,
3151 (outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm),
3152 !strconcat(asmop, " $Rd, $Rn, $Rm"),
// H and S integer-element sizes only.
3156 multiclass NeonI_Scalar3Same_HS_sizes<bit u, bits<5> opcode,
3157 string asmop, bit Commutable = 0>
3159 let isCommutable = Commutable in {
3160 def hhh : NeonI_Scalar3Same<u, 0b01, opcode,
3161 (outs FPR16:$Rd), (ins FPR16:$Rn, FPR16:$Rm),
3162 !strconcat(asmop, " $Rd, $Rn, $Rm"),
3165 def sss : NeonI_Scalar3Same<u, 0b10, opcode,
3166 (outs FPR32:$Rd), (ins FPR32:$Rn, FPR32:$Rm),
3167 !strconcat(asmop, " $Rd, $Rn, $Rm"),
// Floating-point S/D sizes; size_high is the size<1> bit shared by the
// two FP encodings (size<0> selects S vs D).
3173 multiclass NeonI_Scalar3Same_SD_sizes<bit u, bit size_high, bits<5> opcode,
3174 string asmop, bit Commutable = 0>
3176 let isCommutable = Commutable in {
3177 def sss : NeonI_Scalar3Same<u, {size_high, 0b0}, opcode,
3178 (outs FPR32:$Rd), (ins FPR32:$Rn, FPR32:$Rm),
3179 !strconcat(asmop, " $Rd, $Rn, $Rm"),
3182 def ddd : NeonI_Scalar3Same<u, {size_high, 0b1}, opcode,
3183 (outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm),
3184 !strconcat(asmop, " $Rd, $Rn, $Rm"),
// All four integer-element sizes (B/H/S/D).
3190 multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
3191 string asmop, bit Commutable = 0>
3193 let isCommutable = Commutable in {
3194 def bbb : NeonI_Scalar3Same<u, 0b00, opcode,
3195 (outs FPR8:$Rd), (ins FPR8:$Rn, FPR8:$Rm),
3196 !strconcat(asmop, " $Rd, $Rn, $Rm"),
3199 def hhh : NeonI_Scalar3Same<u, 0b01, opcode,
3200 (outs FPR16:$Rd), (ins FPR16:$Rn, FPR16:$Rm),
3201 !strconcat(asmop, " $Rd, $Rn, $Rm"),
3204 def sss : NeonI_Scalar3Same<u, 0b10, opcode,
3205 (outs FPR32:$Rd), (ins FPR32:$Rn, FPR32:$Rm),
3206 !strconcat(asmop, " $Rd, $Rn, $Rm"),
3209 def ddd : NeonI_Scalar3Same<u, 0b11, opcode,
3210 (outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm),
3211 !strconcat(asmop, " $Rd, $Rn, $Rm"),
// Pattern helpers: map v1iNN/v1fNN scalar-vector ops onto the instruction
// shells above, one Pat per element size.
3217 multiclass Neon_Scalar3Same_D_size_patterns<SDPatternOperator opnode,
3218 Instruction INSTD> {
3219 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
3220 (INSTD FPR64:$Rn, FPR64:$Rm)>;
3223 multiclass Neon_Scalar3Same_BHSD_size_patterns<SDPatternOperator opnode,
3228 : Neon_Scalar3Same_D_size_patterns<opnode, INSTD> {
3229 def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
3230 (INSTB FPR8:$Rn, FPR8:$Rm)>;
3232 def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
3233 (INSTH FPR16:$Rn, FPR16:$Rm)>;
3235 def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
3236 (INSTS FPR32:$Rn, FPR32:$Rm)>;
3239 class Neon_Scalar3Same_cmp_D_size_patterns<SDPatternOperator opnode,
3241 : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
3242 (INSTD FPR64:$Rn, FPR64:$Rm)>;
3244 multiclass Neon_Scalar3Same_HS_size_patterns<SDPatternOperator opnode,
3246 Instruction INSTS> {
3247 def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
3248 (INSTH FPR16:$Rn, FPR16:$Rm)>;
3249 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
3250 (INSTS FPR32:$Rn, FPR32:$Rm)>;
3253 multiclass Neon_Scalar3Same_SD_size_patterns<SDPatternOperator opnode,
3255 Instruction INSTD> {
3256 def : Pat<(v1f32 (opnode (v1f32 FPR32:$Rn), (v1f32 FPR32:$Rm))),
3257 (INSTS FPR32:$Rn, FPR32:$Rm)>;
3258 def : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
3259 (INSTD FPR64:$Rn, FPR64:$Rm)>;
// FP compare: FP sources, integer (mask) result.
3262 multiclass Neon_Scalar3Same_cmp_SD_size_patterns<SDPatternOperator opnode,
3264 Instruction INSTD> {
3265 def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn), (v1f32 FPR32:$Rm))),
3266 (INSTS FPR32:$Rn, FPR32:$Rm)>;
3267 def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
3268 (INSTD FPR64:$Rn, FPR64:$Rm)>;
3271 // Scalar Three Different
// Scalar three-different: result is wider than the sources
// (H,H -> S and S,S -> D).
3273 multiclass NeonI_Scalar3Diff_HS_size<bit u, bits<4> opcode, string asmop> {
3274 def shh : NeonI_Scalar3Diff<u, 0b01, opcode,
3275 (outs FPR32:$Rd), (ins FPR16:$Rn, FPR16:$Rm),
3276 !strconcat(asmop, " $Rd, $Rn, $Rm"),
3279 def dss : NeonI_Scalar3Diff<u, 0b10, opcode,
3280 (outs FPR64:$Rd), (ins FPR32:$Rn, FPR32:$Rm),
3281 !strconcat(asmop, " $Rd, $Rn, $Rm"),
// Multiply-accumulate variants: wide accumulator $Src is tied to $Rd.
3286 multiclass NeonI_Scalar3Diff_ml_HS_size<bit u, bits<4> opcode, string asmop> {
3287 let Constraints = "$Src = $Rd" in {
3288 def shh : NeonI_Scalar3Diff<u, 0b01, opcode,
3289 (outs FPR32:$Rd), (ins FPR32:$Src, FPR16:$Rn, FPR16:$Rm),
3290 !strconcat(asmop, " $Rd, $Rn, $Rm"),
3293 def dss : NeonI_Scalar3Diff<u, 0b10, opcode,
3294 (outs FPR64:$Rd), (ins FPR64:$Src, FPR32:$Rn, FPR32:$Rm),
3295 !strconcat(asmop, " $Rd, $Rn, $Rm"),
// Selection patterns for the widening forms above.
3301 multiclass Neon_Scalar3Diff_HS_size_patterns<SDPatternOperator opnode,
3303 Instruction INSTS> {
3304 def : Pat<(v1i32 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
3305 (INSTH FPR16:$Rn, FPR16:$Rm)>;
3306 def : Pat<(v1i64 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
3307 (INSTS FPR32:$Rn, FPR32:$Rm)>;
3310 multiclass Neon_Scalar3Diff_ml_HS_size_patterns<SDPatternOperator opnode,
3312 Instruction INSTS> {
3313 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
3314 (INSTH FPR32:$Src, FPR16:$Rn, FPR16:$Rm)>;
3315 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
3316 (INSTS FPR64:$Src, FPR32:$Rn, FPR32:$Rm)>;
3319 // Scalar Two Registers Miscellaneous
// Scalar two-register miscellaneous shells (single source, single dest).
// FP S/D sizes; size_high is the shared size<1> encoding bit.
3321 multiclass NeonI_Scalar2SameMisc_SD_size<bit u, bit size_high, bits<5> opcode,
3323 def ss : NeonI_Scalar2SameMisc<u, {size_high, 0b0}, opcode,
3324 (outs FPR32:$Rd), (ins FPR32:$Rn),
3325 !strconcat(asmop, " $Rd, $Rn"),
3327 def dd : NeonI_Scalar2SameMisc<u, {size_high, 0b1}, opcode,
3328 (outs FPR64:$Rd), (ins FPR64:$Rn),
3329 !strconcat(asmop, " $Rd, $Rn"),
// D-only integer form.
3333 multiclass NeonI_Scalar2SameMisc_D_size<bit u, bits<5> opcode, string asmop> {
3334 def dd: NeonI_Scalar2SameMisc<u, 0b11, opcode,
3335 (outs FPR64:$Rd), (ins FPR64:$Rn),
3336 !strconcat(asmop, " $Rd, $Rn"),
// All four integer sizes; inherits the D form from the multiclass above.
3340 multiclass NeonI_Scalar2SameMisc_BHSD_size<bit u, bits<5> opcode, string asmop>
3341 : NeonI_Scalar2SameMisc_D_size<u, opcode, asmop> {
3342 def bb : NeonI_Scalar2SameMisc<u, 0b00, opcode,
3343 (outs FPR8:$Rd), (ins FPR8:$Rn),
3344 !strconcat(asmop, " $Rd, $Rn"),
3346 def hh : NeonI_Scalar2SameMisc<u, 0b01, opcode,
3347 (outs FPR16:$Rd), (ins FPR16:$Rn),
3348 !strconcat(asmop, " $Rd, $Rn"),
3350 def ss : NeonI_Scalar2SameMisc<u, 0b10, opcode,
3351 (outs FPR32:$Rd), (ins FPR32:$Rn),
3352 !strconcat(asmop, " $Rd, $Rn"),
// Narrowing forms: result register is one size class below the source
// (H->B, S->H, D->S).
3356 multiclass NeonI_Scalar2SameMisc_narrow_HSD_size<bit u, bits<5> opcode,
3358 def bh : NeonI_Scalar2SameMisc<u, 0b00, opcode,
3359 (outs FPR8:$Rd), (ins FPR16:$Rn),
3360 !strconcat(asmop, " $Rd, $Rn"),
3362 def hs : NeonI_Scalar2SameMisc<u, 0b01, opcode,
3363 (outs FPR16:$Rd), (ins FPR32:$Rn),
3364 !strconcat(asmop, " $Rd, $Rn"),
3366 def sd : NeonI_Scalar2SameMisc<u, 0b10, opcode,
3367 (outs FPR32:$Rd), (ins FPR64:$Rn),
3368 !strconcat(asmop, " $Rd, $Rn"),
// Accumulating forms: $Src tied to $Rd (read-modify-write).
3372 multiclass NeonI_Scalar2SameMisc_accum_BHSD_size<bit u, bits<5> opcode,
3375 let Constraints = "$Src = $Rd" in {
3376 def bb : NeonI_Scalar2SameMisc<u, 0b00, opcode,
3377 (outs FPR8:$Rd), (ins FPR8:$Src, FPR8:$Rn),
3378 !strconcat(asmop, " $Rd, $Rn"),
3380 def hh : NeonI_Scalar2SameMisc<u, 0b01, opcode,
3381 (outs FPR16:$Rd), (ins FPR16:$Src, FPR16:$Rn),
3382 !strconcat(asmop, " $Rd, $Rn"),
3384 def ss : NeonI_Scalar2SameMisc<u, 0b10, opcode,
3385 (outs FPR32:$Rd), (ins FPR32:$Src, FPR32:$Rn),
3386 !strconcat(asmop, " $Rd, $Rn"),
3388 def dd: NeonI_Scalar2SameMisc<u, 0b11, opcode,
3389 (outs FPR64:$Rd), (ins FPR64:$Src, FPR64:$Rn),
3390 !strconcat(asmop, " $Rd, $Rn"),
// Int-to-FP convert patterns; separate S and D opnodes since the
// source/result element types differ per size.
3395 multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns<SDPatternOperator Sopnode,
3396 SDPatternOperator Dopnode,
3398 Instruction INSTD> {
3399 def : Pat<(v1f32 (Sopnode (v1i32 FPR32:$Rn))),
3401 def : Pat<(v1f64 (Dopnode (v1i64 FPR64:$Rn))),
3405 multiclass Neon_Scalar2SameMisc_SD_size_patterns<SDPatternOperator opnode,
3407 Instruction INSTD> {
3408 def : Pat<(v1f32 (opnode (v1f32 FPR32:$Rn))),
3410 def : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))),
// Compare-against-zero shells: the zero comes in as an explicit immediate
// operand (#0 / #0.0) in the assembly string.
3414 class NeonI_Scalar2SameMisc_cmpz_D_size<bit u, bits<5> opcode, string asmop>
3415 : NeonI_Scalar2SameMisc<u, 0b11, opcode,
3416 (outs FPR64:$Rd), (ins FPR64:$Rn, neon_uimm0:$Imm),
3417 !strconcat(asmop, " $Rd, $Rn, $Imm"),
3421 multiclass NeonI_Scalar2SameMisc_cmpz_SD_size<bit u, bits<5> opcode,
3423 def ssi : NeonI_Scalar2SameMisc<u, 0b10, opcode,
3424 (outs FPR32:$Rd), (ins FPR32:$Rn, fpz32:$FPImm),
3425 !strconcat(asmop, " $Rd, $Rn, $FPImm"),
3428 def ddi : NeonI_Scalar2SameMisc<u, 0b11, opcode,
3429 (outs FPR64:$Rd), (ins FPR64:$Rn, fpz64movi:$FPImm),
3430 !strconcat(asmop, " $Rd, $Rn, $FPImm"),
// Compare-against-zero patterns: the all-zeros vector operand is matched
// and rewritten to the literal immediate 0.
3435 class Neon_Scalar2SameMisc_cmpz_D_size_patterns<SDPatternOperator opnode,
3437 : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
3438 (v1i64 (bitconvert (v8i8 Neon_immAllZeros))))),
3439 (INSTD FPR64:$Rn, 0)>;
3441 multiclass Neon_Scalar2SameMisc_cmpz_SD_size_patterns<SDPatternOperator opnode,
3443 Instruction INSTD> {
3444 def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn),
3445 (v1f32 (scalar_to_vector (f32 fpimm:$FPImm))))),
3446 (INSTS FPR32:$Rn, fpimm:$FPImm)>;
3447 def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn),
3448 (v1f64 (bitconvert (v8i8 Neon_immAllZeros))))),
3449 (INSTD FPR64:$Rn, 0)>;
// Per-size integer patterns for the plain two-register misc forms.
3452 multiclass Neon_Scalar2SameMisc_D_size_patterns<SDPatternOperator opnode,
3453 Instruction INSTD> {
3454 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn))),
3458 multiclass Neon_Scalar2SameMisc_BHSD_size_patterns<SDPatternOperator opnode,
3463 : Neon_Scalar2SameMisc_D_size_patterns<opnode, INSTD> {
3464 def : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn))),
3466 def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn))),
3468 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn))),
3472 multiclass Neon_Scalar2SameMisc_narrow_HSD_size_patterns<
3473 SDPatternOperator opnode,
3476 Instruction INSTD> {
3477 def : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn))),
3479 def : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn))),
3481 def : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn))),
3486 multiclass Neon_Scalar2SameMisc_accum_BHSD_size_patterns<
3487 SDPatternOperator opnode,
3491 Instruction INSTD> {
3492 def : Pat<(v1i8 (opnode (v1i8 FPR8:$Src), (v1i8 FPR8:$Rn))),
3493 (INSTB FPR8:$Src, FPR8:$Rn)>;
3494 def : Pat<(v1i16 (opnode (v1i16 FPR16:$Src), (v1i16 FPR16:$Rn))),
3495 (INSTH FPR16:$Src, FPR16:$Rn)>;
3496 def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i32 FPR32:$Rn))),
3497 (INSTS FPR32:$Src, FPR32:$Rn)>;
3498 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn))),
3499 (INSTD FPR64:$Src, FPR64:$Rn)>;
3502 // Scalar Shift By Immediate
// Generic scalar shift-by-immediate shell; each instantiation below fixes
// the immh:immb field layout (Inst{22-16}) for its element size.
3504 class NeonI_ScalarShiftImm_size<bit u, bits<5> opcode, string asmop,
3505 RegisterClass FPRC, Operand ImmTy>
3506 : NeonI_ScalarShiftImm<u, opcode,
3507 (outs FPRC:$Rd), (ins FPRC:$Rn, ImmTy:$Imm),
3508 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
// Right shifts: D-only, then the B/H/S/D multiclass. The leading ones in
// immh select the element size; the remaining bits encode the shift amount.
3511 multiclass NeonI_ScalarShiftRightImm_D_size<bit u, bits<5> opcode,
3513 def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
3515 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
3516 let Inst{21-16} = Imm;
3520 multiclass NeonI_ScalarShiftRightImm_BHSD_size<bit u, bits<5> opcode,
3522 : NeonI_ScalarShiftRightImm_D_size<u, opcode, asmop> {
3523 def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shr_imm8> {
3525 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
3526 let Inst{18-16} = Imm;
3528 def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shr_imm16> {
3530 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
3531 let Inst{19-16} = Imm;
3533 def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
3535 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
3536 let Inst{20-16} = Imm;
// Left shifts: identical encoding scheme but shl_imm* immediate operands.
3540 multiclass NeonI_ScalarShiftLeftImm_D_size<bit u, bits<5> opcode,
3542 def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shl_imm64> {
3544 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
3545 let Inst{21-16} = Imm;
3549 multiclass NeonI_ScalarShiftLeftImm_BHSD_size<bit u, bits<5> opcode,
3551 : NeonI_ScalarShiftLeftImm_D_size<u, opcode, asmop> {
3552 def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shl_imm8> {
3554 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
3555 let Inst{18-16} = Imm;
3557 def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shl_imm16> {
3559 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
3560 let Inst{19-16} = Imm;
3562 def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shl_imm32> {
3564 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
3565 let Inst{20-16} = Imm;
// Shift-and-accumulate (ssra etc.): D-only, $Src tied to $Rd.
3569 class NeonI_ScalarShiftImm_accum_D_size<bit u, bits<5> opcode, string asmop>
3570 : NeonI_ScalarShiftImm<u, opcode,
3571 (outs FPR64:$Rd), (ins FPR64:$Src, FPR64:$Rn, shr_imm64:$Imm),
3572 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
3575 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
3576 let Inst{21-16} = Imm;
3577 let Constraints = "$Src = $Rd";
// Narrowing shift shell: destination register class is one size below
// the source.
3580 class NeonI_ScalarShiftImm_narrow_size<bit u, bits<5> opcode, string asmop,
3581 RegisterClass FPRCD, RegisterClass FPRCS,
3583 : NeonI_ScalarShiftImm<u, opcode,
3584 (outs FPRCD:$Rd), (ins FPRCS:$Rn, ImmTy:$Imm),
3585 !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
3588 multiclass NeonI_ScalarShiftImm_narrow_HSD_size<bit u, bits<5> opcode,
3590 def bhi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR8, FPR16,
3593 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
3594 let Inst{18-16} = Imm;
3596 def hsi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR16, FPR32,
3599 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
3600 let Inst{19-16} = Imm;
3602 def sdi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR32, FPR64,
3605 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
3606 let Inst{20-16} = Imm;
// Fixed-point convert (scvtf/ucvtf with fractional bits): S and D only.
3610 multiclass NeonI_ScalarShiftImm_scvtf_SD_size<bit u, bits<5> opcode, string asmop> {
3611 def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
3613 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
3614 let Inst{20-16} = Imm;
3616 def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
3618 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
3619 let Inst{21-16} = Imm;
// Shift-by-immediate selection patterns; the shift amount arrives as an
// i32 immediate operand of the intrinsic.
3623 multiclass Neon_ScalarShiftImm_D_size_patterns<SDPatternOperator opnode,
3624 Instruction INSTD> {
3625 def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 imm:$Imm))),
3626 (INSTD FPR64:$Rn, imm:$Imm)>;
3629 multiclass Neon_ScalarShiftImm_BHSD_size_patterns<SDPatternOperator opnode,
3634 : Neon_ScalarShiftImm_D_size_patterns<opnode, INSTD> {
3635 def bbi : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (i32 imm:$Imm))),
3636 (INSTB FPR8:$Rn, imm:$Imm)>;
3637 def hhi : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (i32 imm:$Imm))),
3638 (INSTH FPR16:$Rn, imm:$Imm)>;
3639 def ssi : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (i32 imm:$Imm))),
3640 (INSTS FPR32:$Rn, imm:$Imm)>;
// Accumulating form: (accumulator, value, shift amount).
3643 class Neon_ScalarShiftImm_accum_D_size_patterns<SDPatternOperator opnode,
3645 : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn), (i32 imm:$Imm))),
3646 (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>;
3648 multiclass Neon_ScalarShiftImm_narrow_HSD_size_patterns<
3649 SDPatternOperator opnode,
3652 Instruction INSTD> {
3653 def bhi : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn), (i32 imm:$Imm))),
3654 (INSTH FPR16:$Rn, imm:$Imm)>;
3655 def hsi : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn), (i32 imm:$Imm))),
3656 (INSTS FPR32:$Rn, imm:$Imm)>;
3657 def sdi : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn), (i32 imm:$Imm))),
3658 (INSTD FPR64:$Rn, imm:$Imm)>;
// Fixed-point int->FP convert patterns; separate opnodes per size.
3661 multiclass Neon_ScalarShiftImm_scvtf_SD_size_patterns<SDPatternOperator Sopnode,
3662 SDPatternOperator Dopnode,
3664 Instruction INSTD> {
3665 def ssi : Pat<(v1f32 (Sopnode (v1i32 FPR32:$Rn), (i32 imm:$Imm))),
3666 (INSTS FPR32:$Rn, imm:$Imm)>;
3667 def ddi : Pat<(v1f64 (Dopnode (v1i64 FPR64:$Rn), (i32 imm:$Imm))),
3668 (INSTD FPR64:$Rn, imm:$Imm)>;
3671 // Scalar Signed Shift Right (Immediate)
3672 defm SSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00000, "sshr">;
3673 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vshrds_n, SSHRddi>;
3675 // Scalar Unsigned Shift Right (Immediate)
3676 defm USHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00000, "ushr">;
3677 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vshrdu_n, USHRddi>;
3679 // Scalar Signed Rounding Shift Right (Immediate)
3680 defm SRSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00100, "srshr">;
3681 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vrshrds_n, SRSHRddi>;
3683 // Scalar Unsigned Rounding Shift Right (Immediate)
3684 defm URSHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00100, "urshr">;
3685 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vrshrdu_n, URSHRddi>;
3687 // Scalar Signed Shift Right and Accumulate (Immediate)
3688 def SSRA : NeonI_ScalarShiftImm_accum_D_size<0b0, 0b00010, "ssra">;
3689 def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vsrads_n, SSRA>;
3691 // Scalar Unsigned Shift Right and Accumulate (Immediate)
3692 def USRA : NeonI_ScalarShiftImm_accum_D_size<0b1, 0b00010, "usra">;
3693 def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vsradu_n, USRA>;
3695 // Scalar Signed Rounding Shift Right and Accumulate (Immediate)
3696 def SRSRA : NeonI_ScalarShiftImm_accum_D_size<0b0, 0b00110, "srsra">;
3697 def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vrsrads_n, SRSRA>;
3699 // Scalar Unsigned Rounding Shift Right and Accumulate (Immediate)
3700 def URSRA : NeonI_ScalarShiftImm_accum_D_size<0b1, 0b00110, "ursra">;
3701 def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vrsradu_n, URSRA>;
3703 // Scalar Shift Left (Immediate)
3704 defm SHL : NeonI_ScalarShiftLeftImm_D_size<0b0, 0b01010, "shl">;
3705 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vshld_n, SHLddi>;
3707 // Signed Saturating Shift Left (Immediate)
3708 defm SQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b0, 0b01110, "sqshl">;
3709 defm : Neon_ScalarShiftImm_BHSD_size_patterns<int_aarch64_neon_vqshls_n,
3711 SQSHLssi, SQSHLddi>;
3713 // Unsigned Saturating Shift Left (Immediate)
3714 defm UQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01110, "uqshl">;
3715 defm : Neon_ScalarShiftImm_BHSD_size_patterns<int_aarch64_neon_vqshlu_n,
3717 UQSHLssi, UQSHLddi>;
3719 // Signed Saturating Shift Left Unsigned (Immediate)
3720 defm SQSHLU : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01100, "sqshlu">;
3721 defm : Neon_ScalarShiftImm_BHSD_size_patterns<int_aarch64_neon_vqshlus_n,
3722 SQSHLUbbi, SQSHLUhhi,
3723 SQSHLUssi, SQSHLUddi>;
3725 // Shift Right And Insert (Immediate)
3726 defm SRI : NeonI_ScalarShiftRightImm_D_size<0b1, 0b01000, "sri">;
3727 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vsrid_n, SRIddi>;
3729 // Shift Left And Insert (Immediate)
3730 defm SLI : NeonI_ScalarShiftLeftImm_D_size<0b1, 0b01010, "sli">;
3731 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vslid_n, SLIddi>;
3733 // Signed Saturating Shift Right Narrow (Immediate)
3734 defm SQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10010, "sqshrn">;
3735 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrn,
3736 SQSHRNbhi, SQSHRNhsi,
3739 // Unsigned Saturating Shift Right Narrow (Immediate)
3740 defm UQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10010, "uqshrn">;
3741 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqshrn,
3742 UQSHRNbhi, UQSHRNhsi,
3745 // Signed Saturating Rounded Shift Right Narrow (Immediate)
3746 defm SQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10011, "sqrshrn">;
3747 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrn,
3748 SQRSHRNbhi, SQRSHRNhsi,
3751 // Unsigned Saturating Rounded Shift Right Narrow (Immediate)
3752 defm UQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10011, "uqrshrn">;
3753 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqrshrn,
3754 UQRSHRNbhi, UQRSHRNhsi,
3757 // Signed Saturating Shift Right Unsigned Narrow (Immediate)
3758 defm SQSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10000, "sqshrun">;
3759 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrun,
3760 SQSHRUNbhi, SQSHRUNhsi,
3763 // Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
3764 defm SQRSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10001, "sqrshrun">;
3765 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrun,
3766 SQRSHRUNbhi, SQRSHRUNhsi,
3769 // Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
3770 defm SCVTF_N : NeonI_ScalarShiftImm_scvtf_SD_size<0b0, 0b11100, "scvtf">;
3771 defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtf32_n_s32,
3772 int_aarch64_neon_vcvtf64_n_s64,
3773 SCVTF_Nssi, SCVTF_Nddi>;
3775 // Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
3776 defm UCVTF_N : NeonI_ScalarShiftImm_scvtf_SD_size<0b1, 0b11100, "ucvtf">;
3777 defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtf32_n_u32,
3778 int_aarch64_neon_vcvtf64_n_u64,
3779 UCVTF_Nssi, UCVTF_Nddi>;
3781 // Scalar Integer Add
3782 let isCommutable = 1 in {
3783 def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
3786 // Scalar Integer Sub
3787 def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">;
3789 // Pattern for Scalar Integer Add and Sub with D register only
3790 defm : Neon_Scalar3Same_D_size_patterns<add, ADDddd>;
3791 defm : Neon_Scalar3Same_D_size_patterns<sub, SUBddd>;
3793 // Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub
3794 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vaddds, ADDddd>;
3795 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vadddu, ADDddd>;
3796 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubds, SUBddd>;
3797 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubdu, SUBddd>;
3799 // Scalar Integer Saturating Add (Signed, Unsigned)
3800 defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;
3801 defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;
3803 // Scalar Integer Saturating Sub (Signed, Unsigned)
3804 defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
3805 defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;
3807 // Patterns to match llvm.arm.* intrinsic for
3808 // Scalar Integer Saturating Add, Sub (Signed, Unsigned)
3809 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqadds, SQADDddd>;
3810 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqaddu, UQADDddd>;
3811 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqsubs, SQSUBddd>;
3812 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqsubu, UQSUBddd>;
3814 // Patterns to match llvm.aarch64.* intrinsic for
3815 // Scalar Integer Saturating Add, Sub (Signed, Unsigned)
3816 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqadds, SQADDbbb,
3817 SQADDhhh, SQADDsss, SQADDddd>;
3818 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqaddu, UQADDbbb,
3819 UQADDhhh, UQADDsss, UQADDddd>;
3820 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqsubs, SQSUBbbb,
3821 SQSUBhhh, SQSUBsss, SQSUBddd>;
3822 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqsubu, UQSUBbbb,
3823 UQSUBhhh, UQSUBsss, UQSUBddd>;
3825 // Scalar Integer Saturating Doubling Multiply Half High
3826 defm SQDMULH : NeonI_Scalar3Same_HS_sizes<0b0, 0b10110, "sqdmulh", 1>;
3828 // Scalar Integer Saturating Rounding Doubling Multiply Half High
3829 defm SQRDMULH : NeonI_Scalar3Same_HS_sizes<0b1, 0b10110, "sqrdmulh", 1>;
3831 // Patterns to match llvm.arm.* intrinsic for
3832 // Scalar Integer Saturating Doubling Multiply Half High and
3833 // Scalar Integer Saturating Rounding Doubling Multiply Half High
3834 defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqdmulh, SQDMULHhhh,
3836 defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqrdmulh, SQRDMULHhhh,
3839 // Scalar Floating-point Multiply Extended
3840 defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>;
3842 // Scalar Floating-point Reciprocal Step
3843 defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>;
3845 // Scalar Floating-point Reciprocal Square Root Step
3846 defm FRSQRTS : NeonI_Scalar3Same_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", 0>;
3848 // Patterns to match llvm.arm.* intrinsic for
3849 // Scalar Floating-point Reciprocal Step and
3850 // Scalar Floating-point Reciprocal Square Root Step
3851 defm : Neon_Scalar3Same_SD_size_patterns<int_arm_neon_vrecps, FRECPSsss,
3853 defm : Neon_Scalar3Same_SD_size_patterns<int_arm_neon_vrsqrts, FRSQRTSsss,
3856 // Patterns to match llvm.aarch64.* intrinsic for
3857 // Scalar Floating-point Multiply Extended,
3858 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vmulx, FMULXsss,
3861 // Scalar Integer Shift Left (Signed, Unsigned)
3862 def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
3863 def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;
3865 // Patterns to match llvm.arm.* intrinsic for
3866 // Scalar Integer Shift Left (Signed, Unsigned)
3867 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
3868 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
3870 // Patterns to match llvm.aarch64.* intrinsic for
3871 // Scalar Integer Shift Left (Signed, Unsigned)
3872 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshlds, SSHLddd>;
3873 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshldu, USHLddd>;
3875 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
3876 defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
3877 defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;
3879 // Patterns to match llvm.aarch64.* intrinsic for
3880 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
3881 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshls, SQSHLbbb,
3882 SQSHLhhh, SQSHLsss, SQSHLddd>;
3883 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshlu, UQSHLbbb,
3884 UQSHLhhh, UQSHLsss, UQSHLddd>;
3886 // Patterns to match llvm.arm.* intrinsic for
3887 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
3888 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
3889 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
3891 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
3892 def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">;
3893 def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">;
3895 // Patterns to match llvm.aarch64.* intrinsic for
3896 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
3897 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshlds, SRSHLddd>;
3898 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshldu, URSHLddd>;
3900 // Patterns to match llvm.arm.* intrinsic for
3901 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
3902 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
3903 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
3905 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
3906 defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>;
3907 defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>;
3909 // Patterns to match llvm.aarch64.* intrinsic for
3910 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
3911 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshls, SQRSHLbbb,
3912 SQRSHLhhh, SQRSHLsss, SQRSHLddd>;
3913 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb,
3914 UQRSHLhhh, UQRSHLsss, UQRSHLddd>;
3916 // Patterns to match llvm.arm.* intrinsic for
3917 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
3918 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
3919 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
3921 // Signed Saturating Doubling Multiply-Add Long
3922 defm SQDMLAL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1001, "sqdmlal">;
3923 defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlal,
3924 SQDMLALshh, SQDMLALdss>;
3926 // Signed Saturating Doubling Multiply-Subtract Long
3927 defm SQDMLSL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1011, "sqdmlsl">;
3928 defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlsl,
3929 SQDMLSLshh, SQDMLSLdss>;
3931 // Signed Saturating Doubling Multiply Long
3932 defm SQDMULL : NeonI_Scalar3Diff_HS_size<0b0, 0b1101, "sqdmull">;
3933 defm : Neon_Scalar3Diff_HS_size_patterns<int_aarch64_neon_vqdmull,
3934 SQDMULLshh, SQDMULLdss>;
3936 // Scalar Signed Integer Convert To Floating-point
3937 defm SCVTF : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11101, "scvtf">;
3938 defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtf32_s32,
3939 int_aarch64_neon_vcvtf64_s64,
3942 // Scalar Unsigned Integer Convert To Floating-point
3943 defm UCVTF : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11101, "ucvtf">;
3944 defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtf32_u32,
3945 int_aarch64_neon_vcvtf64_u64,
3948 // Scalar Floating-point Reciprocal Estimate
3949 defm FRECPE : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11101, "frecpe">;
3950 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_arm_neon_vrecpe,
3951 FRECPEss, FRECPEdd>;
3953 // Scalar Floating-point Reciprocal Exponent
3954 defm FRECPX : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11111, "frecpx">;
3955 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpx,
3956 FRECPXss, FRECPXdd>;
3958 // Scalar Floating-point Reciprocal Square Root Estimate
3959 defm FRSQRTE: NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11101, "frsqrte">;
3960 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_arm_neon_vrsqrte,
3961 FRSQRTEss, FRSQRTEdd>;
3963 // Scalar Integer Compare
// Register-vs-register compares on the 64-bit D form (ddd suffix), each paired
// with a pattern that selects it for the corresponding llvm.aarch64.neon.*
// intrinsic. Zero-compare forms (ddi suffix) follow below.
3965 // Scalar Compare Bitwise Equal
3966 def CMEQddd: NeonI_Scalar3Same_D_size<0b1, 0b10001, "cmeq">;
3967 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vceq, CMEQddd>;
3969 // Scalar Compare Signed Greater Than Or Equal
3970 def CMGEddd: NeonI_Scalar3Same_D_size<0b0, 0b00111, "cmge">;
3971 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vcge, CMGEddd>;
3973 // Scalar Compare Unsigned Higher Or Same
3974 def CMHSddd: NeonI_Scalar3Same_D_size<0b1, 0b00111, "cmhs">;
3975 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vchs, CMHSddd>;
3977 // Scalar Compare Unsigned Higher
3978 def CMHIddd: NeonI_Scalar3Same_D_size<0b1, 0b00110, "cmhi">;
3979 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vchi, CMHIddd>;
3981 // Scalar Compare Signed Greater Than
3982 def CMGTddd: NeonI_Scalar3Same_D_size<0b0, 0b00110, "cmgt">;
3983 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vcgt, CMGTddd>;
3985 // Scalar Compare Bitwise Test Bits
// NOTE: CMTST shares opcode 0b10001 with CMEQ; the two are distinguished
// solely by the U bit (CMEQ: u=1, CMTST: u=0), as with CMGE/CMHS and
// CMGT/CMHI above.
3986 def CMTSTddd: NeonI_Scalar3Same_D_size<0b0, 0b10001, "cmtst">;
3987 def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vtstd, CMTSTddd>;
3989 // Scalar Compare Bitwise Equal To Zero
3990 def CMEQddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01001, "cmeq">;
3991 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vceq,
3994 // Scalar Compare Signed Greater Than Or Equal To Zero
3995 def CMGEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01000, "cmge">;
3996 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcge,
3999 // Scalar Compare Signed Greater Than Zero
4000 def CMGTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01000, "cmgt">;
4001 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcgt,
4004 // Scalar Compare Signed Less Than Or Equal To Zero
4005 def CMLEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01001, "cmle">;
4006 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vclez,
4009 // Scalar Compare Less Than Zero
4010 def CMLTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01010, "cmlt">;
4011 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcltz,
4014 // Scalar Floating-point Compare
// Each defm expands to an S-register and a D-register variant; the pattern
// instantiations reference them via the generated sss/ddd (register form) and
// ssi/ddi (compare-against-zero form) suffixes.
4016 // Scalar Floating-point Compare Mask Equal
4017 defm FCMEQ: NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11100, "fcmeq">;
4018 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vceq,
4019 FCMEQsss, FCMEQddd>;
4021 // Scalar Floating-point Compare Mask Equal To Zero
4022 defm FCMEQZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01101, "fcmeq">;
4023 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vceq,
4024 FCMEQZssi, FCMEQZddi>;
4026 // Scalar Floating-point Compare Mask Greater Than Or Equal
4027 defm FCMGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11100, "fcmge">;
4028 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcge,
4029 FCMGEsss, FCMGEddd>;
4031 // Scalar Floating-point Compare Mask Greater Than Or Equal To Zero
4032 defm FCMGEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01100, "fcmge">;
4033 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcge,
4034 FCMGEZssi, FCMGEZddi>;
4036 // Scalar Floating-point Compare Mask Greater Than
4037 defm FCMGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11100, "fcmgt">;
4038 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcgt,
4039 FCMGTsss, FCMGTddd>;
4041 // Scalar Floating-point Compare Mask Greater Than Zero
4042 defm FCMGTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01100, "fcmgt">;
4043 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcgt,
4044 FCMGTZssi, FCMGTZddi>;
4046 // Scalar Floating-point Compare Mask Less Than Or Equal To Zero
4047 defm FCMLEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01101, "fcmle">;
4048 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vclez,
4049 FCMLEZssi, FCMLEZddi>;
4051 // Scalar Floating-point Compare Mask Less Than Zero
4052 defm FCMLTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01110, "fcmlt">;
4053 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcltz,
4054 FCMLTZssi, FCMLTZddi>;
4056 // Scalar Floating-point Absolute Compare Mask Greater Than Or Equal
4057 defm FACGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11101, "facge">;
4058 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcage,
4059 FACGEsss, FACGEddd>;
4061 // Scalar Floating-point Absolute Compare Mask Greater Than
4062 defm FACGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11101, "facgt">;
4063 defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcagt,
4064 FACGTsss, FACGTddd>;
4066 // Scalar Absolute Value
4067 defm ABS : NeonI_Scalar2SameMisc_D_size<0b0, 0b01011, "abs">;
4068 defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vabs, ABSdd>;
4070 // Scalar Signed Saturating Absolute Value
4071 defm SQABS : NeonI_Scalar2SameMisc_BHSD_size<0b0, 0b00111, "sqabs">;
4072 defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqabs,
4073 SQABSbb, SQABShh, SQABSss, SQABSdd>;
4076 defm NEG : NeonI_Scalar2SameMisc_D_size<0b1, 0b01011, "neg">;
4077 defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vneg, NEGdd>;
4079 // Scalar Signed Saturating Negate
4080 defm SQNEG : NeonI_Scalar2SameMisc_BHSD_size<0b1, 0b00111, "sqneg">;
4081 defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqneg,
4082 SQNEGbb, SQNEGhh, SQNEGss, SQNEGdd>;
4084 // Scalar Signed Saturating Accumulated of Unsigned Value
4085 defm SUQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b0, 0b00011, "suqadd">;
4086 defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vuqadd,
4088 SUQADDss, SUQADDdd>;
4090 // Scalar Unsigned Saturating Accumulated of Signed Value
4091 defm USQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b1, 0b00011, "usqadd">;
4092 defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vsqadd,
4094 USQADDss, USQADDdd>;
4096 // Scalar Signed Saturating Extract Unsigned Narrow
4097 defm SQXTUN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10010, "sqxtun">;
4098 defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnsu,
4102 // Scalar Signed Saturating Extract Narrow
4103 defm SQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b0, 0b10100, "sqxtn">;
4104 defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovns,
4108 // Scalar Unsigned Saturating Extract Narrow
4109 defm UQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10100, "uqxtn">;
4110 defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnu,
4114 // Scalar Reduce Pairwise
4116 multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,
4117 string asmop, bit Commutable = 0> {
4118 let isCommutable = Commutable in {
4119 def _D_2D : NeonI_ScalarPair<u, {size, 0b1}, opcode,
4120 (outs FPR64:$Rd), (ins VPR128:$Rn),
4121 !strconcat(asmop, " $Rd, $Rn.2d"),
4127 multiclass NeonI_ScalarPair_SD_sizes<bit u, bit size, bits<5> opcode,
4128 string asmop, bit Commutable = 0>
4129 : NeonI_ScalarPair_D_sizes<u, size, opcode, asmop, Commutable> {
4130 let isCommutable = Commutable in {
4131 def _S_2S : NeonI_ScalarPair<u, {size, 0b0}, opcode,
4132 (outs FPR32:$Rd), (ins VPR64:$Rn),
4133 !strconcat(asmop, " $Rd, $Rn.2s"),
4139 // Scalar Reduce Addition Pairwise (Integer) with
4140 // Pattern to match llvm.arm.* intrinsic
4141 defm ADDPvv : NeonI_ScalarPair_D_sizes<0b0, 0b1, 0b11011, "addp", 0>;
4143 // Pattern to match llvm.aarch64.* intrinsic for
4144 // Scalar Reduce Addition Pairwise (Integer)
4145 def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))),
4146 (ADDPvv_D_2D VPR128:$Rn)>;
4148 // Scalar Reduce Addition Pairwise (Floating Point)
4149 defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>;
4151 // Scalar Reduce Maximum Pairwise (Floating Point)
4152 defm FMAXPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01111, "fmaxp", 0>;
4154 // Scalar Reduce Minimum Pairwise (Floating Point)
4155 defm FMINPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01111, "fminp", 0>;
4157 // Scalar Reduce maxNum Pairwise (Floating Point)
4158 defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>;
4160 // Scalar Reduce minNum Pairwise (Floating Point)
4161 defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>;
4163 multiclass Neon_ScalarPair_SD_size_patterns<SDPatternOperator opnodeS,
4164 SDPatternOperator opnodeD,
4166 Instruction INSTD> {
4167 def : Pat<(v1f32 (opnodeS (v2f32 VPR64:$Rn))),
4169 def : Pat<(v1f64 (opnodeD (v2f64 VPR128:$Rn))),
4170 (INSTD VPR128:$Rn)>;
4173 // Patterns to match llvm.aarch64.* intrinsic for
4174 // Scalar Reduce Add, Max, Min, MaxNum, MinNum Pairwise (Floating Point)
4175 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfadd,
4176 int_aarch64_neon_vpfaddq, FADDPvv_S_2S, FADDPvv_D_2D>;
4178 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmax,
4179 int_aarch64_neon_vpmaxq, FMAXPvv_S_2S, FMAXPvv_D_2D>;
4181 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmin,
4182 int_aarch64_neon_vpminq, FMINPvv_S_2S, FMINPvv_D_2D>;
4184 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm,
4185 int_aarch64_neon_vpfmaxnmq, FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>;
4187 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm,
4188 int_aarch64_neon_vpfminnmq, FMINNMPvv_S_2S, FMINNMPvv_D_2D>;
4192 //===----------------------------------------------------------------------===//
4193 // Non-Instruction Patterns
4194 //===----------------------------------------------------------------------===//
4196 // 64-bit vector bitcasts...
// Within a single register bank, a bitconvert is a no-op: each pattern maps
// the result straight onto the unmodified source register.
4198 def : Pat<(v1i64 (bitconvert (v8i8 VPR64:$src))), (v1i64 VPR64:$src)>;
4199 def : Pat<(v2f32 (bitconvert (v8i8 VPR64:$src))), (v2f32 VPR64:$src)>;
4200 def : Pat<(v2i32 (bitconvert (v8i8 VPR64:$src))), (v2i32 VPR64:$src)>;
4201 def : Pat<(v4i16 (bitconvert (v8i8 VPR64:$src))), (v4i16 VPR64:$src)>;
4203 def : Pat<(v1i64 (bitconvert (v4i16 VPR64:$src))), (v1i64 VPR64:$src)>;
4204 def : Pat<(v2i32 (bitconvert (v4i16 VPR64:$src))), (v2i32 VPR64:$src)>;
4205 def : Pat<(v2f32 (bitconvert (v4i16 VPR64:$src))), (v2f32 VPR64:$src)>;
4206 def : Pat<(v8i8 (bitconvert (v4i16 VPR64:$src))), (v8i8 VPR64:$src)>;
4208 def : Pat<(v1i64 (bitconvert (v2i32 VPR64:$src))), (v1i64 VPR64:$src)>;
4209 def : Pat<(v2f32 (bitconvert (v2i32 VPR64:$src))), (v2f32 VPR64:$src)>;
4210 def : Pat<(v4i16 (bitconvert (v2i32 VPR64:$src))), (v4i16 VPR64:$src)>;
4211 def : Pat<(v8i8 (bitconvert (v2i32 VPR64:$src))), (v8i8 VPR64:$src)>;
4213 def : Pat<(v1i64 (bitconvert (v2f32 VPR64:$src))), (v1i64 VPR64:$src)>;
4214 def : Pat<(v2i32 (bitconvert (v2f32 VPR64:$src))), (v2i32 VPR64:$src)>;
4215 def : Pat<(v4i16 (bitconvert (v2f32 VPR64:$src))), (v4i16 VPR64:$src)>;
4216 def : Pat<(v8i8 (bitconvert (v2f32 VPR64:$src))), (v8i8 VPR64:$src)>;
4218 def : Pat<(v2f32 (bitconvert (v1i64 VPR64:$src))), (v2f32 VPR64:$src)>;
4219 def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
4220 def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
4221 def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
4223 // ..and 128-bit vector bitcasts...
4225 def : Pat<(v2f64 (bitconvert (v16i8 VPR128:$src))), (v2f64 VPR128:$src)>;
4226 def : Pat<(v2i64 (bitconvert (v16i8 VPR128:$src))), (v2i64 VPR128:$src)>;
4227 def : Pat<(v4f32 (bitconvert (v16i8 VPR128:$src))), (v4f32 VPR128:$src)>;
4228 def : Pat<(v4i32 (bitconvert (v16i8 VPR128:$src))), (v4i32 VPR128:$src)>;
4229 def : Pat<(v8i16 (bitconvert (v16i8 VPR128:$src))), (v8i16 VPR128:$src)>;
4231 def : Pat<(v2f64 (bitconvert (v8i16 VPR128:$src))), (v2f64 VPR128:$src)>;
4232 def : Pat<(v2i64 (bitconvert (v8i16 VPR128:$src))), (v2i64 VPR128:$src)>;
4233 def : Pat<(v4i32 (bitconvert (v8i16 VPR128:$src))), (v4i32 VPR128:$src)>;
4234 def : Pat<(v4f32 (bitconvert (v8i16 VPR128:$src))), (v4f32 VPR128:$src)>;
4235 def : Pat<(v16i8 (bitconvert (v8i16 VPR128:$src))), (v16i8 VPR128:$src)>;
4237 def : Pat<(v2f64 (bitconvert (v4i32 VPR128:$src))), (v2f64 VPR128:$src)>;
4238 def : Pat<(v2i64 (bitconvert (v4i32 VPR128:$src))), (v2i64 VPR128:$src)>;
4239 def : Pat<(v4f32 (bitconvert (v4i32 VPR128:$src))), (v4f32 VPR128:$src)>;
4240 def : Pat<(v8i16 (bitconvert (v4i32 VPR128:$src))), (v8i16 VPR128:$src)>;
4241 def : Pat<(v16i8 (bitconvert (v4i32 VPR128:$src))), (v16i8 VPR128:$src)>;
4243 def : Pat<(v2f64 (bitconvert (v4f32 VPR128:$src))), (v2f64 VPR128:$src)>;
4244 def : Pat<(v2i64 (bitconvert (v4f32 VPR128:$src))), (v2i64 VPR128:$src)>;
4245 def : Pat<(v4i32 (bitconvert (v4f32 VPR128:$src))), (v4i32 VPR128:$src)>;
4246 def : Pat<(v8i16 (bitconvert (v4f32 VPR128:$src))), (v8i16 VPR128:$src)>;
4247 def : Pat<(v16i8 (bitconvert (v4f32 VPR128:$src))), (v16i8 VPR128:$src)>;
4249 def : Pat<(v2f64 (bitconvert (v2i64 VPR128:$src))), (v2f64 VPR128:$src)>;
4250 def : Pat<(v4f32 (bitconvert (v2i64 VPR128:$src))), (v4f32 VPR128:$src)>;
4251 def : Pat<(v4i32 (bitconvert (v2i64 VPR128:$src))), (v4i32 VPR128:$src)>;
4252 def : Pat<(v8i16 (bitconvert (v2i64 VPR128:$src))), (v8i16 VPR128:$src)>;
4253 def : Pat<(v16i8 (bitconvert (v2i64 VPR128:$src))), (v16i8 VPR128:$src)>;
4255 def : Pat<(v2i64 (bitconvert (v2f64 VPR128:$src))), (v2i64 VPR128:$src)>;
4256 def : Pat<(v4f32 (bitconvert (v2f64 VPR128:$src))), (v4f32 VPR128:$src)>;
4257 def : Pat<(v4i32 (bitconvert (v2f64 VPR128:$src))), (v4i32 VPR128:$src)>;
4258 def : Pat<(v8i16 (bitconvert (v2f64 VPR128:$src))), (v8i16 VPR128:$src)>;
4259 def : Pat<(v16i8 (bitconvert (v2f64 VPR128:$src))), (v16i8 VPR128:$src)>;
4262 // ...and scalar bitcasts...
4263 def : Pat<(f16 (bitconvert (v1i16 FPR16:$src))), (f16 FPR16:$src)>;
4264 def : Pat<(f32 (bitconvert (v1i32 FPR32:$src))), (f32 FPR32:$src)>;
4265 def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
4266 def : Pat<(f32 (bitconvert (v1f32 FPR32:$src))), (f32 FPR32:$src)>;
4267 def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;
// Crossing register banks (FPR -> GPR) requires a real FMOV instruction,
// unlike the register-reinterpret patterns above.
4269 def : Pat<(i64 (bitconvert (v1i64 FPR64:$src))), (FMOVxd $src)>;
4270 def : Pat<(i32 (bitconvert (v1i32 FPR32:$src))), (FMOVws $src)>;
// NOTE(review): the next three patterns repeat v8i8/v4i16/v2i32 <- v1i64
// casts already declared in the 64-bit group above (4219-4221) -- verify
// whether the duplicates are intentional.
4272 def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
4273 def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
4274 def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
4276 def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))), (f64 VPR64:$src)>;
4277 def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))), (f64 VPR64:$src)>;
4278 def : Pat<(f64 (bitconvert (v2i32 VPR64:$src))), (f64 VPR64:$src)>;
4279 def : Pat<(f64 (bitconvert (v2f32 VPR64:$src))), (f64 VPR64:$src)>;
4280 def : Pat<(f64 (bitconvert (v1i64 VPR64:$src))), (f64 VPR64:$src)>;
4282 def : Pat<(f128 (bitconvert (v16i8 VPR128:$src))), (f128 VPR128:$src)>;
4283 def : Pat<(f128 (bitconvert (v8i16 VPR128:$src))), (f128 VPR128:$src)>;
4284 def : Pat<(f128 (bitconvert (v4i32 VPR128:$src))), (f128 VPR128:$src)>;
4285 def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))), (f128 VPR128:$src)>;
4286 def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), (f128 VPR128:$src)>;
4287 def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), (f128 VPR128:$src)>;
4289 def : Pat<(v1i16 (bitconvert (f16 FPR16:$src))), (v1i16 FPR16:$src)>;
4290 def : Pat<(v1i32 (bitconvert (f32 FPR32:$src))), (v1i32 FPR32:$src)>;
4291 def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
4292 def : Pat<(v1f32 (bitconvert (f32 FPR32:$src))), (v1f32 FPR32:$src)>;
4293 def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
// GPR -> FPR direction of the cross-bank moves above.
4295 def : Pat<(v1i64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
4296 def : Pat<(v1i32 (bitconvert (i32 GPR32:$src))), (FMOVsw $src)>;
4298 def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
4299 def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
4300 def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
4301 def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
// NOTE(review): duplicates the v1i64 <- f64 pattern at 4291 above -- confirm
// the redundancy is harmless/intentional.
4302 def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
4304 def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>;
4305 def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>;
4306 def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>;
4307 def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
4308 def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
4309 def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
4311 def neon_uimm0_bare : Operand<i64>,
4312 ImmLeaf<i64, [{return Imm == 0;}]> {
4313 let ParserMatchClass = neon_uimm0_asmoperand;
4314 let PrintMethod = "printNeonUImm8OperandBare";
4317 def neon_uimm1_bare : Operand<i64>,
4318 ImmLeaf<i64, [{(void)Imm; return true;}]> {
4319 let ParserMatchClass = neon_uimm1_asmoperand;
4320 let PrintMethod = "printNeonUImm8OperandBare";
4323 def neon_uimm2_bare : Operand<i64>,
4324 ImmLeaf<i64, [{(void)Imm; return true;}]> {
4325 let ParserMatchClass = neon_uimm2_asmoperand;
4326 let PrintMethod = "printNeonUImm8OperandBare";
4329 def neon_uimm3_bare : Operand<i64>,
4330 ImmLeaf<i64, [{(void)Imm; return true;}]> {
4331 let ParserMatchClass = uimm3_asmoperand;
4332 let PrintMethod = "printNeonUImm8OperandBare";
4335 def neon_uimm4_bare : Operand<i64>,
4336 ImmLeaf<i64, [{(void)Imm; return true;}]> {
4337 let ParserMatchClass = uimm4_asmoperand;
4338 let PrintMethod = "printNeonUImm8OperandBare";
// INS (general register): insert one GPR value into lane $Imm of a 128-bit
// vector.  Asm form: "ins $Rd.<Res>[$Imm], $Rn".  $src is tied to $Rd so the
// untouched lanes of the destination are preserved.
// NOTE(review): the tail of the vector_insert pattern (the inserted value and
// index operands) is missing from this dump -- lines were dropped between the
// pattern and the Constraints line.
4341 class NeonI_INS_main<string asmop, string Res, ValueType ResTy,
4342 RegisterClass OpGPR, ValueType OpTy, Operand OpImm>
4343 : NeonI_copy<0b1, 0b0, 0b0011,
4344 (outs VPR128:$Rd), (ins VPR128:$src, OpGPR:$Rn, OpImm:$Imm),
4345 asmop # "\t$Rd." # Res # "[$Imm], $Rn",
4346 [(set (ResTy VPR128:$Rd),
4347 (ResTy (vector_insert
4348 (ResTy VPR128:$src),
4353 let Constraints = "$src = $Rd";
4356 // The followings are for instruction class (3V Elem)
// NI_2VE: base class for accumulating by-element instructions
// (dest accumulates: $src is tied to $Rd).  Operands: accumulator $src,
// vector $Rn, element vector $Re with lane $Index.
4360 class NI_2VE<bit q, bit u, bits<2> size, bits<4> opcode,
4361 string asmop, string ResS, string OpS, string EleOpS,
4362 Operand OpImm, RegisterOperand ResVPR,
4363 RegisterOperand OpVPR, RegisterOperand EleOpVPR>
4364 : NeonI_2VElem<q, u, size, opcode,
4365 (outs ResVPR:$Rd), (ins ResVPR:$src, OpVPR:$Rn,
4366 EleOpVPR:$Re, OpImm:$Index),
4367 asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
4368 ", $Re." # EleOpS # "[$Index]",
4374 let Constraints = "$src = $Rd";
// NI_2VE_v1: integer 2S/4S/4H/8H by-element variants.  Each def scatters the
// lane index across Inst{11}/Inst{21}(/Inst{20}) and encodes the element
// register in Inst{20-16} (Inst{19-16} for H, which is limited to v0-v15).
// NOTE(review): multiclass braces and the defs' closing '}' lines are absent
// from this dump (numbering gaps).
4377 multiclass NI_2VE_v1<bit u, bits<4> opcode, string asmop>
4379 // vector register class for element is always 128-bit to cover the max index
4380 def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
4381 neon_uimm2_bare, VPR64, VPR64, VPR128> {
4382 let Inst{11} = {Index{1}};
4383 let Inst{21} = {Index{0}};
4384 let Inst{20-16} = Re;
4387 def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
4388 neon_uimm2_bare, VPR128, VPR128, VPR128> {
4389 let Inst{11} = {Index{1}};
4390 let Inst{21} = {Index{0}};
4391 let Inst{20-16} = Re;
4394 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
4395 def _4h8h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
4396 neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
4397 let Inst{11} = {Index{2}};
4398 let Inst{21} = {Index{1}};
4399 let Inst{20} = {Index{0}};
4400 let Inst{19-16} = Re{3-0};
4403 def _8h8h : NI_2VE<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
4404 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
4405 let Inst{11} = {Index{2}};
4406 let Inst{21} = {Index{1}};
4407 let Inst{20} = {Index{0}};
4408 let Inst{19-16} = Re{3-0};
// Multiply-accumulate / multiply-subtract by element.
4412 defm MLAvve : NI_2VE_v1<0b1, 0b0000, "mla">;
4413 defm MLSvve : NI_2VE_v1<0b1, 0b0100, "mls">;
4415 // Pattern for lane in 128-bit vector
// NI_2VE_laneq: match op($src, $Rn, dup_lane($Re, $Index)) directly onto the
// accumulating by-element instruction when $Re is already 128-bit.
4416 class NI_2VE_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
4417 RegisterOperand ResVPR, RegisterOperand OpVPR,
4418 RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
4419 ValueType EleOpTy, SDPatternOperator coreop>
4420 : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
4421 (OpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
4422 (INST ResVPR:$src, OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
4424 // Pattern for lane in 64-bit vector
// As above, but the element vector is 64-bit, so it is widened to 128-bit
// with SUBREG_TO_REG before feeding the instruction.
4425 class NI_2VE_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
4426 RegisterOperand ResVPR, RegisterOperand OpVPR,
4427 RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
4428 ValueType EleOpTy, SDPatternOperator coreop>
4429 : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
4430 (OpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
4431 (INST ResVPR:$src, OpVPR:$Rn,
4432 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
// Selection patterns for the NI_2VE_v1 (mla/mls) instruction family.
// NOTE(review): multiclass braces and some pattern tails are missing from
// this dump (numbering gaps around the Neon_combine_* patterns).
4434 multiclass NI_2VE_v1_pat<string subop, SDPatternOperator op>
4436 def : NI_2VE_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
4437 op, VPR64, VPR64, VPR128, v2i32, v2i32, v4i32,
4438 BinOpFrag<(Neon_vduplane
4439 (Neon_low4S node:$LHS), node:$RHS)>>;
4441 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
4442 op, VPR128, VPR128, VPR128, v4i32, v4i32, v4i32,
4443 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4445 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
4446 op, VPR64, VPR64, VPR128Lo, v4i16, v4i16, v8i16,
4447 BinOpFrag<(Neon_vduplane
4448 (Neon_low8H node:$LHS), node:$RHS)>>;
4450 def : NI_2VE_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
4451 op, VPR128, VPR128, VPR128Lo, v8i16, v8i16, v8i16,
4452 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4454 // Index can only be half of the max value for lane in 64-bit vector
4456 def : NI_2VE_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
4457 op, VPR64, VPR64, VPR64, v2i32, v2i32, v2i32,
4458 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4460 def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4s"), neon_uimm1_bare,
4461 op, VPR128, VPR128, VPR64, v4i32, v4i32, v2i32,
4462 BinOpFrag<(Neon_vduplane
4463 (Neon_combine_4S node:$LHS, undef),
4466 def : NI_2VE_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
4467 op, VPR64, VPR64, VPR64Lo, v4i16, v4i16, v4i16,
4468 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4470 def : NI_2VE_lane<!cast<Instruction>(subop # "_8h8h"), neon_uimm2_bare,
4471 op, VPR128, VPR128, VPR64Lo, v8i16, v8i16, v4i16,
4472 BinOpFrag<(Neon_vduplane
4473 (Neon_combine_8H node:$LHS, undef),
4477 defm MLA_lane_v1 : NI_2VE_v1_pat<"MLAvve", Neon_mla>;
4478 defm MLS_lane_v1 : NI_2VE_v1_pat<"MLSvve", Neon_mls>;
// NI_2VE_2op: non-accumulating by-element base class (no tied $src input);
// otherwise identical in shape to NI_2VE above.
// NOTE(review): the pattern list / closing of this class is not visible in
// this dump (numbering gap after line 4488).
4480 class NI_2VE_2op<bit q, bit u, bits<2> size, bits<4> opcode,
4481 string asmop, string ResS, string OpS, string EleOpS,
4482 Operand OpImm, RegisterOperand ResVPR,
4483 RegisterOperand OpVPR, RegisterOperand EleOpVPR>
4484 : NeonI_2VElem<q, u, size, opcode,
4485 (outs ResVPR:$Rd), (ins OpVPR:$Rn,
4486 EleOpVPR:$Re, OpImm:$Index),
4487 asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
4488 ", $Re." # EleOpS # "[$Index]",
// NI_2VE_v1_2op: integer 2S/4S/4H/8H non-accumulating variants.  Index bit
// scattering and element-register encoding mirror NI_2VE_v1.
4495 multiclass NI_2VE_v1_2op<bit u, bits<4> opcode, string asmop>
4497 // vector register class for element is always 128-bit to cover the max index
4498 def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
4499 neon_uimm2_bare, VPR64, VPR64, VPR128> {
4500 let Inst{11} = {Index{1}};
4501 let Inst{21} = {Index{0}};
4502 let Inst{20-16} = Re;
4505 def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
4506 neon_uimm2_bare, VPR128, VPR128, VPR128> {
4507 let Inst{11} = {Index{1}};
4508 let Inst{21} = {Index{0}};
4509 let Inst{20-16} = Re;
4512 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
4513 def _4h8h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
4514 neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
4515 let Inst{11} = {Index{2}};
4516 let Inst{21} = {Index{1}};
4517 let Inst{20} = {Index{0}};
4518 let Inst{19-16} = Re{3-0};
4521 def _8h8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
4522 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
4523 let Inst{11} = {Index{2}};
4524 let Inst{21} = {Index{1}};
4525 let Inst{20} = {Index{0}};
4526 let Inst{19-16} = Re{3-0};
// Multiply / saturating-doubling-multiply-high by element.
4530 defm MULve : NI_2VE_v1_2op<0b0, 0b1000, "mul">;
4531 defm SQDMULHve : NI_2VE_v1_2op<0b0, 0b1100, "sqdmulh">;
4532 defm SQRDMULHve : NI_2VE_v1_2op<0b0, 0b1101, "sqrdmulh">;
4534 // Pattern for lane in 128-bit vector
// NI_2VE_mul_laneq: two-operand (non-accumulating) form of NI_2VE_laneq --
// matches op($Rn, dup_lane($Re, $Index)) with a 128-bit element vector.
4535 class NI_2VE_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
4536 RegisterOperand OpVPR, RegisterOperand EleOpVPR,
4537 ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
4538 SDPatternOperator coreop>
4539 : Pat<(ResTy (op (OpTy OpVPR:$Rn),
4540 (OpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
4541 (INST OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
4543 // Pattern for lane in 64-bit vector
// 64-bit element vector variant; widens $Re via SUBREG_TO_REG.
// NOTE(review): the (INST OpVPR:$Rn, ...) result line is missing from this
// dump (line 4550 dropped).
4544 class NI_2VE_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
4545 RegisterOperand OpVPR, RegisterOperand EleOpVPR,
4546 ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
4547 SDPatternOperator coreop>
4548 : Pat<(ResTy (op (OpTy OpVPR:$Rn),
4549 (OpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
4551 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
// Selection patterns for mul/sqdmulh/sqrdmulh by element.
4553 multiclass NI_2VE_mul_v1_pat<string subop, SDPatternOperator op>
4555 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
4556 op, VPR64, VPR128, v2i32, v2i32, v4i32,
4557 BinOpFrag<(Neon_vduplane
4558 (Neon_low4S node:$LHS), node:$RHS)>>;
4560 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
4561 op, VPR128, VPR128, v4i32, v4i32, v4i32,
4562 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4564 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
4565 op, VPR64, VPR128Lo, v4i16, v4i16, v8i16,
4566 BinOpFrag<(Neon_vduplane
4567 (Neon_low8H node:$LHS), node:$RHS)>>;
4569 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
4570 op, VPR128, VPR128Lo, v8i16, v8i16, v8i16,
4571 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4573 // Index can only be half of the max value for lane in 64-bit vector
4575 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
4576 op, VPR64, VPR64, v2i32, v2i32, v2i32,
4577 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4579 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4s"), neon_uimm1_bare,
4580 op, VPR128, VPR64, v4i32, v4i32, v2i32,
4581 BinOpFrag<(Neon_vduplane
4582 (Neon_combine_4S node:$LHS, undef),
4585 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
4586 op, VPR64, VPR64Lo, v4i16, v4i16, v4i16,
4587 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4589 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_8h8h"), neon_uimm2_bare,
4590 op, VPR128, VPR64Lo, v8i16, v8i16, v4i16,
4591 BinOpFrag<(Neon_vduplane
4592 (Neon_combine_8H node:$LHS, undef),
4596 defm MUL_lane_v1 : NI_2VE_mul_v1_pat<"MULve", mul>;
4597 defm SQDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQDMULHve", int_arm_neon_vqdmulh>;
4598 defm SQRDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQRDMULHve", int_arm_neon_vqrdmulh>;
// NI_2VE_v2_2op: floating-point 2S/4S/2D non-accumulating by-element
// variants (used for fmul/fmulx).  The 2D form has a 1-bit index in Inst{11};
// there is deliberately no 1D2D form.
// NOTE(review): multiclass braces and the defs' closing '}' lines are absent
// from this dump (numbering gaps).
4602 multiclass NI_2VE_v2_2op<bit u, bits<4> opcode, string asmop>
4604 // vector register class for element is always 128-bit to cover the max index
4605 def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
4606 neon_uimm2_bare, VPR64, VPR64, VPR128> {
4607 let Inst{11} = {Index{1}};
4608 let Inst{21} = {Index{0}};
4609 let Inst{20-16} = Re;
4612 def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
4613 neon_uimm2_bare, VPR128, VPR128, VPR128> {
4614 let Inst{11} = {Index{1}};
4615 let Inst{21} = {Index{0}};
4616 let Inst{20-16} = Re;
4619 // _1d2d doesn't exist!
4621 def _2d2d : NI_2VE_2op<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
4622 neon_uimm1_bare, VPR128, VPR128, VPR128> {
4623 let Inst{11} = {Index{0}};
4625 let Inst{20-16} = Re;
4629 defm FMULve : NI_2VE_v2_2op<0b0, 0b1001, "fmul">;
4630 defm FMULXve : NI_2VE_v2_2op<0b1, 0b1001, "fmulx">;
// NI_2VE_mul_lane_2d: special 2D case where the "dup" is expressed by
// combining a 1-element 64-bit vector with itself; lane index is forced to 0.
// NOTE(review): the (INST ...) result head line is missing from this dump
// (line 4638 dropped).
4632 class NI_2VE_mul_lane_2d<Instruction INST, Operand OpImm, SDPatternOperator op,
4633 RegisterOperand OpVPR, RegisterOperand EleOpVPR,
4634 ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
4635 SDPatternOperator coreop>
4636 : Pat<(ResTy (op (OpTy OpVPR:$Rn),
4637 (OpTy (coreop (EleOpTy EleOpVPR:$Re), (EleOpTy EleOpVPR:$Re))))),
4639 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), 0)>;
// Selection patterns for fmul/fmulx by element (v2 = floating-point family).
4641 multiclass NI_2VE_mul_v2_pat<string subop, SDPatternOperator op>
4643 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
4644 op, VPR64, VPR128, v2f32, v2f32, v4f32,
4645 BinOpFrag<(Neon_vduplane
4646 (Neon_low4f node:$LHS), node:$RHS)>>;
4648 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
4649 op, VPR128, VPR128, v4f32, v4f32, v4f32,
4650 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4652 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
4653 op, VPR128, VPR128, v2f64, v2f64, v2f64,
4654 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4656 // Index can only be half of the max value for lane in 64-bit vector
4658 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
4659 op, VPR64, VPR64, v2f32, v2f32, v2f32,
4660 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4662 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4s"), neon_uimm1_bare,
4663 op, VPR128, VPR64, v4f32, v4f32, v2f32,
4664 BinOpFrag<(Neon_vduplane
4665 (Neon_combine_4f node:$LHS, undef),
4668 def : NI_2VE_mul_lane_2d<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
4669 op, VPR128, VPR64, v2f64, v2f64, v1f64,
4670 BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
4673 defm FMUL_lane_v2 : NI_2VE_mul_v2_pat<"FMULve", fmul>;
4674 defm FMULX_lane_v2 : NI_2VE_mul_v2_pat<"FMULXve", int_aarch64_neon_vmulx>;
4676 // The followings are patterns using fma
4677 // -ffp-contract=fast generates fma
// NI_2VE_v2: floating-point 2S/4S/2D accumulating by-element variants
// (used for fmla/fmls); encoding layout matches NI_2VE_v2_2op above.
4679 multiclass NI_2VE_v2<bit u, bits<4> opcode, string asmop>
4681 // vector register class for element is always 128-bit to cover the max index
4682 def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
4683 neon_uimm2_bare, VPR64, VPR64, VPR128> {
4684 let Inst{11} = {Index{1}};
4685 let Inst{21} = {Index{0}};
4686 let Inst{20-16} = Re;
4689 def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
4690 neon_uimm2_bare, VPR128, VPR128, VPR128> {
4691 let Inst{11} = {Index{1}};
4692 let Inst{21} = {Index{0}};
4693 let Inst{20-16} = Re;
4696 // _1d2d doesn't exist!
4698 def _2d2d : NI_2VE<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
4699 neon_uimm1_bare, VPR128, VPR128, VPR128> {
4700 let Inst{11} = {Index{0}};
4702 let Inst{20-16} = Re;
4706 defm FMLAvve : NI_2VE_v2<0b0, 0b0001, "fmla">;
4707 defm FMLSvve : NI_2VE_v2<0b0, 0b0101, "fmls">;
4709 // Pattern for lane in 128-bit vector
// NI_2VEswap_*: like NI_2VE_laneq/_lane but with the dup'd element as the
// FIRST operand of op -- matching fma's (mul-operand, mul-operand, addend)
// operand order.
4710 class NI_2VEswap_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
4711 RegisterOperand ResVPR, RegisterOperand OpVPR,
4712 ValueType ResTy, ValueType OpTy,
4713 SDPatternOperator coreop>
4714 : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
4715 (ResTy ResVPR:$src), (ResTy ResVPR:$Rn))),
4716 (INST ResVPR:$src, ResVPR:$Rn, OpVPR:$Re, OpImm:$Index)>;
4718 // Pattern for lane in 64-bit vector
4719 class NI_2VEswap_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
4720 RegisterOperand ResVPR, RegisterOperand OpVPR,
4721 ValueType ResTy, ValueType OpTy,
4722 SDPatternOperator coreop>
4723 : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
4724 (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
4725 (INST ResVPR:$src, ResVPR:$Rn,
4726 (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), OpImm:$Index)>;
4728 // Pattern for lane in 64-bit vector
// 2D2D variant: element formed by combining a v1f64 with itself; index is 0.
4729 class NI_2VEswap_lane_2d2d<Instruction INST, Operand OpImm,
4730 SDPatternOperator op,
4731 RegisterOperand ResVPR, RegisterOperand OpVPR,
4732 ValueType ResTy, ValueType OpTy,
4733 SDPatternOperator coreop>
4734 : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (OpTy OpVPR:$Re))),
4735 (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
4736 (INST ResVPR:$src, ResVPR:$Rn,
4737 (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), 0)>;
// fma-by-element selection patterns (FMLA).
4740 multiclass NI_2VE_fma_v2_pat<string subop, SDPatternOperator op>
4742 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
4743 neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
4744 BinOpFrag<(Neon_vduplane
4745 (Neon_low4f node:$LHS), node:$RHS)>>;
4747 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
4748 neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
4749 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4751 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
4752 neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
4753 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4755 // Index can only be half of the max value for lane in 64-bit vector
4757 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
4758 neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
4759 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4761 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
4762 neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
4763 BinOpFrag<(Neon_vduplane
4764 (Neon_combine_4f node:$LHS, undef),
4767 def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
4768 neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
4769 BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
4772 defm FMLA_lane_v2_s : NI_2VE_fma_v2_pat<"FMLAvve", fma>;
// fms-by-element selection patterns (FMLS).  Each shape is matched twice:
// once with fneg applied to the whole dup'd element, and once with fneg
// applied inside (to the source vector) -- both canonical forms DAGCombine
// may produce for fma(-x, y, z).
4774 multiclass NI_2VE_fms_v2_pat<string subop, SDPatternOperator op>
4776 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
4777 neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
4778 BinOpFrag<(fneg (Neon_vduplane
4779 (Neon_low4f node:$LHS), node:$RHS))>>;
4781 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
4782 neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
4783 BinOpFrag<(Neon_vduplane
4784 (Neon_low4f (fneg node:$LHS)),
4787 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
4788 neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
4789 BinOpFrag<(fneg (Neon_vduplane
4790 node:$LHS, node:$RHS))>>;
4792 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
4793 neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
4794 BinOpFrag<(Neon_vduplane
4795 (fneg node:$LHS), node:$RHS)>>;
4797 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
4798 neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
4799 BinOpFrag<(fneg (Neon_vduplane
4800 node:$LHS, node:$RHS))>>;
4802 def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
4803 neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
4804 BinOpFrag<(Neon_vduplane
4805 (fneg node:$LHS), node:$RHS)>>;
4807 // Index can only be half of the max value for lane in 64-bit vector
4809 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
4810 neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
4811 BinOpFrag<(fneg (Neon_vduplane
4812 node:$LHS, node:$RHS))>>;
4814 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
4815 neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
4816 BinOpFrag<(Neon_vduplane
4817 (fneg node:$LHS), node:$RHS)>>;
4819 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
4820 neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
4821 BinOpFrag<(fneg (Neon_vduplane
4822 (Neon_combine_4f node:$LHS, undef),
4825 def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
4826 neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
4827 BinOpFrag<(Neon_vduplane
4828 (Neon_combine_4f (fneg node:$LHS), undef),
4831 def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
4832 neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
4833 BinOpFrag<(fneg (Neon_combine_2d
4834 node:$LHS, node:$RHS))>>;
4836 def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
4837 neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
4838 BinOpFrag<(Neon_combine_2d
4839 (fneg node:$LHS), (fneg node:$RHS))>>;
4842 defm FMLS_lane_v2_s : NI_2VE_fms_v2_pat<"FMLSvve", fma>;
4844 // Variant 3: Long type
4845 // E.g. SMLAL : 4S/4H/H (v0-v15), 2D/2S/S
4846 // SMLAL2: 4S/8H/H (v0-v15), 2D/4S/S
// NI_2VE_v3: long (widening) accumulating by-element variants; the q=1 forms
// take the high half of the source and append "2" to the mnemonic.
4848 multiclass NI_2VE_v3<bit u, bits<4> opcode, string asmop>
4850 // vector register class for element is always 128-bit to cover the max index
4851 def _2d2s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
4852 neon_uimm2_bare, VPR128, VPR64, VPR128> {
4853 let Inst{11} = {Index{1}};
4854 let Inst{21} = {Index{0}};
4855 let Inst{20-16} = Re;
4858 def _2d4s : NI_2VE<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
4859 neon_uimm2_bare, VPR128, VPR128, VPR128> {
4860 let Inst{11} = {Index{1}};
4861 let Inst{21} = {Index{0}};
4862 let Inst{20-16} = Re;
4865 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
4866 def _4s8h : NI_2VE<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
4867 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
4868 let Inst{11} = {Index{2}};
4869 let Inst{21} = {Index{1}};
4870 let Inst{20} = {Index{0}};
4871 let Inst{19-16} = Re{3-0};
4874 def _4s4h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
4875 neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
4876 let Inst{11} = {Index{2}};
4877 let Inst{21} = {Index{1}};
4878 let Inst{20} = {Index{0}};
4879 let Inst{19-16} = Re{3-0};
// Widening multiply-accumulate/-subtract by element (plus saturating forms).
4883 defm SMLALvve : NI_2VE_v3<0b0, 0b0010, "smlal">;
4884 defm UMLALvve : NI_2VE_v3<0b1, 0b0010, "umlal">;
4885 defm SMLSLvve : NI_2VE_v3<0b0, 0b0110, "smlsl">;
4886 defm UMLSLvve : NI_2VE_v3<0b1, 0b0110, "umlsl">;
4887 defm SQDMLALvve : NI_2VE_v3<0b0, 0b0011, "sqdmlal">;
4888 defm SQDMLSLvve : NI_2VE_v3<0b0, 0b0111, "sqdmlsl">;
// NI_2VE_v3_2op: long (widening) non-accumulating by-element variants
// (smull/umull/sqdmull); shape mirrors NI_2VE_v3 above.
4890 multiclass NI_2VE_v3_2op<bit u, bits<4> opcode, string asmop>
4892 // vector register class for element is always 128-bit to cover the max index
4893 def _2d2s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
4894 neon_uimm2_bare, VPR128, VPR64, VPR128> {
4895 let Inst{11} = {Index{1}};
4896 let Inst{21} = {Index{0}};
4897 let Inst{20-16} = Re;
4900 def _2d4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
4901 neon_uimm2_bare, VPR128, VPR128, VPR128> {
4902 let Inst{11} = {Index{1}};
4903 let Inst{21} = {Index{0}};
4904 let Inst{20-16} = Re;
4907 // Index operations on 16-bit(H) elements are restricted to using v0-v15.
4908 def _4s8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
4909 neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
4910 let Inst{11} = {Index{2}};
4911 let Inst{21} = {Index{1}};
4912 let Inst{20} = {Index{0}};
4913 let Inst{19-16} = Re{3-0};
4916 def _4s4h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
4917 neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
4918 let Inst{11} = {Index{2}};
4919 let Inst{21} = {Index{1}};
4920 let Inst{20} = {Index{0}};
4921 let Inst{19-16} = Re{3-0};
4925 defm SMULLve : NI_2VE_v3_2op<0b0, 0b1010, "smull">;
4926 defm UMULLve : NI_2VE_v3_2op<0b1, 0b1010, "umull">;
4927 defm SQDMULLve : NI_2VE_v3_2op<0b0, 0b1011, "sqdmull">;
4929 // Pattern for lane in 128-bit vector
// NI_2VEL2_laneq: accumulating long pattern for the "2" (high-half) forms --
// hiop extracts the high half of the 128-bit $Rn before the widening op.
4930 class NI_2VEL2_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
4931 RegisterOperand EleOpVPR, ValueType ResTy,
4932 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
4933 SDPatternOperator hiop, SDPatternOperator coreop>
4934 : Pat<(ResTy (op (ResTy VPR128:$src),
4935 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
4936 (HalfOpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
4937 (INST VPR128:$src, VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
4939 // Pattern for lane in 64-bit vector
4940 class NI_2VEL2_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
4941 RegisterOperand EleOpVPR, ValueType ResTy,
4942 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
4943 SDPatternOperator hiop, SDPatternOperator coreop>
4944 : Pat<(ResTy (op (ResTy VPR128:$src),
4945 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
4946 (HalfOpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
4947 (INST VPR128:$src, VPR128:$Rn,
4948 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
// Selection patterns for smlal/umlal/smlsl/umlsl by element; low-half forms
// reuse NI_2VE_laneq/_lane, high-half ("2") forms use NI_2VEL2_*.
4950 multiclass NI_2VEL_v3_pat<string subop, SDPatternOperator op>
4952 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
4953 op, VPR128, VPR64, VPR128Lo, v4i32, v4i16, v8i16,
4954 BinOpFrag<(Neon_vduplane
4955 (Neon_low8H node:$LHS), node:$RHS)>>;
4957 def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
4958 op, VPR128, VPR64, VPR128, v2i64, v2i32, v4i32,
4959 BinOpFrag<(Neon_vduplane
4960 (Neon_low4S node:$LHS), node:$RHS)>>;
4962 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
4963 op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H,
4964 BinOpFrag<(Neon_vduplane
4965 (Neon_low8H node:$LHS), node:$RHS)>>;
4967 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
4968 op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S,
4969 BinOpFrag<(Neon_vduplane
4970 (Neon_low4S node:$LHS), node:$RHS)>>;
4972 // Index can only be half of the max value for lane in 64-bit vector
4974 def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
4975 op, VPR128, VPR64, VPR64Lo, v4i32, v4i16, v4i16,
4976 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4978 def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
4979 op, VPR128, VPR64, VPR64, v2i64, v2i32, v2i32,
4980 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4982 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
4983 op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H,
4984 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4986 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
4987 op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S,
4988 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
4991 defm SMLAL_lane_v3 : NI_2VEL_v3_pat<"SMLALvve", Neon_smlal>;
4992 defm UMLAL_lane_v3 : NI_2VEL_v3_pat<"UMLALvve", Neon_umlal>;
4993 defm SMLSL_lane_v3 : NI_2VEL_v3_pat<"SMLSLvve", Neon_smlsl>;
4994 defm UMLSL_lane_v3 : NI_2VEL_v3_pat<"UMLSLvve", Neon_umlsl>;
4996 // Pattern for lane in 128-bit vector
// Non-accumulating long ("2"/high-half) patterns for smull/umull/sqdmull.
// NOTE(review): the ": Pat<(ResTy (op ..." head lines of both classes are
// missing from this dump (lines 5001 and 5011 dropped).
4997 class NI_2VEL2_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
4998 RegisterOperand EleOpVPR, ValueType ResTy,
4999 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
5000 SDPatternOperator hiop, SDPatternOperator coreop>
5002 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
5003 (HalfOpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
5004 (INST VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
5006 // Pattern for lane in 64-bit vector
5007 class NI_2VEL2_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
5008 RegisterOperand EleOpVPR, ValueType ResTy,
5009 ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
5010 SDPatternOperator hiop, SDPatternOperator coreop>
5012 (HalfOpTy (hiop (OpTy VPR128:$Rn))),
5013 (HalfOpTy (coreop (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
5015 (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
// Selection patterns for smull/umull/sqdmull by element.
5017 multiclass NI_2VEL_mul_v3_pat<string subop, SDPatternOperator op>
5019 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
5020 op, VPR64, VPR128Lo, v4i32, v4i16, v8i16,
5021 BinOpFrag<(Neon_vduplane
5022 (Neon_low8H node:$LHS), node:$RHS)>>;
5024 def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
5025 op, VPR64, VPR128, v2i64, v2i32, v4i32,
5026 BinOpFrag<(Neon_vduplane
5027 (Neon_low4S node:$LHS), node:$RHS)>>;
5029 def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
5030 op, VPR128Lo, v4i32, v8i16, v8i16, v4i16,
5032 BinOpFrag<(Neon_vduplane
5033 (Neon_low8H node:$LHS), node:$RHS)>>;
5035 def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
5036 op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S,
5037 BinOpFrag<(Neon_vduplane
5038 (Neon_low4S node:$LHS), node:$RHS)>>;
5040 // Index can only be half of the max value for lane in 64-bit vector
5042 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
5043 op, VPR64, VPR64Lo, v4i32, v4i16, v4i16,
5044 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5046 def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
5047 op, VPR64, VPR64, v2i64, v2i32, v2i32,
5048 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5050 def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
5051 op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H,
5052 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5054 def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
5055 op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S,
5056 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5059 defm SMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SMULLve", int_arm_neon_vmulls>;
5060 defm UMULL_lane_v3 : NI_2VEL_mul_v3_pat<"UMULLve", int_arm_neon_vmullu>;
5061 defm SQDMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SQDMULLve", int_arm_neon_vqdmull>;
// NI_qdma: PatFrags composing sqdmull with a saturating accumulate op
// (vqadds for sqdmlal, vqsubs for sqdmlsl).
// NOTE(review): each PatFrag's result head line (the (vNxM (op node:$Ra, ...)
// wrapper) is missing from this dump (lines 5066 and 5070 dropped).
5063 multiclass NI_qdma<SDPatternOperator op>
5065 def _4s : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
5067 (v4i32 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
5069 def _2d : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
5071 (v2i64 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
5074 defm Neon_qdmlal : NI_qdma<int_arm_neon_vqadds>;
5075 defm Neon_qdmlsl : NI_qdma<int_arm_neon_vqsubs>;
// Selection patterns for sqdmlal/sqdmlsl by element, built from the NI_qdma
// PatFrags (looked up by name with !cast).
5077 multiclass NI_2VEL_v3_qdma_pat<string subop, string op>
5079 def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
5080 !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR128Lo,
5081 v4i32, v4i16, v8i16,
5082 BinOpFrag<(Neon_vduplane
5083 (Neon_low8H node:$LHS), node:$RHS)>>;
5085 def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
5086 !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR128,
5087 v2i64, v2i32, v4i32,
5088 BinOpFrag<(Neon_vduplane
5089 (Neon_low4S node:$LHS), node:$RHS)>>;
5091 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
5092 !cast<PatFrag>(op # "_4s"), VPR128Lo,
5093 v4i32, v8i16, v8i16, v4i16, Neon_High8H,
5094 BinOpFrag<(Neon_vduplane
5095 (Neon_low8H node:$LHS), node:$RHS)>>;
5097 def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
5098 !cast<PatFrag>(op # "_2d"), VPR128,
5099 v2i64, v4i32, v4i32, v2i32, Neon_High4S,
5100 BinOpFrag<(Neon_vduplane
5101 (Neon_low4S node:$LHS), node:$RHS)>>;
5103 // Index can only be half of the max value for lane in 64-bit vector
5105 def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
5106 !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR64Lo,
5107 v4i32, v4i16, v4i16,
5108 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5110 def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
5111 !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR64,
5112 v2i64, v2i32, v2i32,
5113 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5115 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
5116 !cast<PatFrag>(op # "_4s"), VPR64Lo,
5117 v4i32, v8i16, v4i16, v4i16, Neon_High8H,
5118 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5120 def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
5121 !cast<PatFrag>(op # "_2d"), VPR64,
5122 v2i64, v4i32, v2i32, v2i32, Neon_High4S,
5123 BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
5126 defm SQDMLAL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLALvve", "Neon_qdmlal">;
5127 defm SQDMLSL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLSLvve", "Neon_qdmlsl">;
5129 // End of implementation for instruction class (3V Elem)
5131 //Insert element (vector, from main)
// INS (from general register) for each element size.  Inst{20-16} holds the
// imm5 field: lane index bits followed by a size-tag bit pattern (b:1xxxx,
// h:x1xx0-style layouts per the defs below).
// NOTE(review): each def's second argument line (the operand class) and
// closing '}' are missing from this dump (numbering gaps).
5132 def INSbw : NeonI_INS_main<"ins", "b", v16i8, GPR32, i32,
5134 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
5136 def INShw : NeonI_INS_main<"ins", "h", v8i16, GPR32, i32,
5138 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
5140 def INSsw : NeonI_INS_main<"ins", "s", v4i32, GPR32, i32,
5142 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
5144 def INSdx : NeonI_INS_main<"ins", "d", v2i64, GPR64, i64,
5146 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
// Pattern mapping vector_insert on a 64-bit vector onto the 128-bit INS:
// widen $src with SUBREG_TO_REG, insert, then take the low 64 bits back.
5149 class Neon_INS_main_pattern <ValueType ResTy,ValueType ExtResTy,
5150 RegisterClass OpGPR, ValueType OpTy,
5151 Operand OpImm, Instruction INS>
5152 : Pat<(ResTy (vector_insert
5156 (ResTy (EXTRACT_SUBREG
5157 (ExtResTy (INS (ExtResTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
5158 OpGPR:$Rn, OpImm:$Imm)), sub_64))>;
5160 def INSbw_pattern : Neon_INS_main_pattern<v8i8, v16i8, GPR32, i32,
5161 neon_uimm3_bare, INSbw>;
5162 def INShw_pattern : Neon_INS_main_pattern<v4i16, v8i16, GPR32, i32,
5163 neon_uimm2_bare, INShw>;
5164 def INSsw_pattern : Neon_INS_main_pattern<v2i32, v4i32, GPR32, i32,
5165 neon_uimm1_bare, INSsw>;
5166 def INSdx_pattern : Neon_INS_main_pattern<v1i64, v2i64, GPR64, i64,
5167 neon_uimm0_bare, INSdx>;
// INS (element): copy lane $Immn of $Rn into lane $Immd of $Rd; $src is tied
// to $Rd so other destination lanes are preserved.  Inst{20-16} encodes the
// destination lane + size tag, Inst{14-11} (as available per size) encodes
// the source lane.
// NOTE(review): the vector_extract operand tail and the defs' closing '}'
// lines are missing from this dump (numbering gaps).
5169 class NeonI_INS_element<string asmop, string Res, ValueType ResTy,
5170 Operand ResImm, ValueType MidTy>
5171 : NeonI_insert<0b1, 0b1,
5172 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn,
5173 ResImm:$Immd, ResImm:$Immn),
5174 asmop # "\t$Rd." # Res # "[$Immd], $Rn." # Res # "[$Immn]",
5175 [(set (ResTy VPR128:$Rd),
5176 (ResTy (vector_insert
5177 (ResTy VPR128:$src),
5178 (MidTy (vector_extract
5183 let Constraints = "$src = $Rd";
5188 //Insert element (vector, from element)
5189 def INSELb : NeonI_INS_element<"ins", "b", v16i8, neon_uimm4_bare, i32> {
5190 let Inst{20-16} = {Immd{3}, Immd{2}, Immd{1}, Immd{0}, 0b1};
5191 let Inst{14-11} = {Immn{3}, Immn{2}, Immn{1}, Immn{0}};
5193 def INSELh : NeonI_INS_element<"ins", "h", v8i16, neon_uimm3_bare, i32> {
5194 let Inst{20-16} = {Immd{2}, Immd{1}, Immd{0}, 0b1, 0b0};
5195 let Inst{14-12} = {Immn{2}, Immn{1}, Immn{0}};
5196 // bit 11 is unspecified.
5198 def INSELs : NeonI_INS_element<"ins", "s", v4i32, neon_uimm2_bare, i32> {
5199 let Inst{20-16} = {Immd{1}, Immd{0}, 0b1, 0b0, 0b0};
5200 let Inst{14-13} = {Immn{1}, Immn{0}};
5201 // bits 11-12 are unspecified.
5203 def INSELd : NeonI_INS_element<"ins", "d", v2i64, neon_uimm1_bare, i64> {
5204 let Inst{20-16} = {Immd, 0b1, 0b0, 0b0, 0b0};
5205 let Inst{14} = Immn{0};
5206 // bits 11-13 are unspecified.
// FP insert-element patterns. The three Pats cover:
//   1) 128-bit vector <- lane of another 128-bit vector (direct INS);
//   2) 128-bit vector <- FP scalar register (widen the scalar, then INS);
//   3) 64-bit (NaTy) vector <- FP scalar (widen both operands, INS, then
//      take the low 64-bit subregister of the result).
5209 multiclass Neon_INS_elt_float_pattern<ValueType ResTy, ValueType NaTy,
5211 RegisterClass OpFPR, Operand ResImm,
5212 SubRegIndex SubIndex, Instruction INS> {
5213 def : Pat<(ResTy (vector_insert
5214 (ResTy VPR128:$src),
5215 (MidTy (vector_extract
5219 (INS (ResTy VPR128:$src), (ResTy VPR128:$Rn),
5220 ResImm:$Immd, ResImm:$Immn)>;
5222 def : Pat <(ResTy (vector_insert
5223 (ResTy VPR128:$src),
5226 (INS (ResTy VPR128:$src),
5227 (ResTy (SUBREG_TO_REG (i64 0), OpFPR:$Rn, SubIndex)),
5231 def : Pat <(NaTy (vector_insert
5235 (NaTy (EXTRACT_SUBREG
5237 (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
5238 (ResTy (SUBREG_TO_REG (i64 0), (MidTy OpFPR:$Rn), SubIndex)),
// Instantiations for single-precision (s lanes) and double-precision
// (d lanes) element types.
5244 defm : Neon_INS_elt_float_pattern<v4f32, v2f32, f32, FPR32, neon_uimm2_bare,
5246 defm : Neon_INS_elt_float_pattern<v2f64, v1f64, f64, FPR64, neon_uimm1_bare,
// Integer insert-from-element patterns for the mixed 64/128-bit register
// combinations (NaTy = 64-bit "narrow" type, StTy = 128-bit type). Each Pat
// widens any 64-bit operand with SUBREG_TO_REG so the INSEL instruction can
// operate on full 128-bit registers, narrowing the result back when needed.
5249 multiclass Neon_INS_elt_pattern <ValueType NaTy, Operand NaImm,
5250 ValueType MidTy, ValueType StTy,
5251 Operand StImm, Instruction INS> {
5252 def : Pat<(NaTy (vector_insert
5254 (MidTy (vector_extract
5258 (NaTy (EXTRACT_SUBREG
5260 (StTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
5266 def : Pat<(StTy (vector_insert
5268 (MidTy (vector_extract
5274 (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
5278 def : Pat<(NaTy (vector_insert
5280 (MidTy (vector_extract
5284 (NaTy (EXTRACT_SUBREG
5286 (StTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
5287 (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// One instantiation per integer element size (b/h/s/d).
5293 defm : Neon_INS_elt_pattern<v8i8, neon_uimm3_bare, i32,
5294 v16i8, neon_uimm4_bare, INSELb>;
5295 defm : Neon_INS_elt_pattern<v4i16, neon_uimm2_bare, i32,
5296 v8i16, neon_uimm3_bare, INSELh>;
5297 defm : Neon_INS_elt_pattern<v2i32, neon_uimm1_bare, i32,
5298 v4i32, neon_uimm2_bare, INSELs>;
5299 defm : Neon_INS_elt_pattern<v1i64, neon_uimm0_bare, i64,
5300 v2i64, neon_uimm1_bare, INSELd>;
// SMOV: sign-extending move of one vector lane into a general register.
// Q selects the 32- vs 64-bit GPR destination; lane-index encoding bits are
// filled in by each instantiation below.
5303 class NeonI_SMOV<string asmop, string Res, bit Q,
5304 ValueType OpTy, ValueType eleTy,
5305 Operand OpImm, RegisterClass ResGPR, ValueType ResTy>
5306 : NeonI_copy<Q, 0b0, 0b0101,
5307 (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
5308 asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
5309 [(set (ResTy ResGPR:$Rd),
5311 (ResTy (vector_extract
5312 (OpTy VPR128:$Rn), (OpImm:$Imm))),
5318 //Signed integer move (main, from element)
// Inst{20-16} is imm5: the lane index placed above a one-hot element-size
// marker bit, as in the INS/UMOV encodings.
5319 def SMOVwb : NeonI_SMOV<"smov", "b", 0b0, v16i8, i8, neon_uimm4_bare,
5321 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
5323 def SMOVwh : NeonI_SMOV<"smov", "h", 0b0, v8i16, i16, neon_uimm3_bare,
5325 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
5327 def SMOVxb : NeonI_SMOV<"smov", "b", 0b1, v16i8, i8, neon_uimm4_bare,
5329 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
5331 def SMOVxh : NeonI_SMOV<"smov", "h", 0b1, v8i16, i16, neon_uimm3_bare,
5333 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
5335 def SMOVxs : NeonI_SMOV<"smov", "s", 0b1, v4i32, i32, neon_uimm2_bare,
5337 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
// Select the 64-bit-destination SMOV for sext / sext_inreg of a lane
// extracted from either a 128-bit (StTy) or 64-bit (NaTy) vector; a 64-bit
// source register is first widened with SUBREG_TO_REG.
5340 multiclass Neon_SMOVx_pattern <ValueType StTy, ValueType NaTy,
5341 ValueType eleTy, Operand StImm, Operand NaImm,
5342 Instruction SMOVI> {
5343 def : Pat<(i64 (sext_inreg
5345 (i32 (vector_extract
5346 (StTy VPR128:$Rn), (StImm:$Imm))))),
5348 (SMOVI VPR128:$Rn, StImm:$Imm)>;
5350 def : Pat<(i64 (sext
5351 (i32 (vector_extract
5352 (StTy VPR128:$Rn), (StImm:$Imm))))),
5353 (SMOVI VPR128:$Rn, StImm:$Imm)>;
5355 def : Pat<(i64 (sext_inreg
5356 (i64 (vector_extract
5357 (NaTy VPR64:$Rn), (NaImm:$Imm))),
5359 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
5362 def : Pat<(i64 (sext_inreg
5364 (i32 (vector_extract
5365 (NaTy VPR64:$Rn), (NaImm:$Imm))))),
5367 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
5370 def : Pat<(i64 (sext
5371 (i32 (vector_extract
5372 (NaTy VPR64:$Rn), (NaImm:$Imm))))),
5373 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// SMOVx instantiations for byte, halfword, and word lanes.
5377 defm : Neon_SMOVx_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
5378 neon_uimm3_bare, SMOVxb>;
5379 defm : Neon_SMOVx_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
5380 neon_uimm2_bare, SMOVxh>;
5381 defm : Neon_SMOVx_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
5382 neon_uimm1_bare, SMOVxs>;
// 32-bit-destination SMOV: sext_inreg of a lane from a 64-bit vector
// selects SMOVw after widening the source register to 128 bits.
5384 class Neon_SMOVw_pattern <ValueType StTy, ValueType NaTy,
5385 ValueType eleTy, Operand StImm, Operand NaImm,
5387 : Pat<(i32 (sext_inreg
5388 (i32 (vector_extract
5389 (NaTy VPR64:$Rn), (NaImm:$Imm))),
5391 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// Byte and halfword lanes only (word lanes need no in-register extension).
5394 def : Neon_SMOVw_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
5395 neon_uimm3_bare, SMOVwb>;
5396 def : Neon_SMOVw_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
5397 neon_uimm2_bare, SMOVwh>;
// UMOV: move one vector lane into a general register (zero-extending into
// the wider GPR). Q selects the 32- vs 64-bit destination.
5399 class NeonI_UMOV<string asmop, string Res, bit Q,
5400 ValueType OpTy, Operand OpImm,
5401 RegisterClass ResGPR, ValueType ResTy>
5402 : NeonI_copy<Q, 0b0, 0b0111,
5403 (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
5404 asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
5405 [(set (ResTy ResGPR:$Rd),
5406 (ResTy (vector_extract
5407 (OpTy VPR128:$Rn), (OpImm:$Imm))))],
5412 //Unsigned integer move (main, from element)
// Inst{20-16} is imm5: lane index above a one-hot element-size marker bit.
5413 def UMOVwb : NeonI_UMOV<"umov", "b", 0b0, v16i8, neon_uimm4_bare,
5415 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
5417 def UMOVwh : NeonI_UMOV<"umov", "h", 0b0, v8i16, neon_uimm3_bare,
5419 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
5421 def UMOVws : NeonI_UMOV<"umov", "s", 0b0, v4i32, neon_uimm2_bare,
5423 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
5425 def UMOVxd : NeonI_UMOV<"umov", "d", 0b1, v2i64, neon_uimm1_bare,
5427 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
// vector_extract from a 64-bit vector: widen to 128 bits, then read the
// lane with a UMOV instruction.
// NOTE(review): the instruction parameter is named SMOVI although UMOV
// instructions are passed below — cosmetic, but worth renaming upstream.
5430 class Neon_UMOV_pattern <ValueType StTy, ValueType NaTy, ValueType ResTy,
5431 Operand StImm, Operand NaImm,
5433 : Pat<(ResTy (vector_extract
5434 (NaTy VPR64:$Rn), NaImm:$Imm)),
5435 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// Instantiations for byte/halfword/word lanes, all producing i32.
5438 def : Neon_UMOV_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
5439 neon_uimm3_bare, UMOVwb>;
5440 def : Neon_UMOV_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
5441 neon_uimm2_bare, UMOVwh>;
5442 def : Neon_UMOV_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
5443 neon_uimm1_bare, UMOVws>;
// Zero-extended lane reads via UMOV. UMOV already clears the destination's
// upper bits, so explicit masks/zexts around the extract fold away; 64-bit
// source vectors are widened with SUBREG_TO_REG first.
// NOTE(review): the v2i64/v1i64 Pats extract an i32 from 64-bit-element
// vectors — presumably this matches a truncated extract produced during
// legalization; confirm against the lowering code.
5446 (i32 (vector_extract
5447 (v16i8 VPR128:$Rn), (neon_uimm4_bare:$Imm))),
5449 (UMOVwb VPR128:$Rn, neon_uimm4_bare:$Imm)>;
5452 (i32 (vector_extract
5453 (v8i16 VPR128:$Rn), (neon_uimm3_bare:$Imm))),
5455 (UMOVwh VPR128:$Rn, neon_uimm3_bare:$Imm)>;
5457 def : Pat<(i64 (zext
5458 (i32 (vector_extract
5459 (v2i64 VPR128:$Rn), (neon_uimm1_bare:$Imm))))),
5460 (UMOVxd VPR128:$Rn, neon_uimm1_bare:$Imm)>;
5463 (i32 (vector_extract
5464 (v8i8 VPR64:$Rn), (neon_uimm3_bare:$Imm))),
5466 (UMOVwb (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
5467 neon_uimm3_bare:$Imm)>;
5470 (i32 (vector_extract
5471 (v4i16 VPR64:$Rn), (neon_uimm2_bare:$Imm))),
5473 (UMOVwh (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
5474 neon_uimm2_bare:$Imm)>;
5476 def : Pat<(i64 (zext
5477 (i32 (vector_extract
5478 (v1i64 VPR64:$Rn), (neon_uimm0_bare:$Imm))))),
5479 (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
5480 neon_uimm0_bare:$Imm)>;
5482 // Additional copy patterns for scalar types
// Element 0 of a one-element vector lives directly in the FP register, so
// extracts become subregister moves / FMOVs, and scalar_to_vector becomes
// an insert into IMPLICIT_DEF narrowed back to the one-element type.
5483 def : Pat<(i32 (vector_extract (v1i8 FPR8:$Rn), (i64 0))),
5485 (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8)), (i64 0))>;
5487 def : Pat<(i32 (vector_extract (v1i16 FPR16:$Rn), (i64 0))),
5489 (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16)), (i64 0))>;
5491 def : Pat<(i32 (vector_extract (v1i32 FPR32:$Rn), (i64 0))),
5492 (FMOVws FPR32:$Rn)>;
5494 def : Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))),
5495 (FMOVxd FPR64:$Rn)>;
5497 def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))),
5500 def : Pat<(f32 (vector_extract (v1f32 FPR32:$Rn), (i64 0))),
5503 def : Pat<(v1i8 (scalar_to_vector GPR32:$Rn)),
5504 (v1i8 (EXTRACT_SUBREG (v16i8
5505 (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
5508 def : Pat<(v1i16 (scalar_to_vector GPR32:$Rn)),
5509 (v1i16 (EXTRACT_SUBREG (v8i16
5510 (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
5513 def : Pat<(v1i32 (scalar_to_vector GPR32:$src)),
5516 def : Pat<(v1i64 (scalar_to_vector GPR64:$src)),
// NOTE(review): two v1f64 scalar_to_vector Pats appear back to back
// (lines 5521 and 5524) — possibly a duplicate pattern; check whether one
// is redundant.
5519 def : Pat<(v1f32 (scalar_to_vector (f32 FPR32:$Rn))),
5521 def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))),
5524 def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))),
5527 def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$src))),
5528 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)),
5529 (f64 FPR64:$src), sub_64)>;
// DUP (element): broadcast lane $Imm of $Rn across every lane of $Rd.
// ResVPR selects the 64- or 128-bit destination register class.
5531 class NeonI_DUP_Elt<bit Q, string asmop, string rdlane, string rnlane,
5532 RegisterOperand ResVPR, ValueType ResTy,
5533 ValueType OpTy, Operand OpImm>
5534 : NeonI_copy<Q, 0b0, 0b0000, (outs ResVPR:$Rd),
5535 (ins VPR128:$Rn, OpImm:$Imm),
5536 asmop # "\t$Rd" # rdlane # ", $Rn" # rnlane # "[$Imm]",
// DUP (element) encodings: Inst{20-16} is imm5 — the lane index placed
// above a one-hot element-size marker — matching the INS/SMOV/UMOV layout.
5542 def DUPELT16b : NeonI_DUP_Elt<0b1, "dup", ".16b", ".b", VPR128, v16i8, v16i8,
5544 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
5547 def DUPELT8h : NeonI_DUP_Elt<0b1, "dup", ".8h", ".h", VPR128, v8i16, v8i16,
5549 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
5552 def DUPELT4s : NeonI_DUP_Elt<0b1, "dup", ".4s", ".s", VPR128, v4i32, v4i32,
5554 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
5557 def DUPELT2d : NeonI_DUP_Elt<0b1, "dup", ".2d", ".d", VPR128, v2i64, v2i64,
5559 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
5562 def DUPELT8b : NeonI_DUP_Elt<0b0, "dup", ".8b", ".b", VPR64, v8i8, v16i8,
5564 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
5567 def DUPELT4h : NeonI_DUP_Elt<0b0, "dup", ".4h", ".h", VPR64, v4i16, v8i16,
5569 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
5572 def DUPELT2s : NeonI_DUP_Elt<0b0, "dup", ".2s", ".s", VPR64, v2i32, v4i32,
5574 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
// Neon_vduplane selection: the first Pat handles a 128-bit (OpTy) source
// directly; the second widens a 64-bit (NaTy) source with SUBREG_TO_REG to
// ExTy before the DUPELT. Instantiated for integer and FP lane types with
// both 64- and 128-bit results.
5577 multiclass NeonI_DUP_Elt_pattern<Instruction DUPELT, ValueType ResTy,
5578 ValueType OpTy,ValueType NaTy,
5579 ValueType ExTy, Operand OpLImm,
5581 def : Pat<(ResTy (Neon_vduplane (OpTy VPR128:$Rn), OpLImm:$Imm)),
5582 (ResTy (DUPELT (OpTy VPR128:$Rn), OpLImm:$Imm))>;
5584 def : Pat<(ResTy (Neon_vduplane
5585 (NaTy VPR64:$Rn), OpNImm:$Imm)),
5587 (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), OpNImm:$Imm))>;
5589 defm : NeonI_DUP_Elt_pattern<DUPELT16b, v16i8, v16i8, v8i8, v16i8,
5590 neon_uimm4_bare, neon_uimm3_bare>;
5591 defm : NeonI_DUP_Elt_pattern<DUPELT8b, v8i8, v16i8, v8i8, v16i8,
5592 neon_uimm4_bare, neon_uimm3_bare>;
5593 defm : NeonI_DUP_Elt_pattern<DUPELT8h, v8i16, v8i16, v4i16, v8i16,
5594 neon_uimm3_bare, neon_uimm2_bare>;
5595 defm : NeonI_DUP_Elt_pattern<DUPELT4h, v4i16, v8i16, v4i16, v8i16,
5596 neon_uimm3_bare, neon_uimm2_bare>;
5597 defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4i32, v4i32, v2i32, v4i32,
5598 neon_uimm2_bare, neon_uimm1_bare>;
5599 defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2i32, v4i32, v2i32, v4i32,
5600 neon_uimm2_bare, neon_uimm1_bare>;
5601 defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2i64, v2i64, v1i64, v2i64,
5602 neon_uimm1_bare, neon_uimm0_bare>;
5603 defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4f32, v4f32, v2f32, v4f32,
5604 neon_uimm2_bare, neon_uimm1_bare>;
5605 defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2f32, v4f32, v2f32, v4f32,
5606 neon_uimm2_bare, neon_uimm1_bare>;
5607 defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2f64, v2f64, v1f64, v2f64,
5608 neon_uimm1_bare, neon_uimm0_bare>;
// Neon_vdup of an FP scalar: widen the scalar into a vector register with
// SUBREG_TO_REG, then broadcast lane 0 with the matching DUPELT.
5610 def : Pat<(v2f32 (Neon_vdup (f32 FPR32:$Rn))),
5612 (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
5614 def : Pat<(v4f32 (Neon_vdup (f32 FPR32:$Rn))),
5616 (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
5618 def : Pat<(v2f64 (Neon_vdup (f64 FPR64:$Rn))),
5620 (SUBREG_TO_REG (i64 0), FPR64:$Rn, sub_64),
// DUP (general): broadcast a general-purpose register into every lane of
// the destination vector; selected for the Neon_vdup DAG node.
5623 class NeonI_DUP<bit Q, string asmop, string rdlane,
5624 RegisterOperand ResVPR, ValueType ResTy,
5625 RegisterClass OpGPR, ValueType OpTy>
5626 : NeonI_copy<Q, 0b0, 0b0001, (outs ResVPR:$Rd), (ins OpGPR:$Rn),
5627 asmop # "\t$Rd" # rdlane # ", $Rn",
5628 [(set (ResTy ResVPR:$Rd),
5629 (ResTy (Neon_vdup (OpTy OpGPR:$Rn))))],
// DUP (general) encodings: the low bits of Inst{19-16} carry the one-hot
// element-size marker (1/10/100/1000 for b/h/s/d); higher marker bits are
// left unspecified as noted per def.
5632 def DUP16b : NeonI_DUP<0b1, "dup", ".16b", VPR128, v16i8, GPR32, i32> {
5634 // bits 17-19 are unspecified.
5637 def DUP8h : NeonI_DUP<0b1, "dup", ".8h", VPR128, v8i16, GPR32, i32> {
5638 let Inst{17-16} = 0b10;
5639 // bits 18-19 are unspecified.
5642 def DUP4s : NeonI_DUP<0b1, "dup", ".4s", VPR128, v4i32, GPR32, i32> {
5643 let Inst{18-16} = 0b100;
5644 // bit 19 is unspecified.
5647 def DUP2d : NeonI_DUP<0b1, "dup", ".2d", VPR128, v2i64, GPR64, i64> {
5648 let Inst{19-16} = 0b1000;
5651 def DUP8b : NeonI_DUP<0b0, "dup", ".8b", VPR64, v8i8, GPR32, i32> {
5653 // bits 17-19 are unspecified.
5656 def DUP4h : NeonI_DUP<0b0, "dup", ".4h", VPR64, v4i16, GPR32, i32> {
5657 let Inst{17-16} = 0b10;
5658 // bits 18-19 are unspecified.
5661 def DUP2s : NeonI_DUP<0b0, "dup", ".2s", VPR64, v2i32, GPR32, i32> {
5662 let Inst{18-16} = 0b100;
5663 // bit 19 is unspecified.
5666 // patterns for CONCAT_VECTORS
// Three cases: concat with undef is just a subregister widening; a general
// two-operand concat widens both halves to v2i64 and combines them; the
// self-concat (Rn, Rn) case widens the single operand once.
5667 multiclass Concat_Vector_Pattern<ValueType ResTy, ValueType OpTy> {
5668 def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), undef)),
5669 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)>;
5670 def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))),
5672 (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
5673 (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rm, sub_64)),
5676 def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rn))),
5678 (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// One instantiation per 64-bit element type being concatenated.
5682 defm : Concat_Vector_Pattern<v16i8, v8i8>;
5683 defm : Concat_Vector_Pattern<v8i16, v4i16>;
5684 defm : Concat_Vector_Pattern<v4i32, v2i32>;
5685 defm : Concat_Vector_Pattern<v2i64, v1i64>;
5686 defm : Concat_Vector_Pattern<v4f32, v2f32>;
5687 defm : Concat_Vector_Pattern<v2f64, v1f64>;
//patterns for EXTRACT_SUBVECTOR
// Extracting the low half of a 128-bit vector is a plain subregister copy,
// so all six element types share one pattern shape; generate them from a
// single multiclass (same anonymous Pat records as writing each def out).
multiclass Extract_Low_Subvec_Pattern<ValueType NaTy, ValueType StTy> {
  def : Pat<(NaTy (extract_subvector (StTy VPR128:$Rn), (i64 0))),
            (NaTy (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
}

defm : Extract_Low_Subvec_Pattern<v8i8,  v16i8>;
defm : Extract_Low_Subvec_Pattern<v4i16, v8i16>;
defm : Extract_Low_Subvec_Pattern<v2i32, v4i32>;
defm : Extract_Low_Subvec_Pattern<v1i64, v2i64>;
defm : Extract_Low_Subvec_Pattern<v2f32, v4f32>;
defm : Extract_Low_Subvec_Pattern<v1f64, v2f64>;