1 //===-- AArch64InstrNEON.td - NEON support for AArch64 -----*- tablegen -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the AArch64 NEON instruction set.
12 //===----------------------------------------------------------------------===//
14 //===----------------------------------------------------------------------===//
15 // NEON-specific DAG Nodes.
16 //===----------------------------------------------------------------------===//
// NEON_BSL: bitwise select.  One vector result and three operands, all
// constrained to the same vector type (the BSL instruction patterns below
// match this node).
17 def Neon_bsl : SDNode<"AArch64ISD::NEON_BSL", SDTypeProfile<1, 3,
18 [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
19 SDTCisSameAs<0, 3>]>>;
21 // (outs Result), (ins Imm, OpCmode)
// Shared profile for the immediate-move nodes: vector result plus an i32
// immediate; the second i32 operand carries the op/cmode encoding.
22 def SDT_Neon_movi : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
24 def Neon_movi : SDNode<"AArch64ISD::NEON_MOVIMM", SDT_Neon_movi>;
26 def Neon_mvni : SDNode<"AArch64ISD::NEON_MVNIMM", SDT_Neon_movi>;
28 // (outs Result), (ins Imm)
29 def Neon_fmovi : SDNode<"AArch64ISD::NEON_FMOVIMM", SDTypeProfile<1, 1,
30 [SDTCisVec<0>, SDTCisVT<1, i32>]>>;
32 // (outs Result), (ins LHS, RHS, CondCode)
// Vector compare producing an element mask; LHS/RHS share a type, the
// result type is independent (int mask even for FP compares -- see FCMEQvvv).
33 def Neon_cmp : SDNode<"AArch64ISD::NEON_CMP", SDTypeProfile<1, 3,
34 [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>;
36 // (outs Result), (ins LHS, 0/0.0 constant, CondCode)
37 def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3,
38 [SDTCisVec<0>, SDTCisVec<1>]>>;
40 // (outs Result), (ins LHS, RHS)
// Bitwise-test compare; matched by the CMTST instructions below.
41 def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2,
42 [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>;
// Duplicate an i32 immediate across a vector.
44 def Neon_dupImm : SDNode<"AArch64ISD::NEON_DUPIMM", SDTypeProfile<1, 1,
45 [SDTCisVec<0>, SDTCisVT<1, i32>]>>;
// NOTE(review): the constraint list of SDTARMVSH is cut off in this extract
// (the continuation line after the trailing comma is missing; the original
// line numbering skips from 47 to 49) -- verify against the full file.
47 def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
// Saturating shift-left nodes, signed and unsigned variants.
49 def Neon_sqrshlImm : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>;
50 def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>;
53 //===----------------------------------------------------------------------===//
55 //===----------------------------------------------------------------------===//
// Multiclass NeonI_3VSame_B_sizes: "three registers, same type" NEON
// instructions that only exist for byte elements -- emits a 64-bit `.8b`
// and a 128-bit `.16b` variant.  `u`, `size`, `opcode` are raw encoding
// fields; opnode8B/opnode16B are the DAG operators matched per width.
// NOTE(review): some interior lines are elided in this extract (original
// numbering skips 60-61, 68-69 and the closing braces) -- compare with the
// full file before editing.
57 multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size, bits<5> opcode,
58 string asmop, SDPatternOperator opnode8B,
59 SDPatternOperator opnode16B,
// Mark both defs commutable when the instantiation passes Commutable = 1.
62 let isCommutable = Commutable in {
63 def _8B : NeonI_3VSame<0b0, u, size, opcode,
64 (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
65 asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
66 [(set (v8i8 VPR64:$Rd),
67 (v8i8 (opnode8B (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
70 def _16B : NeonI_3VSame<0b1, u, size, opcode,
71 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
72 asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
73 [(set (v16i8 VPR128:$Rd),
74 (v16i8 (opnode16B (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
// Multiclass NeonI_3VSame_HS_sizes: emits the half-word and word element
// variants (`.4h`, `.8h`, `.2s`, `.4s`) of a three-same-register NEON op.
// The first NeonI_3VSame bit selects 64-bit (0b0) vs 128-bit (0b1) vectors;
// the size field is 0b01 for 16-bit and 0b10 for 32-bit elements.
80 multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode,
81 string asmop, SDPatternOperator opnode,
84 let isCommutable = Commutable in {
85 def _4H : NeonI_3VSame<0b0, u, 0b01, opcode,
86 (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
87 asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h",
88 [(set (v4i16 VPR64:$Rd),
89 (v4i16 (opnode (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))))],
92 def _8H : NeonI_3VSame<0b1, u, 0b01, opcode,
93 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
94 asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h",
95 [(set (v8i16 VPR128:$Rd),
96 (v8i16 (opnode (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))))],
99 def _2S : NeonI_3VSame<0b0, u, 0b10, opcode,
100 (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
101 asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
102 [(set (v2i32 VPR64:$Rd),
103 (v2i32 (opnode (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))))],
106 def _4S : NeonI_3VSame<0b1, u, 0b10, opcode,
107 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
108 asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
109 [(set (v4i32 VPR128:$Rd),
110 (v4i32 (opnode (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))],
// Multiclass NeonI_3VSame_BHS_sizes: extends the H/S multiclass above with
// the byte-element variants (`.8b`, `.16b`), covering byte/half/word sizes.
114 multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
115 string asmop, SDPatternOperator opnode,
117 : NeonI_3VSame_HS_sizes<u, opcode, asmop, opnode, Commutable>
119 let isCommutable = Commutable in {
120 def _8B : NeonI_3VSame<0b0, u, 0b00, opcode,
121 (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
122 asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
123 [(set (v8i8 VPR64:$Rd),
124 (v8i8 (opnode (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
127 def _16B : NeonI_3VSame<0b1, u, 0b00, opcode,
128 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
129 asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
130 [(set (v16i8 VPR128:$Rd),
131 (v16i8 (opnode (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
// Multiclass NeonI_3VSame_BHSD_sizes: extends the B/H/S multiclass with the
// 64-bit element variant (`.2d`, 128-bit vectors only), covering all four
// integer element sizes.
136 multiclass NeonI_3VSame_BHSD_sizes<bit u, bits<5> opcode,
137 string asmop, SDPatternOperator opnode,
139 : NeonI_3VSame_BHS_sizes<u, opcode, asmop, opnode, Commutable>
141 let isCommutable = Commutable in {
142 def _2D : NeonI_3VSame<0b1, u, 0b11, opcode,
143 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
144 asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
145 [(set (v2i64 VPR128:$Rd),
146 (v2i64 (opnode (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))))],
151 // Multiclass NeonI_3VSame_SD_sizes: Operand types are floating point types,
152 // but Result types can be integer or floating point types.
// (Integer results are used by the FP compare instructions, e.g. FCMEQvvv,
// which produce element masks.)  ResTy2S/4S/2D select the result type per
// variant; the size bit is combined with a low bit to form the 2-bit size
// field: {size,0} for 32-bit elements, {size,1} for 64-bit.
153 multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode,
154 string asmop, SDPatternOperator opnode2S,
155 SDPatternOperator opnode4S,
156 SDPatternOperator opnode2D,
157 ValueType ResTy2S, ValueType ResTy4S,
158 ValueType ResTy2D, bit Commutable = 0>
160 let isCommutable = Commutable in {
161 def _2S : NeonI_3VSame<0b0, u, {size, 0b0}, opcode,
162 (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
163 asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
164 [(set (ResTy2S VPR64:$Rd),
165 (ResTy2S (opnode2S (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))],
168 def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode,
169 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
170 asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
171 [(set (ResTy4S VPR128:$Rd),
172 (ResTy4S (opnode4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))],
175 def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode,
176 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
177 asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
178 [(set (ResTy2D VPR128:$Rd),
179 (ResTy2D (opnode2D (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))],
184 //===----------------------------------------------------------------------===//
185 // Instruction Definitions
186 //===----------------------------------------------------------------------===//
188 // Vector Arithmetic Instructions
190 // Vector Add (Integer and Floating-Point)
// ADD/FADD and MUL/FMUL are commutable (trailing 1); SUB/FSUB are not.
192 defm ADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>;
193 defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd", fadd, fadd, fadd,
194 v2f32, v4f32, v2f64, 1>;
196 // Vector Sub (Integer and Floating-Point)
198 defm SUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10000, "sub", sub, 0>;
199 defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub", fsub, fsub, fsub,
200 v2f32, v4f32, v2f64, 0>;
202 // Vector Multiply (Integer and Floating-Point)
// Integer MUL has no 64-bit element form, hence BHS rather than BHSD.
204 defm MULvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>;
205 defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul, fmul, fmul,
206 v2f32, v4f32, v2f64, 1>;
208 // Vector Multiply (Polynomial)
// Polynomial multiply only exists for byte elements; reuses the ARM NEON
// intrinsic for both widths.
210 defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul",
211 int_arm_neon_vmulp, int_arm_neon_vmulp, 1>;
213 // Vector Multiply-accumulate and Multiply-subtract (Integer)
215 // class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and
216 // two operands constraints.
// NeonI_3VSame_Constraint_impl: a three-same-register instruction whose
// destination is also a source (accumulate-style ops: MLA/MLS, FMLA/FMLS,
// BSL/BIT/BIF, UABA/SABA).  The extra $src input is tied to $Rd via the
// "$src = $Rd" constraint below; asmlane carries the ".8b"/".4s"/... suffix.
217 class NeonI_3VSame_Constraint_impl<string asmop, string asmlane,
218 RegisterOperand VPRC, ValueType OpTy, bit q, bit u, bits<2> size,
219 bits<5> opcode, SDPatternOperator opnode>
220 : NeonI_3VSame<q, u, size, opcode,
221 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, VPRC:$Rm),
222 asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane # ", $Rm" # asmlane,
223 [(set (OpTy VPRC:$Rd),
224 (OpTy (opnode (OpTy VPRC:$src), (OpTy VPRC:$Rn), (OpTy VPRC:$Rm))))],
// Tie the accumulator input to the destination register.
226 let Constraints = "$src = $Rd";
// Multiply-accumulate / multiply-subtract pattern fragments:
// mla = Ra + (Rn * Rm), mls = Ra - (Rn * Rm).
229 def Neon_mla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
230 (add node:$Ra, (mul node:$Rn, node:$Rm))>;
232 def Neon_mls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
233 (sub node:$Ra, (mul node:$Rn, node:$Rm))>;
// Integer multiply-accumulate (MLA, u=0) and multiply-subtract (MLS, u=1),
// opcode 0b10010, for byte/half/word element sizes (no 64-bit element form).
236 def MLAvvv_8B: NeonI_3VSame_Constraint_impl<"mla", ".8b", VPR64, v8i8,
237 0b0, 0b0, 0b00, 0b10010, Neon_mla>;
238 def MLAvvv_16B: NeonI_3VSame_Constraint_impl<"mla", ".16b", VPR128, v16i8,
239 0b1, 0b0, 0b00, 0b10010, Neon_mla>;
240 def MLAvvv_4H: NeonI_3VSame_Constraint_impl<"mla", ".4h", VPR64, v4i16,
241 0b0, 0b0, 0b01, 0b10010, Neon_mla>;
242 def MLAvvv_8H: NeonI_3VSame_Constraint_impl<"mla", ".8h", VPR128, v8i16,
243 0b1, 0b0, 0b01, 0b10010, Neon_mla>;
244 def MLAvvv_2S: NeonI_3VSame_Constraint_impl<"mla", ".2s", VPR64, v2i32,
245 0b0, 0b0, 0b10, 0b10010, Neon_mla>;
246 def MLAvvv_4S: NeonI_3VSame_Constraint_impl<"mla", ".4s", VPR128, v4i32,
247 0b1, 0b0, 0b10, 0b10010, Neon_mla>;
249 def MLSvvv_8B: NeonI_3VSame_Constraint_impl<"mls", ".8b", VPR64, v8i8,
250 0b0, 0b1, 0b00, 0b10010, Neon_mls>;
251 def MLSvvv_16B: NeonI_3VSame_Constraint_impl<"mls", ".16b", VPR128, v16i8,
252 0b1, 0b1, 0b00, 0b10010, Neon_mls>;
253 def MLSvvv_4H: NeonI_3VSame_Constraint_impl<"mls", ".4h", VPR64, v4i16,
254 0b0, 0b1, 0b01, 0b10010, Neon_mls>;
255 def MLSvvv_8H: NeonI_3VSame_Constraint_impl<"mls", ".8h", VPR128, v8i16,
256 0b1, 0b1, 0b01, 0b10010, Neon_mls>;
257 def MLSvvv_2S: NeonI_3VSame_Constraint_impl<"mls", ".2s", VPR64, v2i32,
258 0b0, 0b1, 0b10, 0b10010, Neon_mls>;
259 def MLSvvv_4S: NeonI_3VSame_Constraint_impl<"mls", ".4s", VPR128, v4i32,
260 0b1, 0b1, 0b10, 0b10010, Neon_mls>;
262 // Vector Multiply-accumulate and Multiply-subtract (Floating Point)
// Unfused FP multiply-accumulate fragments: fmla = Ra + Rn*Rm,
// fmls = Ra - Rn*Rm.  Matched by FMLA/FMLS only under UseFusedMAC.
264 def Neon_fmla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
265 (fadd node:$Ra, (fmul node:$Rn, node:$Rm))>;
267 def Neon_fmls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
268 (fsub node:$Ra, (fmul node:$Rn, node:$Rm))>;
// FMLA uses size bits 0b00/0b01, FMLS 0b10/0b11, both with opcode 0b11001.
// NOTE(review): the closing brace of this `let Predicates` region is not
// visible in this extract (original numbering skips 284-285) -- confirm its
// extent in the full file.
270 let Predicates = [HasNEON, UseFusedMAC] in {
271 def FMLAvvv_2S: NeonI_3VSame_Constraint_impl<"fmla", ".2s", VPR64, v2f32,
272 0b0, 0b0, 0b00, 0b11001, Neon_fmla>;
273 def FMLAvvv_4S: NeonI_3VSame_Constraint_impl<"fmla", ".4s", VPR128, v4f32,
274 0b1, 0b0, 0b00, 0b11001, Neon_fmla>;
275 def FMLAvvv_2D: NeonI_3VSame_Constraint_impl<"fmla", ".2d", VPR128, v2f64,
276 0b1, 0b0, 0b01, 0b11001, Neon_fmla>;
278 def FMLSvvv_2S: NeonI_3VSame_Constraint_impl<"fmls", ".2s", VPR64, v2f32,
279 0b0, 0b0, 0b10, 0b11001, Neon_fmls>;
280 def FMLSvvv_4S: NeonI_3VSame_Constraint_impl<"fmls", ".4s", VPR128, v4f32,
281 0b1, 0b0, 0b10, 0b11001, Neon_fmls>;
282 def FMLSvvv_2D: NeonI_3VSame_Constraint_impl<"fmls", ".2d", VPR128, v2f64,
283 0b1, 0b0, 0b11, 0b11001, Neon_fmls>;
286 // We're also allowed to match the fma instruction regardless of compile
// (continuation of the comment above is elided in this extract)
// Explicit `fma` node patterns: note the operand rotation -- the DAG's
// (fma Rn, Rm, Ra) maps to the instruction's (Ra, Rn, Rm) accumulator-first
// operand order, and fneg on the first multiplicand selects FMLS.
288 def : Pat<(v2f32 (fma VPR64:$Rn, VPR64:$Rm, VPR64:$Ra)),
289 (FMLAvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
290 def : Pat<(v4f32 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
291 (FMLAvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
292 def : Pat<(v2f64 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
293 (FMLAvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
295 def : Pat<(v2f32 (fma (fneg VPR64:$Rn), VPR64:$Rm, VPR64:$Ra)),
296 (FMLSvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
297 def : Pat<(v4f32 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
298 (FMLSvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
299 def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
300 (FMLSvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
302 // Vector Divide (Floating-Point)
304 defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv, fdiv, fdiv,
305 v2f32, v4f32, v2f64, 0>;
307 // Vector Bitwise Operations
309 // Vector Bitwise AND
// Bitwise AND/EOR/ORR share opcode 0b00011; the u and size fields select the
// operation.  All are commutable.  Though defined on byte vectors, extra
// patterns below (Neon_bitwise2V_patterns) cover the other element types.
311 defm ANDvvv : NeonI_3VSame_B_sizes<0b0, 0b00, 0b00011, "and", and, and, 1>;
313 // Vector Bitwise Exclusive OR
315 defm EORvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b00011, "eor", xor, xor, 1>;
319 defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>;
321 // ORR disassembled as MOV if Vn==Vm
323 // Vector Move - register
324 // Alias for ORR if Vn=Vm.
325 // FIXME: This is actually the preferred syntax but TableGen can't deal with
326 // custom printing of aliases.
// The trailing 0 marks these aliases as parse-only (not used for printing).
327 def : NeonInstAlias<"mov $Rd.8b, $Rn.8b",
328 (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn), 0>;
329 def : NeonInstAlias<"mov $Rd.16b, $Rn.16b",
330 (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn), 0>;
// Neon_immAllOnes: matches a NEON_MOVIMM whose decoded element value is the
// 8-bit all-ones pattern (0xff), i.e. a vector of all-ones bits.  Used below
// to recognize bitwise NOT as xor-with-all-ones.
// NOTE(review): the declaration of `EltBits` (passed by reference to
// decodeNeonModImm and tested afterwards) is elided in this extract
// (original line 335 is missing) -- verify against the full file.
332 def Neon_immAllOnes: PatLeaf<(Neon_movi (i32 timm), (i32 imm)), [{
333 ConstantSDNode *ImmConstVal = cast<ConstantSDNode>(N->getOperand(0));
334 ConstantSDNode *OpCmodeConstVal = cast<ConstantSDNode>(N->getOperand(1));
336 uint64_t EltVal = A64Imms::decodeNeonModImm(ImmConstVal->getZExtValue(),
337 OpCmodeConstVal->getZExtValue(), EltBits);
338 return (EltBits == 8 && EltVal == 0xff);
// Bitwise NOT expressed as xor with an all-ones vector, per vector width.
342 def Neon_not8B : PatFrag<(ops node:$in),
343 (xor node:$in, (bitconvert (v8i8 Neon_immAllOnes)))>;
344 def Neon_not16B : PatFrag<(ops node:$in),
345 (xor node:$in, (bitconvert (v16i8 Neon_immAllOnes)))>;
// ORN: Rn | ~Rm; BIC: Rn & ~Rm.
347 def Neon_orn8B : PatFrag<(ops node:$Rn, node:$Rm),
348 (or node:$Rn, (Neon_not8B node:$Rm))>;
350 def Neon_orn16B : PatFrag<(ops node:$Rn, node:$Rm),
351 (or node:$Rn, (Neon_not16B node:$Rm))>;
353 def Neon_bic8B : PatFrag<(ops node:$Rn, node:$Rm),
354 (and node:$Rn, (Neon_not8B node:$Rm))>;
356 def Neon_bic16B : PatFrag<(ops node:$Rn, node:$Rm),
357 (and node:$Rn, (Neon_not16B node:$Rm))>;
360 // Vector Bitwise OR NOT - register
// ORN and BIC are not commutable (the second operand is the negated one).
362 defm ORNvvv : NeonI_3VSame_B_sizes<0b0, 0b11, 0b00011, "orn",
363 Neon_orn8B, Neon_orn16B, 0>;
365 // Vector Bitwise Bit Clear (AND NOT) - register
367 defm BICvvv : NeonI_3VSame_B_sizes<0b0, 0b01, 0b00011, "bic",
368 Neon_bic8B, Neon_bic16B, 0>;
// Neon_bitwise2V_patterns: the two-operand bitwise instructions are defined
// on byte vectors only; this multiclass adds selection patterns mapping the
// same operation on the other element types (v2i32/v4i16/v1i64 and the
// 128-bit v4i32/v8i16/v2i64) onto the byte-vector instruction.
370 multiclass Neon_bitwise2V_patterns<SDPatternOperator opnode8B,
371 SDPatternOperator opnode16B,
373 Instruction INST16B> {
374 def : Pat<(v2i32 (opnode8B VPR64:$Rn, VPR64:$Rm)),
375 (INST8B VPR64:$Rn, VPR64:$Rm)>;
376 def : Pat<(v4i16 (opnode8B VPR64:$Rn, VPR64:$Rm)),
377 (INST8B VPR64:$Rn, VPR64:$Rm)>;
378 def : Pat<(v1i64 (opnode8B VPR64:$Rn, VPR64:$Rm)),
379 (INST8B VPR64:$Rn, VPR64:$Rm)>;
380 def : Pat<(v4i32 (opnode16B VPR128:$Rn, VPR128:$Rm)),
381 (INST16B VPR128:$Rn, VPR128:$Rm)>;
382 def : Pat<(v8i16 (opnode16B VPR128:$Rn, VPR128:$Rm)),
383 (INST16B VPR128:$Rn, VPR128:$Rm)>;
384 def : Pat<(v2i64 (opnode16B VPR128:$Rn, VPR128:$Rm)),
385 (INST16B VPR128:$Rn, VPR128:$Rm)>;
388 // Additional patterns for bitwise instructions AND, EOR, ORR, BIC, ORN
389 defm : Neon_bitwise2V_patterns<and, and, ANDvvv_8B, ANDvvv_16B>;
390 defm : Neon_bitwise2V_patterns<or, or, ORRvvv_8B, ORRvvv_16B>;
391 defm : Neon_bitwise2V_patterns<xor, xor, EORvvv_8B, EORvvv_16B>;
392 defm : Neon_bitwise2V_patterns<Neon_bic8B, Neon_bic16B, BICvvv_8B, BICvvv_16B>;
393 defm : Neon_bitwise2V_patterns<Neon_orn8B, Neon_orn16B, ORNvvv_8B, ORNvvv_16B>;
395 // Vector Bitwise Select
// BSL: bitwise select, destination doubles as the selection mask (tied via
// the Constraint_impl $src = $Rd).  Matches the Neon_bsl node.
396 def BSLvvv_8B : NeonI_3VSame_Constraint_impl<"bsl", ".8b", VPR64, v8i8,
397 0b0, 0b1, 0b01, 0b00011, Neon_bsl>;
399 def BSLvvv_16B : NeonI_3VSame_Constraint_impl<"bsl", ".16b", VPR128, v16i8,
400 0b1, 0b1, 0b01, 0b00011, Neon_bsl>;
// Neon_bitwise3V_patterns: extra selection patterns for the three-operand
// BSL instruction.  Three groups:
//  1. retype the Neon_bsl node from non-byte vector types onto the byte
//     instruction;
//  2. recognize the open-coded select (Rn & Rd) | (Rm & ~Rd);
//  3. map the llvm.arm.neon.vbsl intrinsic for every element type.
402 multiclass Neon_bitwise3V_patterns<SDPatternOperator opnode,
404 Instruction INST16B> {
405 // Disassociate type from instruction definition
406 def : Pat<(v2i32 (opnode VPR64:$src,VPR64:$Rn, VPR64:$Rm)),
407 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
408 def : Pat<(v4i16 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
409 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
410 def : Pat<(v1i64 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
411 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
412 def : Pat<(v4i32 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
413 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
414 def : Pat<(v8i16 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
415 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
416 def : Pat<(v2i64 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
417 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
419 // Allow to match BSL instruction pattern with non-constant operand
420 def : Pat<(v8i8 (or (and VPR64:$Rn, VPR64:$Rd),
421 (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
422 (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
423 def : Pat<(v4i16 (or (and VPR64:$Rn, VPR64:$Rd),
424 (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
425 (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
426 def : Pat<(v2i32 (or (and VPR64:$Rn, VPR64:$Rd),
427 (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
428 (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
429 def : Pat<(v1i64 (or (and VPR64:$Rn, VPR64:$Rd),
430 (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
431 (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
432 def : Pat<(v16i8 (or (and VPR128:$Rn, VPR128:$Rd),
433 (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
434 (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
435 def : Pat<(v8i16 (or (and VPR128:$Rn, VPR128:$Rd),
436 (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
437 (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
438 def : Pat<(v4i32 (or (and VPR128:$Rn, VPR128:$Rd),
439 (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
440 (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
441 def : Pat<(v2i64 (or (and VPR128:$Rn, VPR128:$Rd),
442 (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
443 (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
445 // Allow to match llvm.arm.* intrinsics.
446 def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 VPR64:$src),
447 (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
448 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
449 def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 VPR64:$src),
450 (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
451 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
452 def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 VPR64:$src),
453 (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
454 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
455 def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 VPR64:$src),
456 (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
457 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
458 def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 VPR64:$src),
459 (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
460 (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
461 def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 VPR128:$src),
462 (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
463 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
464 def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 VPR128:$src),
465 (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
466 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
467 def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 VPR128:$src),
468 (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
469 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
470 def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 VPR128:$src),
471 (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
472 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
473 def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 VPR128:$src),
474 (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
475 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
476 def : Pat<(v2f64 (int_arm_neon_vbsl (v2f64 VPR128:$src),
477 (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
478 (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
481 // Additional patterns for bitwise instruction BSL
482 defm: Neon_bitwise3V_patterns<Neon_bsl, BSLvvv_8B, BSLvvv_16B>;
// Neon_NoBSLop: a never-matching fragment (its predicate always returns
// false).  BIT/BIF reuse the BSL constraint class for assembly/encoding but
// must not be selected from the Neon_bsl node, so they get this dummy op.
484 def Neon_NoBSLop : PatFrag<(ops node:$src, node:$Rn, node:$Rm),
485 (Neon_bsl node:$src, node:$Rn, node:$Rm),
486 [{ (void)N; return false; }]>;
488 // Vector Bitwise Insert if True
490 def BITvvv_8B : NeonI_3VSame_Constraint_impl<"bit", ".8b", VPR64, v8i8,
491 0b0, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
492 def BITvvv_16B : NeonI_3VSame_Constraint_impl<"bit", ".16b", VPR128, v16i8,
493 0b1, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
495 // Vector Bitwise Insert if False
497 def BIFvvv_8B : NeonI_3VSame_Constraint_impl<"bif", ".8b", VPR64, v8i8,
498 0b0, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
499 def BIFvvv_16B : NeonI_3VSame_Constraint_impl<"bif", ".16b", VPR128, v16i8,
500 0b1, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
502 // Vector Absolute Difference and Accumulate (Signed, Unsigned)
// Absolute-difference-and-accumulate fragments: Ra + |Rn - Rm| using the
// ARM NEON unsigned/signed absolute-difference intrinsics.
504 def Neon_uaba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
505 (add node:$Ra, (int_arm_neon_vabdu node:$Rn, node:$Rm))>;
506 def Neon_saba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
507 (add node:$Ra, (int_arm_neon_vabds node:$Rn, node:$Rm))>;
509 // Vector Absolute Difference and Accumulate (Unsigned)
// UABA (u=1) and SABA (u=0) share opcode 0b01111; byte/half/word sizes only.
510 def UABAvvv_8B : NeonI_3VSame_Constraint_impl<"uaba", ".8b", VPR64, v8i8,
511 0b0, 0b1, 0b00, 0b01111, Neon_uaba>;
512 def UABAvvv_16B : NeonI_3VSame_Constraint_impl<"uaba", ".16b", VPR128, v16i8,
513 0b1, 0b1, 0b00, 0b01111, Neon_uaba>;
514 def UABAvvv_4H : NeonI_3VSame_Constraint_impl<"uaba", ".4h", VPR64, v4i16,
515 0b0, 0b1, 0b01, 0b01111, Neon_uaba>;
516 def UABAvvv_8H : NeonI_3VSame_Constraint_impl<"uaba", ".8h", VPR128, v8i16,
517 0b1, 0b1, 0b01, 0b01111, Neon_uaba>;
518 def UABAvvv_2S : NeonI_3VSame_Constraint_impl<"uaba", ".2s", VPR64, v2i32,
519 0b0, 0b1, 0b10, 0b01111, Neon_uaba>;
520 def UABAvvv_4S : NeonI_3VSame_Constraint_impl<"uaba", ".4s", VPR128, v4i32,
521 0b1, 0b1, 0b10, 0b01111, Neon_uaba>;
523 // Vector Absolute Difference and Accumulate (Signed)
524 def SABAvvv_8B : NeonI_3VSame_Constraint_impl<"saba", ".8b", VPR64, v8i8,
525 0b0, 0b0, 0b00, 0b01111, Neon_saba>;
526 def SABAvvv_16B : NeonI_3VSame_Constraint_impl<"saba", ".16b", VPR128, v16i8,
527 0b1, 0b0, 0b00, 0b01111, Neon_saba>;
528 def SABAvvv_4H : NeonI_3VSame_Constraint_impl<"saba", ".4h", VPR64, v4i16,
529 0b0, 0b0, 0b01, 0b01111, Neon_saba>;
530 def SABAvvv_8H : NeonI_3VSame_Constraint_impl<"saba", ".8h", VPR128, v8i16,
531 0b1, 0b0, 0b01, 0b01111, Neon_saba>;
532 def SABAvvv_2S : NeonI_3VSame_Constraint_impl<"saba", ".2s", VPR64, v2i32,
533 0b0, 0b0, 0b10, 0b01111, Neon_saba>;
534 def SABAvvv_4S : NeonI_3VSame_Constraint_impl<"saba", ".4s", VPR128, v4i32,
535 0b1, 0b0, 0b10, 0b01111, Neon_saba>;
538 // Vector Absolute Difference (Signed, Unsigned)
// Absolute difference, integer (UABD/SABD) and floating point (FABD).
// NOTE(review): FABD passes int_arm_neon_vabds for all three FP variants --
// the ARM intrinsic is type-overloaded, but confirm the FP forms should not
// use a distinct intrinsic.
539 defm UABDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01110, "uabd", int_arm_neon_vabdu, 0>;
540 defm SABDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01110, "sabd", int_arm_neon_vabds, 0>;
542 // Vector Absolute Difference (Floating Point)
543 defm FABDvvv: NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11010, "fabd",
544 int_arm_neon_vabds, int_arm_neon_vabds,
545 int_arm_neon_vabds, v2f32, v4f32, v2f64, 0>;
547 // Vector Reciprocal Step (Floating Point)
// Newton-Raphson step intrinsics from the ARM NEON set.
548 defm FRECPSvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11111, "frecps",
549 int_arm_neon_vrecps, int_arm_neon_vrecps,
551 v2f32, v4f32, v2f64, 0>;
553 // Vector Reciprocal Square Root Step (Floating Point)
554 defm FRSQRTSvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11111, "frsqrts",
555 int_arm_neon_vrsqrts,
556 int_arm_neon_vrsqrts,
557 int_arm_neon_vrsqrts,
558 v2f32, v4f32, v2f64, 0>;
560 // Vector Comparisons
// Compare fragments: bind a specific CondCode onto the generic Neon_cmp
// node so each instruction multiclass can match one comparison kind.
562 def Neon_cmeq : PatFrag<(ops node:$lhs, node:$rhs),
563 (Neon_cmp node:$lhs, node:$rhs, SETEQ)>;
564 def Neon_cmphs : PatFrag<(ops node:$lhs, node:$rhs),
565 (Neon_cmp node:$lhs, node:$rhs, SETUGE)>;
566 def Neon_cmge : PatFrag<(ops node:$lhs, node:$rhs),
567 (Neon_cmp node:$lhs, node:$rhs, SETGE)>;
568 def Neon_cmhi : PatFrag<(ops node:$lhs, node:$rhs),
569 (Neon_cmp node:$lhs, node:$rhs, SETUGT)>;
570 def Neon_cmgt : PatFrag<(ops node:$lhs, node:$rhs),
571 (Neon_cmp node:$lhs, node:$rhs, SETGT)>;
573 // NeonI_compare_aliases class: swaps register operands to implement
574 // comparison aliases, e.g., CMLE is alias for CMGE with operands reversed.
// Note the alias result writes ($Rd, $Rm, $Rn): the assembly's Rn/Rm are
// swapped when mapped onto the underlying instruction.  The trailing 0b0
// marks the alias as parse-only.
575 class NeonI_compare_aliases<string asmop, string asmlane,
576 Instruction inst, RegisterOperand VPRC>
577 : NeonInstAlias<asmop # "\t$Rd" # asmlane #", $Rn" # asmlane #
579 (inst VPRC:$Rd, VPRC:$Rm, VPRC:$Rn), 0b0>;
581 // Vector Comparisons (Integer)
583 // Vector Compare Mask Equal (Integer)
// Integer compare instructions.  CMEQ/CMTST share opcode 0b10001 and
// CMHS/CMGE share 0b00111, distinguished by the u bit.
// NOTE(review): this `let isCommutable =1` region's closing brace is not
// visible in this extract, and the multiclass below also sets isCommutable
// from its Commutable=0 argument (the inner `let` wins in TableGen) --
// verify the intended commutability in the full file.
584 let isCommutable =1 in {
585 defm CMEQvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10001, "cmeq", Neon_cmeq, 0>;
588 // Vector Compare Mask Higher or Same (Unsigned Integer)
589 defm CMHSvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00111, "cmhs", Neon_cmphs, 0>;
591 // Vector Compare Mask Greater Than or Equal (Integer)
592 defm CMGEvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00111, "cmge", Neon_cmge, 0>;
594 // Vector Compare Mask Higher (Unsigned Integer)
595 defm CMHIvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00110, "cmhi", Neon_cmhi, 0>;
597 // Vector Compare Mask Greater Than (Integer)
598 defm CMGTvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00110, "cmgt", Neon_cmgt, 0>;
600 // Vector Compare Mask Bitwise Test (Integer)
601 defm CMTSTvvv: NeonI_3VSame_BHSD_sizes<0b0, 0b10001, "cmtst", Neon_tst, 0>;
603 // Vector Compare Mask Less or Same (Unsigned Integer)
604 // CMLS is alias for CMHS with operands reversed.
605 def CMLSvvv_8B : NeonI_compare_aliases<"cmls", ".8b", CMHSvvv_8B, VPR64>;
606 def CMLSvvv_16B : NeonI_compare_aliases<"cmls", ".16b", CMHSvvv_16B, VPR128>;
607 def CMLSvvv_4H : NeonI_compare_aliases<"cmls", ".4h", CMHSvvv_4H, VPR64>;
608 def CMLSvvv_8H : NeonI_compare_aliases<"cmls", ".8h", CMHSvvv_8H, VPR128>;
609 def CMLSvvv_2S : NeonI_compare_aliases<"cmls", ".2s", CMHSvvv_2S, VPR64>;
610 def CMLSvvv_4S : NeonI_compare_aliases<"cmls", ".4s", CMHSvvv_4S, VPR128>;
611 def CMLSvvv_2D : NeonI_compare_aliases<"cmls", ".2d", CMHSvvv_2D, VPR128>;
613 // Vector Compare Mask Less Than or Equal (Integer)
614 // CMLE is alias for CMGE with operands reversed.
615 def CMLEvvv_8B : NeonI_compare_aliases<"cmle", ".8b", CMGEvvv_8B, VPR64>;
616 def CMLEvvv_16B : NeonI_compare_aliases<"cmle", ".16b", CMGEvvv_16B, VPR128>;
617 def CMLEvvv_4H : NeonI_compare_aliases<"cmle", ".4h", CMGEvvv_4H, VPR64>;
618 def CMLEvvv_8H : NeonI_compare_aliases<"cmle", ".8h", CMGEvvv_8H, VPR128>;
619 def CMLEvvv_2S : NeonI_compare_aliases<"cmle", ".2s", CMGEvvv_2S, VPR64>;
620 def CMLEvvv_4S : NeonI_compare_aliases<"cmle", ".4s", CMGEvvv_4S, VPR128>;
621 def CMLEvvv_2D : NeonI_compare_aliases<"cmle", ".2d", CMGEvvv_2D, VPR128>;
623 // Vector Compare Mask Lower (Unsigned Integer)
624 // CMLO is alias for CMHI with operands reversed.
625 def CMLOvvv_8B : NeonI_compare_aliases<"cmlo", ".8b", CMHIvvv_8B, VPR64>;
626 def CMLOvvv_16B : NeonI_compare_aliases<"cmlo", ".16b", CMHIvvv_16B, VPR128>;
627 def CMLOvvv_4H : NeonI_compare_aliases<"cmlo", ".4h", CMHIvvv_4H, VPR64>;
628 def CMLOvvv_8H : NeonI_compare_aliases<"cmlo", ".8h", CMHIvvv_8H, VPR128>;
629 def CMLOvvv_2S : NeonI_compare_aliases<"cmlo", ".2s", CMHIvvv_2S, VPR64>;
630 def CMLOvvv_4S : NeonI_compare_aliases<"cmlo", ".4s", CMHIvvv_4S, VPR128>;
631 def CMLOvvv_2D : NeonI_compare_aliases<"cmlo", ".2d", CMHIvvv_2D, VPR128>;
633 // Vector Compare Mask Less Than (Integer)
634 // CMLT is alias for CMGT with operands reversed.
635 def CMLTvvv_8B : NeonI_compare_aliases<"cmlt", ".8b", CMGTvvv_8B, VPR64>;
636 def CMLTvvv_16B : NeonI_compare_aliases<"cmlt", ".16b", CMGTvvv_16B, VPR128>;
637 def CMLTvvv_4H : NeonI_compare_aliases<"cmlt", ".4h", CMGTvvv_4H, VPR64>;
638 def CMLTvvv_8H : NeonI_compare_aliases<"cmlt", ".8h", CMGTvvv_8H, VPR128>;
639 def CMLTvvv_2S : NeonI_compare_aliases<"cmlt", ".2s", CMGTvvv_2S, VPR64>;
640 def CMLTvvv_4S : NeonI_compare_aliases<"cmlt", ".4s", CMGTvvv_4S, VPR128>;
641 def CMLTvvv_2D : NeonI_compare_aliases<"cmlt", ".2d", CMGTvvv_2D, VPR128>;
// neon_uimm0: an operand that only accepts the literal immediate 0 -- the
// "#0" in compare-against-zero instructions (CMEQvvi etc. below).
644 def neon_uimm0_asmoperand : AsmOperandClass
647 let PredicateMethod = "isUImm<0>";
648 let RenderMethod = "addImmOperands";
651 def neon_uimm0 : Operand<i32>, ImmLeaf<i32, [{return Imm == 0;}]> {
652 let ParserMatchClass = neon_uimm0_asmoperand;
653 let PrintMethod = "printNeonUImm0Operand";
// NeonI_cmpz_sizes: integer compare-against-zero instructions (two-register
// misc encoding).  Each variant matches Neon_cmpz with the CondCode bound at
// instantiation; the $Imm operand is restricted to 0 via neon_uimm0.
657 multiclass NeonI_cmpz_sizes<bit u, bits<5> opcode, string asmop, CondCode CC>
659 def _8B : NeonI_2VMisc<0b0, u, 0b00, opcode,
660 (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
661 asmop # "\t$Rd.8b, $Rn.8b, $Imm",
662 [(set (v8i8 VPR64:$Rd),
663 (v8i8 (Neon_cmpz (v8i8 VPR64:$Rn), (i32 imm:$Imm), CC)))],
666 def _16B : NeonI_2VMisc<0b1, u, 0b00, opcode,
667 (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
668 asmop # "\t$Rd.16b, $Rn.16b, $Imm",
669 [(set (v16i8 VPR128:$Rd),
670 (v16i8 (Neon_cmpz (v16i8 VPR128:$Rn), (i32 imm:$Imm), CC)))],
673 def _4H : NeonI_2VMisc<0b0, u, 0b01, opcode,
674 (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
675 asmop # "\t$Rd.4h, $Rn.4h, $Imm",
676 [(set (v4i16 VPR64:$Rd),
677 (v4i16 (Neon_cmpz (v4i16 VPR64:$Rn), (i32 imm:$Imm), CC)))],
680 def _8H : NeonI_2VMisc<0b1, u, 0b01, opcode,
681 (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
682 asmop # "\t$Rd.8h, $Rn.8h, $Imm",
683 [(set (v8i16 VPR128:$Rd),
684 (v8i16 (Neon_cmpz (v8i16 VPR128:$Rn), (i32 imm:$Imm), CC)))],
687 def _2S : NeonI_2VMisc<0b0, u, 0b10, opcode,
688 (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
689 asmop # "\t$Rd.2s, $Rn.2s, $Imm",
690 [(set (v2i32 VPR64:$Rd),
691 (v2i32 (Neon_cmpz (v2i32 VPR64:$Rn), (i32 imm:$Imm), CC)))],
694 def _4S : NeonI_2VMisc<0b1, u, 0b10, opcode,
695 (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
696 asmop # "\t$Rd.4s, $Rn.4s, $Imm",
697 [(set (v4i32 VPR128:$Rd),
698 (v4i32 (Neon_cmpz (v4i32 VPR128:$Rn), (i32 imm:$Imm), CC)))],
701 def _2D : NeonI_2VMisc<0b1, u, 0b11, opcode,
702 (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
703 asmop # "\t$Rd.2d, $Rn.2d, $Imm",
704 [(set (v2i64 VPR128:$Rd),
705 (v2i64 (Neon_cmpz (v2i64 VPR128:$Rn), (i32 imm:$Imm), CC)))],
709 // Vector Compare Mask Equal to Zero (Integer)
// Integer compare-against-zero instantiations.  The less-than forms are
// real encodings here (not reversed-operand aliases): the hardware provides
// CMLE/CMLT against zero directly.
710 defm CMEQvvi : NeonI_cmpz_sizes<0b0, 0b01001, "cmeq", SETEQ>;
712 // Vector Compare Mask Greater Than or Equal to Zero (Signed Integer)
713 defm CMGEvvi : NeonI_cmpz_sizes<0b1, 0b01000, "cmge", SETGE>;
715 // Vector Compare Mask Greater Than Zero (Signed Integer)
716 defm CMGTvvi : NeonI_cmpz_sizes<0b0, 0b01000, "cmgt", SETGT>;
718 // Vector Compare Mask Less Than or Equal To Zero (Signed Integer)
719 defm CMLEvvi : NeonI_cmpz_sizes<0b1, 0b01001, "cmle", SETLE>;
721 // Vector Compare Mask Less Than Zero (Signed Integer)
722 defm CMLTvvi : NeonI_cmpz_sizes<0b0, 0b01010, "cmlt", SETLT>;
724 // Vector Comparisons (Floating Point)
726 // Vector Compare Mask Equal (Floating Point)
// FP compares produce integer element masks, hence v2i32/v4i32/v2i64
// result types on FP operands.
// NOTE(review): this `let isCommutable =1` region's closing brace is not
// visible in this extract, and the multiclass receives Commutable=0 (the
// inner `let` overrides the outer one in TableGen) -- verify intent.
727 let isCommutable =1 in {
728 defm FCMEQvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11100, "fcmeq", Neon_cmeq,
729 Neon_cmeq, Neon_cmeq,
730 v2i32, v4i32, v2i64, 0>;
733 // Vector Compare Mask Greater Than Or Equal (Floating Point)
734 defm FCMGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11100, "fcmge", Neon_cmge,
735 Neon_cmge, Neon_cmge,
736 v2i32, v4i32, v2i64, 0>;
738 // Vector Compare Mask Greater Than (Floating Point)
739 defm FCMGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11100, "fcmgt", Neon_cmgt,
740 Neon_cmgt, Neon_cmgt,
741 v2i32, v4i32, v2i64, 0>;
743 // Vector Compare Mask Less Than Or Equal (Floating Point)
744 // FCMLE is alias for FCMGE with operands reversed.
745 def FCMLEvvv_2S : NeonI_compare_aliases<"fcmle", ".2s", FCMGEvvv_2S, VPR64>;
746 def FCMLEvvv_4S : NeonI_compare_aliases<"fcmle", ".4s", FCMGEvvv_4S, VPR128>;
747 def FCMLEvvv_2D : NeonI_compare_aliases<"fcmle", ".2d", FCMGEvvv_2D, VPR128>;
749 // Vector Compare Mask Less Than (Floating Point)
750 // FCMLT is alias for FCMGT with operands reversed.
751 def FCMLTvvv_2S : NeonI_compare_aliases<"fcmlt", ".2s", FCMGTvvv_2S, VPR64>;
752 def FCMLTvvv_4S : NeonI_compare_aliases<"fcmlt", ".4s", FCMGTvvv_4S, VPR128>;
753 def FCMLTvvv_2D : NeonI_compare_aliases<"fcmlt", ".2d", FCMGTvvv_2D, VPR128>;
// NeonI_fpcmpz_sizes: floating-point compare against #0.0 (two-register
// misc encoding).  All three variants -- including the f64 one -- take an
// fpz32 immediate operand; the pattern matches Neon_cmpz with an f32 fpimm.
756 multiclass NeonI_fpcmpz_sizes<bit u, bit size, bits<5> opcode,
757 string asmop, CondCode CC>
759 def _2S : NeonI_2VMisc<0b0, u, {size, 0b0}, opcode,
760 (outs VPR64:$Rd), (ins VPR64:$Rn, fpz32:$FPImm),
761 asmop # "\t$Rd.2s, $Rn.2s, $FPImm",
762 [(set (v2i32 VPR64:$Rd),
763 (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpimm:$FPImm), CC)))],
766 def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode,
767 (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm),
768 asmop # "\t$Rd.4s, $Rn.4s, $FPImm",
769 [(set (v4i32 VPR128:$Rd),
770 (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpimm:$FPImm), CC)))],
773 def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode,
774 (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm),
775 asmop # "\t$Rd.2d, $Rn.2d, $FPImm",
776 [(set (v2i64 VPR128:$Rd),
777 (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn), (f32 fpimm:$FPImm), CC)))],
781 // Vector Compare Mask Equal to Zero (Floating Point)
782 defm FCMEQvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01101, "fcmeq", SETEQ>;
784 // Vector Compare Mask Greater Than or Equal to Zero (Floating Point)
785 defm FCMGEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01100, "fcmge", SETGE>;
787 // Vector Compare Mask Greater Than Zero (Floating Point)
788 defm FCMGTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01100, "fcmgt", SETGT>;
790 // Vector Compare Mask Less Than or Equal To Zero (Floating Point)
791 defm FCMLEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01101, "fcmle", SETLE>;
793 // Vector Compare Mask Less Than Zero (Floating Point)
794 defm FCMLTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01110, "fcmlt", SETLT>;
796 // Vector Absolute Comparisons (Floating Point)
// These compare |Rn| against |Rm|; the 64-bit lanes go through the
// AArch64-specific intrinsic, the 32-bit lanes reuse the ARM NEON ones.
798 // Vector Absolute Compare Mask Greater Than Or Equal (Floating Point)
799 defm FACGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11101, "facge",
800 int_arm_neon_vacged, int_arm_neon_vacgeq,
801 int_aarch64_neon_vacgeq,
802 v2i32, v4i32, v2i64, 0>;
804 // Vector Absolute Compare Mask Greater Than (Floating Point)
805 defm FACGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11101, "facgt",
806 int_arm_neon_vacgtd, int_arm_neon_vacgtq,
807 int_aarch64_neon_vacgtq,
808 v2i32, v4i32, v2i64, 0>;
810 // Vector Absolute Compare Mask Less Than Or Equal (Floating Point)
811 // FACLE is alias for FACGE with operands reversed.
812 def FACLEvvv_2S : NeonI_compare_aliases<"facle", ".2s", FACGEvvv_2S, VPR64>;
813 def FACLEvvv_4S : NeonI_compare_aliases<"facle", ".4s", FACGEvvv_4S, VPR128>;
814 def FACLEvvv_2D : NeonI_compare_aliases<"facle", ".2d", FACGEvvv_2D, VPR128>;
816 // Vector Absolute Compare Mask Less Than (Floating Point)
817 // FACLT is alias for FACGT with operands reversed.
818 def FACLTvvv_2S : NeonI_compare_aliases<"faclt", ".2s", FACGTvvv_2S, VPR64>;
819 def FACLTvvv_4S : NeonI_compare_aliases<"faclt", ".4s", FACGTvvv_4S, VPR128>;
820 def FACLTvvv_2D : NeonI_compare_aliases<"faclt", ".2d", FACGTvvv_2D, VPR128>;
// Three-register same-arrangement integer arithmetic. The trailing 1/0
// argument is presumably an isCommutable flag (1 for add/max-style ops,
// 0 for sub) — TODO confirm against the multiclass definition.
822 // Vector halving add (Integer Signed, Unsigned)
823 defm SHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00000, "shadd",
824 int_arm_neon_vhadds, 1>;
825 defm UHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00000, "uhadd",
826 int_arm_neon_vhaddu, 1>;
828 // Vector halving sub (Integer Signed, Unsigned)
829 defm SHSUBvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00100, "shsub",
830 int_arm_neon_vhsubs, 0>;
831 defm UHSUBvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00100, "uhsub",
832 int_arm_neon_vhsubu, 0>;
834 // Vector rounding halving add (Integer Signed, Unsigned)
835 defm SRHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00010, "srhadd",
836 int_arm_neon_vrhadds, 1>;
837 defm URHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00010, "urhadd",
838 int_arm_neon_vrhaddu, 1>;
840 // Vector Saturating add (Integer Signed, Unsigned)
841 defm SQADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00001, "sqadd",
842 int_arm_neon_vqadds, 1>;
843 defm UQADDvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00001, "uqadd",
844 int_arm_neon_vqaddu, 1>;
846 // Vector Saturating sub (Integer Signed, Unsigned)
847 defm SQSUBvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00101, "sqsub",
848 int_arm_neon_vqsubs, 1>;
849 defm UQSUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00101, "uqsub",
850 int_arm_neon_vqsubu, 1>;
852 // Vector Shift Left (Signed and Unsigned Integer)
853 defm SSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01000, "sshl",
854 int_arm_neon_vshifts, 1>;
855 defm USHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01000, "ushl",
856 int_arm_neon_vshiftu, 1>;
858 // Vector Saturating Shift Left (Signed and Unsigned Integer)
859 defm SQSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01001, "sqshl",
860 int_arm_neon_vqshifts, 1>;
861 defm UQSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01001, "uqshl",
862 int_arm_neon_vqshiftu, 1>;
864 // Vector Rounding Shift Left (Signed and Unsigned Integer)
865 defm SRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01010, "srshl",
866 int_arm_neon_vrshifts, 1>;
867 defm URSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01010, "urshl",
868 int_arm_neon_vrshiftu, 1>;
870 // Vector Saturating Rounding Shift Left (Signed and Unsigned Integer)
871 defm SQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01011, "sqrshl",
872 int_arm_neon_vqrshifts, 1>;
873 defm UQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01011, "uqrshl",
874 int_arm_neon_vqrshiftu, 1>;
// Integer and floating-point max/min, plus the *NM variants that prefer a
// number over a quiet NaN, and the pairwise (P) forms. Note the FP forms
// reuse the ARM int_arm_neon_vmaxs/vmins intrinsics (polymorphic over FP
// vector types); the *NM forms use the AArch64-specific intrinsics.
876 // Vector Maximum (Signed and Unsigned Integer)
877 defm SMAXvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01100, "smax", int_arm_neon_vmaxs, 1>;
878 defm UMAXvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01100, "umax", int_arm_neon_vmaxu, 1>;
880 // Vector Minimum (Signed and Unsigned Integer)
881 defm SMINvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01101, "smin", int_arm_neon_vmins, 1>;
882 defm UMINvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01101, "umin", int_arm_neon_vminu, 1>;
884 // Vector Maximum (Floating Point)
885 defm FMAXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11110, "fmax",
886 int_arm_neon_vmaxs, int_arm_neon_vmaxs,
887 int_arm_neon_vmaxs, v2f32, v4f32, v2f64, 1>;
889 // Vector Minimum (Floating Point)
890 defm FMINvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11110, "fmin",
891 int_arm_neon_vmins, int_arm_neon_vmins,
892 int_arm_neon_vmins, v2f32, v4f32, v2f64, 1>;
894 // Vector maxNum (Floating Point) - prefer a number over a quiet NaN
895 defm FMAXNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11000, "fmaxnm",
896 int_aarch64_neon_vmaxnm,
897 int_aarch64_neon_vmaxnm,
898 int_aarch64_neon_vmaxnm,
899 v2f32, v4f32, v2f64, 1>;
901 // Vector minNum (Floating Point) - prefer a number over a quiet NaN
902 defm FMINNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11000, "fminnm",
903 int_aarch64_neon_vminnm,
904 int_aarch64_neon_vminnm,
905 int_aarch64_neon_vminnm,
906 v2f32, v4f32, v2f64, 1>;
908 // Vector Maximum Pairwise (Signed and Unsigned Integer)
909 defm SMAXPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10100, "smaxp", int_arm_neon_vpmaxs, 1>;
910 defm UMAXPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10100, "umaxp", int_arm_neon_vpmaxu, 1>;
912 // Vector Minimum Pairwise (Signed and Unsigned Integer)
913 defm SMINPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10101, "sminp", int_arm_neon_vpmins, 1>;
914 defm UMINPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10101, "uminp", int_arm_neon_vpminu, 1>;
916 // Vector Maximum Pairwise (Floating Point)
917 defm FMAXPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11110, "fmaxp",
918 int_arm_neon_vpmaxs, int_arm_neon_vpmaxs,
919 int_arm_neon_vpmaxs, v2f32, v4f32, v2f64, 1>;
921 // Vector Minimum Pairwise (Floating Point)
922 defm FMINPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11110, "fminp",
923 int_arm_neon_vpmins, int_arm_neon_vpmins,
924 int_arm_neon_vpmins, v2f32, v4f32, v2f64, 1>;
926 // Vector maxNum Pairwise (Floating Point) - prefer a number over a quiet NaN
927 defm FMAXNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11000, "fmaxnmp",
928 int_aarch64_neon_vpmaxnm,
929 int_aarch64_neon_vpmaxnm,
930 int_aarch64_neon_vpmaxnm,
931 v2f32, v4f32, v2f64, 1>;
933 // Vector minNum Pairwise (Floating Point) - prefer a number over a quiet NaN
934 defm FMINNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11000, "fminnmp",
935 int_aarch64_neon_vpminnm,
936 int_aarch64_neon_vpminnm,
937 int_aarch64_neon_vpminnm,
938 v2f32, v4f32, v2f64, 1>;
940 // Vector Addition Pairwise (Integer)
941 defm ADDP : NeonI_3VSame_BHSD_sizes<0b0, 0b10111, "addp", int_arm_neon_vpadd, 1>;
943 // Vector Addition Pairwise (Floating Point)
944 defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp",
948 v2f32, v4f32, v2f64, 1>;
950 // Vector Saturating Doubling Multiply High
951 defm SQDMULHvvv : NeonI_3VSame_HS_sizes<0b0, 0b10110, "sqdmulh",
952 int_arm_neon_vqdmulh, 1>;
954 // Vector Saturating Rounding Doubling Multiply High
955 defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh",
956 int_arm_neon_vqrdmulh, 1>;
958 // Vector Multiply Extended (Floating Point)
// FMULX: multiply with 0*inf -> 2.0 semantics, via AArch64 vmulx intrinsic.
959 defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx",
960 int_aarch64_neon_vmulx,
961 int_aarch64_neon_vmulx,
962 int_aarch64_neon_vmulx,
963 v2f32, v4f32, v2f64, 1>;
965 // Vector Immediate Instructions
// Assembler operand classes and matcher operands for the modified-immediate
// shift field: the OpCmode encoding carries both the shift amount (ShiftImm)
// and whether ones are shifted in (ShiftOnesIn, i.e. MSL vs LSL).
967 multiclass neon_mov_imm_shift_asmoperands<string PREFIX>
969 def _asmoperand : AsmOperandClass
971 let Name = "NeonMovImmShift" # PREFIX;
972 let RenderMethod = "addNeonMovImmShift" # PREFIX # "Operands";
973 let PredicateMethod = "isNeonMovImmShift" # PREFIX;
977 // Definition of vector immediates shift operands
979 // The selectable use-cases extract the shift operation
980 // information from the OpCmode fields encoded in the immediate.
981 def neon_mod_shift_imm_XFORM : SDNodeXForm<imm, [{
982 uint64_t OpCmode = N->getZExtValue();
984 unsigned ShiftOnesIn;
986 A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
987 if (!HasShift) return SDValue();
988 return CurDAG->getTargetConstant(ShiftImm, MVT::i32);
991 // Vector immediates shift operands which accept LSL and MSL
992 // shift operators with shift value in the range of 0, 8, 16, 24 (LSL),
993 // or 0, 8 (LSLH) or 8, 16 (MSL).
994 defm neon_mov_imm_LSL : neon_mov_imm_shift_asmoperands<"LSL">;
995 defm neon_mov_imm_MSL : neon_mov_imm_shift_asmoperands<"MSL">;
996 // LSLH restricts shift amount to 0, 8 out of 0, 8, 16, 24
997 defm neon_mov_imm_LSLH : neon_mov_imm_shift_asmoperands<"LSLH">;
999 multiclass neon_mov_imm_shift_operands<string PREFIX,
1000 string HALF, string ISHALF, code pred>
1002 def _operand : Operand<i32>, ImmLeaf<i32, pred, neon_mod_shift_imm_XFORM>
1005 "printNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
1007 "DecodeNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
1008 let ParserMatchClass =
1009 !cast<AsmOperandClass>("neon_mov_imm_" # PREFIX # HALF # "_asmoperand");
// The predicates below select LSL (ShiftOnesIn clear) vs MSL (ShiftOnesIn set).
1013 defm neon_mov_imm_LSL : neon_mov_imm_shift_operands<"LSL", "", "false", [{
1015 unsigned ShiftOnesIn;
1017 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1018 return (HasShift && !ShiftOnesIn);
1021 defm neon_mov_imm_MSL : neon_mov_imm_shift_operands<"MSL", "", "false", [{
1023 unsigned ShiftOnesIn;
1025 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1026 return (HasShift && ShiftOnesIn);
1029 defm neon_mov_imm_LSLH : neon_mov_imm_shift_operands<"LSL", "H", "true", [{
1031 unsigned ShiftOnesIn;
1033 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1034 return (HasShift && !ShiftOnesIn);
// Plain unsigned-immediate assembler operand classes (1/2/8-bit) and the
// matcher operands built on them.
1037 def neon_uimm1_asmoperand : AsmOperandClass
1040 let PredicateMethod = "isUImm<1>";
1041 let RenderMethod = "addImmOperands";
1044 def neon_uimm2_asmoperand : AsmOperandClass
1047 let PredicateMethod = "isUImm<2>";
1048 let RenderMethod = "addImmOperands";
1051 def neon_uimm8_asmoperand : AsmOperandClass
1054 let PredicateMethod = "isUImm<8>";
1055 let RenderMethod = "addImmOperands";
1058 def neon_uimm8 : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
1059 let ParserMatchClass = neon_uimm8_asmoperand;
1060 let PrintMethod = "printNeonUImm8Operand";
1063 def neon_uimm64_mask_asmoperand : AsmOperandClass
1065 let Name = "NeonUImm64Mask";
1066 let PredicateMethod = "isNeonUImm64Mask";
1067 let RenderMethod = "addNeonUImm64MaskOperands";
1070 // MCOperand for 64-bit bytemask with each byte having only the
1071 // value 0x00 and 0xff is encoded as an unsigned 8-bit value
1072 def neon_uimm64_mask : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
1073 let ParserMatchClass = neon_uimm64_mask_asmoperand;
1074 let PrintMethod = "printNeonUImm64MaskOperand";
// MOVI/MVNI with an optional LSL shift: 8-bit immediate replicated per word
// (2s/4s, cmode = 0SS0 from the two shift bits) or per halfword (4h/8h,
// cmode = 10S0 from the single LSLH shift bit).
1077 multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
1078 SDPatternOperator opnode>
1080 // shift zeros, per word
1081 def _2S : NeonI_1VModImm<0b0, op,
1083 (ins neon_uimm8:$Imm,
1084 neon_mov_imm_LSL_operand:$Simm),
1085 !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
1086 [(set (v2i32 VPR64:$Rd),
1087 (v2i32 (opnode (timm:$Imm),
1088 (neon_mov_imm_LSL_operand:$Simm))))],
1091 let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
1094 def _4S : NeonI_1VModImm<0b1, op,
1096 (ins neon_uimm8:$Imm,
1097 neon_mov_imm_LSL_operand:$Simm),
1098 !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
1099 [(set (v4i32 VPR128:$Rd),
1100 (v4i32 (opnode (timm:$Imm),
1101 (neon_mov_imm_LSL_operand:$Simm))))],
1104 let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
1107 // shift zeros, per halfword
1108 def _4H : NeonI_1VModImm<0b0, op,
1110 (ins neon_uimm8:$Imm,
1111 neon_mov_imm_LSLH_operand:$Simm),
1112 !strconcat(asmop, " $Rd.4h, $Imm$Simm"),
1113 [(set (v4i16 VPR64:$Rd),
1114 (v4i16 (opnode (timm:$Imm),
1115 (neon_mov_imm_LSLH_operand:$Simm))))],
1118 let cmode = {0b1, 0b0, Simm, 0b0};
1121 def _8H : NeonI_1VModImm<0b1, op,
1123 (ins neon_uimm8:$Imm,
1124 neon_mov_imm_LSLH_operand:$Simm),
1125 !strconcat(asmop, " $Rd.8h, $Imm$Simm"),
1126 [(set (v8i16 VPR128:$Rd),
1127 (v8i16 (opnode (timm:$Imm),
1128 (neon_mov_imm_LSLH_operand:$Simm))))],
1131 let cmode = {0b1, 0b0, Simm, 0b0};
// BIC/ORR immediate forms: read-modify-write ($src tied to $Rd), combining
// the source with a modified-immediate vector (cmode low bit = 1 selects the
// ORR/BIC encodings rather than plain MOVI/MVNI).
1135 multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
1136 SDPatternOperator opnode,
1137 SDPatternOperator neonopnode>
1139 let Constraints = "$src = $Rd" in {
1140 // shift zeros, per word
1141 def _2S : NeonI_1VModImm<0b0, op,
1143 (ins VPR64:$src, neon_uimm8:$Imm,
1144 neon_mov_imm_LSL_operand:$Simm),
1145 !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
1146 [(set (v2i32 VPR64:$Rd),
1147 (v2i32 (opnode (v2i32 VPR64:$src),
1148 (v2i32 (bitconvert (v2i32 (neonopnode timm:$Imm,
1149 neon_mov_imm_LSL_operand:$Simm)))))))],
1152 let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
1155 def _4S : NeonI_1VModImm<0b1, op,
1157 (ins VPR128:$src, neon_uimm8:$Imm,
1158 neon_mov_imm_LSL_operand:$Simm),
1159 !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
1160 [(set (v4i32 VPR128:$Rd),
1161 (v4i32 (opnode (v4i32 VPR128:$src),
1162 (v4i32 (bitconvert (v4i32 (neonopnode timm:$Imm,
1163 neon_mov_imm_LSL_operand:$Simm)))))))],
1166 let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
1169 // shift zeros, per halfword
1170 def _4H : NeonI_1VModImm<0b0, op,
1172 (ins VPR64:$src, neon_uimm8:$Imm,
1173 neon_mov_imm_LSLH_operand:$Simm),
1174 !strconcat(asmop, " $Rd.4h, $Imm$Simm"),
1175 [(set (v4i16 VPR64:$Rd),
1176 (v4i16 (opnode (v4i16 VPR64:$src),
// NOTE(review): the ins list uses neon_mov_imm_LSLH_operand but the pattern
// below references neon_mov_imm_LSL_operand — looks inconsistent; confirm
// which operand the halfword patterns are meant to match.
1177 (v4i16 (bitconvert (v4i16 (neonopnode timm:$Imm,
1178 neon_mov_imm_LSL_operand:$Simm)))))))],
1181 let cmode = {0b1, 0b0, Simm, 0b1};
1184 def _8H : NeonI_1VModImm<0b1, op,
1186 (ins VPR128:$src, neon_uimm8:$Imm,
1187 neon_mov_imm_LSLH_operand:$Simm),
1188 !strconcat(asmop, " $Rd.8h, $Imm$Simm"),
1189 [(set (v8i16 VPR128:$Rd),
1190 (v8i16 (opnode (v8i16 VPR128:$src),
1191 (v8i16 (bitconvert (v8i16 (neonopnode timm:$Imm,
1192 neon_mov_imm_LSL_operand:$Simm)))))))],
1195 let cmode = {0b1, 0b0, Simm, 0b1};
// MOVI/MVNI with MSL (shift-ones) modifier, word arrangements only
// (cmode = 110S, S = shift-amount bit).
1200 multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op,
1201 SDPatternOperator opnode>
1203 // shift ones, per word
1204 def _2S : NeonI_1VModImm<0b0, op,
1206 (ins neon_uimm8:$Imm,
1207 neon_mov_imm_MSL_operand:$Simm),
1208 !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
1209 [(set (v2i32 VPR64:$Rd),
1210 (v2i32 (opnode (timm:$Imm),
1211 (neon_mov_imm_MSL_operand:$Simm))))],
1214 let cmode = {0b1, 0b1, 0b0, Simm};
1217 def _4S : NeonI_1VModImm<0b1, op,
1219 (ins neon_uimm8:$Imm,
1220 neon_mov_imm_MSL_operand:$Simm),
1221 !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
1222 [(set (v4i32 VPR128:$Rd),
1223 (v4i32 (opnode (timm:$Imm),
1224 (neon_mov_imm_MSL_operand:$Simm))))],
1227 let cmode = {0b1, 0b1, 0b0, Simm};
1231 // Vector Move Immediate Shifted
1232 let isReMaterializable = 1 in {
1233 defm MOVIvi_lsl : NeonI_mov_imm_lsl_sizes<"movi", 0b0, Neon_movi>;
1236 // Vector Move Inverted Immediate Shifted
1237 let isReMaterializable = 1 in {
1238 defm MVNIvi_lsl : NeonI_mov_imm_lsl_sizes<"mvni", 0b1, Neon_mvni>;
1241 // Vector Bitwise Bit Clear (AND NOT) - immediate
1242 let isReMaterializable = 1 in {
1243 defm BICvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"bic", 0b1,
1247 // Vector Bitwise OR - immediate
1249 let isReMaterializable = 1 in {
1250 defm ORRvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"orr", 0b0,
1254 // Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
1255 // LowerBUILD_VECTOR favors lowering MOVI over MVNI.
1256 // BIC immediate instructions selection requires additional patterns to
1257 // transform Neon_movi operands into BIC immediate operands
1259 def neon_mov_imm_LSLH_transform_XFORM : SDNodeXForm<imm, [{
1260 uint64_t OpCmode = N->getZExtValue();
1262 unsigned ShiftOnesIn;
1263 (void)A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
1264 // LSLH restricts shift amount to 0, 8 which are encoded as 0 and 1
1265 // Transform encoded shift amount 0 to 1 and 1 to 0.
1266 return CurDAG->getTargetConstant(!ShiftImm, MVT::i32);
1269 def neon_mov_imm_LSLH_transform_operand
1272 unsigned ShiftOnesIn;
1274 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1275 return (HasShift && !ShiftOnesIn); }],
1276 neon_mov_imm_LSLH_transform_XFORM>;
// (and A, movi 0xff [<< 8]) clears the other half of each halfword lane, so
// it is a BIC of 0x00 with the opposite LSLH shift (hence the XFORM above
// flipping the encoded shift bit).
1278 // Transform (and A, (4h Neon_movi 0xff)) -> BIC 4h (A, 0x00, LSL 8)
1279 // Transform (and A, (4h Neon_movi 0xff LSL #8)) -> BIC 4h (A, 0x00)
1280 def : Pat<(v4i16 (and VPR64:$src,
1281 (v4i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
1282 (BICvi_lsl_4H VPR64:$src, 0,
1283 neon_mov_imm_LSLH_transform_operand:$Simm)>;
1285 // Transform (and A, (8h Neon_movi 8h 0xff)) -> BIC 8h (A, 0x00, LSL 8)
1286 // Transform (and A, (8h Neon_movi 0xff LSL #8)) -> BIC 8h (A, 0x00)
1287 def : Pat<(v8i16 (and VPR128:$src,
1288 (v8i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
1289 (BICvi_lsl_8H VPR128:$src, 0,
1290 neon_mov_imm_LSLH_transform_operand:$Simm)>;
// Maps bitwise ops on non-halfword vector types (8i8/1i64/16i8/4i32/2i64)
// onto the halfword-arrangement BIC/ORR immediate instructions via a
// bitconvert of the modified-immediate vector.
1293 multiclass Neon_bitwiseVi_patterns<SDPatternOperator opnode,
1294 SDPatternOperator neonopnode,
1296 Instruction INST8H> {
1297 def : Pat<(v8i8 (opnode VPR64:$src,
1298 (bitconvert(v4i16 (neonopnode timm:$Imm,
1299 neon_mov_imm_LSLH_operand:$Simm))))),
1300 (INST4H VPR64:$src, neon_uimm8:$Imm,
1301 neon_mov_imm_LSLH_operand:$Simm)>;
1302 def : Pat<(v1i64 (opnode VPR64:$src,
1303 (bitconvert(v4i16 (neonopnode timm:$Imm,
1304 neon_mov_imm_LSLH_operand:$Simm))))),
1305 (INST4H VPR64:$src, neon_uimm8:$Imm,
1306 neon_mov_imm_LSLH_operand:$Simm)>;
1308 def : Pat<(v16i8 (opnode VPR128:$src,
1309 (bitconvert(v8i16 (neonopnode timm:$Imm,
1310 neon_mov_imm_LSLH_operand:$Simm))))),
1311 (INST8H VPR128:$src, neon_uimm8:$Imm,
1312 neon_mov_imm_LSLH_operand:$Simm)>;
1313 def : Pat<(v4i32 (opnode VPR128:$src,
1314 (bitconvert(v8i16 (neonopnode timm:$Imm,
1315 neon_mov_imm_LSLH_operand:$Simm))))),
1316 (INST8H VPR128:$src, neon_uimm8:$Imm,
1317 neon_mov_imm_LSLH_operand:$Simm)>;
1318 def : Pat<(v2i64 (opnode VPR128:$src,
1319 (bitconvert(v8i16 (neonopnode timm:$Imm,
1320 neon_mov_imm_LSLH_operand:$Simm))))),
1321 (INST8H VPR128:$src, neon_uimm8:$Imm,
1322 neon_mov_imm_LSLH_operand:$Simm)>;
1325 // Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
// BIC computes Rd = Rd & ~mask; LowerBUILD_VECTOR emits the inverted mask as
// a Neon_mvni node, so the DAG shape to match is an AND of the source with
// the MVNI immediate vector. Matching `or` here would wrongly select BIC for
// (or A, mvni), whose result differs from A & ~mask.
1326 defm : Neon_bitwiseVi_patterns<and, Neon_mvni, BICvi_lsl_4H, BICvi_lsl_8H>;
1328 // Additional patterns for Vector Bitwise OR - immediate
// ORR immediate: (or A, (movi imm, shift)) maps directly onto ORRvi.
1329 defm : Neon_bitwiseVi_patterns<or, Neon_movi, ORRvi_lsl_4H, ORRvi_lsl_8H>;
1332 // Vector Move Immediate Masked
1333 let isReMaterializable = 1 in {
1334 defm MOVIvi_msl : NeonI_mov_imm_msl_sizes<"movi", 0b0, Neon_movi>;
1337 // Vector Move Inverted Immediate Masked
1338 let isReMaterializable = 1 in {
1339 defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>;
// Printing-only aliases for the zero-shift form, e.g. "movi v0.2s, #1"
// instead of "movi v0.2s, #1, lsl #0".
1342 class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane,
1343 Instruction inst, RegisterOperand VPRC>
1344 : NeonInstAlias<!strconcat(asmop, " $Rd," # asmlane # ", $Imm"),
1345 (inst VPRC:$Rd, neon_uimm8:$Imm, 0), 0b0>;
1347 // Aliases for Vector Move Immediate Shifted
1348 def : NeonI_mov_imm_lsl_aliases<"movi", ".2s", MOVIvi_lsl_2S, VPR64>;
1349 def : NeonI_mov_imm_lsl_aliases<"movi", ".4s", MOVIvi_lsl_4S, VPR128>;
1350 def : NeonI_mov_imm_lsl_aliases<"movi", ".4h", MOVIvi_lsl_4H, VPR64>;
1351 def : NeonI_mov_imm_lsl_aliases<"movi", ".8h", MOVIvi_lsl_8H, VPR128>;
1353 // Aliases for Vector Move Inverted Immediate Shifted
1354 def : NeonI_mov_imm_lsl_aliases<"mvni", ".2s", MVNIvi_lsl_2S, VPR64>;
1355 def : NeonI_mov_imm_lsl_aliases<"mvni", ".4s", MVNIvi_lsl_4S, VPR128>;
1356 def : NeonI_mov_imm_lsl_aliases<"mvni", ".4h", MVNIvi_lsl_4H, VPR64>;
1357 def : NeonI_mov_imm_lsl_aliases<"mvni", ".8h", MVNIvi_lsl_8H, VPR128>;
1359 // Aliases for Vector Bitwise Bit Clear (AND NOT) - immediate
1360 def : NeonI_mov_imm_lsl_aliases<"bic", ".2s", BICvi_lsl_2S, VPR64>;
1361 def : NeonI_mov_imm_lsl_aliases<"bic", ".4s", BICvi_lsl_4S, VPR128>;
1362 def : NeonI_mov_imm_lsl_aliases<"bic", ".4h", BICvi_lsl_4H, VPR64>;
1363 def : NeonI_mov_imm_lsl_aliases<"bic", ".8h", BICvi_lsl_8H, VPR128>;
1365 // Aliases for Vector Bitwise OR - immediate
1366 def : NeonI_mov_imm_lsl_aliases<"orr", ".2s", ORRvi_lsl_2S, VPR64>;
1367 def : NeonI_mov_imm_lsl_aliases<"orr", ".4s", ORRvi_lsl_4S, VPR128>;
1368 def : NeonI_mov_imm_lsl_aliases<"orr", ".4h", ORRvi_lsl_4H, VPR64>;
1369 def : NeonI_mov_imm_lsl_aliases<"orr", ".8h", ORRvi_lsl_8H, VPR128>;
1371 // Vector Move Immediate - per byte
1372 let isReMaterializable = 1 in {
1373 def MOVIvi_8B : NeonI_1VModImm<0b0, 0b0,
1374 (outs VPR64:$Rd), (ins neon_uimm8:$Imm),
1375 "movi\t$Rd.8b, $Imm",
1376 [(set (v8i8 VPR64:$Rd),
1377 (v8i8 (Neon_movi (timm:$Imm), (i32 imm))))],
1382 def MOVIvi_16B : NeonI_1VModImm<0b1, 0b0,
1383 (outs VPR128:$Rd), (ins neon_uimm8:$Imm),
1384 "movi\t$Rd.16b, $Imm",
1385 [(set (v16i8 VPR128:$Rd),
1386 (v16i8 (Neon_movi (timm:$Imm), (i32 imm))))],
1392 // Vector Move Immediate - bytemask, per double word
// The 8-bit immediate expands to a 64-bit mask: each immediate bit selects
// 0x00 or 0xff for the corresponding byte (see neon_uimm64_mask above).
1393 let isReMaterializable = 1 in {
1394 def MOVIvi_2D : NeonI_1VModImm<0b1, 0b1,
1395 (outs VPR128:$Rd), (ins neon_uimm64_mask:$Imm),
1396 "movi\t $Rd.2d, $Imm",
1397 [(set (v2i64 VPR128:$Rd),
1398 (v2i64 (Neon_movi (timm:$Imm), (i32 imm))))],
1404 // Vector Move Immediate - bytemask, one doubleword
1406 let isReMaterializable = 1 in {
1407 def MOVIdi : NeonI_1VModImm<0b0, 0b1,
1408 (outs FPR64:$Rd), (ins neon_uimm64_mask:$Imm),
1410 [(set (f64 FPR64:$Rd),
1412 (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))))],
1418 // Vector Floating Point Move Immediate
// FMOV vector immediate: 2s/4s take the 32-bit FP immediate encoding,
// 2d takes the 64-bit one (q/op bits distinguish the three forms).
1420 class NeonI_FMOV_impl<string asmlane, RegisterOperand VPRC, ValueType OpTy,
1421 Operand immOpType, bit q, bit op>
1422 : NeonI_1VModImm<q, op,
1423 (outs VPRC:$Rd), (ins immOpType:$Imm),
1424 "fmov\t$Rd" # asmlane # ", $Imm",
1425 [(set (OpTy VPRC:$Rd),
1426 (OpTy (Neon_fmovi (timm:$Imm))))],
1431 let isReMaterializable = 1 in {
1432 def FMOVvi_2S : NeonI_FMOV_impl<".2s", VPR64, v2f32, fmov32_operand, 0b0, 0b0>;
1433 def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>;
1434 def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>;
1437 // Vector Shift (Immediate)
1438 // Immediate in [0, 63]
1439 def imm0_63 : Operand<i32> {
1440 let ParserMatchClass = uimm6_asmoperand;
1443 // Shift Right Immediate - A shift right immediate is encoded differently from
1444 // other shift immediates. The immh:immb field is encoded like so:
1447 // 8 immh:immb<6:3> = '0001xxx', <imm> is encoded in immh:immb<2:0>
1448 // 16 immh:immb<6:4> = '001xxxx', <imm> is encoded in immh:immb<3:0>
1449 // 32 immh:immb<6:5> = '01xxxxx', <imm> is encoded in immh:immb<4:0>
1450 // 64 immh:immb<6> = '1xxxxxx', <imm> is encoded in immh:immb<5:0>
1451 class shr_imm_asmoperands<string OFFSET> : AsmOperandClass {
1452 let Name = "ShrImm" # OFFSET;
1453 let RenderMethod = "addImmOperands";
1454 let DiagnosticType = "ShrImm" # OFFSET;
// Operand class wiring the element-width-specific encoder/decoder/parser.
1457 class shr_imm<string OFFSET> : Operand<i32> {
1458 let EncoderMethod = "getShiftRightImm" # OFFSET;
1459 let DecoderMethod = "DecodeShiftRightImm" # OFFSET;
1460 let ParserMatchClass =
1461 !cast<AsmOperandClass>("shr_imm" # OFFSET # "_asmoperand");
1464 def shr_imm8_asmoperand : shr_imm_asmoperands<"8">;
1465 def shr_imm16_asmoperand : shr_imm_asmoperands<"16">;
1466 def shr_imm32_asmoperand : shr_imm_asmoperands<"32">;
1467 def shr_imm64_asmoperand : shr_imm_asmoperands<"64">;
1469 def shr_imm8 : shr_imm<"8">;
1470 def shr_imm16 : shr_imm<"16">;
1471 def shr_imm32 : shr_imm<"32">;
1472 def shr_imm64 : shr_imm<"64">;
// Vector shift by immediate: the shift amount is a splatted immediate
// (Neon_dupImm) applied with a plain shl/sra/srl SDNode.
1474 class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
1475 RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode>
1476 : NeonI_2VShiftImm<q, u, opcode,
1477 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
1478 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1479 [(set (Ty VPRC:$Rd),
1480 (Ty (OpNode (Ty VPRC:$Rn),
1481 (Ty (Neon_dupImm (i32 imm:$Imm))))))],
// Shift-left by immediate across all arrangements; the immh prefix bits
// select the element size (see the immh:immb table above).
1484 multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> {
1485 // 64-bit vector types.
1486 def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3, shl> {
1487 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1490 def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4, shl> {
1491 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1494 def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5, shl> {
1495 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1498 // 128-bit vector types.
1499 def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3, shl> {
1500 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1503 def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4, shl> {
1504 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1507 def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5, shl> {
1508 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1511 def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63, shl> {
1512 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
// Shift-right by immediate: same layout, but the OpNode (sra/srl) is a
// parameter and the immediate uses the shr_imm* operands.
1516 multiclass NeonI_N2VShR<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
1517 def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1519 let Inst{22-19} = 0b0001;
1522 def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1524 let Inst{22-20} = 0b001;
1527 def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1529 let Inst{22-21} = 0b01;
1532 def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1534 let Inst{22-19} = 0b0001;
1537 def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1539 let Inst{22-20} = 0b001;
1542 def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1544 let Inst{22-21} = 0b01;
1547 def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1554 defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">;
1557 defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>;
1558 defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>;
// PatFrags extracting the high half of a 128-bit vector (subvector starting
// at the midpoint index) — used to match the "2" (second-half) long forms.
1560 def Neon_top16B : PatFrag<(ops node:$in),
1561 (extract_subvector (v16i8 node:$in), (iPTR 8))>;
1562 def Neon_top8H : PatFrag<(ops node:$in),
1563 (extract_subvector (v8i16 node:$in), (iPTR 4))>;
1564 def Neon_top4S : PatFrag<(ops node:$in),
1565 (extract_subvector (v4i32 node:$in), (iPTR 2))>;
// Widening shift-left: extend (sext/zext) the 64-bit source, then shift by
// the splatted immediate.
1567 class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1568 string SrcT, ValueType DestTy, ValueType SrcTy,
1569 Operand ImmTy, SDPatternOperator ExtOp>
1570 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1571 (ins VPR64:$Rn, ImmTy:$Imm),
1572 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1573 [(set (DestTy VPR128:$Rd),
1575 (DestTy (ExtOp (SrcTy VPR64:$Rn))),
1576 (DestTy (Neon_dupImm (i32 imm:$Imm))))))],
// Same, but reading the high half of a 128-bit source ("sshll2"-style,
// note the "2" appended to the mnemonic).
1579 class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1580 string SrcT, ValueType DestTy, ValueType SrcTy,
1581 int StartIndex, Operand ImmTy,
1582 SDPatternOperator ExtOp, PatFrag getTop>
1583 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1584 (ins VPR128:$Rn, ImmTy:$Imm),
1585 asmop # "2\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1586 [(set (DestTy VPR128:$Rd),
1589 (SrcTy (getTop VPR128:$Rn)))),
1590 (DestTy (Neon_dupImm (i32 imm:$Imm))))))],
1593 multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop,
1595 // 64-bit vector types.
1596 def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8,
1598 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1601 def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16,
1603 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1606 def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32,
1608 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1611 // 128-bit vector types
1612 def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b",
1613 v8i16, v8i8, 8, uimm3, ExtOp, Neon_top16B> {
1614 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1617 def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h",
1618 v4i32, v4i16, 4, uimm4, ExtOp, Neon_top8H> {
1619 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1622 def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s",
1623 v2i64, v2i32, 2, uimm5, ExtOp, Neon_top4S> {
1624 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1627 // Use other patterns to match when the immediate is 0.
// A bare extend is SSHLL/USHLL with a zero shift amount.
1628 def : Pat<(v8i16 (ExtOp (v8i8 VPR64:$Rn))),
1629 (!cast<Instruction>(prefix # "_8B") VPR64:$Rn, 0)>;
1631 def : Pat<(v4i32 (ExtOp (v4i16 VPR64:$Rn))),
1632 (!cast<Instruction>(prefix # "_4H") VPR64:$Rn, 0)>;
1634 def : Pat<(v2i64 (ExtOp (v2i32 VPR64:$Rn))),
1635 (!cast<Instruction>(prefix # "_2S") VPR64:$Rn, 0)>;
1637 def : Pat<(v8i16 (ExtOp (v8i8 (Neon_top16B VPR128:$Rn)))),
1638 (!cast<Instruction>(prefix # "_16B") VPR128:$Rn, 0)>;
1640 def : Pat<(v4i32 (ExtOp (v4i16 (Neon_top8H VPR128:$Rn)))),
1641 (!cast<Instruction>(prefix # "_8H") VPR128:$Rn, 0)>;
1643 def : Pat<(v2i64 (ExtOp (v2i32 (Neon_top4S VPR128:$Rn)))),
1644 (!cast<Instruction>(prefix # "_4S") VPR128:$Rn, 0)>;
1648 defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>;
1649 defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>;
1651 // Rounding/Saturating shift
// Unlike N2VShift, the operator here is an intrinsic taking the raw i32
// immediate (no Neon_dupImm splat in the pattern).
1652 class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T,
1653 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1654 SDPatternOperator OpNode>
1655 : NeonI_2VShiftImm<q, u, opcode,
1656 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
1657 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1658 [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn),
1662 // shift right (vector by immediate)
1663 multiclass NeonI_N2VShR_RQ<bit u, bits<5> opcode, string asmop,
1664 SDPatternOperator OpNode> {
1665 def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1667 let Inst{22-19} = 0b0001;
1670 def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1672 let Inst{22-20} = 0b001;
1675 def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1677 let Inst{22-21} = 0b01;
1680 def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1682 let Inst{22-19} = 0b0001;
1685 def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1687 let Inst{22-20} = 0b001;
1690 def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1692 let Inst{22-21} = 0b01;
1695 def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
// Left-shift variant of the rounding/saturating multiclass (uimm operands).
1701 multiclass NeonI_N2VShL_Q<bit u, bits<5> opcode, string asmop,
1702 SDPatternOperator OpNode> {
1703 // 64-bit vector types.
1704 def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3,
1706 let Inst{22-19} = 0b0001;
1709 def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4,
1711 let Inst{22-20} = 0b001;
1714 def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5,
1716 let Inst{22-21} = 0b01;
1719 // 128-bit vector types.
1720 def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3,
1722 let Inst{22-19} = 0b0001;
1725 def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4,
1727 let Inst{22-20} = 0b001;
1730 def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5,
1732 let Inst{22-21} = 0b01;
1735 def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63,
1741 // Rounding shift right
1742 defm SRSHRvvi : NeonI_N2VShR_RQ<0b0, 0b00100, "srshr",
1743 int_aarch64_neon_vsrshr>;
1744 defm URSHRvvi : NeonI_N2VShR_RQ<0b1, 0b00100, "urshr",
1745 int_aarch64_neon_vurshr>;
1747 // Saturating shift left unsigned
1748 defm SQSHLUvvi : NeonI_N2VShL_Q<0b1, 0b01100, "sqshlu", int_aarch64_neon_vsqshlu>;
1750 // Saturating shift left
1751 defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>;
1752 defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>;
// Shift-and-accumulate base class: matches add($src, OpNode($Rn, dup(Imm)))
// and ties $src to $Rd, so the destination is a read-modify-write
// accumulator (see the Constraints line below).
1754 class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T,
1755 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1757 : NeonI_2VShiftImm<q, u, opcode,
1758 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1759 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1760 [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
1761 (Ty (OpNode (Ty VPRC:$Rn),
1762 (Ty (Neon_dupImm (i32 imm:$Imm))))))))],
1764 let Constraints = "$src = $Rd";
1767 // Shift Right accumulate
// One def per vector arrangement; shr_imm* operands restrict the
// immediate to the valid right-shift range for each element size.
1768 multiclass NeonI_N2VShRAdd<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
1769 def _8B : N2VShiftAdd<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1771 let Inst{22-19} = 0b0001;
1774 def _4H : N2VShiftAdd<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1776 let Inst{22-20} = 0b001;
1779 def _2S : N2VShiftAdd<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1781 let Inst{22-21} = 0b01;
1784 def _16B : N2VShiftAdd<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1786 let Inst{22-19} = 0b0001;
1789 def _8H : N2VShiftAdd<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1791 let Inst{22-20} = 0b001;
1794 def _4S : N2VShiftAdd<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1796 let Inst{22-21} = 0b01;
1799 def _2D : N2VShiftAdd<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1805 // Shift right and accumulate
// SSRA uses the arithmetic (sra) and USRA the logical (srl) shift node.
1806 defm SSRAvvi : NeonI_N2VShRAdd<0, 0b00010, "ssra", sra>;
1807 defm USRAvvi : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>;
1809 // Rounding shift accumulate
1810 class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T,
1811 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1812 SDPatternOperator OpNode>
1813 : NeonI_2VShiftImm<q, u, opcode,
1814 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1815 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1816 [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
1817 (Ty (OpNode (Ty VPRC:$Rn), (i32 imm:$Imm))))))],
1819 let Constraints = "$src = $Rd";
1822 multiclass NeonI_N2VShRAdd_R<bit u, bits<5> opcode, string asmop,
1823 SDPatternOperator OpNode> {
1824 def _8B : N2VShiftAdd_R<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1826 let Inst{22-19} = 0b0001;
1829 def _4H : N2VShiftAdd_R<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1831 let Inst{22-20} = 0b001;
1834 def _2S : N2VShiftAdd_R<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1836 let Inst{22-21} = 0b01;
1839 def _16B : N2VShiftAdd_R<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1841 let Inst{22-19} = 0b0001;
1844 def _8H : N2VShiftAdd_R<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1846 let Inst{22-20} = 0b001;
1849 def _4S : N2VShiftAdd_R<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1851 let Inst{22-21} = 0b01;
1854 def _2D : N2VShiftAdd_R<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1860 // Rounding shift right and accumulate
1861 defm SRSRAvvi : NeonI_N2VShRAdd_R<0, 0b00110, "srsra", int_aarch64_neon_vsrshr>;
1862 defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>;
1864 // Shift insert by immediate
1865 class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T,
1866 RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1867 SDPatternOperator OpNode>
1868 : NeonI_2VShiftImm<q, u, opcode,
1869 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1870 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1871 [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$src), (Ty VPRC:$Rn),
1874 let Constraints = "$src = $Rd";
1877 // shift left insert (vector by immediate)
1878 multiclass NeonI_N2VShLIns<bit u, bits<5> opcode, string asmop> {
1879 def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3,
1880 int_aarch64_neon_vsli> {
1881 let Inst{22-19} = 0b0001;
1884 def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4,
1885 int_aarch64_neon_vsli> {
1886 let Inst{22-20} = 0b001;
1889 def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5,
1890 int_aarch64_neon_vsli> {
1891 let Inst{22-21} = 0b01;
1894 // 128-bit vector types
1895 def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3,
1896 int_aarch64_neon_vsli> {
1897 let Inst{22-19} = 0b0001;
1900 def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4,
1901 int_aarch64_neon_vsli> {
1902 let Inst{22-20} = 0b001;
1905 def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5,
1906 int_aarch64_neon_vsli> {
1907 let Inst{22-21} = 0b01;
1910 def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63,
1911 int_aarch64_neon_vsli> {
1916 // shift right insert (vector by immediate)
1917 multiclass NeonI_N2VShRIns<bit u, bits<5> opcode, string asmop> {
1918 // 64-bit vector types.
1919 def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1920 int_aarch64_neon_vsri> {
1921 let Inst{22-19} = 0b0001;
1924 def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1925 int_aarch64_neon_vsri> {
1926 let Inst{22-20} = 0b001;
1929 def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1930 int_aarch64_neon_vsri> {
1931 let Inst{22-21} = 0b01;
1934 // 128-bit vector types
1935 def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1936 int_aarch64_neon_vsri> {
1937 let Inst{22-19} = 0b0001;
1940 def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1941 int_aarch64_neon_vsri> {
1942 let Inst{22-20} = 0b001;
1945 def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1946 int_aarch64_neon_vsri> {
1947 let Inst{22-21} = 0b01;
1950 def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1951 int_aarch64_neon_vsri> {
1956 // Shift left and insert
1957 defm SLIvvi : NeonI_N2VShLIns<0b1, 0b01010, "sli">;
1959 // Shift right and insert
1960 defm SRIvvi : NeonI_N2VShRIns<0b1, 0b01000, "sri">;
// Shift-right-narrow: 128-bit source, 64-bit (half-width element) result.
// No pattern here; selection is done by the Pat<> multiclasses below.
1962 class N2VShR_Narrow<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1963 string SrcT, Operand ImmTy>
1964 : NeonI_2VShiftImm<q, u, opcode,
1965 (outs VPR64:$Rd), (ins VPR128:$Rn, ImmTy:$Imm),
1966 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
// "_Hi" variant ("shrn2" etc.): writes the upper half of a 128-bit
// destination, so the low half ($src) is tied to $Rd and passed through.
1969 class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1970 string SrcT, Operand ImmTy>
1971 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1972 (ins VPR128:$src, VPR128:$Rn, ImmTy:$Imm),
1973 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1975 let Constraints = "$src = $Rd";
1978 // shift right narrow (vector by immediate)
1979 multiclass NeonI_N2VShR_Narrow<bit u, bits<5> opcode, string asmop> {
1980 def _8B : N2VShR_Narrow<0b0, u, opcode, asmop, "8b", "8h", shr_imm8> {
1981 let Inst{22-19} = 0b0001;
1984 def _4H : N2VShR_Narrow<0b0, u, opcode, asmop, "4h", "4s", shr_imm16> {
1985 let Inst{22-20} = 0b001;
1988 def _2S : N2VShR_Narrow<0b0, u, opcode, asmop, "2s", "2d", shr_imm32> {
1989 let Inst{22-21} = 0b01;
1992 // Shift Narrow High
1993 def _16B : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "16b", "8h",
1995 let Inst{22-19} = 0b0001;
1998 def _8H : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "8h", "4s",
2000 let Inst{22-20} = 0b001;
2003 def _4S : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "4s", "2d",
2005 let Inst{22-21} = 0b01;
2009 // Shift right narrow
2010 defm SHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10000, "shrn">;
2012 // Shift right narrow (prefix Q is saturating, prefix R is rounding)
2013 defm QSHRUNvvi :NeonI_N2VShR_Narrow<0b1, 0b10000, "sqshrun">;
2014 defm RSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10001, "rshrn">;
2015 defm QRSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10001, "sqrshrun">;
2016 defm SQSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10010, "sqshrn">;
2017 defm UQSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10010, "uqshrn">;
2018 defm SQRSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10011, "sqrshrn">;
2019 defm UQRSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10011, "uqrshrn">;
// Neon_combine: concatenates two 64-bit halves into one v2i64 -- used by
// the "high-half" patterns below to model writing the top half of a Q reg.
2021 def Neon_combine : PatFrag<(ops node:$Rm, node:$Rn),
2022 (v2i64 (concat_vectors (v1i64 node:$Rm),
2023 (v1i64 node:$Rn)))>;
// Vector shift-right-by-scalar-immediate fragments: the scalar amount is
// splatted with Neon_dupImm so it matches the vector srl/sra form the
// legalizer produces. lshr* = logical (srl), ashr* = arithmetic (sra).
2025 def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
2026 (v8i16 (srl (v8i16 node:$lhs),
2027 (v8i16 (Neon_dupImm (i32 node:$rhs)))))>;
2028 def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
2029 (v4i32 (srl (v4i32 node:$lhs),
2030 (v4i32 (Neon_dupImm (i32 node:$rhs)))))>;
2031 def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
2032 (v2i64 (srl (v2i64 node:$lhs),
2033 (v2i64 (Neon_dupImm (i32 node:$rhs)))))>;
2034 def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
2035 (v8i16 (sra (v8i16 node:$lhs),
2036 (v8i16 (Neon_dupImm (i32 node:$rhs)))))>;
2037 def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
2038 (v4i32 (sra (v4i32 node:$lhs),
2039 (v4i32 (Neon_dupImm (i32 node:$rhs)))))>;
2040 def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
2041 (v2i64 (sra (v2i64 node:$lhs),
2042 (v2i64 (Neon_dupImm (i32 node:$rhs)))))>;
2044 // Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors)
// 'shr' is "lshr" or "ashr"; !cast selects the matching PatFrag above.
// The first three Pats select the D-register SHRN forms; the last three
// match trunc results combined into the high half and select SHRN2,
// inserting the existing low half with SUBREG_TO_REG.
2045 multiclass Neon_shiftNarrow_patterns<string shr> {
2046 def : Pat<(v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H") VPR128:$Rn,
2048 (SHRNvvi_8B VPR128:$Rn, imm:$Imm)>;
2049 def : Pat<(v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S") VPR128:$Rn,
2051 (SHRNvvi_4H VPR128:$Rn, imm:$Imm)>;
2052 def : Pat<(v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D") VPR128:$Rn,
2054 (SHRNvvi_2S VPR128:$Rn, imm:$Imm)>;
2056 def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert
2057 (v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H")
2058 VPR128:$Rn, imm:$Imm)))))),
2059 (SHRNvvi_16B (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2060 VPR128:$Rn, imm:$Imm)>;
2061 def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert
2062 (v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S")
2063 VPR128:$Rn, imm:$Imm)))))),
2064 (SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2065 VPR128:$Rn, imm:$Imm)>;
2066 def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert
2067 (v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D")
2068 VPR128:$Rn, imm:$Imm)))))),
2069 (SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2070 VPR128:$Rn, imm:$Imm)>;
// Same structure for the saturating/rounding narrows, but matching the
// intrinsic 'op' instead of IR shift+trunc; 'prefix' names the defm family.
2073 multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> {
2074 def : Pat<(v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm)),
2075 (!cast<Instruction>(prefix # "_8B") VPR128:$Rn, imm:$Imm)>;
2076 def : Pat<(v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm)),
2077 (!cast<Instruction>(prefix # "_4H") VPR128:$Rn, imm:$Imm)>;
2078 def : Pat<(v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm)),
2079 (!cast<Instruction>(prefix # "_2S") VPR128:$Rn, imm:$Imm)>;
2081 def : Pat<(Neon_combine (v1i64 VPR64:$src),
2082 (v1i64 (bitconvert (v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm))))),
2083 (!cast<Instruction>(prefix # "_16B")
2084 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2085 VPR128:$Rn, imm:$Imm)>;
2086 def : Pat<(Neon_combine (v1i64 VPR64:$src),
2087 (v1i64 (bitconvert (v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm))))),
2088 (!cast<Instruction>(prefix # "_8H")
2089 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2090 VPR128:$Rn, imm:$Imm)>;
2091 def : Pat<(Neon_combine (v1i64 VPR64:$src),
2092 (v1i64 (bitconvert (v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm))))),
2093 (!cast<Instruction>(prefix # "_4S")
2094 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2095 VPR128:$Rn, imm:$Imm)>;
2098 defm : Neon_shiftNarrow_patterns<"lshr">;
2099 defm : Neon_shiftNarrow_patterns<"ashr">;
2101 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrun, "QSHRUNvvi">;
2102 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vrshrn, "RSHRNvvi">;
2103 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrun, "QRSHRUNvvi">;
2104 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrn, "SQSHRNvvi">;
2105 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqshrn, "UQSHRNvvi">;
2106 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrn, "SQRSHRNvvi">;
2107 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqrshrn, "UQRSHRNvvi">;
2109 // Convert between fixed-point and floating-point (vector, by immediate)
// Shared base class: IntOp converts SrcTy -> DestTy using $Imm fractional
// bits; used in both directions by the two multiclasses below.
2110 class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T,
2111 RegisterOperand VPRC, ValueType DestTy, ValueType SrcTy,
2112 Operand ImmTy, SDPatternOperator IntOp>
2113 : NeonI_2VShiftImm<q, u, opcode,
2114 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
2115 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
2116 [(set (DestTy VPRC:$Rd), (DestTy (IntOp (SrcTy VPRC:$Rn),
// Fixed-point -> floating-point direction (scvtf/ucvtf).
2120 multiclass NeonI_N2VCvt_Fx2fp<bit u, bits<5> opcode, string asmop,
2121 SDPatternOperator IntOp> {
2122 def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2f32, v2i32,
2124 let Inst{22-21} = 0b01;
2127 def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4f32, v4i32,
2129 let Inst{22-21} = 0b01;
2132 def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2f64, v2i64,
// Floating-point -> fixed-point direction (fcvtzs/fcvtzu).
2138 multiclass NeonI_N2VCvt_Fp2fx<bit u, bits<5> opcode, string asmop,
2139 SDPatternOperator IntOp> {
2140 def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2i32, v2f32,
2142 let Inst{22-21} = 0b01;
2145 def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4i32, v4f32,
2147 let Inst{22-21} = 0b01;
2150 def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2i64, v2f64,
2156 // Convert fixed-point to floating-point
2157 defm VCVTxs2f : NeonI_N2VCvt_Fx2fp<0, 0b11100, "scvtf",
2158 int_arm_neon_vcvtfxs2fp>;
2159 defm VCVTxu2f : NeonI_N2VCvt_Fx2fp<1, 0b11100, "ucvtf",
2160 int_arm_neon_vcvtfxu2fp>;
2162 // Convert floating-point to fixed-point
2163 defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs",
2164 int_arm_neon_vcvtfp2fxs>;
2165 defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu",
2166 int_arm_neon_vcvtfp2fxu>;
// Fragments matching sign/zero-extension of the HIGH half of a vector
// (Neon_top* extracts the upper half); instantiated as NI_sext_high_* and
// NI_zext_high_* for use by the long "2" instruction patterns below.
2168 multiclass Neon_sshll2_0<SDNode ext>
2170 def _v8i8 : PatFrag<(ops node:$Rn),
2171 (v8i16 (ext (v8i8 (Neon_top16B node:$Rn))))>;
2172 def _v4i16 : PatFrag<(ops node:$Rn),
2173 (v4i32 (ext (v4i16 (Neon_top8H node:$Rn))))>;
2174 def _v2i32 : PatFrag<(ops node:$Rn),
2175 (v2i64 (ext (v2i32 (Neon_top4S node:$Rn))))>;
2178 defm NI_sext_high : Neon_sshll2_0<sext>;
2179 defm NI_zext_high : Neon_sshll2_0<zext>;
2181 // The following definitions are for the instruction class (3V Diff)
2183 // normal long/long2 pattern
// Long operation: both 64-bit (or high-half) operands are extended to the
// double-width ResTy before opnode is applied; result is a full Q register.
2184 class NeonI_3VDL<bit q, bit u, bits<2> size, bits<4> opcode,
2185 string asmop, string ResS, string OpS,
2186 SDPatternOperator opnode, SDPatternOperator ext,
2187 RegisterOperand OpVPR,
2188 ValueType ResTy, ValueType OpTy>
2189 : NeonI_3VDiff<q, u, size, opcode,
2190 (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2191 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2192 [(set (ResTy VPR128:$Rd),
2193 (ResTy (opnode (ResTy (ext (OpTy OpVPR:$Rn))),
2194 (ResTy (ext (OpTy OpVPR:$Rm))))))],
// _s = signed (sext), low halves.
2197 multiclass NeonI_3VDL_s<bit u, bits<4> opcode,
2198 string asmop, SDPatternOperator opnode,
2201 let isCommutable = Commutable in {
2202 def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2203 opnode, sext, VPR64, v8i16, v8i8>;
2204 def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2205 opnode, sext, VPR64, v4i32, v4i16>;
2206 def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2207 opnode, sext, VPR64, v2i64, v2i32>;
// _2_s = signed, high halves ("saddl2" etc.) via NI_sext_high_*.
2211 multiclass NeonI_3VDL2_s<bit u, bits<4> opcode,
2212 string asmop, SDPatternOperator opnode,
2215 let isCommutable = Commutable in {
2216 def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2217 opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
2218 def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2219 opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
2220 def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2221 opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
// _u = unsigned (zext), low halves.
2225 multiclass NeonI_3VDL_u<bit u, bits<4> opcode,
2226 string asmop, SDPatternOperator opnode,
2229 let isCommutable = Commutable in {
2230 def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2231 opnode, zext, VPR64, v8i16, v8i8>;
2232 def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2233 opnode, zext, VPR64, v4i32, v4i16>;
2234 def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2235 opnode, zext, VPR64, v2i64, v2i32>;
// _2_u = unsigned, high halves via NI_zext_high_*.
2239 multiclass NeonI_3VDL2_u<bit u, bits<4> opcode,
2240 string asmop, SDPatternOperator opnode,
2243 let isCommutable = Commutable in {
2244 def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2245 opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
2246 def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2247 opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
2248 def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2249 opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
// add is commutable (1); sub is not (0).
2253 defm SADDLvvv : NeonI_3VDL_s<0b0, 0b0000, "saddl", add, 1>;
2254 defm UADDLvvv : NeonI_3VDL_u<0b1, 0b0000, "uaddl", add, 1>;
2256 defm SADDL2vvv : NeonI_3VDL2_s<0b0, 0b0000, "saddl2", add, 1>;
2257 defm UADDL2vvv : NeonI_3VDL2_u<0b1, 0b0000, "uaddl2", add, 1>;
2259 defm SSUBLvvv : NeonI_3VDL_s<0b0, 0b0010, "ssubl", sub, 0>;
2260 defm USUBLvvv : NeonI_3VDL_u<0b1, 0b0010, "usubl", sub, 0>;
2262 defm SSUBL2vvv : NeonI_3VDL2_s<0b0, 0b0010, "ssubl2", sub, 0>;
2263 defm USUBL2vvv : NeonI_3VDL2_u<0b1, 0b0010, "usubl2", sub, 0>;
2265 // normal wide/wide2 pattern
// Wide operation: $Rn is already double-width (VPR128); only $Rm is
// extended before opnode is applied. Same _s/_2_s/_u/_2_u split as 3VDL.
2266 class NeonI_3VDW<bit q, bit u, bits<2> size, bits<4> opcode,
2267 string asmop, string ResS, string OpS,
2268 SDPatternOperator opnode, SDPatternOperator ext,
2269 RegisterOperand OpVPR,
2270 ValueType ResTy, ValueType OpTy>
2271 : NeonI_3VDiff<q, u, size, opcode,
2272 (outs VPR128:$Rd), (ins VPR128:$Rn, OpVPR:$Rm),
2273 asmop # "\t$Rd." # ResS # ", $Rn." # ResS # ", $Rm." # OpS,
2274 [(set (ResTy VPR128:$Rd),
2275 (ResTy (opnode (ResTy VPR128:$Rn),
2276 (ResTy (ext (OpTy OpVPR:$Rm))))))],
2279 multiclass NeonI_3VDW_s<bit u, bits<4> opcode,
2280 string asmop, SDPatternOperator opnode>
2282 def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2283 opnode, sext, VPR64, v8i16, v8i8>;
2284 def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2285 opnode, sext, VPR64, v4i32, v4i16>;
2286 def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2287 opnode, sext, VPR64, v2i64, v2i32>;
2290 defm SADDWvvv : NeonI_3VDW_s<0b0, 0b0001, "saddw", add>;
2291 defm SSUBWvvv : NeonI_3VDW_s<0b0, 0b0011, "ssubw", sub>;
2293 multiclass NeonI_3VDW2_s<bit u, bits<4> opcode,
2294 string asmop, SDPatternOperator opnode>
2296 def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2297 opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
2298 def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2299 opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
2300 def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2301 opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
2304 defm SADDW2vvv : NeonI_3VDW2_s<0b0, 0b0001, "saddw2", add>;
2305 defm SSUBW2vvv : NeonI_3VDW2_s<0b0, 0b0011, "ssubw2", sub>;
2307 multiclass NeonI_3VDW_u<bit u, bits<4> opcode,
2308 string asmop, SDPatternOperator opnode>
2310 def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2311 opnode, zext, VPR64, v8i16, v8i8>;
2312 def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2313 opnode, zext, VPR64, v4i32, v4i16>;
2314 def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2315 opnode, zext, VPR64, v2i64, v2i32>;
2318 defm UADDWvvv : NeonI_3VDW_u<0b1, 0b0001, "uaddw", add>;
2319 defm USUBWvvv : NeonI_3VDW_u<0b1, 0b0011, "usubw", sub>;
2321 multiclass NeonI_3VDW2_u<bit u, bits<4> opcode,
2322 string asmop, SDPatternOperator opnode>
2324 def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2325 opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
2326 def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2327 opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
2328 def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2329 opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
2332 defm UADDW2vvv : NeonI_3VDW2_u<0b1, 0b0001, "uaddw2", add>;
2333 defm USUBW2vvv : NeonI_3VDW2_u<0b1, 0b0011, "usubw2", sub>;
2335 // Get the high half part of the vector element.
// Modeled as srl by half the element width followed by trunc, i.e. the
// top half of each element -- exactly what addhn/subhn compute.
2336 multiclass NeonI_get_high
2338 def _8h : PatFrag<(ops node:$Rn),
2339 (v8i8 (trunc (v8i16 (srl (v8i16 node:$Rn),
2340 (v8i16 (Neon_dupImm 8))))))>;
2341 def _4s : PatFrag<(ops node:$Rn),
2342 (v4i16 (trunc (v4i32 (srl (v4i32 node:$Rn),
2343 (v4i32 (Neon_dupImm 16))))))>;
2344 def _2d : PatFrag<(ops node:$Rn),
2345 (v2i32 (trunc (v2i64 (srl (v2i64 node:$Rn),
2346 (v2i64 (Neon_dupImm 32))))))>;
2349 defm NI_get_hi : NeonI_get_high;
2351 // pattern for addhn/subhn with 2 operands
// Narrowing high-half op: add/sub at full width, then the get_hi fragment
// extracts the top halves into a D-register result.
2352 class NeonI_3VDN_addhn_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
2353 string asmop, string ResS, string OpS,
2354 SDPatternOperator opnode, SDPatternOperator get_hi,
2355 ValueType ResTy, ValueType OpTy>
2356 : NeonI_3VDiff<q, u, size, opcode,
2357 (outs VPR64:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
2358 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2359 [(set (ResTy VPR64:$Rd),
2361 (OpTy (opnode (OpTy VPR128:$Rn),
2362 (OpTy VPR128:$Rm))))))],
2365 multiclass NeonI_3VDN_addhn_2Op<bit u, bits<4> opcode,
2366 string asmop, SDPatternOperator opnode,
2369 let isCommutable = Commutable in {
2370 def _8b8h : NeonI_3VDN_addhn_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
2371 opnode, NI_get_hi_8h, v8i8, v8i16>;
2372 def _4h4s : NeonI_3VDN_addhn_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
2373 opnode, NI_get_hi_4s, v4i16, v4i32>;
2374 def _2s2d : NeonI_3VDN_addhn_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
2375 opnode, NI_get_hi_2d, v2i32, v2i64>;
2379 defm ADDHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0100, "addhn", add, 1>;
2380 defm SUBHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>;
2382 // pattern for operation with 2 operands
// Generic two-operand 3VDiff base: opnode applied directly to $Rn/$Rm,
// with independent result/operand register classes and types so it can
// express narrow (Q->D) as well as long (D->Q) forms.
2383 class NeonI_3VD_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
2384 string asmop, string ResS, string OpS,
2385 SDPatternOperator opnode,
2386 RegisterOperand ResVPR, RegisterOperand OpVPR,
2387 ValueType ResTy, ValueType OpTy>
2388 : NeonI_3VDiff<q, u, size, opcode,
2389 (outs ResVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2390 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2391 [(set (ResTy ResVPR:$Rd),
2392 (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))],
2395 // normal narrow pattern
2396 multiclass NeonI_3VDN_2Op<bit u, bits<4> opcode,
2397 string asmop, SDPatternOperator opnode,
2400 let isCommutable = Commutable in {
2401 def _8b8h : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
2402 opnode, VPR64, VPR128, v8i8, v8i16>;
2403 def _4h4s : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
2404 opnode, VPR64, VPR128, v4i16, v4i32>;
2405 def _2s2d : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
2406 opnode, VPR64, VPR128, v2i32, v2i64>;
2410 defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>;
2411 defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>;
2413 // pattern for ACLE intrinsic with 3 operands
// Narrow-high "2" forms: $src (the live low half) is tied to $Rd; no
// ISel pattern here -- selection is done via NarrowHighHalfPat below.
2414 class NeonI_3VDN_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
2415 string asmop, string ResS, string OpS>
2416 : NeonI_3VDiff<q, u, size, opcode,
2417 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
2418 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2420 let Constraints = "$src = $Rd";
2421 let neverHasSideEffects = 1;
2424 multiclass NeonI_3VDN_3Op_v1<bit u, bits<4> opcode,
2426 def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">;
2427 def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">;
2428 def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">;
2431 defm ADDHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0100, "addhn2">;
2432 defm SUBHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0110, "subhn2">;
2434 defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2">;
2435 defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2">;
2437 // Patterns have to be separate because there's a SUBREG_TO_REG in the output
// Matches a narrowed result being combined into the high half of a Q reg
// and selects the corresponding "2" instruction, re-inserting the live low
// half ($src) via SUBREG_TO_REG.
2439 class NarrowHighHalfPat<Instruction INST, ValueType DstTy, ValueType SrcTy,
2440 SDPatternOperator coreop>
2441 : Pat<(Neon_combine (v1i64 VPR64:$src),
2442 (v1i64 (bitconvert (DstTy (coreop (SrcTy VPR128:$Rn),
2443 (SrcTy VPR128:$Rm)))))),
2444 (INST (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
2445 VPR128:$Rn, VPR128:$Rm)>;
// addhn2: high half of (Rn + Rm), per element size.
2448 def : NarrowHighHalfPat<ADDHN2vvv_16b8h, v8i8, v8i16,
2449 BinOpFrag<(NI_get_hi_8h (add node:$LHS, node:$RHS))>>;
2450 def : NarrowHighHalfPat<ADDHN2vvv_8h4s, v4i16, v4i32,
2451 BinOpFrag<(NI_get_hi_4s (add node:$LHS, node:$RHS))>>;
2452 def : NarrowHighHalfPat<ADDHN2vvv_4s2d, v2i32, v2i64,
2453 BinOpFrag<(NI_get_hi_2d (add node:$LHS, node:$RHS))>>;
// subhn2: high half of (Rn - Rm).
2456 def : NarrowHighHalfPat<SUBHN2vvv_16b8h, v8i8, v8i16,
2457 BinOpFrag<(NI_get_hi_8h (sub node:$LHS, node:$RHS))>>;
2458 def : NarrowHighHalfPat<SUBHN2vvv_8h4s, v4i16, v4i32,
2459 BinOpFrag<(NI_get_hi_4s (sub node:$LHS, node:$RHS))>>;
2460 def : NarrowHighHalfPat<SUBHN2vvv_4s2d, v2i32, v2i64,
2461 BinOpFrag<(NI_get_hi_2d (sub node:$LHS, node:$RHS))>>;
// raddhn2 / rsubhn2: rounding variants via the ARM NEON intrinsics.
2464 def : NarrowHighHalfPat<RADDHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vraddhn>;
2465 def : NarrowHighHalfPat<RADDHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vraddhn>;
2466 def : NarrowHighHalfPat<RADDHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vraddhn>;
2469 def : NarrowHighHalfPat<RSUBHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vrsubhn>;
2470 def : NarrowHighHalfPat<RSUBHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vrsubhn>;
2471 def : NarrowHighHalfPat<RSUBHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vrsubhn>;
2473 // patterns that need to extend the result
// Long form where opnode produces a NARROW result (OpSTy) that is then
// zero-extended to ResTy -- used for sabdl/uabdl, whose absolute
// difference fits in the source width.
2474 class NeonI_3VDL_Ext<bit q, bit u, bits<2> size, bits<4> opcode,
2475 string asmop, string ResS, string OpS,
2476 SDPatternOperator opnode,
2477 RegisterOperand OpVPR,
2478 ValueType ResTy, ValueType OpTy, ValueType OpSTy>
2479 : NeonI_3VDiff<q, u, size, opcode,
2480 (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
2481 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2482 [(set (ResTy VPR128:$Rd),
2483 (ResTy (zext (OpSTy (opnode (OpTy OpVPR:$Rn),
2484 (OpTy OpVPR:$Rm))))))],
2487 multiclass NeonI_3VDL_zext<bit u, bits<4> opcode,
2488 string asmop, SDPatternOperator opnode,
2491 let isCommutable = Commutable in {
2492 def _8h8b : NeonI_3VDL_Ext<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2493 opnode, VPR64, v8i16, v8i8, v8i8>;
2494 def _4s4h : NeonI_3VDL_Ext<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2495 opnode, VPR64, v4i32, v4i16, v4i16>;
2496 def _2d2s : NeonI_3VDL_Ext<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2497 opnode, VPR64, v2i64, v2i32, v2i32>;
2501 defm SABDLvvv : NeonI_3VDL_zext<0b0, 0b0111, "sabdl", int_arm_neon_vabds, 1>;
2502 defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>;
// Wraps 'op' so it operates on the HIGH halves of both 128-bit operands;
// instantiated for every op that has a "2" (high-half) instruction form.
2504 multiclass NeonI_Op_High<SDPatternOperator op>
2506 def _16B : PatFrag<(ops node:$Rn, node:$Rm),
2507 (op (v8i8 (Neon_top16B node:$Rn)), (v8i8 (Neon_top16B node:$Rm)))>;
2508 def _8H : PatFrag<(ops node:$Rn, node:$Rm),
2509 (op (v4i16 (Neon_top8H node:$Rn)), (v4i16 (Neon_top8H node:$Rm)))>;
2510 def _4S : PatFrag<(ops node:$Rn, node:$Rm),
2511 (op (v2i32 (Neon_top4S node:$Rn)), (v2i32 (Neon_top4S node:$Rm)))>;
2515 defm NI_sabdl_hi : NeonI_Op_High<int_arm_neon_vabds>;
2516 defm NI_uabdl_hi : NeonI_Op_High<int_arm_neon_vabdu>;
2517 defm NI_smull_hi : NeonI_Op_High<int_arm_neon_vmulls>;
2518 defm NI_umull_hi : NeonI_Op_High<int_arm_neon_vmullu>;
2519 defm NI_qdmull_hi : NeonI_Op_High<int_arm_neon_vqdmull>;
2520 defm NI_pmull_hi : NeonI_Op_High<int_arm_neon_vmullp>;
// sabdl2/uabdl2: high-half absolute-difference long. 'opnode' is the
// string prefix of a NeonI_Op_High family; !cast resolves the per-size
// _16B/_8H/_4S PatFrag.
2522 multiclass NeonI_3VDL_Abd_u<bit u, bits<4> opcode,
2523 string asmop, string opnode,
2526 let isCommutable = Commutable in {
2527 def _8h8b : NeonI_3VDL_Ext<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2528 !cast<PatFrag>(opnode # "_16B"),
2529 VPR128, v8i16, v16i8, v8i8>;
2530 def _4s4h : NeonI_3VDL_Ext<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2531 !cast<PatFrag>(opnode # "_8H"),
2532 VPR128, v4i32, v8i16, v4i16>;
2533 def _2d2s : NeonI_3VDL_Ext<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2534 !cast<PatFrag>(opnode # "_4S"),
2535 VPR128, v2i64, v4i32, v2i32>;
2539 defm SABDL2vvv : NeonI_3VDL_Abd_u<0b0, 0b0111, "sabdl2", "NI_sabdl_hi", 1>;
2540 defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 0b0111, "uabdl2", "NI_uabdl_hi", 1>;
2542 // For patterns that need two operators chained together.
// Accumulating absolute difference (saba/uaba long): opnode (add)
// combines the tied accumulator $src with the zero-extended result of
// subop (the absolute-difference intrinsic).
2543 class NeonI_3VDL_Aba<bit q, bit u, bits<2> size, bits<4> opcode,
2544 string asmop, string ResS, string OpS,
2545 SDPatternOperator opnode, SDPatternOperator subop,
2546 RegisterOperand OpVPR,
2547 ValueType ResTy, ValueType OpTy, ValueType OpSTy>
2548 : NeonI_3VDiff<q, u, size, opcode,
2549 (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
2550 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2551 [(set (ResTy VPR128:$Rd),
2553 (ResTy VPR128:$src),
2554 (ResTy (zext (OpSTy (subop (OpTy OpVPR:$Rn),
2555 (OpTy OpVPR:$Rm))))))))],
2557 let Constraints = "$src = $Rd";
2560 multiclass NeonI_3VDL_Aba_v1<bit u, bits<4> opcode,
2561 string asmop, SDPatternOperator opnode,
2562 SDPatternOperator subop>
2564 def _8h8b : NeonI_3VDL_Aba<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2565 opnode, subop, VPR64, v8i16, v8i8, v8i8>;
2566 def _4s4h : NeonI_3VDL_Aba<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2567 opnode, subop, VPR64, v4i32, v4i16, v4i16>;
2568 def _2d2s : NeonI_3VDL_Aba<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2569 opnode, subop, VPR64, v2i64, v2i32, v2i32>;
2572 defm SABALvvv : NeonI_3VDL_Aba_v1<0b0, 0b0101, "sabal",
2573 add, int_arm_neon_vabds>;
2574 defm UABALvvv : NeonI_3VDL_Aba_v1<0b1, 0b0101, "uabal",
2575 add, int_arm_neon_vabdu>;
// High-half ("2") variants: subop is a NeonI_Op_High family prefix.
2577 multiclass NeonI_3VDL2_Aba_v1<bit u, bits<4> opcode,
2578 string asmop, SDPatternOperator opnode,
2581 def _8h8b : NeonI_3VDL_Aba<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2582 opnode, !cast<PatFrag>(subop # "_16B"),
2583 VPR128, v8i16, v16i8, v8i8>;
2584 def _4s4h : NeonI_3VDL_Aba<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2585 opnode, !cast<PatFrag>(subop # "_8H"),
2586 VPR128, v4i32, v8i16, v4i16>;
2587 def _2d2s : NeonI_3VDL_Aba<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2588 opnode, !cast<PatFrag>(subop # "_4S"),
2589 VPR128, v2i64, v4i32, v2i32>;
2592 defm SABAL2vvv : NeonI_3VDL2_Aba_v1<0b0, 0b0101, "sabal2", add,
2594 defm UABAL2vvv : NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add,
2597 // Long pattern with 2 operands
// smull/umull: the vmull intrinsics already produce the widened result,
// so NeonI_3VD_2Op is reused with D-reg operands and a Q-reg destination.
2598 multiclass NeonI_3VDL_2Op<bit u, bits<4> opcode,
2599 string asmop, SDPatternOperator opnode,
2602 let isCommutable = Commutable in {
2603 def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2604 opnode, VPR128, VPR64, v8i16, v8i8>;
2605 def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2606 opnode, VPR128, VPR64, v4i32, v4i16>;
2607 def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2608 opnode, VPR128, VPR64, v2i64, v2i32>;
2612 defm SMULLvvv : NeonI_3VDL_2Op<0b0, 0b1100, "smull", int_arm_neon_vmulls, 1>;
2613 defm UMULLvvv : NeonI_3VDL_2Op<0b1, 0b1100, "umull", int_arm_neon_vmullu, 1>;
// smull2/umull2: both operands are full Q registers; opnode is a
// NeonI_Op_High fragment that extracts the high halves.
2615 class NeonI_3VDL2_2Op_mull<bit q, bit u, bits<2> size, bits<4> opcode,
2616 string asmop, string ResS, string OpS,
2617 SDPatternOperator opnode,
2618 ValueType ResTy, ValueType OpTy>
2619 : NeonI_3VDiff<q, u, size, opcode,
2620 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
2621 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2622 [(set (ResTy VPR128:$Rd),
2623 (ResTy (opnode (OpTy VPR128:$Rn), (OpTy VPR128:$Rm))))],
2627 multiclass NeonI_3VDL2_2Op_mull_v1<bit u, bits<4> opcode,
2632 let isCommutable = Commutable in {
2633 def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2634 !cast<PatFrag>(opnode # "_16B"),
2636 def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2637 !cast<PatFrag>(opnode # "_8H"),
2639 def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2640 !cast<PatFrag>(opnode # "_4S"),
2645 defm SMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b0, 0b1100, "smull2",
2647 defm UMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b1, 0b1100, "umull2",
2650 // Long pattern with 3 operands
// Multiply-accumulate long: opnode is one of the Neon_smlal/umlal/smlsl/
// umlsl fragments below, taking the tied accumulator plus two D-reg
// multiplicands.
2651 class NeonI_3VDL_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
2652 string asmop, string ResS, string OpS,
2653 SDPatternOperator opnode,
2654 ValueType ResTy, ValueType OpTy>
2655 : NeonI_3VDiff<q, u, size, opcode,
2656 (outs VPR128:$Rd), (ins VPR128:$src, VPR64:$Rn, VPR64:$Rm),
2657 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2658 [(set (ResTy VPR128:$Rd),
2660 (ResTy VPR128:$src),
2661 (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))))],
2663 let Constraints = "$src = $Rd";
2666 multiclass NeonI_3VDL_3Op_v1<bit u, bits<4> opcode,
2667 string asmop, SDPatternOperator opnode>
2669 def _8h8b : NeonI_3VDL_3Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2670 opnode, v8i16, v8i8>;
2671 def _4s4h : NeonI_3VDL_3Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2672 opnode, v4i32, v4i16>;
2673 def _2d2s : NeonI_3VDL_3Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2674 opnode, v2i64, v2i32>;
// Accumulator fragments: Rd +/- vmull(Rn, Rm), signed and unsigned.
2677 def Neon_smlal : PatFrag<(ops node:$Rd, node:$Rm, node:$Rn),
2679 (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
2681 def Neon_umlal : PatFrag<(ops node:$Rd, node:$Rm, node:$Rn),
2683 (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
2685 def Neon_smlsl : PatFrag<(ops node:$Rd, node:$Rm, node:$Rn),
2687 (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
2689 def Neon_umlsl : PatFrag<(ops node:$Rd, node:$Rm, node:$Rn),
2691 (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
2693 defm SMLALvvv : NeonI_3VDL_3Op_v1<0b0, 0b1000, "smlal", Neon_smlal>;
2694 defm UMLALvvv : NeonI_3VDL_3Op_v1<0b1, 0b1000, "umlal", Neon_umlal>;
2696 defm SMLSLvvv : NeonI_3VDL_3Op_v1<0b0, 0b1010, "smlsl", Neon_smlsl>;
2697 defm UMLSLvvv : NeonI_3VDL_3Op_v1<0b1, 0b1010, "umlsl", Neon_umlsl>;
2699 class NeonI_3VDL2_3Op_mlas<bit q, bit u, bits<2> size, bits<4> opcode,
2700 string asmop, string ResS, string OpS,
2701 SDPatternOperator subop, SDPatternOperator opnode,
2702 RegisterOperand OpVPR,
2703 ValueType ResTy, ValueType OpTy>
2704 : NeonI_3VDiff<q, u, size, opcode,
2705 (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
2706 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
2707 [(set (ResTy VPR128:$Rd),
2709 (ResTy VPR128:$src),
2710 (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))],
2712 let Constraints = "$src = $Rd";
2715 multiclass NeonI_3VDL2_3Op_mlas_v1<bit u, bits<4> opcode,
2717 SDPatternOperator subop,
2720 def _8h16b : NeonI_3VDL2_3Op_mlas<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2721 subop, !cast<PatFrag>(opnode # "_16B"),
2722 VPR128, v8i16, v16i8>;
2723 def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2724 subop, !cast<PatFrag>(opnode # "_8H"),
2725 VPR128, v4i32, v8i16>;
2726 def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2727 subop, !cast<PatFrag>(opnode # "_4S"),
2728 VPR128, v2i64, v4i32>;
2731 defm SMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1000, "smlal2",
2732 add, "NI_smull_hi">;
2733 defm UMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1000, "umlal2",
2734 add, "NI_umull_hi">;
2736 defm SMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1010, "smlsl2",
2737 sub, "NI_smull_hi">;
2738 defm UMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1010, "umlsl2",
2739 sub, "NI_umull_hi">;
2741 multiclass NeonI_3VDL_qdmlal_3Op_v2<bit u, bits<4> opcode,
2742 string asmop, SDPatternOperator opnode>
2744 def _4s4h : NeonI_3VDL2_3Op_mlas<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2745 opnode, int_arm_neon_vqdmull,
2746 VPR64, v4i32, v4i16>;
2747 def _2d2s : NeonI_3VDL2_3Op_mlas<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2748 opnode, int_arm_neon_vqdmull,
2749 VPR64, v2i64, v2i32>;
2752 defm SQDMLALvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1001, "sqdmlal",
2753 int_arm_neon_vqadds>;
2754 defm SQDMLSLvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1011, "sqdmlsl",
2755 int_arm_neon_vqsubs>;
2757 multiclass NeonI_3VDL_v2<bit u, bits<4> opcode,
2758 string asmop, SDPatternOperator opnode,
2761 let isCommutable = Commutable in {
2762 def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
2763 opnode, VPR128, VPR64, v4i32, v4i16>;
2764 def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
2765 opnode, VPR128, VPR64, v2i64, v2i32>;
2769 defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull",
2770 int_arm_neon_vqdmull, 1>;
2772 multiclass NeonI_3VDL2_2Op_mull_v2<bit u, bits<4> opcode,
2777 let isCommutable = Commutable in {
2778 def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2779 !cast<PatFrag>(opnode # "_8H"),
2781 def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2782 !cast<PatFrag>(opnode # "_4S"),
2787 defm SQDMULL2vvv : NeonI_3VDL2_2Op_mull_v2<0b0, 0b1101, "sqdmull2",
2790 multiclass NeonI_3VDL2_3Op_qdmlal_v2<bit u, bits<4> opcode,
2792 SDPatternOperator opnode>
2794 def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
2795 opnode, NI_qdmull_hi_8H,
2796 VPR128, v4i32, v8i16>;
2797 def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
2798 opnode, NI_qdmull_hi_4S,
2799 VPR128, v2i64, v4i32>;
2802 defm SQDMLAL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1001, "sqdmlal2",
2803 int_arm_neon_vqadds>;
2804 defm SQDMLSL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1011, "sqdmlsl2",
2805 int_arm_neon_vqsubs>;
2807 multiclass NeonI_3VDL_v3<bit u, bits<4> opcode,
2808 string asmop, SDPatternOperator opnode,
2811 let isCommutable = Commutable in {
2812 def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
2813 opnode, VPR128, VPR64, v8i16, v8i8>;
2817 defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp, 1>;
2819 multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode,
2824 let isCommutable = Commutable in {
2825 def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
2826 !cast<PatFrag>(opnode # "_16B"),
2831 defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2",
2834 // End of implementation for instruction class (3V Diff)
2836 // Scalar Arithmetic
2838 class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
2839 : NeonI_Scalar3Same<u, 0b11, opcode,
2840 (outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm),
2841 !strconcat(asmop, " $Rd, $Rn, $Rm"),
2845 multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
2846 string asmop, bit Commutable = 0>
2848 let isCommutable = Commutable in {
2849 def bbb : NeonI_Scalar3Same<u, 0b00, opcode,
2850 (outs FPR8:$Rd), (ins FPR8:$Rn, FPR8:$Rm),
2851 !strconcat(asmop, " $Rd, $Rn, $Rm"),
2854 def hhh : NeonI_Scalar3Same<u, 0b01, opcode,
2855 (outs FPR16:$Rd), (ins FPR16:$Rn, FPR16:$Rm),
2856 !strconcat(asmop, " $Rd, $Rn, $Rm"),
2859 def sss : NeonI_Scalar3Same<u, 0b10, opcode,
2860 (outs FPR32:$Rd), (ins FPR32:$Rn, FPR32:$Rm),
2861 !strconcat(asmop, " $Rd, $Rn, $Rm"),
2864 def ddd : NeonI_Scalar3Same<u, 0b11, opcode,
2865 (outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm),
2866 !strconcat(asmop, " $Rd, $Rn, $Rm"),
2872 multiclass Neon_Scalar_D_size_patterns<SDPatternOperator opnode,
2873 Instruction INSTD> {
2874 def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
2875 (INSTD FPR64:$Rn, FPR64:$Rm)>;
2878 multiclass Neon_Scalar_BHSD_size_patterns<SDPatternOperator opnode,
2879 Instruction INSTB, Instruction INSTH,
2880 Instruction INSTS, Instruction INSTD>
2881 : Neon_Scalar_D_size_patterns<opnode, INSTD> {
2882 def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
2883 (INSTB FPR8:$Rn, FPR8:$Rm)>;
2885 def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
2886 (INSTH FPR16:$Rn, FPR16:$Rm)>;
2888 def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
2889 (INSTS FPR32:$Rn, FPR32:$Rm)>;
2892 // Scalar Integer Add
2893 let isCommutable = 1 in {
2894 def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
2897 // Scalar Integer Sub
2898 def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">;
2900 // Pattern for Scalar Integer Add and Sub with D register only
// Map the generic ISD add/sub on v1i64 onto the scalar NEON D-register forms.
2901 defm : Neon_Scalar_D_size_patterns<add, ADDddd>;
2902 defm : Neon_Scalar_D_size_patterns<sub, SUBddd>;
2904 // Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub
// The signed and unsigned intrinsic variants select the same instruction;
// the distinction exists only at the intrinsic level.
2905 defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vaddds, ADDddd>;
2906 defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vadddu, ADDddd>;
2907 defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vsubds, SUBddd>;
2908 defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vsubdu, SUBddd>;
2910 // Scalar Integer Saturating Add (Signed, Unsigned)
// Commutable = 1: saturating add is commutative; saturating sub below is not.
2911 defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;
2912 defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;
2914 // Scalar Integer Saturating Sub (Signed, Unsigned)
2915 defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
2916 defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;
2918 // Patterns to match llvm.arm.* intrinsic for
2919 // Scalar Integer Saturating Add, Sub (Signed, Unsigned)
// The ARM (32-bit NEON) intrinsics only cover the D-register (64-bit) case.
2920 defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqadds, SQADDddd>;
2921 defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqaddu, UQADDddd>;
2922 defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubs, SQSUBddd>;
2923 defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubu, UQSUBddd>;
2925 // Patterns to match llvm.aarch64.* intrinsic for
2926 // Scalar Integer Saturating Add, Sub (Signed, Unsigned)
// The AArch64 intrinsics cover all four element sizes (B, H, S, D).
2927 defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqadds, SQADDbbb, SQADDhhh,
2928 SQADDsss, SQADDddd>;
2929 defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqaddu, UQADDbbb, UQADDhhh,
2930 UQADDsss, UQADDddd>;
2931 defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqsubs, SQSUBbbb, SQSUBhhh,
2932 SQSUBsss, SQSUBddd>;
2933 defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqsubu, UQSUBbbb, UQSUBhhh,
2934 UQSUBsss, UQSUBddd>;
2936 // Scalar Integer Shift Left (Signed, Unsigned)
// Register-controlled shift: a negative shift amount shifts right.
2937 def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
2938 def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;
2940 // Patterns to match llvm.arm.* intrinsic for
2941 // Scalar Integer Shift Left (Signed, Unsigned)
2942 defm : Neon_Scalar_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
2943 defm : Neon_Scalar_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
2945 // Patterns to match llvm.aarch64.* intrinsic for
2946 // Scalar Integer Shift Left (Signed, Unsigned)
2947 defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vshlds, SSHLddd>;
2948 defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vshldu, USHLddd>;
2950 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
2951 defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
2952 defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;
2954 // Patterns to match llvm.aarch64.* intrinsic for
2955 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
2956 defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqshls, SQSHLbbb, SQSHLhhh,
2957 SQSHLsss, SQSHLddd>;
2958 defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqshlu, UQSHLbbb, UQSHLhhh,
2959 UQSHLsss, UQSHLddd>;
2961 // Patterns to match llvm.arm.* intrinsic for
2962 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
2963 defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
2964 defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
2966 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
2967 def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">;
2968 def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">;
2970 // Patterns to match llvm.aarch64.* intrinsic for
2971 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
2972 defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vrshlds, SRSHLddd>;
2973 defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vrshldu, URSHLddd>;
2975 // Patterns to match llvm.arm.* intrinsic for
2976 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
2977 defm : Neon_Scalar_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
2978 defm : Neon_Scalar_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
2980 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
// Saturating variants support all element sizes (B, H, S, D).
2981 defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>;
2982 defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>;
2984 // Patterns to match llvm.aarch64.* intrinsic for
2985 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
2986 defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqrshls, SQRSHLbbb, SQRSHLhhh,
2987 SQRSHLsss, SQRSHLddd>;
2988 defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb, UQRSHLhhh,
2989 UQRSHLsss, UQRSHLddd>;
2991 // Patterns to match llvm.arm.* intrinsic for
2992 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
2993 defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
2994 defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
2996 // Scalar Reduce Pairwise
2998 multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,
2999 string asmop, bit Commutable = 0> {
3000 let isCommutable = Commutable in {
3001 def _D_2D : NeonI_ScalarPair<u, {size, 0b1}, opcode,
3002 (outs FPR64:$Rd), (ins VPR128:$Rn),
3003 !strconcat(asmop, " $Rd, $Rn.2d"),
3009 multiclass NeonI_ScalarPair_SD_sizes<bit u, bit size, bits<5> opcode,
3010 string asmop, bit Commutable = 0>
3011 : NeonI_ScalarPair_D_sizes<u, size, opcode, asmop, Commutable> {
3012 let isCommutable = Commutable in {
3013 def _S_2S : NeonI_ScalarPair<u, {size, 0b0}, opcode,
3014 (outs FPR32:$Rd), (ins VPR64:$Rn),
3015 !strconcat(asmop, " $Rd, $Rn.2s"),
3021 // Scalar Reduce Addition Pairwise (Integer) with
3022 // Pattern to match llvm.arm.* intrinsic
3023 defm ADDPvv : NeonI_ScalarPair_D_sizes<0b0, 0b1, 0b11011, "addp", 0>;
3025 // Pattern to match llvm.aarch64.* intrinsic for
3026 // Scalar Reduce Addition Pairwise (Integer)
3027 def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))),
3028 (ADDPvv_D_2D VPR128:$Rn)>;
3030 // Scalar Reduce Addition Pairwise (Floating Point)
3031 defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>;
3033 // Scalar Reduce Maximum Pairwise (Floating Point)
// For the same opcode, the size bit selects max (0b0) vs. min (0b1):
// FMAXP/FMINP share 0b01111 and FMAXNMP/FMINNMP share 0b01100.
3034 defm FMAXPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01111, "fmaxp", 0>;
3036 // Scalar Reduce Minimum Pairwise (Floating Point)
3037 defm FMINPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01111, "fminp", 0>;
3039 // Scalar Reduce maxNum Pairwise (Floating Point)
3040 defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>;
3042 // Scalar Reduce minNum Pairwise (Floating Point)
3043 defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>;
3045 multiclass Neon_ScalarPair_SD_size_patterns<SDPatternOperator opnodeS,
3046 SDPatternOperator opnodeD,
3048 Instruction INSTD> {
3049 def : Pat<(v1f32 (opnodeS (v2f32 VPR64:$Rn))),
3051 def : Pat<(v1f64 (opnodeD (v2f64 VPR128:$Rn))),
3052 (INSTD VPR128:$Rn)>;
3055 // Patterns to match llvm.aarch64.* intrinsic for
3056 // Scalar Reduce Add, Max, Min, MaxNum, MinNum Pairwise (Floating Point)
// Each defm instantiates the S (v2f32 -> f32) and D (v2f64 -> f64) patterns
// for one pairwise-reduce intrinsic pair (non-q / q variants).
3057 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfadd,
3058 int_aarch64_neon_vpfaddq, FADDPvv_S_2S, FADDPvv_D_2D>;
3060 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmax,
3061 int_aarch64_neon_vpmaxq, FMAXPvv_S_2S, FMAXPvv_D_2D>;
3063 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmin,
3064 int_aarch64_neon_vpminq, FMINPvv_S_2S, FMINPvv_D_2D>;
3066 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm,
3067 int_aarch64_neon_vpfmaxnmq, FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>;
3069 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm,
3070 int_aarch64_neon_vpfminnmq, FMINNMPvv_S_2S, FMINNMPvv_D_2D>;
3074 //===----------------------------------------------------------------------===//
3075 // Non-Instruction Patterns
3076 //===----------------------------------------------------------------------===//
3078 // 64-bit vector bitcasts...
// Bitcasts between same-sized vector types stay in the same D register, so
// they lower to a plain register reuse (no instruction is emitted).
3080 def : Pat<(v1i64 (bitconvert (v8i8 VPR64:$src))), (v1i64 VPR64:$src)>;
3081 def : Pat<(v2f32 (bitconvert (v8i8 VPR64:$src))), (v2f32 VPR64:$src)>;
3082 def : Pat<(v2i32 (bitconvert (v8i8 VPR64:$src))), (v2i32 VPR64:$src)>;
3083 def : Pat<(v4i16 (bitconvert (v8i8 VPR64:$src))), (v4i16 VPR64:$src)>;
3085 def : Pat<(v1i64 (bitconvert (v4i16 VPR64:$src))), (v1i64 VPR64:$src)>;
3086 def : Pat<(v2i32 (bitconvert (v4i16 VPR64:$src))), (v2i32 VPR64:$src)>;
3087 def : Pat<(v2f32 (bitconvert (v4i16 VPR64:$src))), (v2f32 VPR64:$src)>;
3088 def : Pat<(v8i8 (bitconvert (v4i16 VPR64:$src))), (v8i8 VPR64:$src)>;
3090 def : Pat<(v1i64 (bitconvert (v2i32 VPR64:$src))), (v1i64 VPR64:$src)>;
3091 def : Pat<(v2f32 (bitconvert (v2i32 VPR64:$src))), (v2f32 VPR64:$src)>;
3092 def : Pat<(v4i16 (bitconvert (v2i32 VPR64:$src))), (v4i16 VPR64:$src)>;
3093 def : Pat<(v8i8 (bitconvert (v2i32 VPR64:$src))), (v8i8 VPR64:$src)>;
3095 def : Pat<(v1i64 (bitconvert (v2f32 VPR64:$src))), (v1i64 VPR64:$src)>;
3096 def : Pat<(v2i32 (bitconvert (v2f32 VPR64:$src))), (v2i32 VPR64:$src)>;
3097 def : Pat<(v4i16 (bitconvert (v2f32 VPR64:$src))), (v4i16 VPR64:$src)>;
3098 def : Pat<(v8i8 (bitconvert (v2f32 VPR64:$src))), (v8i8 VPR64:$src)>;
3100 def : Pat<(v2f32 (bitconvert (v1i64 VPR64:$src))), (v2f32 VPR64:$src)>;
3101 def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
3102 def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
3103 def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
3105 // ...and 128-bit vector bitcasts...
// Same as the 64-bit cases: a Q-register bitcast is a no-op register reuse.
3107 def : Pat<(v2f64 (bitconvert (v16i8 VPR128:$src))), (v2f64 VPR128:$src)>;
3108 def : Pat<(v2i64 (bitconvert (v16i8 VPR128:$src))), (v2i64 VPR128:$src)>;
3109 def : Pat<(v4f32 (bitconvert (v16i8 VPR128:$src))), (v4f32 VPR128:$src)>;
3110 def : Pat<(v4i32 (bitconvert (v16i8 VPR128:$src))), (v4i32 VPR128:$src)>;
3111 def : Pat<(v8i16 (bitconvert (v16i8 VPR128:$src))), (v8i16 VPR128:$src)>;
3113 def : Pat<(v2f64 (bitconvert (v8i16 VPR128:$src))), (v2f64 VPR128:$src)>;
3114 def : Pat<(v2i64 (bitconvert (v8i16 VPR128:$src))), (v2i64 VPR128:$src)>;
3115 def : Pat<(v4i32 (bitconvert (v8i16 VPR128:$src))), (v4i32 VPR128:$src)>;
3116 def : Pat<(v4f32 (bitconvert (v8i16 VPR128:$src))), (v4f32 VPR128:$src)>;
3117 def : Pat<(v16i8 (bitconvert (v8i16 VPR128:$src))), (v16i8 VPR128:$src)>;
3119 def : Pat<(v2f64 (bitconvert (v4i32 VPR128:$src))), (v2f64 VPR128:$src)>;
3120 def : Pat<(v2i64 (bitconvert (v4i32 VPR128:$src))), (v2i64 VPR128:$src)>;
3121 def : Pat<(v4f32 (bitconvert (v4i32 VPR128:$src))), (v4f32 VPR128:$src)>;
3122 def : Pat<(v8i16 (bitconvert (v4i32 VPR128:$src))), (v8i16 VPR128:$src)>;
3123 def : Pat<(v16i8 (bitconvert (v4i32 VPR128:$src))), (v16i8 VPR128:$src)>;
3125 def : Pat<(v2f64 (bitconvert (v4f32 VPR128:$src))), (v2f64 VPR128:$src)>;
3126 def : Pat<(v2i64 (bitconvert (v4f32 VPR128:$src))), (v2i64 VPR128:$src)>;
3127 def : Pat<(v4i32 (bitconvert (v4f32 VPR128:$src))), (v4i32 VPR128:$src)>;
3128 def : Pat<(v8i16 (bitconvert (v4f32 VPR128:$src))), (v8i16 VPR128:$src)>;
3129 def : Pat<(v16i8 (bitconvert (v4f32 VPR128:$src))), (v16i8 VPR128:$src)>;
3131 def : Pat<(v2f64 (bitconvert (v2i64 VPR128:$src))), (v2f64 VPR128:$src)>;
3132 def : Pat<(v4f32 (bitconvert (v2i64 VPR128:$src))), (v4f32 VPR128:$src)>;
3133 def : Pat<(v4i32 (bitconvert (v2i64 VPR128:$src))), (v4i32 VPR128:$src)>;
3134 def : Pat<(v8i16 (bitconvert (v2i64 VPR128:$src))), (v8i16 VPR128:$src)>;
3135 def : Pat<(v16i8 (bitconvert (v2i64 VPR128:$src))), (v16i8 VPR128:$src)>;
3137 def : Pat<(v2i64 (bitconvert (v2f64 VPR128:$src))), (v2i64 VPR128:$src)>;
3138 def : Pat<(v4f32 (bitconvert (v2f64 VPR128:$src))), (v4f32 VPR128:$src)>;
3139 def : Pat<(v4i32 (bitconvert (v2f64 VPR128:$src))), (v4i32 VPR128:$src)>;
3140 def : Pat<(v8i16 (bitconvert (v2f64 VPR128:$src))), (v8i16 VPR128:$src)>;
3141 def : Pat<(v16i8 (bitconvert (v2f64 VPR128:$src))), (v16i8 VPR128:$src)>;
3144 // ...and scalar bitcasts...
// One-element vector <-> scalar FP casts stay in the same FPR; no code emitted.
3145 def : Pat<(f16 (bitconvert (v1i16 FPR16:$src))), (f16 FPR16:$src)>;
3146 def : Pat<(f32 (bitconvert (v1i32 FPR32:$src))), (f32 FPR32:$src)>;
3147 def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
3148 def : Pat<(f32 (bitconvert (v1f32 FPR32:$src))), (f32 FPR32:$src)>;
3149 def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;
// Casting to an integer GPR type crosses register files, so a real FMOV
// register move is required (unlike the free FPR-to-FPR cases above).
3151 def : Pat<(i64 (bitconvert (v1i64 FPR64:$src))), (FMOVxd $src)>;
3152 def : Pat<(i32 (bitconvert (v1i32 FPR32:$src))), (FMOVws $src)>;
3154 def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
3155 def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
3156 def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
3158 def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))), (f64 VPR64:$src)>;
3159 def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))), (f64 VPR64:$src)>;
3160 def : Pat<(f64 (bitconvert (v2i32 VPR64:$src))), (f64 VPR64:$src)>;
3161 def : Pat<(f64 (bitconvert (v2f32 VPR64:$src))), (f64 VPR64:$src)>;
3162 def : Pat<(f64 (bitconvert (v1i64 VPR64:$src))), (f64 VPR64:$src)>;
3164 def : Pat<(f128 (bitconvert (v16i8 VPR128:$src))), (f128 VPR128:$src)>;
3165 def : Pat<(f128 (bitconvert (v8i16 VPR128:$src))), (f128 VPR128:$src)>;
3166 def : Pat<(f128 (bitconvert (v4i32 VPR128:$src))), (f128 VPR128:$src)>;
3167 def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))), (f128 VPR128:$src)>;
3168 def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), (f128 VPR128:$src)>;
3169 def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), (f128 VPR128:$src)>;
// Reverse direction: scalar -> one-element vector, again register reuse only.
3171 def : Pat<(v1i16 (bitconvert (f16 FPR16:$src))), (v1i16 FPR16:$src)>;
3172 def : Pat<(v1i32 (bitconvert (f32 FPR32:$src))), (v1i32 FPR32:$src)>;
3173 def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
3174 def : Pat<(v1f32 (bitconvert (f32 FPR32:$src))), (v1f32 FPR32:$src)>;
3175 def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
// GPR -> one-element vector also needs an FMOV to cross register files.
3177 def : Pat<(v1i64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
3178 def : Pat<(v1i32 (bitconvert (i32 GPR32:$src))), (FMOVsw $src)>;
3180 def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
3181 def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
3182 def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
3183 def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
3184 def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
3186 def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>;
3187 def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>;
3188 def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>;
3189 def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
3190 def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
3191 def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
3193 def neon_uimm0_bare : Operand<i64>,
3194 ImmLeaf<i64, [{return Imm == 0;}]> {
3195 let ParserMatchClass = neon_uimm0_asmoperand;
3196 let PrintMethod = "printNeonUImm8OperandBare";
3199 def neon_uimm1_bare : Operand<i64>,
3200 ImmLeaf<i64, [{(void)Imm; return true;}]> {
3201 let ParserMatchClass = neon_uimm1_asmoperand;
3202 let PrintMethod = "printNeonUImm8OperandBare";
3205 def neon_uimm2_bare : Operand<i64>,
3206 ImmLeaf<i64, [{(void)Imm; return true;}]> {
3207 let ParserMatchClass = neon_uimm2_asmoperand;
3208 let PrintMethod = "printNeonUImm8OperandBare";
3211 def neon_uimm3_bare : Operand<i64>,
3212 ImmLeaf<i64, [{(void)Imm; return true;}]> {
3213 let ParserMatchClass = uimm3_asmoperand;
3214 let PrintMethod = "printNeonUImm8OperandBare";
3217 def neon_uimm4_bare : Operand<i64>,
3218 ImmLeaf<i64, [{(void)Imm; return true;}]> {
3219 let ParserMatchClass = uimm4_asmoperand;
3220 let PrintMethod = "printNeonUImm8OperandBare";
3223 class NeonI_INS_main<string asmop, string Res, ValueType ResTy,
3224 RegisterClass OpGPR, ValueType OpTy, Operand OpImm>
3225 : NeonI_copy<0b1, 0b0, 0b0011,
3226 (outs VPR128:$Rd), (ins VPR128:$src, OpGPR:$Rn, OpImm:$Imm),
3227 asmop # "\t$Rd." # Res # "[$Imm], $Rn",
3228 [(set (ResTy VPR128:$Rd),
3229 (ResTy (vector_insert
3230 (ResTy VPR128:$src),
3235 let Constraints = "$src = $Rd";
3239 //Insert element (vector, from main)
3240 def INSbw : NeonI_INS_main<"ins", "b", v16i8, GPR32, i32,
3242 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
3244 def INShw : NeonI_INS_main<"ins", "h", v8i16, GPR32, i32,
3246 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
3248 def INSsw : NeonI_INS_main<"ins", "s", v4i32, GPR32, i32,
3250 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
3252 def INSdx : NeonI_INS_main<"ins", "d", v2i64, GPR64, i64,
3254 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
3257 class Neon_INS_main_pattern <ValueType ResTy,ValueType ExtResTy,
3258 RegisterClass OpGPR, ValueType OpTy,
3259 Operand OpImm, Instruction INS>
3260 : Pat<(ResTy (vector_insert
3264 (ResTy (EXTRACT_SUBREG
3265 (ExtResTy (INS (ExtResTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
3266 OpGPR:$Rn, OpImm:$Imm)), sub_64))>;
// Insert into a 64-bit vector: widen the operand to 128 bits, use the
// corresponding 128-bit INS, then extract the low 64-bit subregister back.
3268 def INSbw_pattern : Neon_INS_main_pattern<v8i8, v16i8, GPR32, i32,
3269 neon_uimm3_bare, INSbw>;
3270 def INShw_pattern : Neon_INS_main_pattern<v4i16, v8i16, GPR32, i32,
3271 neon_uimm2_bare, INShw>;
3272 def INSsw_pattern : Neon_INS_main_pattern<v2i32, v4i32, GPR32, i32,
3273 neon_uimm1_bare, INSsw>;
3274 def INSdx_pattern : Neon_INS_main_pattern<v1i64, v2i64, GPR64, i64,
3275 neon_uimm0_bare, INSdx>;
3277 class NeonI_INS_element<string asmop, string Res, ValueType ResTy,
3278 Operand ResImm, ValueType MidTy>
3279 : NeonI_insert<0b1, 0b1,
3280 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn,
3281 ResImm:$Immd, ResImm:$Immn),
3282 asmop # "\t$Rd." # Res # "[$Immd], $Rn." # Res # "[$Immn]",
3283 [(set (ResTy VPR128:$Rd),
3284 (ResTy (vector_insert
3285 (ResTy VPR128:$src),
3286 (MidTy (vector_extract
3291 let Constraints = "$src = $Rd";
3296 //Insert element (vector, from element)
3297 def INSELb : NeonI_INS_element<"ins", "b", v16i8, neon_uimm4_bare, i32> {
3298 let Inst{20-16} = {Immd{3}, Immd{2}, Immd{1}, Immd{0}, 0b1};
3299 let Inst{14-11} = {Immn{3}, Immn{2}, Immn{1}, Immn{0}};
3301 def INSELh : NeonI_INS_element<"ins", "h", v8i16, neon_uimm3_bare, i32> {
3302 let Inst{20-16} = {Immd{2}, Immd{1}, Immd{0}, 0b1, 0b0};
3303 let Inst{14-12} = {Immn{2}, Immn{1}, Immn{0}};
3304 // bit 11 is unspecified.
3306 def INSELs : NeonI_INS_element<"ins", "s", v4i32, neon_uimm2_bare, i32> {
3307 let Inst{20-16} = {Immd{1}, Immd{0}, 0b1, 0b0, 0b0};
3308 let Inst{14-13} = {Immn{1}, Immn{0}};
3309 // bits 11-12 are unspecified.
3311 def INSELd : NeonI_INS_element<"ins", "d", v2i64, neon_uimm1_bare, i64> {
3312 let Inst{20-16} = {Immd, 0b1, 0b0, 0b0, 0b0};
3313 let Inst{14} = Immn{0};
3314 // bits 11-13 are unspecified.
3317 multiclass Neon_INS_elt_pattern <ValueType NaTy, Operand NaImm,
3318 ValueType MidTy, ValueType StTy,
3319 Operand StImm, Instruction INS> {
3320 def : Pat<(NaTy (vector_insert
3322 (MidTy (vector_extract
3326 (NaTy (EXTRACT_SUBREG
3328 (StTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
3334 def : Pat<(StTy (vector_insert
3336 (MidTy (vector_extract
3342 (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
3346 def : Pat<(NaTy (vector_insert
3348 (MidTy (vector_extract
3352 (NaTy (EXTRACT_SUBREG
3354 (StTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
3355 (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// Element-to-element insert patterns for the narrow (64-bit) vector types,
// each mapped onto the 128-bit INSEL instruction of the same element size.
3361 defm INSb_pattern : Neon_INS_elt_pattern<v8i8, neon_uimm3_bare, i32,
3362 v16i8, neon_uimm4_bare, INSELb>;
3363 defm INSh_pattern : Neon_INS_elt_pattern<v4i16, neon_uimm2_bare, i32,
3364 v8i16, neon_uimm3_bare, INSELh>;
3365 defm INSs_pattern : Neon_INS_elt_pattern<v2i32, neon_uimm1_bare, i32,
3366 v4i32, neon_uimm2_bare, INSELs>;
3367 defm INSd_pattern : Neon_INS_elt_pattern<v1i64, neon_uimm0_bare, i64,
3368 v2i64, neon_uimm1_bare, INSELd>;
3370 class NeonI_SMOV<string asmop, string Res, bit Q,
3371 ValueType OpTy, ValueType eleTy,
3372 Operand OpImm, RegisterClass ResGPR, ValueType ResTy>
3373 : NeonI_copy<Q, 0b0, 0b0101,
3374 (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
3375 asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
3376 [(set (ResTy ResGPR:$Rd),
3378 (ResTy (vector_extract
3379 (OpTy VPR128:$Rn), (OpImm:$Imm))),
3385 //Signed integer move (main, from element)
3386 def SMOVwb : NeonI_SMOV<"smov", "b", 0b0, v16i8, i8, neon_uimm4_bare,
3388 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
3390 def SMOVwh : NeonI_SMOV<"smov", "h", 0b0, v8i16, i16, neon_uimm3_bare,
3392 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
3394 def SMOVxb : NeonI_SMOV<"smov", "b", 0b1, v16i8, i8, neon_uimm4_bare,
3396 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
3398 def SMOVxh : NeonI_SMOV<"smov", "h", 0b1, v8i16, i16, neon_uimm3_bare,
3400 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
3402 def SMOVxs : NeonI_SMOV<"smov", "s", 0b1, v4i32, i32, neon_uimm2_bare,
3404 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
3407 multiclass Neon_SMOVx_pattern <ValueType StTy, ValueType NaTy,
3408 ValueType eleTy, Operand StImm, Operand NaImm,
3409 Instruction SMOVI> {
3410 def : Pat<(i64 (sext_inreg
3412 (i32 (vector_extract
3413 (StTy VPR128:$Rn), (StImm:$Imm))))),
3415 (SMOVI VPR128:$Rn, StImm:$Imm)>;
3417 def : Pat<(i64 (sext
3418 (i32 (vector_extract
3419 (StTy VPR128:$Rn), (StImm:$Imm))))),
3420 (SMOVI VPR128:$Rn, StImm:$Imm)>;
3422 def : Pat<(i64 (sext_inreg
3423 (i64 (vector_extract
3424 (NaTy VPR64:$Rn), (NaImm:$Imm))),
3426 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
3429 def : Pat<(i64 (sext_inreg
3431 (i32 (vector_extract
3432 (NaTy VPR64:$Rn), (NaImm:$Imm))))),
3434 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
3437 def : Pat<(i64 (sext
3438 (i32 (vector_extract
3439 (NaTy VPR64:$Rn), (NaImm:$Imm))))),
3440 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// Sign-extending element extract to a 64-bit GPR, for both the 128-bit
// type and its 64-bit counterpart (per-size immediate operands).
3444 defm SMOVxb_pattern : Neon_SMOVx_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
3445 neon_uimm3_bare, SMOVxb>;
3446 defm SMOVxh_pattern : Neon_SMOVx_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
3447 neon_uimm2_bare, SMOVxh>;
3448 defm SMOVxs_pattern : Neon_SMOVx_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
3449 neon_uimm1_bare, SMOVxs>;
3451 class Neon_SMOVw_pattern <ValueType StTy, ValueType NaTy,
3452 ValueType eleTy, Operand StImm, Operand NaImm,
3454 : Pat<(i32 (sext_inreg
3455 (i32 (vector_extract
3456 (NaTy VPR64:$Rn), (NaImm:$Imm))),
3458 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// Sign-extending element extract to a 32-bit GPR from the 64-bit vector
// types (the operand is widened to 128 bits inside the pattern class).
3461 def SMOVwb_pattern : Neon_SMOVw_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
3462 neon_uimm3_bare, SMOVwb>;
3463 def SMOVwh_pattern : Neon_SMOVw_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
3464 neon_uimm2_bare, SMOVwh>;
3467 class NeonI_UMOV<string asmop, string Res, bit Q,
3468 ValueType OpTy, Operand OpImm,
3469 RegisterClass ResGPR, ValueType ResTy>
3470 : NeonI_copy<Q, 0b0, 0b0111,
3471 (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
3472 asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
3473 [(set (ResTy ResGPR:$Rd),
3474 (ResTy (vector_extract
3475 (OpTy VPR128:$Rn), (OpImm:$Imm))))],
3480 //Unsigned integer move (main, from element)
3481 def UMOVwb : NeonI_UMOV<"umov", "b", 0b0, v16i8, neon_uimm4_bare,
3483 let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
3485 def UMOVwh : NeonI_UMOV<"umov", "h", 0b0, v8i16, neon_uimm3_bare,
3487 let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
3489 def UMOVws : NeonI_UMOV<"umov", "s", 0b0, v4i32, neon_uimm2_bare,
3491 let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
3493 def UMOVxd : NeonI_UMOV<"umov", "d", 0b1, v2i64, neon_uimm1_bare,
3495 let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
3498 class Neon_UMOV_pattern <ValueType StTy, ValueType NaTy, ValueType ResTy,
3499 Operand StImm, Operand NaImm,
3501 : Pat<(ResTy (vector_extract
3502 (NaTy VPR64:$Rn), NaImm:$Imm)),
3503 (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
// Zero-extending element extract to a 32-bit GPR from the 64-bit vector
// types, via the matching 128-bit UMOV instruction.
3506 def UMOVwb_pattern : Neon_UMOV_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
3507 neon_uimm3_bare, UMOVwb>;
3508 def UMOVwh_pattern : Neon_UMOV_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
3509 neon_uimm2_bare, UMOVwh>;
3510 def UMOVws_pattern : Neon_UMOV_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
3511 neon_uimm1_bare, UMOVws>;
3514 (i32 (vector_extract
3515 (v16i8 VPR128:$Rn), (neon_uimm4_bare:$Imm))),
3517 (UMOVwb VPR128:$Rn, neon_uimm4_bare:$Imm)>;
3520 (i32 (vector_extract
3521 (v8i16 VPR128:$Rn), (neon_uimm3_bare:$Imm))),
3523 (UMOVwh VPR128:$Rn, neon_uimm3_bare:$Imm)>;
// Fold zext(extract) from a v2i64 into a single UMOV to an X register
// (UMOV zero-extends the extracted element by definition).
// NOTE(review): the inner extract is typed i32 while the source elements are
// 64-bit (v2i64) — looks type-inconsistent; confirm this pattern can match.
3525 def : Pat<(i64 (zext
3526 (i32 (vector_extract
3527 (v2i64 VPR128:$Rn), (neon_uimm1_bare:$Imm))))),
3528 (UMOVxd VPR128:$Rn, neon_uimm1_bare:$Imm)>;
3531 (i32 (vector_extract
3532 (v8i8 VPR64:$Rn), (neon_uimm3_bare:$Imm))),
3534 (UMOVwb (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
3535 neon_uimm3_bare:$Imm)>;
3538 (i32 (vector_extract
3539 (v4i16 VPR64:$Rn), (neon_uimm2_bare:$Imm))),
3541 (UMOVwh (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
3542 neon_uimm2_bare:$Imm)>;
// Fold zext(extract) from a v1i64 into UMOVxd, widening the D register to a
// Q register first since UMOV operates on the 128-bit register.
// NOTE(review): the inner extract is typed i32 while the source element is
// 64-bit (v1i64) — looks type-inconsistent; confirm this pattern can match.
3544 def : Pat<(i64 (zext
3545 (i32 (vector_extract
3546 (v1i64 VPR64:$Rn), (neon_uimm0_bare:$Imm))))),
3547 (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
3548 neon_uimm0_bare:$Imm)>;
3550 // Additional copy patterns for scalar types
3551 def : Pat<(i32 (vector_extract (v1i8 FPR8:$Rn), (i64 0))),
3553 (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8)), (i64 0))>;
3555 def : Pat<(i32 (vector_extract (v1i16 FPR16:$Rn), (i64 0))),
3557 (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16)), (i64 0))>;
3559 def : Pat<(i32 (vector_extract (v1i32 FPR32:$Rn), (i64 0))),
3560 (FMOVws FPR32:$Rn)>;
3562 def : Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))),
3563 (FMOVxd FPR64:$Rn)>;
3565 def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))),
3568 def : Pat<(f32 (vector_extract (v1f32 FPR32:$Rn), (i64 0))),
3571 def : Pat<(v1i8 (scalar_to_vector GPR32:$Rn)),
3572 (v1i8 (EXTRACT_SUBREG (v16i8
3573 (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
3576 def : Pat<(v1i16 (scalar_to_vector GPR32:$Rn)),
3577 (v1i16 (EXTRACT_SUBREG (v8i16
3578 (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
3581 def : Pat<(v1i32 (scalar_to_vector GPR32:$src)),
3584 def : Pat<(v1i64 (scalar_to_vector GPR64:$src)),