//===-- AArch64InstrNEON.td - NEON support for AArch64 -----*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the AArch64 NEON instruction set.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//
// (outs Result), (ins MaskVec, TrueVec, FalseVec): per-bit select.
// All four operands share one vector type.
def Neon_bsl : SDNode<"AArch64ISD::NEON_BSL", SDTypeProfile<1, 3,
                 [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
                  SDTCisSameAs<0, 3>]>>;
// (outs Result), (ins Imm, OpCmode)
def SDT_Neon_movi : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVT<1, i32>]>;

def Neon_movi : SDNode<"AArch64ISD::NEON_MOVIMM", SDT_Neon_movi>;

def Neon_mvni : SDNode<"AArch64ISD::NEON_MVNIMM", SDT_Neon_movi>;

// (outs Result), (ins Imm)
def Neon_fmovi : SDNode<"AArch64ISD::NEON_FMOVIMM", SDTypeProfile<1, 1,
                          [SDTCisVec<0>, SDTCisVT<1, i32>]>>;
// (outs Result), (ins LHS, RHS, CondCode)
def Neon_cmp : SDNode<"AArch64ISD::NEON_CMP", SDTypeProfile<1, 3,
                 [SDTCisVec<0>,  SDTCisSameAs<1, 2>]>>;

// (outs Result), (ins LHS, 0/0.0 constant, CondCode)
def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3,
                 [SDTCisVec<0>,  SDTCisVec<1>]>>;

// (outs Result), (ins LHS, RHS)
def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2,
                 [SDTCisVec<0>,  SDTCisSameAs<1, 2>]>>;

// (outs Result), (ins Imm): duplicate an i32 immediate across all lanes.
def Neon_dupImm : SDNode<"AArch64ISD::NEON_DUPIMM", SDTypeProfile<1, 1,
                    [SDTCisVec<0>, SDTCisVT<1, i32>]>>;
// Shift profile: vector result, same-typed vector first operand.
// NOTE(review): the trailing constraint line was lost in this listing and is
// reconstructed as an i32 shift-amount operand — verify against upstream.
def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                     SDTCisVT<2, i32>]>;
def Neon_sqrshlImm : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>;
def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>;
//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//
// Multiclass NeonI_3VSame_B_sizes: 8B and 16B variants of a "three registers
// of the same type" instruction, byte lanes only.
multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size, bits<5> opcode,
                                string asmop, SDPatternOperator opnode8B,
                                SDPatternOperator opnode16B,
                                bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8B :  NeonI_3VSame<0b0, u, size, opcode,
               (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
               asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
               [(set (v8i8 VPR64:$Rd),
                  (v8i8 (opnode8B (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
               NoItinerary>;

    def _16B : NeonI_3VSame<0b1, u, size, opcode,
               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
               asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
               [(set (v16i8 VPR128:$Rd),
                  (v16i8 (opnode16B (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
               NoItinerary>;
  }
}
// Multiclass NeonI_3VSame_HS_sizes: 4H/8H/2S/4S variants of a "three registers
// of the same type" instruction (halfword and word lanes).
multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode,
                                 string asmop, SDPatternOperator opnode,
                                 bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _4H : NeonI_3VSame<0b0, u, 0b01, opcode,
              (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
              asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h",
              [(set (v4i16 VPR64:$Rd),
                 (v4i16 (opnode (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))))],
              NoItinerary>;

    def _8H : NeonI_3VSame<0b1, u, 0b01, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
              asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h",
              [(set (v8i16 VPR128:$Rd),
                 (v8i16 (opnode (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))))],
              NoItinerary>;

    def _2S : NeonI_3VSame<0b0, u, 0b10, opcode,
              (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
              asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
              [(set (v2i32 VPR64:$Rd),
                 (v2i32 (opnode (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))))],
              NoItinerary>;

    def _4S : NeonI_3VSame<0b1, u, 0b10, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
              asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
              [(set (v4i32 VPR128:$Rd),
                 (v4i32 (opnode (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))],
              NoItinerary>;
  }
}
// Multiclass NeonI_3VSame_BHS_sizes: adds the byte-lane (8B/16B) variants on
// top of NeonI_3VSame_HS_sizes.
multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
                                  string asmop, SDPatternOperator opnode,
                                  bit Commutable = 0>
  : NeonI_3VSame_HS_sizes<u, opcode, asmop, opnode, Commutable> {
  let isCommutable = Commutable in {
    def _8B : NeonI_3VSame<0b0, u, 0b00, opcode,
              (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
              asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
              [(set (v8i8 VPR64:$Rd),
                 (v8i8 (opnode (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
              NoItinerary>;

    def _16B : NeonI_3VSame<0b1, u, 0b00, opcode,
               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
               asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
               [(set (v16i8 VPR128:$Rd),
                  (v16i8 (opnode (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
               NoItinerary>;
  }
}
// Multiclass NeonI_3VSame_BHSD_sizes: adds the doubleword-lane (2D) variant on
// top of NeonI_3VSame_BHS_sizes.
multiclass NeonI_3VSame_BHSD_sizes<bit u, bits<5> opcode,
                                   string asmop, SDPatternOperator opnode,
                                   bit Commutable = 0>
  : NeonI_3VSame_BHS_sizes<u, opcode, asmop, opnode, Commutable> {
  let isCommutable = Commutable in {
    def _2D : NeonI_3VSame<0b1, u, 0b11, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
              asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
              [(set (v2i64 VPR128:$Rd),
                 (v2i64 (opnode (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))))],
              NoItinerary>;
  }
}
// Multiclass NeonI_3VSame_SD_sizes: Operand types are floating point types,
// but Result types can be integer or floating point types (e.g. fp compares
// produce integer masks).
multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode,
                                 string asmop, SDPatternOperator opnode2S,
                                 SDPatternOperator opnode4S,
                                 SDPatternOperator opnode2D,
                                 ValueType ResTy2S, ValueType ResTy4S,
                                 ValueType ResTy2D, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _2S : NeonI_3VSame<0b0, u, {size, 0b0}, opcode,
              (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
              asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
              [(set (ResTy2S VPR64:$Rd),
                 (ResTy2S (opnode2S (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))],
              NoItinerary>;

    def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
              asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
              [(set (ResTy4S VPR128:$Rd),
                 (ResTy4S (opnode4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))],
              NoItinerary>;

    def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
              asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
              [(set (ResTy2D VPR128:$Rd),
                 (ResTy2D (opnode2D (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))],
              NoItinerary>;
  }
}
//===----------------------------------------------------------------------===//
// Instruction Definitions
//===----------------------------------------------------------------------===//

// Vector Arithmetic Instructions

// Vector Add (Integer and Floating-Point)

defm ADDvvv  : NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>;
defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd", fadd, fadd,
                                     fadd, v2f32, v4f32, v2f64, 1>;

// Vector Sub (Integer and Floating-Point)

defm SUBvvv  : NeonI_3VSame_BHSD_sizes<0b1, 0b10000, "sub", sub, 0>;
defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub", fsub, fsub,
                                     fsub, v2f32, v4f32, v2f64, 0>;

// Vector Multiply (Integer and Floating-Point)

defm MULvvv  : NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>;
defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul, fmul,
                                     fmul, v2f32, v4f32, v2f64, 1>;

// Vector Multiply (Polynomial)

defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul",
                                    int_arm_neon_vmulp, int_arm_neon_vmulp, 1>;
// Vector Multiply-accumulate and Multiply-subtract (Integer)

// class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and
// two operands constraints ($src is tied to $Rd, for accumulating forms).
class NeonI_3VSame_Constraint_impl<string asmop, string asmlane,
                                   RegisterClass VPRC, ValueType OpTy,
                                   bit q, bit u, bits<2> size, bits<5> opcode,
                                   SDPatternOperator opnode>
  : NeonI_3VSame<q, u, size, opcode,
                 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, VPRC:$Rm),
                 asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane # ", $Rm" # asmlane,
                 [(set (OpTy VPRC:$Rd),
                    (OpTy (opnode (OpTy VPRC:$src), (OpTy VPRC:$Rn),
                                  (OpTy VPRC:$Rm))))],
                 NoItinerary> {
  let Constraints = "$src = $Rd";
}
// MLA: Ra + Rn * Rm
def Neon_mla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                       (add node:$Ra, (mul node:$Rn, node:$Rm))>;

// MLS: Ra - Rn * Rm
def Neon_mls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                       (sub node:$Ra, (mul node:$Rn, node:$Rm))>;
def MLAvvv_8B:  NeonI_3VSame_Constraint_impl<"mla", ".8b",  VPR64,  v8i8,
                                             0b0, 0b0, 0b00, 0b10010, Neon_mla>;
def MLAvvv_16B: NeonI_3VSame_Constraint_impl<"mla", ".16b", VPR128, v16i8,
                                             0b1, 0b0, 0b00, 0b10010, Neon_mla>;
def MLAvvv_4H:  NeonI_3VSame_Constraint_impl<"mla", ".4h",  VPR64,  v4i16,
                                             0b0, 0b0, 0b01, 0b10010, Neon_mla>;
def MLAvvv_8H:  NeonI_3VSame_Constraint_impl<"mla", ".8h",  VPR128, v8i16,
                                             0b1, 0b0, 0b01, 0b10010, Neon_mla>;
def MLAvvv_2S:  NeonI_3VSame_Constraint_impl<"mla", ".2s",  VPR64,  v2i32,
                                             0b0, 0b0, 0b10, 0b10010, Neon_mla>;
def MLAvvv_4S:  NeonI_3VSame_Constraint_impl<"mla", ".4s",  VPR128, v4i32,
                                             0b1, 0b0, 0b10, 0b10010, Neon_mla>;

def MLSvvv_8B:  NeonI_3VSame_Constraint_impl<"mls", ".8b",  VPR64,  v8i8,
                                             0b0, 0b1, 0b00, 0b10010, Neon_mls>;
def MLSvvv_16B: NeonI_3VSame_Constraint_impl<"mls", ".16b", VPR128, v16i8,
                                             0b1, 0b1, 0b00, 0b10010, Neon_mls>;
def MLSvvv_4H:  NeonI_3VSame_Constraint_impl<"mls", ".4h",  VPR64,  v4i16,
                                             0b0, 0b1, 0b01, 0b10010, Neon_mls>;
def MLSvvv_8H:  NeonI_3VSame_Constraint_impl<"mls", ".8h",  VPR128, v8i16,
                                             0b1, 0b1, 0b01, 0b10010, Neon_mls>;
def MLSvvv_2S:  NeonI_3VSame_Constraint_impl<"mls", ".2s",  VPR64,  v2i32,
                                             0b0, 0b1, 0b10, 0b10010, Neon_mls>;
def MLSvvv_4S:  NeonI_3VSame_Constraint_impl<"mls", ".4s",  VPR128, v4i32,
                                             0b1, 0b1, 0b10, 0b10010, Neon_mls>;
// Vector Multiply-accumulate and Multiply-subtract (Floating Point)

// FMLA: Ra + Rn * Rm (as separate fadd/fmul; fused only under UseFusedMAC)
def Neon_fmla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                        (fadd node:$Ra, (fmul node:$Rn, node:$Rm))>;

// FMLS: Ra - Rn * Rm
def Neon_fmls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                        (fsub node:$Ra, (fmul node:$Rn, node:$Rm))>;
// Separate fadd(fmul)/fsub(fmul) may only be contracted into FMLA/FMLS when
// fused multiply-add is enabled.
let Predicates = [HasNEON, UseFusedMAC] in {
def FMLAvvv_2S: NeonI_3VSame_Constraint_impl<"fmla", ".2s",  VPR64,  v2f32,
                                             0b0, 0b0, 0b00, 0b11001, Neon_fmla>;
def FMLAvvv_4S: NeonI_3VSame_Constraint_impl<"fmla", ".4s",  VPR128, v4f32,
                                             0b1, 0b0, 0b00, 0b11001, Neon_fmla>;
def FMLAvvv_2D: NeonI_3VSame_Constraint_impl<"fmla", ".2d",  VPR128, v2f64,
                                             0b1, 0b0, 0b01, 0b11001, Neon_fmla>;

def FMLSvvv_2S: NeonI_3VSame_Constraint_impl<"fmls", ".2s",  VPR64,  v2f32,
                                             0b0, 0b0, 0b10, 0b11001, Neon_fmls>;
def FMLSvvv_4S: NeonI_3VSame_Constraint_impl<"fmls", ".4s",  VPR128, v4f32,
                                             0b1, 0b0, 0b10, 0b11001, Neon_fmls>;
def FMLSvvv_2D: NeonI_3VSame_Constraint_impl<"fmls", ".2d",  VPR128, v2f64,
                                             0b1, 0b0, 0b11, 0b11001, Neon_fmls>;
}
// We're also allowed to match the fma instruction regardless of compile
// options.
def : Pat<(v2f32 (fma VPR64:$Rn, VPR64:$Rm, VPR64:$Ra)),
          (FMLAvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
def : Pat<(v4f32 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
          (FMLAvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
def : Pat<(v2f64 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
          (FMLAvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;

// fma with a negated multiplicand selects to FMLS.
def : Pat<(v2f32 (fma (fneg VPR64:$Rn), VPR64:$Rm, VPR64:$Ra)),
          (FMLSvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
def : Pat<(v4f32 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
          (FMLSvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
          (FMLSvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
// Vector Divide (Floating-Point)

defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv, fdiv,
                                     fdiv, v2f32, v4f32, v2f64, 0>;

// Vector Bitwise Operations

// Vector Bitwise AND

defm ANDvvv : NeonI_3VSame_B_sizes<0b0, 0b00, 0b00011, "and", and, and, 1>;

// Vector Bitwise Exclusive OR

defm EORvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b00011, "eor", xor, xor, 1>;

// Vector Bitwise OR

defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>;
// ORR disassembled as MOV if Vn==Vm

// Vector Move - register
// Alias for ORR if Vn=Vm and it is the preferred syntax
def : NeonInstAlias<"mov $Rd.8b, $Rn.8b",
                    (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn)>;
def : NeonInstAlias<"mov $Rd.16b, $Rn.16b",
                    (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn)>;
// Matches a NEON_MOVIMM node whose decoded value is an all-ones byte splat
// (EltBits == 8, EltVal == 0xff), i.e. a vector of all set bits.
def Neon_immAllOnes: PatLeaf<(Neon_movi (i32 timm), (i32 imm)), [{
  ConstantSDNode *ImmConstVal = cast<ConstantSDNode>(N->getOperand(0));
  ConstantSDNode *OpCmodeConstVal = cast<ConstantSDNode>(N->getOperand(1));
  unsigned EltBits;
  uint64_t EltVal = A64Imms::decodeNeonModImm(ImmConstVal->getZExtValue(),
    OpCmodeConstVal->getZExtValue(), EltBits);
  return (EltBits == 8 && EltVal == 0xff);
}]>;
// NOT is expressed as XOR with all-ones.
def Neon_not8B  : PatFrag<(ops node:$in),
                          (xor node:$in, (bitconvert (v8i8 Neon_immAllOnes)))>;
def Neon_not16B : PatFrag<(ops node:$in),
                          (xor node:$in, (bitconvert (v16i8 Neon_immAllOnes)))>;

// ORN: Rn | ~Rm
def Neon_orn8B : PatFrag<(ops node:$Rn, node:$Rm),
                         (or node:$Rn, (Neon_not8B node:$Rm))>;

def Neon_orn16B : PatFrag<(ops node:$Rn, node:$Rm),
                          (or node:$Rn, (Neon_not16B node:$Rm))>;

// BIC: Rn & ~Rm
def Neon_bic8B : PatFrag<(ops node:$Rn, node:$Rm),
                         (and node:$Rn, (Neon_not8B node:$Rm))>;

def Neon_bic16B : PatFrag<(ops node:$Rn, node:$Rm),
                          (and node:$Rn, (Neon_not16B node:$Rm))>;
// Vector Bitwise OR NOT - register

defm ORNvvv : NeonI_3VSame_B_sizes<0b0, 0b11, 0b00011, "orn",
                                   Neon_orn8B, Neon_orn16B, 0>;

// Vector Bitwise Bit Clear (AND NOT) - register

defm BICvvv : NeonI_3VSame_B_sizes<0b0, 0b01, 0b00011, "bic",
                                   Neon_bic8B, Neon_bic16B, 0>;
// Maps the remaining 64/128-bit vector types onto the byte-lane bitwise
// instructions (bitwise ops are lane-size agnostic).
multiclass Neon_bitwise2V_patterns<SDPatternOperator opnode8B,
                                   SDPatternOperator opnode16B,
                                   Instruction INST8B,
                                   Instruction INST16B> {
  def : Pat<(v2i32 (opnode8B VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (opnode8B VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (opnode8B VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i32 (opnode16B VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (opnode16B VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (opnode16B VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$Rn, VPR128:$Rm)>;
}
// Additional patterns for bitwise instructions AND, EOR, ORR, BIC, ORN
defm : Neon_bitwise2V_patterns<and, and, ANDvvv_8B, ANDvvv_16B>;
defm : Neon_bitwise2V_patterns<or,  or,  ORRvvv_8B, ORRvvv_16B>;
defm : Neon_bitwise2V_patterns<xor, xor, EORvvv_8B, EORvvv_16B>;
defm : Neon_bitwise2V_patterns<Neon_bic8B, Neon_bic16B, BICvvv_8B, BICvvv_16B>;
defm : Neon_bitwise2V_patterns<Neon_orn8B, Neon_orn16B, ORNvvv_8B, ORNvvv_16B>;
// Vector Bitwise Select
def BSLvvv_8B  : NeonI_3VSame_Constraint_impl<"bsl", ".8b",  VPR64, v8i8,
                                              0b0, 0b1, 0b01, 0b00011, Neon_bsl>;

def BSLvvv_16B : NeonI_3VSame_Constraint_impl<"bsl", ".16b", VPR128, v16i8,
                                              0b1, 0b1, 0b01, 0b00011, Neon_bsl>;
// Maps the remaining vector types, the raw or/and form, and the llvm.arm.*
// vbsl intrinsics onto the byte-lane BSL instructions.
multiclass Neon_bitwise3V_patterns<SDPatternOperator opnode,
                                   Instruction INST8B,
                                   Instruction INST16B> {
  // Disassociate type from instruction definition
  def : Pat<(v2i32 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i32 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;

  // Allow to match BSL instruction pattern with non-constant operand
  def : Pat<(v8i8 (or (and VPR64:$Rn, VPR64:$Rd),
                      (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
            (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (or (and VPR64:$Rn, VPR64:$Rd),
                       (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
            (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v2i32 (or (and VPR64:$Rn, VPR64:$Rd),
                       (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
            (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (or (and VPR64:$Rn, VPR64:$Rd),
                       (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
            (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v16i8 (or (and VPR128:$Rn, VPR128:$Rd),
                       (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
            (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (or (and VPR128:$Rn, VPR128:$Rd),
                       (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
            (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v4i32 (or (and VPR128:$Rn, VPR128:$Rd),
                       (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
            (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (or (and VPR128:$Rn, VPR128:$Rd),
                       (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
            (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;

  // Allow to match llvm.arm.* intrinsics.
  def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 VPR64:$src),
                    (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 VPR64:$src),
                     (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 VPR64:$src),
                     (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 VPR64:$src),
                     (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 VPR64:$src),
                     (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 VPR128:$src),
                     (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 VPR128:$src),
                     (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 VPR128:$src),
                     (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 VPR128:$src),
                     (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 VPR128:$src),
                     (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2f64 (int_arm_neon_vbsl (v2f64 VPR128:$src),
                     (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
}
// Additional patterns for bitwise instruction BSL
defm: Neon_bitwise3V_patterns<Neon_bsl, BSLvvv_8B, BSLvvv_16B>;

// Neon_NoBSLop: predicate always fails, so BIT/BIF defs below get an operand
// slot without ever generating selection patterns.
def Neon_NoBSLop : PatFrag<(ops node:$src, node:$Rn, node:$Rm),
                           (Neon_bsl node:$src, node:$Rn, node:$Rm),
                           [{ (void)N; return false; }]>;
// Vector Bitwise Insert if True

def BITvvv_8B  : NeonI_3VSame_Constraint_impl<"bit", ".8b", VPR64, v8i8,
                                              0b0, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
def BITvvv_16B : NeonI_3VSame_Constraint_impl<"bit", ".16b", VPR128, v16i8,
                                              0b1, 0b1, 0b10, 0b00011, Neon_NoBSLop>;

// Vector Bitwise Insert if False

def BIFvvv_8B  : NeonI_3VSame_Constraint_impl<"bif", ".8b", VPR64, v8i8,
                                              0b0, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
def BIFvvv_16B : NeonI_3VSame_Constraint_impl<"bif", ".16b", VPR128, v16i8,
                                              0b1, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
// Vector Absolute Difference and Accumulate (Signed, Unsigned)

def Neon_uaba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                        (add node:$Ra, (int_arm_neon_vabdu node:$Rn, node:$Rm))>;
def Neon_saba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                        (add node:$Ra, (int_arm_neon_vabds node:$Rn, node:$Rm))>;

// Vector Absolute Difference and Accumulate (Unsigned)
def UABAvvv_8B :  NeonI_3VSame_Constraint_impl<"uaba", ".8b",  VPR64,  v8i8,
                                               0b0, 0b1, 0b00, 0b01111, Neon_uaba>;
def UABAvvv_16B : NeonI_3VSame_Constraint_impl<"uaba", ".16b", VPR128, v16i8,
                                               0b1, 0b1, 0b00, 0b01111, Neon_uaba>;
def UABAvvv_4H :  NeonI_3VSame_Constraint_impl<"uaba", ".4h",  VPR64,  v4i16,
                                               0b0, 0b1, 0b01, 0b01111, Neon_uaba>;
def UABAvvv_8H :  NeonI_3VSame_Constraint_impl<"uaba", ".8h",  VPR128, v8i16,
                                               0b1, 0b1, 0b01, 0b01111, Neon_uaba>;
def UABAvvv_2S :  NeonI_3VSame_Constraint_impl<"uaba", ".2s",  VPR64,  v2i32,
                                               0b0, 0b1, 0b10, 0b01111, Neon_uaba>;
def UABAvvv_4S :  NeonI_3VSame_Constraint_impl<"uaba", ".4s",  VPR128, v4i32,
                                               0b1, 0b1, 0b10, 0b01111, Neon_uaba>;

// Vector Absolute Difference and Accumulate (Signed)
def SABAvvv_8B :  NeonI_3VSame_Constraint_impl<"saba", ".8b",  VPR64,  v8i8,
                                               0b0, 0b0, 0b00, 0b01111, Neon_saba>;
def SABAvvv_16B : NeonI_3VSame_Constraint_impl<"saba", ".16b", VPR128, v16i8,
                                               0b1, 0b0, 0b00, 0b01111, Neon_saba>;
def SABAvvv_4H :  NeonI_3VSame_Constraint_impl<"saba", ".4h",  VPR64,  v4i16,
                                               0b0, 0b0, 0b01, 0b01111, Neon_saba>;
def SABAvvv_8H :  NeonI_3VSame_Constraint_impl<"saba", ".8h",  VPR128, v8i16,
                                               0b1, 0b0, 0b01, 0b01111, Neon_saba>;
def SABAvvv_2S :  NeonI_3VSame_Constraint_impl<"saba", ".2s",  VPR64,  v2i32,
                                               0b0, 0b0, 0b10, 0b01111, Neon_saba>;
def SABAvvv_4S :  NeonI_3VSame_Constraint_impl<"saba", ".4s",  VPR128, v4i32,
                                               0b1, 0b0, 0b10, 0b01111, Neon_saba>;
// Vector Absolute Difference (Signed, Unsigned)
defm UABDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01110, "uabd", int_arm_neon_vabdu, 0>;
defm SABDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01110, "sabd", int_arm_neon_vabds, 0>;

// Vector Absolute Difference (Floating Point)
defm FABDvvv: NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11010, "fabd",
                                    int_arm_neon_vabds, int_arm_neon_vabds,
                                    int_arm_neon_vabds, v2f32, v4f32, v2f64, 0>;

// Vector Reciprocal Step (Floating Point)
// NOTE(review): the 2D operator argument was lost in this listing; restored as
// int_arm_neon_vrecps to match the three-operator pattern used by FRSQRTS.
defm FRECPSvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11111, "frecps",
                                       int_arm_neon_vrecps, int_arm_neon_vrecps,
                                       int_arm_neon_vrecps,
                                       v2f32, v4f32, v2f64, 0>;

// Vector Reciprocal Square Root Step (Floating Point)
defm FRSQRTSvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11111, "frsqrts",
                                        int_arm_neon_vrsqrts,
                                        int_arm_neon_vrsqrts,
                                        int_arm_neon_vrsqrts,
                                        v2f32, v4f32, v2f64, 0>;
// Vector Comparisons

def Neon_cmeq : PatFrag<(ops node:$lhs, node:$rhs),
                        (Neon_cmp node:$lhs, node:$rhs, SETEQ)>;
def Neon_cmphs : PatFrag<(ops node:$lhs, node:$rhs),
                         (Neon_cmp node:$lhs, node:$rhs, SETUGE)>;
def Neon_cmge : PatFrag<(ops node:$lhs, node:$rhs),
                        (Neon_cmp node:$lhs, node:$rhs, SETGE)>;
def Neon_cmhi : PatFrag<(ops node:$lhs, node:$rhs),
                        (Neon_cmp node:$lhs, node:$rhs, SETUGT)>;
def Neon_cmgt : PatFrag<(ops node:$lhs, node:$rhs),
                        (Neon_cmp node:$lhs, node:$rhs, SETGT)>;
// NeonI_compare_aliases class: swaps register operands to implement
// comparison aliases, e.g., CMLE is alias for CMGE with operands reversed.
class NeonI_compare_aliases<string asmop, string asmlane,
                            Instruction inst, RegisterClass VPRC>
  : NeonInstAlias<asmop # "\t$Rd" # asmlane #", $Rn" # asmlane #
                  ", $Rm" # asmlane,
                  (inst VPRC:$Rd, VPRC:$Rm, VPRC:$Rn), 0b0>;
// Vector Comparisons (Integer)

// Vector Compare Mask Equal (Integer)
let isCommutable =1 in {
defm CMEQvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10001, "cmeq", Neon_cmeq, 0>;
}

// Vector Compare Mask Higher or Same (Unsigned Integer)
defm CMHSvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00111, "cmhs", Neon_cmphs, 0>;

// Vector Compare Mask Greater Than or Equal (Integer)
defm CMGEvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00111, "cmge", Neon_cmge, 0>;

// Vector Compare Mask Higher (Unsigned Integer)
defm CMHIvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00110, "cmhi", Neon_cmhi, 0>;

// Vector Compare Mask Greater Than (Integer)
defm CMGTvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00110, "cmgt", Neon_cmgt, 0>;

// Vector Compare Mask Bitwise Test (Integer)
defm CMTSTvvv: NeonI_3VSame_BHSD_sizes<0b0, 0b10001, "cmtst", Neon_tst, 0>;
// Vector Compare Mask Less or Same (Unsigned Integer)
// CMLS is alias for CMHS with operands reversed.
def CMLSvvv_8B  : NeonI_compare_aliases<"cmls", ".8b",  CMHSvvv_8B,  VPR64>;
def CMLSvvv_16B : NeonI_compare_aliases<"cmls", ".16b", CMHSvvv_16B, VPR128>;
def CMLSvvv_4H  : NeonI_compare_aliases<"cmls", ".4h",  CMHSvvv_4H,  VPR64>;
def CMLSvvv_8H  : NeonI_compare_aliases<"cmls", ".8h",  CMHSvvv_8H,  VPR128>;
def CMLSvvv_2S  : NeonI_compare_aliases<"cmls", ".2s",  CMHSvvv_2S,  VPR64>;
def CMLSvvv_4S  : NeonI_compare_aliases<"cmls", ".4s",  CMHSvvv_4S,  VPR128>;
def CMLSvvv_2D  : NeonI_compare_aliases<"cmls", ".2d",  CMHSvvv_2D,  VPR128>;

// Vector Compare Mask Less Than or Equal (Integer)
// CMLE is alias for CMGE with operands reversed.
def CMLEvvv_8B  : NeonI_compare_aliases<"cmle", ".8b",  CMGEvvv_8B,  VPR64>;
def CMLEvvv_16B : NeonI_compare_aliases<"cmle", ".16b", CMGEvvv_16B, VPR128>;
def CMLEvvv_4H  : NeonI_compare_aliases<"cmle", ".4h",  CMGEvvv_4H,  VPR64>;
def CMLEvvv_8H  : NeonI_compare_aliases<"cmle", ".8h",  CMGEvvv_8H,  VPR128>;
def CMLEvvv_2S  : NeonI_compare_aliases<"cmle", ".2s",  CMGEvvv_2S,  VPR64>;
def CMLEvvv_4S  : NeonI_compare_aliases<"cmle", ".4s",  CMGEvvv_4S,  VPR128>;
def CMLEvvv_2D  : NeonI_compare_aliases<"cmle", ".2d",  CMGEvvv_2D,  VPR128>;

// Vector Compare Mask Lower (Unsigned Integer)
// CMLO is alias for CMHI with operands reversed.
def CMLOvvv_8B  : NeonI_compare_aliases<"cmlo", ".8b",  CMHIvvv_8B,  VPR64>;
def CMLOvvv_16B : NeonI_compare_aliases<"cmlo", ".16b", CMHIvvv_16B, VPR128>;
def CMLOvvv_4H  : NeonI_compare_aliases<"cmlo", ".4h",  CMHIvvv_4H,  VPR64>;
def CMLOvvv_8H  : NeonI_compare_aliases<"cmlo", ".8h",  CMHIvvv_8H,  VPR128>;
def CMLOvvv_2S  : NeonI_compare_aliases<"cmlo", ".2s",  CMHIvvv_2S,  VPR64>;
def CMLOvvv_4S  : NeonI_compare_aliases<"cmlo", ".4s",  CMHIvvv_4S,  VPR128>;
def CMLOvvv_2D  : NeonI_compare_aliases<"cmlo", ".2d",  CMHIvvv_2D,  VPR128>;

// Vector Compare Mask Less Than (Integer)
// CMLT is alias for CMGT with operands reversed.
def CMLTvvv_8B  : NeonI_compare_aliases<"cmlt", ".8b",  CMGTvvv_8B,  VPR64>;
def CMLTvvv_16B : NeonI_compare_aliases<"cmlt", ".16b", CMGTvvv_16B, VPR128>;
def CMLTvvv_4H  : NeonI_compare_aliases<"cmlt", ".4h",  CMGTvvv_4H,  VPR64>;
def CMLTvvv_8H  : NeonI_compare_aliases<"cmlt", ".8h",  CMGTvvv_8H,  VPR128>;
def CMLTvvv_2S  : NeonI_compare_aliases<"cmlt", ".2s",  CMGTvvv_2S,  VPR64>;
def CMLTvvv_4S  : NeonI_compare_aliases<"cmlt", ".4s",  CMGTvvv_4S,  VPR128>;
def CMLTvvv_2D  : NeonI_compare_aliases<"cmlt", ".2d",  CMGTvvv_2D,  VPR128>;
// Asm operand matching only the literal immediate #0 (used by the
// compare-against-zero forms below).
// NOTE(review): braces and the Name field were lost in this listing;
// reconstructed — verify against upstream.
def neon_uimm0_asmoperand : AsmOperandClass {
  let Name = "UImm0";
  let PredicateMethod = "isUImm<0>";
  let RenderMethod = "addImmOperands";
}

def neon_uimm0 : Operand<i32>, ImmLeaf<i32, [{return Imm == 0;}]> {
  let ParserMatchClass = neon_uimm0_asmoperand;
  let PrintMethod = "printNeonUImm0Operand";
}
// Multiclass NeonI_cmpz_sizes: integer compare against the immediate zero,
// for every lane arrangement.
multiclass NeonI_cmpz_sizes<bit u, bits<5> opcode, string asmop, CondCode CC> {
  def _8B : NeonI_2VMisc<0b0, u, 0b00, opcode,
            (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.8b, $Rn.8b, $Imm",
            [(set (v8i8 VPR64:$Rd),
               (v8i8 (Neon_cmpz (v8i8 VPR64:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _16B : NeonI_2VMisc<0b1, u, 0b00, opcode,
             (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
             asmop # "\t$Rd.16b, $Rn.16b, $Imm",
             [(set (v16i8 VPR128:$Rd),
                (v16i8 (Neon_cmpz (v16i8 VPR128:$Rn), (i32 imm:$Imm), CC)))],
             NoItinerary>;

  def _4H : NeonI_2VMisc<0b0, u, 0b01, opcode,
            (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.4h, $Rn.4h, $Imm",
            [(set (v4i16 VPR64:$Rd),
               (v4i16 (Neon_cmpz (v4i16 VPR64:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _8H : NeonI_2VMisc<0b1, u, 0b01, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.8h, $Rn.8h, $Imm",
            [(set (v8i16 VPR128:$Rd),
               (v8i16 (Neon_cmpz (v8i16 VPR128:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _2S : NeonI_2VMisc<0b0, u, 0b10, opcode,
            (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.2s, $Rn.2s, $Imm",
            [(set (v2i32 VPR64:$Rd),
               (v2i32 (Neon_cmpz (v2i32 VPR64:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _4S : NeonI_2VMisc<0b1, u, 0b10, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.4s, $Rn.4s, $Imm",
            [(set (v4i32 VPR128:$Rd),
               (v4i32 (Neon_cmpz (v4i32 VPR128:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;

  def _2D : NeonI_2VMisc<0b1, u, 0b11, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
            asmop # "\t$Rd.2d, $Rn.2d, $Imm",
            [(set (v2i64 VPR128:$Rd),
               (v2i64 (Neon_cmpz (v2i64 VPR128:$Rn), (i32 imm:$Imm), CC)))],
            NoItinerary>;
}
// Vector Compare Mask Equal to Zero (Integer)
defm CMEQvvi : NeonI_cmpz_sizes<0b0, 0b01001, "cmeq", SETEQ>;

// Vector Compare Mask Greater Than or Equal to Zero (Signed Integer)
defm CMGEvvi : NeonI_cmpz_sizes<0b1, 0b01000, "cmge", SETGE>;

// Vector Compare Mask Greater Than Zero (Signed Integer)
defm CMGTvvi : NeonI_cmpz_sizes<0b0, 0b01000, "cmgt", SETGT>;

// Vector Compare Mask Less Than or Equal To Zero (Signed Integer)
defm CMLEvvi : NeonI_cmpz_sizes<0b1, 0b01001, "cmle", SETLE>;

// Vector Compare Mask Less Than Zero (Signed Integer)
defm CMLTvvi : NeonI_cmpz_sizes<0b0, 0b01010, "cmlt", SETLT>;
// Vector Comparisons (Floating Point)

// Vector Compare Mask Equal (Floating Point)
let isCommutable =1 in {
defm FCMEQvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11100, "fcmeq", Neon_cmeq,
                                      Neon_cmeq, Neon_cmeq,
                                      v2i32, v4i32, v2i64, 0>;
}

// Vector Compare Mask Greater Than Or Equal (Floating Point)
defm FCMGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11100, "fcmge", Neon_cmge,
                                      Neon_cmge, Neon_cmge,
                                      v2i32, v4i32, v2i64, 0>;

// Vector Compare Mask Greater Than (Floating Point)
defm FCMGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11100, "fcmgt", Neon_cmgt,
                                      Neon_cmgt, Neon_cmgt,
                                      v2i32, v4i32, v2i64, 0>;
// Vector Compare Mask Less Than Or Equal (Floating Point)
// FCMLE is alias for FCMGE with operands reversed.
def FCMLEvvv_2S : NeonI_compare_aliases<"fcmle", ".2s", FCMGEvvv_2S, VPR64>;
def FCMLEvvv_4S : NeonI_compare_aliases<"fcmle", ".4s", FCMGEvvv_4S, VPR128>;
def FCMLEvvv_2D : NeonI_compare_aliases<"fcmle", ".2d", FCMGEvvv_2D, VPR128>;

// Vector Compare Mask Less Than (Floating Point)
// FCMLT is alias for FCMGT with operands reversed.
def FCMLTvvv_2S : NeonI_compare_aliases<"fcmlt", ".2s", FCMGTvvv_2S, VPR64>;
def FCMLTvvv_4S : NeonI_compare_aliases<"fcmlt", ".4s", FCMGTvvv_4S, VPR128>;
def FCMLTvvv_2D : NeonI_compare_aliases<"fcmlt", ".2d", FCMGTvvv_2D, VPR128>;
// Multiclass NeonI_fpcmpz_sizes: floating-point compare against the literal
// #0.0 immediate, producing integer masks.
multiclass NeonI_fpcmpz_sizes<bit u, bit size, bits<5> opcode,
                              string asmop, CondCode CC> {
  def _2S : NeonI_2VMisc<0b0, u, {size, 0b0}, opcode,
            (outs VPR64:$Rd), (ins VPR64:$Rn, fpz32:$FPImm),
            asmop # "\t$Rd.2s, $Rn.2s, $FPImm",
            [(set (v2i32 VPR64:$Rd),
               (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpimm:$FPImm), CC)))],
            NoItinerary>;

  def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm),
            asmop # "\t$Rd.4s, $Rn.4s, $FPImm",
            [(set (v4i32 VPR128:$Rd),
               (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpimm:$FPImm), CC)))],
            NoItinerary>;

  def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode,
            (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm),
            asmop # "\t$Rd.2d, $Rn.2d, $FPImm",
            [(set (v2i64 VPR128:$Rd),
               (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn), (f32 fpimm:$FPImm), CC)))],
            NoItinerary>;
}
// Vector Compare Mask Equal to Zero (Floating Point)
defm FCMEQvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01101, "fcmeq", SETEQ>;

// Vector Compare Mask Greater Than or Equal to Zero (Floating Point)
defm FCMGEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01100, "fcmge", SETGE>;

// Vector Compare Mask Greater Than Zero (Floating Point)
defm FCMGTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01100, "fcmgt", SETGT>;

// Vector Compare Mask Less Than or Equal To Zero (Floating Point)
defm FCMLEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01101, "fcmle", SETLE>;

// Vector Compare Mask Less Than Zero (Floating Point)
defm FCMLTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01110, "fcmlt", SETLT>;
794 // Vector Absolute Comparisons (Floating Point)
796 // Vector Absolute Compare Mask Greater Than Or Equal (Floating Point)
797 defm FACGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11101, "facge",
798 int_arm_neon_vacged, int_arm_neon_vacgeq,
799 int_aarch64_neon_vacgeq,
800 v2i32, v4i32, v2i64, 0>;
802 // Vector Absolute Compare Mask Greater Than (Floating Point)
803 defm FACGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11101, "facgt",
804 int_arm_neon_vacgtd, int_arm_neon_vacgtq,
805 int_aarch64_neon_vacgtq,
806 v2i32, v4i32, v2i64, 0>;
808 // Vector Absolute Compare Mask Less Than Or Equal (Floating Point)
809 // FACLE is alias for FACGE with operands reversed.
810 def FACLEvvv_2S : NeonI_compare_aliases<"facle", ".2s", FACGEvvv_2S, VPR64>;
811 def FACLEvvv_4S : NeonI_compare_aliases<"facle", ".4s", FACGEvvv_4S, VPR128>;
812 def FACLEvvv_2D : NeonI_compare_aliases<"facle", ".2d", FACGEvvv_2D, VPR128>;
814 // Vector Absolute Compare Mask Less Than (Floating Point)
815 // FACLT is alias for FACGT with operands reversed.
816 def FACLTvvv_2S : NeonI_compare_aliases<"faclt", ".2s", FACGTvvv_2S, VPR64>;
817 def FACLTvvv_4S : NeonI_compare_aliases<"faclt", ".4s", FACGTvvv_4S, VPR128>;
818 def FACLTvvv_2D : NeonI_compare_aliases<"faclt", ".2d", FACGTvvv_2D, VPR128>;
820 // Vector halving add (Integer Signed, Unsigned)
821 defm SHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00000, "shadd",
822 int_arm_neon_vhadds, 1>;
823 defm UHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00000, "uhadd",
824 int_arm_neon_vhaddu, 1>;
826 // Vector halving sub (Integer Signed, Unsigned)
827 defm SHSUBvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00100, "shsub",
828 int_arm_neon_vhsubs, 0>;
829 defm UHSUBvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00100, "uhsub",
830 int_arm_neon_vhsubu, 0>;
832 // Vector rounding halving add (Integer Signed, Unsigned)
833 defm SRHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00010, "srhadd",
834 int_arm_neon_vrhadds, 1>;
835 defm URHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00010, "urhadd",
836 int_arm_neon_vrhaddu, 1>;
838 // Vector Saturating add (Integer Signed, Unsigned)
839 defm SQADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00001, "sqadd",
840 int_arm_neon_vqadds, 1>;
841 defm UQADDvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00001, "uqadd",
842 int_arm_neon_vqaddu, 1>;
844 // Vector Saturating sub (Integer Signed, Unsigned)
845 defm SQSUBvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00101, "sqsub",
846 int_arm_neon_vqsubs, 1>;
847 defm UQSUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00101, "uqsub",
848 int_arm_neon_vqsubu, 1>;
850 // Vector Shift Left (Signed and Unsigned Integer)
851 defm SSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01000, "sshl",
852 int_arm_neon_vshifts, 1>;
853 defm USHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01000, "ushl",
854 int_arm_neon_vshiftu, 1>;
856 // Vector Saturating Shift Left (Signed and Unsigned Integer)
857 defm SQSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01001, "sqshl",
858 int_arm_neon_vqshifts, 1>;
859 defm UQSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01001, "uqshl",
860 int_arm_neon_vqshiftu, 1>;
862 // Vector Rounding Shift Left (Signed and Unsigned Integer)
863 defm SRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01010, "srshl",
864 int_arm_neon_vrshifts, 1>;
865 defm URSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01010, "urshl",
866 int_arm_neon_vrshiftu, 1>;
868 // Vector Saturating Rounding Shift Left (Signed and Unsigned Integer)
869 defm SQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01011, "sqrshl",
870 int_arm_neon_vqrshifts, 1>;
871 defm UQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01011, "uqrshl",
872 int_arm_neon_vqrshiftu, 1>;
874 // Vector Maximum (Signed and Unsigned Integer)
875 defm SMAXvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01100, "smax", int_arm_neon_vmaxs, 1>;
876 defm UMAXvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01100, "umax", int_arm_neon_vmaxu, 1>;
878 // Vector Minimum (Signed and Unsigned Integer)
879 defm SMINvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01101, "smin", int_arm_neon_vmins, 1>;
880 defm UMINvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01101, "umin", int_arm_neon_vminu, 1>;
882 // Vector Maximum (Floating Point)
883 defm FMAXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11110, "fmax",
884 int_arm_neon_vmaxs, int_arm_neon_vmaxs,
885 int_arm_neon_vmaxs, v2f32, v4f32, v2f64, 1>;
887 // Vector Minimum (Floating Point)
888 defm FMINvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11110, "fmin",
889 int_arm_neon_vmins, int_arm_neon_vmins,
890 int_arm_neon_vmins, v2f32, v4f32, v2f64, 1>;
892 // Vector maxNum (Floating Point) - prefer a number over a quiet NaN
893 defm FMAXNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11000, "fmaxnm",
894 int_aarch64_neon_vmaxnm,
895 int_aarch64_neon_vmaxnm,
896 int_aarch64_neon_vmaxnm,
897 v2f32, v4f32, v2f64, 1>;
899 // Vector minNum (Floating Point) - prefer a number over a quiet NaN
900 defm FMINNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11000, "fminnm",
901 int_aarch64_neon_vminnm,
902 int_aarch64_neon_vminnm,
903 int_aarch64_neon_vminnm,
904 v2f32, v4f32, v2f64, 1>;
906 // Vector Maximum Pairwise (Signed and Unsigned Integer)
907 defm SMAXPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10100, "smaxp", int_arm_neon_vpmaxs, 1>;
908 defm UMAXPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10100, "umaxp", int_arm_neon_vpmaxu, 1>;
910 // Vector Minimum Pairwise (Signed and Unsigned Integer)
911 defm SMINPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10101, "sminp", int_arm_neon_vpmins, 1>;
912 defm UMINPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10101, "uminp", int_arm_neon_vpminu, 1>;
914 // Vector Maximum Pairwise (Floating Point)
915 defm FMAXPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11110, "fmaxp",
916 int_arm_neon_vpmaxs, int_arm_neon_vpmaxs,
917 int_arm_neon_vpmaxs, v2f32, v4f32, v2f64, 1>;
919 // Vector Minimum Pairwise (Floating Point)
920 defm FMINPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11110, "fminp",
921 int_arm_neon_vpmins, int_arm_neon_vpmins,
922 int_arm_neon_vpmins, v2f32, v4f32, v2f64, 1>;
924 // Vector maxNum Pairwise (Floating Point) - prefer a number over a quiet NaN
925 defm FMAXNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11000, "fmaxnmp",
926 int_aarch64_neon_vpmaxnm,
927 int_aarch64_neon_vpmaxnm,
928 int_aarch64_neon_vpmaxnm,
929 v2f32, v4f32, v2f64, 1>;
931 // Vector minNum Pairwise (Floating Point) - prefer a number over a quiet NaN
932 defm FMINNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11000, "fminnmp",
933 int_aarch64_neon_vpminnm,
934 int_aarch64_neon_vpminnm,
935 int_aarch64_neon_vpminnm,
936 v2f32, v4f32, v2f64, 1>;
938 // Vector Addition Pairwise (Integer)
939 defm ADDP : NeonI_3VSame_BHSD_sizes<0b0, 0b10111, "addp", int_arm_neon_vpadd, 1>;
941 // Vector Addition Pairwise (Floating Point)
942 defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp",
946 v2f32, v4f32, v2f64, 1>;
948 // Vector Saturating Doubling Multiply High
949 defm SQDMULHvvv : NeonI_3VSame_HS_sizes<0b0, 0b10110, "sqdmulh",
950 int_arm_neon_vqdmulh, 1>;
952 // Vector Saturating Rounding Doubling Multiply High
953 defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh",
954 int_arm_neon_vqrdmulh, 1>;
956 // Vector Multiply Extended (Floating Point)
957 defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx",
958 int_aarch64_neon_vmulx,
959 int_aarch64_neon_vmulx,
960 int_aarch64_neon_vmulx,
961 v2f32, v4f32, v2f64, 1>;
963 // Vector Immediate Instructions
// Asm-operand classes for the LSL/MSL shift modifier of vector-immediate
// instructions; the parser/printer/predicate hooks live in the asm parser.
965 multiclass neon_mov_imm_shift_asmoperands<string PREFIX>
967 def _asmoperand : AsmOperandClass
969 let Name = "NeonMovImmShift" # PREFIX;
970 let RenderMethod = "addNeonMovImmShift" # PREFIX # "Operands";
971 let PredicateMethod = "isNeonMovImmShift" # PREFIX;
975 // Definition of vector immediates shift operands
977 // The selectable use-cases extract the shift operation
978 // information from the OpCmode fields encoded in the immediate.
979 def neon_mod_shift_imm_XFORM : SDNodeXForm<imm, [{
980 uint64_t OpCmode = N->getZExtValue();
982 unsigned ShiftOnesIn;
984 A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
985 if (!HasShift) return SDValue();
986 return CurDAG->getTargetConstant(ShiftImm, MVT::i32);
989 // Vector immediates shift operands which accept LSL and MSL
990 // shift operators with shift value in the range of 0, 8, 16, 24 (LSL),
991 // or 0, 8 (LSLH) or 8, 16 (MSL).
992 defm neon_mov_imm_LSL : neon_mov_imm_shift_asmoperands<"LSL">;
993 defm neon_mov_imm_MSL : neon_mov_imm_shift_asmoperands<"MSL">;
994 // LSLH restricts shift amount to 0, 8 out of 0, 8, 16, 24
995 defm neon_mov_imm_LSLH : neon_mov_imm_shift_asmoperands<"LSLH">;
// Operand definitions pairing the asm-operand class above with an ImmLeaf
// predicate that selects on the decoded OpCmode (LSL = shift in zeros,
// MSL = shift in ones).
997 multiclass neon_mov_imm_shift_operands<string PREFIX,
998 string HALF, string ISHALF, code pred>
1000 def _operand : Operand<i32>, ImmLeaf<i32, pred, neon_mod_shift_imm_XFORM>
1003 "printNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
1005 "DecodeNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
1006 let ParserMatchClass =
1007 !cast<AsmOperandClass>("neon_mov_imm_" # PREFIX # HALF # "_asmoperand");
1011 defm neon_mov_imm_LSL : neon_mov_imm_shift_operands<"LSL", "", "false", [{
1013 unsigned ShiftOnesIn;
1015 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1016 return (HasShift && !ShiftOnesIn);
1019 defm neon_mov_imm_MSL : neon_mov_imm_shift_operands<"MSL", "", "false", [{
1021 unsigned ShiftOnesIn;
1023 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1024 return (HasShift && ShiftOnesIn);
1027 defm neon_mov_imm_LSLH : neon_mov_imm_shift_operands<"LSL", "H", "true", [{
1029 unsigned ShiftOnesIn;
1031 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1032 return (HasShift && !ShiftOnesIn);
1035 def neon_uimm8_asmoperand : AsmOperandClass
1038 let PredicateMethod = "isUImm<8>";
1039 let RenderMethod = "addImmOperands";
1042 def neon_uimm8 : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
1043 let ParserMatchClass = neon_uimm8_asmoperand;
1044 let PrintMethod = "printNeonUImm8Operand";
1047 def neon_uimm64_mask_asmoperand : AsmOperandClass
1049 let Name = "NeonUImm64Mask";
1050 let PredicateMethod = "isNeonUImm64Mask";
1051 let RenderMethod = "addNeonUImm64MaskOperands";
1054 // MCOperand for 64-bit bytemask with each byte having only the
1055 // value 0x00 or 0xff is encoded as an unsigned 8-bit value
1056 def neon_uimm64_mask : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
1057 let ParserMatchClass = neon_uimm64_mask_asmoperand;
1058 let PrintMethod = "printNeonUImm64MaskOperand";
// MOVI/MVNI with an LSL-shifted 8-bit immediate; the cmode field encodes
// lane size and shift amount (Simm{1:0} for words, single Simm bit for
// halfwords).
1061 multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
1062 SDPatternOperator opnode>
1064 // shift zeros, per word
1065 def _2S : NeonI_1VModImm<0b0, op,
1067 (ins neon_uimm8:$Imm,
1068 neon_mov_imm_LSL_operand:$Simm),
1069 !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
1070 [(set (v2i32 VPR64:$Rd),
1071 (v2i32 (opnode (timm:$Imm),
1072 (neon_mov_imm_LSL_operand:$Simm))))],
1075 let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
1078 def _4S : NeonI_1VModImm<0b1, op,
1080 (ins neon_uimm8:$Imm,
1081 neon_mov_imm_LSL_operand:$Simm),
1082 !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
1083 [(set (v4i32 VPR128:$Rd),
1084 (v4i32 (opnode (timm:$Imm),
1085 (neon_mov_imm_LSL_operand:$Simm))))],
1088 let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
1091 // shift zeros, per halfword
1092 def _4H : NeonI_1VModImm<0b0, op,
1094 (ins neon_uimm8:$Imm,
1095 neon_mov_imm_LSLH_operand:$Simm),
1096 !strconcat(asmop, " $Rd.4h, $Imm$Simm"),
1097 [(set (v4i16 VPR64:$Rd),
1098 (v4i16 (opnode (timm:$Imm),
1099 (neon_mov_imm_LSLH_operand:$Simm))))],
1102 let cmode = {0b1, 0b0, Simm, 0b0};
1105 def _8H : NeonI_1VModImm<0b1, op,
1107 (ins neon_uimm8:$Imm,
1108 neon_mov_imm_LSLH_operand:$Simm),
1109 !strconcat(asmop, " $Rd.8h, $Imm$Simm"),
1110 [(set (v8i16 VPR128:$Rd),
1111 (v8i16 (opnode (timm:$Imm),
1112 (neon_mov_imm_LSLH_operand:$Simm))))],
1115 let cmode = {0b1, 0b0, Simm, 0b0};
// BIC/ORR with shifted immediate: read-modify-write forms, hence the
// "$src = $Rd" tied-operand constraint. cmode bit 0 is 1 (vs. 0 for the
// plain MOVI/MVNI forms above).
1119 multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
1120 SDPatternOperator opnode,
1121 SDPatternOperator neonopnode>
1123 let Constraints = "$src = $Rd" in {
1124 // shift zeros, per word
1125 def _2S : NeonI_1VModImm<0b0, op,
1127 (ins VPR64:$src, neon_uimm8:$Imm,
1128 neon_mov_imm_LSL_operand:$Simm),
1129 !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
1130 [(set (v2i32 VPR64:$Rd),
1131 (v2i32 (opnode (v2i32 VPR64:$src),
1132 (v2i32 (bitconvert (v2i32 (neonopnode timm:$Imm,
1133 neon_mov_imm_LSL_operand:$Simm)))))))],
1136 let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
1139 def _4S : NeonI_1VModImm<0b1, op,
1141 (ins VPR128:$src, neon_uimm8:$Imm,
1142 neon_mov_imm_LSL_operand:$Simm),
1143 !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
1144 [(set (v4i32 VPR128:$Rd),
1145 (v4i32 (opnode (v4i32 VPR128:$src),
1146 (v4i32 (bitconvert (v4i32 (neonopnode timm:$Imm,
1147 neon_mov_imm_LSL_operand:$Simm)))))))],
1150 let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
1153 // shift zeros, per halfword
1154 def _4H : NeonI_1VModImm<0b0, op,
1156 (ins VPR64:$src, neon_uimm8:$Imm,
1157 neon_mov_imm_LSLH_operand:$Simm),
1158 !strconcat(asmop, " $Rd.4h, $Imm$Simm"),
1159 [(set (v4i16 VPR64:$Rd),
1160 (v4i16 (opnode (v4i16 VPR64:$src),
1161 (v4i16 (bitconvert (v4i16 (neonopnode timm:$Imm,
1162 neon_mov_imm_LSL_operand:$Simm)))))))],
1165 let cmode = {0b1, 0b0, Simm, 0b1};
1168 def _8H : NeonI_1VModImm<0b1, op,
1170 (ins VPR128:$src, neon_uimm8:$Imm,
1171 neon_mov_imm_LSLH_operand:$Simm),
1172 !strconcat(asmop, " $Rd.8h, $Imm$Simm"),
1173 [(set (v8i16 VPR128:$Rd),
1174 (v8i16 (opnode (v8i16 VPR128:$src),
1175 (v8i16 (bitconvert (v8i16 (neonopnode timm:$Imm,
1176 neon_mov_imm_LSL_operand:$Simm)))))))],
1179 let cmode = {0b1, 0b0, Simm, 0b1};
// MOVI/MVNI with an MSL ("shift ones in") modifier; word lanes only,
// cmode = 110x with the low bit selecting the 8/16-bit shift.
1184 multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op,
1185 SDPatternOperator opnode>
1187 // shift ones, per word
1188 def _2S : NeonI_1VModImm<0b0, op,
1190 (ins neon_uimm8:$Imm,
1191 neon_mov_imm_MSL_operand:$Simm),
1192 !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
1193 [(set (v2i32 VPR64:$Rd),
1194 (v2i32 (opnode (timm:$Imm),
1195 (neon_mov_imm_MSL_operand:$Simm))))],
1198 let cmode = {0b1, 0b1, 0b0, Simm};
1201 def _4S : NeonI_1VModImm<0b1, op,
1203 (ins neon_uimm8:$Imm,
1204 neon_mov_imm_MSL_operand:$Simm),
1205 !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
1206 [(set (v4i32 VPR128:$Rd),
1207 (v4i32 (opnode (timm:$Imm),
1208 (neon_mov_imm_MSL_operand:$Simm))))],
1211 let cmode = {0b1, 0b1, 0b0, Simm};
1215 // Vector Move Immediate Shifted
1216 let isReMaterializable = 1 in {
1217 defm MOVIvi_lsl : NeonI_mov_imm_lsl_sizes<"movi", 0b0, Neon_movi>;
1220 // Vector Move Inverted Immediate Shifted
1221 let isReMaterializable = 1 in {
1222 defm MVNIvi_lsl : NeonI_mov_imm_lsl_sizes<"mvni", 0b1, Neon_mvni>;
1225 // Vector Bitwise Bit Clear (AND NOT) - immediate
1226 let isReMaterializable = 1 in {
1227 defm BICvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"bic", 0b1,
1231 // Vector Bitwise OR - immediate
1233 let isReMaterializable = 1 in {
1234 defm ORRvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"orr", 0b0,
1238 // Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
1239 // LowerBUILD_VECTOR favors lowering MOVI over MVNI.
1240 // BIC immediate instructions selection requires additional patterns to
1241 // transform Neon_movi operands into BIC immediate operands
1243 def neon_mov_imm_LSLH_transform_XFORM : SDNodeXForm<imm, [{
1244 uint64_t OpCmode = N->getZExtValue();
1246 unsigned ShiftOnesIn;
1247 (void)A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
1248 // LSLH restricts shift amount to 0, 8 which are encoded as 0 and 1
1249 // Transform encoded shift amount 0 to 1 and 1 to 0.
1250 return CurDAG->getTargetConstant(!ShiftImm, MVT::i32);
1253 def neon_mov_imm_LSLH_transform_operand
1256 unsigned ShiftOnesIn;
1258 A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1259 return (HasShift && !ShiftOnesIn); }],
1260 neon_mov_imm_LSLH_transform_XFORM>;
1262 // Transform (and A, (4h Neon_movi 0xff)) -> BIC 4h (A, 0x00, LSL 8)
1263 // Transform (and A, (4h Neon_movi 0xff LSL #8)) -> BIC 4h (A, 0x00)
1264 def : Pat<(v4i16 (and VPR64:$src,
1265 (v4i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
1266 (BICvi_lsl_4H VPR64:$src, 0,
1267 neon_mov_imm_LSLH_transform_operand:$Simm)>;
1269 // Transform (and A, (8h Neon_movi 0xff)) -> BIC 8h (A, 0x00, LSL 8)
1270 // Transform (and A, (8h Neon_movi 0xff LSL #8)) -> BIC 8h (A, 0x00)
1271 def : Pat<(v8i16 (and VPR128:$src,
1272 (v8i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
1273 (BICvi_lsl_8H VPR128:$src, 0,
1274 neon_mov_imm_LSLH_transform_operand:$Simm)>;
// Match (opnode src, bitconvert(neonopnode imm)) on every vector type that
// shares a register class with the 4H/8H immediate instructions, selecting
// the halfword-form instruction with the original imm/shift operands.
1277 multiclass Neon_bitwiseVi_patterns<SDPatternOperator opnode,
1278 SDPatternOperator neonopnode,
1280 Instruction INST8H> {
1281 def : Pat<(v8i8 (opnode VPR64:$src,
1282 (bitconvert(v4i16 (neonopnode timm:$Imm,
1283 neon_mov_imm_LSLH_operand:$Simm))))),
1284 (INST4H VPR64:$src, neon_uimm8:$Imm,
1285 neon_mov_imm_LSLH_operand:$Simm)>;
1286 def : Pat<(v1i64 (opnode VPR64:$src,
1287 (bitconvert(v4i16 (neonopnode timm:$Imm,
1288 neon_mov_imm_LSLH_operand:$Simm))))),
1289 (INST4H VPR64:$src, neon_uimm8:$Imm,
1290 neon_mov_imm_LSLH_operand:$Simm)>;
1292 def : Pat<(v16i8 (opnode VPR128:$src,
1293 (bitconvert(v8i16 (neonopnode timm:$Imm,
1294 neon_mov_imm_LSLH_operand:$Simm))))),
1295 (INST8H VPR128:$src, neon_uimm8:$Imm,
1296 neon_mov_imm_LSLH_operand:$Simm)>;
1297 def : Pat<(v4i32 (opnode VPR128:$src,
1298 (bitconvert(v8i16 (neonopnode timm:$Imm,
1299 neon_mov_imm_LSLH_operand:$Simm))))),
1300 (INST8H VPR128:$src, neon_uimm8:$Imm,
1301 neon_mov_imm_LSLH_operand:$Simm)>;
1302 def : Pat<(v2i64 (opnode VPR128:$src,
1303 (bitconvert(v8i16 (neonopnode timm:$Imm,
1304 neon_mov_imm_LSLH_operand:$Simm))))),
1305 (INST8H VPR128:$src, neon_uimm8:$Imm,
1306 neon_mov_imm_LSLH_operand:$Simm)>;
1309 // Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
// BIC computes Rd = src & ~imm, and Neon_mvni materializes the bitwise NOT
// of the shifted immediate, so the DAG node to match is 'and'
// (src & mvni(imm) == BIC src, imm).  Matching 'or' here would be wrong:
// (or src, ~imm) is not equivalent to BIC and would miscompile OR nodes.
1310 defm : Neon_bitwiseVi_patterns<and, Neon_mvni, BICvi_lsl_4H, BICvi_lsl_8H>;
1312 // Additional patterns for Vector Bitwise OR - immediate
// ORR Rd = src | imm, with Neon_movi materializing the shifted immediate.
1313 defm : Neon_bitwiseVi_patterns<or, Neon_movi, ORRvi_lsl_4H, ORRvi_lsl_8H>;
1316 // Vector Move Immediate Masked
1317 let isReMaterializable = 1 in {
1318 defm MOVIvi_msl : NeonI_mov_imm_msl_sizes<"movi", 0b0, Neon_movi>;
1321 // Vector Move Inverted Immediate Masked
1322 let isReMaterializable = 1 in {
1323 defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>;
// Zero-shift assembly aliases: "movi vD.2s, #imm" == "movi vD.2s, #imm, lsl #0".
1326 class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane,
1327 Instruction inst, RegisterClass VPRC>
1328 : NeonInstAlias<!strconcat(asmop, " $Rd," # asmlane # ", $Imm"),
1329 (inst VPRC:$Rd, neon_uimm8:$Imm, 0), 0b0>;
1331 // Aliases for Vector Move Immediate Shifted
1332 def : NeonI_mov_imm_lsl_aliases<"movi", ".2s", MOVIvi_lsl_2S, VPR64>;
1333 def : NeonI_mov_imm_lsl_aliases<"movi", ".4s", MOVIvi_lsl_4S, VPR128>;
1334 def : NeonI_mov_imm_lsl_aliases<"movi", ".4h", MOVIvi_lsl_4H, VPR64>;
1335 def : NeonI_mov_imm_lsl_aliases<"movi", ".8h", MOVIvi_lsl_8H, VPR128>;
1337 // Aliases for Vector Move Inverted Immediate Shifted
1338 def : NeonI_mov_imm_lsl_aliases<"mvni", ".2s", MVNIvi_lsl_2S, VPR64>;
1339 def : NeonI_mov_imm_lsl_aliases<"mvni", ".4s", MVNIvi_lsl_4S, VPR128>;
1340 def : NeonI_mov_imm_lsl_aliases<"mvni", ".4h", MVNIvi_lsl_4H, VPR64>;
1341 def : NeonI_mov_imm_lsl_aliases<"mvni", ".8h", MVNIvi_lsl_8H, VPR128>;
1343 // Aliases for Vector Bitwise Bit Clear (AND NOT) - immediate
1344 def : NeonI_mov_imm_lsl_aliases<"bic", ".2s", BICvi_lsl_2S, VPR64>;
1345 def : NeonI_mov_imm_lsl_aliases<"bic", ".4s", BICvi_lsl_4S, VPR128>;
1346 def : NeonI_mov_imm_lsl_aliases<"bic", ".4h", BICvi_lsl_4H, VPR64>;
1347 def : NeonI_mov_imm_lsl_aliases<"bic", ".8h", BICvi_lsl_8H, VPR128>;
1349 // Aliases for Vector Bitwise OR - immediate
1350 def : NeonI_mov_imm_lsl_aliases<"orr", ".2s", ORRvi_lsl_2S, VPR64>;
1351 def : NeonI_mov_imm_lsl_aliases<"orr", ".4s", ORRvi_lsl_4S, VPR128>;
1352 def : NeonI_mov_imm_lsl_aliases<"orr", ".4h", ORRvi_lsl_4H, VPR64>;
1353 def : NeonI_mov_imm_lsl_aliases<"orr", ".8h", ORRvi_lsl_8H, VPR128>;
1355 // Vector Move Immediate - per byte
1356 let isReMaterializable = 1 in {
1357 def MOVIvi_8B : NeonI_1VModImm<0b0, 0b0,
1358 (outs VPR64:$Rd), (ins neon_uimm8:$Imm),
1359 "movi\t$Rd.8b, $Imm",
1360 [(set (v8i8 VPR64:$Rd),
1361 (v8i8 (Neon_movi (timm:$Imm), (i32 imm))))],
1366 def MOVIvi_16B : NeonI_1VModImm<0b1, 0b0,
1367 (outs VPR128:$Rd), (ins neon_uimm8:$Imm),
1368 "movi\t$Rd.16b, $Imm",
1369 [(set (v16i8 VPR128:$Rd),
1370 (v16i8 (Neon_movi (timm:$Imm), (i32 imm))))],
1376 // Vector Move Immediate - bytemask, per double word
1377 let isReMaterializable = 1 in {
1378 def MOVIvi_2D : NeonI_1VModImm<0b1, 0b1,
1379 (outs VPR128:$Rd), (ins neon_uimm64_mask:$Imm),
// NOTE(review): the "\t " below prints an extra space after the tab,
// unlike the 8B/16B forms — confirm whether this is intentional.
1380 "movi\t $Rd.2d, $Imm",
1381 [(set (v2i64 VPR128:$Rd),
1382 (v2i64 (Neon_movi (timm:$Imm), (i32 imm))))],
1388 // Vector Move Immediate - bytemask, one doubleword
1390 let isReMaterializable = 1 in {
1391 def MOVIdi : NeonI_1VModImm<0b0, 0b1,
1392 (outs FPR64:$Rd), (ins neon_uimm64_mask:$Imm),
1394 [(set (f64 FPR64:$Rd),
1396 (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))))],
1402 // Vector Floating Point Move Immediate
1404 class NeonI_FMOV_impl<string asmlane, RegisterClass VPRC, ValueType OpTy,
1405 Operand immOpType, bit q, bit op>
1406 : NeonI_1VModImm<q, op,
1407 (outs VPRC:$Rd), (ins immOpType:$Imm),
1408 "fmov\t$Rd" # asmlane # ", $Imm",
1409 [(set (OpTy VPRC:$Rd),
1410 (OpTy (Neon_fmovi (timm:$Imm))))],
1415 let isReMaterializable = 1 in {
1416 def FMOVvi_2S : NeonI_FMOV_impl<".2s", VPR64, v2f32, fmov32_operand, 0b0, 0b0>;
1417 def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>;
1418 def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>;
1421 // Vector Shift (Immediate)
1422 // Immediate in [0, 63]
1423 def imm0_63 : Operand<i32> {
1424 let ParserMatchClass = uimm6_asmoperand;
1427 // Shift Right Immediate - A shift right immediate is encoded differently from
1428 // other shift immediates. The immh:immb field is encoded like so:
1431 // 8 immh:immb<6:3> = '0001xxx', <imm> is encoded in immh:immb<2:0>
1432 // 16 immh:immb<6:4> = '001xxxx', <imm> is encoded in immh:immb<3:0>
1433 // 32 immh:immb<6:5> = '01xxxxx', <imm> is encoded in immh:immb<4:0>
1434 // 64 immh:immb<6> = '1xxxxxx', <imm> is encoded in immh:immb<5:0>
1435 class shr_imm_asmoperands<string OFFSET> : AsmOperandClass {
1436 let Name = "ShrImm" # OFFSET;
1437 let RenderMethod = "addImmOperands";
1438 let DiagnosticType = "ShrImm" # OFFSET;
1441 class shr_imm<string OFFSET> : Operand<i32> {
1442 let EncoderMethod = "getShiftRightImm" # OFFSET;
1443 let DecoderMethod = "DecodeShiftRightImm" # OFFSET;
1444 let ParserMatchClass =
1445 !cast<AsmOperandClass>("shr_imm" # OFFSET # "_asmoperand");
1448 def shr_imm8_asmoperand : shr_imm_asmoperands<"8">;
1449 def shr_imm16_asmoperand : shr_imm_asmoperands<"16">;
1450 def shr_imm32_asmoperand : shr_imm_asmoperands<"32">;
1451 def shr_imm64_asmoperand : shr_imm_asmoperands<"64">;
1453 def shr_imm8 : shr_imm<"8">;
1454 def shr_imm16 : shr_imm<"16">;
1455 def shr_imm32 : shr_imm<"32">;
1456 def shr_imm64 : shr_imm<"64">;
// Two-operand vector shift by immediate; the shift amount is splatted via
// Neon_dupImm so plain shl/sra/srl DAG nodes can be matched.
1458 class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
1459 RegisterClass VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode>
1460 : NeonI_2VShiftImm<q, u, opcode,
1461 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
1462 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1463 [(set (Ty VPRC:$Rd),
1464 (Ty (OpNode (Ty VPRC:$Rn),
1465 (Ty (Neon_dupImm (i32 imm:$Imm))))))],
// Shift-left by immediate: immh:immb upper bits select the lane size.
1468 multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> {
1469 // 64-bit vector types.
1470 def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3, shl> {
1471 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1474 def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4, shl> {
1475 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1478 def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5, shl> {
1479 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1482 // 128-bit vector types.
1483 def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3, shl> {
1484 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1487 def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4, shl> {
1488 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1491 def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5, shl> {
1492 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1495 def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63, shl> {
1496 let Inst{22} = 0b1; // immh:immb = 1xxxxxx
// Shift-right by immediate: uses the shr_imm* operands, whose encoding is
// (lane-bits - imm) as documented above the shr_imm classes.
1500 multiclass NeonI_N2VShR<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
1501 def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1503 let Inst{22-19} = 0b0001;
1506 def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1508 let Inst{22-20} = 0b001;
1511 def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1513 let Inst{22-21} = 0b01;
1516 def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1518 let Inst{22-19} = 0b0001;
1521 def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1523 let Inst{22-20} = 0b001;
1526 def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1528 let Inst{22-21} = 0b01;
1531 def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1538 defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">;
1541 defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>;
1542 defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>;
// PatFrags extracting the high half of a 128-bit vector (the "2" variants
// of widening instructions operate on that half).
1544 def Neon_top16B : PatFrag<(ops node:$in),
1545 (extract_subvector (v16i8 node:$in), (iPTR 8))>;
1546 def Neon_top8H : PatFrag<(ops node:$in),
1547 (extract_subvector (v8i16 node:$in), (iPTR 4))>;
1548 def Neon_top4S : PatFrag<(ops node:$in),
1549 (extract_subvector (v4i32 node:$in), (iPTR 2))>;
// Widening shift-left long: extend (sext/zext) the 64-bit source, then
// shift by the splatted immediate.
1551 class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1552 string SrcT, ValueType DestTy, ValueType SrcTy,
1553 Operand ImmTy, SDPatternOperator ExtOp>
1554 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1555 (ins VPR64:$Rn, ImmTy:$Imm),
1556 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1557 [(set (DestTy VPR128:$Rd),
1559 (DestTy (ExtOp (SrcTy VPR64:$Rn))),
1560 (DestTy (Neon_dupImm (i32 imm:$Imm))))))],
// Same as N2VShiftLong but reads the top half of a 128-bit source
// (the "<asmop>2" mnemonic form).
1563 class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1564 string SrcT, ValueType DestTy, ValueType SrcTy,
1565 int StartIndex, Operand ImmTy,
1566 SDPatternOperator ExtOp, PatFrag getTop>
1567 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1568 (ins VPR128:$Rn, ImmTy:$Imm),
1569 asmop # "2\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1570 [(set (DestTy VPR128:$Rd),
1573 (SrcTy (getTop VPR128:$Rn)))),
1574 (DestTy (Neon_dupImm (i32 imm:$Imm))))))],
1577 multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop,
1579 // 64-bit vector types.
1580 def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8,
1582 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1585 def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16,
1587 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1590 def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32,
1592 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1595 // 128-bit vector types
1596 def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b",
1597 v8i16, v8i8, 8, uimm3, ExtOp, Neon_top16B> {
1598 let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
1601 def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h",
1602 v4i32, v4i16, 4, uimm4, ExtOp, Neon_top8H> {
1603 let Inst{22-20} = 0b001; // immh:immb = 001xxxx
1606 def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s",
1607 v2i64, v2i32, 2, uimm5, ExtOp, Neon_top4S> {
1608 let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
1611 // Use other patterns to match when the immediate is 0.
1612 def : Pat<(v8i16 (ExtOp (v8i8 VPR64:$Rn))),
1613 (!cast<Instruction>(prefix # "_8B") VPR64:$Rn, 0)>;
1615 def : Pat<(v4i32 (ExtOp (v4i16 VPR64:$Rn))),
1616 (!cast<Instruction>(prefix # "_4H") VPR64:$Rn, 0)>;
1618 def : Pat<(v2i64 (ExtOp (v2i32 VPR64:$Rn))),
1619 (!cast<Instruction>(prefix # "_2S") VPR64:$Rn, 0)>;
1621 def : Pat<(v8i16 (ExtOp (v8i8 (Neon_top16B VPR128:$Rn)))),
1622 (!cast<Instruction>(prefix # "_16B") VPR128:$Rn, 0)>;
1624 def : Pat<(v4i32 (ExtOp (v4i16 (Neon_top8H VPR128:$Rn)))),
1625 (!cast<Instruction>(prefix # "_8H") VPR128:$Rn, 0)>;
1627 def : Pat<(v2i64 (ExtOp (v2i32 (Neon_top4S VPR128:$Rn)))),
1628 (!cast<Instruction>(prefix # "_4S") VPR128:$Rn, 0)>;
1632 defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>;
1633 defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>;
1635 // Rounding/Saturating shift
// Unlike N2VShift, the operator here is an intrinsic taking the raw i32
// immediate (no Neon_dupImm splat).
1636 class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T,
1637 RegisterClass VPRC, ValueType Ty, Operand ImmTy,
1638 SDPatternOperator OpNode>
1639 : NeonI_2VShiftImm<q, u, opcode,
1640 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
1641 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1642 [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn),
1646 // shift right (vector by immediate)
1647 multiclass NeonI_N2VShR_RQ<bit u, bits<5> opcode, string asmop,
1648 SDPatternOperator OpNode> {
1649 def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1651 let Inst{22-19} = 0b0001;
1654 def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1656 let Inst{22-20} = 0b001;
1659 def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1661 let Inst{22-21} = 0b01;
1664 def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1666 let Inst{22-19} = 0b0001;
1669 def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1671 let Inst{22-20} = 0b001;
1674 def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1676 let Inst{22-21} = 0b01;
1679 def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
// Saturating/rounding shift left by immediate (uimm operands).
1685 multiclass NeonI_N2VShL_Q<bit u, bits<5> opcode, string asmop,
1686 SDPatternOperator OpNode> {
1687 // 64-bit vector types.
1688 def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3,
1690 let Inst{22-19} = 0b0001;
1693 def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4,
1695 let Inst{22-20} = 0b001;
1698 def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5,
1700 let Inst{22-21} = 0b01;
1703 // 128-bit vector types.
1704 def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3,
1706 let Inst{22-19} = 0b0001;
1709 def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4,
1711 let Inst{22-20} = 0b001;
1714 def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5,
1716 let Inst{22-21} = 0b01;
1719 def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63,
1725 // Rounding shift right
1726 defm SRSHRvvi : NeonI_N2VShR_RQ<0b0, 0b00100, "srshr",
1727 int_aarch64_neon_vsrshr>;
1728 defm URSHRvvi : NeonI_N2VShR_RQ<0b1, 0b00100, "urshr",
1729 int_aarch64_neon_vurshr>;
1731 // Saturating shift left unsigned
1732 defm SQSHLUvvi : NeonI_N2VShL_Q<0b1, 0b01100, "sqshlu", int_aarch64_neon_vsqshlu>;
1734 // Saturating shift left
1735 defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>;
1736 defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>;
// Shift-and-accumulate base class: Rd = src + OpNode(Rn, dup(Imm)).
// $src is tied to $Rd (read-modify-write destination), expressed by the
// Constraints string below. The immediate is splatted across lanes via
// Neon_dupImm before the shift so the pattern matches vector-shift DAGs.
1738 class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T,
1739 RegisterClass VPRC, ValueType Ty, Operand ImmTy,
1741 : NeonI_2VShiftImm<q, u, opcode,
1742 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1743 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1744 [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
1745 (Ty (OpNode (Ty VPRC:$Rn),
1746 (Ty (Neon_dupImm (i32 imm:$Imm))))))))],
1748 let Constraints = "$src = $Rd";
// Shift-right-and-accumulate across all integer arrangements; instantiated
// below as SSRA (arithmetic, sra) and USRA (logical, srl). shr_imm* operand
// types restrict the immediate to 1..elt_bits, as right shifts require.
1751 // Shift Right accumulate
1752 multiclass NeonI_N2VShRAdd<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
1753 def _8B : N2VShiftAdd<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1755 let Inst{22-19} = 0b0001;
1758 def _4H : N2VShiftAdd<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1760 let Inst{22-20} = 0b001;
1763 def _2S : N2VShiftAdd<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1765 let Inst{22-21} = 0b01;
1768 def _16B : N2VShiftAdd<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1770 let Inst{22-19} = 0b0001;
1773 def _8H : N2VShiftAdd<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1775 let Inst{22-20} = 0b001;
1778 def _4S : N2VShiftAdd<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1780 let Inst{22-21} = 0b01;
1783 def _2D : N2VShiftAdd<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1789 // Shift right and accumulate
1790 defm SSRAvvi : NeonI_N2VShRAdd<0, 0b00010, "ssra", sra>;
1791 defm USRAvvi : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>;
// Rounding variant of the shift-and-accumulate base class. Unlike
// N2VShiftAdd, OpNode here takes the raw i32 immediate directly (intrinsics
// like vsrshr expect the scalar shift amount, not a splatted vector).
// $src is again tied to $Rd.
1793 // Rounding shift accumulate
1794 class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T,
1795 RegisterClass VPRC, ValueType Ty, Operand ImmTy,
1796 SDPatternOperator OpNode>
1797 : NeonI_2VShiftImm<q, u, opcode,
1798 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1799 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1800 [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
1801 (Ty (OpNode (Ty VPRC:$Rn), (i32 imm:$Imm))))))],
1803 let Constraints = "$src = $Rd";
// Rounding shift-right-and-accumulate across all arrangements; instantiated
// below as SRSRA/URSRA on the signed/unsigned rounding-shift intrinsics.
1806 multiclass NeonI_N2VShRAdd_R<bit u, bits<5> opcode, string asmop,
1807 SDPatternOperator OpNode> {
1808 def _8B : N2VShiftAdd_R<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1810 let Inst{22-19} = 0b0001;
1813 def _4H : N2VShiftAdd_R<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1815 let Inst{22-20} = 0b001;
1818 def _2S : N2VShiftAdd_R<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1820 let Inst{22-21} = 0b01;
1823 def _16B : N2VShiftAdd_R<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1825 let Inst{22-19} = 0b0001;
1828 def _8H : N2VShiftAdd_R<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1830 let Inst{22-20} = 0b001;
1833 def _4S : N2VShiftAdd_R<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1835 let Inst{22-21} = 0b01;
1838 def _2D : N2VShiftAdd_R<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1844 // Rounding shift right and accumulate
1845 defm SRSRAvvi : NeonI_N2VShRAdd_R<0, 0b00110, "srsra", int_aarch64_neon_vsrshr>;
1846 defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>;
// Shift-insert base class (SLI/SRI): Rd = OpNode(src, Rn, Imm), where the
// intrinsic shifts Rn and inserts the result into $src's bits; hence the
// three-operand pattern and the tied $src = $Rd constraint below.
1848 // Shift insert by immediate
1849 class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T,
1850 RegisterClass VPRC, ValueType Ty, Operand ImmTy,
1851 SDPatternOperator OpNode>
1852 : NeonI_2VShiftImm<q, u, opcode,
1853 (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1854 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1855 [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$src), (Ty VPRC:$Rn),
1858 let Constraints = "$src = $Rd";
// SLI (shift left insert) for every arrangement; all defs lower to the
// int_aarch64_neon_vsli intrinsic. Left-shift immediate operands (uimm3..
// imm0_63) allow 0..elt_bits-1, unlike the shr_imm* types used by SRI below.
1861 // shift left insert (vector by immediate)
1862 multiclass NeonI_N2VShLIns<bit u, bits<5> opcode, string asmop> {
1863 def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, uimm3,
1864 int_aarch64_neon_vsli> {
1865 let Inst{22-19} = 0b0001;
1868 def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, uimm4,
1869 int_aarch64_neon_vsli> {
1870 let Inst{22-20} = 0b001;
1873 def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, uimm5,
1874 int_aarch64_neon_vsli> {
1875 let Inst{22-21} = 0b01;
1878 // 128-bit vector types
1879 def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, uimm3,
1880 int_aarch64_neon_vsli> {
1881 let Inst{22-19} = 0b0001;
1884 def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, uimm4,
1885 int_aarch64_neon_vsli> {
1886 let Inst{22-20} = 0b001;
1889 def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, uimm5,
1890 int_aarch64_neon_vsli> {
1891 let Inst{22-21} = 0b01;
1894 def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, imm0_63,
1895 int_aarch64_neon_vsli> {
// SRI (shift right insert) for every arrangement, lowering to
// int_aarch64_neon_vsri, followed by the SLI/SRI instruction instantiations.
1900 // shift right insert (vector by immediate)
1901 multiclass NeonI_N2VShRIns<bit u, bits<5> opcode, string asmop> {
1902 // 64-bit vector types.
1903 def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1904 int_aarch64_neon_vsri> {
1905 let Inst{22-19} = 0b0001;
1908 def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1909 int_aarch64_neon_vsri> {
1910 let Inst{22-20} = 0b001;
1913 def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1914 int_aarch64_neon_vsri> {
1915 let Inst{22-21} = 0b01;
1918 // 128-bit vector types
1919 def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1920 int_aarch64_neon_vsri> {
1921 let Inst{22-19} = 0b0001;
1924 def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1925 int_aarch64_neon_vsri> {
1926 let Inst{22-20} = 0b001;
1929 def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1930 int_aarch64_neon_vsri> {
1931 let Inst{22-21} = 0b01;
1934 def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1935 int_aarch64_neon_vsri> {
1940 // Shift left and insert
1941 defm SLIvvi : NeonI_N2VShLIns<0b1, 0b01010, "sli">;
1943 // Shift right and insert
1944 defm SRIvvi : NeonI_N2VShRIns<0b1, 0b01000, "sri">;
// Narrowing shift-right base classes. N2VShR_Narrow writes a 64-bit result
// (VPR64) from a 128-bit source. N2VShR_Narrow_Hi is the "2" (high-half)
// form: it takes the previous 64-bit result in $src, writes VPR128, and ties
// $src to $Rd so the low half is preserved.
1946 class N2VShR_Narrow<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1947 string SrcT, Operand ImmTy>
1948 : NeonI_2VShiftImm<q, u, opcode,
1949 (outs VPR64:$Rd), (ins VPR128:$Rn, ImmTy:$Imm),
1950 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1953 class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1954 string SrcT, Operand ImmTy>
1955 : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1956 (ins VPR64:$src, VPR128:$Rn, ImmTy:$Imm),
1957 asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1959 let Constraints = "$src = $Rd";
// (Comment corrected: this multiclass defines shift-right-narrow, not a
// left long shift.)
1962 // Shift right narrow by immediate: low-half defs plus "2"-suffixed
1962 // high-half defs built from N2VShR_Narrow_Hi.
1963 multiclass NeonI_N2VShR_Narrow<bit u, bits<5> opcode, string asmop> {
1964 def _8B : N2VShR_Narrow<0b0, u, opcode, asmop, "8b", "8h", shr_imm8> {
1965 let Inst{22-19} = 0b0001;
1968 def _4H : N2VShR_Narrow<0b0, u, opcode, asmop, "4h", "4s", shr_imm16> {
1969 let Inst{22-20} = 0b001;
1972 def _2S : N2VShR_Narrow<0b0, u, opcode, asmop, "2s", "2d", shr_imm32> {
1973 let Inst{22-21} = 0b01;
1976 // Shift Narrow High
1977 def _16B : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "16b", "8h",
1979 let Inst{22-19} = 0b0001;
1982 def _8H : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "8h", "4s",
1984 let Inst{22-20} = 0b001;
1987 def _4S : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "4s", "2d",
1989 let Inst{22-21} = 0b01;
1993 // Shift right narrow
1994 defm SHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10000, "shrn">;
1996 // Shift right narrow (prefix Q is saturating, prefix R is rounding)
1997 defm QSHRUNvvi :NeonI_N2VShR_Narrow<0b1, 0b10000, "sqshrun">;
1998 defm RSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10001, "rshrn">;
1999 defm QRSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10001, "sqrshrun">;
2000 defm SQSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10010, "sqshrn">;
2001 defm UQSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10010, "uqshrn">;
2002 defm SQRSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10011, "sqrshrn">;
2003 defm UQRSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10011, "uqrshrn">;
// Pattern fragments used by the narrowing-shift patterns below:
// Neon_combine glues two v1i64 halves into a v2i64 (i.e. a "vcombine"),
// and Neon_{lshr,ashr}Imm{8H,4S,2D} match srl/sra of a vector by a
// lane-splatted immediate (built with Neon_dupImm).
2005 def Neon_combine : PatFrag<(ops node:$Rm, node:$Rn),
2006 (v2i64 (concat_vectors (v1i64 node:$Rm),
2007 (v1i64 node:$Rn)))>;
2009 def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
2010 (v8i16 (srl (v8i16 node:$lhs),
2011 (v8i16 (Neon_dupImm (i32 node:$rhs)))))>;
2012 def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
2013 (v4i32 (srl (v4i32 node:$lhs),
2014 (v4i32 (Neon_dupImm (i32 node:$rhs)))))>;
2015 def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
2016 (v2i64 (srl (v2i64 node:$lhs),
2017 (v2i64 (Neon_dupImm (i32 node:$rhs)))))>;
2018 def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
2019 (v8i16 (sra (v8i16 node:$lhs),
2020 (v8i16 (Neon_dupImm (i32 node:$rhs)))))>;
2021 def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
2022 (v4i32 (sra (v4i32 node:$lhs),
2023 (v4i32 (Neon_dupImm (i32 node:$rhs)))))>;
2024 def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
2025 (v2i64 (sra (v2i64 node:$lhs),
2026 (v2i64 (Neon_dupImm (i32 node:$rhs)))))>;
// Selection patterns for SHRN: a (srl/sra, trunc) pair becomes the low-half
// SHRN, and the same wrapped in Neon_combine with a bitconvert becomes the
// high-half SHRN2 (which also consumes the existing low half in $src).
// The "shr" string parameter ("lshr"/"ashr") picks the PatFrag by name.
2028 // Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors)
2029 multiclass Neon_shiftNarrow_patterns<string shr> {
2030 def : Pat<(v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H") VPR128:$Rn,
2032 (SHRNvvi_8B VPR128:$Rn, imm:$Imm)>;
2033 def : Pat<(v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S") VPR128:$Rn,
2035 (SHRNvvi_4H VPR128:$Rn, imm:$Imm)>;
2036 def : Pat<(v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D") VPR128:$Rn,
2038 (SHRNvvi_2S VPR128:$Rn, imm:$Imm)>;
2040 def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert
2041 (v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H")
2042 VPR128:$Rn, imm:$Imm)))))),
2043 (SHRNvvi_16B VPR64:$src, VPR128:$Rn, imm:$Imm)>;
2044 def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert
2045 (v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S")
2046 VPR128:$Rn, imm:$Imm)))))),
2047 (SHRNvvi_8H VPR64:$src, VPR128:$Rn, imm:$Imm)>;
2048 def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert
2049 (v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D")
2050 VPR128:$Rn, imm:$Imm)))))),
2051 (SHRNvvi_4S VPR64:$src, VPR128:$Rn, imm:$Imm)>;
// Same structure for the saturating/rounding narrows: `op` is the intrinsic
// and `prefix` names the instruction family to !cast into.
2054 multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> {
2055 def : Pat<(v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm)),
2056 (!cast<Instruction>(prefix # "_8B") VPR128:$Rn, imm:$Imm)>;
2057 def : Pat<(v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm)),
2058 (!cast<Instruction>(prefix # "_4H") VPR128:$Rn, imm:$Imm)>;
2059 def : Pat<(v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm)),
2060 (!cast<Instruction>(prefix # "_2S") VPR128:$Rn, imm:$Imm)>;
2062 def : Pat<(Neon_combine (v1i64 VPR64:$src),
2063 (v1i64 (bitconvert (v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm))))),
2064 (!cast<Instruction>(prefix # "_16B")
2065 VPR64:$src, VPR128:$Rn, imm:$Imm)>;
2066 def : Pat<(Neon_combine (v1i64 VPR64:$src),
2067 (v1i64 (bitconvert (v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm))))),
2068 (!cast<Instruction>(prefix # "_8H")
2069 VPR64:$src, VPR128:$Rn, imm:$Imm)>;
2070 def : Pat<(Neon_combine (v1i64 VPR64:$src),
2071 (v1i64 (bitconvert (v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm))))),
2072 (!cast<Instruction>(prefix # "_4S")
2073 VPR64:$src, VPR128:$Rn, imm:$Imm)>;
// Instantiate the narrowing-shift pattern multiclasses. The prefix strings
// must match the defm names above (note QSHRUNvvi/QRSHRUNvvi carry no
// leading "S" in their record names even though the mnemonic is sqshrun).
2076 defm : Neon_shiftNarrow_patterns<"lshr">;
2077 defm : Neon_shiftNarrow_patterns<"ashr">;
2079 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrun, "QSHRUNvvi">;
2080 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vrshrn, "RSHRNvvi">;
2081 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrun, "QRSHRUNvvi">;
2082 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrn, "SQSHRNvvi">;
2083 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqshrn, "UQSHRNvvi">;
2084 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrn, "SQRSHRNvvi">;
2085 defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqrshrn, "UQRSHRNvvi">;
// (Comment typo fixed: "fix-point and float-pointing".)
2087 // Convert between fixed-point and floating-point (vector, by immediate):
2087 // base class for scvtf/ucvtf/fcvtzs/fcvtzu with a #fbits immediate.
2088 class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T,
2089 RegisterClass VPRC, ValueType DestTy, ValueType SrcTy,
2090 Operand ImmTy, SDPatternOperator IntOp>
2091 : NeonI_2VShiftImm<q, u, opcode,
2092 (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
2093 asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
2094 [(set (DestTy VPRC:$Rd), (DestTy (IntOp (SrcTy VPRC:$Rn),
// Fixed-point -> floating-point (Fx2fp) and floating-point -> fixed-point
// (Fp2fx) conversions over the float-capable arrangements 2S/4S/2D.
2098 multiclass NeonI_N2VCvt_Fx2fp<bit u, bits<5> opcode, string asmop,
2099 SDPatternOperator IntOp> {
2100 def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2f32, v2i32,
2102 let Inst{22-21} = 0b01;
2105 def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4f32, v4i32,
2107 let Inst{22-21} = 0b01;
2110 def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2f64, v2i64,
2116 multiclass NeonI_N2VCvt_Fp2fx<bit u, bits<5> opcode, string asmop,
2117 SDPatternOperator IntOp> {
2118 def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2i32, v2f32,
2120 let Inst{22-21} = 0b01;
2123 def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4i32, v4f32,
2125 let Inst{22-21} = 0b01;
2128 def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2i64, v2f64,
// Instantiations: SCVTF/UCVTF and FCVTZS/FCVTZU (with #fbits), reusing the
// target-independent ARM NEON conversion intrinsics.
2134 // Convert fixed-point to floating-point
2135 defm VCVTxs2f : NeonI_N2VCvt_Fx2fp<0, 0b11100, "scvtf",
2136 int_arm_neon_vcvtfxs2fp>;
2137 defm VCVTxu2f : NeonI_N2VCvt_Fx2fp<1, 0b11100, "ucvtf",
2138 int_arm_neon_vcvtfxu2fp>;
2140 // Convert floating-point to fixed-point
2141 defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs",
2142 int_arm_neon_vcvtfp2fxs>;
2143 defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu",
2144 int_arm_neon_vcvtfp2fxu>;
// Scalar three-same-operand instruction restricted to the D (64-bit) size:
// size field fixed to 0b11, operands in FPR64.
2146 // Scalar Arithmetic
2148 class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
2149 : NeonI_Scalar3Same<u, 0b11, opcode,
2150 (outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm),
2151 !strconcat(asmop, " $Rd, $Rn, $Rm"),
// Scalar three-same-operand instructions for all four sizes B/H/S/D
// (suffixes bbb/hhh/sss/ddd); Commutable propagates to isCommutable.
2155 multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
2156 string asmop, bit Commutable = 0>
2158 let isCommutable = Commutable in {
2159 def bbb : NeonI_Scalar3Same<u, 0b00, opcode,
2160 (outs FPR8:$Rd), (ins FPR8:$Rn, FPR8:$Rm),
2161 !strconcat(asmop, " $Rd, $Rn, $Rm"),
2164 def hhh : NeonI_Scalar3Same<u, 0b01, opcode,
2165 (outs FPR16:$Rd), (ins FPR16:$Rn, FPR16:$Rm),
2166 !strconcat(asmop, " $Rd, $Rn, $Rm"),
2169 def sss : NeonI_Scalar3Same<u, 0b10, opcode,
2170 (outs FPR32:$Rd), (ins FPR32:$Rn, FPR32:$Rm),
2171 !strconcat(asmop, " $Rd, $Rn, $Rm"),
2174 def ddd : NeonI_Scalar3Same<u, 0b11, opcode,
2175 (outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm),
2176 !strconcat(asmop, " $Rd, $Rn, $Rm"),
// Pattern class: match a v1i64 binary op on VPR64 registers by extracting
// the sub_64 FPR64 halves, running the scalar D-size instruction, and
// re-inserting the result with SUBREG_TO_REG.
2182 class Neon_Scalar_D_size_patterns<SDPatternOperator opnode, Instruction INSTD>
2183 : Pat<(v1i64 (opnode (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
2184 (SUBREG_TO_REG (i64 0),
2185 (INSTD (EXTRACT_SUBREG VPR64:$Rn, sub_64),
2186 (EXTRACT_SUBREG VPR64:$Rm, sub_64)),
// Scalar integer add/sub (D size) and their saturating B/H/S/D variants,
// with v1i64 patterns mapped via Neon_Scalar_D_size_patterns. The saturating
// add is marked commutable (trailing 1); saturating sub is not.
2190 // Scalar Integer Add
2191 let isCommutable = 1 in {
2192 def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
2195 // Scalar Integer Sub
2196 def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">;
2198 // Pattern for Scalar Integer Add and Sub with D register
2199 def : Neon_Scalar_D_size_patterns<add, ADDddd>;
2200 def : Neon_Scalar_D_size_patterns<sub, SUBddd>;
2202 // Scalar Integer Saturating Add (Signed, Unsigned)
2203 defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;
2204 defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;
2206 // Scalar Integer Saturating Sub (Signed, Unsigned)
2207 defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
2208 defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;
2210 // Patterns for Scalar Integer Saturating Add, Sub with D register only
2211 def : Neon_Scalar_D_size_patterns<int_arm_neon_vqadds, SQADDddd>;
2212 def : Neon_Scalar_D_size_patterns<int_arm_neon_vqaddu, UQADDddd>;
2213 def : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubs, SQSUBddd>;
2214 def : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubu, UQSUBddd>;
// Scalar (D-register, v1i64) shifts: plain, saturating, rounding, and
// saturating-rounding shift left in signed/unsigned forms, plus the DAG
// patterns mapping the NEON intrinsics (and generic shl) onto them.
2216 // Scalar Integer Shift Left (Signed, Unsigned)
2217 def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
2218 def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;
2220 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
2221 defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
2222 defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;
2224 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
2225 def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">;
2226 def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">;
2228 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
2229 defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>;
2230 defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>;
2232 // Patterns for Scalar Integer Shift Left, Saturating Shift Left,
2233 // Rounding Shift Left, Rounding Saturating Shift Left with D register only
2234 def : Neon_Scalar_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
2235 def : Neon_Scalar_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
// Generic v1i64 shl: a left shift is sign-agnostic, so SSHL alone covers it.
// (Removed the duplicate <shl, USHLddd> pattern that followed: its source
// pattern was byte-identical to the SSHL one, so it could never be selected.)
2236 def : Neon_Scalar_D_size_patterns<shl, SSHLddd>;
2238 def : Neon_Scalar_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
2239 def : Neon_Scalar_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
2240 def : Neon_Scalar_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
2241 def : Neon_Scalar_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
2242 def : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
2243 def : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
2246 //===----------------------------------------------------------------------===//
2247 // Non-Instruction Patterns
2248 //===----------------------------------------------------------------------===//
// 64-bit vector bitconverts are register-class no-ops: every pair of VPR64
// element types maps to the same register, so each Pat just retypes $src.
2250 // 64-bit vector bitcasts...
2252 def : Pat<(v1i64 (bitconvert (v8i8 VPR64:$src))), (v1i64 VPR64:$src)>;
2253 def : Pat<(v2f32 (bitconvert (v8i8 VPR64:$src))), (v2f32 VPR64:$src)>;
2254 def : Pat<(v2i32 (bitconvert (v8i8 VPR64:$src))), (v2i32 VPR64:$src)>;
2255 def : Pat<(v4i16 (bitconvert (v8i8 VPR64:$src))), (v4i16 VPR64:$src)>;
2257 def : Pat<(v1i64 (bitconvert (v4i16 VPR64:$src))), (v1i64 VPR64:$src)>;
2258 def : Pat<(v2i32 (bitconvert (v4i16 VPR64:$src))), (v2i32 VPR64:$src)>;
2259 def : Pat<(v2f32 (bitconvert (v4i16 VPR64:$src))), (v2f32 VPR64:$src)>;
2260 def : Pat<(v8i8 (bitconvert (v4i16 VPR64:$src))), (v8i8 VPR64:$src)>;
2262 def : Pat<(v1i64 (bitconvert (v2i32 VPR64:$src))), (v1i64 VPR64:$src)>;
2263 def : Pat<(v2f32 (bitconvert (v2i32 VPR64:$src))), (v2f32 VPR64:$src)>;
2264 def : Pat<(v4i16 (bitconvert (v2i32 VPR64:$src))), (v4i16 VPR64:$src)>;
2265 def : Pat<(v8i8 (bitconvert (v2i32 VPR64:$src))), (v8i8 VPR64:$src)>;
2267 def : Pat<(v1i64 (bitconvert (v2f32 VPR64:$src))), (v1i64 VPR64:$src)>;
2268 def : Pat<(v2i32 (bitconvert (v2f32 VPR64:$src))), (v2i32 VPR64:$src)>;
2269 def : Pat<(v4i16 (bitconvert (v2f32 VPR64:$src))), (v4i16 VPR64:$src)>;
2270 def : Pat<(v8i8 (bitconvert (v2f32 VPR64:$src))), (v8i8 VPR64:$src)>;
2272 def : Pat<(v2f32 (bitconvert (v1i64 VPR64:$src))), (v2f32 VPR64:$src)>;
2273 def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
2274 def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
2275 def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
// Same for 128-bit vectors: all VPR128 element types share a register, so
// bitconvert between them is a pure retype with no instruction emitted.
2277 // ..and 128-bit vector bitcasts...
2279 def : Pat<(v2f64 (bitconvert (v16i8 VPR128:$src))), (v2f64 VPR128:$src)>;
2280 def : Pat<(v2i64 (bitconvert (v16i8 VPR128:$src))), (v2i64 VPR128:$src)>;
2281 def : Pat<(v4f32 (bitconvert (v16i8 VPR128:$src))), (v4f32 VPR128:$src)>;
2282 def : Pat<(v4i32 (bitconvert (v16i8 VPR128:$src))), (v4i32 VPR128:$src)>;
2283 def : Pat<(v8i16 (bitconvert (v16i8 VPR128:$src))), (v8i16 VPR128:$src)>;
2285 def : Pat<(v2f64 (bitconvert (v8i16 VPR128:$src))), (v2f64 VPR128:$src)>;
2286 def : Pat<(v2i64 (bitconvert (v8i16 VPR128:$src))), (v2i64 VPR128:$src)>;
2287 def : Pat<(v4i32 (bitconvert (v8i16 VPR128:$src))), (v4i32 VPR128:$src)>;
2288 def : Pat<(v4f32 (bitconvert (v8i16 VPR128:$src))), (v4f32 VPR128:$src)>;
2289 def : Pat<(v16i8 (bitconvert (v8i16 VPR128:$src))), (v16i8 VPR128:$src)>;
2291 def : Pat<(v2f64 (bitconvert (v4i32 VPR128:$src))), (v2f64 VPR128:$src)>;
2292 def : Pat<(v2i64 (bitconvert (v4i32 VPR128:$src))), (v2i64 VPR128:$src)>;
2293 def : Pat<(v4f32 (bitconvert (v4i32 VPR128:$src))), (v4f32 VPR128:$src)>;
2294 def : Pat<(v8i16 (bitconvert (v4i32 VPR128:$src))), (v8i16 VPR128:$src)>;
2295 def : Pat<(v16i8 (bitconvert (v4i32 VPR128:$src))), (v16i8 VPR128:$src)>;
2297 def : Pat<(v2f64 (bitconvert (v4f32 VPR128:$src))), (v2f64 VPR128:$src)>;
2298 def : Pat<(v2i64 (bitconvert (v4f32 VPR128:$src))), (v2i64 VPR128:$src)>;
2299 def : Pat<(v4i32 (bitconvert (v4f32 VPR128:$src))), (v4i32 VPR128:$src)>;
2300 def : Pat<(v8i16 (bitconvert (v4f32 VPR128:$src))), (v8i16 VPR128:$src)>;
2301 def : Pat<(v16i8 (bitconvert (v4f32 VPR128:$src))), (v16i8 VPR128:$src)>;
2303 def : Pat<(v2f64 (bitconvert (v2i64 VPR128:$src))), (v2f64 VPR128:$src)>;
2304 def : Pat<(v4f32 (bitconvert (v2i64 VPR128:$src))), (v4f32 VPR128:$src)>;
2305 def : Pat<(v4i32 (bitconvert (v2i64 VPR128:$src))), (v4i32 VPR128:$src)>;
2306 def : Pat<(v8i16 (bitconvert (v2i64 VPR128:$src))), (v8i16 VPR128:$src)>;
2307 def : Pat<(v16i8 (bitconvert (v2i64 VPR128:$src))), (v16i8 VPR128:$src)>;
2309 def : Pat<(v2i64 (bitconvert (v2f64 VPR128:$src))), (v2i64 VPR128:$src)>;
2310 def : Pat<(v4f32 (bitconvert (v2f64 VPR128:$src))), (v4f32 VPR128:$src)>;
2311 def : Pat<(v4i32 (bitconvert (v2f64 VPR128:$src))), (v4i32 VPR128:$src)>;
2312 def : Pat<(v8i16 (bitconvert (v2f64 VPR128:$src))), (v8i16 VPR128:$src)>;
2313 def : Pat<(v16i8 (bitconvert (v2f64 VPR128:$src))), (v16i8 VPR128:$src)>;
// Scalar <-> vector bitconverts: vector-to-f64/f128 extracts the FPR
// subregister (sub_64 / sub_alias); the reverse direction re-inserts the
// scalar with SUBREG_TO_REG, zero-defining the undefined high bits.
2316 // ...and scalar bitcasts...
2318 def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
2319 def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
2320 def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
2322 def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))),
2323 (f64 (EXTRACT_SUBREG (v8i8 VPR64:$src), sub_64))>;
2324 def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))),
2325 (f64 (EXTRACT_SUBREG (v4i16 VPR64:$src), sub_64))>;
2326 def : Pat<(f64 (bitconvert (v2i32 VPR64:$src))),
2327 (f64 (EXTRACT_SUBREG (v2i32 VPR64:$src), sub_64))>;
2328 def : Pat<(f64 (bitconvert (v2f32 VPR64:$src))),
2329 (f64 (EXTRACT_SUBREG (v2f32 VPR64:$src), sub_64))>;
2330 def : Pat<(f64 (bitconvert (v1i64 VPR64:$src))),
2331 (f64 (EXTRACT_SUBREG (v1i64 VPR64:$src), sub_64))>;
2332 def : Pat<(f128 (bitconvert (v16i8 VPR128:$src))),
2333 (f128 (EXTRACT_SUBREG (v16i8 VPR128:$src), sub_alias))>;
2334 def : Pat<(f128 (bitconvert (v8i16 VPR128:$src))),
2335 (f128 (EXTRACT_SUBREG (v8i16 VPR128:$src), sub_alias))>;
2336 def : Pat<(f128 (bitconvert (v4i32 VPR128:$src))),
2337 (f128 (EXTRACT_SUBREG (v4i32 VPR128:$src), sub_alias))>;
2338 def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))),
2339 (f128 (EXTRACT_SUBREG (v2i64 VPR128:$src), sub_alias))>;
2340 def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))),
2341 (f128 (EXTRACT_SUBREG (v4f32 VPR128:$src), sub_alias))>;
2342 def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))),
2343 (f128 (EXTRACT_SUBREG (v2f64 VPR128:$src), sub_alias))>;
2345 def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))),
2346 (v8i8 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>;
2347 def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))),
2348 (v4i16 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>;
2349 def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))),
2350 (v2i32 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>;
2351 def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))),
2352 (v2f32 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>;
2353 def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))),
2354 (v1i64 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>;
// NOTE(review): the f128->vector patterns below lost their trailing
// "sub_alias))>;" lines in this extraction — restore from upstream.
2355 def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))),
2356 (v16i8 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src),
2358 def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))),
2359 (v8i16 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src),
2361 def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))),
2362 (v4i32 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src),
2364 def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))),
2365 (v2i64 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src),
2367 def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))),
2368 (v4f32 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src),
2370 def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))),
2371 (v2f64 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src),