[AArch64]Can't select shift left 0 of type v1i64
[oota-llvm.git] / lib / Target / AArch64 / AArch64InstrNEON.td
1 //===-- AArch64InstrNEON.td - NEON support for AArch64 -----*- tablegen -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file describes the AArch64 NEON instruction set.
11 //
12 //===----------------------------------------------------------------------===//
13
//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

// (outs Result), (ins Imm, OpCmode)
def SDT_Neon_movi : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVT<1, i32>]>;

def Neon_movi     : SDNode<"AArch64ISD::NEON_MOVIMM", SDT_Neon_movi>;

def Neon_mvni     : SDNode<"AArch64ISD::NEON_MVNIMM", SDT_Neon_movi>;

// (outs Result), (ins Imm)
def Neon_fmovi : SDNode<"AArch64ISD::NEON_FMOVIMM", SDTypeProfile<1, 1,
                        [SDTCisVec<0>, SDTCisVT<1, i32>]>>;

// (outs Result), (ins LHS, RHS, CondCode)
def Neon_cmp : SDNode<"AArch64ISD::NEON_CMP", SDTypeProfile<1, 3,
                 [SDTCisVec<0>,  SDTCisSameAs<1, 2>]>>;

// (outs Result), (ins LHS, 0/0.0 constant, CondCode)
def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3,
                 [SDTCisVec<0>,  SDTCisVec<1>]>>;

// (outs Result), (ins LHS, RHS)
def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2,
                 [SDTCisVec<0>,  SDTCisSameAs<1, 2>]>>;

// Saturating shift by immediate: result/operand share a vector type and the
// shift amount is an i32.
def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                     SDTCisVT<2, i32>]>;
def Neon_sqrshlImm   : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>;
def Neon_uqrshlImm   : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>;

// Permutation nodes (UZP/ZIP/TRN): two inputs of the same type as the result.
def SDTPERMUTE : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                               SDTCisSameAs<0, 2>]>;
def Neon_uzp1    : SDNode<"AArch64ISD::NEON_UZP1", SDTPERMUTE>;
def Neon_uzp2    : SDNode<"AArch64ISD::NEON_UZP2", SDTPERMUTE>;
def Neon_zip1    : SDNode<"AArch64ISD::NEON_ZIP1", SDTPERMUTE>;
def Neon_zip2    : SDNode<"AArch64ISD::NEON_ZIP2", SDTPERMUTE>;
def Neon_trn1    : SDNode<"AArch64ISD::NEON_TRN1", SDTPERMUTE>;
def Neon_trn2    : SDNode<"AArch64ISD::NEON_TRN2", SDTPERMUTE>;

// Single-input shuffles (element reversal within a vector).
def SDTVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def Neon_rev64    : SDNode<"AArch64ISD::NEON_REV64", SDTVSHUF>;
def Neon_rev32    : SDNode<"AArch64ISD::NEON_REV32", SDTVSHUF>;
def Neon_rev16    : SDNode<"AArch64ISD::NEON_REV16", SDTVSHUF>;
def Neon_vdup : SDNode<"AArch64ISD::NEON_VDUP", SDTypeProfile<1, 1,
                       [SDTCisVec<0>]>>;
def Neon_vduplane : SDNode<"AArch64ISD::NEON_VDUPLANE", SDTypeProfile<1, 2,
                           [SDTCisVec<0>, SDTCisVec<1>, SDTCisVT<2, i64>]>>;
def Neon_vextract : SDNode<"AArch64ISD::NEON_VEXTRACT", SDTypeProfile<1, 3,
                           [SDTCisVec<0>,  SDTCisSameAs<0, 1>,
                           SDTCisSameAs<0, 2>, SDTCisVT<3, i64>]>>;

// Matchers for the target-independent AssertSext/AssertZext nodes.
def SDT_assertext : SDTypeProfile<1, 1,
  [SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 0>]>;
def assertsext : SDNode<"ISD::AssertSext", SDT_assertext>;
def assertzext : SDNode<"ISD::AssertZext", SDT_assertext>;
71
72 //===----------------------------------------------------------------------===//
73 // Addressing-mode instantiations
74 //===----------------------------------------------------------------------===//
75
// Load/store patterns for 64-bit NEON types: route through LSFP64_LDR/STR,
// substituting the dword-scaled uimm12 offset operand and the 8-byte minimum
// alignment into the supplied address/offset dags.
multiclass ls_64_pats<dag address, dag Base, dag Offset, ValueType Ty> {
defm : ls_neutral_pats<LSFP64_LDR, LSFP64_STR, Base,
                      !foreach(decls.pattern, Offset,
                               !subst(OFFSET, dword_uimm12, decls.pattern)),
                      !foreach(decls.pattern, address,
                               !subst(OFFSET, dword_uimm12,
                               !subst(ALIGN, min_align8, decls.pattern))),
                      Ty>;
}
85
// Load/store patterns for 128-bit NEON types: route through LSFP128_LDR/STR,
// substituting the qword-scaled uimm12 offset operand and the 16-byte minimum
// alignment into the supplied address/offset dags.
multiclass ls_128_pats<dag address, dag Base, dag Offset, ValueType Ty> {
defm : ls_neutral_pats<LSFP128_LDR, LSFP128_STR, Base,
                       !foreach(decls.pattern, Offset,
                                !subst(OFFSET, qword_uimm12, decls.pattern)),
                       !foreach(decls.pattern, address,
                                !subst(OFFSET, qword_uimm12,
                                !subst(ALIGN, min_align16, decls.pattern))),
                      Ty>;
}
95
// Instantiate the uimm12-offset load/store patterns for every legal 64-bit
// and 128-bit NEON vector type.
multiclass uimm12_neon_pats<dag address, dag Base, dag Offset> {
  defm : ls_64_pats<address, Base, Offset, v8i8>;
  defm : ls_64_pats<address, Base, Offset, v4i16>;
  defm : ls_64_pats<address, Base, Offset, v2i32>;
  defm : ls_64_pats<address, Base, Offset, v1i64>;
  defm : ls_64_pats<address, Base, Offset, v2f32>;
  defm : ls_64_pats<address, Base, Offset, v1f64>;

  defm : ls_128_pats<address, Base, Offset, v16i8>;
  defm : ls_128_pats<address, Base, Offset, v8i16>;
  defm : ls_128_pats<address, Base, Offset, v4i32>;
  defm : ls_128_pats<address, Base, Offset, v2i64>;
  defm : ls_128_pats<address, Base, Offset, v4f32>;
  defm : ls_128_pats<address, Base, Offset, v2f64>;
}
111
// Constant-pool addressing: ADRP of the pool symbol plus its low-12 offset.
defm : uimm12_neon_pats<(A64WrapperSmall
                          tconstpool:$Hi, tconstpool:$Lo12, ALIGN),
                        (ADRPxi tconstpool:$Hi), (i64 tconstpool:$Lo12)>;
115
116 //===----------------------------------------------------------------------===//
117 // Multiclasses
118 //===----------------------------------------------------------------------===//
119
// Three-same-operand instructions that only exist for the byte element size
// (8B and 16B arrangements), e.g. the bitwise and polynomial operations.
multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size,  bits<5> opcode,
                                string asmop, SDPatternOperator opnode8B,
                                SDPatternOperator opnode16B,
                                bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8B :  NeonI_3VSame<0b0, u, size, opcode,
               (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
               asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
               [(set (v8i8 VPR64:$Rd),
                  (v8i8 (opnode8B (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
               NoItinerary>;

    def _16B : NeonI_3VSame<0b1, u, size, opcode,
               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
               asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
               [(set (v16i8 VPR128:$Rd),
                  (v16i8 (opnode16B (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
               NoItinerary>;
  }

}
141
// Three-same-operand instructions for the halfword and word element sizes
// (4H/8H and 2S/4S arrangements).
multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode,
                                  string asmop, SDPatternOperator opnode,
                                  bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _4H : NeonI_3VSame<0b0, u, 0b01, opcode,
              (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
              asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h",
              [(set (v4i16 VPR64:$Rd),
                 (v4i16 (opnode (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))))],
              NoItinerary>;

    def _8H : NeonI_3VSame<0b1, u, 0b01, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
              asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h",
              [(set (v8i16 VPR128:$Rd),
                 (v8i16 (opnode (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))))],
              NoItinerary>;

    def _2S : NeonI_3VSame<0b0, u, 0b10, opcode,
              (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
              asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
              [(set (v2i32 VPR64:$Rd),
                 (v2i32 (opnode (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))))],
              NoItinerary>;

    def _4S : NeonI_3VSame<0b1, u, 0b10, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
              asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
              [(set (v4i32 VPR128:$Rd),
                 (v4i32 (opnode (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))],
              NoItinerary>;
  }
}
// Extends NeonI_3VSame_HS_sizes with the byte arrangements (8B/16B), giving
// B, H and S element sizes.
multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
                                  string asmop, SDPatternOperator opnode,
                                  bit Commutable = 0>
   : NeonI_3VSame_HS_sizes<u, opcode,  asmop, opnode, Commutable> {
  let isCommutable = Commutable in {
    def _8B :  NeonI_3VSame<0b0, u, 0b00, opcode,
               (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
               asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
               [(set (v8i8 VPR64:$Rd),
                  (v8i8 (opnode (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
               NoItinerary>;

    def _16B : NeonI_3VSame<0b1, u, 0b00, opcode,
               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
               asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
               [(set (v16i8 VPR128:$Rd),
                  (v16i8 (opnode (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
               NoItinerary>;
  }
}
195
// Extends NeonI_3VSame_BHS_sizes with the doubleword arrangement (2D only;
// there is no 1D form in this multiclass).
multiclass NeonI_3VSame_BHSD_sizes<bit u, bits<5> opcode,
                                   string asmop, SDPatternOperator opnode,
                                   bit Commutable = 0>
   : NeonI_3VSame_BHS_sizes<u, opcode,  asmop, opnode, Commutable> {
  let isCommutable = Commutable in {
    def _2D : NeonI_3VSame<0b1, u, 0b11, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
              asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
              [(set (v2i64 VPR128:$Rd),
                 (v2i64 (opnode (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))))],
              NoItinerary>;
  }
}
209
// Multiclass NeonI_3VSame_SD_sizes: Operand types are floating point types,
// but Result types can be integer or floating point types (e.g. FP compares
// produce integer masks), so the result types are passed in separately.
multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode,
                                 string asmop, SDPatternOperator opnode2S,
                                 SDPatternOperator opnode4S,
                                 SDPatternOperator opnode2D,
                                 ValueType ResTy2S, ValueType ResTy4S,
                                 ValueType ResTy2D, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _2S : NeonI_3VSame<0b0, u, {size, 0b0}, opcode,
              (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
              asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
              [(set (ResTy2S VPR64:$Rd),
                 (ResTy2S (opnode2S (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))],
              NoItinerary>;

    def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
              asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
              [(set (ResTy4S VPR128:$Rd),
                 (ResTy4S (opnode4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))],
              NoItinerary>;

    def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
              asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
              [(set (ResTy2D VPR128:$Rd),
                 (ResTy2D (opnode2D (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))],
               NoItinerary>;
  }
}
241
//===----------------------------------------------------------------------===//
// Instruction Definitions
//===----------------------------------------------------------------------===//

// Vector Arithmetic Instructions

// Vector Add (Integer and Floating-Point)

defm ADDvvv :  NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>;
defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd", fadd, fadd, fadd,
                                     v2f32, v4f32, v2f64, 1>;

// Vector Sub (Integer and Floating-Point)

defm SUBvvv :  NeonI_3VSame_BHSD_sizes<0b1, 0b10000, "sub", sub, 0>;
defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub", fsub, fsub, fsub,
                                     v2f32, v4f32, v2f64, 0>;

// Vector Multiply (Integer and Floating-Point)

defm MULvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>;
defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul, fmul, fmul,
                                     v2f32, v4f32, v2f64, 1>;

// Vector Multiply (Polynomial)

defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul",
                                    int_arm_neon_vmulp, int_arm_neon_vmulp, 1>;
270
// Vector Multiply-accumulate and Multiply-subtract (Integer)

// class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and
// two operands constraints: the accumulator input ($src) is tied to the
// destination register ($Rd).
class NeonI_3VSame_Constraint_impl<string asmop, string asmlane,
  RegisterOperand VPRC, ValueType OpTy, bit q, bit u, bits<2> size,
  bits<5> opcode, SDPatternOperator opnode>
  : NeonI_3VSame<q, u, size, opcode,
    (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, VPRC:$Rm),
    asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane # ", $Rm" # asmlane,
    [(set (OpTy VPRC:$Rd),
       (OpTy (opnode (OpTy VPRC:$src), (OpTy VPRC:$Rn), (OpTy VPRC:$Rm))))],
    NoItinerary> {
  let Constraints = "$src = $Rd";
}
286
// Multiply-accumulate / multiply-subtract as generic add/sub-of-mul dags.
def Neon_mla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                       (add node:$Ra, (mul node:$Rn, node:$Rm))>;

def Neon_mls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                       (sub node:$Ra, (mul node:$Rn, node:$Rm))>;


def MLAvvv_8B:  NeonI_3VSame_Constraint_impl<"mla", ".8b",  VPR64,  v8i8,
                                             0b0, 0b0, 0b00, 0b10010, Neon_mla>;
def MLAvvv_16B: NeonI_3VSame_Constraint_impl<"mla", ".16b", VPR128, v16i8,
                                             0b1, 0b0, 0b00, 0b10010, Neon_mla>;
def MLAvvv_4H:  NeonI_3VSame_Constraint_impl<"mla", ".4h",  VPR64,  v4i16,
                                             0b0, 0b0, 0b01, 0b10010, Neon_mla>;
def MLAvvv_8H:  NeonI_3VSame_Constraint_impl<"mla", ".8h",  VPR128, v8i16,
                                             0b1, 0b0, 0b01, 0b10010, Neon_mla>;
def MLAvvv_2S:  NeonI_3VSame_Constraint_impl<"mla", ".2s",  VPR64,  v2i32,
                                             0b0, 0b0, 0b10, 0b10010, Neon_mla>;
def MLAvvv_4S:  NeonI_3VSame_Constraint_impl<"mla", ".4s",  VPR128, v4i32,
                                             0b1, 0b0, 0b10, 0b10010, Neon_mla>;

def MLSvvv_8B:  NeonI_3VSame_Constraint_impl<"mls", ".8b",  VPR64,  v8i8,
                                             0b0, 0b1, 0b00, 0b10010, Neon_mls>;
def MLSvvv_16B: NeonI_3VSame_Constraint_impl<"mls", ".16b", VPR128, v16i8,
                                             0b1, 0b1, 0b00, 0b10010, Neon_mls>;
def MLSvvv_4H:  NeonI_3VSame_Constraint_impl<"mls", ".4h",  VPR64,  v4i16,
                                             0b0, 0b1, 0b01, 0b10010, Neon_mls>;
def MLSvvv_8H:  NeonI_3VSame_Constraint_impl<"mls", ".8h",  VPR128, v8i16,
                                             0b1, 0b1, 0b01, 0b10010, Neon_mls>;
def MLSvvv_2S:  NeonI_3VSame_Constraint_impl<"mls", ".2s",  VPR64,  v2i32,
                                             0b0, 0b1, 0b10, 0b10010, Neon_mls>;
def MLSvvv_4S:  NeonI_3VSame_Constraint_impl<"mls", ".4s",  VPR128, v4i32,
                                             0b1, 0b1, 0b10, 0b10010, Neon_mls>;
319
// Vector Multiply-accumulate and Multiply-subtract (Floating Point)

def Neon_fmla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                        (fadd node:$Ra, (fmul_su node:$Rn, node:$Rm))>;

def Neon_fmls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                        (fsub node:$Ra, (fmul_su node:$Rn, node:$Rm))>;

// Separate fadd/fmul contraction is only legal when fused MACs are enabled.
let Predicates = [HasNEON, UseFusedMAC] in {
def FMLAvvv_2S: NeonI_3VSame_Constraint_impl<"fmla", ".2s",  VPR64,  v2f32,
                                             0b0, 0b0, 0b00, 0b11001, Neon_fmla>;
def FMLAvvv_4S: NeonI_3VSame_Constraint_impl<"fmla", ".4s",  VPR128, v4f32,
                                             0b1, 0b0, 0b00, 0b11001, Neon_fmla>;
def FMLAvvv_2D: NeonI_3VSame_Constraint_impl<"fmla", ".2d",  VPR128, v2f64,
                                             0b1, 0b0, 0b01, 0b11001, Neon_fmla>;

def FMLSvvv_2S: NeonI_3VSame_Constraint_impl<"fmls", ".2s",  VPR64,  v2f32,
                                              0b0, 0b0, 0b10, 0b11001, Neon_fmls>;
def FMLSvvv_4S: NeonI_3VSame_Constraint_impl<"fmls", ".4s",  VPR128, v4f32,
                                             0b1, 0b0, 0b10, 0b11001, Neon_fmls>;
def FMLSvvv_2D: NeonI_3VSame_Constraint_impl<"fmls", ".2d",  VPR128, v2f64,
                                             0b1, 0b0, 0b11, 0b11001, Neon_fmls>;
}

// We're also allowed to match the fma instruction regardless of compile
// options.
def : Pat<(v2f32 (fma VPR64:$Rn, VPR64:$Rm, VPR64:$Ra)),
          (FMLAvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
def : Pat<(v4f32 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
          (FMLAvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
def : Pat<(v2f64 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
          (FMLAvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;

def : Pat<(v2f32 (fma (fneg VPR64:$Rn), VPR64:$Rm, VPR64:$Ra)),
          (FMLSvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
def : Pat<(v4f32 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
          (FMLSvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
          (FMLSvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
359
// Vector Divide (Floating-Point)

defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv, fdiv, fdiv,
                                     v2f32, v4f32, v2f64, 0>;

// Vector Bitwise Operations

// Vector Bitwise AND

defm ANDvvv : NeonI_3VSame_B_sizes<0b0, 0b00, 0b00011, "and", and, and, 1>;

// Vector Bitwise Exclusive OR

defm EORvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b00011, "eor", xor, xor, 1>;

// Vector Bitwise OR

defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>;

// ORR disassembled as MOV if Vn==Vm

// Vector Move - register
// Alias for ORR if Vn=Vm.
// FIXME: This is actually the preferred syntax but TableGen can't deal with
// custom printing of aliases.
def : NeonInstAlias<"mov $Rd.8b, $Rn.8b",
                    (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn), 0>;
def : NeonInstAlias<"mov $Rd.16b, $Rn.16b",
                    (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn), 0>;

// The MOVI instruction takes two immediate operands.  The first is the
// immediate encoding, while the second is the cmode.  A cmode of 14, or
// 0b1110, produces a MOVI operation, rather than a MVNI, ORR, or BIC.
def Neon_AllZero : PatFrag<(ops), (Neon_movi (i32 0), (i32 14))>;
def Neon_AllOne : PatFrag<(ops), (Neon_movi (i32 255), (i32 14))>;

// Bitwise NOT expressed as XOR with an all-ones MOVI.
def Neon_not8B  : PatFrag<(ops node:$in),
                          (xor node:$in, (bitconvert (v8i8 Neon_AllOne)))>;
def Neon_not16B : PatFrag<(ops node:$in),
                          (xor node:$in, (bitconvert (v16i8 Neon_AllOne)))>;

def Neon_orn8B : PatFrag<(ops node:$Rn, node:$Rm),
                         (or node:$Rn, (Neon_not8B node:$Rm))>;

def Neon_orn16B : PatFrag<(ops node:$Rn, node:$Rm),
                          (or node:$Rn, (Neon_not16B node:$Rm))>;

def Neon_bic8B : PatFrag<(ops node:$Rn, node:$Rm),
                         (and node:$Rn, (Neon_not8B node:$Rm))>;

def Neon_bic16B : PatFrag<(ops node:$Rn, node:$Rm),
                          (and node:$Rn, (Neon_not16B node:$Rm))>;


// Vector Bitwise OR NOT - register

defm ORNvvv : NeonI_3VSame_B_sizes<0b0, 0b11, 0b00011, "orn",
                                   Neon_orn8B, Neon_orn16B, 0>;

// Vector Bitwise Bit Clear (AND NOT) - register

defm BICvvv : NeonI_3VSame_B_sizes<0b0, 0b01, 0b00011, "bic",
                                   Neon_bic8B, Neon_bic16B, 0>;
423
// Map the remaining 64/128-bit integer vector types onto the byte-arranged
// two-operand bitwise instructions (which are type-agnostic bit operations).
multiclass Neon_bitwise2V_patterns<SDPatternOperator opnode8B,
                                   SDPatternOperator opnode16B,
                                   Instruction INST8B,
                                   Instruction INST16B> {
  def : Pat<(v2i32 (opnode8B VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (opnode8B VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (opnode8B VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i32 (opnode16B VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (opnode16B VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (opnode16B VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$Rn, VPR128:$Rm)>;
}
441
// Additional patterns for bitwise instructions AND, EOR, ORR, BIC, ORN
defm : Neon_bitwise2V_patterns<and, and, ANDvvv_8B, ANDvvv_16B>;
defm : Neon_bitwise2V_patterns<or,  or,  ORRvvv_8B, ORRvvv_16B>;
defm : Neon_bitwise2V_patterns<xor, xor, EORvvv_8B, EORvvv_16B>;
defm : Neon_bitwise2V_patterns<Neon_bic8B, Neon_bic16B, BICvvv_8B, BICvvv_16B>;
defm : Neon_bitwise2V_patterns<Neon_orn8B, Neon_orn16B, ORNvvv_8B, ORNvvv_16B>;

//   Vector Bitwise Select
def BSLvvv_8B  : NeonI_3VSame_Constraint_impl<"bsl", ".8b",  VPR64, v8i8,
                                              0b0, 0b1, 0b01, 0b00011, vselect>;

def BSLvvv_16B : NeonI_3VSame_Constraint_impl<"bsl", ".16b", VPR128, v16i8,
                                              0b1, 0b1, 0b01, 0b00011, vselect>;
455
// Map every 64/128-bit vector type onto the byte-arranged three-operand
// bitwise-select instructions, plus the open-coded (or (and ...) (and ...))
// form and the llvm.arm.neon.vbsl intrinsic.
multiclass Neon_bitwise3V_patterns<SDPatternOperator opnode,
                                   Instruction INST8B,
                                   Instruction INST16B> {
  // Disassociate type from instruction definition
  def : Pat<(v8i8 (opnode (v8i8 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v2i32 (opnode (v2i32 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (opnode (v4i16 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (opnode (v1i64 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v16i8 (opnode (v16i8 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v4i32 (opnode (v4i32 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (opnode (v8i16 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (opnode (v2i64 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;

  // Allow to match BSL instruction pattern with non-constant operand
  def : Pat<(v8i8 (or (and VPR64:$Rn, VPR64:$Rd),
                    (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
          (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (or (and VPR64:$Rn, VPR64:$Rd),
                     (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
          (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v2i32 (or (and VPR64:$Rn, VPR64:$Rd),
                     (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
          (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (or (and VPR64:$Rn, VPR64:$Rd),
                     (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
          (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v16i8 (or (and VPR128:$Rn, VPR128:$Rd),
                     (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
          (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (or (and VPR128:$Rn, VPR128:$Rd),
                     (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
          (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v4i32 (or (and VPR128:$Rn, VPR128:$Rd),
                     (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
          (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (or (and VPR128:$Rn, VPR128:$Rd),
                     (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
          (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;

  // Allow to match llvm.arm.* intrinsics.
  def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 VPR64:$src),
                    (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 VPR64:$src),
                    (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 VPR64:$src),
                    (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 VPR64:$src),
                    (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 VPR64:$src),
                    (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v1f64 (int_arm_neon_vbsl (v1f64 VPR64:$src),
                    (v1f64 VPR64:$Rn), (v1f64 VPR64:$Rm))),
            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
  def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 VPR128:$src),
                    (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 VPR128:$src),
                    (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 VPR128:$src),
                    (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 VPR128:$src),
                    (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 VPR128:$src),
                    (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
  def : Pat<(v2f64 (int_arm_neon_vbsl (v2f64 VPR128:$src),
                    (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
}
541
// Additional patterns for bitwise instruction BSL
defm: Neon_bitwise3V_patterns<vselect, BSLvvv_8B, BSLvvv_16B>;

// Never matches as a pattern (predicate always returns false); used so BIT
// and BIF exist for the assembler without competing with BSL in isel.
def Neon_NoBSLop : PatFrag<(ops node:$src, node:$Rn, node:$Rm),
                           (vselect node:$src, node:$Rn, node:$Rm),
                           [{ (void)N; return false; }]>;

// Vector Bitwise Insert if True

def BITvvv_8B  : NeonI_3VSame_Constraint_impl<"bit", ".8b", VPR64,   v8i8,
                   0b0, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
def BITvvv_16B : NeonI_3VSame_Constraint_impl<"bit", ".16b", VPR128, v16i8,
                   0b1, 0b1, 0b10, 0b00011, Neon_NoBSLop>;

// Vector Bitwise Insert if False

def BIFvvv_8B  : NeonI_3VSame_Constraint_impl<"bif", ".8b", VPR64,  v8i8,
                                0b0, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
def BIFvvv_16B : NeonI_3VSame_Constraint_impl<"bif", ".16b", VPR128, v16i8,
                                0b1, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
562
// Vector Absolute Difference and Accumulate (Signed, Unsigned)

def Neon_uaba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                       (add node:$Ra, (int_arm_neon_vabdu node:$Rn, node:$Rm))>;
def Neon_saba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                       (add node:$Ra, (int_arm_neon_vabds node:$Rn, node:$Rm))>;

// Vector Absolute Difference and Accumulate (Unsigned)
def UABAvvv_8B :  NeonI_3VSame_Constraint_impl<"uaba", ".8b",  VPR64,  v8i8,
                    0b0, 0b1, 0b00, 0b01111, Neon_uaba>;
def UABAvvv_16B : NeonI_3VSame_Constraint_impl<"uaba", ".16b", VPR128, v16i8,
                    0b1, 0b1, 0b00, 0b01111, Neon_uaba>;
def UABAvvv_4H :  NeonI_3VSame_Constraint_impl<"uaba", ".4h",  VPR64,  v4i16,
                    0b0, 0b1, 0b01, 0b01111, Neon_uaba>;
def UABAvvv_8H :  NeonI_3VSame_Constraint_impl<"uaba", ".8h",  VPR128, v8i16,
                    0b1, 0b1, 0b01, 0b01111, Neon_uaba>;
def UABAvvv_2S :  NeonI_3VSame_Constraint_impl<"uaba", ".2s",  VPR64,  v2i32,
                    0b0, 0b1, 0b10, 0b01111, Neon_uaba>;
def UABAvvv_4S :  NeonI_3VSame_Constraint_impl<"uaba", ".4s",  VPR128, v4i32,
                    0b1, 0b1, 0b10, 0b01111, Neon_uaba>;

// Vector Absolute Difference and Accumulate (Signed)
def SABAvvv_8B :  NeonI_3VSame_Constraint_impl<"saba", ".8b",  VPR64,  v8i8,
                    0b0, 0b0, 0b00, 0b01111, Neon_saba>;
def SABAvvv_16B : NeonI_3VSame_Constraint_impl<"saba", ".16b", VPR128, v16i8,
                    0b1, 0b0, 0b00, 0b01111, Neon_saba>;
def SABAvvv_4H :  NeonI_3VSame_Constraint_impl<"saba", ".4h",  VPR64,  v4i16,
                    0b0, 0b0, 0b01, 0b01111, Neon_saba>;
def SABAvvv_8H :  NeonI_3VSame_Constraint_impl<"saba", ".8h",  VPR128, v8i16,
                    0b1, 0b0, 0b01, 0b01111, Neon_saba>;
def SABAvvv_2S :  NeonI_3VSame_Constraint_impl<"saba", ".2s",  VPR64,  v2i32,
                    0b0, 0b0, 0b10, 0b01111, Neon_saba>;
def SABAvvv_4S :  NeonI_3VSame_Constraint_impl<"saba", ".4s",  VPR128, v4i32,
                    0b1, 0b0, 0b10, 0b01111, Neon_saba>;
597

// Vector Absolute Difference (Signed, Unsigned)
defm UABDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01110, "uabd", int_arm_neon_vabdu, 0>;
defm SABDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01110, "sabd", int_arm_neon_vabds, 0>;

// Vector Absolute Difference (Floating Point)
defm FABDvvv: NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11010, "fabd",
                                    int_arm_neon_vabds, int_arm_neon_vabds,
                                    int_arm_neon_vabds, v2f32, v4f32, v2f64, 0>;

// Vector Reciprocal Step (Floating Point)
defm FRECPSvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11111, "frecps",
                                       int_arm_neon_vrecps, int_arm_neon_vrecps,
                                       int_arm_neon_vrecps,
                                       v2f32, v4f32, v2f64, 0>;

// Vector Reciprocal Square Root Step (Floating Point)
defm FRSQRTSvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11111, "frsqrts",
                                        int_arm_neon_vrsqrts,
                                        int_arm_neon_vrsqrts,
                                        int_arm_neon_vrsqrts,
                                        v2f32, v4f32, v2f64, 0>;
620
621 // Vector Comparisons
622
// Convenience PatFrags binding a fixed CondCode onto the generic
// AArch64ISD::NEON_CMP node, one per integer comparison flavour.
623 def Neon_cmeq : PatFrag<(ops node:$lhs, node:$rhs),
624                         (Neon_cmp node:$lhs, node:$rhs, SETEQ)>;
625 def Neon_cmphs : PatFrag<(ops node:$lhs, node:$rhs),
626                          (Neon_cmp node:$lhs, node:$rhs, SETUGE)>;
627 def Neon_cmge : PatFrag<(ops node:$lhs, node:$rhs),
628                         (Neon_cmp node:$lhs, node:$rhs, SETGE)>;
629 def Neon_cmhi : PatFrag<(ops node:$lhs, node:$rhs),
630                         (Neon_cmp node:$lhs, node:$rhs, SETUGT)>;
631 def Neon_cmgt : PatFrag<(ops node:$lhs, node:$rhs),
632                         (Neon_cmp node:$lhs, node:$rhs, SETGT)>;
633
634 // NeonI_compare_aliases class: swaps register operands to implement
635 // comparison aliases, e.g., CMLE is alias for CMGE with operands reversed.
// Note the alias result maps $Rm before $Rn, i.e. "cmle a, b, c" is emitted
// as the underlying instruction with b and c swapped.
636 class NeonI_compare_aliases<string asmop, string asmlane,
637                             Instruction inst, RegisterOperand VPRC>
638   : NeonInstAlias<asmop # "\t$Rd" # asmlane #", $Rn" # asmlane #
639                     ", $Rm" # asmlane,
640                   (inst VPRC:$Rd, VPRC:$Rm, VPRC:$Rn), 0b0>;
641
642 // Vector Comparisons (Integer)
643
644 // Vector Compare Mask Equal (Integer)
// CMEQ is the only symmetric comparison, hence the only commutable one.
645 let isCommutable =1 in {
646 defm CMEQvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10001, "cmeq", Neon_cmeq, 0>;
647 }
648
649 // Vector Compare Mask Higher or Same (Unsigned Integer)
650 defm CMHSvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00111, "cmhs", Neon_cmphs, 0>;
651
652 // Vector Compare Mask Greater Than or Equal (Integer)
653 defm CMGEvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00111, "cmge", Neon_cmge, 0>;
654
655 // Vector Compare Mask Higher (Unsigned Integer)
656 defm CMHIvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00110, "cmhi", Neon_cmhi, 0>;
657
658 // Vector Compare Mask Greater Than (Integer)
659 defm CMGTvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00110, "cmgt", Neon_cmgt, 0>;
660
661 // Vector Compare Mask Bitwise Test (Integer)
662 defm CMTSTvvv:  NeonI_3VSame_BHSD_sizes<0b0, 0b10001, "cmtst", Neon_tst, 0>;
663
// The "less" comparisons below have no hardware encodings of their own; each
// is an assembler alias that reverses the operands of the matching "greater"
// instruction.
664 // Vector Compare Mask Less or Same (Unsigned Integer)
665 // CMLS is alias for CMHS with operands reversed.
666 def CMLSvvv_8B  : NeonI_compare_aliases<"cmls", ".8b",  CMHSvvv_8B,  VPR64>;
667 def CMLSvvv_16B : NeonI_compare_aliases<"cmls", ".16b", CMHSvvv_16B, VPR128>;
668 def CMLSvvv_4H  : NeonI_compare_aliases<"cmls", ".4h",  CMHSvvv_4H,  VPR64>;
669 def CMLSvvv_8H  : NeonI_compare_aliases<"cmls", ".8h",  CMHSvvv_8H,  VPR128>;
670 def CMLSvvv_2S  : NeonI_compare_aliases<"cmls", ".2s",  CMHSvvv_2S,  VPR64>;
671 def CMLSvvv_4S  : NeonI_compare_aliases<"cmls", ".4s",  CMHSvvv_4S,  VPR128>;
672 def CMLSvvv_2D  : NeonI_compare_aliases<"cmls", ".2d",  CMHSvvv_2D,  VPR128>;
673
674 // Vector Compare Mask Less Than or Equal (Integer)
675 // CMLE is alias for CMGE with operands reversed.
676 def CMLEvvv_8B  : NeonI_compare_aliases<"cmle", ".8b",  CMGEvvv_8B,  VPR64>;
677 def CMLEvvv_16B : NeonI_compare_aliases<"cmle", ".16b", CMGEvvv_16B, VPR128>;
678 def CMLEvvv_4H  : NeonI_compare_aliases<"cmle", ".4h",  CMGEvvv_4H,  VPR64>;
679 def CMLEvvv_8H  : NeonI_compare_aliases<"cmle", ".8h",  CMGEvvv_8H,  VPR128>;
680 def CMLEvvv_2S  : NeonI_compare_aliases<"cmle", ".2s",  CMGEvvv_2S,  VPR64>;
681 def CMLEvvv_4S  : NeonI_compare_aliases<"cmle", ".4s",  CMGEvvv_4S,  VPR128>;
682 def CMLEvvv_2D  : NeonI_compare_aliases<"cmle", ".2d",  CMGEvvv_2D,  VPR128>;
683
684 // Vector Compare Mask Lower (Unsigned Integer)
685 // CMLO is alias for CMHI with operands reversed.
686 def CMLOvvv_8B  : NeonI_compare_aliases<"cmlo", ".8b",  CMHIvvv_8B,  VPR64>;
687 def CMLOvvv_16B : NeonI_compare_aliases<"cmlo", ".16b", CMHIvvv_16B, VPR128>;
688 def CMLOvvv_4H  : NeonI_compare_aliases<"cmlo", ".4h",  CMHIvvv_4H,  VPR64>;
689 def CMLOvvv_8H  : NeonI_compare_aliases<"cmlo", ".8h",  CMHIvvv_8H,  VPR128>;
690 def CMLOvvv_2S  : NeonI_compare_aliases<"cmlo", ".2s",  CMHIvvv_2S,  VPR64>;
691 def CMLOvvv_4S  : NeonI_compare_aliases<"cmlo", ".4s",  CMHIvvv_4S,  VPR128>;
692 def CMLOvvv_2D  : NeonI_compare_aliases<"cmlo", ".2d",  CMHIvvv_2D,  VPR128>;
693
694 // Vector Compare Mask Less Than (Integer)
695 // CMLT is alias for CMGT with operands reversed.
696 def CMLTvvv_8B  : NeonI_compare_aliases<"cmlt", ".8b",  CMGTvvv_8B,  VPR64>;
697 def CMLTvvv_16B : NeonI_compare_aliases<"cmlt", ".16b", CMGTvvv_16B, VPR128>;
698 def CMLTvvv_4H  : NeonI_compare_aliases<"cmlt", ".4h",  CMGTvvv_4H,  VPR64>;
699 def CMLTvvv_8H  : NeonI_compare_aliases<"cmlt", ".8h",  CMGTvvv_8H,  VPR128>;
700 def CMLTvvv_2S  : NeonI_compare_aliases<"cmlt", ".2s",  CMGTvvv_2S,  VPR64>;
701 def CMLTvvv_4S  : NeonI_compare_aliases<"cmlt", ".4s",  CMGTvvv_4S,  VPR128>;
702 def CMLTvvv_2D  : NeonI_compare_aliases<"cmlt", ".2d",  CMGTvvv_2D,  VPR128>;
703
704
// Operand accepting only the literal immediate #0, used for the
// compare-against-zero instructions below (e.g. "cmeq v0.8b, v1.8b, #0").
705 def neon_uimm0_asmoperand : AsmOperandClass
706 {
707   let Name = "UImm0";
708   let PredicateMethod = "isUImm<0>";
709   let RenderMethod = "addImmOperands";
710 }
711
712 def neon_uimm0 : Operand<i32>, ImmLeaf<i32, [{return Imm == 0;}]> {
713   let ParserMatchClass = neon_uimm0_asmoperand;
714   let PrintMethod = "printNeonUImm0Operand";
715
716 }
717
// Multiclass generating the integer compare-against-zero instructions for
// every 3VSame vector arrangement. The immediate is constrained to 0 by
// neon_uimm0; the CondCode parameter selects the comparison matched by the
// AArch64ISD::NEON_CMPZ node. Note there is no _1D variant: only the
// quad-register 2D form exists for 64-bit elements.
718 multiclass NeonI_cmpz_sizes<bit u, bits<5> opcode, string asmop, CondCode CC>
719 {
720   def _8B :  NeonI_2VMisc<0b0, u, 0b00, opcode,
721              (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
722              asmop # "\t$Rd.8b, $Rn.8b, $Imm",
723              [(set (v8i8 VPR64:$Rd),
724                 (v8i8 (Neon_cmpz (v8i8 VPR64:$Rn), (i32 imm:$Imm), CC)))],
725              NoItinerary>;
726
727   def _16B : NeonI_2VMisc<0b1, u, 0b00, opcode,
728              (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
729              asmop # "\t$Rd.16b, $Rn.16b, $Imm",
730              [(set (v16i8 VPR128:$Rd),
731                 (v16i8 (Neon_cmpz (v16i8 VPR128:$Rn), (i32 imm:$Imm), CC)))],
732              NoItinerary>;
733
734   def _4H : NeonI_2VMisc<0b0, u, 0b01, opcode,
735             (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
736             asmop # "\t$Rd.4h, $Rn.4h, $Imm",
737             [(set (v4i16 VPR64:$Rd),
738                (v4i16 (Neon_cmpz (v4i16 VPR64:$Rn), (i32 imm:$Imm), CC)))],
739             NoItinerary>;
740
741   def _8H : NeonI_2VMisc<0b1, u, 0b01, opcode,
742             (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
743             asmop # "\t$Rd.8h, $Rn.8h, $Imm",
744             [(set (v8i16 VPR128:$Rd),
745                (v8i16 (Neon_cmpz (v8i16 VPR128:$Rn), (i32 imm:$Imm), CC)))],
746             NoItinerary>;
747
748   def _2S : NeonI_2VMisc<0b0, u, 0b10, opcode,
749             (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
750             asmop # "\t$Rd.2s, $Rn.2s, $Imm",
751             [(set (v2i32 VPR64:$Rd),
752                (v2i32 (Neon_cmpz (v2i32 VPR64:$Rn), (i32 imm:$Imm), CC)))],
753             NoItinerary>;
754
755   def _4S : NeonI_2VMisc<0b1, u, 0b10, opcode,
756             (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
757             asmop # "\t$Rd.4s, $Rn.4s, $Imm",
758             [(set (v4i32 VPR128:$Rd),
759                (v4i32 (Neon_cmpz (v4i32 VPR128:$Rn), (i32 imm:$Imm), CC)))],
760             NoItinerary>;
761
762   def _2D : NeonI_2VMisc<0b1, u, 0b11, opcode,
763             (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
764             asmop # "\t$Rd.2d, $Rn.2d, $Imm",
765             [(set (v2i64 VPR128:$Rd),
766                (v2i64 (Neon_cmpz (v2i64 VPR128:$Rn), (i32 imm:$Imm), CC)))],
767             NoItinerary>;
768 }
769
// Instantiations of the compare-against-zero family. The signed "less"
// variants are real encodings here (unlike the register-register CML* forms,
// which are aliases).
770 // Vector Compare Mask Equal to Zero (Integer)
771 defm CMEQvvi : NeonI_cmpz_sizes<0b0, 0b01001, "cmeq", SETEQ>;
772
773 // Vector Compare Mask Greater Than or Equal to Zero (Signed Integer)
774 defm CMGEvvi : NeonI_cmpz_sizes<0b1, 0b01000, "cmge", SETGE>;
775
776 // Vector Compare Mask Greater Than Zero (Signed Integer)
777 defm CMGTvvi : NeonI_cmpz_sizes<0b0, 0b01000, "cmgt", SETGT>;
778
779 // Vector Compare Mask Less Than or Equal To Zero (Signed Integer)
780 defm CMLEvvi : NeonI_cmpz_sizes<0b1, 0b01001, "cmle", SETLE>;
781
782 // Vector Compare Mask Less Than Zero (Signed Integer)
783 defm CMLTvvi : NeonI_cmpz_sizes<0b0, 0b01010, "cmlt", SETLT>;
784
785 // Vector Comparisons (Floating Point)
786
// FP compares produce all-ones/all-zeros integer masks, hence the integer
// result types (v2i32/v4i32/v2i64) passed to NeonI_3VSame_SD_sizes.
787 // Vector Compare Mask Equal (Floating Point)
788 let isCommutable =1 in {
789 defm FCMEQvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11100, "fcmeq", Neon_cmeq,
790                                       Neon_cmeq, Neon_cmeq,
791                                       v2i32, v4i32, v2i64, 0>;
792 }
793
794 // Vector Compare Mask Greater Than Or Equal (Floating Point)
795 defm FCMGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11100, "fcmge", Neon_cmge,
796                                       Neon_cmge, Neon_cmge,
797                                       v2i32, v4i32, v2i64, 0>;
798
799 // Vector Compare Mask Greater Than (Floating Point)
800 defm FCMGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11100, "fcmgt", Neon_cmgt,
801                                       Neon_cmgt, Neon_cmgt,
802                                       v2i32, v4i32, v2i64, 0>;
803
804 // Vector Compare Mask Less Than Or Equal (Floating Point)
805 // FCMLE is alias for FCMGE with operands reversed.
806 def FCMLEvvv_2S  : NeonI_compare_aliases<"fcmle", ".2s",  FCMGEvvv_2S,  VPR64>;
807 def FCMLEvvv_4S  : NeonI_compare_aliases<"fcmle", ".4s",  FCMGEvvv_4S,  VPR128>;
808 def FCMLEvvv_2D  : NeonI_compare_aliases<"fcmle", ".2d",  FCMGEvvv_2D,  VPR128>;
809
810 // Vector Compare Mask Less Than (Floating Point)
811 // FCMLT is alias for FCMGT with operands reversed.
812 def FCMLTvvv_2S  : NeonI_compare_aliases<"fcmlt", ".2s",  FCMGTvvv_2S,  VPR64>;
813 def FCMLTvvv_4S  : NeonI_compare_aliases<"fcmlt", ".4s",  FCMGTvvv_4S,  VPR128>;
814 def FCMLTvvv_2D  : NeonI_compare_aliases<"fcmlt", ".2d",  FCMGTvvv_2D,  VPR128>;
815
816
// Multiclass for FP compare-against-zero. The immediate operand is fpz32,
// the literal #0.0, for all arrangements (including 2D, where the assembler
// still writes the zero as an f32 literal).
817 multiclass NeonI_fpcmpz_sizes<bit u, bit size, bits<5> opcode,
818                               string asmop, CondCode CC>
819 {
820   def _2S : NeonI_2VMisc<0b0, u, {size, 0b0}, opcode,
821             (outs VPR64:$Rd), (ins VPR64:$Rn, fpz32:$FPImm),
822             asmop # "\t$Rd.2s, $Rn.2s, $FPImm",
823             [(set (v2i32 VPR64:$Rd),
824                (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpz32:$FPImm), CC)))],
825             NoItinerary>;
826
827   def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode,
828             (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm),
829             asmop # "\t$Rd.4s, $Rn.4s, $FPImm",
830             [(set (v4i32 VPR128:$Rd),
831                (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpz32:$FPImm), CC)))],
832             NoItinerary>;
833
834   def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode,
835             (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm),
836             asmop # "\t$Rd.2d, $Rn.2d, $FPImm",
837             [(set (v2i64 VPR128:$Rd),
838                (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn), (f32 fpz32:$FPImm), CC)))],
839             NoItinerary>;
840 }
841
842 // Vector Compare Mask Equal to Zero (Floating Point)
843 defm FCMEQvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01101, "fcmeq", SETEQ>;
844
845 // Vector Compare Mask Greater Than or Equal to Zero (Floating Point)
846 defm FCMGEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01100, "fcmge", SETGE>;
847
848 // Vector Compare Mask Greater Than Zero (Floating Point)
849 defm FCMGTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01100, "fcmgt", SETGT>;
850
851 // Vector Compare Mask Less Than or Equal To Zero (Floating Point)
852 defm FCMLEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01101, "fcmle", SETLE>;
853
854 // Vector Compare Mask Less Than Zero (Floating Point)
855 defm FCMLTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01110, "fcmlt", SETLT>;
856
857 // Vector Absolute Comparisons (Floating Point)
858
// These compare |Rn| against |Rm|; the 2S/4S forms use the shared ARM vacge/
// vacgt intrinsics while the 2D form has AArch64-specific ones.
859 // Vector Absolute Compare Mask Greater Than Or Equal (Floating Point)
860 defm FACGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11101, "facge",
861                                       int_arm_neon_vacged, int_arm_neon_vacgeq,
862                                       int_aarch64_neon_vacgeq,
863                                       v2i32, v4i32, v2i64, 0>;
864
865 // Vector Absolute Compare Mask Greater Than (Floating Point)
866 defm FACGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11101, "facgt",
867                                       int_arm_neon_vacgtd, int_arm_neon_vacgtq,
868                                       int_aarch64_neon_vacgtq,
869                                       v2i32, v4i32, v2i64, 0>;
870
871 // Vector Absolute Compare Mask Less Than Or Equal (Floating Point)
872 // FACLE is alias for FACGE with operands reversed.
873 def FACLEvvv_2S  : NeonI_compare_aliases<"facle", ".2s",  FACGEvvv_2S,  VPR64>;
874 def FACLEvvv_4S  : NeonI_compare_aliases<"facle", ".4s",  FACGEvvv_4S,  VPR128>;
875 def FACLEvvv_2D  : NeonI_compare_aliases<"facle", ".2d",  FACGEvvv_2D,  VPR128>;
876
877 // Vector Absolute Compare Mask Less Than (Floating Point)
878 // FACLT is alias for FACGT with operands reversed.
879 def FACLTvvv_2S  : NeonI_compare_aliases<"faclt", ".2s",  FACGTvvv_2S,  VPR64>;
880 def FACLTvvv_4S  : NeonI_compare_aliases<"faclt", ".4s",  FACGTvvv_4S,  VPR128>;
881 def FACLTvvv_2D  : NeonI_compare_aliases<"faclt", ".2d",  FACGTvvv_2D,  VPR128>;
882
883 // Vector halving add (Integer Signed, Unsigned)
884 defm SHADDvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b00000, "shadd",
885                                         int_arm_neon_vhadds, 1>;
886 defm UHADDvvv :  NeonI_3VSame_BHS_sizes<0b1, 0b00000, "uhadd",
887                                         int_arm_neon_vhaddu, 1>;
888
889 // Vector halving sub (Integer Signed, Unsigned)
890 defm SHSUBvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b00100, "shsub",
891                                         int_arm_neon_vhsubs, 0>;
892 defm UHSUBvvv :  NeonI_3VSame_BHS_sizes<0b1, 0b00100, "uhsub",
893                                         int_arm_neon_vhsubu, 0>;
894
895 // Vector rounding halving add (Integer Signed, Unsigned)
896 defm SRHADDvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b00010, "srhadd",
897                                          int_arm_neon_vrhadds, 1>;
898 defm URHADDvvv :  NeonI_3VSame_BHS_sizes<0b1, 0b00010, "urhadd",
899                                          int_arm_neon_vrhaddu, 1>;
900
901 // Vector Saturating add (Integer Signed, Unsigned)
902 defm SQADDvvv :  NeonI_3VSame_BHSD_sizes<0b0, 0b00001, "sqadd",
903                    int_arm_neon_vqadds, 1>;
904 defm UQADDvvv :  NeonI_3VSame_BHSD_sizes<0b1, 0b00001, "uqadd",
905                    int_arm_neon_vqaddu, 1>;
906
907 // Vector Saturating sub (Integer Signed, Unsigned)
908 defm SQSUBvvv :  NeonI_3VSame_BHSD_sizes<0b0, 0b00101, "sqsub",
909                    int_arm_neon_vqsubs, 1>;
910 defm UQSUBvvv :  NeonI_3VSame_BHSD_sizes<0b1, 0b00101, "uqsub",
911                    int_arm_neon_vqsubu, 1>;
912
// The *SHL family takes the shift amount in a vector register; negative
// elements shift right, so these back both directions of the intrinsics.
913 // Vector Shift Left (Signed and Unsigned Integer)
914 defm SSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01000, "sshl",
915                  int_arm_neon_vshifts, 1>;
916 defm USHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01000, "ushl",
917                  int_arm_neon_vshiftu, 1>;
918
919 // Vector Saturating Shift Left (Signed and Unsigned Integer)
920 defm SQSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01001, "sqshl",
921                   int_arm_neon_vqshifts, 1>;
922 defm UQSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01001, "uqshl",
923                   int_arm_neon_vqshiftu, 1>;
924
925 // Vector Rounding Shift Left (Signed and Unsigned Integer)
926 defm SRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01010, "srshl",
927                   int_arm_neon_vrshifts, 1>;
928 defm URSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01010, "urshl",
929                   int_arm_neon_vrshiftu, 1>;
930
931 // Vector Saturating Rounding Shift Left (Signed and Unsigned Integer)
932 defm SQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01011, "sqrshl",
933                    int_arm_neon_vqrshifts, 1>;
934 defm UQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01011, "uqrshl",
935                    int_arm_neon_vqrshiftu, 1>;
936
937 // Vector Maximum (Signed and Unsigned Integer)
938 defm SMAXvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01100, "smax", int_arm_neon_vmaxs, 1>;
939 defm UMAXvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01100, "umax", int_arm_neon_vmaxu, 1>;
940
941 // Vector Minimum (Signed and Unsigned Integer)
942 defm SMINvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01101, "smin", int_arm_neon_vmins, 1>;
943 defm UMINvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01101, "umin", int_arm_neon_vminu, 1>;
944
945 // Vector Maximum (Floating Point)
946 defm FMAXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11110, "fmax",
947                                      int_arm_neon_vmaxs, int_arm_neon_vmaxs,
948                                      int_arm_neon_vmaxs, v2f32, v4f32, v2f64, 1>;
949
950 // Vector Minimum (Floating Point)
951 defm FMINvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11110, "fmin",
952                                      int_arm_neon_vmins, int_arm_neon_vmins,
953                                      int_arm_neon_vmins, v2f32, v4f32, v2f64, 1>;
954
955 // Vector maxNum (Floating Point) - prefer a number over a quiet NaN
956 defm FMAXNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11000, "fmaxnm",
957                                        int_aarch64_neon_vmaxnm,
958                                        int_aarch64_neon_vmaxnm,
959                                        int_aarch64_neon_vmaxnm,
960                                        v2f32, v4f32, v2f64, 1>;
961
962 // Vector minNum (Floating Point) - prefer a number over a quiet NaN
963 defm FMINNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11000, "fminnm",
964                                        int_aarch64_neon_vminnm,
965                                        int_aarch64_neon_vminnm,
966                                        int_aarch64_neon_vminnm,
967                                        v2f32, v4f32, v2f64, 1>;
968
969 // Vector Maximum Pairwise (Signed and Unsigned Integer)
970 defm SMAXPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10100, "smaxp", int_arm_neon_vpmaxs, 1>;
971 defm UMAXPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10100, "umaxp", int_arm_neon_vpmaxu, 1>;
972
973 // Vector Minimum Pairwise (Signed and Unsigned Integer)
974 defm SMINPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10101, "sminp", int_arm_neon_vpmins, 1>;
975 defm UMINPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10101, "uminp", int_arm_neon_vpminu, 1>;
976
977 // Vector Maximum Pairwise (Floating Point)
978 defm FMAXPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11110, "fmaxp",
979                                      int_arm_neon_vpmaxs, int_arm_neon_vpmaxs,
980                                      int_arm_neon_vpmaxs, v2f32, v4f32, v2f64, 1>;
981
982 // Vector Minimum Pairwise (Floating Point)
983 defm FMINPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11110, "fminp",
984                                      int_arm_neon_vpmins, int_arm_neon_vpmins,
985                                      int_arm_neon_vpmins, v2f32, v4f32, v2f64, 1>;
986
987 // Vector maxNum Pairwise (Floating Point) - prefer a number over a quiet NaN
988 defm FMAXNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11000, "fmaxnmp",
989                                        int_aarch64_neon_vpmaxnm,
990                                        int_aarch64_neon_vpmaxnm,
991                                        int_aarch64_neon_vpmaxnm,
992                                        v2f32, v4f32, v2f64, 1>;
993
994 // Vector minNum Pairwise (Floating Point) - prefer a number over a quiet NaN
995 defm FMINNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11000, "fminnmp",
996                                        int_aarch64_neon_vpminnm,
997                                        int_aarch64_neon_vpminnm,
998                                        int_aarch64_neon_vpminnm,
999                                        v2f32, v4f32, v2f64, 1>;
1000
1001 // Vector Addition Pairwise (Integer)
1002 defm ADDP : NeonI_3VSame_BHSD_sizes<0b0, 0b10111, "addp", int_arm_neon_vpadd, 1>;
1003
1004 // Vector Addition Pairwise (Floating Point)
1005 defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp",
1006                                        int_arm_neon_vpadd,
1007                                        int_arm_neon_vpadd,
1008                                        int_arm_neon_vpadd,
1009                                        v2f32, v4f32, v2f64, 1>;
1010
1011 // Vector Saturating Doubling Multiply High
1012 defm SQDMULHvvv : NeonI_3VSame_HS_sizes<0b0, 0b10110, "sqdmulh",
1013                     int_arm_neon_vqdmulh, 1>;
1014
1015 // Vector Saturating Rounding Doubling Multiply High
1016 defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh",
1017                      int_arm_neon_vqrdmulh, 1>;
1018
1019 // Vector Multiply Extended (Floating Point)
1020 defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx",
1021                                       int_aarch64_neon_vmulx,
1022                                       int_aarch64_neon_vmulx,
1023                                       int_aarch64_neon_vmulx,
1024                                       v2f32, v4f32, v2f64, 1>;
1025
1026 // Patterns to match llvm.aarch64.* intrinsic for
1027 // ADDP, SMINP, UMINP, SMAXP, UMAXP having i32 as output
// A v2i32 across-vector reduction is implemented as a pairwise op with the
// input duplicated in both operands; lane 0 of the result (sub_32) holds the
// reduced value.
1028 class Neon_VectorPair_v2i32_pattern<SDPatternOperator opnode, Instruction INST>
1029   : Pat<(v1i32 (opnode (v2i32 VPR64:$Rn))),
1030         (EXTRACT_SUBREG
1031              (v2i32 (INST (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rn))),
1032              sub_32)>;
1033
1034 def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_sminv, SMINPvvv_2S>;
1035 def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_uminv, UMINPvvv_2S>;
1036 def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_smaxv, SMAXPvvv_2S>;
1037 def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_umaxv, UMAXPvvv_2S>;
1038 def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_vaddv, ADDP_2S>;
1039
1040 // Vector Immediate Instructions
1041
// AsmOperandClass factory for the MOVI/MVNI shifted-immediate operands; the
// PREFIX selects the matching isNeonMovImmShift*/addNeonMovImmShift* hooks in
// the asm parser.
1042 multiclass neon_mov_imm_shift_asmoperands<string PREFIX>
1043 {
1044   def _asmoperand : AsmOperandClass
1045     {
1046       let Name = "NeonMovImmShift" # PREFIX;
1047       let RenderMethod = "addNeonMovImmShift" # PREFIX # "Operands";
1048       let PredicateMethod = "isNeonMovImmShift" # PREFIX;
1049     }
1050 }
1051
1052 // Definition of vector immediates shift operands
1053
1054 // The selectable use-cases extract the shift operation
1055 // information from the OpCmode fields encoded in the immediate.
1056 def neon_mod_shift_imm_XFORM : SDNodeXForm<imm, [{
1057   uint64_t OpCmode = N->getZExtValue();
1058   unsigned ShiftImm;
1059   unsigned ShiftOnesIn;
1060   unsigned HasShift =
1061     A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
1062   if (!HasShift) return SDValue();
1063   return CurDAG->getTargetConstant(ShiftImm, MVT::i32);
1064 }]>;
1065
1066 // Vector immediates shift operands which accept LSL and MSL
1067 // shift operators with shift value in the range of 0, 8, 16, 24 (LSL),
1068 // or 0, 8 (LSLH) or 8, 16 (MSL).
1069 defm neon_mov_imm_LSL : neon_mov_imm_shift_asmoperands<"LSL">;
1070 defm neon_mov_imm_MSL : neon_mov_imm_shift_asmoperands<"MSL">;
1071 // LSLH restricts shift amount to  0, 8 out of 0, 8, 16, 24
1072 defm neon_mov_imm_LSLH : neon_mov_imm_shift_asmoperands<"LSLH">;
1073
// Operand factory pairing each asmoperand class above with an ImmLeaf whose
// predicate decodes the OpCmode to decide whether the shift kind applies
// (LSL: shifting in zeros; MSL: shifting in ones).
1074 multiclass neon_mov_imm_shift_operands<string PREFIX,
1075                                        string HALF, string ISHALF, code pred>
1076 {
1077    def _operand : Operand<i32>, ImmLeaf<i32, pred, neon_mod_shift_imm_XFORM>
1078     {
1079       let PrintMethod =
1080         "printNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
1081       let DecoderMethod =
1082         "DecodeNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
1083       let ParserMatchClass =
1084         !cast<AsmOperandClass>("neon_mov_imm_" # PREFIX # HALF # "_asmoperand");
1085     }
1086 }
1087
1088 defm neon_mov_imm_LSL  : neon_mov_imm_shift_operands<"LSL", "", "false", [{
1089   unsigned ShiftImm;
1090   unsigned ShiftOnesIn;
1091   unsigned HasShift =
1092     A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1093   return (HasShift && !ShiftOnesIn);
1094 }]>;
1095
1096 defm neon_mov_imm_MSL  : neon_mov_imm_shift_operands<"MSL", "", "false", [{
1097   unsigned ShiftImm;
1098   unsigned ShiftOnesIn;
1099   unsigned HasShift =
1100     A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1101   return (HasShift && ShiftOnesIn);
1102 }]>;
1103
1104 defm neon_mov_imm_LSLH  : neon_mov_imm_shift_operands<"LSL", "H", "true", [{
1105   unsigned ShiftImm;
1106   unsigned ShiftOnesIn;
1107   unsigned HasShift =
1108     A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1109   return (HasShift && !ShiftOnesIn);
1110 }]>;
1111
// Small unsigned-immediate operand classes used by the vector immediate and
// lane-index instructions.
1112 def neon_uimm1_asmoperand : AsmOperandClass
1113 {
1114   let Name = "UImm1";
1115   let PredicateMethod = "isUImm<1>";
1116   let RenderMethod = "addImmOperands";
1117 }
1118
1119 def neon_uimm2_asmoperand : AsmOperandClass
1120 {
1121   let Name = "UImm2";
1122   let PredicateMethod = "isUImm<2>";
1123   let RenderMethod = "addImmOperands";
1124 }
1125
1126 def neon_uimm8_asmoperand : AsmOperandClass
1127 {
1128   let Name = "UImm8";
1129   let PredicateMethod = "isUImm<8>";
1130   let RenderMethod = "addImmOperands";
1131 }
1132
// ImmLeaf accepts any immediate; range checking is done by the asm parser
// (isUImm<8>) and by the instruction encodings that consume it.
1133 def neon_uimm8 : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
1134   let ParserMatchClass = neon_uimm8_asmoperand;
1135   let PrintMethod = "printUImmHexOperand";
1136 }
1137
1138 def neon_uimm64_mask_asmoperand : AsmOperandClass
1139 {
1140   let Name = "NeonUImm64Mask";
1141   let PredicateMethod = "isNeonUImm64Mask";
1142   let RenderMethod = "addNeonUImm64MaskOperands";
1143 }
1144
1145 // MCOperand for 64-bit bytemask with each byte having only the
1146 // value 0x00 and 0xff is encoded as an unsigned 8-bit value
1147 def neon_uimm64_mask : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
1148   let ParserMatchClass = neon_uimm64_mask_asmoperand;
1149   let PrintMethod = "printNeonUImm64MaskOperand";
1150 }
1151
// MOVI/MVNI with an LSL-shifted 8-bit immediate. Word forms (2S/4S) take a
// two-bit shift selector (0/8/16/24, encoded in cmode<2:1>); halfword forms
// (4H/8H) use the restricted LSLH operand and a single-bit selector (0/8,
// encoded in cmode<1>).
1152 multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
1153                                    SDPatternOperator opnode>
1154 {
1155     // shift zeros, per word
1156     def _2S  : NeonI_1VModImm<0b0, op,
1157                               (outs VPR64:$Rd),
1158                               (ins neon_uimm8:$Imm,
1159                                 neon_mov_imm_LSL_operand:$Simm),
1160                               !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
1161                               [(set (v2i32 VPR64:$Rd),
1162                                  (v2i32 (opnode (timm:$Imm),
1163                                    (neon_mov_imm_LSL_operand:$Simm))))],
1164                               NoItinerary> {
1165        bits<2> Simm;
1166        let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
1167      }
1168
1169     def _4S  : NeonI_1VModImm<0b1, op,
1170                               (outs VPR128:$Rd),
1171                               (ins neon_uimm8:$Imm,
1172                                 neon_mov_imm_LSL_operand:$Simm),
1173                               !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
1174                               [(set (v4i32 VPR128:$Rd),
1175                                  (v4i32 (opnode (timm:$Imm),
1176                                    (neon_mov_imm_LSL_operand:$Simm))))],
1177                               NoItinerary> {
1178       bits<2> Simm;
1179       let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
1180     }
1181
1182     // shift zeros, per halfword
1183     def _4H  : NeonI_1VModImm<0b0, op,
1184                               (outs VPR64:$Rd),
1185                               (ins neon_uimm8:$Imm,
1186                                 neon_mov_imm_LSLH_operand:$Simm),
1187                               !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
1188                               [(set (v4i16 VPR64:$Rd),
1189                                  (v4i16 (opnode (timm:$Imm),
1190                                    (neon_mov_imm_LSLH_operand:$Simm))))],
1191                               NoItinerary> {
1192       bit  Simm;
1193       let cmode = {0b1, 0b0, Simm, 0b0};
1194     }
1195
1196     def _8H  : NeonI_1VModImm<0b1, op,
1197                               (outs VPR128:$Rd),
1198                               (ins neon_uimm8:$Imm,
1199                                 neon_mov_imm_LSLH_operand:$Simm),
1200                               !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
1201                               [(set (v8i16 VPR128:$Rd),
1202                                  (v8i16 (opnode (timm:$Imm),
1203                                    (neon_mov_imm_LSLH_operand:$Simm))))],
1204                               NoItinerary> {
1205       bit Simm;
1206       let cmode = {0b1, 0b0, Simm, 0b0};
1207      }
1208 }
1209
// ORR/BIC-style immediate instructions (read-modify-write forms of the
// shifted-immediate family): $Rd is tied to $src, and the pattern applies
// `opnode` to the old value and the expanded modified-immediate produced by
// `neonopnode` (MOVI/MVNI). cmode bit 0 is set to distinguish these from the
// plain move-immediate encodings in NeonI_mov_imm_lsl_sizes.
//
// Fix: the _4H/_8H selection patterns previously matched
// neon_mov_imm_LSL_operand:$Simm while their ins lists use the restricted
// neon_mov_imm_LSLH_operand:$Simm. The unrestricted LSL operand admits shift
// amounts 16 and 24, which cannot be represented in the single halfword cmode
// bit, so selection could produce an unencodable immediate. Both patterns now
// use the LSLH operand, consistent with NeonI_mov_imm_lsl_sizes above.
1210 multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
1211                                                    SDPatternOperator opnode,
1212                                                    SDPatternOperator neonopnode>
1213 {
1214   let Constraints = "$src = $Rd" in {
1215     // shift zeros, per word
1216     def _2S  : NeonI_1VModImm<0b0, op,
1217                  (outs VPR64:$Rd),
1218                  (ins VPR64:$src, neon_uimm8:$Imm,
1219                    neon_mov_imm_LSL_operand:$Simm),
1220                  !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
1221                  [(set (v2i32 VPR64:$Rd),
1222                     (v2i32 (opnode (v2i32 VPR64:$src),
1223                       (v2i32 (bitconvert (v2i32 (neonopnode timm:$Imm,
1224                         neon_mov_imm_LSL_operand:$Simm)))))))],
1225                  NoItinerary> {
1226       bits<2> Simm;
1227       let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
1228     }
1229
1230     def _4S  : NeonI_1VModImm<0b1, op,
1231                  (outs VPR128:$Rd),
1232                  (ins VPR128:$src, neon_uimm8:$Imm,
1233                    neon_mov_imm_LSL_operand:$Simm),
1234                  !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
1235                  [(set (v4i32 VPR128:$Rd),
1236                     (v4i32 (opnode (v4i32 VPR128:$src),
1237                       (v4i32 (bitconvert (v4i32 (neonopnode timm:$Imm,
1238                         neon_mov_imm_LSL_operand:$Simm)))))))],
1239                  NoItinerary> {
1240       bits<2> Simm;
1241       let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
1242     }
1243
1244     // shift zeros, per halfword
1245     def _4H  : NeonI_1VModImm<0b0, op,
1246                  (outs VPR64:$Rd),
1247                  (ins VPR64:$src, neon_uimm8:$Imm,
1248                    neon_mov_imm_LSLH_operand:$Simm),
1249                  !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
1250                  [(set (v4i16 VPR64:$Rd),
1251                     (v4i16 (opnode (v4i16 VPR64:$src),
1252                        (v4i16 (bitconvert (v4i16 (neonopnode timm:$Imm,
1253                           neon_mov_imm_LSLH_operand:$Simm)))))))],
1254                  NoItinerary> {
1255       bit  Simm;
1256       let cmode = {0b1, 0b0, Simm, 0b1};
1257     }
1258
1259     def _8H  : NeonI_1VModImm<0b1, op,
1260                  (outs VPR128:$Rd),
1261                  (ins VPR128:$src, neon_uimm8:$Imm,
1262                    neon_mov_imm_LSLH_operand:$Simm),
1263                  !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
1264                  [(set (v8i16 VPR128:$Rd),
1265                     (v8i16 (opnode (v8i16 VPR128:$src),
1266                       (v8i16 (bitconvert (v8i16 (neonopnode timm:$Imm,
1267                         neon_mov_imm_LSLH_operand:$Simm)))))))],
1268                  NoItinerary> {
1269       bit Simm;
1270       let cmode = {0b1, 0b0, Simm, 0b1};
1271     }
1272   }
1273 }
1274
     // Vector modified-immediate moves with the MSL ("shift ones in") shifter,
     // per 32-bit word: <asmop> Vd.<2s|4s>, #Imm, MSL #<8|16>.
     // The single Simm bit picks the MSL amount and lands in cmode<0>;
     // cmode<3:1> is fixed to 0b110 for the MSL forms (see the `let cmode`
     // lines below).
1275 multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op,
1276                                    SDPatternOperator opnode>
1277 {
1278     // shift ones, per word
1279     def _2S  : NeonI_1VModImm<0b0, op,
1280                              (outs VPR64:$Rd),
1281                              (ins neon_uimm8:$Imm,
1282                                neon_mov_imm_MSL_operand:$Simm),
1283                              !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
1284                               [(set (v2i32 VPR64:$Rd),
1285                                  (v2i32 (opnode (timm:$Imm),
1286                                    (neon_mov_imm_MSL_operand:$Simm))))],
1287                              NoItinerary> {
1288        bit Simm;
1289        let cmode = {0b1, 0b1, 0b0, Simm};
1290      }
1291
1292    def _4S  : NeonI_1VModImm<0b1, op,
1293                               (outs VPR128:$Rd),
1294                               (ins neon_uimm8:$Imm,
1295                                 neon_mov_imm_MSL_operand:$Simm),
1296                               !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
1297                               [(set (v4i32 VPR128:$Rd),
1298                                  (v4i32 (opnode (timm:$Imm),
1299                                    (neon_mov_imm_MSL_operand:$Simm))))],
1300                               NoItinerary> {
1301      bit Simm;
1302      let cmode = {0b1, 0b1, 0b0, Simm};
1303    }
1304 }
1305
1306 // Vector Move Immediate Shifted
     // Pure immediate materializations: safe to rematerialize instead of
     // spilling, since the result depends only on the encoded immediate.
1307 let isReMaterializable = 1 in {
1308 defm MOVIvi_lsl : NeonI_mov_imm_lsl_sizes<"movi", 0b0, Neon_movi>;
1309 }
1310
1311 // Vector Move Inverted Immediate Shifted
1312 let isReMaterializable = 1 in {
1313 defm MVNIvi_lsl : NeonI_mov_imm_lsl_sizes<"mvni", 0b1, Neon_mvni>;
1314 }
1315
1316 // Vector Bitwise Bit Clear (AND NOT) - immediate
     // BIC matches (and $src, Neon_mvni imm): the MVNI node already carries the
     // inverted expanded immediate.
1317 let isReMaterializable = 1 in {
1318 defm BICvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"bic", 0b1,
1319                                                          and, Neon_mvni>;
1320 }
1321
1322 // Vector Bitwise OR - immediate
1323
1324 let isReMaterializable = 1 in {
1325 defm ORRvi_lsl   : NeonI_mov_imm_with_constraint_lsl_sizes<"orr", 0b0,
1326                                                            or, Neon_movi>;
1327 }
1328
1329 // Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immedidate
1330 // LowerBUILD_VECTOR favors lowering MOVI over MVNI.
1331 // BIC immediate instructions selection requires additional patterns to
1332 // transform Neon_movi operands into BIC immediate operands
1333
     // XForm: given an encoded per-halfword modified-immediate cmode value,
     // produce the opposite LSLH shift encoding (0 <-> 1), used to flip which
     // byte of each halfword a BIC masks off.
1334 def neon_mov_imm_LSLH_transform_XFORM : SDNodeXForm<imm, [{
1335   uint64_t OpCmode = N->getZExtValue();
1336   unsigned ShiftImm;
1337   unsigned ShiftOnesIn;
1338   (void)A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
1339   // LSLH restricts shift amount to 0, 8 which are encoded as 0 and 1
1340   // Transform encoded shift amount 0 to 1 and 1 to 0.
1341   return CurDAG->getTargetConstant(!ShiftImm, MVT::i32);
1342 }]>;
1343
     // Matches an encoded LSL-shifter immediate (shift-zeros form only, i.e.
     // ShiftOnesIn clear) and rewrites it with the XForm above when emitted.
1344 def neon_mov_imm_LSLH_transform_operand
1345   : ImmLeaf<i32, [{
1346     unsigned ShiftImm;
1347     unsigned ShiftOnesIn;
1348     unsigned HasShift =
1349       A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
1350     return (HasShift && !ShiftOnesIn); }],
1351   neon_mov_imm_LSLH_transform_XFORM>;
1352
1353 // Transform (and A, (4h Neon_movi 0xff)) -> BIC 4h (A, 0x00, LSL 8)
1354 // Transform (and A, (4h Neon_movi 0xff LSL #8)) -> BIC 4h (A, 0x00)
1355 def : Pat<(v4i16 (and VPR64:$src,
1356             (v4i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
1357           (BICvi_lsl_4H VPR64:$src, 0,
1358             neon_mov_imm_LSLH_transform_operand:$Simm)>;
1359
1360 // Transform (and A, (8h Neon_movi 0xff)) -> BIC 8h (A, 0x00, LSL 8)
1361 // Transform (and A, (8h Neon_movi 0xff LSL #8)) -> BIC 8h (A, 0x00)
1362 def : Pat<(v8i16 (and VPR128:$src,
1363             (v8i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
1364           (BICvi_lsl_8H VPR128:$src, 0,
1365             neon_mov_imm_LSLH_transform_operand:$Simm)>;
1366
1367
     // Match a vector bitwise op against a per-halfword modified-immediate
     // (Neon_movi/Neon_mvni) seen through a bitconvert, for all vector types
     // that share a register width with v4i16 (64-bit) or v8i16 (128-bit), and
     // select the corresponding 4H/8H immediate instruction.
1368 multiclass Neon_bitwiseVi_patterns<SDPatternOperator opnode,
1369                                    SDPatternOperator neonopnode,
1370                                    Instruction INST4H,
1371                                    Instruction INST8H> {
     // 64-bit register class: v8i8 and v1i64 views of a v4i16 immediate.
1372   def : Pat<(v8i8 (opnode VPR64:$src,
1373                     (bitconvert(v4i16 (neonopnode timm:$Imm,
1374                       neon_mov_imm_LSLH_operand:$Simm))))),
1375             (INST4H VPR64:$src, neon_uimm8:$Imm,
1376               neon_mov_imm_LSLH_operand:$Simm)>;
1377   def : Pat<(v1i64 (opnode VPR64:$src,
1378                   (bitconvert(v4i16 (neonopnode timm:$Imm,
1379                     neon_mov_imm_LSLH_operand:$Simm))))),
1380           (INST4H VPR64:$src, neon_uimm8:$Imm,
1381             neon_mov_imm_LSLH_operand:$Simm)>;
1382
     // 128-bit register class: v16i8, v4i32 and v2i64 views of a v8i16
     // immediate.
1383   def : Pat<(v16i8 (opnode VPR128:$src,
1384                    (bitconvert(v8i16 (neonopnode timm:$Imm,
1385                      neon_mov_imm_LSLH_operand:$Simm))))),
1386           (INST8H VPR128:$src, neon_uimm8:$Imm,
1387             neon_mov_imm_LSLH_operand:$Simm)>;
1388   def : Pat<(v4i32 (opnode VPR128:$src,
1389                    (bitconvert(v8i16 (neonopnode timm:$Imm,
1390                      neon_mov_imm_LSLH_operand:$Simm))))),
1391           (INST8H VPR128:$src, neon_uimm8:$Imm,
1392             neon_mov_imm_LSLH_operand:$Simm)>;
1393   def : Pat<(v2i64 (opnode VPR128:$src,
1394                    (bitconvert(v8i16 (neonopnode timm:$Imm,
1395                      neon_mov_imm_LSLH_operand:$Simm))))),
1396           (INST8H VPR128:$src, neon_uimm8:$Imm,
1397             neon_mov_imm_LSLH_operand:$Simm)>;
1398 }
1399
1400 // Additional patterns for Vector Vector Bitwise Bit Clear (AND NOT) - immediate
1401 defm : Neon_bitwiseVi_patterns<or, Neon_mvni, BICvi_lsl_4H, BICvi_lsl_8H>;
1402
1403 // Additional patterns for Vector Bitwise OR - immedidate
1404 defm : Neon_bitwiseVi_patterns<or, Neon_movi, ORRvi_lsl_4H, ORRvi_lsl_8H>;
1405
1406
1407 // Vector Move Immediate Masked
     // MSL-shifter ("shift ones in") forms of MOVI/MVNI; rematerializable
     // because the result depends only on the immediate.
1408 let isReMaterializable = 1 in {
1409 defm MOVIvi_msl : NeonI_mov_imm_msl_sizes<"movi", 0b0, Neon_movi>;
1410 }
1411
1412 // Vector Move Inverted Immediate Masked
1413 let isReMaterializable = 1 in {
1414 defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>;
1415 }
1416
     // Assembly alias: lets "<op> Vd.<T>, #Imm" (no shifter) parse as the LSL
     // instruction with a zero shift operand.  The trailing 0b0 marks the
     // alias as parse-only (not used for printing).
1417 class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane,
1418                                 Instruction inst, RegisterOperand VPRC>
1419   : NeonInstAlias<!strconcat(asmop, "\t$Rd," # asmlane # ", $Imm"),
1420                         (inst VPRC:$Rd, neon_uimm8:$Imm,  0), 0b0>;
1421
1422 // Aliases for Vector Move Immediate Shifted
1423 def : NeonI_mov_imm_lsl_aliases<"movi", ".2s", MOVIvi_lsl_2S, VPR64>;
1424 def : NeonI_mov_imm_lsl_aliases<"movi", ".4s", MOVIvi_lsl_4S, VPR128>;
1425 def : NeonI_mov_imm_lsl_aliases<"movi", ".4h", MOVIvi_lsl_4H, VPR64>;
1426 def : NeonI_mov_imm_lsl_aliases<"movi", ".8h", MOVIvi_lsl_8H, VPR128>;
1427
1428 // Aliases for Vector Move Inverted Immediate Shifted
1429 def : NeonI_mov_imm_lsl_aliases<"mvni", ".2s", MVNIvi_lsl_2S, VPR64>;
1430 def : NeonI_mov_imm_lsl_aliases<"mvni", ".4s", MVNIvi_lsl_4S, VPR128>;
1431 def : NeonI_mov_imm_lsl_aliases<"mvni", ".4h", MVNIvi_lsl_4H, VPR64>;
1432 def : NeonI_mov_imm_lsl_aliases<"mvni", ".8h", MVNIvi_lsl_8H, VPR128>;
1433
1434 // Aliases for Vector Bitwise Bit Clear (AND NOT) - immediate
1435 def : NeonI_mov_imm_lsl_aliases<"bic", ".2s", BICvi_lsl_2S, VPR64>;
1436 def : NeonI_mov_imm_lsl_aliases<"bic", ".4s", BICvi_lsl_4S, VPR128>;
1437 def : NeonI_mov_imm_lsl_aliases<"bic", ".4h", BICvi_lsl_4H, VPR64>;
1438 def : NeonI_mov_imm_lsl_aliases<"bic", ".8h", BICvi_lsl_8H, VPR128>;
1439
1440 // Aliases for Vector Bitwise OR - immediate
1441 def : NeonI_mov_imm_lsl_aliases<"orr", ".2s", ORRvi_lsl_2S, VPR64>;
1442 def : NeonI_mov_imm_lsl_aliases<"orr", ".4s", ORRvi_lsl_4S, VPR128>;
1443 def : NeonI_mov_imm_lsl_aliases<"orr", ".4h", ORRvi_lsl_4H, VPR64>;
1444 def : NeonI_mov_imm_lsl_aliases<"orr", ".8h", ORRvi_lsl_8H, VPR128>;
1445
1446 //  Vector Move Immediate - per byte
     // cmode = 0b1110 selects the 8-bit / 64-bit-mask immediate class; the
     // second NEON_MOVIMM operand (shift) is ignored for these forms, hence
     // the wildcard (i32 imm) in the patterns.
1447 let isReMaterializable = 1 in {
1448 def MOVIvi_8B : NeonI_1VModImm<0b0, 0b0,
1449                                (outs VPR64:$Rd), (ins neon_uimm8:$Imm),
1450                                "movi\t$Rd.8b, $Imm",
1451                                [(set (v8i8 VPR64:$Rd),
1452                                   (v8i8 (Neon_movi (timm:$Imm), (i32 imm))))],
1453                                 NoItinerary> {
1454   let cmode = 0b1110;
1455 }
1456
1457 def MOVIvi_16B : NeonI_1VModImm<0b1, 0b0,
1458                                 (outs VPR128:$Rd), (ins neon_uimm8:$Imm),
1459                                 "movi\t$Rd.16b, $Imm",
1460                                 [(set (v16i8 VPR128:$Rd),
1461                                    (v16i8 (Neon_movi (timm:$Imm), (i32 imm))))],
1462                                  NoItinerary> {
1463   let cmode = 0b1110;
1464 }
1465 }
1466
1467 // Vector Move Immediate - bytemask, per double word
     // Each immediate bit expands to a full 0x00/0xFF byte of the result.
1468 let isReMaterializable = 1 in {
1469 def MOVIvi_2D : NeonI_1VModImm<0b1, 0b1,
1470                                (outs VPR128:$Rd), (ins neon_uimm64_mask:$Imm),
1471                                "movi\t $Rd.2d, $Imm",
1472                                [(set (v2i64 VPR128:$Rd),
1473                                   (v2i64 (Neon_movi (timm:$Imm), (i32 imm))))],
1474                                NoItinerary> {
1475   let cmode = 0b1110;
1476 }
1477 }
1478
1479 // Vector Move Immediate - bytemask, one doubleword
     // Scalar-D form writing a 64-bit FPR viewed as v1i64.
1480
1481 let isReMaterializable = 1 in {
1482 def MOVIdi : NeonI_1VModImm<0b0, 0b1,
1483                            (outs FPR64:$Rd), (ins neon_uimm64_mask:$Imm),
1484                            "movi\t $Rd, $Imm",
1485                            [(set (v1i64 FPR64:$Rd),
1486                              (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))],
1487                            NoItinerary> {
1488   let cmode = 0b1110;
1489 }
1490 }
1491
1492 // Vector Floating Point Move Immediate
     // FMOV Vd.<T>, #fpimm using the modified-immediate class cmode = 0b1111;
     // the 8-bit immediate operand is expanded to the vector FP constant by
     // the NEON_FMOVIMM node.
1493
1494 class NeonI_FMOV_impl<string asmlane, RegisterOperand VPRC, ValueType OpTy,
1495                       Operand immOpType, bit q, bit op>
1496   : NeonI_1VModImm<q, op,
1497                    (outs VPRC:$Rd), (ins immOpType:$Imm),
1498                    "fmov\t$Rd" # asmlane # ", $Imm",
1499                    [(set (OpTy VPRC:$Rd),
1500                       (OpTy (Neon_fmovi (timm:$Imm))))],
1501                    NoItinerary> {
1502      let cmode = 0b1111;
1503    }
1504
1505 let isReMaterializable = 1 in {
1506 def FMOVvi_2S : NeonI_FMOV_impl<".2s", VPR64,  v2f32, fmov32_operand, 0b0, 0b0>;
1507 def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>;
1508 def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>;
1509 }
1510
1511 // Vector Shift (Immediate)
1512 // Immediate in [0, 63]
1513 def imm0_63 : Operand<i32> {
1514   let ParserMatchClass = uimm6_asmoperand;
1515 }
1516
1517 // Shift Right/Left Immediate - The immh:immb field of these shifts are encoded
1518 // as follows:
1519 //
1520 //    Offset    Encoding
1521 //     8        immh:immb<6:3> = '0001xxx', <imm> is encoded in immh:immb<2:0>
1522 //     16       immh:immb<6:4> = '001xxxx', <imm> is encoded in immh:immb<3:0>
1523 //     32       immh:immb<6:5> = '01xxxxx', <imm> is encoded in immh:immb<4:0>
1524 //     64       immh:immb<6>   = '1xxxxxx', <imm> is encoded in immh:immb<5:0>
1525 //
1526 // The shift right immediate amount, in the range 1 to element bits, is computed
1527 // as Offset - UInt(immh:immb).  The shift left immediate amount, in the range 0
1528 // to element bits - 1, is computed as UInt(immh:immb) - Offset.
1529
     // Asm-parser operand class for right-shift amounts of an OFFSET-bit
     // element; the matching diagnostic is keyed by the same name.
1530 class shr_imm_asmoperands<string OFFSET> : AsmOperandClass {
1531   let Name = "ShrImm" # OFFSET;
1532   let RenderMethod = "addImmOperands";
1533   let DiagnosticType = "ShrImm" # OFFSET;
1534 }
1535
     // MC-layer operand: encoder/decoder apply the Offset - UInt(immh:immb)
     // bias described above.
1536 class shr_imm<string OFFSET> : Operand<i32> {
1537   let EncoderMethod = "getShiftRightImm" # OFFSET;
1538   let DecoderMethod = "DecodeShiftRightImm" # OFFSET;
1539   let ParserMatchClass =
1540     !cast<AsmOperandClass>("shr_imm" # OFFSET # "_asmoperand");
1541 }
1542
1543 def shr_imm8_asmoperand : shr_imm_asmoperands<"8">;
1544 def shr_imm16_asmoperand : shr_imm_asmoperands<"16">;
1545 def shr_imm32_asmoperand : shr_imm_asmoperands<"32">;
1546 def shr_imm64_asmoperand : shr_imm_asmoperands<"64">;
1547
     // Right-shift amounts are 1..element-bits inclusive (0 is not encodable).
1548 def shr_imm8 : shr_imm<"8">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 8;}]>;
1549 def shr_imm16 : shr_imm<"16">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 16;}]>;
1550 def shr_imm32 : shr_imm<"32">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 32;}]>;
1551 def shr_imm64 : shr_imm<"64">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 64;}]>;
1552
     // Asm-parser operand class for left-shift amounts of an OFFSET-bit
     // element.
1553 class shl_imm_asmoperands<string OFFSET> : AsmOperandClass {
1554   let Name = "ShlImm" # OFFSET;
1555   let RenderMethod = "addImmOperands";
1556   let DiagnosticType = "ShlImm" # OFFSET;
1557 }
1558
     // MC-layer operand: encoder/decoder apply the UInt(immh:immb) - Offset
     // bias described in the comment block above.
1559 class shl_imm<string OFFSET> : Operand<i32> {
1560   let EncoderMethod = "getShiftLeftImm" # OFFSET;
1561   let DecoderMethod = "DecodeShiftLeftImm" # OFFSET;
1562   let ParserMatchClass =
1563     !cast<AsmOperandClass>("shl_imm" # OFFSET # "_asmoperand");
1564 }
1565
1566 def shl_imm8_asmoperand : shl_imm_asmoperands<"8">;
1567 def shl_imm16_asmoperand : shl_imm_asmoperands<"16">;
1568 def shl_imm32_asmoperand : shl_imm_asmoperands<"32">;
1569 def shl_imm64_asmoperand : shl_imm_asmoperands<"64">;
1570
     // Left-shift amounts are 0..element-bits-1; accepting 0 here is what lets
     // a "shift left by 0" node (e.g. of type v1i64) still be selected.
1571 def shl_imm8 : shl_imm<"8">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 8;}]>;
1572 def shl_imm16 : shl_imm<"16">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 16;}]>;
1573 def shl_imm32 : shl_imm<"32">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 32;}]>;
1574 def shl_imm64 : shl_imm<"64">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 64;}]>;
1575
     // Vector shift by immediate where the DAG carries the scalar amount as a
     // splat (Neon_vdup) of an i32 constant, as produced for the `shl`/`sra`/
     // `srl` ISD nodes.
1576 class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
1577                RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode>
1578   : NeonI_2VShiftImm<q, u, opcode,
1579                      (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
1580                      asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1581                      [(set (Ty VPRC:$Rd),
1582                         (Ty (OpNode (Ty VPRC:$Rn),
1583                           (Ty (Neon_vdup (i32 ImmTy:$Imm))))))],
1584                      NoItinerary>;
1585
     // SHL (immediate) for every integer vector shape; each variant pins the
     // immh:immb prefix that selects its element size (see the encoding table
     // above).
1586 multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> {
1587   // 64-bit vector types.
1588   def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8, shl> {
1589     let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
1590   }
1591
1592   def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16, shl> {
1593     let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
1594   }
1595
1596   def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32, shl> {
1597     let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
1598   }
1599
1600   // 128-bit vector types.
1601   def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8, shl> {
1602     let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
1603   }
1604
1605   def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16, shl> {
1606     let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
1607   }
1608
1609   def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32, shl> {
1610     let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
1611   }
1612
1613   def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64, shl> {
1614     let Inst{22} = 0b1;        // immh:immb = 1xxxxxx
1615   }
1616 }
1617
     // Right shift (SSHR/USHR) by immediate; OpNode is sra or srl.  Same
     // immh:immb element-size prefixes as NeonI_N2VShL.
1618 multiclass NeonI_N2VShR<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
1619   def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1620                      OpNode> {
1621     let Inst{22-19} = 0b0001;
1622   }
1623
1624   def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1625                      OpNode> {
1626     let Inst{22-20} = 0b001;
1627   }
1628
1629   def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1630                      OpNode> {
1631      let Inst{22-21} = 0b01;
1632   }
1633
1634   def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1635                       OpNode> {
1636                       let Inst{22-19} = 0b0001;
1637                     }
1638
1639   def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1640                      OpNode> {
1641                      let Inst{22-20} = 0b001;
1642                     }
1643
1644   def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1645                      OpNode> {
1646                       let Inst{22-21} = 0b01;
1647                     }
1648
1649   def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1650                      OpNode> {
1651                       let Inst{22} = 0b1;
1652                     }
1653 }
1654
1655 // Shift left
1656 defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">;
1657
1658 // Shift right
1659 defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>;
1660 defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>;
1661
     // Fragments selecting the high half of a 128-bit vector (element index =
     // half the element count)...
1662 def Neon_High16B : PatFrag<(ops node:$in),
1663                            (extract_subvector (v16i8 node:$in), (iPTR 8))>;
1664 def Neon_High8H  : PatFrag<(ops node:$in),
1665                            (extract_subvector (v8i16 node:$in), (iPTR 4))>;
1666 def Neon_High4S  : PatFrag<(ops node:$in),
1667                            (extract_subvector (v4i32 node:$in), (iPTR 2))>;
1668 def Neon_High2D  : PatFrag<(ops node:$in),
1669                            (extract_subvector (v2i64 node:$in), (iPTR 1))>;
1670 def Neon_High4float : PatFrag<(ops node:$in),
1671                                (extract_subvector (v4f32 node:$in), (iPTR 2))>;
1672 def Neon_High2double : PatFrag<(ops node:$in),
1673                                (extract_subvector (v2f64 node:$in), (iPTR 1))>;
1674
     // ...and the low half (extract from element 0), with the 64-bit result
     // type stated explicitly.
1675 def Neon_Low16B : PatFrag<(ops node:$in),
1676                           (v8i8 (extract_subvector (v16i8 node:$in),
1677                                                    (iPTR 0)))>;
1678 def Neon_Low8H : PatFrag<(ops node:$in),
1679                          (v4i16 (extract_subvector (v8i16 node:$in),
1680                                                    (iPTR 0)))>;
1681 def Neon_Low4S : PatFrag<(ops node:$in),
1682                          (v2i32 (extract_subvector (v4i32 node:$in),
1683                                                    (iPTR 0)))>;
1684 def Neon_Low2D : PatFrag<(ops node:$in),
1685                          (v1i64 (extract_subvector (v2i64 node:$in),
1686                                                    (iPTR 0)))>;
1687 def Neon_Low4float : PatFrag<(ops node:$in),
1688                              (v2f32 (extract_subvector (v4f32 node:$in),
1689                                                        (iPTR 0)))>;
1690 def Neon_Low2double : PatFrag<(ops node:$in),
1691                               (v1f64 (extract_subvector (v2f64 node:$in),
1692                                                         (iPTR 0)))>;
1693
     // Widening shift left: matches (shl (ext Rn), splat(Imm)) where ExtOp is
     // sext or zext of the 64-bit source into the 128-bit destination type.
1694 class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1695                    string SrcT, ValueType DestTy, ValueType SrcTy,
1696                    Operand ImmTy, SDPatternOperator ExtOp>
1697   : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1698                      (ins VPR64:$Rn, ImmTy:$Imm),
1699                      asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1700                      [(set (DestTy VPR128:$Rd),
1701                         (DestTy (shl
1702                           (DestTy (ExtOp (SrcTy VPR64:$Rn))),
1703                             (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
1704                      NoItinerary>;
1705
     // "2" (second-half) variant: same widening shift applied to the high half
     // of a 128-bit source, selected via the getTop PatFrag.
1706 class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
1707                        string SrcT, ValueType DestTy, ValueType SrcTy,
1708                        int StartIndex, Operand ImmTy,
1709                        SDPatternOperator ExtOp, PatFrag getTop>
1710   : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
1711                      (ins VPR128:$Rn, ImmTy:$Imm),
1712                      asmop # "2\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
1713                      [(set (DestTy VPR128:$Rd),
1714                         (DestTy (shl
1715                           (DestTy (ExtOp
1716                             (SrcTy (getTop VPR128:$Rn)))),
1717                               (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
1718                      NoItinerary>;
1719
     // SSHLL/USHLL (shift left long) for all narrow element types, plus
     // fall-back patterns that select the same instructions with a zero shift
     // when the DAG is a plain sext/zext (no shl node present).
1720 multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop,
1721                          SDNode ExtOp> {
1722   // 64-bit vector types.
1723   def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8,
1724                          shl_imm8, ExtOp> {
1725     let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
1726   }
1727
1728   def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16,
1729                          shl_imm16, ExtOp> {
1730     let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
1731   }
1732
1733   def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32,
1734                          shl_imm32, ExtOp> {
1735     let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
1736   }
1737
1738   // 128-bit vector types
1739   def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b", v8i16, v8i8,
1740                               8, shl_imm8, ExtOp, Neon_High16B> {
1741     let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
1742   }
1743
1744   def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h", v4i32, v4i16,
1745                              4, shl_imm16, ExtOp, Neon_High8H> {
1746     let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
1747   }
1748
1749   def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s", v2i64, v2i32,
1750                              2, shl_imm32, ExtOp, Neon_High4S> {
1751     let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
1752   }
1753
1754   // Use other patterns to match when the immediate is 0.
     // A bare extension legalizes without the shl wrapper, so map it to the
     // same instruction with an explicit zero shift amount.
1755   def : Pat<(v8i16 (ExtOp (v8i8 VPR64:$Rn))),
1756             (!cast<Instruction>(prefix # "_8B") VPR64:$Rn, 0)>;
1757
1758   def : Pat<(v4i32 (ExtOp (v4i16 VPR64:$Rn))),
1759             (!cast<Instruction>(prefix # "_4H") VPR64:$Rn, 0)>;
1760
1761   def : Pat<(v2i64 (ExtOp (v2i32 VPR64:$Rn))),
1762             (!cast<Instruction>(prefix # "_2S") VPR64:$Rn, 0)>;
1763
1764   def : Pat<(v8i16 (ExtOp (v8i8 (Neon_High16B VPR128:$Rn)))),
1765             (!cast<Instruction>(prefix # "_16B") VPR128:$Rn, 0)>;
1766
1767   def : Pat<(v4i32 (ExtOp (v4i16 (Neon_High8H VPR128:$Rn)))),
1768             (!cast<Instruction>(prefix # "_8H") VPR128:$Rn, 0)>;
1769
1770   def : Pat<(v2i64 (ExtOp (v2i32 (Neon_High4S VPR128:$Rn)))),
1771             (!cast<Instruction>(prefix # "_4S") VPR128:$Rn, 0)>;
1772 }
1773
1774 // Shift left long
1775 defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>;
1776 defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>;
1777
1778 // Rounding/Saturating shift
     // Unlike N2VShift, the shift amount is passed to the target node/intrinsic
     // as a plain i32 operand (no Neon_vdup splat).
1779 class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T,
1780                   RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1781                   SDPatternOperator OpNode>
1782   : NeonI_2VShiftImm<q, u, opcode,
1783                      (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
1784                      asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1785                      [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn),
1786                         (i32 ImmTy:$Imm))))],
1787                      NoItinerary>;
1788
1789 // shift right (vector by immediate)
1790 multiclass NeonI_N2VShR_RQ<bit u, bits<5> opcode, string asmop,
1791                            SDPatternOperator OpNode> {
1792   def _8B  : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1793                          OpNode> {
1794     let Inst{22-19} = 0b0001;
1795   }
1796
1797   def _4H  : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1798                          OpNode> {
1799     let Inst{22-20} = 0b001;
1800   }
1801
1802   def _2S  : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1803                          OpNode> {
1804     let Inst{22-21} = 0b01;
1805   }
1806
1807   def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1808                          OpNode> {
1809     let Inst{22-19} = 0b0001;
1810   }
1811
1812   def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1813                         OpNode> {
1814     let Inst{22-20} = 0b001;
1815   }
1816
1817   def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1818                         OpNode> {
1819     let Inst{22-21} = 0b01;
1820   }
1821
1822   def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1823                         OpNode> {
1824     let Inst{22} = 0b1;
1825   }
1826 }
1827
     // Saturating (and unsigned-saturating) shift left by immediate; same
     // N2VShift_RQ shape but with shl_imm* operands.
1828 multiclass NeonI_N2VShL_Q<bit u, bits<5> opcode, string asmop,
1829                           SDPatternOperator OpNode> {
1830   // 64-bit vector types.
1831   def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8,
1832                         OpNode> {
1833     let Inst{22-19} = 0b0001;
1834   }
1835
1836   def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16,
1837                         OpNode> {
1838     let Inst{22-20} = 0b001;
1839   }
1840
1841   def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32,
1842                         OpNode> {
1843     let Inst{22-21} = 0b01;
1844   }
1845
1846   // 128-bit vector types.
1847   def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8,
1848                          OpNode> {
1849     let Inst{22-19} = 0b0001;
1850   }
1851
1852   def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16,
1853                         OpNode> {
1854     let Inst{22-20} = 0b001;
1855   }
1856
1857   def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32,
1858                         OpNode> {
1859     let Inst{22-21} = 0b01;
1860   }
1861
1862   def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64,
1863                         OpNode> {
1864     let Inst{22} = 0b1;
1865   }
1866 }
1867
1868 // Rounding shift right
1869 defm SRSHRvvi : NeonI_N2VShR_RQ<0b0, 0b00100, "srshr",
1870                                 int_aarch64_neon_vsrshr>;
1871 defm URSHRvvi : NeonI_N2VShR_RQ<0b1, 0b00100, "urshr",
1872                                 int_aarch64_neon_vurshr>;
1873
1874 // Saturating shift left unsigned
1875 defm SQSHLUvvi : NeonI_N2VShL_Q<0b1, 0b01100, "sqshlu", int_aarch64_neon_vsqshlu>;
1876
1877 // Saturating shift left
1878 defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>;
1879 defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>;
1880
     // Shift-and-accumulate: Rd += (Rn shift Imm).  The accumulator is the
     // tied $src operand ("$src = $Rd"); shift amount is matched as a splat.
1881 class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T,
1882                   RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1883                   SDNode OpNode>
1884   : NeonI_2VShiftImm<q, u, opcode,
1885            (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1886            asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1887            [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
1888               (Ty (OpNode (Ty VPRC:$Rn),
1889                 (Ty (Neon_vdup (i32 ImmTy:$Imm))))))))],
1890            NoItinerary> {
1891   let Constraints = "$src = $Rd";
1892 }
1893
1894 // Shift Right accumulate
1895 multiclass NeonI_N2VShRAdd<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
1896   def _8B : N2VShiftAdd<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1897                         OpNode> {
1898     let Inst{22-19} = 0b0001;
1899   }
1900
1901   def _4H : N2VShiftAdd<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1902                         OpNode> {
1903     let Inst{22-20} = 0b001;
1904   }
1905
1906   def _2S : N2VShiftAdd<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1907                         OpNode> {
1908     let Inst{22-21} = 0b01;
1909   }
1910
1911   def _16B : N2VShiftAdd<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1912                          OpNode> {
1913     let Inst{22-19} = 0b0001;
1914   }
1915
1916   def _8H : N2VShiftAdd<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1917                         OpNode> {
1918     let Inst{22-20} = 0b001;
1919   }
1920
1921   def _4S : N2VShiftAdd<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1922                         OpNode> {
1923     let Inst{22-21} = 0b01;
1924   }
1925
1926   def _2D : N2VShiftAdd<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1927                         OpNode> {
1928     let Inst{22} = 0b1;
1929   }
1930 }
1931
1932 // Shift right and accumulate
1933 defm SSRAvvi    : NeonI_N2VShRAdd<0, 0b00010, "ssra", sra>;
1934 defm USRAvvi    : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>;
1935
1936 // Rounding shift accumulate
     // Like N2VShiftAdd, but OpNode takes the shift amount as a plain i32
     // (intrinsic-style), matching the vsrshr/vurshr nodes.
1937 class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T,
1938                     RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
1939                     SDPatternOperator OpNode>
1940   : NeonI_2VShiftImm<q, u, opcode,
1941                      (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
1942                      asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
1943                      [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
1944                         (Ty (OpNode (Ty VPRC:$Rn), (i32 ImmTy:$Imm))))))],
1945                      NoItinerary> {
1946   let Constraints = "$src = $Rd";
1947 }
1948
1949 multiclass NeonI_N2VShRAdd_R<bit u, bits<5> opcode, string asmop,
1950                              SDPatternOperator OpNode> {
1951   def _8B : N2VShiftAdd_R<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
1952                           OpNode> {
1953     let Inst{22-19} = 0b0001;
1954   }
1955
1956   def _4H : N2VShiftAdd_R<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
1957                           OpNode> {
1958     let Inst{22-20} = 0b001;
1959   }
1960
1961   def _2S : N2VShiftAdd_R<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
1962                           OpNode> {
1963     let Inst{22-21} = 0b01;
1964   }
1965
1966   def _16B : N2VShiftAdd_R<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
1967                            OpNode> {
1968     let Inst{22-19} = 0b0001;
1969   }
1970
1971   def _8H : N2VShiftAdd_R<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
1972                           OpNode> {
1973     let Inst{22-20} = 0b001;
1974   }
1975
1976   def _4S : N2VShiftAdd_R<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
1977                           OpNode> {
1978     let Inst{22-21} = 0b01;
1979   }
1980
1981   def _2D : N2VShiftAdd_R<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
1982                           OpNode> {
1983     let Inst{22} = 0b1;
1984   }
1985 }
1986
1987 // Rounding shift right and accumulate
1988 defm SRSRAvvi : NeonI_N2VShRAdd_R<0, 0b00110, "srsra", int_aarch64_neon_vsrshr>;
1989 defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>;
1990
// Shift insert by immediate
// The shifted $Rn is inserted into the existing contents of $Rd, so the
// destination register is also a source ($src, tied below).
class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T,
                  RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
                  SDPatternOperator OpNode>
    : NeonI_2VShiftImm<q, u, opcode,
           (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
           asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
           [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$src), (Ty VPRC:$Rn),
             (i32 ImmTy:$Imm))))],
           NoItinerary> {
  // Read-modify-write: the instruction merges into the destination register.
  let Constraints = "$src = $Rd";
}
2003
// shift left insert (vector by immediate)
// All variants lower to the int_aarch64_neon_vsli intrinsic; Inst{22-19}
// encodes element size alongside the left-shift amount (shl_imm*).
multiclass NeonI_N2VShLIns<bit u, bits<5> opcode, string asmop> {
  // 64-bit vector types.
  def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8,
                        int_aarch64_neon_vsli> {
    let Inst{22-19} = 0b0001;
  }

  def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16,
                        int_aarch64_neon_vsli> {
    let Inst{22-20} = 0b001;
  }

  def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32,
                        int_aarch64_neon_vsli> {
    let Inst{22-21} = 0b01;
  }

    // 128-bit vector types
  def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8,
                         int_aarch64_neon_vsli> {
    let Inst{22-19} = 0b0001;
  }

  def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16,
                        int_aarch64_neon_vsli> {
    let Inst{22-20} = 0b001;
  }

  def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32,
                        int_aarch64_neon_vsli> {
    let Inst{22-21} = 0b01;
  }

  def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64,
                        int_aarch64_neon_vsli> {
    let Inst{22} = 0b1;
  }
}
2042
// shift right insert (vector by immediate)
// Mirror of NeonI_N2VShLIns but with right-shift immediates (shr_imm*) and
// the int_aarch64_neon_vsri intrinsic.
multiclass NeonI_N2VShRIns<bit u, bits<5> opcode, string asmop> {
    // 64-bit vector types.
  def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
                        int_aarch64_neon_vsri> {
    let Inst{22-19} = 0b0001;
  }

  def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
                        int_aarch64_neon_vsri> {
    let Inst{22-20} = 0b001;
  }

  def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
                        int_aarch64_neon_vsri> {
    let Inst{22-21} = 0b01;
  }

    // 128-bit vector types
  def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
                         int_aarch64_neon_vsri> {
    let Inst{22-19} = 0b0001;
  }

  def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
                        int_aarch64_neon_vsri> {
    let Inst{22-20} = 0b001;
  }

  def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
                        int_aarch64_neon_vsri> {
    let Inst{22-21} = 0b01;
  }

  def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
                        int_aarch64_neon_vsri> {
    let Inst{22} = 0b1;
  }
}

// Shift left and insert
defm SLIvvi   : NeonI_N2VShLIns<0b1, 0b01010, "sli">;

// Shift right and insert
defm SRIvvi   : NeonI_N2VShRIns<0b1, 0b01000, "sri">;
2088
// Shift right narrow: 128-bit source, 64-bit narrowed result. No selection
// pattern here; matching is done by the Pat<> definitions further below.
class N2VShR_Narrow<bit q, bit u, bits<5> opcode, string asmop, string DestT,
                    string SrcT, Operand ImmTy>
  : NeonI_2VShiftImm<q, u, opcode,
                     (outs VPR64:$Rd), (ins VPR128:$Rn, ImmTy:$Imm),
                     asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
                     [], NoItinerary>;

// "High" narrow variant (e.g. shrn2): writes the narrowed result into the
// upper half of $Rd while preserving the lower half, hence the tied $src.
class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT,
                       string SrcT, Operand ImmTy>
  : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
                     (ins VPR128:$src, VPR128:$Rn, ImmTy:$Imm),
                     asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
                     [], NoItinerary> {
  let Constraints = "$src = $Rd";
}
2104
// Shift right narrow by immediate (narrowing each wider source element)
multiclass NeonI_N2VShR_Narrow<bit u, bits<5> opcode, string asmop> {
  def _8B : N2VShR_Narrow<0b0, u, opcode, asmop, "8b", "8h", shr_imm8> {
    let Inst{22-19} = 0b0001;
  }

  def _4H : N2VShR_Narrow<0b0, u, opcode, asmop, "4h", "4s", shr_imm16> {
    let Inst{22-20} = 0b001;
  }

  def _2S : N2VShR_Narrow<0b0, u, opcode, asmop, "2s", "2d", shr_imm32> {
    let Inst{22-21} = 0b01;
  }

  // Shift Narrow High: the "2" suffix variants write the upper half of $Rd.
  def _16B : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "16b", "8h",
                              shr_imm8> {
    let Inst{22-19} = 0b0001;
  }

  def _8H : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "8h", "4s",
                             shr_imm16> {
    let Inst{22-20} = 0b001;
  }

  def _4S : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "4s", "2d",
                             shr_imm32> {
    let Inst{22-21} = 0b01;
  }
}

// Shift right narrow
defm SHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10000, "shrn">;

// Shift right narrow (prefix Q is saturating, prefix R is rounding)
defm QSHRUNvvi :NeonI_N2VShR_Narrow<0b1, 0b10000, "sqshrun">;
defm RSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10001, "rshrn">;
defm QRSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10001, "sqrshrun">;
defm SQSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10010, "sqshrn">;
defm UQSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10010, "uqshrn">;
defm SQRSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10011, "sqrshrn">;
defm UQRSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10011, "uqrshrn">;
2147
// PatFrags that recognize concatenation of two half-width vectors into a
// full 128-bit vector, one fragment per element type.
def Neon_combine_2D : PatFrag<(ops node:$Rm, node:$Rn),
                              (v2i64 (concat_vectors (v1i64 node:$Rm),
                                                     (v1i64 node:$Rn)))>;
def Neon_combine_8H : PatFrag<(ops node:$Rm, node:$Rn),
                              (v8i16 (concat_vectors (v4i16 node:$Rm),
                                                     (v4i16 node:$Rn)))>;
def Neon_combine_4S : PatFrag<(ops node:$Rm, node:$Rn),
                              (v4i32 (concat_vectors (v2i32 node:$Rm),
                                                     (v2i32 node:$Rn)))>;
def Neon_combine_4f : PatFrag<(ops node:$Rm, node:$Rn),
                              (v4f32 (concat_vectors (v2f32 node:$Rm),
                                                     (v2f32 node:$Rn)))>;
def Neon_combine_2d : PatFrag<(ops node:$Rm, node:$Rn),
                              (v2f64 (concat_vectors (v1f64 node:$Rm),
                                                     (v1f64 node:$Rn)))>;

// PatFrags that recognize a vector logical/arithmetic shift right by a
// splatted (Neon_vdup) i32 shift amount, per element type.
def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
                             (v8i16 (srl (v8i16 node:$lhs),
                               (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
                             (v4i32 (srl (v4i32 node:$lhs),
                               (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
                             (v2i64 (srl (v2i64 node:$lhs),
                               (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
                             (v8i16 (sra (v8i16 node:$lhs),
                               (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
                             (v4i32 (sra (v4i32 node:$lhs),
                               (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
                             (v2i64 (sra (v2i64 node:$lhs),
                               (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
2182
// Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors)
// Parameterized over "lshr"/"ashr" via !cast to the PatFrags defined above.
multiclass Neon_shiftNarrow_patterns<string shr> {
  def : Pat<(v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H") VPR128:$Rn,
              (i32 shr_imm8:$Imm)))),
            (SHRNvvi_8B VPR128:$Rn, imm:$Imm)>;
  def : Pat<(v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S") VPR128:$Rn,
              (i32 shr_imm16:$Imm)))),
            (SHRNvvi_4H VPR128:$Rn, imm:$Imm)>;
  def : Pat<(v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D") VPR128:$Rn,
              (i32 shr_imm32:$Imm)))),
            (SHRNvvi_2S VPR128:$Rn, imm:$Imm)>;

  // High (shrn2) forms: the low half comes from $src, widened to a 128-bit
  // register via SUBREG_TO_REG so the "_Hi" instruction can be used.
  def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
              (v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H")
                VPR128:$Rn, (i32 shr_imm8:$Imm))))))),
            (SHRNvvi_16B (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
                         VPR128:$Rn, imm:$Imm)>;
  def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
              (v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S")
                VPR128:$Rn, (i32 shr_imm16:$Imm))))))),
            (SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
                        VPR128:$Rn, imm:$Imm)>;
  def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
              (v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D")
                VPR128:$Rn, (i32 shr_imm32:$Imm))))))),
            (SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
                        VPR128:$Rn, imm:$Imm)>;
}
2211
// Saturating/rounding shift-right-narrow patterns: map an intrinsic `op`
// onto the already-defined instruction family named by `prefix`.
multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> {
  def : Pat<(v8i8 (op (v8i16 VPR128:$Rn), shr_imm8:$Imm)),
            (!cast<Instruction>(prefix # "_8B") VPR128:$Rn, imm:$Imm)>;
  def : Pat<(v4i16 (op (v4i32 VPR128:$Rn), shr_imm16:$Imm)),
            (!cast<Instruction>(prefix # "_4H") VPR128:$Rn, imm:$Imm)>;
  def : Pat<(v2i32 (op (v2i64 VPR128:$Rn), shr_imm32:$Imm)),
            (!cast<Instruction>(prefix # "_2S") VPR128:$Rn, imm:$Imm)>;

  // High ("2"-suffix) forms: keep $src as the low half via SUBREG_TO_REG.
  def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
                (v1i64 (bitconvert (v8i8
                    (op (v8i16 VPR128:$Rn), shr_imm8:$Imm))))),
            (!cast<Instruction>(prefix # "_16B")
                (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
                VPR128:$Rn, imm:$Imm)>;
  def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
                (v1i64 (bitconvert (v4i16
                    (op (v4i32 VPR128:$Rn), shr_imm16:$Imm))))),
            (!cast<Instruction>(prefix # "_8H")
                (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
                VPR128:$Rn, imm:$Imm)>;
  def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
                (v1i64 (bitconvert (v2i32
                    (op (v2i64 VPR128:$Rn), shr_imm32:$Imm))))),
            (!cast<Instruction>(prefix # "_4S")
                  (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
                  VPR128:$Rn, imm:$Imm)>;
}

defm : Neon_shiftNarrow_patterns<"lshr">;
defm : Neon_shiftNarrow_patterns<"ashr">;

defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrun, "QSHRUNvvi">;
defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vrshrn, "RSHRNvvi">;
defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrun, "QRSHRUNvvi">;
defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrn, "SQSHRNvvi">;
defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqshrn, "UQSHRNvvi">;
defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrn, "SQRSHRNvvi">;
defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqrshrn, "UQRSHRNvvi">;
2250
// Convert between fixed-point and floating-point (vector, by immediate).
// $Imm is the number of fractional bits; direction is determined by the
// DestTy/SrcTy pair supplied by the instantiating multiclass.
class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T,
                RegisterOperand VPRC, ValueType DestTy, ValueType SrcTy,
                Operand ImmTy, SDPatternOperator IntOp>
  : NeonI_2VShiftImm<q, u, opcode,
                     (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
                     asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
                     [(set (DestTy VPRC:$Rd), (DestTy (IntOp (SrcTy VPRC:$Rn),
                       (i32 ImmTy:$Imm))))],
                     NoItinerary>;
2261
// Fixed-point -> floating-point conversions (scvtf/ucvtf with #fbits).
multiclass NeonI_N2VCvt_Fx2fp<bit u, bits<5> opcode, string asmop,
                              SDPatternOperator IntOp> {
  def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2f32, v2i32,
                      shr_imm32, IntOp> {
    let Inst{22-21} = 0b01;
  }

  def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4f32, v4i32,
                      shr_imm32, IntOp> {
    let Inst{22-21} = 0b01;
  }

  def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2f64, v2i64,
                      shr_imm64, IntOp> {
    let Inst{22} = 0b1;
  }
}

// Floating-point -> fixed-point conversions (fcvtzs/fcvtzu with #fbits).
multiclass NeonI_N2VCvt_Fp2fx<bit u, bits<5> opcode, string asmop,
                              SDPatternOperator IntOp> {
  def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2i32, v2f32,
                      shr_imm32, IntOp> {
    let Inst{22-21} = 0b01;
  }

  def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4i32, v4f32,
                      shr_imm32, IntOp> {
    let Inst{22-21} = 0b01;
  }

  def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2i64, v2f64,
                      shr_imm64, IntOp> {
    let Inst{22} = 0b1;
  }
}
2297
// Convert fixed-point to floating-point
defm VCVTxs2f : NeonI_N2VCvt_Fx2fp<0, 0b11100, "scvtf",
                                   int_arm_neon_vcvtfxs2fp>;
defm VCVTxu2f : NeonI_N2VCvt_Fx2fp<1, 0b11100, "ucvtf",
                                   int_arm_neon_vcvtfxu2fp>;

// Convert floating-point to fixed-point
defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs",
                                   int_arm_neon_vcvtfp2fxs>;
defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu",
                                   int_arm_neon_vcvtfp2fxu>;
2309
// PatFrags that match sign/zero extension of the high half of a 128-bit
// vector (via the Neon_High* fragments), used by the long/wide "2" forms.
multiclass Neon_sshll2_0<SDNode ext>
{
  def _v8i8  : PatFrag<(ops node:$Rn),
                       (v8i16 (ext (v8i8 (Neon_High16B node:$Rn))))>;
  def _v4i16 : PatFrag<(ops node:$Rn),
                       (v4i32 (ext (v4i16 (Neon_High8H node:$Rn))))>;
  def _v2i32 : PatFrag<(ops node:$Rn),
                       (v2i64 (ext (v2i32 (Neon_High4S node:$Rn))))>;
}

defm NI_sext_high : Neon_sshll2_0<sext>;
defm NI_zext_high : Neon_sshll2_0<zext>;
2322
2323
//===----------------------------------------------------------------------===//
// Multiclasses for NeonI_Across
//===----------------------------------------------------------------------===//

// Variant 1

// Across-vector reduction whose scalar result is one step wider than the
// source elements (e.g. saddlv/uaddlv).
multiclass NeonI_2VAcross_1<bit u, bits<5> opcode,
                            string asmop, SDPatternOperator opnode>
{
    def _1h8b:  NeonI_2VAcross<0b0, u, 0b00, opcode,
                (outs FPR16:$Rd), (ins VPR64:$Rn),
                asmop # "\t$Rd, $Rn.8b",
                [(set (v1i16 FPR16:$Rd),
                    (v1i16 (opnode (v8i8 VPR64:$Rn))))],
                NoItinerary>;

    def _1h16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
                (outs FPR16:$Rd), (ins VPR128:$Rn),
                asmop # "\t$Rd, $Rn.16b",
                [(set (v1i16 FPR16:$Rd),
                    (v1i16 (opnode (v16i8 VPR128:$Rn))))],
                NoItinerary>;

    def _1s4h:  NeonI_2VAcross<0b0, u, 0b01, opcode,
                (outs FPR32:$Rd), (ins VPR64:$Rn),
                asmop # "\t$Rd, $Rn.4h",
                [(set (v1i32 FPR32:$Rd),
                    (v1i32 (opnode (v4i16 VPR64:$Rn))))],
                NoItinerary>;

    def _1s8h:  NeonI_2VAcross<0b1, u, 0b01, opcode,
                (outs FPR32:$Rd), (ins VPR128:$Rn),
                asmop # "\t$Rd, $Rn.8h",
                [(set (v1i32 FPR32:$Rd),
                    (v1i32 (opnode (v8i16 VPR128:$Rn))))],
                NoItinerary>;

    // _1d2s doesn't exist!

    def _1d4s:  NeonI_2VAcross<0b1, u, 0b10, opcode,
                (outs FPR64:$Rd), (ins VPR128:$Rn),
                asmop # "\t$Rd, $Rn.4s",
                [(set (v1i64 FPR64:$Rd),
                    (v1i64 (opnode (v4i32 VPR128:$Rn))))],
                NoItinerary>;
}

defm SADDLV : NeonI_2VAcross_1<0b0, 0b00011, "saddlv", int_aarch64_neon_saddlv>;
defm UADDLV : NeonI_2VAcross_1<0b1, 0b00011, "uaddlv", int_aarch64_neon_uaddlv>;
2373
// Variant 2

// Across-vector reduction whose scalar result has the same width as the
// source elements (e.g. smaxv/uminv/addv).
multiclass NeonI_2VAcross_2<bit u, bits<5> opcode,
                            string asmop, SDPatternOperator opnode>
{
    def _1b8b:  NeonI_2VAcross<0b0, u, 0b00, opcode,
                (outs FPR8:$Rd), (ins VPR64:$Rn),
                asmop # "\t$Rd, $Rn.8b",
                [(set (v1i8 FPR8:$Rd),
                    (v1i8 (opnode (v8i8 VPR64:$Rn))))],
                NoItinerary>;

    def _1b16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
                (outs FPR8:$Rd), (ins VPR128:$Rn),
                asmop # "\t$Rd, $Rn.16b",
                [(set (v1i8 FPR8:$Rd),
                    (v1i8 (opnode (v16i8 VPR128:$Rn))))],
                NoItinerary>;

    def _1h4h:  NeonI_2VAcross<0b0, u, 0b01, opcode,
                (outs FPR16:$Rd), (ins VPR64:$Rn),
                asmop # "\t$Rd, $Rn.4h",
                [(set (v1i16 FPR16:$Rd),
                    (v1i16 (opnode (v4i16 VPR64:$Rn))))],
                NoItinerary>;

    def _1h8h:  NeonI_2VAcross<0b1, u, 0b01, opcode,
                (outs FPR16:$Rd), (ins VPR128:$Rn),
                asmop # "\t$Rd, $Rn.8h",
                [(set (v1i16 FPR16:$Rd),
                    (v1i16 (opnode (v8i16 VPR128:$Rn))))],
                NoItinerary>;

    // _1s2s doesn't exist!

    def _1s4s:  NeonI_2VAcross<0b1, u, 0b10, opcode,
                (outs FPR32:$Rd), (ins VPR128:$Rn),
                asmop # "\t$Rd, $Rn.4s",
                [(set (v1i32 FPR32:$Rd),
                    (v1i32 (opnode (v4i32 VPR128:$Rn))))],
                NoItinerary>;
}

defm SMAXV : NeonI_2VAcross_2<0b0, 0b01010, "smaxv", int_aarch64_neon_smaxv>;
defm UMAXV : NeonI_2VAcross_2<0b1, 0b01010, "umaxv", int_aarch64_neon_umaxv>;

defm SMINV : NeonI_2VAcross_2<0b0, 0b11010, "sminv", int_aarch64_neon_sminv>;
defm UMINV : NeonI_2VAcross_2<0b1, 0b11010, "uminv", int_aarch64_neon_uminv>;

defm ADDV : NeonI_2VAcross_2<0b0, 0b11011, "addv", int_aarch64_neon_vaddv>;
2424
// Variant 3

// Floating-point across-vector reduction; only the 4s source form exists.
multiclass NeonI_2VAcross_3<bit u, bits<5> opcode, bits<2> size,
                            string asmop, SDPatternOperator opnode> {
    def _1s4s:  NeonI_2VAcross<0b1, u, size, opcode,
                (outs FPR32:$Rd), (ins VPR128:$Rn),
                asmop # "\t$Rd, $Rn.4s",
                [(set (f32 FPR32:$Rd),
                    (f32 (opnode (v4f32 VPR128:$Rn))))],
                NoItinerary>;
}

defm FMAXNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b00, "fmaxnmv",
                                int_aarch64_neon_vmaxnmv>;
defm FMINNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b10, "fminnmv",
                                int_aarch64_neon_vminnmv>;

defm FMAXV : NeonI_2VAcross_3<0b1, 0b01111, 0b00, "fmaxv",
                              int_aarch64_neon_vmaxv>;
defm FMINV : NeonI_2VAcross_3<0b1, 0b01111, 0b10, "fminv",
                              int_aarch64_neon_vminv>;
2446
// The following definitions are for the instruction class (Perm)

// Two-register permute (uzp/trn/zip) with matching element arrangement on
// source and destination.
class NeonI_Permute<bit q, bits<2> size, bits<3> opcode,
                    string asmop, RegisterOperand OpVPR, string OpS,
                    SDPatternOperator opnode, ValueType Ty>
  : NeonI_Perm<q, size, opcode,
               (outs OpVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
               asmop # "\t$Rd." # OpS # ", $Rn." # OpS # ", $Rm." # OpS,
               [(set (Ty OpVPR:$Rd),
                  (Ty (opnode (Ty OpVPR:$Rn), (Ty OpVPR:$Rm))))],
               NoItinerary>;

// Instantiate one permute instruction per integer element arrangement.
multiclass NeonI_Perm_pat<bits<3> opcode, string asmop,
                          SDPatternOperator opnode> {
  def _8b  : NeonI_Permute<0b0, 0b00, opcode, asmop,
                           VPR64, "8b", opnode, v8i8>;
  def _16b : NeonI_Permute<0b1, 0b00, opcode, asmop,
                           VPR128, "16b",opnode, v16i8>;
  def _4h  : NeonI_Permute<0b0, 0b01, opcode, asmop,
                           VPR64, "4h", opnode, v4i16>;
  def _8h  : NeonI_Permute<0b1, 0b01, opcode, asmop,
                           VPR128, "8h", opnode, v8i16>;
  def _2s  : NeonI_Permute<0b0, 0b10, opcode, asmop,
                           VPR64, "2s", opnode, v2i32>;
  def _4s  : NeonI_Permute<0b1, 0b10, opcode, asmop,
                           VPR128, "4s", opnode, v4i32>;
  def _2d  : NeonI_Permute<0b1, 0b11, opcode, asmop,
                           VPR128, "2d", opnode, v2i64>;
}
2476
defm UZP1vvv : NeonI_Perm_pat<0b001, "uzp1", Neon_uzp1>;
defm TRN1vvv : NeonI_Perm_pat<0b010, "trn1", Neon_trn1>;
defm ZIP1vvv : NeonI_Perm_pat<0b011, "zip1", Neon_zip1>;
defm UZP2vvv : NeonI_Perm_pat<0b101, "uzp2", Neon_uzp2>;
defm TRN2vvv : NeonI_Perm_pat<0b110, "trn2", Neon_trn2>;
defm ZIP2vvv : NeonI_Perm_pat<0b111, "zip2", Neon_zip2>;

// Reuse the integer permute instructions for floating-point vectors: the
// permutes are bit-pattern operations, so only selection patterns are needed.
multiclass NeonI_Perm_float_pat<string INS, SDPatternOperator opnode> {
  def : Pat<(v2f32 (opnode (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
            (!cast<Instruction>(INS # "_2s") VPR64:$Rn, VPR64:$Rm)>;

  def : Pat<(v4f32 (opnode (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
            (!cast<Instruction>(INS # "_4s") VPR128:$Rn, VPR128:$Rm)>;

  def : Pat<(v2f64 (opnode (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
            (!cast<Instruction>(INS # "_2d") VPR128:$Rn, VPR128:$Rm)>;
}

defm : NeonI_Perm_float_pat<"UZP1vvv", Neon_uzp1>;
defm : NeonI_Perm_float_pat<"UZP2vvv", Neon_uzp2>;
defm : NeonI_Perm_float_pat<"ZIP1vvv", Neon_zip1>;
defm : NeonI_Perm_float_pat<"ZIP2vvv", Neon_zip2>;
defm : NeonI_Perm_float_pat<"TRN1vvv", Neon_trn1>;
defm : NeonI_Perm_float_pat<"TRN2vvv", Neon_trn2>;
2501
// The following definitions are for the instruction class (3V Diff)

// normal long/long2 pattern
// Both operands are extended (sext/zext, or the NI_*_high fragments for the
// "2" forms) before the binary opnode is applied at the result width.
class NeonI_3VDL<bit q, bit u, bits<2> size, bits<4> opcode,
                 string asmop, string ResS, string OpS,
                 SDPatternOperator opnode, SDPatternOperator ext,
                 RegisterOperand OpVPR,
                 ValueType ResTy, ValueType OpTy>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                 [(set (ResTy VPR128:$Rd),
                    (ResTy (opnode (ResTy (ext (OpTy OpVPR:$Rn))),
                                   (ResTy (ext (OpTy OpVPR:$Rm))))))],
                 NoItinerary>;
2517
// Signed long forms (low halves, sext).
multiclass NeonI_3VDL_s<bit u, bits<4> opcode,
                        string asmop, SDPatternOperator opnode,
                        bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                           opnode, sext, VPR64, v8i16, v8i8>;
    def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                           opnode, sext, VPR64, v4i32, v4i16>;
    def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                           opnode, sext, VPR64, v2i64, v2i32>;
  }
}

// Signed long2 forms (high halves, NI_sext_high_*).
multiclass NeonI_3VDL2_s<bit u, bits<4> opcode, string asmop,
                         SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                            opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
    def _4s8h  : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                            opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
    def _2d4s  : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                            opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
  }
}

// Unsigned long forms (low halves, zext).
multiclass NeonI_3VDL_u<bit u, bits<4> opcode, string asmop,
                        SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                           opnode, zext, VPR64, v8i16, v8i8>;
    def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                           opnode, zext, VPR64, v4i32, v4i16>;
    def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                           opnode, zext, VPR64, v2i64, v2i32>;
  }
}

// Unsigned long2 forms (high halves, NI_zext_high_*).
multiclass NeonI_3VDL2_u<bit u, bits<4> opcode, string asmop,
                         SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                            opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
    def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                           opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
    def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                           opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
  }
}
2566
// Long add/subtract; adds are commutable, subtracts are not.
defm SADDLvvv :  NeonI_3VDL_s<0b0, 0b0000, "saddl", add, 1>;
defm UADDLvvv :  NeonI_3VDL_u<0b1, 0b0000, "uaddl", add, 1>;

defm SADDL2vvv :  NeonI_3VDL2_s<0b0, 0b0000, "saddl2", add, 1>;
defm UADDL2vvv :  NeonI_3VDL2_u<0b1, 0b0000, "uaddl2", add, 1>;

defm SSUBLvvv :  NeonI_3VDL_s<0b0, 0b0010, "ssubl", sub, 0>;
defm USUBLvvv :  NeonI_3VDL_u<0b1, 0b0010, "usubl", sub, 0>;

defm SSUBL2vvv :  NeonI_3VDL2_s<0b0, 0b0010, "ssubl2", sub, 0>;
defm USUBL2vvv :  NeonI_3VDL2_u<0b1, 0b0010, "usubl2", sub, 0>;
2578
// normal wide/wide2 pattern
// Only the second operand ($Rm) is extended; $Rn is already at result width.
class NeonI_3VDW<bit q, bit u, bits<2> size, bits<4> opcode,
                 string asmop, string ResS, string OpS,
                 SDPatternOperator opnode, SDPatternOperator ext,
                 RegisterOperand OpVPR,
                 ValueType ResTy, ValueType OpTy>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs VPR128:$Rd), (ins VPR128:$Rn, OpVPR:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # ResS # ", $Rm." # OpS,
                 [(set (ResTy VPR128:$Rd),
                    (ResTy (opnode (ResTy VPR128:$Rn),
                                   (ResTy (ext (OpTy OpVPR:$Rm))))))],
                 NoItinerary>;

// Signed wide forms (low halves, sext).
multiclass NeonI_3VDW_s<bit u, bits<4> opcode, string asmop,
                        SDPatternOperator opnode> {
  def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                         opnode, sext, VPR64, v8i16, v8i8>;
  def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                         opnode, sext, VPR64, v4i32, v4i16>;
  def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                         opnode, sext, VPR64, v2i64, v2i32>;
}
2602
defm SADDWvvv :  NeonI_3VDW_s<0b0, 0b0001, "saddw", add>;
defm SSUBWvvv :  NeonI_3VDW_s<0b0, 0b0011, "ssubw", sub>;

// Signed wide2 forms (high halves, NI_sext_high_*).
multiclass NeonI_3VDW2_s<bit u, bits<4> opcode, string asmop,
                         SDPatternOperator opnode> {
  def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                          opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
  def _4s8h  : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                          opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
  def _2d4s  : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                          opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
}

defm SADDW2vvv :  NeonI_3VDW2_s<0b0, 0b0001, "saddw2", add>;
defm SSUBW2vvv :  NeonI_3VDW2_s<0b0, 0b0011, "ssubw2", sub>;

// Unsigned wide forms (low halves, zext).
multiclass NeonI_3VDW_u<bit u, bits<4> opcode, string asmop,
                        SDPatternOperator opnode> {
  def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                         opnode, zext, VPR64, v8i16, v8i8>;
  def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                         opnode, zext, VPR64, v4i32, v4i16>;
  def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                         opnode, zext, VPR64, v2i64, v2i32>;
}

defm UADDWvvv :  NeonI_3VDW_u<0b1, 0b0001, "uaddw", add>;
defm USUBWvvv :  NeonI_3VDW_u<0b1, 0b0011, "usubw", sub>;

// Unsigned wide2 forms (high halves, NI_zext_high_*).
multiclass NeonI_3VDW2_u<bit u, bits<4> opcode, string asmop,
                         SDPatternOperator opnode> {
  def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                          opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
  def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                         opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
  def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                         opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
}

defm UADDW2vvv :  NeonI_3VDW2_u<0b1, 0b0001, "uaddw2", add>;
defm USUBW2vvv :  NeonI_3VDW2_u<0b1, 0b0011, "usubw2", sub>;
2644
// Get the high half part of the vector element.
// Matches (trunc (srl x, half-width)), i.e. selecting the upper half of
// each element — the IR form produced for addhn/subhn.
multiclass NeonI_get_high {
  def _8h : PatFrag<(ops node:$Rn),
                    (v8i8 (trunc (v8i16 (srl (v8i16 node:$Rn),
                                             (v8i16 (Neon_vdup (i32 8)))))))>;
  def _4s : PatFrag<(ops node:$Rn),
                    (v4i16 (trunc (v4i32 (srl (v4i32 node:$Rn),
                                              (v4i32 (Neon_vdup (i32 16)))))))>;
  def _2d : PatFrag<(ops node:$Rn),
                    (v2i32 (trunc (v2i64 (srl (v2i64 node:$Rn),
                                              (v2i64 (Neon_vdup (i32 32)))))))>;
}

defm NI_get_hi : NeonI_get_high;
2659
// pattern for addhn/subhn with 2 operands
// The selected DAG is: wide add/sub of the two 128-bit operands, then
// NI_get_hi_* takes the high half of each element, producing the 64-bit
// narrowed result.
class NeonI_3VDN_addhn_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
                           string asmop, string ResS, string OpS,
                           SDPatternOperator opnode, SDPatternOperator get_hi,
                           ValueType ResTy, ValueType OpTy>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs VPR64:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                 [(set (ResTy VPR64:$Rd),
                    (ResTy (get_hi
                      (OpTy (opnode (OpTy VPR128:$Rn),
                                    (OpTy VPR128:$Rm))))))],
                 NoItinerary>;

multiclass NeonI_3VDN_addhn_2Op<bit u, bits<4> opcode, string asmop,
                                SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8b8h : NeonI_3VDN_addhn_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
                                     opnode, NI_get_hi_8h, v8i8, v8i16>;
    def _4h4s : NeonI_3VDN_addhn_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
                                     opnode, NI_get_hi_4s, v4i16, v4i32>;
    def _2s2d : NeonI_3VDN_addhn_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
                                     opnode, NI_get_hi_2d, v2i32, v2i64>;
  }
}

// addhn (add) is commutable; subhn (sub) is not.
defm ADDHNvvv  : NeonI_3VDN_addhn_2Op<0b0, 0b0100, "addhn", add, 1>;
defm SUBHNvvv  : NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>;
2688
// pattern for operation with 2 operands
// Generic two-operand 3V Diff instruction: result and operand register
// classes / value types are parameterized, so this base class serves
// narrowing (128 -> 64) as well as long (64 -> 128) forms.
class NeonI_3VD_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
                    string asmop, string ResS, string OpS,
                    SDPatternOperator opnode,
                    RegisterOperand ResVPR, RegisterOperand OpVPR,
                    ValueType ResTy, ValueType OpTy>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs ResVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                 [(set (ResTy ResVPR:$Rd),
                    (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))],
                 NoItinerary>;

// normal narrow pattern
// 128-bit sources, 64-bit narrowed result.
multiclass NeonI_3VDN_2Op<bit u, bits<4> opcode, string asmop,
                          SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8b8h : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
                              opnode, VPR64, VPR128, v8i8, v8i16>;
    def _4h4s : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
                              opnode, VPR64, VPR128, v4i16, v4i32>;
    def _2s2d : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
                              opnode, VPR64, VPR128, v2i32, v2i64>;
  }
}

// Rounding variants map directly onto the ARM NEON intrinsics.
defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>;
defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>;
2717
// pattern for acle intrinsic with 3 operands
// Second-half narrowing instruction: writes the high half of $Rd while
// preserving its low half ($src is tied to $Rd).  No selection pattern
// here; the NarrowHighHalfPat patterns below provide matching.
class NeonI_3VDN_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
                     string asmop, string ResS, string OpS>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                 [], NoItinerary> {
  let Constraints = "$src = $Rd";
  let neverHasSideEffects = 1;
}

multiclass NeonI_3VDN_3Op_v1<bit u, bits<4> opcode, string asmop> {
  def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">;
  def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">;
  def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">;
}

defm ADDHN2vvv  : NeonI_3VDN_3Op_v1<0b0, 0b0100, "addhn2">;
defm SUBHN2vvv  : NeonI_3VDN_3Op_v1<0b0, 0b0110, "subhn2">;

defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2">;
defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2">;
2740
// Patterns have to be separate because there's a SUBREG_TO_REG in the output
// part.
// Matches "combine the old low half with the narrowed high-half result"
// and selects the tied-operand *2 instruction, inserting $src into the
// low 64 bits of the destination via SUBREG_TO_REG.
class NarrowHighHalfPat<Instruction INST, ValueType DstTy, ValueType SrcTy,
                        SDPatternOperator coreop>
  : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
                      (v1i64 (bitconvert (DstTy (coreop (SrcTy VPR128:$Rn),
                                                        (SrcTy VPR128:$Rm)))))),
        (INST (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
              VPR128:$Rn, VPR128:$Rm)>;

// addhn2 patterns
def : NarrowHighHalfPat<ADDHN2vvv_16b8h, v8i8,  v8i16,
          BinOpFrag<(NI_get_hi_8h (add node:$LHS, node:$RHS))>>;
def : NarrowHighHalfPat<ADDHN2vvv_8h4s,  v4i16, v4i32,
          BinOpFrag<(NI_get_hi_4s (add node:$LHS, node:$RHS))>>;
def : NarrowHighHalfPat<ADDHN2vvv_4s2d,  v2i32, v2i64,
          BinOpFrag<(NI_get_hi_2d (add node:$LHS, node:$RHS))>>;

// subhn2 patterns
def : NarrowHighHalfPat<SUBHN2vvv_16b8h, v8i8,  v8i16,
          BinOpFrag<(NI_get_hi_8h (sub node:$LHS, node:$RHS))>>;
def : NarrowHighHalfPat<SUBHN2vvv_8h4s,  v4i16, v4i32,
          BinOpFrag<(NI_get_hi_4s (sub node:$LHS, node:$RHS))>>;
def : NarrowHighHalfPat<SUBHN2vvv_4s2d,  v2i32, v2i64,
          BinOpFrag<(NI_get_hi_2d (sub node:$LHS, node:$RHS))>>;

// raddhn2 patterns
def : NarrowHighHalfPat<RADDHN2vvv_16b8h, v8i8,  v8i16, int_arm_neon_vraddhn>;
def : NarrowHighHalfPat<RADDHN2vvv_8h4s,  v4i16, v4i32, int_arm_neon_vraddhn>;
def : NarrowHighHalfPat<RADDHN2vvv_4s2d,  v2i32, v2i64, int_arm_neon_vraddhn>;

// rsubhn2 patterns
def : NarrowHighHalfPat<RSUBHN2vvv_16b8h, v8i8,  v8i16, int_arm_neon_vrsubhn>;
def : NarrowHighHalfPat<RSUBHN2vvv_8h4s,  v4i16, v4i32, int_arm_neon_vrsubhn>;
def : NarrowHighHalfPat<RSUBHN2vvv_4s2d,  v2i32, v2i64, int_arm_neon_vrsubhn>;
2776
// Pattern that needs the narrow result zero-extended to the wide type.
// Used for abd-style operations: opnode produces a narrow (OpSTy) value
// which is then zext'ed into the 128-bit destination.
class NeonI_3VDL_Ext<bit q, bit u, bits<2> size, bits<4> opcode,
                     string asmop, string ResS, string OpS,
                     SDPatternOperator opnode,
                     RegisterOperand OpVPR,
                     ValueType ResTy, ValueType OpTy, ValueType OpSTy>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                 [(set (ResTy VPR128:$Rd),
                    (ResTy (zext (OpSTy (opnode (OpTy OpVPR:$Rn),
                                                (OpTy OpVPR:$Rm))))))],
                 NoItinerary>;

multiclass NeonI_3VDL_zext<bit u, bits<4> opcode, string asmop,
                           SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h8b : NeonI_3VDL_Ext<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                               opnode, VPR64, v8i16, v8i8, v8i8>;
    def _4s4h : NeonI_3VDL_Ext<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                               opnode, VPR64, v4i32, v4i16, v4i16>;
    def _2d2s : NeonI_3VDL_Ext<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                               opnode, VPR64, v2i64, v2i32, v2i32>;
  }
}

// Absolute difference is always non-negative, so zero-extension of the
// narrow vabds/vabdu result is correct for both signed and unsigned.
defm SABDLvvv : NeonI_3VDL_zext<0b0, 0b0111, "sabdl", int_arm_neon_vabds, 1>;
defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>;
2805
// Wraps a binary operator so it applies to the high halves of two
// 128-bit operands (extracted by the Neon_High* fragments).  Used to
// build the "2" (second-half) instruction patterns below.
multiclass NeonI_Op_High<SDPatternOperator op> {
  def _16B : PatFrag<(ops node:$Rn, node:$Rm),
                     (op (v8i8 (Neon_High16B node:$Rn)),
                         (v8i8 (Neon_High16B node:$Rm)))>;
  def _8H  : PatFrag<(ops node:$Rn, node:$Rm),
                     (op (v4i16 (Neon_High8H node:$Rn)),
                         (v4i16 (Neon_High8H node:$Rm)))>;
  def _4S  : PatFrag<(ops node:$Rn, node:$Rm),
                     (op (v2i32 (Neon_High4S node:$Rn)),
                         (v2i32 (Neon_High4S node:$Rm)))>;
}

defm NI_sabdl_hi : NeonI_Op_High<int_arm_neon_vabds>;
defm NI_uabdl_hi : NeonI_Op_High<int_arm_neon_vabdu>;
defm NI_smull_hi : NeonI_Op_High<int_arm_neon_vmulls>;
defm NI_umull_hi : NeonI_Op_High<int_arm_neon_vmullu>;
defm NI_qdmull_hi : NeonI_Op_High<int_arm_neon_vqdmull>;
defm NI_pmull_hi : NeonI_Op_High<int_arm_neon_vmullp>;
2824
// Second-half abd-long: opnode is passed by NAME and resolved with !cast
// to the NI_*_hi_<size> PatFrag of matching width.
// NOTE(review): the def suffixes (_8h8b etc.) mirror the non-"2"
// variants even though the source arrangement here is 16b/8h/4s.
multiclass NeonI_3VDL_Abd_u<bit u, bits<4> opcode, string asmop, string opnode,
                            bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h8b  : NeonI_3VDL_Ext<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                                !cast<PatFrag>(opnode # "_16B"),
                                VPR128, v8i16, v16i8, v8i8>;
    def _4s4h  : NeonI_3VDL_Ext<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                                !cast<PatFrag>(opnode # "_8H"),
                                VPR128, v4i32, v8i16, v4i16>;
    def _2d2s  : NeonI_3VDL_Ext<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                                !cast<PatFrag>(opnode # "_4S"),
                                VPR128, v2i64, v4i32, v2i32>;
  }
}

defm SABDL2vvv : NeonI_3VDL_Abd_u<0b0, 0b0111, "sabdl2", "NI_sabdl_hi", 1>;
defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 0b0111, "uabdl2", "NI_uabdl_hi", 1>;
2842
// For pattern that need two operators being chained.
// Accumulating form: $Rd = opnode($src, zext(subop($Rn, $Rm))), with
// $src tied to $Rd.  Used for saba/uaba-long (accumulate the widened
// absolute difference).
class NeonI_3VDL_Aba<bit q, bit u, bits<2> size, bits<4> opcode,
                     string asmop, string ResS, string OpS,
                     SDPatternOperator opnode, SDPatternOperator subop,
                     RegisterOperand OpVPR,
                     ValueType ResTy, ValueType OpTy, ValueType OpSTy>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                 [(set (ResTy VPR128:$Rd),
                    (ResTy (opnode
                      (ResTy VPR128:$src),
                      (ResTy (zext (OpSTy (subop (OpTy OpVPR:$Rn),
                                                 (OpTy OpVPR:$Rm))))))))],
                 NoItinerary> {
  let Constraints = "$src = $Rd";
}

multiclass NeonI_3VDL_Aba_v1<bit u, bits<4> opcode, string asmop,
                             SDPatternOperator opnode, SDPatternOperator subop>{
  def _8h8b : NeonI_3VDL_Aba<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                             opnode, subop, VPR64, v8i16, v8i8, v8i8>;
  def _4s4h : NeonI_3VDL_Aba<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                             opnode, subop, VPR64, v4i32, v4i16, v4i16>;
  def _2d2s : NeonI_3VDL_Aba<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                             opnode, subop, VPR64, v2i64, v2i32, v2i32>;
}

defm SABALvvv :  NeonI_3VDL_Aba_v1<0b0, 0b0101, "sabal",
                                   add, int_arm_neon_vabds>;
defm UABALvvv :  NeonI_3VDL_Aba_v1<0b1, 0b0101, "uabal",
                                   add, int_arm_neon_vabdu>;
2875
// Second-half accumulating abd-long: subop is resolved by name to the
// NI_*_hi_<size> fragment so the difference is taken over the high
// halves of 128-bit sources.
multiclass NeonI_3VDL2_Aba_v1<bit u, bits<4> opcode, string asmop,
                              SDPatternOperator opnode, string subop> {
  def _8h8b : NeonI_3VDL_Aba<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                             opnode, !cast<PatFrag>(subop # "_16B"),
                             VPR128, v8i16, v16i8, v8i8>;
  def _4s4h : NeonI_3VDL_Aba<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                             opnode, !cast<PatFrag>(subop # "_8H"),
                             VPR128, v4i32, v8i16, v4i16>;
  def _2d2s : NeonI_3VDL_Aba<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                             opnode, !cast<PatFrag>(subop # "_4S"),
                             VPR128, v2i64, v4i32, v2i32>;
}

defm SABAL2vvv :  NeonI_3VDL2_Aba_v1<0b0, 0b0101, "sabal2", add,
                                     "NI_sabdl_hi">;
defm UABAL2vvv :  NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add,
                                     "NI_uabdl_hi">;
2893
// Long pattern with 2 operands
// 64-bit sources, 128-bit widened result (reuses NeonI_3VD_2Op with the
// register classes swapped relative to the narrow form).
multiclass NeonI_3VDL_2Op<bit u, bits<4> opcode, string asmop,
                          SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                              opnode, VPR128, VPR64, v8i16, v8i8>;
    def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                              opnode, VPR128, VPR64, v4i32, v4i16>;
    def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                              opnode, VPR128, VPR64, v2i64, v2i32>;
  }
}

defm SMULLvvv :  NeonI_3VDL_2Op<0b0, 0b1100, "smull", int_arm_neon_vmulls, 1>;
defm UMULLvvv :  NeonI_3VDL_2Op<0b1, 0b1100, "umull", int_arm_neon_vmullu, 1>;
2909
// Second-half long multiply: both operands are 128-bit; the opnode
// (an NI_*_hi_* fragment) applies the multiply to the high halves.
class NeonI_3VDL2_2Op_mull<bit q, bit u, bits<2> size, bits<4> opcode,
                           string asmop, string ResS, string OpS,
                           SDPatternOperator opnode,
                           ValueType ResTy, ValueType OpTy>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                 [(set (ResTy VPR128:$Rd),
                    (ResTy (opnode (OpTy VPR128:$Rn), (OpTy VPR128:$Rm))))],
                 NoItinerary>;

multiclass NeonI_3VDL2_2Op_mull_v1<bit u, bits<4> opcode, string asmop,
                                   string opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                                      !cast<PatFrag>(opnode # "_16B"),
                                      v8i16, v16i8>;
    def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                                     !cast<PatFrag>(opnode # "_8H"),
                                     v4i32, v8i16>;
    def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                                     !cast<PatFrag>(opnode # "_4S"),
                                     v2i64, v4i32>;
  }
}

defm SMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b0, 0b1100, "smull2",
                                         "NI_smull_hi", 1>;
defm UMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b1, 0b1100, "umull2",
                                         "NI_umull_hi", 1>;
2940
// Long pattern with 3 operands
// Multiply-accumulate long: $Rd = opnode($src, $Rn, $Rm) with $src tied
// to $Rd; opnode is one of the Neon_*ml?l fragments below.
class NeonI_3VDL_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
                     string asmop, string ResS, string OpS,
                     SDPatternOperator opnode,
                     ValueType ResTy, ValueType OpTy>
  : NeonI_3VDiff<q, u, size, opcode,
                 (outs VPR128:$Rd), (ins VPR128:$src, VPR64:$Rn, VPR64:$Rm),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                 [(set (ResTy VPR128:$Rd),
                    (ResTy (opnode
                      (ResTy VPR128:$src),
                      (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))))],
               NoItinerary> {
  let Constraints = "$src = $Rd";
}

multiclass NeonI_3VDL_3Op_v1<bit u, bits<4> opcode, string asmop,
                             SDPatternOperator opnode> {
  def _8h8b : NeonI_3VDL_3Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                             opnode, v8i16, v8i8>;
  def _4s4h : NeonI_3VDL_3Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                             opnode, v4i32, v4i16>;
  def _2d2s : NeonI_3VDL_3Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                             opnode, v2i64, v2i32>;
}

// smlal/umlal/smlsl/umlsl expressed as add/sub of a long multiply.
def Neon_smlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
                         (add node:$Rd,
                            (int_arm_neon_vmulls node:$Rn, node:$Rm))>;

def Neon_umlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
                         (add node:$Rd,
                            (int_arm_neon_vmullu node:$Rn, node:$Rm))>;

def Neon_smlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
                         (sub node:$Rd,
                            (int_arm_neon_vmulls node:$Rn, node:$Rm))>;

def Neon_umlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
                         (sub node:$Rd,
                            (int_arm_neon_vmullu node:$Rn, node:$Rm))>;

defm SMLALvvv :  NeonI_3VDL_3Op_v1<0b0, 0b1000, "smlal", Neon_smlal>;
defm UMLALvvv :  NeonI_3VDL_3Op_v1<0b1, 0b1000, "umlal", Neon_umlal>;

defm SMLSLvvv :  NeonI_3VDL_3Op_v1<0b0, 0b1010, "smlsl", Neon_smlsl>;
defm UMLSLvvv :  NeonI_3VDL_3Op_v1<0b1, 0b1010, "umlsl", Neon_umlsl>;
2988
// Second-half multiply-accumulate/subtract long:
// $Rd = subop($src, opnode($Rn, $Rm)) with $src tied to $Rd.
// subop is add/sub (or a saturating intrinsic for the sqdmlal users
// below); opnode performs the long multiply.
class NeonI_3VDL2_3Op_mlas<bit q, bit u, bits<2> size, bits<4> opcode,
                           string asmop, string ResS, string OpS,
                           SDPatternOperator subop, SDPatternOperator opnode,
                           RegisterOperand OpVPR,
                           ValueType ResTy, ValueType OpTy>
  : NeonI_3VDiff<q, u, size, opcode,
               (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
               asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
               [(set (ResTy VPR128:$Rd),
                  (ResTy (subop
                    (ResTy VPR128:$src),
                    (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))],
               NoItinerary> {
  let Constraints = "$src = $Rd";
}

multiclass NeonI_3VDL2_3Op_mlas_v1<bit u, bits<4> opcode, string asmop,
                                   SDPatternOperator subop, string opnode> {
  def _8h16b : NeonI_3VDL2_3Op_mlas<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                                    subop, !cast<PatFrag>(opnode # "_16B"),
                                    VPR128, v8i16, v16i8>;
  def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                                   subop, !cast<PatFrag>(opnode # "_8H"),
                                   VPR128, v4i32, v8i16>;
  def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                                   subop, !cast<PatFrag>(opnode # "_4S"),
                                   VPR128, v2i64, v4i32>;
}

defm SMLAL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1000, "smlal2",
                                          add, "NI_smull_hi">;
defm UMLAL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1000, "umlal2",
                                          add, "NI_umull_hi">;

defm SMLSL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1010, "smlsl2",
                                          sub, "NI_smull_hi">;
defm UMLSL2vvv :  NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1010, "umlsl2",
                                          sub, "NI_umull_hi">;
3027
// sqdmlal/sqdmlsl (first-half): saturating doubling multiply-long
// combined with saturating add/sub.  Reuses NeonI_3VDL2_3Op_mlas with
// 64-bit (VPR64) sources; only 4h and 2s element sizes exist.
multiclass NeonI_3VDL_qdmlal_3Op_v2<bit u, bits<4> opcode, string asmop,
                                    SDPatternOperator opnode> {
  def _4s4h : NeonI_3VDL2_3Op_mlas<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                                   opnode, int_arm_neon_vqdmull,
                                   VPR64, v4i32, v4i16>;
  def _2d2s : NeonI_3VDL2_3Op_mlas<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                                   opnode, int_arm_neon_vqdmull,
                                   VPR64, v2i64, v2i32>;
}

defm SQDMLALvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1001, "sqdmlal",
                                           int_arm_neon_vqadds>;
defm SQDMLSLvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1011, "sqdmlsl",
                                           int_arm_neon_vqsubs>;

// sqdmull (first-half): only 4h and 2s element sizes exist.
multiclass NeonI_3VDL_v2<bit u, bits<4> opcode, string asmop,
                         SDPatternOperator opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
                              opnode, VPR128, VPR64, v4i32, v4i16>;
    def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
                              opnode, VPR128, VPR64, v2i64, v2i32>;
  }
}

defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull",
                                int_arm_neon_vqdmull, 1>;
3055
// sqdmull2: second-half saturating doubling multiply-long (8h and 4s
// source arrangements only).
multiclass NeonI_3VDL2_2Op_mull_v2<bit u, bits<4> opcode, string asmop,
                                   string opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                                     !cast<PatFrag>(opnode # "_8H"),
                                     v4i32, v8i16>;
    def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                                     !cast<PatFrag>(opnode # "_4S"),
                                     v2i64, v4i32>;
  }
}

defm SQDMULL2vvv : NeonI_3VDL2_2Op_mull_v2<0b0, 0b1101, "sqdmull2",
                                           "NI_qdmull_hi", 1>;

// sqdmlal2/sqdmlsl2: second-half variants, accumulating with the
// saturating add/sub intrinsics.
multiclass NeonI_3VDL2_3Op_qdmlal_v2<bit u, bits<4> opcode, string asmop,
                                     SDPatternOperator opnode> {
  def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
                                   opnode, NI_qdmull_hi_8H,
                                   VPR128, v4i32, v8i16>;
  def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
                                   opnode, NI_qdmull_hi_4S,
                                   VPR128, v2i64, v4i32>;
}

defm SQDMLAL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1001, "sqdmlal2",
                                             int_arm_neon_vqadds>;
defm SQDMLSL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1011, "sqdmlsl2",
                                             int_arm_neon_vqsubs>;
3085
// pmull: polynomial multiply-long.  The 8b form uses the ARM NEON
// intrinsic; the 1d form (crypto, 64x64 -> 128-bit) uses the AArch64
// vmull_p64 intrinsic with a v16i8 result type.
multiclass NeonI_3VDL_v3<bit u, bits<4> opcode, string asmop,
                         SDPatternOperator opnode_8h8b,
                         SDPatternOperator opnode_1q1d, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                              opnode_8h8b, VPR128, VPR64, v8i16, v8i8>;

    def _1q1d : NeonI_3VD_2Op<0b0, u, 0b11, opcode, asmop, "1q", "1d",
                              opnode_1q1d, VPR128, VPR64, v16i8, v1i64>;
  }
}

defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp,
                              int_aarch64_neon_vmull_p64, 1>;
3100
// pmull2: second-half polynomial multiply-long.  The 1q2d form has an
// explicit pattern: it extracts lane 1 (the high doubleword) of each
// 2d operand and feeds the pair to vmull_p64.
multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode, string asmop,
                                   string opnode, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
                                      !cast<PatFrag>(opnode # "_16B"),
                                      v8i16, v16i8>;

    def _1q2d :
      NeonI_3VDiff<0b1, u, 0b11, opcode,
                   (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                   asmop # "\t$Rd.1q, $Rn.2d, $Rm.2d",
                   [(set (v16i8 VPR128:$Rd),
                      (v16i8 (int_aarch64_neon_vmull_p64
                        (v1i64 (scalar_to_vector
                          (i64 (vector_extract (v2i64 VPR128:$Rn), 1)))),
                        (v1i64 (scalar_to_vector
                          (i64 (vector_extract (v2i64 VPR128:$Rm), 1)))))))],
                   NoItinerary>;
  }
}

defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2", "NI_pmull_hi",
                                         1>;
3124
3125 // End of implementation for instruction class (3V Diff)
3126
// The following are the vector load/store multiple N-element structure
// instructions (class SIMD lselem).
3129
3130 // ld1:         load multiple 1-element structure to 1/2/3/4 registers.
3131 // ld2/ld3/ld4: load multiple N-element structure to N registers (N = 2, 3, 4).
3132 //              The structure consists of a sequence of sets of N values.
3133 //              The first element of the structure is placed in the first lane
//              of the first vector, the second element in the first lane
3135 //              of the second vector, and so on.
3136 // E.g. LD1_3V_2S will load 32-bit elements {A, B, C, D, E, F} sequentially into
3137 // the three 64-bit vectors list {BA, DC, FE}.
3138 // E.g. LD3_2S will load 32-bit elements {A, B, C, D, E, F} into the three
3139 // 64-bit vectors list {DA, EB, FC}.
3140 // Store instructions store multiple structure to N registers like load.
3141
3142
// Load of a vector register list from [Rn].  No selection pattern;
// these are matched/used via intrinsic lowering elsewhere.
class NeonI_LDVList<bit q, bits<4> opcode, bits<2> size,
                    RegisterOperand VecList, string asmop>
  : NeonI_LdStMult<q, 1, opcode, size,
                 (outs VecList:$Rt), (ins GPR64xsp:$Rn),
                 asmop # "\t$Rt, [$Rn]",
                 [],
                 NoItinerary> {
  let mayLoad = 1;
  let neverHasSideEffects = 1;
}

// Instantiates the B/H/S arrangements for 64-bit lists and B/H/S/D for
// 128-bit lists; the register-list operand is resolved by name.
// The 64-bit 1D arrangement is defined separately below.
multiclass LDVList_BHSD<bits<4> opcode, string List, string asmop> {
  def _8B : NeonI_LDVList<0, opcode, 0b00,
                          !cast<RegisterOperand>(List # "8B_operand"), asmop>;

  def _4H : NeonI_LDVList<0, opcode, 0b01,
                          !cast<RegisterOperand>(List # "4H_operand"), asmop>;

  def _2S : NeonI_LDVList<0, opcode, 0b10,
                          !cast<RegisterOperand>(List # "2S_operand"), asmop>;

  def _16B : NeonI_LDVList<1, opcode, 0b00,
                           !cast<RegisterOperand>(List # "16B_operand"), asmop>;

  def _8H : NeonI_LDVList<1, opcode, 0b01,
                          !cast<RegisterOperand>(List # "8H_operand"), asmop>;

  def _4S : NeonI_LDVList<1, opcode, 0b10,
                          !cast<RegisterOperand>(List # "4S_operand"), asmop>;

  def _2D : NeonI_LDVList<1, opcode, 0b11,
                          !cast<RegisterOperand>(List # "2D_operand"), asmop>;
}

// Load multiple N-element structure to N consecutive registers (N = 1,2,3,4)
defm LD1 : LDVList_BHSD<0b0111, "VOne", "ld1">;
def LD1_1D : NeonI_LDVList<0, 0b0111, 0b11, VOne1D_operand, "ld1">;

defm LD2 : LDVList_BHSD<0b1000, "VPair", "ld2">;

defm LD3 : LDVList_BHSD<0b0100, "VTriple", "ld3">;

defm LD4 : LDVList_BHSD<0b0000, "VQuad", "ld4">;

// Load multiple 1-element structure to N consecutive registers (N = 2,3,4)
defm LD1x2 : LDVList_BHSD<0b1010, "VPair", "ld1">;
def LD1x2_1D : NeonI_LDVList<0, 0b1010, 0b11, VPair1D_operand, "ld1">;

defm LD1x3 : LDVList_BHSD<0b0110, "VTriple", "ld1">;
def LD1x3_1D : NeonI_LDVList<0, 0b0110, 0b11, VTriple1D_operand, "ld1">;

defm LD1x4 : LDVList_BHSD<0b0010, "VQuad", "ld1">;
def LD1x4_1D : NeonI_LDVList<0, 0b0010, 0b11, VQuad1D_operand, "ld1">;
3196
// Store of a vector register list to [Rn].  Mirrors NeonI_LDVList with
// the L bit clear and the list as a source operand.
class NeonI_STVList<bit q, bits<4> opcode, bits<2> size,
                    RegisterOperand VecList, string asmop>
  : NeonI_LdStMult<q, 0, opcode, size,
                 (outs), (ins GPR64xsp:$Rn, VecList:$Rt),
                 asmop # "\t$Rt, [$Rn]",
                 [],
                 NoItinerary> {
  let mayStore = 1;
  let neverHasSideEffects = 1;
}

// Same arrangement coverage as LDVList_BHSD; 1D variants are separate.
multiclass STVList_BHSD<bits<4> opcode, string List, string asmop> {
  def _8B : NeonI_STVList<0, opcode, 0b00,
                          !cast<RegisterOperand>(List # "8B_operand"), asmop>;

  def _4H : NeonI_STVList<0, opcode, 0b01,
                          !cast<RegisterOperand>(List # "4H_operand"), asmop>;

  def _2S : NeonI_STVList<0, opcode, 0b10,
                          !cast<RegisterOperand>(List # "2S_operand"), asmop>;

  def _16B : NeonI_STVList<1, opcode, 0b00,
                           !cast<RegisterOperand>(List # "16B_operand"), asmop>;

  def _8H : NeonI_STVList<1, opcode, 0b01,
                          !cast<RegisterOperand>(List # "8H_operand"), asmop>;

  def _4S : NeonI_STVList<1, opcode, 0b10,
                          !cast<RegisterOperand>(List # "4S_operand"), asmop>;

  def _2D : NeonI_STVList<1, opcode, 0b11,
                          !cast<RegisterOperand>(List # "2D_operand"), asmop>;
}

// Store multiple N-element structures from N registers (N = 1,2,3,4)
defm ST1 : STVList_BHSD<0b0111, "VOne", "st1">;
def ST1_1D : NeonI_STVList<0, 0b0111, 0b11, VOne1D_operand, "st1">;

defm ST2 : STVList_BHSD<0b1000, "VPair", "st2">;

defm ST3 : STVList_BHSD<0b0100, "VTriple", "st3">;

defm ST4 : STVList_BHSD<0b0000, "VQuad", "st4">;

// Store multiple 1-element structures from N consecutive registers (N = 2,3,4)
defm ST1x2 : STVList_BHSD<0b1010, "VPair", "st1">;
def ST1x2_1D : NeonI_STVList<0, 0b1010, 0b11, VPair1D_operand, "st1">;

defm ST1x3 : STVList_BHSD<0b0110, "VTriple", "st1">;
def ST1x3_1D : NeonI_STVList<0, 0b0110, 0b11, VTriple1D_operand, "st1">;

defm ST1x4 : STVList_BHSD<0b0010, "VQuad", "st1">;
def ST1x4_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">;
3250
// Select plain vector load/store through the single-register ld1/st1
// forms, one pattern per legal vector type.
def : Pat<(v2f64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>;
def : Pat<(v2i64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>;

def : Pat<(v4f32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>;
def : Pat<(v4i32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>;

def : Pat<(v8i16 (load GPR64xsp:$addr)), (LD1_8H GPR64xsp:$addr)>;
def : Pat<(v16i8 (load GPR64xsp:$addr)), (LD1_16B GPR64xsp:$addr)>;

def : Pat<(v1f64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>;
def : Pat<(v1i64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>;

def : Pat<(v2f32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>;
def : Pat<(v2i32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>;

def : Pat<(v4i16 (load GPR64xsp:$addr)), (LD1_4H GPR64xsp:$addr)>;
def : Pat<(v8i8 (load GPR64xsp:$addr)), (LD1_8B GPR64xsp:$addr)>;

def : Pat<(store (v2i64 VPR128:$value), GPR64xsp:$addr),
          (ST1_2D GPR64xsp:$addr, VPR128:$value)>;
def : Pat<(store (v2f64 VPR128:$value), GPR64xsp:$addr),
          (ST1_2D GPR64xsp:$addr, VPR128:$value)>;

def : Pat<(store (v4i32 VPR128:$value), GPR64xsp:$addr),
          (ST1_4S GPR64xsp:$addr, VPR128:$value)>;
def : Pat<(store (v4f32 VPR128:$value), GPR64xsp:$addr),
          (ST1_4S GPR64xsp:$addr, VPR128:$value)>;

def : Pat<(store (v8i16 VPR128:$value), GPR64xsp:$addr),
          (ST1_8H GPR64xsp:$addr, VPR128:$value)>;
def : Pat<(store (v16i8 VPR128:$value), GPR64xsp:$addr),
          (ST1_16B GPR64xsp:$addr, VPR128:$value)>;

def : Pat<(store (v1i64 VPR64:$value), GPR64xsp:$addr),
          (ST1_1D GPR64xsp:$addr, VPR64:$value)>;
def : Pat<(store (v1f64 VPR64:$value), GPR64xsp:$addr),
          (ST1_1D GPR64xsp:$addr, VPR64:$value)>;

def : Pat<(store (v2i32 VPR64:$value), GPR64xsp:$addr),
          (ST1_2S GPR64xsp:$addr, VPR64:$value)>;
def : Pat<(store (v2f32 VPR64:$value), GPR64xsp:$addr),
          (ST1_2S GPR64xsp:$addr, VPR64:$value)>;

def : Pat<(store (v4i16 VPR64:$value), GPR64xsp:$addr),
          (ST1_4H GPR64xsp:$addr, VPR64:$value)>;
def : Pat<(store (v8i8 VPR64:$value), GPR64xsp:$addr),
          (ST1_8B GPR64xsp:$addr, VPR64:$value)>;

// Match load/store of v1i8/v1i16/v1i32 type to FPR8/FPR16/FPR32 load/store.
// FIXME: for now we have v1i8, v1i16, v1i32 legal types, if they are illegal,
// these patterns are not needed any more.
def : Pat<(v1i8 (load GPR64xsp:$addr)), (LSFP8_LDR $addr, 0)>;
def : Pat<(v1i16 (load GPR64xsp:$addr)), (LSFP16_LDR $addr, 0)>;
def : Pat<(v1i32 (load GPR64xsp:$addr)), (LSFP32_LDR $addr, 0)>;

def : Pat<(store (v1i8 FPR8:$value), GPR64xsp:$addr),
          (LSFP8_STR $value, $addr, 0)>;
def : Pat<(store (v1i16 FPR16:$value), GPR64xsp:$addr),
          (LSFP16_STR $value, $addr, 0)>;
def : Pat<(store (v1i32 FPR32:$value), GPR64xsp:$addr),
          (LSFP32_STR $value, $addr, 0)>;
3312
3313
3314 // End of vector load/store multiple N-element structure(class SIMD lselem)
3315
3316 // The following are post-index vector load/store multiple N-element
3317 // structure instructions (class SIMD lselem-post).
// Immediate operands that accept exactly one value. Post-indexed vector
// load/store instructions with Rm == 0b11111 advance the base register by a
// fixed amount equal to the number of bytes transferred, so the assembler
// must only accept that one immediate for each instruction form. Each
// uimm_exactN pairs an ImmLeaf (for ISel) with an AsmOperandClass whose
// isExactImm<N> predicate enforces the value during assembly parsing.
3318 def exact1_asmoperand : AsmOperandClass {
3319   let Name = "Exact1";
3320   let PredicateMethod = "isExactImm<1>";
3321   let RenderMethod = "addImmOperands";
3322 }
3323 def uimm_exact1 : Operand<i32>, ImmLeaf<i32, [{return Imm == 1;}]> {
3324   let ParserMatchClass = exact1_asmoperand;
3325 }
3326
3327 def exact2_asmoperand : AsmOperandClass {
3328   let Name = "Exact2";
3329   let PredicateMethod = "isExactImm<2>";
3330   let RenderMethod = "addImmOperands";
3331 }
3332 def uimm_exact2 : Operand<i32>, ImmLeaf<i32, [{return Imm == 2;}]> {
3333   let ParserMatchClass = exact2_asmoperand;
3334 }
3335
3336 def exact3_asmoperand : AsmOperandClass {
3337   let Name = "Exact3";
3338   let PredicateMethod = "isExactImm<3>";
3339   let RenderMethod = "addImmOperands";
3340 }
3341 def uimm_exact3 : Operand<i32>, ImmLeaf<i32, [{return Imm == 3;}]> {
3342   let ParserMatchClass = exact3_asmoperand;
3343 }
3344
3345 def exact4_asmoperand : AsmOperandClass {
3346   let Name = "Exact4";
3347   let PredicateMethod = "isExactImm<4>";
3348   let RenderMethod = "addImmOperands";
3349 }
3350 def uimm_exact4 : Operand<i32>, ImmLeaf<i32, [{return Imm == 4;}]> {
3351   let ParserMatchClass = exact4_asmoperand;
3352 }
3353
3354 def exact6_asmoperand : AsmOperandClass {
3355   let Name = "Exact6";
3356   let PredicateMethod = "isExactImm<6>";
3357   let RenderMethod = "addImmOperands";
3358 }
3359 def uimm_exact6 : Operand<i32>, ImmLeaf<i32, [{return Imm == 6;}]> {
3360   let ParserMatchClass = exact6_asmoperand;
3361 }
3362
3363 def exact8_asmoperand : AsmOperandClass {
3364   let Name = "Exact8";
3365   let PredicateMethod = "isExactImm<8>";
3366   let RenderMethod = "addImmOperands";
3367 }
3368 def uimm_exact8 : Operand<i32>, ImmLeaf<i32, [{return Imm == 8;}]> {
3369   let ParserMatchClass = exact8_asmoperand;
3370 }
3371
3372 def exact12_asmoperand : AsmOperandClass {
3373   let Name = "Exact12";
3374   let PredicateMethod = "isExactImm<12>";
3375   let RenderMethod = "addImmOperands";
3376 }
3377 def uimm_exact12 : Operand<i32>, ImmLeaf<i32, [{return Imm == 12;}]> {
3378   let ParserMatchClass = exact12_asmoperand;
3379 }
3380
3381 def exact16_asmoperand : AsmOperandClass {
3382   let Name = "Exact16";
3383   let PredicateMethod = "isExactImm<16>";
3384   let RenderMethod = "addImmOperands";
3385 }
3386 def uimm_exact16 : Operand<i32>, ImmLeaf<i32, [{return Imm == 16;}]> {
3387   let ParserMatchClass = exact16_asmoperand;
3388 }
3389
3390 def exact24_asmoperand : AsmOperandClass {
3391   let Name = "Exact24";
3392   let PredicateMethod = "isExactImm<24>";
3393   let RenderMethod = "addImmOperands";
3394 }
3395 def uimm_exact24 : Operand<i32>, ImmLeaf<i32, [{return Imm == 24;}]> {
3396   let ParserMatchClass = exact24_asmoperand;
3397 }
3398
3399 def exact32_asmoperand : AsmOperandClass {
3400   let Name = "Exact32";
3401   let PredicateMethod = "isExactImm<32>";
3402   let RenderMethod = "addImmOperands";
3403 }
3404 def uimm_exact32 : Operand<i32>, ImmLeaf<i32, [{return Imm == 32;}]> {
3405   let ParserMatchClass = exact32_asmoperand;
3406 }
3407
3408 def exact48_asmoperand : AsmOperandClass {
3409   let Name = "Exact48";
3410   let PredicateMethod = "isExactImm<48>";
3411   let RenderMethod = "addImmOperands";
3412 }
3413 def uimm_exact48 : Operand<i32>, ImmLeaf<i32, [{return Imm == 48;}]> {
3414   let ParserMatchClass = exact48_asmoperand;
3415 }
3416
3417 def exact64_asmoperand : AsmOperandClass {
3418   let Name = "Exact64";
3419   let PredicateMethod = "isExactImm<64>";
3420   let RenderMethod = "addImmOperands";
3421 }
3422 def uimm_exact64 : Operand<i32>, ImmLeaf<i32, [{return Imm == 64;}]> {
3423   let ParserMatchClass = exact64_asmoperand;
3424 }
3425
// Post-indexed (write-back) vector-list load. Emits two variants:
//   _fixed    - "ld*\t$Rt, [$Rn], #imm": the base is advanced by the fixed
//               transfer size; encoded with Rm = 0b11111 (the immediate form).
//   _register - "ld*\t$Rt, [$Rn], $Rm": the base is advanced by Rm.
// Both tie the updated base $wb to the input base $Rn.
3426 multiclass NeonI_LDWB_VList<bit q, bits<4> opcode, bits<2> size,
3427                            RegisterOperand VecList, Operand ImmTy,
3428                            string asmop> {
3429   let Constraints = "$Rn = $wb", mayLoad = 1, neverHasSideEffects = 1,
3430       DecoderMethod = "DecodeVLDSTPostInstruction" in {
3431     def _fixed : NeonI_LdStMult_Post<q, 1, opcode, size,
3432                      (outs VecList:$Rt, GPR64xsp:$wb),
3433                      (ins GPR64xsp:$Rn, ImmTy:$amt),
3434                      asmop # "\t$Rt, [$Rn], $amt",
3435                      [],
3436                      NoItinerary> {
       // Rm = 0b11111 (xzr slot) selects the immediate post-index encoding.
3437       let Rm = 0b11111;
3438     }
3439
3440     def _register : NeonI_LdStMult_Post<q, 1, opcode, size,
3441                         (outs VecList:$Rt, GPR64xsp:$wb),
3442                         (ins GPR64xsp:$Rn, GPR64noxzr:$Rm),
3443                         asmop # "\t$Rt, [$Rn], $Rm",
3444                         [],
3445                         NoItinerary>;
3446   }
3447 }
3448
// Instantiate NeonI_LDWB_VList for every layout except 1D. ImmTy is the
// exact post-index amount for the 64-bit (q=0) layouts, ImmTy2 for the
// 128-bit (q=1) layouts, which transfer twice as many bytes.
3449 multiclass LDWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
3450     Operand ImmTy2, string asmop> {
3451   defm _8B : NeonI_LDWB_VList<0, opcode, 0b00,
3452                               !cast<RegisterOperand>(List # "8B_operand"),
3453                               ImmTy, asmop>;
3454
3455   defm _4H : NeonI_LDWB_VList<0, opcode, 0b01,
3456                               !cast<RegisterOperand>(List # "4H_operand"),
3457                               ImmTy, asmop>;
3458
3459   defm _2S : NeonI_LDWB_VList<0, opcode, 0b10,
3460                               !cast<RegisterOperand>(List # "2S_operand"),
3461                               ImmTy, asmop>;
3462
3463   defm _16B : NeonI_LDWB_VList<1, opcode, 0b00,
3464                                !cast<RegisterOperand>(List # "16B_operand"),
3465                                ImmTy2, asmop>;
3466
3467   defm _8H : NeonI_LDWB_VList<1, opcode, 0b01,
3468                               !cast<RegisterOperand>(List # "8H_operand"),
3469                               ImmTy2, asmop>;
3470
3471   defm _4S : NeonI_LDWB_VList<1, opcode, 0b10,
3472                               !cast<RegisterOperand>(List # "4S_operand"),
3473                               ImmTy2, asmop>;
3474
3475   defm _2D : NeonI_LDWB_VList<1, opcode, 0b11,
3476                               !cast<RegisterOperand>(List # "2D_operand"),
3477                               ImmTy2, asmop>;
3478 }
3479
3480 // Post-index load multiple N-element structures from N registers (N = 1,2,3,4)
// Exact immediates are the bytes transferred: 8 per 64-bit register and 16
// per 128-bit register, times the number of registers in the list.
3481 defm LD1WB : LDWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "ld1">;
3482 defm LD1WB_1D : NeonI_LDWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
3483                                  "ld1">;
3484
3485 defm LD2WB : LDWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "ld2">;
3486
3487 defm LD3WB : LDWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
3488                              "ld3">;
3489
3490 defm LD4WB : LDWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "ld4">;
3491
3492 // Post-index load multiple 1-element structures from N consecutive registers
3493 // (N = 2,3,4)
3494 defm LD1x2WB : LDWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
3495                                "ld1">;
3496 defm LD1x2WB_1D : NeonI_LDWB_VList<0, 0b1010, 0b11, VPair1D_operand,
3497                                    uimm_exact16, "ld1">;
3498
3499 defm LD1x3WB : LDWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
3500                                "ld1">;
3501 defm LD1x3WB_1D : NeonI_LDWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
3502                                    uimm_exact24, "ld1">;
3503
3504 defm LD1x4WB : LDWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
3505                                 "ld1">;
3506 defm LD1x4WB_1D : NeonI_LDWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
3507                                    uimm_exact32, "ld1">;
3508
// Post-indexed (write-back) vector-list store; store-side counterpart of
// NeonI_LDWB_VList. _fixed advances the base by the fixed transfer size
// (Rm = 0b11111 selects the immediate encoding); _register advances by $Rm.
3509 multiclass NeonI_STWB_VList<bit q, bits<4> opcode, bits<2> size,
3510                             RegisterOperand VecList, Operand ImmTy,
3511                             string asmop> {
3512   let Constraints = "$Rn = $wb", mayStore = 1, neverHasSideEffects = 1,
3513       DecoderMethod = "DecodeVLDSTPostInstruction" in {
3514     def _fixed : NeonI_LdStMult_Post<q, 0, opcode, size,
3515                      (outs GPR64xsp:$wb),
3516                      (ins GPR64xsp:$Rn, ImmTy:$amt, VecList:$Rt),
3517                      asmop # "\t$Rt, [$Rn], $amt",
3518                      [],
3519                      NoItinerary> {
3520       let Rm = 0b11111;
3521     }
3522
3523     def _register : NeonI_LdStMult_Post<q, 0, opcode, size,
3524                       (outs GPR64xsp:$wb),
3525                       (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VecList:$Rt),
3526                       asmop # "\t$Rt, [$Rn], $Rm",
3527                       [],
3528                       NoItinerary>;
3529   }
3530 }
3531
// Instantiate NeonI_STWB_VList for every layout except 1D; mirrors
// LDWB_VList_BHSD (ImmTy for 64-bit layouts, ImmTy2 for 128-bit layouts).
3532 multiclass STWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
3533                            Operand ImmTy2, string asmop> {
3534   defm _8B : NeonI_STWB_VList<0, opcode, 0b00,
3535                  !cast<RegisterOperand>(List # "8B_operand"), ImmTy, asmop>;
3536
3537   defm _4H : NeonI_STWB_VList<0, opcode, 0b01,
3538                               !cast<RegisterOperand>(List # "4H_operand"),
3539                               ImmTy, asmop>;
3540
3541   defm _2S : NeonI_STWB_VList<0, opcode, 0b10,
3542                               !cast<RegisterOperand>(List # "2S_operand"),
3543                               ImmTy, asmop>;
3544
3545   defm _16B : NeonI_STWB_VList<1, opcode, 0b00,
3546                                !cast<RegisterOperand>(List # "16B_operand"),
3547                                ImmTy2, asmop>;
3548
3549   defm _8H : NeonI_STWB_VList<1, opcode, 0b01,
3550                               !cast<RegisterOperand>(List # "8H_operand"),
3551                               ImmTy2, asmop>;
3552
3553   defm _4S : NeonI_STWB_VList<1, opcode, 0b10,
3554                               !cast<RegisterOperand>(List # "4S_operand"),
3555                               ImmTy2, asmop>;
3556
3557   defm _2D : NeonI_STWB_VList<1, opcode, 0b11,
3558                               !cast<RegisterOperand>(List # "2D_operand"),
3559                               ImmTy2, asmop>;
3560 }
3561
3562 // Post-index store multiple N-element structures from N registers
3563 defm ST1WB : STWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "st1">;
3564 defm ST1WB_1D : NeonI_STWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
3565                                  "st1">;
3566
3567 defm ST2WB : STWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "st2">;
3568
3569 defm ST3WB : STWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
3570                              "st3">;
3571
3572 defm ST4WB : STWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "st4">;
3573
3574 // Post-index store multiple 1-element structures from N consecutive registers
3575 // (N = 2,3,4)
3576 defm ST1x2WB : STWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
3577                                "st1">;
3578 defm ST1x2WB_1D : NeonI_STWB_VList<0, 0b1010, 0b11, VPair1D_operand,
3579                                    uimm_exact16, "st1">;
3580
3581 defm ST1x3WB : STWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
3582                                "st1">;
3583 defm ST1x3WB_1D : NeonI_STWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
3584                                    uimm_exact24, "st1">;
3585
3586 defm ST1x4WB : STWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
3587                                "st1">;
3588 defm ST1x4WB_1D : NeonI_STWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
3589                                    uimm_exact32, "st1">;
3590
3591 // End of post-index vector load/store multiple N-element structure
3592 // (class SIMD lselem-post)
3593
3594 // The following are vector load/store single N-element structure
3595 // instructions (class SIMD lsone).
// Lane-index operands for the single-structure (lane) instructions. They
// print "bare" (no '#' prefix) via printUImmBareOperand. neon_uimmN_bare
// accepts indices 0 .. 2^N-1; neon_uimm0_bare only index 0 (the sole lane
// of a 1-element vector).
3596 def neon_uimm0_bare : Operand<i64>,
3597                         ImmLeaf<i64, [{return Imm == 0;}]> {
3598   let ParserMatchClass = neon_uimm0_asmoperand;
3599   let PrintMethod = "printUImmBareOperand";
3600 }
3601
3602 def neon_uimm1_bare : Operand<i64>,
3603                         ImmLeaf<i64, [{return Imm < 2;}]> {
3604   let ParserMatchClass = neon_uimm1_asmoperand;
3605   let PrintMethod = "printUImmBareOperand";
3606 }
3607
3608 def neon_uimm2_bare : Operand<i64>,
3609                         ImmLeaf<i64, [{return Imm < 4;}]> {
3610   let ParserMatchClass = neon_uimm2_asmoperand;
3611   let PrintMethod = "printUImmBareOperand";
3612 }
3613
3614 def neon_uimm3_bare : Operand<i64>,
3615                         ImmLeaf<i64, [{return Imm < 8;}]> {
3616   let ParserMatchClass = uimm3_asmoperand;
3617   let PrintMethod = "printUImmBareOperand";
3618 }
3619
3620 def neon_uimm4_bare : Operand<i64>,
3621                         ImmLeaf<i64, [{return Imm < 16;}]> {
3622   let ParserMatchClass = uimm4_asmoperand;
3623   let PrintMethod = "printUImmBareOperand";
3624 }
3625
// Load-and-replicate (LDnR): load one N-element structure and duplicate it
// to every lane of the registers in VecList.
3626 class NeonI_LDN_Dup<bit q, bit r, bits<3> opcode, bits<2> size,
3627                     RegisterOperand VecList, string asmop>
3628     : NeonI_LdOne_Dup<q, r, opcode, size,
3629                       (outs VecList:$Rt), (ins GPR64xsp:$Rn),
3630                       asmop # "\t$Rt, [$Rn]",
3631                       [],
3632                       NoItinerary> {
3633   let mayLoad = 1;
3634   let neverHasSideEffects = 1;
3635 }
3636
// Instantiate NeonI_LDN_Dup for all eight layouts (including 1D, which is
// valid for the replicate forms, unlike the structure load/store multiclass).
3637 multiclass LDN_Dup_BHSD<bit r, bits<3> opcode, string List, string asmop> {
3638   def _8B : NeonI_LDN_Dup<0, r, opcode, 0b00,
3639                           !cast<RegisterOperand>(List # "8B_operand"), asmop>;
3640
3641   def _4H : NeonI_LDN_Dup<0, r, opcode, 0b01,
3642                           !cast<RegisterOperand>(List # "4H_operand"), asmop>;
3643
3644   def _2S : NeonI_LDN_Dup<0, r, opcode, 0b10,
3645                           !cast<RegisterOperand>(List # "2S_operand"), asmop>;
3646
3647   def _1D : NeonI_LDN_Dup<0, r, opcode, 0b11,
3648                           !cast<RegisterOperand>(List # "1D_operand"), asmop>;
3649
3650   def _16B : NeonI_LDN_Dup<1, r, opcode, 0b00,
3651                            !cast<RegisterOperand>(List # "16B_operand"), asmop>;
3652
3653   def _8H : NeonI_LDN_Dup<1, r, opcode, 0b01,
3654                           !cast<RegisterOperand>(List # "8H_operand"), asmop>;
3655
3656   def _4S : NeonI_LDN_Dup<1, r, opcode, 0b10,
3657                           !cast<RegisterOperand>(List # "4S_operand"), asmop>;
3658
3659   def _2D : NeonI_LDN_Dup<1, r, opcode, 0b11,
3660                           !cast<RegisterOperand>(List # "2D_operand"), asmop>;
3661 }
3662
3663 // Load single 1-element structure to all lanes of 1 register
3664 defm LD1R : LDN_Dup_BHSD<0b0, 0b110, "VOne", "ld1r">;
3665
3666 // Load single N-element structure to all lanes of N consecutive
3667 // registers (N = 2,3,4)
// N is encoded jointly by the r bit and opcode bit 0.
3668 defm LD2R : LDN_Dup_BHSD<0b1, 0b110, "VPair", "ld2r">;
3669 defm LD3R : LDN_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r">;
3670 defm LD4R : LDN_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r">;
3671
3672
// Select a "load scalar then splat" DAG (Neon_vdup of a loaded element)
// as a single LD1R instruction.
3673 class LD1R_pattern <ValueType VTy, ValueType DTy, PatFrag LoadOp,
3674                     Instruction INST>
3675     : Pat<(VTy (Neon_vdup (DTy (LoadOp GPR64xsp:$Rn)))),
3676           (VTy (INST GPR64xsp:$Rn))>;
3677
3678 // Match all LD1R instructions
// i8/i16 elements arrive as i32 via extending loads, since i8/i16 are not
// legal scalar types.
3679 def : LD1R_pattern<v8i8, i32, extloadi8, LD1R_8B>;
3680
3681 def : LD1R_pattern<v16i8, i32, extloadi8, LD1R_16B>;
3682
3683 def : LD1R_pattern<v4i16, i32, extloadi16, LD1R_4H>;
3684
3685 def : LD1R_pattern<v8i16, i32, extloadi16, LD1R_8H>;
3686
3687 def : LD1R_pattern<v2i32, i32, load, LD1R_2S>;
3688 def : LD1R_pattern<v2f32, f32, load, LD1R_2S>;
3689
3690 def : LD1R_pattern<v4i32, i32, load, LD1R_4S>;
3691 def : LD1R_pattern<v4f32, f32, load, LD1R_4S>;
3692
3693 def : LD1R_pattern<v2i64, i64, load, LD1R_2D>;
3694 def : LD1R_pattern<v2f64, f64, load, LD1R_2D>;
3695
// For one-element vectors a splat degenerates to scalar_to_vector, so match
// that instead of Neon_vdup.
3696 class LD1R_pattern_v1 <ValueType VTy, ValueType DTy, PatFrag LoadOp,
3697                        Instruction INST>
3698   : Pat<(VTy (scalar_to_vector (DTy (LoadOp GPR64xsp:$Rn)))),
3699         (VTy (INST GPR64xsp:$Rn))>;
3700
3701 def : LD1R_pattern_v1<v1i64, i64, load, LD1R_1D>;
3702 def : LD1R_pattern_v1<v1f64, f64, load, LD1R_1D>;
3703
// Vector-list operands whose asm layout is a bare element size ("Vn.b",
// "Vn.h", "Vn.s", "Vn.d") rather than a full shape like "Vn.16b"; used by
// the lane (single-structure) instructions below.
3704 multiclass VectorList_Bare_BHSD<string PREFIX, int Count,
3705                                 RegisterClass RegList> {
3706   defm B : VectorList_operands<PREFIX, "B", Count, RegList>;
3707   defm H : VectorList_operands<PREFIX, "H", Count, RegList>;
3708   defm S : VectorList_operands<PREFIX, "S", Count, RegList>;
3709   defm D : VectorList_operands<PREFIX, "D", Count, RegList>;
3710 }
3711
3712 // Special vector list operand of 128-bit vectors with bare layout.
3713 // i.e. only show ".b", ".h", ".s", ".d"
3714 defm VOne : VectorList_Bare_BHSD<"VOne", 1, FPR128>;
3715 defm VPair : VectorList_Bare_BHSD<"VPair", 2, QPair>;
3716 defm VTriple : VectorList_Bare_BHSD<"VTriple", 3, QTriple>;
3717 defm VQuad : VectorList_Bare_BHSD<"VQuad", 4, QQuad>;
3718
// Load one N-element structure into a single lane of the registers in
// VList. The untouched lanes come from $src, which is tied to $Rt, so the
// register tuple is read-modify-write.
3719 class NeonI_LDN_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
3720                      Operand ImmOp, string asmop>
3721     : NeonI_LdStOne_Lane<1, r, op2_1, op0,
3722                          (outs VList:$Rt),
3723                          (ins GPR64xsp:$Rn, VList:$src, ImmOp:$lane),
3724                          asmop # "\t$Rt[$lane], [$Rn]",
3725                          [],
3726                          NoItinerary> {
3727   let mayLoad = 1;
3728   let neverHasSideEffects = 1;
   // The whole register tuple is defined, not just the lane being loaded.
3729   let hasExtraDefRegAllocReq = 1;
3730   let Constraints = "$src = $Rt";
3731 }
3732
// Per-element-size lane loads. The lane index is split across Q (Inst{30})
// and S:size (Inst{12-10}) exactly as the A64 encoding requires: B uses all
// four bits, H three, S two, and D one (with Inst{12-10} fixed to 0b001).
3733 multiclass LDN_Lane_BHSD<bit r, bit op0, string List, string asmop> {
3734   def _B : NeonI_LDN_Lane<r, 0b00, op0,
3735                           !cast<RegisterOperand>(List # "B_operand"),
3736                           neon_uimm4_bare, asmop> {
3737     let Inst{12-10} = lane{2-0};
3738     let Inst{30} = lane{3};
3739   }
3740
3741   def _H : NeonI_LDN_Lane<r, 0b01, op0,
3742                           !cast<RegisterOperand>(List # "H_operand"),
3743                           neon_uimm3_bare, asmop> {
3744     let Inst{12-10} = {lane{1}, lane{0}, 0b0};
3745     let Inst{30} = lane{2};
3746   }
3747
3748   def _S : NeonI_LDN_Lane<r, 0b10, op0,
3749                           !cast<RegisterOperand>(List # "S_operand"),
3750                           neon_uimm2_bare, asmop> {
3751     let Inst{12-10} = {lane{0}, 0b0, 0b0};
3752     let Inst{30} = lane{1};
3753   }
3754
3755   def _D : NeonI_LDN_Lane<r, 0b10, op0,
3756                           !cast<RegisterOperand>(List # "D_operand"),
3757                           neon_uimm1_bare, asmop> {
3758     let Inst{12-10} = 0b001;
3759     let Inst{30} = lane{0};
3760   }
3761 }
3762
3763 // Load single 1-element structure to one lane of 1 register.
3764 defm LD1LN : LDN_Lane_BHSD<0b0, 0b0, "VOne", "ld1">;
3765
3766 // Load single N-element structure to one lane of N consecutive registers
3767 // (N = 2,3,4)
3768 defm LD2LN : LDN_Lane_BHSD<0b1, 0b0, "VPair", "ld2">;
3769 defm LD3LN : LDN_Lane_BHSD<0b0, 0b1, "VTriple", "ld3">;
3770 defm LD4LN : LDN_Lane_BHSD<0b1, 0b1, "VQuad", "ld4">;
3771
// Select vector_insert-of-loaded-element as LD1LN. The lane instructions
// only operate on 128-bit registers, so the 64-bit (VTy) case widens $src
// with SUBREG_TO_REG, loads into the lane, and extracts sub_64 back out;
// the 128-bit (VTy2) case maps directly.
3772 multiclass LD1LN_patterns<ValueType VTy, ValueType VTy2, ValueType DTy,
3773                           Operand ImmOp, Operand ImmOp2, PatFrag LoadOp,
3774                           Instruction INST> {
3775   def : Pat<(VTy (vector_insert (VTy VPR64:$src),
3776                      (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp:$lane))),
3777             (VTy (EXTRACT_SUBREG
3778                      (INST GPR64xsp:$Rn,
3779                            (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
3780                            ImmOp:$lane),
3781                      sub_64))>;
3782
3783   def : Pat<(VTy2 (vector_insert (VTy2 VPR128:$src),
3784                       (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp2:$lane))),
3785             (VTy2 (INST GPR64xsp:$Rn, VPR128:$src, ImmOp2:$lane))>;
3786 }
3787
3788 // Match all LD1LN instructions
3789 defm : LD1LN_patterns<v8i8, v16i8, i32, neon_uimm3_bare, neon_uimm4_bare,
3790                       extloadi8, LD1LN_B>;
3791
3792 defm : LD1LN_patterns<v4i16, v8i16, i32, neon_uimm2_bare, neon_uimm3_bare,
3793                       extloadi16, LD1LN_H>;
3794
3795 defm : LD1LN_patterns<v2i32, v4i32, i32, neon_uimm1_bare, neon_uimm2_bare,
3796                       load, LD1LN_S>;
3797 defm : LD1LN_patterns<v2f32, v4f32, f32, neon_uimm1_bare, neon_uimm2_bare,
3798                       load, LD1LN_S>;
3799
3800 defm : LD1LN_patterns<v1i64, v2i64, i64, neon_uimm0_bare, neon_uimm1_bare,
3801                       load, LD1LN_D>;
3802 defm : LD1LN_patterns<v1f64, v2f64, f64, neon_uimm0_bare, neon_uimm1_bare,
3803                       load, LD1LN_D>;
3804
// Store one N-element structure from a single lane of the registers in
// VList.
// Fix: the original set hasExtraDefRegAllocReq, but this instruction has an
// empty (outs) list, so that flag is meaningless for it. The intended
// constraint — the register allocator must allocate the whole VList tuple
// even though only one lane of it is read — is the *source* side flag,
// hasExtraSrcRegAllocReq (cf. the ARM VSTnLN definitions).
3805 class NeonI_STN_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
3806                      Operand ImmOp, string asmop>
3807     : NeonI_LdStOne_Lane<0, r, op2_1, op0,
3808                          (outs), (ins GPR64xsp:$Rn, VList:$Rt, ImmOp:$lane),
3809                          asmop # "\t$Rt[$lane], [$Rn]",
3810                          [],
3811                          NoItinerary> {
3812   let mayStore = 1;
3813   let neverHasSideEffects = 1;
   // The whole register tuple $Rt is a source, not just the stored lane.
3814   let hasExtraSrcRegAllocReq = 1;
3815 }
3816
// Per-element-size lane stores; lane-index encoding into Inst{30} (Q) and
// Inst{12-10} (S:size) mirrors LDN_Lane_BHSD above.
3817 multiclass STN_Lane_BHSD<bit r, bit op0, string List, string asmop> {
3818   def _B : NeonI_STN_Lane<r, 0b00, op0,
3819                           !cast<RegisterOperand>(List # "B_operand"),
3820                           neon_uimm4_bare, asmop> {
3821     let Inst{12-10} = lane{2-0};
3822     let Inst{30} = lane{3};
3823   }
3824
3825   def _H : NeonI_STN_Lane<r, 0b01, op0,
3826                           !cast<RegisterOperand>(List # "H_operand"),
3827                           neon_uimm3_bare, asmop> {
3828     let Inst{12-10} = {lane{1}, lane{0}, 0b0};
3829     let Inst{30} = lane{2};
3830   }
3831
3832   def _S : NeonI_STN_Lane<r, 0b10, op0,
3833                           !cast<RegisterOperand>(List # "S_operand"),
3834                            neon_uimm2_bare, asmop> {
3835     let Inst{12-10} = {lane{0}, 0b0, 0b0};
3836     let Inst{30} = lane{1};
3837   }
3838
3839   def _D : NeonI_STN_Lane<r, 0b10, op0,
3840                           !cast<RegisterOperand>(List # "D_operand"),
3841                           neon_uimm1_bare, asmop>{
3842     let Inst{12-10} = 0b001;
3843     let Inst{30} = lane{0};
3844   }
3845 }
3846
3847 // Store single 1-element structure from one lane of 1 register.
3848 defm ST1LN : STN_Lane_BHSD<0b0, 0b0, "VOne", "st1">;
3849
3850 // Store single N-element structure from one lane of N consecutive registers
3851 // (N = 2,3,4)
3852 defm ST2LN : STN_Lane_BHSD<0b1, 0b0, "VPair", "st2">;
3853 defm ST3LN : STN_Lane_BHSD<0b0, 0b1, "VTriple", "st3">;
3854 defm ST4LN : STN_Lane_BHSD<0b1, 0b1, "VQuad", "st4">;
3855
// Select store-of-extracted-lane as ST1LN. Like LD1LN_patterns, the 64-bit
// (VTy) source is first widened to 128 bits with SUBREG_TO_REG because the
// lane instructions only address 128-bit registers.
3856 multiclass ST1LN_patterns<ValueType VTy, ValueType VTy2, ValueType DTy,
3857                           Operand ImmOp, Operand ImmOp2, PatFrag StoreOp,
3858                           Instruction INST> {
3859   def : Pat<(StoreOp (DTy (vector_extract (VTy VPR64:$Rt), ImmOp:$lane)),
3860                      GPR64xsp:$Rn),
3861             (INST GPR64xsp:$Rn,
3862                   (SUBREG_TO_REG (i64 0), VPR64:$Rt, sub_64),
3863                   ImmOp:$lane)>;
3864
3865   def : Pat<(StoreOp (DTy (vector_extract (VTy2 VPR128:$Rt), ImmOp2:$lane)),
3866                      GPR64xsp:$Rn),
3867             (INST GPR64xsp:$Rn, VPR128:$Rt, ImmOp2:$lane)>;
3868 }
3869
3870 // Match all ST1LN instructions
// i8/i16 lanes are extracted as i32, so truncating stores are matched.
3871 defm : ST1LN_patterns<v8i8, v16i8, i32, neon_uimm3_bare, neon_uimm4_bare,
3872                       truncstorei8, ST1LN_B>;
3873
3874 defm : ST1LN_patterns<v4i16, v8i16, i32, neon_uimm2_bare, neon_uimm3_bare,
3875                       truncstorei16, ST1LN_H>;
3876
3877 defm : ST1LN_patterns<v2i32, v4i32, i32, neon_uimm1_bare, neon_uimm2_bare,
3878                       store, ST1LN_S>;
3879 defm : ST1LN_patterns<v2f32, v4f32, f32, neon_uimm1_bare, neon_uimm2_bare,
3880                       store, ST1LN_S>;
3881
3882 defm : ST1LN_patterns<v1i64, v2i64, i64, neon_uimm0_bare, neon_uimm1_bare,
3883                       store, ST1LN_D>;
3884 defm : ST1LN_patterns<v1f64, v2f64, f64, neon_uimm0_bare, neon_uimm1_bare,
3885                       store, ST1LN_D>;
3886
3887 // End of vector load/store single N-element structure (class SIMD lsone).
3888
3889
3890 // The following are post-index load/store single N-element instructions
3891 // (class SIMD lsone-post)
3892
// Post-indexed load-and-replicate (LDnR with write-back). As with the other
// post-index multiclasses, _fixed advances the base by a fixed amount
// (Rm = 0b11111 selects the immediate encoding) and _register by $Rm; $wb
// is tied to $Rn.
3893 multiclass NeonI_LDN_WB_Dup<bit q, bit r, bits<3> opcode, bits<2> size,
3894                             RegisterOperand VecList, Operand ImmTy,
3895                             string asmop> {
3896   let mayLoad = 1, neverHasSideEffects = 1, Constraints = "$wb = $Rn",
3897   DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
3898     def _fixed : NeonI_LdOne_Dup_Post<q, r, opcode, size,
3899                       (outs VecList:$Rt, GPR64xsp:$wb),
3900                       (ins GPR64xsp:$Rn, ImmTy:$amt),
3901                       asmop # "\t$Rt, [$Rn], $amt",
3902                       [],
3903                       NoItinerary> {
3904                         let Rm = 0b11111;
3905                       }
3906
3907     def _register : NeonI_LdOne_Dup_Post<q, r, opcode, size,
3908                       (outs VecList:$Rt, GPR64xsp:$wb),
3909                       (ins GPR64xsp:$Rn, GPR64noxzr:$Rm),
3910                       asmop # "\t$Rt, [$Rn], $Rm",
3911                       [],
3912                       NoItinerary>;
3913   }
3914 }
3915
// Instantiate NeonI_LDN_WB_Dup for all eight layouts. Unlike the structure
// load/store multiclasses, the post-index amount depends on the element
// size (one element per register is transferred), so a separate exact
// immediate is taken for each of B, H, S and D.
3916 multiclass LDWB_Dup_BHSD<bit r, bits<3> opcode, string List, string asmop,
3917                          Operand uimm_b, Operand uimm_h,
3918                          Operand uimm_s, Operand uimm_d> {
3919   defm _8B : NeonI_LDN_WB_Dup<0, r, opcode, 0b00,
3920                               !cast<RegisterOperand>(List # "8B_operand"),
3921                               uimm_b, asmop>;
3922
3923   defm _4H : NeonI_LDN_WB_Dup<0, r, opcode, 0b01,
3924                               !cast<RegisterOperand>(List # "4H_operand"),
3925                               uimm_h, asmop>;
3926
3927   defm _2S : NeonI_LDN_WB_Dup<0, r, opcode, 0b10,
3928                               !cast<RegisterOperand>(List # "2S_operand"),
3929                               uimm_s, asmop>;
3930
3931   defm _1D : NeonI_LDN_WB_Dup<0, r, opcode, 0b11,
3932                               !cast<RegisterOperand>(List # "1D_operand"),
3933                               uimm_d, asmop>;
3934
3935   defm _16B : NeonI_LDN_WB_Dup<1, r, opcode, 0b00,
3936                                !cast<RegisterOperand>(List # "16B_operand"),
3937                                uimm_b, asmop>;
3938
3939   defm _8H : NeonI_LDN_WB_Dup<1, r, opcode, 0b01,
3940                               !cast<RegisterOperand>(List # "8H_operand"),
3941                               uimm_h, asmop>;
3942
3943   defm _4S : NeonI_LDN_WB_Dup<1, r, opcode, 0b10,
3944                               !cast<RegisterOperand>(List # "4S_operand"),
3945                               uimm_s, asmop>;
3946
3947   defm _2D : NeonI_LDN_WB_Dup<1, r, opcode, 0b11,
3948                               !cast<RegisterOperand>(List # "2D_operand"),
3949                               uimm_d, asmop>;
3950 }
3951
3952 // Post-index load single 1-element structure to all lanes of 1 register
// Exact immediates are N * element-size bytes for each element width.
3953 defm LD1R_WB : LDWB_Dup_BHSD<0b0, 0b110, "VOne", "ld1r", uimm_exact1,
3954                              uimm_exact2, uimm_exact4, uimm_exact8>;
3955
3956 // Post-index load single N-element structure to all lanes of N consecutive
3957 // registers (N = 2,3,4)
3958 defm LD2R_WB : LDWB_Dup_BHSD<0b1, 0b110, "VPair", "ld2r", uimm_exact2,
3959                              uimm_exact4, uimm_exact8, uimm_exact16>;
3960 defm LD3R_WB : LDWB_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r", uimm_exact3,
3961                              uimm_exact6, uimm_exact12, uimm_exact24>;
3962 defm LD4R_WB : LDWB_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r", uimm_exact4,
3963                              uimm_exact8, uimm_exact16, uimm_exact32>;
3964
// Post-indexed single-lane loads. Both classes tie $Rt to $src (lanes not
// loaded are preserved) and $wb to $Rn (base write-back). LDN_WBFx_Lane is
// the fixed-amount form (Rm = 0b11111 selects the immediate encoding);
// LDN_WBReg_Lane advances the base by $Rm.
3965 let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1,
3966     Constraints = "$Rn = $wb, $Rt = $src",
3967     DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
3968   class LDN_WBFx_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
3969                                 Operand ImmTy, Operand ImmOp, string asmop>
3970       : NeonI_LdStOne_Lane_Post<1, r, op2_1, op0,
3971                                 (outs VList:$Rt, GPR64xsp:$wb),
3972                                 (ins GPR64xsp:$Rn, ImmTy:$amt,
3973                                     VList:$src, ImmOp:$lane),
3974                                 asmop # "\t$Rt[$lane], [$Rn], $amt",
3975                                 [],
3976                                 NoItinerary> {
3977     let Rm = 0b11111;
3978   }
3979
3980   class LDN_WBReg_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
3981                                  Operand ImmTy, Operand ImmOp, string asmop>
3982       : NeonI_LdStOne_Lane_Post<1, r, op2_1, op0,
3983                                 (outs VList:$Rt, GPR64xsp:$wb),
3984                                 (ins GPR64xsp:$Rn, GPR64noxzr:$Rm,
3985                                     VList:$src, ImmOp:$lane),
3986                                 asmop # "\t$Rt[$lane], [$Rn], $Rm",
3987                                 [],
3988                                 NoItinerary>;
3989 }
3990
// Instantiates fixed- and register-increment post-index lane loads for all
// four element sizes.  The lane index is split across the encoding: its top
// bit goes to Inst{30} (Q) and the remaining bits into Inst{12-10}, with
// low zero bits marking the wider element sizes.
multiclass LD_Lane_WB_BHSD<bit r, bit op0, string List, string asmop,
                           Operand uimm_b, Operand uimm_h,
                           Operand uimm_s, Operand uimm_d> {
  // B: 4-bit lane -> lane{3}:Inst{30}, lane{2-0}:Inst{12-10}.
  def _B_fixed : LDN_WBFx_Lane<r, 0b00, op0,
                               !cast<RegisterOperand>(List # "B_operand"),
                               uimm_b, neon_uimm4_bare, asmop> {
    let Inst{12-10} = lane{2-0};
    let Inst{30} = lane{3};
  }

  def _B_register : LDN_WBReg_Lane<r, 0b00, op0,
                                   !cast<RegisterOperand>(List # "B_operand"),
                                   uimm_b, neon_uimm4_bare, asmop> {
    let Inst{12-10} = lane{2-0};
    let Inst{30} = lane{3};
  }

  // H: 3-bit lane -> lane{2}:Inst{30}, lane{1-0}:Inst{12-11}, Inst{10}=0.
  def _H_fixed : LDN_WBFx_Lane<r, 0b01, op0,
                               !cast<RegisterOperand>(List # "H_operand"),
                               uimm_h, neon_uimm3_bare, asmop> {
    let Inst{12-10} = {lane{1}, lane{0}, 0b0};
    let Inst{30} = lane{2};
  }

  def _H_register : LDN_WBReg_Lane<r, 0b01, op0,
                                   !cast<RegisterOperand>(List # "H_operand"),
                                   uimm_h, neon_uimm3_bare, asmop> {
    let Inst{12-10} = {lane{1}, lane{0}, 0b0};
    let Inst{30} = lane{2};
  }

  // S: 2-bit lane -> lane{1}:Inst{30}, lane{0}:Inst{12}, Inst{11-10}=00.
  def _S_fixed : LDN_WBFx_Lane<r, 0b10, op0,
                               !cast<RegisterOperand>(List # "S_operand"),
                               uimm_s, neon_uimm2_bare, asmop> {
    let Inst{12-10} = {lane{0}, 0b0, 0b0};
    let Inst{30} = lane{1};
  }

  def _S_register : LDN_WBReg_Lane<r, 0b10, op0,
                                   !cast<RegisterOperand>(List # "S_operand"),
                                   uimm_s, neon_uimm2_bare, asmop> {
    let Inst{12-10} = {lane{0}, 0b0, 0b0};
    let Inst{30} = lane{1};
  }

  // D: shares op2_1 = 0b10 with S; Inst{12-10} = 0b001 marks the
  // doubleword form and the 1-bit lane goes to Inst{30}.
  def _D_fixed : LDN_WBFx_Lane<r, 0b10, op0,
                               !cast<RegisterOperand>(List # "D_operand"),
                               uimm_d, neon_uimm1_bare, asmop> {
    let Inst{12-10} = 0b001;
    let Inst{30} = lane{0};
  }

  def _D_register : LDN_WBReg_Lane<r, 0b10, op0,
                                   !cast<RegisterOperand>(List # "D_operand"),
                                   uimm_d, neon_uimm1_bare, asmop> {
    let Inst{12-10} = 0b001;
    let Inst{30} = lane{0};
  }
}
4050
// Post-index load single 1-element structure to one lane of 1 register.
// The uimm_exact* operands are the fixed post-increments
// (element bytes x number of registers in the list).
defm LD1LN_WB : LD_Lane_WB_BHSD<0b0, 0b0, "VOne", "ld1", uimm_exact1,
                                uimm_exact2, uimm_exact4, uimm_exact8>;

// Post-index load single N-element structure to one lane of N consecutive
// registers
// (N = 2,3,4)
defm LD2LN_WB : LD_Lane_WB_BHSD<0b1, 0b0, "VPair", "ld2", uimm_exact2,
                                uimm_exact4, uimm_exact8, uimm_exact16>;
defm LD3LN_WB : LD_Lane_WB_BHSD<0b0, 0b1, "VTriple", "ld3", uimm_exact3,
                                uimm_exact6, uimm_exact12, uimm_exact24>;
defm LD4LN_WB : LD_Lane_WB_BHSD<0b1, 0b1, "VQuad", "ld4", uimm_exact4,
                                uimm_exact8, uimm_exact16, uimm_exact32>;
4064
// Post-index (write-back) store of a single structure from one lane.
// Only the updated base ($wb) is an output; the stored list $Rt is a
// source operand.
// NOTE(review): hasExtraDefRegAllocReq on a store looks like it was meant
// to be hasExtraSrcRegAllocReq ($Rt is a use here, not a def) -- confirm
// before changing; it only affects register-allocation hints.
let mayStore = 1, neverHasSideEffects = 1,
    hasExtraDefRegAllocReq = 1, Constraints = "$Rn = $wb",
    DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
  // Fixed post-increment form: Rm hard-wired to 0b11111, increment is
  // the exact immediate $amt.
  class STN_WBFx_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
                      Operand ImmTy, Operand ImmOp, string asmop>
      : NeonI_LdStOne_Lane_Post<0, r, op2_1, op0,
                                (outs GPR64xsp:$wb),
                                (ins GPR64xsp:$Rn, ImmTy:$amt,
                                    VList:$Rt, ImmOp:$lane),
                                asmop # "\t$Rt[$lane], [$Rn], $amt",
                                [],
                                NoItinerary> {
    let Rm = 0b11111;
  }

  // Register post-increment form: increment read from $Rm (XZR excluded
  // via GPR64noxzr).
  class STN_WBReg_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
                       Operand ImmTy, Operand ImmOp, string asmop>
      : NeonI_LdStOne_Lane_Post<0, r, op2_1, op0,
                                (outs GPR64xsp:$wb),
                                (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VList:$Rt,
                                    ImmOp:$lane),
                                asmop # "\t$Rt[$lane], [$Rn], $Rm",
                                [],
                                NoItinerary>;
}
4090
// Store counterpart of LD_Lane_WB_BHSD: instantiates fixed- and
// register-increment post-index lane stores for all four element sizes.
// Lane encoding is identical to the load side: top lane bit in Inst{30}
// (Q), remaining bits in Inst{12-10}.
multiclass ST_Lane_WB_BHSD<bit r, bit op0, string List, string asmop,
                           Operand uimm_b, Operand uimm_h,
                           Operand uimm_s, Operand uimm_d> {
  // B: 4-bit lane -> lane{3}:Inst{30}, lane{2-0}:Inst{12-10}.
  def _B_fixed : STN_WBFx_Lane<r, 0b00, op0,
                               !cast<RegisterOperand>(List # "B_operand"),
                               uimm_b, neon_uimm4_bare, asmop> {
    let Inst{12-10} = lane{2-0};
    let Inst{30} = lane{3};
  }

  def _B_register : STN_WBReg_Lane<r, 0b00, op0,
                                   !cast<RegisterOperand>(List # "B_operand"),
                                   uimm_b, neon_uimm4_bare, asmop> {
    let Inst{12-10} = lane{2-0};
    let Inst{30} = lane{3};
  }

  // H: 3-bit lane -> lane{2}:Inst{30}, lane{1-0}:Inst{12-11}, Inst{10}=0.
  def _H_fixed : STN_WBFx_Lane<r, 0b01, op0,
                               !cast<RegisterOperand>(List # "H_operand"),
                               uimm_h, neon_uimm3_bare, asmop> {
    let Inst{12-10} = {lane{1}, lane{0}, 0b0};
    let Inst{30} = lane{2};
  }

  def _H_register : STN_WBReg_Lane<r, 0b01, op0,
                                   !cast<RegisterOperand>(List # "H_operand"),
                                   uimm_h, neon_uimm3_bare, asmop> {
    let Inst{12-10} = {lane{1}, lane{0}, 0b0};
    let Inst{30} = lane{2};
  }

  // S: 2-bit lane -> lane{1}:Inst{30}, lane{0}:Inst{12}, Inst{11-10}=00.
  def _S_fixed : STN_WBFx_Lane<r, 0b10, op0,
                               !cast<RegisterOperand>(List # "S_operand"),
                               uimm_s, neon_uimm2_bare, asmop> {
    let Inst{12-10} = {lane{0}, 0b0, 0b0};
    let Inst{30} = lane{1};
  }

  def _S_register : STN_WBReg_Lane<r, 0b10, op0,
                                   !cast<RegisterOperand>(List # "S_operand"),
                                   uimm_s, neon_uimm2_bare, asmop> {
    let Inst{12-10} = {lane{0}, 0b0, 0b0};
    let Inst{30} = lane{1};
  }

  // D: shares op2_1 = 0b10 with S; Inst{12-10} = 0b001 marks the
  // doubleword form and the 1-bit lane goes to Inst{30}.
  def _D_fixed : STN_WBFx_Lane<r, 0b10, op0,
                               !cast<RegisterOperand>(List # "D_operand"),
                               uimm_d, neon_uimm1_bare, asmop> {
    let Inst{12-10} = 0b001;
    let Inst{30} = lane{0};
  }

  def _D_register : STN_WBReg_Lane<r, 0b10, op0,
                                   !cast<RegisterOperand>(List # "D_operand"),
                                   uimm_d, neon_uimm1_bare, asmop> {
    let Inst{12-10} = 0b001;
    let Inst{30} = lane{0};
  }
}
4150
// Post-index store single 1-element structure from one lane of 1 register.
// The uimm_exact* operands are the fixed post-increments
// (element bytes x number of registers in the list).
defm ST1LN_WB : ST_Lane_WB_BHSD<0b0, 0b0, "VOne", "st1", uimm_exact1,
                                uimm_exact2, uimm_exact4, uimm_exact8>;

// Post-index store single N-element structure from one lane of N consecutive
// registers (N = 2,3,4)
defm ST2LN_WB : ST_Lane_WB_BHSD<0b1, 0b0, "VPair", "st2", uimm_exact2,
                                uimm_exact4, uimm_exact8, uimm_exact16>;
defm ST3LN_WB : ST_Lane_WB_BHSD<0b0, 0b1, "VTriple", "st3", uimm_exact3,
                                uimm_exact6, uimm_exact12, uimm_exact24>;
defm ST4LN_WB : ST_Lane_WB_BHSD<0b1, 0b1, "VQuad", "st4", uimm_exact4,
                                uimm_exact8, uimm_exact16, uimm_exact32>;
4163
4164 // End of post-index load/store single N-element instructions
4165 // (class SIMD lsone-post)
4166
4167 // Neon Scalar instructions implementation
4168 // Scalar Three Same
4169
// Scalar three-same: destination and both sources in the same scalar FP
// register class (FPR8/16/32/64 selected via FPRC).  Patterns are attached
// separately by the Neon_Scalar3Same_*_patterns multiclasses below.
class NeonI_Scalar3Same_size<bit u, bits<2> size, bits<5> opcode, string asmop,
                             RegisterClass FPRC>
  : NeonI_Scalar3Same<u, size, opcode,
                      (outs FPRC:$Rd), (ins FPRC:$Rn, FPRC:$Rm),
                      !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
                      [],
                      NoItinerary>;

// Doubleword-only variant: size = 0b11, FPR64 operands.
class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
  : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;

// H- and S-sized variants; Commutable is propagated to isCommutable.
multiclass NeonI_Scalar3Same_HS_sizes<bit u, bits<5> opcode, string asmop,
                                      bit Commutable = 0> {
  let isCommutable = Commutable in {
    def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
    def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
  }
}

// S- and D-sized variants: size_high is the top size bit, and the low
// size bit selects S ({size_high,0}) vs D ({size_high,1}).
multiclass NeonI_Scalar3Same_SD_sizes<bit u, bit size_high, bits<5> opcode,
                                      string asmop, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def sss : NeonI_Scalar3Same_size<u, {size_high, 0b0}, opcode, asmop, FPR32>;
    def ddd : NeonI_Scalar3Same_size<u, {size_high, 0b1}, opcode, asmop, FPR64>;
  }
}

// All four integer element sizes B/H/S/D.
multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
                                        string asmop, bit Commutable = 0> {
  let isCommutable = Commutable in {
    def bbb : NeonI_Scalar3Same_size<u, 0b00, opcode, asmop, FPR8>;
    def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
    def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
    def ddd : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;
  }
}
4206
// Select a two-operand v1i64 node onto the D-sized scalar instruction.
multiclass Neon_Scalar3Same_D_size_patterns<SDPatternOperator opnode,
                                            Instruction INSTD> {
  def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
            (INSTD FPR64:$Rn, FPR64:$Rm)>;
}

// Patterns for all four element sizes (v1i8/v1i16/v1i32 plus the v1i64
// pattern inherited from the D-size multiclass above).
multiclass Neon_Scalar3Same_BHSD_size_patterns<SDPatternOperator opnode,
                                               Instruction INSTB,
                                               Instruction INSTH,
                                               Instruction INSTS,
                                               Instruction INSTD>
  : Neon_Scalar3Same_D_size_patterns<opnode, INSTD> {
  def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
           (INSTB FPR8:$Rn, FPR8:$Rm)>;
  def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
           (INSTH FPR16:$Rn, FPR16:$Rm)>;
  def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
           (INSTS FPR32:$Rn, FPR32:$Rm)>;
}

// Patterns for the H- and S-sized variants only.
multiclass Neon_Scalar3Same_HS_size_patterns<SDPatternOperator opnode,
                                             Instruction INSTH,
                                             Instruction INSTS> {
  def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
            (INSTH FPR16:$Rn, FPR16:$Rm)>;
  def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
            (INSTS FPR32:$Rn, FPR32:$Rm)>;
}

// S/D patterns with caller-supplied operand and result types, so the same
// multiclass serves integer- and float-typed nodes.
multiclass Neon_Scalar3Same_SD_size_patterns<SDPatternOperator opnode,
                                             ValueType SResTy, ValueType STy,
                                             Instruction INSTS, ValueType DResTy,
                                             ValueType DTy, Instruction INSTD> {
  def : Pat<(SResTy (opnode (STy FPR32:$Rn), (STy FPR32:$Rm))),
            (INSTS FPR32:$Rn, FPR32:$Rm)>;
  def : Pat<(DResTy (opnode (DTy FPR64:$Rn), (DTy FPR64:$Rm))),
            (INSTD FPR64:$Rn, FPR64:$Rm)>;
}

// Select a Neon_cmp of two v1f64 values with condition CC onto the
// D-sized compare instruction (result is a v1i64 mask).
class Neon_Scalar3Same_cmp_V1_D_size_patterns<CondCode CC,
                                              Instruction INSTD>
  : Pat<(v1i64 (Neon_cmp (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm), CC)),
        (INSTD FPR64:$Rn, FPR64:$Rm)>;
4250
4251 // Scalar Three Different
4252
// Scalar three-different: the destination register class (FPRCD) differs
// from the source class (FPRCS) -- used for widening operations.
class NeonI_Scalar3Diff_size<bit u, bits<2> size, bits<4> opcode, string asmop,
                             RegisterClass FPRCD, RegisterClass FPRCS>
  : NeonI_Scalar3Diff<u, size, opcode,
                      (outs FPRCD:$Rd), (ins FPRCS:$Rn, FPRCS:$Rm),
                      !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
                      [],
                      NoItinerary>;

// Widening variants: H sources -> S result, S sources -> D result.
multiclass NeonI_Scalar3Diff_HS_size<bit u, bits<4> opcode, string asmop> {
  def shh : NeonI_Scalar3Diff_size<u, 0b01, opcode, asmop, FPR32, FPR16>;
  def dss : NeonI_Scalar3Diff_size<u, 0b10, opcode, asmop, FPR64, FPR32>;
}

// Widening multiply-accumulate style variants: the accumulator $Src is
// tied to the destination $Rd and does not appear in the assembly string.
multiclass NeonI_Scalar3Diff_ml_HS_size<bit u, bits<4> opcode, string asmop> {
  let Constraints = "$Src = $Rd" in {
    def shh : NeonI_Scalar3Diff<u, 0b01, opcode,
                       (outs FPR32:$Rd), (ins FPR32:$Src, FPR16:$Rn, FPR16:$Rm),
                       !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
                       [],
                       NoItinerary>;
    def dss : NeonI_Scalar3Diff<u, 0b10, opcode,
                       (outs FPR64:$Rd), (ins FPR64:$Src, FPR32:$Rn, FPR32:$Rm),
                       !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
                       [],
                       NoItinerary>;
  }
}

// Patterns for the widening variants: v1i16 x v1i16 -> v1i32 and
// v1i32 x v1i32 -> v1i64.
multiclass Neon_Scalar3Diff_HS_size_patterns<SDPatternOperator opnode,
                                             Instruction INSTH,
                                             Instruction INSTS> {
  def : Pat<(v1i32 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
            (INSTH FPR16:$Rn, FPR16:$Rm)>;
  def : Pat<(v1i64 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
            (INSTS FPR32:$Rn, FPR32:$Rm)>;
}

// Patterns for the widening accumulate variants: opnode takes the
// accumulator first, matching the instruction's tied $Src operand.
multiclass Neon_Scalar3Diff_ml_HS_size_patterns<SDPatternOperator opnode,
                                             Instruction INSTH,
                                             Instruction INSTS> {
  def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
            (INSTH FPR32:$Src, FPR16:$Rn, FPR16:$Rm)>;
  def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
            (INSTS FPR64:$Src, FPR32:$Rn, FPR32:$Rm)>;
}
4298
4299 // Scalar Two Registers Miscellaneous
4300
// Scalar two-register miscellaneous: one source, one destination; the two
// register classes may differ (narrowing/conversion forms).
class NeonI_Scalar2SameMisc_size<bit u, bits<2> size, bits<5> opcode, string asmop,
                             RegisterClass FPRCD, RegisterClass FPRCS>
  : NeonI_Scalar2SameMisc<u, size, opcode,
                          (outs FPRCD:$Rd), (ins FPRCS:$Rn),
                          !strconcat(asmop, "\t$Rd, $Rn"),
                          [],
                          NoItinerary>;

// S and D same-size variants; size_high is the top size bit.
multiclass NeonI_Scalar2SameMisc_SD_size<bit u, bit size_high, bits<5> opcode,
                                         string asmop> {
  def ss : NeonI_Scalar2SameMisc_size<u, {size_high, 0b0}, opcode, asmop, FPR32,
                                      FPR32>;
  def dd : NeonI_Scalar2SameMisc_size<u, {size_high, 0b1}, opcode, asmop, FPR64,
                                      FPR64>;
}

// Doubleword-only variant.
multiclass NeonI_Scalar2SameMisc_D_size<bit u, bits<5> opcode, string asmop> {
  def dd : NeonI_Scalar2SameMisc_size<u, 0b11, opcode, asmop, FPR64, FPR64>;
}

// All four same-size variants (D inherited from the multiclass above).
multiclass NeonI_Scalar2SameMisc_BHSD_size<bit u, bits<5> opcode, string asmop>
  : NeonI_Scalar2SameMisc_D_size<u, opcode, asmop> {
  def bb : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR8>;
  def hh : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR16>;
  def ss : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR32>;
}

// fcvtxn-style narrowing: D source to S destination with size = 0b01.
class NeonI_Scalar2SameMisc_fcvtxn_D_size<bit u, bits<5> opcode, string asmop>
  : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR32, FPR64>;

// Narrowing variants: each result class is one size below its source
// (H->B, S->H, D->S).
multiclass NeonI_Scalar2SameMisc_narrow_HSD_size<bit u, bits<5> opcode,
                                                 string asmop> {
  def bh : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR16>;
  def hs : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR32>;
  def sd : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR64>;
}

// Accumulating form: $Src is an extra input; only $Rd and $Rn appear in
// assembly, with $Src tied to $Rd by the instantiating multiclass.
class NeonI_Scalar2SameMisc_accum_size<bit u, bits<2> size, bits<5> opcode,
                                       string asmop, RegisterClass FPRC>
  : NeonI_Scalar2SameMisc<u, size, opcode,
                          (outs FPRC:$Rd), (ins FPRC:$Src, FPRC:$Rn),
                          !strconcat(asmop, "\t$Rd, $Rn"),
                          [],
                          NoItinerary>;

// Accumulating variants for all four sizes, accumulator tied to $Rd.
multiclass NeonI_Scalar2SameMisc_accum_BHSD_size<bit u, bits<5> opcode,
                                                 string asmop> {

  let Constraints = "$Src = $Rd" in {
    def bb : NeonI_Scalar2SameMisc_accum_size<u, 0b00, opcode, asmop, FPR8>;
    def hh : NeonI_Scalar2SameMisc_accum_size<u, 0b01, opcode, asmop, FPR16>;
    def ss : NeonI_Scalar2SameMisc_accum_size<u, 0b10, opcode, asmop, FPR32>;
    def dd : NeonI_Scalar2SameMisc_accum_size<u, 0b11, opcode, asmop, FPR64>;
  }
}
4356
// f64 -> f32 narrowing conversion pattern (fcvtxn-style).
class Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<SDPatternOperator opnode,
                                                  Instruction INSTD>
  : Pat<(f32 (opnode (f64 FPR64:$Rn))),
        (INSTD FPR64:$Rn)>;

// Float -> integer conversions: f32 -> v1i32 and f64 -> v1i64.
multiclass Neon_Scalar2SameMisc_fcvt_SD_size_patterns<SDPatternOperator opnode,
                                                      Instruction INSTS,
                                                      Instruction INSTD> {
  def : Pat<(v1i32 (opnode (f32 FPR32:$Rn))),
            (INSTS FPR32:$Rn)>;
  def : Pat<(v1i64 (opnode (f64 FPR64:$Rn))),
            (INSTD FPR64:$Rn)>;
}

// v1f64 -> v1i64 conversion (vector-of-one typed node).
class Neon_Scalar2SameMisc_vcvt_D_size_patterns<SDPatternOperator opnode,
                                                Instruction INSTD>
  : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))),
            (INSTD FPR64:$Rn)>;

// Integer -> float conversions: v1i32 -> f32 and v1i64 -> f64.
multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns<SDPatternOperator opnode,
                                                     Instruction INSTS,
                                                     Instruction INSTD> {
  def : Pat<(f32 (opnode (v1i32 FPR32:$Rn))),
            (INSTS FPR32:$Rn)>;
  def : Pat<(f64 (opnode (v1i64 FPR64:$Rn))),
            (INSTD FPR64:$Rn)>;
}

// Same-type unary float patterns: f32 -> f32 and f64 -> f64.
multiclass Neon_Scalar2SameMisc_SD_size_patterns<SDPatternOperator opnode,
                                                 Instruction INSTS,
                                                 Instruction INSTD> {
  def : Pat<(f32 (opnode (f32 FPR32:$Rn))),
            (INSTS FPR32:$Rn)>;
  def : Pat<(f64 (opnode (f64 FPR64:$Rn))),
            (INSTD FPR64:$Rn)>;
}

// Unary v1f64 -> v1f64 pattern on the D-sized instruction.
class Neon_Scalar2SameMisc_V1_D_size_patterns<SDPatternOperator opnode,
                                              Instruction INSTD>
  : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))),
        (INSTD FPR64:$Rn)>;
4398
// Integer compare-against-zero: takes a #0 immediate operand
// (neon_uimm0) that only admits zero.
class NeonI_Scalar2SameMisc_cmpz_D_size<bit u, bits<5> opcode, string asmop>
  : NeonI_Scalar2SameMisc<u, 0b11, opcode,
                          (outs FPR64:$Rd), (ins FPR64:$Rn, neon_uimm0:$Imm),
                          !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
                          [],
                          NoItinerary>;

// Floating compare-against-zero for S and D.  Note both forms take
// fpz32:$FPImm -- presumably the #0.0 immediate operand; confirm against
// the fpz32 definition elsewhere in the file.
multiclass NeonI_Scalar2SameMisc_cmpz_SD_size<bit u, bits<5> opcode,
                                              string asmop> {
  def ssi : NeonI_Scalar2SameMisc<u, 0b10, opcode,
                           (outs FPR32:$Rd), (ins FPR32:$Rn, fpz32:$FPImm),
                           !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
                           [],
                           NoItinerary>;
  def ddi : NeonI_Scalar2SameMisc<u, 0b11, opcode,
                           (outs FPR64:$Rd), (ins FPR64:$Rn, fpz32:$FPImm),
                           !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
                           [],
                           NoItinerary>;
}

// Match opnode against an all-zero v1i64 second operand (spelled as a
// bitcast of the v8i8 all-zero vector) onto the #0 compare form.
class Neon_Scalar2SameMisc_cmpz_D_size_patterns<SDPatternOperator opnode,
                                                Instruction INSTD>
  : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
                       (v1i64 (bitconvert (v8i8 Neon_AllZero))))),
        (INSTD FPR64:$Rn, 0)>;

// Match the Neon_cmpz node (operand, zero immediate, condition code)
// onto the #0 compare form.
class Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<CondCode CC,
                                                   Instruction INSTD>
  : Pat<(v1i64 (Neon_cmpz (v1i64 FPR64:$Rn),
                          (i32 neon_uimm0:$Imm), CC)),
        (INSTD FPR64:$Rn, neon_uimm0:$Imm)>;

// Floating compare-against-zero patterns: intrinsic-style opnode for
// f32/f64, plus the Neon_cmpz form for v1f64.
multiclass Neon_Scalar2SameMisc_cmpz_SD_size_patterns<SDPatternOperator opnode,
                                                      CondCode CC,
                                                      Instruction INSTS,
                                                      Instruction INSTD> {
  def : Pat<(v1i32 (opnode (f32 FPR32:$Rn), (f32 fpz32:$FPImm))),
            (INSTS FPR32:$Rn, fpz32:$FPImm)>;
  def : Pat<(v1i64 (opnode (f64 FPR64:$Rn), (f32 fpz32:$FPImm))),
            (INSTD FPR64:$Rn, fpz32:$FPImm)>;
  def : Pat<(v1i64 (Neon_cmpz (v1f64 FPR64:$Rn), (f32 fpz32:$FPImm), CC)),
            (INSTD FPR64:$Rn, fpz32:$FPImm)>;
}
4443
// Unary v1i64 -> v1i64 pattern on the D-sized instruction.
multiclass Neon_Scalar2SameMisc_D_size_patterns<SDPatternOperator opnode,
                                                Instruction INSTD> {
  def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn))),
            (INSTD FPR64:$Rn)>;
}

// Unary patterns for all four sizes (v1i64 inherited from above).
multiclass Neon_Scalar2SameMisc_BHSD_size_patterns<SDPatternOperator opnode,
                                                   Instruction INSTB,
                                                   Instruction INSTH,
                                                   Instruction INSTS,
                                                   Instruction INSTD>
  : Neon_Scalar2SameMisc_D_size_patterns<opnode, INSTD> {
  def : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn))),
            (INSTB FPR8:$Rn)>;
  def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn))),
            (INSTH FPR16:$Rn)>;
  def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn))),
            (INSTS FPR32:$Rn)>;
}

// Narrowing unary patterns: each result type is one size below the
// source (v1i16->v1i8, v1i32->v1i16, v1i64->v1i32).
multiclass Neon_Scalar2SameMisc_narrow_HSD_size_patterns<
                                                       SDPatternOperator opnode,
                                                       Instruction INSTH,
                                                       Instruction INSTS,
                                                       Instruction INSTD> {
  def : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn))),
            (INSTH FPR16:$Rn)>;
  def : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn))),
            (INSTS FPR32:$Rn)>;
  def : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn))),
            (INSTD FPR64:$Rn)>;

}

// Accumulating unary patterns: opnode takes (accumulator, operand),
// matching the instruction's tied $Src operand.
multiclass Neon_Scalar2SameMisc_accum_BHSD_size_patterns<
                                                       SDPatternOperator opnode,
                                                       Instruction INSTB,
                                                       Instruction INSTH,
                                                       Instruction INSTS,
                                                       Instruction INSTD> {
  def : Pat<(v1i8 (opnode (v1i8 FPR8:$Src), (v1i8 FPR8:$Rn))),
            (INSTB FPR8:$Src, FPR8:$Rn)>;
  def : Pat<(v1i16 (opnode (v1i16 FPR16:$Src), (v1i16 FPR16:$Rn))),
            (INSTH FPR16:$Src, FPR16:$Rn)>;
  def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i32 FPR32:$Rn))),
            (INSTS FPR32:$Src, FPR32:$Rn)>;
  def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn))),
            (INSTD FPR64:$Src, FPR64:$Rn)>;
}
4493
4494 // Scalar Shift By Immediate
4495
// Scalar shift by immediate: one source register and an immediate shift
// amount.  Subclasses place the amount into the immh:immb field
// (Inst{22-16}), whose leading-one position encodes the element size.
class NeonI_ScalarShiftImm_size<bit u, bits<5> opcode, string asmop,
                                RegisterClass FPRC, Operand ImmTy>
  : NeonI_ScalarShiftImm<u, opcode,
                         (outs FPRC:$Rd), (ins FPRC:$Rn, ImmTy:$Imm),
                         !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
                         [], NoItinerary>;

// Right shift, doubleword only; uses the shr_imm64 operand.
multiclass NeonI_ScalarShiftRightImm_D_size<bit u, bits<5> opcode,
                                            string asmop> {
  def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
    bits<6> Imm;
    let Inst{22} = 0b1; // immh:immb = 1xxxxxx
    let Inst{21-16} = Imm;
  }
}

// Right shift for all four sizes (D inherited from above).  Each size
// fixes the leading immh bits and maps Imm into the remaining bits.
multiclass NeonI_ScalarShiftRightImm_BHSD_size<bit u, bits<5> opcode,
                                               string asmop>
  : NeonI_ScalarShiftRightImm_D_size<u, opcode, asmop> {
  def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shr_imm8> {
    bits<3> Imm;
    let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
    let Inst{18-16} = Imm;
  }
  def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shr_imm16> {
    bits<4> Imm;
    let Inst{22-20} = 0b001; // immh:immb = 001xxxx
    let Inst{19-16} = Imm;
  }
  def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
    bits<5> Imm;
    let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
    let Inst{20-16} = Imm;
  }
}
4531
// Left shift, doubleword only.  Uses the shl_imm64 operand (distinct from
// shr_imm64) -- presumably so that a shift amount of 0 is accepted for
// left shifts; confirm against the shl_imm* operand definitions.
multiclass NeonI_ScalarShiftLeftImm_D_size<bit u, bits<5> opcode,
                                            string asmop> {
  def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shl_imm64> {
    bits<6> Imm;
    let Inst{22} = 0b1; // immh:immb = 1xxxxxx
    let Inst{21-16} = Imm;
  }
}

// Left shift for all four sizes (D inherited from above), using the
// shl_imm* operands throughout.
multiclass NeonI_ScalarShiftLeftImm_BHSD_size<bit u, bits<5> opcode,
                                              string asmop>
  : NeonI_ScalarShiftLeftImm_D_size<u, opcode, asmop> {
  def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shl_imm8> {
    bits<3> Imm;
    let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
    let Inst{18-16} = Imm;
  }
  def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shl_imm16> {
    bits<4> Imm;
    let Inst{22-20} = 0b001; // immh:immb = 001xxxx
    let Inst{19-16} = Imm;
  }
  def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shl_imm32> {
    bits<5> Imm;
    let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
    let Inst{20-16} = Imm;
  }
}
4560
// Accumulating right shift (D only): $Src is tied to $Rd; only $Rd, $Rn
// and the immediate appear in assembly.
class NeonI_ScalarShiftRightImm_accum_D_size<bit u, bits<5> opcode, string asmop>
  : NeonI_ScalarShiftImm<u, opcode,
                         (outs FPR64:$Rd),
                         (ins FPR64:$Src, FPR64:$Rn, shr_imm64:$Imm),
                         !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
                         [], NoItinerary> {
    bits<6> Imm;
    let Inst{22} = 0b1; // immh:immb = 1xxxxxx
    let Inst{21-16} = Imm;
    let Constraints = "$Src = $Rd";
}

// Accumulating left shift (D only): same shape but with the shl_imm64
// operand.
class NeonI_ScalarShiftLeftImm_accum_D_size<bit u, bits<5> opcode, string asmop>
  : NeonI_ScalarShiftImm<u, opcode,
                         (outs FPR64:$Rd),
                         (ins FPR64:$Src, FPR64:$Rn, shl_imm64:$Imm),
                         !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
                         [], NoItinerary> {
    bits<6> Imm;
    let Inst{22} = 0b1; // immh:immb = 1xxxxxx
    let Inst{21-16} = Imm;
    let Constraints = "$Src = $Rd";
}

// Narrowing shift: destination class (FPRCD) is one size below the
// source class (FPRCS).
class NeonI_ScalarShiftImm_narrow_size<bit u, bits<5> opcode, string asmop,
                                       RegisterClass FPRCD, RegisterClass FPRCS,
                                       Operand ImmTy>
  : NeonI_ScalarShiftImm<u, opcode,
                         (outs FPRCD:$Rd), (ins FPRCS:$Rn, ImmTy:$Imm),
                         !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
                         [], NoItinerary>;
4592
// Narrowing shifts for H/S/D sources.  Note the immediate operand is
// sized by the *result* element (e.g. the H->B form uses shr_imm8).
multiclass NeonI_ScalarShiftImm_narrow_HSD_size<bit u, bits<5> opcode,
                                                string asmop> {
  def bhi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR8, FPR16,
                                             shr_imm8> {
    bits<3> Imm;
    let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
    let Inst{18-16} = Imm;
  }
  def hsi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR16, FPR32,
                                             shr_imm16> {
    bits<4> Imm;
    let Inst{22-20} = 0b001; // immh:immb = 001xxxx
    let Inst{19-16} = Imm;
  }
  def sdi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR32, FPR64,
                                             shr_imm32> {
    bits<5> Imm;
    let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
    let Inst{20-16} = Imm;
  }
}

// Fixed-point conversion shifts for S and D (fcvtzs/scvtf-style forms);
// the immediate uses the shr_imm* operands.
multiclass NeonI_ScalarShiftImm_cvt_SD_size<bit u, bits<5> opcode, string asmop> {
  def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
    bits<5> Imm;
    let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
    let Inst{20-16} = Imm;
  }
  def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
    bits<6> Imm;
    let Inst{22} = 0b1; // immh:immb = 1xxxxxx
    let Inst{21-16} = Imm;
  }
}
4627
// v1i64 right shift by i32 immediate onto the D-sized instruction.
multiclass Neon_ScalarShiftRImm_D_size_patterns<SDPatternOperator opnode,
                                               Instruction INSTD> {
  def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
                (INSTD FPR64:$Rn, imm:$Imm)>;
}

// v1i64 left shift by i32 immediate; shl_imm64 presumably also admits 0
// (see the shift-left-by-0 selection issue) -- confirm against the
// operand definition.
multiclass Neon_ScalarShiftLImm_D_size_patterns<SDPatternOperator opnode,
                                               Instruction INSTD> {
  def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 shl_imm64:$Imm))),
                (INSTD FPR64:$Rn, imm:$Imm)>;
}

// Left shift where the amount arrives as a splat (Neon_vdup of an i32
// immediate) rather than a bare immediate.
class Neon_ScalarShiftLImm_V1_D_size_patterns<SDPatternOperator opnode,
                                             Instruction INSTD>
  : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
            (v1i64 (Neon_vdup (i32 shl_imm64:$Imm))))),
        (INSTD FPR64:$Rn, imm:$Imm)>;

// Right-shift counterpart of the splat-amount pattern above.
class Neon_ScalarShiftRImm_V1_D_size_patterns<SDPatternOperator opnode,
                                             Instruction INSTD>
  : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
            (v1i64 (Neon_vdup (i32 shr_imm64:$Imm))))),
        (INSTD FPR64:$Rn, imm:$Imm)>;
4651
// Shift-left-by-immediate patterns for all four scalar element sizes
// (B/H/S from the body, D inherited from the _D_size multiclass).  Each
// element size has its own shl_imm* operand with the matching range.
4652 multiclass Neon_ScalarShiftLImm_BHSD_size_patterns<SDPatternOperator opnode,
4653                                                    Instruction INSTB,
4654                                                    Instruction INSTH,
4655                                                    Instruction INSTS,
4656                                                    Instruction INSTD>
4657   : Neon_ScalarShiftLImm_D_size_patterns<opnode, INSTD> {
4658   def bbi : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (i32 shl_imm8:$Imm))),
4659                 (INSTB FPR8:$Rn, imm:$Imm)>;
4660   def hhi : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (i32 shl_imm16:$Imm))),
4661                 (INSTH FPR16:$Rn, imm:$Imm)>;
4662   def ssi : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (i32 shl_imm32:$Imm))),
4663                 (INSTS FPR32:$Rn, imm:$Imm)>;
4664 }
4665
// Shift-left-and-accumulate/insert pattern (three operands: accumulator
// source, shifted source, immediate) for the D size.  $Src is tied to the
// destination by the instruction definition elsewhere.
4666 class Neon_ScalarShiftLImm_accum_D_size_patterns<SDPatternOperator opnode,
4667                                                 Instruction INSTD>
4668   : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn),
4669             (i32 shl_imm64:$Imm))),
4670         (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>;
4671
// Shift-right-and-accumulate/insert pattern for the D size; mirrors the
// left-shift accumulate class above but with the shr_imm64 range.
4672 class Neon_ScalarShiftRImm_accum_D_size_patterns<SDPatternOperator opnode,
4673                                                 Instruction INSTD>
4674   : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn),
4675             (i32 shr_imm64:$Imm))),
4676         (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>;
4677
// Narrowing shift-right patterns: the result element is half the width of
// the source element (H->B, S->H, D->S), so the immediate range is that of
// the *source* element size.
4678 multiclass Neon_ScalarShiftImm_narrow_HSD_size_patterns<
4679                                                        SDPatternOperator opnode,
4680                                                        Instruction INSTH,
4681                                                        Instruction INSTS,
4682                                                        Instruction INSTD> {
4683   def bhi : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn), (i32 shr_imm16:$Imm))),
4684                 (INSTH FPR16:$Rn, imm:$Imm)>;
4685   def hsi : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
4686                 (INSTS FPR32:$Rn, imm:$Imm)>;
4687   def sdi : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
4688                 (INSTD FPR64:$Rn, imm:$Imm)>;
4689 }
4690
// Fixed-point -> floating-point convert patterns (integer in, float out)
// for S and D sizes; the immediate is the number of fractional bits.
4691 multiclass Neon_ScalarShiftImm_scvtf_SD_size_patterns<SDPatternOperator opnode,
4692                                                       Instruction INSTS,
4693                                                       Instruction INSTD> {
4694   def ssi : Pat<(f32 (opnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
4695                 (INSTS FPR32:$Rn, imm:$Imm)>;
4696   def ddi : Pat<(f64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
4697                 (INSTD FPR64:$Rn, imm:$Imm)>;
4698 }
4699
// Floating-point -> fixed-point convert patterns (float in, integer out)
// for S and D sizes; the immediate is the number of fractional bits.
4700 multiclass Neon_ScalarShiftImm_fcvts_SD_size_patterns<SDPatternOperator opnode,
4701                                                       Instruction INSTS,
4702                                                       Instruction INSTD> {
4703   def ssi : Pat<(v1i32 (opnode (f32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
4704                 (INSTS FPR32:$Rn, imm:$Imm)>;
4705   def ddi : Pat<(v1i64 (opnode (f64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
4706                 (INSTD FPR64:$Rn, imm:$Imm)>;
4707 }
4708
4709 // Scalar Signed Shift Right (Immediate)
4710 defm SSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00000, "sshr">;
4711 defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vshrds_n, SSHRddi>;
4712 // Pattern to match the generic sra node with a splatted immediate
4713 // (the form the llvm.arm.* intrinsic lowers to).
4713 def : Neon_ScalarShiftRImm_V1_D_size_patterns<sra, SSHRddi>;
4714
4715 // Scalar Unsigned Shift Right (Immediate)
4716 defm USHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00000, "ushr">;
4717 defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vshrdu_n, USHRddi>;
4718 // Pattern to match the generic srl node with a splatted immediate
4719 // (the form the llvm.arm.* intrinsic lowers to).
4719 def : Neon_ScalarShiftRImm_V1_D_size_patterns<srl, USHRddi>;
4720
4721 // Scalar Signed Rounding Shift Right (Immediate)
4722 defm SRSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00100, "srshr">;
4723 defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vsrshr, SRSHRddi>;
4724
4725 // Scalar Unsigned Rounding Shift Right (Immediate)
4726 defm URSHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00100, "urshr">;
4727 defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vurshr, URSHRddi>;
4728
4729 // Scalar Signed Shift Right and Accumulate (Immediate)
4730 def SSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00010, "ssra">;
4731 def : Neon_ScalarShiftRImm_accum_D_size_patterns
4732           <int_aarch64_neon_vsrads_n, SSRA>;
4733
4734 // Scalar Unsigned Shift Right and Accumulate (Immediate)
4735 def USRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00010, "usra">;
4736 def : Neon_ScalarShiftRImm_accum_D_size_patterns
4737           <int_aarch64_neon_vsradu_n, USRA>;
4738
4739 // Scalar Signed Rounding Shift Right and Accumulate (Immediate)
4740 def SRSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00110, "srsra">;
4741 def : Neon_ScalarShiftRImm_accum_D_size_patterns
4742           <int_aarch64_neon_vrsrads_n, SRSRA>;
4743
4744 // Scalar Unsigned Rounding Shift Right and Accumulate (Immediate)
4745 def URSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00110, "ursra">;
4746 def : Neon_ScalarShiftRImm_accum_D_size_patterns
4747           <int_aarch64_neon_vrsradu_n, URSRA>;
4748
4749 // Scalar Shift Left (Immediate)
4750 defm SHL : NeonI_ScalarShiftLeftImm_D_size<0b0, 0b01010, "shl">;
4751 defm : Neon_ScalarShiftLImm_D_size_patterns<int_aarch64_neon_vshld_n, SHLddi>;
4752 // Pattern to match the generic shl node with a splatted immediate
4753 // (the form the llvm.arm.* intrinsic lowers to).
4753 def : Neon_ScalarShiftLImm_V1_D_size_patterns<shl, SHLddi>;
4754
4755 // Signed Saturating Shift Left (Immediate)
4756 defm SQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b0, 0b01110, "sqshl">;
4757 defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vqshls_n,
4758                                                SQSHLbbi, SQSHLhhi,
4759                                                SQSHLssi, SQSHLddi>;
4760 // Pattern to match llvm.arm.* intrinsic.
4761 defm : Neon_ScalarShiftLImm_D_size_patterns<Neon_sqrshlImm, SQSHLddi>;
4762
4763 // Unsigned Saturating Shift Left (Immediate)
4764 defm UQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01110, "uqshl">;
4765 defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vqshlu_n,
4766                                                UQSHLbbi, UQSHLhhi,
4767                                                UQSHLssi, UQSHLddi>;
4768 // Pattern to match llvm.arm.* intrinsic.
4769 defm : Neon_ScalarShiftLImm_D_size_patterns<Neon_uqrshlImm, UQSHLddi>;
4770
4771 // Signed Saturating Shift Left Unsigned (Immediate)
4772 defm SQSHLU : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01100, "sqshlu">;
4773 defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vsqshlu,
4774                                                SQSHLUbbi, SQSHLUhhi,
4775                                                SQSHLUssi, SQSHLUddi>;
4776
4777 // Shift Right And Insert (Immediate)
4778 def SRI : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b01000, "sri">;
4779 def : Neon_ScalarShiftRImm_accum_D_size_patterns
4780           <int_aarch64_neon_vsri, SRI>;
4781
4782 // Shift Left And Insert (Immediate)
4783 def SLI : NeonI_ScalarShiftLeftImm_accum_D_size<0b1, 0b01010, "sli">;
4784 def : Neon_ScalarShiftLImm_accum_D_size_patterns
4785           <int_aarch64_neon_vsli, SLI>;
4786
// Narrowing shifts: result element is half the width of the source
// (instruction suffixes bhi/hsi/sdi name the dest/source register classes).
4787 // Signed Saturating Shift Right Narrow (Immediate)
4788 defm SQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10010, "sqshrn">;
4789 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrn,
4790                                                     SQSHRNbhi, SQSHRNhsi,
4791                                                     SQSHRNsdi>;
4792
4793 // Unsigned Saturating Shift Right Narrow (Immediate)
4794 defm UQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10010, "uqshrn">;
4795 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqshrn,
4796                                                     UQSHRNbhi, UQSHRNhsi,
4797                                                     UQSHRNsdi>;
4798
4799 // Signed Saturating Rounded Shift Right Narrow (Immediate)
4800 defm SQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10011, "sqrshrn">;
4801 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrn,
4802                                                     SQRSHRNbhi, SQRSHRNhsi,
4803                                                     SQRSHRNsdi>;
4804
4805 // Unsigned Saturating Rounded Shift Right Narrow (Immediate)
4806 defm UQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10011, "uqrshrn">;
4807 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqrshrn,
4808                                                     UQRSHRNbhi, UQRSHRNhsi,
4809                                                     UQRSHRNsdi>;
4810
4811 // Signed Saturating Shift Right Unsigned Narrow (Immediate)
4812 defm SQSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10000, "sqshrun">;
4813 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrun,
4814                                                     SQSHRUNbhi, SQSHRUNhsi,
4815                                                     SQSHRUNsdi>;
4816
4817 // Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
4818 defm SQRSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10001, "sqrshrun">;
4819 defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrun,
4820                                                     SQRSHRUNbhi, SQRSHRUNhsi,
4821                                                     SQRSHRUNsdi>;
4822
// Fixed-point converts with an immediate fractional-bit count (_N forms).
4823 // Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
4824 defm SCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11100, "scvtf">;
4825 defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtfxs2fp_n,
4826                                                   SCVTF_Nssi, SCVTF_Nddi>;
4827
4828 // Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
4829 defm UCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11100, "ucvtf">;
4830 defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtfxu2fp_n,
4831                                                   UCVTF_Nssi, UCVTF_Nddi>;
4832
4833 // Scalar Floating-point Convert To Signed Fixed-point (Immediate)
4834 defm FCVTZS_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11111, "fcvtzs">;
4835 defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvtfp2fxs_n,
4836                                                   FCVTZS_Nssi, FCVTZS_Nddi>;
4837
4838 // Scalar Floating-point Convert To Unsigned Fixed-point (Immediate)
4839 defm FCVTZU_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11111, "fcvtzu">;
4840 defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvtfp2fxu_n,
4841                                                   FCVTZU_Nssi, FCVTZU_Nddi>;
4842
4843 // Patterns For Convert Instructions Between v1f64 and v1i64
// v1i64 -> v1f64 fixed-point convert (immediate fractional bits).
4844 class Neon_ScalarShiftImm_cvtf_v1f64_pattern<SDPatternOperator opnode,
4845                                              Instruction INST>
4846     : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
4847           (INST FPR64:$Rn, imm:$Imm)>;
4848
// v1f64 -> v1i64 fixed-point convert (immediate fractional bits).
4849 class Neon_ScalarShiftImm_fcvt_v1f64_pattern<SDPatternOperator opnode,
4850                                              Instruction INST>
4851     : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
4852           (INST FPR64:$Rn, imm:$Imm)>;
4853
// Map the llvm.arm.neon fixed-point convert intrinsics onto the D-size
// _N instructions for the v1f64 <-> v1i64 cases.
4854 def : Neon_ScalarShiftImm_cvtf_v1f64_pattern<int_arm_neon_vcvtfxs2fp,
4855                                              SCVTF_Nddi>;
4856
4857 def : Neon_ScalarShiftImm_cvtf_v1f64_pattern<int_arm_neon_vcvtfxu2fp,
4858                                              UCVTF_Nddi>;
4859
4860 def : Neon_ScalarShiftImm_fcvt_v1f64_pattern<int_arm_neon_vcvtfp2fxs,
4861                                              FCVTZS_Nddi>;
4862
4863 def : Neon_ScalarShiftImm_fcvt_v1f64_pattern<int_arm_neon_vcvtfp2fxu,
4864                                              FCVTZU_Nddi>;
4865
4866 // Scalar Integer Add
4867 let isCommutable = 1 in {
4868 def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
4869 }
4870
4871 // Scalar Integer Sub
4872 def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">;
4873
4874 // Pattern for Scalar Integer Add and Sub with D register only
// (generic add/sub DAG nodes on v1i64).
4875 defm : Neon_Scalar3Same_D_size_patterns<add, ADDddd>;
4876 defm : Neon_Scalar3Same_D_size_patterns<sub, SUBddd>;
4877
4878 // Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub
// Signed and unsigned variants select the same instruction since two's
// complement add/sub are sign-agnostic.
4879 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vaddds, ADDddd>;
4880 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vadddu, ADDddd>;
4881 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubds, SUBddd>;
4882 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubdu, SUBddd>;
4883
4884 // Scalar Integer Saturating Add (Signed, Unsigned)
4885 defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;
4886 defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;
4887
4888 // Scalar Integer Saturating Sub (Signed, Unsigned)
4889 defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
4890 defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;
4891
4892
4893 // Patterns to match llvm.aarch64.* intrinsic for
4894 // Scalar Integer Saturating Add, Sub  (Signed, Unsigned)
4895 defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqadds, SQADDbbb,
4896                                            SQADDhhh, SQADDsss, SQADDddd>;
4897 defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqaddu, UQADDbbb,
4898                                            UQADDhhh, UQADDsss, UQADDddd>;
4899 defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqsubs, SQSUBbbb,
4900                                            SQSUBhhh, SQSUBsss, SQSUBddd>;
4901 defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqsubu, UQSUBbbb,
4902                                            UQSUBhhh, UQSUBsss, UQSUBddd>;
4903
4904 // Scalar Integer Saturating Doubling Multiply Half High
4905 defm SQDMULH : NeonI_Scalar3Same_HS_sizes<0b0, 0b10110, "sqdmulh", 1>;
4906
4907 // Scalar Integer Saturating Rounding Doubling Multiply Half High
4908 defm SQRDMULH : NeonI_Scalar3Same_HS_sizes<0b1, 0b10110, "sqrdmulh", 1>;
4909
4910 // Patterns to match llvm.arm.* intrinsic for
4911 // Scalar Integer Saturating Doubling Multiply Half High and
4912 // Scalar Integer Saturating Rounding Doubling Multiply Half High
// Only H and S element sizes exist for these operations.
4913 defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqdmulh, SQDMULHhhh,
4914                                                                SQDMULHsss>;
4915 defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqrdmulh, SQRDMULHhhh,
4916                                                                 SQRDMULHsss>;
4917
4918 // Scalar Floating-point Multiply Extended
4919 defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>;
4920
4921 // Scalar Floating-point Reciprocal Step
4922 defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>;
4923 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vrecps, f32, f32,
4924                                          FRECPSsss, f64, f64, FRECPSddd>;
// v1f64 form of the llvm.arm.neon reciprocal-step intrinsic.
4925 def : Pat<(v1f64 (int_arm_neon_vrecps (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
4926           (FRECPSddd FPR64:$Rn, FPR64:$Rm)>;
4927
4928 // Scalar Floating-point Reciprocal Square Root Step
4929 defm FRSQRTS : NeonI_Scalar3Same_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", 0>;
4930 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vrsqrts, f32, f32,
4931                                          FRSQRTSsss, f64, f64, FRSQRTSddd>;
// v1f64 forms of the llvm.arm.neon rsqrt-step intrinsic and generic fsqrt.
4932 def : Pat<(v1f64 (int_arm_neon_vrsqrts (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
4933           (FRSQRTSddd FPR64:$Rn, FPR64:$Rm)>;
4934 def : Pat<(v1f64 (fsqrt (v1f64 FPR64:$Rn))), (FSQRTdd FPR64:$Rn)>;
4935
4936 // Patterns to match llvm.aarch64.* intrinsic for
4937 // Scalar Floating-point Multiply Extended,
// Two-operand scalar FP patterns over plain f32/f64 operands (no vector
// wrapper) for the S and D instructions.
4938 multiclass Neon_Scalar3Same_MULX_SD_size_patterns<SDPatternOperator opnode,
4939                                                   Instruction INSTS,
4940                                                   Instruction INSTD> {
4941   def : Pat<(f32 (opnode (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
4942             (INSTS FPR32:$Rn, FPR32:$Rm)>;
4943   def : Pat<(f64 (opnode (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
4944             (INSTD FPR64:$Rn, FPR64:$Rm)>;
4945 }
4946
4947 defm : Neon_Scalar3Same_MULX_SD_size_patterns<int_aarch64_neon_vmulx,
4948                                               FMULXsss, FMULXddd>;
// v1f64 form of the same intrinsic.
4949 def : Pat<(v1f64 (int_aarch64_neon_vmulx (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
4950           (FMULXddd FPR64:$Rn, FPR64:$Rm)>;
4951
4952 // Scalar Integer Shift Left (Signed, Unsigned)
// Register-operand shifts: the shift amount comes from a register, unlike
// the immediate forms above.
4953 def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
4954 def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;
4955
4956 // Patterns to match llvm.arm.* intrinsic for
4957 // Scalar Integer Shift Left (Signed, Unsigned)
4958 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
4959 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
4960
4961 // Patterns to match llvm.aarch64.* intrinsic for
4962 // Scalar Integer Shift Left (Signed, Unsigned)
4963 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshlds, SSHLddd>;
4964 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshldu, USHLddd>;
4965
4966 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
4967 defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
4968 defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;
4969
4970 // Patterns to match llvm.aarch64.* intrinsic for
4971 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
4972 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshls, SQSHLbbb,
4973                                            SQSHLhhh, SQSHLsss, SQSHLddd>;
4974 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshlu, UQSHLbbb,
4975                                            UQSHLhhh, UQSHLsss, UQSHLddd>;
4976
4977 // Patterns to match llvm.arm.* intrinsic for
4978 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
4979 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
4980 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
4981
4982 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
4983 def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">;
4984 def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">;
4985
4986 // Patterns to match llvm.aarch64.* intrinsic for
4987 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
4988 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshlds, SRSHLddd>;
4989 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshldu, URSHLddd>;
4990
4991 // Patterns to match llvm.arm.* intrinsic for
4992 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
4993 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
4994 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
4995
4996 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
4997 defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>;
4998 defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>;
4999
5000 // Patterns to match llvm.aarch64.* intrinsic for
5001 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
5002 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshls, SQRSHLbbb,
5003                                            SQRSHLhhh, SQRSHLsss, SQRSHLddd>;
5004 defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb,
5005                                            UQRSHLhhh, UQRSHLsss, UQRSHLddd>;
5006
5007 // Patterns to match llvm.arm.* intrinsic for
5008 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
5009 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
5010 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
5011
// Long (widening) saturating doubling multiplies: H->S and S->D sizes.
5012 // Signed Saturating Doubling Multiply-Add Long
5013 defm SQDMLAL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1001, "sqdmlal">;
5014 defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlal,
5015                                             SQDMLALshh, SQDMLALdss>;
5016
5017 // Signed Saturating Doubling Multiply-Subtract Long
5018 defm SQDMLSL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1011, "sqdmlsl">;
5019 defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlsl,
5020                                             SQDMLSLshh, SQDMLSLdss>;
5021
5022 // Signed Saturating Doubling Multiply Long
5023 defm SQDMULL : NeonI_Scalar3Diff_HS_size<0b0, 0b1101, "sqdmull">;
5024 defm : Neon_Scalar3Diff_HS_size_patterns<int_arm_neon_vqdmull,
5025                                          SQDMULLshh, SQDMULLdss>;
5026
5027 // Scalar Signed Integer Convert To Floating-point
5028 defm SCVTF  : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11101, "scvtf">;
5029 defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtint2fps,
5030                                                  SCVTFss, SCVTFdd>;
5031
5032 // Scalar Unsigned Integer Convert To Floating-point
5033 defm UCVTF  : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11101, "ucvtf">;
5034 defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtint2fpu,
5035                                                  UCVTFss, UCVTFdd>;
5036
5037 // Scalar Floating-point Converts
// FCVTXN: double -> single with "round to odd" (D-size narrowing form).
5038 def FCVTXN : NeonI_Scalar2SameMisc_fcvtxn_D_size<0b1, 0b10110, "fcvtxn">;
5039 def : Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<int_aarch64_neon_fcvtxn,
5040                                                   FCVTXN>;
5041
// FP -> integer converts with explicit rounding modes: N (to nearest,
// ties even), M (toward -inf), A (to nearest, ties away), P (toward +inf),
// Z (toward zero); S/U suffix selects signed/unsigned result.  Each has
// S- and D-size patterns plus a D-size pattern for the llvm.arm/aarch64
// vector-convert intrinsic.
5042 defm FCVTNS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11010, "fcvtns">;
5043 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtns,
5044                                                   FCVTNSss, FCVTNSdd>;
5045 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtns, FCVTNSdd>;
5046
5047 defm FCVTNU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11010, "fcvtnu">;
5048 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtnu,
5049                                                   FCVTNUss, FCVTNUdd>;
5050 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtnu, FCVTNUdd>;
5051
5052 defm FCVTMS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11011, "fcvtms">;
5053 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtms,
5054                                                   FCVTMSss, FCVTMSdd>;
5055 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtms, FCVTMSdd>;
5056
5057 defm FCVTMU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11011, "fcvtmu">;
5058 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtmu,
5059                                                   FCVTMUss, FCVTMUdd>;
5060 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtmu, FCVTMUdd>;
5061
5062 defm FCVTAS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11100, "fcvtas">;
5063 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtas,
5064                                                   FCVTASss, FCVTASdd>;
5065 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtas, FCVTASdd>;
5066
5067 defm FCVTAU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11100, "fcvtau">;
5068 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtau,
5069                                                   FCVTAUss, FCVTAUdd>;
5070 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtau, FCVTAUdd>;
5071
5072 defm FCVTPS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11010, "fcvtps">;
5073 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtps,
5074                                                   FCVTPSss, FCVTPSdd>;
5075 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtps, FCVTPSdd>;
5076
5077 defm FCVTPU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11010, "fcvtpu">;
5078 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtpu,
5079                                                   FCVTPUss, FCVTPUdd>;
5080 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtpu, FCVTPUdd>;
5081
5082 defm FCVTZS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11011, "fcvtzs">;
5083 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzs,
5084                                                   FCVTZSss, FCVTZSdd>;
5085 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_aarch64_neon_vcvtzs,
5086                                                 FCVTZSdd>;
5087
5088 defm FCVTZU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11011, "fcvtzu">;
5089 defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzu,
5090                                                   FCVTZUss, FCVTZUdd>;
5091 def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_aarch64_neon_vcvtzu,
5092                                                 FCVTZUdd>;
5093
5094 // Patterns For Convert Instructions Between v1f64 and v1i64
// v1i64 -> v1f64 (integer to FP).
5095 class Neon_Scalar2SameMisc_cvtf_v1f64_pattern<SDPatternOperator opnode,
5096                                               Instruction INST>
5097     : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn))), (INST FPR64:$Rn)>;
5098
// v1f64 -> v1i64 (FP to integer).
5099 class Neon_Scalar2SameMisc_fcvt_v1f64_pattern<SDPatternOperator opnode,
5100                                               Instruction INST>
5101     : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
5102
// Generic int<->fp DAG nodes on v1i64/v1f64 select the D-size converts
// (fp_to_sint/fp_to_uint use the round-toward-zero FCVTZ* forms, matching
// C conversion semantics).
5103 def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern<sint_to_fp, SCVTFdd>;
5104 def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern<uint_to_fp, UCVTFdd>;
5105
5106 def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern<fp_to_sint, FCVTZSdd>;
5107 def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern<fp_to_uint, FCVTZUdd>;
5108
5109 // Scalar Floating-point Reciprocal Estimate
5110 defm FRECPE : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11101, "frecpe">;
5111 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpe,
5112                                              FRECPEss, FRECPEdd>;
5113 def : Neon_Scalar2SameMisc_V1_D_size_patterns<int_arm_neon_vrecpe,
5114                                               FRECPEdd>;
5115
5116 // Scalar Floating-point Reciprocal Exponent
5117 defm FRECPX : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11111, "frecpx">;
5118 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpx,
5119                                              FRECPXss, FRECPXdd>;
5120
5121 // Scalar Floating-point Reciprocal Square Root Estimate
5122 defm FRSQRTE: NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11101, "frsqrte">;
5123 defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrsqrte,
5124                                                  FRSQRTEss, FRSQRTEdd>;
5125 def : Neon_Scalar2SameMisc_V1_D_size_patterns<int_arm_neon_vrsqrte,
5126                                               FRSQRTEdd>;
5127
5128 // Scalar Floating-point Round
// Maps a generic v1f64 rounding node onto the corresponding FRINT*
// D-register instruction (defined elsewhere in this file).
5129 class Neon_ScalarFloatRound_pattern<SDPatternOperator opnode, Instruction INST>
5130     : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
5131
5132 def : Neon_ScalarFloatRound_pattern<fceil, FRINTPdd>;
5133 def : Neon_ScalarFloatRound_pattern<ffloor, FRINTMdd>;
5134 def : Neon_ScalarFloatRound_pattern<ftrunc, FRINTZdd>;
5135 def : Neon_ScalarFloatRound_pattern<frint, FRINTXdd>;
5136 def : Neon_ScalarFloatRound_pattern<fnearbyint, FRINTIdd>;
5137 def : Neon_ScalarFloatRound_pattern<frnd, FRINTAdd>;
5138 def : Neon_ScalarFloatRound_pattern<int_aarch64_neon_frintn, FRINTNdd>;
5139
5140 // Scalar Integer Compare
5141
5142 // Scalar Compare Bitwise Equal
5143 def CMEQddd: NeonI_Scalar3Same_D_size<0b1, 0b10001, "cmeq">;
5144 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vceq, CMEQddd>;
5145
// Selects a Neon_cmp node with a specific condition code onto the matching
// D-size compare instruction (the CC is matched literally, not captured).
5146 class Neon_Scalar3Same_cmp_D_size_v1_patterns<SDPatternOperator opnode,
5147                                               Instruction INSTD,
5148                                               CondCode CC>
5149   : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm), CC)),
5150         (INSTD FPR64:$Rn, FPR64:$Rm)>;
5151
5152 def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMEQddd, SETEQ>;
5153
5154 // Scalar Compare Signed Greater Than Or Equal
5155 def CMGEddd: NeonI_Scalar3Same_D_size<0b0, 0b00111, "cmge">;
5156 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vcge, CMGEddd>;
5157 def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMGEddd, SETGE>;
5158
5159 // Scalar Compare Unsigned Higher Or Same
5160 def CMHSddd: NeonI_Scalar3Same_D_size<0b1, 0b00111, "cmhs">;
5161 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vchs, CMHSddd>;
5162 def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMHSddd, SETUGE>;
5163
5164 // Scalar Compare Unsigned Higher
5165 def CMHIddd: NeonI_Scalar3Same_D_size<0b1, 0b00110, "cmhi">;
5166 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vchi, CMHIddd>;
5167 def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMHIddd, SETUGT>;
5168
5169 // Scalar Compare Signed Greater Than
5170 def CMGTddd: NeonI_Scalar3Same_D_size<0b0, 0b00110, "cmgt">;
5171 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vcgt, CMGTddd>;
5172 def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMGTddd, SETGT>;
5173
5174 // Scalar Compare Bitwise Test Bits
5175 def CMTSTddd: NeonI_Scalar3Same_D_size<0b0, 0b10001, "cmtst">;
5176 defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vtstd, CMTSTddd>;
5177 defm : Neon_Scalar3Same_D_size_patterns<Neon_tst, CMTSTddd>;
5178
// Integer compares against zero (immediate #0 forms).
5179 // Scalar Compare Bitwise Equal To Zero
5180 def CMEQddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01001, "cmeq">;
5181 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vceq,
5182                                                 CMEQddi>;
5183 def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETEQ, CMEQddi>;
5184
5185 // Scalar Compare Signed Greater Than Or Equal To Zero
5186 def CMGEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01000, "cmge">;
5187 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcge,
5188                                                 CMGEddi>;
5189 def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETGE, CMGEddi>;
5190
5191 // Scalar Compare Signed Greater Than Zero
5192 def CMGTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01000, "cmgt">;
5193 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcgt,
5194                                                 CMGTddi>;
5195 def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETGT, CMGTddi>;
5196
5197 // Scalar Compare Signed Less Than Or Equal To Zero
5198 def CMLEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01001, "cmle">;
5199 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vclez,
5200                                                 CMLEddi>;
5201 def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETLE, CMLEddi>;
5202
5203 // Scalar Compare Less Than Zero
5204 def CMLTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01010, "cmlt">;
5205 def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcltz,
5206                                                 CMLTddi>;
5207 def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETLT, CMLTddi>;
5208
5209 // Scalar Floating-point Compare
5210
5211 // Scalar Floating-point Compare Mask Equal
5212 defm FCMEQ: NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11100, "fcmeq">;
5213 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fceq, v1i32, f32,
5214                                          FCMEQsss, v1i64, f64, FCMEQddd>;
5215 def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETEQ, FCMEQddd>;
5216
5217 // Scalar Floating-point Compare Mask Equal To Zero
5218 defm FCMEQZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01101, "fcmeq">;
5219 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fceq, SETEQ,
5220                                                   FCMEQZssi, FCMEQZddi>;
5221
5222 // Scalar Floating-point Compare Mask Greater Than Or Equal
5223 defm FCMGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11100, "fcmge">;
5224 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcge, v1i32, f32,
5225                                          FCMGEsss, v1i64, f64, FCMGEddd>;
5226 def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETGE, FCMGEddd>;
5227
5228 // Scalar Floating-point Compare Mask Greater Than Or Equal To Zero
5229 defm FCMGEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01100, "fcmge">;
5230 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcge, SETGE,
5231                                                   FCMGEZssi, FCMGEZddi>;
5232
// Scalar Floating-point Compare Mask Greater Than
5234 defm FCMGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11100, "fcmgt">;
5235 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcgt, v1i32, f32,
5236                                          FCMGTsss, v1i64, f64, FCMGTddd>;
5237 def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETGT, FCMGTddd>;
5238
// Scalar Floating-point Compare Mask Greater Than Zero
5240 defm FCMGTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01100, "fcmgt">;
5241 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcgt, SETGT,
5242                                                   FCMGTZssi, FCMGTZddi>;
5243
5244 // Scalar Floating-point Compare Mask Less Than Or Equal To Zero
5245 defm FCMLEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01101, "fcmle">;
5246 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fclez, SETLE,
5247                                                   FCMLEZssi, FCMLEZddi>;
5248
5249 // Scalar Floating-point Compare Mask Less Than Zero
5250 defm FCMLTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01110, "fcmlt">;
5251 defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcltz, SETLT,
5252                                                   FCMLTZssi, FCMLTZddi>;
5253
5254 // Scalar Floating-point Absolute Compare Mask Greater Than Or Equal
5255 defm FACGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11101, "facge">;
5256 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcage, v1i32, f32,
5257                                          FACGEsss, v1i64, f64, FACGEddd>;
5258 def : Pat<(v1i64 (int_aarch64_neon_vcage (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
5259           (FACGEddd FPR64:$Rn, FPR64:$Rm)>;
5260
5261 // Scalar Floating-point Absolute Compare Mask Greater Than
5262 defm FACGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11101, "facgt">;
5263 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcagt, v1i32, f32,
5264                                          FACGTsss, v1i64, f64, FACGTddd>;
5265 def : Pat<(v1i64 (int_aarch64_neon_vcagt (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
5266           (FACGTddd FPR64:$Rn, FPR64:$Rm)>;
5267
5268 // Scalar Floating-point Absolute Difference
5269 defm FABD: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11010, "fabd">;
5270 defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vabd, f32, f32,
5271                                          FABDsss, f64, f64, FABDddd>;
5272
5273 // Scalar Absolute Value
5274 defm ABS : NeonI_Scalar2SameMisc_D_size<0b0, 0b01011, "abs">;
5275 defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vabs, ABSdd>;
5276
5277 // Scalar Signed Saturating Absolute Value
5278 defm SQABS : NeonI_Scalar2SameMisc_BHSD_size<0b0, 0b00111, "sqabs">;
5279 defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqabs,
5280                                                SQABSbb, SQABShh, SQABSss, SQABSdd>;
5281
5282 // Scalar Negate
5283 defm NEG : NeonI_Scalar2SameMisc_D_size<0b1, 0b01011, "neg">;
5284 defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vneg, NEGdd>;
5285
5286 // Scalar Signed Saturating Negate
5287 defm SQNEG : NeonI_Scalar2SameMisc_BHSD_size<0b1, 0b00111, "sqneg">;
5288 defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqneg,
5289                                                SQNEGbb, SQNEGhh, SQNEGss, SQNEGdd>;
5290
5291 // Scalar Signed Saturating Accumulated of Unsigned Value
5292 defm SUQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b0, 0b00011, "suqadd">;
5293 defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vuqadd,
5294                                                      SUQADDbb, SUQADDhh,
5295                                                      SUQADDss, SUQADDdd>;
5296
5297 // Scalar Unsigned Saturating Accumulated of Signed Value
5298 defm USQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b1, 0b00011, "usqadd">;
5299 defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vsqadd,
5300                                                      USQADDbb, USQADDhh,
5301                                                      USQADDss, USQADDdd>;
5302
5303 def : Pat<(v1i64 (int_aarch64_neon_suqadd (v1i64 FPR64:$Src),
5304                                           (v1i64 FPR64:$Rn))),
5305           (SUQADDdd FPR64:$Src, FPR64:$Rn)>;
5306
5307 def : Pat<(v1i64 (int_aarch64_neon_usqadd (v1i64 FPR64:$Src),
5308                                           (v1i64 FPR64:$Rn))),
5309           (USQADDdd FPR64:$Src, FPR64:$Rn)>;
5310
5311 def : Pat<(v1i64 (int_arm_neon_vabs (v1i64 FPR64:$Rn))),
5312           (ABSdd FPR64:$Rn)>;
5313
5314 def : Pat<(v1i64 (int_arm_neon_vqabs (v1i64 FPR64:$Rn))),
5315           (SQABSdd FPR64:$Rn)>;
5316
5317 def : Pat<(v1i64 (int_arm_neon_vqneg (v1i64 FPR64:$Rn))),
5318           (SQNEGdd FPR64:$Rn)>;
5319
5320 def : Pat<(v1i64 (sub (v1i64 (bitconvert (v8i8 Neon_AllZero))),
5321                       (v1i64 FPR64:$Rn))),
5322           (NEGdd FPR64:$Rn)>;
5323
5324 // Scalar Signed Saturating Extract Unsigned Narrow
5325 defm SQXTUN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10010, "sqxtun">;
5326 defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnsu,
5327                                                      SQXTUNbh, SQXTUNhs,
5328                                                      SQXTUNsd>;
5329
5330 // Scalar Signed Saturating Extract Narrow
5331 defm SQXTN  : NeonI_Scalar2SameMisc_narrow_HSD_size<0b0, 0b10100, "sqxtn">;
5332 defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovns,
5333                                                      SQXTNbh, SQXTNhs,
5334                                                      SQXTNsd>;
5335
5336 // Scalar Unsigned Saturating Extract Narrow
5337 defm UQXTN  : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10100, "uqxtn">;
5338 defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnu,
5339                                                      UQXTNbh, UQXTNhs,
5340                                                      UQXTNsd>;
5341
5342 // Scalar Reduce Pairwise
5343
// Scalar pairwise-reduction base: a single instruction that reduces the two
// 64-bit lanes of a 128-bit vector into a 64-bit scalar result.
multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,
                                     string asmop, bit Commutable = 0> {
  let isCommutable = Commutable in {
    // <asmop> Dd, Vn.2d
    def _D_2D : NeonI_ScalarPair<u, {size, 0b1}, opcode,
                                (outs FPR64:$Rd), (ins VPR128:$Rn),
                                !strconcat(asmop, "\t$Rd, $Rn.2d"),
                                [],
                                NoItinerary>;
  }
}
5354
// Scalar pairwise reduction over both the S and D element sizes: inherits the
// _D_2D variant from NeonI_ScalarPair_D_sizes and adds the 2x32-bit form
// that reduces the two lanes of a 64-bit vector into a 32-bit scalar.
multiclass NeonI_ScalarPair_SD_sizes<bit u, bit size, bits<5> opcode,
                                     string asmop, bit Commutable = 0>
  : NeonI_ScalarPair_D_sizes<u, size, opcode, asmop, Commutable> {
  let isCommutable = Commutable in {
    // <asmop> Sd, Vn.2s
    def _S_2S : NeonI_ScalarPair<u, {size, 0b0}, opcode,
                                (outs FPR32:$Rd), (ins VPR64:$Rn),
                                !strconcat(asmop, "\t$Rd, $Rn.2s"),
                                [],
                                NoItinerary>;
  }
}
5366
5367 // Scalar Reduce Addition Pairwise (Integer) with
5368 // Pattern to match llvm.arm.* intrinsic
5369 defm ADDPvv : NeonI_ScalarPair_D_sizes<0b0, 0b1, 0b11011, "addp", 0>;
5370
5371 // Pattern to match llvm.aarch64.* intrinsic for
5372 // Scalar Reduce Addition Pairwise (Integer)
5373 def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))),
5374           (ADDPvv_D_2D VPR128:$Rn)>;
5375 def : Pat<(v1i64 (int_aarch64_neon_vaddv (v2i64 VPR128:$Rn))),
5376           (ADDPvv_D_2D VPR128:$Rn)>;
5377
5378 // Scalar Reduce Addition Pairwise (Floating Point)
5379 defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>;
5380
5381 // Scalar Reduce Maximum Pairwise (Floating Point)
5382 defm FMAXPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01111, "fmaxp", 0>;
5383
5384 // Scalar Reduce Minimum Pairwise (Floating Point)
5385 defm FMINPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01111, "fminp", 0>;
5386
5387 // Scalar Reduce maxNum Pairwise (Floating Point)
5388 defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>;
5389
5390 // Scalar Reduce minNum Pairwise (Floating Point)
5391 defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>;
5392
// Match a floating-point pairwise-reduction operator on v2f32 -> f32 and
// v2f64 -> f64 onto the corresponding _S_2S / _D_2D instructions.
multiclass Neon_ScalarPair_SD_size_patterns<SDPatternOperator opnode,
                                            Instruction INSTS,
                                            Instruction INSTD> {
  def : Pat<(f32 (opnode (v2f32 VPR64:$Rn))),
            (INSTS VPR64:$Rn)>;
  def : Pat<(f64 (opnode (v2f64 VPR128:$Rn))),
            (INSTD VPR128:$Rn)>;
}
5401
5402 // Patterns to match llvm.aarch64.* intrinsic for
5403 // Scalar Reduce Add, Max, Min, MaxiNum, MinNum Pairwise (Floating Point)
5404 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfadd,
5405                                         FADDPvv_S_2S, FADDPvv_D_2D>;
5406
5407 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmax,
5408                                         FMAXPvv_S_2S, FMAXPvv_D_2D>;
5409
5410 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmin,
5411                                         FMINPvv_S_2S, FMINPvv_D_2D>;
5412
5413 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm,
5414                                         FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>;
5415
5416 defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm,
5417                                         FMINNMPvv_S_2S, FMINNMPvv_D_2D>;
5418
5419 def : Pat<(f32 (int_aarch64_neon_vpfadd (v4f32 VPR128:$Rn))),
5420           (FADDPvv_S_2S (v2f32
5421                (EXTRACT_SUBREG
5422                    (v4f32 (FADDP_4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rn))),
5423                    sub_64)))>;
5424
5425 // Scalar by element Arithmetic
5426
// Base class for scalar by-element arithmetic:
//   <asmop> Rd, Rn, Vm<rmlane>[Imm]
// Subclasses encode the lane index (Imm) into the h/l/m instruction bits and
// the element register (MRm) into the Rm field, since the exact bit layout
// depends on the element size.
class NeonI_ScalarXIndexedElemArith<string asmop, bits<4> opcode,
                                    string rmlane, bit u, bit szhi, bit szlo,
                                    RegisterClass ResFPR, RegisterClass OpFPR,
                                    RegisterOperand OpVPR, Operand OpImm>
  : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode,
                             (outs ResFPR:$Rd),
                             (ins OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
                             asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
                             [],
                             NoItinerary> {
  // Lane index; placed into Inst{11}/Inst{21}/Inst{20} by each def.
  bits<3> Imm;
  // Register holding the by-element operand.
  bits<5> MRm;
}
5440
// Accumulating variant of NeonI_ScalarXIndexedElemArith (used by fmla/fmls,
// sqdmlal/sqdmlsl below): the destination is also a source, expressed by
// tying $src to $Rd.
class NeonI_ScalarXIndexedElemArith_Constraint_Impl<string asmop, bits<4> opcode,
                                                    string rmlane,
                                                    bit u, bit szhi, bit szlo,
                                                    RegisterClass ResFPR,
                                                    RegisterClass OpFPR,
                                                    RegisterOperand OpVPR,
                                                    Operand OpImm>
  : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode,
                             (outs ResFPR:$Rd),
                             (ins ResFPR:$src, OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
                             asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
                             [],
                             NoItinerary> {
  // Accumulator input and result share a register.
  let Constraints = "$src = $Rd";
  // Lane index; placed into Inst{11}/Inst{21}/Inst{20} by each def.
  bits<3> Imm;
  // Register holding the by-element operand.
  bits<5> MRm;
}
5458
5459 // Scalar Floating Point  multiply (scalar, by element)
5460 def FMULssv_4S : NeonI_ScalarXIndexedElemArith<"fmul",
5461   0b1001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
5462   let Inst{11} = Imm{1}; // h
5463   let Inst{21} = Imm{0}; // l
5464   let Inst{20-16} = MRm;
5465 }
5466 def FMULddv_2D : NeonI_ScalarXIndexedElemArith<"fmul",
5467   0b1001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
5468   let Inst{11} = Imm{0}; // h
5469   let Inst{21} = 0b0;    // l
5470   let Inst{20-16} = MRm;
5471 }
5472
5473 // Scalar Floating Point  multiply extended (scalar, by element)
5474 def FMULXssv_4S : NeonI_ScalarXIndexedElemArith<"fmulx",
5475   0b1001, ".s", 0b1, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
5476   let Inst{11} = Imm{1}; // h
5477   let Inst{21} = Imm{0}; // l
5478   let Inst{20-16} = MRm;
5479 }
5480 def FMULXddv_2D : NeonI_ScalarXIndexedElemArith<"fmulx",
5481   0b1001, ".d", 0b1, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
5482   let Inst{11} = Imm{0}; // h
5483   let Inst{21} = 0b0;    // l
5484   let Inst{20-16} = MRm;
5485 }
5486
// Match a scalar multiply (fmul / vmulx) whose second operand is an element
// extracted from a vector onto the by-element instruction.  The 128-bit
// vector operand form is matched directly; the 64-bit form is widened with
// SUBREG_TO_REG first.  Both operand orders are covered since the multiply
// commutes.
multiclass Neon_ScalarXIndexedElem_MUL_MULX_Patterns<
  SDPatternOperator opnode,
  Instruction INST,
  ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm,
  ValueType OpNTy, ValueType ExTy, Operand OpNImm> {

  def  : Pat<(ResTy (opnode (ResTy FPRC:$Rn),
               (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)))),
             (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def  : Pat<(ResTy (opnode (ResTy FPRC:$Rn),
               (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)))),
             (ResTy (INST (ResTy FPRC:$Rn),
               (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
               OpNImm:$Imm))>;

  // swapped operands
  def  : Pat<(ResTy (opnode
               (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
               (ResTy FPRC:$Rn))),
             (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def  : Pat<(ResTy (opnode
               (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
               (ResTy FPRC:$Rn))),
             (ResTy (INST (ResTy FPRC:$Rn),
               (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
               OpNImm:$Imm))>;
}
5516
5517 // Patterns for Scalar Floating Point  multiply (scalar, by element)
5518 defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<fmul, FMULssv_4S,
5519   f32, FPR32, v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
5520 defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<fmul, FMULddv_2D,
5521   f64, FPR64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;
5522
5523 // Patterns for Scalar Floating Point  multiply extended (scalar, by element)
5524 defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<int_aarch64_neon_vmulx,
5525   FMULXssv_4S, f32, FPR32, v4f32, neon_uimm2_bare,
5526   v2f32, v4f32, neon_uimm1_bare>;
5527 defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<int_aarch64_neon_vmulx,
5528   FMULXddv_2D, f64, FPR64, v2f64, neon_uimm1_bare,
5529   v1f64, v2f64, neon_uimm0_bare>;
5530
5531 // Scalar Floating Point fused multiply-add (scalar, by element)
5532 def FMLAssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
5533   0b0001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
5534   let Inst{11} = Imm{1}; // h
5535   let Inst{21} = Imm{0}; // l
5536   let Inst{20-16} = MRm;
5537 }
5538 def FMLAddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
5539   0b0001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
5540   let Inst{11} = Imm{0}; // h
5541   let Inst{21} = 0b0;    // l
5542   let Inst{20-16} = MRm;
5543 }
5544
5545 // Scalar Floating Point fused multiply-subtract (scalar, by element)
5546 def FMLSssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
5547   0b0101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
5548   let Inst{11} = Imm{1}; // h
5549   let Inst{21} = Imm{0}; // l
5550   let Inst{20-16} = MRm;
5551 }
5552 def FMLSddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
5553   0b0101, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
5554   let Inst{11} = Imm{0}; // h
5555   let Inst{21} = 0b0;    // l
5556   let Inst{20-16} = MRm;
5557 }
// We are allowed to match the fma instruction regardless of compile options.
// Match scalar fma (and fma-with-fneg for fmls) where one multiplicand is an
// element extracted from a vector onto the by-element FMLA/FMLS instructions.
// As above, the 128-bit vector form is matched directly, the 64-bit form via
// SUBREG_TO_REG, and both orders of the two multiplied operands are covered.
multiclass Neon_ScalarXIndexedElem_FMA_Patterns<
  Instruction FMLAI, Instruction FMLSI,
  ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm,
  ValueType OpNTy, ValueType ExTy, Operand OpNImm> {
  // fmla
  def  : Pat<(ResTy (fma (ResTy FPRC:$Rn),
               (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
               (ResTy FPRC:$Ra))),
             (ResTy (FMLAI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def  : Pat<(ResTy (fma (ResTy FPRC:$Rn),
               (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
               (ResTy FPRC:$Ra))),
             (ResTy (FMLAI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn),
               (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
               OpNImm:$Imm))>;

  // swapped fmla operands
  def  : Pat<(ResTy (fma
               (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
               (ResTy FPRC:$Rn),
               (ResTy FPRC:$Ra))),
             (ResTy (FMLAI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def  : Pat<(ResTy (fma
               (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
               (ResTy FPRC:$Rn),
               (ResTy FPRC:$Ra))),
             (ResTy (FMLAI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn),
               (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
               OpNImm:$Imm))>;

  // fmls
  def  : Pat<(ResTy (fma (ResTy FPRC:$Rn),
               (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))),
               (ResTy FPRC:$Ra))),
             (ResTy (FMLSI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def  : Pat<(ResTy (fma (ResTy FPRC:$Rn),
               (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))),
               (ResTy FPRC:$Ra))),
             (ResTy (FMLSI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn),
               (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
               OpNImm:$Imm))>;

  // swapped fmls operands
  def  : Pat<(ResTy (fma
               (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))),
               (ResTy FPRC:$Rn),
               (ResTy FPRC:$Ra))),
             (ResTy (FMLSI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;

  def  : Pat<(ResTy (fma
               (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))),
               (ResTy FPRC:$Rn),
               (ResTy FPRC:$Ra))),
             (ResTy (FMLSI (ResTy FPRC:$Ra),
               (ResTy FPRC:$Rn),
               (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
               OpNImm:$Imm))>;
}
5627
// Scalar Floating Point fused multiply-add and
// multiply-subtract (scalar, by element)
defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAssv_4S, FMLSssv_4S,
  f32, FPR32, v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
// Note: a second, byte-identical instantiation of the _2D patterns used to
// follow here; it only produced redundant duplicate Pat records and has been
// removed.
defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAddv_2D, FMLSddv_2D,
  f64, FPR64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;
5636
5637 // Scalar Signed saturating doubling multiply long (scalar, by element)
5638 def SQDMULLshv_4H : NeonI_ScalarXIndexedElemArith<"sqdmull",
5639   0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
5640   let Inst{11} = 0b0; // h
5641   let Inst{21} = Imm{1}; // l
5642   let Inst{20} = Imm{0}; // m
5643   let Inst{19-16} = MRm{3-0};
5644 }
5645 def SQDMULLshv_8H : NeonI_ScalarXIndexedElemArith<"sqdmull",
5646   0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
5647   let Inst{11} = Imm{2}; // h
5648   let Inst{21} = Imm{1}; // l
5649   let Inst{20} = Imm{0}; // m
5650   let Inst{19-16} = MRm{3-0};
5651 }
5652 def SQDMULLdsv_2S : NeonI_ScalarXIndexedElemArith<"sqdmull",
5653   0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
5654   let Inst{11} = 0b0;    // h
5655   let Inst{21} = Imm{0}; // l
5656   let Inst{20-16} = MRm;
5657 }
5658 def SQDMULLdsv_4S : NeonI_ScalarXIndexedElemArith<"sqdmull",
5659   0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
5660   let Inst{11} = Imm{1};    // h
5661   let Inst{21} = Imm{0};    // l
5662   let Inst{20-16} = MRm;
5663 }
5664
// Match a scalar widening/high-half multiply operator (sqdmull, sqdmulh,
// sqrdmulh) whose second operand is a vector element, rebuilt through
// scalar_to_vector, onto the by-element instruction.  Both operand orders
// are covered since these multiplies commute.
multiclass Neon_ScalarXIndexedElem_MUL_Patterns<
  SDPatternOperator opnode,
  Instruction INST,
  ValueType ResTy, RegisterClass FPRC,
  ValueType OpVTy, ValueType OpTy,
  ValueType VecOpTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> {

  def  : Pat<(ResTy (opnode (OpVTy FPRC:$Rn),
               (OpVTy (scalar_to_vector
                 (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))))),
             (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;

  //swapped operands
  def  : Pat<(ResTy (opnode
               (OpVTy (scalar_to_vector
                 (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))),
                 (OpVTy FPRC:$Rn))),
             (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;
}
5684
5685
5686 // Patterns for Scalar Signed saturating doubling
5687 // multiply long (scalar, by element)
5688 defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
5689   SQDMULLshv_4H, v1i32, FPR16, v1i16, i16, v4i16,
5690   i32, VPR64Lo, neon_uimm2_bare>;
5691 defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
5692   SQDMULLshv_8H, v1i32, FPR16, v1i16, i16, v8i16,
5693   i32, VPR128Lo, neon_uimm3_bare>;
5694 defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
5695   SQDMULLdsv_2S, v1i64, FPR32, v1i32, i32, v2i32,
5696   i32, VPR64Lo, neon_uimm1_bare>;
5697 defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
5698   SQDMULLdsv_4S, v1i64, FPR32, v1i32, i32, v4i32,
5699   i32, VPR128Lo, neon_uimm2_bare>;
5700
5701 // Scalar Signed saturating doubling multiply-add long (scalar, by element)
5702 def SQDMLALshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
5703   0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
5704   let Inst{11} = 0b0; // h
5705   let Inst{21} = Imm{1}; // l
5706   let Inst{20} = Imm{0}; // m
5707   let Inst{19-16} = MRm{3-0};
5708 }
5709 def SQDMLALshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
5710   0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
5711   let Inst{11} = Imm{2}; // h
5712   let Inst{21} = Imm{1}; // l
5713   let Inst{20} = Imm{0}; // m
5714   let Inst{19-16} = MRm{3-0};
5715 }
5716 def SQDMLALdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
5717   0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
5718   let Inst{11} = 0b0;    // h
5719   let Inst{21} = Imm{0}; // l
5720   let Inst{20-16} = MRm;
5721 }
5722 def SQDMLALdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
5723   0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
5724   let Inst{11} = Imm{1};    // h
5725   let Inst{21} = Imm{0};    // l
5726   let Inst{20-16} = MRm;
5727 }
5728
5729 // Scalar Signed saturating doubling
5730 // multiply-subtract long (scalar, by element)
5731 def SQDMLSLshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
5732   0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
5733   let Inst{11} = 0b0; // h
5734   let Inst{21} = Imm{1}; // l
5735   let Inst{20} = Imm{0}; // m
5736   let Inst{19-16} = MRm{3-0};
5737 }
5738 def SQDMLSLshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
5739   0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
5740   let Inst{11} = Imm{2}; // h
5741   let Inst{21} = Imm{1}; // l
5742   let Inst{20} = Imm{0}; // m
5743   let Inst{19-16} = MRm{3-0};
5744 }
5745 def SQDMLSLdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
5746   0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
5747   let Inst{11} = 0b0;    // h
5748   let Inst{21} = Imm{0}; // l
5749   let Inst{20-16} = MRm;
5750 }
5751 def SQDMLSLdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
5752   0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
5753   let Inst{11} = Imm{1};    // h
5754   let Inst{21} = Imm{0};    // l
5755   let Inst{20-16} = MRm;
5756 }
5757
// Match an accumulating widening multiply: opnode (saturating add/sub)
// combining an accumulator with the result of coreopnode (sqdmull) on a
// scalar and a vector element, onto the by-element sqdmlal/sqdmlsl
// instruction.  Both orders of the multiplied operands are covered.
multiclass Neon_ScalarXIndexedElem_MLAL_Patterns<
  SDPatternOperator opnode,
  SDPatternOperator coreopnode,
  Instruction INST,
  ValueType ResTy, RegisterClass ResFPRC, RegisterClass FPRC,
  ValueType OpTy,
  ValueType OpVTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> {

  def  : Pat<(ResTy (opnode
               (ResTy ResFPRC:$Ra),
               (ResTy (coreopnode (OpTy FPRC:$Rn),
                 (OpTy (scalar_to_vector
                   (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))))))),
             (ResTy (INST (ResTy ResFPRC:$Ra),
               (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;

  // swapped operands
  def  : Pat<(ResTy (opnode
               (ResTy ResFPRC:$Ra),
               (ResTy (coreopnode
                 (OpTy (scalar_to_vector
                   (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))),
                 (OpTy FPRC:$Rn))))),
             (ResTy (INST (ResTy ResFPRC:$Ra),
               (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;
}
5784
5785 // Patterns for Scalar Signed saturating
5786 // doubling multiply-add long (scalar, by element)
5787 defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
5788   int_arm_neon_vqdmull, SQDMLALshv_4H, v1i32, FPR32, FPR16, v1i16, v4i16,
5789   i32, VPR64Lo, neon_uimm2_bare>;
5790 defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
5791   int_arm_neon_vqdmull, SQDMLALshv_8H, v1i32, FPR32, FPR16, v1i16, v8i16,
5792   i32, VPR128Lo, neon_uimm3_bare>;
5793 defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
5794   int_arm_neon_vqdmull, SQDMLALdsv_2S, v1i64, FPR64, FPR32, v1i32, v2i32,
5795   i32, VPR64Lo, neon_uimm1_bare>;
5796 defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
5797   int_arm_neon_vqdmull, SQDMLALdsv_4S, v1i64, FPR64, FPR32, v1i32, v4i32,
5798   i32, VPR128Lo, neon_uimm2_bare>;
5799
5800 // Patterns for Scalar Signed saturating
5801 // doubling multiply-sub long (scalar, by element)
5802 defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
5803   int_arm_neon_vqdmull, SQDMLSLshv_4H, v1i32, FPR32, FPR16, v1i16, v4i16,
5804   i32, VPR64Lo, neon_uimm2_bare>;
5805 defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
5806   int_arm_neon_vqdmull, SQDMLSLshv_8H, v1i32, FPR32, FPR16, v1i16, v8i16,
5807   i32, VPR128Lo, neon_uimm3_bare>;
5808 defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
5809   int_arm_neon_vqdmull, SQDMLSLdsv_2S, v1i64, FPR64, FPR32, v1i32, v2i32,
5810   i32, VPR64Lo, neon_uimm1_bare>;
5811 defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
5812   int_arm_neon_vqdmull, SQDMLSLdsv_4S, v1i64, FPR64, FPR32, v1i32, v4i32,
5813   i32, VPR128Lo, neon_uimm2_bare>;
5814
5815 // Scalar Signed saturating doubling multiply returning
5816 // high half (scalar, by element)
5817 def SQDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
5818   0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
5819   let Inst{11} = 0b0; // h
5820   let Inst{21} = Imm{1}; // l
5821   let Inst{20} = Imm{0}; // m
5822   let Inst{19-16} = MRm{3-0};
5823 }
5824 def SQDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
5825   0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
5826   let Inst{11} = Imm{2}; // h
5827   let Inst{21} = Imm{1}; // l
5828   let Inst{20} = Imm{0}; // m
5829   let Inst{19-16} = MRm{3-0};
5830 }
5831 def SQDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
5832   0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
5833   let Inst{11} = 0b0;    // h
5834   let Inst{21} = Imm{0}; // l
5835   let Inst{20-16} = MRm;
5836 }
5837 def SQDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
5838   0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
5839   let Inst{11} = Imm{1};    // h
5840   let Inst{21} = Imm{0};    // l
5841   let Inst{20-16} = MRm;
5842 }
5843
// Patterns for Scalar Signed saturating doubling multiply returning
// high half (scalar, by element)
// One defm per element size (.h / .s) and lane-vector width (64-bit /
// 128-bit): each expands Neon_ScalarXIndexedElem_MUL_Patterns with the
// matching SQDMULH instruction and lane-index immediate operand.
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHhhv_4H, v1i16, FPR16, v1i16, i16, v4i16,
  i32, VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHhhv_8H, v1i16, FPR16, v1i16, i16, v8i16,
  i32, VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHssv_2S, v1i32, FPR32, v1i32, i32, v2i32,
  i32, VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
  SQDMULHssv_4S, v1i32, FPR32, v1i32, i32, v4i32,
  i32, VPR128Lo, neon_uimm2_bare>;

// Scalar Signed saturating rounding doubling multiply
// returning high half (scalar, by element)
// The lane index is encoded across Inst{11} (h), Inst{21} (l) and
// Inst{20} (m).  The .h forms only encode 4 bits of Rm (Inst{19-16}),
// so their lane source is restricted to VPR64Lo/VPR128Lo; the .s forms
// encode the full 5-bit Rm in Inst{20-16}.
def SQRDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
  0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
  let Inst{11} = 0b0; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQRDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
  0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
  let Inst{11} = Imm{2}; // h
  let Inst{21} = Imm{1}; // l
  let Inst{20} = Imm{0}; // m
  let Inst{19-16} = MRm{3-0};
}
def SQRDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
  0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
  let Inst{11} = 0b0;    // h
  let Inst{21} = Imm{0}; // l
  let Inst{20-16} = MRm;
}
def SQRDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
  0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
  let Inst{11} = Imm{1};    // h
  let Inst{21} = Imm{0};    // l
  let Inst{20-16} = MRm;
}

// Selection patterns mapping the vqrdmulh intrinsic (scalar, by element)
// onto the SQRDMULH instructions defined above, mirroring the SQDMULH
// pattern instantiations.
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
  SQRDMULHhhv_4H, v1i16, FPR16, v1i16, i16, v4i16, i32,
  VPR64Lo, neon_uimm2_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
  SQRDMULHhhv_8H, v1i16, FPR16, v1i16, i16, v8i16, i32,
  VPR128Lo, neon_uimm3_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
  SQRDMULHssv_2S, v1i32, FPR32, v1i32, i32, v2i32, i32,
  VPR64Lo, neon_uimm1_bare>;
defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
  SQRDMULHssv_4S, v1i32, FPR32, v1i32, i32, v4i32, i32,
  VPR128Lo, neon_uimm2_bare>;
5900
// Scalar general arithmetic operation
// Helper pattern classes mapping v1f64 unary / binary / ternary operations
// onto the corresponding FPR64 scalar instruction.  The suffix (2D/3D/4D)
// counts the total number of operands in the DAG pattern including the
// result.

// Unary: (v1f64 (opnode Rn)) -> INST Rn
class Neon_Scalar_GeneralMath2D_pattern<SDPatternOperator opnode,
                                        Instruction INST>
    : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;

// Binary: (v1f64 (opnode Rn, Rm)) -> INST Rn, Rm
class Neon_Scalar_GeneralMath3D_pattern<SDPatternOperator opnode,
                                        Instruction INST>
    : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (INST FPR64:$Rn, FPR64:$Rm)>;

// Ternary: (v1f64 (opnode Rn, Rm, Ra)) -> INST Rn, Rm, Ra
class Neon_Scalar_GeneralMath4D_pattern<SDPatternOperator opnode,
                                        Instruction INST>
    : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm),
              (v1f64 FPR64:$Ra))),
          (INST FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
5916
// v1f64 arithmetic is selected to the plain scalar double-precision
// instructions (FADDddd etc.), since a v1f64 value lives in an FPR64
// register.

// Binary ops.
def : Neon_Scalar_GeneralMath3D_pattern<fadd, FADDddd>;
def : Neon_Scalar_GeneralMath3D_pattern<fmul, FMULddd>;
def : Neon_Scalar_GeneralMath3D_pattern<fsub, FSUBddd>;
def : Neon_Scalar_GeneralMath3D_pattern<fdiv, FDIVddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vabds, FABDddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmaxs, FMAXddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmins, FMINddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vmaxnm, FMAXNMddd>;
def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vminnm, FMINNMddd>;

// Unary ops.
def : Neon_Scalar_GeneralMath2D_pattern<fabs, FABSdd>;
def : Neon_Scalar_GeneralMath2D_pattern<fneg, FNEGdd>;

// Fused multiply-add / multiply-subtract.
def : Neon_Scalar_GeneralMath4D_pattern<fma, FMADDdddd>;
def : Neon_Scalar_GeneralMath4D_pattern<fmsub, FMSUBdddd>;
5932
// Scalar Copy - DUP element to scalar
// Copies one element of a 128-bit vector register into a scalar FP
// register.  The per-size defs below pack the element index together with
// a size-marker bit pattern into Inst{20-16}.
class NeonI_Scalar_DUP<string asmop, string asmlane,
                       RegisterClass ResRC, RegisterOperand VPRC,
                       Operand OpImm>
  : NeonI_ScalarCopy<(outs ResRC:$Rd), (ins VPRC:$Rn, OpImm:$Imm),
                     asmop # "\t$Rd, $Rn." # asmlane # "[$Imm]",
                     [],
                     NoItinerary> {
  bits<4> Imm;
}

// DUP Bd, Vn.b[0-15]: index in bits 4-1, low marker bit 0b1.
def DUPbv_B : NeonI_Scalar_DUP<"dup", "b", FPR8, VPR128, neon_uimm4_bare> {
  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
}
// DUP Hd, Vn.h[0-7]: index in bits 4-2, marker 0b10.
def DUPhv_H : NeonI_Scalar_DUP<"dup", "h", FPR16, VPR128, neon_uimm3_bare> {
  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
}
// DUP Sd, Vn.s[0-3]: index in bits 4-3, marker 0b100.
def DUPsv_S : NeonI_Scalar_DUP<"dup", "s", FPR32, VPR128, neon_uimm2_bare> {
  let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
}
// DUP Dd, Vn.d[0-1]: index in bit 4, marker 0b1000.
def DUPdv_D : NeonI_Scalar_DUP<"dup", "d", FPR64, VPR128, neon_uimm1_bare> {
  let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
}
5956
// Select vector_extract to a scalar DUP.  The first pattern handles a
// 128-bit source directly; the second widens a 64-bit source into the
// low half of a 128-bit register (SUBREG_TO_REG) first, since DUP only
// reads VPR128.
multiclass NeonI_Scalar_DUP_Elt_pattern<Instruction DUPI, ValueType ResTy,
  ValueType OpTy, Operand OpImm,
  ValueType OpNTy, ValueType ExTy, Operand OpNImm> {
  def : Pat<(ResTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)),
            (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;

  def : Pat<(ResTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)),
            (ResTy (DUPI
              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
                OpNImm:$Imm))>;
}

// Patterns for vector extract of FP data using scalar DUP instructions
defm : NeonI_Scalar_DUP_Elt_pattern<DUPsv_S, f32,
  v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
defm : NeonI_Scalar_DUP_Elt_pattern<DUPdv_D, f64,
  v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;

// Same structure as above, but for extract_subvector producing a v1ix
// result (a one-element vector held in a scalar FP register) rather than
// a plain scalar.
multiclass NeonI_Scalar_DUP_Ext_Vec_pattern<Instruction DUPI,
  ValueType ResTy, ValueType OpTy,Operand OpLImm,
  ValueType NOpTy, ValueType ExTy, Operand OpNImm> {

  def : Pat<(ResTy (extract_subvector (OpTy VPR128:$Rn), OpLImm:$Imm)),
            (ResTy (DUPI VPR128:$Rn, OpLImm:$Imm))>;

  def : Pat<(ResTy (extract_subvector (NOpTy VPR64:$Rn), OpNImm:$Imm)),
            (ResTy (DUPI
              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
                OpNImm:$Imm))>;
}

// Patterns for extract subvectors of v1ix data using scalar DUP instructions.
defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPbv_B, v1i8, v16i8, neon_uimm4_bare,
                                        v8i8, v16i8, neon_uimm3_bare>;
defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPhv_H, v1i16, v8i16, neon_uimm3_bare,
                                        v4i16, v8i16, neon_uimm2_bare>;
defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPsv_S, v1i32, v4i32, neon_uimm2_bare,
                                        v2i32, v4i32, neon_uimm1_bare>;
5995
// Copy a single extracted element into a one-element vector using a
// scalar DUP.  pattern1 matches the (vector_insert undef, extract, 0)
// form; pattern2 matches the (scalar_to_vector extract) form.  In both,
// the second Pat widens a 64-bit source to VPR128 via SUBREG_TO_REG.
multiclass NeonI_Scalar_DUP_Copy_pattern1<Instruction DUPI, ValueType ResTy,
                                          ValueType OpTy, ValueType ElemTy,
                                          Operand OpImm, ValueType OpNTy,
                                          ValueType ExTy, Operand OpNImm> {

  def : Pat<(ResTy (vector_insert (ResTy undef),
              (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)),
              (neon_uimm0_bare:$Imm))),
            (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;

  def : Pat<(ResTy (vector_insert (ResTy undef),
              (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)),
              (OpNImm:$Imm))),
            (ResTy (DUPI
              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              OpNImm:$Imm))>;
}

multiclass NeonI_Scalar_DUP_Copy_pattern2<Instruction DUPI, ValueType ResTy,
                                          ValueType OpTy, ValueType ElemTy,
                                          Operand OpImm, ValueType OpNTy,
                                          ValueType ExTy, Operand OpNImm> {

  def : Pat<(ResTy (scalar_to_vector
              (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)))),
            (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;

  def : Pat<(ResTy (scalar_to_vector
              (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)))),
            (ResTy (DUPI
              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              OpNImm:$Imm))>;
}

// Patterns for vector copy to v1ix and v1fx vectors using scalar DUP
// instructions.
// Note: for the sub-64-bit element sizes the extracted element type is
// i32, matching how vector_extract legalizes small integer elements.
defm : NeonI_Scalar_DUP_Copy_pattern1<DUPdv_D,
  v1i64, v2i64, i64, neon_uimm1_bare,
  v1i64, v2i64, neon_uimm0_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern1<DUPsv_S,
  v1i32, v4i32, i32, neon_uimm2_bare,
  v2i32, v4i32, neon_uimm1_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern1<DUPhv_H,
  v1i16, v8i16, i32, neon_uimm3_bare,
  v4i16, v8i16, neon_uimm2_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern1<DUPbv_B,
  v1i8, v16i8, i32, neon_uimm4_bare,
  v8i8, v16i8, neon_uimm3_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern2<DUPdv_D,
  v1i64, v2i64, i64, neon_uimm1_bare,
  v1i64, v2i64, neon_uimm0_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern2<DUPsv_S,
  v1i32, v4i32, i32, neon_uimm2_bare,
  v2i32, v4i32, neon_uimm1_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern2<DUPhv_H,
  v1i16, v8i16, i32, neon_uimm3_bare,
  v4i16, v8i16, neon_uimm2_bare>;
defm : NeonI_Scalar_DUP_Copy_pattern2<DUPbv_B,
  v1i8, v16i8, i32, neon_uimm4_bare,
  v8i8, v16i8, neon_uimm3_bare>;
6056
// Parse-only alias ("mov Rd, Vn.<lane>[i]") for the scalar DUP
// instructions; the trailing 0b0 means the alias is accepted by the
// assembler but never used for printing.
multiclass NeonI_Scalar_DUP_alias<string asmop, string asmlane,
                                  Instruction DUPI, Operand OpImm,
                                  RegisterClass ResRC> {
  def : NeonInstAlias<!strconcat(asmop, "$Rd, $Rn" # asmlane # "[$Imm]"),
          (DUPI ResRC:$Rd, VPR128:$Rn, OpImm:$Imm), 0b0>;
}

// Aliases for Scalar copy - DUP element (scalar)
// FIXME: This is actually the preferred syntax but TableGen can't deal with
// custom printing of aliases.
defm : NeonI_Scalar_DUP_alias<"mov", ".b", DUPbv_B, neon_uimm4_bare, FPR8>;
defm : NeonI_Scalar_DUP_alias<"mov", ".h", DUPhv_H, neon_uimm3_bare, FPR16>;
defm : NeonI_Scalar_DUP_alias<"mov", ".s", DUPsv_S, neon_uimm2_bare, FPR32>;
defm : NeonI_Scalar_DUP_alias<"mov", ".d", DUPdv_D, neon_uimm1_bare, FPR64>;
6071
// Select half-vector extraction from a 128-bit register: the low half is
// just the sub_64 subregister (no instruction), while the high half is
// materialized with DUP Dd, Vn.d[1].
multiclass NeonI_SDUP<PatFrag GetLow, PatFrag GetHigh, ValueType ResTy,
                      ValueType OpTy> {
  def : Pat<(ResTy (GetLow VPR128:$Rn)),
            (ResTy (EXTRACT_SUBREG (OpTy VPR128:$Rn), sub_64))>;
  def : Pat<(ResTy (GetHigh VPR128:$Rn)),
            (ResTy (DUPdv_D (OpTy VPR128:$Rn), 1))>;
}

defm : NeonI_SDUP<Neon_Low16B, Neon_High16B, v8i8, v16i8>;
defm : NeonI_SDUP<Neon_Low8H, Neon_High8H, v4i16, v8i16>;
defm : NeonI_SDUP<Neon_Low4S, Neon_High4S, v2i32, v4i32>;
defm : NeonI_SDUP<Neon_Low2D, Neon_High2D, v1i64, v2i64>;
defm : NeonI_SDUP<Neon_Low4float, Neon_High4float, v2f32, v4f32>;
defm : NeonI_SDUP<Neon_Low2double, Neon_High2double, v1f64, v2f64>;
6086
6087 //===----------------------------------------------------------------------===//
6088 // Non-Instruction Patterns
6089 //===----------------------------------------------------------------------===//
6090
6091 // 64-bit vector bitcasts...
6092
6093 def : Pat<(v1i64 (bitconvert (v8i8  VPR64:$src))), (v1i64 VPR64:$src)>;
6094 def : Pat<(v2f32 (bitconvert (v8i8  VPR64:$src))), (v2f32 VPR64:$src)>;
6095 def : Pat<(v2i32 (bitconvert (v8i8  VPR64:$src))), (v2i32 VPR64:$src)>;
6096 def : Pat<(v4i16 (bitconvert (v8i8  VPR64:$src))), (v4i16 VPR64:$src)>;
6097
6098 def : Pat<(v1i64 (bitconvert (v4i16  VPR64:$src))), (v1i64 VPR64:$src)>;
6099 def : Pat<(v2i32 (bitconvert (v4i16  VPR64:$src))), (v2i32 VPR64:$src)>;
6100 def : Pat<(v2f32 (bitconvert (v4i16  VPR64:$src))), (v2f32 VPR64:$src)>;
6101 def : Pat<(v8i8  (bitconvert (v4i16  VPR64:$src))), (v8i8 VPR64:$src)>;
6102
6103 def : Pat<(v1i64 (bitconvert (v2i32  VPR64:$src))), (v1i64 VPR64:$src)>;
6104 def : Pat<(v2f32 (bitconvert (v2i32  VPR64:$src))), (v2f32 VPR64:$src)>;
6105 def : Pat<(v4i16 (bitconvert (v2i32  VPR64:$src))), (v4i16 VPR64:$src)>;
6106 def : Pat<(v8i8  (bitconvert (v2i32  VPR64:$src))), (v8i8 VPR64:$src)>;
6107
6108 def : Pat<(v1i64 (bitconvert (v2f32  VPR64:$src))), (v1i64 VPR64:$src)>;
6109 def : Pat<(v2i32 (bitconvert (v2f32  VPR64:$src))), (v2i32 VPR64:$src)>;
6110 def : Pat<(v4i16 (bitconvert (v2f32  VPR64:$src))), (v4i16 VPR64:$src)>;
6111 def : Pat<(v8i8  (bitconvert (v2f32  VPR64:$src))), (v8i8 VPR64:$src)>;
6112
6113 def : Pat<(v2f32 (bitconvert (v1i64  VPR64:$src))), (v2f32 VPR64:$src)>;
6114 def : Pat<(v2i32 (bitconvert (v1i64  VPR64:$src))), (v2i32 VPR64:$src)>;
6115 def : Pat<(v4i16 (bitconvert (v1i64  VPR64:$src))), (v4i16 VPR64:$src)>;
6116 def : Pat<(v8i8  (bitconvert (v1i64  VPR64:$src))), (v8i8 VPR64:$src)>;
6117
6118 // ..and 128-bit vector bitcasts...
6119
6120 def : Pat<(v2f64 (bitconvert (v16i8  VPR128:$src))), (v2f64 VPR128:$src)>;
6121 def : Pat<(v2i64 (bitconvert (v16i8  VPR128:$src))), (v2i64 VPR128:$src)>;
6122 def : Pat<(v4f32 (bitconvert (v16i8  VPR128:$src))), (v4f32 VPR128:$src)>;
6123 def : Pat<(v4i32 (bitconvert (v16i8  VPR128:$src))), (v4i32 VPR128:$src)>;
6124 def : Pat<(v8i16 (bitconvert (v16i8  VPR128:$src))), (v8i16 VPR128:$src)>;
6125
6126 def : Pat<(v2f64 (bitconvert (v8i16  VPR128:$src))), (v2f64 VPR128:$src)>;
6127 def : Pat<(v2i64 (bitconvert (v8i16  VPR128:$src))), (v2i64 VPR128:$src)>;
6128 def : Pat<(v4i32 (bitconvert (v8i16  VPR128:$src))), (v4i32 VPR128:$src)>;
6129 def : Pat<(v4f32 (bitconvert (v8i16  VPR128:$src))), (v4f32 VPR128:$src)>;
6130 def : Pat<(v16i8 (bitconvert (v8i16  VPR128:$src))), (v16i8 VPR128:$src)>;
6131
6132 def : Pat<(v2f64 (bitconvert (v4i32  VPR128:$src))), (v2f64 VPR128:$src)>;
6133 def : Pat<(v2i64 (bitconvert (v4i32  VPR128:$src))), (v2i64 VPR128:$src)>;
6134 def : Pat<(v4f32 (bitconvert (v4i32  VPR128:$src))), (v4f32 VPR128:$src)>;
6135 def : Pat<(v8i16 (bitconvert (v4i32  VPR128:$src))), (v8i16 VPR128:$src)>;
6136 def : Pat<(v16i8 (bitconvert (v4i32  VPR128:$src))), (v16i8 VPR128:$src)>;
6137
6138 def : Pat<(v2f64 (bitconvert (v4f32  VPR128:$src))), (v2f64 VPR128:$src)>;
6139 def : Pat<(v2i64 (bitconvert (v4f32  VPR128:$src))), (v2i64 VPR128:$src)>;
6140 def : Pat<(v4i32 (bitconvert (v4f32  VPR128:$src))), (v4i32 VPR128:$src)>;
6141 def : Pat<(v8i16 (bitconvert (v4f32  VPR128:$src))), (v8i16 VPR128:$src)>;
6142 def : Pat<(v16i8 (bitconvert (v4f32  VPR128:$src))), (v16i8 VPR128:$src)>;
6143
6144 def : Pat<(v2f64 (bitconvert (v2i64  VPR128:$src))), (v2f64 VPR128:$src)>;
6145 def : Pat<(v4f32 (bitconvert (v2i64  VPR128:$src))), (v4f32 VPR128:$src)>;
6146 def : Pat<(v4i32 (bitconvert (v2i64  VPR128:$src))), (v4i32 VPR128:$src)>;
6147 def : Pat<(v8i16 (bitconvert (v2i64  VPR128:$src))), (v8i16 VPR128:$src)>;
6148 def : Pat<(v16i8 (bitconvert (v2i64  VPR128:$src))), (v16i8 VPR128:$src)>;
6149
6150 def : Pat<(v2i64 (bitconvert (v2f64  VPR128:$src))), (v2i64 VPR128:$src)>;
6151 def : Pat<(v4f32 (bitconvert (v2f64  VPR128:$src))), (v4f32 VPR128:$src)>;
6152 def : Pat<(v4i32 (bitconvert (v2f64  VPR128:$src))), (v4i32 VPR128:$src)>;
6153 def : Pat<(v8i16 (bitconvert (v2f64  VPR128:$src))), (v8i16 VPR128:$src)>;
6154 def : Pat<(v16i8 (bitconvert (v2f64  VPR128:$src))), (v16i8 VPR128:$src)>;
6155
6156 // ...and scalar bitcasts...
6157 def : Pat<(f16 (bitconvert (v1i16  FPR16:$src))), (f16 FPR16:$src)>;
6158 def : Pat<(f32 (bitconvert (v1i32  FPR32:$src))), (f32 FPR32:$src)>;
6159 def : Pat<(f64 (bitconvert (v1i64  FPR64:$src))), (f64 FPR64:$src)>;
6160 def : Pat<(f64 (bitconvert (v1f64  FPR64:$src))), (f64 FPR64:$src)>;
6161
6162 def : Pat<(i64 (bitconvert (v1i64  FPR64:$src))), (FMOVxd $src)>;
6163 def : Pat<(i64 (bitconvert (v1f64  FPR64:$src))), (FMOVxd $src)>;
6164 def : Pat<(i64 (bitconvert (v2i32  FPR64:$src))), (FMOVxd $src)>;
6165 def : Pat<(i64 (bitconvert (v2f32  FPR64:$src))), (FMOVxd $src)>;
6166 def : Pat<(i64 (bitconvert (v4i16  FPR64:$src))), (FMOVxd $src)>;
6167 def : Pat<(i64 (bitconvert (v8i8  FPR64:$src))), (FMOVxd $src)>;
6168
6169 def : Pat<(i32 (bitconvert (v1i32  FPR32:$src))), (FMOVws $src)>;
6170
6171 def : Pat<(v8i8  (bitconvert (v1i64  VPR64:$src))), (v8i8 VPR64:$src)>;
6172 def : Pat<(v4i16 (bitconvert (v1i64  VPR64:$src))), (v4i16 VPR64:$src)>;
6173 def : Pat<(v2i32 (bitconvert (v1i64  VPR64:$src))), (v2i32 VPR64:$src)>;
6174
6175 def : Pat<(f64   (bitconvert (v8i8  VPR64:$src))), (f64 VPR64:$src)>;
6176 def : Pat<(f64   (bitconvert (v4i16  VPR64:$src))), (f64 VPR64:$src)>;
6177 def : Pat<(f64   (bitconvert (v2i32  VPR64:$src))), (f64 VPR64:$src)>;
6178 def : Pat<(f64   (bitconvert (v2f32  VPR64:$src))), (f64 VPR64:$src)>;
6179 def : Pat<(f64   (bitconvert (v1i64  VPR64:$src))), (f64 VPR64:$src)>;
6180
6181 def : Pat<(f128  (bitconvert (v16i8  VPR128:$src))), (f128 VPR128:$src)>;
6182 def : Pat<(f128  (bitconvert (v8i16  VPR128:$src))), (f128 VPR128:$src)>;
6183 def : Pat<(f128  (bitconvert (v4i32  VPR128:$src))), (f128 VPR128:$src)>;
6184 def : Pat<(f128  (bitconvert (v2i64  VPR128:$src))), (f128 VPR128:$src)>;
6185 def : Pat<(f128  (bitconvert (v4f32  VPR128:$src))), (f128 VPR128:$src)>;
6186 def : Pat<(f128  (bitconvert (v2f64  VPR128:$src))), (f128 VPR128:$src)>;
6187
6188 def : Pat<(v1i16 (bitconvert (f16  FPR16:$src))), (v1i16 FPR16:$src)>;
6189 def : Pat<(v1i32 (bitconvert (f32  FPR32:$src))), (v1i32 FPR32:$src)>;
6190 def : Pat<(v1i64 (bitconvert (f64  FPR64:$src))), (v1i64 FPR64:$src)>;
6191 def : Pat<(v1f64 (bitconvert (f64  FPR64:$src))), (v1f64 FPR64:$src)>;
6192
6193 def : Pat<(v1i64 (bitconvert (i64  GPR64:$src))), (FMOVdx $src)>;
6194 def : Pat<(v1f64 (bitconvert (i64  GPR64:$src))), (FMOVdx $src)>;
6195 def : Pat<(v2i32 (bitconvert (i64  GPR64:$src))), (FMOVdx $src)>;
6196 def : Pat<(v2f32 (bitconvert (i64  GPR64:$src))), (FMOVdx $src)>;
6197 def : Pat<(v4i16 (bitconvert (i64  GPR64:$src))), (FMOVdx $src)>;
6198 def : Pat<(v8i8 (bitconvert (i64  GPR64:$src))), (FMOVdx $src)>;
6199
6200 def : Pat<(v1i32 (bitconvert (i32  GPR32:$src))), (FMOVsw $src)>;
6201
6202 def : Pat<(v8i8   (bitconvert (f64   FPR64:$src))), (v8i8 FPR64:$src)>;
6203 def : Pat<(v4i16  (bitconvert (f64   FPR64:$src))), (v4i16 FPR64:$src)>;
6204 def : Pat<(v2i32  (bitconvert (f64   FPR64:$src))), (v2i32 FPR64:$src)>;
6205 def : Pat<(v2f32  (bitconvert (f64   FPR64:$src))), (v2f32 FPR64:$src)>;
6206 def : Pat<(v1i64  (bitconvert (f64   FPR64:$src))), (v1i64 FPR64:$src)>;
6207
6208 def : Pat<(v16i8  (bitconvert (f128   FPR128:$src))), (v16i8 FPR128:$src)>;
6209 def : Pat<(v8i16  (bitconvert (f128   FPR128:$src))), (v8i16 FPR128:$src)>;
6210 def : Pat<(v4i32  (bitconvert (f128   FPR128:$src))), (v4i32 FPR128:$src)>;
6211 def : Pat<(v2i64  (bitconvert (f128   FPR128:$src))), (v2i64 FPR128:$src)>;
6212 def : Pat<(v4f32  (bitconvert (f128   FPR128:$src))), (v4f32 FPR128:$src)>;
6213 def : Pat<(v2f64  (bitconvert (f128   FPR128:$src))), (v2f64 FPR128:$src)>;
6214
// Scalar Three Same

// Unsigned 3-bit immediate (0-7), printed in hex; used as the EXT byte
// index for 64-bit (8b) operands below.
def neon_uimm3 : Operand<i64>,
                   ImmLeaf<i64, [{return Imm < 8;}]> {
  let ParserMatchClass = uimm3_asmoperand;
  let PrintMethod = "printUImmHexOperand";
}

// Unsigned 4-bit immediate (0-15), printed in hex; used as the EXT byte
// index for 128-bit (16b) operands below.
def neon_uimm4 : Operand<i64>,
                   ImmLeaf<i64, [{return Imm < 16;}]> {
  let ParserMatchClass = uimm4_asmoperand;
  let PrintMethod = "printUImmHexOperand";
}
6228
// Bitwise Extract
// EXT Vd.<T>, Vn.<T>, Vm.<T>, #Index - extracts a vector from a pair of
// concatenated source registers starting at byte position Index.
class NeonI_Extract<bit q, bits<2> op2, string asmop,
                    string OpS, RegisterOperand OpVPR, Operand OpImm>
  : NeonI_BitExtract<q, op2, (outs OpVPR:$Rd),
                     (ins OpVPR:$Rn, OpVPR:$Rm, OpImm:$Index),
                     asmop # "\t$Rd." # OpS # ", $Rn." # OpS #
                     ", $Rm." # OpS # ", $Index",
                     [],
                     NoItinerary>{
  bits<4> Index;
}

// 64-bit form: only a 3-bit index is valid, so bit 14 is forced to zero.
def EXTvvvi_8b : NeonI_Extract<0b0, 0b00, "ext", "8b",
                               VPR64, neon_uimm3> {
  let Inst{14-11} = {0b0, Index{2}, Index{1}, Index{0}};
}

// 128-bit form: full 4-bit index.
def EXTvvvi_16b: NeonI_Extract<0b1, 0b00, "ext", "16b",
                               VPR128, neon_uimm4> {
  let Inst{14-11} = Index;
}

// Maps the Neon_vextract DAG node onto an EXT instruction for one
// element type.
class NI_Extract<ValueType OpTy, RegisterOperand OpVPR, Instruction INST,
                 Operand OpImm>
  : Pat<(OpTy (Neon_vextract (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm),
                                 (i64 OpImm:$Imm))),
              (INST OpVPR:$Rn, OpVPR:$Rm, OpImm:$Imm)>;

// All 64-bit element types share EXTvvvi_8b and all 128-bit element
// types share EXTvvvi_16b, since EXT works on bytes regardless of the
// nominal element type.
def : NI_Extract<v8i8,  VPR64,  EXTvvvi_8b,  neon_uimm3>;
def : NI_Extract<v4i16, VPR64,  EXTvvvi_8b,  neon_uimm3>;
def : NI_Extract<v2i32, VPR64,  EXTvvvi_8b,  neon_uimm3>;
def : NI_Extract<v1i64, VPR64,  EXTvvvi_8b,  neon_uimm3>;
def : NI_Extract<v2f32, VPR64,  EXTvvvi_8b,  neon_uimm3>;
def : NI_Extract<v1f64, VPR64,  EXTvvvi_8b,  neon_uimm3>;
def : NI_Extract<v16i8, VPR128, EXTvvvi_16b, neon_uimm4>;
def : NI_Extract<v8i16, VPR128, EXTvvvi_16b, neon_uimm4>;
def : NI_Extract<v4i32, VPR128, EXTvvvi_16b, neon_uimm4>;
def : NI_Extract<v2i64, VPR128, EXTvvvi_16b, neon_uimm4>;
def : NI_Extract<v4f32, VPR128, EXTvvvi_16b, neon_uimm4>;
def : NI_Extract<v2f64, VPR128, EXTvvvi_16b, neon_uimm4>;
6269
// Table lookup
// TBL Vd.<T>, { Vn.16b .. }, Vm.<T> - indexed byte lookup into a list
// of one to four table registers.
class NI_TBL<bit q, bits<2> op2, bits<2> len, bit op,
             string asmop, string OpS, RegisterOperand OpVPR,
             RegisterOperand VecList>
  : NeonI_TBL<q, op2, len, op,
              (outs OpVPR:$Rd), (ins VecList:$Rn, OpVPR:$Rm),
              asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
              [],
              NoItinerary>;

// The vectors in look up table are always 16b
// 'len' selects the list length (1-4 registers); each defm creates the
// 8b (VPR64) and 16b (VPR128) result/index forms.
multiclass NI_TBL_pat<bits<2> len, bit op, string asmop, string List> {
  def _8b  : NI_TBL<0, 0b00, len, op, asmop, "8b", VPR64,
                    !cast<RegisterOperand>(List # "16B_operand")>;

  def _16b : NI_TBL<1, 0b00, len, op, asmop, "16b", VPR128,
                    !cast<RegisterOperand>(List # "16B_operand")>;
}

defm TBL1 : NI_TBL_pat<0b00, 0b0, "tbl", "VOne">;
defm TBL2 : NI_TBL_pat<0b01, 0b0, "tbl", "VPair">;
defm TBL3 : NI_TBL_pat<0b10, 0b0, "tbl", "VTriple">;
defm TBL4 : NI_TBL_pat<0b11, 0b0, "tbl", "VQuad">;
6293
// Table lookup extention
// TBX is like TBL but leaves the destination byte unchanged for
// out-of-range indices, so Rd is also an input ($src tied to $Rd).
class NI_TBX<bit q, bits<2> op2, bits<2> len, bit op,
             string asmop, string OpS, RegisterOperand OpVPR,
             RegisterOperand VecList>
  : NeonI_TBL<q, op2, len, op,
              (outs OpVPR:$Rd), (ins OpVPR:$src, VecList:$Rn, OpVPR:$Rm),
              asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
              [],
              NoItinerary> {
  let Constraints = "$src = $Rd";
}

// The vectors in look up table are always 16b
multiclass NI_TBX_pat<bits<2> len, bit op, string asmop, string List> {
  def _8b  : NI_TBX<0, 0b00, len, op, asmop, "8b", VPR64,
                    !cast<RegisterOperand>(List # "16B_operand")>;

  def _16b : NI_TBX<1, 0b00, len, op, asmop, "16b", VPR128,
                    !cast<RegisterOperand>(List # "16B_operand")>;
}

defm TBX1 : NI_TBX_pat<0b00, 0b1, "tbx", "VOne">;
defm TBX2 : NI_TBX_pat<0b01, 0b1, "tbx", "VPair">;
defm TBX3 : NI_TBX_pat<0b10, 0b1, "tbx", "VTriple">;
defm TBX4 : NI_TBX_pat<0b11, 0b1, "tbx", "VQuad">;
6319
// INS Vd.<T>[i], Rn - insert a general-purpose register value into one
// element of a 128-bit vector; the other elements come from $src (tied
// to $Rd), which is why this matches vector_insert directly.
class NeonI_INS_main<string asmop, string Res, ValueType ResTy,
                     RegisterClass OpGPR, ValueType OpTy, Operand OpImm>
  : NeonI_copy<0b1, 0b0, 0b0011,
               (outs VPR128:$Rd), (ins VPR128:$src, OpGPR:$Rn, OpImm:$Imm),
               asmop # "\t$Rd." # Res # "[$Imm], $Rn",
               [(set (ResTy VPR128:$Rd),
                 (ResTy (vector_insert
                   (ResTy VPR128:$src),
                   (OpTy OpGPR:$Rn),
                   (OpImm:$Imm))))],
               NoItinerary> {
  bits<4> Imm;
  let Constraints = "$src = $Rd";
}

//Insert element (vector, from main)
// Per-size defs; as with DUP, Inst{20-16} packs the element index with a
// size-marker bit pattern.
def INSbw : NeonI_INS_main<"ins", "b", v16i8, GPR32, i32,
                           neon_uimm4_bare> {
  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
}
def INShw : NeonI_INS_main<"ins", "h", v8i16, GPR32, i32,
                           neon_uimm3_bare> {
  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
}
def INSsw : NeonI_INS_main<"ins", "s", v4i32, GPR32, i32,
                           neon_uimm2_bare> {
  let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
}
def INSdx : NeonI_INS_main<"ins", "d", v2i64, GPR64, i64,
                           neon_uimm1_bare> {
  let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
}

// Parse-only "mov Vd.<T>[i], Rn" aliases for INS (emit flag 0).
def : NeonInstAlias<"mov $Rd.b[$Imm], $Rn",
                    (INSbw VPR128:$Rd, GPR32:$Rn, neon_uimm4_bare:$Imm), 0>;
def : NeonInstAlias<"mov $Rd.h[$Imm], $Rn",
                    (INShw VPR128:$Rd, GPR32:$Rn, neon_uimm3_bare:$Imm), 0>;
def : NeonInstAlias<"mov $Rd.s[$Imm], $Rn",
                    (INSsw VPR128:$Rd, GPR32:$Rn, neon_uimm2_bare:$Imm), 0>;
def : NeonInstAlias<"mov $Rd.d[$Imm], $Rn",
                    (INSdx VPR128:$Rd, GPR64:$Rn, neon_uimm1_bare:$Imm), 0>;
6361
// vector_insert into a 64-bit vector: widen $src into the low half of a
// 128-bit register (SUBREG_TO_REG), perform the 128-bit INS, then take
// the low 64 bits back out (EXTRACT_SUBREG sub_64).
class Neon_INS_main_pattern <ValueType ResTy,ValueType ExtResTy,
                             RegisterClass OpGPR, ValueType OpTy,
                             Operand OpImm, Instruction INS>
  : Pat<(ResTy (vector_insert
              (ResTy VPR64:$src),
              (OpTy OpGPR:$Rn),
              (OpImm:$Imm))),
        (ResTy (EXTRACT_SUBREG
          (ExtResTy (INS (ExtResTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
            OpGPR:$Rn, OpImm:$Imm)), sub_64))>;

// The 64-bit vector keeps only half as many lanes, hence the narrower
// index operand (e.g. uimm3 for v8i8 vs uimm4 for v16i8).
def INSbw_pattern : Neon_INS_main_pattern<v8i8, v16i8, GPR32, i32,
                                          neon_uimm3_bare, INSbw>;
def INShw_pattern : Neon_INS_main_pattern<v4i16, v8i16, GPR32, i32,
                                          neon_uimm2_bare, INShw>;
def INSsw_pattern : Neon_INS_main_pattern<v2i32, v4i32, GPR32, i32,
                                          neon_uimm1_bare, INSsw>;
def INSdx_pattern : Neon_INS_main_pattern<v1i64, v2i64, GPR64, i64,
                                          neon_uimm0_bare, INSdx>;
6381
// INS Vd.<T>[Immd], Vn.<T>[Immn] - element-to-element insert.  Both the
// destination and source lane indices are instruction operands; $src is
// tied to $Rd since only one lane of the destination changes.
class NeonI_INS_element<string asmop, string Res, Operand ResImm>
  : NeonI_insert<0b1, 0b1,
                 (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn,
                 ResImm:$Immd, ResImm:$Immn),
                 asmop # "\t$Rd." # Res # "[$Immd], $Rn." # Res # "[$Immn]",
                 [],
                 NoItinerary> {
  let Constraints = "$src = $Rd";
  bits<4> Immd;
  bits<4> Immn;
}

//Insert element (vector, from element)
// Inst{20-16} packs the destination index with the size marker (as for
// DUP/INS above); Inst{14-11} carries the source index.
def INSELb : NeonI_INS_element<"ins", "b", neon_uimm4_bare> {
  let Inst{20-16} = {Immd{3}, Immd{2}, Immd{1}, Immd{0}, 0b1};
  let Inst{14-11} = {Immn{3}, Immn{2}, Immn{1}, Immn{0}};
}
def INSELh : NeonI_INS_element<"ins", "h", neon_uimm3_bare> {
  let Inst{20-16} = {Immd{2}, Immd{1}, Immd{0}, 0b1, 0b0};
  let Inst{14-11} = {Immn{2}, Immn{1}, Immn{0}, 0b0};
  // bit 11 is unspecified, but should be set to zero.
}
def INSELs : NeonI_INS_element<"ins", "s", neon_uimm2_bare> {
  let Inst{20-16} = {Immd{1}, Immd{0}, 0b1, 0b0, 0b0};
  let Inst{14-11} = {Immn{1}, Immn{0}, 0b0, 0b0};
  // bits 11-12 are unspecified, but should be set to zero.
}
def INSELd : NeonI_INS_element<"ins", "d", neon_uimm1_bare> {
  let Inst{20-16} = {Immd, 0b1, 0b0, 0b0, 0b0};
  let Inst{14-11} = {Immn{0}, 0b0, 0b0, 0b0};
  // bits 11-13 are unspecified, but should be set to zero.
}

// Parse-only "mov Vd.<T>[i], Vn.<T>[j]" aliases for element INS.
def : NeonInstAlias<"mov $Rd.b[$Immd], $Rn.b[$Immn]",
                    (INSELb VPR128:$Rd, VPR128:$Rn,
                      neon_uimm4_bare:$Immd, neon_uimm4_bare:$Immn), 0>;
def : NeonInstAlias<"mov $Rd.h[$Immd], $Rn.h[$Immn]",
                    (INSELh VPR128:$Rd, VPR128:$Rn,
                      neon_uimm3_bare:$Immd, neon_uimm3_bare:$Immn), 0>;
def : NeonInstAlias<"mov $Rd.s[$Immd], $Rn.s[$Immn]",
                    (INSELs VPR128:$Rd, VPR128:$Rn,
                      neon_uimm2_bare:$Immd, neon_uimm2_bare:$Immn), 0>;
def : NeonInstAlias<"mov $Rd.d[$Immd], $Rn.d[$Immn]",
                    (INSELd VPR128:$Rd, VPR128:$Rn,
                      neon_uimm1_bare:$Immd, neon_uimm1_bare:$Immn), 0>;
6427
// Select (insert (extract ...)) as an element-to-element INS.  Four Pats
// cover the cross-product of 128-bit (ResTy/StImm) and 64-bit
// (NaTy/NaImm) destination and source vectors; the 64-bit cases widen
// through SUBREG_TO_REG and, for 64-bit destinations, narrow the result
// back with EXTRACT_SUBREG sub_64.
multiclass Neon_INS_elt_pattern<ValueType ResTy, ValueType NaTy,
                                ValueType MidTy, Operand StImm, Operand NaImm,
                                Instruction INS> {
// 128-bit dest, 128-bit source.
def : Pat<(ResTy (vector_insert
            (ResTy VPR128:$src),
            (MidTy (vector_extract
              (ResTy VPR128:$Rn),
              (StImm:$Immn))),
            (StImm:$Immd))),
          (INS (ResTy VPR128:$src), (ResTy VPR128:$Rn),
              StImm:$Immd, StImm:$Immn)>;

// 128-bit dest, 64-bit source.
def : Pat <(ResTy (vector_insert
             (ResTy VPR128:$src),
             (MidTy (vector_extract
               (NaTy VPR64:$Rn),
               (NaImm:$Immn))),
             (StImm:$Immd))),
           (INS (ResTy VPR128:$src),
             (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)),
             StImm:$Immd, NaImm:$Immn)>;

// 64-bit dest, 128-bit source.
def : Pat <(NaTy (vector_insert
             (NaTy VPR64:$src),
             (MidTy (vector_extract
               (ResTy VPR128:$Rn),
               (StImm:$Immn))),
             (NaImm:$Immd))),
           (NaTy (EXTRACT_SUBREG
             (ResTy (INS
               (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
               (ResTy VPR128:$Rn),
               NaImm:$Immd, StImm:$Immn)),
             sub_64))>;

// 64-bit dest, 64-bit source.
def : Pat <(NaTy (vector_insert
             (NaTy VPR64:$src),
             (MidTy (vector_extract
               (NaTy VPR64:$Rn),
               (NaImm:$Immn))),
             (NaImm:$Immd))),
           (NaTy (EXTRACT_SUBREG
             (ResTy (INS
               (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
               (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)),
               NaImm:$Immd, NaImm:$Immn)),
             sub_64))>;
}

defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, neon_uimm2_bare,
                            neon_uimm1_bare, INSELs>;
defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, neon_uimm1_bare,
                            neon_uimm0_bare, INSELd>;
defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
                            neon_uimm3_bare, INSELb>;
defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
                            neon_uimm2_bare, INSELh>;
defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
                            neon_uimm1_bare, INSELs>;
defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, neon_uimm1_bare,
                            neon_uimm0_bare, INSELd>;
6489
// Insert an FP scalar register value into a vector lane.  The scalar is
// widened to a 128-bit register (SUBREG_TO_REG with SubIndex), then an
// element INS copies its lane 0 into lane $Imm of the destination; the
// 64-bit destination variant additionally narrows back via sub_64.
multiclass Neon_INS_elt_float_pattern<ValueType ResTy, ValueType NaTy,
                                      ValueType MidTy,
                                      RegisterClass OpFPR, Operand ResImm,
                                      SubRegIndex SubIndex, Instruction INS> {
def : Pat <(ResTy (vector_insert
             (ResTy VPR128:$src),
             (MidTy OpFPR:$Rn),
             (ResImm:$Imm))),
           (INS (ResTy VPR128:$src),
             (ResTy (SUBREG_TO_REG (i64 0), OpFPR:$Rn, SubIndex)),
             ResImm:$Imm,
             (i64 0))>;

def : Pat <(NaTy (vector_insert
             (NaTy VPR64:$src),
             (MidTy OpFPR:$Rn),
             (ResImm:$Imm))),
           (NaTy (EXTRACT_SUBREG
             (ResTy (INS
               (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
               (ResTy (SUBREG_TO_REG (i64 0), (MidTy OpFPR:$Rn), SubIndex)),
               ResImm:$Imm,
               (i64 0))),
             sub_64))>;
}

defm : Neon_INS_elt_float_pattern<v4f32, v2f32, f32, FPR32, neon_uimm2_bare,
                                  sub_32, INSELs>;
defm : Neon_INS_elt_float_pattern<v2f64, v1f64, f64, FPR64, neon_uimm1_bare,
                                  sub_64, INSELd>;
6520
// SMOV Rd, Vn.<T>[i] - sign-extending move of a vector element to a
// general-purpose register.  Matched as a vector_extract whose result is
// then sign-extended in-register from the element width (sext_inreg).
class NeonI_SMOV<string asmop, string Res, bit Q,
                 ValueType OpTy, ValueType eleTy,
                 Operand OpImm, RegisterClass ResGPR, ValueType ResTy>
  : NeonI_copy<Q, 0b0, 0b0101,
               (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
               asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
               [(set (ResTy ResGPR:$Rd),
                 (ResTy (sext_inreg
                   (ResTy (vector_extract
                     (OpTy VPR128:$Rn), (OpImm:$Imm))),
                   eleTy)))],
               NoItinerary> {
  bits<4> Imm;
}

//Signed integer move (main, from element)
// Q=0 forms write a 32-bit GPR, Q=1 forms a 64-bit GPR; there is no
// 32-bit-destination .s form here since that would be a plain move.
// Inst{20-16} packs the index with the size marker as elsewhere.
def SMOVwb : NeonI_SMOV<"smov", "b", 0b0, v16i8, i8, neon_uimm4_bare,
                        GPR32, i32> {
  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
}
def SMOVwh : NeonI_SMOV<"smov", "h", 0b0, v8i16, i16, neon_uimm3_bare,
                        GPR32, i32> {
  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
}
def SMOVxb : NeonI_SMOV<"smov", "b", 0b1, v16i8, i8, neon_uimm4_bare,
                        GPR64, i64> {
  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
}
def SMOVxh : NeonI_SMOV<"smov", "h", 0b1, v8i16, i16, neon_uimm3_bare,
                        GPR64, i64> {
  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
}
def SMOVxs : NeonI_SMOV<"smov", "s", 0b1, v4i32, i32, neon_uimm2_bare,
                        GPR64, i64> {
  let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
}
6557
// Additional selection patterns for the 64-bit-result SMOV forms.  They cover
// the different DAG shapes a sign-extended element extract can take
// (sext_inreg of anyext, plain sext, extract from the 64-bit "narrow" vector
// type NaTy).  Extracts from 64-bit vectors are widened to 128 bits with
// SUBREG_TO_REG so the 128-bit SMOV instruction can be reused.
multiclass Neon_SMOVx_pattern <ValueType StTy, ValueType NaTy,
                               ValueType eleTy, Operand StImm,  Operand NaImm,
                               Instruction SMOVI> {
  def : Pat<(i64 (sext_inreg
              (i64 (anyext
                (i32 (vector_extract
                  (StTy VPR128:$Rn), (StImm:$Imm))))),
              eleTy)),
            (SMOVI VPR128:$Rn, StImm:$Imm)>;

  def : Pat<(i64 (sext
              (i32 (vector_extract
                (StTy VPR128:$Rn), (StImm:$Imm))))),
            (SMOVI VPR128:$Rn, StImm:$Imm)>;

  def : Pat<(i64 (sext_inreg
              (i64 (vector_extract
                (NaTy VPR64:$Rn), (NaImm:$Imm))),
              eleTy)),
            (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              NaImm:$Imm)>;

  def : Pat<(i64 (sext_inreg
              (i64 (anyext
                (i32 (vector_extract
                  (NaTy VPR64:$Rn), (NaImm:$Imm))))),
              eleTy)),
            (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              NaImm:$Imm)>;

  def : Pat<(i64 (sext
              (i32 (vector_extract
                (NaTy VPR64:$Rn), (NaImm:$Imm))))),
            (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              NaImm:$Imm)>;
}

defm : Neon_SMOVx_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
                          neon_uimm3_bare, SMOVxb>;
defm : Neon_SMOVx_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
                          neon_uimm2_bare, SMOVxh>;
defm : Neon_SMOVx_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
                          neon_uimm1_bare, SMOVxs>;

// 32-bit-result SMOV: sign-extended extract from a 64-bit vector, widened to
// 128 bits so the w-form SMOV instruction can be used.
class Neon_SMOVw_pattern <ValueType StTy, ValueType NaTy,
                          ValueType eleTy, Operand StImm,  Operand NaImm,
                          Instruction SMOVI>
  : Pat<(i32 (sext_inreg
          (i32 (vector_extract
            (NaTy VPR64:$Rn), (NaImm:$Imm))),
          eleTy)),
        (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
          NaImm:$Imm)>;

def : Neon_SMOVw_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
                         neon_uimm3_bare, SMOVwb>;
def : Neon_SMOVw_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
                         neon_uimm2_bare, SMOVwh>;
6616
// UMOV: unsigned (zero-extending) move of a vector element to a GPR.
// Matches a plain vector_extract; the element value fills the low bits of
// the destination register and the upper bits are zeroed by the instruction.
class NeonI_UMOV<string asmop, string Res, bit Q,
                 ValueType OpTy, Operand OpImm,
                 RegisterClass ResGPR, ValueType ResTy>
  : NeonI_copy<Q, 0b0, 0b0111,
               (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
               asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
               [(set (ResTy ResGPR:$Rd),
                  (ResTy (vector_extract
                    (OpTy VPR128:$Rn), (OpImm:$Imm))))],
               NoItinerary> {
  bits<4> Imm;
}

//Unsigned integer move (main, from element)
// imm5 (Inst{20-16}) encodes element size and lane index, as for SMOV.
def UMOVwb : NeonI_UMOV<"umov", "b", 0b0, v16i8, neon_uimm4_bare,
                         GPR32, i32> {
  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
}
def UMOVwh : NeonI_UMOV<"umov", "h", 0b0, v8i16, neon_uimm3_bare,
                         GPR32, i32> {
  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
}
def UMOVws : NeonI_UMOV<"umov", "s", 0b0, v4i32, neon_uimm2_bare,
                         GPR32, i32> {
  let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
}
def UMOVxd : NeonI_UMOV<"umov", "d", 0b1, v2i64, neon_uimm1_bare,
                         GPR64, i64> {
  let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
}

// "mov" is the preferred disassembly alias for the whole-element UMOV forms.
def : NeonInstAlias<"mov $Rd, $Rn.s[$Imm]",
                    (UMOVws GPR32:$Rd, VPR128:$Rn, neon_uimm2_bare:$Imm), 0>;
def : NeonInstAlias<"mov $Rd, $Rn.d[$Imm]",
                    (UMOVxd GPR64:$Rd, VPR128:$Rn, neon_uimm1_bare:$Imm), 0>;
6652
// Selects a vector_extract from a 64-bit (NaTy) vector by first widening the
// operand to 128 bits with SUBREG_TO_REG, then reusing the 128-bit UMOV
// instruction.  StImm is kept in the parameter list for symmetry with the
// SMOV pattern classes above, although this pattern does not reference it.
class Neon_UMOV_pattern <ValueType StTy, ValueType NaTy, ValueType ResTy,
                         Operand StImm,  Operand NaImm,
                         Instruction UMOVI>
  : Pat<(ResTy (vector_extract
          (NaTy VPR64:$Rn), NaImm:$Imm)),
        (UMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
          NaImm:$Imm)>;

def : Neon_UMOV_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
                        neon_uimm3_bare, UMOVwb>;
def : Neon_UMOV_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
                        neon_uimm2_bare, UMOVwh>;
def : Neon_UMOV_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
                        neon_uimm1_bare, UMOVws>;
6667
// An extract followed by masking to the element width (and 255 / and 65535)
// is exactly what UMOV produces, so fold the mask away.
def : Pat<(i32 (and
            (i32 (vector_extract
              (v16i8 VPR128:$Rn), (neon_uimm4_bare:$Imm))),
            255)),
          (UMOVwb VPR128:$Rn, neon_uimm4_bare:$Imm)>;

def : Pat<(i32 (and
            (i32 (vector_extract
              (v8i16 VPR128:$Rn), (neon_uimm3_bare:$Imm))),
            65535)),
          (UMOVwh VPR128:$Rn, neon_uimm3_bare:$Imm)>;

// NOTE(review): this matches an i32-typed extract from a 64-bit-element
// vector and maps it to the full 64-bit UMOVxd — presumably relying on how
// the legalizer types such extracts; verify against the lowering code.
def : Pat<(i64 (zext
            (i32 (vector_extract
              (v2i64 VPR128:$Rn), (neon_uimm1_bare:$Imm))))),
          (UMOVxd VPR128:$Rn, neon_uimm1_bare:$Imm)>;

// Same folds for extracts from 64-bit vectors: widen with SUBREG_TO_REG
// first so the 128-bit UMOV forms apply.
def : Pat<(i32 (and
            (i32 (vector_extract
              (v8i8 VPR64:$Rn), (neon_uimm3_bare:$Imm))),
            255)),
          (UMOVwb (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
            neon_uimm3_bare:$Imm)>;

def : Pat<(i32 (and
            (i32 (vector_extract
              (v4i16 VPR64:$Rn), (neon_uimm2_bare:$Imm))),
            65535)),
          (UMOVwh (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
            neon_uimm2_bare:$Imm)>;

def : Pat<(i64 (zext
            (i32 (vector_extract
              (v1i64 VPR64:$Rn), (neon_uimm0_bare:$Imm))))),
          (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
            neon_uimm0_bare:$Imm)>;

// Additional copy patterns for scalar types
// Extract lane 0 of a one-element vector held in an FPR: widen the scalar
// register into a 128-bit vector and use UMOV/FMOV to read it out.
def : Pat<(i32 (vector_extract (v1i8 FPR8:$Rn), (i64 0))),
          (UMOVwb (v16i8
            (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8)), (i64 0))>;

def : Pat<(i32 (vector_extract (v1i16 FPR16:$Rn), (i64 0))),
          (UMOVwh (v8i16
            (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16)), (i64 0))>;

def : Pat<(i32 (vector_extract (v1i32 FPR32:$Rn), (i64 0))),
          (FMOVws FPR32:$Rn)>;

def : Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))),
          (FMOVxd FPR64:$Rn)>;

// v1f64 lane 0 is just the f64 value already held in the register.
def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))),
          (f64 FPR64:$Rn)>;
6722
// scalar_to_vector for sub-64-bit element types: insert the GPR value into
// lane 0 of an undefined 128-bit vector, then take the subregister that
// matches the result type.
def : Pat<(v1i8 (scalar_to_vector GPR32:$Rn)),
          (v1i8 (EXTRACT_SUBREG (v16i8
            (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
            sub_8))>;

def : Pat<(v1i16 (scalar_to_vector GPR32:$Rn)),
          (v1i16 (EXTRACT_SUBREG (v8i16
            (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
            sub_16))>;

// One-element 32/64-bit vectors can be built with a plain GPR->FPR move.
def : Pat<(v1i32 (scalar_to_vector GPR32:$src)),
          (FMOVsw $src)>;

def : Pat<(v1i64 (scalar_to_vector GPR64:$src)),
          (FMOVdx $src)>;

def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
          (v8i8 (EXTRACT_SUBREG (v16i8
            (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
            sub_64))>;

def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
          (v4i16 (EXTRACT_SUBREG (v8i16
            (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
            sub_64))>;
6748
// v2i32 <- i32: insert into lane 0 of an undef v4i32, then take the low
// 64-bit subregister.  The intermediate value produced by INSsw is v4i32;
// the previous v16i8 annotation was inconsistent with the v8i8/v4i16
// patterns above, which annotate the intermediate with the instruction's
// actual result type.
def : Pat<(v2i32 (scalar_to_vector GPR32:$Rn)),
          (v2i32 (EXTRACT_SUBREG (v4i32
            (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))),
            sub_64))>;
6753
// scalar_to_vector producing a full 128-bit vector: a single INS into lane 0
// of an undefined register suffices, no subregister extraction needed.
def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
          (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))>;

def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
          (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))>;

def : Pat<(v4i32 (scalar_to_vector GPR32:$Rn)),
          (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))>;

def : Pat<(v2i64 (scalar_to_vector GPR64:$Rn)),
          (INSdx (v2i64 (IMPLICIT_DEF)), $Rn, (i64 0))>;
6765
// NOTE(review): two byte-identical copies of the (v2i32 (scalar_to_vector
// GPR32)) pattern previously appeared here.  They duplicated the pattern
// already defined above and have been removed.
6775
// v1f64 is the f64 value itself; v2f64 places the f64 in the low half of an
// undefined 128-bit register via INSERT_SUBREG.
def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))),
          (v1f64 FPR64:$Rn)>;

def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$src))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)),
                         (f64 FPR64:$src), sub_64)>;
6782
// DUP (element): broadcast one lane of a 128-bit source vector to every lane
// of the destination.  Encoding of element size and lane index into imm5
// (Inst{20-16}) is supplied by each concrete def; selection patterns are
// attached separately below.
class NeonI_DUP_Elt<bit Q, string asmop, string rdlane,  string rnlane,
                    RegisterOperand ResVPR, Operand OpImm>
  : NeonI_copy<Q, 0b0, 0b0000, (outs ResVPR:$Rd),
               (ins VPR128:$Rn, OpImm:$Imm),
               asmop # "\t$Rd" # rdlane # ", $Rn" # rnlane # "[$Imm]",
               [],
               NoItinerary> {
  bits<4> Imm;
}

// 128-bit destination forms.
def DUPELT16b : NeonI_DUP_Elt<0b1, "dup", ".16b", ".b", VPR128,
                              neon_uimm4_bare> {
  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
}

def DUPELT8h : NeonI_DUP_Elt<0b1, "dup", ".8h", ".h", VPR128,
                              neon_uimm3_bare> {
  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
}

def DUPELT4s : NeonI_DUP_Elt<0b1, "dup", ".4s", ".s", VPR128,
                              neon_uimm2_bare> {
  let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
}

def DUPELT2d : NeonI_DUP_Elt<0b1, "dup", ".2d", ".d", VPR128,
                              neon_uimm1_bare> {
  let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
}

// 64-bit destination forms (no .1d form here).
def DUPELT8b : NeonI_DUP_Elt<0b0, "dup", ".8b", ".b", VPR64,
                              neon_uimm4_bare> {
  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
}

def DUPELT4h : NeonI_DUP_Elt<0b0, "dup", ".4h", ".h", VPR64,
                              neon_uimm3_bare> {
  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
}

def DUPELT2s : NeonI_DUP_Elt<0b0, "dup", ".2s", ".s", VPR64,
                              neon_uimm2_bare> {
  let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
}
6827
// Selection patterns for Neon_vduplane: one for a 128-bit source, and one
// for a 64-bit source which is first widened to ExTy via SUBREG_TO_REG so
// the 128-bit-source DUPELT instruction applies.
multiclass NeonI_DUP_Elt_pattern<Instruction DUPELT, ValueType ResTy,
                                       ValueType OpTy,ValueType NaTy,
                                       ValueType ExTy, Operand OpLImm,
                                       Operand OpNImm> {
def  : Pat<(ResTy (Neon_vduplane (OpTy VPR128:$Rn), OpLImm:$Imm)),
        (ResTy (DUPELT (OpTy VPR128:$Rn), OpLImm:$Imm))>;

def : Pat<(ResTy (Neon_vduplane
            (NaTy VPR64:$Rn), OpNImm:$Imm)),
          (ResTy (DUPELT
            (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), OpNImm:$Imm))>;
}
defm : NeonI_DUP_Elt_pattern<DUPELT16b, v16i8, v16i8, v8i8, v16i8,
                             neon_uimm4_bare, neon_uimm3_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT8b, v8i8, v16i8, v8i8, v16i8,
                             neon_uimm4_bare, neon_uimm3_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT8h, v8i16, v8i16, v4i16, v8i16,
                             neon_uimm3_bare, neon_uimm2_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT4h, v4i16, v8i16, v4i16, v8i16,
                             neon_uimm3_bare, neon_uimm2_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4i32, v4i32, v2i32, v4i32,
                             neon_uimm2_bare, neon_uimm1_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2i32, v4i32, v2i32, v4i32,
                             neon_uimm2_bare, neon_uimm1_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2i64, v2i64, v1i64, v2i64,
                             neon_uimm1_bare, neon_uimm0_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4f32, v4f32, v2f32, v4f32,
                             neon_uimm2_bare, neon_uimm1_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2f32, v4f32, v2f32, v4f32,
                             neon_uimm2_bare, neon_uimm1_bare>;
defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2f64, v2f64, v1f64, v2f64,
                             neon_uimm1_bare, neon_uimm0_bare>;

// Splat of a floating-point scalar: widen the FPR into a vector register
// and duplicate its lane 0.
def : Pat<(v2f32 (Neon_vdup (f32 FPR32:$Rn))),
          (v2f32 (DUPELT2s
            (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
            (i64 0)))>;
def : Pat<(v4f32 (Neon_vdup (f32 FPR32:$Rn))),
          (v4f32 (DUPELT4s
            (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
            (i64 0)))>;
def : Pat<(v2f64 (Neon_vdup (f64 FPR64:$Rn))),
          (v2f64 (DUPELT2d
            (SUBREG_TO_REG (i64 0), FPR64:$Rn, sub_64),
            (i64 0)))>;
6873
// DUP (general): broadcast a general-purpose register value to every lane.
// Matches Neon_vdup of a GPR directly in the instruction pattern.
class NeonI_DUP<bit Q, string asmop, string rdlane,
                RegisterOperand ResVPR, ValueType ResTy,
                RegisterClass OpGPR, ValueType OpTy>
  : NeonI_copy<Q, 0b0, 0b0001, (outs ResVPR:$Rd), (ins OpGPR:$Rn),
               asmop # "\t$Rd" # rdlane # ", $Rn",
               [(set (ResTy ResVPR:$Rd),
                 (ResTy (Neon_vdup (OpTy OpGPR:$Rn))))],
               NoItinerary>;

def DUP16b : NeonI_DUP<0b1, "dup", ".16b", VPR128, v16i8, GPR32, i32> {
  let Inst{20-16} = 0b00001;
  // bits 17-20 are unspecified, but should be set to zero.
}

def DUP8h : NeonI_DUP<0b1, "dup", ".8h", VPR128, v8i16, GPR32, i32> {
  let Inst{20-16} = 0b00010;
  // bits 18-20 are unspecified, but should be set to zero.
}

def DUP4s : NeonI_DUP<0b1, "dup", ".4s", VPR128, v4i32, GPR32, i32> {
  let Inst{20-16} = 0b00100;
  // bits 19-20 are unspecified, but should be set to zero.
}

def DUP2d : NeonI_DUP<0b1, "dup", ".2d", VPR128, v2i64, GPR64, i64> {
  let Inst{20-16} = 0b01000;
  // bit 20 is unspecified, but should be set to zero.
}

def DUP8b : NeonI_DUP<0b0, "dup", ".8b", VPR64, v8i8, GPR32, i32> {
  let Inst{20-16} = 0b00001;
  // bits 17-20 are unspecified, but should be set to zero.
}

def DUP4h : NeonI_DUP<0b0, "dup", ".4h", VPR64, v4i16, GPR32, i32> {
  let Inst{20-16} = 0b00010;
  // bits 18-20 are unspecified, but should be set to zero.
}

def DUP2s : NeonI_DUP<0b0, "dup", ".2s", VPR64, v2i32, GPR32, i32> {
  let Inst{20-16} = 0b00100;
  // bits 19-20 are unspecified, but should be set to zero.
}
6917
// patterns for CONCAT_VECTORS
// Three shapes: (x, undef) is just a register-class widening; (x, y) inserts
// y's 64 bits into the high half via INSELd; (x, x) is a 64-bit-element
// duplicate of the low half.
multiclass Concat_Vector_Pattern<ValueType ResTy, ValueType OpTy> {
def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), undef)),
          (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)>;
def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))),
          (INSELd
            (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
            (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rm, sub_64)),
            (i64 1),
            (i64 0))>;
def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rn))),
          (DUPELT2d
            (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
            (i64 0))> ;
}

defm : Concat_Vector_Pattern<v16i8, v8i8>;
defm : Concat_Vector_Pattern<v8i16, v4i16>;
defm : Concat_Vector_Pattern<v4i32, v2i32>;
defm : Concat_Vector_Pattern<v2i64, v1i64>;
defm : Concat_Vector_Pattern<v4f32, v2f32>;
defm : Concat_Vector_Pattern<v2f64, v1f64>;

//patterns for EXTRACT_SUBVECTOR
// Low-half extraction is a free subregister copy.
def : Pat<(v8i8 (extract_subvector (v16i8 VPR128:$Rn), (i64 0))),
          (v8i8 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
def : Pat<(v4i16 (extract_subvector (v8i16 VPR128:$Rn), (i64 0))),
          (v4i16 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
def : Pat<(v2i32 (extract_subvector (v4i32 VPR128:$Rn), (i64 0))),
          (v2i32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
def : Pat<(v1i64 (extract_subvector (v2i64 VPR128:$Rn), (i64 0))),
          (v1i64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
def : Pat<(v2f32 (extract_subvector (v4f32 VPR128:$Rn), (i64 0))),
          (v2f32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
def : Pat<(v1f64 (extract_subvector (v2f64 VPR128:$Rn), (i64 0))),
          (v1f64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
6954
6955 // The followings are for instruction class (3V Elem)
6956
6957 // Variant 1
6958
// 3-operand (accumulating) vector-by-element instruction format: Rd is both
// source accumulator ($src) and destination, tied by the constraint below.
// Index and Re are encoded by the concrete defs in the multiclasses.
class NI_2VE<bit q, bit u, bits<2> size, bits<4> opcode,
             string asmop, string ResS, string OpS, string EleOpS,
             Operand OpImm, RegisterOperand ResVPR,
             RegisterOperand OpVPR, RegisterOperand EleOpVPR>
  : NeonI_2VElem<q, u, size, opcode,
                 (outs ResVPR:$Rd), (ins ResVPR:$src, OpVPR:$Rn,
                                         EleOpVPR:$Re, OpImm:$Index),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
                 ", $Re." # EleOpS # "[$Index]",
                 [],
                 NoItinerary> {
  bits<3> Index;
  bits<5> Re;

  let Constraints = "$src = $Rd";
}

// Variant 1: 16-bit and 32-bit integer element forms.
multiclass NI_2VE_v1<bit u, bits<4> opcode, string asmop> {
  // vector register class for element is always 128-bit to cover the max index
  def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
                     neon_uimm2_bare, VPR64, VPR64, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
                     neon_uimm2_bare, VPR128, VPR128, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  // Index operations on 16-bit(H) elements are restricted to using v0-v15.
  def _4h8h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
                     neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
    let Inst{11} = {Index{2}};
    let Inst{21} = {Index{1}};
    let Inst{20} = {Index{0}};
    let Inst{19-16} = Re{3-0};
  }

  def _8h8h : NI_2VE<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
                     neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
    let Inst{11} = {Index{2}};
    let Inst{21} = {Index{1}};
    let Inst{20} = {Index{0}};
    let Inst{19-16} = Re{3-0};
  }
}

defm MLAvve : NI_2VE_v1<0b1, 0b0000, "mla">;
defm MLSvve : NI_2VE_v1<0b1, 0b0100, "mls">;
7012
// Pattern for lane in 128-bit vector
// Matches op(acc, Rn, duplane(Re, Index)) onto the accumulating by-element
// instruction.
class NI_2VE_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
                   RegisterOperand ResVPR, RegisterOperand OpVPR,
                   RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
                   ValueType EleOpTy>
  : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
          (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
        (INST ResVPR:$src, OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;

// Pattern for lane in 64-bit vector
// Same as above, but the element vector is 64-bit and must be widened with
// SUBREG_TO_REG before feeding the 128-bit-element instruction.
class NI_2VE_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
                  RegisterOperand ResVPR, RegisterOperand OpVPR,
                  RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
                  ValueType EleOpTy>
  : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
          (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
        (INST ResVPR:$src, OpVPR:$Rn,
          (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;

multiclass NI_2VE_v1_pat<string subop, SDPatternOperator op>
{
  def : NI_2VE_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
                     op, VPR64, VPR64, VPR128, v2i32, v2i32, v4i32>;

  def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
                     op, VPR128, VPR128, VPR128, v4i32, v4i32, v4i32>;

  def : NI_2VE_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
                     op, VPR64, VPR64, VPR128Lo, v4i16, v4i16, v8i16>;

  def : NI_2VE_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
                     op, VPR128, VPR128, VPR128Lo, v8i16, v8i16, v8i16>;

  // Index can only be half of the max value for lane in 64-bit vector

  def : NI_2VE_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
                    op, VPR64, VPR64, VPR64, v2i32, v2i32, v2i32>;

  def : NI_2VE_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
                    op, VPR64, VPR64, VPR64Lo, v4i16, v4i16, v4i16>;
}

defm MLA_lane_v1 : NI_2VE_v1_pat<"MLAvve", Neon_mla>;
defm MLS_lane_v1 : NI_2VE_v1_pat<"MLSvve", Neon_mls>;
7057
// 2-operand (non-accumulating) vector-by-element instruction format: like
// NI_2VE but without the tied accumulator input.
class NI_2VE_2op<bit q, bit u, bits<2> size, bits<4> opcode,
                 string asmop, string ResS, string OpS, string EleOpS,
                 Operand OpImm, RegisterOperand ResVPR,
                 RegisterOperand OpVPR, RegisterOperand EleOpVPR>
  : NeonI_2VElem<q, u, size, opcode,
                 (outs ResVPR:$Rd), (ins OpVPR:$Rn,
                                         EleOpVPR:$Re, OpImm:$Index),
                 asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
                 ", $Re." # EleOpS # "[$Index]",
                 [],
                 NoItinerary> {
  bits<3> Index;
  bits<5> Re;
}

// Variant 1 (2-operand): 16-bit and 32-bit integer element forms.
multiclass NI_2VE_v1_2op<bit u, bits<4> opcode, string asmop> {
  // vector register class for element is always 128-bit to cover the max index
  def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
                         neon_uimm2_bare, VPR64, VPR64, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
                         neon_uimm2_bare, VPR128, VPR128, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  // Index operations on 16-bit(H) elements are restricted to using v0-v15.
  def _4h8h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
                         neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
    let Inst{11} = {Index{2}};
    let Inst{21} = {Index{1}};
    let Inst{20} = {Index{0}};
    let Inst{19-16} = Re{3-0};
  }

  def _8h8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
                         neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
    let Inst{11} = {Index{2}};
    let Inst{21} = {Index{1}};
    let Inst{20} = {Index{0}};
    let Inst{19-16} = Re{3-0};
  }
}

defm MULve : NI_2VE_v1_2op<0b0, 0b1000, "mul">;
defm SQDMULHve : NI_2VE_v1_2op<0b0, 0b1100, "sqdmulh">;
defm SQRDMULHve : NI_2VE_v1_2op<0b0, 0b1101, "sqrdmulh">;
7110
// Pattern for lane in 128-bit vector
// Non-accumulating counterpart of NI_2VE_laneq: op(Rn, duplane(Re, Index)).
class NI_2VE_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
                       RegisterOperand OpVPR, RegisterOperand EleOpVPR,
                       ValueType ResTy, ValueType OpTy, ValueType EleOpTy>
  : Pat<(ResTy (op (OpTy OpVPR:$Rn),
          (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
        (INST OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;

// Pattern for lane in 64-bit vector
// The 64-bit element vector is widened with SUBREG_TO_REG first.
class NI_2VE_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
                      RegisterOperand OpVPR, RegisterOperand EleOpVPR,
                      ValueType ResTy, ValueType OpTy, ValueType EleOpTy>
  : Pat<(ResTy (op (OpTy OpVPR:$Rn),
          (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
        (INST OpVPR:$Rn,
          (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;

multiclass NI_2VE_mul_v1_pat<string subop, SDPatternOperator op> {
  def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
                         op, VPR64, VPR128, v2i32, v2i32, v4i32>;

  def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
                         op, VPR128, VPR128, v4i32, v4i32, v4i32>;

  def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
                         op, VPR64, VPR128Lo, v4i16, v4i16, v8i16>;

  def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
                         op, VPR128, VPR128Lo, v8i16, v8i16, v8i16>;

  // Index can only be half of the max value for lane in 64-bit vector

  def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
                        op, VPR64, VPR64, v2i32, v2i32, v2i32>;

  def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
                        op, VPR64, VPR64Lo, v4i16, v4i16, v4i16>;
}

defm MUL_lane_v1 : NI_2VE_mul_v1_pat<"MULve", mul>;
defm SQDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQDMULHve", int_arm_neon_vqdmulh>;
defm SQRDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQRDMULHve", int_arm_neon_vqrdmulh>;
7153
7154 // Variant 2
7155
7156 multiclass NI_2VE_v2_2op<bit u, bits<4> opcode, string asmop> {
7157   // vector register class for element is always 128-bit to cover the max index
7158   def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
7159                          neon_uimm2_bare, VPR64, VPR64, VPR128> {
7160     let Inst{11} = {Index{1}};
7161     let Inst{21} = {Index{0}};
7162     let Inst{20-16} = Re;
7163   }
7164
7165   def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
7166                          neon_uimm2_bare, VPR128, VPR128, VPR128> {
7167     let Inst{11} = {Index{1}};
7168     let Inst{21} = {Index{0}};
7169     let Inst{20-16} = Re;
7170   }
7171
7172   // _1d2d doesn't exist!
7173
7174   def _2d2d : NI_2VE_2op<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
7175                          neon_uimm1_bare, VPR128, VPR128, VPR128> {
7176     let Inst{11} = {Index{0}};
7177     let Inst{21} = 0b0;
7178     let Inst{20-16} = Re;
7179   }
7180 }
7181
7182 defm FMULve : NI_2VE_v2_2op<0b0, 0b1001, "fmul">;
7183 defm FMULXve : NI_2VE_v2_2op<0b1, 0b1001, "fmulx">;
7184
7185 class NI_2VE_mul_lane_2d<Instruction INST, Operand OpImm, SDPatternOperator op,
7186                          RegisterOperand OpVPR, RegisterOperand EleOpVPR,
7187                          ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
7188                          SDPatternOperator coreop>
7189   : Pat<(ResTy (op (OpTy OpVPR:$Rn),
7190           (OpTy (coreop (EleOpTy EleOpVPR:$Re), (EleOpTy EleOpVPR:$Re))))),
7191         (INST OpVPR:$Rn,
7192           (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), 0)>;
7193
7194 multiclass NI_2VE_mul_v2_pat<string subop, SDPatternOperator op> {
7195   def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
7196                          op, VPR64, VPR128, v2f32, v2f32, v4f32>;
7197
7198   def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
7199                          op, VPR128, VPR128, v4f32, v4f32, v4f32>;
7200
7201   def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
7202                          op, VPR128, VPR128, v2f64, v2f64, v2f64>;
7203
7204   // Index can only be half of the max value for lane in 64-bit vector
7205
7206   def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
7207                         op, VPR64, VPR64, v2f32, v2f32, v2f32>;
7208
7209   def : NI_2VE_mul_lane_2d<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
7210                            op, VPR128, VPR64, v2f64, v2f64, v1f64,
7211                            BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
7212 }
7213
7214 defm FMUL_lane_v2 : NI_2VE_mul_v2_pat<"FMULve", fmul>;
7215 defm FMULX_lane_v2 : NI_2VE_mul_v2_pat<"FMULXve", int_aarch64_neon_vmulx>;
7216
7217 def : Pat<(v2f32 (fmul (v2f32 (Neon_vdup (f32 FPR32:$Re))),
7218                        (v2f32 VPR64:$Rn))),
7219           (FMULve_2s4s VPR64:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
7220
7221 def : Pat<(v4f32 (fmul (v4f32 (Neon_vdup (f32 FPR32:$Re))),
7222                        (v4f32 VPR128:$Rn))),
7223           (FMULve_4s4s VPR128:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
7224
7225 def : Pat<(v2f64 (fmul (v2f64 (Neon_vdup (f64 FPR64:$Re))),
7226                        (v2f64 VPR128:$Rn))),
7227           (FMULve_2d2d VPR128:$Rn, (SUBREG_TO_REG (i64 0), $Re, sub_64), 0)>;
7228
7229 // The followings are patterns using fma
7230 // -ffp-contract=fast generates fma
7231
// Variant 2 (accumulating): floating-point element forms used by fma-based
// FMLA/FMLS selection.
multiclass NI_2VE_v2<bit u, bits<4> opcode, string asmop> {
  // vector register class for element is always 128-bit to cover the max index
  def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
                     neon_uimm2_bare, VPR64, VPR64, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
                     neon_uimm2_bare, VPR128, VPR128, VPR128> {
    let Inst{11} = {Index{1}};
    let Inst{21} = {Index{0}};
    let Inst{20-16} = Re;
  }

  // _1d2d doesn't exist!

  def _2d2d : NI_2VE<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
                     neon_uimm1_bare, VPR128, VPR128, VPR128> {
    let Inst{11} = {Index{0}};
    let Inst{21} = 0b0;
    let Inst{20-16} = Re;
  }
}

defm FMLAvve : NI_2VE_v2<0b0, 0b0001, "fmla">;
defm FMLSvve : NI_2VE_v2<0b0, 0b0101, "fmls">;
7260
7261 // Pattern for lane in 128-bit vector
7262 class NI_2VEswap_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
7263                        RegisterOperand ResVPR, RegisterOperand OpVPR,
7264                        ValueType ResTy, ValueType OpTy,
7265                        SDPatternOperator coreop>
7266   : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
7267                    (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
7268         (INST ResVPR:$src, ResVPR:$Rn, OpVPR:$Re, OpImm:$Index)>;
7269
7270 // Pattern for lane 0
7271 class NI_2VEfma_lane0<Instruction INST, SDPatternOperator op,
7272                       RegisterOperand ResVPR, ValueType ResTy>
7273   : Pat<(ResTy (op (ResTy ResVPR:$Rn),
7274                    (ResTy (Neon_vdup (f32 FPR32:$Re))),
7275                    (ResTy ResVPR:$src))),
7276         (INST ResVPR:$src, ResVPR:$Rn,
7277               (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
7278
7279 // Pattern for lane in 64-bit vector
7280 class NI_2VEswap_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
7281                       RegisterOperand ResVPR, RegisterOperand OpVPR,
7282                       ValueType ResTy, ValueType OpTy,
7283                       SDPatternOperator coreop>
7284   : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
7285                    (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
7286         (INST ResVPR:$src, ResVPR:$Rn,
7287           (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), OpImm:$Index)>;
7288
7289 // Pattern for lane in 64-bit vector
7290 class NI_2VEswap_lane_2d2d<Instruction INST, Operand OpImm,
7291                            SDPatternOperator op,
7292                            RegisterOperand ResVPR, RegisterOperand OpVPR,
7293                            ValueType ResTy, ValueType OpTy,
7294                            SDPatternOperator coreop>
7295   : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (OpTy OpVPR:$Re))),
7296                    (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
7297         (INST ResVPR:$src, ResVPR:$Rn,
7298           (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), 0)>;
7299
7300
7301 multiclass NI_2VE_fma_v2_pat<string subop, SDPatternOperator op> {
7302   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
7303                          neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
7304                          BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
7305
7306   def : NI_2VEfma_lane0<!cast<Instruction>(subop # "_2s4s"),
7307                         op, VPR64, v2f32>;
7308
7309   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
7310                          neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
7311                          BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
7312
7313   def : NI_2VEfma_lane0<!cast<Instruction>(subop # "_4s4s"),
7314                         op, VPR128, v4f32>;
7315
7316   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
7317                          neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
7318                          BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
7319
7320   // Index can only be half of the max value for lane in 64-bit vector
7321
7322   def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
7323                         neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
7324                         BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
7325
7326   def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
7327                              neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
7328                              BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
7329 }
7330
7331 defm FMLA_lane_v2_s : NI_2VE_fma_v2_pat<"FMLAvve", fma>;
7332
7333 // Pattern for lane 0
7334 class NI_2VEfms_lane0<Instruction INST, SDPatternOperator op,
7335                       RegisterOperand ResVPR, ValueType ResTy>
7336   : Pat<(ResTy (op (ResTy (fneg ResVPR:$Rn)),
7337                    (ResTy (Neon_vdup (f32 FPR32:$Re))),
7338                    (ResTy ResVPR:$src))),
7339         (INST ResVPR:$src, ResVPR:$Rn,
7340               (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
7341
7342 multiclass NI_2VE_fms_v2_pat<string subop, SDPatternOperator op>
7343 {
7344   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
7345                          neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
7346                          BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>;
7347
7348   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
7349                          neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
7350                          BinOpFrag<(Neon_vduplane
7351                                      (fneg node:$LHS), node:$RHS)>>;
7352
7353   def : NI_2VEfms_lane0<!cast<Instruction>(subop # "_2s4s"),
7354                         op, VPR64, v2f32>;
7355
7356   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
7357                          neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
7358                          BinOpFrag<(fneg (Neon_vduplane
7359                                      node:$LHS, node:$RHS))>>;
7360
7361   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
7362                          neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
7363                          BinOpFrag<(Neon_vduplane
7364                                      (fneg node:$LHS), node:$RHS)>>;
7365
7366   def : NI_2VEfms_lane0<!cast<Instruction>(subop # "_4s4s"),
7367                         op, VPR128, v4f32>;
7368
7369   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
7370                          neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
7371                          BinOpFrag<(fneg (Neon_vduplane
7372                                      node:$LHS, node:$RHS))>>;
7373
7374   def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
7375                          neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
7376                          BinOpFrag<(Neon_vduplane
7377                                      (fneg node:$LHS), node:$RHS)>>;
7378
7379   // Index can only be half of the max value for lane in 64-bit vector
7380
7381   def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
7382                         neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
7383                         BinOpFrag<(fneg (Neon_vduplane
7384                                     node:$LHS, node:$RHS))>>;
7385
7386   def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
7387                         neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
7388                         BinOpFrag<(Neon_vduplane
7389                                     (fneg node:$LHS), node:$RHS)>>;
7390
7391   def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
7392                         neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
7393                         BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>;
7394
7395   def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
7396                         neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
7397                         BinOpFrag<(Neon_vduplane (fneg node:$LHS), node:$RHS)>>;
7398
7399   def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
7400                              neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
7401                              BinOpFrag<(fneg (Neon_combine_2d
7402                                          node:$LHS, node:$RHS))>>;
7403
7404   def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
7405                              neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
7406                              BinOpFrag<(Neon_combine_2d
7407                                          (fneg node:$LHS), (fneg node:$RHS))>>;
7408 }
7409
7410 defm FMLS_lane_v2_s : NI_2VE_fms_v2_pat<"FMLSvve", fma>;
7411
7412 // Variant 3: Long type
7413 // E.g. SMLAL : 4S/4H/H (v0-v15), 2D/2S/S
7414 //      SMLAL2: 4S/8H/H (v0-v15), 2D/4S/S
7415
7416 multiclass NI_2VE_v3<bit u, bits<4> opcode, string asmop> {
7417   // vector register class for element is always 128-bit to cover the max index
7418   def _2d2s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
7419                      neon_uimm2_bare, VPR128, VPR64, VPR128> {
7420     let Inst{11} = {Index{1}};
7421     let Inst{21} = {Index{0}};
7422     let Inst{20-16} = Re;
7423   }
7424
7425   def _2d4s : NI_2VE<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
7426                      neon_uimm2_bare, VPR128, VPR128, VPR128> {
7427     let Inst{11} = {Index{1}};
7428     let Inst{21} = {Index{0}};
7429     let Inst{20-16} = Re;
7430   }
7431
7432   // Index operations on 16-bit(H) elements are restricted to using v0-v15.
7433   def _4s8h : NI_2VE<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
7434                      neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
7435     let Inst{11} = {Index{2}};
7436     let Inst{21} = {Index{1}};
7437     let Inst{20} = {Index{0}};
7438     let Inst{19-16} = Re{3-0};
7439   }
7440
7441   def _4s4h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
7442                      neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
7443     let Inst{11} = {Index{2}};
7444     let Inst{21} = {Index{1}};
7445     let Inst{20} = {Index{0}};
7446     let Inst{19-16} = Re{3-0};
7447   }
7448 }
7449
7450 defm SMLALvve : NI_2VE_v3<0b0, 0b0010, "smlal">;
7451 defm UMLALvve : NI_2VE_v3<0b1, 0b0010, "umlal">;
7452 defm SMLSLvve : NI_2VE_v3<0b0, 0b0110, "smlsl">;
7453 defm UMLSLvve : NI_2VE_v3<0b1, 0b0110, "umlsl">;
7454 defm SQDMLALvve : NI_2VE_v3<0b0, 0b0011, "sqdmlal">;
7455 defm SQDMLSLvve : NI_2VE_v3<0b0, 0b0111, "sqdmlsl">;
7456
7457 multiclass NI_2VE_v3_2op<bit u, bits<4> opcode, string asmop> {
7458   // vector register class for element is always 128-bit to cover the max index
7459   def _2d2s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
7460                          neon_uimm2_bare, VPR128, VPR64, VPR128> {
7461     let Inst{11} = {Index{1}};
7462     let Inst{21} = {Index{0}};
7463     let Inst{20-16} = Re;
7464   }
7465
7466   def _2d4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
7467                          neon_uimm2_bare, VPR128, VPR128, VPR128> {
7468     let Inst{11} = {Index{1}};
7469     let Inst{21} = {Index{0}};
7470     let Inst{20-16} = Re;
7471   }
7472
7473   // Index operations on 16-bit(H) elements are restricted to using v0-v15.
7474   def _4s8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
7475                          neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
7476     let Inst{11} = {Index{2}};
7477     let Inst{21} = {Index{1}};
7478     let Inst{20} = {Index{0}};
7479     let Inst{19-16} = Re{3-0};
7480   }
7481
7482   def _4s4h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
7483                          neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
7484     let Inst{11} = {Index{2}};
7485     let Inst{21} = {Index{1}};
7486     let Inst{20} = {Index{0}};
7487     let Inst{19-16} = Re{3-0};
7488   }
7489 }
7490
7491 defm SMULLve : NI_2VE_v3_2op<0b0, 0b1010, "smull">;
7492 defm UMULLve : NI_2VE_v3_2op<0b1, 0b1010, "umull">;
7493 defm SQDMULLve : NI_2VE_v3_2op<0b0, 0b1011, "sqdmull">;
7494
7495 def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))),
7496           (FMOVdd $src)>;
7497
7498 // Pattern for lane in 128-bit vector
7499 class NI_2VEL2_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
7500                      RegisterOperand EleOpVPR, ValueType ResTy,
7501                      ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
7502                      SDPatternOperator hiop>
7503   : Pat<(ResTy (op (ResTy VPR128:$src),
7504           (HalfOpTy (hiop (OpTy VPR128:$Rn))),
7505           (HalfOpTy (Neon_vduplane
7506                       (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7507         (INST VPR128:$src, VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
7508
7509 // Pattern for lane in 64-bit vector
7510 class NI_2VEL2_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
7511                     RegisterOperand EleOpVPR, ValueType ResTy,
7512                     ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
7513                     SDPatternOperator hiop>
7514   : Pat<(ResTy (op (ResTy VPR128:$src),
7515           (HalfOpTy (hiop (OpTy VPR128:$Rn))),
7516           (HalfOpTy (Neon_vduplane
7517                       (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7518         (INST VPR128:$src, VPR128:$Rn,
7519           (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
7520
7521 class NI_2VEL2_lane0<Instruction INST, SDPatternOperator op,
7522                      ValueType ResTy, ValueType OpTy, ValueType HalfOpTy,
7523                      SDPatternOperator hiop, Instruction DupInst>
7524   : Pat<(ResTy (op (ResTy VPR128:$src),
7525           (HalfOpTy (hiop (OpTy VPR128:$Rn))),
7526           (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))),
7527         (INST VPR128:$src, VPR128:$Rn, (DupInst $Re), 0)>;
7528
7529 multiclass NI_2VEL_v3_pat<string subop, SDPatternOperator op> {
7530   def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
7531                      op, VPR128, VPR64, VPR128Lo, v4i32, v4i16, v8i16>;
7532
7533   def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
7534                      op, VPR128, VPR64, VPR128, v2i64, v2i32, v4i32>;
7535
7536   def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
7537                        op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>;
7538
7539   def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
7540                        op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>;
7541
7542   def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_4s8h"),
7543                        op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;
7544
7545   def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_2d4s"),
7546                        op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;
7547
7548   // Index can only be half of the max value for lane in 64-bit vector
7549
7550   def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
7551                     op, VPR128, VPR64, VPR64Lo, v4i32, v4i16, v4i16>;
7552
7553   def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
7554                     op, VPR128, VPR64, VPR64, v2i64, v2i32, v2i32>;
7555
7556   def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
7557                       op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>;
7558
7559   def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
7560                       op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>;
7561 }
7562
7563 defm SMLAL_lane_v3 : NI_2VEL_v3_pat<"SMLALvve", Neon_smlal>;
7564 defm UMLAL_lane_v3 : NI_2VEL_v3_pat<"UMLALvve", Neon_umlal>;
7565 defm SMLSL_lane_v3 : NI_2VEL_v3_pat<"SMLSLvve", Neon_smlsl>;
7566 defm UMLSL_lane_v3 : NI_2VEL_v3_pat<"UMLSLvve", Neon_umlsl>;
7567
7568 // Pattern for lane in 128-bit vector
7569 class NI_2VEL2_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
7570                          RegisterOperand EleOpVPR, ValueType ResTy,
7571                          ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
7572                          SDPatternOperator hiop>
7573   : Pat<(ResTy (op
7574           (HalfOpTy (hiop (OpTy VPR128:$Rn))),
7575           (HalfOpTy (Neon_vduplane
7576                       (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7577         (INST VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
7578
7579 // Pattern for lane in 64-bit vector
7580 class NI_2VEL2_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
7581                         RegisterOperand EleOpVPR, ValueType ResTy,
7582                         ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
7583                         SDPatternOperator hiop>
7584   : Pat<(ResTy (op
7585           (HalfOpTy (hiop (OpTy VPR128:$Rn))),
7586           (HalfOpTy (Neon_vduplane
7587                       (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
7588         (INST VPR128:$Rn,
7589           (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
7590
7591 // Pattern for fixed lane 0
7592 class NI_2VEL2_mul_lane0<Instruction INST, SDPatternOperator op,
7593                          ValueType ResTy, ValueType OpTy, ValueType HalfOpTy,
7594                          SDPatternOperator hiop, Instruction DupInst>
7595   : Pat<(ResTy (op
7596           (HalfOpTy (hiop (OpTy VPR128:$Rn))),
7597           (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))),
7598         (INST VPR128:$Rn, (DupInst $Re), 0)>;
7599
7600 multiclass NI_2VEL_mul_v3_pat<string subop, SDPatternOperator op> {
7601   def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
7602                          op, VPR64, VPR128Lo, v4i32, v4i16, v8i16>;
7603
7604   def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
7605                          op, VPR64, VPR128, v2i64, v2i32, v4i32>;
7606
7607   def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
7608                          op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>;
7609
7610   def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
7611                            op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>;
7612
7613   def : NI_2VEL2_mul_lane0<!cast<Instruction>(subop # "_4s8h"),
7614                            op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;
7615
7616   def : NI_2VEL2_mul_lane0<!cast<Instruction>(subop # "_2d4s"),
7617                            op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;
7618
7619   // Index can only be half of the max value for lane in 64-bit vector
7620
7621   def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
7622                         op, VPR64, VPR64Lo, v4i32, v4i16, v4i16>;
7623
7624   def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
7625                         op, VPR64, VPR64, v2i64, v2i32, v2i32>;
7626
7627   def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
7628                           op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>;
7629
7630   def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
7631                           op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>;
7632 }
7633
7634 defm SMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SMULLve", int_arm_neon_vmulls>;
7635 defm UMULL_lane_v3 : NI_2VEL_mul_v3_pat<"UMULLve", int_arm_neon_vmullu>;
7636 defm SQDMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SQDMULLve", int_arm_neon_vqdmull>;
7637
7638 multiclass NI_qdma<SDPatternOperator op> {
7639   def _4s : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
7640                     (op node:$Ra,
7641                       (v4i32 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
7642
7643   def _2d : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
7644                     (op node:$Ra,
7645                       (v2i64 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
7646 }
7647
7648 defm Neon_qdmlal : NI_qdma<int_arm_neon_vqadds>;
7649 defm Neon_qdmlsl : NI_qdma<int_arm_neon_vqsubs>;
7650
7651 multiclass NI_2VEL_v3_qdma_pat<string subop, string op> {
7652   def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
7653                      !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR128Lo,
7654                      v4i32, v4i16, v8i16>;
7655
7656   def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
7657                      !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR128,
7658                      v2i64, v2i32, v4i32>;
7659
7660   def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
7661                        !cast<PatFrag>(op # "_4s"), VPR128Lo,
7662                        v4i32, v8i16, v8i16, v4i16, Neon_High8H>;
7663
7664   def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
7665                        !cast<PatFrag>(op # "_2d"), VPR128,
7666                        v2i64, v4i32, v4i32, v2i32, Neon_High4S>;
7667
7668   def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_4s8h"),
7669                        !cast<PatFrag>(op # "_4s"),
7670                        v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;
7671
7672   def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_2d4s"),
7673                        !cast<PatFrag>(op # "_2d"),
7674                        v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;
7675
7676   // Index can only be half of the max value for lane in 64-bit vector
7677
7678   def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
7679                     !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR64Lo,
7680                     v4i32, v4i16, v4i16>;
7681
7682   def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
7683                     !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR64,
7684                     v2i64, v2i32, v2i32>;
7685
7686   def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
7687                       !cast<PatFrag>(op # "_4s"), VPR64Lo,
7688                       v4i32, v8i16, v4i16, v4i16, Neon_High8H>;
7689
7690   def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
7691                       !cast<PatFrag>(op # "_2d"), VPR64,
7692                       v2i64, v4i32, v2i32, v2i32, Neon_High4S>;
7693 }
7694
7695 defm SQDMLAL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLALvve", "Neon_qdmlal">;
7696 defm SQDMLSL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLSLvve", "Neon_qdmlsl">;
7697
7698 // End of implementation for instruction class (3V Elem)
7699
7700 class NeonI_REV<string asmop, string Res, bits<2> size, bit Q, bit U,
7701                 bits<5> opcode, RegisterOperand ResVPR, ValueType ResTy,
7702                 SDPatternOperator Neon_Rev>
7703   : NeonI_2VMisc<Q, U, size, opcode,
7704                (outs ResVPR:$Rd), (ins ResVPR:$Rn),
7705                asmop # "\t$Rd." # Res # ", $Rn." # Res,
7706                [(set (ResTy ResVPR:$Rd),
7707                   (ResTy (Neon_Rev (ResTy ResVPR:$Rn))))],
7708                NoItinerary> ;
7709
7710 def REV64_16b : NeonI_REV<"rev64", "16b", 0b00, 0b1, 0b0, 0b00000, VPR128,
7711                           v16i8, Neon_rev64>;
7712 def REV64_8h : NeonI_REV<"rev64", "8h", 0b01, 0b1, 0b0, 0b00000, VPR128,
7713                          v8i16, Neon_rev64>;
7714 def REV64_4s : NeonI_REV<"rev64", "4s", 0b10, 0b1, 0b0, 0b00000, VPR128,
7715                          v4i32, Neon_rev64>;
7716 def REV64_8b : NeonI_REV<"rev64", "8b", 0b00, 0b0, 0b0, 0b00000, VPR64,
7717                          v8i8, Neon_rev64>;
7718 def REV64_4h : NeonI_REV<"rev64", "4h", 0b01, 0b0, 0b0, 0b00000, VPR64,
7719                          v4i16, Neon_rev64>;
7720 def REV64_2s : NeonI_REV<"rev64", "2s", 0b10, 0b0, 0b0, 0b00000, VPR64,
7721                          v2i32, Neon_rev64>;
7722
7723 def : Pat<(v4f32 (Neon_rev64 (v4f32 VPR128:$Rn))), (REV64_4s VPR128:$Rn)>;
7724 def : Pat<(v2f32 (Neon_rev64 (v2f32 VPR64:$Rn))), (REV64_2s VPR64:$Rn)>;
7725
7726 def REV32_16b : NeonI_REV<"rev32", "16b", 0b00, 0b1, 0b1, 0b00000, VPR128,
7727                           v16i8, Neon_rev32>;
7728 def REV32_8h : NeonI_REV<"rev32", "8h", 0b01, 0b1, 0b1, 0b00000, VPR128,
7729                           v8i16, Neon_rev32>;
7730 def REV32_8b : NeonI_REV<"rev32", "8b", 0b00, 0b0, 0b1, 0b00000, VPR64,
7731                          v8i8, Neon_rev32>;
7732 def REV32_4h : NeonI_REV<"rev32", "4h", 0b01, 0b0, 0b1, 0b00000, VPR64,
7733                          v4i16, Neon_rev32>;
7734
7735 def REV16_16b : NeonI_REV<"rev16", "16b", 0b00, 0b1, 0b0, 0b00001, VPR128,
7736                           v16i8, Neon_rev16>;
7737 def REV16_8b : NeonI_REV<"rev16", "8b", 0b00, 0b0, 0b0, 0b00001, VPR64,
7738                          v8i8, Neon_rev16>;
7739
7740 multiclass NeonI_PairwiseAdd<string asmop, bit U, bits<5> opcode,
7741                              SDPatternOperator Neon_Padd> {
7742   def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
7743                            (outs VPR128:$Rd), (ins VPR128:$Rn),
7744                            asmop # "\t$Rd.8h, $Rn.16b",
7745                            [(set (v8i16 VPR128:$Rd),
7746                               (v8i16 (Neon_Padd (v16i8 VPR128:$Rn))))],
7747                            NoItinerary>;
7748
7749   def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
7750                           (outs VPR64:$Rd), (ins VPR64:$Rn),
7751                           asmop # "\t$Rd.4h, $Rn.8b",
7752                           [(set (v4i16 VPR64:$Rd),
7753                              (v4i16 (Neon_Padd (v8i8 VPR64:$Rn))))],
7754                           NoItinerary>;
7755
7756   def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
7757                            (outs VPR128:$Rd), (ins VPR128:$Rn),
7758                            asmop # "\t$Rd.4s, $Rn.8h",
7759                            [(set (v4i32 VPR128:$Rd),
7760                               (v4i32 (Neon_Padd (v8i16 VPR128:$Rn))))],
7761                            NoItinerary>;
7762
7763   def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
7764                           (outs VPR64:$Rd), (ins VPR64:$Rn),
7765                           asmop # "\t$Rd.2s, $Rn.4h",
7766                           [(set (v2i32 VPR64:$Rd),
7767                              (v2i32 (Neon_Padd (v4i16 VPR64:$Rn))))],
7768                           NoItinerary>;
7769
7770   def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
7771                            (outs VPR128:$Rd), (ins VPR128:$Rn),
7772                            asmop # "\t$Rd.2d, $Rn.4s",
7773                            [(set (v2i64 VPR128:$Rd),
7774                               (v2i64 (Neon_Padd (v4i32 VPR128:$Rn))))],
7775                            NoItinerary>;
7776
7777   def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
7778                           (outs VPR64:$Rd), (ins VPR64:$Rn),
7779                           asmop # "\t$Rd.1d, $Rn.2s",
7780                           [(set (v1i64 VPR64:$Rd),
7781                              (v1i64 (Neon_Padd (v2i32 VPR64:$Rn))))],
7782                           NoItinerary>;
7783 }
7784
7785 defm SADDLP : NeonI_PairwiseAdd<"saddlp", 0b0, 0b00010,
7786                                 int_arm_neon_vpaddls>;
7787 defm UADDLP : NeonI_PairwiseAdd<"uaddlp", 0b1, 0b00010,
7788                                 int_arm_neon_vpaddlu>;
7789
7790 def : Pat<(v1i64 (int_aarch64_neon_saddlv (v2i32 VPR64:$Rn))),
7791           (SADDLP2s1d $Rn)>;
7792 def : Pat<(v1i64 (int_aarch64_neon_uaddlv (v2i32 VPR64:$Rn))),
7793           (UADDLP2s1d $Rn)>;
7794
7795 multiclass NeonI_PairwiseAddAcc<string asmop, bit U, bits<5> opcode,
7796                              SDPatternOperator Neon_Padd> {
7797   let Constraints = "$src = $Rd" in {
7798     def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
7799                              (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7800                              asmop # "\t$Rd.8h, $Rn.16b",
7801                              [(set (v8i16 VPR128:$Rd),
7802                                 (v8i16 (Neon_Padd
7803                                   (v8i16 VPR128:$src), (v16i8 VPR128:$Rn))))],
7804                              NoItinerary>;
7805
7806     def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
7807                             (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
7808                             asmop # "\t$Rd.4h, $Rn.8b",
7809                             [(set (v4i16 VPR64:$Rd),
7810                                (v4i16 (Neon_Padd
7811                                  (v4i16 VPR64:$src), (v8i8 VPR64:$Rn))))],
7812                             NoItinerary>;
7813
7814     def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
7815                             (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7816                             asmop # "\t$Rd.4s, $Rn.8h",
7817                             [(set (v4i32 VPR128:$Rd),
7818                                (v4i32 (Neon_Padd
7819                                  (v4i32 VPR128:$src), (v8i16 VPR128:$Rn))))],
7820                             NoItinerary>;
7821
7822     def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
7823                             (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
7824                             asmop # "\t$Rd.2s, $Rn.4h",
7825                             [(set (v2i32 VPR64:$Rd),
7826                                (v2i32 (Neon_Padd
7827                                  (v2i32 VPR64:$src), (v4i16 VPR64:$Rn))))],
7828                             NoItinerary>;
7829
7830     def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
7831                             (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
7832                             asmop # "\t$Rd.2d, $Rn.4s",
7833                             [(set (v2i64 VPR128:$Rd),
7834                                (v2i64 (Neon_Padd
7835                                  (v2i64 VPR128:$src), (v4i32 VPR128:$Rn))))],
7836                             NoItinerary>;
7837
7838     def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
7839                             (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
7840                             asmop # "\t$Rd.1d, $Rn.2s",
7841                             [(set (v1i64 VPR64:$Rd),
7842                                (v1i64 (Neon_Padd
7843                                  (v1i64 VPR64:$src), (v2i32 VPR64:$Rn))))],
7844                             NoItinerary>;
7845   }
7846 }
7847
7848 defm SADALP : NeonI_PairwiseAddAcc<"sadalp", 0b0, 0b00110,
7849                                    int_arm_neon_vpadals>;
7850 defm UADALP : NeonI_PairwiseAddAcc<"uadalp", 0b1, 0b00110,
7851                                    int_arm_neon_vpadalu>;
7852
// One-operand two-register-misc instructions available in every integer
// element size (B/H/S/D).  The 128-bit D-sized form (.2d) exists only as a
// Q-register variant; there is deliberately no 64-bit .1d def here.  Selection
// patterns are supplied separately by NeonI_2VMisc_BHSD_1Arg_Pattern below.
multiclass NeonI_2VMisc_BHSDsize_1Arg<string asmop, bit U, bits<5> opcode> {
  // 128-bit (Q-register) variants.
  def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
                         (outs VPR128:$Rd), (ins VPR128:$Rn),
                         asmop # "\t$Rd.16b, $Rn.16b",
                         [], NoItinerary>;

  def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.8h, $Rn.8h",
                        [], NoItinerary>;

  def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.4s, $Rn.4s",
                        [], NoItinerary>;

  def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.2d, $Rn.2d",
                        [], NoItinerary>;

  // 64-bit (D-register) variants.
  def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
                         (outs VPR64:$Rd), (ins VPR64:$Rn),
                         asmop # "\t$Rd.8b, $Rn.8b",
                         [], NoItinerary>;

  def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.4h, $Rn.4h",
                        [], NoItinerary>;

  def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.2s, $Rn.2s",
                        [], NoItinerary>;
}

// Saturating absolute/negate and plain absolute/negate over all element sizes.
defm SQABS : NeonI_2VMisc_BHSDsize_1Arg<"sqabs", 0b0, 0b00111>;
defm SQNEG : NeonI_2VMisc_BHSDsize_1Arg<"sqneg", 0b1, 0b00111>;
defm ABS : NeonI_2VMisc_BHSDsize_1Arg<"abs", 0b0, 0b01011>;
defm NEG : NeonI_2VMisc_BHSDsize_1Arg<"neg", 0b1, 0b01011>;
7894
// Selection patterns for the one-operand B/H/S/D instructions defined above:
// map a unary operator (here: intrinsics) on each vector type to the
// corresponding instruction named Prefix # <suffix>.
multiclass NeonI_2VMisc_BHSD_1Arg_Pattern<string Prefix,
                                          SDPatternOperator Neon_Op> {
  def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$Rn))),
            (v16i8 (!cast<Instruction>(Prefix # 16b) (v16i8 VPR128:$Rn)))>;

  def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$Rn))),
            (v8i16 (!cast<Instruction>(Prefix # 8h) (v8i16 VPR128:$Rn)))>;

  def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$Rn))),
            (v4i32 (!cast<Instruction>(Prefix # 4s) (v4i32 VPR128:$Rn)))>;

  def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$Rn))),
            (v2i64 (!cast<Instruction>(Prefix # 2d) (v2i64 VPR128:$Rn)))>;

  def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$Rn))),
            (v8i8 (!cast<Instruction>(Prefix # 8b) (v8i8 VPR64:$Rn)))>;

  def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$Rn))),
            (v4i16 (!cast<Instruction>(Prefix # 4h) (v4i16 VPR64:$Rn)))>;

  def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$Rn))),
            (v2i32 (!cast<Instruction>(Prefix # 2s) (v2i32 VPR64:$Rn)))>;
}

// NEG is selected from plain (sub 0, x) below instead of an intrinsic.
defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQABS", int_arm_neon_vqabs>;
defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQNEG", int_arm_neon_vqneg>;
defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"ABS", int_arm_neon_vabs>;
7922
// Select NEG for (sub 0, x).  Neon_AllZero is matched directly for byte
// vectors; for wider element types the all-zero byte vector is viewed through
// a bitconvert so the same all-zero pattern fragment applies.
def : Pat<(v16i8 (sub
            (v16i8 Neon_AllZero),
            (v16i8 VPR128:$Rn))),
          (v16i8 (NEG16b (v16i8 VPR128:$Rn)))>;
def : Pat<(v8i8 (sub
            (v8i8 Neon_AllZero),
            (v8i8 VPR64:$Rn))),
          (v8i8 (NEG8b (v8i8 VPR64:$Rn)))>;
def : Pat<(v8i16 (sub
            (v8i16 (bitconvert (v16i8 Neon_AllZero))),
            (v8i16 VPR128:$Rn))),
          (v8i16 (NEG8h (v8i16 VPR128:$Rn)))>;
def : Pat<(v4i16 (sub
            (v4i16 (bitconvert (v8i8 Neon_AllZero))),
            (v4i16 VPR64:$Rn))),
          (v4i16 (NEG4h (v4i16 VPR64:$Rn)))>;
def : Pat<(v4i32 (sub
            (v4i32 (bitconvert (v16i8 Neon_AllZero))),
            (v4i32 VPR128:$Rn))),
          (v4i32 (NEG4s (v4i32 VPR128:$Rn)))>;
def : Pat<(v2i32 (sub
            (v2i32 (bitconvert (v8i8 Neon_AllZero))),
            (v2i32 VPR64:$Rn))),
          (v2i32 (NEG2s (v2i32 VPR64:$Rn)))>;
def : Pat<(v2i64 (sub
            (v2i64 (bitconvert (v16i8 Neon_AllZero))),
            (v2i64 VPR128:$Rn))),
          (v2i64 (NEG2d (v2i64 VPR128:$Rn)))>;
7951
// Accumulating two-register-misc instructions in every integer element size.
// Rd is both read and written, so $src is tied to $Rd for all variants.
multiclass NeonI_2VMisc_BHSDsize_2Args<string asmop, bit U, bits<5> opcode> {
  let Constraints = "$src = $Rd" in {
    // 128-bit (Q-register) variants.
    def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                           asmop # "\t$Rd.16b, $Rn.16b",
                           [], NoItinerary>;

    def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                          asmop # "\t$Rd.8h, $Rn.8h",
                          [], NoItinerary>;

    def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                          asmop # "\t$Rd.4s, $Rn.4s",
                          [], NoItinerary>;

    def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                          asmop # "\t$Rd.2d, $Rn.2d",
                          [], NoItinerary>;

    // 64-bit (D-register) variants.
    def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
                          (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
                          asmop # "\t$Rd.8b, $Rn.8b",
                          [], NoItinerary>;

    def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
                          asmop # "\t$Rd.4h, $Rn.4h",
                          [], NoItinerary>;

    def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
                          (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
                          asmop # "\t$Rd.2s, $Rn.2s",
                          [], NoItinerary>;
  }
}

// Signed/unsigned saturating accumulate of the opposite-signedness operand.
defm SUQADD : NeonI_2VMisc_BHSDsize_2Args<"suqadd", 0b0, 0b00011>;
defm USQADD : NeonI_2VMisc_BHSDsize_2Args<"usqadd", 0b1, 0b00011>;
7993
// Selection patterns for the accumulating B/H/S/D instructions: a binary
// operator (accumulator, operand) maps onto the tied-register instruction of
// the matching element size.
multiclass NeonI_2VMisc_BHSD_2Args_Pattern<string Prefix,
                                           SDPatternOperator Neon_Op> {
  def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$src), (v16i8 VPR128:$Rn))),
            (v16i8 (!cast<Instruction>(Prefix # 16b)
              (v16i8 VPR128:$src), (v16i8 VPR128:$Rn)))>;

  def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$src), (v8i16 VPR128:$Rn))),
            (v8i16 (!cast<Instruction>(Prefix # 8h)
              (v8i16 VPR128:$src), (v8i16 VPR128:$Rn)))>;

  def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$src), (v4i32 VPR128:$Rn))),
            (v4i32 (!cast<Instruction>(Prefix # 4s)
              (v4i32 VPR128:$src), (v4i32 VPR128:$Rn)))>;

  def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$src), (v2i64 VPR128:$Rn))),
            (v2i64 (!cast<Instruction>(Prefix # 2d)
              (v2i64 VPR128:$src), (v2i64 VPR128:$Rn)))>;

  def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$src), (v8i8 VPR64:$Rn))),
            (v8i8 (!cast<Instruction>(Prefix # 8b)
              (v8i8 VPR64:$src), (v8i8 VPR64:$Rn)))>;

  def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$src), (v4i16 VPR64:$Rn))),
            (v4i16 (!cast<Instruction>(Prefix # 4h)
              (v4i16 VPR64:$src), (v4i16 VPR64:$Rn)))>;

  def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$src), (v2i32 VPR64:$Rn))),
            (v2i32 (!cast<Instruction>(Prefix # 2s)
              (v2i32 VPR64:$src), (v2i32 VPR64:$Rn)))>;
}

defm : NeonI_2VMisc_BHSD_2Args_Pattern<"SUQADD", int_aarch64_neon_suqadd>;
defm : NeonI_2VMisc_BHSD_2Args_Pattern<"USQADD", int_aarch64_neon_usqadd>;
8027
// One-operand two-register-misc instructions that exist only for B/H/S element
// sizes (no D form in the architecture for these opcodes).  Unlike the BHSD
// multiclasses above, the selection pattern is attached directly to each def.
multiclass NeonI_2VMisc_BHSsizes<string asmop, bit U,
                          SDPatternOperator Neon_Op> {
  def 16b : NeonI_2VMisc<0b1, U, 0b00, 0b00100,
                         (outs VPR128:$Rd), (ins VPR128:$Rn),
                         asmop # "\t$Rd.16b, $Rn.16b",
                         [(set (v16i8 VPR128:$Rd),
                            (v16i8 (Neon_Op (v16i8 VPR128:$Rn))))],
                         NoItinerary>;

  def 8h : NeonI_2VMisc<0b1, U, 0b01, 0b00100,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.8h, $Rn.8h",
                        [(set (v8i16 VPR128:$Rd),
                           (v8i16 (Neon_Op (v8i16 VPR128:$Rn))))],
                        NoItinerary>;

  def 4s : NeonI_2VMisc<0b1, U, 0b10, 0b00100,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.4s, $Rn.4s",
                        [(set (v4i32 VPR128:$Rd),
                           (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
                        NoItinerary>;

  def 8b : NeonI_2VMisc<0b0, U, 0b00, 0b00100,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.8b, $Rn.8b",
                        [(set (v8i8 VPR64:$Rd),
                           (v8i8 (Neon_Op (v8i8 VPR64:$Rn))))],
                        NoItinerary>;

  def 4h : NeonI_2VMisc<0b0, U, 0b01, 0b00100,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.4h, $Rn.4h",
                        [(set (v4i16 VPR64:$Rd),
                           (v4i16 (Neon_Op (v4i16 VPR64:$Rn))))],
                        NoItinerary>;

  def 2s : NeonI_2VMisc<0b0, U, 0b10, 0b00100,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.2s, $Rn.2s",
                        [(set (v2i32 VPR64:$Rd),
                           (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
                        NoItinerary>;
}

// Count leading sign bits (intrinsic only) and count leading zeros (generic
// ctlz node, so plain IR ctlz selects directly to CLZ).
defm CLS : NeonI_2VMisc_BHSsizes<"cls", 0b0, int_arm_neon_vcls>;
defm CLZ : NeonI_2VMisc_BHSsizes<"clz", 0b1, ctlz>;
8075
// Byte-only two-register-misc instructions (16b and 8b arrangements only).
// Patterns are added separately below for the ops that have generic nodes.
multiclass NeonI_2VMisc_Bsize<string asmop, bit U, bits<2> size,
                              bits<5> Opcode> {
  def 16b : NeonI_2VMisc<0b1, U, size, Opcode,
                         (outs VPR128:$Rd), (ins VPR128:$Rn),
                         asmop # "\t$Rd.16b, $Rn.16b",
                         [], NoItinerary>;

  def 8b : NeonI_2VMisc<0b0, U, size, Opcode,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.8b, $Rn.8b",
                        [], NoItinerary>;
}

defm CNT : NeonI_2VMisc_Bsize<"cnt", 0b0, 0b00, 0b00101>;
defm NOT : NeonI_2VMisc_Bsize<"not", 0b1, 0b00, 0b00101>;
defm RBIT : NeonI_2VMisc_Bsize<"rbit", 0b1, 0b01, 0b00101>;

// "mvn" is the architectural alias for NOT; emit "not" by default (the 0
// suppresses printing the alias form).
def : NeonInstAlias<"mvn $Rd.16b, $Rn.16b",
                    (NOT16b VPR128:$Rd, VPR128:$Rn), 0>;
def : NeonInstAlias<"mvn $Rd.8b, $Rn.8b",
                    (NOT8b VPR64:$Rd, VPR64:$Rn), 0>;

// Population count maps the generic ctpop node onto CNT.
def : Pat<(v16i8 (ctpop (v16i8 VPR128:$Rn))),
          (v16i8 (CNT16b (v16i8 VPR128:$Rn)))>;
def : Pat<(v8i8 (ctpop (v8i8 VPR64:$Rn))),
          (v8i8 (CNT8b (v8i8 VPR64:$Rn)))>;
8102
// Select NOT for (xor x, all-ones).  NOT is byte-arrangement only, and a
// bitwise complement is element-size agnostic, so wider element types reuse
// the byte-form instruction via a bitconvert of the all-ones byte vector.
def : Pat<(v16i8 (xor
            (v16i8 VPR128:$Rn),
            (v16i8 Neon_AllOne))),
          (v16i8 (NOT16b (v16i8 VPR128:$Rn)))>;
def : Pat<(v8i8 (xor
            (v8i8 VPR64:$Rn),
            (v8i8 Neon_AllOne))),
          (v8i8 (NOT8b (v8i8 VPR64:$Rn)))>;
def : Pat<(v8i16 (xor
            (v8i16 VPR128:$Rn),
            (v8i16 (bitconvert (v16i8 Neon_AllOne))))),
          (NOT16b VPR128:$Rn)>;
def : Pat<(v4i16 (xor
            (v4i16 VPR64:$Rn),
            (v4i16 (bitconvert (v8i8 Neon_AllOne))))),
          (NOT8b VPR64:$Rn)>;
def : Pat<(v4i32 (xor
            (v4i32 VPR128:$Rn),
            (v4i32 (bitconvert (v16i8 Neon_AllOne))))),
          (NOT16b VPR128:$Rn)>;
def : Pat<(v2i32 (xor
            (v2i32 VPR64:$Rn),
            (v2i32 (bitconvert (v8i8 Neon_AllOne))))),
          (NOT8b VPR64:$Rn)>;
def : Pat<(v2i64 (xor
            (v2i64 VPR128:$Rn),
            (v2i64 (bitconvert (v16i8 Neon_AllOne))))),
          (NOT16b VPR128:$Rn)>;

// Bit reversal within each byte, from the AArch64-specific intrinsic.
def : Pat<(v16i8 (int_aarch64_neon_rbit (v16i8 VPR128:$Rn))),
          (v16i8 (RBIT16b (v16i8 VPR128:$Rn)))>;
def : Pat<(v8i8 (int_aarch64_neon_rbit (v8i8 VPR64:$Rn))),
          (v8i8 (RBIT8b (v8i8 VPR64:$Rn)))>;
8136
// One-operand floating-point two-register-misc instructions over the three FP
// arrangements: 4s and 2d (Q-register) and 2s (D-register).
multiclass NeonI_2VMisc_SDsizes<string asmop, bit U, bits<5> opcode,
                                SDPatternOperator Neon_Op> {
  def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.4s, $Rn.4s",
                        [(set (v4f32 VPR128:$Rd),
                           (v4f32 (Neon_Op (v4f32 VPR128:$Rn))))],
                        NoItinerary>;

  def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.2d, $Rn.2d",
                        [(set (v2f64 VPR128:$Rd),
                           (v2f64 (Neon_Op (v2f64 VPR128:$Rn))))],
                        NoItinerary>;

  def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.2s, $Rn.2s",
                        [(set (v2f32 VPR64:$Rd),
                           (v2f32 (Neon_Op (v2f32 VPR64:$Rn))))],
                        NoItinerary>;
}

// FP absolute value and negate select straight from the generic fabs/fneg.
defm FABS : NeonI_2VMisc_SDsizes<"fabs", 0b0, 0b01111, fabs>;
defm FNEG : NeonI_2VMisc_SDsizes<"fneg", 0b1, 0b01111, fneg>;
8163
// Integer narrowing instructions (element size halves).  The plain forms
// write the low 64 bits of the destination; the "2" (high-half) forms write
// the upper 64 bits and must preserve the lower half, hence the tied $src.
multiclass NeonI_2VMisc_HSD_Narrow<string asmop, bit U, bits<5> opcode> {
  def 8h8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.8b, $Rn.8h",
                          [], NoItinerary>;

  def 4s4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.4h, $Rn.4s",
                          [], NoItinerary>;

  def 2d2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.2s, $Rn.2d",
                          [], NoItinerary>;

  // High-half ("<op>2") variants: result lands in the top half of Rd.
  let Constraints = "$Rd = $src" in {
    def 8h16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
                             (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                             asmop # "2\t$Rd.16b, $Rn.8h",
                             [], NoItinerary>;

    def 4s8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                            asmop # "2\t$Rd.8h, $Rn.4s",
                            [], NoItinerary>;

    def 2d4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                            asmop # "2\t$Rd.4s, $Rn.2d",
                            [], NoItinerary>;
  }
}

// Plain and saturating narrows (signed, unsigned, signed-to-unsigned).
defm XTN : NeonI_2VMisc_HSD_Narrow<"xtn", 0b0, 0b10010>;
defm SQXTUN : NeonI_2VMisc_HSD_Narrow<"sqxtun", 0b1, 0b10010>;
defm SQXTN : NeonI_2VMisc_HSD_Narrow<"sqxtn", 0b0, 0b10100>;
defm UQXTN : NeonI_2VMisc_HSD_Narrow<"uqxtn", 0b1, 0b10100>;
8202
// Selection patterns for the integer narrows.  The low-half forms match the
// narrowing operator directly.  The high-half ("2") forms match a
// concat_vectors of the existing low half with a freshly narrowed high half;
// SUBREG_TO_REG widens the 64-bit $src into the tied 128-bit operand.
multiclass NeonI_2VMisc_Narrow_Patterns<string Prefix,
                                        SDPatternOperator Neon_Op> {
  def : Pat<(v8i8 (Neon_Op (v8i16 VPR128:$Rn))),
            (v8i8 (!cast<Instruction>(Prefix # 8h8b) (v8i16 VPR128:$Rn)))>;

  def : Pat<(v4i16 (Neon_Op (v4i32 VPR128:$Rn))),
            (v4i16 (!cast<Instruction>(Prefix # 4s4h) (v4i32 VPR128:$Rn)))>;

  def : Pat<(v2i32 (Neon_Op (v2i64 VPR128:$Rn))),
            (v2i32 (!cast<Instruction>(Prefix # 2d2s) (v2i64 VPR128:$Rn)))>;

  def : Pat<(v16i8 (concat_vectors
              (v8i8 VPR64:$src),
              (v8i8 (Neon_Op (v8i16 VPR128:$Rn))))),
            (!cast<Instruction>(Prefix # 8h16b)
              (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
              VPR128:$Rn)>;

  def : Pat<(v8i16 (concat_vectors
              (v4i16 VPR64:$src),
              (v4i16 (Neon_Op (v4i32 VPR128:$Rn))))),
            (!cast<Instruction>(Prefix # 4s8h)
              (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
              VPR128:$Rn)>;

  def : Pat<(v4i32 (concat_vectors
              (v2i32 VPR64:$src),
              (v2i32 (Neon_Op (v2i64 VPR128:$Rn))))),
            (!cast<Instruction>(Prefix # 2d4s)
              (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
              VPR128:$Rn)>;
}

// XTN is a plain truncate; the saturating forms use the ARM-shared intrinsics.
defm : NeonI_2VMisc_Narrow_Patterns<"XTN", trunc>;
defm : NeonI_2VMisc_Narrow_Patterns<"SQXTUN", int_arm_neon_vqmovnsu>;
defm : NeonI_2VMisc_Narrow_Patterns<"SQXTN", int_arm_neon_vqmovns>;
defm : NeonI_2VMisc_Narrow_Patterns<"UQXTN", int_arm_neon_vqmovnu>;
8240
// SHLL/SHLL2: shift each element left by exactly the element width while
// widening.  The immediate operand must equal the source element size
// (uimm_exact8/16/32), and decoding needs a custom method because the shift
// amount is implied by the size field rather than encoded as a literal.
multiclass NeonI_2VMisc_SHIFT<string asmop, bit U, bits<5> opcode> {
  let DecoderMethod = "DecodeSHLLInstruction" in {
    // Low-half sources (D register in, Q register out).
    def 8b8h : NeonI_2VMisc<0b0, U, 0b00, opcode,
                            (outs VPR128:$Rd),
                            (ins VPR64:$Rn, uimm_exact8:$Imm),
                            asmop # "\t$Rd.8h, $Rn.8b, $Imm",
                            [], NoItinerary>;

    def 4h4s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                            (outs VPR128:$Rd),
                            (ins VPR64:$Rn, uimm_exact16:$Imm),
                            asmop # "\t$Rd.4s, $Rn.4h, $Imm",
                            [], NoItinerary>;

    def 2s2d : NeonI_2VMisc<0b0, U, 0b10, opcode,
                            (outs VPR128:$Rd),
                            (ins VPR64:$Rn, uimm_exact32:$Imm),
                            asmop # "\t$Rd.2d, $Rn.2s, $Imm",
                            [], NoItinerary>;

    // High-half ("shll2") sources (upper 64 bits of a Q register).
    def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
                            (outs VPR128:$Rd),
                            (ins VPR128:$Rn, uimm_exact8:$Imm),
                            asmop # "2\t$Rd.8h, $Rn.16b, $Imm",
                            [], NoItinerary>;

    def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                            (outs VPR128:$Rd),
                            (ins VPR128:$Rn, uimm_exact16:$Imm),
                            asmop # "2\t$Rd.4s, $Rn.8h, $Imm",
                            [], NoItinerary>;

    def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
                            (outs VPR128:$Rd),
                            (ins VPR128:$Rn, uimm_exact32:$Imm),
                            asmop # "2\t$Rd.2d, $Rn.4s, $Imm",
                            [], NoItinerary>;
  }
}

defm SHLL : NeonI_2VMisc_SHIFT<"shll", 0b1, 0b10011>;
8282
// Select SHLL/SHLL2 for (shl (ext x), (vdup ElementSize)).  Note these only
// match when the splatted shift amount equals the element width exactly
// (uimm_exact*); shifts by other amounts — including 0 — must be handled by
// the general shift patterns elsewhere in this file.
class NeonI_SHLL_Patterns<ValueType OpTy, ValueType DesTy,
                          SDPatternOperator ExtOp, Operand Neon_Imm,
                          string suffix>
  : Pat<(DesTy (shl
          (DesTy (ExtOp (OpTy VPR64:$Rn))),
            (DesTy (Neon_vdup
              (i32 Neon_Imm:$Imm))))),
        (!cast<Instruction>("SHLL" # suffix) VPR64:$Rn, Neon_Imm:$Imm)>;

// As above, but the source is the high half of a Q register (SHLL2), picked
// out by the GetHigh extract-subvector fragment.
class NeonI_SHLL_High_Patterns<ValueType OpTy, ValueType DesTy,
                               SDPatternOperator ExtOp, Operand Neon_Imm,
                               string suffix, PatFrag GetHigh>
  : Pat<(DesTy (shl
          (DesTy (ExtOp
            (OpTy (GetHigh VPR128:$Rn)))),
              (DesTy (Neon_vdup
                (i32 Neon_Imm:$Imm))))),
        (!cast<Instruction>("SHLL" # suffix) VPR128:$Rn, Neon_Imm:$Imm)>;

// SHLL is signedness-agnostic (the shift fills with zeros), so both zext and
// sext extensions map to the same instruction.
def : NeonI_SHLL_Patterns<v8i8, v8i16, zext, uimm_exact8, "8b8h">;
def : NeonI_SHLL_Patterns<v8i8, v8i16, sext, uimm_exact8, "8b8h">;
def : NeonI_SHLL_Patterns<v4i16, v4i32, zext, uimm_exact16, "4h4s">;
def : NeonI_SHLL_Patterns<v4i16, v4i32, sext, uimm_exact16, "4h4s">;
def : NeonI_SHLL_Patterns<v2i32, v2i64, zext, uimm_exact32, "2s2d">;
def : NeonI_SHLL_Patterns<v2i32, v2i64, sext, uimm_exact32, "2s2d">;
def : NeonI_SHLL_High_Patterns<v8i8, v8i16, zext, uimm_exact8, "16b8h",
                               Neon_High16B>;
def : NeonI_SHLL_High_Patterns<v8i8, v8i16, sext, uimm_exact8, "16b8h",
                               Neon_High16B>;
def : NeonI_SHLL_High_Patterns<v4i16, v4i32, zext, uimm_exact16, "8h4s",
                               Neon_High8H>;
def : NeonI_SHLL_High_Patterns<v4i16, v4i32, sext, uimm_exact16, "8h4s",
                               Neon_High8H>;
def : NeonI_SHLL_High_Patterns<v2i32, v2i64, zext, uimm_exact32, "4s2d",
                               Neon_High4S>;
def : NeonI_SHLL_High_Patterns<v2i32, v2i64, sext, uimm_exact32, "4s2d",
                               Neon_High4S>;
8320
// FP narrowing (FCVTN family): single->half (4s->4h) and double->single
// (2d->2s), plus tied high-half "2" variants that fill the upper 64 bits.
multiclass NeonI_2VMisc_SD_Narrow<string asmop, bit U, bits<5> opcode> {
  def 4s4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.4h, $Rn.4s",
                          [], NoItinerary>;

  def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.2s, $Rn.2d",
                          [], NoItinerary>;

  let Constraints = "$src = $Rd" in {
    def 4s8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                            asmop # "2\t$Rd.8h, $Rn.4s",
                            [], NoItinerary>;

    def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                            asmop # "2\t$Rd.4s, $Rn.2d",
                            [], NoItinerary>;
  }
}

defm FCVTN : NeonI_2VMisc_SD_Narrow<"fcvtn", 0b0, 0b10110>;
8346
// Selection patterns for the FP narrows: f32->f16 goes through the ARM
// vcvtfp2hf intrinsic (half results are v4i16 bit patterns), f64->f32 through
// the given round operator.  High-half forms match concat_vectors as usual.
multiclass NeonI_2VMisc_Narrow_Pattern<string prefix,
                                       SDPatternOperator f32_to_f16_Op,
                                       SDPatternOperator f64_to_f32_Op> {

  def : Pat<(v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))),
              (!cast<Instruction>(prefix # "4s4h") (v4f32 VPR128:$Rn))>;

  def : Pat<(v8i16 (concat_vectors
                (v4i16 VPR64:$src),
                (v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))))),
                  (!cast<Instruction>(prefix # "4s8h")
                    (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
                    (v4f32 VPR128:$Rn))>;

  def : Pat<(v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))),
            (!cast<Instruction>(prefix # "2d2s") (v2f64 VPR128:$Rn))>;

  def : Pat<(v4f32 (concat_vectors
              (v2f32 VPR64:$src),
              (v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))))),
                (!cast<Instruction>(prefix # "2d4s")
                  (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
                  (v2f64 VPR128:$Rn))>;
}

defm : NeonI_2VMisc_Narrow_Pattern<"FCVTN", int_arm_neon_vcvtfp2hf, fround>;
8373
// FCVTXN: double->single narrow with round-to-odd; only the 2d source exists,
// so this multiclass also carries its own selection patterns (vcvtxn).
multiclass NeonI_2VMisc_D_Narrow<string asmop, string prefix, bit U,
                                 bits<5> opcode> {
  def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR64:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.2s, $Rn.2d",
                          [], NoItinerary>;

  def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                          asmop # "2\t$Rd.4s, $Rn.2d",
                          [], NoItinerary> {
    // High-half form preserves the low 64 bits of Rd.
    let Constraints = "$src = $Rd";
  }

  def : Pat<(v2f32 (int_aarch64_neon_vcvtxn (v2f64 VPR128:$Rn))),
            (!cast<Instruction>(prefix # "2d2s") VPR128:$Rn)>;

  def : Pat<(v4f32 (concat_vectors
              (v2f32 VPR64:$src),
              (v2f32 (int_aarch64_neon_vcvtxn (v2f64 VPR128:$Rn))))),
            (!cast<Instruction>(prefix # "2d4s")
               (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
               VPR128:$Rn)>;
}

defm FCVTXN : NeonI_2VMisc_D_Narrow<"fcvtxn","FCVTXN", 0b1, 0b10110>;

// Extract the upper two floats (elements 2..3) of a v4f32.
def Neon_High4Float : PatFrag<(ops node:$in),
                              (extract_subvector (v4f32 node:$in), (iPTR 2))>;
8403
// FCVTL/FCVTL2: FP lengthening conversions (half->single, single->double).
// Plain forms read a D register (low half); "2" forms read the high half of a
// Q register, so no tied operand is needed — Rd is fully written.
multiclass NeonI_2VMisc_HS_Extend<string asmop, bit U, bits<5> opcode> {
  def 4h4s : NeonI_2VMisc<0b0, U, 0b00, opcode,
                          (outs VPR128:$Rd), (ins VPR64:$Rn),
                          asmop # "\t$Rd.4s, $Rn.4h",
                          [], NoItinerary>;

  def 2s2d : NeonI_2VMisc<0b0, U, 0b01, opcode,
                          (outs VPR128:$Rd), (ins VPR64:$Rn),
                          asmop # "\t$Rd.2d, $Rn.2s",
                          [], NoItinerary>;

  def 8h4s : NeonI_2VMisc<0b1, U, 0b00, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$Rn),
                          asmop # "2\t$Rd.4s, $Rn.8h",
                          [], NoItinerary>;

  def 4s2d : NeonI_2VMisc<0b1, U, 0b01, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$Rn),
                          asmop # "2\t$Rd.2d, $Rn.4s",
                          [], NoItinerary>;
}

defm FCVTL : NeonI_2VMisc_HS_Extend<"fcvtl", 0b0, 0b10111>;

// Selection: half->single via the ARM vcvthf2fp intrinsic, single->double via
// generic fextend; high-half sources are matched with the Neon_High*
// extract-subvector fragments.
multiclass NeonI_2VMisc_Extend_Pattern<string prefix> {
  def : Pat<(v4f32 (int_arm_neon_vcvthf2fp (v4i16 VPR64:$Rn))),
            (!cast<Instruction>(prefix # "4h4s") VPR64:$Rn)>;

  def : Pat<(v4f32 (int_arm_neon_vcvthf2fp
              (v4i16 (Neon_High8H
                (v8i16 VPR128:$Rn))))),
            (!cast<Instruction>(prefix # "8h4s") VPR128:$Rn)>;

  def : Pat<(v2f64 (fextend (v2f32 VPR64:$Rn))),
            (!cast<Instruction>(prefix # "2s2d") VPR64:$Rn)>;

  def : Pat<(v2f64 (fextend
              (v2f32 (Neon_High4Float
                (v4f32 VPR128:$Rn))))),
            (!cast<Instruction>(prefix # "4s2d") VPR128:$Rn)>;
}

defm : NeonI_2VMisc_Extend_Pattern<"FCVTL">;
8447
// Generic S/D-arrangement conversion multiclass parameterized over the result
// and operand types of each arrangement, shared by the fp->int, int->fp and
// fp->fp families below.  Size is one bit of the 2-bit size field; the other
// bit distinguishes the 4s/2s (0) and 2d (1) arrangements.
multiclass NeonI_2VMisc_SD_Conv<string asmop, bit Size, bit U, bits<5> opcode,
                                ValueType ResTy4s, ValueType OpTy4s,
                                ValueType ResTy2d, ValueType OpTy2d,
                                ValueType ResTy2s, ValueType OpTy2s,
                                SDPatternOperator Neon_Op> {

  def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.4s, $Rn.4s",
                        [(set (ResTy4s VPR128:$Rd),
                           (ResTy4s (Neon_Op (OpTy4s VPR128:$Rn))))],
                        NoItinerary>;

  def 2d : NeonI_2VMisc<0b1, U, {Size, 0b1}, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.2d, $Rn.2d",
                        [(set (ResTy2d VPR128:$Rd),
                           (ResTy2d (Neon_Op (OpTy2d VPR128:$Rn))))],
                        NoItinerary>;

  def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.2s, $Rn.2s",
                        [(set (ResTy2s VPR64:$Rd),
                           (ResTy2s (Neon_Op (OpTy2s VPR64:$Rn))))],
                        NoItinerary>;
}

// FP -> integer conversions (float operands, integer results).
multiclass NeonI_2VMisc_fp_to_int<string asmop, bit Size, bit U,
                                  bits<5> opcode, SDPatternOperator Neon_Op> {
  defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4i32, v4f32, v2i64,
                                v2f64, v2i32, v2f32, Neon_Op>;
}

// Rounding-mode variants: N = to-nearest-ties-even, P = toward +inf,
// M = toward -inf, Z = toward zero, A = to-nearest-ties-away.  FCVTZS/FCVTZU
// implement the generic fp_to_sint/fp_to_uint nodes.
defm FCVTNS : NeonI_2VMisc_fp_to_int<"fcvtns", 0b0, 0b0, 0b11010,
                                     int_arm_neon_vcvtns>;
defm FCVTNU : NeonI_2VMisc_fp_to_int<"fcvtnu", 0b0, 0b1, 0b11010,
                                     int_arm_neon_vcvtnu>;
defm FCVTPS : NeonI_2VMisc_fp_to_int<"fcvtps", 0b1, 0b0, 0b11010,
                                     int_arm_neon_vcvtps>;
defm FCVTPU : NeonI_2VMisc_fp_to_int<"fcvtpu", 0b1, 0b1, 0b11010,
                                     int_arm_neon_vcvtpu>;
defm FCVTMS : NeonI_2VMisc_fp_to_int<"fcvtms", 0b0, 0b0, 0b11011,
                                     int_arm_neon_vcvtms>;
defm FCVTMU : NeonI_2VMisc_fp_to_int<"fcvtmu", 0b0, 0b1, 0b11011,
                                     int_arm_neon_vcvtmu>;
defm FCVTZS : NeonI_2VMisc_fp_to_int<"fcvtzs", 0b1, 0b0, 0b11011, fp_to_sint>;
defm FCVTZU : NeonI_2VMisc_fp_to_int<"fcvtzu", 0b1, 0b1, 0b11011, fp_to_uint>;
defm FCVTAS : NeonI_2VMisc_fp_to_int<"fcvtas", 0b0, 0b0, 0b11100,
                                     int_arm_neon_vcvtas>;
defm FCVTAU : NeonI_2VMisc_fp_to_int<"fcvtau", 0b0, 0b1, 0b11100,
                                     int_arm_neon_vcvtau>;

// Integer -> FP conversions (integer operands, float results).
multiclass NeonI_2VMisc_int_to_fp<string asmop, bit Size, bit U,
                                  bits<5> opcode, SDPatternOperator Neon_Op> {
  defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4f32, v4i32, v2f64,
                                v2i64, v2f32, v2i32, Neon_Op>;
}

defm SCVTF : NeonI_2VMisc_int_to_fp<"scvtf", 0b0, 0b0, 0b11101, sint_to_fp>;
defm UCVTF : NeonI_2VMisc_int_to_fp<"ucvtf", 0b0, 0b1, 0b11101, uint_to_fp>;
8509
// FP -> FP unary operations (same type in and out): rounding, reciprocal
// estimates and square root.
multiclass NeonI_2VMisc_fp_to_fp<string asmop, bit Size, bit U,
                                 bits<5> opcode, SDPatternOperator Neon_Op> {
  defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4f32, v4f32, v2f64,
                                v2f64, v2f32, v2f32, Neon_Op>;
}

// Round-to-integral in each rounding mode; most map to generic FP nodes
// (frnd/fceil/ffloor/frint/ftrunc/fnearbyint), FRINTN uses an AArch64
// intrinsic as there is no generic round-to-nearest-even node here.
defm FRINTN : NeonI_2VMisc_fp_to_fp<"frintn", 0b0, 0b0, 0b11000,
                                     int_aarch64_neon_frintn>;
defm FRINTA : NeonI_2VMisc_fp_to_fp<"frinta", 0b0, 0b1, 0b11000, frnd>;
defm FRINTP : NeonI_2VMisc_fp_to_fp<"frintp", 0b1, 0b0, 0b11000, fceil>;
defm FRINTM : NeonI_2VMisc_fp_to_fp<"frintm", 0b0, 0b0, 0b11001, ffloor>;
defm FRINTX : NeonI_2VMisc_fp_to_fp<"frintx", 0b0, 0b1, 0b11001, frint>;
defm FRINTZ : NeonI_2VMisc_fp_to_fp<"frintz", 0b1, 0b0, 0b11001, ftrunc>;
defm FRINTI : NeonI_2VMisc_fp_to_fp<"frinti", 0b1, 0b1, 0b11001, fnearbyint>;
defm FRECPE : NeonI_2VMisc_fp_to_fp<"frecpe", 0b1, 0b0, 0b11101,
                                    int_arm_neon_vrecpe>;
defm FRSQRTE : NeonI_2VMisc_fp_to_fp<"frsqrte", 0b1, 0b1, 0b11101,
                                     int_arm_neon_vrsqrte>;
defm FSQRT : NeonI_2VMisc_fp_to_fp<"fsqrt", 0b1, 0b1, 0b11111, fsqrt>;
8529
// Two-register-misc unary integer operations available only on 32-bit
// element vectors (.4s and .2s): used below for URECPE/URSQRTE.  Neon_Op
// is the intrinsic/node the pattern selects from.
multiclass NeonI_2VMisc_S_Conv<string asmop, bit Size, bit U,
                               bits<5> opcode, SDPatternOperator Neon_Op> {
  // 128-bit form: four 32-bit lanes (Q bit = 1).
  def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode,
                        (outs VPR128:$Rd), (ins VPR128:$Rn),
                        asmop # "\t$Rd.4s, $Rn.4s",
                        [(set (v4i32 VPR128:$Rd),
                           (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
                        NoItinerary>;

  // 64-bit form: two 32-bit lanes (Q bit = 0).
  def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
                        (outs VPR64:$Rd), (ins VPR64:$Rn),
                        asmop # "\t$Rd.2s, $Rn.2s",
                        [(set (v2i32 VPR64:$Rd),
                           (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
                        NoItinerary>;
}
8546
// Unsigned reciprocal / reciprocal-sqrt estimate; selected from the same
// ARM-shared estimate intrinsics as the FP forms above.
defm URECPE : NeonI_2VMisc_S_Conv<"urecpe", 0b1, 0b0, 0b11100,
                                  int_arm_neon_vrecpe>;
defm URSQRTE : NeonI_2VMisc_S_Conv<"ursqrte", 0b1, 0b1, 0b11100,
                                   int_arm_neon_vrsqrte>;
8551
// Crypto Class

// AES round instructions (aese/aesd): the pattern takes two v16i8 inputs
// and the state operand $src is tied to the destination $Rd, so the asm
// string only prints $Rd and $Rn.  Requires both NEON and Crypto features.
class NeonI_Cryptoaes_2v<bits<2> size, bits<5> opcode,
                         string asmop, SDPatternOperator opnode>
  : NeonI_Crypto_AES<size, opcode,
                     (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                     asmop # "\t$Rd.16b, $Rn.16b",
                     [(set (v16i8 VPR128:$Rd),
                        (v16i8 (opnode (v16i8 VPR128:$src),
                                       (v16i8 VPR128:$Rn))))],
                     NoItinerary>{
  // State register is read-modify-write.
  let Constraints = "$src = $Rd";
  let Predicates = [HasNEON, HasCrypto];
}

def AESE : NeonI_Cryptoaes_2v<0b00, 0b00100, "aese", int_arm_neon_aese>;
def AESD : NeonI_Cryptoaes_2v<0b00, 0b00101, "aesd", int_arm_neon_aesd>;
8568
// AES mix-columns instructions (aesmc/aesimc): plain unary v16i8 -> v16i8,
// no tied operand, unlike NeonI_Cryptoaes_2v.
class NeonI_Cryptoaes<bits<2> size, bits<5> opcode,
                      string asmop, SDPatternOperator opnode>
  : NeonI_Crypto_AES<size, opcode,
                     (outs VPR128:$Rd), (ins VPR128:$Rn),
                     asmop # "\t$Rd.16b, $Rn.16b",
                     [(set (v16i8 VPR128:$Rd),
                        (v16i8 (opnode (v16i8 VPR128:$Rn))))],
                     NoItinerary>;

def AESMC : NeonI_Cryptoaes<0b00, 0b00110, "aesmc", int_arm_neon_aesmc>;
def AESIMC : NeonI_Cryptoaes<0b00, 0b00111, "aesimc", int_arm_neon_aesimc>;
8580
// SHA schedule-update instructions with two vector inputs and an
// accumulating destination ($src tied to $Rd); operates on .4s lanes.
class NeonI_Cryptosha_vv<bits<2> size, bits<5> opcode,
                         string asmop, SDPatternOperator opnode>
  : NeonI_Crypto_SHA<size, opcode,
                     (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                     asmop # "\t$Rd.4s, $Rn.4s",
                     [(set (v4i32 VPR128:$Rd),
                        (v4i32 (opnode (v4i32 VPR128:$src),
                                       (v4i32 VPR128:$Rn))))],
                     NoItinerary> {
  // Accumulator is read-modify-write.
  let Constraints = "$src = $Rd";
  let Predicates = [HasNEON, HasCrypto];
}

def SHA1SU1 : NeonI_Cryptosha_vv<0b00, 0b00001, "sha1su1",
                                 int_arm_neon_sha1su1>;
def SHA256SU0 : NeonI_Cryptosha_vv<0b00, 0b00010, "sha256su0",
                                   int_arm_neon_sha256su0>;
8598
// Scalar SHA hash instruction (sha1h): single 32-bit FP-register operand,
// modelled as v1i32; no tied operand.
class NeonI_Cryptosha_ss<bits<2> size, bits<5> opcode,
                         string asmop, SDPatternOperator opnode>
  : NeonI_Crypto_SHA<size, opcode,
                     (outs FPR32:$Rd), (ins FPR32:$Rn),
                     asmop # "\t$Rd, $Rn",
                     [(set (v1i32 FPR32:$Rd),
                        (v1i32 (opnode (v1i32 FPR32:$Rn))))],
                     NoItinerary> {
  let Predicates = [HasNEON, HasCrypto];
}

def SHA1H : NeonI_Cryptosha_ss<0b00, 0b00000, "sha1h", int_arm_neon_sha1h>;
8611
// Three-operand SHA schedule-update instructions: accumulator plus two
// .4s vector inputs, accumulator tied to destination.
class NeonI_Cryptosha3_vvv<bits<2> size, bits<3> opcode, string asmop,
                           SDPatternOperator opnode>
  : NeonI_Crypto_3VSHA<size, opcode,
                       (outs VPR128:$Rd),
                       (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
                       asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
                       [(set (v4i32 VPR128:$Rd),
                          (v4i32 (opnode (v4i32 VPR128:$src),
                                         (v4i32 VPR128:$Rn),
                                         (v4i32 VPR128:$Rm))))],
                       NoItinerary> {
  // Accumulator is read-modify-write.
  let Constraints = "$src = $Rd";
  let Predicates = [HasNEON, HasCrypto];
}

def SHA1SU0 : NeonI_Cryptosha3_vvv<0b00, 0b011, "sha1su0",
                                   int_arm_neon_sha1su0>;
def SHA256SU1 : NeonI_Cryptosha3_vvv<0b00, 0b110, "sha256su1",
                                     int_arm_neon_sha256su1>;
8631
// SHA256 hash instructions: two 128-bit scalar (FPR128, printed without a
// lane suffix) operands plus one .4s vector operand; first operand tied to
// the destination.
class NeonI_Cryptosha3_qqv<bits<2> size, bits<3> opcode, string asmop,
                           SDPatternOperator opnode>
  : NeonI_Crypto_3VSHA<size, opcode,
                       (outs FPR128:$Rd),
                       (ins FPR128:$src, FPR128:$Rn, VPR128:$Rm),
                       asmop # "\t$Rd, $Rn, $Rm.4s",
                       [(set (v4i32 FPR128:$Rd),
                          (v4i32 (opnode (v4i32 FPR128:$src),
                                         (v4i32 FPR128:$Rn),
                                         (v4i32 VPR128:$Rm))))],
                       NoItinerary> {
  // Hash state is read-modify-write.
  let Constraints = "$src = $Rd";
  let Predicates = [HasNEON, HasCrypto];
}

def SHA256H : NeonI_Cryptosha3_qqv<0b00, 0b100, "sha256h",
                                   int_arm_neon_sha256h>;
def SHA256H2 : NeonI_Cryptosha3_qqv<0b00, 0b101, "sha256h2",
                                    int_arm_neon_sha256h2>;
8651
// SHA1 hash instructions: 128-bit state operand (tied to $Rd), a 32-bit
// scalar (v1i32 in FPR32) and a .4s vector operand.
class NeonI_Cryptosha3_qsv<bits<2> size, bits<3> opcode, string asmop,
                           SDPatternOperator opnode>
  : NeonI_Crypto_3VSHA<size, opcode,
                       (outs FPR128:$Rd),
                       (ins FPR128:$src, FPR32:$Rn, VPR128:$Rm),
                       asmop # "\t$Rd, $Rn, $Rm.4s",
                       [(set (v4i32 FPR128:$Rd),
                          (v4i32 (opnode (v4i32 FPR128:$src),
                                         (v1i32 FPR32:$Rn),
                                         (v4i32 VPR128:$Rm))))],
                       NoItinerary> {
  // Hash state is read-modify-write.
  let Constraints = "$src = $Rd";
  let Predicates = [HasNEON, HasCrypto];
}

// These use AArch64-specific intrinsics (scalar hash-element operand).
def SHA1C : NeonI_Cryptosha3_qsv<0b00, 0b000, "sha1c", int_aarch64_neon_sha1c>;
def SHA1P : NeonI_Cryptosha3_qsv<0b00, 0b001, "sha1p", int_aarch64_neon_sha1p>;
def SHA1M : NeonI_Cryptosha3_qsv<0b00, 0b010, "sha1m", int_aarch64_neon_sha1m>;
8670
// Additional patterns to match shl to USHL.
// A left shift by a vector of per-lane amounts maps directly onto USHL
// (no negation needed).  The v1i64 pattern uses the scalar USHLddd form,
// covering ISel of v1i64 shl (including shift-by-zero).
def : Pat<(v8i8 (shl (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
          (USHLvvv_8B $Rn, $Rm)>;
def : Pat<(v4i16 (shl (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
          (USHLvvv_4H $Rn, $Rm)>;
def : Pat<(v2i32 (shl (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
          (USHLvvv_2S $Rn, $Rm)>;
def : Pat<(v1i64 (shl (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
          (USHLddd $Rn, $Rm)>;
def : Pat<(v16i8 (shl (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
          (USHLvvv_16B $Rn, $Rm)>;
def : Pat<(v8i16 (shl (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
          (USHLvvv_8H $Rn, $Rm)>;
def : Pat<(v4i32 (shl (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
          (USHLvvv_4S $Rn, $Rm)>;
def : Pat<(v2i64 (shl (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
          (USHLvvv_2D $Rn, $Rm)>;
8688
// Additional patterns to match sra, srl.
// For a vector right shift by vector, the shift amounts of SSHL/USHL are
// negative. Negate the vector of shift amount first.
// srl (logical right shift) -> USHL with negated amounts:
def : Pat<(v8i8 (srl (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
          (USHLvvv_8B $Rn, (NEG8b $Rm))>;
def : Pat<(v4i16 (srl (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
          (USHLvvv_4H $Rn, (NEG4h $Rm))>;
def : Pat<(v2i32 (srl (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
          (USHLvvv_2S $Rn, (NEG2s $Rm))>;
def : Pat<(v1i64 (srl (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
          (USHLddd $Rn, (NEGdd $Rm))>;
def : Pat<(v16i8 (srl (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
          (USHLvvv_16B $Rn, (NEG16b $Rm))>;
def : Pat<(v8i16 (srl (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
          (USHLvvv_8H $Rn, (NEG8h $Rm))>;
def : Pat<(v4i32 (srl (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
          (USHLvvv_4S $Rn, (NEG4s $Rm))>;
def : Pat<(v2i64 (srl (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
          (USHLvvv_2D $Rn, (NEG2d $Rm))>;

// sra (arithmetic right shift) -> SSHL with negated amounts:
def : Pat<(v8i8 (sra (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
          (SSHLvvv_8B $Rn, (NEG8b $Rm))>;
def : Pat<(v4i16 (sra (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
          (SSHLvvv_4H $Rn, (NEG4h $Rm))>;
def : Pat<(v2i32 (sra (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
          (SSHLvvv_2S $Rn, (NEG2s $Rm))>;
def : Pat<(v1i64 (sra (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
          (SSHLddd $Rn, (NEGdd $Rm))>;
def : Pat<(v16i8 (sra (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
          (SSHLvvv_16B $Rn, (NEG16b $Rm))>;
def : Pat<(v8i16 (sra (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
          (SSHLvvv_8H $Rn, (NEG8h $Rm))>;
def : Pat<(v4i32 (sra (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
          (SSHLvvv_4S $Rn, (NEG4s $Rm))>;
def : Pat<(v2i64 (sra (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
          (SSHLvvv_2D $Rn, (NEG2d $Rm))>;
8725
//
// Patterns for handling half-precision values
//

// Convert f16 value coming in as i16 value to f32.  The i16 is moved from
// a GPR into an FP register (FMOVsw) and narrowed to the h sub-register
// before FCVTsh widens it; the `and ..., 65535` / assertzext forms match
// the two ways the legalizer proves the upper bits are zero.
def : Pat<(f32 (f16_to_f32 (i32 (and (i32 GPR32:$Rn), 65535)))),
          (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>;
def : Pat<(f32 (f16_to_f32 (i32 (assertzext GPR32:$Rn)))),
          (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>;

// f32 -> f16 -> f32 round-trip folds to the source register.
// NOTE(review): this drops the precision loss of the narrowing step;
// presumably acceptable for the lowering this targets — confirm.
def : Pat<(f32 (f16_to_f32 (i32 (assertzext (i32 (
            f32_to_f16 (f32 FPR32:$Rn))))))),
          (f32 FPR32:$Rn)>;

// Patterns for vector extract of half-precision FP value in i16 storage type.
// The extracted lane is duplicated into an h register (DUPhv_H) and widened
// with FCVTsh; 64-bit sources are first widened to v8i16 via SUBREG_TO_REG.
def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract
            (v4i16 VPR64:$Rn), neon_uimm2_bare:$Imm)), 65535)))),
          (FCVTsh (f16 (DUPhv_H
            (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
            neon_uimm2_bare:$Imm)))>;

def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract
            (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)), 65535)))),
          (FCVTsh (f16 (DUPhv_H (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)))>;
8750
// Patterns for vector insert of half-precision FP value 0 in i16 storage type.
// A zero half is materialized by FMOVsw WZR, its h sub-register extracted,
// widened to v8i16 with SUBREG_TO_REG, and lane-inserted with INSELh.
def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
            (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))),
            (neon_uimm3_bare:$Imm))),
          (v8i16 (INSELh (v8i16 VPR128:$Rn),
            (v8i16 (SUBREG_TO_REG (i64 0),
              (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)),
              sub_16)),
            neon_uimm3_bare:$Imm, 0))>;

// Same as above for a 64-bit destination: widen to v8i16, insert, then
// extract the low 64 bits back out.
def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
            (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))),
            (neon_uimm2_bare:$Imm))),
          (v4i16 (EXTRACT_SUBREG
            (v8i16 (INSELh
              (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              (v8i16 (SUBREG_TO_REG (i64 0),
                (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)),
                sub_16)),
              neon_uimm2_bare:$Imm, 0)),
            sub_64))>;

// Patterns for vector insert of half-precision FP value in i16 storage type.
// The i16 payload arrives in a GPR; it is moved into an FP register and the
// h sub-register inserted into the chosen lane.
def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
            (i32 (assertsext (i32 (fp_to_sint
              (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))),
            (neon_uimm3_bare:$Imm))),
          (v8i16 (INSELh (v8i16 VPR128:$Rn),
            (v8i16 (SUBREG_TO_REG (i64 0),
              (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)),
              sub_16)),
            neon_uimm3_bare:$Imm, 0))>;

def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
            (i32 (assertsext (i32 (fp_to_sint
              (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))),
            (neon_uimm2_bare:$Imm))),
          (v4i16 (EXTRACT_SUBREG
            (v8i16 (INSELh
              (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              (v8i16 (SUBREG_TO_REG (i64 0),
                (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)),
                sub_16)),
              neon_uimm2_bare:$Imm, 0)),
            sub_64))>;

// Lane-to-lane move of an i16 element: extract + insert folds to INSELh.
def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
            (i32 (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)),
              (neon_uimm3_bare:$Imm1))),
          (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src),
            neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>;

// Patterns for vector copy of half-precision FP value in i16 storage type.
// The extract/convert/convert-back/insert chain is a lane copy; fold the
// whole thing to a single INSELh.
def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
            (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32
              (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)),
              65535)))))))),
            (neon_uimm3_bare:$Imm1))),
          (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src),
            neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>;

// NOTE(review): the v4i16 variant below uses neon_uimm3_bare for both lane
// indices where the sibling v4i16 patterns above use neon_uimm2_bare; a
// DAG index on a v4i16 can only be 0-3 so this appears harmless, but it is
// inconsistent — confirm whether uimm2 was intended.
def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
            (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32
              (vector_extract (v4i16 VPR64:$src), neon_uimm3_bare:$Imm2)),
              65535)))))))),
            (neon_uimm3_bare:$Imm1))),
          (v4i16 (EXTRACT_SUBREG
            (v8i16 (INSELh
              (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
              (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
              neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2)),
            sub_64))>;
8823
8824